开发中经常会遇到需要在 GBK 和 UTF-8 之间互相转换的情况,下面的代码实现了这样的功能,希望对大家有用~
// GBK to UTF-8
/**
 * Convert a NUL-terminated GBK string to UTF-8.
 *
 * The input is first handed to string_gbk2unicode(), which fills a byte
 * buffer; every pair of bytes in that buffer is then combined (first byte
 * as the high byte) into one 16-bit code unit and encoded with
 * UCS2toUTF8Code().
 *
 * NOTE(review): the return value of string_gbk2unicode() is treated as the
 * number of bytes it wrote, because the loop consumes two bytes per
 * iteration -- confirm against the helper's definition.
 *
 * @param szSource  GBK text, NUL-terminated. NULL is rejected.
 * @param strDest   Receives the UTF-8 text on success; left untouched on
 *                  failure.
 * @return 0 on success, -1 if szSource is NULL or a code unit could not be
 *         encoded.
 */
int API_Gbk2Utf8(const char *szSource, string &strDest)
{
    if (szSource == NULL)
        return -1;

    const size_t srcLen = strlen(szSource);
    if (srcLen == 0)
    {
        // Nothing to convert; also avoids a zero-sized work buffer.
        strDest.clear();
        return 0;
    }

    // Heap-backed work buffer instead of a stack VLA, so large inputs
    // cannot overflow the stack.
    string uniBuf(srcLen * 2, '\0');
    char *szUniString = &uniBuf[0];

    int iLen = string_gbk2unicode(szSource, szUniString, srcLen);
    // Never read past the buffer we own, whatever the helper reports.
    if (iLen > 0 && (size_t)iLen > uniBuf.size())
        iLen = (int)uniBuf.size();

    string utf8;
    utf8.reserve(srcLen * 3);

    // "i + 1 < iLen" keeps the second byte of each pair inside the data;
    // a trailing unpaired byte is ignored.
    for (int i = 0; i + 1 < iLen; i += 2)
    {
        unsigned char pTemp[4] = {0};
        unsigned short iTemp = (unsigned short)(
            (((unsigned char)szUniString[i]) << 8) + (szUniString[i + 1] & 0xff));

        int len = UCS2toUTF8Code(iTemp, pTemp);
        // 0 is the helper's failure value; also refuse lengths that would
        // not fit in pTemp.
        if (len <= 0 || len > (int)sizeof(pTemp))
            return -1;

        utf8.append((const char *)pTemp, (size_t)len);
    }

    strDest = utf8;
    return 0;
}
// UTF-8 to GBK
/**
 * Convert str from UTF-8 to GBK in place, using iconv.
 *
 * The whole of str (str.length() bytes) is converted, and the result is
 * stored with its exact byte length, so embedded NUL bytes do not truncate
 * the output.
 *
 * When iconv reports an invalid input sequence (EILSEQ), up to two input
 * bytes are skipped and conversion continues with the rest of the input.
 *
 * @param str  UTF-8 text on entry; GBK text on successful return. Left
 *             unchanged on failure.
 * @return 0 on success, -1 if the converter could not be opened, -2 if
 *         iconv failed with any error other than EILSEQ.
 */
int API_IC_UTFToGBK(string& str)
{
    size_t inLeft = str.length();

    // Output buffer is allocated before the converter is opened so that an
    // allocation failure (which throws) cannot leak the iconv descriptor.
    const size_t outCap = inLeft * 4 + 1;
    string outBuf(outCap, '\0');

    iconv_t cd = iconv_open("GBK", "UTF-8");
    if (cd == (iconv_t)-1)  // iconv_open signals failure with (iconv_t)-1, not 0
        return -1;

    // iconv takes a non-const input pointer but does not modify the input.
    char *inPtr = const_cast<char *>(str.data());
    char *outPtr = &outBuf[0];
    size_t outLeft = outCap;

    // The length arguments are real size_t objects; passing the address of
    // an int cast to size_t* corrupts memory where size_t is wider than int.
    while (inLeft > 0)
    {
        if (iconv(cd, &inPtr, &inLeft, &outPtr, &outLeft) != (size_t)-1)
            continue;

        if (errno != EILSEQ)
        {
            iconv_close(cd);
            return -2;
        }

        // Skip the offending bytes, but never step past the end of input.
        const size_t skip = (inLeft < 2) ? inLeft : 2;
        inPtr += skip;
        inLeft -= skip;
    }

    iconv_close(cd);

    // Length-aware assignment keeps any NUL bytes in the converted data.
    str.assign(outBuf.data(), outCap - outLeft);
    return 0;
}
datoucai on #
string_gbk2unicode 和 UCS2toUTF8Code 这两个函数是需要再单独实现吗?
Reply
c/c++程序员之家 on #
这个有个最大的缺点啊,二进制流UTF8转GBK的时候,字符串会被截断。比如 byte *str,用这个string.c_str()的时候,字符串会被截断。
Reply
Dante on #
二进制流的时候,也不该转utf8或者gbk呀。。
Reply
c/c++程序员之家 on #
那二进制流utf8,该如何安全转换到gbk编码呢???
Reply
Dante on #
所谓utf8,gbk都是文本格式的编码,不会有二进制的转化。。
Reply