Commit 64bdbda2 authored by DailyShana's avatar DailyShana

update encode/decode utf8

parent f486f0c1
...@@ -60,7 +60,7 @@ public: ...@@ -60,7 +60,7 @@ public:
*pstr = 0; *pstr = 0;
return l; return l;
} }
// UCS-2 to UTF-8 // UTF-16/UTF-32 to UTF-8
static int EncodeUTF8(const wchar_t * wsrc, char * str) { static int EncodeUTF8(const wchar_t * wsrc, char * str) {
char* pstr = str; char* pstr = str;
while(*wsrc != 0) { while(*wsrc != 0) {
...@@ -71,18 +71,35 @@ public: ...@@ -71,18 +71,35 @@ public:
str[0] = ((*wsrc >> 6) & 0x1f) | 0xc0; str[0] = ((*wsrc >> 6) & 0x1f) | 0xc0;
str[1] = ((*wsrc) & 0x3f) | 0x80; str[1] = ((*wsrc) & 0x3f) | 0x80;
str += 2; str += 2;
} else { } else if(*wsrc < 0x10000 && (*wsrc < 0xd800 || *wsrc > 0xdfff)) {
str[0] = ((*wsrc >> 12) & 0xf) | 0xe0; str[0] = ((*wsrc >> 12) & 0xf) | 0xe0;
str[1] = ((*wsrc >> 6) & 0x3f) | 0x80; str[1] = ((*wsrc >> 6) & 0x3f) | 0x80;
str[2] = ((*wsrc) & 0x3f) | 0x80; str[2] = ((*wsrc) & 0x3f) | 0x80;
str += 3; str += 3;
} else {
#ifdef _WIN32
unsigned unicode = 0;
unicode |= (*wsrc++ & 0x3ff) << 10;
unicode |= *wsrc & 0x3ff;
unicode += 0x10000;
str[0] = ((unicode >> 18) & 0x7) | 0xf0;
str[1] = ((unicode >> 12) & 0x3f) | 0x80;
str[2] = ((unicode >> 6) & 0x3f) | 0x80;
str[3] = ((unicode) & 0x3f) | 0x80;
#else
str[0] = ((*wsrc >> 18) & 0x7) | 0xf0;
str[1] = ((*wsrc >> 12) & 0x3f) | 0x80;
str[2] = ((*wsrc >> 6) & 0x3f) | 0x80;
str[3] = ((*wsrc) & 0x3f) | 0x80;
#endif // _WIN32
str += 4;
} }
wsrc++; wsrc++;
} }
*str = 0; *str = 0;
return str - pstr; return str - pstr;
} }
// UTF-8 to UCS-2 // UTF-8 to UTF-16/UTF-32
static int DecodeUTF8(const char * src, wchar_t * wstr) { static int DecodeUTF8(const char * src, wchar_t * wstr) {
const char* p = src; const char* p = src;
wchar_t* wp = wstr; wchar_t* wp = wstr;
...@@ -97,7 +114,14 @@ public: ...@@ -97,7 +114,14 @@ public:
*wp = (((unsigned)p[0] & 0xf) << 12) | (((unsigned)p[1] & 0x3f) << 6) | ((unsigned)p[2] & 0x3f); *wp = (((unsigned)p[0] & 0xf) << 12) | (((unsigned)p[1] & 0x3f) << 6) | ((unsigned)p[2] & 0x3f);
p += 3; p += 3;
} else if((*p & 0xf8) == 0xf0) { } else if((*p & 0xf8) == 0xf0) {
#ifdef _WIN32
unsigned unicode = (((unsigned)p[0] & 0x7) << 18) | (((unsigned)p[1] & 0x3f) << 12) | (((unsigned)p[2] & 0x3f) << 6) | ((unsigned)p[3] & 0x3f);
unicode -= 0x10000;
*wp++ = (unicode >> 10) | 0xd800;
*wp = (unicode & 0x3ff) | 0xdc00;
#else
*wp = (((unsigned)p[0] & 0x7) << 18) | (((unsigned)p[1] & 0x3f) << 12) | (((unsigned)p[2] & 0x3f) << 6) | ((unsigned)p[3] & 0x3f); *wp = (((unsigned)p[0] & 0x7) << 18) | (((unsigned)p[1] & 0x3f) << 12) | (((unsigned)p[2] & 0x3f) << 6) | ((unsigned)p[3] & 0x3f);
#endif // _WIN32
p += 4; p += 4;
} else } else
p++; p++;
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment