wmc: convert UTF-8 with local helpers instead of the wine_utf8_* routines.

What the hunks below do:
  * lang.c   - wmc_mbstowcs() / wmc_wcstombs() lose their CP_UTF8 special case
               and always go through the wine_cp_* codepage tables.
  * mcl.c    - input lines are decoded with utf8_to_unicode() when the
               codepage is CP_UTF8; other codepages keep using wmc_mbstowcs().
  * po.c     - convert_string_utf8() is removed; add_po_string() uses
               unicode_to_utf8() and translate_string() uses utf8_to_unicode().
  * utils.c,
    utils.h  - add utf8_to_unicode() and unicode_to_utf8().  Each returns an
               xmalloc()ed buffer, appends one terminating 0 and stores the
               number of converted units (that extra terminator not counted)
               in *dstlen.  Malformed UTF-8 sequences and unpaired surrogates
               are replaced by U+FFFD.
  * write.c  - dup_u2c() drops its codepage argument and maps every code unit
               above 0xff to '_'; make_string() emits an L"..." literal for
               CP_UTF8 as well as for codepage 0.

Review fixes folded into this revision (hunk line counts are unchanged):
  1. write.c, make_string(): the converted bytes were written to `cptr` (the
     output cursor) instead of the freshly allocated `tmp`, so the loop that
     walks `cc`/`tmp` read uninitialised memory and the output buffer was
     clobbered.  The destination is now `tmp`.
  2. mcl.c: the UTF-8 branch passed strlen(xlatebuffer) without the
     terminator, yet the shared code that follows does `n--` to strip the
     converted '\0'.  The branch now passes strlen()+1, like the non-UTF-8
     branch, and copies exactly n WCHARs.
     NOTE(review): the code that consumes `n` afterwards is not part of this
     patch - confirm against the rest of mcl.c.

Formatting note: this patch had been flattened (newlines and indentation
lost).  Blank context lines were restored from the @@ hunk counts, but the
indentation and any runs of spaces inside context lines are reconstructed.
Apply with whitespace-insensitive matching (for example
`git apply --ignore-whitespace` or `patch -l`).

diff --git a/tools/wmc/lang.c b/tools/wmc/lang.c
index d80d959f6bf..9d3d6e01452 100644
--- a/tools/wmc/lang.c
+++ b/tools/wmc/lang.c
@@ -222,13 +222,11 @@ int is_valid_codepage(int id)
 
 int wmc_mbstowcs( int codepage, int flags, const char *src, int srclen, WCHAR *dst, int dstlen )
 {
-    if (codepage == CP_UTF8) return wine_utf8_mbstowcs( flags, src, srclen, dst, dstlen );
     return wine_cp_mbstowcs( wine_cp_get_table( codepage ), flags, src, srclen, dst, dstlen );
 }
 
 int wmc_wcstombs( int codepage, int flags, const WCHAR *src, int srclen, char *dst, int dstlen )
 {
-    if (codepage == CP_UTF8) return wine_utf8_wcstombs( flags, src, srclen, dst, dstlen );
     return wine_cp_wcstombs( wine_cp_get_table( codepage ), flags, src, srclen, dst, dstlen, NULL, NULL );
 }
 
diff --git a/tools/wmc/mcl.c b/tools/wmc/mcl.c
index 68d24448609..56c5ca640ca 100644
--- a/tools/wmc/mcl.c
+++ b/tools/wmc/mcl.c
@@ -198,9 +198,18 @@ try_again:
             xyyerror(err_fatalread);
         else if(!cptr)
             return 0;
-        n = wmc_mbstowcs(codepage, 0, xlatebuffer, strlen(xlatebuffer)+1, inputbuffer, INPUTBUFFER_SIZE);
-        if(n < 0)
-            internal_error(__FILE__, __LINE__, "Could not translate to unicode (%d)\n", n);
+        if (codepage == CP_UTF8)
+        {
+            WCHAR *buf = utf8_to_unicode( xlatebuffer, strlen(xlatebuffer) + 1, &n );
+            memcpy( inputbuffer, buf, n * sizeof(WCHAR) );
+            free( buf );
+        }
+        else
+        {
+            n = wmc_mbstowcs(codepage, 0, xlatebuffer, strlen(xlatebuffer)+1, inputbuffer, INPUTBUFFER_SIZE);
+            if(n < 0)
+                internal_error(__FILE__, __LINE__, "Could not translate to unicode (%d)\n", n);
+        }
         if(n <= 1)
             goto try_again; /* Should not happen */
         n--; /* Strip added conversion '\0' from input length */
diff --git a/tools/wmc/po.c b/tools/wmc/po.c
index bf10c50c637..d0a053cfb67 100644
--- a/tools/wmc/po.c
+++ b/tools/wmc/po.c
@@ -404,14 +404,6 @@ static char *get_message_context( char **msgid )
 
 #ifdef HAVE_LIBGETTEXTPO
 
-static char *convert_string_utf8( const lanmsg_t *msg )
-{
-    char *buffer = xmalloc( msg->len * 4 + 1 );
-    int len = wmc_wcstombs( CP_UTF8, 0, msg->msg, msg->len, buffer, msg->len * 4 );
-    buffer[len] = 0;
-    return buffer;
-}
-
 static po_message_t find_message( po_file_t po, const char *msgid, const char *msgctxt,
                                   po_message_iterator_t *iterator )
 {
@@ -467,7 +459,8 @@ static void add_po_string( po_file_t po, const lanmsg_t *msgid, const lanmsg_t *
 
     if (msgstr)
     {
-        str_buffer = str = convert_string_utf8( msgstr );
+        int len;
+        str_buffer = str = unicode_to_utf8( msgstr->msg, msgstr->len, &len );
         if (is_english( msgstr->lan )) get_message_context( &str );
     }
     if (!(msg = find_message( po, id, context, &iterator )))
@@ -644,7 +637,6 @@ static lanmsg_t *translate_string( lanmsg_t *str, int lang, int *found )
 {
     lanmsg_t *new;
     const char *transl;
-    int res;
     char *buffer, *msgid, *context;
 
     if (str->len <= 1 || !(buffer = convert_msgid_ascii( str, 0 ))) return str;
@@ -658,11 +650,7 @@ static lanmsg_t *translate_string( lanmsg_t *str, int lang, int *found )
     new->cp = 0; /* FIXME */
     new->file = str->file;
     new->line = str->line;
-    new->len = wmc_mbstowcs( CP_UTF8, 0, transl, strlen(transl) + 1, NULL, 0 );
-    new->msg = xmalloc( new->len * sizeof(WCHAR) );
-    res = wmc_mbstowcs( CP_UTF8, MB_ERR_INVALID_CHARS, transl, strlen(transl) + 1, new->msg, new->len );
-    if (res == -2)
-        error( "Invalid utf-8 character in string '%s'\n", transl );
+    new->msg = utf8_to_unicode( transl, strlen(transl) + 1, &new->len );
     free( buffer );
     return new;
 }
diff --git a/tools/wmc/utils.c b/tools/wmc/utils.c
index bd3f0a6160b..e3da5422d7e 100644
--- a/tools/wmc/utils.c
+++ b/tools/wmc/utils.c
@@ -272,6 +272,127 @@ int unistrcmp(const WCHAR *s1, const WCHAR *s2)
     return *s1 - *s2;
 }
 
+WCHAR *utf8_to_unicode( const char *src, int srclen, int *dstlen )
+{
+    static const char utf8_length[128] =
+    {
+        0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, /* 0x80-0x8f */
+        0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, /* 0x90-0x9f */
+        0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, /* 0xa0-0xaf */
+        0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, /* 0xb0-0xbf */
+        0,0,1,1,1,1,1,1,1,1,1,1,1,1,1,1, /* 0xc0-0xcf */
+        1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, /* 0xd0-0xdf */
+        2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2, /* 0xe0-0xef */
+        3,3,3,3,3,0,0,0,0,0,0,0,0,0,0,0  /* 0xf0-0xff */
+    };
+    static const unsigned char utf8_mask[4] = { 0x7f, 0x1f, 0x0f, 0x07 };
+
+    const char *srcend = src + srclen;
+    int len, res;
+    WCHAR *ret, *dst;
+
+    dst = ret = xmalloc( (srclen + 1) * sizeof(WCHAR) );
+    while (src < srcend)
+    {
+        unsigned char ch = *src++;
+        if (ch < 0x80) /* special fast case for 7-bit ASCII */
+        {
+            *dst++ = ch;
+            continue;
+        }
+        len = utf8_length[ch - 0x80];
+        if (len && src + len <= srcend)
+        {
+            res = ch & utf8_mask[len];
+            switch (len)
+            {
+            case 3:
+                if ((ch = *src ^ 0x80) >= 0x40) break;
+                res = (res << 6) | ch;
+                src++;
+                if (res < 0x10) break;
+            case 2:
+                if ((ch = *src ^ 0x80) >= 0x40) break;
+                res = (res << 6) | ch;
+                if (res >= 0x110000 >> 6) break;
+                src++;
+                if (res < 0x20) break;
+                if (res >= 0xd800 >> 6 && res <= 0xdfff >> 6) break;
+            case 1:
+                if ((ch = *src ^ 0x80) >= 0x40) break;
+                res = (res << 6) | ch;
+                src++;
+                if (res < 0x80) break;
+                if (res <= 0xffff) *dst++ = res;
+                else
+                {
+                    res -= 0x10000;
+                    *dst++ = 0xd800 | (res >> 10);
+                    *dst++ = 0xdc00 | (res & 0x3ff);
+                }
+                continue;
+            }
+        }
+        *dst++ = 0xfffd;
+    }
+    *dst = 0;
+    *dstlen = dst - ret;
+    return ret;
+}
+
+char *unicode_to_utf8( const WCHAR *src, int srclen, int *dstlen )
+{
+    char *ret, *dst;
+
+    dst = ret = xmalloc( srclen * 3 + 1 );
+    for ( ; srclen; srclen--, src++)
+    {
+        unsigned int ch = *src;
+
+        if (ch < 0x80) /* 0x00-0x7f: 1 byte */
+        {
+            *dst++ = ch;
+            continue;
+        }
+        if (ch < 0x800) /* 0x80-0x7ff: 2 bytes */
+        {
+            dst[1] = 0x80 | (ch & 0x3f);
+            ch >>= 6;
+            dst[0] = 0xc0 | ch;
+            dst += 2;
+            continue;
+        }
+        if (ch >= 0xd800 && ch <= 0xdbff && srclen > 1 && src[1] >= 0xdc00 && src[1] <= 0xdfff)
+        {
+            /* 0x10000-0x10ffff: 4 bytes */
+            ch = 0x10000 + ((ch & 0x3ff) << 10) + (src[1] & 0x3ff);
+            dst[3] = 0x80 | (ch & 0x3f);
+            ch >>= 6;
+            dst[2] = 0x80 | (ch & 0x3f);
+            ch >>= 6;
+            dst[1] = 0x80 | (ch & 0x3f);
+            ch >>= 6;
+            dst[0] = 0xf0 | ch;
+            dst += 4;
+            src++;
+            srclen--;
+            continue;
+        }
+        if (ch >= 0xd800 && ch <= 0xdfff) ch = 0xfffd; /* invalid surrogate pair */
+
+        /* 0x800-0xffff: 3 bytes */
+        dst[2] = 0x80 | (ch & 0x3f);
+        ch >>= 6;
+        dst[1] = 0x80 | (ch & 0x3f);
+        ch >>= 6;
+        dst[0] = 0xe0 | ch;
+        dst += 3;
+    }
+    *dst = 0;
+    *dstlen = dst - ret;
+    return ret;
+}
+
 /*******************************************************************
  * buffer management
  *
diff --git a/tools/wmc/utils.h b/tools/wmc/utils.h
index 2ca62e94114..48f47ee7512 100644
--- a/tools/wmc/utils.h
+++ b/tools/wmc/utils.h
@@ -49,6 +49,8 @@ WCHAR *unistrcpy(WCHAR *dst, const WCHAR *src);
 int unistrlen(const WCHAR *s);
 int unistricmp(const WCHAR *s1, const WCHAR *s2);
 int unistrcmp(const WCHAR *s1, const WCHAR *s2);
+WCHAR *utf8_to_unicode( const char *src, int srclen, int *dstlen );
+char *unicode_to_utf8( const WCHAR *src, int srclen, int *dstlen );
 
 /* buffer management */
 
diff --git a/tools/wmc/write.c b/tools/wmc/write.c
index 59cb86955e0..b11f00831db 100644
--- a/tools/wmc/write.c
+++ b/tools/wmc/write.c
@@ -94,17 +94,13 @@ static const char str_header[] =
     "\n"
     ;
 
-static char *dup_u2c(int cp, const WCHAR *uc)
+static char *dup_u2c(const WCHAR *uc)
 {
-    int len;
-    char *cptr;
+    int i;
+    char *cptr = xmalloc( unistrlen(uc)+1 );
 
-    if (!cp) cp = CP_UTF8;
-    len = wmc_wcstombs(cp, 0, uc, unistrlen(uc)+1, NULL, 0);
-    cptr = xmalloc(len);
-    len = wmc_wcstombs(cp, 0, uc, unistrlen(uc)+1, cptr, len);
-    if (len < 0)
-        internal_error(__FILE__, __LINE__, "Buffer overflow? code %d\n", len);
+    for (i = 0; *uc; i++, uc++) cptr[i] = (*uc <= 0xff) ? *uc : '_';
+    cptr[i] = 0;
     return cptr;
 }
 
@@ -183,7 +179,7 @@ void write_h_file(const char *fname)
     {
         if(ttab[i].type == tok_severity && ttab[i].alias)
         {
-            cptr = dup_u2c(WMC_DEFAULT_CODEPAGE, ttab[i].alias);
+            cptr = dup_u2c(ttab[i].alias);
             fprintf(fp, "#define %s\t0x%x\n", cptr, ttab[i].token);
             free(cptr);
         }
@@ -195,7 +191,7 @@ void write_h_file(const char *fname)
     {
         if(ttab[i].type == tok_facility && ttab[i].alias)
         {
-            cptr = dup_u2c(WMC_DEFAULT_CODEPAGE, ttab[i].alias);
+            cptr = dup_u2c(ttab[i].alias);
             fprintf(fp, "#define %s\t0x%x\n", cptr, ttab[i].token);
             free(cptr);
         }
@@ -209,7 +205,7 @@ void write_h_file(const char *fname)
         switch(ndp->type)
         {
         case nd_comment:
-            cptr = dup_u2c(WMC_DEFAULT_CODEPAGE, ndp->u.comment+1);
+            cptr = dup_u2c(ndp->u.comment+1);
             killnl(cptr, 0);
             killcomment(cptr);
             if(*cptr)
@@ -237,14 +233,14 @@ void write_h_file(const char *fname)
                 fprintf(fp, "\n");
             }
             fprintf(fp, "/* MessageId : 0x%08x */\n", ndp->u.msg->realid);
-            cptr = dup_u2c(ndp->u.msg->msgs[idx_en]->cp, ndp->u.msg->msgs[idx_en]->msg);
+            cptr = dup_u2c(ndp->u.msg->msgs[idx_en]->msg);
             killnl(cptr, 0);
             killcomment(cptr);
             fprintf(fp, "/* Approximate msg: %s */\n", cptr);
             free(cptr);
-            cptr = dup_u2c(WMC_DEFAULT_CODEPAGE, ndp->u.msg->sym);
+            cptr = dup_u2c(ndp->u.msg->sym);
             if(ndp->u.msg->cast)
-                cast = dup_u2c(WMC_DEFAULT_CODEPAGE, ndp->u.msg->cast);
+                cast = dup_u2c(ndp->u.msg->cast);
             else
                 cast = NULL;
             switch(ndp->u.msg->base)
@@ -299,7 +295,7 @@ static void write_rcbin(FILE *fp)
             if(ttab[i].type == tok_language && ttab[i].token == lbp->lan)
             {
                 if(ttab[i].alias)
-                    cptr = dup_u2c(WMC_DEFAULT_CODEPAGE, ttab[i].alias);
+                    cptr = dup_u2c(ttab[i].alias);
                 break;
             }
         }
@@ -317,7 +313,7 @@ static char *make_string(WCHAR *uc, int len, int codepage)
     int i;
     int b;
 
-    if(!codepage)
+    if (!codepage || codepage == CP_UTF8)
     {
         *cptr++ = ' ';
         *cptr++ = 'L';
@@ -379,8 +375,10 @@ static char *make_string(WCHAR *uc, int len, int codepage)
     else
     {
         char *tmp, *cc;
+        int unilen = unistrlen(uc) + 1;
 
-        cc = tmp = dup_u2c(codepage, uc);
+        cc = tmp = xmalloc( unilen * 2 );
+        wmc_wcstombs( codepage, 0, uc, unilen, tmp, unilen * 2 );
         *cptr++ = ' ';
         *cptr++ = '"';
         for(i = b = 0; i < len; i++, cc++)
@@ -539,7 +537,7 @@ void write_bin_files(void)
         {
             if (ttab[i].type == tok_language && ttab[i].token == lbp->lan)
             {
-                if (ttab[i].alias) cptr = dup_u2c(WMC_DEFAULT_CODEPAGE, ttab[i].alias);
+                if (ttab[i].alias) cptr = dup_u2c(ttab[i].alias);
                 break;
             }
         }