Revised patch: move the wmc codepage helpers to utils.c and convert
codepage input through NLS files.

Review fixes applied to the added code:
 - get_nls_info(): the search over nlsdirs reused the variable holding
   the free cache slot, so the table was stored in (and returned from)
   the nlsinfo entry at the directory index. The slot now has its own
   variable, and the fstat() result is checked.
 - codepage_to_unicode(): free the output buffer on every failure path,
   read input bytes as unsigned char (no negative shift or sign-extended
   comparison for bytes >= 0x80), and accept empty input on Windows.
 - fill_inputbuffer(): restore the error check for a failed conversion
   instead of passing a NULL buffer to memcpy().

Reconstruction notes: the source copy of this patch had collapsed
whitespace. Indentation is assumed to be 4 spaces (apply with
--ignore-whitespace if the tree differs), context that could not be
recovered was trimmed from the lang.c and first utils.c hunks, and the
index lines still carry the blob ids of the original patch.

diff --git a/tools/wmc/lang.c b/tools/wmc/lang.c
index d7bceef8fd2..b5ab3be7225 100644
--- a/tools/wmc/lang.c
+++ b/tools/wmc/lang.c
@@ -187,29 +187,1 @@ const language_t *find_language(unsigned id)
 }
-
-#ifdef _WIN32
-
-int is_valid_codepage(int id)
-{
-    return IsValidCodePage( id );
-}
-
-int wmc_mbstowcs( int codepage, int flags, const char *src, int srclen, WCHAR *dst, int dstlen )
-{
-    return MultiByteToWideChar( codepage, flags, src, srclen, dst, dstlen );
-}
-
-#else  /* _WIN32 */
-
-#include "wine/unicode.h"
-
-int is_valid_codepage(int id)
-{
-    return id == CP_UTF8 || wine_cp_get_table(id);
-}
-
-int wmc_mbstowcs( int codepage, int flags, const char *src, int srclen, WCHAR *dst, int dstlen )
-{
-    return wine_cp_mbstowcs( wine_cp_get_table( codepage ), flags, src, srclen, dst, dstlen );
-}
-
-#endif  /* _WIN32 */
diff --git a/tools/wmc/lang.h b/tools/wmc/lang.h
index d655556e8dc..365aacdd766 100644
--- a/tools/wmc/lang.h
+++ b/tools/wmc/lang.h
@@ -32,7 +32,5 @@ typedef struct language {
 
 void show_languages(void);
 const language_t *find_language(unsigned id);
-int is_valid_codepage(int id);
-int wmc_mbstowcs( int codepage, int flags, const char *src, int srclen, WCHAR *dst, int dstlen );
 
 #endif
diff --git a/tools/wmc/mcl.c b/tools/wmc/mcl.c
index 1319113fff6..829f955606f 100644
--- a/tools/wmc/mcl.c
+++ b/tools/wmc/mcl.c
@@ -153,8 +153,6 @@ static int codepage;
 void set_codepage(int cp)
 {
     codepage = cp;
-    if (!is_valid_codepage( cp ))
-        xyyerror("Codepage %d not found; cannot process\n", codepage);
 }
 
 /*
@@ -226,8 +224,10 @@ static int fill_inputbuffer(void)
     {
     case INPUT_ASCII:
         if (!fgets( buffer + len, sizeof(buffer) - len, yyin )) break;
-        ninputbuffer = wmc_mbstowcs( codepage, 0, buffer, strlen(buffer), inputbuffer, INPUTBUFFER_SIZE );
-        if (ninputbuffer < 0) internal_error(__FILE__, __LINE__, "Could not translate to unicode\n");
+        wbuf = codepage_to_unicode( codepage, buffer, strlen(buffer), &ninputbuffer );
+        if (!wbuf) internal_error(__FILE__, __LINE__, "Could not translate to unicode\n");
+        memcpy( inputbuffer, wbuf, ninputbuffer * sizeof(WCHAR) );
+        free( wbuf );
         return 1;
     case INPUT_UTF8:
         if (!fgets( buffer + len, sizeof(buffer) - len, yyin )) break;
diff --git a/tools/wmc/utils.c b/tools/wmc/utils.c
index 7261fe715ab..bd01f6cfe79 100644
--- a/tools/wmc/utils.c
+++ b/tools/wmc/utils.c
@@ -31,4 +31,5 @@
 #include "wmctypes.h"
+#include "winnls.h"
 #include "utils.h"
 #include "wmc.h"
 
@@ -400,6 +401,157 @@ char *unicode_to_utf8( const WCHAR *src, int srclen, int *dstlen )
     return ret;
 }
 
+#ifdef _WIN32
+
+/* thin wrapper around IsValidCodePage() */
+int is_valid_codepage(int id)
+{
+    return IsValidCodePage( id );
+}
+
+/* Convert srclen bytes of src from the given codepage to Unicode.
+ * Returns a null-terminated buffer that the caller must free, with
+ * its length stored in *dstlen, or NULL if src cannot be converted.
+ */
+WCHAR *codepage_to_unicode( int codepage, const char *src, int srclen, int *dstlen )
+{
+    WCHAR *dst = xmalloc( (srclen + 1) * sizeof(WCHAR) );
+    DWORD ret = 0;
+
+    /* MultiByteToWideChar fails on empty input; return an empty string instead */
+    if (srclen && !(ret = MultiByteToWideChar( codepage, MB_ERR_INVALID_CHARS, src, srclen, dst, srclen )))
+    {
+        free( dst );
+        return NULL;
+    }
+    dst[ret] = 0;
+    *dstlen = ret;
+    return dst;
+}
+
+#else  /* _WIN32 */
+
+/* values and table pointers extracted from a loaded NLS codepage file */
+struct nls_info
+{
+    unsigned short codepage;
+    unsigned short unidef;
+    unsigned short trans_unidef;
+    unsigned short *cp2uni;
+    unsigned short *dbcs_offsets;
+};
+
+/* cache of loaded codepages; the first entry with a zero codepage ends the used part */
+static struct nls_info nlsinfo[128];
+
+/* fill info from the NLS file data at ptr; the table pointers point into that data */
+static void init_nls_info( struct nls_info *info, unsigned short *ptr )
+{
+    unsigned short hdr_size = ptr[0];
+
+    info->codepage = ptr[1];
+    info->unidef = ptr[4];
+    info->trans_unidef = ptr[6];
+    ptr += hdr_size;
+    info->cp2uni = ++ptr;
+    ptr += 256;
+    if (*ptr++) ptr += 256;  /* glyph table */
+    info->dbcs_offsets = *ptr ? ptr + 1 : NULL;
+}
+
+/* return the cached info for codepage, loading c_<codepage>.nls from nlsdirs on first use */
+static const struct nls_info *get_nls_info( unsigned int codepage )
+{
+    struct stat st;
+    unsigned short *data;
+    char *path;
+    unsigned int i, slot;
+    int fd;
+
+    for (slot = 0; slot < ARRAY_SIZE(nlsinfo) && nlsinfo[slot].codepage; slot++)
+        if (nlsinfo[slot].codepage == codepage) return &nlsinfo[slot];
+
+    assert( slot < ARRAY_SIZE(nlsinfo) );
+
+    for (i = 0; nlsdirs[i]; i++)
+    {
+        path = strmake( "%s/c_%03u.nls", nlsdirs[i], codepage );
+        if ((fd = open( path, O_RDONLY )) != -1) break;
+        free( path );
+    }
+    if (!nlsdirs[i]) return NULL;
+
+    if (fstat( fd, &st ) == -1) error( "failed to load %s\n", path );
+    data = xmalloc( st.st_size );
+    if (read( fd, data, st.st_size ) != st.st_size) error( "failed to load %s\n", path );
+    close( fd );
+    free( path );
+    /* store into the free cache slot found above, not at the nlsdirs index */
+    init_nls_info( &nlsinfo[slot], data );
+    return &nlsinfo[slot];
+}
+
+/* a codepage is usable if it is CP_UTF8 or its NLS file can be loaded */
+int is_valid_codepage(int cp)
+{
+    return cp == CP_UTF8 || get_nls_info( cp );
+}
+
+/* Convert srclen bytes of src from the given codepage to Unicode.
+ * Returns a null-terminated buffer that the caller must free, with
+ * its length stored in *dstlen, or NULL if src cannot be converted.
+ */
+WCHAR *codepage_to_unicode( int codepage, const char *src, int srclen, int *dstlen )
+{
+    const struct nls_info *info = get_nls_info( codepage );
+    const unsigned char *in = (const unsigned char *)src;  /* bytes >= 0x80 must not sign-extend */
+    unsigned int i;
+    WCHAR dbch, *dst;
+
+    if (!info) error( "codepage %u not supported\n", codepage );
+
+    dst = xmalloc( (srclen + 1) * sizeof(WCHAR) );
+
+    if (info->dbcs_offsets)
+    {
+        for (i = 0; srclen; i++, srclen--, in++)
+        {
+            unsigned short off = info->dbcs_offsets[*in];
+            if (off)
+            {
+                if (srclen == 1) goto invalid;  /* lead byte without a trail byte */
+                dbch = (in[0] << 8) | in[1];
+                in++;
+                srclen--;
+                dst[i] = info->dbcs_offsets[off + *in];
+                if (dst[i] == info->unidef && dbch != info->trans_unidef) goto invalid;
+            }
+            else
+            {
+                dst[i] = info->cp2uni[*in];
+                if (dst[i] == info->unidef && *in != info->trans_unidef) goto invalid;
+            }
+        }
+    }
+    else
+    {
+        for (i = 0; i < (unsigned int)srclen; i++)
+        {
+            dst[i] = info->cp2uni[in[i]];
+            if (dst[i] == info->unidef && in[i] != info->trans_unidef) goto invalid;
+        }
+    }
+    dst[i] = 0;
+    *dstlen = i;
+    return dst;
+
+invalid:
+    free( dst );
+    return NULL;
+}
+
+#endif  /* _WIN32 */
+
 /*******************************************************************
  * buffer management
  *
diff --git a/tools/wmc/utils.h b/tools/wmc/utils.h
index e4c546765dc..726a36731ab 100644
--- a/tools/wmc/utils.h
+++ b/tools/wmc/utils.h
@@ -52,6 +52,8 @@ int unistricmp(const WCHAR *s1, const WCHAR *s2);
 int unistrcmp(const WCHAR *s1, const WCHAR *s2);
 WCHAR *utf8_to_unicode( const char *src, int srclen, int *dstlen );
 char *unicode_to_utf8( const WCHAR *src, int srclen, int *dstlen );
+int is_valid_codepage(int id);
+WCHAR *codepage_to_unicode( int codepage, const char *src, int srclen, int *dstlen );
 
 /* buffer management */
 