/* * MultiByteToWideChar implementation * * Copyright 2000 Alexandre Julliard * * This library is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * This library is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA */ #include #include "wine/unicode.h" extern unsigned int wine_decompose( int flags, WCHAR ch, WCHAR *dst, unsigned int dstlen ) DECLSPEC_HIDDEN; /* check the code whether it is in Unicode Private Use Area (PUA). */ /* MB_ERR_INVALID_CHARS raises an error converting from 1-byte character to PUA. */ static inline int is_private_use_area_char(WCHAR code) { return (code >= 0xe000 && code <= 0xf8ff); } /* check src string for invalid chars; return non-zero if invalid char found */ static inline int check_invalid_chars_sbcs( const struct sbcs_table *table, int flags, const unsigned char *src, unsigned int srclen ) { const WCHAR * const cp2uni = (flags & MB_USEGLYPHCHARS) ? table->cp2uni_glyphs : table->cp2uni; const WCHAR def_unicode_char = table->info.def_unicode_char; const unsigned char def_char = table->uni2cp_low[table->uni2cp_high[def_unicode_char >> 8] + (def_unicode_char & 0xff)]; while (srclen) { if ((cp2uni[*src] == def_unicode_char && *src != def_char) || is_private_use_area_char(cp2uni[*src])) break; src++; srclen--; } return srclen; } /* mbstowcs for single-byte code page */ /* all lengths are in characters, not bytes */ static inline int mbstowcs_sbcs( const struct sbcs_table *table, int flags, const unsigned char *src, unsigned int srclen, WCHAR *dst, unsigned int dstlen ) { const WCHAR * const cp2uni = (flags & MB_USEGLYPHCHARS) ? table->cp2uni_glyphs : table->cp2uni; int ret = srclen; if (dstlen < srclen) { /* buffer too small: fill it up to dstlen and return error */ srclen = dstlen; ret = -1; } while (srclen >= 16) { dst[0] = cp2uni[src[0]]; dst[1] = cp2uni[src[1]]; dst[2] = cp2uni[src[2]]; dst[3] = cp2uni[src[3]]; dst[4] = cp2uni[src[4]]; dst[5] = cp2uni[src[5]]; dst[6] = cp2uni[src[6]]; dst[7] = cp2uni[src[7]]; dst[8] = cp2uni[src[8]]; dst[9] = cp2uni[src[9]]; dst[10] = cp2uni[src[10]]; dst[11] = cp2uni[src[11]]; dst[12] = cp2uni[src[12]]; dst[13] = cp2uni[src[13]]; dst[14] = cp2uni[src[14]]; dst[15] = cp2uni[src[15]]; src += 16; dst += 16; srclen -= 16; } /* now handle the remaining characters */ src += srclen; dst += srclen; switch (srclen) { case 15: dst[-15] = cp2uni[src[-15]]; case 14: dst[-14] = cp2uni[src[-14]]; case 13: dst[-13] = cp2uni[src[-13]]; case 12: dst[-12] = cp2uni[src[-12]]; case 11: dst[-11] = cp2uni[src[-11]]; case 10: dst[-10] = cp2uni[src[-10]]; case 9: dst[-9] = cp2uni[src[-9]]; case 8: dst[-8] = cp2uni[src[-8]]; case 7: dst[-7] = cp2uni[src[-7]]; case 6: dst[-6] = cp2uni[src[-6]]; case 5: dst[-5] = cp2uni[src[-5]]; case 4: dst[-4] = cp2uni[src[-4]]; case 3: dst[-3] = cp2uni[src[-3]]; case 2: dst[-2] = cp2uni[src[-2]]; case 1: dst[-1] = cp2uni[src[-1]]; case 0: break; } return ret; } /* mbstowcs for single-byte code page with char decomposition */ static int mbstowcs_sbcs_decompose( const struct sbcs_table *table, int flags, const unsigned char *src, unsigned int srclen, WCHAR *dst, unsigned int dstlen ) { const WCHAR * const cp2uni = (flags & MB_USEGLYPHCHARS) ? table->cp2uni_glyphs : table->cp2uni; unsigned int len; if (!dstlen) /* compute length */ { WCHAR dummy[4]; /* no decomposition is larger than 4 chars */ for (len = 0; srclen; srclen--, src++) len += wine_decompose( 0, cp2uni[*src], dummy, 4 ); return len; } for (len = dstlen; srclen && len; srclen--, src++) { unsigned int res = wine_decompose( 0, cp2uni[*src], dst, len ); if (!res) break; len -= res; dst += res; } if (srclen) return -1; /* overflow */ return dstlen - len; } /* query necessary dst length for src string */ static inline int get_length_dbcs( const struct dbcs_table *table, const unsigned char *src, unsigned int srclen ) { const unsigned char * const cp2uni_lb = table->cp2uni_leadbytes; int len; for (len = 0; srclen; srclen--, src++, len++) { if (cp2uni_lb[*src] && srclen > 1 && src[1]) { src++; srclen--; } } return len; } /* check src string for invalid chars; return non-zero if invalid char found */ static inline int check_invalid_chars_dbcs( const struct dbcs_table *table, const unsigned char *src, unsigned int srclen ) { const WCHAR * const cp2uni = table->cp2uni; const unsigned char * const cp2uni_lb = table->cp2uni_leadbytes; const WCHAR def_unicode_char = table->info.def_unicode_char; const unsigned short def_char = table->uni2cp_low[table->uni2cp_high[def_unicode_char >> 8] + (def_unicode_char & 0xff)]; while (srclen) { unsigned char off = cp2uni_lb[*src]; if (off) /* multi-byte char */ { if (srclen == 1) break; /* partial char, error */ if (cp2uni[(off << 8) + src[1]] == def_unicode_char && ((src[0] << 8) | src[1]) != def_char) break; src++; srclen--; } else if ((cp2uni[*src] == def_unicode_char && *src != def_char) || is_private_use_area_char(cp2uni[*src])) break; src++; srclen--; } return srclen; } /* mbstowcs for double-byte code page */ /* all lengths are in characters, not bytes */ static inline int mbstowcs_dbcs( const struct dbcs_table *table, const unsigned char *src, unsigned int srclen, WCHAR *dst, unsigned int dstlen ) { const WCHAR * const cp2uni = table->cp2uni; const unsigned char * const cp2uni_lb = table->cp2uni_leadbytes; unsigned int len; if (!dstlen) return get_length_dbcs( table, src, srclen ); for (len = dstlen; srclen && len; len--, srclen--, src++, dst++) { unsigned char off = cp2uni_lb[*src]; if (off && srclen > 1 && src[1]) { src++; srclen--; *dst = cp2uni[(off << 8) + *src]; } else *dst = cp2uni[*src]; } if (srclen) return -1; /* overflow */ return dstlen - len; } /* mbstowcs for double-byte code page with character decomposition */ static int mbstowcs_dbcs_decompose( const struct dbcs_table *table, const unsigned char *src, unsigned int srclen, WCHAR *dst, unsigned int dstlen ) { const WCHAR * const cp2uni = table->cp2uni; const unsigned char * const cp2uni_lb = table->cp2uni_leadbytes; unsigned int len, res; WCHAR ch; if (!dstlen) /* compute length */ { WCHAR dummy[4]; /* no decomposition is larger than 4 chars */ for (len = 0; srclen; srclen--, src++) { unsigned char off = cp2uni_lb[*src]; if (off && srclen > 1 && src[1]) { src++; srclen--; ch = cp2uni[(off << 8) + *src]; } else ch = cp2uni[*src]; len += wine_decompose( 0, ch, dummy, 4 ); } return len; } for (len = dstlen; srclen && len; srclen--, src++) { unsigned char off = cp2uni_lb[*src]; if (off && srclen > 1 && src[1]) { src++; srclen--; ch = cp2uni[(off << 8) + *src]; } else ch = cp2uni[*src]; if (!(res = wine_decompose( 0, ch, dst, len ))) break; dst += res; len -= res; } if (srclen) return -1; /* overflow */ return dstlen - len; } /* return -1 on dst buffer overflow, -2 on invalid input char */ int wine_cp_mbstowcs( const union cptable *table, int flags, const char *s, int srclen, WCHAR *dst, int dstlen ) { const unsigned char *src = (const unsigned char*) s; if (table->info.char_size == 1) { if (flags & MB_ERR_INVALID_CHARS) { if (check_invalid_chars_sbcs( &table->sbcs, flags, src, srclen )) return -2; } if (!(flags & MB_COMPOSITE)) { if (!dstlen) return srclen; return mbstowcs_sbcs( &table->sbcs, flags, src, srclen, dst, dstlen ); } return mbstowcs_sbcs_decompose( &table->sbcs, flags, src, srclen, dst, dstlen ); } else /* mbcs */ { if (flags & MB_ERR_INVALID_CHARS) { if (check_invalid_chars_dbcs( &table->dbcs, src, srclen )) return -2; } if (!(flags & MB_COMPOSITE)) return mbstowcs_dbcs( &table->dbcs, src, srclen, dst, dstlen ); else return mbstowcs_dbcs_decompose( &table->dbcs, src, srclen, dst, dstlen ); } }