Added support for composite Unicode characters in MultiByteToWideChar and WideCharToMultiByte.
2000-12-29 03:56:06 +00:00 · 2000-12-29 03:56:06 +00:00 · e709cdbae3
parent 441f874517
commit e709cdbae3
6 changed files with 1578 additions and 57 deletions
--- a/memory/codepage.c
+++ b/memory/codepage.c
@ -254,7 +254,6 @@ INT WINAPI MultiByteToWideChar( UINT page, DWORD flags, LPCSTR src, INT srclen,

    if (srclen == -1) srclen = strlen(src) + 1;

-    if (flags & MB_COMPOSITE) FIXME("MB_COMPOSITE not supported\n");
    if (flags & MB_USEGLYPHCHARS) FIXME("MB_USEGLYPHCHARS not supported\n");

    switch(page)
@ -330,8 +329,6 @@ INT WINAPI WideCharToMultiByte( UINT page, DWORD flags, LPCWSTR src, INT srclen,

    if (srclen == -1) srclen = strlenW(src) + 1;

-/*    if (flags & WC_COMPOSITECHECK) FIXME( "WC_COMPOSITECHECK (%lx) not supported\n", flags );*/
-
    switch(page)
    {
    case CP_UTF7:
--- a/unicode/Makefile.in
+++ b/unicode/Makefile.in
@ -70,6 +70,7 @@ CODEPAGES = \

 C_SRCS = \
 	casemap.c \
+	compose.c \
 	cptable.c \
 	mbtowc.c \
 	string.c \
--- a/unicode/compose.c
+++ b/unicode/compose.c
--- a/unicode/cpmap.pl
+++ b/unicode/cpmap.pl
@ -166,6 +166,7 @@ $DEF_CHAR = ord '?';

 READ_DEFAULTS();
 DUMP_CASE_MAPPINGS();
+DUMP_COMPOSE_TABLES();
 DUMP_CTYPE_TABLES();

 foreach $file (@allfiles) { HANDLE_FILE( @$file ); }
@ -185,6 +186,8 @@ sub READ_DEFAULTS
    @toupper_table = ();
    @category_table = ();
    @direction_table = ();
+    @decomp_table = ();
+    @compose_table = ();

    # first setup a few default mappings

@ -285,6 +288,12 @@ sub READ_DEFAULTS
            # decomposition contains only char values without prefix -> use first char
            $dst = hex $1;
            $category_table[$src] |= $category_table[$dst];
+            # store decomposition if it contains two chars
+            if ($decomp =~ /^([0-9a-fA-F]+)\s+([0-9a-fA-F]+)$/)
+            {
+                $decomp_table[$src] = [ hex $1, hex $2 ];
+                push @compose_table, [ hex $1, hex $2, $src ];
+            }
        }
        else
        {
@ -465,7 +474,7 @@ sub DUMP_SBCS_TABLE
        next unless defined $uni2cp[$i];
        $filled[$i >> 8] = 1;
        $subtables++;
-        $i = ($i & ~255) + 256;
+        $i |= 255;
    }

    # output all the subtables into a single array
@ -572,7 +581,7 @@ sub DUMP_DBCS_TABLE
        next unless defined $uni2cp[$i];
        $filled[$i >> 8] = 1;
        $subtables++;
-        $i = ($i & ~255) + 256;
+        $i |= 255;
    }

    # output all the subtables into a single array
@ -669,7 +678,7 @@ sub DUMP_CASE_TABLE
        next unless defined $table[$i];
        $filled[$i >> 8] = $pos;
        $pos += 256;
-        $i = ($i & ~255) + 256;
+        $i |= 255;
    }
    for ($i = 0; $i < 65536; $i++)
    {
@ -737,6 +746,144 @@ sub DUMP_CTYPE_TABLES
    close OUTPUT;
 }

+
+################################################################
+# dump the char composition tables
+#
+# Writes compose.c, which defines:
+#   unicode_compose_table      - built from @compose_table, whose entries are
+#                                [ first char, second char, composed char ]
+#   unicode_compose_table_size - number of distinct second chars
+#   unicode_decompose_table    - built from @decomp_table, indexed by char
+#                                value, whose entries are [ first, second ]
+# Takes no arguments; dies if compose.c cannot be created.
+sub DUMP_COMPOSE_TABLES
+{
+    open OUTPUT,">compose.c" or die "Cannot create compose.c";
+    printf "Building compose.c\n";
+    printf OUTPUT "/* Unicode char composition */\n";
+    printf OUTPUT "/* Automatically generated; DO NOT EDIT!! */\n\n";
+    printf OUTPUT "#include \"wine/unicode.h\"\n\n";
+
+    ######### composition table
+    #
+    # Layout, in (WCHAR,WCHAR) pairs:
+    #   $count pairs   (second char, position of its list)
+    #   1 pair         (0, end position)  -- terminator
+    #   then, per second char, its list of (first char, composed char)
+    #   pairs sorted by first char
+
+    # group the compositions by their second char
+
+    my @filled = ();
+    foreach $i (@compose_table)
+    {
+        my @comp = @$i;
+        push @{$filled[$comp[1]]}, [ $comp[0], $comp[2] ];
+    }
+
+    # count how many different second chars we have
+
+    for ($i = $count = 0; $i < 65536; $i++)
+    {
+        next unless defined $filled[$i];
+        $count++;
+    }
+
+    # build the table of second chars and offsets
+
+    # positions are counted in pairs; the lists start right after the
+    # $count index pairs and the terminator pair
+    my $pos = $count + 1;
+    # must be a fresh lexical: pushing onto an undeclared @table would append
+    # to whatever a package-global @table happened to contain
+    my @table = ();
+    for ($i = 0; $i < 65536; $i++)
+    {
+        next unless defined $filled[$i];
+        push @table, $i, $pos;
+        $pos += @{$filled[$i]};
+    }
+    # terminator with last position
+    push @table, 0, $pos;
+    printf OUTPUT "const WCHAR unicode_compose_table[0x%x] =\n{\n", 2*$pos;
+    printf OUTPUT "    /* second chars + offsets */\n%s", DUMP_ARRAY( "0x%04x", 0, @table );
+
+    # build the table of first chars and mappings
+
+    for ($i = 0; $i < 65536; $i++)
+    {
+        next unless defined $filled[$i];
+        my @table = ();
+        my @list = sort { $a->[0] <=> $b->[0] } @{$filled[$i]};
+        for ($j = 0; $j <= $#list; $j++)
+        {
+            push @table, $list[$j][0], $list[$j][1];
+        }
+        printf OUTPUT ",\n    /* 0x%04x */\n%s", $i, DUMP_ARRAY( "0x%04x", 0, @table );
+    }
+    printf OUTPUT "\n};\n\nconst unsigned int unicode_compose_table_size = %d;\n\n", $count;
+
+    ######### decomposition table
+    #
+    # Three levels, all offsets relative to the start of the array:
+    #   main index (256 entries, one per high byte) -> 16-entry sub-index
+    #   sub-index (one entry per 16-char subset)    -> 16-char subset
+    #   subset: 2 WCHARs per char (the two chars of its decomposition, or 0,0)
+
+    # first determine all the 16-char subsets that contain something
+
+    # @filled and $pos are reused from the composition pass (no second "my",
+    # which would mask the earlier declarations in the same scope)
+    @filled = (0) x 4096;
+    $pos = 16*2;  # for the null subset
+    for ($i = 0; $i < 65536; $i++)
+    {
+        next unless defined $decomp_table[$i];
+        $filled[$i >> 4] = $pos;
+        $pos += 16*2;
+        $i |= 15;   # skip to the end of this subset
+    }
+    my $total = $pos;
+
+    # now count the 256-char subsets that contain something
+
+    # unused high bytes point at offset 256, the null sub-index that directly
+    # follows the 256-entry main index
+    my @filled_idx = (256) x 256;
+    $pos = 256 + 16;
+    for ($i = 0; $i < 4096; $i++)
+    {
+        next unless $filled[$i];
+        $filled_idx[$i >> 4] = $pos;
+        $pos += 16;
+        $i |= 15;   # skip to the end of this 256-char range
+    }
+    my $null_offset = $pos;  # null mapping
+    $total += $pos;
+
+    # add the index offsets to the subsets positions
+
+    for ($i = 0; $i < 4096; $i++)
+    {
+        next unless $filled[$i];
+        $filled[$i] += $null_offset;
+    }
+
+    # dump the main index
+
+    printf OUTPUT "const WCHAR unicode_decompose_table[%d] =\n", $total;
+    printf OUTPUT "{\n    /* index */\n";
+    printf OUTPUT "%s", DUMP_ARRAY( "0x%04x", 0, @filled_idx );
+    printf OUTPUT ",\n    /* null sub-index */\n%s", DUMP_ARRAY( "0x%04x", 0, ($null_offset) x 16 );
+
+    # dump the second-level indexes
+
+    for ($i = 0; $i < 256; $i++)
+    {
+        next unless ($filled_idx[$i] > 256);
+        my @table = @filled[($i<<4)..($i<<4)+15];
+        # empty subsets inside a used range share the null mapping
+        for ($j = 0; $j < 16; $j++) { $table[$j] ||= $null_offset; }
+        printf OUTPUT ",\n    /* sub-index %02x */\n", $i;
+        printf OUTPUT "%s", DUMP_ARRAY( "0x%04x", 0, @table );
+    }
+
+    # dump the 16-char subsets
+
+    printf OUTPUT ",\n    /* null mapping */\n";
+    printf OUTPUT "%s", DUMP_ARRAY( "0x%04x", 0, (0) x 32 );
+
+    for ($i = 0; $i < 4096; $i++)
+    {
+        next unless $filled[$i];
+        my @table = (0) x 32;
+        for ($j = 0; $j < 16; $j++)
+        {
+            if (defined $decomp_table[($i<<4) + $j])
+            {
+                $table[2 * $j] = ${$decomp_table[($i << 4) + $j]}[0];
+                $table[2 * $j + 1] = ${$decomp_table[($i << 4) + $j]}[1];
+            }
+        }
+        printf OUTPUT ",\n    /* 0x%03x0 .. 0x%03xf */\n", $i, $i;
+        printf OUTPUT "%s", DUMP_ARRAY( "0x%04x", 0, @table );
+    }
+
+    printf OUTPUT "\n};\n";
+    close OUTPUT;
+}
+
+
 ################################################################
 # read an input file and generate the corresponding .c file
 sub HANDLE_FILE
--- a/unicode/mbtowc.c
+++ b/unicode/mbtowc.c
@ -9,6 +9,23 @@
 #include "winnls.h"
 #include "wine/unicode.h"

+/* get the decomposition of a Unicode char
+ *
+ * Stores the decomposed form of 'src' into 'dst' (room for 'dstlen' chars)
+ * and returns the number of chars stored. A char with no entry in
+ * unicode_decompose_table is stored unchanged and the result is 1.
+ * Returns 0 if the decomposition does not fit in 'dstlen' chars.
+ * dst[0] is written unconditionally, so 'dstlen' must be at least 1.
+ */
+static int get_decomposition( WCHAR src, WCHAR *dst, unsigned int dstlen )
+{
+    extern const WCHAR unicode_decompose_table[];
+    const WCHAR *ptr = unicode_decompose_table;
+    int res;
+
+    *dst = src;
+    ptr = unicode_decompose_table + ptr[src >> 8];  /* main index entry -> sub-index for this high byte */
+    ptr = unicode_decompose_table + ptr[(src >> 4) & 0x0f] + 2 * (src & 0x0f);  /* sub-index -> 16-char subset, two WCHARs per char */
+    if (!*ptr) return 1;  /* zero first char: no decomposition, keep src as is */
+    if (dstlen <= 1) return 0;  /* a decomposition needs at least two chars */
+    /* apply the decomposition recursively to the first char */
+    if ((res = get_decomposition( *ptr, dst, dstlen-1 ))) dst[res++] = ptr[1];  /* then append the second char */
+    return res;
+}
+
 /* check src string for invalid chars; return non-zero if invalid char found */
 static inline int check_invalid_chars_sbcs( const struct sbcs_table *table,
                                            const unsigned char *src, unsigned int srclen )
@ -70,6 +87,33 @@ static inline int mbstowcs_sbcs( const struct sbcs_table *table,
    }
 }

+/* mbstowcs for single-byte code page with char decomposition
+ *
+ * With dstlen == 0 only the required length in WCHARs is returned and
+ * 'dst' is not written. Otherwise returns the number of WCHARs stored
+ * in 'dst', or -1 if 'dst' is too small to hold the whole of 'src'.
+ */
+static int mbstowcs_sbcs_decompose( const struct sbcs_table *table,
+                                    const unsigned char *src, unsigned int srclen,
+                                    WCHAR *dst, unsigned int dstlen )
+{
+    const WCHAR * const cp2uni = table->cp2uni;
+    unsigned int len;
+
+    if (!dstlen)  /* compute length */
+    {
+        WCHAR dummy[4]; /* no decomposition is larger than 4 chars */
+        for (len = 0; srclen; srclen--, src++)
+            len += get_decomposition( cp2uni[*src], dummy, 4 );
+        return len;
+    }
+
+    for (len = dstlen; srclen && len; srclen--, src++)  /* len = room left in dst */
+    {
+        int res = get_decomposition( cp2uni[*src], dst, len );
+        if (!res) break;  /* decomposition does not fit in the remaining room */
+        len -= res;
+        dst += res;
+    }
+    if (srclen) return -1;  /* overflow */
+    return dstlen - len;
+}
+
 /* query necessary dst length for src string */
 static inline int get_length_dbcs( const struct dbcs_table *table,
                                   const unsigned char *src, unsigned int srclen )
@ -122,7 +166,9 @@ static inline int mbstowcs_dbcs( const struct dbcs_table *table,
 {
    const WCHAR * const cp2uni = table->cp2uni;
    const unsigned char * const cp2uni_lb = table->cp2uni_leadbytes;
-    int len;
+    unsigned int len;
+
+    if (!dstlen) return get_length_dbcs( table, src, srclen );

    for (len = dstlen; srclen && len; len--, srclen--, src++, dst++)
    {
@ -140,6 +186,54 @@ static inline int mbstowcs_dbcs( const struct dbcs_table *table,
 }


+/* mbstowcs for double-byte code page with character decomposition
+ *
+ * With dstlen == 0 only the required length in WCHARs is returned and
+ * 'dst' is not written. Otherwise returns the number of WCHARs stored
+ * in 'dst', or -1 if 'dst' is too small to hold the whole of 'src'.
+ * A lead byte that is the last byte of 'src' is ignored in both modes.
+ */
+static int mbstowcs_dbcs_decompose( const struct dbcs_table *table,
+                                    const unsigned char *src, unsigned int srclen,
+                                    WCHAR *dst, unsigned int dstlen )
+{
+    const WCHAR * const cp2uni = table->cp2uni;
+    const unsigned char * const cp2uni_lb = table->cp2uni_leadbytes;
+    unsigned int len;
+    WCHAR ch;
+    int res;
+
+    if (!dstlen)  /* compute length */
+    {
+        WCHAR dummy[4]; /* no decomposition is larger than 4 chars */
+        for (len = 0; srclen; srclen--, src++)
+        {
+            unsigned char off = cp2uni_lb[*src];  /* non-zero for a lead byte */
+            if (off)
+            {
+                if (!--srclen) break;  /* partial char, ignore it */
+                src++;
+                ch = cp2uni[(off << 8) + *src];
+            }
+            else ch = cp2uni[*src];
+            len += get_decomposition( ch, dummy, 4 );
+        }
+        return len;
+    }
+
+    for (len = dstlen; srclen && len; srclen--, src++)  /* len = room left in dst */
+    {
+        unsigned char off = cp2uni_lb[*src];  /* non-zero for a lead byte */
+        if (off)
+        {
+            if (!--srclen) break;  /* partial char, ignore it */
+            src++;
+            ch = cp2uni[(off << 8) + *src];
+        }
+        else ch = cp2uni[*src];
+        if (!(res = get_decomposition( ch, dst, len ))) break;  /* does not fit in the remaining room */
+        dst += res;
+        len -= res;
+    }
+    if (srclen) return -1;  /* overflow */
+    return dstlen - len;
+}
+
+
 /* return -1 on dst buffer overflow, -2 on invalid input char */
 int cp_mbstowcs( const union cptable *table, int flags,
                 const char *src, int srclen,
@ -151,8 +245,12 @@ int cp_mbstowcs( const union cptable *table, int flags,
        {
            if (check_invalid_chars_sbcs( &table->sbcs, src, srclen )) return -2;
        }
-        if (!dstlen) return srclen;
-        return mbstowcs_sbcs( &table->sbcs, src, srclen, dst, dstlen );
+        if (!(flags & MB_COMPOSITE))
+        {
+            if (!dstlen) return srclen;
+            return mbstowcs_sbcs( &table->sbcs, src, srclen, dst, dstlen );
+        }
+        return mbstowcs_sbcs_decompose( &table->sbcs, src, srclen, dst, dstlen );
    }
    else /* mbcs */
    {
@ -160,7 +258,9 @@ int cp_mbstowcs( const union cptable *table, int flags,
        {
            if (check_invalid_chars_dbcs( &table->dbcs, src, srclen )) return -2;
        }
-        if (!dstlen) return get_length_dbcs( &table->dbcs, src, srclen );
-        return mbstowcs_dbcs( &table->dbcs, src, srclen, dst, dstlen );
+        if (!(flags & MB_COMPOSITE))
+            return mbstowcs_dbcs( &table->dbcs, src, srclen, dst, dstlen );
+        else
+            return mbstowcs_dbcs_decompose( &table->dbcs, src, srclen, dst, dstlen );
    }
 }
--- a/unicode/wctomb.c
+++ b/unicode/wctomb.c
@ -9,6 +9,90 @@
 #include "winnls.h"
 #include "wine/unicode.h"

+/* Binary search in unicode_compose_table; helper for compose().
+ * The table is read as (key, value) pairs of WCHARs. Returns the index of
+ * the pair within [low, high] whose key equals 'ch', or -1 if none does. */
+static inline int binary_search( WCHAR ch, int low, int high )
+{
+    extern const WCHAR unicode_compose_table[];
+
+    while (low <= high)
+    {
+        const int mid = (low + high) / 2;
+        const WCHAR key = unicode_compose_table[2 * mid];
+
+        if (key == ch) return mid;
+        if (key < ch) low = mid + 1;   /* continue in the upper half */
+        else high = mid - 1;           /* continue in the lower half */
+    }
+    return -1;
+}
+
+/* return the result of the composition of two Unicode chars, or 0 if none
+ *
+ * 'str' must point to at least two chars. The second char str[1] is looked
+ * up first, among the first unicode_compose_table_size pairs of
+ * unicode_compose_table; the matching pair gives the range of pairs in
+ * which the first char str[0] is then searched.
+ */
+static WCHAR compose( const WCHAR *str )
+{
+    extern const WCHAR unicode_compose_table[];
+    extern const unsigned int unicode_compose_table_size;
+
+    int idx = 1, low = 0, high = unicode_compose_table_size - 1;
+    for (;;)
+    {
+        int pos = binary_search( str[idx], low, high );
+        if (pos == -1) return 0;
+        if (!idx--) return unicode_compose_table[2*pos+1];  /* second pass (idx was 0): value paired with str[0] */
+        low = unicode_compose_table[2*pos+1];  /* first pass: start of the range for this second char */
+        high = unicode_compose_table[2*pos+3] - 1;  /* the range ends where the next entry's range starts */
+    }
+}
+
+
+/****************************************************************/
+/* sbcs support */
+
+/* Tell whether byte 'ch' may be used as the sbcs translation of 'wch'.
+ * Returns non-zero if the mapping is acceptable under 'flags', 0 if not. */
+static inline int is_valid_sbcs_mapping( const struct sbcs_table *table, int flags,
+                                         WCHAR wch, unsigned char ch )
+{
+    if (flags & WC_NO_BEST_FIT_CHARS)
+    {
+        /* strict mode: the byte must map back to exactly the same char */
+        return (wch == table->cp2uni[ch]);
+    }
+    /* the default char is only a genuine result for the default Unicode char */
+    if (ch == (unsigned char)table->info.def_char)
+        return (wch == table->info.def_unicode_char);
+    return 1;
+}
+
+/* query necessary dst length for src string
+ *
+ * Returns srclen unless WC_COMPOSITECHECK is set. With that flag, two
+ * consecutive chars for which compose() finds a composition count as one
+ * output char if WC_DEFAULTCHAR or WC_DISCARDNS is set, or if the composed
+ * char has a valid mapping in 'table'; otherwise they are counted
+ * separately.
+ */
+static inline int get_length_sbcs( const struct sbcs_table *table, int flags,
+                                   const WCHAR *src, unsigned int srclen )
+{
+    unsigned int ret = srclen;
+
+    if (flags & WC_COMPOSITECHECK)
+    {
+        const unsigned char  * const uni2cp_low = table->uni2cp_low;
+        const unsigned short * const uni2cp_high = table->uni2cp_high;
+        WCHAR composed;
+
+        for (ret = 0; srclen > 1; ret++, srclen--, src++)  /* a composition needs two chars */
+        {
+            if (!(composed = compose(src))) continue;  /* no composition: count this char alone */
+            /* check if we should skip the next char */
+
+            /* in WC_DEFAULTCHAR and WC_DISCARDNS mode, we always skip */
+            /* the next char no matter if the composition is valid or not */
+            if (!(flags & (WC_DEFAULTCHAR|WC_DISCARDNS)))
+            {
+                unsigned char ch = uni2cp_low[uni2cp_high[composed >> 8] + (composed & 0xff)];
+                if (!is_valid_sbcs_mapping( table, flags, composed, ch )) continue;  /* unusable: count both chars */
+            }
+            src++;  /* pair counted as one: also step over the second char */
+            srclen--;
+        }
+        if (srclen) ret++;  /* last char */
+    }
+    return ret;
+}
+
 /* wcstombs for single-byte code page */
 static inline int wcstombs_sbcs( const struct sbcs_table *table,
                                 const WCHAR *src, unsigned int srclen,
@ -61,49 +145,139 @@ static int wcstombs_sbcs_slow( const struct sbcs_table *table, int flags,
                               char *dst, unsigned int dstlen,
                               const char *defchar, int *used )
 {
-    const WCHAR * const cp2uni = table->cp2uni;
    const unsigned char  * const uni2cp_low = table->uni2cp_low;
    const unsigned short * const uni2cp_high = table->uni2cp_high;
    const unsigned char table_default = table->info.def_char & 0xff;
-    int ret = srclen, tmp;
-
-    if (dstlen < srclen)
-    {
-        /* buffer too small: fill it up to dstlen and return error */
-        srclen = dstlen;
-        ret = -1;
-    }
+    unsigned int len;
+    int tmp;
+    WCHAR composed;

    if (!defchar) defchar = &table_default;
    if (!used) used = &tmp;  /* avoid checking on every char */

-    while (srclen)
+    for (len = dstlen; srclen && len; dst++, len--, src++, srclen--)
    {
-        unsigned char ch = uni2cp_low[uni2cp_high[*src >> 8] + (*src & 0xff)];
-        if (((flags & WC_NO_BEST_FIT_CHARS) && (cp2uni[ch] != *src)) ||
-            (ch == table_default && *src != table->info.def_unicode_char))
+        WCHAR wch = *src;
+
+        if ((flags & WC_COMPOSITECHECK) && (srclen > 1) && (composed = compose(src)))
        {
-            ch = *defchar;
+            /* now check if we can use the composed char */
+            *dst = uni2cp_low[uni2cp_high[composed >> 8] + (composed & 0xff)];
+            if (is_valid_sbcs_mapping( table, flags, composed, *dst ))
+            {
+                /* we have a good mapping, use it */
+                src++;
+                srclen--;
+                continue;
+            }
+            /* no mapping for the composed char, check the other flags */
+            if (flags & WC_DEFAULTCHAR) /* use the default char instead */
+            {
+                *dst = *defchar;
+                *used = 1;
+                src++;  /* skip the non-spacing char */
+                srclen--;
+                continue;
+            }
+            if (flags & WC_DISCARDNS) /* skip the second char of the composition */
+            {
+                src++;
+                srclen--;
+            }
+            /* WC_SEPCHARS is the default */
+        }
+
+        *dst = uni2cp_low[uni2cp_high[wch >> 8] + (wch & 0xff)];
+        if (!is_valid_sbcs_mapping( table, flags, wch, *dst ))
+        {
+            *dst = *defchar;
            *used = 1;
        }
-        *dst++ = ch;
-        src++;
-        srclen--;
    }
-    return ret;
+    if (srclen) return -1;  /* overflow */
+    return dstlen - len;
+}
+
+
+/****************************************************************/
+/* dbcs support */
+
+/* Tell whether code page char 'ch' may be used as the dbcs translation of
+ * 'wch'. Returns non-zero if acceptable under 'flags', 0 if not. */
+static inline int is_valid_dbcs_mapping( const struct dbcs_table *table, int flags,
+                                         WCHAR wch, unsigned short ch )
+{
+    unsigned int index;
+
+    /* the default char is only a genuine result for the default Unicode char */
+    if (ch == table->info.def_char && wch != table->info.def_unicode_char) return 0;
+
+    /* best-fit mappings are acceptable unless explicitly forbidden */
+    if (!(flags & WC_NO_BEST_FIT_CHARS)) return 1;
+
+    /* round trip: the char must map back to the same Unicode value */
+    index = ch & 0xff;
+    if (ch & 0xff00) index += table->cp2uni_leadbytes[ch >> 8] << 8;  /* double-byte char */
+    return (table->cp2uni[index] == wch);
+}

 /* query necessary dst length for src string */
-static inline int get_length_dbcs( const struct dbcs_table *table,
-                                   const WCHAR *src, unsigned int srclen )
+static int get_length_dbcs( const struct dbcs_table *table, int flags,
+                            const WCHAR *src, unsigned int srclen,
+                            const char *defchar )
 {
    const unsigned short * const uni2cp_low = table->uni2cp_low;
    const unsigned short * const uni2cp_high = table->uni2cp_high;
+    WCHAR defchar_value = table->info.def_char;
+    WCHAR composed;
    int len;

-    for (len = 0; srclen; srclen--, src++, len++)
+    if (!defchar && !(flags & WC_COMPOSITECHECK))
    {
-        if (uni2cp_low[uni2cp_high[*src >> 8] + (*src & 0xff)] & 0xff00) len++;
+        for (len = 0; srclen; srclen--, src++, len++)
+        {
+            if (uni2cp_low[uni2cp_high[*src >> 8] + (*src & 0xff)] & 0xff00) len++;
+        }
+        return len;
+    }
+
+    if (defchar) defchar_value = defchar[1] ? ((defchar[0] << 8) | defchar[1]) : defchar[0];
+    for (len = 0; srclen; len++, srclen--, src++)
+    {
+        unsigned short res;
+        WCHAR wch = *src;
+
+        if ((flags & WC_COMPOSITECHECK) && (srclen > 1) && (composed = compose(src)))
+        {
+            /* now check if we can use the composed char */
+            res = uni2cp_low[uni2cp_high[composed >> 8] + (composed & 0xff)];
+
+            if (is_valid_dbcs_mapping( table, flags, composed, res ))
+            {
+                /* we have a good mapping for the composed char, use it */
+                if (res & 0xff00) len++;
+                src++;
+                srclen--;
+                continue;
+            }
+            /* no mapping for the composed char, check the other flags */
+            if (flags & WC_DEFAULTCHAR) /* use the default char instead */
+            {
+                if (defchar_value & 0xff00) len++;
+                src++;  /* skip the non-spacing char */
+                srclen--;
+                continue;
+            }
+            if (flags & WC_DISCARDNS) /* skip the second char of the composition */
+            {
+                src++;
+                srclen--;
+            }
+            /* WC_SEPCHARS is the default */
+        }
+
+        res = uni2cp_low[uni2cp_high[wch >> 8] + (wch & 0xff)];
+        if (!is_valid_dbcs_mapping( table, flags, wch, res )) res = defchar_value;
+        if (res & 0xff00) len++;
    }
    return len;
 }
@ -138,11 +312,10 @@ static int wcstombs_dbcs_slow( const struct dbcs_table *table, int flags,
                               char *dst, unsigned int dstlen,
                               const char *defchar, int *used )
 {
-    const WCHAR * const cp2uni = table->cp2uni;
    const unsigned short * const uni2cp_low = table->uni2cp_low;
    const unsigned short * const uni2cp_high = table->uni2cp_high;
-    const unsigned char  * const cp2uni_lb = table->cp2uni_leadbytes;
    WCHAR defchar_value = table->info.def_char;
+    WCHAR composed;
    int len, tmp;

    if (defchar) defchar_value = defchar[1] ? ((defchar[0] << 8) | defchar[1]) : defchar[0];
@ -150,32 +323,46 @@ static int wcstombs_dbcs_slow( const struct dbcs_table *table, int flags,

    for (len = dstlen; srclen && len; len--, srclen--, src++)
    {
-        unsigned short res = uni2cp_low[uni2cp_high[*src >> 8] + (*src & 0xff)];
+        unsigned short res;
+        WCHAR wch = *src;

-        if (res == table->info.def_char && *src != table->info.def_unicode_char)
+        if ((flags & WC_COMPOSITECHECK) && (srclen > 1) && (composed = compose(src)))
+        {
+            /* now check if we can use the composed char */
+            res = uni2cp_low[uni2cp_high[composed >> 8] + (composed & 0xff)];
+
+            if (is_valid_dbcs_mapping( table, flags, composed, res ))
+            {
+                /* we have a good mapping for the composed char, use it */
+                src++;
+                srclen--;
+                goto output_char;
+            }
+            /* no mapping for the composed char, check the other flags */
+            if (flags & WC_DEFAULTCHAR) /* use the default char instead */
+            {
+                res = defchar_value;
+                *used = 1;
+                src++;  /* skip the non-spacing char */
+                srclen--;
+                goto output_char;
+            }
+            if (flags & WC_DISCARDNS) /* skip the second char of the composition */
+            {
+                src++;
+                srclen--;
+            }
+            /* WC_SEPCHARS is the default */
+        }
+
+        res = uni2cp_low[uni2cp_high[wch >> 8] + (wch & 0xff)];
+        if (!is_valid_dbcs_mapping( table, flags, wch, res ))
        {
            res = defchar_value;
            *used = 1;
        }
-        else if (flags & WC_NO_BEST_FIT_CHARS)
-        {
-            /* check if char maps back to the same Unicode value */
-            if (res & 0xff00)
-            {
-                unsigned char off = cp2uni_lb[res >> 8];
-                if (cp2uni[(off << 8) + (res & 0xff)] != *src)
-                {
-                    res = defchar_value;
-                    *used = 1;
-                }
-            }
-            else if (cp2uni[res & 0xff] != *src)
-            {
-                res = defchar_value;
-                *used = 1;
-            }
-        }

+    output_char:
        if (res & 0xff00)
        {
            if (len == 1) break;  /* do not output a partial char */
@ -196,7 +383,7 @@ int cp_wcstombs( const union cptable *table, int flags,
 {
    if (table->info.char_size == 1)
    {
-        if (!dstlen) return srclen;
+        if (!dstlen) return get_length_sbcs( &table->sbcs, flags, src, srclen );
        if (flags || defchar || used)
            return wcstombs_sbcs_slow( &table->sbcs, flags, src, srclen,
                                       dst, dstlen, defchar, used );
@ -204,7 +391,7 @@ int cp_wcstombs( const union cptable *table, int flags,
    }
    else /* mbcs */
    {
-        if (!dstlen) return get_length_dbcs( &table->dbcs, src, srclen );
+        if (!dstlen) return get_length_dbcs( &table->dbcs, flags, src, srclen, defchar );
        if (flags || defchar || used)
            return wcstombs_dbcs_slow( &table->dbcs, flags, src, srclen,
                                       dst, dstlen, defchar, used );