unicode: Expand surrogates in decomposition tables.

Signed-off-by: Alexandre Julliard <julliard@winehq.org>
feature/deterministic
Alexandre Julliard 2020-02-04 14:36:21 +01:00
parent cd8eaef47e
commit 0576fa43c4
3 changed files with 518 additions and 507 deletions

View File

@ -4,7 +4,7 @@
#include "windef.h"
const WCHAR DECLSPEC_HIDDEN nfd_table[6061] =
const WCHAR DECLSPEC_HIDDEN nfd_table[6075] =
{
/* index */
0x0110, 0x0120, 0x0130, 0x0140, 0x0150, 0x0100, 0x0160, 0x0100,
@ -569,42 +569,42 @@ const WCHAR DECLSPEC_HIDDEN nfd_table[6061] =
0x16ee, 0x16ef, 0x16f0, 0x16f1, 0x16f2, 0x16f3, 0x16f4, 0x16f5,
/* offsets 0xfa60 .. 0xfa6f */
0x16f6, 0x16f7, 0x16f8, 0x16f9, 0x16fa, 0x16fb, 0x16fc, 0x16fd,
0x16fe, 0x16ff, 0x1700, 0x1701, 0x1702, 0x1702, 0x1703, 0x1703,
0x16fe, 0x16ff, 0x1700, 0x1701, 0x1702, 0x1704, 0x1705, 0x1705,
/* offsets 0xfa70 .. 0xfa7f */
0x1703, 0x1704, 0x1705, 0x1706, 0x1707, 0x1708, 0x1709, 0x170a,
0x170b, 0x170c, 0x170d, 0x170e, 0x170f, 0x1710, 0x1711, 0x1712,
0x1705, 0x1706, 0x1707, 0x1708, 0x1709, 0x170a, 0x170b, 0x170c,
0x170d, 0x170e, 0x170f, 0x1710, 0x1711, 0x1712, 0x1713, 0x1714,
/* offsets 0xfa80 .. 0xfa8f */
0x1713, 0x1714, 0x1715, 0x1716, 0x1717, 0x1718, 0x1719, 0x171a,
0x171b, 0x171c, 0x171d, 0x171e, 0x171f, 0x1720, 0x1721, 0x1722,
0x1715, 0x1716, 0x1717, 0x1718, 0x1719, 0x171a, 0x171b, 0x171c,
0x171d, 0x171e, 0x171f, 0x1720, 0x1721, 0x1722, 0x1723, 0x1724,
/* offsets 0xfa90 .. 0xfa9f */
0x1723, 0x1724, 0x1725, 0x1726, 0x1727, 0x1728, 0x1729, 0x172a,
0x172b, 0x172c, 0x172d, 0x172e, 0x172f, 0x1730, 0x1731, 0x1732,
0x1725, 0x1726, 0x1727, 0x1728, 0x1729, 0x172a, 0x172b, 0x172c,
0x172d, 0x172e, 0x172f, 0x1730, 0x1731, 0x1732, 0x1733, 0x1734,
/* offsets 0xfaa0 .. 0xfaaf */
0x1733, 0x1734, 0x1735, 0x1736, 0x1737, 0x1738, 0x1739, 0x173a,
0x173b, 0x173c, 0x173d, 0x173e, 0x173f, 0x1740, 0x1741, 0x1742,
0x1735, 0x1736, 0x1737, 0x1738, 0x1739, 0x173a, 0x173b, 0x173c,
0x173d, 0x173e, 0x173f, 0x1740, 0x1741, 0x1742, 0x1743, 0x1744,
/* offsets 0xfab0 .. 0xfabf */
0x1743, 0x1744, 0x1745, 0x1746, 0x1747, 0x1748, 0x1749, 0x174a,
0x174b, 0x174c, 0x174d, 0x174e, 0x174f, 0x1750, 0x1751, 0x1752,
0x1745, 0x1746, 0x1747, 0x1748, 0x1749, 0x174a, 0x174b, 0x174c,
0x174d, 0x174e, 0x174f, 0x1750, 0x1751, 0x1752, 0x1753, 0x1754,
/* offsets 0xfac0 .. 0xfacf */
0x1753, 0x1754, 0x1755, 0x1756, 0x1757, 0x1758, 0x1759, 0x175a,
0x175b, 0x175c, 0x175d, 0x175e, 0x175f, 0x1760, 0x1761, 0x1762,
0x1755, 0x1756, 0x1757, 0x1758, 0x1759, 0x175a, 0x175b, 0x175c,
0x175d, 0x175e, 0x175f, 0x1760, 0x1761, 0x1762, 0x1763, 0x1764,
/* offsets 0xfad0 .. 0xfadf */
0x1762, 0x1762, 0x1762, 0x1763, 0x1764, 0x1765, 0x1765, 0x1765,
0x1765, 0x1766, 0x1767, 0x1767, 0x1767, 0x1767, 0x1767, 0x1767,
0x1766, 0x1768, 0x176a, 0x176b, 0x176c, 0x176d, 0x176f, 0x1771,
0x1773, 0x1774, 0x1775, 0x1775, 0x1775, 0x1775, 0x1775, 0x1775,
/* offsets 0xfb10 .. 0xfb1f */
0x1767, 0x1767, 0x1767, 0x1767, 0x1767, 0x1767, 0x1767, 0x1767,
0x1767, 0x1767, 0x1767, 0x1767, 0x1767, 0x1767, 0x1769, 0x1769,
0x1775, 0x1775, 0x1775, 0x1775, 0x1775, 0x1775, 0x1775, 0x1775,
0x1775, 0x1775, 0x1775, 0x1775, 0x1775, 0x1775, 0x1777, 0x1777,
/* offsets 0xfb20 .. 0xfb2f */
0x176b, 0x176b, 0x176b, 0x176b, 0x176b, 0x176b, 0x176b, 0x176b,
0x176b, 0x176b, 0x176b, 0x176d, 0x176f, 0x1772, 0x1775, 0x1777,
0x1779, 0x1779, 0x1779, 0x1779, 0x1779, 0x1779, 0x1779, 0x1779,
0x1779, 0x1779, 0x1779, 0x177b, 0x177d, 0x1780, 0x1783, 0x1785,
/* offsets 0xfb30 .. 0xfb3f */
0x1779, 0x177b, 0x177d, 0x177f, 0x1781, 0x1783, 0x1785, 0x1787,
0x1787, 0x1789, 0x178b, 0x178d, 0x178f, 0x1791, 0x1791, 0x1793,
0x1787, 0x1789, 0x178b, 0x178d, 0x178f, 0x1791, 0x1793, 0x1795,
0x1795, 0x1797, 0x1799, 0x179b, 0x179d, 0x179f, 0x179f, 0x17a1,
/* offsets 0xfb40 .. 0xfb4f */
0x1793, 0x1795, 0x1797, 0x1797, 0x1799, 0x179b, 0x179b, 0x179d,
0x179f, 0x17a1, 0x17a3, 0x17a5, 0x17a7, 0x17a9, 0x17ab, 0x17ad,
0x17a1, 0x17a3, 0x17a5, 0x17a5, 0x17a7, 0x17a9, 0x17a9, 0x17ab,
0x17ad, 0x17af, 0x17b1, 0x17b3, 0x17b5, 0x17b7, 0x17b9, 0x17bb,
/* offset sentinel */
0x17ad,
0x17bb,
/* data */
0x0041, 0x0300, 0x0041, 0x0301, 0x0041, 0x0302, 0x0041, 0x0303,
0x0041, 0x0308, 0x0041, 0x030a, 0x0043, 0x0327, 0x0045, 0x0300,
@ -934,26 +934,28 @@ const WCHAR DECLSPEC_HIDDEN nfd_table[6061] =
0x798e, 0x7a40, 0x7a81, 0x7bc0, 0x7df4, 0x7e09, 0x7e41, 0x7f72,
0x8005, 0x81ed, 0x8279, 0x8279, 0x8457, 0x8910, 0x8996, 0x8b01,
0x8b39, 0x8cd3, 0x8d08, 0x8fb6, 0x9038, 0x96e3, 0x97ff, 0x983b,
0x6075, 0x8218, 0x4e26, 0x51b5, 0x5168, 0x4f80, 0x5145, 0x5180,
0x52c7, 0x52fa, 0x559d, 0x5555, 0x5599, 0x55e2, 0x585a, 0x58b3,
0x5944, 0x5954, 0x5a62, 0x5b28, 0x5ed2, 0x5ed9, 0x5f69, 0x5fad,
0x60d8, 0x614e, 0x6108, 0x618e, 0x6160, 0x61f2, 0x6234, 0x63c4,
0x641c, 0x6452, 0x6556, 0x6674, 0x6717, 0x671b, 0x6756, 0x6b79,
0x6bba, 0x6d41, 0x6edb, 0x6ecb, 0x6f22, 0x701e, 0x716e, 0x77a7,
0x7235, 0x72af, 0x732a, 0x7471, 0x7506, 0x753b, 0x761d, 0x761f,
0x76ca, 0x76db, 0x76f4, 0x774a, 0x7740, 0x78cc, 0x7ab1, 0x7bc0,
0x7c7b, 0x7d5b, 0x7df4, 0x7f3e, 0x8005, 0x8352, 0x83ef, 0x8779,
0x8941, 0x8986, 0x8996, 0x8abf, 0x8af8, 0x8acb, 0x8b01, 0x8afe,
0x8aed, 0x8b39, 0x8b8a, 0x8d08, 0x8f38, 0x9072, 0x9199, 0x9276,
0x967c, 0x96e3, 0x9756, 0x97db, 0x97ff, 0x980b, 0x983b, 0x9b12,
0x9f9c, 0x3b9d, 0x4018, 0x4039, 0x9f43, 0x9f8e, 0x05d9, 0x05b4,
0x05f2, 0x05b7, 0x05e9, 0x05c1, 0x05e9, 0x05c2, 0x05e9, 0x05bc,
0x05c1, 0x05e9, 0x05bc, 0x05c2, 0x05d0, 0x05b7, 0x05d0, 0x05b8,
0x05d0, 0x05bc, 0x05d1, 0x05bc, 0x05d2, 0x05bc, 0x05d3, 0x05bc,
0x05d4, 0x05bc, 0x05d5, 0x05bc, 0x05d6, 0x05bc, 0x05d8, 0x05bc,
0x05d9, 0x05bc, 0x05da, 0x05bc, 0x05db, 0x05bc, 0x05dc, 0x05bc,
0x05de, 0x05bc, 0x05e0, 0x05bc, 0x05e1, 0x05bc, 0x05e3, 0x05bc,
0x05e4, 0x05bc, 0x05e6, 0x05bc, 0x05e7, 0x05bc, 0x05e8, 0x05bc,
0x05e9, 0x05bc, 0x05ea, 0x05bc, 0x05d5, 0x05b9, 0x05d1, 0x05bf,
0x05db, 0x05bf, 0x05e4, 0x05bf
0x6075, 0xd850, 0xdeee, 0x8218, 0x4e26, 0x51b5, 0x5168, 0x4f80,
0x5145, 0x5180, 0x52c7, 0x52fa, 0x559d, 0x5555, 0x5599, 0x55e2,
0x585a, 0x58b3, 0x5944, 0x5954, 0x5a62, 0x5b28, 0x5ed2, 0x5ed9,
0x5f69, 0x5fad, 0x60d8, 0x614e, 0x6108, 0x618e, 0x6160, 0x61f2,
0x6234, 0x63c4, 0x641c, 0x6452, 0x6556, 0x6674, 0x6717, 0x671b,
0x6756, 0x6b79, 0x6bba, 0x6d41, 0x6edb, 0x6ecb, 0x6f22, 0x701e,
0x716e, 0x77a7, 0x7235, 0x72af, 0x732a, 0x7471, 0x7506, 0x753b,
0x761d, 0x761f, 0x76ca, 0x76db, 0x76f4, 0x774a, 0x7740, 0x78cc,
0x7ab1, 0x7bc0, 0x7c7b, 0x7d5b, 0x7df4, 0x7f3e, 0x8005, 0x8352,
0x83ef, 0x8779, 0x8941, 0x8986, 0x8996, 0x8abf, 0x8af8, 0x8acb,
0x8b01, 0x8afe, 0x8aed, 0x8b39, 0x8b8a, 0x8d08, 0x8f38, 0x9072,
0x9199, 0x9276, 0x967c, 0x96e3, 0x9756, 0x97db, 0x97ff, 0x980b,
0x983b, 0x9b12, 0x9f9c, 0xd84a, 0xdc4a, 0xd84a, 0xdc44, 0xd84c,
0xdfd5, 0x3b9d, 0x4018, 0x4039, 0xd854, 0xde49, 0xd857, 0xdcd0,
0xd85f, 0xded3, 0x9f43, 0x9f8e, 0x05d9, 0x05b4, 0x05f2, 0x05b7,
0x05e9, 0x05c1, 0x05e9, 0x05c2, 0x05e9, 0x05bc, 0x05c1, 0x05e9,
0x05bc, 0x05c2, 0x05d0, 0x05b7, 0x05d0, 0x05b8, 0x05d0, 0x05bc,
0x05d1, 0x05bc, 0x05d2, 0x05bc, 0x05d3, 0x05bc, 0x05d4, 0x05bc,
0x05d5, 0x05bc, 0x05d6, 0x05bc, 0x05d8, 0x05bc, 0x05d9, 0x05bc,
0x05da, 0x05bc, 0x05db, 0x05bc, 0x05dc, 0x05bc, 0x05de, 0x05bc,
0x05e0, 0x05bc, 0x05e1, 0x05bc, 0x05e3, 0x05bc, 0x05e4, 0x05bc,
0x05e6, 0x05bc, 0x05e7, 0x05bc, 0x05e8, 0x05bc, 0x05e9, 0x05bc,
0x05ea, 0x05bc, 0x05d5, 0x05b9, 0x05d1, 0x05bf, 0x05db, 0x05bf,
0x05e4, 0x05bf
};

File diff suppressed because it is too large Load Diff

View File

@ -428,6 +428,14 @@ my %joining_forms =
"medial" => []
);
################################################################
# convert a code point to its UTF-16 representation
#
# Values below 0x10000 are returned unchanged as a single value; anything
# else is returned as the two-element list (high surrogate, low surrogate).
sub get_utf16($)
{
    my ($codepoint) = @_;

    if ($codepoint < 0x10000)
    {
        return $codepoint;
    }

    # Rebase onto the start of the supplementary range, then split the
    # result: the bits above the low ten go into the high surrogate, the
    # low ten bits go into the low surrogate.
    my $offset = $codepoint - 0x10000;
    my $high_surrogate = 0xd800 | ($offset >> 10);
    my $low_surrogate = 0xdc00 | ($offset & 0x3ff);

    return ( $high_surrogate, $low_surrogate );
}
################################################################
# fetch a unicode.org file and open it
sub open_data_file($$)
@ -453,7 +461,7 @@ sub get_decomposition($$)
my ($char, $table) = @_;
my @ret;
return $char unless defined ${$table}[$char];
return get_utf16($char) unless defined ${$table}[$char];
foreach my $ch (@{${$table}[$char]})
{
push @ret, get_decomposition( $ch, $table );
@ -614,10 +622,7 @@ sub load_data($)
elsif ($decomp =~ /^([0-9a-fA-F]+)$/)
{
# Single char decomposition
if (hex $1 < 65536)
{
$decomp_table[$src] = $decomp_compat_table[$src] = [ hex $1 ];
}
$decomp_table[$src] = $decomp_compat_table[$src] = [ hex $1 ];
if (($src >= 0xf900 && $src < 0xfb00) || ($src >= 0xfe30 && $src < 0xfffd))
{