Skip to content

Commit b319634

Browse files
committed
[[ Bug 20504 ]] Fix __MCStringCheck function
This commit corrects the __MCStringCheck function for the case where the input string contains a decomposed char sequence which can be mapped to a composed char in the native encoding. Additionally, it corrects the ISO8859-1 'native pair' mapping table, ensuring that decomposable native chars in the encoding map back to their composed counterparts.
1 parent 92d42ae commit b319634

File tree

4 files changed

+58
-12
lines changed

4 files changed

+58
-12
lines changed

docs/notes/bugfix-20504.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
# Fetching a char of a string can cause incorrect comparisons later on

libfoundation/src/foundation-string.cpp

Lines changed: 20 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -6841,9 +6841,26 @@ static void __MCStringCheck(MCStringRef self)
68416841

68426842
if (!MCUnicodeIsGraphemeClusterBoundary(self -> chars[i], self -> chars[i + 1]))
68436843
{
6844-
__MCStringSetFlags(self, kMCStringFlagNoChange, false, false);
6845-
t_can_be_native = false;
6846-
break;
6844+
/* There is no boundary between i and i+1, so check that the pair of
6845+
* codeunits won't map to native (e.g. e,acute).
6846+
* If they do map to native, check that there is a boundary after
6847+
* the second codeunit - otherwise it is a sequence of more than 2 codeunits, which
6848+
* cannot be native. */
6849+
char_t t_native_char;
6850+
if (!MCUnicodeMapToNative(self->chars+i, 2, t_native_char) ||
6851+
(i+1 < self->char_count &&
6852+
!MCUnicodeIsGraphemeClusterBoundary(self->chars[i+1], self->chars[i+2])))
6853+
{
6854+
__MCStringSetFlags(self, kMCStringFlagNoChange, false, false);
6855+
t_can_be_native = false;
6856+
break;
6857+
}
6858+
6859+
/* At this point i is the first codeunit of a 2-codeunit combining
6860+
* sequence which maps to a native, so we can skip the combiner. */
6861+
i++;
6862+
6863+
continue;
68476864
}
68486865

68496866
char_t t_native;

libfoundation/src/foundation-unicode.cpp

Lines changed: 21 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -1743,15 +1743,27 @@ bool MCUnicodeMapToNativePair_MacRoman(uinteger_t x, uinteger_t y, char_t& r_cha
17431743

17441744
bool MCUnicodeMapToNativePair_ISO8859_1(uinteger_t x, uinteger_t y, char_t& r_char)
17451745
{
1746-
static const uinteger_t s_pairs[] =
1747-
{
1748-
/* S */ 0xA653030C,
1749-
/* Y */ 0xBE590308,
1750-
/* Z */ 0x8E5A030C,
1751-
/* s */ 0xA873030C,
1752-
/* z */ 0xB87A030C
1753-
};
1754-
1746+
static const uinteger_t s_pairs[] =
1747+
{
1748+
/* A */ 0xC0410300, 0xC1410301, 0xC2410302, 0xC3410303, 0xC4410308, 0xC541030A,
1749+
/* C */ 0xC7430327,
1750+
/* E */ 0xC8450300, 0xC9450301, 0xCA450302, 0xCB450308,
1751+
/* I */ 0xCC490300, 0xCD490301, 0xCE490302, 0xCF490308,
1752+
/* N */ 0xD14E0303,
1753+
/* O */ 0xD24F0300, 0xD34F0301, 0xD44F0302, 0xD54F0303, 0xD64F0308,
1754+
/* U */ 0xD9550300, 0xDA550301, 0xDB550302, 0xDC550308,
1755+
/* Y */ 0xDD590301,
1756+
1757+
/* a */ 0xE0610300, 0xE1610301, 0xE2610302, 0xE3610303, 0xE4610308, 0xE561030A,
1758+
/* c */ 0xE7630327,
1759+
/* e */ 0xE8650300, 0xE9650301, 0xEA650302, 0xEB650308,
1760+
/* i */ 0xEC690300, 0xED690301, 0xEE690302, 0xEF690308,
1761+
/* n */ 0xF16E0303,
1762+
/* o */ 0xF26F0300, 0xF36F0301, 0xF46F0302, 0xF56F0303, 0xF66F0308,
1763+
/* u */ 0xF9750300, 0xFA750301, 0xFB750302, 0xFC750308,
1764+
/* y */ 0xFD790301, 0xFF790308,
1765+
};
1766+
17551767
uinteger_t z;
17561768
z = (x << 16) | y;
17571769

libfoundation/test/test_string.cpp

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -174,3 +174,19 @@ TEST(string, surrogate_unicode_props)
174174
const int kSPUA_B_Upper = 0x10FFFD + 1; // non-inclusive
175175
check_bidi_of_surrogate_range(kSPUA_B_Lower, kSPUA_B_Upper);
176176
}
177+
178+
TEST(string, normalize_compare)
179+
{
180+
MCAutoStringRef t_decomposed;
181+
MCStringCreateWithWString((const unichar_t*)u"\u0065\u0301\u0065\u0301\u0065\u0301", &t_decomposed);
182+
183+
MCAutoStringRef t_composed;
184+
MCStringCreateWithWString((const unichar_t*)u"\u00e9\u00e9\u00e9", &t_composed);
185+
186+
ASSERT_TRUE(MCStringIsEqualTo(*t_decomposed, *t_composed, kMCStringOptionCompareCaseless));
187+
188+
MCRange t_range;
189+
MCStringMapGraphemeIndices(*t_decomposed, MCRangeMake(0, 1), t_range);
190+
191+
ASSERT_TRUE(MCStringIsEqualTo(*t_decomposed, *t_composed, kMCStringOptionCompareCaseless));
192+
}

0 commit comments

Comments
 (0)