Skip to content

Commit 2d291d9

Browse files
Merge pull request livecode#6794 from livecode/bytesoffset-bug
[[ ByteOffset ]] Fixed bug causing incorrect results
2 parents 784ab55 + 7b63f69 commit 2d291d9

File tree

3 files changed

+136
-4
lines changed

3 files changed

+136
-4
lines changed

docs/notes/bugfix-byteoffset.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
# Fixed bug causing byteOffset to behave incorrectly in certain cases

libfoundation/src/foundation-data.cpp

Lines changed: 9 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -798,11 +798,18 @@ bool MCDataFirstIndexOf(MCDataRef p_data, MCDataRef p_chunk, MCRange p_range, ui
798798
if (p_range.length == 0)
799799
return false;
800800

801-
uindex_t t_limit, t_chunk_byte_count;
801+
uindex_t t_limit, t_chunk_byte_count, t_data_byte_count;
802802
t_chunk_byte_count = MCDataGetLength(p_chunk);
803+
t_data_byte_count = MCDataGetLength(p_data);
804+
803805
if (t_chunk_byte_count == 0)
804806
return false;
805-
t_limit = p_range . offset + p_range . length - t_chunk_byte_count + 1;
807+
else if (t_data_byte_count < t_chunk_byte_count)
808+
return false;
809+
else if (p_range.length < t_chunk_byte_count)
810+
return false;
811+
812+
t_limit = p_range . offset + t_data_byte_count- t_chunk_byte_count + 1;
806813

807814
const byte_t *t_bytes = MCDataGetBytePtr(p_data);
808815
const byte_t *t_chunk_bytes = MCDataGetBytePtr(p_chunk);

tests/lcs/core/strings/byteoffset.livecodescript

Lines changed: 126 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
script "CoreStringsByteoffset"
1+
script "CoreStringsByteoffset"
22
/*
33
Copyright (C) 2016 LiveCode Ltd.
44
@@ -16,6 +16,38 @@ for more details.
1616
You should have received a copy of the GNU General Public License
1717
along with LiveCode. If not see <http://www.gnu.org/licenses/>. */
1818

19+
// Function provided by Bernd Niggemann as part of bug report
20+
// #21704
21+
// We have changed the implementation to use byteOffset rather than offset
22+
function allOffsets pDelim, pString, pCaseSensitive, pNoOverlap
23+
local tNewPos, tPos, tResult, tLen
24+
25+
set the caseSensitive to pCaseSensitive is true
26+
27+
put 0 into tPos
28+
if pNoOverlap then
29+
put the len of pDelim - 1 into tLen
30+
repeat forever
31+
put byteoffset(pDelim, pString, tPos) into tNewPos
32+
if tNewPos = 0 then exit repeat
33+
add tNewPos to tPos
34+
put tPos,"" after tResult
35+
add tLen to tPos
36+
end repeat
37+
else
38+
repeat forever
39+
put byteoffset(pDelim, pString, tPos) into tNewPos
40+
if tNewPos = 0 then exit repeat
41+
add tNewPos to tPos
42+
put tPos,"" after tResult
43+
end repeat
44+
end if
45+
46+
if tResult is empty then return 0
47+
else return char 1 to -2 of tResult
48+
end allOffsets
49+
50+
1951
on TestByteOffsetNonEmptyNeedleEmptyHaystack
2052
local tNeedle, tHaystack
2153
put "DEF" into tNeedle
@@ -53,4 +85,96 @@ on TestByteOffsetEmptyNeedleNonEmptyHaystack
5385

5486
TestAssert "byteoffset of empty needle in non-empty haystack", byteoffset(tNeedle, tHaystack) is 0
5587

56-
end TestByteOffsetEmptyNeedleNonEmptyHaystack
88+
end TestByteOffsetEmptyNeedleNonEmptyHaystack
89+
90+
on TestByteOffsetBinaryEncodingShouldNotGiveAnswer
91+
local tNeedle, tHaystack
92+
put "Rejkyavik" into tNeedle
93+
put "R" into tHaystack
94+
95+
TestAssert "byteoffset of needler longer than haystack should return 0", byteoffset(tNeedle, tHaystack) is 0
96+
end TestByteOffsetBinaryEncodingShouldNotGiveAnswer
97+
98+
on TestByteOffsetBinaryEncodingShouldGiveAnswer
99+
local tNeedle, tHaystack
100+
put "j" into tNeedle
101+
put "Rejkyavik" into tHaystack
102+
103+
TestAssert "byteoffset of non-empty needle in non-empty haystack", byteoffset(tNeedle, tHaystack) is 3
104+
end TestByteOffsetBinaryEncodingShouldGiveAnswer
105+
106+
on TestByteOffsetBinaryEncodingShouldNotGiveAnswerEncoded
107+
local tNeedle, tHaystack
108+
put textEncode("Rejkyavik", "UTF-32") into tNeedle
109+
put textEncode("R", "UTF-32") into tHaystack
110+
111+
TestAssert "byteoffset of needler longer than haystack should return 0", byteoffset(tNeedle, tHaystack) is 0
112+
end TestByteOffsetBinaryEncodingShouldNotGiveAnswerEncoded
113+
114+
on TestByteOffsetBinaryEncodingShouldGiveAnswerEncoded
115+
local tNeedle, tHaystack
116+
put textEncode("j", "UTF-32") into tNeedle
117+
put textEncode("Rejkyavik", "UTF-32") into tHaystack
118+
119+
TestAssert "byteoffset of non-empty needle in non-empty haystack", byteoffset(tNeedle, tHaystack) is 9
120+
end TestByteOffsetBinaryEncodingShouldGiveAnswerEncoded
121+
122+
123+
on TestByteOffsetBinaryEncodingShouldGiveAnswerEncodedChineseChars
124+
local tNeedle, tHaystack
125+
put textEncode("aaaaaaa", "UTF-32") into tNeedle
126+
put textEncode("𠜎aa𠜎aaa ばいしんせい ばいしんせい aaaaaaaaaaaaaaaaaaa c aaaaaaaaaaaaaaaaaaa", "UTF-32") into tHaystack
127+
128+
local tExpectedResult
129+
put "89,93,97,101,105,109,113,117,121,125,129,133,137,177,181,185,189,193,197,201,205,209,213,217,221,225" into tExpectedResult
130+
131+
TestAssert "byteoffset of non-empty needle in non-empty haystack with chinese characters", allOffsets(tNeedle, tHaystack, true, false) is tExpectedResult
132+
end TestByteOffsetBinaryEncodingShouldGiveAnswerEncodedChineseChars
133+
134+
// Test whether we can correctly detect offsets in strings that contain
135+
// native Unicode characters, in a case-insensitive manner, in a different text
136+
on TestOffsetMultipleUnicodeCharsInSensitiveText2
137+
local tNeedle, tHaystack
138+
139+
put textEncode("aaa", "UTF-32") into tNeedle
140+
put textEncode("aa 𠜎 aaaaaaaaaaaaa↘𠜎aaaa", "UTF-32") into tHaystack
141+
142+
local tExpectedOffsets
143+
put "21,25,29,33,37,41,45,49,53,57,61,81,85" into tExpectedOffsets
144+
145+
put allOffsets(tNeedle, tHaystack, true, false)
146+
147+
TestAssert "the offsets match the expected offsets", tExpectedOffsets is allOffsets(tNeedle, tHaystack, true, false)
148+
end TestOffsetMultipleUnicodeCharsInSensitiveText2
149+
150+
// Test whether we can correctly detect offsets in strings that contain
151+
// native Unicode characters, in a case-insensitive manner
152+
on TestOffsetMultipleUnicodeCharsInSensitive2
153+
local tNeedle, tHaystack
154+
155+
put textEncode("aaa", "UTF-32") into tNeedle
156+
put textEncode("aa 𠜎 aaaaaaaaAAAaa↘𠜎aaaa", "UTF-32") into tHaystack
157+
158+
local tExpectedOffsets
159+
put "21,25,29,33,37,41,45,49,53,57,61,81,85" into tExpectedOffsets
160+
161+
put allOffsets(tNeedle, tHaystack, false, false)
162+
163+
TestAssert "the offsets match the expected offsets", tExpectedOffsets is not allOffsets(tNeedle, tHaystack, false, false)
164+
end TestOffsetMultipleUnicodeCharsInSensitive2
165+
166+
// Test whether we can correctly detect offsets in strings that contain
167+
// native Unicode characters, in a case-sensitive manner
168+
on TestOffsetMultipleUnicodeCharsInSensitive3
169+
local tNeedle, tHaystack
170+
171+
put textEncode("𐀁", "UTF-32") into tNeedle
172+
put textEncode("𐀁𐀁𐀁𐀁𐀁𐀁", "UTF-32") into tHaystack
173+
174+
local tExpectedOffsets
175+
put "1,3,5,7,9,11,13,15,17,19,21" into tExpectedOffsets
176+
177+
put allOffsets(tNeedle, tHaystack, false, false)
178+
179+
TestAssert "the offsets match the expected offsets", tExpectedOffsets is allOffsets(tNeedle, tHaystack, true, false)
180+
end TestOffsetMultipleUnicodeCharsInSensitive3

0 commit comments

Comments
 (0)