1- script "CoreStringsByteoffset"
1+ script "CoreStringsByteoffset"
22/*
33Copyright (C) 2016 LiveCode Ltd.
44
@@ -16,6 +16,38 @@ for more details.
1616You should have received a copy of the GNU General Public License
1717along with LiveCode. If not see <http://www.gnu.org/licenses/>. */
1818
// Returns a comma-separated list of every 1-based byte position at which
// pDelim is found in pString, or 0 when it is not found at all.
//
//   pDelim         - the needle to look for
//   pString        - the haystack to search
//   pCaseSensitive - the caseSensitive is set to true only when this is true
//   pNoOverlap     - when true, the search resumes after the end of each
//                    match; otherwise it resumes one byte after its start
//
// Function provided by Bernd Niggemann as part of bug report #21704; the
// implementation has been changed to use byteOffset rather than offset.
function allOffsets pDelim, pString, pCaseSensitive, pNoOverlap
   local tFound, tSkip, tOffsets, tExtraSkip

   set the caseSensitive to pCaseSensitive is true

   // Extra bytes to step over after each match. Zero keeps overlapping
   // matches; otherwise the remainder of the needle is skipped.
   put 0 into tExtraSkip
   if pNoOverlap then
      put the len of pDelim - 1 into tExtraSkip
   end if

   put 0 into tSkip
   repeat forever
      // byteOffset reports the position relative to the skipped bytes
      put byteOffset(pDelim, pString, tSkip) into tFound
      if tFound = 0 then
         exit repeat
      end if

      // Convert to an absolute position and record it
      add tFound to tSkip
      put tSkip & comma after tOffsets
      add tExtraSkip to tSkip
   end repeat

   if tOffsets is empty then
      return 0
   end if

   // Drop the trailing comma
   return char 1 to -2 of tOffsets
end allOffsets
49+
50+
1951on TestByteOffsetNonEmptyNeedleEmptyHaystack
2052 local tNeedle , tHaystack
2153 put "DEF" into tNeedle
@@ -53,4 +85,96 @@ on TestByteOffsetEmptyNeedleNonEmptyHaystack
5385
5486 TestAssert "byteoffset of empty needle in non-empty haystack" , byteoffset (tNeedle , tHaystack ) is 0
5587
56- end TestByteOffsetEmptyNeedleNonEmptyHaystack
88+ end TestByteOffsetEmptyNeedleNonEmptyHaystack
89+
// A needle that is longer than the haystack can never be found, so
// byteOffset is expected to return 0.
on TestByteOffsetBinaryEncodingShouldNotGiveAnswer
   local tNeedle, tHaystack
   put "Rejkyavik" into tNeedle
   put "R" into tHaystack

   TestAssert "byteoffset of needle longer than haystack should return 0", byteOffset(tNeedle, tHaystack) is 0
end TestByteOffsetBinaryEncodingShouldNotGiveAnswer
97+
// "j" is the third character of "Rejkyavik", so its byte offset in the
// unencoded haystack is expected to be 3.
on TestByteOffsetBinaryEncodingShouldGiveAnswer
   local tSought, tSearched, tFoundAt

   put "Rejkyavik" into tSearched
   put "j" into tSought
   put byteOffset(tSought, tSearched) into tFoundAt

   TestAssert "byteoffset of non-empty needle in non-empty haystack", tFoundAt is 3
end TestByteOffsetBinaryEncodingShouldGiveAnswer
105+
// Same as the unencoded case, but with both needle and haystack encoded as
// UTF-32 binary data: the needle is longer than the haystack, so byteOffset
// is expected to return 0.
on TestByteOffsetBinaryEncodingShouldNotGiveAnswerEncoded
   local tNeedle, tHaystack
   put textEncode("Rejkyavik", "UTF-32") into tNeedle
   put textEncode("R", "UTF-32") into tHaystack

   TestAssert "byteoffset of UTF-32 encoded needle longer than haystack should return 0", byteOffset(tNeedle, tHaystack) is 0
end TestByteOffsetBinaryEncodingShouldNotGiveAnswerEncoded
113+
// With UTF-32 every character occupies four bytes, so the third character
// ("j") of the encoded haystack starts at byte 2 * 4 + 1 = 9.
on TestByteOffsetBinaryEncodingShouldGiveAnswerEncoded
   local tSought, tSearched, tFoundAt

   put textEncode("Rejkyavik", "UTF-32") into tSearched
   put textEncode("j", "UTF-32") into tSought
   put byteOffset(tSought, tSearched) into tFoundAt

   TestAssert "byteoffset of non-empty needle in non-empty haystack", tFoundAt is 9
end TestByteOffsetBinaryEncodingShouldGiveAnswerEncoded
121+
122+
// Collects every (overlapping) byte offset of a seven-"a" UTF-32 needle in
// a UTF-32 haystack that mixes non-ASCII characters with two runs of
// nineteen "a"s, and compares the list with the expected positions.
on TestByteOffsetBinaryEncodingShouldGiveAnswerEncodedChineseChars
   local tSought, tSearched, tWanted, tActual

   put textEncode("aaaaaaa", "UTF-32") into tSought
   put textEncode("𠜎aa𠜎aaa ばいしんせい ばいしんせい aaaaaaaaaaaaaaaaaaa c aaaaaaaaaaaaaaaaaaa", "UTF-32") into tSearched

   // Character n of the haystack starts at byte 4 * (n - 1) + 1; the runs
   // start at characters 23 and 45, each giving thirteen matches.
   put "89,93,97,101,105,109,113,117,121,125,129,133,137,177,181,185,189,193,197,201,205,209,213,217,221,225" into tWanted

   put allOffsets(tSought, tSearched, true, false) into tActual

   TestAssert "byteoffset of non-empty needle in non-empty haystack with chinese characters", tActual is tWanted
end TestByteOffsetBinaryEncodingShouldGiveAnswerEncodedChineseChars
133+
// Test whether we can correctly detect all (overlapping) byte offsets of a
// UTF-32 encoded needle in a UTF-32 encoded haystack that contains
// non-ASCII characters, with the caseSensitive set to true
on TestOffsetMultipleUnicodeCharsInSensitiveText2
   local tNeedle, tHaystack

   put textEncode("aaa", "UTF-32") into tNeedle
   put textEncode("aa 𠜎 aaaaaaaaaaaaa↘𠜎aaaa", "UTF-32") into tHaystack

   // Character n of the haystack starts at byte 4 * (n - 1) + 1. The run of
   // thirteen "a"s starts at character 6 (byte 21) and the final run of four
   // starts at character 21 (byte 81).
   local tExpectedOffsets
   put "21,25,29,33,37,41,45,49,53,57,61,81,85" into tExpectedOffsets

   TestAssert "the offsets match the expected offsets", tExpectedOffsets is allOffsets(tNeedle, tHaystack, true, false)
end TestOffsetMultipleUnicodeCharsInSensitiveText2
149+
// Test that byte offsets in a UTF-32 encoded haystack containing non-ASCII
// characters are NOT found across a difference in case, even when the
// caseSensitive is set to false: the haystack contains "AAA" in the middle
// of the lowercase run, so the offsets must differ from those of the
// all-lowercase haystack.
// NOTE(review): this presumes byteOffset compares bytes exactly and does not
// honour the caseSensitive - confirm against the engine documentation.
on TestOffsetMultipleUnicodeCharsInSensitive2
   local tNeedle, tHaystack

   put textEncode("aaa", "UTF-32") into tNeedle
   put textEncode("aa 𠜎 aaaaaaaaAAAaa↘𠜎aaaa", "UTF-32") into tHaystack

   // These are the offsets that a case-insensitive match would produce
   local tExpectedOffsets
   put "21,25,29,33,37,41,45,49,53,57,61,81,85" into tExpectedOffsets

   TestAssert "the offsets do not match the case-insensitive offsets", tExpectedOffsets is not allOffsets(tNeedle, tHaystack, false, false)
end TestOffsetMultipleUnicodeCharsInSensitive2
165+
// Test whether we can correctly detect all (overlapping) byte offsets of a
// UTF-32 encoded supplementary-plane character in a haystack made of six
// copies of that character, with the caseSensitive set to true
on TestOffsetMultipleUnicodeCharsInSensitive3
   local tNeedle, tHaystack

   put textEncode("𐀁", "UTF-32") into tNeedle
   put textEncode("𐀁𐀁𐀁𐀁𐀁𐀁", "UTF-32") into tHaystack

   // The four bytes of U+10001 repeat with a period of two bytes, so the
   // needle also matches half-way through each character: the offsets step
   // by 2 rather than 4, up to the last position where four bytes remain.
   local tExpectedOffsets
   put "1,3,5,7,9,11,13,15,17,19,21" into tExpectedOffsets

   TestAssert "the offsets match the expected offsets", tExpectedOffsets is allOffsets(tNeedle, tHaystack, true, false)
end TestOffsetMultipleUnicodeCharsInSensitive3
0 commit comments