@@ -6,9 +6,11 @@ import { monoStringToString, utf16ToString } from "../strings";
6
6
import { MonoObject , MonoObjectRef , MonoString , MonoStringRef } from "../types/internal" ;
7
7
import { Int32Ptr } from "../types/emscripten" ;
8
8
import { wrap_error_root , wrap_no_error_root } from "../invoke-js" ;
9
+ import { GraphemeSegmenter } from "./grapheme-segmenter" ;
9
10
// Numeric error codes for this module. NOTE(review): presumably returned from the
// catch paths of the comparison / indexing entry points — those paths are not fully
// visible here, confirm against the callers.
const COMPARISON_ERROR = -2;
const INDEXING_ERROR = -1;
// Shared GraphemeSegmenter instance, created lazily on first use by mono_wasm_index_of.
// Initialized explicitly to null so the runtime value matches the declared type.
let graphemeSegmenterCached: GraphemeSegmenter | null = null;
12
14
13
15
export function mono_wasm_compare_string ( culture : MonoStringRef , str1 : number , str1Length : number , str2 : number , str2Length : number , options : number , is_exception : Int32Ptr , ex_address : MonoObjectRef ) : number {
14
16
const cultureRoot = mono_wasm_new_external_root < MonoString > ( culture ) ,
@@ -20,7 +22,7 @@ export function mono_wasm_compare_string(culture: MonoStringRef, str1: number, s
20
22
const casePicker = ( options & 0x1f ) ;
21
23
const locale = cultureName ? cultureName : undefined ;
22
24
wrap_no_error_root ( is_exception , exceptionRoot ) ;
23
- return compare_strings ( string1 , string2 , locale , casePicker ) ;
25
+ return compareStrings ( string1 , string2 , locale , casePicker ) ;
24
26
}
25
27
catch ( ex : any ) {
26
28
wrap_error_root ( is_exception , ex , exceptionRoot ) ;
@@ -37,19 +39,19 @@ export function mono_wasm_starts_with(culture: MonoStringRef, str1: number, str1
37
39
exceptionRoot = mono_wasm_new_external_root < MonoObject > ( ex_address ) ;
38
40
try {
39
41
const cultureName = monoStringToString ( cultureRoot ) ;
40
- const prefix = decode_to_clean_string ( str2 , str2Length ) ;
42
+ const prefix = decodeToCleanString ( str2 , str2Length ) ;
41
43
// no need to look for an empty string
42
44
if ( prefix . length == 0 )
43
45
return 1 ; // true
44
46
45
- const source = decode_to_clean_string ( str1 , str1Length ) ;
47
+ const source = decodeToCleanString ( str1 , str1Length ) ;
46
48
if ( source . length < prefix . length )
47
49
return 0 ; //false
48
50
const sourceOfPrefixLength = source . slice ( 0 , prefix . length ) ;
49
51
50
52
const casePicker = ( options & 0x1f ) ;
51
53
const locale = cultureName ? cultureName : undefined ;
52
- const result = compare_strings ( sourceOfPrefixLength , prefix , locale , casePicker ) ;
54
+ const result = compareStrings ( sourceOfPrefixLength , prefix , locale , casePicker ) ;
53
55
wrap_no_error_root ( is_exception , exceptionRoot ) ;
54
56
return result === 0 ? 1 : 0 ; // equals ? true : false
55
57
}
@@ -68,19 +70,19 @@ export function mono_wasm_ends_with(culture: MonoStringRef, str1: number, str1Le
68
70
exceptionRoot = mono_wasm_new_external_root < MonoObject > ( ex_address ) ;
69
71
try {
70
72
const cultureName = monoStringToString ( cultureRoot ) ;
71
- const suffix = decode_to_clean_string ( str2 , str2Length ) ;
73
+ const suffix = decodeToCleanString ( str2 , str2Length ) ;
72
74
if ( suffix . length == 0 )
73
75
return 1 ; // true
74
76
75
- const source = decode_to_clean_string ( str1 , str1Length ) ;
77
+ const source = decodeToCleanString ( str1 , str1Length ) ;
76
78
const diff = source . length - suffix . length ;
77
79
if ( diff < 0 )
78
80
return 0 ; //false
79
81
const sourceOfSuffixLength = source . slice ( diff , source . length ) ;
80
82
81
83
const casePicker = ( options & 0x1f ) ;
82
84
const locale = cultureName ? cultureName : undefined ;
83
- const result = compare_strings ( sourceOfSuffixLength , suffix , locale , casePicker ) ;
85
+ const result = compareStrings ( sourceOfSuffixLength , suffix , locale , casePicker ) ;
84
86
wrap_no_error_root ( is_exception , exceptionRoot ) ;
85
87
return result === 0 ? 1 : 0 ; // equals ? true : false
86
88
}
@@ -100,68 +102,57 @@ export function mono_wasm_index_of(culture: MonoStringRef, needlePtr: number, ne
100
102
try {
101
103
const needle = utf16ToString ( < any > needlePtr , < any > ( needlePtr + 2 * needleLength ) ) ;
102
104
// no need to look for an empty string
103
- if ( clean_string ( needle ) . length == 0 ) {
105
+ if ( cleanString ( needle ) . length == 0 ) {
104
106
wrap_no_error_root ( is_exception , exceptionRoot ) ;
105
107
return fromBeginning ? 0 : srcLength ;
106
108
}
107
109
108
110
const source = utf16ToString ( < any > srcPtr , < any > ( srcPtr + 2 * srcLength ) ) ;
109
111
// no need to look in an empty string
110
- if ( clean_string ( source ) . length == 0 ) {
112
+ if ( cleanString ( source ) . length == 0 ) {
111
113
wrap_no_error_root ( is_exception , exceptionRoot ) ;
112
114
return fromBeginning ? 0 : srcLength ;
113
115
}
114
116
const cultureName = monoStringToString ( cultureRoot ) ;
115
117
const locale = cultureName ? cultureName : undefined ;
116
118
const casePicker = ( options & 0x1f ) ;
117
-
118
- const segmenter = new Intl . Segmenter ( locale , { granularity : "grapheme" } ) ;
119
- const needleSegments = Array . from ( segmenter . segment ( needle ) ) . map ( s => s . segment ) ;
120
- let i = 0 ;
121
- let stop = false ;
122
119
let result = - 1 ;
123
- let segmentWidth = 0 ;
124
- let index = 0 ;
125
- let nextIndex = 0 ;
126
- while ( ! stop ) {
127
- // we need to restart the iterator in this outer loop because we have shifted it in the inner loop
128
- const iteratorSrc = segmenter . segment ( source . slice ( i , source . length ) ) [ Symbol . iterator ] ( ) ;
129
- let srcNext = iteratorSrc . next ( ) ;
130
120
131
- if ( srcNext . done )
132
- break ;
121
+ const graphemeSegmenter = graphemeSegmenterCached || ( graphemeSegmenterCached = new GraphemeSegmenter ( ) ) ;
122
+ const needleSegments = [ ] ;
123
+ let needleIdx = 0 ;
124
+
125
+ // Grapheme segmentation of needle string
126
+ while ( needleIdx < needle . length ) {
127
+ const needleGrapheme = graphemeSegmenter . nextGrapheme ( needle , needleIdx ) ;
128
+ needleSegments . push ( needleGrapheme ) ;
129
+ needleIdx += needleGrapheme . length ;
130
+ }
131
+
132
+ let srcIdx = 0 ;
133
+ while ( srcIdx < source . length ) {
134
+ const srcGrapheme = graphemeSegmenter . nextGrapheme ( source , srcIdx ) ;
135
+ srcIdx += srcGrapheme . length ;
133
136
134
- let matchFound = check_match_found ( srcNext . value . segment , needleSegments [ 0 ] , locale , casePicker ) ;
135
- index = nextIndex ;
136
- srcNext = iteratorSrc . next ( ) ;
137
- if ( srcNext . done ) {
138
- result = matchFound ? index : result ;
139
- break ;
137
+ if ( ! checkMatchFound ( srcGrapheme , needleSegments [ 0 ] , locale , casePicker ) ) {
138
+ continue ;
140
139
}
141
- segmentWidth = srcNext . value . index ;
142
- nextIndex = index + segmentWidth ;
143
- if ( matchFound ) {
144
- for ( let j = 1 ; j < needleSegments . length ; j ++ ) {
145
- if ( srcNext . done ) {
146
- stop = true ;
147
- break ;
148
- }
149
- matchFound = check_match_found ( srcNext . value . segment , needleSegments [ j ] , locale , casePicker ) ;
150
- if ( ! matchFound )
151
- break ;
152
140
153
- srcNext = iteratorSrc . next ( ) ;
154
- }
155
- if ( stop )
141
+ let j ;
142
+ let srcNextIdx = srcIdx ;
143
+ for ( j = 1 ; j < needleSegments . length ; j ++ ) {
144
+ const srcGrapheme = graphemeSegmenter . nextGrapheme ( source , srcNextIdx ) ;
145
+
146
+ if ( ! checkMatchFound ( srcGrapheme , needleSegments [ j ] , locale , casePicker ) ) {
156
147
break ;
148
+ }
149
+ srcNextIdx += srcGrapheme . length ;
157
150
}
158
-
159
- if ( matchFound ) {
160
- result = index ;
151
+ if ( j === needleSegments . length ) {
152
+ result = srcIdx - srcGrapheme . length ;
161
153
if ( fromBeginning )
162
154
break ;
163
155
}
164
- i = nextIndex ;
165
156
}
166
157
wrap_no_error_root ( is_exception , exceptionRoot ) ;
167
158
return result ;
@@ -175,12 +166,12 @@ export function mono_wasm_index_of(culture: MonoStringRef, needlePtr: number, ne
175
166
exceptionRoot . release ( ) ;
176
167
}
177
168
/**
 * Tells whether two string segments are considered equal under the given
 * locale and comparison options.
 *
 * @param str1 first segment to compare
 * @param str2 second segment to compare
 * @param locale locale name forwarded to compareStrings, or undefined
 * @param casePicker comparison-option bits forwarded to compareStrings
 * @returns true when compareStrings reports 0 for the pair
 */
function checkMatchFound(str1: string, str2: string, locale: string | undefined, casePicker: number): boolean {
    return compareStrings(str1, str2, locale, casePicker) === 0;
}
181
172
}
182
173
183
- function compare_strings ( string1 : string , string2 : string , locale : string | undefined , casePicker : number ) : number {
174
+ function compareStrings ( string1 : string , string2 : string , locale : string | undefined , casePicker : number ) : number {
184
175
switch ( casePicker ) {
185
176
case 0 :
186
177
// 0: None - default algorithm for the platform OR
@@ -272,12 +263,12 @@ function compare_strings(string1: string, string2: string, locale: string | unde
272
263
}
273
264
}
274
265
/**
 * Reads a UTF-16 string from memory and returns its cleaned form.
 *
 * @param strPtr address of the first character of the string
 * @param strLen length of the string; the end address is computed as
 *               strPtr + 2 * strLen, i.e. two bytes per unit of length
 * @returns the decoded string after passing through cleanString
 */
function decodeToCleanString(strPtr: number, strLen: number): string {
    const str = utf16ToString(<any>strPtr, <any>(strPtr + 2 * strLen));
    return cleanString(str);
}
279
270
/**
 * Normalizes a string and strips characters that should not take part in
 * comparisons.
 *
 * The input is first normalized with String.prototype.normalize() (NFC by
 * default). Then every occurrence of U+200B..U+200D (zero-width space,
 * non-joiner, joiner), U+FEFF (byte order mark / zero-width no-break space)
 * and U+0000 (NUL) is removed.
 *
 * @param str the string to clean
 * @returns the normalized string without the characters listed above
 */
function cleanString(str: string): string {
    const nStr = str.normalize();
    // The character class must contain no literal spaces, otherwise ordinary
    // spaces would be stripped from the input as well.
    return nStr.replace(/[\u200B-\u200D\uFEFF\0]/g, "");
}
0 commit comments