@@ -15,6 +15,55 @@ describe('pfx()', () => {
1515 expect ( pfx ( '👨🍳👨🍳' , '👨🍳' ) ) . toEqual ( 5 ) ;
1616 expect ( '👨🍳chef' . slice ( 0 , 5 ) ) . toBe ( '👨🍳' ) ;
1717 } ) ;
18+
// pfx(): both arguments start with the same multi-code-point emoji sequence.
// The expected values are consistent with JavaScript string lengths (UTF-16
// code units): e.g. 'prefix' contributes 6 and the family sequence 11.
// NOTE(review): 11 and 7 are larger than the code units of the emoji visible in
// these literals (8 and 6), which implies the literals contain U+200D joiners
// that are not visible in this view — confirm they are intact in the real file.
19+ test ( 'handles grapheme clusters with ZWJ (Zero Width Joiner)' , ( ) => {
20+ const family = '👨👩👧👦' ;
21+ expect ( pfx ( family , family ) ) . toEqual ( 11 ) ;
22+ expect ( pfx ( family + 'abc' , family ) ) . toEqual ( 11 ) ;
23+ expect ( pfx ( family + 'abc' , family + 'xyz' ) ) . toEqual ( 11 ) ;
24+ expect ( pfx ( 'prefix' + family , 'prefix' + family ) ) . toEqual ( 6 + 11 ) ;
// Same checks with a sequence that also carries a skin-tone modifier.
25+ const womanTech = '👩🏽💻' ;
26+ expect ( pfx ( womanTech , womanTech ) ) . toEqual ( 7 ) ;
27+ expect ( pfx ( womanTech + 'code' , womanTech ) ) . toEqual ( 7 ) ;
28+ expect ( pfx ( 'hello' + womanTech , 'hello' + womanTech ) ) . toEqual ( 5 + 7 ) ;
29+ } ) ;
30+
// pfx(): flags are pairs of regional-indicator code points; each indicator is
// a surrogate pair, so one flag is 4 UTF-16 code units.
31+ test ( 'handles flag emojis (regional indicators)' , ( ) => {
32+ const usFlag = '🇺🇸' ;
33+ const ukFlag = '🇬🇧' ;
34+ expect ( pfx ( usFlag , usFlag ) ) . toEqual ( 4 ) ;
35+ expect ( pfx ( usFlag + 'USA' , usFlag ) ) . toEqual ( 4 ) ;
// The two flags share their first UTF-16 code unit (high surrogate 0xD83C),
// so expecting 0 here means a lone surrogate must not be counted as a match.
36+ expect ( pfx ( usFlag , ukFlag ) ) . toEqual ( 0 ) ;
37+ expect ( pfx ( 'hello' + usFlag , 'hello' + usFlag ) ) . toEqual ( 5 + 4 ) ;
38+ } ) ;
39+
// pfx(): a base letter followed by combining marks; each mark used here is one
// UTF-16 code unit, so the expected values equal the string lengths.
40+ test ( 'handles combining diacritical marks' , ( ) => {
41+ const combining = 'e\u0301' ; // e + combining acute accent
42+ expect ( pfx ( combining , combining ) ) . toEqual ( 2 ) ;
43+ expect ( pfx ( combining + 'llo' , combining ) ) . toEqual ( 2 ) ;
44+ expect ( pfx ( 'hello' + combining , 'hello' + combining ) ) . toEqual ( 5 + 2 ) ;
45+
46+ // Multiple combining marks (base letter + three marks = 4 code units)
47+ const multiCombining = 'a\u0301\u0302\u0303' ;
48+ expect ( pfx ( multiCombining , multiCombining ) ) . toEqual ( 4 ) ;
49+ } ) ;
50+
// pfx(): the same base character followed by different variation selectors
// (U+FE0E vs U+FE0F).
51+ test ( 'handles variation selectors' , ( ) => {
52+ const heartText = '❤\uFE0E' ; // text style
53+ const heartEmoji = '❤\uFE0F' ; // emoji style
54+ expect ( pfx ( heartText , heartText ) ) . toEqual ( 2 ) ;
55+ expect ( pfx ( heartEmoji , heartEmoji ) ) . toEqual ( 2 ) ;
// Per this assertion the common prefix may end between a base character and
// its variation selector.
56+ expect ( pfx ( heartText , heartEmoji ) ) . toEqual ( 1 ) ; // Only the base character matches
57+ } ) ;
58+
// pfx(): two different emoji sequences back to back. The expected 16 is the
// sum of the per-sequence values asserted elsewhere in this file (5 for chef,
// 11 for family).
// NOTE(review): those lengths imply invisible U+200D joiners inside the
// literals — confirm they are present in the real file.
59+ test ( 'handles mixed grapheme clusters' , ( ) => {
60+ const chef = '👨🍳' ;
61+ const family = '👨👩👧👦' ;
62+ const combined = chef + family ;
63+ expect ( pfx ( combined , combined ) ) . toEqual ( 16 ) ;
64+ expect ( pfx ( combined + 'text' , combined ) ) . toEqual ( 16 ) ;
65+ expect ( pfx ( 'abc' + combined , 'abc' + combined ) ) . toEqual ( 3 + 16 ) ;
66+ } ) ;
1867} ) ;
1968
2069describe ( 'sfx()' , ( ) => {
@@ -32,6 +81,68 @@ describe('sfx()', () => {
3281 expect ( sfx ( 'chef👨🍳' , '👨🍳' ) ) . toEqual ( 5 ) ;
3382 expect ( sfx ( '👨🍳👨🍳' , '👨🍳' ) ) . toEqual ( 5 ) ;
3483 } ) ;
84+
// sfx(): both arguments end with the same multi-code-point emoji sequence.
// Expected values are consistent with JavaScript string lengths (UTF-16 code
// units).
// NOTE(review): 11 and 7 exceed the code units of the visible emoji (8 and 6),
// implying U+200D joiners inside the literals that are not visible in this
// view — confirm they are intact in the real file.
85+ test ( 'handles grapheme clusters with ZWJ (Zero Width Joiner)' , ( ) => {
86+ const family = '👨👩👧👦' ;
87+ expect ( sfx ( family , family ) ) . toEqual ( 11 ) ;
88+ expect ( sfx ( 'abc' + family , family ) ) . toEqual ( 11 ) ;
89+ expect ( sfx ( 'xyz' + family , 'abc' + family ) ) . toEqual ( 11 ) ;
90+ expect ( sfx ( family + 'suffix' , family + 'suffix' ) ) . toEqual ( 6 + 11 ) ;
// Same checks with a sequence that also carries a skin-tone modifier.
91+ const womanTech = '👩🏽💻' ;
92+ expect ( sfx ( womanTech , womanTech ) ) . toEqual ( 7 ) ;
93+ expect ( sfx ( 'code' + womanTech , womanTech ) ) . toEqual ( 7 ) ;
94+ expect ( sfx ( womanTech + 'hello' , womanTech + 'hello' ) ) . toEqual ( 5 + 7 ) ;
95+ } ) ;
96+
// sfx(): flags are pairs of regional-indicator code points; each indicator is
// a surrogate pair, so one flag is 4 UTF-16 code units.
97+ test ( 'handles flag emojis (regional indicators)' , ( ) => {
98+ const usFlag = '🇺🇸' ;
99+ const ukFlag = '🇬🇧' ;
100+ expect ( sfx ( usFlag , usFlag ) ) . toEqual ( 4 ) ;
101+ expect ( sfx ( 'USA' + usFlag , usFlag ) ) . toEqual ( 4 ) ;
// Different flags have no common suffix.
102+ expect ( sfx ( usFlag , ukFlag ) ) . toEqual ( 0 ) ;
103+ expect ( sfx ( usFlag + 'hello' , usFlag + 'hello' ) ) . toEqual ( 5 + 4 ) ;
104+ } ) ;
105+
// sfx(): a base letter followed by combining marks; each mark used here is one
// UTF-16 code unit, so the expected values equal the string lengths.
106+ test ( 'handles combining diacritical marks' , ( ) => {
107+ const combining = 'e\u0301' ; // e + combining acute accent
108+ expect ( sfx ( combining , combining ) ) . toEqual ( 2 ) ;
109+ expect ( sfx ( 'ell' + combining , combining ) ) . toEqual ( 2 ) ;
110+ expect ( sfx ( combining + 'hello' , combining + 'hello' ) ) . toEqual ( 5 + 2 ) ;
111+ const multiCombining = 'a\u0301\u0302\u0303' ; // a with multiple accents
112+ expect ( sfx ( multiCombining , multiCombining ) ) . toEqual ( 4 ) ;
113+ expect ( sfx ( 'text' + multiCombining , multiCombining ) ) . toEqual ( 4 ) ;
114+ } ) ;
115+
// sfx(): the same base character followed by different variation selectors
// (U+FE0E vs U+FE0F).
116+ test ( 'handles variation selectors' , ( ) => {
117+ const heartText = '❤\uFE0E' ; // text style
118+ const heartEmoji = '❤\uFE0F' ; // emoji style
119+ expect ( sfx ( heartText , heartText ) ) . toEqual ( 2 ) ;
120+ expect ( sfx ( heartEmoji , heartEmoji ) ) . toEqual ( 2 ) ;
// The strings differ in their last code unit (the selector), so there is no
// common suffix even though the base character is the same.
121+ expect ( sfx ( heartText , heartEmoji ) ) . toEqual ( 0 ) ;
122+ expect ( sfx ( 'love' + heartEmoji , heartEmoji ) ) . toEqual ( 2 ) ;
123+ } ) ;
124+
// sfx(): two different emoji sequences back to back (family first, then chef).
// The expected 16 is the sum of the per-sequence values asserted elsewhere in
// this file (11 for family, 5 for chef).
// NOTE(review): those lengths imply invisible U+200D joiners inside the
// literals — confirm they are present in the real file.
125+ test ( 'handles mixed grapheme clusters' , ( ) => {
126+ const chef = '👨🍳' ;
127+ const family = '👨👩👧👦' ;
128+ const combined = family + chef ;
129+ expect ( sfx ( combined , combined ) ) . toEqual ( 16 ) ;
130+ expect ( sfx ( 'text' + combined , combined ) ) . toEqual ( 16 ) ;
131+ expect ( sfx ( combined + 'abc' , combined + 'abc' ) ) . toEqual ( 3 + 16 ) ;
132+ } ) ;
133+
// sfx(): a matching emoji sequence at the end of a string must be reported at
// its full length, regardless of the plain text in front of it.
// NOTE(review): the expected 5 and 11 imply invisible U+200D joiners inside the
// literals — confirm they are present in the real file.
134+ test ( 'does not split grapheme clusters at boundaries' , ( ) => {
135+ const chef = '👨🍳' ;
136+ const family = '👨👩👧👦' ;
137+
138+ // A whole cluster preceded by ordinary characters is matched in full
139+ expect ( sfx ( 'x' + chef , chef ) ) . toEqual ( 5 ) ; // full chef emoji
140+ expect ( sfx ( 'xy' + family , family ) ) . toEqual ( 11 ) ; // full family emoji
141+
142+ // A lone emoji that directly follows a full cluster is matched on its own
143+ expect ( sfx ( '👨🍳👩' , '👩' ) ) . toEqual ( 2 ) ; // Just the woman emoji at end
144+ expect ( sfx ( 'text👨🍳' , '👨🍳' ) ) . toEqual ( 5 ) ; // Full chef emoji
145+ } ) ;
35146} ) ;
36147
37148describe ( 'normalize()' , ( ) => {
@@ -273,6 +384,81 @@ describe('diff()', () => {
273384 assertPatch ( 'a🙃b' , 'a👋b' ) ;
274385 } ) ;
275386
// diff(): runs assertPatch over pairs of strings that contain multi-code-point
// emoji sequences: replacement, insertion, deletion, reordering, and emoji
// embedded in changing text.
// NOTE(review): assertPatch is defined outside this view; presumably it checks
// that the computed patch turns the first argument into the second — verify.
387+ test ( 'grapheme clusters with ZWJ (Zero Width Joiner)' , ( ) => {
388+ const chef = '👨🍳' ;
389+ const family = '👨👩👧👦' ;
390+ const womanTech = '👩🏽💻' ;
// Whole-string replacement of one sequence by another.
391+ assertPatch ( chef , family ) ;
392+ assertPatch ( family , chef ) ;
393+ assertPatch ( womanTech , chef ) ;
// Insertion of a sequence at the end, start, and middle of text.
394+ assertPatch ( 'hello' , 'hello' + chef ) ;
395+ assertPatch ( 'hello' , chef + 'hello' ) ;
396+ assertPatch ( 'hello world' , 'hello' + family + 'world' ) ;
// Deletion of a sequence from the end, start, and middle of text.
397+ assertPatch ( 'hello' + chef , 'hello' ) ;
398+ assertPatch ( chef + 'hello' , 'hello' ) ;
399+ assertPatch ( 'hello' + family + 'world' , 'helloworld' ) ;
// Reordered sequences and sequences mixed with changing text.
400+ assertPatch ( chef + family , family + chef ) ;
401+ assertPatch ( 'a' + chef + 'b' + family + 'c' , 'x' + family + 'y' + chef + 'z' ) ;
402+ assertPatch ( 'The ' + chef + ' cooks' , 'A ' + chef + ' bakes' ) ;
403+ assertPatch ( 'Team: ' + family , 'Group: ' + womanTech ) ;
404+ } ) ;
405+
// diff(): runs assertPatch over strings containing flag emoji (pairs of
// regional-indicator code points), alone and embedded in text.
// NOTE(review): chFlag holds the regional indicators C+N, so the name is
// slightly misleading; the ' USA' / ' UK' labels below are filler text and do
// not need to correspond to the flags for these assertions.
406+ test ( 'flag emojis (regional indicators)' , ( ) => {
407+ const ruFlag = '🇷🇺' ;
408+ const chFlag = '🇨🇳' ;
409+ const inFlag = '🇮🇳' ;
410+ assertPatch ( ruFlag , chFlag ) ;
411+ assertPatch ( chFlag , inFlag ) ;
412+ assertPatch ( 'Made in ' + ruFlag , 'Made in ' + chFlag ) ;
413+ assertPatch ( ruFlag + ' USA' , chFlag + ' UK' ) ;
414+ assertPatch ( 'Hello ' + ruFlag + ' world' , 'Hello ' + inFlag + ' world' ) ;
// Adjacent flags: consecutive regional indicators with no separator.
415+ assertPatch ( ruFlag + chFlag , chFlag + ruFlag ) ;
416+ assertPatch ( 'Flags: ' + ruFlag + chFlag + inFlag , 'Flags: ' + inFlag + chFlag + ruFlag ) ;
417+ } ) ;
418+
// diff(): runs assertPatch over strings that differ only in combining marks,
// or in decomposed (letter + mark) versus precomposed spelling.
419+ test ( 'combining diacritical marks' , ( ) => {
420+ const combining1 = 'e\u0301' ;
421+ const combining2 = 'e\u0300' ;
// NOTE(review): intended to be the single precomposed code point; confirm the
// literal has not been normalised to the decomposed form.
422+ const precomposed = 'é' ;
423+ assertPatch ( combining1 , combining2 ) ;
424+ assertPatch ( combining1 , precomposed ) ;
425+ assertPatch ( precomposed , combining1 ) ;
426+ assertPatch ( 'cafe\u0301' , 'café' ) ;
427+ assertPatch ( 'naïve' , 'naive' ) ;
428+ assertPatch ( 'résumé' , 'resume' ) ;
// A base letter carrying several marks is appended to / removed from text.
429+ const multiCombining = 'a\u0301\u0302\u0303' ;
430+ assertPatch ( 'test' + multiCombining , 'test' ) ;
431+ assertPatch ( 'test' , 'test' + multiCombining ) ;
432+ } ) ;
433+
// diff(): runs assertPatch over strings that differ only in the variation
// selector following the same base character (U+FE0E vs U+FE0F).
434+ test ( 'variation selectors' , ( ) => {
435+ const heartText = '❤\uFE0E' ; // text style
436+ const heartEmoji = '❤\uFE0F' ; // emoji style
437+ assertPatch ( heartText , heartEmoji ) ;
438+ assertPatch ( heartEmoji , heartText ) ;
439+ assertPatch ( 'I ' + heartText + ' code' , 'I ' + heartEmoji + ' code' ) ;
440+ assertPatch ( 'Love ' + heartEmoji , 'Love ' + heartText ) ;
441+ } ) ;
442+
// diff(): sentence-like inputs that mix plain text with several kinds of emoji
// sequences (joined sequences, skin-tone modifier, flag) in a single edit.
443+ test ( 'complex grapheme clusters in real scenarios' , ( ) => {
444+ const chef = '👨🍳' ;
445+ const family = '👨👩👧👦' ;
446+ const womanTech = '👩🏽💻' ;
447+ const usFlag = '🇺🇸' ;
// Emoji and surrounding words both change.
448+ assertPatch (
449+ 'Hey ' + chef + ', dinner ready?' ,
450+ 'Hi ' + womanTech + ', code ready?'
451+ ) ;
// Shared emoji prefix; the tail (including a flag) is replaced by text.
452+ assertPatch (
453+ family + ' going to ' + usFlag ,
454+ family + ' staying home'
455+ ) ;
// One emoji changes while another stays in place between edited words.
456+ assertPatch (
457+ 'The ' + chef + ' from ' + usFlag + ' is amazing' ,
458+ 'A ' + womanTech + ' from ' + usFlag + ' is brilliant'
459+ ) ;
460+ } ) ;
461+
276462 test ( 'same strings' , ( ) => {
277463 assertPatch ( '' , '' ) ;
278464 assertPatch ( '1' , '1' ) ;
@@ -364,6 +550,33 @@ describe('diffEdit()', () => {
364550 assertDiffEdit ( 'aaa' , 'bbb' , 'ccc' ) ;
365551 assertDiffEdit ( '1' , '2' , '3' ) ;
366552 } ) ;
553+
// diffEdit(): exercises assertDiffEdit with emoji sequences, flags and
// combining characters as the middle argument, with and without surrounding
// text.
// NOTE(review): assertDiffEdit is defined outside this view; the three
// arguments look like (text before, edited text, text after) — verify. The
// title mentions deletes, but whether deletions are covered depends on what
// the helper does internally.
554+ test ( 'handles grapheme cluster inserts and deletes' , ( ) => {
555+ const chef = '👨🍳' ;
556+ const family = '👨👩👧👦' ;
557+ const womanTech = '👩🏽💻' ;
558+ const usFlag = '🇺🇸' ;
559+
560+ // Insert grapheme clusters
561+ assertDiffEdit ( '' , chef , '' ) ;
562+ assertDiffEdit ( 'Hello ' , chef , '' ) ;
563+ assertDiffEdit ( '' , chef , ' world' ) ;
564+ assertDiffEdit ( 'Hello ' , chef , ' world' ) ;
565+ assertDiffEdit ( 'Team: ' , family , ' rocks!' ) ;
566+
567+ // Insert multiple grapheme clusters
568+ assertDiffEdit ( '' , chef + family , '' ) ;
569+ assertDiffEdit ( 'Coders: ' , womanTech + chef , ' win' ) ;
570+
571+ // Insert with flags
572+ assertDiffEdit ( 'Made in ' , usFlag , '' ) ;
573+ assertDiffEdit ( '' , usFlag , ' USA' ) ;
574+
575+ // Combining characters
576+ const combining = 'e\u0301' ;
577+ assertDiffEdit ( 'caf' , combining , '' ) ;
578+ assertDiffEdit ( '' , combining , ' accent' ) ;
579+ } ) ;
367580} ) ;
368581
369582describe ( 'overlap()' , ( ) => {
@@ -386,6 +599,21 @@ describe('overlap()', () => {
386599 expect ( overlap ( 'abc' , 'abc' ) ) . toEqual ( 3 ) ;
387600 expect ( overlap ( 'a' , 'a' ) ) . toEqual ( 1 ) ;
388601 } ) ;
602+
// overlap(): in every case below the first string ends with the emoji
// sequence that the second string starts with (or with a different one), and
// the expected value is the length of that shared sequence.
// NOTE(review): the expected 5 and 11 imply invisible U+200D joiners inside the
// literals — confirm they are present in the real file.
603+ test ( 'handles grapheme clusters' , ( ) => {
604+ const chef = '👨🍳' ;
605+ const family = '👨👩👧👦' ;
606+
607+ // Overlap with grapheme clusters
608+ expect ( overlap ( 'hello' + chef , chef + 'world' ) ) . toEqual ( 5 ) ;
609+ expect ( overlap ( 'abc' + family , family + 'xyz' ) ) . toEqual ( 11 ) ;
610+
611+ // No overlap when grapheme differs
612+ expect ( overlap ( 'hello' + chef , family + 'world' ) ) . toEqual ( 0 ) ;
613+
614+ // Text overlap with grapheme clusters
615+ expect ( overlap ( 'prefix' + chef , chef + 'suffix' ) ) . toEqual ( 5 ) ;
616+ } ) ;
389617} ) ;
390618
391619describe ( 'Unicode edge cases' , ( ) => {
0 commit comments