Skip to content

Commit 28145d3

Browse files
committed
test: 💍 add more diff tests
1 parent dd5c5c9 commit 28145d3

File tree

1 file changed

+228
-0
lines changed

1 file changed

+228
-0
lines changed

packages/json-joy/src/util/diff/__tests__/str.spec.ts

Lines changed: 228 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -15,6 +15,55 @@ describe('pfx()', () => {
1515
expect(pfx('👨‍🍳👨‍🍳', '👨‍🍳')).toEqual(5);
1616
expect('👨‍🍳chef'.slice(0, 5)).toBe('👨‍🍳');
1717
});
18+
19+
test('handles grapheme clusters with ZWJ (Zero Width Joiner)', () => { // expected values are UTF-16 code unit counts of the shared prefix
20+
const family = '👨‍👩‍👧‍👦'; // 4 emoji (2 code units each) joined by 3 ZWJ (1 each) = 11 code units
21+
expect(pfx(family, family)).toEqual(11); // identical strings: whole cluster is the prefix
22+
expect(pfx(family + 'abc', family)).toEqual(11); // shorter argument is entirely a prefix of the longer one
23+
expect(pfx(family + 'abc', family + 'xyz')).toEqual(11); // strings diverge immediately after the cluster
24+
expect(pfx('prefix' + family, 'prefix' + family)).toEqual(6 + 11); // 6 ASCII units + the 11-unit cluster
25+
const womanTech = '👩🏽‍💻'; // woman (2) + skin-tone modifier (2) + ZWJ (1) + laptop (2) = 7 code units
26+
expect(pfx(womanTech, womanTech)).toEqual(7);
27+
expect(pfx(womanTech + 'code', womanTech)).toEqual(7);
28+
expect(pfx('hello' + womanTech, 'hello' + womanTech)).toEqual(5 + 7); // 5 ASCII units + the 7-unit cluster
29+
});
30+
31+
test('handles flag emojis (regional indicators)', () => { // each flag is two regional-indicator symbols, i.e. two surrogate pairs
32+
const usFlag = '🇺🇸'; // U+1F1FA U+1F1F8 = 4 UTF-16 code units
33+
const ukFlag = '🇬🇧'; // U+1F1EC U+1F1E7 = 4 UTF-16 code units
34+
expect(pfx(usFlag, usFlag)).toEqual(4);
35+
expect(pfx(usFlag + 'USA', usFlag)).toEqual(4);
36+
expect(pfx(usFlag, ukFlag)).toEqual(0); // both start with the same high surrogate (0xD83C); expecting 0 means a lone shared surrogate must not count
37+
expect(pfx('hello' + usFlag, 'hello' + usFlag)).toEqual(5 + 4); // 5 ASCII units + the 4-unit flag
38+
});
39+
40+
test('handles combining diacritical marks', () => { // every code point here is in the BMP, so 1 code unit each
41+
const combining = 'e\u0301'; // e + combining acute accent = 2 code units
42+
expect(pfx(combining, combining)).toEqual(2);
43+
expect(pfx(combining + 'llo', combining)).toEqual(2);
44+
expect(pfx('hello' + combining, 'hello' + combining)).toEqual(5 + 2); // 5 ASCII units + base letter + mark
45+
46+
// Multiple combining marks
47+
const multiCombining = 'a\u0301\u0302\u0303'; // base letter + 3 combining marks = 4 code units
48+
expect(pfx(multiCombining, multiCombining)).toEqual(4);
49+
});
50+
51+
test('handles variation selectors', () => { // U+2764 plus a selector is 2 code units; only the selector differs between the two hearts
52+
const heartText = '❤\uFE0E'; // text style
53+
const heartEmoji = '❤\uFE0F'; // emoji style
54+
expect(pfx(heartText, heartText)).toEqual(2);
55+
expect(pfx(heartEmoji, heartEmoji)).toEqual(2);
56+
expect(pfx(heartText, heartEmoji)).toEqual(1); // Only the base character matches
57+
});
58+
59+
test('handles mixed grapheme clusters', () => { // two different ZWJ sequences back to back
60+
const chef = '👨‍🍳'; // man (2) + ZWJ (1) + cooking (2) = 5 code units
61+
const family = '👨‍👩‍👧‍👦'; // 4 emoji (2 each) + 3 ZWJ = 11 code units
62+
const combined = chef + family; // 5 + 11 = 16 code units
63+
expect(pfx(combined, combined)).toEqual(16);
64+
expect(pfx(combined + 'text', combined)).toEqual(16);
65+
expect(pfx('abc' + combined, 'abc' + combined)).toEqual(3 + 16); // 3 ASCII units + both clusters
66+
});
1867
});
1968

2069
describe('sfx()', () => {
@@ -32,6 +81,68 @@ describe('sfx()', () => {
3281
expect(sfx('chef👨‍🍳', '👨‍🍳')).toEqual(5);
3382
expect(sfx('👨‍🍳👨‍🍳', '👨‍🍳')).toEqual(5);
3483
});
84+
85+
test('handles grapheme clusters with ZWJ (Zero Width Joiner)', () => { // expected values are UTF-16 code unit counts of the shared suffix
86+
const family = '👨‍👩‍👧‍👦'; // 4 emoji (2 code units each) joined by 3 ZWJ (1 each) = 11 code units
87+
expect(sfx(family, family)).toEqual(11);
88+
expect(sfx('abc' + family, family)).toEqual(11); // shorter argument is entirely a suffix of the longer one
89+
expect(sfx('xyz' + family, 'abc' + family)).toEqual(11); // strings differ only before the cluster
90+
expect(sfx(family + 'suffix', family + 'suffix')).toEqual(6 + 11); // 6 ASCII units + the 11-unit cluster
91+
const womanTech = '👩🏽‍💻'; // woman (2) + skin-tone modifier (2) + ZWJ (1) + laptop (2) = 7 code units
92+
expect(sfx(womanTech, womanTech)).toEqual(7);
93+
expect(sfx('code' + womanTech, womanTech)).toEqual(7);
94+
expect(sfx(womanTech + 'hello', womanTech + 'hello')).toEqual(5 + 7); // 5 ASCII units + the 7-unit cluster
95+
});
96+
97+
test('handles flag emojis (regional indicators)', () => { // each flag is two regional-indicator symbols, i.e. two surrogate pairs
98+
const usFlag = '🇺🇸'; // U+1F1FA U+1F1F8 = 4 UTF-16 code units
99+
const ukFlag = '🇬🇧'; // U+1F1EC U+1F1E7 = 4 UTF-16 code units
100+
expect(sfx(usFlag, usFlag)).toEqual(4);
101+
expect(sfx('USA' + usFlag, usFlag)).toEqual(4);
102+
expect(sfx(usFlag, ukFlag)).toEqual(0); // final code units already differ (0xDDF8 vs 0xDDE7)
103+
expect(sfx(usFlag + 'hello', usFlag + 'hello')).toEqual(5 + 4); // 5 ASCII units + the 4-unit flag
104+
});
105+
106+
test('handles combining diacritical marks', () => { // every code point here is in the BMP, so 1 code unit each
107+
const combining = 'e\u0301'; // e + combining acute accent
108+
expect(sfx(combining, combining)).toEqual(2);
109+
expect(sfx('ell' + combining, combining)).toEqual(2); // the 'l' before the base letter ends the match
110+
expect(sfx(combining + 'hello', combining + 'hello')).toEqual(5 + 2); // 5 ASCII units + base letter + mark
111+
const multiCombining = 'a\u0301\u0302\u0303'; // a with multiple accents
112+
expect(sfx(multiCombining, multiCombining)).toEqual(4); // base letter + 3 marks
113+
expect(sfx('text' + multiCombining, multiCombining)).toEqual(4);
114+
});
115+
116+
test('handles variation selectors', () => { // the selector is the LAST code unit, so differing selectors leave no common suffix
117+
const heartText = '❤\uFE0E'; // text style
118+
const heartEmoji = '❤\uFE0F'; // emoji style
119+
expect(sfx(heartText, heartText)).toEqual(2);
120+
expect(sfx(heartEmoji, heartEmoji)).toEqual(2);
121+
expect(sfx(heartText, heartEmoji)).toEqual(0); // U+FE0E vs U+FE0F differ at the very end
122+
expect(sfx('love' + heartEmoji, heartEmoji)).toEqual(2); // base character + selector
123+
});
124+
125+
test('handles mixed grapheme clusters', () => { // two different ZWJ sequences back to back
126+
const chef = '👨‍🍳'; // man (2) + ZWJ (1) + cooking (2) = 5 code units
127+
const family = '👨‍👩‍👧‍👦'; // 4 emoji (2 each) + 3 ZWJ = 11 code units
128+
const combined = family + chef; // 11 + 5 = 16 code units
129+
expect(sfx(combined, combined)).toEqual(16);
130+
expect(sfx('text' + combined, combined)).toEqual(16);
131+
expect(sfx(combined + 'abc', combined + 'abc')).toEqual(3 + 16); // 3 ASCII units + both clusters
132+
});
133+
134+
test('does not split grapheme clusters at boundaries', () => { // the second argument is always one whole cluster found at the end of the first
135+
const chef = '👨‍🍳'; // 5 UTF-16 code units
136+
const family = '👨‍👩‍👧‍👦'; // 11 UTF-16 code units
137+
138+
// Ensure we don't split in the middle of a grapheme cluster
139+
expect(sfx('x' + chef, chef)).toEqual(5); // full chef emoji
140+
expect(sfx('xy' + family, family)).toEqual(11); // full family emoji
141+
142+
// The second argument is a complete trailing cluster of the first; the match must cover exactly that cluster
143+
expect(sfx('👨‍🍳👩', '👩')).toEqual(2); // Just the woman emoji at end
144+
expect(sfx('text👨‍🍳', '👨‍🍳')).toEqual(5); // Full chef emoji
145+
});
35146
});
36147

37148
describe('normalize()', () => {
@@ -273,6 +384,81 @@ describe('diff()', () => {
273384
assertPatch('a🙃b', 'a👋b');
274385
});
275386

387+
test('grapheme clusters with ZWJ (Zero Width Joiner)', () => { // NOTE(review): assertPatch is defined outside this view; presumably it diffs (src, dst) and verifies the patch — confirm
388+
const chef = '👨‍🍳'; // man + ZWJ + cooking
389+
const family = '👨‍👩‍👧‍👦'; // four people joined by three ZWJ
390+
const womanTech = '👩🏽‍💻'; // woman + skin-tone modifier + ZWJ + laptop
391+
assertPatch(chef, family); // both start with the same man emoji + ZWJ, then diverge
392+
assertPatch(family, chef);
393+
assertPatch(womanTech, chef);
394+
assertPatch('hello', 'hello' + chef); // cluster appended
395+
assertPatch('hello', chef + 'hello'); // cluster prepended
396+
assertPatch('hello world', 'hello' + family + 'world'); // cluster takes the place of the space
397+
assertPatch('hello' + chef, 'hello'); // cluster removed from the end
398+
assertPatch(chef + 'hello', 'hello'); // cluster removed from the start
399+
assertPatch('hello' + family + 'world', 'helloworld'); // cluster removed from the middle
400+
assertPatch(chef + family, family + chef); // same clusters, swapped order
401+
assertPatch('a' + chef + 'b' + family + 'c', 'x' + family + 'y' + chef + 'z');
402+
assertPatch('The ' + chef + ' cooks', 'A ' + chef + ' bakes'); // cluster unchanged, text around it edited
403+
assertPatch('Team: ' + family, 'Group: ' + womanTech);
404+
});
405+
406+
test('flag emojis (regional indicators)', () => { // all regional indicators share the high surrogate 0xD83C, so any two flags have a code unit in common
407+
const ruFlag = '🇷🇺'; // regional indicators R + U
408+
const chFlag = '🇨🇳'; // regional indicators C + N (region code CN, despite the `ch` variable name)
409+
const inFlag = '🇮🇳'; // regional indicators I + N
410+
assertPatch(ruFlag, chFlag);
411+
assertPatch(chFlag, inFlag); // the two flags share their second indicator (N)
412+
assertPatch('Made in ' + ruFlag, 'Made in ' + chFlag);
413+
assertPatch(ruFlag + ' USA', chFlag + ' UK'); // the ASCII labels are arbitrary text, unrelated to the flags used
414+
assertPatch('Hello ' + ruFlag + ' world', 'Hello ' + inFlag + ' world');
415+
assertPatch(ruFlag + chFlag, chFlag + ruFlag); // same flags, swapped order
416+
assertPatch('Flags: ' + ruFlag + chFlag + inFlag, 'Flags: ' + inFlag + chFlag + ruFlag); // outer flags swapped, middle one fixed
417+
});
418+
419+
test('combining diacritical marks', () => { // covers decomposed (base + mark) versus single-code-point accented letters
420+
const combining1 = 'e\u0301'; // e + combining acute accent
421+
const combining2 = 'e\u0300'; // e + combining grave accent
422+
const precomposed = 'é'; // presumably the single code point U+00E9 — confirm against the original file bytes
423+
assertPatch(combining1, combining2); // same base letter, different mark
424+
assertPatch(combining1, precomposed);
425+
assertPatch(precomposed, combining1);
426+
assertPatch('cafe\u0301', 'café'); // decomposed ending versus the literal accented letter
427+
assertPatch('naïve', 'naive'); // accent dropped
428+
assertPatch('résumé', 'resume'); // two accents dropped
429+
const multiCombining = 'a\u0301\u0302\u0303'; // base letter + 3 combining marks
430+
assertPatch('test' + multiCombining, 'test'); // accented letter removed
431+
assertPatch('test', 'test' + multiCombining); // accented letter appended
432+
});
433+
434+
test('variation selectors', () => { // the two hearts differ only in their trailing selector code unit
435+
const heartText = '❤\uFE0E'; // text style
436+
const heartEmoji = '❤\uFE0F'; // emoji style
437+
assertPatch(heartText, heartEmoji);
438+
assertPatch(heartEmoji, heartText);
439+
assertPatch('I ' + heartText + ' code', 'I ' + heartEmoji + ' code'); // selector change in the middle of text
440+
assertPatch('Love ' + heartEmoji, 'Love ' + heartText); // selector change at the end of text
441+
});
442+
443+
test('complex grapheme clusters in real scenarios', () => { // sentence-like inputs mixing ASCII text, ZWJ sequences and a flag
444+
const chef = '👨‍🍳'; // man + ZWJ + cooking
445+
const family = '👨‍👩‍👧‍👦'; // four people joined by three ZWJ
446+
const womanTech = '👩🏽‍💻'; // woman + skin-tone modifier + ZWJ + laptop
447+
const usFlag = '🇺🇸'; // regional indicators U + S
448+
assertPatch( // both the greeting text and the cluster change
449+
'Hey ' + chef + ', dinner ready?',
450+
'Hi ' + womanTech + ', code ready?'
451+
);
452+
assertPatch( // leading cluster kept, trailing flag replaced by plain text
453+
family + ' going to ' + usFlag,
454+
family + ' staying home'
455+
);
456+
assertPatch( // flag kept in the middle while text and the other cluster change around it
457+
'The ' + chef + ' from ' + usFlag + ' is amazing',
458+
'A ' + womanTech + ' from ' + usFlag + ' is brilliant'
459+
);
460+
});
461+
276462
test('same strings', () => {
277463
assertPatch('', '');
278464
assertPatch('1', '1');
@@ -364,6 +550,33 @@ describe('diffEdit()', () => {
364550
assertDiffEdit('aaa', 'bbb', 'ccc');
365551
assertDiffEdit('1', '2', '3');
366552
});
553+
554+
test('handles grapheme cluster inserts and deletes', () => { // NOTE(review): assertDiffEdit is defined outside this view; its arguments look like (prefix, edited text, suffix) — confirm
555+
const chef = '👨‍🍳'; // man + ZWJ + cooking
556+
const family = '👨‍👩‍👧‍👦'; // four people joined by three ZWJ
557+
const womanTech = '👩🏽‍💻'; // woman + skin-tone modifier + ZWJ + laptop
558+
const usFlag = '🇺🇸'; // regional indicators U + S
559+
560+
// Insert grapheme clusters
561+
assertDiffEdit('', chef, ''); // no surrounding text
562+
assertDiffEdit('Hello ', chef, ''); // text before only
563+
assertDiffEdit('', chef, ' world'); // text after only
564+
assertDiffEdit('Hello ', chef, ' world'); // text on both sides
565+
assertDiffEdit('Team: ', family, ' rocks!');
566+
567+
// Insert multiple grapheme clusters
568+
assertDiffEdit('', chef + family, '');
569+
assertDiffEdit('Coders: ', womanTech + chef, ' win');
570+
571+
// Insert with flags
572+
assertDiffEdit('Made in ', usFlag, '');
573+
assertDiffEdit('', usFlag, ' USA');
574+
575+
// Combining characters
576+
const combining = 'e\u0301'; // e + combining acute accent
577+
assertDiffEdit('caf', combining, '');
578+
assertDiffEdit('', combining, ' accent');
579+
});
367580
});
368581

369582
describe('overlap()', () => {
@@ -386,6 +599,21 @@ describe('overlap()', () => {
386599
expect(overlap('abc', 'abc')).toEqual(3);
387600
expect(overlap('a', 'a')).toEqual(1);
388601
});
602+
603+
test('handles grapheme clusters', () => { // in each positive case the first string ends with the cluster that the second string starts with
604+
const chef = '👨‍🍳'; // man (2) + ZWJ (1) + cooking (2) = 5 code units
605+
const family = '👨‍👩‍👧‍👦'; // 4 emoji (2 each) + 3 ZWJ = 11 code units
606+
607+
// Overlap with grapheme clusters
608+
expect(overlap('hello' + chef, chef + 'world')).toEqual(5); // shared chef cluster
609+
expect(overlap('abc' + family, family + 'xyz')).toEqual(11); // shared family cluster
610+
611+
// No overlap when grapheme differs
612+
expect(overlap('hello' + chef, family + 'world')).toEqual(0); // chef ending versus family start: both contain the man emoji but 0 is expected
613+
614+
// Text overlap with grapheme clusters
615+
expect(overlap('prefix' + chef, chef + 'suffix')).toEqual(5); // surrounding ASCII text does not add to the overlap
616+
});
389617
});
390618

391619
describe('Unicode edge cases', () => {

0 commit comments

Comments
 (0)