linxGnu · shranet · Apr 6, 2025 · Apr 24, 2025 · May 4, 2025 · Sep 4, 2025
diff --git a/data/codings.go b/data/codings.go
@@ -320,9 +320,9 @@ func (*ucs2) Decode(data []byte) (string, error) {
 	return decode(data, tmp.NewDecoder())
 }
 
-func (*ucs2) ShouldSplit(text string, octetLimit uint) (shouldSplit bool) {
-	runeSlice := []rune(text)
-	return uint(len(runeSlice)*2) > octetLimit
+func (c *ucs2) ShouldSplit(text string, octetLimit uint) (shouldSplit bool) {
+	encoded, _ := c.Encode(text) // encode error is discarded; only the byte count is used
+	return octetLimit < uint(len(encoded)) // split when the encoded size exceeds the limit
 }
 
 func (c *ucs2) EncodeSplit(text string, octetLimit uint) (allSeg [][]byte, err error) {
@@ -331,25 +331,26 @@ func (c *ucs2) EncodeSplit(text string, octetLimit uint) (allSeg [][]byte, err e
 	}
 
 	allSeg = [][]byte{}
-	runeSlice := []rune(text)
-	hextetLim := int(octetLimit / 2) // round down
-
-	// hextet = 16 bits, the correct terms should be hexadectet
-	fr, to := 0, hextetLim
-	for fr < len(runeSlice) {
-		if to > len(runeSlice) {
-			to = len(runeSlice)
-		}
+	var runeBytes []byte
+	var seg []byte
 
-		seg, err := c.Encode(string(runeSlice[fr:to]))
+	for _, r := range text {
+		runeBytes, err = c.Encode(string(r))
 		if err != nil {
-			return nil, err
+			return
 		}
-		allSeg = append(allSeg, seg)
 
-		fr, to = to, to+hextetLim
+		if uint(len(seg)+len(runeBytes)) > octetLimit {
+			allSeg = append(allSeg, seg)
+			seg = runeBytes[:]
+		} else {
+			seg = append(seg, runeBytes...)
+		}
 	}
 
+	if len(seg) > 0 {
+		allSeg = append(allSeg, seg)
+	}
 	return
 }
 

diff --git a/data/codings_test.go b/data/codings_test.go
@@ -160,6 +160,32 @@ func TestSplit(t *testing.T) {
 			})
 	})
 
+	t.Run("testEmojiSplitUCS2_N1", func(t *testing.T) {
+		testEncodingSplit(t, UCS2,
+			140,
+			"AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA😀",
+			[]string{
+				"00410041004100410041004100410041004100410041004100410041004100410041004100410041004100410041004100410041004100410041004100410041004100410041004100410041004100410041004100410041004100410041004100410041004100410041004100410041004100410041004100410041004100410041004100410041D83DDE00",
+			},
+			[]string{
+				"AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA😀",
+			})
+	})
+
+	t.Run("testEmojiSplitUCS2_N2", func(t *testing.T) {
+		testEncodingSplit(t, UCS2,
+			134,
+			"AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA😀BBB",
+			[]string{
+				"004100410041004100410041004100410041004100410041004100410041004100410041004100410041004100410041004100410041004100410041004100410041004100410041004100410041004100410041004100410041004100410041004100410041004100410041004100410041004100410041004100410041004100410041",
+				"D83DDE00004200420042",
+			},
+			[]string{
+				"AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA",
+				"😀BBB",
+			})
+	})
+
 	t.Run("testSplitUCS2Empty", func(t *testing.T) {
 		testEncodingSplit(t, UCS2,
 			134,