[TestMethod]
public void ChunkStringByUnicode()
{
    // Mixed ASCII, CJK (3 UTF-8 bytes each) and emoji (4 UTF-8 bytes each) so that
    // chunk boundaries fall next to multi-byte text elements.
    var limit = 10;
    var text = "Senparc.NeuChar🤝 跨平台信息交互🔄标准🌍。使用 NeuChar 标准💬可以跨平台🌐兼容不同平台的交互🔀信息设置,一次设置🚀,多平台共享📱🖥。";

    var results = MessageHandlerHelper.ChunkStringByUnicode(text, limit);

    var rebuilt = new StringBuilder();
    foreach (var result in results)
    {
        // Every chunk must carry content and stay within the UTF-8 byte limit.
        Assert.IsFalse(string.IsNullOrEmpty(result));
        Assert.IsTrue(Encoding.UTF8.GetByteCount(result) <= limit);
        rebuilt.Append(result);
    }

    // Chunking must neither drop, duplicate nor reorder any part of the text.
    Assert.AreEqual(text, rebuilt.ToString());
}

[TestMethod]
public async Task HandleLimitedTextAsync()
{
    var limit = 10;
    var text = "Senparc.NeuChar🤝 跨平台信息交互🔄标准🌍。使用 NeuChar 标准💬可以跨平台🌐兼容不同平台的交互🔀信息设置,一次设置🚀,多平台共享📱🖥。";

    // The handler echoes each chunk, so the collected results are the chunks themselves.
    var results = await MessageHandlerHelper.TryHandleLimitedText(text, limit, chunk => Task.FromResult(chunk));

    // Results must come back in chunk order and reassemble into the original text.
    Assert.AreEqual(text, string.Join(string.Empty, results));
}
/// <summary>
/// Splits over-long text into chunks on Unicode text-element boundaries, passes each chunk
/// to <paramref name="handleTextFuncAsync"/> in order, and collects the handler results.
/// </summary>
/// <typeparam name="T">Type of the result produced by the handler for each chunk.</typeparam>
/// <param name="content">Text content to process. Null or empty content produces no chunks.</param>
/// <param name="limitedBytes">
/// Maximum UTF-8 byte length of each chunk. When it is not positive, nothing is processed.
/// </param>
/// <param name="handleTextFuncAsync">Asynchronous handler invoked once per chunk.</param>
/// <returns>The handler results in chunk order; an empty list when nothing was processed.</returns>
public static async Task<List<T>> TryHandleLimitedText<T>(string content, int limitedBytes, Func<string, Task<T>> handleTextFuncAsync)
    where T : class
{
    List<T> results = new();

    if (limitedBytes <= 0)
    {
        return results;
    }

    foreach (var chunk in ChunkStringByUnicode(content, limitedBytes))
    {
        // Awaited one at a time so the handler sees the chunks in their original order.
        results.Add(await handleTextFuncAsync(chunk));
    }

    return results;
}

/// <summary>
/// Splits text into consecutive chunks whose UTF-8 encoding is at most
/// <paramref name="chunkSize"/> bytes, never cutting through a text element
/// (as enumerated by <see cref="StringInfo.GetTextElementEnumerator(string)"/>).
/// </summary>
/// <param name="text">Text content to split. Null or empty text yields no chunks.</param>
/// <param name="chunkSize">
/// Maximum UTF-8 byte length of a chunk. A single text element that is larger than this
/// value cannot be split and is returned as a chunk of its own.
/// </param>
/// <returns>
/// A lazily evaluated sequence of non-empty chunks that concatenate back to <paramref name="text"/>.
/// </returns>
public static IEnumerable<string> ChunkStringByUnicode(string text, int chunkSize)
{
    if (string.IsNullOrEmpty(text))
    {
        yield break;
    }

    var stringBuilder = new StringBuilder();
    var byteSize = 0;
    TextElementEnumerator enumerator = StringInfo.GetTextElementEnumerator(text);

    while (enumerator.MoveNext())
    {
        string textElement = enumerator.GetTextElement();
        // GetByteCount measures the encoded size without allocating a byte array.
        int elementBytes = Encoding.UTF8.GetByteCount(textElement);

        // Flush only when the element would push the chunk past the limit ('>' so an exact
        // fit is kept), and only when there is something to flush, so no empty chunk is emitted.
        if (stringBuilder.Length > 0 && byteSize + elementBytes > chunkSize)
        {
            yield return stringBuilder.ToString();
            stringBuilder.Clear();
            byteSize = 0;
        }

        byteSize += elementBytes;
        stringBuilder.Append(textElement);
    }

    if (stringBuilder.Length > 0)
    {
        yield return stringBuilder.ToString();
    }
}