mofa-org · Ishita-190 · Feb 27, 2026
diff --git a/node-hub/dora-primespeech/dora_primespeech/moyoyo_tts/text/english.py b/node-hub/dora-primespeech/dora_primespeech/moyoyo_tts/text/english.py
@@ -218,12 +218,22 @@ def get_namedict():
     return name_dict
 
 
 def text_normalize(text):
-    # todo: eng text normalize
+    """Normalize English text (punctuation, numbers, accents) for g2p_en."""
     # 适配中文及 g2p_en 标点
+    if not text:
+        return ""
+
+    # ensure string
+    text = str(text)
+
+    # punctuation compatibility (smart quotes, dashes, ellipsis, CJK punctuation)
     rep_map = {
         "[;:：，；]": ",",
-        '["’]': "'",
+        "[\u2018\u2019`]": "'",
+        "[\u201c\u201d\u00ab\u00bb\u201e\"]": '"',
+        "\u2026": "...",
+        "[\u2013\u2014]": "-",
         "。": ".",
         "！": "!",
         "？": "?",
@@ -233,16 +243,41 @@ def text_normalize(text):
 
     # 来自 g2p_en 文本格式化处理
     # 增加大写兼容
-    text = unicode(text)
-    text = normalize_numbers(text)
-    text = ''.join(char for char in unicodedata.normalize('NFD', text)
-                    if unicodedata.category(char) != 'Mn')  # Strip accents
-    text = re.sub("[^ A-Za-z'.,?!\-]", "", text)
-    text = re.sub(r"(?i)i\.e\.", "that is", text)
-    text = re.sub(r"(?i)e\.g\.", "for example", text)
+    try:
+        text = normalize_numbers(text)
+    except Exception:
+        # best-effort: if number expansion fails, keep the raw digits (the
+        # filter below preserves 0-9) rather than aborting normalization
+        pass
+
+    # strip accents
+    text = "".join(
+        ch for ch in unicodedata.normalize('NFD', text)
+        if unicodedata.category(ch) != 'Mn'
+    )
+
+    # keep letters (both cases), digits, quotes/apostrophes, basic punctuation and hyphen
+    text = re.sub(r"[^ 0-9A-Za-z'\".,?!\-]", "", text)
+
+    # expand abbreviations (case-insensitive); no trailing \b, because a "."
+    # followed by a space or end of string is not a word boundary
+    text = re.sub(r"\bi\.e\.", "that is", text, flags=re.IGNORECASE)
+    text = re.sub(r"\be\.g\.", "for example", text, flags=re.IGNORECASE)
 
     # 避免重复标点引起的参考泄露
-    text = replace_consecutive_punctuation(text)
+    try:
+        text = replace_consecutive_punctuation(text)
+    except Exception:
+        text = re.sub(r"([.,?!\-])\1+", r"\1", text)
+
+    # normalize whitespace
+    text = re.sub(r"\s+", " ", text).strip()
+
+    # ensure terminal punctuation for TTS stability; drop a dangling comma or
+    # hyphen first so the appended "." does not recreate consecutive punctuation
+    text = text.rstrip(" ,-")
+    if text and text[-1] not in ".?!":
+        text += "."
 
     return text