Skip to content

Commit 7a1f6de

Browse files
committed
Tune data packaging (in Hugging Face)
1 parent 9c11c81 commit 7a1f6de

File tree

4 files changed

+650
-174
lines changed

4 files changed

+650
-174
lines changed

assets/compile_stats.py

Lines changed: 5 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -186,13 +186,15 @@ def to_name_subset(name):
186186

187187

188188
def to_language_name_subset(name, subset=None):
189+
_languages = ["fr", "en", "de", "es", "it"]
189190
if subset is None:
190191
name, subset = to_name_subset(name)
191-
for lan in "fr", "en", "de", "es", "it":
192+
for lan in _languages:
192193
subset2 = subset.rstrip(":.0123456789")
193194
if subset.startswith(lan) and (len(subset) == len(lan) or subset[len(lan)] in ".:-"):
194-
if "-" in subset and len(subset2) == 5:
195-
subset = subset2
195+
if len(subset2) >= 5 and subset[len(lan)] in ".:-" and subset2[3:5] in _languages:
196+
# multi-lingual
197+
subset = subset2[:5]
196198
lan = subset
197199
subset = subset[len(lan) :].strip(":.")
198200
subset = subset.strip(":_")

0 commit comments

Comments
 (0)