File tree Expand file tree Collapse file tree 1 file changed +14
-0
lines changed
Expand file tree Collapse file tree 1 file changed +14
-0
lines changed Original file line number Diff line number Diff line change @@ -259,6 +259,7 @@ def check_json(
259259 if not os .path .isfile (file ):
260260 report_dict ["file_present" ] = f"File not found at given file path { file } "
261261 report_dict ["is_check_passed" ] = False
262+ return report_dict
262263 else :
263264 report_dict ["file_present" ] = "File found"
264265
@@ -272,9 +273,22 @@ def check_json(
272273 elif file_size == 0 :
273274 report_dict ["file_size" ] = "File is empty"
274275 report_dict ["is_check_passed" ] = False
276+ return report_dict
275277 else :
276278 report_dict ["file_size" ] = f"File size { round (file_size / (2 ** 30 ) ,3 )} GB"
277279
280+ # Check that the file is UTF-8 encoded. If not, report where the error occurs.
281+ try :
282+ with open (file , "r" , encoding = "utf-8" ) as f :
283+ f .read ()
284+ except UnicodeDecodeError as e :
285+ report_dict ["utf8" ] = (
286+ f"File is not UTF-8 encoded. Error raised: { e } ."
287+ f"See https://docs.together.ai/docs/fine-tuning for more information."
288+ )
289+ report_dict ["is_check_passed" ] = False
290+ return report_dict
291+
278292 with open (file ) as f :
279293 # idx must be instantiated so decode errors (e.g. file is a tar) or empty files are caught
280294 idx = - 1
You can’t perform that action at this time.
0 commit comments