1+ from __future__ import annotations
2+
3+ import argparse
4+ import json
5+ import logging
6+ from typing import Any , Optional
7+
8+ try :
9+ from transformers import TrainingArguments
10+ except ImportError : # pragma: no cover - exercised only when transformers is absent.
11+ class TrainingArguments : # type: ignore[no-redef]
12+ def __init__ (self , * args : Any , ** kwargs : Any ) -> None :
13+ raise ImportError (
14+ "transformers is required to construct HuggingFace TrainingArguments."
15+ )
16+
17+
# Module-level logger, named after this module.
logger = logging.getLogger(__name__)

# Fallback output directory handed to TrainingArguments when the
# training_parameters payload yields no configuration.
DEFAULT_OUTPUT_DIR = "./output"
21+
22+
def parse_training_args(raw: Optional[str]) -> dict[str, Any]:
    """Parse a JSON string into a keyword-argument mapping for TrainingArguments.

    Args:
        raw: JSON text encoding an object, or ``None``. Surrounding whitespace
            is ignored.

    Returns:
        The decoded JSON object. An empty dict is returned when ``raw`` is
        ``None``, empty, or whitespace-only.

    Raises:
        ValueError: If ``raw`` is neither ``None`` nor a string, is not valid
            JSON, does not decode to a JSON object, or contains a key that is
            empty or whitespace-only.
    """
    if raw is None:
        return {}

    if not isinstance(raw, str):
        raise ValueError(
            "training_parameters must be a JSON string or None; got "
            f"{type(raw).__name__}."
        )

    normalized = raw.strip()
    if not normalized:
        return {}

    try:
        parsed = json.loads(normalized)
    except json.JSONDecodeError as exc:
        # Keep the example in a plain literal so the braces and quotes are
        # shown to the user exactly as they should be typed.
        example = '{"output_dir": "./output"}'
        raise ValueError(
            "Invalid JSON in training_parameters. Provide a JSON object string, for "
            f"example '{example}'. Received: {raw!r}. "
            f"JSON error: {exc.msg} at line {exc.lineno}, column {exc.colno}."
        ) from exc

    if not isinstance(parsed, dict):
        raise ValueError(
            "training_parameters must decode to a JSON object. Received "
            f"{type(parsed).__name__}: {parsed!r}."
        )

    # json.loads always produces str keys for objects, so only blank keys
    # need to be rejected here.
    invalid_keys = [key for key in parsed if not key.strip()]
    if invalid_keys:
        raise ValueError(
            "training_parameters contains invalid keys. JSON object keys must be non-empty "
            f"strings. Invalid keys: {invalid_keys!r}."
        )

    return parsed
62+
63+
def build_training_arguments(raw: Optional[str]) -> TrainingArguments:
    """Build ``TrainingArguments`` from a JSON ``training_parameters`` payload.

    Args:
        raw: JSON object text (or ``None``) as accepted by
            ``parse_training_args``.

    Returns:
        A ``TrainingArguments`` instance. ``DEFAULT_OUTPUT_DIR`` is used as
        ``output_dir`` whenever the payload does not set one, whether the
        payload is empty or merely omits that key.

    Raises:
        ValueError: If the payload cannot be parsed, or if constructing
            ``TrainingArguments`` from a non-empty payload fails (the
            original exception is chained as the cause).
    """
    # NOTE(review): the payload is logged verbatim; if callers can pass
    # credentials (e.g. a hub token) consider redacting before logging.
    logger.info("Raw training_parameters payload: %r", raw)
    parsed_config = parse_training_args(raw)

    if not parsed_config:
        logger.info(
            "training_parameters is empty or missing; using default TrainingArguments with output_dir=%s",
            DEFAULT_OUTPUT_DIR,
        )
        return TrainingArguments(output_dir=DEFAULT_OUTPUT_DIR)

    # Give a partial config the same default as an empty one.
    # NOTE(review): some transformers releases reject a missing output_dir
    # outright - confirm against the pinned version.
    if "output_dir" not in parsed_config:
        logger.info(
            "training_parameters does not set output_dir; defaulting to %s",
            DEFAULT_OUTPUT_DIR,
        )
        parsed_config = {**parsed_config, "output_dir": DEFAULT_OUTPUT_DIR}

    # Serialise once; the same text is used for the log lines and the error.
    serialized = json.dumps(parsed_config, sort_keys=True)
    logger.info("Parsed training_parameters config: %s", serialized)
    try:
        return TrainingArguments(**parsed_config)
    except Exception as exc:
        # Broad on purpose: any constructor failure is reported uniformly as
        # a configuration error, with the traceback kept in the log.
        logger.exception(
            "Failed to create TrainingArguments from parsed training_parameters: %s",
            serialized,
        )
        raise ValueError(
            "Failed to initialize TrainingArguments from training_parameters. "
            "Check the JSON keys and values, and ensure they match the HuggingFace "
            f"TrainingArguments signature. Parsed config: {serialized}"
        ) from exc
89+
90+
91+ def _build_parser () -> argparse .ArgumentParser :
92+ parser = argparse .ArgumentParser (description = "Run a HuggingFace training job." )
93+ parser .add_argument (
94+ "--training_parameters" ,
95+ type = str ,
96+ default = "{}" ,
97+ help = "JSON object used to initialize HuggingFace TrainingArguments." ,
98+ )
99+ return parser
100+
101+
def main() -> None:
    """Run the command-line entrypoint.

    Configures INFO-level logging, parses the command line, builds
    ``TrainingArguments`` from ``--training_parameters`` and logs the result.
    No training is started here.
    """
    logging.basicConfig(
        format="%(asctime)s %(levelname)s %(name)s %(message)s",
        level=logging.INFO,
    )
    cli_args = _build_parser().parse_args()

    training_args = build_training_arguments(cli_args.training_parameters)
    logger.info("TrainingArguments initialized successfully: %s", training_args)

    # Replace this with the actual training workflow used by the example.
    logger.info("Trainer entrypoint completed parsing and initialization only.")
112+
113+
# Run the CLI entrypoint only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()
0 commit comments