12
12
from clp_py_utils .clp_config import CLP_METADATA_TABLE_PREFIX , CLPConfig , Database
13
13
from clp_py_utils .sql_adapter import SQL_Adapter
14
14
from job_orchestration .scheduler .constants import QueryJobStatus , QueryJobType
15
- from job_orchestration .scheduler .job_config import ExtractIrJobConfig
15
+ from job_orchestration .scheduler .job_config import (
16
+ ExtractIrJobConfig ,
17
+ ExtractJsonJobConfig ,
18
+ QueryJobConfig ,
19
+ )
16
20
17
21
from clp_package_utils .general import (
18
22
CLP_DEFAULT_CONFIG_FILE_RELATIVE_PATH ,
19
23
EXTRACT_FILE_CMD ,
20
24
EXTRACT_IR_CMD ,
25
+ EXTRACT_JSON_CMD ,
21
26
get_clp_home ,
22
27
load_config_file ,
23
28
)
@@ -70,45 +75,37 @@ def get_orig_file_id(db_config: Database, path: str) -> Optional[str]:
70
75
return results [0 ]["orig_file_id" ]
71
76
72
77
73
- def submit_and_monitor_ir_extraction_job_in_db (
78
+ def submit_and_monitor_extraction_job_in_db (
74
79
db_config : Database ,
75
- orig_file_id : str ,
76
- msg_ix : int ,
77
- target_uncompressed_size : Optional [int ],
80
+ job_type : QueryJobType ,
81
+ job_config : QueryJobConfig ,
78
82
) -> int :
79
83
"""
80
- Submits an IR extraction job to the scheduler and waits until the job finishes.
84
+ Submits a stream extraction job to the scheduler and waits until it finishes.
81
85
:param db_config:
82
- :param orig_file_id:
83
- :param msg_ix:
84
- :param target_uncompressed_size:
86
+ :param job_type:
87
+ :param job_config:
85
88
:return: 0 on success, -1 otherwise.
86
89
"""
87
- extract_ir_config = ExtractIrJobConfig (
88
- orig_file_id = orig_file_id ,
89
- msg_ix = msg_ix ,
90
- target_uncompressed_size = target_uncompressed_size ,
91
- )
92
-
93
90
sql_adapter = SQL_Adapter (db_config )
94
- job_id = submit_query_job (sql_adapter , extract_ir_config , QueryJobType . EXTRACT_IR )
91
+ job_id = submit_query_job (sql_adapter , job_config , job_type )
95
92
job_status = wait_for_query_job (sql_adapter , job_id )
96
93
97
94
if QueryJobStatus .SUCCEEDED == job_status :
98
- logger .info (f"Finished IR extraction job { job_id } ." )
95
+ logger .info (f"Finished extraction job { job_id } ." )
99
96
return 0
100
97
101
- logger .error (
102
- f"IR extraction job { job_id } finished with unexpected status: { job_status .to_str ()} ."
103
- )
98
+ logger .error (f"Extraction job { job_id } finished with unexpected status: { job_status .to_str ()} ." )
104
99
return - 1
105
100
106
101
107
- def handle_extract_ir_cmd (
108
- parsed_args : argparse .Namespace , clp_home : pathlib .Path , default_config_file_path : pathlib .Path
102
+ def handle_extract_stream_cmd (
103
+ parsed_args : argparse .Namespace ,
104
+ clp_home : pathlib .Path ,
105
+ default_config_file_path : pathlib .Path ,
109
106
) -> int :
110
107
"""
111
- Handles the IR extraction command.
108
+ Handles the stream extraction command.
112
109
:param parsed_args:
113
110
:param clp_home:
114
111
:param default_config_file_path:
@@ -121,26 +118,46 @@ def handle_extract_ir_cmd(
121
118
if clp_config is None :
122
119
return - 1
123
120
124
- orig_file_id : str
125
- if parsed_args .orig_file_id :
126
- orig_file_id = parsed_args .orig_file_id
121
+ command = parsed_args .command
122
+
123
+ job_config : QueryJobConfig
124
+ job_type : QueryJobType
125
+ if EXTRACT_IR_CMD == command :
126
+ job_type = QueryJobType .EXTRACT_IR
127
+ orig_file_id : str
128
+ if parsed_args .orig_file_id :
129
+ orig_file_id = parsed_args .orig_file_id
130
+ else :
131
+ orig_file_path = parsed_args .orig_file_path
132
+ orig_file_id = get_orig_file_id (clp_config .database , orig_file_path )
133
+ if orig_file_id is None :
134
+ logger .error (f"Cannot find orig_file_id corresponding to '{ orig_file_path } '." )
135
+ return - 1
136
+ job_config = ExtractIrJobConfig (
137
+ orig_file_id = orig_file_id ,
138
+ msg_ix = parsed_args .msg_ix ,
139
+ target_uncompressed_size = parsed_args .target_uncompressed_size ,
140
+ )
141
+ elif EXTRACT_JSON_CMD == command :
142
+ job_type = QueryJobType .EXTRACT_JSON
143
+ job_config = ExtractJsonJobConfig (
144
+ archive_id = parsed_args .archive_id , target_chunk_size = parsed_args .target_chunk_size
145
+ )
127
146
else :
128
- orig_file_id = get_orig_file_id (clp_config .database , parsed_args .orig_file_path )
129
- if orig_file_id is None :
130
- return - 1
147
+ logger .error (f"Unsupported stream extraction command: { command } " )
148
+ return - 1
131
149
132
150
try :
133
151
return asyncio .run (
134
152
run_function_in_process (
135
- submit_and_monitor_ir_extraction_job_in_db ,
153
+ submit_and_monitor_extraction_job_in_db ,
136
154
clp_config .database ,
137
- orig_file_id ,
138
- parsed_args .msg_ix ,
139
- parsed_args .target_uncompressed_size ,
155
+ job_type ,
156
+ job_config ,
140
157
)
141
158
)
142
159
except asyncio .CancelledError :
143
- logger .error ("IR extraction cancelled." )
160
+ logger .error ("Stream extraction cancelled." )
144
161
return - 1
145
162
146
163
@@ -278,13 +295,20 @@ def main(argv):
278
295
group .add_argument ("--orig-file-id" , type = str , help = "Original file's ID." )
279
296
group .add_argument ("--orig-file-path" , type = str , help = "Original file's path." )
280
297
298
+ # JSON extraction command parser
299
+ json_extraction_parser = command_args_parser .add_parser (EXTRACT_JSON_CMD )
300
+ json_extraction_parser .add_argument ("archive_id" , type = str , help = "Archive ID" )
301
+ json_extraction_parser .add_argument (
302
+ "--target-chunk-size" , type = int , help = "Target chunk size." , required = True
303
+ )
304
+
281
305
parsed_args = args_parser .parse_args (argv [1 :])
282
306
283
307
command = parsed_args .command
284
308
if EXTRACT_FILE_CMD == command :
285
309
return handle_extract_file_cmd (parsed_args , clp_home , default_config_file_path )
286
- elif EXTRACT_IR_CMD == command :
287
- return handle_extract_ir_cmd (parsed_args , clp_home , default_config_file_path )
310
+ elif command in ( EXTRACT_IR_CMD , EXTRACT_JSON_CMD ) :
311
+ return handle_extract_stream_cmd (parsed_args , clp_home , default_config_file_path )
288
312
else :
289
313
logger .exception (f"Unexpected command: { command } " )
290
314
return - 1
0 commit comments