Skip to content

Commit e20f06a

Browse files
committed
Rename read_params_from_cmdline and save_metrics_params
Rename `read_params_from_cmdline` to `initialize_job` and `save_metrics_params` to `finalize_job`. The purpose of the renaming is to better reflect when/why to call the functions instead of focusing too much on technical details. The old names are kept as aliases but are marked as deprecated. BREAKING: I used this occasion to remove the following options from `initialize_job`/`read_params_from_cmdline`: (1) `make_immutable`: Parameters are always immutable now. If one really needs a mutable structure, this is still possible by copying the data. (2) `save_params`: The parameters are now always saved.
1 parent 21fa982 commit e20f06a

File tree

12 files changed

+253
-181
lines changed

12 files changed

+253
-181
lines changed

CHANGELOG.md

+13
Original file line numberDiff line numberDiff line change
@@ -55,8 +55,19 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
5555
- Local submissions now store stdout and stderr to log files, like they would do on the cluster.
5656
This should be useful for debugging scripts to work with the cluster locally, as previously,
5757
there was no way to access the outputs of locally running jobs.
58+
- Renamed `read_params_from_cmdline` to `initialize_job`. An alias with the old name is
59+
available but will emit a FutureWarning.
60+
- Renamed `save_metrics_params` to `finalize_job`. An alias with the old name is
61+
available but will emit a FutureWarning.
5862
- *Relevant for devs only:* Use ruff instead of flake8 for linting.
5963

64+
### Removed
65+
- Removed option `save_params` from `read_params_from_cmdline`. Parameters will always be
66+
saved now.
67+
- Removed option `make_immutable` from `read_params_from_cmdline`. Returned parameters
68+
are always immutable now. If needed, a mutable copy can be created with
69+
`smart_settings.param_classes.AttributeDict(params)`.
70+
6071
### Added
6172
- Setting `generate_report` to control automatic report generation (See
6273
{ref}`config.general_settings`).
@@ -86,6 +97,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
8697
- The package has been renamed from "cluster" to "cluster_utils", please update your
8798
imports accordingly. There is still a wrapper package with the old name (so existing
8899
code should still work) but it will be removed in the next major release.
100+
- `read_params_from_cmdline` is deprecated. Use `initialize_job` instead.
101+
- `save_metrics_params` is deprecated. Use `finalize_job` instead.
89102

90103

91104
## 2.5 - 2023-10-05

README.md

+3-3
Original file line numberDiff line numberDiff line change
@@ -72,7 +72,7 @@ def main(
7272
If you don't want to use a decorator, use the following:
7373

7474
```python
75-
from cluster_utils import read_params_from_cmdline, save_metrics_params
75+
import cluster_utils
7676

7777
def main(params):
7878
results = ... # Code that computes something interesting
@@ -81,12 +81,12 @@ def main(params):
8181
if __name__ == "__main__":
8282
# Dictionary that contains parameters passed by cluster_utils. This call also establishes
8383
# communication with the cluster_utils server. Also contains "working_dir" and "id", as above.
84-
params = read_params_from_cmdline()
84+
params = cluster_utils.initialize_job()
8585

8686
results = main(params)
8787

8888
# Report results back to cluster_utils.
89-
save_metrics_params(results)
89+
cluster_utils.finalize_job(results)
9090
```
9191

9292
To start a cluster run, start the cluster_utils server on the login node of the cluster.

cluster_utils/__init__.py

+4
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,8 @@
66
announce_fraction_finished,
77
cluster_main,
88
exit_for_resume,
9+
finalize_job,
10+
initialize_job,
911
read_params_from_cmdline,
1012
save_metrics_params,
1113
)
@@ -24,6 +26,8 @@
2426
"announce_fraction_finished",
2527
"cluster_main",
2628
"exit_for_resume",
29+
"finalize_job",
30+
"initialize_job",
2731
"save_metrics_params",
2832
"read_params_from_cmdline",
2933
]

cluster_utils/client/__init__.py

+61-8
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@
1919
import pathlib
2020
import sys
2121
import time
22+
import warnings
2223
from typing import Mapping, MutableMapping, Optional
2324

2425
import smart_settings
@@ -122,7 +123,42 @@ def read_params_from_cmdline(
122123
verbose: bool = True,
123124
dynamic: bool = True,
124125
save_params: bool = True,
125-
) -> smart_settings.AttributeDict:
126+
) -> smart_settings.param_classes.AttributeDict:
127+
"""Alias for :func:`initialize_job`.
128+
129+
Deprecated:
130+
This function is deprecated and will be removed in a future release. Use
131+
:func:`initialize_job` instead.
132+
"""
133+
warnings.warn(
134+
"`read_params_from_cmdline` is deprecated! Use `initialize_job` instead.",
135+
FutureWarning,
136+
stacklevel=2,
137+
)
138+
139+
if not make_immutable:
140+
msg = (
141+
"The option `make_immutable=False` is not supported anymore."
142+
" You can create a mutable copy of the parameters with"
143+
" `smart_settings.param_classes.AttributeDict(params)`"
144+
)
145+
raise RuntimeError(msg)
146+
147+
if not save_params:
148+
msg = (
149+
"The option `save_params=False` is not supported anymore."
150+
" Parameters will always be saved."
151+
)
152+
raise RuntimeError(msg)
153+
154+
return initialize_job(cmd_line, verbose=verbose, dynamic=dynamic)
155+
156+
157+
def initialize_job(
158+
cmd_line: Optional[list[str]] = None,
159+
verbose: bool = True,
160+
dynamic: bool = True,
161+
) -> smart_settings.param_classes.AttributeDict:
126162
"""Read parameters from command line and register at cluster_utils server.
127163
128164
This function is intended to be called at the beginning of your job scripts. It
@@ -135,10 +171,8 @@ def read_params_from_cmdline(
135171
136172
Args:
137173
cmd_line: Command line arguments (defaults to sys.argv).
138-
make_immutable: See ``smart_settings.loads()``
139174
verbose: If true, print the loaded parameters.
140175
dynamic: See ``smart_settings.loads()``
141-
save_params: If true, save the settings as JSON file in the working_dir.
142176
143177
Returns:
144178
Parameters as loaded from the command line arguments with smart_settings.
@@ -174,7 +208,7 @@ def add_cmd_params(orig_dict):
174208

175209
final_params = smart_settings.loads(
176210
json.dumps(parameter_dict),
177-
make_immutable=make_immutable,
211+
make_immutable=True,
178212
dynamic=dynamic,
179213
post_unpack_hooks=([add_cmd_params, check_reserved_params]),
180214
)
@@ -186,7 +220,7 @@ def add_cmd_params(orig_dict):
186220

187221
final_params = smart_settings.load(
188222
os.fspath(parameter_file),
189-
make_immutable=make_immutable,
223+
make_immutable=True,
190224
dynamic=dynamic,
191225
post_unpack_hooks=([add_cmd_params, check_reserved_params]),
192226
)
@@ -205,14 +239,31 @@ def add_cmd_params(orig_dict):
205239

206240
submission_state.start_time = time.time()
207241

208-
if save_params and "working_dir" in final_params:
242+
# TODO should probably rather be an assert, there should always be a working dir
243+
if "working_dir" in final_params:
209244
os.makedirs(final_params.working_dir, exist_ok=True)
210245
_save_settings_to_json(final_params, final_params.working_dir)
211246

212247
return final_params
213248

214249

215250
def save_metrics_params(metrics: MutableMapping[str, float], params) -> None:
251+
"""Alias for :func:`finalize_job`.
252+
253+
Deprecated:
254+
This function is deprecated and will be removed in a future release. Use
255+
:func:`finalize_job` instead.
256+
"""
257+
warnings.warn(
258+
"`save_metrics_params` is deprecated! Use `finalize_job` instead.",
259+
FutureWarning,
260+
stacklevel=2,
261+
)
262+
263+
finalize_job(metrics, params)
264+
265+
266+
def finalize_job(metrics: MutableMapping[str, float], params) -> None:
216267
"""Save metrics and parameters and send metrics to the cluster_utils server.
217268
218269
Save the used parameters and resulting metrics to CSV files (filenames defined by
@@ -339,9 +390,9 @@ def wrapper():
339390
"""Saves settings file on beginning, calls wrapped function with params from cmd
340391
and saves metrics to working_dir
341392
"""
342-
params = read_params_from_cmdline(**read_params_args)
393+
params = initialize_job(**read_params_args)
343394
metrics = main_func(**params)
344-
save_metrics_params(metrics, params)
395+
finalize_job(metrics, params)
345396
return metrics
346397

347398
return wrapper
@@ -352,6 +403,8 @@ def wrapper():
352403
"announce_fraction_finished",
353404
"cluster_main",
354405
"exit_for_resume",
406+
"finalize_job",
407+
"initialize_job",
355408
"save_metrics_params",
356409
"read_params_from_cmdline",
357410
]

docs/configuration.rst

+1-1
Original file line numberDiff line numberDiff line change
@@ -497,7 +497,7 @@ settings (i.e. the ones independent of the optimisation method set in
497497
**Required.**
498498

499499
Name of the metric that is used for the optimisation. Has to match the name of one
500-
of the metrics that are saved with :func:`cluster_utils.save_metrics_params`.
500+
of the metrics that are saved with :func:`~cluster_utils.client.finalize_job`.
501501

502502
.. confval:: optimization_setting.minimize: bool
503503

docs/troubleshooting.rst

+1-1
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@ Below is a list of error messages that may occur with potential solutions.
88

99
**Pandas DataError: No numeric types to aggregate**
1010

11-
If one of the values stored with :func:`~cluster_utils.save_metrics_params` has a
11+
If one of the values stored with :func:`~cluster_utils.client.finalize_job` has a
1212
non-numeric type (e.g. string).
1313

1414

examples/basic/main.py

+3-3
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@
33

44
import numpy as np
55

6-
from cluster_utils import exit_for_resume, read_params_from_cmdline, save_metrics_params
6+
from cluster_utils import exit_for_resume, finalize_job, initialize_job
77

88

99
def fn_to_optimize(*, u, v, w, x, y, sharp_penalty, tuple_input=None):
@@ -46,7 +46,7 @@ def fn_to_optimize(*, u, v, w, x, y, sharp_penalty, tuple_input=None):
4646
if np.random.rand() < 0.05:
4747
raise ValueError("5 percent of all jobs die early for testing")
4848

49-
params = read_params_from_cmdline()
49+
params = initialize_job()
5050

5151
# simulate that the jobs take some time
5252
max_sleep_time = params.get("max_sleep_time", 10)
@@ -68,5 +68,5 @@ def fn_to_optimize(*, u, v, w, x, y, sharp_penalty, tuple_input=None):
6868

6969
noisy_result = noiseless_result + 0.5 * np.random.normal()
7070
metrics = {"result": noisy_result, "noiseless_result": noiseless_result}
71-
save_metrics_params(metrics, params)
71+
finalize_job(metrics, params)
7272
print(noiseless_result)

examples/basic/main_no_fail.py

+3-3
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@
55

66
import numpy as np
77

8-
from cluster_utils import exit_for_resume, read_params_from_cmdline, save_metrics_params
8+
from cluster_utils import exit_for_resume, finalize_job, initialize_job
99

1010

1111
def fn_to_optimize(*, u, v, w, x, y, sharp_penalty, tuple_input=None):
@@ -41,7 +41,7 @@ def fn_to_optimize(*, u, v, w, x, y, sharp_penalty, tuple_input=None):
4141

4242

4343
if __name__ == "__main__":
44-
params = read_params_from_cmdline()
44+
params = initialize_job()
4545

4646
# simulate that the jobs take some time
4747
max_sleep_time = params.get("max_sleep_time", 10)
@@ -63,5 +63,5 @@ def fn_to_optimize(*, u, v, w, x, y, sharp_penalty, tuple_input=None):
6363

6464
noisy_result = noiseless_result + 0.5 * np.random.normal()
6565
metrics = {"result": noisy_result, "noiseless_result": noiseless_result}
66-
save_metrics_params(metrics, params)
66+
finalize_job(metrics, params)
6767
print(noiseless_result)

examples/checkpointing/checkpoint_example.py

+4-4
Original file line numberDiff line numberDiff line change
@@ -5,8 +5,8 @@
55

66
from cluster_utils import (
77
exit_for_resume,
8-
read_params_from_cmdline,
9-
save_metrics_params,
8+
finalize_job,
9+
initialize_job,
1010
)
1111

1212

@@ -41,7 +41,7 @@ def load_checkpoint(load_path, model, optim):
4141

4242
if __name__ == "__main__":
4343
# parameters are loaded from json file
44-
params = read_params_from_cmdline()
44+
params = initialize_job()
4545
# a folder for each run is created
4646
os.makedirs(params.working_dir, exist_ok=True)
4747
checkpoint_path = os.path.join(params.working_dir, "checkpoint.pt")
@@ -88,5 +88,5 @@ def load_checkpoint(load_path, model, optim):
8888

8989
metrics = {"loss": loss, "iterations": iteration}
9090
# save final metrics, you will only see the resuming in the cluster_run.log file
91-
save_metrics_params(metrics, params)
91+
finalize_job(metrics, params)
9292
print(f"Training finished, final loss {loss} at episode {iteration}")

examples/slurm_timeout_signal/main.py

+2-2
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,7 @@ def timeout_signal_handler(sig, frame):
2121

2222
def main() -> int:
2323
"""Main function."""
24-
params = cluster_utils.read_params_from_cmdline()
24+
params = cluster_utils.initialize_job()
2525

2626
n_training_iterations = 60
2727
start_iteration = 0
@@ -53,7 +53,7 @@ def main() -> int:
5353

5454
# just return some dummy metric value here
5555
metrics = {"result": params.x + params.y, "n_iterations": i}
56-
cluster_utils.save_metrics_params(metrics, params)
56+
cluster_utils.finalize_job(metrics, params)
5757

5858
return 0
5959

tests/main_no_save_metrics.py

+2-2
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,9 @@
11
import time
22

3-
from cluster_utils import read_params_from_cmdline
3+
from cluster_utils import initialize_job
44

55
if __name__ == "__main__":
6-
params = read_params_from_cmdline()
6+
params = initialize_job()
77
time.sleep(2)
88
# Here we exit without sending result to cluster utils. We want cluster utils to count the job
99
# as failed.

0 commit comments

Comments
 (0)