diff --git a/hpcflow/sdk/data/config_schema.yaml b/hpcflow/sdk/data/config_schema.yaml index 500ffc84..195923f6 100644 --- a/hpcflow/sdk/data/config_schema.yaml +++ b/hpcflow/sdk/data/config_schema.yaml @@ -152,8 +152,9 @@ rules: - path: [schedulers, { type: map_value }, defaults] condition: value.allowed_keys: # TODO: split these up based on scheduler type - - shebang_args - - options + - shebang_executable + - directives # TODO: only actually allowed in `QueuedScheduler` sub-classes + - options # deprecated, replaced by directives - submit_cmd - show_cmd - del_cmd @@ -211,6 +212,7 @@ rules: condition: value.allowed_keys: - executable + - executable_args - os_args - WSL_executable - WSL_distribution diff --git a/hpcflow/sdk/submission/jobscript.py b/hpcflow/sdk/submission/jobscript.py index e5f095ac..5dee1976 100644 --- a/hpcflow/sdk/submission/jobscript.py +++ b/hpcflow/sdk/submission/jobscript.py @@ -1440,7 +1440,7 @@ def compose_jobscript( scheduler_name: str | None = None, scheduler_args: dict[str, Any] | None = None, ) -> str: - """Prepare the jobscript file string.""" + """Prepare the jobscript file contents as a string.""" scheduler_name = scheduler_name or self.scheduler_name assert scheduler_name assert os_name @@ -1465,15 +1465,14 @@ def compose_jobscript( } shebang = shell.JS_SHEBANG.format( - shebang_executable=" ".join(shell.shebang_executable), - shebang_args=scheduler.shebang_args, + shebang=" ".join(scheduler.shebang_executable or shell.shebang_executable) ) header = shell.JS_HEADER.format(**header_args) if isinstance(scheduler, QueuedScheduler): header = shell.JS_SCHEDULER_HEADER.format( shebang=shebang, - scheduler_options=scheduler.format_options( + scheduler_options=scheduler.format_directives( resources=self.resources, num_elements=self.blocks[0].num_elements, # only used for array jobs is_array=self.is_array, diff --git a/hpcflow/sdk/submission/schedulers/__init__.py b/hpcflow/sdk/submission/schedulers/__init__.py index e61b0292..53a0d4f2 100644 --- a/hpcflow/sdk/submission/schedulers/__init__.py +++ b/hpcflow/sdk/submission/schedulers/__init__.py @@ -7,6 +7,7 @@ import sys import time from typing import Generic, TypeVar, TYPE_CHECKING +import warnings from typing_extensions import override from hpcflow.sdk.typing import hydrate from hpcflow.sdk.core.app_aware import AppAware @@ -39,12 +40,9 @@ class Scheduler(ABC, Generic[JSRefType], AppAware): Parameters ---------- - shell_args: str - Arguments to pass to the shell. Pre-quoted. - shebang_args: str - Arguments to set on the shebang line. Pre-quoted. - options: dict - Options to the scheduler. + shebang_executable: list[str] + If specified, this will be used in the jobscript's shebang line instead of the + shell's `executable` and `executable_args` attributes. """ # This would be in the docstring except it renders really wrongly! @@ -53,20 +51,8 @@ class Scheduler(ABC, Generic[JSRefType], AppAware): # T # The type of a jobscript reference. - #: Default value for arguments to the shell. - DEFAULT_SHELL_ARGS: ClassVar[str] = "" - #: Default value for arguments on the shebang line. - DEFAULT_SHEBANG_ARGS: ClassVar[str] = "" - - def __init__( - self, - shell_args: str | None = None, - shebang_args: str | None = None, - options: dict | None = None, - ): - self.shebang_args = shebang_args or self.DEFAULT_SHEBANG_ARGS - self.shell_args = shell_args or self.DEFAULT_SHELL_ARGS - self.options = options or {} + def __init__(self, shebang_executable: list[str] | None = None): + self.shebang_executable = shebang_executable @property def unique_properties(self) -> tuple[str, ...]: @@ -168,6 +154,13 @@ class QueuedScheduler(Scheduler[str]): Parameters ---------- + directives: dict + Scheduler directives. Each item is written verbatim in the jobscript as a + scheduler directive, and is not processed in any way. If a value is `None`, the + key is considered a flag-like directive. If a value is a list, multiple directives + will be printed to the jobscript with the same key, but different values. + options: dict + Deprecated. Please use `directives` instead. submit_cmd: str The submission command, if overridden from default. show_cmd: str @@ -203,6 +196,8 @@ class QueuedScheduler(Scheduler[str]): def __init__( self, + directives: dict | None = None, + options: dict | None = None, submit_cmd: str | None = None, show_cmd: Sequence[str] | None = None, del_cmd: str | None = None, @@ -215,6 +210,16 @@ def __init__( ) -> None: super().__init__(*args, **kwargs) + if options: + warnings.warn( + f"{self.__class__.__name__!r}: Please use `directives` instead of " + f"`options`, which will be removed in a future release.", + DeprecationWarning, + stacklevel=2, + ) + directives = options + + self.directives = directives or {} self.submit_cmd: str = submit_cmd or self.DEFAULT_SUBMIT_CMD self.show_cmd = show_cmd or self.DEFAULT_SHOW_CMD self.del_cmd = del_cmd or self.DEFAULT_DEL_CMD @@ -250,7 +255,7 @@ def wait_for_jobscripts(self, js_refs: list[str]) -> None: time.sleep(2) @abstractmethod - def format_options( + def format_directives( self, resources: ElementResources, num_elements: int, @@ -259,7 +264,7 @@ def format_options( js_idx: int, ) -> str: """ - Render options in a way that the scheduler can handle. + Render directives in a way that the scheduler can handle. """ def get_std_out_err_filename( diff --git a/hpcflow/sdk/submission/schedulers/direct.py b/hpcflow/sdk/submission/schedulers/direct.py index 215b1c1f..36e3b109 100644 --- a/hpcflow/sdk/submission/schedulers/direct.py +++ b/hpcflow/sdk/submission/schedulers/direct.py @@ -44,14 +44,6 @@ class DirectScheduler(Scheduler[DirectRef]): The correct subclass (:py:class:`DirectPosix` or :py:class:`DirectWindows`) should be used to create actual instances. - Keyword Args - ------------ - shell_args: str - Arguments to pass to the shell. Pre-quoted. - shebang_args: str - Arguments to set on the shebang line. Pre-quoted. - options: dict - Options to the jobscript command. """ @classmethod @@ -221,36 +213,16 @@ class DirectPosix(DirectScheduler): """ A direct scheduler for POSIX systems. - Keyword Args - ------------ - shell_args: str - Arguments to pass to the shell. Pre-quoted. - shebang_args: str - Arguments to set on the shebang line. Pre-quoted. - options: dict - Options to the jobscript command. """ - #: Default shell. - DEFAULT_SHELL_EXECUTABLE: ClassVar[str] = "/bin/bash" - @hydrate class DirectWindows(DirectScheduler): """ A direct scheduler for Windows. - Keyword Args - ------------ - shell_args: str - Arguments to pass to the shell. Pre-quoted. - options: dict - Options to the jobscript command. """ - #: Default shell. - DEFAULT_SHELL_EXECUTABLE: ClassVar[str] = "powershell.exe" - @override def get_submit_command( self, shell: Shell, js_path: str, deps: dict[Any, tuple[Any, ...]] diff --git a/hpcflow/sdk/submission/schedulers/sge.py b/hpcflow/sdk/submission/schedulers/sge.py index ddde40bd..0b076a74 100644 --- a/hpcflow/sdk/submission/schedulers/sge.py +++ b/hpcflow/sdk/submission/schedulers/sge.py @@ -37,12 +37,11 @@ class SGEPosix(QueuedScheduler): ------------ cwd_switch: str Override of default switch to use to set the current working directory. - shell_args: str - Arguments to pass to the shell. Pre-quoted. - shebang_args: str - Arguments to set on the shebang line. Pre-quoted. - options: dict - Options to the jobscript command. + directives: dict + Scheduler directives. Each item is written verbatim in the jobscript as a + scheduler directive, and is not processed in any way. If a value is `None`, the + key is considered a flag-like directive. If a value is a list, multiple directives + will be printed to the jobscript with the same key, but different values. Notes ----- @@ -55,8 +54,6 @@ class SGEPosix(QueuedScheduler): """ - #: Default args for shebang line. - DEFAULT_SHEBANG_ARGS: ClassVar[str] = "" #: Default submission command. DEFAULT_SUBMIT_CMD: ClassVar[str] = "qsub" #: Default command to show the queue state. @@ -203,7 +200,7 @@ def __format_std_stream_file_option_lines( yield f"{self.js_cmd} -e {base}" @override - def format_options( + def format_directives( self, resources: ElementResources, num_elements: int, @@ -212,7 +209,7 @@ def format_options( js_idx: int, ) -> str: """ - Format the options to the jobscript command. + Format the directives to the jobscript command. """ opts: list[str] = [] opts.append(self.format_switch(self.cwd_switch)) @@ -226,7 +223,7 @@ def format_options( ) ) - for opt_k, opt_v in self.options.items(): + for opt_k, opt_v in self.directives.items(): if opt_v is None: opts.append(f"{self.js_cmd} {opt_k}") elif isinstance(opt_v, list): diff --git a/hpcflow/sdk/submission/schedulers/slurm.py b/hpcflow/sdk/submission/schedulers/slurm.py index 426344be..e6a4f4b0 100644 --- a/hpcflow/sdk/submission/schedulers/slurm.py +++ b/hpcflow/sdk/submission/schedulers/slurm.py @@ -37,12 +37,11 @@ class SlurmPosix(QueuedScheduler): Keyword Args ------------ - shell_args: str - Arguments to pass to the shell. Pre-quoted. - shebang_args: str - Arguments to set on the shebang line. Pre-quoted. - options: dict - Options to the jobscript command. + directives: dict + Scheduler directives. Each item is written verbatim in the jobscript as a + scheduler directive, and is not processed in any way. If a value is `None`, the + key is considered a flag-like directive. If a value is a list, multiple directives + will be printed to the jobscript with the same key, but different values. Notes ----- @@ -59,10 +58,6 @@ class SlurmPosix(QueuedScheduler): """ - #: Default shell. - DEFAULT_SHELL_EXECUTABLE: ClassVar[str] = "/bin/bash" - #: Default args for shebang line. - DEFAULT_SHEBANG_ARGS: ClassVar[str] = "" #: Default submission command. DEFAULT_SUBMIT_CMD: ClassVar[str] = "sbatch" #: Default command to show the queue state. @@ -368,7 +363,7 @@ def __format_std_stream_file_option_lines( yield f"{self.js_cmd} --error {base}.err" @override - def format_options( + def format_directives( self, resources: ElementResources, num_elements: int, @@ -377,7 +372,7 @@ def format_options( js_idx: int, ) -> str: """ - Format the options to the scheduler. + Format the directives to the scheduler. """ opts: list[str] = [] opts.extend(self.__format_core_request_lines(resources)) @@ -391,7 +386,7 @@ def format_options( ) ) - for opt_k, opt_v in self.options.items(): + for opt_k, opt_v in self.directives.items(): if isinstance(opt_v, list): for i in opt_v: opts.append(f"{self.js_cmd} {opt_k} {i}") diff --git a/hpcflow/sdk/submission/shells/base.py b/hpcflow/sdk/submission/shells/base.py index 5babb1a4..1daf4b73 100644 --- a/hpcflow/sdk/submission/shells/base.py +++ b/hpcflow/sdk/submission/shells/base.py @@ -79,30 +79,39 @@ class Shell(ABC): #: Template for the jobscript footer. JS_FOOTER: ClassVar[str] - __slots__ = ("_executable", "os_args") + __slots__ = ("_executable", "executable_args", "os_args") def __init__( - self, executable: str | None = None, os_args: dict[str, str] | None = None + self, + executable: str | None = None, + executable_args: list[str] | None = None, + os_args: dict[str, str] | None = None, ): #: Which executable implements the shell. self._executable = executable or self.DEFAULT_EXE + #: Arguments to provide to the shell executable (e.g. `--login`). + self.executable_args = executable_args or [] #: Arguments to pass to the shell. self.os_args = os_args or {} def __eq__(self, other: Any) -> bool: if not isinstance(other, self.__class__): return False - return self._executable == other._executable and self.os_args == other.os_args + return ( + self._executable == other._executable + and self.executable_args == other.executable_args + and self.os_args == other.os_args + ) def __hash__(self): - return get_hash((self._executable, self.os_args)) + return get_hash((self._executable, tuple(self.executable_args), self.os_args)) @property def executable(self) -> list[str]: """ The executable to use plus any mandatory arguments. """ - return [self._executable] + return [self._executable, *self.executable_args] @property def shebang_executable(self) -> list[str]: diff --git a/hpcflow/sdk/submission/shells/bash.py b/hpcflow/sdk/submission/shells/bash.py index c349af2d..9ac1f223 100644 --- a/hpcflow/sdk/submission/shells/bash.py +++ b/hpcflow/sdk/submission/shells/bash.py @@ -38,7 +38,7 @@ class Bash(Shell): #: Indent for environment setup. JS_ENV_SETUP_INDENT: ClassVar[str] = 2 * JS_INDENT #: Template for the jobscript shebang line. - JS_SHEBANG: ClassVar[str] = """#!{shebang_executable} {shebang_args}""" + JS_SHEBANG: ClassVar[str] = """#!{shebang}""" #: Template for the jobscript functions file. JS_FUNCS: ClassVar[str] = dedent( """\ @@ -429,6 +429,10 @@ def executable(self) -> list[str]: @property def shebang_executable(self) -> list[str]: + """ + The executable to use in a shebang line, overridden here to exclude the WSL + command. + """ return super().executable def _get_OS_info_POSIX(self) -> Mapping[str, str]: diff --git a/hpcflow/tests/unit/test_shell.py b/hpcflow/tests/unit/test_shell.py index f7ecd7e0..16bfcbf5 100644 --- a/hpcflow/tests/unit/test_shell.py +++ b/hpcflow/tests/unit/test_shell.py @@ -1,4 +1,10 @@ from __future__ import annotations +from pathlib import Path +import sys + +import pytest + +import hpcflow.app as hf from hpcflow.sdk.submission.shells import ALL_SHELLS @@ -97,3 +103,27 @@ def test_format_array_bash(): def test_format_array_get_item_bash(): shell = ALL_SHELLS["bash"]["posix"]() assert shell.format_array_get_item("my_arr", 3) == r"${my_arr[3]}" + + +@pytest.mark.integration +@pytest.mark.skipif(condition=sys.platform == "win32", reason="This is a bash-only test.") +def test_executable_args_bash_login(null_config, tmp_path: Path): + """Check if we provide a `--login` argument to the shell `executable_args`, we end up + in a login shell on bash.""" + + cmd = "shopt -q login_shell && echo 'Login shell' || echo 'Not login shell'" + s1 = hf.TaskSchema( + objective="t1", + actions=[hf.Action(commands=[hf.Command(command=cmd)])], + ) + t1 = hf.Task( + schema=s1, + resources={"any": {"shell_args": {"executable_args": ["--login"]}}}, + ) + wkt = hf.WorkflowTemplate(name="test_bash_login", tasks=[t1]) + wk = hf.Workflow.from_template( + template=wkt, + path=tmp_path, + ) + wk.submit(wait=True, status=False, add_to_known=False) + assert wk.submissions[0].jobscripts[0].get_stdout().strip() == "Login shell" diff --git a/hpcflow/tests/unit/test_submission.py b/hpcflow/tests/unit/test_submission.py index 20d81486..04716f0b 100644 --- a/hpcflow/tests/unit/test_submission.py +++ b/hpcflow/tests/unit/test_submission.py @@ -468,7 +468,11 @@ def test_unique_schedulers_two_direct_and_SLURM(new_null_config, tmp_path) -> No def test_scheduler_config_defaults(new_null_config, tmp_path) -> None: """Check default options defined in the config are merged into jobscript resources.""" - hf.config.set("schedulers.direct.defaults.options", {"a": "c"}) + + # note we use the `shebang_executable` for this test. On Windows, this will not be + # included in the jobscript, so it is effectively ignored, but the test is still + # valid. + hf.config.set("schedulers.direct.defaults.shebang_executable", ["/bin/bash"]) t1 = hf.Task( schema=hf.task_schemas.test_t1_ps, @@ -479,7 +483,10 @@ def test_scheduler_config_defaults(new_null_config, tmp_path) -> None: schema=hf.task_schemas.test_t1_ps, inputs={"p1": 1}, resources={ - "any": {"scheduler": "direct", "scheduler_args": {"options": {"a": "b"}}} + "any": { + "scheduler": "direct", + "scheduler_args": {"shebang_executable": ["bash"]}, + } }, ) wkt = hf.WorkflowTemplate(name="temp", tasks=[t1, t2]) @@ -489,5 +496,7 @@ def test_scheduler_config_defaults(new_null_config, tmp_path) -> None: ) sub = wk.add_submission() assert sub is not None - assert sub.jobscripts[0].resources.scheduler_args == {"options": {"a": "c"}} - assert sub.jobscripts[1].resources.scheduler_args == {"options": {"a": "b"}} + assert sub.jobscripts[0].resources.scheduler_args == { + "shebang_executable": ["/bin/bash"] + } + assert sub.jobscripts[1].resources.scheduler_args == {"shebang_executable": ["bash"]}