@@ -12,14 +12,11 @@
 
 import logging
 import math
-import subprocess
 from textwrap import wrap
 
 from common.schedulers.converters import ComparableObject, from_table_to_obj_list
-from common.utils import check_command_output, run_command
+from common.utils import check_command_output
 
-SLURM_NODE_ERROR_STATES = ["down", "drained", "fail"]
-SLURM_NODE_DISABLED_STATES = ["draining", "drained"]
 PENDING_RESOURCES_REASONS = [
     "Resources",
     "Nodes required for job are DOWN, DRAINED or reserved for jobs in higher priority partitions",
@@ -62,16 +59,6 @@ def get_jobs_info(job_state_filter=None):
     return SlurmJob.from_table(output)
 
 
-def get_node_state(hostname):
-    # retrieves the state of a specific node
-    # https://slurm.schedmd.com/sinfo.html#lbAG
-    # Output format:
-    # down*
-    command = "/opt/slurm/bin/sinfo --noheader -o '%T' -n {}".format(hostname)
-    output = check_command_output(command).strip()
-    return output
-
-
 def get_pending_jobs_info(
     instance_properties=None, max_nodes_filter=None, filter_by_pending_reasons=None, log_pending_jobs=True
 ):
@@ -282,39 +269,6 @@ def job_runnable_on_given_node(job_resources_per_node, resources_available, existing
     return True
 
 
-def lock_node(hostname, reason=None):
-    # hostname format: ip-10-0-0-114.eu-west-1.compute.internal
-    hostname = hostname.split(".")[0]
-    logging.info("Locking host %s", hostname)
-    command = [
-        "/opt/slurm/bin/scontrol",
-        "update",
-        "NodeName={0}".format(hostname),
-        "State=DRAIN",
-        "Reason={}".format(reason if reason else '"Shutting down"'),
-    ]
-    try:
-        run_command(command)
-    except subprocess.CalledProcessError:
-        logging.error("Error locking host %s", hostname)
-
-
-def unlock_node(hostname, reason=None):
-    hostname = hostname.split(".")[0]
-    logging.info("Unlocking host %s", hostname)
-    command = [
-        "/opt/slurm/bin/scontrol",
-        "update",
-        "NodeName={0}".format(hostname),
-        "State=RESUME",
-        "Reason={}".format(reason if reason else '"Unlocking"'),
-    ]
-    try:
-        run_command(command)
-    except subprocess.CalledProcessError:
-        logging.error("Error unlocking host %s", hostname)
-
-
 class SlurmJob(ComparableObject):
     # This is the format after being processed by reformat_table function
     # JOBID|ST|NODES|CPUS|TASKS|CPUS_PER_TASK|MIN_CPUS|REASON|TRES_PER_JOB|TRES_PER_TASK