diff --git a/poncho/src/poncho/library_network_code.py b/poncho/src/poncho/library_network_code.py index a5bedebc30..38a4524309 100755 --- a/poncho/src/poncho/library_network_code.py +++ b/poncho/src/poncho/library_network_code.py @@ -28,6 +28,9 @@ r, w = os.pipe() exec_method = None +# infile load mode for function tasks inside this library +function_infile_load_mode = None + # This class captures how results from FunctionCalls are conveyed from # the library to the manager. @@ -85,6 +88,18 @@ def sigchld_handler(signum, frame): os.write(w, b"a") +# Load the infile for a function task inside this library +def load_function_infile(in_file_path): + if function_infile_load_mode == "cloudpickle": + with open(in_file_path, "rb") as f: + return cloudpickle.load(f) + elif function_infile_load_mode == "json": + with open(in_file_path, "r", encoding="utf-8") as f: + return json.load(f) + else: + raise ValueError(f"invalid infile load mode: {function_infile_load_mode}") + + # Read data from worker, start function, and dump result to `outfile`. def start_function(in_pipe_fd, thread_limit=1): # read length of buffer to read @@ -131,8 +146,7 @@ def start_function(in_pipe_fd, thread_limit=1): os.chdir(function_sandbox) # parameters are represented as infile. - with open("infile", "rb") as f: - event = cloudpickle.load(f) + event = load_function_infile("infile") # output of execution should be dumped to outfile. 
result = globals()[function_name](event) @@ -158,11 +172,10 @@ def start_function(in_pipe_fd, thread_limit=1): return -1, function_id elif exec_method == "fork": try: - arg_infile = os.path.join(function_sandbox, "infile") - with open(arg_infile, "rb") as f: - event = cloudpickle.load(f) + infile_path = os.path.join(function_sandbox, "infile") + event = load_function_infile(infile_path) except Exception: - stdout_timed_message(f"TASK {function_id} error: can't load the arguments from {arg_infile}") + stdout_timed_message(f"TASK {function_id} error: can't load the arguments from {infile_path}") return -1, function_id p = os.fork() if p == 0: @@ -368,11 +381,16 @@ def main(): global exec_method exec_method = library_info['exec_mode'] + # set infile load mode of functions in this library + global function_infile_load_mode + function_infile_load_mode = library_info['function_infile_load_mode'] + # send configuration of library, just its name for now config = { "name": library_info['library_name'], "taskid": args.task_id, "exec_mode": exec_method, + "function_infile_load_mode": function_infile_load_mode, } send_configuration(config, out_pipe_fd, args.worker_pid) diff --git a/poncho/src/poncho/package_serverize.py b/poncho/src/poncho/package_serverize.py index 4a6e5e7a29..cfc789a11b 100755 --- a/poncho/src/poncho/package_serverize.py +++ b/poncho/src/poncho/package_serverize.py @@ -178,6 +178,7 @@ def pack_library_code(path, envpath): # @param exec_mode The execution mode of functions in this library. # @param hoisting_modules A list of modules imported at the preamble of library, including packages, functions and classes. # @param library_context_info A list containing [library_context_func, library_context_args, library_context_kwargs]. Used to create the library context on remote nodes. +# @param function_infile_load_mode The mode to load infile for function tasks inside this library. # @return A hash value. 
def generate_library_hash(library_name, function_list, @@ -186,7 +187,8 @@ def generate_library_hash(library_name, add_env, exec_mode, hoisting_modules, - library_context_info): + library_context_info, + function_infile_load_mode): library_info = [library_name] function_list = list(function_list) function_names = set() @@ -234,6 +236,8 @@ def generate_library_hash(library_name, for kwarg in library_context_info[2]: library_info.append(str(kwarg)) library_info.append(str(library_context_info[2][kwarg])) + + library_info.append(str(function_infile_load_mode)) library_info = ''.join(library_info) # linear time complexity msg = hashlib.sha1() @@ -293,6 +297,7 @@ def generate_taskvine_library_code(library_path, hoisting_modules=None): # @param exec_mode execution mode of functions in this library # @param hoisting_modules a list of modules to be imported at the preamble of library # @param library_context_info a list containing a library's context to be created remotely +# @param function_infile_load_mode The mode to load infile for function tasks inside this library. 
# @return name of the file containing serialized information about the library def generate_library(library_cache_path, library_code_path, @@ -303,7 +308,8 @@ def generate_library(library_cache_path, need_pack=True, exec_mode='fork', hoisting_modules=None, - library_context_info=None + library_context_info=None, + function_infile_load_mode='cloudpickle' ): # create library_info.clpk library_info = {} @@ -313,6 +319,7 @@ def generate_library(library_cache_path, library_info['library_name'] = library_name library_info['exec_mode'] = exec_mode library_info['context_info'] = cloudpickle.dumps(library_context_info) + library_info['function_infile_load_mode'] = function_infile_load_mode with open(library_info_path, 'wb') as f: cloudpickle.dump(library_info, f) diff --git a/taskvine/src/Makefile b/taskvine/src/Makefile index 8f828fd7bf..1bd3ef4f43 100644 --- a/taskvine/src/Makefile +++ b/taskvine/src/Makefile @@ -1,12 +1,13 @@ include ../../config.mk include ../../rules.mk -TARGETS=manager worker tools bindings examples +TARGETS=manager worker tools bindings examples graph all: $(TARGETS) worker: manager -bindings: manager +graph: manager +bindings: manager graph tools: manager examples: manager worker tools bindings diff --git a/taskvine/src/bindings/python3/Makefile b/taskvine/src/bindings/python3/Makefile index ca4ca6a52b..905b2703ca 100644 --- a/taskvine/src/bindings/python3/Makefile +++ b/taskvine/src/bindings/python3/Makefile @@ -34,4 +34,4 @@ install: all mkdir -p $(CCTOOLS_PYTHON3_PATH)/ndcctools/taskvine/compat cp ndcctools/taskvine/*.py $(DSPYTHONSO) $(CCTOOLS_PYTHON3_PATH)/ndcctools/taskvine cp ndcctools/taskvine/compat/*.py $(CCTOOLS_PYTHON3_PATH)/ndcctools/taskvine/compat - cp taskvine.py $(CCTOOLS_PYTHON3_PATH)/ + cp taskvine.py $(CCTOOLS_PYTHON3_PATH)/ \ No newline at end of file diff --git a/taskvine/src/bindings/python3/ndcctools/taskvine/compat/dask_executor.py b/taskvine/src/bindings/python3/ndcctools/taskvine/compat/dask_executor.py index 
f6f4834069..2b48b2675a 100644 --- a/taskvine/src/bindings/python3/ndcctools/taskvine/compat/dask_executor.py +++ b/taskvine/src/bindings/python3/ndcctools/taskvine/compat/dask_executor.py @@ -138,8 +138,10 @@ def get(self, dsk, keys, *, hoisting_modules=None, # Deprecated, use lib_modules import_modules=None, # Deprecated, use lib_modules lazy_transfers=True, # Deprecated, use worker_tranfers + extra_serialize_time_sec=0, ): try: + self.extra_serialize_time_sec = extra_serialize_time_sec self.set_property("framework", "dask") if retries and retries < 1: raise ValueError("retries should be larger than 0") @@ -213,10 +215,13 @@ def __call__(self, *args, **kwargs): return self.get(*args, **kwargs) def _dask_execute(self, dsk, keys): + indices = {k: inds for (k, inds) in find_dask_keys(keys)} keys_flatten = indices.keys() + time_start = time.time() dag = DaskVineDag(dsk, low_memory_mode=self.low_memory_mode, prune_depth=self.prune_depth) + print(f"Time taken to enqueue tasks: {time.time() - time_start:.6f} seconds") tag = f"dag-{id(dag)}" # create Library if using 'function-calls' task mode. 
@@ -437,7 +442,8 @@ def _enqueue_dask_calls(self, dag, tag, rs, retries, enqueued_calls): extra_files=self.extra_files, retries=retries, worker_transfers=lazy, - wrapper=self.wrapper) + wrapper=self.wrapper, + extra_serialize_time_sec=self.extra_serialize_time_sec) t.set_priority(priority) t.set_tag(tag) # tag that identifies this dag @@ -557,7 +563,9 @@ def __init__(self, m, env_vars=None, retries=5, worker_transfers=False, - wrapper=None): + wrapper=None, + extra_serialize_time_sec=0): + time.sleep(extra_serialize_time_sec) self._key = key self._sexpr = sexpr @@ -658,7 +666,9 @@ def __init__(self, m, extra_files=None, retries=5, worker_transfers=False, - wrapper=None): + wrapper=None, + extra_serialize_time_sec=0): + time.sleep(extra_serialize_time_sec) self._key = key self.resources = resources diff --git a/taskvine/src/bindings/python3/ndcctools/taskvine/manager.py b/taskvine/src/bindings/python3/ndcctools/taskvine/manager.py index a26946be1e..7a9006731b 100644 --- a/taskvine/src/bindings/python3/ndcctools/taskvine/manager.py +++ b/taskvine/src/bindings/python3/ndcctools/taskvine/manager.py @@ -946,8 +946,9 @@ def check_library_exists(self, library_name): # @param hoisting_modules A list of modules imported at the preamble of library, including packages, functions and classes. # @param exec_mode Execution mode that the library should use to run function calls. Either 'direct' or 'fork' # @param library_context_info A list containing [library_context_func, library_context_args, library_context_kwargs]. Used to create the library context on remote nodes. + # @param function_infile_load_mode The mode to load infile for function tasks inside this library. # @returns A task to be used with @ref ndcctools.taskvine.manager.Manager.install_library. 
- def create_library_from_functions(self, library_name, *function_list, poncho_env=None, init_command=None, add_env=True, hoisting_modules=None, exec_mode='fork', library_context_info=None): + def create_library_from_functions(self, library_name, *function_list, poncho_env=None, init_command=None, add_env=True, hoisting_modules=None, exec_mode='fork', library_context_info=None, function_infile_load_mode='cloudpickle'): # Delay loading of poncho until here, to avoid bringing in poncho dependencies unless needed. # Ensure poncho python library is available. from ndcctools.poncho import package_serverize @@ -969,7 +970,8 @@ def create_library_from_functions(self, library_name, *function_list, poncho_env add_env=add_env, exec_mode=exec_mode, hoisting_modules=hoisting_modules, - library_context_info=library_context_info) + library_context_info=library_context_info, + function_infile_load_mode=function_infile_load_mode) # Create path for caching library code and environment based on function hash. 
library_cache_dir_name = "vine-library-cache" @@ -1017,7 +1019,8 @@ def create_library_from_functions(self, library_name, *function_list, poncho_env need_pack=need_pack, exec_mode=exec_mode, hoisting_modules=hoisting_modules, - library_context_info=library_context_info) + library_context_info=library_context_info, + function_infile_load_mode=function_infile_load_mode) # enable correct permissions for library code os.chmod(library_code_path, 0o775) diff --git a/taskvine/src/bindings/python3/taskvine.i b/taskvine/src/bindings/python3/taskvine.i index ba4e66cb74..1d875f97be 100644 --- a/taskvine/src/bindings/python3/taskvine.i +++ b/taskvine/src/bindings/python3/taskvine.i @@ -1,5 +1,5 @@ /* taskvine.i */ -%module cvine +%module(package="ndcctools.taskvine") cvine %include carrays.i %array_functions(struct rmsummary *, rmsummayArray); diff --git a/taskvine/src/graph/Makefile b/taskvine/src/graph/Makefile new file mode 100644 index 0000000000..50314300bc --- /dev/null +++ b/taskvine/src/graph/Makefile @@ -0,0 +1,14 @@ +include ../../../config.mk +include ../../../rules.mk + +SUBDIRS = dagvine + +all clean install test lint format: $(SUBDIRS) + +$(SUBDIRS): %: + $(MAKE) -C $@ $(MAKECMDGOALS) + +.PHONY: all clean install test lint format $(SUBDIRS) example + +example: + PYTHONPATH=../../../ python -m taskvine.src.graph.example_blueprint diff --git a/taskvine/src/graph/__init__.py b/taskvine/src/graph/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/taskvine/src/graph/dagvine/.gitignore b/taskvine/src/graph/dagvine/.gitignore new file mode 100644 index 0000000000..25000a1241 --- /dev/null +++ b/taskvine/src/graph/dagvine/.gitignore @@ -0,0 +1,7 @@ +*.a +*.so +*.o +*_wrap.c +*_wrap.0 +build/ +context_graph/ \ No newline at end of file diff --git a/taskvine/src/graph/dagvine/.gitkeep b/taskvine/src/graph/dagvine/.gitkeep new file mode 100644 index 0000000000..e69de29bb2 diff --git a/taskvine/src/graph/dagvine/Makefile 
b/taskvine/src/graph/dagvine/Makefile new file mode 100644 index 0000000000..6e08b844bd --- /dev/null +++ b/taskvine/src/graph/dagvine/Makefile @@ -0,0 +1,36 @@ +include ../../../../config.mk +include ../../../../rules.mk + +MODULE_DIR := $(CCTOOLS_PYTHON3_PATH)/ndcctools/taskvine/dagvine +SUBDIRS := vine_graph blueprint_graph + +.PHONY: all install clean lint format $(SUBDIRS) + +all: $(SUBDIRS) + +$(SUBDIRS): + $(MAKE) -C $@ all + +install: all + mkdir -p $(MODULE_DIR) + cp dagvine.py $(MODULE_DIR) + cp __init__.py $(MODULE_DIR) + @for dir in $(SUBDIRS); do \ + $(MAKE) -C $$dir install; \ + done + +clean: + @for dir in $(SUBDIRS); do \ + $(MAKE) -C $$dir clean; \ + done + rm -rf build + +lint: + @for dir in $(SUBDIRS); do \ + $(MAKE) -C $$dir lint; \ + done + +format: + @for dir in $(SUBDIRS); do \ + $(MAKE) -C $$dir format; \ + done diff --git a/taskvine/src/graph/dagvine/__init__.py b/taskvine/src/graph/dagvine/__init__.py new file mode 100644 index 0000000000..feee0c0384 --- /dev/null +++ b/taskvine/src/graph/dagvine/__init__.py @@ -0,0 +1,7 @@ +# Copyright (C) 2025- The University of Notre Dame +# This software is distributed under the GNU General Public License. +# See the file COPYING for details. 
+ +from .dagvine import DAGVine + +__all__ = ["DAGVine"] diff --git a/taskvine/src/graph/dagvine/blueprint_graph/.gitignore b/taskvine/src/graph/dagvine/blueprint_graph/.gitignore new file mode 100644 index 0000000000..60123e6dfc --- /dev/null +++ b/taskvine/src/graph/dagvine/blueprint_graph/.gitignore @@ -0,0 +1,2 @@ +adaptor_test.py +__pycache__/ \ No newline at end of file diff --git a/taskvine/src/graph/dagvine/blueprint_graph/Makefile b/taskvine/src/graph/dagvine/blueprint_graph/Makefile new file mode 100644 index 0000000000..55bbb1c5e4 --- /dev/null +++ b/taskvine/src/graph/dagvine/blueprint_graph/Makefile @@ -0,0 +1,31 @@ +include ../../../../../config.mk +include ../../../../../rules.mk + +PROJECT_NAME = dagvine + +SOURCE_DIR = $(CCTOOLS_HOME)/taskvine/src/graph/$(PROJECT_NAME)/blueprint_graph +MODULE_ROOT = $(CCTOOLS_PYTHON3_PATH)/ndcctools/taskvine/$(PROJECT_NAME) +MODULE_DIR = $(MODULE_ROOT)/blueprint_graph + +PY_SOURCES = $(wildcard $(SOURCE_DIR)/*.py) + +.PHONY: all install clean lint format + +all: + @true + +install: + mkdir -p $(MODULE_DIR) + cp $(PY_SOURCES) $(MODULE_DIR) + +clean: + rm -rf __pycache__ + +lint: + flake8 --ignore=$(CCTOOLS_FLAKE8_IGNORE_ERRORS) \ + --exclude=$(CCTOOLS_FLAKE8_IGNORE_FILES) \ + $(SOURCE_DIR)/ + +format: + @true + diff --git a/taskvine/src/graph/dagvine/blueprint_graph/__init__.py b/taskvine/src/graph/dagvine/blueprint_graph/__init__.py new file mode 100644 index 0000000000..f8282164a7 --- /dev/null +++ b/taskvine/src/graph/dagvine/blueprint_graph/__init__.py @@ -0,0 +1,16 @@ +# Copyright (C) 2025 The University of Notre Dame +# This software is distributed under the GNU General Public License. +# See the file COPYING for details. 
+ + +from .blueprint_graph import BlueprintGraph, TaskOutputRef, TaskOutputWrapper +from .proxy_functions import compute_single_key +from .adaptor import Adaptor + +__all__ = [ + "BlueprintGraph", + "TaskOutputRef", + "TaskOutputWrapper", + "compute_single_key", + "Adaptor", +] diff --git a/taskvine/src/graph/dagvine/blueprint_graph/adaptor.py b/taskvine/src/graph/dagvine/blueprint_graph/adaptor.py new file mode 100644 index 0000000000..20cd935012 --- /dev/null +++ b/taskvine/src/graph/dagvine/blueprint_graph/adaptor.py @@ -0,0 +1,633 @@ +from collections.abc import Mapping + +try: + import dask +except ImportError: + dask = None + +try: + from dask.base import is_dask_collection +except ImportError: + is_dask_collection = None + +try: + import importlib + + dts = importlib.import_module("dask._task_spec") +except Exception: + # Treat any import failure as "no TaskSpec support" (including environments + # where the private module is unavailable or type-checkers can't resolve it). + dts = None + +from ndcctools.taskvine.dagvine.blueprint_graph.blueprint_graph import TaskOutputRef, BlueprintGraph + + +def _identity(value): + """Tiny helper that just hands back whatever you pass in (e.g. `_identity(3)` -> 3).""" + return value + + +def _apply_with_kwargs_kvlist(func, args_list, kwargs_kvlist): + """Execute `func(*args_list, **kwargs)` where kwargs is encoded as a list of [k, v] pairs. + + This encoding is intentional: `dask_executor.execute_graph_vertex.rec_call` only recurses + into lists and task tuples, not dicts/tuples. By representing kwargs as lists, upstream + task-key references can be resolved before we rebuild the dict here. + """ + kwargs = {k: v for (k, v) in kwargs_kvlist} + return func(*args_list, **kwargs) + + +def collections_from_blueprint_graph(bg): + assert isinstance(bg, BlueprintGraph), "bg must be a BlueprintGraph" + + def _ref_to_key(ref: TaskOutputRef): + # Replace TaskOutputRef occurrences with the referenced task_key only. 
+ # NOTE: This intentionally drops any `path` component on the ref, per request. + return ref.task_key + + out = {} + for task_key, (func, args, kwargs) in bg.task_dict.items(): + # Only rewrite references inside args/kwargs; keep everything else unchanged. + new_args = bg._visit_task_output_refs(args, _ref_to_key, rewrite=True) + new_kwargs = bg._visit_task_output_refs(kwargs, _ref_to_key, rewrite=True) + + # IMPORTANT: `dask_executor.execute_graph_vertex` expects classic Dask sexprs: + # (func, arg1, arg2, ...) + # It does not understand the BlueprintGraph triple (func, args_tuple, kwargs_dict), + # and it also does not recurse into dicts. So: + # - No-kwargs tasks become (func, *args) + # - Kwargs tasks become (_apply_with_kwargs_kvlist, func, [*args], [[k, v], ...]) + if new_kwargs: + args_list = list(new_args) + kwargs_kvlist = [[k, v] for k, v in new_kwargs.items()] + out[task_key] = (_apply_with_kwargs_kvlist, func, args_list, kwargs_kvlist) + else: + out[task_key] = (func, *new_args) + + return out + + +class Adaptor: + """Normalize user task inputs so `BlueprintGraph` can consume them without extra massaging.""" + + _LEAF_TYPES = (str, bytes, bytearray, memoryview, int, float, bool, type(None)) + + def __init__(self, task_dict): + + if isinstance(task_dict, BlueprintGraph): + self.converted = task_dict + return + + # TaskSpec-only state used to "lift" inline Tasks that cannot be reduced to + # a pure Python value (or would be unsafe/expensive to inline). + self._lifted_nodes = {} + self._lift_cache = {} + self._lift_counter = 0 + # Global shared key-set for the whole adaptation run (original keys + lifted keys). + # IMPORTANT: TaskSpec conversion must always consult the same shared set so that + # lifted keys remain visible across subsequent conversions/dedup/reference checks. 
+ self._task_keys = set() + + normalized = self._normalize_task_dict(task_dict) + self.converted = self._convert_to_blueprint_tasks(normalized) + + def _normalize_task_dict(self, task_dict): + """Collapse every supported input style into a classic `{key: sexpr or TaskSpec}` mapping.""" + from_dask_collection = bool( + is_dask_collection and any(is_dask_collection(v) for v in task_dict.values()) + ) + + if from_dask_collection: + task_dict = self._dask_collections_to_task_dict(task_dict) + else: + # IMPORTANT: treat plain user dicts as DAGVine sexprs by default. + # If we unconditionally run `dask._task_spec.convert_legacy_graph(...)` when + # dts is available, Dask will interpret our "final Mapping is kwargs" + # convention as a positional dict argument, breaking sexpr semantics. + task_dict = dict(task_dict) + + # Only ask Dask to rewrite legacy graphs when we *know* the input came + # from a Dask collection/HLG. This keeps classic DAGVine sexprs stable + # even in environments where dask._task_spec is installed. + if from_dask_collection and dts and hasattr(dts, "convert_legacy_graph"): + task_dict = dts.convert_legacy_graph(task_dict) + + return task_dict + + def _convert_to_blueprint_tasks(self, task_dict): + """Turn each normalized entry into the `(func, args, kwargs)` triple that BlueprintGraph expects.""" + if not task_dict: + return {} + + converted = {} + # Shared task key universe for TaskSpec lifting/dedup/reference decisions. + # Keep this as a single shared set for the whole conversion. + self._task_keys = set(task_dict.keys()) + task_keys = self._task_keys + + for key, value in task_dict.items(): + if self._is_dts_node(value): + converted[key] = self._convert_dts_graph_node(key, value, task_keys) + else: + converted[key] = self._convert_sexpr_task(value, task_keys) + + # If any inline TaskSpec Tasks were lifted during conversion, convert them too. + # We do this iteratively because lifting a node can expose further inline Tasks. 
+ while True: + pending = [(k, v) for k, v in self._lifted_nodes.items() if k not in converted] + if not pending: + break + for k, node in pending: + converted[k] = self._convert_dts_graph_node(k, node, task_keys) + + return converted + + def _convert_dts_graph_node(self, key, node, task_keys): + """Translate modern Dask TaskSpec graph nodes into blueprint expressions.""" + if not dts: + raise RuntimeError("Dask TaskSpec support unavailable: dask._task_spec is not installed") + + task_cls = getattr(dts, "Task", None) + alias_cls = getattr(dts, "Alias", None) + literal_cls = getattr(dts, "Literal", None) + datanode_cls = getattr(dts, "DataNode", None) + nested_cls = getattr(dts, "NestedContainer", None) + taskref_cls = getattr(dts, "TaskRef", None) + + if task_cls and isinstance(node, task_cls): + func = self._extract_callable_from_task(node) + if func is None: + raise TypeError(f"Task {key} is missing a callable function/op attribute") + + raw_args = getattr(node, "args", ()) or () + raw_kwargs = getattr(node, "kwargs", {}) or {} + + args = [] + try: + for i, arg in enumerate(raw_args): + args.append(self._unwrap_dts_operand(arg, task_keys, parent_key=key)) + except Exception as e: + raise TypeError( + "Failed to adapt TaskSpec node argument while converting to BlueprintGraph.\n" + f"- parent_task_key: {key!r}\n" + f"- func: {self._safe_repr(func)}\n" + f"- arg_index: {i}\n" + f"- arg_value: {self._safe_repr(arg)}\n" + f"- raw_args: {self._safe_repr(raw_args)}\n" + f"- raw_kwargs: {self._safe_repr(raw_kwargs)}" + ) from e + + kwargs = {} + try: + for k, v in raw_kwargs.items(): + kwargs[k] = self._unwrap_dts_operand(v, task_keys, parent_key=key) + except Exception as e: + raise TypeError( + "Failed to adapt TaskSpec node kwarg while converting to BlueprintGraph.\n" + f"- parent_task_key: {key!r}\n" + f"- func: {self._safe_repr(func)}\n" + f"- kwarg_key: {k!r}\n" + f"- kwarg_value: {self._safe_repr(v)}\n" + f"- raw_args: {self._safe_repr(raw_args)}\n" + f"- 
raw_kwargs: {self._safe_repr(raw_kwargs)}" + ) from e + + return self._build_expr(func, args, kwargs) + + if alias_cls and isinstance(node, alias_cls): + alias_ref = self._extract_alias_target(node, task_keys) + if alias_ref is None: + raise ValueError(f"Alias {key} is missing a resolvable upstream task") + return self._build_expr(_identity, [alias_ref], {}) + + if datanode_cls and isinstance(node, datanode_cls): + return self._build_expr(_identity, [node.value], {}) + + if literal_cls and isinstance(node, literal_cls): + return self._build_expr(_identity, [node.value], {}) + + if taskref_cls and isinstance(node, taskref_cls): + ref = TaskOutputRef(node.key, getattr(node, "path", ()) or ()) + return self._build_expr(_identity, [ref], {}) + + if nested_cls and isinstance(node, nested_cls): + payload = getattr(node, "value", None) + if payload is None: + payload = getattr(node, "data", None) + value = self._unwrap_dts_operand(payload, task_keys, parent_key=key) + return self._build_expr(_identity, [value], {}) + + return self._build_expr(_identity, [node], {}) + + def _convert_sexpr_task(self, sexpr, task_keys): + """Handle legacy sexpr-style nodes by replacing embedded task keys with `TaskOutputRef`.""" + if not isinstance(sexpr, (list, tuple)) or not sexpr: + raise TypeError(f"Task definition must be a non-empty tuple/list, got {type(sexpr)}") + + func = sexpr[0] + tail = sexpr[1:] + + if tail and isinstance(tail[-1], Mapping): + raw_args, raw_kwargs = tail[:-1], tail[-1] + else: + raw_args, raw_kwargs = tail, {} + + args = tuple(self._wrap_dependency(arg, task_keys) for arg in raw_args) + kwargs = {k: self._wrap_dependency(v, task_keys) for k, v in raw_kwargs.items()} + + return func, args, kwargs + + def _wrap_dependency(self, obj, task_keys): + """Wrap nested objects inside a sexpr when they point at other tasks.""" + if isinstance(obj, TaskOutputRef): + return obj + + if self._should_wrap(obj, task_keys): + return TaskOutputRef(obj) + + if isinstance(obj, 
list): + return [self._wrap_dependency(v, task_keys) for v in obj] + + if isinstance(obj, tuple): + if obj and callable(obj[0]): + head = obj[0] + tail = tuple(self._wrap_dependency(v, task_keys) for v in obj[1:]) + return (head, *tail) + return tuple(self._wrap_dependency(v, task_keys) for v in obj) + + if isinstance(obj, Mapping): + return {k: self._wrap_dependency(v, task_keys) for k, v in obj.items()} + + if isinstance(obj, set): + return {self._wrap_dependency(v, task_keys) for v in obj} + + if isinstance(obj, frozenset): + return frozenset(self._wrap_dependency(v, task_keys) for v in obj) + + return obj + + def _should_wrap(self, obj, task_keys): + """Decide whether a value should become a `TaskOutputRef`.""" + if isinstance(obj, self._LEAF_TYPES): + if isinstance(obj, str): + hit = obj in task_keys + return hit + return False + try: + hit = obj in task_keys + return hit + except TypeError: + return False + + # Flatten Dask collections into the dict-of-tasks structure the rest of the + # pipeline expects. DAGVine clients often hand us a dict like + # {"result": dask.delayed(...)}; we merge the underlying HighLevelGraphs so + # `ContextGraph` sees the same dict representation C does. 
+ def _dask_collections_to_task_dict(self, task_dict): + """Flatten Dask collections into the classic dict-of-task layout.""" + assert is_dask_collection is not None + from dask.highlevelgraph import HighLevelGraph, ensure_dict + + if not isinstance(task_dict, dict): + raise TypeError("Input must be a dict") + + for k, v in task_dict.items(): + if not is_dask_collection(v): + raise TypeError( + f"Input must be a dict of DaskCollection, but found {k} with type {type(v)}" + ) + + if dts: + sub_hlgs = [v.dask for v in task_dict.values()] + hlg = HighLevelGraph.merge(*sub_hlgs).to_dict() + else: + hlg = dask.base.collections_to_dsk(task_dict.values()) + + return ensure_dict(hlg) + + def _is_dts_node(self, value): + """Return True when the value is part of the TaskSpec family.""" + if not dts: + return False + try: + return isinstance(value, dts.GraphNode) + except AttributeError: + return False + + def _unwrap_dts_operand(self, operand, task_keys, *, parent_key=None): + """Recursively unwrap TaskSpec operands into pure Python values/containers and `TaskOutputRef`. + + Contract (TaskSpec path only): + - TaskRef/Alias become `TaskOutputRef` (references, never lifted). + - Literals/DataNode become plain Python values. + - NestedContainer unwraps recursively. + - Task inside args/kwargs is either: + - treated as a reference when it has a top-level key, or + - reduced to a pure value only for a small "pure constructor/identity" whitelist, or + - lifted into a new top-level node and replaced with `TaskOutputRef(new_key)`. 
+ """ + if not dts: + return operand + + taskref_cls = getattr(dts, "TaskRef", None) + if taskref_cls and isinstance(operand, taskref_cls): + key = getattr(operand, "key", None) + path = getattr(operand, "path", ()) + return TaskOutputRef(key, path or ()) + + alias_cls = getattr(dts, "Alias", None) + if alias_cls and isinstance(operand, alias_cls): + alias_ref = self._extract_alias_target(operand, task_keys) + if alias_ref is None: + raise ValueError("Alias node is missing a valid upstream source") + return alias_ref + + literal_cls = getattr(dts, "Literal", None) + if literal_cls and isinstance(operand, literal_cls): + value = getattr(operand, "value", None) + return value + + datanode_cls = getattr(dts, "DataNode", None) + if datanode_cls and isinstance(operand, datanode_cls): + value = operand.value + return value + + nested_cls = getattr(dts, "NestedContainer", None) + if nested_cls and isinstance(operand, nested_cls): + payload = getattr(operand, "value", None) + if payload is None: + payload = getattr(operand, "data", None) + value = self._unwrap_dts_operand(payload, task_keys, parent_key=parent_key) + return value + + task_cls = getattr(dts, "Task", None) + if task_cls and isinstance(operand, task_cls): + inline_key = getattr(operand, "key", None) + # Rule 3: if it is a real graph node (key is present and in task_keys), + # treat it as a dependency reference. + if inline_key is not None and inline_key in task_keys: + return TaskOutputRef(inline_key, ()) + + # Otherwise it is an inline expression. Reduce if safe, else lift. + func = self._extract_callable_from_task(operand) + if func is None: + out = self._lift_inline_task(operand, task_keys, parent_key=parent_key) + return out + + # Special-case: Dask internal identity-cast wrappers should not be called + # during adaptation. Reduce structurally by unwrapping all args and + # rebuilding the requested container type. This preserves dependency + # edges (critical for WCC) without executing arbitrary code. 
+ if self._is_identity_cast_op(func): + raw_args = getattr(operand, "args", ()) or () + raw_kwargs = getattr(operand, "kwargs", {}) or {} + typ = raw_kwargs.get("typ", None) + + values = [self._unwrap_dts_operand(a, task_keys, parent_key=parent_key) for a in raw_args] + + # Only allow safe container constructors here; otherwise lift. + safe_types = (list, tuple, set, frozenset, dict) + if typ in safe_types: + try: + casted = typ(values) + except Exception: + return self._lift_inline_task(operand, task_keys, parent_key=parent_key) + return casted + + # Unknown/unsafe typ: lift so the worker executes the real op. + return self._lift_inline_task(operand, task_keys, parent_key=parent_key) + + if self._is_pure_value_op(func): + reduced, used_lift = self._reduce_inline_task(operand, task_keys, parent_key=parent_key) + if used_lift: + # Rule 2: if any child required lifting/unknown handling, lift the whole expression. + return self._lift_inline_task(operand, task_keys, parent_key=parent_key) + if self._is_too_large_inline_value(reduced): + return self._lift_inline_task(operand, task_keys, parent_key=parent_key) + return reduced + + # Rule 1: unknown/unsafe op -> must lift. 
+ return self._lift_inline_task(operand, task_keys, parent_key=parent_key) + + if isinstance(operand, list): + return [self._unwrap_dts_operand(v, task_keys, parent_key=parent_key) for v in operand] + + if isinstance(operand, tuple): + return tuple(self._unwrap_dts_operand(v, task_keys, parent_key=parent_key) for v in operand) + + if isinstance(operand, Mapping): + return {k: self._unwrap_dts_operand(v, task_keys, parent_key=parent_key) for k, v in operand.items()} + + if isinstance(operand, set): + return {self._unwrap_dts_operand(v, task_keys, parent_key=parent_key) for v in operand} + + if isinstance(operand, frozenset): + return frozenset(self._unwrap_dts_operand(v, task_keys, parent_key=parent_key) for v in operand) + + return operand + + def _extract_alias_target(self, alias_node, task_keys): + """Discover which upstream key an alias points at and return it as a `TaskOutputRef`.""" + fields = getattr(alias_node.__class__, "__dataclass_fields__", {}) if dts else {} + + path = getattr(alias_node, "path", ()) + path = tuple(path) if path else () + + for candidate in ("alias_of", "target", "source", "ref"): + if candidate in fields: + raw_value = getattr(alias_node, candidate, None) + if self._should_wrap(raw_value, task_keys): + return TaskOutputRef(raw_value, path) + + deps = getattr(alias_node, "dependencies", None) + if deps: + deps = list(deps) + if len(deps) == 1: + return TaskOutputRef(deps[0], path) + + return None + + @staticmethod + def _build_expr(func, args, kwargs): + return func, tuple(args), dict(kwargs) + + @staticmethod + def _safe_repr(value, limit=800): + """Best-effort repr that won't explode logs on huge graphs.""" + try: + text = repr(value) + except Exception as e: + text = f"<repr failed: {e}>" + if limit and len(text) > limit: + return text[:limit] + "..." + return text + + @staticmethod + def _is_pure_value_op(func): + """Return True if `func` is safe to execute during adaptation to build a pure value. 
+ + This is intentionally conservative: only pure constructors/identity-like ops. + """ + if func in (dict, list, tuple, set, frozenset): + return True + return False + + @staticmethod + def _is_identity_cast_op(func): + """Detect Dask's private identity-cast op without executing it.""" + name = getattr(func, "__name__", None) + module = getattr(func, "__module__", None) + return bool(name == "_identity_cast" and module and module.startswith("dask")) + + def _reduce_inline_task(self, task_node, task_keys, *, parent_key=None): + """Best-effort reduction of an inline TaskSpec Task into a pure value. + + Returns (value, used_lift) where used_lift indicates a nested operand triggered lifting. + """ + func = self._extract_callable_from_task(task_node) + raw_args = getattr(task_node, "args", ()) or () + raw_kwargs = getattr(task_node, "kwargs", {}) or {} + + used_lift = False + + # unwrap args/kwargs; if we see a lifted ref, mark used_lift (Rule 2). + args = [] + for arg in raw_args: + before = len(self._lifted_nodes) + args.append(self._unwrap_dts_operand(arg, task_keys, parent_key=parent_key)) + used_lift = used_lift or (len(self._lifted_nodes) != before) + + kwargs = {} + for k, v in raw_kwargs.items(): + before = len(self._lifted_nodes) + kwargs[k] = self._unwrap_dts_operand(v, task_keys, parent_key=parent_key) + used_lift = used_lift or (len(self._lifted_nodes) != before) + + # Pure constructors are safe to execute even if they contain TaskOutputRefs + # (they just build containers of refs). Anything else is lifted. + try: + value = func(*args, **kwargs) + except Exception: + # If evaluation fails, prefer lifting over guessing semantics. 
+ return self._lift_inline_task(task_node, task_keys, parent_key=parent_key), True + + return value, used_lift + + @staticmethod + def _is_too_large_inline_value(value, *, max_container_len=2000): + """Heuristic to avoid inlining huge container constructions that would bloat memory.""" + try: + if isinstance(value, (list, tuple, set, frozenset, dict)): + return len(value) > max_container_len + except Exception: + return False + return False + + def _lift_inline_task(self, task_node, task_keys, *, parent_key=None): + """Lift an inline TaskSpec Task into its own node and return a `TaskOutputRef` to it.""" + inline_key = getattr(task_node, "key", None) + if parent_key is not None and inline_key == parent_key: + raise ValueError(f"Refusing to lift Task that would self-reference parent key {parent_key!r}") + + sig = self._dts_structural_signature(task_node, task_keys) + cached = self._lift_cache.get(sig) + if cached is not None: + return TaskOutputRef(cached, ()) + + import hashlib + + digest = hashlib.sha1(sig.encode("utf-8")).hexdigest()[:16] + base = f"__lift__{digest}" + new_key = base + # Collision handling + avoid clobbering existing user keys. + while new_key in task_keys or new_key in self._lifted_nodes: + self._lift_counter += 1 + new_key = f"{base}_{self._lift_counter}" + + self._lift_cache[sig] = new_key + self._lifted_nodes[new_key] = task_node + task_keys.add(new_key) + return TaskOutputRef(new_key, ()) + + def _dts_structural_signature(self, obj, task_keys): + """Best-effort stable signature for deduping lifted inline expressions.""" + # Keep it deterministic and conservative. If we can't make it stable, fall back to repr. 
+ try: + taskref_cls = getattr(dts, "TaskRef", None) + alias_cls = getattr(dts, "Alias", None) + literal_cls = getattr(dts, "Literal", None) + datanode_cls = getattr(dts, "DataNode", None) + nested_cls = getattr(dts, "NestedContainer", None) + task_cls = getattr(dts, "Task", None) + + if taskref_cls and isinstance(obj, taskref_cls): + return f"TaskRef({getattr(obj, 'key', None)!r},{tuple(getattr(obj, 'path', ()) or ())!r})" + if alias_cls and isinstance(obj, alias_cls): + ref = self._extract_alias_target(obj, task_keys) + return f"Alias({getattr(ref, 'task_key', None)!r},{getattr(ref, 'path', ())!r})" + if literal_cls and isinstance(obj, literal_cls): + return f"Literal({self._safe_repr(getattr(obj, 'value', None))})" + if datanode_cls and isinstance(obj, datanode_cls): + return f"DataNode({self._safe_repr(getattr(obj, 'value', None))})" + if nested_cls and isinstance(obj, nested_cls): + payload = getattr(obj, "value", None) + if payload is None: + payload = getattr(obj, "data", None) + return f"Nested({self._dts_structural_signature(payload, task_keys)})" + if task_cls and isinstance(obj, task_cls): + key = getattr(obj, "key", None) + if key is not None and key in task_keys: + return f"TaskKey({key!r})" + func = self._extract_callable_from_task(obj) + func_id = (getattr(func, "__module__", None), getattr(func, "__qualname__", None), getattr(func, "__name__", None)) + args = getattr(obj, "args", ()) or () + kwargs = getattr(obj, "kwargs", {}) or {} + arg_sigs = ",".join(self._dts_structural_signature(a, task_keys) for a in args) + kw_sigs = ",".join(f"{k}={self._dts_structural_signature(v, task_keys)}" for k, v in sorted(kwargs.items())) + return f"TaskInline(func={func_id!r},args=[{arg_sigs}],kwargs=[{kw_sigs}])" + + if isinstance(obj, list): + return "list(" + ",".join(self._dts_structural_signature(v, task_keys) for v in obj) + ")" + if isinstance(obj, tuple): + return "tuple(" + ",".join(self._dts_structural_signature(v, task_keys) for v in obj) + ")" + if 
isinstance(obj, dict): + items = ",".join( + f"{self._safe_repr(k)}:{self._dts_structural_signature(v, task_keys)}" + for k, v in sorted(obj.items(), key=lambda kv: repr(kv[0])) + ) + return "dict(" + items + ")" + if isinstance(obj, (set, frozenset)): + items = ",".join(sorted(self._dts_structural_signature(v, task_keys) for v in obj)) + return f"{type(obj).__name__}(" + items + ")" + + return f"py({self._safe_repr(obj)})" + except Exception: + return f"fallback({self._safe_repr(obj)})" + + @staticmethod + def _extract_callable_from_task(node): + candidates = ( + "function", + "op", + "callable", + "func", + "operation", + "callable_obj", + ) + + for attr in candidates: + if not hasattr(node, attr): + continue + value = getattr(node, attr) + if value is None: + continue + if callable(value): + return value + if hasattr(value, "__call__"): + return value + + if hasattr(node, "__call__") and callable(node): + return node + + return None diff --git a/taskvine/src/graph/dagvine/blueprint_graph/adaptor_test.py b/taskvine/src/graph/dagvine/blueprint_graph/adaptor_test.py new file mode 100644 index 0000000000..d641ecea64 --- /dev/null +++ b/taskvine/src/graph/dagvine/blueprint_graph/adaptor_test.py @@ -0,0 +1,537 @@ +import importlib +import importlib.util +import pathlib +import sys +import unittest +from dataclasses import dataclass, field + +from ndcctools.taskvine.dagvine.blueprint_graph.adaptor import Adaptor as _Adaptor, TaskOutputRef as _TaskOutputRef + +_MODULE_NAME = "ndcctools.taskvine.dagvine.blueprint_graph.adaptor" +_LOCAL_ADAPTOR = pathlib.Path(__file__).resolve().parent / "adaptor.py" + +if _LOCAL_ADAPTOR.exists(): + spec = importlib.util.spec_from_file_location(_MODULE_NAME, _LOCAL_ADAPTOR) + module = importlib.util.module_from_spec(spec) + assert spec.loader is not None + sys.modules[_MODULE_NAME] = module + spec.loader.exec_module(module) + Adaptor = module.Adaptor + TaskOutputRef = module.TaskOutputRef + adaptor_impl = module +else: + Adaptor = 
_Adaptor
    TaskOutputRef = _TaskOutputRef
    adaptor_impl = importlib.import_module(Adaptor.__module__)


# Tests for the legacy "s-expression" (func, args, kwargs) graph format.
class AdaptorSexprTests(unittest.TestCase):
    def test_empty_graph_returns_empty_dict(self):
        self.assertEqual(Adaptor({}).task_dict, {})

    def test_wraps_references_in_args_and_kwargs(self):
        def seed():
            return 1

        def consume(value, bonus=0):
            return value + bonus

        graph = {
            "seed": (seed,),
            "consumer": (consume, "seed", {"bonus": "seed"}),
        }

        adaptor = Adaptor(graph)
        adapted = adaptor.task_dict

        seed_func, seed_args, seed_kwargs = adapted["seed"]
        self.assertIs(seed_func, seed)
        self.assertEqual(seed_args, ())
        self.assertEqual(seed_kwargs, {})

        func, args, kwargs = adapted["consumer"]
        self.assertIs(func, consume)
        self.assertEqual(len(args), 1)
        self.assertIsInstance(args[0], TaskOutputRef)
        self.assertEqual(args[0].task_key, "seed")
        self.assertEqual(args[0].path, ())

        self.assertIsInstance(kwargs["bonus"], TaskOutputRef)
        self.assertEqual(kwargs["bonus"].task_key, "seed")
        self.assertEqual(kwargs["bonus"].path, ())

    def test_handles_nested_collections(self):
        def aggregate(structure, *, options=None):
            return structure, options

        graph = {
            "alpha": (lambda: {"value": 1},),
            "beta": (lambda: 2,),
            "collector": (
                aggregate,
                ["alpha", ("beta", "alpha")],
                {
                    "mapping": {
                        "left": "alpha",
                        "right": ["beta", {"deep": "alpha"}],
                    },
                    "flags": {"alpha", "unchanged"},
                },
            ),
        }

        adaptor = Adaptor(graph)
        adapted = adaptor.task_dict

        func, args, kwargs = adapted["collector"]
        self.assertIs(func, aggregate)

        self.assertEqual(len(args), 1)
        structure = args[0]
        self.assertIsInstance(structure, list)
        self.assertIsInstance(structure[0], TaskOutputRef)
        self.assertEqual(structure[0].task_key, "alpha")

        tuple_fragment = structure[1]
        self.assertIsInstance(tuple_fragment, tuple)
        self.assertIsInstance(tuple_fragment[0], TaskOutputRef)
        self.assertEqual(tuple_fragment[0].task_key, "beta")
        self.assertIsInstance(tuple_fragment[1], TaskOutputRef)
        self.assertEqual(tuple_fragment[1].task_key, "alpha")

        mapping = kwargs["mapping"]
        self.assertIsInstance(mapping["left"], TaskOutputRef)
        self.assertEqual(mapping["left"].task_key, "alpha")

        right_list = mapping["right"]
        self.assertIsInstance(right_list[0], TaskOutputRef)
        self.assertEqual(right_list[0].task_key, "beta")
        self.assertIsInstance(right_list[1]["deep"], TaskOutputRef)
        self.assertEqual(right_list[1]["deep"].task_key, "alpha")

        flags = kwargs["flags"]
        self.assertIsInstance(flags, set)
        ref_keys = {item.task_key for item in flags if isinstance(item, TaskOutputRef)}
        self.assertEqual(ref_keys, {"alpha"})
        self.assertIn("unchanged", flags)

    def test_literal_strings_remain_literals(self):
        def attach_unit(value, *, unit):
            return value, unit

        graph = {
            "value": (lambda: 42,),
            "with_unit": (
                attach_unit,
                "value",
                {"unit": "kg"},
            ),
        }

        adaptor = Adaptor(graph)
        adapted = adaptor.task_dict
        func, args, kwargs = adapted["with_unit"]
        self.assertIs(func, attach_unit)
        self.assertEqual(len(args), 1)
        self.assertIsInstance(args[0], TaskOutputRef)
        self.assertEqual(kwargs["unit"], "kg")

    def test_existing_task_output_ref_is_preserved(self):
        original_ref = TaskOutputRef("seed")

        graph = {
            "seed": (lambda: 5,),
            "forward": (lambda x: x, original_ref),
        }

        adapted = Adaptor(graph).task_dict
        func, args, kwargs = adapted["forward"]
        self.assertIs(func, graph["forward"][0])
        self.assertEqual(kwargs, {})
        self.assertIs(args[0], original_ref)

    def test_sets_and_frozensets_are_rewritten(self):
        graph = {
            "seed": (lambda: 1,),
            "consumer": (
                lambda payload, *, meta=None: (payload, meta),
                (
                    {
                        "set_refs": {"seed", "literal"},
                        "froze_refs": frozenset({"seed"}),
                    },
                ),
                {
                    "meta": {
                        "labels": {"seed", "plain"},
                        "deep": frozenset({"seed"}),
                    }
                },
            ),
        }

        adapted = Adaptor(graph).task_dict
        func, args, kwargs = adapted["consumer"]
        self.assertEqual(len(args), 1)

        self.assertIsInstance(args[0], tuple)
        payload = args[0][0]
        set_refs = payload["set_refs"]
        self.assertIsInstance(set_refs, set)
        self.assertIn("literal", set_refs)
        refs = [item for item in set_refs if isinstance(item, TaskOutputRef)]
        self.assertEqual(len(refs), 1)
        self.assertEqual(refs[0].task_key, "seed")

        froze_refs = payload["froze_refs"]
        self.assertIsInstance(froze_refs, frozenset)
        sole_ref = next(iter(froze_refs))
        self.assertIsInstance(sole_ref, TaskOutputRef)
        self.assertEqual(sole_ref.task_key, "seed")

        labels = kwargs["meta"]["labels"]
        self.assertIsInstance(labels, set)
        label_refs = [item for item in labels if isinstance(item, TaskOutputRef)]
        self.assertEqual(len(label_refs), 1)
        self.assertEqual(label_refs[0].task_key, "seed")
        self.assertIn("plain", labels)

        deep_froze = kwargs["meta"]["deep"]
        self.assertIsInstance(deep_froze, frozenset)
        deep_ref = next(iter(deep_froze))
        self.assertIsInstance(deep_ref, TaskOutputRef)
        self.assertEqual(deep_ref.task_key, "seed")

    def test_callable_tuple_preserves_callable(self):
        def source():
            return 2

        def apply(func_tuple):
            fn, value = func_tuple
            return fn(value)

        increment = lambda x: x + 1  # noqa: E731

        graph = {
            "value": (source,),
            "result": (
                apply,
                (increment, "value"),
            ),
        }

        adapted = Adaptor(graph).task_dict
        func, args, kwargs = adapted["result"]
        self.assertIs(func, apply)
        nested = args[0]
        self.assertIsInstance(nested, tuple)
        self.assertIs(nested[0], increment)
        self.assertIsInstance(nested[1], TaskOutputRef)
        self.assertEqual(nested[1].task_key, "value")

    def test_invalid_task_definition_raises(self):
        graph = {"broken": []}
        with self.assertRaises(TypeError):
            Adaptor(graph)

    def test_large_graph_scaling(self):
        size = 500

        graph = {"root": (lambda: 1,)}
        for i in range(1, size + 1):
            key = f"node_{i}"
            prev_key = "root" if i == 1 else f"node_{i - 1}"
            graph[key] = (
                lambda x, inc=1: x + inc,
                prev_key,
                {"inc": i},
            )

        graph["fanout"] = (
            lambda *vals: sum(vals),
            tuple(graph.keys()),
        )

        adapted = Adaptor(graph).task_dict

        self.assertEqual(len(adapted), len(graph))

        fanout_func, fanout_args, fanout_kwargs = adapted["fanout"]
        self.assertEqual(fanout_kwargs, {})
        self.assertEqual(len(fanout_args), 1)
        arg_tuple = fanout_args[0]
        self.assertEqual(len(arg_tuple), len(graph) - 1)
        refs = [item for item in arg_tuple if isinstance(item, TaskOutputRef)]
        self.assertEqual(len(refs), len(graph) - 1)
        ref_keys = {ref.task_key for ref in refs}
        expected_keys = set(graph.keys()) - {"fanout"}
        self.assertEqual(ref_keys, expected_keys)


# Minimal stand-ins for dask's TaskSpec node classes, used to exercise the
# TaskSpec adaptation path without a dask dependency.
class _FakeGraphNode:
    __slots__ = ()


@dataclass
class _FakeTaskRef(_FakeGraphNode):
    key: str
    path: tuple = field(default_factory=tuple)


@dataclass
class _FakeAlias(_FakeGraphNode):
    target: str
    path: tuple = field(default_factory=tuple)
    dependencies: frozenset = field(default_factory=frozenset)

    def __post_init__(self):
        if not self.dependencies:
            self.dependencies = frozenset({self.target})


@dataclass
class _FakeLiteral(_FakeGraphNode):
    value: object


@dataclass
class _FakeDataNode(_FakeGraphNode):
    value: object


@dataclass
class _FakeNestedContainer(_FakeGraphNode):
    value: object


@dataclass
class _FakeTask(_FakeGraphNode):
    key: str
    function: object
    args: tuple = field(default_factory=tuple)
    kwargs: dict = field(default_factory=dict)
    dependencies: frozenset = field(default_factory=frozenset)

    def __post_init__(self):
        if not self.dependencies:
            deps = set()
            for arg in self.args:
                if isinstance(arg, _FakeTaskRef):
                    deps.add(arg.key)
            for value in self.kwargs.values():
                if isinstance(value, _FakeTaskRef):
                    deps.add(value.key)
            self.dependencies = frozenset(deps)


class _FakeDtsModule:
    GraphNode = _FakeGraphNode
    Task = _FakeTask
    TaskRef = _FakeTaskRef
    Alias = _FakeAlias
    Literal = _FakeLiteral
    DataNode = _FakeDataNode
    NestedContainer = _FakeNestedContainer

    @staticmethod
    def convert_legacy_graph(task_dict):
        return task_dict


# Tests for the dask TaskSpec graph format, run against the fake dts module.
class AdaptorTaskSpecTests(unittest.TestCase):
    @classmethod
    def setUpClass(cls):
        cls._original_dts = adaptor_impl.dts
        adaptor_impl.dts = _FakeDtsModule()

    @classmethod
    def tearDownClass(cls):
        adaptor_impl.dts = cls._original_dts

    def test_adapts_taskspec_graph(self):
        def add_bonus(value, bonus):
            return value + bonus

        def combine(upstream, payload=None):
            return upstream, payload

        graph = {
            "raw": _FakeDataNode(value=7),
            "literal_wrapper": _FakeTask(
                key="literal_wrapper",
                function=add_bonus,
                args=(_FakeTaskRef("raw"),),
                kwargs={"bonus": _FakeLiteral(3)},
            ),
            "alias": _FakeAlias(target="literal_wrapper", path=("result",)),
            "aggregate": _FakeTask(
                key="aggregate",
                function=combine,
                args=(_FakeTaskRef("alias", path=("payload",)),),
                kwargs={
                    "payload": _FakeNestedContainer(
                        value=[
                            _FakeTaskRef("raw"),
                            {"inner": _FakeTaskRef("alias", path=("extra",))},
                        ]
                    )
                },
            ),
        }

        adaptor = Adaptor(graph)
        adapted = adaptor.task_dict

        raw_func, raw_args, raw_kwargs = adapted["raw"]
        self.assertEqual(raw_args, (7,))
        self.assertEqual(raw_kwargs, {})
        self.assertEqual(raw_func(raw_args[0]), raw_args[0])

        wrapper_func, wrapper_args, wrapper_kwargs = adapted["literal_wrapper"]
        self.assertIs(wrapper_func, add_bonus)
        self.assertEqual(len(wrapper_args), 1)
        self.assertIsInstance(wrapper_args[0], TaskOutputRef)
        self.assertEqual(wrapper_args[0].task_key, "raw")
        self.assertEqual(wrapper_kwargs["bonus"], 3)

        self.assertIn("alias", adapted)
        alias_func, alias_args, alias_kwargs = adapted["alias"]
        self.assertIs(alias_func, adaptor_impl._identity)
        self.assertEqual(alias_kwargs, {})
        self.assertEqual(len(alias_args), 1)
        alias_input = alias_args[0]
        self.assertIsInstance(alias_input, TaskOutputRef)
        self.assertEqual(alias_input.task_key, "literal_wrapper")
        self.assertEqual(alias_input.path, ("result",))

        agg_func, agg_args, agg_kwargs = adapted["aggregate"]
        self.assertIs(agg_func, combine)
        self.assertEqual(len(agg_args), 1)
        primary_input = agg_args[0]
        self.assertIsInstance(primary_input, TaskOutputRef)
        self.assertEqual(primary_input.task_key, "alias")
        self.assertEqual(primary_input.path, ("payload",))

        payload = agg_kwargs["payload"]
        self.assertIsInstance(payload, list)
        self.assertIsInstance(payload[0], TaskOutputRef)
        self.assertEqual(payload[0].task_key, "raw")
        nested_inner = payload[1]["inner"]
        self.assertIsInstance(nested_inner, TaskOutputRef)
        self.assertEqual(nested_inner.task_key, "alias")
        self.assertEqual(nested_inner.path, ("extra",))

    def test_taskspec_data_node_literal_passthrough(self):
        graph = {"literal": _FakeDataNode(value=11)}
        adapted = Adaptor(graph).task_dict
        func, args, kwargs = adapted["literal"]
        self.assertEqual(args, (11,))
        self.assertEqual(kwargs, {})
        self.assertEqual(func(args[0]), 11)

    def test_taskspec_alias_with_missing_target_raises(self):
        alias = _FakeAlias(target="ghost", dependencies=frozenset())
        alias.dependencies = frozenset()
        graph = {"alias": alias}
        with self.assertRaises(ValueError):
            Adaptor(graph)

    def test_taskspec_nested_container_fallback_to_data(self):
        container = _FakeNestedContainer(value=None)
        container.data = [_FakeTaskRef("raw")]
        graph = {
            "raw": _FakeDataNode(value=5),
            "use_container": _FakeTask(
                key="use_container",
                function=lambda payload: payload,
                args=(container,),
            ),
        }

        adapted = Adaptor(graph).task_dict
        func, args, kwargs = adapted["use_container"]
        self.assertEqual(kwargs, {})
        (payload,) = args
        self.assertIsInstance(payload, list)
        self.assertIsInstance(payload[0], TaskOutputRef)
        self.assertEqual(payload[0].task_key, "raw")

    def test_taskspec_task_missing_function_raises(self):
        graph = {
            "broken": _FakeTask(
                key="broken",
                function=None,
                args=(),
            )
        }
        with self.assertRaises(TypeError):
            Adaptor(graph)

    def test_taskspec_task_nested_inside_args_is_lifted(self):
        # Inline Tasks inside args/kwargs should be lifted unless they are a top-level key
        # reference or a pure value constructor. Here `inner` is an inline task with
        # an unknown (lambda) op, so it must be lifted to a new node.
        inner = _FakeTask(
            key=None,
            function=lambda: 1,
            args=(),
        )
        graph = {
            "outer": _FakeTask(
                key="outer",
                function=lambda x: x,
                args=(inner,),
            )
        }
        adapted = Adaptor(graph).task_dict
        outer_func, outer_args, outer_kwargs = adapted["outer"]
        self.assertEqual(outer_kwargs, {})
        self.assertEqual(len(outer_args), 1)
        self.assertIsInstance(outer_args[0], TaskOutputRef)
        lifted_key = outer_args[0].task_key
        self.assertIn(lifted_key, adapted)
        self.assertNotEqual(lifted_key, "outer")

    def test_taskspec_identity_cast_is_structurally_reduced(self):
        # Ensure we never execute dask private identity-cast during adaptation.
        def fake_identity_cast(x, *_, **__):
            raise RuntimeError("must not be executed")

        fake_identity_cast.__name__ = "_identity_cast"
        fake_identity_cast.__module__ = "dask._fake"

        graph = {
            "raw0": _FakeDataNode(value=5),
            "raw1": _FakeDataNode(value=6),
            "outer": _FakeTask(
                key="outer",
                function=lambda x: x,
                args=(
                    _FakeTask(
                        key=None,
                        function=fake_identity_cast,
                        args=(
                            _FakeTaskRef("raw0"),
                            _FakeTaskRef("raw1"),
                        ),
                        kwargs={"typ": list},
                    ),
                ),
            ),
        }

        adapted = Adaptor(graph).task_dict
        _, outer_args, outer_kwargs = adapted["outer"]
        self.assertEqual(outer_kwargs, {})
        self.assertEqual(len(outer_args), 1)
        self.assertIsInstance(outer_args[0], list)
        self.assertEqual(len(outer_args[0]), 2)
        self.assertIsInstance(outer_args[0][0], TaskOutputRef)
        self.assertIsInstance(outer_args[0][1], TaskOutputRef)
        self.assertEqual(outer_args[0][0].task_key, "raw0")
        self.assertEqual(outer_args[0][1].task_key, "raw1")
        self.assertFalse(any(str(k).startswith("__lift__") for k in adapted.keys()))


if __name__ == "__main__":
    unittest.main()
class TaskOutputWrapper:
    """Lightweight wrapper around a task result that optionally pads the payload.

    The padding lets tests model large outputs without altering the logical result.
    """

    def __init__(self, result, extra_size_mb=None):
        # `result` is the real task output; `extra_obj` is inert ballast used
        # only to inflate the serialized size for experiments (None/0 -> none).
        self.result = result
        self.extra_obj = bytearray(int(extra_size_mb * 1024 * 1024)) if extra_size_mb and extra_size_mb > 0 else None

    @staticmethod
    def load_from_path(path):
        """Deserialize a TaskOutputWrapper from `path` and return its payload.

        Raises FileNotFoundError if the file is missing, and TypeError if the
        file does not contain a TaskOutputWrapper.
        """
        try:
            with open(path, "rb") as f:
                result_obj = cloudpickle.load(f)
        except FileNotFoundError:
            raise FileNotFoundError(f"Task result file not found at {path}")
        # BUG FIX: this check used to be a bare `assert`, which is stripped
        # under `python -O` and would let a corrupted file return a bogus
        # object; validate explicitly instead.
        if not isinstance(result_obj, TaskOutputWrapper):
            raise TypeError("Loaded object is not of type TaskOutputWrapper")
        return result_obj.result


# A reference to a task output. This is used to represent the output of a task as a dependency of another task.
class TaskOutputRef:
    __slots__ = ("task_key", "path")

    def __init__(self, task_key, path=()):
        self.task_key = task_key
        self.path = tuple(path)

    def __getitem__(self, key):
        # Indexing a ref yields a new ref with the index appended to `path`;
        # a tuple key extends the path by several steps at once.
        if isinstance(key, tuple):
            return TaskOutputRef(self.task_key, self.path + key)
        return TaskOutputRef(self.task_key, self.path + (key,))
class BlueprintGraph:
    """Directed acyclic graph of logical task dependencies, used to build the C vine graph."""

    # Types that are never recursed into when scanning for TaskOutputRefs.
    _LEAF_TYPES = (str, bytes, bytearray, memoryview, int, float, bool, type(None))

    def __init__(self):
        self.task_dict = {}  # task_key -> (func, frozen_args, frozen_kwargs)

        self.parents_of = defaultdict(set)  # task_key -> set of task_keys
        self.children_of = defaultdict(set)  # task_key -> set of task_keys

        self.producer_of = {}  # filename -> task_key
        self.consumers_of = defaultdict(set)  # filename -> set of task_keys

        self.outfile_remote_name = defaultdict(lambda: None)  # task_key -> remote outfile name, will be set by vine graph

        self.pykey2cid = {}  # py_key -> c_id
        self.cid2pykey = {}  # c_id -> py_key

        self.extra_task_output_size_mb = {}  # task_key -> extra size in MB
        self.extra_task_sleep_time = {}  # task_key -> extra sleep time in seconds

    def _visit_task_output_refs(self, obj, on_ref, *, rewrite: bool):
        """Recursively walk `obj`, invoking `on_ref` on every TaskOutputRef.

        With rewrite=True, return a structurally-equal copy with every ref
        replaced by `on_ref`'s return value; with rewrite=False the walk is
        purely observational and returns None.
        """
        seen = set()

        def rec(x):
            if isinstance(x, TaskOutputRef):
                return on_ref(x)

            if x is None or isinstance(x, self._LEAF_TYPES):
                return x if rewrite else None

            # Visit each shared sub-object once to guard against cycles.
            oid = id(x)
            if oid in seen:
                return x if rewrite else None
            seen.add(oid)

            if isinstance(x, Mapping):
                for k in x.keys():
                    if isinstance(k, TaskOutputRef):
                        raise ValueError("TaskOutputRef cannot be used as dict key")
                if not rewrite:
                    for v in x.values():
                        rec(v)
                    return None
                return {k: rec(v) for k, v in x.items()}

            if is_dataclass(x) and not isinstance(x, type):
                if not rewrite:
                    for f in fields(x):
                        rec(getattr(x, f.name))
                    return None
                updates = {f.name: rec(getattr(x, f.name)) for f in fields(x)}
                try:
                    return replace(x, **updates)
                except Exception:
                    # e.g. init=False fields make replace() fail; rebuild directly.
                    return x.__class__(**updates)

            if isinstance(x, tuple) and hasattr(x, "_fields"):  # namedtuple
                if not rewrite:
                    for v in x:
                        rec(v)
                    return None
                return x.__class__(*(rec(v) for v in x))

            if isinstance(x, (list, tuple, set, frozenset, deque)):
                if not rewrite:
                    for v in x:
                        rec(v)
                    return None
                it = (rec(v) for v in x)
                if isinstance(x, list):
                    return list(it)
                if isinstance(x, tuple):
                    return tuple(it)
                if isinstance(x, set):
                    return set(it)
                if isinstance(x, frozenset):
                    return frozenset(it)
                return deque(it)

            return x if rewrite else None

        return rec(obj)

    def _find_parents(self, obj):
        """Collect the task keys referenced (via TaskOutputRef) anywhere inside `obj`."""
        parents = set()

        def on_ref(r):
            parents.add(r.task_key)
            return None

        self._visit_task_output_refs(obj, on_ref, rewrite=False)
        return parents

    def add_task(self, task_key, func, *args, **kwargs):
        """Register a task and wire the dependency edges implied by refs in its arguments."""
        if task_key in self.task_dict:
            raise ValueError(f"Task {task_key} already exists")

        self.task_dict[task_key] = (func, args, kwargs)

        parents = self._find_parents(args) | self._find_parents(kwargs)

        for parent in parents:
            self.parents_of[task_key].add(parent)
            self.children_of[parent].add(task_key)

    def task_produces(self, task_key, *filenames):
        for filename in filenames:
            # a file can only be produced by one task
            if filename in self.producer_of:
                # BUG FIX: the message used to print the literal "(unknown)"
                # instead of naming the conflicting file.
                raise ValueError(f"File {filename} already produced by task {self.producer_of[filename]}")
            self.producer_of[filename] = task_key

    def task_consumes(self, task_key, *filenames):
        for filename in filenames:
            # a file can be consumed by multiple tasks
            self.consumers_of[filename].add(task_key)

    def save_task_output(self, task_key, output):
        """Serialize `output` (optionally padded) to the task's remote outfile."""
        with open(self.outfile_remote_name[task_key], "wb") as f:
            # ROBUSTNESS: use .get() so tasks without configured padding do not
            # raise KeyError (extra_size_mb=None means "no padding").
            wrapped_output = TaskOutputWrapper(output, extra_size_mb=self.extra_task_output_size_mb.get(task_key))
            cloudpickle.dump(wrapped_output, f)

    def load_task_output(self, task_key):
        return TaskOutputWrapper.load_from_path(self.outfile_remote_name[task_key])

    def get_topological_order(self):
        """Return a topological order (Kahn's algorithm); raise ValueError on a cycle."""
        indegree = {}
        for task_key in self.task_dict:
            indegree[task_key] = len(self.parents_of.get(task_key, ()))

        q = deque(t for t, d in indegree.items() if d == 0)
        order = []

        while q:
            u = q.popleft()
            order.append(u)

            for v in self.children_of.get(u, ()):
                indegree[v] -= 1
                if indegree[v] == 0:
                    q.append(v)

        if len(order) != len(self.task_dict):
            raise ValueError("Graph has a cycle or missing dependencies")

        return order

    def verify_topo(self, topo):
        """Assert that `topo` places every parent before all of its children."""
        pos = {k: i for i, k in enumerate(topo)}
        for child, parents in self.parents_of.items():
            for p in parents:
                if pos[p] > pos[child]:
                    raise AssertionError(f"bad topo: parent {p} after child {child}")
        print("topo verified: ok")

    def finalize(self):
        # build the dependencies determined by files produced and consumed
        for file, producer in self.producer_of.items():
            for consumer in self.consumers_of.get(file, ()):
                self.parents_of[consumer].add(producer)
                self.children_of[producer].add(consumer)
def compute_task(bg, task_expr):
    """Evaluate one task expression `(func, args, kwargs)` against graph `bg`.

    TaskOutputRef placeholders inside args/kwargs are replaced with the
    referenced tasks' saved outputs (optionally drilled into via `ref.path`)
    before `func` is invoked; each upstream output is loaded at most once.
    """
    func, args, kwargs = task_expr
    cache = {}

    def _follow_path(value, path):
        # Walk a path of indices/keys/attributes into a loaded output.
        current = value
        for token in path:
            if isinstance(current, (list, tuple, dict)):
                current = current[token]
            else:
                current = getattr(current, token)
        return current

    def on_ref(r):
        # BUG FIX: keying on `cache.get(...) is None` re-loaded outputs whose
        # legitimate value is None; test membership instead.
        if r.task_key not in cache:
            cache[r.task_key] = bg.load_task_output(r.task_key)
        x = cache[r.task_key]
        if r.path:
            return _follow_path(x, r.path)
        return x

    r_args = bg._visit_task_output_refs(args, on_ref, rewrite=True)
    r_kwargs = bg._visit_task_output_refs(kwargs, on_ref, rewrite=True)

    return func(*r_args, **r_kwargs)


def compute_single_key(vine_key):
    """Library-side entry point: compute the task mapped to `vine_key` and persist its output."""
    bg = load_variable_from_library('graph')

    task_key = bg.cid2pykey[vine_key]
    task_expr = bg.task_dict[task_key]

    output = compute_task(bg, task_expr)

    # ROBUSTNESS: optional artificial delay used by experiments; previously a
    # plain dict index that raised KeyError for tasks with no configured delay.
    time.sleep(bg.extra_task_sleep_time.get(task_key, 0))

    bg.save_task_output(task_key, output)
class ProxyLibrary:
    """Wraps the TaskVine library task that executes the graph's functions remotely."""

    def __init__(self, py_manager):
        self.py_manager = py_manager

        self.name = None
        self.libcores = None

        self.libtask = None

        # these modules are always included in the preamble of the library task, so that function calls can execute directly
        # using the loaded context without importing them over and over again
        self.hoisting_modules = [
            os, cloudpickle, BlueprintGraph, TaskOutputRef, TaskOutputWrapper, uuid, hashlib, random, types, collections, time,
            load_variable_from_library, compute_single_key
        ]

        # environment files serve as additional inputs to the library task, where each key is the local path and the value is the remote path
        # those local files will be sent remotely to the workers so tasks can access them as appropriate
        self.env_files = {}

        # context loader is a function that will be used to load the library context on remote nodes.
        self.context_loader_func = None
        self.context_loader_args = []
        self.context_loader_kwargs = {}

        self.local_path = None
        self.remote_path = None

    def set_libcores(self, libcores):
        self.libcores = libcores

    def set_name(self, name):
        self.name = name

    def add_hoisting_modules(self, new_modules):
        assert isinstance(new_modules, list), "new_modules must be a list of modules"
        self.hoisting_modules.extend(new_modules)

    def add_env_files(self, new_env_files):
        assert isinstance(new_env_files, dict), "new_env_files must be a dictionary"
        self.env_files.update(new_env_files)

    def set_context_loader(self, context_loader_func, context_loader_args=None, context_loader_kwargs=None):
        """Record the function (and its args/kwargs) used to build the library context remotely.

        BUG FIX: the defaults used to be mutable ([] and {}), so every instance
        relying on them shared — and could silently corrupt — the very same
        list/dict objects across calls; use None sentinels instead.
        """
        self.context_loader_func = context_loader_func
        self.context_loader_args = [] if context_loader_args is None else context_loader_args
        self.context_loader_kwargs = {} if context_loader_kwargs is None else context_loader_kwargs

    def get_context_size(self):
        """Return the size in MB (rounded to 2 decimals) of the serialized context blob.

        Assumes context_loader_args[0] holds the already-dumped bytes.
        """
        dumped_data = self.context_loader_args[0]
        serialized = round(len(dumped_data) / 1024 / 1024, 2)
        return serialized

    def install(self):
        """Create the library task from `compute_single_key` and install it on the manager."""
        assert self.name is not None, "Library name must be set before installing (use set_name method)"
        assert self.libcores is not None, "Library cores must be set before installing (use set_libcores method)"

        self.libtask = self.py_manager.create_library_from_functions(
            self.name,
            compute_single_key,
            library_context_info=[self.context_loader_func, self.context_loader_args, self.context_loader_kwargs],
            add_env=False,
            function_infile_load_mode="json",
            hoisting_modules=self.hoisting_modules,
        )
        for local, remote in self.env_files.items():
            # check if the local file exists
            if not os.path.exists(local):
                raise FileNotFoundError(f"Local file {local} not found")
            # attach as the input file to the library task
            self.libtask.add_input(self.py_manager.declare_file(local, cache=True, peer_transfer=True), remote)
        self.libtask.set_cores(self.libcores)
        self.libtask.set_function_slots(self.libcores)
        self.py_manager.install_library(self.libtask)

    def uninstall(self):
        self.py_manager.remove_library(self.name)
+def color_text(text, color_code): + """Render a colored string for the friendly status banners Vineyard prints at start-up.""" + return f"\033[{color_code}m{text}\033[0m" + + +class GraphParams: + def __init__(self): + """Hold all tweakable knobs (manager-side, vine_graph-side, and misc).""" + # Manager-level knobs: fed into `Manager.tune(...)` before execution. + self.vine_manager_tuning_params = { + "worker-source-max-transfers": 100, + "max-retrievals": -1, + "prefer-dispatch": 1, + "transient-error-interval": 1, + "attempt-schedule-depth": 10000, + "temp-replica-count": 1, + "enforce-worker-eviction-interval": -1, + "shift-disk-load": 0, + "clean-redundant-replicas": 0, + } + # VineGraph-level knobs: forwarded to the underlying vine graph via VineGraphClient. + self.vine_graph_tuning_params = { + "failure-injection-step-percent": -1, + "task-priority-mode": "largest-input-first", + "prune-depth": 1, + "output-dir": "./outputs", + "checkpoint-dir": "./checkpoints", + "checkpoint-fraction": 0, + "progress-bar-update-interval-sec": 0.1, + "time-metrics-filename": 0, + "enable-debug-log": 1, + "auto-recovery": 1, + "max-retry-attempts": 15, + "retry-interval-sec": 1, + } + # Misc knobs used purely on the Python side (e.g., generate fake outputs). 
+ self.other_params = { + "schedule": "worst", + "libcores": 16, + "failure-injection-step-percent": -1, + "extra-task-output-size-mb": [0, 0], + "extra-task-sleep-time": [0, 0], + } + + def print_params(self): + """Dump current knob values to stdout for debugging.""" + all_params = {**self.vine_manager_tuning_params, **self.vine_graph_tuning_params, **self.other_params} + print(json.dumps(all_params, indent=4)) + + def update_param(self, param_name, new_value): + """Update a single knob, falling back to manager-level if unknown.""" + if param_name in self.vine_manager_tuning_params: + self.vine_manager_tuning_params[param_name] = new_value + elif param_name in self.vine_graph_tuning_params: + self.vine_graph_tuning_params[param_name] = new_value + elif param_name in self.other_params: + self.other_params[param_name] = new_value + else: + self.vine_manager_tuning_params[param_name] = new_value + + def get_value_of(self, param_name): + """Helper so DAGVine can pull a knob value without caring where it lives.""" + if param_name in self.vine_manager_tuning_params: + return self.vine_manager_tuning_params[param_name] + elif param_name in self.vine_graph_tuning_params: + return self.vine_graph_tuning_params[param_name] + elif param_name in self.other_params: + return self.other_params[param_name] + else: + raise ValueError(f"Invalid param name: {param_name}") + + +class DAGVine(Manager): + def __init__(self, + *args, + **kwargs): + """Spin up a TaskVine manager that knows how to mirror a Python DAG into the C orchestration layer.""" + + # React to Ctrl+C so we can tear down the graphs cleanly. + signal.signal(signal.SIGINT, self._on_sigint) + + self.params = GraphParams() + + # Ensure run-info templates don't accumulate garbage between runs. 
+        run_info_path = kwargs.get("run_info_path", None)
+        run_info_template = kwargs.get("run_info_template", None)
+
+        self.run_info_template_path = os.path.join(run_info_path, run_info_template) if run_info_path and run_info_template else None
+        if self.run_info_template_path:
+            delete_all_files(self.run_info_template_path)
+
+        # Boot the underlying TaskVine manager. The TaskVine manager keeps alive until the dagvine object is destroyed
+        super().__init__(*args, **kwargs)
+        self.runtime_directory = cvine.vine_get_runtime_directory(self._taskvine)
+
+        print(f"=== Manager name: {color_text(self.name, 92)}")
+        print(f"=== Manager port: {color_text(self.port, 92)}")
+        print(f"=== Runtime directory: {color_text(self.runtime_directory, 92)}")
+        self._sigint_received = False
+
+    def param(self, param_name):
+        """Convenience accessor so callers can read tuned parameters at runtime."""
+        return self.params.get_value_of(param_name)
+
+    def update_params(self, new_params):
+        """Apply a batch of overrides before constructing graphs.
+
+        All parameter dictionaries—whether set via `update_params()` or passed
+        to `run(..., params={...})`—flow through here. We funnel each key into
+        the appropriate bucket (manager/vine_graph/misc). Subsequent runs can override
+        them by calling this again.
+ """ + assert isinstance(new_params, dict), "new_params must be a dict" + for k, new_v in new_params.items(): + self.params.update_param(k, new_v) + + def tune_manager(self): + """Push our manager-side tuning knobs into the C layer.""" + for k, v in self.params.vine_manager_tuning_params.items(): + try: + self.tune(k, v) + except Exception: + raise ValueError(f"Unrecognized parameter: {k}") + + def tune_vine_graph(self, vine_graph): + """Push VineGraph-specific tuning knobs before we build the graph.""" + for k, v in self.params.vine_graph_tuning_params.items(): + vine_graph.tune(k, str(v)) + + def build_blueprint_graph(self, task_dict): + if isinstance(task_dict, BlueprintGraph): + bg = task_dict + else: + bg = BlueprintGraph() + + for k, v in task_dict.items(): + func, args, kwargs = v + assert callable(func), f"Task {k} does not have a callable" + bg.add_task(k, func, *args, **kwargs) + + bg.finalize() + + return bg + + def build_vine_graph(self, py_graph, target_keys): + """Mirror the Python graph into VineGraph, preserving ordering and targets.""" + assert py_graph is not None, "Python graph must be built before building the VineGraph" + + vine_graph = VineGraphClient(self._taskvine) + + vine_graph.set_proxy_function(compute_single_key) + + # Tune both manager and vine_graph before we start adding nodes/edges. + self.tune_manager() + self.tune_vine_graph(vine_graph) + + topo_order = py_graph.get_topological_order() + + # Build the cross-language mapping as we walk the topo order. + for k in topo_order: + node_id = vine_graph.add_node(k) + py_graph.pykey2cid[k] = node_id + py_graph.cid2pykey[node_id] = k + for pk in py_graph.parents_of[k]: + vine_graph.add_dependency(pk, k) + + # Now that every node is present, mark which ones are final outputs. 
+        for k in target_keys:
+            vine_graph.set_target(k)
+
+        vine_graph.compute_topology_metrics()
+
+        return vine_graph
+
+    def build_graphs(self, task_dict, target_keys):
+        """Create both the python side graph and its C counterpart, wiring outputs for later use."""
+        # Build the python side graph.
+        py_graph = self.build_blueprint_graph(task_dict)
+
+        # filter out target keys that are not in the collection dict
+        missing_keys = [k for k in target_keys if k not in py_graph.task_dict]
+        if missing_keys:
+            print(f"=== Warning: the following target keys are not in the graph: {','.join(map(str, missing_keys))}")
+            target_keys = list(set(target_keys) - set(missing_keys))
+
+        # Build the c side graph.
+        vine_graph = self.build_vine_graph(py_graph, target_keys)
+
+        # Cross-fill the outfile locations so the runtime graph knows where to read/write.
+        for k in py_graph.pykey2cid:
+            outfile_remote_name = vine_graph.get_node_outfile_remote_name(k)
+            py_graph.outfile_remote_name[k] = outfile_remote_name
+
+        # For each task, declare the input and output files in the vine graph
+        for filename in py_graph.producer_of:
+            task_key = py_graph.producer_of[filename]
+            print(f"adding output file {filename} to task {task_key}")
+            vine_graph.add_task_output(task_key, filename)
+        for filename in py_graph.consumers_of:
+            for task_key in py_graph.consumers_of[filename]:
+                print(f"adding input file {filename} to task {task_key}")
+                vine_graph.add_task_input(task_key, filename)
+
+        return py_graph, vine_graph
+
+    def create_proxy_library(self, py_graph, vine_graph, hoisting_modules, env_files):
+        """Package up the python side graph as a TaskVine library."""
+        proxy_library = ProxyLibrary(self)
+        proxy_library.add_hoisting_modules(hoisting_modules)
+        proxy_library.add_env_files(env_files)
+        proxy_library.set_context_loader(context_loader_func, context_loader_args=[cloudpickle.dumps(py_graph)])
+        proxy_library.set_libcores(self.param("libcores"))
+
proxy_library.set_name(vine_graph.get_proxy_library_name()) + + return proxy_library + + def run(self, task_dict, target_keys=[], params={}, hoisting_modules=[], env_files={}, adapt_dask=False): + """High-level entry point: normalise input, build graphs, ship the library, execute, and return results.""" + time_start = time.time() + + # first update the params so that they can be used for the following construction + self.update_params(params) + + if adapt_dask: + task_dict = Adaptor(task_dict).converted + + # Build both the Python DAG and its C mirror. + py_graph, vine_graph = self.build_graphs(task_dict, target_keys) + + # set extra task output size and sleep time for each task + for k in py_graph.task_dict: + py_graph.extra_task_output_size_mb[k] = random.uniform(*self.param("extra-task-output-size-mb")) + py_graph.extra_task_sleep_time[k] = random.uniform(*self.param("extra-task-sleep-time")) + + # Ship the execution context to workers via a proxy library + proxy_library = self.create_proxy_library(py_graph, vine_graph, hoisting_modules, env_files) + proxy_library.install() + + try: + print(f"=== Library serialized size: {color_text(proxy_library.get_context_size(), 92)} MB") + print(f"Time taken to initialize the graph in Python: {time.time() - time_start:.6f} seconds") + vine_graph.execute() + results = {} + for k in target_keys: + if k not in py_graph.task_dict: + continue + outfile_path = os.path.join(self.param("output-dir"), py_graph.outfile_remote_name[k]) + results[k] = TaskOutputWrapper.load_from_path(outfile_path) + makespan_s = round(vine_graph.get_makespan_us() / 1e6, 6) + throughput_tps = round(len(py_graph.task_dict) / makespan_s, 6) + print(f"Makespan: {color_text(makespan_s, 92)} seconds") + print(f"Throughput: {color_text(throughput_tps, 92)} tasks/second") + return results + finally: + try: + proxy_library.uninstall() + finally: + vine_graph.delete() + + def _on_sigint(self, signum, frame): + self._sigint_received = True + raise 
KeyboardInterrupt diff --git a/taskvine/src/graph/dagvine/vine_graph/.gitignore b/taskvine/src/graph/dagvine/vine_graph/.gitignore new file mode 100644 index 0000000000..15309787ad --- /dev/null +++ b/taskvine/src/graph/dagvine/vine_graph/.gitignore @@ -0,0 +1 @@ +*.o \ No newline at end of file diff --git a/taskvine/src/graph/dagvine/vine_graph/Makefile b/taskvine/src/graph/dagvine/vine_graph/Makefile new file mode 100644 index 0000000000..090682269a --- /dev/null +++ b/taskvine/src/graph/dagvine/vine_graph/Makefile @@ -0,0 +1,90 @@ +include ../../../../../config.mk +include ../../../../../rules.mk + +PROJECT_NAME = dagvine + +LOCAL_LINKAGE+=${CCTOOLS_HOME}/taskvine/src/manager/libtaskvine.a ${CCTOOLS_HOME}/dttools/src/libdttools.a +LOCAL_CCFLAGS+=-I ${CCTOOLS_HOME}/taskvine/src/manager +LOCAL_CCFLAGS+=-I ${CCTOOLS_HOME}/taskvine/src/graph/$(PROJECT_NAME)/vine_graph + +SOURCE_DIR = $(CCTOOLS_HOME)/taskvine/src/graph/$(PROJECT_NAME)/vine_graph +MODULE_ROOT = $(CCTOOLS_PYTHON3_PATH)/ndcctools/taskvine/$(PROJECT_NAME) +MODULE_DIR = $(MODULE_ROOT)/vine_graph + +SOURCES = vine_node.c vine_graph.c +OBJECTS = $(SOURCES:%.c=%.o) + +BUILD_DIR := ../build + +# put SWIG generated sources and Python extension artifacts into ../build/ +SWIG_I = vine_graph.i + +WRAP_NAME = vine_graph_wrap +MODULE_NAME = vine_graph_capi + +SWIG_WRAP = $(BUILD_DIR)/$(WRAP_NAME).c +WRAP_OBJ = $(BUILD_DIR)/$(WRAP_NAME).o +PYMODULE = $(BUILD_DIR)/_$(MODULE_NAME).$(CCTOOLS_DYNAMIC_SUFFIX) + +LIBRARIES = +PYDEPS = $(WRAP_OBJ) $(OBJECTS) +PYLINK_INPUT = $(WRAP_OBJ) $(OBJECTS) +PROGRAMS = +SCRIPTS = +TARGETS = $(LIBRARIES) $(PYMODULE) $(PROGRAMS) + +.PHONY: all install clean lint format + +all: $(TARGETS) + +$(PROGRAMS): $(EXTERNALS) + +$(BUILD_DIR): + mkdir -p $(BUILD_DIR) + +$(SWIG_WRAP): $(SWIG_I) vine_graph.h | $(BUILD_DIR) + $(CCTOOLS_SWIG) -python -threads -relativeimport \ + -I$(CCTOOLS_HOME)/taskvine/src/manager \ + -I$(CCTOOLS_HOME)/dttools/src \ + 
-I$(CCTOOLS_HOME)/taskvine/src/graph/$(PROJECT_NAME) \ + -I$(CCTOOLS_HOME)/taskvine/src/graph/$(PROJECT_NAME)/vine_graph \ + -outdir $(BUILD_DIR) -o $@ $< + +# Build Python module (mimic bindings: silence SWIG warnings and build PIC) +$(WRAP_OBJ): $(SWIG_WRAP) + $(CCTOOLS_CC) -o $@ -c $(CCTOOLS_INTERNAL_CCFLAGS) $(LOCAL_CCFLAGS) $(CCTOOLS_PYTHON3_CCFLAGS) -w -fPIC -DNDEBUG $< + +$(PYMODULE): $(PYDEPS) +ifeq ($(CCTOOLS_STATIC),1) + $(CCTOOLS_LD) -o $@ $(CCTOOLS_DYNAMIC_FLAG) $(CCTOOLS_INTERNAL_LDFLAGS) $(LOCAL_LDFLAGS) $(PYLINK_INPUT) $(LOCAL_LINKAGE) $(CCTOOLS_PYTHON3_LDFLAGS) $(CCTOOLS_EXTERNAL_LINKAGE) +else + $(CCTOOLS_LD) -o $@ $(CCTOOLS_DYNAMIC_FLAG) $(CCTOOLS_INTERNAL_LDFLAGS) $(LOCAL_LDFLAGS) $(PYLINK_INPUT) $(LOCAL_LINKAGE) $(CCTOOLS_PYTHON3_LDFLAGS) $(CCTOOLS_EXTERNAL_LINKAGE) +endif + +install: all + mkdir -p $(CCTOOLS_INSTALL_DIR)/graph/$(PROJECT_NAME)/include + cp ${CCTOOLS_HOME}/taskvine/src/manager/taskvine.h $(CCTOOLS_INSTALL_DIR)/graph/$(PROJECT_NAME)/include/ + mkdir -p $(MODULE_DIR) + cp $(PYMODULE) $(MODULE_DIR) + cp $(BUILD_DIR)/$(MODULE_NAME).py $(MODULE_DIR) + cp $(SOURCE_DIR)/__init__.py $(MODULE_DIR) + cp $(SOURCE_DIR)/vine_graph_client.py $(MODULE_DIR) + +clean: + rm -f $(PROGRAMS) $(OBJECTS) $(WRAP_OBJ) + rm -f $(PYMODULE) $(BUILD_DIR)/$(MODULE_NAME).py + rm -rf $(BUILD_DIR) + +lint: + if ( ! 
clang-format -Werror --dry-run --style='file:${CCTOOLS_HOME}/.clang-format' $(SOURCE_DIR)/*.c $(SOURCE_DIR)/*.h); \ + then \ + echo "========================================================"; \ + echo "NOTICE: Run \`make format\` to format your latest changes."; \ + echo "========================================================"; \ + exit 1; \ + fi + +format: + clang-format -i $(SOURCE_DIR)/*.c $(SOURCE_DIR)/*.h + + diff --git a/taskvine/src/graph/dagvine/vine_graph/__init__.py b/taskvine/src/graph/dagvine/vine_graph/__init__.py new file mode 100644 index 0000000000..536eed6b88 --- /dev/null +++ b/taskvine/src/graph/dagvine/vine_graph/__init__.py @@ -0,0 +1,10 @@ +# Copyright (C) 2025 The University of Notre Dame +# This software is distributed under the GNU General Public License. +# See the file COPYING for details. + + +from . import vine_graph_capi +from .vine_graph_client import VineGraphClient + + +__all__ = ["vine_graph_capi", "VineGraphClient"] diff --git a/taskvine/src/graph/dagvine/vine_graph/vine_graph.c b/taskvine/src/graph/dagvine/vine_graph/vine_graph.c new file mode 100644 index 0000000000..0935e4552d --- /dev/null +++ b/taskvine/src/graph/dagvine/vine_graph/vine_graph.c @@ -0,0 +1,1611 @@ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "priority_queue.h" +#include "list.h" +#include "debug.h" +#include "itable.h" +#include "xxmalloc.h" +#include "stringtools.h" +#include "random.h" +#include "hash_table.h" +#include "set.h" +#include "timestamp.h" +#include "progress_bar.h" +#include "macros.h" +#include "uuid.h" + +#include "vine_node.h" +#include "vine_graph.h" +#include "vine_manager.h" +#include "vine_worker_info.h" +#include "vine_task.h" +#include "vine_file.h" +#include "vine_mount.h" +#include "taskvine.h" +#include "vine_temp.h" + +static volatile sig_atomic_t interrupted = 0; + +/*************************************************************/ +/* Private Functions */ 
+/*************************************************************/ + +/** + * Handle the SIGINT signal. + * @param signal Reference to the signal. + */ +static void handle_sigint(int signal) +{ + interrupted = 1; +} + +/** + * Calculate the priority of a node given the priority mode. + * @param node Reference to the node object. + * @param priority_mode Reference to the priority mode. + * @return The priority. + */ +static double calculate_task_priority(struct vine_node *node, task_priority_mode_t priority_mode) +{ + if (!node) { + return 0; + } + + double priority = 0; + timestamp_t current_time = timestamp_get(); + + struct vine_node *parent_node; + + switch (priority_mode) { + case TASK_PRIORITY_MODE_RANDOM: + priority = random_double(); + break; + case TASK_PRIORITY_MODE_DEPTH_FIRST: + priority = (double)node->depth; + break; + case TASK_PRIORITY_MODE_BREADTH_FIRST: + priority = -(double)node->depth; + break; + case TASK_PRIORITY_MODE_FIFO: + priority = -(double)current_time; + break; + case TASK_PRIORITY_MODE_LIFO: + priority = (double)current_time; + break; + case TASK_PRIORITY_MODE_LARGEST_INPUT_FIRST: + LIST_ITERATE(node->parents, parent_node) + { + if (!parent_node->outfile) { + continue; + } + priority += (double)vine_file_size(parent_node->outfile); + } + break; + case TASK_PRIORITY_MODE_LARGEST_STORAGE_FOOTPRINT_FIRST: + LIST_ITERATE(node->parents, parent_node) + { + if (!parent_node->outfile) { + continue; + } + timestamp_t parent_task_completion_time = parent_node->task->time_workers_execute_last; + priority += (double)vine_file_size(parent_node->outfile) * (double)parent_task_completion_time; + } + break; + } + + return priority; +} + +/** + * Submit a node to the TaskVine manager via the vine graph. + * @param vg Reference to the vine graph. + * @param node Reference to the node. 
+ */ +static void submit_node_task(struct vine_graph *vg, struct vine_node *node) +{ + if (!vg || !node) { + return; + } + + if (!node->task) { + debug(D_ERROR, "submit_node_task: node %" PRIu64 " has no task", node->node_id); + return; + } + + /* Avoid double-submitting the same task object. This should never be needed + * for correctness and leads to task_id mapping corruption if it happens. */ + if (node->task->state != VINE_TASK_INITIAL) { + debug(D_VINE, "submit_node_task: skipping node %" PRIu64 " (task already submitted, state=%d, task_id=%d)", node->node_id, node->task->state, node->task->task_id); + return; + } + + /* calculate the priority of the node */ + double priority = calculate_task_priority(node, vg->task_priority_mode); + vine_task_set_priority(node->task, priority); + + /* submit the task to the manager */ + timestamp_t time_start = timestamp_get(); + int task_id = vine_submit(vg->manager, node->task); + node->submission_time = timestamp_get() - time_start; + + if (task_id <= 0) { + debug(D_ERROR, "submit_node_task: failed to submit node %" PRIu64 " (returned task_id=%d)", node->node_id, task_id); + return; + } + + /* insert the task id to the task id to node map */ + itable_insert(vg->task_id_to_node, (uint64_t)task_id, node); + + debug(D_VINE, "submitted node %" PRIu64 " with task id %d", node->node_id, task_id); + + return; +} + +/** + * Submit the children of a node once every dependency has completed. + * @param vg Reference to the vine graph. + * @param node Reference to the node. + */ +static void submit_unblocked_children(struct vine_graph *vg, struct vine_node *node) +{ + if (!vg || !node) { + return; + } + + struct vine_node *child_node; + LIST_ITERATE(node->children, child_node) + { + if (!child_node) { + continue; + } + + /* Edge-fired dependency resolution: each parent->child edge is consumed at most once. + * This is critical for recomputation/resubmission, where a parent may "complete" multiple times. 
*/ + if (child_node->fired_parents && set_lookup(child_node->fired_parents, node)) { + continue; + } + if (child_node->fired_parents) { + set_insert(child_node->fired_parents, node); + } + + if (child_node->remaining_parents_count > 0) { + child_node->remaining_parents_count--; + } + + /* If no more parents are remaining, submit the child (if it is not already done / in-flight). */ + if (child_node->remaining_parents_count == 0 && !child_node->completed && child_node->task && + child_node->task->state == VINE_TASK_INITIAL) { + submit_node_task(vg, child_node); + } + } + + return; +} + +/** + * Compute a topological ordering of the vine graph. + * Call only after all nodes, edges, and metrics have been populated. + * @param vg Reference to the vine graph. + * @return Nodes in topological order. + */ +static struct list *get_topological_order(struct vine_graph *vg) +{ + if (!vg) { + return NULL; + } + + int total_nodes = itable_size(vg->nodes); + struct list *topo_order = list_create(); + struct itable *in_degree_map = itable_create(0); + struct priority_queue *pq = priority_queue_create(total_nodes); + + uint64_t nid; + struct vine_node *node; + ITABLE_ITERATE(vg->nodes, nid, node) + { + int deg = list_size(node->parents); + itable_insert(in_degree_map, nid, (void *)(intptr_t)deg); + if (deg == 0) { + priority_queue_push(pq, node, -(double)node->node_id); + } + } + + while (priority_queue_size(pq) > 0) { + struct vine_node *current = priority_queue_pop(pq); + list_push_tail(topo_order, current); + + struct vine_node *child; + LIST_ITERATE(current->children, child) + { + intptr_t raw_deg = (intptr_t)itable_lookup(in_degree_map, child->node_id); + int deg = (int)raw_deg - 1; + itable_insert(in_degree_map, child->node_id, (void *)(intptr_t)deg); + + if (deg == 0) { + priority_queue_push(pq, child, -(double)child->node_id); + } + } + } + + if (list_size(topo_order) != total_nodes) { + debug(D_ERROR, "Error: vine graph contains cycles or is malformed."); + debug(D_ERROR, 
"Expected %d nodes, but only sorted %d.", total_nodes, list_size(topo_order)); + + uint64_t id; + ITABLE_ITERATE(vg->nodes, id, node) + { + intptr_t raw_deg = (intptr_t)itable_lookup(in_degree_map, id); + int deg = (int)raw_deg; + if (deg > 0) { + debug(D_ERROR, " Node %" PRIu64 " has in-degree %d. Parents:", id, deg); + struct vine_node *p; + LIST_ITERATE(node->parents, p) + { + debug(D_ERROR, " -> %" PRIu64, p->node_id); + } + } + } + + list_delete(topo_order); + itable_delete(in_degree_map); + priority_queue_delete(pq); + exit(1); + } + + itable_delete(in_degree_map); + priority_queue_delete(pq); + return topo_order; +} + +/** + * Extract weakly connected components of the vine graph. + * Currently used for debugging and instrumentation only. + * @param vg Reference to the vine graph. + * @return List of weakly connected components. + */ +static struct list *extract_weakly_connected_components(struct vine_graph *vg) +{ + if (!vg) { + return NULL; + } + + struct set *visited = set_create(0); + struct list *components = list_create(); + + uint64_t nid; + struct vine_node *node; + ITABLE_ITERATE(vg->nodes, nid, node) + { + if (set_lookup(visited, node)) { + continue; + } + + struct list *component = list_create(); + struct list *queue = list_create(); + + list_push_tail(queue, node); + set_insert(visited, node); + list_push_tail(component, node); + + while (list_size(queue) > 0) { + struct vine_node *curr = list_pop_head(queue); + + struct vine_node *p; + LIST_ITERATE(curr->parents, p) + { + if (!set_lookup(visited, p)) { + list_push_tail(queue, p); + set_insert(visited, p); + list_push_tail(component, p); + } + } + + struct vine_node *c; + LIST_ITERATE(curr->children, c) + { + if (!set_lookup(visited, c)) { + list_push_tail(queue, c); + set_insert(visited, c); + list_push_tail(component, c); + } + } + } + + list_push_tail(components, component); + list_delete(queue); + } + + set_delete(visited); + return components; +} + +/** + * Compute the heavy score of a node 
in the vine graph. + * @param node Reference to the node. + * @return Heavy score. + */ +static double compute_node_heavy_score(struct vine_node *node) +{ + if (!node) { + return 0; + } + + double up_score = node->depth * node->upstream_subgraph_size * node->fan_in; + double down_score = node->height * node->downstream_subgraph_size * node->fan_out; + + return up_score / (down_score + 1); +} + +/** + * Map a TaskVine task back to its vine node. + * @param vg Reference to the vine graph. + * @param task Task reported by the manager. + * @return Matching node. + */ +static struct vine_node *get_node_by_task(struct vine_graph *vg, struct vine_task *task) +{ + if (!vg || !task) { + return NULL; + } + + if (task->type == VINE_TASK_TYPE_STANDARD) { + /* standard tasks are mapped directly to a node */ + return itable_lookup(vg->task_id_to_node, (uint64_t)task->task_id); + } else if (task->type == VINE_TASK_TYPE_RECOVERY) { + /* note that recovery tasks are not mapped to any node but we still need the original node for pruning, + * so we look up the outfile of the task, then map it back to get the original node */ + struct vine_mount *mount; + LIST_ITERATE(task->output_mounts, mount) + { + uint64_t original_producer_task_id = mount->file->original_producer_task_id; + if (original_producer_task_id > 0) { + return itable_lookup(vg->task_id_to_node, original_producer_task_id); + } + } + } + + debug(D_ERROR, "task %d has no original producer task id", task->task_id); + + return NULL; +} + +/** + * Prune the ancestors of a persisted node. This is only used for persisted nodes that produce persisted files. + * All ancestors we consider here include both temp nodes and persisted nodes, because data written to the shared file system + * is safe and can definitely trigger upstream data redundancy to be released. + * @param vg Reference to the vine graph. + * @param node Reference to the node object. + * @return The number of pruned replicas. 
+ */ +static int prune_ancestors_of_persisted_node(struct vine_graph *vg, struct vine_node *node) +{ + if (!vg || !node) { + return -1; + } + + /* find all safe ancestors */ + struct set *safe_ancestors = vine_node_find_safe_ancestors(node); + if (!safe_ancestors) { + return 0; + } + + int pruned_replica_count = 0; + + timestamp_t start_time = timestamp_get(); + + /* prune all safe ancestors */ + struct vine_node *ancestor_node; + SET_ITERATE(safe_ancestors, ancestor_node) + { + switch (ancestor_node->outfile_type) { + case NODE_OUTFILE_TYPE_LOCAL: + /* do not prune the local file */ + break; + case NODE_OUTFILE_TYPE_TEMP: + /* prune the temp file */ + vine_prune_file(vg->manager, ancestor_node->outfile); + break; + case NODE_OUTFILE_TYPE_SHARED_FILE_SYSTEM: + /* unlink directly from the shared file system */ + unlink(ancestor_node->outfile_remote_name); + break; + } + ancestor_node->prune_status = PRUNE_STATUS_SAFE; + pruned_replica_count++; + } + + set_delete(safe_ancestors); + + node->time_spent_on_prune_ancestors_of_persisted_node += timestamp_get() - start_time; + + return pruned_replica_count; +} + +/** + * Prune the ancestors of a temp node. + * This function opportunistically releases upstream temporary files + * that are no longer needed once this temp-producing node has completed. + * + * Only ancestors producing temporary outputs are considered here. + * Files stored in the shared filesystem are never pruned by this function, + * because temp outputs are not considered sufficiently safe to trigger + * deletion of persisted data upstream. + * @param vg Reference to the vine graph. + * @param node Reference to the node object. + * @return The number of pruned replicas. 
+ */ +static int prune_ancestors_of_temp_node(struct vine_graph *vg, struct vine_node *node) +{ + if (!vg || !node || !node->outfile || node->prune_depth <= 0) { + return 0; + } + + timestamp_t start_time = timestamp_get(); + + int pruned_replica_count = 0; + + struct list *parents = vine_node_find_parents_by_depth(node, node->prune_depth); + + struct vine_node *parent_node; + LIST_ITERATE(parents, parent_node) + { + /* skip if the parent does not produce a temp file */ + if (parent_node->outfile_type != NODE_OUTFILE_TYPE_TEMP) { + continue; + } + + /* a file is prunable if its outfile is no longer needed by any child node: + * 1. it has no pending dependents + * 2. all completed dependents have also completed their corresponding recovery tasks, if any */ + int all_children_completed = 1; + struct vine_node *child_node; + LIST_ITERATE(parent_node->children, child_node) + { + /* break early if the child node is not completed */ + if (!child_node->completed) { + all_children_completed = 0; + break; + } + /* if the task produces a temp file and the recovery task is running, the parent is not prunable */ + if (child_node->outfile && child_node->outfile->type == VINE_TEMP) { + struct vine_task *child_node_recovery_task = child_node->outfile->recovery_task; + if (child_node_recovery_task && (child_node_recovery_task->state != VINE_TASK_INITIAL && child_node_recovery_task->state != VINE_TASK_DONE)) { + all_children_completed = 0; + break; + } + } + } + if (!all_children_completed) { + continue; + } + + pruned_replica_count += vine_prune_file(vg->manager, parent_node->outfile); + /* this parent is pruned because a successor that produces a temp file is completed, it is unsafe because the + * manager may submit a recovery task to bring it back in case of worker failures. 
*/ + parent_node->prune_status = PRUNE_STATUS_UNSAFE; + } + + list_delete(parents); + + node->time_spent_on_prune_ancestors_of_temp_node += timestamp_get() - start_time; + + return pruned_replica_count; +} + +/** + * Prune the ancestors of a node when it is completed. + * @param node Reference to the node object. + */ +static void prune_ancestors_of_node(struct vine_graph *vg, struct vine_node *node) +{ + if (!vg || !node) { + return; + } + + /* do not prune if the node has not completed */ + if (!node->completed) { + return; + } + + timestamp_t start_time = timestamp_get(); + + int pruned_replica_count = 0; + + switch (node->outfile_type) { + case NODE_OUTFILE_TYPE_LOCAL: + case NODE_OUTFILE_TYPE_SHARED_FILE_SYSTEM: + /* If the outfile was declared as a VINE_FILE or was written to the shared fs, then it is guaranteed to be persisted + * and there is no chance that it will be lost unexpectedly. So we can safely prune all ancestors of this node. */ + pruned_replica_count = prune_ancestors_of_persisted_node(vg, node); + break; + case NODE_OUTFILE_TYPE_TEMP: + /* Otherwise, if the node outfile is a temp file, we need to be careful about pruning, because temp files are prone + * to failures, while means they can be lost due to node evictions or failures. */ + pruned_replica_count = prune_ancestors_of_temp_node(vg, node); + break; + } + + timestamp_t elapsed_time = timestamp_get() - start_time; + + debug(D_VINE, "pruned %d ancestors of node %" PRIu64 " in %.6f seconds", pruned_replica_count, node->node_id, elapsed_time / 1000000.0); + + return; +} + +/** + * Print the time metrics of the vine graph to a csv file. + * @param vg Reference to the vine graph. + * @param filename Reference to the filename of the csv file. 
+ */ +static void print_time_metrics(struct vine_graph *vg, const char *filename) +{ + if (!vg) { + return; + } + + /* first delete the file if it exists */ + if (access(filename, F_OK) != -1) { + unlink(filename); + } + + /* print the header as a csv file */ + FILE *fp = fopen(filename, "w"); + if (!fp) { + debug(D_ERROR, "failed to open file %s", filename); + return; + } + fprintf(fp, "node_id,submission_time_us,commit_time_us,execution_time_us,retrieval_time_us,postprocessing_time_us\n"); + + uint64_t nid; + struct vine_node *node; + ITABLE_ITERATE(vg->nodes, nid, node) + { + fprintf(fp, "%" PRIu64 "," TIMESTAMP_FORMAT "," TIMESTAMP_FORMAT "," TIMESTAMP_FORMAT "," TIMESTAMP_FORMAT "," TIMESTAMP_FORMAT "\n", node->node_id, node->submission_time, node->commit_time, node->execution_time, node->retrieval_time, node->postprocessing_time); + } + fclose(fp); + + return; +} + +/** + * Enqueue a node to be resubmitted later. + * @param vg Reference to the vine graph. + * @param node Reference to the node. + */ +static void enqueue_resubmit_node(struct vine_graph *vg, struct vine_node *node) +{ + if (!vg || !node) { + return; + } + + if (node->in_resubmit_queue) { + return; + } + + node->last_failure_time = timestamp_get(); + list_push_tail(vg->resubmit_queue, node); + node->in_resubmit_queue = 1; +} + +/* Try to resubmit a previously failed node. + * @return 1 if a node was actually resubmitted (reset + submit invoked), 0 otherwise. 
*/ +static int try_resubmitting_node(struct vine_graph *vg) +{ + if (!vg) { + return 0; + } + + struct vine_node *node = list_pop_head(vg->resubmit_queue); + if (!node) { + return 0; + } + node->in_resubmit_queue = 0; + + /* if the task failed due to inputs missing, we must submit the producer tasks for the lost data */ + int all_inputs_ready = 1; + if (node->task->result == VINE_RESULT_INPUT_MISSING) { + struct vine_mount *m; + LIST_ITERATE(node->task->input_mounts, m) + { + struct vine_file *f = m->file; + /* this is a temp file, it has a producer task id and the task pointer is null */ + if (f->type != VINE_TEMP) { + continue; + } + if (vine_temp_exists_somewhere(vg->manager, f)) { + continue; + } + if (itable_lookup(vg->manager->tasks, f->original_producer_task_id)) { + continue; + } + /* get the original producer node by task id */ + struct vine_node *original_producer_node = itable_lookup(vg->task_id_to_node, f->original_producer_task_id); + if (!original_producer_node) { + continue; + } + enqueue_resubmit_node(vg, original_producer_node); + all_inputs_ready = 0; + } + } + + /* if not all inputs are ready, enqueue the node and consider later */ + if (!all_inputs_ready) { + enqueue_resubmit_node(vg, node); + return 0; + } + + timestamp_t interval = timestamp_get() - node->last_failure_time; + + if (interval <= vg->retry_interval_sec * 1e6) { + enqueue_resubmit_node(vg, node); + return 0; + } + + if (node->retry_attempts_left-- <= 0) { + debug(D_ERROR, "node %" PRIu64 " has no retries left. Aborting.", node->node_id); + vine_graph_delete(vg); + exit(1); + } + + debug(D_VINE, "Resubmitting node %" PRIu64 " (remaining=%d)", node->node_id, node->retry_attempts_left); + vine_task_reset(node->task); + submit_node_task(vg, node); + + return 1; +} + +/*************************************************************/ +/* Public APIs */ +/*************************************************************/ + +/** Tune the vine graph. + *@param vg Reference to the vine graph. 
+ *@param name Reference to the name of the parameter to tune. + *@param value Reference to the value of the parameter to tune. + *@return 0 on success, -1 on failure. + */ +int vine_graph_tune(struct vine_graph *vg, const char *name, const char *value) +{ + if (!vg || !name || !value) { + return -1; + } + + if (strcmp(name, "failure-injection-step-percent") == 0) { + vg->failure_injection_step_percent = atof(value); + + } else if (strcmp(name, "task-priority-mode") == 0) { + if (strcmp(value, "random") == 0) { + vg->task_priority_mode = TASK_PRIORITY_MODE_RANDOM; + } else if (strcmp(value, "depth-first") == 0) { + vg->task_priority_mode = TASK_PRIORITY_MODE_DEPTH_FIRST; + } else if (strcmp(value, "breadth-first") == 0) { + vg->task_priority_mode = TASK_PRIORITY_MODE_BREADTH_FIRST; + } else if (strcmp(value, "fifo") == 0) { + vg->task_priority_mode = TASK_PRIORITY_MODE_FIFO; + } else if (strcmp(value, "lifo") == 0) { + vg->task_priority_mode = TASK_PRIORITY_MODE_LIFO; + } else if (strcmp(value, "largest-input-first") == 0) { + vg->task_priority_mode = TASK_PRIORITY_MODE_LARGEST_INPUT_FIRST; + } else if (strcmp(value, "largest-storage-footprint-first") == 0) { + vg->task_priority_mode = TASK_PRIORITY_MODE_LARGEST_STORAGE_FOOTPRINT_FIRST; + } else { + debug(D_ERROR, "invalid priority mode: %s", value); + return -1; + } + + } else if (strcmp(name, "output-dir") == 0) { + if (vg->output_dir) { + free(vg->output_dir); + } + if (mkdir(value, 0777) != 0 && errno != EEXIST) { + debug(D_ERROR, "failed to mkdir %s (errno=%d)", value, errno); + return -1; + } + vg->output_dir = xxstrdup(value); + + } else if (strcmp(name, "prune-depth") == 0) { + vg->prune_depth = atoi(value); + + } else if (strcmp(name, "checkpoint-fraction") == 0) { + double fraction = atof(value); + if (fraction < 0.0 || fraction > 1.0) { + debug(D_ERROR, "invalid checkpoint fraction: %s (must be between 0.0 and 1.0)", value); + return -1; + } + vg->checkpoint_fraction = fraction; + + } else if 
(strcmp(name, "checkpoint-dir") == 0) { + if (vg->checkpoint_dir) { + free(vg->checkpoint_dir); + } + if (mkdir(value, 0777) != 0 && errno != EEXIST) { + debug(D_ERROR, "failed to mkdir %s (errno=%d)", value, errno); + return -1; + } + vg->checkpoint_dir = xxstrdup(value); + + } else if (strcmp(name, "progress-bar-update-interval-sec") == 0) { + double val = atof(value); + vg->progress_bar_update_interval_sec = (val > 0.0) ? val : 0.1; + + } else if (strcmp(name, "time-metrics-filename") == 0) { + if (value == NULL || strcmp(value, "0") == 0) { + return 0; + } + + if (vg->time_metrics_filename) { + free(vg->time_metrics_filename); + } + + vg->time_metrics_filename = xxstrdup(value); + + /** Extract parent directory inline **/ + const char *slash = strrchr(vg->time_metrics_filename, '/'); + if (slash) { + size_t len = slash - vg->time_metrics_filename; + char *parent = malloc(len + 1); + memcpy(parent, vg->time_metrics_filename, len); + parent[len] = '\0'; + + /** Ensure the parent directory exists **/ + if (mkdir(parent, 0777) != 0 && errno != EEXIST) { + debug(D_ERROR, "failed to mkdir %s (errno=%d)", parent, errno); + free(parent); + return -1; + } + free(parent); + } + + /** Truncate or create the file **/ + FILE *fp = fopen(vg->time_metrics_filename, "w"); + if (!fp) { + debug(D_ERROR, "failed to create file %s (errno=%d)", vg->time_metrics_filename, errno); + return -1; + } + fclose(fp); + + } else if (strcmp(name, "enable-debug-log") == 0) { + if (vg->enable_debug_log == 0) { + return -1; + } + vg->enable_debug_log = (atoi(value) == 1) ? 1 : 0; + if (vg->enable_debug_log == 0) { + debug_flags_clear(); + debug_close(); + } + + } else if (strcmp(name, "auto-recovery") == 0) { + vg->auto_recovery = (atoi(value) == 1) ? 
1 : 0; + + } else if (strcmp(name, "max-retry-attempts") == 0) { + vg->max_retry_attempts = MAX(0, atoi(value)); + + } else if (strcmp(name, "retry-interval-sec") == 0) { + vg->retry_interval_sec = MAX(0.0, atof(value)); + + } else { + debug(D_ERROR, "invalid parameter name: %s", name); + return -1; + } + + return 0; +} + +/** + * Get the outfile remote name of a node in the vine graph. + * @param vg Reference to the vine graph. + * @param node_id Reference to the node id. + * @return The outfile remote name. + */ +const char *vine_graph_get_node_outfile_remote_name(const struct vine_graph *vg, uint64_t node_id) +{ + if (!vg) { + return NULL; + } + + struct vine_node *node = itable_lookup(vg->nodes, node_id); + if (!node) { + return NULL; + } + + return node->outfile_remote_name; +} + +/** + * Get the proxy library name of the vine graph. + * @param vg Reference to the vine graph. + * @return The proxy library name. + */ +const char *vine_graph_get_proxy_library_name(const struct vine_graph *vg) +{ + if (!vg) { + return NULL; + } + + return vg->proxy_library_name; +} + +/** + * Add an input file to a task. The input file will be declared as a temp file. + * @param vg Reference to the vine graph. + * @param task_id Reference to the task id. + * @param filename Reference to the filename. 
+ */ +void vine_graph_add_task_input(struct vine_graph *vg, uint64_t task_id, const char *filename) +{ + if (!vg || !task_id || !filename) { + return; + } + + struct vine_node *node = itable_lookup(vg->nodes, task_id); + if (!node) { + return; + } + + struct vine_file *f = NULL; + const char *cached_name = hash_table_lookup(vg->inout_filename_to_cached_name, filename); + + if (cached_name) { + f = vine_manager_lookup_file(vg->manager, cached_name); + } else { + f = vine_declare_temp(vg->manager); + hash_table_insert(vg->inout_filename_to_cached_name, filename, xxstrdup(f->cached_name)); + } + + vine_task_add_input(node->task, f, filename, VINE_TRANSFER_ALWAYS); +} + +/** + * Add an output file to a task. The output file will be declared as a temp file. + * @param vg Reference to the vine graph. + * @param task_id Reference to the task id. + * @param filename Reference to the filename. + */ +void vine_graph_add_task_output(struct vine_graph *vg, uint64_t task_id, const char *filename) +{ + if (!vg || !task_id || !filename) { + return; + } + + struct vine_node *node = itable_lookup(vg->nodes, task_id); + if (!node) { + return; + } + + struct vine_file *f = NULL; + const char *cached_name = hash_table_lookup(vg->inout_filename_to_cached_name, filename); + + if (cached_name) { + f = vine_manager_lookup_file(vg->manager, cached_name); + } else { + f = vine_declare_temp(vg->manager); + hash_table_insert(vg->inout_filename_to_cached_name, filename, xxstrdup(f->cached_name)); + } + + vine_task_add_output(node->task, f, filename, VINE_TRANSFER_ALWAYS); +} + +/** + * Set the proxy function name of the vine graph. + * @param vg Reference to the vine graph. + * @param proxy_function_name Reference to the proxy function name. 
+ */ +void vine_graph_set_proxy_function_name(struct vine_graph *vg, const char *proxy_function_name) +{ + if (!vg || !proxy_function_name) { + return; + } + + if (vg->proxy_function_name) { + free(vg->proxy_function_name); + } + + vg->proxy_function_name = xxstrdup(proxy_function_name); +} + +/** + * Get the heavy score of a node in the vine graph. + * @param vg Reference to the vine graph. + * @param node_id Reference to the node id. + * @return The heavy score. + */ +double vine_graph_get_node_heavy_score(const struct vine_graph *vg, uint64_t node_id) +{ + if (!vg) { + return -1; + } + + struct vine_node *node = itable_lookup(vg->nodes, node_id); + if (!node) { + return -1; + } + + return node->heavy_score; +} + +/** + * Get the local outfile source of a node in the vine graph, only valid for local output files. + * The source of a local output file is the path on the local filesystem. + * @param vg Reference to the vine graph. + * @param node_id Reference to the node id. + * @return The local outfile source. + */ +const char *vine_graph_get_node_local_outfile_source(const struct vine_graph *vg, uint64_t node_id) +{ + if (!vg) { + return NULL; + } + + struct vine_node *node = itable_lookup(vg->nodes, node_id); + if (!node) { + debug(D_ERROR, "node %" PRIu64 " not found", node_id); + exit(1); + } + + if (node->outfile_type != NODE_OUTFILE_TYPE_LOCAL) { + debug(D_ERROR, "node %" PRIu64 " is not a local output file", node_id); + exit(1); + } + + return node->outfile->source; +} + +/** + * Compute the topology metrics of the vine graph, including depth, height, upstream and downstream counts, + * heavy scores, and weakly connected components. Must be called after all nodes and dependencies are added. + * @param vg Reference to the vine graph. 
+ */ +void vine_graph_compute_topology_metrics(struct vine_graph *vg) +{ + if (!vg) { + return; + } + + /* get nodes in topological order */ + struct list *topo_order = get_topological_order(vg); + if (!topo_order) { + return; + } + + struct vine_node *node; + struct vine_node *parent_node; + struct vine_node *child_node; + + /* compute the depth of the node */ + LIST_ITERATE(topo_order, node) + { + node->depth = 0; + LIST_ITERATE(node->parents, parent_node) + { + if (node->depth < parent_node->depth + 1) { + node->depth = parent_node->depth + 1; + } + } + } + + /* compute the height of the node */ + LIST_ITERATE_REVERSE(topo_order, node) + { + node->height = 0; + LIST_ITERATE(node->children, child_node) + { + if (node->height < child_node->height + 1) { + node->height = child_node->height + 1; + } + } + } + + /* compute the upstream and downstream counts for each node */ + struct itable *upstream_map = itable_create(0); + struct itable *downstream_map = itable_create(0); + uint64_t nid_tmp; + ITABLE_ITERATE(vg->nodes, nid_tmp, node) + { + struct set *upstream = set_create(0); + struct set *downstream = set_create(0); + itable_insert(upstream_map, node->node_id, upstream); + itable_insert(downstream_map, node->node_id, downstream); + } + LIST_ITERATE(topo_order, node) + { + struct set *upstream = itable_lookup(upstream_map, node->node_id); + LIST_ITERATE(node->parents, parent_node) + { + struct set *parent_upstream = itable_lookup(upstream_map, parent_node->node_id); + set_union(upstream, parent_upstream); + set_insert(upstream, parent_node); + } + } + LIST_ITERATE_REVERSE(topo_order, node) + { + struct set *downstream = itable_lookup(downstream_map, node->node_id); + LIST_ITERATE(node->children, child_node) + { + struct set *child_downstream = itable_lookup(downstream_map, child_node->node_id); + set_union(downstream, child_downstream); + set_insert(downstream, child_node); + } + } + LIST_ITERATE(topo_order, node) + { + node->upstream_subgraph_size = 
set_size(itable_lookup(upstream_map, node->node_id)); + node->downstream_subgraph_size = set_size(itable_lookup(downstream_map, node->node_id)); + node->fan_in = list_size(node->parents); + node->fan_out = list_size(node->children); + set_delete(itable_lookup(upstream_map, node->node_id)); + set_delete(itable_lookup(downstream_map, node->node_id)); + } + itable_delete(upstream_map); + itable_delete(downstream_map); + + /* compute the heavy score for each node */ + LIST_ITERATE(topo_order, node) + { + node->heavy_score = compute_node_heavy_score(node); + } + + /* sort nodes using priority queue */ + int total_nodes = list_size(topo_order); + int total_target_nodes = 0; + struct priority_queue *sorted_nodes = priority_queue_create(total_nodes); + LIST_ITERATE(topo_order, node) + { + if (node->is_target) { + total_target_nodes++; + } + priority_queue_push(sorted_nodes, node, node->heavy_score); + } + /* calculate the number of nodes to be checkpointed */ + int checkpoint_count = (int)((total_nodes - total_target_nodes) * vg->checkpoint_fraction); + if (checkpoint_count < 0) { + checkpoint_count = 0; + } + + /* assign outfile types to each node */ + int assigned_checkpoint_count = 0; + while ((node = priority_queue_pop(sorted_nodes))) { + if (node->is_target) { + /* declare the output file as a vine_file so that it can be retrieved by the manager as usual */ + node->outfile_type = NODE_OUTFILE_TYPE_LOCAL; + char *local_outfile_path = string_format("%s/%s", vg->output_dir, node->outfile_remote_name); + node->outfile = vine_declare_file(vg->manager, local_outfile_path, VINE_CACHE_LEVEL_WORKFLOW, 0); + free(local_outfile_path); + continue; + } + if (assigned_checkpoint_count < checkpoint_count) { + /* checkpointed files will be written directly to the shared file system, no need to manage them in the manager */ + node->outfile_type = NODE_OUTFILE_TYPE_SHARED_FILE_SYSTEM; + char *shared_file_system_outfile_path = string_format("%s/%s", vg->checkpoint_dir, 
node->outfile_remote_name); + free(node->outfile_remote_name); + node->outfile_remote_name = shared_file_system_outfile_path; + node->outfile = NULL; + assigned_checkpoint_count++; + } else { + /* other nodes will be declared as temp files to leverage node-local storage */ + node->outfile_type = NODE_OUTFILE_TYPE_TEMP; + node->outfile = vine_declare_temp(vg->manager); + } + } + /* track the output dependencies of user and vine_temp nodes */ + LIST_ITERATE(topo_order, node) + { + if (node->outfile) { + vine_task_add_output(node->task, node->outfile, node->outfile_remote_name, VINE_TRANSFER_ALWAYS); + } + } + priority_queue_delete(sorted_nodes); + + /* extract weakly connected components */ + struct list *weakly_connected_components = extract_weakly_connected_components(vg); + struct list *component; + int component_index = 0; + debug(D_VINE, "graph has %d weakly connected components\n", list_size(weakly_connected_components)); + LIST_ITERATE(weakly_connected_components, component) + { + debug(D_VINE, "component %d size: %d\n", component_index, list_size(component)); + list_delete(component); + component_index++; + } + list_delete(weakly_connected_components); + + list_delete(topo_order); + + return; +} + +/** + * Create a new node and track it in the vine graph. + * @param vg Reference to the vine graph. + * @return The auto-assigned node id. 
+ */ +uint64_t vine_graph_add_node(struct vine_graph *vg) +{ + if (!vg) { + return 0; + } + + /* assign a new id based on current node count, ensure uniqueness */ + uint64_t candidate_id = itable_size(vg->nodes); + candidate_id += 1; + while (itable_lookup(vg->nodes, candidate_id)) { + candidate_id++; + } + uint64_t node_id = candidate_id; + + /* create the backing node (defaults to non-target) */ + struct vine_node *node = vine_node_create(node_id); + + if (!node) { + debug(D_ERROR, "failed to create node %" PRIu64, node_id); + vine_graph_delete(vg); + exit(1); + } + + if (!vg->proxy_function_name) { + debug(D_ERROR, "proxy function name is not set"); + vine_graph_delete(vg); + exit(1); + } + + if (!vg->proxy_library_name) { + debug(D_ERROR, "proxy library name is not set"); + vine_graph_delete(vg); + exit(1); + } + + /* create node task */ + node->task = vine_task_create(vg->proxy_function_name); + vine_task_set_library_required(node->task, vg->proxy_library_name); + vine_task_addref(node->task); + + /* construct the task arguments and declare the infile */ + char *task_arguments = vine_node_construct_task_arguments(node); + node->infile = vine_declare_buffer(vg->manager, task_arguments, strlen(task_arguments), VINE_CACHE_LEVEL_TASK, VINE_UNLINK_WHEN_DONE); + free(task_arguments); + vine_task_add_input(node->task, node->infile, "infile", VINE_TRANSFER_ALWAYS); + + /* initialize the pruning depth of each node, currently statically set to the global prune depth */ + node->prune_depth = vg->prune_depth; + + node->retry_attempts_left = vg->max_retry_attempts; + + itable_insert(vg->nodes, node_id, node); + + return node_id; +} + +/** + * Mark a node as a retrieval target. 
+ */ +void vine_graph_set_target(struct vine_graph *vg, uint64_t node_id) +{ + if (!vg) { + return; + } + struct vine_node *node = itable_lookup(vg->nodes, node_id); + if (!node) { + debug(D_ERROR, "node %" PRIu64 " not found", node_id); + exit(1); + } + + node->is_target = 1; +} + +/** + * Create a new vine graph and bind a manager to it. + * @param q Reference to the manager object. + * @return A new vine graph instance. + */ +struct vine_graph *vine_graph_create(struct vine_manager *q) +{ + if (!q) { + return NULL; + } + + struct vine_graph *vg = xxmalloc(sizeof(struct vine_graph)); + + vg->manager = q; + + vg->checkpoint_dir = xxstrdup(vg->manager->runtime_directory); // defaults to the manager's runtime directory + vg->output_dir = xxstrdup(vg->manager->runtime_directory); // defaults to the manager's runtime directory + + vg->nodes = itable_create(0); + vg->task_id_to_node = itable_create(0); + vg->outfile_cachename_to_node = hash_table_create(0, 0); + vg->inout_filename_to_cached_name = hash_table_create(0, 0); + vg->resubmit_queue = list_create(); + + cctools_uuid_t proxy_library_name_id; + cctools_uuid_create(&proxy_library_name_id); + vg->proxy_library_name = xxstrdup(proxy_library_name_id.str); + + vg->proxy_function_name = NULL; + + vg->prune_depth = 1; + + vg->task_priority_mode = TASK_PRIORITY_MODE_LARGEST_INPUT_FIRST; + vg->failure_injection_step_percent = -1.0; + + vg->progress_bar_update_interval_sec = 0.1; + + /* enable debug system for C code since it uses a separate debug system instance + * from the Python bindings. Use the same function that the manager uses. 
*/ + char *debug_tmp = string_format("%s/vine-logs/debug", vg->manager->runtime_directory); + vine_enable_debug_log(debug_tmp); + free(debug_tmp); + + vg->time_metrics_filename = NULL; + + vg->enable_debug_log = 1; + + vg->max_retry_attempts = 15; + vg->retry_interval_sec = 1.0; + + /* disable auto recovery so that the graph executor can handle all tasks with missing inputs + * this ensures that no recovery tasks are created automatically by the taskvine manager and that + * we can be in control of when to recreate what lost data. */ + vg->auto_recovery = 0; + + vg->time_first_task_dispatched = UINT64_MAX; + vg->time_last_task_retrieved = 0; + vg->makespan_us = 0; + + return vg; +} + +/** + * Add a dependency between two nodes in the vine graph. Note that the input-output file relationship + * is not handled here, because their file names might not have been determined yet. + * @param vg Reference to the vine graph. + * @param parent_id Reference to the parent node id. + * @param child_id Reference to the child node id. + */ +void vine_graph_add_dependency(struct vine_graph *vg, uint64_t parent_id, uint64_t child_id) +{ + if (!vg) { + return; + } + + struct vine_node *parent_node = itable_lookup(vg->nodes, parent_id); + struct vine_node *child_node = itable_lookup(vg->nodes, child_id); + if (!parent_node) { + debug(D_ERROR, "parent node %" PRIu64 " not found", parent_id); + uint64_t nid; + struct vine_node *node; + printf("parent_ids:\n"); + ITABLE_ITERATE(vg->nodes, nid, node) + { + printf(" %" PRIu64 "\n", node->node_id); + } + exit(1); + } + if (!child_node) { + debug(D_ERROR, "child node %" PRIu64 " not found", child_id); + exit(1); + } + + list_push_tail(child_node->parents, parent_node); + list_push_tail(parent_node->children, child_node); + + return; +} + +uint64_t vine_graph_get_makespan_us(const struct vine_graph *vg) +{ + if (!vg) { + return 0; + } + + return (uint64_t)vg->makespan_us; +} + +/** + * Execute the vine graph. 
This must be called after all nodes and dependencies are added and the topology metrics are computed. + * @param vg Reference to the vine graph. + */ +void vine_graph_execute(struct vine_graph *vg) +{ + if (!vg) { + return; + } + + timestamp_t time_start = timestamp_get(); + + void (*previous_sigint_handler)(int) = signal(SIGINT, handle_sigint); + + debug(D_VINE, "start executing vine graph"); + + /* print the info of all nodes */ + uint64_t nid_iter; + struct vine_node *node; + ITABLE_ITERATE(vg->nodes, nid_iter, node) + { + vine_node_debug_print(node); + } + + /* enable return recovery tasks */ + vine_enable_return_recovery_tasks(vg->manager); + + vg->manager->auto_recovery = vg->auto_recovery; + + /* create mappings from task IDs and outfile cache names to nodes */ + ITABLE_ITERATE(vg->nodes, nid_iter, node) + { + if (node->outfile) { + hash_table_insert(vg->outfile_cachename_to_node, node->outfile->cached_name, node); + } + } + + /* add the parents' outfiles as inputs to the task */ + struct list *topo_order = get_topological_order(vg); + LIST_ITERATE(topo_order, node) + { + struct vine_node *parent_node; + LIST_ITERATE(node->parents, parent_node) + { + if (parent_node->outfile) { + vine_task_add_input(node->task, parent_node->outfile, parent_node->outfile_remote_name, VINE_TRANSFER_ALWAYS); + } + } + } + + /* initialize remaining_parents_count for all nodes */ + ITABLE_ITERATE(vg->nodes, nid_iter, node) + { + node->remaining_parents_count = list_size(node->parents); + } + + /* enqueue those without dependencies */ + ITABLE_ITERATE(vg->nodes, nid_iter, node) + { + if (node->remaining_parents_count == 0) { + submit_node_task(vg, node); + } + } + + /* calculate steps to inject failure */ + double next_failure_threshold = -1.0; + if (vg->failure_injection_step_percent > 0) { + next_failure_threshold = vg->failure_injection_step_percent / 100.0; + } + + timestamp_t time_end = timestamp_get(); + printf("Time taken to initialize the graph in C: %.6f seconds\n", 
(double)(time_end - time_start) / 1e6); + + struct ProgressBar *pbar = progress_bar_init("Executing Tasks"); + progress_bar_set_update_interval(pbar, vg->progress_bar_update_interval_sec); + + struct ProgressBarPart *user_tasks_part = progress_bar_create_part("User", itable_size(vg->nodes)); + struct ProgressBarPart *recovery_tasks_part = progress_bar_create_part("Recovery", 0); + progress_bar_bind_part(pbar, user_tasks_part); + progress_bar_bind_part(pbar, recovery_tasks_part); + + int wait_timeout = 1; + + while (user_tasks_part->current < user_tasks_part->total) { + if (interrupted) { + break; + } + + /* Always process graph-level resubmissions (this affects correctness), + * but the Recovery progress bar source depends on auto_recovery: + * - auto_recovery==1: Recovery tracks manager-created recovery tasks only + * - auto_recovery==0: Recovery tracks graph resubmit queue only */ + int did_resubmit = try_resubmitting_node(vg); + + if (vg->manager->auto_recovery) { + /* Recovery progress reflects manager recovery tasks. */ + (void)did_resubmit; + progress_bar_set_part_total(pbar, recovery_tasks_part, (uint64_t)vg->manager->stats->recovery_tasks_submitted); + progress_bar_update_part(pbar, recovery_tasks_part, 0); + } else { + /* Recovery progress reflects graph-level resubmissions: + * total = (already attempted resubmits) + (currently queued resubmits). 
*/ + if (did_resubmit) { + progress_bar_update_part(pbar, recovery_tasks_part, 1); + } + uint64_t queued_resubmits = (uint64_t)list_size(vg->resubmit_queue); + progress_bar_set_part_total(pbar, recovery_tasks_part, recovery_tasks_part->current + queued_resubmits); + progress_bar_update_part(pbar, recovery_tasks_part, 0); + } + + struct vine_task *task = vine_wait(vg->manager, wait_timeout); + if (task) { + /* retrieve all possible tasks */ + wait_timeout = 0; + timestamp_t time_when_postprocessing_start = timestamp_get(); + + /* If auto_recovery is enabled, recovery tasks should be reflected in the Recovery progress bar. */ + if (vg->manager->auto_recovery && task->type == VINE_TASK_TYPE_RECOVERY) { + progress_bar_update_part(pbar, recovery_tasks_part, 1); + } + + /* get the original node by task id */ + struct vine_node *node = get_node_by_task(vg, task); + if (!node) { + debug(D_ERROR, "fatal: task %d could not be mapped to a task node, this indicates a serious bug.", task->task_id); + exit(1); + } + + /* in case of failure, resubmit this task */ + if (node->task->result != VINE_RESULT_SUCCESS || node->task->exit_code != 0) { + enqueue_resubmit_node(vg, node); + debug(D_VINE, "Task %d failed (result=%d, exit=%d)", task->task_id, node->task->result, node->task->exit_code); + continue; + } + + /* update time metrics */ + vg->time_first_task_dispatched = MIN(vg->time_first_task_dispatched, task->time_when_commit_end); + vg->time_last_task_retrieved = MAX(vg->time_last_task_retrieved, task->time_when_retrieval); + if (vg->time_last_task_retrieved < vg->time_first_task_dispatched) { + debug(D_ERROR, "task %d time_last_task_retrieved < time_first_task_dispatched: %" PRIu64 " < %" PRIu64, task->task_id, vg->time_last_task_retrieved, vg->time_first_task_dispatched); + vg->time_last_task_retrieved = vg->time_first_task_dispatched; + } + vg->makespan_us = vg->time_last_task_retrieved - vg->time_first_task_dispatched; + + /* if the outfile is set to save on the sharedfs, 
stat to get the size of the file */ + switch (node->outfile_type) { + case NODE_OUTFILE_TYPE_SHARED_FILE_SYSTEM: { + struct stat info; + int result = stat(node->outfile_remote_name, &info); + if (result < 0) { + debug(D_VINE, "Task %d succeeded but missing sharedfs output %s", task->task_id, node->outfile_remote_name); + enqueue_resubmit_node(vg, node); + continue; + } + node->outfile_size_bytes = info.st_size; + break; + } + case NODE_OUTFILE_TYPE_LOCAL: + case NODE_OUTFILE_TYPE_TEMP: + node->outfile_size_bytes = node->outfile->size; + break; + } + debug(D_VINE, "Node %" PRIu64 " completed with outfile %s size: %zu bytes", node->node_id, node->outfile_remote_name, node->outfile_size_bytes); + + /* mark the node as completed + * Note: a node may complete multiple times due to resubmission/recomputation. + * Only the first completion should advance the "User" progress. */ + int first_completion = !node->completed; + node->completed = 1; + node->commit_time = task->time_when_commit_end - task->time_when_commit_start; + node->execution_time = task->time_workers_execute_last; + node->retrieval_time = task->time_when_done - task->time_when_retrieval; + + /* prune nodes on task completion */ + prune_ancestors_of_node(vg, node); + + /* skip manager-created recovery tasks. + * - If auto_recovery==1, we already accounted for them in the Recovery bar above. + * - If auto_recovery==0, Recovery bar tracks graph resubmits only. 
*/ + if (task->type == VINE_TASK_TYPE_RECOVERY) { + continue; + } + + if (first_completion) { + /* set the start time to the submit time of the first user task */ + if (user_tasks_part->current == 0) { + progress_bar_set_start_time(pbar, task->time_when_commit_start); + } + + /* update critical time */ + vine_node_update_critical_path_time(node, node->execution_time); + + /* mark this user task as completed */ + progress_bar_update_part(pbar, user_tasks_part, 1); + } + + /* inject failure */ + if (vg->failure_injection_step_percent > 0) { + double progress = (double)user_tasks_part->current / (double)user_tasks_part->total; + if (progress >= next_failure_threshold && release_random_worker(vg->manager)) { + debug(D_VINE, "released a random worker at %.2f%% (threshold %.2f%%)", progress * 100, next_failure_threshold * 100); + next_failure_threshold += vg->failure_injection_step_percent / 100.0; + } + } + + /* enqueue the output file for replication */ + switch (node->outfile_type) { + case NODE_OUTFILE_TYPE_TEMP: + /* replicate the outfile of the temp node */ + vine_temp_queue_for_replication(vg->manager, node->outfile); + break; + case NODE_OUTFILE_TYPE_LOCAL: + case NODE_OUTFILE_TYPE_SHARED_FILE_SYSTEM: + break; + } + + /* submit children nodes with dependencies all resolved */ + submit_unblocked_children(vg, node); + + timestamp_t time_when_postprocessing_end = timestamp_get(); + node->postprocessing_time = time_when_postprocessing_end - time_when_postprocessing_start; + } else { + wait_timeout = 1; + } + } + + progress_bar_finish(pbar); + progress_bar_delete(pbar); + + double total_time_spent_on_unlink_local_files = 0; + double total_time_spent_on_prune_ancestors_of_temp_node = 0; + double total_time_spent_on_prune_ancestors_of_persisted_node = 0; + ITABLE_ITERATE(vg->nodes, nid_iter, node) + { + total_time_spent_on_unlink_local_files += node->time_spent_on_unlink_local_files; + total_time_spent_on_prune_ancestors_of_temp_node += 
node->time_spent_on_prune_ancestors_of_temp_node; + total_time_spent_on_prune_ancestors_of_persisted_node += node->time_spent_on_prune_ancestors_of_persisted_node; + } + total_time_spent_on_unlink_local_files /= 1e6; + total_time_spent_on_prune_ancestors_of_temp_node /= 1e6; + total_time_spent_on_prune_ancestors_of_persisted_node /= 1e6; + + debug(D_VINE, "total time spent on prune ancestors of temp node: %.6f seconds\n", total_time_spent_on_prune_ancestors_of_temp_node); + debug(D_VINE, "total time spent on prune ancestors of persisted node: %.6f seconds\n", total_time_spent_on_prune_ancestors_of_persisted_node); + debug(D_VINE, "total time spent on unlink local files: %.6f seconds\n", total_time_spent_on_unlink_local_files); + + if (vg->time_metrics_filename) { + print_time_metrics(vg, vg->time_metrics_filename); + } + + signal(SIGINT, previous_sigint_handler); + if (interrupted) { + raise(SIGINT); + } + + return; +} + +/** + * Delete a vine graph instance. + * @param vg Reference to the vine graph. 
+ */ +void vine_graph_delete(struct vine_graph *vg) +{ + if (!vg) { + return; + } + + uint64_t nid; + struct vine_node *node; + ITABLE_ITERATE(vg->nodes, nid, node) + { + if (node->infile) { + vine_prune_file(vg->manager, node->infile); + hash_table_remove(vg->manager->file_table, node->infile->cached_name); + } + if (node->outfile) { + vine_prune_file(vg->manager, node->outfile); + hash_table_remove(vg->outfile_cachename_to_node, node->outfile->cached_name); + hash_table_remove(vg->manager->file_table, node->outfile->cached_name); + } + if (node->outfile_type == NODE_OUTFILE_TYPE_SHARED_FILE_SYSTEM) { + unlink(node->outfile_remote_name); + } + vine_node_delete(node); + } + + list_delete(vg->resubmit_queue); + + free(vg->proxy_library_name); + free(vg->proxy_function_name); + + itable_delete(vg->nodes); + itable_delete(vg->task_id_to_node); + hash_table_delete(vg->outfile_cachename_to_node); + + hash_table_clear(vg->inout_filename_to_cached_name, (void *)free); + hash_table_delete(vg->inout_filename_to_cached_name); + + free(vg); +} diff --git a/taskvine/src/graph/dagvine/vine_graph/vine_graph.h b/taskvine/src/graph/dagvine/vine_graph/vine_graph.h new file mode 100644 index 0000000000..4433563ec9 --- /dev/null +++ b/taskvine/src/graph/dagvine/vine_graph/vine_graph.h @@ -0,0 +1,185 @@ +#ifndef VINE_GRAPH_H +#define VINE_GRAPH_H + +#include + +#include "vine_task.h" +#include "hash_table.h" +#include "itable.h" +#include "list.h" +#include "vine_manager.h" +#include "set.h" +#include "vine_node.h" +#include "taskvine.h" +#include "timestamp.h" + +/** The task priority algorithm used for vine graph scheduling. 
*/ +typedef enum { + TASK_PRIORITY_MODE_RANDOM = 0, /**< Assign random priority to tasks */ + TASK_PRIORITY_MODE_DEPTH_FIRST, /**< Prioritize deeper tasks first */ + TASK_PRIORITY_MODE_BREADTH_FIRST, /**< Prioritize shallower tasks first */ + TASK_PRIORITY_MODE_FIFO, /**< First in, first out priority */ + TASK_PRIORITY_MODE_LIFO, /**< Last in, first out priority */ + TASK_PRIORITY_MODE_LARGEST_INPUT_FIRST, /**< Prioritize tasks with larger inputs first */ + TASK_PRIORITY_MODE_LARGEST_STORAGE_FOOTPRINT_FIRST /**< Prioritize tasks with larger storage footprint first */ +} task_priority_mode_t; + +/** The vine graph (logical scheduling layer). */ +struct vine_graph { + struct vine_manager *manager; + struct itable *nodes; + struct itable *task_id_to_node; + struct hash_table *outfile_cachename_to_node; + /* Maps a logical in/out filename (remote_name) to a stable cached_name. */ + struct hash_table *inout_filename_to_cached_name; + + /* Unsuccessful tasks are appended to this list to be resubmitted later. */ + struct list *resubmit_queue; + + /* The directory to store the checkpointed results. + * Only intermediate results can be checkpointed, the fraction of intermediate results to checkpoint is controlled by the checkpoint-fraction parameter. */ + char *checkpoint_dir; + + /* Results of target nodes will be stored in this directory. + * This dir path can not necessarily be a shared file system directory, + * output files will be retrieved through the network instead, + * as long as the manager can access it. */ + char *output_dir; + + /* Python-side proxy library name. The context_graph runtime owns this library and sends calls into the vine graph + * so the manager can execute them through the proxy function. */ + char *proxy_library_name; + + /* The proxy function lives inside that library. It receives vine node IDs, looks up the + * Python callable and arguments inside the context_graph runtime, and executes the work. 
The runtime generates the name + * and shares it with the vine graph. */ + char *proxy_function_name; + + /* The depth of the pruning strategy. 0 means no pruning, 1 means the most aggressive pruning. */ + int prune_depth; + double checkpoint_fraction; /* 0 - 1, the fraction of intermediate results to checkpoint */ + + task_priority_mode_t task_priority_mode; /* priority mode for task graph task scheduling */ + double failure_injection_step_percent; /* 0 - 100, the percentage of steps to inject failure */ + + double progress_bar_update_interval_sec; /* update interval for the progress bar in seconds */ + + /* The filename of the csv file to store the time metrics of the vine graph. */ + char *time_metrics_filename; + + int enable_debug_log; /* whether to enable debug log */ + + int auto_recovery; /* whether to enable auto recovery */ + + int max_retry_attempts; /* the maximum number of times to retry a task */ + double retry_interval_sec; /* the interval between retries in seconds, 0 means no retry interval */ + + timestamp_t time_first_task_dispatched; /* the time when the first task is dispatched */ + timestamp_t time_last_task_retrieved; /* the time when the last task is retrieved */ + timestamp_t makespan_us; /* the makespan of the vine graph in microseconds */ +}; + +/* Public APIs for operating the vine graph */ + +/** Create a vine graph and return it. +@param q Reference to the current manager object. +@return A new vine graph. +*/ +struct vine_graph *vine_graph_create(struct vine_manager *q); + +/** Create a new node in the vine graph. +@param vg Reference to the vine graph. +@return The auto-assigned node id. +*/ +uint64_t vine_graph_add_node(struct vine_graph *vg); + +/** Mark a node as a retrieval target. +@param vg Reference to the vine graph. +@param node_id Identifier of the node to mark as target. +*/ +void vine_graph_set_target(struct vine_graph *vg, uint64_t node_id); + +/** Add a dependency between two nodes in the vine graph. 
+@param vg Reference to the vine graph. +@param parent_id Identifier of the parent node. +@param child_id Identifier of the child node. +*/ +void vine_graph_add_dependency(struct vine_graph *vg, uint64_t parent_id, uint64_t child_id); + +/** Compute the topology metrics of the vine graph. +@param vg Reference to the vine graph. +*/ +void vine_graph_compute_topology_metrics(struct vine_graph *vg); + +/** Get the heavy score of a node in the vine graph. +@param vg Reference to the vine graph. +@param node_id Identifier of the node. +@return The heavy score. +*/ +double vine_graph_get_node_heavy_score(const struct vine_graph *vg, uint64_t node_id); + +/** Execute the task graph. +@param vg Reference to the vine graph. +*/ +void vine_graph_execute(struct vine_graph *vg); + +/** Get the outfile remote name of a node in the vine graph. +@param vg Reference to the vine graph. +@param node_id Identifier of the node. +@return The outfile remote name. +*/ +const char *vine_graph_get_node_outfile_remote_name(const struct vine_graph *vg, uint64_t node_id); + +/** Get the local outfile source of a node in the vine graph. +@param vg Reference to the vine graph. +@param node_id Identifier of the node. +@return The local outfile source, or NULL if the node does not produce a local file. +*/ +const char *vine_graph_get_node_local_outfile_source(const struct vine_graph *vg, uint64_t node_id); + +/** Delete a vine graph. +@param vg Reference to the vine graph. +*/ +void vine_graph_delete(struct vine_graph *vg); + +/** Get the proxy library name of the vine graph. +@param vg Reference to the vine graph. +@return The proxy library name. +*/ +const char *vine_graph_get_proxy_library_name(const struct vine_graph *vg); + +/** Add an input file to a task. The input file will be declared as a temp file. +@param vg Reference to the vine graph. +@param task_id Identifier of the task. +@param filename Reference to the filename. 
+*/ +void vine_graph_add_task_input(struct vine_graph *vg, uint64_t task_id, const char *filename); + +/** Add an output file to a task. The output file will be declared as a temp file. +@param vg Reference to the vine graph. +@param task_id Identifier of the task. +@param filename Reference to the filename. +*/ +void vine_graph_add_task_output(struct vine_graph *vg, uint64_t task_id, const char *filename); + +/** Set the proxy function name of the vine graph. +@param vg Reference to the vine graph. +@param proxy_function_name Reference to the proxy function name. +*/ +void vine_graph_set_proxy_function_name(struct vine_graph *vg, const char *proxy_function_name); + +/** Tune the vine graph. +@param vg Reference to the vine graph. +@param name Reference to the name of the parameter to tune. +@param value Reference to the value of the parameter to tune. +@return 0 on success, -1 on failure. +*/ +int vine_graph_tune(struct vine_graph *vg, const char *name, const char *value); + +/** Get the makespan of the vine graph in microseconds. +@param vg Reference to the vine graph. +@return The makespan in microseconds. 
+*/ +uint64_t vine_graph_get_makespan_us(const struct vine_graph *vg); + +#endif // VINE_GRAPH_H diff --git a/taskvine/src/graph/dagvine/vine_graph/vine_graph.i b/taskvine/src/graph/dagvine/vine_graph/vine_graph.i new file mode 100644 index 0000000000..65be1a8edb --- /dev/null +++ b/taskvine/src/graph/dagvine/vine_graph/vine_graph.i @@ -0,0 +1,15 @@ +/* SWIG interface for local dagvine graph API bindings */ +%module vine_graph_capi + +%{ +#include "int_sizes.h" +#include "vine_graph.h" +%} + +%include "stdint.i" +%include "int_sizes.h" + +/* Import existing SWIG interface for type information (do not wrap again) */ +%import "../../bindings/python3/taskvine.i" + +%include "vine_graph.h" diff --git a/taskvine/src/graph/dagvine/vine_graph/vine_graph_client.py b/taskvine/src/graph/dagvine/vine_graph/vine_graph_client.py new file mode 100644 index 0000000000..43c7b7e210 --- /dev/null +++ b/taskvine/src/graph/dagvine/vine_graph/vine_graph_client.py @@ -0,0 +1,93 @@ +# Copyright (C) 2025 The University of Notre Dame +# This software is distributed under the GNU General Public License. +# See the file COPYING for details. + +"""High-level client that exposes the C vine graph to Python callers.""" + +from . 
import vine_graph_capi + + +class VineGraphClient: + """Python-friendly wrapper that hides the raw SWIG API surface.""" + + def __init__(self, c_taskvine): + """Create and own the lifecycle of the backing C vine graph instance.""" + self._c_graph = vine_graph_capi.vine_graph_create(c_taskvine) + self._key_to_id = {} + self._id_to_key = {} + + def tune(self, name, value): + """Forward tuning parameters directly to the C vine graph.""" + vine_graph_capi.vine_graph_tune(self._c_graph, name, value) + + def add_node(self, key, is_target=None): + """Create a node in the C graph and remember the key↔id mapping.""" + node_id = vine_graph_capi.vine_graph_add_node(self._c_graph) + self._key_to_id[key] = node_id + self._id_to_key[node_id] = key + if is_target is not None and bool(is_target): + vine_graph_capi.vine_graph_set_target(self._c_graph, node_id) + return node_id + + def set_target(self, key): + """Mark an existing node as a target output.""" + node_id = self._key_to_id.get(key) + if node_id is None: + raise KeyError(f"Key not found: {key}") + vine_graph_capi.vine_graph_set_target(self._c_graph, node_id) + + def add_dependency(self, parent_key, child_key): + """Add an edge in the C graph using the remembered id mapping.""" + if parent_key not in self._key_to_id or child_key not in self._key_to_id: + raise KeyError("parent_key or child_key missing in mapping; call add_node() first") + vine_graph_capi.vine_graph_add_dependency( + self._c_graph, self._key_to_id[parent_key], self._key_to_id[child_key] + ) + + def compute_topology_metrics(self): + """Trigger the C graph to compute depth/height, heavy-score, etc.""" + vine_graph_capi.vine_graph_compute_topology_metrics(self._c_graph) + + def get_node_outfile_remote_name(self, key): + """Ask the C layer where a node's output will be stored.""" + if key not in self._key_to_id: + raise KeyError(f"Key not found: {key}") + return vine_graph_capi.vine_graph_get_node_outfile_remote_name( + self._c_graph, self._key_to_id[key] + ) + 
+ def get_proxy_library_name(self): + """Expose the randomly generated proxy library name from the C side.""" + return vine_graph_capi.vine_graph_get_proxy_library_name(self._c_graph) + + def set_proxy_function(self, proxy_function): + """Tell the C graph which Python function should run on the workers.""" + vine_graph_capi.vine_graph_set_proxy_function_name( + self._c_graph, proxy_function.__name__ + ) + + def add_task_input(self, task_key, filename): + """Add an input file to a task.""" + task_id = self._key_to_id.get(task_key) + if task_id is None: + raise KeyError(f"Task key not found: {task_key}") + vine_graph_capi.vine_graph_add_task_input(self._c_graph, task_id, filename) + + def add_task_output(self, task_key, filename): + """Add an output file to a task.""" + task_id = self._key_to_id.get(task_key) + if task_id is None: + raise KeyError(f"Task key not found: {task_key}") + vine_graph_capi.vine_graph_add_task_output(self._c_graph, task_id, filename) + + def execute(self): + """Kick off execution; runs through SWIG down into the C orchestration loop.""" + vine_graph_capi.vine_graph_execute(self._c_graph) + + def get_makespan_us(self): + """Get the makespan of the vine graph in microseconds.""" + return vine_graph_capi.vine_graph_get_makespan_us(self._c_graph) + + def delete(self): + """Release the C resources and clear the client.""" + vine_graph_capi.vine_graph_delete(self._c_graph) diff --git a/taskvine/src/graph/dagvine/vine_graph/vine_node.c b/taskvine/src/graph/dagvine/vine_graph/vine_node.c new file mode 100644 index 0000000000..48d43ffccb --- /dev/null +++ b/taskvine/src/graph/dagvine/vine_graph/vine_node.c @@ -0,0 +1,410 @@ +#include +#include +#include +#include +#include +#include +#include + +#include "jx.h" +#include "jx_print.h" +#include "xxmalloc.h" +#include "stringtools.h" +#include "timestamp.h" +#include "set.h" +#include "hash_table.h" +#include "debug.h" +#include "random.h" +#include "uuid.h" + +#include "vine_file.h" +#include 
"vine_task.h" +#include "vine_worker_info.h" +#include "vine_temp.h" +#include "vine_node.h" +#include "taskvine.h" + +/*************************************************************/ +/* Private Functions */ +/*************************************************************/ + +/** + * Check if the outfile of a node is persisted. + * A node is considered persisted if it has completed and 1) the outfile is written to the shared file system, + * 2) the outfile is written to the local staging directory. + * @param node Reference to the node object. + * @return 1 if the outfile is persisted, 0 otherwise. + */ +static int node_outfile_has_been_persisted(struct vine_node *node) +{ + if (!node) { + return 0; + } + + /* if the node is not completed then the outfile is definitely not persisted */ + if (!node->completed) { + return 0; + } + + switch (node->outfile_type) { + case NODE_OUTFILE_TYPE_LOCAL: + return 1; + case NODE_OUTFILE_TYPE_SHARED_FILE_SYSTEM: + return 1; + case NODE_OUTFILE_TYPE_TEMP: + return 0; + } + + return 0; +} + +/** + * Update the critical path time of a node. + * @param node Reference to the node object. + * @param execution_time Reference to the execution time of the node. + */ +void vine_node_update_critical_path_time(struct vine_node *node, timestamp_t execution_time) +{ + timestamp_t max_parent_critical_path_time = 0; + struct vine_node *parent_node; + LIST_ITERATE(node->parents, parent_node) + { + if (parent_node->critical_path_time > max_parent_critical_path_time) { + max_parent_critical_path_time = parent_node->critical_path_time; + } + } + node->critical_path_time = max_parent_critical_path_time + execution_time; +} + +/** + * The dfs helper function for finding parents in a specific depth. + * @param node Reference to the node object. + * @param remaining_depth Reference to the remaining depth. + * @param result Reference to the result list. + * @param visited Reference to the visited set. 
+ */ +static void find_parents_dfs(struct vine_node *node, int remaining_depth, struct list *result, struct set *visited) +{ + if (!node || set_lookup(visited, node)) { + return; + } + + set_insert(visited, node); + if (remaining_depth == 0) { + list_push_tail(result, node); + return; + } + struct vine_node *parent_node; + LIST_ITERATE(node->parents, parent_node) + { + find_parents_dfs(parent_node, remaining_depth - 1, result, visited); + } +} + +/*************************************************************/ +/* Public APIs */ +/*************************************************************/ + +/** + * Create a new vine node owned by the C-side graph. + * @param node_id Graph-assigned identifier that keeps C and Python in sync. + * @return Newly allocated vine node. + */ +struct vine_node *vine_node_create(uint64_t node_id) +{ + struct vine_node *node = xxmalloc(sizeof(struct vine_node)); + + node->is_target = 0; + node->node_id = node_id; + + /* create a unique UUID-based remote outfile name for this node */ + cctools_uuid_t uuid; + cctools_uuid_create(&uuid); + node->outfile_remote_name = xxstrdup(uuid.str); + + node->prune_status = PRUNE_STATUS_NOT_PRUNED; + node->parents = list_create(); + node->children = list_create(); + node->remaining_parents_count = 0; + node->fired_parents = set_create(0); + node->completed = 0; + node->prune_depth = 0; + node->outfile_size_bytes = 0; + node->retry_attempts_left = 0; + node->in_resubmit_queue = 0; + + node->depth = -1; + node->height = -1; + node->upstream_subgraph_size = -1; + node->downstream_subgraph_size = -1; + node->fan_in = -1; + node->fan_out = -1; + node->heavy_score = -1; + + node->time_spent_on_unlink_local_files = 0; + node->time_spent_on_prune_ancestors_of_temp_node = 0; + node->time_spent_on_prune_ancestors_of_persisted_node = 0; + + node->submission_time = 0; + node->scheduling_time = 0; + node->commit_time = 0; + node->execution_time = 0; + node->retrieval_time = 0; + node->postprocessing_time = 0; + + 
node->critical_path_time = -1; + node->last_failure_time = 0; + + return node; +} + +/** + * Construct the task arguments for the node. + * @param node Reference to the node object. + * @return The task arguments in JSON format: {"fn_args": [node_id], "fn_kwargs": {}}. + */ +char *vine_node_construct_task_arguments(struct vine_node *node) +{ + if (!node) { + return NULL; + } + + struct jx *event = jx_object(NULL); + struct jx *args = jx_array(NULL); + jx_array_append(args, jx_integer(node->node_id)); + jx_insert(event, jx_string("fn_args"), args); + jx_insert(event, jx_string("fn_kwargs"), jx_object(NULL)); + + char *infile_content = jx_print_string(event); + jx_delete(event); + + return infile_content; +} + +/** + * Find all parents in a specific depth of the node. + * @param node Reference to the node object. + * @param depth Reference to the depth. + * @return The list of parents. + */ +struct list *vine_node_find_parents_by_depth(struct vine_node *node, int depth) +{ + if (!node || depth < 0) { + return NULL; + } + + struct list *result = list_create(); + + struct set *visited = set_create(0); + find_parents_dfs(node, depth, result, visited); + set_delete(visited); + + return result; +} + +/** + * Perform a reverse BFS traversal to identify all ancestors of a given node + * whose outputs can be safely pruned. + * + * A parent node is considered "safe" if: + * 1. All of its child nodes are either: + * - already persisted (their outputs are stored in a reliable location), or + * - already marked as safely pruned. + * 2. None of its child nodes remain in an unsafe or incomplete state. + * + * This function starts from the given node and iteratively walks up the DAG, + * collecting all such "safe" ancestors into a set. Nodes that have already + * been marked as PRUNE_STATUS_SAFE are skipped early. + * + * The returned set contains all ancestors that can be safely pruned once the + * current node’s output has been persisted. 
+ * + * @param start_node The node from which to begin the reverse search. + * @return A set of ancestor nodes that are safe to prune (excluding start_node). + */ +struct set *vine_node_find_safe_ancestors(struct vine_node *start_node) +{ + if (!start_node) { + return NULL; + } + + struct set *visited_nodes = set_create(0); + struct set *safe_ancestors = set_create(0); + + struct list *queue = list_create(); + + list_push_tail(queue, start_node); + set_insert(visited_nodes, start_node); + + while (list_size(queue) > 0) { + struct vine_node *current_node = list_pop_head(queue); + struct vine_node *parent_node; + + LIST_ITERATE(current_node->parents, parent_node) + { + if (set_lookup(visited_nodes, parent_node)) { + continue; + } + + set_insert(visited_nodes, parent_node); + + /* shortcut if this parent has already been marked as safely pruned */ + if (parent_node->prune_status == PRUNE_STATUS_SAFE) { + continue; + } + + /* check if all children of this parent are safe */ + int all_children_safe = 1; + struct vine_node *child_node; + LIST_ITERATE(parent_node->children, child_node) + { + /* shortcut if this child is part of the recovery subgraph */ + if (set_lookup(visited_nodes, child_node)) { + continue; + } + /* shortcut if this outside child is not persisted */ + if (!node_outfile_has_been_persisted(child_node)) { + all_children_safe = 0; + break; + } + /* shortcut if this outside child is unsafely pruned */ + if (child_node->prune_status == PRUNE_STATUS_UNSAFE) { + all_children_safe = 0; + break; + } + } + + if (all_children_safe) { + set_insert(safe_ancestors, parent_node); + list_push_tail(queue, parent_node); + } + } + } + + list_delete(queue); + set_delete(visited_nodes); + + return safe_ancestors; +} + +/** + * Print the info of the node. + * @param node Reference to the node object. 
+ */ +void vine_node_debug_print(struct vine_node *node) +{ + if (!node) { + return; + } + + if (!node->task) { + debug(D_ERROR, "node %" PRIu64 " has no task", node->node_id); + return; + } + + debug(D_VINE, "---------------- Node Info ----------------"); + debug(D_VINE, "node_id: %" PRIu64, node->node_id); + debug(D_VINE, "task_id: %d", node->task->task_id); + debug(D_VINE, "depth: %d", node->depth); + debug(D_VINE, "height: %d", node->height); + debug(D_VINE, "prune_depth: %d", node->prune_depth); + + if (node->outfile_remote_name) { + debug(D_VINE, "outfile_remote_name: %s", node->outfile_remote_name); + } + + if (node->outfile) { + const char *type_str = "UNKNOWN"; + switch (node->outfile->type) { + case VINE_FILE: + type_str = "VINE_FILE"; + break; + case VINE_TEMP: + type_str = "VINE_TEMP"; + break; + case VINE_URL: + type_str = "VINE_URL"; + break; + case VINE_BUFFER: + type_str = "VINE_BUFFER"; + break; + case VINE_MINI_TASK: + type_str = "VINE_MINI_TASK"; + break; + } + debug(D_VINE, "outfile_type: %s", type_str); + debug(D_VINE, "outfile_cached_name: %s", node->outfile->cached_name ? node->outfile->cached_name : "(null)"); + } else { + debug(D_VINE, "outfile_type: SHARED_FILE_SYSTEM or none"); + } + + /* print parent and child node ids */ + char *parent_ids = NULL; + struct vine_node *p; + LIST_ITERATE(node->parents, p) + { + if (!parent_ids) { + parent_ids = string_format("%" PRIu64, p->node_id); + } else { + char *tmp = string_format("%s, %" PRIu64, parent_ids, p->node_id); + free(parent_ids); + parent_ids = tmp; + } + } + + char *child_ids = NULL; + struct vine_node *c; + LIST_ITERATE(node->children, c) + { + if (!child_ids) { + child_ids = string_format("%" PRIu64, c->node_id); + } else { + char *tmp = string_format("%s, %" PRIu64, child_ids, c->node_id); + free(child_ids); + child_ids = tmp; + } + } + + debug(D_VINE, "parents: %s", parent_ids ? parent_ids : "(none)"); + debug(D_VINE, "children: %s", child_ids ? 
child_ids : "(none)"); + + free(parent_ids); + free(child_ids); + + debug(D_VINE, "-------------------------------------------"); +} + +/** + * Delete the node and all of its associated resources. + * @param node Reference to the node object. + */ +void vine_node_delete(struct vine_node *node) +{ + if (!node) { + return; + } + + if (node->outfile_remote_name) { + free(node->outfile_remote_name); + } + + vine_task_delete(node->task); + node->task = NULL; + + if (node->infile) { + vine_file_delete(node->infile); + node->infile = NULL; + } + if (node->outfile) { + vine_file_delete(node->outfile); + node->outfile = NULL; + } + + list_delete(node->parents); + list_delete(node->children); + + if (node->fired_parents) { + set_delete(node->fired_parents); + } + free(node); +} \ No newline at end of file diff --git a/taskvine/src/graph/dagvine/vine_graph/vine_node.h b/taskvine/src/graph/dagvine/vine_graph/vine_node.h new file mode 100644 index 0000000000..1910a11afa --- /dev/null +++ b/taskvine/src/graph/dagvine/vine_graph/vine_node.h @@ -0,0 +1,123 @@ +#ifndef VINE_NODE_H +#define VINE_NODE_H + +#include + +#include "vine_task.h" +#include "hash_table.h" +#include "list.h" +#include "set.h" +#include "taskvine.h" + +/** The storage type of the node's output file. */ +typedef enum { + NODE_OUTFILE_TYPE_LOCAL = 0, /* Node-output file will be stored locally on the local staging directory */ + NODE_OUTFILE_TYPE_TEMP, /* Node-output file will be stored in the temporary node-local storage */ + NODE_OUTFILE_TYPE_SHARED_FILE_SYSTEM, /* Node-output file will be stored in the persistent shared file system */ +} node_outfile_type_t; + +/** The status of an output file of a node. */ +typedef enum { + PRUNE_STATUS_NOT_PRUNED = 0, + PRUNE_STATUS_SAFE, + PRUNE_STATUS_UNSAFE +} prune_status_t; + +/** The vine node object. */ +struct vine_node { + /* Identity */ + uint64_t node_id; /* Unique identifier assigned by the graph when the node is created. 
*/ + int is_target; /* If true, the output of the node is retrieved when the task finishes. */ + + /* Task and files */ + struct vine_task *task; + struct vine_file *infile; + struct vine_file *outfile; + char *outfile_remote_name; + size_t outfile_size_bytes; + node_outfile_type_t outfile_type; + + /* Graph relationships */ + struct list *parents; + struct list *children; + + /* Execution and scheduling state */ + /* Number of unresolved parent dependencies. This is initialized to the in-degree + * (list_size(parents)) before execution starts, and decremented exactly once per + * parent->child edge when the parent first completes. */ + int remaining_parents_count; + /* Edge-fired guard: tracks which parent edges have already been consumed for this child. */ + struct set *fired_parents; + int completed; + prune_status_t prune_status; + int retry_attempts_left; + int in_resubmit_queue; + + /* Structural metrics */ + int prune_depth; + int depth; + int height; + int upstream_subgraph_size; + int downstream_subgraph_size; + int fan_in; + int fan_out; + double heavy_score; + + /* Time metrics */ + timestamp_t critical_path_time; + timestamp_t time_spent_on_unlink_local_files; + timestamp_t time_spent_on_prune_ancestors_of_temp_node; + timestamp_t time_spent_on_prune_ancestors_of_persisted_node; + + timestamp_t submission_time; + timestamp_t scheduling_time; + timestamp_t commit_time; + timestamp_t execution_time; + timestamp_t retrieval_time; + timestamp_t postprocessing_time; + + timestamp_t last_failure_time; +}; + +/** Create a new vine node. +@param node_id Unique node identifier supplied by the owning graph. +@return Newly allocated vine node instance. +*/ +struct vine_node *vine_node_create(uint64_t node_id); + +/** Create the task arguments for a vine node. +@param node Reference to the vine node. +@return The task arguments in JSON format: {"fn_args": [node_id], "fn_kwargs": {}}. 
+*/ +char *vine_node_construct_task_arguments(struct vine_node *node); + +/** Delete a vine node and release owned resources. +@param node Reference to the vine node. +*/ +void vine_node_delete(struct vine_node *node); + +/** Print information about a vine node. +@param node Reference to the vine node. +*/ +void vine_node_debug_print(struct vine_node *node); + +/** Find all safe ancestors of a vine node. +@param start_node Reference to the start node. +@return The set of safe ancestors. +*/ +struct set *vine_node_find_safe_ancestors(struct vine_node *start_node); + +/** Find all parents of a vine node at a specific depth. +@param node Reference to the node. +@param depth Reference to the depth. +@return The list of parents. +*/ +struct list *vine_node_find_parents_by_depth(struct vine_node *node, int depth); + +/** Update the critical path time of a vine node. +@param node Reference to the vine node. +@param execution_time Reference to the execution time of the node. +*/ +void vine_node_update_critical_path_time(struct vine_node *node, timestamp_t execution_time); + +#endif // VINE_NODE_H \ No newline at end of file diff --git a/taskvine/src/manager/Makefile b/taskvine/src/manager/Makefile index db09403667..5c56c9f09b 100644 --- a/taskvine/src/manager/Makefile +++ b/taskvine/src/manager/Makefile @@ -40,6 +40,7 @@ TARGETS = $(LIBRARIES) all: $(TARGETS) + libtaskvine.a: $(OBJECTS) install: all diff --git a/taskvine/src/manager/stnPTyT6 b/taskvine/src/manager/stnPTyT6 new file mode 100644 index 0000000000..8e72ad4194 Binary files /dev/null and b/taskvine/src/manager/stnPTyT6 differ diff --git a/taskvine/src/manager/taskvine.h b/taskvine/src/manager/taskvine.h index bc3f2f7ac8..946c2c62cd 100644 --- a/taskvine/src/manager/taskvine.h +++ b/taskvine/src/manager/taskvine.h @@ -149,7 +149,6 @@ typedef enum { VINE_MINI_TASK, /**< A file obtained by executing a Unix command line. */ } vine_file_type_t; - /** Statistics describing a manager. 
*/ struct vine_stats { /* Stats for the current state of workers: */ @@ -1119,6 +1118,14 @@ int vine_enable_peer_transfers(struct vine_manager *m); /** Disable taskvine peer transfers to be scheduled by the manager **/ int vine_disable_peer_transfers(struct vine_manager *m); +/** Enable recovery tasks to be returned by vine_wait. +By default, recovery tasks are handled internally by the manager. **/ +int vine_enable_return_recovery_tasks(struct vine_manager *m); + +/** Disable recovery tasks from being returned by vine_wait. +Recovery tasks will be handled internally by the manager. **/ +int vine_disable_return_recovery_tasks(struct vine_manager *m); + /** When enabled, resources to tasks in are assigned in proportion to the size of the worker. If a resource is specified (e.g. with @ref vine_task_set_cores), proportional resources never go below explicit specifications. This mode is most @@ -1530,6 +1537,12 @@ void vine_counters_print(); */ char *vine_version_string(); +/** Returns the runtime directory +@param m Reference to the current manager object. +@return A string. +*/ +char *vine_get_runtime_directory(struct vine_manager *m); + /** Returns path relative to the logs runtime directory @param m Reference to the current manager object. @param path Target filename. diff --git a/taskvine/src/manager/vine_file_replica_table.c b/taskvine/src/manager/vine_file_replica_table.c index 1dad20dd62..bf954f06a4 100644 --- a/taskvine/src/manager/vine_file_replica_table.c +++ b/taskvine/src/manager/vine_file_replica_table.c @@ -25,7 +25,8 @@ See the file COPYING for details. 
int vine_file_replica_table_insert(struct vine_manager *m, struct vine_worker_info *w, const char *cachename, struct vine_file_replica *replica) { if (hash_table_lookup(w->current_files, cachename)) { - return 0; + // delete the previous replica because the replica's size might have changed + vine_file_replica_table_remove(m, w, cachename); } double prev_available = w->resources->disk.total - BYTES_TO_MEGABYTES(w->inuse_cache); diff --git a/taskvine/src/manager/vine_manager.c b/taskvine/src/manager/vine_manager.c index 66a672b835..e4df62c292 100644 --- a/taskvine/src/manager/vine_manager.c +++ b/taskvine/src/manager/vine_manager.c @@ -170,6 +170,7 @@ static void delete_uncacheable_files(struct vine_manager *q, struct vine_worker_ static int release_worker(struct vine_manager *q, struct vine_worker_info *w); struct vine_task *send_library_to_worker(struct vine_manager *q, struct vine_worker_info *w, const char *name); +static void push_task_to_ready_tasks(struct vine_manager *q, struct vine_task *t); /* Return the number of workers matching a given type: WORKER, STATUS, etc */ @@ -733,7 +734,9 @@ static vine_msg_code_t vine_manager_recv_no_retry(struct vine_manager *q, struct string_prefix_is(line, "wable_status") || string_prefix_is(line, "resources_status")) { result = handle_manager_status(q, w, line, stoptime); } else if (string_prefix_is(line, "available_results")) { - hash_table_insert(q->workers_with_watched_file_updates, w->hashkey, w); + if (!hash_table_lookup(q->workers_with_watched_file_updates, w->hashkey)) { + hash_table_insert(q->workers_with_watched_file_updates, w->hashkey, w); + } result = VINE_MSG_PROCESSED; } else if (string_prefix_is(line, "resources")) { result = handle_resources(q, w, stoptime); @@ -1482,6 +1485,10 @@ static int retrieve_ready_task(struct vine_manager *q, struct vine_task *t, doub result = VINE_RESULT_FIXED_LOCATION_MISSING; } } + if (!q->auto_recovery && !vine_manager_check_inputs_available(q, t)) { + debug(D_VINE, "task %d 
has missing input files", t->task_id); + result = VINE_RESULT_INPUT_MISSING; + } /* If any of the reasons fired, then expire the task and put in the retrieved queue. */ if (result != VINE_RESULT_SUCCESS) { @@ -3418,6 +3425,10 @@ static void vine_manager_consider_recovery_task(struct vine_manager *q, struct v return; } + if (!q->auto_recovery) { + return; + } + /* Prevent race between original task and recovery task after worker crash. * Example: Task T completes on worker W, creates file F, T moves to WAITING_RETRIEVAL. * W crashes before stdout retrieval, T gets rescheduled to READY, F is lost and triggers @@ -3479,6 +3490,7 @@ static int vine_manager_check_inputs_available(struct vine_manager *q, struct vi all_available = 0; } } + return all_available; } @@ -4166,6 +4178,13 @@ struct vine_manager *vine_ssl_create(int port, const char *key, const char *cert q->enforce_worker_eviction_interval = 0; q->time_start_worker_eviction = 0; + q->return_recovery_tasks = 0; + q->auto_recovery = 1; + q->balance_worker_disk_load = 0; + q->when_last_offloaded = 0; + q->peak_used_cache = 0; + q->shutting_down = 0; + if ((envstring = getenv("VINE_BANDWIDTH"))) { q->bandwidth_limit = string_metric_parse(envstring); if (q->bandwidth_limit < 0) { @@ -4242,6 +4261,20 @@ int vine_disable_peer_transfers(struct vine_manager *q) return 1; } +int vine_enable_return_recovery_tasks(struct vine_manager *q) +{ + debug(D_VINE, "Return recovery tasks enabled"); + q->return_recovery_tasks = 1; + return 1; +} + +int vine_disable_return_recovery_tasks(struct vine_manager *q) +{ + debug(D_VINE, "Return recovery tasks disabled"); + q->return_recovery_tasks = 0; + return 1; +} + int vine_enable_proportional_resources(struct vine_manager *q) { debug(D_VINE, "Proportional resources enabled"); @@ -4386,6 +4419,8 @@ void vine_delete(struct vine_manager *q) * disable the immediate recovery to avoid submitting recovery tasks for lost files */ q->immediate_recovery = 0; + q->shutting_down = 1; + 
vine_fair_write_workflow_info(q); release_all_workers(q); @@ -4591,8 +4626,7 @@ char *vine_monitor_wrap(struct vine_manager *q, struct vine_worker_info *w, stru return wrap_cmd; } -/* Put a given task on the ready list, taking into account the task priority and the manager schedule. */ - +/* Put a given task on the ready queue, taking into account the task priority and the manager schedule. */ static void push_task_to_ready_tasks(struct vine_manager *q, struct vine_task *t) { vine_priority_t manager_priority = VINE_PRIORITY_NORMAL; @@ -5049,6 +5083,7 @@ static int poll_active_workers(struct vine_manager *q, int stoptime) // promptly dispatch tasks, while avoiding wasting cpu cycles when the // state of the system cannot be advanced. int msec = q->nothing_happened_last_wait_cycle ? 1000 : 0; + msec = 0; if (stoptime) { msec = MIN(msec, (stoptime - time(0)) * 1000); } @@ -5158,7 +5193,11 @@ struct vine_task *find_task_to_return(struct vine_manager *q, const char *tag, i return t; break; case VINE_TASK_TYPE_RECOVERY: - /* do nothing and let vine_manager_consider_recovery_task do its job */ + /* if configured to return recovery tasks, return them to the user */ + if (q->return_recovery_tasks) { + return t; + } + /* otherwise, do nothing and let vine_manager_consider_recovery_task do its job */ break; case VINE_TASK_TYPE_LIBRARY_INSTANCE: /* silently delete the task, since it was created by the manager. 
@@ -5878,10 +5917,12 @@ int vine_tune(struct vine_manager *q, const char *name, double value) } else if (!strcmp(name, "max-library-retries")) { q->max_library_retries = MIN(1, value); + } else if (!strcmp(name, "disk-proportion-available-to-task")) { if (value < 1 && value > 0) { q->disk_proportion_available_to_task = value; } + } else if (!strcmp(name, "enforce-worker-eviction-interval")) { q->enforce_worker_eviction_interval = (timestamp_t)(MAX(0, (int)value) * ONE_SECOND); diff --git a/taskvine/src/manager/vine_manager.h b/taskvine/src/manager/vine_manager.h index bcc2d2afd1..3588cd443a 100644 --- a/taskvine/src/manager/vine_manager.h +++ b/taskvine/src/manager/vine_manager.h @@ -237,6 +237,13 @@ struct vine_manager { double sandbox_grow_factor; /* When task disk sandboxes are exhausted, increase the allocation using their measured valued times this factor */ double disk_proportion_available_to_task; /* intentionally reduces disk allocation for tasks to reserve some space for cache growth. */ + int return_recovery_tasks; /* If true, recovery tasks are returned by vine_wait to the user. By default they are handled internally. */ + int auto_recovery; /* If true, recovery tasks are automatically created internally. */ + int balance_worker_disk_load; /* If true, offload replicas from workers that are overloaded with temp files. */ + timestamp_t when_last_offloaded; + int64_t peak_used_cache; + int shutting_down; + /* TODO: confirm data type — int or int64? */ int max_task_stdout_storage; /* Maximum size of standard output from task. (If larger, send to a separate file.) */ int max_new_workers; /* Maximum number of workers to add in a single cycle before dealing with other matters.
*/ diff --git a/taskvine/src/manager/vine_runtime_dir.c b/taskvine/src/manager/vine_runtime_dir.c index a2bc4194a0..5f5f586b32 100644 --- a/taskvine/src/manager/vine_runtime_dir.c +++ b/taskvine/src/manager/vine_runtime_dir.c @@ -134,6 +134,11 @@ char *vine_runtime_directory_create() return runtime_dir; } +char *vine_get_runtime_directory(struct vine_manager *m) +{ + return m->runtime_directory; +} + char *vine_get_path_log(struct vine_manager *m, const char *path) { return string_format("%s/vine-logs%s%s", m->runtime_directory, path ? "/" : "", path ? path : ""); diff --git a/taskvine/src/manager/vine_schedule.c b/taskvine/src/manager/vine_schedule.c index dd08eabc50..284be290c1 100644 --- a/taskvine/src/manager/vine_schedule.c +++ b/taskvine/src/manager/vine_schedule.c @@ -153,6 +153,51 @@ int check_worker_have_enough_disk_with_inputs(struct vine_manager *q, struct vin return ok; } +/* Count the number of committable cores for all workers. */ +int vine_schedule_count_committable_cores(struct vine_manager *q) +{ + int count = 0; + + char *key; + struct vine_worker_info *w; + HASH_TABLE_ITERATE(q->worker_table, key, w) + { + /* skip if the worker hasn't reported any resources yet */ + if (!w->resources) { + continue; + } + /* skip if the worker has no cores or gpus */ + if (w->resources->cores.total <= 0 && w->resources->gpus.total <= 0) { + continue; + } + /* count the number of free slots on running libraries */ + if (w->current_libraries && itable_size(w->current_libraries) > 0) { + uint64_t library_task_id = 0; + struct vine_task *library_task = NULL; + ITABLE_ITERATE(w->current_libraries, library_task_id, library_task) + { + if (!library_task || !library_task->provides_library) { + continue; + } + if (library_task->function_slots_total > library_task->function_slots_inuse) { + count += library_task->function_slots_total - library_task->function_slots_inuse; + } + } + } + /* count the number of free cores */ + if (w->resources->cores.total > 0 && 
overcommitted_resource_total(q, w->resources->cores.total) > w->resources->cores.inuse) { + count += overcommitted_resource_total(q, w->resources->cores.total) - w->resources->cores.inuse; + } + /* count the number of free gpus */ + if (w->resources->gpus.total > 0 && overcommitted_resource_total(q, w->resources->gpus.total) > w->resources->gpus.inuse) { + // Don't count GPUs for now, because the manager does not yet fully support scheduling tasks to GPUs. + // count += overcommitted_resource_total(q, w->resources->gpus.total) - w->resources->gpus.inuse; + } + } + + return count; +} + /* Check if this worker has committable resources for any type of task. * If it returns false, neither a function task, library task nor a regular task can run on this worker. * If it returns true, the worker has either free slots for function calls or sufficient resources for regular tasks. @@ -627,6 +672,5 @@ int vine_schedule_check_fixed_location(struct vine_manager *q, struct vine_task return 1; } } - debug(D_VINE, "Missing fixed_location dependencies for task: %d", t->task_id); return 0; -} +} \ No newline at end of file diff --git a/taskvine/src/manager/vine_schedule.h b/taskvine/src/manager/vine_schedule.h index 4ef0c613a9..6455c18798 100644 --- a/taskvine/src/manager/vine_schedule.h +++ b/taskvine/src/manager/vine_schedule.h @@ -24,4 +24,6 @@ int vine_schedule_check_fixed_location(struct vine_manager *q, struct vine_task int vine_schedule_in_ramp_down(struct vine_manager *q); struct vine_task *vine_schedule_find_library(struct vine_manager *q, struct vine_worker_info *w, const char *library_name); int check_worker_against_task(struct vine_manager *q, struct vine_worker_info *w, struct vine_task *t); +int vine_schedule_count_committable_cores(struct vine_manager *q); + #endif diff --git a/taskvine/src/manager/vine_task.c b/taskvine/src/manager/vine_task.c index c401324b9f..8616e4ecda 100644 --- a/taskvine/src/manager/vine_task.c +++ b/taskvine/src/manager/vine_task.c @@ -91,6
+91,7 @@ void vine_task_clean(struct vine_task *t) { t->time_when_commit_start = 0; t->time_when_commit_end = 0; + t->time_when_retrieval = 0; t->time_when_done = 0; @@ -154,6 +155,9 @@ void vine_task_reset(struct vine_task *t) t->time_workers_execute_exhaustion = 0; t->time_workers_execute_failure = 0; + t->time_when_commit_start = 0; + t->time_when_commit_end = 0; + rmsummary_delete(t->resources_measured); rmsummary_delete(t->resources_allocated); t->resources_measured = rmsummary_create(-1); diff --git a/taskvine/src/manager/vine_task.h b/taskvine/src/manager/vine_task.h index 665ffee187..ad03efd76e 100644 --- a/taskvine/src/manager/vine_task.h +++ b/taskvine/src/manager/vine_task.h @@ -124,7 +124,6 @@ struct vine_task { timestamp_t time_when_last_failure; /**< If larger than 0, the time at which the last task failure was detected. */ - timestamp_t time_workers_execute_last_start; /**< The time when the last complete execution for this task started at a worker. */ timestamp_t time_workers_execute_last_end; /**< The time when the last complete execution for this task ended at a worker. */ diff --git a/taskvine/src/manager/vine_temp.c b/taskvine/src/manager/vine_temp.c index 269d47c8e0..18fa7c8e49 100644 --- a/taskvine/src/manager/vine_temp.c +++ b/taskvine/src/manager/vine_temp.c @@ -450,4 +450,4 @@ void vine_temp_shift_disk_load(struct vine_manager *q, struct vine_worker_info * /* We can clean up the original one safely when the replica arrives at the destination worker. */ vine_temp_clean_redundant_replicas(q, f); -} \ No newline at end of file +} diff --git a/taskvine/src/manager/vine_temp.h b/taskvine/src/manager/vine_temp.h index d94c05d372..25116d77b8 100644 --- a/taskvine/src/manager/vine_temp.h +++ b/taskvine/src/manager/vine_temp.h @@ -18,4 +18,4 @@ void vine_temp_clean_redundant_replicas(struct vine_manager *q, struct vine_file /** Shift a temp file replica away from the worker using the most cache space. 
*/ void vine_temp_shift_disk_load(struct vine_manager *q, struct vine_worker_info *source_worker, struct vine_file *f); -#endif \ No newline at end of file +#endif diff --git a/taskvine/src/worker/vine_cache.c b/taskvine/src/worker/vine_cache.c index 40b226c11e..fc97b46416 100644 --- a/taskvine/src/worker/vine_cache.c +++ b/taskvine/src/worker/vine_cache.c @@ -941,13 +941,22 @@ If any have definitively failed, they are removed from the cache. int vine_cache_check_files(struct vine_cache *c, struct link *manager) { - struct vine_cache_file *f; + if (hash_table_size(c->processing_transfers) < 1) { + return 1; + } + + char **cachenames = hash_table_keys_array(c->processing_transfers); + int i = 0; char *cachename; - HASH_TABLE_ITERATE(c->table, cachename, f) - { - vine_cache_check_file(c, f, cachename, manager); + while ((cachename = cachenames[i])) { + i++; + struct vine_cache_file *f = hash_table_lookup(c->table, cachename); + if (f) { + vine_cache_check_file(c, f, cachename, manager); + } - if (f->status == VINE_CACHE_STATUS_FAILED) { + f = hash_table_lookup(c->table, cachename); + if (f && f->status == VINE_CACHE_STATUS_FAILED) { /* if transfer failed, then we delete all of our records of the file. The manager * assumes that the file is not at the worker after the manager receives * the cache invalid message sent from vine_cache_check_outputs. */ @@ -956,5 +965,7 @@ int vine_cache_check_files(struct vine_cache *c, struct link *manager) /* Note that f may no longer be valid at this point */ } + + hash_table_free_keys_array(cachenames); return 1; }