From c601924b2668e784ee1c9430dba88977dca0e1c0 Mon Sep 17 00:00:00 2001 From: Jin Zhou Date: Mon, 3 Nov 2025 12:16:13 -0500 Subject: [PATCH 1/8] vine: don't print Submitted recovery xxx on terminal --- taskvine/src/manager/taskvine.h | 6 ++++++ taskvine/src/manager/vine_manager.c | 12 ++++++++++-- taskvine/src/manager/vine_manager.h | 1 + 3 files changed, 17 insertions(+), 2 deletions(-) diff --git a/taskvine/src/manager/taskvine.h b/taskvine/src/manager/taskvine.h index 16152b7102..38bb0d857c 100644 --- a/taskvine/src/manager/taskvine.h +++ b/taskvine/src/manager/taskvine.h @@ -1388,6 +1388,12 @@ void vine_set_password(struct vine_manager *m, const char *password); int vine_set_password_file(struct vine_manager *m, const char *file); +/** Get the number of recovery tasks submitted to the manager. +@param m A manager object +@return The number of recovery tasks submitted to the manager. +*/ +int vine_get_num_submitted_recovery_tasks(struct vine_manager *m); + /** Change the keepalive interval for a given manager. @param m A manager object @param interval The minimum number of seconds to wait before sending new keepalive checks to workers. diff --git a/taskvine/src/manager/vine_manager.c b/taskvine/src/manager/vine_manager.c index ca72d257d9..06297d9512 100644 --- a/taskvine/src/manager/vine_manager.c +++ b/taskvine/src/manager/vine_manager.c @@ -3519,7 +3519,7 @@ static void vine_manager_consider_recovery_task(struct vine_manager *q, struct v case VINE_TASK_INITIAL: /* The recovery task has never been run, so submit it now. */ vine_submit(q, rt); - notice(D_VINE, "Submitted recovery task %d (%s) to re-create lost temporary file %s.", rt->task_id, rt->command_line, lost_file->cached_name); + debug(D_VINE, "Submitted recovery task %d (%s) to re-create lost temporary file %s.", rt->task_id, rt->command_line, lost_file->cached_name); break; case VINE_TASK_READY: case VINE_TASK_RUNNING: @@ -3533,7 +3533,7 @@ static void vine_manager_consider_recovery_task(struct vine_manager *q, struct v * here. */ vine_task_reset(rt); vine_submit(q, rt); - notice(D_VINE, "Submitted recovery task %d (%s) to re-create lost temporary file %s.", rt->task_id, rt->command_line, lost_file->cached_name); + debug(D_VINE, "Submitted recovery task %d (%s) to re-create lost temporary file %s.", rt->task_id, rt->command_line, lost_file->cached_name); break; } } @@ -4440,6 +4440,11 @@ int vine_set_password_file(struct vine_manager *q, const char *file) return copy_file_to_buffer(file, &q->password, NULL) > 0; } +int vine_get_num_submitted_recovery_tasks(struct vine_manager *q) +{ + return q->num_submitted_recovery_tasks; +} + static void delete_task_at_exit(struct vine_task *t) { if (!t) { @@ -4869,6 +4874,7 @@ int vine_submit(struct vine_manager *q, struct vine_task *t) * this distinction is important when many files are lost and the workflow is effectively rerun from scratch. */ if (t->type == VINE_TASK_TYPE_RECOVERY) { vine_task_set_priority(t, t->priority + priority_queue_get_top_priority(q->ready_tasks) + 1); + q->num_submitted_recovery_tasks++; } if (t->has_fixed_locations) { @@ -5375,6 +5381,8 @@ static struct vine_task *vine_wait_internal(struct vine_manager *q, int timeout, q->nothing_happened_last_wait_cycle = 0; + q->num_submitted_recovery_tasks = 0; + // Retrieve results from workers. We do a worker at a time to be more efficient. // We get a known worker with results from the first task in the waiting_retrieval_list, // and get as many tasks as possible under the q->max_retrievals constraint. diff --git a/taskvine/src/manager/vine_manager.h b/taskvine/src/manager/vine_manager.h index bcf2405616..edeb50175b 100644 --- a/taskvine/src/manager/vine_manager.h +++ b/taskvine/src/manager/vine_manager.h @@ -138,6 +138,7 @@ struct vine_manager { int num_tasks_left; /* Optional: Number of tasks remaining, if given by user. @ref vine_set_num_tasks */ int nothing_happened_last_wait_cycle; /* Set internally in main loop if no messages or tasks were processed during the last wait loop. If set, poll longer to avoid wasting cpu cycles, and growing log files unnecessarily.*/ + int num_submitted_recovery_tasks; /* Number of recovery tasks submitted to restore lost temp files. */ /* Accumulation of statistics for reporting to the caller. */ From 6f6b15c517dd069e34e0189fb68dd6448294d7a5 Mon Sep 17 00:00:00 2001 From: Jin Zhou Date: Mon, 3 Nov 2025 14:49:07 -0500 Subject: [PATCH 2/8] trigger rebuild From 094c18ad61e377c3cd6838a66fe43d5440849c28 Mon Sep 17 00:00:00 2001 From: Jin Zhou Date: Wed, 5 Nov 2025 12:27:28 -0500 Subject: [PATCH 3/8] use stats --- .../bindings/python3/ndcctools/taskvine/display.py | 1 + taskvine/src/manager/taskvine.h | 7 +------ taskvine/src/manager/vine_manager.c | 11 +++-------- taskvine/src/manager/vine_manager.h | 3 +-- 4 files changed, 6 insertions(+), 16 deletions(-) diff --git a/taskvine/src/bindings/python3/ndcctools/taskvine/display.py b/taskvine/src/bindings/python3/ndcctools/taskvine/display.py index 95da12d924..db174298d1 100644 --- a/taskvine/src/bindings/python3/ndcctools/taskvine/display.py +++ b/taskvine/src/bindings/python3/ndcctools/taskvine/display.py @@ -110,6 +110,7 @@ def generate_manager_table(self, manager_s): "tasks_done", "tasks_waiting", "tasks_running", + "recovery_tasks_submitted", "tasks_exhausted_attempts", "workers_connected", "workers_busy", diff --git a/taskvine/src/manager/taskvine.h b/taskvine/src/manager/taskvine.h index 38bb0d857c..a28ff2a022 100644 --- a/taskvine/src/manager/taskvine.h +++ b/taskvine/src/manager/taskvine.h @@ -176,6 +176,7 @@ struct vine_stats { int tasks_on_workers; /**< Number of tasks currently dispatched to some worker. */ int tasks_running; /**< Number of tasks currently executing at some worker. */ int tasks_with_results; /**< Number of tasks with retrieved results and waiting to be returned to user. */ + int recovery_tasks_submitted; /**< Total number of recovery tasks submitted since the manager started. */ /* Cumulative stats for tasks: */ int tasks_submitted; /**< Total number of tasks submitted to the manager. */ @@ -1388,12 +1389,6 @@ void vine_set_password(struct vine_manager *m, const char *password); int vine_set_password_file(struct vine_manager *m, const char *file); -/** Get the number of recovery tasks submitted to the manager. -@param m A manager object -@return The number of recovery tasks submitted to the manager. -*/ -int vine_get_num_submitted_recovery_tasks(struct vine_manager *m); - /** Change the keepalive interval for a given manager. @param m A manager object @param interval The minimum number of seconds to wait before sending new keepalive checks to workers. diff --git a/taskvine/src/manager/vine_manager.c b/taskvine/src/manager/vine_manager.c index 06297d9512..6a29389e26 100644 --- a/taskvine/src/manager/vine_manager.c +++ b/taskvine/src/manager/vine_manager.c @@ -2262,6 +2262,7 @@ static struct jx *manager_to_jx(struct vine_manager *q) jx_insert_integer(j, "tasks_on_workers", info.tasks_on_workers); jx_insert_integer(j, "tasks_running", info.tasks_running); jx_insert_integer(j, "tasks_with_results", info.tasks_with_results); + jx_insert_integer(j, "recovery_tasks_submitted", info.recovery_tasks_submitted); jx_insert_integer(j, "tasks_left", q->num_tasks_left); jx_insert_integer(j, "tasks_submitted", info.tasks_submitted); @@ -2387,6 +2388,7 @@ static struct jx *manager_lean_to_jx(struct vine_manager *q) // additional task information for vine_factory jx_insert_integer(j, "tasks_on_workers", info.tasks_on_workers); jx_insert_integer(j, "tasks_left", q->num_tasks_left); + jx_insert_integer(j, "recovery_tasks_submitted", info.recovery_tasks_submitted); // capacity information the factory needs jx_insert_integer(j, "capacity_tasks", info.capacity_tasks); @@ -4440,11 +4442,6 @@ int vine_set_password_file(struct vine_manager *q, const char *file) return copy_file_to_buffer(file, &q->password, NULL) > 0; } -int vine_get_num_submitted_recovery_tasks(struct vine_manager *q) -{ - return q->num_submitted_recovery_tasks; -} - static void delete_task_at_exit(struct vine_task *t) { if (!t) { @@ -4874,7 +4871,7 @@ int vine_submit(struct vine_manager *q, struct vine_task *t) * this distinction is important when many files are lost and the workflow is effectively rerun from scratch. */ if (t->type == VINE_TASK_TYPE_RECOVERY) { vine_task_set_priority(t, t->priority + priority_queue_get_top_priority(q->ready_tasks) + 1); - q->num_submitted_recovery_tasks++; + q->stats->recovery_tasks_submitted++; } if (t->has_fixed_locations) { @@ -5381,8 +5378,6 @@ static struct vine_task *vine_wait_internal(struct vine_manager *q, int timeout, q->nothing_happened_last_wait_cycle = 0; - q->num_submitted_recovery_tasks = 0; - // Retrieve results from workers. We do a worker at a time to be more efficient. // We get a known worker with results from the first task in the waiting_retrieval_list, // and get as many tasks as possible under the q->max_retrievals constraint. diff --git a/taskvine/src/manager/vine_manager.h b/taskvine/src/manager/vine_manager.h index edeb50175b..8239a3bf82 100644 --- a/taskvine/src/manager/vine_manager.h +++ b/taskvine/src/manager/vine_manager.h @@ -137,8 +137,7 @@ struct vine_manager { int fixed_location_in_queue; /* Number of fixed location tasks currently being managed */ int num_tasks_left; /* Optional: Number of tasks remaining, if given by user. @ref vine_set_num_tasks */ int nothing_happened_last_wait_cycle; /* Set internally in main loop if no messages or tasks were processed during the last wait loop. - If set, poll longer to avoid wasting cpu cycles, and growing log files unnecessarily.*/ - int num_submitted_recovery_tasks; /* Number of recovery tasks submitted to restore lost temp files. */ + If set, poll longer to avoid wasting cpu cycles, and growing log files unnecessarily.*/ /* Accumulation of statistics for reporting to the caller. */ From 9ff1636837647d864a02a1a6b3f716a8b0d06f2c Mon Sep 17 00:00:00 2001 From: Jin Zhou Date: Wed, 5 Nov 2025 12:28:25 -0500 Subject: [PATCH 4/8] restore --- taskvine/src/manager/vine_manager.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/taskvine/src/manager/vine_manager.h b/taskvine/src/manager/vine_manager.h index 8239a3bf82..bcf2405616 100644 --- a/taskvine/src/manager/vine_manager.h +++ b/taskvine/src/manager/vine_manager.h @@ -137,7 +137,7 @@ struct vine_manager { int fixed_location_in_queue; /* Number of fixed location tasks currently being managed */ int num_tasks_left; /* Optional: Number of tasks remaining, if given by user. @ref vine_set_num_tasks */ int nothing_happened_last_wait_cycle; /* Set internally in main loop if no messages or tasks were processed during the last wait loop. - If set, poll longer to avoid wasting cpu cycles, and growing log files unnecessarily.*/ + If set, poll longer to avoid wasting cpu cycles, and growing log files unnecessarily.*/ /* Accumulation of statistics for reporting to the caller. */ From 73f25e284df1c95c6e11aa668f80e3ed9c5a8fc5 Mon Sep 17 00:00:00 2001 From: Jin Zhou Date: Wed, 5 Nov 2025 12:29:26 -0500 Subject: [PATCH 5/8] no factory display --- taskvine/src/manager/vine_manager.c | 1 - 1 file changed, 1 deletion(-) diff --git a/taskvine/src/manager/vine_manager.c b/taskvine/src/manager/vine_manager.c index 6a29389e26..04d3edeca7 100644 --- a/taskvine/src/manager/vine_manager.c +++ b/taskvine/src/manager/vine_manager.c @@ -2388,7 +2388,6 @@ static struct jx *manager_lean_to_jx(struct vine_manager *q) // additional task information for vine_factory jx_insert_integer(j, "tasks_on_workers", info.tasks_on_workers); jx_insert_integer(j, "tasks_left", q->num_tasks_left); - jx_insert_integer(j, "recovery_tasks_submitted", info.recovery_tasks_submitted); // capacity information the factory needs jx_insert_integer(j, "capacity_tasks", info.capacity_tasks); From 11de79cc60b100fb914a17746687e91cff00ba72 Mon Sep 17 00:00:00 2001 From: Jin Zhou Date: Mon, 5 Jan 2026 12:57:41 -0500 Subject: [PATCH 6/8] merge --- taskvine/src/manager/vine_manager.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/taskvine/src/manager/vine_manager.c b/taskvine/src/manager/vine_manager.c index 7dd2dfb428..86271988ef 100644 --- a/taskvine/src/manager/vine_manager.c +++ b/taskvine/src/manager/vine_manager.c @@ -3492,6 +3492,7 @@ static void vine_manager_consider_recovery_task(struct vine_manager *q, struct v /* The recovery task has never been run, so submit it now. */ vine_submit(q, rt); debug(D_VINE, "Submitted recovery task %d (%s) to re-create lost temporary file %s.", rt->task_id, rt->command_line, lost_file->cached_name); + printf("All submitted recovery tasks: %d\n", q->stats->recovery_tasks_submitted); break; case VINE_TASK_READY: case VINE_TASK_RUNNING: @@ -3506,6 +3507,7 @@ static void vine_manager_consider_recovery_task(struct vine_manager *q, struct v vine_task_reset(rt); vine_submit(q, rt); debug(D_VINE, "Submitted recovery task %d (%s) to re-create lost temporary file %s.", rt->task_id, rt->command_line, lost_file->cached_name); + printf("All submitted recovery tasks: %d\n", q->stats->recovery_tasks_submitted); break; } } @@ -4841,6 +4843,10 @@ int vine_submit(struct vine_manager *q, struct vine_task *t) /* Issue warnings if the files are set up strangely. */ vine_task_check_consistency(t); + + if (t->type == VINE_TASK_TYPE_RECOVERY) { + q->stats->recovery_tasks_submitted++; + } if (t->has_fixed_locations) { q->fixed_location_in_queue++; From 1ee0d7e3fab7d9df3fb6782feee4c3f2be44e2d4 Mon Sep 17 00:00:00 2001 From: Jin Zhou Date: Mon, 5 Jan 2026 12:58:16 -0500 Subject: [PATCH 7/8] remove printf --- taskvine/src/manager/vine_manager.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/taskvine/src/manager/vine_manager.c b/taskvine/src/manager/vine_manager.c index 86271988ef..2e57fc6d42 100644 --- a/taskvine/src/manager/vine_manager.c +++ b/taskvine/src/manager/vine_manager.c @@ -3492,7 +3492,6 @@ static void vine_manager_consider_recovery_task(struct vine_manager *q, struct v /* The recovery task has never been run, so submit it now. */ vine_submit(q, rt); debug(D_VINE, "Submitted recovery task %d (%s) to re-create lost temporary file %s.", rt->task_id, rt->command_line, lost_file->cached_name); - printf("All submitted recovery tasks: %d\n", q->stats->recovery_tasks_submitted); break; case VINE_TASK_READY: case VINE_TASK_RUNNING: @@ -3507,7 +3506,6 @@ static void vine_manager_consider_recovery_task(struct vine_manager *q, struct v vine_task_reset(rt); vine_submit(q, rt); debug(D_VINE, "Submitted recovery task %d (%s) to re-create lost temporary file %s.", rt->task_id, rt->command_line, lost_file->cached_name); - printf("All submitted recovery tasks: %d\n", q->stats->recovery_tasks_submitted); break; } } From cbb640d153f1ef8ef6dce92095ea93ce62825058 Mon Sep 17 00:00:00 2001 From: Jin Zhou Date: Mon, 5 Jan 2026 12:59:38 -0500 Subject: [PATCH 8/8] lint --- taskvine/src/manager/vine_manager.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/taskvine/src/manager/vine_manager.c b/taskvine/src/manager/vine_manager.c index 2e57fc6d42..f385fbab06 100644 --- a/taskvine/src/manager/vine_manager.c +++ b/taskvine/src/manager/vine_manager.c @@ -4841,7 +4841,7 @@ int vine_submit(struct vine_manager *q, struct vine_task *t) /* Issue warnings if the files are set up strangely. */ vine_task_check_consistency(t); - + if (t->type == VINE_TASK_TYPE_RECOVERY) { q->stats->recovery_tasks_submitted++; }