Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions doc/manuals/taskvine/index.md
Original file line number Diff line number Diff line change
Expand Up @@ -2750,6 +2750,7 @@ change.
|-----------|-------------|---------------|
| attempt-schedule-depth | The number of tasks to attempt scheduling on each pass of send_one_task in the main loop. | 100 |
| category-steady-n-tasks | Minimum number of successful tasks to use a sample for automatic resource allocation modes after encountering a new resource maximum. | 25 |
| clean-redundant-replicas | Remove redundant temporary file replicas to save worker's local disk space. | 0 |
| default-transfer-rate | The assumed network bandwidth used until sufficient data has been collected. | 1MB/s |
| disconnect-slow-workers-factor | Set the multiplier of the average task time at which point to disconnect a worker; disabled if less than 1. | 0 |
| hungry-minimum | Smallest number of waiting tasks in the manager before declaring it hungry | 10 |
Expand All @@ -2767,6 +2768,7 @@ change.
| ramp-down-heuristic | If set to 1 and there are more workers than tasks waiting, then tasks are allocated all the free resources of a worker large enough to run them. If monitoring watchdog is not enabled, then this heuristic has no effect. | 0 |
| resource-submit-multiplier | Assume that workers have `resource x resource-submit-multiplier` available.<br> This overcommits resources at the worker, causing tasks to be sent to workers that cannot be immediately executed.<br>The extra tasks wait at the worker until resources become available. | 1 |
| sandbox-grow-factor | When task disk sandboxes are exhausted, increase the allocation using their measured value times this factor. Minimum is 1.1. | 2 |
| shift-disk-load | Proactively shift temporary files away from the most disk-heavy worker to those with more available disk. | 0 |
| short-timeout | Set the minimum timeout in seconds when sending a brief message to a single worker. | 5 |
| temp-replica-count | Number of temp file replicas created across workers | 0 |
| transfer-outlier-factor | Transfers that are this many times slower than the average will be terminated. | 10 |
Expand Down
3 changes: 2 additions & 1 deletion taskvine/src/manager/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,8 @@ SOURCES = \
vine_file_replica_table.c \
vine_fair.c \
vine_runtime_dir.c \
vine_task_groups.c
vine_task_groups.c \
vine_temp.c

PUBLIC_HEADERS = taskvine.h

Expand Down
114 changes: 0 additions & 114 deletions taskvine/src/manager/vine_file_replica_table.c
Original file line number Diff line number Diff line change
Expand Up @@ -147,94 +147,6 @@ struct vine_worker_info *vine_file_replica_table_find_worker(struct vine_manager
return peer_selected;
}

/*
Trigger replications of a file to satisfy temp_replica_count.

Walks a snapshot of source_workers. For each source that holds a READY replica
of f and whose outgoing_xfer_counter is still below worker_source_max_transfers,
asks eligible destination workers (visited from a random starting point in the
manager's worker table) to fetch the file from that source.

A destination is skipped when it is already in source_workers, shares a
hostname with the source, has no active transfer port, is draining, or has an
incoming_xfer_counter at or above worker_source_max_transfers.

@param m               Manager whose worker table is searched for destinations.
@param f               File to replicate; f->cached_name identifies the replica.
@param source_workers  Set of workers considered as sources for the file.
@param to_find         Maximum number of replication requests to send in this call.
@return The number of replication requests sent in this call. Returns 0 when
        f or source_workers is NULL, or when to_find is less than 1.
*/
int vine_file_replica_table_replicate(struct vine_manager *m, struct vine_file *f, struct set *source_workers, int to_find)
{
	/* nothing to do without a file, a source set, or a positive budget */
	if (!f || !source_workers || to_find < 1) {
		return 0;
	}

	int nsource_workers = set_size(source_workers);
	int round_replication_request_sent = 0;

	/* get the elements of set so we can insert new replicas to sources */
	struct vine_worker_info **source_workers_frozen = (struct vine_worker_info **)set_values(source_workers);
	if (!source_workers_frozen) {
		/* defensive: no snapshot means no source can be examined */
		return 0;
	}

	for (int i = 0; i < nsource_workers; i++) {

		struct vine_worker_info *source_worker = source_workers_frozen[i];
		int dest_workers_found = 0;

		// skip if the file on the source is not ready to transfer
		struct vine_file_replica *replica = hash_table_lookup(source_worker->current_files, f->cached_name);
		if (!replica || replica->state != VINE_FILE_REPLICA_STATE_READY) {
			continue;
		}

		// skip if the source is busy with other transfers
		if (source_worker->outgoing_xfer_counter >= m->worker_source_max_transfers) {
			continue;
		}

		/*
		Cap this source by what is still missing overall rather than by the full
		to_find, so that requests already sent through earlier sources are not
		counted twice and the total never exceeds to_find.
		The remaining budget is at least 1 here because the outer loop stops
		as soon as the budget is exhausted.
		*/
		int source_quota = MIN(m->file_source_max_transfers, to_find - round_replication_request_sent);

		char *source_addr = string_format("%s/%s", source_worker->transfer_url, f->cached_name);

		char *id;
		struct vine_worker_info *dest_worker;
		int offset_bookkeep;

		HASH_TABLE_ITERATE_RANDOM_START(m->worker_table, offset_bookkeep, id, dest_worker)
		{
			// skip if the destination is already a source, or is on the same host as the source
			if (set_lookup(source_workers, dest_worker) || strcmp(source_worker->hostname, dest_worker->hostname) == 0) {
				continue;
			}

			// skip if the destination is not ready to transfer
			if (!dest_worker->transfer_port_active) {
				continue;
			}

			// skip if the destination is draining
			if (dest_worker->draining) {
				continue;
			}

			// skip if the destination is busy with other transfers
			if (dest_worker->incoming_xfer_counter >= m->worker_source_max_transfers) {
				continue;
			}

			debug(D_VINE, "replicating %s from %s to %s", f->cached_name, source_worker->addrport, dest_worker->addrport);

			vine_manager_put_url_now(m, dest_worker, source_worker, source_addr, f);

			round_replication_request_sent++;

			// break if we have found enough destinations for this source
			if (++dest_workers_found >= source_quota) {
				break;
			}

			// break if the source becomes busy with transfers
			if (source_worker->outgoing_xfer_counter >= m->worker_source_max_transfers) {
				break;
			}
		}

		free(source_addr);

		// break if we have sent enough replication requests for this file
		if (round_replication_request_sent >= to_find) {
			break;
		}
	}

	free(source_workers_frozen);

	return round_replication_request_sent;
}

/*
Count number of replicas of a file in the system.
*/
Expand All @@ -258,32 +170,6 @@ int vine_file_replica_table_count_replicas(struct vine_manager *q, const char *c
return count;
}

/*
Report whether at least one usable replica of a file is recorded on any worker.

A replica counts as usable when its state is CREATING or READY. CREATING is
accepted because such a replica may already exist physically even though the
corresponding cache-update message has not been handled yet. DELETING replicas
are deliberately not accepted: an unlink request has already been sent to the
holding worker, so any later cache-update or cache-invalid event will end in
deletion.

Returns 1 if such a replica is found, 0 otherwise (including when no worker
is recorded for cachename).
*/
int vine_file_replica_table_exists_somewhere(struct vine_manager *q, const char *cachename)
{
	struct set *holders = hash_table_lookup(q->file_worker_table, cachename);

	/* no entry at all for this cachename */
	if (!holders) {
		return 0;
	}

	/* an entry exists, but no worker is listed in it */
	if (set_size(holders) < 1) {
		return 0;
	}

	struct vine_worker_info *holder;
	SET_ITERATE(holders, holder)
	{
		struct vine_file_replica *candidate = vine_file_replica_table_lookup(holder, cachename);
		if (!candidate) {
			continue;
		}

		int is_creating = (candidate->state == VINE_FILE_REPLICA_STATE_CREATING);
		int is_ready = (candidate->state == VINE_FILE_REPLICA_STATE_READY);

		/* the first usable replica settles the question */
		if (is_creating || is_ready) {
			return 1;
		}
	}

	return 0;
}

// get or create a replica for a worker and cachename
struct vine_file_replica *vine_file_replica_table_get_or_create(struct vine_manager *m, struct vine_worker_info *w, const char *cachename, vine_file_type_t type, vine_cache_level_t cache_level, int64_t size, time_t mtime)
{
Expand Down
4 changes: 0 additions & 4 deletions taskvine/src/manager/vine_file_replica_table.h
Original file line number Diff line number Diff line change
Expand Up @@ -30,10 +30,6 @@ struct vine_file_replica *vine_file_replica_table_get_or_create(struct vine_mana

struct vine_worker_info *vine_file_replica_table_find_worker(struct vine_manager *q, const char *cachename);

int vine_file_replica_table_replicate(struct vine_manager *q, struct vine_file *f, struct set *source_workers, int to_find);

int vine_file_replica_table_exists_somewhere( struct vine_manager *q, const char *cachename );

int vine_file_replica_table_count_replicas( struct vine_manager *q, const char *cachename, vine_file_replica_state_t state );

#endif
Loading