Skip to content

Commit

Permalink
[Disco] Disable splitting nccl communicator in single-group (#17264)
Browse files (browse the repository at this point in the history)
  • Loading branch information
vinx13 authored Aug 10, 2024
1 parent 2d828f5 commit bed66d2
Showing 1 changed file with 6 additions and 2 deletions.
8 changes: 6 additions & 2 deletions src/runtime/disco/nccl/nccl.cc
Original file line number | Diff line number | Diff line change
Expand Up @@ -101,8 +101,12 @@ void InitCCLPerWorker(IntTuple device_ids, std::string unique_id_bytes) {
ncclUniqueId id;
std::memcpy(id.internal, unique_id_bytes.data(), NCCL_UNIQUE_ID_BYTES);
NCCL_CALL(ncclCommInitRank(&ctx->global_comm, worker->num_workers, id, worker->worker_id));
- NCCL_CALL(ncclCommSplit(ctx->global_comm, worker->worker_id / group_size,
- worker->worker_id % group_size, &ctx->group_comm, NULL));
+ if (worker->num_groups == 1) {
+ ctx->group_comm = ctx->global_comm;
+ } else {
+ NCCL_CALL(ncclCommSplit(ctx->global_comm, worker->worker_id / group_size,
+ worker->worker_id % group_size, &ctx->group_comm, NULL));
+ }
}

void AllReduce(NDArray send, ReduceKind reduce_kind, bool in_group, NDArray recv) {
Expand Down

0 comments on commit bed66d2

Please sign in to comment.