Skip to content

Commit 473ec22

Browse files
author
Hanbin Hu
committed Nov 6, 2020
Add support for examples under one GPU without NCCL
1 parent 115b909 commit 473ec22

5 files changed

+14 -9 lines changed
 

‎examples/pytorch_benchmark.py

+2 -1
Original file line number | Diff line number | Diff line change
@@ -69,7 +69,8 @@
6969
bf.init()
7070

7171
if args.cuda:
72-
torch.cuda.set_device(bf.local_rank())
72+
device_id = bf.local_rank() if bf.nccl_built() else bf.local_rank() % torch.cuda.device_count()
73+
torch.cuda.set_device(device_id)
7374
cudnn.benchmark = True
7475

7576
# Set up standard model.

‎examples/pytorch_mnist.py

+2 -1
Original file line number | Diff line number | Diff line change
@@ -79,7 +79,8 @@
7979

8080
if args.cuda:
8181
# Bluefog: pin GPU to local rank.
82-
torch.cuda.set_device(bf.local_rank())
82+
device_id = bf.local_rank() if bf.nccl_built() else bf.local_rank() % torch.cuda.device_count()
83+
torch.cuda.set_device(device_id)
8384
torch.cuda.manual_seed(args.seed)
8485

8586

‎examples/pytorch_resnet.py

+2 -1
Original file line number | Diff line number | Diff line change
@@ -103,7 +103,8 @@
103103
if args.cuda:
104104
print("using cuda.")
105105
# Bluefog: pin GPU to local rank.
106-
torch.cuda.set_device(bf.local_rank())
106+
device_id = bf.local_rank() if bf.nccl_built() else bf.local_rank() % torch.cuda.device_count()
107+
torch.cuda.set_device(device_id)
107108
torch.cuda.manual_seed(args.seed)
108109
else:
109110
print("using cpu")

‎test/torch_ops_test.py

+4 -3
Original file line number | Diff line number | Diff line change
@@ -61,10 +61,11 @@ def convert_cpu_fp16_to_fp32(self, *values):
6161

6262
def cast_and_place(self, tensor, dtype):
6363
if dtype.is_cuda:
64-
if bf.local_size() > torch.cuda.device_count():
64+
if bf.nccl_built() and bf.local_size() > torch.cuda.device_count():
6565
raise EnvironmentError(
66-
"Cannot run number of processes in one machine are more than device count")
67-
return tensor.cuda(bf.local_rank()).type(dtype)
66+
"Cannot run number of processes in one machine more than GPU device count"
67+
" in NCCL environment")
68+
return tensor.cuda(bf.local_rank() % torch.cuda.device_count()).type(dtype)
6869
return tensor.type(dtype)
6970

7071
def test_broadcast(self):

‎test/torch_win_ops_test.py

+4 -3
Original file line number | Diff line number | Diff line change
@@ -54,10 +54,11 @@ def tearDown(self):
5454
@staticmethod
5555
def cast_and_place(tensor, dtype):
5656
if dtype.is_cuda:
57-
if bf.local_size() > torch.cuda.device_count():
57+
if bf.nccl_built() and bf.local_size() > torch.cuda.device_count():
5858
raise EnvironmentError(
59-
"Cannot run number of processes in one machine are more than device count")
60-
return tensor.cuda(bf.local_rank()).type(dtype)
59+
"Cannot run number of processes in one machine more than GPU device count"
60+
" in NCCL environment")
61+
return tensor.cuda(bf.local_rank() % torch.cuda.device_count()).type(dtype)
6162
return tensor.type(dtype)
6263

6364
def test_win_create_and_sync_and_free(self):

0 commit comments

Comments
 (0)