@@ -29,7 +29,6 @@
     skipCUDAIfMiopen,
     skipCUDAIfNoCudnn,
     skipCUDAIfNoMiopen,
-    skipCUDAIfNotMiopenSuggestNHWC,
     skipCUDAIfRocm,
     skipMeta,
     skipMPS,
@@ -50,8 +49,6 @@
     parametrize as parametrize_test,
     run_tests,
     set_default_dtype,
-    skipIfNotMiopenSuggestNHWC,
-    skipIfRocmVersionLessThan,
     subtest,
     TEST_SCIPY,
     TEST_WITH_ROCM,
@@ -61,6 +58,11 @@
 AMPERE_OR_ROCM = TEST_WITH_ROCM or torch.cuda.is_tf32_supported()
 
 
+if TEST_WITH_ROCM:
+    os.environ["PYTORCH_MIOPEN_SUGGEST_NHWC"] = "1"
+    os.environ["PYTORCH_MIOPEN_SUGGEST_NHWC_BATCHNORM"] = "1"
+
+
 if TEST_SCIPY:
     import scipy.ndimage
     import scipy.signal
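Context note (not part of the patch): setting these environment variables is what lets MIOpen choose NHWC (channels_last) kernels on ROCm, which is why the per-test NHWC/ROCm skip decorators can be dropped in the hunks below. A minimal, hypothetical sketch of the path this opts into — the module and shapes are illustrative, and it assumes a ROCm build where the "cuda" device maps to HIP:

```python
# Hypothetical sketch, not part of the diff: run a channels_last conv after
# opting in to MIOpen NHWC kernels. Module and shapes are illustrative only.
import os

os.environ["PYTORCH_MIOPEN_SUGGEST_NHWC"] = "1"  # must be set before MIOpen is first used

import torch
import torch.nn as nn

if torch.cuda.is_available():  # on a ROCm build, "cuda" maps to HIP
    conv = nn.Conv2d(8, 8, 3).to(device="cuda", memory_format=torch.channels_last)
    x = torch.randn(2, 8, 16, 16, device="cuda").to(memory_format=torch.channels_last)
    out = conv(x)
    # The output is expected to stay in channels_last layout.
    print(out.is_contiguous(memory_format=torch.channels_last))
```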
@@ -710,7 +712,6 @@ def test_ConvTranspose2d_half_cublas_gemm(self):
     # Almost identical to the above `test_Conv2d_naive_groups`
     @torch.backends.cudnn.flags(enabled=True, deterministic=True, benchmark=False)
     @tf32_on_and_off(0.001)
-    @unittest.skipIf(TEST_WITH_ROCM, "Skipped on ROCm, since it is failing on ROCm 5.7")
     def test_Conv2d_groups_nobias(self):
         dev_dtypes = [("cpu", torch.float)]
         if TEST_CUDA:
@@ -756,7 +757,6 @@ def test_Conv2d_groups_nobias(self):
     # and https://github.com/pytorch/pytorch/pull/18463#issuecomment-477001024
     @torch.backends.cudnn.flags(enabled=True, deterministic=True, benchmark=False)
     @tf32_on_and_off(0.001)
-    @unittest.skipIf(TEST_WITH_ROCM, "Skipped on ROCm, since it is failing on ROCm 5.7")
     def test_Conv2d_groups_nobias_v2(self):
         torch.manual_seed(123)
         dev_dtypes = [("cpu", torch.float)]
@@ -891,7 +891,6 @@ def test_conv_tbc(self):
 
     @unittest.skipIf(not TEST_CUDA, "CUDA unavailable")
     @unittest.skipIf(not TEST_CUDNN, "needs cudnn")
-    @skipIfNotMiopenSuggestNHWC
     def test_grouped_conv_cudnn_nhwc_support(self):
         # in order to catch the hols in grouped convolution in nhwc support for earlier cudnn version
         input = torch.randn((16, 16, 8, 8), dtype=torch.float16, device="cuda").to(
@@ -3140,7 +3139,6 @@ def test_conv_noncontig_weights_and_bias(self, device):
 
     @onlyCUDA
     @largeTensorTest("12GB")
-    @skipIfRocmVersionLessThan((6, 0))
     def test_conv_transposed_large(self, device):
         dtype = torch.half if self.device_type == "cuda" else torch.float
         conv = nn.ConvTranspose2d(1, 1, 1, 1, bias=False).to(device).to(dtype)
@@ -3184,7 +3182,6 @@ def test_conv_transposed_large(self, device):
         self.assertEqual(maxdiff3, 0)
 
     @onlyCUDA
-    @skipCUDAIfRocm
     @largeTensorTest("12GB")
     def test_conv_large(self, device):
         dtype = torch.half if self.device_type == "cuda" else torch.float
@@ -3217,7 +3214,6 @@ def test_conv_large(self, device):
         self.assertEqual(grad1, grad2, atol=5e-2, rtol=5e-3)
 
     @onlyCUDA
-    @skipCUDAIfRocm
     @largeTensorTest("20GB", "cpu")
     @largeTensorTest("60GB", "cuda")
     def test_conv_large_batch_1(self, device):
@@ -3365,7 +3361,6 @@ def test_ConvTranspose3d_size_1_kernel(self, device):
     @dtypes(torch.float)
     @torch.backends.cudnn.flags(enabled=True, deterministic=True, benchmark=False)
     @tf32_on_and_off(0.001)
-    @unittest.skipIf(TEST_WITH_ROCM, "Skipped on ROCm, since it is failing on ROCm 5.7")
     def test_Conv2d_naive_groups(self, device, dtype):
         # Check that grouped convolutions matches two half convolutions
         m = nn.Conv2d(4, 4, kernel_size=3, groups=2).to(device, dtype)
@@ -3634,19 +3629,21 @@ def helper(
         )
 
     @onlyCUDA
-    @skipCUDAIfNotMiopenSuggestNHWC
     @dtypes(torch.half, torch.float, torch.cfloat)
     def test_conv_cudnn_nhwc(self, device, dtype):
         def helper(n, c, h, w, out_channels, kernel_size, groups):
-            input = torch.randint(-3, 3, (n, c, h, w), dtype=dtype, device=device).to(
-                memory_format=torch.channels_last
-            )
+            # randint with dtype=torch.cfloat fails with
+            # RuntimeError: check_random_bounds handles only integral, floating-point and boolean types
+            # must create randint and randint_like using default int64, then cast to desired
+            input = torch.randint(
+                -3, 3, (n, c, h, w), dtype=torch.int64, device=device
+            ).to(dtype, memory_format=torch.channels_last)
             input.requires_grad_()
             conv = nn.Conv2d(c, out_channels, kernel_size, groups=groups).to(
                 device="cuda", dtype=dtype, memory_format=torch.channels_last
             )
             for p in conv.parameters():
-                p.data = torch.randint_like(p, -3, 3)
+                p.data = torch.randint_like(p, -3, 3, dtype=torch.int64).to(p.dtype)
 
             # use FP64 channels-first conv as reference
             ref_input = input.detach().clone().contiguous().double().requires_grad_()
@@ -3660,7 +3657,7 @@ def helper(n, c, h, w, out_channels, kernel_size, groups):
             out = conv(input)
             ref_out = ref_conv(ref_input)
 
-            grad = torch.randint_like(out, -3, 3)
+            grad = torch.randint_like(out, -3, 3, dtype=torch.int64).to(out.dtype)
             ref_grad = grad.detach().clone().double().contiguous()
 
             out.backward(grad)
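Side note (not part of the patch): the int64-then-cast pattern introduced above can be reproduced in isolation; `torch.randint` and `torch.randint_like` reject complex dtypes, so the cfloat case has to sample integers first and cast afterwards. The shape below is arbitrary:

```python
# Standalone illustration of the workaround used in this hunk (arbitrary shape).
import torch

shape = (2, 3, 4, 4)
# torch.randint(-3, 3, shape, dtype=torch.cfloat) raises
# "check_random_bounds handles only integral, floating-point and boolean types",
# so sample int64 values first, then cast to the target dtype.
x = torch.randint(-3, 3, shape, dtype=torch.int64).to(torch.cfloat)
w = torch.randint_like(x, -3, 3, dtype=torch.int64).to(x.dtype)
print(x.dtype, w.dtype)  # torch.complex64 torch.complex64
```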
@@ -3687,7 +3684,6 @@ def helper(n, c, h, w, out_channels, kernel_size, groups):
         helper(1, 16, 56, 56, out_channels=16, kernel_size=3, groups=16)
 
     @onlyCUDA
-    @skipCUDAIfRocm
     @dtypes(torch.half, torch.float)
     def test_conv_cudnn_ndhwc(self, device, dtype):
         def helper(n, c, d, h, w, out_channels, kernel_size, groups):
@@ -3817,7 +3813,6 @@ def _test_conv_cudnn_nhwc_nchw(self, layer, n, c, h, w, k, filter_size, device):
         )
 
     @onlyCUDA
-    @skipCUDAIfNotMiopenSuggestNHWC
     @tf32_on_and_off(0.05)
     def test_conv_cudnn_mismatch_memory_format(self, device):
         configs = [
@@ -3950,7 +3945,6 @@ def test_cudnn_convolution_add_relu(self, device, dtype):
         self.assertEqual(F.relu(conv2d_out + alpha * z), cudnn_out)
 
     @onlyCUDA
-    @skipCUDAIfRocm
     def test_convert_conv2d_weight_memory_format(self, device):
         input = torch.randint(1, 10, (2, 8, 4, 4), dtype=torch.float32, device=device)
         model = nn.Sequential(nn.Conv2d(8, 4, 3), nn.BatchNorm2d(4)).to(device).float()
@@ -3970,7 +3964,6 @@ def test_convert_conv2d_weight_memory_format(self, device):
         self.assertTrue(out.is_contiguous(memory_format=memory_format))
 
     @onlyCUDA
-    @skipCUDAIfRocm
     def test_convert_conv3d_weight_memory_format(self, device):
         input = torch.randint(
             1, 10, (2, 8, 4, 4, 4), dtype=torch.float32, device=device
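For reference (not part of the patch): `test_convert_conv2d_weight_memory_format` exercises `torch.nn.utils.convert_conv2d_weight_memory_format`. A rough CPU-only sketch of the conversion it checks, with the printed layout check being an assumption about the expected result rather than a line copied from the test:

```python
# Hedged sketch: convert Conv2d weights to channels_last so NHWC kernels can be
# selected later; the printed check is an assumption, not taken from the test.
import torch
import torch.nn as nn

model = nn.Sequential(nn.Conv2d(8, 4, 3), nn.BatchNorm2d(4))
model = nn.utils.convert_conv2d_weight_memory_format(model, torch.channels_last)
print(model[0].weight.is_contiguous(memory_format=torch.channels_last))
```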