Skip to content

Commit cf389e5

Browse files
committed Feb 11, 2025
[NCCL] Upgrade TF NCCL version to 2.25.1
PiperOrigin-RevId: 725521622
1 parent f278fd0 commit cf389e5

19 files changed

+106
-70
lines changed
 

‎ci/official/requirements_updater/numpy1_requirements/requirements.in

+1-1
Original file line number | Diff line number | Diff line change
@@ -40,7 +40,7 @@ nvidia-cufft-cu12 == 11.2.3.61
4040
nvidia-curand-cu12 == 10.3.6.82
4141
nvidia-cusolver-cu12 == 11.6.3.83
4242
nvidia-cusparse-cu12 == 12.5.1.3
43-
nvidia-nccl-cu12 == 2.23.4
43+
nvidia-nccl-cu12 == 2.25.1
4444
nvidia-nvjitlink-cu12 == 12.5.82
4545
# The dependencies below are needed for TF wheel testing.
4646
tensorflow-io-gcs-filesystem==0.37.1

‎ci/official/requirements_updater/numpy1_requirements/requirements_lock_3_10.txt

+3-3
Original file line number | Diff line number | Diff line change
@@ -487,9 +487,9 @@ nvidia-cusparse-cu12==12.5.1.3 \
487487
# via
488488
# -r ci/official/requirements_updater/requirements.in
489489
# nvidia-cusolver-cu12
490-
nvidia-nccl-cu12==2.23.4 \
491-
--hash=sha256:aa946c8327e22ced28e7cef508a334673abc42064ec85f02d005ba1785ea4cec \
492-
--hash=sha256:b097258d9aab2fa9f686e33c6fe40ae57b27df60cedbd15d139701bb5509e0c1
490+
nvidia-nccl-cu12==2.25.1 \
491+
--hash=sha256:362aed5963fb9ea2ed2f264409baae30143498fd0e5c503aeaa1badd88cdc54a \
492+
--hash=sha256:4ab428bc915785cc66e8c57cb34c7a64cf739c46702b8db748b6ad6cc7180cf8
493493
# via -r ci/official/requirements_updater/requirements.in
494494
nvidia-nvjitlink-cu12==12.5.82 \
495495
--hash=sha256:98103729cc5226e13ca319a10bbf9433bbbd44ef64fe72f45f067cacc14b8d27 \

‎ci/official/requirements_updater/numpy1_requirements/requirements_lock_3_11.txt

+3-3
Original file line number | Diff line number | Diff line change
@@ -487,9 +487,9 @@ nvidia-cusparse-cu12==12.5.1.3 \
487487
# via
488488
# -r ci/official/requirements_updater/requirements.in
489489
# nvidia-cusolver-cu12
490-
nvidia-nccl-cu12==2.23.4 \
491-
--hash=sha256:aa946c8327e22ced28e7cef508a334673abc42064ec85f02d005ba1785ea4cec \
492-
--hash=sha256:b097258d9aab2fa9f686e33c6fe40ae57b27df60cedbd15d139701bb5509e0c1
490+
nvidia-nccl-cu12==2.25.1 \
491+
--hash=sha256:362aed5963fb9ea2ed2f264409baae30143498fd0e5c503aeaa1badd88cdc54a \
492+
--hash=sha256:4ab428bc915785cc66e8c57cb34c7a64cf739c46702b8db748b6ad6cc7180cf8
493493
# via -r ci/official/requirements_updater/requirements.in
494494
nvidia-nvjitlink-cu12==12.5.82 \
495495
--hash=sha256:98103729cc5226e13ca319a10bbf9433bbbd44ef64fe72f45f067cacc14b8d27 \

‎ci/official/requirements_updater/numpy1_requirements/requirements_lock_3_12.txt

+3-3
Original file line number | Diff line number | Diff line change
@@ -487,9 +487,9 @@ nvidia-cusparse-cu12==12.5.1.3 \
487487
# via
488488
# -r ci/official/requirements_updater/requirements.in
489489
# nvidia-cusolver-cu12
490-
nvidia-nccl-cu12==2.23.4 \
491-
--hash=sha256:aa946c8327e22ced28e7cef508a334673abc42064ec85f02d005ba1785ea4cec \
492-
--hash=sha256:b097258d9aab2fa9f686e33c6fe40ae57b27df60cedbd15d139701bb5509e0c1
490+
nvidia-nccl-cu12==2.25.1 \
491+
--hash=sha256:362aed5963fb9ea2ed2f264409baae30143498fd0e5c503aeaa1badd88cdc54a \
492+
--hash=sha256:4ab428bc915785cc66e8c57cb34c7a64cf739c46702b8db748b6ad6cc7180cf8
493493
# via -r ci/official/requirements_updater/requirements.in
494494
nvidia-nvjitlink-cu12==12.5.82 \
495495
--hash=sha256:98103729cc5226e13ca319a10bbf9433bbbd44ef64fe72f45f067cacc14b8d27 \

‎ci/official/requirements_updater/numpy1_requirements/requirements_lock_3_9.txt

+3-3
Original file line number | Diff line number | Diff line change
@@ -491,9 +491,9 @@ nvidia-cusparse-cu12==12.5.1.3 \
491491
# via
492492
# -r ci/official/requirements_updater/requirements.in
493493
# nvidia-cusolver-cu12
494-
nvidia-nccl-cu12==2.23.4 \
495-
--hash=sha256:aa946c8327e22ced28e7cef508a334673abc42064ec85f02d005ba1785ea4cec \
496-
--hash=sha256:b097258d9aab2fa9f686e33c6fe40ae57b27df60cedbd15d139701bb5509e0c1
494+
nvidia-nccl-cu12==2.25.1 \
495+
--hash=sha256:362aed5963fb9ea2ed2f264409baae30143498fd0e5c503aeaa1badd88cdc54a \
496+
--hash=sha256:4ab428bc915785cc66e8c57cb34c7a64cf739c46702b8db748b6ad6cc7180cf8
497497
# via -r ci/official/requirements_updater/requirements.in
498498
nvidia-nvjitlink-cu12==12.5.82 \
499499
--hash=sha256:98103729cc5226e13ca319a10bbf9433bbbd44ef64fe72f45f067cacc14b8d27 \

‎ci/official/requirements_updater/requirements.in

+1-1
Original file line number | Diff line number | Diff line change
@@ -40,7 +40,7 @@ nvidia-cufft-cu12 == 11.2.3.61
4040
nvidia-curand-cu12 == 10.3.6.82
4141
nvidia-cusolver-cu12 == 11.6.3.83
4242
nvidia-cusparse-cu12 == 12.5.1.3
43-
nvidia-nccl-cu12 == 2.23.4
43+
nvidia-nccl-cu12 == 2.25.1
4444
nvidia-nvjitlink-cu12 == 12.5.82
4545
# The dependencies below are needed for TF wheel testing.
4646
tensorflow-io-gcs-filesystem==0.37.1

‎requirements_lock_3_10.txt

+6-4
Original file line number | Diff line number | Diff line change
@@ -504,9 +504,9 @@ nvidia-cusparse-cu12==12.5.1.3 \
504504
# via
505505
# -r ci/official/requirements_updater/requirements.in
506506
# nvidia-cusolver-cu12
507-
nvidia-nccl-cu12==2.23.4 \
508-
--hash=sha256:aa946c8327e22ced28e7cef508a334673abc42064ec85f02d005ba1785ea4cec \
509-
--hash=sha256:b097258d9aab2fa9f686e33c6fe40ae57b27df60cedbd15d139701bb5509e0c1
507+
nvidia-nccl-cu12==2.25.1 \
508+
--hash=sha256:362aed5963fb9ea2ed2f264409baae30143498fd0e5c503aeaa1badd88cdc54a \
509+
--hash=sha256:4ab428bc915785cc66e8c57cb34c7a64cf739c46702b8db748b6ad6cc7180cf8
510510
# via -r ci/official/requirements_updater/requirements.in
511511
nvidia-nvjitlink-cu12==12.5.82 \
512512
--hash=sha256:98103729cc5226e13ca319a10bbf9433bbbd44ef64fe72f45f067cacc14b8d27 \
@@ -526,7 +526,9 @@ opt-einsum==3.3.0 \
526526
packaging==23.2 \
527527
--hash=sha256:048fb0e9405036518eaaf48a55953c750c11e1a1b68e0dd1a9d62ed0c092cfc5 \
528528
--hash=sha256:8c491190033a9af7e1d931d0b5dacc2ef47509b34dd0de67ed209b5203fc88c7
529-
# via -r ci/official/requirements_updater/requirements.in
529+
# via
530+
# -r ci/official/requirements_updater/requirements.in
531+
# auditwheel
530532
# tb-nightly
531533
portpicker==1.6.0 \
532534
--hash=sha256:b2787a41404cf7edbe29b07b9e0ed863b09f2665dcc01c1eb0c2261c1e7d0755 \

‎requirements_lock_3_11.txt

+3-3
Original file line number | Diff line number | Diff line change
@@ -504,9 +504,9 @@ nvidia-cusparse-cu12==12.5.1.3 \
504504
# via
505505
# -r ci/official/requirements_updater/requirements.in
506506
# nvidia-cusolver-cu12
507-
nvidia-nccl-cu12==2.23.4 \
508-
--hash=sha256:aa946c8327e22ced28e7cef508a334673abc42064ec85f02d005ba1785ea4cec \
509-
--hash=sha256:b097258d9aab2fa9f686e33c6fe40ae57b27df60cedbd15d139701bb5509e0c1
507+
nvidia-nccl-cu12==2.25.1 \
508+
--hash=sha256:362aed5963fb9ea2ed2f264409baae30143498fd0e5c503aeaa1badd88cdc54a \
509+
--hash=sha256:4ab428bc915785cc66e8c57cb34c7a64cf739c46702b8db748b6ad6cc7180cf8
510510
# via -r ci/official/requirements_updater/requirements.in
511511
nvidia-nvjitlink-cu12==12.5.82 \
512512
--hash=sha256:98103729cc5226e13ca319a10bbf9433bbbd44ef64fe72f45f067cacc14b8d27 \

‎requirements_lock_3_12.txt

+6-4
Original file line number | Diff line number | Diff line change
@@ -504,9 +504,9 @@ nvidia-cusparse-cu12==12.5.1.3 \
504504
# via
505505
# -r ci/official/requirements_updater/requirements.in
506506
# nvidia-cusolver-cu12
507-
nvidia-nccl-cu12==2.23.4 \
508-
--hash=sha256:aa946c8327e22ced28e7cef508a334673abc42064ec85f02d005ba1785ea4cec \
509-
--hash=sha256:b097258d9aab2fa9f686e33c6fe40ae57b27df60cedbd15d139701bb5509e0c1
507+
nvidia-nccl-cu12==2.25.1 \
508+
--hash=sha256:362aed5963fb9ea2ed2f264409baae30143498fd0e5c503aeaa1badd88cdc54a \
509+
--hash=sha256:4ab428bc915785cc66e8c57cb34c7a64cf739c46702b8db748b6ad6cc7180cf8
510510
# via -r ci/official/requirements_updater/requirements.in
511511
nvidia-nvjitlink-cu12==12.5.82 \
512512
--hash=sha256:98103729cc5226e13ca319a10bbf9433bbbd44ef64fe72f45f067cacc14b8d27 \
@@ -526,7 +526,9 @@ opt-einsum==3.3.0 \
526526
packaging==23.2 \
527527
--hash=sha256:048fb0e9405036518eaaf48a55953c750c11e1a1b68e0dd1a9d62ed0c092cfc5 \
528528
--hash=sha256:8c491190033a9af7e1d931d0b5dacc2ef47509b34dd0de67ed209b5203fc88c7
529-
# via -r ci/official/requirements_updater/requirements.in
529+
# via
530+
# -r ci/official/requirements_updater/requirements.in
531+
# auditwheel
530532
# tb-nightly
531533
portpicker==1.6.0 \
532534
--hash=sha256:b2787a41404cf7edbe29b07b9e0ed863b09f2665dcc01c1eb0c2261c1e7d0755 \

‎requirements_lock_3_9.txt

+6-4
Original file line number | Diff line number | Diff line change
@@ -500,9 +500,9 @@ nvidia-cusparse-cu12==12.5.1.3 \
500500
# via
501501
# -r ci/official/requirements_updater/requirements.in
502502
# nvidia-cusolver-cu12
503-
nvidia-nccl-cu12==2.23.4 \
504-
--hash=sha256:aa946c8327e22ced28e7cef508a334673abc42064ec85f02d005ba1785ea4cec \
505-
--hash=sha256:b097258d9aab2fa9f686e33c6fe40ae57b27df60cedbd15d139701bb5509e0c1
503+
nvidia-nccl-cu12==2.25.1 \
504+
--hash=sha256:362aed5963fb9ea2ed2f264409baae30143498fd0e5c503aeaa1badd88cdc54a \
505+
--hash=sha256:4ab428bc915785cc66e8c57cb34c7a64cf739c46702b8db748b6ad6cc7180cf8
506506
# via -r ci/official/requirements_updater/requirements.in
507507
nvidia-nvjitlink-cu12==12.5.82 \
508508
--hash=sha256:98103729cc5226e13ca319a10bbf9433bbbd44ef64fe72f45f067cacc14b8d27 \
@@ -522,7 +522,9 @@ opt-einsum==3.3.0 \
522522
packaging==23.2 \
523523
--hash=sha256:048fb0e9405036518eaaf48a55953c750c11e1a1b68e0dd1a9d62ed0c092cfc5 \
524524
--hash=sha256:8c491190033a9af7e1d931d0b5dacc2ef47509b34dd0de67ed209b5203fc88c7
525-
# via -r ci/official/requirements_updater/requirements.in
525+
# via
526+
# -r ci/official/requirements_updater/requirements.in
527+
# auditwheel
526528
# tb-nightly
527529
portpicker==1.6.0 \
528530
--hash=sha256:b2787a41404cf7edbe29b07b9e0ed863b09f2665dcc01c1eb0c2261c1e7d0755 \

‎tensorflow/tools/pip_package/setup.py

+1-1
Original file line number | Diff line number | Diff line change
@@ -159,7 +159,7 @@ def standard_or_nightly(standard, nightly):
159159
'nvidia-curand-cu12 == 10.3.6.82',
160160
'nvidia-cusolver-cu12 == 11.6.3.83',
161161
'nvidia-cusparse-cu12 == 12.5.1.3',
162-
'nvidia-nccl-cu12 == 2.23.4',
162+
'nvidia-nccl-cu12 == 2.25.1',
163163
'nvidia-nvjitlink-cu12 == 12.5.82',
164164
]
165165

‎tensorflow/workspace2.bzl

+3-3
Original file line number | Diff line number | Diff line change
@@ -526,9 +526,9 @@ def _tf_repositories():
526526
name = "nccl_archive",
527527
build_file = "//third_party:nccl/archive.BUILD",
528528
patch_file = ["//third_party/nccl:archive.patch"],
529-
sha256 = "6b946b70a9d2d01871842cbd15ec56488d358abe9a0f3767e372fddc3e241ba7",
530-
strip_prefix = "nccl-2.23.4-1",
531-
urls = tf_mirror_urls("https://github.com/nvidia/nccl/archive/v2.23.4-1.tar.gz"),
529+
sha256 = "7b154ad1f8ccafa795ed6696507d402b1b4ccac944c5fceb7f4e29b19a39cc47",
530+
strip_prefix = "nccl-2.25.1-1",
531+
urls = tf_mirror_urls("https://github.com/nvidia/nccl/archive/v2.25.1-1.tar.gz"),
532532
)
533533

534534
tf_http_archive(

‎third_party/gpus/cuda/hermetic/cuda_redist_versions.bzl

+6-6
Original file line number | Diff line number | Diff line change
@@ -133,14 +133,14 @@ CUDNN_REDIST_JSON_DICT = {
133133

134134
CUDA_12_NCCL_WHEEL_DICT = {
135135
"x86_64-unknown-linux-gnu": {
136-
"version": "2.23.4",
137-
"url": "https://files.pythonhosted.org/packages/ed/1f/6482380ec8dcec4894e7503490fc536d846b0d59694acad9cf99f27d0e7d/nvidia_nccl_cu12-2.23.4-py3-none-manylinux2014_x86_64.whl",
138-
"sha256": "b097258d9aab2fa9f686e33c6fe40ae57b27df60cedbd15d139701bb5509e0c1",
136+
"version": "2.25.1",
137+
"url": "https://files.pythonhosted.org/packages/11/0c/8c78b7603f4e685624a3ea944940f1e75f36d71bd6504330511f4a0e1557/nvidia_nccl_cu12-2.25.1-py3-none-manylinux2014_x86_64.manylinux_2_17_x86_64.whl",
138+
"sha256": "362aed5963fb9ea2ed2f264409baae30143498fd0e5c503aeaa1badd88cdc54a",
139139
},
140140
"aarch64-unknown-linux-gnu": {
141-
"version": "2.23.4",
142-
"url": "https://files.pythonhosted.org/packages/c8/3a/0112397396dec37ffc8edd7836d48261b4d14ca60ec8ed7bc857cce1d916/nvidia_nccl_cu12-2.23.4-py3-none-manylinux2014_aarch64.whl",
143-
"sha256": "aa946c8327e22ced28e7cef508a334673abc42064ec85f02d005ba1785ea4cec",
141+
"version": "2.25.1",
142+
"url": "https://files.pythonhosted.org/packages/4b/28/f62adab24f2d4b2165b22145af56a7598ab535feb6ccd172f76b9106ebaa/nvidia_nccl_cu12-2.25.1-py3-none-manylinux2014_aarch64.manylinux_2_17_aarch64.whl",
143+
"sha256": "4ab428bc915785cc66e8c57cb34c7a64cf739c46702b8db748b6ad6cc7180cf8",
144144
},
145145
}
146146

‎third_party/nccl/archive.BUILD

+2-2
Original file line number | Diff line number | Diff line change
@@ -22,9 +22,9 @@ exports_files(["LICENSE.txt"])
2222

2323
NCCL_MAJOR = 2
2424

25-
NCCL_MINOR = 23
25+
NCCL_MINOR = 25
2626

27-
NCCL_PATCH = 4
27+
NCCL_PATCH = 1
2828

2929
NCCL_VERSION = NCCL_MAJOR * 10000 + NCCL_MINOR * 100 + NCCL_PATCH # e.g., 21605
3030

‎third_party/nccl/generated_names.bzl.tpl

+24-9
Original file line number | Diff line number | Diff line change
@@ -6,6 +6,8 @@ GENERATED_SOURCES = [
66
"src/device/generated/all_reduce_minmax_f16.cu.cc",
77
"src/device/generated/all_reduce_minmax_f32.cu.cc",
88
"src/device/generated/all_reduce_minmax_f64.cu.cc",
9+
"src/device/generated/all_reduce_minmax_f8e4m3.cu.cc",
10+
"src/device/generated/all_reduce_minmax_f8e5m2.cu.cc",
911
"src/device/generated/all_reduce_minmax_i32.cu.cc",
1012
"src/device/generated/all_reduce_minmax_i64.cu.cc",
1113
"src/device/generated/all_reduce_minmax_u32.cu.cc",
@@ -15,23 +17,26 @@ GENERATED_SOURCES = [
1517
"src/device/generated/all_reduce_premulsum_f16.cu.cc",
1618
"src/device/generated/all_reduce_premulsum_f32.cu.cc",
1719
"src/device/generated/all_reduce_premulsum_f64.cu.cc",
20+
"src/device/generated/all_reduce_premulsum_f8e4m3.cu.cc",
21+
"src/device/generated/all_reduce_premulsum_f8e5m2.cu.cc",
1822
"src/device/generated/all_reduce_premulsum_u32.cu.cc",
1923
"src/device/generated/all_reduce_premulsum_u64.cu.cc",
2024
"src/device/generated/all_reduce_premulsum_u8.cu.cc",
2125
"src/device/generated/all_reduce_prod_bf16.cu.cc",
2226
"src/device/generated/all_reduce_prod_f16.cu.cc",
2327
"src/device/generated/all_reduce_prod_f32.cu.cc",
2428
"src/device/generated/all_reduce_prod_f64.cu.cc",
29+
"src/device/generated/all_reduce_prod_f8e4m3.cu.cc",
30+
"src/device/generated/all_reduce_prod_f8e5m2.cu.cc",
2531
"src/device/generated/all_reduce_prod_u32.cu.cc",
2632
"src/device/generated/all_reduce_prod_u64.cu.cc",
2733
"src/device/generated/all_reduce_prod_u8.cu.cc",
2834
"src/device/generated/all_reduce_sum_bf16.cu.cc",
2935
"src/device/generated/all_reduce_sum_f16.cu.cc",
3036
"src/device/generated/all_reduce_sum_f32.cu.cc",
3137
"src/device/generated/all_reduce_sum_f64.cu.cc",
32-
"src/device/generated/all_reduce_sumpostdiv_i32.cu.cc",
33-
"src/device/generated/all_reduce_sumpostdiv_i64.cu.cc",
34-
"src/device/generated/all_reduce_sumpostdiv_i8.cu.cc",
38+
"src/device/generated/all_reduce_sum_f8e4m3.cu.cc",
39+
"src/device/generated/all_reduce_sum_f8e5m2.cu.cc",
3540
"src/device/generated/all_reduce_sumpostdiv_u32.cu.cc",
3641
"src/device/generated/all_reduce_sumpostdiv_u64.cu.cc",
3742
"src/device/generated/all_reduce_sumpostdiv_u8.cu.cc",
@@ -46,20 +51,26 @@ GENERATED_SOURCES = [
4651
"src/device/generated/reduce_minmax_f16.cu.cc",
4752
"src/device/generated/reduce_minmax_f32.cu.cc",
4853
"src/device/generated/reduce_minmax_f64.cu.cc",
54+
"src/device/generated/reduce_minmax_f8e4m3.cu.cc",
55+
"src/device/generated/reduce_minmax_f8e5m2.cu.cc",
4956
"src/device/generated/reduce_minmax_u32.cu.cc",
5057
"src/device/generated/reduce_minmax_u64.cu.cc",
5158
"src/device/generated/reduce_minmax_u8.cu.cc",
5259
"src/device/generated/reduce_premulsum_bf16.cu.cc",
5360
"src/device/generated/reduce_premulsum_f16.cu.cc",
5461
"src/device/generated/reduce_premulsum_f32.cu.cc",
5562
"src/device/generated/reduce_premulsum_f64.cu.cc",
63+
"src/device/generated/reduce_premulsum_f8e4m3.cu.cc",
64+
"src/device/generated/reduce_premulsum_f8e5m2.cu.cc",
5665
"src/device/generated/reduce_premulsum_u32.cu.cc",
5766
"src/device/generated/reduce_premulsum_u64.cu.cc",
5867
"src/device/generated/reduce_premulsum_u8.cu.cc",
5968
"src/device/generated/reduce_prod_bf16.cu.cc",
6069
"src/device/generated/reduce_prod_f16.cu.cc",
6170
"src/device/generated/reduce_prod_f32.cu.cc",
6271
"src/device/generated/reduce_prod_f64.cu.cc",
72+
"src/device/generated/reduce_prod_f8e4m3.cu.cc",
73+
"src/device/generated/reduce_prod_f8e5m2.cu.cc",
6374
"src/device/generated/reduce_prod_u32.cu.cc",
6475
"src/device/generated/reduce_prod_u64.cu.cc",
6576
"src/device/generated/reduce_prod_u8.cu.cc",
@@ -68,6 +79,8 @@ GENERATED_SOURCES = [
6879
"src/device/generated/reduce_scatter_minmax_f16.cu.cc",
6980
"src/device/generated/reduce_scatter_minmax_f32.cu.cc",
7081
"src/device/generated/reduce_scatter_minmax_f64.cu.cc",
82+
"src/device/generated/reduce_scatter_minmax_f8e4m3.cu.cc",
83+
"src/device/generated/reduce_scatter_minmax_f8e5m2.cu.cc",
7184
"src/device/generated/reduce_scatter_minmax_i32.cu.cc",
7285
"src/device/generated/reduce_scatter_minmax_i64.cu.cc",
7386
"src/device/generated/reduce_scatter_minmax_u32.cu.cc",
@@ -77,23 +90,26 @@ GENERATED_SOURCES = [
7790
"src/device/generated/reduce_scatter_premulsum_f16.cu.cc",
7891
"src/device/generated/reduce_scatter_premulsum_f32.cu.cc",
7992
"src/device/generated/reduce_scatter_premulsum_f64.cu.cc",
93+
"src/device/generated/reduce_scatter_premulsum_f8e4m3.cu.cc",
94+
"src/device/generated/reduce_scatter_premulsum_f8e5m2.cu.cc",
8095
"src/device/generated/reduce_scatter_premulsum_u32.cu.cc",
8196
"src/device/generated/reduce_scatter_premulsum_u64.cu.cc",
8297
"src/device/generated/reduce_scatter_premulsum_u8.cu.cc",
8398
"src/device/generated/reduce_scatter_prod_bf16.cu.cc",
8499
"src/device/generated/reduce_scatter_prod_f16.cu.cc",
85100
"src/device/generated/reduce_scatter_prod_f32.cu.cc",
86101
"src/device/generated/reduce_scatter_prod_f64.cu.cc",
102+
"src/device/generated/reduce_scatter_prod_f8e4m3.cu.cc",
103+
"src/device/generated/reduce_scatter_prod_f8e5m2.cu.cc",
87104
"src/device/generated/reduce_scatter_prod_u32.cu.cc",
88105
"src/device/generated/reduce_scatter_prod_u64.cu.cc",
89106
"src/device/generated/reduce_scatter_prod_u8.cu.cc",
90107
"src/device/generated/reduce_scatter_sum_bf16.cu.cc",
91108
"src/device/generated/reduce_scatter_sum_f16.cu.cc",
92109
"src/device/generated/reduce_scatter_sum_f32.cu.cc",
93110
"src/device/generated/reduce_scatter_sum_f64.cu.cc",
94-
"src/device/generated/reduce_scatter_sumpostdiv_i32.cu.cc",
95-
"src/device/generated/reduce_scatter_sumpostdiv_i64.cu.cc",
96-
"src/device/generated/reduce_scatter_sumpostdiv_i8.cu.cc",
111+
"src/device/generated/reduce_scatter_sum_f8e4m3.cu.cc",
112+
"src/device/generated/reduce_scatter_sum_f8e5m2.cu.cc",
97113
"src/device/generated/reduce_scatter_sumpostdiv_u32.cu.cc",
98114
"src/device/generated/reduce_scatter_sumpostdiv_u64.cu.cc",
99115
"src/device/generated/reduce_scatter_sumpostdiv_u8.cu.cc",
@@ -104,9 +120,8 @@ GENERATED_SOURCES = [
104120
"src/device/generated/reduce_sum_f16.cu.cc",
105121
"src/device/generated/reduce_sum_f32.cu.cc",
106122
"src/device/generated/reduce_sum_f64.cu.cc",
107-
"src/device/generated/reduce_sumpostdiv_i32.cu.cc",
108-
"src/device/generated/reduce_sumpostdiv_i64.cu.cc",
109-
"src/device/generated/reduce_sumpostdiv_i8.cu.cc",
123+
"src/device/generated/reduce_sum_f8e4m3.cu.cc",
124+
"src/device/generated/reduce_sum_f8e5m2.cu.cc",
110125
"src/device/generated/reduce_sumpostdiv_u32.cu.cc",
111126
"src/device/generated/reduce_sumpostdiv_u64.cu.cc",
112127
"src/device/generated/reduce_sumpostdiv_u8.cu.cc",

‎third_party/xla/third_party/tsl/third_party/gpus/cuda/hermetic/cuda_redist_versions.bzl

+6-6
Original file line number | Diff line number | Diff line change
@@ -133,14 +133,14 @@ CUDNN_REDIST_JSON_DICT = {
133133

134134
CUDA_12_NCCL_WHEEL_DICT = {
135135
"x86_64-unknown-linux-gnu": {
136-
"version": "2.23.4",
137-
"url": "https://files.pythonhosted.org/packages/ed/1f/6482380ec8dcec4894e7503490fc536d846b0d59694acad9cf99f27d0e7d/nvidia_nccl_cu12-2.23.4-py3-none-manylinux2014_x86_64.whl",
138-
"sha256": "b097258d9aab2fa9f686e33c6fe40ae57b27df60cedbd15d139701bb5509e0c1",
136+
"version": "2.25.1",
137+
"url": "https://files.pythonhosted.org/packages/11/0c/8c78b7603f4e685624a3ea944940f1e75f36d71bd6504330511f4a0e1557/nvidia_nccl_cu12-2.25.1-py3-none-manylinux2014_x86_64.manylinux_2_17_x86_64.whl",
138+
"sha256": "362aed5963fb9ea2ed2f264409baae30143498fd0e5c503aeaa1badd88cdc54a",
139139
},
140140
"aarch64-unknown-linux-gnu": {
141-
"version": "2.23.4",
142-
"url": "https://files.pythonhosted.org/packages/c8/3a/0112397396dec37ffc8edd7836d48261b4d14ca60ec8ed7bc857cce1d916/nvidia_nccl_cu12-2.23.4-py3-none-manylinux2014_aarch64.whl",
143-
"sha256": "aa946c8327e22ced28e7cef508a334673abc42064ec85f02d005ba1785ea4cec",
141+
"version": "2.25.1",
142+
"url": "https://files.pythonhosted.org/packages/4b/28/f62adab24f2d4b2165b22145af56a7598ab535feb6ccd172f76b9106ebaa/nvidia_nccl_cu12-2.25.1-py3-none-manylinux2014_aarch64.manylinux_2_17_aarch64.whl",
143+
"sha256": "4ab428bc915785cc66e8c57cb34c7a64cf739c46702b8db748b6ad6cc7180cf8",
144144
},
145145
}
146146

‎third_party/xla/third_party/tsl/third_party/nccl/archive.BUILD

+2-2
Original file line number | Diff line number | Diff line change
@@ -22,9 +22,9 @@ exports_files(["LICENSE.txt"])
2222

2323
NCCL_MAJOR = 2
2424

25-
NCCL_MINOR = 23
25+
NCCL_MINOR = 25
2626

27-
NCCL_PATCH = 4
27+
NCCL_PATCH = 1
2828

2929
NCCL_VERSION = NCCL_MAJOR * 10000 + NCCL_MINOR * 100 + NCCL_PATCH # e.g., 21605
3030

‎third_party/xla/third_party/tsl/third_party/nccl/generated_names.bzl.tpl

+24-9
Original file line number | Diff line number | Diff line change
@@ -6,6 +6,8 @@ GENERATED_SOURCES = [
66
"src/device/generated/all_reduce_minmax_f16.cu.cc",
77
"src/device/generated/all_reduce_minmax_f32.cu.cc",
88
"src/device/generated/all_reduce_minmax_f64.cu.cc",
9+
"src/device/generated/all_reduce_minmax_f8e4m3.cu.cc",
10+
"src/device/generated/all_reduce_minmax_f8e5m2.cu.cc",
911
"src/device/generated/all_reduce_minmax_i32.cu.cc",
1012
"src/device/generated/all_reduce_minmax_i64.cu.cc",
1113
"src/device/generated/all_reduce_minmax_u32.cu.cc",
@@ -15,23 +17,26 @@ GENERATED_SOURCES = [
1517
"src/device/generated/all_reduce_premulsum_f16.cu.cc",
1618
"src/device/generated/all_reduce_premulsum_f32.cu.cc",
1719
"src/device/generated/all_reduce_premulsum_f64.cu.cc",
20+
"src/device/generated/all_reduce_premulsum_f8e4m3.cu.cc",
21+
"src/device/generated/all_reduce_premulsum_f8e5m2.cu.cc",
1822
"src/device/generated/all_reduce_premulsum_u32.cu.cc",
1923
"src/device/generated/all_reduce_premulsum_u64.cu.cc",
2024
"src/device/generated/all_reduce_premulsum_u8.cu.cc",
2125
"src/device/generated/all_reduce_prod_bf16.cu.cc",
2226
"src/device/generated/all_reduce_prod_f16.cu.cc",
2327
"src/device/generated/all_reduce_prod_f32.cu.cc",
2428
"src/device/generated/all_reduce_prod_f64.cu.cc",
29+
"src/device/generated/all_reduce_prod_f8e4m3.cu.cc",
30+
"src/device/generated/all_reduce_prod_f8e5m2.cu.cc",
2531
"src/device/generated/all_reduce_prod_u32.cu.cc",
2632
"src/device/generated/all_reduce_prod_u64.cu.cc",
2733
"src/device/generated/all_reduce_prod_u8.cu.cc",
2834
"src/device/generated/all_reduce_sum_bf16.cu.cc",
2935
"src/device/generated/all_reduce_sum_f16.cu.cc",
3036
"src/device/generated/all_reduce_sum_f32.cu.cc",
3137
"src/device/generated/all_reduce_sum_f64.cu.cc",
32-
"src/device/generated/all_reduce_sumpostdiv_i32.cu.cc",
33-
"src/device/generated/all_reduce_sumpostdiv_i64.cu.cc",
34-
"src/device/generated/all_reduce_sumpostdiv_i8.cu.cc",
38+
"src/device/generated/all_reduce_sum_f8e4m3.cu.cc",
39+
"src/device/generated/all_reduce_sum_f8e5m2.cu.cc",
3540
"src/device/generated/all_reduce_sumpostdiv_u32.cu.cc",
3641
"src/device/generated/all_reduce_sumpostdiv_u64.cu.cc",
3742
"src/device/generated/all_reduce_sumpostdiv_u8.cu.cc",
@@ -46,20 +51,26 @@ GENERATED_SOURCES = [
4651
"src/device/generated/reduce_minmax_f16.cu.cc",
4752
"src/device/generated/reduce_minmax_f32.cu.cc",
4853
"src/device/generated/reduce_minmax_f64.cu.cc",
54+
"src/device/generated/reduce_minmax_f8e4m3.cu.cc",
55+
"src/device/generated/reduce_minmax_f8e5m2.cu.cc",
4956
"src/device/generated/reduce_minmax_u32.cu.cc",
5057
"src/device/generated/reduce_minmax_u64.cu.cc",
5158
"src/device/generated/reduce_minmax_u8.cu.cc",
5259
"src/device/generated/reduce_premulsum_bf16.cu.cc",
5360
"src/device/generated/reduce_premulsum_f16.cu.cc",
5461
"src/device/generated/reduce_premulsum_f32.cu.cc",
5562
"src/device/generated/reduce_premulsum_f64.cu.cc",
63+
"src/device/generated/reduce_premulsum_f8e4m3.cu.cc",
64+
"src/device/generated/reduce_premulsum_f8e5m2.cu.cc",
5665
"src/device/generated/reduce_premulsum_u32.cu.cc",
5766
"src/device/generated/reduce_premulsum_u64.cu.cc",
5867
"src/device/generated/reduce_premulsum_u8.cu.cc",
5968
"src/device/generated/reduce_prod_bf16.cu.cc",
6069
"src/device/generated/reduce_prod_f16.cu.cc",
6170
"src/device/generated/reduce_prod_f32.cu.cc",
6271
"src/device/generated/reduce_prod_f64.cu.cc",
72+
"src/device/generated/reduce_prod_f8e4m3.cu.cc",
73+
"src/device/generated/reduce_prod_f8e5m2.cu.cc",
6374
"src/device/generated/reduce_prod_u32.cu.cc",
6475
"src/device/generated/reduce_prod_u64.cu.cc",
6576
"src/device/generated/reduce_prod_u8.cu.cc",
@@ -68,6 +79,8 @@ GENERATED_SOURCES = [
6879
"src/device/generated/reduce_scatter_minmax_f16.cu.cc",
6980
"src/device/generated/reduce_scatter_minmax_f32.cu.cc",
7081
"src/device/generated/reduce_scatter_minmax_f64.cu.cc",
82+
"src/device/generated/reduce_scatter_minmax_f8e4m3.cu.cc",
83+
"src/device/generated/reduce_scatter_minmax_f8e5m2.cu.cc",
7184
"src/device/generated/reduce_scatter_minmax_i32.cu.cc",
7285
"src/device/generated/reduce_scatter_minmax_i64.cu.cc",
7386
"src/device/generated/reduce_scatter_minmax_u32.cu.cc",
@@ -77,23 +90,26 @@ GENERATED_SOURCES = [
7790
"src/device/generated/reduce_scatter_premulsum_f16.cu.cc",
7891
"src/device/generated/reduce_scatter_premulsum_f32.cu.cc",
7992
"src/device/generated/reduce_scatter_premulsum_f64.cu.cc",
93+
"src/device/generated/reduce_scatter_premulsum_f8e4m3.cu.cc",
94+
"src/device/generated/reduce_scatter_premulsum_f8e5m2.cu.cc",
8095
"src/device/generated/reduce_scatter_premulsum_u32.cu.cc",
8196
"src/device/generated/reduce_scatter_premulsum_u64.cu.cc",
8297
"src/device/generated/reduce_scatter_premulsum_u8.cu.cc",
8398
"src/device/generated/reduce_scatter_prod_bf16.cu.cc",
8499
"src/device/generated/reduce_scatter_prod_f16.cu.cc",
85100
"src/device/generated/reduce_scatter_prod_f32.cu.cc",
86101
"src/device/generated/reduce_scatter_prod_f64.cu.cc",
102+
"src/device/generated/reduce_scatter_prod_f8e4m3.cu.cc",
103+
"src/device/generated/reduce_scatter_prod_f8e5m2.cu.cc",
87104
"src/device/generated/reduce_scatter_prod_u32.cu.cc",
88105
"src/device/generated/reduce_scatter_prod_u64.cu.cc",
89106
"src/device/generated/reduce_scatter_prod_u8.cu.cc",
90107
"src/device/generated/reduce_scatter_sum_bf16.cu.cc",
91108
"src/device/generated/reduce_scatter_sum_f16.cu.cc",
92109
"src/device/generated/reduce_scatter_sum_f32.cu.cc",
93110
"src/device/generated/reduce_scatter_sum_f64.cu.cc",
94-
"src/device/generated/reduce_scatter_sumpostdiv_i32.cu.cc",
95-
"src/device/generated/reduce_scatter_sumpostdiv_i64.cu.cc",
96-
"src/device/generated/reduce_scatter_sumpostdiv_i8.cu.cc",
111+
"src/device/generated/reduce_scatter_sum_f8e4m3.cu.cc",
112+
"src/device/generated/reduce_scatter_sum_f8e5m2.cu.cc",
97113
"src/device/generated/reduce_scatter_sumpostdiv_u32.cu.cc",
98114
"src/device/generated/reduce_scatter_sumpostdiv_u64.cu.cc",
99115
"src/device/generated/reduce_scatter_sumpostdiv_u8.cu.cc",
@@ -104,9 +120,8 @@ GENERATED_SOURCES = [
104120
"src/device/generated/reduce_sum_f16.cu.cc",
105121
"src/device/generated/reduce_sum_f32.cu.cc",
106122
"src/device/generated/reduce_sum_f64.cu.cc",
107-
"src/device/generated/reduce_sumpostdiv_i32.cu.cc",
108-
"src/device/generated/reduce_sumpostdiv_i64.cu.cc",
109-
"src/device/generated/reduce_sumpostdiv_i8.cu.cc",
123+
"src/device/generated/reduce_sum_f8e4m3.cu.cc",
124+
"src/device/generated/reduce_sum_f8e5m2.cu.cc",
110125
"src/device/generated/reduce_sumpostdiv_u32.cu.cc",
111126
"src/device/generated/reduce_sumpostdiv_u64.cu.cc",
112127
"src/device/generated/reduce_sumpostdiv_u8.cu.cc",

‎third_party/xla/tsl_workspace2.bzl

+3-3
Original file line number | Diff line number | Diff line change
@@ -391,9 +391,9 @@ def _tf_repositories():
391391
name = "nccl_archive",
392392
build_file = "//third_party:nccl/archive.BUILD",
393393
patch_file = ["@local_tsl//third_party/nccl:archive.patch"],
394-
sha256 = "6b946b70a9d2d01871842cbd15ec56488d358abe9a0f3767e372fddc3e241ba7",
395-
strip_prefix = "nccl-2.23.4-1",
396-
urls = tf_mirror_urls("https://github.com/nvidia/nccl/archive/v2.23.4-1.tar.gz"),
394+
sha256 = "7b154ad1f8ccafa795ed6696507d402b1b4ccac944c5fceb7f4e29b19a39cc47",
395+
strip_prefix = "nccl-2.25.1-1",
396+
urls = tf_mirror_urls("https://github.com/nvidia/nccl/archive/v2.25.1-1.tar.gz"),
397397
)
398398

399399
tf_http_archive(

0 commit comments

Comments
 (0)
Please sign in to comment.