@@ -6,6 +6,8 @@ GENERATED_SOURCES = [
6
6
"src/device/generated/all_reduce_minmax_f16.cu.cc",
7
7
"src/device/generated/all_reduce_minmax_f32.cu.cc",
8
8
"src/device/generated/all_reduce_minmax_f64.cu.cc",
9
+ "src/device/generated/all_reduce_minmax_f8e4m3.cu.cc",
10
+ "src/device/generated/all_reduce_minmax_f8e5m2.cu.cc",
9
11
"src/device/generated/all_reduce_minmax_i32.cu.cc",
10
12
"src/device/generated/all_reduce_minmax_i64.cu.cc",
11
13
"src/device/generated/all_reduce_minmax_u32.cu.cc",
@@ -15,23 +17,26 @@ GENERATED_SOURCES = [
15
17
"src/device/generated/all_reduce_premulsum_f16.cu.cc",
16
18
"src/device/generated/all_reduce_premulsum_f32.cu.cc",
17
19
"src/device/generated/all_reduce_premulsum_f64.cu.cc",
20
+ "src/device/generated/all_reduce_premulsum_f8e4m3.cu.cc",
21
+ "src/device/generated/all_reduce_premulsum_f8e5m2.cu.cc",
18
22
"src/device/generated/all_reduce_premulsum_u32.cu.cc",
19
23
"src/device/generated/all_reduce_premulsum_u64.cu.cc",
20
24
"src/device/generated/all_reduce_premulsum_u8.cu.cc",
21
25
"src/device/generated/all_reduce_prod_bf16.cu.cc",
22
26
"src/device/generated/all_reduce_prod_f16.cu.cc",
23
27
"src/device/generated/all_reduce_prod_f32.cu.cc",
24
28
"src/device/generated/all_reduce_prod_f64.cu.cc",
29
+ "src/device/generated/all_reduce_prod_f8e4m3.cu.cc",
30
+ "src/device/generated/all_reduce_prod_f8e5m2.cu.cc",
25
31
"src/device/generated/all_reduce_prod_u32.cu.cc",
26
32
"src/device/generated/all_reduce_prod_u64.cu.cc",
27
33
"src/device/generated/all_reduce_prod_u8.cu.cc",
28
34
"src/device/generated/all_reduce_sum_bf16.cu.cc",
29
35
"src/device/generated/all_reduce_sum_f16.cu.cc",
30
36
"src/device/generated/all_reduce_sum_f32.cu.cc",
31
37
"src/device/generated/all_reduce_sum_f64.cu.cc",
32
- "src/device/generated/all_reduce_sumpostdiv_i32.cu.cc",
33
- "src/device/generated/all_reduce_sumpostdiv_i64.cu.cc",
34
- "src/device/generated/all_reduce_sumpostdiv_i8.cu.cc",
38
+ "src/device/generated/all_reduce_sum_f8e4m3.cu.cc",
39
+ "src/device/generated/all_reduce_sum_f8e5m2.cu.cc",
35
40
"src/device/generated/all_reduce_sumpostdiv_u32.cu.cc",
36
41
"src/device/generated/all_reduce_sumpostdiv_u64.cu.cc",
37
42
"src/device/generated/all_reduce_sumpostdiv_u8.cu.cc",
@@ -46,20 +51,26 @@ GENERATED_SOURCES = [
46
51
"src/device/generated/reduce_minmax_f16.cu.cc",
47
52
"src/device/generated/reduce_minmax_f32.cu.cc",
48
53
"src/device/generated/reduce_minmax_f64.cu.cc",
54
+ "src/device/generated/reduce_minmax_f8e4m3.cu.cc",
55
+ "src/device/generated/reduce_minmax_f8e5m2.cu.cc",
49
56
"src/device/generated/reduce_minmax_u32.cu.cc",
50
57
"src/device/generated/reduce_minmax_u64.cu.cc",
51
58
"src/device/generated/reduce_minmax_u8.cu.cc",
52
59
"src/device/generated/reduce_premulsum_bf16.cu.cc",
53
60
"src/device/generated/reduce_premulsum_f16.cu.cc",
54
61
"src/device/generated/reduce_premulsum_f32.cu.cc",
55
62
"src/device/generated/reduce_premulsum_f64.cu.cc",
63
+ "src/device/generated/reduce_premulsum_f8e4m3.cu.cc",
64
+ "src/device/generated/reduce_premulsum_f8e5m2.cu.cc",
56
65
"src/device/generated/reduce_premulsum_u32.cu.cc",
57
66
"src/device/generated/reduce_premulsum_u64.cu.cc",
58
67
"src/device/generated/reduce_premulsum_u8.cu.cc",
59
68
"src/device/generated/reduce_prod_bf16.cu.cc",
60
69
"src/device/generated/reduce_prod_f16.cu.cc",
61
70
"src/device/generated/reduce_prod_f32.cu.cc",
62
71
"src/device/generated/reduce_prod_f64.cu.cc",
72
+ "src/device/generated/reduce_prod_f8e4m3.cu.cc",
73
+ "src/device/generated/reduce_prod_f8e5m2.cu.cc",
63
74
"src/device/generated/reduce_prod_u32.cu.cc",
64
75
"src/device/generated/reduce_prod_u64.cu.cc",
65
76
"src/device/generated/reduce_prod_u8.cu.cc",
@@ -68,6 +79,8 @@ GENERATED_SOURCES = [
68
79
"src/device/generated/reduce_scatter_minmax_f16.cu.cc",
69
80
"src/device/generated/reduce_scatter_minmax_f32.cu.cc",
70
81
"src/device/generated/reduce_scatter_minmax_f64.cu.cc",
82
+ "src/device/generated/reduce_scatter_minmax_f8e4m3.cu.cc",
83
+ "src/device/generated/reduce_scatter_minmax_f8e5m2.cu.cc",
71
84
"src/device/generated/reduce_scatter_minmax_i32.cu.cc",
72
85
"src/device/generated/reduce_scatter_minmax_i64.cu.cc",
73
86
"src/device/generated/reduce_scatter_minmax_u32.cu.cc",
@@ -77,23 +90,26 @@ GENERATED_SOURCES = [
77
90
"src/device/generated/reduce_scatter_premulsum_f16.cu.cc",
78
91
"src/device/generated/reduce_scatter_premulsum_f32.cu.cc",
79
92
"src/device/generated/reduce_scatter_premulsum_f64.cu.cc",
93
+ "src/device/generated/reduce_scatter_premulsum_f8e4m3.cu.cc",
94
+ "src/device/generated/reduce_scatter_premulsum_f8e5m2.cu.cc",
80
95
"src/device/generated/reduce_scatter_premulsum_u32.cu.cc",
81
96
"src/device/generated/reduce_scatter_premulsum_u64.cu.cc",
82
97
"src/device/generated/reduce_scatter_premulsum_u8.cu.cc",
83
98
"src/device/generated/reduce_scatter_prod_bf16.cu.cc",
84
99
"src/device/generated/reduce_scatter_prod_f16.cu.cc",
85
100
"src/device/generated/reduce_scatter_prod_f32.cu.cc",
86
101
"src/device/generated/reduce_scatter_prod_f64.cu.cc",
102
+ "src/device/generated/reduce_scatter_prod_f8e4m3.cu.cc",
103
+ "src/device/generated/reduce_scatter_prod_f8e5m2.cu.cc",
87
104
"src/device/generated/reduce_scatter_prod_u32.cu.cc",
88
105
"src/device/generated/reduce_scatter_prod_u64.cu.cc",
89
106
"src/device/generated/reduce_scatter_prod_u8.cu.cc",
90
107
"src/device/generated/reduce_scatter_sum_bf16.cu.cc",
91
108
"src/device/generated/reduce_scatter_sum_f16.cu.cc",
92
109
"src/device/generated/reduce_scatter_sum_f32.cu.cc",
93
110
"src/device/generated/reduce_scatter_sum_f64.cu.cc",
94
- "src/device/generated/reduce_scatter_sumpostdiv_i32.cu.cc",
95
- "src/device/generated/reduce_scatter_sumpostdiv_i64.cu.cc",
96
- "src/device/generated/reduce_scatter_sumpostdiv_i8.cu.cc",
111
+ "src/device/generated/reduce_scatter_sum_f8e4m3.cu.cc",
112
+ "src/device/generated/reduce_scatter_sum_f8e5m2.cu.cc",
97
113
"src/device/generated/reduce_scatter_sumpostdiv_u32.cu.cc",
98
114
"src/device/generated/reduce_scatter_sumpostdiv_u64.cu.cc",
99
115
"src/device/generated/reduce_scatter_sumpostdiv_u8.cu.cc",
@@ -104,9 +120,8 @@ GENERATED_SOURCES = [
104
120
"src/device/generated/reduce_sum_f16.cu.cc",
105
121
"src/device/generated/reduce_sum_f32.cu.cc",
106
122
"src/device/generated/reduce_sum_f64.cu.cc",
107
- "src/device/generated/reduce_sumpostdiv_i32.cu.cc",
108
- "src/device/generated/reduce_sumpostdiv_i64.cu.cc",
109
- "src/device/generated/reduce_sumpostdiv_i8.cu.cc",
123
+ "src/device/generated/reduce_sum_f8e4m3.cu.cc",
124
+ "src/device/generated/reduce_sum_f8e5m2.cu.cc",
110
125
"src/device/generated/reduce_sumpostdiv_u32.cu.cc",
111
126
"src/device/generated/reduce_sumpostdiv_u64.cu.cc",
112
127
"src/device/generated/reduce_sumpostdiv_u8.cu.cc",
0 commit comments