Skip to content

Commit 21c0d6b

Browse files
committed
Fix several integer-signedness warnings
1 parent 033d9ef commit 21c0d6b

15 files changed, with 44 additions (+44) and 38 deletions (-38).

include/cute/numeric/integral_constant.hpp

+1-1
Original file line number | Diff line number | Diff line change
@@ -451,7 +451,7 @@ template <class... Ts>
451451
constexpr uint64_t parse_int_digits(uint64_t result, int digit, Ts... digits)
452452
{
453453
if constexpr (sizeof...(Ts) == 0) {
454-
return 10 * result + digit;
454+
return 10 * result + uint64_t(digit);
455455
} else {
456456
return parse_int_digits(10 * result + digit, digits...);
457457
}

include/cute/numeric/math.hpp

+1-1
Original file line number | Diff line number | Diff line change
@@ -151,7 +151,7 @@ bit_width(T x) {
151151
(numeric_limits<T>::digits == 8 ? 3 : (assert(false),0)))));
152152
T r = 0;
153153
for (int i = N - 1; i >= 0; --i) {
154-
T shift = (x > ((T(1) << (T(1) << i))-1)) << i;
154+
T shift = T(x > ((T(1) << (T(1) << i))-1)) << i;
155155
x >>= shift;
156156
r |= shift;
157157
}

include/cutlass/arch/mma_sm60.h

+6-6
Original file line number | Diff line number | Diff line change
@@ -84,7 +84,7 @@ struct Mma<
8484

8585
#else
8686
CUTLASS_PRAGMA_UNROLL
87-
for (int i = 0; i < 2; ++i) {
87+
for (size_t i = 0; i < 2; ++i) {
8888
d[i] = a[i] * b[0] + c[i];
8989
}
9090
#endif
@@ -130,7 +130,7 @@ struct Mma<
130130

131131
#else
132132
CUTLASS_PRAGMA_UNROLL
133-
for (int i = 0; i < 2; ++i) {
133+
for (size_t i = 0; i < 2; ++i) {
134134
d[i] = a[0] * b[i] + c[i];
135135
}
136136
#endif
@@ -182,9 +182,9 @@ struct Mma <
182182

183183
#else
184184
CUTLASS_PRAGMA_UNROLL
185-
for (int j = 0; j < 2; ++j) {
185+
for (size_t j = 0; j < 2; ++j) {
186186
CUTLASS_PRAGMA_UNROLL
187-
for (int i = 0; i < 2; ++i) {
187+
for (size_t i = 0; i < 2; ++i) {
188188
d[i + 2 * j] = a[i] * b[j] + c[i + 2 * j];
189189
}
190190
}
@@ -236,9 +236,9 @@ struct Mma<
236236
D[1] = reinterpret_cast<Array<half_t, 2> &>(Dhi);
237237
#else
238238
CUTLASS_PRAGMA_UNROLL
239-
for (int i = 0; i < 2; ++i) {
239+
for (size_t i = 0; i < 2; ++i) {
240240
CUTLASS_PRAGMA_UNROLL
241-
for (int j = 0; j < 2; ++j) {
241+
for (size_t j = 0; j < 2; ++j) {
242242
d[i * 2 + j] = a[i] * b[j] + c[i * 2 + j];
243243
}
244244
}

include/cutlass/arch/mma_sm61.h

+2-2
Original file line number | Diff line number | Diff line change
@@ -82,7 +82,7 @@ struct Mma<
8282
d[0] = c[0];
8383

8484
CUTLASS_PRAGMA_UNROLL
85-
for (int k = 0; k < 4; ++k) {
85+
for (size_t k = 0; k < 4; ++k) {
8686
d[0] += a[k] * b[k];
8787
}
8888

@@ -129,7 +129,7 @@ struct Mma<
129129
d[0] = c[0];
130130

131131
CUTLASS_PRAGMA_UNROLL
132-
for (int k = 0; k < 2; ++k) {
132+
for (size_t k = 0; k < 2; ++k) {
133133
d[0] += a[k] * b[k];
134134
}
135135
#endif

include/cutlass/epilogue/warp/tile_iterator_tensor_op_mixed.h

+4-4
Original file line number | Diff line number | Diff line change
@@ -384,7 +384,7 @@ class TileIteratorTensorOpMixed<WarpShape_, OperatorShape_, int32_t, 32, OutputS
384384
TensorRef const &ref,
385385
unsigned lane_id
386386
):
387-
stride_(ref.stride()[0] / AccessType::kElements) {
387+
stride_(size_t(ref.stride()[0]) / AccessType::kElements) {
388388

389389
int quad_id = (lane_id / Detail::kLanesInQuad);
390390
int lane_in_quad = (lane_id % Detail::kLanesInQuad);
@@ -579,7 +579,7 @@ class TileIteratorTensorOpMixed<WarpShape_, OperatorShape_, int32_t, 32, OutputS
579579
TensorRef const &ref,
580580
unsigned lane_id
581581
):
582-
stride_(ref.stride()[0] / AccessType::kElements) {
582+
stride_(size_t(ref.stride()[0]) / AccessType::kElements) {
583583

584584
int quad_id = (lane_id / Detail::kLanesInQuad);
585585
int lane_in_quad = (lane_id % Detail::kLanesInQuad);
@@ -774,7 +774,7 @@ class TileIteratorTensorOpMixed<WarpShape_, OperatorShape_, float, 32, 8, 16, 8>
774774
TensorRef const &ref,
775775
unsigned lane_id
776776
):
777-
stride_(ref.stride()[0] / AccessType::kElements) {
777+
stride_(size_t(ref.stride()[0]) / AccessType::kElements) {
778778

779779
int quad_id = (lane_id / Detail::kLanesInQuad);
780780
int lane_in_quad = (lane_id % Detail::kLanesInQuad);
@@ -963,7 +963,7 @@ class TileIteratorTensorOpMixed<WarpShape_, OperatorShape_, float, 32, 8, 8, 8>
963963
TensorRef const &ref,
964964
unsigned lane_id
965965
):
966-
stride_(ref.stride()[0] / AccessType::kElements) {
966+
stride_(size_t(ref.stride()[0]) / AccessType::kElements) {
967967

968968
int quad_id = (lane_id / Detail::kLanesInQuad);
969969
int lane_in_quad = (lane_id % Detail::kLanesInQuad);

include/cutlass/fast_math.h

+2-2
Original file line number | Diff line number | Diff line change
@@ -242,7 +242,7 @@ CUTLASS_CONSTEXPR_IF_CXX17
242242
value_t find_log2(value_t x) {
243243
int a = int(31 - clz(x));
244244
a += (x & (x - 1)) != 0; // Round up, add 1 if not a power of 2.
245-
return a;
245+
return value_t(a);
246246
}
247247

248248

@@ -364,7 +364,7 @@ struct FastDivmod {
364364
FastDivmod(int divisor): divisor(divisor) {
365365

366366
if (divisor != 1) {
367-
unsigned int p = 31 + find_log2(divisor);
367+
auto p = static_cast<unsigned int>(31 + find_log2(divisor));
368368
unsigned m = unsigned(((1ull << p) + unsigned(divisor) - 1) / unsigned(divisor));
369369

370370
multiplier = m;

include/cutlass/gemm/kernel/tile_scheduler_params.h

+3-3
Original file line numberDiff line numberDiff line change
@@ -958,9 +958,9 @@ struct PersistentTileSchedulerSm90StreamKParams {
958958
uint32_t epilogue_subtile = 1,
959959
uint32_t num_accumulator_mtxs = 1) {
960960

961-
auto log_swizzle_size = UnderlyingParams::get_log_swizzle_size(problem_blocks.x, problem_blocks.y, max_swizzle);
962-
problem_blocks.x = round_up(problem_blocks.x, (1 << log_swizzle_size) * cluster_shape.m());
963-
problem_blocks.y = round_up(problem_blocks.y, (1 << log_swizzle_size) * cluster_shape.n());
961+
auto log_swizzle_size = UnderlyingParams::get_log_swizzle_size(int(problem_blocks.x), int(problem_blocks.y), max_swizzle);
962+
problem_blocks.x = unsigned(round_up(int(problem_blocks.x), (1 << log_swizzle_size) * cluster_shape.m()));
963+
problem_blocks.y = unsigned(round_up(int(problem_blocks.y), (1 << log_swizzle_size) * cluster_shape.n()));
964964

965965
// Workspace is needed only for output tiles that will be split. Thus, we first determine the number
966966
// of output tiles that will be split, and then calculate the workspace needed to cover these.

include/cutlass/gemm/threadblock/threadblock_swizzle.h

+8-2
Original file line number | Diff line number | Diff line change
@@ -109,7 +109,10 @@ struct GemmIdentityThreadblockSwizzle {
109109
CUTLASS_HOST_DEVICE
110110
static dim3 get_grid_shape(GemmCoord tiled_shape) {
111111
int tile = 1 << get_log_tile(tiled_shape);
112-
return dim3(tiled_shape.m() * tile, (tiled_shape.n() + tile - 1) / tile, tiled_shape.k());
112+
return dim3(
113+
unsigned(tiled_shape.m() * tile),
114+
unsigned((tiled_shape.n() + tile - 1) / tile),
115+
unsigned(tiled_shape.k()));
113116
}
114117

115118
/// Calculates optimal swizzle width
@@ -301,7 +304,10 @@ struct GemmSplitKIdentityThreadblockSwizzle {
301304
CUTLASS_HOST_DEVICE
302305
static dim3 get_grid_shape(GemmCoord tiled_shape) {
303306
int tile = 1 << get_log_tile(tiled_shape);
304-
return dim3(tiled_shape.m() * tile, (tiled_shape.n() + tile - 1) / tile, tiled_shape.k());
307+
return dim3(
308+
unsigned(tiled_shape.m() * tile),
309+
unsigned((tiled_shape.n() + tile - 1) / tile),
310+
unsigned(tiled_shape.k()));
305311
}
306312

307313
/// Obtains the threadblock offset (in units of threadblock-scoped tiles)

include/cutlass/gemm/threadblock/threadblock_swizzle_streamk.h

+3-3
Original file line number | Diff line number | Diff line change
@@ -437,9 +437,9 @@ struct ThreadblockSwizzleStreamK {
437437
batch_count);
438438

439439
size_t problem_bytes =
440-
(element_C_bytes_ * problem_size.m() * problem_size.n()) +
441-
(element_A_bytes_ * problem_size.m() * problem_size.k()) +
442-
(element_B_bytes_ * problem_size.k() * problem_size.n());
440+
(element_C_bytes_ * size_t(problem_size.m()) * size_t(problem_size.n())) +
441+
(element_A_bytes_ * size_t(problem_size.m()) * size_t(problem_size.k())) +
442+
(element_B_bytes_ * size_t(problem_size.k()) * size_t(problem_size.n()));
443443

444444
size_t problem_flops = size_t(problem_size.m()) * size_t(problem_size.n()) * size_t(problem_size.k()) * 2;
445445

include/cutlass/gemm/warp/mma_tensor_op.h

+4-4
Original file line number | Diff line number | Diff line change
@@ -99,8 +99,8 @@ struct ConvertAndPack<bfloat16_t, float, N, Round> {
9999
Array<float, N> tmp;
100100

101101
CUTLASS_PRAGMA_UNROLL
102-
for (int i = 0; i < N; ++i) {
103-
int idx = (((i << 1) & 2) | ((i >> 1) & 1) | (i & 0xfffffffc));
102+
for (uint32_t i = 0; i < N; ++i) {
103+
uint32_t idx = (((i << 1) & 2) | ((i >> 1) & 1) | (i & 0xfffffffc));
104104
tmp[i] = source[idx];
105105
}
106106

@@ -120,8 +120,8 @@ struct ConvertAndPack<half_t, float, N, Round> {
120120
Array<float, N> tmp;
121121

122122
CUTLASS_PRAGMA_UNROLL
123-
for (int i = 0; i < N; ++i) {
124-
int idx = (((i << 1) & 2) | ((i >> 1) & 1) | (i & 0xfffffffc));
123+
for (uint32_t i = 0; i < N; ++i) {
124+
uint32_t idx = (((i << 1) & 2) | ((i >> 1) & 1) | (i & 0xfffffffc));
125125
tmp[i] = source[idx];
126126
}
127127

include/cutlass/half.h

+2-2
Original file line number | Diff line number | Diff line change
@@ -214,7 +214,7 @@ struct alignas(2) half_t {
214214
#endif
215215

216216
uint16_t sign = uint16_t((s >> 16) & 0x8000);
217-
int16_t exp = uint16_t(((s >> 23) & 0xff) - 127);
217+
auto exp = int16_t(((s >> 23) & 0xff) - 127);
218218
int mantissa = s & 0x7fffff;
219219
uint16_t u = 0;
220220

@@ -238,7 +238,7 @@ struct alignas(2) half_t {
238238

239239
if (exp >= -14) {
240240
// normal fp32 to normal fp16
241-
exp = uint16_t(exp + uint16_t(15));
241+
exp = static_cast<decltype(exp)>(exp + 15);
242242
u = uint16_t(((exp & 0x1f) << 10));
243243
u = uint16_t(u | (mantissa >> 13));
244244
} else {

include/cutlass/layout/permute.h

+4-4
Original file line number | Diff line number | Diff line change
@@ -318,7 +318,7 @@ class Tensor4DPermuteBMM0213RowMajor : public PermuteBase {
318318
LongIndex operator()(MatrixCoord coord) const {
319319

320320
// The batch index for BMM
321-
Index BMM_batch_idx = blockIdx.z;
321+
auto BMM_batch_idx = Index(blockIdx.z);
322322

323323
// [i,j,k,l] -> [i,k,j,l]
324324
Index l = coord.column();
@@ -381,7 +381,7 @@ class Tensor4DPermuteBMM0213RowMajorInverse : public PermuteBase {
381381
LongIndex operator()(MatrixCoord coord) const {
382382

383383
// The batch index for BMM
384-
Index BMM_batch_idx = blockIdx.z;
384+
auto BMM_batch_idx = Index(blockIdx.z);
385385

386386
// The following assumes grouping [(D0)->batch, (D2)->row, (D1,D3)->col]
387387
Index l = coord.column() % D3_;
@@ -453,7 +453,7 @@ class Tensor4DPermuteBMM0321ColumnMajor : public PermuteBase {
453453
CUTLASS_HOST_DEVICE
454454
LongIndex operator()(MatrixCoord coord) const {
455455

456-
Index BMM_batch_idx = blockIdx.z;
456+
auto BMM_batch_idx = Index(blockIdx.z);
457457

458458
// [i,j,k,l] -> [i,k,j,l]
459459
Index l = coord.column();
@@ -514,7 +514,7 @@ class Tensor4DPermuteBMM0321ColumnMajorInverse : public PermuteBase {
514514
CUTLASS_HOST_DEVICE
515515
LongIndex operator()(MatrixCoord coord) const {
516516

517-
Index BMM_batch_idx = blockIdx.z;
517+
auto BMM_batch_idx = Index(blockIdx.z);
518518

519519
// The following assumes grouping [(D0)->batch, (D1,D2)->row, (D3)->col]
520520
Index l = coord.column();

include/cutlass/predicate_vector.h

+2-2
Original file line number | Diff line number | Diff line change
@@ -159,8 +159,8 @@ struct PredicateVector {
159159
int byte = (idx / kPredicatesPerByte);
160160
int bit_offset = (idx % kPredicatesPerByte);
161161

162-
word = byte / sizeof(Storage);
163-
int byte_offset = (byte % sizeof(Storage));
162+
word = byte / int(sizeof(Storage));
163+
int byte_offset = (byte % int(sizeof(Storage)));
164164

165165
bit = byte_offset * 8 + bit_offset + kPredicateStart;
166166
}

test/unit/gemm/device/testbed.h

+1-1
Original file line number | Diff line number | Diff line change
@@ -156,7 +156,7 @@ struct Testbed {
156156
else if (dist_kind == cutlass::Distribution::Sequential) {
157157

158158
cutlass::reference::host::BlockFillSequential(
159-
view.data(), view.capacity());
159+
view.data(), int64_t(view.capacity()));
160160
}
161161
else {
162162
EXPECT_TRUE(false) << "Not implemented";

test/unit/gemm/device/testbed_universal.h

+1-1
Original file line number | Diff line number | Diff line change
@@ -133,7 +133,7 @@ struct TestbedUniversal {
133133
else if (dist_kind == cutlass::Distribution::Sequential) {
134134

135135
cutlass::reference::host::BlockFillSequential(
136-
view.data(), view.capacity());
136+
view.data(), int64_t(view.capacity()));
137137
}
138138
else {
139139
EXPECT_TRUE(false) << "Not implemented";

0 commit comments

Comments
 (0)