3 files changed: +5 −5
File 1:

     version=2,
 )

+# only 128 group_size is supported
 FP8_ACT_CONFIG = Float8DynamicActivationInt4WeightConfig(
-    group_size=128,
     packing_format="preshuffled",
 )
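
For context, a minimal usage sketch of the updated config (assuming a torchao build that includes this change and a CUDA backend providing the preshuffled int4 kernel; the toy model here is illustrative):

import torch
from torchao.quantization import quantize_
from torchao.quantization.quant_api import Float8DynamicActivationInt4WeightConfig

# toy model; the transform applies to linear layers
model = torch.nn.Sequential(torch.nn.Linear(256, 512)).to(torch.bfloat16).cuda()

# group_size is no longer an argument; the transform hardcodes 128 internally,
# the only size the underlying kernel supports
quantize_(model, Float8DynamicActivationInt4WeightConfig(packing_format="preshuffled"))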
File 2:

@@ -1927,7 +1927,7 @@ def test_quantize_api_fp8_int4(self):
         quantize_(model, QATConfig(Float8DynamicActivationInt4WeightConfig(), step="convert"))
         """
         self._test_quantize_api_against_ptq(
-            Float8DynamicActivationInt4WeightConfig(group_size=128),
+            Float8DynamicActivationInt4WeightConfig(),
             target_prepare_sqnr=15,
             target_convert_sqnr=float("inf"),
         )
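
The SQNR targets above compare the QAT flow against the PTQ baseline. As a rough sketch, a signal-to-quantization-noise check of this kind is typically computed as below (the exact helper in torchao's test utilities may differ):

import torch

def sqnr(ref: torch.Tensor, quant: torch.Tensor) -> torch.Tensor:
    # SQNR in dB; inf when the two outputs match exactly, which is what
    # target_convert_sqnr=float("inf") asserts after convert
    return 10 * torch.log10(ref.pow(2).sum() / (ref - quant).pow(2).sum())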
File 3:

@@ -1156,13 +1156,13 @@ def _int4_weight_only_transform(
 class Float8DynamicActivationInt4WeightConfig(AOBaseConfig):
     """Configuration for applying float8 dynamic per-row quantization and int4
     per-group weight quantization to linear
+    (only group_size 128 is supported right now, since the underlying kernel supports only 128
+    and above, and there is no benefit to making it bigger)

     Args:
-        `group_size`: group size for groupwise quantization for weight
         `packing_format`: how the weight is packed, only preshuffled is supported
     """

-    group_size: int = 128
     packing_format: PackingFormat = "preshuffled"

@@ -1174,13 +1174,13 @@ def _float8_dynamic_activation_int4_weight_transform(
         "applying int8 weight only quant requires module to have weight attribute"
+        " but {module} does not have one"
     )
-    group_size = config.group_size
     packing_format = config.packing_format

     assert packing_format == "preshuffled", (
         f"only preshuffled packing_format supported right now, got: {packing_format}"
     )
     weight = module.weight
+    group_size = 128
     block_size = tuple([1 for _ in range(weight.ndim - 1)] + [group_size])
     new_weight = Int4PreshuffledTensor.from_hp(
         module.weight,
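
To illustrate the hardcoded grouping above, the block_size expression yields one quantization group per 128 input channels in each weight row; a standalone sketch mirroring that expression:

import torch

weight = torch.randn(512, 256, dtype=torch.bfloat16)  # typical nn.Linear weight
group_size = 128  # now fixed; the preshuffled int4 kernel supports only 128
block_size = tuple([1 for _ in range(weight.ndim - 1)] + [group_size])
assert block_size == (1, 128)  # each group spans 128 elements along the last dim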