Skip to content

Commit 4c478b6

Browse files
author
yyzhang
committed
[update]update ver:3.0.7
1 parent 3ec30cd commit 4c478b6

40 files changed

Lines changed: 2914 additions & 470 deletions

doc/tutorial/release.md

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -17,4 +17,7 @@
1717
# V3.0.6 2026.03.26
1818
* 解决linger.init之后加载浮点模型dict不匹配问题
1919
* 解决avgpool算子一致性问题
20-
* 解决gru算子一致性问题
20+
* 解决gru算子一致性问题
21+
# V3.0.7 2026.04.30
22+
* 解决venusA和arcs平台gru一致性问题
22+
* 重构导图后处理代码,增加图优化和常量折叠功能

linger/__version.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,5 +8,5 @@ def _to_int(s):
88
return s
99

1010

11-
__version__ = "3.0.6"
11+
__version__ = "3.0.7"
1212
version_info = tuple(_to_int(s) for s in __version__.split("."))

linger/constrain/clayernorm.py

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,6 @@ def ccreate(
2020
module.normalized_shape,
2121
module.eps,
2222
module.elementwise_affine,
23-
None if module.bias is None else True,
2423
dtype=module.weight.dtype,
2524
device=device,
2625
constrain=constrain,

linger/initialize.py

Lines changed: 15 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -85,29 +85,31 @@ def quant_module(module: nn.Module, c_activation_val: float = 8.0, c_weight_val:
8585
m.output_quantizer.data_bits = out_bits
8686
m.output_quantizer.clamp_activation_value = c_activation_val
8787

88-
def constrain(model: nn.Module, config_file: str = None, disable_module=None, disable_submodel=[]):
88+
def constrain(model: nn.Module, config_file: str = None, disable_module=None, disable_submodel=None):
8989
c_configs = QUANT_CONFIGS
9090
if config_file is not None:
9191
c_configs._load_from_yaml(config_file)
92+
disabled_submodels = [] if disable_submodel is None else list(disable_submodel)
9293

9394
if disable_module is not None:
9495
for name in disable_module:
9596
if _CMODULE_TABLE.get(name, None) is not None:
9697
_CMODULE_TABLE.pop(name)
9798

9899
for name, m in model.named_modules():
99-
if any(name.startswith(p) for p in disable_submodel): # disable_submodel 直接按照module的名称进行屏蔽
100+
if any(name.startswith(p) for p in disabled_submodels): # disable_submodel 直接按照module的名称进行屏蔽
100101
continue
101102
_constrain_submodule(model, name, m, c_configs.clamp_info.to_dict())
102103

103104
model.to(c_configs.device)
104105
return model
105106

106-
def init(model: nn.Module, config_file: str = None, disable_module=None, disable_submodel=[]):
107+
def init(model: nn.Module, config_file: str = None, disable_module=None, disable_submodel=None):
107108

108109
q_configs = QUANT_CONFIGS
109110
if config_file is not None:
110111
q_configs._load_from_yaml(config_file)
112+
disabled_submodels = [] if disable_submodel is None else list(disable_submodel)
111113

112114
if disable_module is not None:
113115
for name in disable_module:
@@ -118,15 +120,15 @@ def init(model: nn.Module, config_file: str = None, disable_module=None, disable
118120
# model = _replace_ops(traced_model, q_configs)
119121

120122
for name, m in model.named_modules():
121-
if any(name.startswith(p) for p in disable_submodel): # disable_submodel 直接按照module的名称进行量化屏蔽
123+
if any(name.startswith(p) for p in disabled_submodels): # disable_submodel 直接按照module的名称进行量化屏蔽
122124
continue
123125

124126
m.register_forward_pre_hook(hook_pre_forward)
125127
m.register_forward_hook(hook_forward)
126128

127129
is_replaced = _quantize_submodule(model, name, m, weights_cfg=q_configs.quant_info.to_dict(), activations_cfg=q_configs.quant_info.to_dict(), bias_cfg=q_configs.quant_info.to_dict(), constrain = q_configs.clamp_info.to_dict())
128130
if is_replaced:
129-
disable_submodel.append(name)
131+
disabled_submodels.append(name)
130132

131133
def quant_tensor_pre_hook(state_dict, prefix, local_metadata, strict, missing_keys, unexpected_keys, error_msgs):
132134
quantizer_tags = (
@@ -192,6 +194,14 @@ def quant_tensor_layer(module, prefix=''):
192194
iq_layer.training = model.training
193195
# iq_layer = iq_layer.to(device)
194196
setattr(module, input_name, iq_layer)
197+
elif '_qgelu_' in input_name:
198+
iq_layer = QGelu(activate_config=activate_cfg, num_input=1)
199+
iq_layer.training = model.training
200+
setattr(module, input_name, iq_layer)
201+
elif '_qswish_' in input_name:
202+
iq_layer = QSwish(activate_config=activate_cfg, num_input=1)
203+
iq_layer.training = model.training
204+
setattr(module, input_name, iq_layer)
195205
elif '_qsoftmax_' in input_name:
196206
iq_layer = QSoftmax(activate_config=activate_cfg, num_input=1)
197207
iq_layer.training = model.training

linger/kernel/cpu/venusa_qsigmoid_kernel.cpp

Lines changed: 17 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,21 @@
1111
((x) > MAX_BITS(bits) ? MAX_BITS(bits) \
1212
: ((x) < MIN_BITS(bits) ? MIN_BITS(bits) : (x)))
1313

14+
static int64_t shfit_floor_x05_int64(int64_t x, int32_t shift)
15+
{
16+
int64_t val = x;
17+
18+
if (shift >= 64) {
19+
return 0;
20+
}
21+
if (shift > 0) {
22+
val = val >> (shift - 1);
23+
val = (val & 0x1) + (val >> 1);
24+
}
25+
26+
return val;
27+
}
28+
1429
torch::Tensor venusa_qsigmoid_cpu(torch::Tensor a)
1530
{
1631
int32_t N = a.numel();
@@ -29,7 +44,6 @@ torch::Tensor venusa_qsigmoid_cpu(torch::Tensor a)
2944
int64_t absx = 0;
3045
int64_t slope = 0;
3146
int64_t bias = 0;
32-
int32_t shift = 0;
3347
int64_t tmp = 0;
3448
int64_t out = 0;
3549

@@ -73,8 +87,8 @@ torch::Tensor venusa_qsigmoid_cpu(torch::Tensor a)
7387
bias = 0;
7488
}
7589
}
76-
bias = bias << 3;
77-
out = ((slope * tmp) >> 27) + bias;
90+
out = slope * tmp + (bias << 30);
91+
out = shfit_floor_x05_int64(out, 27);
7892
c_ptr[i] = SATURATE(out, 32);
7993
}
8094

linger/kernel/cpu/venusa_qsoftmax_kernel.cpp

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,7 @@ static int32_t shift_pure(int64_t v, int32_t s)
2121
} else {
2222
v = v >> (-s);
2323
}
24-
return SATURATE(v, 32);
24+
return v;
2525
}
2626

2727
static int32_t sub32s(int32_t a, int32_t b)
@@ -150,7 +150,7 @@ torch::Tensor venusa_qsoftmax_cpu(const torch::Tensor& in, int64_t dim)
150150
for (int i = 0; i < N; i++)
151151
{
152152
data = sub32s(x_ptr[i], max_value);
153-
X = shfit_floor_x05_int64((int64_t)X * (int64_t)774541002, 31);
153+
X = shfit_floor_x05_int64((int64_t)data * (int64_t)774541002, 31);
154154
// X = shift_rasyms((int64_t)data * (int64_t)774541002, -31);//exp=>2xp,Q6.25=>Q8.23
155155
E = X >> 23;
156156
E = E + 1;//与118行对应
@@ -201,4 +201,3 @@ torch::Tensor venusa_qsoftmax_cpu(const torch::Tensor& in, int64_t dim)
201201

202202
return out;
203203
}
204-

linger/kernel/cpu/venusa_qtanh_kernel.cpp

Lines changed: 18 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,21 @@
1111
((x) > MAX_BITS(bits) ? MAX_BITS(bits) \
1212
: ((x) < MIN_BITS(bits) ? MIN_BITS(bits) : (x)))
1313

14+
static int64_t shfit_floor_x05_int64(int64_t x, int32_t shift)
15+
{
16+
int64_t val = x;
17+
18+
if (shift >= 64) {
19+
return 0;
20+
}
21+
if (shift > 0) {
22+
val = val >> (shift - 1);
23+
val = (val & 0x1) + (val >> 1);
24+
}
25+
26+
return val;
27+
}
28+
1429
torch::Tensor venusa_qtanh_cpu(torch::Tensor a)
1530
{
1631
int32_t N = a.numel();
@@ -28,7 +43,6 @@ torch::Tensor venusa_qtanh_cpu(torch::Tensor a)
2843
int64_t absx = 0;
2944
int64_t slope = 0;
3045
int64_t bias = 0;
31-
int32_t shift = 0;
3246
int64_t tmp = 0;
3347
int64_t out = 0;
3448

@@ -68,16 +82,15 @@ torch::Tensor venusa_qtanh_cpu(torch::Tensor a)
6882
bias = 0;
6983
}
7084
}
71-
72-
bias = bias << 3;
7385
if (1 == sign)
7486
{
75-
out = ((-1 * slope * absx) >> 27) - bias;
87+
out = (-slope * absx) - (bias << 30);
7688
}
7789
else
7890
{
79-
out = ((slope * absx) >> 27) + bias;
91+
out = (slope * absx) + (bias << 30);
8092
}
93+
out = shfit_floor_x05_int64(out, 27);
8194

8295
c_ptr[i] = SATURATE(out, 32);
8396
}

linger/kernel/gpu/fake_quant_kernel.cu

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -44,7 +44,7 @@ std::tuple<torch::Tensor, torch::Tensor, float> fake_quant_cuda(
4444
// printf("到位置 2 了 \n");
4545
// 计算 scale
4646
float f = (float)(bit - 1) - factor;
47-
float scale = powf(2.0f, roundf(f));
47+
float scale = powf(2.0f, fminf(fmaxf(roundf(f), 0.0f), 23.0f));
4848
// scale = fminf(fmaxf(scale, 1e-6f), powf(2.0f, 32));
4949
// printf("到位置 1 了,scale:%f \n", scale);
5050
auto output = torch::empty_like(input);
@@ -84,7 +84,7 @@ std::tuple<torch::Tensor, torch::Tensor, float> fake_quant_cuda(
8484
std::tuple<torch::Tensor, torch::Tensor> bias_quant_cuda(
8585
torch::Tensor input,
8686
int bit,
87-
float scale,
87+
float scale, // 这里scale是在python代码里进行clamp到0-31的
8888
float scale_min,
8989
float quant_min,
9090
float quant_max) {
@@ -175,7 +175,7 @@ std::tuple<torch::Tensor, torch::Tensor, torch::Tensor, float> fake_quant_cuda_w
175175
// printf("到位置 2 了 \n");
176176
// 计算 scale
177177
float f = (float)(bit - 1) - factor;
178-
float scale = powf(2.0f, roundf(f));
178+
float scale = powf(2.0f, fminf(fmaxf(roundf(f), 0.0f), 23.0f));
179179
// scale = fminf(fmaxf(scale, 1e-6f), powf(2.0f, 32));
180180
// printf("到位置 1 了,scale:%f \n", scale);
181181
auto output = torch::empty_like(input);
@@ -218,7 +218,7 @@ std::tuple<torch::Tensor, torch::Tensor, torch::Tensor, float> fake_quant_cuda_w
218218
std::tuple<torch::Tensor, torch::Tensor, torch::Tensor> bias_quant_cuda_with_grad_scale(
219219
torch::Tensor input,
220220
int bit,
221-
float scale,
221+
float scale, // 这里的scale是在python里clamp的
222222
float scale_min,
223223
float quant_min,
224224
float quant_max) {

linger/kernel/gpu/venusa_qsigmoid_kernel.cu

Lines changed: 17 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,21 @@
1010
((x) > MAX_BITS(bits) ? MAX_BITS(bits) \
1111
: ((x) < MIN_BITS(bits) ? MIN_BITS(bits) : (x)))
1212

13+
static __device__ int64_t shfit_floor_x05_int64(int64_t x, int32_t shift)
14+
{
15+
int64_t val = x;
16+
17+
if (shift >= 64) {
18+
return 0;
19+
}
20+
if (shift > 0) {
21+
val = val >> (shift - 1);
22+
val = (val & 0x1) + (val >> 1);
23+
}
24+
25+
return val;
26+
}
27+
1328
__global__ void venusa_qsigmoid_gpu_kernel(const int* __restrict__ a,
1429
int* __restrict__ c,
1530
int32_t len, uint32_t* bands , uint32_t* slopes,
@@ -21,7 +36,6 @@ __global__ void venusa_qsigmoid_gpu_kernel(const int* __restrict__ a,
2136
int64_t absx = 0;
2237
int64_t slope = 0;
2338
int64_t bias = 0;
24-
int32_t shift = 0;
2539
int64_t tmp = 0;
2640
int64_t out = 0;
2741

@@ -65,12 +79,11 @@ __global__ void venusa_qsigmoid_gpu_kernel(const int* __restrict__ a,
6579
{
6680
slope = 0;
6781
bias = 0;
68-
shift = 0;
6982
}
7083
}
7184

72-
bias = bias << 3;
73-
out = ((slope * tmp) >> 27) + bias;
85+
out = slope * tmp + (bias << 30);
86+
out = shfit_floor_x05_int64(out, 27);
7487
c[idx] = SATURATE(out, 32);
7588
}
7689
}
@@ -111,4 +124,3 @@ torch::Tensor venusa_qsigmoid_gpu(torch::Tensor a)
111124

112125
return c;
113126
}
114-

linger/kernel/gpu/venusa_qsoftmax_kernel.cu

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,7 @@ static __device__ int32_t shift_pure(int64_t v, int32_t s)
2222
} else {
2323
v = v >> (-s);
2424
}
25-
return SATURATE(v, 32);
25+
return v;
2626
}
2727

2828
static __device__ int32_t sub32s(int32_t a, int32_t b)
@@ -98,6 +98,7 @@ static __device__ int64_t shfit_floor_x05_int64(int64_t x, int32_t shift)
9898
static __device__ void venusa_qsoftmax_c(const int* __restrict__ x_ptr, int* __restrict__ y_ptr, int32_t N, int32_t* p23)
9999
{
100100
int32_t max_value = x_ptr[0];
101+
int32_t data = 0;
101102
int32_t X = 0;
102103
int64_t Y = 0;
103104
int32_t E = 0;
@@ -112,8 +113,8 @@ static __device__ void venusa_qsoftmax_c(const int* __restrict__ x_ptr, int* __r
112113

113114
for (int i = 0; i < N; i++)
114115
{
115-
X = sub32s(x_ptr[i] ,max_value);
116-
X = shfit_floor_x05_int64((int64_t)X * (int64_t)774541002, 31);
116+
data = sub32s(x_ptr[i] ,max_value);
117+
X = shfit_floor_x05_int64((int64_t)data * (int64_t)774541002, 31);
117118
// X = shift_rasyms((int64_t)X * (int64_t)774541002,-31);
118119
E = X >> 23;
119120
E = E + 1;
@@ -207,4 +208,3 @@ torch::Tensor venusa_qsoftmax_gpu(const torch::Tensor& in, int64_t dim)
207208

208209
return out;
209210
}
210-

0 commit comments

Comments
 (0)