1 file changed: +5 −4 lines changed

@@ -206,17 +206,18 @@ def load_model_core(self, req: ShardLoadModelRequest) -> None:
                 model_metadata=self.model_metadata,
             )
             if is_quant and not applied:
-                logger.warning(
-                    "Failed to quantize what appears to be a quantized model."
-                )
+                raise RuntimeError("apply_quantization_from_config failed.")
             logger.info(
                 "[QUANT] runtime=%s applied=%s model=%s",
                 self.shard_id,
                 applied,
                 self.model_metadata.model_type,
             )
         except RuntimeError as e:
-            logger.warning("[QUANT] apply failed: %s", e)
+            logger.error(
+                f"[QUANT] Failed to quantize what appears to be a quantized model: {e}"
+            )
+            raise

         self.model.eval()
         self.cache = make_cache(
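The net effect of the change is that a quantized checkpoint that fails to quantize is now fatal: the new raise inside the try block is caught by the existing except RuntimeError handler, logged at error level, and re-raised, so the caller of load_model_core sees the failure instead of a warning being swallowed. Below is a minimal, runnable sketch of the resulting control flow; the call signature of apply_quantization_from_config and its (is_quant, applied) return shape are assumptions inferred from the diff context, and the stub simply simulates the failure path.

import logging

logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)


def apply_quantization_from_config(model, model_metadata):
    # Stub for the real helper (assumption): report whether the checkpoint is
    # quantized and whether quantization was actually applied. Returning
    # (True, False) simulates the failure path the change makes fatal.
    return True, False


def load_quantized_model(model, model_metadata, shard_id):
    # Post-change control flow: a quantized checkpoint that cannot be
    # quantized raises instead of merely warning, and the surrounding
    # except block logs at error level and re-raises to the caller.
    try:
        is_quant, applied = apply_quantization_from_config(model, model_metadata)
        if is_quant and not applied:
            raise RuntimeError("apply_quantization_from_config failed.")
        logger.info(
            "[QUANT] runtime=%s applied=%s model=%s",
            shard_id,
            applied,
            model_metadata.get("model_type"),
        )
    except RuntimeError as e:
        logger.error(
            "[QUANT] Failed to quantize what appears to be a quantized model: %s", e
        )
        raise


if __name__ == "__main__":
    try:
        load_quantized_model(model=None, model_metadata={"model_type": "demo"}, shard_id="shard-0")
    except RuntimeError:
        # The caller now observes the failure; previously it was only a warning.
        pass

Failing fast here presumably prevents a shard from silently serving unquantized weights after a quantization mismatch, which would otherwise surface only as higher memory use or unexpected outputs.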