File tree Expand file tree Collapse file tree 2 files changed +13
-9
lines changed
Expand file tree Collapse file tree 2 files changed +13
-9
lines changed Original file line number Diff line number Diff line change @@ -226,7 +226,7 @@ def _abskey_to_local_path(self, key: str) -> Optional[str]:
226226
227227 def apply_quantization_from_config (
228228 self , model_config : Any , model_metadata : Any
229- ) -> bool :
229+ ) -> Tuple [ bool , bool ] :
230230 """Quantize using a simple MLX-style predicate with optional per-path overrides.
231231
232232 - If config["quantization"][path] exists, use that for this path.
@@ -408,15 +408,17 @@ def _predicate(path: str, module: nn.Module):
408408 )
409409 except Exception :
410410 self ._converted_to_quantized = False
411- return False
411+ if g_bits != 0 and g_group != 0 :
412+ return (True , False )
413+ return (False , False )
412414 self ._converted_to_quantized = True
413- return True
415+ return ( True , True )
414416 except Exception :
415417 try :
416418 self ._converted_to_quantized = False
417419 except Exception :
418420 pass
419- return False
421+ return ( False , False )
420422
421423 @staticmethod
422424 def _shrink_linear_like (mod ) -> None :
Original file line number Diff line number Diff line change @@ -201,12 +201,14 @@ def load_model_core(self, req: ShardLoadModelRequest) -> None:
201201 is_api_layer = False ,
202202 )
203203 try :
204- applied = bool (
205- self .model .apply_quantization_from_config (
206- self .model_metadata .model_config ,
207- model_metadata = self .model_metadata ,
208- )
204+ is_quant , applied = self .model .apply_quantization_from_config (
205+ self .model_metadata .model_config ,
206+ model_metadata = self .model_metadata ,
209207 )
208+ if is_quant and not applied :
209+ logger .warning (
210+ f"Failed to quantize what appears to be a quantized model."
211+ )
210212 logger .info (
211213 "[QUANT] runtime=%s applied=%s model=%s" ,
212214 self .shard_id ,
You can’t perform that action at this time.
0 commit comments