Skip to content

Commit 098678f

Browse files
committed
add warning when quant config exists but quantization fails
1 parent 81287ba commit 098678f

File tree

2 files changed

+13
-9
lines changed

2 files changed

+13
-9
lines changed

src/dnet/core/models/base.py

Lines changed: 6 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -226,7 +226,7 @@ def _abskey_to_local_path(self, key: str) -> Optional[str]:
226226

227227
def apply_quantization_from_config(
228228
self, model_config: Any, model_metadata: Any
229-
) -> bool:
229+
) -> Tuple[bool, bool]:
230230
"""Quantize using a simple MLX-style predicate with optional per-path overrides.
231231
232232
- If config["quantization"][path] exists, use that for this path.
@@ -408,15 +408,17 @@ def _predicate(path: str, module: nn.Module):
408408
)
409409
except Exception:
410410
self._converted_to_quantized = False
411-
return False
411+
if g_bits != 0 and g_group != 0:
412+
return (True, False)
413+
return (False, False)
412414
self._converted_to_quantized = True
413-
return True
415+
return (True, True)
414416
except Exception:
415417
try:
416418
self._converted_to_quantized = False
417419
except Exception:
418420
pass
419-
return False
421+
return (False, False)
420422

421423
@staticmethod
422424
def _shrink_linear_like(mod) -> None:

src/dnet/shard/runtime.py

Lines changed: 7 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -201,12 +201,14 @@ def load_model_core(self, req: ShardLoadModelRequest) -> None:
201201
is_api_layer=False,
202202
)
203203
try:
204-
applied = bool(
205-
self.model.apply_quantization_from_config(
206-
self.model_metadata.model_config,
207-
model_metadata=self.model_metadata,
208-
)
204+
is_quant, applied = self.model.apply_quantization_from_config(
205+
self.model_metadata.model_config,
206+
model_metadata=self.model_metadata,
209207
)
208+
if is_quant and not applied:
209+
logger.warning(
210+
f"Failed to quantize what appears to be a quantized model."
211+
)
210212
logger.info(
211213
"[QUANT] runtime=%s applied=%s model=%s",
212214
self.shard_id,

0 commit comments

Comments
 (0)