add warning when quant config exists but quantization fails

GandalfTea · GandalfTea · commit 098678f7a5a2 · 2025-11-27T01:30:43.000-08:00
diff --git a/src/dnet/core/models/base.py b/src/dnet/core/models/base.py
@@ -226,7 +226,7 @@ def _abskey_to_local_path(self, key: str) -> Optional[str]:
 
     def apply_quantization_from_config(
         self, model_config: Any, model_metadata: Any
-    ) -> bool:
+    ) -> Tuple[bool, bool]:
         """Quantize using a simple MLX-style predicate with optional per-path overrides.
 
         - If config["quantization"][path] exists, use that for this path.
@@ -408,15 +408,17 @@ def _predicate(path: str, module: nn.Module):
                     )
             except Exception:
                 self._converted_to_quantized = False
-                return False
+                if g_bits != 0 and g_group != 0:
+                    return (True, False)
+                return (False, False)
             self._converted_to_quantized = True
-            return True
+            return (True, True)
         except Exception:
             try:
                 self._converted_to_quantized = False
             except Exception:
                 pass
-            return False
+            return (False, False)
 
     @staticmethod
     def _shrink_linear_like(mod) -> None:
diff --git a/src/dnet/shard/runtime.py b/src/dnet/shard/runtime.py
@@ -201,12 +201,14 @@ def load_model_core(self, req: ShardLoadModelRequest) -> None:
             is_api_layer=False,
         )
         try:
-            applied = bool(
-                self.model.apply_quantization_from_config(
-                    self.model_metadata.model_config,
-                    model_metadata=self.model_metadata,
-                )
+            is_quant, applied = self.model.apply_quantization_from_config(
+                self.model_metadata.model_config,
+                model_metadata=self.model_metadata,
             )
+            if is_quant and not applied:
+                logger.warning(
+                    f"Failed to quantize what appears to be a quantized model."
+                )
             logger.info(
                 "[QUANT] runtime=%s applied=%s model=%s",
                 self.shard_id,