resemble-ai · jonata · Sep 7, 2025
diff --git a/src/chatterbox/mtl_tts.py b/src/chatterbox/mtl_tts.py
@@ -126,6 +126,8 @@ def save(self, fpath: Path):
 
     @classmethod
     def load(cls, fpath, map_location="cpu"):
+        if isinstance(map_location, str):  # normalise a string device spec so torch.load always receives a torch.device
+            map_location = torch.device(map_location)
         kwargs = torch.load(fpath, map_location=map_location, weights_only=True)
         return cls(T3Cond(**kwargs['t3']), kwargs['gen'])
 
@@ -161,9 +163,15 @@ def get_supported_languages(cls):
     def from_local(cls, ckpt_dir, device) -> 'ChatterboxMultilingualTTS':
         ckpt_dir = Path(ckpt_dir)
 
+        # Checkpoints may have been saved from CUDA, so remap tensors to CPU first when the target is CPU/MPS.
+        if str(device).split(":")[0] in ("cpu", "mps"):  # device type only: also matches torch.device objects and "mps:0"
+            map_location = torch.device('cpu')
+        else:
+            map_location = None  # leave torch.load's default placement untouched for other devices
+
         ve = VoiceEncoder()
         ve.load_state_dict(
-            torch.load(ckpt_dir / "ve.pt", weights_only=True)
+            torch.load(ckpt_dir / "ve.pt", map_location=map_location, weights_only=True)
         )
         ve.to(device).eval()
 
@@ -176,7 +184,7 @@ def from_local(cls, ckpt_dir, device) -> 'ChatterboxMultilingualTTS':
 
         s3gen = S3Gen()
         s3gen.load_state_dict(
-            torch.load(ckpt_dir / "s3gen.pt", weights_only=True)
+            torch.load(ckpt_dir / "s3gen.pt", map_location=map_location, weights_only=True)
         )
         s3gen.to(device).eval()
 
@@ -186,12 +194,19 @@ def from_local(cls, ckpt_dir, device) -> 'ChatterboxMultilingualTTS':
 
         conds = None
         if (builtin_voice := ckpt_dir / "conds.pt").exists():
-            conds = Conditionals.load(builtin_voice).to(device)
+            conds = Conditionals.load(builtin_voice, map_location=map_location).to(device)
 
         return cls(t3, s3gen, ve, tokenizer, device, conds=conds)
 
     @classmethod
-    def from_pretrained(cls, device: torch.device) -> 'ChatterboxMultilingualTTS':
+    def from_pretrained(cls, device) -> 'ChatterboxMultilingualTTS':
+        if str(device).split(":")[0] == "mps" and not torch.backends.mps.is_available():  # device type only: also matches torch.device("mps") and "mps:0"
+            if not torch.backends.mps.is_built():
+                print("MPS not available because the current PyTorch install was not built with MPS enabled.")
+            else:
+                print("MPS not available because the current MacOS version is not 12.3+ and/or you do not have an MPS-enabled device on this machine.")
+            device = "cpu"  # fall back so the checkpoint load below still succeeds
+
         ckpt_dir = Path(
             snapshot_download(
                 repo_id=REPO_ID,
@@ -201,6 +216,7 @@ def from_pretrained(cls, device: torch.device) -> 'ChatterboxMultilingualTTS':
                 token=os.getenv("HF_TOKEN"),
             )
         )
+
         return cls.from_local(ckpt_dir, device)
 
     def prepare_conditionals(self, wav_fpath, exaggeration=0.5):