Skip to content
Open
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
24 changes: 20 additions & 4 deletions src/chatterbox/mtl_tts.py
Original file line number Diff line number Diff line change
Expand Up @@ -126,6 +126,8 @@ def save(self, fpath: Path):

@classmethod
def load(cls, fpath, map_location="cpu"):
    """Build an instance from the payload stored at ``fpath``.

    Args:
        fpath: Location of the file handed to ``torch.load``.
        map_location: Where tensors are mapped while loading. A string
            value is converted to a ``torch.device`` first; any other
            value is forwarded to ``torch.load`` unchanged.

    Returns:
        ``cls`` constructed from the payload's ``'t3'`` entry (expanded
        into a ``T3Cond``) and its ``'gen'`` entry.
    """
    # Normalise string device names; non-string values pass through as-is.
    target = (
        torch.device(map_location)
        if isinstance(map_location, str)
        else map_location
    )
    payload = torch.load(fpath, map_location=target, weights_only=True)
    t3_cond = T3Cond(**payload['t3'])
    return cls(t3_cond, payload['gen'])

Expand Down Expand Up @@ -161,9 +163,15 @@ def get_supported_languages(cls):
def from_local(cls, ckpt_dir, device) -> 'ChatterboxMultilingualTTS':
ckpt_dir = Path(ckpt_dir)

# Always load to CPU first for non-CUDA devices to handle CUDA-saved models
if device in ["cpu", "mps"]:
map_location = torch.device('cpu')
else:
map_location = None

ve = VoiceEncoder()
ve.load_state_dict(
torch.load(ckpt_dir / "ve.pt", weights_only=True)
torch.load(ckpt_dir / "ve.pt", map_location=map_location, weights_only=True)
)
ve.to(device).eval()

Expand All @@ -176,7 +184,7 @@ def from_local(cls, ckpt_dir, device) -> 'ChatterboxMultilingualTTS':

s3gen = S3Gen()
s3gen.load_state_dict(
torch.load(ckpt_dir / "s3gen.pt", weights_only=True)
torch.load(ckpt_dir / "s3gen.pt", map_location=map_location, weights_only=True)
)
s3gen.to(device).eval()

Expand All @@ -186,12 +194,19 @@ def from_local(cls, ckpt_dir, device) -> 'ChatterboxMultilingualTTS':

conds = None
if (builtin_voice := ckpt_dir / "conds.pt").exists():
conds = Conditionals.load(builtin_voice).to(device)
conds = Conditionals.load(builtin_voice, map_location=map_location).to(device)

return cls(t3, s3gen, ve, tokenizer, device, conds=conds)

@classmethod
def from_pretrained(cls, device: torch.device) -> 'ChatterboxMultilingualTTS':
def from_pretrained(cls, device) -> 'ChatterboxMultilingualTTS':
if device == "mps" and not torch.backends.mps.is_available():
if not torch.backends.mps.is_built():
print("MPS not available because the current PyTorch install was not built with MPS enabled.")
else:
print("MPS not available because the current MacOS version is not 12.3+ and/or you do not have an MPS-enabled device on this machine.")
device = "cpu"

ckpt_dir = Path(
snapshot_download(
repo_id=REPO_ID,
Expand All @@ -201,6 +216,7 @@ def from_pretrained(cls, device: torch.device) -> 'ChatterboxMultilingualTTS':
token=os.getenv("HF_TOKEN"),
)
)

return cls.from_local(ckpt_dir, device)

def prepare_conditionals(self, wav_fpath, exaggeration=0.5):
Expand Down