diff --git a/api/generate.py b/api/generate.py index 245e1ba..ff3cc96 100644 --- a/api/generate.py +++ b/api/generate.py @@ -5,12 +5,22 @@ import json import logging +import re import threading import time import uuid from pathlib import Path from flask import Blueprint, jsonify, request, send_file +def _uppercase_track_in_instruction(instruction): + """Uppercase TRACK_NAME in 'Generate the X track ...' to match ACE-Step (cli.py _default_instruction_for_task); returns the input unchanged when it is empty or contains no 'the X track' phrase (match is case-insensitive).""" + if not instruction: + return instruction + m = re.search(r"(\bthe\s+)(\w+)(\s+track\b)", instruction, re.IGNORECASE) + if m: + return instruction[: m.start(2)] + m.group(2).upper() + instruction[m.end(2) :] + return instruction + from cdmf_paths import get_output_dir, get_user_data_dir, load_config from cdmf_tracks import list_lora_adapters, load_track_meta, save_track_meta from cdmf_generation_job import GenerationCancelled @@ -122,22 +132,32 @@ def _run_generation(job_id: str) -> None: if task not in allowed_tasks: task = "text2music" # Single style/caption field drives all text conditioning (ACE-Step caption). - # Simple mode: songDescription. Advanced mode: style. Both can have key, time sig, vocal language. - prompt = (params.get("style") or "").strip() if custom_mode else (params.get("songDescription") or "").strip() + # Simple mode: songDescription. Advanced mode: style. Lego/extract/complete: instruction + caption only (no metas; source sets context). 
+ if task in ("lego", "extract", "complete"): + instruction = (params.get("instruction") or "").strip() + caption = (params.get("style") or "").strip() + if not instruction and not caption: + instruction = "Generate an instrument track based on the audio context:" + prompt = None # built below after we have duration/bpm/metas + else: + instruction = None + caption = None + prompt = (params.get("style") or "").strip() if custom_mode else (params.get("songDescription") or "").strip() key_scale = (params.get("keyScale") or "").strip() time_sig = (params.get("timeSignature") or "").strip() vocal_lang = (params.get("vocalLanguage") or "").strip().lower() extra_bits = [] - if key_scale: - extra_bits.append(f"key {key_scale}") - if time_sig: - extra_bits.append(f"time signature {time_sig}") - if vocal_lang and vocal_lang not in ("unknown", ""): - extra_bits.append(f"vocal language {vocal_lang}") - if extra_bits: - prompt = f"{prompt}, {', '.join(extra_bits)}" if prompt else ", ".join(extra_bits) - # When user explicitly chose English, reinforce in caption so model conditions on it - if vocal_lang == "en" and prompt: + if task != "lego": + if key_scale: + extra_bits.append(f"key {key_scale}") + if time_sig: + extra_bits.append(f"time signature {time_sig}") + if vocal_lang and vocal_lang not in ("unknown", ""): + extra_bits.append(f"vocal language {vocal_lang}") + if extra_bits: + prompt = f"{prompt}, {', '.join(extra_bits)}" if prompt else ", ".join(extra_bits) + # When user explicitly chose English, reinforce in caption so model conditions on it (skip for lego) + if task != "lego" and vocal_lang == "en" and prompt: if not prompt.lower().startswith("english"): prompt = f"English vocals, {prompt}" if not prompt: @@ -190,21 +210,32 @@ def _run_generation(job_id: str) -> None: bpm = None except (TypeError, ValueError): bpm = None + # Lego/extract/complete: instruction (uppercase track) + caption only. 
No metas — BPM/key/timesignature + # should match the input backing; passing them would be for cover/target-style mode. + if task in ("lego", "extract", "complete"): + instruction = _uppercase_track_in_instruction( + instruction or "Generate an instrument track based on the audio context:" + ) + prompt = (instruction + "\n\n" + (caption or "")).strip() or instruction title = (params.get("title") or "Untitled").strip() or "Track" reference_audio_url = (params.get("referenceAudioUrl") or params.get("reference_audio_path") or "").strip() source_audio_url = (params.get("sourceAudioUrl") or params.get("src_audio_path") or "").strip() - # For cover/retake use source-first (song to cover); for style/reference use reference-first - if task in ("cover", "retake"): + # For cover/retake/lego/extract/complete use source-first (backing/song to cover); for style/reference use reference-first + if task in ("cover", "retake", "lego", "extract", "complete"): resolved = _resolve_audio_url_to_path(source_audio_url) if source_audio_url else None src_audio_path = resolved or (_resolve_audio_url_to_path(reference_audio_url) if reference_audio_url else None) else: resolved = _resolve_audio_url_to_path(reference_audio_url) if reference_audio_url else None src_audio_path = resolved or (_resolve_audio_url_to_path(source_audio_url) if source_audio_url else None) - # When reference/source audio is provided, enable Audio2Audio so ACE-Step uses it (cover/retake/repaint). - # See docs/ACE-Step-INFERENCE.md: audio_cover_strength 1.0 = strong adherence; 0.5–0.8 = more caption influence. + # When reference/source audio is provided, enable Audio2Audio so ACE-Step uses it (cover/retake/repaint/lego). + # Lego/extract/complete: use ref_audio so the model gets backing as context; use LOW ref_audio_strength + # (e.g. 0.3) so diffusion starts from noisy backing and denoises toward the prompt (new instrument), not a copy. + # See docs/ACE-Step-INFERENCE.md: audio_cover_strength 1.0 = strong adherence; lower = more prompt influence. 
audio2audio_enable = bool(src_audio_path) ref_default = 0.8 if task in ("cover", "retake", "audio2audio") else 0.7 + if task in ("lego", "extract", "complete"): + ref_default = 0.3 # low strength so output follows prompt (instrument) while matching backing timing ref_audio_strength = float(params.get("audioCoverStrength") or params.get("ref_audio_strength") or ref_default) ref_audio_strength = max(0.0, min(1.0, ref_audio_strength)) @@ -231,6 +262,10 @@ def _run_generation(job_id: str) -> None: thinking = bool(params.get("thinking", False)) use_cot_metas = bool(params.get("useCotMetas", True)) use_cot_caption = bool(params.get("useCotCaption", True)) + # Lego/extract/complete: instruction must stay verbatim ("Generate the X track based on the audio context:"). + # LM refinement would rephrase and can drop the track-type instruction, so disable CoT caption for these tasks. + if task in ("lego", "extract", "complete"): + use_cot_caption = False use_cot_language = bool(params.get("useCotLanguage", True)) try: lm_temperature = float(params.get("lmTemperature") or params.get("lm_temperature") or 0.85) @@ -403,7 +438,18 @@ def create_job(): data = raw if isinstance(raw, dict) else {} logging.info("[API generate] Request body keys: %s", list(data.keys()) if data else []) - if not data.get("customMode") and not data.get("songDescription"): + task_for_validation = (data.get("taskType") or "text2music").strip().lower() + base_only_tasks = ("lego", "extract", "complete") + if task_for_validation in base_only_tasks: + # Lego/extract/complete: require source audio and caption/instruction (no songDescription); check the same source keys _run_generation reads (sourceAudioUrl / src_audio_path) + src_audio = (data.get("sourceAudioUrl") or data.get("src_audio_path") or "").strip() + instruction = (data.get("instruction") or "").strip() + style = (data.get("style") or "").strip() + if not src_audio: + return jsonify({"error": "Backing/source audio required for Lego (and extract/complete)"}), 400 + if not instruction and not style: + return jsonify({"error": 
"Describe the track (caption) or instruction required for Lego"}), 400 + elif not data.get("customMode") and not data.get("songDescription"): return jsonify({"error": "Song description required for simple mode"}), 400 # Custom mode: require at least one of style, lyrics, reference audio, or source audio if data.get("customMode"): diff --git a/cdmf_pipeline_ace_step.py b/cdmf_pipeline_ace_step.py index 1d686e4..e5c48fb 100644 --- a/cdmf_pipeline_ace_step.py +++ b/cdmf_pipeline_ace_step.py @@ -2052,11 +2052,16 @@ def __call__( preprocess_time_cost = end_time - start_time start_time = end_time + # Lego/extract/complete: generate NEW track from prompt only; use source only for duration (no repaint/retake). + # Repaint/retake/extend: use src_latents in diffusion. add_retake_noise = task in ("retake", "repaint", "extend") # retake equal to repaint if task == "retake": repaint_start = 0 repaint_end = audio_duration + if task in ("lego", "extract", "complete"): + repaint_start = 0 + repaint_end = audio_duration src_latents = None if src_audio_path is not None: @@ -2064,11 +2069,21 @@ def __call__( "repaint", "edit", "extend", - ), "src_audio_path is required for retake/repaint/extend task" + "lego", + "extract", + "complete", + ), "src_audio_path is required for repaint/extend/lego/extract/complete task" assert os.path.exists( src_audio_path ), f"src_audio_path {src_audio_path} does not exist" - src_latents = self.infer_latents(src_audio_path) + src_latents_inferred = self.infer_latents(src_audio_path) + if task in ("lego", "extract", "complete"): + # Use source only to set output duration; do not pass latents into diffusion (generate from scratch with prompt). 
+ num_frames = src_latents_inferred.shape[-1] + audio_duration = num_frames * 512 * 8 / 44100.0 + src_latents = None # no repaint for lego + else: + src_latents = src_latents_inferred ref_latents = None if ref_audio_input is not None and audio2audio_enable: diff --git a/docs/ACEFORGE_API.md b/docs/ACEFORGE_API.md index 263c5b5..8631625 100644 --- a/docs/ACEFORGE_API.md +++ b/docs/ACEFORGE_API.md @@ -125,8 +125,9 @@ ACE-Step text-to-music (and related tasks). Jobs are queued and run one at a tim - `inferenceSteps`: int (e.g. 55). - `guidanceScale`: float (e.g. 6.0). - `seed`: int; if `randomSeed` is true, server may override with random. -- `taskType`: `"text2music"` | `"retake"` | `"repaint"` | `"extend"` | `"cover"` | `"audio2audio"`. -- `referenceAudioUrl`, `sourceAudioUrl`: URLs like `/audio/refs/...` or `/audio/` for reference/cover. +- `taskType`: `"text2music"` | `"retake"` | `"repaint"` | `"extend"` | `"cover"` | `"audio2audio"` | `"lego"` | `"extract"` | `"complete"`. **Lego**, **extract**, and **complete** require the ACE-Step **Base** DiT model (see Preferences and ACE-Step models). +- `instruction`: optional; for `taskType` **lego** (and extract/complete), task-specific instruction (e.g. `"Generate the guitar track based on the audio context:"`). If omitted, the server falls back to the generic instruction `"Generate an instrument track based on the audio context:"`; at least one of `instruction` or `style` must be provided for these tasks. +- `referenceAudioUrl`, `sourceAudioUrl`: URLs like `/audio/refs/...` or `/audio/` for reference/cover. For **lego**, **extract**, and **complete**, **sourceAudioUrl** is the backing/source audio (required). - `audioCoverStrength` / `ref_audio_strength`: 0–1. - `repaintingStart`, `repaintingEnd`: for repaint task. - `title`: base name for output file. @@ -135,6 +136,8 @@ ACE-Step text-to-music (and related tasks). Jobs are queued and run one at a tim - `loraNameOrPath`: optional; folder name from LoRA list or path to adapter (see `GET /api/generate/lora_adapters`). - `loraWeight`: optional; 0–2, default 0.75. 
+**Base-only tasks (lego, extract, complete):** Require `ace_step_dit_model: "base"` in preferences and the Base model to be installed (Settings or `GET /api/ace-step/models`). For **lego**: send `taskType: "lego"`, `sourceAudioUrl` (backing audio), `instruction` (e.g. `"Generate the TRACK_NAME track based on the audio context:"`, with `TRACK_NAME` replaced by one of the supported track names), and `style` as the track description (caption). Supported track names: `vocals`, `backing_vocals`, `drums`, `bass`, `guitar`, `keyboard`, `percussion`, `strings`, `synth`, `fx`, `brass`, `woodwinds`. See `docs/ACE-Step-INFERENCE.md` for extract/complete parameters. + **Response (POST):** `{ "jobId": "", "status": "queued", "queuePosition": 1 }` **Status response:** `{ "jobId", "status": "queued"|"running"|"succeeded"|"failed"|"cancelled", "queuePosition"?, "etaSeconds"?, "result"?, "error"? }`. On success, `result` includes e.g. `audioUrls`, `duration`, `status`. Cancelled jobs have `status: "cancelled"` and `error: "Cancelled by user"`. @@ -203,11 +206,12 @@ List available DiT/LM models and trigger downloads. **The ACE-Step 1.5 downloade | Method | Path | Description | |--------|------|-------------| -| GET | `/api/ace-step/models` | List DiT and LM models with `installed` status, plus `discovered_models`: all model directories found under the checkpoints folder (including custom trained models). Response includes `dit_models`, `lm_models`, `discovered_models` (id, label, path, custom), `acestep_download_available`, `checkpoints_path`. | -| POST | `/api/ace-step/models/download` | Start download. Body: `{ "model": "turbo" | "turbo-shift1" | "sft" | "base" | "0.6B" | "1.7B" | "4B" }`. Uses bundled downloader (or `acestep-download` on PATH if not bundled). Returns `{ "ok", "model", "path" }` or `{ "error", "hint" }`. | -| GET | `/api/ace-step/models/status` | Download progress: `{ "running", "model", "progress", "error" }`. 
| +| GET | `/api/ace-step/models` | List DiT and LM models with `installed` status, plus `discovered_models`: all model directories found under the checkpoints folder (including custom trained models). Response includes `dit_models`, `lm_models`, `discovered_models` (id, label, path, custom), `acestep_download_available`, `checkpoints_path`. Use this to verify the **Base** model is installed before starting a lego/extract/complete job. | +| POST | `/api/ace-step/models/download` | Start download. Body: `{ "model": "turbo" \| "turbo-shift1" \| "turbo-shift3" \| "turbo-continuous" \| "sft" \| "base" \| "0.6B" \| "1.7B" \| "4B" }`. Uses bundled downloader (or `acestep-download` on PATH if not bundled). Returns `{ "ok", "model", "path" }` or `{ "error", "hint" }`. | +| GET | `/api/ace-step/models/status` | Download progress: `{ "running", "model", "progress", "error", "current_file", "file_index", "total_files", "eta_seconds", "cancelled" }`. | +| POST | `/api/ace-step/models/download/cancel` | Request cancellation of the current download. Returns `{ "cancelled", "message" }`. | -**Task types:** Generation accepts `taskType`: `text2music`, `cover`, `audio2audio`, `repaint`, `extend`, and (ACE-Step 1.5 Base) `lego`, `extract`, `complete`. Lego/extract/complete require the Base model and full 1.5 integration (planned). +**Task → model:** Generation accepts `taskType`: `text2music`, `cover`, `audio2audio`, `repaint`, `extend`, and (Base-only) `lego`, `extract`, `complete`. **Lego**, **extract**, and **complete** require the **Base** DiT model: set `ace_step_dit_model` to `"base"` in preferences and ensure the Base model is installed (download via Settings or `POST /api/ace-step/models/download` with `"model": "base"`). The UI checks `GET /api/ace-step/models` for `dit_models[].installed` before allowing these tasks. 
--- diff --git a/generate_ace.py b/generate_ace.py index 287ed19..ddabd91 100644 --- a/generate_ace.py +++ b/generate_ace.py @@ -592,15 +592,18 @@ def _prepare_reference_audio( if audio2audio_enable and task_norm == "text2music": task_norm = "retake" - # Any of the edit-style tasks imply some form of Audio2Audio. + # Any of the edit-style tasks imply some form of Audio2Audio or source-backed (lego/extract/complete). audio2audio_flag = bool( audio2audio_enable or task_norm in ("retake", "repaint", "extend") ) + needs_src_path = audio2audio_flag or task_norm in ("lego", "extract", "complete") - # If we *think* we're in an edit / audio2audio mode but there's no - # reference audio path at all, don't crash — just fall back to - # plain text2music. - if audio2audio_flag and not src_audio_path: + # If we need source/reference audio but none was provided, fall back to text2music (or fail for lego/extract/complete). + if needs_src_path and not src_audio_path: + if task_norm in ("lego", "extract", "complete"): + raise ValueError( + f"Task '{task_norm}' requires backing/source audio. Please provide it in the Lego tab or Custom audio card." + ) print( "[ACE] Audio2Audio / edit task requested but no reference audio " "was provided — falling back to plain text2music.", @@ -612,6 +615,8 @@ def _prepare_reference_audio( if audio2audio_flag: ref_path = _ensure_reference_wav(src_audio_path) + elif task_norm in ("lego", "extract", "complete"): + ref_path = _ensure_reference_wav(src_audio_path) # pipeline uses this as src_audio_path else: ref_path = None @@ -978,10 +983,10 @@ def _run_ace_text2music( # Wire up reference vs source audio per ACE-Step pipeline: # - # - retake / cover / audio2audio: use ref_audio_input (pipeline sets task to - # "audio2audio" and uses ref_latents). Do NOT pass src_audio_path. - # - repaint / extend: use src_audio_path (pipeline uses src_latents for the - # segment to repaint or extend). Do NOT pass ref_audio_input for this path. 
+ # - retake / cover / audio2audio / lego / extract / complete: use ref_audio_input so the pipeline + # gets backing latents. For lego we use LOW ref_audio_strength (API default 0.3) so diffusion + # starts from noisy backing and denoises toward the prompt (new instrument), matching timing. + # - repaint / extend: use src_audio_path (pipeline uses src_latents for repaint/extend segment). # - text2music: leave both unset (None). if not src_audio_path: call_kwargs["ref_audio_input"] = None @@ -990,7 +995,7 @@ def _run_ace_text2music( call_kwargs["src_audio_path"] = src_audio_path call_kwargs["ref_audio_input"] = None else: - # retake (including cover/audio2audio from UI) + # retake, cover, audio2audio, lego, extract, complete: backing as ref (lego uses low ref_audio_strength) call_kwargs["ref_audio_input"] = src_audio_path call_kwargs["src_audio_path"] = None diff --git a/ui/App.tsx b/ui/App.tsx index dd7dfe6..4278939 100644 --- a/ui/App.tsx +++ b/ui/App.tsx @@ -1101,6 +1101,7 @@ export default function App() { onGenerate={handleGenerate} isGenerating={isGenerating} initialData={reuseData} + onOpenSettings={() => setShowSettingsModal(true)} /> diff --git a/ui/components/CreatePanel.tsx b/ui/components/CreatePanel.tsx index 732d2ca..8ac03af 100644 --- a/ui/components/CreatePanel.tsx +++ b/ui/components/CreatePanel.tsx @@ -2,7 +2,13 @@ import React, { useState, useEffect, useRef, useCallback } from 'react'; import { Sparkles, ChevronDown, Settings2, Trash2, Music2, Sliders, Dices, Hash, RefreshCw, Plus, Upload, Play, Pause, Info, Loader2, Wrench } from 'lucide-react'; import { GenerationParams, Song } from '../types'; import { useAuth } from '../context/AuthContext'; -import { generateApi, type LoraAdapter } from '../services/api'; +import { generateApi, preferencesApi, aceStepModelsApi, type LoraAdapter } from '../services/api'; + +/** Tasks that require ACE-Step Base model only (see docs/ACE-Step-Tutorial.md). 
*/ +const TASKS_REQUIRING_BASE = ['lego', 'extract', 'complete'] as const; +function taskRequiresBase(taskType: string): boolean { + return TASKS_REQUIRING_BASE.includes(taskType as typeof TASKS_REQUIRING_BASE[number]); +} interface ReferenceTrack { id: string; @@ -23,6 +29,8 @@ interface CreatePanelProps { onGenerate: (params: GenerationParams) => void; isGenerating: boolean; initialData?: { song: Song, timestamp: number } | null; + /** Open Settings modal (e.g. to download required model). */ + onOpenSettings?: () => void; } /** Visible tooltip on hover (native title has delay and is unreliable). */ @@ -60,6 +68,22 @@ const KEY_SIGNATURES = [ const TIME_SIGNATURES = ['', '2/4', '3/4', '4/4', '6/8']; +// Lego / Extract / Complete: available track names (ACE-Step 1.5 Base model) +const LEGO_TRACKS = [ + { value: 'vocals', label: 'Vocals' }, + { value: 'backing_vocals', label: 'Backing vocals' }, + { value: 'drums', label: 'Drums' }, + { value: 'bass', label: 'Bass' }, + { value: 'guitar', label: 'Guitar' }, + { value: 'keyboard', label: 'Keyboard' }, + { value: 'percussion', label: 'Percussion' }, + { value: 'strings', label: 'Strings' }, + { value: 'synth', label: 'Synth' }, + { value: 'fx', label: 'FX' }, + { value: 'brass', label: 'Brass' }, + { value: 'woodwinds', label: 'Woodwinds' }, +]; + const VOCAL_LANGUAGES = [ { value: 'unknown', label: 'Auto / Instrumental' }, { value: 'ar', label: 'Arabic' }, @@ -114,11 +138,20 @@ const VOCAL_LANGUAGES = [ { value: 'zh', label: 'Chinese (Mandarin)' }, ]; -export const CreatePanel: React.FC = ({ onGenerate, isGenerating, initialData }) => { +// Create panel mode: Simple (description), Custom (full controls), Lego (add-instrument tracks) +type CreateMode = 'simple' | 'custom' | 'lego'; + +export const CreatePanel: React.FC = ({ onGenerate, isGenerating, initialData, onOpenSettings }) => { const { isAuthenticated, token } = useAuth(); - // Mode - const [customMode, setCustomMode] = useState(true); + // Mode: simple | 
custom | lego (Lego = dedicated tab for add-instrument) + const [createMode, setCreateMode] = useState('custom'); + const customMode = createMode === 'custom'; + + // Lego tab only + const [legoTrackName, setLegoTrackName] = useState('guitar'); + const [legoCaption, setLegoCaption] = useState(''); + const [legoValidationError, setLegoValidationError] = useState(''); // Simple Mode const [songDescription, setSongDescription] = useState(''); @@ -225,6 +258,8 @@ export const CreatePanel: React.FC = ({ onGenerate, isGenerati const [getLrc, setGetLrc] = useState(false); const [scoreScale, setScoreScale] = useState(0.5); const [lmBatchChunkSize, setLmBatchChunkSize] = useState(8); + const [trackName, setTrackName] = useState(''); // Lego/Extract: single track (e.g. guitar) + const [completeTrackClasses, setCompleteTrackClasses] = useState(''); // Complete: comma-separated (e.g. drums, bass, guitar) const [isFormatCaption, setIsFormatCaption] = useState(false); const [isUploadingReference, setIsUploadingReference] = useState(false); @@ -639,8 +674,95 @@ export const CreatePanel: React.FC = ({ onGenerate, isGenerati e.preventDefault(); }; - const handleGenerate = () => { - console.log('[CreatePanel] Create button clicked', { bulkCount, customMode, isAuthenticated }); + const handleGenerate = async () => { + console.log('[CreatePanel] Create button clicked', { bulkCount, customMode, createMode, isAuthenticated }); + + const effectiveTaskType = createMode === 'lego' ? 'lego' : (customMode ? taskType : (sourceAudioUrl?.trim() ? 'cover' : 'text2music')); + if (taskRequiresBase(effectiveTaskType)) { + setLegoValidationError(''); + try { + const list = await aceStepModelsApi.list(); + const baseInstalled = list.dit_models.some((m) => m.id === 'base' && m.installed); + if (!baseInstalled) { + setLegoValidationError('Lego (and Extract/Complete) require the Base model. 
Open Settings to download it, then try again.'); + onOpenSettings?.(); + return; + } + } catch (e) { + setLegoValidationError('Could not check models. Open Settings to ensure the Base model is installed.'); + onOpenSettings?.(); + return; + } + } + + // Lego mode: require backing audio and send a single lego job + if (createMode === 'lego') { + setLegoValidationError(''); + if (!sourceAudioUrl?.trim()) { + setLegoValidationError('Please select backing audio (required for Lego).'); + return; + } + const instruction = `Generate the ${legoTrackName} track based on the audio context:`; + const effGuidance = guidanceScale; + const effAudioCover = audioCoverStrength; + const effLmTemp = lmTemperature; + onGenerate({ + customMode: false, + songDescription: undefined, + prompt: instruction + (legoCaption.trim() ? ' ' + legoCaption.trim() : ''), + lyrics: '', + style: legoCaption.trim() || instruction, + title: title.trim() || `Lego ${legoTrackName}`, + instrumental: true, + vocalLanguage: 'en', + bpm: 0, + keyScale: '', + timeSignature: '', + duration: -1, + inferenceSteps, + guidanceScale: effGuidance, + batchSize: 1, + randomSeed: randomSeed, + seed: randomSeed ? 
-1 : seed, + thinking, + audioFormat, + inferMethod, + shift, + lmTemperature: effLmTemp, + lmCfgScale, + lmTopK, + lmTopP, + lmNegativePrompt, + referenceAudioUrl: undefined, + sourceAudioUrl: sourceAudioUrl.trim(), + audioCodes: undefined, + repaintingStart: 0, + repaintingEnd: -1, + audioCoverStrength: effAudioCover, + taskType: 'lego', + instruction, + useAdg, + cfgIntervalStart, + cfgIntervalEnd, + customTimesteps: customTimesteps.trim() || undefined, + loraNameOrPath: loraNameOrPath.trim() || undefined, + loraWeight, + useCotMetas, + useCotCaption, + useCotLanguage, + autogen, + constrainedDecodingDebug, + allowLmBatch, + getScores, + getLrc, + scoreScale, + lmBatchChunkSize, + negativePrompt: negativePrompt.trim() || undefined, + isFormatCaption, + }); + return; + } + // Bulk generation: loop bulkCount times for (let i = 0; i < bulkCount; i++) { // Seed handling: first job uses user's seed, rest get random seeds @@ -766,22 +888,32 @@ export const CreatePanel: React.FC = ({ onGenerate, isGenerati
+
{/* SIMPLE MODE */} - {!customMode && ( + {createMode === 'simple' && (
{/* Title (same as Custom mode) */}
@@ -1066,8 +1198,110 @@ export const CreatePanel: React.FC = ({ onGenerate, isGenerati
)} + {/* LEGO MODE — generate a single instrument track over existing audio */} + {createMode === 'lego' && ( +
+
+

+ Generate one instrument track to layer over your backing audio. Pick the track type and describe how it should sound. +

+
+ + {/* Title (optional) */} +
+
+ Title (optional) +
+ setTitle(e.target.value)} + placeholder="Name the output" + className="w-full bg-transparent p-3 text-sm text-zinc-900 dark:text-white placeholder-zinc-400 dark:placeholder-zinc-600 focus:outline-none" + /> +
+ + {/* Source audio (required for Lego) */} +
+
+ Backing audio + * + +
+
+
+ + +
+ { const f = e.target.files?.[0]; if (f) void uploadAudio(f, 'source'); e.target.value = ''; }} /> + {sourceAudioUrl ? ( +

{getAudioLabel(sourceAudioUrl)}

+ ) : ( +

No backing audio selected

+ )} +
+
+ + {/* Track to generate */} +
+
+ Track to generate +
+ +
+ + {/* Describe the track (caption) */} +
+
+ Describe the track +
+