LoRA-in-generation: API, pipeline shift, and Create UI

E · cursoragent · E · commit 23238bae4794 · 2026-02-06T09:42:16.000+01:00
- api/generate: add GET lora_adapters endpoint; pass loraNameOrPath/loraWeight through to generation; change defaults to 65 inference steps (was 55) and guidance scale 4.0 (was 6.0, or 7.0 with reference audio)
- cdmf_pipeline_ace_step: shift parameter (default 6.0) for scheduler
- generate_ace: pass shift 6.0 into pipeline
- CreatePanel: LoRA adapter selector and weight
- TrainingPanel: add help text noting that the trained LoRA appears in Create → LoRA adapter after training
- api.ts / types.ts: LoRA types and getLoraAdapters()

Co-authored-by: Cursor <cursoragent@cursor.com>
diff --git a/api/generate.py b/api/generate.py
@@ -12,6 +12,7 @@
 from flask import Blueprint, jsonify, request, send_file
 
 from cdmf_paths import get_output_dir, get_user_data_dir
+from cdmf_tracks import list_lora_adapters
 
 bp = Blueprint("api_generate", __name__)
 
@@ -103,17 +104,16 @@ def _run_generation(job_id: str) -> None:
             duration = 60
         # UI may send duration=-1 or 0; clamp to valid range (15–240s)
         duration = max(15, min(240, duration))
+        # Guide: 65 steps + CFG 4.0 for best quality; low CFG reduces artifacts (see community guide).
         try:
-            steps = int(params.get("inferenceSteps") or 55)
+            steps = int(params.get("inferenceSteps") or 65)
         except (TypeError, ValueError):
-            steps = 55
+            steps = 65
         steps = max(1, min(100, steps))
-        # Doc recommends 7.0 default; higher helps adherence to caption and reference (see ACE-Step-INFERENCE.md).
         try:
-            guidance_default = 7.0 if src_audio_path else 6.0
-            guidance_scale = float(params.get("guidanceScale") or guidance_default)
+            guidance_scale = float(params.get("guidanceScale") or 4.0)
         except (TypeError, ValueError):
-            guidance_scale = 7.0 if src_audio_path else 6.0
+            guidance_scale = 4.0
         try:
             seed = int(params.get("seed") or 0)
         except (TypeError, ValueError):
@@ -159,6 +159,14 @@ def _run_generation(job_id: str) -> None:
             repaint_end = -1.0
         # -1 means "end of audio"; generate_track_ace converts to target duration
 
+        # LoRA adapter (optional): path or folder name under custom_lora
+        lora_name_or_path = (params.get("loraNameOrPath") or params.get("lora_name_or_path") or "").strip()
+        try:
+            lora_weight = float(params.get("loraWeight") or params.get("lora_weight") or 0.75)
+        except (TypeError, ValueError):
+            lora_weight = 0.75
+        lora_weight = max(0.0, min(2.0, lora_weight))
+
         if src_audio_path:
             logging.info("[API generate] Using reference audio: %s (task=%s, audio2audio=%s)", src_audio_path, task, audio2audio_enable)
         else:
@@ -193,6 +201,8 @@ def _run_generation(job_id: str) -> None:
             repaint_end=repaint_end,
             vocal_gain_db=0.0,
             instrumental_gain_db=0.0,
+            lora_name_or_path=lora_name_or_path or None,
+            lora_weight=lora_weight,
         )
 
         wav_path = summary.get("wav_path")
@@ -234,6 +244,17 @@ def _run_generation(job_id: str) -> None:
                     break
 
 
+@bp.route("/lora_adapters", methods=["GET"])
+def get_lora_adapters():
+    """GET /api/generate/lora_adapters — list LoRA adapters (e.g. from Training or custom_lora)."""
+    try:
+        adapters = list_lora_adapters()
+        return jsonify({"adapters": adapters})
+    except Exception as e:
+        logging.exception("[API generate] list_lora_adapters failed: %s", e)
+        return jsonify({"adapters": []})
+
+
 @bp.route("", methods=["POST"], strict_slashes=False)
 @bp.route("/", methods=["POST"], strict_slashes=False)
 def create_job():
diff --git a/cdmf_pipeline_ace_step.py b/cdmf_pipeline_ace_step.py
@@ -921,6 +921,7 @@ def flowedit_diffusion_process(
         n_max=1.0,
         n_avg=1,
         scheduler_type="euler",
+        shift: float = 6.0,
     ):
 
         do_classifier_free_guidance = True
@@ -932,7 +933,7 @@ def flowedit_diffusion_process(
 
         scheduler = FlowMatchEulerDiscreteScheduler(
             num_train_timesteps=1000,
-            shift=3.0,
+            shift=shift,
         )
 
         T_steps = infer_steps
@@ -1111,25 +1112,26 @@ def add_latents_noise(
         noise,
         scheduler_type,
         infer_steps,
+        shift: float = 6.0,
     ):
 
         bsz = gt_latents.shape[0]
         if scheduler_type == "euler":
             scheduler = FlowMatchEulerDiscreteScheduler(
                 num_train_timesteps=1000,
-                shift=3.0,
+                shift=shift,
                 sigma_max=sigma_max,
             )
         elif scheduler_type == "heun":
             scheduler = FlowMatchHeunDiscreteScheduler(
                 num_train_timesteps=1000,
-                shift=3.0,
+                shift=shift,
                 sigma_max=sigma_max,
             )
         elif scheduler_type == "pingpong":
             scheduler = FlowMatchPingPongScheduler(
                 num_train_timesteps=1000,
-                shift=3.0,
+                shift=shift,
                 sigma_max=sigma_max
             )
 
@@ -1180,6 +1182,7 @@ def text2music_diffusion_process(
         audio2audio_enable=False,
         ref_audio_strength=0.5,
         ref_latents=None,
+        shift: float = 6.0,
     ):
 
         logger.info(
@@ -1212,17 +1215,17 @@ def text2music_diffusion_process(
         if scheduler_type == "euler":
             scheduler = FlowMatchEulerDiscreteScheduler(
                 num_train_timesteps=1000,
-                shift=3.0,
+                shift=shift,
             )
         elif scheduler_type == "heun":
             scheduler = FlowMatchHeunDiscreteScheduler(
                 num_train_timesteps=1000,
-                shift=3.0,
+                shift=shift,
             )
         elif scheduler_type == "pingpong":
             scheduler = FlowMatchPingPongScheduler(
                 num_train_timesteps=1000,
-                shift=3.0,
+                shift=shift,
             )
 
         frame_length = int(duration * 44100 / 512 / 8)
@@ -1400,6 +1403,7 @@ def text2music_diffusion_process(
                 noise=target_latents,
                 scheduler_type=scheduler_type,
                 infer_steps=infer_steps,
+                shift=shift,
             )
 
         attention_mask = torch.ones(bsz, frame_length, device=self.device, dtype=self.dtype)
@@ -1876,6 +1880,7 @@ def __call__(
         save_path: str = None,
         batch_size: int = 1,
         debug: bool = False,
+        shift: float = 6.0,
     ):
 
         start_time = time.time()
@@ -2029,6 +2034,7 @@ def __call__(
                 n_max=edit_n_max,
                 n_avg=edit_n_avg,
                 scheduler_type=scheduler_type,
+                shift=shift,
             )
         else:
             target_latents = self.text2music_diffusion_process(
@@ -2062,6 +2068,7 @@ def __call__(
                 audio2audio_enable=audio2audio_enable,
                 ref_audio_strength=ref_audio_strength,
                 ref_latents=ref_latents,
+                shift=shift,
             )
 
         end_time = time.time()
diff --git a/generate_ace.py b/generate_ace.py
@@ -877,6 +877,7 @@ def _run_ace_text2music(
             "batch_size": 1,
             "save_path": str(output_path),
             "debug": False,
+            "shift": 6.0,
         }
 
         # Wire up reference vs source audio per ACE-Step pipeline:
diff --git a/ui/components/CreatePanel.tsx b/ui/components/CreatePanel.tsx
@@ -1,8 +1,8 @@
 import React, { useState, useEffect, useRef, useCallback } from 'react';
-import { Sparkles, ChevronDown, Settings2, Trash2, Music2, Sliders, Dices, Hash, RefreshCw, Plus, Upload, Play, Pause, Info } from 'lucide-react';
+import { Sparkles, ChevronDown, Settings2, Trash2, Music2, Sliders, Dices, Hash, RefreshCw, Plus, Upload, Play, Pause, Info, Loader2 } from 'lucide-react';
 import { GenerationParams, Song } from '../types';
 import { useAuth } from '../context/AuthContext';
-import { generateApi } from '../services/api';
+import { generateApi, type LoraAdapter } from '../services/api';
 
 interface ReferenceTrack {
   id: string;
@@ -142,12 +142,12 @@ export const CreatePanel: React.FC<CreatePanelProps> = ({ onGenerate, isGenerati
   const [duration, setDuration] = useState(-1);
   const [batchSize, setBatchSize] = useState(1);
   const [bulkCount, setBulkCount] = useState(1); // Number of independent generation jobs to queue
-  const [guidanceScale, setGuidanceScale] = useState(7.0);
+  const [guidanceScale, setGuidanceScale] = useState(4.0);
   const [randomSeed, setRandomSeed] = useState(true);
   const [seed, setSeed] = useState(-1);
   const [thinking, setThinking] = useState(false); // Default false for GPU compatibility
   const [audioFormat, setAudioFormat] = useState<'mp3' | 'flac'>('mp3');
-  const [inferenceSteps, setInferenceSteps] = useState(8);
+  const [inferenceSteps, setInferenceSteps] = useState(65);
   const [inferMethod, setInferMethod] = useState<'ode' | 'sde'>('ode');
   const [shift, setShift] = useState(3.0);
 
@@ -171,6 +171,10 @@ export const CreatePanel: React.FC<CreatePanelProps> = ({ onGenerate, isGenerati
   const [cfgIntervalStart, setCfgIntervalStart] = useState(0.0);
   const [cfgIntervalEnd, setCfgIntervalEnd] = useState(1.0);
   const [customTimesteps, setCustomTimesteps] = useState('');
+  const [loraAdapters, setLoraAdapters] = useState<LoraAdapter[]>([]);
+  const [loraLoading, setLoraLoading] = useState(false);
+  const [loraNameOrPath, setLoraNameOrPath] = useState('');
+  const [loraWeight, setLoraWeight] = useState(0.75);
   const [useCotMetas, setUseCotMetas] = useState(true);
   const [useCotCaption, setUseCotCaption] = useState(true);
   const [useCotLanguage, setUseCotLanguage] = useState(true);
@@ -249,6 +253,17 @@ export const CreatePanel: React.FC<CreatePanelProps> = ({ onGenerate, isGenerati
     }
   }, [referenceAudioUrl, sourceAudioUrl]);
 
+  const fetchLoraAdapters = useCallback(() => {
+    setLoraLoading(true);
+    generateApi.getLoraAdapters()
+      .then((res) => setLoraAdapters(res.adapters || []))
+      .catch(() => setLoraAdapters([]))
+      .finally(() => setLoraLoading(false));
+  }, []);
+
+  // Fetch LoRA adapters on mount (Training output + custom_lora)
+  useEffect(() => { fetchLoraAdapters(); }, [fetchLoraAdapters]);
+
   useEffect(() => {
     const handleMouseMove = (e: MouseEvent) => {
       if (!isResizing) return;
@@ -629,6 +644,8 @@ export const CreatePanel: React.FC<CreatePanelProps> = ({ onGenerate, isGenerati
         cfgIntervalStart,
         cfgIntervalEnd,
         customTimesteps: customTimesteps.trim() || undefined,
+        loraNameOrPath: loraNameOrPath.trim() || undefined,
+        loraWeight,
         useCotMetas,
         useCotCaption,
         useCotLanguage,
@@ -1373,7 +1390,7 @@ export const CreatePanel: React.FC<CreatePanelProps> = ({ onGenerate, isGenerati
               <div className="flex items-center justify-between">
                 <span className="inline-flex items-center gap-1.5">
                   <label className="text-xs font-medium text-zinc-600 dark:text-zinc-400">Inference Steps</label>
-                  <InfoTooltip text="Number of denoising steps. Turbo: 1–20 (8 recommended). More steps = better quality, slower." />
+                  <InfoTooltip text="Number of denoising steps. 65 recommended for quality (low CFG + high steps). Turbo: 8–20." />
                 </span>
                 <span className="text-xs font-mono text-zinc-900 dark:text-white bg-zinc-100 dark:bg-black/20 px-2 py-0.5 rounded">{inferenceSteps}</span>
               </div>
@@ -1386,7 +1403,7 @@ export const CreatePanel: React.FC<CreatePanelProps> = ({ onGenerate, isGenerati
                 onChange={(e) => setInferenceSteps(Number(e.target.value))}
                 className="w-full h-2 bg-zinc-200 dark:bg-zinc-700 rounded-lg appearance-none cursor-pointer accent-pink-500"
               />
-              <p className="text-[10px] text-zinc-500">More steps = better quality, slower (8 recommended for turbo)</p>
+              <p className="text-[10px] text-zinc-500">65 recommended for quality; more steps = slower</p>
             </div>
 
             {/* Guidance Scale */}
@@ -1442,6 +1459,50 @@ export const CreatePanel: React.FC<CreatePanelProps> = ({ onGenerate, isGenerati
               </div>
             </div>
 
+            {/* LoRA adapter (Training / custom_lora) */}
+            <div className="grid grid-cols-2 gap-3">
+              <div className="space-y-1.5">
+                <span className="inline-flex items-center gap-1.5">
+                  <label className="text-xs font-medium text-zinc-600 dark:text-zinc-400">LoRA adapter</label>
+                  <InfoTooltip text="Use a custom LoRA (e.g. from Training). After training, click Refresh to see new adapters." />
+                  <button
+                    type="button"
+                    onClick={fetchLoraAdapters}
+                    disabled={loraLoading}
+                    className="p-0.5 rounded hover:bg-zinc-200 dark:hover:bg-zinc-600 disabled:opacity-50"
+                    title="Refresh LoRA list"
+                  >
+                    {loraLoading ? <Loader2 size={12} className="animate-spin" /> : <RefreshCw size={12} />}
+                  </button>
+                </span>
+                <select
+                  value={loraNameOrPath}
+                  onChange={(e) => setLoraNameOrPath(e.target.value)}
+                  className="w-full bg-zinc-50 dark:bg-black/20 border border-zinc-200 dark:border-white/10 rounded-lg px-2 py-1.5 text-xs text-zinc-900 dark:text-white focus:outline-none"
+                >
+                  <option value="">None</option>
+                  {loraAdapters.map((a) => (
+                    <option key={a.path} value={a.path}>{a.name}</option>
+                  ))}
+                </select>
+              </div>
+              <div className="space-y-1.5">
+                <span className="inline-flex items-center gap-1.5">
+                  <label className="text-xs font-medium text-zinc-600 dark:text-zinc-400">LoRA weight</label>
+                  <InfoTooltip text="Strength of the LoRA (0–2). 0.75 is a good default; lower = subtler, higher = stronger style." />
+                </span>
+                <input
+                  type="number"
+                  min={0}
+                  max={2}
+                  step={0.05}
+                  value={loraWeight}
+                  onChange={(e) => setLoraWeight(Number(e.target.value))}
+                  className="w-full bg-zinc-50 dark:bg-black/20 border border-zinc-200 dark:border-white/10 rounded-lg px-2 py-1.5 text-xs text-zinc-900 dark:text-white focus:outline-none"
+                />
+              </div>
+            </div>
+
             {/* Seed */}
             <div className="space-y-2">
               <div className="flex items-center justify-between">
diff --git a/ui/components/TrainingPanel.tsx b/ui/components/TrainingPanel.tsx
@@ -250,7 +250,7 @@ export const TrainingPanel: React.FC<TrainingPanelProps> = ({ onTracksUpdated: _
         <h2 className="text-lg font-semibold">Train Custom LoRA</h2>
       </div>
       <p className="text-sm text-zinc-500 dark:text-zinc-400 mb-4">
-        Run LoRA training on your dataset. Dataset folder must be under <code className="bg-zinc-200 dark:bg-zinc-700 px-1 rounded">training_datasets</code>. Use Browse to select a folder.
+        Run LoRA training on your dataset. Dataset folder must be under <code className="bg-zinc-200 dark:bg-zinc-700 px-1 rounded">training_datasets</code>. Use Browse to select a folder. When training finishes, the LoRA is saved automatically and will appear in <strong>Create → LoRA adapter</strong> (click Refresh there if needed).
       </p>
 
       {aceReady === false && aceState !== 'downloading' && (
diff --git a/ui/services/api.ts b/ui/services/api.ts
@@ -264,6 +264,10 @@ export interface GenerationParams {
   completeTrackClasses?: string[];
   isFormatCaption?: boolean;
   outputDir?: string;
+  /** LoRA adapter: folder name (from list) or full path. Used for ACE-Step generation. */
+  loraNameOrPath?: string;
+  /** LoRA weight 0–2. Default 0.75. */
+  loraWeight?: number;
 }
 
 export interface GenerationJob {
@@ -281,6 +285,12 @@ export interface GenerationJob {
   error?: string;
 }
 
+export interface LoraAdapter {
+  name: string;
+  path: string;
+  size_bytes?: number | null;
+}
+
 export const generateApi = {
   startGeneration: (params: GenerationParams, token: string): Promise<GenerationJob> =>
     api('/api/generate', { method: 'POST', body: params, token }),
@@ -291,6 +301,10 @@ export const generateApi = {
   getHistory: (token: string): Promise<{ jobs: GenerationJob[] }> =>
     api('/api/generate/history', { token }),
 
+  /** List LoRA adapters (Training output and custom_lora folder). */
+  getLoraAdapters: (): Promise<{ adapters: LoraAdapter[] }> =>
+    api('/api/generate/lora_adapters'),
+
   uploadAudio: async (file: File, token: string): Promise<{ url: string; key: string }> => {
     const url = `${API_BASE}/api/generate/upload-audio`;
     console.log('[API] POST', url);
diff --git a/ui/types.ts b/ui/types.ts
@@ -110,6 +110,8 @@ export interface GenerationParams {
   trackName?: string;
   completeTrackClasses?: string[];
   isFormatCaption?: boolean;
+  loraNameOrPath?: string;
+  loraWeight?: number;
 }
 
 export interface PlayerState {

Original file line number	Diff line number	Diff line change
`@@ -877,6 +877,7 @@ def _run_ace_text2music(`
`877`	`877`	`"batch_size": 1,`
`878`	`878`	`"save_path": str(output_path),`
`879`	`879`	`"debug": False,`
	`880`	`+ "shift": 6.0,`
`880`	`881`	`}`
`881`	`882`
`882`	`883`	`# Wire up reference vs source audio per ACE-Step pipeline:`
Original file line number	Diff line number	Diff line change
`@@ -110,6 +110,8 @@ export interface GenerationParams {`
`110`	`110`	`trackName?: string;`
`111`	`111`	`completeTrackClasses?: string[];`
`112`	`112`	`isFormatCaption?: boolean;`
	`113`	`+ loraNameOrPath?: string;`
	`114`	`+ loraWeight?: number;`
`113`	`115`	`}`
`114`	`116`
`115`	`117`	`export interface PlayerState {`