diff --git a/.github/workflows/build-release.yml b/.github/workflows/build-release.yml index 17b7340..31c481c 100644 --- a/.github/workflows/build-release.yml +++ b/.github/workflows/build-release.yml @@ -43,7 +43,14 @@ jobs: - name: Display Python version run: python --version - + + - name: Set up Bun + uses: oven-sh/setup-bun@v2 + with: + bun-version: latest + - name: Build new UI (React SPA) with Bun + run: ./scripts/build_ui.sh + - name: Install dependencies run: | python -m pip install --upgrade pip @@ -67,6 +74,20 @@ jobs: pip install "coqpit" "trainer>=0.0.32" "pysbd>=0.3.4" "inflect>=5.6.0" "unidecode>=1.3.2" pip install "TTS==0.21.2" python -c "from TTS.api import TTS" || (echo "::error::TTS import failed. Voice cloning will not work." && exit 1) + + - name: Install Demucs for stem splitting + run: | + pip install "demucs==4.0.1" + python -c "import demucs.separate" || (echo "::warning::Demucs import failed. Stem splitting may not work." && true) + + - name: Install basic-pitch for MIDI generation + run: | + pip install "basic-pitch>=0.4.0" + python -c "from basic_pitch.inference import predict" || (echo "::warning::basic-pitch import failed. MIDI generation may not work." && true) + + - name: Slim bundle (remove Sudachi if present) + run: | + pip uninstall -y SudachiDict-core SudachiPy sudachidict-core sudachipy 2>/dev/null || true - name: Install PyInstaller run: | @@ -150,4 +171,5 @@ jobs: with: files: | AceForge-macOS.dmg + AceForge-macOS.zip checksums.txt diff --git a/.github/workflows/new-ui-api-tests.yml b/.github/workflows/new-ui-api-tests.yml new file mode 100644 index 0000000..203fe37 --- /dev/null +++ b/.github/workflows/new-ui-api-tests.yml @@ -0,0 +1,68 @@ +# New UI API integration tests: real Flask app, no mocks. +# Asserts API contract (paths and JSON shape) for ace-step-ui compatibility. 
+ +name: New UI API Tests + +on: + push: + branches: [main, develop, experimental-ui] + pull_request: + branches: [main, develop, experimental-ui] + workflow_dispatch: + +permissions: + contents: read + +jobs: + api-tests: + name: New UI API integration tests + runs-on: macos-latest + timeout-minutes: 25 + + steps: + - name: Checkout repository + uses: actions/checkout@v4 + + - name: Set up Python + uses: actions/setup-python@v5 + with: + python-version: "3.11" + + - name: Install dependencies + run: | + python -m pip install --upgrade pip + pip install -r requirements_ace_macos.txt + pip install pytest + + - name: Install optional deps (no-deps) + run: | + pip install "audio-separator==0.40.0" --no-deps || true + pip install "py3langid==0.3.0" --no-deps || true + pip install "git+https://github.com/ace-step/ACE-Step.git" --no-deps || true + + - name: Run New UI API integration tests + run: | + cd "$GITHUB_WORKSPACE" + python -m pytest tests/test_new_ui_api.py -v --tb=short + + build-ui: + name: Build new UI (React SPA) with Bun + runs-on: macos-latest + timeout-minutes: 10 + + steps: + - name: Checkout repository + uses: actions/checkout@v4 + + - name: Set up Bun + uses: oven-sh/setup-bun@v2 + with: + bun-version: latest + + - name: Build UI + run: | + cd ui + bun install --frozen-lockfile 2>/dev/null || bun install + bun run build + test -f dist/index.html + test -d dist/assets || true diff --git a/.github/workflows/test-bundled-app.yml b/.github/workflows/test-bundled-app.yml index 3a21859..64eb73e 100644 --- a/.github/workflows/test-bundled-app.yml +++ b/.github/workflows/test-bundled-app.yml @@ -97,6 +97,32 @@ jobs: ./build/macos/codesign.sh dist/AceForge.app env: MACOS_SIGNING_IDENTITY: ${{ secrets.MACOS_SIGNING_IDENTITY || '-' }} + + - name: Test training entry point (--train) from bundled app + run: | + BUNDLED_BIN="./dist/AceForge.app/Contents/MacOS/AceForge_bin" + if [ ! 
-f "$BUNDLED_BIN" ]; then + echo "::error::Bundled binary not found at $BUNDLED_BIN" + exit 1 + fi + echo "Running bundled app with --train --help (must exit 0 and print trainer options)..." + OUTPUT=$("$BUNDLED_BIN" --train --help 2>&1) || EXIT=$? + EXIT=${EXIT:-0} + if [ "$EXIT" -ne 0 ]; then + echo "::error::Binary with --train --help exited with code $EXIT" + echo "$OUTPUT" + exit 1 + fi + for opt in "--dataset_path" "--exp_name" "--epochs" "--max_steps"; do + if echo "$OUTPUT" | grep -q -- "$opt"; then + echo "✓ Trainer option $opt present" + else + echo "::error::Trainer help missing option: $opt" + echo "$OUTPUT" + exit 1 + fi + done + echo "✓ Training-from-bundle entry point (--train) works correctly" - name: Download ACE-Step models (if not cached) if: steps.cache-models.outputs.cache-hit != 'true' diff --git a/.gitignore b/.gitignore index 0c6fe30..3f63869 100644 --- a/.gitignore +++ b/.gitignore @@ -54,6 +54,10 @@ release/ *.app *.spec.bak +# ---- New UI (React/Vite) ---- +ui/node_modules/ +ui/dist/ + # ---- Minification / tooling caches ---- node_modules/ npm-debug.log* diff --git a/CDMF.spec b/CDMF.spec index 97fec15..fece5a9 100644 --- a/CDMF.spec +++ b/CDMF.spec @@ -16,6 +16,7 @@ static_dir = spec_root / 'static' training_config_dir = spec_root / 'training_config' ace_models_dir = spec_root / 'ace_models' icon_path = spec_root / 'build' / 'macos' / 'AceForge.icns' +ui_dist_dir = spec_root / 'ui' / 'dist' # Collect _lzma binary explicitly (critical for py3langid in frozen apps) # PyInstaller should auto-detect it, but we ensure it's included @@ -216,7 +217,8 @@ a = Analysis( ('presets.json', '.'), # Include VERSION file (placed in MacOS directory for frozen apps) ('VERSION', '.'), - ] + _py3langid_data + _acestep_lyrics_data + _tokenizers_data + _basic_pitch_data + _tts_data + _tts_vocoder_configs + _trainer_data + _gruut_data + _jamo_data + _demucs_data, + # Include new React UI (built by build_local.sh / scripts/build_ui.sh) + ] + 
([(str(ui_dist_dir), 'ui/dist')] if ui_dist_dir.is_dir() else []) + _py3langid_data + _acestep_lyrics_data + _tokenizers_data + _basic_pitch_data + _tts_data + _tts_vocoder_configs + _trainer_data + _gruut_data + _jamo_data + _demucs_data, hiddenimports=[ 'diffusers', 'diffusers.loaders', @@ -242,6 +244,8 @@ a = Analysis( 'huggingface_hub', # ACE-Step wrapper module (imported with try/except in generate_ace.py) 'cdmf_pipeline_ace_step', + # Trainer CLI parser (--train --help path; avoids loading full cdmf_trainer in frozen app) + 'cdmf_trainer_parser', # Lyrics prompt model (lazily imported in cdmf_generation.py) 'lyrics_prompt_model', # ACE-Step package and all its submodules (critical for frozen app) diff --git a/README.md b/README.md index 79ffbda..2e145f6 100644 --- a/README.md +++ b/README.md @@ -6,7 +6,7 @@ AceForge is a **local-first AI music workstation for macOS Silicon** powered by > Status: **ALPHA** -image +AceForge-UI ## Features @@ -28,18 +28,16 @@ AceForge is a **local-first AI music workstation for macOS Silicon** powered by ### Minimum -- macOS 12.0 (Monterey) or later -- Apple Silicon (M1/M2/M3) or Intel Mac with AMD GPU +- Apple Silicon (M1/M2/M3) - 16 GB unified memory (for Apple Silicon) or 16 GB RAM -- ~10–12 GB VRAM/unified memory (more = more headroom) +- ~8–16 GB VRAM/unified memory (more = more headroom) - SSD with tens of GB free (models + audio + datasets) ### Recommended -- Apple Silicon M1 Pro/Max/Ultra, M2 Pro/Max/Ultra, or M3 Pro/Max +- Apple Silicon M4 Pro or M3 Pro/Max - 32 GB+ unified memory - Fast SSD -- Comfort reading terminal logs when something goes wrong ## Install and run @@ -202,5 +200,6 @@ Issues and PRs welcome. If you’re changing anything related to training, model ## License -This project’s **source code** is licensed under the **Apache License 2.0**. See `LICENSE`. +AceForge is licensed under the **Apache License 2.0**. See `LICENSE`. 
+The UI is forked and extended from [Ace-Step UI](https://github.com/fspecii/ace-step-ui) (MIT). diff --git a/USAGE.md b/USAGE.md index 17df8b3..9d2b134 100644 --- a/USAGE.md +++ b/USAGE.md @@ -353,6 +353,20 @@ The **Advanced** tab exposes more ACE-Step internals: - Browse for a LoRA folder under `custom_lora`. - Set the LoRA weight (0–10). +### 5.5 Cover and Audio→Audio: key parameters + +In **Cover** and **Audio→Audio** modes you transform an existing track. The following parameters directly control how much the output follows the **source audio** vs your **Style** and **Lyrics**: + +| Parameter | What it controls | Effect | +|-----------|------------------|--------| +| **Style of Music** (caption) | Target style for the output | Describes the *target* genre, mood, instruments. Strongly influences the result when Cover Strength is lower. | +| **Lyrics** | Target lyrics for the output | The *target* lyric content and structure. Uncheck **Instrumental** to use them; otherwise the model gets an instrumental token. | +| **Cover Strength** (Source influence) | Balance: source vs your text | **1.0** = output follows the source closely (structure + character). **Lower (e.g. 0.5–0.7)** = more influence from your Style and Lyrics. **0.2** = loose style transfer. | +| **Instrumental** | Whether lyrics are used | When checked, lyrics are ignored and the model receives an instrumental token. Uncheck to apply your Lyrics. | +| **Guidance scale** | How strongly the model follows text | Higher = stronger adherence to your Style/Lyrics (and to the source when combined with high Cover Strength). | + +**Summary:** For covers that reflect your own style and lyrics, set **Style** and **Lyrics** as desired, uncheck **Instrumental** if you use lyrics, and lower **Cover Strength** (e.g. 0.5–0.7) so your text has more influence. The (i) tooltips in the Create panel repeat this for quick reference. + If you're new to ACE-Step, you can ignore the Advanced tab entirely. 
The defaults were chosen to be safe and high quality out of the box. diff --git a/aceforge_app.py b/aceforge_app.py index 750e805..7926a3f 100644 --- a/aceforge_app.py +++ b/aceforge_app.py @@ -105,6 +105,19 @@ def _patched_getsource(obj): except Exception as e: print(f"[AceForge] WARNING: lzma initialization: {e}", flush=True) +# When frozen and launched with --train, run the LoRA trainer in this process and exit (no GUI). +# This allows Training to work from the app bundle; the parent app spawns us with --train + args. +# For --train --help we only load the parser (no heavy deps) so the bundle test can pass. +if getattr(sys, "frozen", False) and "--train" in sys.argv: + sys.argv = [sys.argv[0]] + [a for a in sys.argv[1:] if a != "--train"] + if "--help" in sys.argv or "-h" in sys.argv: + from cdmf_trainer_parser import _make_parser + _make_parser().print_help() + sys.exit(0) + from cdmf_trainer import run_from_argv + run_from_argv() + sys.exit(0) + # Import pywebview FIRST and patch it BEFORE importing music_forge_ui # This ensures that even if music_forge_ui tries to use webview, it will be protected import webview @@ -428,7 +441,7 @@ def on_window_closed(): # Create pywebview window pointing to Flask server # The singleton wrapper ensures this can only be called once window = webview.create_window( - title="AceForge - AI Music Generation", + title="AceForge", url=SERVER_URL, width=1400, height=900, @@ -458,9 +471,31 @@ def on_window_closed(): import atexit atexit.register(cleanup_resources) + # Apply zoom from preferences (default 80%); takes effect on next launch if changed in Settings + try: + from cdmf_paths import load_config + _cfg = load_config() + _z = int(_cfg.get("ui_zoom") or 80) + _z = max(50, min(150, _z)) + except Exception: + _z = 80 + _WEBVIEW_ZOOM = f"{_z}%" + _WEBVIEW_ZOOM_JS = f'document.documentElement.style.zoom = "{_WEBVIEW_ZOOM}";' + + def _apply_webview_zoom(win): + time.sleep(1.8) # allow initial page load + try: + if hasattr(win, 
'run_js'): + win.run_js(_WEBVIEW_ZOOM_JS) + else: + win.evaluate_js(_WEBVIEW_ZOOM_JS) + print(f"[AceForge] Webview zoom set to {_WEBVIEW_ZOOM}", flush=True) + except Exception as e: + print(f"[AceForge] Could not set webview zoom: {e}", flush=True) + # Start the GUI event loop (only once - this is a blocking call) - # The singleton wrapper ensures this can only be called once globally - webview.start(debug=False) + # Pass _apply_webview_zoom so it runs in a separate thread after window is ready + webview.start(_apply_webview_zoom, window, debug=False) # This should not be reached (on_window_closed exits), but just in case cleanup_resources() diff --git a/api/__init__.py b/api/__init__.py new file mode 100644 index 0000000..aee2631 --- /dev/null +++ b/api/__init__.py @@ -0,0 +1,25 @@ +# AceForge New UI API compatibility layer. +# Blueprints match Express routes from ace-step-ui for the ported React front end. +# No auth (local-only); all persistence via cdmf_paths global app settings. + +from api.auth import bp as auth_bp +from api.songs import bp as songs_bp +from api.generate import bp as generate_bp +from api.playlists import bp as playlists_bp +from api.users import bp as users_bp +from api.contact import bp as contact_bp +from api.reference_tracks import bp as reference_tracks_bp +from api.search import bp as search_bp +from api.preferences import bp as preferences_bp + +__all__ = [ + "auth_bp", + "songs_bp", + "generate_bp", + "playlists_bp", + "users_bp", + "contact_bp", + "reference_tracks_bp", + "search_bp", + "preferences_bp", +] diff --git a/api/auth.py b/api/auth.py new file mode 100644 index 0000000..2210521 --- /dev/null +++ b/api/auth.py @@ -0,0 +1,73 @@ +""" +Auth API stub for new UI. Local-only, no real auth. +GET /api/auth/auto returns a local user (OS username) so the React app starts straight into the UI. +Contract: { user: { id, username, ... }, token }. 
+""" + +import getpass +import os +from flask import Blueprint, jsonify + +bp = Blueprint("api_auth", __name__) + + +def _local_username() -> str: + """Use macOS/system username when available, else 'Local'.""" + try: + return (getpass.getuser() or os.environ.get("USER") or os.environ.get("USERNAME") or "Local").strip() or "Local" + except Exception: + return "Local" + + +def _local_user(): + """Single user dict for all auth/user stubs.""" + return { + "id": "local", + "username": _local_username(), + "bio": None, + "avatar_url": None, + "banner_url": None, + "isAdmin": False, + "createdAt": None, + } + + +@bp.route("/auto", methods=["GET"]) +def auto(): + """Return local user (OS username). No token; app does not support login.""" + user = _local_user() + return jsonify({"user": user, "token": None}) + + +@bp.route("/me", methods=["GET"]) +def me(): + """Stub: always return local user (no token check).""" + return jsonify({"user": _local_user()}) + + +@bp.route("/setup", methods=["POST"]) +def setup(): + """Stub: no-op; UI can call after 'first run'. Return same as auto.""" + return jsonify({"user": _local_user(), "token": None}) + + +@bp.route("/logout", methods=["POST"]) +def logout(): + """Stub: no-op; local app has no session.""" + return jsonify({"success": True}) + + +@bp.route("/refresh", methods=["POST"]) +def refresh(): + """Stub: return same as auto (no refresh token).""" + return jsonify({"user": _local_user(), "token": None}) + + +@bp.route("/username", methods=["PATCH"]) +def update_username(): + """Stub: accept body.username but keep OS username for display.""" + return jsonify({"user": _local_user(), "token": None}) + + +# For api.users: single snapshot so "from api.auth import LOCAL_USER" works +LOCAL_USER = _local_user() diff --git a/api/contact.py b/api/contact.py new file mode 100644 index 0000000..73d9e6c --- /dev/null +++ b/api/contact.py @@ -0,0 +1,13 @@ +""" +Contact API stub for new UI. No email/DB; returns success. 
+""" + +from flask import Blueprint, jsonify + +bp = Blueprint("api_contact", __name__) + + +@bp.route("", methods=["POST"], strict_slashes=False) +@bp.route("/", methods=["POST"], strict_slashes=False) +def submit(): + return jsonify({"success": True, "message": "Received", "id": "local"}) diff --git a/api/generate.py b/api/generate.py new file mode 100644 index 0000000..ca5a2dd --- /dev/null +++ b/api/generate.py @@ -0,0 +1,485 @@ +""" +Generation API for new UI. Maps ace-step-ui GenerationParams to generate_track_ace(); +job queue stored under get_user_data_dir(). No auth. Real implementation (no mocks). +""" + +import json +import logging +import threading +import time +import uuid +from pathlib import Path +from flask import Blueprint, jsonify, request, send_file + +from cdmf_paths import get_output_dir, get_user_data_dir +from cdmf_tracks import list_lora_adapters +from cdmf_generation_job import GenerationCancelled + +bp = Blueprint("api_generate", __name__) + +# In-memory job store (key: jobId, value: { status, params, result?, error?, startTime, queuePosition? 
})
+_jobs: dict = {}
+_jobs_lock = threading.Lock()
+# Queue order for queuePosition
+_job_order: list = []
+# One worker at a time (must use 'global _generation_busy' in any function that assigns to it)
+_generation_busy = False
+# Current running job id (for cancel); set by worker, read by cancel endpoint
+_current_job_id: str | None = None
+# Job ids for which cancel was requested (cooperative stop)
+_cancel_requested: set = set()
+
+
+def _is_cancel_requested(job_id: str) -> bool:
+    """Return True if job_id is in the cancel-requested set (read under _jobs_lock)."""
+    with _jobs_lock:
+        return job_id in _cancel_requested
+
+
+def _refs_dir() -> Path:
+    """Return the "references" directory under the user data dir, creating it if needed."""
+    d = get_user_data_dir() / "references"
+    d.mkdir(parents=True, exist_ok=True)
+    return d
+
+
+def _jobs_path() -> Path:
+    """Return the path of generation_jobs.json under the user data dir."""
+    return get_user_data_dir() / "generation_jobs.json"
+
+
+def _resolve_audio_url_to_path(url: str) -> str | None:
+    """Convert /audio/filename or /audio/refs/filename (or full URL) to absolute path.
+
+    Returns None when url is empty or not a string, does not start with one of the
+    two /audio/ prefixes, resolves outside its base directory (e.g. via ".." or an
+    absolute name), or does not point at an existing file.
+    """
+    if not url or not isinstance(url, str):
+        return None
+    url = url.strip()
+    # Allow full-origin URLs from the UI (e.g. http://127.0.0.1:5056/audio/refs/xxx)
+    if "://" in url and "/audio/" in url:
+        url = "/audio/" + url.split("/audio/", 1)[-1]
+    if url.startswith("/audio/refs/"):
+        base = _refs_dir()
+        name = url.replace("/audio/refs/", "", 1).split("?")[0]
+    elif url.startswith("/audio/"):
+        base = Path(get_output_dir())
+        name = url.replace("/audio/", "", 1).split("?")[0]
+    else:
+        return None
+    base = base.resolve()
+    path = (base / name).resolve()
+    # The name comes from the request body: refuse anything that escapes the base directory.
+    if not path.is_relative_to(base):
+        return None
+    return str(path) if path.is_file() else None
+
+
+def _start_next_queued_locked() -> None:
+    """Hand the single worker slot to the next queued job, or mark the worker idle.
+
+    The caller must hold _jobs_lock. _generation_busy is only cleared when no job
+    is queued, so the flag never reads False while a follow-up worker is starting.
+    """
+    global _generation_busy
+    for jid in _job_order:
+        j = _jobs.get(jid)
+        if j and j.get("status") == "queued":
+            _generation_busy = True
+            threading.Thread(target=_run_generation, args=(jid,), daemon=True).start()
+            return
+    _generation_busy = False
+
+
+def _run_generation(job_id: str) -> None:
+    """Background worker: run generate_track_ace for one job and record the outcome.
+
+    Marks the job "running", maps the stored request params onto generate_track_ace()
+    keyword arguments, then sets the job to "succeeded" (with result), "cancelled"
+    (GenerationCancelled) or "failed" (any other exception). On exit the worker slot
+    is passed to the next queued job, or released if none is queued.
+    """
+    global _current_job_id
+    with _jobs_lock:
+        job = _jobs.get(job_id)
+        if not job or job.get("status") != "queued":
+            # Nothing to run (e.g. cancelled while still queued). If no other worker is
+            # active, pass the slot on; otherwise _generation_busy would stay True forever.
+            if _current_job_id is None:
+                _start_next_queued_locked()
+            return
+        job["status"] = "running"
+        _current_job_id = job_id
+
+    def cancel_check() -> bool:
+        return _is_cancel_requested(job_id)
+
+    try:
+        from generate_ace import generate_track_ace
+
+        params = job.get("params") or {}
+        if not isinstance(params, dict):
+            params = {}
+        # Map ace-step-ui GenerationParams to our API (support full UI payload including duration=-1, seed=-1, bpm=0)
+        custom_mode = bool(params.get("customMode", False))
+        task = (params.get("taskType") or "text2music").strip().lower()
+        if task not in ("text2music", "retake", "repaint", "extend", "cover", "audio2audio"):
+            task = "text2music"
+        # Single style/caption field drives all text conditioning (ACE-Step caption).
+        # Optionally fold key, time signature, and vocal language into the prompt when set.
+        prompt = (params.get("style") or "").strip() if custom_mode else (params.get("songDescription") or "").strip()
+        key_scale = (params.get("keyScale") or "").strip()
+        time_sig = (params.get("timeSignature") or "").strip()
+        vocal_lang = (params.get("vocalLanguage") or "").strip()
+        extra_bits = []
+        if key_scale:
+            extra_bits.append(f"key {key_scale}")
+        if time_sig:
+            extra_bits.append(f"time signature {time_sig}")
+        if vocal_lang and vocal_lang not in ("unknown", ""):
+            extra_bits.append(f"vocal language {vocal_lang}")
+        if extra_bits:
+            prompt = f"{prompt}, {', '.join(extra_bits)}" if prompt else ", ".join(extra_bits)
+        if not prompt:
+            # For cover/audio2audio, default encourages transformation while keeping structure; otherwise generic instrumental
+            if task in ("cover", "audio2audio", "retake"):
+                prompt = "transform style while preserving structure, re-interpret with new character"
+            else:
+                prompt = "instrumental background music"
+        lyrics = (params.get("lyrics") or "").strip()
+        instrumental = bool(params.get("instrumental", True))
+        try:
+            d = params.get("duration")
+            duration = float(d if d is not None else 60)
+        except (TypeError, ValueError):
+            duration = 60
+        # UI may send duration=-1 or 0; clamp to valid range (15–240s)
+        duration = max(15, min(240, duration))
+        # Guide: 65 steps + CFG 4.0 for best quality; low CFG reduces artifacts (see community guide).
+        try:
+            steps = int(params.get("inferenceSteps") or 65)
+        except (TypeError, ValueError):
+            steps = 65
+        steps = max(1, min(100, steps))
+        try:
+            guidance_scale = float(params.get("guidanceScale") or 4.0)
+        except (TypeError, ValueError):
+            guidance_scale = 4.0
+        try:
+            seed = int(params.get("seed") or 0)
+        except (TypeError, ValueError):
+            seed = 0
+        random_seed = params.get("randomSeed", True)
+        if random_seed:
+            import random
+            seed = random.randint(0, 2**31 - 1)
+        bpm = params.get("bpm")
+        if bpm is not None:
+            try:
+                bpm = float(bpm)
+                if bpm <= 0:
+                    bpm = None
+            except (TypeError, ValueError):
+                bpm = None
+        title = (params.get("title") or "Untitled").strip() or "Track"
+        reference_audio_url = (params.get("referenceAudioUrl") or params.get("reference_audio_path") or "").strip()
+        source_audio_url = (params.get("sourceAudioUrl") or params.get("src_audio_path") or "").strip()
+        # For cover/retake use source-first (song to cover); for style/reference use reference-first
+        if task in ("cover", "retake"):
+            resolved = _resolve_audio_url_to_path(source_audio_url) if source_audio_url else None
+            src_audio_path = resolved or (_resolve_audio_url_to_path(reference_audio_url) if reference_audio_url else None)
+        else:
+            resolved = _resolve_audio_url_to_path(reference_audio_url) if reference_audio_url else None
+            src_audio_path = resolved or (_resolve_audio_url_to_path(source_audio_url) if source_audio_url else None)
+
+        # When reference/source audio is provided, enable Audio2Audio so ACE-Step uses it (cover/retake/repaint).
+        # See docs/ACE-Step-INFERENCE.md: audio_cover_strength 1.0 = strong adherence; 0.5–0.8 = more caption influence.
+        audio2audio_enable = bool(src_audio_path)
+        ref_default = 0.8 if task in ("cover", "retake", "audio2audio") else 0.7
+        # Guarded like the other numeric params so a non-numeric value falls back instead of failing the job.
+        try:
+            ref_audio_strength = float(params.get("audioCoverStrength") or params.get("ref_audio_strength") or ref_default)
+        except (TypeError, ValueError):
+            ref_audio_strength = ref_default
+        ref_audio_strength = max(0.0, min(1.0, ref_audio_strength))
+
+        # Repaint segment (for task=repaint); -1 means end of audio (converted to duration in generate_track_ace).
+        try:
+            repaint_start = float(params.get("repaintingStart") or params.get("repaint_start") or 0)
+        except (TypeError, ValueError):
+            repaint_start = 0.0
+        try:
+            repaint_end = float(params.get("repaintingEnd") or params.get("repaint_end") or -1)
+        except (TypeError, ValueError):
+            repaint_end = -1.0
+        # -1 means "end of audio"; generate_track_ace converts to target duration
+
+        # LoRA adapter (optional): path or folder name under custom_lora
+        lora_name_or_path = (params.get("loraNameOrPath") or params.get("lora_name_or_path") or "").strip()
+        try:
+            lora_weight = float(params.get("loraWeight") or params.get("lora_weight") or 0.75)
+        except (TypeError, ValueError):
+            lora_weight = 0.75
+        lora_weight = max(0.0, min(2.0, lora_weight))
+
+        if src_audio_path:
+            logging.info("[API generate] Using reference audio: %s (task=%s, audio2audio=%s)", src_audio_path, task, audio2audio_enable)
+        else:
+            logging.info("[API generate] No reference audio; text2music only")
+
+        out_dir_str = params.get("outputDir") or params.get("output_dir") or get_output_dir()
+        out_dir = Path(out_dir_str)
+        out_dir.mkdir(parents=True, exist_ok=True)
+
+        # ACE-Step params aligned with docs/ACE-Step-INFERENCE.md:
+        # caption/style, lyrics, src_audio (→ ref_audio_input for cover/retake), audio_cover_strength,
+        # task, repainting_*; guidance_scale 7.0 when using reference improves adherence.
+        summary = generate_track_ace(
+            genre_prompt=prompt,
+            lyrics=lyrics,
+            instrumental=instrumental,
+            negative_prompt="",
+            target_seconds=duration,
+            fade_in_seconds=0.5,
+            fade_out_seconds=0.5,
+            seed=seed,
+            out_dir=out_dir,
+            basename=title[:200],
+            steps=steps,
+            guidance_scale=guidance_scale,
+            bpm=bpm,
+            src_audio_path=src_audio_path,
+            task=task,
+            audio2audio_enable=audio2audio_enable,
+            ref_audio_strength=ref_audio_strength,
+            repaint_start=repaint_start,
+            repaint_end=repaint_end,
+            vocal_gain_db=0.0,
+            instrumental_gain_db=0.0,
+            lora_name_or_path=lora_name_or_path or None,
+            lora_weight=lora_weight,
+            cancel_check=cancel_check,
+        )
+
+        wav_path = summary.get("wav_path")
+        if isinstance(wav_path, Path):
+            path = wav_path
+        else:
+            path = Path(str(wav_path))
+        filename = path.name
+        audio_url = f"/audio/{filename}"
+        actual_seconds = float(summary.get("actual_seconds") or duration)
+
+        with _jobs_lock:
+            job = _jobs.get(job_id)
+            if job:
+                job["status"] = "succeeded"
+                job["result"] = {
+                    "audioUrls": [audio_url],
+                    "duration": int(actual_seconds),
+                    "bpm": bpm,
+                    "keyScale": params.get("keyScale"),
+                    "timeSignature": params.get("timeSignature"),
+                    "status": "succeeded",
+                }
+    except GenerationCancelled:
+        logging.info("Generation job %s cancelled by user", job_id)
+        with _jobs_lock:
+            job = _jobs.get(job_id)
+            if job:
+                job["status"] = "cancelled"
+                job["error"] = "Cancelled by user"
+    except Exception as e:
+        logging.exception("Generation job %s failed", job_id)
+        with _jobs_lock:
+            job = _jobs.get(job_id)
+            if job:
+                job["status"] = "failed"
+                job["error"] = str(e)
+    finally:
+        # One critical section: clear the running marker and either start the next queued
+        # job (skips cancelled: they are no longer "queued") or release the worker slot.
+        with _jobs_lock:
+            _current_job_id = None
+            _cancel_requested.discard(job_id)
+            _start_next_queued_locked()
+
+
+@bp.route("/lora_adapters", methods=["GET"])
+def get_lora_adapters():
+    """GET /api/generate/lora_adapters — list LoRA adapters (e.g. from Training or custom_lora)."""
+    try:
+        adapters = list_lora_adapters()
+        return jsonify({"adapters": adapters})
+    except Exception as e:
+        # Best effort: the UI gets an empty list rather than an error response.
+        logging.exception("[API generate] list_lora_adapters failed: %s", e)
+        return jsonify({"adapters": []})
+
+
+@bp.route("", methods=["POST"], strict_slashes=False)
+@bp.route("/", methods=["POST"], strict_slashes=False)
+def create_job():
+    """POST /api/generate or /api/generate/ — enqueue generation job. Returns jobId, status, queuePosition.
+
+    Responds 400 when simple mode has no songDescription, or custom mode has none of
+    style, lyrics, reference audio or source audio. Starts a worker thread only if
+    none is marked busy; otherwise the job waits in the queue.
+    """
+    global _generation_busy
+    try:
+        logging.info("[API generate] POST /api/generate received")
+        raw = request.get_json(silent=True)
+        # Ensure we always have a dict (get_json can return list or None; UI sends object)
+        data = raw if isinstance(raw, dict) else {}
+        logging.info("[API generate] Request body keys: %s", list(data.keys()) if data else [])
+
+        if not data.get("customMode") and not data.get("songDescription"):
+            return jsonify({"error": "Song description required for simple mode"}), 400
+        # Custom mode: require at least one of style, lyrics, reference audio, or source audio
+        if data.get("customMode"):
+            style = (data.get("style") or "").strip()
+            lyrics = (data.get("lyrics") or "").strip()
+            ref_audio = (data.get("referenceAudioUrl") or data.get("reference_audio_path") or "").strip()
+            # The worker reads "src_audio_path"; accept it here too ("source_audio_path" kept for compatibility).
+            src_audio = (
+                data.get("sourceAudioUrl")
+                or data.get("src_audio_path")
+                or data.get("source_audio_path")
+                or ""
+            ).strip()
+            if not style and not lyrics and not ref_audio and not src_audio:
+                return jsonify({"error": "Style, lyrics, or reference/source audio required for custom mode"}), 400
+
+        job_id = str(uuid.uuid4())
+        # Store a copy so we don't keep a reference to the request body
+        try:
+            params_copy = dict(data)
+        except (TypeError, ValueError):
+            params_copy = {}
+        with _jobs_lock:
+            _jobs[job_id] = {
+                "status": "queued",
+                "params": params_copy,
+                "result": None,
+                "error": None,
+                "startTime": time.time(),
+                "queuePosition": len(_job_order) + 1,
+            }
+            _job_order.append(job_id)
+            pos = _jobs[job_id]["queuePosition"]
+            # Test-and-set under the lock so two concurrent requests cannot both start a worker.
+            start_worker = not _generation_busy
+            if start_worker:
+                _generation_busy = True
+
+        if start_worker:
+            threading.Thread(target=_run_generation, args=(job_id,), daemon=True).start()
+
+        logging.info("[API generate] Job %s queued at position %s", job_id, pos)
+        return jsonify({
+            "jobId": job_id,
+            "status": "queued",
+            "queuePosition": pos,
+        })
+    except Exception as e:
+        logging.exception("[API generate] create_job failed: %s", e)
+        raise
+
+
+@bp.route("/status/<job_id>", methods=["GET"])
+def get_status(job_id: str):
+    """GET /api/generate/status/:jobId — return job status and result when done."""
+    with _jobs_lock:
+        job = _jobs.get(job_id)
+    if not job:
+        return jsonify({"error": "Job not found"}), 404
+    status = job.get("status", "unknown")
+    out = {
+        "jobId": job_id,
+        "status": status,
+        "queuePosition": job.get("queuePosition"),
+        "etaSeconds": 180,
+        "result": job.get("result"),
+        "error": job.get("error"),
+    }
+    return jsonify(out)
+
+
+@bp.route("/cancel/<job_id>", methods=["POST"])
+def cancel_job(job_id: str):
+    """POST /api/generate/cancel/:jobId — cancel a queued or running generation job.
+
+    A queued job is marked cancelled immediately; for a running job the id is added
+    to _cancel_requested and the worker stops cooperatively via its cancel_check.
+    """
+    with _jobs_lock:
+        job = _jobs.get(job_id)
+        if not job:
+            return jsonify({"error": "Job not found"}), 404
+        status = job.get("status", "unknown")
+        if status == "queued":
+            job["status"] = "cancelled"
+            job["error"] = "Cancelled by user"
+            return jsonify({"cancelled": True, "jobId": job_id, "message": "Job removed from queue."})
+        if status == "running":
+            _cancel_requested.add(job_id)
+            return jsonify({"cancelled": True, "jobId": job_id, "message": "Cancel requested; generation will stop after the current step."})
+        # already succeeded, failed, or cancelled
+        return jsonify({"cancelled": False, "jobId": job_id, "message": f"Job already {status}."})
+
+
+def _reference_tracks_meta_path() -> Path:
+    """Path to reference_tracks.json (shared with api.reference_tracks)."""
+    return get_user_data_dir() / "reference_tracks.json"
+
+
+def _append_to_reference_library(ref_id: str, filename: str, audio_url: str, file_path: Path) -> None:
+    """Add an entry to reference_tracks.json so the file appears in 'From library' and in the main player."""
+    meta_path = _reference_tracks_meta_path()
+    records = []
+    if meta_path.is_file():
+        try:
+            with meta_path.open("r", encoding="utf-8") as f:
+                data = json.load(f)
+            records = data if isinstance(data, list) else []
+        except Exception as e:
+            # Still best effort, but no longer silent: the write below replaces the unreadable file.
+            logging.warning("[API generate] Could not read %s; starting a new reference list: %s", meta_path, e)
+    records.append({
+        "id": ref_id,
+        "filename": filename,
+        "storage_key": filename,
+        "audio_url": audio_url,
+        "duration": None,
+        "file_size_bytes": file_path.stat().st_size if file_path.is_file() else None,
+        "tags": ["uploaded"],
+    })
+    meta_path.parent.mkdir(parents=True, exist_ok=True)
+    with meta_path.open("w", encoding="utf-8") as f:
+        json.dump(records, f, indent=2)
+
+
+@bp.route("/upload-audio", methods=["POST"])
+def upload_audio():
+    """POST /api/generate/upload-audio — multipart file; save to references dir and add to library."""
+    if "audio" not in request.files:
+        return jsonify({"error": "Audio file is required"}), 400
+    f = request.files["audio"]
+    if not f.filename:
+        return jsonify({"error": "No filename"}), 400
+    # Only the client's extension is kept; the stored name is a fresh UUID.
+    ext = Path(f.filename).suffix.lower() or ".audio"
+    ref_id = str(uuid.uuid4())
+    name = f"{ref_id}{ext}"
+    path = _refs_dir() / name
+    f.save(str(path))
+    url = f"/audio/refs/{name}"
+    _append_to_reference_library(ref_id, name, url, path)
+    return jsonify({"url": url, "key": name})
+
+
+@bp.route("/audio", methods=["GET"])
+def get_audio():
+    """GET /api/generate/audio?path=... — serve file from output or references.
+
+    A "refs/" prefix selects the references dir, anything else the output dir.
+    Responds 400 when path is missing or resolves outside that directory, 404 when
+    the file does not exist.
+    """
+    path_arg = request.args.get("path")
+    if not path_arg:
+        return jsonify({"error": "Path required"}), 400
+    path_arg = path_arg.strip().lstrip("/")
+    if path_arg.startswith("refs/"):
+        base = _refs_dir()
+        rel = path_arg.replace("refs/", "", 1)
+    else:
+        base = Path(get_output_dir())
+        rel = path_arg
+    base = base.resolve()
+    local = (base / rel).resolve()
+    # path comes from the query string: refuse ".." or absolute components that escape the base dir.
+    if not local.is_relative_to(base):
+        return jsonify({"error": "Invalid path"}), 400
+    if not local.is_file():
+        return jsonify({"error": "File not found"}), 404
+    return send_file(local, as_attachment=False, download_name=local.name)
+
+
+@bp.route("/history", methods=["GET"])
+def get_history():
+    """GET /api/generate/history — last 50 jobs."""
+    with _jobs_lock:
+        order = _job_order[-50:]
+        order.reverse()
+        jobs = [{"id": jid, **_jobs.get(jid, {})} for jid in order if jid in _jobs]
+    return jsonify({"jobs": jobs})
+
+
+@bp.route("/endpoints", methods=["GET"])
+def get_endpoints():
+    """GET /api/generate/endpoints."""
+    return jsonify({"endpoints": {"provider": "acestep-local", "endpoint": "local"}})
+
+
+@bp.route("/health", methods=["GET"])
+def get_health():
+    """GET /api/generate/health."""
+    return jsonify({"healthy": True})
+
+
+@bp.route("/debug/<task_id>", methods=["GET"])
+def get_debug(task_id: str):
+    """GET /api/generate/debug/:taskId — raw job info."""
+    with _jobs_lock:
+        job = _jobs.get(task_id)
+    if not job:
+        return jsonify({"error": "Job not found"}), 404
+    return jsonify({"rawResponse": job})
+
+
+@bp.route("/format", methods=["POST"])
+def format_input():
+    """POST /api/generate/format — stub; return same payload."""
+    data = request.get_json(silent=True) or {}
+    return jsonify({
+        "success": True,
+        "caption": data.get("caption"),
+        "lyrics": data.get("lyrics"),
+        "bpm": data.get("bpm"),
+        "duration": data.get("duration"),
+        "key_scale": data.get("keyScale"),
+        "time_signature": data.get("timeSignature"),
+    })
diff --git a/api/playlists.py b/api/playlists.py
new file mode 100644
index 0000000..455a1ae
--- /dev/null
+++ b/api/playlists.py
@@ -0,0 +1,133 @@
+"""
+Playlists API for new UI. Stored in get_user_data_dir() / playlists.json.
+No auth. Contract matches Express.
@bp.route("", methods=["POST"], strict_slashes=False)
@bp.route("/", methods=["POST"], strict_slashes=False)
def create_playlist():
    """POST /api/playlists — create a playlist and persist it to playlists.json.

    JSON body (all fields optional):
        name: Display name; blank or missing becomes "Untitled".
        description: Free text; missing becomes "".
        isPublic: Stored as ``is_public``; defaults to True.

    Returns:
        ``{"playlist": {...}}`` describing the newly created playlist.
    """
    payload = request.get_json(silent=True)
    if not isinstance(payload, dict):
        # A JSON array/scalar body has no .get(); treat it like an empty object.
        payload = {}

    def _text(value, limit: int) -> str:
        # Coerce to str so numeric/boolean JSON values do not break .strip().
        return ("" if value is None else str(value)).strip()[:limit]

    # Same length caps that update_playlist applies to these fields.
    name = _text(payload.get("name"), 200) or "Untitled"
    description = _text(payload.get("description"), 2000)

    playlist = {
        "id": str(uuid.uuid4()),
        "name": name,
        "description": description,
        "is_public": payload.get("isPublic", True),
        "song_ids": [],
    }
    playlists = _load_playlists()
    playlists.append(playlist)
    _save_playlists(playlists)
    return jsonify({"playlist": playlist})
@bp.route("/<playlist_id>", methods=["PATCH"])
def update_playlist(playlist_id: str):
    """PATCH /api/playlists/:id — update a playlist's name and/or description.

    The route must carry the ``playlist_id`` URL variable; without it Flask
    cannot supply the view's required argument.

    JSON body (all fields optional):
        name: New name, truncated to 200 characters.
        description: New description, truncated to 2000 characters.

    Returns:
        ``{"playlist": {...}}`` with the updated record, or 404 JSON when no
        playlist has the given id.
    """
    payload = request.get_json(silent=True)
    if not isinstance(payload, dict):
        payload = {}

    def _text(value, limit: int) -> str:
        # JSON null must not be stored as the literal string "None".
        return ("" if value is None else str(value))[:limit]

    playlists = _load_playlists()
    for playlist in playlists:
        if not isinstance(playlist, dict) or playlist.get("id") != playlist_id:
            continue
        if "name" in payload:
            playlist["name"] = _text(payload["name"], 200)
        if "description" in payload:
            playlist["description"] = _text(payload["description"], 2000)
        _save_playlists(playlists)
        return jsonify({"playlist": playlist})
    return jsonify({"error": "Playlist not found"}), 404
+PATCH /api/preferences — merge partial config and save. +No auth (local-only). +""" + +import os +from pathlib import Path + +from flask import Blueprint, jsonify, request + +from cdmf_paths import load_config, save_config + +bp = Blueprint("api_preferences", __name__) + + +def _deep_merge(base: dict, update: dict) -> dict: + """Merge update into base recursively. base is mutated and returned.""" + for k, v in update.items(): + if k in base and isinstance(base[k], dict) and isinstance(v, dict): + _deep_merge(base[k], v) + else: + base[k] = v + return base + + +@bp.route("", methods=["GET"], strict_slashes=False) +@bp.route("/", methods=["GET"], strict_slashes=False) +def get_preferences(): + """GET /api/preferences — return current app preferences (global + per-module).""" + config = load_config() + return jsonify(config) + + +@bp.route("", methods=["PATCH"], strict_slashes=False) +@bp.route("/", methods=["PATCH"], strict_slashes=False) +def update_preferences(): + """PATCH /api/preferences — merge partial preferences and save.""" + data = request.get_json(silent=True) + if not isinstance(data, dict): + return jsonify({"error": "JSON object required"}), 400 + config = load_config() + _deep_merge(config, data) + save_config(config) + # So ACE-Step and HuggingFace use the new models folder immediately + if "models_folder" in data and data["models_folder"]: + try: + os.environ["HF_HOME"] = str(Path(data["models_folder"]).resolve()) + except Exception: + pass + return jsonify(config) diff --git a/api/reference_tracks.py b/api/reference_tracks.py new file mode 100644 index 0000000..3ea1b38 --- /dev/null +++ b/api/reference_tracks.py @@ -0,0 +1,107 @@ +""" +Reference tracks API for new UI. Uploads in get_user_data_dir() / references/; +metadata in reference_tracks.json. No auth. 
def _save_meta(records: list) -> None:
    """Write the reference-track metadata list to reference_tracks.json.

    The data is first written to a sibling temporary file and then moved over
    the real file, so a failure part-way through serialisation or writing
    does not leave a truncated metadata file behind (``_load_meta`` treats an
    unreadable file as an empty library).

    Args:
        records: JSON-serialisable list of track records.
    """
    target = _meta_path()
    target.parent.mkdir(parents=True, exist_ok=True)
    tmp = target.with_name(target.name + ".tmp")
    try:
        with tmp.open("w", encoding="utf-8") as f:
            json.dump(records, f, indent=2)
        # Path.replace overwrites the destination in a single rename step.
        tmp.replace(target)
    except BaseException:
        # Do not leave a stray temp file around; re-raise the original error.
        try:
            tmp.unlink()
        except OSError:
            pass
        raise
@bp.route("/<ref_id>", methods=["DELETE"])
def delete_ref(ref_id: str):
    """DELETE /api/reference-tracks/:id — remove a reference track and its file.

    The route must carry the ``ref_id`` URL variable; without it Flask cannot
    supply the view's required argument.

    Returns:
        ``{"success": True}`` once the record (and its file, if present) is
        removed, 404 JSON when no record has the given id, or 500 JSON when
        the file exists but cannot be deleted (the record is kept in that
        case so the file is not orphaned).
    """
    records = _load_meta()
    for index, record in enumerate(records):
        if not isinstance(record, dict) or record.get("id") != ref_id:
            continue
        stored = record.get("filename") or record.get("storage_key")
        if stored:
            # Use only the final path component so a doctored metadata entry
            # cannot point the delete at a file outside the references dir.
            target = _refs_dir() / Path(str(stored)).name
            if target.is_file():
                try:
                    target.unlink()
                except OSError as e:
                    return jsonify({"error": str(e)}), 500
        records.pop(index)
        _save_meta(records)
        return jsonify({"success": True})
    return jsonify({"error": "Not found"}), 404
@bp.route("", methods=["GET"], strict_slashes=False)
@bp.route("/", methods=["GET"], strict_slashes=False)
def search():
    """GET /api/search?q=...&type=songs|creators|playlists|all — search local tracks.

    Songs match when the query is a case-insensitive substring of the track's
    title, style, or filename. Creators and playlists are not searchable in
    this local implementation, so those lists are always empty.

    Returns:
        ``{"songs": [...], "creators": [], "playlists": []}``.
    """
    q = (request.args.get("q") or "").strip()
    type_ = request.args.get("type", "all")

    # Nothing to look up for an empty query, and creator/playlist-only
    # searches never return songs, so skip the library scan entirely.
    if not q or type_ in ("creators", "playlists"):
        return jsonify({"songs": [], "creators": [], "playlists": []})

    songs = []
    try:
        # Deferred imports: a failure here degrades to "no results".
        from pathlib import Path

        import cdmf_tracks

        tracks = cdmf_tracks.list_music_files()
        meta = cdmf_tracks.load_track_meta()
        needle = q.lower()
        for name in tracks:
            info = meta.get(name, {})
            if not isinstance(info, dict):
                info = {}
            title = info.get("title") or name
            style = info.get("style") or ""
            haystacks = (title.lower(), style.lower(), name.lower())
            if not any(needle in h for h in haystacks):
                continue
            stem = Path(name).stem if name else name
            songs.append({
                "id": name,
                "title": title or stem,
                "style": style or stem,
                "audio_url": f"/audio/{name}",
                "creator": "Local",
                "user_id": "local",
            })
    except Exception:
        # Best effort: search must not break the UI when the library is unreadable.
        songs = []

    return jsonify({"songs": songs, "creators": [], "playlists": []})
def _song_from_filename(name: str, meta: dict) -> dict:
    """Build one song dict in the shape the UI expects for a generated track.

    Args:
        name: Track filename inside the configured output directory; it also
            serves as the song id.
        meta: Track metadata mapping, keyed by filename.

    Returns:
        A dict with the song fields (id, title, lyrics, style, audio_url,
        duration, tags, created_at, ...). ``duration`` is None when no
        positive duration is known.
    """
    path = _music_dir() / name
    info = meta.get(name)
    if not isinstance(info, dict):
        # Tolerate missing or malformed metadata entries.
        info = {}

    try:
        seconds = float(info.get("seconds") or 0.0)
    except (TypeError, ValueError):
        seconds = 0.0
    exists = path.is_file()
    if seconds <= 0 and exists:
        # No stored duration: probe the file, but never fail the listing.
        try:
            seconds = cdmf_tracks.get_audio_duration(path)
        except Exception:
            pass

    stem = path.stem if path.suffix else name
    return {
        "id": _filename_to_id(name),
        # Prefer the title stored in metadata (update_song writes it there);
        # fall back to the filename stem.
        "title": info.get("title") or stem,
        "lyrics": info.get("lyrics") or "",
        "style": info.get("style") or stem,
        "caption": info.get("caption") or stem,
        "cover_url": info.get("cover_url"),
        # Audio URL: frontend expects /audio/... for playback
        "audio_url": f"/audio/{name}",
        "duration": int(seconds) if seconds else None,
        "bpm": info.get("bpm"),
        "key_scale": info.get("key_scale"),
        "time_signature": info.get("time_signature"),
        "tags": info.get("tags") or [],
        "is_public": True,
        "like_count": 0,
        "view_count": info.get("view_count") or 0,
        "user_id": "local",
        "created_at": info.get("created") or (path.stat().st_mtime if exists else None),
        "creator": "Local",
    }
def _get_reference_song_by_id(ref_id: str):
    """Return the song dict for reference track ``ref_id``, or None if not found.

    The lookup goes through ``_load_reference_tracks_as_songs`` so that a
    single-song request sees exactly the same set of reference tracks, with
    the same fields, as the song listing does.

    Args:
        ref_id: Reference id without the ``ref:`` prefix.

    Returns:
        The matching song dict (its ``id`` carries the ``ref:`` prefix), or
        None when no reference track has that id.
    """
    wanted = REF_ID_PREFIX + ref_id
    for song in _load_reference_tracks_as_songs():
        if song.get("id") == wanted:
            return song
    return None
@bp.route("/<song_id>", methods=["PATCH"])
def update_song(song_id: str):
    """PATCH /api/songs/:id — update metadata (title, style, lyrics); ref tracks are read-only.

    The route must carry the ``song_id`` URL variable; without it Flask cannot
    supply the view's required argument.

    JSON body (all fields optional):
        title, style: Stored truncated to 500 characters.
        lyrics: Stored truncated to 10000 characters.

    Returns:
        ``{"song": {...}}`` with the current song, or 404 JSON when the song
        does not exist. For ``ref:`` ids the song is returned unchanged.
    """
    if song_id.startswith(REF_ID_PREFIX):
        ref_song = _get_reference_song_by_id(song_id[len(REF_ID_PREFIX):])
        if ref_song:
            return jsonify({"song": ref_song})  # no-op for refs
        return jsonify({"error": "Song not found"}), 404

    filename = _id_to_filename(song_id)
    if not (_music_dir() / filename).is_file():
        return jsonify({"error": "Song not found"}), 404

    payload = request.get_json(silent=True)
    if not isinstance(payload, dict):
        payload = {}

    meta = _track_meta()
    entry = meta.get(filename)
    if not isinstance(entry, dict):
        entry = {}
    for field, limit in (("title", 500), ("style", 500), ("lyrics", 10000)):
        if field in payload:
            value = payload[field]
            # JSON null clears the field instead of storing the text "None".
            entry[field] = ("" if value is None else str(value))[:limit]
    meta[filename] = entry
    _save_track_meta(meta)
    return jsonify({"song": _song_from_filename(filename, meta)})
@bp.route("/<song_id>/like", methods=["POST"])
def toggle_like(song_id: str):
    """POST /api/songs/:id/like — toggle the ``favorite`` flag in track metadata.

    The route must carry the ``song_id`` URL variable; without it Flask cannot
    supply the view's required argument.

    Returns:
        ``{"liked": bool}`` with the new state, or 404 JSON when the id does
        not name a file in the output directory.
    """
    filename = _id_to_filename(song_id)
    # Only existing tracks can be liked; otherwise every unknown id would
    # leave a permanent orphan entry in the metadata file.
    if not (_music_dir() / filename).is_file():
        return jsonify({"error": "Song not found"}), 404

    meta = _track_meta()
    entry = meta.get(filename)
    if not isinstance(entry, dict):
        entry = {}
    liked = not entry.get("favorite", False)
    entry["favorite"] = liked
    meta[filename] = entry
    _save_track_meta(meta)
    return jsonify({"liked": liked})
tracks = cdmf_tracks.list_music_files() + meta = _track_meta() + songs = [ + _song_from_filename(name, meta) + for name in tracks + if meta.get(name, {}).get("favorite") + ] + return jsonify({"songs": songs}) + + +@bp.route("//privacy", methods=["PATCH"]) +def update_privacy(song_id: str): + """PATCH /api/songs/:id/privacy — stub (all local public).""" + return jsonify({"isPublic": True}) + + +@bp.route("//play", methods=["POST"]) +def track_play(song_id: str): + """POST /api/songs/:id/play — stub.""" + return jsonify({"viewCount": 0}) + + +@bp.route("//comments", methods=["GET"]) +def get_comments(song_id: str): + """GET /api/songs/:id/comments — stub.""" + return jsonify({"comments": []}) + + +@bp.route("//comments", methods=["POST"]) +def add_comment(song_id: str): + """POST /api/songs/:id/comments — stub.""" + return jsonify({"comment": {"id": "stub", "content": ""}}), 201 + + +@bp.route("/comments/", methods=["DELETE"]) +def delete_comment(comment_id: str): + """DELETE /api/songs/comments/:commentId — stub.""" + return jsonify({"success": True}) diff --git a/api/users.py b/api/users.py new file mode 100644 index 0000000..19b85ba --- /dev/null +++ b/api/users.py @@ -0,0 +1,72 @@ +""" +Users API for new UI. No auth; stubs return fixed local user / empty lists. +Contract matches Express for compatibility. 
+""" + +from flask import Blueprint, jsonify + +from api.auth import LOCAL_USER + +bp = Blueprint("api_users", __name__) + + +@bp.route("/me", methods=["GET"]) +def get_me(): + return jsonify({"user": LOCAL_USER}) + + +@bp.route("/public/featured", methods=["GET"]) +def list_featured(): + return jsonify({"creators": []}) + + +@bp.route("/", methods=["GET"]) +def get_profile(username: str): + return jsonify({"user": {**LOCAL_USER, "username": username or LOCAL_USER["username"]}}) + + +@bp.route("//songs", methods=["GET"]) +def get_user_songs(username: str): + from api.songs import list_songs + return list_songs() + + +@bp.route("//playlists", methods=["GET"]) +def get_user_playlists(username: str): + from api.playlists import list_playlists + return list_playlists() + + +@bp.route("/me", methods=["PATCH"]) +def update_me(): + return jsonify({"user": LOCAL_USER}) + + +@bp.route("/me/avatar", methods=["POST"]) +def upload_avatar(): + return jsonify({"user": LOCAL_USER, "url": None}) + + +@bp.route("/me/banner", methods=["POST"]) +def upload_banner(): + return jsonify({"user": LOCAL_USER, "url": None}) + + +@bp.route("//follow", methods=["POST"]) +def follow(username: str): + return jsonify({"following": False, "followerCount": 0}) + + +@bp.route("//followers", methods=["GET"]) +def get_followers(username: str): + return jsonify({"followers": []}) + + +@bp.route("//following", methods=["GET"]) +def get_following(username: str): + return jsonify({"following": []}) + + +@bp.route("//stats", methods=["GET"]) +def get_stats(username: str): + return jsonify({"followerCount": 0, "followingCount": 0, "isFollowing": False}) diff --git a/build_local.sh b/build_local.sh index 468ec8d..0e08958 100755 --- a/build_local.sh +++ b/build_local.sh @@ -1,7 +1,14 @@ #!/bin/bash # --------------------------------------------------------------------------- # AceForge - Local Build Script -# Builds the PyInstaller app bundle for local testing +# Builds the PyInstaller app bundle for local 
testing. +# Includes the new React UI (ui/) when present; requires Bun (https://bun.sh). +# +# Optional env vars (safe, non-destructive caching for faster rebuilds): +# ACEFORGE_QUICK_BUILD=1 - Reuse PyInstaller cache (omit --clean, keep build/AceForge). +# Use when only code changed; full clean build if things break. +# ACEFORGE_SKIP_UI_BUILD=1 - Skip UI build; use existing ui/dist/. Use when only Python changed. +# ACEFORGE_SKIP_PIP=1 - Skip venv/pip steps. Use when deps unchanged and venv already ready. # --------------------------------------------------------------------------- set -e # Exit on error @@ -15,6 +22,32 @@ echo "" APP_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" cd "$APP_DIR" +# --------------------------------------------------------------------------- +# Build new UI (React/Vite) with Bun when ui/ exists; skip if ACEFORGE_SKIP_UI_BUILD=1. +# --------------------------------------------------------------------------- +UI_DIR="${APP_DIR}/ui" +if [ -f "$UI_DIR/package.json" ]; then + if [ -n "${ACEFORGE_SKIP_UI_BUILD}" ]; then + if [ ! -f "$UI_DIR/dist/index.html" ]; then + echo "ERROR: ACEFORGE_SKIP_UI_BUILD is set but ui/dist/index.html not found. Run without it once." + exit 1 + fi + echo "[Build] Skipping UI build (ACEFORGE_SKIP_UI_BUILD)" + else + if ! command -v bun &> /dev/null; then + echo "ERROR: Bun is required to build the new UI. Install from https://bun.sh" + exit 1 + fi + echo "[Build] Building new UI (React SPA) with Bun..." + "${APP_DIR}/scripts/build_ui.sh" + echo "[Build] New UI build OK" + fi +else + echo "ERROR: ui/package.json not found. The new UI source is required for the full app build." + exit 1 +fi +echo "" + # Check Python version PYTHON_CMD="" if command -v python3.11 &> /dev/null; then @@ -46,6 +79,10 @@ VENV_PY="${VENV_DIR}/bin/python" # Create/activate virtual environment if [ ! -f "$VENV_PY" ]; then + if [ -n "${ACEFORGE_SKIP_PIP}" ]; then + echo "ERROR: ACEFORGE_SKIP_PIP is set but venv_build not found. 
Run without it once." + exit 1 + fi echo "[Build] Creating virtual environment..." $PYTHON_CMD -m venv "$VENV_DIR" fi @@ -56,6 +93,7 @@ source "${VENV_DIR}/bin/activate" # Use venv Python for all installs and PyInstaller (ensures TTS and deps are in the bundle) PY="${VENV_PY}" +if [ -z "${ACEFORGE_SKIP_PIP}" ]; then # Upgrade pip echo "[Build] Upgrading pip..." "$PY" -m pip install --upgrade pip --quiet @@ -118,7 +156,14 @@ fi "$PY" -m pip install "pyinstaller>=6.0" --quiet -# Check for PyInstaller +# One last pass right before bundling, in case anything reintroduced Sudachi. +echo "[Build] Final check: removing Japanese Sudachi packages (if present)..." +"$PY" -m pip uninstall -y SudachiDict-core SudachiPy sudachidict-core sudachipy >/dev/null 2>&1 || true +else + echo "[Build] Skipping pip steps (ACEFORGE_SKIP_PIP)" +fi + +# Check for PyInstaller (always run) if ! "$PY" -m PyInstaller --version &> /dev/null; then echo "ERROR: PyInstaller not found. Please install it:" echo " $PY -m pip install pyinstaller" @@ -128,14 +173,17 @@ fi echo "[Build] PyInstaller version: $("$PY" -m PyInstaller --version)" echo "" -# One last pass right before bundling, in case anything reintroduced Sudachi. -echo "[Build] Final check: removing Japanese Sudachi packages (if present)..." -"$PY" -m pip uninstall -y SudachiDict-core SudachiPy sudachidict-core sudachipy >/dev/null 2>&1 || true - # Clean previous builds (PyInstaller outputs only). # NEVER delete build/macos/ — it contains AceForge.icns (app icon), codesign.sh, pyinstaller hooks. -echo "[Build] Cleaning previous builds..." -rm -rf dist/AceForge.app dist/CDMF build/AceForge +# NEVER delete ui/dist/ — may have been produced by the new UI build above. +# ACEFORGE_QUICK_BUILD=1: keep build/AceForge so PyInstaller can reuse cache. 
+if [ -n "${ACEFORGE_QUICK_BUILD}" ]; then + echo "[Build] Quick build: reusing PyInstaller cache (keeping build/AceForge)" + rm -rf dist/AceForge.app dist/CDMF +else + echo "[Build] Cleaning previous PyInstaller builds..." + rm -rf dist/AceForge.app dist/CDMF build/AceForge +fi # Safeguard: build/macos must exist for the app icon and code signing if [ ! -f "build/macos/AceForge.icns" ]; then @@ -144,10 +192,14 @@ if [ ! -f "build/macos/AceForge.icns" ]; then exit 1 fi -# Build with PyInstaller +# Build with PyInstaller (omit --clean when ACEFORGE_QUICK_BUILD=1 to reuse cache) echo "[Build] Building app bundle with PyInstaller..." echo "This may take several minutes..." -"$PY" -m PyInstaller CDMF.spec --clean --noconfirm +if [ -n "${ACEFORGE_QUICK_BUILD}" ]; then + "$PY" -m PyInstaller CDMF.spec --noconfirm +else + "$PY" -m PyInstaller CDMF.spec --clean --noconfirm +fi # Check if build succeeded BUNDLED_APP="${APP_DIR}/dist/AceForge.app" @@ -224,3 +276,5 @@ echo "" echo " 3. Or run directly:" echo " \"$BUNDLED_BIN\"" echo "" +echo " ✓ New React UI is bundled; app will serve it at / when launched." 
+echo "" diff --git a/cdmf_generation.py b/cdmf_generation.py index 5e07049..92df944 100644 --- a/cdmf_generation.py +++ b/cdmf_generation.py @@ -19,7 +19,7 @@ import cdmf_tracks from cdmf_paths import ( APP_DIR, - DEFAULT_OUT_DIR, + get_output_dir, TRAINING_DATA_ROOT, CUSTOM_LORA_ROOT, SEED_VIBES, @@ -81,10 +81,11 @@ def create_generation_blueprint( html_template: str, ui_defaults: Dict[str, Any], generate_track_ace: Callable[..., Dict[str, Any]], + serve_index: bool = True, ) -> Blueprint: """ Create a blueprint that defines: - * "/" -> index page + * "/" -> index page (optional; set serve_index=False when new UI serves /) * "/generate" -> ACE-Step generation endpoint """ bp = Blueprint("cdmf_generation", __name__) @@ -99,8 +100,7 @@ def create_generation_blueprint( ui_defaults.get("instrumental_gain_db", 0.0) ) - @bp.route("/", methods=["GET"]) - def index(): + def _index_view(): cdmf_state.reset_progress() tracks = cdmf_tracks.list_music_files() @@ -134,9 +134,9 @@ def index(): UI_DEFAULT_VOCAL_GAIN_DB=UI_DEFAULT_VOCAL_GAIN_DB, UI_DEFAULT_INSTRUMENTAL_GAIN_DB=UI_DEFAULT_INSTRUMENTAL_GAIN_DB, seed=0, - out_dir=DEFAULT_OUT_DIR, + out_dir=get_output_dir(), basename="Candy Dreams", - default_out_dir=DEFAULT_OUT_DIR, + default_out_dir=get_output_dir(), seed_vibe="any", seed_vibes=SEED_VIBES, message=None, @@ -158,6 +158,9 @@ def index(): lora_name_or_path="", ) + if serve_index: + bp.add_url_rule("/", "index", _index_view, methods=["GET"]) + @bp.route("/generate", methods=["POST"]) def generate(): presets = cdmf_tracks.load_presets() @@ -398,7 +401,7 @@ def generate(): # Misc / shared fields seed = int(request.form.get("seed", "0")) out_dir = ( - request.form.get("out_dir", DEFAULT_OUT_DIR).strip() or DEFAULT_OUT_DIR + (request.form.get("out_dir") or "").strip() or get_output_dir() ) basename = request.form.get("basename", "").strip() if not basename: @@ -585,6 +588,10 @@ def generate(): ) entry["lora_weight"] = summary.get("lora_weight", lora_weight) 
entry["generator"] = "gen" + tags = list(entry.get("tags") or []) + if "generation" not in tags: + tags.append("generation") + entry["tags"] = tags # Save input file as full path when available if src_audio_path: entry["input_file"] = src_audio_path @@ -603,7 +610,7 @@ def generate(): ) current_track = None - if wav_path.parent.resolve() == Path(DEFAULT_OUT_DIR).resolve(): + if wav_path.parent.resolve() == Path(get_output_dir()).resolve(): current_track = wav_path.name with cdmf_state.PROGRESS_LOCK: @@ -657,7 +664,7 @@ def generate(): seed=summary["seed"], out_dir=str(out_dir_path), basename=basename, - default_out_dir=DEFAULT_OUT_DIR, + default_out_dir=get_output_dir(), seed_vibe=seed_vibe, seed_vibes=SEED_VIBES, instrumental=instrumental, @@ -719,9 +726,9 @@ def generate(): UI_DEFAULT_VOCAL_GAIN_DB=UI_DEFAULT_VOCAL_GAIN_DB, UI_DEFAULT_INSTRUMENTAL_GAIN_DB=UI_DEFAULT_INSTRUMENTAL_GAIN_DB, seed=request.form.get("seed", "0"), - out_dir=request.form.get("out_dir", DEFAULT_OUT_DIR), + out_dir=request.form.get("out_dir") or get_output_dir(), basename=request.form.get("basename", "Candy Dreams"), - default_out_dir=DEFAULT_OUT_DIR, + default_out_dir=get_output_dir(), seed_vibe=request.form.get("seed_vibe", "any"), seed_vibes=SEED_VIBES, instrumental=instrumental, diff --git a/cdmf_generation_job.py b/cdmf_generation_job.py new file mode 100644 index 0000000..a968547 --- /dev/null +++ b/cdmf_generation_job.py @@ -0,0 +1,6 @@ +# Shared exception for cooperative cancellation of generation jobs. +# Used by api/generate (catches) and cdmf_pipeline_ace_step (raises) to avoid circular imports. 
+ + +class GenerationCancelled(Exception): + """Raised when a running generation is cancelled by the user via the cancel API.""" diff --git a/cdmf_midi_generation_bp.py b/cdmf_midi_generation_bp.py index dead4c9..ef2e63b 100644 --- a/cdmf_midi_generation_bp.py +++ b/cdmf_midi_generation_bp.py @@ -13,7 +13,7 @@ from werkzeug.utils import secure_filename import cdmf_tracks -from cdmf_paths import DEFAULT_OUT_DIR, APP_VERSION, get_next_available_output_path +from cdmf_paths import APP_VERSION, get_output_dir, get_next_available_output_path from cdmf_midi_generation import get_midi_generator logger = logging.getLogger(__name__) @@ -103,7 +103,7 @@ def midi_generate(): stem = output_filename # Get output directory (same as music generation) - out_dir = request.form.get("out_dir", DEFAULT_OUT_DIR) + out_dir = request.form.get("out_dir") or get_output_dir() out_dir_path = Path(out_dir) out_dir_path.mkdir(parents=True, exist_ok=True) @@ -173,6 +173,10 @@ def midi_generate(): entry["out_dir"] = str(out_dir_path) entry["original_file"] = str(temp_input_path) entry["input_file"] = str(temp_input_path) # Full path for consistency + tags = list(entry.get("tags") or []) + if "midi" not in tags: + tags.append("midi") + entry["tags"] = tags track_meta[midi_filename] = entry cdmf_tracks.save_track_meta(track_meta) diff --git a/cdmf_models.py b/cdmf_models.py index e832fe1..e4343b5 100644 --- a/cdmf_models.py +++ b/cdmf_models.py @@ -229,6 +229,87 @@ def models_stem_split_ensure(): except ImportError: pass + # Voice cloning (TTS/XTTS) model status and ensure - only if voice cloning is available + try: + from cdmf_voice_cloning import voice_clone_models_present, ensure_voice_clone_models + + def _download_voice_clone_models_worker() -> None: + """Background worker to pre-download and load TTS/XTTS model.""" + cdmf_state.reset_progress() + with cdmf_state.PROGRESS_LOCK: + cdmf_state.GENERATION_PROGRESS["stage"] = "voice_clone_model_download" + cdmf_state.GENERATION_PROGRESS["done"] = 
False + cdmf_state.GENERATION_PROGRESS["error"] = False + cdmf_state.GENERATION_PROGRESS["current"] = 0.0 + cdmf_state.GENERATION_PROGRESS["total"] = 1.0 + try: + def _progress(f: float) -> None: + with cdmf_state.PROGRESS_LOCK: + cdmf_state.GENERATION_PROGRESS["current"] = max(0.0, min(1.0, f)) + ensure_voice_clone_models(device_preference="auto", progress_cb=_progress) + with cdmf_state.VOICE_CLONE_LOCK: + cdmf_state.VOICE_CLONE_STATUS["state"] = "ready" + cdmf_state.VOICE_CLONE_STATUS["message"] = "XTTS voice cloning model is ready." + with cdmf_state.PROGRESS_LOCK: + cdmf_state.GENERATION_PROGRESS["current"] = 1.0 + cdmf_state.GENERATION_PROGRESS["stage"] = "done" + cdmf_state.GENERATION_PROGRESS["done"] = True + cdmf_state.GENERATION_PROGRESS["error"] = False + except Exception as exc: + with cdmf_state.VOICE_CLONE_LOCK: + cdmf_state.VOICE_CLONE_STATUS["state"] = "error" + cdmf_state.VOICE_CLONE_STATUS["message"] = f"Failed to load voice cloning model: {exc}" + with cdmf_state.PROGRESS_LOCK: + cdmf_state.GENERATION_PROGRESS["stage"] = "error" + cdmf_state.GENERATION_PROGRESS["done"] = True + cdmf_state.GENERATION_PROGRESS["error"] = True + + @bp.route("/models/voice_clone/status", methods=["GET"]) + def models_voice_clone_status(): + """Report whether the TTS/XTTS (voice cloning) model is loaded.""" + with cdmf_state.VOICE_CLONE_LOCK: + state = cdmf_state.VOICE_CLONE_STATUS["state"] + if state not in ("downloading", "ready"): + if voice_clone_models_present(): + with cdmf_state.VOICE_CLONE_LOCK: + cdmf_state.VOICE_CLONE_STATUS["state"] = "ready" + cdmf_state.VOICE_CLONE_STATUS["message"] = "XTTS voice cloning model is ready." + else: + with cdmf_state.VOICE_CLONE_LOCK: + if cdmf_state.VOICE_CLONE_STATUS["state"] == "unknown": + cdmf_state.VOICE_CLONE_STATUS["state"] = "absent" + cdmf_state.VOICE_CLONE_STATUS["message"] = ( + "Voice cloning model has not been downloaded yet." 
+ ) + with cdmf_state.VOICE_CLONE_LOCK: + state = cdmf_state.VOICE_CLONE_STATUS["state"] + message = cdmf_state.VOICE_CLONE_STATUS["message"] + return jsonify({"ok": True, "ready": state == "ready", "state": state, "message": message}) + + @bp.route("/models/voice_clone/ensure", methods=["POST"]) + def models_voice_clone_ensure(): + """Trigger a background download/load of the TTS/XTTS model if not present.""" + with cdmf_state.VOICE_CLONE_LOCK: + state = cdmf_state.VOICE_CLONE_STATUS["state"] + if state == "ready": + return jsonify({"ok": True, "already_ready": True}) + if state == "downloading": + return jsonify({"ok": True, "already_downloading": True}) + if voice_clone_models_present(): + with cdmf_state.VOICE_CLONE_LOCK: + cdmf_state.VOICE_CLONE_STATUS["state"] = "ready" + cdmf_state.VOICE_CLONE_STATUS["message"] = "XTTS voice cloning model is ready." + return jsonify({"ok": True, "already_ready": True}) + with cdmf_state.VOICE_CLONE_LOCK: + cdmf_state.VOICE_CLONE_STATUS["state"] = "downloading" + cdmf_state.VOICE_CLONE_STATUS["message"] = ( + "Downloading XTTS voice cloning model. This may take several minutes (first use only)." + ) + threading.Thread(target=_download_voice_clone_models_worker, daemon=True).start() + return jsonify({"ok": True, "started": True}) + except ImportError: + pass + # MIDI generation (basic-pitch) model status and ensure - only if MIDI generation is available try: from midi_model_setup import basic_pitch_models_present, ensure_basic_pitch_models diff --git a/cdmf_paths.py b/cdmf_paths.py index 240027d..399d6de 100644 --- a/cdmf_paths.py +++ b/cdmf_paths.py @@ -129,6 +129,20 @@ def _get_default_output_dir() -> Path: DEFAULT_OUT_DIR = str(_get_default_output_dir()) +def get_output_dir() -> str: + """Return configured global output directory, or default. 
Used when client does not send out_dir.""" + config = load_config() + path = config.get("output_dir") + if path: + try: + p = Path(path).resolve() + p.mkdir(parents=True, exist_ok=True) + return str(p) + except Exception as e: + print(f"[AceForge] Invalid output_dir in config: {e}", flush=True) + return DEFAULT_OUT_DIR + + def get_next_available_output_path(out_dir: Path | str, base_stem: str, ext: str = ".wav") -> Path: """ Return a path under out_dir for the given base name and extension that does not diff --git a/cdmf_pipeline_ace_step.py b/cdmf_pipeline_ace_step.py index 52d653c..81ebfe4 100644 --- a/cdmf_pipeline_ace_step.py +++ b/cdmf_pipeline_ace_step.py @@ -54,6 +54,11 @@ def no_grad(): import json import math +try: + from cdmf_generation_job import GenerationCancelled +except ImportError: + GenerationCancelled = Exception # fallback if module not available + try: from huggingface_hub import snapshot_download except ImportError as e: @@ -921,6 +926,8 @@ def flowedit_diffusion_process( n_max=1.0, n_avg=1, scheduler_type="euler", + shift: float = 6.0, + cancel_check=None, ): do_classifier_free_guidance = True @@ -932,7 +939,7 @@ def flowedit_diffusion_process( scheduler = FlowMatchEulerDiscreteScheduler( num_train_timesteps=1000, - shift=3.0, + shift=shift, ) T_steps = infer_steps @@ -996,7 +1003,8 @@ def flowedit_diffusion_process( logger.info("flowedit start from {} to {}".format(n_min, n_max)) for i, t in tqdm(enumerate(timesteps), total=T_steps): - + if cancel_check and callable(cancel_check) and cancel_check(): + raise GenerationCancelled() if i < n_min: continue @@ -1111,25 +1119,26 @@ def add_latents_noise( noise, scheduler_type, infer_steps, + shift: float = 6.0, ): bsz = gt_latents.shape[0] if scheduler_type == "euler": scheduler = FlowMatchEulerDiscreteScheduler( num_train_timesteps=1000, - shift=3.0, + shift=shift, sigma_max=sigma_max, ) elif scheduler_type == "heun": scheduler = FlowMatchHeunDiscreteScheduler( num_train_timesteps=1000, - 
shift=3.0, + shift=shift, sigma_max=sigma_max, ) elif scheduler_type == "pingpong": scheduler = FlowMatchPingPongScheduler( num_train_timesteps=1000, - shift=3.0, + shift=shift, sigma_max=sigma_max ) @@ -1180,6 +1189,8 @@ def text2music_diffusion_process( audio2audio_enable=False, ref_audio_strength=0.5, ref_latents=None, + shift: float = 6.0, + cancel_check=None, ): logger.info( @@ -1212,17 +1223,17 @@ def text2music_diffusion_process( if scheduler_type == "euler": scheduler = FlowMatchEulerDiscreteScheduler( num_train_timesteps=1000, - shift=3.0, + shift=shift, ) elif scheduler_type == "heun": scheduler = FlowMatchHeunDiscreteScheduler( num_train_timesteps=1000, - shift=3.0, + shift=shift, ) elif scheduler_type == "pingpong": scheduler = FlowMatchPingPongScheduler( num_train_timesteps=1000, - shift=3.0, + shift=shift, ) frame_length = int(duration * 44100 / 512 / 8) @@ -1400,6 +1411,7 @@ def text2music_diffusion_process( noise=target_latents, scheduler_type=scheduler_type, infer_steps=infer_steps, + shift=shift, ) attention_mask = torch.ones(bsz, frame_length, device=self.device, dtype=self.dtype) @@ -1523,7 +1535,8 @@ def hook(module, input, output): return sample for i, t in tqdm(enumerate(timesteps), total=num_inference_steps): - + if cancel_check and callable(cancel_check) and cancel_check(): + raise GenerationCancelled() if is_repaint: if i < n_min: continue @@ -1876,6 +1889,8 @@ def __call__( save_path: str = None, batch_size: int = 1, debug: bool = False, + shift: float = 6.0, + cancel_check=None, ): start_time = time.time() @@ -2029,6 +2044,8 @@ def __call__( n_max=edit_n_max, n_avg=edit_n_avg, scheduler_type=scheduler_type, + shift=shift, + cancel_check=cancel_check, ) else: target_latents = self.text2music_diffusion_process( @@ -2062,6 +2079,8 @@ def __call__( audio2audio_enable=audio2audio_enable, ref_audio_strength=ref_audio_strength, ref_latents=ref_latents, + shift=shift, + cancel_check=cancel_check, ) end_time = time.time() diff --git 
a/cdmf_state.py b/cdmf_state.py index 34f381d..680a062 100644 --- a/cdmf_state.py +++ b/cdmf_state.py @@ -70,6 +70,16 @@ "message": "", } +# --------------------------------------------------------------------------- +# Voice cloning (TTS/XTTS) model availability +# --------------------------------------------------------------------------- + +VOICE_CLONE_LOCK = threading.Lock() +VOICE_CLONE_STATUS: Dict[str, Any] = { + "state": "unknown", + "message": "", +} + # --------------------------------------------------------------------------- # Training state (ACE-Step LoRA) # --------------------------------------------------------------------------- diff --git a/cdmf_stem_splitting.py b/cdmf_stem_splitting.py index dbe6c3c..518d9f5 100644 --- a/cdmf_stem_splitting.py +++ b/cdmf_stem_splitting.py @@ -319,35 +319,27 @@ def split_audio( logger.info(f"Splitting audio: {input_path.name} -> {stem_count} stems using {model}") # Prepare arguments for demucs.separate.main - # Demucs CLI: demucs.separate.main(["-n", model, "-o", output_dir, input_file]) + # Demucs CLI supports --device (mps, cuda, cpu) for GPU acceleration on Apple Silicon / NVIDIA args = ["-n", model, "-o", str(output_path), str(input_path)] + # Explicit device so Demucs uses MPS on Apple Silicon (faster than CPU) + device_arg = self.device.type if self.device.type in ("mps", "cuda", "cpu") else "cpu" + args.extend(["--device", device_arg]) # Add two-stems option for 2-stem mode if stem_count == 2: args.append("--two-stems=vocals") - # Set device preference via environment (Demucs respects CUDA_VISIBLE_DEVICES, etc.) 
- # For MPS, we'll let PyTorch handle it automatically - try: import demucs.separate # Patch Demucs's tqdm to report progress self._patch_demucs_tqdm() - # Demucs will use the device based on PyTorch's default - # We can't directly pass device to demucs.separate.main, but - # we can set torch's default device before calling - try: - if self.device.type == "mps": - # MPS is already set as default via torch.device("mps") - # Demucs should pick it up automatically - pass - elif self.device.type == "cuda": - # Set CUDA device + if self.device.type == "cuda": + try: torch.cuda.set_device(self.device) - except Exception as e: - logger.warning(f"Could not set device preference: {e}") + except Exception as e: + logger.warning(f"Could not set CUDA device: {e}") # Report start _report_stem_split_progress(0.05, "stem_split_load") diff --git a/cdmf_stem_splitting_bp.py b/cdmf_stem_splitting_bp.py index f731881..f276a72 100644 --- a/cdmf_stem_splitting_bp.py +++ b/cdmf_stem_splitting_bp.py @@ -14,7 +14,7 @@ import cdmf_tracks import cdmf_state -from cdmf_paths import DEFAULT_OUT_DIR, APP_VERSION +from cdmf_paths import APP_VERSION, get_output_dir from cdmf_stem_splitting import get_stem_splitter logger = logging.getLogger(__name__) @@ -76,7 +76,7 @@ def stem_split(): export_format = "wav" # Get output directory (same as music generation) - out_dir = request.form.get("out_dir", DEFAULT_OUT_DIR) + out_dir = request.form.get("out_dir") or get_output_dir() out_dir_path = Path(out_dir) out_dir_path.mkdir(parents=True, exist_ok=True) @@ -137,50 +137,51 @@ def stem_split(): except Exception as e: logger.warning(f"Failed to clean up temp directory: {e}") - # Save track metadata for Music Player - # Each stem gets its own metadata entry - try: - from cdmf_ffmpeg import ensure_ffmpeg_in_path - ensure_ffmpeg_in_path() - - from pydub import AudioSegment - - track_meta = cdmf_tracks.load_track_meta() - - for stem_name, stem_path in stem_files.items(): - stem_filename = Path(stem_path).name 
+ # Save track metadata for Music Player so stems appear in library + # Each stem gets its own metadata entry; never fail the request if metadata has issues + track_meta = cdmf_tracks.load_track_meta() + base_filename_form = request.form.get("base_filename", "").strip() + for stem_name, stem_path in stem_files.items(): + stem_filename = Path(stem_path).name + dur = 0.0 + try: + from cdmf_ffmpeg import ensure_ffmpeg_in_path + ensure_ffmpeg_in_path() + from pydub import AudioSegment dur = len(AudioSegment.from_file(str(stem_path))) / 1000.0 - - entry = track_meta.get(stem_filename, {}) - if "favorite" not in entry: - entry["favorite"] = False - entry["seconds"] = dur - entry["created"] = time.time() - entry["generator"] = "stem" - entry["basename"] = Path(stem_filename).stem - # original_file already saved below - entry["stem_name"] = stem_name - entry["stem_count"] = stem_count - entry["mode"] = mode or "" - entry["export_format"] = export_format - entry["device_preference"] = device_preference - entry["out_dir"] = str(out_dir_path) - entry["original_file"] = str(temp_input_path) - entry["input_file"] = str(temp_input_path) # Full path for consistency - # Save base_filename if provided - base_filename = request.form.get("base_filename", "").strip() - if base_filename: - entry["base_filename"] = base_filename - track_meta[stem_filename] = entry - + except Exception as e: + if stem_path and Path(stem_path).is_file(): + try: + dur = cdmf_tracks.get_audio_duration(Path(stem_path)) + except Exception: + pass + logger.debug("[Stem Splitting] Duration for %s: %s (fallback used)", stem_filename, e) + entry = track_meta.get(stem_filename, {}) + if "favorite" not in entry: + entry["favorite"] = False + entry["seconds"] = dur + entry["created"] = time.time() + entry["generator"] = "stem" + entry["basename"] = Path(stem_filename).stem + entry["stem_name"] = stem_name + entry["stem_count"] = stem_count + entry["mode"] = mode or "" + entry["export_format"] = export_format + 
entry["device_preference"] = device_preference + entry["out_dir"] = str(out_dir_path) + entry["original_file"] = str(temp_input_path) + entry["input_file"] = str(temp_input_path) + tags = list(entry.get("tags") or []) + if "stems" not in tags: + tags.append("stems") + entry["tags"] = tags + if base_filename_form: + entry["base_filename"] = base_filename_form + track_meta[stem_filename] = entry + try: cdmf_tracks.save_track_meta(track_meta) except Exception as e: - from cdmf_ffmpeg import FFMPEG_INSTALL_HINT, is_ffmpeg_not_found_error - - if is_ffmpeg_not_found_error(e): - logger.warning("[Stem Splitting] Failed to save track metadata: %s", FFMPEG_INSTALL_HINT) - else: - logger.warning("[Stem Splitting] Failed to save track metadata: %s", e) + logger.warning("[Stem Splitting] Failed to save track metadata: %s", e) # Mark progress as done with cdmf_state.PROGRESS_LOCK: diff --git a/cdmf_tracks.py b/cdmf_tracks.py index 772baa6..94bc9a2 100644 --- a/cdmf_tracks.py +++ b/cdmf_tracks.py @@ -14,7 +14,7 @@ import cdmf_state from cdmf_paths import ( - DEFAULT_OUT_DIR, + get_output_dir, PRESETS_PATH, TRACK_META_PATH, USER_PRESETS_PATH, @@ -123,8 +123,8 @@ def get_audio_duration(path: Path) -> float: def list_music_files() -> List[str]: - """Return a sorted list of .wav, .mp3, and .mid files in the default music directory.""" - music_dir = Path(DEFAULT_OUT_DIR) + """Return a sorted list of .wav, .mp3, and .mid files in the configured output directory.""" + music_dir = Path(get_output_dir()) if not music_dir.exists(): return [] names = [ @@ -185,7 +185,7 @@ def create_tracks_blueprint() -> Blueprint: @bp.route("/music/") def serve_music(filename: str): """Serve audio files from the AceForge music directory.""" - return send_from_directory(DEFAULT_OUT_DIR, filename) + return send_from_directory(get_output_dir(), filename) @bp.route("/progress", methods=["GET"]) def get_progress(): @@ -232,7 +232,7 @@ def tracks_json(): tracks = list_music_files() meta = load_track_meta() - 
music_dir = Path(DEFAULT_OUT_DIR) + music_dir = Path(get_output_dir()) # Prefer the last generated track, if it's in the list with cdmf_state.PROGRESS_LOCK: @@ -294,7 +294,7 @@ def tracks_meta(): if not name: return jsonify({"error": "Missing track name"}), 400 - track_path = Path(DEFAULT_OUT_DIR) / name + track_path = Path(get_output_dir()) / name if not track_path.is_file(): return jsonify({"error": "Track not found"}), 404 @@ -311,7 +311,7 @@ def tracks_meta(): if not name: return jsonify({"error": "Missing track name"}), 400 - track_path = Path(DEFAULT_OUT_DIR) / name + track_path = Path(get_output_dir()) / name if not track_path.is_file(): return jsonify({"error": "Track not found"}), 404 @@ -418,8 +418,8 @@ def rename_track(): return jsonify({"error": "New track name cannot be empty."}), 400 final_name = new_base + ".wav" - old_path = Path(DEFAULT_OUT_DIR) / (old_base + ".wav") - new_path = Path(DEFAULT_OUT_DIR) / final_name + old_path = Path(get_output_dir()) / (old_base + ".wav") + new_path = Path(get_output_dir()) / final_name if not old_path.is_file(): return jsonify({"error": "Original track not found."}), 404 @@ -457,7 +457,7 @@ def delete_track(): if not name: return jsonify({"error": "Missing track name"}), 400 - track_path = Path(DEFAULT_OUT_DIR) / name + track_path = Path(get_output_dir()) / name if not track_path.is_file(): return jsonify({"error": "Track not found"}), 404 @@ -491,7 +491,7 @@ def reveal_in_finder(): if "/" in name or "\\" in name or ".." 
in name: return jsonify({"ok": False, "error": "Invalid track name"}), 400 - track_path = Path(DEFAULT_OUT_DIR) / name + track_path = Path(get_output_dir()) / name if not track_path.is_file(): return jsonify({"ok": False, "error": "Track not found"}), 404 diff --git a/cdmf_trainer.py b/cdmf_trainer.py index 12a1863..ba9df11 100644 --- a/cdmf_trainer.py +++ b/cdmf_trainer.py @@ -31,6 +31,7 @@ import random import os from cdmf_pipeline_ace_step import ACEStepPipeline +from cdmf_paths import CUSTOM_LORA_ROOT matplotlib.use("Agg") # Configure CUDA backends if available @@ -1021,9 +1022,8 @@ def _save_lora_adapter(self, tag: str) -> None: run_ckpt_dir, adapter_name=self.adapter_name ) - # Stable copy under /custom_lora/ - app_dir = Path(__file__).resolve().parent - custom_root = app_dir / "custom_lora" / self.adapter_name + # Stable copy under CUSTOM_LORA_ROOT (same as list_lora_adapters; macOS = user data dir) + custom_root = CUSTOM_LORA_ROOT / self.adapter_name os.makedirs(custom_root, exist_ok=True) self.transformers.save_lora_adapter( str(custom_root), adapter_name=self.adapter_name @@ -1352,46 +1352,15 @@ def main(args): ) -if __name__ == "__main__": - args = argparse.ArgumentParser() - args.add_argument("--num_nodes", type=int, default=1) - args.add_argument("--shift", type=float, default=3.0) - args.add_argument("--learning_rate", type=float, default=1e-4) - args.add_argument("--num_workers", type=int, default=8) - - # Stop training by epochs by default; this is what we’ll expose in the UI - args.add_argument("--epochs", type=int, default=20) - - # By default, do NOT stop by max_steps (Lightning treats -1 as "no step limit") - args.add_argument("--max_steps", type=int, default=-1) - - args.add_argument("--every_n_train_steps", type=int, default=50) - args.add_argument("--dataset_path", type=str, default="./zh_lora_dataset") - args.add_argument("--exp_name", type=str, default="chinese_rap_lora") - args.add_argument("--precision", type=str, default="32") - 
args.add_argument("--accumulate_grad_batches", type=int, default=1) - args.add_argument("--devices", type=int, default=1) - args.add_argument("--logger_dir", type=str, default="./exps/logs/") - args.add_argument("--ckpt_path", type=str, default=None) - args.add_argument("--checkpoint_dir", type=str, default=None) - args.add_argument("--gradient_clip_val", type=float, default=0.5) - args.add_argument("--gradient_clip_algorithm", type=str, default="norm") - args.add_argument("--reload_dataloaders_every_n_epochs", type=int, default=1) - args.add_argument("--every_plot_step", type=int, default=2000) - args.add_argument("--val_check_interval", type=int, default=None) - args.add_argument("--lora_config_path", type=str, default="config/zh_rap_lora_config.json") - - # New knobs - args.add_argument("--ssl_coeff", type=float, default=1.0) - args.add_argument("--max_audio_seconds", type=float, default=60.0) - args.add_argument( - "--instrumental_only", - action="store_true", - help=( - "Treat dataset as instrumental / no vocals. " - "LoRA layers attached to lyric and speaker-specific blocks will be frozen." - ), - ) +def run_from_argv(): + """Parse sys.argv and run training. Used when the frozen app is launched with --train.""" + from cdmf_trainer_parser import _make_parser + parser = _make_parser() + args = parser.parse_args() + main(args) - args = args.parse_args() + +if __name__ == "__main__": + from cdmf_trainer_parser import _make_parser + args = _make_parser().parse_args() main(args) \ No newline at end of file diff --git a/cdmf_trainer_parser.py b/cdmf_trainer_parser.py new file mode 100644 index 0000000..25b4822 --- /dev/null +++ b/cdmf_trainer_parser.py @@ -0,0 +1,43 @@ +# Parser-only module for the LoRA trainer CLI. +# Used by aceforge_app for --train --help so we can show help without importing +# heavy deps (diffusers, pytorch_lightning, etc.) in the frozen app. 
+from __future__ import annotations + +import argparse + + +def _make_parser() -> argparse.ArgumentParser: + """Build the trainer ArgumentParser (used by cdmf_trainer and by aceforge_app for --train --help).""" + p = argparse.ArgumentParser() + p.add_argument("--num_nodes", type=int, default=1) + p.add_argument("--shift", type=float, default=3.0) + p.add_argument("--learning_rate", type=float, default=1e-4) + p.add_argument("--num_workers", type=int, default=8) + p.add_argument("--epochs", type=int, default=20) + p.add_argument("--max_steps", type=int, default=-1) + p.add_argument("--every_n_train_steps", type=int, default=50) + p.add_argument("--dataset_path", type=str, default="./zh_lora_dataset") + p.add_argument("--exp_name", type=str, default="chinese_rap_lora") + p.add_argument("--precision", type=str, default="32") + p.add_argument("--accumulate_grad_batches", type=int, default=1) + p.add_argument("--devices", type=int, default=1) + p.add_argument("--logger_dir", type=str, default="./exps/logs/") + p.add_argument("--ckpt_path", type=str, default=None) + p.add_argument("--checkpoint_dir", type=str, default=None) + p.add_argument("--gradient_clip_val", type=float, default=0.5) + p.add_argument("--gradient_clip_algorithm", type=str, default="norm") + p.add_argument("--reload_dataloaders_every_n_epochs", type=int, default=1) + p.add_argument("--every_plot_step", type=int, default=2000) + p.add_argument("--val_check_interval", type=int, default=None) + p.add_argument("--lora_config_path", type=str, default="config/zh_rap_lora_config.json") + p.add_argument("--ssl_coeff", type=float, default=1.0) + p.add_argument("--max_audio_seconds", type=float, default=60.0) + p.add_argument( + "--instrumental_only", + action="store_true", + help=( + "Treat dataset as instrumental / no vocals. " + "LoRA layers attached to lyric and speaker-specific blocks will be frozen." 
+ ), + ) + return p diff --git a/cdmf_training.py b/cdmf_training.py index e9e3a70..cf81c9a 100644 --- a/cdmf_training.py +++ b/cdmf_training.py @@ -2,6 +2,7 @@ from __future__ import annotations +import sys from pathlib import Path from typing import Optional, Tuple, Dict, Any @@ -12,6 +13,7 @@ import psutil from flask import Blueprint, jsonify, request +from werkzeug.utils import secure_filename from ace_model_setup import ace_models_present from cdmf_paths import ( @@ -230,9 +232,13 @@ def _start_lora_training( f"{ds_path}: {exc}" ) - trainer_script = APP_DIR / "cdmf_trainer.py" - if not trainer_script.exists(): - return False, f"trainer.py not found at {trainer_script}" + # When frozen, we run the same executable with --train + args (no .py script). + # When not frozen, we run cdmf_trainer.py as a subprocess. + frozen = getattr(sys, "frozen", False) + if not frozen: + trainer_script = APP_DIR / "cdmf_trainer.py" + if not trainer_script.exists(): + return False, f"trainer script not found at {trainer_script}" # Training logs live under APP_DIR / ace_training / , but the # heavy ACE-Step base model weights are cached in a shared root folder. @@ -267,10 +273,10 @@ def _start_lora_training( cfg_path_str = str(cfg_path) - # Base command + # Base command: when frozen, second arg is --train (entry point); else path to trainer script cmd: list[str] = [ sys.executable, - str(trainer_script), + "--train" if frozen else str(APP_DIR / "cdmf_trainer.py"), "--dataset_path", str(hf_ds_path), "--exp_name", @@ -770,6 +776,44 @@ def train_lora(): flush=True, ) + # If the UI sent dataset_files (e.g. from Browse folder), save them under + # TRAINING_DATA_ROOT / dataset_path so _start_lora_training can use them. + uploaded = request.files.getlist("dataset_files") or request.files.getlist("files") + if uploaded and dataset_path: + ds_rel = Path(dataset_path) + if ds_rel.is_absolute() or ".." in dataset_path or dataset_path.startswith("/"): + html = ( + "
"
+                    "ERROR: dataset_path must be a relative folder name (no path traversal).\n"
+                    "
" + ) + return html + target_dir = (TRAINING_DATA_ROOT / ds_rel).resolve() + try: + training_root_real = TRAINING_DATA_ROOT.resolve() + except Exception: # noqa: BLE001 + training_root_real = TRAINING_DATA_ROOT + if not str(target_dir).startswith(str(training_root_real)): + html = ( + "
"
+                    "ERROR: dataset_path must be under training_datasets.\n"
+                    "
" + ) + return html + target_dir.mkdir(parents=True, exist_ok=True) + for f in uploaded: + if not f or not f.filename: + continue + name = secure_filename(f.filename) + if not name: + continue + dest = target_dir / name + try: + f.save(str(dest)) + except Exception as e: # noqa: BLE001 + print(f"[CDMF] Failed to save uploaded file {f.filename}: {e}", flush=True) + print(f"[CDMF] Uploaded {len([x for x in uploaded if x and x.filename])} files to {target_dir}", flush=True) + # LoRA config selection: # If the user picks a simple file name from the dropdown # (e.g. "light_full_stack.json"), resolve it relative to diff --git a/cdmf_voice_cloning.py b/cdmf_voice_cloning.py index 1b5989d..8fb3ba9 100644 --- a/cdmf_voice_cloning.py +++ b/cdmf_voice_cloning.py @@ -26,7 +26,7 @@ import torch from pathlib import Path -from typing import Optional, Dict, Any, Tuple +from typing import Optional, Dict, Any, Tuple, Callable import logging logger = logging.getLogger(__name__) @@ -284,3 +284,27 @@ def get_voice_cloner() -> VoiceCloner: if _voice_cloner is None: _voice_cloner = VoiceCloner() return _voice_cloner + + +def voice_clone_models_present() -> bool: + """Return True if the TTS/XTTS model is already loaded (initialized).""" + global _voice_cloner + return _voice_cloner is not None and getattr(_voice_cloner, "_initialized", False) + + +def ensure_voice_clone_models(device_preference: str = "auto", progress_cb: Optional[Callable[[float], None]] = None) -> None: + """ + Pre-download and load the TTS/XTTS model in the current process. + progress_cb(fraction) is called with 0.0 at start and 1.0 when done (TTS does not expose download progress). 
+ """ + if progress_cb: + try: + progress_cb(0.0) + except Exception: + pass + get_voice_cloner()._initialize(device_preference=device_preference) + if progress_cb: + try: + progress_cb(1.0) + except Exception: + pass diff --git a/cdmf_voice_cloning_bp.py b/cdmf_voice_cloning_bp.py index 7629b8c..72fe746 100644 --- a/cdmf_voice_cloning_bp.py +++ b/cdmf_voice_cloning_bp.py @@ -13,7 +13,7 @@ from werkzeug.utils import secure_filename import cdmf_tracks -from cdmf_paths import DEFAULT_OUT_DIR, APP_VERSION, get_next_available_output_path +from cdmf_paths import APP_VERSION, get_output_dir, get_next_available_output_path from cdmf_voice_cloning import get_voice_cloner logger = logging.getLogger(__name__) @@ -83,7 +83,7 @@ def voice_clone(): output_filename += ".mp3" # Get output directory (same as music generation) - out_dir = request.form.get("out_dir", DEFAULT_OUT_DIR) + out_dir = request.form.get("out_dir") or get_output_dir() out_dir_path = Path(out_dir) out_dir_path.mkdir(parents=True, exist_ok=True) @@ -165,6 +165,10 @@ def voice_clone(): entry["enable_text_splitting"] = enable_text_splitting entry["device_preference"] = device_preference entry["out_dir"] = str(out_dir_path) + tags = list(entry.get("tags") or []) + if "voice_cloning" not in tags: + tags.append("voice_cloning") + entry["tags"] = tags track_meta[final_name] = entry cdmf_tracks.save_track_meta(track_meta) except Exception as e: diff --git a/docs/ACE-Step-INFERENCE.md b/docs/ACE-Step-INFERENCE.md new file mode 100644 index 0000000..fa91170 --- /dev/null +++ b/docs/ACE-Step-INFERENCE.md @@ -0,0 +1,1206 @@ + + +# ACE-Step Inference API Documentation + +**Language / 语言 / 言語:** [English](INFERENCE.md) | [中文](../zh/INFERENCE.md) | [日本語](../ja/INFERENCE.md) + +--- + +This document provides comprehensive documentation for the ACE-Step inference API, including parameter specifications for all supported task types. 
+ +## Table of Contents + +- [Quick Start](#quick-start) +- [API Overview](#api-overview) +- [GenerationParams Parameters](#generationparams-parameters) +- [GenerationConfig Parameters](#generationconfig-parameters) +- [Task Types](#task-types) +- [Helper Functions](#helper-functions) +- [Complete Examples](#complete-examples) +- [Best Practices](#best-practices) + +--- + +## Quick Start + +### Basic Usage + +```python +from acestep.handler import AceStepHandler +from acestep.llm_inference import LLMHandler +from acestep.inference import GenerationParams, GenerationConfig, generate_music + +# Initialize handlers +dit_handler = AceStepHandler() +llm_handler = LLMHandler() + +# Initialize services +dit_handler.initialize_service( + project_root="/path/to/project", + config_path="acestep-v15-turbo", + device="cuda" +) + +llm_handler.initialize( + checkpoint_dir="/path/to/checkpoints", + lm_model_path="acestep-5Hz-lm-0.6B", + backend="vllm", + device="cuda" +) + +# Configure generation parameters +params = GenerationParams( + caption="upbeat electronic dance music with heavy bass", + bpm=128, + duration=30, +) + +# Configure generation settings +config = GenerationConfig( + batch_size=2, + audio_format="flac", +) + +# Generate music +result = generate_music(dit_handler, llm_handler, params, config, save_dir="/path/to/output") + +# Access results +if result.success: + for audio in result.audios: + print(f"Generated: {audio['path']}") + print(f"Key: {audio['key']}") + print(f"Seed: {audio['params']['seed']}") +else: + print(f"Error: {result.error}") +``` + +--- + +## API Overview + +### Main Functions + +#### generate_music + +```python +def generate_music( + dit_handler, + llm_handler, + params: GenerationParams, + config: GenerationConfig, + save_dir: Optional[str] = None, + progress=None, +) -> GenerationResult +``` + +Main function for generating music using the ACE-Step model. 
+ +#### understand_music + +```python +def understand_music( + llm_handler, + audio_codes: str, + temperature: float = 0.85, + top_k: Optional[int] = None, + top_p: Optional[float] = None, + repetition_penalty: float = 1.0, + use_constrained_decoding: bool = True, + constrained_decoding_debug: bool = False, +) -> UnderstandResult +``` + +Analyze audio semantic codes and extract metadata (caption, lyrics, BPM, key, etc.). + +#### create_sample + +```python +def create_sample( + llm_handler, + query: str, + instrumental: bool = False, + vocal_language: Optional[str] = None, + temperature: float = 0.85, + top_k: Optional[int] = None, + top_p: Optional[float] = None, + repetition_penalty: float = 1.0, + use_constrained_decoding: bool = True, + constrained_decoding_debug: bool = False, +) -> CreateSampleResult +``` + +Generate a complete music sample (caption, lyrics, metadata) from a natural language description. + +#### format_sample + +```python +def format_sample( + llm_handler, + caption: str, + lyrics: str, + user_metadata: Optional[Dict[str, Any]] = None, + temperature: float = 0.85, + top_k: Optional[int] = None, + top_p: Optional[float] = None, + repetition_penalty: float = 1.0, + use_constrained_decoding: bool = True, + constrained_decoding_debug: bool = False, +) -> FormatSampleResult +``` + +Format and enhance user-provided caption and lyrics, generating structured metadata. 
+ +### Configuration Objects + +The API uses two configuration dataclasses: + +**GenerationParams** - Contains all music generation parameters: + +```python +@dataclass +class GenerationParams: + # Task & Instruction + task_type: str = "text2music" + instruction: str = "Fill the audio semantic mask based on the given conditions:" + + # Audio Uploads + reference_audio: Optional[str] = None + src_audio: Optional[str] = None + + # LM Codes Hints + audio_codes: str = "" + + # Text Inputs + caption: str = "" + lyrics: str = "" + instrumental: bool = False + + # Metadata + vocal_language: str = "unknown" + bpm: Optional[int] = None + keyscale: str = "" + timesignature: str = "" + duration: float = -1.0 + + # Advanced Settings + inference_steps: int = 8 + seed: int = -1 + guidance_scale: float = 7.0 + use_adg: bool = False + cfg_interval_start: float = 0.0 + cfg_interval_end: float = 1.0 + shift: float = 1.0 # NEW: Timestep shift factor + infer_method: str = "ode" # NEW: Diffusion inference method + timesteps: Optional[List[float]] = None # NEW: Custom timesteps + + repainting_start: float = 0.0 + repainting_end: float = -1 + audio_cover_strength: float = 1.0 + + # 5Hz Language Model Parameters + thinking: bool = True + lm_temperature: float = 0.85 + lm_cfg_scale: float = 2.0 + lm_top_k: int = 0 + lm_top_p: float = 0.9 + lm_negative_prompt: str = "NO USER INPUT" + use_cot_metas: bool = True + use_cot_caption: bool = True + use_cot_lyrics: bool = False + use_cot_language: bool = True + use_constrained_decoding: bool = True + + # CoT Generated Values (auto-filled by LM) + cot_bpm: Optional[int] = None + cot_keyscale: str = "" + cot_timesignature: str = "" + cot_duration: Optional[float] = None + cot_vocal_language: str = "unknown" + cot_caption: str = "" + cot_lyrics: str = "" +``` + +**GenerationConfig** - Contains batch and output configuration: + +```python +@dataclass +class GenerationConfig: + batch_size: int = 2 + allow_lm_batch: bool = False + use_random_seed: bool = 
True + seeds: Optional[List[int]] = None + lm_batch_chunk_size: int = 8 + constrained_decoding_debug: bool = False + audio_format: str = "flac" +``` + +### Result Objects + +**GenerationResult** - Result of music generation: + +```python +@dataclass +class GenerationResult: + # Audio Outputs + audios: List[Dict[str, Any]] # List of audio dictionaries + + # Generation Information + status_message: str # Status message from generation + extra_outputs: Dict[str, Any] # Extra outputs (latents, masks, lm_metadata, time_costs) + + # Success Status + success: bool # Whether generation succeeded + error: Optional[str] # Error message if failed +``` + +**Audio Dictionary Structure:** + +Each item in `audios` list contains: + +```python +{ + "path": str, # File path to saved audio + "tensor": Tensor, # Audio tensor [channels, samples], CPU, float32 + "key": str, # Unique audio key (UUID based on params) + "sample_rate": int, # Sample rate (default: 48000) + "params": Dict, # Generation params for this audio (includes seed, audio_codes, etc.) 
+} +``` + +**UnderstandResult** - Result of music understanding: + +```python +@dataclass +class UnderstandResult: + # Metadata Fields + caption: str = "" + lyrics: str = "" + bpm: Optional[int] = None + duration: Optional[float] = None + keyscale: str = "" + language: str = "" + timesignature: str = "" + + # Status + status_message: str = "" + success: bool = True + error: Optional[str] = None +``` + +**CreateSampleResult** - Result of sample creation: + +```python +@dataclass +class CreateSampleResult: + # Metadata Fields + caption: str = "" + lyrics: str = "" + bpm: Optional[int] = None + duration: Optional[float] = None + keyscale: str = "" + language: str = "" + timesignature: str = "" + instrumental: bool = False + + # Status + status_message: str = "" + success: bool = True + error: Optional[str] = None +``` + +**FormatSampleResult** - Result of sample formatting: + +```python +@dataclass +class FormatSampleResult: + # Metadata Fields + caption: str = "" + lyrics: str = "" + bpm: Optional[int] = None + duration: Optional[float] = None + keyscale: str = "" + language: str = "" + timesignature: str = "" + + # Status + status_message: str = "" + success: bool = True + error: Optional[str] = None +``` + +--- + +## GenerationParams Parameters + +### Text Inputs + +| Parameter | Type | Default | Description | +|-----------|------|---------|-------------| +| `caption` | `str` | `""` | Text description of the desired music. Can be a simple prompt like "relaxing piano music" or detailed description with genre, mood, instruments, etc. Max 512 characters. | +| `lyrics` | `str` | `""` | Lyrics text for vocal music. Use `"[Instrumental]"` for instrumental tracks. Supports multiple languages. Max 4096 characters. | +| `instrumental` | `bool` | `False` | If True, generate instrumental music regardless of lyrics. 
| + +### Music Metadata + +| Parameter | Type | Default | Description | +|-----------|------|---------|-------------| +| `bpm` | `Optional[int]` | `None` | Beats per minute (30-300). `None` enables auto-detection via LM. | +| `keyscale` | `str` | `""` | Musical key (e.g., "C Major", "Am", "F# minor"). Empty string enables auto-detection. | +| `timesignature` | `str` | `""` | Time signature (2 for '2/4', 3 for '3/4', 4 for '4/4', 6 for '6/8'). Empty string enables auto-detection. | +| `vocal_language` | `str` | `"unknown"` | Language code for vocals (ISO 639-1). Supported: `"en"`, `"zh"`, `"ja"`, `"es"`, `"fr"`, etc. Use `"unknown"` for auto-detection. | +| `duration` | `float` | `-1.0` | Target audio length in seconds (10-600). If <= 0 or None, model chooses automatically based on lyrics length. | + +### Generation Parameters + +| Parameter | Type | Default | Description | +|-----------|------|---------|-------------| +| `inference_steps` | `int` | `8` | Number of denoising steps. Turbo model: 1-20 (recommended 8). Base model: 1-200 (recommended 32-64). Higher = better quality but slower. | +| `guidance_scale` | `float` | `7.0` | Classifier-free guidance scale (1.0-15.0). Higher values increase adherence to text prompt. Only supported for non-turbo model. Typical range: 5.0-9.0. | +| `seed` | `int` | `-1` | Random seed for reproducibility. Use `-1` for random seed, or any positive integer for fixed seed. | + +### Advanced DiT Parameters + +| Parameter | Type | Default | Description | +|-----------|------|---------|-------------| +| `use_adg` | `bool` | `False` | Use Adaptive Dual Guidance (base model only). Improves quality at the cost of speed. | +| `cfg_interval_start` | `float` | `0.0` | CFG application start ratio (0.0-1.0). Controls when to start applying classifier-free guidance. | +| `cfg_interval_end` | `float` | `1.0` | CFG application end ratio (0.0-1.0). Controls when to stop applying classifier-free guidance. 
| +| `shift` | `float` | `1.0` | Timestep shift factor (range 1.0-5.0, default 1.0). When != 1.0, applies `t = shift * t / (1 + (shift - 1) * t)` to timesteps. Recommended 3.0 for turbo models. | +| `infer_method` | `str` | `"ode"` | Diffusion inference method. `"ode"` (Euler) is faster and deterministic. `"sde"` (stochastic) may produce different results with variance. | +| `timesteps` | `Optional[List[float]]` | `None` | Custom timesteps as a list of floats from 1.0 to 0.0 (e.g., `[0.97, 0.76, 0.615, 0.5, 0.395, 0.28, 0.18, 0.085, 0]`). If provided, overrides `inference_steps` and `shift`. | + +### Task-Specific Parameters + +| Parameter | Type | Default | Description | +|-----------|------|---------|-------------| +| `task_type` | `str` | `"text2music"` | Generation task type. See [Task Types](#task-types) section for details. | +| `instruction` | `str` | `"Fill the audio semantic mask based on the given conditions:"` | Task-specific instruction prompt. | +| `reference_audio` | `Optional[str]` | `None` | Path to reference audio file for style transfer or continuation tasks. | +| `src_audio` | `Optional[str]` | `None` | Path to source audio file for audio-to-audio tasks (cover, repaint, etc.). | +| `audio_codes` | `str` | `""` | Pre-extracted 5Hz audio semantic codes as a string. Advanced use only. | +| `repainting_start` | `float` | `0.0` | Repainting start time in seconds (for repaint/lego tasks). | +| `repainting_end` | `float` | `-1` | Repainting end time in seconds. Use `-1` for end of audio. | +| `audio_cover_strength` | `float` | `1.0` | Strength of audio cover/codes influence (0.0-1.0). Set smaller (0.2) for style transfer tasks. | + +### 5Hz Language Model Parameters + +| Parameter | Type | Default | Description | +|-----------|------|---------|-------------| +| `thinking` | `bool` | `True` | Enable 5Hz Language Model "Chain-of-Thought" reasoning for semantic/music metadata and codes. 
| +| `lm_temperature` | `float` | `0.85` | LM sampling temperature (0.0-2.0). Higher = more creative/diverse, lower = more conservative. | +| `lm_cfg_scale` | `float` | `2.0` | LM classifier-free guidance scale. Higher = stronger adherence to prompt. | +| `lm_top_k` | `int` | `0` | LM top-k sampling. `0` disables top-k filtering. Typical values: 40-100. | +| `lm_top_p` | `float` | `0.9` | LM nucleus sampling (0.0-1.0). `1.0` disables nucleus sampling. Typical values: 0.9-0.95. | +| `lm_negative_prompt` | `str` | `"NO USER INPUT"` | Negative prompt for LM guidance. Helps avoid unwanted characteristics. | +| `use_cot_metas` | `bool` | `True` | Generate metadata using LM CoT reasoning (BPM, key, duration, etc.). | +| `use_cot_caption` | `bool` | `True` | Refine user caption using LM CoT reasoning. | +| `use_cot_language` | `bool` | `True` | Detect vocal language using LM CoT reasoning. | +| `use_cot_lyrics` | `bool` | `False` | (Reserved for future use) Generate/refine lyrics using LM CoT. | +| `use_constrained_decoding` | `bool` | `True` | Enable constrained decoding for structured LM output. | + +### CoT Generated Values + +These fields are automatically populated by the LM when CoT reasoning is enabled: + +| Parameter | Type | Default | Description | +|-----------|------|---------|-------------| +| `cot_bpm` | `Optional[int]` | `None` | LM-generated BPM value. | +| `cot_keyscale` | `str` | `""` | LM-generated key/scale. | +| `cot_timesignature` | `str` | `""` | LM-generated time signature. | +| `cot_duration` | `Optional[float]` | `None` | LM-generated duration. | +| `cot_vocal_language` | `str` | `"unknown"` | LM-detected vocal language. | +| `cot_caption` | `str` | `""` | LM-refined caption. | +| `cot_lyrics` | `str` | `""` | LM-generated/refined lyrics. 
| + +--- + +## GenerationConfig Parameters + +| Parameter | Type | Default | Description | +|-----------|------|---------|-------------| +| `batch_size` | `int` | `2` | Number of samples to generate in parallel (1-8). Higher values require more GPU memory. | +| `allow_lm_batch` | `bool` | `False` | Allow batch processing in LM. Faster when `batch_size >= 2` and `thinking=True`. | +| `use_random_seed` | `bool` | `True` | Whether to use random seed. `True` for different results each time, `False` for reproducible results. | +| `seeds` | `Optional[List[int]]` | `None` | List of seeds for batch generation. If provided, will be padded with random seeds if fewer than batch_size. Can also be single int. | +| `lm_batch_chunk_size` | `int` | `8` | Maximum batch size per LM inference chunk (GPU memory constraint). | +| `constrained_decoding_debug` | `bool` | `False` | Enable debug logging for constrained decoding. | +| `audio_format` | `str` | `"flac"` | Output audio format. Options: `"mp3"`, `"wav"`, `"flac"`. Default is FLAC for fast saving. | + +--- + +## Task Types + +ACE-Step supports 6 different generation task types, each optimized for specific use cases. + +### 1. Text2Music (Default) + +**Purpose**: Generate music from text descriptions and optional metadata. + +**Key Parameters**: +```python +params = GenerationParams( + task_type="text2music", + caption="energetic rock music with electric guitar", + lyrics="[Instrumental]", # or actual lyrics + bpm=140, + duration=30, +) +``` + +**Required**: +- `caption` or `lyrics` (at least one) + +**Optional but Recommended**: +- `bpm`: Controls tempo +- `keyscale`: Controls musical key +- `timesignature`: Controls rhythm structure +- `duration`: Controls length +- `vocal_language`: Controls vocal characteristics + +**Use Cases**: +- Generate music from text descriptions +- Create backing tracks from prompts +- Generate songs with lyrics + +--- + +### 2. 
Cover + +**Purpose**: Transform existing audio while maintaining structure but changing style/timbre. + +**Key Parameters**: +```python +params = GenerationParams( + task_type="cover", + src_audio="original_song.mp3", + caption="jazz piano version", + audio_cover_strength=0.8, # 0.0-1.0 +) +``` + +**Required**: +- `src_audio`: Path to source audio file +- `caption`: Description of desired style/transformation + +**Optional**: +- `audio_cover_strength`: Controls influence of original audio + - `1.0`: Strong adherence to original structure + - `0.5`: Balanced transformation + - `0.1`: Loose interpretation +- `lyrics`: New lyrics (if changing vocals) + +**Parameters that control source vs prompt/lyrics (important for Cover/Audio→Audio):** +- **caption** (Style): target style for the output; lower `audio_cover_strength` gives it more influence. +- **lyrics**: target lyrics; use `[Instrumental]` or instrumental flag to omit. +- **audio_cover_strength**: 1.0 = strong adherence to source; lower (e.g. 0.5–0.7) = more influence from caption/lyrics. +- **guidance_scale**: higher = stronger adherence to text (and to source when cover strength is high). + +**Use Cases**: +- Create covers in different styles +- Change instrumentation while keeping melody +- Genre transformation + +--- + +### 3. Repaint + +**Purpose**: Regenerate a specific time segment of audio while keeping the rest unchanged. 
+ +**Key Parameters**: +```python +params = GenerationParams( + task_type="repaint", + src_audio="original.mp3", + repainting_start=10.0, # seconds + repainting_end=20.0, # seconds + caption="smooth transition with piano solo", +) +``` + +**Required**: +- `src_audio`: Path to source audio file +- `repainting_start`: Start time in seconds +- `repainting_end`: End time in seconds (use `-1` for end of file) +- `caption`: Description of desired content for repainted section + +**Use Cases**: +- Fix specific sections of generated music +- Add variations to parts of a song +- Create smooth transitions +- Replace problematic segments + +--- + +### 4. Lego (Base Model Only) + +**Purpose**: Generate a specific instrument track in context of existing audio. + +**Key Parameters**: +```python +params = GenerationParams( + task_type="lego", + src_audio="backing_track.mp3", + instruction="Generate the guitar track based on the audio context:", + caption="lead guitar melody with bluesy feel", + repainting_start=0.0, + repainting_end=-1, +) +``` + +**Required**: +- `src_audio`: Path to source/backing audio +- `instruction`: Must specify the track type (e.g., "Generate the {TRACK_NAME} track...") +- `caption`: Description of desired track characteristics + +**Available Tracks**: +- `"vocals"`, `"backing_vocals"`, `"drums"`, `"bass"`, `"guitar"`, `"keyboard"`, +- `"percussion"`, `"strings"`, `"synth"`, `"fx"`, `"brass"`, `"woodwinds"` + +**Use Cases**: +- Add specific instrument tracks +- Layer additional instruments over backing tracks +- Create multi-track compositions iteratively + +--- + +### 5. Extract (Base Model Only) + +**Purpose**: Extract/isolate a specific instrument track from mixed audio. 
+ +**Key Parameters**: +```python +params = GenerationParams( + task_type="extract", + src_audio="full_mix.mp3", + instruction="Extract the vocals track from the audio:", +) +``` + +**Required**: +- `src_audio`: Path to mixed audio file +- `instruction`: Must specify track to extract + +**Available Tracks**: Same as Lego task + +**Use Cases**: +- Stem separation +- Isolate specific instruments +- Create remixes +- Analyze individual tracks + +--- + +### 6. Complete (Base Model Only) + +**Purpose**: Complete/extend partial tracks with specified instruments. + +**Key Parameters**: +```python +params = GenerationParams( + task_type="complete", + src_audio="incomplete_track.mp3", + instruction="Complete the input track with drums, bass, guitar:", + caption="rock style completion", +) +``` + +**Required**: +- `src_audio`: Path to incomplete/partial track +- `instruction`: Must specify which tracks to add +- `caption`: Description of desired style + +**Use Cases**: +- Arrange incomplete compositions +- Add backing tracks +- Auto-complete musical ideas + +--- + +## Helper Functions + +### understand_music + +Analyze audio codes to extract metadata about the music. + +```python +from acestep.inference import understand_music + +result = understand_music( + llm_handler=llm_handler, + audio_codes="<|audio_code_123|><|audio_code_456|>...", + temperature=0.85, + use_constrained_decoding=True, +) + +if result.success: + print(f"Caption: {result.caption}") + print(f"Lyrics: {result.lyrics}") + print(f"BPM: {result.bpm}") + print(f"Key: {result.keyscale}") + print(f"Duration: {result.duration}s") + print(f"Language: {result.language}") +else: + print(f"Error: {result.error}") +``` + +**Use Cases**: +- Analyze existing music +- Extract metadata from audio codes +- Reverse-engineer generation parameters + +--- + +### create_sample + +Generate a complete music sample from a natural language description. This is the "Simple Mode" / "Inspiration Mode" feature. 
+ +```python +from acestep.inference import create_sample + +result = create_sample( + llm_handler=llm_handler, + query="a soft Bengali love song for a quiet evening", + instrumental=False, + vocal_language="bn", # Optional: constrain to Bengali + temperature=0.85, +) + +if result.success: + print(f"Caption: {result.caption}") + print(f"Lyrics: {result.lyrics}") + print(f"BPM: {result.bpm}") + print(f"Duration: {result.duration}s") + print(f"Key: {result.keyscale}") + print(f"Is Instrumental: {result.instrumental}") + + # Use with generate_music + params = GenerationParams( + caption=result.caption, + lyrics=result.lyrics, + bpm=result.bpm, + duration=result.duration, + keyscale=result.keyscale, + vocal_language=result.language, + ) +else: + print(f"Error: {result.error}") +``` + +**Parameters**: + +| Parameter | Type | Default | Description | +|-----------|------|---------|-------------| +| `query` | `str` | required | Natural language description of desired music | +| `instrumental` | `bool` | `False` | Whether to generate instrumental music | +| `vocal_language` | `Optional[str]` | `None` | Constrain lyrics to specific language (e.g., "en", "zh", "bn") | +| `temperature` | `float` | `0.85` | Sampling temperature | +| `top_k` | `Optional[int]` | `None` | Top-k sampling (None disables) | +| `top_p` | `Optional[float]` | `None` | Top-p sampling (None disables) | +| `repetition_penalty` | `float` | `1.0` | Repetition penalty | +| `use_constrained_decoding` | `bool` | `True` | Use FSM-based constrained decoding | + +--- + +### format_sample + +Format and enhance user-provided caption and lyrics, generating structured metadata. 
+ +```python +from acestep.inference import format_sample + +result = format_sample( + llm_handler=llm_handler, + caption="Latin pop, reggaeton", + lyrics="[Verse 1]\nBailando en la noche...", + user_metadata={"bpm": 95}, # Optional: constrain specific values + temperature=0.85, +) + +if result.success: + print(f"Enhanced Caption: {result.caption}") + print(f"Formatted Lyrics: {result.lyrics}") + print(f"BPM: {result.bpm}") + print(f"Duration: {result.duration}s") + print(f"Key: {result.keyscale}") + print(f"Detected Language: {result.language}") +else: + print(f"Error: {result.error}") +``` + +**Parameters**: + +| Parameter | Type | Default | Description | +|-----------|------|---------|-------------| +| `caption` | `str` | required | User's caption/description | +| `lyrics` | `str` | required | User's lyrics with structure tags | +| `user_metadata` | `Optional[Dict]` | `None` | Constrain specific metadata values (bpm, duration, keyscale, timesignature, language) | +| `temperature` | `float` | `0.85` | Sampling temperature | +| `top_k` | `Optional[int]` | `None` | Top-k sampling (None disables) | +| `top_p` | `Optional[float]` | `None` | Top-p sampling (None disables) | +| `repetition_penalty` | `float` | `1.0` | Repetition penalty | +| `use_constrained_decoding` | `bool` | `True` | Use FSM-based constrained decoding | + +--- + +## Complete Examples + +### Example 1: Simple Text-to-Music Generation + +```python +from acestep.inference import GenerationParams, GenerationConfig, generate_music + +params = GenerationParams( + task_type="text2music", + caption="calm ambient music with soft piano and strings", + duration=60, + bpm=80, + keyscale="C Major", +) + +config = GenerationConfig( + batch_size=2, # Generate 2 variations + audio_format="flac", +) + +result = generate_music(dit_handler, llm_handler, params, config, save_dir="/output") + +if result.success: + for i, audio in enumerate(result.audios, 1): + print(f"Variation {i}: {audio['path']}") +``` + +### 
Example 2: Song Generation with Lyrics + +```python +params = GenerationParams( + task_type="text2music", + caption="pop ballad with emotional vocals", + lyrics="""Verse 1: +Walking down the street today +Thinking of the words you used to say +Everything feels different now +But I'll find my way somehow + +Chorus: +I'm moving on, I'm staying strong +This is where I belong +""", + vocal_language="en", + bpm=72, + duration=45, +) + +config = GenerationConfig(batch_size=1) + +result = generate_music(dit_handler, llm_handler, params, config, save_dir="/output") +``` + +### Example 3: Using Custom Timesteps + +```python +params = GenerationParams( + task_type="text2music", + caption="jazz fusion with complex harmonies", + # Custom 9-step schedule + timesteps=[0.97, 0.76, 0.615, 0.5, 0.395, 0.28, 0.18, 0.085, 0], + thinking=True, +) + +config = GenerationConfig(batch_size=1) + +result = generate_music(dit_handler, llm_handler, params, config, save_dir="/output") +``` + +### Example 4: Using Shift Parameter (Turbo Model) + +```python +params = GenerationParams( + task_type="text2music", + caption="upbeat electronic dance music", + inference_steps=8, + shift=3.0, # Recommended for turbo models + infer_method="ode", +) + +config = GenerationConfig(batch_size=2) + +result = generate_music(dit_handler, llm_handler, params, config, save_dir="/output") +``` + +### Example 5: Simple Mode with create_sample + +```python +from acestep.inference import create_sample, GenerationParams, GenerationConfig, generate_music + +# Step 1: Create sample from description +sample = create_sample( + llm_handler=llm_handler, + query="energetic K-pop dance track with catchy hooks", + vocal_language="ko", +) + +if sample.success: + # Step 2: Generate music using the sample + params = GenerationParams( + caption=sample.caption, + lyrics=sample.lyrics, + bpm=sample.bpm, + duration=sample.duration, + keyscale=sample.keyscale, + vocal_language=sample.language, + thinking=True, + ) + + config = 
GenerationConfig(batch_size=2) + result = generate_music(dit_handler, llm_handler, params, config, save_dir="/output") +``` + +### Example 6: Format and Enhance User Input + +```python +from acestep.inference import format_sample, GenerationParams, GenerationConfig, generate_music + +# Step 1: Format user input +formatted = format_sample( + llm_handler=llm_handler, + caption="rock ballad", + lyrics="[Verse]\nIn the darkness I find my way...", +) + +if formatted.success: + # Step 2: Generate with enhanced input + params = GenerationParams( + caption=formatted.caption, + lyrics=formatted.lyrics, + bpm=formatted.bpm, + duration=formatted.duration, + keyscale=formatted.keyscale, + thinking=True, + use_cot_metas=False, # Already formatted, skip metas CoT + ) + + config = GenerationConfig(batch_size=2) + result = generate_music(dit_handler, llm_handler, params, config, save_dir="/output") +``` + +### Example 7: Style Cover with LM Reasoning + +```python +params = GenerationParams( + task_type="cover", + src_audio="original_pop_song.mp3", + caption="orchestral symphonic arrangement", + audio_cover_strength=0.7, + thinking=True, # Enable LM for metadata + use_cot_metas=True, +) + +config = GenerationConfig(batch_size=1) + +result = generate_music(dit_handler, llm_handler, params, config, save_dir="/output") + +# Access LM-generated metadata +if result.extra_outputs.get("lm_metadata"): + lm_meta = result.extra_outputs["lm_metadata"] + print(f"LM detected BPM: {lm_meta.get('bpm')}") + print(f"LM detected Key: {lm_meta.get('keyscale')}") +``` + +### Example 8: Batch Generation with Specific Seeds + +```python +params = GenerationParams( + task_type="text2music", + caption="epic cinematic trailer music", +) + +config = GenerationConfig( + batch_size=4, # Generate 4 variations + seeds=[42, 123, 456], # Specify 3 seeds, 4th will be random + use_random_seed=False, # Use provided seeds + lm_batch_chunk_size=2, # Process 2 at a time (GPU memory) +) + +result = 
generate_music(dit_handler, llm_handler, params, config, save_dir="/output") + +if result.success: + print(f"Generated {len(result.audios)} variations") + for audio in result.audios: + print(f" Seed {audio['params']['seed']}: {audio['path']}") +``` + +### Example 9: High-Quality Generation (Base Model) + +```python +params = GenerationParams( + task_type="text2music", + caption="intricate jazz fusion with complex harmonies", + inference_steps=64, # High quality + guidance_scale=8.0, + use_adg=True, # Adaptive Dual Guidance + cfg_interval_start=0.0, + cfg_interval_end=1.0, + shift=3.0, # Timestep shift + seed=42, # Reproducible results +) + +config = GenerationConfig( + batch_size=1, + use_random_seed=False, + audio_format="wav", # Lossless format +) + +result = generate_music(dit_handler, llm_handler, params, config, save_dir="/output") +``` + +### Example 10: Understand Audio from Codes + +```python +from acestep.inference import understand_music + +# Analyze audio codes (e.g., from a previous generation) +result = understand_music( + llm_handler=llm_handler, + audio_codes="<|audio_code_10695|><|audio_code_54246|>...", + temperature=0.85, +) + +if result.success: + print(f"Detected Caption: {result.caption}") + print(f"Detected Lyrics: {result.lyrics}") + print(f"Detected BPM: {result.bpm}") + print(f"Detected Key: {result.keyscale}") + print(f"Detected Duration: {result.duration}s") + print(f"Detected Language: {result.language}") +``` + +--- + +## Best Practices + +### 1. Caption Writing + +**Good Captions**: +```python +# Specific and descriptive +caption="upbeat electronic dance music with heavy bass and synthesizer leads" + +# Include mood and genre +caption="melancholic indie folk with acoustic guitar and soft vocals" + +# Specify instruments +caption="jazz trio with piano, upright bass, and brush drums" +``` + +**Avoid**: +```python +# Too vague +caption="good music" + +# Contradictory +caption="fast slow music" # Conflicting tempos +``` + +### 2. 
Parameter Tuning + +**For Best Quality**: +- Use base model with `inference_steps=64` or higher +- Enable `use_adg=True` +- Set `guidance_scale=7.0-9.0` +- Set `shift=3.0` for better timestep distribution +- Use lossless audio format (`audio_format="wav"`) + +**For Speed**: +- Use turbo model with `inference_steps=8` +- Disable ADG (`use_adg=False`) +- Use `infer_method="ode"` (default) +- Use compressed format (`audio_format="mp3"`) or default FLAC + +**For Consistency**: +- Set `use_random_seed=False` in config +- Use fixed `seeds` list or single `seed` in params +- Keep `lm_temperature` lower (0.7-0.85) + +**For Diversity**: +- Set `use_random_seed=True` in config +- Increase `lm_temperature` (0.9-1.1) +- Use `batch_size > 1` for variations + +### 3. Duration Guidelines + +- **Instrumental**: 30-180 seconds works well +- **With Lyrics**: Auto-detection recommended (set `duration=-1` or leave default) +- **Short clips**: 10-20 seconds minimum +- **Long form**: Up to 600 seconds (10 minutes) maximum + +### 4. LM Usage + +**When to Enable LM (`thinking=True`)**: +- Need automatic metadata detection +- Want caption refinement +- Generating from minimal input +- Need diverse outputs + +**When to Disable LM (`thinking=False`)**: +- Have precise metadata already +- Need faster generation +- Want full control over parameters + +### 5. Batch Processing + +```python +# Efficient batch generation +config = GenerationConfig( + batch_size=8, # Max supported + allow_lm_batch=True, # Enable for speed (when thinking=True) + lm_batch_chunk_size=4, # Adjust based on GPU memory +) +``` + +### 6. Error Handling + +```python +result = generate_music(dit_handler, llm_handler, params, config, save_dir="/output") + +if not result.success: + print(f"Generation failed: {result.error}") + print(f"Status: {result.status_message}") +else: + # Process successful result + for audio in result.audios: + path = audio['path'] + key = audio['key'] + seed = audio['params']['seed'] + # ... 
process audio files +``` + +### 7. Memory Management + +For large batch sizes or long durations: +- Monitor GPU memory usage +- Reduce `batch_size` if OOM errors occur +- Reduce `lm_batch_chunk_size` for LM operations +- Consider using `offload_to_cpu=True` during initialization + +### 8. Accessing Time Costs + +```python +result = generate_music(dit_handler, llm_handler, params, config, save_dir="/output") + +if result.success: + time_costs = result.extra_outputs.get("time_costs", {}) + print(f"LM Phase 1 Time: {time_costs.get('lm_phase1_time', 0):.2f}s") + print(f"LM Phase 2 Time: {time_costs.get('lm_phase2_time', 0):.2f}s") + print(f"DiT Total Time: {time_costs.get('dit_total_time_cost', 0):.2f}s") + print(f"Pipeline Total: {time_costs.get('pipeline_total_time', 0):.2f}s") +``` + +--- + +## Troubleshooting + +### Common Issues + +**Issue**: Out of memory errors +- **Solution**: Reduce `batch_size`, `inference_steps`, or enable CPU offloading + +**Issue**: Poor quality results +- **Solution**: Increase `inference_steps`, adjust `guidance_scale`, use base model + +**Issue**: Results don't match prompt +- **Solution**: Make caption more specific, increase `guidance_scale`, enable LM refinement (`thinking=True`) + +**Issue**: Slow generation +- **Solution**: Use turbo model, reduce `inference_steps`, disable ADG + +**Issue**: LM not generating codes +- **Solution**: Verify `llm_handler` is initialized, check `thinking=True` and `use_cot_metas=True` + +**Issue**: Seeds not being respected +- **Solution**: Set `use_random_seed=False` in config and provide `seeds` list or `seed` in params + +**Issue**: Custom timesteps not working +- **Solution**: Ensure timesteps are a list of floats from 1.0 to 0.0, properly ordered + +--- + +## API Reference Summary + +### GenerationParams Fields + +See [GenerationParams Parameters](#generationparams-parameters) for complete documentation. 
+ +### GenerationConfig Fields + +See [GenerationConfig Parameters](#generationconfig-parameters) for complete documentation. + +### GenerationResult Fields + +```python +@dataclass +class GenerationResult: + # Audio Outputs + audios: List[Dict[str, Any]] + # Each audio dict contains: + # - "path": str (file path) + # - "tensor": Tensor (audio data) + # - "key": str (unique identifier) + # - "sample_rate": int (48000) + # - "params": Dict (generation params with seed, audio_codes, etc.) + + # Generation Information + status_message: str + extra_outputs: Dict[str, Any] + # extra_outputs contains: + # - "lm_metadata": Dict (LM-generated metadata) + # - "time_costs": Dict (timing information) + # - "latents": Tensor (intermediate latents, if available) + # - "masks": Tensor (attention masks, if available) + + # Success Status + success: bool + error: Optional[str] +``` + +--- + +## Version History + +- **v1.5.2**: Current version + - Added `shift` parameter for timestep shifting + - Added `infer_method` parameter for ODE/SDE selection + - Added `timesteps` parameter for custom timestep schedules + - Added `understand_music()` function for audio analysis + - Added `create_sample()` function for simple mode generation + - Added `format_sample()` function for input enhancement + - Added `UnderstandResult`, `CreateSampleResult`, `FormatSampleResult` dataclasses + +- **v1.5.1**: Previous version + - Split `GenerationConfig` into `GenerationParams` and `GenerationConfig` + - Renamed parameters for consistency (`key_scale` → `keyscale`, `time_signature` → `timesignature`, `audio_duration` → `duration`, `use_llm_thinking` → `thinking`, `audio_code_string` → `audio_codes`) + - Added `instrumental` parameter + - Added `use_constrained_decoding` parameter + - Added CoT auto-filled fields (`cot_*`) + - Changed default `audio_format` to "flac" + - Changed default `batch_size` to 2 + - Changed default `thinking` to True + - Simplified `GenerationResult` structure with unified 
`audios` list + - Added unified `time_costs` in `extra_outputs` + +- **v1.5**: Initial version + - Introduced `GenerationConfig` and `GenerationResult` dataclasses + - Simplified parameter passing + - Added comprehensive documentation + +--- + +For more information, see: +- Main README: [`../../README.md`](../../README.md) +- REST API Documentation: [`API.md`](API.md) +- Gradio Demo Guide: [`GRADIO_GUIDE.md`](GRADIO_GUIDE.md) +- Project repository: [ACE-Step-1.5](https://github.com/ace-step/ACE-Step-1.5) diff --git a/docs/ACE-Step-Tutorial.md b/docs/ACE-Step-Tutorial.md new file mode 100644 index 0000000..247a39c --- /dev/null +++ b/docs/ACE-Step-Tutorial.md @@ -0,0 +1,972 @@ + + +# ACE-Step 1.5 Ultimate Guide (Must Read) + +**Language / 语言 / 言語:** [English](Tutorial.md) | [中文](../zh/Tutorial.md) | [日本語](../ja/Tutorial.md) + +--- + +Hello everyone, I'm Gong Junmin, the developer of ACE-Step. Through this tutorial, I'll guide you through the design philosophy and usage of ACE-Step 1.5. + +## Mental Models + +Before we begin, we need to establish the correct mental models to set proper expectations. + +### Human-Centered Design + +This model is not designed for **one-click generation**, but for **human-centered generation**. + +Understanding this distinction is crucial. + +### What is One-Click Generation? + +You input a prompt, click generate, listen to a few versions, pick one that sounds good, and use it. If someone else inputs the same prompt, they'll likely get similar results. + +In this mode, you and AI have a **client-vendor** relationship. You come with a clear purpose, with a vague expectation in mind, hoping AI delivers a product close to that expectation. Essentially, it's not much different from searching on Google or finding songs on Spotify—just with a bit more customization. + +AI is a service, not a creative inspirer. + +Suno, Udio, MiniMax, Mureka—these platforms are all designed with this philosophy.
They can scale up models as services to ensure delivery. Your generated music is bound by their agreements; you can't run it locally, can't fine-tune for personalized exploration; if they secretly change models or terms, you can only accept it. + +### What is Human-Centered Generation? + +If we weaken the AI layer and strengthen the human layer—letting more human will, creativity, and inspiration give life to AI—this is human-centered generation. + +Unlike the strong purposefulness of one-click generation, human-centered generation has more of a **playful** nature. It's more like an interactive game where you and the model are **collaborators**. + +The workflow is like this: you throw out some inspiration seeds, get a few songs, choose interesting directions from them to continue iterating— +- Adjust prompts to regenerate +- Use **Cover** to maintain structure and adjust details +- Use **Repaint** for local modifications +- Use **Add Layer** to add or remove instrument layers + +At this point, AI is not a servant to you, but an **inspirer**. + +### What Conditions Must This Design Meet? + +For human-centered generation to truly work, the model must meet several key conditions: + +**First, it must be open-source, locally runnable, and trainable.** + +This isn't technical purism, but a matter of ownership. When you use closed-source platforms, you don't own the model, and your generated works are bound by their agreements. Version updates, term changes, service shutdowns—none of these are under your control. + +But when the model is open-source and locally runnable, everything changes: **You forever own this model, and you forever own all the creations you make with it.** No third-party agreement hassles, no platform risks, you can fine-tune, modify, and build your own creative system based on it. Your works will forever belong to you. It's like buying an instrument—you can use it anytime, anywhere, and adjust it anytime, anywhere. 
+ +**Second, it must be fast.** + +Human time is precious, but more importantly—**slow generation breaks flow state**. + +The core of human-centered workflow is the rapid cycle of "try, listen, adjust." If each generation takes minutes, your inspiration dissipates while waiting, and the "play" experience degrades into the "wait" ordeal. + +Therefore, we specifically optimized ACE-Step for this: while ensuring quality, we made generation fast enough to support a smooth human-machine dialogue rhythm. + +### Finite Game vs Infinite Game + +One-click generation is a **finite game**—clear goals, result-oriented, ends at the finish line. To some extent, it coldly hollows out the music industry, replacing many people's jobs. + +Human-centered generation is an **infinite game**—because the fun lies in the process, and the process never ends. + +Our vision is to democratize AI music generation. Let ACE-Step become a big toy in your pocket, let music return to **Play** itself—the creative "play," not just clicking play. + +--- + +## The Elephant Rider Metaphor + +> Recommended reading: [The Complete Guide to Mastering Suno](https://www.notion.so/The-Complete-Guide-to-Mastering-Suno-Advanced-Strategies-for-Professional-Music-Generation-2d6ae744ebdf8024be42f6645f884221)—this blog tutorial can help you establish the foundational understanding of AI music. + +AI music generation is like the famous **elephant rider metaphor** in psychology. + +Consciousness rides on the subconscious, humans ride on elephants. You can give directions, but you can't make the elephant precisely and instantly execute every command. It has its own inertia, its own temperament, its own will. + +This elephant is the music generation model. + +### The Iceberg Model + +Between audio and semantics lies a hidden iceberg. + +What we can describe with language—style, instruments, timbre, emotion, scenes, progression, lyrics, vocal style—these are familiar words, the parts we can touch. 
But together, they're still just a tiny tip of the audio iceberg above the water. + +What's the most precise control? You input the expected audio, and the model returns it unchanged. + +But as long as you're using text descriptions, references, prompts—the model will have room to play. This isn't a bug, it's the nature of things. + +### What is the Elephant? + +This elephant is a fusion of countless elements: data distribution, model scale, algorithm design, annotation bias, evaluation bias—**it's an abstract crystallization of human music history and engineering trade-offs.** + +Any deviation in these elements will cause it to fail to accurately reflect your taste and expectations. + +Of course, we can expand data scale, improve algorithm efficiency, increase annotation precision, expand model capacity, introduce more professional evaluation systems—these are all directions we can optimize as model developers. + +But even if one day we achieve technical "perfection," there's still a fundamental problem we can't avoid: **taste.** + +### Taste and Expectations + +Taste varies from person to person. + +If a music generation model tries to please all listeners, its output will tend toward the popular average of human music history—**this will be extremely mediocre.** + +It's humans who give sound meaning, emotion, experience, life, and cultural symbolic value. It's a small group of artists who create unique tastes, then drive ordinary people to consume and follow, turning niche into mainstream popularity. These pioneering minority artists become legends. + +So when you find the model's output "not to your taste," this might not be the model's problem—**but rather your taste happens to be outside that "average."** This is a good thing. + +This means: **You need to learn to guide this elephant, not expect it to automatically understand you.** + +--- + +## Knowing the Elephant Herd: Model Architecture and Selection + +Now you understand the "elephant" metaphor. 
But actually— + +**This isn't one elephant, but an entire herd—elephants large and small, forming a family.** 🐘🐘🐘🐘 + +### Architecture Principles: Two Brains + +ACE-Step 1.5 uses a **hybrid architecture** with two core components working together: + +``` +User Input → [5Hz LM] → Semantic Blueprint → [DiT] → Audio + ↓ + Metadata Inference + Caption Optimization + Structure Planning +``` + +**5Hz LM (Language Model) — Planner (Optional)** + +The LM is an "omni-capable planner" responsible for understanding your intent and making plans: +- Infers music metadata (BPM, key, duration, etc.) through **Chain-of-Thought** +- Optimizes and expands your caption—understanding and supplementing your intent +- Generates **semantic codes**—implicitly containing composition melody, orchestration, and some timbre information + +The LM learns **world knowledge** from training data. It's a planner that improves usability and helps you quickly generate prototypes. + +**But the LM is not required.** + +If you're very clear about what you want, or already have a clear planning goal—you can completely skip the LM planning step by not using `thinking` mode. + +For example, in **Cover mode**, you use reference audio to constrain composition, chords, and structure, letting DiT generate directly. Here, **you replace the LM's work**—you become the planner yourself. + +Another example: in **Repaint mode**, you use reference audio as context, constraining timbre, mixing, and details, letting DiT directly adjust locally. Here, DiT is more like your creative brainstorming partner, helping with creative ideation and fixing local disharmony. 
+ +**DiT (Diffusion Transformer) — Executor** + +DiT is the "audio craftsman," responsible for turning plans into reality: +- Receives semantic codes and conditions generated by LM +- Gradually "carves" audio from noise through the **diffusion process** +- Decides final timbre, mixing, details + +**Why this design?** + +Traditional methods let diffusion models generate audio directly from text, but text-to-audio mapping is too vague. ACE-Step introduces LM as an intermediate layer: +- LM excels at understanding semantics and planning +- DiT excels at generating high-fidelity audio +- They work together, each doing their part + +### Choosing the Planner: LM Models + +LM has four options: **No LM** (disable thinking mode), **0.6B**, **1.7B**, **4B**. + +Their training data is completely identical; the difference is purely in **knowledge capacity**: +- Larger models have richer world knowledge +- Larger models have stronger memory (e.g., remembering reference audio melodies) +- Larger models perform relatively better on long-tail styles or instruments + +| Choice | Speed | World Knowledge | Memory | Use Cases | +|--------|:-----:|:---------------:|:------:|-----------| +| No LM | ⚡⚡⚡⚡ | — | — | You do the planning (e.g., Cover mode) | +| `0.6B` | ⚡⚡⚡ | Basic | Weak | Low VRAM (< 8GB), rapid prototyping | +| `1.7B` | ⚡⚡ | Medium | Medium | **Default recommendation** | +| `4B` | ⚡ | Rich | Strong | Complex tasks, high-quality generation | + +**How to choose?** + +Based on your hardware: +- **VRAM < 8GB** → No LM or `0.6B` +- **VRAM 8–16GB** → `1.7B` (default) +- **VRAM > 16GB** → `1.7B` or `4B` + +### Choosing the Executor: DiT Models + +With a planning scheme, you still need to choose an executor. DiT is the core of ACE-Step 1.5—it handles various tasks and decides how to interpret LM-generated codes. + +We've open-sourced **4 Turbo models**, **1 SFT model**, and **1 Base model**. 
+ +#### Turbo Series (Recommended for Daily Use) + +Turbo models are trained with distillation, generating high-quality audio in just 8 steps. The core difference between the four variants is the **shift hyperparameter configuration during distillation**. + +**What is shift?** + +Shift determines the "attention allocation" during DiT denoising: +- **Larger shift** → More effort spent on early denoising (building large structure from pure noise), **stronger semantics**, clearer overall framework +- **Smaller shift** → More even step distribution, **more details**, but details might also be noise + +Simple understanding: high shift is like "draw outline first then fill details," low shift is like "draw and fix simultaneously." + +| Model | Distillation Config | Characteristics | +|-------|---------------------|-----------------| +| `turbo` (default) | Joint distillation on shift 1, 2, 3 | **Best balance of creativity and semantics**, thoroughly tested, recommended first choice | +| `turbo-shift1` | Distilled only on shift=1 | Richer details, but semantics weaker | +| `turbo-shift3` | Distilled only on shift=3 | Clearer, richer timbre, but may sound "dry," minimal orchestration | +| `turbo-continuous` | Experimental, supports continuous shift 1–5 | Most flexible tuning, but not thoroughly tested | + +You can choose based on target music style—you might find you prefer a certain variant. **We recommend starting with default turbo**—it's the most balanced and proven choice. + +#### SFT Model + +Compared to Turbo, SFT model has two notable features: +- **Supports CFG** (Classifier-Free Guidance), allowing fine-tuning of prompt adherence +- **More steps** (50 steps), giving the model more time to "think" + +The cost: more steps mean error accumulation, audio clarity may be slightly inferior to Turbo. But its **detail expression and semantic parsing will be better**. 
+ +If you don't care about inference time, like tuning CFG and steps, and prefer that rich detail feel—SFT is a good choice. LM-generated codes can also work with SFT models. + +#### Base Model + +Base is the **master of all tasks**, with three exclusive tasks beyond SFT and Turbo: + +| Task | Description | +|------|-------------| +| `extract` | Extract single tracks from mixed audio (e.g., separate vocals) | +| `lego` | Add new tracks to existing tracks (e.g., add drums to guitar) | +| `complete` | Add mixed accompaniment to single track (e.g., add guitar+drums accompaniment to vocals) | + +Additionally, Base has the **strongest plasticity**. If you have large-scale fine-tuning needs, we recommend starting experiments with Base to train your own SFT model. + +#### Creating Your Custom Model + +Beyond official models, you can also use **LoRA fine-tuning** to create your custom model. + +We'll release an example LoRA model—trained on 20+ "Happy New Year" themed songs, specifically suited for expressing festive atmosphere. This is just a starting point. + +**What does a custom model mean?** + +You can reshape DiT's capabilities and preferences with your own data recipe: +- Like a specific timbre style? Train with that type of songs +- Want the model better at a certain genre? Collect related data for fine-tuning +- Have your own unique aesthetic taste? "Teach" it to the model + +This greatly expands **customization and playability**—train a model unique to you with your aesthetic taste. + +> For detailed LoRA training guide, see the "LoRA Training" tab in Gradio UI. 
+ +#### DiT Selection Summary + +| Model | Steps | CFG | Speed | Exclusive Tasks | Recommended Scenarios | +|-------|:-----:|:---:|:-----:|-----------------|----------------------| +| `turbo` (default) | 8 | ❌ | ⚡⚡⚡ | — | Daily use, rapid iteration | +| `sft` | 50 | ✅ | ⚡ | — | Pursuing details, like tuning | +| `base` | 50 | ✅ | ⚡ | extract, lego, complete | Special tasks, large-scale fine-tuning | + +### Combination Strategies + +Default configuration is **turbo + 1.7B LM**, suitable for most scenarios. + +| Need | Recommended Combination | +|------|------------------------| +| Fastest speed | `turbo` + No LM or `0.6B` | +| Daily use | `turbo` + `1.7B` (default) | +| Pursuing details | `sft` + `1.7B` or `4B` | +| Special tasks | `base` | +| Large-scale fine-tuning | `base` | +| Low VRAM (< 4GB) | `turbo` + No LM + CPU offload | + +### Downloading Models + +```bash +# Download default models (turbo + 1.7B LM) +uv run acestep-download + +# Download all models +uv run acestep-download --all + +# Download specific model +uv run acestep-download --model acestep-v15-base +uv run acestep-download --model acestep-5Hz-lm-0.6B + +# List available models +uv run acestep-download --list +``` + +You need to download models into a `checkpoints` folder for easy identification. + +--- + +## Guiding the Elephant: What Can You Control? + +Now that you know this herd of elephants, let's learn how to communicate with them. + +Each generation is determined by three types of factors: **input control**, **inference hyperparameters**, and **random factors**. + +### I. Input Control: What Do You Want? + +This is the part where you communicate "creative intent" with the model—what kind of music you want to generate. 
+ +| Category | Parameter | Function | +|----------|-----------|----------| +| **Task Type** | `task_type` | Determines generation mode: text2music, cover, repaint, lego, extract, complete | +| **Text Input** | `caption` | Description of overall music elements: style, instruments, emotion, atmosphere, timbre, vocal gender, progression, etc. | +| | `lyrics` | Temporal element description: lyric content, music structure evolution, vocal changes, vocal/instrument performance style, start/end style, articulation, etc. (use `[Instrumental]` for instrumental music) | +| **Music Metadata** | `bpm` | Tempo (30–300) | +| | `keyscale` | Key (e.g., C Major, Am) | +| | `timesignature` | Time signature (4/4, 3/4, 6/8) | +| | `vocal_language` | Vocal language | +| | `duration` | Target duration (seconds) | +| **Audio Reference** | `reference_audio` | Global reference for timbre or style (for cover, style transfer) | +| | `src_audio` | Source audio for non-text2music tasks (text2music defaults to silence, no input needed) | +| | `audio_codes` | Semantic codes input to model in Cover mode (advanced: reuse codes for variants, convert songs to codes for extension, combine like DJ mixing) | +| **Interval Control** | `repainting_start/end` | Time interval for operations (repaint redraw area / lego new track area) | + +--- + +#### About Caption: The Most Important Input + +**Caption is the most important factor affecting generated music.** + +It supports multiple input formats: simple style words, comma-separated tags, complex natural language descriptions. We've trained to be compatible with various formats, ensuring text format doesn't significantly affect model performance. + +**We provide at least 5 ways to help you write good captions:** + +1. **Random Dice** — Click the random button in the UI to see how example captions are written. You can use this standardized caption as a template and have an LLM rewrite it to your desired form. + +2. 
**Format Auto-Rewrite** — We support using the `format` feature to automatically expand your handwritten simple caption into complex descriptions. + +3. **CoT Rewrite** — If LM is initialized, whether `thinking` mode is enabled or not, we support rewriting and expanding captions through Chain-of-Thought (unless you actively disable it in settings, or LM is not initialized). + +4. **Audio to Caption** — Our LM supports converting your input audio to caption. While precision is limited, the vague direction is correct—enough as a starting point. + +5. **Simple Mode** — Just input a simple song description, and LM will automatically generate complete caption, lyrics, and metas samples—suitable for quick starts. + +Regardless of which method, they all solve a real problem: **As ordinary people, our music vocabulary is impoverished.** + +If you want generated music to be more interesting and meet expectations, **Prompting is always the optimal option**—it brings the highest marginal returns and surprises. 
+ +**Common Dimensions for Caption Writing:** + +| Dimension | Examples | +|-----------|----------| +| **Style/Genre** | pop, rock, jazz, electronic, hip-hop, R&B, folk, classical, lo-fi, synthwave | +| **Emotion/Atmosphere** | melancholic, uplifting, energetic, dreamy, dark, nostalgic, euphoric, intimate | +| **Instruments** | acoustic guitar, piano, synth pads, 808 drums, strings, brass, electric bass | +| **Timbre Texture** | warm, bright, crisp, muddy, airy, punchy, lush, raw, polished | +| **Era Reference** | 80s synth-pop, 90s grunge, 2010s EDM, vintage soul, modern trap | +| **Production Style** | lo-fi, high-fidelity, live recording, studio-polished, bedroom pop | +| **Vocal Characteristics** | female vocal, male vocal, breathy, powerful, falsetto, raspy, choir | +| **Speed/Rhythm** | slow tempo, mid-tempo, fast-paced, groovy, driving, laid-back | +| **Structure Hints** | building intro, catchy chorus, dramatic bridge, fade-out ending | + +**Some Practical Principles:** + +1. **Specific beats vague** — "sad piano ballad with female breathy vocal" works better than "a sad song." + +2. **Combine multiple dimensions** — Single-dimension descriptions give the model too much room to play; combining style+emotion+instruments+timbre can more precisely anchor your desired direction. + +3. **Use references well** — "in the style of 80s synthwave" or "reminiscent of Bon Iver" can quickly convey complex aesthetic preferences. + +4. **Texture words are useful** — Adjectives like warm, crisp, airy, punchy can influence mixing and timbre tendencies. + +5. **Don't pursue perfect descriptions** — Caption is a starting point, not an endpoint. Write a general direction first, then iterate based on results. + +6. **Description granularity determines freedom** — More omitted descriptions give the model more room to play, more random factor influence; more detailed descriptions constrain the model more. Decide specificity based on your needs—want surprises? Write less. 
Want control? Write more details. + +7. **Avoid conflicting words** — Conflicting style combinations easily lead to degraded output. For example, wanting both "classical strings" and "hardcore metal" simultaneously—the model will try to fuse them, but the result is usually not ideal. Especially when `thinking` mode is enabled, LM has weaker caption generalization than DiT. When prompting is unreasonable, the chance of pleasant surprises is smaller. + + **Ways to resolve conflicts:** + - **Repetition reinforcement** — Strengthen the elements you want more in mixed styles by repeating certain words + - **Conflict to evolution** — Transform style conflicts into temporal style evolution. For example: "Start with soft strings, middle becomes noisy dynamic metal rock, end turns to hip-hop"—this gives the model clear guidance on how to handle different styles, rather than mixing them into a mess + +> For more prompting tips, see: [The Complete Guide to Mastering Suno](https://www.notion.so/The-Complete-Guide-to-Mastering-Suno-Advanced-Strategies-for-Professional-Music-Generation-2d6ae744ebdf8024be42f6645f884221)—although it's a Suno tutorial, prompting ideas are universal. + +--- + +#### About Lyrics: The Temporal Script + +If Caption describes the music's "overall portrait"—style, atmosphere, timbre—then **Lyrics is the music's "temporal script"**, controlling how music unfolds over time. + +Lyrics is not just lyric content. It carries: +- The lyric text itself +- **Structure tags** ([Verse], [Chorus], [Bridge]...) +- **Vocal style hints** ([raspy vocal], [whispered]...) +- **Instrumental sections** ([guitar solo], [drum break]...) +- **Energy changes** ([building energy], [explosive drop]...) + +**Structure Tags are Key** + +Structure tags (Meta Tags) are the most powerful tool in Lyrics. They tell the model: "What is this section, how should it be performed?"
+ +**Common Structure Tags:** + +| Category | Tag | Description | +|----------|-----|-------------| +| **Basic Structure** | `[Intro]` | Opening, establish atmosphere | +| | `[Verse]` / `[Verse 1]` | Verse, narrative progression | +| | `[Pre-Chorus]` | Pre-chorus, build energy | +| | `[Chorus]` | Chorus, emotional climax | +| | `[Bridge]` | Bridge, transition or elevation | +| | `[Outro]` | Ending, conclusion | +| **Dynamic Sections** | `[Build]` | Energy gradually rising | +| | `[Drop]` | Electronic music energy release | +| | `[Breakdown]` | Reduced instrumentation, space | +| **Instrumental Sections** | `[Instrumental]` | Pure instrumental, no vocals | +| | `[Guitar Solo]` | Guitar solo | +| | `[Piano Interlude]` | Piano interlude | +| **Special Tags** | `[Fade Out]` | Fade out ending | +| | `[Silence]` | Silence | + +**Combining Tags: Use Moderately** + +Structure tags can be combined with `-` for finer control: + +``` +[Chorus - anthemic] +This is the chorus lyrics +Dreams are burning + +[Bridge - whispered] +Whisper those words softly +``` + +This works better than writing `[Chorus]` alone—you're telling the model both what this section is (Chorus) and how to sing it (anthemic). + +**⚠️ Note: Don't stack too many tags.** + +``` +❌ Not recommended: +[Chorus - anthemic - stacked harmonies - high energy - powerful - epic] + +✅ Recommended: +[Chorus - anthemic] +``` + +Stacking too many tags has two risks: +1. The model might mistake tag content as lyrics to sing +2. Too many instructions confuse the model, making effects worse + +**Principle**: Keep structure tags concise; put complex style descriptions in Caption. + +**⚠️ Key: Maintain Consistency Between Caption and Lyrics** + +**Models are not good at resolving conflicts.** If descriptions in Caption and Lyrics contradict, the model gets confused and output quality decreases. 
+ +``` +❌ Conflict example: +Caption: "violin solo, classical, intimate chamber music" +Lyrics: [Guitar Solo - electric - distorted] + +✅ Consistent example: +Caption: "violin solo, classical, intimate chamber music" +Lyrics: [Violin Solo - expressive] +``` + +**Checklist:** +- Instruments in Caption ↔ Instrumental section tags in Lyrics +- Emotion in Caption ↔ Energy tags in Lyrics +- Vocal description in Caption ↔ Vocal control tags in Lyrics + +Think of Caption as "overall setting" and Lyrics as "shot script"—they should tell the same story. + +**Vocal Control Tags:** + +| Tag | Effect | +|-----|--------| +| `[raspy vocal]` | Raspy, textured vocals | +| `[whispered]` | Whispered | +| `[falsetto]` | Falsetto | +| `[powerful belting]` | Powerful, high-pitched singing | +| `[spoken word]` | Rap/recitation | +| `[harmonies]` | Layered harmonies | +| `[call and response]` | Call and response | +| `[ad-lib]` | Improvised embellishments | + +**Energy and Emotion Tags:** + +| Tag | Effect | +|-----|--------| +| `[high energy]` | High energy, passionate | +| `[low energy]` | Low energy, restrained | +| `[building energy]` | Increasing energy | +| `[explosive]` | Explosive energy | +| `[melancholic]` | Melancholic | +| `[euphoric]` | Euphoric | +| `[dreamy]` | Dreamy | +| `[aggressive]` | Aggressive | + +**Lyric Text Writing Tips** + +**1. Control Syllable Count** + +**6-10 syllables per line** usually works best. The model aligns syllables to beats—if one line has 6 syllables and the next has 14, rhythm becomes strange. + +``` +❌ Bad example: +我站在窗前看着外面的世界一切都在改变(18 syllables) +你好(2 syllables) + +✅ Good example: +我站在窗前(5 syllables) +看着外面世界(6 syllables) +一切都在改变(6 syllables) +``` + +**Tip**: Keep similar syllable counts for lines in the same position (e.g., first line of each verse) (±1-2 deviation). + +**2. 
Use Case to Control Intensity** + +Uppercase indicates stronger vocal intensity: + +``` +[Verse] +walking through the empty streets (normal intensity) + +[Chorus] +WE ARE THE CHAMPIONS! (high intensity, shouting) +``` + +**3. Use Parentheses for Background Vocals** + +``` +[Chorus] +We rise together (together) +Into the light (into the light) +``` + +Content in parentheses is processed as background vocals or harmonies. + +**4. Extend Vowels** + +You can extend sounds by repeating vowels: + +``` +Feeeling so aliiive +``` + +But use cautiously—effects are unstable, sometimes ignored or mispronounced. + +**5. Clear Section Separation** + +Separate each section with blank lines: + +``` +[Verse 1] +First verse lyrics +Continue first verse + +[Chorus] +Chorus lyrics +Chorus continues +``` + +**Avoiding "AI-flavored" Lyrics** + +These characteristics make lyrics seem mechanical and lack human touch: + +| Red Flag 🚩 | Description | +|-------------|-------------| +| **Adjective stacking** | "neon skies, electric hearts, endless dreams"—filling a section with vague imagery | +| **Rhyme chaos** | Inconsistent rhyme patterns, or forced rhymes causing semantic breaks | +| **Blurred section boundaries** | Lyric content crosses structure tags, Verse content "flows" into Chorus | +| **No breathing room** | Each line too long, can't sing in one breath | +| **Mixed metaphors** | First verse uses water imagery, second suddenly becomes fire, third is flying—listeners can't anchor | + +**Metaphor discipline**: Stick to one core metaphor per song, exploring its multiple aspects. For example, choosing "water" as metaphor, you can explore: how love flows around obstacles like water, can be gentle rain or flood, reflects the other's image, can't be grasped but exists. One image, multiple facets—this gives lyrics cohesion. 
+ +**Writing Instrumental Music** + +If generating pure instrumental music without vocals: + +``` +[Instrumental] +``` + +Or use structure tags to describe instrumental development: + +``` +[Intro - ambient] + +[Main Theme - piano] + +[Climax - powerful] + +[Outro - fade out] +``` + +**Complete Example** + +Assuming Caption is: `female vocal, piano ballad, emotional, intimate atmosphere, strings, building to powerful chorus` + +``` +[Intro - piano] + +[Verse 1] +月光洒在窗台上 +我听见你的呼吸 +城市在远处沉睡 +只有我们还醒着 + +[Pre-Chorus] +这一刻如此安静 +却藏着汹涌的心 + +[Chorus - powerful] +让我们燃烧吧 +像夜空中的烟火 +短暂却绚烂 +这就是我们的时刻 + +[Verse 2] +时间在指尖流过 +我们抓不住什么 +但至少此刻拥有 +彼此眼中的火焰 + +[Bridge - whispered] +如果明天一切消散 +至少我们曾经闪耀 + +[Final Chorus] +让我们燃烧吧 +像夜空中的烟火 +短暂却绚烂 +THIS IS OUR MOMENT! + +[Outro - fade out] +``` + +Note: In this example, Lyrics tags (piano, powerful, whispered) are consistent with Caption descriptions (piano ballad, building to powerful chorus, intimate), with no conflicts. + +--- + +#### About Music Metadata: Optional Fine Control + +**Most of the time, you don't need to manually set metadata.** + +When you enable `thinking` mode (or enable `use_cot_metas`), LM automatically infers appropriate BPM, key, time signature, etc. based on your Caption and Lyrics. This is usually good enough. + +But if you have clear ideas, you can also manually control them: + +| Parameter | Control Range | Description | +|-----------|--------------|-------------| +| `bpm` | 30–300 | Tempo. Common distribution: slow songs 60–80, mid-tempo 90–120, fast songs 130–180 | +| `keyscale` | Key | e.g., `C Major`, `Am`, `F# Minor`. Affects overall pitch and emotional color | +| `timesignature` | Time signature | `4/4` (most common), `3/4` (waltz), `6/8` (swing feel) | +| `vocal_language` | Language | Vocal language. LM usually auto-detects from lyrics | +| `duration` | Seconds | Target duration. 
Actual generation may vary slightly | + +**Understanding Control Boundaries** + +These parameters are **guidance** rather than **precise commands**: + +- **BPM**: Common range (60–180) works well; extreme values (like 30 or 280) have less training data, may be unstable +- **Key**: Common keys (C, G, D, Am, Em) are stable; rare keys may be ignored or shifted +- **Time signature**: `4/4` is most reliable; `3/4`, `6/8` usually OK; complex signatures (like `5/4`, `7/8`) are advanced, effects vary by style +- **Duration**: Short songs (30–60s) and medium length (2–4min) are stable; very long generation may have repetition or structure issues + +**The Model's "Reference" Approach** + +The model doesn't mechanically execute `bpm=120`, but rather: +1. Uses `120 BPM` as an **anchor point** +2. Samples from distribution near this anchor +3. Final result might be 118 or 122, not exactly 120 + +It's like telling a musician "around 120 tempo"—they'll naturally play in this range, not rigidly follow a metronome. + +**When Do You Need Manual Settings?** + +| Scenario | Suggestion | +|----------|------------| +| Daily generation | Don't worry, let LM auto-infer | +| Clear tempo requirement | Manually set `bpm` | +| Specific style (e.g., waltz) | Manually set `timesignature=3/4` | +| Need to match other material | Manually set `bpm` and `duration` | +| Pursue specific key color | Manually set `keyscale` | + +**Tip**: If you manually set metadata but generation results clearly don't match—check if there's conflict with Caption/Lyrics. For example, Caption says "slow ballad" but `bpm=160`, the model gets confused. + +**Recommended Practice**: Don't write tempo, BPM, key, and other metadata information in Caption. These should be set through dedicated metadata parameters (`bpm`, `keyscale`, `timesignature`, etc.), not described in Caption. 
Caption should focus on style, emotion, instruments, timbre, and other musical characteristics, while metadata information is handled by corresponding parameters. + +--- + +#### About Audio Control: Controlling Sound with Sound + +**Text is dimensionally reduced abstraction; the best control is still controlling with audio.** + +There are three ways to control generation with audio, each with different control ranges and uses: + +--- + +##### 1. Reference Audio: Global Acoustic Feature Control + +Reference audio (`reference_audio`) is used to control the **acoustic features** of generated music—timbre, mixing style, performance style, etc. It **averages temporal dimension information** and acts **globally**. + +**What Does Reference Audio Control?** + +Reference audio mainly controls the **acoustic features** of generated music, including: +- **Timbre texture**: Vocal timbre, instrument timbre +- **Mixing style**: Spatial sense, dynamic range, frequency distribution +- **Performance style**: Vocal techniques, playing techniques, expression +- **Overall atmosphere**: The "feeling" conveyed through reference audio + +**How Does the Backend Process Reference Audio?** + +When you provide reference audio, the system performs the following processing: + +1. **Audio Preprocessing**: + - Load audio file, normalize to **stereo 48kHz** format + - Detect silence, ignore if audio is completely silent + - If audio length is less than 30 seconds, repeat to fill to at least 30 seconds + - Randomly select 10-second segments from front, middle, and back positions, concatenate into 30-second reference segment + +2. 
**Encoding Conversion**: + - Use **VAE (Variational Autoencoder)** `tiled_encode` method to encode audio into **latent representation (latents)** + - These latents contain acoustic feature information but remove specific melody, rhythm, and other structural information + - Encoded latents are input as conditions to DiT generation process, **averaging temporal dimension information, acting globally on entire generation process** + +--- + +##### 2. Source Audio: Semantic Structure Control + +Source audio (`src_audio`) is used for **Cover tasks**, performing **melodic structure control**. Its principle is to quantize your input source audio into semantically structured information. + +**What Does Source Audio Control?** + +Source audio is converted into **semantically structured information**, including: +- **Melody**: Note direction and pitch +- **Rhythm**: Beat, accent, groove +- **Chords**: Harmonic progression and changes +- **Orchestration**: Instrument arrangement and layers +- **Some timbre**: Partial timbre information + +**What Can You Do With It?** + +1. **Control style**: Maintain source audio structure, change style and details +2. **Transfer style**: Apply source audio structure to different styles +3. **Retake lottery**: Generate similar structure but different variants, get different interpretations through multiple generations +4. 
**Control influence degree**: Control source audio influence strength through `audio_cover_strength` parameter (0.0–1.0) + - Higher strength: generation results more strictly follow source audio structure + - Lower strength: generation results have more room for free play + +**Advanced Cover Usage** + +You can use Cover to **Remix a song**, and it supports changing Caption and Lyrics: + +- **Remix creation**: Input a song as source audio, reinterpret it by modifying Caption and Lyrics + - Change style: Use different Caption descriptions (e.g., change from pop to rock) + - Change lyrics: Rewrite lyrics with new Lyrics, maintaining original melody structure + - Change emotion: Adjust overall atmosphere through Caption (e.g., change from sad to joyful) + +- **Build complex music structures**: Build complex melodic direction, layers, and groove based on your needed structure influence degree + - Fine-tune structure adherence through `audio_cover_strength` + - Combine Caption and Lyrics modifications to create new expression while maintaining core structure + - Can generate multiple versions, each with different emphasis on structure, style, lyrics + +--- + +##### 3. Source Audio Context-Based Control: Local Completion and Modification + +This is the **Repaint task**, performing completion or modification based on source audio context. + +**Repaint Principle** + +Repaint is based on **context completion** principle: +- Can complete **beginning**, **middle local**, **ending**, or **any region** +- Operation range: **3 seconds to 90 seconds** +- Model references source audio context information, generating within specified interval + +**What Can You Do With It?** + +1. **Local modification**: Modify lyrics, structure, or content in specified interval +2. **Change lyrics**: Maintain melody and orchestration, only change lyric content +3. **Change structure**: Change music structure in specified interval (e.g., change Verse to Chorus) +4. 
**Continue writing**: Continue writing beginning or ending based on context +5. **Clone timbre**: Clone source audio timbre characteristics based on context + +**Advanced Repaint Usage** + +You can use Repaint for more complex creative needs: + +- **Infinite duration generation**: + - Through multiple Repaint operations, can continuously extend audio, achieving infinite duration generation + - Each continuation is based on previous segment's context, maintaining natural transitions and coherence + - Can generate in segments, each 3–90 seconds, finally concatenate into complete work + +- **Intelligent audio stitching**: + - Intelligently organize and stitch two audios together + - Use Repaint at first audio's end to continue, making transitions naturally connect + - Or use Repaint to modify connection part between two audios for smooth transitions + - Model automatically handles rhythm, harmony, timbre connections based on context, making stitched audio sound like a complete work + +--- + +##### 4. Base Model Advanced Audio Control Tasks + +In the **Base model**, we also support more advanced audio control tasks: + +**Lego Task**: Intelligently add new tracks based on existing tracks +- Input an existing audio track (e.g., vocals) +- Model intelligently adds new tracks (e.g., drums, guitar, bass, etc.) +- New tracks coordinate with original tracks in rhythm and harmony + +**Complete Task**: Add mixed tracks to single track +- Input a single-track audio (e.g., a cappella vocals) +- Model generates complete mixed accompaniment tracks +- Generated accompaniment matches vocals in style, rhythm, and harmony + +**These advanced context completion tasks** greatly expand control methods, more intelligently providing inspiration and creativity. + +--- + +The combination of these parameters determines what you "want." We'll explain input control **principles** and **techniques** in detail later. + +### II. Inference Hyperparameters: How Does the Model Generate? 
+ +This is the part that affects "generation process behavior"—doesn't change what you want, but changes how the model does it. + +**DiT (Diffusion Model) Hyperparameters:** + +| Parameter | Function | Default | Tuning Advice | +|-----------|----------|---------|---------------| +| `inference_steps` | Diffusion steps | 8 (turbo) | More steps = finer but slower. Turbo uses 8, Base uses 32–100 | +| `guidance_scale` | CFG strength | 7.0 | Higher = more prompt adherence, but may overfit. Only Base model effective | +| `use_adg` | Adaptive Dual Guidance | False | After enabling, dynamically adjusts CFG, Base model only | +| `cfg_interval_start/end` | CFG effective interval | 0.0–1.0 | Controls which stage to apply CFG | +| `shift` | Timestep offset | 1.0 | Adjusts denoising trajectory, affects generation style | +| `infer_method` | Inference method | "ode" | `ode` deterministic, `sde` introduces randomness | +| `timesteps` | Custom timesteps | None | Advanced usage, overrides steps and shift | +| `audio_cover_strength` | Reference audio/codes influence strength | 1.0 | 0.0–1.0, higher = closer to reference, lower = more freedom | + +**5Hz LM (Language Model) Hyperparameters:** + +| Parameter | Function | Default | Tuning Advice | +|-----------|----------|---------|---------------| +| `thinking` | Enable CoT reasoning | True | Enable to let LM reason metadata and codes | +| `lm_temperature` | Sampling temperature | 0.85 | Higher = more random/creative, lower = more conservative/deterministic | +| `lm_cfg_scale` | LM CFG strength | 2.0 | Higher = more positive prompt adherence | +| `lm_top_k` | Top-K sampling | 0 | 0 means disabled, limits candidate word count | +| `lm_top_p` | Top-P sampling | 0.9 | Nucleus sampling, limits cumulative probability | +| `lm_negative_prompt` | Negative prompt | "NO USER INPUT" | Tells LM what not to generate | +| `use_cot_metas` | CoT reason metadata | True | Let LM auto-infer BPM, key, etc. 
| +| `use_cot_caption` | CoT rewrite caption | True | Let LM optimize your description | +| `use_cot_language` | CoT detect language | True | Let LM auto-detect vocal language | +| `use_constrained_decoding` | Constrained decoding | True | Ensures correct output format | + +The combination of these parameters determines how the model "does it." + +**About Parameter Tuning** + +It's important to emphasize that **tuning factors and random factors sometimes have comparable influence**. When you adjust a parameter, it may be hard to tell if it's the parameter's effect or randomness causing the change. + +Therefore, **we recommend fixing random factors when tuning**—by setting a fixed `seed` value, ensuring each generation starts from the same initial noise, so you can accurately feel the parameter's real impact on generated audio. Otherwise, parameter change effects may be masked by randomness, causing you to misjudge the parameter's role. + +### III. Random Factors: Sources of Uncertainty + +Even with identical inputs and hyperparameters, two generations may produce different results. This is because: + +**1. DiT's Initial Noise** +- Diffusion models start from random noise and gradually denoise +- `seed` parameter controls this initial noise +- Different seed → different starting point → different endpoint + +**2. LM's Sampling Randomness** +- When `lm_temperature > 0`, the sampling process itself has randomness +- Same prompt, each sampling may choose different tokens + +**3. Additional Noise When `infer_method = "sde"`** +- SDE method injects additional randomness during denoising + +--- + +#### Pros and Cons of Random Factors + +Randomness is a double-edged sword. 
+ +**Benefits of Randomness:** +- **Explore creative space**: Same input can produce different variants, giving you more choices +- **Discover unexpected surprises**: Sometimes randomness brings excellent results you didn't expect +- **Avoid repetition**: Each generation is different, won't fall into single-pattern loops + +**Challenges of Randomness:** +- **Uncontrollable results**: You can't precisely predict generation results, may generate multiple times without satisfaction +- **Hard to reproduce**: Even with identical inputs, hard to reproduce a specific good result +- **Tuning difficulty**: When adjusting parameters, hard to tell if it's parameter effect or randomness change +- **Screening cost**: Need to generate multiple versions to find satisfactory ones, increasing time cost + +#### What Mindset to Face Random Factors? + +**1. Accept Uncertainty** +- Randomness is an essential characteristic of AI music generation, not a bug, but a feature +- Don't expect every generation to be perfect; treat randomness as an exploration tool + +**2. Embrace the Exploration Process** +- Treat generation process as "gacha" or "treasure hunting"—try multiple times, always find surprises +- Enjoy discovering unexpectedly good results, rather than obsessing over one-time success + +**3. Use Fixed Seed Wisely** +- When you want to **understand parameter effects**, fix `seed` to eliminate randomness interference +- When you want to **explore creative space**, let `seed` vary randomly + +**4. 
Batch Generation + Intelligent Screening** +- Don't rely on single generation; batch generate multiple versions +- Use automatic scoring mechanisms for initial screening to improve efficiency + +#### Our Solution: Large Batch + Automatic Scoring + +Because our inference is extremely fast, if your GPU VRAM is sufficient, you can explore random space through **large batch**: + +- **Batch generation**: Generate multiple versions at once (e.g., batch_size=2,4,8), quickly explore random space +- **Automatic scoring mechanism**: We provide automatic scoring mechanisms that can help you initially screen, doing **test time scaling** + +**Automatic Scoring Mechanism** + +We provide multiple scoring metrics, among which **my favorite is DiT Lyrics Alignment Score**: + +- **DiT Lyrics Alignment Score**: This score implicitly affects lyric accuracy + - It evaluates the alignment degree between lyrics and audio in generated audio + - Higher score means lyrics are more accurately positioned in audio, better match between singing and lyrics + - This is particularly important for music generation with lyrics, can help you screen versions with higher lyric accuracy + +- **Other scoring metrics**: Also include other quality assessment metrics, can evaluate generation results from multiple dimensions + +**Recommended Workflow:** + +1. **Batch generation**: Set larger `batch_size` (e.g., 2, 4, 8), generate multiple versions at once +2. **Enable AutoGen**: Enable automatic generation, let system continuously generate new batches in background + - **AutoGen mechanism**: AutoGen automatically uses same parameters (but random seed) to generate next batch in background while you're viewing current batch results + - This lets you continuously explore random space without manually clicking generate button + - Each new batch uses new random seed, ensuring result diversity +3. **Automatic scoring**: Enable automatic scoring, let system automatically score each version +4. 
**Initial screening**: Screen versions with higher scores based on DiT Lyrics Alignment Score and other metrics +5. **Manual selection**: Manually select the final version that best meets your needs from screened versions + +This fully utilizes randomness to explore creative space while improving efficiency through automation tools, avoiding blind searching in large generation results. AutoGen lets you "generate while listening"—while browsing current results, the next batch is already prepared in the background. + +--- + +## Conclusion + +This tutorial currently covers ACE-Step 1.5's core concepts and usage methods: + +- **Mental Models**: Understanding human-centered generation design philosophy +- **Model Architecture**: Understanding how LM and DiT work together +- **Input Control**: Mastering text (Caption, Lyrics, metadata) and audio (reference audio, source audio) control methods +- **Inference Hyperparameters**: Understanding parameters affecting generation process +- **Random Factors**: Learning to use randomness to explore creative space, improving efficiency through Large Batch + AutoGen + Automatic Scoring + +This is just the beginning. There's much more content we want to share with you: + +- More Prompting tips and practical cases +- Detailed usage guides for different task types +- Advanced techniques and creative workflows +- Common issues and solutions +- Performance optimization suggestions + +**This tutorial will continue to be updated and improved.** If you have any questions or suggestions during use, feedback is welcome. Let's make ACE-Step your creative partner in your pocket together. + +--- + +*To be continued...* diff --git a/docs/ACEFORGE_API.md b/docs/ACEFORGE_API.md new file mode 100644 index 0000000..915463a --- /dev/null +++ b/docs/ACEFORGE_API.md @@ -0,0 +1,302 @@ +# AceForge API Reference + +This document describes the HTTP API exposed by AceForge for local clients (e.g. the bundled React UI, CLI tools, or third-party apps). 
The server runs locally; no authentication is required. + +**Base URL (typical):** `http://127.0.0.1:5056` when running the app or dev server. + +**Content type:** JSON for request/response bodies unless noted. Use `Content-Type: application/json` for POST/PATCH. + +--- + +## Overview + +- **REST-style JSON API** under `/api/*`: auth, songs, generation, playlists, users, preferences, reference tracks, search, contact. +- **Audio serving:** `GET /audio/<filename>` and `GET /audio/refs/<filename>` for playback. +- **Legacy / tools routes** (used by the UI for stem splitting, voice cloning, MIDI, training, model downloads): at root paths like `/progress`, `/train_lora`, `/stem_split`, `/voice_clone`, `/midi_generate`, `/models/*`. + +--- + +## 1. Auth (`/api/auth`) + +Local-only; no real login. All routes return a single local user (e.g. OS username). + +| Method | Path | Description | +|--------|------|-------------| +| GET | `/api/auth/auto` | Current user and token (token may be `null`) | +| GET | `/api/auth/me` | Current user (no token check) | +| POST | `/api/auth/setup` | Body: `{ "username": "..." }`. Stub; returns same as auto. | +| POST | `/api/auth/logout` | Stub; returns `{ "success": true }` | +| POST | `/api/auth/refresh` | Stub; returns same as auto | +| PATCH | `/api/auth/username` | Body: `{ "username": "..." }`. Stub; keeps OS username. | + +**Example response (e.g. GET /api/auth/auto):** +```json +{ + "user": { + "id": "local", + "username": "YourOSUsername", + "bio": null, + "avatar_url": null, + "banner_url": null, + "isAdmin": false, + "createdAt": null + }, + "token": null +} +``` + +--- + +## 2. Songs (`/api/songs`) + +Tracks from the configured output directory plus uploaded reference tracks. Song IDs for generated tracks are filenames; reference tracks use IDs prefixed with `ref:`. + +| Method | Path | Description | +|--------|------|-------------| +| GET | `/api/songs` | List all songs (generated + reference). Response: `{ "songs": [ ... 
] }` | +| GET | `/api/songs/public` | Same as list | +| GET | `/api/songs/public/featured` | Same as list, limited | +| GET | `/api/songs/<id>` | One song. Ref tracks: use ID without `ref:` prefix for lookup; response may use `ref:<id>`. | +| GET | `/api/songs/<id>/full` | Song + comments (stub comments: `[]`) | +| GET | `/api/songs/<id>/audio` | Redirect/serve audio file (generated track) | +| POST | `/api/songs` | Create song record (no-op for file-based tracks). Body: song object. Returns `{ "song": <song> }`, 201. | +| PATCH | `/api/songs/<id>` | Update metadata. Body: `{ "title"?, "style"?, "lyrics"? }`. Ref tracks: no-op. | +| DELETE | `/api/songs/<id>` | Delete file and metadata (or remove reference track if `ref:...`) | +| POST | `/api/songs/<id>/like` | Toggle like in metadata. Response: `{ "liked": true\|false }` | +| GET | `/api/songs/liked/list` | Songs marked favorite. Response: `{ "songs": [ ... ] }` | +| PATCH | `/api/songs/<id>/privacy` | Stub. Response: `{ "isPublic": true }` | +| POST | `/api/songs/<id>/play` | Stub. Response: `{ "viewCount": 0 }` | +| GET | `/api/songs/<id>/comments` | Stub. Response: `{ "comments": [] }` | +| POST | `/api/songs/<id>/comments` | Stub. Body: `{ "content": "..." }`. Returns stub comment. | +| DELETE | `/api/songs/comments/<comment_id>` | Stub. Response: `{ "success": true }` | + +**Song object shape (representative):** +```json +{ + "id": "filename.wav", + "title": "Track title", + "lyrics": "", + "style": "genre", + "caption": "genre", + "cover_url": null, + "audio_url": "/audio/filename.wav", + "duration": 120, + "bpm": null, + "key_scale": null, + "time_signature": null, + "tags": [], + "is_public": true, + "like_count": 0, + "view_count": 0, + "user_id": "local", + "created_at": 1234567890, + "creator": "Local" +} +``` + +Reference tracks have `audio_url` like `/audio/refs/<filename>` and `id` may be returned as `ref:<id>` in some contexts. + +--- + +## 3. Generation (`/api/generate`) + +ACE-Step text-to-music (and related tasks). Jobs are queued and run one at a time. 
+| Method | Path | Description | +|--------|------|-------------| +| POST | `/api/generate` | Start a generation job. Returns `jobId`, `status`, `queuePosition`. | +| GET | `/api/generate/status/<job_id>` | Job status and result when done | +| POST | `/api/generate/cancel/<job_id>` | Cancel a queued or running job. Queued jobs are removed; running jobs stop after the current step. Returns `{ "cancelled", "jobId", "message" }`. | +| GET | `/api/generate/lora_adapters` | List LoRA adapters (Training output and custom_lora folder). Response: `{ "adapters": [ { "name", "path", "size_bytes"? } ] }`. | +| POST | `/api/generate/upload-audio` | Upload audio (multipart form field `audio`). Saves to references dir and library. Returns `{ "url", "key" }`. | +| GET | `/api/generate/audio` | Query: `?path=...`. Serve file from output or references dir. | +| GET | `/api/generate/history` | Last 50 jobs. Response: `{ "jobs": [ ... ] }` | +| GET | `/api/generate/endpoints` | Response: `{ "endpoints": { "provider": "acestep-local", "endpoint": "local" } }` | +| GET | `/api/generate/health` | Response: `{ "healthy": true }` | +| GET | `/api/generate/debug/<job_id>` | Raw job info (debug) | +| POST | `/api/generate/format` | Stub; echoes caption, lyrics, bpm, duration, keyScale, timeSignature | + +**POST /api/generate body (main fields):** +- `customMode`: boolean. If false, `songDescription` is required. +- `songDescription` or `style`: text prompt (caption). +- `lyrics`: optional lyrics (or "[inst]" for instrumental). +- `instrumental`: boolean (default true). +- `duration`: seconds (15–240). +- `inferenceSteps`: int (e.g. 55). +- `guidanceScale`: float (e.g. 6.0). +- `seed`: int; if `randomSeed` is true, server may override with random. +- `taskType`: `"text2music"` | `"retake"` | `"repaint"` | `"extend"` | `"cover"` | `"audio2audio"`. +- `referenceAudioUrl`, `sourceAudioUrl`: URLs like `/audio/refs/...` or `/audio/<filename>` for reference/cover. +- `audioCoverStrength` / `ref_audio_strength`: 0–1. 
+- `repaintingStart`, `repaintingEnd`: for repaint task. +- `title`: base name for output file. +- `outputDir` / `output_dir`: optional; else uses app default. +- `keyScale`, `timeSignature`, `vocalLanguage`, `bpm`: optional. +- `loraNameOrPath`: optional; folder name from LoRA list or path to adapter (see `GET /api/generate/lora_adapters`). +- `loraWeight`: optional; 0–2, default 0.75. + +**Response (POST):** `{ "jobId": "<job_id>", "status": "queued", "queuePosition": 1 }` + +**Status response:** `{ "jobId", "status": "queued"|"running"|"succeeded"|"failed"|"cancelled", "queuePosition"?, "etaSeconds"?, "result"?, "error"? }`. On success, `result` includes e.g. `audioUrls`, `duration`, `status`. Cancelled jobs have `status: "cancelled"` and `error: "Cancelled by user"`. + +**Cancel response (POST /api/generate/cancel/<job_id>):** `{ "cancelled": true|false, "jobId": "<job_id>", "message": "..." }`. For queued jobs the job is removed immediately; for running jobs the worker stops after the current inference step and the job status becomes `cancelled`. + +--- + +## 4. Playlists (`/api/playlists`) + +Stored in user data as JSON. No auth. + +| Method | Path | Description | +|--------|------|-------------| +| GET | `/api/playlists` | List playlists. Response: `{ "playlists": [ ... ] }` | +| POST | `/api/playlists` | Create. Body: `{ "name", "description"?, "isPublic"?: true }`. Response: `{ "playlist": { "id", "name", "description", "is_public", "song_ids": [] } }` | +| GET | `/api/playlists/public/featured` | Stub. Response: `{ "playlists": [] }` | +| GET | `/api/playlists/<id>` | One playlist. Response: `{ "playlist", "songs": [] }` (songs not expanded) | +| POST | `/api/playlists/<id>/songs` | Add song. Body: `{ "songId": "..." }`. Response: `{ "success": true }` | +| DELETE | `/api/playlists/<id>/songs/<song_id>` | Remove song. Response: `{ "success": true }` | +| PATCH | `/api/playlists/<id>` | Update. Body: `{ "name"?, "description"? }`. 
Response: `{ "playlist" }` | +| DELETE | `/api/playlists/<id>` | Delete playlist. Response: `{ "success": true }` | + +--- + +## 5. Users (`/api/users`) + +Stubs for local single-user. All return the same local user or empty lists. + +| Method | Path | Description | +|--------|------|-------------| +| GET | `/api/users/me` | Current user | +| GET | `/api/users/public/featured` | `{ "creators": [] }` | +| GET | `/api/users/<username>` | User profile (local user with optional username override) | +| GET | `/api/users/<username>/songs` | Same as GET /api/songs | +| GET | `/api/users/<username>/playlists` | Same as GET /api/playlists | +| PATCH | `/api/users/me` | Stub. Returns local user | +| POST | `/api/users/me/avatar` | Stub. Returns `{ "user", "url": null }` | +| POST | `/api/users/me/banner` | Stub. Returns `{ "user", "url": null }` | +| POST | `/api/users/<username>/follow` | Stub. Returns `{ "following": false, "followerCount": 0 }` | +| GET | `/api/users/<username>/followers` | `{ "followers": [] }` | +| GET | `/api/users/<username>/following` | `{ "following": [] }` | +| GET | `/api/users/<username>/stats` | `{ "followerCount": 0, "followingCount": 0, "isFollowing": false }` | + +--- + +## 6. Preferences (`/api/preferences`) + +App-wide settings (paths, UI zoom, optional module config). Stored in `aceforge_config.json`. + +| Method | Path | Description | +|--------|------|-------------| +| GET | `/api/preferences` | Full config object. Keys may include: `output_dir`, `models_folder`, `ui_zoom`, `stem_split`, `voice_clone`, `midi_gen`, `training`. | +| PATCH | `/api/preferences` | Merge partial object and save. Returns full config. Example: `{ "output_dir": "/path", "models_folder": "/path", "ui_zoom": 90 }`. | + +--- + +## 7. Reference tracks (`/api/reference-tracks`) + +Upload and manage reference audio (for generation and library). Stored under user data `references/` and `reference_tracks.json`. + +| Method | Path | Description | +|--------|------|-------------| +| GET | `/api/reference-tracks` | List. 
Response: `{ "tracks": [ { "id", "filename", "storage_key", "audio_url", "duration", "file_size_bytes", "tags" } ] }` | +| POST | `/api/reference-tracks` | Upload (multipart form field `audio`). Response: `{ "track", "url", "key" }` | +| PATCH | `/api/reference-tracks/<id>` | Update. Body: `{ "tags": [ ... ] }`. Response: updated track. | +| DELETE | `/api/reference-tracks/<id>` | Delete file and metadata. Response: `{ "success": true }` | + +--- + +## 8. Search (`/api/search`) + +Simple local search over tracks (title/style/filename). + +| Method | Path | Description | +|--------|------|-------------| +| GET | `/api/search?q=<query>&type=all\|songs\|creators\|playlists` | Response: `{ "songs": [ ... ], "creators": [ ... ], "playlists": [ ... ] }`. Local: creators/playlists usually empty. | + +--- + +## 9. Contact (`/api/contact`) + +Stub; no email or DB. + +| Method | Path | Description | +|--------|------|-------------| +| POST | `/api/contact` | Body: arbitrary. Response: `{ "success": true, "message": "Received", "id": "local" }` | + +--- + +## 10. Audio serving (app-level) + +Not under `/api/`. Used for playback by the UI. + +| Method | Path | Description | +|--------|------|-------------| +| GET | `/audio/<filename>` | Serve file from configured **output** directory (generated tracks). | +| GET | `/audio/refs/<filename>` | Serve file from user data **references** directory. | + +Paths must not contain `..` or leading `/`. Returns 400 for invalid path, 404 if file not found. + +--- + +## 11. Legacy / tools routes (root paths) + +Used by the bundled UI for stem splitting, voice cloning, MIDI generation, LoRA training, and model downloads. These are registered without an `/api` prefix. + +### Progress (generation / long-running tasks) +| Method | Path | Description | +|--------|------|-------------| +| GET | `/progress` | Current job progress. Response: `{ "fraction", "done", "error", "stage"?, "current"?, "total"? 
}` | + +### ACE-Step and other models +| Method | Path | Description | +|--------|------|-------------| +| GET | `/models/status` | ACE-Step model status. Response: `{ "ok", "ready", "state", "message"? }` | +| POST | `/models/ensure` | Ensure ACE-Step models are downloaded (start download if needed). | +| GET | `/models/folder` | Get configured models folder path. | +| POST | `/models/folder` | Set models folder (body or form). | +| GET | `/models/stem_split/status` | Demucs/stem-split model status. | +| POST | `/models/stem_split/ensure` | Ensure stem-split models. | +| GET | `/models/voice_clone/status` | Voice-clone (TTS) model status. | +| POST | `/models/voice_clone/ensure` | Ensure voice-clone models. | +| GET | `/models/midi_gen/status` | MIDI (basic-pitch) model status. | +| POST | `/models/midi_gen/ensure` | Ensure MIDI models. | + +### Stem splitting +| Method | Path | Description | +|--------|------|-------------| +| POST | `/stem_split` | Form data: audio file and options (e.g. stem count, mode, device, export format). May return JSON or HTML. | + +### Voice cloning +| Method | Path | Description | +|--------|------|-------------| +| POST | `/voice_clone` | Form data: audio and options. May return JSON or HTML. | + +### MIDI generation +| Method | Path | Description | +|--------|------|-------------| +| POST | `/midi_generate` | Form data: audio and options. May return JSON or HTML. | + +### LoRA training +| Method | Path | Description | +|--------|------|-------------| +| GET | `/train_lora/status` | Training status. Response: `{ "running"?, "paused"?, "progress"?, "current_step"?, "max_steps"?, "current_epoch"?, "max_epochs"?, "last_message"?, "returncode"? }` | +| GET | `/train_lora/configs` | Available configs. Response: `{ "ok", "configs": [ { "file", "label" } ], "default"? }` | +| POST | `/train_lora` | Start training (form data). May return HTML. | +| POST | `/train_lora/pause` | Pause training. 
| +| POST | `/train_lora/resume` | Resume training. | +| POST | `/train_lora/cancel` | Cancel training. | + +Other legacy routes (e.g. `/music/`, `/tracks.json`, `/tracks/meta`, `/user_presets`, `/tracks/rename`, `/tracks/delete`, `/generate` POST for legacy form, `/lyrics/*`, `/mufun/*`, `/dataset_mass_tag`) exist for the classic UI or internal use; see source if you need them. + +--- + +## Errors + +- API routes under `/api/*` return JSON on error, e.g. `{ "error": "Message" }` with HTTP 4xx/5xx. +- Legacy routes may return HTML or plain text on failure; check `Content-Type` and handle accordingly. + +--- + +## CORS and credentials + +The server is intended for local use. The bundled UI uses relative URLs and `credentials: 'include'`. For other local clients, same-origin or explicit CORS may apply depending on deployment. diff --git a/docs/EXPERIMENTAL_UI_EXPLORATION.md b/docs/EXPERIMENTAL_UI_EXPLORATION.md new file mode 100644 index 0000000..8414e0b --- /dev/null +++ b/docs/EXPERIMENTAL_UI_EXPLORATION.md @@ -0,0 +1,157 @@ +# Experimental UI: ace-step-ui Exploration + +**Branch:** `experimental-ui` +**Goal:** Evaluate [fspecii/ace-step-ui](https://github.com/fspecii/ace-step-ui) as a replacement/alternative UI for AceForge, retain our existing ACE-Step pipeline and add Voice Cloning, Stem Splitting, etc. later. Target: **macOS Apple Silicon only**, project remains **standalone** (no runtime dependency on external UI repo). + +--- + +## 1. Clone Location and Setup + +- **Temp clone (exploration):** Sibling directory to AceForge + `../ace-step-ui` → full path: `/Users/ethehot/Documents/git/ace-step-ui` + (Not committed inside AceForge; used only for inspection and possible copy of source.) + +- **To run ace-step-ui locally (for tryout):** + ```bash + cd /path/to/ace-step-ui + ./setup.sh # or setup.bat on Windows + # Set ACESTEP_PATH to ACE-Step-1.5 dir (or use their default sibling) + ./start.sh # Backend 3001 + Frontend 3000 + ``` + Open http://localhost:3000. 
The Vite dev server (port 3000) proxies `/api` and `/audio` to the backend on port 3001. +--- + +## 2. ace-step-ui Architecture Summary + +| Layer | Tech | Role | +|-------------|-------------------|------| +| **Frontend**| React 18, TypeScript, Vite, TailwindCSS | SPA: Create, Library, Player, Playlists, Settings, Search | +| **Backend** | Express (Node), SQLite (better-sqlite3) | Auth, songs DB, playlists, generation job queue, audio storage, proxies | +| **ACE-Step**| External API or Python spawn | Music generation; UI expects **ACE-Step 1.5**-style HTTP API | + +- **Frontend** talks only to the **Express server** (relative `API_BASE = ''`; Vite dev proxy forwards `/api`, `/audio`, `/editor`, `/blog` to backend). +- **Express** either: + - Calls an **external ACE-Step API** at `ACESTEP_API_URL` (e.g. `http://localhost:8001`), or + - Falls back to **spawning Python** from an ACE-Step-1.5 directory using `server/scripts/simple_generate.py`. + +--- + +## 3. ACE-Step API Contract (What ace-step-ui Expects) + +When the Express server uses “API mode”, it expects the following from the ACE-Step service: + +| Endpoint | Method | Purpose | +|----------|--------|---------| +| `/health` | GET | Availability; response with `status === 'ok'` or `healthy === true` | +| `/release_task` | POST | Submit generation. Body: `prompt`, `lyrics`, `audio_duration`, `batch_size`, `inference_steps`, `guidance_scale`, `audio_format`, `vocal_language`, `use_random_seed`, `seed`, optional `bpm`, `key_scale`, `time_signature`, `reference_audio_path`, `src_audio_path`, `task_type`, repainting, thinking/LLM params, etc. | +| `/query_result` | POST | Poll with `{ task_id_list: [taskId] }`. Response: per-task `status` (0=processing, 1=done, 2=failed), `result` (e.g. array of `{ file }` paths or similar). | +| `/v1/audio?path=...` or path like `/v1/audio/...` | GET | Download generated audio file by path returned in result. 
| + +- **Generation params** (from `server/src/services/acestep.ts` and Express `generate.ts`) include: simple vs custom mode, style/lyrics, instrumental, duration, BPM, key/time signature, inference steps, guidance scale, batch size, seed, thinking/LLM options, reference/source audio paths, repainting, audio cover strength, etc. +- **Our AceForge** uses the **original ACE-Step** pipeline in-process via `generate_ace.generate_track_ace()` and does **not** expose this HTTP API today. + +--- + +## 4. Express API Surface (What the React App Calls) + +The React app uses these backend routes (from `services/api.ts` and server routes): + +- **Auth:** `/api/auth/auto`, `/api/auth/setup`, `/api/auth/me`, `/api/auth/logout`, `/api/auth/refresh`, `/api/auth/username` (PATCH). +- **Generation:** + `POST /api/generate` (body = GenerationParams), + `GET /api/generate/status/:jobId`, + `POST /api/generate/upload-audio`, + `GET /api/generate/audio?path=...`, + `GET /api/generate/history`, + `GET /api/generate/endpoints`, + `GET /api/generate/health`, + `GET /api/generate/debug/:taskId`, + `POST /api/generate/format` (LLM-style caption/lyrics formatting). +- **Songs:** CRUD, likes, privacy, play count, comments (e.g. `/api/songs`, `/api/songs/:id`, `/api/songs/liked/list`, etc.). +- **Playlists:** create, list, get, add/remove song, update, delete. +- **Users:** profile, public songs/playlists, featured, avatar/banner upload, follow. +- **Search:** `/api/search?q=...&type=...` +- **Contact:** `POST /api/contact` +- **Reference tracks:** `/api/reference-tracks` +- **Static:** `/audio/*` (generated files), `/editor` (AudioMass), `/demucs-web` (stem extraction). + +All generation goes through Express; Express then calls the ACE-Step API (or Python script). The React app never talks to the ACE-Step API directly. + +--- + +## 5. 
AceForge vs ace-step-ui (Relevant Differences) + +| Aspect | AceForge (current) | ace-step-ui | +|--------|---------------------|-------------| +| **UI** | Single Jinja template + vanilla JS (cdmf_*.js), Flask serves HTML + static | React SPA, Vite build; dev proxy to Express | +| **Backend** | Flask (Waitress), port 5056 | Express, port 3001 | +| **ACE-Step** | In-process Python `generate_track_ace()` | External HTTP API (or Python spawn from ACE-Step-1.5) | +| **Auth** | None | JWT, SQLite users, username setup | +| **Songs/tracks** | File-based list from output dir, track metadata in JSON | SQLite `songs` table, storage abstraction | +| **Extra features** | Voice cloning (XTTS), Stem splitting (Demucs), MIDI (basic-pitch), LoRA training, presets | Stem extraction (Demucs web), AudioMass editor, Pexels video, playlists, likes | +| **Platform** | macOS .app (PyInstaller + pywebview) | Cross-platform Node + Python (ACE-Step 1.5) | + +Important: ace-step-ui targets **ACE-Step 1.5** (separate repo/API). AceForge uses the **original ACE-Step** pipeline. Parameter names and API shape differ; an adapter would be required to map between them. + +--- + +## 6. Integration Options for Standalone macOS + +### Option A: Flask as “ACE-Step API” + keep Node (not standalone) + +- Implement on Flask the 4 endpoints: `/health`, `/release_task`, `/query_result`, `/v1/audio`. +- Map `release_task` → our `generate_track_ace()` (and job store); `query_result` → our job state; `v1/audio` → serve from our output dir. +- Run ace-step-ui’s Express with `ACESTEP_API_URL=http://127.0.0.1:5056`. +- **Downside:** Requires Node at runtime for Express (auth, songs, playlists). Not a single-binary, standalone app. + +### Option B: Embed React build + Flask compatibility API (recommended for standalone) + +- Copy or submodule the **React source** into AceForge (e.g. `ui/` or `ace_step_ui/`). +- Build the React app with Vite (`npm run build`) so output is static files (e.g. `dist/`). 
+- Serve that build from Flask (e.g. `/` or `/app`), and add **Flask routes** that implement the subset of the Express API the React app needs: + - **Auth:** e.g. `/api/auth/auto` (return single local user + token), `/api/auth/setup`, `/api/auth/me` (optional). + - **Generate:** `POST /api/generate` → enqueue and call our `generate_track_ace()`; `GET /api/generate/status/:jobId` → return job status and result (audio URLs pointing at our track serving). + - **Songs:** map to our tracks (list from output dir + metadata); implement GET list, GET one, PATCH, DELETE, and optionally “create” when generation finishes. + - **Playlists:** optional; can start with “no playlists” and stub endpoints. + - **Upload audio:** for reference/source audio, store under our output or a temp dir and return a path/URL the adapter understands. + - **Format:** optional; can stub or call our lyrics/prompt helper if we have one. +- **Audio:** serve generated files under `/audio/` or same scheme as current track serving. +- **Editor / Demucs-web:** either serve static from Flask or keep as external; can add later. +- **Result:** Single process (Flask + pywebview), no Node at runtime, project stays standalone. We keep our ACE-Step pipeline and add Voice Cloning, Stem Splitting, MIDI, etc. as extra tabs or routes later. + +### Option C: Run both servers (dev only) + +- Run Flask (5056) and Express (3001); point Express at Flask for “ACE-Step API”. +- Good for quick UI tryout; not suitable for shipped .app without bundling Node. + +--- + +## 7. Recommendation and Next Steps + +- **Short term:** Keep the temp clone at `../ace-step-ui` for reference. On `experimental-ui` branch, decide whether to: + - **Option B (recommended):** Add a directory in AceForge (e.g. `ui/`) containing the React source (copy or submodule) and a small build script (e.g. `npm ci && npm run build`). Add a Flask blueprint (or set of routes) that implements the minimal `/api` surface above and serves the built static files. 
Use an **adapter** in Flask that maps `release_task`-style params to our `generate_track_ace()` and our job/result shape. +- **Parameter mapping:** Our pipeline uses e.g. `prompt`, `lyrics`, `instrumental`, `target_seconds`, `steps`, `guidance_scale`, `seed`, `vocal_gain_db`, `instrumental_gain_db`, reference audio, LoRA. The adapter must map from ace-step-ui’s `GenerationParams` (style, lyrics, duration, inferenceSteps, guidanceScale, etc.) to our Python API and back (e.g. result audio URLs). +- **Later:** Add Voice Cloning, Stem Splitting, MIDI as additional UI sections or tabs, backed by existing AceForge Python modules; either by extending the React app or keeping a hybrid (e.g. React for “Create” + Library, Flask-rendered pages for tools). +- **macOS / Metal:** No change to current target; all heavy work remains in Python (ACE-Step, Demucs, etc.). React is front-end only. + +--- + +## 8. Key Files in ace-step-ui (for reference) + +| Path | Purpose | +|------|--------| +| `server/src/services/acestep.ts` | ACE-Step API client: `isApiAvailable()`, `submitToApi()`, `pollApiResult()`, `downloadAudioFromApi()`; fallback Python spawn; `GenerationParams` and job queue | +| `server/src/routes/generate.ts` | Express generate routes: POST create job, GET status, upload-audio, format, history, health, debug | +| `server/src/config/index.ts` | `ACESTEP_API_URL`, port, DB path, storage | +| `services/api.ts` | Frontend API client: auth, songs, generate, playlists, users, search | +| `components/CreatePanel.tsx` | Main generation form (simple/custom mode, style, lyrics, params) | +| `vite.config.ts` | Dev proxy: `/api`, `/audio`, `/editor` → 3001 | + +--- + +**Implementation plan:** See **`docs/NEW_UI_IMPLEMENTATION_PLAN.md`** for the phased plan to port the UI in-tree, add Flask API compatibility, and integrate the UI build into local and PyInstaller builds (Option B, standalone, single port). + +--- + +*Document created on branch `experimental-ui`. 
Clone used: [fspecii/ace-step-ui](https://github.com/fspecii/ace-step-ui) (sibling directory).* diff --git a/docs/NEW_UI_API_AUDIT.md b/docs/NEW_UI_API_AUDIT.md new file mode 100644 index 0000000..6efce70 --- /dev/null +++ b/docs/NEW_UI_API_AUDIT.md @@ -0,0 +1,139 @@ +# New UI API Audit — AceForge Flask vs UI (ace-step-ui contract) + +This document lists **every API call the React UI makes** and the **Flask backend route** that handles it. All routes must support the exact path and method the UI uses (no trailing slash required). + +## Summary of fixes applied + +- **POST /api/generate** was returning **405 Method Not Allowed** because the blueprint only registered `"/"` → `/api/generate/` (trailing slash). The UI sends `POST /api/generate`. Fixed by adding `@bp.route("", methods=["POST"], strict_slashes=False)` in `api/generate.py`. +- **Auth stubs** added so no UI path 404s: `GET /api/auth/me`, `POST /api/auth/setup`, `POST /api/auth/logout`, `POST /api/auth/refresh`, `PATCH /api/auth/username`. All return local user or success. +- **GET /api/search** added in `api/search.py` (stub: searches local tracks by title/style, returns `{ songs, creators, playlists }`). +- **Reference-tracks** response shape aligned with UI: GET returns `{ tracks: [...] }`, POST returns `{ track: { id, audio_url, ... }, url, key }`. +- **Index routes** (no trailing slash): added `""` + `strict_slashes=False` for generate, playlists (GET/POST), songs (GET/POST), contact (POST), reference-tracks (GET/POST), search (GET). + +--- + +## 1. 
Auth — `ui/services/api.ts` → `api/auth.py` + +| UI call | Method | Backend route | Status | +|--------|--------|----------------|--------| +| `authApi.auto()` | GET | `/api/auth/auto` | ✅ Returns `{ user, token: null }` | +| `authApi.setup(username)` | POST | `/api/auth/setup` | ✅ Stub: same as auto | +| `authApi.me(token)` | GET | `/api/auth/me` | ✅ Stub: `{ user }` | +| `authApi.logout()` | POST | `/api/auth/logout` | ✅ Stub: `{ success: true }` | +| `authApi.refresh(token)` | POST | `/api/auth/refresh` | ✅ Stub: same as auto | +| `authApi.updateUsername(username, token)` | PATCH | `/api/auth/username` | ✅ Stub: `{ user, token }` | + +--- + +## 2. Songs — `ui/services/api.ts` → `api/songs.py` + +| UI call | Method | Backend route | Status | +|--------|--------|----------------|--------| +| `songsApi.getMySongs(token)` | GET | `/api/songs` | ✅ `""` + `"/"` | +| `songsApi.getPublicSongs(limit, offset)` | GET | `/api/songs/public?limit=&offset=` | ✅ | +| `songsApi.getFeaturedSongs()` | GET | `/api/songs/public/featured` | ✅ | +| `songsApi.getSong(id, token)` | GET | `/api/songs/:id` | ✅ | +| `songsApi.getSongFull(id, token)` | GET | `/api/songs/:id/full` | ✅ | +| `songsApi.createSong(song, token)` | POST | `/api/songs` | ✅ `""` + `"/"` | +| `songsApi.updateSong(id, updates, token)` | PATCH | `/api/songs/:id` | ✅ | +| `songsApi.deleteSong(id, token)` | DELETE | `/api/songs/:id` | ✅ | +| `songsApi.toggleLike(id, token)` | POST | `/api/songs/:id/like` | ✅ Stub | +| `songsApi.getLikedSongs(token)` | GET | `/api/songs/liked/list` | ✅ | +| `songsApi.updatePrivacy(id, isPublic, token)` | PATCH | `/api/songs/:id/privacy` | ✅ Stub | +| `songsApi.recordPlay(id, token)` | POST | `/api/songs/:id/play` | ✅ Stub | +| `songsApi.getComments(id, token)` | GET | `/api/songs/:id/comments` | ✅ Stub | +| `songsApi.addComment(id, content, token)` | POST | `/api/songs/:id/comments` | ✅ Stub | +| `songsApi.deleteComment(commentId, token)` | DELETE | `/api/songs/comments/:commentId` | ✅ 
Stub | + +--- + +## 3. Generate — `ui/services/api.ts` → `api/generate.py` + +| UI call | Method | Backend route | Status | +|--------|--------|----------------|--------| +| `generateApi.startGeneration(params, token)` | POST | `/api/generate` | ✅ **Fixed** `""` + `"/"` | +| `generateApi.getStatus(jobId, token)` | GET | `/api/generate/status/:jobId` | ✅ | +| `generateApi.getHistory(token)` | GET | `/api/generate/history` | ✅ | +| `generateApi.uploadAudio(file, token)` | POST | `/api/generate/upload-audio` | ✅ | +| `generateApi.formatInput(params, token)` | POST | `/api/generate/format` | ✅ Stub | +| (audio playback) | GET | `/api/generate/audio?path=...` or `/audio/:filename` | ✅ App-level `/audio/` | + +--- + +## 4. Playlists — `ui/services/api.ts` → `api/playlists.py` + +| UI call | Method | Backend route | Status | +|--------|--------|----------------|--------| +| `playlistsApi.getMyPlaylists(token)` | GET | `/api/playlists` | ✅ `""` + `"/"` | +| `playlistsApi.create(name, description, isPublic, token)` | POST | `/api/playlists` | ✅ `""` + `"/"` | +| `playlistsApi.getPlaylist(id, token)` | GET | `/api/playlists/:id` | ✅ | +| `playlistsApi.getFeaturedPlaylists()` | GET | `/api/playlists/public/featured` | ✅ | +| `playlistsApi.addSong(playlistId, songId, token)` | POST | `/api/playlists/:playlistId/songs` body `{ songId }` | ✅ | +| `playlistsApi.removeSong(playlistId, songId, token)` | DELETE | `/api/playlists/:playlistId/songs/:songId` | ✅ | +| `playlistsApi.update(id, updates, token)` | PATCH | `/api/playlists/:id` | ✅ | +| `playlistsApi.delete(id, token)` | DELETE | `/api/playlists/:id` | ✅ | + +--- + +## 5. 
Users — `ui/services/api.ts` → `api/users.py` + +| UI call | Method | Backend route | Status | +|--------|--------|----------------|--------| +| `usersApi.getProfile(username, token)` | GET | `/api/users/:username` | ✅ | +| `usersApi.getPublicSongs(username)` | GET | `/api/users/:username/songs` | ✅ | +| `usersApi.getPublicPlaylists(username)` | GET | `/api/users/:username/playlists` | ✅ | +| `usersApi.getFeaturedCreators()` | GET | `/api/users/public/featured` | ✅ | +| `usersApi.updateProfile(updates, token)` | PATCH | `/api/users/me` | ✅ Stub | +| `usersApi.uploadAvatar(file, token)` | POST | `/api/users/me/avatar` | ✅ Stub | +| `usersApi.uploadBanner(file, token)` | POST | `/api/users/me/banner` | ✅ Stub | +| `usersApi.follow(username, token)` | POST | `/api/users/:username/follow` | ✅ Stub | +| `usersApi.getFollowers(username)` | GET | `/api/users/:username/followers` | ✅ Stub | +| `usersApi.getFollowing(username)` | GET | `/api/users/:username/following` | ✅ Stub | +| `usersApi.getStats(username, token)` | GET | `/api/users/:username/stats` | ✅ Stub | + +--- + +## 6. Reference tracks — `ui/components/CreatePanel.tsx` (fetch) → `api/reference_tracks.py` + +| UI call | Method | Backend route | Status | +|--------|--------|----------------|--------| +| `fetch('/api/reference-tracks')` | GET | `/api/reference-tracks` | ✅ Returns `{ tracks: [...] }` | +| `fetch('/api/reference-tracks', { method: 'POST', body: formData })` | POST | `/api/reference-tracks` | ✅ Returns `{ track, url, key }` | +| `fetch('/api/reference-tracks/:id', { method: 'DELETE' })` | DELETE | `/api/reference-tracks/:id` | ✅ | + +CreatePanel also uses `PATCH /api/reference-tracks/:id` (tags) — ✅ implemented. + +--- + +## 7. Search — `ui/services/api.ts` → `api/search.py` + +| UI call | Method | Backend route | Status | +|--------|--------|----------------|--------| +| `searchApi.search(query, type)` | GET | `/api/search?q=...&type=...` | ✅ New; searches local tracks | + +--- + +## 8. 
Contact — `ui/services/api.ts` → `api/contact.py` + +| UI call | Method | Backend route | Status | +|--------|--------|----------------|--------| +| `contactApi.submit(data)` | POST | `/api/contact` | ✅ `""` + `"/"` stub | + +--- + +## 9. Optional / not implemented + +| UI usage | Method | Path | Note | +|----------|--------|------|------| +| VideoGeneratorModal (proxy image) | GET | `/api/proxy/image?url=...` | Not in Flask; optional feature | +| VideoGeneratorModal (Pexels) | GET | `/api/pexels/photos?query=...`, `/api/pexels/videos?query=...` | Not in Flask; optional | + +--- + +## Running the audit tests + +```bash +pytest tests/test_new_ui_api.py -v +``` + +The test `test_generate_create_job_no_trailing_slash` asserts that `POST /api/generate` (no trailing slash) returns 200. diff --git a/docs/NEW_UI_IMPLEMENTATION_PLAN.md b/docs/NEW_UI_IMPLEMENTATION_PLAN.md new file mode 100644 index 0000000..a7a1552 --- /dev/null +++ b/docs/NEW_UI_IMPLEMENTATION_PLAN.md @@ -0,0 +1,354 @@ +# Standalone Port of ace-step-ui into AceForge — Implementation Plan + +**Branch:** `experimental-ui` +**Goal:** Embed the [ace-step-ui](https://github.com/fspecii/ace-step-ui) React frontend into AceForge with **no external dependency** on that repo after completion. + +**Implementation status:** Not started. Phases 1–7 below are the execution order. + + +--- + +## Principles + +- **Standalone:** All UI source lives inside AceForge (copied/ported once). No git submodule or npm dependency on `fspecii/ace-step-ui`. +- **Single server:** One Flask app on one port (e.g. 5056). Pywebview loads that URL. No separate Node or ACE-Step HTTP API process. +- **Local-only, no auth:** No authentication. We do not implement login, signup, JWT, or auth middleware. The app is single-user, local-only. 
The UI can be adjusted to remove auth flows (no username modal, no token); if the ported UI still calls `GET /api/auth/auto`, we provide a minimal stub that returns a fixed "user" so the app doesn't error, but no token or validation anywhere. +- **API parity:** Flask routes implement the same paths and JSON shapes the React app expects (from `services/api.ts` and Express routes), with no auth requirements. +- **Build integration:** Building the app (local and PyInstaller) runs the UI build step and bundles the built static files. +- **Storage in global app settings:** All persistent data for the new APIs/UI (generations, settings, playlists, reference uploads, job history, etc.) must use AceForge’s **global user directories** — not paths inside the app bundle or relative to the app. On macOS this is the standard app support and preferences locations; other platforms use the same abstraction via `cdmf_paths`. +- **Roadmap:** After the new UI is successfully integrated (Generation + shared library/player), the plan is to extend it to **Training, Stem Splitting, Voice Cloning, and Audio-to-MIDI** with the same shared player approach. That extension is a follow-on phase, not part of the initial Phases 1–7; see **Roadmap** section below. + +--- + +## Storage: global app settings (macOS / cross‑platform) + +All new API and UI persistence must go through **`cdmf_paths`** so that: + +- **macOS:** User data lives under **`~/Library/Application Support/AceForge/`**; preferences/settings under **`~/Library/Preferences/com.audiohacking.AceForge/`** (or the bundle ID in use). +- **Windows/Linux:** Same pattern where supported; otherwise `cdmf_paths` falls back to app directory as it does today. + +**Use these consistently:** + +| What | Where (via cdmf_paths) | +|------|-------------------------| +| **Generated tracks** | Already: `get_user_data_dir() / "generated"` (DEFAULT_OUT_DIR). New API must use this for output; do not write under the app or cwd. 
| +| **Track metadata** | Already: `TRACK_META_PATH` (user data dir). Songs API reads/writes here. | +| **Playlists** | `get_user_data_dir() / "playlists.json"` (or similar). Not in app dir. | +| **Generation job history** | `get_user_data_dir() / "generation_jobs.json"` (or a subdir). File-backed job queue can live here. | +| **Reference uploads** | `get_user_data_dir() / "references"` — uploaded reference/source audio files. | +| **Reference metadata** | `get_user_data_dir() / "reference_tracks.json"` (or similar). | +| **User presets / new UI settings** | Prefer **`get_user_preferences_dir()`** for small settings (or extend `CONFIG_PATH` / `aceforge_config.json`); larger data in **`get_user_data_dir()`**. | +| **Optional auth stub state** | If we ever store anything for the auth stub: `get_user_data_dir()` or preferences dir. | + +**Do not:** Store user data in `APP_DIR`, inside the .app bundle, or in the current working directory. Always use `cdmf_paths.get_user_data_dir()` or `get_user_preferences_dir()` (and existing constants like `DEFAULT_OUT_DIR`, `TRACK_META_PATH`) so behaviour is consistent with the rest of AceForge and respects OS conventions (e.g. macOS Application Support). + +--- + +## Phase 1: Port UI Source Into Repo + +### 1.1 Create `ui/` and copy frontend only + +- **Directory:** `AceForge/ui/` (new). +- **Copy from clone** (one-time port from `../ace-step-ui` or similar): + - Root: `package.json`, `package-lock.json`, `tsconfig.json`, `vite.config.ts`, `index.html`, `index.tsx`, `App.tsx`, `types.ts`, `global.d.ts`, `metadata.json`. + - Dirs: `components/`, `context/`, `services/` (frontend `api.ts`, `geminiService.ts` if used). + - **Do not copy:** `server/`, `audiomass-editor/` (optional later), `docs/`, `setup.sh`, `start.sh`, etc. +- **Remove external reference:** In `ui/package.json`, ensure no dependency on the ace-step-ui repo (only public npm packages). 
Add a top-level comment or README in `ui/` stating: “Ported from fspecii/ace-step-ui; maintained in-tree. No external dependency on that repo.” + +### 1.2 Configure Vite build for Flask + +- **Base path:** In `ui/vite.config.ts`, set `base: '/'` so assets are requested at root (e.g. `/assets/...`). Flask will serve the app at `/` and assets under `/assets/` (or whatever Vite emits). +- **API proxy (dev only):** For local `bun run dev`, keep proxy target as `http://127.0.0.1:5056` (our Flask) so the React app talks to Flask during development. Update `vite.config.ts` so `server.proxy['/api']` and `server.proxy['/audio']` point to Flask port (5056), not 3001. +- **Build output:** Default Vite build outputs to `ui/dist/` (index.html + `assets/`). We will use this as the only production artifact. + +### 1.3 Optional: Editor / Demucs static assets + +- **Defer:** Do not copy `audiomass-editor/` or `server/public/demucs-web/` in Phase 1. We can add them in a later phase and serve from Flask at `/editor` and `/demucs-web`. The main Create/Library/Player flow does not require them for the first cut. + +--- + +## Phase 2: Flask API Compatibility Layer + +Implement Flask blueprints (or route modules) that mirror the Express API the React app calls. All under prefix `/api` and optionally `/audio`. No authentication: all routes are open; use file/JSON-based storage where needed. + +### 2.1 Auth — removed (local-only, no auth) + +- **No auth flow.** We do not implement login, signup, JWT, or any auth middleware. +- **Optional stub only:** If the ported React app still calls `GET /api/auth/auto` on load, implement a single route that returns a fixed payload so the app doesn’t 404: e.g. `{ user: { id: 'local', username: 'Local' }, token: null }`. No other auth routes (`/setup`, `/me`, `/logout`, `/refresh`, `/username`) are required; the UI should be updated to remove auth UI (username modal, login) so those are never called. 
No token is ever validated; no `Authorization` header is read. + +### 2.2 Generation — `api_generate.py` (Blueprint) + +- **Base path:** `/api/generate` +- **Routes:** + - `POST /api/generate` — Body: ace-step-ui `GenerationParams` (JSON). Create a job (in-memory or file-backed queue under **`get_user_data_dir()`**), map params to our `generate_track_ace()` (see Phase 3), return `{ jobId, status: 'queued', queuePosition: 1 }`. + - `GET /api/generate/status/:jobId` — Return `{ jobId, status, queuePosition?, etaSeconds?, result?, error? }`. When status is `succeeded`, `result` must include `audioUrls` (array of URLs the app can fetch, e.g. `/audio/...` or our track URL pattern). + - `POST /api/generate/upload-audio` — Multipart file upload for reference/source audio. Save under **`get_user_data_dir() / "references"`**; return `{ url, key }` where `url` is the path the adapter will use (e.g. `/audio/refs/<filename>`). + - `GET /api/generate/audio` — Query `?path=...`. Proxy or send file from our output dir (DEFAULT_OUT_DIR) or references dir (**`get_user_data_dir() / "references"`**) so the app can stream generated or uploaded audio. + - `GET /api/generate/history` — Return list of recent jobs (e.g. last 50) from job store persisted under **`get_user_data_dir()`** (e.g. `generation_jobs.json` or equivalent). + - `GET /api/generate/endpoints` — Return `{ endpoints: { provider: 'acestep-local', endpoint: ... } }`. + - `GET /api/generate/health` — Return `{ healthy: true }` (and optionally check model presence). + - `GET /api/generate/debug/:taskId` — Optional; return raw debug info for a task. + - `POST /api/generate/format` — Body: caption, lyrics, bpm, duration, etc. Stub with 200 and same payload or call our lyrics/prompt helper if available; otherwise return placeholder. + +**No auth:** All generation routes are open; no token or auth middleware. 
+ +### 2.3 Songs — `api_songs.py` (Blueprint) + +- **Base path:** `/api/songs` +- **Data model:** Map “songs” to our tracks: list from `cdmf_tracks.list_music_files()` (output dir = **DEFAULT_OUT_DIR**, i.e. `get_user_data_dir() / "generated"` on macOS) and existing track metadata from **`TRACK_META_PATH`** (already in user data dir). Assign a stable `id` per track (e.g. filename-based or UUID stored in metadata). No user filtering — all tracks are the single local library. +- **Routes:** + - `GET /api/songs` — List all songs (all tracks in output dir); return `{ songs: [...] }` with shape expected by the UI (id, title, lyrics, style, audio_url, duration, bpm, etc.). + - `GET /api/songs/public` — Optional; return public songs (we can treat all as same user for now). + - `GET /api/songs/public/featured` — Optional; subset or same list. + - `GET /api/songs/:id` — One song by id; return `{ song }`. Resolve id to filename and metadata. + - `GET /api/songs/:id/full` — Same as above plus optional `comments: []` (stub). + - `GET /api/songs/:id/audio` — Stream the audio file for that song (or redirect to our track URL). + - `POST /api/songs` — Create song record when generation completes (called by our adapter when we add a new track). + - `PATCH /api/songs/:id` — Update metadata (title, style, etc.); persist to **`TRACK_META_PATH`** (global app settings). + - `DELETE /api/songs/:id` — Delete file and metadata. + - `POST /api/songs/:id/like` — Stub or minimal: toggle like in metadata; return `{ liked: boolean }`. + - `GET /api/songs/liked/list` — Return songs marked liked. + - `PATCH /api/songs/:id/privacy` — Stub or minimal (is_public). + - `POST /api/songs/:id/play` — Stub; return `{ viewCount }`. + - `GET /api/songs/:id/comments` — Return `{ comments: [] }`. + - `POST /api/songs/:id/comments` — Stub 201 with a fake comment or 501. + - `DELETE /api/songs/comments/:commentId` — Stub 200. 
+ +### 2.4 Playlists — `api_playlists.py` (Blueprint) + +- **Storage:** One JSON file at **`get_user_data_dir() / "playlists.json"`** (global app settings). Structure: `{ "playlists": [ { id, name, description, is_public, song_ids: [] } ] }`. No user_id needed (local-only). +- **Routes:** + - `POST /api/playlists` — Create playlist; append to store; return `{ playlist }`. + - `GET /api/playlists` — List all playlists (single local user). + - `GET /api/playlists/public/featured` — Optional; return [] or subset. + - `GET /api/playlists/:id` — Get playlist with `songs` array (resolve song ids to song objects). + - `POST /api/playlists/:id/songs` — Body `{ songId }`; add to playlist. + - `DELETE /api/playlists/:id/songs/:songId` — Remove from playlist. + - `PATCH /api/playlists/:id` — Update name/description. + - `DELETE /api/playlists/:id` — Delete playlist. + +### 2.5 Users — `api_users.py` (Blueprint) + +- **Base path:** `/api/users` +- **No auth.** All routes are stubs or return fixed local data. No “current user” or token. +- **Routes:** + - `GET /api/users/me` — Stub: return fixed `{ user: { id: 'local', username: 'Local', ... } }` (or 404 if UI doesn’t need it). + - `GET /api/users/public/featured` — Return [] or a single fixed “creator” for display. + - `GET /api/users/:username` — Return a fixed profile (same for any username; local app). + - `GET /api/users/:username/songs` — Same as our full tracks list. + - `GET /api/users/:username/playlists` — Same as our playlists list. + - `PATCH /api/users/me` — Stub 200 (no-op or optional local display name). + - `POST /api/users/me/avatar`, `POST /api/users/me/banner` — Stub 200. + - `POST /api/users/:username/follow`, `GET /api/users/:username/followers`, `GET /api/users/:username/following`, `GET /api/users/:username/stats` — Stub (e.g. 200, [], zeros). 
+ +### 2.6 Search — `api_search.py` or part of main app + +- **Route:** `GET /api/search?q=...&type=...` — Search our tracks (and optionally playlists) by title/style; return `{ songs, creators, playlists }` with the shape the UI expects. + +### 2.7 Contact — `api_contact.py` (Blueprint) + +- **Route:** `POST /api/contact` — Stub 200 and return `{ success: true, message: '...', id: '...' }` (no email or DB). + +### 2.8 Reference tracks — `api_reference_tracks.py` (Blueprint) + +- **Base path:** `/api/reference-tracks` +- **Storage:** Uploaded files in **`get_user_data_dir() / "references"`**; metadata in **`get_user_data_dir() / "reference_tracks.json"`** (global app settings). No user scoping. +- **Routes:** + - `GET /api/reference-tracks` — List all reference tracks. + - `POST /api/reference-tracks` — Upload audio; save file; return record with `audio_url`. + - `PATCH /api/reference-tracks/:id` — Update tags/metadata. + - `DELETE /api/reference-tracks/:id` — Delete file and record. + +### 2.9 Static and non-API routes + +- **`/audio/*`** — Serve generated and reference audio from **DEFAULT_OUT_DIR** (`get_user_data_dir() / "generated"` on macOS) and **`get_user_data_dir() / "references"`**. Reuse or extend existing track-serving logic so that `audioUrls` point to `/audio/` and resolve to these paths on disk. +- **`/editor`**, **`/demucs-web`** — Defer or add later; serve static files from a folder if we port AudioMass/Demucs-web. + +### 2.10 Top-level and health + +- **`GET /health`** — Already have `/healthz`; add alias `/health` returning JSON `{ status: 'ok' }` or `{ healthy: true }` so any client expecting “health” is satisfied. +- **Existing:** Keep `/healthz`, `/loading`, `/logs/stream`, `/shutdown` as-is. New UI will use `/api/*` and `/audio/*`. 
+ +--- + +## Phase 3: Generation Adapter (AceForge backend) + +### 3.1 Job queue and state + +- **Job store under global app settings:** When `POST /api/generate` is called, create a job record with a unique `jobId` (UUID), status `queued`, and the received `GenerationParams`. Persist job state under **`get_user_data_dir()`** (e.g. `generation_jobs.json` or a small JSON file per job) so it survives restarts. Run generation in a background thread (or reuse existing pattern from `cdmf_generation`). Output files go to **DEFAULT_OUT_DIR** (`get_user_data_dir() / "generated"` on macOS). When `generate_track_ace()` finishes, update job to `succeeded` and set `result.audioUrls` to URLs the client can request (e.g. `/audio/<filename>.mp3`). On failure, set status `failed` and `error`. + +### 3.2 Parameter mapping (GenerationParams → our pipeline) + +- Map from ace-step-ui names to our Python API: + - `prompt` / `songDescription` / `style` → our `prompt` + - `lyrics` → our `lyrics` + - `instrumental` → our `instrumental` + - `duration` → our `target_seconds` + - `inferenceSteps` → our `steps` + - `guidanceScale` → our `guidance_scale` + - `randomSeed`, `seed` → our `seed` + - `bpm` → our `bpm` + - `keyScale`, `timeSignature` → optional (we can add to prompt or ignore if not supported) + - `referenceAudioUrl` / `reference_audio_path` → our reference audio file path (resolve upload path to disk) + - `sourceAudioUrl` / `src_audio_path` → source for “audio cover” if we support it + - `audioFormat` → our output format (wav/mp3) + - `batchSize` — we can run one at a time and ignore or loop +- **Unsupported params:** thinking/LLM, repainting, etc. can be ignored or logged; do not break the request. + +### 3.3 Result and audio URLs + +- After generation, output is written to **DEFAULT_OUT_DIR** (global app settings). Set `result.audioUrls = [ '/audio/<filename>' ]` (or the path Flask serves under `/audio`). 
Ensure `GET /api/generate/audio?path=...` and/or `GET /audio/<filename>` resolve to files under DEFAULT_OUT_DIR (and references dir) so the player can stream them. + +### 3.4 Uploaded reference audio + +- `POST /api/generate/upload-audio` saves the file under **`get_user_data_dir() / "references"`**; return a `url` that the adapter can resolve to that path when calling `generate_track_ace(reference_audio_path=...)`. Same for reference-tracks if used as reference. No storage inside the app or cwd. + +--- + +## Phase 4: Serve the New UI from Flask (Single Port) + +### 4.1 SPA at root + +- **When “new UI” is enabled (default on experimental-ui):** + - `GET /` → Serve `ui/dist/index.html` (or the built index from the path we bundle). + - Static assets (JS, CSS) have paths like `/assets/...` (Vite default). Serve them from the same `dist` folder (e.g. `dist/assets/`). +- **Catch-all for SPA routing:** For any GET request that is not `/api`, `/audio`, `/health`, `/healthz`, `/loading`, `/logs`, `/shutdown`, and that does not match a file in the built app, return `index.html` so client-side routing works. + +### 4.2 Where the built app lives + +- **Development:** `ui/dist/` after `bun run build`. Flask can be configured with a second static folder or a dedicated route for the app root (e.g. `send_from_directory('ui/dist', 'index.html')` and static files from `ui/dist`). +- **Frozen app:** PyInstaller will bundle a copy of `ui/dist` (see Phase 5). At runtime, path comes from `sys._MEIPASS`; serve from `Path(sys._MEIPASS) / 'app'` or similar. + +### 4.3 Loading screen + +- Keep `/loading` serving the current `static/loading.html` that polls `/healthz` and redirects to `/`. So first load: open `/loading` in pywebview, then redirect to `/` (new UI) when server is ready. + +### 4.4 Legacy UI (optional) + +- We can keep the old Jinja UI under a route like `/legacy` for fallback or remove it in a later cleanup. 
Plan: implement new UI at `/`; legacy can remain at `/legacy` if we explicitly register it, or be removed once the new UI is stable. + +--- + +## Phase 5: Build Integration + +### 5.1 Prerequisites + +- **Bun:** Required only at build time (local and CI) for the new UI. Document in README: “Building the app with the new UI requires Bun (https://bun.sh).” + +### 5.2 Script: `scripts/build_ui.sh` (or `ui/build.sh`) + +- **Location:** `AceForge/scripts/build_ui.sh` or `AceForge/ui/build.sh`. +- **Steps:** + 1. `cd` to `AceForge/ui`. + 2. `bun install` (or `bun install --frozen-lockfile` if using a lockfile). + 3. `bun run build`. + 4. Exit 0 only if `ui/dist/index.html` (and ideally `ui/dist/assets/`) exists. +- **Idempotent:** If `ui/` is missing, exit with a clear message (e.g. “ui/ not found; run from repo root after copying UI source”). + +### 5.3 Integrate into `build_local.sh` + +- **Before PyInstaller:** If directory `ui/` exists and `package.json` is present, run the UI build script. If the script fails, optionally fail the whole build or warn and continue (decide: fail so we don’t ship without UI). +- **After UI build:** PyInstaller should see `ui/dist` and include it in the bundle (see 5.4). + +### 5.4 PyInstaller spec (`CDMF.spec`) + +- **Add to `datas`:** + - If `ui/dist` exists: `(str(ui_dist_dir), 'app')` so that the built app is placed at `app/` inside the bundle (e.g. under `_MEIPASS/app`). +- **In Flask:** When frozen, set the “app root” to `Path(sys._MEIPASS) / 'app'` and serve `index.html` and static assets from there. When not frozen, use `Path(__file__).parent / 'ui' / 'dist'` or the path from config. + +### 5.5 .gitignore + +- **Ignore:** `ui/node_modules/`, `ui/dist/` (so we don’t commit build artifacts). Optionally commit `ui/bun.lockb` for reproducible installs. Commit only source (components, services, package.json, vite.config.ts, etc.). + +### 5.6 CI (optional) + +- In GitHub Actions (e.g. 
`build-release.yml`), before PyInstaller: run Bun setup and `scripts/build_ui.sh` (or `cd ui && bun install && bun run build`). Ensure `ui/dist` is present so the spec can include it. + +--- + +## Phase 6: File and Module Layout (Summary) + +``` +AceForge/ + ui/ # React app source (ported; auth removed from UI) + package.json + vite.config.ts + index.html + index.tsx + App.tsx + components/ + context/ + services/ + dist/ # Build output (gitignored) + scripts/ + build_ui.sh + api_auth.py # Optional: single stub GET /api/auth/auto only (no JWT, no other routes) + api_generate.py # Blueprint: /api/generate (no auth) + api_songs.py # Blueprint: /api/songs (no auth) + api_playlists.py # Blueprint: /api/playlists (no auth) + api_users.py # Blueprint: /api/users (stubs, no auth) + api_reference_tracks.py # Blueprint: /api/reference-tracks (no auth) + api_contact.py # Blueprint: /api/contact (stub) + api_search.py # or in music_forge_ui: /api/search + music_forge_ui.py # Register blueprints; serve SPA from / and /app/* + static/ # Existing (loading, legacy if kept) + ... + build_local.sh # Calls scripts/build_ui.sh then PyInstaller + CDMF.spec # datas: (ui/dist, 'app') +``` + +--- + +## Phase 7: Order of Implementation (Suggested) + +1. **Phase 1** — Create `ui/`, copy frontend source, adjust Vite base and proxy. When porting the UI, remove auth flows (username modal, login, token usage) so the app is local-only. +2. **Phase 2.1** — No auth blueprint. Optionally add a single stub: `GET /api/auth/auto` → `{ user: { id: 'local', username: 'Local' }, token: null }` only if the UI still calls it and we don’t change the front end yet. +3. **Phase 2.2 + Phase 3** — Generate blueprint + adapter (POST generate, job queue, map params to `generate_track_ace`, status, upload-audio, audio proxy, history, health). No auth on any route. +4. **Phase 2.3** — Songs blueprint (map tracks to songs, list/get/create/update/delete, stub likes/comments). No auth. +5. 
**Phase 4** — Serve `ui/dist` at `/` and assets; SPA fallback. +6. **Phase 5** — `build_ui.sh`, integrate into `build_local.sh`, update `CDMF.spec` and Flask app root for frozen. +7. **Phase 2.4–2.8** — Playlists, users, search, contact, reference-tracks (stubs or simple impl; no auth). +8. **Phase 6** — Finalize file layout and any renames (e.g. group API modules in an `api/` package if desired). + +--- + +## Roadmap: Extend new UI to all AceForge features (post–Phase 7) + +Once the new UI is successfully integrated and Generation + shared library/player work end-to-end, the plan is to **extend the same UI** to support the rest of AceForge’s capabilities. This is a follow-on phase, not a prerequisite for the initial port. + +**Goal:** One app, one shared player and library. Every feature that produces audio (or MIDI) feeds into the same library and can be played/managed from the same player. + +| Feature | Backend (existing) | New UI / API extension | +|--------|----------------------|-------------------------| +| **Generation** | `generate_ace.py`, `cdmf_generation.py` | Core of Phase 2–3 (Create panel, job status, library). | +| **Training (LoRA)** | `cdmf_training.py`, `cdmf_trainer.py` | New section or tab: dataset config, LoRA params, start/pause/cancel; outputs or checkpoints can be surfaced in library if applicable. | +| **Stem Splitting** | `cdmf_stem_splitting.py`, `cdmf_stem_splitting_bp.py` | New section: upload audio, choose 2/4/6 stem, run; results (stems) appear in **shared library** and are playable in the **same player**. | +| **Voice Cloning** | `cdmf_voice_cloning.py`, `cdmf_voice_cloning_bp.py` | New section: reference clip + text → TTS; output appears in **shared library** and **same player**. | +| **Audio to MIDI** | `cdmf_midi_generation.py`, `cdmf_midi_generation_bp.py` | New section: upload audio → MIDI; output appears in **shared library** and **same player** (MIDI playback as today). 
| + +**Shared player approach:** The existing ace-step-ui front end has a single “Library” and “Player”. All AceForge outputs (generated tracks, stem files, voice-clone clips, MIDI files) should be represented in that same library and playable from that same player. Backend: continue using the same output/track surface (e.g. DEFAULT_OUT_DIR and TRACK_META_PATH, or a unified “tracks” API that includes all types). Front end: extend the library to show source/type (e.g. “Generation”, “Stem”, “Voice”, “MIDI”) and reuse the same player for audio and MIDI. + +**Order (suggested):** After Generation + library + player are solid: add Stem Splitting and Voice Cloning (both produce audio for the shared player), then Audio to MIDI (shared player already supports MIDI in current AceForge), then Training (LoRA) as the most complex. + +This roadmap is **not** part of the current implementation order (Phases 1–7). It is the intended tail once the new UI is successfully working inside AceForge. + +--- + +## Success Criteria + +- AceForge runs as a single process (Flask + pywebview); no separate JS runtime or extra ports at run time (Bun only for building the UI). +- New UI loads at `/` after `/loading` redirect; no login. User can create a generation (simple or custom), see job status, and play the result from the library. +- Build: `./build_local.sh` runs the UI build and produces an .app that serves the new UI. +- No dependency on the fspecii/ace-step-ui repository after the one-time port; all code lives under AceForge. +- All new UI/API persistence (generations, playlists, reference uploads, job history, settings) uses global app settings via **`cdmf_paths`** (e.g. on macOS: `~/Library/Application Support/AceForge/` and `~/Library/Preferences/...`), not directories inside the app or cwd. 
+ +--- + +## References + +- ace-step-ui Express API: `server/src/routes/*.ts`, `server/src/index.ts` +- ace-step-ui frontend API client: `services/api.ts` +- AceForge paths (global app settings): **`cdmf_paths.py`** — `get_user_data_dir()`, `get_user_preferences_dir()`, `DEFAULT_OUT_DIR`, `TRACK_META_PATH`, etc. +- AceForge generation: `generate_ace.py`, `cdmf_generation.py` +- AceForge tracks: `cdmf_tracks.py` +- Exploration doc: `docs/EXPERIMENTAL_UI_EXPLORATION.md` diff --git a/generate_ace.py b/generate_ace.py index 79bd344..c9df2ff 100644 --- a/generate_ace.py +++ b/generate_ace.py @@ -533,6 +533,8 @@ def _prepare_reference_audio( Normalise the ACE-Step edit / audio2audio mode: - Task is clamped to one of: text2music / retake / repaint / extend. + - UI tasks "cover" and "audio2audio" are mapped to "retake" (ACE-Step + then uses ref_audio_input and sets task to "audio2audio" internally). - If Audio2Audio is enabled while task is still 'text2music', we internally flip it to 'retake' (this is how ACE-Step expects edits). - For any edit mode (retake/repaint/extend) we prefer to have a @@ -541,8 +543,12 @@ def _prepare_reference_audio( text2music instead of throwing. """ task_norm = (task or "text2music").strip().lower() - if task_norm not in ("text2music", "retake", "repaint", "extend"): + if task_norm not in ("text2music", "retake", "repaint", "extend", "cover", "audio2audio"): task_norm = "text2music" + # Map UI task names to pipeline task: cover and audio2audio both run as retake + # (pipeline will set task to "audio2audio" when ref_audio_input is passed). + if task_norm in ("cover", "audio2audio"): + task_norm = "retake" # Audio2Audio is effectively an edit of an existing clip. If the user # left the task on "Text → music", run it as a retake under the hood. 
@@ -764,6 +770,7 @@ def _run_ace_text2music( ref_audio_strength: float = 0.7, lora_name_or_path: str | None = None, lora_weight: float = 0.75, + cancel_check: Optional[Callable[[], bool]] = None, ) -> None: """ Call ACE-Step Text2Music and render a single track into ``output_path``. @@ -871,21 +878,27 @@ def _run_ace_text2music( "batch_size": 1, "save_path": str(output_path), "debug": False, + "shift": 6.0, } + if cancel_check is not None: + call_kwargs["cancel_check"] = cancel_check - # Wire up reference vs source audio correctly: - # - # - For audio2audio: send the clip as `ref_audio_input` and DO NOT - # pass `src_audio_path`. ACE-Step will internally flip `task` to - # "audio2audio", and older builds avoid the buggy assert. + # Wire up reference vs source audio per ACE-Step pipeline: # - # - For plain text2music: leave both unset (None). - if audio2audio_enable and src_audio_path: - call_kwargs["ref_audio_input"] = src_audio_path - # Important: never set a non-None src_audio_path for this mode. + # - retake / cover / audio2audio: use ref_audio_input (pipeline sets task to + # "audio2audio" and uses ref_latents). Do NOT pass src_audio_path. + # - repaint / extend: use src_audio_path (pipeline uses src_latents for the + # segment to repaint or extend). Do NOT pass ref_audio_input for this path. + # - text2music: leave both unset (None). + if not src_audio_path: + call_kwargs["ref_audio_input"] = None call_kwargs["src_audio_path"] = None - else: + elif task in ("repaint", "extend"): + call_kwargs["src_audio_path"] = src_audio_path call_kwargs["ref_audio_input"] = None + else: + # retake (including cover/audio2audio from UI) + call_kwargs["ref_audio_input"] = src_audio_path call_kwargs["src_audio_path"] = None # Only forward LoRA configuration if an adapter path/name was provided. 
@@ -1023,6 +1036,7 @@ def generate_track_ace( src_audio_path: str | None = None, lora_name_or_path: str | None = None, lora_weight: float = 0.75, + cancel_check: Optional[Callable[[], bool]] = None, ) -> Dict[str, Any]: """ High-level wrapper for the Flask UI. @@ -1099,6 +1113,11 @@ def generate_track_ace( src_audio_path, ) + # repaint_end < 0 means "end of audio" (see ACE-Step-INFERENCE.md); use target duration. + eff_repaint_end = float(repaint_end) if repaint_end is not None else 0.0 + if eff_repaint_end < 0: + eff_repaint_end = requested_total + out_path = _next_available_output_path(out_dir, basename, ext=".wav") print( @@ -1139,13 +1158,14 @@ def generate_track_ace( oss_steps=oss_steps, task=task, repaint_start=float(repaint_start), - repaint_end=float(repaint_end), + repaint_end=eff_repaint_end, retake_variance=float(retake_variance), src_audio_path=src_audio_path, audio2audio_enable=bool(audio2audio_enable), ref_audio_strength=float(ref_audio_strength), lora_name_or_path=lora_name_or_path, lora_weight=float(lora_weight), + cancel_check=cancel_check, ) _report_progress(0.90, "fades") diff --git a/music_forge_ui.py b/music_forge_ui.py index 674f7f0..0b5192e 100644 --- a/music_forge_ui.py +++ b/music_forge_ui.py @@ -22,7 +22,7 @@ if 'PYTORCH_MPS_HIGH_WATERMARK_RATIO' not in os.environ: os.environ['PYTORCH_MPS_HIGH_WATERMARK_RATIO'] = '0.0' -from flask import Flask, Response, request +from flask import Flask, Response, request, send_from_directory # --------------------------------------------------------------------------- # Early module imports for frozen app compatibility @@ -150,7 +150,7 @@ # Demucs stem splitting uses torch.hub for model download; cache must be writable. 
# --------------------------------------------------------------------------- import cdmf_paths -from cdmf_paths import APP_VERSION +from cdmf_paths import APP_VERSION, get_output_dir, get_user_data_dir os.environ.setdefault("TORCH_HOME", str(cdmf_paths.get_models_folder())) # --------------------------------------------------------------------------- @@ -361,6 +361,89 @@ def flush(self): # Initialize model status before first page render cdmf_state.init_model_status() +# New UI (React SPA) build output; when present we serve it at / and skip legacy index +if getattr(sys, "frozen", False) and hasattr(sys, "_MEIPASS"): + _UI_DIST = Path(sys._MEIPASS) / "ui" / "dist" +else: + _UI_DIST = Path(__file__).resolve().parent / "ui" / "dist" +_USE_NEW_UI = _UI_DIST.is_dir() + +# --------------------------------------------------------------------------- +# New UI API (ace-step-ui compatibility). Register first so / can be overridden later by new UI SPA. +# --------------------------------------------------------------------------- +try: + from api import ( + auth_bp, + songs_bp, + generate_bp, + playlists_bp, + users_bp, + contact_bp, + reference_tracks_bp, + search_bp, + preferences_bp, + ) + app.register_blueprint(auth_bp, url_prefix="/api/auth") + app.register_blueprint(songs_bp, url_prefix="/api/songs") + app.register_blueprint(generate_bp, url_prefix="/api/generate") + app.register_blueprint(playlists_bp, url_prefix="/api/playlists") + app.register_blueprint(users_bp, url_prefix="/api/users") + app.register_blueprint(contact_bp, url_prefix="/api/contact") + app.register_blueprint(reference_tracks_bp, url_prefix="/api/reference-tracks") + app.register_blueprint(search_bp, url_prefix="/api/search") + app.register_blueprint(preferences_bp, url_prefix="/api/preferences") +except ImportError as e: + print(f"[AceForge] New UI API not available: {e}", flush=True) + + +# --------------------------------------------------------------------------- +# Global error handler: log 
500s to app console and return JSON for /api/* +# --------------------------------------------------------------------------- +def _log_exception_and_return_response(error, status_code=500): + """Log full traceback to root logger (so it appears in app console), then return response.""" + import traceback + tb = traceback.format_exc() + logging.getLogger().error("[AceForge] Server error (%s):\n%s", status_code, tb) + try: + path = request.path if request else "" + except Exception: + path = "" + if path.startswith("/api/"): + last_line = [l.strip() for l in tb.strip().split("\n") if l.strip()][-1] if tb else None + return {"error": str(error), "detail": last_line}, status_code + return None # Let Flask use default HTML error page for non-API + + +@app.errorhandler(500) +def handle_500(error): + resp = _log_exception_and_return_response(error, 500) + if resp is not None: + from flask import jsonify + return jsonify(resp[0]), resp[1] + raise error + + +@app.route("/audio/") +def serve_audio(filename: str): + """Serve generated tracks and reference audio. /audio/ -> configured output dir; /audio/refs/ -> references dir.""" + if ".." 
in filename or filename.startswith("/"): + return Response("Invalid path", status=400, mimetype="text/plain") + if filename.startswith("refs/"): + ref_name = filename[5:].lstrip("/") + if not ref_name: + return Response("Invalid path", status=400, mimetype="text/plain") + directory = get_user_data_dir() / "references" + path = directory / ref_name + if not path.is_file(): + return Response("Not found", status=404, mimetype="text/plain") + return send_from_directory(directory, ref_name) + directory = Path(get_output_dir()) + path = directory / filename + if not path.is_file(): + return Response("Not found", status=404, mimetype="text/plain") + return send_from_directory(directory, filename) + + # Register blueprints (no URL prefixes; routes match original paths) app.register_blueprint(create_tracks_blueprint()) app.register_blueprint(create_models_blueprint()) @@ -371,6 +454,7 @@ def flush(self): html_template=HTML, ui_defaults=UI_DEFAULTS, generate_track_ace=generate_track_ace, + serve_index=not _USE_NEW_UI, ) ) app.register_blueprint(create_lyrics_blueprint()) @@ -398,8 +482,6 @@ def flush(self): # MIDI generation is optional - if basic-pitch library is not installed, skip it print(f"[AceForge] MIDI generation not available: {e}", flush=True) - - # --------------------------------------------------------------------------- # Health + loading routes (simple, kept local) # --------------------------------------------------------------------------- @@ -491,6 +573,45 @@ def shutdown(): return {"status": "error", "message": str(e)}, 500 +# --------------------------------------------------------------------------- +# New UI SPA: serve React app at / when ui/dist exists (registered last for catch-all) +# --------------------------------------------------------------------------- +if _USE_NEW_UI: + _NEW_UI_RESERVED = ( + "api/", + "healthz", + "loading", + "logs", + "shutdown", + "audio/", + "music/", + "tracks", + "progress", + "user_presets", + ) + + def 
_send_new_ui_index(): + return send_from_directory(str(_UI_DIST), "index.html") + + @app.route("/") + def new_ui_index(): + return _send_new_ui_index() + + @app.route("/assets/") + def new_ui_assets(filename: str): + assets_dir = _UI_DIST / "assets" + if not assets_dir.is_dir(): + return Response("Not found", status=404, mimetype="text/plain") + return send_from_directory(str(assets_dir), filename) + + @app.route("/") + def new_ui_spa_fallback(path: str): + for prefix in _NEW_UI_RESERVED: + if path == prefix or path.startswith(prefix + "/"): + return Response("Not found", status=404, mimetype="text/plain") + return _send_new_ui_index() + + # --------------------------------------------------------------------------- # Entry point # --------------------------------------------------------------------------- @@ -724,8 +845,8 @@ def on_closed(): return # Create window with native macOS styling - webview.create_window( - title="AceForge - AI Music Generation", + window = webview.create_window( + title="AceForge", url=window_url, width=1400, height=900, @@ -739,9 +860,29 @@ def on_closed(): on_closed=on_closed, ) + # Apply zoom from preferences (default 80%); takes effect on next launch if changed in Settings + try: + _cfg = cdmf_paths.load_config() + _z = int(_cfg.get("ui_zoom") or 80) + _z = max(50, min(150, _z)) + except Exception: + _z = 80 + _webview_zoom = f"{_z}%" + _webview_zoom_js = f'document.documentElement.style.zoom = "{_webview_zoom}";' + def _apply_webview_zoom(win): + time.sleep(1.8) + try: + if hasattr(win, 'run_js'): + win.run_js(_webview_zoom_js) + else: + win.evaluate_js(_webview_zoom_js) + print(f"[AceForge] Webview zoom set to {_webview_zoom}", flush=True) + except Exception as e: + print(f"[AceForge] Could not set webview zoom: {e}", flush=True) + # Start the GUI event loop (this blocks until window is closed) - # When window closes, on_closed() will be called automatically - webview.start(debug=False) + # Pass _apply_webview_zoom so it runs in a 
separate thread after window is ready + webview.start(_apply_webview_zoom, window, debug=False) # This should not be reached (on_closed exits), but just in case shutdown_server() diff --git a/scripts/build_ui.sh b/scripts/build_ui.sh new file mode 100755 index 0000000..0e54840 --- /dev/null +++ b/scripts/build_ui.sh @@ -0,0 +1,28 @@ +#!/usr/bin/env bash +# Build the new UI (React/Vite) for AceForge. Output: ui/dist/ +# Run from repo root. Requires Bun (https://bun.sh). + +set -e +SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" +REPO_ROOT="$(cd "$SCRIPT_DIR/.." && pwd)" +UI_DIR="$REPO_ROOT/ui" + +if [ ! -f "$UI_DIR/package.json" ]; then + echo "ERROR: ui/package.json not found. Run from repo root after copying UI source into ui/." >&2 + exit 1 +fi + +if ! command -v bun &> /dev/null; then + echo "ERROR: Bun not found. Install from https://bun.sh" >&2 + exit 1 +fi + +cd "$UI_DIR" +bun install --frozen-lockfile 2>/dev/null || bun install +bun run build + +if [ ! -f "$UI_DIR/dist/index.html" ]; then + echo "ERROR: UI build did not produce ui/dist/index.html" >&2 + exit 1 +fi +echo "UI build OK: $UI_DIR/dist/" diff --git a/test_train_from_bundle.sh b/test_train_from_bundle.sh new file mode 100755 index 0000000..9178efb --- /dev/null +++ b/test_train_from_bundle.sh @@ -0,0 +1,72 @@ +#!/usr/bin/env bash +# --------------------------------------------------------------------------- +# Verify that LoRA training works from the frozen app bundle (--train entry point). +# +# Run after building the app (e.g. ./build_local.sh or pyinstaller CDMF.spec). +# Does not require ACE-Step models or a dataset. 
+# +# Usage: +# ./test_train_from_bundle.sh +# --------------------------------------------------------------------------- + +set -e + +BUNDLED_BIN="${1:-./dist/AceForge.app/Contents/MacOS/AceForge_bin}" + +echo "==================================================" +echo "AceForge - Training from bundle test (--train)" +echo "==================================================" +echo "" + +if [ ! -f "$BUNDLED_BIN" ]; then + echo "✗ Bundled binary not found: $BUNDLED_BIN" + echo " Build the app first, e.g.: ./build_local.sh" + echo " Or pass the binary path: $0 /path/to/AceForge_bin" + exit 1 +fi + +echo "Using binary: $BUNDLED_BIN" +echo "" + +# 1. Run frozen binary with --train --help. Should print trainer help and exit 0 (no GUI). +echo "Step 1: Running bundled app with --train --help..." +OUTPUT=$("$BUNDLED_BIN" --train --help 2>&1) || EXIT=$? +EXIT=${EXIT:-0} + +if [ "$EXIT" -ne 0 ]; then + echo "✗ Binary exited with code $EXIT (expected 0)" + echo "Output:" + echo "$OUTPUT" + exit 1 +fi + +# Trainer help must include these options (same as cdmf_training passes) +for opt in "--dataset_path" "--exp_name" "--epochs" "--max_steps"; do + if echo "$OUTPUT" | grep -q -- "$opt"; then + echo " ✓ Trainer option $opt present" + else + echo "✗ Trainer help missing option: $opt" + echo "Output:" + echo "$OUTPUT" + exit 1 + fi +done + +echo "✓ Bundled app correctly enters trainer mode with --train and shows help" +echo "" + +# 2. Optional: from source, trainer --help works (sanity check) +if command -v python3 &>/dev/null && [ -f "cdmf_trainer.py" ]; then + echo "Step 2: Sanity check - trainer script --help from source..." 
+  if python3 cdmf_trainer.py --help &>/dev/null; then
+    echo " ✓ python3 cdmf_trainer.py --help OK"
+  else
+    echo " (skip: python3 cdmf_trainer.py --help failed or not run)"
+  fi
+fi
+
+echo ""
+echo "=================================================="
+echo "✓ Training-from-bundle test PASSED"
+echo " The frozen app supports LoRA training via: binary --train [args...]"
+echo "=================================================="
diff --git a/tests/__init__.py b/tests/__init__.py
new file mode 100644
index 0000000..8c65b16
--- /dev/null
+++ b/tests/__init__.py
@@ -0,0 +1 @@
+# Tests for AceForge
diff --git a/tests/test_new_ui_api.py b/tests/test_new_ui_api.py
new file mode 100644
index 0000000..593ee94
--- /dev/null
+++ b/tests/test_new_ui_api.py
@@ -0,0 +1,339 @@
+"""
+Integration tests for the new UI Flask API (ace-step-ui compatibility layer).
+Uses the real Flask app and real API implementations; no mocks.
+Storage is redirected to a temp directory via cdmf_paths patch so CI/user data is not touched.
+"""
+
+from __future__ import annotations
+
+import io
+import json
+import tempfile
+from pathlib import Path
+
+import pytest
+
+
+@pytest.fixture(scope="module")
+def temp_user_dir():
+    """Isolate test storage under a temp dir; real implementation, isolated data."""
+    with tempfile.TemporaryDirectory(prefix="aceforge_test_") as d:
+        yield Path(d)
+
+
+@pytest.fixture(scope="module")
+def app_client(temp_user_dir):
+    """Yield a Flask test client whose cdmf_paths storage points into temp_user_dir.
+
+    The patches are applied before music_forge_ui is imported and are undone on
+    teardown by pytest.MonkeyPatch, which also removes DEFAULT_OUT_DIR /
+    TRACK_META_PATH again when cdmf_paths did not define them beforehand.
+    """
+    for sub in ("prefs", "references", "generated"):
+        (temp_user_dir / sub).mkdir(parents=True, exist_ok=True)
+
+    import cdmf_paths
+
+    with pytest.MonkeyPatch.context() as mp:
+        mp.setattr(cdmf_paths, "get_user_data_dir", lambda: temp_user_dir)
+        mp.setattr(cdmf_paths, "get_user_preferences_dir", lambda: temp_user_dir / "prefs")
+        # raising=False: tolerate a cdmf_paths that does not define these constants.
+        mp.setattr(cdmf_paths, "DEFAULT_OUT_DIR", str(temp_user_dir / "generated"), raising=False)
+        mp.setattr(cdmf_paths, "TRACK_META_PATH", temp_user_dir / "tracks_meta.json", raising=False)
+
+        from music_forge_ui import app
+
+        app.config["TESTING"] = True
+        with app.test_client() as client:
+            yield client
+
+
+def _cancel_job(client, job_id):
+    """Best-effort cancel so a job created only to check the API contract does not linger."""
+    client.post(f"/api/generate/cancel/{job_id}")
+
+
+# ---- Auth (stub) ----
+def test_auth_auto(app_client):
+    """Auto-auth returns the local user with a non-empty username and no token."""
+    r = app_client.get("/api/auth/auto")
+    assert r.status_code == 200
+    data = r.get_json()
+    assert "user" in data
+    assert data["user"]["id"] == "local"
+    assert isinstance(data["user"].get("username"), str) and len(data["user"]["username"]) > 0
+    assert data.get("token") is None
+
+
+# ---- Songs ----
+def test_songs_list(app_client):
+    """Song listing responds with a JSON list under "songs"."""
+    r = app_client.get("/api/songs/")
+    assert r.status_code == 200
+    data = r.get_json()
+    assert "songs" in data
+    assert isinstance(data["songs"], list)
+
+
+def test_songs_public(app_client):
+    """Public songs endpoint responds with a "songs" key."""
+    r = app_client.get("/api/songs/public")
+    assert r.status_code == 200
+    data = r.get_json()
+    assert "songs" in data
+
+
+def test_songs_public_featured(app_client):
+    """Featured public songs endpoint responds with a "songs" key."""
+    r = app_client.get("/api/songs/public/featured")
+    assert r.status_code == 200
+    data = r.get_json()
+    assert "songs" in data
+
+
+# ---- Generate (no ACE model; we only test API contract) ----
+def test_generate_health(app_client):
+    """Health endpoint reports healthy=True."""
+    r = app_client.get("/api/generate/health")
+    assert r.status_code == 200
+    data = r.get_json()
+    assert data.get("healthy") is True
+
+
+def test_generate_endpoints(app_client):
+    """Endpoints listing includes a "provider" entry."""
+    r = app_client.get("/api/generate/endpoints")
+    assert r.status_code == 200
+    data = r.get_json()
+    assert "endpoints" in data
+    assert "provider" in data["endpoints"]
+
+
+def test_generate_history(app_client):
+    """History responds with a JSON list under "jobs"."""
+    r = app_client.get("/api/generate/history")
+    assert r.status_code == 200
+    data = r.get_json()
+    assert "jobs" in data
+    assert isinstance(data["jobs"], list)
+
+
+def test_generate_format_stub(app_client):
+    """Format stub accepts caption/lyrics/duration and reports success."""
+    r = app_client.post(
+        "/api/generate/format",
+        data=json.dumps({"caption": "test", "lyrics": "", "duration": 60}),
+        content_type="application/json",
+    )
+    assert r.status_code == 200
+    data = r.get_json()
+    assert data.get("success") is True
+
+
+def test_generate_upload_audio(app_client):
+    """Multipart audio upload returns a key and a URL under /audio/refs/."""
+    r = app_client.post(
+        "/api/generate/upload-audio",
+        data={"audio": (io.BytesIO(b"fake-wav-content"), "ref.wav")},
+        content_type="multipart/form-data",
+    )
+    assert r.status_code == 200
+    data = r.get_json()
+    assert "url" in data
+    assert "key" in data
+    assert "/audio/refs/" in data["url"]
+
+
+def test_generate_create_job_validation(app_client):
+    """An empty payload is rejected with 400."""
+    r = app_client.post(
+        "/api/generate/",
+        data=json.dumps({}),
+        content_type="application/json",
+    )
+    assert r.status_code == 400
+
+
+def test_generate_create_job_success(app_client):
+    """A minimal instrumental request returns a job id, status and queue position."""
+    r = app_client.post(
+        "/api/generate/",
+        data=json.dumps({
+            "songDescription": "instrumental background music",
+            "duration": 30,
+            "instrumental": True,
+        }),
+        content_type="application/json",
+    )
+    assert r.status_code == 200
+    data = r.get_json()
+    assert "jobId" in data
+    assert data.get("status") in ("queued", "running")
+    assert "queuePosition" in data
+    _cancel_job(app_client, data["jobId"])
+
+
+def test_generate_create_job_no_trailing_slash(app_client):
+    """POST /api/generate (no slash) must work — UI sends this; was 405 before fix."""
+    r = app_client.post(
+        "/api/generate",
+        data=json.dumps({
+            "songDescription": "test track",
+            "duration": 30,
+            "instrumental": True,
+        }),
+        content_type="application/json",
+    )
+    assert r.status_code == 200
+    data = r.get_json()
+    assert "jobId" in data
+    assert data.get("status") in ("queued", "running")
+    _cancel_job(app_client, data["jobId"])
+
+
+def test_generate_status_not_found(app_client):
+    """Status lookup for an unknown job id returns 404."""
+    r = app_client.get("/api/generate/status/nonexistent-uuid")
+    assert r.status_code == 404
+
+
+def test_generate_cancel_not_found(app_client):
+    """Cancelling an unknown job id returns 404."""
+    r = app_client.post("/api/generate/cancel/nonexistent-uuid")
+    assert r.status_code == 404
+
+
+def test_generate_cancel_queued(app_client):
+    """Cancelling a freshly created job is acknowledged and reflected in its status."""
+    # Create a minimal job then cancel it. If still queued, status becomes cancelled; if already running, cancel is requested.
+    r = app_client.post(
+        "/api/generate/",
+        data=json.dumps({
+            "customMode": True,
+            "style": "test cancel",
+            "duration": 30,
+            "instrumental": True,
+        }),
+        content_type="application/json",
+    )
+    assert r.status_code == 200
+    job_id = r.get_json()["jobId"]
+    r2 = app_client.post(f"/api/generate/cancel/{job_id}")
+    assert r2.status_code == 200
+    data = r2.get_json()
+    assert data.get("cancelled") is True
+    assert data.get("jobId") == job_id
+    # Queued jobs become cancelled immediately; running jobs get cancel requested and become cancelled after current step
+    r3 = app_client.get(f"/api/generate/status/{job_id}")
+    assert r3.status_code == 200
+    assert r3.get_json().get("status") in ("cancelled", "running")
+
+
+def test_generate_audio_query_required(app_client):
+    """Audio endpoint called without a query string is rejected with 400."""
+    r = app_client.get("/api/generate/audio")
+    assert r.status_code == 400
+
+
+# ---- Playlists ----
+def test_playlists_list(app_client):
+    """Playlist listing responds with a JSON list under "playlists"."""
+    r = app_client.get("/api/playlists/")
+    assert r.status_code == 200
+    data = r.get_json()
+    assert "playlists" in data
+    assert isinstance(data["playlists"], list)
+
+
+def test_playlists_create(app_client):
+    """A created playlist echoes its name and can be fetched back by id."""
+    r = app_client.post(
+        "/api/playlists/",
+        data=json.dumps({"name": "Test", "description": "", "isPublic": True}),
+        content_type="application/json",
+    )
+    assert r.status_code == 200
+    data = r.get_json()
+    assert "playlist" in data
+    assert data["playlist"]["name"] == "Test"
+    pid = data["playlist"]["id"]
+    r2 = app_client.get(f"/api/playlists/{pid}")
+    assert r2.status_code == 200
+    assert r2.get_json().get("playlist", {}).get("id") == pid
+
+
+def test_playlists_public_featured(app_client):
+    """Featured public playlists endpoint responds with a "playlists" key."""
+    r = app_client.get("/api/playlists/public/featured")
+    assert r.status_code == 200
+    data = r.get_json()
+    assert "playlists" in data
+
+
+# ---- Users (stubs) ----
+def test_users_me(app_client):
+    """Current-user endpoint returns the user with id "local"."""
+    r = app_client.get("/api/users/me")
+    assert r.status_code == 200
+    data = r.get_json()
+    assert data["user"]["id"] == "local"
+
+
+def test_users_public_featured(app_client):
+    """Featured users endpoint responds with a "creators" key."""
+    r = app_client.get("/api/users/public/featured")
+    assert r.status_code == 200
+    data = r.get_json()
+    assert "creators" in data
+
+
+def test_users_username(app_client):
+    """Looking up a username echoes that username in the returned user."""
+    r = app_client.get("/api/users/anyname")
+    assert r.status_code == 200
+    data = r.get_json()
+    assert data["user"]["username"] == "anyname"
+
+
+# ---- Contact (stub) ----
+def test_contact(app_client):
+    """Contact stub answers 200 with a success flag, a message, or an id."""
+    r = app_client.post(
+        "/api/contact",
+        data=json.dumps({"message": "test", "email": "test@test.com"}),
+        content_type="application/json",
+    )
+    assert r.status_code == 200
+    data = r.get_json()
+    assert data.get("success") is True or "message" in data or "id" in data
+
+
+# ---- Reference tracks ----
+def test_reference_tracks_list(app_client):
+    """Reference-track listing responds with a JSON list under "tracks"."""
+    r = app_client.get("/api/reference-tracks/")
+    assert r.status_code == 200
+    data = r.get_json()
+    assert "tracks" in data
+    assert isinstance(data["tracks"], list)
+
+
+# ---- Audio route (app-level) ----
+def test_audio_invalid_path(app_client):
+    """A URL-encoded parent-directory path is answered with 400 or 404."""
+    r = app_client.get("/audio/..%2Fetc%2Fpasswd")
+    assert r.status_code in (400, 404)
+
+
+def test_audio_not_found(app_client):
+    """A missing audio file is answered with 404."""
+    r = app_client.get("/audio/nonexistent.wav")
+    assert r.status_code == 404
+
+
+# ---- Health (existing) ----
+def test_healthz(app_client):
+    """Liveness probe answers 200 with body "ok"."""
+    r = app_client.get("/healthz")
+    assert r.status_code == 200
+    assert r.data.strip() == b"ok"
diff --git a/ui/App.tsx b/ui/App.tsx
new file mode 100644
index 0000000..bbb38b0
--- /dev/null
+++ b/ui/App.tsx
@@ -0,0 +1,1261 @@
+import React, { useState, useEffect, useRef, useCallback } from 'react';
+import { Sidebar } from './components/Sidebar';
+import { CreatePanel } from './components/CreatePanel';
+import { SongList } from './components/SongList';
+import { RightSidebar } from './components/RightSidebar';
+import { Player } from './components/Player';
+import { LibraryView } from './components/LibraryView';
+import { CreatePlaylistModal, AddToPlaylistModal } from './components/PlaylistModals';
+import { VideoGeneratorModal } from './components/VideoGeneratorModal'; +import { UserProfile } from './components/UserProfile'; +import { SettingsModal } from './components/SettingsModal'; +import { ConsolePanel } from './components/ConsolePanel'; +import { SongProfile } from './components/SongProfile'; +import { Song, GenerationParams, View, Playlist } from './types'; +import { generateApi, songsApi, playlistsApi, getAudioUrl, preferencesApi } from './services/api'; +import { useAuth } from './context/AuthContext'; +import { useResponsive } from './context/ResponsiveContext'; +import { List } from 'lucide-react'; +import { PlaylistDetail } from './components/PlaylistDetail'; +import { Toast, ToastType } from './components/Toast'; +import { SearchPage } from './components/SearchPage'; +import { TrainingPanel } from './components/TrainingPanel'; +import { StemSplittingPanel } from './components/StemSplittingPanel'; +import { VoiceCloningPanel } from './components/VoiceCloningPanel'; +import { MidiPanel } from './components/MidiPanel'; + + +export default function App() { + // Responsive + const { isMobile, isDesktop } = useResponsive(); + + // Auth + const { user, token, isLoading: authLoading } = useAuth(); + // Track multiple concurrent generation jobs + const activeJobsRef = useRef }>>(new Map()); + const [activeJobCount, setActiveJobCount] = useState(0); + + // Theme State + const [theme, setTheme] = useState<'dark' | 'light'>(() => { + const stored = localStorage.getItem('theme'); + if (stored === 'dark' || stored === 'light') return stored; + return window.matchMedia('(prefers-color-scheme: dark)').matches ? 
'dark' : 'light'; + }); + + // Navigation State - default to create view + const [currentView, setCurrentView] = useState('create'); + + // Content State + const [songs, setSongs] = useState([]); + const [playlists, setPlaylists] = useState([]); + const [likedSongIds, setLikedSongIds] = useState>(new Set()); + const [playQueue, setPlayQueue] = useState([]); + const [queueIndex, setQueueIndex] = useState(-1); + const [isRefreshingLibrary, setIsRefreshingLibrary] = useState(false); + + // Selection State + const [currentSong, setCurrentSong] = useState(null); + const [selectedSong, setSelectedSong] = useState(null); + const [selectedPlaylist, setSelectedPlaylist] = useState(null); + + // Player State + const [isPlaying, setIsPlaying] = useState(false); + const [currentTime, setCurrentTime] = useState(0); + const [duration, setDuration] = useState(0); + const [volume, setVolume] = useState(0.8); + const [isShuffle, setIsShuffle] = useState(false); + const [repeatMode, setRepeatMode] = useState<'none' | 'all' | 'one'>('all'); + + // UI State + const [isGenerating, setIsGenerating] = useState(false); + const [showRightSidebar, setShowRightSidebar] = useState(true); + + // Mobile UI Toggle + const [mobileShowList, setMobileShowList] = useState(false); + + // Modals + const [isCreatePlaylistModalOpen, setIsCreatePlaylistModalOpen] = useState(false); + const [isAddToPlaylistModalOpen, setIsAddToPlaylistModalOpen] = useState(false); + const [songToAddToPlaylist, setSongToAddToPlaylist] = useState(null); + + // Video Modal + const [isVideoModalOpen, setIsVideoModalOpen] = useState(false); + const [songForVideo, setSongForVideo] = useState(null); + + // Settings Modal + const [showSettingsModal, setShowSettingsModal] = useState(false); + // Console (logs / errors) + const [showConsole, setShowConsole] = useState(false); + + // Profile View + const [viewingUsername, setViewingUsername] = useState(null); + + // Song View + const [viewingSongId, setViewingSongId] = 
useState(null); + + // Playlist View + const [viewingPlaylistId, setViewingPlaylistId] = useState(null); + + // Reuse State + const [reuseData, setReuseData] = useState<{ song: Song, timestamp: number } | null>(null); + + const audioRef = useRef(null); + const pendingSeekRef = useRef(null); + const playNextRef = useRef<() => void>(() => {}); + + // Mobile Details Modal State + const [showMobileDetails, setShowMobileDetails] = useState(false); + + // Toast State + const [toast, setToast] = useState<{ message: string; type: ToastType; isVisible: boolean }>({ + message: '', + type: 'success', + isVisible: false, + }); + + const showToast = (message: string, type: ToastType = 'success') => { + setToast({ message, type, isVisible: true }); + }; + + const closeToast = () => { + setToast(prev => ({ ...prev, isVisible: false })); + }; + + // Load playlists (local app: always "logged in") + useEffect(() => { + playlistsApi.getMyPlaylists(token ?? undefined) + .then(res => setPlaylists(res.playlists)) + .catch(err => console.error('Failed to load playlists', err)); + }, [token]); + + // Cleanup active jobs on unmount + useEffect(() => { + return () => { + // Clear all polling intervals when component unmounts + activeJobsRef.current.forEach(({ pollInterval }) => { + clearInterval(pollInterval); + }); + activeJobsRef.current.clear(); + }; + }, []); + + const handleShowDetails = (song: Song) => { + setSelectedSong(song); + setShowMobileDetails(true); + }; + + // Reuse Handler + const handleReuse = (song: Song) => { + setReuseData({ song, timestamp: Date.now() }); + setCurrentView('create'); + setMobileShowList(false); + }; + + // Song Update Handler + const handleSongUpdate = (updatedSong: Song) => { + setSongs(prev => prev.map(s => s.id === updatedSong.id ? 
updatedSong : s)); + if (selectedSong?.id === updatedSong.id) { + setSelectedSong(updatedSong); + } + }; + + // Navigate to Profile Handler + const handleNavigateToProfile = (username: string) => { + setViewingUsername(username); + setCurrentView('profile'); + window.history.pushState({}, '', `/@${username}`); + }; + + // Back from Profile Handler + const handleBackFromProfile = () => { + setViewingUsername(null); + setCurrentView('create'); + window.history.pushState({}, '', '/'); + }; + + // Navigate to Song Handler + const handleNavigateToSong = (songId: string) => { + setViewingSongId(songId); + setCurrentView('song'); + window.history.pushState({}, '', `/song/${songId}`); + }; + + // Back from Song Handler + const handleBackFromSong = () => { + setViewingSongId(null); + setCurrentView('create'); + window.history.pushState({}, '', '/'); + }; + + // Theme Effect + useEffect(() => { + localStorage.setItem('theme', theme); + if (theme === 'dark') { + document.documentElement.classList.add('dark'); + } else { + document.documentElement.classList.remove('dark'); + } + }, [theme]); + + const toggleTheme = () => { + setTheme(prev => prev === 'dark' ? 
'light' : 'dark'); + }; + + // URL Routing Effect + useEffect(() => { + const handleUrlChange = () => { + const path = window.location.pathname; + const params = new URLSearchParams(window.location.search); + + // Handle ?song= query parameter + const songParam = params.get('song'); + if (songParam) { + setViewingSongId(songParam); + setCurrentView('song'); + window.history.replaceState({}, '', `/song/${songParam}`); + return; + } + + if (path === '/create' || path === '/') { + setCurrentView('create'); + setMobileShowList(false); + } else if (path === '/library') { + setCurrentView('library'); + } else if (path.startsWith('/@')) { + const username = path.substring(2); + if (username) { + setViewingUsername(username); + setCurrentView('profile'); + } + } else if (path.startsWith('/song/')) { + const songId = path.substring(6); + if (songId) { + setViewingSongId(songId); + setCurrentView('song'); + } + } else if (path.startsWith('/playlist/')) { + const playlistId = path.substring(10); + if (playlistId) { + setViewingPlaylistId(playlistId); + setCurrentView('playlist'); + } + } else if (path === '/search') { + setCurrentView('search'); + } else if (path === '/training') { + setCurrentView('training'); + } else if (path === '/stem-splitting') { + setCurrentView('stem-splitting'); + } else if (path === '/voice-cloning') { + setCurrentView('voice-cloning'); + } else if (path === '/midi') { + setCurrentView('midi'); + } + }; + + handleUrlChange(); + + window.addEventListener('popstate', handleUrlChange); + return () => window.removeEventListener('popstate', handleUrlChange); + }, []); + + // Load Songs Effect (local app: always "logged in") + useEffect(() => { + const loadSongs = async () => { + try { + const t = token ?? 
''; + const [mySongsRes, likedSongsRes] = await Promise.all([ + songsApi.getMySongs(t), + songsApi.getLikedSongs(t) + ]); + + const mapSong = (s: any): Song => ({ + id: s.id, + title: s.title, + lyrics: s.lyrics, + style: s.style, + coverUrl: `https://picsum.photos/seed/${s.id}/400/400`, + duration: s.duration && s.duration > 0 ? `${Math.floor(s.duration / 60)}:${String(Math.floor(s.duration % 60)).padStart(2, '0')}` : '0:00', + createdAt: new Date(s.created_at || s.createdAt), + tags: s.tags || [], + audioUrl: getAudioUrl(s.audio_url, s.id), + isPublic: s.is_public, + likeCount: s.like_count || 0, + viewCount: s.view_count || 0, + userId: s.user_id, + creator: s.creator, + }); + + const mySongs = mySongsRes.songs.map(mapSong); + const likedSongs = likedSongsRes.songs.map(mapSong); + + const songsMap = new Map(); + [...mySongs, ...likedSongs].forEach(s => songsMap.set(s.id, s)); + + // Preserve any generating songs (temp songs) + setSongs(prev => { + const generatingSongs = prev.filter(s => s.isGenerating); + const loadedSongs = Array.from(songsMap.values()); + return [...generatingSongs, ...loadedSongs]; + }); + + const likedIds = new Set(likedSongs.map(s => s.id)); + setLikedSongIds(likedIds); + + } catch (error) { + console.error('Failed to load songs:', error); + } + }; + + loadSongs(); + }, [token]); + + // Player Logic + const getActiveQueue = (song?: Song) => { + if (playQueue.length > 0) return playQueue; + if (song && songs.some(s => s.id === song.id)) return songs; + return songs; + }; + + const playNext = useCallback(() => { + if (!currentSong) return; + const queue = getActiveQueue(currentSong); + if (queue.length === 0) return; + + const currentIndex = queueIndex >= 0 && queue[queueIndex]?.id === currentSong.id + ? 
queueIndex + : queue.findIndex(s => s.id === currentSong.id); + if (currentIndex === -1) return; + + if (repeatMode === 'one') { + if (audioRef.current) { + audioRef.current.currentTime = 0; + audioRef.current.play(); + } + return; + } + + let nextIndex; + if (isShuffle) { + do { + nextIndex = Math.floor(Math.random() * queue.length); + } while (queue.length > 1 && nextIndex === currentIndex); + } else { + nextIndex = (currentIndex + 1) % queue.length; + } + + const nextSong = queue[nextIndex]; + setQueueIndex(nextIndex); + setCurrentSong(nextSong); + setIsPlaying(true); + }, [currentSong, queueIndex, isShuffle, repeatMode, playQueue, songs]); + + const playPrevious = useCallback(() => { + if (!currentSong) return; + const queue = getActiveQueue(currentSong); + if (queue.length === 0) return; + + const currentIndex = queueIndex >= 0 && queue[queueIndex]?.id === currentSong.id + ? queueIndex + : queue.findIndex(s => s.id === currentSong.id); + if (currentIndex === -1) return; + + if (currentTime > 3) { + if (audioRef.current) audioRef.current.currentTime = 0; + return; + } + + let prevIndex = (currentIndex - 1 + queue.length) % queue.length; + if (isShuffle) { + prevIndex = Math.floor(Math.random() * queue.length); + } + + const prevSong = queue[prevIndex]; + setQueueIndex(prevIndex); + setCurrentSong(prevSong); + setIsPlaying(true); + }, [currentSong, queueIndex, currentTime, isShuffle, playQueue, songs]); + + useEffect(() => { + playNextRef.current = playNext; + }, [playNext]); + + // Audio Setup + useEffect(() => { + audioRef.current = new Audio(); + audioRef.current.crossOrigin = "anonymous"; + const audio = audioRef.current; + audio.volume = volume; + + const onTimeUpdate = () => setCurrentTime(audio.currentTime); + const applyPendingSeek = () => { + if (pendingSeekRef.current === null) return; + if (audio.seekable.length === 0) return; + const target = pendingSeekRef.current; + const safeTarget = Number.isFinite(audio.duration) + ? 
Math.min(Math.max(target, 0), audio.duration) + : Math.max(target, 0); + audio.currentTime = safeTarget; + setCurrentTime(safeTarget); + pendingSeekRef.current = null; + }; + + const onLoadedMetadata = () => { + setDuration(audio.duration); + applyPendingSeek(); + }; + + const onCanPlay = () => { + applyPendingSeek(); + }; + + const onProgress = () => { + applyPendingSeek(); + }; + + const onEnded = () => { + playNextRef.current(); + }; + + const onError = (e: Event) => { + if (audio.error && audio.error.code !== 1) { + console.error("Audio playback error:", audio.error); + if (audio.error.code === 4) { + showToast('This song is no longer available.', 'error'); + } else { + showToast('Unable to play this song.', 'error'); + } + } + setIsPlaying(false); + }; + + audio.addEventListener('timeupdate', onTimeUpdate); + audio.addEventListener('loadedmetadata', onLoadedMetadata); + audio.addEventListener('canplay', onCanPlay); + audio.addEventListener('progress', onProgress); + audio.addEventListener('ended', onEnded); + audio.addEventListener('error', onError); + + return () => { + audio.pause(); + audio.removeEventListener('timeupdate', onTimeUpdate); + audio.removeEventListener('loadedmetadata', onLoadedMetadata); + audio.removeEventListener('canplay', onCanPlay); + audio.removeEventListener('progress', onProgress); + audio.removeEventListener('ended', onEnded); + audio.removeEventListener('error', onError); + }; + }, []); + + // Handle Playback State + useEffect(() => { + const audio = audioRef.current; + if (!audio || !currentSong?.audioUrl) return; + + const playAudio = async () => { + try { + await audio.play(); + } catch (err) { + if (err instanceof Error && err.name !== 'AbortError') { + console.error("Playback failed:", err); + if (err.name === 'NotSupportedError') { + showToast('This song is no longer available.', 'error'); + } + setIsPlaying(false); + } + } + }; + + if (audio.src !== currentSong.audioUrl) { + audio.src = currentSong.audioUrl; + audio.load(); + 
if (isPlaying) playAudio(); + } else { + if (isPlaying) playAudio(); + else audio.pause(); + } + }, [currentSong, isPlaying]); + + // Handle Volume + useEffect(() => { + if (audioRef.current) { + audioRef.current.volume = volume; + } + }, [volume]); + + // Helper to cleanup a job and check if all jobs are done + const cleanupJob = useCallback((jobId: string, tempId: string) => { + const jobData = activeJobsRef.current.get(jobId); + if (jobData) { + clearInterval(jobData.pollInterval); + activeJobsRef.current.delete(jobId); + } + + // Remove temp song + setSongs(prev => prev.filter(s => s.id !== tempId)); + + // Update active job count + setActiveJobCount(activeJobsRef.current.size); + + // If no more active jobs, set isGenerating to false + if (activeJobsRef.current.size === 0) { + setIsGenerating(false); + } + }, []); + + // Refresh songs list (called when any job completes successfully) + const refreshSongsList = useCallback(async () => { + try { + const response = await songsApi.getMySongs(token ?? ''); + const loadedSongs: Song[] = response.songs.map(s => ({ + id: s.id, + title: s.title, + lyrics: s.lyrics, + style: s.style, + coverUrl: `https://picsum.photos/seed/${s.id}/400/400`, + duration: s.duration && s.duration > 0 ? 
`${Math.floor(s.duration / 60)}:${String(Math.floor(s.duration % 60)).padStart(2, '0')}` : '0:00', + createdAt: new Date(s.created_at), + tags: s.tags || [], + audioUrl: getAudioUrl(s.audio_url, s.id), + isPublic: s.is_public, + likeCount: s.like_count || 0, + viewCount: s.view_count || 0, + userId: s.user_id, + creator: s.creator, + })); + + // Preserve any generating songs that aren't in the loaded list + setSongs(prev => { + const generatingSongs = prev.filter(s => s.isGenerating); + const mergedSongs = [...generatingSongs]; + for (const song of loadedSongs) { + if (!mergedSongs.some(s => s.id === song.id)) { + mergedSongs.push(song); + } + } + // Sort by creation date, newest first + return mergedSongs.sort((a, b) => b.createdAt.getTime() - a.createdAt.getTime()); + }); + } catch (error) { + console.error('Failed to refresh songs:', error); + } + }, [token]); + + const handleRefreshLibrary = useCallback(async () => { + setIsRefreshingLibrary(true); + try { + await refreshSongsList(); + } finally { + setIsRefreshingLibrary(false); + } + }, [refreshSongsList]); + + // Refresh library when navigating to Library (picks up API-completed generations) + useEffect(() => { + if (currentView === 'library') { + refreshSongsList(); + } + }, [currentView, refreshSongsList]); + + // Periodic refresh when on Library or Create so API-completed tracks show up without leaving the view + const LIBRARY_REFRESH_MS = 20_000; + useEffect(() => { + if (currentView !== 'library' && currentView !== 'create') return; + const id = setInterval(refreshSongsList, LIBRARY_REFRESH_MS); + return () => clearInterval(id); + }, [currentView, refreshSongsList]); + + // Refresh library when tab/window gains focus (e.g. 
user returns after an API generation in another terminal) + useEffect(() => { + const onVisible = () => { + if (document.visibilityState === 'visible') refreshSongsList(); + }; + document.addEventListener('visibilitychange', onVisible); + return () => document.removeEventListener('visibilitychange', onVisible); + }, [refreshSongsList]); + + // Handlers (local app: always "logged in", no auth checks) + const handleGenerate = async (params: GenerationParams) => { + console.log('[Create] handleGenerate called', { params: { customMode: params.customMode, title: params.title } }); + + setIsGenerating(true); + setCurrentView('create'); + setMobileShowList(false); + + // Create unique temp ID for this job + const tempId = `temp_${Date.now()}_${Math.random().toString(36).substr(2, 9)}`; + const tempSong: Song = { + id: tempId, + title: params.title || 'Generating...', + lyrics: '', + style: params.style, + coverUrl: 'https://picsum.photos/200/200?blur=10', + duration: '--:--', + createdAt: new Date(), + isGenerating: true, + tags: params.customMode ? 
['custom'] : ['simple'], + isPublic: true + }; + + setSongs(prev => [tempSong, ...prev]); + setSelectedSong(tempSong); + setShowRightSidebar(true); + + try { + const prefs = await preferencesApi.get(); + const genParams = { + customMode: params.customMode, + songDescription: params.songDescription, + lyrics: params.lyrics, + style: params.style, + title: params.title, + instrumental: params.instrumental, + vocalLanguage: params.vocalLanguage, + duration: params.duration, + bpm: params.bpm, + keyScale: params.keyScale, + timeSignature: params.timeSignature, + inferenceSteps: params.inferenceSteps, + guidanceScale: params.guidanceScale, + batchSize: params.batchSize, + randomSeed: params.randomSeed, + seed: params.seed, + thinking: params.thinking, + audioFormat: params.audioFormat, + inferMethod: params.inferMethod, + shift: params.shift, + lmTemperature: params.lmTemperature, + lmCfgScale: params.lmCfgScale, + lmTopK: params.lmTopK, + lmTopP: params.lmTopP, + lmNegativePrompt: params.lmNegativePrompt, + referenceAudioUrl: params.referenceAudioUrl, + sourceAudioUrl: params.sourceAudioUrl, + audioCodes: params.audioCodes, + repaintingStart: params.repaintingStart, + repaintingEnd: params.repaintingEnd, + instruction: params.instruction, + audioCoverStrength: params.audioCoverStrength, + taskType: params.taskType, + useAdg: params.useAdg, + cfgIntervalStart: params.cfgIntervalStart, + cfgIntervalEnd: params.cfgIntervalEnd, + customTimesteps: params.customTimesteps, + useCotMetas: params.useCotMetas, + useCotCaption: params.useCotCaption, + useCotLanguage: params.useCotLanguage, + autogen: params.autogen, + constrainedDecodingDebug: params.constrainedDecodingDebug, + allowLmBatch: params.allowLmBatch, + getScores: params.getScores, + getLrc: params.getLrc, + scoreScale: params.scoreScale, + lmBatchChunkSize: params.lmBatchChunkSize, + trackName: params.trackName, + completeTrackClasses: params.completeTrackClasses, + isFormatCaption: params.isFormatCaption, + 
...(prefs.output_dir ? { outputDir: prefs.output_dir } : {}), + }; + console.log('[Create] Calling POST /api/generate'); + const job = await generateApi.startGeneration(genParams, token ?? ''); + + // Poll for completion - each job has its own polling interval + const pollInterval = setInterval(async () => { + try { + const status = await generateApi.getStatus(job.jobId, token ?? ''); + + // Update queue position on the temp song + setSongs(prev => prev.map(s => { + if (s.id === tempId) { + return { + ...s, + queuePosition: status.status === 'queued' ? status.queuePosition : undefined, + }; + } + return s; + })); + + if (status.status === 'succeeded' && status.result) { + cleanupJob(job.jobId, tempId); + await refreshSongsList(); + + if (window.innerWidth < 768) { + setMobileShowList(true); + } + } else if (status.status === 'failed') { + cleanupJob(job.jobId, tempId); + console.error(`Job ${job.jobId} failed:`, status.error); + showToast(`Generation failed: ${status.error || 'Unknown error'}`, 'error'); + } + } catch (pollError) { + console.error(`Polling error for job ${job.jobId}:`, pollError); + cleanupJob(job.jobId, tempId); + } + }, 2000); + + // Track this job + activeJobsRef.current.set(job.jobId, { tempId, pollInterval }); + setActiveJobCount(activeJobsRef.current.size); + + // Timeout after 10 minutes + setTimeout(() => { + if (activeJobsRef.current.has(job.jobId)) { + console.warn(`Job ${job.jobId} timed out`); + cleanupJob(job.jobId, tempId); + showToast('Generation timed out', 'error'); + } + }, 600000); + + } catch (e) { + console.error('Generation error:', e); + setSongs(prev => prev.filter(s => s.id !== tempId)); + + if (activeJobsRef.current.size === 0) { + setIsGenerating(false); + } + const msg = e instanceof Error ? e.message : 'Generation failed. 
Please try again.'; + showToast(msg, 'error'); + } + }; + + const togglePlay = () => { + if (!currentSong) return; + setIsPlaying(!isPlaying); + }; + + const playSong = (song: Song, list?: Song[]) => { + const nextQueue = list && list.length > 0 + ? list + : (playQueue.length > 0 && playQueue.some(s => s.id === song.id)) + ? playQueue + : (songs.some(s => s.id === song.id) ? songs : [song]); + const nextIndex = nextQueue.findIndex(s => s.id === song.id); + setPlayQueue(nextQueue); + setQueueIndex(nextIndex); + + if (currentSong?.id !== song.id) { + const updatedSong = { ...song, viewCount: (song.viewCount || 0) + 1 }; + setCurrentSong(updatedSong); + setSelectedSong(updatedSong); + setIsPlaying(true); + setSongs(prev => prev.map(s => s.id === song.id ? updatedSong : s)); + songsApi.trackPlay(song.id, token ?? '').catch(err => console.error('Failed to track play:', err)); + } else { + togglePlay(); + } + if (currentSong?.id === song.id) { + setSelectedSong(song); + } + setShowRightSidebar(true); + }; + + const handleSeek = (time: number) => { + const audio = audioRef.current; + if (!audio) return; + if (Number.isNaN(audio.duration) || audio.readyState < 1 || audio.seekable.length === 0) { + pendingSeekRef.current = time; + return; + } + audio.currentTime = time; + setCurrentTime(time); + }; + + const toggleLike = async (songId: string) => { + const isLiked = likedSongIds.has(songId); + + // Optimistic update + setLikedSongIds(prev => { + const next = new Set(prev); + if (isLiked) next.delete(songId); + else next.add(songId); + return next; + }); + + setSongs(prev => prev.map(s => { + if (s.id === songId) { + const newCount = (s.likeCount || 0) + (isLiked ? -1 : 1); + return { ...s, likeCount: Math.max(0, newCount) }; + } + return s; + })); + + if (selectedSong?.id === songId) { + setSelectedSong(prev => prev ? { + ...prev, + likeCount: Math.max(0, (prev.likeCount || 0) + (isLiked ? 
-1 : 1)) + } : null); + } + + // Persist to database + try { + await songsApi.toggleLike(songId, token ?? ''); + } catch (error) { + console.error('Failed to toggle like:', error); + // Revert on error + setLikedSongIds(prev => { + const next = new Set(prev); + if (isLiked) next.add(songId); + else next.delete(songId); + return next; + }); + } + }; + + const handleDeleteSong = async (song: Song) => { + // Show confirmation dialog + const confirmed = window.confirm( + `Are you sure you want to delete "${song.title}"? This action cannot be undone.` + ); + + if (!confirmed) return; + + try { + // Call API to delete song + await songsApi.deleteSong(song.id, token ?? ''); + + // Remove from songs list + setSongs(prev => prev.filter(s => s.id !== song.id)); + + // Remove from liked songs if it was liked + setLikedSongIds(prev => { + const next = new Set(prev); + next.delete(song.id); + return next; + }); + + // Handle if deleted song is currently selected + if (selectedSong?.id === song.id) { + setSelectedSong(null); + } + + // Handle if deleted song is currently playing + if (currentSong?.id === song.id) { + setCurrentSong(null); + setIsPlaying(false); + if (audioRef.current) { + audioRef.current.pause(); + audioRef.current.src = ''; + } + } + + // Remove from play queue if present + setPlayQueue(prev => prev.filter(s => s.id !== song.id)); + + showToast('Song deleted successfully'); + } catch (error) { + console.error('Failed to delete song:', error); + showToast('Failed to delete song', 'error'); + } + }; + + const createPlaylist = async (name: string, description: string) => { + try { + const res = await playlistsApi.create(name, description, true, token ?? ''); + setPlaylists(prev => [res.playlist, ...prev]); + + if (songToAddToPlaylist) { + await playlistsApi.addSong(res.playlist.id, songToAddToPlaylist.id, token ?? ''); + setSongToAddToPlaylist(null); + playlistsApi.getMyPlaylists(token ?? 
undefined).then(r => setPlaylists(r.playlists)); + } + showToast('Playlist created successfully!'); + } catch (error) { + console.error('Create playlist error:', error); + showToast('Failed to create playlist', 'error'); + } + }; + + const openAddToPlaylistModal = (song: Song) => { + setSongToAddToPlaylist(song); + setIsAddToPlaylistModalOpen(true); + }; + + const addSongToPlaylist = async (playlistId: string) => { + if (!songToAddToPlaylist) return; + try { + await playlistsApi.addSong(playlistId, songToAddToPlaylist.id, token ?? ''); + setSongToAddToPlaylist(null); + showToast('Song added to playlist'); + playlistsApi.getMyPlaylists(token ?? undefined).then(r => setPlaylists(r.playlists)); + } catch (error) { + console.error('Add song error:', error); + showToast('Failed to add song to playlist', 'error'); + } + }; + + const handleNavigateToPlaylist = (playlistId: string) => { + setViewingPlaylistId(playlistId); + setCurrentView('playlist'); + window.history.pushState({}, '', `/playlist/${playlistId}`); + }; + + const handleBackFromPlaylist = () => { + setViewingPlaylistId(null); + setCurrentView('library'); + window.history.pushState({}, '', '/library'); + }; + + const openVideoGenerator = (song: Song) => { + if (isPlaying) { + setIsPlaying(false); + if (audioRef.current) audioRef.current.pause(); + } + setSongForVideo(song); + setIsVideoModalOpen(true); + }; + + // Render Layout Logic + const renderContent = () => { + switch (currentView) { + case 'library': + return ( + likedSongIds.has(s.id))} + playlists={playlists} + onPlaySong={playSong} + onCreatePlaylist={() => { + setSongToAddToPlaylist(null); + setIsCreatePlaylistModalOpen(true); + }} + onSelectPlaylist={(p) => handleNavigateToPlaylist(p.id)} + onRefreshLibrary={handleRefreshLibrary} + isRefreshingLibrary={isRefreshingLibrary} + /> + ); + + case 'profile': + if (!viewingUsername) return null; + return ( + + ); + + case 'playlist': + if (!viewingPlaylistId) return null; + return ( + { + 
setSelectedSong(s); + setShowRightSidebar(true); + }} + onNavigateToProfile={handleNavigateToProfile} + /> + ); + + case 'song': + if (!viewingSongId) return null; + return ( + + ); + + case 'search': + return ( + + ); + + case 'training': + case 'stem-splitting': + case 'voice-cloning': + case 'midi': + return ( +
+
+ {currentView === 'training' && } + {currentView === 'stem-splitting' && } + {currentView === 'voice-cloning' && } + {currentView === 'midi' && } +
+
+ { + setSelectedSong(s); + setShowRightSidebar(true); + }} + onToggleLike={toggleLike} + onAddToPlaylist={openAddToPlaylistModal} + onOpenVideo={openVideoGenerator} + onShowDetails={handleShowDetails} + onNavigateToProfile={handleNavigateToProfile} + onReusePrompt={handleReuse} + onDelete={handleDeleteSong} + /> +
+ {showRightSidebar && ( +
+ setShowRightSidebar(false)} + onOpenVideo={() => selectedSong && openVideoGenerator(selectedSong)} + onReuse={handleReuse} + onSongUpdate={handleSongUpdate} + onNavigateToProfile={handleNavigateToProfile} + onNavigateToSong={handleNavigateToSong} + isLiked={selectedSong ? likedSongIds.has(selectedSong.id) : false} + onToggleLike={toggleLike} + onPlay={playSong} + isPlaying={isPlaying} + currentSong={currentSong} + onDelete={handleDeleteSong} + /> +
+ )} +
+ +
+
+ ); + + case 'create': + default: + return ( +
+ {/* Create Panel */} +
+ +
+ + {/* Song List */} +
+ { + setSelectedSong(s); + setShowRightSidebar(true); + }} + onToggleLike={toggleLike} + onAddToPlaylist={openAddToPlaylistModal} + onOpenVideo={openVideoGenerator} + onShowDetails={handleShowDetails} + onNavigateToProfile={handleNavigateToProfile} + onReusePrompt={handleReuse} + onDelete={handleDeleteSong} + /> +
+ + {/* Right Sidebar */} + {showRightSidebar && ( +
+ setShowRightSidebar(false)} + onOpenVideo={() => selectedSong && openVideoGenerator(selectedSong)} + onReuse={handleReuse} + onSongUpdate={handleSongUpdate} + onNavigateToProfile={handleNavigateToProfile} + onNavigateToSong={handleNavigateToSong} + isLiked={selectedSong ? likedSongIds.has(selectedSong.id) : false} + onToggleLike={toggleLike} + onPlay={playSong} + isPlaying={isPlaying} + currentSong={currentSong} + onDelete={handleDeleteSong} + /> +
+ )} + + {/* Mobile Toggle Button */} +
+ +
+
+ ); + } + }; + + return ( +
+
+ { + setCurrentView(v); + if (v === 'create') { + setMobileShowList(false); + window.history.pushState({}, '', '/'); + } else if (v === 'library') { + window.history.pushState({}, '', '/library'); + } else if (v === 'search') { + window.history.pushState({}, '', '/search'); + } else if (v === 'training') { + window.history.pushState({}, '', '/training'); + } else if (v === 'stem-splitting') { + window.history.pushState({}, '', '/stem-splitting'); + } else if (v === 'voice-cloning') { + window.history.pushState({}, '', '/voice-cloning'); + } else if (v === 'midi') { + window.history.pushState({}, '', '/midi'); + } + }} + theme={theme} + onToggleTheme={toggleTheme} + user={user} + onOpenConsole={() => setShowConsole(true)} + onOpenSettings={() => setShowSettingsModal(true)} + /> + +
+ {renderContent()} +
+
+ + setIsShuffle(!isShuffle)} + repeatMode={repeatMode} + onToggleRepeat={() => setRepeatMode(prev => prev === 'none' ? 'all' : prev === 'all' ? 'one' : 'none')} + isLiked={currentSong ? likedSongIds.has(currentSong.id) : false} + onToggleLike={() => currentSong && toggleLike(currentSong.id)} + onNavigateToSong={handleNavigateToSong} + onOpenVideo={() => currentSong && openVideoGenerator(currentSong)} + onReusePrompt={() => currentSong && handleReuse(currentSong)} + onAddToPlaylist={() => currentSong && openAddToPlaylistModal(currentSong)} + onDelete={() => currentSong && handleDeleteSong(currentSong)} + /> + + setIsCreatePlaylistModalOpen(false)} + onCreate={createPlaylist} + /> + setIsAddToPlaylistModalOpen(false)} + playlists={playlists} + onSelect={addSongToPlaylist} + onCreateNew={() => { + setIsAddToPlaylistModalOpen(false); + setIsCreatePlaylistModalOpen(true); + }} + /> + + setIsVideoModalOpen(false)} + song={songForVideo} + /> + setShowConsole(false)} + /> + setShowSettingsModal(false)} + theme={theme} + onToggleTheme={toggleTheme} + onNavigateToProfile={handleNavigateToProfile} + /> + + {/* Mobile Details Modal */} + {showMobileDetails && selectedSong && ( +
+
setShowMobileDetails(false)} + /> +
+ setShowMobileDetails(false)} + onOpenVideo={() => selectedSong && openVideoGenerator(selectedSong)} + onReuse={handleReuse} + onSongUpdate={handleSongUpdate} + onNavigateToProfile={handleNavigateToProfile} + onNavigateToSong={handleNavigateToSong} + isLiked={selectedSong ? likedSongIds.has(selectedSong.id) : false} + onToggleLike={toggleLike} + onPlay={playSong} + isPlaying={isPlaying} + currentSong={currentSong} + onDelete={handleDeleteSong} + /> +
+
+ )} +
+ ); +} diff --git a/ui/README.md b/ui/README.md new file mode 100644 index 0000000..f139f6b --- /dev/null +++ b/ui/README.md @@ -0,0 +1,6 @@ +# AceForge New UI + +Ported from [fspecii/ace-step-ui](https://github.com/fspecii/ace-step-ui). Maintained in-tree; **no external dependency** on that repository. + +- **Dev:** `npm run dev` — Vite dev server (port 3000) proxies `/api` and `/audio` to Flask (5056). +- **Build:** `npm run build` — Output in `dist/`; served by Flask at `/` in production. diff --git a/ui/bun.lock b/ui/bun.lock new file mode 100644 index 0000000..38c2b59 --- /dev/null +++ b/ui/bun.lock @@ -0,0 +1,422 @@ +{ + "lockfileVersion": 1, + "configVersion": 0, + "workspaces": { + "": { + "name": "ace-step-ui", + "dependencies": { + "@ffmpeg/ffmpeg": "^0.12.15", + "@ffmpeg/util": "^0.12.2", + "@google/genai": "^1.38.0", + "lucide-react": "^0.563.0", + "react": "^19.2.4", + "react-dom": "^19.2.4", + }, + "devDependencies": { + "@types/node": "^22.14.0", + "@vitejs/plugin-react": "^5.0.0", + "typescript": "~5.8.2", + "vite": "^6.2.0", + }, + }, + }, + "packages": { + "@babel/code-frame": ["@babel/code-frame@7.29.0", "", { "dependencies": { "@babel/helper-validator-identifier": "^7.28.5", "js-tokens": "^4.0.0", "picocolors": "^1.1.1" } }, "sha512-9NhCeYjq9+3uxgdtp20LSiJXJvN0FeCtNGpJxuMFZ1Kv3cWUNb6DOhJwUvcVCzKGR66cw4njwM6hrJLqgOwbcw=="], + + "@babel/compat-data": ["@babel/compat-data@7.29.0", "", {}, "sha512-T1NCJqT/j9+cn8fvkt7jtwbLBfLC/1y1c7NtCeXFRgzGTsafi68MRv8yzkYSapBnFA6L3U2VSc02ciDzoAJhJg=="], + + "@babel/core": ["@babel/core@7.29.0", "", { "dependencies": { "@babel/code-frame": "^7.29.0", "@babel/generator": "^7.29.0", "@babel/helper-compilation-targets": "^7.28.6", "@babel/helper-module-transforms": "^7.28.6", "@babel/helpers": "^7.28.6", "@babel/parser": "^7.29.0", "@babel/template": "^7.28.6", "@babel/traverse": "^7.29.0", "@babel/types": "^7.29.0", "@jridgewell/remapping": "^2.3.5", "convert-source-map": "^2.0.0", "debug": "^4.1.0", "gensync": 
"^1.0.0-beta.2", "json5": "^2.2.3", "semver": "^6.3.1" } }, "sha512-CGOfOJqWjg2qW/Mb6zNsDm+u5vFQ8DxXfbM09z69p5Z6+mE1ikP2jUXw+j42Pf1XTYED2Rni5f95npYeuwMDQA=="], + + "@babel/generator": ["@babel/generator@7.29.0", "", { "dependencies": { "@babel/parser": "^7.29.0", "@babel/types": "^7.29.0", "@jridgewell/gen-mapping": "^0.3.12", "@jridgewell/trace-mapping": "^0.3.28", "jsesc": "^3.0.2" } }, "sha512-vSH118/wwM/pLR38g/Sgk05sNtro6TlTJKuiMXDaZqPUfjTFcudpCOt00IhOfj+1BFAX+UFAlzCU+6WXr3GLFQ=="], + + "@babel/helper-compilation-targets": ["@babel/helper-compilation-targets@7.28.6", "", { "dependencies": { "@babel/compat-data": "^7.28.6", "@babel/helper-validator-option": "^7.27.1", "browserslist": "^4.24.0", "lru-cache": "^5.1.1", "semver": "^6.3.1" } }, "sha512-JYtls3hqi15fcx5GaSNL7SCTJ2MNmjrkHXg4FSpOA/grxK8KwyZ5bubHsCq8FXCkua6xhuaaBit+3b7+VZRfcA=="], + + "@babel/helper-globals": ["@babel/helper-globals@7.28.0", "", {}, "sha512-+W6cISkXFa1jXsDEdYA8HeevQT/FULhxzR99pxphltZcVaugps53THCeiWA8SguxxpSp3gKPiuYfSWopkLQ4hw=="], + + "@babel/helper-module-imports": ["@babel/helper-module-imports@7.28.6", "", { "dependencies": { "@babel/traverse": "^7.28.6", "@babel/types": "^7.28.6" } }, "sha512-l5XkZK7r7wa9LucGw9LwZyyCUscb4x37JWTPz7swwFE/0FMQAGpiWUZn8u9DzkSBWEcK25jmvubfpw2dnAMdbw=="], + + "@babel/helper-module-transforms": ["@babel/helper-module-transforms@7.28.6", "", { "dependencies": { "@babel/helper-module-imports": "^7.28.6", "@babel/helper-validator-identifier": "^7.28.5", "@babel/traverse": "^7.28.6" }, "peerDependencies": { "@babel/core": "^7.0.0" } }, "sha512-67oXFAYr2cDLDVGLXTEABjdBJZ6drElUSI7WKp70NrpyISso3plG9SAGEF6y7zbha/wOzUByWWTJvEDVNIUGcA=="], + + "@babel/helper-plugin-utils": ["@babel/helper-plugin-utils@7.28.6", "", {}, "sha512-S9gzZ/bz83GRysI7gAD4wPT/AI3uCnY+9xn+Mx/KPs2JwHJIz1W8PZkg2cqyt3RNOBM8ejcXhV6y8Og7ly/Dug=="], + + "@babel/helper-string-parser": ["@babel/helper-string-parser@7.27.1", "", {}, 
"sha512-qMlSxKbpRlAridDExk92nSobyDdpPijUq2DW6oDnUqd0iOGxmQjyqhMIihI9+zv4LPyZdRje2cavWPbCbWm3eA=="], + + "@babel/helper-validator-identifier": ["@babel/helper-validator-identifier@7.28.5", "", {}, "sha512-qSs4ifwzKJSV39ucNjsvc6WVHs6b7S03sOh2OcHF9UHfVPqWWALUsNUVzhSBiItjRZoLHx7nIarVjqKVusUZ1Q=="], + + "@babel/helper-validator-option": ["@babel/helper-validator-option@7.27.1", "", {}, "sha512-YvjJow9FxbhFFKDSuFnVCe2WxXk1zWc22fFePVNEaWJEu8IrZVlda6N0uHwzZrUM1il7NC9Mlp4MaJYbYd9JSg=="], + + "@babel/helpers": ["@babel/helpers@7.28.6", "", { "dependencies": { "@babel/template": "^7.28.6", "@babel/types": "^7.28.6" } }, "sha512-xOBvwq86HHdB7WUDTfKfT/Vuxh7gElQ+Sfti2Cy6yIWNW05P8iUslOVcZ4/sKbE+/jQaukQAdz/gf3724kYdqw=="], + + "@babel/parser": ["@babel/parser@7.29.0", "", { "dependencies": { "@babel/types": "^7.29.0" }, "bin": { "parser": "bin/babel-parser.js" } }, "sha512-IyDgFV5GeDUVX4YdF/3CPULtVGSXXMLh1xVIgdCgxApktqnQV0r7/8Nqthg+8YLGaAtdyIlo2qIdZrbCv4+7ww=="], + + "@babel/plugin-transform-react-jsx-self": ["@babel/plugin-transform-react-jsx-self@7.27.1", "", { "dependencies": { "@babel/helper-plugin-utils": "^7.27.1" }, "peerDependencies": { "@babel/core": "^7.0.0-0" } }, "sha512-6UzkCs+ejGdZ5mFFC/OCUrv028ab2fp1znZmCZjAOBKiBK2jXD1O+BPSfX8X2qjJ75fZBMSnQn3Rq2mrBJK2mw=="], + + "@babel/plugin-transform-react-jsx-source": ["@babel/plugin-transform-react-jsx-source@7.27.1", "", { "dependencies": { "@babel/helper-plugin-utils": "^7.27.1" }, "peerDependencies": { "@babel/core": "^7.0.0-0" } }, "sha512-zbwoTsBruTeKB9hSq73ha66iFeJHuaFkUbwvqElnygoNbj/jHRsSeokowZFN3CZ64IvEqcmmkVe89OPXc7ldAw=="], + + "@babel/template": ["@babel/template@7.28.6", "", { "dependencies": { "@babel/code-frame": "^7.28.6", "@babel/parser": "^7.28.6", "@babel/types": "^7.28.6" } }, "sha512-YA6Ma2KsCdGb+WC6UpBVFJGXL58MDA6oyONbjyF/+5sBgxY/dwkhLogbMT2GXXyU84/IhRw/2D1Os1B/giz+BQ=="], + + "@babel/traverse": ["@babel/traverse@7.29.0", "", { "dependencies": { "@babel/code-frame": "^7.29.0", "@babel/generator": "^7.29.0", 
"@babel/helper-globals": "^7.28.0", "@babel/parser": "^7.29.0", "@babel/template": "^7.28.6", "@babel/types": "^7.29.0", "debug": "^4.3.1" } }, "sha512-4HPiQr0X7+waHfyXPZpWPfWL/J7dcN1mx9gL6WdQVMbPnF3+ZhSMs8tCxN7oHddJE9fhNE7+lxdnlyemKfJRuA=="], + + "@babel/types": ["@babel/types@7.29.0", "", { "dependencies": { "@babel/helper-string-parser": "^7.27.1", "@babel/helper-validator-identifier": "^7.28.5" } }, "sha512-LwdZHpScM4Qz8Xw2iKSzS+cfglZzJGvofQICy7W7v4caru4EaAmyUuO6BGrbyQ2mYV11W0U8j5mBhd14dd3B0A=="], + + "@esbuild/aix-ppc64": ["@esbuild/aix-ppc64@0.25.12", "", { "os": "aix", "cpu": "ppc64" }, "sha512-Hhmwd6CInZ3dwpuGTF8fJG6yoWmsToE+vYgD4nytZVxcu1ulHpUQRAB1UJ8+N1Am3Mz4+xOByoQoSZf4D+CpkA=="], + + "@esbuild/android-arm": ["@esbuild/android-arm@0.25.12", "", { "os": "android", "cpu": "arm" }, "sha512-VJ+sKvNA/GE7Ccacc9Cha7bpS8nyzVv0jdVgwNDaR4gDMC/2TTRc33Ip8qrNYUcpkOHUT5OZ0bUcNNVZQ9RLlg=="], + + "@esbuild/android-arm64": ["@esbuild/android-arm64@0.25.12", "", { "os": "android", "cpu": "arm64" }, "sha512-6AAmLG7zwD1Z159jCKPvAxZd4y/VTO0VkprYy+3N2FtJ8+BQWFXU+OxARIwA46c5tdD9SsKGZ/1ocqBS/gAKHg=="], + + "@esbuild/android-x64": ["@esbuild/android-x64@0.25.12", "", { "os": "android", "cpu": "x64" }, "sha512-5jbb+2hhDHx5phYR2By8GTWEzn6I9UqR11Kwf22iKbNpYrsmRB18aX/9ivc5cabcUiAT/wM+YIZ6SG9QO6a8kg=="], + + "@esbuild/darwin-arm64": ["@esbuild/darwin-arm64@0.25.12", "", { "os": "darwin", "cpu": "arm64" }, "sha512-N3zl+lxHCifgIlcMUP5016ESkeQjLj/959RxxNYIthIg+CQHInujFuXeWbWMgnTo4cp5XVHqFPmpyu9J65C1Yg=="], + + "@esbuild/darwin-x64": ["@esbuild/darwin-x64@0.25.12", "", { "os": "darwin", "cpu": "x64" }, "sha512-HQ9ka4Kx21qHXwtlTUVbKJOAnmG1ipXhdWTmNXiPzPfWKpXqASVcWdnf2bnL73wgjNrFXAa3yYvBSd9pzfEIpA=="], + + "@esbuild/freebsd-arm64": ["@esbuild/freebsd-arm64@0.25.12", "", { "os": "freebsd", "cpu": "arm64" }, "sha512-gA0Bx759+7Jve03K1S0vkOu5Lg/85dou3EseOGUes8flVOGxbhDDh/iZaoek11Y8mtyKPGF3vP8XhnkDEAmzeg=="], + + "@esbuild/freebsd-x64": ["@esbuild/freebsd-x64@0.25.12", "", { "os": "freebsd", 
"cpu": "x64" }, "sha512-TGbO26Yw2xsHzxtbVFGEXBFH0FRAP7gtcPE7P5yP7wGy7cXK2oO7RyOhL5NLiqTlBh47XhmIUXuGciXEqYFfBQ=="], + + "@esbuild/linux-arm": ["@esbuild/linux-arm@0.25.12", "", { "os": "linux", "cpu": "arm" }, "sha512-lPDGyC1JPDou8kGcywY0YILzWlhhnRjdof3UlcoqYmS9El818LLfJJc3PXXgZHrHCAKs/Z2SeZtDJr5MrkxtOw=="], + + "@esbuild/linux-arm64": ["@esbuild/linux-arm64@0.25.12", "", { "os": "linux", "cpu": "arm64" }, "sha512-8bwX7a8FghIgrupcxb4aUmYDLp8pX06rGh5HqDT7bB+8Rdells6mHvrFHHW2JAOPZUbnjUpKTLg6ECyzvas2AQ=="], + + "@esbuild/linux-ia32": ["@esbuild/linux-ia32@0.25.12", "", { "os": "linux", "cpu": "ia32" }, "sha512-0y9KrdVnbMM2/vG8KfU0byhUN+EFCny9+8g202gYqSSVMonbsCfLjUO+rCci7pM0WBEtz+oK/PIwHkzxkyharA=="], + + "@esbuild/linux-loong64": ["@esbuild/linux-loong64@0.25.12", "", { "os": "linux", "cpu": "none" }, "sha512-h///Lr5a9rib/v1GGqXVGzjL4TMvVTv+s1DPoxQdz7l/AYv6LDSxdIwzxkrPW438oUXiDtwM10o9PmwS/6Z0Ng=="], + + "@esbuild/linux-mips64el": ["@esbuild/linux-mips64el@0.25.12", "", { "os": "linux", "cpu": "none" }, "sha512-iyRrM1Pzy9GFMDLsXn1iHUm18nhKnNMWscjmp4+hpafcZjrr2WbT//d20xaGljXDBYHqRcl8HnxbX6uaA/eGVw=="], + + "@esbuild/linux-ppc64": ["@esbuild/linux-ppc64@0.25.12", "", { "os": "linux", "cpu": "ppc64" }, "sha512-9meM/lRXxMi5PSUqEXRCtVjEZBGwB7P/D4yT8UG/mwIdze2aV4Vo6U5gD3+RsoHXKkHCfSxZKzmDssVlRj1QQA=="], + + "@esbuild/linux-riscv64": ["@esbuild/linux-riscv64@0.25.12", "", { "os": "linux", "cpu": "none" }, "sha512-Zr7KR4hgKUpWAwb1f3o5ygT04MzqVrGEGXGLnj15YQDJErYu/BGg+wmFlIDOdJp0PmB0lLvxFIOXZgFRrdjR0w=="], + + "@esbuild/linux-s390x": ["@esbuild/linux-s390x@0.25.12", "", { "os": "linux", "cpu": "s390x" }, "sha512-MsKncOcgTNvdtiISc/jZs/Zf8d0cl/t3gYWX8J9ubBnVOwlk65UIEEvgBORTiljloIWnBzLs4qhzPkJcitIzIg=="], + + "@esbuild/linux-x64": ["@esbuild/linux-x64@0.25.12", "", { "os": "linux", "cpu": "x64" }, "sha512-uqZMTLr/zR/ed4jIGnwSLkaHmPjOjJvnm6TVVitAa08SLS9Z0VM8wIRx7gWbJB5/J54YuIMInDquWyYvQLZkgw=="], + + "@esbuild/netbsd-arm64": ["@esbuild/netbsd-arm64@0.25.12", "", { "os": "none", 
"cpu": "arm64" }, "sha512-xXwcTq4GhRM7J9A8Gv5boanHhRa/Q9KLVmcyXHCTaM4wKfIpWkdXiMog/KsnxzJ0A1+nD+zoecuzqPmCRyBGjg=="], + + "@esbuild/netbsd-x64": ["@esbuild/netbsd-x64@0.25.12", "", { "os": "none", "cpu": "x64" }, "sha512-Ld5pTlzPy3YwGec4OuHh1aCVCRvOXdH8DgRjfDy/oumVovmuSzWfnSJg+VtakB9Cm0gxNO9BzWkj6mtO1FMXkQ=="], + + "@esbuild/openbsd-arm64": ["@esbuild/openbsd-arm64@0.25.12", "", { "os": "openbsd", "cpu": "arm64" }, "sha512-fF96T6KsBo/pkQI950FARU9apGNTSlZGsv1jZBAlcLL1MLjLNIWPBkj5NlSz8aAzYKg+eNqknrUJ24QBybeR5A=="], + + "@esbuild/openbsd-x64": ["@esbuild/openbsd-x64@0.25.12", "", { "os": "openbsd", "cpu": "x64" }, "sha512-MZyXUkZHjQxUvzK7rN8DJ3SRmrVrke8ZyRusHlP+kuwqTcfWLyqMOE3sScPPyeIXN/mDJIfGXvcMqCgYKekoQw=="], + + "@esbuild/openharmony-arm64": ["@esbuild/openharmony-arm64@0.25.12", "", { "os": "none", "cpu": "arm64" }, "sha512-rm0YWsqUSRrjncSXGA7Zv78Nbnw4XL6/dzr20cyrQf7ZmRcsovpcRBdhD43Nuk3y7XIoW2OxMVvwuRvk9XdASg=="], + + "@esbuild/sunos-x64": ["@esbuild/sunos-x64@0.25.12", "", { "os": "sunos", "cpu": "x64" }, "sha512-3wGSCDyuTHQUzt0nV7bocDy72r2lI33QL3gkDNGkod22EsYl04sMf0qLb8luNKTOmgF/eDEDP5BFNwoBKH441w=="], + + "@esbuild/win32-arm64": ["@esbuild/win32-arm64@0.25.12", "", { "os": "win32", "cpu": "arm64" }, "sha512-rMmLrur64A7+DKlnSuwqUdRKyd3UE7oPJZmnljqEptesKM8wx9J8gx5u0+9Pq0fQQW8vqeKebwNXdfOyP+8Bsg=="], + + "@esbuild/win32-ia32": ["@esbuild/win32-ia32@0.25.12", "", { "os": "win32", "cpu": "ia32" }, "sha512-HkqnmmBoCbCwxUKKNPBixiWDGCpQGVsrQfJoVGYLPT41XWF8lHuE5N6WhVia2n4o5QK5M4tYr21827fNhi4byQ=="], + + "@esbuild/win32-x64": ["@esbuild/win32-x64@0.25.12", "", { "os": "win32", "cpu": "x64" }, "sha512-alJC0uCZpTFrSL0CCDjcgleBXPnCrEAhTBILpeAp7M/OFgoqtAetfBzX0xM00MUsVVPpVjlPuMbREqnZCXaTnA=="], + + "@ffmpeg/ffmpeg": ["@ffmpeg/ffmpeg@0.12.15", "", { "dependencies": { "@ffmpeg/types": "^0.12.4" } }, "sha512-1C8Obr4GsN3xw+/1Ww6PFM84wSQAGsdoTuTWPOj2OizsRDLT4CXTaVjPhkw6ARyDus1B9X/L2LiXHqYYsGnRFw=="], + + "@ffmpeg/types": ["@ffmpeg/types@0.12.4", "", {}, 
"sha512-k9vJQNBGTxE5AhYDtOYR5rO5fKsspbg51gbcwtbkw2lCdoIILzklulcjJfIDwrtn7XhDeF2M+THwJ2FGrLeV6A=="], + + "@ffmpeg/util": ["@ffmpeg/util@0.12.2", "", {}, "sha512-ouyoW+4JB7WxjeZ2y6KpRvB+dLp7Cp4ro8z0HIVpZVCM7AwFlHa0c4R8Y/a4M3wMqATpYKhC7lSFHQ0T11MEDw=="], + + "@google/genai": ["@google/genai@1.39.0", "", { "dependencies": { "google-auth-library": "^10.3.0", "protobufjs": "^7.5.4", "ws": "^8.18.0" }, "peerDependencies": { "@modelcontextprotocol/sdk": "^1.25.2" }, "optionalPeers": ["@modelcontextprotocol/sdk"] }, "sha512-Vz7AQsOdBeiIcxmXIQNy/hzDvyAOE1lSpWA10itUQza7h3aQFF6QSGaQ7o1GYsjMD3XslK4Ee/Ol0eLhRXb7gA=="], + + "@isaacs/cliui": ["@isaacs/cliui@8.0.2", "", { "dependencies": { "string-width": "^5.1.2", "string-width-cjs": "npm:string-width@^4.2.0", "strip-ansi": "^7.0.1", "strip-ansi-cjs": "npm:strip-ansi@^6.0.1", "wrap-ansi": "^8.1.0", "wrap-ansi-cjs": "npm:wrap-ansi@^7.0.0" } }, "sha512-O8jcjabXaleOG9DQ0+ARXWZBTfnP4WNAqzuiJK7ll44AmxGKv/J2M4TPjxjY3znBCfvBXFzucm1twdyFybFqEA=="], + + "@jridgewell/gen-mapping": ["@jridgewell/gen-mapping@0.3.13", "", { "dependencies": { "@jridgewell/sourcemap-codec": "^1.5.0", "@jridgewell/trace-mapping": "^0.3.24" } }, "sha512-2kkt/7niJ6MgEPxF0bYdQ6etZaA+fQvDcLKckhy1yIQOzaoKjBBjSj63/aLVjYE3qhRt5dvM+uUyfCg6UKCBbA=="], + + "@jridgewell/remapping": ["@jridgewell/remapping@2.3.5", "", { "dependencies": { "@jridgewell/gen-mapping": "^0.3.5", "@jridgewell/trace-mapping": "^0.3.24" } }, "sha512-LI9u/+laYG4Ds1TDKSJW2YPrIlcVYOwi2fUC6xB43lueCjgxV4lffOCZCtYFiH6TNOX+tQKXx97T4IKHbhyHEQ=="], + + "@jridgewell/resolve-uri": ["@jridgewell/resolve-uri@3.1.2", "", {}, "sha512-bRISgCIjP20/tbWSPWMEi54QVPRZExkuD9lJL+UIxUKtwVJA8wW1Trb1jMs1RFXo1CBTNZ/5hpC9QvmKWdopKw=="], + + "@jridgewell/sourcemap-codec": ["@jridgewell/sourcemap-codec@1.5.5", "", {}, "sha512-cYQ9310grqxueWbl+WuIUIaiUaDcj7WOq5fVhEljNVgRfOUhY9fy2zTvfoqWsnebh8Sl70VScFbICvJnLKB0Og=="], + + "@jridgewell/trace-mapping": ["@jridgewell/trace-mapping@0.3.31", "", { "dependencies": { 
"@jridgewell/resolve-uri": "^3.1.0", "@jridgewell/sourcemap-codec": "^1.4.14" } }, "sha512-zzNR+SdQSDJzc8joaeP8QQoCQr8NuYx2dIIytl1QeBEZHJ9uW6hebsrYgbz8hJwUQao3TWCMtmfV8Nu1twOLAw=="], + + "@pkgjs/parseargs": ["@pkgjs/parseargs@0.11.0", "", {}, "sha512-+1VkjdD0QBLPodGrJUeqarH8VAIvQODIbwh9XpP5Syisf7YoQgsJKPNFoqqLQlu+VQ/tVSshMR6loPMn8U+dPg=="], + + "@protobufjs/aspromise": ["@protobufjs/aspromise@1.1.2", "", {}, "sha512-j+gKExEuLmKwvz3OgROXtrJ2UG2x8Ch2YZUxahh+s1F2HZ+wAceUNLkvy6zKCPVRkU++ZWQrdxsUeQXmcg4uoQ=="], + + "@protobufjs/base64": ["@protobufjs/base64@1.1.2", "", {}, "sha512-AZkcAA5vnN/v4PDqKyMR5lx7hZttPDgClv83E//FMNhR2TMcLUhfRUBHCmSl0oi9zMgDDqRUJkSxO3wm85+XLg=="], + + "@protobufjs/codegen": ["@protobufjs/codegen@2.0.4", "", {}, "sha512-YyFaikqM5sH0ziFZCN3xDC7zeGaB/d0IUb9CATugHWbd1FRFwWwt4ld4OYMPWu5a3Xe01mGAULCdqhMlPl29Jg=="], + + "@protobufjs/eventemitter": ["@protobufjs/eventemitter@1.1.0", "", {}, "sha512-j9ednRT81vYJ9OfVuXG6ERSTdEL1xVsNgqpkxMsbIabzSo3goCjDIveeGv5d03om39ML71RdmrGNjG5SReBP/Q=="], + + "@protobufjs/fetch": ["@protobufjs/fetch@1.1.0", "", { "dependencies": { "@protobufjs/aspromise": "^1.1.1", "@protobufjs/inquire": "^1.1.0" } }, "sha512-lljVXpqXebpsijW71PZaCYeIcE5on1w5DlQy5WH6GLbFryLUrBD4932W/E2BSpfRJWseIL4v/KPgBFxDOIdKpQ=="], + + "@protobufjs/float": ["@protobufjs/float@1.0.2", "", {}, "sha512-Ddb+kVXlXst9d+R9PfTIxh1EdNkgoRe5tOX6t01f1lYWOvJnSPDBlG241QLzcyPdoNTsblLUdujGSE4RzrTZGQ=="], + + "@protobufjs/inquire": ["@protobufjs/inquire@1.1.0", "", {}, "sha512-kdSefcPdruJiFMVSbn801t4vFK7KB/5gd2fYvrxhuJYg8ILrmn9SKSX2tZdV6V+ksulWqS7aXjBcRXl3wHoD9Q=="], + + "@protobufjs/path": ["@protobufjs/path@1.1.2", "", {}, "sha512-6JOcJ5Tm08dOHAbdR3GrvP+yUUfkjG5ePsHYczMFLq3ZmMkAD98cDgcT2iA1lJ9NVwFd4tH/iSSoe44YWkltEA=="], + + "@protobufjs/pool": ["@protobufjs/pool@1.1.0", "", {}, "sha512-0kELaGSIDBKvcgS4zkjz1PeddatrjYcmMWOlAuAPwAeccUrPHdUqo/J6LiymHHEiJT5NrF1UVwxY14f+fy4WQw=="], + + "@protobufjs/utf8": ["@protobufjs/utf8@1.1.0", "", {}, 
"sha512-Vvn3zZrhQZkkBE8LSuW3em98c0FwgO4nxzv6OdSxPKJIEKY2bGbHn+mhGIPerzI4twdxaP8/0+06HBpwf345Lw=="], + + "@rolldown/pluginutils": ["@rolldown/pluginutils@1.0.0-rc.2", "", {}, "sha512-izyXV/v+cHiRfozX62W9htOAvwMo4/bXKDrQ+vom1L1qRuexPock/7VZDAhnpHCLNejd3NJ6hiab+tO0D44Rgw=="], + + "@rollup/rollup-android-arm-eabi": ["@rollup/rollup-android-arm-eabi@4.57.1", "", { "os": "android", "cpu": "arm" }, "sha512-A6ehUVSiSaaliTxai040ZpZ2zTevHYbvu/lDoeAteHI8QnaosIzm4qwtezfRg1jOYaUmnzLX1AOD6Z+UJjtifg=="], + + "@rollup/rollup-android-arm64": ["@rollup/rollup-android-arm64@4.57.1", "", { "os": "android", "cpu": "arm64" }, "sha512-dQaAddCY9YgkFHZcFNS/606Exo8vcLHwArFZ7vxXq4rigo2bb494/xKMMwRRQW6ug7Js6yXmBZhSBRuBvCCQ3w=="], + + "@rollup/rollup-darwin-arm64": ["@rollup/rollup-darwin-arm64@4.57.1", "", { "os": "darwin", "cpu": "arm64" }, "sha512-crNPrwJOrRxagUYeMn/DZwqN88SDmwaJ8Cvi/TN1HnWBU7GwknckyosC2gd0IqYRsHDEnXf328o9/HC6OkPgOg=="], + + "@rollup/rollup-darwin-x64": ["@rollup/rollup-darwin-x64@4.57.1", "", { "os": "darwin", "cpu": "x64" }, "sha512-Ji8g8ChVbKrhFtig5QBV7iMaJrGtpHelkB3lsaKzadFBe58gmjfGXAOfI5FV0lYMH8wiqsxKQ1C9B0YTRXVy4w=="], + + "@rollup/rollup-freebsd-arm64": ["@rollup/rollup-freebsd-arm64@4.57.1", "", { "os": "freebsd", "cpu": "arm64" }, "sha512-R+/WwhsjmwodAcz65guCGFRkMb4gKWTcIeLy60JJQbXrJ97BOXHxnkPFrP+YwFlaS0m+uWJTstrUA9o+UchFug=="], + + "@rollup/rollup-freebsd-x64": ["@rollup/rollup-freebsd-x64@4.57.1", "", { "os": "freebsd", "cpu": "x64" }, "sha512-IEQTCHeiTOnAUC3IDQdzRAGj3jOAYNr9kBguI7MQAAZK3caezRrg0GxAb6Hchg4lxdZEI5Oq3iov/w/hnFWY9Q=="], + + "@rollup/rollup-linux-arm-gnueabihf": ["@rollup/rollup-linux-arm-gnueabihf@4.57.1", "", { "os": "linux", "cpu": "arm" }, "sha512-F8sWbhZ7tyuEfsmOxwc2giKDQzN3+kuBLPwwZGyVkLlKGdV1nvnNwYD0fKQ8+XS6hp9nY7B+ZeK01EBUE7aHaw=="], + + "@rollup/rollup-linux-arm-musleabihf": ["@rollup/rollup-linux-arm-musleabihf@4.57.1", "", { "os": "linux", "cpu": "arm" }, 
"sha512-rGfNUfn0GIeXtBP1wL5MnzSj98+PZe/AXaGBCRmT0ts80lU5CATYGxXukeTX39XBKsxzFpEeK+Mrp9faXOlmrw=="], + + "@rollup/rollup-linux-arm64-gnu": ["@rollup/rollup-linux-arm64-gnu@4.57.1", "", { "os": "linux", "cpu": "arm64" }, "sha512-MMtej3YHWeg/0klK2Qodf3yrNzz6CGjo2UntLvk2RSPlhzgLvYEB3frRvbEF2wRKh1Z2fDIg9KRPe1fawv7C+g=="], + + "@rollup/rollup-linux-arm64-musl": ["@rollup/rollup-linux-arm64-musl@4.57.1", "", { "os": "linux", "cpu": "arm64" }, "sha512-1a/qhaaOXhqXGpMFMET9VqwZakkljWHLmZOX48R0I/YLbhdxr1m4gtG1Hq7++VhVUmf+L3sTAf9op4JlhQ5u1Q=="], + + "@rollup/rollup-linux-loong64-gnu": ["@rollup/rollup-linux-loong64-gnu@4.57.1", "", { "os": "linux", "cpu": "none" }, "sha512-QWO6RQTZ/cqYtJMtxhkRkidoNGXc7ERPbZN7dVW5SdURuLeVU7lwKMpo18XdcmpWYd0qsP1bwKPf7DNSUinhvA=="], + + "@rollup/rollup-linux-loong64-musl": ["@rollup/rollup-linux-loong64-musl@4.57.1", "", { "os": "linux", "cpu": "none" }, "sha512-xpObYIf+8gprgWaPP32xiN5RVTi/s5FCR+XMXSKmhfoJjrpRAjCuuqQXyxUa/eJTdAE6eJ+KDKaoEqjZQxh3Gw=="], + + "@rollup/rollup-linux-ppc64-gnu": ["@rollup/rollup-linux-ppc64-gnu@4.57.1", "", { "os": "linux", "cpu": "ppc64" }, "sha512-4BrCgrpZo4hvzMDKRqEaW1zeecScDCR+2nZ86ATLhAoJ5FQ+lbHVD3ttKe74/c7tNT9c6F2viwB3ufwp01Oh2w=="], + + "@rollup/rollup-linux-ppc64-musl": ["@rollup/rollup-linux-ppc64-musl@4.57.1", "", { "os": "linux", "cpu": "ppc64" }, "sha512-NOlUuzesGauESAyEYFSe3QTUguL+lvrN1HtwEEsU2rOwdUDeTMJdO5dUYl/2hKf9jWydJrO9OL/XSSf65R5+Xw=="], + + "@rollup/rollup-linux-riscv64-gnu": ["@rollup/rollup-linux-riscv64-gnu@4.57.1", "", { "os": "linux", "cpu": "none" }, "sha512-ptA88htVp0AwUUqhVghwDIKlvJMD/fmL/wrQj99PRHFRAG6Z5nbWoWG4o81Nt9FT+IuqUQi+L31ZKAFeJ5Is+A=="], + + "@rollup/rollup-linux-riscv64-musl": ["@rollup/rollup-linux-riscv64-musl@4.57.1", "", { "os": "linux", "cpu": "none" }, "sha512-S51t7aMMTNdmAMPpBg7OOsTdn4tySRQvklmL3RpDRyknk87+Sp3xaumlatU+ppQ+5raY7sSTcC2beGgvhENfuw=="], + + "@rollup/rollup-linux-s390x-gnu": ["@rollup/rollup-linux-s390x-gnu@4.57.1", "", { "os": "linux", "cpu": "s390x" }, 
"sha512-Bl00OFnVFkL82FHbEqy3k5CUCKH6OEJL54KCyx2oqsmZnFTR8IoNqBF+mjQVcRCT5sB6yOvK8A37LNm/kPJiZg=="], + + "@rollup/rollup-linux-x64-gnu": ["@rollup/rollup-linux-x64-gnu@4.57.1", "", { "os": "linux", "cpu": "x64" }, "sha512-ABca4ceT4N+Tv/GtotnWAeXZUZuM/9AQyCyKYyKnpk4yoA7QIAuBt6Hkgpw8kActYlew2mvckXkvx0FfoInnLg=="], + + "@rollup/rollup-linux-x64-musl": ["@rollup/rollup-linux-x64-musl@4.57.1", "", { "os": "linux", "cpu": "x64" }, "sha512-HFps0JeGtuOR2convgRRkHCekD7j+gdAuXM+/i6kGzQtFhlCtQkpwtNzkNj6QhCDp7DRJ7+qC/1Vg2jt5iSOFw=="], + + "@rollup/rollup-openbsd-x64": ["@rollup/rollup-openbsd-x64@4.57.1", "", { "os": "openbsd", "cpu": "x64" }, "sha512-H+hXEv9gdVQuDTgnqD+SQffoWoc0Of59AStSzTEj/feWTBAnSfSD3+Dql1ZruJQxmykT/JVY0dE8Ka7z0DH1hw=="], + + "@rollup/rollup-openharmony-arm64": ["@rollup/rollup-openharmony-arm64@4.57.1", "", { "os": "none", "cpu": "arm64" }, "sha512-4wYoDpNg6o/oPximyc/NG+mYUejZrCU2q+2w6YZqrAs2UcNUChIZXjtafAiiZSUc7On8v5NyNj34Kzj/Ltk6dQ=="], + + "@rollup/rollup-win32-arm64-msvc": ["@rollup/rollup-win32-arm64-msvc@4.57.1", "", { "os": "win32", "cpu": "arm64" }, "sha512-O54mtsV/6LW3P8qdTcamQmuC990HDfR71lo44oZMZlXU4tzLrbvTii87Ni9opq60ds0YzuAlEr/GNwuNluZyMQ=="], + + "@rollup/rollup-win32-ia32-msvc": ["@rollup/rollup-win32-ia32-msvc@4.57.1", "", { "os": "win32", "cpu": "ia32" }, "sha512-P3dLS+IerxCT/7D2q2FYcRdWRl22dNbrbBEtxdWhXrfIMPP9lQhb5h4Du04mdl5Woq05jVCDPCMF7Ub0NAjIew=="], + + "@rollup/rollup-win32-x64-gnu": ["@rollup/rollup-win32-x64-gnu@4.57.1", "", { "os": "win32", "cpu": "x64" }, "sha512-VMBH2eOOaKGtIJYleXsi2B8CPVADrh+TyNxJ4mWPnKfLB/DBUmzW+5m1xUrcwWoMfSLagIRpjUFeW5CO5hyciQ=="], + + "@rollup/rollup-win32-x64-msvc": ["@rollup/rollup-win32-x64-msvc@4.57.1", "", { "os": "win32", "cpu": "x64" }, "sha512-mxRFDdHIWRxg3UfIIAwCm6NzvxG0jDX/wBN6KsQFTvKFqqg9vTrWUE68qEjHt19A5wwx5X5aUi2zuZT7YR0jrA=="], + + "@types/babel__core": ["@types/babel__core@7.20.5", "", { "dependencies": { "@babel/parser": "^7.20.7", "@babel/types": "^7.20.7", "@types/babel__generator": "*", 
"@types/babel__template": "*", "@types/babel__traverse": "*" } }, "sha512-qoQprZvz5wQFJwMDqeseRXWv3rqMvhgpbXFfVyWhbx9X47POIA6i/+dXefEmZKoAgOaTdaIgNSMqMIU61yRyzA=="], + + "@types/babel__generator": ["@types/babel__generator@7.27.0", "", { "dependencies": { "@babel/types": "^7.0.0" } }, "sha512-ufFd2Xi92OAVPYsy+P4n7/U7e68fex0+Ee8gSG9KX7eo084CWiQ4sdxktvdl0bOPupXtVJPY19zk6EwWqUQ8lg=="], + + "@types/babel__template": ["@types/babel__template@7.4.4", "", { "dependencies": { "@babel/parser": "^7.1.0", "@babel/types": "^7.0.0" } }, "sha512-h/NUaSyG5EyxBIp8YRxo4RMe2/qQgvyowRwVMzhYhBCONbW8PUsg4lkFMrhgZhUe5z3L3MiLDuvyJ/CaPa2A8A=="], + + "@types/babel__traverse": ["@types/babel__traverse@7.28.0", "", { "dependencies": { "@babel/types": "^7.28.2" } }, "sha512-8PvcXf70gTDZBgt9ptxJ8elBeBjcLOAcOtoO/mPJjtji1+CdGbHgm77om1GrsPxsiE+uXIpNSK64UYaIwQXd4Q=="], + + "@types/estree": ["@types/estree@1.0.8", "", {}, "sha512-dWHzHa2WqEXI/O1E9OjrocMTKJl2mSrEolh1Iomrv6U+JuNwaHXsXx9bLu5gG7BUWFIN0skIQJQ/L1rIex4X6w=="], + + "@types/node": ["@types/node@22.19.8", "", { "dependencies": { "undici-types": "~6.21.0" } }, "sha512-ebO/Yl+EAvVe8DnMfi+iaAyIqYdK0q/q0y0rw82INWEKJOBe6b/P3YWE8NW7oOlF/nXFNrHwhARrN/hdgDkraA=="], + + "@vitejs/plugin-react": ["@vitejs/plugin-react@5.1.3", "", { "dependencies": { "@babel/core": "^7.29.0", "@babel/plugin-transform-react-jsx-self": "^7.27.1", "@babel/plugin-transform-react-jsx-source": "^7.27.1", "@rolldown/pluginutils": "1.0.0-rc.2", "@types/babel__core": "^7.20.5", "react-refresh": "^0.18.0" }, "peerDependencies": { "vite": "^4.2.0 || ^5.0.0 || ^6.0.0 || ^7.0.0" } }, "sha512-NVUnA6gQCl8jfoYqKqQU5Clv0aPw14KkZYCsX6T9Lfu9slI0LOU10OTwFHS/WmptsMMpshNd/1tuWsHQ2Uk+cg=="], + + "agent-base": ["agent-base@7.1.4", "", {}, "sha512-MnA+YT8fwfJPgBx3m60MNqakm30XOkyIoH1y6huTQvC0PwZG7ki8NacLBcrPbNoo8vEZy7Jpuk7+jMO+CUovTQ=="], + + "ansi-regex": ["ansi-regex@6.2.2", "", {}, "sha512-Bq3SmSpyFHaWjPk8If9yc6svM8c56dB5BAtW4Qbw5jHTwwXXcTLoRMkpDJp6VL0XzlWaCHTXrkFURMYmD0sLqg=="], + + 
"ansi-styles": ["ansi-styles@6.2.3", "", {}, "sha512-4Dj6M28JB+oAH8kFkTLUo+a2jwOFkuqb3yucU0CANcRRUbxS0cP0nZYCGjcc3BNXwRIsUVmDGgzawme7zvJHvg=="], + + "balanced-match": ["balanced-match@1.0.2", "", {}, "sha512-3oSeUO0TMV67hN1AmbXsK4yaqU7tjiHlbxRDZOpH0KW9+CeX4bRAaX0Anxt0tx2MrpRpWwQaPwIlISEJhYU5Pw=="], + + "base64-js": ["base64-js@1.5.1", "", {}, "sha512-AKpaYlHn8t4SVbOHCy+b5+KKgvR4vrsD8vbvrbiQJps7fKDTkjkDry6ji0rUJjC0kzbNePLwzxq8iypo41qeWA=="], + + "baseline-browser-mapping": ["baseline-browser-mapping@2.9.19", "", { "bin": "dist/cli.js" }, "sha512-ipDqC8FrAl/76p2SSWKSI+H9tFwm7vYqXQrItCuiVPt26Km0jS+NzSsBWAaBusvSbQcfJG+JitdMm+wZAgTYqg=="], + + "bignumber.js": ["bignumber.js@9.3.1", "", {}, "sha512-Ko0uX15oIUS7wJ3Rb30Fs6SkVbLmPBAKdlm7q9+ak9bbIeFf0MwuBsQV6z7+X768/cHsfg+WlysDWJcmthjsjQ=="], + + "brace-expansion": ["brace-expansion@2.0.2", "", { "dependencies": { "balanced-match": "^1.0.0" } }, "sha512-Jt0vHyM+jmUBqojB7E1NIYadt0vI0Qxjxd2TErW94wDz+E2LAm5vKMXXwg6ZZBTHPuUlDgQHKXvjGBdfcF1ZDQ=="], + + "browserslist": ["browserslist@4.28.1", "", { "dependencies": { "baseline-browser-mapping": "^2.9.0", "caniuse-lite": "^1.0.30001759", "electron-to-chromium": "^1.5.263", "node-releases": "^2.0.27", "update-browserslist-db": "^1.2.0" }, "bin": "cli.js" }, "sha512-ZC5Bd0LgJXgwGqUknZY/vkUQ04r8NXnJZ3yYi4vDmSiZmC/pdSN0NbNRPxZpbtO4uAfDUAFffO8IZoM3Gj8IkA=="], + + "buffer-equal-constant-time": ["buffer-equal-constant-time@1.0.1", "", {}, "sha512-zRpUiDwd/xk6ADqPMATG8vc9VPrkck7T07OIx0gnjmJAnHnTVXNQG3vfvWNuiZIkwu9KrKdA1iJKfsfTVxE6NA=="], + + "caniuse-lite": ["caniuse-lite@1.0.30001767", "", {}, "sha512-34+zUAMhSH+r+9eKmYG+k2Rpt8XttfE4yXAjoZvkAPs15xcYQhyBYdalJ65BzivAvGRMViEjy6oKr/S91loekQ=="], + + "color-convert": ["color-convert@2.0.1", "", { "dependencies": { "color-name": "~1.1.4" } }, "sha512-RRECPsj7iu/xb5oKYcsFHSppFNnsj/52OVTRKb4zP5onXwVF3zVmmToNcOfGC+CRDpfK/U584fMg38ZHCaElKQ=="], + + "color-name": ["color-name@1.1.4", "", {}, 
"sha512-dOy+3AuW3a2wNbZHIuMZpTcgjGuLU/uBL/ubcZF9OXbDo8ff4O8yVp5Bf0efS8uEoYo5q4Fx7dY9OgQGXgAsQA=="], + + "convert-source-map": ["convert-source-map@2.0.0", "", {}, "sha512-Kvp459HrV2FEJ1CAsi1Ku+MY3kasH19TFykTz2xWmMeq6bk2NU3XXvfJ+Q61m0xktWwt+1HSYf3JZsTms3aRJg=="], + + "cross-spawn": ["cross-spawn@7.0.6", "", { "dependencies": { "path-key": "^3.1.0", "shebang-command": "^2.0.0", "which": "^2.0.1" } }, "sha512-uV2QOWP2nWzsy2aMp8aRibhi9dlzF5Hgh5SHaB9OiTGEyDTiJJyx0uy51QXdyWbtAHNua4XJzUKca3OzKUd3vA=="], + + "data-uri-to-buffer": ["data-uri-to-buffer@4.0.1", "", {}, "sha512-0R9ikRb668HB7QDxT1vkpuUBtqc53YyAwMwGeUFKRojY/NWKvdZ+9UYtRfGmhqNbRkTSVpMbmyhXipFFv2cb/A=="], + + "debug": ["debug@4.4.3", "", { "dependencies": { "ms": "^2.1.3" } }, "sha512-RGwwWnwQvkVfavKVt22FGLw+xYSdzARwm0ru6DhTVA3umU5hZc28V3kO4stgYryrTlLpuvgI9GiijltAjNbcqA=="], + + "eastasianwidth": ["eastasianwidth@0.2.0", "", {}, "sha512-I88TYZWc9XiYHRQ4/3c5rjjfgkjhLyW2luGIheGERbNQ6OY7yTybanSpDXZa8y7VUP9YmDcYa+eyq4ca7iLqWA=="], + + "ecdsa-sig-formatter": ["ecdsa-sig-formatter@1.0.11", "", { "dependencies": { "safe-buffer": "^5.0.1" } }, "sha512-nagl3RYrbNv6kQkeJIpt6NJZy8twLB/2vtz6yN9Z4vRKHN4/QZJIEbqohALSgwKdnksuY3k5Addp5lg8sVoVcQ=="], + + "electron-to-chromium": ["electron-to-chromium@1.5.286", "", {}, "sha512-9tfDXhJ4RKFNerfjdCcZfufu49vg620741MNs26a9+bhLThdB+plgMeou98CAaHu/WATj2iHOOHTp1hWtABj2A=="], + + "emoji-regex": ["emoji-regex@9.2.2", "", {}, "sha512-L18DaJsXSUk2+42pv8mLs5jJT2hqFkFE4j21wOmgbUqsZ2hL72NsUU785g9RXgo3s0ZNgVl42TiHp3ZtOv/Vyg=="], + + "esbuild": ["esbuild@0.25.12", "", { "optionalDependencies": { "@esbuild/aix-ppc64": "0.25.12", "@esbuild/android-arm": "0.25.12", "@esbuild/android-arm64": "0.25.12", "@esbuild/android-x64": "0.25.12", "@esbuild/darwin-arm64": "0.25.12", "@esbuild/darwin-x64": "0.25.12", "@esbuild/freebsd-arm64": "0.25.12", "@esbuild/freebsd-x64": "0.25.12", "@esbuild/linux-arm": "0.25.12", "@esbuild/linux-arm64": "0.25.12", "@esbuild/linux-ia32": "0.25.12", "@esbuild/linux-loong64": 
"0.25.12", "@esbuild/linux-mips64el": "0.25.12", "@esbuild/linux-ppc64": "0.25.12", "@esbuild/linux-riscv64": "0.25.12", "@esbuild/linux-s390x": "0.25.12", "@esbuild/linux-x64": "0.25.12", "@esbuild/netbsd-arm64": "0.25.12", "@esbuild/netbsd-x64": "0.25.12", "@esbuild/openbsd-arm64": "0.25.12", "@esbuild/openbsd-x64": "0.25.12", "@esbuild/openharmony-arm64": "0.25.12", "@esbuild/sunos-x64": "0.25.12", "@esbuild/win32-arm64": "0.25.12", "@esbuild/win32-ia32": "0.25.12", "@esbuild/win32-x64": "0.25.12" }, "bin": "bin/esbuild" }, "sha512-bbPBYYrtZbkt6Os6FiTLCTFxvq4tt3JKall1vRwshA3fdVztsLAatFaZobhkBC8/BrPetoa0oksYoKXoG4ryJg=="], + + "escalade": ["escalade@3.2.0", "", {}, "sha512-WUj2qlxaQtO4g6Pq5c29GTcWGDyd8itL8zTlipgECz3JesAiiOKotd8JU6otB3PACgG6xkJUyVhboMS+bje/jA=="], + + "extend": ["extend@3.0.2", "", {}, "sha512-fjquC59cD7CyW6urNXK0FBufkZcoiGG80wTuPujX590cB5Ttln20E2UB4S/WARVqhXffZl2LNgS+gQdPIIim/g=="], + + "fdir": ["fdir@6.5.0", "", { "peerDependencies": { "picomatch": "^3 || ^4" } }, "sha512-tIbYtZbucOs0BRGqPJkshJUYdL+SDH7dVM8gjy+ERp3WAUjLEFJE+02kanyHtwjWOnwrKYBiwAmM0p4kLJAnXg=="], + + "fetch-blob": ["fetch-blob@3.2.0", "", { "dependencies": { "node-domexception": "^1.0.0", "web-streams-polyfill": "^3.0.3" } }, "sha512-7yAQpD2UMJzLi1Dqv7qFYnPbaPx7ZfFK6PiIxQ4PfkGPyNyl2Ugx+a/umUonmKqjhM4DnfbMvdX6otXq83soQQ=="], + + "foreground-child": ["foreground-child@3.3.1", "", { "dependencies": { "cross-spawn": "^7.0.6", "signal-exit": "^4.0.1" } }, "sha512-gIXjKqtFuWEgzFRJA9WCQeSJLZDjgJUOMCMzxtvFq/37KojM1BFGufqsCy0r4qSQmYLsZYMeyRqzIWOMup03sw=="], + + "formdata-polyfill": ["formdata-polyfill@4.0.10", "", { "dependencies": { "fetch-blob": "^3.1.2" } }, "sha512-buewHzMvYL29jdeQTVILecSaZKnt/RJWjoZCF5OW60Z67/GmSLBkOFM7qh1PI3zFNtJbaZL5eQu1vLfazOwj4g=="], + + "fsevents": ["fsevents@2.3.3", "", { "os": "darwin" }, "sha512-5xoDfX+fL7faATnagmWPpbFtwh/R77WmMMqqHGS65C3vvB0YHrgF+B1YmZ3441tMj5n63k0212XNoJwzlhffQw=="], + + "gaxios": ["gaxios@7.1.3", "", { "dependencies": { "extend": "^3.0.2", 
"https-proxy-agent": "^7.0.1", "node-fetch": "^3.3.2", "rimraf": "^5.0.1" } }, "sha512-YGGyuEdVIjqxkxVH1pUTMY/XtmmsApXrCVv5EU25iX6inEPbV+VakJfLealkBtJN69AQmh1eGOdCl9Sm1UP6XQ=="], + + "gcp-metadata": ["gcp-metadata@8.1.2", "", { "dependencies": { "gaxios": "^7.0.0", "google-logging-utils": "^1.0.0", "json-bigint": "^1.0.0" } }, "sha512-zV/5HKTfCeKWnxG0Dmrw51hEWFGfcF2xiXqcA3+J90WDuP0SvoiSO5ORvcBsifmx/FoIjgQN3oNOGaQ5PhLFkg=="], + + "gensync": ["gensync@1.0.0-beta.2", "", {}, "sha512-3hN7NaskYvMDLQY55gnW3NQ+mesEAepTqlg+VEbj7zzqEMBVNhzcGYYeqFo/TlYz6eQiFcp1HcsCZO+nGgS8zg=="], + + "glob": ["glob@10.5.0", "", { "dependencies": { "foreground-child": "^3.1.0", "jackspeak": "^3.1.2", "minimatch": "^9.0.4", "minipass": "^7.1.2", "package-json-from-dist": "^1.0.0", "path-scurry": "^1.11.1" }, "bin": "dist/esm/bin.mjs" }, "sha512-DfXN8DfhJ7NH3Oe7cFmu3NCu1wKbkReJ8TorzSAFbSKrlNaQSKfIzqYqVY8zlbs2NLBbWpRiU52GX2PbaBVNkg=="], + + "google-auth-library": ["google-auth-library@10.5.0", "", { "dependencies": { "base64-js": "^1.3.0", "ecdsa-sig-formatter": "^1.0.11", "gaxios": "^7.0.0", "gcp-metadata": "^8.0.0", "google-logging-utils": "^1.0.0", "gtoken": "^8.0.0", "jws": "^4.0.0" } }, "sha512-7ABviyMOlX5hIVD60YOfHw4/CxOfBhyduaYB+wbFWCWoni4N7SLcV46hrVRktuBbZjFC9ONyqamZITN7q3n32w=="], + + "google-logging-utils": ["google-logging-utils@1.1.3", "", {}, "sha512-eAmLkjDjAFCVXg7A1unxHsLf961m6y17QFqXqAXGj/gVkKFrEICfStRfwUlGNfeCEjNRa32JEWOUTlYXPyyKvA=="], + + "gtoken": ["gtoken@8.0.0", "", { "dependencies": { "gaxios": "^7.0.0", "jws": "^4.0.0" } }, "sha512-+CqsMbHPiSTdtSO14O51eMNlrp9N79gmeqmXeouJOhfucAedHw9noVe/n5uJk3tbKE6a+6ZCQg3RPhVhHByAIw=="], + + "https-proxy-agent": ["https-proxy-agent@7.0.6", "", { "dependencies": { "agent-base": "^7.1.2", "debug": "4" } }, "sha512-vK9P5/iUfdl95AI+JVyUuIcVtd4ofvtrOr3HNtM2yxC9bnMbEdp3x01OhQNnjb8IJYi38VlTE3mBXwcfvywuSw=="], + + "is-fullwidth-code-point": ["is-fullwidth-code-point@3.0.0", "", {}, 
"sha512-zymm5+u+sCsSWyD9qNaejV3DFvhCKclKdizYaJUuHA83RLjb7nSuGnddCHGv0hk+KY7BMAlsWeK4Ueg6EV6XQg=="], + + "isexe": ["isexe@2.0.0", "", {}, "sha512-RHxMLp9lnKHGHRng9QFhRCMbYAcVpn69smSGcq3f36xjgVVWThj4qqLbTLlq7Ssj8B+fIQ1EuCEGI2lKsyQeIw=="], + + "jackspeak": ["jackspeak@3.4.3", "", { "dependencies": { "@isaacs/cliui": "^8.0.2" }, "optionalDependencies": { "@pkgjs/parseargs": "^0.11.0" } }, "sha512-OGlZQpz2yfahA/Rd1Y8Cd9SIEsqvXkLVoSw/cgwhnhFMDbsQFeZYoJJ7bIZBS9BcamUW96asq/npPWugM+RQBw=="], + + "js-tokens": ["js-tokens@4.0.0", "", {}, "sha512-RdJUflcE3cUzKiMqQgsCu06FPu9UdIJO0beYbPhHN4k6apgJtifcoCtT9bcxOpYBtpD2kCM6Sbzg4CausW/PKQ=="], + + "jsesc": ["jsesc@3.1.0", "", { "bin": "bin/jsesc" }, "sha512-/sM3dO2FOzXjKQhJuo0Q173wf2KOo8t4I8vHy6lF9poUp7bKT0/NHE8fPX23PwfhnykfqnC2xRxOnVw5XuGIaA=="], + + "json-bigint": ["json-bigint@1.0.0", "", { "dependencies": { "bignumber.js": "^9.0.0" } }, "sha512-SiPv/8VpZuWbvLSMtTDU8hEfrZWg/mH/nV/b4o0CYbSxu1UIQPLdwKOCIyLQX+VIPO5vrLX3i8qtqFyhdPSUSQ=="], + + "json5": ["json5@2.2.3", "", { "bin": "lib/cli.js" }, "sha512-XmOWe7eyHYH14cLdVPoyg+GOH3rYX++KpzrylJwSW98t3Nk+U8XOl8FWKOgwtzdb8lXGf6zYwDUzeHMWfxasyg=="], + + "jwa": ["jwa@2.0.1", "", { "dependencies": { "buffer-equal-constant-time": "^1.0.1", "ecdsa-sig-formatter": "1.0.11", "safe-buffer": "^5.0.1" } }, "sha512-hRF04fqJIP8Abbkq5NKGN0Bbr3JxlQ+qhZufXVr0DvujKy93ZCbXZMHDL4EOtodSbCWxOqR8MS1tXA5hwqCXDg=="], + + "jws": ["jws@4.0.1", "", { "dependencies": { "jwa": "^2.0.1", "safe-buffer": "^5.0.1" } }, "sha512-EKI/M/yqPncGUUh44xz0PxSidXFr/+r0pA70+gIYhjv+et7yxM+s29Y+VGDkovRofQem0fs7Uvf4+YmAdyRduA=="], + + "long": ["long@5.3.2", "", {}, "sha512-mNAgZ1GmyNhD7AuqnTG3/VQ26o760+ZYBPKjPvugO8+nLbYfX6TVpJPseBvopbdY+qpZ/lKUnmEc1LeZYS3QAA=="], + + "lru-cache": ["lru-cache@5.1.1", "", { "dependencies": { "yallist": "^3.0.2" } }, "sha512-KpNARQA3Iwv+jTA0utUVVbrh+Jlrr1Fv0e56GGzAFOXN7dk/FviaDW8LHmK52DlcH4WP2n6gI8vN1aesBFgo9w=="], + + "lucide-react": ["lucide-react@0.563.0", "", { "peerDependencies": { "react": 
"^16.5.1 || ^17.0.0 || ^18.0.0 || ^19.0.0" } }, "sha512-8dXPB2GI4dI8jV4MgUDGBeLdGk8ekfqVZ0BdLcrRzocGgG75ltNEmWS+gE7uokKF/0oSUuczNDT+g9hFJ23FkA=="], + + "minimatch": ["minimatch@9.0.5", "", { "dependencies": { "brace-expansion": "^2.0.1" } }, "sha512-G6T0ZX48xgozx7587koeX9Ys2NYy6Gmv//P89sEte9V9whIapMNF4idKxnW2QtCcLiTWlb/wfCabAtAFWhhBow=="], + + "minipass": ["minipass@7.1.2", "", {}, "sha512-qOOzS1cBTWYF4BH8fVePDBOO9iptMnGUEZwNc/cMWnTV2nVLZ7VoNWEPHkYczZA0pdoA7dl6e7FL659nX9S2aw=="], + + "ms": ["ms@2.1.3", "", {}, "sha512-6FlzubTLZG3J2a/NVCAleEhjzq5oxgHyaCU9yYXvcLsvoVaHJq/s5xXI6/XXP6tz7R9xAOtHnSO/tXtF3WRTlA=="], + + "nanoid": ["nanoid@3.3.11", "", { "bin": "bin/nanoid.cjs" }, "sha512-N8SpfPUnUp1bK+PMYW8qSWdl9U+wwNWI4QKxOYDy9JAro3WMX7p2OeVRF9v+347pnakNevPmiHhNmZ2HbFA76w=="], + + "node-domexception": ["node-domexception@1.0.0", "", {}, "sha512-/jKZoMpw0F8GRwl4/eLROPA3cfcXtLApP0QzLmUT/HuPCZWyB7IY9ZrMeKw2O/nFIqPQB3PVM9aYm0F312AXDQ=="], + + "node-fetch": ["node-fetch@3.3.2", "", { "dependencies": { "data-uri-to-buffer": "^4.0.0", "fetch-blob": "^3.1.4", "formdata-polyfill": "^4.0.10" } }, "sha512-dRB78srN/l6gqWulah9SrxeYnxeddIG30+GOqK/9OlLVyLg3HPnr6SqOWTWOXKRwC2eGYCkZ59NNuSgvSrpgOA=="], + + "node-releases": ["node-releases@2.0.27", "", {}, "sha512-nmh3lCkYZ3grZvqcCH+fjmQ7X+H0OeZgP40OierEaAptX4XofMh5kwNbWh7lBduUzCcV/8kZ+NDLCwm2iorIlA=="], + + "package-json-from-dist": ["package-json-from-dist@1.0.1", "", {}, "sha512-UEZIS3/by4OC8vL3P2dTXRETpebLI2NiI5vIrjaD/5UtrkFX/tNbwjTSRAGC/+7CAo2pIcBaRgWmcBBHcsaCIw=="], + + "path-key": ["path-key@3.1.1", "", {}, "sha512-ojmeN0qd+y0jszEtoY48r0Peq5dwMEkIlCOu6Q5f41lfkswXuKtYrhgoTpLnyIcHm24Uhqx+5Tqm2InSwLhE6Q=="], + + "path-scurry": ["path-scurry@1.11.1", "", { "dependencies": { "lru-cache": "^10.2.0", "minipass": "^5.0.0 || ^6.0.2 || ^7.0.0" } }, "sha512-Xa4Nw17FS9ApQFJ9umLiJS4orGjm7ZzwUrwamcGQuHSzDyth9boKDaycYdDcZDuqYATXw4HFXgaqWTctW/v1HA=="], + + "picocolors": ["picocolors@1.1.1", "", {}, 
"sha512-xceH2snhtb5M9liqDsmEw56le376mTZkEX/jEb/RxNFyegNul7eNslCXP9FDj/Lcu0X8KEyMceP2ntpaHrDEVA=="], + + "picomatch": ["picomatch@4.0.3", "", {}, "sha512-5gTmgEY/sqK6gFXLIsQNH19lWb4ebPDLA4SdLP7dsWkIXHWlG66oPuVvXSGFPppYZz8ZDZq0dYYrbHfBCVUb1Q=="], + + "postcss": ["postcss@8.5.6", "", { "dependencies": { "nanoid": "^3.3.11", "picocolors": "^1.1.1", "source-map-js": "^1.2.1" } }, "sha512-3Ybi1tAuwAP9s0r1UQ2J4n5Y0G05bJkpUIO0/bI9MhwmD70S5aTWbXGBwxHrelT+XM1k6dM0pk+SwNkpTRN7Pg=="], + + "protobufjs": ["protobufjs@7.5.4", "", { "dependencies": { "@protobufjs/aspromise": "^1.1.2", "@protobufjs/base64": "^1.1.2", "@protobufjs/codegen": "^2.0.4", "@protobufjs/eventemitter": "^1.1.0", "@protobufjs/fetch": "^1.1.0", "@protobufjs/float": "^1.0.2", "@protobufjs/inquire": "^1.1.0", "@protobufjs/path": "^1.1.2", "@protobufjs/pool": "^1.1.0", "@protobufjs/utf8": "^1.1.0", "@types/node": ">=13.7.0", "long": "^5.0.0" } }, "sha512-CvexbZtbov6jW2eXAvLukXjXUW1TzFaivC46BpWc/3BpcCysb5Vffu+B3XHMm8lVEuy2Mm4XGex8hBSg1yapPg=="], + + "react": ["react@19.2.4", "", {}, "sha512-9nfp2hYpCwOjAN+8TZFGhtWEwgvWHXqESH8qT89AT/lWklpLON22Lc8pEtnpsZz7VmawabSU0gCjnj8aC0euHQ=="], + + "react-dom": ["react-dom@19.2.4", "", { "dependencies": { "scheduler": "^0.27.0" }, "peerDependencies": { "react": "^19.2.4" } }, "sha512-AXJdLo8kgMbimY95O2aKQqsz2iWi9jMgKJhRBAxECE4IFxfcazB2LmzloIoibJI3C12IlY20+KFaLv+71bUJeQ=="], + + "react-refresh": ["react-refresh@0.18.0", "", {}, "sha512-QgT5//D3jfjJb6Gsjxv0Slpj23ip+HtOpnNgnb2S5zU3CB26G/IDPGoy4RJB42wzFE46DRsstbW6tKHoKbhAxw=="], + + "rimraf": ["rimraf@5.0.10", "", { "dependencies": { "glob": "^10.3.7" }, "bin": "dist/esm/bin.mjs" }, "sha512-l0OE8wL34P4nJH/H2ffoaniAokM2qSmrtXHmlpvYr5AVVX8msAyW0l8NVJFDxlSK4u3Uh/f41cQheDVdnYijwQ=="], + + "rollup": ["rollup@4.57.1", "", { "dependencies": { "@types/estree": "1.0.8" }, "optionalDependencies": { "@rollup/rollup-android-arm-eabi": "4.57.1", "@rollup/rollup-android-arm64": "4.57.1", "@rollup/rollup-darwin-arm64": "4.57.1", 
"@rollup/rollup-darwin-x64": "4.57.1", "@rollup/rollup-freebsd-arm64": "4.57.1", "@rollup/rollup-freebsd-x64": "4.57.1", "@rollup/rollup-linux-arm-gnueabihf": "4.57.1", "@rollup/rollup-linux-arm-musleabihf": "4.57.1", "@rollup/rollup-linux-arm64-gnu": "4.57.1", "@rollup/rollup-linux-arm64-musl": "4.57.1", "@rollup/rollup-linux-loong64-gnu": "4.57.1", "@rollup/rollup-linux-loong64-musl": "4.57.1", "@rollup/rollup-linux-ppc64-gnu": "4.57.1", "@rollup/rollup-linux-ppc64-musl": "4.57.1", "@rollup/rollup-linux-riscv64-gnu": "4.57.1", "@rollup/rollup-linux-riscv64-musl": "4.57.1", "@rollup/rollup-linux-s390x-gnu": "4.57.1", "@rollup/rollup-linux-x64-gnu": "4.57.1", "@rollup/rollup-linux-x64-musl": "4.57.1", "@rollup/rollup-openbsd-x64": "4.57.1", "@rollup/rollup-openharmony-arm64": "4.57.1", "@rollup/rollup-win32-arm64-msvc": "4.57.1", "@rollup/rollup-win32-ia32-msvc": "4.57.1", "@rollup/rollup-win32-x64-gnu": "4.57.1", "@rollup/rollup-win32-x64-msvc": "4.57.1", "fsevents": "~2.3.2" }, "bin": "dist/bin/rollup" }, "sha512-oQL6lgK3e2QZeQ7gcgIkS2YZPg5slw37hYufJ3edKlfQSGGm8ICoxswK15ntSzF/a8+h7ekRy7k7oWc3BQ7y8A=="], + + "safe-buffer": ["safe-buffer@5.2.1", "", {}, "sha512-rp3So07KcdmmKbGvgaNxQSJr7bGVSVk5S9Eq1F+ppbRo70+YeaDxkw5Dd8NPN+GD6bjnYm2VuPuCXmpuYvmCXQ=="], + + "scheduler": ["scheduler@0.27.0", "", {}, "sha512-eNv+WrVbKu1f3vbYJT/xtiF5syA5HPIMtf9IgY/nKg0sWqzAUEvqY/xm7OcZc/qafLx/iO9FgOmeSAp4v5ti/Q=="], + + "semver": ["semver@6.3.1", "", { "bin": "bin/semver.js" }, "sha512-BR7VvDCVHO+q2xBEWskxS6DJE1qRnb7DxzUrogb71CWoSficBxYsiAGd+Kl0mmq/MprG9yArRkyrQxTO6XjMzA=="], + + "shebang-command": ["shebang-command@2.0.0", "", { "dependencies": { "shebang-regex": "^3.0.0" } }, "sha512-kHxr2zZpYtdmrN1qDjrrX/Z1rR1kG8Dx+gkpK1G4eXmvXswmcE1hTWBWYUzlraYw1/yZp6YuDY77YtvbN0dmDA=="], + + "shebang-regex": ["shebang-regex@3.0.0", "", {}, "sha512-7++dFhtcx3353uBaq8DDR4NuxBetBzC7ZQOhmTQInHEd6bSrXdiEyzCvG07Z44UYdLShWUyXt5M/yhz8ekcb1A=="], + + "signal-exit": ["signal-exit@4.1.0", "", {}, 
"sha512-bzyZ1e88w9O1iNJbKnOlvYTrWPDl46O1bG0D3XInv+9tkPrxrN8jUUTiFlDkkmKWgn1M6CfIA13SuGqOa9Korw=="], + + "source-map-js": ["source-map-js@1.2.1", "", {}, "sha512-UXWMKhLOwVKb728IUtQPXxfYU+usdybtUrK/8uGE8CQMvrhOpwvzDBwj0QhSL7MQc7vIsISBG8VQ8+IDQxpfQA=="], + + "string-width": ["string-width@5.1.2", "", { "dependencies": { "eastasianwidth": "^0.2.0", "emoji-regex": "^9.2.2", "strip-ansi": "^7.0.1" } }, "sha512-HnLOCR3vjcY8beoNLtcjZ5/nxn2afmME6lhrDrebokqMap+XbeW8n9TXpPDOqdGK5qcI3oT0GKTW6wC7EMiVqA=="], + + "string-width-cjs": ["string-width@4.2.3", "", { "dependencies": { "emoji-regex": "^8.0.0", "is-fullwidth-code-point": "^3.0.0", "strip-ansi": "^6.0.1" } }, "sha512-wKyQRQpjJ0sIp62ErSZdGsjMJWsap5oRNihHhu6G7JVO/9jIB6UyevL+tXuOqrng8j/cxKTWyWUwvSTriiZz/g=="], + + "strip-ansi": ["strip-ansi@7.1.2", "", { "dependencies": { "ansi-regex": "^6.0.1" } }, "sha512-gmBGslpoQJtgnMAvOVqGZpEz9dyoKTCzy2nfz/n8aIFhN/jCE/rCmcxabB6jOOHV+0WNnylOxaxBQPSvcWklhA=="], + + "strip-ansi-cjs": ["strip-ansi@6.0.1", "", { "dependencies": { "ansi-regex": "^5.0.1" } }, "sha512-Y38VPSHcqkFrCpFnQ9vuSXmquuv5oXOKpGeT6aGrr3o3Gc9AlVa6JBfUSOCnbxGGZF+/0ooI7KrPuUSztUdU5A=="], + + "tinyglobby": ["tinyglobby@0.2.15", "", { "dependencies": { "fdir": "^6.5.0", "picomatch": "^4.0.3" } }, "sha512-j2Zq4NyQYG5XMST4cbs02Ak8iJUdxRM0XI5QyxXuZOzKOINmWurp3smXu3y5wDcJrptwpSjgXHzIQxR0omXljQ=="], + + "typescript": ["typescript@5.8.3", "", { "bin": { "tsc": "bin/tsc", "tsserver": "bin/tsserver" } }, "sha512-p1diW6TqL9L07nNxvRMM7hMMw4c5XOo/1ibL4aAIGmSAt9slTE1Xgw5KWuof2uTOvCg9BY7ZRi+GaF+7sfgPeQ=="], + + "undici-types": ["undici-types@6.21.0", "", {}, "sha512-iwDZqg0QAGrg9Rav5H4n0M64c3mkR59cJ6wQp+7C4nI0gsmExaedaYLNO44eT4AtBBwjbTiGPMlt2Md0T9H9JQ=="], + + "update-browserslist-db": ["update-browserslist-db@1.2.3", "", { "dependencies": { "escalade": "^3.2.0", "picocolors": "^1.1.1" }, "peerDependencies": { "browserslist": ">= 4.21.0" }, "bin": "cli.js" }, 
"sha512-Js0m9cx+qOgDxo0eMiFGEueWztz+d4+M3rGlmKPT+T4IS/jP4ylw3Nwpu6cpTTP8R1MAC1kF4VbdLt3ARf209w=="], + + "vite": ["vite@6.4.1", "", { "dependencies": { "esbuild": "^0.25.0", "fdir": "^6.4.4", "picomatch": "^4.0.2", "postcss": "^8.5.3", "rollup": "^4.34.9", "tinyglobby": "^0.2.13" }, "optionalDependencies": { "fsevents": "~2.3.3" }, "peerDependencies": { "@types/node": "^18.0.0 || ^20.0.0 || >=22.0.0", "jiti": ">=1.21.0", "less": "*", "lightningcss": "^1.21.0", "sass": "*", "sass-embedded": "*", "stylus": "*", "sugarss": "*", "terser": "^5.16.0", "tsx": "^4.8.1", "yaml": "^2.4.2" }, "optionalPeers": ["jiti", "less", "lightningcss", "sass", "sass-embedded", "stylus", "sugarss", "terser", "tsx", "yaml"], "bin": "bin/vite.js" }, "sha512-+Oxm7q9hDoLMyJOYfUYBuHQo+dkAloi33apOPP56pzj+vsdJDzr+j1NISE5pyaAuKL4A3UD34qd0lx5+kfKp2g=="], + + "web-streams-polyfill": ["web-streams-polyfill@3.3.3", "", {}, "sha512-d2JWLCivmZYTSIoge9MsgFCZrt571BikcWGYkjC1khllbTeDlGqZ2D8vD8E/lJa8WGWbb7Plm8/XJYV7IJHZZw=="], + + "which": ["which@2.0.2", "", { "dependencies": { "isexe": "^2.0.0" }, "bin": { "node-which": "bin/node-which" } }, "sha512-BLI3Tl1TW3Pvl70l3yq3Y64i+awpwXqsGBYWkkqMtnbXgrMD+yj7rhW0kuEDxzJaYXGjEW5ogapKNMEKNMjibA=="], + + "wrap-ansi": ["wrap-ansi@8.1.0", "", { "dependencies": { "ansi-styles": "^6.1.0", "string-width": "^5.0.1", "strip-ansi": "^7.0.1" } }, "sha512-si7QWI6zUMq56bESFvagtmzMdGOtoxfR+Sez11Mobfc7tm+VkUckk9bW2UeffTGVUbOksxmSw0AA2gs8g71NCQ=="], + + "wrap-ansi-cjs": ["wrap-ansi@7.0.0", "", { "dependencies": { "ansi-styles": "^4.0.0", "string-width": "^4.1.0", "strip-ansi": "^6.0.0" } }, "sha512-YVGIj2kamLSTxw6NsZjoBxfSwsn0ycdesmc4p+Q21c5zPuZ1pl+NfxVdxPtdHvmNVOQ6XSYG4AUtyt/Fi7D16Q=="], + + "ws": ["ws@8.19.0", "", { "peerDependencies": { "bufferutil": "^4.0.1", "utf-8-validate": ">=5.0.2" }, "optionalPeers": ["bufferutil", "utf-8-validate"] }, "sha512-blAT2mjOEIi0ZzruJfIhb3nps74PRWTCz1IjglWEEpQl5XS/UNama6u2/rjFkDDouqr4L67ry+1aGIALViWjDg=="], + + "yallist": ["yallist@3.1.1", 
"", {}, "sha512-a4UGQaWPH59mOXUYnAG2ewncQS4i4F43Tv3JoAM+s2VDAmS9NsK8GpDMLrCHPksFT7h3K6TOoUNn2pb7RoXx4g=="], + + "path-scurry/lru-cache": ["lru-cache@10.4.3", "", {}, "sha512-JNAzZcXrCt42VGLuYz0zfAzDfAvJWW6AfYlDBQyDV5DClI2m5sAmK+OIO7s59XfsRsWHp02jAJrRadPRGTt6SQ=="], + + "string-width-cjs/emoji-regex": ["emoji-regex@8.0.0", "", {}, "sha512-MSjYzcWNOA0ewAHpz0MxpYFvwg6yjy1NG3xteoqz644VCo/RPgnr1/GGt+ic3iJTzQ8Eu3TdM14SawnVUmGE6A=="], + + "string-width-cjs/strip-ansi": ["strip-ansi@6.0.1", "", { "dependencies": { "ansi-regex": "^5.0.1" } }, "sha512-Y38VPSHcqkFrCpFnQ9vuSXmquuv5oXOKpGeT6aGrr3o3Gc9AlVa6JBfUSOCnbxGGZF+/0ooI7KrPuUSztUdU5A=="], + + "strip-ansi-cjs/ansi-regex": ["ansi-regex@5.0.1", "", {}, "sha512-quJQXlTSUGL2LH9SUXo8VwsY4soanhgo6LNSm84E1LBcE8s3O0wpdiRzyR9z/ZZJMlMWv37qOOb9pdJlMUEKFQ=="], + + "wrap-ansi-cjs/ansi-styles": ["ansi-styles@4.3.0", "", { "dependencies": { "color-convert": "^2.0.1" } }, "sha512-zbB9rCJAT1rbjiVDb2hqKFHNYLxgtk8NURxZ3IZwD3F6NtxbXZQCnnSi1Lkx+IDohdPlFp222wVALIheZJQSEg=="], + + "wrap-ansi-cjs/string-width": ["string-width@4.2.3", "", { "dependencies": { "emoji-regex": "^8.0.0", "is-fullwidth-code-point": "^3.0.0", "strip-ansi": "^6.0.1" } }, "sha512-wKyQRQpjJ0sIp62ErSZdGsjMJWsap5oRNihHhu6G7JVO/9jIB6UyevL+tXuOqrng8j/cxKTWyWUwvSTriiZz/g=="], + + "wrap-ansi-cjs/strip-ansi": ["strip-ansi@6.0.1", "", { "dependencies": { "ansi-regex": "^5.0.1" } }, "sha512-Y38VPSHcqkFrCpFnQ9vuSXmquuv5oXOKpGeT6aGrr3o3Gc9AlVa6JBfUSOCnbxGGZF+/0ooI7KrPuUSztUdU5A=="], + + "string-width-cjs/strip-ansi/ansi-regex": ["ansi-regex@5.0.1", "", {}, "sha512-quJQXlTSUGL2LH9SUXo8VwsY4soanhgo6LNSm84E1LBcE8s3O0wpdiRzyR9z/ZZJMlMWv37qOOb9pdJlMUEKFQ=="], + + "wrap-ansi-cjs/string-width/emoji-regex": ["emoji-regex@8.0.0", "", {}, "sha512-MSjYzcWNOA0ewAHpz0MxpYFvwg6yjy1NG3xteoqz644VCo/RPgnr1/GGt+ic3iJTzQ8Eu3TdM14SawnVUmGE6A=="], + + "wrap-ansi-cjs/strip-ansi/ansi-regex": ["ansi-regex@5.0.1", "", {}, 
"sha512-quJQXlTSUGL2LH9SUXo8VwsY4soanhgo6LNSm84E1LBcE8s3O0wpdiRzyR9z/ZZJMlMWv37qOOb9pdJlMUEKFQ=="], + } +} diff --git a/ui/components/AlbumCover.tsx b/ui/components/AlbumCover.tsx new file mode 100644 index 0000000..3287cb1 --- /dev/null +++ b/ui/components/AlbumCover.tsx @@ -0,0 +1,296 @@ +import React, { useMemo } from 'react'; + +interface AlbumCoverProps { + seed: string; + size?: 'xs' | 'sm' | 'md' | 'lg' | 'xl' | 'full'; + className?: string; + children?: React.ReactNode; +} + +// Seeded random number generator for consistent results +class SeededRandom { + private seed: number; + + constructor(seed: string) { + this.seed = this.hashString(seed); + } + + private hashString(str: string): number { + let hash = 0; + for (let i = 0; i < str.length; i++) { + const char = str.charCodeAt(i); + hash = ((hash << 5) - hash) + char; + hash = hash & hash; + } + return Math.abs(hash) || 1; + } + + next(): number { + this.seed = (this.seed * 1103515245 + 12345) & 0x7fffffff; + return this.seed / 0x7fffffff; + } + + range(min: number, max: number): number { + return min + this.next() * (max - min); + } + + int(min: number, max: number): number { + return Math.floor(this.range(min, max)); + } + + pick(arr: T[]): T { + return arr[this.int(0, arr.length)]; + } +} + +// Curated color palettes - music-themed combinations +const palettes = [ + // Sunset Vibes + { colors: ['#FF6B6B', '#FEC89A', '#FFD93D', '#C9184A'], bg: '#1a1a2e' }, + // Ocean Depths + { colors: ['#0077B6', '#00B4D8', '#90E0EF', '#CAF0F8'], bg: '#03045E' }, + // Forest Night + { colors: ['#2D6A4F', '#40916C', '#52B788', '#95D5B2'], bg: '#1B4332' }, + // Neon Dreams + { colors: ['#F72585', '#7209B7', '#3A0CA3', '#4CC9F0'], bg: '#10002B' }, + // Golden Hour + { colors: ['#FF9500', '#FF5400', '#FFBD00', '#FFE066'], bg: '#2D1B00' }, + // Arctic Aurora + { colors: ['#48CAE4', '#00F5D4', '#9B5DE5', '#F15BB5'], bg: '#0A0A1A' }, + // Lavender Haze + { colors: ['#E0AAFF', '#C77DFF', '#9D4EDD', '#7B2CBF'], bg: '#240046' }, 
+ // Cherry Blossom + { colors: ['#FFCCD5', '#FFB3C1', '#FF758F', '#C9184A'], bg: '#2B0A14' }, + // Cyber Punk + { colors: ['#00FF87', '#60EFFF', '#FF00E5', '#FFE500'], bg: '#0D0D0D' }, + // Deep Space + { colors: ['#7400B8', '#5E60CE', '#4EA8DE', '#56CFE1'], bg: '#03071E' }, + // Warm Ember + { colors: ['#FFBA08', '#FAA307', '#F48C06', '#E85D04'], bg: '#370617' }, + // Cool Mint + { colors: ['#64DFDF', '#72EFDD', '#80FFDB', '#5EEAD4'], bg: '#0D3B3B' }, + // Velvet Rose + { colors: ['#9D174D', '#BE185D', '#DB2777', '#EC4899'], bg: '#1C0A14' }, + // Electric Blue + { colors: ['#0EA5E9', '#38BDF8', '#7DD3FC', '#E0F2FE'], bg: '#0C1929' }, + // Jungle Fever + { colors: ['#84CC16', '#A3E635', '#BEF264', '#ECFCCB'], bg: '#1A2E05' }, +]; + +type PatternType = 'aurora' | 'mesh' | 'orbs' | 'rays' | 'waves' | 'geometric' | 'nebula' | 'gradient' | 'rings' | 'crystal'; + +const generatePattern = (rng: SeededRandom, palette: typeof palettes[0]): React.CSSProperties => { + const patterns: PatternType[] = ['aurora', 'mesh', 'orbs', 'rays', 'waves', 'geometric', 'nebula', 'gradient', 'rings', 'crystal']; + const pattern = rng.pick(patterns); + const colors = palette.colors; + const bg = palette.bg; + + switch (pattern) { + case 'aurora': { + const angle1 = rng.int(0, 360); + const angle2 = rng.int(0, 360); + return { + background: ` + linear-gradient(${angle1}deg, ${colors[0]}00 0%, ${colors[0]}88 25%, ${colors[1]}88 50%, ${colors[2]}88 75%, ${colors[3]}00 100%), + linear-gradient(${angle2}deg, ${colors[2]}00 0%, ${colors[3]}66 30%, ${colors[0]}66 70%, ${colors[1]}00 100%), + radial-gradient(ellipse at ${rng.int(20, 80)}% ${rng.int(60, 100)}%, ${colors[1]}44 0%, transparent 50%), + linear-gradient(180deg, ${bg} 0%, ${colors[3]}22 100%) + `, + backgroundColor: bg, + }; + } + + case 'mesh': { + const points = [ + { x: rng.int(0, 40), y: rng.int(0, 40) }, + { x: rng.int(60, 100), y: rng.int(0, 40) }, + { x: rng.int(0, 40), y: rng.int(60, 100) }, + { x: rng.int(60, 100), y: 
rng.int(60, 100) }, + ]; + return { + background: ` + radial-gradient(at ${points[0].x}% ${points[0].y}%, ${colors[0]} 0%, transparent 50%), + radial-gradient(at ${points[1].x}% ${points[1].y}%, ${colors[1]} 0%, transparent 50%), + radial-gradient(at ${points[2].x}% ${points[2].y}%, ${colors[2]} 0%, transparent 50%), + radial-gradient(at ${points[3].x}% ${points[3].y}%, ${colors[3]} 0%, transparent 50%) + `, + backgroundColor: bg, + }; + } + + case 'orbs': { + const orbCount = rng.int(3, 6); + const orbs = Array.from({ length: orbCount }, (_, i) => { + const size = rng.int(30, 70); + const x = rng.int(10, 90); + const y = rng.int(10, 90); + const color = colors[i % colors.length]; + const blur = rng.int(20, 40); + return `radial-gradient(circle ${size}% at ${x}% ${y}%, ${color}99 0%, ${color}44 ${blur}%, transparent 70%)`; + }); + return { + background: [...orbs, `linear-gradient(135deg, ${bg} 0%, ${colors[0]}11 100%)`].join(', '), + backgroundColor: bg, + }; + } + + case 'rays': { + const centerX = rng.int(30, 70); + const centerY = rng.int(30, 70); + const rayCount = rng.int(6, 12); + const rays = Array.from({ length: rayCount }, (_, i) => { + const angle = (360 / rayCount) * i + rng.int(-10, 10); + const color = colors[i % colors.length]; + return `linear-gradient(${angle}deg, transparent 0%, transparent 45%, ${color}66 48%, ${color}66 52%, transparent 55%, transparent 100%)`; + }); + return { + background: [ + `radial-gradient(circle at ${centerX}% ${centerY}%, ${colors[0]} 0%, transparent 30%)`, + ...rays, + ].join(', '), + backgroundColor: bg, + }; + } + + case 'waves': { + const waveAngle = rng.int(0, 180); + const waveSize = rng.int(8, 20); + return { + background: ` + repeating-linear-gradient( + ${waveAngle}deg, + ${colors[0]}44 0px, + ${colors[1]}44 ${waveSize}px, + ${colors[2]}44 ${waveSize * 2}px, + ${colors[3]}44 ${waveSize * 3}px, + ${colors[0]}44 ${waveSize * 4}px + ), + radial-gradient(ellipse at 50% 0%, ${colors[0]}66 0%, transparent 70%), + 
radial-gradient(ellipse at 50% 100%, ${colors[2]}66 0%, transparent 70%) + `, + backgroundColor: bg, + }; + } + + case 'geometric': { + const angle = rng.int(0, 90); + return { + background: ` + conic-gradient(from ${angle}deg at 50% 50%, ${colors[0]}, ${colors[1]}, ${colors[2]}, ${colors[3]}, ${colors[0]}), + repeating-conic-gradient(from 0deg at 50% 50%, ${bg}00 0deg, ${bg}88 ${90/rng.int(2,6)}deg) + `, + backgroundBlendMode: 'overlay', + backgroundColor: bg, + }; + } + + case 'nebula': { + const x1 = rng.int(20, 80); + const y1 = rng.int(20, 80); + const x2 = rng.int(20, 80); + const y2 = rng.int(20, 80); + return { + background: ` + radial-gradient(ellipse ${rng.int(60, 100)}% ${rng.int(40, 80)}% at ${x1}% ${y1}%, ${colors[0]}88 0%, transparent 50%), + radial-gradient(ellipse ${rng.int(40, 80)}% ${rng.int(60, 100)}% at ${x2}% ${y2}%, ${colors[1]}88 0%, transparent 50%), + radial-gradient(ellipse ${rng.int(50, 90)}% ${rng.int(50, 90)}% at ${100-x1}% ${100-y1}%, ${colors[2]}66 0%, transparent 60%), + radial-gradient(ellipse ${rng.int(30, 60)}% ${rng.int(30, 60)}% at ${100-x2}% ${100-y2}%, ${colors[3]}44 0%, transparent 70%), + linear-gradient(${rng.int(0, 360)}deg, ${bg} 0%, ${colors[0]}22 50%, ${bg} 100%) + `, + backgroundColor: bg, + }; + } + + case 'gradient': { + const angle = rng.int(0, 360); + const type = rng.int(0, 3); + if (type === 0) { + return { + background: `linear-gradient(${angle}deg, ${colors[0]} 0%, ${colors[1]} 33%, ${colors[2]} 66%, ${colors[3]} 100%)`, + }; + } else if (type === 1) { + return { + background: ` + radial-gradient(circle at ${rng.int(30, 70)}% ${rng.int(30, 70)}%, ${colors[0]} 0%, ${colors[1]} 30%, ${colors[2]} 60%, ${colors[3]} 100%) + `, + }; + } else { + return { + background: ` + linear-gradient(${angle}deg, ${colors[0]} 0%, ${colors[0]} 25%, transparent 25%, transparent 75%, ${colors[2]} 75%), + linear-gradient(${angle + 90}deg, ${colors[1]} 0%, ${colors[1]} 25%, transparent 25%, transparent 75%, ${colors[3]} 75%), + 
linear-gradient(${angle}deg, ${colors[2]} 0%, ${colors[3]} 100%) + `, + backgroundBlendMode: 'multiply, screen, normal', + }; + } + } + + case 'rings': { + const centerX = rng.int(30, 70); + const centerY = rng.int(30, 70); + return { + background: ` + repeating-radial-gradient(circle at ${centerX}% ${centerY}%, + ${colors[0]}66 0px, ${colors[0]}66 2px, + transparent 2px, transparent ${rng.int(15, 25)}px, + ${colors[1]}66 ${rng.int(15, 25)}px, ${colors[1]}66 ${rng.int(17, 27)}px, + transparent ${rng.int(17, 27)}px, transparent ${rng.int(35, 50)}px + ), + radial-gradient(circle at ${centerX}% ${centerY}%, ${colors[2]}88 0%, transparent 60%), + linear-gradient(${rng.int(0, 180)}deg, ${colors[3]}44, ${colors[0]}44) + `, + backgroundColor: bg, + }; + } + + case 'crystal': { + const facets = rng.int(4, 8); + const gradients = Array.from({ length: facets }, (_, i) => { + const startAngle = (360 / facets) * i; + const color = colors[i % colors.length]; + return `conic-gradient(from ${startAngle}deg at ${50 + rng.int(-20, 20)}% ${50 + rng.int(-20, 20)}%, ${color}88 0deg, transparent ${360/facets}deg)`; + }); + return { + background: [ + ...gradients, + `radial-gradient(circle at 50% 50%, ${colors[0]}44 0%, transparent 70%)`, + ].join(', '), + backgroundColor: bg, + }; + } + + default: + return { + background: `linear-gradient(135deg, ${colors[0]}, ${colors[1]})`, + }; + } +}; + +export const AlbumCover: React.FC = ({ seed, size = 'md', className = '', children }) => { + const coverStyle = useMemo(() => { + const rng = new SeededRandom(seed); + const palette = rng.pick(palettes); + return generatePattern(rng, palette); + }, [seed]); + + const sizeClasses: Record = { + xs: 'w-8 h-8', + sm: 'w-10 h-10', + md: 'w-12 h-12', + lg: 'w-14 h-14', + xl: 'w-48 h-48', + full: 'w-full h-full', + }; + + return ( +
+ {children} +
+ ); +}; + +export default AlbumCover; diff --git a/ui/components/ConsolePanel.tsx b/ui/components/ConsolePanel.tsx new file mode 100644 index 0000000..76a6eea --- /dev/null +++ b/ui/components/ConsolePanel.tsx @@ -0,0 +1,130 @@ +import React, { useEffect, useRef, useState } from 'react'; +import { X, Copy } from 'lucide-react'; + +interface ConsolePanelProps { + isOpen: boolean; + onClose: () => void; +} + +const LOG_STREAM_URL = '/logs/stream'; + +export const ConsolePanel: React.FC = ({ isOpen, onClose }) => { + const [lines, setLines] = useState([]); + const [status, setStatus] = useState<'connecting' | 'connected' | 'closed' | 'error'>('closed'); + const [errorMessage, setErrorMessage] = useState(null); + const bottomRef = useRef(null); + const eventSourceRef = useRef(null); + + useEffect(() => { + if (!isOpen) return; + + setLines(prev => [...prev, `[Console] Connecting to ${LOG_STREAM_URL}...`]); + setStatus('connecting'); + setErrorMessage(null); + + const base = window.location.origin; + const url = `${base}${LOG_STREAM_URL}`; + const es = new EventSource(url); + eventSourceRef.current = es; + + es.onopen = () => { + setStatus('connected'); + setLines(prev => [...prev, '[System] Log stream connected.']); + }; + + es.onmessage = (event: MessageEvent) => { + const msg = event.data; + if (msg != null && typeof msg === 'string') { + setLines(prev => { + const next = [...prev, msg]; + if (next.length > 2000) return next.slice(-1500); + return next; + }); + } + }; + + es.onerror = () => { + if (es.readyState === EventSource.CLOSED) { + setStatus('closed'); + } else { + setStatus('error'); + setErrorMessage('Connection lost. 
Reconnecting...'); + } + }; + + return () => { + es.close(); + eventSourceRef.current = null; + }; + }, [isOpen]); + + useEffect(() => { + bottomRef.current?.scrollIntoView({ behavior: 'smooth' }); + }, [lines]); + + const copyToClipboard = () => { + const text = lines.join('\n'); + if (!text) return; + navigator.clipboard.writeText(text).then( + () => setErrorMessage(null), + () => setErrorMessage('Copy failed') + ); + }; + + if (!isOpen) return null; + + return ( +
+
+
+
+ Console + + {status} + + {errorMessage && ( + {errorMessage} + )} +
+
+ + +
+
+
+ {lines.map((line, i) => ( +
+ {line} +
+ ))} +
+
+
+
+ ); +}; diff --git a/ui/components/CreatePanel.tsx b/ui/components/CreatePanel.tsx new file mode 100644 index 0000000..e3c207c --- /dev/null +++ b/ui/components/CreatePanel.tsx @@ -0,0 +1,2225 @@ +import React, { useState, useEffect, useRef, useCallback } from 'react'; +import { Sparkles, ChevronDown, Settings2, Trash2, Music2, Sliders, Dices, Hash, RefreshCw, Plus, Upload, Play, Pause, Info, Loader2 } from 'lucide-react'; +import { GenerationParams, Song } from '../types'; +import { useAuth } from '../context/AuthContext'; +import { generateApi, type LoraAdapter } from '../services/api'; + +interface ReferenceTrack { + id: string; + filename: string; + storage_key: string; + duration: number | null; + file_size_bytes: number | null; + tags: string[] | null; + created_at?: string; + audio_url: string; + /** Display name (title or filename stem) */ + label?: string; + /** 'uploaded' = ref uploads (deletable); 'library' = generated/player library */ + source?: 'uploaded' | 'library'; +} + +interface CreatePanelProps { + onGenerate: (params: GenerationParams) => void; + isGenerating: boolean; + initialData?: { song: Song, timestamp: number } | null; +} + +/** Visible tooltip on hover (native title has delay and is unreliable). 
*/ +function InfoTooltip({ text }: { text: string }) { + return ( + + + + {text} + + + ); +} + +const KEY_SIGNATURES = [ + '', + 'C major', 'C minor', + 'C# major', 'C# minor', + 'Db major', 'Db minor', + 'D major', 'D minor', + 'D# major', 'D# minor', + 'Eb major', 'Eb minor', + 'E major', 'E minor', + 'F major', 'F minor', + 'F# major', 'F# minor', + 'Gb major', 'Gb minor', + 'G major', 'G minor', + 'G# major', 'G# minor', + 'Ab major', 'Ab minor', + 'A major', 'A minor', + 'A# major', 'A# minor', + 'Bb major', 'Bb minor', + 'B major', 'B minor' +]; + +const TIME_SIGNATURES = ['', '2/4', '3/4', '4/4', '6/8']; + +const VOCAL_LANGUAGES = [ + { value: 'unknown', label: 'Auto / Instrumental' }, + { value: 'ar', label: 'Arabic' }, + { value: 'az', label: 'Azerbaijani' }, + { value: 'bg', label: 'Bulgarian' }, + { value: 'bn', label: 'Bengali' }, + { value: 'ca', label: 'Catalan' }, + { value: 'cs', label: 'Czech' }, + { value: 'da', label: 'Danish' }, + { value: 'de', label: 'German' }, + { value: 'el', label: 'Greek' }, + { value: 'en', label: 'English' }, + { value: 'es', label: 'Spanish' }, + { value: 'fa', label: 'Persian' }, + { value: 'fi', label: 'Finnish' }, + { value: 'fr', label: 'French' }, + { value: 'he', label: 'Hebrew' }, + { value: 'hi', label: 'Hindi' }, + { value: 'hr', label: 'Croatian' }, + { value: 'ht', label: 'Haitian Creole' }, + { value: 'hu', label: 'Hungarian' }, + { value: 'id', label: 'Indonesian' }, + { value: 'is', label: 'Icelandic' }, + { value: 'it', label: 'Italian' }, + { value: 'ja', label: 'Japanese' }, + { value: 'ko', label: 'Korean' }, + { value: 'la', label: 'Latin' }, + { value: 'lt', label: 'Lithuanian' }, + { value: 'ms', label: 'Malay' }, + { value: 'ne', label: 'Nepali' }, + { value: 'nl', label: 'Dutch' }, + { value: 'no', label: 'Norwegian' }, + { value: 'pa', label: 'Punjabi' }, + { value: 'pl', label: 'Polish' }, + { value: 'pt', label: 'Portuguese' }, + { value: 'ro', label: 'Romanian' }, + { value: 'ru', label: 
'Russian' }, + { value: 'sa', label: 'Sanskrit' }, + { value: 'sk', label: 'Slovak' }, + { value: 'sr', label: 'Serbian' }, + { value: 'sv', label: 'Swedish' }, + { value: 'sw', label: 'Swahili' }, + { value: 'ta', label: 'Tamil' }, + { value: 'te', label: 'Telugu' }, + { value: 'th', label: 'Thai' }, + { value: 'tl', label: 'Tagalog' }, + { value: 'tr', label: 'Turkish' }, + { value: 'uk', label: 'Ukrainian' }, + { value: 'ur', label: 'Urdu' }, + { value: 'vi', label: 'Vietnamese' }, + { value: 'yue', label: 'Cantonese' }, + { value: 'zh', label: 'Chinese (Mandarin)' }, +]; + +export const CreatePanel: React.FC = ({ onGenerate, isGenerating, initialData }) => { + const { isAuthenticated, token } = useAuth(); + + // Mode + const [customMode, setCustomMode] = useState(true); + + // Simple Mode + const [songDescription, setSongDescription] = useState(''); + + // Custom Mode + const [lyrics, setLyrics] = useState(''); + const [style, setStyle] = useState(''); + const [title, setTitle] = useState(''); + + // Common + const [instrumental, setInstrumental] = useState(false); + const [vocalLanguage, setVocalLanguage] = useState('en'); + + // Music Parameters + const [bpm, setBpm] = useState(0); + const [keyScale, setKeyScale] = useState(''); + const [timeSignature, setTimeSignature] = useState(''); + + // Advanced Settings + const [showAdvanced, setShowAdvanced] = useState(false); + const [duration, setDuration] = useState(-1); + const [batchSize, setBatchSize] = useState(1); + const [bulkCount, setBulkCount] = useState(1); // Number of independent generation jobs to queue + const [guidanceScale, setGuidanceScale] = useState(4.0); + const [randomSeed, setRandomSeed] = useState(true); + const [seed, setSeed] = useState(-1); + const [thinking, setThinking] = useState(false); // Default false for GPU compatibility + const [audioFormat, setAudioFormat] = useState<'mp3' | 'flac'>('mp3'); + const [inferenceSteps, setInferenceSteps] = useState(65); + const [inferMethod, 
setInferMethod] = useState<'ode' | 'sde'>('ode'); + const [shift, setShift] = useState(3.0); + + // LM Parameters (under Expert) + const [showLmParams, setShowLmParams] = useState(false); + const [lmTemperature, setLmTemperature] = useState(0.85); + const [lmCfgScale, setLmCfgScale] = useState(2.0); + const [lmTopK, setLmTopK] = useState(0); + const [lmTopP, setLmTopP] = useState(0.9); + const [lmNegativePrompt, setLmNegativePrompt] = useState('NO USER INPUT'); + + // Expert Parameters (now in Advanced section) + const [referenceAudioUrl, setReferenceAudioUrl] = useState(''); + const [sourceAudioUrl, setSourceAudioUrl] = useState(''); + const [audioCodes, setAudioCodes] = useState(''); + const [repaintingStart, setRepaintingStart] = useState(0); + const [repaintingEnd, setRepaintingEnd] = useState(-1); + const [audioCoverStrength, setAudioCoverStrength] = useState(1.0); + const [taskType, setTaskType] = useState('text2music'); + const [useAdg, setUseAdg] = useState(false); + const [cfgIntervalStart, setCfgIntervalStart] = useState(0.0); + const [cfgIntervalEnd, setCfgIntervalEnd] = useState(1.0); + const [customTimesteps, setCustomTimesteps] = useState(''); + const [loraAdapters, setLoraAdapters] = useState([]); + const [loraLoading, setLoraLoading] = useState(false); + const [loraNameOrPath, setLoraNameOrPath] = useState(''); + const [loraWeight, setLoraWeight] = useState(0.75); + const [useCotMetas, setUseCotMetas] = useState(true); + const [useCotCaption, setUseCotCaption] = useState(true); + const [useCotLanguage, setUseCotLanguage] = useState(true); + const [autogen, setAutogen] = useState(false); + const [constrainedDecodingDebug, setConstrainedDecodingDebug] = useState(false); + const [allowLmBatch, setAllowLmBatch] = useState(true); + const [getScores, setGetScores] = useState(false); + const [getLrc, setGetLrc] = useState(false); + const [scoreScale, setScoreScale] = useState(0.5); + const [lmBatchChunkSize, setLmBatchChunkSize] = useState(8); + const 
[trackName, setTrackName] = useState(''); + const [completeTrackClasses, setCompleteTrackClasses] = useState(''); + const [isFormatCaption, setIsFormatCaption] = useState(false); + + const [isUploadingReference, setIsUploadingReference] = useState(false); + const [isUploadingSource, setIsUploadingSource] = useState(false); + const [uploadError, setUploadError] = useState(null); + const [isFormatting, setIsFormatting] = useState(false); + const referenceInputRef = useRef(null); + const sourceInputRef = useRef(null); + const [showAudioModal, setShowAudioModal] = useState(false); + const [audioModalTarget, setAudioModalTarget] = useState<'reference' | 'source'>('reference'); + const [tempAudioUrl, setTempAudioUrl] = useState(''); + const [audioTab, setAudioTab] = useState<'reference' | 'source'>('reference'); + const referenceAudioRef = useRef(null); + const sourceAudioRef = useRef(null); + const [referencePlaying, setReferencePlaying] = useState(false); + const [sourcePlaying, setSourcePlaying] = useState(false); + const [referenceTime, setReferenceTime] = useState(0); + const [sourceTime, setSourceTime] = useState(0); + const [referenceDuration, setReferenceDuration] = useState(0); + const [sourceDuration, setSourceDuration] = useState(0); + + // Reference tracks modal state + const [referenceTracks, setReferenceTracks] = useState([]); + const [libraryTagFilter, setLibraryTagFilter] = useState('all'); + const [isLoadingTracks, setIsLoadingTracks] = useState(false); + const [playingTrackId, setPlayingTrackId] = useState(null); + const modalAudioRef = useRef(null); + const [modalTrackTime, setModalTrackTime] = useState(0); + const [modalTrackDuration, setModalTrackDuration] = useState(0); + + const getAudioLabel = (url: string) => { + try { + const parsed = new URL(url); + return decodeURIComponent(parsed.pathname.split('/').pop() || parsed.hostname); + } catch { + const parts = url.split('/'); + return decodeURIComponent(parts[parts.length - 1] || url); + } + }; + + 
// Resize Logic
+  // Height of the lyrics editor in px, restored from localStorage when a usable value exists.
+  const [lyricsHeight, setLyricsHeight] = useState(() => {
+    const saved = localStorage.getItem('acestep_lyrics_height');
+    const parsed = saved ? parseInt(saved, 10) : NaN;
+    // A corrupt entry would otherwise produce NaN and be used as the height.
+    return Number.isFinite(parsed) ? parsed : 144; // Default h-36 is 144px (9rem * 16)
+  });
+  // Mirrors the most recent height so the mouseup handler can persist it without a stale closure.
+  const lyricsHeightRef = useRef(lyricsHeight);
+  const [isResizing, setIsResizing] = useState(false);
+  const lyricsRef = useRef(null);
+
+  // Reuse Effect - must be after all state declarations
+  useEffect(() => {
+    if (initialData) {
+      setCustomMode(true);
+      setLyrics(initialData.song.lyrics);
+      setStyle(initialData.song.style);
+      setTitle(initialData.song.title);
+      setInstrumental(initialData.song.lyrics.length === 0);
+    }
+  }, [initialData]);
+
+  // When both reference and source audio are removed, restore Text → Music mode
+  useEffect(() => {
+    if (!referenceAudioUrl.trim() && !sourceAudioUrl.trim()) {
+      setTaskType('text2music');
+    }
+  }, [referenceAudioUrl, sourceAudioUrl]);
+
+  const fetchLoraAdapters = useCallback(() => {
+    setLoraLoading(true);
+    generateApi.getLoraAdapters()
+      .then((res) => setLoraAdapters(res.adapters || []))
+      .catch(() => setLoraAdapters([]))
+      .finally(() => setLoraLoading(false));
+  }, []);
+
+  // Fetch LoRA adapters on mount (Training output + custom_lora)
+  useEffect(() => { fetchLoraAdapters(); }, [fetchLoraAdapters]);
+
+  // Drag-to-resize for the lyrics editor. Window listeners are attached only while a drag is active.
+  useEffect(() => {
+    const handleMouseMove = (e: MouseEvent) => {
+      if (!isResizing) return;
+
+      // The bottom edge is being dragged, so new height = current mouse Y - top of the element.
+      if (lyricsRef.current) {
+        const rect = lyricsRef.current.getBoundingClientRect();
+        const newHeight = e.clientY - rect.top;
+        // detailed limits: min 96px (h-24), max 600px
+        if (newHeight > 96 && newHeight < 600) {
+          lyricsHeightRef.current = newHeight;
+          setLyricsHeight(newHeight);
+        }
+      }
+    };
+
+    const handleMouseUp = () => {
+      setIsResizing(false);
+      document.body.style.cursor = 'default';
+      document.body.style.userSelect = 'auto';
+      // Save height to localStorage. Read it from the ref: this effect only re-runs when
+      // isResizing changes, so the `lyricsHeight` captured here is still the pre-drag value.
+      localStorage.setItem('acestep_lyrics_height', String(lyricsHeightRef.current));
+    };
+
+    if (isResizing) {
+      window.addEventListener('mousemove', handleMouseMove);
+      window.addEventListener('mouseup', handleMouseUp);
+      document.body.style.cursor = 'ns-resize';
+      document.body.style.userSelect = 'none'; // Prevent text selection while dragging
+    }
+
+    return () => {
+      window.removeEventListener('mousemove', handleMouseMove);
+      window.removeEventListener('mouseup', handleMouseUp);
+      document.body.style.cursor = 'default';
+      document.body.style.userSelect = 'auto';
+    };
+  }, [isResizing]);
+
+  const startResizing = (e: React.MouseEvent) => {
+    e.preventDefault();
+    setIsResizing(true);
+  };
+
+  const uploadAudio = async (file: File, target: 'reference' | 'source') => {
+    setUploadError(null);
+    const setUploading = target === 'reference' ? setIsUploadingReference : setIsUploadingSource;
+    setUploading(true);
+    try {
+      const result = await generateApi.uploadAudio(file, token || '');
+      if (target === 'reference') setReferenceAudioUrl(result.url);
+      else setSourceAudioUrl(result.url);
+      setTaskType(target === 'reference' ? 'audio2audio' : 'cover');
+      setAudioTab(target);
+      setShowAudioModal(false);
+      setTempAudioUrl('');
+    } catch (err) {
+      const message = err instanceof Error ? 
err.message : 'Upload failed'; + setUploadError(message); + } finally { + setUploading(false); + } + }; + + const handleFileSelect = (e: React.ChangeEvent, target: 'reference' | 'source') => { + const file = e.target.files?.[0]; + if (file) { + void uploadAudio(file, target); + } + e.target.value = ''; + }; + + // Format handler - uses LLM to enhance style and auto-fill parameters + const handleFormat = async () => { + if (!token || !style.trim()) return; + setIsFormatting(true); + try { + const result = await generateApi.formatInput({ + caption: style, + lyrics: lyrics, + bpm: bpm > 0 ? bpm : undefined, + duration: duration > 0 ? duration : undefined, + keyScale: keyScale || undefined, + timeSignature: timeSignature || undefined, + temperature: lmTemperature, + topK: lmTopK > 0 ? lmTopK : undefined, + topP: lmTopP, + }, token); + + if (result.success) { + // Update fields with LLM-generated values + if (result.caption) setStyle(result.caption); + if (result.lyrics) setLyrics(result.lyrics); + if (result.bpm && result.bpm > 0) setBpm(result.bpm); + if (result.duration && result.duration > 0) setDuration(result.duration); + if (result.key_scale) setKeyScale(result.key_scale); + if (result.time_signature) setTimeSignature(result.time_signature); + if (result.language) setVocalLanguage(result.language); + setIsFormatCaption(true); + } else { + console.error('Format failed:', result.error || result.status_message); + alert(result.error || result.status_message || 'Format failed. Make sure the LLM is initialized.'); + } + } catch (err) { + console.error('Format error:', err); + alert('Format failed. 
The LLM may not be available.'); + } finally { + setIsFormatting(false); + } + }; + + const openAudioModal = (target: 'reference' | 'source') => { + setAudioModalTarget(target); + setTempAudioUrl(''); + setShowAudioModal(true); + void fetchReferenceTracks(); + }; + + const fetchReferenceTracks = useCallback(async () => { + setIsLoadingTracks(true); + try { + const [refRes, songsRes] = await Promise.all([ + fetch('/api/reference-tracks', { headers: token ? { Authorization: `Bearer ${token}` } : {} }), + fetch('/api/songs', { headers: token ? { Authorization: `Bearer ${token}` } : {} }) + ]); + const refData = refRes.ok ? await refRes.json() : { tracks: [] }; + const songsData = songsRes.ok ? await songsRes.json() : { songs: [] }; + const refTracks: ReferenceTrack[] = (refData.tracks || []).map((r: { id: string; filename?: string; storage_key?: string; audio_url: string; duration?: number | null; tags?: string[] | null; file_size_bytes?: number | null }) => ({ + id: r.id, + filename: r.filename || r.storage_key || r.id, + storage_key: r.storage_key || r.filename || r.id, + audio_url: r.audio_url, + duration: r.duration ?? null, + tags: r.tags ?? null, + file_size_bytes: r.file_size_bytes ?? null, + label: (r.filename || r.storage_key || r.id).replace(/\.[^/.]+$/, ''), + source: 'uploaded' as const, + })); + const songs = songsData.songs || []; + const libraryTracks: ReferenceTrack[] = songs.map((s: { id: string; title?: string; audio_url: string; duration?: number | null; tags?: string[] }) => ({ + id: s.id, + filename: s.title || s.id, + storage_key: s.id, + audio_url: s.audio_url, + duration: s.duration ?? null, + tags: Array.isArray(s.tags) ? s.tags : null, + file_size_bytes: null, + label: s.title || s.id.replace(/\.[^/.]+$/, '') || s.id, + source: (s.id.startsWith('ref:') ? 
'uploaded' : 'library') as 'uploaded' | 'library',
+      }));
+      const merged = [...refTracks];
+      const seenIds = new Set(refTracks.map(t => t.id));
+      for (const t of libraryTracks) {
+        if (t.source === 'library' && !seenIds.has(t.id)) {
+          merged.push(t);
+          seenIds.add(t.id);
+        }
+      }
+      merged.sort((a, b) => (b.label || b.filename).localeCompare(a.label || a.filename, undefined, { sensitivity: 'base' }));
+      setReferenceTracks(merged);
+      setLibraryTagFilter('all');
+    } catch (err) {
+      console.error('Failed to fetch library/reference tracks:', err);
+    } finally {
+      setIsLoadingTracks(false);
+    }
+  }, [token]);
+
+  // Refresh library list periodically so API-completed generations show up in "From library"
+  useEffect(() => {
+    void fetchReferenceTracks();
+    const REFRESH_MS = 20_000;
+    const id = setInterval(() => fetchReferenceTracks(), REFRESH_MS);
+    return () => clearInterval(id);
+  }, [fetchReferenceTracks]);
+
+  /**
+   * Upload a file to /api/reference-tracks, add the returned track to the modal list and
+   * select it as the reference or source audio (per audioModalTarget), then close the modal.
+   * Failures are reported through uploadError.
+   */
+  const uploadReferenceTrack = async (file: File) => {
+    setUploadError(null);
+    setIsUploadingReference(true);
+    try {
+      const formData = new FormData();
+      formData.append('audio', file);
+
+      const headers: Record<string, string> = {};
+      if (token) headers.Authorization = `Bearer ${token}`;
+
+      const response = await fetch('/api/reference-tracks', {
+        method: 'POST',
+        headers,
+        body: formData
+      });
+
+      if (!response.ok) {
+        // The error body may not be valid JSON; fall back to the generic message
+        // instead of surfacing a JSON parse error to the user.
+        const err = await response.json().catch(() => null);
+        throw new Error(err?.error || 'Upload failed');
+      }
+
+      const data = await response.json();
+      const r = data.track || {};
+      // Only list the track when the server actually returned one; an id-less row
+      // could be neither keyed nor deleted.
+      if (r.id) {
+        const normalized: ReferenceTrack = {
+          id: r.id,
+          filename: r.filename || r.storage_key || r.id,
+          storage_key: r.storage_key || r.filename || r.id,
+          audio_url: r.audio_url,
+          duration: r.duration ?? null,
+          tags: r.tags ?? ['uploaded'],
+          file_size_bytes: r.file_size_bytes ?? null,
+          label: (r.filename || r.storage_key || r.id || '').replace(/\.[^/.]+$/, ''),
+          source: 'uploaded',
+        };
+        setReferenceTracks(prev => [normalized, ...prev]);
+      }
+
+      const audioUrl = data.track?.audio_url;
+      if (audioUrl) {
+        if (audioModalTarget === 'reference') {
+          setReferenceAudioUrl(audioUrl);
+          setTaskType('audio2audio');
+          setAudioTab('reference');
+        } else {
+          setSourceAudioUrl(audioUrl);
+          setTaskType('cover');
+          setAudioTab('source');
+        }
+      }
+      setShowAudioModal(false);
+    } catch (err) {
+      const message = err instanceof Error ? err.message : 'Upload failed';
+      setUploadError(message);
+    } finally {
+      setIsUploadingReference(false);
+    }
+  };
+
+  /**
+   * Delete an uploaded reference track on the server and drop it from the modal list,
+   * stopping preview playback if that track was playing. No-op without an auth token.
+   */
+  const deleteReferenceTrack = async (trackId: string) => {
+    if (!token) return;
+    try {
+      // Escape the id so characters such as '#', '?' or spaces cannot alter the request path.
+      const response = await fetch(`/api/reference-tracks/${encodeURIComponent(trackId)}`, {
+        method: 'DELETE',
+        headers: { Authorization: `Bearer ${token}` }
+      });
+      if (response.ok) {
+        setReferenceTracks(prev => prev.filter(t => t.id !== trackId));
+        if (playingTrackId === trackId) {
+          setPlayingTrackId(null);
+          if (modalAudioRef.current) {
+            modalAudioRef.current.pause();
+          }
+        }
+      } else {
+        // Previously a rejected delete was dropped without any trace.
+        console.error('Failed to delete track:', response.status);
+      }
+    } catch (err) {
+      console.error('Failed to delete track:', err);
+    }
+  };
+
+  // Select a track from the modal list as the reference or source audio and close the modal.
+  const useReferenceTrack = (track: ReferenceTrack) => {
+    if (audioModalTarget === 'reference') {
+      setReferenceAudioUrl(track.audio_url);
+      setTaskType('audio2audio');
+      setAudioTab('reference');
+    } else {
+      setSourceAudioUrl(track.audio_url);
+      setTaskType('cover');
+      setAudioTab('source');
+    }
+    setShowAudioModal(false);
+    setPlayingTrackId(null);
+  };
+
+  // Toggle preview playback of a track inside the modal via the shared modal audio element.
+  const toggleModalTrack = (track: ReferenceTrack) => {
+    if (playingTrackId === track.id) {
+      if (modalAudioRef.current) {
+        modalAudioRef.current.pause();
+      }
+      setPlayingTrackId(null);
+    } else {
+      setPlayingTrackId(track.id);
+      if (modalAudioRef.current) {
+        modalAudioRef.current.src = track.audio_url;
+        modalAudioRef.current.play().catch(() => undefined);
+      }
+    }
+  };
+
+  const applyAudioUrl = () => {
+    if 
(!tempAudioUrl.trim()) return; + if (audioModalTarget === 'reference') { + setReferenceAudioUrl(tempAudioUrl.trim()); + setReferenceTime(0); + setReferenceDuration(0); + setTaskType('audio2audio'); + } else { + setSourceAudioUrl(tempAudioUrl.trim()); + setSourceTime(0); + setSourceDuration(0); + setTaskType('cover'); + } + setShowAudioModal(false); + setTempAudioUrl(''); + }; + + const formatTime = (time: number) => { + if (!Number.isFinite(time) || time <= 0) return '0:00'; + const minutes = Math.floor(time / 60); + const seconds = Math.floor(time % 60); + return `${minutes}:${String(seconds).padStart(2, '0')}`; + }; + + const toggleAudio = (target: 'reference' | 'source') => { + const audio = target === 'reference' ? referenceAudioRef.current : sourceAudioRef.current; + if (!audio) return; + if (audio.paused) { + audio.play().catch(() => undefined); + } else { + audio.pause(); + } + }; + + const handleDrop = (e: React.DragEvent, target: 'reference' | 'source') => { + e.preventDefault(); + const file = e.dataTransfer.files?.[0]; + if (file) { + void uploadAudio(file, target); + } + }; + + const handleDragOver = (e: React.DragEvent) => { + e.preventDefault(); + }; + + const handleGenerate = () => { + console.log('[CreatePanel] Create button clicked', { bulkCount, customMode, isAuthenticated }); + // Bulk generation: loop bulkCount times + for (let i = 0; i < bulkCount; i++) { + // Seed handling: first job uses user's seed, rest get random seeds + let jobSeed = -1; + if (!randomSeed && i === 0) { + jobSeed = seed; + } else if (!randomSeed && i > 0) { + // Subsequent jobs get random seeds for variety + jobSeed = Math.floor(Math.random() * 4294967295); + } + + onGenerate({ + customMode, + songDescription: customMode ? undefined : songDescription, + prompt: style, + lyrics, + style, + title: bulkCount > 1 ? 
`${title} (${i + 1})` : title, + instrumental, + vocalLanguage, + bpm, + keyScale, + timeSignature, + duration, + inferenceSteps, + guidanceScale, + batchSize, + randomSeed: randomSeed || i > 0, // Force random for subsequent bulk jobs + seed: jobSeed, + thinking, + audioFormat, + inferMethod, + shift, + lmTemperature, + lmCfgScale, + lmTopK, + lmTopP, + lmNegativePrompt, + referenceAudioUrl: referenceAudioUrl.trim() || undefined, + sourceAudioUrl: sourceAudioUrl.trim() || undefined, + audioCodes: audioCodes.trim() || undefined, + repaintingStart, + repaintingEnd, + audioCoverStrength, + taskType, + useAdg, + cfgIntervalStart, + cfgIntervalEnd, + customTimesteps: customTimesteps.trim() || undefined, + loraNameOrPath: loraNameOrPath.trim() || undefined, + loraWeight, + useCotMetas, + useCotCaption, + useCotLanguage, + autogen, + constrainedDecodingDebug, + allowLmBatch, + getScores, + getLrc, + scoreScale, + lmBatchChunkSize, + trackName: trackName.trim() || undefined, + completeTrackClasses: (() => { + const parsed = completeTrackClasses + .split(',') + .map((item) => item.trim()) + .filter(Boolean); + return parsed.length ? parsed : undefined; + })(), + isFormatCaption, + }); + } + + // Reset bulk count after generation + if (bulkCount > 1) { + setBulkCount(1); + } + }; + + return ( +
+
+ handleFileSelect(e, 'reference')} + className="hidden" + /> + handleFileSelect(e, 'source')} + className="hidden" + /> +