diff --git a/src/app/package-lock.json b/src/app/package-lock.json index 35a7694f1..b731302a6 100644 --- a/src/app/package-lock.json +++ b/src/app/package-lock.json @@ -4379,6 +4379,17 @@ "node": ">= 0.8" } }, + "node_modules/encoding": { + "version": "0.1.13", + "resolved": "https://registry.npmjs.org/encoding/-/encoding-0.1.13.tgz", + "integrity": "sha512-ETBauow1T35Y/WZMkio9jiM0Z5xjHHmJ4XmjZOq1l/dXz3lr2sRn87nJy20RupqSh1F2m3HHPSp8ShIPQJrJ3A==", + "dev": true, + "license": "MIT", + "optional": true, + "dependencies": { + "iconv-lite": "^0.6.2" + } + }, "node_modules/end-of-stream": { "version": "1.4.5", "resolved": "https://registry.npmjs.org/end-of-stream/-/end-of-stream-1.4.5.tgz", diff --git a/src/app/src/renderer/AddModelPanel.tsx b/src/app/src/renderer/AddModelPanel.tsx index 86db8e656..e67571d7e 100644 --- a/src/app/src/renderer/AddModelPanel.tsx +++ b/src/app/src/renderer/AddModelPanel.tsx @@ -1,5 +1,6 @@ -import React, { useState, useEffect } from 'react'; +import React, { useState, useEffect, useMemo } from 'react'; import { useSystem } from './hooks/useSystem'; +import { TASK_RECIPE_MAP } from './utils/recipeCompatibility'; export interface AddModelInitialValues { name: string; @@ -58,12 +59,32 @@ const AddModelPanel: React.FC = ({ onClose, onInstall, initi useEffect(() => { const newForm = createEmptyForm(initialValues); if (initialValues?.mmprojOptions && initialValues.mmprojOptions.length > 0) { - newForm.mmproj = initialValues.mmprojOptions[0]; + // Prefer BF16 > F16 > F32 > first available + const priority = [/bf16/i, /f16/i, /f32/i]; + let best = initialValues.mmprojOptions[0]; + for (const pattern of priority) { + const match = initialValues.mmprojOptions.find(f => pattern.test(f)); + if (match) { best = match; break; } + } + newForm.mmproj = best; } setForm(newForm); setError(null); }, [initialValues]); + // Detect recipe/name mismatch — warn when checkpoint suggests a different modality + const recipeMismatchWarning = useMemo(() => { + const 
checkpoint = form.checkpoint.toLowerCase(); + const name = form.name.toLowerCase(); + const combined = `${checkpoint} ${name}`; + for (const mapping of TASK_RECIPE_MAP) { + if (mapping.namePatterns.some(p => p.test(combined)) && form.recipe !== mapping.recipe) { + return `This looks like a ${mapping.label} model. The selected recipe (${RECIPE_LABELS[form.recipe] ?? form.recipe}) may not be compatible.`; + } + } + return null; + }, [form.checkpoint, form.name, form.recipe]); + const handleChange = (field: string, value: string | boolean) => { setForm(prev => ({ ...prev, [field]: value })); setError(null); @@ -252,6 +273,7 @@ const AddModelPanel: React.FC = ({ onClose, onInstall, initi + {recipeMismatchWarning &&
{recipeMismatchWarning}
} {error &&
{error}
} diff --git a/src/app/src/renderer/ConfirmDialog.tsx b/src/app/src/renderer/ConfirmDialog.tsx index 768de755a..046cf6700 100644 --- a/src/app/src/renderer/ConfirmDialog.tsx +++ b/src/app/src/renderer/ConfirmDialog.tsx @@ -1,5 +1,10 @@ import React, { useEffect, useRef, useState } from 'react'; +interface KeepFilesOption { + label: string; + defaultChecked?: boolean; +} + interface ConfirmDialogProps { isOpen: boolean; title: string; @@ -7,6 +12,9 @@ interface ConfirmDialogProps { confirmText?: string; cancelText?: string; danger?: boolean; + keepFilesOption?: KeepFilesOption; + keepFiles?: boolean; + onKeepFilesChange?: (keepFiles: boolean) => void; onConfirm: () => void; onCancel: () => void; } @@ -18,6 +26,9 @@ const ConfirmDialog: React.FC = ({ confirmText = 'Confirm', cancelText = 'Cancel', danger = false, + keepFilesOption, + keepFiles, + onKeepFilesChange, onConfirm, onCancel }) => { @@ -59,6 +70,16 @@ const ConfirmDialog: React.FC = ({

{title}

{message}

+ {keepFilesOption && ( + + )}
+ +
+
+
+ {cacheQuants.length >= 1 && ( + + )} + + {compatibility.label} + {compatibility.level === 'experimental' && ' ?'} + +
+ + ); + }; + + const renderCacheProviderGroup = (provider: string, members: CacheModelInfo[]) => { + const isExpanded = expandedCacheProviders.has(provider); + return ( +
+
toggleCacheProvider(provider)}> + + + + {provider} + ({members.length}) +
+ {isExpanded && ( +
+ {members.map(m => renderCacheModelItem(m, true))} +
+ )} +
+ ); + }; + const renderFamilyItem = (item: Extract) => { const { family, members } = item; const isExpanded = expandedFamilies.has(family.displayName); @@ -1383,7 +2043,7 @@ const [searchQuery, setSearchQuery] = useState(''); /> {showInlineFilterButton && ( + ); + })} + +
Suggested models
@@ -1449,10 +2137,29 @@ const [searchQuery, setSearchQuery] = useState(''); +
Sections
+ + + @@ -1485,7 +2192,7 @@ const [searchQuery, setSearchQuery] = useState(''); )}
- {currentView === 'models' && ( + {currentView === 'models' && showSuggestedSection && (
SUGGESTED MODELS
@@ -1494,29 +2201,56 @@ const [searchQuery, setSearchQuery] = useState(''); {renderModelsView()}
)} - {currentView === 'models' && searchQuery.trim().length >= 3 && ( // Rendering the HF models by searching + {currentView === 'models' && showCacheSection && cacheGroupedItems.length > 0 && ( +
+
+
FROM HF CACHE
+
+ {cacheGroupedItems.map(item => + item.type === 'provider-group' + ? renderCacheProviderGroup(item.provider, item.members) + : renderCacheModelItem(item.cacheModel) + )} +
+ )} + {currentView === 'models' && showSearchSection && searchQuery.trim().length >= 3 && ( // Rendering the HF models by searching
FROM HUGGING FACE
- {isSearchingHF && } +
+ {isSearchingHF && } + {hfSearchPage > 0 && ( + + )} + {(hfSearchPage > 0 || hfHasMoreResults) && ( + {hfSearchPage + 1} + )} + {hfHasMoreResults && ( + + )} +
{hfRateLimited && ( -
Rate limited — try again shortly.
+
+ Rate limited — {hfRateLimitReset ? `retry in ${hfRateLimitReset}s` : 'try again shortly'}. + {!hfAuthenticated && ' Set HF_TOKEN for higher limits.'} +
)} - {!hfRateLimited && !isSearchingHF && ( + {!hfRateLimited && !isSearchingHF && hfSearchCompleted && ( hfSearchResults.length === 0 || (hfSearchResults.length > 0 && detectingBackendFor === null && hfSearchResults.every((m: HFModelInfo) => { - const backend = hfModelBackends[m.id]; - return backend === null || (backend != null && ['sd-cpp', 'whispercpp'].includes(backend.recipe)); + const b = hfModelBackends[m.id]; + return b === null || (b != null && !enabledRecipes.has(b.recipe)); })) ) && ( -
No compatible models found.
+
{hfSearchPage > 0 ? 'No more results.' : 'No compatible models found.'}
)} {hfSearchResults.filter((hfModel: HFModelInfo) => { - const backend = hfModelBackends[hfModel.id]; - return backend !== null && !(backend != null && ['sd-cpp', 'whispercpp'].includes(backend.recipe)); + const b = hfModelBackends[hfModel.id]; + if (!b) return false; + return enabledRecipes.has(b.recipe); }).map((hfModel: HFModelInfo) => { const backend = hfModelBackends[hfModel.id]; const isDetecting = detectingBackendFor === hfModel.id; @@ -1526,7 +2260,17 @@ const [searchQuery, setSearchQuery] = useState(''); return (
- {hfModel.id} + { + e.preventDefault(); + const url = `https://huggingface.co/${hfModel.id}`; + if (window.api?.openExternal) { window.api.openExternal(url); } + else { window.open(url, '_blank', 'noopener,noreferrer'); } + }} + >{hfModel.id} {size !== undefined && {formatSize(size / (1024 ** 3))}} ↓ {formatDownloads(hfModel.downloads)} {isDetecting && } @@ -1578,10 +2322,11 @@ const [searchQuery, setSearchQuery] = useState('');
- {!isDetecting && backend && quants.length > 1 && ( + {!isDetecting && backend && quants.length >= 1 && ( )} - {!isDetecting && backend && {backend.label}} + {!isDetecting && backend && ( + + {backend.label} + {backend.compatibilityLevel === 'experimental' && ' ?'} + + )}
); diff --git a/src/app/src/renderer/components/ConnectedBackendRow.tsx b/src/app/src/renderer/components/ConnectedBackendRow.tsx index 7c1958317..7e07ab055 100644 --- a/src/app/src/renderer/components/ConnectedBackendRow.tsx +++ b/src/app/src/renderer/components/ConnectedBackendRow.tsx @@ -46,14 +46,14 @@ const ConnectedBackendRow: React.FC = ({ const info = statusMessage ? { ...backendInfo, message: statusMessage } : backendInfo; const onConfirmUninstall = useCallback(async (r: string, b: string) => { - const confirmed = await confirm({ + const result = await confirm({ title: 'Uninstall Backend', message: `Are you sure you want to uninstall ${RECIPE_DISPLAY_NAMES[r] || r} ${b}?`, confirmText: 'Uninstall', cancelText: 'Cancel', danger: true, }); - if (confirmed) await handleUninstall(r, b); + if (result.confirmed) await handleUninstall(r, b); }, [confirm, handleUninstall]); return ( diff --git a/src/app/src/renderer/utils/backendInstaller.ts b/src/app/src/renderer/utils/backendInstaller.ts index 505e4046e..c932b872f 100644 --- a/src/app/src/renderer/utils/backendInstaller.ts +++ b/src/app/src/renderer/utils/backendInstaller.ts @@ -32,6 +32,7 @@ export interface ModelRegistrationData { vision?: boolean; embedding?: boolean; reranking?: boolean; + labels?: string[]; } /** @@ -282,11 +283,11 @@ export async function uninstallBackend(recipe: string, backend: string): Promise * Delete a model's files. Single codepath for all model deletions. * Dispatches `modelsUpdated` on success so the models context refreshes. 
*/ -export async function deleteModel(modelName: string): Promise { +export async function deleteModel(modelName: string, keepFiles = false): Promise { const response = await serverFetch('/delete', { method: 'POST', headers: { 'Content-Type': 'application/json' }, - body: JSON.stringify({ model_name: modelName }), + body: JSON.stringify({ model_name: modelName, keep_files: keepFiles }), }); if (!response.ok) { @@ -626,7 +627,7 @@ async function ensureModelReadyInternal( if (!loadResponse.ok) { const errorData = await loadResponse.json().catch(() => ({})); - const errorMsg = errorData.error || `Failed to load model: ${loadResponse.statusText}`; + const errorMsg = (typeof errorData.error === 'string' ? errorData.error : errorData.error?.message) || `Failed to load model: ${loadResponse.statusText}`; throw new Error(errorMsg); } diff --git a/src/app/src/renderer/utils/recipeCompatibility.ts b/src/app/src/renderer/utils/recipeCompatibility.ts new file mode 100644 index 000000000..8dea4461e --- /dev/null +++ b/src/app/src/renderer/utils/recipeCompatibility.ts @@ -0,0 +1,297 @@ +/** + * Recipe-aware model compatibility classification. + * + * Maps HuggingFace model metadata (pipeline_tag, tags, model ID) to Lemonade + * recipes and compatibility levels. Task takes priority over format — GGUF is + * a container format, not a task indicator. + * + * Mirrors server-side logic in model_types.h (get_model_type_from_labels) + * and model_manager.cpp (register_user_model label assignment). + */ + +export interface TaskRecipeMapping { + pipelineTags: string[]; + hfTags: string[]; + namePatterns: RegExp[]; + recipe: string; + modelType: string; + label: string; +} + +/** + * Task-to-recipe mapping table. + * Order matters: first match wins. LLM is the fallback and not listed here. 
+ */ +export const TASK_RECIPE_MAP: TaskRecipeMapping[] = [ + { + pipelineTags: ['text-to-image', 'image-to-image'], + hfTags: ['stable-diffusion', 'text-to-image', 'diffusers', 'image-generation', 'image-editing'], + namePatterns: [/stable-diffusion/i, /\bflux\b/i, /\bsdxl\b/i], + recipe: 'sd-cpp', + modelType: 'image', + label: 'sd.cpp', + }, + { + pipelineTags: ['automatic-speech-recognition'], + hfTags: ['whisper'], + namePatterns: [/whisper/i], + recipe: 'whispercpp', + modelType: 'audio', + label: 'whisper.cpp', + }, + { + pipelineTags: ['text-to-speech', 'text-to-audio'], + hfTags: ['tts', 'kokoro'], + namePatterns: [/kokoro/i], + recipe: 'kokoro', + modelType: 'tts', + label: 'Kokoro', + }, + { + pipelineTags: ['sentence-similarity', 'feature-extraction'], + hfTags: ['sentence-transformers', 'nomic-embed', 'embedding', 'embeddings'], + namePatterns: [/embed/i, /nomic/i], + recipe: 'llamacpp', + modelType: 'embedding', + label: 'llama.cpp', + }, + { + pipelineTags: ['text-ranking'], + hfTags: ['reranker', 'cross-encoder', 'reranking'], + namePatterns: [/rerank/i], + recipe: 'llamacpp', + modelType: 'reranking', + label: 'llama.cpp', + }, +]; + +/** Pipeline tags that indicate an LLM (including multimodal/vision LLMs). */ +const LLM_PIPELINE_TAGS = ['text-generation', 'conversational', 'text2text-generation', 'image-text-to-text', 'translation', 'image-to-text']; + +/** + * All pipeline_tag values that any Lemonade backend can handle. + * Models with a pipeline_tag NOT in this set are incompatible and can be + * skipped before calling detectBackend (saves 2 HF API calls per model). + * Models with no pipeline_tag are allowed through (classification falls + * back to HF tags / name patterns / format detection). 
+ */ +export const SUPPORTED_PIPELINE_TAGS = new Set([ + ...LLM_PIPELINE_TAGS, + ...TASK_RECIPE_MAP.flatMap(m => m.pipelineTags), +]); + +export type CompatibilityLevel = 'supported' | 'likely' | 'experimental' | 'incompatible'; + +export interface ModelCompatibility { + recipe: string; + modelType: string; + label: string; + level: CompatibilityLevel; + reason: string; +} + +export interface ClassifyInput { + modelId: string; + pipelineTag?: string; + tags: string[]; + hasGgufFiles: boolean; + hasOnnxFiles: boolean; + hasFlmFiles: boolean; + hasBinFiles: boolean; +} + +/** + * File formats currently supported by each recipe/backend. + * When a backend gains format support (e.g. whisper.cpp adds GGUF), + * just add the format tag here. + * See https://lemonade-server.ai/docs/server/server_spec/ + */ +export const RECIPE_FORMATS: Record = { + 'llamacpp': ['gguf'], + 'sd-cpp': ['safetensors'], + 'whispercpp': ['bin'], + 'kokoro': ['onnx'], + 'flm': ['flm'], + 'ryzenai-llm': ['onnx'], +}; + +/** + * Check whether the model has at least one file in a format the recipe supports. + * Uses HF tags (gguf, safetensors, onnx) with fallback to file-based detection + * for formats not represented as HF tags (bin, flm). + */ +function hasRequiredFormat(recipe: string, input: ClassifyInput): boolean { + const formats = RECIPE_FORMATS[recipe]; + if (!formats) return true; // unknown recipe — don't gate + return formats.some(fmt => { + // HF tags cover the common formats + if (input.tags.includes(fmt)) return true; + // Fallback to file-based detection for formats without HF tags + switch (fmt) { + case 'gguf': return input.hasGgufFiles; + case 'onnx': return input.hasOnnxFiles; + case 'bin': return input.hasBinFiles; + case 'flm': return input.hasFlmFiles; + default: return false; + } + }); +} + +/** + * Classify a HuggingFace model into a Lemonade recipe with a confidence level. + * + * Priority order: + * 1. pipeline_tag match against TASK_RECIPE_MAP → supported + * 2. 
pipeline_tag is a known LLM tag + GGUF → supported + * 3. HF tags match against TASK_RECIPE_MAP → likely + * 4. Model ID name pattern match → experimental + * 5. FLM files/tags (format-specific) → likely + * 6. ONNX files (format-specific) → likely + * 7. GGUF present, no other signals → experimental (was silently "supported") + * 8. Nothing matched → incompatible + */ +export function classifyModel(input: ClassifyInput): ModelCompatibility { + const { modelId, pipelineTag, tags, hasGgufFiles, hasOnnxFiles, hasFlmFiles, hasBinFiles } = input; + const idLower = modelId.toLowerCase(); + + // --- Early reject: Python-only quantization formats --- + // BitsAndBytes, AWQ, and GPTQ models require Python runtimes and are + // incompatible with all C++ backends (llamacpp, sd-cpp, etc.) + if (/[-._](bnb|awq|gptq)([-._]|$)/i.test(idLower)) { + return { + recipe: '', + modelType: 'unknown', + label: 'Python-only', + level: 'incompatible', + reason: 'BitsAndBytes/AWQ/GPTQ quantization requires Python runtime', + }; + } + + // --- Pass 1: pipeline_tag (highest confidence) --- + + if (pipelineTag) { + // Check non-LLM mappings first (must also have a supported file format) + for (const mapping of TASK_RECIPE_MAP) { + if (mapping.pipelineTags.includes(pipelineTag) && hasRequiredFormat(mapping.recipe, input)) { + return { + recipe: mapping.recipe, + modelType: mapping.modelType, + label: mapping.label, + level: 'supported', + reason: `Task "${pipelineTag}" maps to ${mapping.label}`, + }; + } + } + + // Known LLM pipeline tag + if (LLM_PIPELINE_TAGS.includes(pipelineTag)) { + if (hasGgufFiles) { + return { + recipe: 'llamacpp', + modelType: 'llm', + label: 'llama.cpp', + level: 'supported', + reason: `Task "${pipelineTag}" with GGUF files`, + }; + } + if (hasOnnxFiles) { + return { + recipe: 'ryzenai-llm', + modelType: 'llm', + label: 'RyzenAI', + level: 'likely', + reason: `Task "${pipelineTag}" with ONNX files`, + }; + } + } + + // pipeline_tag present but doesn't match anything 
we support + // (e.g. "feature-extraction", "fill-mask", "summarization", etc.) + if (!LLM_PIPELINE_TAGS.includes(pipelineTag)) { + return { + recipe: '', + modelType: 'unknown', + label: pipelineTag, + level: 'incompatible', + reason: `Task "${pipelineTag}" is not supported by any Lemonade backend`, + }; + } + } + + // --- Pass 2: HF tags (medium confidence) --- + + for (const mapping of TASK_RECIPE_MAP) { + if (mapping.hfTags.some(t => tags.includes(t)) && hasRequiredFormat(mapping.recipe, input)) { + return { + recipe: mapping.recipe, + modelType: mapping.modelType, + label: mapping.label, + level: 'likely', + reason: `Repository tags suggest ${mapping.label} model`, + }; + } + } + + // --- Pass 3: Model ID name patterns (low confidence) --- + + for (const mapping of TASK_RECIPE_MAP) { + if (mapping.namePatterns.some(p => p.test(idLower)) && hasRequiredFormat(mapping.recipe, input)) { + return { + recipe: mapping.recipe, + modelType: mapping.modelType, + label: mapping.label, + level: 'experimental', + reason: `Model name suggests ${mapping.label} — no confirming metadata`, + }; + } + } + + // --- Pass 4: Format-only fallbacks --- + + // FLM detection + if (hasFlmFiles || idLower.startsWith('fastflowlm/') || tags.includes('flm')) { + return { + recipe: 'flm', + modelType: 'llm', + label: 'FastFlowLM', + level: 'likely', + reason: 'FLM files or tags detected', + }; + } + + // ONNX detection (without LLM pipeline_tag — lower confidence) + if (hasOnnxFiles) { + let recipe = 'ryzenai-llm'; + let label = 'RyzenAI'; + if (idLower.includes('-ryzenai-npu') || tags.includes('npu')) { recipe = 'ryzenai-llm'; label = 'RyzenAI NPU'; } + else if (idLower.includes('-ryzenai-hybrid') || tags.includes('hybrid')) { recipe = 'ryzenai-llm'; label = 'RyzenAI Hybrid'; } + else if (tags.includes('igpu')) { recipe = 'ryzenai-llm'; label = 'RyzenAI iGPU'; } + return { + recipe, + modelType: 'llm', + label, + level: 'likely', + reason: 'ONNX files detected', + }; + } + + // GGUF present 
but no task metadata — this is the case that was causing issues + if (hasGgufFiles) { + return { + recipe: 'llamacpp', + modelType: 'llm', + label: 'llama.cpp', + level: 'experimental', + reason: 'GGUF files present but no task metadata — assuming LLM', + }; + } + + // Nothing matched + return { + recipe: '', + modelType: 'unknown', + label: 'Unknown', + level: 'incompatible', + reason: 'No compatible format or task metadata detected', + }; +} diff --git a/src/app/styles.css b/src/app/styles.css index 4e60f3c97..044d885d6 100644 --- a/src/app/styles.css +++ b/src/app/styles.css @@ -1060,6 +1060,14 @@ footer { background: rgba(255, 255, 255, 0.16); } +.left-panel-inline-filter-btn.filter-active { + color: #4ade80; +} + +.left-panel-inline-filter-btn.filter-active:hover { + color: #86efac; +} + .left-panel-filter-popover { position: absolute; top: calc(100% + 6px); @@ -1120,6 +1128,71 @@ footer { color: #ffffff; } +.filter-section-label { + font-size: 0.55rem; + color: #666; + text-transform: uppercase; + letter-spacing: 0.5px; + margin-bottom: -4px; +} + +.recipe-filter-chips { + display: flex; + flex-wrap: wrap; + gap: 4px; +} + +.recipe-chip { + font-size: 0.58rem; + padding: 3px 8px; + border-radius: 10px; + border: 1px solid rgba(255, 255, 255, 0.1); + background: transparent; + color: #777; + cursor: pointer; + transition: all 0.15s ease; +} + +.recipe-chip:hover { + background: rgba(255, 255, 255, 0.05); + color: #aaa; +} + +.recipe-chip.active { + border-color: rgba(255, 255, 255, 0.25); + background: rgba(255, 255, 255, 0.08); + color: #ddd; +} + +.recipe-chip.state-installed.active { + border-color: rgba(76, 175, 80, 0.5); + background: rgba(76, 175, 80, 0.15); + color: #81c784; +} + +.recipe-chip.state-available.active { + border-color: rgba(255, 193, 7, 0.5); + background: rgba(255, 193, 7, 0.15); + color: #ffd54f; +} + +.recipe-chip.state-unsupported.active { + border-color: rgba(244, 67, 54, 0.4); + background: rgba(244, 67, 54, 0.12); + color: #e57373; 
+} + +/* Inactive state hints — subtle color so unsupported is distinguishable from deselected */ +.recipe-chip.state-unsupported:not(.active) { + border-color: rgba(244, 67, 54, 0.2); + color: #a05050; +} + +.recipe-chip.state-available:not(.active) { + border-color: rgba(255, 193, 7, 0.15); + color: #8a7740; +} + .model-filter-popover .organization-toggle { padding: 2px; border: 1px solid rgba(255, 255, 255, 0.06); @@ -1214,6 +1287,55 @@ footer { to { transform: rotate(360deg); } } +.hf-pagination { + display: flex; + align-items: center; + gap: 4px; + margin-left: auto; +} + +.hf-page-btn { + background: rgba(255, 255, 255, 0.06); + border: 1px solid rgba(255, 255, 255, 0.1); + border-radius: 3px; + color: #aaa; + font-size: 0.75rem; + width: 20px; + height: 20px; + display: flex; + align-items: center; + justify-content: center; + cursor: pointer; + padding: 0; + line-height: 1; +} + +.hf-page-btn:hover:not(:disabled) { + background: rgba(255, 255, 255, 0.12); + color: #ddd; +} + +.hf-page-btn:disabled { + opacity: 0.4; + cursor: not-allowed; +} + +.hf-page-cooldown { + animation: hf-cooldown-fade 2s ease-out forwards; +} + +@keyframes hf-cooldown-fade { + 0% { background: rgba(200, 60, 60, 0.4); border-color: rgba(200, 60, 60, 0.5); } + 100% { background: rgba(255, 255, 255, 0.06); border-color: rgba(255, 255, 255, 0.1); } +} + +.hf-page-label { + font-size: 0.6rem; + color: #888; + min-width: 14px; + text-align: center; +} + .hf-search-message { font-size: 0.68rem; color: #555; @@ -1259,6 +1381,12 @@ footer { text-overflow: ellipsis; flex-shrink: 1; min-width: 0; + text-decoration: none; +} + +.hf-model-name:hover { + color: #fff; + text-decoration: underline; } .hf-model-meta { @@ -1290,6 +1418,49 @@ footer { padding: 1px 5px; letter-spacing: 0.2px; flex-shrink: 0; + cursor: default; +} + +.hf-badge-experimental { + color: #c9a227; + border-color: rgba(201, 162, 39, 0.3); + background: rgba(201, 162, 39, 0.08); +} + +/* HF cache provider grouping */ 
+.hf-provider-group { + margin: 0; +} + +.hf-provider-header { + display: flex; + align-items: center; + gap: 6px; + padding: 4px 10px 4px 13px; + cursor: pointer; + user-select: none; +} + +.hf-provider-header:hover { + background: rgba(255, 255, 255, 0.03); +} + +.hf-provider-name { + font-size: 0.65rem; + color: #d0d0d0; +} + +.hf-provider-count { + font-size: 0.58rem; + color: #666; +} + +.hf-provider-members { + padding-left: 6px; +} + +.hf-provider-member { + padding-left: 16px; } .hf-quant-select { @@ -2470,6 +2641,16 @@ footer { padding-left: 8px; } +.form-warning { + font-size: 0.75rem; + color: #c9a227; + background: rgba(201, 162, 39, 0.08); + border: 1px solid rgba(201, 162, 39, 0.2); + border-radius: 4px; + padding: 8px 10px; + margin-top: 8px; +} + .form-checkboxes { display: grid; grid-template-columns: 1fr 1fr; @@ -5246,6 +5427,20 @@ footer { line-height: 1.5; } +.confirm-dialog-checkbox { + display: flex; + align-items: center; + gap: 8px; + font-size: 0.8rem; + color: #aaa; + margin-bottom: 16px; + cursor: pointer; +} + +.confirm-dialog-checkbox input { + cursor: pointer; +} + .confirm-dialog-actions { display: flex; gap: 10px; diff --git a/src/cpp/include/lemon/model_manager.h b/src/cpp/include/lemon/model_manager.h index afbd9f9a5..a78fa7c21 100644 --- a/src/cpp/include/lemon/model_manager.h +++ b/src/cpp/include/lemon/model_manager.h @@ -99,8 +99,8 @@ class ModelManager { bool do_not_upgrade = false, DownloadProgressCallback progress_callback = nullptr); - // Delete a model - void delete_model(const std::string& model_name); + // Delete a model (keep_files=true removes from registry only, preserving HF cache files) + void delete_model(const std::string& model_name, bool keep_files = false); // Get model info by name ModelInfo get_model_info(const std::string& model_name); @@ -135,6 +135,9 @@ class ModelManager { // Set extra models directory for GGUF discovery void set_extra_models_dir(const std::string& dir); + // Discover models in HF cache 
that are not registered in the model registry + json discover_hf_cache_models(); + void save_model_options(const ModelInfo& info); private: diff --git a/src/cpp/server/model_manager.cpp b/src/cpp/server/model_manager.cpp index a2fd444a4..958fa5f25 100644 --- a/src/cpp/server/model_manager.cpp +++ b/src/cpp/server/model_manager.cpp @@ -13,6 +13,7 @@ #include #include #include +#include #include #include @@ -390,6 +391,198 @@ std::map ModelManager::discover_extra_models() const { return discovered; } +json ModelManager::discover_hf_cache_models() { + json result = json::array(); + + std::string hf_cache = get_hf_cache_dir(); + if (hf_cache.empty() || !fs::exists(hf_cache)) { + return result; + } + + // Get the set of repo IDs already known (registered models) + std::set known_repos; + auto all_models = get_supported_models(); + for (const auto& [name, info] : all_models) { + // Extract repo_id from the main checkpoint + std::string ckpt = info.checkpoint("main"); + if (!ckpt.empty()) { + // Remove variant suffix (everything after ':') + size_t colon = ckpt.find(':'); + std::string repo = (colon != std::string::npos) ? 
ckpt.substr(0, colon) : ckpt; + known_repos.insert(repo); + } + } + + // Scan HF cache for models--org--name directories + try { + for (const auto& entry : fs::directory_iterator(hf_cache)) { + if (!entry.is_directory()) continue; + + std::string dirname = entry.path().filename().string(); + if (dirname.substr(0, 8) != "models--") continue; + + // Convert models--org--name back to org/name + std::string repo_id = dirname.substr(8); + size_t pos = 0; + while ((pos = repo_id.find("--", pos)) != std::string::npos) { + repo_id.replace(pos, 2, "/"); + pos += 1; + } + + // Skip if already registered + if (known_repos.count(repo_id)) continue; + + // Find snapshot directory (use the latest one) + fs::path snapshots_dir = entry.path() / "snapshots"; + if (!fs::exists(snapshots_dir)) continue; + + fs::path latest_snapshot; + std::filesystem::file_time_type latest_time{}; + for (const auto& snap : fs::directory_iterator(snapshots_dir)) { + if (!snap.is_directory()) continue; + auto ftime = snap.last_write_time(); + if (latest_snapshot.empty() || ftime > latest_time) { + latest_snapshot = snap.path(); + latest_time = ftime; + } + } + if (latest_snapshot.empty()) continue; + + // Scan for model files and compute total size + bool has_gguf = false, has_onnx = false, has_bin = false; + double total_size_gb = 0.0; + + // Group GGUF files: root files individually, subfolder files by folder + struct FileEntry { std::string name; uintmax_t size; }; + std::vector root_gguf_files; + std::map> folder_files; // folder name -> files + std::vector bin_files; + std::vector mmproj_files; + + for (const auto& file : fs::recursive_directory_iterator(latest_snapshot)) { + if (!file.is_regular_file()) continue; + std::string fname = file.path().filename().string(); + std::string fname_lower = fname; + std::transform(fname_lower.begin(), fname_lower.end(), fname_lower.begin(), ::tolower); + + uintmax_t fsize = 0; + try { fsize = file.file_size(); } catch (...) 
{} + total_size_gb += static_cast(fsize) / (1024.0 * 1024.0 * 1024.0); + + if (fname_lower.find(".gguf") != std::string::npos) { + has_gguf = true; + if (fname_lower.find("mmproj") != std::string::npos) { + mmproj_files.push_back(fname); + } else { + // Check if file is in a subfolder relative to the snapshot + // Use canonical paths to resolve symlinks before computing relative path + fs::path canonical_file = fs::canonical(file.path()); + fs::path canonical_snap = fs::canonical(latest_snapshot); + fs::path rel = fs::relative(canonical_file, canonical_snap); + std::string rel_parent = rel.parent_path().string(); + if (rel_parent.empty() || rel_parent == ".") { + root_gguf_files.push_back({fname, fsize}); + } else { + // File in subfolder — group by top-level folder name + std::string folder = rel.begin()->string(); + folder_files[folder].push_back({fname, fsize}); + } + } + } + if (fname_lower.find(".onnx") != std::string::npos) has_onnx = true; + if (fname_lower.find(".bin") != std::string::npos) { + has_bin = true; + bin_files.push_back({fname, fsize}); + } + } + + // Skip empty or unrecognized repos + if (!has_gguf && !has_onnx && !has_bin) continue; + + // Build gguf_files array: one entry per variant + json gguf_files = json::array(); + std::regex quant_regex(R"((Q\d+(?:_\d)?(?:_[KS])?(?:_[MSXL]+)?|F(?:16|32)|IQ\d+(?:_[A-Z]+)?|BF16|MXFP\d+(?:_[A-Z]+)?))", std::regex::icase); + for (const auto& [folder, files] : folder_files) { + if (std::regex_search(folder, quant_regex)) { + // Folder name is a quant variant (e.g. "Q4_0/", "UD-Q3_K_XL/") — single entry with combined size + uintmax_t total = 0; + for (const auto& f : files) total += f.size; + json gf; + gf["filename"] = folder; + if (total > 0) gf["size"] = total; + gguf_files.push_back(gf); + } else { + // Folder name is not a quant (e.g. 
"whisper.cpp/") — list individual files + for (const auto& f : files) { + json gf; + gf["filename"] = f.name; + if (f.size > 0) gf["size"] = f.size; + gguf_files.push_back(gf); + } + } + } + for (const auto& rf : root_gguf_files) { + json gf; + gf["filename"] = rf.name; + if (rf.size > 0) gf["size"] = rf.size; + gguf_files.push_back(gf); + } + + json model_entry; + model_entry["repo_id"] = repo_id; + model_entry["has_gguf"] = has_gguf; + model_entry["has_onnx"] = has_onnx; + model_entry["has_bin"] = has_bin; + if (total_size_gb > 0.0) model_entry["size_gb"] = total_size_gb; + if (!gguf_files.empty()) model_entry["gguf_files"] = gguf_files; + if (!mmproj_files.empty()) model_entry["mmproj_files"] = mmproj_files; + if (!bin_files.empty()) { + json bf_array = json::array(); + for (const auto& bf : bin_files) { + json bfj; + bfj["filename"] = bf.name; + if (bf.size > 0) bfj["size"] = bf.size; + bf_array.push_back(bfj); + } + model_entry["bin_files"] = bf_array; + } + + result.push_back(model_entry); + } + } catch (const std::exception& e) { + LOG(ERROR, "ModelManager") << "Error scanning HF cache: " << e.what() << std::endl; + } + + // Fetch pipeline_tag metadata from HF API for each discovered model + if (!result.empty()) { + std::map headers; + const char* hf_token = std::getenv("HF_TOKEN"); + if (hf_token) { + headers["Authorization"] = "Bearer " + std::string(hf_token); + } + + for (auto& model_entry : result) { + std::string repo_id = model_entry["repo_id"].get(); + try { + std::string api_url = "https://huggingface.co/api/models/" + repo_id; + auto response = HttpClient::get(api_url, headers); + if (response.status_code == 200) { + auto info = JsonUtils::parse(response.body); + if (info.contains("pipeline_tag") && info["pipeline_tag"].is_string()) { + model_entry["pipeline_tag"] = info["pipeline_tag"].get(); + } + } + } catch (const std::exception& e) { + // Non-fatal: model will fall back to format-based heuristics + LOG(WARNING, "ModelManager") << "Failed to 
fetch metadata for " << repo_id << ": " << e.what() << std::endl; + } + } + } + + LOG(INFO, "ModelManager") << "Found " << result.size() << " unregistered models in HF cache" << std::endl; + return result; +} + std::string ModelManager::resolve_model_path(const ModelInfo& info, const std::string& type, const std::string& checkpoint) const { // Experience models are virtual bundles with no direct checkpoint to resolve if (info.recipe == "experience") { @@ -489,8 +682,13 @@ std::string ModelManager::resolve_model_path(const ModelInfo& info, const std::s return all_bin_files[0]; } - // For llamacpp, find the GGUF file with advanced sharded model support - if (info.recipe == "llamacpp" && type == "main") { + // For GGUF-based backends, find the GGUF file with advanced sharded model support + // Skip this resolver if the variant is explicitly a non-GGUF file (e.g. safetensors) + bool variant_is_non_gguf = !variant.empty() && ( + variant.find(".safetensors") != std::string::npos || + variant.find(".onnx") != std::string::npos || + variant.find(".bin") != std::string::npos); + if ((info.recipe == "llamacpp" || info.recipe == "sd-cpp") && type == "main" && !variant_is_non_gguf) { if (!fs::exists(model_cache_path_fs)) { return model_cache_path; // Return directory path even if not found } @@ -1019,6 +1217,29 @@ void ModelManager::add_model_to_cache(const std::string& model_name) { } } + // Compute size from resolved path if not already set + if (info.size <= 0.0 && info.downloaded && !info.resolved_path().empty()) { + try { + fs::path resolved(info.resolved_path()); + // For sharded models, sum all model files in the snapshot directory + // (resolved_path points to just the first shard) + fs::path snapshot_dir = fs::is_directory(resolved) ? 
resolved : resolved.parent_path(); + double total = 0.0; + for (const auto& entry : fs::directory_iterator(snapshot_dir)) { + if (entry.is_regular_file()) { + std::string ext = entry.path().extension().string(); + // Only count model files, not metadata + if (ext == ".gguf" || ext == ".bin") { + total += static_cast<double>(entry.file_size()); + } + } + } + if (total > 0.0) { + info.size = total / (1024.0 * 1024.0 * 1024.0); + } + } catch (...) {} + } + models_cache_[model_name] = info; LOG(INFO, "ModelManager") << "Added '" << model_name << "' to cache (downloaded=" << info.downloaded << ")" << std::endl; } @@ -1053,6 +1274,25 @@ void ModelManager::update_model_in_cache(const std::string& model_name, bool dow // The path changes now that files exist on disk if (downloaded) { resolve_all_model_paths(it->second); + // Compute size if not already set + if (it->second.size <= 0.0 && !it->second.resolved_path().empty()) { + try { + fs::path resolved(it->second.resolved_path()); + fs::path snapshot_dir = fs::is_directory(resolved) ? resolved : resolved.parent_path(); + double total = 0.0; + for (const auto& entry : fs::directory_iterator(snapshot_dir)) { + if (entry.is_regular_file()) { + std::string ext = entry.path().extension().string(); + if (ext == ".gguf" || ext == ".bin") { + total += static_cast<double>(entry.file_size()); + } + } + } + if (total > 0.0) { + it->second.size = total / (1024.0 * 1024.0 * 1024.0); + } + } catch (...) {} + } LOG(INFO, "ModelManager") << "Updated '" << model_name << "' downloaded=" << downloaded << ", resolved_path=" << it->second.resolved_path() << std::endl; @@ -1727,6 +1967,13 @@ void ModelManager::download_model(const std::string& model_name, // Register user models to user_models.json if (model_name.substr(0, 5) == "user." && !model_registered) { register_user_model(model_name, model_data); + + // After registration, check if model files already exist in HF cache + // (e.g.
user is registering a model they already downloaded outside Lemonade) + if (is_model_downloaded(model_name)) { + LOG(INFO, "ModelManager") << "Model already exists in cache, skipping download" << std::endl; + return; + } } auto model_info = get_model_info(model_name); @@ -2352,7 +2599,7 @@ void ModelManager::download_from_flm(const std::string& checkpoint, LOG(INFO, "ModelManager") << "FLM model pull completed successfully" << std::endl; } -void ModelManager::delete_model(const std::string& model_name) { +void ModelManager::delete_model(const std::string& model_name, bool keep_files) { auto info = get_model_info(model_name); LOG(INFO, "ModelManager") << "Deleting model: " << model_name << std::endl; @@ -2458,13 +2705,17 @@ void ModelManager::delete_model(const std::string& model_name) { LOG(INFO, "ModelManager") << "Cache path: " << model_cache_path << std::endl; - fs::path model_cache_path_fs = path_from_utf8(model_cache_path); - if (fs::exists(model_cache_path_fs)) { - LOG(INFO, "ModelManager") << "Removing directory..." << std::endl; - fs::remove_all(model_cache_path_fs); - LOG(INFO, "ModelManager") << "✓ Deleted model files: " << model_name << std::endl; + if (keep_files) { + LOG(INFO, "ModelManager") << "Keeping files in HF cache (unregister only)" << std::endl; } else { - LOG(INFO, "ModelManager") << "Warning: Model cache directory not found (may already be deleted)" << std::endl; + fs::path model_cache_path_fs = path_from_utf8(model_cache_path); + if (fs::exists(model_cache_path_fs)) { + LOG(INFO, "ModelManager") << "Removing directory..." 
<< std::endl; + fs::remove_all(model_cache_path_fs); + LOG(INFO, "ModelManager") << "✓ Deleted model files: " << model_name << std::endl; + } else { + LOG(INFO, "ModelManager") << "Warning: Model cache directory not found (may already be deleted)" << std::endl; + } + } // Remove from user models if it's a user model diff --git a/src/cpp/server/server.cpp b/src/cpp/server/server.cpp index 4c8d837d4..6907d0645 100644 --- a/src/cpp/server/server.cpp +++ b/src/cpp/server/server.cpp @@ -326,6 +326,83 @@ void Server::setup_routes(httplib::Server &web_server) { handle_system_stats(req, res); }); + register_get("cache/models", [this](const httplib::Request&, httplib::Response& res) { + auto models = model_manager_->discover_hf_cache_models(); + res.set_content(models.dump(), "application/json"); + }); + + // HuggingFace search proxy — uses server-side HF_TOKEN for higher rate limits + register_get("hf/search", [](const httplib::Request& req, httplib::Response& res) { + std::string url; + if (req.has_param("next_url")) { + // Use the full cursor URL directly (for pagination) + url = req.get_param_value("next_url"); + } else { + // Build HF API URL from query params + url = "https://huggingface.co/api/models?"; + bool first = true; + for (const auto& param : {"search", "filter", "limit", "sort", "direction", "pipeline_tag", "author"}) { + if (req.has_param(param)) { + if (!first) url += "&"; + url += std::string(param) + "=" + httplib::encode_query_component(req.get_param_value(param)); + first = false; + } + } + } + + // Add HF_TOKEN if available + std::map<std::string, std::string> headers; + const char* hf_token = std::getenv("HF_TOKEN"); + if (hf_token) { + headers["Authorization"] = "Bearer " + std::string(hf_token); + } + + auto hf_res = lemon::utils::HttpClient::get(url, headers); + + // Build response with rate limit info + nlohmann::json response; + response["status"] = hf_res.status_code; + + if (hf_res.status_code == 200) { + response["data"] = nlohmann::json::parse(hf_res.body, nullptr,
false); + } else if (hf_res.status_code == 429) { + response["data"] = nlohmann::json::array(); + } else { + response["data"] = nlohmann::json::array(); + } + + // Parse Link header for next cursor (headers stored lowercase) + auto link_it = hf_res.headers.find("link"); + if (link_it != hf_res.headers.end()) { + std::string link = link_it->second; + auto pos = link.find("<"); + auto end = link.find(">"); + if (pos != std::string::npos && end != std::string::npos) { + response["next_cursor"] = link.substr(pos + 1, end - pos - 1); + } + } + + // Parse RateLimit header (headers stored lowercase) + auto rl_it = hf_res.headers.find("ratelimit"); + if (rl_it != hf_res.headers.end()) { + response["rate_limit_header"] = rl_it->second; + // Parse remaining and time-to-reset: format is "api;r=123;t=45" + std::string rl = rl_it->second; + auto r_pos = rl.find("r="); + auto t_pos = rl.find("t="); + if (r_pos != std::string::npos) { + try { response["rate_limit_remaining"] = std::stoi(rl.substr(r_pos + 2)); } catch (...) {} + } + if (t_pos != std::string::npos) { + try { response["rate_limit_reset"] = std::stoi(rl.substr(t_pos + 2)); } catch (...) {} + } + } + + response["authenticated"] = (hf_token != nullptr); + + res.set_content(response.dump(), "application/json"); + }); + register_post("log-level", [this](const httplib::Request& req, httplib::Response& res) { handle_log_level(req, res); }); @@ -2618,7 +2695,10 @@ void Server::handle_delete(const httplib::Request& req, httplib::Response& res) request_json["model"].get<std::string>() : request_json["model_name"].get<std::string>(); - LOG(INFO, "Server") << "Deleting model: " << model_name << std::endl; + bool keep_files = request_json.value("keep_files", false); + + LOG(INFO, "Server") << "Deleting model: " << model_name + << (keep_files ?
" (keeping files)" : "") << std::endl; // If the model is currently loaded, unload it first to release file locks if (router_->is_model_loaded(model_name)) { @@ -2635,7 +2715,7 @@ void Server::handle_delete(const httplib::Request& req, httplib::Response& res) for (int attempt = 0; attempt <= max_retries; ++attempt) { try { - model_manager_->delete_model(model_name); + model_manager_->delete_model(model_name, keep_files); // Success - send response and return nlohmann::json response = { diff --git a/src/cpp/server/utils/http_client.cpp b/src/cpp/server/utils/http_client.cpp index f0f3faa0f..bef78aeb7 100644 --- a/src/cpp/server/utils/http_client.cpp +++ b/src/cpp/server/utils/http_client.cpp @@ -2,6 +2,7 @@ #include #include #include +#include #include #include #include @@ -57,6 +58,26 @@ static int progress_callback(void* clientp, curl_off_t dltotal, curl_off_t dlnow return 0; // Continue transfer } +// Header callback for capturing response headers +static size_t header_write_callback(char* buffer, size_t size, size_t nitems, void* userdata) { + size_t total = size * nitems; + auto* resp_headers = static_cast*>(userdata); + std::string line(buffer, total); + // Trim trailing whitespace/newlines + while (!line.empty() && (line.back() == '\r' || line.back() == '\n')) line.pop_back(); + auto colon = line.find(':'); + if (colon != std::string::npos) { + std::string key = line.substr(0, colon); + std::string value = line.substr(colon + 1); + // Trim leading whitespace from value + while (!value.empty() && value.front() == ' ') value.erase(value.begin()); + // Store with lowercase key for case-insensitive lookup + std::transform(key.begin(), key.end(), key.begin(), ::tolower); + resp_headers->emplace(key, value); + } + return total; +} + HttpResponse HttpClient::get(const std::string& url, const std::map& headers) { CURL* curl = curl_easy_init(); @@ -70,6 +91,8 @@ HttpResponse HttpClient::get(const std::string& url, curl_easy_setopt(curl, CURLOPT_URL, url.c_str()); 
curl_easy_setopt(curl, CURLOPT_WRITEFUNCTION, write_callback); curl_easy_setopt(curl, CURLOPT_WRITEDATA, &response_body); + curl_easy_setopt(curl, CURLOPT_HEADERFUNCTION, header_write_callback); + curl_easy_setopt(curl, CURLOPT_HEADERDATA, &response.headers); curl_easy_setopt(curl, CURLOPT_FOLLOWLOCATION, 1L); curl_easy_setopt(curl, CURLOPT_TIMEOUT, default_timeout_seconds_); curl_easy_setopt(curl, CURLOPT_USERAGENT, "lemon.cpp/1.0"); diff --git a/test/hf_model_tags.py b/test/hf_model_tags.py new file mode 100644 index 000000000..8e2e8356d --- /dev/null +++ b/test/hf_model_tags.py @@ -0,0 +1,558 @@ +#!/usr/bin/env python3 +""" +Show pipeline_tag and tags for HuggingFace models. + +Strips quant/file specifiers (e.g. "org/model-GGUF:Q4_K_M" → "org/model-GGUF") +and deduplicates checkpoints when scanning model registries. + +Tracks HF API rate limits via response headers and waits automatically. +Set HF_TOKEN env var for higher rate limits (1000 vs 500 req/5min). + +Usage: + # Single model + python test/hf_model_tags.py HauhauCS/Qwen3.5-35B-A3B-Uncensored-HauhauCS-Aggressive + + # All checkpoints from server_models.json + python test/hf_model_tags.py --all + + # Single recipe from server_models.json + python test/hf_model_tags.py --llamacpp + python test/hf_model_tags.py --sd-cpp + python test/hf_model_tags.py --whispercpp + python test/hf_model_tags.py --kokoro + python test/hf_model_tags.py --ryzenai-llm + python test/hf_model_tags.py --experience + + # Include user_models.json (from LEMONADE_CACHE_DIR or ~/.cache/lemonade/) + python test/hf_model_tags.py --all --user + python test/hf_model_tags.py --llamacpp --user + python test/hf_model_tags.py --all --user --user-models-path /path/to/user_models.json + + # Tag summary per recipe (inclusive list of all tags seen for each recipe) + python test/hf_model_tags.py --all --summary + python test/hf_model_tags.py --llamacpp --summary + + # Full detection (file extensions, name patterns, classifyModel result) + python 
test/hf_model_tags.py --detect some-org/some-model + python test/hf_model_tags.py --llamacpp --detect + + # Combine freely + python test/hf_model_tags.py --llamacpp --kokoro --user --summary --detect some-org/some-model +""" + +import argparse +import json +import os +import re +import sys +import time +from collections import defaultdict +from pathlib import Path + +import requests + +MODELS_JSON = Path(__file__).resolve().parent.parent / "src/cpp/resources/server_models.json" + +ALL_RECIPES = ["llamacpp", "sd-cpp", "whispercpp", "kokoro", "flm", "ryzenai-llm", "experience"] + +# Format tags — file serialization formats +FORMAT_TAGS = {"gguf", "onnx", "safetensors", "bin", "flm", "q4nx"} + +# Task/pipeline tags — what the model does (mirrors recipeCompatibility.ts) +TASK_TAGS = { + # LLM + "text-generation", "conversational", "text2text-generation", "image-text-to-text", + # Image + "text-to-image", "image-to-image", "image-to-video", "image-to-3d", + "image-text-to-image", "image-text-to-video", "unconditional-image-generation", + "image-segmentation", "object-detection", "depth-estimation", "mask-generation", + "zero-shot-object-detection", + # Audio + "automatic-speech-recognition", "text-to-speech", "audio-text-to-text", + "text-to-audio", "audio-to-audio", "voice-activity-detection", + # Video + "text-to-video", "text-to-3d", "video-to-video", + # Embedding/reranking + "sentence-similarity", "feature-extraction", "text-ranking", + # Other NLP + "fill-mask", "question-answering", "summarization", "translation", + "text-classification", "token-classification", "zero-shot-classification", + "table-question-answering", +} + +# Library/framework tags +LIBRARY_TAGS = { + "transformers", "transformers.js", "diffusers", "sentence-transformers", + "onnxruntime", "pytorch", "tensorflow", "jax", "flax", "keras", + "llama.cpp", "ctranslate2", "mlx", "vllm", "openvino", "coreml", + "tensorrt", "tflite", "rust", "paddlepaddle", "spacy", "fastai", + "flair", 
"adapter-transformers", "timm", "open_clip", +} + +# Prefixed tags we filter into their own buckets +PREFIXED_CATEGORIES = ("license:", "arxiv:", "base_model:", "region:", "doi:", "dataset:") + +# --------------------------------------------------------------------------- +# Mirrors recipeCompatibility.ts — TASK_RECIPE_MAP +# --------------------------------------------------------------------------- +TASK_RECIPE_MAP = [ + { + "pipelineTags": ["text-to-image", "image-to-image"], + "hfTags": ["stable-diffusion", "text-to-image", "diffusers", "image-generation", "image-editing"], + "namePatterns": [re.compile(r"stable-diffusion", re.I), re.compile(r"\bflux\b", re.I), re.compile(r"\bsdxl\b", re.I)], + "recipe": "sd-cpp", + "modelType": "image", + "label": "sd.cpp", + }, + { + "pipelineTags": ["automatic-speech-recognition"], + "hfTags": ["whisper"], + "namePatterns": [re.compile(r"whisper", re.I)], + "recipe": "whispercpp", + "modelType": "audio", + "label": "whisper.cpp", + }, + { + "pipelineTags": ["text-to-speech", "text-to-audio"], + "hfTags": ["tts", "kokoro"], + "namePatterns": [re.compile(r"kokoro", re.I)], + "recipe": "kokoro", + "modelType": "tts", + "label": "Kokoro", + }, + { + "pipelineTags": ["sentence-similarity", "feature-extraction"], + "hfTags": ["sentence-transformers", "nomic-embed", "embedding", "embeddings"], + "namePatterns": [re.compile(r"embed", re.I), re.compile(r"nomic", re.I)], + "recipe": "llamacpp", + "modelType": "embedding", + "label": "llama.cpp", + }, + { + "pipelineTags": ["text-ranking"], + "hfTags": ["reranker", "cross-encoder", "reranking"], + "namePatterns": [re.compile(r"rerank", re.I)], + "recipe": "llamacpp", + "modelType": "reranking", + "label": "llama.cpp", + }, +] + +LLM_PIPELINE_TAGS = ["text-generation", "conversational", "text2text-generation", "image-text-to-text"] + +RECIPE_FORMATS = { + "llamacpp": ["gguf"], + "sd-cpp": ["safetensors"], + "whispercpp": ["bin"], + "kokoro": ["onnx"], + "flm": ["flm"], + "ryzenai-llm": 
["onnx"], +} + +# Rate limit tracking +_rate_limit_remaining = None +_rate_limit_reset = None + + +# --------------------------------------------------------------------------- +# Detection logic — mirrors detectBackend() + classifyModel() from ModelManager.tsx +# --------------------------------------------------------------------------- + +def scan_file_extensions(siblings: list[dict]) -> dict: + """Scan siblings file list for format-relevant extensions.""" + files = [s.get("rfilename", "").lower() for s in siblings] + return { + "gguf": [f for f in files if f.endswith(".gguf")], + "onnx": [f for f in files if f.endswith(".onnx") or f.endswith(".onnx_data")], + "safetensors": [f for f in files if f.endswith(".safetensors")], + "bin": [f for f in files if f.endswith(".bin")], + "flm": [f for f in files if f.endswith(".flm")], + } + + +def check_name_patterns(model_id: str) -> list[dict]: + """Check model ID against TASK_RECIPE_MAP name patterns. Returns matching mappings.""" + id_lower = model_id.lower() + matches = [] + for mapping in TASK_RECIPE_MAP: + for pat in mapping["namePatterns"]: + if pat.search(id_lower): + matches.append({"recipe": mapping["recipe"], "label": mapping["label"], + "modelType": mapping["modelType"], "pattern": pat.pattern}) + break + return matches + + +def has_required_format(recipe: str, tags: list[str], ext_scan: dict) -> bool: + """Check format gate — tags first, file extension fallback.""" + formats = RECIPE_FORMATS.get(recipe) + if not formats: + return True + for fmt in formats: + if fmt in tags: + return True + if ext_scan.get(fmt): + return True + return False + + +def classify_model(model_id: str, pipeline_tag: str | None, tags: list[str], ext_scan: dict) -> dict: + """ + Python port of classifyModel() from recipeCompatibility.ts. + Returns {recipe, modelType, label, level, reason, source}. + source indicates which pass matched: 'pipeline_tag', 'hf_tags', 'name_pattern', + 'format_fallback', or 'none'. 
+ """ + id_lower = model_id.lower() + tag_set = set(tags) + + has_gguf = bool(ext_scan["gguf"]) or "gguf" in tag_set + has_onnx = bool(ext_scan["onnx"]) or "onnx" in tag_set + has_flm = bool(ext_scan["flm"]) or "flm" in tag_set + has_bin = bool(ext_scan["bin"]) + + # --- Pass 1: pipeline_tag --- + if pipeline_tag: + for m in TASK_RECIPE_MAP: + if pipeline_tag in m["pipelineTags"] and has_required_format(m["recipe"], tags, ext_scan): + return {"recipe": m["recipe"], "modelType": m["modelType"], "label": m["label"], + "level": "supported", "reason": f'pipeline_tag "{pipeline_tag}" → {m["label"]}', + "source": "pipeline_tag"} + + if pipeline_tag in LLM_PIPELINE_TAGS: + if has_gguf: + return {"recipe": "llamacpp", "modelType": "llm", "label": "llama.cpp", + "level": "supported", "reason": f'pipeline_tag "{pipeline_tag}" + GGUF', + "source": "pipeline_tag"} + if has_onnx: + return {"recipe": "ryzenai-llm", "modelType": "llm", "label": "RyzenAI", + "level": "likely", "reason": f'pipeline_tag "{pipeline_tag}" + ONNX', + "source": "pipeline_tag"} + + if pipeline_tag not in LLM_PIPELINE_TAGS: + return {"recipe": "", "modelType": "unknown", "label": pipeline_tag, + "level": "incompatible", "reason": f'pipeline_tag "{pipeline_tag}" unsupported', + "source": "pipeline_tag"} + + # --- Pass 2: HF tags --- + for m in TASK_RECIPE_MAP: + if any(t in tag_set for t in m["hfTags"]) and has_required_format(m["recipe"], tags, ext_scan): + matched = [t for t in m["hfTags"] if t in tag_set] + return {"recipe": m["recipe"], "modelType": m["modelType"], "label": m["label"], + "level": "likely", "reason": f"hf_tags [{', '.join(matched)}] → {m['label']}", + "source": "hf_tags"} + + # --- Pass 3: Name patterns --- + for m in TASK_RECIPE_MAP: + for pat in m["namePatterns"]: + if pat.search(id_lower) and has_required_format(m["recipe"], tags, ext_scan): + return {"recipe": m["recipe"], "modelType": m["modelType"], "label": m["label"], + "level": "experimental", "reason": f"name /{pat.pattern}/ → 
{m['label']}", + "source": "name_pattern"} + + # --- Pass 4: Format-only fallbacks --- + if has_flm or id_lower.startswith("fastflowlm/") or "flm" in tag_set: + return {"recipe": "flm", "modelType": "llm", "label": "FastFlowLM", + "level": "likely", "reason": "FLM files or tags", + "source": "format_fallback"} + + if has_onnx: + label = "RyzenAI" + if "npu" in tag_set or "-ryzenai-npu" in id_lower: + label = "RyzenAI NPU" + elif "hybrid" in tag_set or "-ryzenai-hybrid" in id_lower: + label = "RyzenAI Hybrid" + elif "igpu" in tag_set: + label = "RyzenAI iGPU" + return {"recipe": "ryzenai-llm", "modelType": "llm", "label": label, + "level": "likely", "reason": "ONNX files detected", + "source": "format_fallback"} + + if has_gguf: + return {"recipe": "llamacpp", "modelType": "llm", "label": "llama.cpp", + "level": "experimental", "reason": "GGUF present, no task metadata", + "source": "format_fallback"} + + return {"recipe": "", "modelType": "unknown", "label": "Unknown", + "level": "incompatible", "reason": "No compatible format or metadata", + "source": "none"} + + +# --------------------------------------------------------------------------- +# Helpers +# --------------------------------------------------------------------------- + +def get_user_models_path(override: str = None) -> Path: + if override: + return Path(override) + cache_dir = os.environ.get("LEMONADE_CACHE_DIR") + if cache_dir: + return Path(cache_dir) / "user_models.json" + return Path.home() / ".cache" / "lemonade" / "user_models.json" + + +def strip_quant_specifier(checkpoint: str) -> str: + """Strip quant/file specifiers after ':' (e.g. 
'org/model-GGUF:Q4_K_M' → 'org/model-GGUF').""" + return checkpoint.split(":")[0] if ":" in checkpoint else checkpoint + + +def classify_tags(meta: dict) -> dict: + """Classify a model's tags into categories.""" + pipeline_tag = meta.get("pipeline_tag") + tags = meta.get("tags", []) + tag_set = set(tags) + + formats = sorted(FORMAT_TAGS & tag_set) + tasks = sorted(TASK_TAGS & tag_set) + libraries = sorted(LIBRARY_TAGS & tag_set) + known = FORMAT_TAGS | TASK_TAGS | LIBRARY_TAGS + other = [t for t in tags if t not in known and not t.startswith(PREFIXED_CATEGORIES)] + + return { + "pipeline_tag": pipeline_tag, + "formats": formats, + "tasks": tasks, + "libraries": libraries, + "other": other, + } + + +def _hf_get(url: str) -> requests.Response | None: + """GET with rate limiting and HF_TOKEN.""" + global _rate_limit_remaining, _rate_limit_reset + + if _rate_limit_remaining is not None and _rate_limit_remaining <= 1: + wait = (_rate_limit_reset or 60) + 1 + print(f" [rate limited — waiting {wait}s]", flush=True) + time.sleep(wait) + + headers = {} + hf_token = os.environ.get("HF_TOKEN") + if hf_token: + headers["Authorization"] = f"Bearer {hf_token}" + + try: + r = requests.get(url, headers=headers, timeout=15) + + if "X-RateLimit-Remaining" in r.headers: + _rate_limit_remaining = int(r.headers["X-RateLimit-Remaining"]) + if "X-RateLimit-Reset" in r.headers: + try: + reset_time = int(r.headers["X-RateLimit-Reset"]) + _rate_limit_reset = max(0, reset_time - int(time.time())) + except ValueError: + pass + + if r.status_code == 429: + retry_after = int(r.headers.get("Retry-After", 60)) + print(f" [429 rate limited — waiting {retry_after}s]", flush=True) + time.sleep(retry_after + 1) + return _hf_get(url) + + return r + except requests.RequestException as e: + print(f" ERROR: {e}") + return None + + +def fetch_model_meta(model_id: str) -> dict | None: + api_id = strip_quant_specifier(model_id) + r = _hf_get(f"https://huggingface.co/api/models/{api_id}") + if r is None: + 
return None + if r.status_code == 404: + print(f" NOT FOUND: {model_id}") + return None + try: + r.raise_for_status() + except requests.HTTPError as e: + print(f" ERROR fetching {model_id}: {e}") + return None + return r.json() + + +def fetch_siblings(model_id: str) -> list[dict] | None: + """Fetch the file tree for a model (siblings list from model metadata).""" + meta = fetch_model_meta(model_id) + if not meta: + return None + return meta.get("siblings", []) + + +# --------------------------------------------------------------------------- +# Output +# --------------------------------------------------------------------------- + +def print_model(model_id: str, meta: dict, recipe: str = None, detect: bool = False): + c = classify_tags(meta) + + prefix = f"[{recipe}] " if recipe else "" + print(f"\n{prefix}{model_id}") + print(f" pipeline_tag: {c['pipeline_tag'] or '(none)'}") + print(f" formats: {', '.join(c['formats']) if c['formats'] else '(none)'}") + print(f" tasks: {', '.join(c['tasks']) if c['tasks'] else '(none)'}") + print(f" libraries: {', '.join(c['libraries']) if c['libraries'] else '(none)'}") + if c["other"]: + print(f" other: {', '.join(c['other'])}") + + if detect: + siblings = meta.get("siblings", []) + ext_scan = scan_file_extensions(siblings) + ext_summary = {fmt: len(files) for fmt, files in ext_scan.items() if files} + print(f" extensions: {ext_summary if ext_summary else '(none)'}") + + name_matches = check_name_patterns(model_id) + if name_matches: + parts = [f"{m['recipe']} (/{m['pattern']}/)" for m in name_matches] + print(f" name match: {', '.join(parts)}") + else: + print(f" name match: (none)") + + tags = meta.get("tags", []) + result = classify_model(model_id, meta.get("pipeline_tag"), tags, ext_scan) + source = result["source"] + level = result["level"] + print(f" classify: {result['recipe'] or '(none)'} / {result['modelType']}" + f" [{level}] via {source} — {result['reason']}") + + +def print_summary(summary: dict): + """Print 
inclusive tag summary per recipe.""" + print(f"\n{'=' * 60}") + print("TAG SUMMARY BY RECIPE") + print(f"{'=' * 60}") + + for recipe in sorted(summary.keys()): + data = summary[recipe] + count = data["count"] + print(f"\n[{recipe}] ({count} model{'s' if count != 1 else ''})") + + for category in ("pipeline_tags", "formats", "tasks", "libraries", "other"): + tags = sorted(data[category]) + if not tags: + tags = ["(none)"] + label = category.replace("_", " ").rjust(14) + if data[f"{category}_none_count"] > 0 and tags[0] != "(none)": + tags.insert(0, f"(none)×{data[f'{category}_none_count']}") + print(f" {label}: {', '.join(tags)}") + + +def load_checkpoints(registry: dict, recipe_filter: set | None) -> dict: + """Load and deduplicate checkpoints from a model registry.""" + seen = {} + for _name, entry in registry.items(): + recipe = entry.get("recipe", "?") + if recipe_filter and recipe not in recipe_filter: + continue + cp = entry.get("checkpoint", "") + cp_base = strip_quant_specifier(cp) + if cp_base and cp_base not in seen: + seen[cp_base] = recipe + return seen + + +def main(): + parser = argparse.ArgumentParser( + description="Show HF model pipeline_tag and tags", + formatter_class=argparse.RawDescriptionHelpFormatter, + ) + parser.add_argument("models", nargs="*", help="HuggingFace model IDs (org/name)") + parser.add_argument("--all", action="store_true", help="All recipes from server_models.json") + parser.add_argument("--user", action="store_true", help="Include user_models.json") + parser.add_argument("--user-models-path", type=str, default=None, + help="Override user_models.json path (default: LEMONADE_CACHE_DIR or ~/.cache/lemonade/)") + parser.add_argument("--summary", action="store_true", help="Print inclusive tag summary per recipe") + parser.add_argument("--detect", action="store_true", + help="Show file extension scan, name pattern matches, and classifyModel result") + + # Per-recipe flags + for recipe in ALL_RECIPES: + 
parser.add_argument(f"--{recipe}", action="store_true", help=f"Only {recipe} models from server_models.json") + + args = parser.parse_args() + + # Determine which recipes are selected + selected_recipes = {r for r in ALL_RECIPES if getattr(args, r.replace("-", "_"), False)} + use_registry = args.all or bool(selected_recipes) + recipe_filter = selected_recipes if selected_recipes else None # None = all + + if not args.models and not use_registry and not args.user: + parser.print_help() + sys.exit(1) + + # Collect results for summary mode + summary = defaultdict(lambda: { + "count": 0, + "pipeline_tags": set(), "pipeline_tags_none_count": 0, + "formats": set(), "formats_none_count": 0, + "tasks": set(), "tasks_none_count": 0, + "libraries": set(), "libraries_none_count": 0, + "other": set(), "other_none_count": 0, + }) + + def process_model(model_id: str, recipe: str = None): + meta = fetch_model_meta(model_id) + if not meta: + return + print_model(model_id, meta, recipe=recipe, detect=args.detect) + + if args.summary and recipe: + c = classify_tags(meta) + s = summary[recipe] + s["count"] += 1 + if c["pipeline_tag"]: + s["pipeline_tags"].add(c["pipeline_tag"]) + else: + s["pipeline_tags_none_count"] += 1 + for cat in ("formats", "tasks", "libraries", "other"): + if c[cat]: + s[cat].update(c[cat]) + else: + s[f"{cat}_none_count"] += 1 + + # Named models first + for model_id in args.models: + process_model(model_id) + + # server_models.json checkpoints + if use_registry: + with open(MODELS_JSON) as f: + registry = json.load(f) + + checkpoints = load_checkpoints(registry, recipe_filter) + + label = "server_models.json" + if recipe_filter: + label += f" [{', '.join(sorted(recipe_filter))}]" + print(f"\n{'=' * 60}") + print(f"{label}: {len(checkpoints)} unique checkpoints") + print(f"{'=' * 60}") + + for checkpoint, recipe in sorted(checkpoints.items()): + process_model(checkpoint, recipe=recipe) + + # user_models.json + if args.user: + user_path = 
get_user_models_path(args.user_models_path) + if user_path.exists(): + with open(user_path) as f: + user_registry = json.load(f) + + checkpoints = load_checkpoints(user_registry, recipe_filter) + + print(f"\n{'=' * 60}") + print(f"user_models.json: {len(checkpoints)} unique checkpoints") + print(f"{'=' * 60}") + + for checkpoint, recipe in sorted(checkpoints.items()): + process_model(checkpoint, recipe=recipe) + else: + print(f"\n user_models.json not found at {user_path}") + + # Print summary if requested + if args.summary and summary: + print_summary(summary) + + +if __name__ == "__main__": + main()