diff --git a/src/app/package-lock.json b/src/app/package-lock.json index 35a7694f1..b731302a6 100644 --- a/src/app/package-lock.json +++ b/src/app/package-lock.json @@ -4379,6 +4379,17 @@ "node": ">= 0.8" } }, + "node_modules/encoding": { + "version": "0.1.13", + "resolved": "https://registry.npmjs.org/encoding/-/encoding-0.1.13.tgz", + "integrity": "sha512-ETBauow1T35Y/WZMkio9jiM0Z5xjHHmJ4XmjZOq1l/dXz3lr2sRn87nJy20RupqSh1F2m3HHPSp8ShIPQJrJ3A==", + "dev": true, + "license": "MIT", + "optional": true, + "dependencies": { + "iconv-lite": "^0.6.2" + } + }, "node_modules/end-of-stream": { "version": "1.4.5", "resolved": "https://registry.npmjs.org/end-of-stream/-/end-of-stream-1.4.5.tgz", diff --git a/src/app/src/renderer/AddModelPanel.tsx b/src/app/src/renderer/AddModelPanel.tsx index 86db8e656..e67571d7e 100644 --- a/src/app/src/renderer/AddModelPanel.tsx +++ b/src/app/src/renderer/AddModelPanel.tsx @@ -1,5 +1,6 @@ -import React, { useState, useEffect } from 'react'; +import React, { useState, useEffect, useMemo } from 'react'; import { useSystem } from './hooks/useSystem'; +import { TASK_RECIPE_MAP } from './utils/recipeCompatibility'; export interface AddModelInitialValues { name: string; @@ -58,12 +59,32 @@ const AddModelPanel: React.FC = ({ onClose, onInstall, initi useEffect(() => { const newForm = createEmptyForm(initialValues); if (initialValues?.mmprojOptions && initialValues.mmprojOptions.length > 0) { - newForm.mmproj = initialValues.mmprojOptions[0]; + // Prefer BF16 > F16 > F32 > first available + const priority = [/bf16/i, /f16/i, /f32/i]; + let best = initialValues.mmprojOptions[0]; + for (const pattern of priority) { + const match = initialValues.mmprojOptions.find(f => pattern.test(f)); + if (match) { best = match; break; } + } + newForm.mmproj = best; } setForm(newForm); setError(null); }, [initialValues]); + // Detect recipe/name mismatch — warn when checkpoint suggests a different modality + const recipeMismatchWarning = useMemo(() => { + const 
checkpoint = form.checkpoint.toLowerCase(); + const name = form.name.toLowerCase(); + const combined = `${checkpoint} ${name}`; + for (const mapping of TASK_RECIPE_MAP) { + if (mapping.namePatterns.some(p => p.test(combined)) && form.recipe !== mapping.recipe) { + return `This looks like a ${mapping.label} model. The selected recipe (${RECIPE_LABELS[form.recipe] ?? form.recipe}) may not be compatible.`; + } + } + return null; + }, [form.checkpoint, form.name, form.recipe]); + const handleChange = (field: string, value: string | boolean) => { setForm(prev => ({ ...prev, [field]: value })); setError(null); @@ -252,6 +273,7 @@ const AddModelPanel: React.FC = ({ onClose, onInstall, initi + {recipeMismatchWarning &&
{recipeMismatchWarning}
} {error &&
{error}
} diff --git a/src/app/src/renderer/ConfirmDialog.tsx b/src/app/src/renderer/ConfirmDialog.tsx index 768de755a..046cf6700 100644 --- a/src/app/src/renderer/ConfirmDialog.tsx +++ b/src/app/src/renderer/ConfirmDialog.tsx @@ -1,5 +1,10 @@ import React, { useEffect, useRef, useState } from 'react'; +interface KeepFilesOption { + label: string; + defaultChecked?: boolean; +} + interface ConfirmDialogProps { isOpen: boolean; title: string; @@ -7,6 +12,9 @@ interface ConfirmDialogProps { confirmText?: string; cancelText?: string; danger?: boolean; + keepFilesOption?: KeepFilesOption; + keepFiles?: boolean; + onKeepFilesChange?: (keepFiles: boolean) => void; onConfirm: () => void; onCancel: () => void; } @@ -18,6 +26,9 @@ const ConfirmDialog: React.FC = ({ confirmText = 'Confirm', cancelText = 'Cancel', danger = false, + keepFilesOption, + keepFiles, + onKeepFilesChange, onConfirm, onCancel }) => { @@ -59,6 +70,16 @@ const ConfirmDialog: React.FC = ({

{title}

{message}

+ {keepFilesOption && ( + + )}
+ +
+
+
+ {cacheQuants.length >= 1 && ( + + )} + + {compatibility.label} + {compatibility.level === 'experimental' && ' ?'} + +
+ + ); + }; + + const renderCacheProviderGroup = (provider: string, members: CacheModelInfo[]) => { + const isExpanded = expandedCacheProviders.has(provider); + return ( +
+
toggleCacheProvider(provider)}> + + + + {provider} + ({members.length}) +
+ {isExpanded && ( +
+ {members.map(m => renderCacheModelItem(m, true))} +
+ )} +
+ ); + }; + const renderFamilyItem = (item: Extract) => { const { family, members } = item; const isExpanded = expandedFamilies.has(family.displayName); @@ -1383,7 +2043,7 @@ const [searchQuery, setSearchQuery] = useState(''); /> {showInlineFilterButton && ( + ); + })} + +
Suggested models
@@ -1449,10 +2137,29 @@ const [searchQuery, setSearchQuery] = useState(''); +
Sections
+ + + @@ -1485,7 +2192,7 @@ const [searchQuery, setSearchQuery] = useState(''); )}
- {currentView === 'models' && ( + {currentView === 'models' && showSuggestedSection && (
SUGGESTED MODELS
@@ -1494,29 +2201,56 @@ const [searchQuery, setSearchQuery] = useState(''); {renderModelsView()}
)} - {currentView === 'models' && searchQuery.trim().length >= 3 && ( // Rendering the HF models by searching + {currentView === 'models' && showCacheSection && cacheGroupedItems.length > 0 && ( +
+
+
FROM HF CACHE
+
+ {cacheGroupedItems.map(item => + item.type === 'provider-group' + ? renderCacheProviderGroup(item.provider, item.members) + : renderCacheModelItem(item.cacheModel) + )} +
+ )} + {currentView === 'models' && showSearchSection && searchQuery.trim().length >= 3 && ( // Rendering the HF models by searching
FROM HUGGING FACE
- {isSearchingHF && } +
+ {isSearchingHF && } + {hfSearchPage > 0 && ( + + )} + {(hfSearchPage > 0 || hfHasMoreResults) && ( + {hfSearchPage + 1} + )} + {hfHasMoreResults && ( + + )} +
{hfRateLimited && ( -
Rate limited — try again shortly.
+
+ Rate limited — {hfRateLimitReset ? `retry in ${hfRateLimitReset}s` : 'try again shortly'}. + {!hfAuthenticated && ' Set HF_TOKEN for higher limits.'} +
)} - {!hfRateLimited && !isSearchingHF && ( + {!hfRateLimited && !isSearchingHF && hfSearchCompleted && ( hfSearchResults.length === 0 || (hfSearchResults.length > 0 && detectingBackendFor === null && hfSearchResults.every((m: HFModelInfo) => { - const backend = hfModelBackends[m.id]; - return backend === null || (backend != null && ['sd-cpp', 'whispercpp'].includes(backend.recipe)); + const b = hfModelBackends[m.id]; + return b === null || (b != null && !enabledRecipes.has(b.recipe)); })) ) && ( -
No compatible models found.
+
{hfSearchPage > 0 ? 'No more results.' : 'No compatible models found.'}
)} {hfSearchResults.filter((hfModel: HFModelInfo) => { - const backend = hfModelBackends[hfModel.id]; - return backend !== null && !(backend != null && ['sd-cpp', 'whispercpp'].includes(backend.recipe)); + const b = hfModelBackends[hfModel.id]; + if (!b) return false; + return enabledRecipes.has(b.recipe); }).map((hfModel: HFModelInfo) => { const backend = hfModelBackends[hfModel.id]; const isDetecting = detectingBackendFor === hfModel.id; @@ -1526,7 +2260,17 @@ const [searchQuery, setSearchQuery] = useState(''); return (
- {hfModel.id} + { + e.preventDefault(); + const url = `https://huggingface.co/${hfModel.id}`; + if (window.api?.openExternal) { window.api.openExternal(url); } + else { window.open(url, '_blank', 'noopener,noreferrer'); } + }} + >{hfModel.id} {size !== undefined && {formatSize(size / (1024 ** 3))}} ↓ {formatDownloads(hfModel.downloads)} {isDetecting && } @@ -1578,10 +2322,11 @@ const [searchQuery, setSearchQuery] = useState('');
- {!isDetecting && backend && quants.length > 1 && ( + {!isDetecting && backend && quants.length >= 1 && ( )} - {!isDetecting && backend && {backend.label}} + {!isDetecting && backend && ( + + {backend.label} + {backend.compatibilityLevel === 'experimental' && ' ?'} + + )}
); diff --git a/src/app/src/renderer/components/ConnectedBackendRow.tsx b/src/app/src/renderer/components/ConnectedBackendRow.tsx index 7c1958317..7e07ab055 100644 --- a/src/app/src/renderer/components/ConnectedBackendRow.tsx +++ b/src/app/src/renderer/components/ConnectedBackendRow.tsx @@ -46,14 +46,14 @@ const ConnectedBackendRow: React.FC = ({ const info = statusMessage ? { ...backendInfo, message: statusMessage } : backendInfo; const onConfirmUninstall = useCallback(async (r: string, b: string) => { - const confirmed = await confirm({ + const result = await confirm({ title: 'Uninstall Backend', message: `Are you sure you want to uninstall ${RECIPE_DISPLAY_NAMES[r] || r} ${b}?`, confirmText: 'Uninstall', cancelText: 'Cancel', danger: true, }); - if (confirmed) await handleUninstall(r, b); + if (result.confirmed) await handleUninstall(r, b); }, [confirm, handleUninstall]); return ( diff --git a/src/app/src/renderer/utils/backendInstaller.ts b/src/app/src/renderer/utils/backendInstaller.ts index 505e4046e..c932b872f 100644 --- a/src/app/src/renderer/utils/backendInstaller.ts +++ b/src/app/src/renderer/utils/backendInstaller.ts @@ -32,6 +32,7 @@ export interface ModelRegistrationData { vision?: boolean; embedding?: boolean; reranking?: boolean; + labels?: string[]; } /** @@ -282,11 +283,11 @@ export async function uninstallBackend(recipe: string, backend: string): Promise * Delete a model's files. Single codepath for all model deletions. * Dispatches `modelsUpdated` on success so the models context refreshes. 
*/ -export async function deleteModel(modelName: string): Promise { +export async function deleteModel(modelName: string, keepFiles = false): Promise { const response = await serverFetch('/delete', { method: 'POST', headers: { 'Content-Type': 'application/json' }, - body: JSON.stringify({ model_name: modelName }), + body: JSON.stringify({ model_name: modelName, keep_files: keepFiles }), }); if (!response.ok) { @@ -626,7 +627,7 @@ async function ensureModelReadyInternal( if (!loadResponse.ok) { const errorData = await loadResponse.json().catch(() => ({})); - const errorMsg = errorData.error || `Failed to load model: ${loadResponse.statusText}`; + const errorMsg = (typeof errorData.error === 'string' ? errorData.error : errorData.error?.message) || `Failed to load model: ${loadResponse.statusText}`; throw new Error(errorMsg); } diff --git a/src/app/src/renderer/utils/recipeCompatibility.ts b/src/app/src/renderer/utils/recipeCompatibility.ts new file mode 100644 index 000000000..8dea4461e --- /dev/null +++ b/src/app/src/renderer/utils/recipeCompatibility.ts @@ -0,0 +1,297 @@ +/** + * Recipe-aware model compatibility classification. + * + * Maps HuggingFace model metadata (pipeline_tag, tags, model ID) to Lemonade + * recipes and compatibility levels. Task takes priority over format — GGUF is + * a container format, not a task indicator. + * + * Mirrors server-side logic in model_types.h (get_model_type_from_labels) + * and model_manager.cpp (register_user_model label assignment). + */ + +export interface TaskRecipeMapping { + pipelineTags: string[]; + hfTags: string[]; + namePatterns: RegExp[]; + recipe: string; + modelType: string; + label: string; +} + +/** + * Task-to-recipe mapping table. + * Order matters: first match wins. LLM is the fallback and not listed here. 
+ */ +export const TASK_RECIPE_MAP: TaskRecipeMapping[] = [ + { + pipelineTags: ['text-to-image', 'image-to-image'], + hfTags: ['stable-diffusion', 'text-to-image', 'diffusers', 'image-generation', 'image-editing'], + namePatterns: [/stable-diffusion/i, /\bflux\b/i, /\bsdxl\b/i], + recipe: 'sd-cpp', + modelType: 'image', + label: 'sd.cpp', + }, + { + pipelineTags: ['automatic-speech-recognition'], + hfTags: ['whisper'], + namePatterns: [/whisper/i], + recipe: 'whispercpp', + modelType: 'audio', + label: 'whisper.cpp', + }, + { + pipelineTags: ['text-to-speech', 'text-to-audio'], + hfTags: ['tts', 'kokoro'], + namePatterns: [/kokoro/i], + recipe: 'kokoro', + modelType: 'tts', + label: 'Kokoro', + }, + { + pipelineTags: ['sentence-similarity', 'feature-extraction'], + hfTags: ['sentence-transformers', 'nomic-embed', 'embedding', 'embeddings'], + namePatterns: [/embed/i, /nomic/i], + recipe: 'llamacpp', + modelType: 'embedding', + label: 'llama.cpp', + }, + { + pipelineTags: ['text-ranking'], + hfTags: ['reranker', 'cross-encoder', 'reranking'], + namePatterns: [/rerank/i], + recipe: 'llamacpp', + modelType: 'reranking', + label: 'llama.cpp', + }, +]; + +/** Pipeline tags that indicate an LLM (including multimodal/vision LLMs). */ +const LLM_PIPELINE_TAGS = ['text-generation', 'conversational', 'text2text-generation', 'image-text-to-text', 'translation', 'image-to-text']; + +/** + * All pipeline_tag values that any Lemonade backend can handle. + * Models with a pipeline_tag NOT in this set are incompatible and can be + * skipped before calling detectBackend (saves 2 HF API calls per model). + * Models with no pipeline_tag are allowed through (classification falls + * back to HF tags / name patterns / format detection). 
+ */ +export const SUPPORTED_PIPELINE_TAGS = new Set([ + ...LLM_PIPELINE_TAGS, + ...TASK_RECIPE_MAP.flatMap(m => m.pipelineTags), +]); + +export type CompatibilityLevel = 'supported' | 'likely' | 'experimental' | 'incompatible'; + +export interface ModelCompatibility { + recipe: string; + modelType: string; + label: string; + level: CompatibilityLevel; + reason: string; +} + +export interface ClassifyInput { + modelId: string; + pipelineTag?: string; + tags: string[]; + hasGgufFiles: boolean; + hasOnnxFiles: boolean; + hasFlmFiles: boolean; + hasBinFiles: boolean; +} + +/** + * File formats currently supported by each recipe/backend. + * When a backend gains format support (e.g. whisper.cpp adds GGUF), + * just add the format tag here. + * See https://lemonade-server.ai/docs/server/server_spec/ + */ +export const RECIPE_FORMATS: Record = { + 'llamacpp': ['gguf'], + 'sd-cpp': ['safetensors'], + 'whispercpp': ['bin'], + 'kokoro': ['onnx'], + 'flm': ['flm'], + 'ryzenai-llm': ['onnx'], +}; + +/** + * Check whether the model has at least one file in a format the recipe supports. + * Uses HF tags (gguf, safetensors, onnx) with fallback to file-based detection + * for formats not represented as HF tags (bin, flm). + */ +function hasRequiredFormat(recipe: string, input: ClassifyInput): boolean { + const formats = RECIPE_FORMATS[recipe]; + if (!formats) return true; // unknown recipe — don't gate + return formats.some(fmt => { + // HF tags cover the common formats + if (input.tags.includes(fmt)) return true; + // Fallback to file-based detection for formats without HF tags + switch (fmt) { + case 'gguf': return input.hasGgufFiles; + case 'onnx': return input.hasOnnxFiles; + case 'bin': return input.hasBinFiles; + case 'flm': return input.hasFlmFiles; + default: return false; + } + }); +} + +/** + * Classify a HuggingFace model into a Lemonade recipe with a confidence level. + * + * Priority order: + * 1. pipeline_tag match against TASK_RECIPE_MAP → supported + * 2. 
pipeline_tag is a known LLM tag + GGUF → supported + * 3. HF tags match against TASK_RECIPE_MAP → likely + * 4. Model ID name pattern match → experimental + * 5. FLM files/tags (format-specific) → likely + * 6. ONNX files (format-specific) → likely + * 7. GGUF present, no other signals → experimental (was silently "supported") + * 8. Nothing matched → incompatible + */ +export function classifyModel(input: ClassifyInput): ModelCompatibility { + const { modelId, pipelineTag, tags, hasGgufFiles, hasOnnxFiles, hasFlmFiles, hasBinFiles } = input; + const idLower = modelId.toLowerCase(); + + // --- Early reject: Python-only quantization formats --- + // BitsAndBytes, AWQ, and GPTQ models require Python runtimes and are + // incompatible with all C++ backends (llamacpp, sd-cpp, etc.) + if (/[-._](bnb|awq|gptq)([-._]|$)/i.test(idLower)) { + return { + recipe: '', + modelType: 'unknown', + label: 'Python-only', + level: 'incompatible', + reason: 'BitsAndBytes/AWQ/GPTQ quantization requires Python runtime', + }; + } + + // --- Pass 1: pipeline_tag (highest confidence) --- + + if (pipelineTag) { + // Check non-LLM mappings first (must also have a supported file format) + for (const mapping of TASK_RECIPE_MAP) { + if (mapping.pipelineTags.includes(pipelineTag) && hasRequiredFormat(mapping.recipe, input)) { + return { + recipe: mapping.recipe, + modelType: mapping.modelType, + label: mapping.label, + level: 'supported', + reason: `Task "${pipelineTag}" maps to ${mapping.label}`, + }; + } + } + + // Known LLM pipeline tag + if (LLM_PIPELINE_TAGS.includes(pipelineTag)) { + if (hasGgufFiles) { + return { + recipe: 'llamacpp', + modelType: 'llm', + label: 'llama.cpp', + level: 'supported', + reason: `Task "${pipelineTag}" with GGUF files`, + }; + } + if (hasOnnxFiles) { + return { + recipe: 'ryzenai-llm', + modelType: 'llm', + label: 'RyzenAI', + level: 'likely', + reason: `Task "${pipelineTag}" with ONNX files`, + }; + } + } + + // pipeline_tag present but doesn't match anything 
we support + // (e.g. "feature-extraction", "fill-mask", "summarization", etc.) + if (!LLM_PIPELINE_TAGS.includes(pipelineTag)) { + return { + recipe: '', + modelType: 'unknown', + label: pipelineTag, + level: 'incompatible', + reason: `Task "${pipelineTag}" is not supported by any Lemonade backend`, + }; + } + } + + // --- Pass 2: HF tags (medium confidence) --- + + for (const mapping of TASK_RECIPE_MAP) { + if (mapping.hfTags.some(t => tags.includes(t)) && hasRequiredFormat(mapping.recipe, input)) { + return { + recipe: mapping.recipe, + modelType: mapping.modelType, + label: mapping.label, + level: 'likely', + reason: `Repository tags suggest ${mapping.label} model`, + }; + } + } + + // --- Pass 3: Model ID name patterns (low confidence) --- + + for (const mapping of TASK_RECIPE_MAP) { + if (mapping.namePatterns.some(p => p.test(idLower)) && hasRequiredFormat(mapping.recipe, input)) { + return { + recipe: mapping.recipe, + modelType: mapping.modelType, + label: mapping.label, + level: 'experimental', + reason: `Model name suggests ${mapping.label} — no confirming metadata`, + }; + } + } + + // --- Pass 4: Format-only fallbacks --- + + // FLM detection + if (hasFlmFiles || idLower.startsWith('fastflowlm/') || tags.includes('flm')) { + return { + recipe: 'flm', + modelType: 'llm', + label: 'FastFlowLM', + level: 'likely', + reason: 'FLM files or tags detected', + }; + } + + // ONNX detection (without LLM pipeline_tag — lower confidence) + if (hasOnnxFiles) { + let recipe = 'ryzenai-llm'; + let label = 'RyzenAI'; + if (idLower.includes('-ryzenai-npu') || tags.includes('npu')) { recipe = 'ryzenai-llm'; label = 'RyzenAI NPU'; } + else if (idLower.includes('-ryzenai-hybrid') || tags.includes('hybrid')) { recipe = 'ryzenai-llm'; label = 'RyzenAI Hybrid'; } + else if (tags.includes('igpu')) { recipe = 'ryzenai-llm'; label = 'RyzenAI iGPU'; } + return { + recipe, + modelType: 'llm', + label, + level: 'likely', + reason: 'ONNX files detected', + }; + } + + // GGUF present 
but no task metadata — this is the case that was causing issues + if (hasGgufFiles) { + return { + recipe: 'llamacpp', + modelType: 'llm', + label: 'llama.cpp', + level: 'experimental', + reason: 'GGUF files present but no task metadata — assuming LLM', + }; + } + + // Nothing matched + return { + recipe: '', + modelType: 'unknown', + label: 'Unknown', + level: 'incompatible', + reason: 'No compatible format or task metadata detected', + }; +} diff --git a/src/app/styles.css b/src/app/styles.css index 4e60f3c97..044d885d6 100644 --- a/src/app/styles.css +++ b/src/app/styles.css @@ -1060,6 +1060,14 @@ footer { background: rgba(255, 255, 255, 0.16); } +.left-panel-inline-filter-btn.filter-active { + color: #4ade80; +} + +.left-panel-inline-filter-btn.filter-active:hover { + color: #86efac; +} + .left-panel-filter-popover { position: absolute; top: calc(100% + 6px); @@ -1120,6 +1128,71 @@ footer { color: #ffffff; } +.filter-section-label { + font-size: 0.55rem; + color: #666; + text-transform: uppercase; + letter-spacing: 0.5px; + margin-bottom: -4px; +} + +.recipe-filter-chips { + display: flex; + flex-wrap: wrap; + gap: 4px; +} + +.recipe-chip { + font-size: 0.58rem; + padding: 3px 8px; + border-radius: 10px; + border: 1px solid rgba(255, 255, 255, 0.1); + background: transparent; + color: #777; + cursor: pointer; + transition: all 0.15s ease; +} + +.recipe-chip:hover { + background: rgba(255, 255, 255, 0.05); + color: #aaa; +} + +.recipe-chip.active { + border-color: rgba(255, 255, 255, 0.25); + background: rgba(255, 255, 255, 0.08); + color: #ddd; +} + +.recipe-chip.state-installed.active { + border-color: rgba(76, 175, 80, 0.5); + background: rgba(76, 175, 80, 0.15); + color: #81c784; +} + +.recipe-chip.state-available.active { + border-color: rgba(255, 193, 7, 0.5); + background: rgba(255, 193, 7, 0.15); + color: #ffd54f; +} + +.recipe-chip.state-unsupported.active { + border-color: rgba(244, 67, 54, 0.4); + background: rgba(244, 67, 54, 0.12); + color: #e57373; 
+} + +/* Inactive state hints — subtle color so unsupported is distinguishable from deselected */ +.recipe-chip.state-unsupported:not(.active) { + border-color: rgba(244, 67, 54, 0.2); + color: #a05050; +} + +.recipe-chip.state-available:not(.active) { + border-color: rgba(255, 193, 7, 0.15); + color: #8a7740; +} + .model-filter-popover .organization-toggle { padding: 2px; border: 1px solid rgba(255, 255, 255, 0.06); @@ -1214,6 +1287,55 @@ footer { to { transform: rotate(360deg); } } +.hf-pagination { + display: flex; + align-items: center; + gap: 4px; + margin-left: auto; +} + +.hf-page-btn { + background: rgba(255, 255, 255, 0.06); + border: 1px solid rgba(255, 255, 255, 0.1); + border-radius: 3px; + color: #aaa; + font-size: 0.75rem; + width: 20px; + height: 20px; + display: flex; + align-items: center; + justify-content: center; + cursor: pointer; + padding: 0; + line-height: 1; +} + +.hf-page-btn:hover:not(:disabled) { + background: rgba(255, 255, 255, 0.12); + color: #ddd; +} + +.hf-page-btn:disabled { + opacity: 0.4; + cursor: not-allowed; +} + +.hf-page-cooldown { + animation: hf-cooldown-fade 2s ease-out forwards; +} + +@keyframes hf-cooldown-fade { + 0% { background: rgba(200, 60, 60, 0.4); border-color: rgba(200, 60, 60, 0.5); } + 100% { background: rgba(255, 255, 255, 0.06); border-color: rgba(255, 255, 255, 0.1); } +} + +.hf-page-label { + font-size: 0.6rem; + color: #888; + min-width: 14px; + text-align: center; +} + .hf-search-message { font-size: 0.68rem; color: #555; @@ -1259,6 +1381,12 @@ footer { text-overflow: ellipsis; flex-shrink: 1; min-width: 0; + text-decoration: none; +} + +.hf-model-name:hover { + color: #fff; + text-decoration: underline; } .hf-model-meta { @@ -1290,6 +1418,49 @@ footer { padding: 1px 5px; letter-spacing: 0.2px; flex-shrink: 0; + cursor: default; +} + +.hf-badge-experimental { + color: #c9a227; + border-color: rgba(201, 162, 39, 0.3); + background: rgba(201, 162, 39, 0.08); +} + +/* HF cache provider grouping */ 
+.hf-provider-group { + margin: 0; +} + +.hf-provider-header { + display: flex; + align-items: center; + gap: 6px; + padding: 4px 10px 4px 13px; + cursor: pointer; + user-select: none; +} + +.hf-provider-header:hover { + background: rgba(255, 255, 255, 0.03); +} + +.hf-provider-name { + font-size: 0.65rem; + color: #d0d0d0; +} + +.hf-provider-count { + font-size: 0.58rem; + color: #666; +} + +.hf-provider-members { + padding-left: 6px; +} + +.hf-provider-member { + padding-left: 16px; } .hf-quant-select { @@ -2470,6 +2641,16 @@ footer { padding-left: 8px; } +.form-warning { + font-size: 0.75rem; + color: #c9a227; + background: rgba(201, 162, 39, 0.08); + border: 1px solid rgba(201, 162, 39, 0.2); + border-radius: 4px; + padding: 8px 10px; + margin-top: 8px; +} + .form-checkboxes { display: grid; grid-template-columns: 1fr 1fr; @@ -5246,6 +5427,20 @@ footer { line-height: 1.5; } +.confirm-dialog-checkbox { + display: flex; + align-items: center; + gap: 8px; + font-size: 0.8rem; + color: #aaa; + margin-bottom: 16px; + cursor: pointer; +} + +.confirm-dialog-checkbox input { + cursor: pointer; +} + .confirm-dialog-actions { display: flex; gap: 10px; diff --git a/src/cpp/include/lemon/model_manager.h b/src/cpp/include/lemon/model_manager.h index afbd9f9a5..a78fa7c21 100644 --- a/src/cpp/include/lemon/model_manager.h +++ b/src/cpp/include/lemon/model_manager.h @@ -99,8 +99,8 @@ class ModelManager { bool do_not_upgrade = false, DownloadProgressCallback progress_callback = nullptr); - // Delete a model - void delete_model(const std::string& model_name); + // Delete a model (keep_files=true removes from registry only, preserving HF cache files) + void delete_model(const std::string& model_name, bool keep_files = false); // Get model info by name ModelInfo get_model_info(const std::string& model_name); @@ -135,6 +135,9 @@ class ModelManager { // Set extra models directory for GGUF discovery void set_extra_models_dir(const std::string& dir); + // Discover models in HF cache 
that are not registered in the model registry + json discover_hf_cache_models(); + void save_model_options(const ModelInfo& info); private: diff --git a/src/cpp/server/model_manager.cpp b/src/cpp/server/model_manager.cpp index a2fd444a4..958fa5f25 100644 --- a/src/cpp/server/model_manager.cpp +++ b/src/cpp/server/model_manager.cpp @@ -13,6 +13,7 @@ #include #include #include +#include #include #include @@ -390,6 +391,198 @@ std::map ModelManager::discover_extra_models() const { return discovered; } +json ModelManager::discover_hf_cache_models() { + json result = json::array(); + + std::string hf_cache = get_hf_cache_dir(); + if (hf_cache.empty() || !fs::exists(hf_cache)) { + return result; + } + + // Get the set of repo IDs already known (registered models) + std::set known_repos; + auto all_models = get_supported_models(); + for (const auto& [name, info] : all_models) { + // Extract repo_id from the main checkpoint + std::string ckpt = info.checkpoint("main"); + if (!ckpt.empty()) { + // Remove variant suffix (everything after ':') + size_t colon = ckpt.find(':'); + std::string repo = (colon != std::string::npos) ? 
ckpt.substr(0, colon) : ckpt; + known_repos.insert(repo); + } + } + + // Scan HF cache for models--org--name directories + try { + for (const auto& entry : fs::directory_iterator(hf_cache)) { + if (!entry.is_directory()) continue; + + std::string dirname = entry.path().filename().string(); + if (dirname.substr(0, 8) != "models--") continue; + + // Convert models--org--name back to org/name + std::string repo_id = dirname.substr(8); + size_t pos = 0; + while ((pos = repo_id.find("--", pos)) != std::string::npos) { + repo_id.replace(pos, 2, "/"); + pos += 1; + } + + // Skip if already registered + if (known_repos.count(repo_id)) continue; + + // Find snapshot directory (use the latest one) + fs::path snapshots_dir = entry.path() / "snapshots"; + if (!fs::exists(snapshots_dir)) continue; + + fs::path latest_snapshot; + std::filesystem::file_time_type latest_time{}; + for (const auto& snap : fs::directory_iterator(snapshots_dir)) { + if (!snap.is_directory()) continue; + auto ftime = snap.last_write_time(); + if (latest_snapshot.empty() || ftime > latest_time) { + latest_snapshot = snap.path(); + latest_time = ftime; + } + } + if (latest_snapshot.empty()) continue; + + // Scan for model files and compute total size + bool has_gguf = false, has_onnx = false, has_bin = false; + double total_size_gb = 0.0; + + // Group GGUF files: root files individually, subfolder files by folder + struct FileEntry { std::string name; uintmax_t size; }; + std::vector root_gguf_files; + std::map> folder_files; // folder name -> files + std::vector bin_files; + std::vector mmproj_files; + + for (const auto& file : fs::recursive_directory_iterator(latest_snapshot)) { + if (!file.is_regular_file()) continue; + std::string fname = file.path().filename().string(); + std::string fname_lower = fname; + std::transform(fname_lower.begin(), fname_lower.end(), fname_lower.begin(), ::tolower); + + uintmax_t fsize = 0; + try { fsize = file.file_size(); } catch (...) 
{} + total_size_gb += static_cast(fsize) / (1024.0 * 1024.0 * 1024.0); + + if (fname_lower.find(".gguf") != std::string::npos) { + has_gguf = true; + if (fname_lower.find("mmproj") != std::string::npos) { + mmproj_files.push_back(fname); + } else { + // Check if file is in a subfolder relative to the snapshot + // Use canonical paths to resolve symlinks before computing relative path + fs::path canonical_file = fs::canonical(file.path()); + fs::path canonical_snap = fs::canonical(latest_snapshot); + fs::path rel = fs::relative(canonical_file, canonical_snap); + std::string rel_parent = rel.parent_path().string(); + if (rel_parent.empty() || rel_parent == ".") { + root_gguf_files.push_back({fname, fsize}); + } else { + // File in subfolder — group by top-level folder name + std::string folder = rel.begin()->string(); + folder_files[folder].push_back({fname, fsize}); + } + } + } + if (fname_lower.find(".onnx") != std::string::npos) has_onnx = true; + if (fname_lower.find(".bin") != std::string::npos) { + has_bin = true; + bin_files.push_back({fname, fsize}); + } + } + + // Skip empty or unrecognized repos + if (!has_gguf && !has_onnx && !has_bin) continue; + + // Build gguf_files array: one entry per variant + json gguf_files = json::array(); + std::regex quant_regex(R"((Q\d+(?:_\d)?(?:_[KS])?(?:_[MSXL]+)?|F(?:16|32)|IQ\d+(?:_[A-Z]+)?|BF16|MXFP\d+(?:_[A-Z]+)?))", std::regex::icase); + for (const auto& [folder, files] : folder_files) { + if (std::regex_search(folder, quant_regex)) { + // Folder name is a quant variant (e.g. "Q4_0/", "UD-Q3_K_XL/") — single entry with combined size + uintmax_t total = 0; + for (const auto& f : files) total += f.size; + json gf; + gf["filename"] = folder; + if (total > 0) gf["size"] = total; + gguf_files.push_back(gf); + } else { + // Folder name is not a quant (e.g. 
"whisper.cpp/") — list individual files + for (const auto& f : files) { + json gf; + gf["filename"] = f.name; + if (f.size > 0) gf["size"] = f.size; + gguf_files.push_back(gf); + } + } + } + for (const auto& rf : root_gguf_files) { + json gf; + gf["filename"] = rf.name; + if (rf.size > 0) gf["size"] = rf.size; + gguf_files.push_back(gf); + } + + json model_entry; + model_entry["repo_id"] = repo_id; + model_entry["has_gguf"] = has_gguf; + model_entry["has_onnx"] = has_onnx; + model_entry["has_bin"] = has_bin; + if (total_size_gb > 0.0) model_entry["size_gb"] = total_size_gb; + if (!gguf_files.empty()) model_entry["gguf_files"] = gguf_files; + if (!mmproj_files.empty()) model_entry["mmproj_files"] = mmproj_files; + if (!bin_files.empty()) { + json bf_array = json::array(); + for (const auto& bf : bin_files) { + json bfj; + bfj["filename"] = bf.name; + if (bf.size > 0) bfj["size"] = bf.size; + bf_array.push_back(bfj); + } + model_entry["bin_files"] = bf_array; + } + + result.push_back(model_entry); + } + } catch (const std::exception& e) { + LOG(ERROR, "ModelManager") << "Error scanning HF cache: " << e.what() << std::endl; + } + + // Fetch pipeline_tag metadata from HF API for each discovered model + if (!result.empty()) { + std::map headers; + const char* hf_token = std::getenv("HF_TOKEN"); + if (hf_token) { + headers["Authorization"] = "Bearer " + std::string(hf_token); + } + + for (auto& model_entry : result) { + std::string repo_id = model_entry["repo_id"].get(); + try { + std::string api_url = "https://huggingface.co/api/models/" + repo_id; + auto response = HttpClient::get(api_url, headers); + if (response.status_code == 200) { + auto info = JsonUtils::parse(response.body); + if (info.contains("pipeline_tag") && info["pipeline_tag"].is_string()) { + model_entry["pipeline_tag"] = info["pipeline_tag"].get(); + } + } + } catch (const std::exception& e) { + // Non-fatal: model will fall back to format-based heuristics + LOG(WARNING, "ModelManager") << "Failed to 
fetch metadata for " << repo_id << ": " << e.what() << std::endl; + } + } + } + + LOG(INFO, "ModelManager") << "Found " << result.size() << " unregistered models in HF cache" << std::endl; + return result; +} + std::string ModelManager::resolve_model_path(const ModelInfo& info, const std::string& type, const std::string& checkpoint) const { // Experience models are virtual bundles with no direct checkpoint to resolve if (info.recipe == "experience") { @@ -489,8 +682,13 @@ std::string ModelManager::resolve_model_path(const ModelInfo& info, const std::s return all_bin_files[0]; } - // For llamacpp, find the GGUF file with advanced sharded model support - if (info.recipe == "llamacpp" && type == "main") { + // For GGUF-based backends, find the GGUF file with advanced sharded model support + // Skip this resolver if the variant is explicitly a non-GGUF file (e.g. safetensors) + bool variant_is_non_gguf = !variant.empty() && ( + variant.find(".safetensors") != std::string::npos || + variant.find(".onnx") != std::string::npos || + variant.find(".bin") != std::string::npos); + if ((info.recipe == "llamacpp" || info.recipe == "sd-cpp") && type == "main" && !variant_is_non_gguf) { if (!fs::exists(model_cache_path_fs)) { return model_cache_path; // Return directory path even if not found } @@ -1019,6 +1217,29 @@ void ModelManager::add_model_to_cache(const std::string& model_name) { } } + // Compute size from resolved path if not already set + if (info.size <= 0.0 && info.downloaded && !info.resolved_path().empty()) { + try { + fs::path resolved(info.resolved_path()); + // For sharded models, sum all model files in the snapshot directory + // (resolved_path points to just the first shard) + fs::path snapshot_dir = fs::is_directory(resolved) ? 
resolved : resolved.parent_path(); + double total = 0.0; + for (const auto& entry : fs::directory_iterator(snapshot_dir)) { + if (entry.is_regular_file()) { + std::string ext = entry.path().extension().string(); + // Only count model files, not metadata + if (ext == ".gguf" || ext == ".bin") { + total += static_cast<double>(entry.file_size()); + } + } + } + if (total > 0.0) { + info.size = total / (1024.0 * 1024.0 * 1024.0); + } + } catch (...) {} + } + models_cache_[model_name] = info; LOG(INFO, "ModelManager") << "Added '" << model_name << "' to cache (downloaded=" << info.downloaded << ")" << std::endl; } @@ -1053,6 +1274,25 @@ void ModelManager::update_model_in_cache(const std::string& model_name, bool dow // The path changes now that files exist on disk if (downloaded) { resolve_all_model_paths(it->second); + // Compute size if not already set + if (it->second.size <= 0.0 && !it->second.resolved_path().empty()) { + try { + fs::path resolved(it->second.resolved_path()); + fs::path snapshot_dir = fs::is_directory(resolved) ? resolved : resolved.parent_path(); + double total = 0.0; + for (const auto& entry : fs::directory_iterator(snapshot_dir)) { + if (entry.is_regular_file()) { + std::string ext = entry.path().extension().string(); + if (ext == ".gguf" || ext == ".bin") { + total += static_cast<double>(entry.file_size()); + } + } + } + if (total > 0.0) { + it->second.size = total / (1024.0 * 1024.0 * 1024.0); + } + } catch (...) {} + } LOG(INFO, "ModelManager") << "Updated '" << model_name << "' downloaded=" << downloaded << ", resolved_path=" << it->second.resolved_path() << std::endl; @@ -1727,6 +1967,13 @@ void ModelManager::download_model(const std::string& model_name, // Register user models to user_models.json if (model_name.substr(0, 5) == "user." && !model_registered) { register_user_model(model_name, model_data); + + // After registration, check if model files already exist in HF cache + // (e.g.
user is registering a model they already downloaded outside Lemonade) + if (is_model_downloaded(model_name)) { + LOG(INFO, "ModelManager") << "Model already exists in cache, skipping download" << std::endl; + return; + } } auto model_info = get_model_info(model_name); @@ -2352,7 +2599,7 @@ void ModelManager::download_from_flm(const std::string& checkpoint, LOG(INFO, "ModelManager") << "FLM model pull completed successfully" << std::endl; } -void ModelManager::delete_model(const std::string& model_name) { +void ModelManager::delete_model(const std::string& model_name, bool keep_files) { auto info = get_model_info(model_name); LOG(INFO, "ModelManager") << "Deleting model: " << model_name << std::endl; @@ -2458,13 +2705,17 @@ void ModelManager::delete_model(const std::string& model_name) { LOG(INFO, "ModelManager") << "Cache path: " << model_cache_path << std::endl; - fs::path model_cache_path_fs = path_from_utf8(model_cache_path); - if (fs::exists(model_cache_path_fs)) { - LOG(INFO, "ModelManager") << "Removing directory..." << std::endl; - fs::remove_all(model_cache_path_fs); - LOG(INFO, "ModelManager") << "✓ Deleted model files: " << model_name << std::endl; + if (keep_files) { + LOG(INFO, "ModelManager") << "Keeping files in HF cache (unregister only)" << std::endl; } else { - LOG(INFO, "ModelManager") << "Warning: Model cache directory not found (may already be deleted)" << std::endl; + fs::path model_cache_path_fs = path_from_utf8(model_cache_path); + if (fs::exists(model_cache_path_fs)) { + LOG(INFO, "ModelManager") << "Removing directory..." 
<< std::endl; + fs::remove_all(model_cache_path_fs); + LOG(INFO, "ModelManager") << "✓ Deleted model files: " << model_name << std::endl; + } else { + LOG(INFO, "ModelManager") << "Warning: Model cache directory not found (may already be deleted)" << std::endl; + } + } // Remove from user models if it's a user model diff --git a/src/cpp/server/server.cpp b/src/cpp/server/server.cpp index 4c8d837d4..6907d0645 100644 --- a/src/cpp/server/server.cpp +++ b/src/cpp/server/server.cpp @@ -326,6 +326,83 @@ void Server::setup_routes(httplib::Server &web_server) { handle_system_stats(req, res); }); + register_get("cache/models", [this](const httplib::Request&, httplib::Response& res) { + auto models = model_manager_->discover_hf_cache_models(); + res.set_content(models.dump(), "application/json"); + }); + + // HuggingFace search proxy — uses server-side HF_TOKEN for higher rate limits + register_get("hf/search", [](const httplib::Request& req, httplib::Response& res) { + std::string url; + if (req.has_param("next_url")) { + // Use the full cursor URL directly (for pagination) + url = req.get_param_value("next_url"); + } else { + // Build HF API URL from query params + url = "https://huggingface.co/api/models?"; + bool first = true; + for (const auto& param : {"search", "filter", "limit", "sort", "direction", "pipeline_tag", "author"}) { + if (req.has_param(param)) { + if (!first) url += "&"; + url += std::string(param) + "=" + httplib::encode_query_component(req.get_param_value(param)); + first = false; + } + } + } + + // Add HF_TOKEN if available + std::map<std::string, std::string> headers; + const char* hf_token = std::getenv("HF_TOKEN"); + if (hf_token) { + headers["Authorization"] = "Bearer " + std::string(hf_token); + } + + auto hf_res = lemon::utils::HttpClient::get(url, headers); + + // Build response with rate limit info + nlohmann::json response; + response["status"] = hf_res.status_code; + + if (hf_res.status_code == 200) { + response["data"] = nlohmann::json::parse(hf_res.body, nullptr,
false); + } else if (hf_res.status_code == 429) { + response["data"] = nlohmann::json::array(); + } else { + response["data"] = nlohmann::json::array(); + } + + // Parse Link header for next cursor (headers stored lowercase) + auto link_it = hf_res.headers.find("link"); + if (link_it != hf_res.headers.end()) { + std::string link = link_it->second; + auto pos = link.find("<"); + auto end = link.find(">"); + if (pos != std::string::npos && end != std::string::npos) { + response["next_cursor"] = link.substr(pos + 1, end - pos - 1); + } + } + + // Parse RateLimit header (headers stored lowercase) + auto rl_it = hf_res.headers.find("ratelimit"); + if (rl_it != hf_res.headers.end()) { + response["rate_limit_header"] = rl_it->second; + // Parse remaining and time-to-reset: format is "api;r=123;t=45" + std::string rl = rl_it->second; + auto r_pos = rl.find("r="); + auto t_pos = rl.find("t="); + if (r_pos != std::string::npos) { + try { response["rate_limit_remaining"] = std::stoi(rl.substr(r_pos + 2)); } catch (...) {} + } + if (t_pos != std::string::npos) { + try { response["rate_limit_reset"] = std::stoi(rl.substr(t_pos + 2)); } catch (...) {} + } + } + + response["authenticated"] = (hf_token != nullptr); + + res.set_content(response.dump(), "application/json"); + }); + register_post("log-level", [this](const httplib::Request& req, httplib::Response& res) { handle_log_level(req, res); }); @@ -2618,7 +2695,10 @@ void Server::handle_delete(const httplib::Request& req, httplib::Response& res) request_json["model"].get<std::string>() : request_json["model_name"].get<std::string>(); - LOG(INFO, "Server") << "Deleting model: " << model_name << std::endl; + bool keep_files = request_json.value("keep_files", false); + + LOG(INFO, "Server") << "Deleting model: " << model_name + << (keep_files ?
" (keeping files)" : "") << std::endl; // If the model is currently loaded, unload it first to release file locks if (router_->is_model_loaded(model_name)) { @@ -2635,7 +2715,7 @@ void Server::handle_delete(const httplib::Request& req, httplib::Response& res) for (int attempt = 0; attempt <= max_retries; ++attempt) { try { - model_manager_->delete_model(model_name); + model_manager_->delete_model(model_name, keep_files); // Success - send response and return nlohmann::json response = { diff --git a/src/cpp/server/utils/http_client.cpp b/src/cpp/server/utils/http_client.cpp index f0f3faa0f..bef78aeb7 100644 --- a/src/cpp/server/utils/http_client.cpp +++ b/src/cpp/server/utils/http_client.cpp @@ -2,6 +2,7 @@ #include #include #include +#include #include #include #include @@ -57,6 +58,26 @@ static int progress_callback(void* clientp, curl_off_t dltotal, curl_off_t dlnow return 0; // Continue transfer } +// Header callback for capturing response headers +static size_t header_write_callback(char* buffer, size_t size, size_t nitems, void* userdata) { + size_t total = size * nitems; + auto* resp_headers = static_cast*>(userdata); + std::string line(buffer, total); + // Trim trailing whitespace/newlines + while (!line.empty() && (line.back() == '\r' || line.back() == '\n')) line.pop_back(); + auto colon = line.find(':'); + if (colon != std::string::npos) { + std::string key = line.substr(0, colon); + std::string value = line.substr(colon + 1); + // Trim leading whitespace from value + while (!value.empty() && value.front() == ' ') value.erase(value.begin()); + // Store with lowercase key for case-insensitive lookup + std::transform(key.begin(), key.end(), key.begin(), ::tolower); + resp_headers->emplace(key, value); + } + return total; +} + HttpResponse HttpClient::get(const std::string& url, const std::map& headers) { CURL* curl = curl_easy_init(); @@ -70,6 +91,8 @@ HttpResponse HttpClient::get(const std::string& url, curl_easy_setopt(curl, CURLOPT_URL, url.c_str()); 
curl_easy_setopt(curl, CURLOPT_WRITEFUNCTION, write_callback); curl_easy_setopt(curl, CURLOPT_WRITEDATA, &response_body); + curl_easy_setopt(curl, CURLOPT_HEADERFUNCTION, header_write_callback); + curl_easy_setopt(curl, CURLOPT_HEADERDATA, &response.headers); curl_easy_setopt(curl, CURLOPT_FOLLOWLOCATION, 1L); curl_easy_setopt(curl, CURLOPT_TIMEOUT, default_timeout_seconds_); curl_easy_setopt(curl, CURLOPT_USERAGENT, "lemon.cpp/1.0"); diff --git a/test/hf_model_tags.py b/test/hf_model_tags.py new file mode 100644 index 000000000..8e2e8356d --- /dev/null +++ b/test/hf_model_tags.py @@ -0,0 +1,558 @@ +#!/usr/bin/env python3 +""" +Show pipeline_tag and tags for HuggingFace models. + +Strips quant/file specifiers (e.g. "org/model-GGUF:Q4_K_M" → "org/model-GGUF") +and deduplicates checkpoints when scanning model registries. + +Tracks HF API rate limits via response headers and waits automatically. +Set HF_TOKEN env var for higher rate limits (1000 vs 500 req/5min). + +Usage: + # Single model + python test/hf_model_tags.py HauhauCS/Qwen3.5-35B-A3B-Uncensored-HauhauCS-Aggressive + + # All checkpoints from server_models.json + python test/hf_model_tags.py --all + + # Single recipe from server_models.json + python test/hf_model_tags.py --llamacpp + python test/hf_model_tags.py --sd-cpp + python test/hf_model_tags.py --whispercpp + python test/hf_model_tags.py --kokoro + python test/hf_model_tags.py --ryzenai-llm + python test/hf_model_tags.py --experience + + # Include user_models.json (from LEMONADE_CACHE_DIR or ~/.cache/lemonade/) + python test/hf_model_tags.py --all --user + python test/hf_model_tags.py --llamacpp --user + python test/hf_model_tags.py --all --user --user-models-path /path/to/user_models.json + + # Tag summary per recipe (inclusive list of all tags seen for each recipe) + python test/hf_model_tags.py --all --summary + python test/hf_model_tags.py --llamacpp --summary + + # Full detection (file extensions, name patterns, classifyModel result) + python 
test/hf_model_tags.py --detect some-org/some-model + python test/hf_model_tags.py --llamacpp --detect + + # Combine freely + python test/hf_model_tags.py --llamacpp --kokoro --user --summary --detect some-org/some-model +""" + +import argparse +import json +import os +import re +import sys +import time +from collections import defaultdict +from pathlib import Path + +import requests + +MODELS_JSON = Path(__file__).resolve().parent.parent / "src/cpp/resources/server_models.json" + +ALL_RECIPES = ["llamacpp", "sd-cpp", "whispercpp", "kokoro", "flm", "ryzenai-llm", "experience"] + +# Format tags — file serialization formats +FORMAT_TAGS = {"gguf", "onnx", "safetensors", "bin", "flm", "q4nx"} + +# Task/pipeline tags — what the model does (mirrors recipeCompatibility.ts) +TASK_TAGS = { + # LLM + "text-generation", "conversational", "text2text-generation", "image-text-to-text", + # Image + "text-to-image", "image-to-image", "image-to-video", "image-to-3d", + "image-text-to-image", "image-text-to-video", "unconditional-image-generation", + "image-segmentation", "object-detection", "depth-estimation", "mask-generation", + "zero-shot-object-detection", + # Audio + "automatic-speech-recognition", "text-to-speech", "audio-text-to-text", + "text-to-audio", "audio-to-audio", "voice-activity-detection", + # Video + "text-to-video", "text-to-3d", "video-to-video", + # Embedding/reranking + "sentence-similarity", "feature-extraction", "text-ranking", + # Other NLP + "fill-mask", "question-answering", "summarization", "translation", + "text-classification", "token-classification", "zero-shot-classification", + "table-question-answering", +} + +# Library/framework tags +LIBRARY_TAGS = { + "transformers", "transformers.js", "diffusers", "sentence-transformers", + "onnxruntime", "pytorch", "tensorflow", "jax", "flax", "keras", + "llama.cpp", "ctranslate2", "mlx", "vllm", "openvino", "coreml", + "tensorrt", "tflite", "rust", "paddlepaddle", "spacy", "fastai", + "flair", 
"adapter-transformers", "timm", "open_clip", +} + +# Prefixed tags we filter into their own buckets +PREFIXED_CATEGORIES = ("license:", "arxiv:", "base_model:", "region:", "doi:", "dataset:") + +# --------------------------------------------------------------------------- +# Mirrors recipeCompatibility.ts — TASK_RECIPE_MAP +# --------------------------------------------------------------------------- +TASK_RECIPE_MAP = [ + { + "pipelineTags": ["text-to-image", "image-to-image"], + "hfTags": ["stable-diffusion", "text-to-image", "diffusers", "image-generation", "image-editing"], + "namePatterns": [re.compile(r"stable-diffusion", re.I), re.compile(r"\bflux\b", re.I), re.compile(r"\bsdxl\b", re.I)], + "recipe": "sd-cpp", + "modelType": "image", + "label": "sd.cpp", + }, + { + "pipelineTags": ["automatic-speech-recognition"], + "hfTags": ["whisper"], + "namePatterns": [re.compile(r"whisper", re.I)], + "recipe": "whispercpp", + "modelType": "audio", + "label": "whisper.cpp", + }, + { + "pipelineTags": ["text-to-speech", "text-to-audio"], + "hfTags": ["tts", "kokoro"], + "namePatterns": [re.compile(r"kokoro", re.I)], + "recipe": "kokoro", + "modelType": "tts", + "label": "Kokoro", + }, + { + "pipelineTags": ["sentence-similarity", "feature-extraction"], + "hfTags": ["sentence-transformers", "nomic-embed", "embedding", "embeddings"], + "namePatterns": [re.compile(r"embed", re.I), re.compile(r"nomic", re.I)], + "recipe": "llamacpp", + "modelType": "embedding", + "label": "llama.cpp", + }, + { + "pipelineTags": ["text-ranking"], + "hfTags": ["reranker", "cross-encoder", "reranking"], + "namePatterns": [re.compile(r"rerank", re.I)], + "recipe": "llamacpp", + "modelType": "reranking", + "label": "llama.cpp", + }, +] + +LLM_PIPELINE_TAGS = ["text-generation", "conversational", "text2text-generation", "image-text-to-text"] + +RECIPE_FORMATS = { + "llamacpp": ["gguf"], + "sd-cpp": ["safetensors"], + "whispercpp": ["bin"], + "kokoro": ["onnx"], + "flm": ["flm"], + "ryzenai-llm": 
["onnx"], +} + +# Rate limit tracking +_rate_limit_remaining = None +_rate_limit_reset = None + + +# --------------------------------------------------------------------------- +# Detection logic — mirrors detectBackend() + classifyModel() from ModelManager.tsx +# --------------------------------------------------------------------------- + +def scan_file_extensions(siblings: list[dict]) -> dict: + """Scan siblings file list for format-relevant extensions.""" + files = [s.get("rfilename", "").lower() for s in siblings] + return { + "gguf": [f for f in files if f.endswith(".gguf")], + "onnx": [f for f in files if f.endswith(".onnx") or f.endswith(".onnx_data")], + "safetensors": [f for f in files if f.endswith(".safetensors")], + "bin": [f for f in files if f.endswith(".bin")], + "flm": [f for f in files if f.endswith(".flm")], + } + + +def check_name_patterns(model_id: str) -> list[dict]: + """Check model ID against TASK_RECIPE_MAP name patterns. Returns matching mappings.""" + id_lower = model_id.lower() + matches = [] + for mapping in TASK_RECIPE_MAP: + for pat in mapping["namePatterns"]: + if pat.search(id_lower): + matches.append({"recipe": mapping["recipe"], "label": mapping["label"], + "modelType": mapping["modelType"], "pattern": pat.pattern}) + break + return matches + + +def has_required_format(recipe: str, tags: list[str], ext_scan: dict) -> bool: + """Check format gate — tags first, file extension fallback.""" + formats = RECIPE_FORMATS.get(recipe) + if not formats: + return True + for fmt in formats: + if fmt in tags: + return True + if ext_scan.get(fmt): + return True + return False + + +def classify_model(model_id: str, pipeline_tag: str | None, tags: list[str], ext_scan: dict) -> dict: + """ + Python port of classifyModel() from recipeCompatibility.ts. + Returns {recipe, modelType, label, level, reason, source}. + source indicates which pass matched: 'pipeline_tag', 'hf_tags', 'name_pattern', + 'format_fallback', or 'none'. 
+ """ + id_lower = model_id.lower() + tag_set = set(tags) + + has_gguf = bool(ext_scan["gguf"]) or "gguf" in tag_set + has_onnx = bool(ext_scan["onnx"]) or "onnx" in tag_set + has_flm = bool(ext_scan["flm"]) or "flm" in tag_set + has_bin = bool(ext_scan["bin"]) + + # --- Pass 1: pipeline_tag --- + if pipeline_tag: + for m in TASK_RECIPE_MAP: + if pipeline_tag in m["pipelineTags"] and has_required_format(m["recipe"], tags, ext_scan): + return {"recipe": m["recipe"], "modelType": m["modelType"], "label": m["label"], + "level": "supported", "reason": f'pipeline_tag "{pipeline_tag}" → {m["label"]}', + "source": "pipeline_tag"} + + if pipeline_tag in LLM_PIPELINE_TAGS: + if has_gguf: + return {"recipe": "llamacpp", "modelType": "llm", "label": "llama.cpp", + "level": "supported", "reason": f'pipeline_tag "{pipeline_tag}" + GGUF', + "source": "pipeline_tag"} + if has_onnx: + return {"recipe": "ryzenai-llm", "modelType": "llm", "label": "RyzenAI", + "level": "likely", "reason": f'pipeline_tag "{pipeline_tag}" + ONNX', + "source": "pipeline_tag"} + + if pipeline_tag not in LLM_PIPELINE_TAGS: + return {"recipe": "", "modelType": "unknown", "label": pipeline_tag, + "level": "incompatible", "reason": f'pipeline_tag "{pipeline_tag}" unsupported', + "source": "pipeline_tag"} + + # --- Pass 2: HF tags --- + for m in TASK_RECIPE_MAP: + if any(t in tag_set for t in m["hfTags"]) and has_required_format(m["recipe"], tags, ext_scan): + matched = [t for t in m["hfTags"] if t in tag_set] + return {"recipe": m["recipe"], "modelType": m["modelType"], "label": m["label"], + "level": "likely", "reason": f"hf_tags [{', '.join(matched)}] → {m['label']}", + "source": "hf_tags"} + + # --- Pass 3: Name patterns --- + for m in TASK_RECIPE_MAP: + for pat in m["namePatterns"]: + if pat.search(id_lower) and has_required_format(m["recipe"], tags, ext_scan): + return {"recipe": m["recipe"], "modelType": m["modelType"], "label": m["label"], + "level": "experimental", "reason": f"name /{pat.pattern}/ → 
{m['label']}", + "source": "name_pattern"} + + # --- Pass 4: Format-only fallbacks --- + if has_flm or id_lower.startswith("fastflowlm/") or "flm" in tag_set: + return {"recipe": "flm", "modelType": "llm", "label": "FastFlowLM", + "level": "likely", "reason": "FLM files or tags", + "source": "format_fallback"} + + if has_onnx: + label = "RyzenAI" + if "npu" in tag_set or "-ryzenai-npu" in id_lower: + label = "RyzenAI NPU" + elif "hybrid" in tag_set or "-ryzenai-hybrid" in id_lower: + label = "RyzenAI Hybrid" + elif "igpu" in tag_set: + label = "RyzenAI iGPU" + return {"recipe": "ryzenai-llm", "modelType": "llm", "label": label, + "level": "likely", "reason": "ONNX files detected", + "source": "format_fallback"} + + if has_gguf: + return {"recipe": "llamacpp", "modelType": "llm", "label": "llama.cpp", + "level": "experimental", "reason": "GGUF present, no task metadata", + "source": "format_fallback"} + + return {"recipe": "", "modelType": "unknown", "label": "Unknown", + "level": "incompatible", "reason": "No compatible format or metadata", + "source": "none"} + + +# --------------------------------------------------------------------------- +# Helpers +# --------------------------------------------------------------------------- + +def get_user_models_path(override: str = None) -> Path: + if override: + return Path(override) + cache_dir = os.environ.get("LEMONADE_CACHE_DIR") + if cache_dir: + return Path(cache_dir) / "user_models.json" + return Path.home() / ".cache" / "lemonade" / "user_models.json" + + +def strip_quant_specifier(checkpoint: str) -> str: + """Strip quant/file specifiers after ':' (e.g. 
'org/model-GGUF:Q4_K_M' → 'org/model-GGUF').""" + return checkpoint.split(":")[0] if ":" in checkpoint else checkpoint + + +def classify_tags(meta: dict) -> dict: + """Classify a model's tags into categories.""" + pipeline_tag = meta.get("pipeline_tag") + tags = meta.get("tags", []) + tag_set = set(tags) + + formats = sorted(FORMAT_TAGS & tag_set) + tasks = sorted(TASK_TAGS & tag_set) + libraries = sorted(LIBRARY_TAGS & tag_set) + known = FORMAT_TAGS | TASK_TAGS | LIBRARY_TAGS + other = [t for t in tags if t not in known and not t.startswith(PREFIXED_CATEGORIES)] + + return { + "pipeline_tag": pipeline_tag, + "formats": formats, + "tasks": tasks, + "libraries": libraries, + "other": other, + } + + +def _hf_get(url: str) -> requests.Response | None: + """GET with rate limiting and HF_TOKEN.""" + global _rate_limit_remaining, _rate_limit_reset + + if _rate_limit_remaining is not None and _rate_limit_remaining <= 1: + wait = (_rate_limit_reset or 60) + 1 + print(f" [rate limited — waiting {wait}s]", flush=True) + time.sleep(wait) + + headers = {} + hf_token = os.environ.get("HF_TOKEN") + if hf_token: + headers["Authorization"] = f"Bearer {hf_token}" + + try: + r = requests.get(url, headers=headers, timeout=15) + + if "X-RateLimit-Remaining" in r.headers: + _rate_limit_remaining = int(r.headers["X-RateLimit-Remaining"]) + if "X-RateLimit-Reset" in r.headers: + try: + reset_time = int(r.headers["X-RateLimit-Reset"]) + _rate_limit_reset = max(0, reset_time - int(time.time())) + except ValueError: + pass + + if r.status_code == 429: + retry_after = int(r.headers.get("Retry-After", 60)) + print(f" [429 rate limited — waiting {retry_after}s]", flush=True) + time.sleep(retry_after + 1) + return _hf_get(url) + + return r + except requests.RequestException as e: + print(f" ERROR: {e}") + return None + + +def fetch_model_meta(model_id: str) -> dict | None: + api_id = strip_quant_specifier(model_id) + r = _hf_get(f"https://huggingface.co/api/models/{api_id}") + if r is None: + 
return None + if r.status_code == 404: + print(f" NOT FOUND: {model_id}") + return None + try: + r.raise_for_status() + except requests.HTTPError as e: + print(f" ERROR fetching {model_id}: {e}") + return None + return r.json() + + +def fetch_siblings(model_id: str) -> list[dict] | None: + """Fetch the file tree for a model (siblings list from model metadata).""" + meta = fetch_model_meta(model_id) + if not meta: + return None + return meta.get("siblings", []) + + +# --------------------------------------------------------------------------- +# Output +# --------------------------------------------------------------------------- + +def print_model(model_id: str, meta: dict, recipe: str = None, detect: bool = False): + c = classify_tags(meta) + + prefix = f"[{recipe}] " if recipe else "" + print(f"\n{prefix}{model_id}") + print(f" pipeline_tag: {c['pipeline_tag'] or '(none)'}") + print(f" formats: {', '.join(c['formats']) if c['formats'] else '(none)'}") + print(f" tasks: {', '.join(c['tasks']) if c['tasks'] else '(none)'}") + print(f" libraries: {', '.join(c['libraries']) if c['libraries'] else '(none)'}") + if c["other"]: + print(f" other: {', '.join(c['other'])}") + + if detect: + siblings = meta.get("siblings", []) + ext_scan = scan_file_extensions(siblings) + ext_summary = {fmt: len(files) for fmt, files in ext_scan.items() if files} + print(f" extensions: {ext_summary if ext_summary else '(none)'}") + + name_matches = check_name_patterns(model_id) + if name_matches: + parts = [f"{m['recipe']} (/{m['pattern']}/)" for m in name_matches] + print(f" name match: {', '.join(parts)}") + else: + print(f" name match: (none)") + + tags = meta.get("tags", []) + result = classify_model(model_id, meta.get("pipeline_tag"), tags, ext_scan) + source = result["source"] + level = result["level"] + print(f" classify: {result['recipe'] or '(none)'} / {result['modelType']}" + f" [{level}] via {source} — {result['reason']}") + + +def print_summary(summary: dict): + """Print 
inclusive tag summary per recipe.""" + print(f"\n{'=' * 60}") + print("TAG SUMMARY BY RECIPE") + print(f"{'=' * 60}") + + for recipe in sorted(summary.keys()): + data = summary[recipe] + count = data["count"] + print(f"\n[{recipe}] ({count} model{'s' if count != 1 else ''})") + + for category in ("pipeline_tags", "formats", "tasks", "libraries", "other"): + tags = sorted(data[category]) + if not tags: + tags = ["(none)"] + label = category.replace("_", " ").rjust(14) + if data[f"{category}_none_count"] > 0 and tags[0] != "(none)": + tags.insert(0, f"(none)×{data[f'{category}_none_count']}") + print(f" {label}: {', '.join(tags)}") + + +def load_checkpoints(registry: dict, recipe_filter: set | None) -> dict: + """Load and deduplicate checkpoints from a model registry.""" + seen = {} + for _name, entry in registry.items(): + recipe = entry.get("recipe", "?") + if recipe_filter and recipe not in recipe_filter: + continue + cp = entry.get("checkpoint", "") + cp_base = strip_quant_specifier(cp) + if cp_base and cp_base not in seen: + seen[cp_base] = recipe + return seen + + +def main(): + parser = argparse.ArgumentParser( + description="Show HF model pipeline_tag and tags", + formatter_class=argparse.RawDescriptionHelpFormatter, + ) + parser.add_argument("models", nargs="*", help="HuggingFace model IDs (org/name)") + parser.add_argument("--all", action="store_true", help="All recipes from server_models.json") + parser.add_argument("--user", action="store_true", help="Include user_models.json") + parser.add_argument("--user-models-path", type=str, default=None, + help="Override user_models.json path (default: LEMONADE_CACHE_DIR or ~/.cache/lemonade/)") + parser.add_argument("--summary", action="store_true", help="Print inclusive tag summary per recipe") + parser.add_argument("--detect", action="store_true", + help="Show file extension scan, name pattern matches, and classifyModel result") + + # Per-recipe flags + for recipe in ALL_RECIPES: + 
parser.add_argument(f"--{recipe}", action="store_true", help=f"Only {recipe} models from server_models.json") + + args = parser.parse_args() + + # Determine which recipes are selected + selected_recipes = {r for r in ALL_RECIPES if getattr(args, r.replace("-", "_"), False)} + use_registry = args.all or bool(selected_recipes) + recipe_filter = selected_recipes if selected_recipes else None # None = all + + if not args.models and not use_registry and not args.user: + parser.print_help() + sys.exit(1) + + # Collect results for summary mode + summary = defaultdict(lambda: { + "count": 0, + "pipeline_tags": set(), "pipeline_tags_none_count": 0, + "formats": set(), "formats_none_count": 0, + "tasks": set(), "tasks_none_count": 0, + "libraries": set(), "libraries_none_count": 0, + "other": set(), "other_none_count": 0, + }) + + def process_model(model_id: str, recipe: str = None): + meta = fetch_model_meta(model_id) + if not meta: + return + print_model(model_id, meta, recipe=recipe, detect=args.detect) + + if args.summary and recipe: + c = classify_tags(meta) + s = summary[recipe] + s["count"] += 1 + if c["pipeline_tag"]: + s["pipeline_tags"].add(c["pipeline_tag"]) + else: + s["pipeline_tags_none_count"] += 1 + for cat in ("formats", "tasks", "libraries", "other"): + if c[cat]: + s[cat].update(c[cat]) + else: + s[f"{cat}_none_count"] += 1 + + # Named models first + for model_id in args.models: + process_model(model_id) + + # server_models.json checkpoints + if use_registry: + with open(MODELS_JSON) as f: + registry = json.load(f) + + checkpoints = load_checkpoints(registry, recipe_filter) + + label = "server_models.json" + if recipe_filter: + label += f" [{', '.join(sorted(recipe_filter))}]" + print(f"\n{'=' * 60}") + print(f"{label}: {len(checkpoints)} unique checkpoints") + print(f"{'=' * 60}") + + for checkpoint, recipe in sorted(checkpoints.items()): + process_model(checkpoint, recipe=recipe) + + # user_models.json + if args.user: + user_path = 
get_user_models_path(args.user_models_path) + if user_path.exists(): + with open(user_path) as f: + user_registry = json.load(f) + + checkpoints = load_checkpoints(user_registry, recipe_filter) + + print(f"\n{'=' * 60}") + print(f"user_models.json: {len(checkpoints)} unique checkpoints") + print(f"{'=' * 60}") + + for checkpoint, recipe in sorted(checkpoints.items()): + process_model(checkpoint, recipe=recipe) + else: + print(f"\n user_models.json not found at {user_path}") + + # Print summary if requested + if args.summary and summary: + print_summary(summary) + + +if __name__ == "__main__": + main()