diff --git a/docs/replica_backend_base.md b/docs/replica_backend_base.md
index 82674674..87f8cfbb 100644
--- a/docs/replica_backend_base.md
+++ b/docs/replica_backend_base.md
@@ -274,7 +274,7 @@ Protected method that stores fetched data and updates all cache metadata (timest
 ```python
 # ✅ GitHubBackend pattern (inside _load_converted_from_disk)
-with open(file_path) as f:
+with open(file_path, encoding="utf-8") as f:
     data: dict[str, Any] = ujson.load(f)

 self._store_in_cache(category, data)
@@ -615,7 +615,7 @@ When errors occur during fetch/read operations, invalidate the cache to force re
 ```python
 try:
-    with open(file_path) as f:
+    with open(file_path, encoding="utf-8") as f:
         data = json.load(f)
     self._store_in_cache(category, data)
     return data
@@ -651,7 +651,7 @@ def fetch_category(self, category, *, force_refresh=False):
     if force_refresh or self.should_fetch_data(category):
         file_path = self._get_file_path(category)
         try:
-            with open(file_path) as f:
+            with open(file_path, encoding="utf-8") as f:
                 data = json.load(f)
             self._store_in_cache(category, data)
             return data
@@ -806,7 +806,7 @@ def fetch_category(self, category, *, force_refresh=False):
     if force_refresh or self.should_fetch_data(category):
         file_path = self._get_file_path(category)
         try:
-            with open(file_path) as f:
+            with open(file_path, encoding="utf-8") as f:
                 data = json.load(f)
             self._store_in_cache(category, data)
             return data
diff --git a/schemas/stable_diffusion.schema.json b/schemas/stable_diffusion.schema.json
index 7c94ca6d..ce717dc2 100644
--- a/schemas/stable_diffusion.schema.json
+++ b/schemas/stable_diffusion.schema.json
@@ -199,7 +199,9 @@
                 "stable_cascade",
                 "flux_1",
                 "flux_schnell",
-                "flux_dev"
+                "flux_dev",
+                "qwen_image",
+                "z_image_turbo"
             ],
             "title": "KNOWN_IMAGE_GENERATION_BASELINE",
             "type": "string"
diff --git a/scripts/legacy_text/convert.py b/scripts/legacy_text/convert.py
index 702e63a9..c6887a62 100644
--- a/scripts/legacy_text/convert.py
+++ b/scripts/legacy_text/convert.py
@@ -23,19 +23,19 @@
 # Keys and values from defaults.json are always present in the model record.
 # Values from defaults.json are used to fill in missing fields in the CSV file.
-with open("defaults.json") as defaults_file:
+with open("defaults.json", encoding="utf-8") as defaults_file:
     defaults = json.load(defaults_file)

 # Keys from generation_params.json are used to validate the 'settings' field.
 # Values from generation_params.json are not used.
-with open("generation_params.json") as params_file:
+with open("generation_params.json", encoding="utf-8") as params_file:
     params = json.load(params_file)

 data = {}

-with open(input_file, newline="") as csvfile:
+with open(input_file, newline="", encoding="utf-8") as csvfile:
     reader = csv.DictReader(csvfile)
     row: dict[str, Any]
     for row in reader:
@@ -97,20 +97,20 @@
     # If tests are ongoing, we don't want to overwrite the db.json file
     # Instead, we'll write to a new file and make sure the two files are the same
     # by comparing them as strings
-    with open("db_test.json", "w") as f:
+    with open("db_test.json", "w", encoding="utf-8") as f:
         json.dump(data, f, indent=4)
         f.write("\n")

-    with open(output_file) as f:
+    with open(output_file, encoding="utf-8") as f:
         old_data = f.read()
-    with open("db_test.json") as f:
+    with open("db_test.json", encoding="utf-8") as f:
         new_data = f.read()

     if old_data != new_data:
         print("db.json and db_test.json are different. Did you forget to run `convert.py`?")
         exit(1)
 else:
-    with open(output_file, "w") as f:
+    with open(output_file, "w", encoding="utf-8") as f:
         json.dump(data, f, indent=4)
         f.write("\n")
diff --git a/scripts/legacy_text/reverse_convert.py b/scripts/legacy_text/reverse_convert.py
index 8a0dad49..e9326e81 100644
--- a/scripts/legacy_text/reverse_convert.py
+++ b/scripts/legacy_text/reverse_convert.py
@@ -36,9 +36,9 @@
 output_file = "models.csv"

 # Load defaults and generation params for validation
-with open("defaults.json") as f:
+with open("defaults.json", encoding="utf-8") as f:
     defaults = json.load(f)
-with open("generation_params.json") as f:
+with open("generation_params.json", encoding="utf-8") as f:
     params = json.load(f)
@@ -226,7 +226,7 @@ def has_empty_config(record: dict[str, Any]) -> bool:
 # Read db.json
 try:
-    with open(input_file) as f:
+    with open(input_file, encoding="utf-8") as f:
         data: dict[str, dict[str, Any]] = json.load(f)
 except FileNotFoundError:
     print(f"Error: {input_file} not found")
@@ -341,17 +341,17 @@ def has_empty_config(record: dict[str, Any]) -> bool:
     "display_name",
 ]

-with open(actual_output, "w", newline="") as csvfile:
+with open(actual_output, "w", newline="", encoding="utf-8") as csvfile:
     writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
     writer.writeheader()
     writer.writerows(csv_rows)

 if TESTS_ONGOING and output_file_existed_before:
     # Compare the test output with the existing file
-    with open(output_file) as f:
+    with open(output_file, encoding="utf-8") as f:
         old_data = f.read()
-    with open(actual_output) as f:
+    with open(actual_output, encoding="utf-8") as f:
         new_data = f.read()

     if old_data != new_data:
diff --git a/src/horde_model_reference/backends/github_backend.py b/src/horde_model_reference/backends/github_backend.py
index 134fe395..0d39451f 100644
--- a/src/horde_model_reference/backends/github_backend.py
+++ b/src/horde_model_reference/backends/github_backend.py
@@ -539,7 +539,7 @@ def _load_converted_from_disk(
         try:
             # All v2 files are JSON format (including text_generation.json)
-            with open(file_path) as f:
+            with open(file_path, encoding="utf-8") as f:
                 data = cast(dict[str, Any], ujson.load(f))

             self._store_in_cache(category, data)
diff --git a/src/horde_model_reference/integrations/horde_api_models.py b/src/horde_model_reference/integrations/horde_api_models.py
index db8a55d7..f0aeec8f 100644
--- a/src/horde_model_reference/integrations/horde_api_models.py
+++ b/src/horde_model_reference/integrations/horde_api_models.py
@@ -236,6 +236,41 @@ class _StatsLookup(BaseModel):
     total: dict[str, int] = Field(default_factory=dict)


+def _strip_quantization_suffix(model_name: str) -> str:
+    """Strip quantization suffix from a model name, preserving size.
+
+    This is different from get_base_model_name, which strips BOTH size and quantization.
+    This function only strips quantization, keeping the size suffix.
+
+    Args:
+        model_name: Model name potentially with quantization suffix.
+
+    Returns:
+        Model name without quantization suffix, but with size preserved.
+
+    Example:
+        "Lumimaid-v0.2-8B-Q8_0" -> "Lumimaid-v0.2-8B"
+        "Lumimaid-v0.2-8B" -> "Lumimaid-v0.2-8B"
+        "koboldcpp/Lumimaid-v0.2-8B-Q4_K_M" -> "koboldcpp/Lumimaid-v0.2-8B"
+    """
+    import re
+
+    # Quantization patterns to strip (same as text_model_parser.QUANT_PATTERNS but as suffix)
+    quant_suffix_patterns = [
+        r"[-_](Q[2-8]_K(?:_[SMLH])?)$",  # -Q4_K_M, -Q5_K_S
+        r"[-_](Q[2-8]_[01])$",  # -Q4_0, -Q5_0, -Q8_0
+        r"[-_](Q[2-8])$",  # -Q4, -Q8
+        r"[-_](GGUF|GGML|GPTQ|AWQ|EXL2)$",
+        r"[-_](fp16|fp32|int8|int4)$",
+    ]
+
+    result = model_name
+    for pattern in quant_suffix_patterns:
+        result = re.sub(pattern, "", result, flags=re.IGNORECASE)
+
+    return result
+
+
 def _build_base_name_index(model_names: list[str]) -> dict[str, list[str]]:
     """Build an index mapping base model names to all matching model names.
@@ -283,14 +318,85 @@ def _build_base_name_index(model_names: list[str]) -> dict[str, list[str]]:
     return base_name_index


+def _build_model_with_size_index(model_names: list[str]) -> dict[str, list[str]]:
+    """Build an index mapping model names (with size, without quant) to all matching names.
+
+    This enables aggregating stats across quantization variants only (e.g., Q4_K_M, Q8_0)
+    while keeping different sizes separate.
+
+    Unlike _build_base_name_index, which groups ALL variants (including different sizes),
+    this index only groups quantization variants of the SAME-sized model.
+
+    The key normalizes:
+    - Backend prefix (stripped for matching, but preserved in values)
+    - Org prefix (stripped for matching)
+    - Quantization suffix (stripped for matching)
+
+    But preserves:
+    - Size suffix (8B, 12B, etc.)
+
+    Args:
+        model_names: List of model names from API stats (may include backend prefixes
+            and quantization suffixes).
+
+    Returns:
+        Dictionary mapping normalized model names (backend/model-size) to lists of
+        original model names (lowercase) that match that model.
+
+    Example:
+        Input: ["koboldcpp/Lumimaid-v0.2-8B", "koboldcpp/Lumimaid-v0.2-8B-Q8_0",
+                "koboldcpp/Lumimaid-v0.2-12B", "aphrodite/NeverSleep/Lumimaid-v0.2-8B"]
+        Output: {
+            "koboldcpp/lumimaid-v0.2-8b": [
+                "koboldcpp/lumimaid-v0.2-8b",
+                "koboldcpp/lumimaid-v0.2-8b-q8_0"
+            ],
+            "koboldcpp/lumimaid-v0.2-12b": ["koboldcpp/lumimaid-v0.2-12b"],
+            "aphrodite/lumimaid-v0.2-8b": ["aphrodite/neversleep/lumimaid-v0.2-8b"]
+        }
+    """
+    model_with_size_index: dict[str, list[str]] = {}
+
+    for model_name in model_names:
+        model_name_lower = model_name.lower()
+
+        # Extract backend prefix if present
+        backend_prefix = ""
+        stripped = model_name_lower
+        if stripped.startswith("aphrodite/"):
+            backend_prefix = "aphrodite/"
+            stripped = stripped[len("aphrodite/") :]
+        elif stripped.startswith("koboldcpp/"):
+            backend_prefix = "koboldcpp/"
+            stripped = stripped[len("koboldcpp/") :]
+
+        # Strip org prefix (e.g., "neversleep/lumimaid-v0.2-8b" -> "lumimaid-v0.2-8b")
+        if "/" in stripped:
+            stripped = stripped.split("/")[-1]
+
+        # Strip quantization suffix
+        stripped_no_quant = _strip_quantization_suffix(stripped)
+
+        # Build key: backend_prefix + model_name (no org, no quant, but with size)
+        key = f"{backend_prefix}{stripped_no_quant}"
+
+        if key not in model_with_size_index:
+            model_with_size_index[key] = []
+        if model_name_lower not in model_with_size_index[key]:
+            model_with_size_index[key].append(model_name_lower)
+
+    return model_with_size_index
+
+
 class IndexedHordeModelStats(RootModel[_StatsLookup]):
     """Indexed model stats for O(1) lookups by model name.

     This wraps the stats response and provides case-insensitive dictionary access.
     Time complexity: O(1) for lookups instead of O(n) for dict iteration.

-    Also builds a base-name index for aggregating stats across quantization variants
-    and different backend prefixes.
+    Two indexes are built:
+    - _base_name_index: Groups ALL variants (including different sizes) for group-level aggregation
+    - _model_with_size_index: Groups only quantization variants for per-model stats

     Usage:
         indexed = IndexedHordeModelStats(stats_response)
@@ -300,6 +406,7 @@ class IndexedHordeModelStats(RootModel[_StatsLookup]):
     root: _StatsLookup

     _base_name_index: dict[str, list[str]] = {}
+    _model_with_size_index: dict[str, list[str]] = {}

     def __init__(self, stats_response: HordeModelStatsResponse) -> None:
         """Build indexed lookups from stats response.
@@ -315,11 +422,13 @@ def __init__(self, stats_response: HordeModelStatsResponse) -> None:
         )
         super().__init__(root=lookups)

-        # Build base name index from all unique model names across all time periods
+        # Build indexes from all unique model names across all time periods
         all_model_names = (
             set(stats_response.day.keys()) | set(stats_response.month.keys()) | set(stats_response.total.keys())
         )
-        self._base_name_index = _build_base_name_index(list(all_model_names))
+        model_names_list = list(all_model_names)
+        self._base_name_index = _build_base_name_index(model_names_list)
+        self._model_with_size_index = _build_model_with_size_index(model_names_list)

     def get_day(self, model_name: str) -> int | None:
         """Get day count for a model (case-insensitive). O(1)."""
@@ -371,7 +480,9 @@ def get_aggregated_stats(self, canonical_name: str) -> tuple[int, int, int]:
         # Then, add all model names that share the same base model name
         # This catches quantization variants and org-prefixed variants
-        base_name = get_base_model_name(canonical_name).lower()
+        # Strip org prefix from canonical name if present (e.g., "NeverSleep/Lumimaid-v0.2" -> "Lumimaid-v0.2")
+        canonical_without_org = canonical_name.split("/")[-1] if "/" in canonical_name else canonical_name
+        base_name = get_base_model_name(canonical_without_org).lower()
         if base_name in self._base_name_index:
             for api_model_name in self._base_name_index[base_name]:
                 names_to_aggregate.add(api_model_name)
@@ -391,37 +502,59 @@ def get_aggregated_stats(self, canonical_name: str) -> tuple[int, int, int]:
     def get_stats_with_variations(
         self, canonical_name: str
     ) -> tuple[tuple[int, int, int], dict[str, tuple[int, int, int]]]:
-        """Get aggregated stats and individual backend variations.
+        """Get stats for a specific model broken down by backend.
+
+        Unlike get_aggregated_stats, which aggregates across all models with the same
+        base name (e.g., all Lumimaid-v0.2 sizes), this method returns stats only for
+        the exact model specified (including its quantization variants), broken down
+        by backend prefix.

-        This method returns both the aggregated stats (same as get_aggregated_stats)
-        and a dictionary of individual backend stats keyed by backend name.
-        Now includes quantization variants and org-prefixed variants via base name matching.
+        This enables showing per-model stats in the UI when displaying grouped models,
+        where each model variant (8B, 12B, etc.) shows its own stats by backend.

         Args:
             canonical_name: The canonical model name from the model reference.

         Returns:
             Tuple of (aggregated_stats, variations_dict) where:
-            - aggregated_stats: (day_total, month_total, total_total) aggregated
+            - aggregated_stats: (day_total, month_total, total_total) for this exact model
             - variations_dict: Dict of backend_name -> (day, month, total)
               Keys are 'canonical', 'aphrodite', 'koboldcpp' depending on what's found
         """
-        from horde_model_reference.analytics.text_model_parser import get_base_model_name
         from horde_model_reference.meta_consts import get_model_name_variants

-        # Collect all model names to aggregate (use set to avoid double-counting)
+        # Collect all model names that are variants of this specific model
+        # Use _model_with_size_index to include quantization variants, but NOT size variants
        names_to_aggregate: set[str] = set()

-        # First, add exact variants from get_model_name_variants
+        # Get exact backend-prefixed variants
         variants = get_model_name_variants(canonical_name)
         for variant in variants:
-            names_to_aggregate.add(variant.lower())
-
-        # Then, add all model names that share the same base model name
-        base_name = get_base_model_name(canonical_name).lower()
-        if base_name in self._base_name_index:
-            for api_model_name in self._base_name_index[base_name]:
-                names_to_aggregate.add(api_model_name)
+            variant_lower = variant.lower()
+            names_to_aggregate.add(variant_lower)
+
+            # Build the normalized key to look up in _model_with_size_index
+            # The key format is: [backend_prefix/]model_name (no org, no quant)
+            backend_prefix = ""
+            stripped = variant_lower
+            if stripped.startswith("aphrodite/"):
+                backend_prefix = "aphrodite/"
+                stripped = stripped[len("aphrodite/") :]
+            elif stripped.startswith("koboldcpp/"):
+                backend_prefix = "koboldcpp/"
+                stripped = stripped[len("koboldcpp/") :]
+
+            # Strip org prefix if present
+            if "/" in stripped:
+                stripped = stripped.split("/")[-1]
+
+            # Strip quantization suffix and build key
+            stripped_no_quant = _strip_quantization_suffix(stripped)
+            key = f"{backend_prefix}{stripped_no_quant}"
+
+            if key in self._model_with_size_index:
+                for api_model_name in self._model_with_size_index[key]:
+                    names_to_aggregate.add(api_model_name)

         # Track stats by backend for variations dict
         backend_stats: dict[str, tuple[int, int, int]] = {
diff --git a/src/horde_model_reference/legacy/convert_all_legacy_dbs.py b/src/horde_model_reference/legacy/convert_all_legacy_dbs.py
index f0957921..dda4b4d1 100644
--- a/src/horde_model_reference/legacy/convert_all_legacy_dbs.py
+++ b/src/horde_model_reference/legacy/convert_all_legacy_dbs.py
@@ -10,6 +10,7 @@
     LegacyStableDiffusionConverter,
     LegacyTextGenerationConverter,
 )
+from horde_model_reference.meta_consts import no_legacy_format_available_categories
 from horde_model_reference.path_consts import normalize_legacy_base_path
@@ -140,6 +141,10 @@ def convert_legacy_database_by_category(
     Returns:
         True if the conversion succeeded, False otherwise.
""" + if model_category in no_legacy_format_available_categories: + logger.info(f"Skipping legacy database conversion for category: {model_category} (no legacy format available)") + return True + normalized_legacy_path = normalize_legacy_base_path(legacy_path) normalized_target_path = normalize_legacy_base_path(target_path) diff --git a/src/horde_model_reference/legacy/validate_sd.py b/src/horde_model_reference/legacy/validate_sd.py index 746e3887..9a8fe56e 100644 --- a/src/horde_model_reference/legacy/validate_sd.py +++ b/src/horde_model_reference/legacy/validate_sd.py @@ -26,7 +26,7 @@ def validate_legacy_stable_diffusion_db( bool: True if the validation passes, False otherwise. """ raw_json_sd_db: str - with open(sd_db) as sd_db_file: + with open(sd_db, encoding="utf-8") as sd_db_file: raw_json_sd_db = sd_db_file.read() try: loaded_json_sd_db = json.loads(raw_json_sd_db) diff --git a/src/horde_model_reference/meta_consts.py b/src/horde_model_reference/meta_consts.py index ad092fa1..8e9a8dc8 100644 --- a/src/horde_model_reference/meta_consts.py +++ b/src/horde_model_reference/meta_consts.py @@ -100,6 +100,13 @@ class MODEL_REFERENCE_CATEGORY(StrEnum): no_legacy_format_available_categories = [ MODEL_REFERENCE_CATEGORY.video_generation, MODEL_REFERENCE_CATEGORY.audio_generation, + MODEL_REFERENCE_CATEGORY.lora, + MODEL_REFERENCE_CATEGORY.ti, +] + +categories_managed_elsewhere = [ + MODEL_REFERENCE_CATEGORY.lora, + MODEL_REFERENCE_CATEGORY.ti, ] @@ -172,6 +179,8 @@ class KNOWN_IMAGE_GENERATION_BASELINE(StrEnum): flux_1 = auto() # TODO: Extract flux and create "IMAGE_GENERATION_BASELINE_CATEGORY" due to name inconsistency flux_schnell = auto() # FIXME flux_dev = auto() # FIXME + qwen_image = auto() + z_image_turbo = auto() STABLE_DIFFUSION_BASELINE_CATEGORY = KNOWN_IMAGE_GENERATION_BASELINE @@ -213,6 +222,10 @@ class KNOWN_IMAGE_GENERATION_BASELINE(StrEnum): "stable cascade", ] +_alternative_qwen_image_baseline_names = ["qwen_image", "qwen image", "qwen-image", "qwen"] + +_alternative_z_image_turbo_baseline_names = ["z_image_turbo", "z image turbo", "zimage-turbo", "zimage"] + def matching_baseline_exists( baseline: str, @@ -238,6 +251,10 @@ def matching_baseline_exists( return baseline in _alternative_flux_dev_baseline_names if known_image_generation_baseline == KNOWN_IMAGE_GENERATION_BASELINE.stable_cascade: return baseline in _alternative_stable_cascade_baseline_names + if known_image_generation_baseline == KNOWN_IMAGE_GENERATION_BASELINE.qwen_image: + return baseline in _alternative_qwen_image_baseline_names + if known_image_generation_baseline == KNOWN_IMAGE_GENERATION_BASELINE.z_image_turbo: + return baseline in _alternative_z_image_turbo_baseline_names return baseline == known_image_generation_baseline.name @@ -300,6 +317,10 @@ def matching_baseline_exists( KNOWN_IMAGE_GENERATION_BASELINE.stable_diffusion_xl: 1024, KNOWN_IMAGE_GENERATION_BASELINE.stable_cascade: 1024, KNOWN_IMAGE_GENERATION_BASELINE.flux_1: 1024, + KNOWN_IMAGE_GENERATION_BASELINE.flux_schnell: 1024, + KNOWN_IMAGE_GENERATION_BASELINE.flux_dev: 1024, + KNOWN_IMAGE_GENERATION_BASELINE.qwen_image: 1024, + KNOWN_IMAGE_GENERATION_BASELINE.z_image_turbo: 1024, } """The single-side preferred resolution for each known stable diffusion baseline.""" diff --git a/src/horde_model_reference/model_reference_manager.py b/src/horde_model_reference/model_reference_manager.py index 43ecddac..c0ec6fcf 100644 --- a/src/horde_model_reference/model_reference_manager.py +++ b/src/horde_model_reference/model_reference_manager.py @@ -18,7 
@@ -18,7 +18,7 @@
     ModelReferenceBackend,
     RedisBackend,
 )
-from horde_model_reference.meta_consts import MODEL_REFERENCE_CATEGORY
+from horde_model_reference.meta_consts import MODEL_REFERENCE_CATEGORY, categories_managed_elsewhere
 from horde_model_reference.model_reference_records import (
     MODEL_RECORD_TYPE_LOOKUP,
     AudioGenerationModelRecord,
@@ -479,6 +479,10 @@ def _file_json_dict_to_model_reference(
             logger.warning(f"File dict json is None for {category}.")
             return None

+        if category in categories_managed_elsewhere:
+            logger.info(f"Skipping conversion for category: {category} (managed elsewhere)")
+            return None
+
         try:
             record_type = MODEL_RECORD_TYPE_LOOKUP.get(category, GenericModelRecord)
             model_reference: dict[str, GenericModelRecord] = {}
diff --git a/src/horde_model_reference/model_reference_metadata.py b/src/horde_model_reference/model_reference_metadata.py
index 3fdf1063..c255de3e 100644
--- a/src/horde_model_reference/model_reference_metadata.py
+++ b/src/horde_model_reference/model_reference_metadata.py
@@ -374,7 +374,7 @@ def _read_metadata_file(self, file_path: Path) -> CategoryMetadata | None:
             return None

         try:
-            with open(file_path) as f:
+            with open(file_path, encoding="utf-8") as f:
                 data = json.load(f)
             return CategoryMetadata(**data)
         except Exception as e:
@@ -395,7 +395,7 @@ def _write_metadata_file(self, file_path: Path, metadata: CategoryMetadata) -> N
         try:
             # Write to temp file
-            with open(temp_path, "w") as f:
+            with open(temp_path, "w", encoding="utf-8") as f:
                 json.dump(metadata.model_dump(mode="json"), f, indent=2)
                 os.fsync(f.fileno())
diff --git a/tests/integrations/test_stats_aggregation.py b/tests/integrations/test_stats_aggregation.py
index 4eb55985..235fb948 100644
--- a/tests/integrations/test_stats_aggregation.py
+++ b/tests/integrations/test_stats_aggregation.py
@@ -184,3 +184,64 @@ def test_aggregate_stats_model_not_found(self) -> None:
         assert day == 0
         assert month == 0
         assert total == 0
+
+    def test_aggregate_stats_canonical_name_with_org_prefix(self) -> None:
+        """Test that canonical names with org prefix correctly match API stats.
+
+        This is a critical test case because model reference entries often have
+        org prefixes (e.g., "NeverSleep/Lumimaid-v0.2") but API stats may have
+        different prefixing patterns.
+        """
+        stats = HordeModelStatsResponse(
+            day={
+                "koboldcpp/Lumimaid-v0.2-8B": 4080,
+                "koboldcpp/Lumimaid-v0.2-8B-Q8_0": 1500,
+                "aphrodite/NeverSleep/Lumimaid-v0.2-8B": 2000,
+            },
+            month={
+                "koboldcpp/Lumimaid-v0.2-8B": 40000,
+                "koboldcpp/Lumimaid-v0.2-8B-Q8_0": 15000,
+                "aphrodite/NeverSleep/Lumimaid-v0.2-8B": 20000,
+            },
+            total={},
+        )
+
+        indexed = IndexedHordeModelStats(stats)
+
+        # Query with canonical name that HAS org prefix (like in model reference)
+        canonical_with_org = "NeverSleep/Lumimaid-v0.2"
+        day, month, _total = indexed.get_aggregated_stats(canonical_with_org)
+
+        # Should aggregate all variants even though canonical has org prefix
+        assert day == 4080 + 1500 + 2000
+        assert month == 40000 + 15000 + 20000
+
+    def test_get_stats_with_variations_canonical_with_org_prefix(self) -> None:
+        """Test variations breakdown with canonical name that has org prefix.
+
+        When querying for a specific model like 'NeverSleep/Lumimaid-v0.2-8B',
+        get_stats_with_variations should find:
+        - The aphrodite variant: aphrodite/NeverSleep/Lumimaid-v0.2-8B
+        - The koboldcpp variant: koboldcpp/Lumimaid-v0.2-8B
+        - Quantization variants: koboldcpp/Lumimaid-v0.2-8B-Q8_0
+        """
+        stats = HordeModelStatsResponse(
+            day={
+                "koboldcpp/Lumimaid-v0.2-8B": 300,
+                "koboldcpp/Lumimaid-v0.2-8B-Q8_0": 400,
+                "aphrodite/NeverSleep/Lumimaid-v0.2-8B": 200,
+            },
+            month={},
+            total={},
+        )
+
+        indexed = IndexedHordeModelStats(stats)
+
+        # Query with canonical name that HAS org prefix (like in model reference)
+        (day_total, _m, _t), variations = indexed.get_stats_with_variations("NeverSleep/Lumimaid-v0.2-8B")
+
+        assert day_total == 300 + 400 + 200
+        assert "aphrodite" in variations
+        assert "koboldcpp" in variations
+        assert variations["aphrodite"][0] == 200
+        assert variations["koboldcpp"][0] == 300 + 400
diff --git a/tests/test_metadata.py b/tests/test_metadata.py
index 559f7513..b3c88591 100644
--- a/tests/test_metadata.py
+++ b/tests/test_metadata.py
@@ -547,7 +547,7 @@ def test_v2_metadata_file_creation(self, primary_base: Path) -> None:
         assert expected_path.is_file(), "V2 metadata path should be a file"

         # Verify file contains valid JSON
-        with open(expected_path) as f:
+        with open(expected_path, encoding="utf-8") as f:
             file_content = f.read()
         assert file_content, "Metadata file should not be empty"
         file_data = json.loads(file_content)
@@ -590,7 +590,7 @@ def test_legacy_metadata_file_creation(self, primary_base: Path) -> None:
         assert expected_path != v2_path, "Legacy and v2 metadata paths should be different"

         # Verify file contents
-        with open(expected_path) as f:
+        with open(expected_path, encoding="utf-8") as f:
             file_data = json.loads(f.read())

         assert file_data["category"] == category.value
@@ -715,7 +715,7 @@ def test_metadata_file_json_formatting(self, primary_base: Path) -> None:
         )

         metadata_path = primary_base / "meta" / "v2" / f"{category.value}_metadata.json"
-        with open(metadata_path) as f:
+        with open(metadata_path, encoding="utf-8") as f:
             content = f.read()

         # Verify JSON is indented (not minified)
@@ -754,13 +754,13 @@ def test_metadata_cache_invalidation_on_file_change(
         # Modify file directly (simulating external change)
         metadata_path = primary_base / "meta" / "v2" / f"{category.value}_metadata.json"
-        with open(metadata_path) as f:
+        with open(metadata_path, encoding="utf-8") as f:
             file_data = json.loads(f.read())

         file_data["total_creates"] = initial_creates + 10
         file_data["last_updated"] = int(time.time())

-        with open(metadata_path, "w") as f:
+        with open(metadata_path, "w", encoding="utf-8") as f:
             json.dump(file_data, f, indent=2)

         # Touch file to update mtime
@@ -826,7 +826,7 @@ def test_metadata_file_missing_fields_handled(self, primary_base: Path) -> None:
             "backend_type": "FileSystemBackend",
         }

-        with open(metadata_path, "w") as f:
+        with open(metadata_path, "w", encoding="utf-8") as f:
             json.dump(minimal_data, f)

         # Clear cache
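
Reviewer note: the distinction between the two new indexes is easiest to see with a quick standalone check. The following is a minimal, hypothetical sketch (not part of the diff itself) that exercises `_strip_quantization_suffix` and `_build_model_with_size_index` exactly as their docstrings above describe; it assumes both private helpers are importable from `horde_model_reference.integrations.horde_api_models` once this diff is applied.

```python
from horde_model_reference.integrations.horde_api_models import (
    _build_model_with_size_index,
    _strip_quantization_suffix,
)

# Quantization suffixes are stripped, but the size suffix is preserved.
assert _strip_quantization_suffix("Lumimaid-v0.2-8B-Q4_K_M") == "Lumimaid-v0.2-8B"
assert _strip_quantization_suffix("Lumimaid-v0.2-8B") == "Lumimaid-v0.2-8B"

# Quant variants of the same size collapse onto one key; different sizes
# and different backend prefixes stay separate.
index = _build_model_with_size_index(
    [
        "koboldcpp/Lumimaid-v0.2-8B",
        "koboldcpp/Lumimaid-v0.2-8B-Q8_0",
        "koboldcpp/Lumimaid-v0.2-12B",
        "aphrodite/NeverSleep/Lumimaid-v0.2-8B",
    ]
)
assert index["koboldcpp/lumimaid-v0.2-8b"] == [
    "koboldcpp/lumimaid-v0.2-8b",
    "koboldcpp/lumimaid-v0.2-8b-q8_0",
]
assert index["koboldcpp/lumimaid-v0.2-12b"] == ["koboldcpp/lumimaid-v0.2-12b"]
assert index["aphrodite/lumimaid-v0.2-8b"] == ["aphrodite/neversleep/lumimaid-v0.2-8b"]
```

These assertions mirror the Example sections of the two docstrings, so they should hold as written if the diff applies cleanly.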