From 4260b2406bbdfc70a0c45a0525a90a3b8b289908 Mon Sep 17 00:00:00 2001 From: nb950 Date: Thu, 10 Apr 2025 12:08:24 -0400 Subject: [PATCH 1/7] add input directory checking for sample audio plugin --- .../audio_transcription/main.py | 35 +++++++++++++++++-- src/rb-lib/rb/lib/utils.py | 4 +-- 2 files changed, 35 insertions(+), 4 deletions(-) diff --git a/src/audio-transcription/audio_transcription/main.py b/src/audio-transcription/audio_transcription/main.py index d367092c..57a558b2 100644 --- a/src/audio-transcription/audio_transcription/main.py +++ b/src/audio-transcription/audio_transcription/main.py @@ -3,6 +3,8 @@ import logging from typing import TypedDict +from pathlib import Path +from pydantic import DirectoryPath, field_validator import typer from rb.api.models import ( BatchTextResponse, @@ -19,7 +21,7 @@ logger = logging.getLogger(__name__) logging.basicConfig( - level=logging.INFO, + level=logging.DEBUG, format="%(asctime)s [%(levelname)s] %(name)s: %(message)s", ) @@ -35,9 +37,38 @@ model = AudioTranscriptionModel() +AUDIO_EXTENSIONS = {".mp3", ".wav", ".flac", ".aac"} + + +class AudioDirectory(DirectoryInput): + path: DirectoryPath + + @field_validator("path") + @classmethod + def validate_directory(cls, v): + path = Path(v) + + if not path.exists(): + raise ValueError(f"validate Directory: '{v}' does not exist.") + if not path.is_dir(): + raise ValueError(f"validate Directory: Path '{v}' is not a directory.") + + audio_files = list(path.glob("*")) + if not audio_files: + raise ValueError("validate Directory: Directory is empty.") + + no_audio = [f.name for f in audio_files if f.suffix.lower() in AUDIO_EXTENSIONS] + + if len(no_audio) < 1: + raise ValueError( + f"validate Directory: No-audio files found in directory: {no_audio}" + ) + + return v + class AudioInput(TypedDict): - input_dir: DirectoryInput + input_dir: AudioDirectory def task_schema() -> TaskSchema: diff --git a/src/rb-lib/rb/lib/utils.py b/src/rb-lib/rb/lib/utils.py index bd94c4ee..664aecee 100644 --- a/src/rb-lib/rb/lib/utils.py +++ b/src/rb-lib/rb/lib/utils.py @@ -72,8 +72,8 @@ def ensure_ml_func_hinting_and_task_schemas_are_valid( input_type_hint is FileInput ), f"For key {key}, the input type is NewFileInputType, but the TypeDict hint is {input_type_hint}. Change to FileInput." case InputType.DIRECTORY: - assert ( - input_type_hint is DirectoryInput + assert issubclass( + input_type_hint, DirectoryInput ), f"For key {key}, the input type is InputType.DIRECTORY, but the TypeDict hint is {input_type_hint}. Change to DirectoryInput." case InputType.TEXT: assert ( From 72ddb2cd2c636bcba8360bc676cc50de6c02de67 Mon Sep 17 00:00:00 2001 From: nb950 Date: Thu, 10 Apr 2025 15:43:44 -0400 Subject: [PATCH 2/7] add common error handler --- .../audio_transcription/main.py | 10 ++++++---- src/rb-api/rb/api/main.py | 19 ++++++++++++++++++- src/rb-api/rb/api/routes/cli.py | 7 ++++--- 3 files changed, 28 insertions(+), 8 deletions(-) diff --git a/src/audio-transcription/audio_transcription/main.py b/src/audio-transcription/audio_transcription/main.py index 57a558b2..81ca7314 100644 --- a/src/audio-transcription/audio_transcription/main.py +++ b/src/audio-transcription/audio_transcription/main.py @@ -49,19 +49,21 @@ def validate_directory(cls, v): path = Path(v) if not path.exists(): - raise ValueError(f"validate Directory: '{v}' does not exist.") + raise ValueError(f"validate audio directory: '{v}' does not exist.") if not path.is_dir(): - raise ValueError(f"validate Directory: Path '{v}' is not a directory.") + raise ValueError( + f"validate audio directory: Path '{v}' is not a directory." + ) audio_files = list(path.glob("*")) if not audio_files: - raise ValueError("validate Directory: Directory is empty.") + raise ValueError(f"validate audio directory: Directory {v} is empty.") no_audio = [f.name for f in audio_files if f.suffix.lower() in AUDIO_EXTENSIONS] if len(no_audio) < 1: raise ValueError( - f"validate Directory: No-audio files found in directory: {no_audio}" + f"validate audio directory: No-audio files found in directory: {v}" ) return v diff --git a/src/rb-api/rb/api/main.py b/src/rb-api/rb/api/main.py index 80bec1a9..28cc0e09 100644 --- a/src/rb-api/rb/api/main.py +++ b/src/rb-api/rb/api/main.py @@ -1,8 +1,10 @@ import multiprocessing import os import sys -from fastapi import FastAPI +from fastapi import FastAPI, HTTPException, Request, status from fastapi.staticfiles import StaticFiles +from fastapi.exceptions import RequestValidationError +from fastapi.responses import JSONResponse from rb.api import routes app = FastAPI( @@ -21,6 +23,21 @@ name="static", ) + +@app.exception_handler(RequestValidationError) +async def validation_exception_handler(request, exc: RequestValidationError): + """response handler for all plugin input validation errors""" + error_msg = str(exc) + for e in exc.errors(): + error_msg = e.get("msg") + print("debug e", e.get("msg")) + + raise HTTPException( # pylint: disable=raise-missing-from + status_code=422, + detail={"error": f"{error_msg}"}, + ) + + app.include_router(routes.probes_router, prefix="/probes") app.include_router(routes.cli_to_api_router) app.include_router(routes.ui_router) diff --git a/src/rb-api/rb/api/routes/cli.py b/src/rb-api/rb/api/routes/cli.py index 9f547453..ce045c12 100644 --- a/src/rb-api/rb/api/routes/cli.py +++ b/src/rb-api/rb/api/routes/cli.py @@ -68,10 +68,11 @@ def static_endpoint(callback: Callable, *args, **kwargs) -> ResponseBody: # this has an issue of nor sending back details to desktop ui the api caller ? raise ValueError(f"Invalid return type from Typer command: {type(result)}") except Exception as e: - logger.error("Error executing CLI command: %s", e) + # response handler for all plugin runtime errors + logger.error("Error: %s", e) raise HTTPException( # pylint: disable=raise-missing-from - status_code=400, - detail={"error": f"Typer CLI aborted {e}", "stdout": stdout[-10:]}, + status_code=500, + detail={"error": f"{e}"}, ) From 19716068030698febbccc0173fcbb3d6beea475a Mon Sep 17 00:00:00 2001 From: nb950 Date: Thu, 10 Apr 2025 15:48:01 -0400 Subject: [PATCH 3/7] fix lint --- src/rb-api/rb/api/main.py | 4 +--- src/rb-api/rb/api/routes/cli.py | 2 +- 2 files changed, 2 insertions(+), 4 deletions(-) diff --git a/src/rb-api/rb/api/main.py b/src/rb-api/rb/api/main.py index 28cc0e09..42d3247d 100644 --- a/src/rb-api/rb/api/main.py +++ b/src/rb-api/rb/api/main.py @@ -1,10 +1,8 @@ import multiprocessing import os import sys -from fastapi import FastAPI, HTTPException, Request, status -from fastapi.staticfiles import StaticFiles +from fastapi import FastAPI, HTTPException from fastapi.exceptions import RequestValidationError -from fastapi.responses import JSONResponse from rb.api import routes app = FastAPI( diff --git a/src/rb-api/rb/api/routes/cli.py b/src/rb-api/rb/api/routes/cli.py index ce045c12..e39468a1 100644 --- a/src/rb-api/rb/api/routes/cli.py +++ b/src/rb-api/rb/api/routes/cli.py @@ -69,7 +69,7 @@ def static_endpoint(callback: Callable, *args, **kwargs) -> ResponseBody: raise ValueError(f"Invalid return type from Typer command: {type(result)}") except Exception as e: # response handler for all plugin runtime errors - logger.error("Error: %s", e) + logger.error("Error: %s %s", e, stdout) raise HTTPException( # pylint: disable=raise-missing-from status_code=500, detail={"error": f"{e}"}, From 98b3e0069913d31efdf2db3fb416e187db2000a9 Mon Sep 17 00:00:00 2001 From: nb950 Date: Thu, 10 Apr 2025 15:50:23 -0400 Subject: [PATCH 4/7] fix import --- src/rb-api/rb/api/main.py | 1 + 1 file changed, 1 insertion(+) diff --git a/src/rb-api/rb/api/main.py b/src/rb-api/rb/api/main.py index 42d3247d..ae231e01 100644 --- a/src/rb-api/rb/api/main.py +++ b/src/rb-api/rb/api/main.py @@ -3,6 +3,7 @@ import sys from fastapi import FastAPI, HTTPException from fastapi.exceptions import RequestValidationError +from fastapi.staticfiles import StaticFiles from rb.api import routes app = FastAPI( From a01f381d8602b5345c77d92c1ef6e212b883c682 Mon Sep 17 00:00:00 2001 From: nb950 Date: Thu, 10 Apr 2025 16:02:53 -0400 Subject: [PATCH 5/7] use http code --- src/rb-api/rb/api/main.py | 7 +++---- src/rb-api/rb/api/routes/cli.py | 4 ++-- 2 files changed, 5 insertions(+), 6 deletions(-) diff --git a/src/rb-api/rb/api/main.py b/src/rb-api/rb/api/main.py index ae231e01..9533cdce 100644 --- a/src/rb-api/rb/api/main.py +++ b/src/rb-api/rb/api/main.py @@ -1,7 +1,7 @@ import multiprocessing import os import sys -from fastapi import FastAPI, HTTPException +from fastapi import FastAPI, HTTPException, Request, status from fastapi.exceptions import RequestValidationError from fastapi.staticfiles import StaticFiles from rb.api import routes @@ -24,15 +24,14 @@ @app.exception_handler(RequestValidationError) -async def validation_exception_handler(request, exc: RequestValidationError): +async def validation_exception_handler(request: Request, exc: RequestValidationError): # fmt: skip """response handler for all plugin input validation errors""" error_msg = str(exc) for e in exc.errors(): error_msg = e.get("msg") - print("debug e", e.get("msg")) raise HTTPException( # pylint: disable=raise-missing-from - status_code=422, + status_code=status.HTTP_422_UNPROCESSABLE_ENTITY, detail={"error": f"{error_msg}"}, ) diff --git a/src/rb-api/rb/api/routes/cli.py b/src/rb-api/rb/api/routes/cli.py index e39468a1..a2024c33 100644 --- a/src/rb-api/rb/api/routes/cli.py +++ b/src/rb-api/rb/api/routes/cli.py @@ -5,7 +5,7 @@ from typing import Callable, Generator, Optional import typer -from fastapi import APIRouter, HTTPException, Response +from fastapi import APIRouter, HTTPException, Response, status from fastapi.responses import StreamingResponse from makefun import with_signature from pydantic import BaseModel @@ -71,7 +71,7 @@ def static_endpoint(callback: Callable, *args, **kwargs) -> ResponseBody: # response handler for all plugin runtime errors logger.error("Error: %s %s", e, stdout) raise HTTPException( # pylint: disable=raise-missing-from - status_code=500, + status_code=status.HTTP_500_INTERNAL_SERVER_ERROR, detail={"error": f"{e}"}, ) From c4ca7c4d5513b213f66a1a4431f9bc9f0a493b4c Mon Sep 17 00:00:00 2001 From: nb950 Date: Thu, 1 May 2025 13:36:22 -0400 Subject: [PATCH 6/7] refactor based on PR comment --- .../audio_transcription/main.py | 35 ++--------- src/rb-api/rb/api/models.py | 48 ++++++++++++++- src/rb-lib/rb/lib/abstract_parser.py | 60 ------------------- 3 files changed, 53 insertions(+), 90 deletions(-) delete mode 100644 src/rb-lib/rb/lib/abstract_parser.py diff --git a/src/audio-transcription/audio_transcription/main.py b/src/audio-transcription/audio_transcription/main.py index 81ca7314..570e5cb4 100644 --- a/src/audio-transcription/audio_transcription/main.py +++ b/src/audio-transcription/audio_transcription/main.py @@ -1,14 +1,14 @@ """audio transcribe plugin""" import logging -from typing import TypedDict +from typing import List, TypedDict -from pathlib import Path -from pydantic import DirectoryPath, field_validator +from pydantic import DirectoryPath import typer from rb.api.models import ( BatchTextResponse, DirectoryInput, + FileFilterDirectory, InputSchema, InputType, ResponseBody, @@ -40,33 +40,10 @@ AUDIO_EXTENSIONS = {".mp3", ".wav", ".flac", ".aac"} -class AudioDirectory(DirectoryInput): - path: DirectoryPath - - @field_validator("path") - @classmethod - def validate_directory(cls, v): - path = Path(v) - - if not path.exists(): - raise ValueError(f"validate audio directory: '{v}' does not exist.") - if not path.is_dir(): - raise ValueError( - f"validate audio directory: Path '{v}' is not a directory." - ) - - audio_files = list(path.glob("*")) - if not audio_files: - raise ValueError(f"validate audio directory: Directory {v} is empty.") +class AudioDirectory(FileFilterDirectory): - no_audio = [f.name for f in audio_files if f.suffix.lower() in AUDIO_EXTENSIONS] - - if len(no_audio) < 1: - raise ValueError( - f"validate audio directory: No-audio files found in directory: {v}" - ) - - return v + path: DirectoryPath + file_extensions: List[str] = AUDIO_EXTENSIONS class AudioInput(TypedDict): diff --git a/src/rb-api/rb/api/models.py b/src/rb-api/rb/api/models.py index 90406093..36e318f5 100644 --- a/src/rb-api/rb/api/models.py +++ b/src/rb-api/rb/api/models.py @@ -5,9 +5,19 @@ from __future__ import annotations from enum import Enum +from pathlib import Path from typing import Annotated, Any, Dict, List, Literal, Optional, Union -from pydantic import BaseModel, ConfigDict, DirectoryPath, Field, FilePath, RootModel +from pydantic import ( + BaseModel, + ConfigDict, + DirectoryPath, + Field, + FilePath, + RootModel, + field_validator, + model_validator, +) API_APPMETDATA = "app_metadata" API_ROUTES = "routes" @@ -51,6 +61,42 @@ class DirectoryInput(BaseModel): path: DirectoryPath +class FileFilterDirectory(DirectoryInput): + """Find files with file_extensions in path directory.""" + + model_config = ConfigDict( + populate_by_name=True, + ) + path: str + file_extensions: List[str] + + @field_validator("path") + @classmethod + def validate_directory(cls, v): + path = Path(v) + + if not path.exists(): + raise ValueError(f"validate directory: '{v}' does not exist.") + if not path.is_dir(): + raise ValueError(f"validate directory: Path '{v}' is not a directory.") + return v + + @model_validator(mode="after") + def file_filter(self) -> "FileFilterDirectory": + path_obj = Path(self.path) + files = list(path_obj.glob("*")) + if not files: + raise ValueError(f"validate directory: Directory {path_obj} is empty.") + number_of_matched_files = [ + f.name for f in files if f.suffix.lower() in self.file_extensions + ] + if len(number_of_matched_files) < 1: + raise ValueError( + f"validate directory: No file extensions matching {self.file_extensions} found in directory: {path_obj}" + ) + return self + + class TextInput(BaseModel): model_config = ConfigDict( populate_by_name=True, diff --git a/src/rb-lib/rb/lib/abstract_parser.py b/src/rb-lib/rb/lib/abstract_parser.py deleted file mode 100644 index 83f70b66..00000000 --- a/src/rb-lib/rb/lib/abstract_parser.py +++ /dev/null @@ -1,60 +0,0 @@ -import logging -from abc import ABC, abstractmethod -from typing import Any, Dict, List - -logger = logging.getLogger(__name__) - - -class AbstractParser(ABC): - """ - Abstract base class for all parsers. - Each parser must define: - - Input validation - - CLI parsing - - API routes (automatically registered) - """ - - def __init__( - self, - name: str, - version: str = "1.0.0", - author: str = "Unknown", - description: str = "", - ): - self.name = name - self.version = version - self.author = author - self.description = description - - @abstractmethod - def cli_parser(self, path: str): - """Parse CLI input.""" - pass - - @property - @abstractmethod - def routes(self) -> List[Dict[str, Any]]: - """ - Define API routes for this parser. - Every parser **must define** its own routes. - """ - pass - - @property - @abstractmethod - def task_schema(self) -> Dict[str, Any]: - """ - Return a schema dict (used by RescueBox desktop UI). - This replaces the `@app.command('task_schema')` handler in the plugin. - """ - pass - - @property - def metadata(self) -> Dict[str, Any]: - """Returns the metadata of the parser (configurable).""" - return { - "name": self.name, - "version": self.version, - "author": self.author, - "description": self.description, - } From 3447ee41bccff72e3f07407a2c04cacbeb974d7e Mon Sep 17 00:00:00 2001 From: nb950 Date: Fri, 2 May 2025 11:30:19 -0400 Subject: [PATCH 7/7] add negative test --- src/audio-transcription/tests/negative/foo.txt | 0 src/audio-transcription/tests/test_main.py | 18 ++++++++++++++++++ 2 files changed, 18 insertions(+) create mode 100644 src/audio-transcription/tests/negative/foo.txt diff --git a/src/audio-transcription/tests/negative/foo.txt b/src/audio-transcription/tests/negative/foo.txt new file mode 100644 index 00000000..e69de29b diff --git a/src/audio-transcription/tests/test_main.py b/src/audio-transcription/tests/test_main.py index 13fb2637..6cd6d87f 100644 --- a/src/audio-transcription/tests/test_main.py +++ b/src/audio-transcription/tests/test_main.py @@ -1,3 +1,4 @@ +import json from pathlib import Path from rb.api.models import ResponseBody @@ -55,3 +56,20 @@ def test_api_transcribe_command(self): assert response.status_code == 200 body = ResponseBody(**response.json()) assert body.root.texts and "Twinkle" in body.root.texts[0].value + + def test_negative_api_transcribe_command(self): + """pass in valid directory but no audio files , expect 422 validation error""" + transcribe_api = f"/{APP_NAME}/transcribe" + full_path = Path.cwd() / "src" / "audio-transcription" / "tests" / "negative" + input_json = { + "inputs": { + "input_dir": { + "path": str(full_path), + } + } + } + response = self.client.post(transcribe_api, json=input_json) + assert response.status_code == 422 + resp = json.loads(json.dumps(response.json())) + print(f"Response: {resp["detail"]["error"]}") + assert resp and "No file extensions matching" in resp["detail"]["error"]