diff --git a/libs/admin-api-lib/src/admin_api_lib/api_endpoints/file_uploader.py b/libs/admin-api-lib/src/admin_api_lib/api_endpoints/file_uploader.py
index 3ab7464a..30a497c5 100644
--- a/libs/admin-api-lib/src/admin_api_lib/api_endpoints/file_uploader.py
+++ b/libs/admin-api-lib/src/admin_api_lib/api_endpoints/file_uploader.py
@@ -8,6 +8,7 @@
 class FileUploader(UploaderBase):
+    """File uploader implementation."""
 
     @abstractmethod
     async def upload_file(
@@ -16,7 +17,7 @@ async def upload_file(
         file: UploadFile,
     ) -> None:
         """
-        Uploads a source file for content extraction.
+        Upload a source file for content extraction.
 
         Parameters
         ----------
diff --git a/libs/admin-api-lib/src/admin_api_lib/api_endpoints/source_uploader.py b/libs/admin-api-lib/src/admin_api_lib/api_endpoints/source_uploader.py
index 5a1c50ae..68365c89 100644
--- a/libs/admin-api-lib/src/admin_api_lib/api_endpoints/source_uploader.py
+++ b/libs/admin-api-lib/src/admin_api_lib/api_endpoints/source_uploader.py
@@ -21,7 +21,7 @@ async def upload_source(
         timeout: Optional[float],
     ) -> None:
         """
-        Uploads the parameters for source content extraction.
+        Upload the parameters for source content extraction.
 
         Parameters
         ----------
diff --git a/libs/admin-api-lib/src/admin_api_lib/api_endpoints/uploader_base.py b/libs/admin-api-lib/src/admin_api_lib/api_endpoints/uploader_base.py
index a344dcc2..b382d7ad 100644
--- a/libs/admin-api-lib/src/admin_api_lib/api_endpoints/uploader_base.py
+++ b/libs/admin-api-lib/src/admin_api_lib/api_endpoints/uploader_base.py
@@ -7,9 +7,7 @@ class UploaderBase:
     """Base class for uploader API endpoints."""
 
     def __init__(self):
-        """
-        Initialize the UploaderBase.
-        """
+        """Initialize the UploaderBase."""
         self._background_threads = []
 
     def _prune_background_threads(self) -> list[Thread]:
diff --git a/libs/admin-api-lib/src/admin_api_lib/apis/admin_api.py b/libs/admin-api-lib/src/admin_api_lib/apis/admin_api.py
index c348b5de..bc705761 100644
--- a/libs/admin-api-lib/src/admin_api_lib/apis/admin_api.py
+++ b/libs/admin-api-lib/src/admin_api_lib/apis/admin_api.py
@@ -149,7 +149,7 @@ async def upload_file(
     request: Request,
 ) -> None:
     """
-    Uploads user selected sources.
+    Upload user selected sources.
 
     Parameters
     ----------
@@ -181,7 +181,7 @@ async def upload_source(
     key_value_pair: List[KeyValuePair] = Body(None, description="The key-value pairs for the source"),
 ) -> None:
     """
-    Uploads user selected sources.
+    Upload user selected sources.
 
     Parameters
     ----------
diff --git a/libs/admin-api-lib/src/admin_api_lib/impl/api_endpoints/default_document_reference_retriever.py b/libs/admin-api-lib/src/admin_api_lib/impl/api_endpoints/default_document_reference_retriever.py
index 20a98682..d327bf84 100644
--- a/libs/admin-api-lib/src/admin_api_lib/impl/api_endpoints/default_document_reference_retriever.py
+++ b/libs/admin-api-lib/src/admin_api_lib/impl/api_endpoints/default_document_reference_retriever.py
@@ -2,7 +2,6 @@
 
 import io
 import logging
-import traceback
 
 from fastapi import HTTPException, Response, status
 
@@ -54,10 +53,8 @@ async def adocument_reference_id_get(self, identification: str) -> Response:
             self._file_service.download_file(identification, document_buffer)
             logger.debug("DONE retrieving document with id: %s", identification)
             document_data = document_buffer.getvalue()
-        except Exception as e:
-            logger.error(
-                "Error retrieving document with id: %s. Error: %s %s", identification, e, traceback.format_exc()
-            )
+        except Exception:
+            logger.exception("Error retrieving document with id: %s", identification)
             raise ValueError(f"Document with id '{identification}' not found.")
         finally:
             document_buffer.close()
diff --git a/libs/admin-api-lib/src/admin_api_lib/impl/api_endpoints/default_file_uploader.py b/libs/admin-api-lib/src/admin_api_lib/impl/api_endpoints/default_file_uploader.py
index 668cacd5..c93f6826 100644
--- a/libs/admin-api-lib/src/admin_api_lib/impl/api_endpoints/default_file_uploader.py
+++ b/libs/admin-api-lib/src/admin_api_lib/impl/api_endpoints/default_file_uploader.py
@@ -1,6 +1,7 @@
+"""Module for the default file uploader implementation."""
+
 import logging
 from pathlib import Path
-import traceback
 import urllib
 import tempfile
 import asyncio
@@ -78,7 +79,7 @@ async def upload_file(
         file: UploadFile,
     ) -> None:
         """
-        Uploads a source file for content extraction.
+        Upload a source file for content extraction.
 
         Parameters
         ----------
@@ -109,7 +110,7 @@ async def upload_file(
             raise HTTPException(status_code=status.HTTP_400_BAD_REQUEST, detail=str(e))
         except Exception as e:
             self._key_value_store.upsert(source_name, Status.ERROR)
-            logger.error("Error while uploading %s = %s", source_name, str(e))
+            logger.exception("Error while uploading %s", source_name)
             raise HTTPException(status_code=status.HTTP_500_INTERNAL_SERVER_ERROR, detail=str(e))
 
     def _log_task_exception(self, task: asyncio.Task) -> None:
@@ -124,19 +125,16 @@ def _log_task_exception(self, task: asyncio.Task) -> None:
         if task.done() and not task.cancelled():
             try:
                 task.result()  # This will raise the exception if one occurred
-            except Exception as e:
-                logger.error("Background task failed with exception: %s", str(e))
-                logger.debug("Background task exception traceback: %s", traceback.format_exc())
+            except Exception:
+                logger.exception("Background task failed with exception")
 
     def _prune_background_tasks(self) -> None:
-        """
-        Remove completed background tasks from the list.
-        """
+        """Remove completed background tasks from the list."""
        self._background_tasks = [task for task in self._background_tasks if not task.done()]
 
     def _check_if_already_in_processing(self, source_name: str) -> None:
         """
-        Checks if the source is already in processing state.
+        Check if the source is already in processing state.
 
         Parameters
         ----------
@@ -196,9 +194,9 @@ async def _handle_source_upload(
             await asyncio.to_thread(self._rag_api.upload_information_piece, rag_information_pieces)
             self._key_value_store.upsert(source_name, Status.READY)
             logger.info("Source uploaded successfully: %s", source_name)
-        except Exception as e:
+        except Exception:
             self._key_value_store.upsert(source_name, Status.ERROR)
-            logger.error("Error while uploading %s = %s", source_name, str(e))
+            logger.exception("Error while uploading %s", source_name)
 
     def _add_file_url(self, file_name: str, base_url: str, chunked_documents: list[Document]):
         document_url = f"{base_url.rstrip('/')}/document_reference/{urllib.parse.quote_plus(file_name)}"
@@ -229,6 +227,6 @@ async def _asave_new_document(
             self._file_service.upload_file(Path(temp_file_path), filename)
             return filename
-        except Exception as e:
-            logger.error("Error during document saving: %s %s", e, traceback.format_exc())
+        except Exception:
+            logger.exception("Error during document saving")
             self._key_value_store.upsert(source_name, Status.ERROR)
diff --git a/libs/admin-api-lib/src/admin_api_lib/impl/api_endpoints/default_source_uploader.py b/libs/admin-api-lib/src/admin_api_lib/impl/api_endpoints/default_source_uploader.py
index f0fe7932..ea8e3e89 100644
--- a/libs/admin-api-lib/src/admin_api_lib/impl/api_endpoints/default_source_uploader.py
+++ b/libs/admin-api-lib/src/admin_api_lib/impl/api_endpoints/default_source_uploader.py
@@ -1,3 +1,5 @@
+"""Module for the default source uploader implementation."""
+
 import logging
 import asyncio
 from threading import Thread
@@ -28,6 +30,7 @@
 
 class DefaultSourceUploader(SourceUploader):
+    """Default implementation of the SourceUploader."""
 
     def __init__(
         self,
@@ -78,7 +81,7 @@ async def upload_source(
         kwargs: list[KeyValuePair],
     ) -> None:
         """
-        Uploads the parameters for source content extraction.
+        Upload the parameters for source content extraction.
 
         Parameters
         ----------
@@ -95,7 +98,6 @@ async def upload_source(
         -------
         None
         """
-
         self._prune_background_threads()
 
         source_name = f"{source_type}:{sanitize_document_name(name)}"
@@ -111,12 +113,12 @@ async def upload_source(
             raise HTTPException(status_code=status.HTTP_400_BAD_REQUEST, detail=str(e))
         except Exception as e:
             self._key_value_store.upsert(source_name, Status.ERROR)
-            logger.error("Error while uploading %s = %s", source_name, str(e))
+            logger.exception("Error while uploading %s", source_name)
             raise HTTPException(status_code=status.HTTP_500_INTERNAL_SERVER_ERROR, detail=str(e))
 
     def _check_if_already_in_processing(self, source_name: str) -> None:
         """
-        Checks if the source is already in processing state.
+        Check if the source is already in processing state.
 
         Parameters
         ----------
@@ -197,6 +199,6 @@ async def _handle_source_upload(
             await asyncio.to_thread(self._rag_api.upload_information_piece, rag_information_pieces)
             self._key_value_store.upsert(source_name, Status.READY)
             logger.info("Source uploaded successfully: %s", source_name)
-        except Exception as e:
+        except Exception:
             self._key_value_store.upsert(source_name, Status.ERROR)
-            logger.error("Error while uploading %s = %s", source_name, str(e))
+            logger.exception("Error while uploading %s", source_name)
diff --git a/libs/admin-api-lib/src/admin_api_lib/impl/file_services/s3_service.py b/libs/admin-api-lib/src/admin_api_lib/impl/file_services/s3_service.py
index 0f641252..ae6ed28d 100644
--- a/libs/admin-api-lib/src/admin_api_lib/impl/file_services/s3_service.py
+++ b/libs/admin-api-lib/src/admin_api_lib/impl/file_services/s3_service.py
@@ -1,7 +1,6 @@
 """Class to handle I/O with S3 storage."""
 
 import logging
-import traceback
 from pathlib import Path
 from typing import BinaryIO
 
@@ -125,7 +124,7 @@ def delete_file(self, file_name: str) -> None:
         try:
             file_name = f"/{file_name}" if not file_name.startswith("/") else file_name
             self._s3_client.delete_object(Bucket=self._s3_settings.bucket, Key=file_name)
-            logger.info(f"File {file_name} successfully deleted.")
-        except Exception as e:
-            logger.error("Error deleting file %s: %s %s" % (file_name, e, traceback.format_exc()))
+            logger.info("File %s successfully deleted.", file_name)
+        except Exception:
+            logger.exception("Error deleting file %s", file_name)
             raise
diff --git a/libs/admin-api-lib/src/admin_api_lib/impl/summarizer/langchain_summarizer.py b/libs/admin-api-lib/src/admin_api_lib/impl/summarizer/langchain_summarizer.py
index 1d5b5d09..f32f56a2 100644
--- a/libs/admin-api-lib/src/admin_api_lib/impl/summarizer/langchain_summarizer.py
+++ b/libs/admin-api-lib/src/admin_api_lib/impl/summarizer/langchain_summarizer.py
@@ -1,7 +1,6 @@
 """Module for the LangchainSummarizer class."""
 
 import logging
-import traceback
 from typing import Optional
 
 from langchain.text_splitter import RecursiveCharacterTextSplitter
@@ -66,7 +65,7 @@ async def ainvoke(self, query: SummarizerInput, config: Optional[RunnableConfig]
         assert query, "Query is empty: %s" % query  # noqa S101
         config = ensure_config(config)
         tries_remaining = config.get("configurable", {}).get("tries_remaining", 3)
-        logger.debug("Tries remaining %d" % tries_remaining)
+        logger.debug("Tries remaining %d", tries_remaining)
         if tries_remaining < 0:
             raise Exception("Summary creation failed.")
 
@@ -81,8 +80,8 @@ async def ainvoke(self, query: SummarizerInput, config: Optional[RunnableConfig]
                 # Extract content from AIMessage if it's not already a string
                 content = result.content if hasattr(result, "content") else str(result)
                 outputs.append(content)
-            except Exception as e:
-                logger.error("Error in summarizing langchain doc: %s %s", e, traceback.format_exc())
+            except Exception:
+                logger.exception("Error in summarizing langchain doc")
                 config["tries_remaining"] = tries_remaining - 1
                 result = await self._create_chain().ainvoke({"text": langchain_document.page_content}, config)
                 # Extract content from AIMessage if it's not already a string
@@ -93,8 +92,9 @@ async def ainvoke(self, query: SummarizerInput, config: Optional[RunnableConfig]
             return outputs[0]
         summary = " ".join(outputs)
         logger.debug(
-            "Reduced number of chars from %d to %d"
-            % (len("".join([x.page_content for x in langchain_documents])), len(summary))
+            "Reduced number of chars from %d to %d",
+            len("".join([x.page_content for x in langchain_documents])),
+            len(summary),
         )
 
         return await self.ainvoke(summary, config)
diff --git a/libs/extractor-api-lib/pyproject.toml b/libs/extractor-api-lib/pyproject.toml
index 37a3fb98..ca0fba48 100644
--- a/libs/extractor-api-lib/pyproject.toml
+++ b/libs/extractor-api-lib/pyproject.toml
@@ -32,8 +32,8 @@ per-file-ignores = """
     ./src/extractor_api_lib/apis/extractor_api.py: B008,WOT001,
     ./src/extractor_api_lib/impl/extractor_api_impl.py: B008,
    ./src/extractor_api_lib/container.py: CCE002,CCE001,
-    ./src/extractor_api_lib/apis/extractor_api_base.py: WOT001,
-    ./tests/*: S101,E501,
+    ./src/extractor_api_lib/apis/extractor_api_base.py: WOT001,D105,
+    ./tests/*: S101,E501,D105,D100,D102,
 """
 
 [tool.black]
diff --git a/libs/extractor-api-lib/src/extractor_api_lib/api_endpoints/file_extractor.py b/libs/extractor-api-lib/src/extractor_api_lib/api_endpoints/file_extractor.py
index 2c9a645b..3f650fbf 100644
--- a/libs/extractor-api-lib/src/extractor_api_lib/api_endpoints/file_extractor.py
+++ b/libs/extractor-api-lib/src/extractor_api_lib/api_endpoints/file_extractor.py
@@ -1,3 +1,5 @@
+"""Module for the FileExtractor class."""
+
 from abc import ABC, abstractmethod
 from extractor_api_lib.models.extraction_request import ExtractionRequest
 from extractor_api_lib.models.information_piece import InformationPiece
diff --git a/libs/extractor-api-lib/src/extractor_api_lib/api_endpoints/source_extractor.py b/libs/extractor-api-lib/src/extractor_api_lib/api_endpoints/source_extractor.py
index 4071322e..834802c3 100644
--- a/libs/extractor-api-lib/src/extractor_api_lib/api_endpoints/source_extractor.py
+++ b/libs/extractor-api-lib/src/extractor_api_lib/api_endpoints/source_extractor.py
@@ -1,3 +1,5 @@
+"""Module for the SourceExtractor class."""
+
 from abc import ABC, abstractmethod
 
 from extractor_api_lib.models.extraction_parameters import ExtractionParameters
diff --git a/libs/extractor-api-lib/src/extractor_api_lib/extractors/information_extractor.py b/libs/extractor-api-lib/src/extractor_api_lib/extractors/information_extractor.py
index 3a6ee684..d49c6d14 100644
--- a/libs/extractor-api-lib/src/extractor_api_lib/extractors/information_extractor.py
+++ b/libs/extractor-api-lib/src/extractor_api_lib/extractors/information_extractor.py
@@ -13,7 +13,14 @@ class InformationExtractor(ABC):
 
     @property
     @abstractmethod
-    def extractor_type(self) -> ExtractorTypes: ...
+    def extractor_type(self) -> ExtractorTypes:
+        """Return the type of the extractor.
+
+        Returns
+        -------
+        ExtractorTypes
+            The type of the extractor.
+        """
 
     @abstractmethod
     async def aextract_content(
diff --git a/libs/extractor-api-lib/src/extractor_api_lib/impl/api_endpoints/general_file_extractor.py b/libs/extractor-api-lib/src/extractor_api_lib/impl/api_endpoints/general_file_extractor.py
index fee7db2c..93de5bf5 100644
--- a/libs/extractor-api-lib/src/extractor_api_lib/impl/api_endpoints/general_file_extractor.py
+++ b/libs/extractor-api-lib/src/extractor_api_lib/impl/api_endpoints/general_file_extractor.py
@@ -3,7 +3,6 @@
 import logging
 from pathlib import Path
 import tempfile
-import traceback
 
 from extractor_api_lib.api_endpoints.file_extractor import FileExtractor
 
@@ -76,5 +75,5 @@ async def aextract_information(self, extraction_request: ExtractionRequest) -> l
             )
             return [self._mapper.map_internal_to_external(x) for x in results if x.page_content is not None]
         except Exception as e:
-            logger.error("Error during document parsing: %s %s", e, traceback.format_exc())
+            logger.exception("Error during document parsing")
             raise e
diff --git a/libs/extractor-api-lib/src/extractor_api_lib/impl/extractors/confluence_extractor.py b/libs/extractor-api-lib/src/extractor_api_lib/impl/extractors/confluence_extractor.py
index 0c025062..de10af7f 100644
--- a/libs/extractor-api-lib/src/extractor_api_lib/impl/extractors/confluence_extractor.py
+++ b/libs/extractor-api-lib/src/extractor_api_lib/impl/extractors/confluence_extractor.py
@@ -34,6 +34,13 @@ def __init__(
 
     @property
     def extractor_type(self) -> ExtractorTypes:
+        """Return the type of the extractor.
+
+        Returns
+        -------
+        ExtractorTypes
+            The type of the extractor.
+        """
         return ExtractorTypes.CONFLUENCE
 
     async def aextract_content(
diff --git a/libs/extractor-api-lib/src/extractor_api_lib/impl/extractors/file_extractors/pdf_extractor.py b/libs/extractor-api-lib/src/extractor_api_lib/impl/extractors/file_extractors/pdf_extractor.py
index 32340772..a23b45d9 100644
--- a/libs/extractor-api-lib/src/extractor_api_lib/impl/extractors/file_extractors/pdf_extractor.py
+++ b/libs/extractor-api-lib/src/extractor_api_lib/impl/extractors/file_extractors/pdf_extractor.py
@@ -154,7 +154,7 @@ async def aextract_content(self, file_path: Path, name: str) -> list[InternalInf
             )
             pdf_elements += new_pdf_elements
 
-        logger.info(f"Extraction completed. Found {len(pdf_elements)} information pieces.")
+        logger.info("Extraction completed. Found %d information pieces.", len(pdf_elements))
 
         return pdf_elements
 
     def _is_text_based(self, page: Page) -> bool:
@@ -200,8 +200,8 @@ def _extract_tables_from_text_page(
                     table_df = pd.DataFrame(table_data)
                     try:
                         converted_table = self._dataframe_converter.convert(table_df)
-                    except TypeError as e:
-                        logger.error(f"Error while converting table to string: {e}")
+                    except TypeError:
+                        logger.exception("Error while converting table to string")
                         continue
                     if not converted_table.strip():
                         continue
@@ -215,8 +215,8 @@ def _extract_tables_from_text_page(
                             information_id=hash_datetime(),
                         )
                     )
-        except Exception as e:
-            logger.warning(f"Failed to find tables on page {page_index}: {e}")
+        except Exception:
+            logger.exception("Failed to find tables on page %d", page_index)
 
         return table_elements
 
@@ -321,19 +321,19 @@ def _extract_tables_from_scanned_page(
                                 },
                             )
                         )
-                    except Exception as e:
-                        logger.warning(f"Failed to convert Camelot table {i + 1}: {e}")
+                    except Exception:
+                        logger.exception("Failed to convert Camelot table %d", i + 1)
 
-        except Exception as e:
-            logger.debug(f"Camelot table extraction failed for page {page_index}: {e}")
+        except Exception:
+            logger.exception("Camelot table extraction failed for page %d", page_index)
 
         return table_elements
 
     def _extract_text_from_text_page(self, page: Page) -> str:
         try:
             return page.extract_text() or ""
-        except Exception as e:
-            logger.warning(f"Failed to extract text with pdfplumber: {e}")
+        except Exception:
+            logger.exception("Failed to extract text with pdfplumber")
             return ""
 
     def _extract_content_from_page(
diff --git a/libs/extractor-api-lib/src/extractor_api_lib/impl/extractors/sitemap_extractor.py b/libs/extractor-api-lib/src/extractor_api_lib/impl/extractors/sitemap_extractor.py
index 18010cc7..77db4cf3 100644
--- a/libs/extractor-api-lib/src/extractor_api_lib/impl/extractors/sitemap_extractor.py
+++ b/libs/extractor-api-lib/src/extractor_api_lib/impl/extractors/sitemap_extractor.py
@@ -42,6 +42,13 @@ def __init__(
 
     @property
     def extractor_type(self) -> ExtractorTypes:
+        """Return the type of the extractor.
+
+        Returns
+        -------
+        ExtractorTypes
+            The type of the extractor.
+        """
         return ExtractorTypes.SITEMAP
 
     @property
diff --git a/libs/extractor-api-lib/src/extractor_api_lib/impl/file_services/s3_service.py b/libs/extractor-api-lib/src/extractor_api_lib/impl/file_services/s3_service.py
index af796b5c..2751430c 100644
--- a/libs/extractor-api-lib/src/extractor_api_lib/impl/file_services/s3_service.py
+++ b/libs/extractor-api-lib/src/extractor_api_lib/impl/file_services/s3_service.py
@@ -124,6 +124,6 @@ def delete_file(self, file_name: str) -> None:
             file_name = f"/{file_name}" if not file_name.startswith("/") else file_name
             self._s3_client.delete_object(Bucket=self._s3_settings.bucket, Key=file_name)
             logger.info("File %s successfully deleted.", file_name)
-        except Exception as e:
-            logger.error("Error deleting file %s: %s", file_name, e)
+        except Exception:
+            logger.exception("Error deleting file %s", file_name)
             raise
diff --git a/libs/extractor-api-lib/src/extractor_api_lib/impl/types/extractor_types.py b/libs/extractor-api-lib/src/extractor_api_lib/impl/types/extractor_types.py
index c4efaa48..56814445 100644
--- a/libs/extractor-api-lib/src/extractor_api_lib/impl/types/extractor_types.py
+++ b/libs/extractor-api-lib/src/extractor_api_lib/impl/types/extractor_types.py
@@ -1,3 +1,5 @@
+"""Module for the ExtractorTypes enumeration."""
+
 from enum import StrEnum
 
 
diff --git a/libs/extractor-api-lib/src/extractor_api_lib/impl/utils/sitemap_extractor_utils.py b/libs/extractor-api-lib/src/extractor_api_lib/impl/utils/sitemap_extractor_utils.py
index 1b53c4b8..8a3fd9af 100644
--- a/libs/extractor-api-lib/src/extractor_api_lib/impl/utils/sitemap_extractor_utils.py
+++ b/libs/extractor-api-lib/src/extractor_api_lib/impl/utils/sitemap_extractor_utils.py
@@ -1,9 +1,12 @@
+"""Module for the SitemapExtractor utility functions."""
+
 from bs4 import BeautifulSoup
 from typing import Any, Union
 
 
 def custom_sitemap_parser_function(content: Union[str, BeautifulSoup]) -> str:
-    """
+    """Parse sitemap content.
+
     Given HTML content (as a string or BeautifulSoup object), return only the concatenated text from all <p> elements.
 
@@ -26,8 +29,7 @@ def custom_sitemap_parser_function(content: Union[str, BeautifulSoup]) -> str:
 
 
 def custom_sitemap_metadata_parser_function(meta: dict, _content: Any) -> dict:
-    """
-    Given metadata and HTML content, extract the title from the first article and the first <h1> element
+    """Given metadata and HTML content, extract the title from the first article and the first <h1> element.
 
     Parameters
     ----------
diff --git a/libs/extractor-api-lib/tests/pdf_extractor_test.py b/libs/extractor-api-lib/tests/pdf_extractor_test.py
index a353f8b4..7f0d559f 100644
--- a/libs/extractor-api-lib/tests/pdf_extractor_test.py
+++ b/libs/extractor-api-lib/tests/pdf_extractor_test.py
@@ -292,7 +292,6 @@ def test_extract_tables_from_text_page(self, pdf_extractor):
     @pytest.mark.integration
     def test_extract_text_from_scanned_page(self, pdf_extractor, test_pdf_files):
         """Test text extraction from scanned pages using OCR with real PDF."""
-        # Use the actual scanned test PDF
 
         scanned_pdf_path = test_pdf_files["scanned"]
 
@@ -551,8 +550,8 @@ async def test_end_to_end_extraction(self, pdf_extractor, test_pdf_files):
         text_count = sum(1 for elem in result if elem.type == ContentType.TEXT)
         table_count = sum(1 for elem in result if elem.type == ContentType.TABLE)
 
-        logger.info(f"  Text elements: {text_count}")
-        logger.info(f"  Table elements: {table_count}")
+        logger.info("  Text elements: %d", text_count)
+        logger.info("  Table elements: %d", table_count)
 
         # Verify metadata completeness
         for i, element in enumerate(result):
diff --git a/libs/rag-core-api/src/rag_core_api/impl/api_endpoints/default_information_pieces_remover.py b/libs/rag-core-api/src/rag_core_api/impl/api_endpoints/default_information_pieces_remover.py
index 735f98a8..94751c4c 100644
--- a/libs/rag-core-api/src/rag_core_api/impl/api_endpoints/default_information_pieces_remover.py
+++ b/libs/rag-core-api/src/rag_core_api/impl/api_endpoints/default_information_pieces_remover.py
@@ -49,7 +49,7 @@ def remove_information_piece(self, delete_request: DeleteRequest) -> None:
             for key_value_pair in delete_request.metadata:
                 metadata["metadata." + key_value_pair.key] = json.loads(key_value_pair.value)
         except Exception as e:
-            logger.error("Error while parsing metadata: %s", e)
+            logger.exception("Error while parsing metadata")
             raise HTTPException(
                 status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
                 detail="Error while parsing metadata: %s" % e,
@@ -61,8 +61,8 @@ def remove_information_piece(self, delete_request: DeleteRequest) -> None:
         )
         try:
             self._vector_database.delete(metadata)
-        except Exception as e:
-            logger.error("Error while deleting from vector db: %s", e)
+        except Exception:
+            logger.exception("Error while deleting from vector db")
             raise HTTPException(
                 status_code=status.HTTP_404_NOT_FOUND,
                 detail="Error while deleting %s from vector db" % delete_request.metadata,
diff --git a/libs/rag-core-api/src/rag_core_api/impl/evaluator/langfuse_ragas_evaluator.py b/libs/rag-core-api/src/rag_core_api/impl/evaluator/langfuse_ragas_evaluator.py
index d933fdd5..b12ab298 100644
--- a/libs/rag-core-api/src/rag_core_api/impl/evaluator/langfuse_ragas_evaluator.py
+++ b/libs/rag-core-api/src/rag_core_api/impl/evaluator/langfuse_ragas_evaluator.py
@@ -141,8 +141,8 @@ async def aevaluate(self) -> None:
         try:
             evaluation_dataset = self._get_dataset(self._settings.evaluation_dataset_name)
             await self._aauto_answer_generation4evaluation_questions(evaluation_dataset)
-        except Exception as e:
-            logger.error("Failed to evaluate questions: %s", e)
+        except Exception:
+            logger.exception("Failed to evaluate questions")
 
     async def _aauto_answer_generation4evaluation_questions(self, dataset) -> tuple[int, Dataset]:
         session_id = str(uuid4())
@@ -166,8 +166,8 @@ async def _aevaluate_question(self, item, experiment_name: str, generation_time:
 
         try:
             response = await self._chat_endpoint.achat(config["metadata"]["session_id"], chat_request)
-        except Exception as e:
-            logger.info("Error while answering question %s: %s", item.input, e)
logger.info("Error while answering question %s: %s", item.input, e) + except Exception: + logger.exception("Error while answering question %s", item.input) response = None if response and response.citations: @@ -219,7 +219,7 @@ def _link_item2generation(self, item, generation, experiment_name, retries: int try: item.link(generation, experiment_name) except ApiError as e: - logger.warning("Failed to link item to generation: %s", e) + logger.exception("Failed to link item to generation") retries += 1 if retries > self.MAX_RETRIES: raise e diff --git a/libs/rag-core-api/src/rag_core_api/impl/graph/chat_graph.py b/libs/rag-core-api/src/rag_core_api/impl/graph/chat_graph.py index 73be5e39..07c7a9ee 100644 --- a/libs/rag-core-api/src/rag_core_api/impl/graph/chat_graph.py +++ b/libs/rag-core-api/src/rag_core_api/impl/graph/chat_graph.py @@ -236,7 +236,7 @@ async def _retrieve_node(self, state: dict) -> dict: self.FINISH_REASONS: ["NoOrEmptyCollectionError"], } except Exception as e: - logger.error("Error while searching for documents in vector database: %s", e) + logger.exception("Error while searching for documents in vector database") raise HTTPException( status_code=status.HTTP_500_INTERNAL_SERVER_ERROR, detail="Error while searching for documents in vector database: %s" % e, diff --git a/libs/rag-core-api/src/rag_core_api/impl/vector_databases/qdrant_database.py b/libs/rag-core-api/src/rag_core_api/impl/vector_databases/qdrant_database.py index aa99e89b..5a123d32 100644 --- a/libs/rag-core-api/src/rag_core_api/impl/vector_databases/qdrant_database.py +++ b/libs/rag-core-api/src/rag_core_api/impl/vector_databases/qdrant_database.py @@ -112,8 +112,8 @@ async def asearch(self, query: str, search_kwargs: dict, filter_kwargs: dict | N related_results += self._get_related(res.metadata["related"]) return results + related_results - except Exception as e: - logger.error(f"Search failed: {str(e)}") + except Exception: + logger.exception("Search failed") raise def get_specific_document(self, document_id: str) -> list[Document]: diff --git a/libs/rag-core-lib/src/rag_core_lib/impl/langfuse_manager/langfuse_manager.py b/libs/rag-core-lib/src/rag_core_lib/impl/langfuse_manager/langfuse_manager.py index 87095d7b..1daa6a08 100644 --- a/libs/rag-core-lib/src/rag_core_lib/impl/langfuse_manager/langfuse_manager.py +++ b/libs/rag-core-lib/src/rag_core_lib/impl/langfuse_manager/langfuse_manager.py @@ -82,7 +82,7 @@ def get_langfuse_prompt(self, base_prompt_name: str) -> Optional[TextPromptClien langfuse_prompt = self._langfuse.get_prompt(base_prompt_name, type="chat") return langfuse_prompt except NotFoundError: - logger.info(f"Prompt '{base_prompt_name}' not found in Langfuse. Creating new chat prompt.") + logger.info("Prompt '%s' not found in Langfuse. Creating new chat prompt.", base_prompt_name) local_prompt = self._managed_prompts[base_prompt_name] chat_messages = self._convert_chat_prompt_to_langfuse_format(local_prompt) @@ -103,11 +103,8 @@ def get_langfuse_prompt(self, base_prompt_name: str) -> Optional[TextPromptClien langfuse_prompt = self._langfuse.get_prompt(base_prompt_name, type="chat") return langfuse_prompt - except Exception as error: - logger.error( - f"Error occurred while getting prompt template from langfuse. 
Error: {error}", - extra={"error": error}, - ) + except Exception: + logger.exception("Error occurred while getting prompt template from langfuse") return None def get_base_llm(self, name: str) -> LLM: @@ -166,7 +163,7 @@ def get_base_prompt(self, name: str) -> ChatPromptTemplate: role = message[0] content = message[1] if len(message) > 1 else "" else: - logger.warning(f"Unexpected message format: {message}") + logger.warning("Unexpected message format: %s", message) continue if role == "system": diff --git a/libs/rag-core-lib/src/rag_core_lib/impl/llms/llm_factory.py b/libs/rag-core-lib/src/rag_core_lib/impl/llms/llm_factory.py index fe495624..be09f8f9 100644 --- a/libs/rag-core-lib/src/rag_core_lib/impl/llms/llm_factory.py +++ b/libs/rag-core-lib/src/rag_core_lib/impl/llms/llm_factory.py @@ -1,3 +1,5 @@ +"""Module for creating and managing Large Language Models (LLMs).""" + from pydantic_settings import BaseSettings from langchain.chat_models import init_chat_model from langchain.chat_models.base import _SUPPORTED_PROVIDERS diff --git a/libs/rag-core-lib/tests/chat_model_provider_test.py b/libs/rag-core-lib/tests/chat_model_provider_test.py index d6162510..c32992ad 100644 --- a/libs/rag-core-lib/tests/chat_model_provider_test.py +++ b/libs/rag-core-lib/tests/chat_model_provider_test.py @@ -1,8 +1,4 @@ -#!/usr/bin/env python3 -""" -Simple test script to verify the chat_model_provider function works correctly -with the new init_chat_model approach. -""" +"""Simple test script to verify the chat_model_provider function works correctly.""" import os import sys @@ -18,7 +14,6 @@ def test_chat_model_provider(): """Test that the chat_model_provider function creates a model correctly.""" - # Set up test environment variables os.environ["STACKIT_VLLM_API_KEY"] = "test_key" os.environ["STACKIT_VLLM_BASE_URL"] = "https://test.example.com/v1" diff --git a/services/rag-backend/chat_endpoint.py b/services/rag-backend/chat_endpoint.py index 2bab9a90..c3882517 100644 --- a/services/rag-backend/chat_endpoint.py +++ b/services/rag-backend/chat_endpoint.py @@ -1,3 +1,5 @@ +"""The Module for the use case chat endpoint implementation.""" + import logging from langchain_core.runnables import RunnableConfig @@ -10,7 +12,16 @@ class UseCaseChat(Chat): + """The class of the chat use case implementation.""" + def __init__(self, chat_graph: TracedRunnable): + """Initialize the use case chat. + + Parameters + ---------- + chat_graph : TracedRunnable + The chat graph to use for this use case. + """ self._chat_graph = chat_graph async def achat( @@ -18,6 +29,20 @@ async def achat( session_id: str, chat_request: ChatRequest, ) -> ChatResponse: + """Handle a chat request. + + Parameters + ---------- + session_id : str + The ID of the session. + chat_request : ChatRequest + The chat request to handle. + + Returns + ------- + ChatResponse + The response to the chat request. + """ config = RunnableConfig( tags=[], callbacks=None, diff --git a/services/rag-backend/container.py b/services/rag-backend/container.py index 0566e2b0..ee670ca7 100644 --- a/services/rag-backend/container.py +++ b/services/rag-backend/container.py @@ -1,5 +1,5 @@ -""" -This is an example of how to replace a dependency in the dependency container of the rag-core-api library. +"""An example of how to replace a dependency in the dependency container of the rag-core-api library. + If you replace a dependency keep in mind that the dependency you are replacing should have: 1. the same name and 2. 
the same base class @@ -19,4 +19,6 @@ @containers.copy(DependencyContainer) class UseCaseContainer(DependencyContainer): + """The container for use case dependencies.""" + chat_endpoint = Singleton(UseCaseChat, DependencyContainer.traced_chat_graph)