From b856f54c3069789b3eaf5d49b0fafad1e7641901 Mon Sep 17 00:00:00 2001
From: itsmvd
Date: Tue, 25 Feb 2025 13:17:01 +0000
Subject: [PATCH 01/63] add response_schema support to ollama.py

---
 timesketch/lib/llms/providers/ollama.py | 93 +++++++++++++++++++++++++
 1 file changed, 93 insertions(+)
 create mode 100644 timesketch/lib/llms/providers/ollama.py

diff --git a/timesketch/lib/llms/providers/ollama.py b/timesketch/lib/llms/providers/ollama.py
new file mode 100644
index 0000000000..75d83c112b
--- /dev/null
+++ b/timesketch/lib/llms/providers/ollama.py
@@ -0,0 +1,93 @@
+# Copyright 2025 Google Inc. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""A LLM provider for the ollama server."""
+import json
+import requests
+from typing import Optional
+
+from timesketch.lib.llms.providers import interface
+from timesketch.lib.llms.providers import manager
+
+
+class Ollama(interface.LLMProvider):
+    """A LLM provider for the ollama server."""
+
+    NAME = "ollama"
+
+    def _post(self, request_body: str) -> requests.Response:
+        """
+        Make a POST request to the ollama server.
+
+        Args:
+            request_body: The body of the request in JSON format.
+
+        Returns:
+            The response from the server as a dictionary.
+        """
+        api_resource = "/api/chat"
+        url = self.config.get("server_url") + api_resource
+        return requests.post(url, data=request_body)
+
+    def generate(self, prompt: str, response_schema: Optional[dict] = None) -> str:
+        """
+        Generate text using the ollama server, optionally with a JSON schema.
+
+        Args:
+            prompt: The prompt to use for the generation.
+            response_schema: An optional JSON schema to define the expected
+                response format.
+
+        Returns:
+            The generated text as a string (or parsed data if
+            response_schema is provided).
+        """
+        request_body = {
+            "messages": [{"role": "user", "content": prompt}],
+            "model": self.config.get("model"),
+            "stream": False,  # Force to false, streaming not available with /api/chat endpoint
+            "options": {
+                "temperature": self.config.get("temperature"),
+                "num_predict": self.config.get("max_output_tokens"),
+                "top_p": self.config.get("top_p"),
+                "top_k": self.config.get("top_k"),
+            },
+        }
+
+        if response_schema:
+            request_body["format"] = response_schema
+
+        response = self._post(json.dumps(request_body))
+
+        if response.status_code != 200:
+            raise ValueError(f"Error generating text: {response.text}")
+
+        try:
+            text_response = response.json().get("content", "").strip()
+            if response_schema:
+                return json.loads(text_response)
+
+            return text_response
+
+        except json.JSONDecodeError as error:
+            raise ValueError(
+                f"Error JSON parsing text: {text_response}: {error}"
+            ) from error
+
+        except Exception as error:
+            raise ValueError(
+                f"An unexpected error occurred: {error}"
+            ) from error
+
+
+manager.LLMManager.register_provider(Ollama)

From 5debf0f999ed3038a07c3616456e06b17537f151 Mon Sep 17 00:00:00 2001
From: itsmvd
Date: Tue, 25 Feb 2025 15:46:59 +0000
Subject: [PATCH 02/63] Create separate llm provider directory, add
 response_schema to ollama provider

---
 timesketch/api/v1/resources/llm_summarize.py |  7 +-
 timesketch/api/v1/resources/nl2q.py          |  2 +-
 timesketch/api/v1/resources_test.py          |  6 +-
 timesketch/lib/llms/ollama.py                | 72 ------------------
 .../lib/llms/{ => providers}/__init__.py     |  8 +--
 .../lib/llms/{ => providers}/aistudio.py     |  6 +-
 .../lib/llms/{ => providers}/interface.py    |  0
 .../lib/llms/{ => providers}/manager.py      |  3 +-
 .../lib/llms/{ => providers}/manager_test.py |  2 +-
 timesketch/lib/llms/providers/ollama.py      | 48 ++++++------
 .../lib/llms/{ => providers}/vertexai.py     |  4 +-
 11 files changed, 43 insertions(+), 115 deletions(-)
 delete mode 100644 timesketch/lib/llms/ollama.py
 rename timesketch/lib/llms/{ => providers}/__init__.py (76%)
 rename timesketch/lib/llms/{ => providers}/aistudio.py (95%)
 rename timesketch/lib/llms/{ => providers}/interface.py (100%)
 rename timesketch/lib/llms/{ => providers}/manager.py (98%)
 rename timesketch/lib/llms/{ => providers}/manager_test.py (99%)
 rename timesketch/lib/llms/{ => providers}/vertexai.py (96%)

diff --git a/timesketch/api/v1/resources/llm_summarize.py b/timesketch/api/v1/resources/llm_summarize.py
index 0c18441b56..5aa37657f1 100644
--- a/timesketch/api/v1/resources/llm_summarize.py
+++ b/timesketch/api/v1/resources/llm_summarize.py
@@ -28,7 +28,8 @@
 from flask_restful import Resource
 
 from timesketch.api.v1 import resources, export
-from timesketch.lib import definitions, llms, utils
+from timesketch.lib import definitions, utils
+from timesketch.lib.llms.providers import manager
 from timesketch.lib.definitions import METRICS_NAMESPACE
 from timesketch.models.sketch import Sketch
 
@@ -304,8 +305,8 @@ def _get_content(
             configured LLM provider
         """
         try:
-            feature_name = "llm_summarization"
-            llm = llms.manager.LLMManager.create_provider(feature_name=feature_name)
+            feature_name = "llm_summarize"
+            llm = manager.LLMManager.create_provider(feature_name=feature_name)
         except Exception as e:  # pylint: disable=broad-except
             logger.error("Error LLM Provider: %s", e)
             abort(

diff --git a/timesketch/api/v1/resources/nl2q.py b/timesketch/api/v1/resources/nl2q.py
index d016a768f7..5ed533e956 100644
--- a/timesketch/api/v1/resources/nl2q.py
+++ b/timesketch/api/v1/resources/nl2q.py
@@ -26,7 +26,7 @@
 import pandas as pd
 
 from timesketch.api.v1 import utils
-from timesketch.lib.llms import manager
+from timesketch.lib.llms.providers import manager
 from timesketch.lib.definitions import HTTP_STATUS_CODE_BAD_REQUEST
 from timesketch.lib.definitions import HTTP_STATUS_CODE_INTERNAL_SERVER_ERROR
 from timesketch.lib.definitions import HTTP_STATUS_CODE_NOT_FOUND

diff --git a/timesketch/api/v1/resources_test.py b/timesketch/api/v1/resources_test.py
index a964fad50a..7044bb2250 100644
--- a/timesketch/api/v1/resources_test.py
+++ b/timesketch/api/v1/resources_test.py
@@ -1198,7 +1198,7 @@ class TestNl2qResource(BaseTest):
 
     resource_url = "/api/v1/sketches/1/nl2q/"
 
-    @mock.patch("timesketch.lib.llms.manager.LLMManager.create_provider")
+    @mock.patch("timesketch.lib.llms.provider.manager.LLMManager.create_provider")
     @mock.patch("timesketch.api.v1.utils.run_aggregator")
     @mock.patch("timesketch.api.v1.resources.OpenSearchDataStore", MockDataStore)
     def test_nl2q_prompt(self, mock_aggregator, mock_create_provider):
@@ -1380,7 +1380,7 @@ def test_nl2q_no_permission(self):
         )
         self.assertEqual(response.status_code, HTTP_STATUS_CODE_FORBIDDEN)
 
-    @mock.patch("timesketch.lib.llms.manager.LLMManager.create_provider")
+    @mock.patch("timesketch.lib.llms.provider.manager.LLMManager.create_provider")
     @mock.patch("timesketch.api.v1.utils.run_aggregator")
     @mock.patch("timesketch.api.v1.resources.OpenSearchDataStore", MockDataStore)
     def test_nl2q_llm_error(self, mock_aggregator, mock_create_provider):
@@ -1584,7 +1584,7 @@ def test_llm_summarize_no_events(self):
         )
 
     @mock.patch("timesketch.api.v1.resources.OpenSearchDataStore", MockDataStore)
-    @mock.patch("timesketch.lib.llms.manager.LLMManager.create_provider")
+    @mock.patch("timesketch.lib.llms.provider.manager.LLMManager.create_provider")
     def test_llm_summarize_with_events(self, mock_create_provider):
         """Test LLM summarizer with events returned and mock LLM."""
         self.login()

diff --git a/timesketch/lib/llms/ollama.py b/timesketch/lib/llms/ollama.py
deleted file mode 100644
index 365716b580..0000000000
--- a/timesketch/lib/llms/ollama.py
+++ /dev/null
@@ -1,72 +0,0 @@
-# Copyright 2024 Google Inc. All rights reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-"""A LLM provider for the ollama server."""
-import json
-import requests
-
-from timesketch.lib.llms import interface
-from timesketch.lib.llms import manager
-
-
-class Ollama(interface.LLMProvider):
-    """A LLM provider for the ollama server."""
-
-    NAME = "ollama"
-
-    def _post(self, request_body: str) -> requests.Response:
-        """
-        Make a POST request to the ollama server.
-
-        Args:
-            request_body: The body of the request in JSON format.
-
-        Returns:
-            The response from the server as a dictionary.
-        """
-        api_resource = "/api/generate/"
-        url = self.config.get("server_url") + api_resource
-        return requests.post(url, data=request_body)
-
-    def generate(self, prompt: str) -> str:
-        """
-        Generate text using the ollama server.
-
-        Args:
-            prompt: The prompt to use for the generation.
-            temperature: The temperature to use for the generation.
-            stream: Whether to stream the generation or not.
-
-        Raises:
-            ValueError: If the generation fails.
-
-        Returns:
-            The generated text as a string.
-        """
-        request_body = {
-            "prompt": prompt,
-            "model": self.config.get("model"),
-            "stream": self.config.get("stream"),
-            "options": {
-                "temperature": self.config.get("temperature"),
-                "num_predict": self.config.get("max_output_tokens"),
-            },
-        }
-        response = self._post(json.dumps(request_body))
-        if response.status_code != 200:
-            raise ValueError(f"Error generating text: {response.text}")
-
-        return response.json().get("response", "").strip()
-
-
-manager.LLMManager.register_provider(Ollama)

diff --git a/timesketch/lib/llms/__init__.py b/timesketch/lib/llms/providers/__init__.py
similarity index 76%
rename from timesketch/lib/llms/__init__.py
rename to timesketch/lib/llms/providers/__init__.py
index bb52e18d42..f92027460b 100644
--- a/timesketch/lib/llms/__init__.py
+++ b/timesketch/lib/llms/providers/__init__.py
@@ -11,8 +11,8 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-"""LLM module for Timesketch."""
+"""LLM providers for Timesketch."""
 
-from timesketch.lib.llms import ollama
-from timesketch.lib.llms import vertexai
-from timesketch.lib.llms import aistudio
+from timesketch.lib.llms.providers import ollama
+from timesketch.lib.llms.providers import vertexai
+from timesketch.lib.llms.providers import aistudio

diff --git a/timesketch/lib/llms/aistudio.py b/timesketch/lib/llms/providers/aistudio.py
similarity index 95%
rename from timesketch/lib/llms/aistudio.py
rename to timesketch/lib/llms/providers/aistudio.py
index 77b6502efa..df7d5ca1bb 100644
--- a/timesketch/lib/llms/aistudio.py
+++ b/timesketch/lib/llms/providers/aistudio.py
@@ -1,4 +1,4 @@
-# Copyright 2024 Google Inc. All rights reserved.
+# Copyright 2025 Google Inc. All rights reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -15,8 +15,8 @@
 import json
 from typing import Optional
 
-from timesketch.lib.llms import interface
-from timesketch.lib.llms import manager
+from timesketch.lib.llms.providers import interface
+from timesketch.lib.llms.providers import manager
 
 
 # Check if the required dependencies are installed.

diff --git a/timesketch/lib/llms/interface.py b/timesketch/lib/llms/providers/interface.py
similarity index 100%
rename from timesketch/lib/llms/interface.py
rename to timesketch/lib/llms/providers/interface.py

diff --git a/timesketch/lib/llms/manager.py b/timesketch/lib/llms/providers/manager.py
similarity index 98%
rename from timesketch/lib/llms/manager.py
rename to timesketch/lib/llms/providers/manager.py
index 5412abcec6..6bb3757d1d 100644
--- a/timesketch/lib/llms/manager.py
+++ b/timesketch/lib/llms/providers/manager.py
@@ -14,7 +14,7 @@
 """This file contains a class for managing Large Language Model (LLM) providers."""
 from flask import current_app
 
-from timesketch.lib.llms.interface import LLMProvider
+from timesketch.lib.llms.providers.interface import LLMProvider
 
 
 class LLMManager:
@@ -80,7 +80,6 @@ def create_provider(cls, feature_name: str = None, **kwargs) -> LLMProvider:
             raise ValueError(
                 "Configuration for the feature must specify exactly one provider."
             )
-
         provider_name = next(iter(config_mapping))
         provider_config = config_mapping[provider_name]

diff --git a/timesketch/lib/llms/manager_test.py b/timesketch/lib/llms/providers/manager_test.py
similarity index 99%
rename from timesketch/lib/llms/manager_test.py
rename to timesketch/lib/llms/providers/manager_test.py
index c850b6a75c..af5b5f4e95 100644
--- a/timesketch/lib/llms/manager_test.py
+++ b/timesketch/lib/llms/providers/manager_test.py
@@ -14,7 +14,7 @@
 """Tests for LLM provider manager."""
 
 from timesketch.lib.testlib import BaseTest
-from timesketch.lib.llms import manager
+from timesketch.lib.llms.providers import manager
 
 
 class MockAistudioProvider:

diff --git a/timesketch/lib/llms/providers/ollama.py b/timesketch/lib/llms/providers/ollama.py
index 75d83c112b..bbb6795887 100644
--- a/timesketch/lib/llms/providers/ollama.py
+++ b/timesketch/lib/llms/providers/ollama.py
@@ -11,7 +11,7 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-"""A LLM provider for the ollama server."""
+"""A LLM provider for the Ollama server."""
 import json
 import requests
 from typing import Optional
@@ -21,27 +21,29 @@
 
 
 class Ollama(interface.LLMProvider):
-    """A LLM provider for the ollama server."""
+    """A LLM provider for the Ollama server."""
 
     NAME = "ollama"
 
     def _post(self, request_body: str) -> requests.Response:
         """
-        Make a POST request to the ollama server.
+        Make a POST request to the Ollama server.
 
         Args:
             request_body: The body of the request in JSON format.
 
         Returns:
-            The response from the server as a dictionary.
+            The response from the server as a requests.Response object.
         """
         api_resource = "/api/chat"
         url = self.config.get("server_url") + api_resource
-        return requests.post(url, data=request_body)
+        return requests.post(
+            url, data=request_body, headers={"Content-Type": "application/json"}
+        )
 
     def generate(self, prompt: str, response_schema: Optional[dict] = None) -> str:
         """
-        Generate text using the ollama server, optionally with a JSON schema.
+        Generate text using the Ollama server, optionally with a JSON schema.
 
         Args:
             prompt: The prompt to use for the generation.
@@ -49,13 +51,15 @@ def generate(self, prompt: str, response_schema: Optional[dict] = None) -> str:
             response format.
 
         Returns:
-            The generated text as a string (or parsed data if
-            response_schema is provided).
+            The generated text as a string (or parsed data if response_schema is provided).
+
+        Raises:
+            ValueError: If the request fails or JSON parsing fails.
""" request_body = { "messages": [{"role": "user", "content": prompt}], "model": self.config.get("model"), - "stream": False, # Force to false, streaming not available with /api/chat endpoint + "stream": self.config.get("stream"), "options": { "temperature": self.config.get("temperature"), "num_predict": self.config.get("max_output_tokens"), @@ -72,22 +76,18 @@ def generate(self, prompt: str, response_schema: Optional[dict] = None) -> str: if response.status_code != 200: raise ValueError(f"Error generating text: {response.text}") - try: - text_response = response.json().get("content", "").strip() - if response_schema: + response_data = response.json() + text_response = response_data.get("message", {}).get("content", "").strip() + + if response_schema: + try: return json.loads(text_response) - - return text_response - - except json.JSONDecodeError as error: - raise ValueError( - f"Error JSON parsing text: {text_response}: {error}" - ) from error - - except Exception as error: - raise ValueError( - f"An unexpected error occurred: {error}" - ) from error + except json.JSONDecodeError as error: + raise ValueError( + f"Error JSON parsing text: {text_response}: {error}" + ) from error + + return text_response manager.LLMManager.register_provider(Ollama) diff --git a/timesketch/lib/llms/vertexai.py b/timesketch/lib/llms/providers/vertexai.py similarity index 96% rename from timesketch/lib/llms/vertexai.py rename to timesketch/lib/llms/providers/vertexai.py index e4f25f7f7e..123bbdd39e 100644 --- a/timesketch/lib/llms/vertexai.py +++ b/timesketch/lib/llms/providers/vertexai.py @@ -16,8 +16,8 @@ import json from typing import Optional -from timesketch.lib.llms import interface -from timesketch.lib.llms import manager +from timesketch.lib.llms.providers import interface +from timesketch.lib.llms.providers import manager # Check if the required dependencies are installed. 
 has_required_deps = True

From 70d06991938b9f777c3b4632c7e4612820475e47 Mon Sep 17 00:00:00 2001
From: itsmvd
Date: Tue, 25 Feb 2025 15:50:24 +0000
Subject: [PATCH 03/63] Update timesketch.conf

---
 data/timesketch.conf | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/data/timesketch.conf b/data/timesketch.conf
index 0d9a47164c..1df853cc12 100644
--- a/data/timesketch.conf
+++ b/data/timesketch.conf
@@ -379,16 +379,16 @@ LLM_PROVIDER_CONFIGS = {
             'project_id': '',
         },
     },
-    'llm_summarization': {
+    'llm_summarize': {
         'aistudio': {
             'model': 'gemini-2.0-flash-exp',
             'project_id': '',
        },
    },
    'default': {
-        'aistudio': {
-            'api_key': '',
-            'model': 'gemini-2.0-flash-exp',
+        'ollama': {
+            'server_url': 'http://ollama:11434',
+            'model': 'gemma:7b',
        },
    }
 }

From 59ce086c88c7eb4c23499edf194fa4b1d955e686 Mon Sep 17 00:00:00 2001
From: itsmvd
Date: Tue, 25 Feb 2025 15:53:20 +0000
Subject: [PATCH 04/63] solve naming conflict

---
 timesketch/api/v1/resources/llm_summarize.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/timesketch/api/v1/resources/llm_summarize.py b/timesketch/api/v1/resources/llm_summarize.py
index 5aa37657f1..a5ecebc3b6 100644
--- a/timesketch/api/v1/resources/llm_summarize.py
+++ b/timesketch/api/v1/resources/llm_summarize.py
@@ -29,7 +29,7 @@
 
 from timesketch.api.v1 import resources, export
 from timesketch.lib import definitions, utils
-from timesketch.lib.llms.providers import manager
+from timesketch.lib.llms.providers import manager as provider_manager
 from timesketch.lib.definitions import METRICS_NAMESPACE
 from timesketch.models.sketch import Sketch
 
@@ -306,7 +306,7 @@ def _get_content(
         """
         try:
             feature_name = "llm_summarize"
-            llm = manager.LLMManager.create_provider(feature_name=feature_name)
+            llm = provider_manager.LLMManager.create_provider(feature_name=feature_name)
         except Exception as e:  # pylint: disable=broad-except
             logger.error("Error LLM Provider: %s", e)
             abort(

From 9e2c294a796c12a4978dc1ffe22e103647687ccf Mon Sep 17 00:00:00 2001
From: itsmvd
Date: Tue, 25 Feb 2025 16:06:13 +0000
Subject: [PATCH 05/63] fix typo

---
 timesketch/api/v1/resources_test.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/timesketch/api/v1/resources_test.py b/timesketch/api/v1/resources_test.py
index 7044bb2250..5396b010ab 100644
--- a/timesketch/api/v1/resources_test.py
+++ b/timesketch/api/v1/resources_test.py
@@ -1198,7 +1198,7 @@ class TestNl2qResource(BaseTest):
 
     resource_url = "/api/v1/sketches/1/nl2q/"
 
-    @mock.patch("timesketch.lib.llms.provider.manager.LLMManager.create_provider")
+    @mock.patch("timesketch.lib.llms.providers.manager.LLMManager.create_provider")
     @mock.patch("timesketch.api.v1.utils.run_aggregator")
     @mock.patch("timesketch.api.v1.resources.OpenSearchDataStore", MockDataStore)
     def test_nl2q_prompt(self, mock_aggregator, mock_create_provider):
@@ -1380,7 +1380,7 @@ def test_nl2q_no_permission(self):
         )
         self.assertEqual(response.status_code, HTTP_STATUS_CODE_FORBIDDEN)
 
-    @mock.patch("timesketch.lib.llms.provider.manager.LLMManager.create_provider")
+    @mock.patch("timesketch.lib.llms.providers.manager.LLMManager.create_provider")
     @mock.patch("timesketch.api.v1.utils.run_aggregator")
     @mock.patch("timesketch.api.v1.resources.OpenSearchDataStore", MockDataStore)
     def test_nl2q_llm_error(self, mock_aggregator, mock_create_provider):
@@ -1584,7 +1584,7 @@ def test_llm_summarize_no_events(self):
         )
 
     @mock.patch("timesketch.api.v1.resources.OpenSearchDataStore", MockDataStore)
@mock.patch("timesketch.lib.llms.provider.manager.LLMManager.create_provider") + @mock.patch("timesketch.lib.llms.providers.manager.LLMManager.create_provider") def test_llm_summarize_with_events(self, mock_create_provider): """Test LLM summarizer with events returned and mock LLM.""" self.login() From 5f252a94be82bc960c144eabcbbc4d7a74777a86 Mon Sep 17 00:00:00 2001 From: itsmvd Date: Tue, 25 Feb 2025 17:17:35 +0000 Subject: [PATCH 06/63] Add an __init__ file to the timsketch/lib/llms folder --- timesketch/lib/llms/__init__.py | 14 ++++++++++++++ 1 file changed, 14 insertions(+) create mode 100644 timesketch/lib/llms/__init__.py diff --git a/timesketch/lib/llms/__init__.py b/timesketch/lib/llms/__init__.py new file mode 100644 index 0000000000..0242820fb1 --- /dev/null +++ b/timesketch/lib/llms/__init__.py @@ -0,0 +1,14 @@ +# Copyright 2025 Google Inc. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""LLM libraries for Timesketch.""" From c0401596592a12f937709623997b51f1aa7bd2ff Mon Sep 17 00:00:00 2001 From: itsmvd Date: Tue, 25 Feb 2025 17:24:27 +0000 Subject: [PATCH 07/63] lint fix ollama --- timesketch/lib/llms/providers/ollama.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/timesketch/lib/llms/providers/ollama.py b/timesketch/lib/llms/providers/ollama.py index bbb6795887..42481e7aee 100644 --- a/timesketch/lib/llms/providers/ollama.py +++ b/timesketch/lib/llms/providers/ollama.py @@ -12,9 +12,9 @@ # See the License for the specific language governing permissions and # limitations under the License. """A LLM provider for the Ollama server.""" +from typing import Optional import json import requests -from typing import Optional from timesketch.lib.llms.providers import interface from timesketch.lib.llms.providers import manager @@ -51,7 +51,8 @@ def generate(self, prompt: str, response_schema: Optional[dict] = None) -> str: response format. Returns: - The generated text as a string (or parsed data if response_schema is provided). + The generated text as a string (or parsed data if + response_schema is provided). Raises: ValueError: If the request fails or JSON parsing fails. From 9ab391ed42b744294d2fae940ba88282ead3ab58 Mon Sep 17 00:00:00 2001 From: itsmvd Date: Wed, 26 Feb 2025 09:48:28 +0000 Subject: [PATCH 08/63] Improve fallback mechanism for LLM configs --- timesketch/lib/llms/providers/manager.py | 20 +++++++++++++----- timesketch/lib/llms/providers/manager_test.py | 21 +++++++++++++++++++ 2 files changed, 36 insertions(+), 5 deletions(-) diff --git a/timesketch/lib/llms/providers/manager.py b/timesketch/lib/llms/providers/manager.py index 6bb3757d1d..3dfc4705aa 100644 --- a/timesketch/lib/llms/providers/manager.py +++ b/timesketch/lib/llms/providers/manager.py @@ -63,7 +63,7 @@ def create_provider(cls, feature_name: str = None, **kwargs) -> LLMProvider: """ Create an instance of the provider for the given feature. 
 
-        If a configuration exists for the feature in
+        If a valid configuration exists for the feature in
         current_app.config["LLM_PROVIDER_CONFIGS"], use it; otherwise,
         fall back to the configuration under the "default" key.
 
@@ -71,14 +71,24 @@ def create_provider(cls, feature_name: str = None, **kwargs) -> LLMProvider:
         The configuration mapping is expected to have exactly one key:
         the provider name.
         """
         llm_configs = current_app.config.get("LLM_PROVIDER_CONFIGS", {})
+
         if feature_name and feature_name in llm_configs:
             config_mapping = llm_configs[feature_name]
-        else:
-            config_mapping = llm_configs.get("default")
-
+            if config_mapping and len(config_mapping) == 1:
+                provider_name = next(iter(config_mapping))
+                provider_config = config_mapping[provider_name]
+                provider_class = cls.get_provider(provider_name)
+                # Check that provider specifies required fields
+                try:
+                    return provider_class(config=provider_config, **kwargs)
+                except ValueError:
+                    pass  # Fallback to default provider
+
+        # Fallback to default config
+        config_mapping = llm_configs.get("default")
         if not config_mapping or len(config_mapping) != 1:
             raise ValueError(
-                "Configuration for the feature must specify exactly one provider."
+                "Default configuration must specify exactly one provider."
             )

diff --git a/timesketch/lib/llms/providers/manager_test.py b/timesketch/lib/llms/providers/manager_test.py
index af5b5f4e95..6db3f6b3ce 100644
--- a/timesketch/lib/llms/providers/manager_test.py
+++ b/timesketch/lib/llms/providers/manager_test.py
@@ -144,3 +144,24 @@ def test_create_provider_missing_config(self):
         self.app.config["LLM_PROVIDER_CONFIGS"] = {}
         with self.assertRaises(ValueError):
             manager.LLMManager.create_provider()
+
+    def test_create_provider_empty_feature_fallback(self):
+        """Test that create_provider falls back to default when feature config is empty."""
+        self.app.config["LLM_PROVIDER_CONFIGS"] = {
+            "llm_summarize": {},  # Empty feature config
+            "default": {
+                "aistudio": {
+                    "api_key": "AIzaSyTestDefaultKey",
+                    "model": "gemini-2.0-flash-exp",
+                }
+            },
+        }
+        provider_instance = manager.LLMManager.create_provider(feature_name="llm_summarize")
+        self.assertIsInstance(provider_instance, MockAistudioProvider)
+        self.assertEqual(
+            provider_instance.config,
+            {
+                "api_key": "AIzaSyTestDefaultKey",
+                "model": "gemini-2.0-flash-exp",
+            },
+        )
\ No newline at end of file

From 5d4746a22e9832455100adc89b66782aeded08bd Mon Sep 17 00:00:00 2001
From: itsmvd
Date: Wed, 26 Feb 2025 09:51:16 +0000
Subject: [PATCH 09/63] formatting

---
 timesketch/lib/llms/providers/manager.py      | 6 ++----
 timesketch/lib/llms/providers/manager_test.py | 6 ++++--
 2 files changed, 6 insertions(+), 6 deletions(-)

diff --git a/timesketch/lib/llms/providers/manager.py b/timesketch/lib/llms/providers/manager.py
index 3dfc4705aa..7cfc0a7574 100644
--- a/timesketch/lib/llms/providers/manager.py
+++ b/timesketch/lib/llms/providers/manager.py
@@ -71,7 +71,7 @@ def create_provider(cls, feature_name: str = None, **kwargs) -> LLMProvider:
         the provider name.
""" llm_configs = current_app.config.get("LLM_PROVIDER_CONFIGS", {}) - + if feature_name and feature_name in llm_configs: config_mapping = llm_configs[feature_name] if config_mapping and len(config_mapping) == 1: @@ -87,9 +87,7 @@ def create_provider(cls, feature_name: str = None, **kwargs) -> LLMProvider: # Fallback to default config config_mapping = llm_configs.get("default") if not config_mapping or len(config_mapping) != 1: - raise ValueError( - "Default configuration must specify exactly one provider." - ) + raise ValueError("Default configuration must specify exactly one provider.") provider_name = next(iter(config_mapping)) provider_config = config_mapping[provider_name] diff --git a/timesketch/lib/llms/providers/manager_test.py b/timesketch/lib/llms/providers/manager_test.py index 6db3f6b3ce..ceb7267100 100644 --- a/timesketch/lib/llms/providers/manager_test.py +++ b/timesketch/lib/llms/providers/manager_test.py @@ -156,7 +156,9 @@ def test_create_provider_empty_feature_fallback(self): } }, } - provider_instance = manager.LLMManager.create_provider(feature_name="llm_summarize") + provider_instance = manager.LLMManager.create_provider( + feature_name="llm_summarize" + ) self.assertIsInstance(provider_instance, MockAistudioProvider) self.assertEqual( provider_instance.config, @@ -164,4 +166,4 @@ def test_create_provider_empty_feature_fallback(self): "api_key": "AIzaSyTestDefaultKey", "model": "gemini-2.0-flash-exp", }, - ) \ No newline at end of file + ) From 390cd091ab5a60e7e61b49904c186938153c50dd Mon Sep 17 00:00:00 2001 From: itsmvd Date: Wed, 26 Feb 2025 09:57:13 +0000 Subject: [PATCH 10/63] format fix 2 --- timesketch/lib/llms/providers/manager_test.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/timesketch/lib/llms/providers/manager_test.py b/timesketch/lib/llms/providers/manager_test.py index ceb7267100..09902faa0f 100644 --- a/timesketch/lib/llms/providers/manager_test.py +++ b/timesketch/lib/llms/providers/manager_test.py @@ -146,7 +146,7 @@ def test_create_provider_missing_config(self): manager.LLMManager.create_provider() def test_create_provider_empty_feature_fallback(self): - """Test that create_provider falls back to default when feature config is empty.""" + """Test that create_provider falls back to default when feature config empty.""" self.app.config["LLM_PROVIDER_CONFIGS"] = { "llm_summarize": {}, # Empty feature config "default": { From ad4d70b303a74c5cba57d9a14bbf287fa596468e Mon Sep 17 00:00:00 2001 From: itsmvd Date: Wed, 26 Feb 2025 13:13:06 +0000 Subject: [PATCH 11/63] Add LLM features manager and interface --- timesketch/lib/llms/features/__init__.py | 16 +++ timesketch/lib/llms/features/interface.py | 53 +++++++ timesketch/lib/llms/features/manager.py | 65 +++++++++ timesketch/lib/llms/features/manager_test.py | 142 +++++++++++++++++++ 4 files changed, 276 insertions(+) create mode 100644 timesketch/lib/llms/features/__init__.py create mode 100644 timesketch/lib/llms/features/interface.py create mode 100644 timesketch/lib/llms/features/manager.py create mode 100644 timesketch/lib/llms/features/manager_test.py diff --git a/timesketch/lib/llms/features/__init__.py b/timesketch/lib/llms/features/__init__.py new file mode 100644 index 0000000000..6a8e4caaf4 --- /dev/null +++ b/timesketch/lib/llms/features/__init__.py @@ -0,0 +1,16 @@ +# Copyright 2025 Google Inc. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""LLM features for Timesketch."""
+
+from timesketch.lib.llms.features import manager

diff --git a/timesketch/lib/llms/features/interface.py b/timesketch/lib/llms/features/interface.py
new file mode 100644
index 0000000000..10317fe014
--- /dev/null
+++ b/timesketch/lib/llms/features/interface.py
@@ -0,0 +1,53 @@
+# Copyright 2025 Google Inc. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""Interface for LLM features."""
+
+from typing import Any, Optional
+from abc import ABC, abstractmethod
+from timesketch.models.sketch import Sketch
+
+
+class LLMFeatureInterface(ABC):
+    """Interface for LLM features."""
+
+    NAME: str = "llm_feature_interface"  # Must be overridden in subclasses
+    RESPONSE_SCHEMA: Optional[dict[str, Any]] = None
+
+    @abstractmethod
+    def generate_prompt(self, sketch: Sketch, **kwargs: Any) -> str:
+        """Generates a prompt for the LLM.
+
+        Args:
+            sketch_id: The ID of the sketch.
+            kwargs: Feature-specific keyword arguments for prompt generation.
+
+        Returns:
+            The generated prompt string.
+        """
+        raise NotImplementedError()
+
+    @abstractmethod
+    def process_response(self, llm_response: str, **kwargs: Any) -> dict[str, Any]:
+        """Processes the raw LLM response.
+
+        Args:
+            llm_response: The raw string response from the LLM provider.
+            kwargs: Feature-specific arguments.
+
+        Returns:
+            A dictionary containing the processed response data, suitable for
+            returning from the API. Must include a "response" key with the
+            main result, and can optionally include other metadata.
+        """
+        raise NotImplementedError()

diff --git a/timesketch/lib/llms/features/manager.py b/timesketch/lib/llms/features/manager.py
new file mode 100644
index 0000000000..70bfac4836
--- /dev/null
+++ b/timesketch/lib/llms/features/manager.py
@@ -0,0 +1,65 @@
+# Copyright 2025 Google Inc. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""Manager for LLM features.""" + +import logging +from timesketch.lib.llms.features.interface import LLMFeatureInterface + +logger = logging.getLogger("timesketch.llm.manager") + + +class FeatureManager: + """The manager for LLM features.""" + + _feature_registry = {} + + @classmethod + def register_feature(cls, feature_class: type[LLMFeatureInterface]): + """Register an LLM feature class.""" + feature_name = feature_class.NAME.lower() + if feature_name in cls._feature_registry: + raise ValueError(f"LLM Feature {feature_class.NAME} already registered") + cls._feature_registry[feature_name] = feature_class + # Optional: Add logging here + + @classmethod + def get_feature(cls, feature_name: str) -> type[LLMFeatureInterface]: + """Get a feature class by name.""" + try: + return cls._feature_registry[feature_name.lower()] + except KeyError as no_such_feature: + raise KeyError( + f"No such LLM feature: {feature_name.lower()}" + ) from no_such_feature + + @classmethod + def get_features(cls): + """Get all registered features. + + Yields: + A tuple of (feature_name, feature_class) + """ + for feature_name, feature_class in cls._feature_registry.items(): + yield feature_name, feature_class + + @classmethod + def get_feature_instance(cls, feature_name: str) -> LLMFeatureInterface: + """Get an instance of a feature by name.""" + feature_class = cls.get_feature(feature_name) + return feature_class() + + @classmethod + def clear_registration(cls): + """Clear all registered features.""" + cls._feature_registry = {} diff --git a/timesketch/lib/llms/features/manager_test.py b/timesketch/lib/llms/features/manager_test.py new file mode 100644 index 0000000000..2e053e7199 --- /dev/null +++ b/timesketch/lib/llms/features/manager_test.py @@ -0,0 +1,142 @@ +# Copyright 2025 Google Inc. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""Tests for LLM feature manager.""" + +from typing import Any +from timesketch.lib.testlib import BaseTest +from timesketch.lib.llms.features import manager +from timesketch.models.sketch import Sketch + + +class MockSummarizeFeature: + """A mock LLM summarize feature.""" + + NAME = "llm_summarize" + + def generate_prompt(self, _sketch: Sketch, **_kwargs: Any) -> str: + """Mock implementation of generate_prompt.""" + return "Summarize these events." + + def process_response(self, llm_response: str, **kwargs: Any) -> dict[str, Any]: + """Mock implementation of process_response.""" + return {"response": f"Summary: {llm_response}"} + + +class MockNl2qFeature: + """A mock Natural Language to Query feature.""" + + NAME = "nl2q" + + def generate_prompt(self, _sketch: Sketch, **_kwargs: Any) -> str: + """Mock implementation of generate_prompt.""" + return "Convert this question to a query." 
+
+    def process_response(self, llm_response: str, **_kwargs: Any) -> dict[str, Any]:
+        """Mock implementation of process_response."""
+        return {"response": f"Query: {llm_response}"}
+
+
+class TestFeatureManager(BaseTest):
+    """Tests for the functionality of the FeatureManager module."""
+
+    def setUp(self) -> None:
+        super().setUp()
+        manager.FeatureManager.clear_registration()
+        manager.FeatureManager.register_feature(MockSummarizeFeature)
+        manager.FeatureManager.register_feature(MockNl2qFeature)
+
+    def tearDown(self) -> None:
+        manager.FeatureManager.clear_registration()
+        super().tearDown()
+
+    def test_get_features(self):
+        """Test that get_features returns the registered features."""
+        features = manager.FeatureManager.get_features()
+        feature_list = list(features)
+        self.assertIsInstance(feature_list, list)
+
+        found_summarize = any(
+            feature_name == "llm_summarize" and feature_class == MockSummarizeFeature
+            for feature_name, feature_class in feature_list
+        )
+        found_nl2q = any(
+            feature_name == "nl2q" and feature_class == MockNl2qFeature
+            for feature_name, feature_class in feature_list
+        )
+        self.assertTrue(found_summarize, "LLM Summarize feature not found.")
+        self.assertTrue(found_nl2q, "NL2Q feature not found.")
+
+    def test_get_feature(self):
+        """Test retrieval of a feature class from the registry."""
+        feature_class = manager.FeatureManager.get_feature("llm_summarize")
+        self.assertEqual(feature_class, MockSummarizeFeature)
+
+        feature_class = manager.FeatureManager.get_feature("LLM_SUMMARIZE")
+        self.assertEqual(feature_class, MockSummarizeFeature)
+
+        self.assertRaises(
+            KeyError, manager.FeatureManager.get_feature, "no_such_feature"
+        )
+
+    def test_register_feature(self):
+        """Test that re-registering an already registered feature raises ValueError."""
+        self.assertRaises(
+            ValueError, manager.FeatureManager.register_feature, MockSummarizeFeature
+        )
+
+    def test_get_feature_instance(self):
+        """Test get_feature_instance creates the correct feature instance."""
+        feature_instance = manager.FeatureManager.get_feature_instance("llm_summarize")
+        self.assertIsInstance(feature_instance, MockSummarizeFeature)
+
+        feature_instance = manager.FeatureManager.get_feature_instance("nl2q")
+        self.assertIsInstance(feature_instance, MockNl2qFeature)
+
+        self.assertRaises(
+            KeyError, manager.FeatureManager.get_feature_instance, "no_such_feature"
+        )
+
+    def test_feature_methods(self):
+        """Test that feature methods work correctly."""
+        summarize_instance = manager.FeatureManager.get_feature_instance(
+            "llm_summarize"
+        )
+        nl2q_instance = manager.FeatureManager.get_feature_instance("nl2q")
+
+        sketch = None
+
+        self.assertEqual(
+            summarize_instance.generate_prompt(sketch), "Summarize these events."
+        )
+        self.assertEqual(
+            nl2q_instance.generate_prompt(sketch), "Convert this question to a query."
+ ) + + self.assertEqual( + summarize_instance.process_response("Test events"), + {"response": "Summary: Test events"}, + ) + self.assertEqual( + nl2q_instance.process_response("timestamp:*"), + {"response": "Query: timestamp:*"}, + ) + + def test_clear_registration(self): + """Test clear_registration removes all registered features.""" + self.assertEqual(len(list(manager.FeatureManager.get_features())), 2) + + manager.FeatureManager.clear_registration() + + self.assertEqual(len(list(manager.FeatureManager.get_features())), 0) + self.assertRaises(KeyError, manager.FeatureManager.get_feature, "llm_summarize") From aa267cc7390dbcea3863b93ba2540f6c8a2e24b0 Mon Sep 17 00:00:00 2001 From: itsmvd Date: Wed, 26 Feb 2025 14:51:02 +0000 Subject: [PATCH 12/63] linter fix --- timesketch/lib/llms/features/manager_test.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/timesketch/lib/llms/features/manager_test.py b/timesketch/lib/llms/features/manager_test.py index 2e053e7199..83fc7b81b1 100644 --- a/timesketch/lib/llms/features/manager_test.py +++ b/timesketch/lib/llms/features/manager_test.py @@ -28,7 +28,7 @@ def generate_prompt(self, _sketch: Sketch, **_kwargs: Any) -> str: """Mock implementation of generate_prompt.""" return "Summarize these events." - def process_response(self, llm_response: str, **kwargs: Any) -> dict[str, Any]: + def process_response(self, llm_response: str, **_kwargs: Any) -> dict[str, Any]: """Mock implementation of process_response.""" return {"response": f"Summary: {llm_response}"} From bd8d6d20dbd24058952cd85bfddb0a73d08b22f4 Mon Sep 17 00:00:00 2001 From: itsmvd Date: Thu, 27 Feb 2025 10:24:33 +0000 Subject: [PATCH 13/63] Automatically load features, add better doc-strings to interface.py --- timesketch/lib/llms/features/__init__.py | 2 + timesketch/lib/llms/features/interface.py | 48 ++++++++++-- timesketch/lib/llms/features/manager.py | 36 ++++++++- timesketch/lib/llms/features/manager_test.py | 80 +++++++++++++++++--- 4 files changed, 147 insertions(+), 19 deletions(-) diff --git a/timesketch/lib/llms/features/__init__.py b/timesketch/lib/llms/features/__init__.py index 6a8e4caaf4..8346fe51c3 100644 --- a/timesketch/lib/llms/features/__init__.py +++ b/timesketch/lib/llms/features/__init__.py @@ -14,3 +14,5 @@ """LLM features for Timesketch.""" from timesketch.lib.llms.features import manager + +manager.FeatureManager.load_llm_features() diff --git a/timesketch/lib/llms/features/interface.py b/timesketch/lib/llms/features/interface.py index 10317fe014..ba3d1827b2 100644 --- a/timesketch/lib/llms/features/interface.py +++ b/timesketch/lib/llms/features/interface.py @@ -19,7 +19,27 @@ class LLMFeatureInterface(ABC): - """Interface for LLM features.""" + """Interface for LLM features. + + This abstract class defines the required methods and attributes for implementing + an LLM-powered feature in Timesketch. Features must override the NAME constant + and implement the abstract methods. + + Attributes: + NAME: String identifier for the feature. Must be overridden in subclasses. + RESPONSE_SCHEMA: Optional JSON schema that defines the expected format of + the LLM response. When defined, this schema will be passed to the LLM + provider to enforce structured outputs matching the defined format. + For example: + + { + "type": "object", + "properties": {"summary": {"type": "string"}}, + "required": ["summary"], + } + + If None, the LLM will return unstructured text. 
+    """
 
     NAME: str = "llm_feature_interface"  # Must be overridden in subclasses
     RESPONSE_SCHEMA: Optional[dict[str, Any]] = None
@@ -39,15 +59,29 @@ def generate_prompt(self, sketch: Sketch, **kwargs: Any) -> str:
 
     @abstractmethod
     def process_response(self, llm_response: str, **kwargs: Any) -> dict[str, Any]:
-        """Processes the raw LLM response.
+        """Processes the LLM response and formats it for API consumption.
+
+        This method takes the response from the LLM provider and transforms it into
+        a structured format to be returned to the frontend through the API. The
+        response handling varies depending on whether RESPONSE_SCHEMA is defined:
+
+        - If RESPONSE_SCHEMA is None: Typically receives a string response
+        - If RESPONSE_SCHEMA is defined: Typically receives a structured dict
+
+        The returned dictionary defines the data contract with the frontend, which will
+        use these fields to render the appropriate UI elements.
 
         Args:
-            llm_response: The raw string response from the LLM provider.
-            kwargs: Feature-specific arguments.
+            llm_response: The response from the LLM provider. This may be a
+                string or a structured dict depending on RESPONSE_SCHEMA.
+            **kwargs: Additional data needed for processing, which may include:
+                - sketch_id: The ID of the sketch
+                - sketch: The Sketch object
 
         Returns:
-            A dictionary containing the processed response data, suitable for
-            returning from the API. Must include a "response" key with the
-            main result, and can optionally include other metadata.
+            A dictionary that will be JSON-serialized and returned through the API.
+            This dictionary defines the data contract with the frontend and must include
+            all fields that the frontend expects to render. Example for NL2Q:
+            - {"name": "AI generated search query", "query_string": "...", "error": null}
         """
         raise NotImplementedError()

diff --git a/timesketch/lib/llms/features/manager.py b/timesketch/lib/llms/features/manager.py
index 70bfac4836..67010a50cf 100644
--- a/timesketch/lib/llms/features/manager.py
+++ b/timesketch/lib/llms/features/manager.py
@@ -13,6 +13,10 @@
 # limitations under the License.
"""Manager for LLM features.""" +import os +import importlib +import inspect +import pkgutil import logging from timesketch.lib.llms.features.interface import LLMFeatureInterface @@ -24,6 +28,37 @@ class FeatureManager: _feature_registry = {} + @classmethod + def load_llm_features(cls): + """Dynamically load and register all LLM features.""" + features_path = os.path.dirname(os.path.abspath(__file__)) + cls.clear_registration() + + for _, module_name, _ in pkgutil.iter_modules([features_path]): + if module_name in ["interface", "manager"] or module_name.endswith("_test"): + continue + try: + module = importlib.import_module( + f"timesketch.lib.llms.features.{module_name}" + ) + for _, obj in inspect.getmembers(module): + if ( + inspect.isclass(obj) + and issubclass(obj, LLMFeatureInterface) + and obj != LLMFeatureInterface + ): + try: + cls.register_feature(obj) + except ValueError as e: + logger.debug("Failed to register feature: %s", str(e)) + + except (ImportError, AttributeError) as e: + logger.error( + "Error loading LLM feature module %s: %s", module_name, str(e) + ) + + logger.debug("Loaded %d LLM features", len(cls._feature_registry)) + @classmethod def register_feature(cls, feature_class: type[LLMFeatureInterface]): """Register an LLM feature class.""" @@ -31,7 +66,6 @@ def register_feature(cls, feature_class: type[LLMFeatureInterface]): if feature_name in cls._feature_registry: raise ValueError(f"LLM Feature {feature_class.NAME} already registered") cls._feature_registry[feature_name] = feature_class - # Optional: Add logging here @classmethod def get_feature(cls, feature_name: str) -> type[LLMFeatureInterface]: diff --git a/timesketch/lib/llms/features/manager_test.py b/timesketch/lib/llms/features/manager_test.py index 83fc7b81b1..7e5c0dd49b 100644 --- a/timesketch/lib/llms/features/manager_test.py +++ b/timesketch/lib/llms/features/manager_test.py @@ -13,10 +13,13 @@ # limitations under the License. """Tests for LLM feature manager.""" +import mock +import types from typing import Any from timesketch.lib.testlib import BaseTest from timesketch.lib.llms.features import manager from timesketch.models.sketch import Sketch +from timesketch.lib.llms.features.interface import LLMFeatureInterface class MockSummarizeFeature: @@ -25,28 +28,48 @@ class MockSummarizeFeature: NAME = "llm_summarize" def generate_prompt(self, _sketch: Sketch, **_kwargs: Any) -> str: - """Mock implementation of generate_prompt.""" + """Mocks implementation of generate_prompt.""" return "Summarize these events." def process_response(self, llm_response: str, **_kwargs: Any) -> dict[str, Any]: - """Mock implementation of process_response.""" + """Mocks implementation of process_response.""" return {"response": f"Summary: {llm_response}"} -class MockNl2qFeature: +class MockNl2qFeature(LLMFeatureInterface): """A mock Natural Language to Query feature.""" NAME = "nl2q" def generate_prompt(self, _sketch: Sketch, **_kwargs: Any) -> str: - """Mock implementation of generate_prompt.""" + """Mocks implementation of generate_prompt.""" return "Convert this question to a query." 
 
     def process_response(self, llm_response: str, **_kwargs: Any) -> dict[str, Any]:
-        """Mock implementation of process_response."""
+        """Mocks implementation of process_response."""
         return {"response": f"Query: {llm_response}"}
 
 
+class MockFeature(LLMFeatureInterface):
+    NAME = "some_feature"
+
+    def generate_prompt(self, *args: Any, **kwargs: Any) -> str:
+        return "some prompt"
+
+    def process_response(self, *args: Any, **kwargs: Any) -> dict:
+        return {"response": "some response"}
+
+
+class DuplicateNl2qFeature(LLMFeatureInterface):
+    NAME = "nl2q"
+
+    def generate_prompt(self, *args: Any, **kwargs: Any) -> str:
+        return "duplicate prompt"
+
+    def process_response(self, *args: Any, **kwargs: Any) -> dict:
+        return {"response": "duplicate response"}
+
+
 class TestFeatureManager(BaseTest):
     """Tests for the functionality of the FeatureManager module."""
 
@@ -84,7 +107,7 @@ def tearDown(self) -> None:
         super().tearDown()
 
     def test_get_features(self):
-        """Test that get_features returns the registered features."""
+        """Tests that get_features returns the registered features."""
         features = manager.FeatureManager.get_features()
         feature_list = list(features)
         self.assertIsInstance(feature_list, list)
@@ -101,13 +124,13 @@ def test_get_features(self):
         self.assertTrue(found_nl2q, "NL2Q feature not found.")
 
     def test_get_feature(self):
-        """Test retrieval of a feature class from the registry."""
+        """Tests retrieval of a feature class from the registry."""
         feature_class = manager.FeatureManager.get_feature("llm_summarize")
         self.assertEqual(feature_class, MockSummarizeFeature)
 
     def test_register_feature(self):
-        """Test that re-registering an already registered feature raises ValueError."""
+        """Tests that re-registering an already registered feature raises ValueError."""
         self.assertRaises(
             ValueError, manager.FeatureManager.register_feature, MockSummarizeFeature
         )
 
     def test_get_feature_instance(self):
-        """Test get_feature_instance creates the correct feature instance."""
+        """Tests that get_feature_instance creates the correct feature instance."""
         feature_instance = manager.FeatureManager.get_feature_instance("llm_summarize")
         self.assertIsInstance(feature_instance, MockSummarizeFeature)
 
@@ -131,7 +154,7 @@ def test_get_feature_instance(self):
         )
 
     def test_feature_methods(self):
-        """Test that feature methods work correctly."""
+        """Tests that feature methods work correctly."""
         summarize_instance = manager.FeatureManager.get_feature_instance(
             "llm_summarize"
         )
@@ -156,10 +179,45 @@ def test_feature_methods(self):
         )
 
     def test_clear_registration(self):
-        """Test clear_registration removes all registered features."""
+        """Tests that clear_registration removes all registered features."""
         self.assertEqual(len(list(manager.FeatureManager.get_features())), 2)
 
         manager.FeatureManager.clear_registration()
 
         self.assertEqual(len(list(manager.FeatureManager.get_features())), 0)
         self.assertRaises(KeyError, manager.FeatureManager.get_feature, "llm_summarize")
+
+    @mock.patch("importlib.import_module")
+    @mock.patch("pkgutil.iter_modules", return_value=[(None, "nl2q", False)])
+    def test_load_llm_feature(self, _, mock_import_module) -> None:
+        """Tests that load_llm_feature loads the expected features."""
+        mock_module = types.ModuleType("mock_module")
+        setattr(mock_module, "MockNl2qFeature", MockNl2qFeature)
+        mock_import_module.return_value = mock_module
+
+        manager.FeatureManager.load_llm_features()
+        features = list(manager.FeatureManager.get_features())
+
+        self.assertEqual(len(features), 1)
+        registered_name, registered_class = features[0]
+        self.assertEqual(registered_name, "nl2q")
+        self.assertEqual(registered_class, MockNl2qFeature)
+        mock_import_module.assert_called_with("timesketch.lib.llms.features.nl2q")
+
+    @mock.patch("importlib.import_module")
+    @mock.patch("pkgutil.iter_modules", return_value=[(None, "nl2q", False)])
+    def test_load_llm_feature_duplicate(self, _, mock_import_module) -> None:
+        """Tests that load_llm_feature handles registration of duplicate features."""
+        dummy_module = types.ModuleType("dummy_module")
+        setattr(dummy_module, "MockNl2qFeature", MockNl2qFeature)
+        setattr(dummy_module, "DuplicateNl2qFeature", DuplicateNl2qFeature)
+        mock_import_module.return_value = dummy_module
+
+        with self.assertLogs("timesketch.llm.manager", level="WARNING") as log_cm:
+            manager.FeatureManager.load_llm_features()
+        features = list(manager.FeatureManager.get_features())
+        self.assertEqual(len(features), 1)
+        registered_name, _ = features[0]
+        self.assertEqual(registered_name, "nl2q")
+        self.assertTrue(
+            any("already registered" in message for message in log_cm.output)
+        )

From 290afc68ae51393420b29ac1e0a80df32f960c8f Mon Sep 17 00:00:00 2001
From: itsmvd
Date: Thu, 27 Feb 2025 10:27:53 +0000
Subject: [PATCH 14/63] linter fix

---
 timesketch/lib/llms/features/interface.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/timesketch/lib/llms/features/interface.py b/timesketch/lib/llms/features/interface.py
index ba3d1827b2..09b76e0ec3 100644
--- a/timesketch/lib/llms/features/interface.py
+++ b/timesketch/lib/llms/features/interface.py
@@ -82,6 +82,6 @@ def process_response(self, llm_response: str, **kwargs: Any) -> dict[str, Any]:
             A dictionary that will be JSON-serialized and returned through the API.
             This dictionary defines the data contract with the frontend and must include
             all fields that the frontend expects to render. Example for NL2Q:
-            - {"name": "AI generated search query", "query_string": "...", "error": null}
+            - {"name": "AI generated search query","query_string": "...","error":null}
         """
         raise NotImplementedError()

From fb9b668783c5f280567bacc34dfac7826e7aec62 Mon Sep 17 00:00:00 2001
From: itsmvd
Date: Thu, 27 Feb 2025 10:31:48 +0000
Subject: [PATCH 15/63] linter fixes

---
 timesketch/lib/llms/features/manager_test.py | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/timesketch/lib/llms/features/manager_test.py b/timesketch/lib/llms/features/manager_test.py
index 7e5c0dd49b..46518ac328 100644
--- a/timesketch/lib/llms/features/manager_test.py
+++ b/timesketch/lib/llms/features/manager_test.py
@@ -13,9 +13,9 @@
 # limitations under the License.
"""Tests for LLM feature manager.""" -import mock import types from typing import Any +import mock from timesketch.lib.testlib import BaseTest from timesketch.lib.llms.features import manager from timesketch.models.sketch import Sketch @@ -53,20 +53,20 @@ def process_response(self, llm_response: str, **_kwargs: Any) -> dict[str, Any]: class MockFeature(LLMFeatureInterface): NAME = "some_feature" - def generate_prompt(self, *args: Any, **kwargs: Any) -> str: + def generate_prompt(self, *_args: Any, **_kwargs: Any) -> str: return "some prompt" - def process_response(self, *args: Any, **kwargs: Any) -> dict: + def process_response(self, *_args: Any, **_kwargs: Any) -> dict: return {"response": "some response"} class DuplicateNl2qFeature(LLMFeatureInterface): NAME = "nl2q" - def generate_prompt(self, *args: Any, **kwargs: Any) -> str: + def generate_prompt(self, *_args: Any, **_kwargs: Any) -> str: return "duplicate prompt" - def process_response(self, *args: Any, **kwargs: Any) -> dict: + def process_response(self, *_args: Any, **_kwargs: Any) -> dict: return {"response": "duplicate response"} From 0858b7f83cf5a039ad6ab3b448cec02534c83604 Mon Sep 17 00:00:00 2001 From: itsmvd Date: Thu, 27 Feb 2025 10:50:51 +0000 Subject: [PATCH 16/63] linter fixes --- timesketch/lib/llms/features/manager_test.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/timesketch/lib/llms/features/manager_test.py b/timesketch/lib/llms/features/manager_test.py index 46518ac328..a74c46cef0 100644 --- a/timesketch/lib/llms/features/manager_test.py +++ b/timesketch/lib/llms/features/manager_test.py @@ -189,7 +189,7 @@ def test_load_llm_feature_duplicate(self, _, mock_import_module) -> None: setattr(dummy_module, "DuplicateNl2qFeature", DuplicateNl2qFeature) mock_import_module.return_value = dummy_module - with self.assertLogs("timesketch.llm.manager", level="WARNING") as log_cm: + with self.assertLogs("timesketch.llm.manager", level="DEBUG") as log_cm: manager.FeatureManager.load_llm_features() features = list(manager.FeatureManager.get_features()) self.assertEqual(len(features), 1) From 1bcd2b1889c344c47e295f4ef7acca6833cdb18d Mon Sep 17 00:00:00 2001 From: itsmvd Date: Thu, 27 Feb 2025 13:13:57 +0000 Subject: [PATCH 17/63] Introduce LLMResource API method, tests, and add it as a method for the frontend --- timesketch/api/v1/resources/llm.py | 239 ++++++++++++++++++ timesketch/api/v1/resources_test.py | 131 +++++++++- timesketch/api/v1/routes.py | 2 + .../frontend-ng/src/utils/RestApiClient.js | 6 + 4 files changed, 377 insertions(+), 1 deletion(-) create mode 100644 timesketch/api/v1/resources/llm.py diff --git a/timesketch/api/v1/resources/llm.py b/timesketch/api/v1/resources/llm.py new file mode 100644 index 0000000000..c01eab48dd --- /dev/null +++ b/timesketch/api/v1/resources/llm.py @@ -0,0 +1,239 @@ +# Copyright 2025 Google Inc. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+"""Timesketch API endpoint for interacting with LLM features.""" +import logging +import multiprocessing +import multiprocessing.managers +import time + +import prometheus_client +from flask import request, abort, jsonify +from flask_login import login_required, current_user +from flask_restful import Resource + +from timesketch.api.v1 import resources +from timesketch.lib import definitions, utils +from timesketch.lib.definitions import METRICS_NAMESPACE +from timesketch.lib.llms.providers import manager as llm_manager +from timesketch.lib.llms.features import manager as feature_manager +from timesketch.models.sketch import Sketch + +logger = logging.getLogger("timesketch.api.llm") + + +class LLMResource(resources.ResourceMixin, Resource): + """Resource to interact with LLMs.""" + + METRICS = { + "llm_requests_total": prometheus_client.Counter( + "llm_requests_total", + "Total number of LLM requests received", + ["sketch_id", "feature"], + namespace=METRICS_NAMESPACE, + ), + "llm_errors_total": prometheus_client.Counter( + "llm_errors_total", + "Total number of errors during LLM processing", + ["sketch_id", "feature", "error_type"], + namespace=METRICS_NAMESPACE, + ), + "llm_duration_seconds": prometheus_client.Summary( + "llm_duration_seconds", + "Time taken to process an LLM request (in seconds)", + ["sketch_id", "feature"], + namespace=METRICS_NAMESPACE, + ), + } + + _LLM_TIMEOUT_WAIT_SECONDS = 30 + + @login_required + def post(self, sketch_id: int): + """Handles POST requests to the resource.""" + start_time = time.time() + sketch = self._validate_sketch(sketch_id) + form = self._validate_request_data() + feature = self._get_feature(form.get("feature")) + + self._increment_request_metric(sketch_id, feature.NAME) + + timeline_ids = self._validate_indices(sketch, form.get("filter", {})) + prompt = self._generate_prompt(feature, sketch, form, timeline_ids) + response = self._execute_llm_call(feature, prompt, sketch_id) + result = self._process_llm_response( + feature, response, sketch, form, timeline_ids + ) + + self._record_duration(sketch_id, feature.NAME, start_time) + return jsonify(result) + + def _validate_sketch(self, sketch_id: int) -> Sketch: + """Validates sketch existence and user permissions.""" + sketch = Sketch.get_with_acl(sketch_id) + if not sketch: + abort( + definitions.HTTP_STATUS_CODE_NOT_FOUND, "No sketch found with this ID." 
+ ) + if not sketch.has_permission(current_user, "read"): + abort( + definitions.HTTP_STATUS_CODE_FORBIDDEN, + "User does not have read access to the sketch.", + ) + return sketch + + def _validate_request_data(self) -> dict: + """Validates the presence of request JSON data.""" + form = request.json + if not form: + abort( + definitions.HTTP_STATUS_CODE_BAD_REQUEST, + "The POST request requires data", + ) + return form + + def _get_feature(self, feature_name: str) -> feature_manager.LLMFeatureInterface: + """Retrieves and validates the requested LLM feature.""" + if not feature_name: + abort( + definitions.HTTP_STATUS_CODE_BAD_REQUEST, + "The 'feature' parameter is required.", + ) + try: + return feature_manager.FeatureManager.get_feature_instance(feature_name) + except KeyError: + abort( + definitions.HTTP_STATUS_CODE_BAD_REQUEST, + f"Invalid LLM feature: {feature_name}", + ) + + def _validate_indices(self, sketch: Sketch, query_filter: dict) -> list: + """Extracts and validates timeline IDs from the query filter for a sketch.""" + all_indices = list({t.searchindex.index_name for t in sketch.timelines}) + indices = query_filter.get("indices", all_indices) + if "_all" in indices: + indices = all_indices + indices, timeline_ids = utils.get_validated_indices(indices, sketch) + if not indices: + abort( + definitions.HTTP_STATUS_CODE_BAD_REQUEST, + "No valid search indices were found.", + ) + return timeline_ids + + def _generate_prompt( + self, + feature: feature_manager.LLMFeatureInterface, + sketch: Sketch, + form: dict, + timeline_ids: list, + ) -> str: + """Generates the LLM prompt based on the feature and request data.""" + try: + return feature.generate_prompt( + sketch, form=form, datastore=self.datastore, timeline_ids=timeline_ids + ) + except ValueError as e: + abort(definitions.HTTP_STATUS_CODE_BAD_REQUEST, str(e)) + + def _execute_llm_call( + self, feature: feature_manager.LLMFeatureInterface, prompt: str, sketch_id: int + ) -> dict: + """Executes the LLM call with a timeout using multiprocessing.""" + with multiprocessing.Manager() as manager: + shared_response = manager.dict() + process = multiprocessing.Process( + target=self._get_content_with_timeout, + args=(feature, prompt, shared_response), + ) + process.start() + process.join(timeout=self._LLM_TIMEOUT_WAIT_SECONDS) + + if process.is_alive(): + logger.warning( + "LLM call timed out after %d seconds.", + self._LLM_TIMEOUT_WAIT_SECONDS, + ) + process.terminate() + process.join() + self.METRICS["llm_errors_total"].labels( + sketch_id=str(sketch_id), feature=feature.NAME, error_type="timeout" + ).inc() + abort(definitions.HTTP_STATUS_CODE_BAD_REQUEST, "LLM call timed out.") + + response = dict(shared_response) + if "error" in response: + self.METRICS["llm_errors_total"].labels( + sketch_id=str(sketch_id), + feature=feature.NAME, + error_type="llm_api_error", + ).inc() + abort( + definitions.HTTP_STATUS_CODE_INTERNAL_SERVER_ERROR, + "Error during LLM processing.", + ) + return response["response"] + + def _process_llm_response( + self, feature, response: dict, sketch: Sketch, form: dict, timeline_ids: list + ) -> dict: + """Processes the LLM response into the final result.""" + try: + return feature.process_response( + llm_response=response, + form=form, + sketch_id=sketch.id, + datastore=self.datastore, + sketch=sketch, + timeline_ids=timeline_ids, + ) + except ValueError as e: + self.METRICS["llm_errors_total"].labels( + sketch_id=str(sketch.id), + feature=feature.NAME, + error_type="response_processing", + ).inc() + 
abort(definitions.HTTP_STATUS_CODE_BAD_REQUEST, str(e)) + + def _increment_request_metric(self, sketch_id: int, feature_name: str) -> None: + """Increments the request counter metric.""" + self.METRICS["llm_requests_total"].labels( + sketch_id=str(sketch_id), feature=feature_name + ).inc() + + def _record_duration( + self, sketch_id: int, feature_name: str, start_time: float + ) -> None: + """Records the duration of the request.""" + duration = time.time() - start_time + self.METRICS["llm_duration_seconds"].labels( + sketch_id=str(sketch_id), feature=feature_name + ).observe(duration) + + def _get_content_with_timeout( + self, + feature: feature_manager.LLMFeatureInterface, + prompt: str, + shared_response: multiprocessing.managers.DictProxy, + ) -> None: + """Send a prompt to the LLM and get a response within a process.""" + try: + llm = llm_manager.LLMManager.create_provider(feature_name=feature.NAME) + response_schema = ( + feature.RESPONSE_SCHEMA if hasattr(feature, "RESPONSE_SCHEMA") else None + ) + response = llm.generate(prompt, response_schema=response_schema) + shared_response.update({"response": response}) + except Exception as e: + logger.error("Error in LLM call within process: %s", e, exc_info=True) + shared_response.update({"error": str(e)}) diff --git a/timesketch/api/v1/resources_test.py b/timesketch/api/v1/resources_test.py index 14dc3e8bfa..222641e45c 100644 --- a/timesketch/api/v1/resources_test.py +++ b/timesketch/api/v1/resources_test.py @@ -33,7 +33,6 @@ from timesketch.models.sketch import InvestigativeQuestion from timesketch.models.sketch import InvestigativeQuestionApproach from timesketch.models.sketch import Facet - from timesketch.api.v1.resources import ResourceMixin @@ -1692,3 +1691,133 @@ def test_llm_summarize_with_events(self, mock_create_provider): self.assertEqual(response.status_code, 200) response_data = json.loads(response.get_data(as_text=True)) self.assertEqual(response_data.get("summary"), "Mock summary from LLM") + + +@mock.patch("timesketch.api.v1.resources.OpenSearchDataStore", MockDataStore) +class LLMResourceTest(BaseTest): + """Test LLMResource.""" + + resource_url = "/api/v1/sketches/1/llm/" + + @mock.patch("timesketch.models.sketch.Sketch.get_with_acl") + @mock.patch( + "timesketch.lib.llms.features.manager.FeatureManager.get_feature_instance" + ) + @mock.patch("timesketch.lib.utils.get_validated_indices") + @mock.patch("timesketch.api.v1.resources.llm.LLMResource._execute_llm_call") + def test_post_success( + self, + mock_execute_llm, + mock_get_validated_indices, + mock_get_feature, + mock_get_with_acl, + ): + """Test a successful POST request to the LLM endpoint.""" + mock_sketch = mock.MagicMock() + mock_sketch.has_permission.return_value = True + mock_sketch.id = 1 + mock_get_with_acl.return_value = mock_sketch + + mock_feature = mock.MagicMock() + mock_feature.NAME = "test_feature" + mock_feature.generate_prompt.return_value = "test prompt" + mock_feature.process_response.return_value = {"result": "test result"} + mock_get_feature.return_value = mock_feature + + mock_get_validated_indices.return_value = (["index1"], [1]) + mock_execute_llm.return_value = {"response": "mock response"} + + self.login() + response = self.client.post( + self.resource_url, + data=json.dumps({"feature": "test_feature", "filter": {}}), + content_type="application/json", + ) + self.assertEqual(response.status_code, HTTP_STATUS_CODE_OK) + response_data = json.loads(response.get_data(as_text=True)) + self.assertEqual(response_data, {"result": "test result"}) + 
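# A minimal, standalone sketch of the timeout pattern that
# _execute_llm_call uses above: run the worker in a child process, share
# the result through a Manager dict, and terminate the worker once the
# deadline passes. slow_worker is a hypothetical stand-in for
# _get_content_with_timeout, and the 2-second deadline is shortened here
# from the resource's 30-second _LLM_TIMEOUT_WAIT_SECONDS.
import multiprocessing
import time


def slow_worker(shared_response):
    # Simulates an LLM call that hangs well past the deadline.
    time.sleep(60)
    shared_response.update({"response": "done"})


if __name__ == "__main__":
    with multiprocessing.Manager() as manager:
        shared = manager.dict()
        process = multiprocessing.Process(target=slow_worker, args=(shared,))
        process.start()
        process.join(timeout=2)
        if process.is_alive():
            process.terminate()
            process.join()
            print("timed out")  # The resource maps this case to an HTTP 400.
        else:
            print(dict(shared))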
+ def test_post_missing_data(self): + """Test POST request with missing data.""" + self.login() + response = self.client.post( + self.resource_url, + data=json.dumps({"some_param": "some_value"}), + content_type="application/json", + ) + self.assertEqual(response.status_code, HTTP_STATUS_CODE_BAD_REQUEST) + response_data = json.loads(response.get_data(as_text=True)) + self.assertIn("The 'feature' parameter is required", response_data["message"]) + + @mock.patch("timesketch.models.sketch.Sketch.get_with_acl") + def test_post_missing_feature(self, mock_get_with_acl): + """Test POST request with no feature parameter.""" + mock_sketch = mock.MagicMock() + mock_sketch.has_permission.return_value = True + mock_get_with_acl.return_value = mock_sketch + + self.login() + response = self.client.post( + self.resource_url, + data=json.dumps({"filter": {}}), # No 'feature' key + content_type="application/json", + ) + self.assertEqual(response.status_code, HTTP_STATUS_CODE_BAD_REQUEST) + response_data = json.loads(response.get_data(as_text=True)) + self.assertIn("The 'feature' parameter is required", response_data["message"]) + + @mock.patch("timesketch.models.sketch.Sketch.get_with_acl") + def test_post_invalid_sketch(self, mock_get_with_acl): + """Test POST request with an invalid sketch ID.""" + mock_get_with_acl.return_value = None + + self.login() + response = self.client.post( + self.resource_url, + data=json.dumps({"feature": "test_feature", "filter": {}}), + content_type="application/json", + ) + self.assertEqual(response.status_code, HTTP_STATUS_CODE_NOT_FOUND) + response_data = json.loads(response.get_data(as_text=True)) + self.assertIn("No sketch found with this ID", response_data["message"]) + + @mock.patch("timesketch.models.sketch.Sketch.get_with_acl") + def test_post_no_permission(self, mock_get_with_acl): + """Test POST request when user lacks read permission.""" + mock_sketch = mock.MagicMock() + mock_sketch.has_permission.return_value = False + mock_get_with_acl.return_value = mock_sketch + + self.login() + response = self.client.post( + self.resource_url, + data=json.dumps({"feature": "test_feature", "filter": {}}), + content_type="application/json", + ) + self.assertEqual(response.status_code, HTTP_STATUS_CODE_FORBIDDEN) + response_data = json.loads(response.get_data(as_text=True)) + self.assertIn( + "User does not have read access to the sketch", response_data["message"] + ) + + @mock.patch("timesketch.models.sketch.Sketch.get_with_acl") + @mock.patch( + "timesketch.lib.llms.features.manager.FeatureManager.get_feature_instance" + ) + def test_post_invalid_feature(self, mock_get_feature, mock_get_with_acl): + """Test POST request with an invalid feature name.""" + mock_sketch = mock.MagicMock() + mock_sketch.has_permission.return_value = True + mock_get_with_acl.return_value = mock_sketch + + mock_get_feature.side_effect = KeyError("Invalid feature") + + self.login() + response = self.client.post( + self.resource_url, + data=json.dumps({"feature": "invalid_feature", "filter": {}}), + content_type="application/json", + ) + self.assertEqual(response.status_code, HTTP_STATUS_CODE_BAD_REQUEST) + response_data = json.loads(response.get_data(as_text=True)) + self.assertIn("Invalid LLM feature: invalid_feature", response_data["message"]) diff --git a/timesketch/api/v1/routes.py b/timesketch/api/v1/routes.py index 48ecf6f05a..5bc249ebc5 100644 --- a/timesketch/api/v1/routes.py +++ b/timesketch/api/v1/routes.py @@ -78,6 +78,7 @@ from .resources.unfurl import UnfurlResource from 
.resources.nl2q import Nl2qResource from .resources.llm_summarize import LLMSummarizeResource +from .resources.llm import LLMResource from .resources.settings import SystemSettingsResource from .resources.scenarios import ScenarioTemplateListResource @@ -204,6 +205,7 @@ (UnfurlResource, "/unfurl/"), (Nl2qResource, "/sketches//nl2q/"), (LLMSummarizeResource, "/sketches//events/summary/"), + (LLMResource, "/sketches//llm/"), (SystemSettingsResource, "/settings/"), # Scenario templates (ScenarioTemplateListResource, "/scenarios/"), diff --git a/timesketch/frontend-ng/src/utils/RestApiClient.js b/timesketch/frontend-ng/src/utils/RestApiClient.js index 36114aef1a..86416ebd33 100644 --- a/timesketch/frontend-ng/src/utils/RestApiClient.js +++ b/timesketch/frontend-ng/src/utils/RestApiClient.js @@ -528,4 +528,10 @@ export default { getEventSummary(sketchId, formData) { return RestApiClient.post('/sketches/' + sketchId + '/events/summary/', formData) }, + llmRequest(sketchId, featureName, formData) { + formData = formData || {} + formData.feature = featureName + + return RestApiClient.post(`/sketches/${sketchId}/llm/`, formData) + } } From f379b0e96b4fd2df6a2481ac1a55f385567fb232 Mon Sep 17 00:00:00 2001 From: itsmvd Date: Thu, 27 Feb 2025 13:26:13 +0000 Subject: [PATCH 18/63] linter fix --- timesketch/api/v1/resources/llm.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/timesketch/api/v1/resources/llm.py b/timesketch/api/v1/resources/llm.py index c01eab48dd..edaa3c9691 100644 --- a/timesketch/api/v1/resources/llm.py +++ b/timesketch/api/v1/resources/llm.py @@ -234,6 +234,6 @@ def _get_content_with_timeout( ) response = llm.generate(prompt, response_schema=response_schema) shared_response.update({"response": response}) - except Exception as e: + except Exception as e: # pylint: disable=broad-except logger.error("Error in LLM call within process: %s", e, exc_info=True) shared_response.update({"error": str(e)}) From 2e669d0619f893f54d04a94825a4c28992a1bfe9 Mon Sep 17 00:00:00 2001 From: itsmvd Date: Thu, 27 Feb 2025 13:28:10 +0000 Subject: [PATCH 19/63] linter fix --- timesketch/api/v1/resources/llm.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/timesketch/api/v1/resources/llm.py b/timesketch/api/v1/resources/llm.py index edaa3c9691..d9343144a7 100644 --- a/timesketch/api/v1/resources/llm.py +++ b/timesketch/api/v1/resources/llm.py @@ -234,6 +234,6 @@ def _get_content_with_timeout( ) response = llm.generate(prompt, response_schema=response_schema) shared_response.update({"response": response}) - except Exception as e: # pylint: disable=broad-except + except Exception as e: # pylint: disable=broad-except logger.error("Error in LLM call within process: %s", e, exc_info=True) shared_response.update({"error": str(e)}) From 1e58a282140dced2c56024340948f0203764024b Mon Sep 17 00:00:00 2001 From: itsmvd Date: Fri, 28 Feb 2025 10:47:38 +0000 Subject: [PATCH 20/63] Address comments from review --- timesketch/api/v1/resources/llm.py | 168 +++++++++++++++++++++++----- timesketch/api/v1/resources_test.py | 83 ++++++++++++++ 2 files changed, 226 insertions(+), 25 deletions(-) diff --git a/timesketch/api/v1/resources/llm.py b/timesketch/api/v1/resources/llm.py index d9343144a7..e776502aeb 100644 --- a/timesketch/api/v1/resources/llm.py +++ b/timesketch/api/v1/resources/llm.py @@ -16,12 +16,10 @@ import multiprocessing import multiprocessing.managers import time - import prometheus_client -from flask import request, abort, jsonify +from flask import request, abort, jsonify, 
Response from flask_login import login_required, current_user from flask_restful import Resource - from timesketch.api.v1 import resources from timesketch.lib import definitions, utils from timesketch.lib.definitions import METRICS_NAMESPACE @@ -33,7 +31,12 @@ class LLMResource(resources.ResourceMixin, Resource): - """Resource to interact with LLMs.""" + """Resource to interact with LLMs. + + This class provides an API endpoint for accessing and utilizing Large Language + Model features within Timesketch. It handles request validation, processing, + and response handling, while also monitoring performance metrics. + """ METRICS = { "llm_requests_total": prometheus_client.Counter( @@ -55,31 +58,52 @@ class LLMResource(resources.ResourceMixin, Resource): namespace=METRICS_NAMESPACE, ), } - + # TODO(itsmvd): Make this configurable _LLM_TIMEOUT_WAIT_SECONDS = 30 @login_required - def post(self, sketch_id: int): - """Handles POST requests to the resource.""" + def post(self, sketch_id: int) -> Response: + """Handles POST requests to the resource. + + Processes LLM requests, validates inputs, generates prompts, + executes LLM calls, and returns the processed results. + + Args: + sketch_id: The ID of the sketch to process. + + Returns: + A Flask JSON response containing the processed LLM result. + + Raises: + HTTP exceptions for various error conditions. + """ start_time = time.time() sketch = self._validate_sketch(sketch_id) form = self._validate_request_data() feature = self._get_feature(form.get("feature")) - self._increment_request_metric(sketch_id, feature.NAME) - timeline_ids = self._validate_indices(sketch, form.get("filter", {})) prompt = self._generate_prompt(feature, sketch, form, timeline_ids) response = self._execute_llm_call(feature, prompt, sketch_id) result = self._process_llm_response( feature, response, sketch, form, timeline_ids ) - self._record_duration(sketch_id, feature.NAME, start_time) return jsonify(result) def _validate_sketch(self, sketch_id: int) -> Sketch: - """Validates sketch existence and user permissions.""" + """Validates sketch existence and user permissions. + + Args: + sketch_id: The ID of the sketch to validate. + + Returns: + The validated Sketch object. + + Raises: + HTTP 404: If the sketch doesn't exist. + HTTP 403: If the user doesn't have read access to the sketch. + """ sketch = Sketch.get_with_acl(sketch_id) if not sketch: abort( @@ -93,7 +117,14 @@ def _validate_sketch(self, sketch_id: int) -> Sketch: return sketch def _validate_request_data(self) -> dict: - """Validates the presence of request JSON data.""" + """Validates the presence of request JSON data. + + Returns: + The validated request data as a dictionary. + + Raises: + HTTP 400: If no JSON data is provided in the request. + """ form = request.json if not form: abort( @@ -103,7 +134,17 @@ def _validate_request_data(self) -> dict: return form def _get_feature(self, feature_name: str) -> feature_manager.LLMFeatureInterface: - """Retrieves and validates the requested LLM feature.""" + """Retrieves and validates the requested LLM feature. + + Args: + feature_name: The name of the LLM feature to retrieve. + + Returns: + An instance of the requested LLM feature. + + Raises: + HTTP 400: If feature_name is not provided or is invalid. 
+ """ if not feature_name: abort( definitions.HTTP_STATUS_CODE_BAD_REQUEST, @@ -118,7 +159,18 @@ def _get_feature(self, feature_name: str) -> feature_manager.LLMFeatureInterface ) def _validate_indices(self, sketch: Sketch, query_filter: dict) -> list: - """Extracts and validates timeline IDs from the query filter for a sketch.""" + """Extracts and validates timeline IDs from the query filter for a sketch. + + Args: + sketch: The Sketch object to validate indices for. + query_filter: A dictionary containing filter parameters. + + Returns: + A list of validated timeline IDs. + + Raises: + HTTP 400: If no valid search indices are found. + """ all_indices = list({t.searchindex.index_name for t in sketch.timelines}) indices = query_filter.get("indices", all_indices) if "_all" in indices: @@ -138,7 +190,20 @@ def _generate_prompt( form: dict, timeline_ids: list, ) -> str: - """Generates the LLM prompt based on the feature and request data.""" + """Generates the LLM prompt based on the feature and request data. + + Args: + feature: The LLM feature instance to use. + sketch: The Sketch object. + form: The request form data. + timeline_ids: A list of validated timeline IDs. + + Returns: + The generated prompt string for the LLM. + + Raises: + HTTP 400: If prompt generation fails. + """ try: return feature.generate_prompt( sketch, form=form, datastore=self.datastore, timeline_ids=timeline_ids @@ -149,7 +214,20 @@ def _generate_prompt( def _execute_llm_call( self, feature: feature_manager.LLMFeatureInterface, prompt: str, sketch_id: int ) -> dict: - """Executes the LLM call with a timeout using multiprocessing.""" + """Executes the LLM call with a timeout using multiprocessing. + + Args: + feature: The LLM feature instance to use. + prompt: The generated prompt to send to the LLM. + sketch_id: The ID of the sketch being processed. + + Returns: + The LLM response as a dictionary. + + Raises: + HTTP 400: If the LLM call times out. + HTTP 500: If an error occurs during LLM processing. + """ with multiprocessing.Manager() as manager: shared_response = manager.dict() process = multiprocessing.Process( @@ -158,7 +236,6 @@ def _execute_llm_call( ) process.start() process.join(timeout=self._LLM_TIMEOUT_WAIT_SECONDS) - if process.is_alive(): logger.warning( "LLM call timed out after %d seconds.", @@ -169,8 +246,11 @@ def _execute_llm_call( self.METRICS["llm_errors_total"].labels( sketch_id=str(sketch_id), feature=feature.NAME, error_type="timeout" ).inc() - abort(definitions.HTTP_STATUS_CODE_BAD_REQUEST, "LLM call timed out.") - + abort( + definitions.HTTP_STATUS_CODE_BAD_REQUEST, + "LLM call timed out, please try again. " + "If this issue persists, contact your administrator.", + ) response = dict(shared_response) if "error" in response: self.METRICS["llm_errors_total"].labels( @@ -180,14 +260,33 @@ def _execute_llm_call( ).inc() abort( definitions.HTTP_STATUS_CODE_INTERNAL_SERVER_ERROR, - "Error during LLM processing.", + f"Error during LLM processing: {response['error']}", ) return response["response"] def _process_llm_response( - self, feature, response: dict, sketch: Sketch, form: dict, timeline_ids: list + self, + feature: feature_manager.LLMFeatureInterface, + response: dict, + sketch: Sketch, + form: dict, + timeline_ids: list, ) -> dict: - """Processes the LLM response into the final result.""" + """Processes the LLM response into the final result. + + Args: + feature: The LLM feature instance used. + response: The raw LLM response. + sketch: The Sketch object. + form: The request form data. 
+ timeline_ids: A list of validated timeline IDs. + + Returns: + The processed LLM response as a dictionary. + + Raises: + HTTP 400: If response processing fails. + """ try: return feature.process_response( llm_response=response, @@ -206,7 +305,12 @@ def _process_llm_response( abort(definitions.HTTP_STATUS_CODE_BAD_REQUEST, str(e)) def _increment_request_metric(self, sketch_id: int, feature_name: str) -> None: - """Increments the request counter metric.""" + """Increments the request counter metric. + + Args: + sketch_id: The ID of the sketch being processed. + feature_name: The name of the LLM feature being used. + """ self.METRICS["llm_requests_total"].labels( sketch_id=str(sketch_id), feature=feature_name ).inc() @@ -214,7 +318,13 @@ def _increment_request_metric(self, sketch_id: int, feature_name: str) -> None: def _record_duration( self, sketch_id: int, feature_name: str, start_time: float ) -> None: - """Records the duration of the request.""" + """Records the duration of the request. + + Args: + sketch_id: The ID of the sketch being processed. + feature_name: The name of the LLM feature being used. + start_time: The timestamp when the request started. + """ duration = time.time() - start_time self.METRICS["llm_duration_seconds"].labels( sketch_id=str(sketch_id), feature=feature_name @@ -226,7 +336,15 @@ def _get_content_with_timeout( prompt: str, shared_response: multiprocessing.managers.DictProxy, ) -> None: - """Send a prompt to the LLM and get a response within a process.""" + """Send a prompt to the LLM and get a response within a process. + + This method is executed in a separate process to allow for timeout control. + + Args: + feature: The LLM feature instance to use. + prompt: The generated prompt to send to the LLM. + shared_response: A managed dictionary to store the response or error. 
+ """ try: llm = llm_manager.LLMManager.create_provider(feature_name=feature.NAME) response_schema = ( diff --git a/timesketch/api/v1/resources_test.py b/timesketch/api/v1/resources_test.py index 222641e45c..64f3b8f045 100644 --- a/timesketch/api/v1/resources_test.py +++ b/timesketch/api/v1/resources_test.py @@ -1821,3 +1821,86 @@ def test_post_invalid_feature(self, mock_get_feature, mock_get_with_acl): self.assertEqual(response.status_code, HTTP_STATUS_CODE_BAD_REQUEST) response_data = json.loads(response.get_data(as_text=True)) self.assertIn("Invalid LLM feature: invalid_feature", response_data["message"]) + + @mock.patch("timesketch.models.sketch.Sketch.get_with_acl") + @mock.patch( + "timesketch.lib.llms.features.manager.FeatureManager.get_feature_instance" + ) + @mock.patch("timesketch.lib.utils.get_validated_indices") + def test_post_prompt_generation_error( + self, + mock_get_validated_indices, + mock_get_feature, + mock_get_with_acl, + ): + """Test handling of errors during prompt generation.""" + mock_sketch = mock.MagicMock() + mock_sketch.has_permission.return_value = True + mock_sketch.id = 1 + mock_get_with_acl.return_value = mock_sketch + + mock_feature = mock.MagicMock() + mock_feature.NAME = "test_feature" + mock_feature.generate_prompt.side_effect = ValueError( + "Prompt generation failed" + ) + mock_get_feature.return_value = mock_feature + + mock_get_validated_indices.return_value = (["index1"], [1]) + + self.login() + response = self.client.post( + self.resource_url, + data=json.dumps({"feature": "test_feature", "filter": {}}), + content_type="application/json", + ) + + self.assertEqual(response.status_code, HTTP_STATUS_CODE_BAD_REQUEST) + response_data = json.loads(response.get_data(as_text=True)) + self.assertIn("Prompt generation failed", response_data["message"]) + + mock_feature.generate_prompt.assert_called_once() + + @mock.patch("timesketch.models.sketch.Sketch.get_with_acl") + @mock.patch( + "timesketch.lib.llms.features.manager.FeatureManager.get_feature_instance" + ) + @mock.patch("timesketch.lib.utils.get_validated_indices") + @mock.patch("multiprocessing.Process") + def test_post_llm_execution_timeout( + self, + mock_process, + mock_get_validated_indices, + mock_get_feature, + mock_get_with_acl, + ): + """Test handling of LLM execution timeouts.""" + # Setup mocks + mock_sketch = mock.MagicMock() + mock_sketch.has_permission.return_value = True + mock_sketch.id = 1 + mock_get_with_acl.return_value = mock_sketch + + mock_feature = mock.MagicMock() + mock_feature.NAME = "test_feature" + mock_feature.generate_prompt.return_value = "test prompt" + mock_get_feature.return_value = mock_feature + + mock_get_validated_indices.return_value = (["index1"], [1]) + + process_instance = mock.MagicMock() + process_instance.is_alive.return_value = True + mock_process.return_value = process_instance + + self.login() + response = self.client.post( + self.resource_url, + data=json.dumps({"feature": "test_feature", "filter": {}}), + content_type="application/json", + ) + + self.assertEqual(response.status_code, HTTP_STATUS_CODE_BAD_REQUEST) + response_data = json.loads(response.get_data(as_text=True)) + self.assertIn("LLM call timed out", response_data["message"]) + + process_instance.terminate.assert_called_once() From 37481e26fe6fdffb80a8dea6fc03ee1c5e1f5acc Mon Sep 17 00:00:00 2001 From: itsmvd Date: Fri, 28 Feb 2025 17:00:00 +0000 Subject: [PATCH 21/63] Add nl2q and llm_summarize as LLM features --- timesketch/lib/llms/features/llm_summarize.py | 213 +++++++++++++++++ 
.../lib/llms/features/llm_summarize_test.py | 215 ++++++++++++++++++ timesketch/lib/llms/features/nl2q.py | 190 ++++++++++++++++ timesketch/lib/llms/features/nl2q_test.py | 149 ++++++++++++ 4 files changed, 767 insertions(+) create mode 100644 timesketch/lib/llms/features/llm_summarize.py create mode 100644 timesketch/lib/llms/features/llm_summarize_test.py create mode 100644 timesketch/lib/llms/features/nl2q.py create mode 100644 timesketch/lib/llms/features/nl2q_test.py diff --git a/timesketch/lib/llms/features/llm_summarize.py b/timesketch/lib/llms/features/llm_summarize.py new file mode 100644 index 0000000000..65402d7087 --- /dev/null +++ b/timesketch/lib/llms/features/llm_summarize.py @@ -0,0 +1,213 @@ +# Copyright 2025 Google Inc. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""LLM Summarization feature.""" +import json +import logging +from typing import Any, Optional +import pandas as pd +from flask import current_app +from opensearchpy import OpenSearch +from timesketch.lib import utils +from timesketch.api.v1 import export +from timesketch.models.sketch import Sketch +from timesketch.lib.llms.features.interface import LLMFeatureInterface + +logger = logging.getLogger("timesketch.llm.summarize_feature") + + +class LLMSummarizeFeature(LLMFeatureInterface): + """LLM Summarization feature.""" + + NAME = "llm_summarize" + RESPONSE_SCHEMA = { + "type": "object", + "properties": {"summary": {"type": "string"}}, + "required": ["summary"], + } + + def _get_prompt_text(self, events_dict: list) -> str: + """Reads the prompt template from file and injects events. + + Args: + events_dict: List of event dictionaries to inject into prompt. + + Returns: + str: Complete prompt text with injected events. + + Raises: + ValueError: If the prompt path is not configured. + FileNotFoundError: If the prompt file cannot be found. + IOError: If there's an error reading the prompt file. + """ + prompt_file_path = current_app.config.get("PROMPT_LLM_SUMMARIZATION") + if not prompt_file_path: + logger.error("PROMPT_LLM_SUMMARIZATION config not set") + raise ValueError("LLM summarization prompt path not configured.") + try: + with open(prompt_file_path, "r", encoding="utf-8") as file_handle: + prompt_template = file_handle.read() + except FileNotFoundError: + logger.error("Prompt file not found: %s", prompt_file_path) + raise FileNotFoundError(f"LLM Prompt file not found: {prompt_file_path}") + except IOError as e: + logger.error("Error reading prompt file: %s", e) + raise IOError("Error reading LLM prompt file.") from e + prompt_text = prompt_template.replace("", json.dumps(events_dict)) + return prompt_text + + def _run_timesketch_query( + self, + sketch: Sketch, + query_string: str = "*", + query_filter: Optional[dict] = None, + id_list: Optional[list] = None, + datastore: Optional[OpenSearch] = None, + timeline_ids: Optional[list] = None, + ) -> pd.DataFrame: + """Runs a timesketch query and returns results as a DataFrame. + + Args: + sketch: The Sketch object to query. 
+ query_string: Search query string. + query_filter: Dictionary with filter parameters. + id_list: List of event IDs to retrieve. + datastore: OpenSearch instance for querying. + timeline_ids: List of timeline IDs to query. + + Returns: + pd.DataFrame: DataFrame containing query results. + + Raises: + ValueError: If datastore is not provided or no valid indices are found. + """ + if datastore is None: + raise ValueError("Datastore must be provided.") + if not query_filter: + query_filter = {} + if id_list: + id_query = " OR ".join([f'_id:"{event_id}"' for event_id in id_list]) + query_string = id_query + all_indices = list({t.searchindex.index_name for t in sketch.timelines}) + indices_from_filter = query_filter.get("indices", all_indices) + if "_all" in indices_from_filter: + indices_from_filter = all_indices + indices, timeline_ids = utils.get_validated_indices(indices_from_filter, sketch) + if not indices: + raise ValueError( + "No valid search indices were found to perform the search on." + ) + result = datastore.search( + sketch_id=sketch.id, + query_string=query_string, + query_filter=query_filter, + query_dsl="", + indices=indices, + timeline_ids=timeline_ids, + ) + return export.query_results_to_dataframe(result, sketch) + + def generate_prompt(self, sketch: Sketch, **kwargs: Any) -> str: + """Generates the summarization prompt based on events from a query. + + Args: + sketch: The Sketch object containing events to summarize. + **kwargs: Additional arguments including: + - form: Form data containing query and filter information. + - datastore: OpenSearch instance for querying. + - timeline_ids: List of timeline IDs to query. + + Returns: + str: Generated prompt text with events to summarize. + + Raises: + ValueError: If required parameters are missing or if no events are found. + """ + form = kwargs.get("form") + datastore = kwargs.get("datastore") + timeline_ids = kwargs.get("timeline_ids") + if not form: + raise ValueError("Missing 'form' data in kwargs") + query_filter = form.get("filter", {}) + query_string = form.get("query", "*") or "*" + events_df = self._run_timesketch_query( + sketch, + query_string, + query_filter, + datastore=datastore, + timeline_ids=timeline_ids, + ) + if events_df is None or events_df.empty: + return "No events to summarize based on the current filter." + unique_events_df = events_df[["message"]].drop_duplicates( + subset="message", keep="first" + ) + events_dict = unique_events_df.to_dict(orient="records") + if not events_dict: + return "No events to summarize based on the current filter." + return self._get_prompt_text(events_dict) + + def process_response(self, llm_response: Any, **kwargs: Any) -> dict[str, Any]: + """Processes the LLM response and adds additional context information. + + Args: + llm_response: The response from the LLM model, expected to be a dictionary. + **kwargs: Additional arguments including: + - sketch_id: ID of the sketch being processed. + - sketch: The Sketch object. + - datastore: OpenSearch instance for querying. + - timeline_ids: List of timeline IDs. + - form: Form data containing query and filter information. + + Returns: + dict[str, Any]: Dictionary containing the processed response with additional context: + - response: The summary text. + - summary_event_count: Total number of events summarized. + - summary_unique_event_count: Number of unique events summarized. + + Raises: + ValueError: If required parameters are missing or if the LLM response + is not in the expected format. 
+ """ + sketch_id = kwargs.get("sketch_id") + sketch = kwargs.get("sketch") + datastore = kwargs.get("datastore") + timeline_ids = kwargs.get("timeline_ids") + if not sketch_id: + raise ValueError("Missing 'sketch_id' in kwargs") + form = kwargs.get("form") + if not form: + raise ValueError("Missing 'form' data in kwargs") + query_filter = form.get("filter", {}) + query_string = form.get("query", "*") + events_df = self._run_timesketch_query( + sketch, + query_string, + query_filter, + datastore=datastore, + timeline_ids=timeline_ids, + ) + total_events_count = len(events_df) + unique_events_count = len( + events_df[["message"]].drop_duplicates(subset="message", keep="first") + ) + if not isinstance(llm_response, dict): + raise ValueError("LLM response is expected to be a dictionary") + summary_text = llm_response.get("summary") + if summary_text is None: + raise ValueError("LLM response missing 'summary' key") + return { + "response": summary_text, + "summary_event_count": total_events_count, + "summary_unique_event_count": unique_events_count, + } diff --git a/timesketch/lib/llms/features/llm_summarize_test.py b/timesketch/lib/llms/features/llm_summarize_test.py new file mode 100644 index 0000000000..4946f118f0 --- /dev/null +++ b/timesketch/lib/llms/features/llm_summarize_test.py @@ -0,0 +1,215 @@ +# Copyright 2025 Google Inc. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+"""Tests for the llm_summarize feature.""" + +import json +import mock +import pandas as pd +from flask import current_app +from timesketch.lib.testlib import BaseTest +from timesketch.lib.testlib import MockDataStore +from timesketch.lib.llms.features.llm_summarize import LLMSummarizeFeature + + +class TestLLMSummarizeFeature(BaseTest): + """Tests for the LLMSummarizeFeature.""" + + def setUp(self): + """Set up the tests.""" + super().setUp() + self.llm_feature = LLMSummarizeFeature() + current_app.config["PROMPT_LLM_SUMMARIZATION"] = ( + "./data/llm_summarize/prompt.txt" + ) + self.datastore = MockDataStore("noserver", 4711) + + @mock.patch( + "builtins.open", mock.mock_open(read_data="Analyze these events: ") + ) + def test_get_prompt_text(self): + """Tests _get_prompt_text method.""" + events_dict = [{"message": "Test event 1"}, {"message": "Test event 2"}] + prompt = self.llm_feature._get_prompt_text(events_dict) + + self.assertEqual(prompt, f"Analyze these events: {json.dumps(events_dict)}") + + def test_get_prompt_text_missing_file(self): + """Tests _get_prompt_text method with missing file.""" + current_app.config["PROMPT_LLM_SUMMARIZATION"] = "/file_does_not_exist.txt" + + with self.assertRaises(FileNotFoundError): + self.llm_feature._get_prompt_text([]) + + def test_get_prompt_text_missing_config(self): + """Tests _get_prompt_text method with missing config.""" + del current_app.config["PROMPT_LLM_SUMMARIZATION"] + + with self.assertRaises(ValueError): + self.llm_feature._get_prompt_text([]) + + @mock.patch("timesketch.lib.utils.get_validated_indices") + def test_run_timesketch_query(self, mock_get_indices): + """Tests _run_timesketch_query method.""" + mock_get_indices.return_value = ["test_index"], [1] + result_df = pd.DataFrame([{"message": "Test event"}]) + + with mock.patch.object( + self.datastore, "search", return_value={"mock": "result"} + ) as mock_search: + with mock.patch( + "timesketch.api.v1.export.query_results_to_dataframe", + return_value=result_df, + ) as mock_export: + df = self.llm_feature._run_timesketch_query( + self.sketch1, + query_string="test query", + query_filter={"filter": "test"}, + datastore=self.datastore, + ) + + self.assertEqual(len(df), 1) + self.assertEqual(df.iloc[0]["message"], "Test event") + mock_search.assert_called_once() + mock_export.assert_called_once() + + def test_run_timesketch_query_no_datastore(self): + """Tests _run_timesketch_query method with no datastore.""" + with self.assertRaises(ValueError): + self.llm_feature._run_timesketch_query(self.sketch1) + + @mock.patch("timesketch.lib.utils.get_validated_indices") + def test_run_timesketch_query_no_indices(self, mock_get_indices): + """Tests _run_timesketch_query method with no valid indices.""" + mock_get_indices.return_value = [], [] + + with self.assertRaises(ValueError): + self.llm_feature._run_timesketch_query( + self.sketch1, datastore=self.datastore + ) + + @mock.patch( + "timesketch.lib.llms.features.llm_summarize." + "LLMSummarizeFeature._run_timesketch_query" + ) + @mock.patch( + "timesketch.lib.llms.features.llm_summarize." 
+ "LLMSummarizeFeature._get_prompt_text" + ) + def test_generate_prompt(self, mock_get_prompt, mock_run_query): + """Tests generate_prompt method.""" + # Set up mocks + mock_run_query.return_value = pd.DataFrame( + [ + {"message": "Test event 1"}, + {"message": "Test event 2"}, + {"message": "Test event 1"}, # Add duplicate event on purpose + ] + ) + mock_get_prompt.return_value = "Test prompt" + + # Call the method + prompt = self.llm_feature.generate_prompt( + self.sketch1, form={"query": "test", "filter": {}}, datastore=self.datastore + ) + + # Verify the result + self.assertEqual(prompt, "Test prompt") + mock_run_query.assert_called_once() + called_events = mock_get_prompt.call_args[0][0] + self.assertEqual(len(called_events), 2) + self.assertEqual(called_events[0]["message"], "Test event 1") + self.assertEqual(called_events[1]["message"], "Test event 2") + + @mock.patch( + "timesketch.lib.llms.features.llm_summarize.LLMSummarizeFeature." + "_run_timesketch_query" + ) + def test_generate_prompt_no_events(self, mock_run_query): + """Tests generate_prompt method with no events.""" + mock_run_query.return_value = pd.DataFrame() + + prompt = self.llm_feature.generate_prompt( + self.sketch1, form={"query": "test", "filter": {}}, datastore=self.datastore + ) + + self.assertEqual(prompt, "No events to summarize based on the current filter.") + + def test_generate_prompt_missing_form(self): + """Tests generate_prompt method with missing form.""" + with self.assertRaises(ValueError): + self.llm_feature.generate_prompt(self.sketch1, datastore=self.datastore) + + @mock.patch( + "timesketch.lib.llms.features.llm_summarize.LLMSummarizeFeature." + "_run_timesketch_query" + ) + def test_process_response(self, mock_run_query): + """Tests process_response method.""" + mock_run_query.return_value = pd.DataFrame( + [ + {"message": "Test event 1"}, + {"message": "Test event 2"}, + {"message": "Test event 1"}, # Add duplicate event on purpose + ] + ) + + result = self.llm_feature.process_response( + {"summary": "This is a test summary"}, + sketch_id=1, + sketch=self.sketch1, + form={"query": "test", "filter": {}}, + datastore=self.datastore, + ) + + self.assertEqual(result["response"], "This is a test summary") + self.assertEqual(result["summary_event_count"], 3) + self.assertEqual(result["summary_unique_event_count"], 2) + + def test_process_response_missing_params(self): + """Tests process_response method with missing parameters.""" + with self.assertRaises(ValueError): + self.llm_feature.process_response( + {"summary": "Test"}, + sketch=self.sketch1, + form={"query": "test", "filter": {}}, + datastore=self.datastore, + ) + + with self.assertRaises(ValueError): + self.llm_feature.process_response( + {"summary": "Test"}, + sketch_id=1, + sketch=self.sketch1, + datastore=self.datastore, + ) + + def test_process_response_invalid_response(self): + """Tests process_response method with invalid response format.""" + with self.assertRaises(ValueError): + self.llm_feature.process_response( + "Not a dict", + sketch_id=1, + sketch=self.sketch1, + form={"query": "test", "filter": {}}, + datastore=self.datastore, + ) + + with self.assertRaises(ValueError): + self.llm_feature.process_response( + {"not_summary": "Test"}, + sketch_id=1, + sketch=self.sketch1, + form={"query": "test", "filter": {}}, + datastore=self.datastore, + ) diff --git a/timesketch/lib/llms/features/nl2q.py b/timesketch/lib/llms/features/nl2q.py new file mode 100644 index 0000000000..bd0aa7d674 --- /dev/null +++ 
b/timesketch/lib/llms/features/nl2q.py
@@ -0,0 +1,190 @@
+# Copyright 2025 Google Inc. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""Natural language to query (NL2Q) version 1."""
+import logging
+from typing import Any
+import pandas as pd
+from flask import current_app
+from timesketch.api.v1 import utils
+from timesketch.models.sketch import Sketch
+from timesketch.lib.llms.features.interface import LLMFeatureInterface
+
+logger = logging.getLogger("timesketch.llm.nl2q_feature")
+
+
+class Nl2qFeature(LLMFeatureInterface):
+    """NL2Q feature."""
+
+    NAME = "nl2q"
+
+    def _sketch_data_types(self, sketch: Sketch) -> str:
+        """Get the data types for the current sketch.
+
+        Args:
+            sketch: The Sketch object to extract data types from.
+
+        Returns:
+            str: Comma-separated list of data types found in the sketch.
+        """
+        output = []
+        data_type_aggregation = utils.run_aggregator(
+            sketch.id, "field_bucket", {"field": "data_type", "limit": "1000"}
+        )
+        if not data_type_aggregation or not data_type_aggregation[0]:
+            logger.error("Internal problem with the aggregations.")
+            return ""
+        data_types = data_type_aggregation[0].values
+        if not data_types:
+            logger.warning("No data types in the sketch.")
+            return ""
+        for data_type in data_types:
+            output.append(data_type.get("data_type"))
+        return ",".join(output)
+
+    def _data_types_descriptions(self, data_types: str) -> str:
+        """Creates a formatted string of data types and attribute descriptions.
+
+        Args:
+            data_types: Comma-separated list of data types.
+
+        Returns:
+            str: Multi-line string with data types and their field descriptions.
+        """
+        df_data_types = utils.load_csv_file("DATA_TYPES_PATH")
+        if df_data_types.empty:
+            logger.error("No data types description file or the file is empty.")
+            return ""
+        df_short_data_types = pd.DataFrame(
+            df_data_types.groupby("data_type").apply(self._concatenate_values),
+            columns=["fields"],
+        )
+        df_short_data_types["data_type"] = df_short_data_types.index
+        df_short_data_types["data_type"] = df_short_data_types["data_type"].apply(
+            lambda x: x.strip()
+        )
+        df_short_data_types.reset_index(drop=True, inplace=True)
+        output = []
+        for dtype in data_types.split(","):
+            extract = df_short_data_types[
+                df_short_data_types["data_type"] == dtype.strip()
+            ]
+            if extract.empty:
+                logger.warning("'%s' not found in data types", dtype.strip())
+                continue
+            output.append(extract.iloc[0]["fields"])
+        return "\n".join(output)
+
+    def _generate_fields(self, group) -> str:
+        """Generates the fields for a data type.
+
+        Args:
+            group: DataFrame group containing field, type, and description columns.
+
+        Returns:
+            str: Comma-separated list of fields formatted as strings.
+        """
+        return ", ".join(
+            f'"{f}"'
+            for f, t, d in zip(group["field"], group["type"], group["description"])
+        )
+
+    def _concatenate_values(self, group) -> str:
+        """Concatenates the fields for a data type.
+
+        Args:
+            group: DataFrame group with data_type and field information.
+ + Returns: + str: Formatted string with data type and its fields. + """ + return f'* "{group["data_type"].iloc[0]}" -> {self._generate_fields(group)}' + + def _build_prompt(self, question: str, sketch: Sketch) -> str: + """Builds the prompt for NL2Q. + + Args: + question: Natural language question from the user. + sketch: The Sketch object to extract data types from. + + Returns: + str: Complete prompt with question, examples, and data types. + + Raises: + OSError: If prompt or examples file cannot be opened. + IOError: If prompt or examples file cannot be read. + """ + prompt_file = current_app.config.get("PROMPT_NL2Q", "") + examples_file = current_app.config.get("EXAMPLES_NL2Q", "") + try: + with open(prompt_file, "r") as file: + prompt = file.read() + except (OSError, IOError): + logger.error("No prompt file found") + raise + try: + with open(examples_file, "r") as file: + examples = file.read() + except (OSError, IOError): + logger.error("No examples file found") + raise # Re-raise the exception + prompt = prompt.format( + examples=examples, + question=question, + data_types=self._data_types_descriptions(self._sketch_data_types(sketch)), + ) + return prompt + + def generate_prompt(self, sketch: Sketch, **kwargs: Any) -> str: + """Generates the NL2Q prompt. + + Args: + sketch: The Sketch object. + kwargs: Must contain 'form' with a 'question' key. + + Returns: + str: The generated prompt. + + Raises: + ValueError: If the required question is missing from the form data. + """ + form = kwargs.get("form") + if not form or "question" not in form: + raise ValueError("Missing 'question' in form data") + question = form["question"] + return self._build_prompt(question, sketch) + + def process_response(self, llm_response: str, **kwargs: Any) -> dict[str, Any]: + """Processes the LLM response, extracting the query. + + Args: + llm_response: String response from the LLM. + kwargs: Additional arguments (not used). + + Returns: + dict[str, Any]: Dictionary containing the search query with keys: + - name: Name of the generated query + - query_string: The actual query string + - error: Error message (None if successful) + + Raises: + ValueError: If the LLM response is not a string. + """ + if not isinstance(llm_response, str): + raise ValueError(f"Unexpected response type from LLM: {type(llm_response)}") + result_schema = { + "name": "AI generated search query", + "query_string": llm_response.strip("`\n\r\t "), + "error": None, + } + return result_schema diff --git a/timesketch/lib/llms/features/nl2q_test.py b/timesketch/lib/llms/features/nl2q_test.py new file mode 100644 index 0000000000..684a5f54fc --- /dev/null +++ b/timesketch/lib/llms/features/nl2q_test.py @@ -0,0 +1,149 @@ +# Copyright 2025 Google Inc. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+"""Tests for the nl2q feature.""" + +import mock +import pandas as pd +from flask import current_app +from timesketch.lib.testlib import BaseTest +from timesketch.lib.llms.features.nl2q import Nl2qFeature + + +class TestNl2qFeature(BaseTest): + """Tests for the Nl2qFeature.""" + + def setUp(self): + """Set up the tests.""" + super().setUp() + self.nl2q_feature = Nl2qFeature() + current_app.config["PROMPT_NL2Q"] = "./tests/test_data/nl2q/test_prompt_nl2q" + current_app.config["EXAMPLES_NL2Q"] = ( + "./tests/test_data/nl2q/test_examples_nl2q" + ) + + @mock.patch("timesketch.lib.llms.features.nl2q.utils.run_aggregator") + def test_sketch_data_types(self, mock_aggregator): + """Test _sketch_data_types method.""" + mock_AggregationResult = mock.MagicMock() + mock_AggregationResult.values = [ + {"data_type": "test:data_type:1"}, + {"data_type": "test:data_type:2"}, + ] + mock_aggregator.return_value = (mock_AggregationResult, {}) + + data_types = self.nl2q_feature._sketch_data_types(self.sketch1) + + self.assertEqual(data_types, "test:data_type:1,test:data_type:2") + mock_aggregator.assert_called_once_with( + self.sketch1.id, "field_bucket", {"field": "data_type", "limit": "1000"} + ) + + @mock.patch("timesketch.lib.llms.features.nl2q.utils.load_csv_file") + def test_data_types_descriptions(self, mock_load_csv): + """Test _data_types_descriptions method.""" + mock_df = pd.DataFrame( + { + "data_type": [ + "test:data_type:1", + "test:data_type:1", + "test:data_type:2", + ], + "field": ["field_test_1", "field_test_2", "field_test_3"], + "type": ["text", "text", "text"], + "description": ["desc1", "desc2", "desc3"], + } + ) + mock_load_csv.return_value = mock_df + + descriptions = self.nl2q_feature._data_types_descriptions( + "test:data_type:1,test:data_type:2" + ) + + self.assertIn( + '* "test:data_type:1" -> "field_test_1", "field_test_2"', descriptions + ) + self.assertIn('* "test:data_type:2" -> "field_test_3"', descriptions) + + @mock.patch("timesketch.lib.llms.features.nl2q.Nl2qFeature._sketch_data_types") + @mock.patch( + "timesketch.lib.llms.features.nl2q.Nl2qFeature._data_types_descriptions" + ) + def test_build_prompt(self, mock_data_types_desc, mock_sketch_data_types): + """Test _build_prompt method.""" + mock_sketch_data_types.return_value = "test:data_type:1,test:data_type:2" + mock_data_types_desc.return_value = ( + '* "test:data_type:1" -> "field_test_1", "field_test_2"\n' + '* "test:data_type:2" -> "field_test_3"' + ) + + prompt_content = ( + "Examples:\n{examples}\nTypes:\n{data_types}\nQuestion:\n{question}" + ) + examples_content = "example 1\n\nexample 2" + + m = mock.mock_open() + m.side_effect = [ + mock.mock_open(read_data=prompt_content).return_value, + mock.mock_open(read_data=examples_content).return_value, + ] + + with mock.patch("builtins.open", m): + prompt = self.nl2q_feature._build_prompt("What happened?", self.sketch1) + + self.assertIn("Examples:", prompt) + self.assertIn("example 1", prompt) + self.assertIn("example 2", prompt) + self.assertIn("Types:", prompt) + self.assertIn('* "test:data_type:1" -> "field_test_1", "field_test_2"', prompt) + self.assertIn('* "test:data_type:2" -> "field_test_3"', prompt) + self.assertIn("Question:", prompt) + self.assertIn("What happened?", prompt) + + @mock.patch("timesketch.lib.llms.features.nl2q.Nl2qFeature._build_prompt") + def test_generate_prompt(self, mock_build_prompt): + """Test generate_prompt method.""" + mock_build_prompt.return_value = "Test prompt" + + prompt = self.nl2q_feature.generate_prompt( + self.sketch1, 
form={"question": "What happened?"} + ) + + self.assertEqual(prompt, "Test prompt") + mock_build_prompt.assert_called_once_with("What happened?", self.sketch1) + + def test_generate_prompt_missing_question(self): + """Test generate_prompt method with missing question.""" + with self.assertRaises(ValueError): + self.nl2q_feature.generate_prompt(self.sketch1, form={}) + + with self.assertRaises(ValueError): + self.nl2q_feature.generate_prompt(self.sketch1) + + def test_process_response(self): + """Test process_response method.""" + result = self.nl2q_feature.process_response("test query") + self.assertEqual(result["query_string"], "test query") + self.assertIsNone(result["error"]) + + result = self.nl2q_feature.process_response(" \t`test query`\n ") + self.assertEqual(result["query_string"], "test query") + + result = self.nl2q_feature.process_response("```test query``") + self.assertEqual(result["query_string"], "test query") + + result = self.nl2q_feature.process_response(" \t```test query```\n ") + self.assertEqual(result["query_string"], "test query") + + with self.assertRaises(ValueError): + self.nl2q_feature.process_response(123) From d028f0f5c5e48a91e57ac958e1bb47aed451a752 Mon Sep 17 00:00:00 2001 From: itsmvd Date: Fri, 28 Feb 2025 17:08:04 +0000 Subject: [PATCH 22/63] Couple of linter fixes on llm_summarize --- timesketch/lib/llms/features/llm_summarize.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/timesketch/lib/llms/features/llm_summarize.py b/timesketch/lib/llms/features/llm_summarize.py index 65402d7087..786ba0e1b4 100644 --- a/timesketch/lib/llms/features/llm_summarize.py +++ b/timesketch/lib/llms/features/llm_summarize.py @@ -57,9 +57,11 @@ def _get_prompt_text(self, events_dict: list) -> str: try: with open(prompt_file_path, "r", encoding="utf-8") as file_handle: prompt_template = file_handle.read() - except FileNotFoundError: + except FileNotFoundError as exc: logger.error("Prompt file not found: %s", prompt_file_path) - raise FileNotFoundError(f"LLM Prompt file not found: {prompt_file_path}") + raise FileNotFoundError( + f"LLM Prompt file not found: {prompt_file_path}" + ) from exc except IOError as e: logger.error("Error reading prompt file: %s", e) raise IOError("Error reading LLM prompt file.") from e @@ -170,7 +172,7 @@ def process_response(self, llm_response: Any, **kwargs: Any) -> dict[str, Any]: - form: Form data containing query and filter information. Returns: - dict[str, Any]: Dictionary containing the processed response with additional context: + Dictionary containing the processed response with additional context: - response: The summary text. - summary_event_count: Total number of events summarized. - summary_unique_event_count: Number of unique events summarized. 
From f4471b283ce68cb69b23caac053dd7f7a3e30389 Mon Sep 17 00:00:00 2001 From: itsmvd Date: Fri, 28 Feb 2025 17:13:17 +0000 Subject: [PATCH 23/63] pylint: disable=protected-access --- timesketch/lib/llms/features/llm_summarize_test.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/timesketch/lib/llms/features/llm_summarize_test.py b/timesketch/lib/llms/features/llm_summarize_test.py index 4946f118f0..186bad0f1f 100644 --- a/timesketch/lib/llms/features/llm_summarize_test.py +++ b/timesketch/lib/llms/features/llm_summarize_test.py @@ -21,7 +21,7 @@ from timesketch.lib.testlib import MockDataStore from timesketch.lib.llms.features.llm_summarize import LLMSummarizeFeature - +#pylint: disable=protected-access class TestLLMSummarizeFeature(BaseTest): """Tests for the LLMSummarizeFeature.""" From e7e82d4c7afb109d81eccd4e10baf6049aac8d66 Mon Sep 17 00:00:00 2001 From: itsmvd Date: Fri, 28 Feb 2025 17:15:35 +0000 Subject: [PATCH 24/63] black formatting --- timesketch/lib/llms/features/llm_summarize_test.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/timesketch/lib/llms/features/llm_summarize_test.py b/timesketch/lib/llms/features/llm_summarize_test.py index 186bad0f1f..c103321aff 100644 --- a/timesketch/lib/llms/features/llm_summarize_test.py +++ b/timesketch/lib/llms/features/llm_summarize_test.py @@ -21,7 +21,8 @@ from timesketch.lib.testlib import MockDataStore from timesketch.lib.llms.features.llm_summarize import LLMSummarizeFeature -#pylint: disable=protected-access + +# pylint: disable=protected-access class TestLLMSummarizeFeature(BaseTest): """Tests for the LLMSummarizeFeature.""" From e171e4d10563bdcf330511cce8730a61a45fc86a Mon Sep 17 00:00:00 2001 From: itsmvd Date: Fri, 28 Feb 2025 17:20:04 +0000 Subject: [PATCH 25/63] # pylint: disable=protected-access --- timesketch/lib/llms/features/nl2q_test.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/timesketch/lib/llms/features/nl2q_test.py b/timesketch/lib/llms/features/nl2q_test.py index 684a5f54fc..e3e52d470d 100644 --- a/timesketch/lib/llms/features/nl2q_test.py +++ b/timesketch/lib/llms/features/nl2q_test.py @@ -19,7 +19,7 @@ from timesketch.lib.testlib import BaseTest from timesketch.lib.llms.features.nl2q import Nl2qFeature - +# pylint: disable=protected-access class TestNl2qFeature(BaseTest): """Tests for the Nl2qFeature.""" From 1cf49c073e16ab2cfc96bb0fdcc02f0f412fcfc3 Mon Sep 17 00:00:00 2001 From: itsmvd Date: Fri, 28 Feb 2025 17:22:54 +0000 Subject: [PATCH 26/63] formatting on nl2q --- timesketch/lib/llms/features/nl2q_test.py | 1 + 1 file changed, 1 insertion(+) diff --git a/timesketch/lib/llms/features/nl2q_test.py b/timesketch/lib/llms/features/nl2q_test.py index e3e52d470d..c902cad527 100644 --- a/timesketch/lib/llms/features/nl2q_test.py +++ b/timesketch/lib/llms/features/nl2q_test.py @@ -19,6 +19,7 @@ from timesketch.lib.testlib import BaseTest from timesketch.lib.llms.features.nl2q import Nl2qFeature + # pylint: disable=protected-access class TestNl2qFeature(BaseTest): """Tests for the Nl2qFeature.""" From 0b3f25105be19b0d5ba7b3bb0e73416bdc351a5b Mon Sep 17 00:00:00 2001 From: itsmvd Date: Sat, 1 Mar 2025 09:34:33 +0000 Subject: [PATCH 27/63] add feature specific metrics --- timesketch/lib/llms/features/llm_summarize.py | 44 ++++++++++++++----- 1 file changed, 32 insertions(+), 12 deletions(-) diff --git a/timesketch/lib/llms/features/llm_summarize.py b/timesketch/lib/llms/features/llm_summarize.py index 786ba0e1b4..1daca5e84d 100644 --- 
a/timesketch/lib/llms/features/llm_summarize.py +++ b/timesketch/lib/llms/features/llm_summarize.py @@ -16,15 +16,33 @@ import logging from typing import Any, Optional import pandas as pd +import prometheus_client from flask import current_app from opensearchpy import OpenSearch from timesketch.lib import utils from timesketch.api.v1 import export from timesketch.models.sketch import Sketch +from timesketch.lib.definitions import METRICS_NAMESPACE from timesketch.lib.llms.features.interface import LLMFeatureInterface logger = logging.getLogger("timesketch.llm.summarize_feature") +# TODO(itsmvd): Remove 'feature' prefix after migration +METRICS = { + "llm_summary_events_processed_total": prometheus_client.Counter( + "feature_llm_summary_events_processed_total", # avoid duplicate registration + "Total number of events processed for LLM summarization", + ["sketch_id"], + namespace=METRICS_NAMESPACE, + ), + "llm_summary_unique_events_total": prometheus_client.Counter( + "feature_llm_summary_unique_events_total", # avoid duplicate registration + "Total number of unique events sent to the LLM", + ["sketch_id"], + namespace=METRICS_NAMESPACE, + ), +} + class LLMSummarizeFeature(LLMFeatureInterface): """LLM Summarization feature.""" @@ -38,13 +56,10 @@ class LLMSummarizeFeature(LLMFeatureInterface): def _get_prompt_text(self, events_dict: list) -> str: """Reads the prompt template from file and injects events. - Args: events_dict: List of event dictionaries to inject into prompt. - Returns: str: Complete prompt text with injected events. - Raises: ValueError: If the prompt path is not configured. FileNotFoundError: If the prompt file cannot be found. @@ -78,7 +93,6 @@ def _run_timesketch_query( timeline_ids: Optional[list] = None, ) -> pd.DataFrame: """Runs a timesketch query and returns results as a DataFrame. - Args: sketch: The Sketch object to query. query_string: Search query string. @@ -86,10 +100,8 @@ def _run_timesketch_query( id_list: List of event IDs to retrieve. datastore: OpenSearch instance for querying. timeline_ids: List of timeline IDs to query. - Returns: pd.DataFrame: DataFrame containing query results. - Raises: ValueError: If datastore is not provided or no valid indices are found. """ @@ -121,17 +133,14 @@ def _run_timesketch_query( def generate_prompt(self, sketch: Sketch, **kwargs: Any) -> str: """Generates the summarization prompt based on events from a query. - Args: sketch: The Sketch object containing events to summarize. **kwargs: Additional arguments including: - form: Form data containing query and filter information. - datastore: OpenSearch instance for querying. - timeline_ids: List of timeline IDs to query. - Returns: str: Generated prompt text with events to summarize. - Raises: ValueError: If required parameters are missing or if no events are found. """ @@ -151,17 +160,30 @@ def generate_prompt(self, sketch: Sketch, **kwargs: Any) -> str: ) if events_df is None or events_df.empty: return "No events to summarize based on the current filter." 
+ + # Count and record total events + total_events_count = len(events_df) + METRICS["llm_summary_events_processed_total"].labels( + sketch_id=str(sketch.id) + ).inc(total_events_count) + + # Get unique events, count and record them unique_events_df = events_df[["message"]].drop_duplicates( subset="message", keep="first" ) + unique_events_count = len(unique_events_df) + METRICS["llm_summary_unique_events_total"].labels(sketch_id=str(sketch.id)).inc( + unique_events_count + ) + events_dict = unique_events_df.to_dict(orient="records") if not events_dict: return "No events to summarize based on the current filter." + return self._get_prompt_text(events_dict) def process_response(self, llm_response: Any, **kwargs: Any) -> dict[str, Any]: """Processes the LLM response and adds additional context information. - Args: llm_response: The response from the LLM model, expected to be a dictionary. **kwargs: Additional arguments including: @@ -170,13 +192,11 @@ def process_response(self, llm_response: Any, **kwargs: Any) -> dict[str, Any]: - datastore: OpenSearch instance for querying. - timeline_ids: List of timeline IDs. - form: Form data containing query and filter information. - Returns: Dictionary containing the processed response with additional context: - response: The summary text. - summary_event_count: Total number of events summarized. - summary_unique_event_count: Number of unique events summarized. - Raises: ValueError: If required parameters are missing or if the LLM response is not in the expected format. From bb2bc886cf2d946eee4a12b896cefb4cc3a3440c Mon Sep 17 00:00:00 2001 From: itsmvd Date: Sat, 1 Mar 2025 09:36:56 +0000 Subject: [PATCH 28/63] remove unnecessary comments --- timesketch/lib/llms/features/llm_summarize.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/timesketch/lib/llms/features/llm_summarize.py b/timesketch/lib/llms/features/llm_summarize.py index 1daca5e84d..695a5a3c7b 100644 --- a/timesketch/lib/llms/features/llm_summarize.py +++ b/timesketch/lib/llms/features/llm_summarize.py @@ -161,13 +161,11 @@ def generate_prompt(self, sketch: Sketch, **kwargs: Any) -> str: if events_df is None or events_df.empty: return "No events to summarize based on the current filter." - # Count and record total events total_events_count = len(events_df) METRICS["llm_summary_events_processed_total"].labels( sketch_id=str(sketch.id) ).inc(total_events_count) - # Get unique events, count and record them unique_events_df = events_df[["message"]].drop_duplicates( subset="message", keep="first" ) From 5bf333e4ba3475390cd21afdb8d3d45bf7476252 Mon Sep 17 00:00:00 2001 From: janosch Date: Mon, 3 Mar 2025 10:18:14 +0000 Subject: [PATCH 29/63] fix black linter --- setup.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/setup.py b/setup.py index ac1b357bb8..9018ee4c69 100755 --- a/setup.py +++ b/setup.py @@ -14,8 +14,8 @@ # limitations under the License. """This is the setup file for the project. 
The standard setup rules apply:
- python setup.py build
- sudo python setup.py install
+python setup.py build
+sudo python setup.py install
 """
 
 from __future__ import print_function

From 68140794fa45609f44cb62aacdac69d13c5f8b40 Mon Sep 17 00:00:00 2001
From: itsmvd
Date: Tue, 4 Mar 2025 09:23:14 +0000
Subject: [PATCH 30/63] review fixes

---
 timesketch/lib/llms/features/llm_summarize.py | 19 +++++++++++--------
 1 file changed, 11 insertions(+), 8 deletions(-)

diff --git a/timesketch/lib/llms/features/llm_summarize.py b/timesketch/lib/llms/features/llm_summarize.py
index 695a5a3c7b..7c38339214 100644
--- a/timesketch/lib/llms/features/llm_summarize.py
+++ b/timesketch/lib/llms/features/llm_summarize.py
@@ -48,16 +48,17 @@ class LLMSummarizeFeature(LLMFeatureInterface):
     """LLM Summarization feature."""
 
     NAME = "llm_summarize"
+    PROMPT_CONFIG_KEY = "PROMPT_LLM_SUMMARIZATION"
     RESPONSE_SCHEMA = {
         "type": "object",
         "properties": {"summary": {"type": "string"}},
         "required": ["summary"],
     }
 
-    def _get_prompt_text(self, events_dict: list) -> str:
+    def _get_prompt_text(self, events: list[dict[str, Any]]) -> str:
         """Reads the prompt template from file and injects events.
         Args:
-            events_dict: List of event dictionaries to inject into prompt.
+            events: List of event dictionaries to inject into prompt.
         Returns:
             str: Complete prompt text with injected events.
         Raises:
@@ -65,10 +66,11 @@ def _get_prompt_text(self, events_dict: list) -> str:
             FileNotFoundError: If the prompt file cannot be found.
             IOError: If there's an error reading the prompt file.
         """
-        prompt_file_path = current_app.config.get("PROMPT_LLM_SUMMARIZATION")
+        prompt_file_path = current_app.config.get(self.PROMPT_CONFIG_KEY)
         if not prompt_file_path:
-            logger.error("PROMPT_LLM_SUMMARIZATION config not set")
+            logger.error("%s config not set", self.PROMPT_CONFIG_KEY)
             raise ValueError("LLM summarization prompt path not configured.")
+
         try:
             with open(prompt_file_path, "r", encoding="utf-8") as file_handle:
                 prompt_template = file_handle.read()
@@ -80,7 +82,8 @@ def _get_prompt_text(self, events: list[dict[str, Any]]) -> str:
         except IOError as e:
             logger.error("Error reading prompt file: %s", e)
             raise IOError("Error reading LLM prompt file.") from e
-        prompt_text = prompt_template.replace("<EVENTS_JSON>", json.dumps(events_dict))
+
+        prompt_text = prompt_template.replace("<EVENTS_JSON>", json.dumps(events))
         return prompt_text
 
     def _run_timesketch_query(
@@ -174,11 +177,11 @@ def generate_prompt(self, sketch: Sketch, **kwargs: Any) -> str:
                 unique_events_count
             )
 
-        events_dict = unique_events_df.to_dict(orient="records")
-        if not events_dict:
+        events = unique_events_df.to_dict(orient="records")
+        if not events:
             return "No events to summarize based on the current filter."
-        return self._get_prompt_text(events_dict)
+
+        return self._get_prompt_text(events)
 
     def process_response(self, llm_response: Any, **kwargs: Any) -> dict[str, Any]:
         """Processes the LLM response and adds additional context information.
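The PROMPT_CONFIG_KEY attribute introduced in patch 30 above lets each LLM feature name its own prompt file in the Flask config, and the selected events are injected into that template with plain string substitution. The next patch adds a guard that validates the placeholder before substituting. A minimal sketch of the combined flow, with an inline template standing in for the file referenced by the PROMPT_LLM_SUMMARIZATION config key and made-up event messages:

import json

prompt_template = "Summarize the following events:\n<EVENTS_JSON>"
events = [{"message": "failed login for root"}, {"message": "new user created"}]

# Fail early if the template cannot receive the events; str.replace() would
# otherwise return the template unchanged and the LLM would see no events.
if "<EVENTS_JSON>" not in prompt_template:
    raise ValueError("Prompt template is missing the required <EVENTS_JSON> placeholder.")

prompt_text = prompt_template.replace("<EVENTS_JSON>", json.dumps(events))
print(prompt_text)

json.dumps() is what keeps event messages with embedded quotes or newlines intact inside the prompt; interpolating str(events) would emit Python repr syntax that the model could mis-parse.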
From 25e7042e464a0574c84ed7fe5583e0c47159a9d8 Mon Sep 17 00:00:00 2001
From: itsmvd
Date: Tue, 4 Mar 2025 10:04:08 +0000
Subject: [PATCH 31/63] Handle incorrect prompt file + test

---
 timesketch/lib/llms/features/llm_summarize.py | 15 +++++++++++----
 .../lib/llms/features/llm_summarize_test.py   | 13 +++++++++++++
 2 files changed, 24 insertions(+), 4 deletions(-)

diff --git a/timesketch/lib/llms/features/llm_summarize.py b/timesketch/lib/llms/features/llm_summarize.py
index 7c38339214..9a6cf5f457 100644
--- a/timesketch/lib/llms/features/llm_summarize.py
+++ b/timesketch/lib/llms/features/llm_summarize.py
@@ -55,14 +55,14 @@ class LLMSummarizeFeature(LLMFeatureInterface):
         "required": ["summary"],
     }
 
-    def _get_prompt_text(self, events: list[dict[str, Any]]) -> str:
+    def _get_prompt_text(self, events_dict: list[dict[str, Any]]) -> str:
         """Reads the prompt template from file and injects events.
         Args:
-            events: List of event dictionaries to inject into prompt.
+            events_dict: List of event dictionaries to inject into prompt.
         Returns:
             str: Complete prompt text with injected events.
         Raises:
-            ValueError: If the prompt path is not configured.
+            ValueError: If the prompt path is not configured or placeholder is missing.
             FileNotFoundError: If the prompt file cannot be found.
             IOError: If there's an error reading the prompt file.
         """
@@ -83,7 +83,14 @@ def _get_prompt_text(self, events: list[dict[str, Any]]) -> str:
             logger.error("Error reading prompt file: %s", e)
             raise IOError("Error reading LLM prompt file.") from e
 
-        prompt_text = prompt_template.replace("<EVENTS_JSON>", json.dumps(events))
+        if "<EVENTS_JSON>" not in prompt_template:
+            logger.error("Prompt template is missing the <EVENTS_JSON> placeholder")
+            raise ValueError(
+                "LLM summarization prompt template is missing the "
+                "required <EVENTS_JSON> placeholder."
+            )
+
+        prompt_text = prompt_template.replace("<EVENTS_JSON>", json.dumps(events_dict))
         return prompt_text
 
     def _run_timesketch_query(
diff --git a/timesketch/lib/llms/features/llm_summarize_test.py b/timesketch/lib/llms/features/llm_summarize_test.py
index c103321aff..073ca07479 100644
--- a/timesketch/lib/llms/features/llm_summarize_test.py
+++ b/timesketch/lib/llms/features/llm_summarize_test.py
@@ -45,6 +45,19 @@ def test_get_prompt_text(self):
 
         self.assertEqual(prompt, f"Analyze these events: {json.dumps(events_dict)}")
 
+    @mock.patch(
+        "builtins.open",
+        mock.mock_open(read_data="Analyze these events without placeholder"),
+    )
+    def test_get_prompt_text_missing_placeholder(self):
+        """Tests _get_prompt_text method with missing placeholder."""
+        events_dict = [{"message": "Test event"}]
+        with self.assertRaises(ValueError) as context:
+            self.llm_feature._get_prompt_text(events_dict)
+        self.assertIn(
+            "missing the required <EVENTS_JSON> placeholder", str(context.exception)
+        )
+
     def test_get_prompt_text_missing_file(self):
         """Tests _get_prompt_text method with missing file."""
         current_app.config["PROMPT_LLM_SUMMARIZATION"] = "/file_does_not_exist.txt"

From e76ffcfe614ab4e3c3f8475abba8bffb309c6cd6 Mon Sep 17 00:00:00 2001
From: itsmvd
Date: Tue, 4 Mar 2025 10:23:47 +0000
Subject: [PATCH 32/63] frontend: LLM features switch to new llm endpoint

---
 .../src/components/Explore/EventList.vue      |  4 ++--
 .../src/components/Scenarios/QuestionCard.vue | 21 ++++++++++---------
 2 files changed, 13 insertions(+), 12 deletions(-)

diff --git a/timesketch/frontend-ng/src/components/Explore/EventList.vue b/timesketch/frontend-ng/src/components/Explore/EventList.vue
index f9bcd7a793..f7d7de62fe 100644
--- a/timesketch/frontend-ng/src/components/Explore/EventList.vue
+++ b/timesketch/frontend-ng/src/components/Explore/EventList.vue
@@ -957,9 +957,9 @@ export default {
         query: this.currentQueryString,
         filter: this.currentQueryFilter,
       }
-      ApiClient.getEventSummary(this.sketch.id, formData)
+      ApiClient.llmRequest(this.sketch.id, 'llm_summarize', formData)
         .then((response) => {
-          this.$set(this.eventList.meta, 'summary', response.data.summary)
+          this.$set(this.eventList.meta, 'summary', response.data.response)
           this.$set(this.eventList.meta, 'summary_event_count', response.data.summary_event_count)
           this.$set(this.eventList.meta, 'summary_unique_event_count', response.data.summary_unique_event_count)
           this.isSummaryLoading = false
diff --git a/timesketch/frontend-ng/src/components/Scenarios/QuestionCard.vue b/timesketch/frontend-ng/src/components/Scenarios/QuestionCard.vue
index 64f538c08d..40068253e7 100644
--- a/timesketch/frontend-ng/src/components/Scenarios/QuestionCard.vue
+++ b/timesketch/frontend-ng/src/components/Scenarios/QuestionCard.vue
@@ -404,16 +404,17 @@ export default {
   },
   methods: {
     getSuggestedQuery() {
-      this.suggestedQueryLoading = true
-      ApiClient.nl2q(this.sketch.id, this.activeQuestion.display_name)
-        .then((response) => {
-          this.suggestedQuery = response.data
-          this.suggestedQueryLoading = false
-        })
-        .catch((e) => {
-          console.error(e)
-        })
-    },
+        this.suggestedQueryLoading = true
+        let formData = { question: this.activeQuestion.display_name }
+        ApiClient.llmRequest(this.sketch.id, 'nl2q', formData)
+        .then((response) => {
+          this.suggestedQuery = response.data
+          this.suggestedQueryLoading = false
+        })
+        .catch((e) => {
+          console.error(e)
+        })
+      },
    getQuestionTemplates() {
      this.isLoading = true
      ApiClient.getQuestionTemplates()

From a4276eece41e1c405e6c82a6da3453b734093a55 Mon Sep 17 00:00:00 2001
From: itsmvd
Date: 
Tue, 4 Mar 2025 10:27:17 +0000 Subject: [PATCH 33/63] layout fix --- .../src/components/Scenarios/QuestionCard.vue | 22 +++++++++---------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/timesketch/frontend-ng/src/components/Scenarios/QuestionCard.vue b/timesketch/frontend-ng/src/components/Scenarios/QuestionCard.vue index 40068253e7..601a045611 100644 --- a/timesketch/frontend-ng/src/components/Scenarios/QuestionCard.vue +++ b/timesketch/frontend-ng/src/components/Scenarios/QuestionCard.vue @@ -404,17 +404,17 @@ export default { }, methods: { getSuggestedQuery() { - this.suggestedQueryLoading = true - let formData = { question: this.activeQuestion.display_name } - ApiClient.llmRequest(this.sketch.id, 'nl2q', formData) - .then((response) => { - this.suggestedQuery = response.data - this.suggestedQueryLoading = false - }) - .catch((e) => { - console.error(e) - }) - }, + this.suggestedQueryLoading = true + let formData = { question: this.activeQuestion.display_name } + ApiClient.llmRequest(this.sketch.id, 'nl2q', formData) + .then((response) => { + this.suggestedQuery = response.data + this.suggestedQueryLoading = false + }) + .catch((e) => { + console.error(e) + }) + }, getQuestionTemplates() { this.isLoading = true ApiClient.getQuestionTemplates() From 8895e2a085600852b5a863f7962ffd1ad19defc9 Mon Sep 17 00:00:00 2001 From: itsmvd Date: Tue, 4 Mar 2025 10:41:16 +0000 Subject: [PATCH 34/63] Remove nl2q & llm_summarize features from the API --- timesketch/api/v1/resources/llm_summarize.py | 373 ------------------ timesketch/api/v1/resources/nl2q.py | 226 ----------- timesketch/api/v1/resources_test.py | 363 ----------------- timesketch/api/v1/routes.py | 4 - .../frontend-ng/src/utils/RestApiClient.js | 7 - timesketch/lib/llms/features/llm_summarize.py | 5 +- 6 files changed, 2 insertions(+), 976 deletions(-) delete mode 100644 timesketch/api/v1/resources/llm_summarize.py delete mode 100644 timesketch/api/v1/resources/nl2q.py diff --git a/timesketch/api/v1/resources/llm_summarize.py b/timesketch/api/v1/resources/llm_summarize.py deleted file mode 100644 index a5ecebc3b6..0000000000 --- a/timesketch/api/v1/resources/llm_summarize.py +++ /dev/null @@ -1,373 +0,0 @@ -# Copyright 2024 Google Inc. All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-
-"""Timesketch API for LLM event summarization."""
-
-import multiprocessing
-import multiprocessing.managers
-import logging
-from typing import Dict, Optional
-import json
-import time
-import pandas as pd
-import prometheus_client
-
-from flask import request, abort, jsonify, current_app
-from flask_login import login_required, current_user
-from flask_restful import Resource
-
-from timesketch.api.v1 import resources, export
-from timesketch.lib import definitions, utils
-from timesketch.lib.llms.providers import manager as provider_manager
-from timesketch.lib.definitions import METRICS_NAMESPACE
-from timesketch.models.sketch import Sketch
-
-logger = logging.getLogger("timesketch.api.llm_summarize")
-
-summary_response_schema = {
-    "type": "object",
-    "properties": {"summary": {"type": "string"}},
-    "required": ["summary"],
-}
-
-# Metrics definitions
-METRICS = {
-    "llm_summary_requests_total": prometheus_client.Counter(
-        "llm_summary_requests_total",
-        "Total number of LLM summarization requests received",
-        ["sketch_id"],
-        namespace=METRICS_NAMESPACE,
-    ),
-    "llm_summary_events_processed_total": prometheus_client.Counter(
-        "llm_summary_events_processed_total",
-        "Total number of events processed for LLM summarization",
-        ["sketch_id"],
-        namespace=METRICS_NAMESPACE,
-    ),
-    "llm_summary_unique_events_total": prometheus_client.Counter(
-        "llm_summary_unique_events_total",
-        "Total number of unique events sent to the LLM",
-        ["sketch_id"],
-        namespace=METRICS_NAMESPACE,
-    ),
-    "llm_summary_errors_total": prometheus_client.Counter(
-        "llm_summary_errors_total",
-        "Total number of errors encountered during LLM summarization",
-        ["sketch_id", "error_type"],
-        namespace=METRICS_NAMESPACE,
-    ),
-    "llm_summary_duration_seconds": prometheus_client.Summary(
-        "llm_summary_duration_seconds",
-        "Time taken to process an LLM summarization request (in seconds)",
-        ["sketch_id"],
-        namespace=METRICS_NAMESPACE,
-    ),
-}
-
-_LLM_TIMEOUT_WAIT_SECONDS = 30
-
-
-class LLMSummarizeResource(resources.ResourceMixin, Resource):
-    """Resource to get LLM summary of events."""
-
-    def _get_prompt_text(self, events_dict: list) -> str:
-        """Reads the prompt template from file and injects events.
-
-        Args:
-            events_dict: A list of dictionaries representing the events to summarize.
-
-        Returns:
-            The prompt text with the events injected.
-
-        Raises:
-            HTTP_STATUS_CODE_INTERNAL_SERVER_ERROR: If prompt template file
-            is not configured, not found, or error when reading it.
-        """
-        prompt_file_path = current_app.config.get("PROMPT_LLM_SUMMARIZATION")
-        if not prompt_file_path:
-            logger.error("PROMPT_LLM_SUMMARIZATION config not set in timesketch.conf")
-            abort(
-                definitions.HTTP_STATUS_CODE_INTERNAL_SERVER_ERROR,
-                "LLM summarization prompt path not configured.",
-            )
-
-        try:
-            with open(prompt_file_path, "r", encoding="utf-8") as file_handle:
-                prompt_template = file_handle.read()
-        except FileNotFoundError:
-            logger.error("Prompt file not found: %s", prompt_file_path)
-            abort(
-                definitions.HTTP_STATUS_CODE_INTERNAL_SERVER_ERROR,
-                "LLM Prompt file not found on the server.",
-            )
-        except IOError as e:
-            logger.error("Error reading prompt file: %s", e)
-            abort(
-                definitions.HTTP_STATUS_CODE_INTERNAL_SERVER_ERROR,
-                "Error reading LLM prompt file.",
-            )
-
-        prompt_text = prompt_template.replace("<EVENTS_JSON>", json.dumps(events_dict))
-        return prompt_text
-
-    @login_required
-    def post(self, sketch_id: int):
-        """Handles POST request to the resource.
- - Handler for /api/v1/sketches/:sketch_id/events/summary/ - - Args: - sketch_id: Integer primary key for a sketch database model. - - Returns: - JSON response with event summary, total event count, and unique event count. - - Raises: - HTTP_STATUS_CODE_NOT_FOUND: If no sketch is found with the given ID. - HTTP_STATUS_CODE_FORBIDDEN: If the user does not - have read access to the sketch. - HTTP_STATUS_CODE_BAD_REQUEST: If the POST request does not contain data, - if no events are found, or if there's an issue getting LLM data. - HTTP_STATUS_CODE_INTERNAL_SERVER_ERROR: If LLM provider is not configured. - """ - start_time = time.time() - METRICS["llm_summary_requests_total"].labels(sketch_id=str(sketch_id)).inc() - - sketch = Sketch.get_with_acl(sketch_id) - if not sketch: - abort( - definitions.HTTP_STATUS_CODE_NOT_FOUND, "No sketch found with this ID." - ) - if not sketch.has_permission(current_user, "read"): - abort( - definitions.HTTP_STATUS_CODE_FORBIDDEN, - "User does not have read access controls on sketch.", - ) - - form = request.json - if not form: - abort( - definitions.HTTP_STATUS_CODE_BAD_REQUEST, - "The POST request requires data", - ) - - query_filter = form.get("filter", {}) - query_string = form.get("query", "*") - if not query_string: - query_string = "*" - - events_df = self._run_timesketch_query(sketch, query_string, query_filter) - if events_df is None or events_df.empty: - return jsonify( - {"summary": "No events to summarize based on the current filter."} - ) - new_df = events_df[["message"]] - unique_df = new_df.drop_duplicates(subset="message", keep="first") - events_dict = unique_df.to_dict(orient="records") - - total_events_count = len(new_df) - unique_events_count = len(unique_df) - - METRICS["llm_summary_events_processed_total"].labels( - sketch_id=str(sketch_id) - ).inc(total_events_count) - METRICS["llm_summary_unique_events_total"].labels(sketch_id=str(sketch_id)).inc( - unique_events_count - ) - - logger.debug("Summarizing %d events", total_events_count) - logger.debug("Reduced to %d unique events", unique_events_count) - - if not events_dict: - return jsonify( - {"summary": "No events to summarize based on the current filter."} - ) - - try: - prompt_text = self._get_prompt_text(events_dict) - # TODO(itsmvd): Change to proper background worker such as celery in future - with multiprocessing.Manager() as manager: - shared_response = manager.dict() - p = multiprocessing.Process( - target=self._get_content_with_timeout, - args=(prompt_text, summary_response_schema, shared_response), - ) - p.start() - p.join(timeout=_LLM_TIMEOUT_WAIT_SECONDS) - - if p.is_alive(): - logger.warning( - "LLM call timed out after %d seconds.", - _LLM_TIMEOUT_WAIT_SECONDS, - ) - p.terminate() - p.join() - METRICS["llm_summary_errors_total"].labels( - sketch_id=str(sketch_id), error_type="timeout" - ).inc() - abort( - definitions.HTTP_STATUS_CODE_BAD_REQUEST, - "LLM call timed out.", - ) - - response = dict(shared_response) - - except Exception as e: # pylint: disable=broad-except - logger.error( - "Unable to call LLM to process events for summary. 
Error: %s", e - ) - METRICS["llm_summary_errors_total"].labels( - sketch_id=str(sketch_id), error_type="llm_api_error" - ).inc() - abort( - definitions.HTTP_STATUS_CODE_BAD_REQUEST, - "Unable to get LLM data, check server configuration for LLM.", - ) - - if not response or not response.get("summary"): - logger.error("No valid summary from LLM.") - METRICS["llm_summary_errors_total"].labels( - sketch_id=str(sketch_id), error_type="no_summary_error" - ).inc() - abort( - definitions.HTTP_STATUS_CODE_BAD_REQUEST, - "No valid summary from LLM.", - ) - summary_text = response.get("summary") - - duration = time.time() - start_time - METRICS["llm_summary_duration_seconds"].labels( - sketch_id=str(sketch_id) - ).observe(duration) - - # TODO: Add runtime seconds - return jsonify( - { - "summary": summary_text, - "summary_event_count": total_events_count, - "summary_unique_event_count": unique_events_count, - } - ) - - def _get_content_with_timeout( - self, - prompt: str, - response_schema: Optional[dict], - shared_response: multiprocessing.managers.DictProxy, - ) -> None: - """Send a prompt to the LLM and get a response within a process. - - Args: - prompt: The prompt to send to the LLM. - response_schema: If set, the LLM will attempt to return a structured - response that conforms to this schema. If set to None, the LLM - will return an unstructured response - shared_response: A shared dictionary to store the response. - """ - try: - response = self._get_content(prompt, response_schema) - shared_response.update(response) - except Exception as e: # pylint: disable=broad-except - logger.error("Error in LLM call within process: %s", e) - shared_response.update({"error": str(e)}) - - def _get_content( - self, prompt: str, response_schema: Optional[dict] = None - ) -> Optional[Dict]: - """Send a prompt to the LLM and get a response. - - Args: - prompt: The prompt to send to the LLM. - response_schema: If set, the LLM will attempt to return a structured - response that conforms to this schema. If set to None, the LLM - will return an unstructured response - - Returns: - If response_schema is set, a dictionary representing the structured - response will be returned. If response_schema is None, the raw text - response from the LLM will be returned as a string. - - Raises: - HTTP_STATUS_CODE_INTERNAL_SERVER_ERROR: If no LLM provider is defined - in the configuration file - HTTP_STATUS_CODE_BAD_REQUEST: If an error occurs with the - configured LLM provider - """ - try: - feature_name = "llm_summarize" - llm = provider_manager.LLMManager.create_provider(feature_name=feature_name) - except Exception as e: # pylint: disable=broad-except - logger.error("Error LLM Provider: %s", e) - abort( - definitions.HTTP_STATUS_CODE_BAD_REQUEST, - "An error occurred with the configured LLM provider. " - "Please check the logs and configuration file.", - ) - - prediction = llm.generate(prompt, response_schema=response_schema) - return prediction - - def _run_timesketch_query( - self, - sketch: Sketch, - query_string: str = "*", - query_filter: Optional[dict] = None, - id_list: Optional[list] = None, - ) -> pd.DataFrame: - """Runs a timesketch query. - - Args: - sketch: The Sketch object to query. - query_string: The query string to use. - query_filter: The query filter to use. - id_list: A list of event IDs to use. - - Returns: - A pandas DataFrame containing the query results. - - Raises: - HTTP_STATUS_CODE_BAD_REQUEST: If no valid search indices were found - to perform the search on. 
- """ - if not query_filter: - query_filter = {} - - if id_list: - id_query = " OR ".join([f'_id:"{event_id}"' for event_id in id_list]) - query_string = id_query - - all_indices = list({t.searchindex.index_name for t in sketch.timelines}) - indices = query_filter.get("indices", all_indices) - - if "_all" in indices: - indices = all_indices - - indices, timeline_ids = utils.get_validated_indices(indices, sketch) - - if not indices: - abort( - definitions.HTTP_STATUS_CODE_BAD_REQUEST, - "No valid search indices were found to perform the search on.", - ) - - result = self.datastore.search( - sketch_id=sketch.id, - query_string=query_string, - query_filter=query_filter, - query_dsl="", - indices=indices, - timeline_ids=timeline_ids, - ) - - return export.query_results_to_dataframe(result, sketch) diff --git a/timesketch/api/v1/resources/nl2q.py b/timesketch/api/v1/resources/nl2q.py deleted file mode 100644 index 770d8861ee..0000000000 --- a/timesketch/api/v1/resources/nl2q.py +++ /dev/null @@ -1,226 +0,0 @@ -# Copyright 2024 Google Inc. All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -"""Natural language to query (NL2Q) API for version 1 of the Timesketch API.""" - -import logging - -from flask import jsonify -from flask import request -from flask import abort -from flask import current_app -from flask_restful import Resource -from flask_login import login_required -from flask_login import current_user - -import pandas as pd - -from timesketch.api.v1 import utils -from timesketch.lib.llms.providers import manager -from timesketch.lib.definitions import HTTP_STATUS_CODE_BAD_REQUEST -from timesketch.lib.definitions import HTTP_STATUS_CODE_INTERNAL_SERVER_ERROR -from timesketch.lib.definitions import HTTP_STATUS_CODE_NOT_FOUND -from timesketch.lib.definitions import HTTP_STATUS_CODE_FORBIDDEN -from timesketch.models.sketch import Sketch - - -logger = logging.getLogger("timesketch.api_nl2q") - - -class Nl2qResource(Resource): - """Resource to get NL2Q prediction.""" - - def build_prompt(self, question, sketch_id): - """Builds the prompt. - - Args: - sketch_id: Sketch ID. - - Return: - String containing the whole prompt. - """ - prompt = "" - examples = "" - prompt_file = current_app.config.get("PROMPT_NL2Q", "") - examples_file = current_app.config.get("EXAMPLES_NL2Q", "") - try: - with open(prompt_file, "r") as file: - prompt = file.read() - except (OSError, IOError): - abort(HTTP_STATUS_CODE_INTERNAL_SERVER_ERROR, "No prompt file found") - try: - with open(examples_file, "r") as file: - examples = file.read() - except (OSError, IOError): - abort(HTTP_STATUS_CODE_INTERNAL_SERVER_ERROR, "No examples file found") - prompt = prompt.format( - examples=examples, - question=question, - data_types=self.data_types_descriptions(self.sketch_data_types(sketch_id)), - ) - return prompt - - def sketch_data_types(self, sketch_id): - """Get the data types for the current sketch. - - Args: - sketch_id: Sketch ID. - - Returns: - List of data types in a sketch. 
- """ - output = [] - sketch = Sketch.get_with_acl(sketch_id) - if not sketch: - abort(HTTP_STATUS_CODE_NOT_FOUND, "No sketch found with this ID.") - - if not sketch.has_permission(current_user, "read"): - abort( - HTTP_STATUS_CODE_FORBIDDEN, "User does not have read access to sketch" - ) - - data_type_aggregation = utils.run_aggregator( - sketch_id, "field_bucket", {"field": "data_type", "limit": "1000"} - ) - - if not data_type_aggregation or not data_type_aggregation[0]: - abort( - HTTP_STATUS_CODE_INTERNAL_SERVER_ERROR, - "Internal problem with the aggregations.", - ) - data_types = data_type_aggregation[0].values - if not data_types: - abort( - HTTP_STATUS_CODE_INTERNAL_SERVER_ERROR, - "No data types in the sketch.", - ) - for data_type in data_types: - output.append(data_type.get("data_type")) - return ",".join(output) - - def data_types_descriptions(self, data_types): - """Creates a formatted string of data types and attribute descriptions. - - Args: - data_types: List of data types in the sketch. - - Returns: - Formatted string of data types and attribute descriptions. - """ - df_data_types = utils.load_csv_file("DATA_TYPES_PATH") - if df_data_types.empty: - abort( - HTTP_STATUS_CODE_INTERNAL_SERVER_ERROR, - "No data types description file or the file is empty.", - ) - df_short_data_types = pd.DataFrame( - df_data_types.groupby("data_type").apply(self.concatenate_values), - columns=["fields"], - ) - df_short_data_types["data_type"] = df_short_data_types.index - df_short_data_types["data_type"] = df_short_data_types["data_type"].apply( - lambda x: x.strip() - ) - df_short_data_types.reset_index(drop=True, inplace=True) - output = [] - for dtype in data_types.split(","): - extract = df_short_data_types[ - df_short_data_types["data_type"] == dtype.strip() - ] - if extract.empty: - print(f"'{dtype.strip()}' not found in [{data_types}]") - continue - output.append(extract.iloc[0]["fields"]) - return "\n".join(output) - - def generate_fields(self, group): - """Generated the fields for a data type. - - Args: - group: Data type fields. - - Returns: - String of the generated fields. - """ - return ", ".join( - f'"{f}"' - for f, t, d in zip(group["field"], group["type"], group["description"]) - ) - - def concatenate_values(self, group): - """Concatenates the fields for a data type. - - Args: - group: Data type fields. - - Returns: - String of the concatenated fields. - """ - return f'* "{group["data_type"].iloc[0]}" -> {self.generate_fields(group)}' - - @login_required - def post(self, sketch_id): - """Handles POST request to the resource. - - Args: - sketch_id: Sketch ID. - - Returns: - JSON representing the LLM prediction. 
- """ - form = request.json - if not form: - abort(HTTP_STATUS_CODE_BAD_REQUEST, "No JSON data provided") - - if "question" not in form: - abort(HTTP_STATUS_CODE_BAD_REQUEST, "The 'question' parameter is required!") - - llm_configs = current_app.config.get("LLM_PROVIDER_CONFIGS") - if not llm_configs: - logger.error("No LLM provider configuration defined.") - abort( - HTTP_STATUS_CODE_INTERNAL_SERVER_ERROR, - "No LLM provider was defined in the main configuration file", - ) - - question = form.get("question") - prompt = self.build_prompt(question, sketch_id) - - result_schema = { - "name": "AI generated search query", - "query_string": None, - "error": None, - } - - feature_name = "nl2q" - try: - llm = manager.LLMManager.create_provider(feature_name=feature_name) - except Exception as e: # pylint: disable=broad-except - logger.error("Error LLM Provider: {}".format(e)) - result_schema["error"] = ( - "Error loading LLM Provider. Please try again later!" - ) - return jsonify(result_schema) - - try: - prediction = llm.generate(prompt) - except Exception as e: # pylint: disable=broad-except - logger.error("Error NL2Q prompt: {}".format(e)) - result_schema["error"] = ( - "An error occurred generating the query via the defined LLM. " - "Please try again later!" - ) - return jsonify(result_schema) - - result_schema["query_string"] = prediction.strip("`\n\r\t ") - return jsonify(result_schema) diff --git a/timesketch/api/v1/resources_test.py b/timesketch/api/v1/resources_test.py index 39a8701659..5fdee2ed11 100644 --- a/timesketch/api/v1/resources_test.py +++ b/timesketch/api/v1/resources_test.py @@ -24,7 +24,6 @@ from timesketch.lib.definitions import HTTP_STATUS_CODE_NOT_FOUND from timesketch.lib.definitions import HTTP_STATUS_CODE_OK from timesketch.lib.definitions import HTTP_STATUS_CODE_FORBIDDEN -from timesketch.lib.definitions import HTTP_STATUS_CODE_INTERNAL_SERVER_ERROR from timesketch.lib.testlib import BaseTest from timesketch.lib.testlib import MockDataStore from timesketch.lib.dfiq import DFIQ @@ -1285,303 +1284,6 @@ def test_user_get_resource_admin(self): self.assertEqual(data["objects"][0]["username"], "test1") -class TestNl2qResource(BaseTest): - """Test Nl2qResource.""" - - resource_url = "/api/v1/sketches/1/nl2q/" - - @mock.patch("timesketch.lib.llms.providers.manager.LLMManager.create_provider") - @mock.patch("timesketch.api.v1.utils.run_aggregator") - @mock.patch("timesketch.api.v1.resources.OpenSearchDataStore", MockDataStore) - def test_nl2q_prompt(self, mock_aggregator, mock_create_provider): - """Test the prompt is created correctly.""" - - self.login() - data = dict(question="Question for LLM?") - mock_AggregationResult = mock.MagicMock() - mock_AggregationResult.values = [ - {"data_type": "test:data_type:1"}, - {"data_type": "test:data_type:2"}, - ] - mock_aggregator.return_value = (mock_AggregationResult, {}) - mock_llm = mock.Mock() - mock_llm.generate.return_value = "LLM generated query" - mock_create_provider.return_value = mock_llm - response = self.client.post( - self.resource_url, - data=json.dumps(data), - content_type="application/json", - ) - expected_input = ( - "Examples:\n" - "example 1\n" - "\n" - "example 2\n" - "Types:\n" - '* "test:data_type:1" -> "field_test_1", "field_test_2"\n' - '* "test:data_type:2" -> "field_test_3", "field_test_4"\n' - "Question:\n" - "Question for LLM?" 
- ) - mock_llm.generate.assert_called_once_with(expected_input) - self.assertEqual(response.status_code, HTTP_STATUS_CODE_OK) - self.assertDictEqual( - response.json, - { - "name": "AI generated search query", - "query_string": "LLM generated query", - "error": None, - }, - ) - - @mock.patch("timesketch.api.v1.utils.run_aggregator") - @mock.patch("timesketch.api.v1.resources.OpenSearchDataStore", MockDataStore) - def test_nl2q_no_prompt(self, mock_aggregator): - """Test error when the prompt file is missing or not configured.""" - - self.app.config["PROMPT_NL2Q"] = "/file_does_not_exist.txt" - self.login() - data = dict(question="Question for LLM?") - mock_AggregationResult = mock.MagicMock() - mock_AggregationResult.values = [ - {"data_type": "test:data_type:1"}, - {"data_type": "test:data_type:2"}, - ] - mock_aggregator.return_value = (mock_AggregationResult, {}) - response = self.client.post( - self.resource_url, - data=json.dumps(data), - content_type="application/json", - ) - self.assertEqual(response.status_code, HTTP_STATUS_CODE_INTERNAL_SERVER_ERROR) - - del self.app.config["PROMPT_NL2Q"] - response = self.client.post( - self.resource_url, - data=json.dumps(data), - content_type="application/json", - ) - self.assertEqual(response.status_code, HTTP_STATUS_CODE_INTERNAL_SERVER_ERROR) - # data = json.loads(response.get_data(as_text=True)) - # self.assertIsNotNone(data.get("error")) - - @mock.patch("timesketch.api.v1.utils.run_aggregator") - @mock.patch("timesketch.api.v1.resources.OpenSearchDataStore", MockDataStore) - def test_nl2q_no_examples(self, mock_aggregator): - """Test error when the prompt file is missing or not configured.""" - - self.app.config["EXAMPLES_NL2Q"] = "/file_does_not_exist.txt" - self.login() - data = dict(question="Question for LLM?") - mock_AggregationResult = mock.MagicMock() - mock_AggregationResult.values = [ - {"data_type": "test:data_type:1"}, - {"data_type": "test:data_type:2"}, - ] - mock_aggregator.return_value = (mock_AggregationResult, {}) - response = self.client.post( - self.resource_url, - data=json.dumps(data), - content_type="application/json", - ) - self.assertEqual(response.status_code, HTTP_STATUS_CODE_INTERNAL_SERVER_ERROR) - - del self.app.config["EXAMPLES_NL2Q"] - response = self.client.post( - self.resource_url, - data=json.dumps(data), - content_type="application/json", - ) - self.assertEqual(response.status_code, HTTP_STATUS_CODE_INTERNAL_SERVER_ERROR) - - @mock.patch("timesketch.api.v1.resources.OpenSearchDataStore", MockDataStore) - def test_nl2q_no_question(self): - """Test nl2q without submitting a question.""" - - self.login() - data = dict() - response = self.client.post( - self.resource_url, - data=json.dumps(data), - content_type="application/json", - ) - self.assertEqual(response.status_code, HTTP_STATUS_CODE_BAD_REQUEST) - - @mock.patch("timesketch.api.v1.utils.run_aggregator") - @mock.patch("timesketch.api.v1.resources.OpenSearchDataStore", MockDataStore) - def test_nl2q_wrong_llm_provider(self, mock_aggregator): - """Test nl2q with llm provider that does not exist.""" - - self.app.config["LLM_PROVIDER_CONFIGS"] = {"default": {"DoesNotExists": {}}} - self.login() - self.login() - data = dict(question="Question for LLM?") - mock_AggregationResult = mock.MagicMock() - mock_AggregationResult.values = [ - {"data_type": "test:data_type:1"}, - {"data_type": "test:data_type:2"}, - ] - mock_aggregator.return_value = (mock_AggregationResult, {}) - response = self.client.post( - self.resource_url, - data=json.dumps(data), - 
content_type="application/json", - ) - self.assertEqual(response.status_code, HTTP_STATUS_CODE_OK) - data = json.loads(response.get_data(as_text=True)) - self.assertIsNotNone(data.get("error")) - - @mock.patch("timesketch.api.v1.resources.OpenSearchDataStore", MockDataStore) - def test_nl2q_no_llm_provider(self): - """Test nl2q with no LLM provider configured.""" - - if "LLM_PROVIDER_CONFIGS" in self.app.config: - del self.app.config["LLM_PROVIDER_CONFIGS"] - self.login() - data = dict(question="Question for LLM?") - response = self.client.post( - self.resource_url, - data=json.dumps(data), - content_type="application/json", - ) - self.assertEqual(response.status_code, HTTP_STATUS_CODE_INTERNAL_SERVER_ERROR) - - @mock.patch("timesketch.api.v1.resources.OpenSearchDataStore", MockDataStore) - def test_nl2q_no_sketch(self): - """Test the nl2q with non existing sketch.""" - - self.login() - data = dict(question="Question for LLM?") - response = self.client.post( - "/api/v1/sketches/9999/nl2q/", - data=json.dumps(data), - content_type="application/json", - ) - self.assertEqual(response.status_code, HTTP_STATUS_CODE_NOT_FOUND) - - @mock.patch("timesketch.api.v1.resources.OpenSearchDataStore", MockDataStore) - def test_nl2q_no_permission(self): - """Test the nl2q with no permission on the sketch.""" - - self.login() - data = dict(question="Question for LLM?") - response = self.client.post( - "/api/v1/sketches/2/nl2q/", - data=json.dumps(data), - content_type="application/json", - ) - self.assertEqual(response.status_code, HTTP_STATUS_CODE_FORBIDDEN) - - @mock.patch("timesketch.lib.llms.providers.manager.LLMManager.create_provider") - @mock.patch("timesketch.api.v1.utils.run_aggregator") - @mock.patch("timesketch.api.v1.resources.OpenSearchDataStore", MockDataStore) - def test_nl2q_llm_error(self, mock_aggregator, mock_create_provider): - """Test nl2q with llm error.""" - - self.login() - data = dict(question="Question for LLM?") - mock_AggregationResult = mock.MagicMock() - mock_AggregationResult.values = [ - {"data_type": "test:data_type:1"}, - {"data_type": "test:data_type:2"}, - ] - mock_aggregator.return_value = (mock_AggregationResult, {}) - mock_llm = mock.Mock() - mock_llm.generate.side_effect = Exception("Test exception") - mock_create_provider.return_value = mock_llm - response = self.client.post( - self.resource_url, - data=json.dumps(data), - content_type="application/json", - ) - self.assertEqual( - response.status_code, HTTP_STATUS_CODE_OK - ) # Still expect 200 OK with error in JSON - data = json.loads(response.get_data(as_text=True)) - self.assertIsNotNone(data.get("error")) - - @mock.patch("timesketch.lib.llms.providers.manager.LLMManager.create_provider") - @mock.patch("timesketch.api.v1.utils.run_aggregator") - @mock.patch("timesketch.api.v1.resources.OpenSearchDataStore", MockDataStore) - def test_nl2q_strip_back_ticks(self, mock_aggregator, mock_create_provider): - """Test the result does not have any back tick.""" - - self.login() - data = dict(question="Question for LLM?") - mock_AggregationResult = mock.MagicMock() - mock_AggregationResult.values = [ - {"data_type": "test:data_type:1"}, - {"data_type": "test:data_type:2"}, - ] - mock_aggregator.return_value = (mock_AggregationResult, {}) - expected_input = ( - "Examples:\n" - "example 1\n" - "\n" - "example 2\n" - "Types:\n" - '* "test:data_type:1" -> "field_test_1", "field_test_2"\n' - '* "test:data_type:2" -> "field_test_3", "field_test_4"\n' - "Question:\n" - "Question for LLM?" 
- ) - - mock_llm_1 = mock.Mock() - mock_llm_1.generate.return_value = " \t`LLM generated query`\n " - mock_create_provider.return_value = mock_llm_1 - response = self.client.post( - self.resource_url, - data=json.dumps(data), - content_type="application/json", - ) - mock_llm_1.generate.assert_called_once_with(expected_input) - self.assertEqual(response.status_code, HTTP_STATUS_CODE_OK) - self.assertDictEqual( - response.json, - { - "name": "AI generated search query", - "query_string": "LLM generated query", - "error": None, - }, - ) - mock_llm_2 = mock.Mock() - mock_llm_2.generate.return_value = "```LLM generated query``" - mock_create_provider.return_value = mock_llm_2 - response = self.client.post( - self.resource_url, - data=json.dumps(data), - content_type="application/json", - ) - mock_llm_2.generate.assert_called_once_with(expected_input) - self.assertEqual(response.status_code, HTTP_STATUS_CODE_OK) - self.assertDictEqual( - response.json, - { - "name": "AI generated search query", - "query_string": "LLM generated query", - "error": None, - }, - ) - mock_llm_3 = mock.Mock() - mock_llm_3.generate.return_value = " \t```LLM generated query```\n " - mock_create_provider.return_value = mock_llm_3 - response = self.client.post( - self.resource_url, - data=json.dumps(data), - content_type="application/json", - ) - mock_llm_3.generate.assert_called_once_with(expected_input) - self.assertEqual(response.status_code, HTTP_STATUS_CODE_OK) - self.assertDictEqual( - response.json, - { - "name": "AI generated search query", - "query_string": "LLM generated query", - "error": None, - }, - ) - - class SystemSettingsResourceTest(BaseTest): """Test system settings resource.""" @@ -1721,71 +1423,6 @@ def test_check_and_run_dfiq_analysis_steps(self, mock_analyzer_manager): self.assertFalse(result) -class MockLLM: - """Mock LLM class for testing.""" - - def generate(self): - return {"summary": "Mock summary from LLM"} - - -class TestLLMSummarizeResource(BaseTest): - """Test LLMSummarizeResource.""" - - resource_url = "/api/v1/sketches/1/events/summary/" - - @mock.patch("timesketch.api.v1.resources.OpenSearchDataStore", MockDataStore) - def test_llm_summarize_no_events(self): - """Test LLM summarizer when no events are returned from the Timesketch query.""" - self.login() - self.app.config["PROMPT_LLM_SUMMARIZATION"] = "data/llm_summarize/prompt.txt" - - with mock.patch( - "timesketch.api.v1.resources.llm_summarize.LLMSummarizeResource._run_timesketch_query", # pylint: disable=line-too-long - return_value=pd.DataFrame(), - ), mock.patch("timesketch.api.v1.resources.OpenSearchDataStore", MockDataStore): - response = self.client.post( - self.resource_url, - data=json.dumps({"query": "*"}), - content_type="application/json", - ) - - self.assertEqual(response.status_code, 200) - response_data = json.loads(response.get_data(as_text=True)) - self.assertEqual( - response_data.get("summary"), - "No events to summarize based on the current filter.", - ) - - @mock.patch("timesketch.api.v1.resources.OpenSearchDataStore", MockDataStore) - @mock.patch("timesketch.lib.llms.providers.manager.LLMManager.create_provider") - def test_llm_summarize_with_events(self, mock_create_provider): - """Test LLM summarizer with events returned and mock LLM.""" - self.login() - self.app.config["PROMPT_LLM_SUMMARIZATION"] = "data/llm_summarize/prompt.txt" - mock_create_provider.return_value = MockLLM() - - sample_events = pd.DataFrame([{"message": "Test event message"}]) - - with mock.patch( - 
"timesketch.api.v1.resources.llm_summarize.LLMSummarizeResource._run_timesketch_query", # pylint: disable=line-too-long - return_value=sample_events, - ), mock.patch( - "timesketch.api.v1.resources.llm_summarize.LLMSummarizeResource._get_content", # pylint: disable=line-too-long - return_value={"summary": "Mock summary from LLM"}, - ), mock.patch( - "timesketch.api.v1.resources.OpenSearchDataStore", MockDataStore - ): - response = self.client.post( - self.resource_url, - data=json.dumps({"query": "*"}), - content_type="application/json", - ) - - self.assertEqual(response.status_code, 200) - response_data = json.loads(response.get_data(as_text=True)) - self.assertEqual(response_data.get("summary"), "Mock summary from LLM") - - @mock.patch("timesketch.api.v1.resources.OpenSearchDataStore", MockDataStore) class LLMResourceTest(BaseTest): """Test LLMResource.""" diff --git a/timesketch/api/v1/routes.py b/timesketch/api/v1/routes.py index 5bc249ebc5..7a10d8537c 100644 --- a/timesketch/api/v1/routes.py +++ b/timesketch/api/v1/routes.py @@ -76,8 +76,6 @@ from .resources.intelligence import TagMetadataResource from .resources.contextlinks import ContextLinkConfigResource from .resources.unfurl import UnfurlResource -from .resources.nl2q import Nl2qResource -from .resources.llm_summarize import LLMSummarizeResource from .resources.llm import LLMResource from .resources.settings import SystemSettingsResource @@ -203,8 +201,6 @@ (TagMetadataResource, "/intelligence/tagmetadata/"), (ContextLinkConfigResource, "/contextlinks/"), (UnfurlResource, "/unfurl/"), - (Nl2qResource, "/sketches//nl2q/"), - (LLMSummarizeResource, "/sketches//events/summary/"), (LLMResource, "/sketches//llm/"), (SystemSettingsResource, "/settings/"), # Scenario templates diff --git a/timesketch/frontend-ng/src/utils/RestApiClient.js b/timesketch/frontend-ng/src/utils/RestApiClient.js index 86416ebd33..b288e6dbf2 100644 --- a/timesketch/frontend-ng/src/utils/RestApiClient.js +++ b/timesketch/frontend-ng/src/utils/RestApiClient.js @@ -521,13 +521,6 @@ export default { let formData = { settings: settings } return RestApiClient.post('/users/me/settings/', formData) }, - nl2q(sketchId, question) { - let formData = { question: question } - return RestApiClient.post('/sketches/' + sketchId + '/nl2q/', formData) - }, - getEventSummary(sketchId, formData) { - return RestApiClient.post('/sketches/' + sketchId + '/events/summary/', formData) - }, llmRequest(sketchId, featureName, formData) { formData = formData || {} formData.feature = featureName diff --git a/timesketch/lib/llms/features/llm_summarize.py b/timesketch/lib/llms/features/llm_summarize.py index 9a6cf5f457..e776fcb705 100644 --- a/timesketch/lib/llms/features/llm_summarize.py +++ b/timesketch/lib/llms/features/llm_summarize.py @@ -27,16 +27,15 @@ logger = logging.getLogger("timesketch.llm.summarize_feature") -# TODO(itsmvd): Remove 'feature' prefix after migration METRICS = { "llm_summary_events_processed_total": prometheus_client.Counter( - "feature_llm_summary_events_processed_total", # avoid duplicate registration + "llm_summary_events_processed_total", "Total number of events processed for LLM summarization", ["sketch_id"], namespace=METRICS_NAMESPACE, ), "llm_summary_unique_events_total": prometheus_client.Counter( - "feature_llm_summary_unique_events_total", # avoid duplicate registration + "llm_summary_unique_events_total", "Total number of unique events sent to the LLM", ["sketch_id"], namespace=METRICS_NAMESPACE, From 092711fd2bd6e873db646c3078d2c6395741dc11 Mon Sep 17 
00:00:00 2001 From: itsmvd Date: Tue, 4 Mar 2025 13:15:10 +0000 Subject: [PATCH 35/63] remove unused import --- timesketch/api/v1/resources_test.py | 1 - 1 file changed, 1 deletion(-) diff --git a/timesketch/api/v1/resources_test.py b/timesketch/api/v1/resources_test.py index 5fdee2ed11..13cdcd5816 100644 --- a/timesketch/api/v1/resources_test.py +++ b/timesketch/api/v1/resources_test.py @@ -17,7 +17,6 @@ import json import mock -import pandas as pd from timesketch.lib.definitions import HTTP_STATUS_CODE_BAD_REQUEST from timesketch.lib.definitions import HTTP_STATUS_CODE_CREATED From 4f37d7ffa2a278ed773eea069c0ce4da5db0cc56 Mon Sep 17 00:00:00 2001 From: itsmvd Date: Wed, 5 Mar 2025 15:44:13 +0000 Subject: [PATCH 36/63] Update RestApiClient in frontend-v3 --- timesketch/frontend-v3/src/utils/RestApiClient.js | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/timesketch/frontend-v3/src/utils/RestApiClient.js b/timesketch/frontend-v3/src/utils/RestApiClient.js index f3c8827e00..a08bcc56d2 100644 --- a/timesketch/frontend-v3/src/utils/RestApiClient.js +++ b/timesketch/frontend-v3/src/utils/RestApiClient.js @@ -672,8 +672,10 @@ export default { let formData = { settings: settings }; return RestApiClient.post("/users/me/settings/", formData); }, - nl2q(sketchId, question) { - let formData = { question: question }; - return RestApiClient.post("/sketches/" + sketchId + "/nl2q/", formData); - }, + llmRequest(sketchId, featureName, formData) { + formData = formData || {} + formData.feature = featureName + + return RestApiClient.post(`/sketches/${sketchId}/llm/`, formData) + } }; From 6549bbdb7ee68a2848730b7b4fc02823c2649f3e Mon Sep 17 00:00:00 2001 From: itsmvd Date: Mon, 10 Mar 2025 08:58:28 +0000 Subject: [PATCH 37/63] stash local changes --- data/llm_summarize/prompt_forensic_report.txt | 14 + timesketch/lib/llms/actions.py | 74 +++++ .../lib/llms/features/llm_forensic_report.py | 305 ++++++++++++++++++ 3 files changed, 393 insertions(+) create mode 100644 data/llm_summarize/prompt_forensic_report.txt create mode 100644 timesketch/lib/llms/actions.py create mode 100644 timesketch/lib/llms/features/llm_forensic_report.py diff --git a/data/llm_summarize/prompt_forensic_report.txt b/data/llm_summarize/prompt_forensic_report.txt new file mode 100644 index 0000000000..8dcfa80962 --- /dev/null +++ b/data/llm_summarize/prompt_forensic_report.txt @@ -0,0 +1,14 @@ +You are a highly skilled digital forensic analyst. Your task is to analyze a set of security events, which have been identified as potentially significant ("starred events") in a Timesketch investigation. Based on these events, generate a concise forensic report summary, formatted in Markdown. + +Focus on identifying: + +* **Incident Overview:** Provide a brief summary of what appears to have happened based on these events. What type of incident is suggested (e.g., unauthorized access, malware infection, data breach attempt)? +* **Key Findings:** Highlight the most important observations and indicators from the events. Be specific and mention key entities (usernames, IP addresses, file paths, process names) involved. +* **Timeline of Significant Events (Chronological Order):** Briefly outline the sequence of key actions observed in the starred events. +* **Potential Impact/Severity:** Assess the potential impact or severity of the incident based on the available information. +* **Recommended Next Steps:** Suggest 2-3 concrete next steps for the investigation based on your analysis. 
+
+Use bolding (**...**) for key entities and findings. Format the output as a Markdown document.
+
+Here are the starred events in JSON format:
+
\ No newline at end of file
diff --git a/timesketch/lib/llms/actions.py b/timesketch/lib/llms/actions.py
new file mode 100644
index 0000000000..98c14f7676
--- /dev/null
+++ b/timesketch/lib/llms/actions.py
@@ -0,0 +1,74 @@
+# Copyright 2025 Google Inc. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""Actions for LLM features in Timesketch."""
+import json
+import logging
+import time
+from typing import Optional
+from timesketch.models import db_session
+from timesketch.models.sketch import Sketch, Story
+
+logger = logging.getLogger("timesketch.llm.actions")
+
+def create_story(
+    sketch: Sketch,
+    content: str,
+    title: Optional[str] = None
+) -> int:
+    """Creates a Timesketch story with the given content.
+
+    Args:
+        sketch: Sketch object.
+        content: Text content to add to the story.
+        title: Title for the story. If None, a default title with a timestamp is used.
+
+    Returns:
+        The ID of the newly created story.
+
+    Raises:
+        ValueError: If there's an error creating the story.
+    """
+    if title is None:
+        title = f"AI Generated Report - {time.strftime('%Y-%m-%d %H:%M')}"
+
+    try:
+        # Create the Story object; its body is a JSON-encoded list of content blocks.
+        story = Story(
+            title=title,
+            sketch=sketch,
+            user=sketch.user
+        )
+
+        content_blocks = [
+            {
+                "componentName": "",
+                "componentProps": {},
+                "content": content,
+                "edit": False,
+                "showPanel": False,
+                "isActive": False
+            }
+        ]
+
+        story.content = json.dumps(content_blocks)
+
+        db_session.add(story)
+        db_session.commit()
+
+        logger.info("Created story with ID %d for sketch %d", story.id, sketch.id)
+        return story.id
+
+    except Exception as e:
+        logger.error("Error creating story: %s", e)
+        raise ValueError(f"Error creating story: {e}") from e
diff --git a/timesketch/lib/llms/features/llm_forensic_report.py b/timesketch/lib/llms/features/llm_forensic_report.py
new file mode 100644
index 0000000000..4ff2362076
--- /dev/null
+++ b/timesketch/lib/llms/features/llm_forensic_report.py
@@ -0,0 +1,305 @@
+# Copyright 2025 Google Inc. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
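+#
+# A minimal usage sketch of this feature (names such as `provider`, `form`,
+# `datastore` and `timeline_ids` are illustrative assumptions, not defined
+# in this module):
+#
+#   feature = LLMForensicReportFeature()
+#   prompt = feature.generate_prompt(
+#       sketch, form=form, datastore=datastore, timeline_ids=timeline_ids)
+#   response = provider.generate(prompt, response_schema=feature.RESPONSE_SCHEMA)
+#   result = feature.process_response(
+#       response, sketch=sketch, form=form, datastore=datastore,
+#       timeline_ids=timeline_ids)
+#
+# process_response() persists the report as a Story and returns its ID under
+# result["story_id"].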
+"""LLM Forensic Report feature.""" +import json +import logging +import time +from typing import Any, Dict, List, Optional +import pandas as pd +from flask import current_app +from opensearchpy import OpenSearch +from timesketch.lib import utils +from timesketch.api.v1 import export +from timesketch.models import db_session +from timesketch.models.sketch import Sketch +from timesketch.lib.llms import actions +from timesketch.lib.llms.features.interface import LLMFeatureInterface + +logger = logging.getLogger("timesketch.llm.forensic_report_feature") + +class LLMForensicReportFeature(LLMFeatureInterface): + """LLM Forensic Report feature.""" + NAME = "llm_forensic_report" + PROMPT_CONFIG_KEY = "PROMPT_LLM_FORENSIC_REPORT" + + RESPONSE_SCHEMA = { + "type": "object", + "properties": { + "summary": { + "type": "string", + "description": "Detailed forensic report summary of the events" + } + }, + "required": ["summary"] + } + + def _get_prompt_text(self, events_dict: List[Dict[str, Any]]) -> str: + """Reads the prompt template from file and injects events. + + Args: + events_dict: List of event dictionaries to inject into prompt. + + Returns: + str: Complete prompt text with injected events. + + Raises: + ValueError: If the prompt path is not configured or placeholder is missing. + FileNotFoundError: If the prompt file cannot be found. + IOError: If there's an error reading the prompt file. + """ + prompt_file_path = current_app.config.get(self.PROMPT_CONFIG_KEY) + if not prompt_file_path: + logger.error("%s config not set", self.PROMPT_CONFIG_KEY) + raise ValueError("LLM forensic report prompt path not configured.") + + try: + with open(prompt_file_path, "r", encoding="utf-8") as file_handle: + prompt_template = file_handle.read() + except FileNotFoundError as exc: + logger.error("Forensic report prompt file not found: %s", prompt_file_path) + raise FileNotFoundError( + f"LLM Prompt file not found: {prompt_file_path}" + ) from exc + except IOError as e: + logger.error("Error reading prompt file: %s", e) + raise IOError("Error reading LLM prompt file.") from e + + if "" not in prompt_template: + logger.error("Prompt template is missing the placeholder") + raise ValueError( + "LLM forensic report prompt template is missing the " + "required placeholder." + ) + + prompt_text = prompt_template.replace("", json.dumps(events_dict)) + return prompt_text + + def _run_timesketch_query( + self, + sketch: Sketch, + query_string: str = "*", + query_filter: Optional[Dict] = None, + id_list: Optional[List] = None, + datastore: Optional[OpenSearch] = None, + timeline_ids: Optional[List] = None, + ) -> pd.DataFrame: + """Runs a timesketch query and returns results as a DataFrame. + + Args: + sketch: The Sketch object to query. + query_string: Search query string. + query_filter: Dictionary with filter parameters. + id_list: List of event IDs to retrieve. + datastore: OpenSearch instance for querying. + timeline_ids: List of timeline IDs to query. + + Returns: + pd.DataFrame: DataFrame containing query results. + + Raises: + ValueError: If datastore is not provided or no valid indices are found. 
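+
+        Note:
+            When id_list is provided, the ids are rewritten into an `_id`
+            query and the supplied query_string is ignored.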
+ """ + if datastore is None: + raise ValueError("Datastore must be provided.") + + if not query_filter: + query_filter = {} + + if id_list: + id_query = " OR ".join([f'_id:"{event_id}"' for event_id in id_list]) + query_string = id_query + + all_indices = list({t.searchindex.index_name for t in sketch.timelines}) + indices_from_filter = query_filter.get("indices", all_indices) + + if "_all" in indices_from_filter: + indices_from_filter = all_indices + + indices, timeline_ids = utils.get_validated_indices(indices_from_filter, sketch) + + if not indices: + raise ValueError( + "No valid search indices were found to perform the search on." + ) + + result = datastore.search( + sketch_id=sketch.id, + query_string=query_string, + query_filter=query_filter, + query_dsl="", + indices=indices, + timeline_ids=timeline_ids, + ) + + logger.info("Number of hits from datastore search: %d", len(result)) + return export.query_results_to_dataframe(result, sketch) + + def generate_prompt(self, sketch: Sketch, **kwargs: Any) -> str: + """Generates the forensic report prompt based on events from a query. + + Args: + sketch: The Sketch object containing events to analyze. + **kwargs: Additional arguments including: + - form: Form data containing query and filter information. + - datastore: OpenSearch instance for querying. + - timeline_ids: List of timeline IDs to query. + + Returns: + str: Generated prompt text with events to analyze. + + Raises: + ValueError: If required parameters are missing or if no events are found. + """ + form = kwargs.get("form") + datastore = kwargs.get("datastore") + timeline_ids = kwargs.get("timeline_ids") + + if not form: + raise ValueError("Missing 'form' data in kwargs") + + query_filter = form.get("filter", {}) + query_string = form.get("query", "*") or "*" + + events_df = self._run_timesketch_query( + sketch, + query_string, + query_filter, + datastore=datastore, + timeline_ids=timeline_ids, + ) + + if events_df is None or events_df.empty: + return "No events to analyze for forensic report." 
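+
+        # The deduplication below keys on str(datetime) + message, assuming
+        # two events with an identical timestamp and message text are true
+        # duplicates; hashing the full event document would be a stricter
+        # (but more expensive) alternative.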
+
+        # Ensure 'datetime' column exists and convert to datetime objects
+        if 'datetime' not in events_df.columns:
+            logger.error("The 'datetime' column is missing in the events DataFrame.")
+            raise ValueError("The 'datetime' column is missing in the events DataFrame.")
+
+        # Convert 'datetime' column to datetime objects, handling potential errors
+        try:
+            events_df['datetime'] = pd.to_datetime(events_df['datetime'], errors='raise')
+        except (ValueError, TypeError) as e:
+            logger.error("Error converting 'datetime' column: %s", e)
+            raise ValueError(f"Error converting 'datetime' column to datetime objects: {e}") from e
+
+        # Create a combined key of timestamp and message to uniquely identify events
+        events_df['combined_key'] = events_df['datetime'].astype(str) + events_df['message']
+
+        # Drop duplicates based on the combined key; copy before adding helper columns
+        unique_df = events_df.drop_duplicates(subset='combined_key', keep='first').copy()
+
+        # Convert datetime to string BEFORE creating the dictionary
+        unique_df['datetime_str'] = unique_df['datetime'].astype(str)
+
+        # Prepare the unique events for the LLM prompt, include timestamp string
+        events_dict = unique_df[['datetime_str', 'message']].rename(
+            columns={'datetime_str': 'datetime'}).to_dict(orient="records")
+
+        total_events_count = len(events_df)
+        unique_events_count = len(unique_df)
+
+        logger.info(
+            "Analyzing events for forensic report: %d events",
+            total_events_count,
+        )
+        logger.info("Reduced to %d unique events", unique_events_count)
+
+        if not events_dict:
+            return "No events to analyze for forensic report."
+
+        return self._get_prompt_text(events_dict)
+
+    def process_response(self, llm_response: Any, **kwargs: Any) -> Dict[str, Any]:
+        """Processes the LLM response and creates a Story in the sketch.
+
+        Args:
+            llm_response: The response from the LLM model, expected to be a dictionary.
+            **kwargs: Additional arguments including:
+                - sketch_id: ID of the sketch being processed.
+                - sketch: The Sketch object.
+                - form: Form data containing query and filter information.
+                - datastore: OpenSearch instance for querying.
+                - timeline_ids: List of timeline IDs to query.
+
+        Returns:
+            Dictionary containing the processed response:
+                - summary: The forensic report text
+                - summary_event_count: Total number of events analyzed
+                - summary_unique_event_count: Number of unique events analyzed
+                - story_id: ID of the created story
+
+        Raises:
+            ValueError: If required parameters are missing or if the LLM response
+                is not in the expected format.
+ """ + sketch = kwargs.get("sketch") + form = kwargs.get("form") + datastore = kwargs.get("datastore") + timeline_ids = kwargs.get("timeline_ids") + + if not sketch: + raise ValueError("Missing 'sketch' in kwargs") + + if not form: + raise ValueError("Missing 'form' data in kwargs") + + if not isinstance(llm_response, dict): + raise ValueError("LLM response is expected to be a dictionary") + + summary_text = llm_response.get("summary") + if summary_text is None: + raise ValueError("LLM response missing 'summary' key") + + # Recalculate event counts for metrics in the response + query_filter = form.get("filter", {}) + query_string = form.get("query", "*") or "*" + + events_df = self._run_timesketch_query( + sketch, + query_string, + query_filter, + datastore=datastore, + timeline_ids=timeline_ids, + ) + + total_events_count = len(events_df) + + # For unique count, use the same logic as in generate_prompt + if 'datetime' in events_df.columns: + events_df['datetime'] = pd.to_datetime(events_df['datetime'], errors='coerce') + events_df['combined_key'] = events_df['datetime'].astype(str) + events_df['message'] + unique_events_count = len(events_df.drop_duplicates(subset='combined_key', keep='first')) + else: + unique_events_count = len(events_df.drop_duplicates(subset='message', keep='first')) + + # Create a story using the actions module + try: + # Create the story with a specific title for forensic reports + story_title = f"Forensic Report - {time.strftime('%Y-%m-%d %H:%M')}" + story_id = actions.create_story( + sketch=sketch, + content=summary_text, + title=story_title + ) + except Exception as e: + logger.error("Error creating story for forensic report: %s", e) + raise ValueError(f"Error creating story to save forensic report: {e}") from e + + return { + "summary": summary_text, + "summary_event_count": total_events_count, + "summary_unique_event_count": unique_events_count, + "story_id": story_id + } From 9d8e343b1c41d95416ee535c0850d9e7a42b8c8b Mon Sep 17 00:00:00 2001 From: itsmvd Date: Fri, 28 Feb 2025 17:00:00 +0000 Subject: [PATCH 38/63] Add nl2q and llm_summarize as LLM features --- timesketch/lib/llms/features/llm_summarize.py | 73 ++++++------------- .../lib/llms/features/llm_summarize_test.py | 14 ---- timesketch/lib/llms/features/nl2q_test.py | 1 - 3 files changed, 22 insertions(+), 66 deletions(-) diff --git a/timesketch/lib/llms/features/llm_summarize.py b/timesketch/lib/llms/features/llm_summarize.py index e776fcb705..65402d7087 100644 --- a/timesketch/lib/llms/features/llm_summarize.py +++ b/timesketch/lib/llms/features/llm_summarize.py @@ -16,79 +16,53 @@ import logging from typing import Any, Optional import pandas as pd -import prometheus_client from flask import current_app from opensearchpy import OpenSearch from timesketch.lib import utils from timesketch.api.v1 import export from timesketch.models.sketch import Sketch -from timesketch.lib.definitions import METRICS_NAMESPACE from timesketch.lib.llms.features.interface import LLMFeatureInterface logger = logging.getLogger("timesketch.llm.summarize_feature") -METRICS = { - "llm_summary_events_processed_total": prometheus_client.Counter( - "llm_summary_events_processed_total", - "Total number of events processed for LLM summarization", - ["sketch_id"], - namespace=METRICS_NAMESPACE, - ), - "llm_summary_unique_events_total": prometheus_client.Counter( - "llm_summary_unique_events_total", - "Total number of unique events sent to the LLM", - ["sketch_id"], - namespace=METRICS_NAMESPACE, - ), -} - class 
LLMSummarizeFeature(LLMFeatureInterface): """LLM Summarization feature.""" NAME = "llm_summarize" - PROMPT_CONFIG_KEY = "PROMPT_LLM_SUMMARIZATION" RESPONSE_SCHEMA = { "type": "object", "properties": {"summary": {"type": "string"}}, "required": ["summary"], } - def _get_prompt_text(self, events_dict: list[dict[str, Any]]) -> str: + def _get_prompt_text(self, events_dict: list) -> str: """Reads the prompt template from file and injects events. + Args: events_dict: List of event dictionaries to inject into prompt. + Returns: str: Complete prompt text with injected events. + Raises: - ValueError: If the prompt path is not configured or placeholder is missing. + ValueError: If the prompt path is not configured. FileNotFoundError: If the prompt file cannot be found. IOError: If there's an error reading the prompt file. """ - prompt_file_path = current_app.config.get(self.PROMPT_CONFIG_KEY) + prompt_file_path = current_app.config.get("PROMPT_LLM_SUMMARIZATION") if not prompt_file_path: - logger.error("%s config not set", {self.PROMPT_CONFIG_KEY}) + logger.error("PROMPT_LLM_SUMMARIZATION config not set") raise ValueError("LLM summarization prompt path not configured.") - try: with open(prompt_file_path, "r", encoding="utf-8") as file_handle: prompt_template = file_handle.read() - except FileNotFoundError as exc: + except FileNotFoundError: logger.error("Prompt file not found: %s", prompt_file_path) - raise FileNotFoundError( - f"LLM Prompt file not found: {prompt_file_path}" - ) from exc + raise FileNotFoundError(f"LLM Prompt file not found: {prompt_file_path}") except IOError as e: logger.error("Error reading prompt file: %s", e) raise IOError("Error reading LLM prompt file.") from e - - if "" not in prompt_template: - logger.error("Prompt template is missing the placeholder") - raise ValueError( - "LLM summarization prompt template is missing the " - "required placeholder." - ) - prompt_text = prompt_template.replace("", json.dumps(events_dict)) return prompt_text @@ -102,6 +76,7 @@ def _run_timesketch_query( timeline_ids: Optional[list] = None, ) -> pd.DataFrame: """Runs a timesketch query and returns results as a DataFrame. + Args: sketch: The Sketch object to query. query_string: Search query string. @@ -109,8 +84,10 @@ def _run_timesketch_query( id_list: List of event IDs to retrieve. datastore: OpenSearch instance for querying. timeline_ids: List of timeline IDs to query. + Returns: pd.DataFrame: DataFrame containing query results. + Raises: ValueError: If datastore is not provided or no valid indices are found. """ @@ -142,14 +119,17 @@ def _run_timesketch_query( def generate_prompt(self, sketch: Sketch, **kwargs: Any) -> str: """Generates the summarization prompt based on events from a query. + Args: sketch: The Sketch object containing events to summarize. **kwargs: Additional arguments including: - form: Form data containing query and filter information. - datastore: OpenSearch instance for querying. - timeline_ids: List of timeline IDs to query. + Returns: str: Generated prompt text with events to summarize. + Raises: ValueError: If required parameters are missing or if no events are found. """ @@ -169,28 +149,17 @@ def generate_prompt(self, sketch: Sketch, **kwargs: Any) -> str: ) if events_df is None or events_df.empty: return "No events to summarize based on the current filter." 
- - total_events_count = len(events_df) - METRICS["llm_summary_events_processed_total"].labels( - sketch_id=str(sketch.id) - ).inc(total_events_count) - unique_events_df = events_df[["message"]].drop_duplicates( subset="message", keep="first" ) - unique_events_count = len(unique_events_df) - METRICS["llm_summary_unique_events_total"].labels(sketch_id=str(sketch.id)).inc( - unique_events_count - ) - - events = unique_events_df.to_dict(orient="records") - if not events: + events_dict = unique_events_df.to_dict(orient="records") + if not events_dict: return "No events to summarize based on the current filter." - - return self._get_prompt_text(events) + return self._get_prompt_text(events_dict) def process_response(self, llm_response: Any, **kwargs: Any) -> dict[str, Any]: """Processes the LLM response and adds additional context information. + Args: llm_response: The response from the LLM model, expected to be a dictionary. **kwargs: Additional arguments including: @@ -199,11 +168,13 @@ def process_response(self, llm_response: Any, **kwargs: Any) -> dict[str, Any]: - datastore: OpenSearch instance for querying. - timeline_ids: List of timeline IDs. - form: Form data containing query and filter information. + Returns: - Dictionary containing the processed response with additional context: + dict[str, Any]: Dictionary containing the processed response with additional context: - response: The summary text. - summary_event_count: Total number of events summarized. - summary_unique_event_count: Number of unique events summarized. + Raises: ValueError: If required parameters are missing or if the LLM response is not in the expected format. diff --git a/timesketch/lib/llms/features/llm_summarize_test.py b/timesketch/lib/llms/features/llm_summarize_test.py index 073ca07479..4946f118f0 100644 --- a/timesketch/lib/llms/features/llm_summarize_test.py +++ b/timesketch/lib/llms/features/llm_summarize_test.py @@ -22,7 +22,6 @@ from timesketch.lib.llms.features.llm_summarize import LLMSummarizeFeature -# pylint: disable=protected-access class TestLLMSummarizeFeature(BaseTest): """Tests for the LLMSummarizeFeature.""" @@ -45,19 +44,6 @@ def test_get_prompt_text(self): self.assertEqual(prompt, f"Analyze these events: {json.dumps(events_dict)}") - @mock.patch( - "builtins.open", - mock.mock_open(read_data="Analyze these events without placeholder"), - ) - def test_get_prompt_text_missing_placeholder(self): - """Tests _get_prompt_text method with missing placeholder.""" - events_dict = [{"message": "Test event"}] - with self.assertRaises(ValueError) as context: - self.llm_feature._get_prompt_text(events_dict) - self.assertIn( - "missing the required placeholder", str(context.exception) - ) - def test_get_prompt_text_missing_file(self): """Tests _get_prompt_text method with missing file.""" current_app.config["PROMPT_LLM_SUMMARIZATION"] = "/file_does_not_exist.txt" diff --git a/timesketch/lib/llms/features/nl2q_test.py b/timesketch/lib/llms/features/nl2q_test.py index c902cad527..684a5f54fc 100644 --- a/timesketch/lib/llms/features/nl2q_test.py +++ b/timesketch/lib/llms/features/nl2q_test.py @@ -20,7 +20,6 @@ from timesketch.lib.llms.features.nl2q import Nl2qFeature -# pylint: disable=protected-access class TestNl2qFeature(BaseTest): """Tests for the Nl2qFeature.""" From 1a213a208f10d370b44a7888a94f84453cd4eec9 Mon Sep 17 00:00:00 2001 From: itsmvd Date: Fri, 28 Feb 2025 17:08:04 +0000 Subject: [PATCH 39/63] Couple of linter fixes on llm_summarize --- timesketch/lib/llms/features/llm_summarize.py | 8 +++++--- 1 
file changed, 5 insertions(+), 3 deletions(-) diff --git a/timesketch/lib/llms/features/llm_summarize.py b/timesketch/lib/llms/features/llm_summarize.py index 65402d7087..786ba0e1b4 100644 --- a/timesketch/lib/llms/features/llm_summarize.py +++ b/timesketch/lib/llms/features/llm_summarize.py @@ -57,9 +57,11 @@ def _get_prompt_text(self, events_dict: list) -> str: try: with open(prompt_file_path, "r", encoding="utf-8") as file_handle: prompt_template = file_handle.read() - except FileNotFoundError: + except FileNotFoundError as exc: logger.error("Prompt file not found: %s", prompt_file_path) - raise FileNotFoundError(f"LLM Prompt file not found: {prompt_file_path}") + raise FileNotFoundError( + f"LLM Prompt file not found: {prompt_file_path}" + ) from exc except IOError as e: logger.error("Error reading prompt file: %s", e) raise IOError("Error reading LLM prompt file.") from e @@ -170,7 +172,7 @@ def process_response(self, llm_response: Any, **kwargs: Any) -> dict[str, Any]: - form: Form data containing query and filter information. Returns: - dict[str, Any]: Dictionary containing the processed response with additional context: + Dictionary containing the processed response with additional context: - response: The summary text. - summary_event_count: Total number of events summarized. - summary_unique_event_count: Number of unique events summarized. From 3f9c7f0101d3ce5202f9accd0ded76686f507ba9 Mon Sep 17 00:00:00 2001 From: itsmvd Date: Fri, 28 Feb 2025 17:13:17 +0000 Subject: [PATCH 40/63] pylint: disable=protected-access --- timesketch/lib/llms/features/llm_summarize_test.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/timesketch/lib/llms/features/llm_summarize_test.py b/timesketch/lib/llms/features/llm_summarize_test.py index 4946f118f0..186bad0f1f 100644 --- a/timesketch/lib/llms/features/llm_summarize_test.py +++ b/timesketch/lib/llms/features/llm_summarize_test.py @@ -21,7 +21,7 @@ from timesketch.lib.testlib import MockDataStore from timesketch.lib.llms.features.llm_summarize import LLMSummarizeFeature - +#pylint: disable=protected-access class TestLLMSummarizeFeature(BaseTest): """Tests for the LLMSummarizeFeature.""" From 2baf0af012b2d7b8ebf98790b706f4c8d6d256b2 Mon Sep 17 00:00:00 2001 From: itsmvd Date: Fri, 28 Feb 2025 17:15:35 +0000 Subject: [PATCH 41/63] black formatting --- timesketch/lib/llms/features/llm_summarize_test.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/timesketch/lib/llms/features/llm_summarize_test.py b/timesketch/lib/llms/features/llm_summarize_test.py index 186bad0f1f..c103321aff 100644 --- a/timesketch/lib/llms/features/llm_summarize_test.py +++ b/timesketch/lib/llms/features/llm_summarize_test.py @@ -21,7 +21,8 @@ from timesketch.lib.testlib import MockDataStore from timesketch.lib.llms.features.llm_summarize import LLMSummarizeFeature -#pylint: disable=protected-access + +# pylint: disable=protected-access class TestLLMSummarizeFeature(BaseTest): """Tests for the LLMSummarizeFeature.""" From e46be9748d28d870dce81815deb7e088010520e5 Mon Sep 17 00:00:00 2001 From: itsmvd Date: Fri, 28 Feb 2025 17:20:04 +0000 Subject: [PATCH 42/63] # pylint: disable=protected-access --- timesketch/lib/llms/features/nl2q_test.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/timesketch/lib/llms/features/nl2q_test.py b/timesketch/lib/llms/features/nl2q_test.py index 684a5f54fc..e3e52d470d 100644 --- a/timesketch/lib/llms/features/nl2q_test.py +++ b/timesketch/lib/llms/features/nl2q_test.py @@ -19,7 +19,7 @@ from 
timesketch.lib.testlib import BaseTest from timesketch.lib.llms.features.nl2q import Nl2qFeature - +# pylint: disable=protected-access class TestNl2qFeature(BaseTest): """Tests for the Nl2qFeature.""" From be0b2ef0a190593ba9388db8f908c6766f147a39 Mon Sep 17 00:00:00 2001 From: itsmvd Date: Fri, 28 Feb 2025 17:22:54 +0000 Subject: [PATCH 43/63] formatting on nl2q --- timesketch/lib/llms/features/nl2q_test.py | 1 + 1 file changed, 1 insertion(+) diff --git a/timesketch/lib/llms/features/nl2q_test.py b/timesketch/lib/llms/features/nl2q_test.py index e3e52d470d..c902cad527 100644 --- a/timesketch/lib/llms/features/nl2q_test.py +++ b/timesketch/lib/llms/features/nl2q_test.py @@ -19,6 +19,7 @@ from timesketch.lib.testlib import BaseTest from timesketch.lib.llms.features.nl2q import Nl2qFeature + # pylint: disable=protected-access class TestNl2qFeature(BaseTest): """Tests for the Nl2qFeature.""" From 56bcd6d56047160f65b19885d3856fee7c0b2b1b Mon Sep 17 00:00:00 2001 From: itsmvd Date: Sat, 1 Mar 2025 09:34:33 +0000 Subject: [PATCH 44/63] add feature specific metrics --- timesketch/lib/llms/features/llm_summarize.py | 44 ++++++++++++++----- 1 file changed, 32 insertions(+), 12 deletions(-) diff --git a/timesketch/lib/llms/features/llm_summarize.py b/timesketch/lib/llms/features/llm_summarize.py index 786ba0e1b4..1daca5e84d 100644 --- a/timesketch/lib/llms/features/llm_summarize.py +++ b/timesketch/lib/llms/features/llm_summarize.py @@ -16,15 +16,33 @@ import logging from typing import Any, Optional import pandas as pd +import prometheus_client from flask import current_app from opensearchpy import OpenSearch from timesketch.lib import utils from timesketch.api.v1 import export from timesketch.models.sketch import Sketch +from timesketch.lib.definitions import METRICS_NAMESPACE from timesketch.lib.llms.features.interface import LLMFeatureInterface logger = logging.getLogger("timesketch.llm.summarize_feature") +# TODO(itsmvd): Remove 'feature' prefix after migration +METRICS = { + "llm_summary_events_processed_total": prometheus_client.Counter( + "feature_llm_summary_events_processed_total", # avoid duplicate registration + "Total number of events processed for LLM summarization", + ["sketch_id"], + namespace=METRICS_NAMESPACE, + ), + "llm_summary_unique_events_total": prometheus_client.Counter( + "feature_llm_summary_unique_events_total", # avoid duplicate registration + "Total number of unique events sent to the LLM", + ["sketch_id"], + namespace=METRICS_NAMESPACE, + ), +} + class LLMSummarizeFeature(LLMFeatureInterface): """LLM Summarization feature.""" @@ -38,13 +56,10 @@ class LLMSummarizeFeature(LLMFeatureInterface): def _get_prompt_text(self, events_dict: list) -> str: """Reads the prompt template from file and injects events. - Args: events_dict: List of event dictionaries to inject into prompt. - Returns: str: Complete prompt text with injected events. - Raises: ValueError: If the prompt path is not configured. FileNotFoundError: If the prompt file cannot be found. @@ -78,7 +93,6 @@ def _run_timesketch_query( timeline_ids: Optional[list] = None, ) -> pd.DataFrame: """Runs a timesketch query and returns results as a DataFrame. - Args: sketch: The Sketch object to query. query_string: Search query string. @@ -86,10 +100,8 @@ def _run_timesketch_query( id_list: List of event IDs to retrieve. datastore: OpenSearch instance for querying. timeline_ids: List of timeline IDs to query. - Returns: pd.DataFrame: DataFrame containing query results. 
- Raises: ValueError: If datastore is not provided or no valid indices are found. """ @@ -121,17 +133,14 @@ def _run_timesketch_query( def generate_prompt(self, sketch: Sketch, **kwargs: Any) -> str: """Generates the summarization prompt based on events from a query. - Args: sketch: The Sketch object containing events to summarize. **kwargs: Additional arguments including: - form: Form data containing query and filter information. - datastore: OpenSearch instance for querying. - timeline_ids: List of timeline IDs to query. - Returns: str: Generated prompt text with events to summarize. - Raises: ValueError: If required parameters are missing or if no events are found. """ @@ -151,17 +160,30 @@ def generate_prompt(self, sketch: Sketch, **kwargs: Any) -> str: ) if events_df is None or events_df.empty: return "No events to summarize based on the current filter." + + # Count and record total events + total_events_count = len(events_df) + METRICS["llm_summary_events_processed_total"].labels( + sketch_id=str(sketch.id) + ).inc(total_events_count) + + # Get unique events, count and record them unique_events_df = events_df[["message"]].drop_duplicates( subset="message", keep="first" ) + unique_events_count = len(unique_events_df) + METRICS["llm_summary_unique_events_total"].labels(sketch_id=str(sketch.id)).inc( + unique_events_count + ) + events_dict = unique_events_df.to_dict(orient="records") if not events_dict: return "No events to summarize based on the current filter." + return self._get_prompt_text(events_dict) def process_response(self, llm_response: Any, **kwargs: Any) -> dict[str, Any]: """Processes the LLM response and adds additional context information. - Args: llm_response: The response from the LLM model, expected to be a dictionary. **kwargs: Additional arguments including: @@ -170,13 +192,11 @@ def process_response(self, llm_response: Any, **kwargs: Any) -> dict[str, Any]: - datastore: OpenSearch instance for querying. - timeline_ids: List of timeline IDs. - form: Form data containing query and filter information. - Returns: Dictionary containing the processed response with additional context: - response: The summary text. - summary_event_count: Total number of events summarized. - summary_unique_event_count: Number of unique events summarized. - Raises: ValueError: If required parameters are missing or if the LLM response is not in the expected format. From 09bca8b9d86cab010b88587688231f0153e0e17c Mon Sep 17 00:00:00 2001 From: itsmvd Date: Sat, 1 Mar 2025 09:36:56 +0000 Subject: [PATCH 45/63] remove unnecessary comments --- timesketch/lib/llms/features/llm_summarize.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/timesketch/lib/llms/features/llm_summarize.py b/timesketch/lib/llms/features/llm_summarize.py index 1daca5e84d..695a5a3c7b 100644 --- a/timesketch/lib/llms/features/llm_summarize.py +++ b/timesketch/lib/llms/features/llm_summarize.py @@ -161,13 +161,11 @@ def generate_prompt(self, sketch: Sketch, **kwargs: Any) -> str: if events_df is None or events_df.empty: return "No events to summarize based on the current filter." 
- # Count and record total events total_events_count = len(events_df) METRICS["llm_summary_events_processed_total"].labels( sketch_id=str(sketch.id) ).inc(total_events_count) - # Get unique events, count and record them unique_events_df = events_df[["message"]].drop_duplicates( subset="message", keep="first" ) From 5478dcd5cf14c35aea41ad6b2643a32218ee3461 Mon Sep 17 00:00:00 2001 From: itsmvd Date: Tue, 4 Mar 2025 09:23:14 +0000 Subject: [PATCH 46/63] review fixes --- timesketch/lib/llms/features/llm_summarize.py | 19 +++++++++++-------- 1 file changed, 11 insertions(+), 8 deletions(-) diff --git a/timesketch/lib/llms/features/llm_summarize.py b/timesketch/lib/llms/features/llm_summarize.py index 695a5a3c7b..7c38339214 100644 --- a/timesketch/lib/llms/features/llm_summarize.py +++ b/timesketch/lib/llms/features/llm_summarize.py @@ -48,16 +48,17 @@ class LLMSummarizeFeature(LLMFeatureInterface): """LLM Summarization feature.""" NAME = "llm_summarize" + PROMPT_CONFIG_KEY = "PROMPT_LLM_SUMMARIZATION" RESPONSE_SCHEMA = { "type": "object", "properties": {"summary": {"type": "string"}}, "required": ["summary"], } - def _get_prompt_text(self, events_dict: list) -> str: + def _get_prompt_text(self, events: list[dict[str, Any]]) -> str: """Reads the prompt template from file and injects events. Args: - events_dict: List of event dictionaries to inject into prompt. + events: List of event dictionaries to inject into prompt. Returns: str: Complete prompt text with injected events. Raises: @@ -65,10 +66,11 @@ def _get_prompt_text(self, events_dict: list) -> str: FileNotFoundError: If the prompt file cannot be found. IOError: If there's an error reading the prompt file. """ - prompt_file_path = current_app.config.get("PROMPT_LLM_SUMMARIZATION") + prompt_file_path = current_app.config.get(self.PROMPT_CONFIG_KEY) if not prompt_file_path: - logger.error("PROMPT_LLM_SUMMARIZATION config not set") + logger.error("%s config not set", {self.PROMPT_CONFIG_KEY}) raise ValueError("LLM summarization prompt path not configured.") + try: with open(prompt_file_path, "r", encoding="utf-8") as file_handle: prompt_template = file_handle.read() @@ -80,7 +82,8 @@ def _get_prompt_text(self, events_dict: list) -> str: except IOError as e: logger.error("Error reading prompt file: %s", e) raise IOError("Error reading LLM prompt file.") from e - prompt_text = prompt_template.replace("", json.dumps(events_dict)) + + prompt_text = prompt_template.replace("", json.dumps(events)) return prompt_text def _run_timesketch_query( @@ -174,11 +177,11 @@ def generate_prompt(self, sketch: Sketch, **kwargs: Any) -> str: unique_events_count ) - events_dict = unique_events_df.to_dict(orient="records") - if not events_dict: + events = unique_events_df.to_dict(orient="records") + if not events: return "No events to summarize based on the current filter." - return self._get_prompt_text(events_dict) + return self._get_prompt_text(events) def process_response(self, llm_response: Any, **kwargs: Any) -> dict[str, Any]: """Processes the LLM response and adds additional context information. 
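A note on the config key introduced above: PROMPT_CONFIG_KEY resolves through
the Flask app config, so a deployment is expected to carry an entry along
these lines in timesketch.conf (the path shown is illustrative, not taken
from this series):

    PROMPT_LLM_SUMMARIZATION = '/usr/local/share/timesketch/llm_summarize/prompt_summarization.txt'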
From 4f9a0b7d36916c2384ced8e1e2540972d9c69675 Mon Sep 17 00:00:00 2001 From: itsmvd Date: Tue, 4 Mar 2025 10:04:08 +0000 Subject: [PATCH 47/63] Handle incorrect prompt file + test --- timesketch/lib/llms/features/llm_summarize.py | 15 +++++++++++---- .../lib/llms/features/llm_summarize_test.py | 13 +++++++++++++ 2 files changed, 24 insertions(+), 4 deletions(-) diff --git a/timesketch/lib/llms/features/llm_summarize.py b/timesketch/lib/llms/features/llm_summarize.py index 7c38339214..9a6cf5f457 100644 --- a/timesketch/lib/llms/features/llm_summarize.py +++ b/timesketch/lib/llms/features/llm_summarize.py @@ -55,14 +55,14 @@ class LLMSummarizeFeature(LLMFeatureInterface): "required": ["summary"], } - def _get_prompt_text(self, events: list[dict[str, Any]]) -> str: + def _get_prompt_text(self, events_dict: list[dict[str, Any]]) -> str: """Reads the prompt template from file and injects events. Args: - events: List of event dictionaries to inject into prompt. + events_dict: List of event dictionaries to inject into prompt. Returns: str: Complete prompt text with injected events. Raises: - ValueError: If the prompt path is not configured. + ValueError: If the prompt path is not configured or placeholder is missing. FileNotFoundError: If the prompt file cannot be found. IOError: If there's an error reading the prompt file. """ @@ -83,7 +83,14 @@ def _get_prompt_text(self, events: list[dict[str, Any]]) -> str: logger.error("Error reading prompt file: %s", e) raise IOError("Error reading LLM prompt file.") from e - prompt_text = prompt_template.replace("", json.dumps(events)) + if "" not in prompt_template: + logger.error("Prompt template is missing the placeholder") + raise ValueError( + "LLM summarization prompt template is missing the " + "required placeholder." 
+ ) + + prompt_text = prompt_template.replace("", json.dumps(events_dict)) return prompt_text def _run_timesketch_query( diff --git a/timesketch/lib/llms/features/llm_summarize_test.py b/timesketch/lib/llms/features/llm_summarize_test.py index c103321aff..073ca07479 100644 --- a/timesketch/lib/llms/features/llm_summarize_test.py +++ b/timesketch/lib/llms/features/llm_summarize_test.py @@ -45,6 +45,19 @@ def test_get_prompt_text(self): self.assertEqual(prompt, f"Analyze these events: {json.dumps(events_dict)}") + @mock.patch( + "builtins.open", + mock.mock_open(read_data="Analyze these events without placeholder"), + ) + def test_get_prompt_text_missing_placeholder(self): + """Tests _get_prompt_text method with missing placeholder.""" + events_dict = [{"message": "Test event"}] + with self.assertRaises(ValueError) as context: + self.llm_feature._get_prompt_text(events_dict) + self.assertIn( + "missing the required placeholder", str(context.exception) + ) + def test_get_prompt_text_missing_file(self): """Tests _get_prompt_text method with missing file.""" current_app.config["PROMPT_LLM_SUMMARIZATION"] = "/file_does_not_exist.txt" From a82d776e49a7a128bdc5145123bcb0512d21f21d Mon Sep 17 00:00:00 2001 From: itsmvd Date: Tue, 4 Mar 2025 10:23:47 +0000 Subject: [PATCH 48/63] frontend: LLM features switch to new llm endpoint --- .../src/components/Scenarios/QuestionCard.vue | 49 ++++++++----------- 1 file changed, 20 insertions(+), 29 deletions(-) diff --git a/timesketch/frontend-ng/src/components/Scenarios/QuestionCard.vue b/timesketch/frontend-ng/src/components/Scenarios/QuestionCard.vue index de5eccf9f1..40068253e7 100644 --- a/timesketch/frontend-ng/src/components/Scenarios/QuestionCard.vue +++ b/timesketch/frontend-ng/src/components/Scenarios/QuestionCard.vue @@ -404,17 +404,17 @@ export default { }, methods: { getSuggestedQuery() { - this.suggestedQueryLoading = true - let formData = { question: this.activeQuestion.display_name } - ApiClient.llmRequest(this.sketch.id, 'nl2q', formData) - .then((response) => { - this.suggestedQuery = response.data - this.suggestedQueryLoading = false - }) - .catch((e) => { - console.error(e) - }) - }, + this.suggestedQueryLoading = true + let formData = { question: this.activeQuestion.display_name } + ApiClient.llmRequest(this.sketch.id, 'nl2q', formData) + .then((response) => { + this.suggestedQuery = response.data + this.suggestedQueryLoading = false + }) + .catch((e) => { + console.error(e) + }) + }, getQuestionTemplates() { this.isLoading = true ApiClient.getQuestionTemplates() @@ -508,24 +508,15 @@ export default { this.suggestedQuery = {} // Set active tab - if (this.userSettings.generateQuery && this.systemSettings.LLM_PROVIDER) { - if (this.activeQuestion.conclusions.length) { - this.activeTab = 2 - } else { - this.activeTab = 0 - } - } else { - if (this.activeQuestion.conclusions.length) { - this.activeTab = 2 - } else if (this.allSuggestedQueries.length) { - this.activeTab = 0 - } else if (this.activeQuestion.approaches.length) { - this.activeTab = 1 - } else { - this.activeTab = 2 - } - } - + if (this.activeQuestion.conclusions.length) { + this.activeTab = 2 + } else if (this.allSuggestedQueries.length) { + this.activeTab = 0 + } else if (question.approaches.length) { + this.activeTab = 1 + } else { + this.activeTab = 2 + } let payload = { scenarioId: null, From 75f55f15d22275b0b1df19a4f725837a855d81aa Mon Sep 17 00:00:00 2001 From: itsmvd Date: Tue, 4 Mar 2025 10:27:17 +0000 Subject: [PATCH 49/63] layout fix --- 
.../src/components/Scenarios/QuestionCard.vue | 22 +++++++++---------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/timesketch/frontend-ng/src/components/Scenarios/QuestionCard.vue b/timesketch/frontend-ng/src/components/Scenarios/QuestionCard.vue index 40068253e7..601a045611 100644 --- a/timesketch/frontend-ng/src/components/Scenarios/QuestionCard.vue +++ b/timesketch/frontend-ng/src/components/Scenarios/QuestionCard.vue @@ -404,17 +404,17 @@ export default { }, methods: { getSuggestedQuery() { - this.suggestedQueryLoading = true - let formData = { question: this.activeQuestion.display_name } - ApiClient.llmRequest(this.sketch.id, 'nl2q', formData) - .then((response) => { - this.suggestedQuery = response.data - this.suggestedQueryLoading = false - }) - .catch((e) => { - console.error(e) - }) - }, + this.suggestedQueryLoading = true + let formData = { question: this.activeQuestion.display_name } + ApiClient.llmRequest(this.sketch.id, 'nl2q', formData) + .then((response) => { + this.suggestedQuery = response.data + this.suggestedQueryLoading = false + }) + .catch((e) => { + console.error(e) + }) + }, getQuestionTemplates() { this.isLoading = true ApiClient.getQuestionTemplates() From 317eec821dc131aed5c1a1de533499a3c48dcfce Mon Sep 17 00:00:00 2001 From: itsmvd Date: Tue, 4 Mar 2025 10:41:16 +0000 Subject: [PATCH 50/63] Remove nl2q & llm_summarize features from the API --- timesketch/lib/llms/features/llm_summarize.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/timesketch/lib/llms/features/llm_summarize.py b/timesketch/lib/llms/features/llm_summarize.py index 9a6cf5f457..e776fcb705 100644 --- a/timesketch/lib/llms/features/llm_summarize.py +++ b/timesketch/lib/llms/features/llm_summarize.py @@ -27,16 +27,15 @@ logger = logging.getLogger("timesketch.llm.summarize_feature") -# TODO(itsmvd): Remove 'feature' prefix after migration METRICS = { "llm_summary_events_processed_total": prometheus_client.Counter( - "feature_llm_summary_events_processed_total", # avoid duplicate registration + "llm_summary_events_processed_total", "Total number of events processed for LLM summarization", ["sketch_id"], namespace=METRICS_NAMESPACE, ), "llm_summary_unique_events_total": prometheus_client.Counter( - "feature_llm_summary_unique_events_total", # avoid duplicate registration + "llm_summary_unique_events_total", "Total number of unique events sent to the LLM", ["sketch_id"], namespace=METRICS_NAMESPACE, From bc20bf00f78abfe1ac52e008ada95792e06a3d16 Mon Sep 17 00:00:00 2001 From: itsmvd Date: Tue, 11 Mar 2025 11:12:11 +0000 Subject: [PATCH 51/63] Make timeout configurable for snackBar methods --- timesketch/frontend-ng/src/mixins/snackBar.js | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/timesketch/frontend-ng/src/mixins/snackBar.js b/timesketch/frontend-ng/src/mixins/snackBar.js index 97ac9bc918..efc021709c 100644 --- a/timesketch/frontend-ng/src/mixins/snackBar.js +++ b/timesketch/frontend-ng/src/mixins/snackBar.js @@ -23,32 +23,35 @@ const defaultSnackBar = { "timeout": defaultTimeout } -// These methids will be available to all components without any further imports. +// These methods will be available to all components without any further imports. 
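+// Usage sketch (both calls are illustrative; timeout is in milliseconds and
+// falls back to defaultTimeout when omitted):
+//   this.successSnackBar('Saved!')
+//   this.errorSnackBar('Request failed', 10000)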
Vue.mixin({ methods: { - successSnackBar(message) { + successSnackBar(message, timeout) { let snackbar = defaultSnackBar snackbar.message = message snackbar.color = "success" + snackbar.timeout = timeout || defaultTimeout this.$store.dispatch('setSnackBar', snackbar) }, - errorSnackBar(message) { + errorSnackBar(message, timeout) { let snackbar = defaultSnackBar snackbar.message = message snackbar.color = "error" + snackbar.timeout = timeout || defaultTimeout this.$store.dispatch('setSnackBar', snackbar) }, - warningSnackBar(message) { + warningSnackBar(message, timeout) { let snackbar = defaultSnackBar snackbar.message = message snackbar.color = "warning" + snackbar.timeout = timeout || defaultTimeout this.$store.dispatch('setSnackBar', snackbar) }, - infoSnackBar(message) { + infoSnackBar(message, timeout) { let snackbar = defaultSnackBar snackbar.message = message snackbar.color = "info" - snackbar.timeout = 2000 + snackbar.timeout = timeout || defaultTimeout this.$store.dispatch('setSnackBar', snackbar) }, } From 27592c9e957bd4bd804c186f5da9879462417d42 Mon Sep 17 00:00:00 2001 From: itsmvd Date: Tue, 11 Mar 2025 12:21:18 +0000 Subject: [PATCH 52/63] Re-applying changes, excluding snackbar timeout --- timesketch/frontend-ng/src/mixins/snackBar.js | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/timesketch/frontend-ng/src/mixins/snackBar.js b/timesketch/frontend-ng/src/mixins/snackBar.js index 97ac9bc918..efc021709c 100644 --- a/timesketch/frontend-ng/src/mixins/snackBar.js +++ b/timesketch/frontend-ng/src/mixins/snackBar.js @@ -23,32 +23,35 @@ const defaultSnackBar = { "timeout": defaultTimeout } -// These methids will be available to all components without any further imports. +// These methods will be available to all components without any further imports. 
Vue.mixin({ methods: { - successSnackBar(message) { + successSnackBar(message, timeout) { let snackbar = defaultSnackBar snackbar.message = message snackbar.color = "success" + snackbar.timeout = timeout || defaultTimeout this.$store.dispatch('setSnackBar', snackbar) }, - errorSnackBar(message) { + errorSnackBar(message, timeout) { let snackbar = defaultSnackBar snackbar.message = message snackbar.color = "error" + snackbar.timeout = timeout || defaultTimeout this.$store.dispatch('setSnackBar', snackbar) }, - warningSnackBar(message) { + warningSnackBar(message, timeout) { let snackbar = defaultSnackBar snackbar.message = message snackbar.color = "warning" + snackbar.timeout = timeout || defaultTimeout this.$store.dispatch('setSnackBar', snackbar) }, - infoSnackBar(message) { + infoSnackBar(message, timeout) { let snackbar = defaultSnackBar snackbar.message = message snackbar.color = "info" - snackbar.timeout = 2000 + snackbar.timeout = timeout || defaultTimeout this.$store.dispatch('setSnackBar', snackbar) }, } From 024e86260e29aa84ba504a10784aa43fe7ca0928 Mon Sep 17 00:00:00 2001 From: itsmvd Date: Tue, 11 Mar 2025 12:56:24 +0000 Subject: [PATCH 53/63] UI changes for llm_forensic_report LLM feature --- timesketch/frontend-ng/src/assets/main.scss | 12 ++ .../src/components/Explore/EventList.vue | 118 ++++++++++++------ 2 files changed, 90 insertions(+), 40 deletions(-) diff --git a/timesketch/frontend-ng/src/assets/main.scss b/timesketch/frontend-ng/src/assets/main.scss index 2141a43805..4df44675e4 100644 --- a/timesketch/frontend-ng/src/assets/main.scss +++ b/timesketch/frontend-ng/src/assets/main.scss @@ -208,3 +208,15 @@ html { -o-transition: none !important; transition: none !important; } + +$llm-gradient: linear-gradient(90deg, + #8ab4f8 0%, + #81c995 20%, + #f8c665 40%, + #ec7764 60%, + #b39ddb 80%, + #8ab4f8 100%); + +:root { + --llm-gradient: #{$llm-gradient}; +} diff --git a/timesketch/frontend-ng/src/components/Explore/EventList.vue b/timesketch/frontend-ng/src/components/Explore/EventList.vue index f7d7de62fe..571ba9c3e1 100644 --- a/timesketch/frontend-ng/src/components/Explore/EventList.vue +++ b/timesketch/frontend-ng/src/components/Explore/EventList.vue @@ -246,6 +246,17 @@ limitations under the License. mdi-download + +
+ mdi-file-document-check +
+
+