From b856f54c3069789b3eaf5d49b0fafad1e7641901 Mon Sep 17 00:00:00 2001
From: itsmvd
Date: Tue, 25 Feb 2025 13:17:01 +0000
Subject: [PATCH 01/63] add response_schema support to ollama.py

---
 timesketch/lib/llms/providers/ollama.py | 93 +++++++++++++++++++++++++
 1 file changed, 93 insertions(+)
 create mode 100644 timesketch/lib/llms/providers/ollama.py

diff --git a/timesketch/lib/llms/providers/ollama.py b/timesketch/lib/llms/providers/ollama.py
new file mode 100644
index 0000000000..75d83c112b
--- /dev/null
+++ b/timesketch/lib/llms/providers/ollama.py
@@ -0,0 +1,93 @@
+# Copyright 2025 Google Inc. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""A LLM provider for the ollama server."""
+import json
+import requests
+from typing import Optional
+
+from timesketch.lib.llms.providers import interface
+from timesketch.lib.llms.providers import manager
+
+
+class Ollama(interface.LLMProvider):
+    """A LLM provider for the ollama server."""
+
+    NAME = "ollama"
+
+    def _post(self, request_body: str) -> requests.Response:
+        """
+        Make a POST request to the ollama server.
+
+        Args:
+            request_body: The body of the request in JSON format.
+
+        Returns:
+            The response from the server as a dictionary.
+        """
+        api_resource = "/api/chat"
+        url = self.config.get("server_url") + api_resource
+        return requests.post(url, data=request_body)
+
+    def generate(self, prompt: str, response_schema: Optional[dict] = None) -> str:
+        """
+        Generate text using the ollama server, optionally with a JSON schema.
+
+        Args:
+            prompt: The prompt to use for the generation.
+            response_schema: An optional JSON schema to define the expected
+                response format.
+
+        Returns:
+            The generated text as a string (or parsed data if
+            response_schema is provided).
+        """
+        request_body = {
+            "messages": [{"role": "user", "content": prompt}],
+            "model": self.config.get("model"),
+            "stream": False,  # Force to false, streaming not available with /api/chat endpoint
+            "options": {
+                "temperature": self.config.get("temperature"),
+                "num_predict": self.config.get("max_output_tokens"),
+                "top_p": self.config.get("top_p"),
+                "top_k": self.config.get("top_k"),
+            },
+        }
+
+        if response_schema:
+            request_body["format"] = response_schema
+
+        response = self._post(json.dumps(request_body))
+
+        if response.status_code != 200:
+            raise ValueError(f"Error generating text: {response.text}")
+
+        try:
+            text_response = response.json().get("content", "").strip()
+            if response_schema:
+                return json.loads(text_response)
+
+            return text_response
+
+        except json.JSONDecodeError as error:
+            raise ValueError(
+                f"Error JSON parsing text: {text_response}: {error}"
+            ) from error
+
+        except Exception as error:
+            raise ValueError(
+                f"An unexpected error occurred: {error}"
+            ) from error
+
+
+manager.LLMManager.register_provider(Ollama)

From 5debf0f999ed3038a07c3616456e06b17537f151 Mon Sep 17 00:00:00 2001
From: itsmvd
Date: Tue, 25 Feb 2025 15:46:59 +0000
Subject: [PATCH 02/63] Create separate llm provider directory, add
 response_schema to ollama provider

---
 timesketch/api/v1/resources/llm_summarize.py |  7 +-
 timesketch/api/v1/resources/nl2q.py          |  2 +-
 timesketch/api/v1/resources_test.py          |  6 +-
 timesketch/lib/llms/ollama.py                | 72 ------------------
 .../lib/llms/{ => providers}/__init__.py     |  8 +--
 .../lib/llms/{ => providers}/aistudio.py     |  6 +-
 .../lib/llms/{ => providers}/interface.py    |  0
 .../lib/llms/{ => providers}/manager.py      |  3 +-
 .../lib/llms/{ => providers}/manager_test.py |  2 +-
 timesketch/lib/llms/providers/ollama.py      | 48 ++++++------
 .../lib/llms/{ => providers}/vertexai.py     |  4 +-
 11 files changed, 43 insertions(+), 115 deletions(-)
 delete mode 100644 timesketch/lib/llms/ollama.py
 rename timesketch/lib/llms/{ => providers}/__init__.py (76%)
 rename timesketch/lib/llms/{ => providers}/aistudio.py (95%)
 rename timesketch/lib/llms/{ => providers}/interface.py (100%)
 rename timesketch/lib/llms/{ => providers}/manager.py (98%)
 rename timesketch/lib/llms/{ => providers}/manager_test.py (99%)
 rename timesketch/lib/llms/{ => providers}/vertexai.py (96%)

diff --git a/timesketch/api/v1/resources/llm_summarize.py b/timesketch/api/v1/resources/llm_summarize.py
index 0c18441b56..5aa37657f1 100644
--- a/timesketch/api/v1/resources/llm_summarize.py
+++ b/timesketch/api/v1/resources/llm_summarize.py
@@ -28,7 +28,8 @@
 from flask_restful import Resource
 
 from timesketch.api.v1 import resources, export
-from timesketch.lib import definitions, llms, utils
+from timesketch.lib import definitions, utils
+from timesketch.lib.llms.providers import manager
 from timesketch.lib.definitions import METRICS_NAMESPACE
 from timesketch.models.sketch import Sketch
 
@@ -304,8 +305,8 @@ def _get_content(
             configured LLM provider
         """
         try:
-            feature_name = "llm_summarization"
-            llm = llms.manager.LLMManager.create_provider(feature_name=feature_name)
+            feature_name = "llm_summarize"
+            llm = manager.LLMManager.create_provider(feature_name=feature_name)
         except Exception as e:  # pylint: disable=broad-except
             logger.error("Error LLM Provider: %s", e)
             abort(

diff --git a/timesketch/api/v1/resources/nl2q.py b/timesketch/api/v1/resources/nl2q.py
index d016a768f7..5ed533e956 100644
--- a/timesketch/api/v1/resources/nl2q.py
+++ b/timesketch/api/v1/resources/nl2q.py
@@ -26,7 +26,7 @@
 import pandas as pd
 
 from timesketch.api.v1 import utils
-from timesketch.lib.llms import manager
+from timesketch.lib.llms.providers import manager
 from timesketch.lib.definitions import HTTP_STATUS_CODE_BAD_REQUEST
 from timesketch.lib.definitions import HTTP_STATUS_CODE_INTERNAL_SERVER_ERROR
 from timesketch.lib.definitions import HTTP_STATUS_CODE_NOT_FOUND

diff --git a/timesketch/api/v1/resources_test.py b/timesketch/api/v1/resources_test.py
index a964fad50a..7044bb2250 100644
--- a/timesketch/api/v1/resources_test.py
+++ b/timesketch/api/v1/resources_test.py
@@ -1198,7 +1198,7 @@ class TestNl2qResource(BaseTest):
 
     resource_url = "/api/v1/sketches/1/nl2q/"
 
-    @mock.patch("timesketch.lib.llms.manager.LLMManager.create_provider")
+    @mock.patch("timesketch.lib.llms.provider.manager.LLMManager.create_provider")
     @mock.patch("timesketch.api.v1.utils.run_aggregator")
     @mock.patch("timesketch.api.v1.resources.OpenSearchDataStore", MockDataStore)
     def test_nl2q_prompt(self, mock_aggregator, mock_create_provider):
@@ -1380,7 +1380,7 @@ def test_nl2q_no_permission(self):
         )
         self.assertEqual(response.status_code, HTTP_STATUS_CODE_FORBIDDEN)
 
-    @mock.patch("timesketch.lib.llms.manager.LLMManager.create_provider")
+    @mock.patch("timesketch.lib.llms.provider.manager.LLMManager.create_provider")
     @mock.patch("timesketch.api.v1.utils.run_aggregator")
     @mock.patch("timesketch.api.v1.resources.OpenSearchDataStore", MockDataStore)
     def test_nl2q_llm_error(self, mock_aggregator, mock_create_provider):
@@ -1584,7 +1584,7 @@ def test_llm_summarize_no_events(self):
         )
 
     @mock.patch("timesketch.api.v1.resources.OpenSearchDataStore", MockDataStore)
-    @mock.patch("timesketch.lib.llms.manager.LLMManager.create_provider")
+    @mock.patch("timesketch.lib.llms.provider.manager.LLMManager.create_provider")
     def test_llm_summarize_with_events(self, mock_create_provider):
         """Test LLM summarizer with events returned and mock LLM."""
         self.login()

diff --git a/timesketch/lib/llms/ollama.py b/timesketch/lib/llms/ollama.py
deleted file mode 100644
index 365716b580..0000000000
--- a/timesketch/lib/llms/ollama.py
+++ /dev/null
@@ -1,72 +0,0 @@
-# Copyright 2024 Google Inc. All rights reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-"""A LLM provider for the ollama server."""
-import json
-import requests
-
-from timesketch.lib.llms import interface
-from timesketch.lib.llms import manager
-
-
-class Ollama(interface.LLMProvider):
-    """A LLM provider for the ollama server."""
-
-    NAME = "ollama"
-
-    def _post(self, request_body: str) -> requests.Response:
-        """
-        Make a POST request to the ollama server.
-
-        Args:
-            request_body: The body of the request in JSON format.
-
-        Returns:
-            The response from the server as a dictionary.
-        """
-        api_resource = "/api/generate/"
-        url = self.config.get("server_url") + api_resource
-        return requests.post(url, data=request_body)
-
-    def generate(self, prompt: str) -> str:
-        """
-        Generate text using the ollama server.
-
-        Args:
-            prompt: The prompt to use for the generation.
-            temperature: The temperature to use for the generation.
-            stream: Whether to stream the generation or not.
-
-        Raises:
-            ValueError: If the generation fails.
-
-        Returns:
-            The generated text as a string.
-        """
-        request_body = {
-            "prompt": prompt,
-            "model": self.config.get("model"),
-            "stream": self.config.get("stream"),
-            "options": {
-                "temperature": self.config.get("temperature"),
-                "num_predict": self.config.get("max_output_tokens"),
-            },
-        }
-        response = self._post(json.dumps(request_body))
-        if response.status_code != 200:
-            raise ValueError(f"Error generating text: {response.text}")
-
-        return response.json().get("response", "").strip()
-
-
-manager.LLMManager.register_provider(Ollama)

diff --git a/timesketch/lib/llms/__init__.py b/timesketch/lib/llms/providers/__init__.py
similarity index 76%
rename from timesketch/lib/llms/__init__.py
rename to timesketch/lib/llms/providers/__init__.py
index bb52e18d42..f92027460b 100644
--- a/timesketch/lib/llms/__init__.py
+++ b/timesketch/lib/llms/providers/__init__.py
@@ -11,8 +11,8 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-"""LLM module for Timesketch."""
+"""LLM providers for Timesketch."""
 
-from timesketch.lib.llms import ollama
-from timesketch.lib.llms import vertexai
-from timesketch.lib.llms import aistudio
+from timesketch.lib.llms.providers import ollama
+from timesketch.lib.llms.providers import vertexai
+from timesketch.lib.llms.providers import aistudio

diff --git a/timesketch/lib/llms/aistudio.py b/timesketch/lib/llms/providers/aistudio.py
similarity index 95%
rename from timesketch/lib/llms/aistudio.py
rename to timesketch/lib/llms/providers/aistudio.py
index 77b6502efa..df7d5ca1bb 100644
--- a/timesketch/lib/llms/aistudio.py
+++ b/timesketch/lib/llms/providers/aistudio.py
@@ -1,4 +1,4 @@
-# Copyright 2024 Google Inc. All rights reserved.
+# Copyright 2025 Google Inc. All rights reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -15,8 +15,8 @@
 import json
 from typing import Optional
 
-from timesketch.lib.llms import interface
-from timesketch.lib.llms import manager
+from timesketch.lib.llms.providers import interface
+from timesketch.lib.llms.providers import manager
 
 
 # Check if the required dependencies are installed.

diff --git a/timesketch/lib/llms/interface.py b/timesketch/lib/llms/providers/interface.py
similarity index 100%
rename from timesketch/lib/llms/interface.py
rename to timesketch/lib/llms/providers/interface.py

diff --git a/timesketch/lib/llms/manager.py b/timesketch/lib/llms/providers/manager.py
similarity index 98%
rename from timesketch/lib/llms/manager.py
rename to timesketch/lib/llms/providers/manager.py
index 5412abcec6..6bb3757d1d 100644
--- a/timesketch/lib/llms/manager.py
+++ b/timesketch/lib/llms/providers/manager.py
@@ -14,7 +14,7 @@
 """This file contains a class for managing Large Language Model (LLM) providers."""
 from flask import current_app
 
-from timesketch.lib.llms.interface import LLMProvider
+from timesketch.lib.llms.providers.interface import LLMProvider
 
 
 class LLMManager:
@@ -80,7 +80,6 @@ def create_provider(cls, feature_name: str = None, **kwargs) -> LLMProvider:
             raise ValueError(
                 "Configuration for the feature must specify exactly one provider."
             )
-
         provider_name = next(iter(config_mapping))
         provider_config = config_mapping[provider_name]

diff --git a/timesketch/lib/llms/manager_test.py b/timesketch/lib/llms/providers/manager_test.py
similarity index 99%
rename from timesketch/lib/llms/manager_test.py
rename to timesketch/lib/llms/providers/manager_test.py
index c850b6a75c..af5b5f4e95 100644
--- a/timesketch/lib/llms/manager_test.py
+++ b/timesketch/lib/llms/providers/manager_test.py
@@ -14,7 +14,7 @@
 """Tests for LLM provider manager."""
 
 from timesketch.lib.testlib import BaseTest
-from timesketch.lib.llms import manager
+from timesketch.lib.llms.providers import manager
 
 
 class MockAistudioProvider:

diff --git a/timesketch/lib/llms/providers/ollama.py b/timesketch/lib/llms/providers/ollama.py
index 75d83c112b..bbb6795887 100644
--- a/timesketch/lib/llms/providers/ollama.py
+++ b/timesketch/lib/llms/providers/ollama.py
@@ -11,7 +11,7 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-"""A LLM provider for the ollama server."""
+"""A LLM provider for the Ollama server."""
 import json
 import requests
 from typing import Optional
@@ -21,27 +21,29 @@
 
 
 class Ollama(interface.LLMProvider):
-    """A LLM provider for the ollama server."""
+    """A LLM provider for the Ollama server."""
 
     NAME = "ollama"
 
     def _post(self, request_body: str) -> requests.Response:
         """
-        Make a POST request to the ollama server.
+        Make a POST request to the Ollama server.
 
         Args:
             request_body: The body of the request in JSON format.
 
         Returns:
-            The response from the server as a dictionary.
+            The response from the server as a requests.Response object.
         """
         api_resource = "/api/chat"
         url = self.config.get("server_url") + api_resource
-        return requests.post(url, data=request_body)
+        return requests.post(
+            url, data=request_body, headers={"Content-Type": "application/json"}
+        )
 
     def generate(self, prompt: str, response_schema: Optional[dict] = None) -> str:
         """
-        Generate text using the ollama server, optionally with a JSON schema.
+        Generate text using the Ollama server, optionally with a JSON schema.
 
         Args:
             prompt: The prompt to use for the generation.
@@ -49,13 +51,15 @@ def generate(self, prompt: str, response_schema: Optional[dict] = None) -> str:
             response format.
 
         Returns:
-            The generated text as a string (or parsed data if
-            response_schema is provided).
+            The generated text as a string (or parsed data if response_schema is provided).
+
+        Raises:
+            ValueError: If the request fails or JSON parsing fails.
""" request_body = { "messages": [{"role": "user", "content": prompt}], "model": self.config.get("model"), - "stream": False, # Force to false, streaming not available with /api/chat endpoint + "stream": self.config.get("stream"), "options": { "temperature": self.config.get("temperature"), "num_predict": self.config.get("max_output_tokens"), @@ -72,22 +76,18 @@ def generate(self, prompt: str, response_schema: Optional[dict] = None) -> str: if response.status_code != 200: raise ValueError(f"Error generating text: {response.text}") - try: - text_response = response.json().get("content", "").strip() - if response_schema: + response_data = response.json() + text_response = response_data.get("message", {}).get("content", "").strip() + + if response_schema: + try: return json.loads(text_response) - - return text_response - - except json.JSONDecodeError as error: - raise ValueError( - f"Error JSON parsing text: {text_response}: {error}" - ) from error - - except Exception as error: - raise ValueError( - f"An unexpected error occurred: {error}" - ) from error + except json.JSONDecodeError as error: + raise ValueError( + f"Error JSON parsing text: {text_response}: {error}" + ) from error + + return text_response manager.LLMManager.register_provider(Ollama) diff --git a/timesketch/lib/llms/vertexai.py b/timesketch/lib/llms/providers/vertexai.py similarity index 96% rename from timesketch/lib/llms/vertexai.py rename to timesketch/lib/llms/providers/vertexai.py index e4f25f7f7e..123bbdd39e 100644 --- a/timesketch/lib/llms/vertexai.py +++ b/timesketch/lib/llms/providers/vertexai.py @@ -16,8 +16,8 @@ import json from typing import Optional -from timesketch.lib.llms import interface -from timesketch.lib.llms import manager +from timesketch.lib.llms.providers import interface +from timesketch.lib.llms.providers import manager # Check if the required dependencies are installed. 
 has_required_deps = True

From 70d06991938b9f777c3b4632c7e4612820475e47 Mon Sep 17 00:00:00 2001
From: itsmvd
Date: Tue, 25 Feb 2025 15:50:24 +0000
Subject: [PATCH 03/63] Update timesketch.conf

---
 data/timesketch.conf | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/data/timesketch.conf b/data/timesketch.conf
index 0d9a47164c..1df853cc12 100644
--- a/data/timesketch.conf
+++ b/data/timesketch.conf
@@ -379,16 +379,16 @@ LLM_PROVIDER_CONFIGS = {
             'project_id': '',
         },
     },
-    'llm_summarization': {
+    'llm_summarize': {
         'aistudio': {
             'model': 'gemini-2.0-flash-exp',
             'project_id': '',
        },
    },
    'default': {
-        'aistudio': {
-            'api_key': '',
-            'model': 'gemini-2.0-flash-exp',
+        'ollama': {
+            'server_url': 'http://ollama:11434',
+            'model': 'gemma:7b',
        },
    }
 }

From 59ce086c88c7eb4c23499edf194fa4b1d955e686 Mon Sep 17 00:00:00 2001
From: itsmvd
Date: Tue, 25 Feb 2025 15:53:20 +0000
Subject: [PATCH 04/63] solve naming conflict

---
 timesketch/api/v1/resources/llm_summarize.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/timesketch/api/v1/resources/llm_summarize.py b/timesketch/api/v1/resources/llm_summarize.py
index 5aa37657f1..a5ecebc3b6 100644
--- a/timesketch/api/v1/resources/llm_summarize.py
+++ b/timesketch/api/v1/resources/llm_summarize.py
@@ -29,7 +29,7 @@
 
 from timesketch.api.v1 import resources, export
 from timesketch.lib import definitions, utils
-from timesketch.lib.llms.providers import manager
+from timesketch.lib.llms.providers import manager as provider_manager
 from timesketch.lib.definitions import METRICS_NAMESPACE
 from timesketch.models.sketch import Sketch
 
@@ -306,7 +306,7 @@ def _get_content(
         """
         try:
             feature_name = "llm_summarize"
-            llm = manager.LLMManager.create_provider(feature_name=feature_name)
+            llm = provider_manager.LLMManager.create_provider(feature_name=feature_name)
         except Exception as e:  # pylint: disable=broad-except
             logger.error("Error LLM Provider: %s", e)
             abort(

From 9e2c294a796c12a4978dc1ffe22e103647687ccf Mon Sep 17 00:00:00 2001
From: itsmvd
Date: Tue, 25 Feb 2025 16:06:13 +0000
Subject: [PATCH 05/63] fix typo

---
 timesketch/api/v1/resources_test.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/timesketch/api/v1/resources_test.py b/timesketch/api/v1/resources_test.py
index 7044bb2250..5396b010ab 100644
--- a/timesketch/api/v1/resources_test.py
+++ b/timesketch/api/v1/resources_test.py
@@ -1198,7 +1198,7 @@ class TestNl2qResource(BaseTest):
 
     resource_url = "/api/v1/sketches/1/nl2q/"
 
-    @mock.patch("timesketch.lib.llms.provider.manager.LLMManager.create_provider")
+    @mock.patch("timesketch.lib.llms.providers.manager.LLMManager.create_provider")
     @mock.patch("timesketch.api.v1.utils.run_aggregator")
     @mock.patch("timesketch.api.v1.resources.OpenSearchDataStore", MockDataStore)
     def test_nl2q_prompt(self, mock_aggregator, mock_create_provider):
@@ -1380,7 +1380,7 @@ def test_nl2q_no_permission(self):
         )
         self.assertEqual(response.status_code, HTTP_STATUS_CODE_FORBIDDEN)
 
-    @mock.patch("timesketch.lib.llms.provider.manager.LLMManager.create_provider")
+    @mock.patch("timesketch.lib.llms.providers.manager.LLMManager.create_provider")
     @mock.patch("timesketch.api.v1.utils.run_aggregator")
     @mock.patch("timesketch.api.v1.resources.OpenSearchDataStore", MockDataStore)
     def test_nl2q_llm_error(self, mock_aggregator, mock_create_provider):
@@ -1584,7 +1584,7 @@ def test_llm_summarize_no_events(self):
         )
 
     @mock.patch("timesketch.api.v1.resources.OpenSearchDataStore", MockDataStore)
@mock.patch("timesketch.lib.llms.provider.manager.LLMManager.create_provider") + @mock.patch("timesketch.lib.llms.providers.manager.LLMManager.create_provider") def test_llm_summarize_with_events(self, mock_create_provider): """Test LLM summarizer with events returned and mock LLM.""" self.login() From 5f252a94be82bc960c144eabcbbc4d7a74777a86 Mon Sep 17 00:00:00 2001 From: itsmvd Date: Tue, 25 Feb 2025 17:17:35 +0000 Subject: [PATCH 06/63] Add an __init__ file to the timsketch/lib/llms folder --- timesketch/lib/llms/__init__.py | 14 ++++++++++++++ 1 file changed, 14 insertions(+) create mode 100644 timesketch/lib/llms/__init__.py diff --git a/timesketch/lib/llms/__init__.py b/timesketch/lib/llms/__init__.py new file mode 100644 index 0000000000..0242820fb1 --- /dev/null +++ b/timesketch/lib/llms/__init__.py @@ -0,0 +1,14 @@ +# Copyright 2025 Google Inc. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""LLM libraries for Timesketch.""" From c0401596592a12f937709623997b51f1aa7bd2ff Mon Sep 17 00:00:00 2001 From: itsmvd Date: Tue, 25 Feb 2025 17:24:27 +0000 Subject: [PATCH 07/63] lint fix ollama --- timesketch/lib/llms/providers/ollama.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/timesketch/lib/llms/providers/ollama.py b/timesketch/lib/llms/providers/ollama.py index bbb6795887..42481e7aee 100644 --- a/timesketch/lib/llms/providers/ollama.py +++ b/timesketch/lib/llms/providers/ollama.py @@ -12,9 +12,9 @@ # See the License for the specific language governing permissions and # limitations under the License. """A LLM provider for the Ollama server.""" +from typing import Optional import json import requests -from typing import Optional from timesketch.lib.llms.providers import interface from timesketch.lib.llms.providers import manager @@ -51,7 +51,8 @@ def generate(self, prompt: str, response_schema: Optional[dict] = None) -> str: response format. Returns: - The generated text as a string (or parsed data if response_schema is provided). + The generated text as a string (or parsed data if + response_schema is provided). Raises: ValueError: If the request fails or JSON parsing fails. From 9ab391ed42b744294d2fae940ba88282ead3ab58 Mon Sep 17 00:00:00 2001 From: itsmvd Date: Wed, 26 Feb 2025 09:48:28 +0000 Subject: [PATCH 08/63] Improve fallback mechanism for LLM configs --- timesketch/lib/llms/providers/manager.py | 20 +++++++++++++----- timesketch/lib/llms/providers/manager_test.py | 21 +++++++++++++++++++ 2 files changed, 36 insertions(+), 5 deletions(-) diff --git a/timesketch/lib/llms/providers/manager.py b/timesketch/lib/llms/providers/manager.py index 6bb3757d1d..3dfc4705aa 100644 --- a/timesketch/lib/llms/providers/manager.py +++ b/timesketch/lib/llms/providers/manager.py @@ -63,7 +63,7 @@ def create_provider(cls, feature_name: str = None, **kwargs) -> LLMProvider: """ Create an instance of the provider for the given feature. 
 
-        If a configuration exists for the feature in
+        If a valid configuration exists for the feature in
         current_app.config["LLM_PROVIDER_CONFIGS"], use it; otherwise,
         fall back to the configuration under the "default" key.
 
@@ -71,14 +71,24 @@ def create_provider(cls, feature_name: str = None, **kwargs) -> LLMProvider:
         The configuration mapping is expected to have exactly one key:
         the provider name.
         """
         llm_configs = current_app.config.get("LLM_PROVIDER_CONFIGS", {})
+
         if feature_name and feature_name in llm_configs:
             config_mapping = llm_configs[feature_name]
-        else:
-            config_mapping = llm_configs.get("default")
-
+            if config_mapping and len(config_mapping) == 1:
+                provider_name = next(iter(config_mapping))
+                provider_config = config_mapping[provider_name]
+                provider_class = cls.get_provider(provider_name)
+                # Check that provider specifies required fields
+                try:
+                    return provider_class(config=provider_config, **kwargs)
+                except ValueError:
+                    pass  # Fallback to default provider
+
+        # Fallback to default config
+        config_mapping = llm_configs.get("default")
         if not config_mapping or len(config_mapping) != 1:
             raise ValueError(
-                "Configuration for the feature must specify exactly one provider."
+                "Default configuration must specify exactly one provider."
             )

diff --git a/timesketch/lib/llms/providers/manager_test.py b/timesketch/lib/llms/providers/manager_test.py
index af5b5f4e95..6db3f6b3ce 100644
--- a/timesketch/lib/llms/providers/manager_test.py
+++ b/timesketch/lib/llms/providers/manager_test.py
@@ -144,3 +144,24 @@ def test_create_provider_missing_config(self):
         self.app.config["LLM_PROVIDER_CONFIGS"] = {}
         with self.assertRaises(ValueError):
             manager.LLMManager.create_provider()
+
+    def test_create_provider_empty_feature_fallback(self):
+        """Test that create_provider falls back to default when feature config is empty."""
+        self.app.config["LLM_PROVIDER_CONFIGS"] = {
+            "llm_summarize": {},  # Empty feature config
+            "default": {
+                "aistudio": {
+                    "api_key": "AIzaSyTestDefaultKey",
+                    "model": "gemini-2.0-flash-exp",
+                }
+            },
+        }
+        provider_instance = manager.LLMManager.create_provider(feature_name="llm_summarize")
+        self.assertIsInstance(provider_instance, MockAistudioProvider)
+        self.assertEqual(
+            provider_instance.config,
+            {
+                "api_key": "AIzaSyTestDefaultKey",
+                "model": "gemini-2.0-flash-exp",
+            },
+        )
\ No newline at end of file

From 5d4746a22e9832455100adc89b66782aeded08bd Mon Sep 17 00:00:00 2001
From: itsmvd
Date: Wed, 26 Feb 2025 09:51:16 +0000
Subject: [PATCH 09/63] formatting

---
 timesketch/lib/llms/providers/manager.py      | 6 ++----
 timesketch/lib/llms/providers/manager_test.py | 6 ++++--
 2 files changed, 6 insertions(+), 6 deletions(-)

diff --git a/timesketch/lib/llms/providers/manager.py b/timesketch/lib/llms/providers/manager.py
index 3dfc4705aa..7cfc0a7574 100644
--- a/timesketch/lib/llms/providers/manager.py
+++ b/timesketch/lib/llms/providers/manager.py
@@ -71,7 +71,7 @@ def create_provider(cls, feature_name: str = None, **kwargs) -> LLMProvider:
         the provider name.
""" llm_configs = current_app.config.get("LLM_PROVIDER_CONFIGS", {}) - + if feature_name and feature_name in llm_configs: config_mapping = llm_configs[feature_name] if config_mapping and len(config_mapping) == 1: @@ -87,9 +87,7 @@ def create_provider(cls, feature_name: str = None, **kwargs) -> LLMProvider: # Fallback to default config config_mapping = llm_configs.get("default") if not config_mapping or len(config_mapping) != 1: - raise ValueError( - "Default configuration must specify exactly one provider." - ) + raise ValueError("Default configuration must specify exactly one provider.") provider_name = next(iter(config_mapping)) provider_config = config_mapping[provider_name] diff --git a/timesketch/lib/llms/providers/manager_test.py b/timesketch/lib/llms/providers/manager_test.py index 6db3f6b3ce..ceb7267100 100644 --- a/timesketch/lib/llms/providers/manager_test.py +++ b/timesketch/lib/llms/providers/manager_test.py @@ -156,7 +156,9 @@ def test_create_provider_empty_feature_fallback(self): } }, } - provider_instance = manager.LLMManager.create_provider(feature_name="llm_summarize") + provider_instance = manager.LLMManager.create_provider( + feature_name="llm_summarize" + ) self.assertIsInstance(provider_instance, MockAistudioProvider) self.assertEqual( provider_instance.config, @@ -164,4 +166,4 @@ def test_create_provider_empty_feature_fallback(self): "api_key": "AIzaSyTestDefaultKey", "model": "gemini-2.0-flash-exp", }, - ) \ No newline at end of file + ) From 390cd091ab5a60e7e61b49904c186938153c50dd Mon Sep 17 00:00:00 2001 From: itsmvd Date: Wed, 26 Feb 2025 09:57:13 +0000 Subject: [PATCH 10/63] format fix 2 --- timesketch/lib/llms/providers/manager_test.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/timesketch/lib/llms/providers/manager_test.py b/timesketch/lib/llms/providers/manager_test.py index ceb7267100..09902faa0f 100644 --- a/timesketch/lib/llms/providers/manager_test.py +++ b/timesketch/lib/llms/providers/manager_test.py @@ -146,7 +146,7 @@ def test_create_provider_missing_config(self): manager.LLMManager.create_provider() def test_create_provider_empty_feature_fallback(self): - """Test that create_provider falls back to default when feature config is empty.""" + """Test that create_provider falls back to default when feature config empty.""" self.app.config["LLM_PROVIDER_CONFIGS"] = { "llm_summarize": {}, # Empty feature config "default": { From ad4d70b303a74c5cba57d9a14bbf287fa596468e Mon Sep 17 00:00:00 2001 From: itsmvd Date: Wed, 26 Feb 2025 13:13:06 +0000 Subject: [PATCH 11/63] Add LLM features manager and interface --- timesketch/lib/llms/features/__init__.py | 16 +++ timesketch/lib/llms/features/interface.py | 53 +++++++ timesketch/lib/llms/features/manager.py | 65 +++++++++ timesketch/lib/llms/features/manager_test.py | 142 +++++++++++++++++++ 4 files changed, 276 insertions(+) create mode 100644 timesketch/lib/llms/features/__init__.py create mode 100644 timesketch/lib/llms/features/interface.py create mode 100644 timesketch/lib/llms/features/manager.py create mode 100644 timesketch/lib/llms/features/manager_test.py diff --git a/timesketch/lib/llms/features/__init__.py b/timesketch/lib/llms/features/__init__.py new file mode 100644 index 0000000000..6a8e4caaf4 --- /dev/null +++ b/timesketch/lib/llms/features/__init__.py @@ -0,0 +1,16 @@ +# Copyright 2025 Google Inc. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""LLM features for Timesketch."""
+
+from timesketch.lib.llms.features import manager

diff --git a/timesketch/lib/llms/features/interface.py b/timesketch/lib/llms/features/interface.py
new file mode 100644
index 0000000000..10317fe014
--- /dev/null
+++ b/timesketch/lib/llms/features/interface.py
@@ -0,0 +1,53 @@
+# Copyright 2025 Google Inc. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""Interface for LLM features."""
+
+from typing import Any, Optional
+from abc import ABC, abstractmethod
+from timesketch.models.sketch import Sketch
+
+
+class LLMFeatureInterface(ABC):
+    """Interface for LLM features."""
+
+    NAME: str = "llm_feature_interface"  # Must be overridden in subclasses
+    RESPONSE_SCHEMA: Optional[dict[str, Any]] = None
+
+    @abstractmethod
+    def generate_prompt(self, sketch: Sketch, **kwargs: Any) -> str:
+        """Generates a prompt for the LLM.
+
+        Args:
+            sketch_id: The ID of the sketch.
+            kwargs: Feature-specific keyword arguments for prompt generation.
+
+        Returns:
+            The generated prompt string.
+        """
+        raise NotImplementedError()
+
+    @abstractmethod
+    def process_response(self, llm_response: str, **kwargs: Any) -> dict[str, Any]:
+        """Processes the raw LLM response.
+
+        Args:
+            llm_response: The raw string response from the LLM provider.
+            kwargs: Feature-specific arguments.
+
+        Returns:
+            A dictionary containing the processed response data, suitable for
+            returning from the API. Must include a "response" key with the
+            main result, and can optionally include other metadata.
+        """
+        raise NotImplementedError()

diff --git a/timesketch/lib/llms/features/manager.py b/timesketch/lib/llms/features/manager.py
new file mode 100644
index 0000000000..70bfac4836
--- /dev/null
+++ b/timesketch/lib/llms/features/manager.py
@@ -0,0 +1,65 @@
+# Copyright 2025 Google Inc. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""Manager for LLM features.""" + +import logging +from timesketch.lib.llms.features.interface import LLMFeatureInterface + +logger = logging.getLogger("timesketch.llm.manager") + + +class FeatureManager: + """The manager for LLM features.""" + + _feature_registry = {} + + @classmethod + def register_feature(cls, feature_class: type[LLMFeatureInterface]): + """Register an LLM feature class.""" + feature_name = feature_class.NAME.lower() + if feature_name in cls._feature_registry: + raise ValueError(f"LLM Feature {feature_class.NAME} already registered") + cls._feature_registry[feature_name] = feature_class + # Optional: Add logging here + + @classmethod + def get_feature(cls, feature_name: str) -> type[LLMFeatureInterface]: + """Get a feature class by name.""" + try: + return cls._feature_registry[feature_name.lower()] + except KeyError as no_such_feature: + raise KeyError( + f"No such LLM feature: {feature_name.lower()}" + ) from no_such_feature + + @classmethod + def get_features(cls): + """Get all registered features. + + Yields: + A tuple of (feature_name, feature_class) + """ + for feature_name, feature_class in cls._feature_registry.items(): + yield feature_name, feature_class + + @classmethod + def get_feature_instance(cls, feature_name: str) -> LLMFeatureInterface: + """Get an instance of a feature by name.""" + feature_class = cls.get_feature(feature_name) + return feature_class() + + @classmethod + def clear_registration(cls): + """Clear all registered features.""" + cls._feature_registry = {} diff --git a/timesketch/lib/llms/features/manager_test.py b/timesketch/lib/llms/features/manager_test.py new file mode 100644 index 0000000000..2e053e7199 --- /dev/null +++ b/timesketch/lib/llms/features/manager_test.py @@ -0,0 +1,142 @@ +# Copyright 2025 Google Inc. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""Tests for LLM feature manager.""" + +from typing import Any +from timesketch.lib.testlib import BaseTest +from timesketch.lib.llms.features import manager +from timesketch.models.sketch import Sketch + + +class MockSummarizeFeature: + """A mock LLM summarize feature.""" + + NAME = "llm_summarize" + + def generate_prompt(self, _sketch: Sketch, **_kwargs: Any) -> str: + """Mock implementation of generate_prompt.""" + return "Summarize these events." + + def process_response(self, llm_response: str, **kwargs: Any) -> dict[str, Any]: + """Mock implementation of process_response.""" + return {"response": f"Summary: {llm_response}"} + + +class MockNl2qFeature: + """A mock Natural Language to Query feature.""" + + NAME = "nl2q" + + def generate_prompt(self, _sketch: Sketch, **_kwargs: Any) -> str: + """Mock implementation of generate_prompt.""" + return "Convert this question to a query." 
+
+    def process_response(self, llm_response: str, **_kwargs: Any) -> dict[str, Any]:
+        """Mock implementation of process_response."""
+        return {"response": f"Query: {llm_response}"}
+
+
+class TestFeatureManager(BaseTest):
+    """Tests for the functionality of the FeatureManager module."""
+
+    def setUp(self) -> None:
+        super().setUp()
+        manager.FeatureManager.clear_registration()
+        manager.FeatureManager.register_feature(MockSummarizeFeature)
+        manager.FeatureManager.register_feature(MockNl2qFeature)
+
+    def tearDown(self) -> None:
+        manager.FeatureManager.clear_registration()
+        super().tearDown()
+
+    def test_get_features(self):
+        """Test that get_features returns the registered features."""
+        features = manager.FeatureManager.get_features()
+        feature_list = list(features)
+        self.assertIsInstance(feature_list, list)
+
+        found_summarize = any(
+            feature_name == "llm_summarize" and feature_class == MockSummarizeFeature
+            for feature_name, feature_class in feature_list
+        )
+        found_nl2q = any(
+            feature_name == "nl2q" and feature_class == MockNl2qFeature
+            for feature_name, feature_class in feature_list
+        )
+        self.assertTrue(found_summarize, "LLM Summarize feature not found.")
+        self.assertTrue(found_nl2q, "NL2Q feature not found.")
+
+    def test_get_feature(self):
+        """Test retrieval of a feature class from the registry."""
+        feature_class = manager.FeatureManager.get_feature("llm_summarize")
+        self.assertEqual(feature_class, MockSummarizeFeature)
+
+        feature_class = manager.FeatureManager.get_feature("LLM_SUMMARIZE")
+        self.assertEqual(feature_class, MockSummarizeFeature)
+
+        self.assertRaises(
+            KeyError, manager.FeatureManager.get_feature, "no_such_feature"
+        )
+
+    def test_register_feature(self):
+        """Test that re-registering an already registered feature raises ValueError."""
+        self.assertRaises(
+            ValueError, manager.FeatureManager.register_feature, MockSummarizeFeature
+        )
+
+    def test_get_feature_instance(self):
+        """Test get_feature_instance creates the correct feature instance."""
+        feature_instance = manager.FeatureManager.get_feature_instance("llm_summarize")
+        self.assertIsInstance(feature_instance, MockSummarizeFeature)
+
+        feature_instance = manager.FeatureManager.get_feature_instance("nl2q")
+        self.assertIsInstance(feature_instance, MockNl2qFeature)
+
+        self.assertRaises(
+            KeyError, manager.FeatureManager.get_feature_instance, "no_such_feature"
+        )
+
+    def test_feature_methods(self):
+        """Test that feature methods work correctly."""
+        summarize_instance = manager.FeatureManager.get_feature_instance(
+            "llm_summarize"
+        )
+        nl2q_instance = manager.FeatureManager.get_feature_instance("nl2q")
+
+        sketch = None
+
+        self.assertEqual(
+            summarize_instance.generate_prompt(sketch), "Summarize these events."
+        )
+        self.assertEqual(
+            nl2q_instance.generate_prompt(sketch), "Convert this question to a query."
+ ) + + self.assertEqual( + summarize_instance.process_response("Test events"), + {"response": "Summary: Test events"}, + ) + self.assertEqual( + nl2q_instance.process_response("timestamp:*"), + {"response": "Query: timestamp:*"}, + ) + + def test_clear_registration(self): + """Test clear_registration removes all registered features.""" + self.assertEqual(len(list(manager.FeatureManager.get_features())), 2) + + manager.FeatureManager.clear_registration() + + self.assertEqual(len(list(manager.FeatureManager.get_features())), 0) + self.assertRaises(KeyError, manager.FeatureManager.get_feature, "llm_summarize") From aa267cc7390dbcea3863b93ba2540f6c8a2e24b0 Mon Sep 17 00:00:00 2001 From: itsmvd Date: Wed, 26 Feb 2025 14:51:02 +0000 Subject: [PATCH 12/63] linter fix --- timesketch/lib/llms/features/manager_test.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/timesketch/lib/llms/features/manager_test.py b/timesketch/lib/llms/features/manager_test.py index 2e053e7199..83fc7b81b1 100644 --- a/timesketch/lib/llms/features/manager_test.py +++ b/timesketch/lib/llms/features/manager_test.py @@ -28,7 +28,7 @@ def generate_prompt(self, _sketch: Sketch, **_kwargs: Any) -> str: """Mock implementation of generate_prompt.""" return "Summarize these events." - def process_response(self, llm_response: str, **kwargs: Any) -> dict[str, Any]: + def process_response(self, llm_response: str, **_kwargs: Any) -> dict[str, Any]: """Mock implementation of process_response.""" return {"response": f"Summary: {llm_response}"} From bd8d6d20dbd24058952cd85bfddb0a73d08b22f4 Mon Sep 17 00:00:00 2001 From: itsmvd Date: Thu, 27 Feb 2025 10:24:33 +0000 Subject: [PATCH 13/63] Automatically load features, add better doc-strings to interface.py --- timesketch/lib/llms/features/__init__.py | 2 + timesketch/lib/llms/features/interface.py | 48 ++++++++++-- timesketch/lib/llms/features/manager.py | 36 ++++++++- timesketch/lib/llms/features/manager_test.py | 80 +++++++++++++++++--- 4 files changed, 147 insertions(+), 19 deletions(-) diff --git a/timesketch/lib/llms/features/__init__.py b/timesketch/lib/llms/features/__init__.py index 6a8e4caaf4..8346fe51c3 100644 --- a/timesketch/lib/llms/features/__init__.py +++ b/timesketch/lib/llms/features/__init__.py @@ -14,3 +14,5 @@ """LLM features for Timesketch.""" from timesketch.lib.llms.features import manager + +manager.FeatureManager.load_llm_features() diff --git a/timesketch/lib/llms/features/interface.py b/timesketch/lib/llms/features/interface.py index 10317fe014..ba3d1827b2 100644 --- a/timesketch/lib/llms/features/interface.py +++ b/timesketch/lib/llms/features/interface.py @@ -19,7 +19,27 @@ class LLMFeatureInterface(ABC): - """Interface for LLM features.""" + """Interface for LLM features. + + This abstract class defines the required methods and attributes for implementing + an LLM-powered feature in Timesketch. Features must override the NAME constant + and implement the abstract methods. + + Attributes: + NAME: String identifier for the feature. Must be overridden in subclasses. + RESPONSE_SCHEMA: Optional JSON schema that defines the expected format of + the LLM response. When defined, this schema will be passed to the LLM + provider to enforce structured outputs matching the defined format. + For example: + + { + "type": "object", + "properties": {"summary": {"type": "string"}}, + "required": ["summary"], + } + + If None, the LLM will return unstructured text. 
+    """
 
     NAME: str = "llm_feature_interface"  # Must be overridden in subclasses
     RESPONSE_SCHEMA: Optional[dict[str, Any]] = None
@@ -39,15 +59,29 @@ def generate_prompt(self, sketch: Sketch, **kwargs: Any) -> str:
 
     @abstractmethod
     def process_response(self, llm_response: str, **kwargs: Any) -> dict[str, Any]:
-        """Processes the raw LLM response.
+        """Processes the LLM response and formats it for API consumption.
+
+        This method takes the response from the LLM provider and transforms it into
+        a structured format to be returned to the frontend through the API. The
+        response handling varies depending on whether RESPONSE_SCHEMA is defined:
+
+        - If RESPONSE_SCHEMA is None: Typically receives a string response
+        - If RESPONSE_SCHEMA is defined: Typically receives a structured dict
+
+        The returned dictionary defines the data contract with the frontend, which will
+        use these fields to render the appropriate UI elements.
 
         Args:
-            llm_response: The raw string response from the LLM provider.
-            kwargs: Feature-specific arguments.
+            llm_response: The response from the LLM provider. This may be a
+                string or a structured dict depending on RESPONSE_SCHEMA.
+            **kwargs: Additional data needed for processing, which may include:
+                - sketch_id: The ID of the sketch
+                - sketch: The Sketch object
 
         Returns:
-            A dictionary containing the processed response data, suitable for
-            returning from the API. Must include a "response" key with the
-            main result, and can optionally include other metadata.
+            A dictionary that will be JSON-serialized and returned through the API.
+            This dictionary defines the data contract with the frontend and must include
+            all fields that the frontend expects to render. Example for NL2Q:
+            - {"name": "AI generated search query", "query_string": "...", "error": null}
         """
         raise NotImplementedError()

diff --git a/timesketch/lib/llms/features/manager.py b/timesketch/lib/llms/features/manager.py
index 70bfac4836..67010a50cf 100644
--- a/timesketch/lib/llms/features/manager.py
+++ b/timesketch/lib/llms/features/manager.py
@@ -13,6 +13,10 @@
 # limitations under the License.
"""Manager for LLM features.""" +import os +import importlib +import inspect +import pkgutil import logging from timesketch.lib.llms.features.interface import LLMFeatureInterface @@ -24,6 +28,37 @@ class FeatureManager: _feature_registry = {} + @classmethod + def load_llm_features(cls): + """Dynamically load and register all LLM features.""" + features_path = os.path.dirname(os.path.abspath(__file__)) + cls.clear_registration() + + for _, module_name, _ in pkgutil.iter_modules([features_path]): + if module_name in ["interface", "manager"] or module_name.endswith("_test"): + continue + try: + module = importlib.import_module( + f"timesketch.lib.llms.features.{module_name}" + ) + for _, obj in inspect.getmembers(module): + if ( + inspect.isclass(obj) + and issubclass(obj, LLMFeatureInterface) + and obj != LLMFeatureInterface + ): + try: + cls.register_feature(obj) + except ValueError as e: + logger.debug("Failed to register feature: %s", str(e)) + + except (ImportError, AttributeError) as e: + logger.error( + "Error loading LLM feature module %s: %s", module_name, str(e) + ) + + logger.debug("Loaded %d LLM features", len(cls._feature_registry)) + @classmethod def register_feature(cls, feature_class: type[LLMFeatureInterface]): """Register an LLM feature class.""" @@ -31,7 +66,6 @@ def register_feature(cls, feature_class: type[LLMFeatureInterface]): if feature_name in cls._feature_registry: raise ValueError(f"LLM Feature {feature_class.NAME} already registered") cls._feature_registry[feature_name] = feature_class - # Optional: Add logging here @classmethod def get_feature(cls, feature_name: str) -> type[LLMFeatureInterface]: diff --git a/timesketch/lib/llms/features/manager_test.py b/timesketch/lib/llms/features/manager_test.py index 83fc7b81b1..7e5c0dd49b 100644 --- a/timesketch/lib/llms/features/manager_test.py +++ b/timesketch/lib/llms/features/manager_test.py @@ -13,10 +13,13 @@ # limitations under the License. """Tests for LLM feature manager.""" +import mock +import types from typing import Any from timesketch.lib.testlib import BaseTest from timesketch.lib.llms.features import manager from timesketch.models.sketch import Sketch +from timesketch.lib.llms.features.interface import LLMFeatureInterface class MockSummarizeFeature: @@ -25,28 +28,48 @@ class MockSummarizeFeature: NAME = "llm_summarize" def generate_prompt(self, _sketch: Sketch, **_kwargs: Any) -> str: - """Mock implementation of generate_prompt.""" + """Mocks implementation of generate_prompt.""" return "Summarize these events." def process_response(self, llm_response: str, **_kwargs: Any) -> dict[str, Any]: - """Mock implementation of process_response.""" + """Mocks implementation of process_response.""" return {"response": f"Summary: {llm_response}"} -class MockNl2qFeature: +class MockNl2qFeature(LLMFeatureInterface): """A mock Natural Language to Query feature.""" NAME = "nl2q" def generate_prompt(self, _sketch: Sketch, **_kwargs: Any) -> str: - """Mock implementation of generate_prompt.""" + """Mocks implementation of generate_prompt.""" return "Convert this question to a query." 
 
     def process_response(self, llm_response: str, **_kwargs: Any) -> dict[str, Any]:
-        """Mock implementation of process_response."""
+        """Mocks implementation of process_response."""
         return {"response": f"Query: {llm_response}"}
 
 
+class MockFeature(LLMFeatureInterface):
+    NAME = "some_feature"
+
+    def generate_prompt(self, *args: Any, **kwargs: Any) -> str:
+        return "some prompt"
+
+    def process_response(self, *args: Any, **kwargs: Any) -> dict:
+        return {"response": "some response"}
+
+
+class DuplicateNl2qFeature(LLMFeatureInterface):
+    NAME = "nl2q"
+
+    def generate_prompt(self, *args: Any, **kwargs: Any) -> str:
+        return "duplicate prompt"
+
+    def process_response(self, *args: Any, **kwargs: Any) -> dict:
+        return {"response": "duplicate response"}
+
+
 class TestFeatureManager(BaseTest):
     """Tests for the functionality of the FeatureManager module."""
 
@@ -84,7 +107,7 @@ def tearDown(self) -> None:
         super().tearDown()
 
     def test_get_features(self):
-        """Test that get_features returns the registered features."""
+        """Tests that get_features returns the registered features."""
         features = manager.FeatureManager.get_features()
         feature_list = list(features)
         self.assertIsInstance(feature_list, list)
@@ -101,13 +124,13 @@ def test_get_features(self):
         self.assertTrue(found_nl2q, "NL2Q feature not found.")
 
     def test_get_feature(self):
-        """Test retrieval of a feature class from the registry."""
+        """Tests retrieval of a feature class from the registry."""
         feature_class = manager.FeatureManager.get_feature("llm_summarize")
         self.assertEqual(feature_class, MockSummarizeFeature)
 
     def test_register_feature(self):
-        """Test that re-registering an already registered feature raises ValueError."""
+        """Tests that re-registering an already registered feature raises ValueError."""
         self.assertRaises(
             ValueError, manager.FeatureManager.register_feature, MockSummarizeFeature
         )
 
     def test_get_feature_instance(self):
-        """Test get_feature_instance creates the correct feature instance."""
+        """Tests that get_feature_instance creates the correct feature instance."""
         feature_instance = manager.FeatureManager.get_feature_instance("llm_summarize")
         self.assertIsInstance(feature_instance, MockSummarizeFeature)
 
@@ -131,7 +154,7 @@ def test_get_feature_instance(self):
         )
 
     def test_feature_methods(self):
-        """Test that feature methods work correctly."""
+        """Tests that feature methods work correctly."""
         summarize_instance = manager.FeatureManager.get_feature_instance(
             "llm_summarize"
         )
@@ -156,10 +179,45 @@ def test_feature_methods(self):
         )
 
     def test_clear_registration(self):
-        """Test clear_registration removes all registered features."""
+        """Tests that clear_registration removes all registered features."""
         self.assertEqual(len(list(manager.FeatureManager.get_features())), 2)
 
         manager.FeatureManager.clear_registration()
 
         self.assertEqual(len(list(manager.FeatureManager.get_features())), 0)
         self.assertRaises(KeyError, manager.FeatureManager.get_feature, "llm_summarize")
+
+    @mock.patch("importlib.import_module")
+    @mock.patch("pkgutil.iter_modules", return_value=[(None, "nl2q", False)])
+    def test_load_llm_feature(self, _, mock_import_module) -> None:
+        """Tests that load_llm_feature loads the expected features."""
+        mock_module = types.ModuleType("mock_module")
+        setattr(mock_module, "MockNl2qFeature", MockNl2qFeature)
+        mock_import_module.return_value = mock_module
+
+        manager.FeatureManager.load_llm_features()
+        features = list(manager.FeatureManager.get_features())
+
+        self.assertEqual(len(features), 1)
+        registered_name, registered_class = features[0]
+        self.assertEqual(registered_name, "nl2q")
+        self.assertEqual(registered_class, MockNl2qFeature)
+        mock_import_module.assert_called_with("timesketch.lib.llms.features.nl2q")
+
+    @mock.patch("importlib.import_module")
+    @mock.patch("pkgutil.iter_modules", return_value=[(None, "nl2q", False)])
+    def test_load_llm_feature_duplicate(self, _, mock_import_module) -> None:
+        """Tests that load_llm_feature handles registration of duplicate features."""
+        dummy_module = types.ModuleType("dummy_module")
+        setattr(dummy_module, "MockNl2qFeature", MockNl2qFeature)
+        setattr(dummy_module, "DuplicateNl2qFeature", DuplicateNl2qFeature)
+        mock_import_module.return_value = dummy_module
+
+        with self.assertLogs("timesketch.llm.manager", level="WARNING") as log_cm:
+            manager.FeatureManager.load_llm_features()
+        features = list(manager.FeatureManager.get_features())
+        self.assertEqual(len(features), 1)
+        registered_name, _ = features[0]
+        self.assertEqual(registered_name, "nl2q")
+        self.assertTrue(
+            any("already registered" in message for message in log_cm.output)
+        )

From 290afc68ae51393420b29ac1e0a80df32f960c8f Mon Sep 17 00:00:00 2001
From: itsmvd
Date: Thu, 27 Feb 2025 10:27:53 +0000
Subject: [PATCH 14/63] linter fix

---
 timesketch/lib/llms/features/interface.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/timesketch/lib/llms/features/interface.py b/timesketch/lib/llms/features/interface.py
index ba3d1827b2..09b76e0ec3 100644
--- a/timesketch/lib/llms/features/interface.py
+++ b/timesketch/lib/llms/features/interface.py
@@ -82,6 +82,6 @@ def process_response(self, llm_response: str, **kwargs: Any) -> dict[str, Any]:
             A dictionary that will be JSON-serialized and returned through the API.
             This dictionary defines the data contract with the frontend and must include
             all fields that the frontend expects to render. Example for NL2Q:
-            - {"name": "AI generated search query", "query_string": "...", "error": null}
+            - {"name": "AI generated search query","query_string": "...","error":null}
         """
         raise NotImplementedError()

From fb9b668783c5f280567bacc34dfac7826e7aec62 Mon Sep 17 00:00:00 2001
From: itsmvd
Date: Thu, 27 Feb 2025 10:31:48 +0000
Subject: [PATCH 15/63] linter fixes

---
 timesketch/lib/llms/features/manager_test.py | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/timesketch/lib/llms/features/manager_test.py b/timesketch/lib/llms/features/manager_test.py
index 7e5c0dd49b..46518ac328 100644
--- a/timesketch/lib/llms/features/manager_test.py
+++ b/timesketch/lib/llms/features/manager_test.py
@@ -13,9 +13,9 @@
 # limitations under the License.
"""Tests for LLM feature manager.""" -import mock import types from typing import Any +import mock from timesketch.lib.testlib import BaseTest from timesketch.lib.llms.features import manager from timesketch.models.sketch import Sketch @@ -53,20 +53,20 @@ def process_response(self, llm_response: str, **_kwargs: Any) -> dict[str, Any]: class MockFeature(LLMFeatureInterface): NAME = "some_feature" - def generate_prompt(self, *args: Any, **kwargs: Any) -> str: + def generate_prompt(self, *_args: Any, **_kwargs: Any) -> str: return "some prompt" - def process_response(self, *args: Any, **kwargs: Any) -> dict: + def process_response(self, *_args: Any, **_kwargs: Any) -> dict: return {"response": "some response"} class DuplicateNl2qFeature(LLMFeatureInterface): NAME = "nl2q" - def generate_prompt(self, *args: Any, **kwargs: Any) -> str: + def generate_prompt(self, *_args: Any, **_kwargs: Any) -> str: return "duplicate prompt" - def process_response(self, *args: Any, **kwargs: Any) -> dict: + def process_response(self, *_args: Any, **_kwargs: Any) -> dict: return {"response": "duplicate response"} From 0858b7f83cf5a039ad6ab3b448cec02534c83604 Mon Sep 17 00:00:00 2001 From: itsmvd Date: Thu, 27 Feb 2025 10:50:51 +0000 Subject: [PATCH 16/63] linter fixes --- timesketch/lib/llms/features/manager_test.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/timesketch/lib/llms/features/manager_test.py b/timesketch/lib/llms/features/manager_test.py index 46518ac328..a74c46cef0 100644 --- a/timesketch/lib/llms/features/manager_test.py +++ b/timesketch/lib/llms/features/manager_test.py @@ -189,7 +189,7 @@ def test_load_llm_feature_duplicate(self, _, mock_import_module) -> None: setattr(dummy_module, "DuplicateNl2qFeature", DuplicateNl2qFeature) mock_import_module.return_value = dummy_module - with self.assertLogs("timesketch.llm.manager", level="WARNING") as log_cm: + with self.assertLogs("timesketch.llm.manager", level="DEBUG") as log_cm: manager.FeatureManager.load_llm_features() features = list(manager.FeatureManager.get_features()) self.assertEqual(len(features), 1) From 1bcd2b1889c344c47e295f4ef7acca6833cdb18d Mon Sep 17 00:00:00 2001 From: itsmvd Date: Thu, 27 Feb 2025 13:13:57 +0000 Subject: [PATCH 17/63] Introduce LLMResource API method, tests, and add it as a method for the frontend --- timesketch/api/v1/resources/llm.py | 239 ++++++++++++++++++ timesketch/api/v1/resources_test.py | 131 +++++++++- timesketch/api/v1/routes.py | 2 + .../frontend-ng/src/utils/RestApiClient.js | 6 + 4 files changed, 377 insertions(+), 1 deletion(-) create mode 100644 timesketch/api/v1/resources/llm.py diff --git a/timesketch/api/v1/resources/llm.py b/timesketch/api/v1/resources/llm.py new file mode 100644 index 0000000000..c01eab48dd --- /dev/null +++ b/timesketch/api/v1/resources/llm.py @@ -0,0 +1,239 @@ +# Copyright 2025 Google Inc. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+"""Timesketch API endpoint for interacting with LLM features.""" +import logging +import multiprocessing +import multiprocessing.managers +import time + +import prometheus_client +from flask import request, abort, jsonify +from flask_login import login_required, current_user +from flask_restful import Resource + +from timesketch.api.v1 import resources +from timesketch.lib import definitions, utils +from timesketch.lib.definitions import METRICS_NAMESPACE +from timesketch.lib.llms.providers import manager as llm_manager +from timesketch.lib.llms.features import manager as feature_manager +from timesketch.models.sketch import Sketch + +logger = logging.getLogger("timesketch.api.llm") + + +class LLMResource(resources.ResourceMixin, Resource): + """Resource to interact with LLMs.""" + + METRICS = { + "llm_requests_total": prometheus_client.Counter( + "llm_requests_total", + "Total number of LLM requests received", + ["sketch_id", "feature"], + namespace=METRICS_NAMESPACE, + ), + "llm_errors_total": prometheus_client.Counter( + "llm_errors_total", + "Total number of errors during LLM processing", + ["sketch_id", "feature", "error_type"], + namespace=METRICS_NAMESPACE, + ), + "llm_duration_seconds": prometheus_client.Summary( + "llm_duration_seconds", + "Time taken to process an LLM request (in seconds)", + ["sketch_id", "feature"], + namespace=METRICS_NAMESPACE, + ), + } + + _LLM_TIMEOUT_WAIT_SECONDS = 30 + + @login_required + def post(self, sketch_id: int): + """Handles POST requests to the resource.""" + start_time = time.time() + sketch = self._validate_sketch(sketch_id) + form = self._validate_request_data() + feature = self._get_feature(form.get("feature")) + + self._increment_request_metric(sketch_id, feature.NAME) + + timeline_ids = self._validate_indices(sketch, form.get("filter", {})) + prompt = self._generate_prompt(feature, sketch, form, timeline_ids) + response = self._execute_llm_call(feature, prompt, sketch_id) + result = self._process_llm_response( + feature, response, sketch, form, timeline_ids + ) + + self._record_duration(sketch_id, feature.NAME, start_time) + return jsonify(result) + + def _validate_sketch(self, sketch_id: int) -> Sketch: + """Validates sketch existence and user permissions.""" + sketch = Sketch.get_with_acl(sketch_id) + if not sketch: + abort( + definitions.HTTP_STATUS_CODE_NOT_FOUND, "No sketch found with this ID." 
+ ) + if not sketch.has_permission(current_user, "read"): + abort( + definitions.HTTP_STATUS_CODE_FORBIDDEN, + "User does not have read access to the sketch.", + ) + return sketch + + def _validate_request_data(self) -> dict: + """Validates the presence of request JSON data.""" + form = request.json + if not form: + abort( + definitions.HTTP_STATUS_CODE_BAD_REQUEST, + "The POST request requires data", + ) + return form + + def _get_feature(self, feature_name: str) -> feature_manager.LLMFeatureInterface: + """Retrieves and validates the requested LLM feature.""" + if not feature_name: + abort( + definitions.HTTP_STATUS_CODE_BAD_REQUEST, + "The 'feature' parameter is required.", + ) + try: + return feature_manager.FeatureManager.get_feature_instance(feature_name) + except KeyError: + abort( + definitions.HTTP_STATUS_CODE_BAD_REQUEST, + f"Invalid LLM feature: {feature_name}", + ) + + def _validate_indices(self, sketch: Sketch, query_filter: dict) -> list: + """Extracts and validates timeline IDs from the query filter for a sketch.""" + all_indices = list({t.searchindex.index_name for t in sketch.timelines}) + indices = query_filter.get("indices", all_indices) + if "_all" in indices: + indices = all_indices + indices, timeline_ids = utils.get_validated_indices(indices, sketch) + if not indices: + abort( + definitions.HTTP_STATUS_CODE_BAD_REQUEST, + "No valid search indices were found.", + ) + return timeline_ids + + def _generate_prompt( + self, + feature: feature_manager.LLMFeatureInterface, + sketch: Sketch, + form: dict, + timeline_ids: list, + ) -> str: + """Generates the LLM prompt based on the feature and request data.""" + try: + return feature.generate_prompt( + sketch, form=form, datastore=self.datastore, timeline_ids=timeline_ids + ) + except ValueError as e: + abort(definitions.HTTP_STATUS_CODE_BAD_REQUEST, str(e)) + + def _execute_llm_call( + self, feature: feature_manager.LLMFeatureInterface, prompt: str, sketch_id: int + ) -> dict: + """Executes the LLM call with a timeout using multiprocessing.""" + with multiprocessing.Manager() as manager: + shared_response = manager.dict() + process = multiprocessing.Process( + target=self._get_content_with_timeout, + args=(feature, prompt, shared_response), + ) + process.start() + process.join(timeout=self._LLM_TIMEOUT_WAIT_SECONDS) + + if process.is_alive(): + logger.warning( + "LLM call timed out after %d seconds.", + self._LLM_TIMEOUT_WAIT_SECONDS, + ) + process.terminate() + process.join() + self.METRICS["llm_errors_total"].labels( + sketch_id=str(sketch_id), feature=feature.NAME, error_type="timeout" + ).inc() + abort(definitions.HTTP_STATUS_CODE_BAD_REQUEST, "LLM call timed out.") + + response = dict(shared_response) + if "error" in response: + self.METRICS["llm_errors_total"].labels( + sketch_id=str(sketch_id), + feature=feature.NAME, + error_type="llm_api_error", + ).inc() + abort( + definitions.HTTP_STATUS_CODE_INTERNAL_SERVER_ERROR, + "Error during LLM processing.", + ) + return response["response"] + + def _process_llm_response( + self, feature, response: dict, sketch: Sketch, form: dict, timeline_ids: list + ) -> dict: + """Processes the LLM response into the final result.""" + try: + return feature.process_response( + llm_response=response, + form=form, + sketch_id=sketch.id, + datastore=self.datastore, + sketch=sketch, + timeline_ids=timeline_ids, + ) + except ValueError as e: + self.METRICS["llm_errors_total"].labels( + sketch_id=str(sketch.id), + feature=feature.NAME, + error_type="response_processing", + ).inc() + 
abort(definitions.HTTP_STATUS_CODE_BAD_REQUEST, str(e)) + + def _increment_request_metric(self, sketch_id: int, feature_name: str) -> None: + """Increments the request counter metric.""" + self.METRICS["llm_requests_total"].labels( + sketch_id=str(sketch_id), feature=feature_name + ).inc() + + def _record_duration( + self, sketch_id: int, feature_name: str, start_time: float + ) -> None: + """Records the duration of the request.""" + duration = time.time() - start_time + self.METRICS["llm_duration_seconds"].labels( + sketch_id=str(sketch_id), feature=feature_name + ).observe(duration) + + def _get_content_with_timeout( + self, + feature: feature_manager.LLMFeatureInterface, + prompt: str, + shared_response: multiprocessing.managers.DictProxy, + ) -> None: + """Send a prompt to the LLM and get a response within a process.""" + try: + llm = llm_manager.LLMManager.create_provider(feature_name=feature.NAME) + response_schema = ( + feature.RESPONSE_SCHEMA if hasattr(feature, "RESPONSE_SCHEMA") else None + ) + response = llm.generate(prompt, response_schema=response_schema) + shared_response.update({"response": response}) + except Exception as e: + logger.error("Error in LLM call within process: %s", e, exc_info=True) + shared_response.update({"error": str(e)}) diff --git a/timesketch/api/v1/resources_test.py b/timesketch/api/v1/resources_test.py index 14dc3e8bfa..222641e45c 100644 --- a/timesketch/api/v1/resources_test.py +++ b/timesketch/api/v1/resources_test.py @@ -33,7 +33,6 @@ from timesketch.models.sketch import InvestigativeQuestion from timesketch.models.sketch import InvestigativeQuestionApproach from timesketch.models.sketch import Facet - from timesketch.api.v1.resources import ResourceMixin @@ -1692,3 +1691,133 @@ def test_llm_summarize_with_events(self, mock_create_provider): self.assertEqual(response.status_code, 200) response_data = json.loads(response.get_data(as_text=True)) self.assertEqual(response_data.get("summary"), "Mock summary from LLM") + + +@mock.patch("timesketch.api.v1.resources.OpenSearchDataStore", MockDataStore) +class LLMResourceTest(BaseTest): + """Test LLMResource.""" + + resource_url = "/api/v1/sketches/1/llm/" + + @mock.patch("timesketch.models.sketch.Sketch.get_with_acl") + @mock.patch( + "timesketch.lib.llms.features.manager.FeatureManager.get_feature_instance" + ) + @mock.patch("timesketch.lib.utils.get_validated_indices") + @mock.patch("timesketch.api.v1.resources.llm.LLMResource._execute_llm_call") + def test_post_success( + self, + mock_execute_llm, + mock_get_validated_indices, + mock_get_feature, + mock_get_with_acl, + ): + """Test a successful POST request to the LLM endpoint.""" + mock_sketch = mock.MagicMock() + mock_sketch.has_permission.return_value = True + mock_sketch.id = 1 + mock_get_with_acl.return_value = mock_sketch + + mock_feature = mock.MagicMock() + mock_feature.NAME = "test_feature" + mock_feature.generate_prompt.return_value = "test prompt" + mock_feature.process_response.return_value = {"result": "test result"} + mock_get_feature.return_value = mock_feature + + mock_get_validated_indices.return_value = (["index1"], [1]) + mock_execute_llm.return_value = {"response": "mock response"} + + self.login() + response = self.client.post( + self.resource_url, + data=json.dumps({"feature": "test_feature", "filter": {}}), + content_type="application/json", + ) + self.assertEqual(response.status_code, HTTP_STATUS_CODE_OK) + response_data = json.loads(response.get_data(as_text=True)) + self.assertEqual(response_data, {"result": "test result"}) + 
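# A minimal, standalone sketch of the timeout pattern that
# _execute_llm_call uses above: run the worker in a child process, share
# the result through a Manager dict, and terminate the worker once the
# deadline passes. slow_worker is a hypothetical stand-in for
# _get_content_with_timeout, and the 2-second deadline is shortened here
# from the resource's 30-second _LLM_TIMEOUT_WAIT_SECONDS.
import multiprocessing
import time


def slow_worker(shared_response):
    # Simulates an LLM call that hangs well past the deadline.
    time.sleep(60)
    shared_response.update({"response": "done"})


if __name__ == "__main__":
    with multiprocessing.Manager() as manager:
        shared = manager.dict()
        process = multiprocessing.Process(target=slow_worker, args=(shared,))
        process.start()
        process.join(timeout=2)
        if process.is_alive():
            process.terminate()
            process.join()
            print("timed out")  # The resource maps this case to an HTTP 400.
        else:
            print(dict(shared))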
+ def test_post_missing_data(self): + """Test POST request with missing data.""" + self.login() + response = self.client.post( + self.resource_url, + data=json.dumps({"some_param": "some_value"}), + content_type="application/json", + ) + self.assertEqual(response.status_code, HTTP_STATUS_CODE_BAD_REQUEST) + response_data = json.loads(response.get_data(as_text=True)) + self.assertIn("The 'feature' parameter is required", response_data["message"]) + + @mock.patch("timesketch.models.sketch.Sketch.get_with_acl") + def test_post_missing_feature(self, mock_get_with_acl): + """Test POST request with no feature parameter.""" + mock_sketch = mock.MagicMock() + mock_sketch.has_permission.return_value = True + mock_get_with_acl.return_value = mock_sketch + + self.login() + response = self.client.post( + self.resource_url, + data=json.dumps({"filter": {}}), # No 'feature' key + content_type="application/json", + ) + self.assertEqual(response.status_code, HTTP_STATUS_CODE_BAD_REQUEST) + response_data = json.loads(response.get_data(as_text=True)) + self.assertIn("The 'feature' parameter is required", response_data["message"]) + + @mock.patch("timesketch.models.sketch.Sketch.get_with_acl") + def test_post_invalid_sketch(self, mock_get_with_acl): + """Test POST request with an invalid sketch ID.""" + mock_get_with_acl.return_value = None + + self.login() + response = self.client.post( + self.resource_url, + data=json.dumps({"feature": "test_feature", "filter": {}}), + content_type="application/json", + ) + self.assertEqual(response.status_code, HTTP_STATUS_CODE_NOT_FOUND) + response_data = json.loads(response.get_data(as_text=True)) + self.assertIn("No sketch found with this ID", response_data["message"]) + + @mock.patch("timesketch.models.sketch.Sketch.get_with_acl") + def test_post_no_permission(self, mock_get_with_acl): + """Test POST request when user lacks read permission.""" + mock_sketch = mock.MagicMock() + mock_sketch.has_permission.return_value = False + mock_get_with_acl.return_value = mock_sketch + + self.login() + response = self.client.post( + self.resource_url, + data=json.dumps({"feature": "test_feature", "filter": {}}), + content_type="application/json", + ) + self.assertEqual(response.status_code, HTTP_STATUS_CODE_FORBIDDEN) + response_data = json.loads(response.get_data(as_text=True)) + self.assertIn( + "User does not have read access to the sketch", response_data["message"] + ) + + @mock.patch("timesketch.models.sketch.Sketch.get_with_acl") + @mock.patch( + "timesketch.lib.llms.features.manager.FeatureManager.get_feature_instance" + ) + def test_post_invalid_feature(self, mock_get_feature, mock_get_with_acl): + """Test POST request with an invalid feature name.""" + mock_sketch = mock.MagicMock() + mock_sketch.has_permission.return_value = True + mock_get_with_acl.return_value = mock_sketch + + mock_get_feature.side_effect = KeyError("Invalid feature") + + self.login() + response = self.client.post( + self.resource_url, + data=json.dumps({"feature": "invalid_feature", "filter": {}}), + content_type="application/json", + ) + self.assertEqual(response.status_code, HTTP_STATUS_CODE_BAD_REQUEST) + response_data = json.loads(response.get_data(as_text=True)) + self.assertIn("Invalid LLM feature: invalid_feature", response_data["message"]) diff --git a/timesketch/api/v1/routes.py b/timesketch/api/v1/routes.py index 48ecf6f05a..5bc249ebc5 100644 --- a/timesketch/api/v1/routes.py +++ b/timesketch/api/v1/routes.py @@ -78,6 +78,7 @@ from .resources.unfurl import UnfurlResource from 
.resources.nl2q import Nl2qResource from .resources.llm_summarize import LLMSummarizeResource +from .resources.llm import LLMResource from .resources.settings import SystemSettingsResource from .resources.scenarios import ScenarioTemplateListResource @@ -204,6 +205,7 @@ (UnfurlResource, "/unfurl/"), (Nl2qResource, "/sketches//nl2q/"), (LLMSummarizeResource, "/sketches//events/summary/"), + (LLMResource, "/sketches//llm/"), (SystemSettingsResource, "/settings/"), # Scenario templates (ScenarioTemplateListResource, "/scenarios/"), diff --git a/timesketch/frontend-ng/src/utils/RestApiClient.js b/timesketch/frontend-ng/src/utils/RestApiClient.js index 36114aef1a..86416ebd33 100644 --- a/timesketch/frontend-ng/src/utils/RestApiClient.js +++ b/timesketch/frontend-ng/src/utils/RestApiClient.js @@ -528,4 +528,10 @@ export default { getEventSummary(sketchId, formData) { return RestApiClient.post('/sketches/' + sketchId + '/events/summary/', formData) }, + llmRequest(sketchId, featureName, formData) { + formData = formData || {} + formData.feature = featureName + + return RestApiClient.post(`/sketches/${sketchId}/llm/`, formData) + } } From f379b0e96b4fd2df6a2481ac1a55f385567fb232 Mon Sep 17 00:00:00 2001 From: itsmvd Date: Thu, 27 Feb 2025 13:26:13 +0000 Subject: [PATCH 18/63] linter fix --- timesketch/api/v1/resources/llm.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/timesketch/api/v1/resources/llm.py b/timesketch/api/v1/resources/llm.py index c01eab48dd..edaa3c9691 100644 --- a/timesketch/api/v1/resources/llm.py +++ b/timesketch/api/v1/resources/llm.py @@ -234,6 +234,6 @@ def _get_content_with_timeout( ) response = llm.generate(prompt, response_schema=response_schema) shared_response.update({"response": response}) - except Exception as e: + except Exception as e: # pylint: disable=broad-except logger.error("Error in LLM call within process: %s", e, exc_info=True) shared_response.update({"error": str(e)}) From 2e669d0619f893f54d04a94825a4c28992a1bfe9 Mon Sep 17 00:00:00 2001 From: itsmvd Date: Thu, 27 Feb 2025 13:28:10 +0000 Subject: [PATCH 19/63] linter fix --- timesketch/api/v1/resources/llm.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/timesketch/api/v1/resources/llm.py b/timesketch/api/v1/resources/llm.py index edaa3c9691..d9343144a7 100644 --- a/timesketch/api/v1/resources/llm.py +++ b/timesketch/api/v1/resources/llm.py @@ -234,6 +234,6 @@ def _get_content_with_timeout( ) response = llm.generate(prompt, response_schema=response_schema) shared_response.update({"response": response}) - except Exception as e: # pylint: disable=broad-except + except Exception as e: # pylint: disable=broad-except logger.error("Error in LLM call within process: %s", e, exc_info=True) shared_response.update({"error": str(e)}) From 1e58a282140dced2c56024340948f0203764024b Mon Sep 17 00:00:00 2001 From: itsmvd Date: Fri, 28 Feb 2025 10:47:38 +0000 Subject: [PATCH 20/63] Address comments from review --- timesketch/api/v1/resources/llm.py | 168 +++++++++++++++++++++++----- timesketch/api/v1/resources_test.py | 83 ++++++++++++++ 2 files changed, 226 insertions(+), 25 deletions(-) diff --git a/timesketch/api/v1/resources/llm.py b/timesketch/api/v1/resources/llm.py index d9343144a7..e776502aeb 100644 --- a/timesketch/api/v1/resources/llm.py +++ b/timesketch/api/v1/resources/llm.py @@ -16,12 +16,10 @@ import multiprocessing import multiprocessing.managers import time - import prometheus_client -from flask import request, abort, jsonify +from flask import request, abort, jsonify, 
Response from flask_login import login_required, current_user from flask_restful import Resource - from timesketch.api.v1 import resources from timesketch.lib import definitions, utils from timesketch.lib.definitions import METRICS_NAMESPACE @@ -33,7 +31,12 @@ class LLMResource(resources.ResourceMixin, Resource): - """Resource to interact with LLMs.""" + """Resource to interact with LLMs. + + This class provides an API endpoint for accessing and utilizing Large Language + Model features within Timesketch. It handles request validation, processing, + and response handling, while also monitoring performance metrics. + """ METRICS = { "llm_requests_total": prometheus_client.Counter( @@ -55,31 +58,52 @@ class LLMResource(resources.ResourceMixin, Resource): namespace=METRICS_NAMESPACE, ), } - + # TODO(itsmvd): Make this configurable _LLM_TIMEOUT_WAIT_SECONDS = 30 @login_required - def post(self, sketch_id: int): - """Handles POST requests to the resource.""" + def post(self, sketch_id: int) -> Response: + """Handles POST requests to the resource. + + Processes LLM requests, validates inputs, generates prompts, + executes LLM calls, and returns the processed results. + + Args: + sketch_id: The ID of the sketch to process. + + Returns: + A Flask JSON response containing the processed LLM result. + + Raises: + HTTP exceptions for various error conditions. + """ start_time = time.time() sketch = self._validate_sketch(sketch_id) form = self._validate_request_data() feature = self._get_feature(form.get("feature")) - self._increment_request_metric(sketch_id, feature.NAME) - timeline_ids = self._validate_indices(sketch, form.get("filter", {})) prompt = self._generate_prompt(feature, sketch, form, timeline_ids) response = self._execute_llm_call(feature, prompt, sketch_id) result = self._process_llm_response( feature, response, sketch, form, timeline_ids ) - self._record_duration(sketch_id, feature.NAME, start_time) return jsonify(result) def _validate_sketch(self, sketch_id: int) -> Sketch: - """Validates sketch existence and user permissions.""" + """Validates sketch existence and user permissions. + + Args: + sketch_id: The ID of the sketch to validate. + + Returns: + The validated Sketch object. + + Raises: + HTTP 404: If the sketch doesn't exist. + HTTP 403: If the user doesn't have read access to the sketch. + """ sketch = Sketch.get_with_acl(sketch_id) if not sketch: abort( @@ -93,7 +117,14 @@ def _validate_sketch(self, sketch_id: int) -> Sketch: return sketch def _validate_request_data(self) -> dict: - """Validates the presence of request JSON data.""" + """Validates the presence of request JSON data. + + Returns: + The validated request data as a dictionary. + + Raises: + HTTP 400: If no JSON data is provided in the request. + """ form = request.json if not form: abort( @@ -103,7 +134,17 @@ def _validate_request_data(self) -> dict: return form def _get_feature(self, feature_name: str) -> feature_manager.LLMFeatureInterface: - """Retrieves and validates the requested LLM feature.""" + """Retrieves and validates the requested LLM feature. + + Args: + feature_name: The name of the LLM feature to retrieve. + + Returns: + An instance of the requested LLM feature. + + Raises: + HTTP 400: If feature_name is not provided or is invalid. 
+ """ if not feature_name: abort( definitions.HTTP_STATUS_CODE_BAD_REQUEST, @@ -118,7 +159,18 @@ def _get_feature(self, feature_name: str) -> feature_manager.LLMFeatureInterface ) def _validate_indices(self, sketch: Sketch, query_filter: dict) -> list: - """Extracts and validates timeline IDs from the query filter for a sketch.""" + """Extracts and validates timeline IDs from the query filter for a sketch. + + Args: + sketch: The Sketch object to validate indices for. + query_filter: A dictionary containing filter parameters. + + Returns: + A list of validated timeline IDs. + + Raises: + HTTP 400: If no valid search indices are found. + """ all_indices = list({t.searchindex.index_name for t in sketch.timelines}) indices = query_filter.get("indices", all_indices) if "_all" in indices: @@ -138,7 +190,20 @@ def _generate_prompt( form: dict, timeline_ids: list, ) -> str: - """Generates the LLM prompt based on the feature and request data.""" + """Generates the LLM prompt based on the feature and request data. + + Args: + feature: The LLM feature instance to use. + sketch: The Sketch object. + form: The request form data. + timeline_ids: A list of validated timeline IDs. + + Returns: + The generated prompt string for the LLM. + + Raises: + HTTP 400: If prompt generation fails. + """ try: return feature.generate_prompt( sketch, form=form, datastore=self.datastore, timeline_ids=timeline_ids @@ -149,7 +214,20 @@ def _generate_prompt( def _execute_llm_call( self, feature: feature_manager.LLMFeatureInterface, prompt: str, sketch_id: int ) -> dict: - """Executes the LLM call with a timeout using multiprocessing.""" + """Executes the LLM call with a timeout using multiprocessing. + + Args: + feature: The LLM feature instance to use. + prompt: The generated prompt to send to the LLM. + sketch_id: The ID of the sketch being processed. + + Returns: + The LLM response as a dictionary. + + Raises: + HTTP 400: If the LLM call times out. + HTTP 500: If an error occurs during LLM processing. + """ with multiprocessing.Manager() as manager: shared_response = manager.dict() process = multiprocessing.Process( @@ -158,7 +236,6 @@ def _execute_llm_call( ) process.start() process.join(timeout=self._LLM_TIMEOUT_WAIT_SECONDS) - if process.is_alive(): logger.warning( "LLM call timed out after %d seconds.", @@ -169,8 +246,11 @@ def _execute_llm_call( self.METRICS["llm_errors_total"].labels( sketch_id=str(sketch_id), feature=feature.NAME, error_type="timeout" ).inc() - abort(definitions.HTTP_STATUS_CODE_BAD_REQUEST, "LLM call timed out.") - + abort( + definitions.HTTP_STATUS_CODE_BAD_REQUEST, + "LLM call timed out, please try again. " + "If this issue persists, contact your administrator.", + ) response = dict(shared_response) if "error" in response: self.METRICS["llm_errors_total"].labels( @@ -180,14 +260,33 @@ def _execute_llm_call( ).inc() abort( definitions.HTTP_STATUS_CODE_INTERNAL_SERVER_ERROR, - "Error during LLM processing.", + f"Error during LLM processing: {response['error']}", ) return response["response"] def _process_llm_response( - self, feature, response: dict, sketch: Sketch, form: dict, timeline_ids: list + self, + feature: feature_manager.LLMFeatureInterface, + response: dict, + sketch: Sketch, + form: dict, + timeline_ids: list, ) -> dict: - """Processes the LLM response into the final result.""" + """Processes the LLM response into the final result. + + Args: + feature: The LLM feature instance used. + response: The raw LLM response. + sketch: The Sketch object. + form: The request form data. 
+ timeline_ids: A list of validated timeline IDs. + + Returns: + The processed LLM response as a dictionary. + + Raises: + HTTP 400: If response processing fails. + """ try: return feature.process_response( llm_response=response, @@ -206,7 +305,12 @@ def _process_llm_response( abort(definitions.HTTP_STATUS_CODE_BAD_REQUEST, str(e)) def _increment_request_metric(self, sketch_id: int, feature_name: str) -> None: - """Increments the request counter metric.""" + """Increments the request counter metric. + + Args: + sketch_id: The ID of the sketch being processed. + feature_name: The name of the LLM feature being used. + """ self.METRICS["llm_requests_total"].labels( sketch_id=str(sketch_id), feature=feature_name ).inc() @@ -214,7 +318,13 @@ def _increment_request_metric(self, sketch_id: int, feature_name: str) -> None: def _record_duration( self, sketch_id: int, feature_name: str, start_time: float ) -> None: - """Records the duration of the request.""" + """Records the duration of the request. + + Args: + sketch_id: The ID of the sketch being processed. + feature_name: The name of the LLM feature being used. + start_time: The timestamp when the request started. + """ duration = time.time() - start_time self.METRICS["llm_duration_seconds"].labels( sketch_id=str(sketch_id), feature=feature_name @@ -226,7 +336,15 @@ def _get_content_with_timeout( prompt: str, shared_response: multiprocessing.managers.DictProxy, ) -> None: - """Send a prompt to the LLM and get a response within a process.""" + """Send a prompt to the LLM and get a response within a process. + + This method is executed in a separate process to allow for timeout control. + + Args: + feature: The LLM feature instance to use. + prompt: The generated prompt to send to the LLM. + shared_response: A managed dictionary to store the response or error. 
+ """ try: llm = llm_manager.LLMManager.create_provider(feature_name=feature.NAME) response_schema = ( diff --git a/timesketch/api/v1/resources_test.py b/timesketch/api/v1/resources_test.py index 222641e45c..64f3b8f045 100644 --- a/timesketch/api/v1/resources_test.py +++ b/timesketch/api/v1/resources_test.py @@ -1821,3 +1821,86 @@ def test_post_invalid_feature(self, mock_get_feature, mock_get_with_acl): self.assertEqual(response.status_code, HTTP_STATUS_CODE_BAD_REQUEST) response_data = json.loads(response.get_data(as_text=True)) self.assertIn("Invalid LLM feature: invalid_feature", response_data["message"]) + + @mock.patch("timesketch.models.sketch.Sketch.get_with_acl") + @mock.patch( + "timesketch.lib.llms.features.manager.FeatureManager.get_feature_instance" + ) + @mock.patch("timesketch.lib.utils.get_validated_indices") + def test_post_prompt_generation_error( + self, + mock_get_validated_indices, + mock_get_feature, + mock_get_with_acl, + ): + """Test handling of errors during prompt generation.""" + mock_sketch = mock.MagicMock() + mock_sketch.has_permission.return_value = True + mock_sketch.id = 1 + mock_get_with_acl.return_value = mock_sketch + + mock_feature = mock.MagicMock() + mock_feature.NAME = "test_feature" + mock_feature.generate_prompt.side_effect = ValueError( + "Prompt generation failed" + ) + mock_get_feature.return_value = mock_feature + + mock_get_validated_indices.return_value = (["index1"], [1]) + + self.login() + response = self.client.post( + self.resource_url, + data=json.dumps({"feature": "test_feature", "filter": {}}), + content_type="application/json", + ) + + self.assertEqual(response.status_code, HTTP_STATUS_CODE_BAD_REQUEST) + response_data = json.loads(response.get_data(as_text=True)) + self.assertIn("Prompt generation failed", response_data["message"]) + + mock_feature.generate_prompt.assert_called_once() + + @mock.patch("timesketch.models.sketch.Sketch.get_with_acl") + @mock.patch( + "timesketch.lib.llms.features.manager.FeatureManager.get_feature_instance" + ) + @mock.patch("timesketch.lib.utils.get_validated_indices") + @mock.patch("multiprocessing.Process") + def test_post_llm_execution_timeout( + self, + mock_process, + mock_get_validated_indices, + mock_get_feature, + mock_get_with_acl, + ): + """Test handling of LLM execution timeouts.""" + # Setup mocks + mock_sketch = mock.MagicMock() + mock_sketch.has_permission.return_value = True + mock_sketch.id = 1 + mock_get_with_acl.return_value = mock_sketch + + mock_feature = mock.MagicMock() + mock_feature.NAME = "test_feature" + mock_feature.generate_prompt.return_value = "test prompt" + mock_get_feature.return_value = mock_feature + + mock_get_validated_indices.return_value = (["index1"], [1]) + + process_instance = mock.MagicMock() + process_instance.is_alive.return_value = True + mock_process.return_value = process_instance + + self.login() + response = self.client.post( + self.resource_url, + data=json.dumps({"feature": "test_feature", "filter": {}}), + content_type="application/json", + ) + + self.assertEqual(response.status_code, HTTP_STATUS_CODE_BAD_REQUEST) + response_data = json.loads(response.get_data(as_text=True)) + self.assertIn("LLM call timed out", response_data["message"]) + + process_instance.terminate.assert_called_once() From 37481e26fe6fdffb80a8dea6fc03ee1c5e1f5acc Mon Sep 17 00:00:00 2001 From: itsmvd Date: Fri, 28 Feb 2025 17:00:00 +0000 Subject: [PATCH 21/63] Add nl2q and llm_summarize as LLM features --- timesketch/lib/llms/features/llm_summarize.py | 213 +++++++++++++++++ 
.../lib/llms/features/llm_summarize_test.py | 215 ++++++++++++++++++ timesketch/lib/llms/features/nl2q.py | 190 ++++++++++++++++ timesketch/lib/llms/features/nl2q_test.py | 149 ++++++++++++ 4 files changed, 767 insertions(+) create mode 100644 timesketch/lib/llms/features/llm_summarize.py create mode 100644 timesketch/lib/llms/features/llm_summarize_test.py create mode 100644 timesketch/lib/llms/features/nl2q.py create mode 100644 timesketch/lib/llms/features/nl2q_test.py diff --git a/timesketch/lib/llms/features/llm_summarize.py b/timesketch/lib/llms/features/llm_summarize.py new file mode 100644 index 0000000000..65402d7087 --- /dev/null +++ b/timesketch/lib/llms/features/llm_summarize.py @@ -0,0 +1,213 @@ +# Copyright 2025 Google Inc. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""LLM Summarization feature.""" +import json +import logging +from typing import Any, Optional +import pandas as pd +from flask import current_app +from opensearchpy import OpenSearch +from timesketch.lib import utils +from timesketch.api.v1 import export +from timesketch.models.sketch import Sketch +from timesketch.lib.llms.features.interface import LLMFeatureInterface + +logger = logging.getLogger("timesketch.llm.summarize_feature") + + +class LLMSummarizeFeature(LLMFeatureInterface): + """LLM Summarization feature.""" + + NAME = "llm_summarize" + RESPONSE_SCHEMA = { + "type": "object", + "properties": {"summary": {"type": "string"}}, + "required": ["summary"], + } + + def _get_prompt_text(self, events_dict: list) -> str: + """Reads the prompt template from file and injects events. + + Args: + events_dict: List of event dictionaries to inject into prompt. + + Returns: + str: Complete prompt text with injected events. + + Raises: + ValueError: If the prompt path is not configured. + FileNotFoundError: If the prompt file cannot be found. + IOError: If there's an error reading the prompt file. + """ + prompt_file_path = current_app.config.get("PROMPT_LLM_SUMMARIZATION") + if not prompt_file_path: + logger.error("PROMPT_LLM_SUMMARIZATION config not set") + raise ValueError("LLM summarization prompt path not configured.") + try: + with open(prompt_file_path, "r", encoding="utf-8") as file_handle: + prompt_template = file_handle.read() + except FileNotFoundError: + logger.error("Prompt file not found: %s", prompt_file_path) + raise FileNotFoundError(f"LLM Prompt file not found: {prompt_file_path}") + except IOError as e: + logger.error("Error reading prompt file: %s", e) + raise IOError("Error reading LLM prompt file.") from e + prompt_text = prompt_template.replace("", json.dumps(events_dict)) + return prompt_text + + def _run_timesketch_query( + self, + sketch: Sketch, + query_string: str = "*", + query_filter: Optional[dict] = None, + id_list: Optional[list] = None, + datastore: Optional[OpenSearch] = None, + timeline_ids: Optional[list] = None, + ) -> pd.DataFrame: + """Runs a timesketch query and returns results as a DataFrame. + + Args: + sketch: The Sketch object to query. 
+ query_string: Search query string. + query_filter: Dictionary with filter parameters. + id_list: List of event IDs to retrieve. + datastore: OpenSearch instance for querying. + timeline_ids: List of timeline IDs to query. + + Returns: + pd.DataFrame: DataFrame containing query results. + + Raises: + ValueError: If datastore is not provided or no valid indices are found. + """ + if datastore is None: + raise ValueError("Datastore must be provided.") + if not query_filter: + query_filter = {} + if id_list: + id_query = " OR ".join([f'_id:"{event_id}"' for event_id in id_list]) + query_string = id_query + all_indices = list({t.searchindex.index_name for t in sketch.timelines}) + indices_from_filter = query_filter.get("indices", all_indices) + if "_all" in indices_from_filter: + indices_from_filter = all_indices + indices, timeline_ids = utils.get_validated_indices(indices_from_filter, sketch) + if not indices: + raise ValueError( + "No valid search indices were found to perform the search on." + ) + result = datastore.search( + sketch_id=sketch.id, + query_string=query_string, + query_filter=query_filter, + query_dsl="", + indices=indices, + timeline_ids=timeline_ids, + ) + return export.query_results_to_dataframe(result, sketch) + + def generate_prompt(self, sketch: Sketch, **kwargs: Any) -> str: + """Generates the summarization prompt based on events from a query. + + Args: + sketch: The Sketch object containing events to summarize. + **kwargs: Additional arguments including: + - form: Form data containing query and filter information. + - datastore: OpenSearch instance for querying. + - timeline_ids: List of timeline IDs to query. + + Returns: + str: Generated prompt text with events to summarize. + + Raises: + ValueError: If required parameters are missing or if no events are found. + """ + form = kwargs.get("form") + datastore = kwargs.get("datastore") + timeline_ids = kwargs.get("timeline_ids") + if not form: + raise ValueError("Missing 'form' data in kwargs") + query_filter = form.get("filter", {}) + query_string = form.get("query", "*") or "*" + events_df = self._run_timesketch_query( + sketch, + query_string, + query_filter, + datastore=datastore, + timeline_ids=timeline_ids, + ) + if events_df is None or events_df.empty: + return "No events to summarize based on the current filter." + unique_events_df = events_df[["message"]].drop_duplicates( + subset="message", keep="first" + ) + events_dict = unique_events_df.to_dict(orient="records") + if not events_dict: + return "No events to summarize based on the current filter." + return self._get_prompt_text(events_dict) + + def process_response(self, llm_response: Any, **kwargs: Any) -> dict[str, Any]: + """Processes the LLM response and adds additional context information. + + Args: + llm_response: The response from the LLM model, expected to be a dictionary. + **kwargs: Additional arguments including: + - sketch_id: ID of the sketch being processed. + - sketch: The Sketch object. + - datastore: OpenSearch instance for querying. + - timeline_ids: List of timeline IDs. + - form: Form data containing query and filter information. + + Returns: + dict[str, Any]: Dictionary containing the processed response with additional context: + - response: The summary text. + - summary_event_count: Total number of events summarized. + - summary_unique_event_count: Number of unique events summarized. + + Raises: + ValueError: If required parameters are missing or if the LLM response + is not in the expected format. 
+ """ + sketch_id = kwargs.get("sketch_id") + sketch = kwargs.get("sketch") + datastore = kwargs.get("datastore") + timeline_ids = kwargs.get("timeline_ids") + if not sketch_id: + raise ValueError("Missing 'sketch_id' in kwargs") + form = kwargs.get("form") + if not form: + raise ValueError("Missing 'form' data in kwargs") + query_filter = form.get("filter", {}) + query_string = form.get("query", "*") + events_df = self._run_timesketch_query( + sketch, + query_string, + query_filter, + datastore=datastore, + timeline_ids=timeline_ids, + ) + total_events_count = len(events_df) + unique_events_count = len( + events_df[["message"]].drop_duplicates(subset="message", keep="first") + ) + if not isinstance(llm_response, dict): + raise ValueError("LLM response is expected to be a dictionary") + summary_text = llm_response.get("summary") + if summary_text is None: + raise ValueError("LLM response missing 'summary' key") + return { + "response": summary_text, + "summary_event_count": total_events_count, + "summary_unique_event_count": unique_events_count, + } diff --git a/timesketch/lib/llms/features/llm_summarize_test.py b/timesketch/lib/llms/features/llm_summarize_test.py new file mode 100644 index 0000000000..4946f118f0 --- /dev/null +++ b/timesketch/lib/llms/features/llm_summarize_test.py @@ -0,0 +1,215 @@ +# Copyright 2025 Google Inc. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+"""Tests for the llm_summarize feature.""" + +import json +import mock +import pandas as pd +from flask import current_app +from timesketch.lib.testlib import BaseTest +from timesketch.lib.testlib import MockDataStore +from timesketch.lib.llms.features.llm_summarize import LLMSummarizeFeature + + +class TestLLMSummarizeFeature(BaseTest): + """Tests for the LLMSummarizeFeature.""" + + def setUp(self): + """Set up the tests.""" + super().setUp() + self.llm_feature = LLMSummarizeFeature() + current_app.config["PROMPT_LLM_SUMMARIZATION"] = ( + "./data/llm_summarize/prompt.txt" + ) + self.datastore = MockDataStore("noserver", 4711) + + @mock.patch( + "builtins.open", mock.mock_open(read_data="Analyze these events: ") + ) + def test_get_prompt_text(self): + """Tests _get_prompt_text method.""" + events_dict = [{"message": "Test event 1"}, {"message": "Test event 2"}] + prompt = self.llm_feature._get_prompt_text(events_dict) + + self.assertEqual(prompt, f"Analyze these events: {json.dumps(events_dict)}") + + def test_get_prompt_text_missing_file(self): + """Tests _get_prompt_text method with missing file.""" + current_app.config["PROMPT_LLM_SUMMARIZATION"] = "/file_does_not_exist.txt" + + with self.assertRaises(FileNotFoundError): + self.llm_feature._get_prompt_text([]) + + def test_get_prompt_text_missing_config(self): + """Tests _get_prompt_text method with missing config.""" + del current_app.config["PROMPT_LLM_SUMMARIZATION"] + + with self.assertRaises(ValueError): + self.llm_feature._get_prompt_text([]) + + @mock.patch("timesketch.lib.utils.get_validated_indices") + def test_run_timesketch_query(self, mock_get_indices): + """Tests _run_timesketch_query method.""" + mock_get_indices.return_value = ["test_index"], [1] + result_df = pd.DataFrame([{"message": "Test event"}]) + + with mock.patch.object( + self.datastore, "search", return_value={"mock": "result"} + ) as mock_search: + with mock.patch( + "timesketch.api.v1.export.query_results_to_dataframe", + return_value=result_df, + ) as mock_export: + df = self.llm_feature._run_timesketch_query( + self.sketch1, + query_string="test query", + query_filter={"filter": "test"}, + datastore=self.datastore, + ) + + self.assertEqual(len(df), 1) + self.assertEqual(df.iloc[0]["message"], "Test event") + mock_search.assert_called_once() + mock_export.assert_called_once() + + def test_run_timesketch_query_no_datastore(self): + """Tests _run_timesketch_query method with no datastore.""" + with self.assertRaises(ValueError): + self.llm_feature._run_timesketch_query(self.sketch1) + + @mock.patch("timesketch.lib.utils.get_validated_indices") + def test_run_timesketch_query_no_indices(self, mock_get_indices): + """Tests _run_timesketch_query method with no valid indices.""" + mock_get_indices.return_value = [], [] + + with self.assertRaises(ValueError): + self.llm_feature._run_timesketch_query( + self.sketch1, datastore=self.datastore + ) + + @mock.patch( + "timesketch.lib.llms.features.llm_summarize." + "LLMSummarizeFeature._run_timesketch_query" + ) + @mock.patch( + "timesketch.lib.llms.features.llm_summarize." 
+ "LLMSummarizeFeature._get_prompt_text" + ) + def test_generate_prompt(self, mock_get_prompt, mock_run_query): + """Tests generate_prompt method.""" + # Set up mocks + mock_run_query.return_value = pd.DataFrame( + [ + {"message": "Test event 1"}, + {"message": "Test event 2"}, + {"message": "Test event 1"}, # Add duplicate event on purpose + ] + ) + mock_get_prompt.return_value = "Test prompt" + + # Call the method + prompt = self.llm_feature.generate_prompt( + self.sketch1, form={"query": "test", "filter": {}}, datastore=self.datastore + ) + + # Verify the result + self.assertEqual(prompt, "Test prompt") + mock_run_query.assert_called_once() + called_events = mock_get_prompt.call_args[0][0] + self.assertEqual(len(called_events), 2) + self.assertEqual(called_events[0]["message"], "Test event 1") + self.assertEqual(called_events[1]["message"], "Test event 2") + + @mock.patch( + "timesketch.lib.llms.features.llm_summarize.LLMSummarizeFeature." + "_run_timesketch_query" + ) + def test_generate_prompt_no_events(self, mock_run_query): + """Tests generate_prompt method with no events.""" + mock_run_query.return_value = pd.DataFrame() + + prompt = self.llm_feature.generate_prompt( + self.sketch1, form={"query": "test", "filter": {}}, datastore=self.datastore + ) + + self.assertEqual(prompt, "No events to summarize based on the current filter.") + + def test_generate_prompt_missing_form(self): + """Tests generate_prompt method with missing form.""" + with self.assertRaises(ValueError): + self.llm_feature.generate_prompt(self.sketch1, datastore=self.datastore) + + @mock.patch( + "timesketch.lib.llms.features.llm_summarize.LLMSummarizeFeature." + "_run_timesketch_query" + ) + def test_process_response(self, mock_run_query): + """Tests process_response method.""" + mock_run_query.return_value = pd.DataFrame( + [ + {"message": "Test event 1"}, + {"message": "Test event 2"}, + {"message": "Test event 1"}, # Add duplicate event on purpose + ] + ) + + result = self.llm_feature.process_response( + {"summary": "This is a test summary"}, + sketch_id=1, + sketch=self.sketch1, + form={"query": "test", "filter": {}}, + datastore=self.datastore, + ) + + self.assertEqual(result["response"], "This is a test summary") + self.assertEqual(result["summary_event_count"], 3) + self.assertEqual(result["summary_unique_event_count"], 2) + + def test_process_response_missing_params(self): + """Tests process_response method with missing parameters.""" + with self.assertRaises(ValueError): + self.llm_feature.process_response( + {"summary": "Test"}, + sketch=self.sketch1, + form={"query": "test", "filter": {}}, + datastore=self.datastore, + ) + + with self.assertRaises(ValueError): + self.llm_feature.process_response( + {"summary": "Test"}, + sketch_id=1, + sketch=self.sketch1, + datastore=self.datastore, + ) + + def test_process_response_invalid_response(self): + """Tests process_response method with invalid response format.""" + with self.assertRaises(ValueError): + self.llm_feature.process_response( + "Not a dict", + sketch_id=1, + sketch=self.sketch1, + form={"query": "test", "filter": {}}, + datastore=self.datastore, + ) + + with self.assertRaises(ValueError): + self.llm_feature.process_response( + {"not_summary": "Test"}, + sketch_id=1, + sketch=self.sketch1, + form={"query": "test", "filter": {}}, + datastore=self.datastore, + ) diff --git a/timesketch/lib/llms/features/nl2q.py b/timesketch/lib/llms/features/nl2q.py new file mode 100644 index 0000000000..bd0aa7d674 --- /dev/null +++ 
b/timesketch/lib/llms/features/nl2q.py
@@ -0,0 +1,190 @@
+# Copyright 2025 Google Inc. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""Natural language to query (NL2Q) version 1."""
+import logging
+from typing import Any
+import pandas as pd
+from flask import current_app
+from timesketch.api.v1 import utils
+from timesketch.models.sketch import Sketch
+from timesketch.lib.llms.features.interface import LLMFeatureInterface
+
+logger = logging.getLogger("timesketch.llm.nl2q_feature")
+
+
+class Nl2qFeature(LLMFeatureInterface):
+    """NL2Q feature."""
+
+    NAME = "nl2q"
+
+    def _sketch_data_types(self, sketch: Sketch) -> str:
+        """Get the data types for the current sketch.
+
+        Args:
+            sketch: The Sketch object to extract data types from.
+
+        Returns:
+            str: Comma-separated list of data types found in the sketch.
+        """
+        output = []
+        data_type_aggregation = utils.run_aggregator(
+            sketch.id, "field_bucket", {"field": "data_type", "limit": "1000"}
+        )
+        if not data_type_aggregation or not data_type_aggregation[0]:
+            logger.error("Internal problem with the aggregations.")
+            return ""
+        data_types = data_type_aggregation[0].values
+        if not data_types:
+            logger.warning("No data types in the sketch.")
+            return ""
+        for data_type in data_types:
+            output.append(data_type.get("data_type"))
+        return ",".join(output)
+
+    def _data_types_descriptions(self, data_types: str) -> str:
+        """Creates a formatted string of data types and attribute descriptions.
+
+        Args:
+            data_types: Comma-separated list of data types.
+
+        Returns:
+            str: Multi-line string with data types and their field descriptions.
+        """
+        df_data_types = utils.load_csv_file("DATA_TYPES_PATH")
+        if df_data_types.empty:
+            logger.error("No data types description file or the file is empty.")
+            return ""
+        df_short_data_types = pd.DataFrame(
+            df_data_types.groupby("data_type").apply(self._concatenate_values),
+            columns=["fields"],
+        )
+        df_short_data_types["data_type"] = df_short_data_types.index
+        df_short_data_types["data_type"] = df_short_data_types["data_type"].apply(
+            lambda x: x.strip()
+        )
+        df_short_data_types.reset_index(drop=True, inplace=True)
+        output = []
+        for dtype in data_types.split(","):
+            extract = df_short_data_types[
+                df_short_data_types["data_type"] == dtype.strip()
+            ]
+            if extract.empty:
+                logger.warning("'%s' not found in data types", dtype.strip())
+                continue
+            output.append(extract.iloc[0]["fields"])
+        return "\n".join(output)
+
+    def _generate_fields(self, group) -> str:
+        """Generates the fields for a data type.
+
+        Args:
+            group: DataFrame group containing field, type, and description columns.
+
+        Returns:
+            str: Comma-separated list of fields formatted as strings.
+        """
+        return ", ".join(
+            f'"{f}"'
+            for f, t, d in zip(group["field"], group["type"], group["description"])
+        )
+
+    def _concatenate_values(self, group) -> str:
+        """Concatenates the fields for a data type.
+
+        Args:
+            group: DataFrame group with data_type and field information.
+ + Returns: + str: Formatted string with data type and its fields. + """ + return f'* "{group["data_type"].iloc[0]}" -> {self._generate_fields(group)}' + + def _build_prompt(self, question: str, sketch: Sketch) -> str: + """Builds the prompt for NL2Q. + + Args: + question: Natural language question from the user. + sketch: The Sketch object to extract data types from. + + Returns: + str: Complete prompt with question, examples, and data types. + + Raises: + OSError: If prompt or examples file cannot be opened. + IOError: If prompt or examples file cannot be read. + """ + prompt_file = current_app.config.get("PROMPT_NL2Q", "") + examples_file = current_app.config.get("EXAMPLES_NL2Q", "") + try: + with open(prompt_file, "r") as file: + prompt = file.read() + except (OSError, IOError): + logger.error("No prompt file found") + raise + try: + with open(examples_file, "r") as file: + examples = file.read() + except (OSError, IOError): + logger.error("No examples file found") + raise # Re-raise the exception + prompt = prompt.format( + examples=examples, + question=question, + data_types=self._data_types_descriptions(self._sketch_data_types(sketch)), + ) + return prompt + + def generate_prompt(self, sketch: Sketch, **kwargs: Any) -> str: + """Generates the NL2Q prompt. + + Args: + sketch: The Sketch object. + kwargs: Must contain 'form' with a 'question' key. + + Returns: + str: The generated prompt. + + Raises: + ValueError: If the required question is missing from the form data. + """ + form = kwargs.get("form") + if not form or "question" not in form: + raise ValueError("Missing 'question' in form data") + question = form["question"] + return self._build_prompt(question, sketch) + + def process_response(self, llm_response: str, **kwargs: Any) -> dict[str, Any]: + """Processes the LLM response, extracting the query. + + Args: + llm_response: String response from the LLM. + kwargs: Additional arguments (not used). + + Returns: + dict[str, Any]: Dictionary containing the search query with keys: + - name: Name of the generated query + - query_string: The actual query string + - error: Error message (None if successful) + + Raises: + ValueError: If the LLM response is not a string. + """ + if not isinstance(llm_response, str): + raise ValueError(f"Unexpected response type from LLM: {type(llm_response)}") + result_schema = { + "name": "AI generated search query", + "query_string": llm_response.strip("`\n\r\t "), + "error": None, + } + return result_schema diff --git a/timesketch/lib/llms/features/nl2q_test.py b/timesketch/lib/llms/features/nl2q_test.py new file mode 100644 index 0000000000..684a5f54fc --- /dev/null +++ b/timesketch/lib/llms/features/nl2q_test.py @@ -0,0 +1,149 @@ +# Copyright 2025 Google Inc. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+"""Tests for the nl2q feature.""" + +import mock +import pandas as pd +from flask import current_app +from timesketch.lib.testlib import BaseTest +from timesketch.lib.llms.features.nl2q import Nl2qFeature + + +class TestNl2qFeature(BaseTest): + """Tests for the Nl2qFeature.""" + + def setUp(self): + """Set up the tests.""" + super().setUp() + self.nl2q_feature = Nl2qFeature() + current_app.config["PROMPT_NL2Q"] = "./tests/test_data/nl2q/test_prompt_nl2q" + current_app.config["EXAMPLES_NL2Q"] = ( + "./tests/test_data/nl2q/test_examples_nl2q" + ) + + @mock.patch("timesketch.lib.llms.features.nl2q.utils.run_aggregator") + def test_sketch_data_types(self, mock_aggregator): + """Test _sketch_data_types method.""" + mock_AggregationResult = mock.MagicMock() + mock_AggregationResult.values = [ + {"data_type": "test:data_type:1"}, + {"data_type": "test:data_type:2"}, + ] + mock_aggregator.return_value = (mock_AggregationResult, {}) + + data_types = self.nl2q_feature._sketch_data_types(self.sketch1) + + self.assertEqual(data_types, "test:data_type:1,test:data_type:2") + mock_aggregator.assert_called_once_with( + self.sketch1.id, "field_bucket", {"field": "data_type", "limit": "1000"} + ) + + @mock.patch("timesketch.lib.llms.features.nl2q.utils.load_csv_file") + def test_data_types_descriptions(self, mock_load_csv): + """Test _data_types_descriptions method.""" + mock_df = pd.DataFrame( + { + "data_type": [ + "test:data_type:1", + "test:data_type:1", + "test:data_type:2", + ], + "field": ["field_test_1", "field_test_2", "field_test_3"], + "type": ["text", "text", "text"], + "description": ["desc1", "desc2", "desc3"], + } + ) + mock_load_csv.return_value = mock_df + + descriptions = self.nl2q_feature._data_types_descriptions( + "test:data_type:1,test:data_type:2" + ) + + self.assertIn( + '* "test:data_type:1" -> "field_test_1", "field_test_2"', descriptions + ) + self.assertIn('* "test:data_type:2" -> "field_test_3"', descriptions) + + @mock.patch("timesketch.lib.llms.features.nl2q.Nl2qFeature._sketch_data_types") + @mock.patch( + "timesketch.lib.llms.features.nl2q.Nl2qFeature._data_types_descriptions" + ) + def test_build_prompt(self, mock_data_types_desc, mock_sketch_data_types): + """Test _build_prompt method.""" + mock_sketch_data_types.return_value = "test:data_type:1,test:data_type:2" + mock_data_types_desc.return_value = ( + '* "test:data_type:1" -> "field_test_1", "field_test_2"\n' + '* "test:data_type:2" -> "field_test_3"' + ) + + prompt_content = ( + "Examples:\n{examples}\nTypes:\n{data_types}\nQuestion:\n{question}" + ) + examples_content = "example 1\n\nexample 2" + + m = mock.mock_open() + m.side_effect = [ + mock.mock_open(read_data=prompt_content).return_value, + mock.mock_open(read_data=examples_content).return_value, + ] + + with mock.patch("builtins.open", m): + prompt = self.nl2q_feature._build_prompt("What happened?", self.sketch1) + + self.assertIn("Examples:", prompt) + self.assertIn("example 1", prompt) + self.assertIn("example 2", prompt) + self.assertIn("Types:", prompt) + self.assertIn('* "test:data_type:1" -> "field_test_1", "field_test_2"', prompt) + self.assertIn('* "test:data_type:2" -> "field_test_3"', prompt) + self.assertIn("Question:", prompt) + self.assertIn("What happened?", prompt) + + @mock.patch("timesketch.lib.llms.features.nl2q.Nl2qFeature._build_prompt") + def test_generate_prompt(self, mock_build_prompt): + """Test generate_prompt method.""" + mock_build_prompt.return_value = "Test prompt" + + prompt = self.nl2q_feature.generate_prompt( + self.sketch1, 
form={"question": "What happened?"} + ) + + self.assertEqual(prompt, "Test prompt") + mock_build_prompt.assert_called_once_with("What happened?", self.sketch1) + + def test_generate_prompt_missing_question(self): + """Test generate_prompt method with missing question.""" + with self.assertRaises(ValueError): + self.nl2q_feature.generate_prompt(self.sketch1, form={}) + + with self.assertRaises(ValueError): + self.nl2q_feature.generate_prompt(self.sketch1) + + def test_process_response(self): + """Test process_response method.""" + result = self.nl2q_feature.process_response("test query") + self.assertEqual(result["query_string"], "test query") + self.assertIsNone(result["error"]) + + result = self.nl2q_feature.process_response(" \t`test query`\n ") + self.assertEqual(result["query_string"], "test query") + + result = self.nl2q_feature.process_response("```test query``") + self.assertEqual(result["query_string"], "test query") + + result = self.nl2q_feature.process_response(" \t```test query```\n ") + self.assertEqual(result["query_string"], "test query") + + with self.assertRaises(ValueError): + self.nl2q_feature.process_response(123) From d028f0f5c5e48a91e57ac958e1bb47aed451a752 Mon Sep 17 00:00:00 2001 From: itsmvd Date: Fri, 28 Feb 2025 17:08:04 +0000 Subject: [PATCH 22/63] Couple of linter fixes on llm_summarize --- timesketch/lib/llms/features/llm_summarize.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/timesketch/lib/llms/features/llm_summarize.py b/timesketch/lib/llms/features/llm_summarize.py index 65402d7087..786ba0e1b4 100644 --- a/timesketch/lib/llms/features/llm_summarize.py +++ b/timesketch/lib/llms/features/llm_summarize.py @@ -57,9 +57,11 @@ def _get_prompt_text(self, events_dict: list) -> str: try: with open(prompt_file_path, "r", encoding="utf-8") as file_handle: prompt_template = file_handle.read() - except FileNotFoundError: + except FileNotFoundError as exc: logger.error("Prompt file not found: %s", prompt_file_path) - raise FileNotFoundError(f"LLM Prompt file not found: {prompt_file_path}") + raise FileNotFoundError( + f"LLM Prompt file not found: {prompt_file_path}" + ) from exc except IOError as e: logger.error("Error reading prompt file: %s", e) raise IOError("Error reading LLM prompt file.") from e @@ -170,7 +172,7 @@ def process_response(self, llm_response: Any, **kwargs: Any) -> dict[str, Any]: - form: Form data containing query and filter information. Returns: - dict[str, Any]: Dictionary containing the processed response with additional context: + Dictionary containing the processed response with additional context: - response: The summary text. - summary_event_count: Total number of events summarized. - summary_unique_event_count: Number of unique events summarized. 
From f4471b283ce68cb69b23caac053dd7f7a3e30389 Mon Sep 17 00:00:00 2001 From: itsmvd Date: Fri, 28 Feb 2025 17:13:17 +0000 Subject: [PATCH 23/63] pylint: disable=protected-access --- timesketch/lib/llms/features/llm_summarize_test.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/timesketch/lib/llms/features/llm_summarize_test.py b/timesketch/lib/llms/features/llm_summarize_test.py index 4946f118f0..186bad0f1f 100644 --- a/timesketch/lib/llms/features/llm_summarize_test.py +++ b/timesketch/lib/llms/features/llm_summarize_test.py @@ -21,7 +21,7 @@ from timesketch.lib.testlib import MockDataStore from timesketch.lib.llms.features.llm_summarize import LLMSummarizeFeature - +#pylint: disable=protected-access class TestLLMSummarizeFeature(BaseTest): """Tests for the LLMSummarizeFeature.""" From e7e82d4c7afb109d81eccd4e10baf6049aac8d66 Mon Sep 17 00:00:00 2001 From: itsmvd Date: Fri, 28 Feb 2025 17:15:35 +0000 Subject: [PATCH 24/63] black formatting --- timesketch/lib/llms/features/llm_summarize_test.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/timesketch/lib/llms/features/llm_summarize_test.py b/timesketch/lib/llms/features/llm_summarize_test.py index 186bad0f1f..c103321aff 100644 --- a/timesketch/lib/llms/features/llm_summarize_test.py +++ b/timesketch/lib/llms/features/llm_summarize_test.py @@ -21,7 +21,8 @@ from timesketch.lib.testlib import MockDataStore from timesketch.lib.llms.features.llm_summarize import LLMSummarizeFeature -#pylint: disable=protected-access + +# pylint: disable=protected-access class TestLLMSummarizeFeature(BaseTest): """Tests for the LLMSummarizeFeature.""" From e171e4d10563bdcf330511cce8730a61a45fc86a Mon Sep 17 00:00:00 2001 From: itsmvd Date: Fri, 28 Feb 2025 17:20:04 +0000 Subject: [PATCH 25/63] # pylint: disable=protected-access --- timesketch/lib/llms/features/nl2q_test.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/timesketch/lib/llms/features/nl2q_test.py b/timesketch/lib/llms/features/nl2q_test.py index 684a5f54fc..e3e52d470d 100644 --- a/timesketch/lib/llms/features/nl2q_test.py +++ b/timesketch/lib/llms/features/nl2q_test.py @@ -19,7 +19,7 @@ from timesketch.lib.testlib import BaseTest from timesketch.lib.llms.features.nl2q import Nl2qFeature - +# pylint: disable=protected-access class TestNl2qFeature(BaseTest): """Tests for the Nl2qFeature.""" From 1cf49c073e16ab2cfc96bb0fdcc02f0f412fcfc3 Mon Sep 17 00:00:00 2001 From: itsmvd Date: Fri, 28 Feb 2025 17:22:54 +0000 Subject: [PATCH 26/63] formatting on nl2q --- timesketch/lib/llms/features/nl2q_test.py | 1 + 1 file changed, 1 insertion(+) diff --git a/timesketch/lib/llms/features/nl2q_test.py b/timesketch/lib/llms/features/nl2q_test.py index e3e52d470d..c902cad527 100644 --- a/timesketch/lib/llms/features/nl2q_test.py +++ b/timesketch/lib/llms/features/nl2q_test.py @@ -19,6 +19,7 @@ from timesketch.lib.testlib import BaseTest from timesketch.lib.llms.features.nl2q import Nl2qFeature + # pylint: disable=protected-access class TestNl2qFeature(BaseTest): """Tests for the Nl2qFeature.""" From 0b3f25105be19b0d5ba7b3bb0e73416bdc351a5b Mon Sep 17 00:00:00 2001 From: itsmvd Date: Sat, 1 Mar 2025 09:34:33 +0000 Subject: [PATCH 27/63] add feature specific metrics --- timesketch/lib/llms/features/llm_summarize.py | 44 ++++++++++++++----- 1 file changed, 32 insertions(+), 12 deletions(-) diff --git a/timesketch/lib/llms/features/llm_summarize.py b/timesketch/lib/llms/features/llm_summarize.py index 786ba0e1b4..1daca5e84d 100644 --- 
a/timesketch/lib/llms/features/llm_summarize.py +++ b/timesketch/lib/llms/features/llm_summarize.py @@ -16,15 +16,33 @@ import logging from typing import Any, Optional import pandas as pd +import prometheus_client from flask import current_app from opensearchpy import OpenSearch from timesketch.lib import utils from timesketch.api.v1 import export from timesketch.models.sketch import Sketch +from timesketch.lib.definitions import METRICS_NAMESPACE from timesketch.lib.llms.features.interface import LLMFeatureInterface logger = logging.getLogger("timesketch.llm.summarize_feature") +# TODO(itsmvd): Remove 'feature' prefix after migration +METRICS = { + "llm_summary_events_processed_total": prometheus_client.Counter( + "feature_llm_summary_events_processed_total", # avoid duplicate registration + "Total number of events processed for LLM summarization", + ["sketch_id"], + namespace=METRICS_NAMESPACE, + ), + "llm_summary_unique_events_total": prometheus_client.Counter( + "feature_llm_summary_unique_events_total", # avoid duplicate registration + "Total number of unique events sent to the LLM", + ["sketch_id"], + namespace=METRICS_NAMESPACE, + ), +} + class LLMSummarizeFeature(LLMFeatureInterface): """LLM Summarization feature.""" @@ -38,13 +56,10 @@ class LLMSummarizeFeature(LLMFeatureInterface): def _get_prompt_text(self, events_dict: list) -> str: """Reads the prompt template from file and injects events. - Args: events_dict: List of event dictionaries to inject into prompt. - Returns: str: Complete prompt text with injected events. - Raises: ValueError: If the prompt path is not configured. FileNotFoundError: If the prompt file cannot be found. @@ -78,7 +93,6 @@ def _run_timesketch_query( timeline_ids: Optional[list] = None, ) -> pd.DataFrame: """Runs a timesketch query and returns results as a DataFrame. - Args: sketch: The Sketch object to query. query_string: Search query string. @@ -86,10 +100,8 @@ def _run_timesketch_query( id_list: List of event IDs to retrieve. datastore: OpenSearch instance for querying. timeline_ids: List of timeline IDs to query. - Returns: pd.DataFrame: DataFrame containing query results. - Raises: ValueError: If datastore is not provided or no valid indices are found. """ @@ -121,17 +133,14 @@ def _run_timesketch_query( def generate_prompt(self, sketch: Sketch, **kwargs: Any) -> str: """Generates the summarization prompt based on events from a query. - Args: sketch: The Sketch object containing events to summarize. **kwargs: Additional arguments including: - form: Form data containing query and filter information. - datastore: OpenSearch instance for querying. - timeline_ids: List of timeline IDs to query. - Returns: str: Generated prompt text with events to summarize. - Raises: ValueError: If required parameters are missing or if no events are found. """ @@ -151,17 +160,30 @@ def generate_prompt(self, sketch: Sketch, **kwargs: Any) -> str: ) if events_df is None or events_df.empty: return "No events to summarize based on the current filter." 
+ + # Count and record total events + total_events_count = len(events_df) + METRICS["llm_summary_events_processed_total"].labels( + sketch_id=str(sketch.id) + ).inc(total_events_count) + + # Get unique events, count and record them unique_events_df = events_df[["message"]].drop_duplicates( subset="message", keep="first" ) + unique_events_count = len(unique_events_df) + METRICS["llm_summary_unique_events_total"].labels(sketch_id=str(sketch.id)).inc( + unique_events_count + ) + events_dict = unique_events_df.to_dict(orient="records") if not events_dict: return "No events to summarize based on the current filter." + return self._get_prompt_text(events_dict) def process_response(self, llm_response: Any, **kwargs: Any) -> dict[str, Any]: """Processes the LLM response and adds additional context information. - Args: llm_response: The response from the LLM model, expected to be a dictionary. **kwargs: Additional arguments including: @@ -170,13 +192,11 @@ def process_response(self, llm_response: Any, **kwargs: Any) -> dict[str, Any]: - datastore: OpenSearch instance for querying. - timeline_ids: List of timeline IDs. - form: Form data containing query and filter information. - Returns: Dictionary containing the processed response with additional context: - response: The summary text. - summary_event_count: Total number of events summarized. - summary_unique_event_count: Number of unique events summarized. - Raises: ValueError: If required parameters are missing or if the LLM response is not in the expected format. From bb2bc886cf2d946eee4a12b896cefb4cc3a3440c Mon Sep 17 00:00:00 2001 From: itsmvd Date: Sat, 1 Mar 2025 09:36:56 +0000 Subject: [PATCH 28/63] remove unnecessary comments --- timesketch/lib/llms/features/llm_summarize.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/timesketch/lib/llms/features/llm_summarize.py b/timesketch/lib/llms/features/llm_summarize.py index 1daca5e84d..695a5a3c7b 100644 --- a/timesketch/lib/llms/features/llm_summarize.py +++ b/timesketch/lib/llms/features/llm_summarize.py @@ -161,13 +161,11 @@ def generate_prompt(self, sketch: Sketch, **kwargs: Any) -> str: if events_df is None or events_df.empty: return "No events to summarize based on the current filter." - # Count and record total events total_events_count = len(events_df) METRICS["llm_summary_events_processed_total"].labels( sketch_id=str(sketch.id) ).inc(total_events_count) - # Get unique events, count and record them unique_events_df = events_df[["message"]].drop_duplicates( subset="message", keep="first" ) From 5bf333e4ba3475390cd21afdb8d3d45bf7476252 Mon Sep 17 00:00:00 2001 From: janosch Date: Mon, 3 Mar 2025 10:18:14 +0000 Subject: [PATCH 29/63] fix black linter --- setup.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/setup.py b/setup.py index ac1b357bb8..9018ee4c69 100755 --- a/setup.py +++ b/setup.py @@ -14,8 +14,8 @@ # limitations under the License. """This is the setup file for the project. 
The standard setup rules apply:
- python setup.py build
- sudo python setup.py install
+python setup.py build
+sudo python setup.py install
 """
 
 from __future__ import print_function

From 68140794fa45609f44cb62aacdac69d13c5f8b40 Mon Sep 17 00:00:00 2001
From: itsmvd
Date: Tue, 4 Mar 2025 09:23:14 +0000
Subject: [PATCH 30/63] review fixes

---
 timesketch/lib/llms/features/llm_summarize.py | 19 +++++++++++--------
 1 file changed, 11 insertions(+), 8 deletions(-)

diff --git a/timesketch/lib/llms/features/llm_summarize.py b/timesketch/lib/llms/features/llm_summarize.py
index 695a5a3c7b..7c38339214 100644
--- a/timesketch/lib/llms/features/llm_summarize.py
+++ b/timesketch/lib/llms/features/llm_summarize.py
@@ -48,16 +48,17 @@ class LLMSummarizeFeature(LLMFeatureInterface):
     """LLM Summarization feature."""
 
     NAME = "llm_summarize"
+    PROMPT_CONFIG_KEY = "PROMPT_LLM_SUMMARIZATION"
     RESPONSE_SCHEMA = {
         "type": "object",
         "properties": {"summary": {"type": "string"}},
         "required": ["summary"],
     }
 
-    def _get_prompt_text(self, events_dict: list) -> str:
+    def _get_prompt_text(self, events: list[dict[str, Any]]) -> str:
         """Reads the prompt template from file and injects events.
         Args:
-            events_dict: List of event dictionaries to inject into prompt.
+            events: List of event dictionaries to inject into prompt.
         Returns:
             str: Complete prompt text with injected events.
         Raises:
@@ -65,10 +66,11 @@ def _get_prompt_text(self, events_dict: list) -> str:
             FileNotFoundError: If the prompt file cannot be found.
             IOError: If there's an error reading the prompt file.
         """
-        prompt_file_path = current_app.config.get("PROMPT_LLM_SUMMARIZATION")
+        prompt_file_path = current_app.config.get(self.PROMPT_CONFIG_KEY)
         if not prompt_file_path:
-            logger.error("PROMPT_LLM_SUMMARIZATION config not set")
+            logger.error("%s config not set", self.PROMPT_CONFIG_KEY)
             raise ValueError("LLM summarization prompt path not configured.")
+
         try:
             with open(prompt_file_path, "r", encoding="utf-8") as file_handle:
                 prompt_template = file_handle.read()
@@ -80,7 +82,8 @@ def _get_prompt_text(self, events: list[dict[str, Any]]) -> str:
         except IOError as e:
             logger.error("Error reading prompt file: %s", e)
             raise IOError("Error reading LLM prompt file.") from e
-        prompt_text = prompt_template.replace("<EVENTS_JSON>", json.dumps(events_dict))
+
+        prompt_text = prompt_template.replace("<EVENTS_JSON>", json.dumps(events))
         return prompt_text
 
     def _run_timesketch_query(
@@ -174,11 +177,11 @@ def generate_prompt(self, sketch: Sketch, **kwargs: Any) -> str:
                 unique_events_count
             )
 
-        events_dict = unique_events_df.to_dict(orient="records")
-        if not events_dict:
+        events = unique_events_df.to_dict(orient="records")
+        if not events:
             return "No events to summarize based on the current filter."
-        return self._get_prompt_text(events_dict)
+
+        return self._get_prompt_text(events)
 
     def process_response(self, llm_response: Any, **kwargs: Any) -> dict[str, Any]:
         """Processes the LLM response and adds additional context information.
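The PROMPT_CONFIG_KEY attribute introduced in patch 30 above lets each LLM feature name its own prompt file in the Flask config, and the selected events are injected into that template with plain string substitution. The next patch adds a guard that validates the placeholder before substituting. A minimal sketch of the combined flow, with an inline template standing in for the file referenced by the PROMPT_LLM_SUMMARIZATION config key and made-up event messages:

import json

prompt_template = "Summarize the following events:\n<EVENTS_JSON>"
events = [{"message": "failed login for root"}, {"message": "new user created"}]

# Fail early if the template cannot receive the events; str.replace() would
# otherwise return the template unchanged and the LLM would see no events.
if "<EVENTS_JSON>" not in prompt_template:
    raise ValueError("Prompt template is missing the required <EVENTS_JSON> placeholder.")

prompt_text = prompt_template.replace("<EVENTS_JSON>", json.dumps(events))
print(prompt_text)

json.dumps() is what keeps event messages with embedded quotes or newlines intact inside the prompt; interpolating str(events) would emit Python repr syntax that the model could mis-parse.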
From 25e7042e464a0574c84ed7fe5583e0c47159a9d8 Mon Sep 17 00:00:00 2001
From: itsmvd
Date: Tue, 4 Mar 2025 10:04:08 +0000
Subject: [PATCH 31/63] Handle incorrect prompt file + test

---
 timesketch/lib/llms/features/llm_summarize.py | 15 +++++++++++----
 .../lib/llms/features/llm_summarize_test.py   | 13 +++++++++++++
 2 files changed, 24 insertions(+), 4 deletions(-)

diff --git a/timesketch/lib/llms/features/llm_summarize.py b/timesketch/lib/llms/features/llm_summarize.py
index 7c38339214..9a6cf5f457 100644
--- a/timesketch/lib/llms/features/llm_summarize.py
+++ b/timesketch/lib/llms/features/llm_summarize.py
@@ -55,14 +55,14 @@ class LLMSummarizeFeature(LLMFeatureInterface):
         "required": ["summary"],
     }
 
-    def _get_prompt_text(self, events: list[dict[str, Any]]) -> str:
+    def _get_prompt_text(self, events_dict: list[dict[str, Any]]) -> str:
         """Reads the prompt template from file and injects events.
         Args:
-            events: List of event dictionaries to inject into prompt.
+            events_dict: List of event dictionaries to inject into prompt.
         Returns:
             str: Complete prompt text with injected events.
         Raises:
-            ValueError: If the prompt path is not configured.
+            ValueError: If the prompt path is not configured or placeholder is missing.
             FileNotFoundError: If the prompt file cannot be found.
             IOError: If there's an error reading the prompt file.
         """
@@ -83,7 +83,14 @@ def _get_prompt_text(self, events: list[dict[str, Any]]) -> str:
             logger.error("Error reading prompt file: %s", e)
             raise IOError("Error reading LLM prompt file.") from e
 
-        prompt_text = prompt_template.replace("<EVENTS_JSON>", json.dumps(events))
+        if "<EVENTS_JSON>" not in prompt_template:
+            logger.error("Prompt template is missing the <EVENTS_JSON> placeholder")
+            raise ValueError(
+                "LLM summarization prompt template is missing the "
+                "required <EVENTS_JSON> placeholder."
+            )
+
+        prompt_text = prompt_template.replace("<EVENTS_JSON>", json.dumps(events_dict))
         return prompt_text
 
     def _run_timesketch_query(
diff --git a/timesketch/lib/llms/features/llm_summarize_test.py b/timesketch/lib/llms/features/llm_summarize_test.py
index c103321aff..073ca07479 100644
--- a/timesketch/lib/llms/features/llm_summarize_test.py
+++ b/timesketch/lib/llms/features/llm_summarize_test.py
@@ -45,6 +45,19 @@ def test_get_prompt_text(self):
 
         self.assertEqual(prompt, f"Analyze these events: {json.dumps(events_dict)}")
 
+    @mock.patch(
+        "builtins.open",
+        mock.mock_open(read_data="Analyze these events without placeholder"),
+    )
+    def test_get_prompt_text_missing_placeholder(self):
+        """Tests _get_prompt_text method with missing placeholder."""
+        events_dict = [{"message": "Test event"}]
+        with self.assertRaises(ValueError) as context:
+            self.llm_feature._get_prompt_text(events_dict)
+        self.assertIn(
+            "missing the required <EVENTS_JSON> placeholder", str(context.exception)
+        )
+
     def test_get_prompt_text_missing_file(self):
         """Tests _get_prompt_text method with missing file."""
         current_app.config["PROMPT_LLM_SUMMARIZATION"] = "/file_does_not_exist.txt"

From e76ffcfe614ab4e3c3f8475abba8bffb309c6cd6 Mon Sep 17 00:00:00 2001
From: itsmvd
Date: Tue, 4 Mar 2025 10:23:47 +0000
Subject: [PATCH 32/63] frontend: LLM features switch to new llm endpoint

---
 .../src/components/Explore/EventList.vue      |  4 ++--
 .../src/components/Scenarios/QuestionCard.vue | 21 ++++++++++---------
 2 files changed, 13 insertions(+), 12 deletions(-)

diff --git a/timesketch/frontend-ng/src/components/Explore/EventList.vue b/timesketch/frontend-ng/src/components/Explore/EventList.vue
index f9bcd7a793..f7d7de62fe 100644
--- a/timesketch/frontend-ng/src/components/Explore/EventList.vue
+++ b/timesketch/frontend-ng/src/components/Explore/EventList.vue
@@ -957,9 +957,9 @@ export default {
         query: this.currentQueryString,
         filter: this.currentQueryFilter,
       }
-      ApiClient.getEventSummary(this.sketch.id, formData)
+      ApiClient.llmRequest(this.sketch.id, 'llm_summarize', formData)
         .then((response) => {
-          this.$set(this.eventList.meta, 'summary', response.data.summary)
+          this.$set(this.eventList.meta, 'summary', response.data.response)
           this.$set(this.eventList.meta, 'summary_event_count', response.data.summary_event_count)
           this.$set(this.eventList.meta, 'summary_unique_event_count', response.data.summary_unique_event_count)
           this.isSummaryLoading = false
diff --git a/timesketch/frontend-ng/src/components/Scenarios/QuestionCard.vue b/timesketch/frontend-ng/src/components/Scenarios/QuestionCard.vue
index 64f538c08d..40068253e7 100644
--- a/timesketch/frontend-ng/src/components/Scenarios/QuestionCard.vue
+++ b/timesketch/frontend-ng/src/components/Scenarios/QuestionCard.vue
@@ -404,16 +404,17 @@ export default {
   },
   methods: {
     getSuggestedQuery() {
-      this.suggestedQueryLoading = true
-      ApiClient.nl2q(this.sketch.id, this.activeQuestion.display_name)
-        .then((response) => {
-          this.suggestedQuery = response.data
-          this.suggestedQueryLoading = false
-        })
-        .catch((e) => {
-          console.error(e)
-        })
-    },
+        this.suggestedQueryLoading = true
+        let formData = { question: this.activeQuestion.display_name }
+        ApiClient.llmRequest(this.sketch.id, 'nl2q', formData)
+        .then((response) => {
+          this.suggestedQuery = response.data
+          this.suggestedQueryLoading = false
+        })
+        .catch((e) => {
+          console.error(e)
+        })
+      },
    getQuestionTemplates() {
      this.isLoading = true
      ApiClient.getQuestionTemplates()

From a4276eece41e1c405e6c82a6da3453b734093a55 Mon Sep 17 00:00:00 2001
From: itsmvd
Date: 
Tue, 4 Mar 2025 10:27:17 +0000 Subject: [PATCH 33/63] layout fix --- .../src/components/Scenarios/QuestionCard.vue | 22 +++++++++---------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/timesketch/frontend-ng/src/components/Scenarios/QuestionCard.vue b/timesketch/frontend-ng/src/components/Scenarios/QuestionCard.vue index 40068253e7..601a045611 100644 --- a/timesketch/frontend-ng/src/components/Scenarios/QuestionCard.vue +++ b/timesketch/frontend-ng/src/components/Scenarios/QuestionCard.vue @@ -404,17 +404,17 @@ export default { }, methods: { getSuggestedQuery() { - this.suggestedQueryLoading = true - let formData = { question: this.activeQuestion.display_name } - ApiClient.llmRequest(this.sketch.id, 'nl2q', formData) - .then((response) => { - this.suggestedQuery = response.data - this.suggestedQueryLoading = false - }) - .catch((e) => { - console.error(e) - }) - }, + this.suggestedQueryLoading = true + let formData = { question: this.activeQuestion.display_name } + ApiClient.llmRequest(this.sketch.id, 'nl2q', formData) + .then((response) => { + this.suggestedQuery = response.data + this.suggestedQueryLoading = false + }) + .catch((e) => { + console.error(e) + }) + }, getQuestionTemplates() { this.isLoading = true ApiClient.getQuestionTemplates() From 8895e2a085600852b5a863f7962ffd1ad19defc9 Mon Sep 17 00:00:00 2001 From: itsmvd Date: Tue, 4 Mar 2025 10:41:16 +0000 Subject: [PATCH 34/63] Remove nl2q & llm_summarize features from the API --- timesketch/api/v1/resources/llm_summarize.py | 373 ------------------ timesketch/api/v1/resources/nl2q.py | 226 ----------- timesketch/api/v1/resources_test.py | 363 ----------------- timesketch/api/v1/routes.py | 4 - .../frontend-ng/src/utils/RestApiClient.js | 7 - timesketch/lib/llms/features/llm_summarize.py | 5 +- 6 files changed, 2 insertions(+), 976 deletions(-) delete mode 100644 timesketch/api/v1/resources/llm_summarize.py delete mode 100644 timesketch/api/v1/resources/nl2q.py diff --git a/timesketch/api/v1/resources/llm_summarize.py b/timesketch/api/v1/resources/llm_summarize.py deleted file mode 100644 index a5ecebc3b6..0000000000 --- a/timesketch/api/v1/resources/llm_summarize.py +++ /dev/null @@ -1,373 +0,0 @@ -# Copyright 2024 Google Inc. All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-
-"""Timesketch API for LLM event summarization."""
-
-import multiprocessing
-import multiprocessing.managers
-import logging
-from typing import Dict, Optional
-import json
-import time
-import pandas as pd
-import prometheus_client
-
-from flask import request, abort, jsonify, current_app
-from flask_login import login_required, current_user
-from flask_restful import Resource
-
-from timesketch.api.v1 import resources, export
-from timesketch.lib import definitions, utils
-from timesketch.lib.llms.providers import manager as provider_manager
-from timesketch.lib.definitions import METRICS_NAMESPACE
-from timesketch.models.sketch import Sketch
-
-logger = logging.getLogger("timesketch.api.llm_summarize")
-
-summary_response_schema = {
-    "type": "object",
-    "properties": {"summary": {"type": "string"}},
-    "required": ["summary"],
-}
-
-# Metrics definitions
-METRICS = {
-    "llm_summary_requests_total": prometheus_client.Counter(
-        "llm_summary_requests_total",
-        "Total number of LLM summarization requests received",
-        ["sketch_id"],
-        namespace=METRICS_NAMESPACE,
-    ),
-    "llm_summary_events_processed_total": prometheus_client.Counter(
-        "llm_summary_events_processed_total",
-        "Total number of events processed for LLM summarization",
-        ["sketch_id"],
-        namespace=METRICS_NAMESPACE,
-    ),
-    "llm_summary_unique_events_total": prometheus_client.Counter(
-        "llm_summary_unique_events_total",
-        "Total number of unique events sent to the LLM",
-        ["sketch_id"],
-        namespace=METRICS_NAMESPACE,
-    ),
-    "llm_summary_errors_total": prometheus_client.Counter(
-        "llm_summary_errors_total",
-        "Total number of errors encountered during LLM summarization",
-        ["sketch_id", "error_type"],
-        namespace=METRICS_NAMESPACE,
-    ),
-    "llm_summary_duration_seconds": prometheus_client.Summary(
-        "llm_summary_duration_seconds",
-        "Time taken to process an LLM summarization request (in seconds)",
-        ["sketch_id"],
-        namespace=METRICS_NAMESPACE,
-    ),
-}
-
-_LLM_TIMEOUT_WAIT_SECONDS = 30
-
-
-class LLMSummarizeResource(resources.ResourceMixin, Resource):
-    """Resource to get LLM summary of events."""
-
-    def _get_prompt_text(self, events_dict: list) -> str:
-        """Reads the prompt template from file and injects events.
-
-        Args:
-            events_dict: A list of dictionaries representing the events to summarize.
-
-        Returns:
-            The prompt text with the events injected.
-
-        Raises:
-            HTTP_STATUS_CODE_INTERNAL_SERVER_ERROR: If prompt template file
-            is not configured, not found, or error when reading it.
-        """
-        prompt_file_path = current_app.config.get("PROMPT_LLM_SUMMARIZATION")
-        if not prompt_file_path:
-            logger.error("PROMPT_LLM_SUMMARIZATION config not set in timesketch.conf")
-            abort(
-                definitions.HTTP_STATUS_CODE_INTERNAL_SERVER_ERROR,
-                "LLM summarization prompt path not configured.",
-            )
-
-        try:
-            with open(prompt_file_path, "r", encoding="utf-8") as file_handle:
-                prompt_template = file_handle.read()
-        except FileNotFoundError:
-            logger.error("Prompt file not found: %s", prompt_file_path)
-            abort(
-                definitions.HTTP_STATUS_CODE_INTERNAL_SERVER_ERROR,
-                "LLM Prompt file not found on the server.",
-            )
-        except IOError as e:
-            logger.error("Error reading prompt file: %s", e)
-            abort(
-                definitions.HTTP_STATUS_CODE_INTERNAL_SERVER_ERROR,
-                "Error reading LLM prompt file.",
-            )
-
-        prompt_text = prompt_template.replace("<EVENTS_JSON>", json.dumps(events_dict))
-        return prompt_text
-
-    @login_required
-    def post(self, sketch_id: int):
-        """Handles POST request to the resource.
- - Handler for /api/v1/sketches/:sketch_id/events/summary/ - - Args: - sketch_id: Integer primary key for a sketch database model. - - Returns: - JSON response with event summary, total event count, and unique event count. - - Raises: - HTTP_STATUS_CODE_NOT_FOUND: If no sketch is found with the given ID. - HTTP_STATUS_CODE_FORBIDDEN: If the user does not - have read access to the sketch. - HTTP_STATUS_CODE_BAD_REQUEST: If the POST request does not contain data, - if no events are found, or if there's an issue getting LLM data. - HTTP_STATUS_CODE_INTERNAL_SERVER_ERROR: If LLM provider is not configured. - """ - start_time = time.time() - METRICS["llm_summary_requests_total"].labels(sketch_id=str(sketch_id)).inc() - - sketch = Sketch.get_with_acl(sketch_id) - if not sketch: - abort( - definitions.HTTP_STATUS_CODE_NOT_FOUND, "No sketch found with this ID." - ) - if not sketch.has_permission(current_user, "read"): - abort( - definitions.HTTP_STATUS_CODE_FORBIDDEN, - "User does not have read access controls on sketch.", - ) - - form = request.json - if not form: - abort( - definitions.HTTP_STATUS_CODE_BAD_REQUEST, - "The POST request requires data", - ) - - query_filter = form.get("filter", {}) - query_string = form.get("query", "*") - if not query_string: - query_string = "*" - - events_df = self._run_timesketch_query(sketch, query_string, query_filter) - if events_df is None or events_df.empty: - return jsonify( - {"summary": "No events to summarize based on the current filter."} - ) - new_df = events_df[["message"]] - unique_df = new_df.drop_duplicates(subset="message", keep="first") - events_dict = unique_df.to_dict(orient="records") - - total_events_count = len(new_df) - unique_events_count = len(unique_df) - - METRICS["llm_summary_events_processed_total"].labels( - sketch_id=str(sketch_id) - ).inc(total_events_count) - METRICS["llm_summary_unique_events_total"].labels(sketch_id=str(sketch_id)).inc( - unique_events_count - ) - - logger.debug("Summarizing %d events", total_events_count) - logger.debug("Reduced to %d unique events", unique_events_count) - - if not events_dict: - return jsonify( - {"summary": "No events to summarize based on the current filter."} - ) - - try: - prompt_text = self._get_prompt_text(events_dict) - # TODO(itsmvd): Change to proper background worker such as celery in future - with multiprocessing.Manager() as manager: - shared_response = manager.dict() - p = multiprocessing.Process( - target=self._get_content_with_timeout, - args=(prompt_text, summary_response_schema, shared_response), - ) - p.start() - p.join(timeout=_LLM_TIMEOUT_WAIT_SECONDS) - - if p.is_alive(): - logger.warning( - "LLM call timed out after %d seconds.", - _LLM_TIMEOUT_WAIT_SECONDS, - ) - p.terminate() - p.join() - METRICS["llm_summary_errors_total"].labels( - sketch_id=str(sketch_id), error_type="timeout" - ).inc() - abort( - definitions.HTTP_STATUS_CODE_BAD_REQUEST, - "LLM call timed out.", - ) - - response = dict(shared_response) - - except Exception as e: # pylint: disable=broad-except - logger.error( - "Unable to call LLM to process events for summary. 
Error: %s", e - ) - METRICS["llm_summary_errors_total"].labels( - sketch_id=str(sketch_id), error_type="llm_api_error" - ).inc() - abort( - definitions.HTTP_STATUS_CODE_BAD_REQUEST, - "Unable to get LLM data, check server configuration for LLM.", - ) - - if not response or not response.get("summary"): - logger.error("No valid summary from LLM.") - METRICS["llm_summary_errors_total"].labels( - sketch_id=str(sketch_id), error_type="no_summary_error" - ).inc() - abort( - definitions.HTTP_STATUS_CODE_BAD_REQUEST, - "No valid summary from LLM.", - ) - summary_text = response.get("summary") - - duration = time.time() - start_time - METRICS["llm_summary_duration_seconds"].labels( - sketch_id=str(sketch_id) - ).observe(duration) - - # TODO: Add runtime seconds - return jsonify( - { - "summary": summary_text, - "summary_event_count": total_events_count, - "summary_unique_event_count": unique_events_count, - } - ) - - def _get_content_with_timeout( - self, - prompt: str, - response_schema: Optional[dict], - shared_response: multiprocessing.managers.DictProxy, - ) -> None: - """Send a prompt to the LLM and get a response within a process. - - Args: - prompt: The prompt to send to the LLM. - response_schema: If set, the LLM will attempt to return a structured - response that conforms to this schema. If set to None, the LLM - will return an unstructured response - shared_response: A shared dictionary to store the response. - """ - try: - response = self._get_content(prompt, response_schema) - shared_response.update(response) - except Exception as e: # pylint: disable=broad-except - logger.error("Error in LLM call within process: %s", e) - shared_response.update({"error": str(e)}) - - def _get_content( - self, prompt: str, response_schema: Optional[dict] = None - ) -> Optional[Dict]: - """Send a prompt to the LLM and get a response. - - Args: - prompt: The prompt to send to the LLM. - response_schema: If set, the LLM will attempt to return a structured - response that conforms to this schema. If set to None, the LLM - will return an unstructured response - - Returns: - If response_schema is set, a dictionary representing the structured - response will be returned. If response_schema is None, the raw text - response from the LLM will be returned as a string. - - Raises: - HTTP_STATUS_CODE_INTERNAL_SERVER_ERROR: If no LLM provider is defined - in the configuration file - HTTP_STATUS_CODE_BAD_REQUEST: If an error occurs with the - configured LLM provider - """ - try: - feature_name = "llm_summarize" - llm = provider_manager.LLMManager.create_provider(feature_name=feature_name) - except Exception as e: # pylint: disable=broad-except - logger.error("Error LLM Provider: %s", e) - abort( - definitions.HTTP_STATUS_CODE_BAD_REQUEST, - "An error occurred with the configured LLM provider. " - "Please check the logs and configuration file.", - ) - - prediction = llm.generate(prompt, response_schema=response_schema) - return prediction - - def _run_timesketch_query( - self, - sketch: Sketch, - query_string: str = "*", - query_filter: Optional[dict] = None, - id_list: Optional[list] = None, - ) -> pd.DataFrame: - """Runs a timesketch query. - - Args: - sketch: The Sketch object to query. - query_string: The query string to use. - query_filter: The query filter to use. - id_list: A list of event IDs to use. - - Returns: - A pandas DataFrame containing the query results. - - Raises: - HTTP_STATUS_CODE_BAD_REQUEST: If no valid search indices were found - to perform the search on. 
- """ - if not query_filter: - query_filter = {} - - if id_list: - id_query = " OR ".join([f'_id:"{event_id}"' for event_id in id_list]) - query_string = id_query - - all_indices = list({t.searchindex.index_name for t in sketch.timelines}) - indices = query_filter.get("indices", all_indices) - - if "_all" in indices: - indices = all_indices - - indices, timeline_ids = utils.get_validated_indices(indices, sketch) - - if not indices: - abort( - definitions.HTTP_STATUS_CODE_BAD_REQUEST, - "No valid search indices were found to perform the search on.", - ) - - result = self.datastore.search( - sketch_id=sketch.id, - query_string=query_string, - query_filter=query_filter, - query_dsl="", - indices=indices, - timeline_ids=timeline_ids, - ) - - return export.query_results_to_dataframe(result, sketch) diff --git a/timesketch/api/v1/resources/nl2q.py b/timesketch/api/v1/resources/nl2q.py deleted file mode 100644 index 770d8861ee..0000000000 --- a/timesketch/api/v1/resources/nl2q.py +++ /dev/null @@ -1,226 +0,0 @@ -# Copyright 2024 Google Inc. All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -"""Natural language to query (NL2Q) API for version 1 of the Timesketch API.""" - -import logging - -from flask import jsonify -from flask import request -from flask import abort -from flask import current_app -from flask_restful import Resource -from flask_login import login_required -from flask_login import current_user - -import pandas as pd - -from timesketch.api.v1 import utils -from timesketch.lib.llms.providers import manager -from timesketch.lib.definitions import HTTP_STATUS_CODE_BAD_REQUEST -from timesketch.lib.definitions import HTTP_STATUS_CODE_INTERNAL_SERVER_ERROR -from timesketch.lib.definitions import HTTP_STATUS_CODE_NOT_FOUND -from timesketch.lib.definitions import HTTP_STATUS_CODE_FORBIDDEN -from timesketch.models.sketch import Sketch - - -logger = logging.getLogger("timesketch.api_nl2q") - - -class Nl2qResource(Resource): - """Resource to get NL2Q prediction.""" - - def build_prompt(self, question, sketch_id): - """Builds the prompt. - - Args: - sketch_id: Sketch ID. - - Return: - String containing the whole prompt. - """ - prompt = "" - examples = "" - prompt_file = current_app.config.get("PROMPT_NL2Q", "") - examples_file = current_app.config.get("EXAMPLES_NL2Q", "") - try: - with open(prompt_file, "r") as file: - prompt = file.read() - except (OSError, IOError): - abort(HTTP_STATUS_CODE_INTERNAL_SERVER_ERROR, "No prompt file found") - try: - with open(examples_file, "r") as file: - examples = file.read() - except (OSError, IOError): - abort(HTTP_STATUS_CODE_INTERNAL_SERVER_ERROR, "No examples file found") - prompt = prompt.format( - examples=examples, - question=question, - data_types=self.data_types_descriptions(self.sketch_data_types(sketch_id)), - ) - return prompt - - def sketch_data_types(self, sketch_id): - """Get the data types for the current sketch. - - Args: - sketch_id: Sketch ID. - - Returns: - List of data types in a sketch. 
- """ - output = [] - sketch = Sketch.get_with_acl(sketch_id) - if not sketch: - abort(HTTP_STATUS_CODE_NOT_FOUND, "No sketch found with this ID.") - - if not sketch.has_permission(current_user, "read"): - abort( - HTTP_STATUS_CODE_FORBIDDEN, "User does not have read access to sketch" - ) - - data_type_aggregation = utils.run_aggregator( - sketch_id, "field_bucket", {"field": "data_type", "limit": "1000"} - ) - - if not data_type_aggregation or not data_type_aggregation[0]: - abort( - HTTP_STATUS_CODE_INTERNAL_SERVER_ERROR, - "Internal problem with the aggregations.", - ) - data_types = data_type_aggregation[0].values - if not data_types: - abort( - HTTP_STATUS_CODE_INTERNAL_SERVER_ERROR, - "No data types in the sketch.", - ) - for data_type in data_types: - output.append(data_type.get("data_type")) - return ",".join(output) - - def data_types_descriptions(self, data_types): - """Creates a formatted string of data types and attribute descriptions. - - Args: - data_types: List of data types in the sketch. - - Returns: - Formatted string of data types and attribute descriptions. - """ - df_data_types = utils.load_csv_file("DATA_TYPES_PATH") - if df_data_types.empty: - abort( - HTTP_STATUS_CODE_INTERNAL_SERVER_ERROR, - "No data types description file or the file is empty.", - ) - df_short_data_types = pd.DataFrame( - df_data_types.groupby("data_type").apply(self.concatenate_values), - columns=["fields"], - ) - df_short_data_types["data_type"] = df_short_data_types.index - df_short_data_types["data_type"] = df_short_data_types["data_type"].apply( - lambda x: x.strip() - ) - df_short_data_types.reset_index(drop=True, inplace=True) - output = [] - for dtype in data_types.split(","): - extract = df_short_data_types[ - df_short_data_types["data_type"] == dtype.strip() - ] - if extract.empty: - print(f"'{dtype.strip()}' not found in [{data_types}]") - continue - output.append(extract.iloc[0]["fields"]) - return "\n".join(output) - - def generate_fields(self, group): - """Generated the fields for a data type. - - Args: - group: Data type fields. - - Returns: - String of the generated fields. - """ - return ", ".join( - f'"{f}"' - for f, t, d in zip(group["field"], group["type"], group["description"]) - ) - - def concatenate_values(self, group): - """Concatenates the fields for a data type. - - Args: - group: Data type fields. - - Returns: - String of the concatenated fields. - """ - return f'* "{group["data_type"].iloc[0]}" -> {self.generate_fields(group)}' - - @login_required - def post(self, sketch_id): - """Handles POST request to the resource. - - Args: - sketch_id: Sketch ID. - - Returns: - JSON representing the LLM prediction. 
- """ - form = request.json - if not form: - abort(HTTP_STATUS_CODE_BAD_REQUEST, "No JSON data provided") - - if "question" not in form: - abort(HTTP_STATUS_CODE_BAD_REQUEST, "The 'question' parameter is required!") - - llm_configs = current_app.config.get("LLM_PROVIDER_CONFIGS") - if not llm_configs: - logger.error("No LLM provider configuration defined.") - abort( - HTTP_STATUS_CODE_INTERNAL_SERVER_ERROR, - "No LLM provider was defined in the main configuration file", - ) - - question = form.get("question") - prompt = self.build_prompt(question, sketch_id) - - result_schema = { - "name": "AI generated search query", - "query_string": None, - "error": None, - } - - feature_name = "nl2q" - try: - llm = manager.LLMManager.create_provider(feature_name=feature_name) - except Exception as e: # pylint: disable=broad-except - logger.error("Error LLM Provider: {}".format(e)) - result_schema["error"] = ( - "Error loading LLM Provider. Please try again later!" - ) - return jsonify(result_schema) - - try: - prediction = llm.generate(prompt) - except Exception as e: # pylint: disable=broad-except - logger.error("Error NL2Q prompt: {}".format(e)) - result_schema["error"] = ( - "An error occurred generating the query via the defined LLM. " - "Please try again later!" - ) - return jsonify(result_schema) - - result_schema["query_string"] = prediction.strip("`\n\r\t ") - return jsonify(result_schema) diff --git a/timesketch/api/v1/resources_test.py b/timesketch/api/v1/resources_test.py index 39a8701659..5fdee2ed11 100644 --- a/timesketch/api/v1/resources_test.py +++ b/timesketch/api/v1/resources_test.py @@ -24,7 +24,6 @@ from timesketch.lib.definitions import HTTP_STATUS_CODE_NOT_FOUND from timesketch.lib.definitions import HTTP_STATUS_CODE_OK from timesketch.lib.definitions import HTTP_STATUS_CODE_FORBIDDEN -from timesketch.lib.definitions import HTTP_STATUS_CODE_INTERNAL_SERVER_ERROR from timesketch.lib.testlib import BaseTest from timesketch.lib.testlib import MockDataStore from timesketch.lib.dfiq import DFIQ @@ -1285,303 +1284,6 @@ def test_user_get_resource_admin(self): self.assertEqual(data["objects"][0]["username"], "test1") -class TestNl2qResource(BaseTest): - """Test Nl2qResource.""" - - resource_url = "/api/v1/sketches/1/nl2q/" - - @mock.patch("timesketch.lib.llms.providers.manager.LLMManager.create_provider") - @mock.patch("timesketch.api.v1.utils.run_aggregator") - @mock.patch("timesketch.api.v1.resources.OpenSearchDataStore", MockDataStore) - def test_nl2q_prompt(self, mock_aggregator, mock_create_provider): - """Test the prompt is created correctly.""" - - self.login() - data = dict(question="Question for LLM?") - mock_AggregationResult = mock.MagicMock() - mock_AggregationResult.values = [ - {"data_type": "test:data_type:1"}, - {"data_type": "test:data_type:2"}, - ] - mock_aggregator.return_value = (mock_AggregationResult, {}) - mock_llm = mock.Mock() - mock_llm.generate.return_value = "LLM generated query" - mock_create_provider.return_value = mock_llm - response = self.client.post( - self.resource_url, - data=json.dumps(data), - content_type="application/json", - ) - expected_input = ( - "Examples:\n" - "example 1\n" - "\n" - "example 2\n" - "Types:\n" - '* "test:data_type:1" -> "field_test_1", "field_test_2"\n' - '* "test:data_type:2" -> "field_test_3", "field_test_4"\n' - "Question:\n" - "Question for LLM?" 
- ) - mock_llm.generate.assert_called_once_with(expected_input) - self.assertEqual(response.status_code, HTTP_STATUS_CODE_OK) - self.assertDictEqual( - response.json, - { - "name": "AI generated search query", - "query_string": "LLM generated query", - "error": None, - }, - ) - - @mock.patch("timesketch.api.v1.utils.run_aggregator") - @mock.patch("timesketch.api.v1.resources.OpenSearchDataStore", MockDataStore) - def test_nl2q_no_prompt(self, mock_aggregator): - """Test error when the prompt file is missing or not configured.""" - - self.app.config["PROMPT_NL2Q"] = "/file_does_not_exist.txt" - self.login() - data = dict(question="Question for LLM?") - mock_AggregationResult = mock.MagicMock() - mock_AggregationResult.values = [ - {"data_type": "test:data_type:1"}, - {"data_type": "test:data_type:2"}, - ] - mock_aggregator.return_value = (mock_AggregationResult, {}) - response = self.client.post( - self.resource_url, - data=json.dumps(data), - content_type="application/json", - ) - self.assertEqual(response.status_code, HTTP_STATUS_CODE_INTERNAL_SERVER_ERROR) - - del self.app.config["PROMPT_NL2Q"] - response = self.client.post( - self.resource_url, - data=json.dumps(data), - content_type="application/json", - ) - self.assertEqual(response.status_code, HTTP_STATUS_CODE_INTERNAL_SERVER_ERROR) - # data = json.loads(response.get_data(as_text=True)) - # self.assertIsNotNone(data.get("error")) - - @mock.patch("timesketch.api.v1.utils.run_aggregator") - @mock.patch("timesketch.api.v1.resources.OpenSearchDataStore", MockDataStore) - def test_nl2q_no_examples(self, mock_aggregator): - """Test error when the prompt file is missing or not configured.""" - - self.app.config["EXAMPLES_NL2Q"] = "/file_does_not_exist.txt" - self.login() - data = dict(question="Question for LLM?") - mock_AggregationResult = mock.MagicMock() - mock_AggregationResult.values = [ - {"data_type": "test:data_type:1"}, - {"data_type": "test:data_type:2"}, - ] - mock_aggregator.return_value = (mock_AggregationResult, {}) - response = self.client.post( - self.resource_url, - data=json.dumps(data), - content_type="application/json", - ) - self.assertEqual(response.status_code, HTTP_STATUS_CODE_INTERNAL_SERVER_ERROR) - - del self.app.config["EXAMPLES_NL2Q"] - response = self.client.post( - self.resource_url, - data=json.dumps(data), - content_type="application/json", - ) - self.assertEqual(response.status_code, HTTP_STATUS_CODE_INTERNAL_SERVER_ERROR) - - @mock.patch("timesketch.api.v1.resources.OpenSearchDataStore", MockDataStore) - def test_nl2q_no_question(self): - """Test nl2q without submitting a question.""" - - self.login() - data = dict() - response = self.client.post( - self.resource_url, - data=json.dumps(data), - content_type="application/json", - ) - self.assertEqual(response.status_code, HTTP_STATUS_CODE_BAD_REQUEST) - - @mock.patch("timesketch.api.v1.utils.run_aggregator") - @mock.patch("timesketch.api.v1.resources.OpenSearchDataStore", MockDataStore) - def test_nl2q_wrong_llm_provider(self, mock_aggregator): - """Test nl2q with llm provider that does not exist.""" - - self.app.config["LLM_PROVIDER_CONFIGS"] = {"default": {"DoesNotExists": {}}} - self.login() - self.login() - data = dict(question="Question for LLM?") - mock_AggregationResult = mock.MagicMock() - mock_AggregationResult.values = [ - {"data_type": "test:data_type:1"}, - {"data_type": "test:data_type:2"}, - ] - mock_aggregator.return_value = (mock_AggregationResult, {}) - response = self.client.post( - self.resource_url, - data=json.dumps(data), - 
content_type="application/json", - ) - self.assertEqual(response.status_code, HTTP_STATUS_CODE_OK) - data = json.loads(response.get_data(as_text=True)) - self.assertIsNotNone(data.get("error")) - - @mock.patch("timesketch.api.v1.resources.OpenSearchDataStore", MockDataStore) - def test_nl2q_no_llm_provider(self): - """Test nl2q with no LLM provider configured.""" - - if "LLM_PROVIDER_CONFIGS" in self.app.config: - del self.app.config["LLM_PROVIDER_CONFIGS"] - self.login() - data = dict(question="Question for LLM?") - response = self.client.post( - self.resource_url, - data=json.dumps(data), - content_type="application/json", - ) - self.assertEqual(response.status_code, HTTP_STATUS_CODE_INTERNAL_SERVER_ERROR) - - @mock.patch("timesketch.api.v1.resources.OpenSearchDataStore", MockDataStore) - def test_nl2q_no_sketch(self): - """Test the nl2q with non existing sketch.""" - - self.login() - data = dict(question="Question for LLM?") - response = self.client.post( - "/api/v1/sketches/9999/nl2q/", - data=json.dumps(data), - content_type="application/json", - ) - self.assertEqual(response.status_code, HTTP_STATUS_CODE_NOT_FOUND) - - @mock.patch("timesketch.api.v1.resources.OpenSearchDataStore", MockDataStore) - def test_nl2q_no_permission(self): - """Test the nl2q with no permission on the sketch.""" - - self.login() - data = dict(question="Question for LLM?") - response = self.client.post( - "/api/v1/sketches/2/nl2q/", - data=json.dumps(data), - content_type="application/json", - ) - self.assertEqual(response.status_code, HTTP_STATUS_CODE_FORBIDDEN) - - @mock.patch("timesketch.lib.llms.providers.manager.LLMManager.create_provider") - @mock.patch("timesketch.api.v1.utils.run_aggregator") - @mock.patch("timesketch.api.v1.resources.OpenSearchDataStore", MockDataStore) - def test_nl2q_llm_error(self, mock_aggregator, mock_create_provider): - """Test nl2q with llm error.""" - - self.login() - data = dict(question="Question for LLM?") - mock_AggregationResult = mock.MagicMock() - mock_AggregationResult.values = [ - {"data_type": "test:data_type:1"}, - {"data_type": "test:data_type:2"}, - ] - mock_aggregator.return_value = (mock_AggregationResult, {}) - mock_llm = mock.Mock() - mock_llm.generate.side_effect = Exception("Test exception") - mock_create_provider.return_value = mock_llm - response = self.client.post( - self.resource_url, - data=json.dumps(data), - content_type="application/json", - ) - self.assertEqual( - response.status_code, HTTP_STATUS_CODE_OK - ) # Still expect 200 OK with error in JSON - data = json.loads(response.get_data(as_text=True)) - self.assertIsNotNone(data.get("error")) - - @mock.patch("timesketch.lib.llms.providers.manager.LLMManager.create_provider") - @mock.patch("timesketch.api.v1.utils.run_aggregator") - @mock.patch("timesketch.api.v1.resources.OpenSearchDataStore", MockDataStore) - def test_nl2q_strip_back_ticks(self, mock_aggregator, mock_create_provider): - """Test the result does not have any back tick.""" - - self.login() - data = dict(question="Question for LLM?") - mock_AggregationResult = mock.MagicMock() - mock_AggregationResult.values = [ - {"data_type": "test:data_type:1"}, - {"data_type": "test:data_type:2"}, - ] - mock_aggregator.return_value = (mock_AggregationResult, {}) - expected_input = ( - "Examples:\n" - "example 1\n" - "\n" - "example 2\n" - "Types:\n" - '* "test:data_type:1" -> "field_test_1", "field_test_2"\n' - '* "test:data_type:2" -> "field_test_3", "field_test_4"\n' - "Question:\n" - "Question for LLM?" 
- ) - - mock_llm_1 = mock.Mock() - mock_llm_1.generate.return_value = " \t`LLM generated query`\n " - mock_create_provider.return_value = mock_llm_1 - response = self.client.post( - self.resource_url, - data=json.dumps(data), - content_type="application/json", - ) - mock_llm_1.generate.assert_called_once_with(expected_input) - self.assertEqual(response.status_code, HTTP_STATUS_CODE_OK) - self.assertDictEqual( - response.json, - { - "name": "AI generated search query", - "query_string": "LLM generated query", - "error": None, - }, - ) - mock_llm_2 = mock.Mock() - mock_llm_2.generate.return_value = "```LLM generated query``" - mock_create_provider.return_value = mock_llm_2 - response = self.client.post( - self.resource_url, - data=json.dumps(data), - content_type="application/json", - ) - mock_llm_2.generate.assert_called_once_with(expected_input) - self.assertEqual(response.status_code, HTTP_STATUS_CODE_OK) - self.assertDictEqual( - response.json, - { - "name": "AI generated search query", - "query_string": "LLM generated query", - "error": None, - }, - ) - mock_llm_3 = mock.Mock() - mock_llm_3.generate.return_value = " \t```LLM generated query```\n " - mock_create_provider.return_value = mock_llm_3 - response = self.client.post( - self.resource_url, - data=json.dumps(data), - content_type="application/json", - ) - mock_llm_3.generate.assert_called_once_with(expected_input) - self.assertEqual(response.status_code, HTTP_STATUS_CODE_OK) - self.assertDictEqual( - response.json, - { - "name": "AI generated search query", - "query_string": "LLM generated query", - "error": None, - }, - ) - - class SystemSettingsResourceTest(BaseTest): """Test system settings resource.""" @@ -1721,71 +1423,6 @@ def test_check_and_run_dfiq_analysis_steps(self, mock_analyzer_manager): self.assertFalse(result) -class MockLLM: - """Mock LLM class for testing.""" - - def generate(self): - return {"summary": "Mock summary from LLM"} - - -class TestLLMSummarizeResource(BaseTest): - """Test LLMSummarizeResource.""" - - resource_url = "/api/v1/sketches/1/events/summary/" - - @mock.patch("timesketch.api.v1.resources.OpenSearchDataStore", MockDataStore) - def test_llm_summarize_no_events(self): - """Test LLM summarizer when no events are returned from the Timesketch query.""" - self.login() - self.app.config["PROMPT_LLM_SUMMARIZATION"] = "data/llm_summarize/prompt.txt" - - with mock.patch( - "timesketch.api.v1.resources.llm_summarize.LLMSummarizeResource._run_timesketch_query", # pylint: disable=line-too-long - return_value=pd.DataFrame(), - ), mock.patch("timesketch.api.v1.resources.OpenSearchDataStore", MockDataStore): - response = self.client.post( - self.resource_url, - data=json.dumps({"query": "*"}), - content_type="application/json", - ) - - self.assertEqual(response.status_code, 200) - response_data = json.loads(response.get_data(as_text=True)) - self.assertEqual( - response_data.get("summary"), - "No events to summarize based on the current filter.", - ) - - @mock.patch("timesketch.api.v1.resources.OpenSearchDataStore", MockDataStore) - @mock.patch("timesketch.lib.llms.providers.manager.LLMManager.create_provider") - def test_llm_summarize_with_events(self, mock_create_provider): - """Test LLM summarizer with events returned and mock LLM.""" - self.login() - self.app.config["PROMPT_LLM_SUMMARIZATION"] = "data/llm_summarize/prompt.txt" - mock_create_provider.return_value = MockLLM() - - sample_events = pd.DataFrame([{"message": "Test event message"}]) - - with mock.patch( - 
"timesketch.api.v1.resources.llm_summarize.LLMSummarizeResource._run_timesketch_query", # pylint: disable=line-too-long - return_value=sample_events, - ), mock.patch( - "timesketch.api.v1.resources.llm_summarize.LLMSummarizeResource._get_content", # pylint: disable=line-too-long - return_value={"summary": "Mock summary from LLM"}, - ), mock.patch( - "timesketch.api.v1.resources.OpenSearchDataStore", MockDataStore - ): - response = self.client.post( - self.resource_url, - data=json.dumps({"query": "*"}), - content_type="application/json", - ) - - self.assertEqual(response.status_code, 200) - response_data = json.loads(response.get_data(as_text=True)) - self.assertEqual(response_data.get("summary"), "Mock summary from LLM") - - @mock.patch("timesketch.api.v1.resources.OpenSearchDataStore", MockDataStore) class LLMResourceTest(BaseTest): """Test LLMResource.""" diff --git a/timesketch/api/v1/routes.py b/timesketch/api/v1/routes.py index 5bc249ebc5..7a10d8537c 100644 --- a/timesketch/api/v1/routes.py +++ b/timesketch/api/v1/routes.py @@ -76,8 +76,6 @@ from .resources.intelligence import TagMetadataResource from .resources.contextlinks import ContextLinkConfigResource from .resources.unfurl import UnfurlResource -from .resources.nl2q import Nl2qResource -from .resources.llm_summarize import LLMSummarizeResource from .resources.llm import LLMResource from .resources.settings import SystemSettingsResource @@ -203,8 +201,6 @@ (TagMetadataResource, "/intelligence/tagmetadata/"), (ContextLinkConfigResource, "/contextlinks/"), (UnfurlResource, "/unfurl/"), - (Nl2qResource, "/sketches//nl2q/"), - (LLMSummarizeResource, "/sketches//events/summary/"), (LLMResource, "/sketches//llm/"), (SystemSettingsResource, "/settings/"), # Scenario templates diff --git a/timesketch/frontend-ng/src/utils/RestApiClient.js b/timesketch/frontend-ng/src/utils/RestApiClient.js index 86416ebd33..b288e6dbf2 100644 --- a/timesketch/frontend-ng/src/utils/RestApiClient.js +++ b/timesketch/frontend-ng/src/utils/RestApiClient.js @@ -521,13 +521,6 @@ export default { let formData = { settings: settings } return RestApiClient.post('/users/me/settings/', formData) }, - nl2q(sketchId, question) { - let formData = { question: question } - return RestApiClient.post('/sketches/' + sketchId + '/nl2q/', formData) - }, - getEventSummary(sketchId, formData) { - return RestApiClient.post('/sketches/' + sketchId + '/events/summary/', formData) - }, llmRequest(sketchId, featureName, formData) { formData = formData || {} formData.feature = featureName diff --git a/timesketch/lib/llms/features/llm_summarize.py b/timesketch/lib/llms/features/llm_summarize.py index 9a6cf5f457..e776fcb705 100644 --- a/timesketch/lib/llms/features/llm_summarize.py +++ b/timesketch/lib/llms/features/llm_summarize.py @@ -27,16 +27,15 @@ logger = logging.getLogger("timesketch.llm.summarize_feature") -# TODO(itsmvd): Remove 'feature' prefix after migration METRICS = { "llm_summary_events_processed_total": prometheus_client.Counter( - "feature_llm_summary_events_processed_total", # avoid duplicate registration + "llm_summary_events_processed_total", "Total number of events processed for LLM summarization", ["sketch_id"], namespace=METRICS_NAMESPACE, ), "llm_summary_unique_events_total": prometheus_client.Counter( - "feature_llm_summary_unique_events_total", # avoid duplicate registration + "llm_summary_unique_events_total", "Total number of unique events sent to the LLM", ["sketch_id"], namespace=METRICS_NAMESPACE, From 092711fd2bd6e873db646c3078d2c6395741dc11 Mon Sep 17 
00:00:00 2001 From: itsmvd Date: Tue, 4 Mar 2025 13:15:10 +0000 Subject: [PATCH 35/63] remove unused import --- timesketch/api/v1/resources_test.py | 1 - 1 file changed, 1 deletion(-) diff --git a/timesketch/api/v1/resources_test.py b/timesketch/api/v1/resources_test.py index 5fdee2ed11..13cdcd5816 100644 --- a/timesketch/api/v1/resources_test.py +++ b/timesketch/api/v1/resources_test.py @@ -17,7 +17,6 @@ import json import mock -import pandas as pd from timesketch.lib.definitions import HTTP_STATUS_CODE_BAD_REQUEST from timesketch.lib.definitions import HTTP_STATUS_CODE_CREATED From 4f37d7ffa2a278ed773eea069c0ce4da5db0cc56 Mon Sep 17 00:00:00 2001 From: itsmvd Date: Wed, 5 Mar 2025 15:44:13 +0000 Subject: [PATCH 36/63] Update RestApiClient in frontend-v3 --- timesketch/frontend-v3/src/utils/RestApiClient.js | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/timesketch/frontend-v3/src/utils/RestApiClient.js b/timesketch/frontend-v3/src/utils/RestApiClient.js index f3c8827e00..a08bcc56d2 100644 --- a/timesketch/frontend-v3/src/utils/RestApiClient.js +++ b/timesketch/frontend-v3/src/utils/RestApiClient.js @@ -672,8 +672,10 @@ export default { let formData = { settings: settings }; return RestApiClient.post("/users/me/settings/", formData); }, - nl2q(sketchId, question) { - let formData = { question: question }; - return RestApiClient.post("/sketches/" + sketchId + "/nl2q/", formData); - }, + llmRequest(sketchId, featureName, formData) { + formData = formData || {} + formData.feature = featureName + + return RestApiClient.post(`/sketches/${sketchId}/llm/`, formData) + } }; From 6549bbdb7ee68a2848730b7b4fc02823c2649f3e Mon Sep 17 00:00:00 2001 From: itsmvd Date: Mon, 10 Mar 2025 08:58:28 +0000 Subject: [PATCH 37/63] stash local changes --- data/llm_summarize/prompt_forensic_report.txt | 14 + timesketch/lib/llms/actions.py | 74 +++++ .../lib/llms/features/llm_forensic_report.py | 305 ++++++++++++++++++ 3 files changed, 393 insertions(+) create mode 100644 data/llm_summarize/prompt_forensic_report.txt create mode 100644 timesketch/lib/llms/actions.py create mode 100644 timesketch/lib/llms/features/llm_forensic_report.py diff --git a/data/llm_summarize/prompt_forensic_report.txt b/data/llm_summarize/prompt_forensic_report.txt new file mode 100644 index 0000000000..8dcfa80962 --- /dev/null +++ b/data/llm_summarize/prompt_forensic_report.txt @@ -0,0 +1,14 @@ +You are a highly skilled digital forensic analyst. Your task is to analyze a set of security events, which have been identified as potentially significant ("starred events") in a Timesketch investigation. Based on these events, generate a concise forensic report summary, formatted in Markdown. + +Focus on identifying: + +* **Incident Overview:** Provide a brief summary of what appears to have happened based on these events. What type of incident is suggested (e.g., unauthorized access, malware infection, data breach attempt)? +* **Key Findings:** Highlight the most important observations and indicators from the events. Be specific and mention key entities (usernames, IP addresses, file paths, process names) involved. +* **Timeline of Significant Events (Chronological Order):** Briefly outline the sequence of key actions observed in the starred events. +* **Potential Impact/Severity:** Assess the potential impact or severity of the incident based on the available information. +* **Recommended Next Steps:** Suggest 2-3 concrete next steps for the investigation based on your analysis. 
+
+Use bolding (**...**) for key entities and findings. Format the output as a Markdown document.
+
+Here are the starred events in JSON format:
+
\ No newline at end of file
diff --git a/timesketch/lib/llms/actions.py b/timesketch/lib/llms/actions.py
new file mode 100644
index 0000000000..98c14f7676
--- /dev/null
+++ b/timesketch/lib/llms/actions.py
@@ -0,0 +1,74 @@
+# Copyright 2025 Google Inc. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""Actions for LLM features in Timesketch."""
+import json
+import logging
+import time
+from typing import Optional
+from timesketch.models import db_session
+from timesketch.models.sketch import Sketch, Story
+
+logger = logging.getLogger("timesketch.llm.actions")
+
+def create_story(
+    sketch: Sketch,
+    content: str,
+    title: Optional[str] = None
+) -> int:
+    """Creates a Timesketch story with the given content.
+
+    Args:
+        sketch: Sketch object.
+        content: Text content to add to the story.
+        title: Title for the story. If None, a default title with a timestamp is used.
+
+    Returns:
+        The ID of the newly created story.
+
+    Raises:
+        ValueError: If there's an error creating the story.
+    """
+    if title is None:
+        title = f"AI Generated Report - {time.strftime('%Y-%m-%d %H:%M')}"
+
+    try:
+        # Create the Story object; its body is a JSON-encoded list of content blocks.
+        story = Story(
+            title=title,
+            sketch=sketch,
+            user=sketch.user
+        )
+
+        content_blocks = [
+            {
+                "componentName": "",
+                "componentProps": {},
+                "content": content,
+                "edit": False,
+                "showPanel": False,
+                "isActive": False
+            }
+        ]
+
+        story.content = json.dumps(content_blocks)
+
+        db_session.add(story)
+        db_session.commit()
+
+        logger.info("Created story with ID %d for sketch %d", story.id, sketch.id)
+        return story.id
+
+    except Exception as e:
+        logger.error("Error creating story: %s", e)
+        raise ValueError(f"Error creating story: {e}") from e
diff --git a/timesketch/lib/llms/features/llm_forensic_report.py b/timesketch/lib/llms/features/llm_forensic_report.py
new file mode 100644
index 0000000000..4ff2362076
--- /dev/null
+++ b/timesketch/lib/llms/features/llm_forensic_report.py
@@ -0,0 +1,305 @@
+# Copyright 2025 Google Inc. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
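+#
+# A minimal usage sketch of this feature (names such as `provider`, `form`,
+# `datastore` and `timeline_ids` are illustrative assumptions, not defined
+# in this module):
+#
+#   feature = LLMForensicReportFeature()
+#   prompt = feature.generate_prompt(
+#       sketch, form=form, datastore=datastore, timeline_ids=timeline_ids)
+#   response = provider.generate(prompt, response_schema=feature.RESPONSE_SCHEMA)
+#   result = feature.process_response(
+#       response, sketch=sketch, form=form, datastore=datastore,
+#       timeline_ids=timeline_ids)
+#
+# process_response() persists the report as a Story and returns its ID under
+# result["story_id"].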
+"""LLM Forensic Report feature.""" +import json +import logging +import time +from typing import Any, Dict, List, Optional +import pandas as pd +from flask import current_app +from opensearchpy import OpenSearch +from timesketch.lib import utils +from timesketch.api.v1 import export +from timesketch.models import db_session +from timesketch.models.sketch import Sketch +from timesketch.lib.llms import actions +from timesketch.lib.llms.features.interface import LLMFeatureInterface + +logger = logging.getLogger("timesketch.llm.forensic_report_feature") + +class LLMForensicReportFeature(LLMFeatureInterface): + """LLM Forensic Report feature.""" + NAME = "llm_forensic_report" + PROMPT_CONFIG_KEY = "PROMPT_LLM_FORENSIC_REPORT" + + RESPONSE_SCHEMA = { + "type": "object", + "properties": { + "summary": { + "type": "string", + "description": "Detailed forensic report summary of the events" + } + }, + "required": ["summary"] + } + + def _get_prompt_text(self, events_dict: List[Dict[str, Any]]) -> str: + """Reads the prompt template from file and injects events. + + Args: + events_dict: List of event dictionaries to inject into prompt. + + Returns: + str: Complete prompt text with injected events. + + Raises: + ValueError: If the prompt path is not configured or placeholder is missing. + FileNotFoundError: If the prompt file cannot be found. + IOError: If there's an error reading the prompt file. + """ + prompt_file_path = current_app.config.get(self.PROMPT_CONFIG_KEY) + if not prompt_file_path: + logger.error("%s config not set", self.PROMPT_CONFIG_KEY) + raise ValueError("LLM forensic report prompt path not configured.") + + try: + with open(prompt_file_path, "r", encoding="utf-8") as file_handle: + prompt_template = file_handle.read() + except FileNotFoundError as exc: + logger.error("Forensic report prompt file not found: %s", prompt_file_path) + raise FileNotFoundError( + f"LLM Prompt file not found: {prompt_file_path}" + ) from exc + except IOError as e: + logger.error("Error reading prompt file: %s", e) + raise IOError("Error reading LLM prompt file.") from e + + if "" not in prompt_template: + logger.error("Prompt template is missing the placeholder") + raise ValueError( + "LLM forensic report prompt template is missing the " + "required placeholder." + ) + + prompt_text = prompt_template.replace("", json.dumps(events_dict)) + return prompt_text + + def _run_timesketch_query( + self, + sketch: Sketch, + query_string: str = "*", + query_filter: Optional[Dict] = None, + id_list: Optional[List] = None, + datastore: Optional[OpenSearch] = None, + timeline_ids: Optional[List] = None, + ) -> pd.DataFrame: + """Runs a timesketch query and returns results as a DataFrame. + + Args: + sketch: The Sketch object to query. + query_string: Search query string. + query_filter: Dictionary with filter parameters. + id_list: List of event IDs to retrieve. + datastore: OpenSearch instance for querying. + timeline_ids: List of timeline IDs to query. + + Returns: + pd.DataFrame: DataFrame containing query results. + + Raises: + ValueError: If datastore is not provided or no valid indices are found. 
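+
+        Note:
+            When id_list is provided, the ids are rewritten into an `_id`
+            query and the supplied query_string is ignored.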
+ """ + if datastore is None: + raise ValueError("Datastore must be provided.") + + if not query_filter: + query_filter = {} + + if id_list: + id_query = " OR ".join([f'_id:"{event_id}"' for event_id in id_list]) + query_string = id_query + + all_indices = list({t.searchindex.index_name for t in sketch.timelines}) + indices_from_filter = query_filter.get("indices", all_indices) + + if "_all" in indices_from_filter: + indices_from_filter = all_indices + + indices, timeline_ids = utils.get_validated_indices(indices_from_filter, sketch) + + if not indices: + raise ValueError( + "No valid search indices were found to perform the search on." + ) + + result = datastore.search( + sketch_id=sketch.id, + query_string=query_string, + query_filter=query_filter, + query_dsl="", + indices=indices, + timeline_ids=timeline_ids, + ) + + logger.info("Number of hits from datastore search: %d", len(result)) + return export.query_results_to_dataframe(result, sketch) + + def generate_prompt(self, sketch: Sketch, **kwargs: Any) -> str: + """Generates the forensic report prompt based on events from a query. + + Args: + sketch: The Sketch object containing events to analyze. + **kwargs: Additional arguments including: + - form: Form data containing query and filter information. + - datastore: OpenSearch instance for querying. + - timeline_ids: List of timeline IDs to query. + + Returns: + str: Generated prompt text with events to analyze. + + Raises: + ValueError: If required parameters are missing or if no events are found. + """ + form = kwargs.get("form") + datastore = kwargs.get("datastore") + timeline_ids = kwargs.get("timeline_ids") + + if not form: + raise ValueError("Missing 'form' data in kwargs") + + query_filter = form.get("filter", {}) + query_string = form.get("query", "*") or "*" + + events_df = self._run_timesketch_query( + sketch, + query_string, + query_filter, + datastore=datastore, + timeline_ids=timeline_ids, + ) + + if events_df is None or events_df.empty: + return "No events to analyze for forensic report." 
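+
+        # The deduplication below keys on str(datetime) + message, assuming
+        # two events with an identical timestamp and message text are true
+        # duplicates; hashing the full event document would be a stricter
+        # (but more expensive) alternative.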
+
+        # Ensure 'datetime' column exists and convert to datetime objects
+        if 'datetime' not in events_df.columns:
+            logger.error("The 'datetime' column is missing in the events DataFrame.")
+            raise ValueError("The 'datetime' column is missing in the events DataFrame.")
+
+        # Convert 'datetime' column to datetime objects, handling potential errors
+        try:
+            events_df['datetime'] = pd.to_datetime(events_df['datetime'], errors='raise')
+        except (ValueError, TypeError) as e:
+            logger.error("Error converting 'datetime' column: %s", e)
+            raise ValueError(f"Error converting 'datetime' column to datetime objects: {e}") from e
+
+        # Create a combined key of timestamp and message to uniquely identify events
+        events_df['combined_key'] = events_df['datetime'].astype(str) + events_df['message']
+
+        # Drop duplicates based on the combined key; copy before adding helper columns
+        unique_df = events_df.drop_duplicates(subset='combined_key', keep='first').copy()
+
+        # Convert datetime to string BEFORE creating the dictionary
+        unique_df['datetime_str'] = unique_df['datetime'].astype(str)
+
+        # Prepare the unique events for the LLM prompt, include timestamp string
+        events_dict = unique_df[['datetime_str', 'message']].rename(
+            columns={'datetime_str': 'datetime'}).to_dict(orient="records")
+
+        total_events_count = len(events_df)
+        unique_events_count = len(unique_df)
+
+        logger.info(
+            "Analyzing events for forensic report: %d events",
+            total_events_count,
+        )
+        logger.info("Reduced to %d unique events", unique_events_count)
+
+        if not events_dict:
+            return "No events to analyze for forensic report."
+
+        return self._get_prompt_text(events_dict)
+
+    def process_response(self, llm_response: Any, **kwargs: Any) -> Dict[str, Any]:
+        """Processes the LLM response and creates a Story in the sketch.
+
+        Args:
+            llm_response: The response from the LLM model, expected to be a dictionary.
+            **kwargs: Additional arguments including:
+                - sketch_id: ID of the sketch being processed.
+                - sketch: The Sketch object.
+                - form: Form data containing query and filter information.
+                - datastore: OpenSearch instance for querying.
+                - timeline_ids: List of timeline IDs to query.
+
+        Returns:
+            Dictionary containing the processed response:
+                - summary: The forensic report text
+                - summary_event_count: Total number of events analyzed
+                - summary_unique_event_count: Number of unique events analyzed
+                - story_id: ID of the created story
+
+        Raises:
+            ValueError: If required parameters are missing or if the LLM response
+                is not in the expected format.
+ """ + sketch = kwargs.get("sketch") + form = kwargs.get("form") + datastore = kwargs.get("datastore") + timeline_ids = kwargs.get("timeline_ids") + + if not sketch: + raise ValueError("Missing 'sketch' in kwargs") + + if not form: + raise ValueError("Missing 'form' data in kwargs") + + if not isinstance(llm_response, dict): + raise ValueError("LLM response is expected to be a dictionary") + + summary_text = llm_response.get("summary") + if summary_text is None: + raise ValueError("LLM response missing 'summary' key") + + # Recalculate event counts for metrics in the response + query_filter = form.get("filter", {}) + query_string = form.get("query", "*") or "*" + + events_df = self._run_timesketch_query( + sketch, + query_string, + query_filter, + datastore=datastore, + timeline_ids=timeline_ids, + ) + + total_events_count = len(events_df) + + # For unique count, use the same logic as in generate_prompt + if 'datetime' in events_df.columns: + events_df['datetime'] = pd.to_datetime(events_df['datetime'], errors='coerce') + events_df['combined_key'] = events_df['datetime'].astype(str) + events_df['message'] + unique_events_count = len(events_df.drop_duplicates(subset='combined_key', keep='first')) + else: + unique_events_count = len(events_df.drop_duplicates(subset='message', keep='first')) + + # Create a story using the actions module + try: + # Create the story with a specific title for forensic reports + story_title = f"Forensic Report - {time.strftime('%Y-%m-%d %H:%M')}" + story_id = actions.create_story( + sketch=sketch, + content=summary_text, + title=story_title + ) + except Exception as e: + logger.error("Error creating story for forensic report: %s", e) + raise ValueError(f"Error creating story to save forensic report: {e}") from e + + return { + "summary": summary_text, + "summary_event_count": total_events_count, + "summary_unique_event_count": unique_events_count, + "story_id": story_id + } From 9d8e343b1c41d95416ee535c0850d9e7a42b8c8b Mon Sep 17 00:00:00 2001 From: itsmvd Date: Fri, 28 Feb 2025 17:00:00 +0000 Subject: [PATCH 38/63] Add nl2q and llm_summarize as LLM features --- timesketch/lib/llms/features/llm_summarize.py | 73 ++++++------------- .../lib/llms/features/llm_summarize_test.py | 14 ---- timesketch/lib/llms/features/nl2q_test.py | 1 - 3 files changed, 22 insertions(+), 66 deletions(-) diff --git a/timesketch/lib/llms/features/llm_summarize.py b/timesketch/lib/llms/features/llm_summarize.py index e776fcb705..65402d7087 100644 --- a/timesketch/lib/llms/features/llm_summarize.py +++ b/timesketch/lib/llms/features/llm_summarize.py @@ -16,79 +16,53 @@ import logging from typing import Any, Optional import pandas as pd -import prometheus_client from flask import current_app from opensearchpy import OpenSearch from timesketch.lib import utils from timesketch.api.v1 import export from timesketch.models.sketch import Sketch -from timesketch.lib.definitions import METRICS_NAMESPACE from timesketch.lib.llms.features.interface import LLMFeatureInterface logger = logging.getLogger("timesketch.llm.summarize_feature") -METRICS = { - "llm_summary_events_processed_total": prometheus_client.Counter( - "llm_summary_events_processed_total", - "Total number of events processed for LLM summarization", - ["sketch_id"], - namespace=METRICS_NAMESPACE, - ), - "llm_summary_unique_events_total": prometheus_client.Counter( - "llm_summary_unique_events_total", - "Total number of unique events sent to the LLM", - ["sketch_id"], - namespace=METRICS_NAMESPACE, - ), -} - class 
LLMSummarizeFeature(LLMFeatureInterface): """LLM Summarization feature.""" NAME = "llm_summarize" - PROMPT_CONFIG_KEY = "PROMPT_LLM_SUMMARIZATION" RESPONSE_SCHEMA = { "type": "object", "properties": {"summary": {"type": "string"}}, "required": ["summary"], } - def _get_prompt_text(self, events_dict: list[dict[str, Any]]) -> str: + def _get_prompt_text(self, events_dict: list) -> str: """Reads the prompt template from file and injects events. + Args: events_dict: List of event dictionaries to inject into prompt. + Returns: str: Complete prompt text with injected events. + Raises: - ValueError: If the prompt path is not configured or placeholder is missing. + ValueError: If the prompt path is not configured. FileNotFoundError: If the prompt file cannot be found. IOError: If there's an error reading the prompt file. """ - prompt_file_path = current_app.config.get(self.PROMPT_CONFIG_KEY) + prompt_file_path = current_app.config.get("PROMPT_LLM_SUMMARIZATION") if not prompt_file_path: - logger.error("%s config not set", {self.PROMPT_CONFIG_KEY}) + logger.error("PROMPT_LLM_SUMMARIZATION config not set") raise ValueError("LLM summarization prompt path not configured.") - try: with open(prompt_file_path, "r", encoding="utf-8") as file_handle: prompt_template = file_handle.read() - except FileNotFoundError as exc: + except FileNotFoundError: logger.error("Prompt file not found: %s", prompt_file_path) - raise FileNotFoundError( - f"LLM Prompt file not found: {prompt_file_path}" - ) from exc + raise FileNotFoundError(f"LLM Prompt file not found: {prompt_file_path}") except IOError as e: logger.error("Error reading prompt file: %s", e) raise IOError("Error reading LLM prompt file.") from e - - if "" not in prompt_template: - logger.error("Prompt template is missing the placeholder") - raise ValueError( - "LLM summarization prompt template is missing the " - "required placeholder." - ) - prompt_text = prompt_template.replace("", json.dumps(events_dict)) return prompt_text @@ -102,6 +76,7 @@ def _run_timesketch_query( timeline_ids: Optional[list] = None, ) -> pd.DataFrame: """Runs a timesketch query and returns results as a DataFrame. + Args: sketch: The Sketch object to query. query_string: Search query string. @@ -109,8 +84,10 @@ def _run_timesketch_query( id_list: List of event IDs to retrieve. datastore: OpenSearch instance for querying. timeline_ids: List of timeline IDs to query. + Returns: pd.DataFrame: DataFrame containing query results. + Raises: ValueError: If datastore is not provided or no valid indices are found. """ @@ -142,14 +119,17 @@ def _run_timesketch_query( def generate_prompt(self, sketch: Sketch, **kwargs: Any) -> str: """Generates the summarization prompt based on events from a query. + Args: sketch: The Sketch object containing events to summarize. **kwargs: Additional arguments including: - form: Form data containing query and filter information. - datastore: OpenSearch instance for querying. - timeline_ids: List of timeline IDs to query. + Returns: str: Generated prompt text with events to summarize. + Raises: ValueError: If required parameters are missing or if no events are found. """ @@ -169,28 +149,17 @@ def generate_prompt(self, sketch: Sketch, **kwargs: Any) -> str: ) if events_df is None or events_df.empty: return "No events to summarize based on the current filter." 
- - total_events_count = len(events_df) - METRICS["llm_summary_events_processed_total"].labels( - sketch_id=str(sketch.id) - ).inc(total_events_count) - unique_events_df = events_df[["message"]].drop_duplicates( subset="message", keep="first" ) - unique_events_count = len(unique_events_df) - METRICS["llm_summary_unique_events_total"].labels(sketch_id=str(sketch.id)).inc( - unique_events_count - ) - - events = unique_events_df.to_dict(orient="records") - if not events: + events_dict = unique_events_df.to_dict(orient="records") + if not events_dict: return "No events to summarize based on the current filter." - - return self._get_prompt_text(events) + return self._get_prompt_text(events_dict) def process_response(self, llm_response: Any, **kwargs: Any) -> dict[str, Any]: """Processes the LLM response and adds additional context information. + Args: llm_response: The response from the LLM model, expected to be a dictionary. **kwargs: Additional arguments including: @@ -199,11 +168,13 @@ def process_response(self, llm_response: Any, **kwargs: Any) -> dict[str, Any]: - datastore: OpenSearch instance for querying. - timeline_ids: List of timeline IDs. - form: Form data containing query and filter information. + Returns: - Dictionary containing the processed response with additional context: + dict[str, Any]: Dictionary containing the processed response with additional context: - response: The summary text. - summary_event_count: Total number of events summarized. - summary_unique_event_count: Number of unique events summarized. + Raises: ValueError: If required parameters are missing or if the LLM response is not in the expected format. diff --git a/timesketch/lib/llms/features/llm_summarize_test.py b/timesketch/lib/llms/features/llm_summarize_test.py index 073ca07479..4946f118f0 100644 --- a/timesketch/lib/llms/features/llm_summarize_test.py +++ b/timesketch/lib/llms/features/llm_summarize_test.py @@ -22,7 +22,6 @@ from timesketch.lib.llms.features.llm_summarize import LLMSummarizeFeature -# pylint: disable=protected-access class TestLLMSummarizeFeature(BaseTest): """Tests for the LLMSummarizeFeature.""" @@ -45,19 +44,6 @@ def test_get_prompt_text(self): self.assertEqual(prompt, f"Analyze these events: {json.dumps(events_dict)}") - @mock.patch( - "builtins.open", - mock.mock_open(read_data="Analyze these events without placeholder"), - ) - def test_get_prompt_text_missing_placeholder(self): - """Tests _get_prompt_text method with missing placeholder.""" - events_dict = [{"message": "Test event"}] - with self.assertRaises(ValueError) as context: - self.llm_feature._get_prompt_text(events_dict) - self.assertIn( - "missing the required placeholder", str(context.exception) - ) - def test_get_prompt_text_missing_file(self): """Tests _get_prompt_text method with missing file.""" current_app.config["PROMPT_LLM_SUMMARIZATION"] = "/file_does_not_exist.txt" diff --git a/timesketch/lib/llms/features/nl2q_test.py b/timesketch/lib/llms/features/nl2q_test.py index c902cad527..684a5f54fc 100644 --- a/timesketch/lib/llms/features/nl2q_test.py +++ b/timesketch/lib/llms/features/nl2q_test.py @@ -20,7 +20,6 @@ from timesketch.lib.llms.features.nl2q import Nl2qFeature -# pylint: disable=protected-access class TestNl2qFeature(BaseTest): """Tests for the Nl2qFeature.""" From 1a213a208f10d370b44a7888a94f84453cd4eec9 Mon Sep 17 00:00:00 2001 From: itsmvd Date: Fri, 28 Feb 2025 17:08:04 +0000 Subject: [PATCH 39/63] Couple of linter fixes on llm_summarize --- timesketch/lib/llms/features/llm_summarize.py | 8 +++++--- 1 
file changed, 5 insertions(+), 3 deletions(-) diff --git a/timesketch/lib/llms/features/llm_summarize.py b/timesketch/lib/llms/features/llm_summarize.py index 65402d7087..786ba0e1b4 100644 --- a/timesketch/lib/llms/features/llm_summarize.py +++ b/timesketch/lib/llms/features/llm_summarize.py @@ -57,9 +57,11 @@ def _get_prompt_text(self, events_dict: list) -> str: try: with open(prompt_file_path, "r", encoding="utf-8") as file_handle: prompt_template = file_handle.read() - except FileNotFoundError: + except FileNotFoundError as exc: logger.error("Prompt file not found: %s", prompt_file_path) - raise FileNotFoundError(f"LLM Prompt file not found: {prompt_file_path}") + raise FileNotFoundError( + f"LLM Prompt file not found: {prompt_file_path}" + ) from exc except IOError as e: logger.error("Error reading prompt file: %s", e) raise IOError("Error reading LLM prompt file.") from e @@ -170,7 +172,7 @@ def process_response(self, llm_response: Any, **kwargs: Any) -> dict[str, Any]: - form: Form data containing query and filter information. Returns: - dict[str, Any]: Dictionary containing the processed response with additional context: + Dictionary containing the processed response with additional context: - response: The summary text. - summary_event_count: Total number of events summarized. - summary_unique_event_count: Number of unique events summarized. From 3f9c7f0101d3ce5202f9accd0ded76686f507ba9 Mon Sep 17 00:00:00 2001 From: itsmvd Date: Fri, 28 Feb 2025 17:13:17 +0000 Subject: [PATCH 40/63] pylint: disable=protected-access --- timesketch/lib/llms/features/llm_summarize_test.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/timesketch/lib/llms/features/llm_summarize_test.py b/timesketch/lib/llms/features/llm_summarize_test.py index 4946f118f0..186bad0f1f 100644 --- a/timesketch/lib/llms/features/llm_summarize_test.py +++ b/timesketch/lib/llms/features/llm_summarize_test.py @@ -21,7 +21,7 @@ from timesketch.lib.testlib import MockDataStore from timesketch.lib.llms.features.llm_summarize import LLMSummarizeFeature - +#pylint: disable=protected-access class TestLLMSummarizeFeature(BaseTest): """Tests for the LLMSummarizeFeature.""" From 2baf0af012b2d7b8ebf98790b706f4c8d6d256b2 Mon Sep 17 00:00:00 2001 From: itsmvd Date: Fri, 28 Feb 2025 17:15:35 +0000 Subject: [PATCH 41/63] black formatting --- timesketch/lib/llms/features/llm_summarize_test.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/timesketch/lib/llms/features/llm_summarize_test.py b/timesketch/lib/llms/features/llm_summarize_test.py index 186bad0f1f..c103321aff 100644 --- a/timesketch/lib/llms/features/llm_summarize_test.py +++ b/timesketch/lib/llms/features/llm_summarize_test.py @@ -21,7 +21,8 @@ from timesketch.lib.testlib import MockDataStore from timesketch.lib.llms.features.llm_summarize import LLMSummarizeFeature -#pylint: disable=protected-access + +# pylint: disable=protected-access class TestLLMSummarizeFeature(BaseTest): """Tests for the LLMSummarizeFeature.""" From e46be9748d28d870dce81815deb7e088010520e5 Mon Sep 17 00:00:00 2001 From: itsmvd Date: Fri, 28 Feb 2025 17:20:04 +0000 Subject: [PATCH 42/63] # pylint: disable=protected-access --- timesketch/lib/llms/features/nl2q_test.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/timesketch/lib/llms/features/nl2q_test.py b/timesketch/lib/llms/features/nl2q_test.py index 684a5f54fc..e3e52d470d 100644 --- a/timesketch/lib/llms/features/nl2q_test.py +++ b/timesketch/lib/llms/features/nl2q_test.py @@ -19,7 +19,7 @@ from 
timesketch.lib.testlib import BaseTest from timesketch.lib.llms.features.nl2q import Nl2qFeature - +# pylint: disable=protected-access class TestNl2qFeature(BaseTest): """Tests for the Nl2qFeature.""" From be0b2ef0a190593ba9388db8f908c6766f147a39 Mon Sep 17 00:00:00 2001 From: itsmvd Date: Fri, 28 Feb 2025 17:22:54 +0000 Subject: [PATCH 43/63] formatting on nl2q --- timesketch/lib/llms/features/nl2q_test.py | 1 + 1 file changed, 1 insertion(+) diff --git a/timesketch/lib/llms/features/nl2q_test.py b/timesketch/lib/llms/features/nl2q_test.py index e3e52d470d..c902cad527 100644 --- a/timesketch/lib/llms/features/nl2q_test.py +++ b/timesketch/lib/llms/features/nl2q_test.py @@ -19,6 +19,7 @@ from timesketch.lib.testlib import BaseTest from timesketch.lib.llms.features.nl2q import Nl2qFeature + # pylint: disable=protected-access class TestNl2qFeature(BaseTest): """Tests for the Nl2qFeature.""" From 56bcd6d56047160f65b19885d3856fee7c0b2b1b Mon Sep 17 00:00:00 2001 From: itsmvd Date: Sat, 1 Mar 2025 09:34:33 +0000 Subject: [PATCH 44/63] add feature specific metrics --- timesketch/lib/llms/features/llm_summarize.py | 44 ++++++++++++++----- 1 file changed, 32 insertions(+), 12 deletions(-) diff --git a/timesketch/lib/llms/features/llm_summarize.py b/timesketch/lib/llms/features/llm_summarize.py index 786ba0e1b4..1daca5e84d 100644 --- a/timesketch/lib/llms/features/llm_summarize.py +++ b/timesketch/lib/llms/features/llm_summarize.py @@ -16,15 +16,33 @@ import logging from typing import Any, Optional import pandas as pd +import prometheus_client from flask import current_app from opensearchpy import OpenSearch from timesketch.lib import utils from timesketch.api.v1 import export from timesketch.models.sketch import Sketch +from timesketch.lib.definitions import METRICS_NAMESPACE from timesketch.lib.llms.features.interface import LLMFeatureInterface logger = logging.getLogger("timesketch.llm.summarize_feature") +# TODO(itsmvd): Remove 'feature' prefix after migration +METRICS = { + "llm_summary_events_processed_total": prometheus_client.Counter( + "feature_llm_summary_events_processed_total", # avoid duplicate registration + "Total number of events processed for LLM summarization", + ["sketch_id"], + namespace=METRICS_NAMESPACE, + ), + "llm_summary_unique_events_total": prometheus_client.Counter( + "feature_llm_summary_unique_events_total", # avoid duplicate registration + "Total number of unique events sent to the LLM", + ["sketch_id"], + namespace=METRICS_NAMESPACE, + ), +} + class LLMSummarizeFeature(LLMFeatureInterface): """LLM Summarization feature.""" @@ -38,13 +56,10 @@ class LLMSummarizeFeature(LLMFeatureInterface): def _get_prompt_text(self, events_dict: list) -> str: """Reads the prompt template from file and injects events. - Args: events_dict: List of event dictionaries to inject into prompt. - Returns: str: Complete prompt text with injected events. - Raises: ValueError: If the prompt path is not configured. FileNotFoundError: If the prompt file cannot be found. @@ -78,7 +93,6 @@ def _run_timesketch_query( timeline_ids: Optional[list] = None, ) -> pd.DataFrame: """Runs a timesketch query and returns results as a DataFrame. - Args: sketch: The Sketch object to query. query_string: Search query string. @@ -86,10 +100,8 @@ def _run_timesketch_query( id_list: List of event IDs to retrieve. datastore: OpenSearch instance for querying. timeline_ids: List of timeline IDs to query. - Returns: pd.DataFrame: DataFrame containing query results. 
- Raises: ValueError: If datastore is not provided or no valid indices are found. """ @@ -121,17 +133,14 @@ def _run_timesketch_query( def generate_prompt(self, sketch: Sketch, **kwargs: Any) -> str: """Generates the summarization prompt based on events from a query. - Args: sketch: The Sketch object containing events to summarize. **kwargs: Additional arguments including: - form: Form data containing query and filter information. - datastore: OpenSearch instance for querying. - timeline_ids: List of timeline IDs to query. - Returns: str: Generated prompt text with events to summarize. - Raises: ValueError: If required parameters are missing or if no events are found. """ @@ -151,17 +160,30 @@ def generate_prompt(self, sketch: Sketch, **kwargs: Any) -> str: ) if events_df is None or events_df.empty: return "No events to summarize based on the current filter." + + # Count and record total events + total_events_count = len(events_df) + METRICS["llm_summary_events_processed_total"].labels( + sketch_id=str(sketch.id) + ).inc(total_events_count) + + # Get unique events, count and record them unique_events_df = events_df[["message"]].drop_duplicates( subset="message", keep="first" ) + unique_events_count = len(unique_events_df) + METRICS["llm_summary_unique_events_total"].labels(sketch_id=str(sketch.id)).inc( + unique_events_count + ) + events_dict = unique_events_df.to_dict(orient="records") if not events_dict: return "No events to summarize based on the current filter." + return self._get_prompt_text(events_dict) def process_response(self, llm_response: Any, **kwargs: Any) -> dict[str, Any]: """Processes the LLM response and adds additional context information. - Args: llm_response: The response from the LLM model, expected to be a dictionary. **kwargs: Additional arguments including: @@ -170,13 +192,11 @@ def process_response(self, llm_response: Any, **kwargs: Any) -> dict[str, Any]: - datastore: OpenSearch instance for querying. - timeline_ids: List of timeline IDs. - form: Form data containing query and filter information. - Returns: Dictionary containing the processed response with additional context: - response: The summary text. - summary_event_count: Total number of events summarized. - summary_unique_event_count: Number of unique events summarized. - Raises: ValueError: If required parameters are missing or if the LLM response is not in the expected format. From 09bca8b9d86cab010b88587688231f0153e0e17c Mon Sep 17 00:00:00 2001 From: itsmvd Date: Sat, 1 Mar 2025 09:36:56 +0000 Subject: [PATCH 45/63] remove unnecessary comments --- timesketch/lib/llms/features/llm_summarize.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/timesketch/lib/llms/features/llm_summarize.py b/timesketch/lib/llms/features/llm_summarize.py index 1daca5e84d..695a5a3c7b 100644 --- a/timesketch/lib/llms/features/llm_summarize.py +++ b/timesketch/lib/llms/features/llm_summarize.py @@ -161,13 +161,11 @@ def generate_prompt(self, sketch: Sketch, **kwargs: Any) -> str: if events_df is None or events_df.empty: return "No events to summarize based on the current filter." 
- # Count and record total events total_events_count = len(events_df) METRICS["llm_summary_events_processed_total"].labels( sketch_id=str(sketch.id) ).inc(total_events_count) - # Get unique events, count and record them unique_events_df = events_df[["message"]].drop_duplicates( subset="message", keep="first" ) From 5478dcd5cf14c35aea41ad6b2643a32218ee3461 Mon Sep 17 00:00:00 2001 From: itsmvd Date: Tue, 4 Mar 2025 09:23:14 +0000 Subject: [PATCH 46/63] review fixes --- timesketch/lib/llms/features/llm_summarize.py | 19 +++++++++++-------- 1 file changed, 11 insertions(+), 8 deletions(-) diff --git a/timesketch/lib/llms/features/llm_summarize.py b/timesketch/lib/llms/features/llm_summarize.py index 695a5a3c7b..7c38339214 100644 --- a/timesketch/lib/llms/features/llm_summarize.py +++ b/timesketch/lib/llms/features/llm_summarize.py @@ -48,16 +48,17 @@ class LLMSummarizeFeature(LLMFeatureInterface): """LLM Summarization feature.""" NAME = "llm_summarize" + PROMPT_CONFIG_KEY = "PROMPT_LLM_SUMMARIZATION" RESPONSE_SCHEMA = { "type": "object", "properties": {"summary": {"type": "string"}}, "required": ["summary"], } - def _get_prompt_text(self, events_dict: list) -> str: + def _get_prompt_text(self, events: list[dict[str, Any]]) -> str: """Reads the prompt template from file and injects events. Args: - events_dict: List of event dictionaries to inject into prompt. + events: List of event dictionaries to inject into prompt. Returns: str: Complete prompt text with injected events. Raises: @@ -65,10 +66,11 @@ def _get_prompt_text(self, events_dict: list) -> str: FileNotFoundError: If the prompt file cannot be found. IOError: If there's an error reading the prompt file. """ - prompt_file_path = current_app.config.get("PROMPT_LLM_SUMMARIZATION") + prompt_file_path = current_app.config.get(self.PROMPT_CONFIG_KEY) if not prompt_file_path: - logger.error("PROMPT_LLM_SUMMARIZATION config not set") + logger.error("%s config not set", {self.PROMPT_CONFIG_KEY}) raise ValueError("LLM summarization prompt path not configured.") + try: with open(prompt_file_path, "r", encoding="utf-8") as file_handle: prompt_template = file_handle.read() @@ -80,7 +82,8 @@ def _get_prompt_text(self, events_dict: list) -> str: except IOError as e: logger.error("Error reading prompt file: %s", e) raise IOError("Error reading LLM prompt file.") from e - prompt_text = prompt_template.replace("", json.dumps(events_dict)) + + prompt_text = prompt_template.replace("", json.dumps(events)) return prompt_text def _run_timesketch_query( @@ -174,11 +177,11 @@ def generate_prompt(self, sketch: Sketch, **kwargs: Any) -> str: unique_events_count ) - events_dict = unique_events_df.to_dict(orient="records") - if not events_dict: + events = unique_events_df.to_dict(orient="records") + if not events: return "No events to summarize based on the current filter." - return self._get_prompt_text(events_dict) + return self._get_prompt_text(events) def process_response(self, llm_response: Any, **kwargs: Any) -> dict[str, Any]: """Processes the LLM response and adds additional context information. 
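A note on the config key introduced above: PROMPT_CONFIG_KEY resolves through
the Flask app config, so a deployment is expected to carry an entry along
these lines in timesketch.conf (the path shown is illustrative, not taken
from this series):

    PROMPT_LLM_SUMMARIZATION = '/usr/local/share/timesketch/llm_summarize/prompt_summarization.txt'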
From 4f9a0b7d36916c2384ced8e1e2540972d9c69675 Mon Sep 17 00:00:00 2001 From: itsmvd Date: Tue, 4 Mar 2025 10:04:08 +0000 Subject: [PATCH 47/63] Handle incorrect prompt file + test --- timesketch/lib/llms/features/llm_summarize.py | 15 +++++++++++---- .../lib/llms/features/llm_summarize_test.py | 13 +++++++++++++ 2 files changed, 24 insertions(+), 4 deletions(-) diff --git a/timesketch/lib/llms/features/llm_summarize.py b/timesketch/lib/llms/features/llm_summarize.py index 7c38339214..9a6cf5f457 100644 --- a/timesketch/lib/llms/features/llm_summarize.py +++ b/timesketch/lib/llms/features/llm_summarize.py @@ -55,14 +55,14 @@ class LLMSummarizeFeature(LLMFeatureInterface): "required": ["summary"], } - def _get_prompt_text(self, events: list[dict[str, Any]]) -> str: + def _get_prompt_text(self, events_dict: list[dict[str, Any]]) -> str: """Reads the prompt template from file and injects events. Args: - events: List of event dictionaries to inject into prompt. + events_dict: List of event dictionaries to inject into prompt. Returns: str: Complete prompt text with injected events. Raises: - ValueError: If the prompt path is not configured. + ValueError: If the prompt path is not configured or placeholder is missing. FileNotFoundError: If the prompt file cannot be found. IOError: If there's an error reading the prompt file. """ @@ -83,7 +83,14 @@ def _get_prompt_text(self, events: list[dict[str, Any]]) -> str: logger.error("Error reading prompt file: %s", e) raise IOError("Error reading LLM prompt file.") from e - prompt_text = prompt_template.replace("", json.dumps(events)) + if "" not in prompt_template: + logger.error("Prompt template is missing the placeholder") + raise ValueError( + "LLM summarization prompt template is missing the " + "required placeholder." 
+ ) + + prompt_text = prompt_template.replace("", json.dumps(events_dict)) return prompt_text def _run_timesketch_query( diff --git a/timesketch/lib/llms/features/llm_summarize_test.py b/timesketch/lib/llms/features/llm_summarize_test.py index c103321aff..073ca07479 100644 --- a/timesketch/lib/llms/features/llm_summarize_test.py +++ b/timesketch/lib/llms/features/llm_summarize_test.py @@ -45,6 +45,19 @@ def test_get_prompt_text(self): self.assertEqual(prompt, f"Analyze these events: {json.dumps(events_dict)}") + @mock.patch( + "builtins.open", + mock.mock_open(read_data="Analyze these events without placeholder"), + ) + def test_get_prompt_text_missing_placeholder(self): + """Tests _get_prompt_text method with missing placeholder.""" + events_dict = [{"message": "Test event"}] + with self.assertRaises(ValueError) as context: + self.llm_feature._get_prompt_text(events_dict) + self.assertIn( + "missing the required placeholder", str(context.exception) + ) + def test_get_prompt_text_missing_file(self): """Tests _get_prompt_text method with missing file.""" current_app.config["PROMPT_LLM_SUMMARIZATION"] = "/file_does_not_exist.txt" From a82d776e49a7a128bdc5145123bcb0512d21f21d Mon Sep 17 00:00:00 2001 From: itsmvd Date: Tue, 4 Mar 2025 10:23:47 +0000 Subject: [PATCH 48/63] frontend: LLM features switch to new llm endpoint --- .../src/components/Scenarios/QuestionCard.vue | 49 ++++++++----------- 1 file changed, 20 insertions(+), 29 deletions(-) diff --git a/timesketch/frontend-ng/src/components/Scenarios/QuestionCard.vue b/timesketch/frontend-ng/src/components/Scenarios/QuestionCard.vue index de5eccf9f1..40068253e7 100644 --- a/timesketch/frontend-ng/src/components/Scenarios/QuestionCard.vue +++ b/timesketch/frontend-ng/src/components/Scenarios/QuestionCard.vue @@ -404,17 +404,17 @@ export default { }, methods: { getSuggestedQuery() { - this.suggestedQueryLoading = true - let formData = { question: this.activeQuestion.display_name } - ApiClient.llmRequest(this.sketch.id, 'nl2q', formData) - .then((response) => { - this.suggestedQuery = response.data - this.suggestedQueryLoading = false - }) - .catch((e) => { - console.error(e) - }) - }, + this.suggestedQueryLoading = true + let formData = { question: this.activeQuestion.display_name } + ApiClient.llmRequest(this.sketch.id, 'nl2q', formData) + .then((response) => { + this.suggestedQuery = response.data + this.suggestedQueryLoading = false + }) + .catch((e) => { + console.error(e) + }) + }, getQuestionTemplates() { this.isLoading = true ApiClient.getQuestionTemplates() @@ -508,24 +508,15 @@ export default { this.suggestedQuery = {} // Set active tab - if (this.userSettings.generateQuery && this.systemSettings.LLM_PROVIDER) { - if (this.activeQuestion.conclusions.length) { - this.activeTab = 2 - } else { - this.activeTab = 0 - } - } else { - if (this.activeQuestion.conclusions.length) { - this.activeTab = 2 - } else if (this.allSuggestedQueries.length) { - this.activeTab = 0 - } else if (this.activeQuestion.approaches.length) { - this.activeTab = 1 - } else { - this.activeTab = 2 - } - } - + if (this.activeQuestion.conclusions.length) { + this.activeTab = 2 + } else if (this.allSuggestedQueries.length) { + this.activeTab = 0 + } else if (question.approaches.length) { + this.activeTab = 1 + } else { + this.activeTab = 2 + } let payload = { scenarioId: null, From 75f55f15d22275b0b1df19a4f725837a855d81aa Mon Sep 17 00:00:00 2001 From: itsmvd Date: Tue, 4 Mar 2025 10:27:17 +0000 Subject: [PATCH 49/63] layout fix --- 
.../src/components/Scenarios/QuestionCard.vue | 22 +++++++++---------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/timesketch/frontend-ng/src/components/Scenarios/QuestionCard.vue b/timesketch/frontend-ng/src/components/Scenarios/QuestionCard.vue index 40068253e7..601a045611 100644 --- a/timesketch/frontend-ng/src/components/Scenarios/QuestionCard.vue +++ b/timesketch/frontend-ng/src/components/Scenarios/QuestionCard.vue @@ -404,17 +404,17 @@ export default { }, methods: { getSuggestedQuery() { - this.suggestedQueryLoading = true - let formData = { question: this.activeQuestion.display_name } - ApiClient.llmRequest(this.sketch.id, 'nl2q', formData) - .then((response) => { - this.suggestedQuery = response.data - this.suggestedQueryLoading = false - }) - .catch((e) => { - console.error(e) - }) - }, + this.suggestedQueryLoading = true + let formData = { question: this.activeQuestion.display_name } + ApiClient.llmRequest(this.sketch.id, 'nl2q', formData) + .then((response) => { + this.suggestedQuery = response.data + this.suggestedQueryLoading = false + }) + .catch((e) => { + console.error(e) + }) + }, getQuestionTemplates() { this.isLoading = true ApiClient.getQuestionTemplates() From 317eec821dc131aed5c1a1de533499a3c48dcfce Mon Sep 17 00:00:00 2001 From: itsmvd Date: Tue, 4 Mar 2025 10:41:16 +0000 Subject: [PATCH 50/63] Remove nl2q & llm_summarize features from the API --- timesketch/lib/llms/features/llm_summarize.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/timesketch/lib/llms/features/llm_summarize.py b/timesketch/lib/llms/features/llm_summarize.py index 9a6cf5f457..e776fcb705 100644 --- a/timesketch/lib/llms/features/llm_summarize.py +++ b/timesketch/lib/llms/features/llm_summarize.py @@ -27,16 +27,15 @@ logger = logging.getLogger("timesketch.llm.summarize_feature") -# TODO(itsmvd): Remove 'feature' prefix after migration METRICS = { "llm_summary_events_processed_total": prometheus_client.Counter( - "feature_llm_summary_events_processed_total", # avoid duplicate registration + "llm_summary_events_processed_total", "Total number of events processed for LLM summarization", ["sketch_id"], namespace=METRICS_NAMESPACE, ), "llm_summary_unique_events_total": prometheus_client.Counter( - "feature_llm_summary_unique_events_total", # avoid duplicate registration + "llm_summary_unique_events_total", "Total number of unique events sent to the LLM", ["sketch_id"], namespace=METRICS_NAMESPACE, From bc20bf00f78abfe1ac52e008ada95792e06a3d16 Mon Sep 17 00:00:00 2001 From: itsmvd Date: Tue, 11 Mar 2025 11:12:11 +0000 Subject: [PATCH 51/63] Make timeout configurable for snackBar methods --- timesketch/frontend-ng/src/mixins/snackBar.js | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/timesketch/frontend-ng/src/mixins/snackBar.js b/timesketch/frontend-ng/src/mixins/snackBar.js index 97ac9bc918..efc021709c 100644 --- a/timesketch/frontend-ng/src/mixins/snackBar.js +++ b/timesketch/frontend-ng/src/mixins/snackBar.js @@ -23,32 +23,35 @@ const defaultSnackBar = { "timeout": defaultTimeout } -// These methids will be available to all components without any further imports. +// These methods will be available to all components without any further imports. 
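+// Usage sketch (both calls are illustrative; timeout is in milliseconds and
+// falls back to defaultTimeout when omitted):
+//   this.successSnackBar('Saved!')
+//   this.errorSnackBar('Request failed', 10000)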
Vue.mixin({ methods: { - successSnackBar(message) { + successSnackBar(message, timeout) { let snackbar = defaultSnackBar snackbar.message = message snackbar.color = "success" + snackbar.timeout = timeout || defaultTimeout this.$store.dispatch('setSnackBar', snackbar) }, - errorSnackBar(message) { + errorSnackBar(message, timeout) { let snackbar = defaultSnackBar snackbar.message = message snackbar.color = "error" + snackbar.timeout = timeout || defaultTimeout this.$store.dispatch('setSnackBar', snackbar) }, - warningSnackBar(message) { + warningSnackBar(message, timeout) { let snackbar = defaultSnackBar snackbar.message = message snackbar.color = "warning" + snackbar.timeout = timeout || defaultTimeout this.$store.dispatch('setSnackBar', snackbar) }, - infoSnackBar(message) { + infoSnackBar(message, timeout) { let snackbar = defaultSnackBar snackbar.message = message snackbar.color = "info" - snackbar.timeout = 2000 + snackbar.timeout = timeout || defaultTimeout this.$store.dispatch('setSnackBar', snackbar) }, } From 27592c9e957bd4bd804c186f5da9879462417d42 Mon Sep 17 00:00:00 2001 From: itsmvd Date: Tue, 11 Mar 2025 12:21:18 +0000 Subject: [PATCH 52/63] Re-applying changes, excluding snackbar timeout --- timesketch/frontend-ng/src/mixins/snackBar.js | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/timesketch/frontend-ng/src/mixins/snackBar.js b/timesketch/frontend-ng/src/mixins/snackBar.js index 97ac9bc918..efc021709c 100644 --- a/timesketch/frontend-ng/src/mixins/snackBar.js +++ b/timesketch/frontend-ng/src/mixins/snackBar.js @@ -23,32 +23,35 @@ const defaultSnackBar = { "timeout": defaultTimeout } -// These methids will be available to all components without any further imports. +// These methods will be available to all components without any further imports. 
Vue.mixin({ methods: { - successSnackBar(message) { + successSnackBar(message, timeout) { let snackbar = defaultSnackBar snackbar.message = message snackbar.color = "success" + snackbar.timeout = timeout || defaultTimeout this.$store.dispatch('setSnackBar', snackbar) }, - errorSnackBar(message) { + errorSnackBar(message, timeout) { let snackbar = defaultSnackBar snackbar.message = message snackbar.color = "error" + snackbar.timeout = timeout || defaultTimeout this.$store.dispatch('setSnackBar', snackbar) }, - warningSnackBar(message) { + warningSnackBar(message, timeout) { let snackbar = defaultSnackBar snackbar.message = message snackbar.color = "warning" + snackbar.timeout = timeout || defaultTimeout this.$store.dispatch('setSnackBar', snackbar) }, - infoSnackBar(message) { + infoSnackBar(message, timeout) { let snackbar = defaultSnackBar snackbar.message = message snackbar.color = "info" - snackbar.timeout = 2000 + snackbar.timeout = timeout || defaultTimeout this.$store.dispatch('setSnackBar', snackbar) }, } From 024e86260e29aa84ba504a10784aa43fe7ca0928 Mon Sep 17 00:00:00 2001 From: itsmvd Date: Tue, 11 Mar 2025 12:56:24 +0000 Subject: [PATCH 53/63] UI changes for llm_forensic_report LLM feature --- timesketch/frontend-ng/src/assets/main.scss | 12 ++ .../src/components/Explore/EventList.vue | 118 ++++++++++++------ 2 files changed, 90 insertions(+), 40 deletions(-) diff --git a/timesketch/frontend-ng/src/assets/main.scss b/timesketch/frontend-ng/src/assets/main.scss index 2141a43805..4df44675e4 100644 --- a/timesketch/frontend-ng/src/assets/main.scss +++ b/timesketch/frontend-ng/src/assets/main.scss @@ -208,3 +208,15 @@ html { -o-transition: none !important; transition: none !important; } + +$llm-gradient: linear-gradient(90deg, + #8ab4f8 0%, + #81c995 20%, + #f8c665 40%, + #ec7764 60%, + #b39ddb 80%, + #8ab4f8 100%); + +:root { + --llm-gradient: #{$llm-gradient}; +} diff --git a/timesketch/frontend-ng/src/components/Explore/EventList.vue b/timesketch/frontend-ng/src/components/Explore/EventList.vue index f7d7de62fe..571ba9c3e1 100644 --- a/timesketch/frontend-ng/src/components/Explore/EventList.vue +++ b/timesketch/frontend-ng/src/components/Explore/EventList.vue @@ -246,6 +246,17 @@ limitations under the License. mdi-download + +
+ mdi-file-document-check +
+
+