LLM Speech Summarization #418
Open

eric-mccann-pro wants to merge 178 commits into `develop` from `feat/qwen-speech-summarization`.
Changes from 177 commits

Commits (178)
- `2689205` Add QwenSpeechSummarization (eric-mccann-pro)
- `f01498a` Runs main() in container... does not run main during build for testin… (eric-mccann-pro)
- `98bc842` Logger won't log. Deal with it later (eric-mccann-pro)
- `98bb794` Fix format strings (eric-mccann-pro)
- `e29c34a` Add primary_topic and other_topics to output (eric-mccann-pro)
- `0604c07` Make sure we download the tokenizer giblets during docker build (eric-mccann-pro)
- `97ae53f` Mock an LLM generator's events stream. Run pytest if RUN_TESTS is true (eric-mccann-pro)
- `f04d5b0` Use releasable descriptor (eric-mccann-pro)
- `86a7ab4` Readme (eric-mccann-pro)
- `50cb5f7` Change default RUN_TEST to false (eric-mccann-pro)
- `e006afd` Parameterize VLLM_MODEL and VLLM_URI at container scope, as they're e… (eric-mccann-pro)
- `b0b1c15` +x (eric-mccann-pro)
- `198f3ec` Include served-model-name param in the entrypoint, not the CMD (eric-mccann-pro)
- `0908932` Make sure tokenizer pull step has VLLM_MODEL defined in env if overriden (eric-mccann-pro)
- `c20c3d2` License blocks (eric-mccann-pro)
- `ebbecd7` Make exception text less useless when there are no FF tracks (eric-mccann-pro)
- `5aea1b7` Fix typo (eric-mccann-pro)
- `68c8456` Fix another typo (eric-mccann-pro)
- `ae4f6f0` Fix default in descriptor (eric-mccann-pro)
- `de6f2d3` Make speaker id optional (eric-mccann-pro)
- `cc151c6` input_cleanup: be cool (eric-mccann-pro)
- `16a367c` again (eric-mccann-pro)
- `7d231e5` Change summary and print the final summary after it comes back from t… (eric-mccann-pro)
- `3c04189` Print number of results from component video track func when called b… (eric-mccann-pro)
- `47ca541` Actually return results. duh (eric-mccann-pro)
- `dbed34c` Set an ImageLocation for video tracks (eric-mccann-pro)
- `bb5d333` Define CLASSIFIERS_FILE and ENABLED_CLASSIFIERS in the json, now that… (eric-mccann-pro)
- `6948569` Gate some of the output behind debug parameter (eric-mccann-pro)
- `82f37b6` Provide Items of Interest instruction (eric-mccann-pro)
- `9e47148` Remove businesses from entities list (eric-mccann-pro)
- `ed36524` Parameterization and documentation (eric-mccann-pro)
- `17e8c54` Switch propertiesKeys instead of defaultValues (eric-mccann-pro)
- `8f299b8` Remove partial word from README.md (eric-mccann-pro)
- `6fc5a37` PROMPT_TEMPLATE is a property (eric-mccann-pro)
- `f3500db` Fix a typo and mention VLLM_URI (eric-mccann-pro)
- `64d62bb` Don't mention VRAM (eric-mccann-pro)
- `247ca37` Make sample classifiers match readme AND put ticks around properties+… (eric-mccann-pro)
- `d18a7af` Switch to defaults for the properties that have a default (eric-mccann-pro)
- `c192dca` Output => tracks (eric-mccann-pro)
- `c32d4bb` justification => reasoning (eric-mccann-pro)
- `60e7fa3` Specific Items of Interest appendage is never empty if present (eric-mccann-pro)
- `9eb3a39` reasonining (eric-mccann-pro)
- `eb4fccc` Use classifier confidence for detection confidence (eric-mccann-pro)
- `fc5dc70` Use FakeClass for all of the manual openai-api client mock buildout (eric-mccann-pro)
- `d838d0b` Make sure the tracks are ordered in accordance with their index (eric-mccann-pro)
- `ceb6801` Validate schema, close clients between calls (prevents deadlock) (eric-mccann-pro)
- `67e680f` Fix path to vllm-entrypoint.sh. (jrobble)
- `3097d3f` Disable XET for hf download and fix deprecation warning (eric-mccann-pro)
- `7318613` Perform download in separate stage (eric-mccann-pro)
- `84e170c` Fix max-model-length parameter name (eric-mccann-pro)
- `8f5f61d` Merge branch 'develop' into feat/qwen-speech-summarization (jrobble)
- `177671b` Update versions to 10.0. (jrobble)
- `9202086` Fix JSONArgsRecommended warning. (jrobble)
- `04f7e1a` Fix how Whisper is returning duplicate tracks for videos. (jrobble)
- `541fac1` Wait up to two minutes for vllm to be healthy for each call to summarize (eric-mccann-pro)
- `e6506bd` Merge remote-tracking branch 'origin/feat/qwen-speech-summarization' … (eric-mccann-pro)
- `2a14fe1` Use algorithm prop. (jrobble)
- `e3d6327` Fix test. (jrobble)
- `08d5531` Fix test round 2. (jrobble)
- `f7fa93c` Fix bug. (jrobble)
- `e3e9c0d` Use local_files_only=True. (jrobble)
- `48acdaa` Download autotokenizer in Dockerfile. (jrobble)
- `f38fc8a` Fix syntax. (jrobble)
- `31f1b68` Proper quotes. (jrobble)
- `4710b35` Use import. (jrobble)
- `c5d9d52` Bug fix. (jrobble)
- `319d1a7` Use HF_HUB_OFFLINE. (jrobble)
- `073003b` Use HF_HUB_OFFLINE before import. (jrobble)
- `e0be4ec` Merge remote-tracking branch 'origin/jrobble/qwen-speech-summarizatio… (eric-mccann-pro)
- `15404ee` Filter out low confidence classifiers (eric-mccann-pro)
- `79ffed8` Add classifier_confidence_minimum to descriptor (eric-mccann-pro)
- `05e12ee` Add requests to setup.cfg (eric-mccann-pro)
- `b935b2e` descriptor: true ==> "TRUE" (eric-mccann-pro)
- `c11328e` Fix bug reading classifiers.json from $MPF_HOME. (jrobble)
- `03213f0` Add a todo about manually aglommerating lists instead of making the m… (eric-mccann-pro)
- `39810e4` Force minimum_chunk_size=2 by default to force json array chunking to… (eric-mccann-pro)
- `b381967` descriptor: prevent segmentation to avoid redundant summarization bet… (eric-mccann-pro)
- `b7f139c` Remove vestigial functions from non-component qwen summarization impl… (eric-mccann-pro)
- `a319bd0` Replace deprecated pkg_resources (eric-mccann-pro)
- `b0fd935` Merge remote-tracking branch 'origin/feat/qwen-speech-summarization' … (eric-mccann-pro)
- `1b257fa` Use PROMPT_TEMPLATE property default to handle both the overriden and… (eric-mccann-pro)
- `3d6e258` _underscore_private_class_methods (eric-mccann-pro)
- `5683ff9` Fix test_slapchop test expected to match minimum 2-object chunk size … (eric-mccann-pro)
- `f652c72` Assume HF_HUB_OFFLINE shouldn't be one for pytest, allowing downloadi… (eric-mccann-pro)
- `78f5999` Add a shortcut for the tail of summarize_summaries grouping. Don't su… (eric-mccann-pro)
- `5dccb08` Revert 39810e4 and 5683ff9: min_grouping is selected in summarize_sum… (eric-mccann-pro)
- `f22a61a` If chunk isn't a string, don't loads it (eric-mccann-pro)
- `ef29659` pydantic: fix deprecated call to .json() (eric-mccann-pro)
- `292b2bf` Fixup path handling for prompt_template AND its default value (eric-mccann-pro)
- `d584076` CORRECT the extension on the prompt template (eric-mccann-pro)
- `494cb9f` If you're a float, be a float (eric-mccann-pro)
- `0381f8b` Correct property name (eric-mccann-pro)
- `9a9c478` Copy topics+entities to main detection more defensively (eric-mccann-pro)
- `f61fa98` Update python/QwenSpeechSummarization/qwen_speech_summarization_compo… (eric-mccann-pro)
- `968684b` Raise proper exception abhout audio detection not being supported (eric-mccann-pro)
- `12d6f06` Freeze versions of some deps (eric-mccann-pro)
- `0fb3f55` Replace setup.cfg (eric-mccann-pro)
- `562d02a` Move env vars to config (eric-mccann-pro)
- `790c472` print => logger (eric-mccann-pro)
- `cb74610` Pin vllm version (eric-mccann-pro)
- `1878752` Update overview (eric-mccann-pro)
- `723ba20` Prompt grammar and clarity (eric-mccann-pro)
- `84411f0` Give more control over healthcheck (eric-mccann-pro)
- `83ed483` Skip healthcheck if vllm_health_uri is falsey (eric-mccann-pro)
- `1d2b204` Remove duplicate import (eric-mccann-pro)
- `5fd25e1` Merge remote-tracking branch 'origin/master' into feat/qwen-speech-su… (eric-mccann-pro)
- `40b48f9` Correct exported component path (eric-mccann-pro)
- `2f6199b` Update python/QwenSpeechSummarization/qwen_speech_summarization_compo… (eric-mccann-pro)
- `cdfaad4` Describe get_detections_from_all_video_tracks (eric-mccann-pro)
- `19c84aa` No parent package? no problem (eric-mccann-pro)
- `779845d` Rename file about public domain origin of test.txt to NOTICE (eric-mccann-pro)
- `4236821` Correct env var name in the readme (eric-mccann-pro)
- `e8a8a22` Make copyrights right (eric-mccann-pro)
- `bd0c557` Make more of the parameters live permanently on config rather than on… (eric-mccann-pro)
- `ba8204c` Fix deprecated logging method (eric-mccann-pro)
- `b84bf88` Lower the roof. Raise a real mpf DetectionError (eric-mccann-pro)
- `f9d0ace` Correct property name for CLASSIFIERS_LIST (eric-mccann-pro)
- `d8ad16e` Fix (and test) chunk overlap (eric-mccann-pro)
- `03d2374` Fix counting of overlaps AND fix tests after related logic was changed (eric-mccann-pro)
- `0a93995` Rename to LlmSpeechSummarization (eric-mccann-pro)
- `08336e1` Missed component rebranding instances (eric-mccann-pro)
- `46423d1` Use urljoin instead of path join for health uri default (eric-mccann-pro)
- `8ce36d6` Don't leak contexts (eric-mccann-pro)
- `f5ae9a8` Create a constant for BOUNDARY_TOKEN_FOR_COUNTING (eric-mccann-pro)
- `55a940e` Fix slapchop overlap and add a new test for overlap >1 (eric-mccann-pro)
- `014076d` Add another test for longish overlaps (eric-mccann-pro)
- `56f5be9` Fix tests' expected output in light of slapchop corrections (eric-mccann-pro)
- `5918691` Remove prints (eric-mccann-pro)
- `6bf3b25` Don't rely on __dict__ to access entities (eric-mccann-pro)
- `7a62b5d` Prevent newlines within an utterance when mangling a book as test tra… (eric-mccann-pro)
- `9f6666f` Fix counting of tokens per each input line (eric-mccann-pro)
- `6d00138` Force spoken tracks to not contain newlines (eric-mccann-pro)
- `07015a9` run_component_test: now with logging configured, and env variable han… (eric-mccann-pro)
- `d10dd7f` Set slapchop logger level too (eric-mccann-pro)
- `04d20a8` Don't describe the output schema in the prompt. Just include the json… (eric-mccann-pro)
- `c36bcbd` Add COMBINATION_INSTRUCTIONs to the schema that are specifically for … (eric-mccann-pro)
- `4f659b0` Update test_llm_speech_summarization_component assertion to match upd… (eric-mccann-pro)
- `b9e92db` Move classifiers instructions into schema.py (eric-mccann-pro)
- `f59410e` Add more specific language about summary combination (eric-mccann-pro)
- `587e8de` Fix some wonk with when summarizing big chunks (eric-mccann-pro)
- `e79f7b4` Lower default chunk size now that tokens are counted correctly (eric-mccann-pro)
- `3c94b8f` Merge remote-tracking branch 'origin/feat/qwen-speech-summarization' … (eric-mccann-pro)
- `48ce688` Increase safety margin on max_completion_tokens (eric-mccann-pro)
- `5f4a1fe` Add properties ALLOW_REFUSAL_RESPONSE and ALLOW_PARTIAL_RESPONSE. Log… (eric-mccann-pro)
- `8731fe2` Merge remote-tracking branch 'origin/feat/qwen-speech-summarization' … (eric-mccann-pro)
- `be7c7fa` Restore global logger instance (eric-mccann-pro)
- `260a369` Create a utility fn and test for throwing errors with mpf types (eric-mccann-pro)
- `49a4045` Only use mpf exception types (eric-mccann-pro)
- `9064cca` Reduce streaming COMPLETION messages to de bug (only enabled when DEB… (eric-mccann-pro)
- `998bb2b` Remove redundant debug messages (eric-mccann-pro)
- `c4f2910` Fix tests with FakeLLM after adjustments to _get_output (eric-mccann-pro)
- `e24d3b6` Make last_error in connection loop more reliable (eric-mccann-pro)
- `0d43ed8` Fix default VLLM_URI in descriptor, post-rename (eric-mccann-pro)
- `9a6d47b` Allow an empty vllm_health_uri to skip the health check (eric-mccann-pro)
- `7edcd83` Add a separate TOKENIZER_MODEL in case the model name used for the op… (eric-mccann-pro)
- `6b63f85` Azure support (eric-mccann-pro)
- `d23b333` Make the schema compatible with azure's gpt (eric-mccann-pro)
- `0b4ace3` Sometimes ChatGPT will stream an event without choices. Continue when… (eric-mccann-pro)
- `a976278` Make confidence be a rating that's strictly about true positives (eric-mccann-pro)
- `8037c23` Export LlmSpeechSummaryComponent (eric-mccann-pro)
- `e9d45ce` classifiers: conditionally restore partial instructions to default pr… (eric-mccann-pro)
- `3ef168f` codify the length requirement for the classifiers list in the schema (eric-mccann-pro)
- `85a8192` Restore previous monolithic classifier... 50 is probably way too many (eric-mccann-pro)
- `4cb1ddc` Dynamically build pydantic model such that classifiers is an object N… (eric-mccann-pro)
- `6c7634f` Revert "classifiers: conditionally restore partial instructions to de… (eric-mccann-pro)
- `e1237fa` Restore TRUE NEGATIVE confidence specification (eric-mccann-pro)
- `a9eb818` Correct simulated test input for new LLM response_format AND correct … (eric-mccann-pro)
- `892ab9e` Fix handling of dynamic strict model fields for azure gpt (eric-mccann-pro)
- `3dd3e6c` Expose more util functions to callers when used as a module (eric-mccann-pro)
- `91987cf` Ensure text is always a string before trying to remove newlines (eric-mccann-pro)
- `6cbb8e6` Handle and report content_filter errors when encountered (eric-mccann-pro)
- `a68e118` SUCCESSFULLY make sure there are no newlines in the CSV's text field,… (eric-mccann-pro)
- `67c6256` Reword descriptions of properties that allow continuation after a pos… (eric-mccann-pro)
- `cf5e1e3` Make property names less variable specific (eric-mccann-pro)
- `d9dcd42` Use TEXT if no TRANSCRIPT and no TRANSLATION (eric-mccann-pro)
- `e0e24a6` llmspeechsummarization/Dockerfile: download both qwen and gpt-oss tok… (eric-mccann-pro)
- `c8a95d9` Merge branch 'develop' into feat/qwen-speech-summarization (jrobble)
- `b7d9350` Merge remote-tracking branch 'origin/develop' into feat/qwen-speech-s… (tstrass)
New file, 52 lines (the component's Dockerfile; its path is not captured in this view):

```dockerfile
# syntax=docker/dockerfile:1.2

#############################################################################
# NOTICE                                                                    #
#                                                                           #
# This software (or technical data) was produced for the U.S. Government   #
# under contract, and is subject to the Rights in Data-General Clause       #
# 52.227-14, Alt. IV (DEC 2007).                                            #
#                                                                           #
# Copyright 2025 The MITRE Corporation. All Rights Reserved.                #
#############################################################################

#############################################################################
# Copyright 2025 The MITRE Corporation                                      #
#                                                                           #
# Licensed under the Apache License, Version 2.0 (the "License");           #
# you may not use this file except in compliance with the License.          #
# You may obtain a copy of the License at                                   #
#                                                                           #
#    http://www.apache.org/licenses/LICENSE-2.0                             #
#                                                                           #
# Unless required by applicable law or agreed to in writing, software       #
# distributed under the License is distributed on an "AS IS" BASIS,         #
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.  #
# See the License for the specific language governing permissions and       #
# limitations under the License.                                            #
#############################################################################

ARG BUILD_REGISTRY
ARG BUILD_TAG=latest
FROM ${BUILD_REGISTRY}openmpf_python_executor_ssb:${BUILD_TAG}

ARG RUN_TESTS=false
RUN set -x; DEPS="transformers>=4.51.0 accelerate==1.12.0 pydantic==2.12.5 openai==2.16.0 jinja2"; \
    if [ "${RUN_TESTS,,}" == true ]; then DEPS="$DEPS pytest"; fi; \
    pip3 install --no-cache-dir $DEPS

ARG VLLM_MODEL="Qwen/Qwen3-30B-A3B-Instruct-2507-FP8"
ARG TOKENIZER_MODELS="${VLLM_MODEL} openai/gpt-oss-120b"

RUN --mount=target=.,readwrite \
    install-component.sh; \
    # make sure the tokenizers are available offline
    for tokenizer_model in ${TOKENIZER_MODELS}; do /opt/mpf/plugin-venv/bin/python3 -c "from transformers import AutoTokenizer; AutoTokenizer.from_pretrained(\"${tokenizer_model}\")"; done; \
    if [ "${RUN_TESTS,,}" == true ]; then pytest llm_speech_summarization_component; fi

LABEL org.label-schema.license="Apache 2.0" \
      org.label-schema.name="OpenMPF LLM Speech Summarization" \
      org.label-schema.schema-version="1.0" \
      org.label-schema.url="https://openmpf.github.io" \
      org.label-schema.vcs-url="https://github.com/openmpf/openmpf-components" \
      org.label-schema.vendor="MITRE"
```
New file, 58 lines (the vLLM server image; the README refers to this as Dockerfile.vllm):

```dockerfile
# (NOTICE and Apache 2.0 license header identical to the component Dockerfile's)

FROM ubuntu:20.04 AS download_model

RUN --mount=type=tmpfs,target=/var/cache/apt \
    --mount=type=tmpfs,target=/var/lib/apt/lists \
    --mount=type=tmpfs,target=/tmp \
    apt-get update && apt-get install --no-install-recommends -y curl ca-certificates python3-venv python3-pip python3-certifi python3-urllib3 && \
    pip install huggingface_hub[cli]

ARG VLLM_MODEL="Qwen/Qwen3-30B-A3B-Instruct-2507-FP8"
ENV VLLM_MODEL="${VLLM_MODEL}"
RUN HF_HUB_DISABLE_XET=1 hf download ${VLLM_MODEL}


FROM vllm/vllm-openai:v0.15.0
ARG VLLM_MODEL="Qwen/Qwen3-30B-A3B-Instruct-2507-FP8"
ENV VLLM_MODEL="${VLLM_MODEL}"

USER root
RUN mkdir -p /root/.cache
COPY --chown=root:root --from=download_model /root/.cache/huggingface /root/.cache/huggingface

# default value
ENV MAX_MODEL_LEN=45000

COPY --chown=root:root vllm-entrypoint.sh /usr/bin/

ENTRYPOINT ["/usr/bin/vllm-entrypoint.sh"]

CMD [ \
    "--host", "0.0.0.0",\
    "--port", "11434"\
]
```
New file, 58 lines (the component README):

````markdown
# Overview

The LlmSpeechSummarization component uses a vLLM-served LLM (Qwen3 by default) to summarize speech detections from feed-forward video tracks.

# Details

This folder contains the source code for the OpenMPF LLM Speech Summarization component.

This component requires a base image with Python 3.10+ and an mpf_component_api that supports mpf.AllVideoTracksJob.

We have tested Qwen/Qwen3-30B-A3B-Instruct-2507 on an 80GB card and Qwen/Qwen3-30B-A3B-Instruct-2507-FP8 on a 40GB card. Both seem quite viable.

If you are daring, any OpenAI-compatible API could be substituted for vLLM, and any model could replace Qwen3-30B, BUT these scenarios are untested and your mileage may vary.

In either case, the component assumes anonymous access to the OpenAI-API-compatible endpoint that performs the summarization.

# Inputs

- classifiers.json: defines subjects of interest. Each is scored with a confidence in [0, 1]: low if the input does NOT match the defined classifier, high if it does.

```json
[
  {
    "Classifier": "Major League Baseball",
    "Definition": "discussions regarding major league baseball teams, professional baseball players, and baseball stadiums",
    "Items of Interest": "Baseball fields, baseball teams, baseball players, baseballs, baseball bats, baseball hats"
  }
]
```

# Properties

- `CLASSIFIERS_FILE`: when set to an absolute path (with a valid classifiers.json in a volume mounted such that the file is at the specified path), replaces the default classifiers.json.
- `CLASSIFIERS_LIST`: either "ALL", or a comma-separated list of the "Classifier" field names of defined classifiers.
- `PROMPT_TEMPLATE`: if set, replaces the packaged `templates/prompt.jinja` with one read from this location. Must include self-recursive summarization instructions and the Jinja variables `{{ classifiers }}` and `{{ input }}`.

# Docker build-args

- `VLLM_MODEL`: if building Dockerfile.vllm for vLLM (which downloads the model during the Docker build), this is the ONLY model that your llm_speech_summarization_component will be able to use.

NOTE: if you have an internet connection at runtime, you may use the image `vllm/vllm-openai:latest` directly in lieu of building Dockerfile.vllm. We do not support this arrangement, BUT it is possible with the right command on the Docker service.

# Environment variables

- `VLLM_MODEL`: must MATCH the model name being served by vLLM, OR be available at whichever OpenAI-API-compatible API you choose to talk to.
- `VLLM_URI`: the base_url of the OpenAI-API-compatible API providing access to your model. If your vLLM service is named vllm, then this would need to be `http://vllm:11434/v1`.
- `MAX_MODEL_LEN`: should be defined on both the llm-speech-summarization container AND the llm-speech-summarization-server (vLLM) container. It is the maximum input+output token count you can use without erroring. We have tried 45000 for the -FP8 model and 120000 for the nonquantized model on a 40GB and 80GB card, respectively.
- `INPUT_TOKEN_CHUNK_SIZE`: should be about 20%-30% of your `MAX_MODEL_LEN`. It is the token size that your input will be split into during chunking before making a series of calls to the LLM.
- `INPUT_CHUNK_TOKEN_OVERLAP`: should be small and constant. If it is too small, there will be no overlap between chunks, which could negatively impact performance on huge input tracks.

# Outputs

A list of mpf.VideoTracks or mpf.AudioTracks (once supported).

Track[0] always contains the overall summary of the input, including primary/other topics and entities.

Tracks[1..n] contain the confidence, reasoning, and name for each classifier in the intersection of the enabled classifiers AND the classifiers defined in classifiers.json.
````
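The `INPUT_TOKEN_CHUNK_SIZE` / `INPUT_CHUNK_TOKEN_OVERLAP` interaction above can be sketched as follows. This is a simplified illustration of overlapping chunking, not the component's `slapchop` implementation; the real component counts tokenizer tokens and chunks CSV rows, but the arithmetic is the same for any token list:

```python
def split_into_chunks(tokens, chunk_size, overlap):
    """Split `tokens` into chunks of at most `chunk_size` tokens, where each
    chunk after the first repeats the last `overlap` tokens of the previous
    chunk, giving the LLM a little shared context across calls."""
    if chunk_size <= overlap:
        raise ValueError("chunk_size must be larger than overlap")
    chunks = []
    start = 0
    while start < len(tokens):
        chunks.append(tokens[start:start + chunk_size])
        if start + chunk_size >= len(tokens):
            break  # last chunk reached the end of the input
        start += chunk_size - overlap
    return chunks

# Ten tokens, chunk size 4, overlap 1: adjacent chunks share one token.
chunks = split_into_chunks(list(range(10)), chunk_size=4, overlap=1)
# → [[0, 1, 2, 3], [3, 4, 5, 6], [6, 7, 8, 9]]
```

Each chunk is summarized separately, and the per-chunk summaries are then combined (the `summarize_summaries` step exported from the package).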
New file, 33 additions and 0 deletions: python/LlmSpeechSummarization/llm_speech_summarization_component/__init__.py

```python
# (NOTICE and Apache 2.0 license header identical to the component Dockerfile's)

from .llm_speech_summarization_component import LlmSpeechSummaryComponent, JobConfig

from .schema import StructuredResponseClassFactory

from .llm_util.classifiers import get_classifier_lines, get_classifier_dict
from .llm_util.slapchop import split_csv_into_chunks, summarize_summaries, BOUNDARY_TOKEN_FOR_COUNTING
from .llm_util.input_cleanup import convert_speech_tracks_to_csv
```
New file, 7 additions and 0 deletions: python/LlmSpeechSummarization/llm_speech_summarization_component/classifiers.json

```json
[
  {
    "Classifier": "Major League Baseball",
    "Definition": "discussions regarding major league baseball teams, professional baseball players, and baseball stadiums",
    "Items of Interest": "Baseball fields, baseball teams, baseball players, baseballs, baseball bats, baseball hats"
  }
]
```
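The `CLASSIFIERS_LIST` property ("ALL", or a comma-separated list of "Classifier" names) selects which of these entries are active. A minimal sketch of that selection logic, assuming this file format; the function name is hypothetical and not the component's actual API:

```python
import json

def select_classifiers(classifiers_json: str, classifiers_list: str) -> list:
    """Return the classifier entries enabled by a CLASSIFIERS_LIST value:
    "ALL" keeps everything, otherwise only entries whose "Classifier" field
    appears in the comma-separated list are kept."""
    classifiers = json.loads(classifiers_json)
    if classifiers_list.strip().upper() == "ALL":
        return classifiers
    wanted = {name.strip() for name in classifiers_list.split(",")}
    return [c for c in classifiers if c["Classifier"] in wanted]

sample = json.dumps([{
    "Classifier": "Major League Baseball",
    "Definition": "discussions regarding major league baseball teams...",
    "Items of Interest": "Baseball fields, baseball teams, ...",
}])
select_classifiers(sample, "ALL")                          # keeps the one entry
select_classifiers(sample, "Major League Baseball, Other") # keeps it by name
select_classifiers(sample, "Hockey")                       # → []
```

The summarization output then carries one track per selected classifier, in addition to the overall summary track.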