Skip to content
20 changes: 19 additions & 1 deletion docker-compose.components.yml
Original file line number Diff line number Diff line change
Expand Up @@ -114,7 +114,6 @@ services:
# Optionally, limit the GPUs exposed to the server.
# At least one GPU is required to run the YOLO TensorRT engine.
- NVIDIA_VISIBLE_DEVICES=all

command: [tritonserver,
--model-repository=/models,
--strict-model-config=false,
Expand Down Expand Up @@ -221,6 +220,25 @@ services:
image: ${REGISTRY}openmpf_ortools_subject_component:${TAG}
build: ${OPENMPF_PROJECTS_PATH}/openmpf-components/python/OrToolsSubjectComponent

# LLM speech-summarization component service.
# Inherits common component settings via the `component-base` merge anchor,
# which is defined elsewhere in this file (not visible in this chunk).
# NOTE(review): leading indentation appears to have been stripped by the
# diff extraction — these keys must sit under the top-level `services:`
# mapping in the real file; verify nesting before applying.
llm-speech-summarization:
<<: *component-base
image: ${REGISTRY}openmpf_llm_speech_summarization:${TAG}
build: ${OPENMPF_PROJECTS_PATH}/openmpf-components/python/LlmSpeechSummarization

# vLLM inference server backing the llm-speech-summarization component.
# Built from a dedicated Dockerfile (Dockerfile.vllm) in the same component
# directory. NOTE(review): unlike the sibling component services, this stanza
# does NOT merge `*component-base` — confirm that omission is intentional.
llm-speech-summarization-server:
image: ${REGISTRY}openmpf_llm_speech_summarization_server:${TAG}
build:
context: ${OPENMPF_PROJECTS_PATH}/openmpf-components/python/LlmSpeechSummarization
dockerfile: Dockerfile.vllm
deploy:
# NOTE(review): `mode: global` (one task per node) is honored only under
# Docker Swarm; plain `docker compose up` ignores it. Confirm this stack
# is deployed with `docker stack deploy`, otherwise this line is inert.
mode: global
# Optional GPU reservation, kept disabled by default. Uncommenting pins
# the server to NVIDIA device 0; requires the NVIDIA container toolkit.
# resources:
#     reservations:
#         devices:
#             - driver: nvidia
#               device_ids: ['0']
#               capabilities: [gpu]

scene-change-detection:
<<: *component-base
image: ${REGISTRY}openmpf_scene_change_detection:${TAG}
Expand Down