Update default workflow resolution to 384x704 and warm up the pipeline at the default workflow resolution

eliteprox · eliteprox · commit 290eeee00f4b · 2025-05-21T14:00:10.000Z
diff --git a/runner/app/live/pipelines/comfyui.py b/runner/app/live/pipelines/comfyui.py
@@ -51,6 +51,49 @@ def validate_prompt(cls, v) -> dict:
         raise ValueError("Prompt must be either a JSON object or such JSON object serialized as a string")
 
 
+class ComfyUtils:
+    """Helpers for reading and editing the EmptyLatentImage node of a workflow."""
+
+    @staticmethod
+    def get_latent_image_dimensions(workflow: dict) -> tuple[int | None, int | None]:
+        """Get dimensions from the first EmptyLatentImage node in the workflow.
+
+        Args:
+            workflow: The workflow JSON dictionary
+
+        Returns:
+            Tuple of (width, height) from the latent image, or (None, None) if not found
+        """
+        for node in workflow.values():
+            if isinstance(node, dict) and node.get("class_type") == "EmptyLatentImage":
+                try:
+                    inputs = node.get("inputs", {})
+                    return inputs.get("width"), inputs.get("height")
+                except AttributeError as e:
+                    logging.warning(f"Failed to extract dimensions from latent image: {e}")
+                    return None, None
+        return None, None
+
+    @staticmethod
+    def update_latent_image_dimensions(workflow: dict, width: int, height: int) -> None:
+        """Update the first EmptyLatentImage node dimensions in the workflow, in place.
+
+        Args:
+            workflow: The workflow JSON dictionary
+            width: Width to set
+            height: Height to set
+        """
+        for node in workflow.values():
+            # Non-dict entries cannot be latent image nodes; skip them rather than raise
+            if isinstance(node, dict) and node.get("class_type") == "EmptyLatentImage":
+                try:
+                    node.setdefault("inputs", {}).update(width=width, height=height)
+                    logging.info(f"Updated latent image dimensions to {width}x{height}")
+                except (AttributeError, TypeError) as e:
+                    logging.warning(f"Failed to update latent image dimensions: {e}")
+                break
+
+
 class ComfyUI(Pipeline):
     def __init__(self):
         comfy_ui_workspace = os.getenv(COMFY_UI_WORKSPACE_ENV)
@@ -61,14 +104,30 @@ def __init__(self):
     async def initialize(self, **params):
         new_params = ComfyUIParams(**params)
         logging.info(f"Initializing ComfyUI Pipeline with prompt: {new_params.prompt}")
-        # TODO: currently its a single prompt, but need to support multiple prompts
+        
+        # Explicit width/height params take precedence over the workflow's latent image
+        width, height = params.get("width"), params.get("height")
+        if isinstance(new_params.prompt, dict):
+            if width is None or height is None:
+                # If dimensions not provided in params, get them from latent image
+                latent_width, latent_height = ComfyUtils.get_latent_image_dimensions(new_params.prompt)
+                new_params.width = width or latent_width or new_params.width
+                new_params.height = height or latent_height or new_params.height
+            else:
+                # If dimensions provided in params, update the latent image
+                ComfyUtils.update_latent_image_dimensions(new_params.prompt, width, height)
+
+        # TODO clean up extra vars
+        width = width or new_params.width
+        height = height or new_params.height
+    
         await self.client.set_prompts([new_params.prompt])
         self.params = new_params
 
-        # Warm up the pipeline
-        logging.info(f"Warming up pipeline with dimensions: {new_params.width}x{new_params.height}")
+        # Warm up the pipeline with the final dimensions
+        logging.info(f"Warming up pipeline with dimensions: {width}x{height}")
         dummy_frame = VideoFrame(None, 0, 0)
-        dummy_frame.side_data.input = torch.randn(1, new_params.height, new_params.width, 3)
+        dummy_frame.side_data.input = torch.randn(1, height, width, 3)
 
         for _ in range(WARMUP_RUNS):
             self.client.put_video_input(dummy_frame)
diff --git a/runner/app/live/pipelines/comfyui_default_workflow.json b/runner/app/live/pipelines/comfyui_default_workflow.json
@@ -23,7 +23,7 @@
   },
   "3": {
     "inputs": {
-      "unet_name": "dynamic-dreamshaper8_SD15_dyn-b-1-4-2-h-448-704-512-w-448-704-512_00001_.engine",
+      "unet_name": "static-dreamshaper8_SD15_$stat-b-1-h-384-w-704_00001_.engine",
       "model_type": "SD15"
     },
     "class_type": "TensorRTLoader",
@@ -194,7 +194,7 @@
   },
   "16": {
     "inputs": {
-      "width": 448,
+      "width": 384,
       "height": 704,
       "batch_size": 1
     },
diff --git a/runner/app/live/streamer/protocol/trickle.py b/runner/app/live/streamer/protocol/trickle.py
@@ -23,26 +23,31 @@ def __init__(self, subscribe_url: str, publish_url: str, control_url: Optional[s
         self.events_publisher = None
         self.subscribe_task = None
         self.publish_task = None
-        self.output_width = 512
-        self.output_height = 512
 
     async def start(self, params: dict = None):
         self.subscribe_queue = queue.Queue[InputFrame]()
         self.publish_queue = queue.Queue[OutputFrame]()
         metadata_cache = LastValueCache[dict]() # to pass video metadata from decoder to encoder
-        
-        # Get resolution from params if available
-        if params:
-            self.output_width = params.get('width', self.output_width)
-            self.output_height = params.get('height', self.output_height)
+
+        # TODO fix this default value issue (params may be None, hence the guard)
+        output_width = (params or {}).get('width', 512)
+        output_height = (params or {}).get('height', 512)
         
         self.subscribe_task = asyncio.create_task(
-            media.run_subscribe(self.subscribe_url, self.subscribe_queue.put, metadata_cache.put, self.emit_monitoring_event, 
-                              output_width=self.output_width, output_height=self.output_height)
+            media.run_subscribe(self.subscribe_url, 
+                                self.subscribe_queue.put, 
+                                metadata_cache.put, 
+                                self.emit_monitoring_event,
+                                output_width, 
+                                output_height)
         )
         self.publish_task = asyncio.create_task(
-            media.run_publish(self.publish_url, self.publish_queue.get, metadata_cache.get, self.emit_monitoring_event, 
-                            output_width=self.output_width, output_height=self.output_height)
+            media.run_publish(self.publish_url, 
+                              self.publish_queue.get, 
+                              metadata_cache.get, 
+                              self.emit_monitoring_event, 
+                              output_width, 
+                              output_height)
         )
         if self.control_url and self.control_url.strip() != "":
             self.control_subscriber = TrickleSubscriber(self.control_url)
diff --git a/runner/app/live/streamer/streamer.py b/runner/app/live/streamer/streamer.py
@@ -48,6 +48,9 @@ async def start(self, params: dict):
             self.request_id, self.stream_id, params, self
         )
 
+        params['width'] = params.get('width', self.output_width)
+        params['height'] = params.get('height', self.output_height)
+
         self.stop_event.clear()
         await self.protocol.start(params)
 
@@ -180,24 +183,16 @@ async def run_ingress_loop(self):
             if frame.mode != "RGBA":
                 frame = frame.convert("RGBA")
 
-            # Scale image to 512x512 as most models expect this size, especially when using tensorrt
+            target_width = self.output_width
+            target_height = self.output_height
+
+            # Scale image to target size
             width, height = frame.size
-            if (width, height) != (512, 512):
+            if (width, height) != (target_width, target_height):
                 frame_array = np.array(frame)
 
-                # Crop to the center square if image not already square
-                square_size = min(width, height)
-                if width != height:
-                    start_x = width // 2 - square_size // 2
-                    start_y = height // 2 - square_size // 2
-                    frame_array = frame_array[
-                        start_y : start_y + square_size, start_x : start_x + square_size
-                    ]
-
                 # Resize using cv2 (much faster than PIL)
-                if square_size != 512:
-                    frame_array = cv2.resize(frame_array, (512, 512))
-
+                frame_array = cv2.resize(frame_array, (target_width, target_height))
                 frame = Image.fromarray(frame_array)
 
             logging.debug(
diff --git a/runner/app/live/trickle/decoder.py b/runner/app/live/trickle/decoder.py
@@ -9,15 +9,15 @@
 
 MAX_FRAMERATE=24
 
-def decode_av(pipe_input, frame_callback, put_metadata, output_width=512, output_height=512):
+def decode_av(pipe_input, frame_callback, put_metadata, output_width, output_height):
     """
     Reads from a pipe (or file-like object).
 
     :param pipe_input: File path, 'pipe:', sys.stdin, or another file-like object.
     :param frame_callback: A function that accepts an InputFrame object
     :param put_metadata: A function that accepts audio/video metadata
-    :param output_width: Desired output width (default: 512)
-    :param output_height: Desired output height (default: 512)
+    :param output_width: Desired output width
+    :param output_height: Desired output height
     """
     container = cast(InputContainer, av.open(pipe_input, 'r'))
 
diff --git a/runner/app/live/trickle/media.py b/runner/app/live/trickle/media.py
@@ -16,7 +16,7 @@
 MAX_ENCODER_RETRIES = 3
 ENCODER_RETRY_RESET_SECONDS = 120 # reset retry counter after 2 minutes
 
-async def run_subscribe(subscribe_url: str, image_callback, put_metadata, monitoring_callback, output_width=512, output_height=512):
+async def run_subscribe(subscribe_url: str, image_callback, put_metadata, monitoring_callback, output_width, output_height):
     # TODO add some pre-processing parameters, eg image size
     try:
         in_pipe, out_pipe = os.pipe()
@@ -112,7 +112,7 @@ def decode_runner():
     loop = asyncio.get_running_loop()
     await loop.run_in_executor(None, decode_runner)
 
-def encode_in(task_pipes, task_lock, image_generator, sync_callback, get_metadata, output_width=512, output_height=512, **kwargs):
+def encode_in(task_pipes, task_lock, image_generator, sync_callback, get_metadata, output_width, output_height, **kwargs):
     # encode_av has a tendency to crash, so restart as necessary
     retryCount = 0
     last_retry_time = time.time()
@@ -146,7 +146,7 @@ def encode_in(task_pipes, task_lock, image_generator, sync_callback, get_metadat
                         logging.exception("Error closing pipe on task list", stack_info=True)
             logging.info(f"Closed pipes - {pipe_count}/{total_pipes}")
 
-async def run_publish(publish_url: str, image_generator, get_metadata, monitoring_callback, output_width=512, output_height=512):
+async def run_publish(publish_url: str, image_generator, get_metadata, monitoring_callback, output_width, output_height):
     first_segment = True
 
     publisher = None