huggingface · drbh · Nov 11, 2024 · Nov 13, 2024 · Nov 13, 2024 · Nov 14, 2024
diff --git a/.github/workflows/autodocs.yaml b/.github/workflows/autodocs.yaml
@@ -20,7 +20,7 @@ jobs:
     - name: Install Protocol Buffers compiler
       run: |
         sudo apt-get update
-        sudo apt-get install -y protobuf-compiler libprotobuf-dev
+        sudo apt-get install -y protobuf-compiler libprotobuf-dev clang libavcodec-dev libavfilter-dev libavdevice-dev libavformat-dev libavutil-dev pkg-config
 
     - name: Install Launcher
       id: install-launcher

diff --git a/.github/workflows/tests.yaml b/.github/workflows/tests.yaml
@@ -43,7 +43,9 @@ jobs:
       - name: Install
         run: |
           sudo apt update
-          sudo apt install python3.11-dev -y
+          sudo apt install python3.11-dev python3.11-venv python3-pip clang libavcodec-dev libavfilter-dev libavdevice-dev libavformat-dev libavutil-dev pkg-config -y
+          export PKG_CONFIG_PATH=$PKG_CONFIG_PATH:/usr/lib/x86_64-linux-gnu/pkgconfig
+          python -m pip install --upgrade pip
           make install-cpu
       - name: Run server tests
         run: |

diff --git a/Cargo.lock b/Cargo.lock
diff --git a/Dockerfile b/Dockerfile
@@ -20,14 +20,28 @@ FROM chef AS builder
 
 RUN apt-get update && DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends \
     python3.11-dev
+
+RUN apt-get update && DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends \
+    ffmpeg \
+    libavcodec-dev \
+    libavfilter-dev \
+    libavdevice-dev \
+    libavformat-dev \
+    libavutil-dev \
+    libswscale-dev \
+    pkg-config \
+    libclang-dev \
+    clang \
+    && rm -rf /var/lib/apt/lists/*
+
 RUN PROTOC_ZIP=protoc-21.12-linux-x86_64.zip && \
     curl -OL https://github.com/protocolbuffers/protobuf/releases/download/v21.12/$PROTOC_ZIP && \
     unzip -o $PROTOC_ZIP -d /usr/local bin/protoc && \
     unzip -o $PROTOC_ZIP -d /usr/local 'include/*' && \
     rm -f $PROTOC_ZIP
 
 COPY --from=planner /usr/src/recipe.json recipe.json
-RUN cargo chef cook --profile release-opt --recipe-path recipe.json
+RUN cargo chef cook --profile release-opt --features video --recipe-path recipe.json
 
 ARG GIT_SHA
 ARG DOCKER_LABEL
@@ -40,7 +54,7 @@ COPY benchmark benchmark
 COPY router router
 COPY backends backends
 COPY launcher launcher
-RUN cargo build --profile release-opt --frozen
+RUN cargo build --profile release-opt --frozen --features video
 
 # Python builder
 # Adapted from: https://github.com/pytorch/pytorch/blob/master/Dockerfile
@@ -61,18 +75,18 @@ ARG TARGETPLATFORM
 ENV PATH /opt/conda/bin:$PATH
 
 RUN apt-get update && DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends \
-        build-essential \
-        ca-certificates \
-        ccache \
-        curl \
-        git && \
-        rm -rf /var/lib/apt/lists/*
+    build-essential \
+    ca-certificates \
+    ccache \
+    curl \
+    git && \
+    rm -rf /var/lib/apt/lists/*
 
 # Install conda
 # translating Docker's TARGETPLATFORM into mamba arches
 RUN case ${TARGETPLATFORM} in \
-         "linux/arm64")  MAMBA_ARCH=aarch64  ;; \
-         *)              MAMBA_ARCH=x86_64   ;; \
+    "linux/arm64")  MAMBA_ARCH=aarch64  ;; \
+    *)              MAMBA_ARCH=x86_64   ;; \
     esac && \
     curl -fsSL -v -o ~/mambaforge.sh -O  "https://github.com/conda-forge/miniforge/releases/download/${MAMBA_VERSION}/Mambaforge-${MAMBA_VERSION}-Linux-${MAMBA_ARCH}.sh"
 RUN chmod +x ~/mambaforge.sh && \
@@ -82,21 +96,24 @@ RUN chmod +x ~/mambaforge.sh && \
 # Install pytorch
 # On arm64 we exit with an error code
 RUN case ${TARGETPLATFORM} in \
-         "linux/arm64")  exit 1 ;; \
-         *)              /opt/conda/bin/conda update -y conda &&  \
-                         /opt/conda/bin/conda install -c "${INSTALL_CHANNEL}" -c "${CUDA_CHANNEL}" -y "python=${PYTHON_VERSION}" "pytorch=$PYTORCH_VERSION" "pytorch-cuda=$(echo $CUDA_VERSION | cut -d'.' -f 1-2)"  ;; \
+    "linux/arm64")  exit 1 ;; \
+    *)              /opt/conda/bin/conda update -y conda &&  \
+    /opt/conda/bin/conda install -c "${INSTALL_CHANNEL}" -c "${CUDA_CHANNEL}" -y "python=${PYTHON_VERSION}" "pytorch=$PYTORCH_VERSION" "pytorch-cuda=$(echo $CUDA_VERSION | cut -d'.' -f 1-2)" "openssl>=3.3.0" ;; \
     esac && \
     /opt/conda/bin/conda clean -ya
 
+RUN /opt/conda/bin/conda install -y pyOpenSSL && /opt/conda/bin/conda clean -ya
+
+
 # CUDA kernels builder image
 FROM pytorch-install AS kernel-builder
 
 ARG MAX_JOBS=8
 ENV TORCH_CUDA_ARCH_LIST="8.0;8.6;9.0+PTX"
 
 RUN apt-get update && DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends \
-        ninja-build cmake \
-        && rm -rf /var/lib/apt/lists/*
+    ninja-build cmake \
+    && rm -rf /var/lib/apt/lists/*
 
 # Build Flash Attention CUDA kernels
 FROM kernel-builder AS flash-att-builder
@@ -188,12 +205,15 @@ ENV HF_HOME=/data \
 WORKDIR /usr/src
 
 RUN apt-get update && DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends \
-        libssl-dev \
-        ca-certificates \
-        make \
-        curl \
-        git \
-        && rm -rf /var/lib/apt/lists/*
+    libssl-dev \
+    ca-certificates \
+    make \
+    curl \
+    git \
+    && rm -rf /var/lib/apt/lists/*
+
+# Add ffmpeg libraries to the path
+ENV LD_LIBRARY_PATH="/usr/lib/x86_64-linux-gnu:$LD_LIBRARY_PATH"
 
 # Copy conda with PyTorch installed
 COPY --from=pytorch-install /opt/conda /opt/conda
@@ -239,6 +259,8 @@ RUN cd server && \
 ENV LD_PRELOAD=/opt/conda/lib/python3.11/site-packages/nvidia/nccl/lib/libnccl.so.2
 # Required to find libpython within the rust binaries
 ENV LD_LIBRARY_PATH="$LD_LIBRARY_PATH:/opt/conda/lib/"
+# Preload conda's libcrypto in addition to (not instead of) the NCCL library preloaded above.
+ENV LD_PRELOAD="/opt/conda/lib/libcrypto.so.3:$LD_PRELOAD"
 # This is needed because exl2 tries to load flash-attn
 # And fails with our builds.
 ENV EXLLAMA_NO_FLASH_ATTN=1
@@ -247,9 +269,9 @@ ENV EXLLAMA_NO_FLASH_ATTN=1
 # The binaries change on every build given we burn the SHA into them
 # The deps change less often.
 RUN apt-get update && DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends \
-        build-essential \
-        g++ \
-        && rm -rf /var/lib/apt/lists/*
+    build-essential \
+    g++ \
+    && rm -rf /var/lib/apt/lists/*
 
 # Install benchmarker
 COPY --from=builder /usr/src/target/release-opt/text-generation-benchmark /usr/local/bin/text-generation-benchmark
@@ -258,6 +280,9 @@ COPY --from=builder /usr/src/target/release-opt/text-generation-router /usr/loca
 # Install launcher
 COPY --from=builder /usr/src/target/release-opt/text-generation-launcher /usr/local/bin/text-generation-launcher
 
+# Copy the ffmpeg libraries
+COPY --from=builder /usr/lib/x86_64-linux-gnu/* /usr/lib/x86_64-linux-gnu-copy/
+ENV LD_LIBRARY_PATH="$LD_LIBRARY_PATH:/usr/lib/x86_64-linux-gnu-copy"
 
 # AWS Sagemaker compatible image
 FROM base AS sagemaker

diff --git a/backends/client/src/lib.rs b/backends/client/src/lib.rs
@@ -9,7 +9,7 @@ use thiserror::Error;
 use tonic::transport;
 use tonic::Status;
 
-pub use v3::{Chunk, Image, Input, InputChunk};
+pub use v3::{Chunk, Image, Input, InputChunk, Video};
 
 #[async_trait]
 pub trait Health {
@@ -79,6 +79,17 @@ impl ChunksToString for Vec<InputChunk> {
                 let encoded = STANDARD.encode(data);
                 output.push_str(&format!("![](data:{};base64,{})", mimetype, encoded))
             }
+            Some(Chunk::Video(Video {
+                data: _,
+                mimetype: _,
+                width: _,
+                height: _,
+                frames: _,
+            })) => {
+                // Video chunks have no string representation yet, so this arm panics.
+                // TODO: support serialization of video data.
+                unimplemented!("Video tokens are not supported for this model configuration")
+            }
             // We don't create empty chunks, so this should be unreachable.
             None => unreachable!("Chunks should never be empty"),
         });

diff --git a/backends/client/src/v3/mod.rs b/backends/client/src/v3/mod.rs
@@ -8,6 +8,6 @@ pub use client::Client;
 pub use pb::generate::v3::{
     input_chunk::Chunk, Batch, CachedBatch, FinishReason, GeneratedText, Generation, GrammarType,
     HealthResponse, Image, InfoResponse, Input, InputChunk, NextTokenChooserParameters, Request,
-    StoppingCriteriaParameters, Tokens,
+    StoppingCriteriaParameters, Tokens, Video,
 };
 pub use sharded_client::ShardedClient;
diff --git a/backends/trtllm/src/looper.rs b/backends/trtllm/src/looper.rs
@@ -301,6 +301,7 @@ impl TensorRtLlmBackendV2 {
             1 => match request.inputs.first().expect("Single item-chunk") {
                 Chunk::Text(_) => Ok(()),
                 Chunk::Image(_) => Err(ValidationError(UnsupportedModality("image"))),
+                Chunk::Video(_) => Err(ValidationError(UnsupportedModality("video"))),
             },
         }
     }

diff --git a/backends/v3/src/client/mod.rs b/backends/v3/src/client/mod.rs
@@ -15,7 +15,7 @@ pub use grpc_client::Client;
 pub use pb::generate::v3::{
     input_chunk::Chunk, Batch, CachedBatch, FinishReason, GeneratedText, Generation, GrammarType,
     HealthResponse, Image, InfoResponse, Input, InputChunk, NextTokenChooserParameters, Request,
-    StoppingCriteriaParameters,
+    StoppingCriteriaParameters, Video,
 };
 pub use sharded_client::ShardedClient;
 

diff --git a/backends/v3/src/queue.rs b/backends/v3/src/queue.rs
@@ -439,6 +439,13 @@ impl State {
                                     data: image.data,
                                     mimetype: image.mimetype,
                                 }),
+                                Chunk::Video(video) => client::Chunk::Video(client::Video {
+                                    data: video.data,
+                                    mimetype: video.mimetype,
+                                    width: video.width,
+                                    height: video.height,
+                                    frames: video.num_frames,
+                                }),
                             }),
                         })
                         .collect(),