diff --git a/.github/workflows/scripts/load-test.sh b/.github/workflows/scripts/load-test.sh
new file mode 100755
index 000000000..57be20d58
--- /dev/null
+++ b/.github/workflows/scripts/load-test.sh
@@ -0,0 +1,523 @@
+#!/bin/bash
+
+# Load Test Script for Bifrost
+# Runs a load test against bifrost-http with a mocker provider
+# Usage: ./load-test.sh
+#
+# This script:
+# 1. Builds bifrost-http locally if not present
+# 2. Creates a config.json with a mocker provider (OpenAI-style)
+# 3. Starts the mocker server and bifrost-http
+# 4. Runs a 3000 RPS load test for 1 minute using Vegeta
+# 5. Prints the results
+
+set -e
+
+# Configuration
+SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
+REPO_ROOT="$(cd "${SCRIPT_DIR}/../../.." && pwd)"
+BIFROST_HTTP_DIR="${REPO_ROOT}/transports/bifrost-http"
+TRANSPORTS_DIR="${REPO_ROOT}/transports"
+WORK_DIR="${SCRIPT_DIR}"
+MOCKER_DIR="${REPO_ROOT}/../bifrost-benchmarking/mocker"
+
+BIFROST_PORT=8080
+MOCKER_PORT=8000
+RATE=3000
+DURATION=60
+BASE_LATENCY_MS=10000 # 10 seconds of base latency simulated by the mocker
+
+# Results storage for the summary table
+RESULTS_FILE="${WORK_DIR}/load-test-results.md"
+RESULTS_JSON="${WORK_DIR}/load-test-results.json"
+
+# Colors for output
+RED='\033[0;31m'
+GREEN='\033[0;32m'
+YELLOW='\033[1;33m'
+BLUE='\033[0;34m'
+NC='\033[0m' # No Color
+
+log_info() {
+    echo -e "${BLUE}[INFO]${NC} $1"
+}
+
+log_success() {
+    echo -e "${GREEN}[SUCCESS]${NC} $1"
+}
+
+log_warn() {
+    echo -e "${YELLOW}[WARN]${NC} $1"
+}
+
+log_error() {
+    echo -e "${RED}[ERROR]${NC} $1"
+}
+
+# Cleanup function to kill background processes
+cleanup() {
+    log_info "Cleaning up..."
+    if [ -n "$BIFROST_PID" ] && kill -0 "$BIFROST_PID" 2>/dev/null; then
+        kill "$BIFROST_PID" 2>/dev/null || true
+        wait "$BIFROST_PID" 2>/dev/null || true
+    fi
+    if [ -n "$MOCKER_PID" ] && kill -0 "$MOCKER_PID" 2>/dev/null; then
+        kill "$MOCKER_PID" 2>/dev/null || true
+        wait "$MOCKER_PID" 2>/dev/null || true
+    fi
+    # Clean up temporary files (keep results files for artifact upload)
+    rm -f "${WORK_DIR}/config.json" "${WORK_DIR}/logs.db" "${WORK_DIR}/attack.bin" "${WORK_DIR}/bifrost.log" "${WORK_DIR}/vegeta-target.json" "${WORK_DIR}/vegeta-report.json" 2>/dev/null || true
+    log_info "Cleanup complete"
+}
+
+trap cleanup EXIT
+
+# Check for required tools
+check_dependencies() {
+    log_info "Checking dependencies..."
+
+    if ! command -v go &> /dev/null; then
+        log_error "Go is not installed. Please install Go 1.24.3 or later."
+        exit 1
+    fi
+
+    if ! command -v git &> /dev/null; then
+        log_error "Git is not installed. Please install Git."
+        exit 1
+    fi
+
+    # curl, lsof, and bc are used below for health checks, port cleanup,
+    # and overhead arithmetic respectively
+    for tool in curl lsof bc; do
+        if ! command -v "$tool" &> /dev/null; then
+            log_error "${tool} is not installed. Please install ${tool}."
+            exit 1
+        fi
+    done
+
+    log_success "All dependencies found"
+}
+
+# Kill any process running on a specific port
+kill_port() {
+    local port=$1
+    local pids=$(lsof -ti ":${port}" 2>/dev/null)
+    if [ -n "$pids" ]; then
+        log_warn "Killing existing process(es) on port ${port}: ${pids}"
+        echo "$pids" | xargs kill -9 2>/dev/null || true
+        sleep 1
+    fi
+}
+
+# Kill processes on required ports before starting
+cleanup_ports() {
+    log_info "Checking for processes on required ports..."
+    kill_port ${MOCKER_PORT}
+    kill_port ${BIFROST_PORT}
+}
+
+# Install Vegeta if not present
+install_vegeta() {
+    if ! command -v vegeta &> /dev/null; then
+        log_info "Installing Vegeta load testing tool..."
+        go install github.com/tsenart/vegeta/v12@latest
+        export PATH="$PATH:$(go env GOPATH)/bin"
+        if ! command -v vegeta &> /dev/null; then
+            log_error "Failed to install Vegeta"
+            exit 1
+        fi
+        log_success "Vegeta installed"
+    else
+        log_success "Vegeta already installed"
+    fi
+}
+
+# Build bifrost-http if the binary doesn't exist
+build_bifrost_http() {
+    if [ -f "${REPO_ROOT}/tmp/bifrost-http" ]; then
+        log_success "bifrost-http binary already exists at ${REPO_ROOT}/tmp/bifrost-http"
+        return 0
+    fi
+
+    log_info "Building bifrost-http..."
+    cd "${TRANSPORTS_DIR}"
+
+    # Build the binary (go build does not create missing output directories)
+    mkdir -p "${REPO_ROOT}/tmp"
+    if go build -o "${REPO_ROOT}/tmp/bifrost-http" .; then
+        log_success "bifrost-http built successfully"
+    else
+        log_error "Failed to build bifrost-http"
+        exit 1
+    fi
+
+    cd "${WORK_DIR}"
+}
+
+# Clone and set up the mocker from bifrost-benchmarking
+setup_mocker() {
+    if [ -d "${REPO_ROOT}/../bifrost-benchmarking" ]; then
+        log_info "Updating bifrost-benchmarking repository..."
+        cd "${REPO_ROOT}/../bifrost-benchmarking"
+        git pull --quiet || true
+        cd "${WORK_DIR}"
+    else
+        log_info "Cloning bifrost-benchmarking repository..."
+        # Clone next to the repo root so the checkout matches MOCKER_DIR
+        git clone --depth 1 https://github.com/maximhq/bifrost-benchmarking.git "${REPO_ROOT}/../bifrost-benchmarking"
+    fi
+
+    log_success "Mocker setup complete"
+}
+
+# Create config.json for bifrost with the mocker provider
+create_config() {
+    log_info "Creating config.json..."
+
+    cat > "${WORK_DIR}/config.json" << 'EOF'
+{
+  "$schema": "https://www.getbifrost.ai/schema",
+  "client": {
+    "enable_logging": false,
+    "initial_pool_size": 10000,
+    "drop_excess_requests": false,
+    "enable_governance": false,
+    "allow_direct_keys": false
+  },
+  "config_store": {
+    "enabled": false
+  },
+  "logs_store": {
+    "enabled": false
+  },
+  "providers": {
+    "openai": {
+      "keys": [
+        {
+          "name": "mocker-key",
+          "value": "Bearer mocker-key",
+          "weight": 1
+        }
+      ],
+      "network_config": {
+        "base_url": "http://localhost:8000",
+        "default_request_timeout_in_seconds": 30
+      },
+      "concurrency_and_buffer_size": {
+        "concurrency": 10000,
+        "buffer_size": 15000
+      },
+      "custom_provider_config": {
+        "base_provider_type": "openai",
+        "allowed_requests": {
+          "list_models": false,
+          "chat_completion": true,
+          "chat_completion_stream": true
+        }
+      }
+    }
+  }
+}
+EOF
+
+    log_success "config.json created"
+}
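+
+# With the config above, the "openai" provider is re-pointed at the local mocker
+# via network_config.base_url, so a chat completion for "openai/gpt-4o-mini" is
+# forwarded to http://localhost:8000/v1/chat/completions. A quick manual sanity
+# check once both servers are up (illustrative only, not run by this script):
+#   curl -s "http://localhost:8080/v1/chat/completions" \
+#     -H "Content-Type: application/json" \
+#     -d '{"model":"openai/gpt-4o-mini","messages":[{"role":"user","content":"ping"}]}'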
+
+# Start the mocker server
+start_mocker() {
+    log_info "Starting mocker server on port ${MOCKER_PORT}..."
+
+    cd "${MOCKER_DIR}"
+    go run main.go -port ${MOCKER_PORT} -host 0.0.0.0 -latency ${BASE_LATENCY_MS} &
+    MOCKER_PID=$!
+    cd "${WORK_DIR}"
+
+    # Wait for the mocker to be ready; each successful probe itself takes
+    # ~BASE_LATENCY_MS because the mocker delays every response
+    local max_attempts=30
+    local attempt=0
+    while ! curl -s "http://localhost:${MOCKER_PORT}/v1/chat/completions" -X POST \
+        -H "Content-Type: application/json" \
+        -H "Authorization: Bearer mocker-key" \
+        -d '{"model":"gpt-4o-mini","messages":[{"role":"user","content":"test"}]}' > /dev/null 2>&1; do
+        sleep 1
+        attempt=$((attempt + 1))
+        if [ $attempt -ge $max_attempts ]; then
+            log_error "Mocker failed to start after ${max_attempts} attempts"
+            exit 1
+        fi
+    done
+
+    log_success "Mocker server started (PID: ${MOCKER_PID})"
+}
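+
+# Note: MOCKER_PID is the PID of the `go run` wrapper; killing it may not
+# always terminate the compiled child process on every platform. The
+# cleanup_ports call at the end of main() clears any leftover listener by
+# port as a backstop.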
+
+# Start bifrost-http server
+start_bifrost() {
+    log_info "Starting bifrost-http on port ${BIFROST_PORT}..."
+
+    cd "${WORK_DIR}"
+    local bifrost_log="${WORK_DIR}/bifrost.log"
+    "${REPO_ROOT}/tmp/bifrost-http" -app-dir "${WORK_DIR}" -port "${BIFROST_PORT}" -host "0.0.0.0" -log-level "info" > "${bifrost_log}" 2>&1 &
+    BIFROST_PID=$!
+
+    # Wait for bifrost to be fully ready (look for the "successfully started bifrost" message)
+    local max_attempts=60
+    local attempt=0
+    while ! grep -q "successfully started bifrost" "${bifrost_log}" 2>/dev/null; do
+        sleep 1
+        attempt=$((attempt + 1))
+        if [ $attempt -ge $max_attempts ]; then
+            log_error "Bifrost failed to start within ${max_attempts} seconds"
+            log_error "Bifrost log output:"
+            cat "${bifrost_log}" 2>/dev/null || true
+            exit 1
+        fi
+        # Check if the process is still running
+        if ! kill -0 "$BIFROST_PID" 2>/dev/null; then
+            log_error "Bifrost process died unexpectedly"
+            log_error "Bifrost log output:"
+            cat "${bifrost_log}" 2>/dev/null || true
+            exit 1
+        fi
+    done
+
+    log_success "Bifrost-http started (PID: ${BIFROST_PID})"
+}
+
+# Initialize results file with header
+init_results_file() {
+    cat > "${RESULTS_FILE}" << 'EOF'
+# Bifrost Load Test Results
+
+> **Base Latency:** 10 seconds (simulated by mocker)
+> **Test Configuration:** See individual scenario details below
+
+## Overhead Summary
+
+| Scenario | RPS | Duration | Success Rate | Min Overhead | Mean Overhead | P50 Overhead | P90 Overhead | P95 Overhead | P99 Overhead | Max Overhead |
+|----------|-----|----------|--------------|--------------|---------------|--------------|--------------|--------------|--------------|--------------|
+EOF
+
+    # Initialize JSON results
+    echo '{"scenarios": [], "base_latency_ms": '"${BASE_LATENCY_MS}"', "timestamp": "'"$(date -u +"%Y-%m-%dT%H:%M:%SZ")"'"}' > "${RESULTS_JSON}"
+}
+
+# Parse vegeta output and calculate overhead
+# Arguments: $1 = scenario name, $2 = rate, $3 = duration
+parse_and_record_results() {
+    local scenario_name=$1
+    local rate=$2
+    local duration=$3
+
+    # Get the JSON report from vegeta and save it to a file for debugging
+    local json_report_file="${WORK_DIR}/vegeta-report.json"
+    vegeta report -type=json < "${WORK_DIR}/attack.bin" > "${json_report_file}"
+
+    log_info "Parsing vegeta JSON report..."
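+
+    # For reference, the report parsed below typically has this shape (all
+    # latency fields in nanoseconds; the numbers here are illustrative only):
+    #   {"latencies": {"min": 10000081000, "mean": 10000154000, "50th": ...,
+    #                  "90th": ..., "95th": ..., "99th": ..., "max": ...},
+    #    "success": 1, ...}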
+
+    # Check if jq is available for reliable JSON parsing
+    if command -v jq &> /dev/null; then
+        # Use jq for reliable JSON parsing
+        local latency_min=$(jq '.latencies.min // 0' "${json_report_file}")
+        local latency_mean=$(jq '.latencies.mean // 0' "${json_report_file}")
+        local latency_50=$(jq '.latencies["50th"] // 0' "${json_report_file}")
+        local latency_90=$(jq '.latencies["90th"] // 0' "${json_report_file}")
+        local latency_95=$(jq '.latencies["95th"] // 0' "${json_report_file}")
+        local latency_99=$(jq '.latencies["99th"] // 0' "${json_report_file}")
+        local latency_max=$(jq '.latencies.max // 0' "${json_report_file}")
+        local success_rate=$(jq '.success // 0' "${json_report_file}")
+    else
+        # Fallback: use python3 for JSON parsing (more reliable than grep)
+        if command -v python3 &> /dev/null; then
+            local latency_min=$(python3 -c "import json; d=json.load(open('${json_report_file}')); print(d.get('latencies', {}).get('min', 0))")
+            local latency_mean=$(python3 -c "import json; d=json.load(open('${json_report_file}')); print(d.get('latencies', {}).get('mean', 0))")
+            local latency_50=$(python3 -c "import json; d=json.load(open('${json_report_file}')); print(d.get('latencies', {}).get('50th', 0))")
+            local latency_90=$(python3 -c "import json; d=json.load(open('${json_report_file}')); print(d.get('latencies', {}).get('90th', 0))")
+            local latency_95=$(python3 -c "import json; d=json.load(open('${json_report_file}')); print(d.get('latencies', {}).get('95th', 0))")
+            local latency_99=$(python3 -c "import json; d=json.load(open('${json_report_file}')); print(d.get('latencies', {}).get('99th', 0))")
+            local latency_max=$(python3 -c "import json; d=json.load(open('${json_report_file}')); print(d.get('latencies', {}).get('max', 0))")
+            local success_rate=$(python3 -c "import json; d=json.load(open('${json_report_file}')); print(d.get('success', 0))")
+        else
+            log_error "Neither jq nor python3 found. Cannot parse JSON results."
+            return 1
+        fi
+    fi
+
+    # Debug: show extracted values
+    log_info "  Extracted latencies (ns): min=$latency_min, mean=$latency_mean, p50=$latency_50, p99=$latency_99, max=$latency_max"
+    log_info "  Success rate: $success_rate"
+
+    # Validate that we got numeric values
+    if [ -z "$latency_min" ] || [ "$latency_min" = "0" ] || [ "$latency_min" = "null" ]; then
+        log_warn "Failed to extract latency values from vegeta report"
+        log_info "Vegeta JSON report contents:"
+        cat "${json_report_file}"
+        return 1
+    fi
+
+    # Convert nanoseconds to microseconds and calculate overhead
+    local base_ns=$((BASE_LATENCY_MS * 1000000))
+
+    # Calculate overhead in µs (latency - base_latency) with proper formatting
+    local overhead_min=$(printf "%.2f" $(echo "scale=4; ($latency_min - $base_ns) / 1000" | bc))
+    local overhead_mean=$(printf "%.2f" $(echo "scale=4; ($latency_mean - $base_ns) / 1000" | bc))
+    local overhead_50=$(printf "%.2f" $(echo "scale=4; ($latency_50 - $base_ns) / 1000" | bc))
+    local overhead_90=$(printf "%.2f" $(echo "scale=4; ($latency_90 - $base_ns) / 1000" | bc))
+    local overhead_95=$(printf "%.2f" $(echo "scale=4; ($latency_95 - $base_ns) / 1000" | bc))
+    local overhead_99=$(printf "%.2f" $(echo "scale=4; ($latency_99 - $base_ns) / 1000" | bc))
+    local overhead_max=$(printf "%.2f" $(echo "scale=4; ($latency_max - $base_ns) / 1000" | bc))
+
+    # Convert success rate to a percentage (with 2 decimal places)
+    local success_pct=$(printf "%.2f" $(echo "scale=4; $success_rate * 100" | bc))
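+
+    # Worked example (illustrative numbers): with latency_mean=10000150000 ns
+    # and base_ns=10000000000, overhead_mean = (10000150000 - 10000000000)/1000
+    # = 150.00 µs; a success value of 0.9987 is reported as 99.87%.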
+
+    # Append to the markdown table
+    echo "| ${scenario_name} | ${rate} | ${duration}s | ${success_pct}% | ${overhead_min}µs | ${overhead_mean}µs | ${overhead_50}µs | ${overhead_90}µs | ${overhead_95}µs | ${overhead_99}µs | ${overhead_max}µs |" >> "${RESULTS_FILE}"
+
+    # Update JSON results (insert the scenario object after the opening bracket)
+    local tmp_json=$(mktemp)
+    cat "${RESULTS_JSON}" | sed 's/\("scenarios": \[\)/\1{"name": "'"${scenario_name}"'", "rate": '"${rate}"', "duration": '"${duration}"', "success_rate": '"${success_pct}"', "overhead_us": {"min": '"${overhead_min}"', "mean": '"${overhead_mean}"', "p50": '"${overhead_50}"', "p90": '"${overhead_90}"', "p95": '"${overhead_95}"', "p99": '"${overhead_99}"', "max": '"${overhead_max}"'}},/' > "${tmp_json}"
+    mv "${tmp_json}" "${RESULTS_JSON}"
+
+    # Clean up the temp file
+    rm -f "${json_report_file}"
+
+    log_success "Results recorded for scenario: ${scenario_name}"
+    log_info "  Overhead - Min: ${overhead_min}µs, Mean: ${overhead_mean}µs, P99: ${overhead_99}µs, Max: ${overhead_max}µs"
+}
+
+# Finalize results file
+finalize_results() {
+    # Add a footer with notes
+    cat >> "${RESULTS_FILE}" << 'EOF'
+
+## Notes
+
+- **Overhead** = Actual Latency - Base Latency (10s)
+- All overhead values are in microseconds (µs)
+- Lower overhead indicates better Bifrost performance
+- P50/P90/P95/P99 represent percentile latencies
+
+---
+*Generated by Bifrost Load Test Script*
+EOF
+
+    # Fix JSON (remove the trailing comma in the scenarios array)
+    sed -i.bak 's/},\]/}]/' "${RESULTS_JSON}" 2>/dev/null || sed -i '' 's/},\]/}]/' "${RESULTS_JSON}"
+    rm -f "${RESULTS_JSON}.bak"
+
+    log_success "Results saved to:"
+    log_info "  - Markdown: ${RESULTS_FILE}"
+    log_info "  - JSON: ${RESULTS_JSON}"
+
+    echo ""
+    echo "╔═══════════════════════════════════════════════════════════╗"
+    echo "║                  Overhead Summary Table                   ║"
+    echo "╚═══════════════════════════════════════════════════════════╝"
+    echo ""
+    cat "${RESULTS_FILE}"
+}
+
+# Run a single load test scenario
+# Arguments: $1 = scenario name, $2 = rate (optional), $3 = duration (optional)
+run_load_test() {
+    local scenario_name=${1:-"Default"}
+    local rate=${2:-$RATE}
+    local duration=${3:-$DURATION}
+
+    log_info "Running load test scenario '${scenario_name}': ${rate} RPS for ${duration} seconds..."
+    echo ""
+
+    # Create the target file for Vegeta
+    local target_url="http://localhost:${BIFROST_PORT}/v1/chat/completions"
+    local target_file="${WORK_DIR}/vegeta-target.json"
+    local payload='{"model":"openai/gpt-4o-mini","messages":[{"role":"user","content":"Hello, how are you?"}]}'
+
+    # Write the target in Vegeta JSON format (tr strips GNU base64's line
+    # wrapping, which would otherwise break the single-line JSON object)
+    cat > "${target_file}" << EOF
+{"method": "POST", "url": "${target_url}", "header": {"Content-Type": ["application/json"]}, "body": "$(echo -n "${payload}" | base64 | tr -d '\n')"}
+EOF
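+
+    # Vegeta's -format=json expects one JSON target object per line with the
+    # request body base64-encoded, which is why the payload is encoded above
+    # rather than embedded as raw JSON.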
+
+    # Run the Vegeta attack and save binary results to a file
+    vegeta attack \
+        -format=json \
+        -targets="${target_file}" \
+        -rate="${rate}" \
+        -duration="${duration}s" \
+        -timeout="30s" \
+        -workers=500 \
+        -max-workers=3000 > "${WORK_DIR}/attack.bin"
+
+    echo ""
+    log_info "Attack complete. Generating reports..."
+
+    echo ""
+    log_info "Summary report:"
+    vegeta report < "${WORK_DIR}/attack.bin"
+
+    echo ""
+    log_info "Latency histogram:"
+    vegeta report -type='hist[0,1ms,5ms,10ms,50ms,100ms,500ms,1s,5s,10s,15s]' < "${WORK_DIR}/attack.bin" || log_warn "Histogram generation failed"
+
+    # Parse results and record them to the summary
+    parse_and_record_results "${scenario_name}" "${rate}" "${duration}"
+}
+
+# Run all test scenarios
+run_all_scenarios() {
+    # Initialize the results file
+    init_results_file
+
+    echo ""
+    echo "╔═══════════════════════════════════════════════════════════╗"
+    echo "║                    Load Test Results                      ║"
+    echo "╚═══════════════════════════════════════════════════════════╝"
+    echo ""
+
+    # Scenario 1: Default high load (3000 RPS for 60s)
+    run_load_test "High Load (3000 RPS)" 3000 60
+
+    # Add more scenarios as needed:
+    # run_load_test "Medium Load (1000 RPS)" 1000 60
+    # run_load_test "Low Load (100 RPS)" 100 60
+    # run_load_test "Burst Test (5000 RPS)" 5000 30
+
+    # Finalize and display the summary
+    finalize_results
+}
+
+# Main execution
+main() {
+    echo ""
+    echo "╔═══════════════════════════════════════════════════════════╗"
+    echo "║                 Bifrost Load Test Script                  ║"
+    echo "║             ${RATE} RPS for ${DURATION} seconds           ║"
+    echo "╚═══════════════════════════════════════════════════════════╝"
+    echo ""
+
+    check_dependencies
+    install_vegeta
+    build_bifrost_http
+    setup_mocker
+    create_config
+    cleanup_ports
+    start_mocker
+    start_bifrost
+
+    run_all_scenarios
+    cleanup_ports
+
+    echo ""
+    log_success "Load test completed successfully!"
+    log_info "Result files:"
+    log_info "  - ${RESULTS_FILE}"
+    log_info "  - ${RESULTS_JSON}"
+
+    # Print the final summary table to the console
+    echo ""
+    echo "╔══════════════════════════════════════════════════════════════════════════════════════════════════════════════════════════════════════════════════════════════════╗"
+    echo "║                                                                   FINAL RESULTS SUMMARY                                                                         ║"
+    echo "╚══════════════════════════════════════════════════════════════════════════════════════════════════════════════════════════════════════════════════════════════════╝"
+    echo ""
+    echo "Base Latency: ${BASE_LATENCY_MS}ms (simulated by mocker)"
+    echo ""
+    # Print just the table (header + separator + data rows)
+    grep -E "^\|" "${RESULTS_FILE}"
+    echo ""
+    echo "Notes:"
+    echo "  - Overhead = Actual Latency - Base Latency (${BASE_LATENCY_MS}ms)"
+    echo "  - This overhead also includes JSON serialization/deserialization and wait time on the mocker side; Bifrost's own overhead is much lower (approx. 20-30 µs)"
+    echo "  - All overhead values are in microseconds (µs)"
+    echo "  - Lower overhead indicates better Bifrost performance"
+    echo ""
+}
+
+main "$@"
diff --git a/.gitignore b/.gitignore
index 2aa0fc256..ecc5675c0 100644
--- a/.gitignore
+++ b/.gitignore
@@ -49,4 +49,7 @@ test-reports
 **/.venv/
 **/.pytest_cache/
 **/.coverage/
-**/.pytest_cache/
\ No newline at end of file
+**/.pytest_cache/
+
+# Bifrost benchmarking
+bifrost-benchmarking
\ No newline at end of file
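For a local run, something like the following should suffice (paths as defined in the script above; Go, git, curl, lsof, and bc assumed to be installed):

```bash
# From the repository root:
./.github/workflows/scripts/load-test.sh

# On success, the summary artifacts land next to the script:
#   .github/workflows/scripts/load-test-results.md    (markdown summary table)
#   .github/workflows/scripts/load-test-results.json  (machine-readable results)
```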