diff --git a/yscope-compose/.env b/yscope-compose/.env new file mode 100644 index 0000000000000..b9a4cf785d336 --- /dev/null +++ b/yscope-compose/.env @@ -0,0 +1,35 @@ +# Coordinator common +PRESTO_COORDINATOR_HTTPPORT="8080" +PRESTO_COORDINATOR_SERVICENAME="presto-coordinator" + +# Coordinator clp.properties +PRESTO_COORDINATOR_CONFIG_CLPPROPERTIES_METADATA_PROVIDERTYPE="mysql" +PRESTO_COORDINATOR_CONFIG_CLPPROPERTIES_METADATA_DATABASE_URL="jdbc:mysql://localhost:6001" +PRESTO_COORDINATOR_CONFIG_CLPPROPERTIES_METADATA_DATABASE_NAME="clp-db" +PRESTO_COORDINATOR_CONFIG_CLPPROPERTIES_METADATA_DATABASE_USER="clp-user" +PRESTO_COORDINATOR_CONFIG_CLPPROPERTIES_METADATA_DATABASE_PASSWORD="123456" +PRESTO_COORDINATOR_CONFIG_CLPPROPERTIES_METADATA_TABLEPREFIX="clp_" +PRESTO_COORDINATOR_CONFIG_CLPPROPERTIES_SPLITPROVIDER="mysql" + +# Coordinator config.properties +PRESTO_COORDINATOR_CONFIG_CONFIGPROPERTIES_QUERY_MAXMEMORY="1GB" +PRESTO_COORDINATOR_CONFIG_CONFIGPROPERTIES_QUERY_MAXMEMORYPERNODE="1GB" + +# Coordinator jvm.config +PRESTO_COORDINATOR_CONFIG_JVMCONFIG_MAXHEAPSIZE="4G" +PRESTO_COORDINATOR_CONFIG_JVMCONFIG_G1HEAPREGIONSIZE="32M" + +# Coordinator log.properties +PRESTO_COORDINATOR_CONFIG_LOGPROPERTIES_LEVEL="DEBUG" + +# Coordinator node.properties +PRESTO_COORDINATOR_CONFIG_NODEPROPERTIES_ENVIRONMENT="production" + +# Worker common +PRESTO_WORKER_HTTPPORT="8080" + +# Worker node.properties +PRESTO_WORKER_CONFIG_NODEPROPERTIES_LOCATION="worker-location" + +# CLP package archives +CLP_PACKAGE_ARCHIVES=REPLACE_ME diff --git a/yscope-compose/README.md b/yscope-compose/README.md new file mode 100644 index 0000000000000..97ccf312e92d0 --- /dev/null +++ b/yscope-compose/README.md @@ -0,0 +1,76 @@ +# Setup local docker stack for presto + clp + +## Install docker + +Follow the guide here: [docker] + +# Launch clp-package + +1. Find the clp-package for test on our official website [clp-json-v0.4.0]. Here is a sample dataset for demo testing: [postgresql dataset]. + +2. 
Untar the clp-package and the postgresql dataset. + +3. Replace the content of `/path/to/clp-json-package/etc/clp-config.yml` with the output of `demo-assets/init.sh generate_sample_clp_config`. + +4. Launch: + +```bash +# You probably want to run a python 3.9 or newer virtual environment +sbin/start-clp.sh +``` + +5. Compress: + +```bash +# You can also use your own dataset +sbin/compress.sh --timestamp-key 'timestamp' /path/to/postgresql.log +``` + +6. Use the following command to update `.env`: + +```bash +demo-assets/init.sh update_metadata_config /path/to/clp-json-package +``` + +# Create Docker Cluster + +Create a local docker stack: + +```bash +docker compose up +``` + +To create a docker stack with more than 1 worker (e.g., 3 workers): +``` +docker compose up --scale presto-worker=3 +``` + +# Use cli: + +After all containers are in "Started" states (check by `docker ps`): + +```bash +# On your host +docker exec -it compose-presto-coordinator-1 sh + +# In presto-coordinator container +/opt/presto-cli --catalog clp --schema default --server localhost:8080 +``` + +Example query: +```sql +SELECT * FROM default LIMIT 1; +``` + +# Delete docker Cluster + +```bash +docker compose down +``` + + + +[clp-json-v0.4.0]: https://github.com/y-scope/clp/releases/tag/v0.4.0 +[docker]: https://docs.docker.com/engine/install +[postgresql dataset]: https://zenodo.org/records/10516402 + diff --git a/yscope-compose/coordinator/config-template/clp.properties b/yscope-compose/coordinator/config-template/clp.properties new file mode 100644 index 0000000000000..2ee47e75a2a8f --- /dev/null +++ b/yscope-compose/coordinator/config-template/clp.properties @@ -0,0 +1,10 @@ +connector.name=clp +clp.metadata-provider-type=${PRESTO_COORDINATOR_CONFIG_CLPPROPERTIES_METADATA_PROVIDERTYPE} +clp.metadata-db-url=${PRESTO_COORDINATOR_CONFIG_CLPPROPERTIES_METADATA_DATABASE_URL} +clp.metadata-db-name=${PRESTO_COORDINATOR_CONFIG_CLPPROPERTIES_METADATA_DATABASE_NAME} 
+clp.metadata-db-user=${PRESTO_COORDINATOR_CONFIG_CLPPROPERTIES_METADATA_DATABASE_USER} +clp.metadata-db-password=${PRESTO_COORDINATOR_CONFIG_CLPPROPERTIES_METADATA_DATABASE_PASSWORD} +clp.metadata-table-prefix=${PRESTO_COORDINATOR_CONFIG_CLPPROPERTIES_METADATA_TABLEPREFIX} +clp.split-provider-type=${PRESTO_COORDINATOR_CONFIG_CLPPROPERTIES_SPLITPROVIDER} +clp.metadata-filter-config=/opt/presto-server/etc/metadata-filter.json + diff --git a/yscope-compose/coordinator/config-template/config.properties b/yscope-compose/coordinator/config-template/config.properties new file mode 100644 index 0000000000000..22f905fb0c3ed --- /dev/null +++ b/yscope-compose/coordinator/config-template/config.properties @@ -0,0 +1,14 @@ +coordinator=true +node-scheduler.include-coordinator=false +http-server.http.port=${PRESTO_COORDINATOR_HTTPPORT} +query.max-memory=${PRESTO_COORDINATOR_CONFIG_CONFIGPROPERTIES_QUERY_MAXMEMORY} +query.max-memory-per-node=${PRESTO_COORDINATOR_CONFIG_CONFIGPROPERTIES_QUERY_MAXMEMORYPERNODE} +discovery-server.enabled=true +discovery.uri=http://${PRESTO_COORDINATOR_SERVICENAME}:${PRESTO_COORDINATOR_HTTPPORT} +optimizer.optimize-hash-generation=false +regex-library=RE2J +use-alternative-function-signatures=true +inline-sql-functions=false +nested-data-serialization-enabled=false +native-execution-enabled=true + diff --git a/yscope-compose/coordinator/config-template/jvm.config b/yscope-compose/coordinator/config-template/jvm.config new file mode 100644 index 0000000000000..49be4c4c0b3e3 --- /dev/null +++ b/yscope-compose/coordinator/config-template/jvm.config @@ -0,0 +1,10 @@ +-server +-Xmx${PRESTO_COORDINATOR_CONFIG_JVMCONFIG_MAXHEAPSIZE} +-XX:+UseG1GC +-XX:G1HeapRegionSize=${PRESTO_COORDINATOR_CONFIG_JVMCONFIG_G1HEAPREGIONSIZE} +-XX:+UseGCOverheadLimit +-XX:+ExplicitGCInvokesConcurrent +-XX:+HeapDumpOnOutOfMemoryError +-XX:+ExitOnOutOfMemoryError +-Djdk.attach.allowAttachSelf=true + diff --git a/yscope-compose/coordinator/config-template/log.properties 
b/yscope-compose/coordinator/config-template/log.properties new file mode 100644 index 0000000000000..a4ea74216a45b --- /dev/null +++ b/yscope-compose/coordinator/config-template/log.properties @@ -0,0 +1,2 @@ +com.facebook.presto=${PRESTO_COORDINATOR_CONFIG_LOGPROPERTIES_LEVEL} + diff --git a/yscope-compose/coordinator/config-template/metadata-filter.json b/yscope-compose/coordinator/config-template/metadata-filter.json new file mode 100644 index 0000000000000..bfd870e452fce --- /dev/null +++ b/yscope-compose/coordinator/config-template/metadata-filter.json @@ -0,0 +1,3 @@ +{ +} + diff --git a/yscope-compose/coordinator/config-template/node.properties b/yscope-compose/coordinator/config-template/node.properties new file mode 100644 index 0000000000000..2803273ef9156 --- /dev/null +++ b/yscope-compose/coordinator/config-template/node.properties @@ -0,0 +1,3 @@ +node.environment=${PRESTO_COORDINATOR_CONFIG_NODEPROPERTIES_ENVIRONMENT} +node.id=${PRESTO_COORDINATOR_SERVICENAME} + diff --git a/yscope-compose/coordinator/scripts/$ b/yscope-compose/coordinator/scripts/$ new file mode 100644 index 0000000000000..6c37411d0fc31 --- /dev/null +++ b/yscope-compose/coordinator/scripts/$ @@ -0,0 +1,19 @@ +#!/bin/sh + +# Exit on error +set -e + +PRESTO_CONFIG_DIR="/opt/presto-server/etc" + +# Substitute environemnt variables in config template +find /configs -type f | while read -r f; do + ( echo "cat < "${PRESTO_CONFIG_DIR}/$(basename "$f")" +done + +# Setup the config directory hierarchy +rm -rf ${PRESTO_CONFIG_DIR}/catalog +mkdir -p ${PRESTO_CONFIG_DIR}/catalog + +# Copy over files +mv ${PRESTO_CONFIG_DIR}/clp.properties ${PRESTO_CONFIG_DIR}/catalog + diff --git a/yscope-compose/coordinator/scripts/generate-configs.sh b/yscope-compose/coordinator/scripts/generate-configs.sh new file mode 100755 index 0000000000000..4082e6b5d0b55 --- /dev/null +++ b/yscope-compose/coordinator/scripts/generate-configs.sh @@ -0,0 +1,18 @@ +#!/bin/sh + +# Exit on error +set -e + 
+PRESTO_CONFIG_DIR="/opt/presto-server/etc" + +# Substitute environment variables in config template +find /configs -type f | while read -r f; do + ( echo "cat < "${PRESTO_CONFIG_DIR}/$(basename "$f")" +done + +# Set up the config directory hierarchy; create catalog/ first so the mv below cannot rename clp.properties to a plain file named "catalog" +mkdir -p "${PRESTO_CONFIG_DIR}/catalog"; rm -f "${PRESTO_CONFIG_DIR}"/catalog/* + +# Move the catalog definition into catalog/ (trailing slash makes mv fail loudly if it is not a directory) +mv "${PRESTO_CONFIG_DIR}/clp.properties" "${PRESTO_CONFIG_DIR}/catalog/" + diff --git a/yscope-compose/demo-assets/clp-config.yml b/yscope-compose/demo-assets/clp-config.yml new file mode 100644 index 0000000000000..aa8d472d9cf2a --- /dev/null +++ b/yscope-compose/demo-assets/clp-config.yml @@ -0,0 +1,39 @@ +package: + storage_engine: "clp-s" +database: + type: "mariadb" + host: "${REPLACE_IP}" + port: 6001 + name: "clp-db" +query_scheduler: + host: "${REPLACE_IP}" + port: 6002 + jobs_poll_delay: 0.1 + num_archives_to_search_per_sub_job: 16 + logging_level: "INFO" +queue: + host: "${REPLACE_IP}" + port: 6003 +redis: + host: "${REPLACE_IP}" + port: 6004 + query_backend_database: 0 + compression_backend_database: 1 +reducer: + host: "${REPLACE_IP}" + base_port: 6100 + logging_level: "INFO" + upsert_interval: 100 +results_cache: + host: "${REPLACE_IP}" + port: 6005 + db_name: "clp-query-results" + stream_collection_name: "stream-files" +webui: + host: "localhost" + port: 6000 + logging_level: "INFO" +log_viewer_webui: + host: "localhost" + port: 6006 + diff --git a/yscope-compose/demo-assets/init.sh b/yscope-compose/demo-assets/init.sh new file mode 100755 index 0000000000000..61a9244f41985 --- /dev/null +++ b/yscope-compose/demo-assets/init.sh @@ -0,0 +1,50 @@ +#!/usr/bin/env bash + +SCRIPT_PATH="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" + +function generate_sample_clp_config { # Back up clp-config.yml (next to this script) to .bak, then replace every ${REPLACE_IP} in it with the output of `hostname -i` + local ip; ip=$(hostname -i) || return 1 # declared separately so a failing hostname is not masked by `local` + local file="${SCRIPT_PATH}/clp-config.yml" + cp "$file" "${file}.bak" + sed -i "s|\${REPLACE_IP}|$ip|g" "$file" + echo "Replaced \${REPLACE_IP} with $ip in $file" +} + +function update_metadata_config { # $1: clp-package directory; copies its database settings into ../.env (original saved as .env.bak) + if [[ $# -ne 1 ]]; then + echo "Usage: 
update_metadata_config <clp-package-dir>" + return 1 + fi + + local clp_pkg_home="$1" host port name user password # keep the parsed values function-local instead of leaking globals + local clp_config_path="$(readlink -f "${clp_pkg_home}")/etc/clp-config.yml" + local credential_path="$(readlink -f "${clp_pkg_home}")/etc/credentials.yml" + host=$(python3 -c 'import sys, yaml; print(yaml.safe_load(sys.stdin)["database"]["host"])' < "$clp_config_path") || return 1 # safe_load: PyYAML >= 6 rejects yaml.load() without an explicit Loader; bail out before .env is rewritten with empty values + port=$(python3 -c 'import sys, yaml; print(yaml.safe_load(sys.stdin)["database"]["port"])' < "$clp_config_path") || return 1 + name=$(python3 -c 'import sys, yaml; print(yaml.safe_load(sys.stdin)["database"]["name"])' < "$clp_config_path") || return 1 + user=$(python3 -c 'import sys, yaml; print(yaml.safe_load(sys.stdin)["database"]["user"])' < "$credential_path") || return 1 + password=$(python3 -c 'import sys, yaml; print(yaml.safe_load(sys.stdin)["database"]["password"])' < "$credential_path") || return 1 + echo "Metadata database host: $host" + echo "Metadata database port: $port" + echo "Metadata database name: $name" + echo "Metadata database user: $user" + echo "Metadata database password: $password" + + local env_path="${SCRIPT_PATH}/../.env" + cp "$env_path" "${env_path}.bak" + sed -i "s|^PRESTO_COORDINATOR_CONFIG_CLPPROPERTIES_METADATA_DATABASE_URL=.*|PRESTO_COORDINATOR_CONFIG_CLPPROPERTIES_METADATA_DATABASE_URL=\"jdbc:mysql://${host}:${port}\"|" "$env_path" + sed -i "s/^PRESTO_COORDINATOR_CONFIG_CLPPROPERTIES_METADATA_DATABASE_NAME=.*/PRESTO_COORDINATOR_CONFIG_CLPPROPERTIES_METADATA_DATABASE_NAME=\"${name}\"/" "$env_path" + sed -i "s/^PRESTO_COORDINATOR_CONFIG_CLPPROPERTIES_METADATA_DATABASE_USER=.*/PRESTO_COORDINATOR_CONFIG_CLPPROPERTIES_METADATA_DATABASE_USER=\"${user}\"/" "$env_path" + sed -i "s/^PRESTO_COORDINATOR_CONFIG_CLPPROPERTIES_METADATA_DATABASE_PASSWORD=.*/PRESTO_COORDINATOR_CONFIG_CLPPROPERTIES_METADATA_DATABASE_PASSWORD=\"${password}\"/" "$env_path" + sed -i "s|^CLP_PACKAGE_ARCHIVES=.*|CLP_PACKAGE_ARCHIVES=\"$(readlink -f "${clp_pkg_home}")/var/data/archives/default\"|" "$env_path" +} + +if declare -f "$1" > /dev/null; then + "$@" +else + echo "Error: '$1' is not a valid function 
name." + echo "Available functions:" + declare -F | awk '{print $3}' + exit 1 +fi + diff --git a/yscope-compose/docker-compose.yaml b/yscope-compose/docker-compose.yaml new file mode 100644 index 0000000000000..33029c7d30262 --- /dev/null +++ b/yscope-compose/docker-compose.yaml @@ -0,0 +1,39 @@ +version: "3.9" + +services: + presto-coordinator: + image: ghcr.io/y-scope/presto/coordinator:dev + entrypoint: ["/bin/bash", "-c", "/scripts/generate-configs.sh && /opt/entrypoint.sh"] + env_file: + - .env + volumes: + - ./coordinator/scripts:/scripts:ro + - coordinator-config:/opt/presto-server/etc + - ./coordinator/config-template:/configs:ro + networks: + - presto + + presto-worker: + image: ghcr.io/y-scope/presto/prestissimo-worker:dev + depends_on: + presto-coordinator: + condition: service_started + entrypoint: ["/bin/bash", "-c", "/scripts/generate-configs.sh && /opt/entrypoint.sh"] + env_file: + - .env + volumes: + - ./worker/scripts:/scripts:ro + - worker-config:/opt/presto-server/etc + - ./worker/config-template:/configs:ro + - "${CLP_PACKAGE_ARCHIVES}:${CLP_PACKAGE_ARCHIVES}" + networks: + - presto + +volumes: + coordinator-config: + worker-config: + +networks: + presto: + driver: bridge + diff --git a/yscope-compose/worker/config-template/clp.properties b/yscope-compose/worker/config-template/clp.properties new file mode 100644 index 0000000000000..484cbfe2c1c30 --- /dev/null +++ b/yscope-compose/worker/config-template/clp.properties @@ -0,0 +1,2 @@ +connector.name=clp + diff --git a/yscope-compose/worker/config-template/config.properties b/yscope-compose/worker/config-template/config.properties new file mode 100644 index 0000000000000..0b8758c8fd34e --- /dev/null +++ b/yscope-compose/worker/config-template/config.properties @@ -0,0 +1,7 @@ +discovery.uri=http://${PRESTO_COORDINATOR_SERVICENAME}:${PRESTO_COORDINATOR_HTTPPORT} +presto.version=REPLACE_ME +http-server.http.port=${PRESTO_WORKER_HTTPPORT} +shutdown-onset-sec=1 +register-test-functions=false 
+runtime-metrics-collection-enabled=false + diff --git a/yscope-compose/worker/config-template/node.properties b/yscope-compose/worker/config-template/node.properties new file mode 100644 index 0000000000000..44f4cd70b276c --- /dev/null +++ b/yscope-compose/worker/config-template/node.properties @@ -0,0 +1,5 @@ +node.environment=${PRESTO_COORDINATOR_CONFIG_NODEPROPERTIES_ENVIRONMENT} +node.internal-address=REPLACE_ME +node.location=${PRESTO_WORKER_CONFIG_NODEPROPERTIES_LOCATION} +node.id=REPLACE_ME + diff --git a/yscope-compose/worker/config-template/velox.properties b/yscope-compose/worker/config-template/velox.properties new file mode 100644 index 0000000000000..50de3f6962934 --- /dev/null +++ b/yscope-compose/worker/config-template/velox.properties @@ -0,0 +1,2 @@ +mutable-config=true + diff --git a/yscope-compose/worker/scripts/generate-configs.sh b/yscope-compose/worker/scripts/generate-configs.sh new file mode 100755 index 0000000000000..8df961e47d2aa --- /dev/null +++ b/yscope-compose/worker/scripts/generate-configs.sh @@ -0,0 +1,87 @@ +#!/bin/sh + +# Install wget +apt-get update && apt-get install -y wget + +PRESTO_CONFIG_DIR="/opt/presto-server/etc" + +# Substitute environemnt variables in config template +find /configs -type f | while read -r f; do + ( echo "cat < "${PRESTO_CONFIG_DIR}/$(basename "$f")" +done + +# Setup the config directory hierarchy +rm -f ${PRESTO_CONFIG_DIR}/catalog/* + +mv ${PRESTO_CONFIG_DIR}/clp.properties ${PRESTO_CONFIG_DIR}/catalog + +# Update "presto.version" parameter in config.properties file using values from coordinator +CONFIG_PROPERTIES_FILE="/opt/presto-server/etc/config.properties" + +# Retry configuration +MAX_RETRIES=30 +RETRY_DELAY=10 + +echo "Init container: Waiting for Presto to be ready..." + +# 1. Fetch version info from Presto with retry logic +retry_count=0 +while [ $retry_count -lt $MAX_RETRIES ]; do +echo "Attempt $((retry_count + 1))/$MAX_RETRIES - Checking Presto availability..." 
+ +# Try to fetch the response +DISCOVERY_URI=$(awk -F= '/^discovery.uri=/ { print $2 }' "${PRESTO_CONFIG_DIR}/config.properties") +if response=$(wget -qO- --timeout=10 "${DISCOVERY_URI}/v1/info" 2>/dev/null); then + # Check if response is not empty and contains version info + if [ -n "$response" ] && echo "$response" | grep -q '"version"'; then + echo "Presto is ready!" + break + fi +fi + +echo "Presto not ready yet, retrying in ${RETRY_DELAY}s..." +sleep $RETRY_DELAY +retry_count=$((retry_count + 1)) +done + +# Check if we exceeded max retries +if [ $retry_count -eq $MAX_RETRIES ]; then + echo "Error: Presto did not become ready after $MAX_RETRIES attempts" + exit 1 +fi + +# 2. Extract the version using grep and sed (busybox compatible) +version=$(echo "$response" | grep -o '"version":"[^"]*"' | sed 's/"version":"//;s/"//') + +echo "Detected Presto version: $version" + +# 3. Replace `presto.version=REPLACE_ME` with actual version in the config file +if grep -q '^presto.version=REPLACE_ME' "$CONFIG_PROPERTIES_FILE"; then + sed -i "s|^presto.version=REPLACE_ME|presto.version=$version|" "$CONFIG_PROPERTIES_FILE" + echo "Updated $CONFIG_PROPERTIES_FILE with version $version" +else + echo "Warning: 'presto.version=REPLACE_ME' not found in $CONFIG_PROPERTIES_FILE" + exit 1 +fi + +# Modify node.properties +NODE_PROPERTIES_FILE="/opt/presto-server/etc/node.properties" +INTERNAL_ADDRESS=$(hostname -i) +# Replace `node.internal-address=REPLACE_ME` with actual ip address in the config file +if grep -q '^node.internal-address=REPLACE_ME' "$NODE_PROPERTIES_FILE"; then + sed -i "s|^node.internal-address=REPLACE_ME|node.internal-address=${INTERNAL_ADDRESS}|" "$NODE_PROPERTIES_FILE" + echo "Updated $NODE_PROPERTIES_FILE with node.internal-address ${INTERNAL_ADDRESS}" +else + echo "Warning: 'node.internal-address=REPLACE_ME' not found in $NODE_PROPERTIES_FILE" + exit 1 +fi + +# Replace `node.id=REPLACE_ME` with actual hostname in the config file +if grep -q '^node.id=REPLACE_ME' 
"$NODE_PROPERTIES_FILE"; then + sed -i "s|^node.id=REPLACE_ME|node.id=$HOSTNAME|" "$NODE_PROPERTIES_FILE" + echo "Updated $NODE_PROPERTIES_FILE with node.id $HOSTNAME" +else + echo "Warning: 'node.id=REPLACE_ME' not found in $NODE_PROPERTIES_FILE" + exit 1 +fi + diff --git a/yscope-k8s/.gitignore b/yscope-k8s/.gitignore new file mode 100644 index 0000000000000..aa2167f68101e --- /dev/null +++ b/yscope-k8s/.gitignore @@ -0,0 +1 @@ +demo-assets/clp-config.yml diff --git a/yscope-k8s/.helmignore b/yscope-k8s/.helmignore new file mode 100644 index 0000000000000..cd4201863e066 --- /dev/null +++ b/yscope-k8s/.helmignore @@ -0,0 +1,27 @@ +# Patterns to ignore when building packages. +# This supports shell glob matching, relative path matching, and +# negation (prefixed with !). Only one pattern per line. +.DS_Store +# Common VCS dirs +.git/ +.gitignore +.bzr/ +.bzrignore +.hg/ +.hgignore +.svn/ +# Common backup files +*.swp +*.bak +*.tmp +*.orig +*~ + +# Various IDEs +.project +.idea/ +*.tmproj +.vscode/ + +# Demo assets +/demo-assets/ diff --git a/yscope-k8s/Chart.yaml b/yscope-k8s/Chart.yaml new file mode 100644 index 0000000000000..fcd338e9e1d9f --- /dev/null +++ b/yscope-k8s/Chart.yaml @@ -0,0 +1,24 @@ +apiVersion: v2 +name: presto-velox +description: A Helm chart for Kubernetes + +# A chart can be either an 'application' or a 'library' chart. +# +# Application charts are a collection of templates that can be packaged into versioned archives +# to be deployed. +# +# Library charts provide useful utilities or functions for the chart developer. They're included as +# a dependency of application charts to inject those utilities and functions into the rendering +# pipeline. Library charts do not define any templates and therefore cannot be deployed. +type: application + +# This is the chart version. This version number should be incremented each time you make changes +# to the chart and its templates, including the app version. 
+# Versions are expected to follow Semantic Versioning (https://semver.org/) +version: 0.1.0 + +# This is the version number of the application being deployed. This version number should be +# incremented each time you make changes to the application. Versions are not expected to +# follow Semantic Versioning. They should reflect the version the application is using. +# It is recommended to use it with quotes. +appVersion: "1.16.0" diff --git a/yscope-k8s/README.md b/yscope-k8s/README.md new file mode 100644 index 0000000000000..7983a776c34dd --- /dev/null +++ b/yscope-k8s/README.md @@ -0,0 +1,107 @@ +# Set up a local K8s cluster for presto + clp + +## Install docker + +Follow the guide here: [docker] + +## Install kubectl + +`kubectl` is the command-line tool for interacting with Kubernetes clusters. You will use it to +manage and inspect your k3d cluster. + +Follow the guide here: [kubectl] + +## Install k3d + +k3d is a lightweight wrapper to run k3s (Rancher Labs' minimal Kubernetes distribution) in docker. + +Follow the guide here: [k3d] + +## Install Helm + +Helm is the package manager for Kubernetes. + +Follow the guide here: [helm] + +# Launch clp-package +1. Download the clp-package for testing from our official release page: [clp-json-v0.4.0]. A demo dataset is also provided in this chart: `demo-assets/mongod-256MB-presto-clp.log.tar.gz`. + +2. Untar the clp-package and the dataset. + +3. Run `demo-assets/init.sh <ip>`, where `<ip>` is the IP address of the host on which you are running the clp-package, and replace the content of `/path/to/clp-json-package/etc/clp-config.yml` with the generated `demo-assets/clp-config.yml`. + +4. Launch: +```bash +# You probably want to run in a Python 3.11 environment +sbin/start-clp.sh +``` + +5. Compress: +```bash +# You can also use your own dataset +sbin/compress.sh --timestamp-key 't.dollar_sign_date' datasets/mongod-256MB-processed.log +``` + +6. 
Use the following command to update the CLP metadata database so that the worker can find the archives in the right place: +```bash +# Install mysql-client if necessary +sudo apt update && sudo apt install -y mysql-client +# Find the user and password in /path/to/clp-json-package/etc/credentials.yml +mysql -h ${REPLACE_IP} -P 6001 -u ${REPLACE_USER} -p'${REPLACE_PASSWORD}' clp-db -e "UPDATE clp_datasets SET archive_storage_directory = '/var/data/archives/default';" +``` + +# Create k8s Cluster +Create a local k8s cluster with the CLP archives directory mounted into the cluster nodes: +```bash +k3d cluster create yscope --servers 1 --agents 1 -v $(readlink -f /path/to/clp-json-package/var/data/archives):/var/data/archives +``` + +# Working with helm chart +## Install +In `yscope-k8s/templates/presto/presto-coordinator-config.yaml`: +1. Replace the `${REPLACE_ME}` in `clp.metadata-db-url=jdbc:mysql://${REPLACE_ME}:6001` with the IP address of the host on which you are running the clp-package (basically, it must match the IP address that you configured in the `etc/clp-config.yml` of the clp-package). +2. Replace the `${REPLACE_ME}` in `clp.metadata-db-user=${REPLACE_ME}` with the user stored in `/path/to/clp-json-package/etc/credentials.yml`. +3. Replace the `${REPLACE_ME}` in `clp.metadata-db-password=${REPLACE_ME}` with the password stored in `/path/to/clp-json-package/etc/credentials.yml`. + +```bash +cd yscope-k8s + +helm template . + +helm install demo . 
+``` + +## Use cli: +After all containers are in "Running" states (check by `kubectl get pods`): +```bash +kubectl port-forward service/presto-coordinator 8080:8080 +``` + +Then you can further forward the 8080 port to your local laptop, to access the Presto's WebUI by e.g., http://localhost:8080 + +To use presto-cli: +```bash +./presto-cli-0.293-executable.jar --catalog clp --schema default --server localhost:8080 +``` + +Example query: +``` +SELECT * FROM default LIMIT 1; +``` + +## Uninstall +```bash +helm uninstall demo +``` + +# Delete k8s Cluster +```bash +k3d cluster delete yscope +``` + + +[clp-json-v0.4.0]: https://github.com/y-scope/clp/releases/tag/v0.4.0 +[docker]: https://docs.docker.com/engine/install +[k3d]: https://k3d.io/stable/#installation +[kubectl]: https://kubernetes.io/docs/tasks/tools/#kubectl +[helm]: https://helm.sh/docs/intro/install/ diff --git a/yscope-k8s/demo-assets/clp-config.yml.bak b/yscope-k8s/demo-assets/clp-config.yml.bak new file mode 100644 index 0000000000000..49291183e758e --- /dev/null +++ b/yscope-k8s/demo-assets/clp-config.yml.bak @@ -0,0 +1,38 @@ +package: + storage_engine: "clp-s" +database: + type: "mariadb" + host: "${REPLACE_IP}" + port: 6001 + name: "clp-db" +query_scheduler: + host: "${REPLACE_IP}" + port: 6002 + jobs_poll_delay: 0.1 + num_archives_to_search_per_sub_job: 16 + logging_level: "INFO" +queue: + host: "${REPLACE_IP}" + port: 6003 +redis: + host: "${REPLACE_IP}" + port: 6004 + query_backend_database: 0 + compression_backend_database: 1 +reducer: + host: "${REPLACE_IP}" + base_port: 6100 + logging_level: "INFO" + upsert_interval: 100 +results_cache: + host: "${REPLACE_IP}" + port: 6005 + db_name: "clp-query-results" + stream_collection_name: "stream-files" +webui: + host: "localhost" + port: 6000 + logging_level: "INFO" +log_viewer_webui: + host: "localhost" + port: 6006 diff --git a/yscope-k8s/demo-assets/init.sh b/yscope-k8s/demo-assets/init.sh new file mode 100755 index 0000000000000..b54cac4934db6 --- 
/dev/null +++ b/yscope-k8s/demo-assets/init.sh @@ -0,0 +1,17 @@ +#!/usr/bin/env bash + +if [ "$#" -ne 1 ]; then + echo "Usage: $0 " + exit 1 +fi + +SCRIPT_PATH="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" + +IP="$1" +FILE="${SCRIPT_PATH}/clp-config.yml" + +cp "${FILE}.bak" "$FILE" + +sed -i "s|\${REPLACE_IP}|$IP|g" "$FILE" + +echo "Replaced \${REPLACE_IP} with $IP in $FILE" diff --git a/yscope-k8s/demo-assets/mongod-256MB-presto-clp.log.tar.gz b/yscope-k8s/demo-assets/mongod-256MB-presto-clp.log.tar.gz new file mode 100644 index 0000000000000..9586831fc2993 Binary files /dev/null and b/yscope-k8s/demo-assets/mongod-256MB-presto-clp.log.tar.gz differ diff --git a/yscope-k8s/demo-assets/presto-cli-0.293-executable.jar b/yscope-k8s/demo-assets/presto-cli-0.293-executable.jar new file mode 100755 index 0000000000000..6426dee9118ea Binary files /dev/null and b/yscope-k8s/demo-assets/presto-cli-0.293-executable.jar differ diff --git a/yscope-k8s/templates/presto/presto-coordinator-config.yaml b/yscope-k8s/templates/presto/presto-coordinator-config.yaml new file mode 100644 index 0000000000000..772e525411b58 --- /dev/null +++ b/yscope-k8s/templates/presto/presto-coordinator-config.yaml @@ -0,0 +1,53 @@ +apiVersion: "v1" +kind: "ConfigMap" +metadata: + name: "presto-coordinator-config" +data: + clp.properties: | + connector.name=clp + clp.metadata-provider-type=mysql + clp.metadata-db-url=jdbc:mysql://${REPLACE_ME}:6001 + clp.metadata-db-name=clp-db + clp.metadata-db-user=${REPLACE_ME} + clp.metadata-db-password=${REPLACE_ME} + clp.metadata-table-prefix=clp_ + clp.split-provider-type=mysql + clp.metadata-filter-config=/opt/presto-server/etc/metadata-filter.json + + config.properties: | + coordinator=true + node-scheduler.include-coordinator=false + http-server.http.port=8080 + query.max-memory=1GB + query.max-memory-per-node=1GB + discovery-server.enabled=true + discovery.uri=http://{{ .Values.presto.coordinator.serviceName }}:{{ .Values.presto.coordinator.httpPort }} + 
optimizer.optimize-hash-generation=false + regex-library=RE2J + use-alternative-function-signatures=true + inline-sql-functions=false + nested-data-serialization-enabled=false + native-execution-enabled=true + + jvm.config: | + -server + -agentlib:jdwp=transport=dt_socket,server=y,suspend=n,address=*:5005 + -Xmx4G + -XX:+UseG1GC + -XX:G1HeapRegionSize=32M + -XX:+UseGCOverheadLimit + -XX:+ExplicitGCInvokesConcurrent + -XX:+HeapDumpOnOutOfMemoryError + -XX:+ExitOnOutOfMemoryError + -Djdk.attach.allowAttachSelf=true + + log.properties: | + com.facebook.presto=DEBUG + + metadata-filter.json: | + { + } + + node.properties: | + node.environment=production + node.id={{ .Values.presto.coordinator.serviceName }} diff --git a/yscope-k8s/templates/presto/presto-coordinator.yaml b/yscope-k8s/templates/presto/presto-coordinator.yaml new file mode 100644 index 0000000000000..0d5316d22fe03 --- /dev/null +++ b/yscope-k8s/templates/presto/presto-coordinator.yaml @@ -0,0 +1,75 @@ +apiVersion: "v1" +kind: "Pod" +metadata: + labels: + app: "{{ .Values.presto.coordinator.serviceName }}" + name: "{{ .Values.presto.coordinator.serviceName }}" +spec: + initContainers: + - name: "generate-configs" + image: "busybox" + command: ["/scripts/generate-configs.sh"] + volumeMounts: + - name: "presto-coordinator-config-script-volume" + mountPath: "/scripts" + - name: "presto-coordinator-config-dir-volume" + mountPath: "/opt/presto-server/etc" + - name: "presto-coordinator-config-volume" + mountPath: "/configs" + containers: + - name: "{{ .Values.presto.coordinator.serviceName }}" + image: "{{ .Values.presto.coordinator.image }}" + volumeMounts: + - name: "presto-coordinator-config-dir-volume" + mountPath: "/opt/presto-server/etc" + - name: "clp-archives-volume" + mountPath: "/var/data/archives" + volumes: + - name: "presto-coordinator-config-script-volume" + configMap: + name: "presto-coordinator-config-script" + defaultMode: 0755 + - name: "presto-coordinator-config-dir-volume" + emptyDir: {} + - 
name: "presto-coordinator-config-volume" + configMap: + name: "presto-coordinator-config" + - name: "clp-archives-volume" + hostPath: + path: "/var/data/archives" +--- +apiVersion: "v1" +kind: "Service" +metadata: + name: "{{ .Values.presto.coordinator.serviceName }}" + labels: + app: "{{ .Values.presto.coordinator.serviceName }}" +spec: + type: "ClusterIP" + ports: + - name: "http" + port: {{ .Values.presto.coordinator.httpPort }} + targetPort: {{ .Values.presto.coordinator.httpPort }} + selector: + app: "{{ .Values.presto.coordinator.serviceName }}" +--- +apiVersion: "v1" +kind: "ConfigMap" +metadata: + name: "presto-coordinator-config-script" +data: + generate-configs.sh: | + #!/bin/sh + + # Exit on error + set -e + + PRESTO_CONFIG_DIR="/opt/presto-server/etc" + + # Setup the config directory hierarchy + mkdir -p ${PRESTO_CONFIG_DIR}/catalog + + # Copy over files + cp /configs/* ${PRESTO_CONFIG_DIR} + mv ${PRESTO_CONFIG_DIR}/clp.properties ${PRESTO_CONFIG_DIR}/catalog + diff --git a/yscope-k8s/templates/presto/presto-worker-config.yaml b/yscope-k8s/templates/presto/presto-worker-config.yaml new file mode 100644 index 0000000000000..7db3ee621965b --- /dev/null +++ b/yscope-k8s/templates/presto/presto-worker-config.yaml @@ -0,0 +1,24 @@ +apiVersion: "v1" +kind: "ConfigMap" +metadata: + name: "presto-worker-config" +data: + clp.properties: | + connector.name=clp + + config.properties: | + discovery.uri=http://{{ .Values.presto.coordinator.serviceName }}.default.svc.cluster.local:{{ .Values.presto.coordinator.httpPort }} + presto.version=REPLACE_ME + http-server.http.port={{ .Values.presto.worker.httpPort }} + shutdown-onset-sec=1 + register-test-functions=false + runtime-metrics-collection-enabled=false + + node.properties: | + node.environment=production + node.internal-address=REPLACE_ME + node.location=testing-location + node.id=REPLACE_ME + + velox.properties: | + mutable-config=true \ No newline at end of file diff --git 
a/yscope-k8s/templates/presto/presto-worker.yaml b/yscope-k8s/templates/presto/presto-worker.yaml
new file mode 100644
index 0000000000000..c994f50def3a9
--- /dev/null
+++ b/yscope-k8s/templates/presto/presto-worker.yaml
@@ -0,0 +1,164 @@
+# Presto worker Deployment and the ConfigMap that carries its init script.
+apiVersion: "apps/v1"
+kind: "Deployment"
+metadata:
+  name: "{{ .Values.presto.worker.serviceName }}"
+  labels:
+    app: "{{ .Values.presto.worker.serviceName }}"
+spec:
+  replicas: {{ .Values.presto.worker.replicaCount }}
+  selector:
+    matchLabels:
+      app: "{{ .Values.presto.worker.serviceName }}"
+  template:
+    metadata:
+      labels:
+        app: "{{ .Values.presto.worker.serviceName }}"
+    spec:
+      # Renders the final config files into the shared emptyDir before the worker starts.
+      initContainers:
+        - name: "generate-configs"
+          image: "busybox"
+          command: ["/scripts/generate-configs.sh"]
+          volumeMounts:
+            - name: "presto-worker-config-script-volume"
+              mountPath: "/scripts"
+            - name: "presto-worker-config-dir-volume"
+              mountPath: "/opt/presto-server/etc"
+            - name: "presto-worker-config-volume"
+              mountPath: "/configs"
+          env:
+            # Pod IP, used by the script for node.internal-address.
+            - name: POD_IP
+              valueFrom:
+                fieldRef:
+                  fieldPath: status.podIP
+      containers:
+        - name: "{{ .Values.presto.worker.serviceName }}"
+          image: "{{ .Values.presto.worker.image }}"
+          volumeMounts:
+            - name: "presto-worker-config-dir-volume"
+              mountPath: "/opt/presto-server/etc"
+            - name: "clp-archives-volume"
+              mountPath: "/var/data/archives"
+      volumes:
+        - name: "presto-worker-config-script-volume"
+          configMap:
+            name: "presto-worker-config-script"
+            # Executable so the init container can run the script directly.
+            defaultMode: 0755
+        - name: "presto-worker-config-dir-volume"
+          emptyDir: {}
+        - name: "presto-worker-config-volume"
+          configMap:
+            name: "presto-worker-config"
+        - name: "clp-archives-volume"
+          hostPath:
+            path: "/var/data/archives"
+---
+apiVersion: "v1"
+kind: "ConfigMap"
+metadata:
+  name: "presto-worker-config-script"
+data:
+  generate-configs.sh: |
+    #!/bin/sh
+    #
+    # Init-container script: stages the worker's Presto config files into
+    # /opt/presto-server/etc and fills in the REPLACE_ME placeholders
+    # (presto.version, node.internal-address, node.id).
+    # Requires: POD_IP (injected by the Deployment through the downward API).
+
+    # Exit on error
+    set -e
+
+    PRESTO_CONFIG_DIR="/opt/presto-server/etc"
+
+    # Setup the config directory hierarchy
+    mkdir -p "${PRESTO_CONFIG_DIR}/catalog"
+
+    # Copy over files
+    cp /configs/* "${PRESTO_CONFIG_DIR}"
+    mv "${PRESTO_CONFIG_DIR}/clp.properties" "${PRESTO_CONFIG_DIR}/catalog"
+
+    # Update "presto.version" parameter in config.properties file using values from coordinator
+    CONFIG_PROPERTIES_FILE="${PRESTO_CONFIG_DIR}/config.properties"
+    COORDINATOR_INFO_URL="http://{{ .Values.presto.coordinator.serviceName }}.{{ .Release.Namespace }}.svc.cluster.local:{{ .Values.presto.coordinator.httpPort }}/v1/info"
+
+    # Retry configuration
+    MAX_RETRIES=30
+    RETRY_DELAY=10
+
+    echo "Init container: Waiting for Presto to be ready..."
+
+    # 1. Fetch version info from Presto with retry logic
+    retry_count=0
+    while [ "$retry_count" -lt "$MAX_RETRIES" ]; do
+      echo "Attempt $((retry_count + 1))/$MAX_RETRIES - Checking Presto availability..."
+
+      # Try to fetch the response; wget's stderr is discarded on purpose because
+      # connection failures are expected while the coordinator is still starting
+      if response=$(wget -qO- --timeout=10 "$COORDINATOR_INFO_URL" 2>/dev/null); then
+        # Check if response is not empty and contains version info
+        if [ -n "$response" ] && echo "$response" | grep -q '"version"'; then
+          echo "Presto is ready!"
+          break
+        fi
+      fi
+
+      echo "Presto not ready yet, retrying in ${RETRY_DELAY}s..."
+      sleep "$RETRY_DELAY"
+      retry_count=$((retry_count + 1))
+    done
+
+    # Check if we exceeded max retries
+    if [ "$retry_count" -eq "$MAX_RETRIES" ]; then
+      echo "Error: Presto did not become ready after $MAX_RETRIES attempts" >&2
+      exit 1
+    fi
+
+    # 2. Extract the version using grep and sed (busybox compatible)
+    version=$(printf '%s\n' "$response" \
+      | grep -o '"version"[[:space:]]*:[[:space:]]*"[^"]*"' \
+      | head -n 1 \
+      | sed 's/.*:[[:space:]]*"//;s/"$//')
+    if [ -z "$version" ]; then
+      echo "Error: could not extract Presto version from coordinator response" >&2
+      exit 1
+    fi
+
+    echo "Detected Presto version: $version"
+
+    # 3. Replace `presto.version=REPLACE_ME` with actual version in the config file
+    if grep -q '^presto\.version=REPLACE_ME' "$CONFIG_PROPERTIES_FILE"; then
+      sed -i "s|^presto\.version=REPLACE_ME|presto.version=$version|" "$CONFIG_PROPERTIES_FILE"
+      echo "Updated $CONFIG_PROPERTIES_FILE with version $version"
+    else
+      echo "Error: 'presto.version=REPLACE_ME' not found in $CONFIG_PROPERTIES_FILE" >&2
+      exit 1
+    fi
+
+    # Modify node.properties
+    NODE_PROPERTIES_FILE="${PRESTO_CONFIG_DIR}/node.properties"
+    # Fail fast if the pod IP was not injected into the environment
+    : "${POD_IP:?POD_IP must be set}"
+    # Fall back to the hostname command when HOSTNAME is not exported
+    NODE_ID="${HOSTNAME:-$(hostname)}"
+
+    # 1. Replace `node.internal-address=REPLACE_ME` with actual ip address in the config file
+    if grep -q '^node\.internal-address=REPLACE_ME' "$NODE_PROPERTIES_FILE"; then
+      sed -i "s|^node\.internal-address=REPLACE_ME|node.internal-address=$POD_IP|" "$NODE_PROPERTIES_FILE"
+      echo "Updated $NODE_PROPERTIES_FILE with node.internal-address $POD_IP"
+    else
+      echo "Error: 'node.internal-address=REPLACE_ME' not found in $NODE_PROPERTIES_FILE" >&2
+      exit 1
+    fi
+
+    # 2. Replace `node.id=REPLACE_ME` with actual hostname in the config file
+    if grep -q '^node\.id=REPLACE_ME' "$NODE_PROPERTIES_FILE"; then
+      sed -i "s|^node\.id=REPLACE_ME|node.id=$NODE_ID|" "$NODE_PROPERTIES_FILE"
+      echo "Updated $NODE_PROPERTIES_FILE with node.id $NODE_ID"
+    else
+      echo "Error: 'node.id=REPLACE_ME' not found in $NODE_PROPERTIES_FILE" >&2
+      exit 1
+    fi
diff --git a/yscope-k8s/values.yaml b/yscope-k8s/values.yaml
new file mode 100644
index 0000000000000..90a1355c042f5
--- /dev/null
+++ b/yscope-k8s/values.yaml
@@ -0,0 +1,12 @@
+# Configure Presto
+presto:
+  coordinator:
+    image: "ghcr.io/y-scope/presto/coordinator:dev"
+    serviceName: "presto-coordinator"
+    httpPort: 8080
+  worker:
+    image: "ghcr.io/y-scope/presto/prestissimo-worker:dev"
+    serviceName: "presto-worker"
+    httpPort: 8080
+    replicaCount: 1
+