diff --git a/.env b/.env index 058b1eb..4eaaf65 100644 --- a/.env +++ b/.env @@ -34,6 +34,8 @@ # - Zebra: UID=10001, GID=10001, permissions=700 # - Zaino: UID=1000, GID=1000, permissions=700 # - Zallet: UID=65532, GID=65532, permissions=700 +# - Prometheus: UID=65534, GID=65534, permissions=700 (nobody) +# - Grafana: UID=472, GID=0, permissions=700 # - Cookie: Keep as Docker volume (recommended) to avoid cross-user issues # # WARNING: Never use 755 or 777 permissions - they expose your data! @@ -67,6 +69,14 @@ Z3_ZAINO_DATA_PATH=zaino_data # Default: zallet_data (Docker named volume) Z3_ZALLET_DATA_PATH=zallet_data +# Prometheus metrics data directory +# Default: prometheus_data (Docker named volume) +Z3_PROMETHEUS_DATA_PATH=prometheus_data + +# Grafana dashboard data directory +# Default: grafana_data (Docker named volume) +Z3_GRAFANA_DATA_PATH=grafana_data + # ============================================================================= # Common Configuration # ============================================================================= @@ -82,6 +92,14 @@ ENABLE_COOKIE_AUTH=true # In-container directory for the .cookie authentication file COOKIE_AUTH_FILE_DIR=/var/run/auth +# ============================================================================= +# Monitoring Configuration +# ============================================================================= +# Enable monitoring services (prometheus, grafana) by default. +# Comment out or set to empty string to disable monitoring. 
+COMPOSE_PROFILES=monitoring + + # ============================================================================= # Zebra Configuration # ============================================================================= @@ -104,6 +122,10 @@ Z3_ZEBRA_RPC_PORT=18232 Z3_ZEBRA_HOST_RPC_PORT=18232 # Infrastructure: Zebra host health port (for external access to health endpoints) Z3_ZEBRA_HOST_HEALTH_PORT=8080 +# Zebra metrics endpoint configuration +ZEBRA_METRICS__ENDPOINT_ADDR=0.0.0.0:9999 +# Infrastructure: Zebra host metrics port +Z3_ZEBRA_HOST_METRICS_PORT=9999 # ============================================================================= # Zaino Configuration diff --git a/.gitignore b/.gitignore index bd498cf..22e92df 100644 --- a/.gitignore +++ b/.gitignore @@ -31,3 +31,8 @@ config/tls/* # Un-ignore .gitkeep directly under config !config/.gitkeep + +# Monitoring Configuration +!config/prometheus.yml +!config/grafana/ +!config/grafana/** diff --git a/README.md b/README.md index 1bf1b21..6d75275 100644 --- a/README.md +++ b/README.md @@ -16,6 +16,7 @@ A modern, modular Zcash software stack combining Zebra, Zaino, and Zallet to rep - [Interacting with Services](#interacting-with-services) - [Configuration Guide](#configuration-guide) - [Health and Readiness Checks](#health-and-readiness-checks) +- [Monitoring](#monitoring) --- @@ -118,6 +119,8 @@ docker compose ps | **Zebra** | `zfnd/zebra:3.1.0` | Pre-built from [ZcashFoundation/zebra](https://github.com/ZcashFoundation/zebra) | | **Zaino** | `z3-zaino:local` | Must build locally from submodule | | **Zallet** | `z3-zallet:local` | Must build locally from submodule | +| **Prometheus** | `prom/prometheus:latest` | Official Prometheus image | +| **Grafana** | `grafana/grafana:latest` | Official Grafana image | ### Building Local Images @@ -348,6 +351,8 @@ By default, the stack uses Docker named volumes which are managed by Docker: - `zebra_data`: Zebra blockchain state (~300GB+ for mainnet, ~30GB for testnet) - 
`zaino_data`: Zaino indexer database - `zallet_data`: Zallet wallet data +- `prometheus_data`: Prometheus metrics storage +- `grafana_data`: Grafana dashboard data and settings - `shared_cookie_volume`: RPC authentication cookies **Advantages:** @@ -403,12 +408,42 @@ Each service runs as a specific non-root user with distinct UIDs/GIDs: - **Zebra**: UID=10001, GID=10001, permissions 700 - **Zaino**: UID=1000, GID=1000, permissions 700 - **Zallet**: UID=65532, GID=65532, permissions 700 +- **Prometheus**: UID=65534, GID=65534, permissions 700 +- **Grafana**: UID=472, GID=0, permissions 700 **Critical:** Local directories must have correct ownership and secure permissions: - Use `fix-permissions.sh` to set ownership automatically - Permissions must be 700 (owner only) or 750 (owner + group read) - **Never use 755 or 777** - these expose your blockchain data and wallet to other users +#### Monitoring Data (Optional) + +If you choose to use local directories for Prometheus and Grafana data: + +1. **Create directories:** + ```bash + mkdir -p /your/chosen/path/prometheus-data + mkdir -p /your/chosen/path/grafana-data + ``` + +2. **Fix permissions:** + - **Prometheus** runs as `nobody` (UID 65534): + ```bash + sudo chown -R 65534:65534 /your/chosen/path/prometheus-data + chmod 700 /your/chosen/path/prometheus-data + ``` + - **Grafana** runs as UID 472: + ```bash + sudo chown -R 472:0 /your/chosen/path/grafana-data + chmod 700 /your/chosen/path/grafana-data + ``` + +3. **Update `.env`:** + ```bash + Z3_PROMETHEUS_DATA_PATH=/your/chosen/path/prometheus-data + Z3_GRAFANA_DATA_PATH=/your/chosen/path/grafana-data + ``` + ## Configuration Guide This section explains how the Z3 stack is configured and how to customize it for your needs. 
@@ -625,4 +660,62 @@ Once the stack is running, services can be accessed via their exposed ports: * **Zaino JSON-RPC:** `http://localhost:${ZAINO_HOST_JSONRPC_PORT:-8237}` (default: `http://localhost:8237`, if enabled) * **Zallet RPC:** `http://localhost:${ZALLET_HOST_RPC_PORT:-28232}` (default: `http://localhost:28232`) -Refer to the individual component documentation for RPC API details. \ No newline at end of file +Refer to the individual component documentation for RPC API details. + +## Monitoring + +The Z3 stack includes a pre-configured Grafana dashboard for monitoring and alerting. + +### Accessing the Dashboard + +1. **Access Grafana:** Open your browser and navigate to `http://localhost:3000`. +2. **Login:** + - **Username:** `admin` + - **Password:** `admin` +3. **Change Password:** You will be prompted to change the password on your first login. This new password will be persisted in the `grafana_data` volume. + +### Dashboard Features + +The **Zebra Status** dashboard provides real-time visibility into: + +- **Node Status:** Current version, block height, and peer connection health. +- **Network Health:** Inbound/Outbound traffic rates and P2P message volume. +- **Consensus & Mempool:** Mempool transaction count/size and proof verification rates. +- **Peer Analytics:** Distribution of connected peer user agents. + +### Alerting + +The dashboard includes basic alerts to notify you of critical issues: + +- **Low Peer Count:** Triggers if the node has 0 peers for 5 minutes. +- **Block Height Stalled:** Triggers when the block height has not changed over a 15-minute window and that condition has persisted for a further 5 minutes (roughly 20 minutes after the last new block). + +**Configuring Notifications:** + +The default alerts are configured via provisioning files. To receive notifications (e.g., via Email, Slack, PagerDuty), you must configure **Contact Points** in the Grafana UI: + +1. Log in to Grafana (`http://localhost:3000`). +2. Go to **Alerting** -> **Contact points**. +3. 
To change the provisioned `default_contact_point` (it ships with the placeholder address `example@example.com`), edit `config/grafana/provisioning/alerting/alerting.yaml` and restart Grafana, because file-provisioned alerting resources cannot be edited in the UI; alternatively, create a new contact point with your preferred integration (Email, Slack, etc.). +4. Go to **Alerting** -> **Notification policies** and ensure your contact point is selected as the default. The default policy is provisioned in the same file, so if the UI shows it as read-only, change its `receiver` there instead. + +> [!NOTE] +> Currently, the dashboard only visualizes metrics from the **Zebra** node. Support for **Zaino** (indexer) and **Zallet** (wallet) metrics is planned for future updates. + +### Configuration + +Monitoring is enabled by default: `.env` sets `COMPOSE_PROFILES=monitoring`, which activates the `prometheus` and `grafana` services (both assigned to the `monitoring` profile) in `docker-compose.yml`. Prometheus scrapes metrics from Zebra's metrics endpoint (port 9999), and Grafana visualizes this data. + +**To disable monitoring:** + +Edit your `.env` file and comment out or clear the `COMPOSE_PROFILES` variable: + +```bash +# In .env: +# COMPOSE_PROFILES=monitoring <-- Comment out to disable +``` + +Then restart the stack: +```bash +docker compose up -d --remove-orphans +``` \ No newline at end of file diff --git a/config/grafana/dashboards/zebra.json b/config/grafana/dashboards/zebra.json new file mode 100644 index 0000000..af8f466 --- /dev/null +++ b/config/grafana/dashboards/zebra.json @@ -0,0 +1,723 @@ +{ + "annotations": { + "list": [ + { + "builtIn": 1, + "datasource": { + "type": "grafana", + "uid": "-- Grafana --" + }, + "enable": true, + "hide": true, + "iconColor": "rgba(0, 211, 255, 1)", + "name": "Annotations & Alerts", + "type": "dashboard" + } + ] + }, + "editable": true, + "fiscalYearStartMonth": 0, + "graphTooltip": 0, + "id": null, + "links": [], + "liveNow": false, + "panels": [ + { + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 0 + }, + "id": 100, + "title": "Node Status", + "type": "row" + }, + { + "datasource": { + "type": "prometheus", + "uid": "z3_prometheus" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 4, + 
"w": 8, + "x": 0, + "y": 1 + }, + "id": 1, + "options": { + "colorMode": "value", + "graphMode": "none", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "/^version$/", + "values": false + }, + "textMode": "auto", + "wideLayout": true + }, + "pluginVersion": "10.3.3", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "z3_prometheus" + }, + "expr": "zebrad_build_info", + "format": "table", + "instant": true, + "legendFormat": "{{version}}", + "refId": "A" + } + ], + "title": "Version", + "type": "stat" + }, + { + "datasource": { + "type": "prometheus", + "uid": "z3_prometheus" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 4, + "w": 8, + "x": 8, + "y": 1 + }, + "id": 2, + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "textMode": "auto", + "wideLayout": true + }, + "pluginVersion": "10.3.3", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "z3_prometheus" + }, + "expr": "zcash_chain_verified_block_height", + "refId": "A" + } + ], + "title": "Block Height", + "type": "stat" + }, + { + "datasource": { + "type": "prometheus", + "uid": "z3_prometheus" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [], + "max": 100, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "red", + "value": null + }, + { + "color": "#EAB839", + "value": 10 + }, + { + "color": "green", + "value": 20 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 4, + "w": 8, + "x": 16, + "y": 1 + }, + "id": 3, + "options": { + "orientation": "auto", + 
"reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "showThresholdLabels": false, + "showThresholdMarkers": true + }, + "pluginVersion": "10.3.3", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "z3_prometheus" + }, + "expr": "zcash_net_peers", + "refId": "A" + } + ], + "title": "Peer Count", + "type": "gauge" + }, + { + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 5 + }, + "id": 101, + "title": "Network Health", + "type": "row" + }, + { + "datasource": { + "type": "prometheus", + "uid": "z3_prometheus" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "opacity", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + } + ] + }, + "unit": "Bps" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 6 + }, + "id": 4, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "z3_prometheus" + }, + "expr": "sum(rate(zcash_net_in_bytes_total[1m]))", + "legendFormat": "Inbound", + "refId": "B" + }, + { + "datasource": { + "type": "prometheus", + "uid": "z3_prometheus" + }, + "expr": "sum(rate(zcash_net_out_bytes_total[1m]))", + "legendFormat": "Outbound", + "refId": 
"A" + } + ], + "title": "Network Traffic", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "z3_prometheus" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "opacity", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 6 + }, + "id": 5, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "z3_prometheus" + }, + "expr": "sum(rate(zcash_net_in_messages[1m]))", + "legendFormat": "Inbound", + "refId": "A" + }, + { + "datasource": { + "type": "prometheus", + "uid": "z3_prometheus" + }, + "expr": "sum(rate(zcash_net_out_messages[1m]))", + "legendFormat": "Outbound", + "refId": "B" + } + ], + "title": "Message Rate", + "type": "timeseries" + }, + { + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 14 + }, + "id": 102, + "title": "Consensus & Mempool", + "type": "row" + }, + { + "datasource": { + "type": "prometheus", + "uid": "z3_prometheus" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + 
"axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "opacity", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + } + ] + } + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "Size (Bytes)" + }, + "properties": [ + { + "id": "custom.axisPlacement", + "value": "right" + }, + { + "id": "unit", + "value": "bytes" + } + ] + } + ] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 15 + }, + "id": 6, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "z3_prometheus" + }, + "expr": "zcash_mempool_size_transactions", + "legendFormat": "Transactions", + "refId": "A" + }, + { + "datasource": { + "type": "prometheus", + "uid": "z3_prometheus" + }, + "expr": "zcash_mempool_size_bytes", + "legendFormat": "Size (Bytes)", + "refId": "B" + } + ], + "title": "Mempool Size", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "z3_prometheus" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "opacity", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + 
"scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 15 + }, + "id": 7, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "z3_prometheus" + }, + "expr": "rate(proofs_halo2_verified[1m]) + rate(proofs_groth16_verified[1m])", + "legendFormat": "Verifications/sec", + "refId": "A" + } + ], + "title": "Verification Rate", + "type": "timeseries" + }, + { + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 23 + }, + "id": 103, + "title": "Peer Analytics", + "type": "row" + }, + { + "datasource": { + "type": "prometheus", + "uid": "z3_prometheus" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + } + }, + "mappings": [] + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 24 + }, + "id": 8, + "options": { + "legend": { + "displayMode": "list", + "placement": "right", + "showLegend": true + }, + "pieType": "pie", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "z3_prometheus" + }, + "expr": "sum by (user_agent) (zcash_net_peers_connected)", + "legendFormat": "{{user_agent}}", + "refId": "A" + } + ], + "title": "Peer Distribution", + "type": "piechart" + } + ], + "refresh": "", + "schemaVersion": 39, + "tags": [], + 
"templating": { + "list": [] + }, + "time": { + "from": "now-6h", + "to": "now" + }, + "timepicker": {}, + "timezone": "", + "title": "Zebra Status", + "uid": "zebra-status", + "version": 7, + "weekStart": "" +} \ No newline at end of file diff --git a/config/grafana/provisioning/alerting/alerting.yaml b/config/grafana/provisioning/alerting/alerting.yaml new file mode 100644 index 0000000..cb65b4e --- /dev/null +++ b/config/grafana/provisioning/alerting/alerting.yaml @@ -0,0 +1,174 @@ +apiVersion: 1 + +contactPoints: + - orgId: 1 + name: default_contact_point + receivers: + - uid: default_email_receiver + type: email + settings: + addresses: "example@example.com" + +policies: + - orgId: 1 + receiver: default_contact_point + group_by: ['grafana_folder', 'alertname'] + +groups: + - orgId: 1 + name: Zebra Alerts + folder: Zebra + interval: 1m + rules: + - uid: low_peer_count + title: Low Peer Count + condition: C + data: + - refId: A + relativeTimeRange: + from: 600 + to: 0 + datasourceUid: z3_prometheus + model: + editorMode: code + expr: zcash_net_peers + instant: true + intervalMs: 1000 + legendFormat: __auto + maxDataPoints: 43200 + range: false + refId: A + - refId: B + datasourceUid: __expr__ + model: + conditions: + - evaluator: + params: [] + type: gt + operator: + type: and + query: + params: + - B + reducer: + params: [] + type: last + type: query + datasource: + type: __expr__ + uid: __expr__ + expression: A + intervalMs: 1000 + maxDataPoints: 43200 + reducer: last + refId: B + type: reduce + - refId: C + datasourceUid: __expr__ + model: + conditions: + - evaluator: + params: + - 1 + type: lt + operator: + type: and + query: + params: + - C + reducer: + params: [] + type: last + type: query + datasource: + type: __expr__ + uid: __expr__ + expression: $B < 1 + intervalMs: 1000 + maxDataPoints: 43200 + refId: C + type: math + noDataState: Alerting + execErrState: Error + for: 5m + annotations: + description: "Node has 0 peers. Check network connection." 
+ summary: "Low Peer Count" + labels: + severity: critical + - uid: block_height_stalled + title: Block Height Stalled + condition: C + data: + - refId: A + relativeTimeRange: + from: 900 + to: 0 + datasourceUid: z3_prometheus + model: + editorMode: code + expr: changes(zcash_chain_verified_block_height[15m]) + instant: true + intervalMs: 1000 + legendFormat: __auto + maxDataPoints: 43200 + range: false + refId: A + - refId: B + datasourceUid: __expr__ + model: + conditions: + - evaluator: + params: [] + type: gt + operator: + type: and + query: + params: + - B + reducer: + params: [] + type: last + type: query + datasource: + type: __expr__ + uid: __expr__ + expression: A + intervalMs: 1000 + maxDataPoints: 43200 + reducer: last + refId: B + type: reduce + - refId: C + datasourceUid: __expr__ + model: + conditions: + - evaluator: + params: + - 1 + type: lt + operator: + type: and + query: + params: + - C + reducer: + params: [] + type: last + type: query + datasource: + type: __expr__ + uid: __expr__ + expression: $B < 1 + intervalMs: 1000 + maxDataPoints: 43200 + refId: C + type: math + noDataState: Alerting + execErrState: Error + for: 5m + annotations: + description: "Block height has not increased in the last 15 minutes. Node may be stuck." 
+ summary: "Block Height Stalled" + labels: + severity: warning diff --git a/config/grafana/provisioning/dashboards/dashboard.yml b/config/grafana/provisioning/dashboards/dashboard.yml new file mode 100644 index 0000000..f588ca9 --- /dev/null +++ b/config/grafana/provisioning/dashboards/dashboard.yml @@ -0,0 +1,11 @@ +apiVersion: 1 + +providers: + - name: 'Default' + orgId: 1 + folder: '' + type: file + disableDeletion: false + updateIntervalSeconds: 10 + options: + path: /etc/grafana/dashboards diff --git a/config/grafana/provisioning/datasources/datasource.yml b/config/grafana/provisioning/datasources/datasource.yml new file mode 100644 index 0000000..094aaeb --- /dev/null +++ b/config/grafana/provisioning/datasources/datasource.yml @@ -0,0 +1,9 @@ +apiVersion: 1 + +datasources: + - name: Prometheus + type: prometheus + uid: z3_prometheus + access: proxy + url: http://prometheus:9090 + isDefault: true diff --git a/config/prometheus.yml b/config/prometheus.yml new file mode 100644 index 0000000..916d7f4 --- /dev/null +++ b/config/prometheus.yml @@ -0,0 +1,7 @@ +global: + scrape_interval: 15s + +scrape_configs: + - job_name: 'zebra' + static_configs: + - targets: ['zebra:9999'] diff --git a/docker-compose.yml b/docker-compose.yml index a3db421..18cda4c 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -15,6 +15,7 @@ services: - ZEBRA_NETWORK__NETWORK=${NETWORK_NAME} - ZEBRA_RPC__ENABLE_COOKIE_AUTH=${ENABLE_COOKIE_AUTH} - ZEBRA_RPC__COOKIE_DIR=${COOKIE_AUTH_FILE_DIR} + - ZEBRA_METRICS__ENDPOINT_ADDR=${ZEBRA_METRICS__ENDPOINT_ADDR} volumes: # Blockchain state (defaults to named volume 'zebra_data') - ${Z3_ZEBRA_DATA_PATH}:/home/zebra/.cache/zebra @@ -23,6 +24,7 @@ services: ports: - "${Z3_ZEBRA_HOST_RPC_PORT}:${Z3_ZEBRA_RPC_PORT}" - "${Z3_ZEBRA_HOST_HEALTH_PORT}:8080" + - "${Z3_ZEBRA_HOST_METRICS_PORT}:9999" networks: - z3_net healthcheck: @@ -127,10 +129,51 @@ services: # Zallet will restart automatically if it crashes (restart: unless-stopped) # Monitor 
logs or use external monitoring for service health
 
+  prometheus:
+    image: prom/prometheus:latest
+    profiles: ["monitoring"]  # only started when COMPOSE_PROFILES includes "monitoring"
+    container_name: z3_prometheus
+    restart: unless-stopped
+    volumes:
+      - ./config/prometheus.yml:/etc/prometheus/prometheus.yml:ro  # scrape config is only read by the container
+      - ${Z3_PROMETHEUS_DATA_PATH}:/prometheus  # TSDB storage (named volume or host path from .env)
+    command:
+      - '--config.file=/etc/prometheus/prometheus.yml'
+      - '--storage.tsdb.path=/prometheus'
+      - '--web.console.libraries=/etc/prometheus/console_libraries'
+      - '--web.console.templates=/etc/prometheus/consoles'
+      - '--web.enable-lifecycle'  # exposes unauthenticated POST /-/reload and /-/quit
+    ports:
+      - "127.0.0.1:9090:9090"  # loopback only: the lifecycle API enabled above has no authentication
+    networks:
+      - z3_net
+
+  grafana:
+    image: grafana/grafana:latest
+    profiles: ["monitoring"]  # only started when COMPOSE_PROFILES includes "monitoring"
+    container_name: z3_grafana
+    restart: unless-stopped
+    volumes:
+      - ${Z3_GRAFANA_DATA_PATH}:/var/lib/grafana  # Grafana state (named volume or host path from .env)
+      - ./config/grafana/provisioning:/etc/grafana/provisioning:ro
+      - ./config/grafana/dashboards:/etc/grafana/dashboards:ro
+      - ./config/grafana/provisioning/alerting:/etc/grafana/provisioning/alerting:ro  # NOTE(review): already covered by the provisioning mount above
+    environment:
+      - GF_USERS_ALLOW_SIGN_UP=false
+    ports:
+      - "127.0.0.1:3000:3000"  # loopback only: the first login uses the default admin/admin credentials
+    networks:
+      - z3_net
+    depends_on:
+      - prometheus
+
+
 volumes:
   zebra_data:
   zaino_data:
   zallet_data:
+  prometheus_data:
+  grafana_data:
   shared_cookie_volume:
 
 networks: