Skip to content

Commit

Permalink
Importing from Hachyderm infra repo
Browse files Browse the repository at this point in the history
  • Loading branch information
Preskton committed Nov 22, 2024
1 parent 187fbcf commit b86cc56
Show file tree
Hide file tree
Showing 12 changed files with 460 additions and 1 deletion.
8 changes: 8 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
.idea
.vscode
.cache
.*vault-pass
.dccache
.DS_Store

files/sample/*
18 changes: 18 additions & 0 deletions Makefile
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
# Paths shared by the local development targets below.
TEMPLATE    := files/etc/prometheus-tailscale-discovery/prometheus.yml.tpl
STATUS_JSON := files/sample/tailscale-status.json
RENDERED    := files/sample/prometheus.rendered.yml

# render: render the template once against the sample Tailscale status.
.PHONY: render
render:
	gomplate -f $(TEMPLATE) -d tailscale=$(STATUS_JSON) > $(RENDERED)

# repl: re-render continuously to the terminal while editing the template.
.PHONY: repl
repl:
	# If you're focusing on a specific part of the file, grep out the header
	watch 'gomplate -f $(TEMPLATE) -d tailscale=$(STATUS_JSON)'

# lint: render first, then let promtool validate the rendered config.
.PHONY: lint
lint: render
	promtool check config $(RENDERED)

# Download the promtool binary from the latest Prometheus release and install
# it to /usr/local/bin (needs curl, jq, wget, tar and sudo).
#
# The whole recipe is a single shell invocation (lines joined with `; \`):
# make runs every recipe line in its own shell, so a VERSION assigned on one
# line would be gone on the next. Shell expansions are written with `$$` so
# that make passes a literal `$` to the shell instead of expanding `$(curl …)`
# and `${VERSION}` as (empty) make variables.
/usr/local/bin/promtool:
	set -eu; \
	VERSION="$$(curl -fsSL https://api.github.com/repos/prometheus/prometheus/releases/latest | jq -r '.tag_name' | cut -c2-)"; \
	test -n "$${VERSION}" || { echo "ERROR - could not determine latest Prometheus version" >&2; exit 1; }; \
	wget -qO- "https://github.com/prometheus/prometheus/releases/download/v$${VERSION}/prometheus-$${VERSION}.linux-amd64.tar.gz" \
		| tar xvzf - --strip-components=1 "prometheus-$${VERSION}.linux-amd64/promtool"; \
	sudo mv promtool /usr/local/bin/promtool
15 changes: 14 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
@@ -1,2 +1,15 @@
# ansible-role-prometheus-tailscale-discovery
An ansible role that uses tailscale to generate an inventory of hosts to then render a prometheus config.

> [!WARNING]
> This was copypasta'd from an internal repo and may not work stand-alone. We'll work to make this the version we use too so that we know it works!
An ansible role that uses tailscale to generate an inventory of hosts to then render a prometheus config. Used by the Hachyderm team to automatically regenerate our prometheus configuration as our fleet changes.

## Assumptions

- `tailscale` is installed, up, and the `tailscale` binary is in your user's `$PATH`
- `promtool` is installed (it should be if prometheus is installed), and it's also available on your `$PATH`

## Using in your environment

You'll definitely want to update the [prometheus.yml template](files/etc/prometheus-tailscale-discovery/prometheus.yml.tpl) to match your environment.
253 changes: 253 additions & 0 deletions files/etc/prometheus-tailscale-discovery/prometheus.yml.tpl
Original file line number Diff line number Diff line change
@@ -0,0 +1,253 @@
####################################################################
# Hachyderm Prometheus Config (/etc/prometheus/prometheus.yml)
#
# DO NOT EDIT THIS FILE MANUALLY - IT IS AUTOGENERATED PERIODICALLY
# BY A SYSTEMD TIMER BASED ON TAGS IN TAILSCALE!
#
# Last rendered: {{ time.Now.Format "2006-01-02 15:04:05" }}
####################################################################

# Defaults for every scrape job and for rule evaluation.
global:
  scrape_interval: 30s
  evaluation_interval: 1m

# Alertmanager wiring. No target is enabled yet; uncomment the entry below
# to start sending alerts.
alerting:
  alertmanagers:
  - static_configs:
    - targets:
      # - alertmanager:9093

# Rule files that are loaded once and evaluated periodically. None are
# enabled yet.
rule_files:
# - "rules.yml"


scrape_configs:
###############################################################################
# SELF - PROMETHEUS
###############################################################################
# Prometheus scrapes its own metrics endpoint before anything else.
- job_name: "prometheus"
  static_configs:
  - targets:
    - "prod-prometheus01:9090"

###############################################################################
# SYNTHETICS - BLACKBOX EXPORTER
###############################################################################
# Blackbox probing of public endpoints so we know when services are up.
- job_name: "blackbox"
  metrics_path: /probe
  params:
    module:
    - http_2xx_get
  static_configs:
  - targets:
    - https://hachyderm.io
  relabel_configs:
  # Pass the listed URL to the exporter as the `target` query parameter.
  - source_labels: [__address__]
    target_label: __param_target
  # Keep the probed URL as the `instance` label.
  - source_labels: [__param_target]
    target_label: instance
  # Point the actual scrape at localhost:9115 instead of the probed URL.
  - target_label: __address__
    replacement: localhost:9115

###############################################################################
# NODE EXPORTER (ALL)
###############################################################################
# Node exporter metrics from every node. Peers are picked from the Tailscale
# status by their `tag:env-*` tag; peers without any Tags are skipped.
- job_name: "node"
  static_configs:
  # The host this config is rendered on (`Self` in the Tailscale status).
  - targets:
    - "{{ (ds "tailscale").Self.HostName }}:9100"
    labels:
      env: prod
  ### PROD ###############
  - targets:
    {{- range $peer := (ds "tailscale").Peer }}
    {{- if has $peer "Tags" }}
    {{- if has $peer.Tags "tag:env-prod" }}
    - "{{ $peer.HostName }}:9100"
    {{- end }}
    {{- end }}
    {{- end }}
    labels:
      env: prod
  ### DEV ###############
  - targets:
    {{- range $peer := (ds "tailscale").Peer }}
    {{- if has $peer "Tags" }}
    {{- if has $peer.Tags "tag:env-dev" }}
    - "{{ $peer.HostName }}:9100"
    {{- end }}
    {{- end }}
    {{- end }}
    labels:
      env: dev

###############################################################################
# NGINX
###############################################################################
# nginx exporter (9113) and nginxlog exporter (4040) on every peer tagged as
# mastodon-web, edge-cdn or mastodon-streaming.
- job_name: "nginx"
  static_configs:
  ### PROD ###############
  - targets:
    {{- range $peer := (ds "tailscale").Peer }}
    {{- if has $peer "Tags" }}
    {{- if and (has $peer.Tags "tag:env-prod") (or (has $peer.Tags "tag:role-mastodon-web") (has $peer.Tags "tag:role-edge-cdn") (has $peer.Tags "tag:role-mastodon-streaming")) }}
    - "{{ $peer.HostName }}:4040" # nginxlog
    - "{{ $peer.HostName }}:9113"
    {{- end }}
    {{- end }}
    {{- end }}
    labels:
      env: prod
  ### DEV ###############
  - targets:
    {{- range $peer := (ds "tailscale").Peer }}
    {{- if has $peer "Tags" }}
    {{- if and (has $peer.Tags "tag:env-dev") (or (has $peer.Tags "tag:role-mastodon-web") (has $peer.Tags "tag:role-edge-cdn") (has $peer.Tags "tag:role-mastodon-streaming")) }}
    - "{{ $peer.HostName }}:4040" # nginxlog
    - "{{ $peer.HostName }}:9113"
    {{- end }}
    {{- end }}
    {{- end }}
    labels:
      env: dev

###############################################################################
# ELASTICSEARCH STATSD
###############################################################################
# statsd has been deprecated in mastodon-sidekiq, so the only statsd endpoint
# left is the local mastodon stats exporter. Elasticsearch nodes are scraped
# by this job as well.
- job_name: "statsd"
  static_configs:
  - targets:
    - "localhost:9876" # mastodon_api for hachyderm.io
    labels:
      env: prod
  ### PROD ###############
  - targets:
    {{- range $peer := (ds "tailscale").Peer }}
    {{- if has $peer "Tags" }}
    {{- if and (has $peer.Tags "tag:env-prod") (has $peer.Tags "tag:role-elasticsearch") }}
    - "{{ $peer.HostName }}:9102"
    {{- end }}
    {{- end }}
    {{- end }}
    labels:
      env: prod
  ### DEV ###############
  - targets:
    {{- range $peer := (ds "tailscale").Peer }}
    {{- if has $peer "Tags" }}
    {{- if and (has $peer.Tags "tag:env-dev") (has $peer.Tags "tag:role-elasticsearch") }}
    - "{{ $peer.HostName }}:9102"
    {{- end }}
    {{- end }}
    {{- end }}
    labels:
      env: dev
  relabel_configs:
  # NOTE(review): this job sets no `params`, so __param_target looks empty
  # here and this rule is presumably a leftover from the blackbox job -
  # confirm before relying on it.
  - source_labels: [__param_target]
    target_label: instance

###############################################################################
# POSTGRESQL
###############################################################################
# PostgreSQL exporter (9187) and pgcat (9930) on primary and backup database
# peers.
- job_name: "postgresql"
  static_configs:
  ### PROD ###############
  - targets:
    {{- range $peer := (ds "tailscale").Peer }}
    {{- if has $peer "Tags" }}
    {{- if and (has $peer.Tags "tag:env-prod") (or (has $peer.Tags "tag:role-postgres-primary") (has $peer.Tags "tag:role-postgres-backup")) }}
    - "{{ $peer.HostName }}:9187"
    - "{{ $peer.HostName }}:9930" # pgcat
    {{- end }}
    {{- end }}
    {{- end }}
    labels:
      env: prod
  ### DEV ###############
  - targets:
    {{- range $peer := (ds "tailscale").Peer }}
    {{- if has $peer "Tags" }}
    {{- if and (has $peer.Tags "tag:env-dev") (or (has $peer.Tags "tag:role-postgres-primary") (has $peer.Tags "tag:role-postgres-backup")) }}
    - "{{ $peer.HostName }}:9187"
    - "{{ $peer.HostName }}:9930" # pgcat
    {{- end }}
    {{- end }}
    {{- end }}
    labels:
      env: dev

###############################################################################
# REDIS
###############################################################################
# Peers tagged `tag:role-redis`, split by environment tag.
# NOTE(review): 9187 is the port the postgresql job uses for the PostgreSQL
# exporter; confirm the redis exporter really listens there (its usual
# default is 9121) and that this is not a copy/paste leftover.
- job_name: "redis"
  static_configs:
  ### PROD ###############
  - targets:
    {{- range $peer := (ds "tailscale").Peer }}
    {{- if has $peer "Tags" }}
    {{- if and (has $peer.Tags "tag:env-prod") (has $peer.Tags "tag:role-redis") }}
    - "{{ $peer.HostName }}:9187"
    {{- end }}
    {{- end }}
    {{- end }}
    labels:
      env: prod
  ### DEV ###############
  - targets:
    {{- range $peer := (ds "tailscale").Peer }}
    {{- if has $peer "Tags" }}
    {{- if and (has $peer.Tags "tag:env-dev") (has $peer.Tags "tag:role-redis") }}
    - "{{ $peer.HostName }}:9187"
    {{- end }}
    {{- end }}
    {{- end }}
    labels:
      env: dev

###############################################################################
# MASTODON STREAMING
###############################################################################
# Statically listed streaming endpoints (not discovered through Tailscale).
- job_name: "streaming"
  static_configs:
  - targets:
    - "prod-mastodon-streaming01:4000"
    - "prod-mastodon-streaming01:4001"
    - "prod-mastodon-streaming01:4002"
    labels:
      env: "prod"
  - targets:
    - "dev-mastodon01:4000"
    labels:
      env: "dev"
  metric_relabel_configs:
  # Prefix every scraped metric name with `mastodon_streaming_`.
  - action: replace
    source_labels: [__name__]
    regex: '(.*)'
    target_label: __name__
    replacement: mastodon_streaming_${1}

###############################################################################
# SIDEKIQ
###############################################################################
# Peers tagged `tag:role-mastodon-sidekiq`, split by environment tag.
# NOTE(review): 9187 is the port the postgresql job uses for the PostgreSQL
# exporter; confirm the sidekiq exporter really listens there and that this
# is not a copy/paste leftover.
- job_name: "sidekiq"
  static_configs:
  ### PROD ###############
  - targets:
    {{- range $peer := (ds "tailscale").Peer }}
    {{- if has $peer "Tags" }}
    {{- if and (has $peer.Tags "tag:env-prod") (has $peer.Tags "tag:role-mastodon-sidekiq") }}
    - "{{ $peer.HostName }}:9187"
    {{- end }}
    {{- end }}
    {{- end }}
    labels:
      env: prod
  ### DEV ###############
  - targets:
    {{- range $peer := (ds "tailscale").Peer }}
    {{- if has $peer "Tags" }}
    {{- if and (has $peer.Tags "tag:env-dev") (has $peer.Tags "tag:role-mastodon-sidekiq") }}
    - "{{ $peer.HostName }}:9187"
    {{- end }}
    {{- end }}
    {{- end }}
    labels:
      env: dev
52 changes: 52 additions & 0 deletions files/usr/local/bin/prometheus-write-scrape-config-from-tailscale
Original file line number Diff line number Diff line change
@@ -0,0 +1,52 @@
#!/usr/bin/env bash
#
# Renders the Prometheus config from the cached Tailscale status and, when
# the result differs from the live config, backs up the live config, installs
# the rendered one and sends SIGHUP to Prometheus so it reloads.
#
# Exits non-zero (leaving the live config untouched) when the template or the
# status cache is missing, when rendering fails or produces an empty file, or
# when promtool rejects the rendered config.

# Abort on the first failing command, unset variable or failed pipeline stage
# so a broken render can never be copied over the live config.
set -euo pipefail

TAILSCALE_STATUS_CACHE=/var/cache/tailscale/status.json
PROMETHEUS_CONFIG_TEMPLATE=/etc/prometheus-tailscale-discovery/prometheus.yml.tpl
PROMETHEUS_RENDERED_CONFIG=/etc/prometheus/prometheus.rendered.yml
PROMETHEUS_CONFIG=/etc/prometheus/prometheus.yml
PROMETHEUS_CONFIG_BACKUP=/etc/prometheus/prometheus.yml.bak

# Print only the hex digest of the file given as $1.
sha256() {
  sha256sum "$1" | awk '{print $1}'
}

# if template doesn't exist, bail
if [[ ! -f "${PROMETHEUS_CONFIG_TEMPLATE}" ]]; then
  echo "ERROR - Prometheus config template does not exist at ${PROMETHEUS_CONFIG_TEMPLATE}. Exiting."
  exit 1
fi

if [[ ! -f "${TAILSCALE_STATUS_CACHE}" ]]; then
  echo "ERROR - Tailscale status cache does not exist at ${TAILSCALE_STATUS_CACHE}. Exiting."
  exit 1
fi

if ! gomplate -f "${PROMETHEUS_CONFIG_TEMPLATE}" -d "tailscale=${TAILSCALE_STATUS_CACHE}" > "${PROMETHEUS_RENDERED_CONFIG}"; then
  echo "ERROR - Failed to render ${PROMETHEUS_CONFIG_TEMPLATE}. Exiting."
  exit 1
fi

# A zero-byte render would otherwise be installed as the live config.
if [[ ! -s "${PROMETHEUS_RENDERED_CONFIG}" ]]; then
  echo "ERROR - Rendered config at ${PROMETHEUS_RENDERED_CONFIG} is empty. Exiting."
  exit 1
fi

if [[ -f "${PROMETHEUS_CONFIG}" ]]; then
  starting_hash="$(sha256 "${PROMETHEUS_CONFIG}")"
else
  starting_hash="<does not exist>"
fi

echo "Starting Prometheus config sync from Tailscale"
echo "Original ${PROMETHEUS_CONFIG} file hash: ${starting_hash}"

ending_hash="$(sha256 "${PROMETHEUS_RENDERED_CONFIG}")"

if [[ "${starting_hash}" == "${ending_hash}" ]]; then
  echo "No update"
else
  echo "Config needs update. New file hash: ${ending_hash}"

  # Validate before touching the live config. Validation is skipped with a
  # warning when promtool is not installed, so hosts without it keep working
  # as before.
  if command -v promtool > /dev/null 2>&1; then
    if ! promtool check config "${PROMETHEUS_RENDERED_CONFIG}"; then
      echo "ERROR - Rendered config failed promtool validation. Leaving ${PROMETHEUS_CONFIG} untouched."
      exit 1
    fi
  else
    echo "WARNING - promtool not found on PATH; skipping validation of rendered config"
  fi

  # On the very first run there is no live config to back up yet.
  if [[ -f "${PROMETHEUS_CONFIG}" ]]; then
    echo "Backing up config to ${PROMETHEUS_CONFIG_BACKUP}"
    cp "${PROMETHEUS_CONFIG}" "${PROMETHEUS_CONFIG_BACKUP}"
  fi

  echo "Copying rendered config to ${PROMETHEUS_CONFIG}"
  cp "${PROMETHEUS_RENDERED_CONFIG}" "${PROMETHEUS_CONFIG}"

  echo "Sending SIGHUP to Prometheus to reload config"
  killall -HUP prometheus
fi
3 changes: 3 additions & 0 deletions files/usr/local/bin/tailscale-write-status-cache
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
#!/usr/bin/env bash
#
# Writes the current `tailscale status --json` output to the status cache
# file that the Prometheus config renderer reads.

set -euo pipefail

TAILSCALE_STATUS_CACHE=/var/cache/tailscale/status.json

# Capture the output first: redirecting straight into the cache file would
# truncate it before tailscale runs, leaving an empty or partial cache behind
# whenever the command fails. With `set -e` a failure aborts here and the
# previous cache is kept.
status_json="$(tailscale status --json)"

if [[ -z "${status_json}" ]]; then
  echo "ERROR - tailscale status returned no output; keeping existing ${TAILSCALE_STATUS_CACHE}" >&2
  exit 1
fi

printf '%s\n' "${status_json}" > "${TAILSCALE_STATUS_CACHE}"
4 changes: 4 additions & 0 deletions requirements.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
---
# Ansible Galaxy collections this role depends on, installed from git.
collections:
- name: "https://github.com/ansible-collections/community.general"
  type: git
Loading

0 comments on commit b86cc56

Please sign in to comment.