Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix snuba admin's query tracing to connect to right storage and query nodes #6268

Merged
merged 11 commits into from
Oct 24, 2024
24 changes: 23 additions & 1 deletion docker-compose.gcb.yml
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ x-test-common: &test-common
- ".artifacts:/.artifacts"
environment:
SNUBA_SETTINGS: "$SNUBA_SETTINGS"
CLICKHOUSE_HOST: clickhouse
CLICKHOUSE_HOST: clickhouse.dev.ci
onkar marked this conversation as resolved.
Show resolved Hide resolved
USE_REDIS_CLUSTER: "1"
REDIS_HOST: "redis-cluster"
REDIS_PORT: 7000
Expand Down Expand Up @@ -71,6 +71,13 @@ services:
KAFKA_TOOLS_LOG4J_LOGLEVEL: "WARN"
clickhouse:
image: "${CLICKHOUSE_IMAGE:-ghcr.io/getsentry/image-mirror-altinity-clickhouse-server:23.3.19.33.altinitystable}"
hostname: clickhouse.dev.ci
extra_hosts:
- "clickhouse.dev.ci:127.0.0.1" # Add entry to /etc/hosts file
ports:
- "8123:8123"
- "9000:9000"
- "9009:9009"
volumes:
- ./config/clickhouse/macros.xml:/etc/clickhouse-server/config.d/macros.xml
- ./config/clickhouse/zookeeper.xml:/etc/clickhouse-server/config.d/zookeeper.xml
Expand All @@ -85,6 +92,9 @@ services:
depends_on:
- zookeeper
image: "${CLICKHOUSE_IMAGE:-ghcr.io/getsentry/image-mirror-altinity-clickhouse-server:23.3.19.33.altinitystable}"
hostname: clickhouse-query.dev.ci
extra_hosts:
- "clickhouse-query.dev.ci:127.0.0.1" # Add entry to /etc/hosts file
profiles: ["multi_node"]
volumes:
- ./test_distributed_migrations/config/clickhouse/zookeeper.xml:/etc/clickhouse-server/config.d/zookeeper.xml
Expand All @@ -97,6 +107,9 @@ services:
depends_on:
- zookeeper
image: "${CLICKHOUSE_IMAGE:-altinity/clickhouse-server:23.3.19.33.altinitystable}"
hostname: clickhouse-01.dev.ci
extra_hosts:
- "clickhouse-01.dev.ci:127.0.0.1" # Add entry to /etc/hosts file
profiles: ["multi_node"]
volumes:
- ./test_distributed_migrations/config/clickhouse/macros-01.xml:/etc/clickhouse-server/config.d/macros.xml
Expand All @@ -110,6 +123,9 @@ services:
depends_on:
- zookeeper
image: "${CLICKHOUSE_IMAGE:-altinity/clickhouse-server:23.3.19.33.altinitystable}"
hostname: clickhouse-02.dev.ci
extra_hosts:
- "clickhouse-02.dev.ci:127.0.0.1" # Add entry to /etc/hosts file
profiles: ["multi_node"]
volumes:
- ./test_distributed_migrations/config/clickhouse/macros-02.xml:/etc/clickhouse-server/config.d/macros.xml
Expand All @@ -123,6 +139,9 @@ services:
depends_on:
- zookeeper
image: "${CLICKHOUSE_IMAGE:-altinity/clickhouse-server:23.3.19.33.altinitystable}"
hostname: clickhouse-03.dev.ci
extra_hosts:
- "clickhouse-03.dev.ci:127.0.0.1" # Add entry to /etc/hosts file
profiles: ["multi_node"]
volumes:
- ./test_distributed_migrations/config/clickhouse/macros-03.xml:/etc/clickhouse-server/config.d/macros.xml
Expand All @@ -136,6 +155,9 @@ services:
depends_on:
- zookeeper
image: "${CLICKHOUSE_IMAGE:-ghcr.io/getsentry/image-mirror-altinity-clickhouse-server:23.3.19.33.altinitystable}"
hostname: clickhouse-04.dev.ci
extra_hosts:
- "clickhouse-04.dev.ci:127.0.0.1" # Add entry to /etc/hosts file
profiles: ["multi_node"]
volumes:
- ./test_distributed_migrations/config/clickhouse/macros-04.xml:/etc/clickhouse-server/config.d/macros.xml
Expand Down
79 changes: 54 additions & 25 deletions snuba/admin/views.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,10 +2,11 @@

import io
import sys
import time
from contextlib import redirect_stdout
from dataclasses import asdict
from datetime import datetime
from typing import Any, List, Mapping, Optional, Sequence, Tuple, cast
from typing import Any, Dict, List, Mapping, Optional, Sequence, Tuple, cast

import sentry_sdk
import simplejson as json
Expand Down Expand Up @@ -498,14 +499,46 @@ def clickhouse_trace_query() -> Response:

profile_events_raw_sql = "SELECT ProfileEvents FROM system.query_log WHERE query_id = '{}' AND type = 'QueryFinish'"

for query_trace_data in parse_trace_for_query_ids(query_trace, storage):
for query_trace_data in parse_trace_for_query_ids(query_trace):
sql = profile_events_raw_sql.format(query_trace_data.query_id)
logger.info(
"Profile event gathering host: {}, port = {}, storage = {}, sql = {}, g.user = {}".format(
"Gathering profile event using host: {}, port = {}, storage = {}, sql = {}, g.user = {}".format(
query_trace_data.host, query_trace_data.port, storage, sql, g.user
)
)
# TODO: Onkar to add the profile event logic later.
system_query_result, counter = None, 0
while counter < 60:
onkar marked this conversation as resolved.
Show resolved Hide resolved
# There is a race between the trace query and the 'SELECT ProfileEvents...' query. ClickHouse does not immediately
# return the rows for 'SELECT ProfileEvents...' query. To make it return rows, sleep between the query executions.
system_query_result = run_system_query_on_host_with_sql(
query_trace_data.host,
int(query_trace_data.port),
storage,
sql,
False,
g.user,
)
if not system_query_result.results:
time.sleep(1)
counter += 1
else:
break

if system_query_result is not None and len(system_query_result.results) > 0:
query_trace.profile_events_meta.append(system_query_result.meta)
query_trace.profile_events_profile = cast(
Dict[str, int], system_query_result.profile
)
columns = system_query_result.meta
if columns:
res = {}
res["column_names"] = [name for name, _ in columns]
res["rows"] = []
for query_result in system_query_result.results:
if query_result[0]:
res["rows"].append(json.dumps(query_result[0]))
query_trace.profile_events_results[query_trace_data.node_name] = res

return make_response(jsonify(asdict(query_trace)), 200)
except InvalidCustomQuery as err:
return make_response(
Expand All @@ -520,41 +553,37 @@ def clickhouse_trace_query() -> Response:
400,
)
except ClickhouseError as err:
logger.error(err, exc_info=True)
onkar marked this conversation as resolved.
Show resolved Hide resolved
details = {
"type": "clickhouse",
"message": str(err),
"code": err.code,
}
return make_response(jsonify({"error": details}), 400)
except Exception as err:
logger.error(err, exc_info=True)
return make_response(
jsonify({"error": {"type": "unknown", "message": str(err)}}),
500,
)


def parse_trace_for_query_ids(
trace_output: TraceOutput, storage_key: str
) -> List[QueryTraceData]:
result = []
def parse_trace_for_query_ids(trace_output: TraceOutput) -> List[QueryTraceData]:
summarized_trace_output = trace_output.summarized_trace_output
storage_info = get_storage_info()
matched = next(
(info for info in storage_info if info["storage_name"] == storage_key), None
)
if matched is not None:
local_nodes = matched.get("local_nodes", [])
query_node = matched.get("query_node", None)
result = [
QueryTraceData(
host=local_nodes[0].get("host") if local_nodes else query_node.get("host"), # type: ignore
port=local_nodes[0].get("port") if local_nodes else query_node.get("port"), # type: ignore
query_id=query_summary.query_id,
node_name=node_name,
)
for node_name, query_summary in summarized_trace_output.query_summaries.items()
]
return result
node_name_to_query_id = {
node_name: query_summary.query_id
for node_name, query_summary in summarized_trace_output.query_summaries.items()
}
logger.info("node to query id mapping: {}".format(node_name_to_query_id))
return [
QueryTraceData(
host=node_name,
port=9000,
query_id=query_id,
node_name=node_name,
)
for node_name, query_id in node_name_to_query_id.items()
]


@application.route("/clickhouse_querylog_query", methods=["POST"])
Expand Down
2 changes: 1 addition & 1 deletion snuba/settings/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -82,7 +82,7 @@

CLUSTERS: Sequence[Mapping[str, Any]] = [
{
"host": os.environ.get("CLICKHOUSE_HOST", "127.0.0.1"),
"host": os.environ.get("CLICKHOUSE_HOST", "clickhouse.dev.local"),
onkar marked this conversation as resolved.
Show resolved Hide resolved
"port": int(os.environ.get("CLICKHOUSE_PORT", 9000)),
"max_connections": int(os.environ.get("CLICKHOUSE_MAX_CONNECTIONS", 1)),
"block_connections": bool(
Expand Down
1 change: 1 addition & 0 deletions tests/admin/test_api.py
Original file line number Diff line number Diff line change
Expand Up @@ -259,6 +259,7 @@ def test_query_trace(admin_api: FlaskClient) -> None:
data = json.loads(response.data)
assert "<Debug> executeQuery" in data["trace_output"]
assert "summarized_trace_output" in data
assert "profile_events_results" in data


@pytest.mark.redis_db
Expand Down
Loading