Skip to content
Merged
Show file tree
Hide file tree
Changes from 10 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions packages/elastic_agent/changelog.yml
Original file line number Diff line number Diff line change
@@ -1,4 +1,9 @@
# newer versions go on top
- version: "2.6.4"
changes:
- description: Adds alerting rule templates
type: enhancement
link: https://github.com/elastic/integrations/pull/15572
- version: "2.6.3"
changes:
- description: Elastic Agent memory charts now prioritise RSS memory for more accurate usage reporting.
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
{
"id": "elastic-agent-cpu-usage-spike-rule",
"type": "alerting_rule_template",
"attributes": {
"name": "[Elastic Agent] CPU usage spike",
"tags": ["Elastic Agent", "Resource Consumption"],
"ruleTypeId": ".es-query",
"schedule": {
"interval": "1m"
},
"params": {
"searchType": "esqlQuery",
"timeWindowSize": 7,
"timeWindowUnit": "m",
"threshold": [0],
"thresholdComparator": ">",
"size": 100,
"esqlQuery": {
"esql": "FROM metrics-*\n | WHERE process.executable LIKE \"*elastic*agent*\"\n | STATS cpu_process_pct = MAX(system.process.cpu.total.pct) * 100\n BY elastic_agent.id, process.name,\n time_bucket = BUCKET(@timestamp, 1 minute)\n // Count the 1 minute timebuckets that are above 80% by process and agent\n | WHERE cpu_process_pct >= 80\n | STATS count_above_threshold = COUNT(*)\n BY elastic_agent.id, process.name\n // Alert if there are 5 or more occurences\n | WHERE count_above_threshold >= 5"
},
"aggType": "count",
"groupBy": "row",
"termSize": 5,
"sourceFields": [],
"timeField": "@timestamp",
"excludeHitsFromPreviousRun": true
},
"alertDelay": {
"active": 1
}
},
"coreMigrationVersion": "8.8.0",
"typeMigrationVersion": "10.1.0"
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
{
"id": "elastic-agent-dropped-events",
"type": "alerting_rule_template",
"attributes": {
"name": "[Elastic Agent] Dropped events",
"tags": ["Elastic Agent", "Pipeline and Queues"],
"ruleTypeId": ".es-query",
"schedule": {
"interval": "1m"
},
"params": {
"searchType": "esqlQuery",
"timeWindowSize": 5,
"timeWindowUnit": "m",
"threshold": [0],
"thresholdComparator": ">",
"size": 100,
"esqlQuery": {
"esql": "TS metrics-*\n| WHERE data_stream.dataset LIKE \"elastic_agent.*beat\"\n| STATS events_dropped_rate = max(rate(beat.stats.libbeat.pipeline.events.dropped)), pipeline_acked_rate = max(rate(beat.stats.libbeat.pipeline.queue.acked)) BY time_bucket = bucket(@timestamp,5minute), elastic_agent.id, component.id\n| EVAL percent_drop_rate = (events_dropped_rate / pipeline_acked_rate)\n| WHERE percent_drop_rate >= 0.05\n\n"
},
"aggType": "count",
"groupBy": "all",
"termSize": 5,
"sourceFields": [],
"timeField": "@timestamp",
"excludeHitsFromPreviousRun": true
},
"alertDelay": {
"active": 1
}
},
"coreMigrationVersion": "8.8.0",
"typeMigrationVersion": "10.1.0"
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
{
"id": "elastic-agent-excessive-memory-usage-rule",
"type": "alerting_rule_template",
"attributes": {
"name": "[Elastic Agent] Excessive memory usage",
"tags": ["Elastic Agent", "Resource Consumption"],
"ruleTypeId": ".es-query",
"schedule": {
"interval": "1m"
},
"params": {
"searchType": "esqlQuery",
"timeWindowSize": 5,
"timeWindowUnit": "m",
"threshold": [0],
"thresholdComparator": ">",
"size": 100,
"esqlQuery": {
"esql": "FROM metrics-*\n| WHERE process.executable LIKE \"*elastic*agent*\"\n| STATS max_memory_per_process = MAX(system.process.memory.rss.pct * 100) BY agent.id, process.name\n| STATS total_memory_usage = SUM(max_memory_per_process) BY agent.id\n| WHERE total_memory_usage > 50"
},
"aggType": "count",
"groupBy": "row",
"termSize": 5,
"sourceFields": [],
"timeField": "@timestamp",
"excludeHitsFromPreviousRun": true
},
"alertDelay": {
"active": 1
}
},
"coreMigrationVersion": "8.8.0",
"typeMigrationVersion": "10.1.0"
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
{
"id": "elastic-agent-excessive-restarts",
"type": "alerting_rule_template",
"attributes": {
"name": "[Elastic Agent] Excessive restarts",
"tags": ["Elastic Agent"],
"ruleTypeId": ".es-query",
"schedule": {
"interval": "1m"
},
"params": {
"searchType": "esqlQuery",
"timeWindowSize": 5,
"timeWindowUnit": "m",
"threshold": [0],
"thresholdComparator": ">",
"size": 100,
"esqlQuery": {
"esql": "FROM metrics-* \n| WHERE process.executable LIKE \"*elastic*agent*\"\n| STATS restart_count = COUNT_DISTINCT(process.cpu.start_time) BY elastic_agent.id, process.name, bucket(@timestamp,5min) \n| WHERE restart_count > 10"
},
"aggType": "count",
"groupBy": "row",
"termSize": 5,
"sourceFields": [],
"timeField": "@timestamp",
"excludeHitsFromPreviousRun": true
},
"alertDelay": {
"active": 1
}
},
"coreMigrationVersion": "8.8.0",
"typeMigrationVersion": "10.1.0"
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
{
"id": "elastic-agent-high-pipeline-queue",
"type": "alerting_rule_template",
"attributes": {
"name": "[Elastic Agent] High pipeline queue",
"tags": ["Elastic Agent", "Pipeline and Queues"],
"ruleTypeId": ".es-query",
"schedule": {
"interval": "1m"
},
"params": {
"searchType": "esqlQuery",
"timeWindowSize": 5,
"timeWindowUnit": "m",
"threshold": [0],
"thresholdComparator": ">",
"size": 100,
"esqlQuery": {
"esql": "TS metrics-*\n| WHERE data_stream.dataset == \"elastic_agent.*beat\"\n| STATS pipeline_queue_pct = MAX(beat.stats.libbeat.pipeline.queue.filled.pct) * 100 BY elastic_agent.id, process.name\n| WHERE pipeline_queue_pct >= 90"
},
"aggType": "count",
"groupBy": "row",
"termSize": 5,
"sourceFields": [],
"timeField": "@timestamp",
"excludeHitsFromPreviousRun": true
},
"alertDelay": {
"active": 1
}
},
"coreMigrationVersion": "8.8.0",
"typeMigrationVersion": "10.1.0"
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
{
"id": "elastic-agent-output-errors",
"type": "alerting_rule_template",
"attributes": {
"name": "[Elastic Agent] Output errors",
"tags": ["Elastic Agent", "Pipeline and Queues"],
"ruleTypeId": ".es-query",
"schedule": {
"interval": "1m"
},
"params": {
"searchType": "esqlQuery",
"timeWindowSize": 10,
"timeWindowUnit": "m",
"threshold": [0],
"thresholdComparator": ">",
"size": 100,
"esqlQuery": {
"esql": "TS metrics-*\n| WHERE data_stream.dataset LIKE \"elastic_agent.*beat\"\n| STATS errors_rate = MAX(RATE(beat.stats.libbeat.output.write.errors)) BY time_bucket = BUCKET(@timestamp,5minute), elastic_agent.id, component.id\n| EVAL errors_per_min = errors_rate * 60\n| WHERE errors_per_min > 5"
},
"aggType": "count",
"groupBy": "row",
"termSize": 5,
"sourceFields": [],
"timeField": "@timestamp",
"excludeHitsFromPreviousRun": true
},
"alertDelay": {
"active": 1
}
},
"coreMigrationVersion": "8.8.0",
"typeMigrationVersion": "10.1.0"
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
{
"id": "elastic-agent-unhealthy-status",
"type": "alerting_rule_template",
"attributes": {
"name": "[Elastic Agent] Unhealthy status",
"tags": ["Elastic Agent"],
"ruleTypeId": ".es-query",
"schedule": {
"interval": "1m"
},
"params": {
"searchType": "esqlQuery",
"timeWindowSize": 5,
"timeWindowUnit": "m",
"threshold": [0],
"thresholdComparator": ">",
"size": 100,
"esqlQuery": {
"esql": "FROM logs-* \n| WHERE data_stream.dataset == \"elastic_agent.status_change\" and agentless == false and status == \"error\""
},
"aggType": "count",
"groupBy": "row",
"termSize": 5,
"sourceFields": [],
"timeField": "@timestamp",
"excludeHitsFromPreviousRun": true
},
"alertDelay": {
"active": 1
}
},
"coreMigrationVersion": "8.8.0",
"typeMigrationVersion": "10.1.0"
}
4 changes: 2 additions & 2 deletions packages/elastic_agent/manifest.yml
Original file line number Diff line number Diff line change
@@ -1,9 +1,9 @@
name: elastic_agent
title: Elastic Agent
version: 2.6.3
version: 2.6.4
description: Collect logs and metrics from Elastic Agents.
type: integration
format_version: 3.1.4
format_version: 3.5.0
categories: ["elastic_stack"]
conditions:
kibana:
Expand Down