Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions packages/elastic_agent/changelog.yml
Original file line number Diff line number Diff line change
@@ -1,4 +1,9 @@
# newer versions go on top
- version: "2.6.4"
changes:
- description: Adds alerting rule templates
type: enhancement
link: https://github.com/elastic/integrations/pull/15572
- version: "2.6.3"
changes:
- description: Elastic Agent memory charts now prioritise RSS memory for more accurate usage reporting.
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
{
"id": "elastic-agent-cpu-usage-spike-rule",
"type": "alerting_rule_template",
"attributes": {
"name": "[Elastic Agent] CPU usage spike",
"tags": ["Elastic Agent", "Resource Consumption"],
"ruleTypeId": ".es-query",
"schedule": {
"interval": "1m"
},
"params": {
"searchType": "esqlQuery",
"timeWindowSize": 7,
"timeWindowUnit": "m",
"threshold": [0],
"thresholdComparator": ">",
"size": 100,
"esqlQuery": {
"esql": "FROM metrics-*, *:metrics-*\n| WHERE process.executable RLIKE \".*[Ee]lastic.*[Aa]gent.*\" AND agent.name NOT LIKE \"*agentless*\"\n| STATS cpu_process_pct = MAX(system.process.cpu.total.pct) * 100\n BY elastic_agent.id, process.name,\n time_bucket = BUCKET(@timestamp, 1 minute)\n// Count the 1 minute timebuckets that are above 80% by process and agent\n| WHERE cpu_process_pct >= 80\n| STATS count_above_threshold = COUNT(*)\n BY elastic_agent.id, process.name\n// Alert if there are 5 or more occurrences\n| WHERE count_above_threshold >= 5"
},
"aggType": "count",
"groupBy": "row",
"termSize": 5,
"sourceFields": [],
"timeField": "@timestamp",
"excludeHitsFromPreviousRun": true
},
"alertDelay": {
"active": 1
}
},
"coreMigrationVersion": "8.8.0",
"typeMigrationVersion": "10.1.0"
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
{
  "id": "elastic-agent-dropped-events",
  "type": "alerting_rule_template",
  "attributes": {
    "name": "[Elastic Agent] Dropped events",
    "tags": ["Elastic Agent", "Pipeline and Queues"],
    "ruleTypeId": ".es-query",
    "schedule": {
      "interval": "1m"
    },
    "params": {
      "searchType": "esqlQuery",
      "timeWindowSize": 3,
      "timeWindowUnit": "m",
      "threshold": [0],
      "thresholdComparator": ">",
      "size": 100,
      "esqlQuery": {
        "esql": "FROM metrics-elastic_agent.*beat-default, *:metrics-elastic_agent.*beat-default*\n| WHERE data_stream.dataset LIKE \"elastic_agent.*beat\" AND agent.name NOT LIKE \"*agentless*\"\n| STATS \n events_dropped_max = max(to_long(beat.stats.libbeat.pipeline.events.dropped)),\n events_dropped_min = min(to_long(beat.stats.libbeat.pipeline.events.dropped)), \n pipeline_acked_max = max(to_long(beat.stats.libbeat.pipeline.queue.acked)), \n pipeline_acked_min = min(to_long(beat.stats.libbeat.pipeline.queue.acked)) \n BY time_bucket = DATE_TRUNC(1 minute, @timestamp), elastic_agent.id, component.id\n| EVAL \n events_dropped = events_dropped_max - events_dropped_min, \n events_acked = pipeline_acked_max - pipeline_acked_min\n// Cast to double: long / long is integer division in ES|QL and would truncate the ratio to 0\n| EVAL drop_pct = CASE(\n events_acked > 0, to_double(events_dropped) / to_double(events_acked), \n 0.0\n)\n| WHERE drop_pct >= 0.05\n| STATS MAX(drop_pct) BY elastic_agent.id, component.id"
      },
      "aggType": "count",
      "groupBy": "row",
      "termSize": 5,
      "sourceFields": [],
      "timeField": "@timestamp",
      "excludeHitsFromPreviousRun": true
    },
    "alertDelay": {
      "active": 1
    }
  },
  "coreMigrationVersion": "8.8.0",
  "typeMigrationVersion": "10.1.0"
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
{
"id": "elastic-agent-excessive-memory-usage-rule",
"type": "alerting_rule_template",
"attributes": {
"name": "[Elastic Agent] Excessive memory usage",
"tags": ["Elastic Agent", "Resource Consumption"],
"ruleTypeId": ".es-query",
"schedule": {
"interval": "1m"
},
"params": {
"searchType": "esqlQuery",
"timeWindowSize": 5,
"timeWindowUnit": "m",
"threshold": [0],
"thresholdComparator": ">",
"size": 100,
"esqlQuery": {
"esql": "FROM metrics-*, *:metrics-*\n| WHERE process.executable RLIKE \".*[Ee]lastic.*[Aa]gent.*\" AND agent.name NOT LIKE \"*agentless*\"\n| STATS max_memory_per_process = MAX(system.process.memory.rss.pct * 100) BY agent.id, process.name\n| STATS total_memory_usage = SUM(max_memory_per_process) BY agent.id\n| WHERE total_memory_usage > 50"
},
"aggType": "count",
"groupBy": "row",
"termSize": 5,
"sourceFields": [],
"timeField": "@timestamp",
"excludeHitsFromPreviousRun": true
},
"alertDelay": {
"active": 1
}
},
"coreMigrationVersion": "8.8.0",
"typeMigrationVersion": "10.1.0"
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
{
"id": "elastic-agent-excessive-restarts",
"type": "alerting_rule_template",
"attributes": {
"name": "[Elastic Agent] Excessive restarts",
"tags": ["Elastic Agent"],
"ruleTypeId": ".es-query",
"schedule": {
"interval": "1m"
},
"params": {
"searchType": "esqlQuery",
"timeWindowSize": 5,
"timeWindowUnit": "m",
"threshold": [0],
"thresholdComparator": ">",
"size": 100,
"esqlQuery": {
"esql": "FROM metrics-*, *:metrics-*\n| WHERE process.executable RLIKE \".*[Ee]lastic.*[Aa]gent.*\" AND agent.name NOT LIKE \"*agentless*\"\n| STATS restart_count = COUNT_DISTINCT(process.cpu.start_time) BY host.name, process.name, bucket(@timestamp,5 minute) \n| WHERE restart_count > 10\n| STATS MAX(restart_count) BY host.name, process.name"
},
"aggType": "count",
"groupBy": "row",
"termSize": 5,
"sourceFields": [],
"timeField": "@timestamp",
"excludeHitsFromPreviousRun": true
},
"alertDelay": {
"active": 1
}
},
"coreMigrationVersion": "8.8.0",
"typeMigrationVersion": "10.1.0"
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
{
"id": "elastic-agent-high-pipeline-queue",
"type": "alerting_rule_template",
"attributes": {
"name": "[Elastic Agent] High pipeline queue",
"tags": ["Elastic Agent", "Pipeline and Queues"],
"ruleTypeId": ".es-query",
"schedule": {
"interval": "1m"
},
"params": {
"searchType": "esqlQuery",
"timeWindowSize": 1,
"timeWindowUnit": "m",
"threshold": [0],
"thresholdComparator": ">",
"size": 100,
"esqlQuery": {
"esql": "FROM metrics-elastic_agent.*beat-default, *:metrics-elastic_agent.*beat-default*\n| WHERE data_stream.dataset LIKE \"elastic_agent.*beat\" AND agent.name NOT LIKE \"*agentless*\"\n| STATS pipeline_queue_pct = MAX(beat.stats.libbeat.pipeline.queue.filled.pct) * 100 BY elastic_agent.id, component.id\n| WHERE pipeline_queue_pct >= 90"
},
"aggType": "count",
"groupBy": "row",
"termSize": 5,
"sourceFields": [],
"timeField": "@timestamp",
"excludeHitsFromPreviousRun": true
},
"alertDelay": {
"active": 1
}
},
"coreMigrationVersion": "8.8.0",
"typeMigrationVersion": "10.1.0"
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
{
"id": "elastic-agent-output-errors",
"type": "alerting_rule_template",
"attributes": {
"name": "[Elastic Agent] Output errors",
"tags": ["Elastic Agent", "Pipeline and Queues"],
"ruleTypeId": ".es-query",
"schedule": {
"interval": "1m"
},
"params": {
"searchType": "esqlQuery",
"timeWindowSize": 3,
"timeWindowUnit": "m",
"threshold": [0],
"thresholdComparator": ">",
"size": 100,
"esqlQuery": {
"esql": "FROM metrics-elastic_agent.*beat-default*, *:metrics-elastic_agent.*beat-default*\n| WHERE data_stream.dataset LIKE \"elastic_agent.*beat\" AND agent.name NOT LIKE \"*agentless*\"\n| STATS \n max_errors = MAX(TO_LONG(beat.stats.libbeat.output.write.errors)),\n min_errors = MIN(TO_LONG(beat.stats.libbeat.output.write.errors)) \n BY time_bucket = DATE_TRUNC(1 minute, @timestamp), elastic_agent.id, component.id\n| EVAL errors_count = max_errors - min_errors \n| WHERE errors_count > 5 \n| STATS MAX(errors_count) BY elastic_agent.id, component.id"
},
"aggType": "count",
"groupBy": "row",
"termSize": 5,
"sourceFields": [],
"timeField": "@timestamp",
"excludeHitsFromPreviousRun": true
},
"alertDelay": {
"active": 1
}
},
"coreMigrationVersion": "8.8.0",
"typeMigrationVersion": "10.1.0"
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
{
"id": "elastic-agent-unhealthy-status",
"type": "alerting_rule_template",
"attributes": {
"name": "[Elastic Agent] Unhealthy status",
"tags": ["Elastic Agent"],
"ruleTypeId": ".es-query",
"schedule": {
"interval": "1m"
},
"params": {
"searchType": "esqlQuery",
"timeWindowSize": 5,
"timeWindowUnit": "m",
"threshold": [0],
"thresholdComparator": ">",
"size": 100,
"esqlQuery": {
"esql": "FROM logs-elastic_agent.status_change-default, *:logs-elastic_agent.status_change-default\n| WHERE data_stream.dataset == \"elastic_agent.status_change\" and agentless == false and status == \"error\""
},
"aggType": "count",
"groupBy": "row",
"termSize": 5,
"sourceFields": [],
"timeField": "@timestamp",
"excludeHitsFromPreviousRun": true
},
"alertDelay": {
"active": 1
}
},
"coreMigrationVersion": "8.8.0",
"typeMigrationVersion": "10.1.0"
}
4 changes: 2 additions & 2 deletions packages/elastic_agent/manifest.yml
Original file line number Diff line number Diff line change
@@ -1,9 +1,9 @@
name: elastic_agent
title: Elastic Agent
version: 2.6.3
version: 2.6.4
description: Collect logs and metrics from Elastic Agents.
type: integration
format_version: 3.1.4
format_version: 3.5.0
categories: ["elastic_stack"]
conditions:
kibana:
Expand Down