From ac11f0cd3fb468c22c14be7465b6e7d508be964f Mon Sep 17 00:00:00 2001 From: daeny Date: Wed, 29 Jan 2025 09:25:49 -0500 Subject: [PATCH 1/8] [WIP] Update Automatic multi-line aggregation part in content/en/agent/logs/advanced_log_collection.md --- .../en/agent/logs/advanced_log_collection.md | 168 ++++++++++++++---- go.mod | 2 +- go.sum | 4 +- 3 files changed, 135 insertions(+), 39 deletions(-) diff --git a/content/en/agent/logs/advanced_log_collection.md b/content/en/agent/logs/advanced_log_collection.md index f548c16cf9d89..4ee427e7a92b4 100644 --- a/content/en/agent/logs/advanced_log_collection.md +++ b/content/en/agent/logs/advanced_log_collection.md @@ -462,8 +462,12 @@ More examples: | 2020-10-27 05:10:49.657 | `\d{4}-\d{2}-\d{2}\s\d{2}:\d{2}:\d{2}\.\d{3}` | | {"date": "2018-01-02" | `\{"date": "\d{4}-\d{2}-\d{2}` | -### Automatic multi-line aggregation -With Agent 7.37+, `auto_multi_line_detection` can be enabled, which allows the Agent to detect [common multi-line patterns][3] automatically. +### Global Automatic multi-line aggregation +With Agent 7.37+, `auto_multi_line_detection` can be enabled, which allows the Agent to detect [common multi-line patterns][3] automatically for **all** log integrations it sets up. + + +{{< tabs >}} +{{% tab "Configuration file" %}} Enable `auto_multi_line_detection` globally in the `datadog.yaml` file: @@ -472,48 +476,66 @@ logs_config: auto_multi_line_detection: true ``` -For containerized deployments, you can enable `auto_multi_line_detection` with the `DD_LOGS_CONFIG_AUTO_MULTI_LINE_DETECTION=true` environment variable. 
+{{% /tab %}} +{{% tab "Docker" %}} -It can also be enabled or disabled (overriding the global config) per log configuration: +Use the environment variable `DD_LOGS_CONFIG_AUTO_MULTI_LINE_DETECTION` in Datadog Agent container to configure global automatic multi-line aggregation rule, for example: -{{< tabs >}} -{{% tab "Configuration file" %}} +```shell +DD_LOGS_CONFIG_AUTO_MULTI_LINE_DETECTION=true +``` + +{{% /tab %}} +{{% tab "Kubernetes" %}} + +#### Operator +Use the `spec.override.nodeAgent.env` parameter in your Datadog Operator manifest to set the `DD_LOGS_CONFIG_AUTO_MULTI_LINE_DETECTION` environment variable to configure a global automatic multi-line aggregation rule. For example: ```yaml -logs: - - type: file - path: /my/test/file.log - service: testApp - source: java - auto_multi_line_detection: true +spec: + override: + nodeAgent: + env: + - name: DD_LOGS_CONFIG_AUTO_MULTI_LINE_DETECTION + value: "true" ``` -Automatic multi-line detection uses a list of common regular expressions to attempt to match logs. If the built-in list is not sufficient, you can also add custom patterns in the `datadog.yaml` file: +#### Helm +Use the `datadog.logs.autoMultiLineDetection` option in the Helm chart to configure a global automatic multi-line aggregation rule. For example: ```yaml -logs_config: - auto_multi_line_detection: true - auto_multi_line_extra_patterns: - - \d{4}\-(0?[1-9]|1[012])\-(0?[1-9]|[12][0-9]|3[01]) - - '[A-Za-z_]+ \d+, \d+ \d+:\d+:\d+ (AM|PM)' +datadog: + logs: + enabled: true + autoMultiLineDetection: true ``` -If no pattern meets the line match threshold, add the `auto_multi_line_default_match_threshold` parameter with a lower value. This configures a threshold value that determines how frequently logs have to match in order for the auto multi-line aggregation to work. To find the current threshold value run the [agent `status` command][1]. 
+ +{{% /tab %}} +{{< /tabs >}} + +### Enable multi-line aggregation per integration +Alternatively, this can be enabled or disabled (overriding the global config) per log integration setup. + +{{< tabs >}} +{{% tab "Configuration file" %}} + +In a Host environment, enable `auto_multi_line_detection` with [Custom log collection][1] method. For example: + +[1]: https://docs.datadoghq.com/agent/logs/?tab=tailfiles#custom-log-collection ```yaml -logs_config: - auto_multi_line_detection: true - auto_multi_line_extra_patterns: - - \d{4}\-(0?[1-9]|1[012])\-(0?[1-9]|[12][0-9]|3[01]) - - '[A-Za-z_]+ \d+, \d+ \d+:\d+:\d+ (AM|PM)' - auto_multi_line_default_match_threshold: 0.1 +logs: + - type: file + path: /my/test/file.log + service: testApp + source: java + auto_multi_line_detection: true ``` - -[1]: https://docs.datadoghq.com/agent/configuration/agent-commands/#agent-information {{% /tab %}} {{% tab "Docker" %}} -In a Docker environment, use the label `com.datadoghq.ad.logs` on your container to specify the `log_processing_rules`. For example: +In a Docker environment, use the label `com.datadoghq.ad.logs` on your container to specify the log configuration. For example: ```yaml labels: @@ -524,14 +546,10 @@ In a Docker environment, use the label `com.datadoghq.ad.logs` on your container "auto_multi_line_detection": true }] ``` -Automatic multi-line detection uses a list of common regular expressions to attempt to match logs. If the built-in list is not sufficient, you can also add custom patterns in the `datadog.yaml` file with the `DD_LOGS_CONFIG_AUTO_MULTI_LINE_EXTRA_PATTERNS` environment variable. - -If no pattern meets the line match threshold, add the `DD_LOGS_CONFIG_AUTO_MULTI_LINE_DEFAULT_MATCH_THRESHOLD` environment variable with a lower value. This configures a threshold value that determines how frequently logs have to match in order for the auto multi-line aggregation to work. To find the current threshold value run the [agent `status` command][1]. 
- -[1]: https://docs.datadoghq.com/agent/configuration/agent-commands/#agent-information {{% /tab %}} {{% tab "Kubernetes" %}} +In a Kubernetes environment, use the annotation `ad.datadoghq.com/<CONTAINER_NAME>.logs` on your pod to specify the log configuration. For example: ```yaml apiVersion: apps/v1 @@ -558,17 +576,95 @@ spec: - name: '<CONTAINER_NAME>' image: testApp:latest ``` +{{% /tab %}} +{{< /tabs >}} + +### Customizing multi-line aggregation configuration +Automatic multi-line detection uses a list of [common regular expressions][1] to attempt to match logs. If the built-in list is not sufficient, you can also add custom patterns and thresholds for detection. + +[1]:https://github.com/DataDog/datadog-agent/blob/a27c16c05da0cf7b09d5a5075ca568fdae1b4ee0/pkg/logs/internal/decoder/auto_multiline_handler.go#L187 +{{< tabs >}} +{{% tab "Configuration file" %}} +In a configuration file, add the `auto_multi_line_extra_patterns` to your `datadog.yaml` like so +```yaml +logs_config: + auto_multi_line_detection: true + auto_multi_line_extra_patterns: + - \d{4}\-(0?[1-9]|1[012])\-(0?[1-9]|[12][0-9]|3[01]) + - '[A-Za-z_]+ \d+, \d+ \d+:\d+:\d+ (AM|PM)' +``` + +If no pattern meets the line match threshold, add the `auto_multi_line_default_match_threshold` parameter with a lower value. This configures a threshold value that determines how frequently logs have to match in order for the auto multi-line aggregation to work. To find the current threshold value run the [agent `status` command][1]. 
+ +```yaml +logs_config: + auto_multi_line_detection: true + auto_multi_line_extra_patterns: + - \d{4}\-(0?[1-9]|1[012])\-(0?[1-9]|[12][0-9]|3[01]) + - '[A-Za-z_]+ \d+, \d+ \d+:\d+:\d+ (AM|PM)' + auto_multi_line_default_match_threshold: 0.1 +``` + +[1]: https://docs.datadoghq.com/agent/configuration/agent-commands/#agent-information +{{% /tab %}} +{{% tab "Docker" %}} +In a containerized Agent, add the environment variable `DD_LOGS_CONFIG_AUTO_MULTI_LINE_EXTRA_PATTERNS` like so + +```yaml + environment: + - DD_LOGS_CONFIG_AUTO_MULTI_LINE_DETECTION=true + - DD_LOGS_CONFIG_AUTO_MULTI_LINE_EXTRA_PATTERNS=\d{4}-(0?[1-9]|1[012])-(0?[1-9]|[12][0-9]|3[01]) [A-Za-z_]+\s\d+,\s\d+\s\d+:\d+:\d+\s(AM|PM) +``` + +If no pattern meets the line match threshold, add the `DD_LOGS_CONFIG_AUTO_MULTI_LINE_DEFAULT_MATCH_THRESHOLD` environment variable with a lower value. This configures a threshold value that determines how frequently logs have to match in order for the auto multi-line aggregation to work. To find the current threshold value run the [agent `status` command][1]. + +```yaml + environment: + - DD_LOGS_CONFIG_AUTO_MULTI_LINE_DETECTION=true + - DD_LOGS_CONFIG_AUTO_MULTI_LINE_EXTRA_PATTERNS=\d{4}-(0?[1-9]|1[012])-(0?[1-9]|[12][0-9]|3[01]) [A-Za-z_]+\s\d+,\s\d+\s\d+:\d+:\d+\s(AM|PM) + - DD_LOGS_CONFIG_AUTO_MULTI_LINE_DEFAULT_MATCH_THRESHOLD=0.1 +``` -Automatic multi-line detection uses a list of common regular expressions to attempt to match logs. If the built-in list is not sufficient, you can also add custom patterns in the `datadog.yaml` file with the `DD_LOGS_CONFIG_AUTO_MULTI_LINE_EXTRA_PATTERNS` environment variable. +**Note**: The Datadog Agent interpret spaces in the `DD_LOGS_CONFIG_AUTO_MULTI_LINE_EXTRA_PATTERNS` environment variable as separators between multiple patterns. You can see the two regex patterns are divided by a space and `\s` is used in the second regex pattern to match for spaces. 
+[1]: https://docs.datadoghq.com/agent/configuration/agent-commands/#agent-information + +{{% /tab %}} +{{% tab "Kubernetes" %}} +In Kubernetes, add the environment variable `DD_LOGS_CONFIG_AUTO_MULTI_LINE_EXTRA_PATTERNS` like so If no pattern meets the line match threshold, add the `DD_LOGS_CONFIG_AUTO_MULTI_LINE_DEFAULT_MATCH_THRESHOLD` environment variable with a lower value. This configures a threshold value that determines how frequently logs have to match in order for the auto multi-line aggregation to work. To find the current threshold value run the [agent `status` command][1]. [1]: https://docs.datadoghq.com/agent/configuration/agent-commands/#agent-information +#### Operator + +```yaml +spec: + override: + nodeAgent: + env: + - name: DD_LOGS_CONFIG_AUTO_MULTI_LINE_EXTRA_PATTERNS + value: \d{4}-(0?[1-9]|1[012])-(0?[1-9]|[12][0-9]|3[01]) [A-Za-z_]+\s\d+,\s\d+\s\d+:\d+:\d+\s(AM|PM) + - name: DD_LOGS_CONFIG_AUTO_MULTI_LINE_DEFAULT_MATCH_THRESHOLD + value: "0.1" +``` + +#### Helm + +```yaml +datadog: + env: + - name: DD_LOGS_CONFIG_AUTO_MULTI_LINE_EXTRA_PATTERNS + value: \d{4}-(0?[1-9]|1[012])-(0?[1-9]|[12][0-9]|3[01]) [A-Za-z_]+\s\d+,\s\d+\s\d+:\d+:\d+\s(AM|PM) + - name: DD_LOGS_CONFIG_AUTO_MULTI_LINE_DEFAULT_MATCH_THRESHOLD + value: "0.1" +``` + +**Note**: The Datadog Agent interpret spaces in the `DD_LOGS_CONFIG_AUTO_MULTI_LINE_EXTRA_PATTERNS` environment variable as separators between multiple patterns. You can see the two regex patterns are divided by a space and `\s` is used in the second regex pattern to match for spaces. {{% /tab %}} {{< /tabs >}} -With this feature enabled, when a new log file is opened the Agent tries to detect a pattern. During this process the logs are sent as single lines. After the detection threshold is met, all future logs for that source are aggregated with the detected pattern, or as single lines if no pattern is found. Detection takes at most 30 seconds or the first 500 logs (whichever comes first). 
+With this feature enabled, when a new log file is opened the Agent first tries to detect a pattern. This detection process takes at most 30 seconds or the first 500 logs (whichever comes first). During this process the logs are sent as single lines. After the detection threshold is met all future logs for that source are aggregated with the best matching pattern, or as single lines if no pattern is found. **Note**: If you can control the naming pattern of the rotated log, ensure that the rotated file replaces the previously active file with the same name. The Agent reuses a previously detected pattern on the newly rotated file to avoid re-running detection. @@ -734,4 +830,4 @@ All the logs collected by the Datadog Agent are impacted by the global processin [3]: https://github.com/DataDog/datadog-agent/blob/a27c16c05da0cf7b09d5a5075ca568fdae1b4ee0/pkg/logs/internal/decoder/auto_multiline_handler.go#L187 [4]: /agent/faq/commonly-used-log-processing-rules [5]: /agent/configuration/agent-configuration-files/#agent-main-configuration-file -[6]: /agent/configuration/agent-commands/#agent-information +[6]: /agent/configuration/agent-commands/#agent-information \ No newline at end of file diff --git a/go.mod b/go.mod index c359c2e4fe72c..5c72783195cdd 100644 --- a/go.mod +++ b/go.mod @@ -4,7 +4,7 @@ go 1.14 require ( github.com/DataDog/websites-modules v1.4.205 // indirect - github.com/DataDog/websites-sources v0.0.0-20250117185032-4a2553fdc82d // indirect + github.com/DataDog/websites-sources v0.0.0-20250128173555-97fa5be65b39 // indirect ) // replace github.com/DataDog/websites-modules => /Users/matt.fitzsimmons/source/websites-modules diff --git a/go.sum b/go.sum index 7c120ce473117..78c3fd6aa8be0 100644 --- a/go.sum +++ b/go.sum @@ -1,4 +1,4 @@ github.com/DataDog/websites-modules v1.4.205 h1:OmmyizpEpO+oEWC3pF0Gl9C/UcIo2bETKhdcvUH0848= github.com/DataDog/websites-modules v1.4.205/go.mod h1:CcQxAmCXoiFr3hNw6Q+1si65C3uOP1gB+7aX4S3h+CQ= 
-github.com/DataDog/websites-sources v0.0.0-20250117185032-4a2553fdc82d h1:1lr/eHDZbd5deqAdEK6TLFuDbwX7QVy8Z8NVVFmlXCI= -github.com/DataDog/websites-sources v0.0.0-20250117185032-4a2553fdc82d/go.mod h1:RvGhXV0uQC6Ocs+n84QyL97kows6vg6VG5ZLQMHw4Fs= +github.com/DataDog/websites-sources v0.0.0-20250128173555-97fa5be65b39 h1:77xjDJBhRwqG5OMAQNmqg1718nsm2fjbO6voNpAU2co= +github.com/DataDog/websites-sources v0.0.0-20250128173555-97fa5be65b39/go.mod h1:RvGhXV0uQC6Ocs+n84QyL97kows6vg6VG5ZLQMHw4Fs= From a8f89502347a08747f87a56ef21855ec2ccb6574 Mon Sep 17 00:00:00 2001 From: daenypark <118874777+daenypark@users.noreply.github.com> Date: Wed, 29 Jan 2025 09:43:34 -0500 Subject: [PATCH 2/8] Update advanced_log_collection.md --- .../en/agent/logs/advanced_log_collection.md | 29 ++++++++++++++++--- 1 file changed, 25 insertions(+), 4 deletions(-) diff --git a/content/en/agent/logs/advanced_log_collection.md b/content/en/agent/logs/advanced_log_collection.md index 4ee427e7a92b4..8e7a8ce32ce33 100644 --- a/content/en/agent/logs/advanced_log_collection.md +++ b/content/en/agent/logs/advanced_log_collection.md @@ -585,7 +585,7 @@ Automatic multi-line detection uses a list of [common regular expressions][1] to [1]:https://github.com/DataDog/datadog-agent/blob/a27c16c05da0cf7b09d5a5075ca568fdae1b4ee0/pkg/logs/internal/decoder/auto_multiline_handler.go#L187 {{< tabs >}} {{% tab "Configuration file" %}} -In a configuration file, add the `auto_multi_line_extra_patterns` to your `datadog.yaml` like so +In a configuration file, add the `auto_multi_line_extra_patterns` to your `datadog.yaml` like so: ```yaml logs_config: auto_multi_line_detection: true @@ -608,7 +608,7 @@ logs_config: [1]: https://docs.datadoghq.com/agent/configuration/agent-commands/#agent-information {{% /tab %}} {{% tab "Docker" %}} -In a containerized Agent, add the environment variable `DD_LOGS_CONFIG_AUTO_MULTI_LINE_EXTRA_PATTERNS` like so +In a containerized Agent, add the environment variable 
`DD_LOGS_CONFIG_AUTO_MULTI_LINE_EXTRA_PATTERNS` like so: ```yaml environment: @@ -631,7 +631,27 @@ If no pattern meets the line match threshold, add the `DD_LOGS_CONFIG_AUTO_MULTI {{% /tab %}} {{% tab "Kubernetes" %}} -In Kubernetes, add the environment variable `DD_LOGS_CONFIG_AUTO_MULTI_LINE_EXTRA_PATTERNS` like so +In Kubernetes, add the environment variable `DD_LOGS_CONFIG_AUTO_MULTI_LINE_EXTRA_PATTERNS` like so: + +#### Operator + +```yaml +spec: + override: + nodeAgent: + env: + - name: DD_LOGS_CONFIG_AUTO_MULTI_LINE_EXTRA_PATTERNS + value: \d{4}-(0?[1-9]|1[012])-(0?[1-9]|[12][0-9]|3[01]) [A-Za-z_]+\s\d+,\s\d+\s\d+:\d+:\d+\s(AM|PM) +``` + +#### Helm + +```yaml +datadog: + env: + - name: DD_LOGS_CONFIG_AUTO_MULTI_LINE_EXTRA_PATTERNS + value: \d{4}-(0?[1-9]|1[012])-(0?[1-9]|[12][0-9]|3[01]) [A-Za-z_]+\s\d+,\s\d+\s\d+:\d+:\d+\s(AM|PM) +``` If no pattern meets the line match threshold, add the `DD_LOGS_CONFIG_AUTO_MULTI_LINE_DEFAULT_MATCH_THRESHOLD` environment variable with a lower value. This configures a threshold value that determines how frequently logs have to match in order for the auto multi-line aggregation to work. To find the current threshold value run the [agent `status` command][1]. [1]: https://docs.datadoghq.com/agent/configuration/agent-commands/#agent-information @@ -661,6 +681,7 @@ datadog: ``` **Note**: The Datadog Agent interpret spaces in the `DD_LOGS_CONFIG_AUTO_MULTI_LINE_EXTRA_PATTERNS` environment variable as separators between multiple patterns. You can see the two regex patterns are divided by a space and `\s` is used in the second regex pattern to match for spaces. 
+ {{% /tab %}} {{< /tabs >}} @@ -830,4 +851,4 @@ All the logs collected by the Datadog Agent are impacted by the global processin [3]: https://github.com/DataDog/datadog-agent/blob/a27c16c05da0cf7b09d5a5075ca568fdae1b4ee0/pkg/logs/internal/decoder/auto_multiline_handler.go#L187 [4]: /agent/faq/commonly-used-log-processing-rules [5]: /agent/configuration/agent-configuration-files/#agent-main-configuration-file -[6]: /agent/configuration/agent-commands/#agent-information \ No newline at end of file +[6]: /agent/configuration/agent-commands/#agent-information From 74e7f4468567f69d13801c0ed4fc93d460b1f031 Mon Sep 17 00:00:00 2001 From: daenypark <118874777+daenypark@users.noreply.github.com> Date: Wed, 29 Jan 2025 12:56:06 -0500 Subject: [PATCH 3/8] revert go.mod back to original. --- go.mod | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/go.mod b/go.mod index 5c72783195cdd..c359c2e4fe72c 100644 --- a/go.mod +++ b/go.mod @@ -4,7 +4,7 @@ go 1.14 require ( github.com/DataDog/websites-modules v1.4.205 // indirect - github.com/DataDog/websites-sources v0.0.0-20250128173555-97fa5be65b39 // indirect + github.com/DataDog/websites-sources v0.0.0-20250117185032-4a2553fdc82d // indirect ) // replace github.com/DataDog/websites-modules => /Users/matt.fitzsimmons/source/websites-modules From d64d770a83e03399b48301372d658df30a87cc90 Mon Sep 17 00:00:00 2001 From: daenypark <118874777+daenypark@users.noreply.github.com> Date: Wed, 29 Jan 2025 12:57:29 -0500 Subject: [PATCH 4/8] revert go.sum back to original. 
--- go.sum | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/go.sum b/go.sum index 78c3fd6aa8be0..7c120ce473117 100644 --- a/go.sum +++ b/go.sum @@ -1,4 +1,4 @@ github.com/DataDog/websites-modules v1.4.205 h1:OmmyizpEpO+oEWC3pF0Gl9C/UcIo2bETKhdcvUH0848= github.com/DataDog/websites-modules v1.4.205/go.mod h1:CcQxAmCXoiFr3hNw6Q+1si65C3uOP1gB+7aX4S3h+CQ= -github.com/DataDog/websites-sources v0.0.0-20250128173555-97fa5be65b39 h1:77xjDJBhRwqG5OMAQNmqg1718nsm2fjbO6voNpAU2co= -github.com/DataDog/websites-sources v0.0.0-20250128173555-97fa5be65b39/go.mod h1:RvGhXV0uQC6Ocs+n84QyL97kows6vg6VG5ZLQMHw4Fs= +github.com/DataDog/websites-sources v0.0.0-20250117185032-4a2553fdc82d h1:1lr/eHDZbd5deqAdEK6TLFuDbwX7QVy8Z8NVVFmlXCI= +github.com/DataDog/websites-sources v0.0.0-20250117185032-4a2553fdc82d/go.mod h1:RvGhXV0uQC6Ocs+n84QyL97kows6vg6VG5ZLQMHw4Fs= From c057495b739685d7e2ac345d1928dc29f08a4d0e Mon Sep 17 00:00:00 2001 From: daenypark <118874777+daenypark@users.noreply.github.com> Date: Wed, 29 Jan 2025 13:00:56 -0500 Subject: [PATCH 5/8] Apply suggestions from code review Review from Jack makes sense. Co-authored-by: JacksonDavenport --- content/en/agent/logs/advanced_log_collection.md | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/content/en/agent/logs/advanced_log_collection.md b/content/en/agent/logs/advanced_log_collection.md index 8e7a8ce32ce33..20b164ba680d5 100644 --- a/content/en/agent/logs/advanced_log_collection.md +++ b/content/en/agent/logs/advanced_log_collection.md @@ -462,7 +462,7 @@ More examples: | 2020-10-27 05:10:49.657 | `\d{4}-\d{2}-\d{2}\s\d{2}:\d{2}:\d{2}\.\d{3}` | | {"date": "2018-01-02" | `\{"date": "\d{4}-\d{2}-\d{2}` | -### Global Automatic multi-line aggregation +### Global automatic multi-line aggregation With Agent 7.37+, `auto_multi_line_detection` can be enabled, which allows the Agent to detect [common multi-line patterns][3] automatically for **all** log integrations it sets up. 
@@ -652,6 +652,8 @@ datadog: - name: DD_LOGS_CONFIG_AUTO_MULTI_LINE_EXTRA_PATTERNS value: \d{4}-(0?[1-9]|1[012])-(0?[1-9]|[12][0-9]|3[01]) [A-Za-z_]+\s\d+,\s\d+\s\d+:\d+:\d+\s(AM|PM) ``` +**Note**: The Datadog Agent interpret spaces in the `DD_LOGS_CONFIG_AUTO_MULTI_LINE_EXTRA_PATTERNS` environment variable as separators between multiple patterns. You can see the two regex patterns are divided by a space and `\s` is used in the second regex pattern to match for spaces. + If no pattern meets the line match threshold, add the `DD_LOGS_CONFIG_AUTO_MULTI_LINE_DEFAULT_MATCH_THRESHOLD` environment variable with a lower value. This configures a threshold value that determines how frequently logs have to match in order for the auto multi-line aggregation to work. To find the current threshold value run the [agent `status` command][1]. [1]: https://docs.datadoghq.com/agent/configuration/agent-commands/#agent-information @@ -680,7 +682,6 @@ datadog: value: "0.1" ``` -**Note**: The Datadog Agent interpret spaces in the `DD_LOGS_CONFIG_AUTO_MULTI_LINE_EXTRA_PATTERNS` environment variable as separators between multiple patterns. You can see the two regex patterns are divided by a space and `\s` is used in the second regex pattern to match for spaces. {{% /tab %}} {{< /tabs >}} From eee9777d1d1a7a19b145928b8d665385ca0c2dfe Mon Sep 17 00:00:00 2001 From: daenypark <118874777+daenypark@users.noreply.github.com> Date: Wed, 29 Jan 2025 18:23:05 -0500 Subject: [PATCH 6/8] Apply suggestions from code review Apply feedback from document team. 
Co-authored-by: Ursula Chen <58821586+urseberry@users.noreply.github.com> --- .../en/agent/logs/advanced_log_collection.md | 37 +++++++++++-------- 1 file changed, 21 insertions(+), 16 deletions(-) diff --git a/content/en/agent/logs/advanced_log_collection.md b/content/en/agent/logs/advanced_log_collection.md index 20b164ba680d5..dfc6e28bee75d 100644 --- a/content/en/agent/logs/advanced_log_collection.md +++ b/content/en/agent/logs/advanced_log_collection.md @@ -479,7 +479,7 @@ logs_config: {{% /tab %}} {{% tab "Docker" %}} -Use the environment variable `DD_LOGS_CONFIG_AUTO_MULTI_LINE_DETECTION` in Datadog Agent container to configure global automatic multi-line aggregation rule, for example: +Use the environment variable `DD_LOGS_CONFIG_AUTO_MULTI_LINE_DETECTION` in the Datadog Agent container to configure a global automatic multi-line aggregation rule. For example: ```shell DD_LOGS_CONFIG_AUTO_MULTI_LINE_DETECTION=true @@ -515,12 +515,12 @@ datadog: {{< /tabs >}} ### Enable multi-line aggregation per integration -Alternatively, this can be enabled or disabled (overriding the global config) per log integration setup. +Alternatively, you can enable or disable multi-line aggregation for an individual integration's log collection. Changing the multi-line aggregation for an integration overrides the global configuration. {{< tabs >}} {{% tab "Configuration file" %}} -In a Host environment, enable `auto_multi_line_detection` with [Custom log collection][1] method. For example: +In a host environment, enable `auto_multi_line_detection` with the [Custom log collection][1] method. For example: [1]: https://docs.datadoghq.com/agent/logs/?tab=tailfiles#custom-log-collection @@ -579,13 +579,13 @@ spec: {{% /tab %}} {{< /tabs >}} -### Customizing multi-line aggregation configuration +### Customize multi-line aggregation configuration Automatic multi-line detection uses a list of [common regular expressions][1] to attempt to match logs. 
If the built-in list is not sufficient, you can also add custom patterns and thresholds for detection. [1]:https://github.com/DataDog/datadog-agent/blob/a27c16c05da0cf7b09d5a5075ca568fdae1b4ee0/pkg/logs/internal/decoder/auto_multiline_handler.go#L187 {{< tabs >}} {{% tab "Configuration file" %}} -In a configuration file, add the `auto_multi_line_extra_patterns` to your `datadog.yaml` like so: +In a configuration file, add the `auto_multi_line_extra_patterns` to your `datadog.yaml`: ```yaml logs_config: auto_multi_line_detection: true @@ -594,7 +594,9 @@ logs_config: - '[A-Za-z_]+ \d+, \d+ \d+:\d+:\d+ (AM|PM)' ``` -If no pattern meets the line match threshold, add the `auto_multi_line_default_match_threshold` parameter with a lower value. This configures a threshold value that determines how frequently logs have to match in order for the auto multi-line aggregation to work. To find the current threshold value run the [agent `status` command][1]. +The `auto_multi_line_default_match_threshold` parameter determines how closely logs have to match the patterns in order for the auto multi-line aggregation to work. + +If your multi-line logs aren't getting aggregated as you like, you can change the sensitivity of the matching by setting the `auto_multi_line_default_match_threshold` parameter. Add the `auto_multi_line_default_match_threshold` parameter to your configuration file with a value lower (to increase matches) or higher (to decrease matches) than the current threshold value. To find the current threshold value, run the [Agent `status` command][1]. 
```yaml logs_config: @@ -608,7 +610,7 @@ logs_config: [1]: https://docs.datadoghq.com/agent/configuration/agent-commands/#agent-information {{% /tab %}} {{% tab "Docker" %}} -In a containerized Agent, add the environment variable `DD_LOGS_CONFIG_AUTO_MULTI_LINE_EXTRA_PATTERNS` like so: +In a containerized Agent, add the environment variable `DD_LOGS_CONFIG_AUTO_MULTI_LINE_EXTRA_PATTERNS`: ```yaml environment: @@ -616,22 +618,22 @@ In a containerized Agent, add the environment variable `DD_LOGS_CONFIG_AUTO_MULT - DD_LOGS_CONFIG_AUTO_MULTI_LINE_EXTRA_PATTERNS=\d{4}-(0?[1-9]|1[012])-(0?[1-9]|[12][0-9]|3[01]) [A-Za-z_]+\s\d+,\s\d+\s\d+:\d+:\d+\s(AM|PM) ``` -If no pattern meets the line match threshold, add the `DD_LOGS_CONFIG_AUTO_MULTI_LINE_DEFAULT_MATCH_THRESHOLD` environment variable with a lower value. This configures a threshold value that determines how frequently logs have to match in order for the auto multi-line aggregation to work. To find the current threshold value run the [agent `status` command][1]. +The `auto_multi_line_default_match_threshold` parameter determines how closely logs have to match the patterns in order for the auto multi-line aggregation to work. + +If your multi-line logs aren't getting aggregated as you like, you can change the sensitivity of the matching by setting the `auto_multi_line_default_match_threshold` parameter. Add the `auto_multi_line_default_match_threshold` parameter to your configuration file with a value lower (to increase matches) or higher (to decrease matches) than the current threshold value. To find the current threshold value, run the [Agent `status` command][1]. +The Datadog Agent interprets spaces in the `DD_LOGS_CONFIG_AUTO_MULTI_LINE_EXTRA_PATTERNS` environment variable as separators between multiple patterns. In the following example, the two regex patterns are divided by a space, and `\s` in the second regex pattern matches spaces. 
```yaml environment: - DD_LOGS_CONFIG_AUTO_MULTI_LINE_DETECTION=true - DD_LOGS_CONFIG_AUTO_MULTI_LINE_EXTRA_PATTERNS=\d{4}-(0?[1-9]|1[012])-(0?[1-9]|[12][0-9]|3[01]) [A-Za-z_]+\s\d+,\s\d+\s\d+:\d+:\d+\s(AM|PM) - DD_LOGS_CONFIG_AUTO_MULTI_LINE_DEFAULT_MATCH_THRESHOLD=0.1 -``` - -**Note**: The Datadog Agent interpret spaces in the `DD_LOGS_CONFIG_AUTO_MULTI_LINE_EXTRA_PATTERNS` environment variable as separators between multiple patterns. You can see the two regex patterns are divided by a space and `\s` is used in the second regex pattern to match for spaces. [1]: https://docs.datadoghq.com/agent/configuration/agent-commands/#agent-information {{% /tab %}} {{% tab "Kubernetes" %}} -In Kubernetes, add the environment variable `DD_LOGS_CONFIG_AUTO_MULTI_LINE_EXTRA_PATTERNS` like so: +In Kubernetes, add the environment variable `DD_LOGS_CONFIG_AUTO_MULTI_LINE_EXTRA_PATTERNS`: #### Operator @@ -646,15 +648,16 @@ spec: #### Helm +The Datadog Agent interprets spaces in the `DD_LOGS_CONFIG_AUTO_MULTI_LINE_EXTRA_PATTERNS` environment variable as separators between multiple patterns. In the following example, the two regex patterns are divided by a space, and `\s` in the second regex pattern matches spaces. ```yaml datadog: env: - name: DD_LOGS_CONFIG_AUTO_MULTI_LINE_EXTRA_PATTERNS value: \d{4}-(0?[1-9]|1[012])-(0?[1-9]|[12][0-9]|3[01]) [A-Za-z_]+\s\d+,\s\d+\s\d+:\d+:\d+\s(AM|PM) -``` -**Note**: The Datadog Agent interpret spaces in the `DD_LOGS_CONFIG_AUTO_MULTI_LINE_EXTRA_PATTERNS` environment variable as separators between multiple patterns. You can see the two regex patterns are divided by a space and `\s` is used in the second regex pattern to match for spaces. -If no pattern meets the line match threshold, add the `DD_LOGS_CONFIG_AUTO_MULTI_LINE_DEFAULT_MATCH_THRESHOLD` environment variable with a lower value. This configures a threshold value that determines how frequently logs have to match in order for the auto multi-line aggregation to work. 
To find the current threshold value run the [agent `status` command][1]. +The `auto_multi_line_default_match_threshold` parameter determines how closely logs have to match the patterns in order for the auto multi-line aggregation to work. + +If your multi-line logs aren't getting aggregated as you like, you can change the sensitivity of the matching by setting the `auto_multi_line_default_match_threshold` parameter. Add the `auto_multi_line_default_match_threshold` parameter to your configuration file with a value lower (to increase matches) or higher (to decrease matches) than the current threshold value. To find the current threshold value, run the [Agent `status` command][1]. [1]: https://docs.datadoghq.com/agent/configuration/agent-commands/#agent-information @@ -686,7 +689,9 @@ datadog: {{% /tab %}} {{< /tabs >}} -With this feature enabled, when a new log file is opened the Agent first tries to detect a pattern. This detection process takes at most 30 seconds or the first 500 logs (whichever comes first). During this process the logs are sent as single lines. After the detection threshold is met all future logs for that source are aggregated with the best matching pattern, or as single lines if no pattern is found. +With multi-line aggregation enabled, the Agent first tries to detect a pattern in each new log file. This detection process takes at most 30 seconds or the first 500 logs, whichever comes first. During the initial detection process, the logs are sent as single lines. + +After the detection threshold is met, all future logs for that source are aggregated with the best matching pattern, or as single lines if no pattern is found. **Note**: If you can control the naming pattern of the rotated log, ensure that the rotated file replaces the previously active file with the same name. The Agent reuses a previously detected pattern on the newly rotated file to avoid re-running detection. 
From 80536a4188db5a02c6888032ee4b1a00ce4c4598 Mon Sep 17 00:00:00 2001 From: daenypark <118874777+daenypark@users.noreply.github.com> Date: Thu, 30 Jan 2025 11:32:36 -0500 Subject: [PATCH 7/8] Update advanced_log_collection.md Fixed some box format "```" and re-arrange extra pattern note. --- content/en/agent/logs/advanced_log_collection.md | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/content/en/agent/logs/advanced_log_collection.md b/content/en/agent/logs/advanced_log_collection.md index dfc6e28bee75d..d58b78332867b 100644 --- a/content/en/agent/logs/advanced_log_collection.md +++ b/content/en/agent/logs/advanced_log_collection.md @@ -617,20 +617,20 @@ In a containerized Agent, add the environment variable `DD_LOGS_CONFIG_AUTO_MULT - DD_LOGS_CONFIG_AUTO_MULTI_LINE_DETECTION=true - DD_LOGS_CONFIG_AUTO_MULTI_LINE_EXTRA_PATTERNS=\d{4}-(0?[1-9]|1[012])-(0?[1-9]|[12][0-9]|3[01]) [A-Za-z_]+\s\d+,\s\d+\s\d+:\d+:\d+\s(AM|PM) ``` +**Note**The Datadog Agent interprets spaces in the `DD_LOGS_CONFIG_AUTO_MULTI_LINE_EXTRA_PATTERNS` environment variable as separators between multiple patterns. In the following example, the two regex patterns are divided by a space, and `\s` in the second regex pattern matches spaces. The `auto_multi_line_default_match_threshold` parameter determines how closely logs have to match the patterns in order for the auto multi-line aggregation to work. If your multi-line logs aren't getting aggregated as you like, you can change the sensitivity of the matching by setting the `auto_multi_line_default_match_threshold` parameter. Add the `auto_multi_line_default_match_threshold` parameter to your configuration file with a value lower (to increase matches) or higher (to decrease matches) than the current threshold value. To find the current threshold value, run the [Agent `status` command][1]. 
-The Datadog Agent interprets spaces in the `DD_LOGS_CONFIG_AUTO_MULTI_LINE_EXTRA_PATTERNS` environment variable as separators between multiple patterns. In the following example, the two regex patterns are divided by a space, and `\s` in the second regex pattern matches spaces. ```yaml environment: - DD_LOGS_CONFIG_AUTO_MULTI_LINE_DETECTION=true - DD_LOGS_CONFIG_AUTO_MULTI_LINE_EXTRA_PATTERNS=\d{4}-(0?[1-9]|1[012])-(0?[1-9]|[12][0-9]|3[01]) [A-Za-z_]+\s\d+,\s\d+\s\d+:\d+:\d+\s(AM|PM) - DD_LOGS_CONFIG_AUTO_MULTI_LINE_DEFAULT_MATCH_THRESHOLD=0.1 +``` [1]: https://docs.datadoghq.com/agent/configuration/agent-commands/#agent-information - {{% /tab %}} {{% tab "Kubernetes" %}} In Kubernetes, add the environment variable `DD_LOGS_CONFIG_AUTO_MULTI_LINE_EXTRA_PATTERNS`: @@ -648,12 +648,14 @@ spec: #### Helm -The Datadog Agent interprets spaces in the `DD_LOGS_CONFIG_AUTO_MULTI_LINE_EXTRA_PATTERNS` environment variable as separators between multiple patterns. In the following example, the two regex patterns are divided by a space, and `\s` in the second regex pattern matches spaces. ```yaml datadog: env: - name: DD_LOGS_CONFIG_AUTO_MULTI_LINE_EXTRA_PATTERNS value: \d{4}-(0?[1-9]|1[012])-(0?[1-9]|[12][0-9]|3[01]) [A-Za-z_]+\s\d+,\s\d+\s\d+:\d+:\d+\s(AM|PM) +``` +**Note**The Datadog Agent interprets spaces in the `DD_LOGS_CONFIG_AUTO_MULTI_LINE_EXTRA_PATTERNS` environment variable as separators between multiple patterns. In the following example, the two regex patterns are divided by a space, and `\s` in the second regex pattern matches spaces. + The `auto_multi_line_default_match_threshold` parameter determines how closely logs have to match the patterns in order for the auto multi-line aggregation to work. From f100ffff5dbe950a0fda028085d695f1d712df40 Mon Sep 17 00:00:00 2001 From: daenypark <118874777+daenypark@users.noreply.github.com> Date: Thu, 30 Jan 2025 11:50:39 -0500 Subject: [PATCH 8/8] Update advanced_log_collection.md small nit change. 
--- content/en/agent/logs/advanced_log_collection.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/content/en/agent/logs/advanced_log_collection.md b/content/en/agent/logs/advanced_log_collection.md index d58b78332867b..8e683509974a4 100644 --- a/content/en/agent/logs/advanced_log_collection.md +++ b/content/en/agent/logs/advanced_log_collection.md @@ -617,7 +617,7 @@ In a containerized Agent, add the environment variable `DD_LOGS_CONFIG_AUTO_MULT - DD_LOGS_CONFIG_AUTO_MULTI_LINE_DETECTION=true - DD_LOGS_CONFIG_AUTO_MULTI_LINE_EXTRA_PATTERNS=\d{4}-(0?[1-9]|1[012])-(0?[1-9]|[12][0-9]|3[01]) [A-Za-z_]+\s\d+,\s\d+\s\d+:\d+:\d+\s(AM|PM) ``` -**Note**The Datadog Agent interprets spaces in the `DD_LOGS_CONFIG_AUTO_MULTI_LINE_EXTRA_PATTERNS` environment variable as separators between multiple patterns. In the following example, the two regex patterns are divided by a space, and `\s` in the second regex pattern matches spaces. +**Note**: The Datadog Agent interprets spaces in the `DD_LOGS_CONFIG_AUTO_MULTI_LINE_EXTRA_PATTERNS` environment variable as separators between multiple patterns. In the following example, the two regex patterns are divided by a space, and `\s` in the second regex pattern matches spaces. The `auto_multi_line_default_match_threshold` parameter determines how closely logs have to match the patterns in order for the auto multi-line aggregation to work. @@ -654,7 +654,7 @@ datadog: - name: DD_LOGS_CONFIG_AUTO_MULTI_LINE_EXTRA_PATTERNS value: \d{4}-(0?[1-9]|1[012])-(0?[1-9]|[12][0-9]|3[01]) [A-Za-z_]+\s\d+,\s\d+\s\d+:\d+:\d+\s(AM|PM) ``` -**Note**The Datadog Agent interprets spaces in the `DD_LOGS_CONFIG_AUTO_MULTI_LINE_EXTRA_PATTERNS` environment variable as separators between multiple patterns. In the following example, the two regex patterns are divided by a space, and `\s` in the second regex pattern matches spaces. 
+**Note**: The Datadog Agent interprets spaces in the `DD_LOGS_CONFIG_AUTO_MULTI_LINE_EXTRA_PATTERNS` environment variable as separators between multiple patterns. In the preceding example, the two regex patterns are divided by a space, and `\s` in the second regex pattern matches spaces. The `auto_multi_line_default_match_threshold` parameter determines how closely logs have to match the patterns in order for the auto multi-line aggregation to work.