-
Notifications
You must be signed in to change notification settings - Fork 491
Update metric docs #5844
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Open
rdettai
wants to merge
8
commits into
revamp-storage-metrics
Choose a base branch
from
update-metrics-doc
base: revamp-storage-metrics
Could not load branches
Branch not found: {{ refName }}
Loading
Could not load tags
Nothing to show
Loading
Are you sure you want to change the base?
Some commits from the old base branch may be removed from the timeline,
and old review comments may become outdated.
Open
Update metric docs #5844
Changes from 6 commits
Commits
Show all changes
8 commits
Select commit
Hold shift + click to select a range
716c43f
Reorganize object store metrics
rdettai 88528bf
Fix feature flags and license headers
rdettai cb7eb48
Improve download metric
rdettai 1553498
Improve metric descriptions
rdettai 922a15d
Update metrics doc
rdettai a7f12de
Make in-flight storage get requests a memory metric
rdettai ad3dd11
Add precisions to job_assigned_total
rdettai b339041
Update example Grafana dashboards
rdettai File filter
Filter by extension
Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
There are no files selected for viewing
Large diffs are not rendered by default.
Oops, something went wrong.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -13,18 +13,14 @@ | |
| // limitations under the License. | ||
|
|
||
| use once_cell::sync::Lazy; | ||
| use quickwit_common::metrics::{IntCounter, IntGauge, new_counter, new_counter_vec, new_gauge}; | ||
| use quickwit_common::metrics::{IntCounter, new_counter_vec}; | ||
|
|
||
| pub struct IngestMetrics { | ||
| // With ingest V1 all ingested documents are considered valid | ||
| pub ingested_docs_bytes_valid: IntCounter, | ||
| pub ingested_docs_valid: IntCounter, | ||
| pub ingested_docs_bytes_invalid: IntCounter, | ||
| pub ingested_docs_invalid: IntCounter, | ||
| pub ingested_docs_valid: IntCounter, | ||
|
|
||
| pub replicated_num_bytes_total: IntCounter, | ||
| pub replicated_num_docs_total: IntCounter, | ||
| #[allow(dead_code)] // this really shouldn't be dead, it needs to be used somewhere | ||
|
Collaborator
Author
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. this has been dead for a year |
||
| pub queue_count: IntGauge, | ||
| } | ||
|
|
||
| impl Default for IngestMetrics { | ||
|
|
@@ -56,24 +52,6 @@ impl Default for IngestMetrics { | |
| ingested_docs_bytes_invalid, | ||
| ingested_docs_valid, | ||
| ingested_docs_invalid, | ||
| replicated_num_bytes_total: new_counter( | ||
| "replicated_num_bytes_total", | ||
| "Total size in bytes of the replicated docs.", | ||
| "ingest", | ||
| &[], | ||
| ), | ||
| replicated_num_docs_total: new_counter( | ||
| "replicated_num_docs_total", | ||
| "Total number of docs replicated.", | ||
| "ingest", | ||
| &[], | ||
| ), | ||
| queue_count: new_gauge( | ||
| "queue_count", | ||
| "Number of queues currently active", | ||
| "ingest", | ||
| &[], | ||
| ), | ||
| } | ||
| } | ||
| } | ||
|
|
||
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -16,8 +16,8 @@ | |
|
|
||
| use once_cell::sync::Lazy; | ||
| use quickwit_common::metrics::{ | ||
| GaugeGuard, Histogram, IntCounter, IntCounterVec, IntGauge, new_counter, new_counter_vec, | ||
| new_gauge, new_histogram_vec, | ||
| GaugeGuard, HistogramVec, IntCounter, IntCounterVec, IntGauge, MEMORY_METRICS, new_counter, | ||
| new_counter_vec, new_gauge, new_histogram_vec, | ||
| }; | ||
|
|
||
| /// Counters associated to storage operations. | ||
|
|
@@ -30,19 +30,11 @@ pub struct StorageMetrics { | |
| pub searcher_split_cache: CacheMetrics, | ||
| pub get_slice_timeout_successes: [IntCounter; 3], | ||
| pub get_slice_timeout_all_timeouts: IntCounter, | ||
| pub object_storage_get_total: IntCounter, | ||
| pub object_storage_get_errors_total: IntCounterVec<1>, | ||
| pub object_storage_get_slice_in_flight_count: IntGauge, | ||
| pub object_storage_get_slice_in_flight_num_bytes: IntGauge, | ||
|
Comment on lines
-35
to
-36
Collaborator
Author
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more.
|
||
| pub object_storage_put_total: IntCounter, | ||
| pub object_storage_put_parts: IntCounter, | ||
| pub object_storage_download_num_bytes: IntCounter, | ||
| pub object_storage_upload_num_bytes: IntCounter, | ||
|
|
||
| pub object_storage_delete_requests_total: IntCounter, | ||
| pub object_storage_bulk_delete_requests_total: IntCounter, | ||
| pub object_storage_delete_request_duration: Histogram, | ||
| pub object_storage_bulk_delete_request_duration: Histogram, | ||
| pub object_storage_requests_total: IntCounterVec<2>, | ||
| pub object_storage_request_duration: HistogramVec<2>, | ||
| pub object_storage_download_num_bytes: IntCounterVec<1>, | ||
| pub object_storage_download_errors: IntCounterVec<1>, | ||
| pub object_storage_upload_num_bytes: IntCounterVec<1>, | ||
| } | ||
|
|
||
| impl Default for StorageMetrics { | ||
|
|
@@ -63,31 +55,6 @@ impl Default for StorageMetrics { | |
| let get_slice_timeout_all_timeouts = | ||
| get_slice_timeout_outcome_total_vec.with_label_values(["all_timeouts"]); | ||
|
|
||
| let object_storage_requests_total = new_counter_vec( | ||
| "object_storage_requests_total", | ||
| "Total number of object storage requests performed.", | ||
| "storage", | ||
| &[], | ||
| ["action"], | ||
| ); | ||
| let object_storage_delete_requests_total = | ||
| object_storage_requests_total.with_label_values(["delete_object"]); | ||
| let object_storage_bulk_delete_requests_total = | ||
| object_storage_requests_total.with_label_values(["delete_objects"]); | ||
|
|
||
| let object_storage_request_duration = new_histogram_vec( | ||
| "object_storage_request_duration_seconds", | ||
| "Duration of object storage requests in seconds.", | ||
| "storage", | ||
| &[], | ||
| ["action"], | ||
| vec![0.1, 0.5, 1.0, 5.0, 10.0, 30.0, 60.0], | ||
| ); | ||
| let object_storage_delete_request_duration = | ||
| object_storage_request_duration.with_label_values(["delete_object"]); | ||
| let object_storage_bulk_delete_request_duration = | ||
| object_storage_request_duration.with_label_values(["delete_objects"]); | ||
|
|
||
| StorageMetrics { | ||
| fast_field_cache: CacheMetrics::for_component("fastfields"), | ||
| fd_cache_metrics: CacheMetrics::for_component("fd"), | ||
|
|
@@ -97,62 +64,50 @@ impl Default for StorageMetrics { | |
| split_footer_cache: CacheMetrics::for_component("splitfooter"), | ||
| get_slice_timeout_successes, | ||
| get_slice_timeout_all_timeouts, | ||
| object_storage_get_total: new_counter( | ||
| "object_storage_gets_total", | ||
| "Number of objects fetched. Might be lower than get_slice_timeout_outcome if \ | ||
| queries are debounced.", | ||
| object_storage_requests_total: new_counter_vec( | ||
| "object_storage_requests_total", | ||
| "Number of requests to the object store, by action and status. Requests are \ | ||
| recorded when the response headers are returned, download failures will not \ | ||
| appear as errors.", | ||
| "storage", | ||
| &[], | ||
| ["action", "status"], | ||
| ), | ||
| object_storage_get_errors_total: new_counter_vec::<1>( | ||
| "object_storage_get_errors_total", | ||
| "Number of GetObject errors.", | ||
| object_storage_request_duration: new_histogram_vec( | ||
| "object_storage_request_duration", | ||
| "Durations until the response headers are returned from the object store, by \ | ||
| action and status. This does not measure the download time for the body content.", | ||
| "storage", | ||
| &[], | ||
| ["code"], | ||
| ["action", "status"], | ||
| vec![0.1, 0.5, 1.0, 5.0, 10.0, 30.0, 60.0], | ||
| ), | ||
| object_storage_get_slice_in_flight_count: new_gauge( | ||
| "object_storage_get_slice_in_flight_count", | ||
| "Number of GetObject for which the memory was allocated but the download is still \ | ||
| in progress.", | ||
| "storage", | ||
| &[], | ||
| ), | ||
| object_storage_get_slice_in_flight_num_bytes: new_gauge( | ||
| "object_storage_get_slice_in_flight_num_bytes", | ||
| "Memory allocated for GetObject requests that are still in progress.", | ||
| "storage", | ||
| &[], | ||
| ), | ||
| object_storage_put_total: new_counter( | ||
| "object_storage_puts_total", | ||
| "Number of objects uploaded. May differ from object_storage_requests_parts due to \ | ||
| multipart upload.", | ||
| object_storage_download_num_bytes: new_counter_vec( | ||
| "object_storage_download_num_bytes", | ||
| "Amount of data downloaded from object storage.", | ||
| "storage", | ||
| &[], | ||
| ["status"], | ||
| ), | ||
| object_storage_put_parts: new_counter( | ||
| "object_storage_puts_parts", | ||
| "Number of object parts uploaded.", | ||
| "", | ||
| &[], | ||
| ), | ||
| object_storage_download_num_bytes: new_counter( | ||
| "object_storage_download_num_bytes", | ||
| "Amount of data downloaded from an object storage.", | ||
| object_storage_download_errors: new_counter_vec( | ||
| "object_storage_download_errors", | ||
| // Download errors are recorded separately because the associated | ||
| // get_object requests were already recorded as successful in | ||
| // object_storage_requests_total | ||
| "Number of download requests that received successful response headers but failed \ | ||
| during download.", | ||
| "storage", | ||
| &[], | ||
| ["status"], | ||
| ), | ||
| object_storage_upload_num_bytes: new_counter( | ||
| object_storage_upload_num_bytes: new_counter_vec( | ||
| "object_storage_upload_num_bytes", | ||
| "Amount of data uploaded to an object storage.", | ||
| "Amount of data uploaded to object storage. The value recorded for failed and \ | ||
| aborted uploads is the full payload size.", | ||
| "storage", | ||
| &[], | ||
| ["status"], | ||
| ), | ||
| object_storage_delete_requests_total, | ||
| object_storage_bulk_delete_requests_total, | ||
| object_storage_delete_request_duration, | ||
| object_storage_bulk_delete_request_duration, | ||
| } | ||
| } | ||
| } | ||
|
|
@@ -229,15 +184,11 @@ pub static STORAGE_METRICS: Lazy<StorageMetrics> = Lazy::new(StorageMetrics::def | |
| pub static CACHE_METRICS_FOR_TESTS: Lazy<CacheMetrics> = | ||
| Lazy::new(|| CacheMetrics::for_component("fortest")); | ||
|
|
||
| pub fn object_storage_get_slice_in_flight_guards( | ||
| get_request_size: usize, | ||
| ) -> (GaugeGuard<'static>, GaugeGuard<'static>) { | ||
| let mut bytes_guard = GaugeGuard::from_gauge( | ||
| &crate::STORAGE_METRICS.object_storage_get_slice_in_flight_num_bytes, | ||
| ); | ||
| /// Helps track pre-allocated memory for downloads that are still in progress. | ||
| /// | ||
| /// This is actually recorded as a memory metric and not a storage metric. | ||
| pub fn object_storage_get_slice_in_flight_guards(get_request_size: usize) -> GaugeGuard<'static> { | ||
| let mut bytes_guard = GaugeGuard::from_gauge(&MEMORY_METRICS.in_flight.get_object); | ||
| bytes_guard.add(get_request_size as i64); | ||
| let mut count_guard = | ||
| GaugeGuard::from_gauge(&crate::STORAGE_METRICS.object_storage_get_slice_in_flight_count); | ||
| count_guard.add(1); | ||
| (bytes_guard, count_guard) | ||
| bytes_guard | ||
| } | ||
Oops, something went wrong.
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
moved this to be part of ingest_v2 metrics