Skip to content

Commit b2f99c6

Browse files
PSeitz and PSeitz-dd authored
add term->histogram benchmark (#2758)
* add term->histogram benchmark
* add more term aggs

---------

Co-authored-by: Pascal Seitz <[email protected]>
1 parent 76de5ba commit b2f99c6

File tree

1 file changed

+84
-5
lines changed

1 file changed

+84
-5
lines changed

benches/agg_bench.rs

Lines changed: 84 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
use binggan::plugins::PeakMemAllocPlugin;
22
use binggan::{black_box, InputGroup, PeakMemAlloc, INSTRUMENTED_SYSTEM};
3+
use rand::distributions::WeightedIndex;
34
use rand::prelude::SliceRandom;
45
use rand::rngs::StdRng;
56
use rand::{Rng, SeedableRng};
@@ -54,12 +55,18 @@ fn bench_agg(mut group: InputGroup<Index>) {
5455
register!(group, extendedstats_f64);
5556
register!(group, percentiles_f64);
5657
register!(group, terms_few);
58+
register!(group, terms_all_unique);
5759
register!(group, terms_many);
5860
register!(group, terms_many_top_1000);
5961
register!(group, terms_many_order_by_term);
6062
register!(group, terms_many_with_top_hits);
63+
register!(group, terms_all_unique_with_avg_sub_agg);
6164
register!(group, terms_many_with_avg_sub_agg);
6265
register!(group, terms_few_with_avg_sub_agg);
66+
register!(group, terms_status_with_avg_sub_agg);
67+
register!(group, terms_status);
68+
register!(group, terms_few_with_histogram);
69+
register!(group, terms_status_with_histogram);
6370

6471
register!(group, terms_many_json_mixed_type_with_avg_sub_agg);
6572

@@ -132,12 +139,12 @@ fn extendedstats_f64(index: &Index) {
132139
}
133140
fn percentiles_f64(index: &Index) {
134141
let agg_req = json!({
135-
"mypercentiles": {
136-
"percentiles": {
137-
"field": "score_f64",
138-
"percents": [ 95, 99, 99.9 ]
142+
"mypercentiles": {
143+
"percentiles": {
144+
"field": "score_f64",
145+
"percents": [ 95, 99, 99.9 ]
146+
}
139147
}
140-
}
141148
});
142149
execute_agg(index, agg_req);
143150
}
@@ -174,6 +181,19 @@ fn terms_few(index: &Index) {
174181
});
175182
execute_agg(index, agg_req);
176183
}
184+
/// Benchmark case: plain terms aggregation over the
/// `text_few_terms_status` field, with no sub-aggregation.
fn terms_status(index: &Index) {
    let request = json!({
        "my_texts": {
            "terms": { "field": "text_few_terms_status" }
        },
    });
    execute_agg(index, request);
}
190+
/// Benchmark case: plain terms aggregation over the
/// `text_all_unique_terms` field, with no sub-aggregation.
fn terms_all_unique(index: &Index) {
    let request = json!({
        "my_texts": {
            "terms": { "field": "text_all_unique_terms" }
        },
    });
    execute_agg(index, request);
}
196+
177197
fn terms_many(index: &Index) {
178198
let agg_req = json!({
179199
"my_texts": { "terms": { "field": "text_many_terms" } },
@@ -222,6 +242,39 @@ fn terms_many_with_avg_sub_agg(index: &Index) {
222242
});
223243
execute_agg(index, agg_req);
224244
}
245+
/// Benchmark case: terms aggregation over `text_all_unique_terms` with an
/// `avg` sub-aggregation on `score_f64` (result key `average_f64`).
fn terms_all_unique_with_avg_sub_agg(index: &Index) {
    let request = json!({
        "my_texts": {
            "terms": {
                "field": "text_all_unique_terms"
            },
            "aggs": {
                "average_f64": {
                    "avg": { "field": "score_f64" }
                }
            }
        },
    });
    execute_agg(index, request);
}
256+
/// Benchmark case: terms aggregation over `text_few_terms` with a
/// `histogram` sub-aggregation on `score_f64` using an interval of 10
/// (result key `histo`).
fn terms_few_with_histogram(index: &Index) {
    let request = json!({
        "my_texts": {
            "terms": {
                "field": "text_few_terms"
            },
            "aggs": {
                "histo": {
                    "histogram": { "field": "score_f64", "interval": 10 }
                }
            }
        }
    });
    execute_agg(index, request);
}
267+
/// Benchmark case: terms aggregation over `text_few_terms_status` with a
/// `histogram` sub-aggregation on `score_f64` using an interval of 10
/// (result key `histo`).
fn terms_status_with_histogram(index: &Index) {
    let request = json!({
        "my_texts": {
            "terms": {
                "field": "text_few_terms_status"
            },
            "aggs": {
                "histo": {
                    "histogram": { "field": "score_f64", "interval": 10 }
                }
            }
        }
    });
    execute_agg(index, request);
}
225278

226279
fn terms_few_with_avg_sub_agg(index: &Index) {
227280
let agg_req = json!({
@@ -234,6 +287,17 @@ fn terms_few_with_avg_sub_agg(index: &Index) {
234287
});
235288
execute_agg(index, agg_req);
236289
}
290+
/// Benchmark case: terms aggregation over `text_few_terms_status` with an
/// `avg` sub-aggregation on `score_f64` (result key `average_f64`).
fn terms_status_with_avg_sub_agg(index: &Index) {
    let request = json!({
        "my_texts": {
            "terms": {
                "field": "text_few_terms_status"
            },
            "aggs": {
                "average_f64": {
                    "avg": { "field": "score_f64" }
                }
            }
        },
    });
    execute_agg(index, request);
}
237301

238302
fn terms_many_json_mixed_type_with_avg_sub_agg(index: &Index) {
239303
let agg_req = json!({
@@ -419,14 +483,21 @@ fn get_test_index_bench(cardinality: Cardinality) -> tantivy::Result<Index> {
419483
.set_stored();
420484
let text_field = schema_builder.add_text_field("text", text_fieldtype);
421485
let json_field = schema_builder.add_json_field("json", FAST);
486+
let text_field_all_unique_terms =
487+
schema_builder.add_text_field("text_all_unique_terms", STRING | FAST);
488+
let text_field_many_terms = schema_builder.add_text_field("text_many_terms", STRING | FAST);
422489
let text_field_many_terms = schema_builder.add_text_field("text_many_terms", STRING | FAST);
423490
let text_field_few_terms = schema_builder.add_text_field("text_few_terms", STRING | FAST);
491+
let text_field_few_terms_status =
492+
schema_builder.add_text_field("text_few_terms_status", STRING | FAST);
424493
let score_fieldtype = tantivy::schema::NumericOptions::default().set_fast();
425494
let score_field = schema_builder.add_u64_field("score", score_fieldtype.clone());
426495
let score_field_f64 = schema_builder.add_f64_field("score_f64", score_fieldtype.clone());
427496
let score_field_i64 = schema_builder.add_i64_field("score_i64", score_fieldtype);
428497
let index = Index::create_from_tempdir(schema_builder.build())?;
429498
let few_terms_data = ["INFO", "ERROR", "WARN", "DEBUG"];
499+
// Approximate production log proportions: INFO dominant, WARN and DEBUG occasional, ERROR rare.
500+
let log_level_distribution = WeightedIndex::new([80u32, 3, 12, 5]).unwrap();
430501

431502
let lg_norm = rand_distr::LogNormal::new(2.996f64, 0.979f64).unwrap();
432503

@@ -442,15 +513,21 @@ fn get_test_index_bench(cardinality: Cardinality) -> tantivy::Result<Index> {
442513
index_writer.add_document(doc!())?;
443514
}
444515
if cardinality == Cardinality::Multivalued {
516+
let log_level_sample_a = few_terms_data[log_level_distribution.sample(&mut rng)];
517+
let log_level_sample_b = few_terms_data[log_level_distribution.sample(&mut rng)];
445518
index_writer.add_document(doc!(
446519
json_field => json!({"mixed_type": 10.0}),
447520
json_field => json!({"mixed_type": 10.0}),
448521
text_field => "cool",
449522
text_field => "cool",
523+
text_field_all_unique_terms => "cool",
524+
text_field_all_unique_terms => "coolo",
450525
text_field_many_terms => "cool",
451526
text_field_many_terms => "cool",
452527
text_field_few_terms => "cool",
453528
text_field_few_terms => "cool",
529+
text_field_few_terms_status => log_level_sample_a,
530+
text_field_few_terms_status => log_level_sample_b,
454531
score_field => 1u64,
455532
score_field => 1u64,
456533
score_field_f64 => lg_norm.sample(&mut rng),
@@ -475,8 +552,10 @@ fn get_test_index_bench(cardinality: Cardinality) -> tantivy::Result<Index> {
475552
index_writer.add_document(doc!(
476553
text_field => "cool",
477554
json_field => json,
555+
text_field_all_unique_terms => format!("unique_term_{}", rng.gen::<u64>()),
478556
text_field_many_terms => many_terms_data.choose(&mut rng).unwrap().to_string(),
479557
text_field_few_terms => few_terms_data.choose(&mut rng).unwrap().to_string(),
558+
text_field_few_terms_status => few_terms_data[log_level_distribution.sample(&mut rng)],
480559
score_field => val as u64,
481560
score_field_f64 => lg_norm.sample(&mut rng),
482561
score_field_i64 => val as i64,

0 commit comments

Comments
 (0)