11use binggan:: plugins:: PeakMemAllocPlugin ;
22use binggan:: { black_box, InputGroup , PeakMemAlloc , INSTRUMENTED_SYSTEM } ;
3+ use rand:: distributions:: WeightedIndex ;
34use rand:: prelude:: SliceRandom ;
45use rand:: rngs:: StdRng ;
56use rand:: { Rng , SeedableRng } ;
@@ -54,12 +55,18 @@ fn bench_agg(mut group: InputGroup<Index>) {
5455 register ! ( group, extendedstats_f64) ;
5556 register ! ( group, percentiles_f64) ;
5657 register ! ( group, terms_few) ;
58+ register ! ( group, terms_all_unique) ;
5759 register ! ( group, terms_many) ;
5860 register ! ( group, terms_many_top_1000) ;
5961 register ! ( group, terms_many_order_by_term) ;
6062 register ! ( group, terms_many_with_top_hits) ;
63+ register ! ( group, terms_all_unique_with_avg_sub_agg) ;
6164 register ! ( group, terms_many_with_avg_sub_agg) ;
6265 register ! ( group, terms_few_with_avg_sub_agg) ;
66+ register ! ( group, terms_status_with_avg_sub_agg) ;
67+ register ! ( group, terms_status) ;
68+ register ! ( group, terms_few_with_histogram) ;
69+ register ! ( group, terms_status_with_histogram) ;
6370
6471 register ! ( group, terms_many_json_mixed_type_with_avg_sub_agg) ;
6572
@@ -132,12 +139,12 @@ fn extendedstats_f64(index: &Index) {
132139}
133140fn percentiles_f64 ( index : & Index ) {
134141 let agg_req = json ! ( {
135- "mypercentiles" : {
136- "percentiles" : {
137- "field" : "score_f64" ,
138- "percents" : [ 95 , 99 , 99.9 ]
142+ "mypercentiles" : {
143+ "percentiles" : {
144+ "field" : "score_f64" ,
145+ "percents" : [ 95 , 99 , 99.9 ]
146+ }
139147 }
140- }
141148 } ) ;
142149 execute_agg ( index, agg_req) ;
143150}
@@ -174,6 +181,19 @@ fn terms_few(index: &Index) {
174181 } ) ;
175182 execute_agg ( index, agg_req) ;
176183}
184+ fn terms_status ( index : & Index ) {
185+ let agg_req = json ! ( {
186+ "my_texts" : { "terms" : { "field" : "text_few_terms_status" } } ,
187+ } ) ;
188+ execute_agg ( index, agg_req) ;
189+ }
190+ fn terms_all_unique ( index : & Index ) {
191+ let agg_req = json ! ( {
192+ "my_texts" : { "terms" : { "field" : "text_all_unique_terms" } } ,
193+ } ) ;
194+ execute_agg ( index, agg_req) ;
195+ }
196+
177197fn terms_many ( index : & Index ) {
178198 let agg_req = json ! ( {
179199 "my_texts" : { "terms" : { "field" : "text_many_terms" } } ,
@@ -222,6 +242,39 @@ fn terms_many_with_avg_sub_agg(index: &Index) {
222242 } ) ;
223243 execute_agg ( index, agg_req) ;
224244}
245+ fn terms_all_unique_with_avg_sub_agg ( index : & Index ) {
246+ let agg_req = json ! ( {
247+ "my_texts" : {
248+ "terms" : { "field" : "text_all_unique_terms" } ,
249+ "aggs" : {
250+ "average_f64" : { "avg" : { "field" : "score_f64" } }
251+ }
252+ } ,
253+ } ) ;
254+ execute_agg ( index, agg_req) ;
255+ }
256+ fn terms_few_with_histogram ( index : & Index ) {
257+ let agg_req = json ! ( {
258+ "my_texts" : {
259+ "terms" : { "field" : "text_few_terms" } ,
260+ "aggs" : {
261+ "histo" : { "histogram" : { "field" : "score_f64" , "interval" : 10 } }
262+ }
263+ }
264+ } ) ;
265+ execute_agg ( index, agg_req) ;
266+ }
267+ fn terms_status_with_histogram ( index : & Index ) {
268+ let agg_req = json ! ( {
269+ "my_texts" : {
270+ "terms" : { "field" : "text_few_terms_status" } ,
271+ "aggs" : {
272+ "histo" : { "histogram" : { "field" : "score_f64" , "interval" : 10 } }
273+ }
274+ }
275+ } ) ;
276+ execute_agg ( index, agg_req) ;
277+ }
225278
226279fn terms_few_with_avg_sub_agg ( index : & Index ) {
227280 let agg_req = json ! ( {
@@ -234,6 +287,17 @@ fn terms_few_with_avg_sub_agg(index: &Index) {
234287 } ) ;
235288 execute_agg ( index, agg_req) ;
236289}
290+ fn terms_status_with_avg_sub_agg ( index : & Index ) {
291+ let agg_req = json ! ( {
292+ "my_texts" : {
293+ "terms" : { "field" : "text_few_terms_status" } ,
294+ "aggs" : {
295+ "average_f64" : { "avg" : { "field" : "score_f64" } }
296+ }
297+ } ,
298+ } ) ;
299+ execute_agg ( index, agg_req) ;
300+ }
237301
238302fn terms_many_json_mixed_type_with_avg_sub_agg ( index : & Index ) {
239303 let agg_req = json ! ( {
@@ -419,14 +483,21 @@ fn get_test_index_bench(cardinality: Cardinality) -> tantivy::Result<Index> {
419483 . set_stored ( ) ;
420484 let text_field = schema_builder. add_text_field ( "text" , text_fieldtype) ;
421485 let json_field = schema_builder. add_json_field ( "json" , FAST ) ;
486+ let text_field_all_unique_terms =
487+ schema_builder. add_text_field ( "text_all_unique_terms" , STRING | FAST ) ;
488+ let text_field_many_terms = schema_builder. add_text_field ( "text_many_terms" , STRING | FAST ) ;
422489 let text_field_many_terms = schema_builder. add_text_field ( "text_many_terms" , STRING | FAST ) ;
423490 let text_field_few_terms = schema_builder. add_text_field ( "text_few_terms" , STRING | FAST ) ;
491+ let text_field_few_terms_status =
492+ schema_builder. add_text_field ( "text_few_terms_status" , STRING | FAST ) ;
424493 let score_fieldtype = tantivy:: schema:: NumericOptions :: default ( ) . set_fast ( ) ;
425494 let score_field = schema_builder. add_u64_field ( "score" , score_fieldtype. clone ( ) ) ;
426495 let score_field_f64 = schema_builder. add_f64_field ( "score_f64" , score_fieldtype. clone ( ) ) ;
427496 let score_field_i64 = schema_builder. add_i64_field ( "score_i64" , score_fieldtype) ;
428497 let index = Index :: create_from_tempdir ( schema_builder. build ( ) ) ?;
429498 let few_terms_data = [ "INFO" , "ERROR" , "WARN" , "DEBUG" ] ;
499+ // Approximate production log proportions: INFO dominant, WARN and DEBUG occasional, ERROR rare.
500+ let log_level_distribution = WeightedIndex :: new ( [ 80u32 , 3 , 12 , 5 ] ) . unwrap ( ) ;
430501
431502 let lg_norm = rand_distr:: LogNormal :: new ( 2.996f64 , 0.979f64 ) . unwrap ( ) ;
432503
@@ -442,15 +513,21 @@ fn get_test_index_bench(cardinality: Cardinality) -> tantivy::Result<Index> {
442513 index_writer. add_document ( doc ! ( ) ) ?;
443514 }
444515 if cardinality == Cardinality :: Multivalued {
516+ let log_level_sample_a = few_terms_data[ log_level_distribution. sample ( & mut rng) ] ;
517+ let log_level_sample_b = few_terms_data[ log_level_distribution. sample ( & mut rng) ] ;
445518 index_writer. add_document ( doc ! (
446519 json_field => json!( { "mixed_type" : 10.0 } ) ,
447520 json_field => json!( { "mixed_type" : 10.0 } ) ,
448521 text_field => "cool" ,
449522 text_field => "cool" ,
523+ text_field_all_unique_terms => "cool" ,
524+ text_field_all_unique_terms => "coolo" ,
450525 text_field_many_terms => "cool" ,
451526 text_field_many_terms => "cool" ,
452527 text_field_few_terms => "cool" ,
453528 text_field_few_terms => "cool" ,
529+ text_field_few_terms_status => log_level_sample_a,
530+ text_field_few_terms_status => log_level_sample_b,
454531 score_field => 1u64 ,
455532 score_field => 1u64 ,
456533 score_field_f64 => lg_norm. sample( & mut rng) ,
@@ -475,8 +552,10 @@ fn get_test_index_bench(cardinality: Cardinality) -> tantivy::Result<Index> {
475552 index_writer. add_document ( doc ! (
476553 text_field => "cool" ,
477554 json_field => json,
555+ text_field_all_unique_terms => format!( "unique_term_{}" , rng. gen :: <u64 >( ) ) ,
478556 text_field_many_terms => many_terms_data. choose( & mut rng) . unwrap( ) . to_string( ) ,
479557 text_field_few_terms => few_terms_data. choose( & mut rng) . unwrap( ) . to_string( ) ,
558+ text_field_few_terms_status => few_terms_data[ log_level_distribution. sample( & mut rng) ] ,
480559 score_field => val as u64 ,
481560 score_field_f64 => lg_norm. sample( & mut rng) ,
482561 score_field_i64 => val as i64 ,
0 commit comments