Skip to content

Commit 253af5c

Browse files
committed
Add util for estimating capacity needed for flatbuffer-encoding functioncall
Signed-off-by: Ludvig Liljenberg <[email protected]>
1 parent abdb2d6 commit 253af5c

File tree

1 file changed

+348
-0
lines changed
  • src/hyperlight_common/src/flatbuffer_wrappers

1 file changed

+348
-0
lines changed

src/hyperlight_common/src/flatbuffer_wrappers/util.rs

Lines changed: 348 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@ use alloc::vec::Vec;
1818

1919
use flatbuffers::FlatBufferBuilder;
2020

21+
use crate::flatbuffer_wrappers::function_types::ParameterValue;
2122
use crate::flatbuffers::hyperlight::generated::{
2223
FunctionCallResult as FbFunctionCallResult, FunctionCallResultArgs as FbFunctionCallResultArgs,
2324
ReturnValue as FbReturnValue, hlbool as Fbhlbool, hlboolArgs as FbhlboolArgs,
@@ -169,3 +170,350 @@ impl FlatbufferSerializable for bool {
169170
}
170171
}
171172
}
173+
174+
/// Estimates the required buffer capacity for encoding a FunctionCall with the given parameters.
175+
/// This helps avoid reallocation during FlatBuffer encoding when passing large slices and strings.
176+
///
177+
/// The function aims to be lightweight and fast and run in O(1) as long as the number of parameters is limited
178+
/// (which it is since hyperlight only currently supports up to 12).
179+
///
180+
/// Note: This estimates the capacity needed for the inner vec inside a FlatBufferBuilder. It does not
181+
/// necessarily match the size of the final encoded buffer. The estimation always rounds up to the
182+
/// nearest power of two to match FlatBufferBuilder's allocation strategy.
183+
///
184+
/// The estimations are numbers used are empirically derived based on the tests below and vaguely based
185+
/// on https://flatbuffers.dev/internals/ and https://github.com/dvidelabs/flatcc/blob/master/doc/binary-format.md#flatbuffers-binary-format
186+
#[inline] // allow cross-crate inlining (for hyperlight-host calls)
187+
pub fn estimate_flatbuffer_capacity(function_name: &str, args: &[ParameterValue]) -> usize {
188+
let mut estimated_capacity = 20;
189+
190+
// Function name overhead
191+
estimated_capacity += function_name.len() + 12;
192+
193+
// Parameters vector overhead
194+
estimated_capacity += 12 + args.len() * 6;
195+
196+
// Per-parameter overhead
197+
for arg in args {
198+
estimated_capacity += 16; // Base parameter structure
199+
estimated_capacity += match arg {
200+
ParameterValue::String(s) => s.len() + 20,
201+
ParameterValue::VecBytes(v) => v.len() + 20,
202+
ParameterValue::Int(_) | ParameterValue::UInt(_) => 16,
203+
ParameterValue::Long(_) | ParameterValue::ULong(_) => 20,
204+
ParameterValue::Float(_) => 16,
205+
ParameterValue::Double(_) => 20,
206+
ParameterValue::Bool(_) => 12,
207+
};
208+
}
209+
210+
// match how vec grows
211+
estimated_capacity.next_power_of_two()
212+
}
213+
214+
#[cfg(test)]
mod tests {
    use alloc::string::ToString;
    use alloc::vec;
    use alloc::vec::Vec;

    use super::*;
    use crate::flatbuffer_wrappers::function_call::{FunctionCall, FunctionCallType};
    use crate::flatbuffer_wrappers::function_types::{ParameterValue, ReturnType};

    /// Helper function to check that estimation is within reasonable bounds (±25%).
    ///
    /// Builds a `FunctionCall` from the given inputs, encodes it into a fresh
    /// `FlatBufferBuilder`, and compares the result of `estimate_flatbuffer_capacity`
    /// against the capacity of the builder's backing vector after encoding.
    /// Panics (fails the test) when the estimate is outside `[0.75, 1.25] * actual`.
    fn assert_estimation_accuracy(
        function_name: &str,
        args: Vec<ParameterValue>,
        call_type: FunctionCallType,
        return_type: ReturnType,
    ) {
        // Estimate first: `args` is moved into the FunctionCall below.
        let estimated = estimate_flatbuffer_capacity(function_name, &args);

        let fc = FunctionCall::new(
            function_name.to_string(),
            Some(args),
            call_type.clone(),
            return_type,
        );
        // Important that this FlatBufferBuilder is created with capacity 0 so it grows to its needed capacity
        let mut builder = FlatBufferBuilder::new();
        let _buffer = fc.encode(&mut builder);
        // The capacity of the backing vector is what the estimate is compared against,
        // not the length of the encoded data.
        let actual = builder.collapse().0.capacity();

        let lower_bound = (actual as f64 * 0.75) as usize;
        let upper_bound = (actual as f64 * 1.25) as usize;

        assert!(
            estimated >= lower_bound && estimated <= upper_bound,
            "Estimation {} outside bounds [{}, {}] for actual size {} (function: {}, call_type: {:?}, return_type: {:?})",
            estimated,
            lower_bound,
            upper_bound,
            actual,
            function_name,
            call_type,
            return_type
        );
    }

    /// A call without any parameters.
    #[test]
    fn test_estimate_no_parameters() {
        assert_estimation_accuracy(
            "simple_function",
            vec![],
            FunctionCallType::Guest,
            ReturnType::Void,
        );
    }

    /// A call with a single scalar parameter.
    #[test]
    fn test_estimate_single_int_parameter() {
        assert_estimation_accuracy(
            "add_one",
            vec![ParameterValue::Int(42)],
            FunctionCallType::Guest,
            ReturnType::Int,
        );
    }

    /// A call with one parameter of every scalar type.
    #[test]
    fn test_estimate_multiple_scalar_parameters() {
        assert_estimation_accuracy(
            "calculate",
            vec![
                ParameterValue::Int(10),
                ParameterValue::UInt(20),
                ParameterValue::Long(30),
                ParameterValue::ULong(40),
                ParameterValue::Float(1.5),
                ParameterValue::Double(2.5),
                ParameterValue::Bool(true),
            ],
            FunctionCallType::Guest,
            ReturnType::Double,
        );
    }

    /// A call with several short string parameters.
    #[test]
    fn test_estimate_string_parameters() {
        assert_estimation_accuracy(
            "process_strings",
            vec![
                ParameterValue::String("hello".to_string()),
                ParameterValue::String("world".to_string()),
                ParameterValue::String("this is a longer string for testing".to_string()),
            ],
            FunctionCallType::Host,
            ReturnType::String,
        );
    }

    /// A call with a single 1000-byte string parameter.
    #[test]
    fn test_estimate_very_long_string() {
        let long_string = "a".repeat(1000);
        assert_estimation_accuracy(
            "process_long_string",
            vec![ParameterValue::String(long_string)],
            FunctionCallType::Guest,
            ReturnType::String,
        );
    }

    /// A call with byte-vector parameters, including an empty one.
    #[test]
    fn test_estimate_vector_parameters() {
        assert_estimation_accuracy(
            "process_vectors",
            vec![
                ParameterValue::VecBytes(vec![1, 2, 3, 4, 5]),
                ParameterValue::VecBytes(vec![]),
                ParameterValue::VecBytes(vec![0; 100]),
            ],
            FunctionCallType::Host,
            ReturnType::VecBytes,
        );
    }

    /// A call mixing strings, byte vectors and scalars.
    #[test]
    fn test_estimate_mixed_parameters() {
        assert_estimation_accuracy(
            "complex_function",
            vec![
                ParameterValue::String("test".to_string()),
                ParameterValue::Int(42),
                ParameterValue::VecBytes(vec![1, 2, 3, 4, 5]),
                ParameterValue::Bool(true),
                ParameterValue::Double(553.14159),
                ParameterValue::String("another string".to_string()),
                ParameterValue::Long(9223372036854775807),
            ],
            FunctionCallType::Guest,
            ReturnType::VecBytes,
        );
    }

    /// A call whose function name is unusually long.
    #[test]
    fn test_estimate_large_function_name() {
        let long_name = "very_long_function_name_that_exceeds_normal_lengths_for_testing_purposes";
        assert_estimation_accuracy(
            long_name,
            vec![ParameterValue::Int(1)],
            FunctionCallType::Host,
            ReturnType::Long,
        );
    }

    /// A call with a single 10000-byte vector parameter.
    #[test]
    fn test_estimate_large_vector() {
        let large_vector = vec![42u8; 10000];
        assert_estimation_accuracy(
            "process_large_data",
            vec![ParameterValue::VecBytes(large_vector)],
            FunctionCallType::Guest,
            ReturnType::Bool,
        );
    }

    /// A call with one parameter of every `ParameterValue` variant, using extreme scalar values.
    #[test]
    fn test_estimate_all_parameter_types() {
        assert_estimation_accuracy(
            "comprehensive_test",
            vec![
                ParameterValue::Int(i32::MIN),
                ParameterValue::UInt(u32::MAX),
                ParameterValue::Long(i64::MIN),
                ParameterValue::ULong(u64::MAX),
                ParameterValue::Float(f32::MIN),
                ParameterValue::Double(f64::MAX),
                ParameterValue::Bool(false),
                ParameterValue::String("test string".to_string()),
                ParameterValue::VecBytes(vec![0, 1, 2, 3, 4, 5, 6, 7, 8, 9]),
            ],
            FunctionCallType::Host,
            ReturnType::ULong,
        );
    }

    /// The same shape of call issued once as a guest call and once as a host call.
    #[test]
    fn test_different_function_call_types() {
        assert_estimation_accuracy(
            "guest_function",
            vec![ParameterValue::String("guest call".to_string())],
            FunctionCallType::Guest,
            ReturnType::String,
        );

        assert_estimation_accuracy(
            "host_function",
            vec![ParameterValue::String("host call".to_string())],
            FunctionCallType::Host,
            ReturnType::String,
        );
    }

    /// The same arguments encoded with different return types.
    ///
    /// The estimate only takes the function name and the arguments as input, so the
    /// three estimates below may differ only through the slightly different names.
    #[test]
    fn test_different_return_types() {
        let args = vec![
            ParameterValue::Int(42),
            ParameterValue::String("test".to_string()),
        ];

        let void_est = estimate_flatbuffer_capacity("test_void", &args);
        let int_est = estimate_flatbuffer_capacity("test_int", &args);
        let string_est = estimate_flatbuffer_capacity("test_string", &args);

        // Compare as usize: casting to i32 first would silently truncate large estimates.
        assert!(void_est.abs_diff(int_est) < 10);
        assert!(int_est.abs_diff(string_est) < 10);

        assert_estimation_accuracy(
            "test_void",
            args.clone(),
            FunctionCallType::Guest,
            ReturnType::Void,
        );
        assert_estimation_accuracy(
            "test_int",
            args.clone(),
            FunctionCallType::Guest,
            ReturnType::Int,
        );
        assert_estimation_accuracy(
            "test_string",
            args,
            FunctionCallType::Guest,
            ReturnType::String,
        );
    }

    /// A call with many multi-kilobyte strings and byte vectors plus a few scalars.
    #[test]
    fn test_estimate_many_large_vectors_and_strings() {
        assert_estimation_accuracy(
            "process_bulk_data",
            vec![
                ParameterValue::String("Large string data: ".to_string() + &"x".repeat(2000)),
                ParameterValue::VecBytes(vec![1u8; 5000]),
                ParameterValue::String(
                    "Another large string with lots of content ".to_string() + &"y".repeat(3000),
                ),
                ParameterValue::VecBytes(vec![255u8; 7500]),
                ParameterValue::String(
                    "Third massive string parameter ".to_string() + &"z".repeat(1500),
                ),
                ParameterValue::VecBytes(vec![128u8; 10000]),
                ParameterValue::Int(42),
                ParameterValue::String("Final large string ".to_string() + &"a".repeat(4000)),
                ParameterValue::VecBytes(vec![64u8; 2500]),
                ParameterValue::Bool(true),
            ],
            FunctionCallType::Host,
            ReturnType::VecBytes,
        );
    }

    /// A call with twenty small parameters of mixed types.
    #[test]
    fn test_estimate_twenty_parameters() {
        assert_estimation_accuracy(
            "function_with_many_parameters",
            vec![
                ParameterValue::Int(1),
                ParameterValue::String("param2".to_string()),
                ParameterValue::Bool(true),
                ParameterValue::Float(3213.14),
                ParameterValue::VecBytes(vec![1, 2, 3]),
                ParameterValue::Long(1000000),
                ParameterValue::Double(322.718),
                ParameterValue::UInt(42),
                ParameterValue::String("param9".to_string()),
                ParameterValue::Bool(false),
                ParameterValue::ULong(9999999999),
                ParameterValue::VecBytes(vec![4, 5, 6, 7, 8]),
                ParameterValue::Int(-100),
                ParameterValue::Float(1.414),
                ParameterValue::String("param15".to_string()),
                ParameterValue::Double(1.732),
                ParameterValue::Bool(true),
                ParameterValue::VecBytes(vec![9, 10]),
                ParameterValue::Long(-5000000),
                ParameterValue::UInt(12345),
            ],
            FunctionCallType::Guest,
            ReturnType::Int,
        );
    }

    /// A call with several megabyte-sized string and byte-vector parameters.
    #[test]
    fn test_estimate_megabyte_parameters() {
        assert_estimation_accuracy(
            "process_megabyte_data",
            vec![
                ParameterValue::String("MB String 1: ".to_string() + &"x".repeat(1_048_576)), // 1MB string
                ParameterValue::VecBytes(vec![42u8; 2_097_152]), // 2MB vector
                ParameterValue::String("MB String 2: ".to_string() + &"y".repeat(1_572_864)), // 1.5MB string
                ParameterValue::VecBytes(vec![128u8; 3_145_728]), // 3MB vector
                ParameterValue::String("MB String 3: ".to_string() + &"z".repeat(2_097_152)), // 2MB string
            ],
            FunctionCallType::Host,
            ReturnType::VecBytes,
        );
    }
}

0 commit comments

Comments
 (0)