A set of native implementations of common bioinformatics algorithms to be used as Arrow-DataFusion or SeQuiLa (Apache Spark) extensions.
RUSTFLAGS="-C target-cpu=native" RUST_LOG=info cargo run --release
RUST_LOG=info cargo run -p sequila-cli -- --file queries/q1-coitrees.sql
https://docs.rs/crate/flamegraph/0.6.5
sudo pacman -S perf gcc-libs glibc
cargo install flamegraph
echo -1 | sudo tee /proc/sys/kernel/perf_event_paranoid
cargo build --release
flamegraph -- target/release/sequila-cli -f queries/q1-coitrees.sql
SET sequila.prefer_interval_join TO true;
SET sequila.interval_join_algorithm TO coitrees;
SET datafusion.optimizer.repartition_joins TO false;
SET datafusion.execution.coalesce_batches TO false;
-- for controlling the parallelism level (only for benchmarking purposes; otherwise use the defaults)
SET datafusion.execution.target_partitions=1;
- Download and unpack the test dataset.
- Export an environment variable with the path to the root folder containing the benchmark data, e.g.:
export BENCH_DATA_ROOT=/Users/mwiewior/research/databio/
- Run the benchmark:
RUSTFLAGS="-Ctarget-cpu=native" cargo bench --bench databio_benchmark -- --quick