Skip to content

Commit 4642a35

Browse files
authored
fix: remove redundant length field in wal record (#1576)
## Rationale The `length` field is not required in the wal record; it duplicates `value_length`. ## Detailed Changes - Remove `length` from the wal record - Remove `wal-rocksdb` from the default features ## Test Plan CI, plus manual benchmarking with [avalanche](https://github.com/prometheus-community/avalanche)
1 parent a90745e commit 4642a35

20 files changed

+78
-100
lines changed

.asf.yaml

+1-1
Original file line numberDiff line numberDiff line change
@@ -39,7 +39,7 @@ github:
3939
protected_branches:
4040
main:
4141
required_pull_request_reviews:
42-
dismiss_stale_reviews: true
42+
dismiss_stale_reviews: false
4343
required_approving_review_count: 1
4444
protected_tags: []
4545

.github/workflows/ci.yml

+4
Original file line numberDiff line numberDiff line change
@@ -39,6 +39,10 @@ on:
3939
- 'Cargo.lock'
4040
- '.github/workflows/ci.yml'
4141

42+
concurrency:
43+
group: ${{ github.workflow }}-${{ github.ref }}
44+
cancel-in-progress: true
45+
4246
# Common environment variables
4347
env:
4448
RUSTFLAGS: "-C debuginfo=1"

.github/workflows/tsbs.yml

+1-1
Original file line numberDiff line numberDiff line change
@@ -46,7 +46,7 @@ jobs:
4646
- name: Setup Build Environment
4747
run: |
4848
sudo apt update
49-
sudo apt install --yes protobuf-compiler
49+
sudo apt install --yes protobuf-compiler liblzma-dev
5050
- name: Build server
5151
run: |
5252
make build

Cargo.lock

+4-4
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

Cargo.toml

+2
Original file line numberDiff line numberDiff line change
@@ -84,6 +84,8 @@ members = [
8484
"src/wal"
8585
]
8686

87+
default-members = ["src/horaedb"]
88+
8789
[workspace.dependencies]
8890
alloc_tracker = { path = "src/components/alloc_tracker" }
8991
arrow = { version = "49.0.0", features = ["prettyprint"] }

docs/example-cluster-0.toml

+1-1
Original file line numberDiff line numberDiff line change
@@ -37,7 +37,7 @@ type = "Local"
3737
data_dir = "/tmp/horaedb0"
3838

3939
[analytic.wal]
40-
type = "RocksDB"
40+
type = "Local"
4141
data_dir = "/tmp/horaedb0"
4242

4343
[cluster_deployment]

docs/example-cluster-1.toml

+1-1
Original file line numberDiff line numberDiff line change
@@ -38,7 +38,7 @@ type = "Local"
3838
data_dir = "/tmp/horaedb1"
3939

4040
[analytic.wal]
41-
type = "RocksDB"
41+
type = "Local"
4242
data_dir = "/tmp/horaedb1"
4343

4444
[cluster_deployment]

docs/example-standalone-static-routing.toml

+1-2
Original file line numberDiff line numberDiff line change
@@ -36,7 +36,7 @@ max_replay_tables_per_batch = 1024
3636
write_group_command_channel_cap = 1024
3737

3838
[analytic.wal]
39-
type = "RocksDB"
39+
type = "Local"
4040
data_dir = "/tmp/horaedb1"
4141

4242
[analytic.storage]
@@ -91,4 +91,3 @@ shards = [ 1 ]
9191
[limiter]
9292
write_block_list = ['mytable1']
9393
read_block_list = ['mytable1']
94-

docs/minimal.toml

+1-1
Original file line numberDiff line numberDiff line change
@@ -32,7 +32,7 @@ type = "Local"
3232
data_dir = "/tmp/horaedb"
3333

3434
[analytic.wal]
35-
type = "RocksDB"
35+
type = "Local"
3636
data_dir = "/tmp/horaedb"
3737

3838
[analytic]

integration_tests/Makefile

+1-1
Original file line numberDiff line numberDiff line change
@@ -54,7 +54,7 @@ build-meta:
5454
./build_meta.sh
5555

5656
build-horaedb:
57-
cd .. && cargo build --bin horaedb-server --features wal-table-kv,wal-message-queue,wal-rocksdb,wal-local-storage
57+
cd .. && make build-debug
5858

5959
build-test:
6060
cargo build

integration_tests/cases/env/cluster/ddl/partition_table.result

+4-2
Original file line numberDiff line numberDiff line change
@@ -100,21 +100,23 @@ UInt64(16367588166920223437),Timestamp(1651737067000),String("horaedb9"),Int32(0
100100
-- SQLNESS REPLACE compute=\d+.?\d*(µ|m|n) compute=xx
101101
-- SQLNESS REPLACE time=\d+.?\d*(µ|m|n) time=xx
102102
-- SQLNESS REPLACE metrics=\[.*?s\] metrics=xx
103+
-- SQLNESS REPLACE scan_memtable_\d+ scan_memtable_n
103104
EXPLAIN ANALYZE SELECT * from partition_table_t where name = "ceresdb0";
104105

105106
plan_type,plan,
106-
String("Plan with Metrics"),String("ResolvedPartitionedScan: pushdown_continue:false, partition_count:1, metrics=xx\n ScanTable: table=__partition_table_t_1, parallelism=8, priority=Low, partition_count=UnknownPartitioning(8), metrics=[\nPredicate { exprs:[name = Utf8(\"ceresdb0\")], time_range:TimeRange { inclusive_start: Timestamp(-9223372036854775808), exclusive_end: Timestamp(9223372036854775807) } }\nscan_table:\n do_merge_sort=true\n iter_num=1\n merge_iter_0:\n init_duration=xxs\n num_memtables=0\n num_ssts=0\n scan_count=1\n scan_duration=xxs\n times_fetch_row_from_multiple=0\n times_fetch_rows_from_one=0\n total_rows_fetch_from_one=0\n scan_memtable_1, fetched_columns:[tsid,t,name,id,value]:\n=0]\n=0]\n"),
107+
String("Plan with Metrics"),String("ResolvedPartitionedScan: pushdown_continue:false, partition_count:1, metrics=xx\n ScanTable: table=__partition_table_t_1, parallelism=8, priority=Low, partition_count=UnknownPartitioning(8), metrics=[\nPredicate { exprs:[name = Utf8(\"ceresdb0\")], time_range:TimeRange { inclusive_start: Timestamp(-9223372036854775808), exclusive_end: Timestamp(9223372036854775807) } }\nscan_table:\n do_merge_sort=true\n iter_num=1\n merge_iter_0:\n init_duration=xxs\n num_memtables=0\n num_ssts=0\n scan_count=1\n scan_duration=xxs\n times_fetch_row_from_multiple=0\n times_fetch_rows_from_one=0\n total_rows_fetch_from_one=0\n scan_memtable_n, fetched_columns:[tsid,t,name,id,value]:\n=0]\n=0]\n"),
107108

108109

109110
-- SQLNESS REPLACE duration=\d+.?\d*(µ|m|n) duration=xx
110111
-- SQLNESS REPLACE compute=\d+.?\d*(µ|m|n) compute=xx
111112
-- SQLNESS REPLACE __partition_table_t_\d __partition_table_t_x
112113
-- SQLNESS REPLACE time=\d+.?\d*(µ|m|n) time=xx
113114
-- SQLNESS REPLACE metrics=\[.*?s\] metrics=xx
115+
-- SQLNESS REPLACE scan_memtable_\d+ scan_memtable_n
114116
EXPLAIN ANALYZE SELECT * from partition_table_t where name in ("ceresdb0", "ceresdb1", "ceresdb2", "ceresdb3", "ceresdb4");
115117

116118
plan_type,plan,
117-
String("Plan with Metrics"),String("ResolvedPartitionedScan: pushdown_continue:false, partition_count:3, metrics=xx\n ScanTable: table=__partition_table_t_x, parallelism=8, priority=Low, partition_count=UnknownPartitioning(8), metrics=xx\n ScanTable: table=__partition_table_t_x, parallelism=8, priority=Low, partition_count=UnknownPartitioning(8), metrics=xx\n ScanTable: table=__partition_table_t_x, parallelism=8, priority=Low, partition_count=UnknownPartitioning(8), metrics=[\nPredicate { exprs:[name IN ([Utf8(\"ceresdb0\"), Utf8(\"ceresdb1\"), Utf8(\"ceresdb2\"), Utf8(\"ceresdb3\"), Utf8(\"ceresdb4\")])], time_range:TimeRange { inclusive_start: Timestamp(-9223372036854775808), exclusive_end: Timestamp(9223372036854775807) } }\nscan_table:\n do_merge_sort=true\n iter_num=1\n merge_iter_0:\n init_duration=xxs\n num_memtables=0\n num_ssts=0\n scan_count=1\n scan_duration=xxs\n times_fetch_row_from_multiple=0\n times_fetch_rows_from_one=0\n total_rows_fetch_from_one=0\n scan_memtable_1, fetched_columns:[tsid,t,name,id,value]:\n=0]\n=0]\n"),
119+
String("Plan with Metrics"),String("ResolvedPartitionedScan: pushdown_continue:false, partition_count:3, metrics=xx\n ScanTable: table=__partition_table_t_x, parallelism=8, priority=Low, partition_count=UnknownPartitioning(8), metrics=xx\n ScanTable: table=__partition_table_t_x, parallelism=8, priority=Low, partition_count=UnknownPartitioning(8), metrics=xx\n ScanTable: table=__partition_table_t_x, parallelism=8, priority=Low, partition_count=UnknownPartitioning(8), metrics=[\nPredicate { exprs:[name IN ([Utf8(\"ceresdb0\"), Utf8(\"ceresdb1\"), Utf8(\"ceresdb2\"), Utf8(\"ceresdb3\"), Utf8(\"ceresdb4\")])], time_range:TimeRange { inclusive_start: Timestamp(-9223372036854775808), exclusive_end: Timestamp(9223372036854775807) } }\nscan_table:\n do_merge_sort=true\n iter_num=1\n merge_iter_0:\n init_duration=xxs\n num_memtables=0\n num_ssts=0\n scan_count=1\n scan_duration=xxs\n times_fetch_row_from_multiple=0\n times_fetch_rows_from_one=0\n total_rows_fetch_from_one=0\n scan_memtable_n, fetched_columns:[tsid,t,name,id,value]:\n=0]\n=0]\n"),
118120

119121

120122
ALTER TABLE partition_table_t ADD COLUMN (b string);

integration_tests/cases/env/cluster/ddl/partition_table.sql

+2
Original file line numberDiff line numberDiff line change
@@ -58,13 +58,15 @@ SELECT * from partition_table_t where name in ("horaedb5", "horaedb6", "horaedb7
5858
-- SQLNESS REPLACE compute=\d+.?\d*(µ|m|n) compute=xx
5959
-- SQLNESS REPLACE time=\d+.?\d*(µ|m|n) time=xx
6060
-- SQLNESS REPLACE metrics=\[.*?s\] metrics=xx
61+
-- SQLNESS REPLACE scan_memtable_\d+ scan_memtable_n
6162
EXPLAIN ANALYZE SELECT * from partition_table_t where name = "ceresdb0";
6263

6364
-- SQLNESS REPLACE duration=\d+.?\d*(µ|m|n) duration=xx
6465
-- SQLNESS REPLACE compute=\d+.?\d*(µ|m|n) compute=xx
6566
-- SQLNESS REPLACE __partition_table_t_\d __partition_table_t_x
6667
-- SQLNESS REPLACE time=\d+.?\d*(µ|m|n) time=xx
6768
-- SQLNESS REPLACE metrics=\[.*?s\] metrics=xx
69+
-- SQLNESS REPLACE scan_memtable_\d+ scan_memtable_n
6870
EXPLAIN ANALYZE SELECT * from partition_table_t where name in ("ceresdb0", "ceresdb1", "ceresdb2", "ceresdb3", "ceresdb4");
6971

7072
ALTER TABLE partition_table_t ADD COLUMN (b string);

integration_tests/cases/env/local/ddl/query-plan.result

+4-4
Original file line numberDiff line numberDiff line change
@@ -50,7 +50,7 @@ explain analyze select t from `03_dml_select_real_time_range`
5050
where t > 1695348001000;
5151

5252
plan_type,plan,
53-
String("Plan with Metrics"),String("ScanTable: table=03_dml_select_real_time_range, parallelism=8, priority=Low, partition_count=UnknownPartitioning(8), metrics=[\nPredicate { exprs:[t > TimestampMillisecond(1695348001000, None)], time_range:TimeRange { inclusive_start: Timestamp(1695348001001), exclusive_end: Timestamp(9223372036854775807) } }\nscan_table:\n do_merge_sort=true\n iter_num=1\n merge_iter_0:\n init_duration=xxs\n num_memtables=1\n num_ssts=0\n scan_count=2\n scan_duration=xxs\n times_fetch_row_from_multiple=0\n times_fetch_rows_from_one=1\n total_rows_fetch_from_one=1\n scan_memtable_1, fetched_columns:[tsid,t]:\n=0]\n"),
53+
String("Plan with Metrics"),String("ScanTable: table=03_dml_select_real_time_range, parallelism=8, priority=Low, partition_count=UnknownPartitioning(8), metrics=[\nPredicate { exprs:[t > TimestampMillisecond(1695348001000, None)], time_range:TimeRange { inclusive_start: Timestamp(1695348001001), exclusive_end: Timestamp(9223372036854775807) } }\nscan_table:\n do_merge_sort=true\n iter_num=1\n merge_iter_0:\n init_duration=xxs\n num_memtables=1\n num_ssts=0\n scan_count=2\n scan_duration=xxs\n times_fetch_row_from_multiple=0\n times_fetch_rows_from_one=1\n total_rows_fetch_from_one=1\n scan_memtable_164, fetched_columns:[tsid,t]:\n=0]\n"),
5454

5555

5656
-- This query should have higher priority
@@ -60,7 +60,7 @@ explain analyze select t from `03_dml_select_real_time_range`
6060
where t >= 1695348001000 and t < 1695348002000;
6161

6262
plan_type,plan,
63-
String("Plan with Metrics"),String("ScanTable: table=03_dml_select_real_time_range, parallelism=8, priority=High, partition_count=UnknownPartitioning(8), metrics=[\nPredicate { exprs:[t >= TimestampMillisecond(1695348001000, None), t < TimestampMillisecond(1695348002000, None)], time_range:TimeRange { inclusive_start: Timestamp(1695348001000), exclusive_end: Timestamp(1695348002000) } }\nscan_table:\n do_merge_sort=true\n iter_num=1\n merge_iter_0:\n init_duration=xxs\n num_memtables=1\n num_ssts=0\n scan_count=2\n scan_duration=xxs\n times_fetch_row_from_multiple=0\n times_fetch_rows_from_one=1\n total_rows_fetch_from_one=1\n scan_memtable_1, fetched_columns:[tsid,t]:\n=0]\n"),
63+
String("Plan with Metrics"),String("ScanTable: table=03_dml_select_real_time_range, parallelism=8, priority=High, partition_count=UnknownPartitioning(8), metrics=[\nPredicate { exprs:[t >= TimestampMillisecond(1695348001000, None), t < TimestampMillisecond(1695348002000, None)], time_range:TimeRange { inclusive_start: Timestamp(1695348001000), exclusive_end: Timestamp(1695348002000) } }\nscan_table:\n do_merge_sort=true\n iter_num=1\n merge_iter_0:\n init_duration=xxs\n num_memtables=1\n num_ssts=0\n scan_count=2\n scan_duration=xxs\n times_fetch_row_from_multiple=0\n times_fetch_rows_from_one=1\n total_rows_fetch_from_one=1\n scan_memtable_164, fetched_columns:[tsid,t]:\n=0]\n"),
6464

6565

6666
-- This query should have higher priority
@@ -70,7 +70,7 @@ explain analyze select name from `03_dml_select_real_time_range`
7070
where t >= 1695348001000 and t < 1695348002000;
7171

7272
plan_type,plan,
73-
String("Plan with Metrics"),String("ProjectionExec: expr=[name@0 as name], metrics=xx\n ScanTable: table=03_dml_select_real_time_range, parallelism=8, priority=High, partition_count=UnknownPartitioning(8), metrics=[\nPredicate { exprs:[t >= TimestampMillisecond(1695348001000, None), t < TimestampMillisecond(1695348002000, None)], time_range:TimeRange { inclusive_start: Timestamp(1695348001000), exclusive_end: Timestamp(1695348002000) } }\nscan_table:\n do_merge_sort=true\n iter_num=1\n merge_iter_0:\n init_duration=xxs\n num_memtables=1\n num_ssts=0\n scan_count=2\n scan_duration=xxs\n times_fetch_row_from_multiple=0\n times_fetch_rows_from_one=1\n total_rows_fetch_from_one=1\n scan_memtable_1, fetched_columns:[tsid,t,name]:\n=0]\n"),
73+
String("Plan with Metrics"),String("ProjectionExec: expr=[name@0 as name], metrics=xx\n ScanTable: table=03_dml_select_real_time_range, parallelism=8, priority=High, partition_count=UnknownPartitioning(8), metrics=[\nPredicate { exprs:[t >= TimestampMillisecond(1695348001000, None), t < TimestampMillisecond(1695348002000, None)], time_range:TimeRange { inclusive_start: Timestamp(1695348001000), exclusive_end: Timestamp(1695348002000) } }\nscan_table:\n do_merge_sort=true\n iter_num=1\n merge_iter_0:\n init_duration=xxs\n num_memtables=1\n num_ssts=0\n scan_count=2\n scan_duration=xxs\n times_fetch_row_from_multiple=0\n times_fetch_rows_from_one=1\n total_rows_fetch_from_one=1\n scan_memtable_164, fetched_columns:[tsid,t,name]:\n=0]\n"),
7474

7575

7676
-- This query should not include memtable
@@ -135,7 +135,7 @@ explain analyze select t from `03_append_mode_table`
135135
where t >= 1695348001000 and name = 'ceresdb';
136136

137137
plan_type,plan,
138-
String("Plan with Metrics"),String("ProjectionExec: expr=[t@0 as t], metrics=xx\n ScanTable: table=03_append_mode_table, parallelism=8, priority=Low, partition_count=UnknownPartitioning(8), metrics=[\nPredicate { exprs:[t >= TimestampMillisecond(1695348001000, None), name = Utf8(\"ceresdb\")], time_range:TimeRange { inclusive_start: Timestamp(1695348001000), exclusive_end: Timestamp(9223372036854775807) } }\nscan_table:\n do_merge_sort=false\n chain_iter_0:\n num_memtables=1\n num_ssts=0\n scan_duration=xxs\n since_create=xxs\n since_init=xxs\n total_batch_fetched=1\n total_rows_fetched=2\n scan_memtable_1, fetched_columns:[t,name]:\n=0]\n"),
138+
String("Plan with Metrics"),String("ProjectionExec: expr=[t@0 as t], metrics=xx\n ScanTable: table=03_append_mode_table, parallelism=8, priority=Low, partition_count=UnknownPartitioning(8), metrics=[\nPredicate { exprs:[t >= TimestampMillisecond(1695348001000, None), name = Utf8(\"ceresdb\")], time_range:TimeRange { inclusive_start: Timestamp(1695348001000), exclusive_end: Timestamp(9223372036854775807) } }\nscan_table:\n do_merge_sort=false\n chain_iter_0:\n num_memtables=1\n num_ssts=0\n scan_duration=xxs\n since_create=xxs\n since_init=xxs\n total_batch_fetched=1\n total_rows_fetched=2\n scan_memtable_166, fetched_columns:[t,name]:\n=0]\n"),
139139

140140

141141
-- Should just fetch projected columns from SST

integration_tests/config/horaedb-cluster-0.toml

+1-1
Original file line numberDiff line numberDiff line change
@@ -37,7 +37,7 @@ type = "Local"
3737
data_dir = "/tmp/horaedb0"
3838

3939
[analytic.wal]
40-
type = "RocksDB"
40+
type = "Local"
4141
data_dir = "/tmp/horaedb0"
4242

4343
[cluster_deployment]

integration_tests/config/horaedb-cluster-1.toml

+1-1
Original file line numberDiff line numberDiff line change
@@ -38,7 +38,7 @@ type = "Local"
3838
data_dir = "/tmp/horaedb1"
3939

4040
[analytic.wal]
41-
type = "RocksDB"
41+
type = "Local"
4242
data_dir = "/tmp/horaedb1"
4343

4444
[cluster_deployment]

integration_tests/config/shard-based-recovery.toml

+1-1
Original file line numberDiff line numberDiff line change
@@ -34,5 +34,5 @@ type = "Local"
3434
data_dir = "/tmp/horaedb"
3535

3636
[analytic.wal]
37-
type = "RocksDB"
37+
type = "Local"
3838
data_dir = "/tmp/horaedb"

src/horaedb/Cargo.toml

+1-1
Original file line numberDiff line numberDiff line change
@@ -31,7 +31,7 @@ workspace = true
3131
workspace = true
3232

3333
[features]
34-
default = ["wal-rocksdb", "wal-table-kv", "wal-message-queue", "wal-local-storage"]
34+
default = ["wal-table-kv", "wal-message-queue", "wal-local-storage"]
3535
wal-table-kv = ["wal/wal-table-kv", "analytic_engine/wal-table-kv"]
3636
wal-message-queue = ["wal/wal-message-queue", "analytic_engine/wal-message-queue"]
3737
wal-rocksdb = ["wal/wal-rocksdb", "analytic_engine/wal-rocksdb"]

src/wal/Cargo.toml

+1-1
Original file line numberDiff line numberDiff line change
@@ -32,7 +32,7 @@ workspace = true
3232

3333
[dependencies.rocksdb]
3434
git = "https://github.com/tikv/rust-rocksdb.git"
35-
rev = "f04f4dd8eacc30e67c24bc2529a6d9c6edb85f8f"
35+
rev = "85e79e52c6ad80b8c547fcb90b3cade64f141fac"
3636
features = ["portable"]
3737
optional = true
3838

0 commit comments

Comments
 (0)