Skip to content

Commit cc5b915

Browse files
authored
feat: pipeline-based sample building (#25)
1 parent a7db0a9 commit cc5b915

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

62 files changed

+1072
-1233
lines changed

.github/workflows/ci.yaml

+1-1
Original file line number | Diff line number | Diff line change
@@ -42,7 +42,7 @@ jobs:
4242
only: ytt
4343

4444
- name: setup uv
45-
uses: astral-sh/setup-uv@v3
45+
uses: astral-sh/setup-uv@v4
4646
with:
4747
version: "latest"
4848
enable-cache: true

.github/workflows/release.yaml

+1-1
Original file line number | Diff line number | Diff line change
@@ -33,7 +33,7 @@ jobs:
3333
fetch-depth: 0
3434

3535
- name: build and inspect
36-
uses: hynek/build-and-inspect-python-package@v2.9.0
36+
uses: hynek/build-and-inspect-python-package@v2
3737
with:
3838
attest-build-provenance-github: "true"
3939

.pre-commit-config.yaml

+2-2
Original file line number | Diff line number | Diff line change
@@ -11,14 +11,14 @@ repos:
1111
- id: pyupgrade
1212

1313
- repo: https://github.com/astral-sh/ruff-pre-commit
14-
rev: v0.7.4
14+
rev: v0.8.0
1515
hooks:
1616
- id: ruff
1717
args: [--fix]
1818
- id: ruff-format
1919

2020
- repo: https://github.com/DetachHead/basedpyright-pre-commit-mirror
21-
rev: 1.21.1
21+
rev: 1.22.0
2222
hooks:
2323
- id: basedpyright
2424

config/_templates/dataset/carla.yaml

+57-31
Original file line number | Diff line number | Diff line change
@@ -75,39 +75,65 @@ inputs:
7575

7676
#@ end
7777

78-
table_builder:
79-
_target_: rbyte.io.table.TableBuilder
80-
_convert_: all
81-
readers:
82-
ego_logs:
83-
path: ${data_dir}/(@=input_id@)/ego_logs.json
84-
reader:
85-
_target_: rbyte.io.JsonTableReader
86-
_recursive_: false
87-
fields:
88-
records:
89-
_idx_:
90-
control.brake:
91-
control.throttle:
92-
control.steer:
93-
state.velocity.value:
94-
state.acceleration.value:
78+
samples:
79+
pipeline:
80+
_target_: pipefunc.Pipeline
81+
validate_type_annotations: false
82+
functions:
83+
- _target_: pipefunc.PipeFunc
84+
bound:
85+
path: ${data_dir}/(@=input_id@)/ego_logs.json
86+
output_name: ego_logs
87+
func:
88+
_target_: rbyte.io.JsonDataFrameBuilder
89+
fields:
90+
records:
91+
control.brake:
92+
control.throttle:
93+
control.steer:
94+
state.velocity.value:
95+
state.acceleration.value:
9596

96-
transforms:
97-
- _target_: rbyte.io.FpsResampler
98-
source_fps: 20
99-
target_fps: 30
97+
- _target_: pipefunc.PipeFunc
98+
renames:
99+
input: ego_logs
100+
output_name: data
101+
func:
102+
_target_: rbyte.io.DataFrameConcater
103+
method: vertical
100104

101-
merger:
102-
_target_: rbyte.io.TableConcater
103-
method: vertical
105+
- _target_: pipefunc.PipeFunc
106+
renames:
107+
input: data
108+
output_name: data_resampled
109+
func:
110+
_target_: rbyte.io.DataFrameFpsResampler
111+
fps_in: 20
112+
fps_out: 30
104113

105-
filter: |
106-
`control.throttle` > 0.5
114+
- _target_: pipefunc.PipeFunc
115+
renames:
116+
input: data_resampled
117+
output_name: data_indexed
118+
func:
119+
_target_: rbyte.io.DataFrameIndexer
120+
name: _idx_
107121

108-
#@ end
122+
- _target_: pipefunc.PipeFunc
123+
renames:
124+
input: data_indexed
125+
output_name: data_filtered
126+
func:
127+
_target_: rbyte.io.DataFrameFilter
128+
predicate: |
129+
`control.throttle` > 0.5
109130
110-
sample_builder:
111-
_target_: rbyte.RollingWindowSampleBuilder
112-
index_column: _idx_
113-
period: 1i
131+
- _target_: pipefunc.PipeFunc
132+
renames:
133+
input: data_filtered
134+
output_name: samples
135+
func:
136+
_target_: rbyte.RollingWindowSampleBuilder
137+
index_column: _idx_
138+
period: 1i
139+
#@ end

config/_templates/dataset/mimicgen.yaml

+40-22
Original file line number | Diff line number | Diff line change
@@ -12,8 +12,8 @@
1212
#@ ]
1313
---
1414
_target_: rbyte.Dataset
15-
_convert_: all
1615
_recursive_: false
16+
_convert_: all
1717
inputs:
1818
#@ for input_id, input_keys in inputs.items():
1919
#@ for input_key in input_keys:
@@ -28,27 +28,45 @@ inputs:
2828
key: (@=input_key@)/(@=frame_key@)
2929
#@ end
3030

31-
table_builder:
32-
_target_: rbyte.io.TableBuilder
33-
_convert_: all
34-
readers:
35-
hdf5:
36-
path: "${data_dir}/(@=input_id@).hdf5"
37-
reader:
38-
_target_: rbyte.io.Hdf5TableReader
39-
_recursive_: false
40-
fields:
41-
(@=input_key@):
42-
_idx_:
43-
obs/robot0_eef_pos:
31+
samples:
32+
pipeline:
33+
_target_: pipefunc.Pipeline
34+
validate_type_annotations: false
35+
functions:
36+
- _target_: pipefunc.PipeFunc
37+
bound:
38+
path: "${data_dir}/(@=input_id@).hdf5"
39+
output_name: data
40+
func:
41+
_target_: rbyte.io.Hdf5DataFrameBuilder
42+
fields:
43+
(@=input_key@):
44+
obs/robot0_eef_pos:
45+
46+
- _target_: pipefunc.PipeFunc
47+
renames:
48+
input: data
49+
output_name: data_indexed
50+
func:
51+
_target_: rbyte.io.DataFrameIndexer
52+
name: _idx_
53+
54+
- _target_: pipefunc.PipeFunc
55+
renames:
56+
input: data_indexed
57+
output_name: data_concated
58+
func:
59+
_target_: rbyte.io.DataFrameConcater
60+
method: vertical
61+
62+
- _target_: pipefunc.PipeFunc
63+
renames:
64+
input: data_concated
65+
output_name: samples
66+
func:
67+
_target_: rbyte.RollingWindowSampleBuilder
68+
index_column: _idx_
69+
period: 1i
4470

45-
merger:
46-
_target_: rbyte.io.TableConcater
47-
method: vertical
4871
#@ end
4972
#@ end
50-
51-
sample_builder:
52-
_target_: rbyte.RollingWindowSampleBuilder
53-
index_column: _idx_
54-
period: 1i

config/_templates/dataset/nuscenes/mcap.yaml

+76-57
Original file line number | Diff line number | Diff line change
@@ -11,15 +11,15 @@
1111
#@ }
1212
---
1313
_target_: rbyte.Dataset
14-
_convert_: all
1514
_recursive_: false
15+
_convert_: all
1616
inputs:
1717
#@ for input_id in inputs:
1818
(@=input_id@):
1919
sources:
2020
#@ for camera, topic in camera_topics.items():
2121
(@=camera@):
22-
index_column: mcap/(@=topic@)/_idx_
22+
index_column: (@=topic@)/_idx_
2323
source:
2424
_target_: rbyte.io.McapTensorSource
2525
path: "${data_dir}/(@=input_id@).mcap"
@@ -33,66 +33,85 @@ inputs:
3333
fastupsample: true
3434
#@ end
3535

36-
table_builder:
37-
_target_: rbyte.io.TableBuilder
38-
_convert_: all
39-
readers:
40-
mcap:
41-
path: "${data_dir}/(@=input_id@).mcap"
42-
reader:
43-
_target_: rbyte.io.McapTableReader
44-
_recursive_: false
45-
decoder_factories:
46-
- rbyte.utils.mcap.ProtobufDecoderFactory
47-
- rbyte.utils.mcap.JsonDecoderFactory
36+
samples:
37+
pipeline:
38+
_target_: pipefunc.Pipeline
39+
validate_type_annotations: false
40+
functions:
41+
- _target_: pipefunc.PipeFunc
42+
bound:
43+
path: ${data_dir}/(@=input_id@).mcap
44+
output_name: data
45+
func:
46+
_target_: rbyte.io.McapDataFrameBuilder
47+
decoder_factories:
48+
- rbyte.utils._mcap.ProtobufDecoderFactory
49+
- rbyte.utils._mcap.JsonDecoderFactory
50+
fields:
51+
#@ for topic in camera_topics.values():
52+
(@=topic@):
53+
log_time:
54+
_target_: polars.Datetime
55+
time_unit: ns
56+
#@ end
4857

49-
fields:
50-
#@ for topic in camera_topics.values():
51-
(@=topic@):
52-
_idx_:
53-
log_time:
54-
_target_: polars.Datetime
55-
time_unit: ns
56-
#@ end
58+
/odom:
59+
log_time:
60+
_target_: polars.Datetime
61+
time_unit: ns
62+
vel.x:
5763

58-
/odom:
59-
log_time:
60-
_target_: polars.Datetime
61-
time_unit: ns
62-
vel.x:
64+
- _target_: pipefunc.PipeFunc
65+
renames:
66+
input: data
67+
output_name: data_indexed
68+
func:
69+
_target_: rbyte.io.DataFrameIndexer
70+
name: _idx_
6371

64-
merger:
65-
_target_: rbyte.io.TableAligner
66-
separator: "/"
67-
merge:
68-
mcap:
69-
#@ topic = camera_topics.values()[0]
70-
(@=topic@):
71-
key: log_time
72+
- _target_: pipefunc.PipeFunc
73+
renames:
74+
input: data_indexed
75+
output_name: data_aligned
76+
func:
77+
_target_: rbyte.io.DataFrameAligner
78+
separator: /
79+
fields:
80+
#@ topic = camera_topics.values()[0]
81+
(@=topic@):
82+
key: log_time
7283

73-
#@ for topic in camera_topics.values()[1:]:
74-
(@=topic@):
75-
key: log_time
76-
columns:
77-
_idx_:
78-
method: asof
79-
tolerance: 40ms
80-
strategy: nearest
81-
#@ end
84+
#@ for topic in camera_topics.values()[1:]:
85+
(@=topic@):
86+
key: log_time
87+
columns:
88+
_idx_:
89+
method: asof
90+
tolerance: 40ms
91+
strategy: nearest
92+
#@ end
8293

83-
/odom:
84-
key: log_time
85-
columns:
86-
vel.x:
87-
method: interp
94+
/odom:
95+
key: log_time
96+
columns:
97+
vel.x:
98+
method: interp
8899

89-
filter: |
90-
`mcap//odom/vel.x` >= 8
100+
- _target_: pipefunc.PipeFunc
101+
renames:
102+
input: data_aligned
103+
output_name: data_filtered
104+
func:
105+
_target_: rbyte.io.DataFrameFilter
106+
predicate: |
107+
`/odom/vel.x` >= 8
91108
92-
cache:
109+
- _target_: pipefunc.PipeFunc
110+
renames:
111+
input: data_filtered
112+
output_name: samples
113+
func:
114+
_target_: rbyte.RollingWindowSampleBuilder
115+
index_column: (@=camera_topics.values()[0]@)/_idx_
116+
period: 1i
93117
#@ end
94-
95-
sample_builder:
96-
_target_: rbyte.RollingWindowSampleBuilder
97-
index_column: mcap/(@=camera_topics.values()[0]@)/_idx_
98-
period: 1i

0 commit comments

Comments
 (0)