Skip to content

Commit 304b5ef

Browse files
authored
feat: add pipefunc cache example (#26)
1 parent cc5b915 commit 304b5ef

File tree

6 files changed

+69
-48
lines changed

6 files changed

+69
-48
lines changed

config/_templates/dataset/yaak.yaml

+49-40
Original file line numberDiff line numberDiff line change
@@ -30,50 +30,23 @@ inputs:
3030
pipeline:
3131
_target_: pipefunc.Pipeline
3232
validate_type_annotations: false
33+
cache_type: disk
34+
cache_kwargs:
35+
cache_dir: /tmp/rbyte-cache
3336
functions:
3437
- _target_: pipefunc.PipeFunc
35-
bound:
36-
path: ${data_dir}/(@=input_id@)/metadata.log
37-
output_name: meta_data
38+
scope: meta
39+
output_name: data
40+
cache: true
3841
func:
39-
_target_: rbyte.io.YaakMetadataDataFrameBuilder
40-
fields:
41-
rbyte.io.yaak.proto.sensor_pb2.ImageMetadata:
42-
time_stamp:
43-
_target_: polars.Datetime
44-
time_unit: ns
45-
46-
frame_idx:
47-
_target_: polars.Int32
48-
49-
camera_name:
50-
_target_: polars.Enum
51-
categories:
52-
- cam_front_center
53-
- cam_front_left
54-
- cam_front_right
55-
- cam_left_forward
56-
- cam_right_forward
57-
- cam_left_backward
58-
- cam_right_backward
59-
- cam_rear
60-
61-
rbyte.io.yaak.proto.can_pb2.VehicleMotion:
62-
time_stamp:
63-
_target_: polars.Datetime
64-
time_unit: ns
65-
66-
speed:
67-
_target_: polars.Float32
68-
69-
gear:
70-
_target_: polars.Enum
71-
categories: ["0", "1", "2", "3"]
42+
_target_: hydra.utils.get_method
43+
path: rbyte.io.build_yaak_metadata_dataframe
7244

7345
- _target_: pipefunc.PipeFunc
46+
scope: mcap
7447
bound:
7548
path: ${data_dir}/(@=input_id@)/ai.mcap
76-
output_name: mcap_data
49+
output_name: data
7750
func:
7851
_target_: rbyte.io.McapDataFrameBuilder
7952
decoder_factories: [rbyte.utils._mcap.ProtobufDecoderFactory]
@@ -94,8 +67,8 @@ inputs:
9467
k0: meta
9568
k1: mcap
9669
renames:
97-
v0: meta_data
98-
v1: mcap_data
70+
v0: meta.data
71+
v1: mcap.data
9972
output_name: data
10073

10174
- _target_: pipefunc.PipeFunc
@@ -150,7 +123,7 @@ inputs:
150123
func:
151124
_target_: rbyte.io.DataFrameFilter
152125
predicate: |
153-
`meta/VehicleMotion/gear` == '3'
126+
`meta/VehicleMotion/speed` > 44
154127
155128
- _target_: pipefunc.PipeFunc
156129
renames:
@@ -170,4 +143,40 @@ inputs:
170143
_target_: rbyte.io.DataFrameFilter
171144
predicate: |
172145
array_length(`meta/ImageMetadata.(@=cameras[0]@)/time_stamp`) == 6
146+
147+
kwargs:
148+
meta:
149+
path: ${data_dir}/(@=input_id@)/metadata.log
150+
fields:
151+
rbyte.io.yaak.proto.sensor_pb2.ImageMetadata:
152+
time_stamp:
153+
_target_: polars.Datetime
154+
time_unit: ns
155+
156+
frame_idx:
157+
_target_: polars.Int32
158+
159+
camera_name:
160+
_target_: polars.Enum
161+
categories:
162+
- cam_front_center
163+
- cam_front_left
164+
- cam_front_right
165+
- cam_left_forward
166+
- cam_right_forward
167+
- cam_left_backward
168+
- cam_right_backward
169+
- cam_rear
170+
171+
rbyte.io.yaak.proto.can_pb2.VehicleMotion:
172+
time_stamp:
173+
_target_: polars.Datetime
174+
time_unit: ns
175+
176+
speed:
177+
_target_: polars.Float32
178+
179+
gear:
180+
_target_: polars.Enum
181+
categories: ["0", "1", "2", "3"]
173182
#@ end

pyproject.toml

+2-2
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
[project]
22
name = "rbyte"
3-
version = "0.9.0"
3+
version = "0.9.1"
44
description = "Multimodal PyTorch dataset library"
55
authors = [{ name = "Evgenii Gorchakov", email = "[email protected]" }]
66
maintainers = [{ name = "Evgenii Gorchakov", email = "[email protected]" }]
@@ -20,7 +20,7 @@ dependencies = [
2020
"structlog>=24.4.0",
2121
"xxhash>=3.5.0",
2222
"tqdm>=4.66.5",
23-
"pipefunc>=0.40.2",
23+
"pipefunc>=0.41.0",
2424
]
2525
readme = "README.md"
2626
requires-python = ">=3.12,<3.13"

src/rbyte/dataset.py

+4-2
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@
55

66
import polars as pl
77
import torch
8+
from hydra.utils import instantiate
89
from pipefunc import Pipeline
910
from pydantic import Field, StringConstraints, validate_call
1011
from structlog import get_logger
@@ -69,9 +70,10 @@ def __init__(
6970
output_name = (
7071
samples_cfg.output_name or pipeline.unique_leaf_node.output_name # pyright: ignore[reportUnknownMemberType]
7172
)
72-
samples[input_id] = pipeline.run(
73-
output_name=output_name, kwargs=samples_cfg.kwargs
73+
kwargs = instantiate(
74+
samples_cfg.kwargs, _recursive_=True, _convert_="all"
7475
)
76+
samples[input_id] = pipeline.run(output_name=output_name, kwargs=kwargs)
7577
logger.debug(
7678
"built samples",
7779
columns=samples[input_id].columns,

src/rbyte/io/__init__.py

+2-2
Original file line numberDiff line numberDiff line change
@@ -50,8 +50,8 @@
5050
__all__ += ["FfmpegFrameSource"]
5151

5252
try:
53-
from .yaak import YaakMetadataDataFrameBuilder
53+
from .yaak import YaakMetadataDataFrameBuilder, build_yaak_metadata_dataframe
5454
except ImportError:
5555
pass
5656
else:
57-
__all__ += ["YaakMetadataDataFrameBuilder"]
57+
__all__ += ["YaakMetadataDataFrameBuilder", "build_yaak_metadata_dataframe"]

src/rbyte/io/yaak/__init__.py

+5-2
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,6 @@
1-
from .dataframe_builder import YaakMetadataDataFrameBuilder
1+
from .dataframe_builder import (
2+
YaakMetadataDataFrameBuilder,
3+
build_yaak_metadata_dataframe,
4+
)
25

3-
__all__ = ["YaakMetadataDataFrameBuilder"]
6+
__all__ = ["YaakMetadataDataFrameBuilder", "build_yaak_metadata_dataframe"]

src/rbyte/io/yaak/dataframe_builder.py

+7
Original file line numberDiff line numberDiff line change
@@ -77,3 +77,10 @@ def __call__(self, path: PathLike[str]) -> Mapping[str, pl.DataFrame]:
7777
}
7878

7979
return dfs
80+
81+
82+
# exposing all kwargs so it's cacheable by pipefunc
83+
def build_yaak_metadata_dataframe(
84+
*, path: PathLike[str], fields: Fields
85+
) -> Mapping[str, pl.DataFrame]:
86+
return YaakMetadataDataFrameBuilder(fields=fields)(path)

0 commit comments

Comments
 (0)