Skip to content

Commit 9585c74

Browse files
authored
feat: cacheable YaakMetadataDataFrameBuilder (#34)
1 parent ffd14dc commit 9585c74

18 files changed

+48
-79
lines changed

.pre-commit-config.yaml

+2-2
Original file line number | Diff line number | Diff line change
@@ -11,14 +11,14 @@ repos:
1111
- id: pyupgrade
1212

1313
- repo: https://github.com/astral-sh/ruff-pre-commit
14-
rev: v0.9.2
14+
rev: v0.9.4
1515
hooks:
1616
- id: ruff
1717
args: [--fix]
1818
- id: ruff-format
1919

2020
- repo: https://github.com/DetachHead/basedpyright-pre-commit-mirror
21-
rev: 1.24.0
21+
rev: 1.26.0
2222
hooks:
2323
- id: basedpyright
2424

config/_templates/dataset/yaak.yaml

+34-34
Original file line number | Diff line number | Diff line change
@@ -36,8 +36,40 @@ inputs:
3636
functions:
3737
- _target_: pipefunc.PipeFunc
3838
func:
39-
_target_: hydra.utils.get_method
40-
path: rbyte.io.build_yaak_metadata_dataframe
39+
_target_: rbyte.io.YaakMetadataDataFrameBuilder
40+
fields:
41+
rbyte.io.yaak.proto.sensor_pb2.ImageMetadata:
42+
time_stamp:
43+
_target_: polars.Datetime
44+
time_unit: ns
45+
46+
frame_idx:
47+
_target_: polars.Int32
48+
49+
camera_name:
50+
_target_: polars.Enum
51+
categories:
52+
- cam_front_center
53+
- cam_front_left
54+
- cam_front_right
55+
- cam_left_forward
56+
- cam_right_forward
57+
- cam_left_backward
58+
- cam_right_backward
59+
- cam_rear
60+
61+
rbyte.io.yaak.proto.can_pb2.VehicleMotion:
62+
time_stamp:
63+
_target_: polars.Datetime
64+
time_unit: ns
65+
66+
speed:
67+
_target_: polars.Float32
68+
69+
gear:
70+
_target_: polars.Enum
71+
categories: ["0", "1", "2", "3"]
72+
4173
output_name: output
4274
scope: metadata
4375
cache: true
@@ -172,36 +204,4 @@ inputs:
172204
kwargs:
173205
metadata:
174206
path: ${data_dir}/(@=input_id@)/metadata.log
175-
fields:
176-
rbyte.io.yaak.proto.sensor_pb2.ImageMetadata:
177-
time_stamp:
178-
_target_: polars.Datetime
179-
time_unit: ns
180-
181-
frame_idx:
182-
_target_: polars.Int32
183-
184-
camera_name:
185-
_target_: polars.Enum
186-
categories:
187-
- cam_front_center
188-
- cam_front_left
189-
- cam_front_right
190-
- cam_left_forward
191-
- cam_right_forward
192-
- cam_left_backward
193-
- cam_right_backward
194-
- cam_rear
195-
196-
rbyte.io.yaak.proto.can_pb2.VehicleMotion:
197-
time_stamp:
198-
_target_: polars.Datetime
199-
time_unit: ns
200-
201-
speed:
202-
_target_: polars.Float32
203-
204-
gear:
205-
_target_: polars.Enum
206-
categories: ["0", "1", "2", "3"]
207207
#@ end

pyproject.toml

+4-3
Original file line number | Diff line number | Diff line change
@@ -1,14 +1,14 @@
11
[project]
22
name = "rbyte"
3-
version = "0.11.0"
3+
version = "0.11.1"
44
description = "Multimodal PyTorch dataset library"
55
authors = [{ name = "Evgenii Gorchakov", email = "[email protected]" }]
66
maintainers = [{ name = "Evgenii Gorchakov", email = "[email protected]" }]
77
dependencies = [
88
"tensordict>=0.6.2",
99
"torch",
1010
"numpy",
11-
"polars>=1.18.0",
11+
"polars>=1.21.0",
1212
"pydantic>=2.10.2",
1313
"more-itertools>=10.5.0",
1414
"hydra-core>=1.3.2",
@@ -18,7 +18,8 @@ dependencies = [
1818
"parse>=1.20.2",
1919
"structlog>=24.4.0",
2020
"tqdm>=4.66.5",
21-
"pipefunc>=0.50.0",
21+
"pipefunc>=0.53.0",
22+
"xxhash>=3.5.0",
2223
]
2324
readme = "README.md"
2425
requires-python = ">=3.12,<3.13"

src/rbyte/io/__init__.py

+2-2
Original file line number | Diff line number | Diff line change
@@ -57,8 +57,8 @@
5757
__all__ += ["VideoDataFrameBuilder"]
5858

5959
try:
60-
from .yaak import YaakMetadataDataFrameBuilder, build_yaak_metadata_dataframe
60+
from .yaak import YaakMetadataDataFrameBuilder
6161
except ImportError:
6262
pass
6363
else:
64-
__all__ += ["YaakMetadataDataFrameBuilder", "build_yaak_metadata_dataframe"]
64+
__all__ += ["YaakMetadataDataFrameBuilder"]

src/rbyte/io/_json/dataframe_builder.py

-2
Original file line number | Diff line number | Diff line change
@@ -19,8 +19,6 @@
1919

2020
@final
2121
class JsonDataFrameBuilder:
22-
__name__ = __qualname__
23-
2422
@validate_call(config=ConfigDict(arbitrary_types_allowed=True))
2523
def __init__(self, fields: Fields) -> None:
2624
self._fields = fields

src/rbyte/io/_mcap/dataframe_builder.py

-2
Original file line number | Diff line number | Diff line change
@@ -45,8 +45,6 @@ class SpecialField(StrEnum):
4545

4646
@final
4747
class McapDataFrameBuilder:
48-
__name__ = __qualname__
49-
5048
@validate_call(config=ConfigDict(arbitrary_types_allowed=True))
5149
def __init__(
5250
self,

src/rbyte/io/dataframe/aligner.py

-2
Original file line number | Diff line number | Diff line change
@@ -45,8 +45,6 @@ class MergeConfig(BaseModel):
4545

4646
@final
4747
class DataFrameAligner:
48-
__name__ = __qualname__
49-
5048
@validate_call
5149
def __init__(self, *, fields: Fields, separator: str = "/") -> None:
5250
self._fields = fields

src/rbyte/io/dataframe/concater.py

-2
Original file line number | Diff line number | Diff line change
@@ -8,8 +8,6 @@
88

99
@final
1010
class DataFrameConcater:
11-
__name__ = __qualname__
12-
1311
@validate_call
1412
def __init__(
1513
self, method: ConcatMethod = "horizontal", separator: str | None = None

src/rbyte/io/dataframe/filter.py

-2
Original file line number | Diff line number | Diff line change
@@ -5,8 +5,6 @@
55

66
@final
77
class DataFrameFilter:
8-
__name__ = __qualname__
9-
108
def __init__(self, predicate: str) -> None:
119
self._query = f"select * from self where {predicate}" # noqa: S608
1210

src/rbyte/io/dataframe/fps_resampler.py

-2
Original file line numberDiff line numberDiff line change
@@ -8,8 +8,6 @@
88

99
@final
1010
class DataFrameFpsResampler:
11-
__name__ = __qualname__
12-
1311
IDX_COL = uuid4().hex
1412

1513
@validate_call

src/rbyte/io/dataframe/indexer.py

-2
Original file line numberDiff line numberDiff line change
@@ -8,8 +8,6 @@
88

99
@final
1010
class DataFrameIndexer:
11-
__name__ = __qualname__
12-
1311
@validate_call
1412
def __init__(self, name: str) -> None:
1513
self._fn = partial(pl.DataFrame.with_row_index, name=name)

src/rbyte/io/hdf5/dataframe_builder.py

-2
Original file line numberDiff line numberDiff line change
@@ -18,8 +18,6 @@
1818

1919
@final
2020
class Hdf5DataFrameBuilder:
21-
__name__ = __qualname__
22-
2321
@validate_call(config=ConfigDict(arbitrary_types_allowed=True))
2422
def __init__(self, fields: Fields) -> None:
2523
self._fields = fields

src/rbyte/io/path/dataframe_builder.py

-2
Original file line numberDiff line numberDiff line change
@@ -22,8 +22,6 @@
2222

2323
@final
2424
class PathDataFrameBuilder:
25-
__name__ = __qualname__
26-
2725
@validate_call(config=ConfigDict(arbitrary_types_allowed=True))
2826
def __init__(self, fields: Fields) -> None:
2927
self._fields = fields

src/rbyte/io/rrd/dataframe_builder.py

-2
Original file line numberDiff line numberDiff line change
@@ -17,8 +17,6 @@ class Column(StrEnum):
1717

1818
@final
1919
class RrdDataFrameBuilder:
20-
__name__ = __qualname__
21-
2220
@validate_call
2321
def __init__(
2422
self, index: str, contents: Mapping[str, Sequence[str] | None]

src/rbyte/io/video/dataframe_builder.py

-2
Original file line numberDiff line numberDiff line change
@@ -18,8 +18,6 @@
1818

1919
@final
2020
class VideoDataFrameBuilder:
21-
__name__ = __qualname__
22-
2321
@validate_call(config=ConfigDict(arbitrary_types_allowed=True))
2422
def __init__(self, fields: Fields) -> None:
2523
self._fields = fields

src/rbyte/io/yaak/__init__.py

+2-5
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,3 @@
1-
from .dataframe_builder import (
2-
YaakMetadataDataFrameBuilder,
3-
build_yaak_metadata_dataframe,
4-
)
1+
from .dataframe_builder import YaakMetadataDataFrameBuilder
52

6-
__all__ = ["YaakMetadataDataFrameBuilder", "build_yaak_metadata_dataframe"]
3+
__all__ = ["YaakMetadataDataFrameBuilder"]

src/rbyte/io/yaak/dataframe_builder.py

+4-9
Original file line number | Diff line number | Diff line change
@@ -17,6 +17,7 @@
1717
from pydantic import ConfigDict, ImportString, validate_call
1818
from structlog import get_logger
1919
from tqdm import tqdm
20+
from xxhash import xxh3_64_hexdigest as digest
2021

2122
from .message_iterator import YaakMetadataMessageIterator
2223
from .proto import sensor_pb2
@@ -31,14 +32,15 @@
3132

3233
@final
3334
class YaakMetadataDataFrameBuilder:
34-
__name__ = __qualname__
35-
3635
@validate_call(config=ConfigDict(arbitrary_types_allowed=True))
3736
def __init__(self, *, fields: Fields) -> None:
3837
super().__init__()
3938

4039
self._fields = fields
4140

41+
def __pipefunc_hash__(self) -> str: # noqa: PLW3201
42+
return digest(str(self._fields))
43+
4244
def __call__(self, path: PathLike[str]) -> Mapping[str, pl.DataFrame]:
4345
with Path(path).open("rb") as _f, mmap(_f.fileno(), 0, access=ACCESS_READ) as f:
4446
handler_pool = HandlerPool()
@@ -77,10 +79,3 @@ def __call__(self, path: PathLike[str]) -> Mapping[str, pl.DataFrame]:
7779
}
7880

7981
return dfs
80-
81-
82-
# exposing all kwargs so its cacheable by pipefunc
83-
def build_yaak_metadata_dataframe(
84-
*, path: PathLike[str], fields: Fields
85-
) -> Mapping[str, pl.DataFrame]:
86-
return YaakMetadataDataFrameBuilder(fields=fields)(path)

src/rbyte/sample/fixed_window.py

-2
Original file line number | Diff line number | Diff line change
@@ -16,8 +16,6 @@ class FixedWindowSampleBuilder:
1616
https://docs.pola.rs/api/python/stable/reference/dataframe/api/polars.DataFrame.group_by_dynamic
1717
"""
1818

19-
__name__ = __qualname__
20-
2119
@validate_call
2220
def __init__( # noqa: PLR0913
2321
self,

0 commit comments

Comments
 (0)