Skip to content

Commit 8521c1e

Browse files
authored
Refactor time series management (#78)
* Refactor time series management
1 parent 47e823d commit 8521c1e

22 files changed

+751
-429
lines changed

.gitignore

+1
Original file line numberDiff line numberDiff line change
@@ -130,3 +130,4 @@ dmypy.json
130130
# pixi environments
131131
.pixi
132132

133+
.vscode

docs/how_tos/list_time_series.md

+42-51
Original file line numberDiff line numberDiff line change
@@ -3,61 +3,52 @@
33
Suppose that you have added multiple time series arrays to your components using differing
44
names and attributes. How can you see what is present?
55

6-
This example assumes that a system with two generators and time series data has been serialized
7-
to a file.
6+
This example uses a test module in the `infrasys` repository.
7+
8+
The call to `system.add_time_series` returns a key. You can store those keys yourself or look them
9+
up later with `system.list_time_series_keys`. Here's how to do it.
810

911
```python
10-
from infrasys import Component, System
11-
12-
system = System.from_json("system.json")
13-
for component in system.get_components(Component):
14-
for metadata in system.list_time_series_metadata(component):
15-
print(f"{component.label}: {metadata.label} {metadata.user_attributes}")
16-
17-
Generator.gen1: SingleTimeSeries.active_power {'scenario': 'high', 'model_year': '2030'}
18-
Generator.gen1: SingleTimeSeries.active_power {'scenario': 'high', 'model_year': '2035'}
19-
Generator.gen1: SingleTimeSeries.active_power {'scenario': 'low', 'model_year': '2030'}
20-
Generator.gen1: SingleTimeSeries.active_power {'scenario': 'low', 'model_year': '2035'}
21-
Generator.gen1: SingleTimeSeries.reactive_power {'scenario': 'high', 'model_year': '2030'}
22-
Generator.gen1: SingleTimeSeries.reactive_power {'scenario': 'high', 'model_year': '2035'}
23-
Generator.gen1: SingleTimeSeries.reactive_power {'scenario': 'low', 'model_year': '2030'}
24-
Generator.gen1: SingleTimeSeries.reactive_power {'scenario': 'low', 'model_year': '2035'}
25-
Generator.gen2: SingleTimeSeries.active_power {'scenario': 'high', 'model_year': '2030'}
26-
Generator.gen2: SingleTimeSeries.active_power {'scenario': 'high', 'model_year': '2035'}
27-
Generator.gen2: SingleTimeSeries.active_power {'scenario': 'low', 'model_year': '2030'}
28-
Generator.gen2: SingleTimeSeries.active_power {'scenario': 'low', 'model_year': '2035'}
29-
Generator.gen2: SingleTimeSeries.reactive_power {'scenario': 'high', 'model_year': '2030'}
30-
Generator.gen2: SingleTimeSeries.reactive_power {'scenario': 'high', 'model_year': '2035'}
31-
Generator.gen2: SingleTimeSeries.reactive_power {'scenario': 'low', 'model_year': '2030'}
32-
Generator.gen2: SingleTimeSeries.reactive_power {'scenario': 'low', 'model_year': '2035'}
12+
from datetime import datetime, timedelta
13+
14+
import numpy as np
15+
16+
from infrasys import SingleTimeSeries
17+
from tests.models.simple_system import SimpleSystem, SimpleGenerator, SimpleBus
18+
19+
system = SimpleSystem()
20+
bus = SimpleBus(name="test-bus", voltage=1.1)
21+
gen = SimpleGenerator(name="gen", active_power=1.0, rating=1.0, bus=bus, available=True)
22+
system.add_components(bus, gen)
23+
24+
length = 10
25+
initial_time = datetime(year=2020, month=1, day=1)
26+
timestamps = [initial_time + timedelta(hours=i) for i in range(length)]
27+
variable_name = "active_power"
28+
ts1 = SingleTimeSeries.from_time_array(np.random.rand(length), variable_name, timestamps)
29+
ts2 = SingleTimeSeries.from_time_array(np.random.rand(length), variable_name, timestamps)
30+
key1 = system.add_time_series(ts1, gen, scenario="low")
31+
key2 = system.add_time_series(ts2, gen, scenario="high")
32+
33+
# Use the keys directly.
34+
ts1_b = system.get_time_series_by_key(gen, key1)
35+
ts2_b = system.get_time_series_by_key(gen, key2)
36+
37+
# Identify the keys later.
38+
for key in system.list_time_series_keys(gen):
39+
print(f"{gen.label}: {key}")
40+
```
41+
```
42+
SimpleGenerator.gen: variable_name='active_power' initial_time=datetime.datetime(2020, 1, 1, 0, 0) resolution=datetime.timedelta(seconds=3600) time_series_type=<class 'infrasys.time_series_models.SingleTimeSeries'> user_attributes={'scenario': 'high'} length=10
43+
SimpleGenerator.gen: variable_name='active_power' initial_time=datetime.datetime(2020, 1, 1, 0, 0) resolution=datetime.timedelta(seconds=3600) time_series_type=<class 'infrasys.time_series_models.SingleTimeSeries'> user_attributes={'scenario': 'low'} length=10
3344
```
3445

35-
Now you can retrieve the exact instance you want.
46+
You can also retrieve time series by specifying the parameters as shown here:
3647

3748
```python
38-
system.time_series.get(gen1, variable_name="active_power", scenario="high", model_year="2035").data
39-
<pyarrow.lib.Int64Array object at 0x107a38d60>
40-
[
41-
0,
42-
1,
43-
2,
44-
3,
45-
4,
46-
5,
47-
6,
48-
7,
49-
8,
50-
9,
51-
...
52-
8774,
53-
8775,
54-
8776,
55-
8777,
56-
8778,
57-
8779,
58-
8780,
59-
8781,
60-
8782,
61-
8783
62-
]
49+
system.time_series.get(gen, variable_name="active_power", scenario="high")
50+
```
51+
```
52+
SingleTimeSeries(variable_name='active_power', normalization=None, data=array([0.29276233, 0.97400382, 0.76499075, 0.95080431, 0.61749027,
53+
0.73899945, 0.57877704, 0.3411286 , 0.80701393, 0.53051773]), resolution=datetime.timedelta(seconds=3600), initial_time=datetime.datetime(2020, 1, 1, 0, 0), length=10)
6354
```

pyproject.toml

+5-8
Original file line numberDiff line numberDiff line change
@@ -27,18 +27,16 @@ classifiers = [
2727
"Programming Language :: Python :: Implementation :: PyPy",
2828
]
2929
dependencies = [
30-
# This is what we want, but are currently be limited by pyarrow.
31-
# Leave unbounded until pyarrow upgrades numpy.
32-
#"numpy >= 2, < 3",
33-
"numpy",
34-
"pyarrow~=15.0.2",
35-
"pint~=0.23",
3630
"loguru~=0.7.2",
37-
"rich~=13.7.1",
31+
"numpy >= 2, < 3",
32+
"pint~=0.23",
33+
"pyarrow~=19.0",
3834
"pydantic >= 2.7, < 3",
35+
"rich~=13.7.1",
3936
]
4037
[project.optional-dependencies]
4138
dev = [
39+
"autodoc_pydantic~=2.0",
4240
"furo",
4341
"mypy >=1.13, < 2",
4442
"myst_parser",
@@ -49,7 +47,6 @@ dev = [
4947
"ruff",
5048
"sphinx",
5149
"sphinx-click",
52-
"autodoc_pydantic~=2.0",
5350
"sphinx-copybutton",
5451
"sphinx-tabs~=3.4",
5552
]

src/infrasys/__init__.py

+9-1
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,12 @@
1111
from .normalization import NormalizationModel
1212
from .supplemental_attribute import SupplementalAttribute
1313
from .system import System
14-
from .time_series_models import SingleTimeSeries
14+
from .time_series_models import (
15+
SingleTimeSeries,
16+
TimeSeriesStorageType,
17+
TimeSeriesKey,
18+
SingleTimeSeriesKey,
19+
)
1520

1621

1722
__all__ = (
@@ -21,6 +26,9 @@
2126
"Location",
2227
"NormalizationModel",
2328
"SingleTimeSeries",
29+
"SingleTimeSeriesKey",
2430
"SupplementalAttribute",
2531
"System",
32+
"TimeSeriesKey",
33+
"TimeSeriesStorageType",
2634
)

src/infrasys/arrow_storage.py

+16-8
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@
1919
SingleTimeSeriesMetadata,
2020
TimeSeriesData,
2121
TimeSeriesMetadata,
22+
TimeSeriesStorageType,
2223
)
2324
from infrasys.time_series_storage_base import TimeSeriesStorageBase
2425

@@ -54,16 +55,15 @@ def add_time_series(
5455
self,
5556
metadata: TimeSeriesMetadata,
5657
time_series: TimeSeriesData,
58+
connection: Any = None,
5759
) -> None:
5860
if isinstance(time_series, SingleTimeSeries):
59-
self.add_raw_single_time_series(metadata.time_series_uuid, time_series.data_array)
61+
self._add_single_time_series(metadata.time_series_uuid, time_series.data_array)
6062
else:
6163
msg = f"Bug: need to implement add_time_series for {type(time_series)}"
6264
raise NotImplementedError(msg)
6365

64-
def add_raw_single_time_series(
65-
self, time_series_uuid: UUID, time_series_data: NDArray
66-
) -> None:
66+
def _add_single_time_series(self, time_series_uuid: UUID, time_series_data: NDArray) -> None:
6767
fpath = self._ts_directory.joinpath(f"{time_series_uuid}{EXTENSION}")
6868
if not fpath.exists():
6969
arrow_batch = self._convert_to_record_batch(time_series_data, str(time_series_uuid))
@@ -80,6 +80,7 @@ def get_time_series(
8080
metadata: TimeSeriesMetadata,
8181
start_time: datetime | None = None,
8282
length: int | None = None,
83+
connection: Any = None,
8384
) -> Any:
8485
if isinstance(metadata, SingleTimeSeriesMetadata):
8586
return self._get_single_time_series(
@@ -89,22 +90,29 @@ def get_time_series(
8990
msg = f"Bug: need to implement get_time_series for {type(metadata)}"
9091
raise NotImplementedError(msg)
9192

92-
def remove_time_series(self, uuid: UUID) -> None:
93-
fpath = self._ts_directory.joinpath(f"{uuid}{EXTENSION}")
93+
def remove_time_series(self, metadata: TimeSeriesMetadata, connection: Any = None) -> None:
94+
fpath = self._ts_directory.joinpath(f"{metadata.time_series_uuid}{EXTENSION}")
9495
if not fpath.exists():
95-
msg = f"No time series with {uuid} is stored"
96+
msg = f"No time series with {metadata.time_series_uuid} is stored"
9697
raise ISNotStored(msg)
9798
fpath.unlink()
9899

99-
def serialize(self, dst: Path | str, src: Optional[Path | str] = None) -> None:
100+
def serialize(
101+
self, data: dict[str, Any], dst: Path | str, src: Path | str | None = None
102+
) -> None:
100103
# From the shutil documentation: the copying operation will continue if
101104
# it encounters existing directories, and files within the dst tree
102105
# will be overwritten by corresponding files from the src tree.
103106
if src is None:
104107
src = self._ts_directory
105108
shutil.copytree(src, dst, dirs_exist_ok=True)
109+
self.add_serialized_data(data)
106110
logger.info("Copied time series data to {}", dst)
107111

112+
@staticmethod
113+
def add_serialized_data(data: dict[str, Any]) -> None:
114+
data["time_series_storage_type"] = TimeSeriesStorageType.ARROW.value
115+
108116
def _get_single_time_series(
109117
self,
110118
metadata: SingleTimeSeriesMetadata,

src/infrasys/in_memory_time_series_storage.py

+18-11
Original file line numberDiff line numberDiff line change
@@ -2,12 +2,13 @@
22

33
from datetime import datetime
44
from pathlib import Path
5+
from typing import Any
6+
57
from numpy.typing import NDArray
6-
from typing import Optional, TypeAlias
8+
from typing import TypeAlias
79
from uuid import UUID
810

911
from loguru import logger
10-
from infrasys.arrow_storage import ArrowTimeSeriesStorage
1112

1213
from infrasys.exceptions import ISNotStored
1314
from infrasys.time_series_models import (
@@ -30,7 +31,12 @@ def __init__(self) -> None:
3031
def get_time_series_directory(self) -> None:
3132
return None
3233

33-
def add_time_series(self, metadata: TimeSeriesMetadata, time_series: TimeSeriesData) -> None:
34+
def add_time_series(
35+
self,
36+
metadata: TimeSeriesMetadata,
37+
time_series: TimeSeriesData,
38+
connection: Any = None,
39+
) -> None:
3440
if isinstance(time_series, SingleTimeSeries):
3541
if metadata.time_series_uuid not in self._arrays:
3642
self._arrays[metadata.time_series_uuid] = time_series.data_array
@@ -47,22 +53,23 @@ def get_time_series(
4753
metadata: TimeSeriesMetadata,
4854
start_time: datetime | None = None,
4955
length: int | None = None,
56+
connection: Any = None,
5057
) -> TimeSeriesData:
5158
if isinstance(metadata, SingleTimeSeriesMetadata):
5259
return self._get_single_time_series(metadata, start_time, length)
5360
raise NotImplementedError(str(metadata.get_time_series_data_type()))
5461

55-
def remove_time_series(self, uuid: UUID) -> None:
56-
time_series = self._arrays.pop(uuid, None)
62+
def remove_time_series(self, metadata: TimeSeriesMetadata, connection: Any = None) -> None:
63+
time_series = self._arrays.pop(metadata.time_series_uuid, None)
5764
if time_series is None:
58-
msg = f"No time series with {uuid} is stored"
65+
msg = f"No time series with {metadata.time_series_uuid} is stored"
5966
raise ISNotStored(msg)
6067

61-
def serialize(self, dst: Path | str, _: Optional[Path | str] = None) -> None:
62-
base_directory = dst if isinstance(dst, Path) else Path(dst)
63-
storage = ArrowTimeSeriesStorage.create_with_permanent_directory(base_directory)
64-
for ts_uuid, ts in self._arrays.items():
65-
storage.add_raw_single_time_series(ts_uuid, ts)
68+
def serialize(
69+
self, data: dict[str, Any], dst: Path | str, src: Path | str | None = None
70+
) -> None:
71+
msg = "Bug: InMemoryTimeSeriesStorage.serialize should never be called."
72+
raise Exception(msg)
6673

6774
def _get_single_time_series(
6875
self,

src/infrasys/parquet_time_series_storage.py

-26
This file was deleted.

0 commit comments

Comments
 (0)