OpenOA/test/unit/test_timeseries_toolkit.py at 8197ef55f8a5df92ad22c3414a5d7d9414301cd5 · NatLabRockies/OpenOA · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
import unittest
from datetime import datetime

import numpy as np
import pandas as pd
from pytz import timezone
from numpy import testing as nptest

from openoa.utils import timeseries


class SimpleTimeseriesTests(unittest.TestCase):
    """Unit tests for the helper functions exposed by ``openoa.utils.timeseries``."""

    def setUp(self):
        """Create the time zones, naive datetimes, and 10-minute timestamp series used by the tests."""
        self.mountain_tz = timezone("US/Mountain")
        self.pacific_tz = timezone("US/Pacific")

        # Naive (no tzinfo) midnights: one in summer and one in winter.
        self.summer_midnight = datetime(2018, 7, 16, 0, 0, 0)
        self.winter_midnight = datetime(2018, 1, 11, 0, 0, 0)

        # One full day of 10-minute timestamps.
        self.day_of_data = pd.Series(
            pd.date_range(start="1/1/2018 00:00:00", end="1/1/2018 23:59:59", freq="10min")
        )

        # The same day followed by a second full day one month later.
        self.two_days_of_data = pd.concat(
            [
                self.day_of_data,
                pd.Series(
                    pd.date_range(start="2/1/2018 00:00:00", end="2/1/2018 23:59:59", freq="10min")
                ),
            ],
            axis=0,
        )

    def test_convert_local_to_utc(self):
        """Check ``convert_local_to_utc`` for tz-aware input and for naive summer/winter input."""
        # Pass in a localized datetime with matching tz string and make sure it throws an exception.
        # The conversion must be invoked inside the ``assertRaises`` context; handing
        # ``assertRaises`` an already-built datetime as the "callable" only ever tests that a
        # datetime object is not callable.
        localized_midnight = self.mountain_tz.localize(self.summer_midnight)
        with self.assertRaises(
            Exception, msg="T1: No exception raised for a datetime object with baked in TZInfo"
        ):
            timeseries.convert_local_to_utc(localized_midnight, "US/Mountain")

        # Pass in a non-localized datetime with tz string
        mm_utc = timeseries.convert_local_to_utc(self.summer_midnight, "US/Pacific")
        hours_diff = self.summer_midnight.hour - mm_utc.hour
        # PDT is UTC -7
        self.assertEqual(hours_diff, -7, "T2: PDT is not UTC -7?")

        # Pass in a non-localized winter datetime with tz string
        mm_utc = timeseries.convert_local_to_utc(self.winter_midnight, "US/Mountain")
        # Compare against the winter input that was actually converted.
        hours_diff = self.winter_midnight.hour - mm_utc.hour
        # MST is UTC -7
        self.assertEqual(hours_diff, -7, "T3: MST is not UTC -7?")

    def test_find_time_gaps(self):
        """Check ``find_time_gaps`` on complete, gappy, shuffled, and empty 10-minute series."""
        # A full day worth of data has zero gaps
        day_of_data = self.day_of_data
        no_gaps = timeseries.find_time_gaps(day_of_data, "10min")
        self.assertEqual(no_gaps.size, 0, "T1: Something with no gaps was reported to have gaps")

        # Removing two timestamps should give a result of size two
        missing_two = day_of_data.drop([2, 3])
        two_gaps = timeseries.find_time_gaps(missing_two, "10min")
        self.assertEqual(two_gaps.size, 2, "T2: Did not properly detect two gaps in 10M timeseries")

        # Shuffling the above series should maintain the same number of gaps.
        # A locally seeded generator keeps the shuffle order reproducible between runs.
        rng = np.random.default_rng(0)
        shuffled_missing_two = pd.Series(rng.permutation(missing_two.to_numpy()))
        two_gaps = timeseries.find_time_gaps(shuffled_missing_two, "10min")
        self.assertEqual(
            two_gaps.size, 2, "T3: Did not properly detect two gaps in shuffled 10M timeseries"
        )

        # An empty series has zero gaps
        empty_series = pd.Series(dtype=np.float64)
        no_gaps = timeseries.find_time_gaps(empty_series, "10min")
        self.assertEqual(no_gaps.size, 0, "T4: Empty series should have zero gaps")

    def test_find_duplicate_times(self):
        """Check ``find_duplicate_times`` on a series with one repeated timestamp and on an empty one."""
        # Manually set one row to another and detect it
        day_of_data = self.day_of_data.copy()
        day_of_data.loc[1] = day_of_data.loc[2]
        dupes = timeseries.find_duplicate_times(day_of_data)
        self.assertEqual(dupes.size, 1, "T1: Detect one duplicated row")

        # Input series of length zero
        day_of_data = pd.Series(dtype=np.float64)
        dupes = timeseries.find_duplicate_times(day_of_data)
        self.assertEqual(dupes.size, 0, "T2: Empty series should have zero duplicates")

    def test_gap_fill_data_frame(self):
        """Check ``gap_fill_data_frame`` on frames with a gap, without gaps, and with no rows."""
        # df with a gap
        day_of_data = self.day_of_data.copy()
        missing_two = day_of_data.drop([2, 3])
        missing_two_df = pd.DataFrame({"time": missing_two, "col1": missing_two})
        filled = timeseries.gap_fill_data_frame(missing_two_df, "time", "10min")
        self.assertEqual(
            day_of_data.size,
            filled["time"].size,
            "T1: Gap filling should increase size of this dataframe",
        )

        # df with no gaps
        day_of_data = self.day_of_data.copy()
        day_of_data_df = pd.DataFrame({"time": day_of_data, "col1": day_of_data})
        filled = timeseries.gap_fill_data_frame(day_of_data_df, "time", "10min")
        self.assertEqual(
            filled["time"].size, day_of_data.size, "T2: Full series should not have any new members"
        )

        # empty input df
        empty = pd.Series(dtype=np.float64)
        empty_df = pd.DataFrame({"time": empty, "col1": empty})
        filled = timeseries.gap_fill_data_frame(empty_df, "time", "10min")
        self.assertEqual(filled["time"].size, 0, "T3: Empty dataframe should still be empty")

    def test_num_days(self):
        """Check ``num_days`` for one day, no data, and two days separated by a month."""
        # Test 1 day of data
        day_of_data = pd.DataFrame(index=self.day_of_data, columns=["dt_col"])
        num = timeseries.num_days("dt_col", data=day_of_data)
        self.assertEqual(num, 1, "One day of data...")

        # Test 0 days of data
        empty_data = pd.DataFrame(index=pd.DatetimeIndex([]), columns=["dt_col"])
        num = timeseries.num_days("dt_col", data=empty_data)
        self.assertEqual(num, 0, "Zero days of data...")

        # Test 2 days of data separated by a month gap (passed directly as a Series here)
        two_days = pd.Series(index=self.two_days_of_data, data=self.two_days_of_data.values)
        num = timeseries.num_days(two_days)
        self.assertEqual(num, 32, "Two days of data separated by a month...")

    def test_num_hours(self):
        """Check ``num_hours`` for one day, no data, and two days separated by a month."""
        # Test 1 day of data
        day_of_data = pd.DataFrame(index=self.day_of_data, columns=["dt_col"])
        num = timeseries.num_hours("dt_col", data=day_of_data)
        self.assertEqual(num, 24, "One day of data...")

        # Test 0 days of data
        empty_data = pd.DataFrame(index=pd.DatetimeIndex([]), columns=["dt_col"])
        num = timeseries.num_hours("dt_col", data=empty_data)
        self.assertEqual(num, 0, "Zero days of data...")

        # Test 2 days of data separated by a month gap
        two_days = pd.DataFrame(index=self.two_days_of_data, columns=["dt_col"])
        num = timeseries.num_hours("dt_col", data=two_days)
        self.assertEqual(num, 32 * 24, "Two days of data separated by a month...")

    def test_percent_nan(self):
        """Check ``percent_nan`` against series holding zero, one, and two NaNs out of five values.

        The expected values treat ``np.inf`` as a regular (non-NaN) entry.
        """
        # All should be float Series given PlantData requirements
        test_dict = {
            "a": pd.Series([True, 1, 2, 1e5, np.inf]).astype(float),
            "b": pd.Series([False, np.nan, 2, 1e5, np.inf]).astype(float),
            "c": pd.Series([np.nan, 1, 2, 1e5, np.nan]).astype(float),
        }
        nan_values = {"a": 0.0, "b": 0.2, "c": 0.4}

        for key, series in test_dict.items():
            # subTest reports every failing case instead of stopping at the first one.
            with self.subTest(series=key):
                nptest.assert_almost_equal(
                    nan_values[key],
                    timeseries.percent_nan(series),
                    err_msg="NaN percentage function is broken",
                )

    def tearDown(self):
        """No per-test cleanup is required; the fixtures are plain in-memory objects."""


# Run the test cases in this module when the file is executed directly as a script.
if __name__ == "__main__":
    unittest.main()