fix conflict

NREL · Feb 9, 2024 · 947502c · 947502c
2 parents 75a056d + 4fbdfd1
commit 947502c
Show file tree

Hide file tree

Showing 64 changed files with 5,947 additions and 5,037 deletions.
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -3,6 +3,18 @@ All notable changes to this project will be documented in this file. If you make
 
 ## Unreleased - TBD
 
+- Updated compatibility with Pandas datetime offsets. All uppercase offset strings representing
+  one hour or less have been replaced with the lowercase version. This stems from an update in the
+  Pandas frequency API that breaks in 2.2.0. Use the mapping below to update frequency settings. The
+  soon-to-be-deprecated style from Pandas will continue to be supported in OpenOA, but will display
+  a `DeprecationWarning` with support extending until OpenOA v4.
+  - M -> ME (MS still allowed)
+  - H -> h
+  - T -> min
+  - S -> s
+  - L -> ms
+  - U -> us
+  - N -> ns
 - Python 3.11 is now supported.
 - Updates the dependency requirements to minimize the number of required packages, and have a more
   expansive list of modifiers. Users can now use any combination of
@@ -12,6 +24,19 @@ All notable changes to this project will be documented in this file. If you make
 `pytest test/unit` or `pytest test/regression`.
 - Converts some configuration files into `pyproject.toml` settings to reduce visual clutter
   at the top-level of the directory.
+- Updates chained `.loc` expressions to be a single `.loc` expression in project_ENGIE.py to silence
+  a Pandas deprecation warning about future changes.
+- Adds a missing NaN assignment to `project_ENGIE.py:clean_scada`, which causes a slight change in
+  results for the TIE and wake loss regression tests.
+- `openoa.utils.timeseries.gap_fill_data_frame()` now returns the original data if there is no data
+  to fill in, avoiding a Pandas `concat` deprecation warning about pending behavioral changes.
+- The turbine capacity value used for power curve filtering in `TurbineLongTermGrossEnergy` is
+  changed to the rated power from the asset table instead of the maximum power from SCADA. This
+  makes the power curve filtering more robust to turbine power outliers above rated power.
+
+## [3.0.1 - 2023-12-22]
+
+- Includes warnings about limitations and lack of validation of static yaw misalignment method.
 
 ## v3.0 - 29 September 2023
 

diff --git a/examples/00_intro_to_plant_data.ipynb b/examples/00_intro_to_plant_data.ipynb
diff --git a/examples/01_utils_examples.ipynb b/examples/01_utils_examples.ipynb
diff --git a/examples/02a_plant_aep_analysis.ipynb b/examples/02a_plant_aep_analysis.ipynb
diff --git a/examples/02b_plant_aep_analysis_cubico.ipynb b/examples/02b_plant_aep_analysis_cubico.ipynb
diff --git a/examples/02c_augmented_plant_aep_analysis.ipynb b/examples/02c_augmented_plant_aep_analysis.ipynb
diff --git a/examples/03_turbine_ideal_energy.ipynb b/examples/03_turbine_ideal_energy.ipynb
diff --git a/examples/04_electrical_losses.ipynb b/examples/04_electrical_losses.ipynb
diff --git a/examples/05_eya_gap_analysis.ipynb b/examples/05_eya_gap_analysis.ipynb
diff --git a/examples/06_wake_loss_analysis.ipynb b/examples/06_wake_loss_analysis.ipynb
diff --git a/examples/07_static_yaw_misalignment.ipynb b/examples/07_static_yaw_misalignment.ipynb
diff --git a/examples/data/plant_meta.json b/examples/data/plant_meta.json
@@ -12,7 +12,7 @@
   "curtail": {
     "IAVL_DnWh": "availability_kwh",
     "IAVL_ExtPwrDnWh": "curtailment_kwh",
-    "frequency": "10T",
+    "frequency": "10min",
     "time": "time"
   },
   "latitude": 48.4497,
@@ -28,7 +28,7 @@
       "WMETR_HorWdDir": "winddirection_deg",
       "WMETR_HorWdSpdU": "u_100",
       "WMETR_HorWdSpdV": "v_100",
-      "frequency": "H",
+      "frequency": "h",
       "time": "datetime"
     },
     "merra2": {
@@ -37,7 +37,7 @@
       "WMETR_HorWdDir": "winddirection_deg",
       "WMETR_HorWdSpdU": "u_50",
       "WMETR_HorWdSpdV": "v_50",
-      "frequency": "H",
+      "frequency": "h",
       "time": "datetime"
     }
   },
@@ -49,7 +49,7 @@
     "WROT_BlPthAngVal": "Ba_avg",
     "WTUR_W": "P_avg",
     "asset_id": "Wind_turbine_name",
-    "frequency": "10T",
+    "frequency": "10min",
     "time": "Date_time"
   }
 }
diff --git a/examples/data/plant_meta.yml b/examples/data/plant_meta.yml
@@ -9,7 +9,7 @@ asset:
 curtail:
   IAVL_DnWh: availability_kwh  # availability, kWh
   IAVL_ExtPwrDnWh: curtailment_kwh  # curtailment, kWh
-  frequency: 10T  # timestamp frequency
+  frequency: 10min  # timestamp frequency
   time: time  # timestamp
 latitude: 48.4497  # WGS-84 latitudinal plant centroid
 longitude: 5.5896  # WGS-84 longitudinal plant centroid
@@ -19,23 +19,23 @@ meter:
   time: time  # timestamp
 reanalysis:
   era5:  # reanalysis product name/ID
-    frequency: H  # timestamp frequency
+    frequency: h  # timestamp frequency
     WMETR_EnvPres: surf_pres  # surface pressure, Pa
     WMETR_EnvTmp: t_2m  # temperature, K
     time: datetime  # timestamps
     WMETR_HorWdSpdU: u_100  # u-direction windspeed, m/s
     WMETR_HorWdSpdV: v_100  # v-direction windspeed, m/s
     WMETR_HorWdDir: winddirection_deg  # wind direction, degrees
   merra2:  # reanalysis product name/ID
-    frequency: H  # timestamp frequency
+    frequency: h  # timestamp frequency
     WMETR_EnvPres: surface_pressure  # surface pressure, Pa
     WMETR_EnvTmp: temp_2m  # temperature, K
     time: datetime  # timestamps
     WMETR_HorWdSpdU: u_50  # u-direction windspeed, m/s
     WMETR_HorWdSpdV: v_50  # v-direction windspeed, m/s
     WMETR_HorWdDir: winddirection_deg  # wind direction, degrees
 scada:
-  frequency: 10T  # timestamp frequency
+  frequency: 10min  # timestamp frequency
   asset_id: Wind_turbine_name  # Unique ID of wind turbine
   WROT_BlPthAngVal: Ba_avg  # pitch angle, degrees
   WTUR_W: P_avg  # power produced, kW

diff --git a/examples/project_Cubico.py b/examples/project_Cubico.py
@@ -344,7 +344,7 @@ def prepare(asset: str = "kelmarsh", return_value: str = "plantdata") -> PlantDa
         "curtail": {
             "IAVL_DnWh": "Lost Production to Downtime (kWh)",
             "IAVL_ExtPwrDnWh": "Lost Production to Curtailment (Total) (kWh)",
-            "frequency": "10T",
+            "frequency": "10min",
             "time": "Timestamp",
         },
         "latitude": str(asset_df["Latitude"].mean()),
@@ -359,7 +359,7 @@ def prepare(asset: str = "kelmarsh", return_value: str = "plantdata") -> PlantDa
                 "WMETR_HorWdSpdU": "u_ms",
                 "WMETR_HorWdSpdV": "v_ms",
                 "WMETR_HorWdSpd": "windspeed_ms",
-                "frequency": "H",
+                "frequency": "h",
                 "time": "datetime",
             },
             "merra2": {
@@ -369,7 +369,7 @@ def prepare(asset: str = "kelmarsh", return_value: str = "plantdata") -> PlantDa
                 "WMETR_HorWdSpdU": "u_ms",
                 "WMETR_HorWdSpdV": "v_ms",
                 "WMETR_HorWdSpd": "windspeed_ms",
-                "frequency": "H",
+                "frequency": "h",
                 "time": "datetime",
             },
             "era5_monthly": {
@@ -394,7 +394,7 @@ def prepare(asset: str = "kelmarsh", return_value: str = "plantdata") -> PlantDa
             "WROT_BlPthAngVal": "Blade angle (pitch position) A (°)",
             "asset_id": "Turbine",
             "WTUR_W": "Power (kW)",
-            "frequency": "10T",
+            "frequency": "10min",
             "time": "Timestamp",
         },
     }

diff --git a/examples/project_ENGIE.py b/examples/project_ENGIE.py
@@ -69,7 +69,7 @@ def clean_scada(scada_file: str | Path) -> pd.DataFrame:
     Returns:
         pd.DataFrame: The cleaned up SCADA data that is ready for loading into a `PlantData` object.
     """
-    scada_freq = "10T"
+    scada_freq = "10min"
 
     logger.info("Loading SCADA data")
     scada_df = pd.read_csv(scada_file)
@@ -96,14 +96,11 @@ def clean_scada(scada_file: str | Path) -> pd.DataFrame:
 
         # Cancel out readings where the wind vane direction repeats more than 3 times in a row
         ix_flag = filters.unresponsive_flag(scada_df.loc[ix_turbine], 3, col=["Va_avg"])
-        scada_df.loc[ix_turbine].loc[ix_flag.values, sensor_cols]
+        scada_df.loc[ix_flag.loc[ix_flag["Va_avg"]].index, sensor_cols] = np.nan
 
         # Cancel out the temperature readings where the value repeats more than 20 times in a row
         ix_flag = filters.unresponsive_flag(scada_df.loc[ix_turbine], 20, col=["Ot_avg"])
-
-        # NOTE: ix_flag is flattened here because as a series it's shape = (N, 1) and
-        # incompatible with this style of indexing, so we need it as shape = (N,)
-        scada_df.loc[ix_turbine, "Ot_avg"].loc[ix_flag.values.flatten()] = np.nan
+        scada_df.loc[ix_flag.loc[ix_flag["Ot_avg"]].index, "Ot_avg"] = np.nan
 
     logger.info("Converting pitch to the range [-180, 180]")
     scada_df.loc[:, "Ba_avg"] = scada_df["Ba_avg"] % 360
@@ -255,7 +252,7 @@ def prepare(
 
     # Fill the 2 missing time stamps with NaN values
     reanalysis_era5_df = reanalysis_era5_df.set_index(pd.DatetimeIndex(reanalysis_era5_df.datetime))
-    reanalysis_era5_df = reanalysis_era5_df.asfreq("1H")
+    reanalysis_era5_df = reanalysis_era5_df.asfreq("1h")
     reanalysis_era5_df["datetime"] = reanalysis_era5_df.index
 
     # calculate wind direction from u, v

diff --git a/openoa/__init__.py b/openoa/__init__.py
@@ -1,4 +1,4 @@
-__version__ = "3.0"
+__version__ = "3.0.1"
 """
 When bumping version, please be sure to also update parameters in sphinx/conf.py
 """

diff --git a/openoa/analysis/aep.py b/openoa/analysis/aep.py
@@ -24,6 +24,7 @@
 from openoa.utils import met_data_processing as mt
 from openoa.schema import FromDictMixin, ResetValuesMixin
 from openoa.logging import logging, logged_method_call
+from openoa.schema.metadata import convert_frequency
 from openoa.utils.machine_learning_setup import MachineLearningSetup
 from openoa.analysis._analysis_validators import validate_reanalysis_selections
 
@@ -102,8 +103,8 @@ class MonteCarloAEP(FromDictMixin, ResetValuesMixin):
             filter. Defaults to (1, 3).
         uncertainty_nan_energy(:obj:`float`): Threshold to flag days/months based on NaNs. Defaults
             to 0.01.
-        time_resolution(:obj:`string`): whether to perform the AEP calculation at monthly ("M"),
-            daily ("D") or hourly ("H") time resolution. Defaults to "M".
+        time_resolution(:obj:`string`): whether to perform the AEP calculation at monthly ("ME" or
+            "MS"), daily ("D") or hourly ("h") time resolution. Defaults to "ME".
         end_date_lt(:obj:`string` or :obj:`pandas.Timestamp`): The last date to use for the
             long-term correction. Note that only the component of the date corresponding to the
             time_resolution argument is considered. If None, the end of the last complete month of
@@ -158,7 +159,11 @@ class MonteCarloAEP(FromDictMixin, ResetValuesMixin):
         ),
     )
     uncertainty_nan_energy: float = field(default=0.01, converter=float)
-    time_resolution: str = field(default="M", validator=attrs.validators.in_(("M", "D", "H")))
+    time_resolution: str = field(
+        default="ME",
+        converter=convert_frequency,
+        validator=attrs.validators.in_(("MS", "ME", "D", "h")),
+    )
     end_date_lt: str | pd.Timestamp = field(default=None)
     reg_model: str = field(
         default="lin", converter=str, validator=attrs.validators.in_(("lin", "gbm", "etr", "gam"))
@@ -237,16 +242,18 @@ def __attrs_post_init__(self):
 
         logger.info("Initializing MonteCarloAEP Analysis Object")
 
-        self.resample_freq = {"M": "MS", "D": "D", "H": "H"}[self.time_resolution]
-        self.resample_hours = {"M": 30 * 24, "D": 1 * 24, "H": 1}[self.time_resolution]
-        self.calendar_samples = {"M": 12, "D": 365, "H": 365 * 24}[self.time_resolution]
+        self.resample_freq = self.time_resolution
+        self.resample_hours = {"MS": 30 * 24, "ME": 30 * 24, "D": 1 * 24, "h": 1}[
+            self.time_resolution
+        ]
+        self.calendar_samples = {"MS": 12, "ME": 12, "D": 365, "h": 365 * 24}[self.time_resolution]
 
         if self.end_date_lt is not None:
             # Set to the bottom of the bottom of the hour
             self.end_date_lt = pd.to_datetime(self.end_date_lt).replace(minute=0)
 
         # Monthly data can only use robust linear regression because of limited number of data
-        if (self.time_resolution == "M") & (self.reg_model != "lin"):
+        if (self.time_resolution in ("ME", "MS")) & (self.reg_model != "lin"):
             raise ValueError("For monthly time resolution, only linear regression is allowed!")
 
         # Run preprocessing step
@@ -305,8 +312,8 @@ def run(
                 filter. Defaults to (1, 3).
             uncertainty_nan_energy(:obj:`float`): Threshold to flag days/months based on NaNs. Defaults
                 to 0.01.
-            time_resolution(:obj:`string`): whether to perform the AEP calculation at monthly ("M"),
-                daily ("D") or hourly ("H") time resolution. Defaults to "M".
+            time_resolution(:obj:`string`): whether to perform the AEP calculation at monthly ("ME" or
+                "MS"), daily ("D") or hourly ("h") time resolution. Defaults to "ME".
             end_date_lt(:obj:`string` or :obj:`pandas.Timestamp`): The last date to use for the
                 long-term correction. Note that only the component of the date corresponding to the
                 time_resolution argument is considered. If None, the end of the last complete month of
@@ -395,11 +402,11 @@ def groupby_time_res(self, df):
             None
         """
 
-        if self.time_resolution == "M":
+        if self.time_resolution in ("MS", "ME"):
             df_grouped = df.groupby(df.index.month).mean()
         elif self.time_resolution == "D":
             df_grouped = df.groupby([(df.index.month), (df.index.day)]).mean()
-        elif self.time_resolution == "H":
+        elif self.time_resolution == "h":
             df_grouped = df.groupby([(df.index.month), (df.index.day), (df.index.hour)]).mean()
 
         return df_grouped
@@ -421,7 +428,7 @@ def calculate_aggregate_dataframe(self):
 
         # Remove first and last reporting months if only partial month reported
         # (only for monthly time resolution calculations)
-        if self.time_resolution == "M":
+        if self.time_resolution in ("MS", "ME"):
             self.trim_monthly_df()
 
         # Drop any data that have NaN gross energy values or NaN reanalysis data
@@ -449,7 +456,7 @@ def process_revenue_meter_energy(self):
             tm.percent_nan
         )
 
-        if self.time_resolution == "M":
+        if self.time_resolution in ("MS", "ME"):
             # Create a column with expected number of days per month (to be used when normalizing to 30-days for regression)
             days_per_month = (pd.Series(self.aggregate.index)).dt.daysinmonth
             days_per_month.index = self.aggregate.index
@@ -542,7 +549,7 @@ def process_reanalysis_data(self):
         # Next, update the start date to make sure it corresponds to a full time period, by shifting
         # to either the start of the next month, or start of the next day, depending on the frequency
         start_date_minus = start_date - pd.DateOffset(hours=1)
-        if (self.time_resolution == "M") & (start_date.month == start_date_minus.month):
+        if (self.time_resolution in ("MS", "ME")) & (start_date.month == start_date_minus.month):
             start_date = start_date.replace(day=1, hour=0, minute=0) + pd.DateOffset(months=1)
         elif (self.time_resolution == "D") & (start_date.day == start_date_minus.day):
             start_date = start_date.replace(hour=0, minute=0) + pd.DateOffset(days=1)
@@ -552,7 +559,9 @@ def process_reanalysis_data(self):
         if self.end_date_lt is not None:
             # If valid (before the last full time period in the data), use the specified end date
             end_date_lt_plus = self.end_date_lt + pd.DateOffset(hours=1)
-            if (self.time_resolution == "M") & (self.end_date_lt.month == end_date_lt_plus.month):
+            if (self.time_resolution in ("MS", "ME")) & (
+                self.end_date_lt.month == end_date_lt_plus.month
+            ):
                 self.end_date_lt = (
                     self.end_date_lt.replace(day=1, hour=0, minute=0)
                     + pd.DateOffset(months=1)
@@ -770,7 +779,7 @@ def filter_outliers(self, n):
         )
 
         if self.outlier_detection:
-            if self.time_resolution == "M":
+            if self.time_resolution in ("MS", "ME"):
                 # Monthly linear regression (i.e., few data points):
                 # flag outliers with robust linear regression using Huber algorithm
 
@@ -826,7 +835,7 @@ def filter_outliers(self, n):
             valid_data_to_add = df_sub.loc[~df_sub.loc[:, "flag_final"], [f"{reanal}_WMETR_EnvTmp"]]
             valid_data = pd.concat([valid_data, valid_data_to_add], axis=1)
 
-        if self.time_resolution == "M":
+        if self.time_resolution in ("MS", "ME"):
             valid_data_to_add = df_sub.loc[~df_sub.loc[:, "flag_final"], ["num_days_expected"]]
             valid_data = pd.concat([valid_data, valid_data_to_add], axis=1)
 
@@ -869,7 +878,7 @@ def set_regression_data(self, n):
 
         # Calculate gross energy and normalize to 30 days
         mc_gross_energy = mc_energy + mc_availability + mc_curtailment
-        if self.time_resolution == "M":
+        if self.time_resolution in ("MS", "ME"):
             num_days_expected = reg_data["num_days_expected"]
             mc_gross_norm = mc_gross_energy * 30 / num_days_expected
         else:
@@ -1033,7 +1042,7 @@ def run_AEP_monte_carlo(self):
                 )
             )
 
-            if self.time_resolution == "M":  # Undo normalization to 30-day months
+            if self.time_resolution in ("MS", "ME"):  # Undo normalization to 30-day months
                 # Shift the list of number of days per month to align with the reanalysis data
                 last_month = self._reanalysis_aggregate.index[-1].month
                 gross_lt = (
@@ -1288,7 +1297,7 @@ def plot_reanalysis_gross_energy_data(
         valid_aggregate = self.aggregate
 
         # Monthly case: apply robust linear regression for outliers detection
-        if self.time_resolution == "M":
+        if self.time_resolution in ("MS", "ME"):
             for name, df in self.plant.reanalysis.items():
                 x = sm.add_constant(valid_aggregate[name])
                 y = valid_aggregate["gross_energy_gwh"] * 30 / valid_aggregate["num_days_expected"]
@@ -1339,7 +1348,7 @@ def plot_reanalysis_gross_energy_data(
 
             if self.time_resolution == "D":
                 ax.set_ylabel("Daily gross energy (GWh)")
-            elif self.time_resolution == "H":
+            elif self.time_resolution == "h":
                 ax.set_ylabel("Hourly gross energy (GWh)")
 
         ax.legend(**legend_kwargs)

diff --git a/openoa/analysis/electrical_losses.py b/openoa/analysis/electrical_losses.py
@@ -125,7 +125,7 @@ def __attrs_post_init__(self):
 
         # Process the SCADA and meter data appropriately
         self.process_scada()
-        if self.plant.metadata.meter.frequency not in ("MS", "M", "1MS"):
+        if self.plant.metadata.meter.frequency not in ("MS", "ME", "1MS"):
             self.process_meter()
             self.monthly_meter = False