Skip to content

Commit

Permalink
fix conflict
Browse files Browse the repository at this point in the history
  • Loading branch information
RHammond2 committed Feb 9, 2024
2 parents 75a056d + 4fbdfd1 commit 947502c
Show file tree
Hide file tree
Showing 64 changed files with 5,947 additions and 5,037 deletions.
25 changes: 25 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,18 @@ All notable changes to this project will be documented in this file. If you make

## Unreleased - TBD

- Updated compatibility with Pandas datetime offsets. All uppercase offset strings representing
one hour or less have been replaced with their lowercase equivalents, and the month-end offset
`M` has been replaced with `ME`. This stems from an update to the Pandas frequency API that breaks
the old style as of 2.2.0. See the mapping below to update frequency settings. The
soon-to-be-deprecated style from Pandas will continue to be supported in OpenOA, but will display
a `DeprecationWarning`, with support extending until OpenOA v4.
- M -> ME (MS still allowed)
- H -> h
- T -> min
- S -> s
- L -> ms
- U -> us
- N -> ns
- Python 3.11 is now supported.
- Updates the dependency requirements to minimize the number of required packages, and have a more
expansive list of modifiers. Users can now use any combination of
Expand All @@ -12,6 +24,19 @@ All notable changes to this project will be documented in this file. If you make
`pytest test/unit` or `pytest test/regression`.
- Converts some configuration files into `pyproject.toml` settings to reduce visual clutter
at the top-level of the directory.
- Updates chained `.loc` expressions to be a single `.loc` expression in project_ENGIE.py to silence
a Pandas deprecation warning about future changes.
- Adds a missing NaN assignment to `project_ENGIE.py:clean_scada`, which causes a slight change in
results for the TIE and wake loss regression tests.
- `openoa.utils.timeseries.gap_fill_data_frame()` now returns the original data if there is no data
to fill in, avoiding a Pandas `concat` deprecation warning about pending behavioral changes.
- The turbine capacity value used for power curve filtering in `TurbineLongTermGrossEnergy` is
changed to the rated power from the asset table instead of the maximum power from SCADA. This
makes the power curve filtering more robust to turbine power outliers above rated power.

## [3.0.1 - 2023-12-22]

- Includes warnings about limitations and lack of validation of static yaw misalignment method.

## v3.0 - 29 September 2023

Expand Down
101 changes: 76 additions & 25 deletions examples/00_intro_to_plant_data.ipynb

Large diffs are not rendered by default.

147 changes: 104 additions & 43 deletions examples/01_utils_examples.ipynb

Large diffs are not rendered by default.

97 changes: 81 additions & 16 deletions examples/02a_plant_aep_analysis.ipynb

Large diffs are not rendered by default.

431 changes: 183 additions & 248 deletions examples/02b_plant_aep_analysis_cubico.ipynb

Large diffs are not rendered by default.

110 changes: 88 additions & 22 deletions examples/02c_augmented_plant_aep_analysis.ipynb

Large diffs are not rendered by default.

24 changes: 12 additions & 12 deletions examples/03_turbine_ideal_energy.ipynb

Large diffs are not rendered by default.

82 changes: 74 additions & 8 deletions examples/04_electrical_losses.ipynb

Large diffs are not rendered by default.

67 changes: 59 additions & 8 deletions examples/05_eya_gap_analysis.ipynb

Large diffs are not rendered by default.

1,953 changes: 1,014 additions & 939 deletions examples/06_wake_loss_analysis.ipynb

Large diffs are not rendered by default.

1,905 changes: 992 additions & 913 deletions examples/07_static_yaw_misalignment.ipynb

Large diffs are not rendered by default.

8 changes: 4 additions & 4 deletions examples/data/plant_meta.json
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@
"curtail": {
"IAVL_DnWh": "availability_kwh",
"IAVL_ExtPwrDnWh": "curtailment_kwh",
"frequency": "10T",
"frequency": "10min",
"time": "time"
},
"latitude": 48.4497,
Expand All @@ -28,7 +28,7 @@
"WMETR_HorWdDir": "winddirection_deg",
"WMETR_HorWdSpdU": "u_100",
"WMETR_HorWdSpdV": "v_100",
"frequency": "H",
"frequency": "h",
"time": "datetime"
},
"merra2": {
Expand All @@ -37,7 +37,7 @@
"WMETR_HorWdDir": "winddirection_deg",
"WMETR_HorWdSpdU": "u_50",
"WMETR_HorWdSpdV": "v_50",
"frequency": "H",
"frequency": "h",
"time": "datetime"
}
},
Expand All @@ -49,7 +49,7 @@
"WROT_BlPthAngVal": "Ba_avg",
"WTUR_W": "P_avg",
"asset_id": "Wind_turbine_name",
"frequency": "10T",
"frequency": "10min",
"time": "Date_time"
}
}
8 changes: 4 additions & 4 deletions examples/data/plant_meta.yml
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ asset:
curtail:
IAVL_DnWh: availability_kwh # availability, kWh
IAVL_ExtPwrDnWh: curtailment_kwh # curtailment, kWh
frequency: 10T # timestamp frequency
frequency: 10min # timestamp frequency
time: time # timestamp
latitude: 48.4497 # WGS-84 latitudinal plant centroid
longitude: 5.5896 # WGS-84 longitudinal plant centroid
Expand All @@ -19,23 +19,23 @@ meter:
time: time # timestamp
reanalysis:
era5: # reanalysis product name/ID
frequency: H # timestamp frequency
frequency: h # timestamp frequency
WMETR_EnvPres: surf_pres # surface pressure, Pa
WMETR_EnvTmp: t_2m # temperature, K
time: datetime # timestamps
WMETR_HorWdSpdU: u_100 # u-direction windspeed, m/s
WMETR_HorWdSpdV: v_100 # v-direction windspeed, m/s
WMETR_HorWdDir: winddirection_deg # wind direction, degrees
merra2: # reanalysis product name/ID
frequency: H # timestamp frequency
frequency: h # timestamp frequency
WMETR_EnvPres: surface_pressure # surface pressure, Pa
WMETR_EnvTmp: temp_2m # temperature, K
time: datetime # timestamps
WMETR_HorWdSpdU: u_50 # u-direction windspeed, m/s
WMETR_HorWdSpdV: v_50 # v-direction windspeed, m/s
WMETR_HorWdDir: winddirection_deg # wind direction, degrees
scada:
frequency: 10T # timestamp frequency
frequency: 10min # timestamp frequency
asset_id: Wind_turbine_name # Unique ID of wind turbine
WROT_BlPthAngVal: Ba_avg # pitch angle, degrees
WTUR_W: P_avg # power produced, kW
Expand Down
8 changes: 4 additions & 4 deletions examples/project_Cubico.py
Original file line number Diff line number Diff line change
Expand Up @@ -344,7 +344,7 @@ def prepare(asset: str = "kelmarsh", return_value: str = "plantdata") -> PlantDa
"curtail": {
"IAVL_DnWh": "Lost Production to Downtime (kWh)",
"IAVL_ExtPwrDnWh": "Lost Production to Curtailment (Total) (kWh)",
"frequency": "10T",
"frequency": "10min",
"time": "Timestamp",
},
"latitude": str(asset_df["Latitude"].mean()),
Expand All @@ -359,7 +359,7 @@ def prepare(asset: str = "kelmarsh", return_value: str = "plantdata") -> PlantDa
"WMETR_HorWdSpdU": "u_ms",
"WMETR_HorWdSpdV": "v_ms",
"WMETR_HorWdSpd": "windspeed_ms",
"frequency": "H",
"frequency": "h",
"time": "datetime",
},
"merra2": {
Expand All @@ -369,7 +369,7 @@ def prepare(asset: str = "kelmarsh", return_value: str = "plantdata") -> PlantDa
"WMETR_HorWdSpdU": "u_ms",
"WMETR_HorWdSpdV": "v_ms",
"WMETR_HorWdSpd": "windspeed_ms",
"frequency": "H",
"frequency": "h",
"time": "datetime",
},
"era5_monthly": {
Expand All @@ -394,7 +394,7 @@ def prepare(asset: str = "kelmarsh", return_value: str = "plantdata") -> PlantDa
"WROT_BlPthAngVal": "Blade angle (pitch position) A (°)",
"asset_id": "Turbine",
"WTUR_W": "Power (kW)",
"frequency": "10T",
"frequency": "10min",
"time": "Timestamp",
},
}
Expand Down
11 changes: 4 additions & 7 deletions examples/project_ENGIE.py
Original file line number Diff line number Diff line change
Expand Up @@ -69,7 +69,7 @@ def clean_scada(scada_file: str | Path) -> pd.DataFrame:
Returns:
pd.DataFrame: The cleaned up SCADA data that is ready for loading into a `PlantData` object.
"""
scada_freq = "10T"
scada_freq = "10min"

logger.info("Loading SCADA data")
scada_df = pd.read_csv(scada_file)
Expand All @@ -96,14 +96,11 @@ def clean_scada(scada_file: str | Path) -> pd.DataFrame:

# Cancel out readings where the wind vane direction repeats more than 3 times in a row
ix_flag = filters.unresponsive_flag(scada_df.loc[ix_turbine], 3, col=["Va_avg"])
scada_df.loc[ix_turbine].loc[ix_flag.values, sensor_cols]
scada_df.loc[ix_flag.loc[ix_flag["Va_avg"]].index, sensor_cols] = np.nan

# Cancel out the temperature readings where the value repeats more than 20 times in a row
ix_flag = filters.unresponsive_flag(scada_df.loc[ix_turbine], 20, col=["Ot_avg"])

# NOTE: ix_flag is flattened here because as a series it's shape = (N, 1) and
# incompatible with this style of indexing, so we need it as shape = (N,)
scada_df.loc[ix_turbine, "Ot_avg"].loc[ix_flag.values.flatten()] = np.nan
scada_df.loc[ix_flag.loc[ix_flag["Ot_avg"]].index, "Ot_avg"] = np.nan

logger.info("Converting pitch to the range [-180, 180]")
scada_df.loc[:, "Ba_avg"] = scada_df["Ba_avg"] % 360
Expand Down Expand Up @@ -255,7 +252,7 @@ def prepare(

# Fill the 2 missing time stamps with NaN values
reanalysis_era5_df = reanalysis_era5_df.set_index(pd.DatetimeIndex(reanalysis_era5_df.datetime))
reanalysis_era5_df = reanalysis_era5_df.asfreq("1H")
reanalysis_era5_df = reanalysis_era5_df.asfreq("1h")
reanalysis_era5_df["datetime"] = reanalysis_era5_df.index

# calculate wind direction from u, v
Expand Down
2 changes: 1 addition & 1 deletion openoa/__init__.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
__version__ = "3.0"
__version__ = "3.0.1"
"""
When bumping version, please be sure to also update parameters in sphinx/conf.py
"""
Expand Down
51 changes: 30 additions & 21 deletions openoa/analysis/aep.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@
from openoa.utils import met_data_processing as mt
from openoa.schema import FromDictMixin, ResetValuesMixin
from openoa.logging import logging, logged_method_call
from openoa.schema.metadata import convert_frequency
from openoa.utils.machine_learning_setup import MachineLearningSetup
from openoa.analysis._analysis_validators import validate_reanalysis_selections

Expand Down Expand Up @@ -102,8 +103,8 @@ class MonteCarloAEP(FromDictMixin, ResetValuesMixin):
filter. Defaults to (1, 3).
uncertainty_nan_energy(:obj:`float`): Threshold to flag days/months based on NaNs. Defaults
to 0.01.
time_resolution(:obj:`string`): whether to perform the AEP calculation at monthly ("M"),
daily ("D") or hourly ("H") time resolution. Defaults to "M".
time_resolution(:obj:`string`): whether to perform the AEP calculation at monthly ("ME" or
"MS"), daily ("D") or hourly ("h") time resolution. Defaults to "ME".
end_date_lt(:obj:`string` or :obj:`pandas.Timestamp`): The last date to use for the
long-term correction. Note that only the component of the date corresponding to the
time_resolution argument is considered. If None, the end of the last complete month of
Expand Down Expand Up @@ -158,7 +159,11 @@ class MonteCarloAEP(FromDictMixin, ResetValuesMixin):
),
)
uncertainty_nan_energy: float = field(default=0.01, converter=float)
time_resolution: str = field(default="M", validator=attrs.validators.in_(("M", "D", "H")))
time_resolution: str = field(
default="ME",
converter=convert_frequency,
validator=attrs.validators.in_(("MS", "ME", "D", "h")),
)
end_date_lt: str | pd.Timestamp = field(default=None)
reg_model: str = field(
default="lin", converter=str, validator=attrs.validators.in_(("lin", "gbm", "etr", "gam"))
Expand Down Expand Up @@ -237,16 +242,18 @@ def __attrs_post_init__(self):

logger.info("Initializing MonteCarloAEP Analysis Object")

self.resample_freq = {"M": "MS", "D": "D", "H": "H"}[self.time_resolution]
self.resample_hours = {"M": 30 * 24, "D": 1 * 24, "H": 1}[self.time_resolution]
self.calendar_samples = {"M": 12, "D": 365, "H": 365 * 24}[self.time_resolution]
self.resample_freq = self.time_resolution
self.resample_hours = {"MS": 30 * 24, "ME": 30 * 24, "D": 1 * 24, "h": 1}[
self.time_resolution
]
self.calendar_samples = {"MS": 12, "ME": 12, "D": 365, "h": 365 * 24}[self.time_resolution]

if self.end_date_lt is not None:
# Truncate to the start of the hour (minutes set to 0)
self.end_date_lt = pd.to_datetime(self.end_date_lt).replace(minute=0)

# Monthly data can only use robust linear regression because of limited number of data
if (self.time_resolution == "M") & (self.reg_model != "lin"):
if (self.time_resolution in ("ME", "MS")) & (self.reg_model != "lin"):
raise ValueError("For monthly time resolution, only linear regression is allowed!")

# Run preprocessing step
Expand Down Expand Up @@ -305,8 +312,8 @@ def run(
filter. Defaults to (1, 3).
uncertainty_nan_energy(:obj:`float`): Threshold to flag days/months based on NaNs. Defaults
to 0.01.
time_resolution(:obj:`string`): whether to perform the AEP calculation at monthly ("M"),
daily ("D") or hourly ("H") time resolution. Defaults to "M".
time_resolution(:obj:`string`): whether to perform the AEP calculation at monthly ("ME" or
"MS"), daily ("D") or hourly ("h") time resolution. Defaults to "ME".
end_date_lt(:obj:`string` or :obj:`pandas.Timestamp`): The last date to use for the
long-term correction. Note that only the component of the date corresponding to the
time_resolution argument is considered. If None, the end of the last complete month of
Expand Down Expand Up @@ -395,11 +402,11 @@ def groupby_time_res(self, df):
None
"""

if self.time_resolution == "M":
if self.time_resolution in ("MS", "ME"):
df_grouped = df.groupby(df.index.month).mean()
elif self.time_resolution == "D":
df_grouped = df.groupby([(df.index.month), (df.index.day)]).mean()
elif self.time_resolution == "H":
elif self.time_resolution == "h":
df_grouped = df.groupby([(df.index.month), (df.index.day), (df.index.hour)]).mean()

return df_grouped
Expand All @@ -421,7 +428,7 @@ def calculate_aggregate_dataframe(self):

# Remove first and last reporting months if only partial month reported
# (only for monthly time resolution calculations)
if self.time_resolution == "M":
if self.time_resolution in ("MS", "ME"):
self.trim_monthly_df()

# Drop any data that have NaN gross energy values or NaN reanalysis data
Expand Down Expand Up @@ -449,7 +456,7 @@ def process_revenue_meter_energy(self):
tm.percent_nan
)

if self.time_resolution == "M":
if self.time_resolution in ("MS", "ME"):
# Create a column with expected number of days per month (to be used when normalizing to 30-days for regression)
days_per_month = (pd.Series(self.aggregate.index)).dt.daysinmonth
days_per_month.index = self.aggregate.index
Expand Down Expand Up @@ -542,7 +549,7 @@ def process_reanalysis_data(self):
# Next, update the start date to make sure it corresponds to a full time period, by shifting
# to either the start of the next month, or start of the next day, depending on the frequency
start_date_minus = start_date - pd.DateOffset(hours=1)
if (self.time_resolution == "M") & (start_date.month == start_date_minus.month):
if (self.time_resolution in ("MS", "ME")) & (start_date.month == start_date_minus.month):
start_date = start_date.replace(day=1, hour=0, minute=0) + pd.DateOffset(months=1)
elif (self.time_resolution == "D") & (start_date.day == start_date_minus.day):
start_date = start_date.replace(hour=0, minute=0) + pd.DateOffset(days=1)
Expand All @@ -552,7 +559,9 @@ def process_reanalysis_data(self):
if self.end_date_lt is not None:
# If valid (before the last full time period in the data), use the specified end date
end_date_lt_plus = self.end_date_lt + pd.DateOffset(hours=1)
if (self.time_resolution == "M") & (self.end_date_lt.month == end_date_lt_plus.month):
if (self.time_resolution in ("MS", "ME")) & (
self.end_date_lt.month == end_date_lt_plus.month
):
self.end_date_lt = (
self.end_date_lt.replace(day=1, hour=0, minute=0)
+ pd.DateOffset(months=1)
Expand Down Expand Up @@ -770,7 +779,7 @@ def filter_outliers(self, n):
)

if self.outlier_detection:
if self.time_resolution == "M":
if self.time_resolution in ("MS", "ME"):
# Monthly linear regression (i.e., few data points):
# flag outliers with robust linear regression using Huber algorithm

Expand Down Expand Up @@ -826,7 +835,7 @@ def filter_outliers(self, n):
valid_data_to_add = df_sub.loc[~df_sub.loc[:, "flag_final"], [f"{reanal}_WMETR_EnvTmp"]]
valid_data = pd.concat([valid_data, valid_data_to_add], axis=1)

if self.time_resolution == "M":
if self.time_resolution in ("MS", "ME"):
valid_data_to_add = df_sub.loc[~df_sub.loc[:, "flag_final"], ["num_days_expected"]]
valid_data = pd.concat([valid_data, valid_data_to_add], axis=1)

Expand Down Expand Up @@ -869,7 +878,7 @@ def set_regression_data(self, n):

# Calculate gross energy and normalize to 30-days
mc_gross_energy = mc_energy + mc_availability + mc_curtailment
if self.time_resolution == "M":
if self.time_resolution in ("MS", "ME"):
num_days_expected = reg_data["num_days_expected"]
mc_gross_norm = mc_gross_energy * 30 / num_days_expected
else:
Expand Down Expand Up @@ -1033,7 +1042,7 @@ def run_AEP_monte_carlo(self):
)
)

if self.time_resolution == "M": # Undo normalization to 30-day months
if self.time_resolution in ("MS", "ME"): # Undo normalization to 30-day months
# Shift the list of number of days per month to align with the reanalysis data
last_month = self._reanalysis_aggregate.index[-1].month
gross_lt = (
Expand Down Expand Up @@ -1288,7 +1297,7 @@ def plot_reanalysis_gross_energy_data(
valid_aggregate = self.aggregate

# Monthly case: apply robust linear regression for outliers detection
if self.time_resolution == "M":
if self.time_resolution in ("MS", "ME"):
for name, df in self.plant.reanalysis.items():
x = sm.add_constant(valid_aggregate[name])
y = valid_aggregate["gross_energy_gwh"] * 30 / valid_aggregate["num_days_expected"]
Expand Down Expand Up @@ -1339,7 +1348,7 @@ def plot_reanalysis_gross_energy_data(

if self.time_resolution == "D":
ax.set_ylabel("Daily gross energy (GWh)")
elif self.time_resolution == "H":
elif self.time_resolution == "h":
ax.set_ylabel("Hourly gross energy (GWh)")

ax.legend(**legend_kwargs)
Expand Down
2 changes: 1 addition & 1 deletion openoa/analysis/electrical_losses.py
Original file line number Diff line number Diff line change
Expand Up @@ -125,7 +125,7 @@ def __attrs_post_init__(self):

# Process the SCADA and meter data appropriately
self.process_scada()
if self.plant.metadata.meter.frequency not in ("MS", "M", "1MS"):
if self.plant.metadata.meter.frequency not in ("MS", "ME", "1MS"):
self.process_meter()
self.monthly_meter = False

Expand Down
Loading

0 comments on commit 947502c

Please sign in to comment.