From 8454cfa9a90a0a783905d7390819cf0f64f0bdb0 Mon Sep 17 00:00:00 2001 From: Fabian Neumann Date: Tue, 19 Aug 2025 09:33:37 +0200 Subject: [PATCH 01/68] update BEYONDCOAL --- powerplantmatching/data.py | 35 +++++++++++++++------ powerplantmatching/package_data/config.yaml | 5 +-- 2 files changed, 28 insertions(+), 12 deletions(-) diff --git a/powerplantmatching/data.py b/powerplantmatching/data.py index e2fea2c7..089faa3f 100644 --- a/powerplantmatching/data.py +++ b/powerplantmatching/data.py @@ -68,11 +68,16 @@ def BEYONDCOAL(raw=False, update=False, config=None): config = get_config() if config is None else config fn = get_raw_file("BEYONDCOAL", update=update, config=config) - df = pd.read_excel(fn, sheet_name="Plant", header=[0, 1, 2], skiprows=[3]) + df = pd.read_excel(fn, sheet_name="Plant", header=0, skiprows=[0,2,3]) + df.set_index("BFF plant ID", drop=False, inplace=True) if raw: return df + status_list = config["BEYONDCOAL"].get("status", ["Open"]) # noqa + + df_units = pd.read_excel(fn, sheet_name="Unit", header=0, skiprows=[0,2,3]) + RENAME_COLUMNS = { "Plant name": "Name", "Fuel type": "Fueltype", @@ -82,24 +87,34 @@ def BEYONDCOAL(raw=False, update=False, config=None): "(Announced) Retirement year of last unit": "DateOut", "Coal capacity open": "Capacity", "Plant status\n(gross)": "status", - "EBC plant ID": "projectID", + "BFF plant ID": "projectID", } phaseout_col = "Covered by country phase-out? 
[if yes: country phase-out year]" + df_units[phaseout_col] = pd.to_numeric(df_units[phaseout_col], errors='coerce') + unit_phaseout = df_units.groupby("BFF plant ID")[phaseout_col].max() + + # plant-level does not contain CHP information + def get_dominant_type(group): + type_capacity = group.groupby("Unit type")["Capacity"].sum() + return "CHP" if type_capacity.get("chp", 0) > type_capacity.get("conventional", 0) else "PP" + + unit_set = df_units.groupby("BFF plant ID").apply(get_dominant_type, include_groups=False) + + # for retired plants + unit_capacity = df_units.groupby("BFF plant ID").Capacity.sum() df = ( - df["Plant Data"] - .droplevel(1, axis=1) + df .rename(columns=RENAME_COLUMNS) - .query('status != "Cancelled"') + .query('status in @status_list') .assign( - DateOut=lambda df: df.DateOut.fillna(df[phaseout_col]).where( - lambda ds: ds <= 8000 - ), + DateOut=lambda df: df.rename(columns=RENAME_COLUMNS).DateOut.replace({"After 2030": np.nan, "By 2030": 2030}).infer_objects(copy=False).combine_first(unit_phaseout), projectID=lambda df: "BEYOND-" + df.projectID, - Fueltype=lambda df: df.Fueltype.str.title().replace("Unknown", "Other"), - Set="PP", + Fueltype=lambda df: df.Fueltype.str.title(), + Set=unit_set, Technology=np.nan, + Capacity=lambda df: df.Capacity.add(df["Coal capacity under construction"], fill_value=0).combine_first(unit_capacity), ) .pipe(scale_to_net_capacities) .pipe(clean_name) diff --git a/powerplantmatching/package_data/config.yaml b/powerplantmatching/package_data/config.yaml index f9b88ebe..b6d41484 100644 --- a/powerplantmatching/package_data/config.yaml +++ b/powerplantmatching/package_data/config.yaml @@ -57,8 +57,9 @@ BEYONDCOAL: net_capacity: false aggregated_units: true reliability_score: 6 - fn: Europe_Beyond_Coal-European_Coal_Database_hc5n.xlsx - url: https://beyond-coal.eu/wp-content/uploads/2021/07/2021-04-20_Europe_Beyond_Coal-European_Coal_Database_hc5n.xlsx + status: ["Construction", "Open", "Planned", "Retired"] + fn: 
2025-07-24-BeyondFossilFuels-Europe_Coal_Plants_Database.xlsx + url: https://beyondfossilfuels.org/wp-content/uploads/2025/07/2025-07-24-BeyondFossilFuels-Europe_Coal_Plants_Database.xlsx IRENA: net_capacity: true aggregated_units: true From 6b06a87bead0dc2bb1606b6306918d947059d070 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Tue, 19 Aug 2025 07:38:32 +0000 Subject: [PATCH 02/68] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- powerplantmatching/data.py | 34 ++++++++++++++++++++++------------ 1 file changed, 22 insertions(+), 12 deletions(-) diff --git a/powerplantmatching/data.py b/powerplantmatching/data.py index 089faa3f..7995fc58 100644 --- a/powerplantmatching/data.py +++ b/powerplantmatching/data.py @@ -68,15 +68,15 @@ def BEYONDCOAL(raw=False, update=False, config=None): config = get_config() if config is None else config fn = get_raw_file("BEYONDCOAL", update=update, config=config) - df = pd.read_excel(fn, sheet_name="Plant", header=0, skiprows=[0,2,3]) + df = pd.read_excel(fn, sheet_name="Plant", header=0, skiprows=[0, 2, 3]) df.set_index("BFF plant ID", drop=False, inplace=True) if raw: return df - status_list = config["BEYONDCOAL"].get("status", ["Open"]) # noqa + status_list = config["BEYONDCOAL"].get("status", ["Open"]) # noqa - df_units = pd.read_excel(fn, sheet_name="Unit", header=0, skiprows=[0,2,3]) + df_units = pd.read_excel(fn, sheet_name="Unit", header=0, skiprows=[0, 2, 3]) RENAME_COLUMNS = { "Plant name": "Name", @@ -91,30 +91,40 @@ def BEYONDCOAL(raw=False, update=False, config=None): } phaseout_col = "Covered by country phase-out? 
[if yes: country phase-out year]" - df_units[phaseout_col] = pd.to_numeric(df_units[phaseout_col], errors='coerce') + df_units[phaseout_col] = pd.to_numeric(df_units[phaseout_col], errors="coerce") unit_phaseout = df_units.groupby("BFF plant ID")[phaseout_col].max() # plant-level does not contain CHP information def get_dominant_type(group): type_capacity = group.groupby("Unit type")["Capacity"].sum() - return "CHP" if type_capacity.get("chp", 0) > type_capacity.get("conventional", 0) else "PP" + return ( + "CHP" + if type_capacity.get("chp", 0) > type_capacity.get("conventional", 0) + else "PP" + ) + + unit_set = df_units.groupby("BFF plant ID").apply( + get_dominant_type, include_groups=False + ) - unit_set = df_units.groupby("BFF plant ID").apply(get_dominant_type, include_groups=False) - # for retired plants unit_capacity = df_units.groupby("BFF plant ID").Capacity.sum() df = ( - df - .rename(columns=RENAME_COLUMNS) - .query('status in @status_list') + df.rename(columns=RENAME_COLUMNS) + .query("status in @status_list") .assign( - DateOut=lambda df: df.rename(columns=RENAME_COLUMNS).DateOut.replace({"After 2030": np.nan, "By 2030": 2030}).infer_objects(copy=False).combine_first(unit_phaseout), + DateOut=lambda df: df.rename(columns=RENAME_COLUMNS) + .DateOut.replace({"After 2030": np.nan, "By 2030": 2030}) + .infer_objects(copy=False) + .combine_first(unit_phaseout), projectID=lambda df: "BEYOND-" + df.projectID, Fueltype=lambda df: df.Fueltype.str.title(), Set=unit_set, Technology=np.nan, - Capacity=lambda df: df.Capacity.add(df["Coal capacity under construction"], fill_value=0).combine_first(unit_capacity), + Capacity=lambda df: df.Capacity.add( + df["Coal capacity under construction"], fill_value=0 + ).combine_first(unit_capacity), ) .pipe(scale_to_net_capacities) .pipe(clean_name) From 0002ee26c0ebf39c66a3df389141113ac4b0df96 Mon Sep 17 00:00:00 2001 From: Fabian Neumann Date: Tue, 19 Aug 2025 09:47:17 +0200 Subject: [PATCH 03/68] update JRC --- 
powerplantmatching/package_data/config.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/powerplantmatching/package_data/config.yaml b/powerplantmatching/package_data/config.yaml index b6d41484..d49ed078 100644 --- a/powerplantmatching/package_data/config.yaml +++ b/powerplantmatching/package_data/config.yaml @@ -80,7 +80,7 @@ ENTSOE-EIC: JRC: reliability_score: 4 fn: jrc-hydro-power-plant-database.csv - url: https://raw.githubusercontent.com/energy-modelling-toolkit/hydro-power-database/fd7535c/data/jrc-hydro-power-plant-database.csv + url: https://raw.githubusercontent.com/energy-modelling-toolkit/hydro-power-database/27e80f/data/jrc-hydro-power-plant-database.csv GEO: net_capacity: false reliability_score: 3 From cdefa46fe1f3ba2f31c1095e0133838ccad60a64 Mon Sep 17 00:00:00 2001 From: Fabian Neumann Date: Tue, 19 Aug 2025 09:50:31 +0200 Subject: [PATCH 04/68] update IRENASTAT --- powerplantmatching/package_data/config.yaml | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/powerplantmatching/package_data/config.yaml b/powerplantmatching/package_data/config.yaml index d49ed078..9404ff13 100644 --- a/powerplantmatching/package_data/config.yaml +++ b/powerplantmatching/package_data/config.yaml @@ -63,8 +63,9 @@ BEYONDCOAL: IRENA: net_capacity: true aggregated_units: true - fn: IRENASTAT_capacities_2000-2023.csv - url: https://zenodo.org/records/10952917/files/IRENASTAT_capacities_2000-2023.csv + fn: IRENASTAT_capacities_2000-2024.csv + # compiled from https://pxweb.irena.org/pxweb/en/IRENASTAT/IRENASTAT__Power%20Capacity%20and%20Generation/Country_ELECSTAT_2025_H2_PX.px/ + url: https://tubcloud.tu-berlin.de/s/p2D5E9MLWE8HPHE/download/IRENASTAT_capacities_2000-2024.csv CARMA: net_capacity: false reliability_score: 1 From 2043225588bb8b540c193fe685657a5436a90d9c Mon Sep 17 00:00:00 2001 From: Fabian Neumann Date: Tue, 19 Aug 2025 10:18:39 +0200 Subject: [PATCH 05/68] update GGTPT --- powerplantmatching/data.py | 6 +++--- 
powerplantmatching/package_data/config.yaml | 7 ++++--- 2 files changed, 7 insertions(+), 6 deletions(-) diff --git a/powerplantmatching/data.py b/powerplantmatching/data.py index 7995fc58..90f7f401 100644 --- a/powerplantmatching/data.py +++ b/powerplantmatching/data.py @@ -1846,11 +1846,11 @@ def GGTPT(raw=False, update=False, config=None): RENAME_COLUMNS = { "Project Name": "Name", - "Capacity (MW)": "Capacity", + "Unit Capacity (MW)": "Capacity", "Latitude": "lat", "Longitude": "lon", - "Start year": "DateIn", - "Retired year": "DateOut", + "Start Year": "DateIn", + "Retired Year": "DateOut", "Country/Area": "Country", "GEM unit ID": "projectID", } diff --git a/powerplantmatching/package_data/config.yaml b/powerplantmatching/package_data/config.yaml index 9404ff13..52b22792 100644 --- a/powerplantmatching/package_data/config.yaml +++ b/powerplantmatching/package_data/config.yaml @@ -173,9 +173,10 @@ GCPT: GGTPT: net_capacity: false reliability_score: 4 - status: ["operating", "retired", "construction"] - fn: Geothermal-Power-Tracker-May-2024.xlsx - url: https://tubcloud.tu-berlin.de/s/Hz3ZD7YcKnZTs9t/download/Geothermal-Power-Tracker-May-2024.xlsx + aggregated_units: false + status: ["operating", "retired", "construction", "mothballed"] + fn: Geothermal-Power-Tracker-March-2025-Final.xlsx + url: https://tubcloud.tu-berlin.de/s/dNoEsLeGtCWDkoc/download/Geothermal-Power-Tracker-March-2025-Final.xlsx GWPT: net_capacity: false reliability_score: 4 From 44371498c8de8276b6f6dcf54a03f11bf93be1ae Mon Sep 17 00:00:00 2001 From: Fabian Neumann Date: Tue, 19 Aug 2025 10:53:44 +0200 Subject: [PATCH 06/68] update GCPT --- powerplantmatching/data.py | 23 ++++++++++++--------- powerplantmatching/package_data/config.yaml | 4 ++-- 2 files changed, 15 insertions(+), 12 deletions(-) diff --git a/powerplantmatching/data.py b/powerplantmatching/data.py index 90f7f401..a31c3fc6 100644 --- a/powerplantmatching/data.py +++ b/powerplantmatching/data.py @@ -1764,7 +1764,7 @@ def 
GCPT(raw=False, update=False, config=None): config = get_config() if config is None else config fn = get_raw_file("GCPT", update=update, config=config) - df = pd.read_excel(fn, sheet_name="Units") + df = pd.read_excel(fn, sheet_name="Units", na_values=["not found"]) if raw: return df @@ -1782,22 +1782,24 @@ def GCPT(raw=False, update=False, config=None): "GEM unit/phase ID": "projectID", } fueltype_dict = { + "anthracite": "Hard Coal", "bituminous": "Hard Coal", + "bituminous with CCS": "Hard Coal", "lignite": "Lignite", - "unknown": "Hard Coal", - "subbituminous": "Hard Coal", - "waste coal": "Hard Coal", - "anthracite": "Hard Coal", "lignite with CCS": "Lignite", - "bituminous with CCS": "Hard Coal", + "subbituminous": "Hard Coal", "subbituminous with CCS": "Hard Coal", + "unknown": "Hard Coal", "unknown with CCS": "Hard Coal", + "waste coal": "Hard Coal", } planned_retirement = df["Planned retirement"].apply(pd.to_numeric, errors="coerce") status_list = config["GCPT"].get("status", ["operating"]) # noqa: F841 + BTU_PER_KWH = 3412.14 + df = df.rename(columns=RENAME_COLUMNS) df_final = ( df.pipe(clean_name) @@ -1806,16 +1808,17 @@ def GCPT(raw=False, update=False, config=None): .dropna(subset="Capacity") .assign( DateIn=df["DateIn"].apply(pd.to_numeric, errors="coerce"), - DateOut=df["DateOut"].apply(pd.to_numeric, errors="coerce"), + DateOut=df["DateOut"] + .apply(pd.to_numeric, errors="coerce") + .combine_first(planned_retirement), lat=df["lat"].apply(pd.to_numeric, errors="coerce"), lon=df["lon"].apply(pd.to_numeric, errors="coerce"), + Set=df["CHP"].replace({"yes": "CHP", "no": "PP"}), + Efficiency=BTU_PER_KWH / df["Heat rate (Btu per kWh)"], ) - .assign(DateOut=lambda x: x["DateOut"].combine_first(planned_retirement)) .query("Status in @status_list") .pipe(lambda x: x[df.columns.intersection(config.get("target_columns"))]) .pipe(lambda x: x.replace({"Fueltype": fueltype_dict})) - .pipe(lambda x: x.assign(Technology="Steam Turbine")) - .pipe(lambda x: 
x.assign(Set="PP")) .pipe(config_filter, config) ) diff --git a/powerplantmatching/package_data/config.yaml b/powerplantmatching/package_data/config.yaml index 52b22792..f961fcab 100644 --- a/powerplantmatching/package_data/config.yaml +++ b/powerplantmatching/package_data/config.yaml @@ -168,8 +168,8 @@ GCPT: net_capacity: false reliability_score: 4 status: ["operating", "retired", "construction"] - fn: Global-Coal-Plant-Tracker-July-2024.xlsx - url: https://tubcloud.tu-berlin.de/s/FdyKMZtr2ddRJEd/download/Global-Coal-Plant-Tracker-July-2024.xlsx + fn: Global-Coal-Plant-Tracker-July-2025.xlsx + url: https://tubcloud.tu-berlin.de/s/etMB7qawKNwfgnk/download/Global-Coal-Plant-Tracker-July-2025.xlsx GGTPT: net_capacity: false reliability_score: 4 From 309b22509e6a0d2569243381b1e79ca6f453d9e9 Mon Sep 17 00:00:00 2001 From: Fabian Neumann Date: Tue, 19 Aug 2025 12:23:45 +0200 Subject: [PATCH 07/68] update GCPT --- powerplantmatching/data.py | 50 +++++++++++---------- powerplantmatching/package_data/config.yaml | 4 +- 2 files changed, 29 insertions(+), 25 deletions(-) diff --git a/powerplantmatching/data.py b/powerplantmatching/data.py index a31c3fc6..4d9ed713 100644 --- a/powerplantmatching/data.py +++ b/powerplantmatching/data.py @@ -2028,42 +2028,54 @@ def GGPT(raw=False, update=False, config=None): """ config = get_config() if config is None else config fn = get_raw_file("GGPT", update=update, config=config) - df = pd.read_excel(fn, sheet_name="Gas & Oil Units") + df = pd.read_excel(fn, sheet_name="Gas & Oil Units", na_values=["not found"]) + df_small = pd.read_excel(fn, sheet_name="sub-threshold units", na_values=["not found"]) + df = pd.concat([df, df_small], ignore_index=True) if raw: return df RENAME_COLUMNS = { "Plant name": "Name", - "Fuel": "Fueltype", "Capacity (MW)": "Capacity", "Latitude": "lat", "Longitude": "lon", "Start year": "DateIn", "Retired year": "DateOut", "CHP": "Set", + "Fuel": "Fueltype", "GEM location ID": "projectID", + "Country/Area": 
"Country", + "Turbine/Engine Technology": "Technology", } + def classify_fuel(s): + if s["Fuel classification?"] in ["Gas only", "LNG only"]: + return "Natural Gas" + elif s["Fuel classification?"] == "Oil only": + return "Oil" + elif s["Fueltype"].startswith("fossil liquids"): + return "Oil" + else: + return "Natural Gas" + technology_dict = { - "GT": "Steam Turbine", - "IC": "Steam Turbine", - "CC": "CCGT", - "GT/IC": "Steam Turbine", + "gas turbine": "Steam Turbine", + "internal combustion": "Steam Turbine", + "combined cycle": "CCGT", "ICCC": "CCGT", "ISCC": "CCGT", - "ST": "Steam Turbine", + "steam turbine": "Steam Turbine", "AFC": "CCGT", + "unknown": np.nan, } set_dict = { - "Y": "CHP", - "N": "PP", - "not found": "PP", + "yes": "CHP", + "no": "PP", } status_list = config["GGPT"].get("status", ["operating"]) # noqa: F841 - gas_fuels = ["NG", "LNG", "BU", "LFG", "BG", "BFG", "COG", "CM", "H", "OG"] df = df.rename(columns=RENAME_COLUMNS) df_final = ( @@ -2071,26 +2083,18 @@ def GGPT(raw=False, update=False, config=None): .pipe(set_column_name, "GGPT") .pipe(convert_to_short_name) .dropna(subset="Capacity") - .pipe(lambda x: x.query("Capacity != 'not found'")) .assign( DateIn=df["DateIn"].apply(pd.to_numeric, errors="coerce"), - DateOut=df["DateOut"].apply(pd.to_numeric, errors="coerce"), + DateOut=df["DateOut"].apply(pd.to_numeric, errors="coerce").combine_first(df["Planned retire"]), lat=df["lat"].apply(pd.to_numeric, errors="coerce"), lon=df["lon"].apply(pd.to_numeric, errors="coerce"), - Capacity=lambda df: pd.to_numeric(df.Capacity, "coerce"), - Fueltype=df["Fueltype"].apply( - lambda s: ( - "Natural Gas" - if any(sub in gas_fuels for sub in s.split("/")) - else "Oil" - ) - ), + Capacity=df["Capacity"].apply(pd.to_numeric, errors="coerce"), + Fueltype=df.apply(classify_fuel, axis=1), ) .query("Status in @status_list") .pipe(lambda x: x[df.columns.intersection(config.get("target_columns"))]) .pipe(lambda x: x.replace({"Technology": technology_dict})) - 
.pipe(lambda x: x.replace({"Set": set_dict}).fillna({"Set": "PP"})) - .assign(Fueltype="Natural Gas") + .pipe(lambda x: x.replace({"Set": set_dict})) .pipe(config_filter, config) ) return df_final diff --git a/powerplantmatching/package_data/config.yaml b/powerplantmatching/package_data/config.yaml index f961fcab..345a20e2 100644 --- a/powerplantmatching/package_data/config.yaml +++ b/powerplantmatching/package_data/config.yaml @@ -158,8 +158,8 @@ GGPT: net_capacity: false reliability_score: 5 status: ["operating", "retired", "construction"] - fn: Global-Oil-and-Gas-Plant-Tracker-GOGPT-February-2024-v4.xlsx - url: https://tubcloud.tu-berlin.de/s/Be5arQgT9Z9g8Kp/download/Global-Oil-and-Gas-Plant-Tracker-GOGPT-February-2024-v4.xlsx + fn: Global-Oil-and-Gas-Plant-Tracker-GOGPT-August-2025.xlsx + url: https://tubcloud.tu-berlin.de/s/aKrt7dyNgazmgAm/download/Global-Oil-and-Gas-Plant-Tracker-GOGPT-August-2025.xlsx GEM: # combined data set of all GEM trackers net_capacity: true From 65d1c6d5dcaef96a7f7729fcf69733a566f7148b Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Tue, 19 Aug 2025 10:23:55 +0000 Subject: [PATCH 08/68] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- powerplantmatching/data.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/powerplantmatching/data.py b/powerplantmatching/data.py index 4d9ed713..5b7312e6 100644 --- a/powerplantmatching/data.py +++ b/powerplantmatching/data.py @@ -2029,7 +2029,9 @@ def GGPT(raw=False, update=False, config=None): config = get_config() if config is None else config fn = get_raw_file("GGPT", update=update, config=config) df = pd.read_excel(fn, sheet_name="Gas & Oil Units", na_values=["not found"]) - df_small = pd.read_excel(fn, sheet_name="sub-threshold units", na_values=["not found"]) + df_small = pd.read_excel( + fn, sheet_name="sub-threshold units", na_values=["not found"] + 
) df = pd.concat([df, df_small], ignore_index=True) if raw: @@ -2085,7 +2087,9 @@ def classify_fuel(s): .dropna(subset="Capacity") .assign( DateIn=df["DateIn"].apply(pd.to_numeric, errors="coerce"), - DateOut=df["DateOut"].apply(pd.to_numeric, errors="coerce").combine_first(df["Planned retire"]), + DateOut=df["DateOut"] + .apply(pd.to_numeric, errors="coerce") + .combine_first(df["Planned retire"]), lat=df["lat"].apply(pd.to_numeric, errors="coerce"), lon=df["lon"].apply(pd.to_numeric, errors="coerce"), Capacity=df["Capacity"].apply(pd.to_numeric, errors="coerce"), From 43bef198095590fd7147fb875b753df6ebbab879 Mon Sep 17 00:00:00 2001 From: Fabian Neumann Date: Tue, 19 Aug 2025 13:35:13 +0200 Subject: [PATCH 09/68] update GWPT --- powerplantmatching/data.py | 5 +++++ powerplantmatching/package_data/config.yaml | 4 ++-- 2 files changed, 7 insertions(+), 2 deletions(-) diff --git a/powerplantmatching/data.py b/powerplantmatching/data.py index 5b7312e6..04494c6c 100644 --- a/powerplantmatching/data.py +++ b/powerplantmatching/data.py @@ -1900,6 +1900,11 @@ def GWPT(raw=False, update=False, config=None): config = get_config() if config is None else config fn = get_raw_file("GWPT", update=update, config=config) df = pd.read_excel(fn, sheet_name="Data") + df_small = pd.read_excel(fn, sheet_name="Below Threshold") + df = pd.concat([df, df_small], ignore_index=True) + + if raw: + return df RENAME_COLUMNS = { "Project Name": "Name", diff --git a/powerplantmatching/package_data/config.yaml b/powerplantmatching/package_data/config.yaml index 345a20e2..2c983fa2 100644 --- a/powerplantmatching/package_data/config.yaml +++ b/powerplantmatching/package_data/config.yaml @@ -181,8 +181,8 @@ GWPT: net_capacity: false reliability_score: 4 status: ["operating", "retired", "construction"] - fn: Global-Wind-Power-Tracker-June-2024.xlsx - url: https://tubcloud.tu-berlin.de/s/Z9b3WkAJmSnsrHD/download/Global-Wind-Power-Tracker-June-2024.xlsx + fn: 
Global-Wind-Power-Tracker-February-2025.xlsx + url: https://tubcloud.tu-berlin.de/s/8NSXSjPmJPXpg4W/download/Global-Wind-Power-Tracker-February-2025.xlsx GSPT: net_capacity: false reliability_score: 4 From 88df2b633fa6d28b2131ba66c2ac59e2592c2ad4 Mon Sep 17 00:00:00 2001 From: Fabian Neumann Date: Tue, 19 Aug 2025 13:44:04 +0200 Subject: [PATCH 10/68] update GSPT --- powerplantmatching/package_data/config.yaml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/powerplantmatching/package_data/config.yaml b/powerplantmatching/package_data/config.yaml index 2c983fa2..0f9bf13e 100644 --- a/powerplantmatching/package_data/config.yaml +++ b/powerplantmatching/package_data/config.yaml @@ -186,9 +186,9 @@ GWPT: GSPT: net_capacity: false reliability_score: 4 - status: ["operating", "construction"] - fn: Global-Solar-Power-Tracker-June-2024.xlsx - url: https://tubcloud.tu-berlin.de/s/tJ5K5rA2e5XaNjM/download/Global-Solar-Power-Tracker-June-2024.xlsx + status: ["operating", "retired", "construction"] + fn: Global-Solar-Power-Tracker-February-2025.xlsx + url: https://tubcloud.tu-berlin.de/s/7eo4dZXMp6eB3mz/download/Global-Solar-Power-Tracker-February-2025.xlsx GBPT: net_capacity: false reliability_score: 4 From 01b790b0a1a518518464ff977b5f3a4c20c5d0c9 Mon Sep 17 00:00:00 2001 From: Fabian Neumann Date: Tue, 19 Aug 2025 14:27:35 +0200 Subject: [PATCH 11/68] update GBPT --- powerplantmatching/data.py | 53 ++++++++++++--------- powerplantmatching/package_data/config.yaml | 6 +-- 2 files changed, 33 insertions(+), 26 deletions(-) diff --git a/powerplantmatching/data.py b/powerplantmatching/data.py index 04494c6c..5e8092dc 100644 --- a/powerplantmatching/data.py +++ b/powerplantmatching/data.py @@ -1632,37 +1632,44 @@ def GBPT(raw=False, update=False, config=None): """ config = get_config() if config is None else config fn = get_raw_file("GBPT", update=update, config=config) - df = pd.read_excel(fn, sheet_name="Data") + large = pd.read_excel(fn, 
sheet_name="Data") + small = pd.read_excel(fn, sheet_name="Below Threshold") + df = pd.concat([large, small], ignore_index=True) if raw: return df RENAME_COLUMNS = { - "Project name": "Name", + "Project Name": "Name", "Capacity (MW)": "Capacity", - "Fuel 1": "Fueltype", - "Operating status": "Status", + "Fuel": "Fueltype", "Latitude": "lat", "Longitude": "lon", - "Unit start year": "DateIn", - "Retired year": "DateOut", + "Start Year": "DateIn", + "Retired Year": "DateOut", + "Country/Area": "Country", "GEM phase ID": "projectID", } + fueltype_dict = { - "bioenergy - agricultural waste (solids)": "Solid Biomass", - "bioenergy - refuse (municipal and industrial wastes)": "Solid Biomass", - "bioenergy - refuse (syngas)": "Solid Biomass", - "bioenergy - agricultural waste (biogas)": "Biogas", - "bioenergy - wood & other biomass (solids)": "Solid Biomass", - "bioenergy - ethanol": "Solid Biomass", - "bioenergy - paper mill wastes": "Solid Biomass", - "bioenergy - biodiesel": "Solid Biomass", - "bioenergy - unknown": "Solid Biomass", - "bioenergy - wastewater and sewage sludge (solids or biogas)": "Solid Biomass", - "bioenergy - refuse (landfill gas)": "Biogas", - "bioenergy - agricultural waste (unknown)": "Solid Biomass", - "bioenergy - agricultural waste (syngas)": "Solid Biomass", - "bioenergy - wood & other biomass (biocoal)": "Solid Biomass", + # solid biomass + "bioenergy: agricultural waste (solids)": "Solid Biomass", + "bioenergy: agricultural waste (unknown)": "Solid Biomass", + "bioenergy: paper mill wastes": "Solid Biomass", + "bioenergy: unknown": "Solid Biomass", + "bioenergy: wood & other biomass (biocoal)": "Solid Biomass", + "bioenergy: wood & other biomass (solids)": "Solid Biomass", + "bioenergy: agricultural waste (syngas)": "Solid Biomass", + # biogas + "bioenergy: agricultural waste (biogas)": "Biogas", + "bioenergy: refuse (landfill gas)": "Biogas", + "bioenergy: wastewater and sewage sludge (solids or biogas)": "Biogas", + # oil + "bioenergy: 
ethanol": "Oil", + "bioenergy: biodiesel": "Oil", + # waste + "bioenergy: refuse (municipal and industrial wastes)": "Waste", + "bioenergy: refuse (syngas)": "Solid Biomass", } status_list = config["GBPT"].get("status", ["operating"]) # noqa: F841 @@ -1678,12 +1685,12 @@ def GBPT(raw=False, update=False, config=None): DateOut=df["DateOut"].apply(pd.to_numeric, errors="coerce"), lat=df["lat"].apply(pd.to_numeric, errors="coerce"), lon=df["lon"].apply(pd.to_numeric, errors="coerce"), + Fueltype=df["Fueltype"].apply(lambda v: fueltype_dict[v.split(",")[0].strip()]) ) .query("Status in @status_list") .pipe(lambda x: x[df.columns.intersection(config.get("target_columns"))]) - .pipe(lambda x: x.replace({"Fueltype": fueltype_dict})) - .assign(Technology="Steam Turbine") - .assign(Set="PP") + .assign(Technology=np.nan) + .assign(Set=np.nan) .pipe(config_filter, config) ) return df_final diff --git a/powerplantmatching/package_data/config.yaml b/powerplantmatching/package_data/config.yaml index 0f9bf13e..c6f94d4a 100644 --- a/powerplantmatching/package_data/config.yaml +++ b/powerplantmatching/package_data/config.yaml @@ -193,8 +193,8 @@ GBPT: net_capacity: false reliability_score: 4 status: ["operating", "retired", "construction"] - fn: Global-Bioenergy-Power-Tracker-GBPT-V1.xlsx - url: https://tubcloud.tu-berlin.de/s/F34bbwcxYHL9ZR4/download/Global-Bioenergy-Power-Tracker-GBPT-V1.xlsx + fn: Global-Bioenergy-Power-Tracker-GBPT-September-2024.xlsx + url: https://tubcloud.tu-berlin.de/s/CzMBKe2rAcsoq7c/download/Global-Bioenergy-Power-Tracker-GBPT-September-2024.xlsx GNPT: net_capacity: false reliability_score: 4 @@ -315,7 +315,7 @@ target_fueltypes: Oil: [oil, diesel, mineralölprodukte] Geothermal: "" Solar: "" - Waste: ["abfall.*", "waste"] + Waste: ["abfall.*", "waste", "mva", "müll", "afval", "energy recovery", "incineration"] Wind: "" Battery: [Electro-chemical, battery] target_sets: From cfc8c31d86db8ee7e56826ea978a4d7bbede2d5e Mon Sep 17 00:00:00 2001 From: 
"pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Tue, 19 Aug 2025 12:27:45 +0000 Subject: [PATCH 12/68] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- powerplantmatching/data.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/powerplantmatching/data.py b/powerplantmatching/data.py index 5e8092dc..a37d58d1 100644 --- a/powerplantmatching/data.py +++ b/powerplantmatching/data.py @@ -1660,7 +1660,7 @@ def GBPT(raw=False, update=False, config=None): "bioenergy: wood & other biomass (biocoal)": "Solid Biomass", "bioenergy: wood & other biomass (solids)": "Solid Biomass", "bioenergy: agricultural waste (syngas)": "Solid Biomass", - # biogas + # biogas "bioenergy: agricultural waste (biogas)": "Biogas", "bioenergy: refuse (landfill gas)": "Biogas", "bioenergy: wastewater and sewage sludge (solids or biogas)": "Biogas", @@ -1685,7 +1685,9 @@ def GBPT(raw=False, update=False, config=None): DateOut=df["DateOut"].apply(pd.to_numeric, errors="coerce"), lat=df["lat"].apply(pd.to_numeric, errors="coerce"), lon=df["lon"].apply(pd.to_numeric, errors="coerce"), - Fueltype=df["Fueltype"].apply(lambda v: fueltype_dict[v.split(",")[0].strip()]) + Fueltype=df["Fueltype"].apply( + lambda v: fueltype_dict[v.split(",")[0].strip()] + ), ) .query("Status in @status_list") .pipe(lambda x: x[df.columns.intersection(config.get("target_columns"))]) From 285f9bce0cb003ab027c97fbf12de0b7ac602117 Mon Sep 17 00:00:00 2001 From: Fabian Neumann Date: Tue, 19 Aug 2025 14:49:39 +0200 Subject: [PATCH 13/68] update GHPT --- powerplantmatching/data.py | 9 ++++++--- powerplantmatching/package_data/config.yaml | 8 ++++++-- 2 files changed, 12 insertions(+), 5 deletions(-) diff --git a/powerplantmatching/data.py b/powerplantmatching/data.py index a37d58d1..b2b6970b 100644 --- a/powerplantmatching/data.py +++ b/powerplantmatching/data.py @@ -2135,7 +2135,9 @@ def GHPT(raw=False, update=False, 
config=None): """ config = get_config() if config is None else config fn = get_raw_file("GHPT", update=update, config=config) - df = pd.read_excel(fn, sheet_name="Data") + large = pd.read_excel(fn, sheet_name="Data") + small = pd.read_excel(fn, sheet_name="Below Threshold") + df = pd.concat([large, small], ignore_index=True) if raw: return df @@ -2148,7 +2150,7 @@ def GHPT(raw=False, update=False, config=None): "Start Year": "DateIn", "Retired Year": "DateOut", "GEM unit ID": "projectID", - "Country 1": "Country", + "Country/Area 1": "Country", "Technology Type": "Technology", } technology_dict = { @@ -2156,7 +2158,8 @@ def GHPT(raw=False, update=False, config=None): "pumped storage": "Pumped Storage", "run-of-river": "Run-Of-River", "conventional and pumped storage": "Pumped Storage", - "conventional and run-of-river": "Run-Of-River", + "conventional and run-of-river": "Reservoir", + "unknown": "Run-Of-River", } status_list = config["GHPT"].get("status", ["operating"]) # noqa: F841 df = df.rename(columns=RENAME_COLUMNS) diff --git a/powerplantmatching/package_data/config.yaml b/powerplantmatching/package_data/config.yaml index c6f94d4a..2442fbfc 100644 --- a/powerplantmatching/package_data/config.yaml +++ b/powerplantmatching/package_data/config.yaml @@ -205,8 +205,8 @@ GHPT: net_capacity: false reliability_score: 4 status: ["operating", "retired", "construction"] - fn: Global-Hydropower-Tracker-April-2024.xlsx - url: https://tubcloud.tu-berlin.de/s/sEztyBLdJS5sNHY/download/Global-Hydropower-Tracker-April-2024.xlsx + fn: Global-Hydropower-Tracker-April-2025.xlsx + url: https://tubcloud.tu-berlin.de/s/2xqxRmfP4FKTrLf/download/Global-Hydropower-Tracker-April-2025.xlsx MASTR: net_capacity: true @@ -306,9 +306,13 @@ target_fueltypes: run-of-river, ror, hydro, + hidro, hydroelectric, wasserkraft, wasser, + vannkraft, + wodna, + idroelettrica, ] Hard Coal: [coal, coke, steinkohle] Lignite: [brown coal, lignite, peat, braunkohle] From 
c8861842947dfbb62e5196397a3e263ec8fbda3c Mon Sep 17 00:00:00 2001 From: Fabian Neumann Date: Tue, 19 Aug 2025 15:07:21 +0200 Subject: [PATCH 14/68] match fueltypes in other languages --- powerplantmatching/package_data/config.yaml | 40 +++++++++++++++++---- 1 file changed, 33 insertions(+), 7 deletions(-) diff --git a/powerplantmatching/package_data/config.yaml b/powerplantmatching/package_data/config.yaml index 2442fbfc..e1a5615b 100644 --- a/powerplantmatching/package_data/config.yaml +++ b/powerplantmatching/package_data/config.yaml @@ -282,9 +282,9 @@ target_fueltypes: # given by the list. An empty string results in a regex expression containing only the key. # Parsed of representatives at the top may be overwritten by representatives further below. Other: ".*" - Solid Biomass: [biological, bioenergy, agricultural, wood, biomass, feste biomasse] - Biogas: [biogas, biomethan, gasförmige biomasse] - Nuclear: [nuclear] + Solid Biomass: [biological, bioenergy, agricultural, wood, biomass, feste biomasse, biomasa, biomassa] + Biogas: [biogas, biogaz, biomethan, gasförmige biomasse] + Nuclear: [nuclear, kernkraft, atomkraft, nucléaire, atomowa, jądrowa, kjernekraft, kern, atom, atoom] Natural Gas: [ ccgt, @@ -297,6 +297,13 @@ target_fueltypes: mixed fossil fuels, erdgas, andere gase, + gaz, + gaz naturel, + gas natural, + naturgass, + gaz ziemny, + gass, + aardgas, ] Hydro: [ @@ -307,19 +314,23 @@ target_fueltypes: ror, hydro, hidro, + hydraulique, hydroelectric, wasserkraft, + waterkracht, wasser, vannkraft, + vattenkraft, wodna, idroelettrica, + idraulica, ] - Hard Coal: [coal, coke, steinkohle] - Lignite: [brown coal, lignite, peat, braunkohle] - Oil: [oil, diesel, mineralölprodukte] + Hard Coal: [coal, coke, steinkohle, houille, charbon dur, hulla, carbón duro, carbone duro, antracite, steinkul, węgiel kamienny, steenkool] + Lignite: [brown coal, lignite, peat, braunkohle, ligni.*, brunatny, brunkul, bruinkool] + Oil: [oil, diesel, mineralölprodukte, fioul, 
mazout, petrol, olio, olej, carburante, olie] Geothermal: "" Solar: "" - Waste: ["abfall.*", "waste", "mva", "müll", "afval", "energy recovery", "incineration"] + Waste: ["abfall.*", "waste", "mva", "müll", "afval", "affald", "energy recovery", "incineration", "reststoffe", "refuse", "déchets", "ordures", "residuos", "basura", "rifiuti", "scarti", "odpady", "śmieci"] Wind: "" Battery: [Electro-chemical, battery] target_sets: @@ -341,6 +352,21 @@ target_sets: power and heat, heat and power, chp, + cogen, + heat & power, + power & heat, + cogeneración, + cogenerazione, + kogeneracja, + combinada calor y electricidad, + kraftvarmeverk, + kraftvarmeværk, + samproduktion, + samproduksjon, + kvv, + wkk, + warmtekrachtkoppeling, + warmte-krachtcentrale, ] Store: [battery, storage, store] target_technologies: From 194133330935f1a31a8c1daec6b472520223e858 Mon Sep 17 00:00:00 2001 From: Fabian Neumann Date: Tue, 19 Aug 2025 15:14:31 +0200 Subject: [PATCH 15/68] update ENTSOE-EIC --- powerplantmatching/package_data/config.yaml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/powerplantmatching/package_data/config.yaml b/powerplantmatching/package_data/config.yaml index e1a5615b..4583d75e 100644 --- a/powerplantmatching/package_data/config.yaml +++ b/powerplantmatching/package_data/config.yaml @@ -76,8 +76,8 @@ ENTSOE: url: https://raw.githubusercontent.com/pypsa-meets-earth/ppm-data-backup/main/entsoe_powerplants.csv fn: entsoe_powerplants.csv ENTSOE-EIC: - url: https://eepublicdownloads.entsoe.eu/eic-codes-csv/W_eiccodes.csv - fn: entsoe_eic_codes.csv + url: https://eepublicdownloads.blob.core.windows.net/cio-lio/csv/W_eicCodes.csv + fn: W_eicCodes.csv JRC: reliability_score: 4 fn: jrc-hydro-power-plant-database.csv From 53e1a8c30143f329c24998b52e82c03cfcd1423e Mon Sep 17 00:00:00 2001 From: Fabian Neumann Date: Wed, 20 Aug 2025 11:24:45 +0200 Subject: [PATCH 16/68] update MASTR data processing --- powerplantmatching/data.py | 80 ++++++++++++++++--- 
.../package_data/PLZ_Coords_map.csv | 8 ++ powerplantmatching/package_data/config.yaml | 21 ++--- 3 files changed, 88 insertions(+), 21 deletions(-) diff --git a/powerplantmatching/data.py b/powerplantmatching/data.py index b2b6970b..961ad236 100644 --- a/powerplantmatching/data.py +++ b/powerplantmatching/data.py @@ -35,7 +35,7 @@ gather_specifications, ) from .core import _package_data, get_config -from .heuristics import scale_to_net_capacities +from .heuristics import scale_to_net_capacities, PLZ_to_LatLon_map from .utils import ( config_filter, convert_to_short_name, @@ -2226,8 +2226,11 @@ def MASTR( defaults to powerplantmatching.config.get_config() """ + config = get_config() if config is None else config + THRESHOLD_KW = 1000 # noqa: F841 + RENAME_COLUMNS = { "EinheitMastrNummer": "projectID", "NameKraftwerk": "Name", @@ -2251,6 +2254,7 @@ def MASTR( "Energietraeger", "Hauptbrennstoff", "NameStromerzeugungseinheit", + "Technologie", ] fn = get_raw_file("MASTR", update=update, config=config) @@ -2261,6 +2265,7 @@ def MASTR( "Hydro": "hydro_raw.csv", "Wind": "wind_raw.csv", "Solar": "solar_raw.csv", + "Storage": "bnetza_mastr_storage_raw.csv" } data_frames = [] with ZipFile(fn, "r") as file: @@ -2272,6 +2277,13 @@ def MASTR( "GeplantesInbetriebnahmedatum", "ThermischeNutzleistung", "KwkMastrNummer", + "Batterietechnologie", + "DatumBeginnVoruebergehendeStilllegung", + "DatumWiederaufnahmeBetrieb", + "Postleitzahl", + "Ort", + "Gemeinde", + "Landkreis", ] target_columns = ( target_columns + PARSE_COLUMNS + list(RENAME_COLUMNS.keys()) @@ -2279,32 +2291,57 @@ def MASTR( usecols = available_columns.intersection(target_columns) df = pd.read_csv(file.open(name), usecols=usecols).assign( Filesuffix=fueltype - ) + ).query("Nettonennleistung >= @THRESHOLD_KW") data_frames.append(df) break df = pd.concat(data_frames).reset_index(drop=True) + cols = ["NutzbareSpeicherkapazitaet", "VerknuepfteEinheit"] + with ZipFile(fn, "r") as file: + fn_storage_units = 
"bnetza_open_mastr_2025-02-09/bnetza_mastr_storage_units_raw.csv" + storage_units = pd.read_csv(file.open(fn_storage_units), usecols=cols) + + storage_mwh = ( + storage_units + .assign(VerknuepfteEinheit=lambda x: x.VerknuepfteEinheit.str.split(", ")) + .assign(n=lambda x: x.VerknuepfteEinheit.str.len()) + .explode("VerknuepfteEinheit") + .assign(NutzbareSpeicherkapazitaet=lambda x: x.NutzbareSpeicherkapazitaet / x.n) + .set_index("VerknuepfteEinheit")["NutzbareSpeicherkapazitaet"] + ) + + df["StorageCapacity_MWh"] = df["EinheitMastrNummer"].map(storage_mwh) / 1000 # kWh to MWh + if raw: return df status_list = config["MASTR"].get("status", ["In Betrieb"]) # noqa: F841 - capacity_threshold_kw = 1000 - df = ( + PLZ_map = PLZ_to_LatLon_map() + df.Postleitzahl = df.Postleitzahl.astype(str).str.replace(r'[^0-9]', '0', regex=True).astype(int) + df["PLZ_lat"] = df.Postleitzahl.map(PLZ_map.lat) + df["PLZ_lon"] = df.Postleitzahl.map(PLZ_map.lon) + + df_processed = ( df.rename(columns=RENAME_COLUMNS) .query("Status in @status_list") - .loc[lambda df: df.Capacity > capacity_threshold_kw] .assign( projectID=lambda df: "MASTR-" + df.projectID, + Name=lambda df: df.Name.combine_first(df.NameStromerzeugungseinheit), Country=lambda df: df.Country.map(COUNTRY_MAP), Capacity=lambda df: df.Capacity / 1e3, # kW to MW - DateIn=lambda df: pd.to_datetime(df.DateIn).dt.year, - DateOut=lambda df: pd.to_datetime(df.DateOut).dt.year, - ) - .assign( - DateIn=lambda df: df["DateIn"].combine_first( + DateIn=lambda df: pd.to_datetime(df.DateIn).dt.year.combine_first( pd.to_datetime(df["GeplantesInbetriebnahmedatum"]).dt.year ), + DateOut=lambda df: pd.to_datetime(df.DateOut).dt.year.where( + df.Status != "Vorübergehend stillgelegt", + pd.to_datetime(df["DatumBeginnVoruebergehendeStilllegung"]).dt.year.where( + df["DatumWiederaufnahmeBetrieb"].isna(), + pd.to_datetime(df.DateOut).dt.year + ), + ), + lat=lambda df: df.lat.combine_first(df.PLZ_lat), + lon=lambda df: 
df.lon.combine_first(df.PLZ_lon), ) .pipe( gather_specifications, @@ -2316,12 +2353,33 @@ def MASTR( df["KwkMastrNummer"].isna() & df["ThermischeNutzleistung"].isna(), "CHP" ), ) + ) + + psw = df_processed.query("Energietraeger == 'Speicher' and Technologie == 'Pumpspeicher'").index + df_processed.loc[psw, ["Fueltype", "Technology"]] = ["Hydro", "Pumped Storage"] + + bat = df_processed.query("Energietraeger == 'Speicher' and Technologie == 'Batterie'").index + df_processed.loc[bat, ["Fueltype", "Set"]] = ["Battery", "Store"] + BATTERY_MAPPING = { + "Blei-Batterie": "Lead", + "Lithium-Batterie": "Lithium", + "Sonstige Batterie": np.nan, + "Hochtemperaturbatterie": "High-Temperature", + "Nickel-Cadmium- / Nickel-Metallhydridbatterie": "Nickel" + } + df_processed.loc[bat, "Technology"] = df_processed.loc[bat, "Batterietechnologie"].map(BATTERY_MAPPING) + + mask = df_processed.query("Energietraeger in ['Hydro', 'Wind', 'Solar', 'Battery'] and Set == 'Store'").index + df_processed.loc[mask, "Set"] = "PP" + + df_final = ( + df_processed .pipe(clean_name) .pipe(set_column_name, "MASTR") .pipe(config_filter, config) ) - return df + return df_final # deprecated alias for GGPT diff --git a/powerplantmatching/package_data/PLZ_Coords_map.csv b/powerplantmatching/package_data/PLZ_Coords_map.csv index 90203fec..f6fc38cb 100644 --- a/powerplantmatching/package_data/PLZ_Coords_map.csv +++ b/powerplantmatching/package_data/PLZ_Coords_map.csv @@ -8197,3 +8197,11 @@ PLZ,lon,lat 65527,8.29686030496,50.1698531547 32760,8.89250849998,51.9103401848 65529,8.34783843133,50.256587295 +39628,11.6901777,52.6269331 +23769,11.1340848,54.4687375 +64760,8.9928567,49.540722 +78089,8.3637278,48.0748482 +99331,10.8270088,50.7108384 +98694,10.9888104,50.6365371 +19055,11.4375455,53.655925 +81248,11.4023582,48.1497765 diff --git a/powerplantmatching/package_data/config.yaml b/powerplantmatching/package_data/config.yaml index 4583d75e..39c91364 100644 --- a/powerplantmatching/package_data/config.yaml 
+++ b/powerplantmatching/package_data/config.yaml @@ -211,7 +211,7 @@ GHPT: MASTR: net_capacity: true reliability_score: 8 - status: ["In Betrieb", "In Planung", "Endgültig stillgelegt"] + status: ["In Betrieb", "In Planung", "Endgültig stillgelegt", "Vorübergehend stillgelegt"] fn: bnetza_open_mastr_2025-02-09.zip url: https://zenodo.org/records/14783581/files/bnetza_open_mastr_2025-02-09.zip @@ -282,7 +282,7 @@ target_fueltypes: # given by the list. An empty string results in a regex expression containing only the key. # Parsed of representatives at the top may be overwritten by representatives further below. Other: ".*" - Solid Biomass: [biological, bioenergy, agricultural, wood, biomass, feste biomasse, biomasa, biomassa] + Solid Biomass: [biological, bioenergy, agricultural, wood, holz, biomass, feste biomasse, biomasa, biomassa, feste biogene stoffe, pellets, stroh, straw] Biogas: [biogas, biogaz, biomethan, gasförmige biomasse] Nuclear: [nuclear, kernkraft, atomkraft, nucléaire, atomowa, jądrowa, kjernekraft, kern, atom, atoom] Natural Gas: @@ -304,6 +304,7 @@ target_fueltypes: gaz ziemny, gass, aardgas, + flüssiggas ] Hydro: [ @@ -327,10 +328,10 @@ target_fueltypes: ] Hard Coal: [coal, coke, steinkohle, houille, charbon dur, hulla, carbón duro, carbone duro, antracite, steinkul, węgiel kamienny, steenkool] Lignite: [brown coal, lignite, peat, braunkohle, ligni.*, brunatny, brunkul, bruinkool] - Oil: [oil, diesel, mineralölprodukte, fioul, mazout, petrol, olio, olej, carburante, olie] + Oil: [oil, diesel, biodiesel, methanol, heizöl, ethanol, mineralölprodukte, öl, fioul, mazout, petrol, olio, olej, carburante, olie] Geothermal: "" Solar: "" - Waste: ["abfall.*", "waste", "mva", "müll", "afval", "affald", "energy recovery", "incineration", "reststoffe", "refuse", "déchets", "ordures", "residuos", "basura", "rifiuti", "scarti", "odpady", "śmieci"] + Waste: ["abfall.*", "waste", "mva", "müll", "afval", "affald", "energy recovery", "incineration", "reststoffe", 
"refuse", "déchets", "ordures", "residuos", "basura", "rifiuti", "scarti", "odpady", "śmieci", "abfälle"] Wind: "" Battery: [Electro-chemical, battery] target_sets: @@ -368,7 +369,7 @@ target_sets: warmtekrachtkoppeling, warmte-krachtcentrale, ] - Store: [battery, storage, store] + Store: [battery, storage, store, speicher, pumped] target_technologies: # Provide a mapping of the keys to a list or a regex expression which are used for parsing. # A list will be converted to a regex expression matching all words (case-insensitive) @@ -378,14 +379,14 @@ target_technologies: OCGT: [ocgt, gasturbinen ohne abhitzekessel] Steam Turbine: [steam, turbine, kondensationsmaschine, gegendruckmaschine, dampfmotor] Combustion Engine: [combustion engine, verbrennungsmotor, stirlingmotor] - Run-Of-River: [run-off, run off, run of river, run-of-river, ror, laufwasseranlage] - Pumped Storage: [pumped hydro, pumped, speicherwasseranlage] - Reservoir: "" + Run-Of-River: [run-off, run off, run of river, run-of-river, ror, laufwasseranlage, laufwasser, abwasserkraft, trinkwassersystem, brauchwassersystem, pasada, przepływowa, fluente, elvekraft, doorstroom, älvkraft] + Reservoir: [reservoir, réservoir, impoundment, talsperre, stausee, speicherwasseranlage, speicherwasser, barrage, embalse, bacino, zbiornik, magasinverk, damkraftverk, reguleringsmagasin] + Pumped Storage: [pumped hydro, pumped, kavernen, bombeo, reversible, reversibel, oberbecken, unterbecken, pompage, pompaggio, pompowa, pumpekraftverk] Marine: "" - Onshore: "" - Offshore: "" PV: [pv, photo-voltaic, photo voltaic] CSP: "" + Onshore: "" + Offshore: "" clean_name: remove_common_words: false # remove words which appear more that 20 times in all entries remove_duplicated_words: true From b8cdfe670186e013a85f9e7046383736d10dae60 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Wed, 20 Aug 2025 09:24:59 +0000 Subject: [PATCH 17/68] [pre-commit.ci] auto fixes from 
pre-commit.com hooks for more information, see https://pre-commit.ci --- powerplantmatching/data.py | 58 +++++++++++++++++++++++++------------- 1 file changed, 38 insertions(+), 20 deletions(-) diff --git a/powerplantmatching/data.py b/powerplantmatching/data.py index 961ad236..5b927b1a 100644 --- a/powerplantmatching/data.py +++ b/powerplantmatching/data.py @@ -35,7 +35,7 @@ gather_specifications, ) from .core import _package_data, get_config -from .heuristics import scale_to_net_capacities, PLZ_to_LatLon_map +from .heuristics import PLZ_to_LatLon_map, scale_to_net_capacities from .utils import ( config_filter, convert_to_short_name, @@ -2265,7 +2265,7 @@ def MASTR( "Hydro": "hydro_raw.csv", "Wind": "wind_raw.csv", "Solar": "solar_raw.csv", - "Storage": "bnetza_mastr_storage_raw.csv" + "Storage": "bnetza_mastr_storage_raw.csv", } data_frames = [] with ZipFile(fn, "r") as file: @@ -2289,28 +2289,35 @@ def MASTR( target_columns + PARSE_COLUMNS + list(RENAME_COLUMNS.keys()) ) usecols = available_columns.intersection(target_columns) - df = pd.read_csv(file.open(name), usecols=usecols).assign( - Filesuffix=fueltype - ).query("Nettonennleistung >= @THRESHOLD_KW") + df = ( + pd.read_csv(file.open(name), usecols=usecols) + .assign(Filesuffix=fueltype) + .query("Nettonennleistung >= @THRESHOLD_KW") + ) data_frames.append(df) break df = pd.concat(data_frames).reset_index(drop=True) cols = ["NutzbareSpeicherkapazitaet", "VerknuepfteEinheit"] with ZipFile(fn, "r") as file: - fn_storage_units = "bnetza_open_mastr_2025-02-09/bnetza_mastr_storage_units_raw.csv" + fn_storage_units = ( + "bnetza_open_mastr_2025-02-09/bnetza_mastr_storage_units_raw.csv" + ) storage_units = pd.read_csv(file.open(fn_storage_units), usecols=cols) storage_mwh = ( - storage_units - .assign(VerknuepfteEinheit=lambda x: x.VerknuepfteEinheit.str.split(", ")) + storage_units.assign( + VerknuepfteEinheit=lambda x: x.VerknuepfteEinheit.str.split(", ") + ) .assign(n=lambda x: x.VerknuepfteEinheit.str.len()) 
.explode("VerknuepfteEinheit") .assign(NutzbareSpeicherkapazitaet=lambda x: x.NutzbareSpeicherkapazitaet / x.n) .set_index("VerknuepfteEinheit")["NutzbareSpeicherkapazitaet"] ) - df["StorageCapacity_MWh"] = df["EinheitMastrNummer"].map(storage_mwh) / 1000 # kWh to MWh + df["StorageCapacity_MWh"] = ( + df["EinheitMastrNummer"].map(storage_mwh) / 1000 + ) # kWh to MWh if raw: return df @@ -2318,7 +2325,9 @@ def MASTR( status_list = config["MASTR"].get("status", ["In Betrieb"]) # noqa: F841 PLZ_map = PLZ_to_LatLon_map() - df.Postleitzahl = df.Postleitzahl.astype(str).str.replace(r'[^0-9]', '0', regex=True).astype(int) + df.Postleitzahl = ( + df.Postleitzahl.astype(str).str.replace(r"[^0-9]", "0", regex=True).astype(int) + ) df["PLZ_lat"] = df.Postleitzahl.map(PLZ_map.lat) df["PLZ_lon"] = df.Postleitzahl.map(PLZ_map.lon) @@ -2335,9 +2344,11 @@ def MASTR( ), DateOut=lambda df: pd.to_datetime(df.DateOut).dt.year.where( df.Status != "Vorübergehend stillgelegt", - pd.to_datetime(df["DatumBeginnVoruebergehendeStilllegung"]).dt.year.where( + pd.to_datetime( + df["DatumBeginnVoruebergehendeStilllegung"] + ).dt.year.where( df["DatumWiederaufnahmeBetrieb"].isna(), - pd.to_datetime(df.DateOut).dt.year + pd.to_datetime(df.DateOut).dt.year, ), ), lat=lambda df: df.lat.combine_first(df.PLZ_lat), @@ -2355,26 +2366,33 @@ def MASTR( ) ) - psw = df_processed.query("Energietraeger == 'Speicher' and Technologie == 'Pumpspeicher'").index + psw = df_processed.query( + "Energietraeger == 'Speicher' and Technologie == 'Pumpspeicher'" + ).index df_processed.loc[psw, ["Fueltype", "Technology"]] = ["Hydro", "Pumped Storage"] - bat = df_processed.query("Energietraeger == 'Speicher' and Technologie == 'Batterie'").index - df_processed.loc[bat, ["Fueltype", "Set"]] = ["Battery", "Store"] + bat = df_processed.query( + "Energietraeger == 'Speicher' and Technologie == 'Batterie'" + ).index + df_processed.loc[bat, ["Fueltype", "Set"]] = ["Battery", "Store"] BATTERY_MAPPING = { "Blei-Batterie": "Lead", 
"Lithium-Batterie": "Lithium", "Sonstige Batterie": np.nan, "Hochtemperaturbatterie": "High-Temperature", - "Nickel-Cadmium- / Nickel-Metallhydridbatterie": "Nickel" + "Nickel-Cadmium- / Nickel-Metallhydridbatterie": "Nickel", } - df_processed.loc[bat, "Technology"] = df_processed.loc[bat, "Batterietechnologie"].map(BATTERY_MAPPING) + df_processed.loc[bat, "Technology"] = df_processed.loc[ + bat, "Batterietechnologie" + ].map(BATTERY_MAPPING) - mask = df_processed.query("Energietraeger in ['Hydro', 'Wind', 'Solar', 'Battery'] and Set == 'Store'").index + mask = df_processed.query( + "Energietraeger in ['Hydro', 'Wind', 'Solar', 'Battery'] and Set == 'Store'" + ).index df_processed.loc[mask, "Set"] = "PP" df_final = ( - df_processed - .pipe(clean_name) + df_processed.pipe(clean_name) .pipe(set_column_name, "MASTR") .pipe(config_filter, config) ) From ff9da6faa285e6f5735ef8bb5251997fe909e5cc Mon Sep 17 00:00:00 2001 From: Fabian Neumann Date: Wed, 20 Aug 2025 11:32:08 +0200 Subject: [PATCH 18/68] suggestions for deprecations --- powerplantmatching/data.py | 21 ++++++++++++++++++--- 1 file changed, 18 insertions(+), 3 deletions(-) diff --git a/powerplantmatching/data.py b/powerplantmatching/data.py index 5b927b1a..9f66c54e 100644 --- a/powerplantmatching/data.py +++ b/powerplantmatching/data.py @@ -250,6 +250,10 @@ def OPSD( ) +@deprecated( + deprecated_in="0.8.0", + details="Deprecated since data is not maintained. Use GEM instead.", +) def GEO(raw=False, update=False, config=None): """ Importer for the GEO database. @@ -621,6 +625,10 @@ def GPD(raw=False, update=False, config=None, filter_other_dbs=True): ) +@deprecated( + deprecated_in="0.8.0", + details="Removed since data is not maintained. Use GNPT instead.", +) def WIKIPEDIA(raw=False, update=False, config=None): """ Importer for the WIKIPEDIA nuclear power plant database. 
@@ -1150,7 +1158,7 @@ def WEPP(raw=False, config=None): @deprecated( deprecated_in="0.5.0", - details="This function is not maintained anymore.", + details="This function is not maintained anymore. Use MASTR instead.", ) def UBA( raw=False, @@ -1273,7 +1281,7 @@ def UBA( @deprecated( deprecated_in="0.5.0", - details="This function is not maintained anymore.", + details="This function is not maintained anymore. Use MASTR instead.", ) def BNETZA( raw=False, @@ -1434,6 +1442,10 @@ def BNETZA( ) +@deprecated( + deprecated_in="0.8.0", + details="Removed since data is not maintained. Use GSPT, GWPT and GHPT instead.", +) def OPSD_VRE(raw=False, update=False, config=None): """ Importer for the OPSD (Open Power Systems Data) renewables (VRE) @@ -1482,7 +1494,10 @@ def OPSD_VRE(raw=False, update=False, config=None): .pipe(config_filter, config) ) - +@deprecated( + deprecated_in="0.8.0", + details="Removed since data is not maintained. Use GSPT, GWPT and GHPT instead.", +) def OPSD_VRE_country(country, raw=False, update=False, config=None): """ Get country specific data from OPSD for renewables, if available. From 88f38852108ae56763c1bf2126522b279b909c13 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Wed, 20 Aug 2025 09:32:19 +0000 Subject: [PATCH 19/68] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- powerplantmatching/data.py | 1 + 1 file changed, 1 insertion(+) diff --git a/powerplantmatching/data.py b/powerplantmatching/data.py index 9f66c54e..ee40bce8 100644 --- a/powerplantmatching/data.py +++ b/powerplantmatching/data.py @@ -1494,6 +1494,7 @@ def OPSD_VRE(raw=False, update=False, config=None): .pipe(config_filter, config) ) + @deprecated( deprecated_in="0.8.0", details="Removed since data is not maintained. 
Use GSPT, GWPT and GHPT instead.", From 89ae0c5acd0bef58158416d4519c01cc0fe1452b Mon Sep 17 00:00:00 2001 From: Fabian Neumann Date: Wed, 20 Aug 2025 12:06:01 +0200 Subject: [PATCH 20/68] find a reasonable DateOut for mothballed GCPT and GGPT --- powerplantmatching/data.py | 15 +++++++++++++-- powerplantmatching/package_data/config.yaml | 2 +- 2 files changed, 14 insertions(+), 3 deletions(-) diff --git a/powerplantmatching/data.py b/powerplantmatching/data.py index ee40bce8..a0f5b7d2 100644 --- a/powerplantmatching/data.py +++ b/powerplantmatching/data.py @@ -1821,6 +1821,10 @@ def GCPT(raw=False, update=False, config=None): planned_retirement = df["Planned retirement"].apply(pd.to_numeric, errors="coerce") + # conservative assumption that mothballed plants (without fixed retirement + # date) went out of operation in 2024 + mothballed_retirement = df["Status"].apply(lambda x: 2024 if x == "mothballed" else np.nan) + status_list = config["GCPT"].get("status", ["operating"]) # noqa: F841 BTU_PER_KWH = 3412.14 @@ -1835,7 +1839,8 @@ def GCPT(raw=False, update=False, config=None): DateIn=df["DateIn"].apply(pd.to_numeric, errors="coerce"), DateOut=df["DateOut"] .apply(pd.to_numeric, errors="coerce") - .combine_first(planned_retirement), + .combine_first(planned_retirement) + .combine_first(mothballed_retirement), lat=df["lat"].apply(pd.to_numeric, errors="coerce"), lon=df["lon"].apply(pd.to_numeric, errors="coerce"), Set=df["CHP"].replace({"yes": "CHP", "no": "PP"}), @@ -2110,6 +2115,11 @@ def classify_fuel(s): status_list = config["GGPT"].get("status", ["operating"]) # noqa: F841 df = df.rename(columns=RENAME_COLUMNS) + + # conservative assumption that mothballed plants (without fixed retirement + # date) went out of operation in 2024 + mothballed_retirement = df["Status"].apply(lambda x: 2024 if x == "mothballed" else np.nan) + df_final = ( df.pipe(clean_name) .pipe(set_column_name, "GGPT") @@ -2119,7 +2129,8 @@ def classify_fuel(s): 
DateIn=df["DateIn"].apply(pd.to_numeric, errors="coerce"), DateOut=df["DateOut"] .apply(pd.to_numeric, errors="coerce") - .combine_first(df["Planned retire"]), + .combine_first(df["Planned retire"]) + .combine_first(mothballed_retirement), lat=df["lat"].apply(pd.to_numeric, errors="coerce"), lon=df["lon"].apply(pd.to_numeric, errors="coerce"), Capacity=df["Capacity"].apply(pd.to_numeric, errors="coerce"), diff --git a/powerplantmatching/package_data/config.yaml b/powerplantmatching/package_data/config.yaml index 39c91364..b566ca0f 100644 --- a/powerplantmatching/package_data/config.yaml +++ b/powerplantmatching/package_data/config.yaml @@ -167,7 +167,7 @@ GEM: GCPT: net_capacity: false reliability_score: 4 - status: ["operating", "retired", "construction"] + status: ["operating", "retired", "construction", "mothballed"] fn: Global-Coal-Plant-Tracker-July-2025.xlsx url: https://tubcloud.tu-berlin.de/s/etMB7qawKNwfgnk/download/Global-Coal-Plant-Tracker-July-2025.xlsx GGTPT: From 7fea3486a9a020425d348b700423592693f245a2 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Wed, 20 Aug 2025 10:06:13 +0000 Subject: [PATCH 21/68] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- powerplantmatching/data.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/powerplantmatching/data.py b/powerplantmatching/data.py index a0f5b7d2..320cca1b 100644 --- a/powerplantmatching/data.py +++ b/powerplantmatching/data.py @@ -1823,7 +1823,9 @@ def GCPT(raw=False, update=False, config=None): # conservative assumption that mothballed plants (without fixed retirement # date) went out of operation in 2024 - mothballed_retirement = df["Status"].apply(lambda x: 2024 if x == "mothballed" else np.nan) + mothballed_retirement = df["Status"].apply( + lambda x: 2024 if x == "mothballed" else np.nan + ) status_list = config["GCPT"].get("status", ["operating"]) # 
noqa: F841 @@ -2118,7 +2120,9 @@ def classify_fuel(s): # conservative assumption that mothballed plants (without fixed retirement # date) went out of operation in 2024 - mothballed_retirement = df["Status"].apply(lambda x: 2024 if x == "mothballed" else np.nan) + mothballed_retirement = df["Status"].apply( + lambda x: 2024 if x == "mothballed" else np.nan + ) df_final = ( df.pipe(clean_name) From 7dd471ef2fa8e777a2372224eca2728df70ff55e Mon Sep 17 00:00:00 2001 From: Fabian Neumann Date: Wed, 20 Aug 2025 13:30:32 +0200 Subject: [PATCH 22/68] add European Energy Storage Inventory EESI as source# --- powerplantmatching/data.py | 111 ++++++++++++++++++++ powerplantmatching/package_data/config.yaml | 15 ++- 2 files changed, 124 insertions(+), 2 deletions(-) diff --git a/powerplantmatching/data.py b/powerplantmatching/data.py index 320cca1b..3f2cc6e0 100644 --- a/powerplantmatching/data.py +++ b/powerplantmatching/data.py @@ -21,6 +21,7 @@ import os from zipfile import ZipFile +import json import entsoe import numpy as np import pandas as pd @@ -2440,6 +2441,116 @@ def GEM_GGPT(*args, **kwargs): return GGPT(*args, **kwargs) +def EESI( + raw=False, + update=False, + config=None, +): + """ + Get the European Energy Storage Inventory (EESI) dataset. + + Provided by the European Commission's Joint Research Centre. Contains + chemical, electrochemical, thermal and mechanical energy storage + technologies in Europe. + + https://ses.jrc.ec.europa.eu/storage-inventory-maps + + https://ses.jrc.ec.europa.eu/storage-inventory-tool/api/projects + + Parameters + ---------- + raw : Boolean, default False + Whether to return the original dataset + update: bool, default False + Whether to update the data from the url. + config : dict, default None + Add custom specific configuration, e.g. 
+ powerplantmatching.config.get_config(target_countries='Italy'), defaults + to powerplantmatching.config.get_config() + """ + + config = get_config() if config is None else config + + fn = get_raw_file("EESI", update=update, config=config) + + with open(fn) as f: + data = json.load(f) + + df = pd.json_normalize(data["projects"], sep="_") + float_cols = ["power", "capacity", "facility_latitude", "facility_longitude"] + df[float_cols] = df[float_cols].astype(float) + + if raw: + return df + + status_list = config["EESI"].get("status", ["Operational"]) # noqa: F841 + + RENAME_COLUMNS = { + "title": "Name", + "power": "Capacity", + "capacity": "StorageCapacity_MWh", + "facility_latitude": "lat", + "facility_longitude": "lon", + "facility_country": "Country", + "id": "projectID", + "technology_name": "Technology", + "status": "Status", + } + + df_processed = ( + df.rename(columns=RENAME_COLUMNS) + .query("Status in @status_list") + .assign( + projectID=lambda df: "EESI-" + df.projectID.astype(str), + StorageCapacity_MWh=lambda df: df.StorageCapacity_MWh.where( + df.StorageCapacity_MWh > 0 + ), + Capacity=lambda df: df.Capacity.where(df.Capacity > 0), + Set="Store", + ) + ) + + sel = df_processed.query("technology_parentName == 'ElectroChemical'").index + df_processed.loc[sel, "Fueltype"] = "Battery" + + sel = df_processed.query("technology_parentName == 'Thermal'").index + df_processed.loc[sel, "Fueltype"] = "Heat Storage" + + sel = df_processed.query("technology_parentName == 'Mechanical'").index + df_processed.loc[sel, "Fueltype"] = "Mechanical Storage" + + sel = df_processed.query("Technology == 'Power to Gas (H2)'").index + df_processed.loc[sel, "Fueltype"] = "Hydrogen Storage" + + sel = df_processed.query("Technology == 'Pumped Hydro Storage (PHS)'").index + df_processed.loc[sel, "Fueltype"] = "Hydro" + + TECHNOLOGY_MAPPING = { + "Power to Gas (H2)": np.nan, + "Lithium-ion batteries": "Li", + "Lead Acid batteries": "Pb", + "Sodium Sulphur batteries": "NaS", + 
"Redox flow batteries Vanadium": "V", + "Sodium Nickel Chloride batteries": "NaNiCl", + "Lithium-titanate battery (LTO)": "Li", + "Pumped Hydro Storage (PHS)": "Pumped Storage", + "Unespecified Storage - mechanical": np.nan, + "Compressed Air Energy Storage (CAES)": "CAES", + "Flywheel Energy Storage": "Flywheel", + "Unspecific Thermal Storage": np.nan, + "Molten salts (Sensible Thermal Energy Storage (STES))": "Molten Salt", + } + df_processed.Technology = df_processed.Technology.map(TECHNOLOGY_MAPPING) + + df_final = ( + df_processed.pipe(clean_name) + .pipe(set_column_name, "EESI") + .pipe(config_filter, config) + ) + + return df_final + + def EXTERNAL_DATABASE(raw=False, update=True, config=None): """ Importer for external custom databases. diff --git a/powerplantmatching/package_data/config.yaml b/powerplantmatching/package_data/config.yaml index b566ca0f..6f9873a8 100644 --- a/powerplantmatching/package_data/config.yaml +++ b/powerplantmatching/package_data/config.yaml @@ -25,6 +25,7 @@ matching_sources: - WIKIPEDIA: Fueltype != 'Solar' - GEM - MASTR + - EESI # fully_included_sources, these sources are included even without match to the final dataset fully_included_sources: @@ -35,7 +36,7 @@ fully_included_sources: - JRC: Country not in ['Switzerland', 'Albania', 'United Kingdom', 'Norway'] - OPSD: Country not in ['Switzerland', 'Italy', 'Spain', 'Norway', 'Austria'] - BEYONDCOAL - - GEM: Country != 'Germany' or Fueltype == 'Solar' + - EESI: Fueltype != 'Hydro' and not (Country == 'Germany' and Fueltype == 'Battery') - MASTR @@ -214,6 +215,12 @@ MASTR: status: ["In Betrieb", "In Planung", "Endgültig stillgelegt", "Vorübergehend stillgelegt"] fn: bnetza_open_mastr_2025-02-09.zip url: https://zenodo.org/records/14783581/files/bnetza_open_mastr_2025-02-09.zip +EESI: + net_capacity: true + reliability_score: 5 + status: ["Operational"] # since no start years given + fn: european-energy-storage-inventory-20250817-2245.json + url: 
https://tubcloud.tu-berlin.de/s/RXWgYbYJpePsWAZ/download/european-energy-storage-inventory-20250817-2245.json # ---------------------------------------------------------------------------- # # Data Structure Config # @@ -334,6 +341,9 @@ target_fueltypes: Waste: ["abfall.*", "waste", "mva", "müll", "afval", "affald", "energy recovery", "incineration", "reststoffe", "refuse", "déchets", "ordures", "residuos", "basura", "rifiuti", "scarti", "odpady", "śmieci", "abfälle"] Wind: "" Battery: [Electro-chemical, battery] + Mechanical Storage: "" + Heat Storage: "" + Hydrogen Storage: "" target_sets: # Provide a mapping of the keys to a list or a regex expression which are used for parsing. # A list will be converted to a regex expression matching all words (case-insensitive) @@ -489,5 +499,6 @@ fuel_to_color: Geothermal: darkgoldenrod Battery: purple Hydrogen Storage: teal - Electro-mechanical: teal + Mechanical Storage: darkslategray + Heat Storage: darkorange Total: gold From 93ee31ee166bca9e04ed14a5c1cdc9a20df689c4 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Wed, 20 Aug 2025 11:30:48 +0000 Subject: [PATCH 23/68] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- powerplantmatching/data.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/powerplantmatching/data.py b/powerplantmatching/data.py index 3f2cc6e0..190ccae1 100644 --- a/powerplantmatching/data.py +++ b/powerplantmatching/data.py @@ -17,11 +17,11 @@ Collection of power plant data bases and statistical data """ +import json import logging import os from zipfile import ZipFile -import json import entsoe import numpy as np import pandas as pd From 6371bb32109ee67f8f472518f183710773e1c13f Mon Sep 17 00:00:00 2001 From: Fabian Neumann Date: Wed, 20 Aug 2025 14:23:43 +0200 Subject: [PATCH 24/68] unify battery naming across EESI and MASTR --- powerplantmatching/data.py | 8 ++++---- 
1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/powerplantmatching/data.py b/powerplantmatching/data.py index 190ccae1..9ab01c5a 100644 --- a/powerplantmatching/data.py +++ b/powerplantmatching/data.py @@ -2408,11 +2408,11 @@ def MASTR( ).index df_processed.loc[bat, ["Fueltype", "Set"]] = ["Battery", "Store"] BATTERY_MAPPING = { - "Blei-Batterie": "Lead", - "Lithium-Batterie": "Lithium", + "Blei-Batterie": "Pb", + "Lithium-Batterie": "Li", "Sonstige Batterie": np.nan, - "Hochtemperaturbatterie": "High-Temperature", - "Nickel-Cadmium- / Nickel-Metallhydridbatterie": "Nickel", + "Hochtemperaturbatterie": "NaS", + "Nickel-Cadmium- / Nickel-Metallhydridbatterie": "NiCd", } df_processed.loc[bat, "Technology"] = df_processed.loc[ bat, "Batterietechnologie" From d7c303ff1cbe041b5a624c1983c9501a2d785c24 Mon Sep 17 00:00:00 2001 From: Fabian Neumann Date: Wed, 20 Aug 2025 15:30:13 +0200 Subject: [PATCH 25/68] add GeoNuclearData (GND) as source --- powerplantmatching/data.py | 60 +++++++++++++++++++++ powerplantmatching/package_data/config.yaml | 11 +++- 2 files changed, 69 insertions(+), 2 deletions(-) diff --git a/powerplantmatching/data.py b/powerplantmatching/data.py index 9ab01c5a..e03dda94 100644 --- a/powerplantmatching/data.py +++ b/powerplantmatching/data.py @@ -2551,6 +2551,66 @@ def EESI( return df_final +def GND( + raw=False, + update=False, + config=None, +): + """ + Get the GeoNuclearData (GND) dataset. + + https://github.com/cristianst85/GeoNuclearData + + Parameters + ---------- + raw : Boolean, default False + Whether to return the original dataset + update: bool, default False + Whether to update the data from the url. + config : dict, default None + Add custom specific configuration, e.g. 
+ powerplantmatching.config.get_config(target_countries='Italy'), defaults + to powerplantmatching.config.get_config() + """ + + config = get_config() if config is None else config + + fn = get_raw_file("GND", update=update, config=config) + + df = pd.read_csv(fn) + + if raw: + return df + + status_list = config["GND"].get("status", ["Operational"]) # noqa: F841 + + RENAME_COLUMNS = { + "Id": "projectID", + "Latitude": "lat", + "Longitude": "lon", + "OperationalFrom": "DateIn", + "OperationalTo": "DateOut", + } + + df_final = ( + df.rename(columns=RENAME_COLUMNS) + .query("Status in @status_list") + .assign( + projectID=lambda df: "GND-" + df.projectID.astype(str), + Capacity=lambda df: df.Capacity.where(df.Capacity > 0), + DateIn=lambda df: pd.to_datetime(df.DateIn).dt.year, + DateOut=lambda df: pd.to_datetime(df.DateOut).dt.year, + Set="PP", + Fueltype="Nuclear", + ) + .pipe(clean_name) + .pipe(set_column_name, "GND") + .pipe(config_filter, config) + ) + + return df_final + + def EXTERNAL_DATABASE(raw=False, update=True, config=None): """ Importer for external custom databases. 
diff --git a/powerplantmatching/package_data/config.yaml b/powerplantmatching/package_data/config.yaml index 6f9873a8..64430291 100644 --- a/powerplantmatching/package_data/config.yaml +++ b/powerplantmatching/package_data/config.yaml @@ -26,6 +26,7 @@ matching_sources: - GEM - MASTR - EESI + - GND # fully_included_sources, these sources are included even without match to the final dataset fully_included_sources: @@ -74,8 +75,8 @@ CARMA: fn: Full_CARMA_2009_Dataset_1.csv ENTSOE: reliability_score: 5 - url: https://raw.githubusercontent.com/pypsa-meets-earth/ppm-data-backup/main/entsoe_powerplants.csv - fn: entsoe_powerplants.csv + url: https://tubcloud.tu-berlin.de/s/QaHLH38J4A7ZF5m/download/entsoe_transparency_platform_20250820.csv + fn: entsoe_transparency_platform_20250820.csv ENTSOE-EIC: url: https://eepublicdownloads.blob.core.windows.net/cio-lio/csv/W_eicCodes.csv fn: W_eicCodes.csv @@ -221,6 +222,12 @@ EESI: status: ["Operational"] # since no start years given fn: european-energy-storage-inventory-20250817-2245.json url: https://tubcloud.tu-berlin.de/s/RXWgYbYJpePsWAZ/download/european-energy-storage-inventory-20250817-2245.json +GND: + net_capacity: true + reliability_score: 4 + status: ["Shutdown", "Operational", "Planned", "Under Construction", "Decommissioning Completed"] + url: https://raw.githubusercontent.com/cristianst85/GeoNuclearData/1bc8b4ac106af236902385b87e46c540b4864815/data/csv/denormalized/nuclear_power_plants.csv + fn: nuclear_power_plants.csv # ---------------------------------------------------------------------------- # # Data Structure Config # From 08a4dc8a0ab4e19709b980553c5c2a4b042747f2 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Wed, 20 Aug 2025 13:30:43 +0000 Subject: [PATCH 26/68] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- powerplantmatching/data.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) 
diff --git a/powerplantmatching/data.py b/powerplantmatching/data.py index e03dda94..33644be6 100644 --- a/powerplantmatching/data.py +++ b/powerplantmatching/data.py @@ -2581,7 +2581,7 @@ def GND( if raw: return df - + status_list = config["GND"].get("status", ["Operational"]) # noqa: F841 RENAME_COLUMNS = { From b7ebfc5eab83b94e81e79580a676ce4e8a22a7ac Mon Sep 17 00:00:00 2001 From: Fabian Neumann Date: Wed, 20 Aug 2025 16:47:27 +0200 Subject: [PATCH 27/68] properly distinguish onshore/offshore wind MASTR --- powerplantmatching/data.py | 15 ++++++++++++++- powerplantmatching/package_data/config.yaml | 4 ++-- 2 files changed, 16 insertions(+), 3 deletions(-) diff --git a/powerplantmatching/data.py b/powerplantmatching/data.py index 33644be6..f5fdfabb 100644 --- a/powerplantmatching/data.py +++ b/powerplantmatching/data.py @@ -2286,6 +2286,7 @@ def MASTR( "Energietraeger", "Hauptbrennstoff", "NameStromerzeugungseinheit", + "NameWindpark", "Technologie", ] @@ -2316,6 +2317,7 @@ def MASTR( "Ort", "Gemeinde", "Landkreis", + "Lage", ] target_columns = ( target_columns + PARSE_COLUMNS + list(RENAME_COLUMNS.keys()) @@ -2368,7 +2370,9 @@ def MASTR( .query("Status in @status_list") .assign( projectID=lambda df: "MASTR-" + df.projectID, - Name=lambda df: df.Name.combine_first(df.NameStromerzeugungseinheit), + Name=lambda df: df.Name.combine_first(df.NameWindpark).combine_first( + df.NameStromerzeugungseinheit + ), Country=lambda df: df.Country.map(COUNTRY_MAP), Capacity=lambda df: df.Capacity / 1e3, # kW to MW DateIn=lambda df: pd.to_datetime(df.DateIn).dt.year.combine_first( @@ -2418,6 +2422,15 @@ def MASTR( bat, "Batterietechnologie" ].map(BATTERY_MAPPING) + WIND_MAPPING = { + "Windkraft auf See": "Offshore", + "Windkraft an Land": "Onshore", + } + wind = df_processed.query("Energietraeger == 'Wind'").index + df_processed.loc[wind, "Technology"] = df_processed.loc[wind, "Lage"].map( + WIND_MAPPING + ) + mask = df_processed.query( "Energietraeger in ['Hydro', 'Wind', 
'Solar', 'Battery'] and Set == 'Store'" ).index diff --git a/powerplantmatching/package_data/config.yaml b/powerplantmatching/package_data/config.yaml index 64430291..20c17dc2 100644 --- a/powerplantmatching/package_data/config.yaml +++ b/powerplantmatching/package_data/config.yaml @@ -402,8 +402,8 @@ target_technologies: Marine: "" PV: [pv, photo-voltaic, photo voltaic] CSP: "" - Onshore: "" - Offshore: "" + Onshore: ["onshore", "an land", "terrestre", "landvind", "på land", "op land", "lądowy", "su terra", "en tierra", "à terre"] + Offshore: ["offshore", "nearshore", "auf see", "en mer", "marino", "en mar", "in mare", "morski", "havvind", "til havs", "på havet", "op zee", "zeewind"] clean_name: remove_common_words: false # remove words which appear more that 20 times in all entries remove_duplicated_words: true From 3953da92193837b135bd92730d3fb49c746cbfb8 Mon Sep 17 00:00:00 2001 From: Fabian Neumann Date: Wed, 20 Aug 2025 16:48:03 +0200 Subject: [PATCH 28/68] revert deprecation warnings --- powerplantmatching/data.py | 76 +++++++++++++++++++------------------- 1 file changed, 39 insertions(+), 37 deletions(-) diff --git a/powerplantmatching/data.py b/powerplantmatching/data.py index f5fdfabb..283a45f3 100644 --- a/powerplantmatching/data.py +++ b/powerplantmatching/data.py @@ -111,28 +111,30 @@ def get_dominant_type(group): # for retired plants unit_capacity = df_units.groupby("BFF plant ID").Capacity.sum() - df = ( - df.rename(columns=RENAME_COLUMNS) - .query("status in @status_list") - .assign( - DateOut=lambda df: df.rename(columns=RENAME_COLUMNS) - .DateOut.replace({"After 2030": np.nan, "By 2030": 2030}) - .infer_objects(copy=False) - .combine_first(unit_phaseout), - projectID=lambda df: "BEYOND-" + df.projectID, - Fueltype=lambda df: df.Fueltype.str.title(), - Set=unit_set, - Technology=np.nan, - Capacity=lambda df: df.Capacity.add( - df["Coal capacity under construction"], fill_value=0 - ).combine_first(unit_capacity), + with 
pd.option_context("future.no_silent_downcasting", True): + df = ( + df.rename(columns=RENAME_COLUMNS) + .query("status in @status_list") + .assign( + DateOut=lambda df: df.rename(columns=RENAME_COLUMNS) + .DateOut.replace({"After 2030": np.nan, "By 2030": 2030}) + .astype(float) + .combine_first(unit_phaseout), + projectID=lambda df: "BEYOND-" + df.projectID, + Fueltype=lambda df: df.Fueltype.str.title(), + Set=unit_set, + Technology=np.nan, + Capacity=lambda df: df.Capacity.add( + df["Coal capacity under construction"], fill_value=0 + ).combine_first(unit_capacity), + ) + .pipe(scale_to_net_capacities) + .pipe(clean_name) + .pipe(convert_to_short_name) + .pipe(set_column_name, "BEYONDCOAL") + .pipe(config_filter, config) ) - .pipe(scale_to_net_capacities) - .pipe(clean_name) - .pipe(convert_to_short_name) - .pipe(set_column_name, "BEYONDCOAL") - .pipe(config_filter, config) - ) + return df @@ -251,10 +253,10 @@ def OPSD( ) -@deprecated( - deprecated_in="0.8.0", - details="Deprecated since data is not maintained. Use GEM instead.", -) +# @deprecated( +# deprecated_in="0.8.0", +# details="Deprecated since data is not maintained. Use GEM instead.", +# ) def GEO(raw=False, update=False, config=None): """ Importer for the GEO database. @@ -626,10 +628,10 @@ def GPD(raw=False, update=False, config=None, filter_other_dbs=True): ) -@deprecated( - deprecated_in="0.8.0", - details="Removed since data is not maintained. Use GNPT instead.", -) +# @deprecated( +# deprecated_in="0.8.0", +# details="Removed since data is not maintained. Use GNPT instead.", +# ) def WIKIPEDIA(raw=False, update=False, config=None): """ Importer for the WIKIPEDIA nuclear power plant database. @@ -1443,10 +1445,10 @@ def BNETZA( ) -@deprecated( - deprecated_in="0.8.0", - details="Removed since data is not maintained. Use GSPT, GWPT and GHPT instead.", -) +# @deprecated( +# deprecated_in="0.8.0", +# details="Removed since data is not maintained. 
Use GSPT, GWPT and GHPT instead.", +# ) def OPSD_VRE(raw=False, update=False, config=None): """ Importer for the OPSD (Open Power Systems Data) renewables (VRE) @@ -1496,10 +1498,10 @@ def OPSD_VRE(raw=False, update=False, config=None): ) -@deprecated( - deprecated_in="0.8.0", - details="Removed since data is not maintained. Use GSPT, GWPT and GHPT instead.", -) +# @deprecated( +# deprecated_in="0.8.0", +# details="Removed since data is not maintained. Use GSPT, GWPT and GHPT instead.", +# ) def OPSD_VRE_country(country, raw=False, update=False, config=None): """ Get country specific data from OPSD for renewables, if available. From 1f10f3789170b6d105b38ca18449f34e65369499 Mon Sep 17 00:00:00 2001 From: Fabian Neumann Date: Wed, 20 Aug 2025 18:38:03 +0200 Subject: [PATCH 29/68] new proposed matching settings --- powerplantmatching/package_data/config.yaml | 57 ++++++++++----------- 1 file changed, 27 insertions(+), 30 deletions(-) diff --git a/powerplantmatching/package_data/config.yaml b/powerplantmatching/package_data/config.yaml index 20c17dc2..16154868 100644 --- a/powerplantmatching/package_data/config.yaml +++ b/powerplantmatching/package_data/config.yaml @@ -20,25 +20,23 @@ matching_sources: - GEO: Fueltype != 'Solar' - GPD: Fueltype != 'Solar' - JRC: Fueltype != 'Solar' - - OPSD: Country != "Spain" and Fueltype != 'Hard Coal' and Fueltype != 'Solar' + - OPSD: Country != "Spain" and Fueltype not in ['Hard Coal', 'Solar'] - BEYONDCOAL: Fueltype != 'Solar' - - WIKIPEDIA: Fueltype != 'Solar' - GEM - MASTR - - EESI - - GND + - EESI: Fueltype != 'Solar' + - GND: Fueltype != 'Solar' # fully_included_sources, these sources are included even without match to the final dataset fully_included_sources: # Make individual queries for the datasets - ENTSOE: (Country not in ['Switzerland', 'Ireland', 'Albania', 'Greece', 'Czech Republic', 'Bulgaria', 'United Kingdom', 'Italy', 'Serbia'] and not (Country == 'Spain' and Fueltype == 'Hydro')) or (Fueltype == 'Geothermal') - - 
GEO: (Country == 'Spain' and Fueltype == 'Natural Gas') - - GPD: Country in ['Finland', 'Spain'] - JRC: Country not in ['Switzerland', 'Albania', 'United Kingdom', 'Norway'] - OPSD: Country not in ['Switzerland', 'Italy', 'Spain', 'Norway', 'Austria'] - - BEYONDCOAL + - GEM: not (Country == 'Germany' and Fueltype in ['Solar', 'Wind']) - EESI: Fueltype != 'Hydro' and not (Country == 'Germany' and Fueltype == 'Battery') - MASTR + - GND parallel_duke_processes: false @@ -52,13 +50,13 @@ matched_data_url: https://raw.githubusercontent.com/PyPSA/powerplantmatching/{ta opsd_vres_base_year: 2020 BNETZA: - reliability_score: 2 + reliability_score: 1 fn: Kraftwerksliste_2017_2.xlsx url: https://www.bundesnetzagentur.de/SharedDocs/Downloads/DE/Sachgebiete/Energie/Unternehmen_Institutionen/Versorgungssicherheit/Erzeugungskapazitaeten/Kraftwerksliste/Kraftwerksliste_2019_1.xlsx;jsessionid=17E419F28D025C7DD9FC6E2BEB3D088F?__blob=publicationFile&v=2 BEYONDCOAL: net_capacity: false aggregated_units: true - reliability_score: 6 + reliability_score: 7 status: ["Construction", "Open", "Planned", "Retired"] fn: 2025-07-24-BeyondFossilFuels-Europe_Coal_Plants_Database.xlsx url: https://beyondfossilfuels.org/wp-content/uploads/2025/07/2025-07-24-BeyondFossilFuels-Europe_Coal_Plants_Database.xlsx @@ -81,17 +79,17 @@ ENTSOE-EIC: url: https://eepublicdownloads.blob.core.windows.net/cio-lio/csv/W_eicCodes.csv fn: W_eicCodes.csv JRC: - reliability_score: 4 + reliability_score: 5 fn: jrc-hydro-power-plant-database.csv url: https://raw.githubusercontent.com/energy-modelling-toolkit/hydro-power-database/27e80f/data/jrc-hydro-power-plant-database.csv GEO: net_capacity: false - reliability_score: 3 + reliability_score: 2 url: https://raw.githubusercontent.com/pypsa-meets-earth/ppm-data-backup/main/global_energy_observatory_power_plants.csv fn: global_energy_observatory_power_plants.csv GEO_units: net_capacity: false - reliability_score: 3 + reliability_score: 2 url: 
https://raw.githubusercontent.com/pypsa-meets-earth/ppm-data-backup/main/global_energy_observatory_ppl_units.csv fn: global_energy_observatory_ppl_units.csv GPD: @@ -100,19 +98,19 @@ GPD: #if outdated, look at http://datasets.wri.org/dataset/globalpowerplantdatabase url: https://wri-dataportal-prod.s3.amazonaws.com/manual/global_power_plant_database_v_1_3.zip WIKIPEDIA: - reliability_score: 4 + reliability_score: 2 url: https://raw.githubusercontent.com/pypsa-meets-earth/ppm-data-backup/main/nuclear_plants_from_wikipedia.csv fn: nuclear_plants_from_wikipedia.csv IWPDCY: aggregated_units: true - reliability_score: 3 + reliability_score: 2 fn: IWPDCY.csv OPSD_DE: - reliability_score: 4 + reliability_score: 3 fn: conventional_power_plants_DE.csv url: https://raw.githubusercontent.com/pypsa-meets-earth/ppm-data-backup/main/conventional_power_plants_DE.csv OPSD_EU: - reliability_score: 4 + reliability_score: 3 fn: conventional_power_plants_EU.csv url: https://raw.githubusercontent.com/pypsa-meets-earth/ppm-data-backup/main/conventional_power_plants_EU.csv OPSD_VRE: @@ -143,76 +141,75 @@ OPSD_VRE_GB: url: https://data.open-power-system-data.org/renewable_power_plants/2020-08-25/renewable_power_plants_UK.csv fn: renewable_power_plants_UK.csv OPSD: - reliability_score: 4 + reliability_score: 3 Capacity_stats: url: https://data.open-power-system-data.org/national_generation_capacity/2020-10-01/national_generation_capacity_stacked.csv fn: national_generation_capacity_stacked.csv UBA: net_capacity: false - reliability_score: 4 + reliability_score: 1 fn: kraftwerke-de-ab-100-mw.xls url: https://www.umweltbundesamt.de/sites/default/files/medien/372/dokumente/kraftwerke_de_ab_100_mw_0.xls WEPP: net_capacity: false - reliability_score: 3 + reliability_score: 1 fn: platts_wepp.csv GGPT: net_capacity: false - reliability_score: 5 + reliability_score: 6 status: ["operating", "retired", "construction"] fn: Global-Oil-and-Gas-Plant-Tracker-GOGPT-August-2025.xlsx url: 
https://tubcloud.tu-berlin.de/s/aKrt7dyNgazmgAm/download/Global-Oil-and-Gas-Plant-Tracker-GOGPT-August-2025.xlsx GEM: # combined data set of all GEM trackers net_capacity: true - reliability_score: 5 + reliability_score: 6 GCPT: net_capacity: false - reliability_score: 4 + reliability_score: 6 status: ["operating", "retired", "construction", "mothballed"] fn: Global-Coal-Plant-Tracker-July-2025.xlsx url: https://tubcloud.tu-berlin.de/s/etMB7qawKNwfgnk/download/Global-Coal-Plant-Tracker-July-2025.xlsx GGTPT: net_capacity: false - reliability_score: 4 + reliability_score: 6 aggregated_units: false status: ["operating", "retired", "construction", "mothballed"] fn: Geothermal-Power-Tracker-March-2025-Final.xlsx url: https://tubcloud.tu-berlin.de/s/dNoEsLeGtCWDkoc/download/Geothermal-Power-Tracker-March-2025-Final.xlsx GWPT: net_capacity: false - reliability_score: 4 + reliability_score: 6 status: ["operating", "retired", "construction"] fn: Global-Wind-Power-Tracker-February-2025.xlsx url: https://tubcloud.tu-berlin.de/s/8NSXSjPmJPXpg4W/download/Global-Wind-Power-Tracker-February-2025.xlsx GSPT: net_capacity: false - reliability_score: 4 + reliability_score: 6 status: ["operating", "retired", "construction"] fn: Global-Solar-Power-Tracker-February-2025.xlsx url: https://tubcloud.tu-berlin.de/s/7eo4dZXMp6eB3mz/download/Global-Solar-Power-Tracker-February-2025.xlsx GBPT: net_capacity: false - reliability_score: 4 + reliability_score: 6 status: ["operating", "retired", "construction"] fn: Global-Bioenergy-Power-Tracker-GBPT-September-2024.xlsx url: https://tubcloud.tu-berlin.de/s/CzMBKe2rAcsoq7c/download/Global-Bioenergy-Power-Tracker-GBPT-September-2024.xlsx GNPT: net_capacity: false - reliability_score: 4 + reliability_score: 6 status: ["operating", "retired", "mothballed", "construction"] fn: Global-Nuclear-Power-Tracker-July-2024.xlsx url: https://tubcloud.tu-berlin.de/s/gXFim9EciRHrjeQ/download/Global-Nuclear-Power-Tracker-July-2024.xlsx GHPT: net_capacity: false - 
reliability_score: 4 + reliability_score: 6 status: ["operating", "retired", "construction"] fn: Global-Hydropower-Tracker-April-2025.xlsx url: https://tubcloud.tu-berlin.de/s/2xqxRmfP4FKTrLf/download/Global-Hydropower-Tracker-April-2025.xlsx - MASTR: net_capacity: true - reliability_score: 8 + reliability_score: 7 status: ["In Betrieb", "In Planung", "Endgültig stillgelegt", "Vorübergehend stillgelegt"] fn: bnetza_open_mastr_2025-02-09.zip url: https://zenodo.org/records/14783581/files/bnetza_open_mastr_2025-02-09.zip From c0296bbe579e3a51393f2041832a0843a54174be Mon Sep 17 00:00:00 2001 From: Fabian Neumann Date: Wed, 20 Aug 2025 19:23:00 +0200 Subject: [PATCH 30/68] add release notes and update docs --- doc/basics.rst | 2 ++ doc/release-notes.rst | 18 ++++++++++++++++++ 2 files changed, 20 insertions(+) diff --git a/doc/basics.rst b/doc/basics.rst index 7e91618b..358ee423 100644 --- a/doc/basics.rst +++ b/doc/basics.rst @@ -49,6 +49,8 @@ Data Sources - GNPT - `Global Nuclear Powerplant Tracker by Global Energy Monitor `__ - GSPT - `Global Solar Powerplant Tracker by Global Energy Monitor `__ - GWPT - `Global Wind Powerplant Tracker by Global Energy Monitor `__ +- EESI - `European Energy Storage Inventory <https://ses.jrc.ec.europa.eu/storage-inventory-maps>`__ +- GND - `GeoNuclearData <https://github.com/cristianst85/GeoNuclearData>`__ - CARMA - `Carbon Monitoring for Action `__ - ENTSOe - `European Network of Transmission System Operators for Electricity `__, annually provides statistics diff --git a/doc/release-notes.rst b/doc/release-notes.rst index 0430a104..e2c2de5f 100644 --- a/doc/release-notes.rst +++ b/doc/release-notes.rst @@ -8,6 +8,24 @@ Upcoming Version * Drop support for Python 3.9, add support for Python 3.13. Minimum required Python version is now 3.10. +* Added `GeoNuclearData <https://github.com/cristianst85/GeoNuclearData>`__ dataset as `pm.data.GND()`. + +* Added `European Energy Storage Inventory <https://ses.jrc.ec.europa.eu/storage-inventory-maps>`__ dataset as `pm.data.EESI()`.
+ +* Updated ENTSOE, BEYONDCOAL, JRC, IRENASTAT and the Global Energy Monitor datasets to the latest versions. + +* In `pm.data.MASTR()`, fix the distinction between hydro technologies and between offshore and onshore wind. Also read in storage technologies. + +* Improved recognition of CHP power plants. + +* In Global Energy Monitor datasets, also read entries below capacity threshold. + +* In `pm.data.GCPT()`, add estimate for coal plant efficiency. + +* Include mothballed gas, oil and coal power plants. + +* Updated matching logic configuration. + `v0.7.1 `__ (30th January 2024) ================================================================================================= From c481d0602ec5aaaf04ed7d3b1481b11544211353 Mon Sep 17 00:00:00 2001 From: Fabian Neumann Date: Thu, 21 Aug 2025 08:13:00 +0200 Subject: [PATCH 31/68] adjust reliability scores of BEYONDCOAL and GND (closes #241) --- powerplantmatching/package_data/config.yaml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/powerplantmatching/package_data/config.yaml b/powerplantmatching/package_data/config.yaml index 16154868..1e05774b 100644 --- a/powerplantmatching/package_data/config.yaml +++ b/powerplantmatching/package_data/config.yaml @@ -56,7 +56,7 @@ BNETZA: BEYONDCOAL: net_capacity: false aggregated_units: true - reliability_score: 7 + reliability_score: 4 status: ["Construction", "Open", "Planned", "Retired"] fn: 2025-07-24-BeyondFossilFuels-Europe_Coal_Plants_Database.xlsx url: https://beyondfossilfuels.org/wp-content/uploads/2025/07/2025-07-24-BeyondFossilFuels-Europe_Coal_Plants_Database.xlsx @@ -221,7 +221,7 @@ EESI: url: https://tubcloud.tu-berlin.de/s/RXWgYbYJpePsWAZ/download/european-energy-storage-inventory-20250817-2245.json GND: net_capacity: true - reliability_score: 4 + reliability_score: 5 status: ["Shutdown", "Operational", "Planned", "Under Construction", "Decommissioning Completed"] url:
https://raw.githubusercontent.com/cristianst85/GeoNuclearData/1bc8b4ac106af236902385b87e46c540b4864815/data/csv/denormalized/nuclear_power_plants.csv fn: nuclear_power_plants.csv From 8d2eb4d9869555b508a524f2c8cfd1fab9e54467 Mon Sep 17 00:00:00 2001 From: Fabian Neumann Date: Thu, 21 Aug 2025 08:38:45 +0200 Subject: [PATCH 32/68] GGPT use unit rather than location ID (closes #215) --- powerplantmatching/data.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/powerplantmatching/data.py b/powerplantmatching/data.py index 283a45f3..262f17b5 100644 --- a/powerplantmatching/data.py +++ b/powerplantmatching/data.py @@ -2086,7 +2086,7 @@ def GGPT(raw=False, update=False, config=None): "Retired year": "DateOut", "CHP": "Set", "Fuel": "Fueltype", - "GEM location ID": "projectID", + "GEM unit ID": "projectID", "Country/Area": "Country", "Turbine/Engine Technology": "Technology", } From 865953dd485f7dbfb6294951a644e76e7df91834 Mon Sep 17 00:00:00 2001 From: Fabian Neumann Date: Thu, 21 Aug 2025 13:49:57 +0200 Subject: [PATCH 33/68] GCPT: translate technologies --- powerplantmatching/data.py | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/powerplantmatching/data.py b/powerplantmatching/data.py index 262f17b5..1430f930 100644 --- a/powerplantmatching/data.py +++ b/powerplantmatching/data.py @@ -1821,6 +1821,13 @@ def GCPT(raw=False, update=False, config=None): "unknown with CCS": "Hard Coal", "waste coal": "Hard Coal", } + technology_dict = { + "IGCC": "CCGT", + "subcritical": "Steam Turbine", + "unknown": np.nan, + "supercritical": "Steam Turbine", + "ultra-supercritical": "Steam Turbine", + } planned_retirement = df["Planned retirement"].apply(pd.to_numeric, errors="coerce") @@ -1853,7 +1860,7 @@ def GCPT(raw=False, update=False, config=None): ) .query("Status in @status_list") .pipe(lambda x: x[df.columns.intersection(config.get("target_columns"))]) - .pipe(lambda x: x.replace({"Fueltype": fueltype_dict})) + .pipe(lambda x: 
x.replace({"Fueltype": fueltype_dict, "Technology": technology_dict})) .pipe(config_filter, config) ) From d2f6c89fe66f500794e32097170686b71fc3e055 Mon Sep 17 00:00:00 2001 From: Fabian Neumann Date: Thu, 21 Aug 2025 13:50:31 +0200 Subject: [PATCH 34/68] JRC: log storage parameters only if > 0 --- powerplantmatching/data.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/powerplantmatching/data.py b/powerplantmatching/data.py index 1430f930..6cf9d265 100644 --- a/powerplantmatching/data.py +++ b/powerplantmatching/data.py @@ -475,6 +475,9 @@ def set_large_spanish_stores_to_reservoirs(df): .assign( Set=lambda df: np.where(df.Technology == "Run-Of-River", "PP", "Store"), Fueltype="Hydro", + Duration=lambda df: df.Duration.where(df.Duration > 0), + StorageCapacity_MWh=lambda df: df.StorageCapacity_MWh.where(df.StorageCapacity_MWh > 0), + Volume_Mm3=lambda df: df.Volume_Mm3.where(df.Volume_Mm3 > 0), ) .drop(columns=["pypsa_id", "GEO"]) .powerplant.convert_alpha2_to_country() From 3242b6b000640a782276fd633b022634eb60c8f6 Mon Sep 17 00:00:00 2001 From: Fabian Neumann Date: Thu, 21 Aug 2025 13:50:59 +0200 Subject: [PATCH 35/68] MASTR: set threshold to 100 kW --- powerplantmatching/data.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/powerplantmatching/data.py b/powerplantmatching/data.py index 6cf9d265..2a9b75dd 100644 --- a/powerplantmatching/data.py +++ b/powerplantmatching/data.py @@ -2273,7 +2273,7 @@ def MASTR( config = get_config() if config is None else config - THRESHOLD_KW = 1000 # noqa: F841 + THRESHOLD_KW = 100 # noqa: F841 RENAME_COLUMNS = { "EinheitMastrNummer": "projectID", From c5813f7b5b5f5d3ff00bf3d42482524ef61e8f48 Mon Sep 17 00:00:00 2001 From: Fabian Neumann Date: Thu, 21 Aug 2025 13:51:16 +0200 Subject: [PATCH 36/68] MASTR: calculate Duration --- powerplantmatching/data.py | 1 + 1 file changed, 1 insertion(+) diff --git a/powerplantmatching/data.py b/powerplantmatching/data.py index 2a9b75dd..9ee56cb9 100644 --- 
a/powerplantmatching/data.py +++ b/powerplantmatching/data.py @@ -2401,6 +2401,7 @@ def MASTR( ), lat=lambda df: df.lat.combine_first(df.PLZ_lat), lon=lambda df: df.lon.combine_first(df.PLZ_lon), + Duration=lambda df: df.StorageCapacity_MWh.div(df.Capacity, fill_value=np.nan), ) .pipe( gather_specifications, From 9064eac56e8ac968e05cd4684905ca8e2ca36ab4 Mon Sep 17 00:00:00 2001 From: Fabian Neumann Date: Thu, 21 Aug 2025 13:51:48 +0200 Subject: [PATCH 37/68] MASTR: fixes from gather_specifications inaccuracies --- powerplantmatching/data.py | 28 +++++++++++++++++++++++++++- 1 file changed, 27 insertions(+), 1 deletion(-) diff --git a/powerplantmatching/data.py b/powerplantmatching/data.py index 9ee56cb9..6c450718 100644 --- a/powerplantmatching/data.py +++ b/powerplantmatching/data.py @@ -2424,6 +2424,7 @@ def MASTR( "Energietraeger == 'Speicher' and Technologie == 'Batterie'" ).index df_processed.loc[bat, ["Fueltype", "Set"]] = ["Battery", "Store"] + BATTERY_MAPPING = { "Blei-Batterie": "Pb", "Lithium-Batterie": "Li", @@ -2444,8 +2445,33 @@ def MASTR( WIND_MAPPING ) + sel = df_processed.query("Fueltype == 'Natural Gas' and Filesuffix == 'Bioenergy'").index + df_processed.loc[sel, "Fueltype"] = "Biogas" + + # one biogas unit has 'Wind' in name + sel = df_processed.query("Fueltype == 'Wind' and Filesuffix == 'Biomass'").index + df_processed.loc[sel, "Fueltype"] = "Biogas" + + # some combi-units are named wind-solar + sel = df_processed.query("Fueltype in ['Wind', 'Waste'] and Filesuffix == 'Solar'").index + df_processed.loc[sel, ["Fueltype", "Technology"]] = ["Solar", "PV"] + + # some technologies are wrongly allocated + sel = df_processed.query("Fueltype == 'Biogas' and Technology == 'PV'").index + df_processed.loc[sel, "Technology"] = "Combustion Engine" + sel = df_processed.query("Fueltype == 'Hydro' and Technology == 'Steam Turbine'").index + df_processed.loc[sel, "Technology"] = "Run-Of-River" + sel = df_processed.query("Fueltype == 'Solar' and Technology == 
'CCGT'").index + df_processed.loc[sel, "Technology"] = "PV" + sel = df_processed.query("Fueltype == 'Solar' and Technology == 'OCGT' and Filesuffix == 'Combustion'").index + df_processed.loc[sel, "Fueltype"] = "Natural Gas" + sel = df_processed.query("Fueltype == 'Wind' and Technology == 'PV' and Filesuffix == 'Solar'").index + df_processed.loc[sel, "Fueltype"] = "Solar" + sel = df_processed.query("Fueltype == 'Wind' and Technology == 'Combustion Engine' and Filesuffix == 'Bioenergy'").index + df_processed.loc[sel, "Fueltype"] = "Biogas" + mask = df_processed.query( - "Energietraeger in ['Hydro', 'Wind', 'Solar', 'Battery'] and Set == 'Store'" + "Energietraeger in ['Hydro', 'Wind', 'Solar', 'Battery'] and Set in ['Store', 'CHP']" ).index df_processed.loc[mask, "Set"] = "PP" From 1dc7fa1ab329bc56eff224116e0abae6072859dd Mon Sep 17 00:00:00 2001 From: Fabian Neumann Date: Thu, 21 Aug 2025 13:52:42 +0200 Subject: [PATCH 38/68] config: omit ambiguous regex expressions --- powerplantmatching/package_data/config.yaml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/powerplantmatching/package_data/config.yaml b/powerplantmatching/package_data/config.yaml index 1e05774b..d333dd9f 100644 --- a/powerplantmatching/package_data/config.yaml +++ b/powerplantmatching/package_data/config.yaml @@ -293,9 +293,9 @@ target_fueltypes: # given by the list. An empty string results in a regex expression containing only the key. # Parsed of representatives at the top may be overwritten by representatives further below. 
Other: ".*" - Solid Biomass: [biological, bioenergy, agricultural, wood, holz, biomass, feste biomasse, biomasa, biomassa, feste biogene stoffe, pellets, stroh, straw] + Solid Biomass: [biological, bioenergy, agricultural, biomass, feste biomasse, biomasa, biomassa, feste biogene stoffe, pellets, stroh, straw] Biogas: [biogas, biogaz, biomethan, gasförmige biomasse] - Nuclear: [nuclear, kernkraft, atomkraft, nucléaire, atomowa, jądrowa, kjernekraft, kern, atom, atoom] + Nuclear: [nuclear, kernkraft, atomkraft, nucléaire, atomowa, jądrowa, kjernekraft, atoom] Natural Gas: [ ccgt, From 856a3ddb921f923a78dd8bd14bf287fd57d5c229 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Thu, 21 Aug 2025 11:53:00 +0000 Subject: [PATCH 39/68] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- powerplantmatching/data.py | 38 +++++++++++++++++++++++++++++--------- 1 file changed, 29 insertions(+), 9 deletions(-) diff --git a/powerplantmatching/data.py b/powerplantmatching/data.py index 6c450718..895b7904 100644 --- a/powerplantmatching/data.py +++ b/powerplantmatching/data.py @@ -476,7 +476,9 @@ def set_large_spanish_stores_to_reservoirs(df): Set=lambda df: np.where(df.Technology == "Run-Of-River", "PP", "Store"), Fueltype="Hydro", Duration=lambda df: df.Duration.where(df.Duration > 0), - StorageCapacity_MWh=lambda df: df.StorageCapacity_MWh.where(df.StorageCapacity_MWh > 0), + StorageCapacity_MWh=lambda df: df.StorageCapacity_MWh.where( + df.StorageCapacity_MWh > 0 + ), Volume_Mm3=lambda df: df.Volume_Mm3.where(df.Volume_Mm3 > 0), ) .drop(columns=["pypsa_id", "GEO"]) @@ -1863,7 +1865,11 @@ def GCPT(raw=False, update=False, config=None): ) .query("Status in @status_list") .pipe(lambda x: x[df.columns.intersection(config.get("target_columns"))]) - .pipe(lambda x: x.replace({"Fueltype": fueltype_dict, "Technology": technology_dict})) + .pipe( + lambda x: x.replace( + 
{"Fueltype": fueltype_dict, "Technology": technology_dict} + ) + ) .pipe(config_filter, config) ) @@ -2401,7 +2407,9 @@ def MASTR( ), lat=lambda df: df.lat.combine_first(df.PLZ_lat), lon=lambda df: df.lon.combine_first(df.PLZ_lon), - Duration=lambda df: df.StorageCapacity_MWh.div(df.Capacity, fill_value=np.nan), + Duration=lambda df: df.StorageCapacity_MWh.div( + df.Capacity, fill_value=np.nan + ), ) .pipe( gather_specifications, @@ -2445,7 +2453,9 @@ def MASTR( WIND_MAPPING ) - sel = df_processed.query("Fueltype == 'Natural Gas' and Filesuffix == 'Bioenergy'").index + sel = df_processed.query( + "Fueltype == 'Natural Gas' and Filesuffix == 'Bioenergy'" + ).index df_processed.loc[sel, "Fueltype"] = "Biogas" # one biogas unit has 'Wind' in name @@ -2453,21 +2463,31 @@ def MASTR( df_processed.loc[sel, "Fueltype"] = "Biogas" # some combi-units are named wind-solar - sel = df_processed.query("Fueltype in ['Wind', 'Waste'] and Filesuffix == 'Solar'").index + sel = df_processed.query( + "Fueltype in ['Wind', 'Waste'] and Filesuffix == 'Solar'" + ).index df_processed.loc[sel, ["Fueltype", "Technology"]] = ["Solar", "PV"] # some technologies are wrongly allocated sel = df_processed.query("Fueltype == 'Biogas' and Technology == 'PV'").index df_processed.loc[sel, "Technology"] = "Combustion Engine" - sel = df_processed.query("Fueltype == 'Hydro' and Technology == 'Steam Turbine'").index + sel = df_processed.query( + "Fueltype == 'Hydro' and Technology == 'Steam Turbine'" + ).index df_processed.loc[sel, "Technology"] = "Run-Of-River" sel = df_processed.query("Fueltype == 'Solar' and Technology == 'CCGT'").index df_processed.loc[sel, "Technology"] = "PV" - sel = df_processed.query("Fueltype == 'Solar' and Technology == 'OCGT' and Filesuffix == 'Combustion'").index + sel = df_processed.query( + "Fueltype == 'Solar' and Technology == 'OCGT' and Filesuffix == 'Combustion'" + ).index df_processed.loc[sel, "Fueltype"] = "Natural Gas" - sel = df_processed.query("Fueltype == 'Wind' 
and Technology == 'PV' and Filesuffix == 'Solar'").index + sel = df_processed.query( + "Fueltype == 'Wind' and Technology == 'PV' and Filesuffix == 'Solar'" + ).index df_processed.loc[sel, "Fueltype"] = "Solar" - sel = df_processed.query("Fueltype == 'Wind' and Technology == 'Combustion Engine' and Filesuffix == 'Bioenergy'").index + sel = df_processed.query( + "Fueltype == 'Wind' and Technology == 'Combustion Engine' and Filesuffix == 'Bioenergy'" + ).index df_processed.loc[sel, "Fueltype"] = "Biogas" mask = df_processed.query( From f1a35e4378d6248c17b7a97aaecd910283ee8f34 Mon Sep 17 00:00:00 2001 From: Fabian Neumann Date: Thu, 21 Aug 2025 14:05:27 +0200 Subject: [PATCH 40/68] config: simplify formatting (fewer lists) --- powerplantmatching/package_data/config.yaml | 426 ++++++++++++++------ 1 file changed, 292 insertions(+), 134 deletions(-) diff --git a/powerplantmatching/package_data/config.yaml b/powerplantmatching/package_data/config.yaml index d333dd9f..c68a9bc5 100644 --- a/powerplantmatching/package_data/config.yaml +++ b/powerplantmatching/package_data/config.yaml @@ -1,7 +1,7 @@ # ---------------------------------------------------------------------------- # # IO Config # # ---------------------------------------------------------------------------- # -entsoe_token: +entsoe_token: "17f212db-55c5-49a5-b7e4-5b4036f22249" google_api_key: # ---------------------------------------------------------------------------- # @@ -39,8 +39,8 @@ fully_included_sources: - GND -parallel_duke_processes: false -process_limit: 4 +parallel_duke_processes: true +process_limit: 14 matched_data_url: https://raw.githubusercontent.com/PyPSA/powerplantmatching/{tag}/powerplants.csv # ---------------------------------------------------------------------------- # @@ -293,58 +293,133 @@ target_fueltypes: # given by the list. An empty string results in a regex expression containing only the key. 
# Parsed of representatives at the top may be overwritten by representatives further below. Other: ".*" - Solid Biomass: [biological, bioenergy, agricultural, biomass, feste biomasse, biomasa, biomassa, feste biogene stoffe, pellets, stroh, straw] - Biogas: [biogas, biogaz, biomethan, gasförmige biomasse] - Nuclear: [nuclear, kernkraft, atomkraft, nucléaire, atomowa, jądrowa, kjernekraft, atoom] + Solid Biomass: + - biological + - bioenergy + - agricultural + - biomass + - feste biomasse + - biomasa + - biomassa + - feste biogene stoffe + - pellets + - stroh + - straw + Biogas: + - biogas + - biogaz + - biomethan + - gasförmige biomasse + Nuclear: + - nuclear + - kernkraft + - atomkraft + - nucléaire + - atomowa + - jądrowa + - kjernekraft + - atoom Natural Gas: - [ - ccgt, - gas, - natural gas, - ocgt, - lng, - combined cycle, - fossil gas, - mixed fossil fuels, - erdgas, - andere gase, - gaz, - gaz naturel, - gas natural, - naturgass, - gaz ziemny, - gass, - aardgas, - flüssiggas - ] + - ccgt + - gas + - natural gas + - ocgt + - lng + - combined cycle + - fossil gas + - mixed fossil fuels + - erdgas + - andere gase + - gaz + - gaz naturel + - gas natural + - naturgass + - gaz ziemny + - gass + - aardgas + - flüssiggas Hydro: - [ - run-off, - run off, - run of river, - run-of-river, - ror, - hydro, - hidro, - hydraulique, - hydroelectric, - wasserkraft, - waterkracht, - wasser, - vannkraft, - vattenkraft, - wodna, - idroelettrica, - idraulica, - ] - Hard Coal: [coal, coke, steinkohle, houille, charbon dur, hulla, carbón duro, carbone duro, antracite, steinkul, węgiel kamienny, steenkool] - Lignite: [brown coal, lignite, peat, braunkohle, ligni.*, brunatny, brunkul, bruinkool] - Oil: [oil, diesel, biodiesel, methanol, heizöl, ethanol, mineralölprodukte, öl, fioul, mazout, petrol, olio, olej, carburante, olie] + - run-off + - run off + - run of river + - run-of-river + - ror + - hydro + - hidro + - hydraulique + - hydroelectric + - wasserkraft + - waterkracht + - 
wasser + - vannkraft + - vattenkraft + - wodna + - idroelettrica + - idraulica + Hard Coal: + - coal + - coke + - steinkohle + - houille + - charbon dur + - hulla + - carbón duro + - carbone duro + - antracite + - steinkul + - węgiel kamienny + - steenkool + Lignite: + - brown coal + - lignite + - peat + - braunkohle + - ligni.* + - brunatny + - brunkul + - bruinkool + Oil: + - oil + - diesel + - biodiesel + - methanol + - heizöl + - ethanol + - mineralölprodukte + - öl + - fioul + - mazout + - petrol + - olio + - olej + - carburante + - olie Geothermal: "" Solar: "" - Waste: ["abfall.*", "waste", "mva", "müll", "afval", "affald", "energy recovery", "incineration", "reststoffe", "refuse", "déchets", "ordures", "residuos", "basura", "rifiuti", "scarti", "odpady", "śmieci", "abfälle"] + Waste: + - "abfall.*" + - waste + - mva + - müll + - afval + - affald + - energy recovery + - incineration + - reststoffe + - refuse + - déchets + - ordures + - residuos + - basura + - rifiuti + - scarti + - odpady + - śmieci + - abfälle Wind: "" - Battery: [Electro-chemical, battery] + Battery: + - Electro-chemical + - battery Mechanical Storage: "" Heat Storage: "" Hydrogen Storage: "" @@ -355,52 +430,137 @@ target_sets: # Parsed of representatives at the top may be overwritten by representatives further below. 
PP: ".*" CHP: - [ - heizkraftwerk, - hkw, - kwk, - fhkw, - gud, - hp, - bhkw, - cogeneration, - power and heat, - heat and power, - chp, - cogen, - heat & power, - power & heat, - cogeneración, - cogenerazione, - kogeneracja, - combinada calor y electricidad, - kraftvarmeverk, - kraftvarmeværk, - samproduktion, - samproduksjon, - kvv, - wkk, - warmtekrachtkoppeling, - warmte-krachtcentrale, - ] - Store: [battery, storage, store, speicher, pumped] + - heizkraftwerk + - hkw + - kwk + - fhkw + - gud + - hp + - bhkw + - cogeneration + - power and heat + - heat and power + - chp + - cogen + - heat & power + - power & heat + - cogeneración + - cogenerazione + - kogeneracja + - combinada calor y electricidad + - kraftvarmeverk + - kraftvarmeværk + - samproduktion + - samproduksjon + - kvv + - wkk + - warmtekrachtkoppeling + - warmte-krachtcentrale + Storage: + - battery + - storage + - store + - speicher + - pumped target_technologies: # Provide a mapping of the keys to a list or a regex expression which are used for parsing. # A list will be converted to a regex expression matching all words (case-insensitive) # given by the list. An empty string results in a regex expression containing only the key. # Parsed of representatives at the top may be overwritten by representatives further below. 
- CCGT: [ccgt, gas, natural gas, gasturbinen mit abhitzekessel] - OCGT: [ocgt, gasturbinen ohne abhitzekessel] - Steam Turbine: [steam, turbine, kondensationsmaschine, gegendruckmaschine, dampfmotor] - Combustion Engine: [combustion engine, verbrennungsmotor, stirlingmotor] - Run-Of-River: [run-off, run off, run of river, run-of-river, ror, laufwasseranlage, laufwasser, abwasserkraft, trinkwassersystem, brauchwassersystem, pasada, przepływowa, fluente, elvekraft, doorstroom, älvkraft] - Reservoir: [reservoir, réservoir, impoundment, talsperre, stausee, speicherwasseranlage, speicherwasser, barrage, embalse, bacino, zbiornik, magasinverk, damkraftverk, reguleringsmagasin] - Pumped Storage: [pumped hydro, pumped, kavernen, bombeo, reversible, reversibel, oberbecken, unterbecken, pompage, pompaggio, pompowa, pumpekraftverk] + CCGT: + - ccgt + - gas + - natural gas + - gasturbinen mit abhitzekessel + OCGT: + - ocgt + - gasturbinen ohne abhitzekessel + Steam Turbine: + - steam + - turbine + - kondensationsmaschine + - gegendruckmaschine + - dampfmotor + Combustion Engine: + - combustion engine + - verbrennungsmotor + - stirlingmotor + Run-Of-River: + - run-off + - run off + - run of river + - run-of-river + - ror + - laufwasseranlage + - laufwasser + - abwasserkraft + - trinkwassersystem + - brauchwassersystem + - pasada + - przepływowa + - fluente + - elvekraft + - doorstroom + - älvkraft + Reservoir: + - reservoir + - réservoir + - impoundment + - talsperre + - stausee + - speicherwasseranlage + - speicherwasser + - barrage + - embalse + - bacino + - zbiornik + - magasinverk + - damkraftverk + - reguleringsmagasin + Pumped Storage: + - pumped hydro + - pumped + - kavernen + - bombeo + - reversible + - reversibel + - oberbecken + - unterbecken + - pompage + - pompaggio + - pompowa + - pumpekraftverk Marine: "" - PV: [pv, photo-voltaic, photo voltaic] + PV: + - pv + - photo-voltaic + - photo voltaic CSP: "" - Onshore: ["onshore", "an land", "terrestre", "landvind", "på 
land", "op land", "lądowy", "su terra", "en tierra", "à terre"] - Offshore: ["offshore", "nearshore", "auf see", "en mer", "marino", "en mar", "in mare", "morski", "havvind", "til havs", "på havet", "op zee", "zeewind"] + Onshore: + - onshore + - an land + - terrestre + - landvind + - på land + - op land + - lądowy + - su terra + - en tierra + - à terre + Offshore: + - offshore + - nearshore + - auf see + - en mer + - marino + - en mar + - in mare + - morski + - havvind + - til havs + - på havet + - op zee + - zeewind clean_name: remove_common_words: false # remove words which appear more that 20 times in all entries remove_duplicated_words: true @@ -408,51 +568,49 @@ clean_name: " ": "[^a-zA-Z]" # non-alphabetical symbols "": # This should be a list, if remove_common_words is true. - [ - I, - II, - III, - IV, - V, - VI, - VII, - VIII, - IX, - X, - XI, - parque, - grupo, - station, - power, - plant, - unit, - kraftwerk, - kw, - hkw, - nuclear, - thermal, - heizkraftwerk, - eolico, - project, - hydroelectric, - pumped, - storage, - france, - austria, - sweden, - serbia, - ukraine, - switzerland, - slovakia, - croatia, - poland, - slovenia, - portugal, - bosnia, - and, - herzegovina, - \w, #remove single letters - ] + - I + - II + - III + - IV + - V + - VI + - VII + - VIII + - IX + - X + - XI + - parque + - grupo + - station + - power + - plant + - unit + - kraftwerk + - kw + - hkw + - nuclear + - thermal + - heizkraftwerk + - eolico + - project + - hydroelectric + - pumped + - storage + - france + - austria + - sweden + - serbia + - ukraine + - switzerland + - slovakia + - croatia + - poland + - slovenia + - portugal + - bosnia + - and + - herzegovina + - \w #remove single letters "ss": "ß" # ---------------------------------------------------------------------------- # From f63287391ee292d689a47ff949f6495ae1dfe6e7 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Thu, 21 Aug 2025 12:05:38 +0000 Subject: 
[PATCH 41/68] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- powerplantmatching/package_data/config.yaml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/powerplantmatching/package_data/config.yaml b/powerplantmatching/package_data/config.yaml index c68a9bc5..ddb376a4 100644 --- a/powerplantmatching/package_data/config.yaml +++ b/powerplantmatching/package_data/config.yaml @@ -293,7 +293,7 @@ target_fueltypes: # given by the list. An empty string results in a regex expression containing only the key. # Parsed of representatives at the top may be overwritten by representatives further below. Other: ".*" - Solid Biomass: + Solid Biomass: - biological - bioenergy - agricultural @@ -305,12 +305,12 @@ target_fueltypes: - pellets - stroh - straw - Biogas: + Biogas: - biogas - biogaz - biomethan - gasförmige biomasse - Nuclear: + Nuclear: - nuclear - kernkraft - atomkraft From b81c5d822aab79f8a126488777d918595cdffb36 Mon Sep 17 00:00:00 2001 From: Fabian Neumann Date: Fri, 22 Aug 2025 11:39:11 +0200 Subject: [PATCH 42/68] separate filters for matching_sources and fully_included_sources --- powerplantmatching/collection.py | 4 ++++ powerplantmatching/utils.py | 11 +---------- 2 files changed, 5 insertions(+), 10 deletions(-) diff --git a/powerplantmatching/collection.py b/powerplantmatching/collection.py index f19c9b5e..9995fae2 100644 --- a/powerplantmatching/collection.py +++ b/powerplantmatching/collection.py @@ -71,6 +71,10 @@ def df_by_name(name): get_df = getattr(data, name) df = get_df(config=config) + for source in config["matching_sources"]: + if isinstance(source, dict) and next(iter(source)) == name: + df = df.query(source[name]) + if not conf.get("aggregated_units", False): return aggregate_units(df, dataset_name=name, config=config) else: diff --git a/powerplantmatching/utils.py b/powerplantmatching/utils.py index b77376b3..752fa89b 100644 --- a/powerplantmatching/utils.py +++ 
b/powerplantmatching/utils.py @@ -124,16 +124,7 @@ def config_filter(df, config): main_query = config.get("main_query", "") - # individual filter from config.yaml - queries = {} - for source in config["matching_sources"]: - if isinstance(source, dict): - queries.update(source) - else: - queries[source] = "" - ds_query = queries.get(name, "") - - query = " and ".join([q for q in [target_query, main_query, ds_query] if q]) + query = " and ".join([q for q in [target_query, main_query] if q]) df = correct_manually(df, name, config=config) From 2c792ec652c79d3d7b955c22cd2b40b88cbc8e42 Mon Sep 17 00:00:00 2001 From: Fabian Neumann Date: Fri, 22 Aug 2025 11:39:35 +0200 Subject: [PATCH 43/68] cleaning: improve handling of abbreviations --- powerplantmatching/cleaning.py | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/powerplantmatching/cleaning.py b/powerplantmatching/cleaning.py index 523c315f..91f47f3b 100644 --- a/powerplantmatching/cleaning.py +++ b/powerplantmatching/cleaning.py @@ -340,7 +340,16 @@ def clean_technology(df, generalize_hydros=False): .str.split(", ") .apply(lambda x: ", ".join(i.strip() for i in np.unique(x))) ) - tech = tech.replace({"Ccgt": "CCGT", "Ocgt": "OCGT"}, regex=True) + ABBREVIATIONS = { + "Ccgt": "CCGT", + "Ocgt": "OCGT", + "Pv": "PV", + "Nas": "NaS", + "Nicd": "NiCd", + "Nanicl": "NaNiCl", + "Caes": "CAES", + } + tech = tech.replace(ABBREVIATIONS, regex=True) return df.assign(Technology=tech) From ea16d3d5057c0126744b0e2e497ed06bab0a7429 Mon Sep 17 00:00:00 2001 From: Fabian Neumann Date: Fri, 22 Aug 2025 11:40:05 +0200 Subject: [PATCH 44/68] utils: do not mark hydrogen storage as uncommon fueltype --- powerplantmatching/utils.py | 1 - 1 file changed, 1 deletion(-) diff --git a/powerplantmatching/utils.py b/powerplantmatching/utils.py index 752fa89b..646c61ea 100644 --- a/powerplantmatching/utils.py +++ b/powerplantmatching/utils.py @@ -187,7 +187,6 @@ def set_uncommon_fueltypes_to_other(df, fillna_other=True, 
config=None, **kwargs default = [ "Mixed fuel types", "Electro-mechanical", - "Hydrogen Storage", ] fueltypes = kwargs.get("fueltypes", default) df.loc[df.Fueltype.isin(fueltypes), "Fueltype"] = "Other" From 44b66adfd333c66d9b262606005862f101beb531 Mon Sep 17 00:00:00 2001 From: Fabian Neumann Date: Fri, 22 Aug 2025 11:44:16 +0200 Subject: [PATCH 45/68] .gitignore .ipynb --- .gitignore | 3 +++ 1 file changed, 3 insertions(+) diff --git a/.gitignore b/.gitignore index e883d2b7..fe079f42 100644 --- a/.gitignore +++ b/.gitignore @@ -99,3 +99,6 @@ test.ipynb # uv uv.lock + +# jupyter +*.ipynb \ No newline at end of file From a80acd84349a41807fdc5e462e8e274a7f10ea40 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Fri, 22 Aug 2025 09:44:28 +0000 Subject: [PATCH 46/68] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- .gitignore | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.gitignore b/.gitignore index fe079f42..e71eeaf1 100644 --- a/.gitignore +++ b/.gitignore @@ -101,4 +101,4 @@ test.ipynb uv.lock # jupyter -*.ipynb \ No newline at end of file +*.ipynb From a396d93e3579b6e45f73e101e7aeed936621c7e8 Mon Sep 17 00:00:00 2001 From: Fabian Neumann Date: Fri, 22 Aug 2025 14:24:21 +0200 Subject: [PATCH 47/68] correct config.yaml --- powerplantmatching/package_data/config.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/powerplantmatching/package_data/config.yaml b/powerplantmatching/package_data/config.yaml index ddb376a4..dc1d09cc 100644 --- a/powerplantmatching/package_data/config.yaml +++ b/powerplantmatching/package_data/config.yaml @@ -1,7 +1,7 @@ # ---------------------------------------------------------------------------- # # IO Config # # ---------------------------------------------------------------------------- # -entsoe_token: "17f212db-55c5-49a5-b7e4-5b4036f22249" +entsoe_token: "" google_api_key: # 
---------------------------------------------------------------------------- # From 28d5d58ad16ba35b70bbbba61a64b7bac0e57dce Mon Sep 17 00:00:00 2001 From: Fabian Neumann Date: Fri, 22 Aug 2025 19:53:11 +0200 Subject: [PATCH 48/68] enable multiprocessing in unit aggregation for non-matched resources --- powerplantmatching/cleaning.py | 9 +++++++-- powerplantmatching/duke.py | 2 ++ powerplantmatching/heuristics.py | 4 +++- powerplantmatching/package_data/config.yaml | 4 ++-- powerplantmatching/utils.py | 16 +++++++++++++--- 5 files changed, 27 insertions(+), 8 deletions(-) diff --git a/powerplantmatching/cleaning.py b/powerplantmatching/cleaning.py index 91f47f3b..0bb89099 100644 --- a/powerplantmatching/cleaning.py +++ b/powerplantmatching/cleaning.py @@ -387,6 +387,7 @@ def aggregate_units( pre_clean_name=False, country_wise=True, config=None, + threads=1, **kwargs, ): """ @@ -405,6 +406,8 @@ def aggregate_units( Whether to clean the 'Name'-column before aggregating. country_wise : Boolean, default True Whether to aggregate only entries with a identical country. 
+ threads : int, default 1 + Number of threads to use """ deprecated_args = {"use_saved_aggregation", "save_aggregation"} used_deprecated_args = deprecated_args.intersection(kwargs) @@ -445,9 +448,11 @@ def aggregate_units( if country_wise: countries = df.Country.unique() - duplicates = pd.concat([duke(df.query("Country == @c")) for c in countries]) + duplicates = pd.concat( + [duke(df.query("Country == @c"), threads=threads) for c in countries] + ) else: - duplicates = duke(df) + duplicates = duke(df, threads=threads) df = cliques(df, duplicates) df = df.groupby("grouped").agg(props_for_groups) diff --git a/powerplantmatching/duke.py b/powerplantmatching/duke.py index 6eefe393..37cc4929 100644 --- a/powerplantmatching/duke.py +++ b/powerplantmatching/duke.py @@ -52,6 +52,7 @@ def duke( showmatches=False, keepfiles=False, showoutput=False, + threads=1, ): """ Run duke in different modes (Deduplication or Record Linkage Mode) to @@ -119,6 +120,7 @@ def duke( "-Dfile.encoding=UTF-8", "no.priv.garshol.duke.Duke", "--linkfile=linkfile.txt", + f"--threads={threads}", ] if singlematch: args.append("--singlematch") diff --git a/powerplantmatching/heuristics.py b/powerplantmatching/heuristics.py index b40ec2af..011b724f 100644 --- a/powerplantmatching/heuristics.py +++ b/powerplantmatching/heuristics.py @@ -66,6 +66,8 @@ def extend_by_non_matched( if config is None: config = get_config() + threads = config.get("threads_extend_by_non_matched", 1) + if isinstance(extend_by, str): label = extend_by extend_by = getattr(data, extend_by)(config=config) @@ -82,7 +84,7 @@ def extend_by_non_matched( if aggregate_added_data and not extend_by.empty: extend_by = aggregate_units( - extend_by, dataset_name=label, config=config, **aggkwargs + extend_by, dataset_name=label, config=config, threads=threads, **aggkwargs ) extend_by["projectID"] = extend_by.projectID.map(lambda x: {label: x}) else: diff --git a/powerplantmatching/package_data/config.yaml 
b/powerplantmatching/package_data/config.yaml index dc1d09cc..3892d9e4 100644 --- a/powerplantmatching/package_data/config.yaml +++ b/powerplantmatching/package_data/config.yaml @@ -39,8 +39,8 @@ fully_included_sources: - GND -parallel_duke_processes: true -process_limit: 14 +parallel_duke_processes: 16 +threads_extend_by_non_matched: 16 matched_data_url: https://raw.githubusercontent.com/PyPSA/powerplantmatching/{tag}/powerplants.csv # ---------------------------------------------------------------------------- # diff --git a/powerplantmatching/utils.py b/powerplantmatching/utils.py index 646c61ea..67570a6a 100644 --- a/powerplantmatching/utils.py +++ b/powerplantmatching/utils.py @@ -343,7 +343,7 @@ def fun(f, q_in, q_out): q_out.put((i, f(x))) -def parmap(f, arg_list, config=None): +def parmap(f, arg_list, config=None, threads=None): """ Parallel mapping function. Use this function to parallelly map function f onto arguments in arg_list. The maximum number of parallel threads is @@ -356,11 +356,21 @@ def parmap(f, arg_list, config=None): python function with one argument arg_list : list list of arguments mapped to f + config : dict, default None + configuration dictionary + threads : int, default None + number of parallel threads """ if config is None: config = get_config() - if config["parallel_duke_processes"]: - nprocs = min(multiprocessing.cpu_count(), config["process_limit"]) + + if threads is None: + threads = config["parallel_duke_processes"] + if isinstance(threads, bool): + threads = config.get("process_limit", 1) + + if threads > 1: + nprocs = min(multiprocessing.cpu_count(), threads) logger.info(f"Run process with {nprocs} parallel threads.") q_in = multiprocessing.Queue(1) q_out = multiprocessing.Queue() From 55a6b76de6b84bdfa17c91cc74ecf4bed14a02e9 Mon Sep 17 00:00:00 2001 From: Fabian Neumann Date: Fri, 22 Aug 2025 19:56:24 +0200 Subject: [PATCH 49/68] keep blocks for selected fueltypes and option to aggregate only matching sources ---
powerplantmatching/cleaning.py | 70 +++++++++++++++++++-- powerplantmatching/data.py | 37 +++++------ powerplantmatching/package_data/config.yaml | 16 ++--- 3 files changed, 92 insertions(+), 31 deletions(-) diff --git a/powerplantmatching/cleaning.py b/powerplantmatching/cleaning.py index 0bb89099..2626db91 100644 --- a/powerplantmatching/cleaning.py +++ b/powerplantmatching/cleaning.py @@ -87,20 +87,63 @@ def clean_name(df, config=None): name = df.Name.astype(str).copy().apply(unidecode.unidecode) + roman_to_arabic = { + "I": "1", + "II": "2", + "III": "3", + "IV": "4", + "V": "5", + "VI": "6", + "VII": "7", + "VIII": "8", + "IX": "9", + "X": "10", + "XI": "11", + } + for roman, arabic in roman_to_arabic.items(): + name = name.str.replace(rf"\b{roman}\b", arabic, regex=True) + replace = config["clean_name"]["replace"] replace.setdefault("", []) + keep_blocks = config["clean_name"].get("fueltypes_with_blocks", []) + if len(keep_blocks) > 0: + mask = df.Fueltype.isin(keep_blocks) + for key, pattern in replace.items(): if config["clean_name"]["remove_common_words"] and (key == ""): common_words = pd.Series(sum(name.str.split(), [])).value_counts() common_words = list(common_words[common_words >= 20].index) pattern += common_words - if isinstance(pattern, list): - # if pattern is a list, concat all entries in a case-insensitive regex + + pattern = np.atleast_1d(pattern) + + # do not remove block numbers for fuel types with blocks + if len(keep_blocks) > 0 and key == " " and "[^a-zA-Z]" in pattern: + base = [rf"\b{p}\b" for p in pattern if p != "[^a-zA-Z]"] + pattern_keep = r"(?i)" + "|".join(base + [r"[^a-zA-Z0-9]"]) + pattern_default = r"(?i)" + "|".join(base + [r"[^a-zA-Z]"]) + name.loc[mask] = name.loc[mask].str.replace(pattern_keep, key, regex=True) + name.loc[~mask] = name.loc[~mask].str.replace( + pattern_default, key, regex=True + ) + + # do not remove block letters for fuel types with blocks + elif key == "" and "\w" in pattern: + pattern_keep = r"(?i)" + 
"|".join( + [rf"\b{p}\b" for p in pattern if p != "\w"] + ) + pattern_default = r"(?i)" + "|".join( + [rf"\b{p}\b" for p in pattern if p != "\w"] + ) + name.loc[mask] = name.loc[mask].str.replace(pattern_keep, key, regex=True) + name.loc[~mask] = name.loc[~mask].str.replace( + pattern_default, key, regex=True + ) + + else: pattern = r"(?i)" + "|".join([rf"\b{p}\b" for p in pattern]) - elif not isinstance(pattern, str): - raise ValueError(f"Pattern must be string or list, not {type(pattern)}") - name = name.str.replace(pattern, key, regex=True) + name = name.str.replace(pattern, key, regex=True) if config["clean_name"]["remove_duplicated_words"]: name = name.str.replace(r"\b(\w+)(?:\W\1\b)+", r"\1", regex=True, case=False) @@ -445,13 +488,30 @@ def aggregate_units( df = clean_name(df) logger.info(f"Aggregating blocks in data source '{ds_name}'.") + agg_query = None + if ds_name in config.get("aggregate_only_matching_sources", []): + for source in config["matching_sources"]: + if isinstance(source, dict) and ds_name in source: + query = source[ds_name] + break + + block_query = None + if with_blocks := config["clean_name"].get("fuel_type_with_blocks", []): # noqa + block_query = "Fueltype in @with_blocks" if country_wise: countries = df.Country.unique() + country_query = "Country == @c" + query = " and ".join(filter(None, [agg_query, block_query, country_query])) + duplicates = pd.concat( + [duke(df.query(query), threads=threads) for c in countries] + ) duplicates = pd.concat( [duke(df.query("Country == @c"), threads=threads) for c in countries] ) else: + query = " and ".join(filter(None, [agg_query, block_query])) + duplicates = duke(df.query(query) if query else df, threads=threads) duplicates = duke(df, threads=threads) df = cliques(df, duplicates) diff --git a/powerplantmatching/data.py b/powerplantmatching/data.py index 895b7904..9f63bf84 100644 --- a/powerplantmatching/data.py +++ b/powerplantmatching/data.py @@ -670,7 +670,6 @@ def WIKIPEDIA(raw=False, 
update=False, config=None): df = ( df.rename(columns=RENAME_COLUMNS) - .pipe(clean_name) .pipe(convert_to_short_name) .assign( Fueltype="Nuclear", @@ -679,6 +678,7 @@ def WIKIPEDIA(raw=False, update=False, config=None): # plants which are not yet built are set to 2027 DateIn=lambda df: df.DateIn.where(~df.Status.str.contains("In Bau"), 2027), ) + .pipe(clean_name) .pipe(set_column_name, "WIKIPEDIA") .pipe(config_filter, config) ) @@ -1700,8 +1700,7 @@ def GBPT(raw=False, update=False, config=None): df = df.rename(columns=RENAME_COLUMNS) df_final = ( - df.pipe(clean_name) - .pipe(set_column_name, "GBPT") + df.pipe(set_column_name, "GBPT") .pipe(convert_to_short_name) .dropna(subset="Capacity") .assign( @@ -1717,6 +1716,7 @@ def GBPT(raw=False, update=False, config=None): .pipe(lambda x: x[df.columns.intersection(config.get("target_columns"))]) .assign(Technology=np.nan) .assign(Set=np.nan) + .pipe(clean_name) .pipe(config_filter, config) ) return df_final @@ -1739,7 +1739,7 @@ def GNPT(raw=False, update=False, config=None): """ config = get_config() if config is None else config fn = get_raw_file("GNPT", update=update, config=config) - df = pd.read_excel(fn, sheet_name="Data") + df = pd.read_excel(fn, sheet_name="Data", na_values=["--"]) if raw: return df @@ -1759,11 +1759,11 @@ def GNPT(raw=False, update=False, config=None): df = df.rename(columns=RENAME_COLUMNS) df_final = ( - df.pipe(clean_name) - .pipe(set_column_name, "GNPT") + df.pipe(set_column_name, "GNPT") .pipe(convert_to_short_name) .dropna(subset="Capacity") .assign( + Name=lambda df: df["Name"] + df["Unit Name"].fillna("").apply(lambda x: f" {x}" if x else ""), DateIn=df["DateIn"].apply(pd.to_numeric, errors="coerce"), DateOut=df["DateOut"].apply(pd.to_numeric, errors="coerce"), lat=df["lat"].apply(pd.to_numeric, errors="coerce"), @@ -1774,6 +1774,7 @@ def GNPT(raw=False, update=False, config=None): .assign(Fueltype="Nuclear") .assign(Technology="Steam Turbine") .assign(Set="PP") + .pipe(clean_name) 
.pipe(config_filter, config) ) return df_final @@ -1848,8 +1849,7 @@ def GCPT(raw=False, update=False, config=None): df = df.rename(columns=RENAME_COLUMNS) df_final = ( - df.pipe(clean_name) - .pipe(set_column_name, "GCPT") + df.pipe(set_column_name, "GCPT") .pipe(convert_to_short_name) .dropna(subset="Capacity") .assign( @@ -1870,6 +1870,7 @@ def GCPT(raw=False, update=False, config=None): {"Fueltype": fueltype_dict, "Technology": technology_dict} ) ) + .pipe(clean_name) .pipe(config_filter, config) ) @@ -1913,8 +1914,7 @@ def GGTPT(raw=False, update=False, config=None): df = df.rename(columns=RENAME_COLUMNS) df_final = ( - df.pipe(clean_name) - .pipe(set_column_name, "GGTPT") + df.pipe(set_column_name, "GGTPT") .pipe(convert_to_short_name) .dropna(subset="Capacity") .assign( @@ -1928,6 +1928,7 @@ def GGTPT(raw=False, update=False, config=None): .assign(Fueltype="Geothermal") .assign(Technology="Steam Turbine") .assign(Set="PP") + .pipe(clean_name) .pipe(config_filter, config) ) return df_final @@ -1980,8 +1981,7 @@ def GWPT(raw=False, update=False, config=None): df = df.rename(columns=RENAME_COLUMNS) df_final = ( - df.pipe(clean_name) - .pipe(set_column_name, "GWPT") + df.pipe(set_column_name, "GWPT") .pipe(convert_to_short_name) .dropna(subset="Capacity") .assign( @@ -1995,6 +1995,7 @@ def GWPT(raw=False, update=False, config=None): .pipe(lambda x: x.replace({"Technology": technology_dict})) .assign(Fueltype="Wind") .assign(Set="PP") + .pipe(clean_name) .pipe(config_filter, config) ) return df_final @@ -2047,8 +2048,7 @@ def GSPT(raw=False, update=False, config=None): df = df.rename(columns=RENAME_COLUMNS) df_final = ( - df.pipe(clean_name) - .pipe(set_column_name, "GSPT") + df.pipe(set_column_name, "GSPT") .pipe(convert_to_short_name) .dropna(subset="Capacity") .assign( @@ -2062,6 +2062,7 @@ def GSPT(raw=False, update=False, config=None): .pipe(lambda x: x.replace({"Technology": technology_dict})) .assign(Fueltype="Solar") .assign(Set="PP") + .pipe(clean_name) 
.pipe(config_filter, config) ) return df_final @@ -2144,8 +2145,7 @@ def classify_fuel(s): ) df_final = ( - df.pipe(clean_name) - .pipe(set_column_name, "GGPT") + df.pipe(set_column_name, "GGPT") .pipe(convert_to_short_name) .dropna(subset="Capacity") .assign( @@ -2163,6 +2163,7 @@ def classify_fuel(s): .pipe(lambda x: x[df.columns.intersection(config.get("target_columns"))]) .pipe(lambda x: x.replace({"Technology": technology_dict})) .pipe(lambda x: x.replace({"Set": set_dict})) + .pipe(clean_name) .pipe(config_filter, config) ) return df_final @@ -2214,8 +2215,7 @@ def GHPT(raw=False, update=False, config=None): status_list = config["GHPT"].get("status", ["operating"]) # noqa: F841 df = df.rename(columns=RENAME_COLUMNS) df_final = ( - df.pipe(clean_name) - .pipe(set_column_name, "GHPT") + df.pipe(set_column_name, "GHPT") .pipe(convert_to_short_name) .dropna(subset="Capacity") .assign( @@ -2229,6 +2229,7 @@ def GHPT(raw=False, update=False, config=None): .pipe(lambda x: x.replace({"Technology": technology_dict})) .assign(Fueltype="Hydro") .assign(Set="PP") + .pipe(clean_name) .pipe(config_filter, config) ) return df_final diff --git a/powerplantmatching/package_data/config.yaml b/powerplantmatching/package_data/config.yaml index 3892d9e4..447684f3 100644 --- a/powerplantmatching/package_data/config.yaml +++ b/powerplantmatching/package_data/config.yaml @@ -30,14 +30,9 @@ matching_sources: # fully_included_sources, these sources are included even without match to the final dataset fully_included_sources: # Make individual queries for the datasets - - ENTSOE: (Country not in ['Switzerland', 'Ireland', 'Albania', 'Greece', 'Czech Republic', 'Bulgaria', 'United Kingdom', 'Italy', 'Serbia'] and not (Country == 'Spain' and Fueltype == 'Hydro')) or (Fueltype == 'Geothermal') - - JRC: Country not in ['Switzerland', 'Albania', 'United Kingdom', 'Norway'] - - OPSD: Country not in ['Switzerland', 'Italy', 'Spain', 'Norway', 'Austria'] - - GEM: not (Country == 'Germany' and 
Fueltype in ['Solar', 'Wind']) - - EESI: Fueltype != 'Hydro' and not (Country == 'Germany' and Fueltype == 'Battery') +# these sources skip unit aggregation for fully_included_sources not covered in matching_sources +aggregate_only_matching_sources: - MASTR - - GND - parallel_duke_processes: 16 threads_extend_by_non_matched: 16 @@ -222,7 +217,7 @@ EESI: GND: net_capacity: true reliability_score: 5 - status: ["Shutdown", "Operational", "Planned", "Under Construction", "Decommissioning Completed"] + status: ["Shutdown", "Operational", "Under Construction", "Decommissioning Completed"] url: https://raw.githubusercontent.com/cristianst85/GeoNuclearData/1bc8b4ac106af236902385b87e46c540b4864815/data/csv/denormalized/nuclear_power_plants.csv fn: nuclear_power_plants.csv @@ -562,6 +557,8 @@ target_technologies: - op zee - zeewind clean_name: + fueltypes_with_blocks: + - Nuclear remove_common_words: false # remove words which appear more that 20 times in all entries remove_duplicated_words: true replace: @@ -586,6 +583,9 @@ clean_name: - plant - unit - kraftwerk + - kernkraftwerk + - gemeinschaftskernkraftwerk + - kernkw - kw - hkw - nuclear From 153ef70da424a98ef8798dec7a08b8ed5ec60a22 Mon Sep 17 00:00:00 2001 From: Fabian Neumann Date: Fri, 22 Aug 2025 19:57:02 +0200 Subject: [PATCH 50/68] finetuned source and matching settings --- powerplantmatching/package_data/config.yaml | 29 +++++++++++++-------- 1 file changed, 18 insertions(+), 11 deletions(-) diff --git a/powerplantmatching/package_data/config.yaml b/powerplantmatching/package_data/config.yaml index 447684f3..9b805d49 100644 --- a/powerplantmatching/package_data/config.yaml +++ b/powerplantmatching/package_data/config.yaml @@ -16,20 +16,27 @@ main_query: "Name != '' and (lat >= 30 or lat != lat)" matching_sources: # Make individual queries for the datasets as done in `fully_included_sources` # Queries are combined with `main_query` with an `and` operator - - ENTSOE: Fueltype != 'Solar' - - GEO: Fueltype != 'Solar' 
- - GPD: Fueltype != 'Solar' - - JRC: Fueltype != 'Solar' - - OPSD: Country != "Spain" and Fueltype not in ['Hard Coal', 'Solar'] - - BEYONDCOAL: Fueltype != 'Solar' - - GEM - - MASTR - - EESI: Fueltype != 'Solar' - - GND: Fueltype != 'Solar' + - ENTSOE: not (Country == 'Germany' and Fueltype == 'Wind') # wind is per turbine rather than park in MASTR and unsuitable for matching + - GEO: Capacity >= 1 and not (Country == 'Germany' and Fueltype == 'Wind') and Fueltype not in ['Oil', 'Nuclear'] and not (Country in ['Bulgaria', 'Slovakia'] and Fueltype == 'Hard Coal') + - GPD: Capacity >= 1 and not (Country == 'Germany' and Fueltype == 'Wind') and not (Country in ['Czechia', 'Bulgaria', 'Romania'] and Fueltype == 'Hard Coal') and Fueltype != 'Nuclear' + - JRC: Capacity >= 1 and not (Country == 'Germany' and Fueltype == 'Wind') # do not match small hydro + - OPSD: not (Country == 'Germany' and Fueltype == 'Wind') and ((Capacity >= 1 and Fueltype != 'Solar') or Capacity >= 3) and not (Country == 'Spain' and Fueltype == 'Hard Coal') and not (Country == 'Italy' and Fueltype == 'Natural Gas') + - BEYONDCOAL + - GEM: Capacity >= 3 and not (Country == 'Germany' and Fueltype == 'Wind') + # do not match units below 1 MW (2 MW for biogas, 3 MW for solar), exclude wind in Germany from any matching + - MASTR: (Fueltype != 'Wind') and ((Fueltype == 'Solar' and Capacity >= 3) or (Fueltype == 'Biogas' and Capacity >= 2) or (Fueltype not in ['Solar', 'Biogas'] and Capacity >= 1)) + - EESI -# fully_included_sources, these sources are included even without match to the final dataset +# # fully_included_sources, these sources are included even without match to the final dataset fully_included_sources: # Make individual queries for the datasets + - GEM: not (Country == 'Germany' and Fueltype in ['Solar', 'Wind']) # wind and solar in Germany is covered by MASTR + - EESI: Fueltype != 'Hydro' and not (Country == 'Germany' and Fueltype == 'Battery') # battery in Germany is covered by MASTR + 
- MASTR: Capacity >= 0.1 and Fueltype != 'Nuclear' + - OPSD: Country != 'Germany' and Capacity < 1 and Capacity >= 0.1 and Fueltype == 'Hydro'# take small hydro outside Germany from OPSD (highest coverage) + - BEYONDCOAL + - JRC: Country in ['Italy', 'Croatia', 'Serbia', 'Slovakia'] + # these sources skip unit aggregation for fully_included_sources not covered in matching_sources aggregate_only_matching_sources: - MASTR From b7066d4a3a2fd6f3d731b34cefffdea52cea8199 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Fri, 22 Aug 2025 17:57:19 +0000 Subject: [PATCH 51/68] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- powerplantmatching/data.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/powerplantmatching/data.py b/powerplantmatching/data.py index 9f63bf84..83bc62d7 100644 --- a/powerplantmatching/data.py +++ b/powerplantmatching/data.py @@ -1763,7 +1763,8 @@ def GNPT(raw=False, update=False, config=None): .pipe(convert_to_short_name) .dropna(subset="Capacity") .assign( - Name=lambda df: df["Name"] + df["Unit Name"].fillna("").apply(lambda x: f" {x}" if x else ""), + Name=lambda df: df["Name"] + + df["Unit Name"].fillna("").apply(lambda x: f" {x}" if x else ""), DateIn=df["DateIn"].apply(pd.to_numeric, errors="coerce"), DateOut=df["DateOut"].apply(pd.to_numeric, errors="coerce"), lat=df["lat"].apply(pd.to_numeric, errors="coerce"), From d446aba70b9b4fe382a63afc8f324a2d7bb76d7e Mon Sep 17 00:00:00 2001 From: Fabian Neumann Date: Fri, 22 Aug 2025 20:02:29 +0200 Subject: [PATCH 52/68] fix typo --- powerplantmatching/cleaning.py | 2 +- powerplantmatching/package_data/config.yaml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/powerplantmatching/cleaning.py b/powerplantmatching/cleaning.py index 2626db91..01ec1aa5 100644 --- a/powerplantmatching/cleaning.py +++ b/powerplantmatching/cleaning.py @@ -492,7 +492,7 
@@ def aggregate_units( if ds_name in config.get("aggregate_only_matching_sources", []): for source in config["matching_sources"]: if isinstance(source, dict) and ds_name in source: - query = source[ds_name] + agg_query = source[ds_name] break block_query = None diff --git a/powerplantmatching/package_data/config.yaml b/powerplantmatching/package_data/config.yaml index 9b805d49..ec710b36 100644 --- a/powerplantmatching/package_data/config.yaml +++ b/powerplantmatching/package_data/config.yaml @@ -33,7 +33,7 @@ fully_included_sources: - GEM: not (Country == 'Germany' and Fueltype in ['Solar', 'Wind']) # wind and solar in Germany is covered by MASTR - EESI: Fueltype != 'Hydro' and not (Country == 'Germany' and Fueltype == 'Battery') # battery in Germany is covered by MASTR - MASTR: Capacity >= 0.1 and Fueltype != 'Nuclear' - - OPSD: Country != 'Germany' and Capacity < 1 and Capacity >= 0.1 and Fueltype == 'Hydro'# take small hydro outside Germany from OPSD (highest coverage) + - OPSD: Country != 'Germany' and Capacity < 1 and Capacity >= 0.1 and Fueltype == 'Hydro' # take small hydro outside Germany from OPSD (highest coverage) - BEYONDCOAL - JRC: Country in ['Italy', 'Croatia', 'Serbia', 'Slovakia'] From a3335ea1dea8b4132ca8b73ab75b58f9b1fce107 Mon Sep 17 00:00:00 2001 From: Fabian Neumann Date: Fri, 22 Aug 2025 20:04:25 +0200 Subject: [PATCH 53/68] fix another typo --- powerplantmatching/cleaning.py | 4 ---- 1 file changed, 4 deletions(-) diff --git a/powerplantmatching/cleaning.py b/powerplantmatching/cleaning.py index 01ec1aa5..b49a1b3a 100644 --- a/powerplantmatching/cleaning.py +++ b/powerplantmatching/cleaning.py @@ -506,13 +506,9 @@ def aggregate_units( duplicates = pd.concat( [duke(df.query(query), threads=threads) for c in countries] ) - duplicates = pd.concat( - [duke(df.query("Country == @c"), threads=threads) for c in countries] - ) else: query = " and ".join(filter(None, [agg_query, block_query])) duplicates = duke(df.query(query) if query else df, 
threads=threads) - duplicates = duke(df, threads=threads) df = cliques(df, duplicates) df = df.groupby("grouped").agg(props_for_groups) From 94264f25d78740c349bbf6e1407fc2a7177f1ca9 Mon Sep 17 00:00:00 2001 From: Fabian Neumann Date: Fri, 22 Aug 2025 20:14:23 +0200 Subject: [PATCH 54/68] amend release notes --- doc/release-notes.rst | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/doc/release-notes.rst b/doc/release-notes.rst index e2c2de5f..c5748c13 100644 --- a/doc/release-notes.rst +++ b/doc/release-notes.rst @@ -24,6 +24,12 @@ Upcoming Version * Include mothballed gas, oil and coal power plants. +* Added option to retain blocks for subsets of fuel types (e.g. `clean_name: fueltypes_with_blocks: ['Nuclear']`). + +* For fully included datasets, add option to only aggregate units included in the matching process (e.g. `aggregate_only_matching_sources: ['MASTR']`). + +* Added option for multiprocessing when aggregating units of non-matched power plants (e.g. `threads_extend_by_non_matched: 16`). + * Updating matching logic configuration. 
From 07cc23cb5881785d79c259a308179e416dfae735 Mon Sep 17 00:00:00 2001 From: Fabian Neumann Date: Sun, 24 Aug 2025 11:05:30 +0200 Subject: [PATCH 55/68] remove zero values from summed non-weighted numeric columns in aggregate_units function --- powerplantmatching/cleaning.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/powerplantmatching/cleaning.py b/powerplantmatching/cleaning.py index b49a1b3a..c5221015 100644 --- a/powerplantmatching/cleaning.py +++ b/powerplantmatching/cleaning.py @@ -526,4 +526,9 @@ def aggregate_units( .reindex(columns=cols) .pipe(set_column_name, ds_name) ) + + # Remove zero values from summed non-weighted numeric columns + numeric_cols = df.select_dtypes(include="number").columns + df[numeric_cols] = df[numeric_cols].where(lambda df: df != 0) + return df From e7cbaedbcd8ad72c185d58f886387c038d9c9e63 Mon Sep 17 00:00:00 2001 From: Fabian Neumann Date: Sun, 24 Aug 2025 11:28:04 +0200 Subject: [PATCH 56/68] add GloHydroRES dataset --- powerplantmatching/cleaning.py | 11 +++- powerplantmatching/data.py | 71 +++++++++++++++++++++ powerplantmatching/package_data/config.yaml | 19 ++++++ 3 files changed, 99 insertions(+), 2 deletions(-) diff --git a/powerplantmatching/cleaning.py b/powerplantmatching/cleaning.py index c5221015..9b0960de 100644 --- a/powerplantmatching/cleaning.py +++ b/powerplantmatching/cleaning.py @@ -146,8 +146,15 @@ def clean_name(df, config=None): name = name.str.replace(pattern, key, regex=True) if config["clean_name"]["remove_duplicated_words"]: - name = name.str.replace(r"\b(\w+)(?:\W\1\b)+", r"\1", regex=True, case=False) - name = name.str.strip().str.title().str.replace(r" +", " ", regex=True) + name = ( + name.str.replace(r"\b(\w+)(?:\W\1\b)+", r"\1", regex=True, case=False) + .str.strip() + .str.replace(r" +", " ", regex=True) + .str.title() + .str.replace(r"\b(\w+)(?:\W\1\b)+", r"\1", regex=True, case=False) + ) + else: + name = name.str.strip().str.title().str.replace(r" +", " ", regex=True) return 
df.assign(Name=name).sort_values("Name") diff --git a/powerplantmatching/data.py b/powerplantmatching/data.py index 83bc62d7..b8d9118c 100644 --- a/powerplantmatching/data.py +++ b/powerplantmatching/data.py @@ -2685,6 +2685,77 @@ def GND( return df_final +def GHR( + raw=False, + update=False, + config=None, +): + """ + Get the GloHydroRes (GHR) dataset. + + https://www.nature.com/articles/s41597-025-04975-0 + + https://zenodo.org/records/14526360 + + Parameters + ---------- + raw : Boolean, default False + Whether to return the original dataset + update: bool, default False + Whether to update the data from the url. + config : dict, default None + Add custom specific configuration, e.g. + powerplantmatching.config.get_config(target_countries='Italy'), defaults + to powerplantmatching.config.get_config() + """ + + config = get_config() if config is None else config + + fn = get_raw_file("GHR", update=update, config=config) + + df = pd.read_csv(fn) + + if raw: + return df + + RENAME_COLUMNS = { + "ID": "projectID", + "name": "Name", + "country": "Country", + "Latitude": "plant_lat", + "Longitude": "plant_lon", + "plant_type": "Technology", + "dam_height_m": "DamHeight_m", + "year": "DateIn", + } + TECHNOLOGY_MAP = { + "STO": "Reservoir", + "RTO": "Run-Of-River", + "PHS": "Pumped Hydro", + "canal": np.nan, + } + + df_final = ( + df.rename(columns=RENAME_COLUMNS) + .assign( + projectID=lambda df: "GHR-" + df.projectID.astype(str), + Name=lambda df: df.Name.str.split(" - ").str[0].combine_first(df.dam_name), + DateIn=lambda df: pd.to_datetime(df.DateIn).dt.year, + Technology=lambda df: df.Technology.map(TECHNOLOGY_MAP), + Volume_Mm3=lambda df: df.res_vol_km3 * 1e3, + # StorageCapacity_MWh=lambda df: 9.81 * df.dam_height_m * df.Volume_Mm3 * 0.9 / 3.6, + # Duration=lambda df: df.StorageCapacity_MWh / df.Capacity, + Set="PP", + Fueltype="Hydro", + ) + .pipe(clean_name) + .pipe(set_column_name, "GHR") + .pipe(config_filter, config) + ) + + return df_final + + def 
EXTERNAL_DATABASE(raw=False, update=True, config=None): """ Importer for external custom databases. diff --git a/powerplantmatching/package_data/config.yaml b/powerplantmatching/package_data/config.yaml index ec710b36..4e9515c2 100644 --- a/powerplantmatching/package_data/config.yaml +++ b/powerplantmatching/package_data/config.yaml @@ -26,6 +26,7 @@ matching_sources: # do not match units below 1 MW (2 MW for biogas, 3 MW for solar), exclude wind in Germany from any matching - MASTR: (Fueltype != 'Wind') and ((Fueltype == 'Solar' and Capacity >= 3) or (Fueltype == 'Biogas' and Capacity >= 2) or (Fueltype not in ['Solar', 'Biogas'] and Capacity >= 1)) - EESI + - GHR # # fully_included_sources, these sources are included even without match to the final dataset fully_included_sources: @@ -227,6 +228,10 @@ GND: status: ["Shutdown", "Operational", "Under Construction", "Decommissioning Completed"] url: https://raw.githubusercontent.com/cristianst85/GeoNuclearData/1bc8b4ac106af236902385b87e46c540b4864815/data/csv/denormalized/nuclear_power_plants.csv fn: nuclear_power_plants.csv +GHR: + reliability_score: 4 + fn: GloHydroRes_vs1.csv + url: https://zenodo.org/records/14526360/files/GloHydroRes_vs1.csv # ---------------------------------------------------------------------------- # # Data Structure Config # @@ -591,16 +596,23 @@ clean_name: - unit - kraftwerk - kernkraftwerk + - wehrkraftwerk + - rheinkraftwerk - gemeinschaftskernkraftwerk - kernkw - kw - hkw - nuclear + - hydro - thermal - heizkraftwerk - eolico - project - hydroelectric + - hydropower + - hydroelectrique + - hydraulique + - embassament - pumped - storage - france @@ -617,6 +629,13 @@ clean_name: - bosnia - and - herzegovina + - bulgaria + - generating + - romania + - macedonia + - latvia + - lithuania + - hungary - \w #remove single letters "ss": "ß" From e23667415b165b3e891b543e37e9bc92c0fae84c Mon Sep 17 00:00:00 2001 From: Fabian Neumann Date: Sun, 24 Aug 2025 11:50:08 +0200 Subject: [PATCH 57/68] 
amend release notes --- doc/release-notes.rst | 2 ++ 1 file changed, 2 insertions(+) diff --git a/doc/release-notes.rst b/doc/release-notes.rst index c5748c13..5b9dbbce 100644 --- a/doc/release-notes.rst +++ b/doc/release-notes.rst @@ -12,6 +12,8 @@ Upcoming Version * Added [European Energy Storage Inventory](https://ses.jrc.ec.europa.eu/storage-inventory-maps) dataset as `pm.data.EESI()`. +* Added [GloHydroRES](https://zenodo.org/records/14526360) dataset as `pm.data.GHR()`. + * Updated ENTSOE, BEYONDCOAL, JRC, IRENASTAT and the Global Energy Monitor datasets to the latest versions. * Fix in `pm.data.MASTR()` the distinction of hydro technologies and between offshore and onshore wind. Also read in storage technologies. From e22267895f0d4eda921a78d47c16c9252567454c Mon Sep 17 00:00:00 2001 From: Fabian Neumann Date: Sun, 24 Aug 2025 11:50:55 +0200 Subject: [PATCH 58/68] add unit name in GCPT --- powerplantmatching/data.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/powerplantmatching/data.py b/powerplantmatching/data.py index b8d9118c..12023401 100644 --- a/powerplantmatching/data.py +++ b/powerplantmatching/data.py @@ -1799,7 +1799,7 @@ def GCPT(raw=False, update=False, config=None): config = get_config() if config is None else config fn = get_raw_file("GCPT", update=update, config=config) - df = pd.read_excel(fn, sheet_name="Units", na_values=["not found"]) + df = pd.read_excel(fn, sheet_name="Units", na_values=["not found", "-"]) if raw: return df @@ -1854,6 +1854,8 @@ def GCPT(raw=False, update=False, config=None): .pipe(convert_to_short_name) .dropna(subset="Capacity") .assign( + Name=lambda df: df["Name"] + + df["Unit Name"].fillna("").apply(lambda x: f" {x}" if x else ""), DateIn=df["DateIn"].apply(pd.to_numeric, errors="coerce"), DateOut=df["DateOut"] .apply(pd.to_numeric, errors="coerce") From 60de3dcc10346a7bac0eedc24a34018e6296ca20 Mon Sep 17 00:00:00 2001 From: Fabian Neumann Date: Sun, 24 Aug 2025 14:33:49 +0200 Subject: 
[PATCH 59/68] report more unit names --- doc/release-notes.rst | 2 ++ powerplantmatching/data.py | 16 +++++++++++++++- powerplantmatching/package_data/config.yaml | 1 + 3 files changed, 18 insertions(+), 1 deletion(-) diff --git a/doc/release-notes.rst b/doc/release-notes.rst index 5b9dbbce..7adc6971 100644 --- a/doc/release-notes.rst +++ b/doc/release-notes.rst @@ -26,6 +26,8 @@ Upcoming Version * Include mothballed gas, oil and coal power plants. +* Initially, include unit/block name in power plant name before matching. + * Added option to retain blocks for subsets of fuel types (e.g. `clean_name: fueltypes_with_blocks: ['Nuclear']`). * For fully included datasets, add option to only aggregate units included in the matching process (e.g. `aggregate_only_matching_sources: ['MASTR']`). diff --git a/powerplantmatching/data.py b/powerplantmatching/data.py index 12023401..76821a6d 100644 --- a/powerplantmatching/data.py +++ b/powerplantmatching/data.py @@ -329,6 +329,9 @@ def to_year(ds): res = units.join(ppl.set_index("projectID"), "projectID", rsuffix="_ppl") res["DateIn"] = res.DateIn.fillna(res.DateIn_ppl) + res["Name"] = res.Name + res["Unit_Nbr"].fillna("").apply( + lambda x: f" {x}" if x else "" + ) not_included_ppl = ppl.query("projectID not in @res.projectID") res = pd.concat([res, not_included_ppl]).pipe(set_column_name, "GEO") res = scale_to_net_capacities(res) @@ -1855,7 +1858,7 @@ def GCPT(raw=False, update=False, config=None): .dropna(subset="Capacity") .assign( Name=lambda df: df["Name"] - + df["Unit Name"].fillna("").apply(lambda x: f" {x}" if x else ""), + + df["Unit name"].fillna("").apply(lambda x: f" {x}" if x else ""), DateIn=df["DateIn"].apply(pd.to_numeric, errors="coerce"), DateOut=df["DateOut"] .apply(pd.to_numeric, errors="coerce") @@ -2295,6 +2298,7 @@ def MASTR( "EinheitBetriebsstatus": "Status", "Laengengrad": "lon", "Breitengrad": "lat", + "WEIC": "EIC", } COUNTRY_MAP = { "Deutschland": "Germany", @@ -2308,6 +2312,7 @@ def MASTR( 
"Energietraeger", "Hauptbrennstoff", "NameStromerzeugungseinheit", + "NameKraftwerksblock", "NameWindpark", "Technologie", ] @@ -2499,6 +2504,15 @@ def MASTR( ).index df_processed.loc[mask, "Set"] = "PP" + df_processed["Name"] = df_processed.apply( + lambda x: f"{x.Name} {x.NameKraftwerksblock.replace(x.Name, '').strip()}" + if x.NameKraftwerksblock + and x.NameKraftwerksblock != x.Name + and x.Fueltype in config["clean_name"]["fueltypes_with_blocks"] + else x.Name, + axis=1, + ) + df_final = ( df_processed.pipe(clean_name) .pipe(set_column_name, "MASTR") diff --git a/powerplantmatching/package_data/config.yaml b/powerplantmatching/package_data/config.yaml index 4e9515c2..5f403784 100644 --- a/powerplantmatching/package_data/config.yaml +++ b/powerplantmatching/package_data/config.yaml @@ -594,6 +594,7 @@ clean_name: - power - plant - unit + - block - kraftwerk - kernkraftwerk - wehrkraftwerk From 8a8d7445b7472275a0a1d71906f2699ff3a7f38b Mon Sep 17 00:00:00 2001 From: Fabian Neumann Date: Sun, 24 Aug 2025 14:34:28 +0200 Subject: [PATCH 60/68] move BEYONDCOAL to unit-level dataset --- powerplantmatching/data.py | 102 +++++++++----------- powerplantmatching/package_data/config.yaml | 4 +- 2 files changed, 50 insertions(+), 56 deletions(-) diff --git a/powerplantmatching/data.py b/powerplantmatching/data.py index 76821a6d..d1949301 100644 --- a/powerplantmatching/data.py +++ b/powerplantmatching/data.py @@ -69,73 +69,67 @@ def BEYONDCOAL(raw=False, update=False, config=None): config = get_config() if config is None else config fn = get_raw_file("BEYONDCOAL", update=update, config=config) - df = pd.read_excel(fn, sheet_name="Plant", header=0, skiprows=[0, 2, 3]) - df.set_index("BFF plant ID", drop=False, inplace=True) + df = pd.read_excel( + fn, sheet_name="Unit", header=0, skiprows=[0, 2, 3], na_values=["unknown"] + ) + + df_plant = pd.read_excel( + fn, + sheet_name="Plant", + header=0, + skiprows=[0, 2, 3], + usecols=["BFF plant ID", "Latitude", "Longitude"], + 
).set_index("BFF plant ID") + + df["lat"] = df["BFF plant ID"].map(df_plant.Latitude) + df["lon"] = df["BFF plant ID"].map(df_plant.Longitude) if raw: return df - status_list = config["BEYONDCOAL"].get("status", ["Open"]) # noqa - - df_units = pd.read_excel(fn, sheet_name="Unit", header=0, skiprows=[0, 2, 3]) + status_list = config["BEYONDCOAL"].get("status", ["operational"]) # noqa RENAME_COLUMNS = { - "Plant name": "Name", + "Unit name": "Name", "Fuel type": "Fueltype", - "Latitude": "lat", - "Longitude": "lon", - "Commissioning year of first unit": "DateIn", - "(Announced) Retirement year of last unit": "DateOut", - "Coal capacity open": "Capacity", - "Plant status\n(gross)": "status", - "BFF plant ID": "projectID", + "Commissioning year": "DateIn", + "Unit status\n(detailed)": "status", + "BFF unit ID": "projectID", } - phaseout_col = "Covered by country phase-out? [if yes: country phase-out year]" - df_units[phaseout_col] = pd.to_numeric(df_units[phaseout_col], errors="coerce") - unit_phaseout = df_units.groupby("BFF plant ID")[phaseout_col].max() - - # plant-level does not contain CHP information - def get_dominant_type(group): - type_capacity = group.groupby("Unit type")["Capacity"].sum() - return ( - "CHP" - if type_capacity.get("chp", 0) > type_capacity.get("conventional", 0) - else "PP" - ) - - unit_set = df_units.groupby("BFF plant ID").apply( - get_dominant_type, include_groups=False - ) - - # for retired plants - unit_capacity = df_units.groupby("BFF plant ID").Capacity.sum() + SET_MAP = { + "chp": "CHP", + "conventional": "PP", + "industrial": "CHP", + "heat": "CHP", + } with pd.option_context("future.no_silent_downcasting", True): - df = ( - df.rename(columns=RENAME_COLUMNS) - .query("status in @status_list") - .assign( - DateOut=lambda df: df.rename(columns=RENAME_COLUMNS) - .DateOut.replace({"After 2030": np.nan, "By 2030": 2030}) - .astype(float) - .combine_first(unit_phaseout), - projectID=lambda df: "BEYOND-" + df.projectID, - Fueltype=lambda 
df: df.Fueltype.str.title(), - Set=unit_set, - Technology=np.nan, - Capacity=lambda df: df.Capacity.add( - df["Coal capacity under construction"], fill_value=0 - ).combine_first(unit_capacity), - ) - .pipe(scale_to_net_capacities) - .pipe(clean_name) - .pipe(convert_to_short_name) - .pipe(set_column_name, "BEYONDCOAL") - .pipe(config_filter, config) + phaseout_col = "Covered by country phase-out? [if yes: country phase-out year]" + date_out = ( + df["(Announced) Retirement year"] + .replace({"After 2030": np.nan, "By 2030": 2030}) + .astype(float) + .combine_first(pd.to_numeric(df[phaseout_col], errors="coerce")) ) - return df + df_final = ( + df.rename(columns=RENAME_COLUMNS) + .query("status in @status_list") + .assign( + DateOut=date_out, + projectID=lambda df: "BEYOND-" + df.projectID, + Fueltype=lambda df: df.Fueltype.str.title(), + Set=lambda df: df["Unit type"].map(SET_MAP), + Technology=np.nan, + ) + .pipe(clean_name) + .pipe(convert_to_short_name) + .pipe(set_column_name, "BEYONDCOAL") + .pipe(config_filter, config) + ) + + return df_final def OPSD( diff --git a/powerplantmatching/package_data/config.yaml b/powerplantmatching/package_data/config.yaml index 5f403784..42c12e0f 100644 --- a/powerplantmatching/package_data/config.yaml +++ b/powerplantmatching/package_data/config.yaml @@ -58,9 +58,9 @@ BNETZA: url: https://www.bundesnetzagentur.de/SharedDocs/Downloads/DE/Sachgebiete/Energie/Unternehmen_Institutionen/Versorgungssicherheit/Erzeugungskapazitaeten/Kraftwerksliste/Kraftwerksliste_2019_1.xlsx;jsessionid=17E419F28D025C7DD9FC6E2BEB3D088F?__blob=publicationFile&v=2 BEYONDCOAL: net_capacity: false - aggregated_units: true + aggregated_units: false reliability_score: 4 - status: ["Construction", "Open", "Planned", "Retired"] + status: ["construction", "operational", "no longer coal", "retired", "standby", "deactivated", "retrofitting"] fn: 2025-07-24-BeyondFossilFuels-Europe_Coal_Plants_Database.xlsx url: 
https://beyondfossilfuels.org/wp-content/uploads/2025/07/2025-07-24-BeyondFossilFuels-Europe_Coal_Plants_Database.xlsx IRENA: From 2afa44ae7073258b12fce3324dff36e683cae866 Mon Sep 17 00:00:00 2001 From: Fabian Neumann Date: Mon, 25 Aug 2025 17:50:34 +0200 Subject: [PATCH 61/68] Update powerplantmatching/cleaning.py Co-authored-by: Johannes HAMPP <42553970+euronion@users.noreply.github.com> --- powerplantmatching/cleaning.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/powerplantmatching/cleaning.py b/powerplantmatching/cleaning.py index 9b0960de..192f884f 100644 --- a/powerplantmatching/cleaning.py +++ b/powerplantmatching/cleaning.py @@ -399,7 +399,7 @@ def clean_technology(df, generalize_hydros=False): "Nanicl": "NaNiCl", "Caes": "CAES", } - tech = tech.replace(ABBREVIATIONS, regex=True) + tech = tech.replace(ABBREVIATIONS, regex=False) return df.assign(Technology=tech) From b432462543a3f9de30d66abab724c6d8871d6d28 Mon Sep 17 00:00:00 2001 From: Fabian Neumann Date: Mon, 25 Aug 2025 18:11:40 +0200 Subject: [PATCH 62/68] more comments in cleaning --- powerplantmatching/cleaning.py | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/powerplantmatching/cleaning.py b/powerplantmatching/cleaning.py index 192f884f..ac5c497a 100644 --- a/powerplantmatching/cleaning.py +++ b/powerplantmatching/cleaning.py @@ -118,7 +118,10 @@ def clean_name(df, config=None): pattern = np.atleast_1d(pattern) - # do not remove block numbers for fuel types with blocks + # do not remove block numbers for fuel types with blocks; the regular + # regex [^a-zA-Z] removes non-alphabetical characters; for fueltypes to + # keep, the regex [^a-zA-Z0-9] is used which only removes + # non-alphanumerical characters if len(keep_blocks) > 0 and key == " " and "[^a-zA-Z]" in pattern: base = [rf"\b{p}\b" for p in pattern if p != "[^a-zA-Z]"] pattern_keep = r"(?i)" + "|".join(base + [r"[^a-zA-Z0-9]"]) @@ -128,13 +131,15 @@ def clean_name(df, config=None): 
pattern_default, key, regex=True ) - # do not remove block letters for fuel types with blocks + # do not remove block letters for fuel types with blocks; the regular + # regex \w would remove standalone letters, this one is skipped for + # fueltypes in mask elif key == "" and "\w" in pattern: pattern_keep = r"(?i)" + "|".join( [rf"\b{p}\b" for p in pattern if p != "\w"] ) pattern_default = r"(?i)" + "|".join( - [rf"\b{p}\b" for p in pattern if p != "\w"] + [rf"\b{p}\b" for p in pattern] ) name.loc[mask] = name.loc[mask].str.replace(pattern_keep, key, regex=True) name.loc[~mask] = name.loc[~mask].str.replace( @@ -145,6 +150,7 @@ def clean_name(df, config=None): pattern = r"(?i)" + "|".join([rf"\b{p}\b" for p in pattern]) name = name.str.replace(pattern, key, regex=True) + # remove duplicated words; second pass necessary for edge cases if config["clean_name"]["remove_duplicated_words"]: name = ( name.str.replace(r"\b(\w+)(?:\W\1\b)+", r"\1", regex=True, case=False) From 2fb3e7c4d4c9e1ec1b51256e2b7242bc362e90ff Mon Sep 17 00:00:00 2001 From: Fabian Neumann Date: Mon, 25 Aug 2025 18:11:59 +0200 Subject: [PATCH 63/68] more comments on matching_sources selection --- powerplantmatching/package_data/config.yaml | 23 +++++++++++++++------ 1 file changed, 17 insertions(+), 6 deletions(-) diff --git a/powerplantmatching/package_data/config.yaml b/powerplantmatching/package_data/config.yaml index 42c12e0f..6b4f3bf7 100644 --- a/powerplantmatching/package_data/config.yaml +++ b/powerplantmatching/package_data/config.yaml @@ -16,12 +16,18 @@ main_query: "Name != '' and (lat >= 30 or lat != lat)" matching_sources: # Make individual queries for the datasets as done in `fully_included_sources` # Queries are combined with `main_query` with an `and` operator - - ENTSOE: not (Country == 'Germany' and Fueltype == 'Wind') # wind is per turbine rather than park in MASTR and unsuitable for matching + # capacity filters avoid matching of too small units (which is too time-consuming) + # 
wind is per turbine rather than park in MASTR and unsuitable for matching + - ENTSOE: not (Country == 'Germany' and Fueltype == 'Wind') + # wind in germany is provided by MASTR, other filters are due to large deviations to other datasets - GEO: Capacity >= 1 and not (Country == 'Germany' and Fueltype == 'Wind') and Fueltype not in ['Oil', 'Nuclear'] and not (Country in ['Bulgaria', 'Slovakia'] and Fueltype == 'Hard Coal') + # wind in germany is provided by MASTR, nuclear is not block-wise, other filters are due to large deviations to other datasets - GPD: Capacity >= 1 and not (Country == 'Germany' and Fueltype == 'Wind') and not (Country in ['Czechia', 'Bulgaria', 'Romania'] and Fueltype == 'Hard Coal') and Fueltype != 'Nuclear' - - JRC: Capacity >= 1 and not (Country == 'Germany' and Fueltype == 'Wind') # do not match small hydro + - JRC: Capacity >= 1 + # wind in germany is provided by MASTR, other filters are due to large deviations to other datasets - OPSD: not (Country == 'Germany' and Fueltype == 'Wind') and ((Capacity >= 1 and Fueltype != 'Solar') or Capacity >= 3) and not (Country == 'Spain' and Fueltype == 'Hard Coal') and not (Country == 'Italy' and Fueltype == 'Natural Gas') - BEYONDCOAL + # wind in germany is provided by MASTR - GEM: Capacity >= 3 and not (Country == 'Germany' and Fueltype == 'Wind') # do not match units below 1 MW (2 MW for biogas, 3 MW for solar), exclude wind in Germany from any matching - MASTR: (Fueltype != 'Wind') and ((Fueltype == 'Solar' and Capacity >= 3) or (Fueltype == 'Biogas' and Capacity >= 2) or (Fueltype not in ['Solar', 'Biogas'] and Capacity >= 1)) @@ -31,16 +37,21 @@ matching_sources: # # fully_included_sources, these sources are included even without match to the final dataset fully_included_sources: # Make individual queries for the datasets - - GEM: not (Country == 'Germany' and Fueltype in ['Solar', 'Wind']) # wind and solar in Germany is covered by MASTR - - EESI: Fueltype != 'Hydro' and not (Country == 
'Germany' and Fueltype == 'Battery') # battery in Germany is covered by MASTR + # wind and solar in Germany is covered by MASTR + - GEM: not (Country == 'Germany' and Fueltype in ['Solar', 'Wind']) + # battery in Germany is covered by MASTR + - EESI: Fueltype != 'Hydro' and not (Country == 'Germany' and Fueltype == 'Battery') + # exclude units smaller than 100 kW (low total capacity) and take nuclear from other datasets (good matching) - MASTR: Capacity >= 0.1 and Fueltype != 'Nuclear' - - OPSD: Country != 'Germany' and Capacity < 1 and Capacity >= 0.1 and Fueltype == 'Hydro' # take small hydro outside Germany from OPSD (highest coverage) + # take small hydro outside Germany from OPSD (highest coverage) + - OPSD: Country != 'Germany' and Capacity < 1 and Capacity >= 0.1 and Fueltype == 'Hydro' - BEYONDCOAL + # include this selection of countries as they have poorer coverage in all other datasets - JRC: Country in ['Italy', 'Croatia', 'Serbia', 'Slovakia'] # these sources skip unit aggregation for fully_included_sources not covered in matching_sources aggregate_only_matching_sources: - - MASTR + - MASTR # the matching process of very small units is not efficient parallel_duke_processes: 16 threads_extend_by_non_matched: 16 From bacece0225f08719f5f4df670145c9964052c2e0 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Mon, 25 Aug 2025 16:23:45 +0000 Subject: [PATCH 64/68] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- powerplantmatching/cleaning.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/powerplantmatching/cleaning.py b/powerplantmatching/cleaning.py index 9f4bf998..3acf4306 100644 --- a/powerplantmatching/cleaning.py +++ b/powerplantmatching/cleaning.py @@ -127,9 +127,7 @@ def clean_name(df, config=None): pattern_keep = r"(?i)" + "|".join( [rf"\b{p}\b" for p in pattern if p != "\w"] ) - pattern_default = r"(?i)" + 
"|".join( - [rf"\b{p}\b" for p in pattern] - ) + pattern_default = r"(?i)" + "|".join([rf"\b{p}\b" for p in pattern]) name.loc[mask] = name.loc[mask].str.replace(pattern_keep, key, regex=True) name.loc[~mask] = name.loc[~mask].str.replace( pattern_default, key, regex=True From 4e95e661e5d661285ee161aa5f5c8093c45a8415 Mon Sep 17 00:00:00 2001 From: Fabian Neumann Date: Mon, 25 Aug 2025 18:28:45 +0200 Subject: [PATCH 65/68] markdown release notes --- docs/release-notes.md | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/docs/release-notes.md b/docs/release-notes.md index e6a13c21..43c8e8ef 100644 --- a/docs/release-notes.md +++ b/docs/release-notes.md @@ -11,6 +11,20 @@ SPDX-License-Identifier: MIT * Update Marktstammdatenregister data for Germany from [open-MaStR (February 25, 2025)](https://zenodo.org/records/14783581). * Drop support for Python 3.9, add support for Python 3.13. Minimum required Python version is now 3.10. * Restructure documentation and move to use `mkdocs` for a nicer user experience. +* Added [GeoNuclearData](github.com/cristianst85/GeoNuclearData) dataset as `pm.data.GND()`. +* Added [European Energy Storage Inventory](https://ses.jrc.ec.europa.eu/storage-inventory-maps) dataset as `pm.data.EESI()`. +* Added [GloHydroRES](https://zenodo.org/records/14526360) dataset as `pm.data.GHR()`. +* Updated ENTSOE, BEYONDCOAL, JRC, IRENASTAT and the Global Energy Monitor datasets to the latest versions. +* Fix in `pm.data.MASTR()` the distinction of hydro technologies and between offshore and onshore wind. Also read in storage technologies. +* Improved recognition of CHP power plants. +* In Global Energy Monitor datasets, also read entries below capacity threshold. +* In `pm.data.GCPT()`, add estimate for coal plant efficiency. +* Include mothballed gas, oil and coal power plants. +* Initially, include unit/block name in power plant name before matching. +* Added option to retain blocks for subsets of fuel types (e.g. 
`clean_name: fueltypes_with_blocks: ['Nuclear']`). +* For fully included datasets, add option to only aggregate units included in the matching process (e.g. `aggregate_only_matching_sources: ['MASTR']`). +* Added option for multiprocessing when aggregating units of non-matched power plants (e.g. `threads_extend_by_non_matched: 16`). +* Updating matching logic configuration. ## [v0.7.1](https://github.com/PyPSA/powerplantmatching/releases/tag/v0.7.1) (30th January 2024) From 02d05a9c9e40267a150e683950a7138d651cec57 Mon Sep 17 00:00:00 2001 From: Fabian Neumann Date: Fri, 5 Sep 2025 17:10:02 +0200 Subject: [PATCH 66/68] fix tests: reduce load --- test/test_cleaning.py | 2 +- test/test_data.py | 5 ++++- 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/test/test_cleaning.py b/test/test_cleaning.py index dab6c5c0..b4411075 100644 --- a/test/test_cleaning.py +++ b/test/test_cleaning.py @@ -83,7 +83,7 @@ def test_gather_specifications(data): def test_clean_name(data): res = clean_name(data) assert res.Name[0] == "Powerplant" - assert res.Name[1] == "An Hydro Powerplant" + assert res.Name[1] == "An Powerplant" assert res.Name[2] == "Another Powerplant With Whitespaces" assert res.Name[3] == "Coalition" assert res.Name[4] == "Besonders Chp" diff --git a/test/test_data.py b/test/test_data.py index 07d32843..933d0710 100755 --- a/test/test_data.py +++ b/test/test_data.py @@ -54,4 +54,7 @@ def test_url_retrieval(): def test_reduced_retrieval(): - pm.powerplants(reduced=False) + config = pm.get_config() + config["matching_sources"] = ["GEO", "GPD"] + config["fully_included_sources"] = [] + pm.powerplants(reduced=False, config=config) From 73f7996386f4accfc7df4fb9eb33a6df9d0df8c9 Mon Sep 17 00:00:00 2001 From: jensch-dlr <95235501+jensch-dlr@users.noreply.github.com> Date: Wed, 5 Nov 2025 15:28:34 +0100 Subject: [PATCH 67/68] Complement data update 2025 (#267) * updates path to `powerplants.png` * sets "parallel_duke_processes" to false, because otherwise pm does 
not execute on Windows machines out-of-the-box * corrects GPD file name * updates and complements GBPT converter to work with "Global-Bioenergy-Power-Tracker-GBPT-V3.xlsx" from September 2025 * fixes GEM_FUNCTIONS typo * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * fixes typo in docstring of `gather_fueltype_info()` and `MASTR()` and complements release notes * fixes paths to release-notes.md and contributors.md in PR template --------- Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> --- .github/pull_request_template.md | 4 +-- README.md | 2 +- docs/release-notes.md | 5 ++++ powerplantmatching/cleaning.py | 2 +- powerplantmatching/data.py | 30 ++++++++++++++++----- powerplantmatching/package_data/config.yaml | 4 +-- 6 files changed, 35 insertions(+), 12 deletions(-) diff --git a/.github/pull_request_template.md b/.github/pull_request_template.md index 641a2829..15e7f292 100644 --- a/.github/pull_request_template.md +++ b/.github/pull_request_template.md @@ -7,5 +7,5 @@ Closes # (if applicable). - [ ] Code changes are sufficiently documented; i.e. new functions contain docstrings and further explanations may be given in `docs`. - [ ] Unit tests for new features were added (if applicable). -- [ ] A note for the release notes `doc/release_notes.md` of the upcoming release is included. -- [ ] I consent to the release of this PR's code under the MIT license and have added my name to the `doc/contributors.md`. +- [ ] A note for the release notes `docs/release_notes.md` of the upcoming release is included. +- [ ] I consent to the release of this PR's code under the MIT license and have added my name to the `docs/contributors.md`. diff --git a/README.md b/README.md index 19ad53a7..843efbe5 100644 --- a/README.md +++ b/README.md @@ -48,7 +48,7 @@ out simulations. 
## Map -![powerplants.png](doc/powerplants.png) +![powerplants.png](docs/assets/images/powerplants.png) ## Installation diff --git a/docs/release-notes.md b/docs/release-notes.md index 43c8e8ef..ae1d2e06 100644 --- a/docs/release-notes.md +++ b/docs/release-notes.md @@ -25,6 +25,11 @@ SPDX-License-Identifier: MIT * For fully included datasets, add option to only aggregate units included in the matching process (e.g. `aggregate_only_matching_sources: ['MASTR']`). * Added option for multiprocessing when aggregating units of non-matched power plants (e.g. `threads_extend_by_non_matched: 16`). * Updating matching logic configuration. +* Update GBPT importer to support newer version of the database (from V3 on without sheet "Below Threshold"). +* Corrects GPD file name in `config.yaml`. +* Sets `parallel_duke_processes` to false (instead of 16) to make powerplantmatching executable out-of-the-box also for Windows systems. +* Updates path to `powerplants.png` in README. +* Fixes typo in docstring of `gather_fueltype_info()` (`cleaning.py`) and `MASTR()` (`data.py`). ## [v0.7.1](https://github.com/PyPSA/powerplantmatching/releases/tag/v0.7.1) (30th January 2024) diff --git a/powerplantmatching/cleaning.py b/powerplantmatching/cleaning.py index 3acf4306..432866e1 100644 --- a/powerplantmatching/cleaning.py +++ b/powerplantmatching/cleaning.py @@ -262,7 +262,7 @@ def gather_fueltype_info( Parses in a set of columns for distinct fueltype specifications. This function uses the mappings (key -> regex pattern) given - by the `config` under the section `target_technologies`. + by the `config` under the section `target_fueltypes`. The representative keys are set if any of the columns in `search_col` matches the regex pattern. 
diff --git a/powerplantmatching/data.py b/powerplantmatching/data.py index c7b0d5c2..a7717560 100644 --- a/powerplantmatching/data.py +++ b/powerplantmatching/data.py @@ -1642,9 +1642,18 @@ def GBPT(raw=False, update=False, config=None): """ config = get_config() if config is None else config fn = get_raw_file("GBPT", update=update, config=config) - large = pd.read_excel(fn, sheet_name="Data") - small = pd.read_excel(fn, sheet_name="Below Threshold") - df = pd.concat([large, small], ignore_index=True) + try: + large = pd.read_excel(fn, sheet_name="Data") + small = pd.read_excel(fn, sheet_name="Below Threshold") + df = pd.concat([large, small], ignore_index=True) + except Exception as e: + if e.args[0] == ("Worksheet named 'Below Threshold' not found"): + logger.info( + 'In newer versions of the dataset, the sheet "Below Threshold" does not exist anymore.' + ) + df = pd.read_excel(fn, sheet_name="Data") + else: + logger.error(e) if raw: return df @@ -1664,12 +1673,20 @@ def GBPT(raw=False, update=False, config=None): fueltype_dict = { # solid biomass "bioenergy: agricultural waste (solids)": "Solid Biomass", + "bioenergy: agricultural waste (solids) [90%]": "Solid Biomass", "bioenergy: agricultural waste (unknown)": "Solid Biomass", "bioenergy: paper mill wastes": "Solid Biomass", "bioenergy: unknown": "Solid Biomass", "bioenergy: wood & other biomass (biocoal)": "Solid Biomass", "bioenergy: wood & other biomass (solids)": "Solid Biomass", "bioenergy: agricultural waste (syngas)": "Solid Biomass", + "bioenergy: wood & other biomass (solids) [95%]": "Solid Biomass", + "bioenergy: wood & other biomass (solids) [92%]": "Solid Biomass", + "bioenergy: wood & other biomass (solids) [80%]": "Solid Biomass", + "bioenergy: wood & other biomass (solids) [75%]": "Solid Biomass", + "bioenergy: wood & other biomass (solids) [60%]": "Solid Biomass", + "bioenergy: wood & other biomass (solids) [51%]": "Solid Biomass", + "bioenergy: wood & other biomass (solids) [50%]": "Solid 
Biomass", # biogas "bioenergy: agricultural waste (biogas)": "Biogas", "bioenergy: refuse (landfill gas)": "Biogas", @@ -1677,6 +1694,7 @@ def GBPT(raw=False, update=False, config=None): # oil "bioenergy: ethanol": "Oil", "bioenergy: biodiesel": "Oil", + "bioenergy: bio-heavy oil": "Oil", # waste "bioenergy: refuse (municipal and industrial wastes)": "Waste", "bioenergy: refuse (syngas)": "Solid Biomass", @@ -2238,8 +2256,8 @@ def GEM(raw=False, update=False, config=None): Custom configuration, by default None """ - GEMS_FUNTIONS = [GBPT, GGPT, GCPT, GGTPT, GNPT, GSPT, GWPT, GHPT] - data = [f(raw=raw, update=update, config=config) for f in GEMS_FUNTIONS] + GEM_FUNCTIONS = [GBPT, GGPT, GCPT, GGTPT, GNPT, GSPT, GWPT, GHPT] + data = [f(raw=raw, update=update, config=config) for f in GEM_FUNCTIONS] return pd.concat(data, ignore_index=True) @@ -2251,7 +2269,7 @@ def MASTR( """ Get the Marktstammdatenregister (MaStR) dataset. - Provided by the German Federal Network Agency (Bundesnetzagentur / BNetza) and + Provided by the German Federal Network Agency (Bundesnetzagentur / BNetzA) and contains data on Germany, Austria and Switzerland. 
Parameters diff --git a/powerplantmatching/package_data/config.yaml b/powerplantmatching/package_data/config.yaml index 6b4f3bf7..c7c8327b 100644 --- a/powerplantmatching/package_data/config.yaml +++ b/powerplantmatching/package_data/config.yaml @@ -53,7 +53,7 @@ fully_included_sources: aggregate_only_matching_sources: - MASTR # the matching process of very small units is not efficient -parallel_duke_processes: 16 +parallel_duke_processes: false threads_extend_by_non_matched: 16 matched_data_url: https://raw.githubusercontent.com/PyPSA/powerplantmatching/{tag}/powerplants.csv @@ -108,7 +108,7 @@ GEO_units: fn: global_energy_observatory_ppl_units.csv GPD: reliability_score: 3 - fn: globalpowerplantdatabasev120.zip + fn: globalpowerplantdatabase_v_1_3_0.zip #if outdated, look at http://datasets.wri.org/dataset/globalpowerplantdatabase url: https://wri-dataportal-prod.s3.amazonaws.com/manual/global_power_plant_database_v_1_3.zip WIKIPEDIA: From 51d16b0a33665d297e7eadb13be24438dc892d5e Mon Sep 17 00:00:00 2001 From: Fabian Neumann Date: Wed, 5 Nov 2025 15:53:48 +0100 Subject: [PATCH 68/68] update TUBcloud fileshare links --- powerplantmatching/data.py | 4 ++-- powerplantmatching/package_data/config.yaml | 22 ++++++++++----------- 2 files changed, 13 insertions(+), 13 deletions(-) diff --git a/powerplantmatching/data.py b/powerplantmatching/data.py index a7717560..eb1f8130 100644 --- a/powerplantmatching/data.py +++ b/powerplantmatching/data.py @@ -1647,9 +1647,9 @@ def GBPT(raw=False, update=False, config=None): small = pd.read_excel(fn, sheet_name="Below Threshold") df = pd.concat([large, small], ignore_index=True) except Exception as e: - if e.args[0] == ("Worksheet named 'Below Threshold' not found"): + if "Below Threshold" in e.args[0]: logger.info( - 'In newer versions of the dataset, the sheet "Below Threshold" does not exist anymore.' + "In newer versions of the dataset, the sheet 'Below Threshold' does not exist anymore." 
) df = pd.read_excel(fn, sheet_name="Data") else: diff --git a/powerplantmatching/package_data/config.yaml b/powerplantmatching/package_data/config.yaml index c7c8327b..77256e09 100644 --- a/powerplantmatching/package_data/config.yaml +++ b/powerplantmatching/package_data/config.yaml @@ -79,7 +79,7 @@ IRENA: aggregated_units: true fn: IRENASTAT_capacities_2000-2024.csv # compiled from https://pxweb.irena.org/pxweb/en/IRENASTAT/IRENASTAT__Power%20Capacity%20and%20Generation/Country_ELECSTAT_2025_H2_PX.px/ - url: https://tubcloud.tu-berlin.de/s/p2D5E9MLWE8HPHE/download/IRENASTAT_capacities_2000-2024.csv + url: https://tubcloud.tu-berlin.de/s/dDS9erreKPNH4Ey/download/IRENASTAT_capacities_2000-2024.csv CARMA: net_capacity: false reliability_score: 1 @@ -87,7 +87,7 @@ CARMA: fn: Full_CARMA_2009_Dataset_1.csv ENTSOE: reliability_score: 5 - url: https://tubcloud.tu-berlin.de/s/QaHLH38J4A7ZF5m/download/entsoe_transparency_platform_20250820.csv + url: https://tubcloud.tu-berlin.de/s/N7qo3AGyRYZyisS/download/entsoe_transparency_platform_20250820.csv fn: entsoe_transparency_platform_20250820.csv ENTSOE-EIC: url: https://eepublicdownloads.blob.core.windows.net/cio-lio/csv/W_eicCodes.csv @@ -173,7 +173,7 @@ GGPT: reliability_score: 6 status: ["operating", "retired", "construction"] fn: Global-Oil-and-Gas-Plant-Tracker-GOGPT-August-2025.xlsx - url: https://tubcloud.tu-berlin.de/s/aKrt7dyNgazmgAm/download/Global-Oil-and-Gas-Plant-Tracker-GOGPT-August-2025.xlsx + url: https://tubcloud.tu-berlin.de/s/WrmNX5awNJFcXrQ/download/Global-Oil-and-Gas-Plant-Tracker-GOGPT-August-2025.xlsx GEM: # combined data set of all GEM trackers net_capacity: true @@ -183,32 +183,32 @@ GCPT: reliability_score: 6 status: ["operating", "retired", "construction", "mothballed"] fn: Global-Coal-Plant-Tracker-July-2025.xlsx - url: https://tubcloud.tu-berlin.de/s/etMB7qawKNwfgnk/download/Global-Coal-Plant-Tracker-July-2025.xlsx + url: 
https://tubcloud.tu-berlin.de/s/ijzbscopNTgNB2r/download/Global-Coal-Plant-Tracker-July-2025.xlsx GGTPT: net_capacity: false reliability_score: 6 aggregated_units: false status: ["operating", "retired", "construction", "mothballed"] fn: Geothermal-Power-Tracker-March-2025-Final.xlsx - url: https://tubcloud.tu-berlin.de/s/dNoEsLeGtCWDkoc/download/Geothermal-Power-Tracker-March-2025-Final.xlsx + url: https://tubcloud.tu-berlin.de/s/ypr3eL2K5kckAK4/download/Geothermal-Power-Tracker-March-2025-Final.xlsx GWPT: net_capacity: false reliability_score: 6 status: ["operating", "retired", "construction"] fn: Global-Wind-Power-Tracker-February-2025.xlsx - url: https://tubcloud.tu-berlin.de/s/8NSXSjPmJPXpg4W/download/Global-Wind-Power-Tracker-February-2025.xlsx + url: https://tubcloud.tu-berlin.de/s/L4AssxsisA6ENRb/download/Global-Wind-Power-Tracker-February-2025.xlsx GSPT: net_capacity: false reliability_score: 6 status: ["operating", "retired", "construction"] fn: Global-Solar-Power-Tracker-February-2025.xlsx - url: https://tubcloud.tu-berlin.de/s/7eo4dZXMp6eB3mz/download/Global-Solar-Power-Tracker-February-2025.xlsx + url: https://tubcloud.tu-berlin.de/s/iWZ7j3zsCGfyJ5f/download/Global-Solar-Power-Tracker-February-2025.xlsx GBPT: net_capacity: false reliability_score: 6 status: ["operating", "retired", "construction"] - fn: Global-Bioenergy-Power-Tracker-GBPT-September-2024.xlsx - url: https://tubcloud.tu-berlin.de/s/CzMBKe2rAcsoq7c/download/Global-Bioenergy-Power-Tracker-GBPT-September-2024.xlsx + fn: Global-Bioenergy-Power-Tracker-GBPT-V3.xlsx + url: https://tubcloud.tu-berlin.de/s/ZkaQonLYdakrN75/download/Global-Bioenergy-Power-Tracker-GBPT-V3.xlsx GNPT: net_capacity: false reliability_score: 6 @@ -220,7 +220,7 @@ GHPT: reliability_score: 6 status: ["operating", "retired", "construction"] fn: Global-Hydropower-Tracker-April-2025.xlsx - url: https://tubcloud.tu-berlin.de/s/2xqxRmfP4FKTrLf/download/Global-Hydropower-Tracker-April-2025.xlsx + url: 
https://tubcloud.tu-berlin.de/s/aDyd3MJWZNgeEH4/download/Global-Hydropower-Tracker-April-2025.xlsx MASTR: net_capacity: true reliability_score: 7 @@ -232,7 +232,7 @@ EESI: reliability_score: 5 status: ["Operational"] # since no start years given fn: european-energy-storage-inventory-20250817-2245.json - url: https://tubcloud.tu-berlin.de/s/RXWgYbYJpePsWAZ/download/european-energy-storage-inventory-20250817-2245.json + url: https://tubcloud.tu-berlin.de/s/5KqMDMZfb2pN3Aw/download/european-energy-storage-inventory-20250817-2245.json GND: net_capacity: true reliability_score: 5