From 8454cfa9a90a0a783905d7390819cf0f64f0bdb0 Mon Sep 17 00:00:00 2001
From: Fabian Neumann <fabian.neumann@outlook.de>
Date: Tue, 19 Aug 2025 09:33:37 +0200
Subject: [PATCH 01/68] update BEYONDCOAL

---
 powerplantmatching/data.py                  | 35 +++++++++++++++------
 powerplantmatching/package_data/config.yaml |  5 +--
 2 files changed, 28 insertions(+), 12 deletions(-)

diff --git a/powerplantmatching/data.py b/powerplantmatching/data.py
index e2fea2c7..089faa3f 100644
--- a/powerplantmatching/data.py
+++ b/powerplantmatching/data.py
@@ -68,11 +68,16 @@ def BEYONDCOAL(raw=False, update=False, config=None):
     config = get_config() if config is None else config
 
     fn = get_raw_file("BEYONDCOAL", update=update, config=config)
-    df = pd.read_excel(fn, sheet_name="Plant", header=[0, 1, 2], skiprows=[3])
+    df = pd.read_excel(fn, sheet_name="Plant", header=0, skiprows=[0,2,3])
+    df.set_index("BFF plant ID", drop=False, inplace=True)
 
     if raw:
         return df
 
+    status_list = config["BEYONDCOAL"].get("status", ["Open"]) # noqa
+
+    df_units = pd.read_excel(fn, sheet_name="Unit", header=0, skiprows=[0,2,3])
+
     RENAME_COLUMNS = {
         "Plant name": "Name",
         "Fuel type": "Fueltype",
@@ -82,24 +87,34 @@ def BEYONDCOAL(raw=False, update=False, config=None):
         "(Announced) Retirement year of last unit": "DateOut",
         "Coal capacity open": "Capacity",
         "Plant status\n(gross)": "status",
-        "EBC plant ID": "projectID",
+        "BFF plant ID": "projectID",
     }
 
     phaseout_col = "Covered by country phase-out? [if yes: country phase-out year]"
+    df_units[phaseout_col] = pd.to_numeric(df_units[phaseout_col], errors='coerce')
+    unit_phaseout = df_units.groupby("BFF plant ID")[phaseout_col].max()
+
+    # plant-level does not contain CHP information
+    def get_dominant_type(group):
+        type_capacity = group.groupby("Unit type")["Capacity"].sum()
+        return "CHP" if type_capacity.get("chp", 0) > type_capacity.get("conventional", 0) else "PP"
+
+    unit_set = df_units.groupby("BFF plant ID").apply(get_dominant_type, include_groups=False)
+    
+    # for retired plants
+    unit_capacity = df_units.groupby("BFF plant ID").Capacity.sum()
 
     df = (
-        df["Plant Data"]
-        .droplevel(1, axis=1)
+        df
         .rename(columns=RENAME_COLUMNS)
-        .query('status != "Cancelled"')
+        .query('status in @status_list')
         .assign(
-            DateOut=lambda df: df.DateOut.fillna(df[phaseout_col]).where(
-                lambda ds: ds <= 8000
-            ),
+            DateOut=lambda df: df.rename(columns=RENAME_COLUMNS).DateOut.replace({"After 2030": np.nan, "By 2030": 2030}).infer_objects(copy=False).combine_first(unit_phaseout),
             projectID=lambda df: "BEYOND-" + df.projectID,
-            Fueltype=lambda df: df.Fueltype.str.title().replace("Unknown", "Other"),
-            Set="PP",
+            Fueltype=lambda df: df.Fueltype.str.title(),
+            Set=unit_set,
             Technology=np.nan,
+            Capacity=lambda df: df.Capacity.add(df["Coal capacity under construction"], fill_value=0).combine_first(unit_capacity),
         )
         .pipe(scale_to_net_capacities)
         .pipe(clean_name)
diff --git a/powerplantmatching/package_data/config.yaml b/powerplantmatching/package_data/config.yaml
index f9b88ebe..b6d41484 100644
--- a/powerplantmatching/package_data/config.yaml
+++ b/powerplantmatching/package_data/config.yaml
@@ -57,8 +57,9 @@ BEYONDCOAL:
   net_capacity: false
   aggregated_units: true
   reliability_score: 6
-  fn: Europe_Beyond_Coal-European_Coal_Database_hc5n.xlsx
-  url: https://beyond-coal.eu/wp-content/uploads/2021/07/2021-04-20_Europe_Beyond_Coal-European_Coal_Database_hc5n.xlsx
+  status: ["Construction", "Open", "Planned", "Retired"]
+  fn: 2025-07-24-BeyondFossilFuels-Europe_Coal_Plants_Database.xlsx
+  url: https://beyondfossilfuels.org/wp-content/uploads/2025/07/2025-07-24-BeyondFossilFuels-Europe_Coal_Plants_Database.xlsx
 IRENA:
   net_capacity: true
   aggregated_units: true

From 6b06a87bead0dc2bb1606b6306918d947059d070 Mon Sep 17 00:00:00 2001
From: "pre-commit-ci[bot]"
 <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Date: Tue, 19 Aug 2025 07:38:32 +0000
Subject: [PATCH 02/68] [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci
---
 powerplantmatching/data.py | 34 ++++++++++++++++++++++------------
 1 file changed, 22 insertions(+), 12 deletions(-)

diff --git a/powerplantmatching/data.py b/powerplantmatching/data.py
index 089faa3f..7995fc58 100644
--- a/powerplantmatching/data.py
+++ b/powerplantmatching/data.py
@@ -68,15 +68,15 @@ def BEYONDCOAL(raw=False, update=False, config=None):
     config = get_config() if config is None else config
 
     fn = get_raw_file("BEYONDCOAL", update=update, config=config)
-    df = pd.read_excel(fn, sheet_name="Plant", header=0, skiprows=[0,2,3])
+    df = pd.read_excel(fn, sheet_name="Plant", header=0, skiprows=[0, 2, 3])
     df.set_index("BFF plant ID", drop=False, inplace=True)
 
     if raw:
         return df
 
-    status_list = config["BEYONDCOAL"].get("status", ["Open"]) # noqa
+    status_list = config["BEYONDCOAL"].get("status", ["Open"])  # noqa
 
-    df_units = pd.read_excel(fn, sheet_name="Unit", header=0, skiprows=[0,2,3])
+    df_units = pd.read_excel(fn, sheet_name="Unit", header=0, skiprows=[0, 2, 3])
 
     RENAME_COLUMNS = {
         "Plant name": "Name",
@@ -91,30 +91,40 @@ def BEYONDCOAL(raw=False, update=False, config=None):
     }
 
     phaseout_col = "Covered by country phase-out? [if yes: country phase-out year]"
-    df_units[phaseout_col] = pd.to_numeric(df_units[phaseout_col], errors='coerce')
+    df_units[phaseout_col] = pd.to_numeric(df_units[phaseout_col], errors="coerce")
     unit_phaseout = df_units.groupby("BFF plant ID")[phaseout_col].max()
 
     # plant-level does not contain CHP information
     def get_dominant_type(group):
         type_capacity = group.groupby("Unit type")["Capacity"].sum()
-        return "CHP" if type_capacity.get("chp", 0) > type_capacity.get("conventional", 0) else "PP"
+        return (
+            "CHP"
+            if type_capacity.get("chp", 0) > type_capacity.get("conventional", 0)
+            else "PP"
+        )
+
+    unit_set = df_units.groupby("BFF plant ID").apply(
+        get_dominant_type, include_groups=False
+    )
 
-    unit_set = df_units.groupby("BFF plant ID").apply(get_dominant_type, include_groups=False)
-    
     # for retired plants
     unit_capacity = df_units.groupby("BFF plant ID").Capacity.sum()
 
     df = (
-        df
-        .rename(columns=RENAME_COLUMNS)
-        .query('status in @status_list')
+        df.rename(columns=RENAME_COLUMNS)
+        .query("status in @status_list")
         .assign(
-            DateOut=lambda df: df.rename(columns=RENAME_COLUMNS).DateOut.replace({"After 2030": np.nan, "By 2030": 2030}).infer_objects(copy=False).combine_first(unit_phaseout),
+            DateOut=lambda df: df.rename(columns=RENAME_COLUMNS)
+            .DateOut.replace({"After 2030": np.nan, "By 2030": 2030})
+            .infer_objects(copy=False)
+            .combine_first(unit_phaseout),
             projectID=lambda df: "BEYOND-" + df.projectID,
             Fueltype=lambda df: df.Fueltype.str.title(),
             Set=unit_set,
             Technology=np.nan,
-            Capacity=lambda df: df.Capacity.add(df["Coal capacity under construction"], fill_value=0).combine_first(unit_capacity),
+            Capacity=lambda df: df.Capacity.add(
+                df["Coal capacity under construction"], fill_value=0
+            ).combine_first(unit_capacity),
         )
         .pipe(scale_to_net_capacities)
         .pipe(clean_name)

From 0002ee26c0ebf39c66a3df389141113ac4b0df96 Mon Sep 17 00:00:00 2001
From: Fabian Neumann <fabian.neumann@outlook.de>
Date: Tue, 19 Aug 2025 09:47:17 +0200
Subject: [PATCH 03/68] update JRC

---
 powerplantmatching/package_data/config.yaml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/powerplantmatching/package_data/config.yaml b/powerplantmatching/package_data/config.yaml
index b6d41484..d49ed078 100644
--- a/powerplantmatching/package_data/config.yaml
+++ b/powerplantmatching/package_data/config.yaml
@@ -80,7 +80,7 @@ ENTSOE-EIC:
 JRC:
   reliability_score: 4
   fn: jrc-hydro-power-plant-database.csv
-  url: https://raw.githubusercontent.com/energy-modelling-toolkit/hydro-power-database/fd7535c/data/jrc-hydro-power-plant-database.csv
+  url: https://raw.githubusercontent.com/energy-modelling-toolkit/hydro-power-database/27e80f/data/jrc-hydro-power-plant-database.csv
 GEO:
   net_capacity: false
   reliability_score: 3

From cdefa46fe1f3ba2f31c1095e0133838ccad60a64 Mon Sep 17 00:00:00 2001
From: Fabian Neumann <fabian.neumann@outlook.de>
Date: Tue, 19 Aug 2025 09:50:31 +0200
Subject: [PATCH 04/68] update IRENASTAT

---
 powerplantmatching/package_data/config.yaml | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/powerplantmatching/package_data/config.yaml b/powerplantmatching/package_data/config.yaml
index d49ed078..9404ff13 100644
--- a/powerplantmatching/package_data/config.yaml
+++ b/powerplantmatching/package_data/config.yaml
@@ -63,8 +63,9 @@ BEYONDCOAL:
 IRENA:
   net_capacity: true
   aggregated_units: true
-  fn: IRENASTAT_capacities_2000-2023.csv
-  url: https://zenodo.org/records/10952917/files/IRENASTAT_capacities_2000-2023.csv
+  fn: IRENASTAT_capacities_2000-2024.csv
+  # compiled from https://pxweb.irena.org/pxweb/en/IRENASTAT/IRENASTAT__Power%20Capacity%20and%20Generation/Country_ELECSTAT_2025_H2_PX.px/
+  url: https://tubcloud.tu-berlin.de/s/p2D5E9MLWE8HPHE/download/IRENASTAT_capacities_2000-2024.csv
 CARMA:
   net_capacity: false
   reliability_score: 1

From 2043225588bb8b540c193fe685657a5436a90d9c Mon Sep 17 00:00:00 2001
From: Fabian Neumann <fabian.neumann@outlook.de>
Date: Tue, 19 Aug 2025 10:18:39 +0200
Subject: [PATCH 05/68] update GGTPT

---
 powerplantmatching/data.py                  | 6 +++---
 powerplantmatching/package_data/config.yaml | 7 ++++---
 2 files changed, 7 insertions(+), 6 deletions(-)

diff --git a/powerplantmatching/data.py b/powerplantmatching/data.py
index 7995fc58..90f7f401 100644
--- a/powerplantmatching/data.py
+++ b/powerplantmatching/data.py
@@ -1846,11 +1846,11 @@ def GGTPT(raw=False, update=False, config=None):
 
     RENAME_COLUMNS = {
         "Project Name": "Name",
-        "Capacity (MW)": "Capacity",
+        "Unit Capacity (MW)": "Capacity",
         "Latitude": "lat",
         "Longitude": "lon",
-        "Start year": "DateIn",
-        "Retired year": "DateOut",
+        "Start Year": "DateIn",
+        "Retired Year": "DateOut",
         "Country/Area": "Country",
         "GEM unit ID": "projectID",
     }
diff --git a/powerplantmatching/package_data/config.yaml b/powerplantmatching/package_data/config.yaml
index 9404ff13..52b22792 100644
--- a/powerplantmatching/package_data/config.yaml
+++ b/powerplantmatching/package_data/config.yaml
@@ -173,9 +173,10 @@ GCPT:
 GGTPT:
   net_capacity: false
   reliability_score: 4
-  status: ["operating", "retired", "construction"]
-  fn: Geothermal-Power-Tracker-May-2024.xlsx
-  url: https://tubcloud.tu-berlin.de/s/Hz3ZD7YcKnZTs9t/download/Geothermal-Power-Tracker-May-2024.xlsx
+  aggregated_units: false
+  status: ["operating", "retired", "construction", "mothballed"]
+  fn: Geothermal-Power-Tracker-March-2025-Final.xlsx
+  url: https://tubcloud.tu-berlin.de/s/dNoEsLeGtCWDkoc/download/Geothermal-Power-Tracker-March-2025-Final.xlsx
 GWPT:
   net_capacity: false
   reliability_score: 4

From 44371498c8de8276b6f6dcf54a03f11bf93be1ae Mon Sep 17 00:00:00 2001
From: Fabian Neumann <fabian.neumann@outlook.de>
Date: Tue, 19 Aug 2025 10:53:44 +0200
Subject: [PATCH 06/68] update GCPT

---
 powerplantmatching/data.py                  | 23 ++++++++++++---------
 powerplantmatching/package_data/config.yaml |  4 ++--
 2 files changed, 15 insertions(+), 12 deletions(-)

diff --git a/powerplantmatching/data.py b/powerplantmatching/data.py
index 90f7f401..a31c3fc6 100644
--- a/powerplantmatching/data.py
+++ b/powerplantmatching/data.py
@@ -1764,7 +1764,7 @@ def GCPT(raw=False, update=False, config=None):
 
     config = get_config() if config is None else config
     fn = get_raw_file("GCPT", update=update, config=config)
-    df = pd.read_excel(fn, sheet_name="Units")
+    df = pd.read_excel(fn, sheet_name="Units", na_values=["not found"])
 
     if raw:
         return df
@@ -1782,22 +1782,24 @@ def GCPT(raw=False, update=False, config=None):
         "GEM unit/phase ID": "projectID",
     }
     fueltype_dict = {
+        "anthracite": "Hard Coal",
         "bituminous": "Hard Coal",
+        "bituminous with CCS": "Hard Coal",
         "lignite": "Lignite",
-        "unknown": "Hard Coal",
-        "subbituminous": "Hard Coal",
-        "waste coal": "Hard Coal",
-        "anthracite": "Hard Coal",
         "lignite with CCS": "Lignite",
-        "bituminous with CCS": "Hard Coal",
+        "subbituminous": "Hard Coal",
         "subbituminous with CCS": "Hard Coal",
+        "unknown": "Hard Coal",
         "unknown with CCS": "Hard Coal",
+        "waste coal": "Hard Coal",
     }
 
     planned_retirement = df["Planned retirement"].apply(pd.to_numeric, errors="coerce")
 
     status_list = config["GCPT"].get("status", ["operating"])  # noqa: F841
 
+    BTU_PER_KWH = 3412.14
+
     df = df.rename(columns=RENAME_COLUMNS)
     df_final = (
         df.pipe(clean_name)
@@ -1806,16 +1808,17 @@ def GCPT(raw=False, update=False, config=None):
         .dropna(subset="Capacity")
         .assign(
             DateIn=df["DateIn"].apply(pd.to_numeric, errors="coerce"),
-            DateOut=df["DateOut"].apply(pd.to_numeric, errors="coerce"),
+            DateOut=df["DateOut"]
+            .apply(pd.to_numeric, errors="coerce")
+            .combine_first(planned_retirement),
             lat=df["lat"].apply(pd.to_numeric, errors="coerce"),
             lon=df["lon"].apply(pd.to_numeric, errors="coerce"),
+            Set=df["CHP"].replace({"yes": "CHP", "no": "PP"}),
+            Efficiency=BTU_PER_KWH / df["Heat rate (Btu per kWh)"],
         )
-        .assign(DateOut=lambda x: x["DateOut"].combine_first(planned_retirement))
         .query("Status in @status_list")
         .pipe(lambda x: x[df.columns.intersection(config.get("target_columns"))])
         .pipe(lambda x: x.replace({"Fueltype": fueltype_dict}))
-        .pipe(lambda x: x.assign(Technology="Steam Turbine"))
-        .pipe(lambda x: x.assign(Set="PP"))
         .pipe(config_filter, config)
     )
 
diff --git a/powerplantmatching/package_data/config.yaml b/powerplantmatching/package_data/config.yaml
index 52b22792..f961fcab 100644
--- a/powerplantmatching/package_data/config.yaml
+++ b/powerplantmatching/package_data/config.yaml
@@ -168,8 +168,8 @@ GCPT:
   net_capacity: false
   reliability_score: 4
   status: ["operating", "retired", "construction"]
-  fn: Global-Coal-Plant-Tracker-July-2024.xlsx
-  url: https://tubcloud.tu-berlin.de/s/FdyKMZtr2ddRJEd/download/Global-Coal-Plant-Tracker-July-2024.xlsx
+  fn: Global-Coal-Plant-Tracker-July-2025.xlsx
+  url: https://tubcloud.tu-berlin.de/s/etMB7qawKNwfgnk/download/Global-Coal-Plant-Tracker-July-2025.xlsx
 GGTPT:
   net_capacity: false
   reliability_score: 4

From 309b22509e6a0d2569243381b1e79ca6f453d9e9 Mon Sep 17 00:00:00 2001
From: Fabian Neumann <fabian.neumann@outlook.de>
Date: Tue, 19 Aug 2025 12:23:45 +0200
Subject: [PATCH 07/68] update GGPT

---
 powerplantmatching/data.py                  | 50 +++++++++++----------
 powerplantmatching/package_data/config.yaml |  4 +-
 2 files changed, 29 insertions(+), 25 deletions(-)

diff --git a/powerplantmatching/data.py b/powerplantmatching/data.py
index a31c3fc6..4d9ed713 100644
--- a/powerplantmatching/data.py
+++ b/powerplantmatching/data.py
@@ -2028,42 +2028,54 @@ def GGPT(raw=False, update=False, config=None):
     """
     config = get_config() if config is None else config
     fn = get_raw_file("GGPT", update=update, config=config)
-    df = pd.read_excel(fn, sheet_name="Gas & Oil Units")
+    df = pd.read_excel(fn, sheet_name="Gas & Oil Units", na_values=["not found"])
+    df_small = pd.read_excel(fn, sheet_name="sub-threshold units", na_values=["not found"])
+    df = pd.concat([df, df_small], ignore_index=True)
 
     if raw:
         return df
 
     RENAME_COLUMNS = {
         "Plant name": "Name",
-        "Fuel": "Fueltype",
         "Capacity (MW)": "Capacity",
         "Latitude": "lat",
         "Longitude": "lon",
         "Start year": "DateIn",
         "Retired year": "DateOut",
         "CHP": "Set",
+        "Fuel": "Fueltype",
         "GEM location ID": "projectID",
+        "Country/Area": "Country",
+        "Turbine/Engine Technology": "Technology",
     }
 
+    def classify_fuel(s):
+        if s["Fuel classification?"] in ["Gas only", "LNG only"]:
+            return "Natural Gas"
+        elif s["Fuel classification?"] == "Oil only":
+            return "Oil"
+        elif s["Fueltype"].startswith("fossil liquids"):
+            return "Oil"
+        else:
+            return "Natural Gas"
+
     technology_dict = {
-        "GT": "Steam Turbine",
-        "IC": "Steam Turbine",
-        "CC": "CCGT",
-        "GT/IC": "Steam Turbine",
+        "gas turbine": "Steam Turbine",
+        "internal combustion": "Steam Turbine",
+        "combined cycle": "CCGT",
         "ICCC": "CCGT",
         "ISCC": "CCGT",
-        "ST": "Steam Turbine",
+        "steam turbine": "Steam Turbine",
         "AFC": "CCGT",
+        "unknown": np.nan,
     }
 
     set_dict = {
-        "Y": "CHP",
-        "N": "PP",
-        "not found": "PP",
+        "yes": "CHP",
+        "no": "PP",
     }
 
     status_list = config["GGPT"].get("status", ["operating"])  # noqa: F841
-    gas_fuels = ["NG", "LNG", "BU", "LFG", "BG", "BFG", "COG", "CM", "H", "OG"]
 
     df = df.rename(columns=RENAME_COLUMNS)
     df_final = (
@@ -2071,26 +2083,18 @@ def GGPT(raw=False, update=False, config=None):
         .pipe(set_column_name, "GGPT")
         .pipe(convert_to_short_name)
         .dropna(subset="Capacity")
-        .pipe(lambda x: x.query("Capacity != 'not found'"))
         .assign(
             DateIn=df["DateIn"].apply(pd.to_numeric, errors="coerce"),
-            DateOut=df["DateOut"].apply(pd.to_numeric, errors="coerce"),
+            DateOut=df["DateOut"].apply(pd.to_numeric, errors="coerce").combine_first(df["Planned retire"]),
             lat=df["lat"].apply(pd.to_numeric, errors="coerce"),
             lon=df["lon"].apply(pd.to_numeric, errors="coerce"),
-            Capacity=lambda df: pd.to_numeric(df.Capacity, "coerce"),
-            Fueltype=df["Fueltype"].apply(
-                lambda s: (
-                    "Natural Gas"
-                    if any(sub in gas_fuels for sub in s.split("/"))
-                    else "Oil"
-                )
-            ),
+            Capacity=df["Capacity"].apply(pd.to_numeric, errors="coerce"),
+            Fueltype=df.apply(classify_fuel, axis=1),
         )
         .query("Status in @status_list")
         .pipe(lambda x: x[df.columns.intersection(config.get("target_columns"))])
         .pipe(lambda x: x.replace({"Technology": technology_dict}))
-        .pipe(lambda x: x.replace({"Set": set_dict}).fillna({"Set": "PP"}))
-        .assign(Fueltype="Natural Gas")
+        .pipe(lambda x: x.replace({"Set": set_dict}))
         .pipe(config_filter, config)
     )
     return df_final
diff --git a/powerplantmatching/package_data/config.yaml b/powerplantmatching/package_data/config.yaml
index f961fcab..345a20e2 100644
--- a/powerplantmatching/package_data/config.yaml
+++ b/powerplantmatching/package_data/config.yaml
@@ -158,8 +158,8 @@ GGPT:
   net_capacity: false
   reliability_score: 5
   status: ["operating", "retired", "construction"]
-  fn: Global-Oil-and-Gas-Plant-Tracker-GOGPT-February-2024-v4.xlsx
-  url: https://tubcloud.tu-berlin.de/s/Be5arQgT9Z9g8Kp/download/Global-Oil-and-Gas-Plant-Tracker-GOGPT-February-2024-v4.xlsx
+  fn: Global-Oil-and-Gas-Plant-Tracker-GOGPT-August-2025.xlsx
+  url: https://tubcloud.tu-berlin.de/s/aKrt7dyNgazmgAm/download/Global-Oil-and-Gas-Plant-Tracker-GOGPT-August-2025.xlsx
 GEM:
   # combined data set of all GEM trackers
   net_capacity: true

From 65d1c6d5dcaef96a7f7729fcf69733a566f7148b Mon Sep 17 00:00:00 2001
From: "pre-commit-ci[bot]"
 <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Date: Tue, 19 Aug 2025 10:23:55 +0000
Subject: [PATCH 08/68] [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci
---
 powerplantmatching/data.py | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

diff --git a/powerplantmatching/data.py b/powerplantmatching/data.py
index 4d9ed713..5b7312e6 100644
--- a/powerplantmatching/data.py
+++ b/powerplantmatching/data.py
@@ -2029,7 +2029,9 @@ def GGPT(raw=False, update=False, config=None):
     config = get_config() if config is None else config
     fn = get_raw_file("GGPT", update=update, config=config)
     df = pd.read_excel(fn, sheet_name="Gas & Oil Units", na_values=["not found"])
-    df_small = pd.read_excel(fn, sheet_name="sub-threshold units", na_values=["not found"])
+    df_small = pd.read_excel(
+        fn, sheet_name="sub-threshold units", na_values=["not found"]
+    )
     df = pd.concat([df, df_small], ignore_index=True)
 
     if raw:
@@ -2085,7 +2087,9 @@ def classify_fuel(s):
         .dropna(subset="Capacity")
         .assign(
             DateIn=df["DateIn"].apply(pd.to_numeric, errors="coerce"),
-            DateOut=df["DateOut"].apply(pd.to_numeric, errors="coerce").combine_first(df["Planned retire"]),
+            DateOut=df["DateOut"]
+            .apply(pd.to_numeric, errors="coerce")
+            .combine_first(df["Planned retire"]),
             lat=df["lat"].apply(pd.to_numeric, errors="coerce"),
             lon=df["lon"].apply(pd.to_numeric, errors="coerce"),
             Capacity=df["Capacity"].apply(pd.to_numeric, errors="coerce"),

From 43bef198095590fd7147fb875b753df6ebbab879 Mon Sep 17 00:00:00 2001
From: Fabian Neumann <fabian.neumann@outlook.de>
Date: Tue, 19 Aug 2025 13:35:13 +0200
Subject: [PATCH 09/68] update GWPT

---
 powerplantmatching/data.py                  | 5 +++++
 powerplantmatching/package_data/config.yaml | 4 ++--
 2 files changed, 7 insertions(+), 2 deletions(-)

diff --git a/powerplantmatching/data.py b/powerplantmatching/data.py
index 5b7312e6..04494c6c 100644
--- a/powerplantmatching/data.py
+++ b/powerplantmatching/data.py
@@ -1900,6 +1900,11 @@ def GWPT(raw=False, update=False, config=None):
     config = get_config() if config is None else config
     fn = get_raw_file("GWPT", update=update, config=config)
     df = pd.read_excel(fn, sheet_name="Data")
+    df_small = pd.read_excel(fn, sheet_name="Below Threshold")
+    df = pd.concat([df, df_small], ignore_index=True)
+
+    if raw:
+        return df
 
     RENAME_COLUMNS = {
         "Project Name": "Name",
diff --git a/powerplantmatching/package_data/config.yaml b/powerplantmatching/package_data/config.yaml
index 345a20e2..2c983fa2 100644
--- a/powerplantmatching/package_data/config.yaml
+++ b/powerplantmatching/package_data/config.yaml
@@ -181,8 +181,8 @@ GWPT:
   net_capacity: false
   reliability_score: 4
   status: ["operating", "retired", "construction"]
-  fn: Global-Wind-Power-Tracker-June-2024.xlsx
-  url: https://tubcloud.tu-berlin.de/s/Z9b3WkAJmSnsrHD/download/Global-Wind-Power-Tracker-June-2024.xlsx
+  fn:  Global-Wind-Power-Tracker-February-2025.xlsx
+  url: https://tubcloud.tu-berlin.de/s/8NSXSjPmJPXpg4W/download/Global-Wind-Power-Tracker-February-2025.xlsx
 GSPT:
   net_capacity: false
   reliability_score: 4

From 88df2b633fa6d28b2131ba66c2ac59e2592c2ad4 Mon Sep 17 00:00:00 2001
From: Fabian Neumann <fabian.neumann@outlook.de>
Date: Tue, 19 Aug 2025 13:44:04 +0200
Subject: [PATCH 10/68] update GSPT

---
 powerplantmatching/package_data/config.yaml | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/powerplantmatching/package_data/config.yaml b/powerplantmatching/package_data/config.yaml
index 2c983fa2..0f9bf13e 100644
--- a/powerplantmatching/package_data/config.yaml
+++ b/powerplantmatching/package_data/config.yaml
@@ -186,9 +186,9 @@ GWPT:
 GSPT:
   net_capacity: false
   reliability_score: 4
-  status: ["operating", "construction"]
-  fn: Global-Solar-Power-Tracker-June-2024.xlsx
-  url: https://tubcloud.tu-berlin.de/s/tJ5K5rA2e5XaNjM/download/Global-Solar-Power-Tracker-June-2024.xlsx
+  status: ["operating", "retired", "construction"]
+  fn: Global-Solar-Power-Tracker-February-2025.xlsx
+  url: https://tubcloud.tu-berlin.de/s/7eo4dZXMp6eB3mz/download/Global-Solar-Power-Tracker-February-2025.xlsx
 GBPT:
   net_capacity: false
   reliability_score: 4

From 01b790b0a1a518518464ff977b5f3a4c20c5d0c9 Mon Sep 17 00:00:00 2001
From: Fabian Neumann <fabian.neumann@outlook.de>
Date: Tue, 19 Aug 2025 14:27:35 +0200
Subject: [PATCH 11/68] update GBPT

---
 powerplantmatching/data.py                  | 53 ++++++++++++---------
 powerplantmatching/package_data/config.yaml |  6 +--
 2 files changed, 33 insertions(+), 26 deletions(-)

diff --git a/powerplantmatching/data.py b/powerplantmatching/data.py
index 04494c6c..5e8092dc 100644
--- a/powerplantmatching/data.py
+++ b/powerplantmatching/data.py
@@ -1632,37 +1632,44 @@ def GBPT(raw=False, update=False, config=None):
     """
     config = get_config() if config is None else config
     fn = get_raw_file("GBPT", update=update, config=config)
-    df = pd.read_excel(fn, sheet_name="Data")
+    large = pd.read_excel(fn, sheet_name="Data")
+    small = pd.read_excel(fn, sheet_name="Below Threshold")
+    df = pd.concat([large, small], ignore_index=True)
 
     if raw:
         return df
 
     RENAME_COLUMNS = {
-        "Project name": "Name",
+        "Project Name": "Name",
         "Capacity (MW)": "Capacity",
-        "Fuel 1": "Fueltype",
-        "Operating status": "Status",
+        "Fuel": "Fueltype",
         "Latitude": "lat",
         "Longitude": "lon",
-        "Unit start year": "DateIn",
-        "Retired year": "DateOut",
+        "Start Year": "DateIn",
+        "Retired Year": "DateOut",
+        "Country/Area": "Country",
         "GEM phase ID": "projectID",
     }
+
     fueltype_dict = {
-        "bioenergy - agricultural waste (solids)": "Solid Biomass",
-        "bioenergy - refuse (municipal and industrial wastes)": "Solid Biomass",
-        "bioenergy - refuse (syngas)": "Solid Biomass",
-        "bioenergy - agricultural waste (biogas)": "Biogas",
-        "bioenergy - wood & other biomass (solids)": "Solid Biomass",
-        "bioenergy - ethanol": "Solid Biomass",
-        "bioenergy - paper mill wastes": "Solid Biomass",
-        "bioenergy - biodiesel": "Solid Biomass",
-        "bioenergy - unknown": "Solid Biomass",
-        "bioenergy - wastewater and sewage sludge (solids or biogas)": "Solid Biomass",
-        "bioenergy - refuse (landfill gas)": "Biogas",
-        "bioenergy - agricultural waste (unknown)": "Solid Biomass",
-        "bioenergy - agricultural waste (syngas)": "Solid Biomass",
-        "bioenergy - wood & other biomass (biocoal)": "Solid Biomass",
+        # solid biomass
+        "bioenergy: agricultural waste (solids)": "Solid Biomass",
+        "bioenergy: agricultural waste (unknown)": "Solid Biomass",
+        "bioenergy: paper mill wastes": "Solid Biomass",
+        "bioenergy: unknown": "Solid Biomass",
+        "bioenergy: wood & other biomass (biocoal)": "Solid Biomass",
+        "bioenergy: wood & other biomass (solids)": "Solid Biomass",
+        "bioenergy: agricultural waste (syngas)": "Solid Biomass",
+        # biogas    
+        "bioenergy: agricultural waste (biogas)": "Biogas",
+        "bioenergy: refuse (landfill gas)": "Biogas",
+        "bioenergy: wastewater and sewage sludge (solids or biogas)": "Biogas",
+        # oil
+        "bioenergy: ethanol": "Oil",
+        "bioenergy: biodiesel": "Oil",
+        # waste
+        "bioenergy: refuse (municipal and industrial wastes)": "Waste",
+        "bioenergy: refuse (syngas)": "Solid Biomass",
     }
 
     status_list = config["GBPT"].get("status", ["operating"])  # noqa: F841
@@ -1678,12 +1685,12 @@ def GBPT(raw=False, update=False, config=None):
             DateOut=df["DateOut"].apply(pd.to_numeric, errors="coerce"),
             lat=df["lat"].apply(pd.to_numeric, errors="coerce"),
             lon=df["lon"].apply(pd.to_numeric, errors="coerce"),
+            Fueltype=df["Fueltype"].apply(lambda v: fueltype_dict[v.split(",")[0].strip()])
         )
         .query("Status in @status_list")
         .pipe(lambda x: x[df.columns.intersection(config.get("target_columns"))])
-        .pipe(lambda x: x.replace({"Fueltype": fueltype_dict}))
-        .assign(Technology="Steam Turbine")
-        .assign(Set="PP")
+        .assign(Technology=np.nan)
+        .assign(Set=np.nan)
         .pipe(config_filter, config)
     )
     return df_final
diff --git a/powerplantmatching/package_data/config.yaml b/powerplantmatching/package_data/config.yaml
index 0f9bf13e..c6f94d4a 100644
--- a/powerplantmatching/package_data/config.yaml
+++ b/powerplantmatching/package_data/config.yaml
@@ -193,8 +193,8 @@ GBPT:
   net_capacity: false
   reliability_score: 4
   status: ["operating", "retired", "construction"]
-  fn: Global-Bioenergy-Power-Tracker-GBPT-V1.xlsx
-  url: https://tubcloud.tu-berlin.de/s/F34bbwcxYHL9ZR4/download/Global-Bioenergy-Power-Tracker-GBPT-V1.xlsx
+  fn: Global-Bioenergy-Power-Tracker-GBPT-September-2024.xlsx
+  url: https://tubcloud.tu-berlin.de/s/CzMBKe2rAcsoq7c/download/Global-Bioenergy-Power-Tracker-GBPT-September-2024.xlsx
 GNPT:
   net_capacity: false
   reliability_score: 4
@@ -315,7 +315,7 @@ target_fueltypes:
   Oil: [oil, diesel, mineralölprodukte]
   Geothermal: ""
   Solar: ""
-  Waste: ["abfall.*", "waste"]
+  Waste: ["abfall.*", "waste", "mva", "müll", "afval", "energy recovery", "incineration"]
   Wind: ""
   Battery: [Electro-chemical, battery]
 target_sets:

From cfc8c31d86db8ee7e56826ea978a4d7bbede2d5e Mon Sep 17 00:00:00 2001
From: "pre-commit-ci[bot]"
 <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Date: Tue, 19 Aug 2025 12:27:45 +0000
Subject: [PATCH 12/68] [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci
---
 powerplantmatching/data.py | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/powerplantmatching/data.py b/powerplantmatching/data.py
index 5e8092dc..a37d58d1 100644
--- a/powerplantmatching/data.py
+++ b/powerplantmatching/data.py
@@ -1660,7 +1660,7 @@ def GBPT(raw=False, update=False, config=None):
         "bioenergy: wood & other biomass (biocoal)": "Solid Biomass",
         "bioenergy: wood & other biomass (solids)": "Solid Biomass",
         "bioenergy: agricultural waste (syngas)": "Solid Biomass",
-        # biogas    
+        # biogas
         "bioenergy: agricultural waste (biogas)": "Biogas",
         "bioenergy: refuse (landfill gas)": "Biogas",
         "bioenergy: wastewater and sewage sludge (solids or biogas)": "Biogas",
@@ -1685,7 +1685,9 @@ def GBPT(raw=False, update=False, config=None):
             DateOut=df["DateOut"].apply(pd.to_numeric, errors="coerce"),
             lat=df["lat"].apply(pd.to_numeric, errors="coerce"),
             lon=df["lon"].apply(pd.to_numeric, errors="coerce"),
-            Fueltype=df["Fueltype"].apply(lambda v: fueltype_dict[v.split(",")[0].strip()])
+            Fueltype=df["Fueltype"].apply(
+                lambda v: fueltype_dict[v.split(",")[0].strip()]
+            ),
         )
         .query("Status in @status_list")
         .pipe(lambda x: x[df.columns.intersection(config.get("target_columns"))])

From 285f9bce0cb003ab027c97fbf12de0b7ac602117 Mon Sep 17 00:00:00 2001
From: Fabian Neumann <fabian.neumann@outlook.de>
Date: Tue, 19 Aug 2025 14:49:39 +0200
Subject: [PATCH 13/68] update GHPT

---
 powerplantmatching/data.py                  | 9 ++++++---
 powerplantmatching/package_data/config.yaml | 8 ++++++--
 2 files changed, 12 insertions(+), 5 deletions(-)

diff --git a/powerplantmatching/data.py b/powerplantmatching/data.py
index a37d58d1..b2b6970b 100644
--- a/powerplantmatching/data.py
+++ b/powerplantmatching/data.py
@@ -2135,7 +2135,9 @@ def GHPT(raw=False, update=False, config=None):
     """
     config = get_config() if config is None else config
     fn = get_raw_file("GHPT", update=update, config=config)
-    df = pd.read_excel(fn, sheet_name="Data")
+    large = pd.read_excel(fn, sheet_name="Data")
+    small = pd.read_excel(fn, sheet_name="Below Threshold")
+    df = pd.concat([large, small], ignore_index=True)
 
     if raw:
         return df
@@ -2148,7 +2150,7 @@ def GHPT(raw=False, update=False, config=None):
         "Start Year": "DateIn",
         "Retired Year": "DateOut",
         "GEM unit ID": "projectID",
-        "Country 1": "Country",
+        "Country/Area 1": "Country",
         "Technology Type": "Technology",
     }
     technology_dict = {
@@ -2156,7 +2158,8 @@ def GHPT(raw=False, update=False, config=None):
         "pumped storage": "Pumped Storage",
         "run-of-river": "Run-Of-River",
         "conventional and pumped storage": "Pumped Storage",
-        "conventional and run-of-river": "Run-Of-River",
+        "conventional and run-of-river": "Reservoir",
+        "unknown": "Run-Of-River",
     }
     status_list = config["GHPT"].get("status", ["operating"])  # noqa: F841
     df = df.rename(columns=RENAME_COLUMNS)
diff --git a/powerplantmatching/package_data/config.yaml b/powerplantmatching/package_data/config.yaml
index c6f94d4a..2442fbfc 100644
--- a/powerplantmatching/package_data/config.yaml
+++ b/powerplantmatching/package_data/config.yaml
@@ -205,8 +205,8 @@ GHPT:
   net_capacity: false
   reliability_score: 4
   status: ["operating", "retired", "construction"]
-  fn: Global-Hydropower-Tracker-April-2024.xlsx
-  url: https://tubcloud.tu-berlin.de/s/sEztyBLdJS5sNHY/download/Global-Hydropower-Tracker-April-2024.xlsx
+  fn: Global-Hydropower-Tracker-April-2025.xlsx
+  url: https://tubcloud.tu-berlin.de/s/2xqxRmfP4FKTrLf/download/Global-Hydropower-Tracker-April-2025.xlsx
 
 MASTR:
   net_capacity: true
@@ -306,9 +306,13 @@ target_fueltypes:
       run-of-river,
       ror,
       hydro,
+      hidro,
       hydroelectric,
       wasserkraft,
       wasser,
+      vannkraft,
+      wodna,
+      idroelettrica,
     ]
   Hard Coal: [coal, coke, steinkohle]
   Lignite: [brown coal, lignite, peat, braunkohle]

From c8861842947dfbb62e5196397a3e263ec8fbda3c Mon Sep 17 00:00:00 2001
From: Fabian Neumann <fabian.neumann@outlook.de>
Date: Tue, 19 Aug 2025 15:07:21 +0200
Subject: [PATCH 14/68] match fueltypes in other languages

---
 powerplantmatching/package_data/config.yaml | 40 +++++++++++++++++----
 1 file changed, 33 insertions(+), 7 deletions(-)

diff --git a/powerplantmatching/package_data/config.yaml b/powerplantmatching/package_data/config.yaml
index 2442fbfc..e1a5615b 100644
--- a/powerplantmatching/package_data/config.yaml
+++ b/powerplantmatching/package_data/config.yaml
@@ -282,9 +282,9 @@ target_fueltypes:
   # given by the list. An empty string results in a regex expression containing only the key.
   # Parsed of representatives at the top may be overwritten by representatives further below.
   Other: ".*"
-  Solid Biomass: [biological, bioenergy, agricultural, wood, biomass, feste biomasse]
-  Biogas: [biogas, biomethan, gasförmige biomasse]
-  Nuclear: [nuclear]
+  Solid Biomass: [biological, bioenergy, agricultural, wood, biomass, feste biomasse, biomasa, biomassa]
+  Biogas: [biogas, biogaz, biomethan, gasförmige biomasse]
+  Nuclear: [nuclear, kernkraft, atomkraft, nucléaire, atomowa, jądrowa, kjernekraft, kern, atom, atoom]
   Natural Gas:
     [
       ccgt,
@@ -297,6 +297,13 @@ target_fueltypes:
       mixed fossil fuels,
       erdgas,
       andere gase,
+      gaz,
+      gaz naturel,
+      gas natural,
+      naturgass,
+      gaz ziemny,
+      gass,
+      aardgas,
     ]
   Hydro:
     [
@@ -307,19 +314,23 @@ target_fueltypes:
       ror,
       hydro,
       hidro,
+      hydraulique,
       hydroelectric,
       wasserkraft,
+      waterkracht,
       wasser,
       vannkraft,
+      vattenkraft,
       wodna,
       idroelettrica,
+      idraulica,
     ]
-  Hard Coal: [coal, coke, steinkohle]
-  Lignite: [brown coal, lignite, peat, braunkohle]
-  Oil: [oil, diesel, mineralölprodukte]
+  Hard Coal: [coal, coke, steinkohle, houille, charbon dur, hulla, carbón duro, carbone duro, antracite, steinkul, węgiel kamienny, steenkool]
+  Lignite: [brown coal, lignite, peat, braunkohle, ligni.*, brunatny, brunkul, bruinkool]
+  Oil: [oil, diesel, mineralölprodukte, fioul, mazout, petrol, olio, olej, carburante, olie]
   Geothermal: ""
   Solar: ""
-  Waste: ["abfall.*", "waste", "mva", "müll", "afval", "energy recovery", "incineration"]
+  Waste: ["abfall.*", "waste", "mva", "müll", "afval", "affald", "energy recovery", "incineration", "reststoffe", "refuse", "déchets", "ordures", "residuos", "basura", "rifiuti", "scarti", "odpady", "śmieci"]
   Wind: ""
   Battery: [Electro-chemical, battery]
 target_sets:
@@ -341,6 +352,21 @@ target_sets:
       power and heat,
       heat and power,
       chp,
+      cogen,
+      heat & power,
+      power & heat,
+      cogeneración,
+      cogenerazione,
+      kogeneracja,
+      combinada calor y electricidad,
+      kraftvarmeverk,
+      kraftvarmeværk,
+      samproduktion,
+      samproduksjon,
+      kvv,
+      wkk,
+      warmtekrachtkoppeling,
+      warmte-krachtcentrale,
     ]
   Store: [battery, storage, store]
 target_technologies:

From 194133330935f1a31a8c1daec6b472520223e858 Mon Sep 17 00:00:00 2001
From: Fabian Neumann <fabian.neumann@outlook.de>
Date: Tue, 19 Aug 2025 15:14:31 +0200
Subject: [PATCH 15/68] update ENTSOE-EIC

---
 powerplantmatching/package_data/config.yaml | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/powerplantmatching/package_data/config.yaml b/powerplantmatching/package_data/config.yaml
index e1a5615b..4583d75e 100644
--- a/powerplantmatching/package_data/config.yaml
+++ b/powerplantmatching/package_data/config.yaml
@@ -76,8 +76,8 @@ ENTSOE:
   url: https://raw.githubusercontent.com/pypsa-meets-earth/ppm-data-backup/main/entsoe_powerplants.csv
   fn: entsoe_powerplants.csv
 ENTSOE-EIC:
-  url: https://eepublicdownloads.entsoe.eu/eic-codes-csv/W_eiccodes.csv
-  fn: entsoe_eic_codes.csv
+  url: https://eepublicdownloads.blob.core.windows.net/cio-lio/csv/W_eicCodes.csv
+  fn: W_eicCodes.csv
 JRC:
   reliability_score: 4
   fn: jrc-hydro-power-plant-database.csv

From 53e1a8c30143f329c24998b52e82c03cfcd1423e Mon Sep 17 00:00:00 2001
From: Fabian Neumann <fabian.neumann@outlook.de>
Date: Wed, 20 Aug 2025 11:24:45 +0200
Subject: [PATCH 16/68] update MASTR data processing

---
 powerplantmatching/data.py                    | 80 ++++++++++++++++---
 .../package_data/PLZ_Coords_map.csv           |  8 ++
 powerplantmatching/package_data/config.yaml   | 21 ++---
 3 files changed, 88 insertions(+), 21 deletions(-)

diff --git a/powerplantmatching/data.py b/powerplantmatching/data.py
index b2b6970b..961ad236 100644
--- a/powerplantmatching/data.py
+++ b/powerplantmatching/data.py
@@ -35,7 +35,7 @@
     gather_specifications,
 )
 from .core import _package_data, get_config
-from .heuristics import scale_to_net_capacities
+from .heuristics import scale_to_net_capacities, PLZ_to_LatLon_map
 from .utils import (
     config_filter,
     convert_to_short_name,
@@ -2226,8 +2226,11 @@ def MASTR(
         defaults to powerplantmatching.config.get_config()
 
     """
+
     config = get_config() if config is None else config
 
+    THRESHOLD_KW = 1000  # noqa: F841
+
     RENAME_COLUMNS = {
         "EinheitMastrNummer": "projectID",
         "NameKraftwerk": "Name",
@@ -2251,6 +2254,7 @@ def MASTR(
         "Energietraeger",
         "Hauptbrennstoff",
         "NameStromerzeugungseinheit",
+        "Technologie",
     ]
 
     fn = get_raw_file("MASTR", update=update, config=config)
@@ -2261,6 +2265,7 @@ def MASTR(
         "Hydro": "hydro_raw.csv",
         "Wind": "wind_raw.csv",
         "Solar": "solar_raw.csv",
+        "Storage": "bnetza_mastr_storage_raw.csv"
     }
     data_frames = []
     with ZipFile(fn, "r") as file:
@@ -2272,6 +2277,13 @@ def MASTR(
                         "GeplantesInbetriebnahmedatum",
                         "ThermischeNutzleistung",
                         "KwkMastrNummer",
+                        "Batterietechnologie",
+                        "DatumBeginnVoruebergehendeStilllegung",
+                        "DatumWiederaufnahmeBetrieb",
+                        "Postleitzahl",
+                        "Ort",
+                        "Gemeinde",
+                        "Landkreis",
                     ]
                     target_columns = (
                         target_columns + PARSE_COLUMNS + list(RENAME_COLUMNS.keys())
@@ -2279,32 +2291,57 @@ def MASTR(
                     usecols = available_columns.intersection(target_columns)
                     df = pd.read_csv(file.open(name), usecols=usecols).assign(
                         Filesuffix=fueltype
-                    )
+                    ).query("Nettonennleistung >= @THRESHOLD_KW")
                     data_frames.append(df)
                     break
     df = pd.concat(data_frames).reset_index(drop=True)
 
+    cols = ["NutzbareSpeicherkapazitaet", "VerknuepfteEinheit"]
+    with ZipFile(fn, "r") as file:
+        fn_storage_units = "bnetza_open_mastr_2025-02-09/bnetza_mastr_storage_units_raw.csv"
+        storage_units = pd.read_csv(file.open(fn_storage_units), usecols=cols)
+
+    storage_mwh = (
+        storage_units
+        .assign(VerknuepfteEinheit=lambda x: x.VerknuepfteEinheit.str.split(", "))
+        .assign(n=lambda x: x.VerknuepfteEinheit.str.len())
+        .explode("VerknuepfteEinheit")
+        .assign(NutzbareSpeicherkapazitaet=lambda x: x.NutzbareSpeicherkapazitaet / x.n)
+        .set_index("VerknuepfteEinheit")["NutzbareSpeicherkapazitaet"]
+    )
+
+    df["StorageCapacity_MWh"] = df["EinheitMastrNummer"].map(storage_mwh) / 1000 #  kWh to MWh
+
     if raw:
         return df
 
     status_list = config["MASTR"].get("status", ["In Betrieb"])  # noqa: F841
-    capacity_threshold_kw = 1000
 
-    df = (
+    PLZ_map = PLZ_to_LatLon_map()
+    df.Postleitzahl = df.Postleitzahl.astype(str).str.replace(r'[^0-9]', '0', regex=True).astype(int)
+    df["PLZ_lat"] = df.Postleitzahl.map(PLZ_map.lat)
+    df["PLZ_lon"] = df.Postleitzahl.map(PLZ_map.lon)
+
+    df_processed = (
         df.rename(columns=RENAME_COLUMNS)
         .query("Status in @status_list")
-        .loc[lambda df: df.Capacity > capacity_threshold_kw]
         .assign(
             projectID=lambda df: "MASTR-" + df.projectID,
+            Name=lambda df: df.Name.combine_first(df.NameStromerzeugungseinheit),
             Country=lambda df: df.Country.map(COUNTRY_MAP),
             Capacity=lambda df: df.Capacity / 1e3,  # kW to MW
-            DateIn=lambda df: pd.to_datetime(df.DateIn).dt.year,
-            DateOut=lambda df: pd.to_datetime(df.DateOut).dt.year,
-        )
-        .assign(
-            DateIn=lambda df: df["DateIn"].combine_first(
+            DateIn=lambda df: pd.to_datetime(df.DateIn).dt.year.combine_first(
                 pd.to_datetime(df["GeplantesInbetriebnahmedatum"]).dt.year
             ),
+            DateOut=lambda df: pd.to_datetime(df.DateOut).dt.year.where(
+                df.Status != "Vorübergehend stillgelegt",
+                pd.to_datetime(df["DatumBeginnVoruebergehendeStilllegung"]).dt.year.where(
+                    df["DatumWiederaufnahmeBetrieb"].isna(),
+                    pd.to_datetime(df.DateOut).dt.year
+                ),
+            ),
+            lat=lambda df: df.lat.combine_first(df.PLZ_lat),
+            lon=lambda df: df.lon.combine_first(df.PLZ_lon),
         )
         .pipe(
             gather_specifications,
@@ -2316,12 +2353,33 @@ def MASTR(
                 df["KwkMastrNummer"].isna() & df["ThermischeNutzleistung"].isna(), "CHP"
             ),
         )
+    )
+
+    psw = df_processed.query("Energietraeger == 'Speicher' and Technologie == 'Pumpspeicher'").index
+    df_processed.loc[psw, ["Fueltype", "Technology"]] = ["Hydro", "Pumped Storage"]
+
+    bat = df_processed.query("Energietraeger == 'Speicher' and Technologie == 'Batterie'").index
+    df_processed.loc[bat, ["Fueltype", "Set"]] =  ["Battery", "Store"]
+    BATTERY_MAPPING = {
+        "Blei-Batterie": "Lead",
+        "Lithium-Batterie": "Lithium",
+        "Sonstige Batterie": np.nan,
+        "Hochtemperaturbatterie": "High-Temperature",
+        "Nickel-Cadmium- / Nickel-Metallhydridbatterie": "Nickel"
+    }
+    df_processed.loc[bat, "Technology"] = df_processed.loc[bat, "Batterietechnologie"].map(BATTERY_MAPPING)
+
+    mask = df_processed.query("Energietraeger in ['Hydro', 'Wind', 'Solar', 'Battery'] and Set == 'Store'").index
+    df_processed.loc[mask, "Set"] = "PP"
+
+    df_final = (
+        df_processed
         .pipe(clean_name)
         .pipe(set_column_name, "MASTR")
         .pipe(config_filter, config)
     )
 
-    return df
+    return df_final
 
 
 # deprecated alias for GGPT
diff --git a/powerplantmatching/package_data/PLZ_Coords_map.csv b/powerplantmatching/package_data/PLZ_Coords_map.csv
index 90203fec..f6fc38cb 100644
--- a/powerplantmatching/package_data/PLZ_Coords_map.csv
+++ b/powerplantmatching/package_data/PLZ_Coords_map.csv
@@ -8197,3 +8197,11 @@ PLZ,lon,lat
 65527,8.29686030496,50.1698531547
 32760,8.89250849998,51.9103401848
 65529,8.34783843133,50.256587295
+39628,11.6901777,52.6269331
+23769,11.1340848,54.4687375
+64760,8.9928567,49.540722
+78089,8.3637278,48.0748482
+99331,10.8270088,50.7108384
+98694,10.9888104,50.6365371
+19055,11.4375455,53.655925
+81248,11.4023582,48.1497765
diff --git a/powerplantmatching/package_data/config.yaml b/powerplantmatching/package_data/config.yaml
index 4583d75e..39c91364 100644
--- a/powerplantmatching/package_data/config.yaml
+++ b/powerplantmatching/package_data/config.yaml
@@ -211,7 +211,7 @@ GHPT:
 MASTR:
   net_capacity: true
   reliability_score: 8
-  status: ["In Betrieb", "In Planung", "Endgültig stillgelegt"]
+  status: ["In Betrieb", "In Planung", "Endgültig stillgelegt", "Vorübergehend stillgelegt"]
   fn: bnetza_open_mastr_2025-02-09.zip
   url: https://zenodo.org/records/14783581/files/bnetza_open_mastr_2025-02-09.zip
 
@@ -282,7 +282,7 @@ target_fueltypes:
   # given by the list. An empty string results in a regex expression containing only the key.
   # Parsed of representatives at the top may be overwritten by representatives further below.
   Other: ".*"
-  Solid Biomass: [biological, bioenergy, agricultural, wood, biomass, feste biomasse, biomasa, biomassa]
+  Solid Biomass: [biological, bioenergy, agricultural, wood, holz, biomass, feste biomasse, biomasa, biomassa, feste biogene stoffe, pellets, stroh, straw]
   Biogas: [biogas, biogaz, biomethan, gasförmige biomasse]
   Nuclear: [nuclear, kernkraft, atomkraft, nucléaire, atomowa, jądrowa, kjernekraft, kern, atom, atoom]
   Natural Gas:
@@ -304,6 +304,7 @@ target_fueltypes:
       gaz ziemny,
       gass,
       aardgas,
+      flüssiggas
     ]
   Hydro:
     [
@@ -327,10 +328,10 @@ target_fueltypes:
     ]
   Hard Coal: [coal, coke, steinkohle, houille, charbon dur, hulla, carbón duro, carbone duro, antracite, steinkul, węgiel kamienny, steenkool]
   Lignite: [brown coal, lignite, peat, braunkohle, ligni.*, brunatny, brunkul, bruinkool]
-  Oil: [oil, diesel, mineralölprodukte, fioul, mazout, petrol, olio, olej, carburante, olie]
+  Oil: [oil, diesel, biodiesel, methanol, heizöl, ethanol, mineralölprodukte, öl, fioul, mazout, petrol, olio, olej, carburante, olie]
   Geothermal: ""
   Solar: ""
-  Waste: ["abfall.*", "waste", "mva", "müll", "afval", "affald", "energy recovery", "incineration", "reststoffe", "refuse", "déchets", "ordures", "residuos", "basura", "rifiuti", "scarti", "odpady", "śmieci"]
+  Waste: ["abfall.*", "waste", "mva", "müll", "afval", "affald", "energy recovery", "incineration", "reststoffe", "refuse", "déchets", "ordures", "residuos", "basura", "rifiuti", "scarti", "odpady", "śmieci", "abfälle"]
   Wind: ""
   Battery: [Electro-chemical, battery]
 target_sets:
@@ -368,7 +369,7 @@ target_sets:
       warmtekrachtkoppeling,
       warmte-krachtcentrale,
     ]
-  Store: [battery, storage, store]
+  Store: [battery, storage, store, speicher, pumped]
 target_technologies:
   # Provide a mapping of the keys to a list or a regex expression which are used for parsing.
   # A list will be converted to a regex expression matching all words (case-insensitive)
@@ -378,14 +379,14 @@ target_technologies:
   OCGT: [ocgt, gasturbinen ohne abhitzekessel]
   Steam Turbine: [steam, turbine, kondensationsmaschine, gegendruckmaschine, dampfmotor]
   Combustion Engine: [combustion engine, verbrennungsmotor, stirlingmotor]
-  Run-Of-River: [run-off, run off, run of river, run-of-river, ror, laufwasseranlage]
-  Pumped Storage: [pumped hydro, pumped, speicherwasseranlage]
-  Reservoir: ""
+  Run-Of-River: [run-off, run off, run of river, run-of-river, ror, laufwasseranlage, laufwasser, abwasserkraft, trinkwassersystem, brauchwassersystem, pasada, przepływowa, fluente, elvekraft, doorstroom, älvkraft]
+  Reservoir: [reservoir, réservoir, impoundment, talsperre, stausee, speicherwasseranlage, speicherwasser, barrage, embalse, bacino, zbiornik, magasinverk, damkraftverk, reguleringsmagasin]
+  Pumped Storage: [pumped hydro, pumped, kavernen, bombeo, reversible, reversibel, oberbecken, unterbecken, pompage, pompaggio, pompowa, pumpekraftverk]
   Marine: ""
-  Onshore: ""
-  Offshore: ""
   PV: [pv, photo-voltaic, photo voltaic]
   CSP: ""
+  Onshore: ""
+  Offshore: ""
 clean_name:
   remove_common_words: false # remove words which appear more that 20 times in all entries
   remove_duplicated_words: true

From b8cdfe670186e013a85f9e7046383736d10dae60 Mon Sep 17 00:00:00 2001
From: "pre-commit-ci[bot]"
 <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Date: Wed, 20 Aug 2025 09:24:59 +0000
Subject: [PATCH 17/68] [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci
---
 powerplantmatching/data.py | 58 +++++++++++++++++++++++++-------------
 1 file changed, 38 insertions(+), 20 deletions(-)

diff --git a/powerplantmatching/data.py b/powerplantmatching/data.py
index 961ad236..5b927b1a 100644
--- a/powerplantmatching/data.py
+++ b/powerplantmatching/data.py
@@ -35,7 +35,7 @@
     gather_specifications,
 )
 from .core import _package_data, get_config
-from .heuristics import scale_to_net_capacities, PLZ_to_LatLon_map
+from .heuristics import PLZ_to_LatLon_map, scale_to_net_capacities
 from .utils import (
     config_filter,
     convert_to_short_name,
@@ -2265,7 +2265,7 @@ def MASTR(
         "Hydro": "hydro_raw.csv",
         "Wind": "wind_raw.csv",
         "Solar": "solar_raw.csv",
-        "Storage": "bnetza_mastr_storage_raw.csv"
+        "Storage": "bnetza_mastr_storage_raw.csv",
     }
     data_frames = []
     with ZipFile(fn, "r") as file:
@@ -2289,28 +2289,35 @@ def MASTR(
                         target_columns + PARSE_COLUMNS + list(RENAME_COLUMNS.keys())
                     )
                     usecols = available_columns.intersection(target_columns)
-                    df = pd.read_csv(file.open(name), usecols=usecols).assign(
-                        Filesuffix=fueltype
-                    ).query("Nettonennleistung >= @THRESHOLD_KW")
+                    df = (
+                        pd.read_csv(file.open(name), usecols=usecols)
+                        .assign(Filesuffix=fueltype)
+                        .query("Nettonennleistung >= @THRESHOLD_KW")
+                    )
                     data_frames.append(df)
                     break
     df = pd.concat(data_frames).reset_index(drop=True)
 
     cols = ["NutzbareSpeicherkapazitaet", "VerknuepfteEinheit"]
     with ZipFile(fn, "r") as file:
-        fn_storage_units = "bnetza_open_mastr_2025-02-09/bnetza_mastr_storage_units_raw.csv"
+        fn_storage_units = (
+            "bnetza_open_mastr_2025-02-09/bnetza_mastr_storage_units_raw.csv"
+        )
         storage_units = pd.read_csv(file.open(fn_storage_units), usecols=cols)
 
     storage_mwh = (
-        storage_units
-        .assign(VerknuepfteEinheit=lambda x: x.VerknuepfteEinheit.str.split(", "))
+        storage_units.assign(
+            VerknuepfteEinheit=lambda x: x.VerknuepfteEinheit.str.split(", ")
+        )
         .assign(n=lambda x: x.VerknuepfteEinheit.str.len())
         .explode("VerknuepfteEinheit")
         .assign(NutzbareSpeicherkapazitaet=lambda x: x.NutzbareSpeicherkapazitaet / x.n)
         .set_index("VerknuepfteEinheit")["NutzbareSpeicherkapazitaet"]
     )
 
-    df["StorageCapacity_MWh"] = df["EinheitMastrNummer"].map(storage_mwh) / 1000 #  kWh to MWh
+    df["StorageCapacity_MWh"] = (
+        df["EinheitMastrNummer"].map(storage_mwh) / 1000
+    )  #  kWh to MWh
 
     if raw:
         return df
@@ -2318,7 +2325,9 @@ def MASTR(
     status_list = config["MASTR"].get("status", ["In Betrieb"])  # noqa: F841
 
     PLZ_map = PLZ_to_LatLon_map()
-    df.Postleitzahl = df.Postleitzahl.astype(str).str.replace(r'[^0-9]', '0', regex=True).astype(int)
+    df.Postleitzahl = (
+        df.Postleitzahl.astype(str).str.replace(r"[^0-9]", "0", regex=True).astype(int)
+    )
     df["PLZ_lat"] = df.Postleitzahl.map(PLZ_map.lat)
     df["PLZ_lon"] = df.Postleitzahl.map(PLZ_map.lon)
 
@@ -2335,9 +2344,11 @@ def MASTR(
             ),
             DateOut=lambda df: pd.to_datetime(df.DateOut).dt.year.where(
                 df.Status != "Vorübergehend stillgelegt",
-                pd.to_datetime(df["DatumBeginnVoruebergehendeStilllegung"]).dt.year.where(
+                pd.to_datetime(
+                    df["DatumBeginnVoruebergehendeStilllegung"]
+                ).dt.year.where(
                     df["DatumWiederaufnahmeBetrieb"].isna(),
-                    pd.to_datetime(df.DateOut).dt.year
+                    pd.to_datetime(df.DateOut).dt.year,
                 ),
             ),
             lat=lambda df: df.lat.combine_first(df.PLZ_lat),
@@ -2355,26 +2366,33 @@ def MASTR(
         )
     )
 
-    psw = df_processed.query("Energietraeger == 'Speicher' and Technologie == 'Pumpspeicher'").index
+    psw = df_processed.query(
+        "Energietraeger == 'Speicher' and Technologie == 'Pumpspeicher'"
+    ).index
     df_processed.loc[psw, ["Fueltype", "Technology"]] = ["Hydro", "Pumped Storage"]
 
-    bat = df_processed.query("Energietraeger == 'Speicher' and Technologie == 'Batterie'").index
-    df_processed.loc[bat, ["Fueltype", "Set"]] =  ["Battery", "Store"]
+    bat = df_processed.query(
+        "Energietraeger == 'Speicher' and Technologie == 'Batterie'"
+    ).index
+    df_processed.loc[bat, ["Fueltype", "Set"]] = ["Battery", "Store"]
     BATTERY_MAPPING = {
         "Blei-Batterie": "Lead",
         "Lithium-Batterie": "Lithium",
         "Sonstige Batterie": np.nan,
         "Hochtemperaturbatterie": "High-Temperature",
-        "Nickel-Cadmium- / Nickel-Metallhydridbatterie": "Nickel"
+        "Nickel-Cadmium- / Nickel-Metallhydridbatterie": "Nickel",
     }
-    df_processed.loc[bat, "Technology"] = df_processed.loc[bat, "Batterietechnologie"].map(BATTERY_MAPPING)
+    df_processed.loc[bat, "Technology"] = df_processed.loc[
+        bat, "Batterietechnologie"
+    ].map(BATTERY_MAPPING)
 
-    mask = df_processed.query("Energietraeger in ['Hydro', 'Wind', 'Solar', 'Battery'] and Set == 'Store'").index
+    mask = df_processed.query(
+        "Energietraeger in ['Hydro', 'Wind', 'Solar', 'Battery'] and Set == 'Store'"
+    ).index
     df_processed.loc[mask, "Set"] = "PP"
 
     df_final = (
-        df_processed
-        .pipe(clean_name)
+        df_processed.pipe(clean_name)
         .pipe(set_column_name, "MASTR")
         .pipe(config_filter, config)
     )

From ff9da6faa285e6f5735ef8bb5251997fe909e5cc Mon Sep 17 00:00:00 2001
From: Fabian Neumann <fabian.neumann@outlook.de>
Date: Wed, 20 Aug 2025 11:32:08 +0200
Subject: [PATCH 18/68] suggestions for deprecations

---
 powerplantmatching/data.py | 21 ++++++++++++++++++---
 1 file changed, 18 insertions(+), 3 deletions(-)

diff --git a/powerplantmatching/data.py b/powerplantmatching/data.py
index 5b927b1a..9f66c54e 100644
--- a/powerplantmatching/data.py
+++ b/powerplantmatching/data.py
@@ -250,6 +250,10 @@ def OPSD(
     )
 
 
+@deprecated(
+    deprecated_in="0.8.0",
+    details="Deprecated since data is not maintained. Use GEM instead.",
+)
 def GEO(raw=False, update=False, config=None):
     """
     Importer for the GEO database.
@@ -621,6 +625,10 @@ def GPD(raw=False, update=False, config=None, filter_other_dbs=True):
     )
 
 
+@deprecated(
+    deprecated_in="0.8.0",
+    details="Removed since data is not maintained. Use GNPT instead.",
+)
 def WIKIPEDIA(raw=False, update=False, config=None):
     """
     Importer for the WIKIPEDIA nuclear power plant database.
@@ -1150,7 +1158,7 @@ def WEPP(raw=False, config=None):
 
 @deprecated(
     deprecated_in="0.5.0",
-    details="This function is not maintained anymore.",
+    details="This function is not maintained anymore. Use MASTR instead.",
 )
 def UBA(
     raw=False,
@@ -1273,7 +1281,7 @@ def UBA(
 
 @deprecated(
     deprecated_in="0.5.0",
-    details="This function is not maintained anymore.",
+    details="This function is not maintained anymore. Use MASTR instead.",
 )
 def BNETZA(
     raw=False,
@@ -1434,6 +1442,10 @@ def BNETZA(
     )
 
 
+@deprecated(
+    deprecated_in="0.8.0",
+    details="Removed since data is not maintained. Use GSPT, GWPT and GHPT instead.",
+)
 def OPSD_VRE(raw=False, update=False, config=None):
     """
     Importer for the OPSD (Open Power Systems Data) renewables (VRE)
@@ -1482,7 +1494,10 @@ def OPSD_VRE(raw=False, update=False, config=None):
         .pipe(config_filter, config)
     )
 
-
+@deprecated(
+    deprecated_in="0.8.0",
+    details="Removed since data is not maintained. Use GSPT, GWPT and GHPT instead.",
+)
 def OPSD_VRE_country(country, raw=False, update=False, config=None):
     """
     Get country specific data from OPSD for renewables, if available.

From 88f38852108ae56763c1bf2126522b279b909c13 Mon Sep 17 00:00:00 2001
From: "pre-commit-ci[bot]"
 <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Date: Wed, 20 Aug 2025 09:32:19 +0000
Subject: [PATCH 19/68] [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci
---
 powerplantmatching/data.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/powerplantmatching/data.py b/powerplantmatching/data.py
index 9f66c54e..ee40bce8 100644
--- a/powerplantmatching/data.py
+++ b/powerplantmatching/data.py
@@ -1494,6 +1494,7 @@ def OPSD_VRE(raw=False, update=False, config=None):
         .pipe(config_filter, config)
     )
 
+
 @deprecated(
     deprecated_in="0.8.0",
     details="Removed since data is not maintained. Use GSPT, GWPT and GHPT instead.",

From 89ae0c5acd0bef58158416d4519c01cc0fe1452b Mon Sep 17 00:00:00 2001
From: Fabian Neumann <fabian.neumann@outlook.de>
Date: Wed, 20 Aug 2025 12:06:01 +0200
Subject: [PATCH 20/68] find a reasonable DateOut for mothballed GCPT and GGPT

---
 powerplantmatching/data.py                  | 15 +++++++++++++--
 powerplantmatching/package_data/config.yaml |  2 +-
 2 files changed, 14 insertions(+), 3 deletions(-)

diff --git a/powerplantmatching/data.py b/powerplantmatching/data.py
index ee40bce8..a0f5b7d2 100644
--- a/powerplantmatching/data.py
+++ b/powerplantmatching/data.py
@@ -1821,6 +1821,10 @@ def GCPT(raw=False, update=False, config=None):
 
     planned_retirement = df["Planned retirement"].apply(pd.to_numeric, errors="coerce")
 
+    # conservative assumption that mothballed plants (without fixed retirement
+    # date) went out of operation in 2024
+    mothballed_retirement = df["Status"].apply(lambda x: 2024 if x == "mothballed" else np.nan)
+
     status_list = config["GCPT"].get("status", ["operating"])  # noqa: F841
 
     BTU_PER_KWH = 3412.14
@@ -1835,7 +1839,8 @@ def GCPT(raw=False, update=False, config=None):
             DateIn=df["DateIn"].apply(pd.to_numeric, errors="coerce"),
             DateOut=df["DateOut"]
             .apply(pd.to_numeric, errors="coerce")
-            .combine_first(planned_retirement),
+            .combine_first(planned_retirement)
+            .combine_first(mothballed_retirement),
             lat=df["lat"].apply(pd.to_numeric, errors="coerce"),
             lon=df["lon"].apply(pd.to_numeric, errors="coerce"),
             Set=df["CHP"].replace({"yes": "CHP", "no": "PP"}),
@@ -2110,6 +2115,11 @@ def classify_fuel(s):
     status_list = config["GGPT"].get("status", ["operating"])  # noqa: F841
 
     df = df.rename(columns=RENAME_COLUMNS)
+
+    # conservative assumption that mothballed plants (without fixed retirement
+    # date) went out of operation in 2024
+    mothballed_retirement = df["Status"].apply(lambda x: 2024 if x == "mothballed" else np.nan)
+
     df_final = (
         df.pipe(clean_name)
         .pipe(set_column_name, "GGPT")
@@ -2119,7 +2129,8 @@ def classify_fuel(s):
             DateIn=df["DateIn"].apply(pd.to_numeric, errors="coerce"),
             DateOut=df["DateOut"]
             .apply(pd.to_numeric, errors="coerce")
-            .combine_first(df["Planned retire"]),
+            .combine_first(df["Planned retire"])
+            .combine_first(mothballed_retirement),
             lat=df["lat"].apply(pd.to_numeric, errors="coerce"),
             lon=df["lon"].apply(pd.to_numeric, errors="coerce"),
             Capacity=df["Capacity"].apply(pd.to_numeric, errors="coerce"),
diff --git a/powerplantmatching/package_data/config.yaml b/powerplantmatching/package_data/config.yaml
index 39c91364..b566ca0f 100644
--- a/powerplantmatching/package_data/config.yaml
+++ b/powerplantmatching/package_data/config.yaml
@@ -167,7 +167,7 @@ GEM:
 GCPT:
   net_capacity: false
   reliability_score: 4
-  status: ["operating", "retired", "construction"]
+  status: ["operating", "retired", "construction", "mothballed"]
   fn: Global-Coal-Plant-Tracker-July-2025.xlsx
   url: https://tubcloud.tu-berlin.de/s/etMB7qawKNwfgnk/download/Global-Coal-Plant-Tracker-July-2025.xlsx
 GGTPT:

From 7fea3486a9a020425d348b700423592693f245a2 Mon Sep 17 00:00:00 2001
From: "pre-commit-ci[bot]"
 <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Date: Wed, 20 Aug 2025 10:06:13 +0000
Subject: [PATCH 21/68] [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci
---
 powerplantmatching/data.py | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

diff --git a/powerplantmatching/data.py b/powerplantmatching/data.py
index a0f5b7d2..320cca1b 100644
--- a/powerplantmatching/data.py
+++ b/powerplantmatching/data.py
@@ -1823,7 +1823,9 @@ def GCPT(raw=False, update=False, config=None):
 
     # conservative assumption that mothballed plants (without fixed retirement
     # date) went out of operation in 2024
-    mothballed_retirement = df["Status"].apply(lambda x: 2024 if x == "mothballed" else np.nan)
+    mothballed_retirement = df["Status"].apply(
+        lambda x: 2024 if x == "mothballed" else np.nan
+    )
 
     status_list = config["GCPT"].get("status", ["operating"])  # noqa: F841
 
@@ -2118,7 +2120,9 @@ def classify_fuel(s):
 
     # conservative assumption that mothballed plants (without fixed retirement
     # date) went out of operation in 2024
-    mothballed_retirement = df["Status"].apply(lambda x: 2024 if x == "mothballed" else np.nan)
+    mothballed_retirement = df["Status"].apply(
+        lambda x: 2024 if x == "mothballed" else np.nan
+    )
 
     df_final = (
         df.pipe(clean_name)

From 7dd471ef2fa8e777a2372224eca2728df70ff55e Mon Sep 17 00:00:00 2001
From: Fabian Neumann <fabian.neumann@outlook.de>
Date: Wed, 20 Aug 2025 13:30:32 +0200
Subject: [PATCH 22/68] add European Energy Storage Inventory (EESI) as source

---
 powerplantmatching/data.py                  | 111 ++++++++++++++++++++
 powerplantmatching/package_data/config.yaml |  15 ++-
 2 files changed, 124 insertions(+), 2 deletions(-)

diff --git a/powerplantmatching/data.py b/powerplantmatching/data.py
index 320cca1b..3f2cc6e0 100644
--- a/powerplantmatching/data.py
+++ b/powerplantmatching/data.py
@@ -21,6 +21,7 @@
 import os
 from zipfile import ZipFile
 
+import json
 import entsoe
 import numpy as np
 import pandas as pd
@@ -2440,6 +2441,116 @@ def GEM_GGPT(*args, **kwargs):
     return GGPT(*args, **kwargs)
 
 
+def EESI(
+    raw=False,
+    update=False,
+    config=None,
+):
+    """
+    Get the European Energy Storage Inventory (EESI) dataset.
+
+    Provided by the European Commission's Joint Research Centre. Contains
+    chemical, electrochemical, thermal and mechanical energy storage
+    technologies in Europe.
+
+    https://ses.jrc.ec.europa.eu/storage-inventory-maps
+
+    https://ses.jrc.ec.europa.eu/storage-inventory-tool/api/projects
+
+    Parameters
+    ----------
+    raw : Boolean, default False
+        Whether to return the original dataset
+    update: bool, default False
+        Whether to update the data from the url.
+    config : dict, default None
+        Add custom specific configuration, e.g.
+        powerplantmatching.config.get_config(target_countries='Italy'), defaults
+        to powerplantmatching.config.get_config()
+    """
+
+    config = get_config() if config is None else config
+
+    fn = get_raw_file("EESI", update=update, config=config)
+
+    with open(fn) as f:
+        data = json.load(f)
+
+    df = pd.json_normalize(data["projects"], sep="_")
+    float_cols = ["power", "capacity", "facility_latitude", "facility_longitude"]
+    df[float_cols] = df[float_cols].astype(float)
+
+    if raw:
+        return df
+
+    status_list = config["EESI"].get("status", ["Operational"])  # noqa: F841
+
+    RENAME_COLUMNS = {
+        "title": "Name",
+        "power": "Capacity",
+        "capacity": "StorageCapacity_MWh",
+        "facility_latitude": "lat",
+        "facility_longitude": "lon",
+        "facility_country": "Country",
+        "id": "projectID",
+        "technology_name": "Technology",
+        "status": "Status",
+    }
+
+    df_processed = (
+        df.rename(columns=RENAME_COLUMNS)
+        .query("Status in @status_list")
+        .assign(
+            projectID=lambda df: "EESI-" + df.projectID.astype(str),
+            StorageCapacity_MWh=lambda df: df.StorageCapacity_MWh.where(
+                df.StorageCapacity_MWh > 0
+            ),
+            Capacity=lambda df: df.Capacity.where(df.Capacity > 0),
+            Set="Store",
+        )
+    )
+
+    sel = df_processed.query("technology_parentName == 'ElectroChemical'").index
+    df_processed.loc[sel, "Fueltype"] = "Battery"
+
+    sel = df_processed.query("technology_parentName == 'Thermal'").index
+    df_processed.loc[sel, "Fueltype"] = "Heat Storage"
+
+    sel = df_processed.query("technology_parentName == 'Mechanical'").index
+    df_processed.loc[sel, "Fueltype"] = "Mechanical Storage"
+
+    sel = df_processed.query("Technology == 'Power to Gas (H2)'").index
+    df_processed.loc[sel, "Fueltype"] = "Hydrogen Storage"
+
+    sel = df_processed.query("Technology == 'Pumped Hydro Storage (PHS)'").index
+    df_processed.loc[sel, "Fueltype"] = "Hydro"
+
+    TECHNOLOGY_MAPPING = {
+        "Power to Gas (H2)": np.nan,
+        "Lithium-ion batteries": "Li",
+        "Lead Acid batteries": "Pb",
+        "Sodium Sulphur batteries": "NaS",
+        "Redox flow batteries Vanadium": "V",
+        "Sodium Nickel Chloride batteries": "NaNiCl",
+        "Lithium-titanate battery (LTO)": "Li",
+        "Pumped Hydro Storage (PHS)": "Pumped Storage",
+        "Unespecified Storage - mechanical": np.nan,
+        "Compressed Air Energy Storage (CAES)": "CAES",
+        "Flywheel Energy Storage": "Flywheel",
+        "Unspecific Thermal Storage": np.nan,
+        "Molten salts (Sensible Thermal Energy Storage (STES))": "Molten Salt",
+    }
+    df_processed.Technology = df_processed.Technology.map(TECHNOLOGY_MAPPING)
+
+    df_final = (
+        df_processed.pipe(clean_name)
+        .pipe(set_column_name, "EESI")
+        .pipe(config_filter, config)
+    )
+
+    return df_final
+
+
 def EXTERNAL_DATABASE(raw=False, update=True, config=None):
     """
     Importer for external custom databases.
diff --git a/powerplantmatching/package_data/config.yaml b/powerplantmatching/package_data/config.yaml
index b566ca0f..6f9873a8 100644
--- a/powerplantmatching/package_data/config.yaml
+++ b/powerplantmatching/package_data/config.yaml
@@ -25,6 +25,7 @@ matching_sources:
   - WIKIPEDIA: Fueltype != 'Solar'
   - GEM
   - MASTR
+  - EESI
 
 # fully_included_sources, these sources are included even without match to the final dataset
 fully_included_sources:
@@ -35,7 +36,7 @@ fully_included_sources:
   - JRC: Country not in ['Switzerland', 'Albania', 'United Kingdom', 'Norway']
   - OPSD: Country not in ['Switzerland', 'Italy', 'Spain', 'Norway', 'Austria']
   - BEYONDCOAL
-  - GEM: Country != 'Germany' or Fueltype == 'Solar'
+  - EESI: Fueltype != 'Hydro' and not (Country == 'Germany' and Fueltype == 'Battery')
   - MASTR
 
 
@@ -214,6 +215,12 @@ MASTR:
   status: ["In Betrieb", "In Planung", "Endgültig stillgelegt", "Vorübergehend stillgelegt"]
   fn: bnetza_open_mastr_2025-02-09.zip
   url: https://zenodo.org/records/14783581/files/bnetza_open_mastr_2025-02-09.zip
+EESI:
+  net_capacity: true
+  reliability_score: 5
+  status: ["Operational"] # since no start years given
+  fn: european-energy-storage-inventory-20250817-2245.json
+  url: https://tubcloud.tu-berlin.de/s/RXWgYbYJpePsWAZ/download/european-energy-storage-inventory-20250817-2245.json
 
 # ---------------------------------------------------------------------------- #
 #                             Data Structure Config                            #
@@ -334,6 +341,9 @@ target_fueltypes:
   Waste: ["abfall.*", "waste", "mva", "müll", "afval", "affald", "energy recovery", "incineration", "reststoffe", "refuse", "déchets", "ordures", "residuos", "basura", "rifiuti", "scarti", "odpady", "śmieci", "abfälle"]
   Wind: ""
   Battery: [Electro-chemical, battery]
+  Mechanical Storage: ""
+  Heat Storage: ""
+  Hydrogen Storage: ""
 target_sets:
   # Provide a mapping of the keys to a list or a regex expression which are used for parsing.
   # A list will be converted to a regex expression matching all words (case-insensitive)
@@ -489,5 +499,6 @@ fuel_to_color:
   Geothermal: darkgoldenrod
   Battery: purple
   Hydrogen Storage: teal
-  Electro-mechanical: teal
+  Mechanical Storage: darkslategray
+  Heat Storage: darkorange
   Total: gold

From 93ee31ee166bca9e04ed14a5c1cdc9a20df689c4 Mon Sep 17 00:00:00 2001
From: "pre-commit-ci[bot]"
 <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Date: Wed, 20 Aug 2025 11:30:48 +0000
Subject: [PATCH 23/68] [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci
---
 powerplantmatching/data.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/powerplantmatching/data.py b/powerplantmatching/data.py
index 3f2cc6e0..190ccae1 100644
--- a/powerplantmatching/data.py
+++ b/powerplantmatching/data.py
@@ -17,11 +17,11 @@
 Collection of power plant data bases and statistical data
 """
 
+import json
 import logging
 import os
 from zipfile import ZipFile
 
-import json
 import entsoe
 import numpy as np
 import pandas as pd

From 6371bb32109ee67f8f472518f183710773e1c13f Mon Sep 17 00:00:00 2001
From: Fabian Neumann <fabian.neumann@outlook.de>
Date: Wed, 20 Aug 2025 14:23:43 +0200
Subject: [PATCH 24/68] unify battery naming across EESI and MASTR

---
 powerplantmatching/data.py | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/powerplantmatching/data.py b/powerplantmatching/data.py
index 190ccae1..9ab01c5a 100644
--- a/powerplantmatching/data.py
+++ b/powerplantmatching/data.py
@@ -2408,11 +2408,11 @@ def MASTR(
     ).index
     df_processed.loc[bat, ["Fueltype", "Set"]] = ["Battery", "Store"]
     BATTERY_MAPPING = {
-        "Blei-Batterie": "Lead",
-        "Lithium-Batterie": "Lithium",
+        "Blei-Batterie": "Pb",
+        "Lithium-Batterie": "Li",
         "Sonstige Batterie": np.nan,
-        "Hochtemperaturbatterie": "High-Temperature",
-        "Nickel-Cadmium- / Nickel-Metallhydridbatterie": "Nickel",
+        "Hochtemperaturbatterie": "NaS",
+        "Nickel-Cadmium- / Nickel-Metallhydridbatterie": "NiCd",
     }
     df_processed.loc[bat, "Technology"] = df_processed.loc[
         bat, "Batterietechnologie"

From d7c303ff1cbe041b5a624c1983c9501a2d785c24 Mon Sep 17 00:00:00 2001
From: Fabian Neumann <fabian.neumann@outlook.de>
Date: Wed, 20 Aug 2025 15:30:13 +0200
Subject: [PATCH 25/68] add GeoNuclearData (GND) as source

---
 powerplantmatching/data.py                  | 60 +++++++++++++++++++++
 powerplantmatching/package_data/config.yaml | 11 +++-
 2 files changed, 69 insertions(+), 2 deletions(-)

diff --git a/powerplantmatching/data.py b/powerplantmatching/data.py
index 9ab01c5a..e03dda94 100644
--- a/powerplantmatching/data.py
+++ b/powerplantmatching/data.py
@@ -2551,6 +2551,66 @@ def EESI(
     return df_final
 
 
+def GND(
+    raw=False,
+    update=False,
+    config=None,
+):
+    """
+    Get the GeoNuclearData (GND) dataset.
+
+    https://github.com/cristianst85/GeoNuclearData
+
+    Parameters
+    ----------
+    raw : Boolean, default False
+        Whether to return the original dataset
+    update: bool, default False
+        Whether to update the data from the url.
+    config : dict, default None
+        Add custom specific configuration, e.g.
+        powerplantmatching.config.get_config(target_countries='Italy'), defaults
+        to powerplantmatching.config.get_config()
+    """
+
+    config = get_config() if config is None else config
+
+    fn = get_raw_file("GND", update=update, config=config)
+
+    df = pd.read_csv(fn)
+
+    if raw:
+        return df
+    
+    status_list = config["GND"].get("status", ["Operational"])  # noqa: F841
+
+    RENAME_COLUMNS = {
+        "Id": "projectID",
+        "Latitude": "lat",
+        "Longitude": "lon",
+        "OperationalFrom": "DateIn",
+        "OperationalTo": "DateOut",
+    }
+
+    df_final = (
+        df.rename(columns=RENAME_COLUMNS)
+        .query("Status in @status_list")
+        .assign(
+            projectID=lambda df: "GND-" + df.projectID.astype(str),
+            Capacity=lambda df: df.Capacity.where(df.Capacity > 0),
+            DateIn=lambda df: pd.to_datetime(df.DateIn).dt.year,
+            DateOut=lambda df: pd.to_datetime(df.DateOut).dt.year,
+            Set="PP",
+            Fueltype="Nuclear",
+        )
+        .pipe(clean_name)
+        .pipe(set_column_name, "GND")
+        .pipe(config_filter, config)
+    )
+
+    return df_final
+
+
 def EXTERNAL_DATABASE(raw=False, update=True, config=None):
     """
     Importer for external custom databases.
diff --git a/powerplantmatching/package_data/config.yaml b/powerplantmatching/package_data/config.yaml
index 6f9873a8..64430291 100644
--- a/powerplantmatching/package_data/config.yaml
+++ b/powerplantmatching/package_data/config.yaml
@@ -26,6 +26,7 @@ matching_sources:
   - GEM
   - MASTR
   - EESI
+  - GND
 
 # fully_included_sources, these sources are included even without match to the final dataset
 fully_included_sources:
@@ -74,8 +75,8 @@ CARMA:
   fn: Full_CARMA_2009_Dataset_1.csv
 ENTSOE:
   reliability_score: 5
-  url: https://raw.githubusercontent.com/pypsa-meets-earth/ppm-data-backup/main/entsoe_powerplants.csv
-  fn: entsoe_powerplants.csv
+  url: https://tubcloud.tu-berlin.de/s/QaHLH38J4A7ZF5m/download/entsoe_transparency_platform_20250820.csv
+  fn: entsoe_transparency_platform_20250820.csv
 ENTSOE-EIC:
   url: https://eepublicdownloads.blob.core.windows.net/cio-lio/csv/W_eicCodes.csv
   fn: W_eicCodes.csv
@@ -221,6 +222,12 @@ EESI:
   status: ["Operational"] # since no start years given
   fn: european-energy-storage-inventory-20250817-2245.json
   url: https://tubcloud.tu-berlin.de/s/RXWgYbYJpePsWAZ/download/european-energy-storage-inventory-20250817-2245.json
+GND:
+  net_capacity: true
+  reliability_score: 4
+  status: ["Shutdown", "Operational", "Planned", "Under Construction", "Decommissioning Completed"]
+  url: https://raw.githubusercontent.com/cristianst85/GeoNuclearData/1bc8b4ac106af236902385b87e46c540b4864815/data/csv/denormalized/nuclear_power_plants.csv
+  fn: nuclear_power_plants.csv
 
 # ---------------------------------------------------------------------------- #
 #                             Data Structure Config                            #

From 08a4dc8a0ab4e19709b980553c5c2a4b042747f2 Mon Sep 17 00:00:00 2001
From: "pre-commit-ci[bot]"
 <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Date: Wed, 20 Aug 2025 13:30:43 +0000
Subject: [PATCH 26/68] [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci
---
 powerplantmatching/data.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/powerplantmatching/data.py b/powerplantmatching/data.py
index e03dda94..33644be6 100644
--- a/powerplantmatching/data.py
+++ b/powerplantmatching/data.py
@@ -2581,7 +2581,7 @@ def GND(
 
     if raw:
         return df
-    
+
     status_list = config["GND"].get("status", ["Operational"])  # noqa: F841
 
     RENAME_COLUMNS = {

From b7ebfc5eab83b94e81e79580a676ce4e8a22a7ac Mon Sep 17 00:00:00 2001
From: Fabian Neumann <fabian.neumann@outlook.de>
Date: Wed, 20 Aug 2025 16:47:27 +0200
Subject: [PATCH 27/68] properly distinguish onshore/offshore wind MASTR

---
 powerplantmatching/data.py                  | 15 ++++++++++++++-
 powerplantmatching/package_data/config.yaml |  4 ++--
 2 files changed, 16 insertions(+), 3 deletions(-)

diff --git a/powerplantmatching/data.py b/powerplantmatching/data.py
index 33644be6..f5fdfabb 100644
--- a/powerplantmatching/data.py
+++ b/powerplantmatching/data.py
@@ -2286,6 +2286,7 @@ def MASTR(
         "Energietraeger",
         "Hauptbrennstoff",
         "NameStromerzeugungseinheit",
+        "NameWindpark",
         "Technologie",
     ]
 
@@ -2316,6 +2317,7 @@ def MASTR(
                         "Ort",
                         "Gemeinde",
                         "Landkreis",
+                        "Lage",
                     ]
                     target_columns = (
                         target_columns + PARSE_COLUMNS + list(RENAME_COLUMNS.keys())
@@ -2368,7 +2370,9 @@ def MASTR(
         .query("Status in @status_list")
         .assign(
             projectID=lambda df: "MASTR-" + df.projectID,
-            Name=lambda df: df.Name.combine_first(df.NameStromerzeugungseinheit),
+            Name=lambda df: df.Name.combine_first(df.NameWindpark).combine_first(
+                df.NameStromerzeugungseinheit
+            ),
             Country=lambda df: df.Country.map(COUNTRY_MAP),
             Capacity=lambda df: df.Capacity / 1e3,  # kW to MW
             DateIn=lambda df: pd.to_datetime(df.DateIn).dt.year.combine_first(
@@ -2418,6 +2422,15 @@ def MASTR(
         bat, "Batterietechnologie"
     ].map(BATTERY_MAPPING)
 
+    WIND_MAPPING = {
+        "Windkraft auf See": "Offshore",
+        "Windkraft an Land": "Onshore",
+    }
+    wind = df_processed.query("Energietraeger == 'Wind'").index
+    df_processed.loc[wind, "Technology"] = df_processed.loc[wind, "Lage"].map(
+        WIND_MAPPING
+    )
+
     mask = df_processed.query(
         "Energietraeger in ['Hydro', 'Wind', 'Solar', 'Battery'] and Set == 'Store'"
     ).index
diff --git a/powerplantmatching/package_data/config.yaml b/powerplantmatching/package_data/config.yaml
index 64430291..20c17dc2 100644
--- a/powerplantmatching/package_data/config.yaml
+++ b/powerplantmatching/package_data/config.yaml
@@ -402,8 +402,8 @@ target_technologies:
   Marine: ""
   PV: [pv, photo-voltaic, photo voltaic]
   CSP: ""
-  Onshore: ""
-  Offshore: ""
+  Onshore: ["onshore", "an land", "terrestre", "landvind", "på land", "op land", "lądowy", "su terra", "en tierra", "à terre"]
+  Offshore: ["offshore", "nearshore", "auf see", "en mer", "marino", "en mar", "in mare", "morski", "havvind", "til havs", "på havet", "op zee", "zeewind"]
 clean_name:
   remove_common_words: false # remove words which appear more that 20 times in all entries
   remove_duplicated_words: true

From 3953da92193837b135bd92730d3fb49c746cbfb8 Mon Sep 17 00:00:00 2001
From: Fabian Neumann <fabian.neumann@outlook.de>
Date: Wed, 20 Aug 2025 16:48:03 +0200
Subject: [PATCH 28/68] revert deprecation warnings

---
 powerplantmatching/data.py | 76 +++++++++++++++++++-------------------
 1 file changed, 39 insertions(+), 37 deletions(-)

diff --git a/powerplantmatching/data.py b/powerplantmatching/data.py
index f5fdfabb..283a45f3 100644
--- a/powerplantmatching/data.py
+++ b/powerplantmatching/data.py
@@ -111,28 +111,30 @@ def get_dominant_type(group):
     # for retired plants
     unit_capacity = df_units.groupby("BFF plant ID").Capacity.sum()
 
-    df = (
-        df.rename(columns=RENAME_COLUMNS)
-        .query("status in @status_list")
-        .assign(
-            DateOut=lambda df: df.rename(columns=RENAME_COLUMNS)
-            .DateOut.replace({"After 2030": np.nan, "By 2030": 2030})
-            .infer_objects(copy=False)
-            .combine_first(unit_phaseout),
-            projectID=lambda df: "BEYOND-" + df.projectID,
-            Fueltype=lambda df: df.Fueltype.str.title(),
-            Set=unit_set,
-            Technology=np.nan,
-            Capacity=lambda df: df.Capacity.add(
-                df["Coal capacity under construction"], fill_value=0
-            ).combine_first(unit_capacity),
+    with pd.option_context("future.no_silent_downcasting", True):
+        df = (
+            df.rename(columns=RENAME_COLUMNS)
+            .query("status in @status_list")
+            .assign(
+                DateOut=lambda df: df.rename(columns=RENAME_COLUMNS)
+                .DateOut.replace({"After 2030": np.nan, "By 2030": 2030})
+                .astype(float)
+                .combine_first(unit_phaseout),
+                projectID=lambda df: "BEYOND-" + df.projectID,
+                Fueltype=lambda df: df.Fueltype.str.title(),
+                Set=unit_set,
+                Technology=np.nan,
+                Capacity=lambda df: df.Capacity.add(
+                    df["Coal capacity under construction"], fill_value=0
+                ).combine_first(unit_capacity),
+            )
+            .pipe(scale_to_net_capacities)
+            .pipe(clean_name)
+            .pipe(convert_to_short_name)
+            .pipe(set_column_name, "BEYONDCOAL")
+            .pipe(config_filter, config)
         )
-        .pipe(scale_to_net_capacities)
-        .pipe(clean_name)
-        .pipe(convert_to_short_name)
-        .pipe(set_column_name, "BEYONDCOAL")
-        .pipe(config_filter, config)
-    )
+
     return df
 
 
@@ -251,10 +253,10 @@ def OPSD(
     )
 
 
-@deprecated(
-    deprecated_in="0.8.0",
-    details="Deprecated since data is not maintained. Use GEM instead.",
-)
+# @deprecated(
+#     deprecated_in="0.8.0",
+#     details="Deprecated since data is not maintained. Use GEM instead.",
+# )
 def GEO(raw=False, update=False, config=None):
     """
     Importer for the GEO database.
@@ -626,10 +628,10 @@ def GPD(raw=False, update=False, config=None, filter_other_dbs=True):
     )
 
 
-@deprecated(
-    deprecated_in="0.8.0",
-    details="Removed since data is not maintained. Use GNPT instead.",
-)
+# @deprecated(
+#     deprecated_in="0.8.0",
+#     details="Removed since data is not maintained. Use GNPT instead.",
+# )
 def WIKIPEDIA(raw=False, update=False, config=None):
     """
     Importer for the WIKIPEDIA nuclear power plant database.
@@ -1443,10 +1445,10 @@ def BNETZA(
     )
 
 
-@deprecated(
-    deprecated_in="0.8.0",
-    details="Removed since data is not maintained. Use GSPT, GWPT and GHPT instead.",
-)
+# @deprecated(
+#     deprecated_in="0.8.0",
+#     details="Removed since data is not maintained. Use GSPT, GWPT and GHPT instead.",
+# )
 def OPSD_VRE(raw=False, update=False, config=None):
     """
     Importer for the OPSD (Open Power Systems Data) renewables (VRE)
@@ -1496,10 +1498,10 @@ def OPSD_VRE(raw=False, update=False, config=None):
     )
 
 
-@deprecated(
-    deprecated_in="0.8.0",
-    details="Removed since data is not maintained. Use GSPT, GWPT and GHPT instead.",
-)
+# @deprecated(
+#     deprecated_in="0.8.0",
+#     details="Removed since data is not maintained. Use GSPT, GWPT and GHPT instead.",
+# )
 def OPSD_VRE_country(country, raw=False, update=False, config=None):
     """
     Get country specific data from OPSD for renewables, if available.

From 1f10f3789170b6d105b38ca18449f34e65369499 Mon Sep 17 00:00:00 2001
From: Fabian Neumann <fabian.neumann@outlook.de>
Date: Wed, 20 Aug 2025 18:38:03 +0200
Subject: [PATCH 29/68] new proposed matching settings

---
 powerplantmatching/package_data/config.yaml | 57 ++++++++++-----------
 1 file changed, 27 insertions(+), 30 deletions(-)

diff --git a/powerplantmatching/package_data/config.yaml b/powerplantmatching/package_data/config.yaml
index 20c17dc2..16154868 100644
--- a/powerplantmatching/package_data/config.yaml
+++ b/powerplantmatching/package_data/config.yaml
@@ -20,25 +20,23 @@ matching_sources:
   - GEO: Fueltype != 'Solar'
   - GPD: Fueltype != 'Solar'
   - JRC: Fueltype != 'Solar'
-  - OPSD: Country != "Spain" and Fueltype != 'Hard Coal' and Fueltype != 'Solar'
+  - OPSD: Country != "Spain" and Fueltype not in ['Hard Coal', 'Solar']
   - BEYONDCOAL: Fueltype != 'Solar'
-  - WIKIPEDIA: Fueltype != 'Solar'
   - GEM
   - MASTR
-  - EESI
-  - GND
+  - EESI: Fueltype != 'Solar'
+  - GND: Fueltype != 'Solar'
 
 # fully_included_sources, these sources are included even without match to the final dataset
 fully_included_sources:
   # Make individual queries for the datasets
   - ENTSOE: (Country not in ['Switzerland', 'Ireland', 'Albania', 'Greece', 'Czech Republic', 'Bulgaria', 'United Kingdom', 'Italy', 'Serbia'] and not (Country == 'Spain' and Fueltype == 'Hydro')) or (Fueltype == 'Geothermal')
-  - GEO: (Country == 'Spain' and Fueltype == 'Natural Gas')
-  - GPD: Country in ['Finland', 'Spain']
   - JRC: Country not in ['Switzerland', 'Albania', 'United Kingdom', 'Norway']
   - OPSD: Country not in ['Switzerland', 'Italy', 'Spain', 'Norway', 'Austria']
-  - BEYONDCOAL
+  - GEM: not (Country == 'Germany' and Fueltype in ['Solar', 'Wind'])
   - EESI: Fueltype != 'Hydro' and not (Country == 'Germany' and Fueltype == 'Battery')
   - MASTR
+  - GND
 
 
 parallel_duke_processes: false
@@ -52,13 +50,13 @@ matched_data_url: https://raw.githubusercontent.com/PyPSA/powerplantmatching/{ta
 opsd_vres_base_year: 2020
 
 BNETZA:
-  reliability_score: 2
+  reliability_score: 1
   fn: Kraftwerksliste_2017_2.xlsx
   url: https://www.bundesnetzagentur.de/SharedDocs/Downloads/DE/Sachgebiete/Energie/Unternehmen_Institutionen/Versorgungssicherheit/Erzeugungskapazitaeten/Kraftwerksliste/Kraftwerksliste_2019_1.xlsx;jsessionid=17E419F28D025C7DD9FC6E2BEB3D088F?__blob=publicationFile&v=2
 BEYONDCOAL:
   net_capacity: false
   aggregated_units: true
-  reliability_score: 6
+  reliability_score: 7
   status: ["Construction", "Open", "Planned", "Retired"]
   fn: 2025-07-24-BeyondFossilFuels-Europe_Coal_Plants_Database.xlsx
   url: https://beyondfossilfuels.org/wp-content/uploads/2025/07/2025-07-24-BeyondFossilFuels-Europe_Coal_Plants_Database.xlsx
@@ -81,17 +79,17 @@ ENTSOE-EIC:
   url: https://eepublicdownloads.blob.core.windows.net/cio-lio/csv/W_eicCodes.csv
   fn: W_eicCodes.csv
 JRC:
-  reliability_score: 4
+  reliability_score: 5
   fn: jrc-hydro-power-plant-database.csv
   url: https://raw.githubusercontent.com/energy-modelling-toolkit/hydro-power-database/27e80f/data/jrc-hydro-power-plant-database.csv
 GEO:
   net_capacity: false
-  reliability_score: 3
+  reliability_score: 2
   url: https://raw.githubusercontent.com/pypsa-meets-earth/ppm-data-backup/main/global_energy_observatory_power_plants.csv
   fn: global_energy_observatory_power_plants.csv
 GEO_units:
   net_capacity: false
-  reliability_score: 3
+  reliability_score: 2
   url: https://raw.githubusercontent.com/pypsa-meets-earth/ppm-data-backup/main/global_energy_observatory_ppl_units.csv
   fn: global_energy_observatory_ppl_units.csv
 GPD:
@@ -100,19 +98,19 @@ GPD:
   #if outdated, look at http://datasets.wri.org/dataset/globalpowerplantdatabase
   url: https://wri-dataportal-prod.s3.amazonaws.com/manual/global_power_plant_database_v_1_3.zip
 WIKIPEDIA:
-  reliability_score: 4
+  reliability_score: 2
   url: https://raw.githubusercontent.com/pypsa-meets-earth/ppm-data-backup/main/nuclear_plants_from_wikipedia.csv
   fn: nuclear_plants_from_wikipedia.csv
 IWPDCY:
   aggregated_units: true
-  reliability_score: 3
+  reliability_score: 2
   fn: IWPDCY.csv
 OPSD_DE:
-  reliability_score: 4
+  reliability_score: 3
   fn: conventional_power_plants_DE.csv
   url: https://raw.githubusercontent.com/pypsa-meets-earth/ppm-data-backup/main/conventional_power_plants_DE.csv
 OPSD_EU:
-  reliability_score: 4
+  reliability_score: 3
   fn: conventional_power_plants_EU.csv
   url: https://raw.githubusercontent.com/pypsa-meets-earth/ppm-data-backup/main/conventional_power_plants_EU.csv
 OPSD_VRE:
@@ -143,76 +141,75 @@ OPSD_VRE_GB:
   url: https://data.open-power-system-data.org/renewable_power_plants/2020-08-25/renewable_power_plants_UK.csv
   fn: renewable_power_plants_UK.csv
 OPSD:
-  reliability_score: 4
+  reliability_score: 3
 Capacity_stats:
   url: https://data.open-power-system-data.org/national_generation_capacity/2020-10-01/national_generation_capacity_stacked.csv
   fn: national_generation_capacity_stacked.csv
 UBA:
   net_capacity: false
-  reliability_score: 4
+  reliability_score: 1
   fn: kraftwerke-de-ab-100-mw.xls
   url: https://www.umweltbundesamt.de/sites/default/files/medien/372/dokumente/kraftwerke_de_ab_100_mw_0.xls
 WEPP:
   net_capacity: false
-  reliability_score: 3
+  reliability_score: 1
   fn: platts_wepp.csv
 GGPT:
   net_capacity: false
-  reliability_score: 5
+  reliability_score: 6
   status: ["operating", "retired", "construction"]
   fn: Global-Oil-and-Gas-Plant-Tracker-GOGPT-August-2025.xlsx
   url: https://tubcloud.tu-berlin.de/s/aKrt7dyNgazmgAm/download/Global-Oil-and-Gas-Plant-Tracker-GOGPT-August-2025.xlsx
 GEM:
   # combined data set of all GEM trackers
   net_capacity: true
-  reliability_score: 5
+  reliability_score: 6
 GCPT:
   net_capacity: false
-  reliability_score: 4
+  reliability_score: 6
   status: ["operating", "retired", "construction", "mothballed"]
   fn: Global-Coal-Plant-Tracker-July-2025.xlsx
   url: https://tubcloud.tu-berlin.de/s/etMB7qawKNwfgnk/download/Global-Coal-Plant-Tracker-July-2025.xlsx
 GGTPT:
   net_capacity: false
-  reliability_score: 4
+  reliability_score: 6
   aggregated_units: false
   status: ["operating", "retired", "construction", "mothballed"]
   fn: Geothermal-Power-Tracker-March-2025-Final.xlsx
   url: https://tubcloud.tu-berlin.de/s/dNoEsLeGtCWDkoc/download/Geothermal-Power-Tracker-March-2025-Final.xlsx
 GWPT:
   net_capacity: false
-  reliability_score: 4
+  reliability_score: 6
   status: ["operating", "retired", "construction"]
   fn:  Global-Wind-Power-Tracker-February-2025.xlsx
   url: https://tubcloud.tu-berlin.de/s/8NSXSjPmJPXpg4W/download/Global-Wind-Power-Tracker-February-2025.xlsx
 GSPT:
   net_capacity: false
-  reliability_score: 4
+  reliability_score: 6
   status: ["operating", "retired", "construction"]
   fn: Global-Solar-Power-Tracker-February-2025.xlsx
   url: https://tubcloud.tu-berlin.de/s/7eo4dZXMp6eB3mz/download/Global-Solar-Power-Tracker-February-2025.xlsx
 GBPT:
   net_capacity: false
-  reliability_score: 4
+  reliability_score: 6
   status: ["operating", "retired", "construction"]
   fn: Global-Bioenergy-Power-Tracker-GBPT-September-2024.xlsx
   url: https://tubcloud.tu-berlin.de/s/CzMBKe2rAcsoq7c/download/Global-Bioenergy-Power-Tracker-GBPT-September-2024.xlsx
 GNPT:
   net_capacity: false
-  reliability_score: 4
+  reliability_score: 6
   status: ["operating", "retired", "mothballed", "construction"]
   fn: Global-Nuclear-Power-Tracker-July-2024.xlsx
   url: https://tubcloud.tu-berlin.de/s/gXFim9EciRHrjeQ/download/Global-Nuclear-Power-Tracker-July-2024.xlsx
 GHPT:
   net_capacity: false
-  reliability_score: 4
+  reliability_score: 6
   status: ["operating", "retired", "construction"]
   fn: Global-Hydropower-Tracker-April-2025.xlsx
   url: https://tubcloud.tu-berlin.de/s/2xqxRmfP4FKTrLf/download/Global-Hydropower-Tracker-April-2025.xlsx
-
 MASTR:
   net_capacity: true
-  reliability_score: 8
+  reliability_score: 7
   status: ["In Betrieb", "In Planung", "Endgültig stillgelegt", "Vorübergehend stillgelegt"]
   fn: bnetza_open_mastr_2025-02-09.zip
   url: https://zenodo.org/records/14783581/files/bnetza_open_mastr_2025-02-09.zip

From c0296bbe579e3a51393f2041832a0843a54174be Mon Sep 17 00:00:00 2001
From: Fabian Neumann <fabian.neumann@outlook.de>
Date: Wed, 20 Aug 2025 19:23:00 +0200
Subject: [PATCH 30/68] add release notes and update docs

---
 doc/basics.rst        |  2 ++
 doc/release-notes.rst | 18 ++++++++++++++++++
 2 files changed, 20 insertions(+)

diff --git a/doc/basics.rst b/doc/basics.rst
index 7e91618b..358ee423 100644
--- a/doc/basics.rst
+++ b/doc/basics.rst
@@ -49,6 +49,8 @@ Data Sources
 -  GNPT - `Global Nuclear Powerplant Tracker by Global Energy Monitor <https://globalenergymonitor.org/projects/global-nuclear-power-tracker/>`__
 -  GSPT - `Global Solar Powerplant Tracker by Global Energy Monitor <https://globalenergymonitor.org/projects/global-solar-power-tracker/>`__
 -  GWPT - `Global Wind Powerplant Tracker by Global Energy Monitor <https://globalenergymonitor.org/projects/global-wind-power-tracker/>`__
+-  EESI - `European Energy Storage Inventory <https://ses.jrc.ec.europa.eu/storage-inventory-maps>`__
+-  GND - `GeoNuclearData <https://github.com/cristianst85/GeoNuclearData>`__
 -  CARMA - `Carbon Monitoring for Action <http://carma.org/plant>`__
 -  ENTSOe - `European Network of Transmission System Operators for
    Electricity <http://entsoe.eu/>`__, annually provides statistics
diff --git a/doc/release-notes.rst b/doc/release-notes.rst
index 0430a104..e2c2de5f 100644
--- a/doc/release-notes.rst
+++ b/doc/release-notes.rst
@@ -8,6 +8,24 @@ Upcoming Version
 
 * Drop support for Python 3.9, add support for Python 3.13. Minimum required Python version is now 3.10.
 
+* Added `GeoNuclearData <https://github.com/cristianst85/GeoNuclearData>`__ dataset as `pm.data.GND()`.
+
+* Added `European Energy Storage Inventory <https://ses.jrc.ec.europa.eu/storage-inventory-maps>`__ dataset as `pm.data.EESI()`.
+
+* Updated ENTSOE, BEYONDCOAL, JRC, IRENASTAT and the Global Energy Monitor datasets to the latest versions.
+
+* Fixed the distinction between hydro technologies and between offshore and onshore wind in `pm.data.MASTR()`. Storage technologies are now read in as well.
+
+* Improved recognition of CHP power plants.
+
+* In Global Energy Monitor datasets, also read entries below capacity threshold.
+
+* In `pm.data.GCPT()`, add estimate for coal plant efficiency.
+
+* Include mothballed gas, oil and coal power plants.
+
+* Updated matching logic configuration.
+
 
 `v0.7.1 <https://github.com/PyPSA/powerplantmatching/releases/tag/v0.7.1>`__ (30th January 2024)
 =================================================================================================

From c481d0602ec5aaaf04ed7d3b1481b11544211353 Mon Sep 17 00:00:00 2001
From: Fabian Neumann <fabian.neumann@outlook.de>
Date: Thu, 21 Aug 2025 08:13:00 +0200
Subject: [PATCH 31/68]  adjust reliability scores of BEYONDCOAL and GND
 (closes #241)

---
 powerplantmatching/package_data/config.yaml | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/powerplantmatching/package_data/config.yaml b/powerplantmatching/package_data/config.yaml
index 16154868..1e05774b 100644
--- a/powerplantmatching/package_data/config.yaml
+++ b/powerplantmatching/package_data/config.yaml
@@ -56,7 +56,7 @@ BNETZA:
 BEYONDCOAL:
   net_capacity: false
   aggregated_units: true
-  reliability_score: 7
+  reliability_score: 4
   status: ["Construction", "Open", "Planned", "Retired"]
   fn: 2025-07-24-BeyondFossilFuels-Europe_Coal_Plants_Database.xlsx
   url: https://beyondfossilfuels.org/wp-content/uploads/2025/07/2025-07-24-BeyondFossilFuels-Europe_Coal_Plants_Database.xlsx
@@ -221,7 +221,7 @@ EESI:
   url: https://tubcloud.tu-berlin.de/s/RXWgYbYJpePsWAZ/download/european-energy-storage-inventory-20250817-2245.json
 GND:
   net_capacity: true
-  reliability_score: 4
+  reliability_score: 5
   status: ["Shutdown", "Operational", "Planned", "Under Construction", "Decommissioning Completed"]
   url: https://raw.githubusercontent.com/cristianst85/GeoNuclearData/1bc8b4ac106af236902385b87e46c540b4864815/data/csv/denormalized/nuclear_power_plants.csv
   fn: nuclear_power_plants.csv

From 8d2eb4d9869555b508a524f2c8cfd1fab9e54467 Mon Sep 17 00:00:00 2001
From: Fabian Neumann <fabian.neumann@outlook.de>
Date: Thu, 21 Aug 2025 08:38:45 +0200
Subject: [PATCH 32/68] GGPT use unit rather than location ID (closes #215)

---
 powerplantmatching/data.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/powerplantmatching/data.py b/powerplantmatching/data.py
index 283a45f3..262f17b5 100644
--- a/powerplantmatching/data.py
+++ b/powerplantmatching/data.py
@@ -2086,7 +2086,7 @@ def GGPT(raw=False, update=False, config=None):
         "Retired year": "DateOut",
         "CHP": "Set",
         "Fuel": "Fueltype",
-        "GEM location ID": "projectID",
+        "GEM unit ID": "projectID",
         "Country/Area": "Country",
         "Turbine/Engine Technology": "Technology",
     }

From 865953dd485f7dbfb6294951a644e76e7df91834 Mon Sep 17 00:00:00 2001
From: Fabian Neumann <fabian.neumann@outlook.de>
Date: Thu, 21 Aug 2025 13:49:57 +0200
Subject: [PATCH 33/68] GCPT: translate technologies

---
 powerplantmatching/data.py | 9 ++++++++-
 1 file changed, 8 insertions(+), 1 deletion(-)

diff --git a/powerplantmatching/data.py b/powerplantmatching/data.py
index 262f17b5..1430f930 100644
--- a/powerplantmatching/data.py
+++ b/powerplantmatching/data.py
@@ -1821,6 +1821,13 @@ def GCPT(raw=False, update=False, config=None):
         "unknown with CCS": "Hard Coal",
         "waste coal": "Hard Coal",
     }
+    technology_dict = {
+        "IGCC": "CCGT",
+        "subcritical": "Steam Turbine",
+        "unknown": np.nan,
+        "supercritical": "Steam Turbine",
+        "ultra-supercritical": "Steam Turbine",
+    }
 
     planned_retirement = df["Planned retirement"].apply(pd.to_numeric, errors="coerce")
 
@@ -1853,7 +1860,7 @@ def GCPT(raw=False, update=False, config=None):
         )
         .query("Status in @status_list")
         .pipe(lambda x: x[df.columns.intersection(config.get("target_columns"))])
-        .pipe(lambda x: x.replace({"Fueltype": fueltype_dict}))
+        .pipe(lambda x: x.replace({"Fueltype": fueltype_dict, "Technology": technology_dict}))
         .pipe(config_filter, config)
     )
 

From d2f6c89fe66f500794e32097170686b71fc3e055 Mon Sep 17 00:00:00 2001
From: Fabian Neumann <fabian.neumann@outlook.de>
Date: Thu, 21 Aug 2025 13:50:31 +0200
Subject: [PATCH 34/68] JRC: log storage parameters only if > 0

---
 powerplantmatching/data.py | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/powerplantmatching/data.py b/powerplantmatching/data.py
index 1430f930..6cf9d265 100644
--- a/powerplantmatching/data.py
+++ b/powerplantmatching/data.py
@@ -475,6 +475,9 @@ def set_large_spanish_stores_to_reservoirs(df):
         .assign(
             Set=lambda df: np.where(df.Technology == "Run-Of-River", "PP", "Store"),
             Fueltype="Hydro",
+            Duration=lambda df: df.Duration.where(df.Duration > 0),
+            StorageCapacity_MWh=lambda df: df.StorageCapacity_MWh.where(df.StorageCapacity_MWh > 0),
+            Volume_Mm3=lambda df: df.Volume_Mm3.where(df.Volume_Mm3 > 0),
         )
         .drop(columns=["pypsa_id", "GEO"])
         .powerplant.convert_alpha2_to_country()

From 3242b6b000640a782276fd633b022634eb60c8f6 Mon Sep 17 00:00:00 2001
From: Fabian Neumann <fabian.neumann@outlook.de>
Date: Thu, 21 Aug 2025 13:50:59 +0200
Subject: [PATCH 35/68] MASTR: set threshold to 100 kW

---
 powerplantmatching/data.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/powerplantmatching/data.py b/powerplantmatching/data.py
index 6cf9d265..2a9b75dd 100644
--- a/powerplantmatching/data.py
+++ b/powerplantmatching/data.py
@@ -2273,7 +2273,7 @@ def MASTR(
 
     config = get_config() if config is None else config
 
-    THRESHOLD_KW = 1000  # noqa: F841
+    THRESHOLD_KW = 100  # noqa: F841
 
     RENAME_COLUMNS = {
         "EinheitMastrNummer": "projectID",

From c5813f7b5b5f5d3ff00bf3d42482524ef61e8f48 Mon Sep 17 00:00:00 2001
From: Fabian Neumann <fabian.neumann@outlook.de>
Date: Thu, 21 Aug 2025 13:51:16 +0200
Subject: [PATCH 36/68] MASTR: calculate Duration

---
 powerplantmatching/data.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/powerplantmatching/data.py b/powerplantmatching/data.py
index 2a9b75dd..9ee56cb9 100644
--- a/powerplantmatching/data.py
+++ b/powerplantmatching/data.py
@@ -2401,6 +2401,7 @@ def MASTR(
             ),
             lat=lambda df: df.lat.combine_first(df.PLZ_lat),
             lon=lambda df: df.lon.combine_first(df.PLZ_lon),
+            Duration=lambda df: df.StorageCapacity_MWh.div(df.Capacity, fill_value=np.nan),
         )
         .pipe(
             gather_specifications,

From 9064eac56e8ac968e05cd4684905ca8e2ca36ab4 Mon Sep 17 00:00:00 2001
From: Fabian Neumann <fabian.neumann@outlook.de>
Date: Thu, 21 Aug 2025 13:51:48 +0200
Subject: [PATCH 37/68] MASTR: fixes from gather_specifications inaccuracies

---
 powerplantmatching/data.py | 28 +++++++++++++++++++++++++++-
 1 file changed, 27 insertions(+), 1 deletion(-)

diff --git a/powerplantmatching/data.py b/powerplantmatching/data.py
index 9ee56cb9..6c450718 100644
--- a/powerplantmatching/data.py
+++ b/powerplantmatching/data.py
@@ -2424,6 +2424,7 @@ def MASTR(
         "Energietraeger == 'Speicher' and Technologie == 'Batterie'"
     ).index
     df_processed.loc[bat, ["Fueltype", "Set"]] = ["Battery", "Store"]
+
     BATTERY_MAPPING = {
         "Blei-Batterie": "Pb",
         "Lithium-Batterie": "Li",
@@ -2444,8 +2445,33 @@ def MASTR(
         WIND_MAPPING
     )
 
+    sel = df_processed.query("Fueltype == 'Natural Gas' and Filesuffix == 'Bioenergy'").index
+    df_processed.loc[sel, "Fueltype"] = "Biogas"
+
+    # one biogas unit has 'Wind' in name
+    sel = df_processed.query("Fueltype == 'Wind' and Filesuffix == 'Biomass'").index
+    df_processed.loc[sel, "Fueltype"] = "Biogas"
+
+    # some combi-units are named wind-solar
+    sel = df_processed.query("Fueltype in ['Wind', 'Waste'] and Filesuffix == 'Solar'").index
+    df_processed.loc[sel, ["Fueltype", "Technology"]] = ["Solar", "PV"]
+
+    # some technologies are wrongly allocated
+    sel = df_processed.query("Fueltype == 'Biogas' and Technology == 'PV'").index
+    df_processed.loc[sel, "Technology"] = "Combustion Engine"
+    sel = df_processed.query("Fueltype == 'Hydro' and Technology == 'Steam Turbine'").index
+    df_processed.loc[sel, "Technology"] = "Run-Of-River"
+    sel = df_processed.query("Fueltype == 'Solar' and Technology == 'CCGT'").index
+    df_processed.loc[sel, "Technology"] = "PV"
+    sel = df_processed.query("Fueltype == 'Solar' and Technology == 'OCGT' and Filesuffix == 'Combustion'").index
+    df_processed.loc[sel, "Fueltype"] = "Natural Gas"
+    sel = df_processed.query("Fueltype == 'Wind' and Technology == 'PV' and Filesuffix == 'Solar'").index
+    df_processed.loc[sel, "Fueltype"] = "Solar"
+    sel = df_processed.query("Fueltype == 'Wind' and Technology == 'Combustion Engine' and Filesuffix == 'Bioenergy'").index
+    df_processed.loc[sel, "Fueltype"] = "Biogas"
+
     mask = df_processed.query(
-        "Energietraeger in ['Hydro', 'Wind', 'Solar', 'Battery'] and Set == 'Store'"
+        "Energietraeger in ['Hydro', 'Wind', 'Solar', 'Battery'] and Set in ['Store', 'CHP']"
     ).index
     df_processed.loc[mask, "Set"] = "PP"
 

From 1dc7fa1ab329bc56eff224116e0abae6072859dd Mon Sep 17 00:00:00 2001
From: Fabian Neumann <fabian.neumann@outlook.de>
Date: Thu, 21 Aug 2025 13:52:42 +0200
Subject: [PATCH 38/68] config: omit ambiguous regex expressions

---
 powerplantmatching/package_data/config.yaml | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/powerplantmatching/package_data/config.yaml b/powerplantmatching/package_data/config.yaml
index 1e05774b..d333dd9f 100644
--- a/powerplantmatching/package_data/config.yaml
+++ b/powerplantmatching/package_data/config.yaml
@@ -293,9 +293,9 @@ target_fueltypes:
   # given by the list. An empty string results in a regex expression containing only the key.
   # Parsed of representatives at the top may be overwritten by representatives further below.
   Other: ".*"
-  Solid Biomass: [biological, bioenergy, agricultural, wood, holz, biomass, feste biomasse, biomasa, biomassa, feste biogene stoffe, pellets, stroh, straw]
+  Solid Biomass: [biological, bioenergy, agricultural, biomass, feste biomasse, biomasa, biomassa, feste biogene stoffe, pellets, stroh, straw]
   Biogas: [biogas, biogaz, biomethan, gasförmige biomasse]
-  Nuclear: [nuclear, kernkraft, atomkraft, nucléaire, atomowa, jądrowa, kjernekraft, kern, atom, atoom]
+  Nuclear: [nuclear, kernkraft, atomkraft, nucléaire, atomowa, jądrowa, kjernekraft, atoom]
   Natural Gas:
     [
       ccgt,

From 856a3ddb921f923a78dd8bd14bf287fd57d5c229 Mon Sep 17 00:00:00 2001
From: "pre-commit-ci[bot]"
 <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Date: Thu, 21 Aug 2025 11:53:00 +0000
Subject: [PATCH 39/68] [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci
---
 powerplantmatching/data.py | 38 +++++++++++++++++++++++++++++---------
 1 file changed, 29 insertions(+), 9 deletions(-)

diff --git a/powerplantmatching/data.py b/powerplantmatching/data.py
index 6c450718..895b7904 100644
--- a/powerplantmatching/data.py
+++ b/powerplantmatching/data.py
@@ -476,7 +476,9 @@ def set_large_spanish_stores_to_reservoirs(df):
             Set=lambda df: np.where(df.Technology == "Run-Of-River", "PP", "Store"),
             Fueltype="Hydro",
             Duration=lambda df: df.Duration.where(df.Duration > 0),
-            StorageCapacity_MWh=lambda df: df.StorageCapacity_MWh.where(df.StorageCapacity_MWh > 0),
+            StorageCapacity_MWh=lambda df: df.StorageCapacity_MWh.where(
+                df.StorageCapacity_MWh > 0
+            ),
             Volume_Mm3=lambda df: df.Volume_Mm3.where(df.Volume_Mm3 > 0),
         )
         .drop(columns=["pypsa_id", "GEO"])
@@ -1863,7 +1865,11 @@ def GCPT(raw=False, update=False, config=None):
         )
         .query("Status in @status_list")
         .pipe(lambda x: x[df.columns.intersection(config.get("target_columns"))])
-        .pipe(lambda x: x.replace({"Fueltype": fueltype_dict, "Technology": technology_dict}))
+        .pipe(
+            lambda x: x.replace(
+                {"Fueltype": fueltype_dict, "Technology": technology_dict}
+            )
+        )
         .pipe(config_filter, config)
     )
 
@@ -2401,7 +2407,9 @@ def MASTR(
             ),
             lat=lambda df: df.lat.combine_first(df.PLZ_lat),
             lon=lambda df: df.lon.combine_first(df.PLZ_lon),
-            Duration=lambda df: df.StorageCapacity_MWh.div(df.Capacity, fill_value=np.nan),
+            Duration=lambda df: df.StorageCapacity_MWh.div(
+                df.Capacity, fill_value=np.nan
+            ),
         )
         .pipe(
             gather_specifications,
@@ -2445,7 +2453,9 @@ def MASTR(
         WIND_MAPPING
     )
 
-    sel = df_processed.query("Fueltype == 'Natural Gas' and Filesuffix == 'Bioenergy'").index
+    sel = df_processed.query(
+        "Fueltype == 'Natural Gas' and Filesuffix == 'Bioenergy'"
+    ).index
     df_processed.loc[sel, "Fueltype"] = "Biogas"
 
     # one biogas unit has 'Wind' in name
@@ -2453,21 +2463,31 @@ def MASTR(
     df_processed.loc[sel, "Fueltype"] = "Biogas"
 
     # some combi-units are named wind-solar
-    sel = df_processed.query("Fueltype in ['Wind', 'Waste'] and Filesuffix == 'Solar'").index
+    sel = df_processed.query(
+        "Fueltype in ['Wind', 'Waste'] and Filesuffix == 'Solar'"
+    ).index
     df_processed.loc[sel, ["Fueltype", "Technology"]] = ["Solar", "PV"]
 
     # some technologies are wrongly allocated
     sel = df_processed.query("Fueltype == 'Biogas' and Technology == 'PV'").index
     df_processed.loc[sel, "Technology"] = "Combustion Engine"
-    sel = df_processed.query("Fueltype == 'Hydro' and Technology == 'Steam Turbine'").index
+    sel = df_processed.query(
+        "Fueltype == 'Hydro' and Technology == 'Steam Turbine'"
+    ).index
     df_processed.loc[sel, "Technology"] = "Run-Of-River"
     sel = df_processed.query("Fueltype == 'Solar' and Technology == 'CCGT'").index
     df_processed.loc[sel, "Technology"] = "PV"
-    sel = df_processed.query("Fueltype == 'Solar' and Technology == 'OCGT' and Filesuffix == 'Combustion'").index
+    sel = df_processed.query(
+        "Fueltype == 'Solar' and Technology == 'OCGT' and Filesuffix == 'Combustion'"
+    ).index
     df_processed.loc[sel, "Fueltype"] = "Natural Gas"
-    sel = df_processed.query("Fueltype == 'Wind' and Technology == 'PV' and Filesuffix == 'Solar'").index
+    sel = df_processed.query(
+        "Fueltype == 'Wind' and Technology == 'PV' and Filesuffix == 'Solar'"
+    ).index
     df_processed.loc[sel, "Fueltype"] = "Solar"
-    sel = df_processed.query("Fueltype == 'Wind' and Technology == 'Combustion Engine' and Filesuffix == 'Bioenergy'").index
+    sel = df_processed.query(
+        "Fueltype == 'Wind' and Technology == 'Combustion Engine' and Filesuffix == 'Bioenergy'"
+    ).index
     df_processed.loc[sel, "Fueltype"] = "Biogas"
 
     mask = df_processed.query(

From f1a35e4378d6248c17b7a97aaecd910283ee8f34 Mon Sep 17 00:00:00 2001
From: Fabian Neumann <fabian.neumann@outlook.de>
Date: Thu, 21 Aug 2025 14:05:27 +0200
Subject: [PATCH 40/68] config: simplify formatting (fewer lists)

---
 powerplantmatching/package_data/config.yaml | 426 ++++++++++++++------
 1 file changed, 292 insertions(+), 134 deletions(-)

diff --git a/powerplantmatching/package_data/config.yaml b/powerplantmatching/package_data/config.yaml
index d333dd9f..c68a9bc5 100644
--- a/powerplantmatching/package_data/config.yaml
+++ b/powerplantmatching/package_data/config.yaml
@@ -1,7 +1,7 @@
 # ---------------------------------------------------------------------------- #
 #                                   IO Config                                  #
 # ---------------------------------------------------------------------------- #
-entsoe_token:
+entsoe_token: "17f212db-55c5-49a5-b7e4-5b4036f22249"
 google_api_key:
 
 # ---------------------------------------------------------------------------- #
@@ -39,8 +39,8 @@ fully_included_sources:
   - GND
 
 
-parallel_duke_processes: false
-process_limit: 4
+parallel_duke_processes: true
+process_limit: 14
 matched_data_url: https://raw.githubusercontent.com/PyPSA/powerplantmatching/{tag}/powerplants.csv
 
 # ---------------------------------------------------------------------------- #
@@ -293,58 +293,133 @@ target_fueltypes:
   # given by the list. An empty string results in a regex expression containing only the key.
   # Parsed of representatives at the top may be overwritten by representatives further below.
   Other: ".*"
-  Solid Biomass: [biological, bioenergy, agricultural, biomass, feste biomasse, biomasa, biomassa, feste biogene stoffe, pellets, stroh, straw]
-  Biogas: [biogas, biogaz, biomethan, gasförmige biomasse]
-  Nuclear: [nuclear, kernkraft, atomkraft, nucléaire, atomowa, jądrowa, kjernekraft, atoom]
+  Solid Biomass: 
+    - biological
+    - bioenergy
+    - agricultural
+    - biomass
+    - feste biomasse
+    - biomasa
+    - biomassa
+    - feste biogene stoffe
+    - pellets
+    - stroh
+    - straw
+  Biogas: 
+    - biogas
+    - biogaz
+    - biomethan
+    - gasförmige biomasse
+  Nuclear: 
+    - nuclear
+    - kernkraft
+    - atomkraft
+    - nucléaire
+    - atomowa
+    - jądrowa
+    - kjernekraft
+    - atoom
   Natural Gas:
-    [
-      ccgt,
-      gas,
-      natural gas,
-      ocgt,
-      lng,
-      combined cycle,
-      fossil gas,
-      mixed fossil fuels,
-      erdgas,
-      andere gase,
-      gaz,
-      gaz naturel,
-      gas natural,
-      naturgass,
-      gaz ziemny,
-      gass,
-      aardgas,
-      flüssiggas
-    ]
+    - ccgt
+    - gas
+    - natural gas
+    - ocgt
+    - lng
+    - combined cycle
+    - fossil gas
+    - mixed fossil fuels
+    - erdgas
+    - andere gase
+    - gaz
+    - gaz naturel
+    - gas natural
+    - naturgass
+    - gaz ziemny
+    - gass
+    - aardgas
+    - flüssiggas
   Hydro:
-    [
-      run-off,
-      run off,
-      run of river,
-      run-of-river,
-      ror,
-      hydro,
-      hidro,
-      hydraulique,
-      hydroelectric,
-      wasserkraft,
-      waterkracht,
-      wasser,
-      vannkraft,
-      vattenkraft,
-      wodna,
-      idroelettrica,
-      idraulica,
-    ]
-  Hard Coal: [coal, coke, steinkohle, houille, charbon dur, hulla, carbón duro, carbone duro, antracite, steinkul, węgiel kamienny, steenkool]
-  Lignite: [brown coal, lignite, peat, braunkohle, ligni.*, brunatny, brunkul, bruinkool]
-  Oil: [oil, diesel, biodiesel, methanol, heizöl, ethanol, mineralölprodukte, öl, fioul, mazout, petrol, olio, olej, carburante, olie]
+    - run-off
+    - run off
+    - run of river
+    - run-of-river
+    - ror
+    - hydro
+    - hidro
+    - hydraulique
+    - hydroelectric
+    - wasserkraft
+    - waterkracht
+    - wasser
+    - vannkraft
+    - vattenkraft
+    - wodna
+    - idroelettrica
+    - idraulica
+  Hard Coal:
+    - coal
+    - coke
+    - steinkohle
+    - houille
+    - charbon dur
+    - hulla
+    - carbón duro
+    - carbone duro
+    - antracite
+    - steinkul
+    - węgiel kamienny
+    - steenkool
+  Lignite:
+    - brown coal
+    - lignite
+    - peat
+    - braunkohle
+    - ligni.*
+    - brunatny
+    - brunkul
+    - bruinkool
+  Oil:
+    - oil
+    - diesel
+    - biodiesel
+    - methanol
+    - heizöl
+    - ethanol
+    - mineralölprodukte
+    - öl
+    - fioul
+    - mazout
+    - petrol
+    - olio
+    - olej
+    - carburante
+    - olie
   Geothermal: ""
   Solar: ""
-  Waste: ["abfall.*", "waste", "mva", "müll", "afval", "affald", "energy recovery", "incineration", "reststoffe", "refuse", "déchets", "ordures", "residuos", "basura", "rifiuti", "scarti", "odpady", "śmieci", "abfälle"]
+  Waste:
+    - "abfall.*"
+    - waste
+    - mva
+    - müll
+    - afval
+    - affald
+    - energy recovery
+    - incineration
+    - reststoffe
+    - refuse
+    - déchets
+    - ordures
+    - residuos
+    - basura
+    - rifiuti
+    - scarti
+    - odpady
+    - śmieci
+    - abfälle
   Wind: ""
-  Battery: [Electro-chemical, battery]
+  Battery:
+    - Electro-chemical
+    - battery
   Mechanical Storage: ""
   Heat Storage: ""
   Hydrogen Storage: ""
@@ -355,52 +430,137 @@ target_sets:
   # Parsed of representatives at the top may be overwritten by representatives further below.
   PP: ".*"
   CHP:
-    [
-      heizkraftwerk,
-      hkw,
-      kwk,
-      fhkw,
-      gud,
-      hp,
-      bhkw,
-      cogeneration,
-      power and heat,
-      heat and power,
-      chp,
-      cogen,
-      heat & power,
-      power & heat,
-      cogeneración,
-      cogenerazione,
-      kogeneracja,
-      combinada calor y electricidad,
-      kraftvarmeverk,
-      kraftvarmeværk,
-      samproduktion,
-      samproduksjon,
-      kvv,
-      wkk,
-      warmtekrachtkoppeling,
-      warmte-krachtcentrale,
-    ]
-  Store: [battery, storage, store, speicher, pumped]
+    - heizkraftwerk
+    - hkw
+    - kwk
+    - fhkw
+    - gud
+    - hp
+    - bhkw
+    - cogeneration
+    - power and heat
+    - heat and power
+    - chp
+    - cogen
+    - heat & power
+    - power & heat
+    - cogeneración
+    - cogenerazione
+    - kogeneracja
+    - combinada calor y electricidad
+    - kraftvarmeverk
+    - kraftvarmeværk
+    - samproduktion
+    - samproduksjon
+    - kvv
+    - wkk
+    - warmtekrachtkoppeling
+    - warmte-krachtcentrale
+  Storage:
+    - battery
+    - storage
+    - store
+    - speicher
+    - pumped
 target_technologies:
   # Provide a mapping of the keys to a list or a regex expression which are used for parsing.
   # A list will be converted to a regex expression matching all words (case-insensitive)
   # given by the list. An empty string results in a regex expression containing only the key.
   # Parsed of representatives at the top may be overwritten by representatives further below.
-  CCGT: [ccgt, gas, natural gas, gasturbinen mit abhitzekessel]
-  OCGT: [ocgt, gasturbinen ohne abhitzekessel]
-  Steam Turbine: [steam, turbine, kondensationsmaschine, gegendruckmaschine, dampfmotor]
-  Combustion Engine: [combustion engine, verbrennungsmotor, stirlingmotor]
-  Run-Of-River: [run-off, run off, run of river, run-of-river, ror, laufwasseranlage, laufwasser, abwasserkraft, trinkwassersystem, brauchwassersystem, pasada, przepływowa, fluente, elvekraft, doorstroom, älvkraft]
-  Reservoir: [reservoir, réservoir, impoundment, talsperre, stausee, speicherwasseranlage, speicherwasser, barrage, embalse, bacino, zbiornik, magasinverk, damkraftverk, reguleringsmagasin]
-  Pumped Storage: [pumped hydro, pumped, kavernen, bombeo, reversible, reversibel, oberbecken, unterbecken, pompage, pompaggio, pompowa, pumpekraftverk]
+  CCGT:
+   - ccgt
+   - gas
+   - natural gas
+   - gasturbinen mit abhitzekessel
+  OCGT:
+   - ocgt
+   - gasturbinen ohne abhitzekessel
+  Steam Turbine:
+   - steam
+   - turbine
+   - kondensationsmaschine
+   - gegendruckmaschine
+   - dampfmotor
+  Combustion Engine:
+   - combustion engine
+   - verbrennungsmotor
+   - stirlingmotor
+  Run-Of-River:
+   - run-off
+   - run off
+   - run of river
+   - run-of-river
+   - ror
+   - laufwasseranlage
+   - laufwasser
+   - abwasserkraft
+   - trinkwassersystem
+   - brauchwassersystem
+   - pasada
+   - przepływowa
+   - fluente
+   - elvekraft
+   - doorstroom
+   - älvkraft
+  Reservoir:
+   - reservoir
+   - réservoir
+   - impoundment
+   - talsperre
+   - stausee
+   - speicherwasseranlage
+   - speicherwasser
+   - barrage
+   - embalse
+   - bacino
+   - zbiornik
+   - magasinverk
+   - damkraftverk
+   - reguleringsmagasin
+  Pumped Storage:
+   - pumped hydro
+   - pumped
+   - kavernen
+   - bombeo
+   - reversible
+   - reversibel
+   - oberbecken
+   - unterbecken
+   - pompage
+   - pompaggio
+   - pompowa
+   - pumpekraftverk
   Marine: ""
-  PV: [pv, photo-voltaic, photo voltaic]
+  PV:
+   - pv
+   - photo-voltaic
+   - photo voltaic
   CSP: ""
-  Onshore: ["onshore", "an land", "terrestre", "landvind", "på land", "op land", "lądowy", "su terra", "en tierra", "à terre"]
-  Offshore: ["offshore", "nearshore", "auf see", "en mer", "marino", "en mar", "in mare", "morski", "havvind", "til havs", "på havet", "op zee", "zeewind"]
+  Onshore:
+   - onshore
+   - an land
+   - terrestre
+   - landvind
+   - på land
+   - op land
+   - lądowy
+   - su terra
+   - en tierra
+   - à terre
+  Offshore:
+   - offshore
+   - nearshore
+   - auf see
+   - en mer
+   - marino
+   - en mar
+   - in mare
+   - morski
+   - havvind
+   - til havs
+   - på havet
+   - op zee
+   - zeewind
 clean_name:
   remove_common_words: false # remove words which appear more that 20 times in all entries
   remove_duplicated_words: true
@@ -408,51 +568,49 @@ clean_name:
     " ": "[^a-zA-Z]" # non-alphabetical symbols
     "":
       # This should be a list, if remove_common_words is true.
-      [
-        I,
-        II,
-        III,
-        IV,
-        V,
-        VI,
-        VII,
-        VIII,
-        IX,
-        X,
-        XI,
-        parque,
-        grupo,
-        station,
-        power,
-        plant,
-        unit,
-        kraftwerk,
-        kw,
-        hkw,
-        nuclear,
-        thermal,
-        heizkraftwerk,
-        eolico,
-        project,
-        hydroelectric,
-        pumped,
-        storage,
-        france,
-        austria,
-        sweden,
-        serbia,
-        ukraine,
-        switzerland,
-        slovakia,
-        croatia,
-        poland,
-        slovenia,
-        portugal,
-        bosnia,
-        and,
-        herzegovina,
-        \w, #remove single letters
-      ]
+      - I
+      - II
+      - III
+      - IV
+      - V
+      - VI
+      - VII
+      - VIII
+      - IX
+      - X
+      - XI
+      - parque
+      - grupo
+      - station
+      - power
+      - plant
+      - unit
+      - kraftwerk
+      - kw
+      - hkw
+      - nuclear
+      - thermal
+      - heizkraftwerk
+      - eolico
+      - project
+      - hydroelectric
+      - pumped
+      - storage
+      - france
+      - austria
+      - sweden
+      - serbia
+      - ukraine
+      - switzerland
+      - slovakia
+      - croatia
+      - poland
+      - slovenia
+      - portugal
+      - bosnia
+      - and
+      - herzegovina
+      - \w #remove single letters
     "ss": "ß"
 
 # ---------------------------------------------------------------------------- #

From f63287391ee292d689a47ff949f6495ae1dfe6e7 Mon Sep 17 00:00:00 2001
From: "pre-commit-ci[bot]"
 <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Date: Thu, 21 Aug 2025 12:05:38 +0000
Subject: [PATCH 41/68] [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci
---
 powerplantmatching/package_data/config.yaml | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/powerplantmatching/package_data/config.yaml b/powerplantmatching/package_data/config.yaml
index c68a9bc5..ddb376a4 100644
--- a/powerplantmatching/package_data/config.yaml
+++ b/powerplantmatching/package_data/config.yaml
@@ -293,7 +293,7 @@ target_fueltypes:
   # given by the list. An empty string results in a regex expression containing only the key.
   # Parsed of representatives at the top may be overwritten by representatives further below.
   Other: ".*"
-  Solid Biomass: 
+  Solid Biomass:
     - biological
     - bioenergy
     - agricultural
@@ -305,12 +305,12 @@ target_fueltypes:
     - pellets
     - stroh
     - straw
-  Biogas: 
+  Biogas:
     - biogas
     - biogaz
     - biomethan
     - gasförmige biomasse
-  Nuclear: 
+  Nuclear:
     - nuclear
     - kernkraft
     - atomkraft

From b81c5d822aab79f8a126488777d918595cdffb36 Mon Sep 17 00:00:00 2001
From: Fabian Neumann <fabian.neumann@outlook.de>
Date: Fri, 22 Aug 2025 11:39:11 +0200
Subject: [PATCH 42/68] separate filters for matching_sources and
 fully_included_sources

---
 powerplantmatching/collection.py |  4 ++++
 powerplantmatching/utils.py      | 11 +----------
 2 files changed, 5 insertions(+), 10 deletions(-)

diff --git a/powerplantmatching/collection.py b/powerplantmatching/collection.py
index f19c9b5e..9995fae2 100644
--- a/powerplantmatching/collection.py
+++ b/powerplantmatching/collection.py
@@ -71,6 +71,10 @@ def df_by_name(name):
         get_df = getattr(data, name)
         df = get_df(config=config)
 
+        for source in config["matching_sources"]:
+            if isinstance(source, dict) and next(iter(source)) == name:
+                df = df.query(source[name])
+
         if not conf.get("aggregated_units", False):
             return aggregate_units(df, dataset_name=name, config=config)
         else:
diff --git a/powerplantmatching/utils.py b/powerplantmatching/utils.py
index b77376b3..752fa89b 100644
--- a/powerplantmatching/utils.py
+++ b/powerplantmatching/utils.py
@@ -124,16 +124,7 @@ def config_filter(df, config):
 
     main_query = config.get("main_query", "")
 
-    # individual filter from config.yaml
-    queries = {}
-    for source in config["matching_sources"]:
-        if isinstance(source, dict):
-            queries.update(source)
-        else:
-            queries[source] = ""
-    ds_query = queries.get(name, "")
-
-    query = " and ".join([q for q in [target_query, main_query, ds_query] if q])
+    query = " and ".join([q for q in [target_query, main_query] if q])
 
     df = correct_manually(df, name, config=config)
 

From 2c792ec652c79d3d7b955c22cd2b40b88cbc8e42 Mon Sep 17 00:00:00 2001
From: Fabian Neumann <fabian.neumann@outlook.de>
Date: Fri, 22 Aug 2025 11:39:35 +0200
Subject: [PATCH 43/68] cleaning: improve handling of abbreviations

---
 powerplantmatching/cleaning.py | 11 ++++++++++-
 1 file changed, 10 insertions(+), 1 deletion(-)

diff --git a/powerplantmatching/cleaning.py b/powerplantmatching/cleaning.py
index 523c315f..91f47f3b 100644
--- a/powerplantmatching/cleaning.py
+++ b/powerplantmatching/cleaning.py
@@ -340,7 +340,16 @@ def clean_technology(df, generalize_hydros=False):
         .str.split(", ")
         .apply(lambda x: ", ".join(i.strip() for i in np.unique(x)))
     )
-    tech = tech.replace({"Ccgt": "CCGT", "Ocgt": "OCGT"}, regex=True)
+    ABBREVIATIONS = {
+        "Ccgt": "CCGT",
+        "Ocgt": "OCGT",
+        "Pv": "PV",
+        "Nas": "NaS",
+        "Nicd": "NiCd",
+        "Nanicl": "NaNiCl",
+        "Caes": "CAES",
+    }
+    tech = tech.replace(ABBREVIATIONS, regex=True)
     return df.assign(Technology=tech)
 
 

From ea16d3d5057c0126744b0e2e497ed06bab0a7429 Mon Sep 17 00:00:00 2001
From: Fabian Neumann <fabian.neumann@outlook.de>
Date: Fri, 22 Aug 2025 11:40:05 +0200
Subject: [PATCH 44/68] utils: do not mark hydrogen storage as uncommon
 fueltype

---
 powerplantmatching/utils.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/powerplantmatching/utils.py b/powerplantmatching/utils.py
index 752fa89b..646c61ea 100644
--- a/powerplantmatching/utils.py
+++ b/powerplantmatching/utils.py
@@ -187,7 +187,6 @@ def set_uncommon_fueltypes_to_other(df, fillna_other=True, config=None, **kwargs
     default = [
         "Mixed fuel types",
         "Electro-mechanical",
-        "Hydrogen Storage",
     ]
     fueltypes = kwargs.get("fueltypes", default)
     df.loc[df.Fueltype.isin(fueltypes), "Fueltype"] = "Other"

From 44b66adfd333c66d9b262606005862f101beb531 Mon Sep 17 00:00:00 2001
From: Fabian Neumann <fabian.neumann@outlook.de>
Date: Fri, 22 Aug 2025 11:44:16 +0200
Subject: [PATCH 45/68] .gitignore .ipynb

---
 .gitignore | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/.gitignore b/.gitignore
index e883d2b7..fe079f42 100644
--- a/.gitignore
+++ b/.gitignore
@@ -99,3 +99,6 @@ test.ipynb
 
 # uv
 uv.lock
+
+# jupyter
+*.ipynb
\ No newline at end of file

From a80acd84349a41807fdc5e462e8e274a7f10ea40 Mon Sep 17 00:00:00 2001
From: "pre-commit-ci[bot]"
 <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Date: Fri, 22 Aug 2025 09:44:28 +0000
Subject: [PATCH 46/68] [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci
---
 .gitignore | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.gitignore b/.gitignore
index fe079f42..e71eeaf1 100644
--- a/.gitignore
+++ b/.gitignore
@@ -101,4 +101,4 @@ test.ipynb
 uv.lock
 
 # jupyter
-*.ipynb
\ No newline at end of file
+*.ipynb

From a396d93e3579b6e45f73e101e7aeed936621c7e8 Mon Sep 17 00:00:00 2001
From: Fabian Neumann <fabian.neumann@outlook.de>
Date: Fri, 22 Aug 2025 14:24:21 +0200
Subject: [PATCH 47/68] correct config.yaml

---
 powerplantmatching/package_data/config.yaml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/powerplantmatching/package_data/config.yaml b/powerplantmatching/package_data/config.yaml
index ddb376a4..dc1d09cc 100644
--- a/powerplantmatching/package_data/config.yaml
+++ b/powerplantmatching/package_data/config.yaml
@@ -1,7 +1,7 @@
 # ---------------------------------------------------------------------------- #
 #                                   IO Config                                  #
 # ---------------------------------------------------------------------------- #
-entsoe_token: "17f212db-55c5-49a5-b7e4-5b4036f22249"
+entsoe_token: ""
 google_api_key:
 
 # ---------------------------------------------------------------------------- #

From 28d5d58ad16ba35b70bbbba61a64b7bac0e57dce Mon Sep 17 00:00:00 2001
From: Fabian Neumann <fabian.neumann@outlook.de>
Date: Fri, 22 Aug 2025 19:53:11 +0200
Subject: [PATCH 48/68] enable multiprocessing in unit aggregation for
 non-matched resources

---
 powerplantmatching/cleaning.py              |  9 +++++++--
 powerplantmatching/duke.py                  |  2 ++
 powerplantmatching/heuristics.py            |  4 +++-
 powerplantmatching/package_data/config.yaml |  4 ++--
 powerplantmatching/utils.py                 | 16 +++++++++++++---
 5 files changed, 27 insertions(+), 8 deletions(-)

diff --git a/powerplantmatching/cleaning.py b/powerplantmatching/cleaning.py
index 91f47f3b..0bb89099 100644
--- a/powerplantmatching/cleaning.py
+++ b/powerplantmatching/cleaning.py
@@ -387,6 +387,7 @@ def aggregate_units(
     pre_clean_name=False,
     country_wise=True,
     config=None,
+    threads=1,
     **kwargs,
 ):
     """
@@ -405,6 +406,8 @@ def aggregate_units(
         Whether to clean the 'Name'-column before aggregating.
     country_wise : Boolean, default True
         Whether to aggregate only entries with a identical country.
+    threads : int, default 1
+        Number of threads to use
     """
     deprecated_args = {"use_saved_aggregation", "save_aggregation"}
     used_deprecated_args = deprecated_args.intersection(kwargs)
@@ -445,9 +448,11 @@ def aggregate_units(
 
     if country_wise:
         countries = df.Country.unique()
-        duplicates = pd.concat([duke(df.query("Country == @c")) for c in countries])
+        duplicates = pd.concat(
+            [duke(df.query("Country == @c"), threads=threads) for c in countries]
+        )
     else:
-        duplicates = duke(df)
+        duplicates = duke(df, threads=threads)
 
     df = cliques(df, duplicates)
     df = df.groupby("grouped").agg(props_for_groups)
diff --git a/powerplantmatching/duke.py b/powerplantmatching/duke.py
index 6eefe393..37cc4929 100644
--- a/powerplantmatching/duke.py
+++ b/powerplantmatching/duke.py
@@ -52,6 +52,7 @@ def duke(
     showmatches=False,
     keepfiles=False,
     showoutput=False,
+    threads=1,
 ):
     """
     Run duke in different modes (Deduplication or Record Linkage Mode) to
@@ -119,6 +120,7 @@ def duke(
             "-Dfile.encoding=UTF-8",
             "no.priv.garshol.duke.Duke",
             "--linkfile=linkfile.txt",
+            f"--threads={threads}",
         ]
         if singlematch:
             args.append("--singlematch")
diff --git a/powerplantmatching/heuristics.py b/powerplantmatching/heuristics.py
index b40ec2af..011b724f 100644
--- a/powerplantmatching/heuristics.py
+++ b/powerplantmatching/heuristics.py
@@ -66,6 +66,8 @@ def extend_by_non_matched(
     if config is None:
         config = get_config()
 
+    threads = config.get("threads_extend_by_non_matched", 1)
+
     if isinstance(extend_by, str):
         label = extend_by
         extend_by = getattr(data, extend_by)(config=config)
@@ -82,7 +84,7 @@ def extend_by_non_matched(
 
     if aggregate_added_data and not extend_by.empty:
         extend_by = aggregate_units(
-            extend_by, dataset_name=label, config=config, **aggkwargs
+            extend_by, dataset_name=label, config=config, threads=threads, **aggkwargs
         )
         extend_by["projectID"] = extend_by.projectID.map(lambda x: {label: x})
     else:
diff --git a/powerplantmatching/package_data/config.yaml b/powerplantmatching/package_data/config.yaml
index dc1d09cc..3892d9e4 100644
--- a/powerplantmatching/package_data/config.yaml
+++ b/powerplantmatching/package_data/config.yaml
@@ -39,8 +39,8 @@ fully_included_sources:
   - GND
 
 
-parallel_duke_processes: true
-process_limit: 14
+parallel_duke_processes: 16
+threads_extend_by_non_matched: 16
 matched_data_url: https://raw.githubusercontent.com/PyPSA/powerplantmatching/{tag}/powerplants.csv
 
 # ---------------------------------------------------------------------------- #
diff --git a/powerplantmatching/utils.py b/powerplantmatching/utils.py
index 646c61ea..67570a6a 100644
--- a/powerplantmatching/utils.py
+++ b/powerplantmatching/utils.py
@@ -343,7 +343,7 @@ def fun(f, q_in, q_out):
         q_out.put((i, f(x)))
 
 
-def parmap(f, arg_list, config=None):
+def parmap(f, arg_list, config=None, threads=None):
     """
     Parallel mapping function. Use this function to parallelly map function
     f onto arguments in arg_list. The maximum number of parallel threads is
@@ -356,11 +356,21 @@ def parmap(f, arg_list, config=None):
         python function with one argument
     arg_list : list
         list of arguments mapped to f
+    config : dict, default None
+        configuration dictionary
+    threads : int, default None
+        number of parallel threads
     """
     if config is None:
         config = get_config()
-    if config["parallel_duke_processes"]:
-        nprocs = min(multiprocessing.cpu_count(), config["process_limit"])
+
+    if threads is None:
+        threads = config["parallel_duke_processes"]
+    if isinstance(threads, bool):
+        threads = config.get("process_limit", 1)
+
+    if threads > 1:
+        nprocs = min(multiprocessing.cpu_count(), threads)
         logger.info(f"Run process with {nprocs} parallel threads.")
         q_in = multiprocessing.Queue(1)
         q_out = multiprocessing.Queue()

From 55a6b76de6b84bdfa17c91cc74ecf4bed14a02e9 Mon Sep 17 00:00:00 2001
From: Fabian Neumann <fabian.neumann@outlook.de>
Date: Fri, 22 Aug 2025 19:56:24 +0200
Subject: [PATCH 49/68] keep blocks for selected fueltypes and option to
 aggregate only matching sources

---
 powerplantmatching/cleaning.py              | 70 +++++++++++++++++++--
 powerplantmatching/data.py                  | 37 +++++------
 powerplantmatching/package_data/config.yaml | 16 ++---
 3 files changed, 92 insertions(+), 31 deletions(-)

diff --git a/powerplantmatching/cleaning.py b/powerplantmatching/cleaning.py
index 0bb89099..2626db91 100644
--- a/powerplantmatching/cleaning.py
+++ b/powerplantmatching/cleaning.py
@@ -87,20 +87,63 @@ def clean_name(df, config=None):
 
     name = df.Name.astype(str).copy().apply(unidecode.unidecode)
 
+    roman_to_arabic = {
+        "I": "1",
+        "II": "2",
+        "III": "3",
+        "IV": "4",
+        "V": "5",
+        "VI": "6",
+        "VII": "7",
+        "VIII": "8",
+        "IX": "9",
+        "X": "10",
+        "XI": "11",
+    }
+    for roman, arabic in roman_to_arabic.items():
+        name = name.str.replace(rf"\b{roman}\b", arabic, regex=True)
+
     replace = config["clean_name"]["replace"]
     replace.setdefault("", [])
 
+    keep_blocks = config["clean_name"].get("fueltypes_with_blocks", [])
+    if len(keep_blocks) > 0:
+        mask = df.Fueltype.isin(keep_blocks)
+
     for key, pattern in replace.items():
         if config["clean_name"]["remove_common_words"] and (key == ""):
             common_words = pd.Series(sum(name.str.split(), [])).value_counts()
             common_words = list(common_words[common_words >= 20].index)
             pattern += common_words
-        if isinstance(pattern, list):
-            # if pattern is a list, concat all entries in a case-insensitive regex
+
+        pattern = np.atleast_1d(pattern)
+
+        # do not remove block numbers for fuel types with blocks
+        if len(keep_blocks) > 0 and key == " " and "[^a-zA-Z]" in pattern:
+            base = [rf"\b{p}\b" for p in pattern if p != "[^a-zA-Z]"]
+            pattern_keep = r"(?i)" + "|".join(base + [r"[^a-zA-Z0-9]"])
+            pattern_default = r"(?i)" + "|".join(base + [r"[^a-zA-Z]"])
+            name.loc[mask] = name.loc[mask].str.replace(pattern_keep, key, regex=True)
+            name.loc[~mask] = name.loc[~mask].str.replace(
+                pattern_default, key, regex=True
+            )
+
+        # do not remove block letters for fuel types with blocks
+        elif key == "" and "\w" in pattern:
+            pattern_keep = r"(?i)" + "|".join(
+                [rf"\b{p}\b" for p in pattern if p != "\w"]
+            )
+            pattern_default = r"(?i)" + "|".join(
+                [rf"\b{p}\b" for p in pattern if p != "\w"]
+            )
+            name.loc[mask] = name.loc[mask].str.replace(pattern_keep, key, regex=True)
+            name.loc[~mask] = name.loc[~mask].str.replace(
+                pattern_default, key, regex=True
+            )
+
+        else:
             pattern = r"(?i)" + "|".join([rf"\b{p}\b" for p in pattern])
-        elif not isinstance(pattern, str):
-            raise ValueError(f"Pattern must be string or list, not {type(pattern)}")
-        name = name.str.replace(pattern, key, regex=True)
+            name = name.str.replace(pattern, key, regex=True)
 
     if config["clean_name"]["remove_duplicated_words"]:
         name = name.str.replace(r"\b(\w+)(?:\W\1\b)+", r"\1", regex=True, case=False)
@@ -445,13 +488,30 @@ def aggregate_units(
         df = clean_name(df)
 
     logger.info(f"Aggregating blocks in data source '{ds_name}'.")
+    agg_query = None
+    if ds_name in config.get("aggregate_only_matching_sources", []):
+        for source in config["matching_sources"]:
+            if isinstance(source, dict) and ds_name in source:
+                query = source[ds_name]
+                break
+
+    block_query = None
+    if with_blocks := config["clean_name"].get("fuel_type_with_blocks", []):  # noqa
+        block_query = "Fueltype in @with_blocks"
 
     if country_wise:
         countries = df.Country.unique()
+        country_query = "Country == @c"
+        query = " and ".join(filter(None, [agg_query, block_query, country_query]))
+        duplicates = pd.concat(
+            [duke(df.query(query), threads=threads) for c in countries]
+        )
         duplicates = pd.concat(
             [duke(df.query("Country == @c"), threads=threads) for c in countries]
         )
     else:
+        query = " and ".join(filter(None, [agg_query, block_query]))
+        duplicates = duke(df.query(query) if query else df, threads=threads)
         duplicates = duke(df, threads=threads)
 
     df = cliques(df, duplicates)
diff --git a/powerplantmatching/data.py b/powerplantmatching/data.py
index 895b7904..9f63bf84 100644
--- a/powerplantmatching/data.py
+++ b/powerplantmatching/data.py
@@ -670,7 +670,6 @@ def WIKIPEDIA(raw=False, update=False, config=None):
 
     df = (
         df.rename(columns=RENAME_COLUMNS)
-        .pipe(clean_name)
         .pipe(convert_to_short_name)
         .assign(
             Fueltype="Nuclear",
@@ -679,6 +678,7 @@ def WIKIPEDIA(raw=False, update=False, config=None):
             # plants which are not yet built are set to 2027
             DateIn=lambda df: df.DateIn.where(~df.Status.str.contains("In Bau"), 2027),
         )
+        .pipe(clean_name)
         .pipe(set_column_name, "WIKIPEDIA")
         .pipe(config_filter, config)
     )
@@ -1700,8 +1700,7 @@ def GBPT(raw=False, update=False, config=None):
 
     df = df.rename(columns=RENAME_COLUMNS)
     df_final = (
-        df.pipe(clean_name)
-        .pipe(set_column_name, "GBPT")
+        df.pipe(set_column_name, "GBPT")
         .pipe(convert_to_short_name)
         .dropna(subset="Capacity")
         .assign(
@@ -1717,6 +1716,7 @@ def GBPT(raw=False, update=False, config=None):
         .pipe(lambda x: x[df.columns.intersection(config.get("target_columns"))])
         .assign(Technology=np.nan)
         .assign(Set=np.nan)
+        .pipe(clean_name)
         .pipe(config_filter, config)
     )
     return df_final
@@ -1739,7 +1739,7 @@ def GNPT(raw=False, update=False, config=None):
     """
     config = get_config() if config is None else config
     fn = get_raw_file("GNPT", update=update, config=config)
-    df = pd.read_excel(fn, sheet_name="Data")
+    df = pd.read_excel(fn, sheet_name="Data", na_values=["--"])
 
     if raw:
         return df
@@ -1759,11 +1759,11 @@ def GNPT(raw=False, update=False, config=None):
 
     df = df.rename(columns=RENAME_COLUMNS)
     df_final = (
-        df.pipe(clean_name)
-        .pipe(set_column_name, "GNPT")
+        df.pipe(set_column_name, "GNPT")
         .pipe(convert_to_short_name)
         .dropna(subset="Capacity")
         .assign(
+            Name=lambda df: df["Name"] + df["Unit Name"].fillna("").apply(lambda x: f" {x}" if x else ""),
             DateIn=df["DateIn"].apply(pd.to_numeric, errors="coerce"),
             DateOut=df["DateOut"].apply(pd.to_numeric, errors="coerce"),
             lat=df["lat"].apply(pd.to_numeric, errors="coerce"),
@@ -1774,6 +1774,7 @@ def GNPT(raw=False, update=False, config=None):
         .assign(Fueltype="Nuclear")
         .assign(Technology="Steam Turbine")
         .assign(Set="PP")
+        .pipe(clean_name)
         .pipe(config_filter, config)
     )
     return df_final
@@ -1848,8 +1849,7 @@ def GCPT(raw=False, update=False, config=None):
 
     df = df.rename(columns=RENAME_COLUMNS)
     df_final = (
-        df.pipe(clean_name)
-        .pipe(set_column_name, "GCPT")
+        df.pipe(set_column_name, "GCPT")
         .pipe(convert_to_short_name)
         .dropna(subset="Capacity")
         .assign(
@@ -1870,6 +1870,7 @@ def GCPT(raw=False, update=False, config=None):
                 {"Fueltype": fueltype_dict, "Technology": technology_dict}
             )
         )
+        .pipe(clean_name)
         .pipe(config_filter, config)
     )
 
@@ -1913,8 +1914,7 @@ def GGTPT(raw=False, update=False, config=None):
 
     df = df.rename(columns=RENAME_COLUMNS)
     df_final = (
-        df.pipe(clean_name)
-        .pipe(set_column_name, "GGTPT")
+        df.pipe(set_column_name, "GGTPT")
         .pipe(convert_to_short_name)
         .dropna(subset="Capacity")
         .assign(
@@ -1928,6 +1928,7 @@ def GGTPT(raw=False, update=False, config=None):
         .assign(Fueltype="Geothermal")
         .assign(Technology="Steam Turbine")
         .assign(Set="PP")
+        .pipe(clean_name)
         .pipe(config_filter, config)
     )
     return df_final
@@ -1980,8 +1981,7 @@ def GWPT(raw=False, update=False, config=None):
 
     df = df.rename(columns=RENAME_COLUMNS)
     df_final = (
-        df.pipe(clean_name)
-        .pipe(set_column_name, "GWPT")
+        df.pipe(set_column_name, "GWPT")
         .pipe(convert_to_short_name)
         .dropna(subset="Capacity")
         .assign(
@@ -1995,6 +1995,7 @@ def GWPT(raw=False, update=False, config=None):
         .pipe(lambda x: x.replace({"Technology": technology_dict}))
         .assign(Fueltype="Wind")
         .assign(Set="PP")
+        .pipe(clean_name)
         .pipe(config_filter, config)
     )
     return df_final
@@ -2047,8 +2048,7 @@ def GSPT(raw=False, update=False, config=None):
 
     df = df.rename(columns=RENAME_COLUMNS)
     df_final = (
-        df.pipe(clean_name)
-        .pipe(set_column_name, "GSPT")
+        df.pipe(set_column_name, "GSPT")
         .pipe(convert_to_short_name)
         .dropna(subset="Capacity")
         .assign(
@@ -2062,6 +2062,7 @@ def GSPT(raw=False, update=False, config=None):
         .pipe(lambda x: x.replace({"Technology": technology_dict}))
         .assign(Fueltype="Solar")
         .assign(Set="PP")
+        .pipe(clean_name)
         .pipe(config_filter, config)
     )
     return df_final
@@ -2144,8 +2145,7 @@ def classify_fuel(s):
     )
 
     df_final = (
-        df.pipe(clean_name)
-        .pipe(set_column_name, "GGPT")
+        df.pipe(set_column_name, "GGPT")
         .pipe(convert_to_short_name)
         .dropna(subset="Capacity")
         .assign(
@@ -2163,6 +2163,7 @@ def classify_fuel(s):
         .pipe(lambda x: x[df.columns.intersection(config.get("target_columns"))])
         .pipe(lambda x: x.replace({"Technology": technology_dict}))
         .pipe(lambda x: x.replace({"Set": set_dict}))
+        .pipe(clean_name)
         .pipe(config_filter, config)
     )
     return df_final
@@ -2214,8 +2215,7 @@ def GHPT(raw=False, update=False, config=None):
     status_list = config["GHPT"].get("status", ["operating"])  # noqa: F841
     df = df.rename(columns=RENAME_COLUMNS)
     df_final = (
-        df.pipe(clean_name)
-        .pipe(set_column_name, "GHPT")
+        df.pipe(set_column_name, "GHPT")
         .pipe(convert_to_short_name)
         .dropna(subset="Capacity")
         .assign(
@@ -2229,6 +2229,7 @@ def GHPT(raw=False, update=False, config=None):
         .pipe(lambda x: x.replace({"Technology": technology_dict}))
         .assign(Fueltype="Hydro")
         .assign(Set="PP")
+        .pipe(clean_name)
         .pipe(config_filter, config)
     )
     return df_final
diff --git a/powerplantmatching/package_data/config.yaml b/powerplantmatching/package_data/config.yaml
index 3892d9e4..447684f3 100644
--- a/powerplantmatching/package_data/config.yaml
+++ b/powerplantmatching/package_data/config.yaml
@@ -30,14 +30,9 @@ matching_sources:
 # fully_included_sources, these sources are included even without match to the final dataset
 fully_included_sources:
   # Make individual queries for the datasets
-  - ENTSOE: (Country not in ['Switzerland', 'Ireland', 'Albania', 'Greece', 'Czech Republic', 'Bulgaria', 'United Kingdom', 'Italy', 'Serbia'] and not (Country == 'Spain' and Fueltype == 'Hydro')) or (Fueltype == 'Geothermal')
-  - JRC: Country not in ['Switzerland', 'Albania', 'United Kingdom', 'Norway']
-  - OPSD: Country not in ['Switzerland', 'Italy', 'Spain', 'Norway', 'Austria']
-  - GEM: not (Country == 'Germany' and Fueltype in ['Solar', 'Wind'])
-  - EESI: Fueltype != 'Hydro' and not (Country == 'Germany' and Fueltype == 'Battery')
+# these sources skip unit aggregation for fully_included_sources not covered in matching_sources
+aggregate_only_matching_sources:
   - MASTR
-  - GND
-
 
 parallel_duke_processes: 16
 threads_extend_by_non_matched: 16
@@ -222,7 +217,7 @@ EESI:
 GND:
   net_capacity: true
   reliability_score: 5
-  status: ["Shutdown", "Operational", "Planned", "Under Construction", "Decommissioning Completed"]
+  status: ["Shutdown", "Operational", "Under Construction", "Decommissioning Completed"]
   url: https://raw.githubusercontent.com/cristianst85/GeoNuclearData/1bc8b4ac106af236902385b87e46c540b4864815/data/csv/denormalized/nuclear_power_plants.csv
   fn: nuclear_power_plants.csv
 
@@ -562,6 +557,8 @@ target_technologies:
    - op zee
    - zeewind
 clean_name:
+  fueltypes_with_blocks:
+    - Nuclear
   remove_common_words: false # remove words which appear more that 20 times in all entries
   remove_duplicated_words: true
   replace:
@@ -586,6 +583,9 @@ clean_name:
       - plant
       - unit
       - kraftwerk
+      - kernkraftwerk
+      - gemeinschaftskernkraftwerk
+      - kernkw
       - kw
       - hkw
       - nuclear

From 153ef70da424a98ef8798dec7a08b8ed5ec60a22 Mon Sep 17 00:00:00 2001
From: Fabian Neumann <fabian.neumann@outlook.de>
Date: Fri, 22 Aug 2025 19:57:02 +0200
Subject: [PATCH 50/68] finetuned source and matching settings

---
 powerplantmatching/package_data/config.yaml | 29 +++++++++++++--------
 1 file changed, 18 insertions(+), 11 deletions(-)

diff --git a/powerplantmatching/package_data/config.yaml b/powerplantmatching/package_data/config.yaml
index 447684f3..9b805d49 100644
--- a/powerplantmatching/package_data/config.yaml
+++ b/powerplantmatching/package_data/config.yaml
@@ -16,20 +16,27 @@ main_query: "Name != '' and (lat >= 30 or lat != lat)"
 matching_sources:
   # Make individual queries for the datasets as done in `fully_included_sources`
   # Queries are combined with `main_query` with an `and` operator
-  - ENTSOE: Fueltype != 'Solar'
-  - GEO: Fueltype != 'Solar'
-  - GPD: Fueltype != 'Solar'
-  - JRC: Fueltype != 'Solar'
-  - OPSD: Country != "Spain" and Fueltype not in ['Hard Coal', 'Solar']
-  - BEYONDCOAL: Fueltype != 'Solar'
-  - GEM
-  - MASTR
-  - EESI: Fueltype != 'Solar'
-  - GND: Fueltype != 'Solar'
+  - ENTSOE: not (Country == 'Germany' and Fueltype  == 'Wind') # wind is per turbine rather than park in MASTR and unsuitable for matching
+  - GEO: Capacity >= 1 and not (Country == 'Germany' and Fueltype  == 'Wind') and Fueltype not in ['Oil', 'Nuclear'] and not (Country in ['Bulgaria', 'Slovakia'] and Fueltype == 'Hard Coal')
+  - GPD: Capacity >= 1 and not (Country == 'Germany' and Fueltype  == 'Wind') and not (Country in ['Czechia', 'Bulgaria', 'Romania'] and Fueltype == 'Hard Coal') and Fueltype != 'Nuclear'
+  - JRC: Capacity >= 1 and not (Country == 'Germany' and Fueltype  == 'Wind') # do not match small hydro
+  - OPSD: not (Country == 'Germany' and Fueltype  == 'Wind') and ((Capacity >= 1 and Fueltype != 'Solar') or Capacity >= 3) and not (Country == 'Spain' and Fueltype == 'Hard Coal') and not (Country == 'Italy' and Fueltype == 'Natural Gas')
+  - BEYONDCOAL
+  - GEM: Capacity >= 3 and not (Country == 'Germany' and Fueltype  == 'Wind')
+  # do not match units below 1 MW (2 MW for biogas, 3 MW for solar), exclude wind in Germany from any matching
+  - MASTR: (Fueltype != 'Wind') and ((Fueltype == 'Solar' and Capacity >= 3) or (Fueltype == 'Biogas' and Capacity >= 2) or (Fueltype not in ['Solar', 'Biogas'] and Capacity >= 1))
+  - EESI
 
-# fully_included_sources, these sources are included even without match to the final dataset
+# # fully_included_sources, these sources are included even without match to the final dataset
 fully_included_sources:
   # Make individual queries for the datasets
+  - GEM: not (Country == 'Germany' and Fueltype in ['Solar', 'Wind']) # wind and solar in Germany is covered by MASTR
+  - EESI: Fueltype != 'Hydro' and not (Country == 'Germany' and Fueltype == 'Battery') # battery in Germany is covered by MASTR
+  - MASTR: Capacity >= 0.1 and Fueltype != 'Nuclear'
+  - OPSD: Country != 'Germany' and Capacity < 1 and Capacity >= 0.1 and Fueltype == 'Hydro'# take small hydro outside Germany from OPSD (highest coverage)
+  - BEYONDCOAL
+  - JRC: Country in ['Italy', 'Croatia', 'Serbia', 'Slovakia']
+
 # these sources skip unit aggregation for fully_included_sources not covered in matching_sources
 aggregate_only_matching_sources:
   - MASTR

From b7066d4a3a2fd6f3d731b34cefffdea52cea8199 Mon Sep 17 00:00:00 2001
From: "pre-commit-ci[bot]"
 <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Date: Fri, 22 Aug 2025 17:57:19 +0000
Subject: [PATCH 51/68] [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci
---
 powerplantmatching/data.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/powerplantmatching/data.py b/powerplantmatching/data.py
index 9f63bf84..83bc62d7 100644
--- a/powerplantmatching/data.py
+++ b/powerplantmatching/data.py
@@ -1763,7 +1763,8 @@ def GNPT(raw=False, update=False, config=None):
         .pipe(convert_to_short_name)
         .dropna(subset="Capacity")
         .assign(
-            Name=lambda df: df["Name"] + df["Unit Name"].fillna("").apply(lambda x: f" {x}" if x else ""),
+            Name=lambda df: df["Name"]
+            + df["Unit Name"].fillna("").apply(lambda x: f" {x}" if x else ""),
             DateIn=df["DateIn"].apply(pd.to_numeric, errors="coerce"),
             DateOut=df["DateOut"].apply(pd.to_numeric, errors="coerce"),
             lat=df["lat"].apply(pd.to_numeric, errors="coerce"),

From d446aba70b9b4fe382a63afc8f324a2d7bb76d7e Mon Sep 17 00:00:00 2001
From: Fabian Neumann <fabian.neumann@outlook.de>
Date: Fri, 22 Aug 2025 20:02:29 +0200
Subject: [PATCH 52/68] fix typo

---
 powerplantmatching/cleaning.py              | 2 +-
 powerplantmatching/package_data/config.yaml | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/powerplantmatching/cleaning.py b/powerplantmatching/cleaning.py
index 2626db91..01ec1aa5 100644
--- a/powerplantmatching/cleaning.py
+++ b/powerplantmatching/cleaning.py
@@ -492,7 +492,7 @@ def aggregate_units(
     if ds_name in config.get("aggregate_only_matching_sources", []):
         for source in config["matching_sources"]:
             if isinstance(source, dict) and ds_name in source:
-                query = source[ds_name]
+                agg_query = source[ds_name]
                 break
 
     block_query = None
diff --git a/powerplantmatching/package_data/config.yaml b/powerplantmatching/package_data/config.yaml
index 9b805d49..ec710b36 100644
--- a/powerplantmatching/package_data/config.yaml
+++ b/powerplantmatching/package_data/config.yaml
@@ -33,7 +33,7 @@ fully_included_sources:
   - GEM: not (Country == 'Germany' and Fueltype in ['Solar', 'Wind']) # wind and solar in Germany is covered by MASTR
   - EESI: Fueltype != 'Hydro' and not (Country == 'Germany' and Fueltype == 'Battery') # battery in Germany is covered by MASTR
   - MASTR: Capacity >= 0.1 and Fueltype != 'Nuclear'
-  - OPSD: Country != 'Germany' and Capacity < 1 and Capacity >= 0.1 and Fueltype == 'Hydro'# take small hydro outside Germany from OPSD (highest coverage)
+  - OPSD: Country != 'Germany' and Capacity < 1 and Capacity >= 0.1 and Fueltype == 'Hydro' # take small hydro outside Germany from OPSD (highest coverage)
   - BEYONDCOAL
   - JRC: Country in ['Italy', 'Croatia', 'Serbia', 'Slovakia']
 

From a3335ea1dea8b4132ca8b73ab75b58f9b1fce107 Mon Sep 17 00:00:00 2001
From: Fabian Neumann <fabian.neumann@outlook.de>
Date: Fri, 22 Aug 2025 20:04:25 +0200
Subject: [PATCH 53/68] fix another typo

---
 powerplantmatching/cleaning.py | 4 ----
 1 file changed, 4 deletions(-)

diff --git a/powerplantmatching/cleaning.py b/powerplantmatching/cleaning.py
index 01ec1aa5..b49a1b3a 100644
--- a/powerplantmatching/cleaning.py
+++ b/powerplantmatching/cleaning.py
@@ -506,13 +506,9 @@ def aggregate_units(
         duplicates = pd.concat(
             [duke(df.query(query), threads=threads) for c in countries]
         )
-        duplicates = pd.concat(
-            [duke(df.query("Country == @c"), threads=threads) for c in countries]
-        )
     else:
         query = " and ".join(filter(None, [agg_query, block_query]))
         duplicates = duke(df.query(query) if query else df, threads=threads)
-        duplicates = duke(df, threads=threads)
 
     df = cliques(df, duplicates)
     df = df.groupby("grouped").agg(props_for_groups)

From 94264f25d78740c349bbf6e1407fc2a7177f1ca9 Mon Sep 17 00:00:00 2001
From: Fabian Neumann <fabian.neumann@outlook.de>
Date: Fri, 22 Aug 2025 20:14:23 +0200
Subject: [PATCH 54/68] amend release notes

---
 doc/release-notes.rst | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/doc/release-notes.rst b/doc/release-notes.rst
index e2c2de5f..c5748c13 100644
--- a/doc/release-notes.rst
+++ b/doc/release-notes.rst
@@ -24,6 +24,12 @@ Upcoming Version
 
 * Include mothballed gas, oil and coal power plants.
 
+* Added option to retain blocks for subsets of fuel types (e.g. `clean_name: fueltypes_with_blocks: ['Nuclear']`).
+
+* For fully included datasets, add option to only aggregate units included in the matching process (e.g. `aggregate_only_matching_sources: ['MASTR']`).
+
+* Added option for multiprocessing when aggregating units of non-matched power plants (e.g. `threads_extend_by_non_matched: 16`).
+
 * Updating matching logic configuration.
 
 

From 07cc23cb5881785d79c259a308179e416dfae735 Mon Sep 17 00:00:00 2001
From: Fabian Neumann <fabian.neumann@outlook.de>
Date: Sun, 24 Aug 2025 11:05:30 +0200
Subject: [PATCH 55/68] remove zero values from summed non-weighted numeric
 columns in aggregate_units function

---
 powerplantmatching/cleaning.py | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/powerplantmatching/cleaning.py b/powerplantmatching/cleaning.py
index b49a1b3a..c5221015 100644
--- a/powerplantmatching/cleaning.py
+++ b/powerplantmatching/cleaning.py
@@ -526,4 +526,9 @@ def aggregate_units(
         .reindex(columns=cols)
         .pipe(set_column_name, ds_name)
     )
+
+    # Remove zero values from summed non-weighted numeric columns
+    numeric_cols = df.select_dtypes(include="number").columns
+    df[numeric_cols] = df[numeric_cols].where(lambda df: df != 0)
+
     return df

From e7cbaedbcd8ad72c185d58f886387c038d9c9e63 Mon Sep 17 00:00:00 2001
From: Fabian Neumann <fabian.neumann@outlook.de>
Date: Sun, 24 Aug 2025 11:28:04 +0200
Subject: [PATCH 56/68] add GloHydroRES dataset

---
 powerplantmatching/cleaning.py              | 11 +++-
 powerplantmatching/data.py                  | 71 +++++++++++++++++++++
 powerplantmatching/package_data/config.yaml | 19 ++++++
 3 files changed, 99 insertions(+), 2 deletions(-)

diff --git a/powerplantmatching/cleaning.py b/powerplantmatching/cleaning.py
index c5221015..9b0960de 100644
--- a/powerplantmatching/cleaning.py
+++ b/powerplantmatching/cleaning.py
@@ -146,8 +146,15 @@ def clean_name(df, config=None):
             name = name.str.replace(pattern, key, regex=True)
 
     if config["clean_name"]["remove_duplicated_words"]:
-        name = name.str.replace(r"\b(\w+)(?:\W\1\b)+", r"\1", regex=True, case=False)
-    name = name.str.strip().str.title().str.replace(r" +", " ", regex=True)
+        name = (
+            name.str.replace(r"\b(\w+)(?:\W\1\b)+", r"\1", regex=True, case=False)
+            .str.strip()
+            .str.replace(r" +", " ", regex=True)
+            .str.title()
+            .str.replace(r"\b(\w+)(?:\W\1\b)+", r"\1", regex=True, case=False)
+        )
+    else:
+        name = name.str.strip().str.title().str.replace(r" +", " ", regex=True)
 
     return df.assign(Name=name).sort_values("Name")
 
diff --git a/powerplantmatching/data.py b/powerplantmatching/data.py
index 83bc62d7..b8d9118c 100644
--- a/powerplantmatching/data.py
+++ b/powerplantmatching/data.py
@@ -2685,6 +2685,77 @@ def GND(
     return df_final
 
 
+def GHR(
+    raw=False,
+    update=False,
+    config=None,
+):
+    """
+    Get the GloHydroRes (GHR) dataset.
+
+    https://www.nature.com/articles/s41597-025-04975-0
+
+    https://zenodo.org/records/14526360
+
+    Parameters
+    ----------
+    raw : Boolean, default False
+        Whether to return the original dataset
+    update: bool, default False
+        Whether to update the data from the url.
+    config : dict, default None
+        Add custom specific configuration, e.g.
+        powerplantmatching.config.get_config(target_countries='Italy'), defaults
+        to powerplantmatching.config.get_config()
+    """
+
+    config = get_config() if config is None else config
+
+    fn = get_raw_file("GHR", update=update, config=config)
+
+    df = pd.read_csv(fn)
+
+    if raw:
+        return df
+
+    RENAME_COLUMNS = {
+        "ID": "projectID",
+        "name": "Name",
+        "country": "Country",
+        "Latitude": "plant_lat",
+        "Longitude": "plant_lon",
+        "plant_type": "Technology",
+        "dam_height_m": "DamHeight_m",
+        "year": "DateIn",
+    }
+    TECHNOLOGY_MAP = {
+        "STO": "Reservoir",
+        "RTO": "Run-Of-River",
+        "PHS": "Pumped Hydro",
+        "canal": np.nan,
+    }
+
+    df_final = (
+        df.rename(columns=RENAME_COLUMNS)
+        .assign(
+            projectID=lambda df: "GHR-" + df.projectID.astype(str),
+            Name=lambda df: df.Name.str.split(" - ").str[0].combine_first(df.dam_name),
+            DateIn=lambda df: pd.to_datetime(df.DateIn).dt.year,
+            Technology=lambda df: df.Technology.map(TECHNOLOGY_MAP),
+            Volume_Mm3=lambda df: df.res_vol_km3 * 1e3,
+            # StorageCapacity_MWh=lambda df: 9.81 * df.dam_height_m * df.Volume_Mm3 * 0.9 / 3.6,
+            # Duration=lambda df: df.StorageCapacity_MWh / df.Capacity,
+            Set="PP",
+            Fueltype="Hydro",
+        )
+        .pipe(clean_name)
+        .pipe(set_column_name, "GHR")
+        .pipe(config_filter, config)
+    )
+
+    return df_final
+
+
 def EXTERNAL_DATABASE(raw=False, update=True, config=None):
     """
     Importer for external custom databases.
diff --git a/powerplantmatching/package_data/config.yaml b/powerplantmatching/package_data/config.yaml
index ec710b36..4e9515c2 100644
--- a/powerplantmatching/package_data/config.yaml
+++ b/powerplantmatching/package_data/config.yaml
@@ -26,6 +26,7 @@ matching_sources:
   # do not match units below 1 MW (2 MW for biogas, 3 MW for solar), exclude wind in Germany from any matching
   - MASTR: (Fueltype != 'Wind') and ((Fueltype == 'Solar' and Capacity >= 3) or (Fueltype == 'Biogas' and Capacity >= 2) or (Fueltype not in ['Solar', 'Biogas'] and Capacity >= 1))
   - EESI
+  - GHR
 
 # # fully_included_sources, these sources are included even without match to the final dataset
 fully_included_sources:
@@ -227,6 +228,10 @@ GND:
   status: ["Shutdown", "Operational", "Under Construction", "Decommissioning Completed"]
   url: https://raw.githubusercontent.com/cristianst85/GeoNuclearData/1bc8b4ac106af236902385b87e46c540b4864815/data/csv/denormalized/nuclear_power_plants.csv
   fn: nuclear_power_plants.csv
+GHR:
+  reliability_score: 4
+  fn: GloHydroRes_vs1.csv
+  url: https://zenodo.org/records/14526360/files/GloHydroRes_vs1.csv
 
 # ---------------------------------------------------------------------------- #
 #                             Data Structure Config                            #
@@ -591,16 +596,23 @@ clean_name:
       - unit
       - kraftwerk
       - kernkraftwerk
+      - wehrkraftwerk
+      - rheinkraftwerk
       - gemeinschaftskernkraftwerk
       - kernkw
       - kw
       - hkw
       - nuclear
+      - hydro
       - thermal
       - heizkraftwerk
       - eolico
       - project
       - hydroelectric
+      - hydropower
+      - hydroelectrique
+      - hydraulique
+      - embassament
       - pumped
       - storage
       - france
@@ -617,6 +629,13 @@ clean_name:
       - bosnia
       - and
       - herzegovina
+      - bulgaria
+      - generating
+      - romania
+      - macedonia
+      - latvia
+      - lithuania
+      - hungary
       - \w #remove single letters
     "ss": "ß"
 

From e23667415b165b3e891b543e37e9bc92c0fae84c Mon Sep 17 00:00:00 2001
From: Fabian Neumann <fabian.neumann@outlook.de>
Date: Sun, 24 Aug 2025 11:50:08 +0200
Subject: [PATCH 57/68] amend release notes

---
 doc/release-notes.rst | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/doc/release-notes.rst b/doc/release-notes.rst
index c5748c13..5b9dbbce 100644
--- a/doc/release-notes.rst
+++ b/doc/release-notes.rst
@@ -12,6 +12,8 @@ Upcoming Version
 
 * Added [European Energy Storage Inventory](https://ses.jrc.ec.europa.eu/storage-inventory-maps) dataset as `pm.data.EESI()`.
 
+* Added [GloHydroRES](https://zenodo.org/records/14526360) dataset as `pm.data.GHR()`.
+
 * Updated ENTSOE, BEYONDCOAL, JRC, IRENASTAT and the Global Energy Monitor datasets to the latest versions.
 
 * Fix in `pm.data.MASTR()` the distinction of hydro technologies and between offshore and onshore wind. Also read in storage technologies.

From e22267895f0d4eda921a78d47c16c9252567454c Mon Sep 17 00:00:00 2001
From: Fabian Neumann <fabian.neumann@outlook.de>
Date: Sun, 24 Aug 2025 11:50:55 +0200
Subject: [PATCH 58/68] add unit name in GCPT

---
 powerplantmatching/data.py | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/powerplantmatching/data.py b/powerplantmatching/data.py
index b8d9118c..12023401 100644
--- a/powerplantmatching/data.py
+++ b/powerplantmatching/data.py
@@ -1799,7 +1799,7 @@ def GCPT(raw=False, update=False, config=None):
 
     config = get_config() if config is None else config
     fn = get_raw_file("GCPT", update=update, config=config)
-    df = pd.read_excel(fn, sheet_name="Units", na_values=["not found"])
+    df = pd.read_excel(fn, sheet_name="Units", na_values=["not found", "-"])
 
     if raw:
         return df
@@ -1854,6 +1854,8 @@ def GCPT(raw=False, update=False, config=None):
         .pipe(convert_to_short_name)
         .dropna(subset="Capacity")
         .assign(
+            Name=lambda df: df["Name"]
+            + df["Unit Name"].fillna("").apply(lambda x: f" {x}" if x else ""),
             DateIn=df["DateIn"].apply(pd.to_numeric, errors="coerce"),
             DateOut=df["DateOut"]
             .apply(pd.to_numeric, errors="coerce")

From 60de3dcc10346a7bac0eedc24a34018e6296ca20 Mon Sep 17 00:00:00 2001
From: Fabian Neumann <fabian.neumann@outlook.de>
Date: Sun, 24 Aug 2025 14:33:49 +0200
Subject: [PATCH 59/68] report more unit names

---
 doc/release-notes.rst                       |  2 ++
 powerplantmatching/data.py                  | 16 +++++++++++++++-
 powerplantmatching/package_data/config.yaml |  1 +
 3 files changed, 18 insertions(+), 1 deletion(-)

diff --git a/doc/release-notes.rst b/doc/release-notes.rst
index 5b9dbbce..7adc6971 100644
--- a/doc/release-notes.rst
+++ b/doc/release-notes.rst
@@ -26,6 +26,8 @@ Upcoming Version
 
 * Include mothballed gas, oil and coal power plants.
 
+* Initially, include unit/block name in power plant name before matching.
+
 * Added option to retain blocks for subsets of fuel types (e.g. `clean_name: fueltypes_with_blocks: ['Nuclear']`).
 
 * For fully included datasets, add option to only aggregate units included in the matching process (e.g. `aggregate_only_matching_sources: ['MASTR']`).
diff --git a/powerplantmatching/data.py b/powerplantmatching/data.py
index 12023401..76821a6d 100644
--- a/powerplantmatching/data.py
+++ b/powerplantmatching/data.py
@@ -329,6 +329,9 @@ def to_year(ds):
 
     res = units.join(ppl.set_index("projectID"), "projectID", rsuffix="_ppl")
     res["DateIn"] = res.DateIn.fillna(res.DateIn_ppl)
+    res["Name"] = res.Name + res["Unit_Nbr"].fillna("").apply(
+        lambda x: f" {x}" if x else ""
+    )
     not_included_ppl = ppl.query("projectID not in @res.projectID")
     res = pd.concat([res, not_included_ppl]).pipe(set_column_name, "GEO")
     res = scale_to_net_capacities(res)
@@ -1855,7 +1858,7 @@ def GCPT(raw=False, update=False, config=None):
         .dropna(subset="Capacity")
         .assign(
             Name=lambda df: df["Name"]
-            + df["Unit Name"].fillna("").apply(lambda x: f" {x}" if x else ""),
+            + df["Unit name"].fillna("").apply(lambda x: f" {x}" if x else ""),
             DateIn=df["DateIn"].apply(pd.to_numeric, errors="coerce"),
             DateOut=df["DateOut"]
             .apply(pd.to_numeric, errors="coerce")
@@ -2295,6 +2298,7 @@ def MASTR(
         "EinheitBetriebsstatus": "Status",
         "Laengengrad": "lon",
         "Breitengrad": "lat",
+        "WEIC": "EIC",
     }
     COUNTRY_MAP = {
         "Deutschland": "Germany",
@@ -2308,6 +2312,7 @@ def MASTR(
         "Energietraeger",
         "Hauptbrennstoff",
         "NameStromerzeugungseinheit",
+        "NameKraftwerksblock",
         "NameWindpark",
         "Technologie",
     ]
@@ -2499,6 +2504,15 @@ def MASTR(
     ).index
     df_processed.loc[mask, "Set"] = "PP"
 
+    df_processed["Name"] = df_processed.apply(
+        lambda x: f"{x.Name} {x.NameKraftwerksblock.replace(x.Name, '').strip()}"
+        if x.NameKraftwerksblock
+        and x.NameKraftwerksblock != x.Name
+        and x.Fueltype in config["clean_name"]["fueltypes_with_blocks"]
+        else x.Name,
+        axis=1,
+    )
+
     df_final = (
         df_processed.pipe(clean_name)
         .pipe(set_column_name, "MASTR")
diff --git a/powerplantmatching/package_data/config.yaml b/powerplantmatching/package_data/config.yaml
index 4e9515c2..5f403784 100644
--- a/powerplantmatching/package_data/config.yaml
+++ b/powerplantmatching/package_data/config.yaml
@@ -594,6 +594,7 @@ clean_name:
       - power
       - plant
       - unit
+      - block
       - kraftwerk
       - kernkraftwerk
       - wehrkraftwerk

From 8a8d7445b7472275a0a1d71906f2699ff3a7f38b Mon Sep 17 00:00:00 2001
From: Fabian Neumann <fabian.neumann@outlook.de>
Date: Sun, 24 Aug 2025 14:34:28 +0200
Subject: [PATCH 60/68] move BEYONDCOAL to unit-level dataset

---
 powerplantmatching/data.py                  | 102 +++++++++-----------
 powerplantmatching/package_data/config.yaml |   4 +-
 2 files changed, 50 insertions(+), 56 deletions(-)

diff --git a/powerplantmatching/data.py b/powerplantmatching/data.py
index 76821a6d..d1949301 100644
--- a/powerplantmatching/data.py
+++ b/powerplantmatching/data.py
@@ -69,73 +69,67 @@ def BEYONDCOAL(raw=False, update=False, config=None):
     config = get_config() if config is None else config
 
     fn = get_raw_file("BEYONDCOAL", update=update, config=config)
-    df = pd.read_excel(fn, sheet_name="Plant", header=0, skiprows=[0, 2, 3])
-    df.set_index("BFF plant ID", drop=False, inplace=True)
+    df = pd.read_excel(
+        fn, sheet_name="Unit", header=0, skiprows=[0, 2, 3], na_values=["unknown"]
+    )
+
+    df_plant = pd.read_excel(
+        fn,
+        sheet_name="Plant",
+        header=0,
+        skiprows=[0, 2, 3],
+        usecols=["BFF plant ID", "Latitude", "Longitude"],
+    ).set_index("BFF plant ID")
+
+    df["lat"] = df["BFF plant ID"].map(df_plant.Latitude)
+    df["lon"] = df["BFF plant ID"].map(df_plant.Longitude)
 
     if raw:
         return df
 
-    status_list = config["BEYONDCOAL"].get("status", ["Open"])  # noqa
-
-    df_units = pd.read_excel(fn, sheet_name="Unit", header=0, skiprows=[0, 2, 3])
+    status_list = config["BEYONDCOAL"].get("status", ["operational"])  # noqa
 
     RENAME_COLUMNS = {
-        "Plant name": "Name",
+        "Unit name": "Name",
         "Fuel type": "Fueltype",
-        "Latitude": "lat",
-        "Longitude": "lon",
-        "Commissioning year of first unit": "DateIn",
-        "(Announced) Retirement year of last unit": "DateOut",
-        "Coal capacity open": "Capacity",
-        "Plant status\n(gross)": "status",
-        "BFF plant ID": "projectID",
+        "Commissioning year": "DateIn",
+        "Unit status\n(detailed)": "status",
+        "BFF unit ID": "projectID",
     }
 
-    phaseout_col = "Covered by country phase-out? [if yes: country phase-out year]"
-    df_units[phaseout_col] = pd.to_numeric(df_units[phaseout_col], errors="coerce")
-    unit_phaseout = df_units.groupby("BFF plant ID")[phaseout_col].max()
-
-    # plant-level does not contain CHP information
-    def get_dominant_type(group):
-        type_capacity = group.groupby("Unit type")["Capacity"].sum()
-        return (
-            "CHP"
-            if type_capacity.get("chp", 0) > type_capacity.get("conventional", 0)
-            else "PP"
-        )
-
-    unit_set = df_units.groupby("BFF plant ID").apply(
-        get_dominant_type, include_groups=False
-    )
-
-    # for retired plants
-    unit_capacity = df_units.groupby("BFF plant ID").Capacity.sum()
+    SET_MAP = {
+        "chp": "CHP",
+        "conventional": "PP",
+        "industrial": "CHP",
+        "heat": "CHP",
+    }
 
     with pd.option_context("future.no_silent_downcasting", True):
-        df = (
-            df.rename(columns=RENAME_COLUMNS)
-            .query("status in @status_list")
-            .assign(
-                DateOut=lambda df: df.rename(columns=RENAME_COLUMNS)
-                .DateOut.replace({"After 2030": np.nan, "By 2030": 2030})
-                .astype(float)
-                .combine_first(unit_phaseout),
-                projectID=lambda df: "BEYOND-" + df.projectID,
-                Fueltype=lambda df: df.Fueltype.str.title(),
-                Set=unit_set,
-                Technology=np.nan,
-                Capacity=lambda df: df.Capacity.add(
-                    df["Coal capacity under construction"], fill_value=0
-                ).combine_first(unit_capacity),
-            )
-            .pipe(scale_to_net_capacities)
-            .pipe(clean_name)
-            .pipe(convert_to_short_name)
-            .pipe(set_column_name, "BEYONDCOAL")
-            .pipe(config_filter, config)
+        phaseout_col = "Covered by country phase-out? [if yes: country phase-out year]"
+        date_out = (
+            df["(Announced) Retirement year"]
+            .replace({"After 2030": np.nan, "By 2030": 2030})
+            .astype(float)
+            .combine_first(pd.to_numeric(df[phaseout_col], errors="coerce"))
         )
 
-    return df
+    df_final = (
+        df.rename(columns=RENAME_COLUMNS)
+        .query("status in @status_list")
+        .assign(
+            DateOut=date_out,
+            projectID=lambda df: "BEYOND-" + df.projectID,
+            Fueltype=lambda df: df.Fueltype.str.title(),
+            Set=lambda df: df["Unit type"].map(SET_MAP),
+            Technology=np.nan,
+        )
+        .pipe(clean_name)
+        .pipe(convert_to_short_name)
+        .pipe(set_column_name, "BEYONDCOAL")
+        .pipe(config_filter, config)
+    )
+
+    return df_final
 
 
 def OPSD(
diff --git a/powerplantmatching/package_data/config.yaml b/powerplantmatching/package_data/config.yaml
index 5f403784..42c12e0f 100644
--- a/powerplantmatching/package_data/config.yaml
+++ b/powerplantmatching/package_data/config.yaml
@@ -58,9 +58,9 @@ BNETZA:
   url: https://www.bundesnetzagentur.de/SharedDocs/Downloads/DE/Sachgebiete/Energie/Unternehmen_Institutionen/Versorgungssicherheit/Erzeugungskapazitaeten/Kraftwerksliste/Kraftwerksliste_2019_1.xlsx;jsessionid=17E419F28D025C7DD9FC6E2BEB3D088F?__blob=publicationFile&v=2
 BEYONDCOAL:
   net_capacity: false
-  aggregated_units: true
+  aggregated_units: false
   reliability_score: 4
-  status: ["Construction", "Open", "Planned", "Retired"]
+  status: ["construction", "operational", "no longer coal", "retired", "standby", "deactivated", "retrofitting"]
   fn: 2025-07-24-BeyondFossilFuels-Europe_Coal_Plants_Database.xlsx
   url: https://beyondfossilfuels.org/wp-content/uploads/2025/07/2025-07-24-BeyondFossilFuels-Europe_Coal_Plants_Database.xlsx
 IRENA:

From 2afa44ae7073258b12fce3324dff36e683cae866 Mon Sep 17 00:00:00 2001
From: Fabian Neumann <fabian.neumann@outlook.de>
Date: Mon, 25 Aug 2025 17:50:34 +0200
Subject: [PATCH 61/68] Update powerplantmatching/cleaning.py

Co-authored-by: Johannes HAMPP <42553970+euronion@users.noreply.github.com>
---
 powerplantmatching/cleaning.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/powerplantmatching/cleaning.py b/powerplantmatching/cleaning.py
index 9b0960de..192f884f 100644
--- a/powerplantmatching/cleaning.py
+++ b/powerplantmatching/cleaning.py
@@ -399,7 +399,7 @@ def clean_technology(df, generalize_hydros=False):
         "Nanicl": "NaNiCl",
         "Caes": "CAES",
     }
-    tech = tech.replace(ABBREVIATIONS, regex=True)
+    tech = tech.replace(ABBREVIATIONS, regex=False)
     return df.assign(Technology=tech)
 
 

From b432462543a3f9de30d66abab724c6d8871d6d28 Mon Sep 17 00:00:00 2001
From: Fabian Neumann <fabian.neumann@outlook.de>
Date: Mon, 25 Aug 2025 18:11:40 +0200
Subject: [PATCH 62/68] more comments in cleaning

---
 powerplantmatching/cleaning.py | 12 +++++++++---
 1 file changed, 9 insertions(+), 3 deletions(-)

diff --git a/powerplantmatching/cleaning.py b/powerplantmatching/cleaning.py
index 192f884f..ac5c497a 100644
--- a/powerplantmatching/cleaning.py
+++ b/powerplantmatching/cleaning.py
@@ -118,7 +118,10 @@ def clean_name(df, config=None):
 
         pattern = np.atleast_1d(pattern)
 
-        # do not remove block numbers for fuel types with blocks
+        # do not remove block numbers for fuel types with blocks; the regular
+        # regex [^a-zA-Z] removes non-alphabetical characters; for fueltypes to
+        # keep, the regex [^a-zA-Z0-9] is used which only removes
+        # non-alphanumerical characters
         if len(keep_blocks) > 0 and key == " " and "[^a-zA-Z]" in pattern:
             base = [rf"\b{p}\b" for p in pattern if p != "[^a-zA-Z]"]
             pattern_keep = r"(?i)" + "|".join(base + [r"[^a-zA-Z0-9]"])
@@ -128,13 +131,15 @@ def clean_name(df, config=None):
                 pattern_default, key, regex=True
             )
 
-        # do not remove block letters for fuel types with blocks
+        # do not remove block letters for fuel types with blocks; the regular
+        # regex \w would remove standalone letters, this one is skipped for
+        # fueltypes in mask
         elif key == "" and "\w" in pattern:
             pattern_keep = r"(?i)" + "|".join(
                 [rf"\b{p}\b" for p in pattern if p != "\w"]
             )
             pattern_default = r"(?i)" + "|".join(
-                [rf"\b{p}\b" for p in pattern if p != "\w"]
+                [rf"\b{p}\b" for p in pattern]
             )
             name.loc[mask] = name.loc[mask].str.replace(pattern_keep, key, regex=True)
             name.loc[~mask] = name.loc[~mask].str.replace(
@@ -145,6 +150,7 @@ def clean_name(df, config=None):
             pattern = r"(?i)" + "|".join([rf"\b{p}\b" for p in pattern])
             name = name.str.replace(pattern, key, regex=True)
 
+    # remove duplicated words; second pass necessary for edge cases
     if config["clean_name"]["remove_duplicated_words"]:
         name = (
             name.str.replace(r"\b(\w+)(?:\W\1\b)+", r"\1", regex=True, case=False)

From 2fb3e7c4d4c9e1ec1b51256e2b7242bc362e90ff Mon Sep 17 00:00:00 2001
From: Fabian Neumann <fabian.neumann@outlook.de>
Date: Mon, 25 Aug 2025 18:11:59 +0200
Subject: [PATCH 63/68] more comments on matching_sources selection

---
 powerplantmatching/package_data/config.yaml | 23 +++++++++++++++------
 1 file changed, 17 insertions(+), 6 deletions(-)

diff --git a/powerplantmatching/package_data/config.yaml b/powerplantmatching/package_data/config.yaml
index 42c12e0f..6b4f3bf7 100644
--- a/powerplantmatching/package_data/config.yaml
+++ b/powerplantmatching/package_data/config.yaml
@@ -16,12 +16,18 @@ main_query: "Name != '' and (lat >= 30 or lat != lat)"
 matching_sources:
   # Make individual queries for the datasets as done in `fully_included_sources`
   # Queries are combined with `main_query` with an `and` operator
-  - ENTSOE: not (Country == 'Germany' and Fueltype  == 'Wind') # wind is per turbine rather than park in MASTR and unsuitable for matching
+  # capacity filters avoid matching of too small units (which is too time-consuming)
+  # wind is per turbine rather than park in MASTR and unsuitable for matching
+  - ENTSOE: not (Country == 'Germany' and Fueltype  == 'Wind')
+  # wind in Germany is provided by MASTR, other filters are due to large deviations to other datasets
   - GEO: Capacity >= 1 and not (Country == 'Germany' and Fueltype  == 'Wind') and Fueltype not in ['Oil', 'Nuclear'] and not (Country in ['Bulgaria', 'Slovakia'] and Fueltype == 'Hard Coal')
+  # wind in Germany is provided by MASTR, nuclear is not block-wise, other filters are due to large deviations to other datasets
   - GPD: Capacity >= 1 and not (Country == 'Germany' and Fueltype  == 'Wind') and not (Country in ['Czechia', 'Bulgaria', 'Romania'] and Fueltype == 'Hard Coal') and Fueltype != 'Nuclear'
-  - JRC: Capacity >= 1 and not (Country == 'Germany' and Fueltype  == 'Wind') # do not match small hydro
+  - JRC: Capacity >= 1
+  # wind in Germany is provided by MASTR, other filters are due to large deviations to other datasets
   - OPSD: not (Country == 'Germany' and Fueltype  == 'Wind') and ((Capacity >= 1 and Fueltype != 'Solar') or Capacity >= 3) and not (Country == 'Spain' and Fueltype == 'Hard Coal') and not (Country == 'Italy' and Fueltype == 'Natural Gas')
   - BEYONDCOAL
+  # wind in Germany is provided by MASTR
   - GEM: Capacity >= 3 and not (Country == 'Germany' and Fueltype  == 'Wind')
   # do not match units below 1 MW (2 MW for biogas, 3 MW for solar), exclude wind in Germany from any matching
   - MASTR: (Fueltype != 'Wind') and ((Fueltype == 'Solar' and Capacity >= 3) or (Fueltype == 'Biogas' and Capacity >= 2) or (Fueltype not in ['Solar', 'Biogas'] and Capacity >= 1))
@@ -31,16 +37,21 @@ matching_sources:
 # # fully_included_sources, these sources are included even without match to the final dataset
 fully_included_sources:
   # Make individual queries for the datasets
-  - GEM: not (Country == 'Germany' and Fueltype in ['Solar', 'Wind']) # wind and solar in Germany is covered by MASTR
-  - EESI: Fueltype != 'Hydro' and not (Country == 'Germany' and Fueltype == 'Battery') # battery in Germany is covered by MASTR
+  # wind and solar in Germany is covered by MASTR
+  - GEM: not (Country == 'Germany' and Fueltype in ['Solar', 'Wind'])
+  # battery in Germany is covered by MASTR
+  - EESI: Fueltype != 'Hydro' and not (Country == 'Germany' and Fueltype == 'Battery')
+  # exclude units smaller than 100 kW (low total capacity) and take nuclear from other datasets (good matching)
   - MASTR: Capacity >= 0.1 and Fueltype != 'Nuclear'
-  - OPSD: Country != 'Germany' and Capacity < 1 and Capacity >= 0.1 and Fueltype == 'Hydro' # take small hydro outside Germany from OPSD (highest coverage)
+  # take small hydro outside Germany from OPSD (highest coverage)
+  - OPSD: Country != 'Germany' and Capacity < 1 and Capacity >= 0.1 and Fueltype == 'Hydro'
   - BEYONDCOAL
+  # include this selection of countries as they have poorer coverage in all other datasets
   - JRC: Country in ['Italy', 'Croatia', 'Serbia', 'Slovakia']
 
 # these sources skip unit aggregation for fully_included_sources not covered in matching_sources
 aggregate_only_matching_sources:
-  - MASTR
+  - MASTR # the matching process of very small units is not efficient
 
 parallel_duke_processes: 16
 threads_extend_by_non_matched: 16

From bacece0225f08719f5f4df670145c9964052c2e0 Mon Sep 17 00:00:00 2001
From: "pre-commit-ci[bot]"
 <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Date: Mon, 25 Aug 2025 16:23:45 +0000
Subject: [PATCH 64/68] [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci
---
 powerplantmatching/cleaning.py | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/powerplantmatching/cleaning.py b/powerplantmatching/cleaning.py
index 9f4bf998..3acf4306 100644
--- a/powerplantmatching/cleaning.py
+++ b/powerplantmatching/cleaning.py
@@ -127,9 +127,7 @@ def clean_name(df, config=None):
             pattern_keep = r"(?i)" + "|".join(
                 [rf"\b{p}\b" for p in pattern if p != "\w"]
             )
-            pattern_default = r"(?i)" + "|".join(
-                [rf"\b{p}\b" for p in pattern]
-            )
+            pattern_default = r"(?i)" + "|".join([rf"\b{p}\b" for p in pattern])
             name.loc[mask] = name.loc[mask].str.replace(pattern_keep, key, regex=True)
             name.loc[~mask] = name.loc[~mask].str.replace(
                 pattern_default, key, regex=True

From 4e95e661e5d661285ee161aa5f5c8093c45a8415 Mon Sep 17 00:00:00 2001
From: Fabian Neumann <fabian.neumann@outlook.de>
Date: Mon, 25 Aug 2025 18:28:45 +0200
Subject: [PATCH 65/68] markdown release notes

---
 docs/release-notes.md | 14 ++++++++++++++
 1 file changed, 14 insertions(+)

diff --git a/docs/release-notes.md b/docs/release-notes.md
index e6a13c21..43c8e8ef 100644
--- a/docs/release-notes.md
+++ b/docs/release-notes.md
@@ -11,6 +11,20 @@ SPDX-License-Identifier: MIT
 * Update Marktstammdatenregister data for Germany from [open-MaStR (February 25, 2025)](https://zenodo.org/records/14783581).
 * Drop support for Python 3.9, add support for Python 3.13. Minimum required Python version is now 3.10.
 * Restructure documentation and move to use `mkdocs` for a nicer user experience.
+* Added [GeoNuclearData](https://github.com/cristianst85/GeoNuclearData) dataset as `pm.data.GND()`.
+* Added [European Energy Storage Inventory](https://ses.jrc.ec.europa.eu/storage-inventory-maps) dataset as `pm.data.EESI()`.
+* Added [GloHydroRES](https://zenodo.org/records/14526360) dataset as `pm.data.GHR()`.
+* Updated ENTSOE, BEYONDCOAL, JRC, IRENASTAT and the Global Energy Monitor datasets to the latest versions.
+* Fix in `pm.data.MASTR()` the distinction of hydro technologies and between offshore and onshore wind. Also read in storage technologies.
+* Improved recognition of CHP power plants.
+* In Global Energy Monitor datasets, also read entries below capacity threshold.
+* In `pm.data.GCPT()`, add estimate for coal plant efficiency.
+* Include mothballed gas, oil and coal power plants.
+* Initially, include unit/block name in power plant name before matching.
+* Added option to retain blocks for subsets of fuel types (e.g. `clean_name: fueltypes_with_blocks: ['Nuclear']`).
+* For fully included datasets, add option to only aggregate units included in the matching process (e.g. `aggregate_only_matching_sources: ['MASTR']`).
+* Added option for multiprocessing when aggregating units of non-matched power plants (e.g. `threads_extend_by_non_matched: 16`).
+* Updating matching logic configuration.
 
 ## [v0.7.1](https://github.com/PyPSA/powerplantmatching/releases/tag/v0.7.1) (30th January 2024)
 

From 02d05a9c9e40267a150e683950a7138d651cec57 Mon Sep 17 00:00:00 2001
From: Fabian Neumann <fabian.neumann@outlook.de>
Date: Fri, 5 Sep 2025 17:10:02 +0200
Subject: [PATCH 66/68] fix tests: reduce load

---
 test/test_cleaning.py | 2 +-
 test/test_data.py     | 5 ++++-
 2 files changed, 5 insertions(+), 2 deletions(-)

diff --git a/test/test_cleaning.py b/test/test_cleaning.py
index dab6c5c0..b4411075 100644
--- a/test/test_cleaning.py
+++ b/test/test_cleaning.py
@@ -83,7 +83,7 @@ def test_gather_specifications(data):
 def test_clean_name(data):
     res = clean_name(data)
     assert res.Name[0] == "Powerplant"
-    assert res.Name[1] == "An Hydro Powerplant"
+    assert res.Name[1] == "An Powerplant"
     assert res.Name[2] == "Another Powerplant With Whitespaces"
     assert res.Name[3] == "Coalition"
     assert res.Name[4] == "Besonders Chp"
diff --git a/test/test_data.py b/test/test_data.py
index 07d32843..933d0710 100755
--- a/test/test_data.py
+++ b/test/test_data.py
@@ -54,4 +54,7 @@ def test_url_retrieval():
 
 
 def test_reduced_retrieval():
-    pm.powerplants(reduced=False)
+    config = pm.get_config()
+    config["matching_sources"] = ["GEO", "GPD"]
+    config["fully_included_sources"] = []
+    pm.powerplants(reduced=False, config=config)

From 73f7996386f4accfc7df4fb9eb33a6df9d0df8c9 Mon Sep 17 00:00:00 2001
From: jensch-dlr <95235501+jensch-dlr@users.noreply.github.com>
Date: Wed, 5 Nov 2025 15:28:34 +0100
Subject: [PATCH 67/68] Complement data update 2025 (#267)

* updates path to `powerplants.png`

* sets "parallel_duke_processes" to false, because otherwise pm does not execute on Windows machines out-of-the-box

* corrects GPD file name

* updates and complements GBPT converter to work with "Global-Bioenergy-Power-Tracker-GBPT-V3.xlsx" from September 2025

* fixes GEM_FUNCTIONS typo

* [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci

* fixes typo in docstring of `gather_fueltype_info()` and `MASTR()` and complements release notes

* fixes paths to release-notes.md and contributors.md in PR template

---------

Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
---
 .github/pull_request_template.md            |  4 +--
 README.md                                   |  2 +-
 docs/release-notes.md                       |  5 ++++
 powerplantmatching/cleaning.py              |  2 +-
 powerplantmatching/data.py                  | 30 ++++++++++++++++-----
 powerplantmatching/package_data/config.yaml |  4 +--
 6 files changed, 35 insertions(+), 12 deletions(-)

diff --git a/.github/pull_request_template.md b/.github/pull_request_template.md
index 641a2829..15e7f292 100644
--- a/.github/pull_request_template.md
+++ b/.github/pull_request_template.md
@@ -7,5 +7,5 @@ Closes # (if applicable).
 
 - [ ] Code changes are sufficiently documented; i.e. new functions contain docstrings and further explanations may be given in `docs`.
 - [ ] Unit tests for new features were added (if applicable).
-- [ ] A note for the release notes `doc/release_notes.md` of the upcoming release is included.
-- [ ] I consent to the release of this PR's code under the MIT license and have added my name to the `doc/contributors.md`.
+- [ ] A note for the release notes `docs/release-notes.md` of the upcoming release is included.
+- [ ] I consent to the release of this PR's code under the MIT license and have added my name to the `docs/contributors.md`.
diff --git a/README.md b/README.md
index 19ad53a7..843efbe5 100644
--- a/README.md
+++ b/README.md
@@ -48,7 +48,7 @@ out simulations.
 
 ## Map
 
-![powerplants.png](doc/powerplants.png)
+![powerplants.png](docs/assets/images/powerplants.png)
 
 ## Installation
 
diff --git a/docs/release-notes.md b/docs/release-notes.md
index 43c8e8ef..ae1d2e06 100644
--- a/docs/release-notes.md
+++ b/docs/release-notes.md
@@ -25,6 +25,11 @@ SPDX-License-Identifier: MIT
 * For fully included datasets, add option to only aggregate units included in the matching process (e.g. `aggregate_only_matching_sources: ['MASTR']`).
 * Added option for multiprocessing when aggregating units of non-matched power plants (e.g. `threads_extend_by_non_matched: 16`).
 * Updating matching logic configuration.
+* Update GBPT importer to support newer version of the database (from V3 on without sheet "Below Threshold").
+* Corrects GPD file name in `config.yaml`.
+* Sets `parallel_duke_processes` to false (instead of 16) to make powerplantmatching executable out-of-the-box also for Windows systems.
+* Updates path to `powerplants.png` in README.
+* Fixes typo in docstring of `gather_fueltype_info()` (`cleaning.py`) and `MASTR()` (`data.py`).
 
 ## [v0.7.1](https://github.com/PyPSA/powerplantmatching/releases/tag/v0.7.1) (30th January 2024)
 
diff --git a/powerplantmatching/cleaning.py b/powerplantmatching/cleaning.py
index 3acf4306..432866e1 100644
--- a/powerplantmatching/cleaning.py
+++ b/powerplantmatching/cleaning.py
@@ -262,7 +262,7 @@ def gather_fueltype_info(
     Parses in a set of columns for distinct fueltype specifications.
 
     This function uses the mappings (key -> regex pattern) given
-    by the `config` under the section `target_technologies`.
+    by the `config` under the section `target_fueltypes`.
     The representative keys are set if any of the columns
     in `search_col` matches the regex pattern.
 
diff --git a/powerplantmatching/data.py b/powerplantmatching/data.py
index c7b0d5c2..a7717560 100644
--- a/powerplantmatching/data.py
+++ b/powerplantmatching/data.py
@@ -1642,9 +1642,18 @@ def GBPT(raw=False, update=False, config=None):
     """
     config = get_config() if config is None else config
     fn = get_raw_file("GBPT", update=update, config=config)
-    large = pd.read_excel(fn, sheet_name="Data")
-    small = pd.read_excel(fn, sheet_name="Below Threshold")
-    df = pd.concat([large, small], ignore_index=True)
+    try:
+        large = pd.read_excel(fn, sheet_name="Data")
+        small = pd.read_excel(fn, sheet_name="Below Threshold")
+        df = pd.concat([large, small], ignore_index=True)
+    except Exception as e:
+        if e.args[0] == ("Worksheet named 'Below Threshold' not found"):
+            logger.info(
+                'In newer versions of the dataset, the sheet "Below Threshold" does not exist anymore.'
+            )
+            df = pd.read_excel(fn, sheet_name="Data")
+        else:
+            logger.error(e)
 
     if raw:
         return df
@@ -1664,12 +1673,20 @@ def GBPT(raw=False, update=False, config=None):
     fueltype_dict = {
         # solid biomass
         "bioenergy: agricultural waste (solids)": "Solid Biomass",
+        "bioenergy: agricultural waste (solids) [90%]": "Solid Biomass",
         "bioenergy: agricultural waste (unknown)": "Solid Biomass",
         "bioenergy: paper mill wastes": "Solid Biomass",
         "bioenergy: unknown": "Solid Biomass",
         "bioenergy: wood & other biomass (biocoal)": "Solid Biomass",
         "bioenergy: wood & other biomass (solids)": "Solid Biomass",
         "bioenergy: agricultural waste (syngas)": "Solid Biomass",
+        "bioenergy: wood & other biomass (solids) [95%]": "Solid Biomass",
+        "bioenergy: wood & other biomass (solids) [92%]": "Solid Biomass",
+        "bioenergy: wood & other biomass (solids) [80%]": "Solid Biomass",
+        "bioenergy: wood & other biomass (solids) [75%]": "Solid Biomass",
+        "bioenergy: wood & other biomass (solids) [60%]": "Solid Biomass",
+        "bioenergy: wood & other biomass (solids) [51%]": "Solid Biomass",
+        "bioenergy: wood & other biomass (solids) [50%]": "Solid Biomass",
         # biogas
         "bioenergy: agricultural waste (biogas)": "Biogas",
         "bioenergy: refuse (landfill gas)": "Biogas",
@@ -1677,6 +1694,7 @@ def GBPT(raw=False, update=False, config=None):
         # oil
         "bioenergy: ethanol": "Oil",
         "bioenergy: biodiesel": "Oil",
+        "bioenergy: bio-heavy oil": "Oil",
         # waste
         "bioenergy: refuse (municipal and industrial wastes)": "Waste",
         "bioenergy: refuse (syngas)": "Solid Biomass",
@@ -2238,8 +2256,8 @@ def GEM(raw=False, update=False, config=None):
         Custom configuration, by default None
 
     """
-    GEMS_FUNTIONS = [GBPT, GGPT, GCPT, GGTPT, GNPT, GSPT, GWPT, GHPT]
-    data = [f(raw=raw, update=update, config=config) for f in GEMS_FUNTIONS]
+    GEM_FUNCTIONS = [GBPT, GGPT, GCPT, GGTPT, GNPT, GSPT, GWPT, GHPT]
+    data = [f(raw=raw, update=update, config=config) for f in GEM_FUNCTIONS]
     return pd.concat(data, ignore_index=True)
 
 
@@ -2251,7 +2269,7 @@ def MASTR(
     """
     Get the Marktstammdatenregister (MaStR) dataset.
 
-    Provided by the German Federal Network Agency (Bundesnetzagentur / BNetza) and
+    Provided by the German Federal Network Agency (Bundesnetzagentur / BNetzA) and
     contains data on Germany, Austria and Switzerland.
 
     Parameters
diff --git a/powerplantmatching/package_data/config.yaml b/powerplantmatching/package_data/config.yaml
index 6b4f3bf7..c7c8327b 100644
--- a/powerplantmatching/package_data/config.yaml
+++ b/powerplantmatching/package_data/config.yaml
@@ -53,7 +53,7 @@ fully_included_sources:
 aggregate_only_matching_sources:
   - MASTR # the matching process of very small units is not efficient
 
-parallel_duke_processes: 16
+parallel_duke_processes: false
 threads_extend_by_non_matched: 16
 matched_data_url: https://raw.githubusercontent.com/PyPSA/powerplantmatching/{tag}/powerplants.csv
 
@@ -108,7 +108,7 @@ GEO_units:
   fn: global_energy_observatory_ppl_units.csv
 GPD:
   reliability_score: 3
-  fn: globalpowerplantdatabasev120.zip
+  fn: globalpowerplantdatabase_v_1_3_0.zip
   #if outdated, look at http://datasets.wri.org/dataset/globalpowerplantdatabase
   url: https://wri-dataportal-prod.s3.amazonaws.com/manual/global_power_plant_database_v_1_3.zip
 WIKIPEDIA:

From 51d16b0a33665d297e7eadb13be24438dc892d5e Mon Sep 17 00:00:00 2001
From: Fabian Neumann <fabian.neumann@outlook.de>
Date: Wed, 5 Nov 2025 15:53:48 +0100
Subject: [PATCH 68/68] update TUBcloud fileshare links

---
 powerplantmatching/data.py                  |  4 ++--
 powerplantmatching/package_data/config.yaml | 22 ++++++++++-----------
 2 files changed, 13 insertions(+), 13 deletions(-)

diff --git a/powerplantmatching/data.py b/powerplantmatching/data.py
index a7717560..eb1f8130 100644
--- a/powerplantmatching/data.py
+++ b/powerplantmatching/data.py
@@ -1647,9 +1647,9 @@ def GBPT(raw=False, update=False, config=None):
         small = pd.read_excel(fn, sheet_name="Below Threshold")
         df = pd.concat([large, small], ignore_index=True)
     except Exception as e:
-        if e.args[0] == ("Worksheet named 'Below Threshold' not found"):
+        if "Below Threshold" in e.args[0]:
             logger.info(
-                'In newer versions of the dataset, the sheet "Below Threshold" does not exist anymore.'
+                "In newer versions of the dataset, the sheet 'Below Threshold' does not exist anymore."
             )
             df = pd.read_excel(fn, sheet_name="Data")
         else:
diff --git a/powerplantmatching/package_data/config.yaml b/powerplantmatching/package_data/config.yaml
index c7c8327b..77256e09 100644
--- a/powerplantmatching/package_data/config.yaml
+++ b/powerplantmatching/package_data/config.yaml
@@ -79,7 +79,7 @@ IRENA:
   aggregated_units: true
   fn: IRENASTAT_capacities_2000-2024.csv
   # compiled from https://pxweb.irena.org/pxweb/en/IRENASTAT/IRENASTAT__Power%20Capacity%20and%20Generation/Country_ELECSTAT_2025_H2_PX.px/
-  url: https://tubcloud.tu-berlin.de/s/p2D5E9MLWE8HPHE/download/IRENASTAT_capacities_2000-2024.csv
+  url: https://tubcloud.tu-berlin.de/s/dDS9erreKPNH4Ey/download/IRENASTAT_capacities_2000-2024.csv
 CARMA:
   net_capacity: false
   reliability_score: 1
@@ -87,7 +87,7 @@ CARMA:
   fn: Full_CARMA_2009_Dataset_1.csv
 ENTSOE:
   reliability_score: 5
-  url: https://tubcloud.tu-berlin.de/s/QaHLH38J4A7ZF5m/download/entsoe_transparency_platform_20250820.csv
+  url: https://tubcloud.tu-berlin.de/s/N7qo3AGyRYZyisS/download/entsoe_transparency_platform_20250820.csv
   fn: entsoe_transparency_platform_20250820.csv
 ENTSOE-EIC:
   url: https://eepublicdownloads.blob.core.windows.net/cio-lio/csv/W_eicCodes.csv
@@ -173,7 +173,7 @@ GGPT:
   reliability_score: 6
   status: ["operating", "retired", "construction"]
   fn: Global-Oil-and-Gas-Plant-Tracker-GOGPT-August-2025.xlsx
-  url: https://tubcloud.tu-berlin.de/s/aKrt7dyNgazmgAm/download/Global-Oil-and-Gas-Plant-Tracker-GOGPT-August-2025.xlsx
+  url: https://tubcloud.tu-berlin.de/s/WrmNX5awNJFcXrQ/download/Global-Oil-and-Gas-Plant-Tracker-GOGPT-August-2025.xlsx
 GEM:
   # combined data set of all GEM trackers
   net_capacity: true
@@ -183,32 +183,32 @@ GCPT:
   reliability_score: 6
   status: ["operating", "retired", "construction", "mothballed"]
   fn: Global-Coal-Plant-Tracker-July-2025.xlsx
-  url: https://tubcloud.tu-berlin.de/s/etMB7qawKNwfgnk/download/Global-Coal-Plant-Tracker-July-2025.xlsx
+  url: https://tubcloud.tu-berlin.de/s/ijzbscopNTgNB2r/download/Global-Coal-Plant-Tracker-July-2025.xlsx
 GGTPT:
   net_capacity: false
   reliability_score: 6
   aggregated_units: false
   status: ["operating", "retired", "construction", "mothballed"]
   fn: Geothermal-Power-Tracker-March-2025-Final.xlsx
-  url: https://tubcloud.tu-berlin.de/s/dNoEsLeGtCWDkoc/download/Geothermal-Power-Tracker-March-2025-Final.xlsx
+  url: https://tubcloud.tu-berlin.de/s/ypr3eL2K5kckAK4/download/Geothermal-Power-Tracker-March-2025-Final.xlsx
 GWPT:
   net_capacity: false
   reliability_score: 6
   status: ["operating", "retired", "construction"]
   fn:  Global-Wind-Power-Tracker-February-2025.xlsx
-  url: https://tubcloud.tu-berlin.de/s/8NSXSjPmJPXpg4W/download/Global-Wind-Power-Tracker-February-2025.xlsx
+  url: https://tubcloud.tu-berlin.de/s/L4AssxsisA6ENRb/download/Global-Wind-Power-Tracker-February-2025.xlsx
 GSPT:
   net_capacity: false
   reliability_score: 6
   status: ["operating", "retired", "construction"]
   fn: Global-Solar-Power-Tracker-February-2025.xlsx
-  url: https://tubcloud.tu-berlin.de/s/7eo4dZXMp6eB3mz/download/Global-Solar-Power-Tracker-February-2025.xlsx
+  url: https://tubcloud.tu-berlin.de/s/iWZ7j3zsCGfyJ5f/download/Global-Solar-Power-Tracker-February-2025.xlsx
 GBPT:
   net_capacity: false
   reliability_score: 6
   status: ["operating", "retired", "construction"]
-  fn: Global-Bioenergy-Power-Tracker-GBPT-September-2024.xlsx
-  url: https://tubcloud.tu-berlin.de/s/CzMBKe2rAcsoq7c/download/Global-Bioenergy-Power-Tracker-GBPT-September-2024.xlsx
+  fn:  Global-Bioenergy-Power-Tracker-GBPT-V3.xlsx
+  url: https://tubcloud.tu-berlin.de/s/ZkaQonLYdakrN75/download/Global-Bioenergy-Power-Tracker-GBPT-V3.xlsx
 GNPT:
   net_capacity: false
   reliability_score: 6
@@ -220,7 +220,7 @@ GHPT:
   reliability_score: 6
   status: ["operating", "retired", "construction"]
   fn: Global-Hydropower-Tracker-April-2025.xlsx
-  url: https://tubcloud.tu-berlin.de/s/2xqxRmfP4FKTrLf/download/Global-Hydropower-Tracker-April-2025.xlsx
+  url: https://tubcloud.tu-berlin.de/s/aDyd3MJWZNgeEH4/download/Global-Hydropower-Tracker-April-2025.xlsx
 MASTR:
   net_capacity: true
   reliability_score: 7
@@ -232,7 +232,7 @@ EESI:
   reliability_score: 5
   status: ["Operational"] # since no start years given
   fn: european-energy-storage-inventory-20250817-2245.json
-  url: https://tubcloud.tu-berlin.de/s/RXWgYbYJpePsWAZ/download/european-energy-storage-inventory-20250817-2245.json
+  url: https://tubcloud.tu-berlin.de/s/5KqMDMZfb2pN3Aw/download/european-energy-storage-inventory-20250817-2245.json
 GND:
   net_capacity: true
   reliability_score: 5