diff --git a/dfm_tools/observations.py b/dfm_tools/observations.py index 4ba65126..54dec447 100644 --- a/dfm_tools/observations.py +++ b/dfm_tools/observations.py @@ -502,8 +502,8 @@ def gesla3_ssh_read_catalog(only_coastal=True): def rwsddl_ssh_meta_dict(): - # combination for measured waterlevels - meta_dict = {'Grootheid.Code':'WATHTE', 'Groepering.Code':'NVT'} + # combination for measured waterlevels (no astro, no extremes) + meta_dict = {'ProcesType':'meting', 'Grootheid.Code':'WATHTE', 'Groepering.Code':''} return meta_dict @@ -546,9 +546,13 @@ def rwsddl_ssh_read_catalog(meta_dict=None): # add "Code" index as column and reset the index selected = selected.reset_index() - xcoords = selected["X"] - ycoords = selected["Y"] + xcoords = selected["Lon"] + ycoords = selected["Lat"] epsg_all = selected["Coordinatenstelsel"] + # TODO: manually replacing crs name with epsg, the old waterwebservices had epsg in + # this column, would be great if new wws also has this. + # https://github.com/Rijkswaterstaat/WaterWebservices/issues/20 + epsg_all = epsg_all.replace("ETRS89", "4258") epsg_uniq = epsg_all.unique() if len(epsg_uniq)>1: raise ValueError(f"multiple EPSG codes in one LocatieLijst not supported: {epsg_uniq.tolist()}") @@ -939,7 +943,7 @@ def rwsddl_ssh_retrieve_data(row, time_min, time_max): return # minimize disk usage of StatuswaardeLijst by converting to U1 - varn_status = "WaarnemingMetadata.StatuswaardeLijst" + varn_status = "WaarnemingMetadata.Statuswaarde" status_dict = {"O":"Ongecontroleerd", "G":"Gecontroleerd", "D":"Definitief"} @@ -947,26 +951,18 @@ def rwsddl_ssh_retrieve_data(row, time_min, time_max): measurements[varn_status] = measurements[varn_status].str.replace(v, k) # convert to xarray (dropping some constant columns) - drop_if_constant = ["WaarnemingMetadata.OpdrachtgevendeInstantieLijst", - "WaarnemingMetadata.BemonsteringshoogteLijst", - "WaarnemingMetadata.ReferentievlakLijst", - "AquoMetadata_MessageID", - "BioTaxonType", - 
"BemonsteringsSoort.Code", - "Compartiment.Code", - "Eenheid.Code", - "Grootheid.Code", - "Hoedanigheid.Code", - "WaardeBepalingsmethode.Code", - "MeetApparaat.Code", - ] - ds = ddlpy.dataframe_to_xarray(measurements, drop_if_constant) + always_preserve = [ + "Meetwaarde.Waarde_Numeriek", + "WaarnemingMetadata.Kwaliteitswaardecode", + "WaarnemingMetadata.Statuswaarde", + ] + ds = ddlpy.dataframe_to_xarray(df=measurements, always_preserve=always_preserve) ds[varn_status] = ds[varn_status].assign_attrs(status_dict) rename_dict = {'Meetwaarde.Waarde_Numeriek':'waterlevel', - 'WaarnemingMetadata.KwaliteitswaardecodeLijst':'qc', - 'WaarnemingMetadata.StatuswaardeLijst':'status'} + 'WaarnemingMetadata.Kwaliteitswaardecode':'qc', + 'WaarnemingMetadata.Statuswaarde':'status'} ds = ds.rename_vars(rename_dict) # convert meters to cm diff --git a/docs/notebooks/subset_retrieve_sealevel_observations.ipynb b/docs/notebooks/subset_retrieve_sealevel_observations.ipynb index 911200eb..e492b708 100644 --- a/docs/notebooks/subset_retrieve_sealevel_observations.ipynb +++ b/docs/notebooks/subset_retrieve_sealevel_observations.ipynb @@ -44,7 +44,7 @@ "cmems_catalog_gpd = dfmt.ssh_catalog_subset(source='cmems')\n", "uhslc_catalog_gpd = dfmt.ssh_catalog_subset(source='uhslc')\n", "psmsl_gnssir_catalog_gpd = dfmt.ssh_catalog_subset(source='psmsl-gnssir')\n", - "rwsddl_catalog_gpd = dfmt.ssh_catalog_subset(source='rwsddl')\n", + "rwsddl_catalog_gpd = dfmt.ssh_catalog_subset(source='rwsddl') # TODO: this currently takes approximately 30 seconds\n", "\n", "# subsetting gesla\n", "bool_ndays = gesla_catalog_gpd[\"time_ndays\"] > 365\n", @@ -399,7 +399,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.12.8" + "version": "3.12.11" } }, "nbformat": 4, diff --git a/docs/whats-new.md b/docs/whats-new.md index c6ee1cbb..4b38472c 100644 --- a/docs/whats-new.md +++ b/docs/whats-new.md @@ -2,6 +2,9 @@ ## UNRELEASED +### Feat +- update to new RWS 
Waterwebservices (and ddlpy) in [#1301](https://github.com/Deltares/dfm_tools/pull/1301) + ## 0.43.0 (2025-11-27) diff --git a/pyproject.toml b/pyproject.toml index 0530e8da..31c3daa1 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -47,8 +47,8 @@ dependencies = [ "erddapy>=2.0.2", #copernicusmarine>=2.0.0 has breaking changes compared to v1 "copernicusmarine>=2.0.0", - #rws-ddlpy>=0.6.0 `ddlpy.measurements_amount()` returns all amounts - "rws-ddlpy>=0.6.0", + #rws-ddlpy>=0.8.0 connects to new RWS waterwebservices/ddapi20 + "rws-ddlpy>=0.8.0", #pooch>=1.1.0 has attribute retrieve "pooch>=1.1.0", #hydrolib-core>=0.9.9 removed obsolete keywords to avoid model crash with 2026.01 diff --git a/tests/test_observations.py b/tests/test_observations.py index 2c0982f2..3003e2d9 100644 --- a/tests/test_observations.py +++ b/tests/test_observations.py @@ -89,7 +89,7 @@ def test_ssh_retrieve_data(source, tmp_path): if source=="rwsddl": # order of rows in rwsddl locations dataframe is python-version-dependent # make sure we always test on the same hist station (no realtime data) - bool_hoekvhld = ssc_catalog_gpd["Code"].isin(["HOEKVHLD"]) + bool_hoekvhld = ssc_catalog_gpd["Code"].isin(["hoekvanholland"]) ssc_catalog_gpd = ssc_catalog_gpd.loc[bool_hoekvhld] if source == "ioc": @@ -115,8 +115,9 @@ def test_ssh_netcdf_overview(tmp_path): # order of rows in rwsddl locations dataframe is python-version-dependent # make sure we always test on the same hist station (no realtime data) - bool_hoekvhld = ssc_catalog_gpd["Code"].isin(["HOEKVHLD"]) + bool_hoekvhld = ssc_catalog_gpd["Code"].isin(["hoekvanholland"]) ssc_catalog_gpd_sel = ssc_catalog_gpd.loc[bool_hoekvhld] + assert len(ssc_catalog_gpd_sel) == 1 dfmt.ssh_retrieve_data(ssc_catalog_gpd_sel, dir_output=tmp_path, time_min=time_min, time_max=time_max) @@ -131,11 +132,14 @@ def test_ssh_netcdf_overview(tmp_path): def test_rwsddl_ssh_get_time_max(): locations = ddlpy.locations() bool_hoedanigheid = 
locations['Hoedanigheid.Code'].isin(['NAP']) - bool_stations = locations.index.isin(['HOEKVHLD', 'IJMDBTHVN','SCHEVNGN']) + bool_stations = locations.index.isin(['hoekvanholland', 'ijmuiden.buitenhaven','scheveningen']) + bool_procestype = locations['ProcesType'].isin(['meting']) bool_grootheid = locations['Grootheid.Code'].isin(['WATHTE']) - bool_groepering = locations['Groepering.Code'].isin(['NVT']) - selected = locations.loc[bool_grootheid & bool_hoedanigheid & bool_groepering & bool_stations] + bool_groepering = locations['Groepering.Code'].isin(['']) + selected = locations.loc[bool_procestype & bool_grootheid & bool_hoedanigheid & bool_groepering & bool_stations] selected_withtimemax = dfmt.observations.rwsddl_ssh_get_time_max(selected) + assert len(selected) == 3 + assert len(selected_withtimemax) == 3 assert "time_max" not in selected.columns assert "time_max" in selected_withtimemax.columns