diff --git a/dags/wmes/CWMS_USGS_ratings.py b/dags/wmes/CWMS_USGS_ratings.py
new file mode 100644
index 0000000..810cb69
--- /dev/null
+++ b/dags/wmes/CWMS_USGS_ratings.py
@@ -0,0 +1,378 @@
+import logging
+import pandas as pd
+import numpy as np
+from datetime import datetime, timedelta, timezone
+import cwms
+from json import loads
+from dataretrieval import nwis
+import requests
+from airflow import DAG
+from airflow.decorators import dag, task
+from airflow.exceptions import AirflowSkipException
+from airflow.utils.task_group import TaskGroup
+from airflow.operators.python import get_current_context
+
+
+APIROOT = "https://cwms-data-test.cwbi.us/cwms-data/"
+APIKEY = "foo"
+DAYSBACK = 3
+
+
+default_args = {
+    "owner": "airflow",
+    "depends_on_past": False,
+    "start_date": (datetime.now(timezone.utc) - timedelta(hours=4)).replace(
+        minute=0, second=0
+    ),
+    "catchup_by_default": False,
+    "email_on_failure": False,
+    "email_on_retry": False,
+    "retries": 1,
+    "retry_delay": timedelta(minutes=5),
+    "execution_timeout": timedelta(hours=1),
+}
+
+
+def getusgs_rating_cda(api_root, office_id, days_back, api_key):
+    api_key = "apikey " + api_key
+    api = cwms.api.init_session(api_root=api_root, api_key=api_key)
+    logging.info(f"CDA connection: {api_root}")
+    logging.info(
+        f"Updated ratings will be checked from the USGS for the past {days_back} days"
+    )
+    execution_date = datetime.now()
+    logging.info(f"Execution date {execution_date}")
+
+    logging.info("Get Rating Spec information from CWMS Database")
+    rating_specs = get_rating_ids_from_specs(office_id)
+    USGS_ratings = get_location_aliases(
+        rating_specs, "USGS Station Number", "Agency Aliases", "CWMS", None, None
+    )
+
+    # grab ratings that don't have an existing rating curve, i.e. new specs.
+    USGS_ratings_empty = USGS_ratings[USGS_ratings["effective-dates"].isna()]
+    USGS_ratings = USGS_ratings[USGS_ratings["effective-dates"].notna()]
+
+    logging.info(f"Get list of ratings updated by USGS in past {days_back} days")
+    df = get_usgs_updated_ratings(days_back * 24)
+
+    updated_ratings = pd.merge(
+        USGS_ratings,
+        df,
+        how="inner",
+        left_on=["USGS_St_Num", "rating-type"],
+        right_on=["USGS_St_Num", "rating-type"],
+    )
+
+    updated_ratings.loc[:, "effective-dates"] = updated_ratings[
+        "effective-dates"
+    ].apply(lambda x: [pd.to_datetime(d) for d in x])
+    updated_ratings.loc[:, "cwms_max_effective_date"] = updated_ratings[
+        "effective-dates"
+    ].apply(max)
+
+    # merge the new specs without an existing curve back into the updated ratings df
+    if not USGS_ratings_empty.empty:
+        updated_ratings = pd.concat(
+            [updated_ratings, USGS_ratings_empty], ignore_index=True
+        )
+
+    cwms_write_ratings(updated_ratings)
+
+
+def get_rating_ids_from_specs(office_id):
+    rating_types = ["EXSA", "CORR", "BASE"]
+    rating_specs = cwms.get_rating_specs(office_id=office_id).df
+    rating_specs = rating_specs.dropna(subset=["description"])
+    for rating_type in rating_types:
+        rating_specs.loc[
+            rating_specs["description"].str.contains(f"USGS-{rating_type}"),
+            "rating-type",
+        ] = rating_type
+    rating_specs = rating_specs[
+        (rating_specs["rating-type"].isin(rating_types))
+        & (rating_specs["active"] == True)
+        & (rating_specs["auto-update"] == True)
+    ]
+    return rating_specs
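+
+
+# A minimal sketch of how the description filter above behaves, using a
+# hypothetical spec row (the description text is illustrative, not a real
+# CWMS record):
+#
+#   specs = pd.DataFrame(
+#       {"description": ["Stage;Flow from USGS-EXSA"], "active": [True],
+#        "auto-update": [True]}
+#   )
+#   specs.loc[specs["description"].str.contains("USGS-EXSA"), "rating-type"] = "EXSA"
+#   # -> the row is tagged rating-type "EXSA" and survives the filter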
+
+
+def get_location_aliases(
+    df, loc_group_id, category_id, office_id, category_office_id, group_office_id
+):
+    # CDA get location group endpoint has an error with category and group
+    # office ids; need to update when the error is fixed.
+    Locdf = cwms.get_location_group(
+        loc_group_id=loc_group_id,
+        category_id=category_id,
+        office_id=office_id,
+        category_office_id=category_office_id,
+        group_office_id=group_office_id,
+    ).df
+    USGS_alias = Locdf[Locdf["alias-id"].notnull()]
+    USGS_alias = USGS_alias.rename(
+        columns={"alias-id": "USGS_St_Num", "attribute": "Loc_attribute"}
+    )
+    USGS_alias.USGS_St_Num = USGS_alias.USGS_St_Num.str.rjust(8, "0")
+    USGS_ratings = pd.merge(
+        df, USGS_alias, how="inner", on=["location-id", "office-id"]
+    )
+    return USGS_ratings
+
+
+def get_usgs_updated_ratings(period):
+    """
+    Grab the list of ratings updated by the USGS within the given period
+    (in hours), using the NWIS get_ratings endpoint.
+    """
+    # Get USGS data
+    base_url = "https://nwis.waterdata.usgs.gov/nwisweb/get_ratings"
+
+    query_dict = {"period": period, "format": "rdb"}
+
+    r = requests.get(base_url, params=query_dict)
+    temp = pd.DataFrame(r.text.split("\n"))
+    temp = temp[temp[0].str.startswith("USGS")]
+    updated_ratings = temp[0].str.split("\t", expand=True)
+    updated_ratings.columns = [
+        "org",
+        "USGS_St_Num",
+        "rating-type",
+        "date_updated",
+        "url",
+    ]
+    updated_ratings["rating-type"] = updated_ratings["rating-type"].str.upper()
+    return updated_ratings
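+
+
+# Illustrative shape of one tab-delimited RDB row parsed above (values are
+# hypothetical; the real feed is whatever nwisweb/get_ratings returns):
+#
+#   "USGS\t05331000\texsa\t20240101120000\thttps://..."
+#   -> org="USGS", USGS_St_Num="05331000", rating-type="EXSA",
+#      date_updated="20240101120000", url="https://..."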
"BASE": "ft;cfs", "CORR": "ft;ft"} + for _, row in updated_ratings.iterrows(): + logging.info(f'Getting data for rating ID = {row["rating-id"]}') + logging.info( + f'Getting data from USGS for USGS ID = {row["USGS_St_Num"]}, Rating Type = {row["rating-type"]}' + ) + try: + usgs_rating, meta = nwis.get_ratings( + site=row["USGS_St_Num"], file_type=str(row["rating-type"]).lower() + ) + url = meta.url + except Exception as error: + usgsapiErr.append( + [row["rating-id"], row["USGS_St_Num"], row["rating-type"], error] + ) + logging.error( + f'FAIL Error collecting rating data from USGS for --> {row["rating-id"]},{row["USGS_St_Num"]}, {row["rating-type"]} USGS error = {error}' + ) + continue + if usgs_rating.empty: + logging.warning( + f'Empty rating obtained from USGS for USGS ID = {row["USGS_St_Num"]}, Rating Type = {row["rating-type"]}, url' + ) + usgsemptyErr.append( + [row["rating-id"], row["USGS_St_Num"], row["rating-type"]] + ) + else: + try: + response = requests.get(url) + temp = pd.DataFrame(response.text.split("\n")) + usgs_effective_date = get_usgs_effective_date(temp, row["rating-type"]) + except Exception as error: + usgseffectiveErr.append( + [row["rating-id"], row["USGS_St_Num"], row["rating-type"], error] + ) + logging.error( + f'FAIL Error collecting effective date from USGS rating --> {row["rating-id"]},{row["USGS_St_Num"]}, {row["rating-type"]} CDA error = {error}' + ) + continue + cwms_effective_date = row["cwms_max_effective_date"] + logging.info( + f"Effective dates: cwms = {cwms_effective_date}, usgs = {usgs_effective_date}" + ) + if (cwms_effective_date == usgs_effective_date) or ( + cwms_effective_date == (usgs_effective_date + timedelta(hours=1)) + ): + logging.info( + "Effective dates are the same rating curve will not be saved" + ) + same_effective = same_effective + 1 + else: + try: + usgs_store_rating = convert_usgs_rating_df( + usgs_rating, row["rating-type"] + ) + + if row["effective-dates"] and row["auto-migrate-extension"]: + current_rating = cwms.get_ratings( + rating_id=row["rating-id"], + office_id=row["office-id"], + begin=cwms_effective_date, + end=cwms_effective_date, + method="EAGER", + single_rating_df=True, + ) + rating_json = current_rating.json + points_json = loads(usgs_store_rating.to_json(orient="records")) + rating_json["simple-rating"]["rating-points"] = { + "point": points_json + } + rating_json["simple-rating"][ + "effective-date" + ] = usgs_effective_date.isoformat() + del rating_json["simple-rating"]["create-date"] + rating_json["simple-rating"]["active"] = row["auto-activate"] + else: + rating_json = cwms.rating_simple_df_to_json( + data=usgs_store_rating, + rating_id=row["rating-id"], + office_id=row["office-id"], + units=rating_units[row["rating-type"]], + effective_date=usgs_effective_date, + active=row["auto-activate"], + ) + response = cwms.update_ratings( + data=rating_json, rating_id=row["rating-id"] + ) + logging.info( + f'SUCCESS Stored rating for rating id = {row["rating-id"]}, effective date = {usgs_effective_date}' + ) + saved = saved + 1 + except Exception as error: + storErr.append( + [ + row["rating-id"], + row["USGS_St_Num"], + row["rating-type"], + error, + ] + ) + logging.error( + f'FAIL Data could not be stored to CWMS database for --> {row["rating-id"]},{row["USGS_St_Num"]}, {row["rating-type"]} CDA error = {error}' + ) + logging.info( + f"A total of {same_effective + saved} out of {total_recs} records were successfully saved or had same effective date in cwms" + ) + if len(usgsapiErr) > 0: + logging.info( + f"The 
+
+
+def convert_usgs_rating_df(df, rating_type):
+    if rating_type == "CORR":
+        df = df.groupby("CORR")
+        df = pd.concat([df.first(), df.last()], ignore_index=True, join="inner")
+        df = df.sort_values(by=["INDEP"], ignore_index=True)
+    df = df.rename(columns={"INDEP": "ind", "CORRINDEP": "dep", "DEP": "dep"})
+    df_out = df[["ind", "dep"]].copy()
+    return df_out
+
+
+def cwms_write_ratings(updated_ratings):
+    storErr = []
+    usgsapiErr = []
+    usgsemptyErr = []
+    usgseffectiveErr = []
+    total_recs = len(updated_ratings.index)
+    saved = 0
+    same_effective = 0
+
+    rating_units = {"EXSA": "ft;cfs", "BASE": "ft;cfs", "CORR": "ft;ft"}
+    for _, row in updated_ratings.iterrows():
+        logging.info(f'Getting data for rating ID = {row["rating-id"]}')
+        logging.info(
+            f'Getting data from USGS for USGS ID = {row["USGS_St_Num"]}, Rating Type = {row["rating-type"]}'
+        )
+        try:
+            usgs_rating, meta = nwis.get_ratings(
+                site=row["USGS_St_Num"], file_type=str(row["rating-type"]).lower()
+            )
+            url = meta.url
+        except Exception as error:
+            usgsapiErr.append(
+                [row["rating-id"], row["USGS_St_Num"], row["rating-type"], error]
+            )
+            logging.error(
+                f'FAIL Error collecting rating data from USGS for --> {row["rating-id"]},{row["USGS_St_Num"]}, {row["rating-type"]} USGS error = {error}'
+            )
+            continue
+        if usgs_rating.empty:
+            logging.warning(
+                f'Empty rating obtained from USGS for USGS ID = {row["USGS_St_Num"]}, Rating Type = {row["rating-type"]}, {url}'
+            )
+            usgsemptyErr.append(
+                [row["rating-id"], row["USGS_St_Num"], row["rating-type"]]
+            )
+        else:
+            try:
+                response = requests.get(url)
+                temp = pd.DataFrame(response.text.split("\n"))
+                usgs_effective_date = get_usgs_effective_date(temp, row["rating-type"])
+            except Exception as error:
+                usgseffectiveErr.append(
+                    [row["rating-id"], row["USGS_St_Num"], row["rating-type"], error]
+                )
+                logging.error(
+                    f'FAIL Error collecting effective date from USGS rating --> {row["rating-id"]},{row["USGS_St_Num"]}, {row["rating-type"]} error = {error}'
+                )
+                continue
+            cwms_effective_date = row["cwms_max_effective_date"]
+            logging.info(
+                f"Effective dates: cwms = {cwms_effective_date}, usgs = {usgs_effective_date}"
+            )
+            if (cwms_effective_date == usgs_effective_date) or (
+                cwms_effective_date == (usgs_effective_date + timedelta(hours=1))
+            ):
+                logging.info(
+                    "Effective dates are the same; rating curve will not be saved"
+                )
+                same_effective = same_effective + 1
+            else:
+                try:
+                    usgs_store_rating = convert_usgs_rating_df(
+                        usgs_rating, row["rating-type"]
+                    )
+
+                    if row["effective-dates"] and row["auto-migrate-extension"]:
+                        current_rating = cwms.get_ratings(
+                            rating_id=row["rating-id"],
+                            office_id=row["office-id"],
+                            begin=cwms_effective_date,
+                            end=cwms_effective_date,
+                            method="EAGER",
+                            single_rating_df=True,
+                        )
+                        rating_json = current_rating.json
+                        points_json = loads(usgs_store_rating.to_json(orient="records"))
+                        rating_json["simple-rating"]["rating-points"] = {
+                            "point": points_json
+                        }
+                        rating_json["simple-rating"][
+                            "effective-date"
+                        ] = usgs_effective_date.isoformat()
+                        del rating_json["simple-rating"]["create-date"]
+                        rating_json["simple-rating"]["active"] = row["auto-activate"]
+                    else:
+                        rating_json = cwms.rating_simple_df_to_json(
+                            data=usgs_store_rating,
+                            rating_id=row["rating-id"],
+                            office_id=row["office-id"],
+                            units=rating_units[row["rating-type"]],
+                            effective_date=usgs_effective_date,
+                            active=row["auto-activate"],
+                        )
+                    response = cwms.update_ratings(
+                        data=rating_json, rating_id=row["rating-id"]
+                    )
+                    logging.info(
+                        f'SUCCESS Stored rating for rating id = {row["rating-id"]}, effective date = {usgs_effective_date}'
+                    )
+                    saved = saved + 1
+                except Exception as error:
+                    storErr.append(
+                        [
+                            row["rating-id"],
+                            row["USGS_St_Num"],
+                            row["rating-type"],
+                            error,
+                        ]
+                    )
+                    logging.error(
+                        f'FAIL Data could not be stored to CWMS database for --> {row["rating-id"]},{row["USGS_St_Num"]}, {row["rating-type"]} CDA error = {error}'
+                    )
+    logging.info(
+        f"A total of {same_effective + saved} out of {total_recs} records were successfully saved or had the same effective date in CWMS"
+    )
+    if len(usgsapiErr) > 0:
+        logging.info(
+            f"The following ratings errored out when accessing the USGS API: {usgsapiErr}"
+        )
+    if len(usgsemptyErr) > 0:
+        logging.info(
+            f"The following ratings had an empty rating curve returned from the USGS: {usgsemptyErr}"
+        )
+    if len(usgseffectiveErr) > 0:
+        logging.info(
+            f"The following ratings errored when trying to determine the effective date from the USGS: {usgseffectiveErr}"
+        )
+    if len(storErr) > 0:
+        logging.info(
+            f"The following ratings errored when trying to store to CDA: {storErr}"
+        )
+
+
+@dag(
+    default_args=default_args,
+    tags=["CWMS", "USGS"],
+    schedule="@hourly",
+    max_active_runs=1,
+    max_active_tasks=4,
+    catchup=False,
+    doc_md=__doc__,
+)
+def cwms_usgs_ratings():
+    office_ids = ["MVP"]
+    for office_id in office_ids:
+        with TaskGroup(group_id=f"{office_id}_usgs") as tg:
+
+            @task(task_id=f"{office_id}_cwms_usgs_rating_byoffice")
+            def cwms_usgs_ratings_byoffice(office_id):
+                getusgs_rating_cda(
+                    api_root=APIROOT,
+                    office_id=office_id,
+                    days_back=DAYSBACK,
+                    api_key=APIKEY,
+                )
+
+            cwms_usgs_ratings_byoffice(office_id)
+
+
+DAG_ = cwms_usgs_ratings()
diff --git a/dags/wmes/CWMS_USGS_timeseries.py b/dags/wmes/CWMS_USGS_timeseries.py
index ddf4883..9b8d305 100644
--- a/dags/wmes/CWMS_USGS_timeseries.py
+++ b/dags/wmes/CWMS_USGS_timeseries.py
@@ -41,7 +41,8 @@ def getusgs_cda(api_root, office_id, days_back, api_key):
     USGS_ts = get_CMWS_TS_Loc_Data(office_id)
 
     # grab all of the unique USGS stations numbers to be sent to USGS api
-    sites = USGS_ts.USGS_St_Num.unique()
+    sites = USGS_ts[USGS_ts["USGS_Method_TS"].isna()].USGS_St_Num.unique()
+    method_sites = USGS_ts[USGS_ts["USGS_Method_TS"].notna()].USGS_St_Num.unique()
 
     logging.info(f"Execution date {execution_date}")
     # This is added to the 'startDT'
@@ -55,9 +56,18 @@ def getusgs_cda(api_root, office_id, days_back, api_key):
     endDT = execution_date + timedelta(hours=2)
 
     logging.info(f"Grabing data from USGS between {startDT} and {endDT}")
-    USGS_data = getUSGS_ts(sites, startDT, endDT)
-    CWMS_writeData(USGS_ts, USGS_data)
+    USGS_data = pd.DataFrame()
+    USGS_data_method = pd.DataFrame()
+
+    if len(sites) > 0:
+        USGS_data = getUSGS_ts(sites, startDT, endDT)
+
+    # sites with a method_id or USGS tsid are retrieved from a separate
+    # database; this is accessed by passing access=3 in the USGS API call.
+    if len(method_sites) > 0:
+        USGS_data_method = getUSGS_ts(method_sites, startDT, endDT, 3)
+
+    CWMS_writeData(USGS_ts, USGS_data, USGS_data_method)
 
 
 def get_USGS_params():
@@ -101,7 +111,9 @@ def get_CMWS_TS_Loc_Data(office):
     df = cwms.get_timeseries_group(
         group_id="USGS TS Data Acquisition",
         category_id="Data Acquisition",
-        office_id="CWMS",
+        office_id=office,
+        category_office_id="CWMS",
+        group_office_id="CWMS",
     ).df
 
     df[["location-id", "param", "type", "int", "dur", "ver"]] = df[
@@ -116,11 +128,13 @@ def get_CMWS_TS_Loc_Data(office):
     df["attribute"] = np.nan
     df = df.rename(columns={"alias-id": "USGS_Method_TS"})
 
+    # error in CDA with category_office_id and group_office_id; need to fix once CDA is updated
     Locdf = cwms.get_location_group(
         loc_group_id="USGS Station Number",
         category_id="Agency Aliases",
         office_id="CWMS",
     ).df.set_index("location-id")
+    Locdf = Locdf[Locdf["office-id"] == office]
 
     # Grab all of the locations that have a USGS station number assigned to them
     USGS_alias = Locdf[Locdf["alias-id"].notnull()]
@@ -167,7 +181,7 @@ def get_CMWS_TS_Loc_Data(office):
     return USGS_ts
 
 
-def getUSGS_ts(sites, startDT, endDT):
+def getUSGS_ts(sites, startDT, endDT, access=None):
     """
     Function to grab data from the USGS based off of dataretieve-python
     """
@@ -180,7 +194,7 @@ def getUSGS_ts(sites, startDT, endDT):
         "sites": ",".join(sites),
         "startDT": startDT.isoformat(),
         "endDT": endDT.isoformat(),
-        "access": 3,
+        "access": access,
         # "parameterCd": ",".join(unique_param_codes),
         # 'period': 'P1D',
         # "modifiedSince": "PT6H",
@@ -202,7 +216,7 @@ def getUSGS_ts(sites, startDT, endDT):
     return USGS_data
 
 
-def CWMS_writeData(USGS_ts, USGS_data):
+def CWMS_writeData(USGS_ts, USGS_data, USGS_data_method):
     # lists to hold time series that fail
     # noData -> usgs location and parameter were present in USGS api but the values were empty
     # NotinAPI -> usgs location and parameter were not retrieved from USGS api
@@ -224,9 +238,16 @@ def CWMS_writeData(USGS_ts, USGS_data):
             f"Attempting to write values for ts_id --> {ts_id},{USGS_Id_param}"
         )
         values = pd.DataFrame()
-        if USGS_Id_param in USGS_data.index:
+        if (USGS_Id_param in USGS_data.index) or (
+            USGS_Id_param in USGS_data_method.index
+        ):
+            if pd.isna(row.USGS_Method_TS):
+                USGS_data_row = USGS_data.loc[USGS_Id_param]
+            else:
+                USGS_data_row = USGS_data_method.loc[USGS_Id_param]
+
             # grab the time series values obtained from USGS API.
-            values_df = pd.DataFrame(USGS_data.loc[USGS_Id_param]["values"])
+            values_df = pd.DataFrame(USGS_data_row["values"])
             if values_df.shape[0] > 1:
                 if pd.isna(row.USGS_Method_TS):
                     logging.warning(
@@ -257,7 +278,7 @@ def CWMS_writeData(USGS_ts, USGS_data):
                 )
             else:
                 # grab value and for no data (ie -999999) remove from dataset
-                nodata_val = USGS_data.loc[USGS_Id_param]["variable"]["noDataValue"]
+                nodata_val = USGS_data_row["variable"]["noDataValue"]
                 values = values[values.value != str(int(nodata_val))]
                 # check again if values dataframe is empty after removing nodata_vals
                 if values.empty:
@@ -276,7 +297,7 @@ def CWMS_writeData(USGS_ts, USGS_data):
                 "qualifiers": "quality-code",
             }
         )
-        units = USGS_data.loc[USGS_Id_param]["variable"]["unit"]["unitCode"]
+        units = USGS_data_row["variable"]["unit"]["unitCode"]
         office = row["office-id"]
 
         values["quality-code"] = 0
@@ -326,7 +347,7 @@ def CWMS_writeData(USGS_ts, USGS_data):
     doc_md=__doc__,
 )
 def cwms_usgs_timeseries():
-    office_ids = ["LRL", "SAJ", "SWF", "SWG", "MVM", "MVR"]
+    office_ids = ["LRL", "MVP", "SAJ", "SWF", "SWG"]
 
     for office_id in office_ids:
         with TaskGroup(group_id=f"{office_id}_usgs") as tg:
diff --git a/plugins/cwms/__init__.py b/plugins/cwms/__init__.py
index f1e9a48..b97c4c3 100644
--- a/plugins/cwms/__init__.py
+++ b/plugins/cwms/__init__.py
@@ -1,20 +1,35 @@
 from importlib.metadata import PackageNotFoundError, version
 
 from cwms.api import *
+from cwms.catalog.blobs import *
 from cwms.catalog.catalog import *
+from cwms.catalog.clobs import *
+from cwms.datafile_imports.shef_critfile_import import *
 from cwms.forecast.forecast_instance import *
 from cwms.forecast.forecast_spec import *
 from cwms.levels.location_levels import *
 from cwms.levels.specified_levels import *
+from cwms.locations.gate_changes import *
+from cwms.locations.location_groups import *
 from cwms.locations.physical_locations import *
+from cwms.outlets.outlets import *
+from cwms.outlets.virtual_outlets import *
+from cwms.projects.project_lock_rights import *
+from cwms.projects.project_locks import *
+from cwms.projects.projects import *
 from cwms.ratings.ratings import *
 from cwms.ratings.ratings_spec import *
 from cwms.ratings.ratings_template import *
 from cwms.standard_text.standard_text import *
-from cwms.timeseries.timerseries_identifier import *
 from cwms.timeseries.timeseries import *
 from cwms.timeseries.timeseries_bin import *
+from cwms.timeseries.timeseries_group import *
+from cwms.timeseries.timeseries_identifier import *
+from cwms.timeseries.timeseries_profile import *
+from cwms.timeseries.timeseries_profile_instance import *
+from cwms.timeseries.timeseries_profile_parser import *
 from cwms.timeseries.timeseries_txt import *
+from cwms.turbines.turbines import *
 
 try:
     __version__ = version("cwms-python")
diff --git a/plugins/cwms/api.py b/plugins/cwms/api.py
index 1547292..31afe02 100644
--- a/plugins/cwms/api.py
+++ b/plugins/cwms/api.py
@@ -1,4 +1,4 @@
-""" Session management and REST functions for CWMS Data API.
+"""Session management and REST functions for CWMS Data API.
 
 This module provides functions for making REST calls to the CWMS Data API (CDA).
 These functions should be used internally to interact with the API. The user should not have to
@@ -31,7 +31,7 @@
 from json import JSONDecodeError
 from typing import Any, Optional, cast
 
-from requests import Response
+from requests import Response, adapters
 from requests_toolbelt import sessions  # type: ignore
 from requests_toolbelt.sessions import BaseUrlSession  # type: ignore
 
@@ -41,8 +41,10 @@
 API_ROOT = "https://cwms-data.usace.army.mil/cwms-data/"
 API_VERSION = 2
 
-# Initialize a non-authenticated session with the default root URL.
+# Initialize a non-authenticated session with the default root URL and set default pool connections.
 SESSION = sessions.BaseUrlSession(base_url=API_ROOT)
+adapter = adapters.HTTPAdapter(pool_connections=100, pool_maxsize=100)
+SESSION.mount("https://", adapter)
 
 
 class InvalidVersion(Exception):
@@ -91,7 +93,10 @@ def hint(self) -> str:
 
 
 def init_session(
-    *, api_root: Optional[str] = None, api_key: Optional[str] = None
+    *,
+    api_root: Optional[str] = None,
+    api_key: Optional[str] = None,
+    pool_connections: int = 100,
 ) -> BaseUrlSession:
     """Specify a root URL and authentication key for the CWMS Data API.
 
@@ -112,7 +117,10 @@ def init_session(
     if api_root:
         logging.debug(f"Initializing root URL: api_root={api_root}")
         SESSION = sessions.BaseUrlSession(base_url=api_root)
-
+        adapter = adapters.HTTPAdapter(
+            pool_connections=pool_connections, pool_maxsize=pool_connections
+        )
+        SESSION.mount("https://", adapter)
     if api_key:
         logging.debug(f"Setting authorization key: api_key={api_key}")
         SESSION.headers.update({"Authorization": api_key})
@@ -183,6 +191,7 @@ def get_xml(
     headers = {"Accept": api_version_text(api_version)}
 
     response = SESSION.get(endpoint, params=params, headers=headers)
+    response.close()
 
     if response.status_code < 200 or response.status_code >= 300:
         logging.error(f"CDA Error: response={response}")
@@ -220,7 +229,7 @@ def get(
     headers = {"Accept": api_version_text(api_version)}
 
     response = SESSION.get(endpoint, params=params, headers=headers)
-
+    response.close()
     if response.status_code < 200 or response.status_code >= 300:
         logging.error(f"CDA Error: response={response}")
         raise ApiError(response)
@@ -232,6 +241,46 @@ def get(
     return {}
 
 
+def get_with_paging(
+    selector: str,
+    endpoint: str,
+    params: RequestParams,
+    *,
+    api_version: int = API_VERSION,
+) -> JSON:
+    """Make a GET request to the CWMS Data API with paging.
+
+    Args:
+        selector: The JSON key whose entries will be merged through each page call.
+        endpoint: The CDA endpoint for the record(s).
+        params (optional): Query parameters for the request.
+
+    Keyword Args:
+        api_version (optional): The CDA version to use for the request. If not specified,
+            the default API_VERSION will be used.
+
+    Returns:
+        The deserialized JSON response data.
+
+    Raises:
+        ApiError: If an error response is returned by the API.
+    """
+
+    first_pass = True
+    while first_pass or (params.get("page") is not None):
+        temp = get(endpoint, params, api_version=api_version)
+        if first_pass:
+            response = temp
+        else:
+            response[selector] = response[selector] + temp[selector]
+        if "next-page" in temp.keys():
+            params["page"] = temp["next-page"]
+        else:
+            params["page"] = None
+        first_pass = False
+    return response
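+
+
+# Minimal usage sketch (endpoint, selector, and params are illustrative; the
+# assumption is a CDA endpoint that pages an "entries" key via "next-page"
+# cursors):
+#
+#   params = {"office": "MVP", "page-size": 5000, "page": None}
+#   catalog = get_with_paging("entries", "catalog/TIMESERIES", params)
+#   print(len(catalog["entries"]))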
+
+
 def post(
     endpoint: str,
     data: Any,
@@ -260,11 +309,11 @@ def post(
     # post requires different headers than get for
     headers = {"accept": "*/*", "Content-Type": api_version_text(api_version)}
 
-    if isinstance(data, dict):
+    if isinstance(data, dict) or isinstance(data, list):
         data = json.dumps(data)
 
-    response = SESSION.post(endpoint, params=params,
-                            headers=headers, data=data)
+    response = SESSION.post(endpoint, params=params, headers=headers, data=data)
+    response.close()
 
     if response.status_code < 200 or response.status_code >= 300:
         logging.error(f"CDA Error: response={response}")
@@ -300,11 +349,10 @@ def patch(
     if data is None:
         response = SESSION.patch(endpoint, params=params, headers=headers)
     else:
-        if isinstance(data, dict):
+        if isinstance(data, dict) or isinstance(data, list):
             data = json.dumps(data)
-        response = SESSION.patch(
-            endpoint, params=params, headers=headers, data=data)
-
+        response = SESSION.patch(endpoint, params=params, headers=headers, data=data)
+        response.close()
     if response.status_code < 200 or response.status_code >= 300:
         logging.error(f"CDA Error: response={response}")
         raise ApiError(response)
@@ -332,7 +380,7 @@ def delete(
     headers = {"Accept": api_version_text(api_version)}
 
     response = SESSION.delete(endpoint, params=params, headers=headers)
-
+    response.close()
     if response.status_code < 200 or response.status_code >= 300:
         logging.error(f"CDA Error: response={response}")
         raise ApiError(response)
diff --git a/plugins/cwms/catalog/blobs.py b/plugins/cwms/catalog/blobs.py
new file mode 100644
index 0000000..3e62269
--- /dev/null
+++ b/plugins/cwms/catalog/blobs.py
@@ -0,0 +1,85 @@
+from typing import Optional
+
+import cwms.api as api
+from cwms.cwms_types import JSON, Data
+
+
+def get_blob(blob_id: str, office_id: str) -> Data:
+    """Get a single blob.
+
+    Parameters
+    ----------
+    blob_id: string
+        Specifies the id of the blob
+    office_id: string
+        Specifies the office of the blob.
+
+    Returns
+    -------
+    cwms data type. data.json will return the JSON output and data.df will return a dataframe
+    """
+
+    endpoint = f"blobs/{blob_id}"
+    params = {"office": office_id}
+    response = api.get(endpoint, params, api_version=1)
+    return Data(response)
+
+
+def get_blobs(
+    office_id: Optional[str] = None,
+    page_size: Optional[int] = 100,
+    blob_id_like: Optional[str] = None,
+) -> Data:
+    """Get a subset of Blobs
+
+    Parameters
+    ----------
+    office_id: Optional[string]
+        Specifies the office of the blob.
+    page_size: Optional[Integer]
+        How many entries per page returned. Default 100.
+    blob_id_like: Optional[string]
+        Posix regular expression matching against the blob id
+
+    Returns
+    -------
+    cwms data type. data.json will return the JSON output and data.df will return a dataframe
+    """
+
+    endpoint = "blobs"
+    params = {"office": office_id, "page-size": page_size, "like": blob_id_like}
+
+    response = api.get(endpoint, params, api_version=1)
+    return Data(response, selector="blobs")
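+
+
+# Usage sketch (office and pattern are illustrative):
+#
+#   blobs = get_blobs(office_id="MVP", blob_id_like="FORECAST.*")
+#   print(blobs.df)  # one row per matching blob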
+
+
+def store_blobs(data: JSON, fail_if_exists: Optional[bool] = True) -> None:
+    """Create New Blob
+
+    Parameters
+    ----------
+    data: JSON dictionary
+        JSON containing information of Blob to be created
+        {
+            "office-id": "string",
+            "id": "string",
+            "description": "string",
+            "media-type-id": "string",
+            "value": "string"
+        }
+    fail_if_exists: Boolean
+        Create will fail if provided ID already exists. Default: true
+
+    Returns
+    -------
+    None
+    """
+
+    if not isinstance(data, dict):
+        raise ValueError("Cannot store a Blob without a JSON data dictionary")
+
+    endpoint = "blobs"
+    params = {"fail-if-exists": fail_if_exists}
+
+    return api.post(endpoint, data, params, api_version=1)
diff --git a/plugins/cwms/catalog/clobs.py b/plugins/cwms/catalog/clobs.py
new file mode 100644
index 0000000..1650643
--- /dev/null
+++ b/plugins/cwms/catalog/clobs.py
@@ -0,0 +1,158 @@
+from typing import Optional
+
+import cwms.api as api
+from cwms.cwms_types import JSON, Data
+
+
+def get_clob(clob_id: str, office_id: str, clob_id_query: Optional[str] = None) -> Data:
+    """Get a single clob.
+
+    Parameters
+    ----------
+    clob_id: string
+        Specifies the id of the clob
+    office_id: string
+        Specifies the office of the clob.
+    clob_id_query: string
+        If this query parameter is provided the id path parameter is ignored and the
+        value of the query parameter is used. Note: this query parameter is necessary
+        for id's that contain '/' or other special characters. Because of abuse even
+        properly escaped '/' in url paths are blocked. When using this query parameter
+        a valid path parameter must still be provided for the request to be properly
+        routed. If your clob id contains '/' you can't specify the clob-id query
+        parameter and also specify the id path parameter because firewall and/or server
+        rules will deny the request even though you are specifying this override.
+        "ignored" is suggested.
+
+    Returns
+    -------
+    cwms data type. data.json will return the JSON output and data.df will return a dataframe
+    """
+
+    endpoint = f"clobs/{clob_id}"
+    params = {
+        "office": office_id,
+        "clob-id-query": clob_id_query,
+    }
+    response = api.get(endpoint, params)
+    return Data(response)
+
+
+def get_clobs(
+    office_id: Optional[str] = None,
+    page_size: Optional[int] = 100,
+    include_values: Optional[bool] = False,
+    clob_id_like: Optional[str] = None,
+) -> Data:
+    """Get a subset of Clobs
+
+    Parameters
+    ----------
+    office_id: Optional[string]
+        Specifies the office of the clob.
+    page_size: Optional[Integer]
+        How many entries per page returned. Default 100.
+    include_values: Optional[Boolean]
+        Do you want the value associated with this particular clob (default: false)
+    clob_id_like: Optional[string]
+        Posix regular expression matching against the clob id
+
+    Returns
+    -------
+    cwms data type. data.json will return the JSON output and data.df will return a dataframe
+    """
+
+    endpoint = "clobs"
+    params = {
+        "office": office_id,
+        "page-size": page_size,
+        "include-values": include_values,
+        "like": clob_id_like,
+    }
+
+    response = api.get(endpoint, params)
+    return Data(response, selector="clobs")
+
+
+def delete_clob(clob_id: str, office_id: str) -> None:
+    """Deletes requested clob
+
+    Parameters
+    ----------
+    clob_id: string
+        Specifies the id of the clob to be deleted
+    office_id: string
+        Specifies the office of the clob.
+
+    Returns
+    -------
+    None
+    """
+
+    endpoint = f"clobs/{clob_id}"
+    params = {"office": office_id}
+
+    return api.delete(endpoint, params=params, api_version=1)
+
+
+def update_clob(data: JSON, clob_id: str, ignore_nulls: Optional[bool] = True) -> None:
+    """Updates clob
+
+    Parameters
+    ----------
+    data: JSON dictionary
+        JSON containing information of Clob to be updated
+        {
+            "office-id": "string",
+            "id": "string",
+            "description": "string",
+            "value": "string"
+        }
+    clob_id: string
+        Specifies the id of the clob to be updated
+    ignore_nulls: Boolean
+        If true, null and empty fields in the provided clob will be ignored and the
+        existing value of those fields left in place. Default: true
+
+    Returns
+    -------
+    None
+    """
+
+    if not isinstance(data, dict):
+        raise ValueError("Cannot store a Clob without a JSON data dictionary")
+
+    endpoint = f"clobs/{clob_id}"
+    params = {"ignore-nulls": ignore_nulls}
+
+    return api.patch(endpoint, data, params, api_version=1)
+
+
+def store_clobs(data: JSON, fail_if_exists: Optional[bool] = True) -> None:
+    """Create New Clob
+
+    Parameters
+    ----------
+    data: JSON dictionary
+        JSON containing information of Clob to be created
+        {
+            "office-id": "string",
+            "id": "string",
+            "description": "string",
+            "value": "string"
+        }
+    fail_if_exists: Boolean
+        Create will fail if provided ID already exists. Default: true
+
+    Returns
+    -------
+    None
+    """
+
+    if not isinstance(data, dict):
+        raise ValueError("Cannot store a Clob without a JSON data dictionary")
+
+    endpoint = "clobs"
+    params = {"fail-if-exists": fail_if_exists}
+
+    return api.post(endpoint, data, params, api_version=1)
diff --git a/plugins/cwms/cwms_types.py b/plugins/cwms/cwms_types.py
index f5296a3..6eb18e6 100644
--- a/plugins/cwms/cwms_types.py
+++ b/plugins/cwms/cwms_types.py
@@ -2,7 +2,7 @@
 from enum import Enum, auto
 from typing import Any, Optional
 
-from pandas import DataFrame, Index, json_normalize, to_datetime
+from pandas import DataFrame, Index, json_normalize, to_datetime, to_numeric
 
 # Describes generic JSON serializable data.
 JSON = dict[str, Any]
@@ -51,27 +51,48 @@ def to_df(json: JSON, selector: Optional[str]) -> DataFrame:
             A data frame containing the data located
         """
 
-        data = deepcopy(json)
-
-        if selector:
+        def get_df_data(data: JSON, selector: str) -> JSON:
+            # get the data that will be stored in the dataframe using the selectors
             df_data = data
             for key in selector.split("."):
                 if key in df_data.keys():
                     df_data = df_data[key]
+            return df_data
+
+        def rating_type(data: JSON) -> DataFrame:
+            # grab the correct point values for a rating table
+            df = DataFrame(data["point"]) if data["point"] else DataFrame()
+            df = df.apply(to_numeric)
+            return df
+
+        def timeseries_type(orig_json: JSON, value_json: JSON) -> DataFrame:
+            # if timeseries values are present then grab the values and put into
+            # dataframe else create empty dataframe
+            columns = Index([sub["name"] for sub in orig_json["value-columns"]])
+            if value_json:
+                df = DataFrame(value_json)
+                df.columns = columns
+            else:
+                df = DataFrame(columns=columns)
+
+            if "date-time" in df.columns:
+                df["date-time"] = to_datetime(df["date-time"], unit="ms", utc=True)
+            return df
+
+        data = deepcopy(json)
+
+        if selector:
+            df_data = get_df_data(data, selector)
 
             # if the dataframe is for a rating table
             if ("rating-points" in selector) and ("point" in df_data.keys()):
-                df = DataFrame(df_data["point"])
+                df = rating_type(df_data)
             elif selector == "values":
-                df = DataFrame(df_data)
-                # if timeseries values are present then grab the values and put into dataframe
-                df.columns = Index([sub["name"] for sub in data["value-columns"]])
+                df = timeseries_type(data, df_data)
 
-                if "date-time" in df.columns:
-                    df["date-time"] = to_datetime(df["date-time"], unit="ms", utc=True)
             else:
-                df = json_normalize(df_data)
+                df = json_normalize(df_data) if df_data else DataFrame()
         else:
             df = json_normalize(data)
 
@@ -81,7 +102,7 @@ def df(self) -> DataFrame:
     def df(self) -> DataFrame:
         """Return the data frame."""
 
-        if type(self._df) != DataFrame:
+        if not isinstance(self._df, DataFrame):
            self._df = Data.to_df(self.json, self.selector)
         return self._df
diff --git a/plugins/cwms/datafile_imports/shef_critfile_import.py b/plugins/cwms/datafile_imports/shef_critfile_import.py
new file mode 100644
index 0000000..11dc966
--- /dev/null
+++ b/plugins/cwms/datafile_imports/shef_critfile_import.py
@@ -0,0 +1,130 @@
+import re
+from typing import Dict, List
+
+import pandas as pd
+
+import cwms
+
+
+def import_critfile_to_ts_group(
+    file_path: str,
+    office_id: str,
+    group_id: str = "SHEF Data Acquisition",
+    category_id: str = "Data Acquisition",
+    group_office_id: str = "CWMS",
+    category_office_id: str = "CWMS",
+    replace_assigned_ts: bool = False,
+) -> None:
+    """
+    Processes a .crit file and saves the information to the SHEF Data Acquisition time series group.
+
+    Parameters
+    ----------
+    file_path : str
+        Path to the .crit file.
+    office_id : str
+        The ID of the office associated with the specified timeseries.
+    group_id : str, optional
+        The specified group associated with the timeseries data. Defaults to "SHEF Data Acquisition".
+    category_id : str, optional
+        The category ID that contains the timeseries group. Defaults to "Data Acquisition".
+    group_office_id : str, optional
+        The specified office group associated with the timeseries data. Defaults to "CWMS".
+    category_office_id : str, optional
+        The office that owns the category containing the timeseries group. Defaults to "CWMS".
+    replace_assigned_ts : bool, optional
+        Specifies whether to unassign all existing time series before assigning new time series
+        specified in the content body. Default is False.
+
+    Returns
+    -------
+    None
+    """
+
+    def parse_crit_file(file_path: str) -> List[Dict[str, str]]:
+        """
+        Parses a .crit file into a list of dictionaries containing timeseries ID and Alias.
+
+        Parameters
+        ----------
+        file_path : str
+            Path to the .crit file.
+
+        Returns
+        -------
+        List[Dict[str, str]]
+            A list of dictionaries with "Alias" and "Timeseries ID" as keys.
+        """
+        parsed_data = []
+        with open(file_path, "r") as file:
+            for line in file:
+                # Ignore comment lines and empty lines
+                if line.startswith("#") or not line.strip():
+                    continue
+
+                # Extract alias, timeseries ID, and TZ
+                match = re.match(r"([^=]+)=([^;]+);(.+)", line.strip())
+
+                if match:
+                    alias = match.group(1).strip()
+                    timeseries_id = match.group(2).strip()
+                    alias2 = match.group(3).strip()
+
+                    parsed_data.append(
+                        {
+                            "Alias": alias + ":" + alias2,
+                            "Timeseries ID": timeseries_id,
+                        }
+                    )
+
+        return parsed_data
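+
+    # Illustrative .crit line (hypothetical SHEF alias) and how the regex
+    # above splits it:
+    #
+    #   TESTSITE.HG=TestSite.Stage.Inst.15Minutes.0.raw;US/Central
+    #   -> Alias "TESTSITE.HG:US/Central",
+    #      Timeseries ID "TestSite.Stage.Inst.15Minutes.0.raw"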
+
+    def append_df(
+        df: pd.DataFrame, office_id: str, ts_id: str, alias: str
+    ) -> pd.DataFrame:
+        """
+        Appends a row to the DataFrame.
+
+        Parameters
+        ----------
+        df : pandas.DataFrame
+            The DataFrame to append to.
+        office_id : str
+            The ID of the office associated with the specified timeseries.
+        ts_id : str
+            The timeseries ID from the file.
+        alias : str
+            The alias from the file.
+
+        Returns
+        -------
+        pandas.DataFrame
+            The updated DataFrame.
+        """
+        data = {
+            "office-id": [office_id],
+            "timeseries-id": [ts_id],
+            "alias-id": [alias],
+        }
+        df = pd.concat([df, pd.DataFrame(data)])
+        return df
+
+    # Parse the file and get the parsed data
+    parsed_data = parse_crit_file(file_path)
+
+    df = pd.DataFrame()
+    for data in parsed_data:
+        # Create DataFrame for the current row
+        df = append_df(df, office_id, data["Timeseries ID"], data["Alias"])
+
+    # Generate JSON dictionary
+    json_dict = cwms.timeseries_group_df_to_json(
+        data=df,
+        group_id=group_id,
+        group_office_id=group_office_id,
+        category_office_id=category_office_id,
+        category_id=category_id,
+    )
+
+    cwms.update_timeseries_groups(
+        group_id=group_id,
+        office_id=office_id,
+        replace_assigned_ts=replace_assigned_ts,
+        data=json_dict,
+    )
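+
+
+# Usage sketch (path and office are illustrative):
+#
+#   import_critfile_to_ts_group("/path/to/shef_import.crit", office_id="MVP")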
""" if data is None: - raise ValueError( - "Storing a forecast instance requires a JSON data dictionary") + raise ValueError("Storing a forecast instance requires a JSON data dictionary") endpoint = "forecast-instance" return api.post(endpoint, data, params=None) @@ -195,8 +193,7 @@ def delete_forecast_instance( if designator is None: raise ValueError("Deleting a forecast instance requires a designator") if forecast_date is None: - raise ValueError( - "Deleting a forecast instance requires a forecast date") + raise ValueError("Deleting a forecast instance requires a forecast date") if issue_date is None: raise ValueError("Deleting a forecast instance requires a issue date") diff --git a/plugins/cwms/forecast/forecast_spec.py b/plugins/cwms/forecast/forecast_spec.py index 8d36543..b87b36d 100644 --- a/plugins/cwms/forecast/forecast_spec.py +++ b/plugins/cwms/forecast/forecast_spec.py @@ -125,8 +125,7 @@ def store_forecast_spec(data: JSON) -> None: If a 500 range error code response is returned from the server. """ if data is None: - raise ValueError( - "Storing a forecast spec requires a JSON data dictionary") + raise ValueError("Storing a forecast spec requires a JSON data dictionary") endpoint = "forecast-spec" return api.post(endpoint, data) diff --git a/plugins/cwms/levels/location_levels.py b/plugins/cwms/levels/location_levels.py index f85e6a5..d3aea2e 100644 --- a/plugins/cwms/levels/location_levels.py +++ b/plugins/cwms/levels/location_levels.py @@ -101,11 +101,9 @@ def get_location_level( """ if level_id is None: - raise ValueError( - "Cannot retrieve a single location level without an id") + raise ValueError("Cannot retrieve a single location level without an id") if office_id is None: - raise ValueError( - "Cannot retrieve a single location level without an office id") + raise ValueError("Cannot retrieve a single location level without an office id") if effective_date is None: raise ValueError( "Cannot retrieve a single location level without an effective date" @@ -131,8 +129,7 @@ def store_location_level(data: JSON) -> None: """ if data is None: - raise ValueError( - "Cannot store a location level without a JSON data dictionary") + raise ValueError("Cannot store a location level without a JSON data dictionary") endpoint = "levels" return api.post(endpoint, data, params=None) diff --git a/plugins/cwms/levels/specified_levels.py b/plugins/cwms/levels/specified_levels.py index 68aeec5..592df72 100644 --- a/plugins/cwms/levels/specified_levels.py +++ b/plugins/cwms/levels/specified_levels.py @@ -86,8 +86,7 @@ def delete_specified_level(specified_level_id: str, office_id: str) -> None: if specified_level_id is None: raise ValueError("Cannot delete a specified level without an id") if office_id is None: - raise ValueError( - "Cannot delete a specified level without an office id") + raise ValueError("Cannot delete a specified level without an office id") endpoint = f"specified-levels/{specified_level_id}" params = {"office": office_id} @@ -117,8 +116,7 @@ def update_specified_level( if new_specified_level_id is None: raise ValueError("Cannot update a specified level without a new id") if office_id is None: - raise ValueError( - "Cannot update a specified level without an office id") + raise ValueError("Cannot update a specified level without an office id") endpoint = f"specified-levels/{old_specified_level_id}" params = { diff --git a/plugins/cwms/locations/gate_changes.py b/plugins/cwms/locations/gate_changes.py new file mode 100644 index 0000000..6ce8473 --- /dev/null +++ 
diff --git a/plugins/cwms/locations/gate_changes.py b/plugins/cwms/locations/gate_changes.py
new file mode 100644
index 0000000..6ce8473
--- /dev/null
+++ b/plugins/cwms/locations/gate_changes.py
@@ -0,0 +1,185 @@
+# Copyright (c) 2024
+# United States Army Corps of Engineers - Hydrologic Engineering Center (USACE/HEC)
+# All Rights Reserved. USACE PROPRIETARY/CONFIDENTIAL.
+# Source may not be released without written approval from HEC
+
+from datetime import datetime
+from typing import Optional
+
+import cwms.api as api
+from cwms.cwms_types import JSON, Data
+
+
+def get_all_gate_changes(
+    office_id: str,
+    project_id: str,
+    begin: datetime,
+    end: datetime,
+    start_time_inclusive: Optional[bool] = True,
+    end_time_inclusive: Optional[bool] = False,
+    unit_system: Optional[str] = "EN",
+    page_size: Optional[int] = 500,
+) -> Data:
+    """
+    Returns all gate changes for a project within a specified time range.
+
+    Parameters
+    ----------
+    office_id: string
+        The owning office of the project
+    project_id: string
+        The project identifier
+    begin: datetime
+        The beginning of the time range
+    end: datetime
+        The end of the time range
+    start_time_inclusive: boolean, optional
+        Whether the returned gate changes should include data from the specified start
+        timestamp. Default is `True`.
+    end_time_inclusive: boolean, optional
+        Whether the returned gate changes should include data from the specified end
+        timestamp. Default is `False`.
+    unit_system: string, optional
+        The unit system to use for the gate changes. Can be SI (International Scientific)
+        or EN (Imperial). Default is `EN`.
+    page_size: integer, optional
+        The maximum number of gate changes to retrieve, regardless of time window. A
+        positive integer is interpreted as the maximum number of changes from the
+        beginning of the time window. A negative integer is interpreted as the maximum
+        number from the end of the time window. Default 500. A page cursor will not be
+        returned by this DTO. Instead, the next page can be determined by querying the
+        next set of changes using the last returned change date and using
+        start-time-inclusive=False.
+
+    Returns
+    -------
+    cwms data type
+    """
+
+    endpoint = f"projects/{office_id}/{project_id}/gate-changes"
+    params = {
+        "begin": begin.isoformat() if begin else None,
+        "end": end.isoformat() if end else None,
+        "start-time-inclusive": start_time_inclusive,
+        "end-time-inclusive": end_time_inclusive,
+        "unit-system": unit_system,
+        "page-size": page_size,
+    }
+    response = api.get(endpoint, params=params)
+    return Data(response)
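+
+
+# Usage sketch (office, project, and dates are illustrative):
+#
+#   from datetime import datetime
+#   changes = get_all_gate_changes(
+#       "SPK", "BIGH", datetime(2024, 1, 1), datetime(2024, 2, 1)
+#   )
+#   print(changes.df)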
+
+
+def store_gate_change(
+    gate_change_data: JSON, fail_if_exists: Optional[bool] = True
+) -> None:
+    """
+    Creates a gate change.
+
+    Parameters
+    ----------
+    gate_change_data: JSON
+        The gate change data to insert into the database.
+        The data must be in JSON format as an array.
+
+        Example:
+        [{
+            "type": "gate-change",
+            "project-id": {
+                "office-id": "SPK",
+                "name": "BIGH"
+            },
+            "change-date": 1704096000000,
+            "pool-elevation": 3.0,
+            "protected": true,
+            "discharge-computation-type": {
+                "office-id": "CWMS",
+                "display-value": "A",
+                "tooltip": "Adjusted by an automated method",
+                "active": true
+            },
+            "reason-type": {
+                "office-id": "CWMS",
+                "display-value": "O",
+                "tooltip": "Other release",
+                "active": true
+            },
+            "notes": "Test notes",
+            "new-total-discharge-override": 1.0,
+            "old-total-discharge-override": 2.0,
+            "discharge-units": "cfs",
+            "tailwater-elevation": 4.0,
+            "elevation-units": "ft",
+            "settings": [
+                {
+                    "type": "gate-setting",
+                    "location-id": {
+                        "office-id": "SPK",
+                        "name": "BIGH-TG1"
+                    },
+                    "opening": 0.0,
+                    "opening-parameter": "Opening",
+                    "invert-elevation": 1.0,
+                    "opening-units": "ft"
+                },
+                {
+                    "type": "gate-setting",
+                    "location-id": {
+                        "office-id": "SPK",
+                        "name": "TG2"
+                    },
+                    "opening": 0.0,
+                    "opening-parameter": "Opening",
+                    "invert-elevation": 1.0,
+                    "opening-units": "ft"
+                }
+            ]
+        }]
+
+    fail_if_exists: boolean, optional
+        Whether to fail if the gate change already exists. Default is `True`.
+
+    Returns
+    -------
+    cwms data type
+    """
+
+    endpoint = "projects/gate-changes"
+    params = {
+        "fail-if-exists": fail_if_exists,
+    }
+    return api.post(endpoint, data=gate_change_data, params=params)
+
+
+def delete_gate_change(
+    office_id: str,
+    project_id: str,
+    begin: datetime,
+    end: datetime,
+    override_protection: Optional[bool] = False,
+) -> None:
+    """
+    Deletes a gate change.
+
+    Parameters
+    ----------
+    office_id: string
+        The owning office of the gate change.
+    project_id: string
+        The project identifier.
+    begin: datetime
+        The beginning of the time range.
+    end: datetime
+        The end of the time range.
+    override_protection: boolean, optional
+        Whether to enable override protection for the gate change. Default is `False`.
+
+    Returns
+    -------
+    None
+    """
+
+    endpoint = f"projects/{office_id}/{project_id}/gate-changes"
+    params = {
+        "begin": begin.isoformat() if begin else None,
+        "end": end.isoformat() if end else None,
+        "override-protection": override_protection,
+    }
+
+    return api.delete(endpoint, params=params)
diff --git a/plugins/cwms/locations/location_groups.py b/plugins/cwms/locations/location_groups.py
new file mode 100644
index 0000000..cbbf650
--- /dev/null
+++ b/plugins/cwms/locations/location_groups.py
@@ -0,0 +1,166 @@
+from typing import Optional
+
+import pandas as pd
+from pandas import DataFrame
+
+import cwms.api as api
+from cwms.cwms_types import JSON, Data
+
+
+def get_location_group(
+    loc_group_id: str,
+    category_id: str,
+    office_id: str,
+    group_office_id: Optional[str] = None,
+    category_office_id: Optional[str] = None,
+) -> Data:
+    """Retrieves the locations assigned to the requested location group
+
+    Parameters
+    ----------
+    loc_group_id: string
+        Location group whose data is to be included in the response.
+    category_id: string
+        The category id that contains the Location group.
+    office_id: string
+        The owning office of the Locations assigned to the group whose data is to be
+        included in the response.
+    group_office_id: string
+        Specifies the owning office of the Location group.
+    category_office_id: string
+        Specifies the owning office of the Location group category.
+
+    Returns
+    -------
+    cwms data type. data.json will return the JSON output and data.df will return a dataframe
+    """
+
+    endpoint = f"location/group/{loc_group_id}"
+    params = {
+        "office": office_id,
+        "category-id": category_id,
+        "category-office-id": category_office_id,
+        "group-office-id": group_office_id,
+    }
+
+    response = api.get(endpoint, params, api_version=1)
+    return Data(response, selector="assigned-locations")
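+
+
+# Usage sketch, mirroring how the DAGs above resolve USGS station aliases
+# (the group and category are CWMS-owned):
+#
+#   locs = get_location_group(
+#       loc_group_id="USGS Station Number",
+#       category_id="Agency Aliases",
+#       office_id="CWMS",
+#   )
+#   usgs_alias = locs.df[locs.df["alias-id"].notnull()]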
+
+
+def get_location_groups(
+    office_id: Optional[str] = None,
+    include_assigned: Optional[bool] = True,
+    location_category_like: Optional[str] = None,
+    location_office_id: Optional[str] = None,
+    category_office_id: Optional[str] = None,
+) -> Data:
+    """
+    Retrieves a list of location groups.
+
+    Parameters
+    ----------
+    office_id: string
+        Specifies the owning office of the location group whose data is to be included
+        in the response.
+    include_assigned: Boolean
+        Include the assigned locations in the returned location groups. (default: true)
+    location_category_like: string
+        Posix regular expression matching against the location category id
+    location_office_id: string
+        Specifies the owning office of the location assigned to the location group whose
+        data is to be included in the response.
+    category_office_id: string
+        Specifies the owning office of the category the location group belongs to whose
+        data is to be included in the response.
+
+    Returns
+    -------
+    cwms data type. data.json will return the JSON output and data.df will return a dataframe
+    """
+
+    endpoint = "location/group"
+    params = {
+        "office": office_id,
+        "include-assigned": include_assigned,
+        "location-category-like": location_category_like,
+        "location-office-id": location_office_id,
+        "category-office-id": category_office_id,
+    }
+    response = api.get(endpoint=endpoint, params=params, api_version=1)
+    return Data(response)
+
+
+def store_location_groups(data: JSON) -> None:
+    """
+    Create new Location Group
+
+    Parameters
+    ----------
+    data: JSON dictionary
+        location group data to be stored.
+
+    Returns
+    -------
+    None
+    """
+
+    if data is None:
+        raise ValueError("Cannot store a location group without location group JSON")
+
+    endpoint = "location/group"
+
+    return api.post(endpoint=endpoint, data=data, api_version=1)
+
+
+def update_location_group(
+    data: JSON,
+    group_id: str,
+    office_id: str,
+    replace_assigned_locs: Optional[bool] = False,
+) -> None:
+    """
+    Updates the location group with the provided group ID and office ID.
+
+    Parameters
+    ----------
+    data: JSON dictionary
+        Location Group data to be stored.
+    group_id : str
+        The group id of the location group to be updated
+    office_id : str
+        The ID of the office associated with the specified location group.
+    replace_assigned_locs : bool, optional
+        Specifies whether to unassign all existing locations before assigning new
+        locations specified in the content body. Default is False.
+
+    Returns
+    -------
+    None
+    """
+
+    endpoint = f"location/group/{group_id}"
+    params = {
+        "replace-assigned-locs": replace_assigned_locs,
+        "office": office_id,
+    }
+
+    api.patch(endpoint=endpoint, data=data, params=params, api_version=1)
+
+
+def delete_location_group(group_id: str, category_id: str, office_id: str) -> None:
+    """Deletes the requested location group
+
+    Parameters
+    ----------
+    group_id: string
+        The location group to be deleted
+    category_id: string
+        Specifies the location category of the location group to be deleted
+    office_id: string
+        Specifies the owning office of the location group to be deleted
+
+    Returns
+    -------
+    None
+    """
+
+    endpoint = f"location/group/{group_id}"
+    params = {
+        "office": office_id,
+        "category-id": category_id,
+    }
+
+    return api.delete(endpoint, params=params, api_version=1)
diff --git a/plugins/cwms/locations/physical_locations.py b/plugins/cwms/locations/physical_locations.py
index 64108ef..0b44998 100644
--- a/plugins/cwms/locations/physical_locations.py
+++ b/plugins/cwms/locations/physical_locations.py
@@ -7,14 +7,6 @@
 from cwms.cwms_types import JSON, Data
 
 
-def get_location_group(loc_group_id: str, category_id: str, office_id: str) -> Data:
-    endpoint = f"location/group/{loc_group_id}"
-    params = {"office": office_id, "category-id": category_id}
-
-    response = api.get(endpoint, params, api_version=1)
-    return Data(response, selector="assigned-locations")
-
-
 def get_location(location_id: str, office_id: str, unit: str = "EN") -> Data:
     """
     Get location data for a single location
@@ -46,20 +38,44 @@ def get_location(location_id: str, office_id: str, unit: str = "EN") -> Data:
 
 def get_locations(
     office_id: Optional[str] = None,
-    loc_ids: Optional[str] = None,
-    units: Optional[str] = None,
+    location_ids: Optional[str] = None,
+    units: Optional[str] = "EN",
     datum: Optional[str] = None,
 ) -> Data:
+    """
+    Get location data for one or more locations
+
+    Parameters
+    ----------
+    location_ids: str
+        Specifies the name(s) of the location(s) whose data is to be included in the
+        response. This parameter is a Posix regular expression matching against the id
+    office_id : str
+        The ID of the office that the locations belong to.
+    units: string, optional, default is EN
+        The unit or unit system of the response. Valid values for the units field are:
+        1. EN. English unit system.
+        2. SI. SI unit system.
+        3. Other.
+    datum: string, optional, default is None
+        Specifies the elevation datum of the response. This field affects only vertical
+        datum. Valid values for this field are:
+        1. NAVD88. The elevation values will be in the specified or default units above
+           the NAVD-88 datum.
+        2. NGVD29. The elevation values will be in the specified or default units above
+           the NGVD-29 datum.
+
+    Returns
+    -------
+    cwms data type. data.json will return the JSON output and data.df will return a dataframe
+    """
     endpoint = "locations"
     params = {
         "office": office_id,
-        "names": loc_ids,
+        "names": location_ids,
         "units": units,
         "datum": datum,
     }
 
     response = api.get(endpoint, params)
-    return Data(response, selector="locations.locations")
+    return Data(response)
 
 
 def ExpandLocations(df: DataFrame) -> DataFrame:
diff --git a/plugins/cwms/outlets/outlets.py b/plugins/cwms/outlets/outlets.py
new file mode 100644
index 0000000..7f1a473
--- /dev/null
+++ b/plugins/cwms/outlets/outlets.py
@@ -0,0 +1,195 @@
+# Copyright (c) 2024
+# United States Army Corps of Engineers - Hydrologic Engineering Center (USACE/HEC)
+# All Rights Reserved. USACE PROPRIETARY/CONFIDENTIAL.
+# Source may not be released without written approval from HEC
+from typing import Optional
+
+import cwms.api as api
+from cwms.cwms_types import JSON, Data, DeleteMethod
+
+
+def get_outlet(office_id: str, name: str) -> Data:
+    """
+    Parameters
+    ----------
+    name : str
+        The ID of the outlet.
+    office_id : str
+        The ID of the office.
+
+    Returns
+    -------
+    response : dict
+        the JSON response from CWMS Data API.
+
+    Raises
+    ------
+    ValueError
+        If any of name or office_id is None.
+    ClientError
+        If a 400 range error code response is returned from the server.
+    NoDataFoundError
+        If a 404 range error code response is returned from the server.
+    ServerError
+        If a 500 range error code response is returned from the server.
+    """
+
+    if name is None:
+        raise ValueError("Retrieve outlet requires a name")
+    if office_id is None:
+        raise ValueError("Retrieve outlet requires an office")
+
+    endpoint = f"projects/outlets/{name}"
+    params = {"office": office_id}
+    response = api.get(endpoint, params)
+    return Data(response)
+
+
+def get_outlets(office_id: str, project_id: str) -> Data:
+    """
+    Parameters
+    ----------
+    project_id : str
+        The project ID of the outlets.
+    office_id : str
+        The ID of the project's office.
+
+    Returns
+    -------
+    response : dict
+        the JSON response from CWMS Data API.
+
+    Raises
+    ------
+    ValueError
+        If any of project_id or office_id is None.
+    ClientError
+        If a 400 range error code response is returned from the server.
+    NoDataFoundError
+        If a 404 range error code response is returned from the server.
+    ServerError
+        If a 500 range error code response is returned from the server.
+    """
+
+    if project_id is None:
+        raise ValueError("Retrieve outlets requires a project id")
+    if office_id is None:
+        raise ValueError("Retrieve outlets requires an office")
+
+    endpoint = "projects/outlets"
+    params = {"office": office_id, "project-id": project_id}
+    response = api.get(endpoint, params)
+    return Data(response)
+
+
+def delete_outlet(office_id: str, name: str, delete_method: DeleteMethod) -> None:
+    """
+    Parameters
+    ----------
+    name : str
+        The name of the outlet.
+    office_id : str
+        The ID of the project's office.
+    delete_method: DeleteMethod
+        The method to use to delete the outlet.
+
+    Returns
+    -------
+    None
+
+    Raises
+    ------
+    ValueError
+        If any of name, delete_method, or office_id is None.
+    ClientError
+        If a 400 range error code response is returned from the server.
+    NoDataFoundError
+        If a 404 range error code response is returned from the server.
+    ServerError
+        If a 500 range error code response is returned from the server.
+    """
+
+    if name is None:
+        raise ValueError("Delete outlet requires an outlet name")
+    if office_id is None:
+        raise ValueError("Delete outlet requires an office")
+    if delete_method is None:
+        raise ValueError("Delete outlet requires a delete method")
+
+    endpoint = f"projects/outlets/{name}"
+    params = {"office": office_id, "method": delete_method.name}
+    api.delete(endpoint, params)
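+
+
+# Usage sketch (identifiers and the enum member are illustrative; DeleteMethod
+# is the enum imported from cwms.cwms_types above):
+#
+#   outlet = get_outlet("SPK", "BIGH-TG1")
+#   delete_outlet("SPK", "BIGH-TG1", DeleteMethod.DELETE_ALL)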
+
+
+def rename_outlet(office_id: str, old_name: str, new_name: str) -> None:
+    """
+    Parameters
+    ----------
+    old_name : str
+        The name of the outlet to rename.
+    new_name : str
+        The new name of the outlet.
+    office_id : str
+        The ID of the project's office.
+
+    Returns
+    -------
+    None
+
+    Raises
+    ------
+    ValueError
+        If any of old_name, new_name, or office_id is None.
+    ClientError
+        If a 400 range error code response is returned from the server.
+    NoDataFoundError
+        If a 404 range error code response is returned from the server.
+    ServerError
+        If a 500 range error code response is returned from the server.
+    """
+
+    if old_name is None:
+        raise ValueError("Rename outlet requires the original outlet name")
+    if new_name is None:
+        raise ValueError("Rename outlet requires a new outlet name")
+    if office_id is None:
+        raise ValueError("Rename outlet requires an office")
+
+    endpoint = f"projects/outlets/{old_name}"
+    params = {"office": office_id, "name": new_name}
+    api.patch(endpoint=endpoint, params=params)
+
+
+def store_outlet(data: JSON, fail_if_exists: Optional[bool] = True) -> None:
+    """
+    Parameters
+    ----------
+    data : dict
+        A dictionary representing the JSON data to be stored.
+        If the `data` value is None, a `ValueError` will be raised.
+    fail_if_exists : bool, optional
+        A boolean value indicating whether to fail if the outlet already exists.
+        Default is True.
+
+    Returns
+    -------
+    None
+
+    Raises
+    ------
+    ValueError
+        If data is None.
+    ClientError
+        If a 400 range error code response is returned from the server.
+    NoDataFoundError
+        If a 404 range error code response is returned from the server.
+    ServerError
+        If a 500 range error code response is returned from the server.
+    """
+
+    if data is None:
+        raise ValueError("Cannot store an outlet without a JSON data dictionary")
+
+    endpoint = "projects/outlets"
+    params = {"fail-if-exists": fail_if_exists}
+    api.post(endpoint, data, params)
diff --git a/plugins/cwms/outlets/virtual_outlets.py b/plugins/cwms/outlets/virtual_outlets.py
new file mode 100644
index 0000000..e65d7e8
--- /dev/null
+++ b/plugins/cwms/outlets/virtual_outlets.py
@@ -0,0 +1,164 @@
+# Copyright (c) 2024
+# United States Army Corps of Engineers - Hydrologic Engineering Center (USACE/HEC)
+# All Rights Reserved. USACE PROPRIETARY/CONFIDENTIAL.
+# Source may not be released without written approval from HEC
+from typing import Optional
+
+import cwms.api as api
+from cwms.cwms_types import JSON, Data, DeleteMethod
+
+
+def get_virtual_outlet(office_id: str, project_id: str, name: str) -> Data:
+    """
+    Parameters
+    ----------
+    name : str
+        The ID of the virtual outlet.
+    project_id: str
+        The project for the virtual outlet.
+    office_id : str
+        The ID of the office.
+
+    Returns
+    -------
+    response : dict
+        the JSON response from CWMS Data API.
+
+    Raises
+    ------
+    ValueError
+        If any of name, project_id, or office_id is None.
+    ClientError
+        If a 400 range error code response is returned from the server.
+    NoDataFoundError
+        If a 404 range error code response is returned from the server.
+    ServerError
+        If a 500 range error code response is returned from the server.
+    """
+
+    if name is None:
+        raise ValueError("Retrieve virtual outlet requires a name")
+    if project_id is None:
+        raise ValueError("Retrieve virtual outlet requires a project id")
+    if office_id is None:
+        raise ValueError("Retrieve virtual outlet requires an office")
+
+    endpoint = f"projects/{office_id}/{project_id}/virtual-outlets/{name}"
+    response = api.get(endpoint)
+    return Data(response)
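+
+
+# Usage sketch (identifiers are illustrative):
+#
+#   vo = get_virtual_outlet("SPK", "BIGH", "Compound Outlet 1")
+#   print(vo.json)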
+ NoDataFoundError + If a 404 range error code response is returned from the server. + ServerError + If a 500 range error code response is returned from the server. + """ + + if project_id is None: + raise ValueError("Retrieve virtual outlets requires a project id") + if office_id is None: + raise ValueError("Retrieve virtual outlets requires an office") + + endpoint = f"projects/{office_id}/{project_id}/virtual-outlets" + response = api.get(endpoint) + return Data(response) + + +def delete_virtual_outlet( + office_id: str, project_id: str, name: str, delete_method: DeleteMethod +) -> None: + """ + Parameters + ---------- + name : str + The name of the virtual outlet. + project_id: + The project for the virtual outlet. + office_id : str + The ID of the virtual outlet's office. + delete_method: DeleteMethod + The method to use to delete the virtual outlet. + + Returns + ------- + None + + Raises + ------ + ValueError + If any of name, project_id, delete_method, or office_id is None. + ClientError + If a 400 range error code response is returned from the server. + NoDataFoundError + If a 404 range error code response is returned from the server. + ServerError + If a 500 range error code response is returned from the server. + """ + + if name is None: + raise ValueError("Delete virtual outlet requires an outlet name") + if project_id is None: + raise ValueError("Delete virtual outlet requires a project id") + if office_id is None: + raise ValueError("Delete virtual outlet requires an office") + if delete_method is None: + raise ValueError("Delete virtual outlet requires a delete method") + + endpoint = f"projects/{office_id}/{project_id}/virtual-outlets/{name}" + params = {"method": delete_method.name} + api.delete(endpoint, params) + + +def store_virtual_outlet(data: JSON, fail_if_exists: Optional[bool] = True) -> None: + """ + Parameters + ---------- + data : dict + A dictionary representing the JSON data to be stored. + If the `data` value is None, a `ValueError` will be raised. + fail_if_exists : str, optional + A boolean value indicating whether to fail if + the virtual outlet already exists. Default is True. + + Returns + ------- + None + + Raises + ------ + ValueError + If any of data is None. + ClientError + If a 400 range error code response is returned from the server. + NoDataFoundError + If a 404 range error code response is returned from the server. + ServerError + If a 500 range error code response is returned from the server. + """ + + if data is None: + raise ValueError("Cannot store an outlet without a JSON data dictionary") + + endpoint = "projects/virtual-outlets" + params = {"fail-if-exists": fail_if_exists} + api.post(endpoint, data, params) diff --git a/plugins/cwms/projects/project_lock_rights.py b/plugins/cwms/projects/project_lock_rights.py new file mode 100644 index 0000000..9802f15 --- /dev/null +++ b/plugins/cwms/projects/project_lock_rights.py @@ -0,0 +1,151 @@ +# Copyright (c) 2024 +# United States Army Corps of Engineers - Hydrologic Engineering Center (USACE/HEC) +# All Rights Reserved. USACE PROPRIETARY/CONFIDENTIAL. +# Source may not be released without written approval from HEC +from typing import Optional + +import cwms.api as api +from cwms.cwms_types import Data + + +def get_project_lock_rights( + office_mask: str, + project_mask: Optional[str] = None, + application_mask: Optional[str] = None, +) -> Data: + """ + Parameters + ---------- + office_mask : str + Specifies the office mask to be used + to filter the lock revoker rights. 
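# --- Illustrative usage (not part of the patch) ---------------------------
# A sketch of retrieving and deleting a virtual outlet. All identifiers are
# hypothetical, and DELETE_ALL is assumed to be a member of the DeleteMethod
# enum used throughout these modules.
from cwms.cwms_types import DeleteMethod
from cwms.outlets.virtual_outlets import delete_virtual_outlet, get_virtual_outlet

vo = get_virtual_outlet(office_id="SPK", project_id="ProjectA", name="VO-1")
print(vo.json)

delete_virtual_outlet(
    office_id="SPK",
    project_id="ProjectA",
    name="VO-1",
    delete_method=DeleteMethod.DELETE_ALL,  # assumed enum member
)
# ---------------------------------------------------------------------------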
+ project_mask : Optional[str] + Specifies the project mask to be used + to filter the lock revoker rights. + application_mask : Optional[str] + Specifies the application mask to be used + to filter the lock revoker rights. + + Returns + ------- + response : dict + the JSON response from CWMS Data API. + + Raises + ------ + ValueError + If office_mask is None. + ClientError + If a 400 range error code response is returned from the server. + NoDataFoundError + If a 404 range error code response is returned from the server. + ServerError + If a 500 range error code response is returned from the server. + """ + + endpoint = "project-lock-rights" + params = { + "office-mask": office_mask, + "project-mask": project_mask, + "application-mask": application_mask, + } + response = api.get(endpoint, params) + return Data(response) + + +def remove_all_project_lock_rights( + office_id: str, application_id: str, user_id: str +) -> None: + """ + Parameters + ---------- + office_id : str + Specifies the session office. + application_id : str + Specifies the application id. + user_id : str + Specifies the user. + + Returns + ------- + None + + Raises + ------ + ValueError + If any of office_id, application_id, or user_id is None. + ClientError + If a 400 range error code response is returned from the server. + NoDataFoundError + If a 404 range error code response is returned from the server. + ServerError + If a 500 range error code response is returned from the server. + """ + + if office_id is None: + raise ValueError("Remove project lock rights requires an office") + if application_id is None: + raise ValueError("Remove project lock rights requires an application") + if user_id is None: + raise ValueError("Remove project lock rights requires a user") + + endpoint = "project-lock-rights/remove-all" + params = {"office": office_id, "application-id": application_id, "user-id": user_id} + api.post(endpoint, None, params) + + +def update_project_lock_rights( + office_id: str, + application_id: str, + user_id: str, + allow: bool, + project_mask: Optional[str] = None, +) -> None: + """ + Parameters + ---------- + office_id : str + The ID of the office owning the project lock + application_id : str + Specifies the application id. + user_id : str + Specifies the user. + allow : bool + True to add the user to the allow list, False to add to the deny list + project_mask : str + Specifies the project mask to be used. + + Returns + ------- + None + + Raises + ------ + ValueError + If any of office_id, application_id, user_id, or allow is None. + ClientError + If a 400 range error code response is returned from the server. + NoDataFoundError + If a 404 range error code response is returned from the server. + ServerError + If a 500 range error code response is returned from the server. 
+ """ + + if office_id is None: + raise ValueError("Update project lock rights requires an office") + if application_id is None: + raise ValueError("Update project lock rights requires an application") + if user_id is None: + raise ValueError("Update project lock rights requires a user") + if allow is None: + raise ValueError("Update project lock rights requires a allow flag") + + endpoint = "project-lock-rights/update" + params = { + "office": office_id, + "application-id": application_id, + "user-id": user_id, + "allow": allow, + "project-mask": project_mask, + } + api.post(endpoint, None, params) diff --git a/plugins/cwms/projects/project_locks.py b/plugins/cwms/projects/project_locks.py new file mode 100644 index 0000000..4e42981 --- /dev/null +++ b/plugins/cwms/projects/project_locks.py @@ -0,0 +1,239 @@ +# Copyright (c) 2024 +# United States Army Corps of Engineers - Hydrologic Engineering Center (USACE/HEC) +# All Rights Reserved. USACE PROPRIETARY/CONFIDENTIAL. +# Source may not be released without written approval from HEC +from typing import Optional + +import cwms.api as api +from cwms.cwms_types import JSON, Data, DeleteMethod + + +def get_project_lock(office_id: str, name: str, application_id: str) -> Data: + """ + Parameters + ---------- + name : str + The ID of the project. + office_id : str + The ID of the office. + application_id : str + The ID of the application with the lock. + + Returns + ------- + response : dict + the JSON response from CWMS Data API. + + Raises + ------ + ValueError + If any of name, application_id or office_id is None. + ClientError + If a 400 range error code response is returned from the server. + NoDataFoundError + If a 404 range error code response is returned from the server. + ServerError + If a 500 range error code response is returned from the server. + """ + + if name is None: + raise ValueError("Retrieve project lock requires a name") + if office_id is None: + raise ValueError("Retrieve project requires an office") + if application_id is None: + raise ValueError("Retrieve project requires an application") + + endpoint = f"project-locks/{name}" + params = {"office": office_id, "application-id": application_id} + response = api.get(endpoint, params) + return Data(response) + + +def get_project_locks( + office_mask: str, + project_mask: Optional[str] = None, + application_mask: Optional[str] = None, +) -> Data: + """ + Parameters + ---------- + office_mask : str + Specifies the office mask to be used to filter the locks. + project_mask : Optional[str] + Specifies the project mask to be used to filter the locks. + application_mask : Optional[str] + Specifies the application mask to be used to filter the locks. + + Returns + ------- + response : dict + the JSON response from CWMS Data API. + + Raises + ------ + ValueError + If office_mask is None. + ClientError + If a 400 range error code response is returned from the server. + NoDataFoundError + If a 404 range error code response is returned from the server. + ServerError + If a 500 range error code response is returned from the server. + """ + + endpoint = "project-locks" + params = { + "office-mask": office_mask, + "project-mask": project_mask, + "application-mask": application_mask, + } + response = api.get(endpoint, params) + return Data(response) + + +def revoke_project_lock( + office_id: str, name: str, revoke_timeout_seconds: Optional[int] = None +) -> None: + """ + Parameters + ---------- + name : str + The name of the project. + office_id : str + The ID of the project's office. 
+ revoke_timeout_seconds: int, optional + Time in seconds to wait for an existing lock to be revoked. + + Returns + ------- + None + + Raises + ------ + ValueError + If any of name or office_id is None. + ClientError + If a 400 range error code response is returned from the server. + NoDataFoundError + If a 404 range error code response is returned from the server. + ServerError + If a 500 range error code response is returned from the server. + """ + + if name is None: + raise ValueError("Revoke project lock requires a project name") + if office_id is None: + raise ValueError("Revoke project lock requires an office") + + endpoint = f"project-locks/{name}" + params = {"office": office_id, "revoke-timeout": revoke_timeout_seconds} + api.delete(endpoint, params) + + +def request_project_lock( + data: JSON, + revoke_existing: Optional[bool] = False, + revoke_timeout_seconds: Optional[int] = None, +) -> None: + """ + Parameters + ---------- + data : dict + A dictionary representing the JSON data to be stored. + If the `data` value is None, a `ValueError` will be raised. + revoke_existing : bool, optional + If an existing lock is found, attempt to revoke it. + revoke_timeout_seconds : int, optional + Time in seconds to wait for an existing lock to be revoked. + + Returns + ------- + None + + Raises + ------ + ValueError + If data is None. + ClientError + If a 400 range error code response is returned from the server. + NoDataFoundError + If a 404 range error code response is returned from the server. + ServerError + If a 500 range error code response is returned from the server. + """ + + if data is None: + raise ValueError("Cannot request a project lock without a JSON data dictionary") + + endpoint = "project-locks" + params = { + "revoke-existing": revoke_existing, + "revoke-timeout": revoke_timeout_seconds, + } + api.post(endpoint, data, params) + + +def deny_project_lock_request(lock_id: str) -> None: + """ + Parameters + ---------- + lock_id : str + The ID of the requested project lock + + Returns + ------- + None + + Raises + ------ + ValueError + If lock_id is None. + ClientError + If a 400 range error code response is returned from the server. + NoDataFoundError + If a 404 range error code response is returned from the server. + ServerError + If a 500 range error code response is returned from the server. + """ + + if lock_id is None: + raise ValueError("Cannot deny a project lock request without a lock id") + + endpoint = "project-locks/deny" + params = {"lock-id": lock_id} + api.post(endpoint, None, params) + + +def release_project_lock(office_id: str, lock_id: str) -> None: + """ + Parameters + ---------- + office_id : str + The ID of the office owning the project lock + lock_id : str + The ID of the requested project lock + + Returns + ------- + None + + Raises + ------ + ValueError + If any of office_id or lock_id is None. + ClientError + If a 400 range error code response is returned from the server. + NoDataFoundError + If a 404 range error code response is returned from the server. + ServerError + If a 500 range error code response is returned from the server.
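# --- Illustrative usage (not part of the patch) ---------------------------
# A sketch of a lock lifecycle: request a lock, then release it. The payload
# keys below are assumptions about what the endpoint expects, not a
# documented schema, and all identifiers are hypothetical.
from cwms.projects.project_locks import release_project_lock, request_project_lock

lock_request = {
    "office-id": "SPK",            # hypothetical office
    "project-id": "ProjectA",      # hypothetical project
    "application-id": "ExampleApp",  # hypothetical application
}
request_project_lock(
    data=lock_request, revoke_existing=True, revoke_timeout_seconds=10
)

# ... do work under the lock, then release it by the lock ID returned by CDA.
release_project_lock(office_id="SPK", lock_id="example-lock-id")
# ---------------------------------------------------------------------------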
+ """ + + if office_id is None: + raise ValueError("Release project lock requires an office") + if lock_id is None: + raise ValueError("Release project lock requires a lock id") + + endpoint = "project-locks/release" + params = {"office": office_id, "lock-id": lock_id} + api.post(endpoint, None, params) diff --git a/plugins/cwms/projects/projects.py b/plugins/cwms/projects/projects.py new file mode 100644 index 0000000..61aae9a --- /dev/null +++ b/plugins/cwms/projects/projects.py @@ -0,0 +1,309 @@ +# Copyright (c) 2024 +# United States Army Corps of Engineers - Hydrologic Engineering Center (USACE/HEC) +# All Rights Reserved. USACE PROPRIETARY/CONFIDENTIAL. +# Source may not be released without written approval from HEC +from datetime import datetime +from typing import Optional + +import cwms.api as api +from cwms.cwms_types import JSON, Data, DeleteMethod + + +def get_project(office_id: str, name: str) -> Data: + """ + Parameters + ---------- + name : str + The ID of the project. + office_id : str + The ID of the office. + + Returns + ------- + response : dict + the JSON response from CWMS Data API. + + Raises + ------ + ValueError + If any of name or office_id is None. + ClientError + If a 400 range error code response is returned from the server. + NoDataFoundError + If a 404 range error code response is returned from the server. + ServerError + If a 500 range error code response is returned from the server. + """ + + if name is None: + raise ValueError("Retrieve project requires a name") + if office_id is None: + raise ValueError("Retrieve project requires an office") + + endpoint = f"projects/{name}" + params = {"office": office_id} + response = api.get(endpoint, params) + return Data(response) + + +def get_projects( + office_id: str, + id_mask: Optional[str] = None, + page: Optional[str] = None, + page_size: Optional[int] = None, +) -> Data: + """ + Parameters + ---------- + office_id : str + The ID of the project's office. + id_mask : Optional[str] + The project ID mask for projects to return. + page : Optional[str] + A string representing the page to retrieve. + If None then the first page will be retrieved. + page_size : Optional[int] + An integer representing the number of items per page. + + Returns + ------- + response : dict + the JSON response from CWMS Data API. + + Raises + ------ + ClientError + If a 400 range error code response is returned from the server. + NoDataFoundError + If a 404 range error code response is returned from the server. + ServerError + If a 500 range error code response is returned from the server. + """ + + endpoint = "projects" + params = { + "office": office_id, + "id-mask": id_mask, + "page": page, + "page-size": page_size, + } + response = api.get(endpoint, params) + return Data(response) + + +def get_project_locations( + office_id: str, + project_like: Optional[str] = None, + location_id_like: Optional[str] = None, +) -> Data: + """ + Parameters + ---------- + office_id : str + The ID of the project's office. + project_like : Optional[str] + The project ID regex for projects to return. + location_id_like : Optional[str] + The location kind ID regex for locations to return. + + Returns + ------- + response : dict + the JSON response from CWMS Data API. + + Raises + ------ + ValueError + If any of name or office_id is None. + ClientError + If a 400 range error code response is returned from the server. + NoDataFoundError + If a 404 range error code response is returned from the server. 
+ ServerError + If a 500 range error code response is returned from the server. + """ + + endpoint = "projects/locations" + params = { + "office": office_id, + "project-like": project_like, + "location-kind-like": location_id_like, + } + response = api.get(endpoint, params) + return Data(response) + + +def delete_project(office_id: str, name: str, delete_method: DeleteMethod) -> None: + """ + Parameters + ---------- + name : str + The name of the project. + office_id : str + The ID of the project's office. + delete_method: DeleteMethod + The method to use to delete the project. + + Returns + ------- + None + + Raises + ------ + ValueError + If any of name, delete_method, or office_id is None. + ClientError + If a 400 range error code response is returned from the server. + NoDataFoundError + If a 404 range error code response is returned from the server. + ServerError + If a 500 range error code response is returned from the server. + """ + + if name is None: + raise ValueError("Delete project requires a project name") + if office_id is None: + raise ValueError("Delete project requires an office") + if delete_method is None: + raise ValueError("Delete project requires a delete method") + + endpoint = f"projects/{name}" + params = {"office": office_id, "method": delete_method.name} + api.delete(endpoint, params) + + +def rename_project(office_id: str, old_name: str, new_name: str) -> None: + """ + Parameters + ---------- + office_id : str + The ID of the project's office. + old_name : str + The name of the project to rename. + new_name : str + The new name of the project. + + Returns + ------- + None + + Raises + ------ + ValueError + If any of old_name, new_name, or office_id is None. + ClientError + If a 400 range error code response is returned from the server. + NoDataFoundError + If a 404 range error code response is returned from the server. + ServerError + If a 500 range error code response is returned from the server. + """ + + if old_name is None: + raise ValueError("Rename project requires the original project name") + if new_name is None: + raise ValueError("Rename project requires a new project name") + if office_id is None: + raise ValueError("Rename project requires an office") + + endpoint = f"projects/{old_name}" + params = {"office": office_id, "name": new_name} + api.patch(endpoint=endpoint, params=params) + + +def store_project(data: JSON, fail_if_exists: Optional[bool] = True) -> None: + """ + Parameters + ---------- + data : dict + A dictionary representing the JSON data to be stored. + If the `data` value is None, a `ValueError` will be raised. + fail_if_exists : bool, optional + A boolean value indicating whether to fail if the project already exists. + Default is True. + + Returns + ------- + None + + Raises + ------ + ValueError + If data is None. + ClientError + If a 400 range error code response is returned from the server. + NoDataFoundError + If a 404 range error code response is returned from the server. + ServerError + If a 500 range error code response is returned from the server.
+ """ + + if data is None: + raise ValueError("Cannot store an project without a JSON data dictionary") + + endpoint = "projects" + params = {"fail-if-exists": fail_if_exists} + api.post(endpoint, data, params) + + +def status_update( + office_id: str, + project_id: str, + application_id: str, + source_id: Optional[str] = None, + timeseries_id: Optional[str] = None, + begin: Optional[datetime] = None, + end: Optional[datetime] = None, +) -> None: + """ + Parameters + ---------- + office_id : str + The office generating the message (and owning the project). + project_id : str + The location identifier of the project that has been updated + application_id : str, optional + A text string identifying the application for which the update applies. + source_id : str, optional + An application-defined string of the instance + and/or component that generated the message. + timeseries_id : str, optional + A time series identifier of the time series associated with the update. + begin : str, optional + The start time of the updates to the time series. + end : str, optional + The end time of the updates to the time series. + + Returns + ------- + None + + Raises + ------ + ValueError + If any of office_id, project_id, or application_id is None. + ClientError + If a 400 range error code response is returned from the server. + NoDataFoundError + If a 404 range error code response is returned from the server. + ServerError + If a 500 range error code response is returned from the server. + """ + + if office_id is None: + raise ValueError("Post project status update requires an office") + if project_id is None: + raise ValueError("Post project status update requires a project") + if application_id is None: + raise ValueError("Post project status update requires an application") + + endpoint = f"projects/status-update/{project_id}" + params = { + "office": office_id, + "application-id": application_id, + "source-id": source_id, + "timeseries-id": timeseries_id, + "begin": (begin.isoformat() if begin else None), + "end": (end.isoformat() if end else None), + } + api.post(endpoint, None, params) diff --git a/plugins/cwms/ratings/ratings.py b/plugins/cwms/ratings/ratings.py index 8b226e1..f5b3665 100644 --- a/plugins/cwms/ratings/ratings.py +++ b/plugins/cwms/ratings/ratings.py @@ -5,8 +5,10 @@ import pandas as pd import cwms.api as api -from cwms.ratings.ratings_spec import get_rating_spec from cwms.cwms_types import JSON, Data +from cwms.ratings.ratings_spec import get_rating_spec + +xml_heading = " Any: @@ -220,6 +222,7 @@ def rating_simple_df_to_json( effective_date: datetime, transition_start_date: Optional[datetime] = None, description: Optional[str] = None, + active: Optional[bool] = True, ) -> JSON: """This function converts a dataframe to a json dictionary in the correct format to be posted using the store_ratings function. Can only be used for simple ratings with a indenpendant and 1 dependant variable. 
@@ -253,6 +256,8 @@ def rating_simple_df_to_json( The transitional start date of the rating curve to be stored description: str Optional = None a description to be added to the rating curve + active: Boolean Optional = True + store the rating as active (True) or inactive (False) Returns: JSON @@ -286,7 +291,7 @@ def rating_simple_df_to_json( "transition-start-date": ( transition_start_date.isoformat() if transition_start_date else None ), - "active": True, + "active": active, "description": description, "rating-points": {"point": points_json}, } @@ -318,12 +323,12 @@ def update_ratings( endpoint = f"ratings/{rating_id}" params = {"store-template": store_template} - if not isinstance(data, dict) and "<?xml" not in data: + if not isinstance(data, dict) and xml_heading not in data: + + +def store_rating(data: JSON, store_template: Optional[bool] = True) -> None: + """Will create a new rating set including the template/spec and rating + + Parameters + ---------- + data: JSON dictionary or XML + rating data to be stored. + store_template: Boolean Default = True + Store updates to the rating template. + + Returns + ------- + response + """ + + endpoint = "ratings" + params = {"store-template": store_template} + + if not isinstance(data, dict) and xml_heading not in data: + raise ValueError( + "Cannot store a rating without a JSON data dictionary or XML" + ) + + if xml_heading in data: + api_version = 102 + else: + api_version = 2 + return api.post(endpoint, data, params, api_version=api_version) diff --git a/plugins/cwms/ratings/ratings_spec.py b/plugins/cwms/ratings/ratings_spec.py index db99a1f..684bbdb 100644 --- a/plugins/cwms/ratings/ratings_spec.py +++ b/plugins/cwms/ratings/ratings_spec.py @@ -103,6 +103,56 @@ def delete_rating_spec(rating_id: str, office_id: str, delete_method: str) -> No return api.delete(endpoint, params) +def rating_spec_df_to_xml(data: pd.DataFrame) -> str: + """ + Converts a dataframe containing rating specification parameters + into xml to be stored into the database. + + Parameters + ---------- + data : pd.DataFrame + pandas dataframe that contains rating specification parameters; + should follow the same format that is returned from the get_rating_spec function + Returns + ------- + str: xml that can be used in the store_rating_spec function + """ + + spec_xml = f""" + <rating-spec office-id="{data.loc[0,'office-id']}"> + <rating-spec-id>{data.loc[0,'rating-id']}</rating-spec-id> + <template-id>{data.loc[0,'template-id']}</template-id> + <location-id>{data.loc[0,'location-id']}</location-id> + <version>{data.loc[0,'version']}</version> + <source-agency>{data.loc[0,'source-agency']}</source-agency> + <in-range-method>{data.loc[0,'in-range-method']}</in-range-method> + <out-range-low-method>{data.loc[0,'out-range-low-method']}</out-range-low-method> + <out-range-high-method>{data.loc[0,'out-range-high-method']}</out-range-high-method> + <active>{str(data.loc[0,'active']).lower()}</active> + <auto-update>{str(data.loc[0,'auto-update']).lower()}</auto-update> + <auto-activate>{str(data.loc[0,'auto-activate']).lower()}</auto-activate> + <auto-migrate-extension>{str(data.loc[0,'auto-migrate-extension']).lower()}</auto-migrate-extension> + <ind-rounding-specs>""" + + ind_rounding = data.loc[0, "independent-rounding-specs"] + if isinstance(ind_rounding, list): + i = 1 + for rounding in ind_rounding: + spec_xml = ( + spec_xml + + f"""\n <ind-rounding-spec position="{i}">{rounding['value']}</ind-rounding-spec>""" + ) + i = i + 1 + spec_xml2 = f"""\n</ind-rounding-specs> + <dep-rounding-spec>{data.loc[0,'dependent-rounding-spec']}</dep-rounding-spec> + <description>{data.loc[0,'description']}</description> + </rating-spec>""" + + spec_xml = spec_xml + spec_xml2 + + return spec_xml + + def store_rating_spec(data: str, fail_if_exists: Optional[bool] = True) -> None: + """ + This method is used to store a new rating spec.
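# --- Illustrative usage (not part of the patch) ---------------------------
# A sketch of cloning an existing rating spec through the dataframe -> XML
# round trip. The rating id is a hypothetical placeholder, the
# get_rating_spec keyword arguments are assumed, and the store call assumes
# the XML built above matches what CDA expects.
from cwms.ratings.ratings_spec import (
    get_rating_spec,
    rating_spec_df_to_xml,
    store_rating_spec,
)

spec_df = get_rating_spec(
    rating_id="GageA.Stage;Flow.USGS-EXSA.PROD",  # hypothetical rating id
    office_id="SPK",
).df
spec_xml = rating_spec_df_to_xml(spec_df)
store_rating_spec(spec_xml, fail_if_exists=False)
# ---------------------------------------------------------------------------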
diff --git a/plugins/cwms/standard_text/standard_text.py b/plugins/cwms/standard_text/standard_text.py index 3f1b125..4614775 100644 --- a/plugins/cwms/standard_text/standard_text.py +++ b/plugins/cwms/standard_text/standard_text.py @@ -30,8 +30,7 @@ def standard_text_to_json(text_id: str, standard_text: str, office_id: str) -> J if text_id is None: raise ValueError("Cannot store a standard text without a text id") if standard_text is None: - raise ValueError( - "Cannot store a standard text without a standard_text message") + raise ValueError("Cannot store a standard text without a standard_text message") if office_id is None: raise ValueError("Cannot store a standard text without an office_id") @@ -156,8 +155,7 @@ def delete_standard_text( if office_id is None: raise ValueError("Deleting standard timeseries requires an office") if delete_method is None: - raise ValueError( - "Deleting standard timeseries requires a delete method") + raise ValueError("Deleting standard timeseries requires a delete method") endpoint = f"standard-text-id/{text_id}" params = {"office": office_id, "method": delete_method.name} @@ -195,8 +193,7 @@ def store_standard_text(data: JSON, fail_if_exists: bool = False) -> None: """ if data is None: - raise ValueError( - "Cannot store a standard text without a JSON data dictionary") + raise ValueError("Cannot store a standard text without a JSON data dictionary") endpoint = "standard-text-id" params = {"fail-if-exists": fail_if_exists} diff --git a/plugins/cwms/timeseries/timeseries.py b/plugins/cwms/timeseries/timeseries.py index b4e6869..4eef62a 100644 --- a/plugins/cwms/timeseries/timeseries.py +++ b/plugins/cwms/timeseries/timeseries.py @@ -1,44 +1,135 @@ +import threading from datetime import datetime -from typing import Optional +from typing import Any, Dict, Optional import pandas as pd +from pandas import DataFrame import cwms.api as api from cwms.cwms_types import JSON, Data -def get_timeseries_group(group_id: str, category_id: str, office_id: str) -> Data: - """Retreives time series stored in the requested time series group +def get_multi_timeseries_df( + ts_ids: list[str], + office_id: str, + unit: Optional[str] = "EN", + begin: Optional[datetime] = None, + end: Optional[datetime] = None, + melted: Optional[bool] = False, +) -> DataFrame: + """Gets multiple timeseries and stores them in a single dataframe Parameters - ---------- - group_id: string - Timeseries group whose data is to be included in the response. - category_id: string - The category id that contains the timeseries group. - office_id: string - The owning office of the timeseries group. + ---------- + ts_ids: list + A list of timeseries to get. If a timeseries is versioned, then separate the ts_id from the + version_date using a :. Example "OMA.Stage.Inst.6Hours.0.Fcst-MRBWM-GRFT:2024-04-22 07:00:00-05:00". Make + sure that the version date includes the timezone offset if not in UTC. + office_id: string + The owning office of the time series. + unit: string, optional, default is EN + The unit or unit system of the response. Defaults to EN. Valid values + for the unit field are: + 1. EN. English unit system. + 2. SI. SI unit system. + 3. Other. + begin: datetime, optional, default is None + Start of the time window for data to be included in the response. If this field is + not specified, any required time window begins 24 hours prior to the specified + or default end time. Any timezone information should be passed within the datetime + object.
If no timezone information is given, default will be UTC. + end: datetime, optional, default is None + End of the time window for data to be included in the response. If this field is + not specified, any required time window ends at the current time. Any timezone + information should be passed within the datetime object. If no timezone information + is given, default will be UTC. + melted: Boolean, optional, default is false + if set to True a melted dataframe will be provided. By default a multi-index column dataframe will be + returned. + Returns ------- - cwms data type. data.json will return the JSON output and data.df will return a dataframe + dataframe """ - endpoint = f"timeseries/group/{group_id}" - params = {"office": office_id, "category-id": category_id} + def get_ts_ids( + result_dict: list[Dict[str, Any]], + ts_id: str, + office_id: str, + begin: datetime, + end: datetime, + unit: str, + version_date: datetime, + ) -> None: + data = get_timeseries( + ts_id=ts_id, + office_id=office_id, + unit=unit, + begin=begin, + end=end, + version_date=version_date, + ) + result_dict.append( + { + "ts_id": ts_id, + "unit": data.json["units"], + "version_date": version_date, + "values": data.df, + } + ) - response = api.get(endpoint=endpoint, params=params, api_version=1) - return Data(response, selector="assigned-time-series") + result_dict = [] # type: list[Dict[str,Any]] + threads = [] + for ts_id in ts_ids: + if ":" in ts_id: + ts_id, version_date = ts_id.split(":", 1) + version_date_dt = pd.to_datetime(version_date) + else: + version_date_dt = None + t = threading.Thread( + target=get_ts_ids, + args=(result_dict, ts_id, office_id, begin, end, unit, version_date_dt), + ) + threads.append(t) + t.start() + + for t in threads: + t.join() + + data = pd.DataFrame() + for row in result_dict: + temp_df = row["values"] + temp_df = temp_df.assign(ts_id=row["ts_id"], units=row["unit"]) + if "version_date" in row.keys(): + temp_df = temp_df.assign(version_date=row["version_date"]) + temp_df.dropna(how="all", axis=1, inplace=True) + data = pd.concat([data, temp_df], ignore_index=True) + + if not melted: + cols = ["ts_id", "units"] + if "version_date" in data.columns: + cols.append("version_date") + data["version_date"] = data["version_date"].dt.strftime( + "%Y-%m-%d %H:%M:%S%z" + ) + data["version_date"] = ( + data["version_date"].str[:-2] + ":" + data["version_date"].str[-2:] + ) + data.fillna({"version_date": ""}, inplace=True) + data = data.pivot(index="date-time", columns=cols, values="value") + + return data def get_timeseries( ts_id: str, office_id: str, - unit: str = "EN", + unit: Optional[str] = "EN", datum: Optional[str] = None, begin: Optional[datetime] = None, end: Optional[datetime] = None, - page_size: int = 500000, + page_size: Optional[int] = 500000, version_date: Optional[datetime] = None, trim: Optional[bool] = True, ) -> Data: @@ -48,9 +139,9 @@ def get_timeseries( Parameters ---------- ts_id: string - Name(s) of the time series whose data is to be included in the response. + Name of the time series whose data is to be included in the response. office_id: string - The owning office of the time series(s). + The owning office of the time series. unit: string, optional, default is EN The unit or unit system of the response. Defaults to EN. 
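# --- Illustrative usage (not part of the patch) ---------------------------
# A sketch of pulling several series, one of them versioned (note the
# ":<version date>" suffix described in the docstring above), into one wide
# dataframe. The timeseries IDs and office are hypothetical.
from datetime import datetime, timedelta, timezone

from cwms.timeseries.timeseries import get_multi_timeseries_df

end = datetime.now(timezone.utc)
df = get_multi_timeseries_df(
    ts_ids=[
        "LocA.Stage.Inst.15Minutes.0.Raw",
        "LocA.Flow.Inst.6Hours.0.Fcst:2024-04-22 07:00:00-05:00",
    ],
    office_id="SPK",
    begin=end - timedelta(days=2),
    end=end,
)
print(df.head())
# ---------------------------------------------------------------------------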
Valid values for the unit field are: @@ -86,6 +177,12 @@ # creates the dataframe from the timeseries data endpoint = "timeseries" + if begin and not isinstance(begin, datetime): + raise ValueError("begin must be a datetime object") + if end and not isinstance(end, datetime): + raise ValueError("end must be a datetime object") + if version_date and not isinstance(version_date, datetime): + raise ValueError("version_date must be a datetime object") params = { "office": office_id, "name": ts_id, @@ -94,11 +191,14 @@ "begin": begin.isoformat() if begin else None, "end": end.isoformat() if end else None, "page-size": page_size, + "page": None, "version-date": version_date.isoformat() if version_date else None, + "trim": trim, } + selector = "values" - response = api.get(endpoint, params) - return Data(response, selector="values") + response = api.get_with_paging(selector=selector, endpoint=endpoint, params=params) + return Data(response, selector=selector) def timeseries_df_to_json( @@ -132,32 +232,36 @@ Version date of time series values to be posted. Returns: - JSON + JSON. Dates in the JSON will be converted to UTC for storage. """ + + # make a copy so original dataframe does not get updated. + df = data.copy() # check dataframe columns - if "quality-code" not in data: - data["quality-code"] = 0 - if "date-time" not in data: + if "quality-code" not in df: + df["quality-code"] = 0 + if "date-time" not in df: raise TypeError( "date-time is a required column in data when posting as a dataframe" ) - if "value" not in data: + if "value" not in df: raise TypeError( "value is a required column in data when posting as a dataframe" ) # make sure that the date-time column is in ISO 8601 format. - data["date-time"] = pd.to_datetime(data["date-time"] - ).apply(pd.Timestamp.isoformat) - data = data.reindex(columns=["date-time", "value", "quality-code"]) - if data.isnull().values.any(): + df["date-time"] = pd.to_datetime(df["date-time"], utc=True).apply( + pd.Timestamp.isoformat + ) + df = df.reindex(columns=["date-time", "value", "quality-code"]) + if df.isnull().values.any(): raise ValueError("Null/NaN data must be removed from the dataframe") ts_dict = { "name": ts_id, "office-id": office_id, "units": units, - "values": data.values.tolist(), + "values": df.values.tolist(), "version-date": version_date, } @@ -201,8 +305,7 @@ if not isinstance(data, dict): - raise ValueError( - "Cannot store a timeseries without a JSON data dictionary") + raise ValueError("Cannot store a timeseries without a JSON data dictionary") return api.post(endpoint, data, params) diff --git a/plugins/cwms/timeseries/timeseries_bin.py b/plugins/cwms/timeseries/timeseries_bin.py index a4b7965..96f77b2 100644 --- a/plugins/cwms/timeseries/timeseries_bin.py +++ b/plugins/cwms/timeseries/timeseries_bin.py @@ -88,18 +88,6 @@ def get_binary_timeseries( return Data(response) -def get_large_blob(url: str) -> bytes: - """ - Retrieves large blob data greater than 64kb from CWMS data api - :param url: str - Url used in query by CDA - :return: bytes - Large binary data - """ - response = requests.get(url) - return response.content - - def store_binary_timeseries(data: JSON, replace_all: bool = False) -> None: """ This method is used to store a binary time series through CWMS Data API.
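# --- Illustrative usage (not part of the patch) ---------------------------
# A sketch of building a small dataframe and storing it with the helpers
# above. The timeseries ID and office are hypothetical placeholders.
import pandas as pd

from cwms.timeseries.timeseries import store_timeseries, timeseries_df_to_json

df = pd.DataFrame(
    {
        "date-time": pd.date_range("2024-01-01", periods=3, freq="1h", tz="UTC"),
        "value": [1.0, 2.0, 3.0],
    }
)
payload = timeseries_df_to_json(
    data=df,
    ts_id="LocA.Stage.Inst.1Hour.0.Raw",  # hypothetical ts id
    units="ft",
    office_id="SPK",
)
store_timeseries(data=payload)
# ---------------------------------------------------------------------------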
@@ -129,8 +117,7 @@ def store_binary_timeseries(data: JSON, replace_all: bool = False) -> None: """ if data is None: - raise ValueError( - "Storing binary time series requires a JSON data dictionary") + raise ValueError("Storing binary time series requires a JSON data dictionary") endpoint = "timeseries/binary" params = {"replace-all": replace_all} diff --git a/plugins/cwms/timeseries/timeseries_group.py b/plugins/cwms/timeseries/timeseries_group.py new file mode 100644 index 0000000..a6ba3ed --- /dev/null +++ b/plugins/cwms/timeseries/timeseries_group.py @@ -0,0 +1,253 @@ +import threading +from datetime import datetime +from typing import Any, Dict, Optional + +import pandas as pd +from pandas import DataFrame + +import cwms.api as api +from cwms.cwms_types import JSON, Data + + +def get_timeseries_group( + group_id: str, + category_id: str, + office_id: str, + group_office_id: str, + category_office_id: str, +) -> Data: + """Retrieves time series stored in the requested time series group + + Parameters + ---------- + group_id: string + Timeseries group whose data is to be included in the response. + category_id: string + The category id that contains the timeseries group. + office_id: string + The owning office of the timeseries assigned to the group whose data is to be included in the response. + group_office_id: string + Specifies the owning office of the timeseries group. + category_office_id: string + Specifies the owning office of the timeseries group category. + + Returns + ------- + cwms data type. data.json will return the JSON output and data.df will return a dataframe + """ + + endpoint = f"timeseries/group/{group_id}" + params = { + "office": office_id, + "category-id": category_id, + "category-office-id": category_office_id, + "group-office-id": group_office_id, + } + + response = api.get(endpoint=endpoint, params=params, api_version=1) + return Data(response, selector="assigned-time-series") + + +def get_timeseries_groups( + office_id: Optional[str] = None, + include_assigned: Optional[bool] = True, + timeseries_category_like: Optional[str] = None, + timeseries_group_like: Optional[str] = None, + category_office_id: Optional[str] = None, +) -> Data: + """ + Retrieves a list of time series groups. + + Parameters + ---------- + office_id: string + The owning office of the timeseries groups. + include_assigned: Boolean + Include the assigned timeseries in the returned timeseries groups. (default: true) + timeseries_category_like: string + Posix regular expression matching against the timeseries category id + timeseries_group_like: String + Posix regular expression matching against the timeseries group id + category_office_id: string + Specifies the owning office of the timeseries group category + Returns + ------- + cwms data type. data.json will return the JSON output and data.df will return a dataframe + """ + + endpoint = "timeseries/group" + params = { + "office": office_id, + "include-assigned": include_assigned, + "timeseries-category-like": timeseries_category_like, + "timeseries-group-like": timeseries_group_like, + "category-office-id": category_office_id, + } + response = api.get(endpoint=endpoint, params=params, api_version=1) + return Data(response) + + +def timeseries_group_df_to_json( + data: pd.DataFrame, + group_id: str, + group_office_id: str, + category_office_id: str, + category_id: str, +) -> JSON: + """ + Converts a dataframe to a json dictionary in the correct format.
+ + Parameters + ---------- + data: pd.DataFrame + Dataframe containing timeseries information. + group_id: str + The group ID for the timeseries group. + group_office_id: str + The ID of the office that owns the timeseries group. + category_office_id: str + The ID of the office that owns the category associated with the group. + category_id: str + The ID of the category associated with the group. + + Returns + ------- + JSON + JSON dictionary of the timeseries data. + """ + df = data.copy() + required_columns = ["office-id", "timeseries-id"] + optional_columns = ["alias-id", "attribute", "ts-code"] + for column in required_columns: + if column not in df.columns: + raise TypeError( + f"{column} is a required column in data when posting as a dataframe" + ) + + if df[required_columns].isnull().any().any(): + raise ValueError( + f"Null/NaN values found in required columns: {required_columns}. " + ) + + # Fill optional columns with default values if missing + if "alias-id" not in df.columns: + df["alias-id"] = None + if "attribute" not in df.columns: + df["attribute"] = 0 + + # Replace NaN with None for optional columns + for column in optional_columns: + if column in df.columns: + df[column] = df[column].where(pd.notnull(df[column]), None) + + # Build the list of time-series entries + assigned_time_series = df.apply( + lambda entry: { + "office-id": entry["office-id"], + "timeseries-id": entry["timeseries-id"], + "alias-id": entry["alias-id"], + "attribute": entry["attribute"], + **( + {"ts-code": entry["ts-code"]} + if "ts-code" in entry and pd.notna(entry["ts-code"]) + else {} + ), + }, + axis=1, + ).tolist() + + # Construct the final JSON dictionary + json_dict = { + "office-id": group_office_id, + "id": group_id, + "time-series-category": {"office-id": category_office_id, "id": category_id}, + "assigned-time-series": assigned_time_series, + } + + return json_dict + + +def store_timeseries_groups(data: JSON, fail_if_exists: Optional[bool] = True) -> None: + """ + Create new TimeSeriesGroup + Parameters + ---------- + data: JSON dictionary + Time Series data to be stored. + fail_if_exists: Boolean Default = True + Create will fail if provided ID already exists. + + Returns + ------- + None + """ + + if data is None: + raise ValueError("Cannot store a timeseries group without a JSON data dictionary") + + endpoint = "timeseries/group" + params = {"fail-if-exists": fail_if_exists} + + return api.post(endpoint, data, params, api_version=1) + + +def update_timeseries_groups( + data: JSON, + group_id: str, + office_id: str, + replace_assigned_ts: Optional[bool] = False, +) -> None: + """ + Updates the timeseries groups with the provided group ID and office ID. + + Parameters + ---------- + group_id : str + The group id of the timeseries group to be updated + office_id : str + The ID of the office associated with the timeseries group. + replace_assigned_ts : bool, optional + Specifies whether to unassign all existing timeseries before assigning new timeseries specified in the content body. Default is False. + data: JSON dictionary + Timeseries data to be stored.
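# --- Illustrative usage (not part of the patch) ---------------------------
# A sketch of assigning two (hypothetical) timeseries to a new group and
# storing it. The group and category names are hypothetical placeholders.
import pandas as pd

from cwms.timeseries.timeseries_group import (
    store_timeseries_groups,
    timeseries_group_df_to_json,
)

assignments = pd.DataFrame(
    {
        "office-id": ["SPK", "SPK"],
        "timeseries-id": [
            "LocA.Stage.Inst.1Hour.0.Raw",
            "LocB.Stage.Inst.1Hour.0.Raw",
        ],
    }
)
group_json = timeseries_group_df_to_json(
    data=assignments,
    group_id="Example Group",
    group_office_id="SPK",
    category_office_id="CWMS",
    category_id="Agency Aliases",
)
store_timeseries_groups(data=group_json, fail_if_exists=False)
# ---------------------------------------------------------------------------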
+ + Returns + ------- + None + """ + if not group_id: + raise ValueError("Cannot update a timeseries groups without an id") + if not office_id: + raise ValueError("Cannot update a timeseries groups without an office id") + + endpoint = f"timeseries/group/{group_id}" + params = { + "replace-assigned-ts": replace_assigned_ts, + "office": office_id, + } + + api.patch(endpoint=endpoint, data=data, params=params, api_version=1) + + +def delete_timeseries_group(group_id: str, category_id: str, office_id: str) -> None: + """Deletes requested time series group + + Parameters + ---------- + group_id: string + The time series group to be deleted + category_id: string + Specifies the time series category of the time series group to be deleted + office_id: string + Specifies the owning office of the time series group to be deleted + + Returns + ------- + None + """ + + endpoint = f"timeseries/group/{group_id}" + params = { + "office": office_id, + "category-id": category_id, + } + + return api.delete(endpoint, params=params, api_version=1) diff --git a/plugins/cwms/timeseries/timerseries_identifier.py b/plugins/cwms/timeseries/timeseries_identifier.py similarity index 97% rename from plugins/cwms/timeseries/timerseries_identifier.py rename to plugins/cwms/timeseries/timeseries_identifier.py index ab0c560..3fda96d 100644 --- a/plugins/cwms/timeseries/timerseries_identifier.py +++ b/plugins/cwms/timeseries/timeseries_identifier.py @@ -127,8 +127,7 @@ def store_timeseries_identifier( """ if data is None: - raise ValueError( - "Cannot store a time series identifier with out json data") + raise ValueError("Cannot store a time series identifier with out json data") endpoint = "timeseries/identifier-descriptor/" params = {"fail-if-exists": fail_if_exists} diff --git a/plugins/cwms/timeseries/timeseries_profile.py b/plugins/cwms/timeseries/timeseries_profile.py new file mode 100644 index 0000000..87e0ee7 --- /dev/null +++ b/plugins/cwms/timeseries/timeseries_profile.py @@ -0,0 +1,166 @@ +# Copyright (c) 2024 +# United States Army Corps of Engineers - Hydrologic Engineering Center (USACE/HEC) +# All Rights Reserved. USACE PROPRIETARY/CONFIDENTIAL. +# Source may not be released without written approval from HEC + +from typing import Optional + +import cwms.api as api +from cwms.cwms_types import Data + + +def get_timeseries_profile(office_id: str, location_id: str, parameter_id: str) -> Data: + """ + Retrieves a timeseries profile. + + Compatibility Warning: + Currently, the TimeSeries Profile API may not be fully supported + until a new version of cwms-data-access is released with the updated + endpoint implementation. + + Parameters + ---------- + office_id: string + The owning office of the timeseries profile + location_id: string + The location associated with the timeseries profile parser + parameter_id: string + Name of the key parameter associated with the timeseries profile + + Returns + ------- + cwms data type + """ + + endpoint = f"timeseries/profile/{location_id}/{parameter_id}" + params = { + "office": office_id, + } + + response = api.get(endpoint, params) + return Data(response) + + +def get_timeseries_profiles( + office_mask: Optional[str], + location_mask: Optional[str], + parameter_id_mask: Optional[str], + page: Optional[str] = None, + page_size: Optional[int] = 1000, +) -> Data: + """ + Retrieves all timeseries profiles that fit the provided masks. Does not include time series values. 
+ + Compatibility Warning: + Currently, the TimeSeries Profile API may not be fully supported + until a new version of cwms-data-access is released with the updated + endpoint implementation. + + Parameters + ---------- + office_mask: string + A mask to limit the results based on office. Uses regex to compare with office IDs in the database. + Default value is '*' + location_mask: string + A mask to limit the results based on location. Uses regex to compare with location IDs in the database. + Default value is '*' + parameter_id_mask: string + A mask to limit the results based on the parameter associated with the timeseries profile. Uses regex to + compare the parameter IDs in the database. Default value is '*' + + Returns + ------- + cwms data type + """ + + endpoint = "timeseries/profile" + params = { + "office-mask": office_mask, + "location-mask": location_mask, + "parameter-id-mask": parameter_id_mask, + "page": page, + "page-size": page_size, + } + + response = api.get(endpoint, params) + return Data(response) + + +def delete_timeseries_profile( + office_id: str, parameter_id: str, location_id: str +) -> None: + """ + Deletes a specified timeseries profile + + Compatibility Warning: + Currently, the TimeSeries Profile API may not be fully supported + until a new version of cwms-data-access is released with the updated + endpoint implementation. + + Parameters + ---------- + office_id: string + The owning office of the timeseries profile + parameter_id: string + Name of the key parameter associated with the timeseries profile + location_id: string + The location associated with the timeseries profile + + Returns + ------- + None + """ + + endpoint = f"timeseries/profile/{location_id}/{parameter_id}" + params = { + "office": office_id, + } + + return api.delete(endpoint, params) + + +def store_timeseries_profile(data: str, fail_if_exists: Optional[bool] = True) -> None: + """ + Stores a new timeseries profile + + Compatibility Warning: + Currently, the TimeSeries Profile API may not be fully supported + until a new version of cwms-data-access is released with the updated + endpoint implementation. + + Parameters + ---------- + data: string + json for storing a timeseries profile + { + "description": "string", + "parameter-list": [ + "string", + ... + ], + "location-id": { + "office-id": "string", + "name": "string" + }, + "reference-ts-id": { + "office-id": "string", + "name": "string" + }, + "key-parameter": "string" + } + + fail_if_exists: boolean, optional + Throw a ClientError if the profile already exists + Default is `True` + + Returns + ------- + None + """ + + endpoint = "timeseries/profile" + params = { + "fail-if-exists": fail_if_exists, + } + + return api.post(endpoint, data, params) diff --git a/plugins/cwms/timeseries/timeseries_profile_instance.py b/plugins/cwms/timeseries/timeseries_profile_instance.py new file mode 100644 index 0000000..d696d2f --- /dev/null +++ b/plugins/cwms/timeseries/timeseries_profile_instance.py @@ -0,0 +1,237 @@ +# Copyright (c) 2024 +# United States Army Corps of Engineers - Hydrologic Engineering Center (USACE/HEC) +# All Rights Reserved. USACE PROPRIETARY/CONFIDENTIAL. 
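# --- Illustrative usage (not part of the patch) ---------------------------
# A sketch of storing a minimal timeseries profile. The payload mirrors the
# JSON template in the docstring above; all names are hypothetical, and the
# optional reference-ts-id block is omitted here (include it if the endpoint
# requires one).
import json

from cwms.timeseries.timeseries_profile import store_timeseries_profile

profile = {
    "description": "Example temperature profile",
    "parameter-list": ["Depth", "Temp-Water"],
    "location-id": {"office-id": "SPK", "name": "LocA"},
    "key-parameter": "Depth",
}
store_timeseries_profile(json.dumps(profile), fail_if_exists=False)
# ---------------------------------------------------------------------------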
+# Source may not be released without written approval from HEC + +from datetime import datetime +from typing import Optional + +import cwms.api as api +from cwms.cwms_types import Data + + +def get_timeseries_profile_instance( + office_id: str, + location_id: str, + parameter_id: str, + version: str, + unit: str, + version_date: Optional[datetime], + start: Optional[datetime], + end: Optional[datetime], + page: Optional[str] = None, + page_size: Optional[int] = 500, + start_inclusive: Optional[bool] = True, + end_inclusive: Optional[bool] = True, + previous: Optional[bool] = False, + next: Optional[bool] = False, + max_version: Optional[bool] = False, +) -> Data: + """ + Returns a timeseries profile instance with associated timeseries values. + + Compatibility Warning: + Currently, the TimeSeries Profile API may not be fully supported + until a new version of cwms-data-access is released with the updated + endpoint implementation. + + Parameters + ---------- + office_id: string + The owning office of the timeseries profile instance + location_id: string + The location associated with the timeseries profile instance + parameter_id: string + The name of the key parameter associated with the timeseries profile instance + version: str + The version of the timeseries profile instance + unit: str + The requested units to use for the key parameter values of the timeseries profile instance + version_date: datetime, optional + The version date associated with the timeseries profile instance + start_inclusive: boolean, optional + Whether the returned timeseries profile instance should include data from the specified + start timestamp. Default is `True`. + end_inclusive: boolean, optional + Whether the returned timeseries profile instance should include data from the specified + end timestamp. Default is `True`. + previous: boolean, optional + The previous timeseries profile instance. Default is `False`. + next: boolean, optional + The next timeseries profile instance. Default is `False`. + max_version: boolean, optional + Whether the provided version is the maximum version of the timeseries profile instance. + Default is `False`. + start: datetime, optional + The start timestamp of the timeseries profile instance. Default is the year 1800. + end: datetime, optional + The end timestamp of the timeseries profile instance. Default is the year 3000. + page: string, optional + The page cursor of the timeseries profile instance. + page_size: string, optional + The number of timeseries profile instance data records retrieved as part of the instance. Default is `1000`. + + Returns + ------- + cwms data type + """ + + endpoint = f"timeseries/profile-instance/{location_id}/{parameter_id}/{version}" + params = { + "office": office_id, + "version-date": version_date.isoformat() if version_date else None, + "unit": unit, + "start-time-inclusive": start_inclusive, + "end-time-inclusive": end_inclusive, + "previous": previous, + "next": next, + "max-version": max_version, + "start": start.isoformat() if start else None, + "end": end.isoformat() if end else None, + "page": page, + "page-size": page_size, + } + + response = api.get(endpoint, params) + return Data(response) + + +def get_timeseries_profile_instances( + office_mask: Optional[str], + location_mask: Optional[str], + parameter_id_mask: Optional[str], + version_mask: Optional[str], +) -> Data: + """ + Retrieves a list of timeseries profile instances that match the specified masks. Does not return timeseries values. 
+ + Compatibility Warning: + Currently, the TimeSeries Profile API may not be fully supported + until a new version of cwms-data-access is released with the updated + endpoint implementation. + + Parameters + ---------- + office_mask: string + A mask to limit the results based on office ID. Uses regex to compare with office IDs in the database. + Default value is `*` + location_mask: string + A mask to limit the results based on location ID. Uses regex to compare with location IDs in the database. + Default value is `*` + parameter_id_mask: string + A mask to limit the results based on the parameter associated with the timeseries profile instance. + Uses regex to compare the parameter IDs in the database. Default value is `*` + version_mask: string + A mask to limit the results based on the version associated with the timeseries profile instance. + Default value is `*` + + Returns + ------- + cwms data type + """ + + endpoint = "timeseries/profile-instance" + params = { + "office-mask": office_mask, + "location-mask": location_mask, + "parameter-id-mask": parameter_id_mask, + "version-mask": version_mask, + } + + response = api.get(endpoint, params) + return Data(response) + + +def delete_timeseries_profile_instance( + office_id: str, + location_id: str, + parameter_id: str, + version: str, + version_date: datetime, + first_date: datetime, + override_protection: Optional[bool] = True, +) -> None: + """ + Deletes a timeseries profile instance. + + Compatibility Warning: + Currently, the TimeSeries Profile API may not be fully supported + until a new version of cwms-data-access is released with the updated + endpoint implementation. + + Parameters + ---------- + office_id: string + The owning office of the timeseries profile instance + location_id: string + The name identifier for the timeseries profile instance to delete + parameter_id: string + The name of the key parameter associated with the timeseries profile instance + version: string + The version of the timeseries profile instance + version_date: datetime + The timestamp of the timeseries profile instance version + first_date: datetime + The first date of the timeseries profile instance + override_protection: boolean, optional + Whether to enable override protection for the timeseries profile instance. Default is `True`. + + Returns + ------- + None + """ + + endpoint = f"timeseries/profile-instance/{location_id}/{parameter_id}/{version}" + params = { + "office": office_id, + "version-date": version_date.isoformat() if version_date else None, + "date": first_date.isoformat() if first_date else None, + "override-protection": override_protection, + } + + return api.delete(endpoint, params) + + +def store_timeseries_profile_instance( + profile_data: str, + version: str, + version_date: datetime, + store_rule: Optional[str] = None, + override_protection: Optional[bool] = False, +) -> None: + """ + Stores a new timeseries profile instance. Requires timeseries profile and parser to already be stored. + + Compatibility Warning: + Currently, the TimeSeries Profile API may not be fully supported + until a new version of cwms-data-access is released with the updated + endpoint implementation. + + Parameters + ---------- + profile_data: string + The profile data of the timeseries profile instance + store_rule: boolean, optional + The method of storing the timeseries profile instance. Default is `REPLACE_ALL`. + version: string + The version of the timeseries profile instance. 
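# --- Illustrative usage (not part of the patch) ---------------------------
# A sketch of listing profile instances with masks and deleting one.
# Identifiers, versions, and dates are hypothetical placeholders.
from datetime import datetime, timezone

from cwms.timeseries.timeseries_profile_instance import (
    delete_timeseries_profile_instance,
    get_timeseries_profile_instances,
)

instances = get_timeseries_profile_instances(
    office_mask="SPK",
    location_mask="LocA*",
    parameter_id_mask="*",
    version_mask="*",
)
print(instances.df)

delete_timeseries_profile_instance(
    office_id="SPK",
    location_id="LocA",
    parameter_id="Depth",
    version="RAW",
    version_date=datetime(2024, 1, 1, tzinfo=timezone.utc),
    first_date=datetime(2024, 1, 1, tzinfo=timezone.utc),
)
# ---------------------------------------------------------------------------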
+ version_date: datetime + The version date of the timeseries profile instance. + override_protection: boolean, optional + Whether to enable override protection for the timeseries profile instance. Default is `False`. + + Returns + ------- + None + """ + + endpoint = "timeseries/profile-instance" + params = { + "profile-data": profile_data, + "method": store_rule, + "version": version, + "version-date": version_date.isoformat() if version_date else None, + "override-protection": override_protection, + } + + return api.post(endpoint, None, params) diff --git a/plugins/cwms/timeseries/timeseries_profile_parser.py b/plugins/cwms/timeseries/timeseries_profile_parser.py new file mode 100644 index 0000000..1d84437 --- /dev/null +++ b/plugins/cwms/timeseries/timeseries_profile_parser.py @@ -0,0 +1,210 @@ +# Copyright (c) 2024 +# United States Army Corps of Engineers - Hydrologic Engineering Center (USACE/HEC) +# All Rights Reserved. USACE PROPRIETARY/CONFIDENTIAL. +# Source may not be released without written approval from HEC + +from typing import Optional + +import cwms.api as api +from cwms.cwms_types import Data + + +def get_timeseries_profile_parser( + office_id: str, location_id: str, parameter_id: str +) -> Data: + """ + Returns a timeseries profile parser used to interpret timeseries data input. + + Compatibility Warning: + Currently, the TimeSeries Profile API may not be fully supported + until a new version of cwms-data-access is released with the updated + endpoint implementation. + + Parameters + ---------- + office_id: string + The owning office of the timeseries profile parser + location_id: string + The location name associated with the timeseries profile parser + parameter_id: string + The name of the key parameter associated with the timeseries profile parser + + Returns + ------- + cwms data type + """ + + endpoint = f"timeseries/profile-parser/{location_id}/{parameter_id}" + params = { + "office": office_id, + } + + response = api.get(endpoint, params) + return Data(response) + + +def get_timeseries_profile_parsers( + office_mask: Optional[str], + location_mask: Optional[str], + parameter_id_mask: Optional[str], +) -> Data: + """ + Returns a list of timeseries profile parsers. + + Compatibility Warning: + Currently, the TimeSeries Profile API may not be fully supported + until a new version of cwms-data-access is released with the updated + endpoint implementation. + + Parameters + ---------- + office_mask: string, optional + A mask to limit the results based on office. Uses regex to compare with office IDs in the database. + Default value is '*' + location_mask: string, optional + A mask to limit the results based on location. Uses regex to compare with location IDs in the database. + Default value is '*' + parameter_id_mask: string, optional + A mask to limit the results based on the parameter associated with the timeseries profile. Uses regex to + compare the parameter IDs in the database. 
Default value is '*' + + Returns + ------- + cwms data type + """ + + endpoint = "timeseries/profile-parser" + params = { + "office-mask": office_mask, + "location-mask": location_mask, + "parameter-id-mask": parameter_id_mask, + } + + response = api.get(endpoint, params) + return Data(response) + + +def delete_timeseries_profile_parser( + office_id: str, location_id: str, parameter_id: str +) -> None: + """ + Deletes a specified timeseries profile parser + + Compatibility Warning: + Currently, the TimeSeries Profile API may not be fully supported + until a new version of cwms-data-access is released with the updated + endpoint implementation. + + Parameters + ---------- + office_id: string + The owning office of the timeseries profile parser + location_id: string + The location associated with the timeseries profile parser + parameter_id: string + The name of the key parameter associated with the timeseries profile parser + + Returns + ------- + None + """ + + endpoint = f"timeseries/profile-parser/{location_id}/{parameter_id}" + params = {"office": office_id} + + return api.delete(endpoint, params) + + +def store_timeseries_profile_parser( + data: str, fail_if_exists: Optional[bool] = True +) -> None: + """ + Stores a new timeseries profile parser. + + Compatibility Warning: + Currently, the TimeSeries Profile API may not be fully supported + until a new version of cwms-data-access is released with the updated + endpoint implementation. + + Parameters + ---------- + data: string + JSON for storing a timeseries profile parser + + Indexed: + { + "type": "indexed-timeseries-profile-parser", + "location-id": { + "office-id": "string", + "name": "string" + }, + "key-parameter": "string", + "record-delimiter": "char", + "time-format": "MM/DD/YYYY,HH24:MI:SS", + "time-zone": "string", + "parameter-info-list": [ + { + "type": "indexed-parameter-info", + "parameter": "string", + "unit": "string", + "index": int + }, + { + "type": "indexed-parameter-info", + "parameter": "string", + "unit": "string", + "index": int + } + ], + "time-in-two-fields": bool, + "field-delimiter": "char", + "time-field": int + } + + Columnar: + { + "type": "columnar-timeseries-profile-parser", + "location-id": { + "office-id": "string", + "name": "string" + }, + "key-parameter": "string", + "record-delimiter": "char", + "time-format": "MM/DD/YYYY,HH24:MI:SS", + "time-zone": "string", + "parameter-info-list": [ + { + "type": "columnar-parameter-info", + "parameter": "string", + "unit": "string", + "start-column": int, + "end-column": int + }, + { + "type": "columnar-parameter-info", + "parameter": "string", + "unit": "string", + "start-column": int, + "end-column": int + } + ], + "time-in-two-fields": bool, + "time-start-column": int, + "time-end-column": int + } + + fail_if_exists: boolean, optional + Throw a ClientError if the parser already exists + Default is `True` + + Returns + ------- + None + """ + + endpoint = "timeseries/profile-parser" + params = { + "fail-if-exists": fail_if_exists, + } + + return api.post(endpoint, data, params) diff --git a/plugins/cwms/timeseries/timeseries_txt.py b/plugins/cwms/timeseries/timeseries_txt.py index 211d496..b9c5b73 100644 --- a/plugins/cwms/timeseries/timeseries_txt.py +++ b/plugins/cwms/timeseries/timeseries_txt.py @@ -78,20 +78,6 @@ def get_text_timeseries( return Data(response) -def get_large_clob(url: str, encoding: str = "utf-8") -> str: - """ - Retrieves large clob data greater than 64kb from CWMS data api - :param url: str - Url used in query by CDA - :param encoding: 
str, optional
-        Encoding used to decode text data. Default utf-8
-    :return: str
-        Large text data
-    """
-    response = requests.get(url)
-    return response.content.decode(encoding)
-
-
 def store_text_timeseries(data: JSON, replace_all: bool = False) -> None:
     """
     This method is used to store a text time series through CWMS Data API.
diff --git a/plugins/cwms/turbines/turbines.py b/plugins/cwms/turbines/turbines.py
new file mode 100644
index 0000000..2f430f7
--- /dev/null
+++ b/plugins/cwms/turbines/turbines.py
@@ -0,0 +1,242 @@
+from datetime import datetime
+from typing import Optional
+
+import cwms.api as api
+from cwms.cwms_types import JSON, Data
+
+# ==========================================================================
+# GET CWMS TURBINES
+# ==========================================================================
+
+
+def get_project_turbines(office: str, project_id: str) -> Data:
+    """Returns matching CWMS Turbine Data for a Reservoir Project (endpoint: projects/turbines).
+    Args:
+        office (str): The office associated with the turbine data.
+        project_id (str): The ID of the project.
+    Returns:
+        Data: A cwms Data object containing the turbine data.
+    """
+    endpoint = "projects/turbines"
+    params = {"office": office, "project-id": project_id}
+
+    response = api.get(endpoint=endpoint, params=params, api_version=1)
+
+    return Data(response)
+
+
+def get_project_turbine(office: str, name: str) -> Data:
+    """Returns CWMS Turbine Data for a named turbine (endpoint: projects/turbines/{name}).
+    Args:
+        office (str): The office associated with the turbine data.
+        name (str): The name of the turbine.
+    Returns:
+        Data: A cwms Data object containing the turbine data.
+    """
+    endpoint = f"projects/turbines/{name}"
+    params = {"office": office}
+    response = api.get(endpoint=endpoint, params=params, api_version=1)
+    return Data(response)
+
+
+def get_project_turbine_changes(
+    name: str,
+    begin: datetime,
+    end: datetime,
+    office: str,
+    page_size: Optional[int],
+    unit_system: Optional[str],
+    start_time_inclusive: Optional[bool],
+    end_time_inclusive: Optional[bool],
+) -> Data:
+    """
+    Returns CWMS turbine changes for the project with the specified office and name within a given time range.
+    Args:
+        begin (datetime): The start of the time window for data retrieval.
+        end (datetime): The end of the time window for data retrieval.
+        end_time_inclusive (Optional[bool]): Whether the end time is inclusive.
+        name (str): The name of the project associated with the turbine changes.
+        office (str): The office associated with the turbine data.
+        page_size (Optional[int]): The number of records to return per page.
+        start_time_inclusive (Optional[bool]): Whether the start time is inclusive.
+        unit_system (Optional[str]): The unit system to use for the data [SI, EN].
+    Returns:
+        Data: A cwms Data object containing the turbine change data.
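+    Example:
+        A minimal usage sketch; the office and project names below are
+        illustrative placeholders, not values taken from this repository:
+
+            from datetime import datetime
+            changes = get_project_turbine_changes(
+                name="Example-Project",
+                begin=datetime(2024, 1, 1),
+                end=datetime(2024, 2, 1),
+                office="SPK",
+                page_size=100,
+                unit_system="SI",
+                start_time_inclusive=True,
+                end_time_inclusive=False,
+            )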
+    """
+    if begin and not isinstance(begin, datetime):
+        raise ValueError("begin must be a datetime object")
+    if end and not isinstance(end, datetime):
+        raise ValueError("end must be a datetime object")
+
+    endpoint = f"projects/{office}/{name}/turbine-changes"
+    params = {
+        "name": name,
+        "begin": begin.isoformat() if begin else None,
+        "end": end.isoformat() if end else None,
+        "office": office,
+        "page-size": page_size,
+        "unit-system": unit_system,
+        "start-time-inclusive": start_time_inclusive,
+        "end-time-inclusive": end_time_inclusive,
+    }
+    response = api.get(endpoint=endpoint, params=params, api_version=1)
+    return Data(response)
+
+
+# ==========================================================================
+# POST CWMS TURBINES
+# ==========================================================================
+
+
+def store_project_turbine(data: JSON, fail_if_exists: Optional[bool]) -> None:
+    """
+    Create a new turbine in CWMS.
+    Parameters
+    ----------
+    data (JSON): JSON dictionary containing the turbine definition to store.
+    fail_if_exists (bool): If True, the request will fail if the turbine already exists.
+
+    Returns
+    -------
+    None
+
+
+    Raises
+    ------
+    ValueError
+        If provided data is None
+    Unauthorized
+        401 - Indicates that the client request has not been completed because it lacks valid authentication credentials for the requested resource.
+    Forbidden
+        403 - Indicates that the server understands the request but refuses to authorize it.
+    Not Found
+        404 - Indicates that the server cannot find the requested resource.
+    Server Error
+        500 - Indicates that the server encountered an unexpected condition that prevented it from fulfilling the request.
+    """
+    if data is None:
+        raise ValueError(
+            "Cannot store a project turbine without a JSON data dictionary"
+        )
+    endpoint = "projects/turbines"
+    params = {
+        "fail-if-exists": fail_if_exists,
+    }
+    return api.post(endpoint=endpoint, data=data, params=params, api_version=1)
+
+
+def store_project_turbine_changes(
+    data: JSON, office: str, name: str, override_protection: Optional[bool]
+) -> None:
+    """
+    Create CWMS Turbine Changes.
+    Parameters
+    ----------
+    data (JSON): JSON dictionary containing the turbine changes to store.
+    office (str): Office id for the reservoir project location associated with the turbine changes.
+    name (str): Specifies the name of the project whose turbine changes are to be stored.
+    override_protection (bool): A flag ('True'/'False') specifying whether protected data may be overwritten. Default is False
+
+    Returns
+    -------
+    None - Turbine changes successfully stored to CWMS.
+
+
+    Raises
+    ------
+    ValueError
+        If provided data is None
+    Unauthorized
+        401 - Indicates that the client request has not been completed because it lacks valid authentication credentials for the requested resource.
+    Forbidden
+        403 - Indicates that the server understands the request but refuses to authorize it.
+    Not Found
+        404 - Indicates that the server cannot find the requested resource.
+    Server Error
+        500 - Indicates that the server encountered an unexpected condition that prevented it from fulfilling the request.
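+    Example
+    -------
+    A minimal usage sketch; the payload shape, office, and project name are
+    illustrative assumptions, not values taken from this repository:
+
+        changes = {}  # turbine-change JSON payload as expected by CDA
+        store_project_turbine_changes(
+            data=changes,
+            office="SPK",
+            name="Example-Project",
+            override_protection=False,
+        )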
+    """
+    if data is None:
+        raise ValueError(
+            "Cannot store project turbine changes without a JSON data dictionary"
+        )
+    endpoint = f"projects/{office}/{name}/turbine-changes"
+    params = {"override-protection": override_protection}
+    return api.post(endpoint=endpoint, data=data, params=params, api_version=1)
+
+
+# ==========================================================================
+# DELETE CWMS TURBINES
+# ==========================================================================
+
+
+def delete_project_turbine(name: str, office: str, method: Optional[str]) -> None:
+    """
+    Delete CWMS Turbine.
+    Parameters
+    ----------
+    name (str): Specifies the name of the turbine to be deleted.
+    office (str): Specifies the owning office of the turbine to be deleted.
+    method (str): Specifies the delete method used. Defaults to "DELETE_KEY". Options are: DELETE_KEY, DELETE_DATA, DELETE_ALL
+    Returns
+    -------
+    None - Turbine successfully deleted from CWMS.
+
+
+    Raises
+    ------
+    Unauthorized
+        401 - Indicates that the client request has not been completed because it lacks valid authentication credentials for the requested resource.
+    Forbidden
+        403 - Indicates that the server understands the request but refuses to authorize it.
+    Not Found
+        404 - Indicates that the server cannot find the requested resource.
+    Server Error
+        500 - Indicates that the server encountered an unexpected condition that prevented it from fulfilling the request.
+    """
+    endpoint = f"projects/turbines/{name}"
+    params = {"office": office, "method": method}
+    return api.delete(endpoint=endpoint, params=params, api_version=1)
+
+
+def delete_project_turbine_changes(
+    office: str,
+    name: str,
+    begin: datetime,
+    end: datetime,
+    override_protection: Optional[bool],
+) -> None:
+    """
+    Delete CWMS Turbine Changes.
+    Parameters
+    ----------
+    name (str): Specifies the name of the project whose turbine changes are to be deleted.
+    office (str): Specifies the owning office of the turbine to be deleted.
+    begin (datetime): The start of the time window.
+    end (datetime): The end of the time window.
+    override_protection (bool): A flag ('True'/'False') specifying whether to delete protected data. Default is False
+
+    Returns
+    -------
+    None - Turbine changes successfully deleted from CWMS.
+
+
+    Raises
+    ------
+    Unauthorized
+        401 - Indicates that the client request has not been completed because it lacks valid authentication credentials for the requested resource.
+    Forbidden
+        403 - Indicates that the server understands the request but refuses to authorize it.
+    Not Found
+        404 - Indicates that the server cannot find the requested resource.
+    Server Error
+        500 - Indicates that the server encountered an unexpected condition that prevented it from fulfilling the request.
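+    Example
+    -------
+    A minimal usage sketch; the office and project names below are
+    illustrative placeholders:
+
+        from datetime import datetime
+        delete_project_turbine_changes(
+            office="SPK",
+            name="Example-Project",
+            begin=datetime(2024, 1, 1),
+            end=datetime(2024, 2, 1),
+            override_protection=False,
+        )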
+ """ + endpoint = f"projects/{office}/{name}/turbine-changes" + params = { + "begin": begin.isoformat() if begin else None, + "end": end.isoformat() if end else None, + "override-protection": override_protection, + } + return api.delete(endpoint=endpoint, params=params, api_version=1) diff --git a/plugins/dataretrieval/__init__.py b/plugins/dataretrieval/__init__.py new file mode 100644 index 0000000..a0087e4 --- /dev/null +++ b/plugins/dataretrieval/__init__.py @@ -0,0 +1,9 @@ +from importlib.metadata import PackageNotFoundError, version + +from dataretrieval.nwis import * +from dataretrieval.utils import * + +try: + __version__ = version("dataretrieval") +except PackageNotFoundError: + __version__ = "version-unknown" diff --git a/plugins/dataretrieval/codes/__init__.py b/plugins/dataretrieval/codes/__init__.py new file mode 100755 index 0000000..c5c4d65 --- /dev/null +++ b/plugins/dataretrieval/codes/__init__.py @@ -0,0 +1,2 @@ +from dataretrieval.codes.states import * +from dataretrieval.codes.timezones import * diff --git a/plugins/dataretrieval/codes/states.py b/plugins/dataretrieval/codes/states.py new file mode 100644 index 0000000..e3d83f0 --- /dev/null +++ b/plugins/dataretrieval/codes/states.py @@ -0,0 +1,109 @@ +"""List of 2-digit state codes with commented full names.""" + +state_codes = { + "Alabama": "al", + "Alaska": "ak", + "Arizona": "az", + "Arkansas": "ar", + "California": "ca", + "Colorado": "co", + "Connecticut": "ct", + "Delaware": "de", + "District of Columbia": "dc", + "Florida": "fl", + "Georgia": "ga", + "Hawaii": "hi", + "Idaho": "id", + "Illinois": "il", + "Indiana": "in", + "Iowa": "ia", + "Kansas": "ks", + "Kentucky": "ky", + "Louisiana": "la", + "Maine": "me", + "Maryland": "md", + "Massachusetts": "ma", + "Michigan": "mi", + "Minnesota": "mn", + "Mississippi": "ms", + "Missouri": "mo", + "Montana": "mt", + "Nebraska": "ne", + "Nevada": "nv", + "New Hampshire": "nh", + "New Jersey": "nj", + "New Mexico": "nm", + "New York": "ny", + "North Carolina": "nc", + "North Dakota": "nd", + "Ohio": "oh", + "Oklahoma": "ok", + "Oregon": "or", + "Pennsylvania": "pa", + "Rhode Island": "ri", + "South Carolina": "sc", + "South Dakota": "sd", + "Tennessee": "tn", + "Texas": "tx", + "Utah": "ut", + "Vermont": "vt", + "Virginia": "va", + "Washington": "wa", + "West Virginia": "wv", + "Wisconsin": "wi", + "Wyoming": "wy", +} + +fips_codes = { + "Alabama": "01", + "Alaska": "02", + "Arizona": "04", + "Arkansas": "05", + "California": "06", + "Colorado": "08", + "Connecticut": "09", + "Delaware": "10", + "District of Columbia": "11", + "Florida": "12", + "Georgia": "13", + "Hawaii": "15", + "Idaho": "16", + "Illinois": "17", + "Indiana": "18", + "Iowa": "19", + "Kansas": "20", + "Kentucky": "21", + "Louisiana": "22", + "Maine": "23", + "Maryland": "24", + "Massachusetts": "25", + "Michigan": "26", + "Minnesota": "27", + "Mississippi": "28", + "Missouri": "29", + "Montana": "30", + "Nebraska": "31", + "Nevada": "32", + "New Hampshire": "33", + "New Jersey": "34", + "New Mexico": "35", + "New York": "36", + "North Carolina": "37", + "North Dakota": "38", + "Ohio": "39", + "Oklahoma": "40", + "Oregon": "41", + "Pennsylvania": "42", + "Rhode Island": "44", + "South Carolina": "45", + "South Dakota": "46", + "Tennessee": "47", + "Texas": "48", + "Utah": "49", + "Vermont": "50", + "Virginia": "51", + "Washington": "53", + "West Virginia": "54", + "Wisconsin": "55", + "Wyoming": "56", +} diff --git a/plugins/dataretrieval/codes/timezones.py b/plugins/dataretrieval/codes/timezones.py 
new file mode 100644 index 0000000..3f91421 --- /dev/null +++ b/plugins/dataretrieval/codes/timezones.py @@ -0,0 +1,48 @@ +""" +Time zone information +""" + +tz_str = """-1200 Y +-1100 X NUT SST +-1000 W CKT HAST HST TAHT TKT +-0900 V AKST GAMT GIT HADT HNY +-0800 U AKDT CIST HAY HNP PST PT +-0700 T HAP HNR MST PDT +-0600 S CST EAST GALT HAR HNC MDT +-0500 R CDT COT EASST ECT EST ET HAC HNE PET +-0400 Q AST BOT CLT COST EDT FKT GYT HAE HNA PYT +-0300 P ADT ART BRT CLST FKST GFT HAA PMST PYST SRT UYT WGT +-0200 O BRST FNT PMDT UYST WGST +-0100 N AZOT CVT EGT ++0000 Z EGST GMT UTC WET WT ++0100 A CET DFT WAT WEDT WEST ++0200 B CAT CEDT CEST EET SAST WAST ++0300 C EAT EEDT EEST IDT MSK ++0400 D AMT AZT GET GST KUYT MSD MUT RET SAMT SCT ++0500 E AMST AQTT AZST HMT MAWT MVT PKT TFT TJT TMT UZT YEKT ++0600 F ALMT BIOT BTT IOT KGT NOVT OMST YEKST ++0700 G CXT DAVT HOVT ICT KRAT NOVST OMSST THA WIB ++0800 H ACT AWST BDT BNT CAST HKT IRKT KRAST MYT PHT SGT ULAT WITA WST ++0900 I AWDT IRKST JST KST PWT TLT WDT WIT YAKT ++1000 K AEST ChST PGT VLAT YAKST YAPT ++1100 L AEDT LHDT MAGT NCT PONT SBT VLAST VUT ++1200 M ANAST ANAT FJT GILT MAGST MHT NZST PETST PETT TVT WFT ++1330 FJST NZDT ++1130 NFT ++1030 ACDT LHST ++0930 ACST ++0630 CCT MMT ++0545 NPT ++0530 SLT ++0430 AFT IRDT ++0330 IRST +-0230 HAT NDT +-0330 HNT NST NT +-0430 HLV VET +-0930 MART MIT""" + +tz = {} +for tz_descr in map(str.split, tz_str.split("\n")): + tz_offset = tz_descr[0] + for tz_code in tz_descr[1:]: + tz[tz_code] = tz_offset diff --git a/plugins/dataretrieval/nwis.py b/plugins/dataretrieval/nwis.py new file mode 100644 index 0000000..e01823e --- /dev/null +++ b/plugins/dataretrieval/nwis.py @@ -0,0 +1,1591 @@ +"""Functions for downloading data from the `National Water Information System (NWIS)`_. + +.. _National Water Information System (NWIS): https://waterdata.usgs.gov/nwis + + +.. todo:: + + * Create a test to check whether functions pull multiple sites + * Work on multi-index capabilities. + * Check that all timezones are handled properly for each service. + +""" + +import re +import warnings +from io import StringIO +from typing import List, Optional, Tuple, Union + +import pandas as pd +import requests + +from dataretrieval.utils import BaseMetadata, format_datetime, to_str, query + +try: + import geopandas as gpd +except ImportError: + gpd = None + +WATERDATA_BASE_URL = "https://nwis.waterdata.usgs.gov/" +WATERDATA_URL = WATERDATA_BASE_URL + "nwis/" +WATERSERVICE_URL = "https://waterservices.usgs.gov/nwis/" +PARAMCODES_URL = "https://help.waterdata.usgs.gov/code/parameter_cd_nm_query?" +ALLPARAMCODES_URL = "https://help.waterdata.usgs.gov/code/parameter_cd_query?" + +WATERSERVICES_SERVICES = ["dv", "iv", "site", "stat"] +WATERDATA_SERVICES = [ + "qwdata", + "gwlevels", + "measurements", + "peaks", + "pmcodes", + "water_use", + "ratings", +] +# NAD83 +_CRS = "EPSG:4269" + + +def format_response( + df: pd.DataFrame, service: Optional[str] = None, **kwargs +) -> pd.DataFrame: + """Setup index for response from query. + + This function formats the response from the NWIS web services, in + particular it sets the index of the data frame. This function tries to + convert the NWIS response into pandas datetime values localized to UTC, + and if possible, uses these timestamps to define the data frame index. 
+
+    Parameters
+    ----------
+    df: ``pandas.DataFrame``
+        The data frame to format
+    service: string, optional, default is None
+        The NWIS service that was queried, important because the 'peaks'
+        service returns a different format than the other services.
+    **kwargs: optional
+        Additional keyword arguments, e.g., 'multi_index'
+
+    Returns
+    -------
+    df: ``pandas.DataFrame``
+        The formatted data frame
+
+    """
+    mi = kwargs.pop("multi_index", True)
+
+    if service == "peaks":
+        df = preformat_peaks_response(df)
+
+    if gpd is not None:
+        if "dec_lat_va" in list(df):
+            geoms = gpd.points_from_xy(df.dec_long_va.values, df.dec_lat_va.values)
+            df = gpd.GeoDataFrame(df, geometry=geoms, crs=_CRS)
+
+    # check for multiple sites:
+    if "datetime" not in df.columns:
+        # XXX: consider making site_no index
+        return df
+
+    elif len(df["site_no"].unique()) > 1 and mi:
+        # setup multi-index
+        df.set_index(["site_no", "datetime"], inplace=True)
+        if hasattr(df.index.levels[1], "tzinfo") and df.index.levels[1].tzinfo is None:
+            df = df.tz_localize("UTC", level=1)
+
+    else:
+        df.set_index(["datetime"], inplace=True)
+        if hasattr(df.index, "tzinfo") and df.index.tzinfo is None:
+            df = df.tz_localize("UTC")
+
+    return df.sort_index()
+
+
+def preformat_peaks_response(df: pd.DataFrame) -> pd.DataFrame:
+    """Datetime formatting for the 'peaks' service response.
+
+    Function to format the datetime column of the 'peaks' service response.
+
+    Parameters
+    ----------
+    df: ``pandas.DataFrame``
+        The data frame to format
+
+    Returns
+    -------
+    df: ``pandas.DataFrame``
+        The formatted data frame
+
+    """
+    df["datetime"] = pd.to_datetime(df.pop("peak_dt"), errors="coerce")
+    df.dropna(subset=["datetime"], inplace=True)
+    return df
+
+
+def get_qwdata(
+    sites: Optional[Union[List[str], str]] = None,
+    start: Optional[str] = None,
+    end: Optional[str] = None,
+    multi_index: bool = True,
+    wide_format: bool = True,
+    datetime_index: bool = True,
+    ssl_check: bool = True,
+    **kwargs,
+) -> Tuple[pd.DataFrame, BaseMetadata]:
+    """
+    Get water sample data from qwdata service.
+
+    .. warning::
+
+        WARNING: Beginning in March 2024, the NWIS qw data endpoint will
+        not deliver new data or updates to existing data.
+        Eventually the endpoint will be retired. For updated information visit:
+        https://waterdata.usgs.gov/nwis/qwdata
+        For additional details, see the R package vignette:
+        https://doi-usgs.github.io/dataRetrieval/articles/Status.html
+        If you have additional questions about the qw data service,
+        email CompTools@usgs.gov.
+
+    Parameters
+    ----------
+    sites: string or list of strings, optional, default is None
+        If the qwdata parameter site_no is supplied, it will overwrite the
+        sites parameter
+    start: string, optional, default is None
+        If the qwdata parameter begin_date is supplied, it will overwrite the
+        start parameter (YYYY-MM-DD)
+    end: string, optional, default is None
+        If the qwdata parameter end_date is supplied, it will overwrite the
+        end parameter (YYYY-MM-DD)
+    multi_index: bool, optional
+        If False, a dataframe with a single-level index (datetime) is returned,
+        default is True
+    wide_format : bool, optional
+        If True, return data in wide format with multiple samples per row and
+        one row per time, default is True
+    datetime_index : bool, optional
+        If True, create a datetime index, default is True
+    ssl_check: bool, optional
+        If True, check SSL certificates, if False, do not check SSL,
+        default is True
+    **kwargs: optional
+        If supplied, will be used as query parameters
+
+    Returns
+    -------
+    df: ``pandas.DataFrame``
+        Time series data from the NWIS JSON
+    md: :obj:`dataretrieval.utils.Metadata`
+        A custom metadata object
+
+    Examples
+    --------
+    .. doctest::
+
+        >>> # get water sample information for site 11447650
+        >>> df, md = dataretrieval.nwis.get_qwdata(
+        ...     sites="11447650", start="2010-01-01", end="2010-02-01"
+        ... )
+
+    """
+    warnings.warn(
+        (
+            "WARNING: Starting in March 2024, the NWIS qw data endpoint is "
+            "retiring and no longer receives updates. For more information, "
+            "refer to https://waterdata.usgs.gov/nwis/qwdata and "
+            "https://doi-usgs.github.io/dataRetrieval/articles/Status.html "
+            "or email CompTools@usgs.gov."
+        )
+    )
+
+    _check_sites_value_types(sites)
+
+    kwargs["site_no"] = kwargs.pop("site_no", sites)
+    kwargs["begin_date"] = kwargs.pop("begin_date", start)
+    kwargs["end_date"] = kwargs.pop("end_date", end)
+    kwargs["multi_index"] = multi_index
+    if wide_format:
+        kwargs["qw_sample_wide"] = "qw_sample_wide"
+
+    payload = {
+        "agency_cd": "USGS",
+        "format": "rdb",
+        "pm_cd_compare": "Greater than",
+        "inventory_output": "0",
+        "rdb_inventory_output": "file",
+        "TZoutput": "0",
+        "rdb_qw_attributes": "expanded",
+        "date_format": "YYYY-MM-DD",
+        "rdb_compression": "value",
+        "submitted_form": "brief_list",
+    }
+
+    # check for parameter codes, and reformat query args
+    qwdata_parameter_code_field = "parameterCd"
+    if kwargs.get(qwdata_parameter_code_field):
+        parameter_codes = kwargs.pop(qwdata_parameter_code_field)
+        parameter_codes = to_str(parameter_codes)
+        kwargs["multiple_parameter_cds"] = parameter_codes
+        kwargs["param_cd_operator"] = "OR"
+
+        search_criteria = kwargs.get("list_of_search_criteria")
+        if search_criteria:
+            kwargs["list_of_search_criteria"] = "{},{}".format(
+                search_criteria, "multiple_parameter_cds"
+            )
+        else:
+            kwargs["list_of_search_criteria"] = "multiple_parameter_cds"
+
+    kwargs.update(payload)
+
+    warnings.warn(
+        "NWIS qw web services are being retired. 
" + + "See this note from the R package for more: " + + "https://doi-usgs.github.io/dataRetrieval/articles/qwdata_changes.html", + category=DeprecationWarning, + ) + response = query_waterdata("qwdata", ssl_check=ssl_check, **kwargs) + + df = _read_rdb(response.text) + + if datetime_index is True: + df = format_datetime(df, "sample_dt", "sample_tm", "sample_start_time_datum_cd") + + return format_response(df, **kwargs), NWIS_Metadata(response, **kwargs) + + +def get_discharge_measurements( + sites: Optional[Union[List[str], str]] = None, + start: Optional[str] = None, + end: Optional[str] = None, + ssl_check: bool = True, + **kwargs, +) -> Tuple[pd.DataFrame, BaseMetadata]: + """ + Get discharge measurements from the waterdata service. + + Parameters + ---------- + sites: string or list of strings, optional, default is None + If the qwdata parameter site_no is supplied, it will overwrite the + sites parameter + start: string, optional, default is None + If the qwdata parameter begin_date is supplied, it will overwrite the + start parameter (YYYY-MM-DD) + end: string, optional, default is None + If the qwdata parameter end_date is supplied, it will overwrite the + end parameter (YYYY-MM-DD) + ssl_check: bool, optional + If True, check SSL certificates, if False, do not check SSL, + default is True + **kwargs: optional + If supplied, will be used as query parameters + + Returns + ------- + df: ``pandas.DataFrame`` + Times series data from the NWIS JSON + md: :obj:`dataretrieval.utils.Metadata` + A custom metadata object + + Examples + -------- + .. doctest:: + + >>> # Get discharge measurements for site 05114000 + >>> df, md = dataretrieval.nwis.get_discharge_measurements( + ... sites="05114000", start="2000-01-01", end="2000-01-30" + ... ) + + >>> # Get discharge measurements for sites in Alaska + >>> df, md = dataretrieval.nwis.get_discharge_measurements( + ... start="2012-01-09", end="2012-01-10", stateCd="AK" + ... ) + + """ + _check_sites_value_types(sites) + + kwargs["site_no"] = kwargs.pop("site_no", sites) + kwargs["begin_date"] = kwargs.pop("begin_date", start) + kwargs["end_date"] = kwargs.pop("end_date", end) + + if "format" not in kwargs: + kwargs["format"] = "rdb" + + response = query_waterdata("measurements", ssl_check=ssl_check, **kwargs) + return _read_rdb(response.text), NWIS_Metadata(response, **kwargs) + + +def get_discharge_peaks( + sites: Optional[Union[List[str], str]] = None, + start: Optional[str] = None, + end: Optional[str] = None, + multi_index: bool = True, + ssl_check: bool = True, + **kwargs, +) -> Tuple[pd.DataFrame, BaseMetadata]: + """ + Get discharge peaks from the waterdata service. 
+ + Parameters + ---------- + sites: string or list of strings, optional, default is None + If the waterdata parameter site_no is supplied, it will overwrite the + sites parameter + start: string, optional, default is None + If the waterdata parameter begin_date is supplied, it will overwrite + the start parameter (YYYY-MM-DD) + end: string, optional, default is None + If the waterdata parameter end_date is supplied, it will overwrite + the end parameter (YYYY-MM-DD) + multi_index: bool, optional + If False, a dataframe with a single-level index (datetime) is returned, + default is True + ssl_check: bool, optional + If True, check SSL certificates, if False, do not check SSL, + default is True + **kwargs: optional + If supplied, will be used as query parameters + + Returns + ------- + df: ``pandas.DataFrame`` + Times series data from the NWIS JSON + md: :obj:`dataretrieval.utils.Metadata` + A custom metadata object + + Examples + -------- + .. doctest:: + + >>> # Get discharge peaks for site 01491000 + >>> df, md = dataretrieval.nwis.get_discharge_peaks( + ... sites="01491000", start="1980-01-01", end="1990-01-01" + ... ) + + >>> # Get discharge peaks for sites in Hawaii + >>> df, md = dataretrieval.nwis.get_discharge_peaks( + ... start="1980-01-01", end="1980-01-02", stateCd="HI" + ... ) + + """ + _check_sites_value_types(sites) + + kwargs["site_no"] = kwargs.pop("site_no", sites) + kwargs["begin_date"] = kwargs.pop("begin_date", start) + kwargs["end_date"] = kwargs.pop("end_date", end) + kwargs["multi_index"] = multi_index + + response = query_waterdata("peaks", format="rdb", ssl_check=ssl_check, **kwargs) + + df = _read_rdb(response.text) + + return format_response(df, service="peaks", **kwargs), NWIS_Metadata( + response, **kwargs + ) + + +def get_gwlevels( + sites: Optional[Union[List[str], str]] = None, + start: str = "1851-01-01", + end: Optional[str] = None, + multi_index: bool = True, + datetime_index: bool = True, + ssl_check: bool = True, + **kwargs, +) -> Tuple[pd.DataFrame, BaseMetadata]: + """ + Queries the groundwater level service from waterservices + + Parameters + ---------- + sites: string or list of strings, optional, default is None + If the waterdata parameter site_no is supplied, it will overwrite the + sites parameter + start: string, optional, default is '1851-01-01' + If the waterdata parameter begin_date is supplied, it will overwrite + the start parameter + end: string, optional, default is None + If the waterdata parameter end_date is supplied, it will overwrite the + end parameter (YYYY-MM-DD) + multi_index: bool, optional + If False, a dataframe with a single-level index (datetime) is returned, + default is True + datetime_index : bool, optional + If True, create a datetime index, default is True + ssl_check: bool, optional + If True, check SSL certificates, if False, do not check SSL, + default is True + **kwargs: optional + If supplied, will be used as query parameters + + Returns + ------- + df: ``pandas.DataFrame`` + Times series data from the NWIS JSON + md: :obj:`dataretrieval.utils.Metadata` + A custom metadata object + + Examples + -------- + .. 
doctest:: + + >>> # Get groundwater levels for site 434400121275801 + >>> df, md = dataretrieval.nwis.get_gwlevels(sites="434400121275801") + + """ + _check_sites_value_types(sites) + + # Make kwargs backwards compatible with waterservices + # vocabulary + if "startDT" in kwargs: + kwargs["begin_date"] = kwargs.pop("startDT") + if "endDT" in kwargs: + kwargs["end_date"] = kwargs.pop("endDT") + if "sites" in kwargs: + kwargs["site_no"] = kwargs.pop("sites") + if "stateCd" in kwargs: + kwargs["state_cd"] = kwargs.pop("stateCd") + + kwargs["begin_date"] = kwargs.pop("begin_date", start) + kwargs["end_date"] = kwargs.pop("end_date", end) + kwargs["site_no"] = kwargs.pop("site_no", sites) + kwargs["multi_index"] = multi_index + + response = query_waterdata("gwlevels", format="rdb", ssl_check=ssl_check, **kwargs) + + df = _read_rdb(response.text) + + if datetime_index is True: + df = format_datetime(df, "lev_dt", "lev_tm", "lev_tz_cd") + + return format_response(df, **kwargs), NWIS_Metadata(response, **kwargs) + + +def get_stats( + sites: Optional[Union[List[str], str]] = None, ssl_check: bool = True, **kwargs +) -> Tuple[pd.DataFrame, BaseMetadata]: + """ + Queries water services statistics information. + + For more information about the water services statistics service, visit + https://waterservices.usgs.gov/docs/statistics/statistics-details/ + + Parameters + ---------- + sites: string or list of strings, optional, default is None + USGS site number (or list of site numbers) + ssl_check: bool, optional + If True, check SSL certificates, if False, do not check SSL, + default is True + **kwargs: optional + If supplied, will be used as query parameters + + Keyword Arguments + --------------------- + statReportType: string + daily (default), monthly, or annual + statTypeCd: string + all, mean, max, min, median + + Returns + ------- + df: ``pandas.DataFrame`` + Statistics data from the statistics service + md: :obj:`dataretrieval.utils.Metadata` + A custom metadata object + + .. todo:: + + fix date parsing + + Examples + -------- + .. doctest:: + + >>> # Get annual water statistics for a site + >>> df, md = dataretrieval.nwis.get_stats( + ... sites="01646500", statReportType="annual", statYearType="water" + ... ) + + >>> # Get monthly statistics for a site + >>> df, md = dataretrieval.nwis.get_stats( + ... sites="01646500", statReportType="monthly" + ... ) + + """ + _check_sites_value_types(sites) + + response = query_waterservices( + service="stat", sites=sites, ssl_check=ssl_check, **kwargs + ) + + return _read_rdb(response.text), NWIS_Metadata(response, **kwargs) + + +def query_waterdata( + service: str, ssl_check: bool = True, **kwargs +) -> requests.models.Response: + """ + Queries waterdata. + + Parameters + ---------- + service: string + Name of the service to query: 'site', 'stats', etc. 
+    ssl_check: bool, optional
+        If True, check SSL certificates, if False, do not check SSL,
+        default is True
+    **kwargs: optional
+        If supplied, will be used as query parameters
+
+    Returns
+    -------
+    request: ``requests.models.Response``
+        The response object from the API request to the web service
+    """
+    major_params = ["site_no", "state_cd"]
+    bbox_params = [
+        "nw_longitude_va",
+        "nw_latitude_va",
+        "se_longitude_va",
+        "se_latitude_va",
+    ]
+
+    if not any(key in kwargs for key in major_params + bbox_params):
+        raise TypeError("Query must specify a major filter: site_no, state_cd, or a bounding box")
+
+    elif any(key in kwargs for key in bbox_params) and not all(
+        key in kwargs for key in bbox_params
+    ):
+        raise TypeError("One or more lat/long coordinates missing or invalid.")
+
+    if service not in WATERDATA_SERVICES:
+        raise TypeError("Service not recognized")
+
+    url = WATERDATA_URL + service
+
+    return query(url, payload=kwargs, ssl_check=ssl_check)
+
+
+def query_waterservices(
+    service: str, ssl_check: bool = True, **kwargs
+) -> requests.models.Response:
+    """
+    Queries waterservices.usgs.gov
+
+    For more documentation see https://waterservices.usgs.gov/docs/
+
+    .. note::
+
+        User must specify one major filter: sites, stateCd, bBox, huc, or countyCd
+
+    Parameters
+    ----------
+    service: string
+        Name of the service to query: 'site', 'stats', etc.
+    ssl_check: bool, optional
+        If True, check SSL certificates, if False, do not check SSL,
+        default is True
+    **kwargs: optional
+        If supplied, will be used as query parameters
+
+    Keyword Arguments
+    -----------------
+    bBox: string
+        Bounding box of the form 'west,south,east,north', in decimal degrees
+    startDT: string
+        Start date (e.g., '2017-12-31')
+    endDT: string
+        End date (e.g., '2018-01-01')
+    modifiedSince: string
+        Used to return only sites where attributes or period of record data
+        have changed during the request period. String expected to be formatted
+        in ISO-8601 duration format (e.g., 'P1D' for one day,
+        'P1Y' for one year)
+
+    Returns
+    -------
+    request: ``requests.models.Response``
+        The response object from the API request to the web service
+
+    """
+    if not any(
+        key in kwargs for key in ["sites", "stateCd", "bBox", "huc", "countyCd"]
+    ):
+        raise TypeError(
+            "Query must specify a major filter: sites, stateCd, bBox, huc, or countyCd"
+        )
+
+    if service not in WATERSERVICES_SERVICES:
+        raise TypeError("Service not recognized")
+
+    if "format" not in kwargs:
+        kwargs["format"] = "rdb"
+
+    url = WATERSERVICE_URL + service
+
+    return query(url, payload=kwargs, ssl_check=ssl_check)
+
+
+def get_dv(
+    sites: Optional[Union[List[str], str]] = None,
+    start: Optional[str] = None,
+    end: Optional[str] = None,
+    multi_index: bool = True,
+    ssl_check: bool = True,
+    **kwargs,
+) -> Tuple[pd.DataFrame, BaseMetadata]:
+    """
+    Get daily values data from NWIS and return it as a ``pandas.DataFrame``.
+
+    .. note::
+
+        If no start or end date are provided, only the most recent record
+        is returned.
+ + Parameters + ---------- + sites: string or list of strings, optional, default is None + USGS site number (or list of site numbers) + start: string, optional, default is None + If the waterdata parameter startDT is supplied, it will overwrite the + start parameter (YYYY-MM-DD) + end: string, optional, default is None + If the waterdata parameter endDT is supplied, it will overwrite the + end parameter (YYYY-MM-DD) + multi_index: bool, optional + If True, return a multi-index dataframe, if False, return a + single-index dataframe, default is True + ssl_check: bool, optional + If True, check SSL certificates, if False, do not check SSL, + default is True + **kwargs: optional + If supplied, will be used as query parameters + + Returns + ------- + df: ``pandas.DataFrame`` + Times series data from the NWIS JSON + md: :obj:`dataretrieval.utils.Metadata` + A custom metadata object + + Examples + -------- + .. doctest:: + + >>> # Get mean statistic daily values for site 04085427 + >>> df, md = dataretrieval.nwis.get_dv( + ... sites="04085427", + ... start="2012-01-01", + ... end="2012-06-30", + ... statCd="00003", + ... ) + + >>> # Get the latest daily values for site 01646500 + >>> df, md = dataretrieval.nwis.get_dv(sites="01646500") + + """ + _check_sites_value_types(sites) + + kwargs["startDT"] = kwargs.pop("startDT", start) + kwargs["endDT"] = kwargs.pop("endDT", end) + kwargs["sites"] = kwargs.pop("sites", sites) + kwargs["multi_index"] = multi_index + + response = query_waterservices("dv", format="json", ssl_check=ssl_check, **kwargs) + df = _read_json(response.json()) + + return format_response(df, **kwargs), NWIS_Metadata(response, **kwargs) + + +def get_info(ssl_check: bool = True, **kwargs) -> Tuple[pd.DataFrame, BaseMetadata]: + """ + Get site description information from NWIS. + + **Note:** *Must specify one major parameter.* + + For additional parameter options see + https://waterservices.usgs.gov/docs/site-service/site-service-details/ + + Parameters + ---------- + ssl_check: bool, optional + If True, check SSL certificates, if False, do not check SSL, + default is True + **kwargs: optional + If supplied, will be used as query parameters + + Keyword Arguments + ---------------- + sites: string or list of strings + A list of site numbers. Sites may be prefixed with an optional agency + code followed by a colon. + stateCd: string + U.S. postal service (2-digit) state code. Only 1 state can be specified + per request. + huc: string or list of strings + A list of hydrologic unit codes (HUC) or aggregated watersheds. Only 1 + major HUC can be specified per request, or up to 10 minor HUCs. A major + HUC has two digits. + bBox: string or list of strings + A contiguous range of decimal latitude and longitude, starting with the + west longitude, then the south latitude, then the east longitude, and + then the north latitude with each value separated by a comma. The + product of the range of latitude range and longitude cannot exceed 25 + degrees. Whole or decimal degrees must be specified, up to six digits + of precision. Minutes and seconds are not allowed. + countyCd: string or list of strings + A list of county numbers, in a 5 digit numeric format. The first two + digits of a county's code are the FIPS State Code. + (url: https://help.waterdata.usgs.gov/code/county_query?fmt=html) + startDt: string + Selects sites based on whether data was collected at a point in time + beginning after startDt (start date). Dates must be in ISO-8601 + Calendar Date format (for example: 1990-01-01). 
+    endDt: string
+        The end date for the period of record. Dates must be in ISO-8601
+        Calendar Date format (for example: 1990-01-01).
+    period: string
+        Selects sites based on whether they were active between now
+        and a time in the past. For example, period=P10W will select sites
+        active in the last ten weeks.
+    modifiedSince: string
+        Returns only sites where site attributes or period of record data have
+        changed during the request period.
+    parameterCd: string or list of strings
+        Returns only site data for those sites containing the requested USGS
+        parameter codes.
+    siteType: string or list of strings
+        Restricts sites to those having one or more major and/or minor site
+        types, such as stream, spring or well. For a list of all valid site
+        types see https://help.waterdata.usgs.gov/site_tp_cd
+        For example, siteType='ST' returns streams only.
+    siteOutput: string ('basic' or 'expanded')
+        Indicates the richness of metadata you want for site attributes. Note
+        that for visually oriented formats like Google Map format, this
+        argument has no meaning. Note: for performance reasons,
+        siteOutput=expanded cannot be used if seriesCatalogOutput=true or with
+        any values for outputDataTypeCd.
+    seriesCatalogOutput: bool
+        A switch that provides detailed period of record information for
+        certain output formats. The period of record indicates date ranges for
+        a certain kind of information about a site, for example the start and
+        end dates for a site's daily mean streamflow.
+
+    Returns
+    -------
+    df: ``pandas.DataFrame``
+        Site data from the NWIS web service
+    md: :obj:`dataretrieval.utils.Metadata`
+        A custom metadata object
+
+    Examples
+    --------
+    .. doctest::
+
+        >>> # Get site information for a single site
+        >>> df, md = dataretrieval.nwis.get_info(sites="05114000")
+
+        >>> # Get site information for multiple sites
+        >>> df, md = dataretrieval.nwis.get_info(sites=["05114000", "09423350"])
+
+    """
+    seriesCatalogOutput = kwargs.pop("seriesCatalogOutput", None)
+    if seriesCatalogOutput in ["True", "TRUE", "true", True]:
+        warnings.warn(
+            (
+                "WARNING: Starting in March 2024, the NWIS qw data endpoint is "
+                "retiring and no longer receives updates. For more information, "
+                "refer to https://waterdata.usgs.gov/nwis/qwdata and "
+                "https://doi-usgs.github.io/dataRetrieval/articles/Status.html "
+                "or email CompTools@usgs.gov."
+            )
+        )
+        # convert bool to string if necessary
+        kwargs["seriesCatalogOutput"] = "True"
+    else:
+        # cannot have both seriesCatalogOutput and the expanded format
+        kwargs["siteOutput"] = "Expanded"
+
+    response = query_waterservices("site", ssl_check=ssl_check, **kwargs)
+
+    return _read_rdb(response.text), NWIS_Metadata(response, **kwargs)
+
+
+def get_iv(
+    sites: Optional[Union[List[str], str]] = None,
+    start: Optional[str] = None,
+    end: Optional[str] = None,
+    multi_index: bool = True,
+    ssl_check: bool = True,
+    **kwargs,
+) -> Tuple[pd.DataFrame, BaseMetadata]:
+    """Get instantaneous values data from NWIS and return it as a DataFrame.
+
+    .. note::
+
+        If no start or end date are provided, only the most recent record
+        is returned.
+ + Parameters + ---------- + sites: string or list of strings, optional, default is None + If the waterdata parameter site_no is supplied, it will overwrite the + sites parameter + start: string, optional, default is None + If the waterdata parameter startDT is supplied, it will overwrite the + start parameter (YYYY-MM-DD) + end: string, optional, default is None + If the waterdata parameter endDT is supplied, it will overwrite the + end parameter (YYYY-MM-DD) + multi_index: bool, optional + If False, a dataframe with a single-level index (datetime) is returned, + default is True + ssl_check: bool, optional + If True, check SSL certificates, if False, do not check SSL, + default is True + **kwargs: optional + If supplied, will be used as query parameters + + Returns + ------- + df: ``pandas.DataFrame`` + Times series data from the NWIS JSON + md: :obj:`dataretrieval.utils.Metadata` + A custom metadata object + + Examples + -------- + .. doctest:: + + >>> # Get instantaneous discharge data for site 05114000 + >>> df, md = dataretrieval.nwis.get_iv( + ... sites="05114000", + ... start="2013-11-03", + ... end="2013-11-03", + ... parameterCd="00060", + ... ) + + """ + _check_sites_value_types(sites) + + kwargs["startDT"] = kwargs.pop("startDT", start) + kwargs["endDT"] = kwargs.pop("endDT", end) + kwargs["sites"] = kwargs.pop("sites", sites) + kwargs["multi_index"] = multi_index + + response = query_waterservices( + service="iv", format="json", ssl_check=ssl_check, **kwargs + ) + + df = _read_json(response.json()) + return format_response(df, **kwargs), NWIS_Metadata(response, **kwargs) + + +def get_pmcodes( + parameterCd: Union[str, List[str]] = "All", + partial: bool = True, + ssl_check: bool = True, +) -> Tuple[pd.DataFrame, BaseMetadata]: + """ + Return a ``pandas.DataFrame`` containing all NWIS parameter codes. + + Parameters + ---------- + parameterCd: string or list of strings, default is 'All' + Accepts parameter codes or names + partial: bool, optional + Default is True (partial querying). If False, the function will query + only exact matches, default is True + ssl_check: bool, optional + If True, check SSL certificates, if False, do not check SSL, + default is True + + Returns + ------- + df: ``pandas.DataFrame`` + Data retrieved from the NWIS web service. + md: :obj:`dataretrieval.utils.Metadata` + A custom metadata object + + Examples + -------- + .. doctest:: + + >>> # Get information about the '00060' pcode + >>> df, md = dataretrieval.nwis.get_pmcodes( + ... parameterCd="00060", partial=False + ... ) + + >>> # Get information about all 'Discharge' pcodes + >>> df, md = dataretrieval.nwis.get_pmcodes( + ... parameterCd="Discharge", partial=True + ... 
)
+
+    """
+
+    payload = {"fmt": "rdb"}
+    url = PARAMCODES_URL
+
+    if isinstance(parameterCd, str):  # when a single code or name is given
+        if parameterCd.lower() == "all":
+            payload.update({"group_cd": "%"})
+            url = ALLPARAMCODES_URL
+            response = query(url, payload, ssl_check=ssl_check)
+            return _read_rdb(response.text), NWIS_Metadata(response)
+
+        else:
+            parameterCd = [parameterCd]
+
+    if not isinstance(parameterCd, list):
+        raise TypeError(
+            "Parameter information (code or name) must be type string or list"
+        )
+
+    # Querying with a list of parameter names, codes, or mixed
+    return_list = []
+    for param in parameterCd:
+        if isinstance(param, str):
+            if partial:
+                param = f"%{param}%"
+            payload.update({"parm_nm_cd": param})
+            response = query(url, payload, ssl_check=ssl_check)
+            if len(response.text.splitlines()) < 10:  # empty query
+                raise TypeError(
+                    "One of the parameter codes or names entered does not "
+                    "return any information, please try a different value"
+                )
+            return_list.append(_read_rdb(response.text))
+        else:
+            raise TypeError("Parameter information (code or name) must be type string")
+    return pd.concat(return_list), NWIS_Metadata(response)
+
+
+def get_water_use(
+    years: Union[str, List[str]] = "ALL",
+    state: Optional[str] = None,
+    counties: Union[str, List[str]] = "ALL",
+    categories: Union[str, List[str]] = "ALL",
+    ssl_check: bool = True,
+) -> Tuple[pd.DataFrame, BaseMetadata]:
+    """
+    Water use data retrieval from USGS (NWIS).
+
+    Parameters
+    ----------
+    years: string or list of strings
+        List or comma delimited string of years. Must be years ending in 0 or
+        5, or "ALL", which retrieves all available years, default is "ALL"
+    state: string, optional, default is None
+        full name, abbreviation or id
+    counties: string or list of strings
+        County IDs from county lookup or "ALL", default is "ALL"
+    categories: string or list of strings
+        List or comma delimited string of two-letter category abbreviations,
+        default is "ALL"
+    ssl_check: bool, optional
+        If True, check SSL certificates, if False, do not check SSL,
+        default is True
+
+    Returns
+    -------
+    df: ``pandas.DataFrame``
+        Data from NWIS
+    md: :obj:`dataretrieval.utils.Metadata`
+        A custom metadata object
+
+    Examples
+    --------
+    .. doctest::
+
+        >>> # Get total population for RI from the NWIS water use service
+        >>> df, md = dataretrieval.nwis.get_water_use(
+        ...     years="2000", state="RI", categories="TP"
+        ... )
+
+        >>> # Get the national total water use for livestock in Bgal/day
+        >>> df, md = dataretrieval.nwis.get_water_use(years="2010", categories="L")
+
+        >>> # Get 2005 domestic water use for Apache County in Arizona
+        >>> df, md = dataretrieval.nwis.get_water_use(
+        ...     years="2005", state="Arizona", counties="001", categories="DO"
+        ... 
) + + """ + if years: + if not isinstance(years, list) and not isinstance(years, str): + raise TypeError("years must be a string or a list of strings") + + if counties: + if not isinstance(counties, list) and not isinstance(counties, str): + raise TypeError("counties must be a string or a list of strings") + + if categories: + if not isinstance(categories, list) and not isinstance(categories, str): + raise TypeError("categories must be a string or a list of strings") + + payload = { + "rdb_compression": "value", + "format": "rdb", + "wu_year": years, + "wu_category": categories, + "wu_county": counties, + } + url = WATERDATA_URL + "water_use" + if state is not None: + url = WATERDATA_BASE_URL + state + "/nwis/water_use" + payload.update({"wu_area": "county"}) + response = query(url, payload, ssl_check=ssl_check) + return _read_rdb(response.text), NWIS_Metadata(response) + + +def get_ratings( + site: Optional[str] = None, + file_type: str = "base", + ssl_check: bool = True, + **kwargs, +) -> Tuple[pd.DataFrame, BaseMetadata]: + """ + Rating table for an active USGS streamgage retrieval. + + Reads current rating table for an active USGS streamgage from NWISweb. + Data is retrieved from https://waterdata.usgs.gov/nwis. + + Parameters + ---------- + site: string, optional, default is None + USGS site number. This is usually an 8 digit number as a string. + If the nwis parameter site_no is supplied, it will overwrite the site + parameter + file_type: string, default is "base" + can be "base", "corr", or "exsa" + ssl_check: bool, optional + If True, check SSL certificates, if False, do not check SSL, + default is True + **kwargs: optional + If supplied, will be used as query parameters + + Return + ------ + df: ``pandas.DataFrame`` + Formatted requested data + md: :obj:`dataretrieval.utils.Metadata` + A custom metadata object + + Examples + -------- + .. doctest:: + + >>> # Get the rating table for USGS streamgage 01594440 + >>> df, md = dataretrieval.nwis.get_ratings(site="01594440") + + """ + site = kwargs.pop("site_no", site) + + payload = {} + url = WATERDATA_BASE_URL + "nwisweb/get_ratings/" + if site is not None: + payload.update({"site_no": site}) + if file_type is not None: + if file_type not in ["base", "corr", "exsa"]: + raise ValueError( + f'Unrecognized file_type: {file_type}, must be "base", "corr" or "exsa"' + ) + payload.update({"file_type": file_type}) + response = query(url, payload, ssl_check=ssl_check) + return _read_rdb(response.text), NWIS_Metadata(response, site_no=site) + + +def what_sites(ssl_check: bool = True, **kwargs) -> Tuple[pd.DataFrame, BaseMetadata]: + """ + Search NWIS for sites within a region with specific data. + + Parameters + ---------- + ssl_check: bool, optional + If True, check SSL certificates, if False, do not check SSL, + default is True + **kwargs: optional + Accepts the same parameters as :obj:`dataretrieval.nwis.get_info` + + Return + ------ + df: ``pandas.DataFrame`` + Formatted requested data + md: :obj:`dataretrieval.utils.Metadata` + A custom metadata object + + Examples + -------- + .. doctest:: + + >>> # get information about a single site + >>> df, md = dataretrieval.nwis.what_sites(sites="05114000") + + >>> # get information about sites with phosphorus in Ohio + >>> df, md = dataretrieval.nwis.what_sites( + ... stateCd="OH", parameterCd="00665" + ... 
) + + """ + + response = query_waterservices(service="site", ssl_check=ssl_check, **kwargs) + + df = _read_rdb(response.text) + + return df, NWIS_Metadata(response, **kwargs) + + +def get_record( + sites: Optional[Union[List[str], str]] = None, + start: Optional[str] = None, + end: Optional[str] = None, + multi_index: bool = True, + wide_format: bool = True, + datetime_index: bool = True, + state: Optional[str] = None, + service: str = "iv", + ssl_check: bool = True, + **kwargs, +) -> pd.DataFrame: + """ + Get data from NWIS and return it as a ``pandas.DataFrame``. + + .. note:: + + If no start or end date are provided, only the most recent record is + returned. + + Parameters + ---------- + sites: string or list of strings, optional, default is None + List or comma delimited string of site. + start: string, optional, default is None + Starting date of record (YYYY-MM-DD) + end: string, optional, default is None + Ending date of record. (YYYY-MM-DD) + multi_index: bool, optional + If False, a dataframe with a single-level index (datetime) is returned, + default is True + wide_format : bool, optional + If True, return data in wide format with multiple samples per row and + one row per time, default is True + datetime_index : bool, optional + If True, create a datetime index. default is True + state: string, optional, default is None + full name, abbreviation or id + service: string, default is 'iv' + - 'iv' : instantaneous data + - 'dv' : daily mean data + - 'qwdata' : discrete samples + - 'site' : site description + - 'measurements' : discharge measurements + - 'peaks': discharge peaks + - 'gwlevels': groundwater levels + - 'pmcodes': get parameter codes + - 'water_use': get water use data + - 'ratings': get rating table + - 'stat': get statistics + ssl_check: bool, optional + If True, check SSL certificates, if False, do not check SSL, + default is True + **kwargs: optional + If supplied, will be used as query parameters + + Returns + ------- + ``pandas.DataFrame`` containing requested data + + Examples + -------- + .. doctest:: + + >>> # Get latest instantaneous data from site 01585200 + >>> df = dataretrieval.nwis.get_record(sites="01585200", service="iv") + + >>> # Get latest daily mean data from site 01585200 + >>> df = dataretrieval.nwis.get_record(sites="01585200", service="dv") + + >>> # Get all discrete sample data from site 01585200 + >>> df = dataretrieval.nwis.get_record(sites="01585200", service="qwdata") + + >>> # Get site description for site 01585200 + >>> df = dataretrieval.nwis.get_record(sites="01585200", service="site") + + >>> # Get discharge measurements for site 01585200 + >>> df = dataretrieval.nwis.get_record( + ... sites="01585200", service="measurements" + ... ) + + >>> # Get discharge peaks for site 01585200 + >>> df = dataretrieval.nwis.get_record(sites="01585200", service="peaks") + + >>> # Get latest groundwater level for site 434400121275801 + >>> df = dataretrieval.nwis.get_record( + ... sites="434400121275801", service="gwlevels" + ... ) + + >>> # Get information about the discharge parameter code + >>> df = dataretrieval.nwis.get_record( + ... service="pmcodes", parameterCd="00060" + ... ) + + >>> # Get water use data for livestock nationally in 2010 + >>> df = dataretrieval.nwis.get_record( + ... service="water_use", years="2010", categories="L" + ... 
) + + >>> # Get rating table for USGS streamgage 01585200 + >>> df = dataretrieval.nwis.get_record(sites="01585200", service="ratings") + + >>> # Get annual statistics for USGS station 01646500 + >>> df = dataretrieval.nwis.get_record( + ... sites="01646500", + ... service="stat", + ... statReportType="annual", + ... statYearType="water", + ... ) + + """ + _check_sites_value_types(sites) + + if service not in WATERSERVICES_SERVICES + WATERDATA_SERVICES: + raise TypeError(f"Unrecognized service: {service}") + + if service == "iv": + df, _ = get_iv( + sites=sites, + startDT=start, + endDT=end, + multi_index=multi_index, + ssl_check=ssl_check, + **kwargs, + ) + return df + + elif service == "dv": + df, _ = get_dv( + sites=sites, + startDT=start, + endDT=end, + multi_index=multi_index, + ssl_check=ssl_check, + **kwargs, + ) + return df + + elif service == "qwdata": + df, _ = get_qwdata( + site_no=sites, + begin_date=start, + end_date=end, + multi_index=multi_index, + wide_format=wide_format, + ssl_check=ssl_check, + **kwargs, + ) + return df + + elif service == "site": + df, _ = get_info(sites=sites, ssl_check=ssl_check, **kwargs) + return df + + elif service == "measurements": + df, _ = get_discharge_measurements( + site_no=sites, begin_date=start, end_date=end, ssl_check=ssl_check, **kwargs + ) + return df + + elif service == "peaks": + df, _ = get_discharge_peaks( + site_no=sites, + begin_date=start, + end_date=end, + multi_index=multi_index, + ssl_check=ssl_check, + **kwargs, + ) + return df + + elif service == "gwlevels": + df, _ = get_gwlevels( + sites=sites, + startDT=start, + endDT=end, + multi_index=multi_index, + datetime_index=datetime_index, + ssl_check=ssl_check, + **kwargs, + ) + return df + + elif service == "pmcodes": + df, _ = get_pmcodes(ssl_check=ssl_check, **kwargs) + return df + + elif service == "water_use": + df, _ = get_water_use(state=state, ssl_check=ssl_check, **kwargs) + return df + + elif service == "ratings": + df, _ = get_ratings(site=sites, ssl_check=ssl_check, **kwargs) + return df + + elif service == "stat": + df, _ = get_stats(sites=sites, ssl_check=ssl_check, **kwargs) + return df + + else: + raise TypeError(f"{service} service not yet implemented") + + +def _read_json(json): + """ + Reads a NWIS Water Services formatted JSON into a ``pandas.DataFrame``. 
+def _read_json(json):
+    """
+    Reads a NWIS Water Services formatted JSON into a ``pandas.DataFrame``.
+
+    Parameters
+    ----------
+    json: dict
+        A JSON dictionary response to be parsed into a ``pandas.DataFrame``
+
+    Returns
+    -------
+    df: ``pandas.DataFrame``
+        Time series data from the NWIS JSON
+
+    """
+    merged_df = pd.DataFrame(columns=["site_no", "datetime"])
+
+    site_list = [
+        ts["sourceInfo"]["siteCode"][0]["value"] for ts in json["value"]["timeSeries"]
+    ]
+
+    # create a list of indexes for each change in site number;
+    # for example, [0, 21, 22] would be the first and last indices
+    index_list = [0]
+    index_list.extend(
+        [i + 1 for i, (a, b) in enumerate(zip(site_list[:-1], site_list[1:])) if a != b]
+    )
+    index_list.append(len(site_list))
+
+    for i in range(len(index_list) - 1):
+        start = index_list[i]  # [0]
+        end = index_list[i + 1]  # [21]
+
+        # grab a block containing timeseries 0:21,
+        # which are all from the same site
+        site_block = json["value"]["timeSeries"][start:end]
+        if not site_block:
+            continue
+
+        site_no = site_block[0]["sourceInfo"]["siteCode"][0]["value"]
+        site_df = pd.DataFrame(columns=["datetime"])
+
+        for timeseries in site_block:
+            param_cd = timeseries["variable"]["variableCode"][0]["value"]
+            # check whether this is a min, max, or mean record
+            option = timeseries["variable"]["options"]["option"][0].get("value")
+
+            # loop through each parameter in timeseries, then concat to the merged_df
+            for parameter in timeseries["values"]:
+                col_name = param_cd
+                method = parameter["method"][0]["methodDescription"]
+
+                # if len(timeseries['values']) > 1 and method:
+                if method:
+                    # get method, format it, and append to column name
+                    method = method.strip("[]()").lower()
+                    col_name = f"{col_name}_{method}"
+
+                if option:
+                    col_name = f"{col_name}_{option}"
+
+                record_json = parameter["value"]
+
+                if not record_json:
+                    # no data in record
+                    continue
+                # convert single quotes to double quotes so the string parses
+                # as valid JSON; could be avoided by dumping with json.dumps
+                record_json = str(record_json).replace("'", '"')
+
+                # read json, converting all values to float64 and all qualifiers
+                # to str. Lists can't be hashed, thus we cannot df.merge on a list column
+                record_df = pd.read_json(
+                    StringIO(record_json),
+                    orient="records",
+                    dtype={"value": "float64", "qualifiers": "unicode"},
+                    convert_dates=False,
+                )
+
+                record_df["qualifiers"] = (
+                    record_df["qualifiers"].str.strip("[]").str.replace("'", "")
+                )
+
+                record_df.rename(
+                    columns={
+                        "value": col_name,
+                        "dateTime": "datetime",
+                        "qualifiers": col_name + "_cd",
+                    },
+                    inplace=True,
+                )
+
+                site_df = site_df.merge(record_df, how="outer", on="datetime")
+
+        # end of site loop
+        site_df["site_no"] = site_no
+        merged_df = pd.concat([merged_df, site_df])
+
+    # convert to datetime, normalizing the timezone to UTC when doing so
+    if "datetime" in merged_df.columns:
+        merged_df["datetime"] = pd.to_datetime(merged_df["datetime"], utc=True)
+
+    return merged_df
+
+
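+# An illustrative sketch (not part of the upstream library) of the rdb layout
+# that _read_rdb below expects: leading comment lines, a tab-separated header
+# row, a column-format row, then data (tabs shown here as "<TAB>"):
+#
+#     # comment lines, counted and skipped
+#     agency_cd<TAB>site_no<TAB>datetime
+#     5s<TAB>15s<TAB>20d
+#     USGS<TAB>01585200<TAB>2020-01-01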
+def _read_rdb(rdb):
+    """
+    Convert an NWIS rdb table into a ``pandas.DataFrame``.
+
+    Parameters
+    ----------
+    rdb: string
+        A string representation of an rdb table
+
+    Returns
+    -------
+    df: ``pandas.DataFrame``
+        A formatted pandas data frame
+
+    """
+    count = 0
+
+    for line in rdb.splitlines():
+        # ignore comment lines
+        if line.startswith("#"):
+            count = count + 1
+
+        else:
+            break
+
+    fields = re.split("[\t]", rdb.splitlines()[count])
+    fields = [field.replace(",", "") for field in fields]
+    dtypes = {
+        "site_no": str,
+        "dec_long_va": float,
+        "dec_lat_va": float,
+        "parm_cd": str,
+        "parameter_cd": str,
+    }
+
+    df = pd.read_csv(
+        StringIO(rdb),
+        delimiter="\t",
+        skiprows=count + 2,
+        names=fields,
+        na_values="NaN",
+        dtype=dtypes,
+    )
+
+    df = format_response(df)
+    return df
+
+
+def _check_sites_value_types(sites):
+    if sites:
+        if not isinstance(sites, list) and not isinstance(sites, str):
+            raise TypeError("sites must be a string or a list of strings")
+
+
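+# Illustrative note (not part of the upstream library): the service functions
+# above pair a DataFrame with an NWIS_Metadata object built from the same
+# response, e.g.
+#
+#     response = query_waterservices(service="site", sites="01585200")
+#     df = _read_rdb(response.text)
+#     md = NWIS_Metadata(response, sites="01585200")
+#     md.site_info  # lazily calls what_sites(sites="01585200")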
+class NWIS_Metadata(BaseMetadata):
+    """Metadata class for NWIS service, derived from BaseMetadata.
+
+    Attributes
+    ----------
+    url : str
+        Response url
+    query_time: datetime.timedelta
+        Response elapsed time
+    header: requests.structures.CaseInsensitiveDict
+        Response headers
+    comments: str | None
+        Metadata comments, if any
+    site_info: tuple[pd.DataFrame, NWIS_Metadata] | None
+        Site information if the query included `site_no`, `sites`, `stateCd`,
+        `huc`, `countyCd` or `bBox`. `site_no` is preferred over `sites` if
+        both are present.
+    variable_info: tuple[pd.DataFrame, NWIS_Metadata] | None
+        Variable information if the query included `parameterCd`.
+
+    """
+
+    def __init__(self, response, **parameters) -> None:
+        """Generates a standard set of metadata informed by the response with specific
+        metadata for NWIS data.
+
+        Parameters
+        ----------
+        response: Response
+            Response object from requests module
+        parameters: unpacked dictionary
+            Unpacked dictionary of the parameters supplied in the request
+
+        Returns
+        -------
+        md: :obj:`dataretrieval.nwis.NWIS_Metadata`
+            A ``dataretrieval`` custom :obj:`dataretrieval.nwis.NWIS_Metadata` object.
+
+        """
+        super().__init__(response)
+
+        comments = ""
+        for line in response.text.splitlines():
+            if line.startswith("#"):
+                comments += line.lstrip("#") + "\n"
+        if comments:
+            self.comment = comments
+
+        self._parameters = parameters
+
+    @property
+    def site_info(self) -> Optional[Tuple[pd.DataFrame, BaseMetadata]]:
+        """
+        Returns
+        -------
+        df: ``pandas.DataFrame``
+            Formatted requested data from calling `nwis.what_sites`
+        md: :obj:`dataretrieval.nwis.NWIS_Metadata`
+            A NWIS_Metadata object
+        """
+        if "site_no" in self._parameters:
+            return what_sites(sites=self._parameters["site_no"])
+
+        elif "sites" in self._parameters:
+            return what_sites(sites=self._parameters["sites"])
+
+        elif "stateCd" in self._parameters:
+            return what_sites(stateCd=self._parameters["stateCd"])
+
+        elif "huc" in self._parameters:
+            return what_sites(huc=self._parameters["huc"])
+
+        elif "countyCd" in self._parameters:
+            return what_sites(countyCd=self._parameters["countyCd"])
+
+        elif "bBox" in self._parameters:
+            return what_sites(bBox=self._parameters["bBox"])
+
+        else:
+            return None  # don't set metadata site_info attribute
+
+    @property
+    def variable_info(self) -> Optional[Tuple[pd.DataFrame, BaseMetadata]]:
+        # define variable_info metadata based on parameterCd if available
+        if "parameterCd" in self._parameters:
+            return get_pmcodes(parameterCd=self._parameters["parameterCd"])
diff --git a/plugins/dataretrieval/utils.py b/plugins/dataretrieval/utils.py
new file mode 100644
index 0000000..53e95ac
--- /dev/null
+++ b/plugins/dataretrieval/utils.py
@@ -0,0 +1,234 @@
+"""
+Useful utilities for data munging.
+"""
+
+import warnings
+
+import pandas as pd
+import requests
+
+import dataretrieval
+from dataretrieval.codes import tz
+
+
+def to_str(listlike, delimiter=","):
+    """Translates list-like objects into strings.
+
+    Parameters
+    ----------
+    listlike: list-like object
+        An object that is a list, or list-like
+        (e.g., ``pandas.core.series.Series``)
+    delimiter: string, optional
+        The delimiter that is placed between entries in listlike when it is
+        turned into a string. Default value is a comma.
+
+    Returns
+    -------
+    listlike: string
+        The listlike object as string separated by the delimiter
+
+    Examples
+    --------
+    .. doctest::
+
+        >>> dataretrieval.utils.to_str([1, "a", 2])
+        '1,a,2'
+
+        >>> dataretrieval.utils.to_str([0, 10, 42], delimiter="+")
+        '0+10+42'
+
+    """
+    if type(listlike) == list:
+        return delimiter.join([str(x) for x in listlike])
+
+    elif type(listlike) == pd.core.series.Series:
+        return delimiter.join(listlike.tolist())
+
+    elif type(listlike) == pd.core.indexes.base.Index:
+        return delimiter.join(listlike.tolist())
+
+    elif type(listlike) == str:
+        return listlike
+
+
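+# An illustrative sketch (not part of the upstream library) of the input that
+# format_datetime below expects; the column names here are hypothetical:
+#
+#     df = pd.DataFrame({
+#         "sample_dt": ["2020-01-01"],
+#         "sample_tm": ["12:00"],
+#         "tz_cd": ["EST"],
+#     })
+#     df = format_datetime(df, "sample_dt", "sample_tm", "tz_cd")
+#     # df["datetime"] now holds timezone-aware datetimes normalized to UTC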
+def format_datetime(df, date_field, time_field, tz_field):
+    """Creates a datetime field from separate date, time, and
+    time zone fields.
+
+    Assumes ISO 8601.
+
+    Parameters
+    ----------
+    df: ``pandas.DataFrame``
+        A data frame containing date, time, and timezone fields.
+    date_field: string
+        Name of date column in df.
+    time_field: string
+        Name of time column in df.
+    tz_field: string
+        Name of time zone column in df.
+
+    Returns
+    -------
+    df: ``pandas.DataFrame``
+        The data frame with a formatted 'datetime' column
+
+    """
+    # create a datetime index from the columns in qwdata response
+    df[tz_field] = df[tz_field].map(tz)
+
+    df["datetime"] = pd.to_datetime(
+        df[date_field] + " " + df[time_field] + " " + df[tz_field],
+        format="ISO8601",
+        utc=True,
+    )
+
+    # if there are any incomplete dates, warn the user
+    if df["datetime"].isna().any():
+        count = df["datetime"].isna().sum()
+        warnings.warn(
+            f"Warning: {count} incomplete dates found, "
+            + "consider setting datetime_index to False.",
+            UserWarning,
+        )
+
+    return df
+
+
+class BaseMetadata:
+    """Base class for metadata.
+
+    Attributes
+    ----------
+    url : str
+        Response url
+    query_time: datetime.timedelta
+        Response elapsed time
+    header: requests.structures.CaseInsensitiveDict
+        Response headers
+
+    """
+
+    def __init__(self, response) -> None:
+        """Generates a standard set of metadata informed by the response.
+
+        Parameters
+        ----------
+        response: Response
+            Response object from requests module
+
+        Returns
+        -------
+        md: :obj:`dataretrieval.utils.BaseMetadata`
+            A ``dataretrieval`` custom :obj:`dataretrieval.utils.BaseMetadata` object.
+
+        """
+
+        # These are built from the API response
+        self.url = response.url
+        self.query_time = response.elapsed
+        self.header = response.headers
+        self.comment = None
+
+        # # not sure what statistic_info is
+        # self.statistic_info = None
+
+        # # disclaimer seems to be only part of importWaterML1
+        # self.disclaimer = None
+
+    # These properties are to be set by `nwis` or `wqp`-specific metadata classes.
+    @property
+    def site_info(self):
+        raise NotImplementedError(
+            "site_info must be implemented by utils.BaseMetadata children"
+        )
+
+    @property
+    def variable_info(self):
+        raise NotImplementedError(
+            "variable_info must be implemented by utils.BaseMetadata children"
+        )
+
+    def __repr__(self) -> str:
+        return f"{type(self).__name__}(url={self.url})"
+
+
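+# Illustrative note (not part of the upstream library): query() flattens
+# list-valued parameters with to_str() before sending the request, so the
+# following two calls are equivalent:
+#
+#     query(url, {"sites": ["01585200", "01646500"]})
+#     query(url, {"sites": "01585200,01646500"})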
" + + f"URL: {response.url}" + ) + elif response.status_code == 414: + _reason = response.reason + _example = """ + # n is the number of chunks to divide the query into \n + split_list = np.array_split(site_list, n) + data_list = [] # list to store chunk results in \n + # loop through chunks and make requests \n + for site_list in split_list: \n + data = nwis.get_record(sites=site_list, service='dv', \n + start=start, end=end) \n + data_list.append(data) # append results to list""" + raise ValueError( + "Request URL too long. Modify your query to use fewer sites. " + + f"API response reason: {_reason}. Pseudo-code example of how to " + + f"split your query: \n {_example}" + ) + + if response.text.startswith("No sites/data"): + raise NoSitesError(response.url) + + return response + + +class NoSitesError(Exception): + """Custom error class used when selection criteria returns no sites/data.""" + + def __init__(self, url): + self.url = url + + def __str__(self): + return ( + "No sites/data found using the selection criteria specified in url: " + "{url}" + ).format(url=self.url) diff --git a/tests-Dockerfile b/tests-Dockerfile index 93b1c4a..9d07531 100644 --- a/tests-Dockerfile +++ b/tests-Dockerfile @@ -6,8 +6,11 @@ ARG AIRFLOW_HOME=/opt/airflow ENV PYTEST_ADDOPTS="--color=yes" ENV AIRFLOW_HOME=${AIRFLOW_HOME} +# Fix local build issue +RUN echo "Acquire::http::Pipeline-Depth 0;\nAcquire::http::No-Cache true;\nAcquire::BrokenProxy true;" > /etc/apt/apt.conf.d/99fixbadproxy + RUN apt-get update -y && apt-get install -y \ - python3-pip \ + python3-pip libxmlsec1-dev \ && rm -rf /var/lib/apt/lists/* # Install Pip Requirements diff --git a/tests/requirements.txt b/tests/requirements.txt index 70c6867..074afe8 100644 --- a/tests/requirements.txt +++ b/tests/requirements.txt @@ -1,7 +1,7 @@ apache-airflow~=2.10.4 -apache-airflow-providers-amazon~=9.2.0 +apache-airflow-providers-amazon~=9.4.0 black -pytest~=8.3.4 +pytest~=8.3.5 #pendulum~=2.1.2 # Pin to older version for compatibility beautifulsoup4 pandas \ No newline at end of file