diff --git a/HARP-2024-2025/access-db.R b/HARP-2024-2025/access-db.R deleted file mode 100644 index 05a20cf5..00000000 --- a/HARP-2024-2025/access-db.R +++ /dev/null @@ -1,167 +0,0 @@ -library("RPostgres") -library("sqldf") -library("dataRetrieval") - -# works with only Rpostgres. This is the connection string to the database. -# Using this allows us to directly query the database from R, rather than -# needing to save results via SQL and send off text files with results. These -# connection strings are similar for all databases and rely on the permissions -# within the db -con <- dbConnect( - RPostgres::Postgres(), - dbname = "drupal.dh03", - host = "deq1.bse.vt.edu", - port = 5431, - user = readline("DB User Name: "), - password = getPass::getPass("REST Password: ") - ) - -#A function to get mean meteorology data for a given VA Hydro feature's geometry -#extent. In other words, this function can pull down meteorology data from our -#database (which covers the entire chesapeake Bay watershed), clip it to a given -#feature (like the drainage area of a gage or a parituclar watershed), and get -#the mean precipitation per day -get_feature_met <- function(con, - #The hydro code of the VA Hydro feature to clip the - #dataset to (the hydrocode is often from the base - #database and is not generated in VAHydro, whereas - #the feature id is the unique VA Hydro auto - #generated index number) - hydrocode, - #The variable key from VA Hydro for the met data of interest - varkey, - #The band of the raster. Important for NLDAS, PRISM - #and dyamet only have 1 band - band = 1, - #The feature associated with the meteorology data - #records. For precip, this is the cbp6_met_coverage - #generally (the CB p6 watershed) - mcovcode = 'cbp6_met_coverage') { - q = paste0( - "select met.featureid, to_timestamp(met.tsendtime) as obs_date, - extract(month from to_timestamp(met.tsendtime)) as mo, - --Note the syntax used to first clip the raster to the requested feature - --prior to summarizing, and note that we call mean as if it is a method of - --the object of ST_summarystats - (ST_summarystats(st_clip(met.rast, fgeo.dh_geofield_geom), ", - band, ", TRUE)).mean as precip_mm, - 0.0393701 * (ST_summarystats(st_clip(met.rast, fgeo.dh_geofield_geom), ", - band, ", TRUE)).mean as precip_in - from dh_feature as f - left outer join field_data_dh_geofield as fgeo - on ( - fgeo.entity_id = f.hydroid - and fgeo.entity_type = 'dh_feature' - ) - left outer join dh_variabledefinition as v - on (v.varkey = '", - varkey, "') - left outer join dh_feature as mcov - on ( - mcov.hydrocode = '", mcovcode, "' - ) - left outer join dh_timeseries_weather as met - on ( mcov.hydroid = met.featureid and met.varid = v.hydroid) - where f.hydrocode = '", hydrocode, "' - order by met.tsendtime -" - ) - dat <- sqldf(q, connection = con) - return(dat) -} - -# Inventory i.e show the number of records of a given meteorology data set per -# year and show the minimum and maximum dates tied to that data. 
We should -# expect to see 366 days on leap years and 365 days on non-leap years EXCEPT for -# daymet, which will ALWAYS have 365 days with 12-31 missing on leap years -varkey <- "nldas2_obs_hourly" -inv_met <- sqldf(paste0("select extract(year from to_timestamp(met.tsendtime)) as year, -min(to_timestamp(met.tsendtime)) as start_date, -max(to_timestamp(met.tsendtime)) as end_date, -count(*) -from ( - select met.tsendtime - from dh_feature as mcov - left outer join dh_variabledefinition as v - on ( - v.varkey = '",varkey,"' - ) - left outer join dh_timeseries_weather as met - on ( - mcov.hydroid = met.featureid and met.varid = v.hydroid - and met.entity_type = 'dh_feature' - ) - where mcov.hydrocode = 'cbp6_met_coverage' - and met.rast is not null - group by met.tsendtime -) as met -group by extract(year from to_timestamp(met.tsendtime)) -order by extract(year from to_timestamp(met.tsendtime))"), connection = con) - -# get all the tables from connection -dbListTables(con) - -#Set a USGS gage ID -gageid = '01634000' -#Format the ID to match the hydrocode used in the VA Hydro database -hydrocode = paste0('usgs_ws_', gageid) -#Get PRISM data, specified via the varkey argument in get_feature_met, and clip -#to the gage drainage area, specified via the hydrocode. Get the mean precip in -#the watershed for each day -prism_data <- get_feature_met(con, hydrocode, 'prism_mod_daily') - -#Add more details about the date as needed. Note that format relies on a date -#defined via https://pubs.opengroup.org/onlinepubs/009695399/utilities/date.html -prism_data[,c('yr', 'mo', 'da')] <- cbind(format(prism_data$obs_date,"%Y"), - format(prism_data$obs_date,"%m"), - format(prism_data$obs_date,"%d")) -#Repeat the process, this time pulling data from the daymet data source -daymet_data <- get_feature_met(con, hydrocode, 'daymet_mod_daily') -daymet_data[,c('yr', 'mo', 'da')] <- cbind(format(daymet_data$obs_date,"%Y"), - format(daymet_data$obs_date,"%m"), - format(daymet_data$obs_date,"%d")) -# Get USGS gage data from NWIS, the USGS REST services. We pull this data using -# the dataRetrieval R package which has extensive documentation on the options -# therewithin. See https://waterdata.usgs.gov/blog/dataretrieval/. Below, we -# a cached version of the function defined in our config.R file: -gage_info <- memo_readNWISsite(gageid) -#Extract the drainage area of the gage -da <- gage_info$drain_area_va -#Get daily, mean streamflow (pcode 000060, see dataRetrieval::parameterCdFile) -#from the gage specified in gageid -usgs_data <- memo_readNWISdv(gageid,'00060') -#As above, add more details about the dates -usgs_data[,c('yr', 'mo', 'da')] <- cbind(format(usgs_data$Date,"%Y"), - format(usgs_data$Date,"%m"), - format(usgs_data$Date,"%d")) - -#Join all data together. In other words, extract the relevant fields from the -#three data sources and combine them based on the day, month, and year. We could -#do this by joining by the dates, but you may notice that the USGS daily data -#may not come in as POSIX and doesn't have a time associated with it. 
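#(Aside, not part of the original workflow: the same day/month/year join could
#be written with dplyr, assuming that package is installed. This is a hedged
#sketch for comparison only -- the result goes in a separate comp_data_alt
#object, and the SQL version below remains the approach used in this script.)
comp_data_alt <- dplyr::left_join(prism_data, daymet_data,
                                  by = c("yr", "mo", "da"),
                                  suffix = c("_prism", "_daymet"))
comp_data_alt <- dplyr::left_join(comp_data_alt, usgs_data, by = c("yr", "mo", "da"))
comp_data_alt <- dplyr::select(comp_data_alt,
                               obs_date = obs_date_prism,
                               prism_p_in = precip_in_prism,
                               daymet_p_in = precip_in_daymet,
                               usgs_cfs = X_00060_00003)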
-comp_data <- sqldf( - "select a.obs_date, a.precip_in as prism_p_in, - b.precip_in as daymet_p_in, c.X_00060_00003 as usgs_cfs - from prism_data as a - left outer join daymet_data as b - on ( - a.yr = b.yr - and a.mo = b.mo - and a.da = b.da - ) - left outer join usgs_data as c - on ( - a.yr = c.yr - and a.mo = c.mo - and a.da = c.da - ) - " -) -#If you are familiar with tidyverse verbiage, you may wonder why we use SQLDF -#here instead of a few dplyr::left_join()s and dplyr::select(). The reason is -#fairly simple. Our team is more well-versed in SQL than in tidyverse and -#tidyverse has a tendency to change these verbs slightly over time. Most of the -#time, this is not an issue. But, some of the more obscure commands often get -#depreceated. Look at the history of dplyr::across() as an example. Even now, -#the joins were changed just a few versions ago and will soon block certain code -#from executing without directly specifying all inputs! diff --git a/HARP-2024-2025/access-file.R b/HARP-2024-2025/access-file.R deleted file mode 100644 index 11d1662d..00000000 --- a/HARP-2024-2025/access-file.R +++ /dev/null @@ -1,418 +0,0 @@ -# Test datasets CSV -library("sqldf") -library("dataRetrieval") -library("lubridate") - -#Set the id for the USGS gage from which we will pull data and use to summarize -#our meteorology data. Currently, we have data set up for: -#Culpepper 01667500, Strasburg 01634000, Ruckersville 01665500 -gageid = '01665500' -# We redefine the gage id as a "hydrocode", which is how it is stored in the VA -#Hydro (DEQ) database -hydrocode = paste0('usgs_ws_', gageid) -#Pull data from VA Hydro, grabbing some precip data for the gage watershed -prism_data <- read.csv(paste0("http://deq1.bse.vt.edu:81/files/met/", hydrocode, "-prism-all.csv")) -#Use lubridate to add on some additional information about the date. These dates -#come in as posix and thus have timestamps, which make them a little trickier to -#work with. So, we can disaggregate them for easier joins and manipulation -prism_data[,c('yr', 'mo', 'da', 'wk')] <- cbind(year(as.Date(prism_data$obs_date)), - month(as.Date(prism_data$obs_date)), - day(as.Date(prism_data$obs_date)), - week(as.Date(prism_data$obs_date))) - -#Repeat the download and date disaggregation for daymet, a different precip -#source: -daymet_data <- read.csv(paste0("http://deq1.bse.vt.edu:81/files/met/", hydrocode, "-daymet-all.csv")) -daymet_data[,c('yr', 'mo', 'da', 'wk')] <- cbind(year(as.Date(daymet_data$obs_date)), - month(as.Date(daymet_data$obs_date)), - day(as.Date(daymet_data$obs_date)), - week(as.Date(daymet_data$obs_date))) -#Repeat the download and date disaggregation for NLDAS2, an HOURLY precip source -nldas2_data <- read.csv(paste0("http://deq1.bse.vt.edu:81/files/met/", hydrocode, "-nldas2-all.csv")) -nldas2_data[,c('yr', 'mo', 'da', 'wk')] <- cbind(year(as.Date(nldas2_data$obs_date)), - month(as.Date(nldas2_data$obs_date)), - day(as.Date(nldas2_data$obs_date)), - week(as.Date(nldas2_data$obs_date))) -#NLDAS is an hourly precip data set. We should aggregate it to be daily sums to -#match the data in PRISM and daymet for the purpose of comparison. We can do -#this in SQL by grouping by the the date columns we created for month, day, and -#year -nldas2_data <- sqldf( - "select featureid, min(obs_date) as obs_date, yr, mo, da, - sum(precip_mm) as precip_mm, sum(precip_in) as precip_in - from nldas2_data - group by yr, mo, da - order by yr, mo, da - " -) - - -# Get USGS gage data from NWIS, the USGS REST services. 
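#(Aside: memo_readNWISsite() and memo_readNWISdv() used below are cached
#wrappers defined in config.R. For reference, the underlying dataRetrieval
#calls look like this; stored under throwaway names so the cached versions
#below remain what the script actually uses.)
gage_info_direct <- readNWISsite(gageid)
usgs_data_direct <- readNWISdv(gageid, parameterCd = "00060")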
We pull this data using -# the dataRetrieval R package which has extensive documentation on the options -# therewithin. See https://waterdata.usgs.gov/blog/dataretrieval/. Below, we -# a cached version of the function defined in our config.R file: -gage_info <- memo_readNWISsite(gageid) -#Extract the drainage area of the gage -da <- gage_info$drain_area_va -#Get daily, mean streamflow (pcode 000060, see dataRetrieval::parameterCdFile) -#from the gage specified in gageid -usgs_data <- memo_readNWISdv(gageid,'00060') -#Extract date information from the gage using lubridate as above -usgs_data[,c('yr', 'mo', 'da')] <- cbind(year(as.Date(usgs_data$Date)), - month(as.Date(usgs_data$Date)), - day(as.Date(usgs_data$Date))) - -#Join all data together. In other words, extract the relevant fields from the -#three data sources and combine them based on the day, month, and year. We could -#do this by joining by the dates, but you may notice that the USGS daily data -#may not come in as POSIX and doesn't have a time associated with it. We could -#do this via tidyverse verbs, but our team typically prefers SQL as it tends to -#change a lot less frequently i.e. will require less long-term maintenance -comp_data <- sqldf( - "select a.obs_date, a.precip_in as prism_p_in, - a.yr, a.mo, a.da, a.wk, - b.precip_in as daymet_p_in, d.precip_in as nldas2_p_in, - c.X_00060_00003 as usgs_cfs - from prism_data as a - left outer join daymet_data as b - on ( - a.yr = b.yr - and a.mo = b.mo - and a.da = b.da - ) - left outer join usgs_data as c - on ( - a.yr = c.yr - and a.mo = c.mo - and a.da = c.da - ) - left outer join nldas2_data as d - on ( - a.yr = d.yr - and a.mo = d.mo - and a.da = d.da - ) - order by a.yr, a.mo, a.da - " -) - -# Index the dataset, giving a unique number to each row. Please note that these -# are already available via rownames(comp_data) but here we add it as a column -# to call them in SQLDF. You can see how SQL changes our R workflows. As the -# project moves forward, feel free to branch out and try other workflows but -# please note we will often encourage the use of SQL as we assist in tasks. But -# all code is acceptable if it is documented and works! -comp_data$dataset_day <- index(comp_data) - -#Add a column for the date since this is now daily data -comp_data$date <- as.Date(comp_data$obs_date) - -#Here, we add in columns to represent tomorrow's flow and the change in flow -#from today to yesterday and today and tomorrow. This give an indication of how -#flow is changing. We can do so using R's indexing, simply calling the next rows -#down where needed -# comp_data$nextday_usgs_cfs <- c(comp_data$usgs_cfs[2:nrow(comp_data)],NA) -# comp_data$nextday_d_cfs <- comp_data$nextday_usgs_cfs - comp_data$usgs_cfs -# comp_data$today_d_cfs <- comp_data$usgs_cfs - c(NA,comp_data$usgs_cfs[1:(nrow(comp_data) - 1)]) - -#We could alternatively get this via an SQL query that runs just about as -#quickly! 
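#(Aside: with dplyr available, the same lag/lead columns could be built as
#sketched here. Left commented out so the window-function SQL that follows,
#which this script actually uses, does not end up with duplicated columns.
#This assumes rows are already ordered by dataset_day, which they are per the
#ORDER BY in the query that built comp_data.)
# comp_data$yesterday_usgs_cfs <- dplyr::lag(comp_data$usgs_cfs, 1)
# comp_data$nextday_usgs_cfs <- dplyr::lead(comp_data$usgs_cfs, 1)
# comp_data$nextday_d_cfs <- comp_data$nextday_usgs_cfs - comp_data$usgs_cfs
# comp_data$today_d_cfs <- comp_data$usgs_cfs - comp_data$yesterday_usgs_cfs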
Note the use of the Lag() function and its inputs in the subquery used -#to pull in yesterdays and todays data -comp_data <- sqldf( - "select b.*, - b.nextday_usgs_cfs - b.usgs_cfs as nextday_d_cfs, - b.usgs_cfs - b.yesterday_usgs_cfs as today_d_cfs - FROM ( - select - a.*, - Lag(a.usgs_cfs,1) OVER(ORDER BY a.dataset_day) as yesterday_usgs_cfs, - Lag(a.usgs_cfs,-1) OVER(ORDER BY a.dataset_day) as nextday_usgs_cfs - from comp_data as a - order by a.dataset_day - ) as b - " -) - -# Add a cfs Precip column using the following conversions for EACH of the precip -# data sources: -# acre-feet/day = 3.07 MGD -# cfs = 1.57 MGD -# DAsqmi * 640 ac/sqmi * p_in_per_day / 12.0 = Precip ac-ft/day -# P acft/day / 3.07 = P mgd -# P mgd * 1.572 = P cfs -# 1.572 * (DAsqmi * 640.0 * p_in / 12.0) / 3.07 -comp_data$prism_p_cfs <- 1.572 * (da * 640.0 * comp_data$prism_p_in / 12.0) / 3.07 -comp_data$daymet_p_cfs <- 1.572 * (da * 640.0 * comp_data$daymet_p_in / 12.0) / 3.07 -comp_data$nldas2_p_cfs <- 1.572 * (da * 640.0 * comp_data$nldas2_p_in / 12.0) / 3.07 - -#We have a lot of daily data in comp_data, but we suspect to see more noteable -#trends in weekly data compared to daily data given travel time and -#abstraction/storage considerations. So, we can summarize our data by grouping -#it via the week variable we added in earlier using lubridate::week. -#(again, you may have done this previously via dplyr::group_by and summarize) -week_data <- sqldf( - "select min(obs_date) as week_begin, yr, wk, min(dataset_day) as dataset_day_begin, - avg(daymet_p_in) as daymet_p_in, avg(daymet_p_cfs) as daymet_p_cfs, - avg(prism_p_in) as prism_p_in, avg(prism_p_cfs) as prism_p_cfs, - avg(nldas2_p_in) as nldas2_p_in, avg(nldas2_p_cfs) as nldas2_p_cfs, - avg(usgs_cfs) as usgs_cfs, avg(today_d_cfs) as today_d_cfs, - avg(nextday_d_cfs) as nextday_d_cfs - from comp_data - group by yr, wk - order by yr, wk - " -) -#Base the month column off of the min day oindex for that week, or the day that -#week began -week_data$mo <- month(week_data$week_begin) - - -#Why does daymet generally procude worse relationships? Isn't it similar to -#PRISM? What is the relationship between PRISM and daymet as we expected these -#to be relatively similar -comparePrecip <- lm(comp_data$daymet_p_cfs ~ comp_data$prism_p_cfs) -summary(comparePrecip)$adj.r.squared -plot(comp_data$daymet_p_cfs ~ comp_data$prism_p_cfs) -#What if we offset the PRSIM data by one day? -compareNextDayPRISM <- lm(comp_data$daymet_p_cfs[1:(nrow(comp_data) - 1)] ~ - comp_data$prism_p_cfs[2:nrow(comp_data)]) -#Much improved. More like we expected, seems like daymet is offset. -summary(compareNextDayPRISM)$adj.r.squared -plot(comp_data$daymet_p_cfs[1:(nrow(comp_data) - 1)] ~ - comp_data$prism_p_cfs[2:nrow(comp_data)]) - -#Add a column with the offset daymet data -comp_data$daymet_p_cfs_nextDay <- c(NA,comp_data$daymet_p_cfs[1:(nrow(comp_data) - 1)]) - -#Create a simple linear regression of flow and precip with flow as the dependent -#variable to test if there is a clear and easy relationship between flow and -#precip. Using the formula notation "~" we can specify the regression as -#dependent ~ independent variable as long as the data option is specified in the -#function -mod_prism <- lm(usgs_cfs ~ prism_p_cfs, data = comp_data) -#Look at the regression paramters. Identify how well the regression performed -#(spoiler alert, it performed poorly, which makes sense given time of travel and -#abstraction/storage. We wouldn't want it to be easy!) 
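#(Aside: a quick arithmetic cross-check of the in/day-to-cfs conversion used
#above, derived directly from 640 ac/sq mi, 43,560 sq ft/ac, 12 in/ft, and
#86,400 s/day rather than going through MGD. Sanity check only.)
factor_direct <- 640 * 43560 / 12 / 86400   # ~26.9 cfs per (sq mi * in/day)
factor_script <- 1.572 * 640 / (12 * 3.07)  # ~27.3 using the rounded MGD constants above
#The two agree to within a couple of percent, so the rounded constants are fine.
#(Back to the regression summaries:)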
-summary(mod_prism) - -#Repeated for daymet, running the linear regression between the USGS data and -#daymet -mod_daymet <- lm(usgs_cfs ~ daymet_p_cfs, data = comp_data) -#Evaluate the performance of the regression -summary(mod_daymet) - -# Weekly cfs vs P. Above, we created linear regressions for flow and -# precipitations. Now we repeat this process to see if weekly flow/precip is -# correlated more strongly. We'd suspect this to be a bit more reliably related, -# but how much stronger? -mod_week_prism <- lm(usgs_cfs ~ prism_p_cfs, data=week_data) -summary(mod_week_prism) -#Looking at the results, we see some correlation but it's still not very strong -plot(mod_week_prism$model$usgs_cfs ~ mod_week_prism$model$prism_p_cfs) -mod_week_daymet <- lm(usgs_cfs ~ daymet_p_cfs, data=week_data) -summary(mod_week_daymet) -plot(mod_week_daymet$model$usgs_cfs ~ mod_week_daymet$model$daymet_p_cfs) - - -# Let's repeat the regression anlysis now for ONLY January, which tends to be a -# wetter month. We start with the daily data. For PRISM: -# PRISM -mod_prism_jan <- lm(usgs_cfs ~ prism_p_cfs, data=comp_data[(comp_data$mo == 1),]) -summary(mod_prism_jan) -# daymet -mod_daymet_jan <- lm(usgs_cfs ~ daymet_p_cfs_nextDay, data=comp_data[(comp_data$mo == 1),]) -summary(mod_daymet_jan) -#Plot the daily data for january and see how the flow relates to the summarized -#precip data. This is equivalent to comp_data$usgs_cfs and comp_data$daymet_p_cfs -plot(mod_daymet_jan$model$usgs_cfs ~ mod_daymet_jan$model$daymet_p_cfs_nextDay) - -# January, next day flow todays precip -mod_prism_jan_nd <- lm(nextday_usgs_cfs ~ prism_p_cfs, - data=comp_data[(comp_data$mo == 1),]) -summary(mod_prism_jan_nd) - -#Repeat regression, but now compare the CHANGE in flow between today and -#tomorrow with TODAYs precipitation. Again, we stick with just January for now. -mod_prism_jan_ndd <- lm(nextday_d_cfs ~ prism_p_cfs, - data=comp_data[(comp_data$mo == 1),]) -summary(mod_prism_jan_ndd) -plot(mod_prism_jan_ndd$model$nextday_d_cfs ~ mod_prism_jan_ndd$model$prism_p_cfs) - -# Repeat the above analysis for daymet e.g. the CHANGE in flow b/t today and -# tomorrow and TODAY's daymet precipitation -mod_daymet_jan_ndd <- lm(nextday_d_cfs ~ daymet_p_cfs_nextDay, - data=comp_data[which(comp_data$mo == 1),]) -summary(mod_daymet_jan_ndd) -plot(mod_daymet_jan_ndd$model$nextday_d_cfs ~ mod_daymet_jan_ndd$model$daymet_p_cfs) - -# Days with zero rainfall may be skewing our results based on the plots above. -# So, repeat the analysis now only comparing against change in flows on the -# same day as non-zero rain -# *** DAYMET -mod_daymet_jan_nz_ndd <- lm(nextday_d_cfs ~ daymet_p_cfs_nextDay, - data = comp_data[((comp_data$mo == 1) & (comp_data$daymet_p_cfs > 0)),]) -summary(mod_daymet_jan_nz_ndd) -plot(mod_daymet_jan_nz_ndd$model$nextday_d_cfs ~ mod_daymet_jan_nz_ndd$model$daymet_p_cfs) -# *** PRISM -mod_prism_jan_nz_ndd <- lm(nextday_d_cfs ~ prism_p_cfs, - data = comp_data[which((comp_data$mo == 1) & (comp_data$prism_p_cfs > 0)),]) -summary(mod_prism_jan_nz_ndd) -plot(mod_prism_jan_nz_ndd$model$nextday_d_cfs ~ mod_prism_jan_nz_ndd$model$prism_p_cfs) - -# Now, we look at the change in flow on the day after it rains to the summarized -# precipitation. 
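#(Aside: the repeated comp_data[...] filters in these fits can also be written
#with lm()'s subset argument; a sketch under a throwaway name, shown for the
#January daymet fit above, which should match the indexing approach.)
mod_daymet_jan_alt <- lm(usgs_cfs ~ daymet_p_cfs_nextDay, data = comp_data,
                         subset = mo == 1)
summary(mod_daymet_jan_alt)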
-# So, repeat the same analysis as above, but now compare against the next-day flow difference
-# *** DAYMET
-mod_daymet_jan_nz_ndd <- lm(nextday_d_cfs ~ daymet_p_cfs_nextDay,
-                            data = comp_data[((comp_data$mo == 1) & (comp_data$daymet_p_cfs > 0)),])
-summary(mod_daymet_jan_nz_ndd)
-plot(mod_daymet_jan_nz_ndd$model$nextday_d_cfs ~ mod_daymet_jan_nz_ndd$model$daymet_p_cfs_nextDay)
-# *** PRISM
-mod_prism_jan_nz_ndd <- lm(nextday_d_cfs ~ prism_p_cfs,
-                           data = comp_data[((comp_data$mo == 1) & (comp_data$prism_p_cfs > 0)),])
-summary(mod_prism_jan_nz_ndd)
-plot(mod_prism_jan_nz_ndd$model$nextday_d_cfs ~ mod_prism_jan_nz_ndd$model$prism_p_cfs)
-
-# Now, we look at the change in flow on the day after it rains to the summarized
-# precipitation. But, this time only look at days where there is some change in
-# flow
-# *** DAYMET
-mod_daymet_jan_nz_cdd <- lm(nextday_d_cfs ~ daymet_p_cfs_nextDay,
-                            data = comp_data[((comp_data$mo == 1) & (comp_data$nextday_d_cfs > 0)),])
-summary(mod_daymet_jan_nz_cdd)
-plot(mod_daymet_jan_nz_cdd$model$nextday_d_cfs ~ mod_daymet_jan_nz_cdd$model$daymet_p_cfs_nextDay)
-# *** PRISM
-mod_prism_jan_nz_cdd <- lm(nextday_d_cfs ~ prism_p_cfs,
-                           data = comp_data[((comp_data$mo == 1) & (comp_data$nextday_d_cfs > 0)),])
-summary(mod_prism_jan_nz_cdd)
-plot(mod_prism_jan_nz_cdd$model$nextday_d_cfs ~ mod_prism_jan_nz_cdd$model$prism_p_cfs)
-
-
-#Try the above analysis (compare the change in next-day flow to precip only on
-#days where the change in flow is positive), but for February, which is another
-#wet month
-mod_prism_mon_nz_ndd <- lm(nextday_d_cfs ~ prism_p_cfs,
-                           data = comp_data[which((comp_data$mo == 2) & (comp_data$nextday_d_cfs > 0)),])
-summary(mod_prism_mon_nz_ndd)
-plot(mod_prism_mon_nz_ndd$model$nextday_d_cfs ~ mod_prism_mon_nz_ndd$model$prism_p_cfs)
-
-#The correlations above are decent.
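#(Aside, ahead of the helper class defined below: R6 fields are assigned inside
#initialize() with self$field <- value. A minimal, hypothetical pattern for a
#bin that stores a plot plus data; sketch only.)
exampleBin <- R6::R6Class(
  "exampleBin",
  public = list(
    plot = NULL, data = list(), atts = list(),
    initialize = function(plot = NULL, data = list()) {
      self$plot <- plot
      self$data <- data
    }
  )
)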
Let's see what the relationship looks like -#across all months of the year -# do all months and assemble a barplot of R^2 - -# create a class to hold both stats and the plot to make it easier to do comparisons later -plotBin <- R6::R6Class( - "plotBin", - public = list( - plot = NULL, data=list(), atts=list(), - initialize = function(plot = NULL, data = list()){ - self.plot = plot; self.data=data; - } - ) -) -# Week -mon_lm <- function(sample_data, y_var, x_var, mo_var, data_name){ - plot_out <- plotBin$new(data = sample_data) - nwd_stats <- data.frame(row.names=c('month', 'rsquared_a')) - for (i in 1:12) { - mo_data=sample_data[which((sample_data[,mo_var] == i)),] - weekmo_data <- lm(mo_data[,y_var] ~ mo_data[,x_var]) - dsum <- summary(weekmo_data) - nwd_stats <- rbind(nwd_stats, data.frame(i, dsum$adj.r.squared)) - } - plot_out$atts[['stats']] <- nwd_stats - barplot( - nwd_stats$dsum.adj.r.squared ~ nwd_stats$i, - ylim=c(0,1.0), - main=paste("lm(Q ~ P), monthly,",data_name) - ) - plot_out$plot <- recordPlot() - return(plot_out) -} - -nldas2_lm <- mon_lm(week_data, "nldas2_p_cfs", "usgs_cfs", "mo", "nldas2") -nldas2_lm$atts -nldas2_lm$plot - -prism_lm <- mon_lm(week_data, "prism_p_cfs", "usgs_cfs", "mo", "prism") -prism_lm$atts -prism_lm$plot - - -daymet_lm <- mon_lm(week_data, "daymet_p_cfs", "usgs_cfs", "mo", "daymet") -daymet_lm$atts -daymet_lm$plot - -# NLDAS2 -# do all months and assemble a barplot of R^2 -nldaswk_stats <- data.frame('month' = 1:12, 'rsquared_a' = numeric(12)) -for (i in 1:nrow(nldaswk_stats)) { - # Weekly d cfs vs P - mod_weekmo_nldas2_cfs <- lm(usgs_cfs ~ nldas2_p_cfs, - data=week_data[which((week_data$mo == i)),]) - dsum <- summary(mod_weekmo_nldas2_cfs) - #plot(mod_weekmo_nldas2_cfs$model$usgs_cfs ~ mod_weekmo_nldas2_cfs$model$nldas2_p_cfs) - - #mod_week_daymet_d_cfs <- lm(today_d_cfs ~ daymet_p_cfs, data=week_data) - #summary(mod_week_daymet_d_cfs) - #plot(mod_week_daymet_d_cfs$model$today_d_cfs ~ mod_week_daymet_d_cfs$model$daymet_p_cfs) - nldaswk_stats$rsquared_a[i] <- dsum$adj.r.squared -} -barplot(nldaswk_stats$dsum.adj.r.squared ~ nldaswk_stats$i, - main=paste(gage_info$station_nm ), ylim=c(0,1.0)) -daymet_lm <- mon_lm(week_data, "daymet_p_cfs", "usgs_cfs", "mo", "daymet") -daymet_lm$atts -daymet_lm$plot - - -#Binomial = Based on x, did Y occur? Looking at weekly rainfall totals, did a -#storm hydrograph occur? -#Poisson = Counts. Based on x, how many times did Y occur? Looking at weekly -#rainfall totals, how many storms should we have expected to see? - -#Are rollowing rainfall sums relevant to this analysis? -comp_data$rollingSumDaymet <- c(NA,NA,rollapply(comp_data$daymet_p_cfs_nextDay,3,sum, - align = "left")) -comp_data$rollingSumPRISM <- c(NA,NA,rollapply(comp_data$prism_p_cfs,3,sum, - align = "left")) -comp_data$rollingSumUSGS_prev <- c(NA,NA,rollapply(comp_data$usgs_cfs,3,sum, - align = "left")) -comp_data$rollingSumUSGS_next <- c(rollapply(comp_data$usgs_cfs,3,sum, - align = "left"),NA,NA) -rollModelPRISM <- lm(usgs_cfs ~ rollingSumPRISM,data = comp_data) -summary(rollModelPRISM) -plot(log10(rollModelPRISM$model)) -#Nope, not really. Maybe only at higher flows, which is when we'd expect -#correlation. Correlation is improved for next day flow however. What if we look -#at rolling vs rolling? -rollModel <- lm(rollingSumUSGS_prev ~ rollingSumPRISM,data = comp_data) -summary(rollModel) - -#What if we increase the rolling duration? At what point do we produce a maximum -#correlation? 
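#(Aside: rollapply() and index() used in this script come from the zoo package;
#the trailing sums built above with manual c(NA, NA, ...) padding can also be
#written with fill/align, shown under a throwaway name so nothing downstream
#changes.)
roll3PRISM_alt <- zoo::rollapply(comp_data$prism_p_cfs, 3, sum,
                                 fill = NA, align = "right")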
We would assume this increases with rolling time period -rsq <- numeric(180) -rsq_winter <- numeric(180) -for(i in 1:180){ - testData <- comp_data - testData$rollingSumPRISM <- c(rep(NA,(i - 1)),rollapply(testData$prism_p_cfs,i,sum, - align = "left")) - testData$rollingSumUSGS_prev <- c(rep(NA,(i - 1)),rollapply(testData$usgs_cfs,i,sum, - align = "left")) - rollModeli <- lm(rollingSumUSGS_prev ~ rollingSumPRISM,data = testData) - rsq[i] <- summary(rollModeli)$adj.r.squared - - rollModel_winteri <- lm(rollingSumUSGS_prev ~ rollingSumPRISM,data = testData[testData$mo %in% c(11,12,1,2,3),]) - rsq_winter[i] <- summary(rollModel_winteri)$adj.r.squared -} - -plot(rsq_winter) -points(rsq_winter,col = "blue") - - -plot(as.Date(comp_data$obs_date), - comp_data$prism_p_cfs, - col = "red",type = "l") -lines(as.Date(comp_data$obs_date), - comp_data$usgs_cfs) - - diff --git a/HARP-2024-2025/assessEventPrecip.R b/HARP-2024-2025/assessEventPrecip.R deleted file mode 100644 index 0d6066e3..00000000 --- a/HARP-2024-2025/assessEventPrecip.R +++ /dev/null @@ -1,312 +0,0 @@ -# Test datasets CSV -library("sqldf") -library("dataRetrieval") -library("lubridate") - -#Set the id for the USGS gage from which we will pull data and use to summarize -#our meteorology data. Currently, we have data set up for: -#Culpepper 01667500, Strasburg 01634000, Ruckersville 01665500 -gageid = '01665500' -# We redefine the gage id as a "hydrocode", which is how it is stored in the VA -#Hydro (DEQ) database -hydrocode = paste0('usgs_ws_', gageid) - -#Pull data from VA Hydro, grabbing some precip data for the gage watershed -prism_data <- read.csv(paste0("http://deq1.bse.vt.edu:81/files/met/", hydrocode, "-prism-all.csv")) - -#Use lubridate to add on some additional information about the date. These dates -#come in as posix and thus have timestamps, which make them a little trickier to -#work with. So, we can disaggregate them for easier joins and manipulation -prism_data[,c('yr', 'mo', 'da', 'wk')] <- cbind(year(as.Date(prism_data$obs_date)), - month(as.Date(prism_data$obs_date)), - day(as.Date(prism_data$obs_date)), - week(as.Date(prism_data$obs_date))) - -#Repeat the download and date disaggregation for daymet, a different precip -#source: -daymet_data <- read.csv(paste0("http://deq1.bse.vt.edu:81/files/met/", hydrocode, "-daymet-all.csv")) -daymet_data[,c('yr', 'mo', 'da', 'wk')] <- cbind(year(as.Date(daymet_data$obs_date)), - month(as.Date(daymet_data$obs_date)), - day(as.Date(daymet_data$obs_date)), - week(as.Date(daymet_data$obs_date))) -#Repeat the download and date disaggregation for NLDAS2, an HOURLY precip source -nldas2_data <- read.csv(paste0("http://deq1.bse.vt.edu:81/files/met/", hydrocode, "-nldas2-all.csv")) -nldas2_data[,c('yr', 'mo', 'da', 'wk')] <- cbind(year(as.Date(nldas2_data$obs_date)), - month(as.Date(nldas2_data$obs_date)), - day(as.Date(nldas2_data$obs_date)), - week(as.Date(nldas2_data$obs_date))) -#NLDAS is an hourly precip data set. We should aggregate it to be daily sums to -#match the data in PRISM and daymet for the purpose of comparison. We can do -#this in SQL by grouping by the the date columns we created for month, day, and -#year -nldas2_data <- sqldf( - "select featureid, min(obs_date) as obs_date, yr, mo, da, - sum(precip_mm) as precip_mm, sum(precip_in) as precip_in - from nldas2_data - group by yr, mo, da - order by yr, mo, da - " -) - - -# Get USGS gage data from NWIS, the USGS REST services. 
We pull this data using -# the dataRetrieval R package which has extensive documentation on the options -# therewithin. See https://waterdata.usgs.gov/blog/dataretrieval/. Below, we -# a cached version of the function defined in our config.R file: -gage_info <- memo_readNWISsite(gageid) -#Extract the drainage area of the gage -da <- gage_info$drain_area_va -#Get daily, mean streamflow (pcode 000060, see dataRetrieval::parameterCdFile) -#from the gage specified in gageid -usgs_data <- memo_readNWISdv(gageid,'00060') -#Extract date information from the gage using lubridate as above -usgs_data[,c('yr', 'mo', 'da')] <- cbind(year(as.Date(usgs_data$Date)), - month(as.Date(usgs_data$Date)), - day(as.Date(usgs_data$Date))) - -#Join all data together. In other words, extract the relevant fields from the -#three data sources and combine them based on the day, month, and year. We could -#do this by joining by the dates, but you may notice that the USGS daily data -#may not come in as POSIX and doesn't have a time associated with it. We could -#do this via tidyverse verbs, but our team typically prefers SQL as it tends to -#change a lot less frequently i.e. will require less long-term maintenance -comp_data <- sqldf( - "select a.obs_date, a.precip_in as prism_p_in, - a.yr, a.mo, a.da, a.wk, - b.precip_in as daymet_p_in, d.precip_in as nldas2_p_in, - c.X_00060_00003 as usgs_cfs - from prism_data as a - left outer join daymet_data as b - on ( - a.yr = b.yr - and a.mo = b.mo - and a.da = b.da - ) - left outer join usgs_data as c - on ( - a.yr = c.yr - and a.mo = c.mo - and a.da = c.da - ) - left outer join nldas2_data as d - on ( - a.yr = d.yr - and a.mo = d.mo - and a.da = d.da - ) - order by a.yr, a.mo, a.da - " -) - -# Index the dataset, giving a unique number to each row. Please note that these -# are already available via rownames(comp_data) but here we add it as a column -# to call them in SQLDF. You can see how SQL changes our R workflows. As the -# project moves forward, feel free to branch out and try other workflows but -# please note we will often encourage the use of SQL as we assist in tasks. But -# all code is acceptable if it is documented and works! -comp_data$dataset_day <- index(comp_data) - -#Add a column for the date since this is now daily data -comp_data$date <- as.Date(comp_data$obs_date) - -#Here, we add in columns to represent tomorrow's flow and the change in flow -#from today to yesterday and today and tomorrow. This give an indication of how -#flow is changing. We can do so using R's indexing, simply calling the next rows -#down where needed -# comp_data$nextday_usgs_cfs <- c(comp_data$usgs_cfs[2:nrow(comp_data)],NA) -# comp_data$nextday_d_cfs <- comp_data$nextday_usgs_cfs - comp_data$usgs_cfs -# comp_data$today_d_cfs <- comp_data$usgs_cfs - c(NA,comp_data$usgs_cfs[1:(nrow(comp_data) - 1)]) - -#We could alternatively get this via an SQL query that runs just about as -#quickly! 
Note the use of the Lag() function and its inputs in the subquery used -#to pull in yesterdays and todays data -comp_data <- sqldf( - "select b.*, - b.nextday_usgs_cfs - b.usgs_cfs as nextday_d_cfs, - b.usgs_cfs - b.yesterday_usgs_cfs as today_d_cfs - FROM ( - select - a.*, - Lag(a.usgs_cfs,1) OVER(ORDER BY a.dataset_day) as yesterday_usgs_cfs, - Lag(a.usgs_cfs,-1) OVER(ORDER BY a.dataset_day) as nextday_usgs_cfs - from comp_data as a - order by a.dataset_day - ) as b - " -) - -# Add a cfs Precip column using the following conversions for EACH of the precip -# data sources: -# acre-feet/day = 3.07 MGD -# cfs = 1.57 MGD -# DAsqmi * 640 ac/sqmi * p_in_per_day / 12.0 = Precip ac-ft/day -# P acft/day / 3.07 = P mgd -# P mgd * 1.572 = P cfs -# 1.572 * (DAsqmi * 640.0 * p_in / 12.0) / 3.07 -comp_data$prism_p_cfs <- 1.572 * (da * 640.0 * comp_data$prism_p_in / 12.0) / 3.07 -comp_data$daymet_p_cfs <- 1.572 * (da * 640.0 * comp_data$daymet_p_in / 12.0) / 3.07 -comp_data$nldas2_p_cfs <- 1.572 * (da * 640.0 * comp_data$nldas2_p_in / 12.0) / 3.07 -#Can we learn anything based on what stormSep gives us? e.g. the number of -#storms that occur in a given week, month, day, etc.? -#First, create a dataset where USGS flow is not NA -source("C:/Users/gcw73279.COV/Desktop/gitBackups/OWS/HARParchive/HARP-2024-2025/stormSep_USGS.R") - -stormCompData <- comp_data[!is.na(comp_data$usgs_cfs),] -stormOut <- stormSeparate(as.Date(stormCompData$obs_date), - inflow = stormCompData$usgs_cfs, - plt = FALSE,path = paste0(getwd(),"/StormPlots/"), - allMinimaStorms = TRUE, - baselineFlowOption = "Month") - -#For each storm, sum precip leading up to it (past 3, 5, 7, and 14 days?). Go -#through each storm and find the sum of precip of each dataset. Also find past -#3, 5, 7, and 14 days from storm endpoint including its full duration? What -#about stream length? What about DA or other NHDPLus factors? Can we get at -#travel time and or lag? Land use? -stormStats <- stormOut$Stats -#Throw out any storms that have inconsistent durations compared to start and end -#date. This can occur when a gage goes offline as StormSep doesn't check for -#this -stormEvents <- stormOut$Storms[(as.Date(stormStats$endDate) - as.Date(stormStats$startDate) + 1) == stormStats$durAll] -stormStats <- stormStats[(as.Date(stormStats$endDate) - as.Date(stormStats$startDate) + 1) == stormStats$durAll,] - -#Get rolling sums of precip over 3 day periods: -comp_data$roll3PRISM_cfs <- rollapply(comp_data$prism_p_cfs,3,sum,fill = NA,align = "right") - -#A function that gets precip from the rollingDur period but will include the -#full stormDuration -getRollPrecip <- function(comp_data,stormDuration, - rollingDur,endDate, - precipColName = "prism_p_cfs"){ - #Convert input date to date, just in case - sDate <- as.Date(endDate) - #Get the index in comp_date df where the endDate occurs - dateIndex <- grep(endDate,comp_data$date) - #Get all values from the precipColName in comp_data for the target duration - #adjusted for the storm duration. So, if there is a five-day storm, - #stormDuration is 5. If we are interested in rolling 7-day precip prior to and - #throughout the storm, we'd want rollingDur = 7. So, we need dateIndex - 7 - 5 - #The precip data is in: - precipData <- comp_data[,precipColName] - precipStorm <- precipData[(dateIndex - rollingDur - stormDuration + 2) : dateIndex] - #Return total precip. 
Adjust for NAs that may occur due to indexing numbers - #prior to start of comp_data - totalPrecip <- sum(precipStorm,na.rm = TRUE) - return(totalPrecip) -} - -#Add to stormStats the sum of precipitation from the 3-, 7-, and 14-day periods -#leading up to the storm and including the full storm duration. Convert to MG -cfsToMGD <- 86400 * 12*12*12/231/1000000 -#Only precip during the storm iteself -stormStats$roll1DayWStormPRISM_MG <- mapply(SIMPLIFY = TRUE, USE.NAMES = FALSE, - FUN = getRollPrecip, stormDuration = stormStats$durAll, - endDate = stormStats$endDate, - MoreArgs = list(comp_data = comp_data,rollingDur = 1, - precipColName = "prism_p_cfs") -) -stormStats$roll1DayWStormPRISM_MG <- stormStats$roll1DayWStormPRISM_MG * cfsToMGD -#Includes 1-day prior to the storm: -stormStats$roll2DayWStormPRISM_MG <- mapply(SIMPLIFY = TRUE, USE.NAMES = FALSE, - FUN = getRollPrecip, stormDuration = stormStats$durAll, - endDate = stormStats$endDate, - MoreArgs = list(comp_data = comp_data,rollingDur = 2, - precipColName = "prism_p_cfs") -) -stormStats$roll2DayWStormPRISM_MG <- stormStats$roll2DayWStormPRISM_MG * cfsToMGD - -#Plots of total storm volume to rolling 3-, 7-, 14-day periods with storm duration -plot(stormStats$roll1DayWStormPRISM_MG,stormStats$volumeTotalMG, - log = "xy") -#Plots of total storm volume above base Q to rolling 3-, 7-, 14-day periods with -#storm duration -plot(stormStats$roll1DayWStormPRISM_MG,stormStats$volumeAboveBaseQMG) - -#Linear models with storm duration -withStorm1 <- lm(volumeTotalMG ~ roll1DayWStormPRISM_MG, - data = stormStats) - -#Linear models with stormDuration above base Q -withStorm1_aboveBaseQ <- lm(volumeAboveBaseQMG ~ roll1DayWStormPRISM_MG, - data = stormStats) -withStorm2_aboveBaseQ <- lm(volumeAboveBaseQMG ~ roll2DayWStormPRISM_MG, - data = stormStats) - -summary(withStorm1) -summary(withStorm1_aboveBaseQ) -summary(withStorm2_aboveBaseQ) - -#The following showed a strong correlation. What if we look at power regression -#to smooth over small and large differences e.g. take power away from larger -#storms? This also removes storms in which no precip was found in the target -#duration. This may skew results and should be reviewed carefully due to the -#bias of picking only data where this method "works" -withStorm1_aboveBaseQ_log <- lm(log(volumeAboveBaseQMG) ~ log(roll1DayWStormPRISM_MG), - data = stormStats[stormStats$roll1DayWStormPRISM_MG > 0,]) -#The log power regression in this case actually performs worse: -summary(withStorm1_aboveBaseQ_log) - -#However, are we weighted by large storm volumes? It appears so, with one value -#even falling outside of Cooks Distance -plot(withStorm1_aboveBaseQ) - -#Does relationsip improve at all when we look only at specific months. Add month -#based on the startDate of the storm -stormStats$beginMonth <- as.numeric(format(as.Date(stormStats$startDate),"%m")) - -#What are the monthly relationships? -monthEventOut1Day <- mon_lm(stormStats, y_var = "volumeAboveBaseQMG", - x_var = "roll1DayWStormPRISM_MG", - mo_var = "beginMonth", "Storm Event Vol") -monthEventOut2Day <- mon_lm(stormStats, y_var = "volumeAboveBaseQMG", - x_var = "roll2DayWStormPRISM_MG", - mo_var = "beginMonth", "Storm Event Vol") - -#Data counts? -table(stormStats$beginMonth) -sum(stormStats$beginMonth == 10) -testReg <- lm(log(volumeAboveBaseQMG) ~ log(roll1DayWStormPRISM_MG), - data = stormStats[stormStats$beginMonth==10,]) -summary(testReg) - -#There may be some heavy influence from high storm events. 
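#(Aside: a hedged sketch of the Cook's Distance screen suggested in the note
#just below, using the base-stats cooks.distance() on the fit above. The 4/n
#cutoff is a common rule of thumb, not a fixed standard, and the row positions
#assume lm() dropped no rows from stormStats.)
cd <- cooks.distance(withStorm1_aboveBaseQ)
influential <- which(cd > 4 / length(cd))
stormStats[influential, c("startDate", "endDate", "volumeAboveBaseQMG",
                          "roll1DayWStormPRISM_MG")]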
May want to consider -#power regressions or non-linear exponential regressions to reduce their -#influence or evens a Cooks Distance analysis to remove errant data points - -#For each storm in stormOut$Storms, use the storm start and end-date to find the -#7-day period leading up to the storm and the 7-day period following the storm. -#Show precip during this period and throughout storm duration. Then, highlight -#the separated storm hydrograph -for(i in 1:length(stormEvents)){ - print(i) - #Get the current storm flow data which will be used for highlighting that - #hydrograph - stormi <- stormEvents[[1]] - #Only need non-NA values since stormSep will output full timeseries but leave - #values as NA if they are not included in storm - stormi <- stormi[!is.na(stormi$flow),] - - #Get the start and end date of the storm from stormStats: - stormStart <- as.Date(stormStats$startDate[i]) - stormEnd <- as.Date(stormStats$endDate[i]) - #Adjust these values to increase the window: - plotStart <- stormStart - 7 - plotEnd <- stormEnd + 7 - - #Get the stream flow and baseflow from stormSep - flowData <- stormOut$flowData[stormOut$flowData$timestamp >= plotStart & - stormOut$flowData$timestamp <= plotEnd,] - #Join in the precip "flow" from comp_data: - flowDataAll <- sqldf("SELECT flowData.*, - comp.prism_p_in, - comp.daymet_p_in, - comp.nldas2_p_in, - comp.prism_p_cfs, - comp.daymet_p_cfs, - comp.nldas2_p_cfs - FROM flowData - LEFT JOIN comp_data as comp - ON flowData.timeStamp = comp.Date") - pathOut <- paste0("StormPlotsNew/stormPlot",i,".PNG") - plotStorm(pathOut,stormi,flowDataAll) -} diff --git a/HARP-2024-2025/basic_analytics_for_vahydro.R b/HARP-2024-2025/basic_analytics_for_vahydro.R new file mode 100644 index 00000000..0aa73393 --- /dev/null +++ b/HARP-2024-2025/basic_analytics_for_vahydro.R @@ -0,0 +1,96 @@ +#Creates Summary analytic functions for workflow +#Use for VA HYDRO metrics +library(tidyr) +library(sqldf) +library(zoo) + + +## For ANY data + +summary_analytics <- function(df){ + +#creating a yearly summary with each year and its total precip +yearly.summary <- + sqldf( + "SELECT COUNT(*) AS ndays, yr, SUM(precip_in) AS total_precip + FROM df + GROUP BY yr + HAVING ndays >= 365" +) + +#summary analytics + +precip_annual_max_in <- + max(yearly.summary$total_precip) + +precip_annual_max_year <- + yearly.summary$yr[which.max(yearly.summary$total_precip)] + +precip_annual_mean_in <- + mean(yearly.summary$total_precip) + + +#For min values and years, we can exclude the first and last row since the +#current year and first years are incomplete data +precip_annual_min_in <- + min(yearly.summary$total_precip) +precip_annual_min_year <- + yearly.summary$yr[which.min(yearly.summary$total_precip)] + + + +# Create daily summary to use for all data. This makes hourly data daily sums. +daily.summary <- + sqldf( + "SELECT yr, mo, da, obs_date, SUM(precip_in) AS total_precip + FROM df + GROUP BY yr, mo, da" + ) #include obs_date after SQL to data frame +precip_daily_max_in <- max(daily.summary$total_precip) +#move to beginning + + +#if else statement evaluates the amount of unique hours and if 24, +#then hourly max is taken. If not, hourly max is NA +if(length(unique(df$hr)) == 24){ + precip_hourly_max_in <- max(df$precip_in) +} else { + precip_hourly_max_in <- NA +} +#Alternatively to a null value for hourly precip in daily data, +#we could look at the rainfall distribution table for a +#type II storm and multiply by the max P(t)/P(24) value. 
+#this function uses NULL for daily data + + +#l90 using zoo package +l90_precip_in_roll <- + min(rollapply(daily.summary$total_precip, width = 90, FUN = mean, + fill = NA, align = "right"), + na.rm = TRUE) + +#l90 using IHA +if(length(unique(df$hr)) == 24){ + Qout_zoo <- zoo(as.numeric(daily.summary$total_precip), order.by = as.POSIXct(daily.summary$obs_date)) + Qout_g2 <- data.frame(group2(Qout_zoo)) + l90_precip_in <- min(Qout_g2$X90.Day.Min) +} else { + Qout_zoo <- zoo(as.numeric(df$precip_in), order.by = as.POSIXct(df$obs_date)) + Qout_g2 <- data.frame(group2(Qout_zoo)) + l90_precip_in <- min(Qout_g2$X90.Day.Min) +} + + + +#makes data frame with all 9 metrics +metrics<- data.frame(precip_annual_max_in = precip_annual_max_in, + precip_annual_max_year = precip_annual_max_year, + precip_annual_mean_in = precip_annual_mean_in, + precip_annual_min_in = precip_annual_min_in, + precip_annual_min_year = precip_annual_min_year, + precip_daily_max_in = precip_daily_max_in, + precip_hourly_max_in = precip_hourly_max_in, + l90_precip_in = l90_precip_in, + l90_precip_in_roll = l90_precip_in_roll) +} + diff --git a/HARP-2024-2025/dni_data_experiments.R b/HARP-2024-2025/dni_data_experiments.R deleted file mode 100644 index f461450f..00000000 --- a/HARP-2024-2025/dni_data_experiments.R +++ /dev/null @@ -1,9 +0,0 @@ - -# How to use SQL in R in a more flexible way than sqldf will permit -# https://dbi.r-dbi.org/reference/dbAppendTable.html -con <- dbConnect(RSQLite::SQLite(), ":memory:") -dbCreateTable(con, "dh_timeseries", ds$tsvalues) -dbWriteTable(con, "dh_timeseries", ds$tsvalues, overwrite = TRUE) -dbExecute(con, "insert into dh_timeseries(tid, tsvalue, featureid) values (2,4.5,10)") -dbExecute(con, "select * from dh_timeseries") -dbReadTable(con, "dh_timeseries") diff --git a/HARP-2024-2025/precip_coverage_summary.R b/HARP-2024-2025/precip_coverage_summary.R new file mode 100644 index 00000000..735d67db --- /dev/null +++ b/HARP-2024-2025/precip_coverage_summary.R @@ -0,0 +1,61 @@ +# Insert info about this event into database +# attach a property showing the path to the original raster file +# attach a property with a web URL to the origina raster file +basepath='/var/www/R'; +source("/var/www/R/config.R") +library("lubridate") +library("hydrotools") +# Set up our data source +source(paste0(HARParchive_location,"/HARP-2024-2025/basic_analytics_for_vahydro.R")) + +ds <- RomDataSource$new(site, rest_uname = rest_uname) +ds$get_token(rest_pw) + +# Accepting command arguments: +argst <- commandArgs(trailingOnly = T) +if (length(argst) < 6) { + + message("Use: Rscript met_store_info.R datasource coverage_hydrocode coverage_bundle coverage_ftype model_version met_file") + message("Ex: Rscript met_store_info.R nldas2 N5113 landunit cbp6_landseg met_file") + q('n') +} +scenario_name <- argst[1] +coverage_hydrocode <- argst[2] +coverage_bundle <- argst[3] +coverage_ftype <- argst[4] +model_version <- argst[5] +met_file <- argst[6] +# load the feature -- get feature hydroid +# find the dh_timeseries_weather record for this event +# attach an image property to the record +# return + message(paste("Searching for feature hydrocode=", coverage_hydrocode,"with ftype",coverage_ftype)) +feature <- RomFeature$new( + ds, + list( + hydrocode=coverage_hydrocode, + ftype=coverage_ftype, + bundle=coverage_bundle + ), + TRUE +) +# this will create or retrieve a model scenario to house this summary data. 
+model <- om_model_object(ds, feature, model_version) +# if a matching model does not exist, this will go ahead and create one +scenario <- om_get_model_scenario(ds, model, scenario_name) + +met_data <- read.table(met_file, header = TRUE, sep=",") + + +precip_summary <- summary_analytics(met_data) + +numrecs <- nrow(met_data) # what does this do? +vahydro_post_metric_to_scenprop(scenario$pid, 'om_class_Constant', NULL, 'num_records', numrecs, ds) + +vahydro_post_metric_to_scenprop(scenario$pid, 'om_class_Constant', NULL, 'precip_annual_max_in', precip_summary$precip_annual_max_in, ds) + +vahydro_post_metric_to_scenprop(scenario$pid, 'om_class_Constant', NULL, 'precip_annual_min_in', precip_summary$precip_annual_max_in, ds) + + + + diff --git a/HARP-2024-2025/stormSep_USGS.R b/HARP-2024-2025/stormSep_USGS.R deleted file mode 100644 index 54261215..00000000 --- a/HARP-2024-2025/stormSep_USGS.R +++ /dev/null @@ -1,504 +0,0 @@ -# flowData <- dataRetrieval::readNWISdv("01613900",parameterCd = "00060") -# inflow <- flowData$X_00060_00003 -# timeIn <- as.Date(flowData$Date) -# mindex <- which(timeIn == "2019-01-01") -# maxdex <- which(timeIn == "2021-12-31") -# seqdex <- seq(mindex,maxdex) -# inflow <- inflow[seqdex] -# timeIn <- timeIn[seqdex] - - -#Input variables are: -# timeIn = Timestamps associated with streamflow data as vector -# inflow = Streamflow daily data as vector in cfs -# plt = Boolean to determine if plots are necessary -# path = Directory to store plots in. Used if plt is TRUE -# allMinimaStorms = Considers ALL local minima as potnetial storm endpoints. -# Will distinguish between peaks in the case of a multi-day storm hydrograph if -# set to TRUE. Otherwise, a storm is only considered as two subseuqnet local -# minima that fall below baseline flow -# baselineFlowOption = One of c("Water Year","Month","Calendar Year","Climate Year","All"). -# Defaults to "All". Determines how program will develop baseline flow. Uses -# horizontal line regression of baseflow and breaks it down based on all time, -# water year, calendar year, or month based on user selection -stormSeparate <- function(timeIn, inflow, - plt = F, - path = paste0(getwd(),"/"), - allMinimaStorms = FALSE, - baselineFlowOption = "All" -){ - #Call packages required by function if not called already: - require(grwat) - require(zoo) - require(sqldf) - - #First, get baseflow associated with inflow. Use defaults of grwat for now - #which involves three passes and the Lyne-Hollick (1979) hydrograph separation - #method with coefficient a as 0.925 - baseQ <- grwat::gr_baseflow(inflow) - - #Add timestamp to the baseflow separation for convenience by creating a - #dataframe - baseQ <- data.frame(timestamp = timeIn, Year = as.numeric(format(timeIn,"%Y")), - baseQ = baseQ,flow = inflow) - - #Find mins/maxes of three consecutive points such that the extreme is in the - #middle. These represent potential local mins and maxes - maxes <- rollapply(baseQ$flow,3,function(x) which.max(x) == 2,# & (x[2]!=x[1] & x[2]!= x[3]), - align = "left") - mins <- rollapply(baseQ$flow,3,function(x) which.min(x) == 2,# & (x[2]!=x[1] & x[2]!= x[3]), - align = "left") - #Create data frames of these local minima/maxima with the corresponding - #timestamp from the original data frame. Note that the rolling looks at the - #first three data points. If the first triplet has a local max as its second - #value, the first value in maxes will be TRUE.The timestamp to be associated - #with this should be that of the middle data point e.g. 
the second timestamp - #of the triplet! - mins <- data.frame(timestamp = baseQ$timestamp[c(FALSE,mins,FALSE)], - mins = baseQ$flow[c(FALSE,mins,FALSE)]) - maxes <- data.frame(timestamp = baseQ$timestamp[c(FALSE,maxes,FALSE)], - maxes = baseQ$flow[c(FALSE,maxes,FALSE)]) - - #Need to find mins/maxes below baseline flow. But first baseline flow must be - #defined. hreg() will try to fit mulitple horizontal lines through subset of - #data involving every point below that line until best fit is found. This - #becomes baseline flow, brk - #Find the break associated with this run and buffer by 10%. Based the hreg on - #the timescale selected by the user - # baselineFlowOption = One of c("Water Year","Month","Calendar Year","All") - if(baselineFlowOption == "All"){ - #Use the entire baseflow dataset to get baseline flow, brk - brk <- hreg(baseQ$baseQ,limit = 1) - brk <- brk * 1.1 - #Make as long as timeseries - brk <- rep(brk,nrow(baseQ)) - }else if(baselineFlowOption %in% c("Calendar Year", "Climate Year","Water Year")){ - #Based on the user option, determine the start month of the year - #designation. Water Years run from October - September, Climate years from - #April - March, and Calendar Years from January - December - if(baselineFlowOption == "Water Year"){ - WYS <- "10-01" - }else if(baselineFlowOption == "Climate Year"){ - WYS <- "04-01" - }else{ - WYS <- "01-01" - } - #Make dates of the input timesteps: - dataDates <- as.Date(timeIn) - #Create an emtpty vector for the new annual designations - dataWY <- NA - - #Identify months in the next year that are classified as the previous year's - #water/climate year e.g. January 2021 is in Water Year 2020 that begins in - #October. Get the month using gsub to get only the characters prior to first - #dash - WYM <- as.numeric(gsub("-.*","",WYS)) - if(WYM > 1){ - WYP <- seq(1,(WYM - 1)) - }else{ - WYP <- 0 - } - - #Add leading zeros to month number January - September - WYP[nchar(WYP) == 1]<-paste0("0",WYP[nchar(WYP) == 1]) - #Combine to form a regex pattern that searches for any of the months at the - #END of a string ($) - WYP <- paste0(WYP,"$",collapse = "|") - #Get the year and month associated with the dates - WY <- format(dataDates,"%Y-%m") - - #Initialize water year by making it the calendar year + 1 - dataWY <- as.numeric(gsub("-.*","",WY)) + 1 - - #Search for all months from January to the start of the water year. 
These - #months need to be assigned previous water year number (current calendar - #year) - dataWY[grepl(WYP,WY)] <- dataWY[grepl(WYP,WY)] - 1 - - #Exception case occurs when water year is calendar year, in which case water - #year ends in calendar year such that calendar year = water year - if(WYS == "01-01"){ - dataWY <- as.numeric(format(dataDates,"%Y")) - } - #Store the years in the appropriate column in baseQ data frame - baseQ$Year <- dataWY - #Create an empty vector the length of the baseQ data frame - brk <- numeric(nrow(baseQ)) - #For each unique water year, calendar year, etc., run the hreg and store in - #brk - for (i in unique(dataWY)){ - baseQsubset <- baseQ[baseQ$Year == i,] - #Use the subset of the baseflow dataset to get baseline flow, brk - brki <- hreg(baseQsubset$baseQ,limit = 1) - brki <- brki * 1.1 - #Store results - brk[baseQ$Year == i] <- brki - } - }else if(baselineFlowOption == "Month"){ - #Make dates of the input timesteps: - dataDates <- as.Date(timeIn) - - #Create a vector of the months and years, essentially getting a vector of - #all months in the timeIn vector - monthYears <- format(dataDates,"%m-%Y") - - #Create an empty vector the length of the baseQ data frame - brk <- numeric(nrow(baseQ)) - for (i in unique(monthYears)){ - baseQsubset <- baseQ[monthYears == i,] - #Use the subset of the baseflow dataset to get baseline flow, brk - brki <- hreg(baseQsubset$baseQ,limit = 1) - brki <- brki * 1.1 - #Store results - brk[monthYears == i] <- brki - } - } - #Add baseline flow to baseQ - baseQ$baselineQ <- brk - - #Next step is to isolate storms. This can be accomplished by taking a minimum - #and the next point to fall below baseline flow, brk. Each storm is checked to - #ensure a maximum above some reference level, here 1.5*brk. This eliminates - #small storms with little or no peak or rises in baseflow. Only minimums below - #the brk value are considered as these are storms that occur at baseflow and - #fully rise/recede. - - #Get the times associated with minimums that are below baseline flow brk. - #First, join in the baseline flow for the timeperiod: - mins <- sqldf("SELECT mins.*, baseQ.baselineQ - FROM mins - LEFT JOIN baseQ - ON mins.timestamp = baseQ.timestamp") - - if(allMinimaStorms){ - #Use all minima as potential storm start and stop points - x <- mins$timestamp - #Get the corresponding local maximums - y <- maxes$maxes - }else{ - #Get only the minima timestamps that lie below baseline flow - x <- mins$timestamp[mins$mins < mins$baselineQ] - #Get the corresponding local maximums - y <- maxes$maxes[mins$mins < mins$baselineQ] - } - - #A data frame to build with storms in each column - stormsep <- list() - #An ID column to store the storm number in baseQ - baseQ$stormID <- NA - - #Start evaluating each set of minima to evaluate if there is a qualifying - #storm event. If there is, store it with all relevant data - for (i in 1:(length(x) - 1)){ - # if(i==73){browser()} - endIndex <- 1 - #Initial guess at storm endpoints e.g. two local minimums - storm <- c(x[i], x[i + endIndex]) - #initial stormflows and the times associated with those flows: - stormflow <- inflow[timeIn >= storm[1] & timeIn <= storm[2]] - - #When using allMinimaStorms, some minima may be combined. Skip loops that - #feature combined storms. So, if at frist storm and nextStorm are January - #1st - Jan 10 and Jan 10 - Jan 12, but these are combined into one (Jan 1 - - #Jan 12), then we can skip the loop of Jan 10 - Jan 12. 
First, make sure - #there is a storm in stormsep and that this is not the first loop i.e. i != - #1 - if(i > 1 && length(stormsep) > 0){ - #Get the previous storm - prevStorm <- stormsep[[length(stormsep)]] - #Remove the NAs so we only have the timestamps associated with the current - #storm - prevStorm <- prevStorm[!is.na(prevStorm$flow),] - #If the end point of the storm is in the previous storm, skip this - #iteration. This storm has already been accounted for. - if(storm[2] %in% prevStorm$timestamp){ - #Skip to next iteration of for loop: - next - } - } - - #If the second minimum flow is practically equal to the next local maximum, - #combine with the next storm as they are likely of the same storm (of - #course, this assumes that the timestamps match up which they should in the - #current set-up regardless of allMinimaStorms). But only do this if - #allMinimaStorms is TRUE since otherwise baselineflow is the cut-off. - if(allMinimaStorms & !is.na(x[i + 1 + endIndex])){ - #Initial guess at storm endpoints e.g. two local minimums - nextStorm <- c(x[i + 1], x[i + 1 + endIndex]) - #initial stormflows and the times associated with those flows: - nextStormflow <- inflow[timeIn >= nextStorm[1] & timeIn <= nextStorm[2]] - #What is the maximum of this storm event? - nextMaxStormFlow <- max(nextStormflow) - - while(!is.na(nextMaxStormFlow) & - stormflow[length(stormflow)] >= 0.8 * nextMaxStormFlow){ - endIndex <- endIndex + 1 - #Initial guess at storm endpoints e.g. two local minimums - storm <- c(x[i], x[i + endIndex]) - #initial stormflows and the times associated with those flows: - stormflow <- inflow[timeIn >= x[i] & timeIn <= x[i + endIndex]] - - if(!is.na(x[i + 1 + endIndex])){ - #Initial guess at storm endpoints e.g. two local minimums - nextStorm <- c(x[i + endIndex], x[i + 1 + endIndex]) - #initial stormflows and the times associated with those flows: - nextStormflow <- inflow[timeIn >= nextStorm[1] & timeIn <= nextStorm[2]] - #What is the maximum of this storm event? - nextMaxStormFlow <- max(nextStormflow) - }else{ - nextMaxStormFlow <- NA - } - } - } - - #Get the times associated with the storm - stormtimes <- timeIn[timeIn >= x[i] & timeIn <= x[i + endIndex]] - - #When does the maximum flow associated with this storm occur? - maxtime <- stormtimes[stormflow == max(stormflow)][1] - - #If there is a point at baseflow before next minimum, use it instead to - #prevent over elongated tails. 
We just need to ensure it takes place before - #the next minima, is over brk, and occurs after maxtime - endAlt <- (baseQ$timestamp[baseQ$flow < baseQ$baselineQ & - baseQ$timestamp > x[i] & - baseQ$timestamp < x[i + endIndex] & - baseQ$timestamp > maxtime])[1] - #If an alternate endpoint for the storm was found, redefine the storm: - if (!is.na(endAlt)){ - storm <- c(x[i],endAlt) - stormflow <- inflow[timeIn >= x[i] & timeIn <= endAlt] - stormtimes <- timeIn[timeIn >= x[i] & timeIn <= endAlt] - } - #data frame of storm data - stormdat <- data.frame( - timestamp = stormtimes, - baseQ = baseQ$baseQ[baseQ$timestamp >= storm[1] & baseQ$timestamp <= storm[2]], - flow = stormflow, - baselineQ = baseQ$baselineQ[baseQ$timestamp >= storm[1] & baseQ$timestamp <= storm[2]] - ) - - #data frame of whole time series - store <- data.frame(timestamp = baseQ$timestamp, flow = NA,baseflow = NA) - #Fills in only flow data during storm, leaving rest as NA - store$flow[store$timestamp >= storm[1] & - store$timestamp <= storm[2]] <- stormdat$flow - store$baseflow[store$timestamp >= storm[1] & - store$timestamp <= storm[2]] <- stormdat$baseQ - store$baselineflow[store$timestamp >= storm[1] & - store$timestamp <= storm[2]] <- stormdat$baselineQ - - #If maximum exceeds limit, add it to the stormsep list: - if(any(store$flow > (2.0 * store$baselineflow),na.rm = TRUE)){ - #Set the storm number in baseQ. In case of overlapping end/start points, - #store both IDs. - startID <- baseQ$stormID[baseQ$timestamp == storm[1]] - if(!is.na(startID)){ - #If the stormID in baseQ is already entered for the start date, it - #overlaps with a previous storm since both use that local minima. Store - #both IDs. - baseQ$stormID[baseQ$timestamp == storm[1]] <- paste0(startID,",",length(stormsep) + 1) - baseQ$stormID[baseQ$timestamp > storm[1] & baseQ$timestamp <= storm[2]] <- length(stormsep) + 1 - }else{ - #If there is no overlap, enter the storm ID for all storm timestamps - baseQ$stormID[baseQ$timestamp >= storm[1] & baseQ$timestamp <= storm[2]] <- length(stormsep) + 1 - } - - stormsep[[length(stormsep) + 1]] <- store - } - } - - #Now plot each storm and fit exponential curves to rising and falling limbs - #Store coefficients and statistics for each curve into a data frame, looking - #at shape of curve and the adjusted R square values. Store each storm as a PNG - #graph in the designated area. Need to prevent errors from zero flow. Added - #0.0001 to all flow values. 
-  #(note that this WILL RESULT IN BIAS in the fitted coefficients)
-  ext <- ".png"
-  #Empty data frame to store statistics
-  transients <- data.frame(startDate = character(length(stormsep)),
-                           endDate = NA, maxDate = NA,
-                           rising = NA, RsqR = NA, falling = NA, RsqF = NA,
-                           durAll = NA, durF = NA, durR = NA,
-                           volumeTotalMG = NA,
-                           volumeAboveBaseQMG = NA,
-                           volumeAboveBaselineQMG = NA,
-                           volumeTotalMG_rise = NA,
-                           volumeAboveBaseQMG_rise = NA,
-                           volumeAboveBaselineQMG_rise = NA,
-                           volumeTotalMG_fall = NA,
-                           volumeAboveBaseQMG_fall = NA,
-                           volumeAboveBaselineQMG_fall = NA)
-  for (i in 1:length(stormsep)){
-    #Find the storm of interest
-    storm <- stormsep[[i]]
-    #Remove NAs so only the storm timestamps remain:
-    storm <- storm[!is.na(storm[,2]),]
-    #Find the time at which the maximum flow occurs (first match if tied)
-    maxtime <- storm[storm[,2] == max(storm[,2]),1][1]
-
-    #Store the start, end, and peak dates of the storm
-    transients$startDate[i] <- format(storm$timestamp[1],"%Y-%m-%d")
-    transients$endDate[i] <- format(storm$timestamp[nrow(storm)],"%Y-%m-%d")
-    transients$maxDate[i] <- format(maxtime,"%Y-%m-%d")
-
-    #Separate rising and falling limbs based on maxtime, i.e. the rising limb is
-    #all values leading up to maxtime
-    rising <- storm[storm[,1] <= maxtime,]
-    falling <- storm[storm[,1] >= maxtime,]
-
-    #What is the volume of the storm streamflow, i.e. the total volume? First, get
-    #the difference in timestamps throughout the storm from one time step to the
-    #next:
-    timeDiff <- difftime(storm$timestamp[2:nrow(storm)], storm$timestamp[1:(nrow(storm) - 1)],
-                         units = "secs")
-    timeDiff <- as.numeric(timeDiff)
-    #Repeat for rising and falling limbs only:
-    #Rising:
-    timeDiff_rise <- difftime(rising$timestamp[2:nrow(rising)], rising$timestamp[1:(nrow(rising) - 1)],
-                              units = "secs")
-    timeDiff_rise <- as.numeric(timeDiff_rise)
-    #Falling:
-    timeDiff_fall <- difftime(falling$timestamp[2:nrow(falling)], falling$timestamp[1:(nrow(falling) - 1)],
-                              units = "secs")
-    timeDiff_fall <- as.numeric(timeDiff_fall)
-    #Using a trapezoidal approach, compute the area of each trapezoid to estimate the
-    #storm volume. Three volumes are computed: total storm volume, volume above
-    #baseflow, and volume above baseline flow (an illustrative sketch of this
-    #calculation appears at the end of this file).
-    #Total storm flow:
-    trapzArea <- function(timeDiff, stormFlow){
-      #Each trapezoid is the time step multiplied by the mean of the two bounding flows
-      timeDiff * ((stormFlow[1:(length(stormFlow) - 1)] + stormFlow[2:length(stormFlow)]) / 2)
-    }
-    trapz_total <- trapzArea(timeDiff, storm$flow)
-    #Only flow above baseflow:
-    trapz_abovebaseQ <- trapzArea(timeDiff, (storm$flow - storm$baseflow))
-    #Only flow above baseline flow. Note that this difference CAN BE NEGATIVE AND IS
-    #THEREFORE POTENTIALLY UNRELIABLE:
-    trapz_abovebaselineQ <- trapzArea(timeDiff, (storm$flow - storm$baselineflow))
-
-    #Rising/falling storm flow:
-    trapz_total_rise <- trapzArea(timeDiff_rise, rising$flow)
-    trapz_total_fall <- trapzArea(timeDiff_fall, falling$flow)
-    #Only flow above baseflow:
-    trapz_abovebaseQ_rise <- trapzArea(timeDiff_rise, (rising$flow - rising$baseflow))
-    trapz_abovebaseQ_fall <- trapzArea(timeDiff_fall, (falling$flow - falling$baseflow))
-    #Only flow above baseline flow. As above, this difference
-    #CAN BE NEGATIVE AND IS THEREFORE POTENTIALLY UNRELIABLE:
-    trapz_abovebaselineQ_rise <- trapzArea(timeDiff_rise, (rising$flow - rising$baselineflow))
-    trapz_abovebaselineQ_fall <- trapzArea(timeDiff_fall, (falling$flow - falling$baselineflow))
-
-    #Total volume is the sum of the trapezoid areas found above, converted from cubic
-    #feet to MG (1 cf = 12^3 in^3, 231 in^3 = 1 gal, 1 MG = 1,000,000 gal):
-    volume_total <- sum(trapz_total) * 12 * 12 * 12 / 231 / 1000000
-    volume_abovebaseQ <- sum(trapz_abovebaseQ) * 12 * 12 * 12 / 231 / 1000000
-    volume_abovebaselineQ <- sum(trapz_abovebaselineQ) * 12 * 12 * 12 / 231 / 1000000
-    #Rising limb:
-    volume_total_rise <- sum(trapz_total_rise) * 12 * 12 * 12 / 231 / 1000000
-    volume_abovebaseQ_rise <- sum(trapz_abovebaseQ_rise) * 12 * 12 * 12 / 231 / 1000000
-    volume_abovebaselineQ_rise <- sum(trapz_abovebaselineQ_rise) * 12 * 12 * 12 / 231 / 1000000
-    #Falling limb:
-    volume_total_fall <- sum(trapz_total_fall) * 12 * 12 * 12 / 231 / 1000000
-    volume_abovebaseQ_fall <- sum(trapz_abovebaseQ_fall) * 12 * 12 * 12 / 231 / 1000000
-    volume_abovebaselineQ_fall <- sum(trapz_abovebaselineQ_fall) * 12 * 12 * 12 / 231 / 1000000
-    #Store results:
-    transients$volumeTotalMG[i] <- volume_total
-    transients$volumeAboveBaseQMG[i] <- volume_abovebaseQ
-    transients$volumeAboveBaselineQMG[i] <- volume_abovebaselineQ
-    transients$volumeTotalMG_rise[i] <- volume_total_rise
-    transients$volumeAboveBaseQMG_rise[i] <- volume_abovebaseQ_rise
-    transients$volumeAboveBaselineQMG_rise[i] <- volume_abovebaselineQ_rise
-    transients$volumeTotalMG_fall[i] <- volume_total_fall
-    transients$volumeAboveBaseQMG_fall[i] <- volume_abovebaseQ_fall
-    transients$volumeAboveBaselineQMG_fall[i] <- volume_abovebaselineQ_fall
-
-    #Fit a log-linear (exponential) regression to the rising limb to roughly
-    #characterize its shape relative to an "ideal" hydrograph (a standalone sketch of
-    #this fit appears at the end of this file)
-    modelR <- lm(log(rising[,2] + 0.0001) ~ seq(1, length(rising[,1])))
-    #Store the exponential (slope) coefficient and the adjusted R-squared
-    transients$rising[i] <- summary(modelR)$coefficients[2, 1]
-    transients$RsqR[i] <- summary(modelR)$adj.r.squared
-
-    #Fit the same log-linear regression to the falling limb
-    modelF <- lm(log(falling[,2] + 0.0001) ~ seq(1, length(falling[,1])))
-    transients$falling[i] <- summary(modelF)$coefficients[2, 1]
-    transients$RsqF[i] <- summary(modelF)$adj.r.squared
-
-    #Duration of the storm (number of time steps), rising and falling limbs combined
-    transients$durAll[i] <- length(storm$timestamp)
-    #Duration of the rising limb
-    transients$durR[i] <- length(rising$timestamp)
-    #Duration of the falling limb
-    transients$durF[i] <- length(falling$timestamp)
-
-    #Plot the storm and the fitted rising and falling limbs, and save the figure to
-    #the designated path.
-    #Include the baseflow and the baseline flow brk in the plot.
-    if(plt == TRUE){
-      #Set the plot output path and dimensions
-      png(paste0(path, "storm", i, ext), width = 1820, height = 760)
-      #Set the plot margins
-      par(mar = c(5, 6, 2, 4))
-      #Plot the storm hydrograph, thickening the plotted line, enlarging the axis
-      #text, and labeling the axes
-      plot(storm[,1], storm[,2], type = 'l',
-           xlab = 'Date', ylab = 'Flow (cfs)',
-           lwd = 2, cex.axis = 2, cex.lab = 2)
-      #Plot the fitted rising limb:
-      lines(storm[storm[,1] <= maxtime, 1],
-            exp(fitted(modelR)),
-            col = 'darkblue', lwd = 2)
-      #Plot the fitted falling limb:
-      lines(storm[storm[,1] >= maxtime, 1],
-            exp(fitted(modelF)),
-            col = 'darkred', lwd = 2)
-      #Plot the baseflow
-      lines(storm[,1], storm[,3],
-            col = "darkgreen", lwd = 2)
-      #Plot the baseline flow brk as a dotted line via lty = 3
-      lines(storm[,1], storm[,4], lty = 3, lwd = 2)
-      #Put a small legend on the plot
-      legend("topleft", c("Flow", "Baseflow", "Rise Limb", "Fall Limb", "Baseline"),
-             col = c("black", "darkgreen", "darkblue", "darkred", "black"),
-             lty = c(1, 1, 1, 1, 3),
-             bty = "n")
-      #Close the plot device and write out the PNG file
-      dev.off()
-    }
-  }
-  #rm(maxtime,storm,rising,falling,modelR,modelF,i,ext,fxn_locations)
-  out <- list(Storms = stormsep, Stats = transients,
-              flowData = baseQ)
-  return(out)
-}
-
-#The function hreg (horizontal regression) below evaluates a set of candidate
-#horizontal lines (the 0.1% quantiles of the data at or below the given limit
-#percentile) and returns the candidate with the smallest mean square error against
-#that subset. The result becomes the baseline flow, brk (a usage sketch appears at
-#the end of this file).
-hreg <- function(x, limit = 1){
-  #Value of x at the 'limit' quantile:
-  lim <- as.numeric(quantile(x, limit))
-  #Keep only the values of x at or below that quantile:
-  x <- x[x <= lim]
-  #Candidate lines: all quantiles of x from 0 to 100% in 0.1% steps:
-  lns <- as.numeric(quantile(x, seq(0, 1, 0.001)))
-  #Keep only non-zero candidates
-  lns <- lns[lns != 0]
-  #A vector to hold the mean square error of each candidate
-  mse <- numeric(length(lns))
-  #For each candidate in lns, compute the mean square error of a horizontal line at
-  #that value against the subset of x
-  for (i in seq_along(lns)){
-    line <- lns[i]
-    mse[i] <- mean((x - line)^2)
-  }
-  #Return the candidate line with the minimum mean square error:
-  return(lns[which.min(mse)])
-}
-
-# pathOut <- paste0(path,"storm",i,ext)
-
-
-# outTest <- stormSeparate(timeIn, inflow,
-#                          plt = FALSE, path = paste0(getwd(), "/"),
-#                          allMinimaStorms = TRUE,
-#                          baselineFlowOption = "Month"
-#                          )
-# length(outTest$Storms)
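A minimal standalone sketch of the 80% merge rule referenced in the storm-combination step above, assuming hypothetical scalar inputs tailFlow (the flow at the end of the current storm) and nextPeak (the peak of the next candidate storm); neither name comes from the original script:

#Illustrative helper: merge two adjacent candidate storms when the current storm has
#not receded well below the next candidate's peak
shouldMerge <- function(tailFlow, nextPeak, frac = 0.8) {
  !is.na(nextPeak) && tailFlow >= frac * nextPeak
}
shouldMerge(tailFlow = 950, nextPeak = 1000)  #TRUE: flow is still near the next peak
shouldMerge(tailFlow = 200, nextPeak = 1000)  #FALSE: flow has receded, keep storms separate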
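For reference, a self-contained sketch of the trapezoidal volume estimate and the cubic-feet-to-million-gallons conversion performed inside the statistics loop; the hourly timestamps and flows below are made up for illustration:

#Toy hourly hydrograph: timestamps and flows in cfs (illustrative values only)
toyTime <- seq(as.POSIXct("2020-06-01 00:00", tz = "UTC"), by = "hour", length.out = 4)
toyFlow <- c(10, 40, 25, 12)
#Time step between observations, in seconds
dtSec <- as.numeric(difftime(toyTime[-1], toyTime[-length(toyTime)], units = "secs"))
#Trapezoid areas: seconds times the mean of the bounding flows (cfs), giving cubic feet
trapz <- dtSec * (toyFlow[-length(toyFlow)] + toyFlow[-1]) / 2
#Cubic feet to million gallons: 12^3 in^3 per cf, 231 in^3 per gal, 1e6 gal per MG
volumeMG <- sum(trapz) * 12^3 / 231 / 1e6
volumeMG  #roughly 2.05 MG for this toy storm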
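The limb regressions above are log-linear fits against the time-step index; the sketch below repeats that approach on a made-up falling limb (the fallingFlow values and the 0.0001 offset mirror the code above but are not observed data):

#Hypothetical falling-limb flows (cfs) following a rough exponential recession
fallingFlow <- c(500, 310, 195, 120, 76, 48, 30)
stepIndex <- seq_along(fallingFlow)
#Log-linear fit: log(flow + small offset) ~ time-step index, as in the loop above
limbFit <- lm(log(fallingFlow + 0.0001) ~ stepIndex)
#The slope approximates the recession rate per time step; adjusted R-squared gives fit quality
summary(limbFit)$coefficients[2, 1]
summary(limbFit)$adj.r.squared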
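Finally, a hedged usage sketch for hreg() on synthetic flows; the lognormal draws and the limit value of 0.6 are illustrative choices, not settings from the original analysis:

#Synthetic daily flows: mostly low baseflow with a block of higher storm-season flows
set.seed(1)
synthFlow <- c(rlnorm(300, meanlog = log(50), sdlog = 0.2),
               rlnorm(65, meanlog = log(400), sdlog = 0.5))
#Candidate baseline from the lowest 60% of flows; the returned value can serve as brk
brk <- hreg(synthFlow, limit = 0.6)
brk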