From ca61c3ea033f5d7354ba362b1796e791d795cd1b Mon Sep 17 00:00:00 2001 From: Ilona Hughes Date: Thu, 11 Jul 2024 16:24:53 -0400 Subject: [PATCH 1/8] Daily and weekly drafts --- HARP-2024-2025/hydroimport_daily.R | 49 ++++++++++++++++++++ HARP-2024-2025/make_weekly_summary_ts.R | 61 +++++++++++++++++++++++++ 2 files changed, 110 insertions(+) create mode 100644 HARP-2024-2025/hydroimport_daily.R create mode 100644 HARP-2024-2025/make_weekly_summary_ts.R diff --git a/HARP-2024-2025/hydroimport_daily.R b/HARP-2024-2025/hydroimport_daily.R new file mode 100644 index 00000000..f626c3a5 --- /dev/null +++ b/HARP-2024-2025/hydroimport_daily.R @@ -0,0 +1,49 @@ +# Inputs (args): +# 1 = File path of csv from VA Hydro +# 2 = Data source "nldas2, daymet, prism" +# 3 = End path of new csv +# Outputs: +# Csv file with manipulated data at end filepath + +# Library necessary packages +print("Accessing necessary libraries") +suppressPackageStartupMessages(library(lubridate)) +suppressPackageStartupMessages(library(sqldf)) + +# Set up command args +print("Reading command args") +args <- commandArgs(trailingOnly = TRUE) +if (length(args) != 3){ + message("Missing or extra inputs. Usage: Rscript hydroimport_daily.R input_path data_source output_path") + q() +} + +input_path <- args[1] +data_source <- args[2] +output_path <- args[3] + +# Pull csv from input file path +print("Reading csv") +hydro_daily <- read.csv(input_path) +# Add in more date information +print("Adding date information") +hydro_daily[,c('yr', 'mo', 'da', 'wk')] <- cbind(year(as.Date(hydro_daily$obs_date)), + month(as.Date(hydro_daily$obs_date)), + day(as.Date(hydro_daily$obs_date)), + week(as.Date(hydro_daily$obs_date))) + +# If data comes from nladas2 (hourly), it must be converted into daily data +print("Checking for nldas2") +if (data_source=="nldas2"){ + hydro_daily <- sqldf( + "select featureid, min(obs_date) as obs_date, yr, mo, da, + sum(precip_mm) as precip_mm, sum(precip_in) as precip_in + from hydro_daily + group by yr, mo, da + order by yr, mo, da + " + )} + +# Write csv in new file path +print("Writing csv") +write.csv(hydro_daily,file = output_path) \ No newline at end of file diff --git a/HARP-2024-2025/make_weekly_summary_ts.R b/HARP-2024-2025/make_weekly_summary_ts.R new file mode 100644 index 00000000..56930201 --- /dev/null +++ b/HARP-2024-2025/make_weekly_summary_ts.R @@ -0,0 +1,61 @@ +# Inputs (args): +# 1 = comp data csv filepath +# 2 = Output csv path +# 3 = data column + +# Libraries +suppressPackageStartupMessages(library(lubridate)) +suppressPackageStartupMessages(library(sqldf)) + + +args <- commandArgs(trailingOnly = T) + +if (length(args) != 3){ + message("Missing or extra inputs. Usage: Rscript make_weekly_summary_ts.R comp_data_filepath output_filepath data_column ") +} +# Missing from github issue:[weekly_column(default=weekly_mean_value)] +print("Assigning arguments") +comp_data_filepath <- args[1] +output_filepath <- args[2] +data_column <- args[3] + +print("Reading in comp data csv") +comp_data <- read.csv(comp_data_filepath) + +# Check for necessary data (Comp data doesnt have start and end date?) 
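+# Sketch of a consolidated check (assumption: `require_columns` is a hypothetical
+# helper, not an existing part of this workflow). It reports every missing column
+# at once and could stand in for the per-column checks below.
+require_columns <- function(df, cols){
+  missing_cols <- cols[!cols %in% colnames(df)]
+  if (length(missing_cols) > 0){
+    message("Missing required column(s): ", paste(missing_cols, collapse = ", "))
+    q()
+  }
+}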
+print("Checking columns") +#if ("start_date" %in% colnames(comp_data)){}else{ + # message("start_date column missing") + # q() +#} + +#if ("end_date" %in% colnames(comp_data)){}else{ + #message("end_date column missing") + #q() +#} + +if (data_column %in% colnames(comp_data)){}else{ + message(data_column, "column missing") + q() +} + + +#converts our daily data into weekly +print("Converting to weekly data") + week_data <- sqldf( + "select min(obs_date) as week_begin, yr, wk, min(dataset_day) as dataset_day_begin, + avg(dataset_p_in) as dataset_p_in, avg(dataset_cfs) as dataset_cfs, + avg(usgs_cfs) as usgs_cfs, avg(today_d_cfs) as today_d_cfs, + avg(nextday_d_cfs) as nextday_d_cfs + from comp_data + group by yr, wk + order by yr, wk + " + ) + #Base the month column off of the min day oindex for that week, or the day that + #week began + week_data$mo <- month(week_data$week_begin) + + + write.csv(week_data,output_filepath) + From a74051ea5742b69f75182c90c94509a83dd7d7d2 Mon Sep 17 00:00:00 2001 From: Ilona Hughes Date: Mon, 9 Sep 2024 13:25:49 -0400 Subject: [PATCH 2/8] Data source analysis --- HARP-2024-2025/DataSourceEvaluations.R | 147 +++++++++ HARP-2024-2025/StormResults_Analysis.R | 394 +++++++++++++++++++++++++ 2 files changed, 541 insertions(+) create mode 100644 HARP-2024-2025/DataSourceEvaluations.R create mode 100644 HARP-2024-2025/StormResults_Analysis.R diff --git a/HARP-2024-2025/DataSourceEvaluations.R b/HARP-2024-2025/DataSourceEvaluations.R new file mode 100644 index 00000000..14c0c02f --- /dev/null +++ b/HARP-2024-2025/DataSourceEvaluations.R @@ -0,0 +1,147 @@ +# Load in libraries +library(sqldf) +library(ggplot2) +library(tidyverse) + + +################################################################################ +# PRISM List of files + +list_of_prism <- list.files(path="C:/Users/ilona/OneDrive - Virginia Tech/HARP/Storm Tests/Ratings/out-prism", + recursive = TRUE, + pattern = "\\.csv$", + full.names = TRUE) +prism_list <- read_csv(list_of_prism, id="gageid") + +prism_list$gageid <- grep("0", unlist(strsplit(prism_list$gageid, "_")), value = TRUE, perl = TRUE) +prism_list$gageid <- grep("0", unlist(strsplit(prism_list$gageid, "-")), value = TRUE, perl = TRUE) +prism_list$...1 <- NULL + +prism_gages <- as.list(unique(prism_list$gageid)) + +################################################################################ +# DAYMET List of files + +list_of_daymet <- list.files(path="C:/Users/ilona/OneDrive - Virginia Tech/HARP/Storm Tests/Ratings/out-daymet/out", + recursive = TRUE, + pattern = "\\.csv$", + full.names = TRUE) +daymet_list <- read_csv(list_of_daymet, id="gageid") + +daymet_list$gageid <- grep("0", unlist(strsplit(daymet_list$gageid, "_")), value = TRUE, perl = TRUE) +daymet_list$gageid <- grep("0", unlist(strsplit(daymet_list$gageid, "-")), value = TRUE, perl = TRUE) +daymet_list$...1 <- NULL + +daymet_gages <- as.list(unique(daymet_list$gageid)) + +################################################################################ +# NLDAS2 List of Files + +list_of_nldas2 <- list.files(path="C:/Users/ilona/OneDrive - Virginia Tech/HARP/Storm Tests/Ratings/out-daymet/out", + recursive = TRUE, + pattern = "\\.csv$", + full.names = TRUE) +nldas2_list <- read_csv(list_of_nldas2, id="gageid") + +nldas2_list$gageid <- grep("0", unlist(strsplit(nldas2_list$gageid, "_")), value = TRUE, perl = TRUE) +nldas2_list$gageid <- grep("0", unlist(strsplit(nldas2_list$gageid, "-")), value = TRUE, perl = TRUE) +nldas2_list$...1 <- NULL + +nldas2_gages <- 
as.list(unique(nldas2_list$gageid)) + +################################################################################ +# Find gages which ran for each data source + +complete_gages <- as.list(intersect(prism_gages,daymet_gages)) +complete_gages <- as.list(as.numeric(intersect(complete_gages,nldas2_gages))) + +################################################################################ +# +for(j in c("daymet","prism","nldas")){ + print(j) + #Set path to read in ratings based on data source of outer loop + pathToread <- paste0("C:/Users/ilona/OneDrive - Virginia Tech/HARP/Storm Tests/Ratings/",j,"/") + for( i in list.files(pathToread) ){ + filei <- read.csv(paste0(pathToread,i)) + #Store the gage number + filei$gage <- gsub(".*_(\\d+)-.*","\\1",i) + #Combine the file into a single data frame + if(!exists("combineFile")){ + combineFile <- filei + }else{ + combineFile <- rbind(combineFile,filei) + } + } + #Assign to a specific variable and delete the generic combineFile + assign(paste0('combineFile',j),combineFile) + rm(combineFile) +} + +#Join the daymet and prism data together +joinData <- combineFileprism %>% + #Remove the row numbers (not important) and rename the r_squared for clarity + select(-X,prismRating = r_squared) %>% + #Join in the dayment data, but first rename the r_squared column for clarity. + #Join on gage and month + left_join(combineFiledaymet %>% + select(daymetRating = r_squared,gage,mo), + by = c("gage","mo")) %>% + #Join in the nldas data, but first rename the r_squared column for clarity. + #Join on gage and month + left_join(combineFilenldas %>% + select(nldasRating = r_squared,gage,mo), + by = c("gage","mo")) %>% + #Pivot it longer to have a column with the data source and one for the + #ratings, for plotting ease + pivot_longer(c(prismRating,daymetRating,nldasRating), + names_to = 'dataSource', + values_to = 'rating') + + + +################################################################################ +# Import and add drainage are from USGS +drainage_area <- dataRetrieval::readNWISsite(c(unique(joinData$gage))) +joinData <- sqldf( + "select a.mo as mo, a.gage as gageid, a.dataSource as dataSource, a.rating as rating, + b.drain_area_va as drainageArea_sqmi + from joinData as a + left outer join drainage_area as b + on ( + a.gage = b.site_no + )" +) + +joinData$dataSource <- gsub("Rating","",joinData$dataSource) + + +################################################################################ +# Trends in DA and ratings + +ggplot(data=joinData)+ + aes(y=mean(joinData$rating, group_by(gageid)),x=joinData$drainageArea_sqmi) + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/HARP-2024-2025/StormResults_Analysis.R b/HARP-2024-2025/StormResults_Analysis.R new file mode 100644 index 00000000..95b5f5ad --- /dev/null +++ b/HARP-2024-2025/StormResults_Analysis.R @@ -0,0 +1,394 @@ +# Key for Gages: +# 01620500 = North River near Stokesville +# 01634500 = Cedar Creek near Winchester +# 01669000 = Piscataway Creek near Tappahannock +# 02021500 = Maury River at Rockbridge Falls +# 02031000 = Mechums River near White Hall +# 02044500 = Nottoway River near Rawlings +# 03176500 = New River at Glen Lyn +# 03165000 = Chesnut Creek at Galax +# 02054530 = Roanoke River at Glenvar +# 02034000 = Rivanna River at Palmyra + +# sql, ggplot2 +library(sqldf) +library(ggplot2) +library(tidyverse) + +############################################################################## +# STORM FILE DATA COMPILATION + +list_of_files <- list.files(path="C:/Users/ilona/OneDrive - 
Virginia Tech/HARP/Storm Tests/Ratings/out", + recursive = TRUE, + pattern = "\\.csv$", + full.names = TRUE) +df <- read_csv(list_of_files, id="gageid") + +df$gageid <- grep("0", unlist(strsplit(df$gageid, "_")), value = TRUE, perl = TRUE) +df$gageid <- grep("0", unlist(strsplit(df$gageid, "-")), value = TRUE, perl = TRUE) +df$...1 <- NULL + +df$season <- ifelse (df$mo %in% c(6:8), + "SUMMER", + ifelse (df$mo %in% c(9:11), + "AUTUMN", + ifelse (df$mo %in% c(12,1,2), + "WINTER", + ifelse (df$mo %in% c(3:5), + "SPRING", NA)))) + +############################################################################## +# MONTHLY AND SEASONAL R-SQUARED PLOTS + +ggplot(data = df)+ + aes(y = season, x=r_squared, color=season)+ + geom_boxplot()+ + xlim(0,1)+ + theme_bw()+ + xlab("R-Squared")+ + ylab("Season")+ + ggtitle("Seasonal R-Squared Values ()")+ + theme(plot.title = element_text(hjust = 0.5), legend.position = "none") + + +ggplot(data = df)+ + aes(y = mo, x=r_squared)+ + geom_boxplot(mapping = aes(group = mo))+ + xlim(0,1)+ + theme_bw()+ + xlab("R-Squared")+ + ylab("Month")+ + ggtitle("Monthly R-Squared Values ()")+ + theme(plot.title = element_text(hjust = 0.5), legend.position = "none")+ + scale_y_continuous(breaks = seq(0,12,1)) + + +############################################################################ +# Highest and lowest avg R-squared + +gage_mean<- aggregate(x= df$r_squared, + # Specify group indicator + by = list(df$gageid), + # Specify function (i.e. mean) + FUN = mean, + na.rm = TRUE ) +print(gage_mean) + +ggplot(data = gage_mean, + mapping = aes(x=x))+ + geom_density(color="dodgerblue3")+ + theme_bw()+ + ylab("Density")+ + xlab("R-Squared")+ + ggtitle("Mean R-Squared Value Density for each Gage")+ + theme(plot.title = element_text(hjust = 0.5))+ + xlim(0,1)+ + geom_vline(xintercept = mean(gage_mean$x), lwd=0.5,colour="purple4",linetype = "dashed") + +ggplot(data = df, + mapping = aes(x=r_squared))+ + geom_density(color="indianred4")+ + theme_bw()+ + ylab("Density")+ + xlab("R-Squared")+ + ggtitle("Total R-Squared Value Density")+ + theme(plot.title = element_text(hjust = 0.5))+ + xlim(0,1)+ + geom_vline(xintercept = mean(df$r_squared, na.rm = TRUE), lwd=0.5,colour="deeppink3",linetype = "dashed") + + +############################################################################## +# Highest R-squared Gage analysis + +high_data <- read.csv("C:/Users/ilona/OneDrive - Virginia Tech/HARP/Storm Tests/Ratings/out/usgs_ws_01652500-PRISM-storm_volume-ratings.csv") + +ggplot(data = high_data, + mapping = aes(x=mo,y=r_squared))+ + geom_line(color="magenta4")+ + theme_bw()+ + ylim(0,1)+ + xlab("Month")+ + ylab("R-Squared")+ + ggtitle("Monthly R-Squared, Highest Mean")+ + theme(plot.title = element_text(hjust = 0.5)) + + +############################################################################# +# Lowest R-squared Gage analysis + +low_data <- read.csv("C:/Users/ilona/OneDrive - Virginia Tech/HARP/Storm Tests/Ratings/out/usgs_ws_01671025-PRISM-storm_volume-ratings.csv") + +ggplot(data = low_data, + mapping = aes(x=mo,y=r_squared))+ + geom_line(color="darkolivegreen3")+ + theme_bw()+ + ylim(-0.1,1)+ + xlab("Month")+ + ylab("R-Squared")+ + ggtitle("Monthly R-Squared, Lowest Mean")+ + theme(plot.title = element_text(hjust = 0.5)) + +############################################################################# + +precip <- read.csv("C:/Users/ilona/OneDrive - Virginia Tech/HARP/Storm Tests/Peak Diff/usgs_ws_02024000-PRISM-daily.csv") +stormflow <- read.csv("C:/Users/ilona/OneDrive - Virginia 
Tech/HARP/Storm Tests/Peak Diff/usgs_ws_02024000-stormevent-flow.csv") + +precip$obs_date <- as_date(precip$obs_date) +stormflow$timestamp <- as_date(stormflow$timestamp) + +stormflow <- subset(stormflow, stormflow$timestamp >= min(precip$obs_date)) +stormflow <- subset(stormflow, stormflow$timestamp <= max(precip$obs_date)) + +compdata <- sqldf( + "select a.obs_date, a.precip_in, + b.flow, b.stormID + from precip as a + left outer join stormflow as b + on ( + a.yr = b.year + and a.obs_date = b.timestamp + )" +) + + + +############################################################################ +# Plot Ratings +ggplot(data=comp_ratings, + mapping = aes(x=mo))+ + geom_line(aes(y=r_01620500), color="darkred")+ + geom_line(aes(y=r_01629500), color="red3")+ + geom_line(aes(y=r_01634500), color="firebrick1")+ + geom_line(aes(y=r_01643700), color="orangered1")+ + geom_line(aes(y=r_01662800), color="darkorange1")+ + geom_line(aes(y=r_01669000), color="orange1")+ + geom_line(aes(y=r_01669520), color="goldenrod1")+ + geom_line(aes(y=r_02018500), color="yellow2")+ + geom_line(aes(y=r_02021500), color="yellowgreen")+ + geom_line(aes(y=r_02024000), color="palegreen3")+ + geom_line(aes(y=r_02026000), color="darkslategray3")+ + geom_line(aes(y=r_02029000), color="deepskyblue3")+ + geom_line(aes(y=r_02031000), color="dodgerblue4")+ + geom_line(aes(y=r_02032250), color="darkslateblue")+ + geom_line(aes(y=r_02034000), color="purple4")+ + geom_line(aes(y=r_02033500), color="darkorchid4")+ + geom_line(aes(y=r_02051000), color="darkviolet")+ + geom_line(aes(y=r_02054530), color="darkmagenta")+ + geom_line(aes(y=r_03165000), color="deeppink3")+ + geom_line(aes(y=r_03176500), color="hotpink")+ + theme_bw()+ + xlab("Month")+ + ylab("R-Squared")+ + ggtitle("Monthly R-Squared")+ + theme(plot.title = element_text(hjust = 0.5))+ + ylim(0,1) + + + +ggplot(data=comp_ratings, + mapping = aes(x=mo))+ + geom_line(aes(y=mean(r_01620500)), color="darkred")+ + geom_line(aes(y=mean(r_01629500)), color="red3")+ + geom_line(aes(y=mean(r_01634500)), color="firebrick1")+ + geom_line(aes(y=mean(r_01643700)), color="orangered1")+ + geom_line(aes(y=mean(r_01662800)), color="darkorange1")+ + geom_line(aes(y=mean(r_01669000)), color="orange1")+ + geom_line(aes(y=mean(r_01669520)), color="goldenrod1")+ + geom_line(aes(y=mean(r_02018500)), color="yellow2")+ + geom_line(aes(y=mean(r_02021500)), color="yellowgreen")+ + geom_line(aes(y=mean(r_02024000)), color="palegreen3")+ + geom_line(aes(y=mean(r_02026000)), color="darkslategray3")+ + geom_line(aes(y=mean(r_02029000)), color="deepskyblue3")+ + geom_line(aes(y=mean(r_02031000)), color="dodgerblue4")+ + geom_line(aes(y=mean(r_02032250)), color="darkslateblue")+ + geom_line(aes(y=mean(r_02034000)), color="purple4")+ + geom_line(aes(y=mean(r_02033500)), color="darkorchid4")+ + geom_line(aes(y=mean(r_02051000)), color="darkviolet")+ + geom_line(aes(y=mean(r_02054530)), color="darkmagenta")+ + geom_line(aes(y=mean(r_03165000)), color="deeppink3")+ + geom_line(aes(y=mean(r_03176500)), color="hotpink")+ + theme_bw()+ + xlab("Month")+ + ylab("R-Squared")+ + ggtitle("Monthly R-Squared")+ + theme(plot.title = element_text(hjust = 0.5))+ + ylim(0.25,0.75) + + +ggplot(data=comp_ratings, + mapping = aes(x=mo))+ + geom_line(aes(y=mean(r_03176500)), color="hotpink") + + +ggplot(data = comp_ratings, + mapping = aes(x=mo))+ + geom_line(aes(y=r_01620500), color="red3")+ + geom_line(aes(y=r_01634500), color="orange2")+ + geom_line(aes(y=r_01669000), color="gold1")+ + geom_line(aes(y=r_02021500), 
color="yellowgreen")+ + geom_line(aes(y=r_02031000), color="darkturquoise")+ + geom_line(aes(y=r_02034000), color="dodgerblue")+ + geom_line(aes(y=r_02044500), color="dodgerblue4")+ + geom_line(aes(y=r_02054530), color="purple3")+ + geom_line(aes(y=r_03165000), color="orchid3")+ + geom_line(aes(y=r_03176500), color="hotpink")+ + theme_bw()+ + xlab("Month")+ + ylab("R-Squared")+ + ggtitle("Monthly R-Squared")+ + theme(plot.title = element_text(hjust = 0.5))+ + geom_rect(aes(xmin = 6, xmax = 8, ymin = 0.025, ymax = 0.55), + fill="transparent", color="red", size=0.75)+ + geom_rect(aes(xmin = 3, xmax = 5, ymin = 0.55, ymax = 0.85), + fill="transparent", color="red", size=0.75)+ + geom_rect(aes(xmin = 9.25, xmax = 10.75, ymin = 0.4, ymax = 0.85), + fill="transparent", color="red", size=0.75)+ + geom_rect(aes(xmin = 1.5, xmax = 2.5, ymin = 0.45, ymax = 0.85), + fill="transparent", color="red", size=0.75)+ + ylim(0,1) + + +# July Dip (> 5) +ggplot(data = comp_ratings, + mapping = aes(x=mo))+ + geom_line(aes(y=r_01620500), color="red3")+ + geom_line(aes(y=r_01634500), color="orange2")+ + geom_line(aes(y=r_02021500), color="yellowgreen")+ + geom_line(aes(y=r_02044500), color="dodgerblue4")+ + geom_line(aes(y=r_02054530), color="purple3")+ + geom_line(aes(y=r_03165000), color="orchid3")+ + theme_bw()+ + xlab("Month")+ + ylab("R-Squared")+ + ggtitle("July Decrease in Monthly R-Squared")+ + theme(plot.title = element_text(hjust = 0.5))+ + xlim(5,9)+ + ylim(0,1) + +# April Peak (not > 5) +ggplot(data = comp_ratings, + mapping = aes(x=mo))+ + geom_line(aes(y=r_02021500), color="yellowgreen")+ + geom_line(aes(y=r_02034000), color="dodgerblue")+ + geom_line(aes(y=r_02054530), color="purple3")+ + geom_line(aes(y=r_03165000), color="orchid3")+ + geom_line(aes(y=r_03176500), color="hotpink")+ + theme_bw()+ + xlab("Month")+ + ylab("R-Squared")+ + ggtitle("April Peak Monthly R-Squared")+ + theme(plot.title = element_text(hjust = 0.5))+ + xlim(2,6)+ + ylim(0,1) + + +# February Peak (not > 5) +ggplot(data = comp_ratings, + mapping = aes(x=mo))+ + geom_line(aes(y=r_01669000), color="gold1")+ + geom_line(aes(y=r_02031000), color="darkturquoise")+ + geom_line(aes(y=r_02054530), color="purple3")+ + geom_line(aes(y=r_03176500), color="hotpink")+ + theme_bw()+ + xlab("Month")+ + ylab("R-Squared")+ + ggtitle("February Peak Monthly R-Squared")+ + theme(plot.title = element_text(hjust = 0.5))+ + xlim(1,4)+ + ylim(0,1) + + +# October Peak (not > 5) +ggplot(data = comp_ratings, + mapping = aes(x=mo))+ + geom_line(aes(y=r_01620500), color="red3")+ + geom_line(aes(y=r_02021500), color="yellowgreen")+ + geom_line(aes(y=r_02031000), color="darkturquoise")+ + geom_line(aes(y=r_02044500), color="dodgerblue4")+ + geom_line(aes(y=r_03165000), color="orchid3")+ + theme_bw()+ + xlab("Month")+ + ylab("R-Squared")+ + ggtitle("October Peak Monthly R-Squared")+ + theme(plot.title = element_text(hjust = 0.5))+ + xlim(8,12)+ + ylim(0,1) + + +# Data frame with monthly mean r-squared value +monthly_mean <- data.frame(rowMeans(comp_ratings[,2:11])) +monthly_mean$mo <- c(1:12) +monthly_mean$season <- ifelse (monthly_mean$mo %in% c(6:8), + "SUMMER", + ifelse (monthly_mean$mo %in% c(9:11), + "AUTUMN", + ifelse (monthly_mean$mo %in% c(12,1,2), + "WINTER", + ifelse (monthly_mean$mo %in% c(3:5), + "SPRING", NA)))) +monthly_mean <- monthly_mean[, c(2,1)] +colnames(monthly_mean)[2] <- "mean_r_squared" + + + + +# Add season to comp data +comp_ratings$season <- ifelse (comp_ratings$mo %in% c(6:8), + "SUMMER", + ifelse (comp_ratings$mo %in% c(9:11), + 
"AUTUMN", + ifelse (comp_ratings$mo %in% c(12,1,2), + "WINTER", + ifelse (comp_ratings$mo %in% c(3:5), + "SPRING", NA)))) + +library(ggbeeswarm) + +ggplot(data = pivot_longer(comp_ratings,2:21))+ + aes(y = season, x=value, color=season)+ + geom_boxplot()+ + xlim(0,1)+ + theme_bw()+ + xlab("R-Squared")+ + ylab("Season")+ + ggtitle("Seasonal R-Squared Values")+ + theme(plot.title = element_text(hjust = 0.5), legend.position = "none") + + +ggplot(data = pivot_longer(comp_ratings,2:21))+ + aes(y = mo, x=value)+ + geom_boxplot(mapping = aes(group = mo))+ + xlim(0,1)+ + theme_bw()+ + xlab("R-Squared")+ + ylab("Month")+ + ggtitle("Monthly R-Squared Values")+ + theme(plot.title = element_text(hjust = 0.5), legend.position = "none")+ + scale_y_continuous(breaks = seq(0,12,1)) + + + + +# Plot monthly average rsquared value +ggplot(data = monthly_mean, + mapping = aes(x=mo))+ + geom_line(aes(y=mean_r_squared), color="hotpink3")+ + theme_bw()+ + xlab("Month")+ + ylab("Mean R-Squared")+ + ggtitle("Monthly Average R-Squared Values")+ + theme(plot.title = element_text(hjust = 0.5))+ + ylim(0,1) + + + + + + + + + + + + + From b5b874e688cc0eca0689c8b4eafc3eb1597d5704 Mon Sep 17 00:00:00 2001 From: Nate Foley Date: Mon, 7 Oct 2024 12:09:47 -0400 Subject: [PATCH 3/8] VA hydro analytics (unfinished) --- HARP-2024-2025/basic_analytics_for_vahydro.R | 184 +++++++++++++++++++ HARP-2024-2025/storm_event_ratings.R | 39 ++++ HARP-2024-2025/using_basic_analytics.R | 14 ++ 3 files changed, 237 insertions(+) create mode 100644 HARP-2024-2025/basic_analytics_for_vahydro.R create mode 100644 HARP-2024-2025/storm_event_ratings.R create mode 100644 HARP-2024-2025/using_basic_analytics.R diff --git a/HARP-2024-2025/basic_analytics_for_vahydro.R b/HARP-2024-2025/basic_analytics_for_vahydro.R new file mode 100644 index 00000000..f4782636 --- /dev/null +++ b/HARP-2024-2025/basic_analytics_for_vahydro.R @@ -0,0 +1,184 @@ +#Creates Summary analytic functions for workflow +#Use for VA HYDRO metrics +library(tidyr) +library(sqldf) + +## For PRISM data + +prism_summary_analytics <- function(df){ +df <- + separate (data = df, + col = obs_date, + into = c("obs_date","obs_time"), + sep = " ") +df <- + separate (data = df, + col = obs_date, + into = c("obs_year","obs_month","obs_day"), + sep = "-") + +#creating a yearly summary with each year and its total precip +yearly.summary <- + sqldf( + "SELECT obs_year, SUM(precip_in) AS total_precip + FROM df + GROUP BY obs_year" +) + +#summary analytics + +precip_annual_max_in <- + max(yearly.summary$total_precip) + +precip_annual_max_year <- + yearly.summary$obs_year[which.max(yearly.summary$total_precip)] + +precip_annual_mean_in <- + mean(yearly.summary$total_precip) + + +#For min values and years, we can exclude the last row since the current year +#is incomplete data + +precip_annual_min_in <- + min(yearly.summary$total_precip[-nrow(yearly.summary)]) +precip_annual_min_year <- + yearly.summary$obs_year[which.min + (yearly.summary$total_precip[-nrow + (yearly.summary)])] + + +precip_daily_max_in <- max(df$precip_in) +precip_hourly_max_in <- max(df$precip_in)/24 +#precip_hourly_max_in done using interpolation (not super realistic). +#Alternatively, we could look at the rainfall distribution table for a +#type II storm and multiply by the max P(t)/P(24) value. + +#not sure about last metric, 190_precip_in. 
+print(c(precip_annual_max_in, precip_annual_max_year, precip_annual_mean_in, + precip_annual_min_in, precip_annual_min_year, precip_daily_max_in, + precip_hourly_max_in)) #move into dataframe with 7 columns and 1 row +} + +# For Daymet + +daymet_summary_analytics <- function(df){ + df <- + separate (data = df, + col = obs_date, + into = c("obs_date","obs_time"), + sep = " ") + df <- + separate (data = df, + col = obs_date, + into = c("obs_year","obs_month","obs_day"), + sep = "-") + + #creating a yearly summary with each year and its total precip + yearly.summary <- + sqldf( + "SELECT obs_year, SUM(precip_in) AS total_precip + FROM df + GROUP BY obs_year" + ) + + #summary analytics + + precip_annual_max_in <- + max(yearly.summary$total_precip) + + precip_annual_max_year <- + yearly.summary$obs_year[which.max(yearly.summary$total_precip)] + + precip_annual_mean_in <- + mean(yearly.summary$total_precip) + + + #For min values and years, we can exclude the last row since the current year + #is incomplete data + + precip_annual_min_in <- + min(yearly.summary$total_precip[-nrow(yearly.summary)]) + precip_annual_min_year <- + yearly.summary$obs_year[which.min + (yearly.summary$total_precip[-nrow + (yearly.summary)])] + + + precip_daily_max_in <- max(df$precip_in) + precip_hourly_max_in <- max(df$precip_in)/24 + #precip_hourly_max_in done using interpolation (not super realistic). + #Alternatively, we could look at the rainfall distribution table for a + #type II storm and multiply by the max P(t)/P(24) value. + + #not sure about last metric, 190_precip_in. + print(c(precip_annual_max_in, precip_annual_max_year, precip_annual_mean_in, + precip_annual_min_in, precip_annual_min_year, precip_daily_max_in, + precip_hourly_max_in)) +} + +# For nldas2 + +nldas2_summary_analytics <- function(df){ + df <- + separate (data = df, + col = obs_date, + into = c("obs_date","obs_time"), + sep = " ") + df <- + separate (data = df, + col = obs_date, + into = c("obs_year","obs_month","obs_day"), + sep = "-") + + #creating a yearly summary with each year and its total precip + yearly.summary <- + sqldf( + "SELECT obs_year, SUM(precip_in) AS total_precip + FROM df + GROUP BY obs_year" + ) + + #summary analytics + + precip_annual_max_in <- + max(yearly.summary$total_precip) + + precip_annual_max_year <- + yearly.summary$obs_year[which.max(yearly.summary$total_precip)] + + precip_annual_mean_in <- + mean(yearly.summary$total_precip) + + + #For min values and years, we can exclude the last row since the current year + #is incomplete data + + precip_annual_min_in <- + min(yearly.summary$total_precip[c(-nrow(yearly.summary),-1)]) + precip_annual_min_year <- + yearly.summary$obs_year[which.min + (yearly.summary$total_precip[c(-nrow + (yearly.summary),-1)])] + daily.summary <- + sqldf( + "SELECT obs_year, obs_month, obs_day, SUM(precip_in) AS total_precip + FROM df + GROUP BY obs_year, obs_month, obs_day" + ) + + precip_daily_max_in <- max(daily.summary$total_precip[-1]) + + precip_hourly_max_in <- max(df$precip_in) + + + #not sure about last metric, 190_precip_in. 
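+  #("190_precip_in" presumably refers to l90_precip_in, a 90-day low-precipitation
+  #metric analogous to a 90-day low flow; later commits in this series add it to
+  #summary_analytics() via a rolling 90-day mean and IHA's group2().)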
+ print(c(precip_annual_max_in, precip_annual_max_year, precip_annual_mean_in, + precip_annual_min_in, precip_annual_min_year, precip_daily_max_in, + precip_hourly_max_in)) +} + + +metric <- c("precip_annual_max_in", 'precip_annual_max_year', 'precip_annual_mean_in', + 'precip_annual_min_in', 'precip_annual_min_year', 'precip_hourly_max_in', + 'precip_daily_max_in') \ No newline at end of file diff --git a/HARP-2024-2025/storm_event_ratings.R b/HARP-2024-2025/storm_event_ratings.R new file mode 100644 index 00000000..fd00f31a --- /dev/null +++ b/HARP-2024-2025/storm_event_ratings.R @@ -0,0 +1,39 @@ +## Set working directory +setwd("C:/Users/natef/OneDrive - Virginia Tech/HARP") + +#get packages +library(ggplot2) +library(sqldf) +library(tidyr) + +#set varibales for stream gage id +gage_id <- "02026000" + + +#download stormeventflow and rating values +stormflow <- read.csv(paste("http://deq1.bse.vt.edu:81/met/stormVol_nldas2/flow/usgs_ws_",gage_id,"-stormevent-flow.csv",sep = "")) +ratings0 <- read.csv(paste("http://deq1.bse.vt.edu:81/met/nldas2/out/usgs_ws_",gage_id,"-nldas2-lm_simple-rating-ts.csv",sep = "")) + +#clean up ratings dates +ratings <- ratings0 %>% + separate(start_date, into = c("timestamp","start_time"), sep = " ") + +#sql join +# Perform the join +result <- sqldf("SELECT stormflow.timestamp, stormflow.stormID, ratings.rating + FROM stormflow + INNER JOIN ratings ON stormflow.timestamp = ratings.timestamp") +is.na(result$stormID) + +plot <- ggplot(data = result, + mapping = aes(x = !is.na(stormID), y = rating))+ + geom_boxplot()+ + xlab("Storm Event?")+ + ggtitle("Comparison of Ratings During Storm Events and Outside of Storm Events")+ + theme_bw()+ + ylim(c(0,1)) + +plot + + + diff --git a/HARP-2024-2025/using_basic_analytics.R b/HARP-2024-2025/using_basic_analytics.R new file mode 100644 index 00000000..02b0a617 --- /dev/null +++ b/HARP-2024-2025/using_basic_analytics.R @@ -0,0 +1,14 @@ +source("C:/Users/natef/OneDrive - Virginia Tech/HARP/Github/HARParchive/HARP-2024-2025/basic analytics for vahydro.R") + +prism <- read.csv("http://deq1.bse.vt.edu:81/met/stormVol_prism/precip/usgs_ws_01613900-PRISM-all.csv") +prism.summary <- prism_summary_analytics(prism) + +daymet <- read.csv("http://deq1.bse.vt.edu:81/met/daymet/precip/usgs_ws_01613900-daymet-all.csv") +daymet.summary <- daymet_summary_analytics(daymet) + +nldas2 <- read.csv("http://deq1.bse.vt.edu:81/met/nldas2/precip/usgs_ws_01613900-nldas2-all.csv") +nldas2.summary <- nldas2_summary_analytics(nldas2) + +analytics <- data.frame(metric, prism.summary)#, daymet.summary, nldas2.summary) + +precip_annual_max_in <- analytics$prism.summary[1] From d8d883dbc0a2bba9bf9ccaf001d95b2556d5040e Mon Sep 17 00:00:00 2001 From: Nate Foley Date: Fri, 25 Oct 2024 10:03:52 -0400 Subject: [PATCH 4/8] 10.25 metrics commit --- HARP-2024-2025/GIS script.R | 140 +++++++++++++++++++ HARP-2024-2025/basic_analytics_for_vahydro.R | 102 +++++++++----- HARP-2024-2025/caching function.R | 33 +++++ HARP-2024-2025/retrieve_vahydro_metrics.R | 12 ++ HARP-2024-2025/using_basic_analytics.R | 36 ++++- HARP-2024-2025/variable definitions.R | 6 + 6 files changed, 288 insertions(+), 41 deletions(-) create mode 100644 HARP-2024-2025/GIS script.R create mode 100644 HARP-2024-2025/caching function.R create mode 100644 HARP-2024-2025/retrieve_vahydro_metrics.R create mode 100644 HARP-2024-2025/variable definitions.R diff --git a/HARP-2024-2025/GIS script.R b/HARP-2024-2025/GIS script.R new file mode 100644 index 00000000..fbcb3196 --- /dev/null +++ 
b/HARP-2024-2025/GIS script.R @@ -0,0 +1,140 @@ +library(tidyverse) +#Assumes all ratings files are in a folder ratings/daymet/ or ratings/prism/ +for(j in c("daymet","prism","nldas")){ + print(j) + #Set path to read in ratings based on data source of outer loop + pathToread <- paste0("C:/Users/natef/OneDrive - Virginia Tech/HARP/Ratings/",j,"/") + for( i in list.files(pathToread) ){ + #For each file in the directory, read in the file i as a csv + filei <- read.csv(paste0(pathToread,i)) + #Store the gage number + filei$gage <- gsub(".*_(\\d+)-.*","\\1",i) + #Store the analysis type + filei$workflow <- gsub(".*_","",j) + #Keep only the necessary columns, depending on the workflow: + if(filei$workflow[1] == "simplelm"){ + filei <- filei[,c("mo","rating","gage","workflow")] + }else{ + filei <- filei[,c("mo","r_squared","gage","workflow")] + } + names(filei) <- c("mo","rating","gage","workflow") + #Combine the file into a single data frame + if(!exists("combineFile")){ + combineFile <- filei + }else{ + combineFile <- rbind(combineFile,filei) + } + } + #Assign to a specific variable and delete the generic combineFile + assign(paste0('combineFile',j),combineFile) + rm(combineFile) +} + +#Join the daymet and prism data together +joinData <- combineFileprism %>% + #Rename the rating for clarity + select(prismRatingstorm = rating,gage,mo,workflow) %>% + #Join in the dayment data, but first rename the rating column for clarity. + #Join on gage, month, and workflow + full_join(combineFiledaymet %>% + select(daymetRatingstorm = rating,gage,mo,workflow), + by = c("gage","mo","workflow")) %>% + full_join(combineFiledaymet %>% + select(daymetRatinglm = rating,gage,mo,workflow), + by = c("gage","mo","workflow")) %>% + #Add remaining prism data: + full_join(combineFileprism %>% + select(prismRatinglm = rating,gage,mo,workflow), + by = c("gage","mo","workflow")) %>% + #Join in the nldas data, but first rename the rating column for clarity. + #Join on gage, month, and workflow + full_join(combineFilenldas %>% + select(nldasRatingstorm = rating,gage,mo,workflow), + by = c("gage","mo","workflow")) %>% + full_join(combineFilenldas %>% + select(nldasRatinglm = rating,gage,mo,workflow), + by = c("gage","mo","workflow")) %>% + #For easier viewing, combine lm and storm columns such that there is only one + #column for prism, daymet, nldas classified by the workflow column + mutate(prismRating = coalesce(prismRatingstorm,prismRatinglm), + daymetRating = coalesce(daymetRatingstorm,daymetRatinglm), + nldasRating = coalesce(nldasRatingstorm,nldasRatinglm) + ) %>% + #Remove now superflous columns: + select(-prismRatingstorm,-prismRatinglm, + -daymetRatingstorm,-daymetRatinglm, + -nldasRatingstorm,-nldasRatinglm) %>% + #Pivot it longer to have a column with the data source and one for the + #ratings, for plotting ease + pivot_longer(c(prismRating,daymetRating,nldasRating), + names_to = 'dataSource', + values_to = 'rating') %>% + arrange(gage,mo,workflow) + +#At each gage, does the best performing data source change between workflows? +#The below code is for ESTIMATES ONLY. The left_join assumes that the ratings +#are unique between datasources for each gage, workflow, month. This is brittle +#and could result in incorrect answers! 
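+#A join-free alternative (sketch only, not used below; `bestByGroup` is a
+#hypothetical name and slice_max() assumes dplyr >= 1.0): keep the highest-rated
+#row per gage/workflow/month directly, which avoids matching rows back by the
+#rating value and so is robust to duplicate ratings.
+bestByGroup <- joinData %>%
+  dplyr::group_by(gage, workflow, mo) %>%
+  dplyr::slice_max(rating, n = 1, with_ties = FALSE) %>%
+  dplyr::ungroup()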
+ +safe_max <- function(x) { + if (length(na.omit(x)) == 0) { + return(NA) # or any other placeholder you prefer + } else { + return(max(x, na.rm = TRUE)) + } +} + +gageCompare <- joinData %>% dplyr::ungroup() %>% + #Group by gage, workflow, and month and find the max rating: + dplyr::group_by(gage,workflow,mo) %>% + dplyr::summarise(maxRating = mean(safe_max(rating))) %>% + #Join the joinData df back in matching by gage, workflow, mo, and rating. This + #could be problematic with duplicate ratings as in a case where all ratings + #across the data sources are the same value + left_join(joinData,by = c('gage','workflow','mo','maxRating' = 'rating')) %>% + #Now pivot wider such that we can see ratings and best data sources side-by-side + pivot_wider(names_from = workflow,values_from = c(maxRating,dataSource)) %>% + #Now filter to only find instances where the best data sources are different: + filter(dataSource != dataSource) %>% + #Create a column showing the difference in rating and arrange by the difference + mutate(differenceInRating = maxRating - maxRating) %>% + arrange(differenceInRating) + + + +#Isolate one month +gageCompareMonth <- gageCompare[gageCompare$mo == 10,] + +library(sf) +#Get the watershed coverage from the server +watershedGeo <- read.csv("http://deq1.bse.vt.edu:81/met/usgsGageWatershedGeofield.csv") + +#Get the gage numbers as their own field and store a copy of the data +gageWatershedSF <- watershedGeo +gageWatershedSF$gage <- gsub(".*_(\\d+)","\\1",gageWatershedSF$hydrocode) + +#Let's begin by plotting October daymet ratings for stormVol +forPlot <- joinData[joinData$dataSource == 'daymetRating' & + joinData$workflow == 'stormvol' & + joinData$mo == 10,] +#Join the geometry data onto out plot data +joinDataSF <- forPlot %>% + left_join(gageWatershedSF %>% select(-hydrocode), + by = 'gage') %>% + filter(!is.na(wkt)) +#Create an SF object. Specify the coordinate system and the field name in the +#data frame that contains the well-known text. 
In this case, WKT is the name of +#the field with the polygon geometries +joinDataSF <- st_as_sf(joinDataSF,wkt = 'wkt',crs = 4326) +#Repair broken geometries +joinDataSF <- st_make_valid(joinDataSF) +#Add shape area in coordinate system units (likely meaningless in crs 4326) +joinDataSF$area <- st_area(joinDataSF) +#Order the data by largest to smallest area to make plotting more effective +joinDataSF <- joinDataSF[order(joinDataSF$area,decreasing = TRUE),] +#Plot SF polygons +ggplot(joinDataSF) + + geom_sf(aes(fill = rating)) + + scale_fill_viridis_c(option = 'magma') + + theme_classic() + diff --git a/HARP-2024-2025/basic_analytics_for_vahydro.R b/HARP-2024-2025/basic_analytics_for_vahydro.R index f4782636..44f39439 100644 --- a/HARP-2024-2025/basic_analytics_for_vahydro.R +++ b/HARP-2024-2025/basic_analytics_for_vahydro.R @@ -2,27 +2,19 @@ #Use for VA HYDRO metrics library(tidyr) library(sqldf) +library(zoo) -## For PRISM data -prism_summary_analytics <- function(df){ -df <- - separate (data = df, - col = obs_date, - into = c("obs_date","obs_time"), - sep = " ") -df <- - separate (data = df, - col = obs_date, - into = c("obs_year","obs_month","obs_day"), - sep = "-") +## For ANY data + +summary_analytics <- function(df){ #creating a yearly summary with each year and its total precip yearly.summary <- sqldf( - "SELECT obs_year, SUM(precip_in) AS total_precip + "SELECT yr, SUM(precip_in) AS total_precip FROM df - GROUP BY obs_year" + GROUP BY yr" ) #summary analytics @@ -31,35 +23,80 @@ precip_annual_max_in <- max(yearly.summary$total_precip) precip_annual_max_year <- - yearly.summary$obs_year[which.max(yearly.summary$total_precip)] + yearly.summary$yr[which.max(yearly.summary$total_precip)] precip_annual_mean_in <- mean(yearly.summary$total_precip) -#For min values and years, we can exclude the last row since the current year -#is incomplete data - +#For min values and years, we can exclude the first and last row since the +#current year and first years are incomplete data precip_annual_min_in <- - min(yearly.summary$total_precip[-nrow(yearly.summary)]) + min(yearly.summary$total_precip[c(-nrow(yearly.summary),-1)]) precip_annual_min_year <- - yearly.summary$obs_year[which.min - (yearly.summary$total_precip[-nrow - (yearly.summary)])] + yearly.summary$yr[which.min + (yearly.summary$total_precip[c(-nrow + (yearly.summary),-1)])] + + + +# Create daily summary to use for all data. This makes hourly data daily sums. +daily.summary <- + sqldf( + "SELECT yr, mo, da, obs_date, SUM(precip_in) AS total_precip + FROM df + GROUP BY yr, mo, da" + ) #include obs_date after SQL to data frame +precip_daily_max_in <- max(daily.summary$total_precip) +#move to beginning -precip_daily_max_in <- max(df$precip_in) -precip_hourly_max_in <- max(df$precip_in)/24 -#precip_hourly_max_in done using interpolation (not super realistic). -#Alternatively, we could look at the rainfall distribution table for a +#if else statement evaluates the amount of unique hours and if 24, +#then hourly max is taken. If not, hourly max is NA +if(length(unique(df$hr)) == 24){ + precip_hourly_max_in <- max(df$precip_in) +} else { + precip_hourly_max_in <- NA +} +#Alternatively to a null value for hourly precip in daily data, +#we could look at the rainfall distribution table for a #type II storm and multiply by the max P(t)/P(24) value. 
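+#Sketch of that alternative (assumptions flagged): scale the daily maximum by an
+#assumed peak-hour fraction of a 24-hr design storm. The fraction used here is a
+#placeholder, not a value read from the NRCS table; substitute the largest 1-hour
+#increment of P(t)/P(24) for the Type II distribution before relying on it.
+peak_hour_fraction <- 0.45 #placeholder fraction only, for illustration
+precip_hourly_max_est_in <- precip_daily_max_in * peak_hour_fraction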
+#this function uses NULL for daily data + + +#l90 using zoo package +l90_precip_in_roll <- + min(rollapply(daily.summary$total_precip, width = 90, FUN = mean, + fill = NA, align = "right"), + na.rm = TRUE) + +#l90 using IHA +if(length(unique(df$hr)) == 24){ + Qout_zoo <- zoo(as.numeric(daily.summary$total_precip), order.by = as.POSIXct(daily.summary$obs_date)) + Qout_g2 <- data.frame(group2(Qout_zoo)) + l90_precip_in <- min(Qout_g2$X90.Day.Min) +} else { + Qout_zoo <- zoo(as.numeric(df$precip_in), order.by = as.POSIXct(df$obs_date)) + Qout_g2 <- data.frame(group2(Qout_zoo)) + l90_precip_in <- min(Qout_g2$X90.Day.Min) +} + -#not sure about last metric, 190_precip_in. -print(c(precip_annual_max_in, precip_annual_max_year, precip_annual_mean_in, - precip_annual_min_in, precip_annual_min_year, precip_daily_max_in, - precip_hourly_max_in)) #move into dataframe with 7 columns and 1 row + +#makes data frame with all 9 metrics +metrics<- data.frame(precip_annual_max_in = precip_annual_max_in, + precip_annual_max_year = precip_annual_max_year, + precip_annual_mean_in = precip_annual_mean_in, + precip_annual_min_in = precip_annual_min_in, + precip_annual_min_year = precip_annual_min_year, + precip_daily_max_in = precip_daily_max_in, + precip_hourly_max_in = precip_hourly_max_in, + l90_precip_in = l90_precip_in, + l90_precip_in_roll = l90_precip_in_roll) } + +#----Ignore old functions # For Daymet daymet_summary_analytics <- function(df){ @@ -177,8 +214,3 @@ nldas2_summary_analytics <- function(df){ precip_annual_min_in, precip_annual_min_year, precip_daily_max_in, precip_hourly_max_in)) } - - -metric <- c("precip_annual_max_in", 'precip_annual_max_year', 'precip_annual_mean_in', - 'precip_annual_min_in', 'precip_annual_min_year', 'precip_hourly_max_in', - 'precip_daily_max_in') \ No newline at end of file diff --git a/HARP-2024-2025/caching function.R b/HARP-2024-2025/caching function.R new file mode 100644 index 00000000..36a3b20b --- /dev/null +++ b/HARP-2024-2025/caching function.R @@ -0,0 +1,33 @@ +# set up a caching function for NWIS +Sys.setenv(USGS_cache_dir = "/media/model/usgs") + +hydro_tools <- 'https://raw.githubusercontent.com/HARPgroup/hydro-tools/master' +cbp6_location <- 'https://raw.githubusercontent.com/HARPgroup/cbp6/master' +hydro_tools_location <- hydro_tools +om_location <- "https://raw.githubusercontent.com/HARPgroup/om/master" +elfgen_location <- "https://raw.githubusercontent.com/HARPgroup/elfgen/master" +openmi_om_location <- "https://raw.githubusercontent.com/HARPgroup/openmi-om/master" +vahydro_location <- "https://github.com/HARPgroup/vahydro/blob/master" +github_location <- "C:/usr/local/home/git" +HARParchive_location <- paste0(github_location, "/HARParchive") +# foundation_location <- "C:\\Users\\faw18626\\OneDrive - Commonwealth of Virginia" +foundation_location <- paste0(Sys.getenv("USERPROFILE"),"\\OneDrive - Commonwealth of Virginia\\OWS\\foundation_datasets\\wsp\\wsp2020") +onedrive_location <- paste0(Sys.getenv("USERPROFILE"),"\\OneDrive - Commonwealth of Virginia") + +folder <- '/Workspace/tmp/' +export_path <- '/Workspace/tmp/' +base_url <- 'http://deq1.bse.vt.edu:81/d.dh' #needed for REST functions +save_url <- "http://deq1.bse.vt.edu:81/data/proj3/out"; +noaa_api_key <- 'OdnQxMwsNwbNTPiFJRygEwxflkOIrxhh' +options(noaakey = noaa_api_key) + +#### Old Stuff No Longer Needed? 
+vahydro_directory <- "/usr/local/home/git/r-dh-ecohydro/Analysis/fn_vahydro-2.0" +auth_directory <- "/usr/local/home/git/r-dh-ecohydro/ELFGEN/internal" +file_directory <- "/Workspace/tmp" +save_directory <- "/Workspace/tmp" +fxn_locations <- "/usr/local/home/git/r-dh-ecohydro/ELFGEN/internal/" +habitat_files <- "/usr/local/home/git/r-dh-ecohydro/Analysis/Habitat/" +fxn_vahydro <- "/usr/local/home/git/hydro-tools" +base_directory <- "/usr/local/home/git/r-dh-ecohydro" + diff --git a/HARP-2024-2025/retrieve_vahydro_metrics.R b/HARP-2024-2025/retrieve_vahydro_metrics.R new file mode 100644 index 00000000..2c4b9ecf --- /dev/null +++ b/HARP-2024-2025/retrieve_vahydro_metrics.R @@ -0,0 +1,12 @@ +# GET VAHydro 1.0 RIVERSEG l90_Qout DATA +mdf <- data.frame( + 'model_version' = c('cbp-6.1'), + 'runid' = c('stormVol_prism'), + 'metric' = c('precip_annual_max_in'), + 'runlabel' = c('precip_annual_max_in') +) +met_data <- om_vahydro_metric_grid( + metric = metric, runids = mdf, bundle="watershed", ftype="usgs_full_drainage", + base_url = paste(site,'entity-model-prop-level-export',sep="/"), + ds = ds +) diff --git a/HARP-2024-2025/using_basic_analytics.R b/HARP-2024-2025/using_basic_analytics.R index 02b0a617..1d16c2ca 100644 --- a/HARP-2024-2025/using_basic_analytics.R +++ b/HARP-2024-2025/using_basic_analytics.R @@ -1,14 +1,38 @@ -source("C:/Users/natef/OneDrive - Virginia Tech/HARP/Github/HARParchive/HARP-2024-2025/basic analytics for vahydro.R") +source("C:/Users/natef/OneDrive - Virginia Tech/HARP/Github/HARParchive/HARP-2024-2025/basic_analytics_for_vahydro.R") prism <- read.csv("http://deq1.bse.vt.edu:81/met/stormVol_prism/precip/usgs_ws_01613900-PRISM-all.csv") -prism.summary <- prism_summary_analytics(prism) +prism_summary <- summary_analytics(prism) + daymet <- read.csv("http://deq1.bse.vt.edu:81/met/daymet/precip/usgs_ws_01613900-daymet-all.csv") -daymet.summary <- daymet_summary_analytics(daymet) +daymet_summary <- summary_analytics(daymet) nldas2 <- read.csv("http://deq1.bse.vt.edu:81/met/nldas2/precip/usgs_ws_01613900-nldas2-all.csv") -nldas2.summary <- nldas2_summary_analytics(nldas2) +nldas2_summary <- summary_analytics(nldas2) #issues with l90 -- zoo unique values + +#pull any value like this: + +prism_l90_precip_in <- prism_summary$l90_precip_in +daymet_l90_precip_in <- daymet_summary$l90_precip_in +nldas2_l90_precip_in <- nldas2_summary$l90_precip_in + +#Or even without the summary: + +summary_analytics(prism)$l90_precip_in + +visualizations <- function(prism.metric, daymet.metric, nldas2.metric, title = NULL){ +ggplot(data = NULL, mapping = aes(x = c("prism","dayment","nldas2"), +y = c(prism.metric, daymet.metric, nldas2.metric)))+ + geom_bar(stat = "identity", fill = "lightblue3")+ + labs(title = title, y = "precip (in)", x = "model")} + +visualizations(prism.metric = prism_summary$precip_annual_max_in, + daymet.metric = daymet_summary$precip_annual_max_in, + nldas2.metric = nldas2_summary$precip_annual_max_in, + title = "yearly max precip") +visualizations(prism.metric = prism_summary$precip_annual_min_in, + daymet.metric = daymet_summary$precip_annual_min_in, + nldas2.metric = nldas2_summary$precip_annual_min_in, + title = "yearly min precip") -analytics <- data.frame(metric, prism.summary)#, daymet.summary, nldas2.summary) -precip_annual_max_in <- analytics$prism.summary[1] diff --git a/HARP-2024-2025/variable definitions.R b/HARP-2024-2025/variable definitions.R new file mode 100644 index 00000000..d5e029e7 --- /dev/null +++ b/HARP-2024-2025/variable definitions.R @@ -0,0 
+1,6 @@ +coverage_hydrocode <- "usgs_ws_01613900" +coverage_bundle <- "watershed" #type of coverage we are looking for +coverage_ftype <- "usgs_full_drainage" #feature type +model_version <- "cbp-6.1" +met_file <- "http://deq1.bse.vt.edu:81/met/stormVol_prism/precip/usgs_ws_01613900-PRISM-all.csv" +scenario_name <- "stormVol_prism" From 6b6ecde6069a78f601eb9c9c63a42a4db24b0b17 Mon Sep 17 00:00:00 2001 From: Nate Foley Date: Fri, 25 Oct 2024 10:43:15 -0400 Subject: [PATCH 5/8] 10.25 metrics commit --- HARP-2024-2025/VAHydro_metric_script.R | 116 +++++++++++++++++++++++ HARP-2024-2025/precip_coverage_summary.R | 61 ++++++++++++ HARP-2024-2025/yearly_metrics.R | 44 +++++++++ 3 files changed, 221 insertions(+) create mode 100644 HARP-2024-2025/VAHydro_metric_script.R create mode 100644 HARP-2024-2025/precip_coverage_summary.R create mode 100644 HARP-2024-2025/yearly_metrics.R diff --git a/HARP-2024-2025/VAHydro_metric_script.R b/HARP-2024-2025/VAHydro_metric_script.R new file mode 100644 index 00000000..89330e30 --- /dev/null +++ b/HARP-2024-2025/VAHydro_metric_script.R @@ -0,0 +1,116 @@ +# Inputs (args): +# 1 = path to precip data you want to use +# 2 = path of new csv +suppressPackageStartupMessages(library(dataRetrieval)) +suppressPackageStartupMessages(library(lubridate)) +suppressPackageStartupMessages(library(sqldf)) +suppressPackageStartupMessages(library(dplyr)) +args <- commandArgs(trailingOnly = TRUE) +if (length(args) != 2) { + message("Usage: Rscript usgsdata.R gage_id output_path") + q() +} + +read_path <- args[1] +write_path <- args[2] + +message("Pull csv from input file path") + +df <- read.csv(read_path) + +summary_analytics <- function(df){ + + #creating a yearly summary with each year and its total precip + yearly.summary <- + sqldf( + "SELECT yr, SUM(precip_in) AS total_precip + FROM df + GROUP BY yr" + ) + + #summary analytics + + precip_annual_max_in <- + max(yearly.summary$total_precip) + + precip_annual_max_year <- + yearly.summary$yr[which.max(yearly.summary$total_precip)] + + precip_annual_mean_in <- + mean(yearly.summary$total_precip) + + + #For min values and years, we can exclude the first and last row since the + #current year and first years are incomplete data + precip_annual_min_in <- + min(yearly.summary$total_precip[c(-nrow(yearly.summary),-1)]) + precip_annual_min_year <- + yearly.summary$yr[which.min + (yearly.summary$total_precip[c(-nrow + (yearly.summary),-1)])] + + + + # Create daily summary to use for all data. This makes hourly data daily sums. + daily.summary <- + sqldf( + "SELECT yr, mo, da, obs_date, SUM(precip_in) AS total_precip + FROM df + GROUP BY yr, mo, da" + ) #include obs_date after SQL to data frame + precip_daily_max_in <- max(daily.summary$total_precip) + #move to beginning + + + #if else statement evaluates the amount of unique hours and if 24, + #then hourly max is taken. If not, hourly max is NA + if(length(unique(df$hr)) == 24){ + precip_hourly_max_in <- max(df$precip_in) + } else { + precip_hourly_max_in <- NA + } + #Alternatively to a null value for hourly precip in daily data, + #we could look at the rainfall distribution table for a + #type II storm and multiply by the max P(t)/P(24) value. 
+ #this function uses NULL for daily data + + + #l90 using zoo package + l90_precip_in_roll <- + min(rollapply(daily.summary$total_precip, width = 90, FUN = mean, + fill = NA, align = "right"), + na.rm = TRUE) + + #l90 using IHA + if(length(unique(df$hr)) == 24){ + Qout_zoo <- zoo(as.numeric(daily.summary$total_precip), order.by = as.POSIXct(daily.summary$obs_date)) + Qout_g2 <- data.frame(group2(Qout_zoo)) + l90_precip_in <- min(Qout_g2$X90.Day.Min) + } else { + Qout_zoo <- zoo(as.numeric(df$precip_in), order.by = as.POSIXct(df$obs_date)) + Qout_g2 <- data.frame(group2(Qout_zoo)) + l90_precip_in <- min(Qout_g2$X90.Day.Min) + } + + + + #makes data frame with all 9 metrics + metrics<- data.frame(precip_annual_max_in = precip_annual_max_in, + precip_annual_max_year = precip_annual_max_year, + precip_annual_mean_in = precip_annual_mean_in, + precip_annual_min_in = precip_annual_min_in, + precip_annual_min_year = precip_annual_min_year, + precip_daily_max_in = precip_daily_max_in, + precip_hourly_max_in = precip_hourly_max_in, + l90_precip_in = l90_precip_in, + l90_precip_in_roll = l90_precip_in_roll) +} + + +summary_analytics(df) +#What would be the best way to format the output csv? + + +message(paste0("Write csv in new file path: ",write_path)) +write.csv(precip_metric,write_path) + diff --git a/HARP-2024-2025/precip_coverage_summary.R b/HARP-2024-2025/precip_coverage_summary.R new file mode 100644 index 00000000..12bca0ba --- /dev/null +++ b/HARP-2024-2025/precip_coverage_summary.R @@ -0,0 +1,61 @@ +# Insert info about this event into database +# attach a property showing the path to the original raster file +# attach a property with a web URL to the origina raster file +basepath='/var/www/R'; +source("/var/www/R/config.R") +library("lubridate") +library("hydrotools") +# Set up our data source +source(paste0(HARParchive_location,"/HARP-2024-2025/basic_analytics_for_vahydro.R")) + +ds <- RomDataSource$new(site, rest_uname = rest_uname) +ds$get_token(rest_pw) + +# Accepting command arguments: +argst <- commandArgs(trailingOnly = T) +if (length(argst) < 7) { + + message("Use: Rscript met_store_info.R $ddate datasource coverage_hydrocode coverage_bundle coverage_ftype model_version met_file") + message("Ex: Rscript met_store_info.R $ddate nldas2 N5113 landunit cbp6_landseg met_file") + q('n') +} +ddate <- argst[1] +scenario_name <- argst[2] +coverage_hydrocode <- argst[3] +coverage_bundle <- argst[4] +coverage_ftype <- argst[5] +model_version <- argst[6] +met_file <- argst[7] +# load the feature -- get feature hydroid +# find the dh_timeseries_weather record for this event +# attach an image property to the record +# return + message(paste("Searching for feature hydrocode=", coverage_hydrocode,"with ftype",coverage_ftype)) +feature <- RomFeature$new( + ds, + list( + hydrocode=coverage_hydrocode, + ftype=coverage_ftype, + bundle=coverage_bundle + ), + TRUE +) +# this will create or retrieve a model scenario to house this summary data. +model <- om_model_object(ds, feature, model_version) +# if a matching model does not exist, this will go ahead and create one +scenario <- om_get_model_scenario(ds, model, scenario_name) + +met_data <- read.table(met_file, header = TRUE, sep=",") + + +summary <- summary_analytics(met_data) + +numrecs <- nrow(met_data) # what does this do? 
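+#(nrow(met_data) is the number of timeseries records read from met_file; it is
+#posted below as the num_records constant so the scenario retains a record of how
+#much data backed these summary metrics.)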
+vahydro_post_metric_to_scenprop(scenario$pid, 'om_class_Constant', NULL, 'num_records', numrecs, ds) + +vahydro_post_metric_to_scenprop(scenario$pid, 'om_class_Constant', NULL, 'precip_annual_max_in', summary$precip_annual_max_in, ds) + + + + + diff --git a/HARP-2024-2025/yearly_metrics.R b/HARP-2024-2025/yearly_metrics.R new file mode 100644 index 00000000..53e9fd1f --- /dev/null +++ b/HARP-2024-2025/yearly_metrics.R @@ -0,0 +1,44 @@ +#number of days with precip_in and longest duration storm per yr + +library(sqldf) + +#this function uses a series of SQL chunks to compute yearly data frames +#that include helpful QA metrics. +yearly_metrics <- function(df){ +# Create Daily summary to make all data daily +daily_summary <- + sqldf( + "SELECT yr, mo, da, obs_date, SUM(precip_in) AS precip_in + FROM df + GROUP BY yr, mo, da" + ) #include obs_date after SQL to data frame + +# Number of days with precip_in per yr +rainfall_days <- sqldf("SELECT yr, COUNT(*) + AS DaysWithRainfall + FROM daily_summary + WHERE precip_in > 0 + GROUP BY yr") + +# Longest duration storm per yr +storm_events <- sqldf("WITH RainEvents AS ( + SELECT obs_date, precip_in, yr, + (SELECT SUM(precip_in = 0) + FROM daily_summary rd2 + WHERE rd2.obs_date <= rd1.obs_date) + AS StormGroup + FROM daily_summary rd1), + StormDurations AS ( + SELECT yr, StormGroup, COUNT(*) + AS StormDuration + FROM RainEvents + WHERE precip_in > 0 + GROUP BY yr, StormGroup) + SELECT yr, MAX(StormDuration) AS LongestStorm + FROM StormDurations + GROUP BY yr") + +# Printing results +merge(rainfall_days, storm_events) +} +## It seems like NLDAS has much higher results... \ No newline at end of file From c37a8b774a5160708956fe2f3063814a161be10c Mon Sep 17 00:00:00 2001 From: Nate Foley Date: Mon, 4 Nov 2024 12:01:37 -0500 Subject: [PATCH 6/8] better visualizations and minor changes including commenting --- HARP-2024-2025/basic_analytics_for_vahydro.R | 119 ------------------- HARP-2024-2025/precip_coverage_summary.R | 2 +- HARP-2024-2025/using_basic_analytics.R | 49 +++++--- HARP-2024-2025/yearly_metrics.R | 48 +++++--- 4 files changed, 69 insertions(+), 149 deletions(-) diff --git a/HARP-2024-2025/basic_analytics_for_vahydro.R b/HARP-2024-2025/basic_analytics_for_vahydro.R index 44f39439..a54ce3f4 100644 --- a/HARP-2024-2025/basic_analytics_for_vahydro.R +++ b/HARP-2024-2025/basic_analytics_for_vahydro.R @@ -95,122 +95,3 @@ metrics<- data.frame(precip_annual_max_in = precip_annual_max_in, l90_precip_in_roll = l90_precip_in_roll) } - -#----Ignore old functions -# For Daymet - -daymet_summary_analytics <- function(df){ - df <- - separate (data = df, - col = obs_date, - into = c("obs_date","obs_time"), - sep = " ") - df <- - separate (data = df, - col = obs_date, - into = c("obs_year","obs_month","obs_day"), - sep = "-") - - #creating a yearly summary with each year and its total precip - yearly.summary <- - sqldf( - "SELECT obs_year, SUM(precip_in) AS total_precip - FROM df - GROUP BY obs_year" - ) - - #summary analytics - - precip_annual_max_in <- - max(yearly.summary$total_precip) - - precip_annual_max_year <- - yearly.summary$obs_year[which.max(yearly.summary$total_precip)] - - precip_annual_mean_in <- - mean(yearly.summary$total_precip) - - - #For min values and years, we can exclude the last row since the current year - #is incomplete data - - precip_annual_min_in <- - min(yearly.summary$total_precip[-nrow(yearly.summary)]) - precip_annual_min_year <- - yearly.summary$obs_year[which.min - (yearly.summary$total_precip[-nrow - (yearly.summary)])] - 
- - precip_daily_max_in <- max(df$precip_in) - precip_hourly_max_in <- max(df$precip_in)/24 - #precip_hourly_max_in done using interpolation (not super realistic). - #Alternatively, we could look at the rainfall distribution table for a - #type II storm and multiply by the max P(t)/P(24) value. - - #not sure about last metric, 190_precip_in. - print(c(precip_annual_max_in, precip_annual_max_year, precip_annual_mean_in, - precip_annual_min_in, precip_annual_min_year, precip_daily_max_in, - precip_hourly_max_in)) -} - -# For nldas2 - -nldas2_summary_analytics <- function(df){ - df <- - separate (data = df, - col = obs_date, - into = c("obs_date","obs_time"), - sep = " ") - df <- - separate (data = df, - col = obs_date, - into = c("obs_year","obs_month","obs_day"), - sep = "-") - - #creating a yearly summary with each year and its total precip - yearly.summary <- - sqldf( - "SELECT obs_year, SUM(precip_in) AS total_precip - FROM df - GROUP BY obs_year" - ) - - #summary analytics - - precip_annual_max_in <- - max(yearly.summary$total_precip) - - precip_annual_max_year <- - yearly.summary$obs_year[which.max(yearly.summary$total_precip)] - - precip_annual_mean_in <- - mean(yearly.summary$total_precip) - - - #For min values and years, we can exclude the last row since the current year - #is incomplete data - - precip_annual_min_in <- - min(yearly.summary$total_precip[c(-nrow(yearly.summary),-1)]) - precip_annual_min_year <- - yearly.summary$obs_year[which.min - (yearly.summary$total_precip[c(-nrow - (yearly.summary),-1)])] - daily.summary <- - sqldf( - "SELECT obs_year, obs_month, obs_day, SUM(precip_in) AS total_precip - FROM df - GROUP BY obs_year, obs_month, obs_day" - ) - - precip_daily_max_in <- max(daily.summary$total_precip[-1]) - - precip_hourly_max_in <- max(df$precip_in) - - - #not sure about last metric, 190_precip_in. - print(c(precip_annual_max_in, precip_annual_max_year, precip_annual_mean_in, - precip_annual_min_in, precip_annual_min_year, precip_daily_max_in, - precip_hourly_max_in)) -} diff --git a/HARP-2024-2025/precip_coverage_summary.R b/HARP-2024-2025/precip_coverage_summary.R index 12bca0ba..d5a414b4 100644 --- a/HARP-2024-2025/precip_coverage_summary.R +++ b/HARP-2024-2025/precip_coverage_summary.R @@ -48,7 +48,7 @@ scenario <- om_get_model_scenario(ds, model, scenario_name) met_data <- read.table(met_file, header = TRUE, sep=",") -summary <- summary_analytics(met_data) +precip_summary <- summary_analytics(met_data) numrecs <- nrow(met_data) # what does this do? 
vahydro_post_metric_to_scenprop(scenario$pid, 'om_class_Constant', NULL, 'num_records', numrecs, ds) diff --git a/HARP-2024-2025/using_basic_analytics.R b/HARP-2024-2025/using_basic_analytics.R index 1d16c2ca..0294e685 100644 --- a/HARP-2024-2025/using_basic_analytics.R +++ b/HARP-2024-2025/using_basic_analytics.R @@ -1,5 +1,7 @@ source("C:/Users/natef/OneDrive - Virginia Tech/HARP/Github/HARParchive/HARP-2024-2025/basic_analytics_for_vahydro.R") - +library("IHA") +library("ggplot2") +library(sqldf) prism <- read.csv("http://deq1.bse.vt.edu:81/met/stormVol_prism/precip/usgs_ws_01613900-PRISM-all.csv") prism_summary <- summary_analytics(prism) @@ -20,19 +22,36 @@ nldas2_l90_precip_in <- nldas2_summary$l90_precip_in summary_analytics(prism)$l90_precip_in -visualizations <- function(prism.metric, daymet.metric, nldas2.metric, title = NULL){ -ggplot(data = NULL, mapping = aes(x = c("prism","dayment","nldas2"), -y = c(prism.metric, daymet.metric, nldas2.metric)))+ - geom_bar(stat = "identity", fill = "lightblue3")+ - labs(title = title, y = "precip (in)", x = "model")} - -visualizations(prism.metric = prism_summary$precip_annual_max_in, - daymet.metric = daymet_summary$precip_annual_max_in, - nldas2.metric = nldas2_summary$precip_annual_max_in, - title = "yearly max precip") -visualizations(prism.metric = prism_summary$precip_annual_min_in, - daymet.metric = daymet_summary$precip_annual_min_in, - nldas2.metric = nldas2_summary$precip_annual_min_in, - title = "yearly min precip") +#visualizing data: + +# visualizations <- function(prism.metric, daymet.metric, nldas2.metric, title = NULL){ +# ggplot(data = NULL, mapping = aes(x = c("prism","dayment","nldas2"), +# y = c(prism.metric, daymet.metric, nldas2.metric)))+ +# geom_bar(stat = "identity", fill = "lightblue3")+ +# labs(title = title, y = "precip (in)", x = "model")} +# +# visualizations(prism.metric = prism_summary$precip_annual_max_in, +# daymet.metric = daymet_summary$precip_annual_max_in, +# nldas2.metric = nldas2_summary$precip_annual_max_in, +# title = "yearly max precip") +# visualizations(prism.metric = prism_summary$precip_annual_min_in, +# daymet.metric = daymet_summary$precip_annual_min_in, +# nldas2.metric = nldas2_summary$precip_annual_min_in, +# title = "yearly min precip") + +visualizations <- function(gage_id, metric){ + ggplot(data = NULL, mapping = aes(x = c("prism","dayment","nldas2"), + y = c(summary_analytics(read.csv(paste0("http://deq1.bse.vt.edu:81/met/stormVol_prism/precip/",gage_id,"-PRISM-all.csv")))[[metric]], + summary_analytics(read.csv(paste0("http://deq1.bse.vt.edu:81/met/daymet/precip/",gage_id,"-daymet-all.csv")))[[metric]], + summary_analytics(read.csv(paste0("http://deq1.bse.vt.edu:81/met/nldas2/precip/",gage_id,"-nldas2-all.csv")))[[metric]])))+ + geom_bar(stat = "identity", fill = "lightblue3")+ + labs(title = as.character(metric), y = "precip (in)", x = "model")} + +# perform basic bar plot visualizations for any of these 5 metrics for any gage +visualizations(gage_id = "usgs_ws_01613900", metric = "precip_annual_max_in") +visualizations(gage_id = "usgs_ws_01613900", metric = "l90_precip_in") +visualizations(gage_id = "usgs_ws_01613900", metric = "precip_annual_mean_in") +visualizations(gage_id = "usgs_ws_01613900", metric = "precip_annual_min_in") +visualizations(gage_id = "usgs_ws_01613900", metric = "precip_daily_max_in") diff --git a/HARP-2024-2025/yearly_metrics.R b/HARP-2024-2025/yearly_metrics.R index 53e9fd1f..8b0c1cfe 100644 --- a/HARP-2024-2025/yearly_metrics.R +++ 
b/HARP-2024-2025/yearly_metrics.R @@ -14,31 +14,51 @@ daily_summary <- ) #include obs_date after SQL to data frame # Number of days with precip_in per yr +rainfall_hist <- ggplot(data = daily_summary, mapping = aes(x = obs_date, y = precip_in))+ + geom_histogram() rainfall_days <- sqldf("SELECT yr, COUNT(*) AS DaysWithRainfall FROM daily_summary - WHERE precip_in > 0 + WHERE precip_in > 0.01 GROUP BY yr") -# Longest duration storm per yr +# # Longest duration storm per yr +# this marks the storm groups by separating due to days with no rainfall +#the NWS states that >0.01 is "measurable precipitation". +#this changes our numbers by ~20 days for the Hogue creek gage storm_events <- sqldf("WITH RainEvents AS ( - SELECT obs_date, precip_in, yr, - (SELECT SUM(precip_in = 0) - FROM daily_summary rd2 - WHERE rd2.obs_date <= rd1.obs_date) - AS StormGroup - FROM daily_summary rd1), + SELECT obs_date, precip_in, yr, + (SELECT SUM(precip_in <= 0.01) + FROM daily_summary rd2 + WHERE rd2.obs_date <= rd1.obs_date) + AS StormGroup + FROM daily_summary rd1), StormDurations AS ( - SELECT yr, StormGroup, COUNT(*) + SELECT yr, StormGroup, COUNT(*) AS StormDuration FROM RainEvents - WHERE precip_in > 0 + WHERE precip_in > 0.01 GROUP BY yr, StormGroup) - SELECT yr, MAX(StormDuration) AS LongestStorm - FROM StormDurations + SELECT yr, MAX(StormDuration) AS LongestStorm + FROM StormDurations GROUP BY yr") # Printing results -merge(rainfall_days, storm_events) +#merge(rainfall_days) +return(merge(rainfall_days,storm_events)) } -## It seems like NLDAS has much higher results... \ No newline at end of file +## It seems like NLDAS has much higher results... + +yearly_metrics(prism) +yearly_metrics(nldas2) + + +daily_summary <- + sqldf( + "SELECT yr, mo, da, obs_date, SUM(precip_in) AS precip_in + FROM df + GROUP BY yr, mo, da" + ) + +rainfall_hist <- ggplot(data = daily_summary, mapping = aes(x = precip_in))+ + geom_histogram(binwidth = 0.01) From 1ead13cc050a22d833eb533be12bfbf90067ba20 Mon Sep 17 00:00:00 2001 From: Nate Foley Date: Fri, 8 Nov 2024 09:28:04 -0500 Subject: [PATCH 7/8] updates for integration --- HARP-2024-2025/basic_analytics_for_vahydro.R | 11 ++++----- HARP-2024-2025/precip_coverage_summary.R | 22 +++++++++--------- HARP-2024-2025/using_basic_analytics.R | 24 ++++++++++---------- HARP-2024-2025/yearly_metrics.R | 9 ++++++++ 4 files changed, 37 insertions(+), 29 deletions(-) diff --git a/HARP-2024-2025/basic_analytics_for_vahydro.R b/HARP-2024-2025/basic_analytics_for_vahydro.R index a54ce3f4..0aa73393 100644 --- a/HARP-2024-2025/basic_analytics_for_vahydro.R +++ b/HARP-2024-2025/basic_analytics_for_vahydro.R @@ -12,9 +12,10 @@ summary_analytics <- function(df){ #creating a yearly summary with each year and its total precip yearly.summary <- sqldf( - "SELECT yr, SUM(precip_in) AS total_precip + "SELECT COUNT(*) AS ndays, yr, SUM(precip_in) AS total_precip FROM df - GROUP BY yr" + GROUP BY yr + HAVING ndays >= 365" ) #summary analytics @@ -32,11 +33,9 @@ precip_annual_mean_in <- #For min values and years, we can exclude the first and last row since the #current year and first years are incomplete data precip_annual_min_in <- - min(yearly.summary$total_precip[c(-nrow(yearly.summary),-1)]) + min(yearly.summary$total_precip) precip_annual_min_year <- - yearly.summary$yr[which.min - (yearly.summary$total_precip[c(-nrow - (yearly.summary),-1)])] + yearly.summary$yr[which.min(yearly.summary$total_precip)] diff --git a/HARP-2024-2025/precip_coverage_summary.R b/HARP-2024-2025/precip_coverage_summary.R 
index d5a414b4..735d67db 100644 --- a/HARP-2024-2025/precip_coverage_summary.R +++ b/HARP-2024-2025/precip_coverage_summary.R @@ -13,19 +13,18 @@ ds$get_token(rest_pw) # Accepting command arguments: argst <- commandArgs(trailingOnly = T) -if (length(argst) < 7) { +if (length(argst) < 6) { - message("Use: Rscript met_store_info.R $ddate datasource coverage_hydrocode coverage_bundle coverage_ftype model_version met_file") - message("Ex: Rscript met_store_info.R $ddate nldas2 N5113 landunit cbp6_landseg met_file") + message("Use: Rscript met_store_info.R datasource coverage_hydrocode coverage_bundle coverage_ftype model_version met_file") + message("Ex: Rscript met_store_info.R nldas2 N5113 landunit cbp6_landseg met_file") q('n') } -ddate <- argst[1] -scenario_name <- argst[2] -coverage_hydrocode <- argst[3] -coverage_bundle <- argst[4] -coverage_ftype <- argst[5] -model_version <- argst[6] -met_file <- argst[7] +scenario_name <- argst[1] +coverage_hydrocode <- argst[2] +coverage_bundle <- argst[3] +coverage_ftype <- argst[4] +model_version <- argst[5] +met_file <- argst[6] # load the feature -- get feature hydroid # find the dh_timeseries_weather record for this event # attach an image property to the record @@ -53,8 +52,9 @@ precip_summary <- summary_analytics(met_data) numrecs <- nrow(met_data) # what does this do? vahydro_post_metric_to_scenprop(scenario$pid, 'om_class_Constant', NULL, 'num_records', numrecs, ds) -vahydro_post_metric_to_scenprop(scenario$pid, 'om_class_Constant', NULL, 'precip_annual_max_in', summary$precip_annual_max_in, ds) +vahydro_post_metric_to_scenprop(scenario$pid, 'om_class_Constant', NULL, 'precip_annual_max_in', precip_summary$precip_annual_max_in, ds) +vahydro_post_metric_to_scenprop(scenario$pid, 'om_class_Constant', NULL, 'precip_annual_min_in', precip_summary$precip_annual_max_in, ds) diff --git a/HARP-2024-2025/using_basic_analytics.R b/HARP-2024-2025/using_basic_analytics.R index 0294e685..84c8ad3e 100644 --- a/HARP-2024-2025/using_basic_analytics.R +++ b/HARP-2024-2025/using_basic_analytics.R @@ -2,21 +2,21 @@ source("C:/Users/natef/OneDrive - Virginia Tech/HARP/Github/HARParchive/HARP-202 library("IHA") library("ggplot2") library(sqldf) -prism <- read.csv("http://deq1.bse.vt.edu:81/met/stormVol_prism/precip/usgs_ws_01613900-PRISM-all.csv") -prism_summary <- summary_analytics(prism) - - -daymet <- read.csv("http://deq1.bse.vt.edu:81/met/daymet/precip/usgs_ws_01613900-daymet-all.csv") -daymet_summary <- summary_analytics(daymet) - -nldas2 <- read.csv("http://deq1.bse.vt.edu:81/met/nldas2/precip/usgs_ws_01613900-nldas2-all.csv") -nldas2_summary <- summary_analytics(nldas2) #issues with l90 -- zoo unique values +# prism <- read.csv("http://deq1.bse.vt.edu:81/met/stormVol_prism/precip/usgs_ws_01613900-PRISM-all.csv") +# prism_summary <- summary_analytics(prism) +# +# +# daymet <- read.csv("http://deq1.bse.vt.edu:81/met/daymet/precip/usgs_ws_01613900-daymet-all.csv") +# daymet_summary <- summary_analytics(daymet) +# +# nldas2 <- read.csv("http://deq1.bse.vt.edu:81/met/nldas2/precip/usgs_ws_01613900-nldas2-all.csv") +# nldas2_summary <- summary_analytics(nldas2) #pull any value like this: -prism_l90_precip_in <- prism_summary$l90_precip_in -daymet_l90_precip_in <- daymet_summary$l90_precip_in -nldas2_l90_precip_in <- nldas2_summary$l90_precip_in +# prism_l90_precip_in <- prism_summary$l90_precip_in +# daymet_l90_precip_in <- daymet_summary$l90_precip_in +# nldas2_l90_precip_in <- nldas2_summary$l90_precip_in #Or even without the summary: diff --git 
a/HARP-2024-2025/yearly_metrics.R b/HARP-2024-2025/yearly_metrics.R index 8b0c1cfe..f8716b53 100644 --- a/HARP-2024-2025/yearly_metrics.R +++ b/HARP-2024-2025/yearly_metrics.R @@ -43,9 +43,18 @@ storm_events <- sqldf("WITH RainEvents AS ( FROM StormDurations GROUP BY yr") +# RainEvents <- sqldf(" +# SELECT obs_date, precip_in, yr, +# (SELECT SUM(precip_in <= 0.01) +# FROM daily_summary AS rd2 +# WHERE rd2.obs_date <= rd1.obs_date) +# AS StormGroup +# FROM daily_summary AS rd1") + # Printing results #merge(rainfall_days) return(merge(rainfall_days,storm_events)) +return(RainEvents) } ## It seems like NLDAS has much higher results... From 93c19c326435e3ae033b8bceb2ce84881b805260 Mon Sep 17 00:00:00 2001 From: Nate Foley Date: Fri, 8 Nov 2024 09:49:41 -0500 Subject: [PATCH 8/8] updates for integration --- HARP-2024-2025/DataSourceEvaluations.R | 147 ------- HARP-2024-2025/GIS script.R | 140 ------ HARP-2024-2025/StormResults_Analysis.R | 394 ----------------- HARP-2024-2025/VAHydro_metric_script.R | 116 ----- HARP-2024-2025/access-db.R | 167 ------- HARP-2024-2025/access-file.R | 418 ------------------ HARP-2024-2025/assessEventPrecip.R | 312 -------------- HARP-2024-2025/caching function.R | 33 -- HARP-2024-2025/dni_data_experiments.R | 9 - HARP-2024-2025/hydroimport_daily.R | 49 --- HARP-2024-2025/make_weekly_summary_ts.R | 61 --- HARP-2024-2025/retrieve_vahydro_metrics.R | 12 - HARP-2024-2025/stormSep_USGS.R | 504 ---------------------- HARP-2024-2025/storm_event_ratings.R | 39 -- HARP-2024-2025/using_basic_analytics.R | 57 --- HARP-2024-2025/variable definitions.R | 6 - HARP-2024-2025/yearly_metrics.R | 73 ---- 17 files changed, 2537 deletions(-) delete mode 100644 HARP-2024-2025/DataSourceEvaluations.R delete mode 100644 HARP-2024-2025/GIS script.R delete mode 100644 HARP-2024-2025/StormResults_Analysis.R delete mode 100644 HARP-2024-2025/VAHydro_metric_script.R delete mode 100644 HARP-2024-2025/access-db.R delete mode 100644 HARP-2024-2025/access-file.R delete mode 100644 HARP-2024-2025/assessEventPrecip.R delete mode 100644 HARP-2024-2025/caching function.R delete mode 100644 HARP-2024-2025/dni_data_experiments.R delete mode 100644 HARP-2024-2025/hydroimport_daily.R delete mode 100644 HARP-2024-2025/make_weekly_summary_ts.R delete mode 100644 HARP-2024-2025/retrieve_vahydro_metrics.R delete mode 100644 HARP-2024-2025/stormSep_USGS.R delete mode 100644 HARP-2024-2025/storm_event_ratings.R delete mode 100644 HARP-2024-2025/using_basic_analytics.R delete mode 100644 HARP-2024-2025/variable definitions.R delete mode 100644 HARP-2024-2025/yearly_metrics.R diff --git a/HARP-2024-2025/DataSourceEvaluations.R b/HARP-2024-2025/DataSourceEvaluations.R deleted file mode 100644 index 14c0c02f..00000000 --- a/HARP-2024-2025/DataSourceEvaluations.R +++ /dev/null @@ -1,147 +0,0 @@ -# Load in libraries -library(sqldf) -library(ggplot2) -library(tidyverse) - - -################################################################################ -# PRISM List of files - -list_of_prism <- list.files(path="C:/Users/ilona/OneDrive - Virginia Tech/HARP/Storm Tests/Ratings/out-prism", - recursive = TRUE, - pattern = "\\.csv$", - full.names = TRUE) -prism_list <- read_csv(list_of_prism, id="gageid") - -prism_list$gageid <- grep("0", unlist(strsplit(prism_list$gageid, "_")), value = TRUE, perl = TRUE) -prism_list$gageid <- grep("0", unlist(strsplit(prism_list$gageid, "-")), value = TRUE, perl = TRUE) -prism_list$...1 <- NULL - -prism_gages <- as.list(unique(prism_list$gageid)) - 
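# The paired grep(unlist(strsplit(...))) calls above recover the gage number
# from file paths such as ".../usgs_ws_01613900-PRISM-storm_volume-ratings.csv".
# A single regular expression does the same job in one step (sketch only; the
# example path is made up, and the pattern is similar to the gsub() used in
# GIS script.R further down):
example_path <- "out-prism/usgs_ws_01613900-PRISM-storm_volume-ratings.csv"
gsub(".*usgs_ws_(\\d+)-.*", "\\1", example_path)   # "01613900"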
-################################################################################ -# DAYMET List of files - -list_of_daymet <- list.files(path="C:/Users/ilona/OneDrive - Virginia Tech/HARP/Storm Tests/Ratings/out-daymet/out", - recursive = TRUE, - pattern = "\\.csv$", - full.names = TRUE) -daymet_list <- read_csv(list_of_daymet, id="gageid") - -daymet_list$gageid <- grep("0", unlist(strsplit(daymet_list$gageid, "_")), value = TRUE, perl = TRUE) -daymet_list$gageid <- grep("0", unlist(strsplit(daymet_list$gageid, "-")), value = TRUE, perl = TRUE) -daymet_list$...1 <- NULL - -daymet_gages <- as.list(unique(daymet_list$gageid)) - -################################################################################ -# NLDAS2 List of Files - -list_of_nldas2 <- list.files(path="C:/Users/ilona/OneDrive - Virginia Tech/HARP/Storm Tests/Ratings/out-daymet/out", - recursive = TRUE, - pattern = "\\.csv$", - full.names = TRUE) -nldas2_list <- read_csv(list_of_nldas2, id="gageid") - -nldas2_list$gageid <- grep("0", unlist(strsplit(nldas2_list$gageid, "_")), value = TRUE, perl = TRUE) -nldas2_list$gageid <- grep("0", unlist(strsplit(nldas2_list$gageid, "-")), value = TRUE, perl = TRUE) -nldas2_list$...1 <- NULL - -nldas2_gages <- as.list(unique(nldas2_list$gageid)) - -################################################################################ -# Find gages which ran for each data source - -complete_gages <- as.list(intersect(prism_gages,daymet_gages)) -complete_gages <- as.list(as.numeric(intersect(complete_gages,nldas2_gages))) - -################################################################################ -# -for(j in c("daymet","prism","nldas")){ - print(j) - #Set path to read in ratings based on data source of outer loop - pathToread <- paste0("C:/Users/ilona/OneDrive - Virginia Tech/HARP/Storm Tests/Ratings/",j,"/") - for( i in list.files(pathToread) ){ - filei <- read.csv(paste0(pathToread,i)) - #Store the gage number - filei$gage <- gsub(".*_(\\d+)-.*","\\1",i) - #Combine the file into a single data frame - if(!exists("combineFile")){ - combineFile <- filei - }else{ - combineFile <- rbind(combineFile,filei) - } - } - #Assign to a specific variable and delete the generic combineFile - assign(paste0('combineFile',j),combineFile) - rm(combineFile) -} - -#Join the daymet and prism data together -joinData <- combineFileprism %>% - #Remove the row numbers (not important) and rename the r_squared for clarity - select(-X,prismRating = r_squared) %>% - #Join in the dayment data, but first rename the r_squared column for clarity. - #Join on gage and month - left_join(combineFiledaymet %>% - select(daymetRating = r_squared,gage,mo), - by = c("gage","mo")) %>% - #Join in the nldas data, but first rename the r_squared column for clarity. 
- #Join on gage and month - left_join(combineFilenldas %>% - select(nldasRating = r_squared,gage,mo), - by = c("gage","mo")) %>% - #Pivot it longer to have a column with the data source and one for the - #ratings, for plotting ease - pivot_longer(c(prismRating,daymetRating,nldasRating), - names_to = 'dataSource', - values_to = 'rating') - - - -################################################################################ -# Import and add drainage are from USGS -drainage_area <- dataRetrieval::readNWISsite(c(unique(joinData$gage))) -joinData <- sqldf( - "select a.mo as mo, a.gage as gageid, a.dataSource as dataSource, a.rating as rating, - b.drain_area_va as drainageArea_sqmi - from joinData as a - left outer join drainage_area as b - on ( - a.gage = b.site_no - )" -) - -joinData$dataSource <- gsub("Rating","",joinData$dataSource) - - -################################################################################ -# Trends in DA and ratings - -ggplot(data=joinData)+ - aes(y=mean(joinData$rating, group_by(gageid)),x=joinData$drainageArea_sqmi) - - - - - - - - - - - - - - - - - - - - - - - - - diff --git a/HARP-2024-2025/GIS script.R b/HARP-2024-2025/GIS script.R deleted file mode 100644 index fbcb3196..00000000 --- a/HARP-2024-2025/GIS script.R +++ /dev/null @@ -1,140 +0,0 @@ -library(tidyverse) -#Assumes all ratings files are in a folder ratings/daymet/ or ratings/prism/ -for(j in c("daymet","prism","nldas")){ - print(j) - #Set path to read in ratings based on data source of outer loop - pathToread <- paste0("C:/Users/natef/OneDrive - Virginia Tech/HARP/Ratings/",j,"/") - for( i in list.files(pathToread) ){ - #For each file in the directory, read in the file i as a csv - filei <- read.csv(paste0(pathToread,i)) - #Store the gage number - filei$gage <- gsub(".*_(\\d+)-.*","\\1",i) - #Store the analysis type - filei$workflow <- gsub(".*_","",j) - #Keep only the necessary columns, depending on the workflow: - if(filei$workflow[1] == "simplelm"){ - filei <- filei[,c("mo","rating","gage","workflow")] - }else{ - filei <- filei[,c("mo","r_squared","gage","workflow")] - } - names(filei) <- c("mo","rating","gage","workflow") - #Combine the file into a single data frame - if(!exists("combineFile")){ - combineFile <- filei - }else{ - combineFile <- rbind(combineFile,filei) - } - } - #Assign to a specific variable and delete the generic combineFile - assign(paste0('combineFile',j),combineFile) - rm(combineFile) -} - -#Join the daymet and prism data together -joinData <- combineFileprism %>% - #Rename the rating for clarity - select(prismRatingstorm = rating,gage,mo,workflow) %>% - #Join in the dayment data, but first rename the rating column for clarity. - #Join on gage, month, and workflow - full_join(combineFiledaymet %>% - select(daymetRatingstorm = rating,gage,mo,workflow), - by = c("gage","mo","workflow")) %>% - full_join(combineFiledaymet %>% - select(daymetRatinglm = rating,gage,mo,workflow), - by = c("gage","mo","workflow")) %>% - #Add remaining prism data: - full_join(combineFileprism %>% - select(prismRatinglm = rating,gage,mo,workflow), - by = c("gage","mo","workflow")) %>% - #Join in the nldas data, but first rename the rating column for clarity. 
- #Join on gage, month, and workflow - full_join(combineFilenldas %>% - select(nldasRatingstorm = rating,gage,mo,workflow), - by = c("gage","mo","workflow")) %>% - full_join(combineFilenldas %>% - select(nldasRatinglm = rating,gage,mo,workflow), - by = c("gage","mo","workflow")) %>% - #For easier viewing, combine lm and storm columns such that there is only one - #column for prism, daymet, nldas classified by the workflow column - mutate(prismRating = coalesce(prismRatingstorm,prismRatinglm), - daymetRating = coalesce(daymetRatingstorm,daymetRatinglm), - nldasRating = coalesce(nldasRatingstorm,nldasRatinglm) - ) %>% - #Remove now superflous columns: - select(-prismRatingstorm,-prismRatinglm, - -daymetRatingstorm,-daymetRatinglm, - -nldasRatingstorm,-nldasRatinglm) %>% - #Pivot it longer to have a column with the data source and one for the - #ratings, for plotting ease - pivot_longer(c(prismRating,daymetRating,nldasRating), - names_to = 'dataSource', - values_to = 'rating') %>% - arrange(gage,mo,workflow) - -#At each gage, does the best performing data source change between workflows? -#The below code is for ESTIMATES ONLY. The left_join assumes that the ratings -#are unique between datasources for each gage, workflow, month. This is brittle -#and could result in incorrect answers! - -safe_max <- function(x) { - if (length(na.omit(x)) == 0) { - return(NA) # or any other placeholder you prefer - } else { - return(max(x, na.rm = TRUE)) - } -} - -gageCompare <- joinData %>% dplyr::ungroup() %>% - #Group by gage, workflow, and month and find the max rating: - dplyr::group_by(gage,workflow,mo) %>% - dplyr::summarise(maxRating = mean(safe_max(rating))) %>% - #Join the joinData df back in matching by gage, workflow, mo, and rating. This - #could be problematic with duplicate ratings as in a case where all ratings - #across the data sources are the same value - left_join(joinData,by = c('gage','workflow','mo','maxRating' = 'rating')) %>% - #Now pivot wider such that we can see ratings and best data sources side-by-side - pivot_wider(names_from = workflow,values_from = c(maxRating,dataSource)) %>% - #Now filter to only find instances where the best data sources are different: - filter(dataSource != dataSource) %>% - #Create a column showing the difference in rating and arrange by the difference - mutate(differenceInRating = maxRating - maxRating) %>% - arrange(differenceInRating) - - - -#Isolate one month -gageCompareMonth <- gageCompare[gageCompare$mo == 10,] - -library(sf) -#Get the watershed coverage from the server -watershedGeo <- read.csv("http://deq1.bse.vt.edu:81/met/usgsGageWatershedGeofield.csv") - -#Get the gage numbers as their own field and store a copy of the data -gageWatershedSF <- watershedGeo -gageWatershedSF$gage <- gsub(".*_(\\d+)","\\1",gageWatershedSF$hydrocode) - -#Let's begin by plotting October daymet ratings for stormVol -forPlot <- joinData[joinData$dataSource == 'daymetRating' & - joinData$workflow == 'stormvol' & - joinData$mo == 10,] -#Join the geometry data onto out plot data -joinDataSF <- forPlot %>% - left_join(gageWatershedSF %>% select(-hydrocode), - by = 'gage') %>% - filter(!is.na(wkt)) -#Create an SF object. Specify the coordinate system and the field name in the -#data frame that contains the well-known text. 
In this case, WKT is the name of -#the field with the polygon geometries -joinDataSF <- st_as_sf(joinDataSF,wkt = 'wkt',crs = 4326) -#Repair broken geometries -joinDataSF <- st_make_valid(joinDataSF) -#Add shape area in coordinate system units (likely meaningless in crs 4326) -joinDataSF$area <- st_area(joinDataSF) -#Order the data by largest to smallest area to make plotting more effective -joinDataSF <- joinDataSF[order(joinDataSF$area,decreasing = TRUE),] -#Plot SF polygons -ggplot(joinDataSF) + - geom_sf(aes(fill = rating)) + - scale_fill_viridis_c(option = 'magma') + - theme_classic() - diff --git a/HARP-2024-2025/StormResults_Analysis.R b/HARP-2024-2025/StormResults_Analysis.R deleted file mode 100644 index 95b5f5ad..00000000 --- a/HARP-2024-2025/StormResults_Analysis.R +++ /dev/null @@ -1,394 +0,0 @@ -# Key for Gages: -# 01620500 = North River near Stokesville -# 01634500 = Cedar Creek near Winchester -# 01669000 = Piscataway Creek near Tappahannock -# 02021500 = Maury River at Rockbridge Falls -# 02031000 = Mechums River near White Hall -# 02044500 = Nottoway River near Rawlings -# 03176500 = New River at Glen Lyn -# 03165000 = Chesnut Creek at Galax -# 02054530 = Roanoke River at Glenvar -# 02034000 = Rivanna River at Palmyra - -# sql, ggplot2 -library(sqldf) -library(ggplot2) -library(tidyverse) - -############################################################################## -# STORM FILE DATA COMPILATION - -list_of_files <- list.files(path="C:/Users/ilona/OneDrive - Virginia Tech/HARP/Storm Tests/Ratings/out", - recursive = TRUE, - pattern = "\\.csv$", - full.names = TRUE) -df <- read_csv(list_of_files, id="gageid") - -df$gageid <- grep("0", unlist(strsplit(df$gageid, "_")), value = TRUE, perl = TRUE) -df$gageid <- grep("0", unlist(strsplit(df$gageid, "-")), value = TRUE, perl = TRUE) -df$...1 <- NULL - -df$season <- ifelse (df$mo %in% c(6:8), - "SUMMER", - ifelse (df$mo %in% c(9:11), - "AUTUMN", - ifelse (df$mo %in% c(12,1,2), - "WINTER", - ifelse (df$mo %in% c(3:5), - "SPRING", NA)))) - -############################################################################## -# MONTHLY AND SEASONAL R-SQUARED PLOTS - -ggplot(data = df)+ - aes(y = season, x=r_squared, color=season)+ - geom_boxplot()+ - xlim(0,1)+ - theme_bw()+ - xlab("R-Squared")+ - ylab("Season")+ - ggtitle("Seasonal R-Squared Values ()")+ - theme(plot.title = element_text(hjust = 0.5), legend.position = "none") - - -ggplot(data = df)+ - aes(y = mo, x=r_squared)+ - geom_boxplot(mapping = aes(group = mo))+ - xlim(0,1)+ - theme_bw()+ - xlab("R-Squared")+ - ylab("Month")+ - ggtitle("Monthly R-Squared Values ()")+ - theme(plot.title = element_text(hjust = 0.5), legend.position = "none")+ - scale_y_continuous(breaks = seq(0,12,1)) - - -############################################################################ -# Highest and lowest avg R-squared - -gage_mean<- aggregate(x= df$r_squared, - # Specify group indicator - by = list(df$gageid), - # Specify function (i.e. 
mean) - FUN = mean, - na.rm = TRUE ) -print(gage_mean) - -ggplot(data = gage_mean, - mapping = aes(x=x))+ - geom_density(color="dodgerblue3")+ - theme_bw()+ - ylab("Density")+ - xlab("R-Squared")+ - ggtitle("Mean R-Squared Value Density for each Gage")+ - theme(plot.title = element_text(hjust = 0.5))+ - xlim(0,1)+ - geom_vline(xintercept = mean(gage_mean$x), lwd=0.5,colour="purple4",linetype = "dashed") - -ggplot(data = df, - mapping = aes(x=r_squared))+ - geom_density(color="indianred4")+ - theme_bw()+ - ylab("Density")+ - xlab("R-Squared")+ - ggtitle("Total R-Squared Value Density")+ - theme(plot.title = element_text(hjust = 0.5))+ - xlim(0,1)+ - geom_vline(xintercept = mean(df$r_squared, na.rm = TRUE), lwd=0.5,colour="deeppink3",linetype = "dashed") - - -############################################################################## -# Highest R-squared Gage analysis - -high_data <- read.csv("C:/Users/ilona/OneDrive - Virginia Tech/HARP/Storm Tests/Ratings/out/usgs_ws_01652500-PRISM-storm_volume-ratings.csv") - -ggplot(data = high_data, - mapping = aes(x=mo,y=r_squared))+ - geom_line(color="magenta4")+ - theme_bw()+ - ylim(0,1)+ - xlab("Month")+ - ylab("R-Squared")+ - ggtitle("Monthly R-Squared, Highest Mean")+ - theme(plot.title = element_text(hjust = 0.5)) - - -############################################################################# -# Lowest R-squared Gage analysis - -low_data <- read.csv("C:/Users/ilona/OneDrive - Virginia Tech/HARP/Storm Tests/Ratings/out/usgs_ws_01671025-PRISM-storm_volume-ratings.csv") - -ggplot(data = low_data, - mapping = aes(x=mo,y=r_squared))+ - geom_line(color="darkolivegreen3")+ - theme_bw()+ - ylim(-0.1,1)+ - xlab("Month")+ - ylab("R-Squared")+ - ggtitle("Monthly R-Squared, Lowest Mean")+ - theme(plot.title = element_text(hjust = 0.5)) - -############################################################################# - -precip <- read.csv("C:/Users/ilona/OneDrive - Virginia Tech/HARP/Storm Tests/Peak Diff/usgs_ws_02024000-PRISM-daily.csv") -stormflow <- read.csv("C:/Users/ilona/OneDrive - Virginia Tech/HARP/Storm Tests/Peak Diff/usgs_ws_02024000-stormevent-flow.csv") - -precip$obs_date <- as_date(precip$obs_date) -stormflow$timestamp <- as_date(stormflow$timestamp) - -stormflow <- subset(stormflow, stormflow$timestamp >= min(precip$obs_date)) -stormflow <- subset(stormflow, stormflow$timestamp <= max(precip$obs_date)) - -compdata <- sqldf( - "select a.obs_date, a.precip_in, - b.flow, b.stormID - from precip as a - left outer join stormflow as b - on ( - a.yr = b.year - and a.obs_date = b.timestamp - )" -) - - - -############################################################################ -# Plot Ratings -ggplot(data=comp_ratings, - mapping = aes(x=mo))+ - geom_line(aes(y=r_01620500), color="darkred")+ - geom_line(aes(y=r_01629500), color="red3")+ - geom_line(aes(y=r_01634500), color="firebrick1")+ - geom_line(aes(y=r_01643700), color="orangered1")+ - geom_line(aes(y=r_01662800), color="darkorange1")+ - geom_line(aes(y=r_01669000), color="orange1")+ - geom_line(aes(y=r_01669520), color="goldenrod1")+ - geom_line(aes(y=r_02018500), color="yellow2")+ - geom_line(aes(y=r_02021500), color="yellowgreen")+ - geom_line(aes(y=r_02024000), color="palegreen3")+ - geom_line(aes(y=r_02026000), color="darkslategray3")+ - geom_line(aes(y=r_02029000), color="deepskyblue3")+ - geom_line(aes(y=r_02031000), color="dodgerblue4")+ - geom_line(aes(y=r_02032250), color="darkslateblue")+ - geom_line(aes(y=r_02034000), color="purple4")+ - geom_line(aes(y=r_02033500), 
color="darkorchid4")+ - geom_line(aes(y=r_02051000), color="darkviolet")+ - geom_line(aes(y=r_02054530), color="darkmagenta")+ - geom_line(aes(y=r_03165000), color="deeppink3")+ - geom_line(aes(y=r_03176500), color="hotpink")+ - theme_bw()+ - xlab("Month")+ - ylab("R-Squared")+ - ggtitle("Monthly R-Squared")+ - theme(plot.title = element_text(hjust = 0.5))+ - ylim(0,1) - - - -ggplot(data=comp_ratings, - mapping = aes(x=mo))+ - geom_line(aes(y=mean(r_01620500)), color="darkred")+ - geom_line(aes(y=mean(r_01629500)), color="red3")+ - geom_line(aes(y=mean(r_01634500)), color="firebrick1")+ - geom_line(aes(y=mean(r_01643700)), color="orangered1")+ - geom_line(aes(y=mean(r_01662800)), color="darkorange1")+ - geom_line(aes(y=mean(r_01669000)), color="orange1")+ - geom_line(aes(y=mean(r_01669520)), color="goldenrod1")+ - geom_line(aes(y=mean(r_02018500)), color="yellow2")+ - geom_line(aes(y=mean(r_02021500)), color="yellowgreen")+ - geom_line(aes(y=mean(r_02024000)), color="palegreen3")+ - geom_line(aes(y=mean(r_02026000)), color="darkslategray3")+ - geom_line(aes(y=mean(r_02029000)), color="deepskyblue3")+ - geom_line(aes(y=mean(r_02031000)), color="dodgerblue4")+ - geom_line(aes(y=mean(r_02032250)), color="darkslateblue")+ - geom_line(aes(y=mean(r_02034000)), color="purple4")+ - geom_line(aes(y=mean(r_02033500)), color="darkorchid4")+ - geom_line(aes(y=mean(r_02051000)), color="darkviolet")+ - geom_line(aes(y=mean(r_02054530)), color="darkmagenta")+ - geom_line(aes(y=mean(r_03165000)), color="deeppink3")+ - geom_line(aes(y=mean(r_03176500)), color="hotpink")+ - theme_bw()+ - xlab("Month")+ - ylab("R-Squared")+ - ggtitle("Monthly R-Squared")+ - theme(plot.title = element_text(hjust = 0.5))+ - ylim(0.25,0.75) - - -ggplot(data=comp_ratings, - mapping = aes(x=mo))+ - geom_line(aes(y=mean(r_03176500)), color="hotpink") - - -ggplot(data = comp_ratings, - mapping = aes(x=mo))+ - geom_line(aes(y=r_01620500), color="red3")+ - geom_line(aes(y=r_01634500), color="orange2")+ - geom_line(aes(y=r_01669000), color="gold1")+ - geom_line(aes(y=r_02021500), color="yellowgreen")+ - geom_line(aes(y=r_02031000), color="darkturquoise")+ - geom_line(aes(y=r_02034000), color="dodgerblue")+ - geom_line(aes(y=r_02044500), color="dodgerblue4")+ - geom_line(aes(y=r_02054530), color="purple3")+ - geom_line(aes(y=r_03165000), color="orchid3")+ - geom_line(aes(y=r_03176500), color="hotpink")+ - theme_bw()+ - xlab("Month")+ - ylab("R-Squared")+ - ggtitle("Monthly R-Squared")+ - theme(plot.title = element_text(hjust = 0.5))+ - geom_rect(aes(xmin = 6, xmax = 8, ymin = 0.025, ymax = 0.55), - fill="transparent", color="red", size=0.75)+ - geom_rect(aes(xmin = 3, xmax = 5, ymin = 0.55, ymax = 0.85), - fill="transparent", color="red", size=0.75)+ - geom_rect(aes(xmin = 9.25, xmax = 10.75, ymin = 0.4, ymax = 0.85), - fill="transparent", color="red", size=0.75)+ - geom_rect(aes(xmin = 1.5, xmax = 2.5, ymin = 0.45, ymax = 0.85), - fill="transparent", color="red", size=0.75)+ - ylim(0,1) - - -# July Dip (> 5) -ggplot(data = comp_ratings, - mapping = aes(x=mo))+ - geom_line(aes(y=r_01620500), color="red3")+ - geom_line(aes(y=r_01634500), color="orange2")+ - geom_line(aes(y=r_02021500), color="yellowgreen")+ - geom_line(aes(y=r_02044500), color="dodgerblue4")+ - geom_line(aes(y=r_02054530), color="purple3")+ - geom_line(aes(y=r_03165000), color="orchid3")+ - theme_bw()+ - xlab("Month")+ - ylab("R-Squared")+ - ggtitle("July Decrease in Monthly R-Squared")+ - theme(plot.title = element_text(hjust = 0.5))+ - xlim(5,9)+ - ylim(0,1) - -# 
April Peak (not > 5) -ggplot(data = comp_ratings, - mapping = aes(x=mo))+ - geom_line(aes(y=r_02021500), color="yellowgreen")+ - geom_line(aes(y=r_02034000), color="dodgerblue")+ - geom_line(aes(y=r_02054530), color="purple3")+ - geom_line(aes(y=r_03165000), color="orchid3")+ - geom_line(aes(y=r_03176500), color="hotpink")+ - theme_bw()+ - xlab("Month")+ - ylab("R-Squared")+ - ggtitle("April Peak Monthly R-Squared")+ - theme(plot.title = element_text(hjust = 0.5))+ - xlim(2,6)+ - ylim(0,1) - - -# February Peak (not > 5) -ggplot(data = comp_ratings, - mapping = aes(x=mo))+ - geom_line(aes(y=r_01669000), color="gold1")+ - geom_line(aes(y=r_02031000), color="darkturquoise")+ - geom_line(aes(y=r_02054530), color="purple3")+ - geom_line(aes(y=r_03176500), color="hotpink")+ - theme_bw()+ - xlab("Month")+ - ylab("R-Squared")+ - ggtitle("February Peak Monthly R-Squared")+ - theme(plot.title = element_text(hjust = 0.5))+ - xlim(1,4)+ - ylim(0,1) - - -# October Peak (not > 5) -ggplot(data = comp_ratings, - mapping = aes(x=mo))+ - geom_line(aes(y=r_01620500), color="red3")+ - geom_line(aes(y=r_02021500), color="yellowgreen")+ - geom_line(aes(y=r_02031000), color="darkturquoise")+ - geom_line(aes(y=r_02044500), color="dodgerblue4")+ - geom_line(aes(y=r_03165000), color="orchid3")+ - theme_bw()+ - xlab("Month")+ - ylab("R-Squared")+ - ggtitle("October Peak Monthly R-Squared")+ - theme(plot.title = element_text(hjust = 0.5))+ - xlim(8,12)+ - ylim(0,1) - - -# Data frame with monthly mean r-squared value -monthly_mean <- data.frame(rowMeans(comp_ratings[,2:11])) -monthly_mean$mo <- c(1:12) -monthly_mean$season <- ifelse (monthly_mean$mo %in% c(6:8), - "SUMMER", - ifelse (monthly_mean$mo %in% c(9:11), - "AUTUMN", - ifelse (monthly_mean$mo %in% c(12,1,2), - "WINTER", - ifelse (monthly_mean$mo %in% c(3:5), - "SPRING", NA)))) -monthly_mean <- monthly_mean[, c(2,1)] -colnames(monthly_mean)[2] <- "mean_r_squared" - - - - -# Add season to comp data -comp_ratings$season <- ifelse (comp_ratings$mo %in% c(6:8), - "SUMMER", - ifelse (comp_ratings$mo %in% c(9:11), - "AUTUMN", - ifelse (comp_ratings$mo %in% c(12,1,2), - "WINTER", - ifelse (comp_ratings$mo %in% c(3:5), - "SPRING", NA)))) - -library(ggbeeswarm) - -ggplot(data = pivot_longer(comp_ratings,2:21))+ - aes(y = season, x=value, color=season)+ - geom_boxplot()+ - xlim(0,1)+ - theme_bw()+ - xlab("R-Squared")+ - ylab("Season")+ - ggtitle("Seasonal R-Squared Values")+ - theme(plot.title = element_text(hjust = 0.5), legend.position = "none") - - -ggplot(data = pivot_longer(comp_ratings,2:21))+ - aes(y = mo, x=value)+ - geom_boxplot(mapping = aes(group = mo))+ - xlim(0,1)+ - theme_bw()+ - xlab("R-Squared")+ - ylab("Month")+ - ggtitle("Monthly R-Squared Values")+ - theme(plot.title = element_text(hjust = 0.5), legend.position = "none")+ - scale_y_continuous(breaks = seq(0,12,1)) - - - - -# Plot monthly average rsquared value -ggplot(data = monthly_mean, - mapping = aes(x=mo))+ - geom_line(aes(y=mean_r_squared), color="hotpink3")+ - theme_bw()+ - xlab("Month")+ - ylab("Mean R-Squared")+ - ggtitle("Monthly Average R-Squared Values")+ - theme(plot.title = element_text(hjust = 0.5))+ - ylim(0,1) - - - - - - - - - - - - - diff --git a/HARP-2024-2025/VAHydro_metric_script.R b/HARP-2024-2025/VAHydro_metric_script.R deleted file mode 100644 index 89330e30..00000000 --- a/HARP-2024-2025/VAHydro_metric_script.R +++ /dev/null @@ -1,116 +0,0 @@ -# Inputs (args): -# 1 = path to precip data you want to use -# 2 = path of new csv 
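# Example invocation matching the two arguments described above (the csv file
# names here are hypothetical):
#   Rscript VAHydro_metric_script.R usgs_ws_01613900-PRISM-all.csv precip_metrics.csv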
-suppressPackageStartupMessages(library(dataRetrieval)) -suppressPackageStartupMessages(library(lubridate)) -suppressPackageStartupMessages(library(sqldf)) -suppressPackageStartupMessages(library(dplyr)) -args <- commandArgs(trailingOnly = TRUE) -if (length(args) != 2) { - message("Usage: Rscript usgsdata.R gage_id output_path") - q() -} - -read_path <- args[1] -write_path <- args[2] - -message("Pull csv from input file path") - -df <- read.csv(read_path) - -summary_analytics <- function(df){ - - #creating a yearly summary with each year and its total precip - yearly.summary <- - sqldf( - "SELECT yr, SUM(precip_in) AS total_precip - FROM df - GROUP BY yr" - ) - - #summary analytics - - precip_annual_max_in <- - max(yearly.summary$total_precip) - - precip_annual_max_year <- - yearly.summary$yr[which.max(yearly.summary$total_precip)] - - precip_annual_mean_in <- - mean(yearly.summary$total_precip) - - - #For min values and years, we can exclude the first and last row since the - #current year and first years are incomplete data - precip_annual_min_in <- - min(yearly.summary$total_precip[c(-nrow(yearly.summary),-1)]) - precip_annual_min_year <- - yearly.summary$yr[which.min - (yearly.summary$total_precip[c(-nrow - (yearly.summary),-1)])] - - - - # Create daily summary to use for all data. This makes hourly data daily sums. - daily.summary <- - sqldf( - "SELECT yr, mo, da, obs_date, SUM(precip_in) AS total_precip - FROM df - GROUP BY yr, mo, da" - ) #include obs_date after SQL to data frame - precip_daily_max_in <- max(daily.summary$total_precip) - #move to beginning - - - #if else statement evaluates the amount of unique hours and if 24, - #then hourly max is taken. If not, hourly max is NA - if(length(unique(df$hr)) == 24){ - precip_hourly_max_in <- max(df$precip_in) - } else { - precip_hourly_max_in <- NA - } - #Alternatively to a null value for hourly precip in daily data, - #we could look at the rainfall distribution table for a - #type II storm and multiply by the max P(t)/P(24) value. - #this function uses NULL for daily data - - - #l90 using zoo package - l90_precip_in_roll <- - min(rollapply(daily.summary$total_precip, width = 90, FUN = mean, - fill = NA, align = "right"), - na.rm = TRUE) - - #l90 using IHA - if(length(unique(df$hr)) == 24){ - Qout_zoo <- zoo(as.numeric(daily.summary$total_precip), order.by = as.POSIXct(daily.summary$obs_date)) - Qout_g2 <- data.frame(group2(Qout_zoo)) - l90_precip_in <- min(Qout_g2$X90.Day.Min) - } else { - Qout_zoo <- zoo(as.numeric(df$precip_in), order.by = as.POSIXct(df$obs_date)) - Qout_g2 <- data.frame(group2(Qout_zoo)) - l90_precip_in <- min(Qout_g2$X90.Day.Min) - } - - - - #makes data frame with all 9 metrics - metrics<- data.frame(precip_annual_max_in = precip_annual_max_in, - precip_annual_max_year = precip_annual_max_year, - precip_annual_mean_in = precip_annual_mean_in, - precip_annual_min_in = precip_annual_min_in, - precip_annual_min_year = precip_annual_min_year, - precip_daily_max_in = precip_daily_max_in, - precip_hourly_max_in = precip_hourly_max_in, - l90_precip_in = l90_precip_in, - l90_precip_in_roll = l90_precip_in_roll) -} - - -summary_analytics(df) -#What would be the best way to format the output csv? 
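# One simple answer to the question above: capture the one-row data frame that
# summary_analytics() returns and write it without row names, so each metric
# becomes a named column in the output csv. Sketch only -- note the assignment;
# as written below, the function's result is never captured and precip_metric
# is not defined anywhere.
precip_metrics <- summary_analytics(df)
write.csv(precip_metrics, write_path, row.names = FALSE)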
- - -message(paste0("Write csv in new file path: ",write_path)) -write.csv(precip_metric,write_path) - diff --git a/HARP-2024-2025/access-db.R b/HARP-2024-2025/access-db.R deleted file mode 100644 index 05a20cf5..00000000 --- a/HARP-2024-2025/access-db.R +++ /dev/null @@ -1,167 +0,0 @@ -library("RPostgres") -library("sqldf") -library("dataRetrieval") - -# works with only Rpostgres. This is the connection string to the database. -# Using this allows us to directly query the database from R, rather than -# needing to save results via SQL and send off text files with results. These -# connection strings are similar for all databases and rely on the permissions -# within the db -con <- dbConnect( - RPostgres::Postgres(), - dbname = "drupal.dh03", - host = "deq1.bse.vt.edu", - port = 5431, - user = readline("DB User Name: "), - password = getPass::getPass("REST Password: ") - ) - -#A function to get mean meteorology data for a given VA Hydro feature's geometry -#extent. In other words, this function can pull down meteorology data from our -#database (which covers the entire chesapeake Bay watershed), clip it to a given -#feature (like the drainage area of a gage or a parituclar watershed), and get -#the mean precipitation per day -get_feature_met <- function(con, - #The hydro code of the VA Hydro feature to clip the - #dataset to (the hydrocode is often from the base - #database and is not generated in VAHydro, whereas - #the feature id is the unique VA Hydro auto - #generated index number) - hydrocode, - #The variable key from VA Hydro for the met data of interest - varkey, - #The band of the raster. Important for NLDAS, PRISM - #and dyamet only have 1 band - band = 1, - #The feature associated with the meteorology data - #records. For precip, this is the cbp6_met_coverage - #generally (the CB p6 watershed) - mcovcode = 'cbp6_met_coverage') { - q = paste0( - "select met.featureid, to_timestamp(met.tsendtime) as obs_date, - extract(month from to_timestamp(met.tsendtime)) as mo, - --Note the syntax used to first clip the raster to the requested feature - --prior to summarizing, and note that we call mean as if it is a method of - --the object of ST_summarystats - (ST_summarystats(st_clip(met.rast, fgeo.dh_geofield_geom), ", - band, ", TRUE)).mean as precip_mm, - 0.0393701 * (ST_summarystats(st_clip(met.rast, fgeo.dh_geofield_geom), ", - band, ", TRUE)).mean as precip_in - from dh_feature as f - left outer join field_data_dh_geofield as fgeo - on ( - fgeo.entity_id = f.hydroid - and fgeo.entity_type = 'dh_feature' - ) - left outer join dh_variabledefinition as v - on (v.varkey = '", - varkey, "') - left outer join dh_feature as mcov - on ( - mcov.hydrocode = '", mcovcode, "' - ) - left outer join dh_timeseries_weather as met - on ( mcov.hydroid = met.featureid and met.varid = v.hydroid) - where f.hydrocode = '", hydrocode, "' - order by met.tsendtime -" - ) - dat <- sqldf(q, connection = con) - return(dat) -} - -# Inventory i.e show the number of records of a given meteorology data set per -# year and show the minimum and maximum dates tied to that data. 
We should -# expect to see 366 days on leap years and 365 days on non-leap years EXCEPT for -# daymet, which will ALWAYS have 365 days with 12-31 missing on leap years -varkey <- "nldas2_obs_hourly" -inv_met <- sqldf(paste0("select extract(year from to_timestamp(met.tsendtime)) as year, -min(to_timestamp(met.tsendtime)) as start_date, -max(to_timestamp(met.tsendtime)) as end_date, -count(*) -from ( - select met.tsendtime - from dh_feature as mcov - left outer join dh_variabledefinition as v - on ( - v.varkey = '",varkey,"' - ) - left outer join dh_timeseries_weather as met - on ( - mcov.hydroid = met.featureid and met.varid = v.hydroid - and met.entity_type = 'dh_feature' - ) - where mcov.hydrocode = 'cbp6_met_coverage' - and met.rast is not null - group by met.tsendtime -) as met -group by extract(year from to_timestamp(met.tsendtime)) -order by extract(year from to_timestamp(met.tsendtime))"), connection = con) - -# get all the tables from connection -dbListTables(con) - -#Set a USGS gage ID -gageid = '01634000' -#Format the ID to match the hydrocode used in the VA Hydro database -hydrocode = paste0('usgs_ws_', gageid) -#Get PRISM data, specified via the varkey argument in get_feature_met, and clip -#to the gage drainage area, specified via the hydrocode. Get the mean precip in -#the watershed for each day -prism_data <- get_feature_met(con, hydrocode, 'prism_mod_daily') - -#Add more details about the date as needed. Note that format relies on a date -#defined via https://pubs.opengroup.org/onlinepubs/009695399/utilities/date.html -prism_data[,c('yr', 'mo', 'da')] <- cbind(format(prism_data$obs_date,"%Y"), - format(prism_data$obs_date,"%m"), - format(prism_data$obs_date,"%d")) -#Repeat the process, this time pulling data from the daymet data source -daymet_data <- get_feature_met(con, hydrocode, 'daymet_mod_daily') -daymet_data[,c('yr', 'mo', 'da')] <- cbind(format(daymet_data$obs_date,"%Y"), - format(daymet_data$obs_date,"%m"), - format(daymet_data$obs_date,"%d")) -# Get USGS gage data from NWIS, the USGS REST services. We pull this data using -# the dataRetrieval R package which has extensive documentation on the options -# therewithin. See https://waterdata.usgs.gov/blog/dataretrieval/. Below, we -# a cached version of the function defined in our config.R file: -gage_info <- memo_readNWISsite(gageid) -#Extract the drainage area of the gage -da <- gage_info$drain_area_va -#Get daily, mean streamflow (pcode 000060, see dataRetrieval::parameterCdFile) -#from the gage specified in gageid -usgs_data <- memo_readNWISdv(gageid,'00060') -#As above, add more details about the dates -usgs_data[,c('yr', 'mo', 'da')] <- cbind(format(usgs_data$Date,"%Y"), - format(usgs_data$Date,"%m"), - format(usgs_data$Date,"%d")) - -#Join all data together. In other words, extract the relevant fields from the -#three data sources and combine them based on the day, month, and year. We could -#do this by joining by the dates, but you may notice that the USGS daily data -#may not come in as POSIX and doesn't have a time associated with it. 
-comp_data <- sqldf( - "select a.obs_date, a.precip_in as prism_p_in, - b.precip_in as daymet_p_in, c.X_00060_00003 as usgs_cfs - from prism_data as a - left outer join daymet_data as b - on ( - a.yr = b.yr - and a.mo = b.mo - and a.da = b.da - ) - left outer join usgs_data as c - on ( - a.yr = c.yr - and a.mo = c.mo - and a.da = c.da - ) - " -) -#If you are familiar with tidyverse verbiage, you may wonder why we use SQLDF -#here instead of a few dplyr::left_join()s and dplyr::select(). The reason is -#fairly simple. Our team is more well-versed in SQL than in tidyverse and -#tidyverse has a tendency to change these verbs slightly over time. Most of the -#time, this is not an issue. But, some of the more obscure commands often get -#depreceated. Look at the history of dplyr::across() as an example. Even now, -#the joins were changed just a few versions ago and will soon block certain code -#from executing without directly specifying all inputs! diff --git a/HARP-2024-2025/access-file.R b/HARP-2024-2025/access-file.R deleted file mode 100644 index 11d1662d..00000000 --- a/HARP-2024-2025/access-file.R +++ /dev/null @@ -1,418 +0,0 @@ -# Test datasets CSV -library("sqldf") -library("dataRetrieval") -library("lubridate") - -#Set the id for the USGS gage from which we will pull data and use to summarize -#our meteorology data. Currently, we have data set up for: -#Culpepper 01667500, Strasburg 01634000, Ruckersville 01665500 -gageid = '01665500' -# We redefine the gage id as a "hydrocode", which is how it is stored in the VA -#Hydro (DEQ) database -hydrocode = paste0('usgs_ws_', gageid) -#Pull data from VA Hydro, grabbing some precip data for the gage watershed -prism_data <- read.csv(paste0("http://deq1.bse.vt.edu:81/files/met/", hydrocode, "-prism-all.csv")) -#Use lubridate to add on some additional information about the date. These dates -#come in as posix and thus have timestamps, which make them a little trickier to -#work with. So, we can disaggregate them for easier joins and manipulation -prism_data[,c('yr', 'mo', 'da', 'wk')] <- cbind(year(as.Date(prism_data$obs_date)), - month(as.Date(prism_data$obs_date)), - day(as.Date(prism_data$obs_date)), - week(as.Date(prism_data$obs_date))) - -#Repeat the download and date disaggregation for daymet, a different precip -#source: -daymet_data <- read.csv(paste0("http://deq1.bse.vt.edu:81/files/met/", hydrocode, "-daymet-all.csv")) -daymet_data[,c('yr', 'mo', 'da', 'wk')] <- cbind(year(as.Date(daymet_data$obs_date)), - month(as.Date(daymet_data$obs_date)), - day(as.Date(daymet_data$obs_date)), - week(as.Date(daymet_data$obs_date))) -#Repeat the download and date disaggregation for NLDAS2, an HOURLY precip source -nldas2_data <- read.csv(paste0("http://deq1.bse.vt.edu:81/files/met/", hydrocode, "-nldas2-all.csv")) -nldas2_data[,c('yr', 'mo', 'da', 'wk')] <- cbind(year(as.Date(nldas2_data$obs_date)), - month(as.Date(nldas2_data$obs_date)), - day(as.Date(nldas2_data$obs_date)), - week(as.Date(nldas2_data$obs_date))) -#NLDAS is an hourly precip data set. We should aggregate it to be daily sums to -#match the data in PRISM and daymet for the purpose of comparison. We can do -#this in SQL by grouping by the the date columns we created for month, day, and -#year -nldas2_data <- sqldf( - "select featureid, min(obs_date) as obs_date, yr, mo, da, - sum(precip_mm) as precip_mm, sum(precip_in) as precip_in - from nldas2_data - group by yr, mo, da - order by yr, mo, da - " -) - - -# Get USGS gage data from NWIS, the USGS REST services. 
We pull this data using -# the dataRetrieval R package which has extensive documentation on the options -# therewithin. See https://waterdata.usgs.gov/blog/dataretrieval/. Below, we -# a cached version of the function defined in our config.R file: -gage_info <- memo_readNWISsite(gageid) -#Extract the drainage area of the gage -da <- gage_info$drain_area_va -#Get daily, mean streamflow (pcode 000060, see dataRetrieval::parameterCdFile) -#from the gage specified in gageid -usgs_data <- memo_readNWISdv(gageid,'00060') -#Extract date information from the gage using lubridate as above -usgs_data[,c('yr', 'mo', 'da')] <- cbind(year(as.Date(usgs_data$Date)), - month(as.Date(usgs_data$Date)), - day(as.Date(usgs_data$Date))) - -#Join all data together. In other words, extract the relevant fields from the -#three data sources and combine them based on the day, month, and year. We could -#do this by joining by the dates, but you may notice that the USGS daily data -#may not come in as POSIX and doesn't have a time associated with it. We could -#do this via tidyverse verbs, but our team typically prefers SQL as it tends to -#change a lot less frequently i.e. will require less long-term maintenance -comp_data <- sqldf( - "select a.obs_date, a.precip_in as prism_p_in, - a.yr, a.mo, a.da, a.wk, - b.precip_in as daymet_p_in, d.precip_in as nldas2_p_in, - c.X_00060_00003 as usgs_cfs - from prism_data as a - left outer join daymet_data as b - on ( - a.yr = b.yr - and a.mo = b.mo - and a.da = b.da - ) - left outer join usgs_data as c - on ( - a.yr = c.yr - and a.mo = c.mo - and a.da = c.da - ) - left outer join nldas2_data as d - on ( - a.yr = d.yr - and a.mo = d.mo - and a.da = d.da - ) - order by a.yr, a.mo, a.da - " -) - -# Index the dataset, giving a unique number to each row. Please note that these -# are already available via rownames(comp_data) but here we add it as a column -# to call them in SQLDF. You can see how SQL changes our R workflows. As the -# project moves forward, feel free to branch out and try other workflows but -# please note we will often encourage the use of SQL as we assist in tasks. But -# all code is acceptable if it is documented and works! -comp_data$dataset_day <- index(comp_data) - -#Add a column for the date since this is now daily data -comp_data$date <- as.Date(comp_data$obs_date) - -#Here, we add in columns to represent tomorrow's flow and the change in flow -#from today to yesterday and today and tomorrow. This give an indication of how -#flow is changing. We can do so using R's indexing, simply calling the next rows -#down where needed -# comp_data$nextday_usgs_cfs <- c(comp_data$usgs_cfs[2:nrow(comp_data)],NA) -# comp_data$nextday_d_cfs <- comp_data$nextday_usgs_cfs - comp_data$usgs_cfs -# comp_data$today_d_cfs <- comp_data$usgs_cfs - c(NA,comp_data$usgs_cfs[1:(nrow(comp_data) - 1)]) - -#We could alternatively get this via an SQL query that runs just about as -#quickly! 
Note the use of the Lag() function and its inputs in the subquery used -#to pull in yesterdays and todays data -comp_data <- sqldf( - "select b.*, - b.nextday_usgs_cfs - b.usgs_cfs as nextday_d_cfs, - b.usgs_cfs - b.yesterday_usgs_cfs as today_d_cfs - FROM ( - select - a.*, - Lag(a.usgs_cfs,1) OVER(ORDER BY a.dataset_day) as yesterday_usgs_cfs, - Lag(a.usgs_cfs,-1) OVER(ORDER BY a.dataset_day) as nextday_usgs_cfs - from comp_data as a - order by a.dataset_day - ) as b - " -) - -# Add a cfs Precip column using the following conversions for EACH of the precip -# data sources: -# acre-feet/day = 3.07 MGD -# cfs = 1.57 MGD -# DAsqmi * 640 ac/sqmi * p_in_per_day / 12.0 = Precip ac-ft/day -# P acft/day / 3.07 = P mgd -# P mgd * 1.572 = P cfs -# 1.572 * (DAsqmi * 640.0 * p_in / 12.0) / 3.07 -comp_data$prism_p_cfs <- 1.572 * (da * 640.0 * comp_data$prism_p_in / 12.0) / 3.07 -comp_data$daymet_p_cfs <- 1.572 * (da * 640.0 * comp_data$daymet_p_in / 12.0) / 3.07 -comp_data$nldas2_p_cfs <- 1.572 * (da * 640.0 * comp_data$nldas2_p_in / 12.0) / 3.07 - -#We have a lot of daily data in comp_data, but we suspect to see more noteable -#trends in weekly data compared to daily data given travel time and -#abstraction/storage considerations. So, we can summarize our data by grouping -#it via the week variable we added in earlier using lubridate::week. -#(again, you may have done this previously via dplyr::group_by and summarize) -week_data <- sqldf( - "select min(obs_date) as week_begin, yr, wk, min(dataset_day) as dataset_day_begin, - avg(daymet_p_in) as daymet_p_in, avg(daymet_p_cfs) as daymet_p_cfs, - avg(prism_p_in) as prism_p_in, avg(prism_p_cfs) as prism_p_cfs, - avg(nldas2_p_in) as nldas2_p_in, avg(nldas2_p_cfs) as nldas2_p_cfs, - avg(usgs_cfs) as usgs_cfs, avg(today_d_cfs) as today_d_cfs, - avg(nextday_d_cfs) as nextday_d_cfs - from comp_data - group by yr, wk - order by yr, wk - " -) -#Base the month column off of the min day oindex for that week, or the day that -#week began -week_data$mo <- month(week_data$week_begin) - - -#Why does daymet generally procude worse relationships? Isn't it similar to -#PRISM? What is the relationship between PRISM and daymet as we expected these -#to be relatively similar -comparePrecip <- lm(comp_data$daymet_p_cfs ~ comp_data$prism_p_cfs) -summary(comparePrecip)$adj.r.squared -plot(comp_data$daymet_p_cfs ~ comp_data$prism_p_cfs) -#What if we offset the PRSIM data by one day? -compareNextDayPRISM <- lm(comp_data$daymet_p_cfs[1:(nrow(comp_data) - 1)] ~ - comp_data$prism_p_cfs[2:nrow(comp_data)]) -#Much improved. More like we expected, seems like daymet is offset. -summary(compareNextDayPRISM)$adj.r.squared -plot(comp_data$daymet_p_cfs[1:(nrow(comp_data) - 1)] ~ - comp_data$prism_p_cfs[2:nrow(comp_data)]) - -#Add a column with the offset daymet data -comp_data$daymet_p_cfs_nextDay <- c(NA,comp_data$daymet_p_cfs[1:(nrow(comp_data) - 1)]) - -#Create a simple linear regression of flow and precip with flow as the dependent -#variable to test if there is a clear and easy relationship between flow and -#precip. Using the formula notation "~" we can specify the regression as -#dependent ~ independent variable as long as the data option is specified in the -#function -mod_prism <- lm(usgs_cfs ~ prism_p_cfs, data = comp_data) -#Look at the regression paramters. Identify how well the regression performed -#(spoiler alert, it performed poorly, which makes sense given time of travel and -#abstraction/storage. We wouldn't want it to be easy!) 
-summary(mod_prism) - -#Repeated for daymet, running the linear regression between the USGS data and -#daymet -mod_daymet <- lm(usgs_cfs ~ daymet_p_cfs, data = comp_data) -#Evaluate the performance of the regression -summary(mod_daymet) - -# Weekly cfs vs P. Above, we created linear regressions for flow and -# precipitations. Now we repeat this process to see if weekly flow/precip is -# correlated more strongly. We'd suspect this to be a bit more reliably related, -# but how much stronger? -mod_week_prism <- lm(usgs_cfs ~ prism_p_cfs, data=week_data) -summary(mod_week_prism) -#Looking at the results, we see some correlation but it's still not very strong -plot(mod_week_prism$model$usgs_cfs ~ mod_week_prism$model$prism_p_cfs) -mod_week_daymet <- lm(usgs_cfs ~ daymet_p_cfs, data=week_data) -summary(mod_week_daymet) -plot(mod_week_daymet$model$usgs_cfs ~ mod_week_daymet$model$daymet_p_cfs) - - -# Let's repeat the regression anlysis now for ONLY January, which tends to be a -# wetter month. We start with the daily data. For PRISM: -# PRISM -mod_prism_jan <- lm(usgs_cfs ~ prism_p_cfs, data=comp_data[(comp_data$mo == 1),]) -summary(mod_prism_jan) -# daymet -mod_daymet_jan <- lm(usgs_cfs ~ daymet_p_cfs_nextDay, data=comp_data[(comp_data$mo == 1),]) -summary(mod_daymet_jan) -#Plot the daily data for january and see how the flow relates to the summarized -#precip data. This is equivalent to comp_data$usgs_cfs and comp_data$daymet_p_cfs -plot(mod_daymet_jan$model$usgs_cfs ~ mod_daymet_jan$model$daymet_p_cfs_nextDay) - -# January, next day flow todays precip -mod_prism_jan_nd <- lm(nextday_usgs_cfs ~ prism_p_cfs, - data=comp_data[(comp_data$mo == 1),]) -summary(mod_prism_jan_nd) - -#Repeat regression, but now compare the CHANGE in flow between today and -#tomorrow with TODAYs precipitation. Again, we stick with just January for now. -mod_prism_jan_ndd <- lm(nextday_d_cfs ~ prism_p_cfs, - data=comp_data[(comp_data$mo == 1),]) -summary(mod_prism_jan_ndd) -plot(mod_prism_jan_ndd$model$nextday_d_cfs ~ mod_prism_jan_ndd$model$prism_p_cfs) - -# Repeat the above analysis for daymet e.g. the CHANGE in flow b/t today and -# tomorrow and TODAY's daymet precipitation -mod_daymet_jan_ndd <- lm(nextday_d_cfs ~ daymet_p_cfs_nextDay, - data=comp_data[which(comp_data$mo == 1),]) -summary(mod_daymet_jan_ndd) -plot(mod_daymet_jan_ndd$model$nextday_d_cfs ~ mod_daymet_jan_ndd$model$daymet_p_cfs) - -# Days with zero rainfall may be skewing our results based on the plots above. -# So, repeat the analysis now only comparing against change in flows on the -# same day as non-zero rain -# *** DAYMET -mod_daymet_jan_nz_ndd <- lm(nextday_d_cfs ~ daymet_p_cfs_nextDay, - data = comp_data[((comp_data$mo == 1) & (comp_data$daymet_p_cfs > 0)),]) -summary(mod_daymet_jan_nz_ndd) -plot(mod_daymet_jan_nz_ndd$model$nextday_d_cfs ~ mod_daymet_jan_nz_ndd$model$daymet_p_cfs) -# *** PRISM -mod_prism_jan_nz_ndd <- lm(nextday_d_cfs ~ prism_p_cfs, - data = comp_data[which((comp_data$mo == 1) & (comp_data$prism_p_cfs > 0)),]) -summary(mod_prism_jan_nz_ndd) -plot(mod_prism_jan_nz_ndd$model$nextday_d_cfs ~ mod_prism_jan_nz_ndd$model$prism_p_cfs) - -# Now, we look at the change in flow on the day after it rains to the summarized -# precipitation. 
So, repeat the same analysis but now compare to nextday flow
-# difference
-# *** DAYMET
-mod_daymet_jan_nz_ndd <- lm(nextday_d_cfs ~ daymet_p_cfs_nextDay,
-                            data = comp_data[((comp_data$mo == 1) & (comp_data$daymet_p_cfs > 0)),])
-summary(mod_daymet_jan_nz_ndd)
-plot(mod_daymet_jan_nz_ndd$model$nextday_d_cfs ~ mod_daymet_jan_nz_ndd$model$daymet_p_cfs_nextDay)
-# *** PRISM
-mod_prism_jan_nz_ndd <- lm(nextday_d_cfs ~ prism_p_cfs,
-                           data = comp_data[((comp_data$mo == 1) & (comp_data$prism_p_cfs > 0)),])
-summary(mod_prism_jan_nz_ndd)
-plot(mod_prism_jan_nz_ndd$model$nextday_d_cfs ~ mod_prism_jan_nz_ndd$model$prism_p_cfs)
-
-
-# Now, we look at the change in flow on the day after it rains to the summarized
-# precipitation. But, this time only look for days where there is some change in
-# flow
-# *** DAYMET
-mod_daymet_jan_nz_cdd <- lm(nextday_d_cfs ~ daymet_p_cfs_nextDay,
-                            data = comp_data[((comp_data$mo == 1) & (comp_data$nextday_d_cfs > 0)),])
-summary(mod_daymet_jan_nz_cdd)
-plot(mod_daymet_jan_nz_cdd$model$nextday_d_cfs ~ mod_daymet_jan_nz_cdd$model$daymet_p_cfs_nextDay)
-# *** PRISM
-mod_prism_jan_nz_cdd <- lm(nextday_d_cfs ~ prism_p_cfs,
-                           data = comp_data[((comp_data$mo == 1) & (comp_data$nextday_d_cfs > 0)),])
-summary(mod_prism_jan_nz_cdd)
-plot(mod_prism_jan_nz_cdd$model$nextday_d_cfs ~ mod_prism_jan_nz_cdd$model$prism_p_cfs)
-
-
-#Try the above analysis (compare change in next day flow to precip only on days
-#where change in flow is positive), but try for February which is another wet
-#month
-mod_prism_mon_nz_ndd <- lm(nextday_d_cfs ~ prism_p_cfs,
-                           data = comp_data[which((comp_data$mo == 2) & (comp_data$nextday_d_cfs > 0)),])
-summary(mod_prism_mon_nz_ndd)
-plot(mod_prism_mon_nz_ndd$model$nextday_d_cfs ~ mod_prism_mon_nz_ndd$model$prism_p_cfs)
-
-#The correlations above are decent.
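# A minimal refactoring sketch (not part of the original analysis) of the
# repeated month-filtered fits above: fit nextday_d_cfs against a chosen precip
# column for one month, optionally dropping zero-precip days, and return the
# adjusted R-squared. The function name fit_month_ndd and its arguments are
# illustrative assumptions; the mon_lm() helper defined below does a version of
# this systematically across all twelve months.
fit_month_ndd <- function(dat, precip_col, month, nonzero_precip = TRUE) {
  sub <- dat[dat$mo == month, ]
  if (nonzero_precip) {
    sub <- sub[sub[[precip_col]] > 0, ]
  }
  fit <- lm(sub$nextday_d_cfs ~ sub[[precip_col]])
  summary(fit)$adj.r.squared
}
# e.g. fit_month_ndd(comp_data, "prism_p_cfs", month = 1) should match
# summary(mod_prism_jan_nz_ndd)$adj.r.squared from the January PRISM fit above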
Let's see what the relationship looks like -#across all months of the year -# do all months and assemble a barplot of R^2 - -# create a class to hold both stats and the plot to make it easier to do comparisons later -plotBin <- R6::R6Class( - "plotBin", - public = list( - plot = NULL, data=list(), atts=list(), - initialize = function(plot = NULL, data = list()){ - self.plot = plot; self.data=data; - } - ) -) -# Week -mon_lm <- function(sample_data, y_var, x_var, mo_var, data_name){ - plot_out <- plotBin$new(data = sample_data) - nwd_stats <- data.frame(row.names=c('month', 'rsquared_a')) - for (i in 1:12) { - mo_data=sample_data[which((sample_data[,mo_var] == i)),] - weekmo_data <- lm(mo_data[,y_var] ~ mo_data[,x_var]) - dsum <- summary(weekmo_data) - nwd_stats <- rbind(nwd_stats, data.frame(i, dsum$adj.r.squared)) - } - plot_out$atts[['stats']] <- nwd_stats - barplot( - nwd_stats$dsum.adj.r.squared ~ nwd_stats$i, - ylim=c(0,1.0), - main=paste("lm(Q ~ P), monthly,",data_name) - ) - plot_out$plot <- recordPlot() - return(plot_out) -} - -nldas2_lm <- mon_lm(week_data, "nldas2_p_cfs", "usgs_cfs", "mo", "nldas2") -nldas2_lm$atts -nldas2_lm$plot - -prism_lm <- mon_lm(week_data, "prism_p_cfs", "usgs_cfs", "mo", "prism") -prism_lm$atts -prism_lm$plot - - -daymet_lm <- mon_lm(week_data, "daymet_p_cfs", "usgs_cfs", "mo", "daymet") -daymet_lm$atts -daymet_lm$plot - -# NLDAS2 -# do all months and assemble a barplot of R^2 -nldaswk_stats <- data.frame('month' = 1:12, 'rsquared_a' = numeric(12)) -for (i in 1:nrow(nldaswk_stats)) { - # Weekly d cfs vs P - mod_weekmo_nldas2_cfs <- lm(usgs_cfs ~ nldas2_p_cfs, - data=week_data[which((week_data$mo == i)),]) - dsum <- summary(mod_weekmo_nldas2_cfs) - #plot(mod_weekmo_nldas2_cfs$model$usgs_cfs ~ mod_weekmo_nldas2_cfs$model$nldas2_p_cfs) - - #mod_week_daymet_d_cfs <- lm(today_d_cfs ~ daymet_p_cfs, data=week_data) - #summary(mod_week_daymet_d_cfs) - #plot(mod_week_daymet_d_cfs$model$today_d_cfs ~ mod_week_daymet_d_cfs$model$daymet_p_cfs) - nldaswk_stats$rsquared_a[i] <- dsum$adj.r.squared -} -barplot(nldaswk_stats$dsum.adj.r.squared ~ nldaswk_stats$i, - main=paste(gage_info$station_nm ), ylim=c(0,1.0)) -daymet_lm <- mon_lm(week_data, "daymet_p_cfs", "usgs_cfs", "mo", "daymet") -daymet_lm$atts -daymet_lm$plot - - -#Binomial = Based on x, did Y occur? Looking at weekly rainfall totals, did a -#storm hydrograph occur? -#Poisson = Counts. Based on x, how many times did Y occur? Looking at weekly -#rainfall totals, how many storms should we have expected to see? - -#Are rollowing rainfall sums relevant to this analysis? -comp_data$rollingSumDaymet <- c(NA,NA,rollapply(comp_data$daymet_p_cfs_nextDay,3,sum, - align = "left")) -comp_data$rollingSumPRISM <- c(NA,NA,rollapply(comp_data$prism_p_cfs,3,sum, - align = "left")) -comp_data$rollingSumUSGS_prev <- c(NA,NA,rollapply(comp_data$usgs_cfs,3,sum, - align = "left")) -comp_data$rollingSumUSGS_next <- c(rollapply(comp_data$usgs_cfs,3,sum, - align = "left"),NA,NA) -rollModelPRISM <- lm(usgs_cfs ~ rollingSumPRISM,data = comp_data) -summary(rollModelPRISM) -plot(log10(rollModelPRISM$model)) -#Nope, not really. Maybe only at higher flows, which is when we'd expect -#correlation. Correlation is improved for next day flow however. What if we look -#at rolling vs rolling? -rollModel <- lm(rollingSumUSGS_prev ~ rollingSumPRISM,data = comp_data) -summary(rollModel) - -#What if we increase the rolling duration? At what point do we produce a maximum -#correlation? 
We would assume this increases with rolling time period -rsq <- numeric(180) -rsq_winter <- numeric(180) -for(i in 1:180){ - testData <- comp_data - testData$rollingSumPRISM <- c(rep(NA,(i - 1)),rollapply(testData$prism_p_cfs,i,sum, - align = "left")) - testData$rollingSumUSGS_prev <- c(rep(NA,(i - 1)),rollapply(testData$usgs_cfs,i,sum, - align = "left")) - rollModeli <- lm(rollingSumUSGS_prev ~ rollingSumPRISM,data = testData) - rsq[i] <- summary(rollModeli)$adj.r.squared - - rollModel_winteri <- lm(rollingSumUSGS_prev ~ rollingSumPRISM,data = testData[testData$mo %in% c(11,12,1,2,3),]) - rsq_winter[i] <- summary(rollModel_winteri)$adj.r.squared -} - -plot(rsq_winter) -points(rsq_winter,col = "blue") - - -plot(as.Date(comp_data$obs_date), - comp_data$prism_p_cfs, - col = "red",type = "l") -lines(as.Date(comp_data$obs_date), - comp_data$usgs_cfs) - - diff --git a/HARP-2024-2025/assessEventPrecip.R b/HARP-2024-2025/assessEventPrecip.R deleted file mode 100644 index 0d6066e3..00000000 --- a/HARP-2024-2025/assessEventPrecip.R +++ /dev/null @@ -1,312 +0,0 @@ -# Test datasets CSV -library("sqldf") -library("dataRetrieval") -library("lubridate") - -#Set the id for the USGS gage from which we will pull data and use to summarize -#our meteorology data. Currently, we have data set up for: -#Culpepper 01667500, Strasburg 01634000, Ruckersville 01665500 -gageid = '01665500' -# We redefine the gage id as a "hydrocode", which is how it is stored in the VA -#Hydro (DEQ) database -hydrocode = paste0('usgs_ws_', gageid) - -#Pull data from VA Hydro, grabbing some precip data for the gage watershed -prism_data <- read.csv(paste0("http://deq1.bse.vt.edu:81/files/met/", hydrocode, "-prism-all.csv")) - -#Use lubridate to add on some additional information about the date. These dates -#come in as posix and thus have timestamps, which make them a little trickier to -#work with. So, we can disaggregate them for easier joins and manipulation -prism_data[,c('yr', 'mo', 'da', 'wk')] <- cbind(year(as.Date(prism_data$obs_date)), - month(as.Date(prism_data$obs_date)), - day(as.Date(prism_data$obs_date)), - week(as.Date(prism_data$obs_date))) - -#Repeat the download and date disaggregation for daymet, a different precip -#source: -daymet_data <- read.csv(paste0("http://deq1.bse.vt.edu:81/files/met/", hydrocode, "-daymet-all.csv")) -daymet_data[,c('yr', 'mo', 'da', 'wk')] <- cbind(year(as.Date(daymet_data$obs_date)), - month(as.Date(daymet_data$obs_date)), - day(as.Date(daymet_data$obs_date)), - week(as.Date(daymet_data$obs_date))) -#Repeat the download and date disaggregation for NLDAS2, an HOURLY precip source -nldas2_data <- read.csv(paste0("http://deq1.bse.vt.edu:81/files/met/", hydrocode, "-nldas2-all.csv")) -nldas2_data[,c('yr', 'mo', 'da', 'wk')] <- cbind(year(as.Date(nldas2_data$obs_date)), - month(as.Date(nldas2_data$obs_date)), - day(as.Date(nldas2_data$obs_date)), - week(as.Date(nldas2_data$obs_date))) -#NLDAS is an hourly precip data set. We should aggregate it to be daily sums to -#match the data in PRISM and daymet for the purpose of comparison. We can do -#this in SQL by grouping by the the date columns we created for month, day, and -#year -nldas2_data <- sqldf( - "select featureid, min(obs_date) as obs_date, yr, mo, da, - sum(precip_mm) as precip_mm, sum(precip_in) as precip_in - from nldas2_data - group by yr, mo, da - order by yr, mo, da - " -) - - -# Get USGS gage data from NWIS, the USGS REST services. 
We pull this data using -# the dataRetrieval R package which has extensive documentation on the options -# therewithin. See https://waterdata.usgs.gov/blog/dataretrieval/. Below, we -# a cached version of the function defined in our config.R file: -gage_info <- memo_readNWISsite(gageid) -#Extract the drainage area of the gage -da <- gage_info$drain_area_va -#Get daily, mean streamflow (pcode 000060, see dataRetrieval::parameterCdFile) -#from the gage specified in gageid -usgs_data <- memo_readNWISdv(gageid,'00060') -#Extract date information from the gage using lubridate as above -usgs_data[,c('yr', 'mo', 'da')] <- cbind(year(as.Date(usgs_data$Date)), - month(as.Date(usgs_data$Date)), - day(as.Date(usgs_data$Date))) - -#Join all data together. In other words, extract the relevant fields from the -#three data sources and combine them based on the day, month, and year. We could -#do this by joining by the dates, but you may notice that the USGS daily data -#may not come in as POSIX and doesn't have a time associated with it. We could -#do this via tidyverse verbs, but our team typically prefers SQL as it tends to -#change a lot less frequently i.e. will require less long-term maintenance -comp_data <- sqldf( - "select a.obs_date, a.precip_in as prism_p_in, - a.yr, a.mo, a.da, a.wk, - b.precip_in as daymet_p_in, d.precip_in as nldas2_p_in, - c.X_00060_00003 as usgs_cfs - from prism_data as a - left outer join daymet_data as b - on ( - a.yr = b.yr - and a.mo = b.mo - and a.da = b.da - ) - left outer join usgs_data as c - on ( - a.yr = c.yr - and a.mo = c.mo - and a.da = c.da - ) - left outer join nldas2_data as d - on ( - a.yr = d.yr - and a.mo = d.mo - and a.da = d.da - ) - order by a.yr, a.mo, a.da - " -) - -# Index the dataset, giving a unique number to each row. Please note that these -# are already available via rownames(comp_data) but here we add it as a column -# to call them in SQLDF. You can see how SQL changes our R workflows. As the -# project moves forward, feel free to branch out and try other workflows but -# please note we will often encourage the use of SQL as we assist in tasks. But -# all code is acceptable if it is documented and works! -comp_data$dataset_day <- index(comp_data) - -#Add a column for the date since this is now daily data -comp_data$date <- as.Date(comp_data$obs_date) - -#Here, we add in columns to represent tomorrow's flow and the change in flow -#from today to yesterday and today and tomorrow. This give an indication of how -#flow is changing. We can do so using R's indexing, simply calling the next rows -#down where needed -# comp_data$nextday_usgs_cfs <- c(comp_data$usgs_cfs[2:nrow(comp_data)],NA) -# comp_data$nextday_d_cfs <- comp_data$nextday_usgs_cfs - comp_data$usgs_cfs -# comp_data$today_d_cfs <- comp_data$usgs_cfs - c(NA,comp_data$usgs_cfs[1:(nrow(comp_data) - 1)]) - -#We could alternatively get this via an SQL query that runs just about as -#quickly! 
Note the use of the Lag() function and its inputs in the subquery used -#to pull in yesterdays and todays data -comp_data <- sqldf( - "select b.*, - b.nextday_usgs_cfs - b.usgs_cfs as nextday_d_cfs, - b.usgs_cfs - b.yesterday_usgs_cfs as today_d_cfs - FROM ( - select - a.*, - Lag(a.usgs_cfs,1) OVER(ORDER BY a.dataset_day) as yesterday_usgs_cfs, - Lag(a.usgs_cfs,-1) OVER(ORDER BY a.dataset_day) as nextday_usgs_cfs - from comp_data as a - order by a.dataset_day - ) as b - " -) - -# Add a cfs Precip column using the following conversions for EACH of the precip -# data sources: -# acre-feet/day = 3.07 MGD -# cfs = 1.57 MGD -# DAsqmi * 640 ac/sqmi * p_in_per_day / 12.0 = Precip ac-ft/day -# P acft/day / 3.07 = P mgd -# P mgd * 1.572 = P cfs -# 1.572 * (DAsqmi * 640.0 * p_in / 12.0) / 3.07 -comp_data$prism_p_cfs <- 1.572 * (da * 640.0 * comp_data$prism_p_in / 12.0) / 3.07 -comp_data$daymet_p_cfs <- 1.572 * (da * 640.0 * comp_data$daymet_p_in / 12.0) / 3.07 -comp_data$nldas2_p_cfs <- 1.572 * (da * 640.0 * comp_data$nldas2_p_in / 12.0) / 3.07 -#Can we learn anything based on what stormSep gives us? e.g. the number of -#storms that occur in a given week, month, day, etc.? -#First, create a dataset where USGS flow is not NA -source("C:/Users/gcw73279.COV/Desktop/gitBackups/OWS/HARParchive/HARP-2024-2025/stormSep_USGS.R") - -stormCompData <- comp_data[!is.na(comp_data$usgs_cfs),] -stormOut <- stormSeparate(as.Date(stormCompData$obs_date), - inflow = stormCompData$usgs_cfs, - plt = FALSE,path = paste0(getwd(),"/StormPlots/"), - allMinimaStorms = TRUE, - baselineFlowOption = "Month") - -#For each storm, sum precip leading up to it (past 3, 5, 7, and 14 days?). Go -#through each storm and find the sum of precip of each dataset. Also find past -#3, 5, 7, and 14 days from storm endpoint including its full duration? What -#about stream length? What about DA or other NHDPLus factors? Can we get at -#travel time and or lag? Land use? -stormStats <- stormOut$Stats -#Throw out any storms that have inconsistent durations compared to start and end -#date. This can occur when a gage goes offline as StormSep doesn't check for -#this -stormEvents <- stormOut$Storms[(as.Date(stormStats$endDate) - as.Date(stormStats$startDate) + 1) == stormStats$durAll] -stormStats <- stormStats[(as.Date(stormStats$endDate) - as.Date(stormStats$startDate) + 1) == stormStats$durAll,] - -#Get rolling sums of precip over 3 day periods: -comp_data$roll3PRISM_cfs <- rollapply(comp_data$prism_p_cfs,3,sum,fill = NA,align = "right") - -#A function that gets precip from the rollingDur period but will include the -#full stormDuration -getRollPrecip <- function(comp_data,stormDuration, - rollingDur,endDate, - precipColName = "prism_p_cfs"){ - #Convert input date to date, just in case - sDate <- as.Date(endDate) - #Get the index in comp_date df where the endDate occurs - dateIndex <- grep(endDate,comp_data$date) - #Get all values from the precipColName in comp_data for the target duration - #adjusted for the storm duration. So, if there is a five-day storm, - #stormDuration is 5. If we are interested in rolling 7-day precip prior to and - #throughout the storm, we'd want rollingDur = 7. So, we need dateIndex - 7 - 5 - #The precip data is in: - precipData <- comp_data[,precipColName] - precipStorm <- precipData[(dateIndex - rollingDur - stormDuration + 2) : dateIndex] - #Return total precip. 
Adjust for NAs that may occur due to indexing numbers - #prior to start of comp_data - totalPrecip <- sum(precipStorm,na.rm = TRUE) - return(totalPrecip) -} - -#Add to stormStats the sum of precipitation from the 3-, 7-, and 14-day periods -#leading up to the storm and including the full storm duration. Convert to MG -cfsToMGD <- 86400 * 12*12*12/231/1000000 -#Only precip during the storm iteself -stormStats$roll1DayWStormPRISM_MG <- mapply(SIMPLIFY = TRUE, USE.NAMES = FALSE, - FUN = getRollPrecip, stormDuration = stormStats$durAll, - endDate = stormStats$endDate, - MoreArgs = list(comp_data = comp_data,rollingDur = 1, - precipColName = "prism_p_cfs") -) -stormStats$roll1DayWStormPRISM_MG <- stormStats$roll1DayWStormPRISM_MG * cfsToMGD -#Includes 1-day prior to the storm: -stormStats$roll2DayWStormPRISM_MG <- mapply(SIMPLIFY = TRUE, USE.NAMES = FALSE, - FUN = getRollPrecip, stormDuration = stormStats$durAll, - endDate = stormStats$endDate, - MoreArgs = list(comp_data = comp_data,rollingDur = 2, - precipColName = "prism_p_cfs") -) -stormStats$roll2DayWStormPRISM_MG <- stormStats$roll2DayWStormPRISM_MG * cfsToMGD - -#Plots of total storm volume to rolling 3-, 7-, 14-day periods with storm duration -plot(stormStats$roll1DayWStormPRISM_MG,stormStats$volumeTotalMG, - log = "xy") -#Plots of total storm volume above base Q to rolling 3-, 7-, 14-day periods with -#storm duration -plot(stormStats$roll1DayWStormPRISM_MG,stormStats$volumeAboveBaseQMG) - -#Linear models with storm duration -withStorm1 <- lm(volumeTotalMG ~ roll1DayWStormPRISM_MG, - data = stormStats) - -#Linear models with stormDuration above base Q -withStorm1_aboveBaseQ <- lm(volumeAboveBaseQMG ~ roll1DayWStormPRISM_MG, - data = stormStats) -withStorm2_aboveBaseQ <- lm(volumeAboveBaseQMG ~ roll2DayWStormPRISM_MG, - data = stormStats) - -summary(withStorm1) -summary(withStorm1_aboveBaseQ) -summary(withStorm2_aboveBaseQ) - -#The following showed a strong correlation. What if we look at power regression -#to smooth over small and large differences e.g. take power away from larger -#storms? This also removes storms in which no precip was found in the target -#duration. This may skew results and should be reviewed carefully due to the -#bias of picking only data where this method "works" -withStorm1_aboveBaseQ_log <- lm(log(volumeAboveBaseQMG) ~ log(roll1DayWStormPRISM_MG), - data = stormStats[stormStats$roll1DayWStormPRISM_MG > 0,]) -#The log power regression in this case actually performs worse: -summary(withStorm1_aboveBaseQ_log) - -#However, are we weighted by large storm volumes? It appears so, with one value -#even falling outside of Cooks Distance -plot(withStorm1_aboveBaseQ) - -#Does relationsip improve at all when we look only at specific months. Add month -#based on the startDate of the storm -stormStats$beginMonth <- as.numeric(format(as.Date(stormStats$startDate),"%m")) - -#What are the monthly relationships? -monthEventOut1Day <- mon_lm(stormStats, y_var = "volumeAboveBaseQMG", - x_var = "roll1DayWStormPRISM_MG", - mo_var = "beginMonth", "Storm Event Vol") -monthEventOut2Day <- mon_lm(stormStats, y_var = "volumeAboveBaseQMG", - x_var = "roll2DayWStormPRISM_MG", - mo_var = "beginMonth", "Storm Event Vol") - -#Data counts? -table(stormStats$beginMonth) -sum(stormStats$beginMonth == 10) -testReg <- lm(log(volumeAboveBaseQMG) ~ log(roll1DayWStormPRISM_MG), - data = stormStats[stormStats$beginMonth==10,]) -summary(testReg) - -#There may be some heavy influence from high storm events. 
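# A quick illustrative check of that influence concern (a sketch, not part of
# the original analysis): flag storms whose Cook's distance on the
# withStorm1_aboveBaseQ fit above exceeds the common 4/n rule of thumb. The
# threshold is an assumed convention, not a hard cutoff; cooks.distance() is
# base R.
cd <- cooks.distance(withStorm1_aboveBaseQ)
high_influence <- names(cd)[cd > 4 / length(cd)]
stormStats[high_influence, c("startDate", "endDate", "volumeAboveBaseQMG")]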
May want to consider -#power regressions or non-linear exponential regressions to reduce their -#influence or evens a Cooks Distance analysis to remove errant data points - -#For each storm in stormOut$Storms, use the storm start and end-date to find the -#7-day period leading up to the storm and the 7-day period following the storm. -#Show precip during this period and throughout storm duration. Then, highlight -#the separated storm hydrograph -for(i in 1:length(stormEvents)){ - print(i) - #Get the current storm flow data which will be used for highlighting that - #hydrograph - stormi <- stormEvents[[1]] - #Only need non-NA values since stormSep will output full timeseries but leave - #values as NA if they are not included in storm - stormi <- stormi[!is.na(stormi$flow),] - - #Get the start and end date of the storm from stormStats: - stormStart <- as.Date(stormStats$startDate[i]) - stormEnd <- as.Date(stormStats$endDate[i]) - #Adjust these values to increase the window: - plotStart <- stormStart - 7 - plotEnd <- stormEnd + 7 - - #Get the stream flow and baseflow from stormSep - flowData <- stormOut$flowData[stormOut$flowData$timestamp >= plotStart & - stormOut$flowData$timestamp <= plotEnd,] - #Join in the precip "flow" from comp_data: - flowDataAll <- sqldf("SELECT flowData.*, - comp.prism_p_in, - comp.daymet_p_in, - comp.nldas2_p_in, - comp.prism_p_cfs, - comp.daymet_p_cfs, - comp.nldas2_p_cfs - FROM flowData - LEFT JOIN comp_data as comp - ON flowData.timeStamp = comp.Date") - pathOut <- paste0("StormPlotsNew/stormPlot",i,".PNG") - plotStorm(pathOut,stormi,flowDataAll) -} diff --git a/HARP-2024-2025/caching function.R b/HARP-2024-2025/caching function.R deleted file mode 100644 index 36a3b20b..00000000 --- a/HARP-2024-2025/caching function.R +++ /dev/null @@ -1,33 +0,0 @@ -# set up a caching function for NWIS -Sys.setenv(USGS_cache_dir = "/media/model/usgs") - -hydro_tools <- 'https://raw.githubusercontent.com/HARPgroup/hydro-tools/master' -cbp6_location <- 'https://raw.githubusercontent.com/HARPgroup/cbp6/master' -hydro_tools_location <- hydro_tools -om_location <- "https://raw.githubusercontent.com/HARPgroup/om/master" -elfgen_location <- "https://raw.githubusercontent.com/HARPgroup/elfgen/master" -openmi_om_location <- "https://raw.githubusercontent.com/HARPgroup/openmi-om/master" -vahydro_location <- "https://github.com/HARPgroup/vahydro/blob/master" -github_location <- "C:/usr/local/home/git" -HARParchive_location <- paste0(github_location, "/HARParchive") -# foundation_location <- "C:\\Users\\faw18626\\OneDrive - Commonwealth of Virginia" -foundation_location <- paste0(Sys.getenv("USERPROFILE"),"\\OneDrive - Commonwealth of Virginia\\OWS\\foundation_datasets\\wsp\\wsp2020") -onedrive_location <- paste0(Sys.getenv("USERPROFILE"),"\\OneDrive - Commonwealth of Virginia") - -folder <- '/Workspace/tmp/' -export_path <- '/Workspace/tmp/' -base_url <- 'http://deq1.bse.vt.edu:81/d.dh' #needed for REST functions -save_url <- "http://deq1.bse.vt.edu:81/data/proj3/out"; -noaa_api_key <- 'OdnQxMwsNwbNTPiFJRygEwxflkOIrxhh' -options(noaakey = noaa_api_key) - -#### Old Stuff No Longer Needed? 
-vahydro_directory <- "/usr/local/home/git/r-dh-ecohydro/Analysis/fn_vahydro-2.0" -auth_directory <- "/usr/local/home/git/r-dh-ecohydro/ELFGEN/internal" -file_directory <- "/Workspace/tmp" -save_directory <- "/Workspace/tmp" -fxn_locations <- "/usr/local/home/git/r-dh-ecohydro/ELFGEN/internal/" -habitat_files <- "/usr/local/home/git/r-dh-ecohydro/Analysis/Habitat/" -fxn_vahydro <- "/usr/local/home/git/hydro-tools" -base_directory <- "/usr/local/home/git/r-dh-ecohydro" - diff --git a/HARP-2024-2025/dni_data_experiments.R b/HARP-2024-2025/dni_data_experiments.R deleted file mode 100644 index f461450f..00000000 --- a/HARP-2024-2025/dni_data_experiments.R +++ /dev/null @@ -1,9 +0,0 @@ - -# How to use SQL in R in a more flexible way than sqldf will permit -# https://dbi.r-dbi.org/reference/dbAppendTable.html -con <- dbConnect(RSQLite::SQLite(), ":memory:") -dbCreateTable(con, "dh_timeseries", ds$tsvalues) -dbWriteTable(con, "dh_timeseries", ds$tsvalues, overwrite = TRUE) -dbExecute(con, "insert into dh_timeseries(tid, tsvalue, featureid) values (2,4.5,10)") -dbExecute(con, "select * from dh_timeseries") -dbReadTable(con, "dh_timeseries") diff --git a/HARP-2024-2025/hydroimport_daily.R b/HARP-2024-2025/hydroimport_daily.R deleted file mode 100644 index f626c3a5..00000000 --- a/HARP-2024-2025/hydroimport_daily.R +++ /dev/null @@ -1,49 +0,0 @@ -# Inputs (args): -# 1 = File path of csv from VA Hydro -# 2 = Data source "nldas2, daymet, prism" -# 3 = End path of new csv -# Outputs: -# Csv file with manipulated data at end filepath - -# Library necessary packages -print("Accessing necessary libraries") -suppressPackageStartupMessages(library(lubridate)) -suppressPackageStartupMessages(library(sqldf)) - -# Set up command args -print("Reading command args") -args <- commandArgs(trailingOnly = TRUE) -if (length(args) != 3){ - message("Missing or extra inputs. Usage: Rscript hydroimport_daily.R input_path data_source output_path") - q() -} - -input_path <- args[1] -data_source <- args[2] -output_path <- args[3] - -# Pull csv from input file path -print("Reading csv") -hydro_daily <- read.csv(input_path) -# Add in more date information -print("Adding date information") -hydro_daily[,c('yr', 'mo', 'da', 'wk')] <- cbind(year(as.Date(hydro_daily$obs_date)), - month(as.Date(hydro_daily$obs_date)), - day(as.Date(hydro_daily$obs_date)), - week(as.Date(hydro_daily$obs_date))) - -# If data comes from nladas2 (hourly), it must be converted into daily data -print("Checking for nldas2") -if (data_source=="nldas2"){ - hydro_daily <- sqldf( - "select featureid, min(obs_date) as obs_date, yr, mo, da, - sum(precip_mm) as precip_mm, sum(precip_in) as precip_in - from hydro_daily - group by yr, mo, da - order by yr, mo, da - " - )} - -# Write csv in new file path -print("Writing csv") -write.csv(hydro_daily,file = output_path) \ No newline at end of file diff --git a/HARP-2024-2025/make_weekly_summary_ts.R b/HARP-2024-2025/make_weekly_summary_ts.R deleted file mode 100644 index 56930201..00000000 --- a/HARP-2024-2025/make_weekly_summary_ts.R +++ /dev/null @@ -1,61 +0,0 @@ -# Inputs (args): -# 1 = comp data csv filepath -# 2 = Output csv path -# 3 = data column - -# Libraries -suppressPackageStartupMessages(library(lubridate)) -suppressPackageStartupMessages(library(sqldf)) - - -args <- commandArgs(trailingOnly = T) - -if (length(args) != 3){ - message("Missing or extra inputs. 
Usage: Rscript make_weekly_summary_ts.R comp_data_filepath output_filepath data_column ") -} -# Missing from github issue:[weekly_column(default=weekly_mean_value)] -print("Assigning arguments") -comp_data_filepath <- args[1] -output_filepath <- args[2] -data_column <- args[3] - -print("Reading in comp data csv") -comp_data <- read.csv(comp_data_filepath) - -# Check for necessary data (Comp data doesnt have start and end date?) -print("Checking columns") -#if ("start_date" %in% colnames(comp_data)){}else{ - # message("start_date column missing") - # q() -#} - -#if ("end_date" %in% colnames(comp_data)){}else{ - #message("end_date column missing") - #q() -#} - -if (data_column %in% colnames(comp_data)){}else{ - message(data_column, "column missing") - q() -} - - -#converts our daily data into weekly -print("Converting to weekly data") - week_data <- sqldf( - "select min(obs_date) as week_begin, yr, wk, min(dataset_day) as dataset_day_begin, - avg(dataset_p_in) as dataset_p_in, avg(dataset_cfs) as dataset_cfs, - avg(usgs_cfs) as usgs_cfs, avg(today_d_cfs) as today_d_cfs, - avg(nextday_d_cfs) as nextday_d_cfs - from comp_data - group by yr, wk - order by yr, wk - " - ) - #Base the month column off of the min day oindex for that week, or the day that - #week began - week_data$mo <- month(week_data$week_begin) - - - write.csv(week_data,output_filepath) - diff --git a/HARP-2024-2025/retrieve_vahydro_metrics.R b/HARP-2024-2025/retrieve_vahydro_metrics.R deleted file mode 100644 index 2c4b9ecf..00000000 --- a/HARP-2024-2025/retrieve_vahydro_metrics.R +++ /dev/null @@ -1,12 +0,0 @@ -# GET VAHydro 1.0 RIVERSEG l90_Qout DATA -mdf <- data.frame( - 'model_version' = c('cbp-6.1'), - 'runid' = c('stormVol_prism'), - 'metric' = c('precip_annual_max_in'), - 'runlabel' = c('precip_annual_max_in') -) -met_data <- om_vahydro_metric_grid( - metric = metric, runids = mdf, bundle="watershed", ftype="usgs_full_drainage", - base_url = paste(site,'entity-model-prop-level-export',sep="/"), - ds = ds -) diff --git a/HARP-2024-2025/stormSep_USGS.R b/HARP-2024-2025/stormSep_USGS.R deleted file mode 100644 index 54261215..00000000 --- a/HARP-2024-2025/stormSep_USGS.R +++ /dev/null @@ -1,504 +0,0 @@ -# flowData <- dataRetrieval::readNWISdv("01613900",parameterCd = "00060") -# inflow <- flowData$X_00060_00003 -# timeIn <- as.Date(flowData$Date) -# mindex <- which(timeIn == "2019-01-01") -# maxdex <- which(timeIn == "2021-12-31") -# seqdex <- seq(mindex,maxdex) -# inflow <- inflow[seqdex] -# timeIn <- timeIn[seqdex] - - -#Input variables are: -# timeIn = Timestamps associated with streamflow data as vector -# inflow = Streamflow daily data as vector in cfs -# plt = Boolean to determine if plots are necessary -# path = Directory to store plots in. Used if plt is TRUE -# allMinimaStorms = Considers ALL local minima as potnetial storm endpoints. -# Will distinguish between peaks in the case of a multi-day storm hydrograph if -# set to TRUE. Otherwise, a storm is only considered as two subseuqnet local -# minima that fall below baseline flow -# baselineFlowOption = One of c("Water Year","Month","Calendar Year","Climate Year","All"). -# Defaults to "All". Determines how program will develop baseline flow. 
Uses -# horizontal line regression of baseflow and breaks it down based on all time, -# water year, calendar year, or month based on user selection -stormSeparate <- function(timeIn, inflow, - plt = F, - path = paste0(getwd(),"/"), - allMinimaStorms = FALSE, - baselineFlowOption = "All" -){ - #Call packages required by function if not called already: - require(grwat) - require(zoo) - require(sqldf) - - #First, get baseflow associated with inflow. Use defaults of grwat for now - #which involves three passes and the Lyne-Hollick (1979) hydrograph separation - #method with coefficient a as 0.925 - baseQ <- grwat::gr_baseflow(inflow) - - #Add timestamp to the baseflow separation for convenience by creating a - #dataframe - baseQ <- data.frame(timestamp = timeIn, Year = as.numeric(format(timeIn,"%Y")), - baseQ = baseQ,flow = inflow) - - #Find mins/maxes of three consecutive points such that the extreme is in the - #middle. These represent potential local mins and maxes - maxes <- rollapply(baseQ$flow,3,function(x) which.max(x) == 2,# & (x[2]!=x[1] & x[2]!= x[3]), - align = "left") - mins <- rollapply(baseQ$flow,3,function(x) which.min(x) == 2,# & (x[2]!=x[1] & x[2]!= x[3]), - align = "left") - #Create data frames of these local minima/maxima with the corresponding - #timestamp from the original data frame. Note that the rolling looks at the - #first three data points. If the first triplet has a local max as its second - #value, the first value in maxes will be TRUE.The timestamp to be associated - #with this should be that of the middle data point e.g. the second timestamp - #of the triplet! - mins <- data.frame(timestamp = baseQ$timestamp[c(FALSE,mins,FALSE)], - mins = baseQ$flow[c(FALSE,mins,FALSE)]) - maxes <- data.frame(timestamp = baseQ$timestamp[c(FALSE,maxes,FALSE)], - maxes = baseQ$flow[c(FALSE,maxes,FALSE)]) - - #Need to find mins/maxes below baseline flow. But first baseline flow must be - #defined. hreg() will try to fit mulitple horizontal lines through subset of - #data involving every point below that line until best fit is found. This - #becomes baseline flow, brk - #Find the break associated with this run and buffer by 10%. Based the hreg on - #the timescale selected by the user - # baselineFlowOption = One of c("Water Year","Month","Calendar Year","All") - if(baselineFlowOption == "All"){ - #Use the entire baseflow dataset to get baseline flow, brk - brk <- hreg(baseQ$baseQ,limit = 1) - brk <- brk * 1.1 - #Make as long as timeseries - brk <- rep(brk,nrow(baseQ)) - }else if(baselineFlowOption %in% c("Calendar Year", "Climate Year","Water Year")){ - #Based on the user option, determine the start month of the year - #designation. Water Years run from October - September, Climate years from - #April - March, and Calendar Years from January - December - if(baselineFlowOption == "Water Year"){ - WYS <- "10-01" - }else if(baselineFlowOption == "Climate Year"){ - WYS <- "04-01" - }else{ - WYS <- "01-01" - } - #Make dates of the input timesteps: - dataDates <- as.Date(timeIn) - #Create an emtpty vector for the new annual designations - dataWY <- NA - - #Identify months in the next year that are classified as the previous year's - #water/climate year e.g. January 2021 is in Water Year 2020 that begins in - #October. 
Get the month using gsub to get only the characters prior to first - #dash - WYM <- as.numeric(gsub("-.*","",WYS)) - if(WYM > 1){ - WYP <- seq(1,(WYM - 1)) - }else{ - WYP <- 0 - } - - #Add leading zeros to month number January - September - WYP[nchar(WYP) == 1]<-paste0("0",WYP[nchar(WYP) == 1]) - #Combine to form a regex pattern that searches for any of the months at the - #END of a string ($) - WYP <- paste0(WYP,"$",collapse = "|") - #Get the year and month associated with the dates - WY <- format(dataDates,"%Y-%m") - - #Initialize water year by making it the calendar year + 1 - dataWY <- as.numeric(gsub("-.*","",WY)) + 1 - - #Search for all months from January to the start of the water year. These - #months need to be assigned previous water year number (current calendar - #year) - dataWY[grepl(WYP,WY)] <- dataWY[grepl(WYP,WY)] - 1 - - #Exception case occurs when water year is calendar year, in which case water - #year ends in calendar year such that calendar year = water year - if(WYS == "01-01"){ - dataWY <- as.numeric(format(dataDates,"%Y")) - } - #Store the years in the appropriate column in baseQ data frame - baseQ$Year <- dataWY - #Create an empty vector the length of the baseQ data frame - brk <- numeric(nrow(baseQ)) - #For each unique water year, calendar year, etc., run the hreg and store in - #brk - for (i in unique(dataWY)){ - baseQsubset <- baseQ[baseQ$Year == i,] - #Use the subset of the baseflow dataset to get baseline flow, brk - brki <- hreg(baseQsubset$baseQ,limit = 1) - brki <- brki * 1.1 - #Store results - brk[baseQ$Year == i] <- brki - } - }else if(baselineFlowOption == "Month"){ - #Make dates of the input timesteps: - dataDates <- as.Date(timeIn) - - #Create a vector of the months and years, essentially getting a vector of - #all months in the timeIn vector - monthYears <- format(dataDates,"%m-%Y") - - #Create an empty vector the length of the baseQ data frame - brk <- numeric(nrow(baseQ)) - for (i in unique(monthYears)){ - baseQsubset <- baseQ[monthYears == i,] - #Use the subset of the baseflow dataset to get baseline flow, brk - brki <- hreg(baseQsubset$baseQ,limit = 1) - brki <- brki * 1.1 - #Store results - brk[monthYears == i] <- brki - } - } - #Add baseline flow to baseQ - baseQ$baselineQ <- brk - - #Next step is to isolate storms. This can be accomplished by taking a minimum - #and the next point to fall below baseline flow, brk. Each storm is checked to - #ensure a maximum above some reference level, here 1.5*brk. This eliminates - #small storms with little or no peak or rises in baseflow. Only minimums below - #the brk value are considered as these are storms that occur at baseflow and - #fully rise/recede. - - #Get the times associated with minimums that are below baseline flow brk. 
- #First, join in the baseline flow for the timeperiod: - mins <- sqldf("SELECT mins.*, baseQ.baselineQ - FROM mins - LEFT JOIN baseQ - ON mins.timestamp = baseQ.timestamp") - - if(allMinimaStorms){ - #Use all minima as potential storm start and stop points - x <- mins$timestamp - #Get the corresponding local maximums - y <- maxes$maxes - }else{ - #Get only the minima timestamps that lie below baseline flow - x <- mins$timestamp[mins$mins < mins$baselineQ] - #Get the corresponding local maximums - y <- maxes$maxes[mins$mins < mins$baselineQ] - } - - #A data frame to build with storms in each column - stormsep <- list() - #An ID column to store the storm number in baseQ - baseQ$stormID <- NA - - #Start evaluating each set of minima to evaluate if there is a qualifying - #storm event. If there is, store it with all relevant data - for (i in 1:(length(x) - 1)){ - # if(i==73){browser()} - endIndex <- 1 - #Initial guess at storm endpoints e.g. two local minimums - storm <- c(x[i], x[i + endIndex]) - #initial stormflows and the times associated with those flows: - stormflow <- inflow[timeIn >= storm[1] & timeIn <= storm[2]] - - #When using allMinimaStorms, some minima may be combined. Skip loops that - #feature combined storms. So, if at frist storm and nextStorm are January - #1st - Jan 10 and Jan 10 - Jan 12, but these are combined into one (Jan 1 - - #Jan 12), then we can skip the loop of Jan 10 - Jan 12. First, make sure - #there is a storm in stormsep and that this is not the first loop i.e. i != - #1 - if(i > 1 && length(stormsep) > 0){ - #Get the previous storm - prevStorm <- stormsep[[length(stormsep)]] - #Remove the NAs so we only have the timestamps associated with the current - #storm - prevStorm <- prevStorm[!is.na(prevStorm$flow),] - #If the end point of the storm is in the previous storm, skip this - #iteration. This storm has already been accounted for. - if(storm[2] %in% prevStorm$timestamp){ - #Skip to next iteration of for loop: - next - } - } - - #If the second minimum flow is practically equal to the next local maximum, - #combine with the next storm as they are likely of the same storm (of - #course, this assumes that the timestamps match up which they should in the - #current set-up regardless of allMinimaStorms). But only do this if - #allMinimaStorms is TRUE since otherwise baselineflow is the cut-off. - if(allMinimaStorms & !is.na(x[i + 1 + endIndex])){ - #Initial guess at storm endpoints e.g. two local minimums - nextStorm <- c(x[i + 1], x[i + 1 + endIndex]) - #initial stormflows and the times associated with those flows: - nextStormflow <- inflow[timeIn >= nextStorm[1] & timeIn <= nextStorm[2]] - #What is the maximum of this storm event? - nextMaxStormFlow <- max(nextStormflow) - - while(!is.na(nextMaxStormFlow) & - stormflow[length(stormflow)] >= 0.8 * nextMaxStormFlow){ - endIndex <- endIndex + 1 - #Initial guess at storm endpoints e.g. two local minimums - storm <- c(x[i], x[i + endIndex]) - #initial stormflows and the times associated with those flows: - stormflow <- inflow[timeIn >= x[i] & timeIn <= x[i + endIndex]] - - if(!is.na(x[i + 1 + endIndex])){ - #Initial guess at storm endpoints e.g. two local minimums - nextStorm <- c(x[i + endIndex], x[i + 1 + endIndex]) - #initial stormflows and the times associated with those flows: - nextStormflow <- inflow[timeIn >= nextStorm[1] & timeIn <= nextStorm[2]] - #What is the maximum of this storm event? 
- nextMaxStormFlow <- max(nextStormflow) - }else{ - nextMaxStormFlow <- NA - } - } - } - - #Get the times associated with the storm - stormtimes <- timeIn[timeIn >= x[i] & timeIn <= x[i + endIndex]] - - #When does the maximum flow associated with this storm occur? - maxtime <- stormtimes[stormflow == max(stormflow)][1] - - #If there is a point at baseflow before next minimum, use it instead to - #prevent over elongated tails. We just need to ensure it takes place before - #the next minima, is over brk, and occurs after maxtime - endAlt <- (baseQ$timestamp[baseQ$flow < baseQ$baselineQ & - baseQ$timestamp > x[i] & - baseQ$timestamp < x[i + endIndex] & - baseQ$timestamp > maxtime])[1] - #If an alternate endpoint for the storm was found, redefine the storm: - if (!is.na(endAlt)){ - storm <- c(x[i],endAlt) - stormflow <- inflow[timeIn >= x[i] & timeIn <= endAlt] - stormtimes <- timeIn[timeIn >= x[i] & timeIn <= endAlt] - } - #data frame of storm data - stormdat <- data.frame( - timestamp = stormtimes, - baseQ = baseQ$baseQ[baseQ$timestamp >= storm[1] & baseQ$timestamp <= storm[2]], - flow = stormflow, - baselineQ = baseQ$baselineQ[baseQ$timestamp >= storm[1] & baseQ$timestamp <= storm[2]] - ) - - #data frame of whole time series - store <- data.frame(timestamp = baseQ$timestamp, flow = NA,baseflow = NA) - #Fills in only flow data during storm, leaving rest as NA - store$flow[store$timestamp >= storm[1] & - store$timestamp <= storm[2]] <- stormdat$flow - store$baseflow[store$timestamp >= storm[1] & - store$timestamp <= storm[2]] <- stormdat$baseQ - store$baselineflow[store$timestamp >= storm[1] & - store$timestamp <= storm[2]] <- stormdat$baselineQ - - #If maximum exceeds limit, add it to the stormsep list: - if(any(store$flow > (2.0 * store$baselineflow),na.rm = TRUE)){ - #Set the storm number in baseQ. In case of overlapping end/start points, - #store both IDs. - startID <- baseQ$stormID[baseQ$timestamp == storm[1]] - if(!is.na(startID)){ - #If the stormID in baseQ is already entered for the start date, it - #overlaps with a previous storm since both use that local minima. Store - #both IDs. - baseQ$stormID[baseQ$timestamp == storm[1]] <- paste0(startID,",",length(stormsep) + 1) - baseQ$stormID[baseQ$timestamp > storm[1] & baseQ$timestamp <= storm[2]] <- length(stormsep) + 1 - }else{ - #If there is no overlap, enter the storm ID for all storm timestamps - baseQ$stormID[baseQ$timestamp >= storm[1] & baseQ$timestamp <= storm[2]] <- length(stormsep) + 1 - } - - stormsep[[length(stormsep) + 1]] <- store - } - } - - #Now plot each storm and fit exponential curves to rising and falling limbs - #Store coefficients and statistics for each curve into a data frame, looking - #at shape of curve and the adjusted R square values. Store each storm as a PNG - #graph in the designated area. Need to prevent errors from zero flow. Added - #0.0001 to all flow values. 
This WILL RESULT IN BIAS - ext <- ".png" - #Empty data frame to store statistics - transients <- data.frame(startDate=character(length(stormsep)), - endDate = NA, maxDate = NA, - rising = NA, RsqR = NA, falling = NA, RsqF = NA, - durAll = NA,durF = NA,durR = NA, - volumeTotalMG = NA, - volumeAboveBaseQMG = NA, - volumeAboveBaselineQMG = NA, - volumeTotalMG_rise = NA, - volumeAboveBaseQMG_rise = NA, - volumeAboveBaselineQMG_rise = NA, - volumeTotalMG_fall = NA, - volumeAboveBaseQMG_fall = NA, - volumeAboveBaselineQMG_fall = NA) - for (i in 1:length(stormsep)){ - #Find the storm of interest - storm <- stormsep[[i]] - #remove nas: - storm <- storm[!is.na(storm[,2]),] - #Look for where the max is - maxtime <- storm[storm[,2] == max(storm[,2]),1][1] - - #Store the start and end time of the storm - transients$startDate[i] <- format(storm$timestamp[1],"%Y-%m-%d") - transients$endDate[i] <- format(storm$timestamp[nrow(storm)],"%Y-%m-%d") - transients$maxDate[i] <- format(storm[storm[,2] == max(storm[,2]),1][1],"%Y-%m-%d") - - #Separate rising and falling limbs based on maxtime e.g. the rising limb is - #all values leading up to maxtime - rising <- storm[storm[,1] <= maxtime,] - falling <- storm[storm[,1] >= maxtime,] - - #What is the volume of the storm streamflow i.e. total volume? First, get - #the difference in timestamps throughout the storm from one time to the - #next: - timeDiff <- difftime(storm$timestamp[2:nrow(storm)], storm$timestamp[1:(nrow(storm) - 1)], - units = "secs") - timeDiff <- as.numeric(timeDiff) - #Repeat for rising and falling limbs only: - #Rising: - timeDiff_rise <- difftime(rising$timestamp[2:nrow(rising)], rising$timestamp[1:(nrow(rising) - 1)], - units = "secs") - timeDiff_rise <- as.numeric(timeDiff_rise) - #Falling: - timeDiff_fall <- difftime(falling$timestamp[2:nrow(falling)], falling$timestamp[1:(nrow(falling) - 1)], - units = "secs") - timeDiff_fall <- as.numeric(timeDiff_fall) - #Using a trapezoidal approach, get the area of each trapezoid to estimate - #volume of the storm. Use three approaches. Total storm volume, volume above - #baseflow, volume above baseline flow - #Total storm flow: - trapzArea <- function(timeDiff,stormFlow){ - out <- timeDiff * ((stormFlow[1:(length(stormFlow) - 1)] + stormFlow[2:length(stormFlow)]) / 2) - } - trapz_total <- trapzArea(timeDiff,storm$flow) - #Only flow above baseflow: - trapz_abovebaseQ <- trapzArea(timeDiff,(storm$flow - storm$baseflow)) - #Only flow above baseline flow. THIS CAN BE NEGATIVE AND THEREFORE - #UNRELIABLE?: - trapz_abovebaselineQ <- trapzArea(timeDiff,(storm$flow - storm$baselineflow)) - - #Rising/falling storm flow: - trapz_total_rise <- trapzArea(timeDiff_rise,rising$flow) - trapz_total_fall <- trapzArea(timeDiff_fall,falling$flow) - #Only flow above baseflow: - trapz_abovebaseQ_rise <- trapzArea(timeDiff_rise,(rising$flow - rising$baseflow)) - trapz_abovebaseQ_fall <- trapzArea(timeDiff_fall,(falling$flow - falling$baseflow)) - #Only flow above baseline flow. 
THIS CAN BE NEGATIVE AND THEREFORE - #UNRELIABLE?: - trapz_abovebaselineQ_rise <- trapzArea(timeDiff_rise,(rising$flow - rising$baselineflow)) - trapz_abovebaselineQ_fall <- trapzArea(timeDiff_fall,(falling$flow - falling$baselineflow)) - - #Total volume is the sum of area of the trapezoids found above converted to - #MG from CF (1 CF * 12^3 in^3/cf * 231 gal/in^3 * 1 MG/1000000 gal): - volume_total <- sum(trapz_total) * 12 * 12 * 12 / 231 / 1000000 - volume_abovebaseQ <- sum(trapz_abovebaseQ) * 12 * 12 * 12 / 231 / 1000000 - volume_abovebaselineQ <- sum(trapz_abovebaselineQ) * 12 * 12 * 12 / 231 / 1000000 - #Rising Limb - volume_total_rise <- sum(trapz_total_rise) * 12 * 12 * 12 / 231 / 1000000 - volume_abovebaseQ_rise <- sum(trapz_abovebaseQ_rise) * 12 * 12 * 12 / 231 / 1000000 - volume_abovebaselineQ_rise <- sum(trapz_abovebaselineQ_rise) * 12 * 12 * 12 / 231 / 1000000 - #Falling limb: - volume_total_fall <- sum(trapz_total_fall) * 12 * 12 * 12 / 231 / 1000000 - volume_abovebaseQ_fall <- sum(trapz_abovebaseQ_fall) * 12 * 12 * 12 / 231 / 1000000 - volume_abovebaselineQ_fall <- sum(trapz_abovebaselineQ_fall) * 12 * 12 * 12 / 231 / 1000000 - #Store results: - transients$volumeTotalMG[i] <- volume_total - transients$volumeAboveBaseQMG[i] <- volume_abovebaseQ - transients$volumeAboveBaselineQMG[i] <- volume_abovebaselineQ - transients$volumeTotalMG_rise[i] <- volume_total_rise - transients$volumeAboveBaseQMG_rise[i] <- volume_abovebaseQ_rise - transients$volumeAboveBaselineQMG_rise[i] <- volume_abovebaselineQ_rise - transients$volumeTotalMG_fall[i] <- volume_total_fall - transients$volumeAboveBaseQMG_fall[i] <- volume_abovebaseQ_fall - transients$volumeAboveBaselineQMG_fall[i] <- volume_abovebaselineQ_fall - - #Create an exponential regression for the rising limb to roughly fit the - #rising limb based on an "ideal" hydrograph - modelR <- lm(log(rising[,2] + 0.0001) ~ seq(1,length(rising[,1]))) - #Store exponential coefficient and adjusted r squared values - transients$rising[i] <- summary(modelR)$coefficients[2] - transients$RsqR[i] <- summary(modelR)$adj.r.squared - - #Create an exponential regression for the falling limb - modelF <- lm(log(falling[,2] + 0.0001) ~ seq(1,length(falling[,1]))) - transients$falling[i] <- summary(modelF)$coefficients[2] - transients$RsqF[i] <- summary(modelF)$adj.r.squared - - #Finds duration of the storm, rising and falling limbs combined - transients$durAll[i] <- length(storm$timestamp) - #Finds duration of the rising limb - transients$durF[i] <- length(rising$timestamp) - #Finds duration of the falling limb - transients$durR[i] <- length(falling$timestamp) - - #Plot the storm and the fitted rising and falling limbs and store them in - #designated path. 
Include the baseflow and and the baseline flow brk - if(plt == T){ - #Set plot output path and dimensions - png(paste0(path,"storm",i,ext), width=1820,height=760) - #Set plot margines - par(mar=c(5,6,2,4)) - #Plot the storm, making the labels a little thicker and the lines of the - #plot and labeling the axes - plot(storm[,1], storm[,2], type='l', - xlab='Date', ylab='Flow (cfs)', - lwd=2, cex.axis=2, cex.lab=2) - #Plot the fitted rising limb: - lines(storm[storm[,1] <= maxtime,1], - exp(fitted(modelR)), - col = 'darkblue',lwd = 2) - #Plot the fitted falling limb: - lines(storm[storm[,1] >= maxtime,1], - exp(fitted(modelF)), - col = 'darkred', lwd = 2) - #Plot the baseflow - lines(storm[,1], storm[,3], - col = "darkgreen", lwd = 2) - #Plot the baseline flow brk as a dashed line via lty = 3 - lines(storm[,1], storm[,4],lty = 3,lwd = 2) - #Put a small legend on the plot - legend("topleft",c("Flow","Baseflow","Rise Limb","Fall Limb","Baseline"), - col = c("black","darkgreen","darkblue","darkred","black"), - lty = c(1,1,1,1,3), - bty = "n") - #Close the plot PNG and output the file - dev.off() - } - } - #rm(maxtime,storm,rising,falling,modelR,modelF,i,ext,fxn_locations) - out <- list(Storms = stormsep,Stats = transients, - flowData = baseQ) - return(out) -} - -#Below function hreg (horizontal regression) will try to fit mulitple -#horizontal lines through subset of data involving every point below that line -#until best fit is found. This becomes baseline flow, brk -hreg <- function(x, limit = 1){ - #What is the numeric percentile of x that is of percent limit? - lim <- as.numeric(quantile(x,limit)) - #Give all of x below the percentile of limit: - x <- x[x <= lim] - #Get all percentiles of x from 0 to 100% by 0.1%: - lns <- as.numeric(quantile(x, seq(0,1,0.001))) - #Keep only values above 0 - lns <- lns[lns != 0] - #A vector for mean square error - mse <- numeric(length(lns)) - #For each values in lns, determine the mean square error if this is a - #horizontal regression of the data in x - for (i in 1:length(lns)){ - line <- lns[i] - mse[i] <- mean((x - line)^2) - } - #Return the percentile x that created the minimum least square error: - return(lns[which.min(mse)]) -} - -# pathOut <- paste0(path,"storm",i,ext) - - -# outTest <- stormSeparate(timeIn, inflow, -# plt = FALSE,path = paste0(getwd(),"/"), -# allMinimaStorms = TRUE, -# baselineFlowOption = "Month" -# ) -# length(outTest$Storms) diff --git a/HARP-2024-2025/storm_event_ratings.R b/HARP-2024-2025/storm_event_ratings.R deleted file mode 100644 index fd00f31a..00000000 --- a/HARP-2024-2025/storm_event_ratings.R +++ /dev/null @@ -1,39 +0,0 @@ -## Set working directory -setwd("C:/Users/natef/OneDrive - Virginia Tech/HARP") - -#get packages -library(ggplot2) -library(sqldf) -library(tidyr) - -#set varibales for stream gage id -gage_id <- "02026000" - - -#download stormeventflow and rating values -stormflow <- read.csv(paste("http://deq1.bse.vt.edu:81/met/stormVol_nldas2/flow/usgs_ws_",gage_id,"-stormevent-flow.csv",sep = "")) -ratings0 <- read.csv(paste("http://deq1.bse.vt.edu:81/met/nldas2/out/usgs_ws_",gage_id,"-nldas2-lm_simple-rating-ts.csv",sep = "")) - -#clean up ratings dates -ratings <- ratings0 %>% - separate(start_date, into = c("timestamp","start_time"), sep = " ") - -#sql join -# Perform the join -result <- sqldf("SELECT stormflow.timestamp, stormflow.stormID, ratings.rating - FROM stormflow - INNER JOIN ratings ON stormflow.timestamp = ratings.timestamp") -is.na(result$stormID) - -plot <- ggplot(data = result, - mapping = 
aes(x = !is.na(stormID), y = rating))+ - geom_boxplot()+ - xlab("Storm Event?")+ - ggtitle("Comparison of Ratings During Storm Events and Outside of Storm Events")+ - theme_bw()+ - ylim(c(0,1)) - -plot - - - diff --git a/HARP-2024-2025/using_basic_analytics.R b/HARP-2024-2025/using_basic_analytics.R deleted file mode 100644 index 84c8ad3e..00000000 --- a/HARP-2024-2025/using_basic_analytics.R +++ /dev/null @@ -1,57 +0,0 @@ -source("C:/Users/natef/OneDrive - Virginia Tech/HARP/Github/HARParchive/HARP-2024-2025/basic_analytics_for_vahydro.R") -library("IHA") -library("ggplot2") -library(sqldf) -# prism <- read.csv("http://deq1.bse.vt.edu:81/met/stormVol_prism/precip/usgs_ws_01613900-PRISM-all.csv") -# prism_summary <- summary_analytics(prism) -# -# -# daymet <- read.csv("http://deq1.bse.vt.edu:81/met/daymet/precip/usgs_ws_01613900-daymet-all.csv") -# daymet_summary <- summary_analytics(daymet) -# -# nldas2 <- read.csv("http://deq1.bse.vt.edu:81/met/nldas2/precip/usgs_ws_01613900-nldas2-all.csv") -# nldas2_summary <- summary_analytics(nldas2) - -#pull any value like this: - -# prism_l90_precip_in <- prism_summary$l90_precip_in -# daymet_l90_precip_in <- daymet_summary$l90_precip_in -# nldas2_l90_precip_in <- nldas2_summary$l90_precip_in - -#Or even without the summary: - -summary_analytics(prism)$l90_precip_in - - -#visualizing data: - -# visualizations <- function(prism.metric, daymet.metric, nldas2.metric, title = NULL){ -# ggplot(data = NULL, mapping = aes(x = c("prism","dayment","nldas2"), -# y = c(prism.metric, daymet.metric, nldas2.metric)))+ -# geom_bar(stat = "identity", fill = "lightblue3")+ -# labs(title = title, y = "precip (in)", x = "model")} -# -# visualizations(prism.metric = prism_summary$precip_annual_max_in, -# daymet.metric = daymet_summary$precip_annual_max_in, -# nldas2.metric = nldas2_summary$precip_annual_max_in, -# title = "yearly max precip") -# visualizations(prism.metric = prism_summary$precip_annual_min_in, -# daymet.metric = daymet_summary$precip_annual_min_in, -# nldas2.metric = nldas2_summary$precip_annual_min_in, -# title = "yearly min precip") - -visualizations <- function(gage_id, metric){ - ggplot(data = NULL, mapping = aes(x = c("prism","dayment","nldas2"), - y = c(summary_analytics(read.csv(paste0("http://deq1.bse.vt.edu:81/met/stormVol_prism/precip/",gage_id,"-PRISM-all.csv")))[[metric]], - summary_analytics(read.csv(paste0("http://deq1.bse.vt.edu:81/met/daymet/precip/",gage_id,"-daymet-all.csv")))[[metric]], - summary_analytics(read.csv(paste0("http://deq1.bse.vt.edu:81/met/nldas2/precip/",gage_id,"-nldas2-all.csv")))[[metric]])))+ - geom_bar(stat = "identity", fill = "lightblue3")+ - labs(title = as.character(metric), y = "precip (in)", x = "model")} - -# perform basic bar plot visualizations for any of these 5 metrics for any gage -visualizations(gage_id = "usgs_ws_01613900", metric = "precip_annual_max_in") -visualizations(gage_id = "usgs_ws_01613900", metric = "l90_precip_in") -visualizations(gage_id = "usgs_ws_01613900", metric = "precip_annual_mean_in") -visualizations(gage_id = "usgs_ws_01613900", metric = "precip_annual_min_in") -visualizations(gage_id = "usgs_ws_01613900", metric = "precip_daily_max_in") - diff --git a/HARP-2024-2025/variable definitions.R b/HARP-2024-2025/variable definitions.R deleted file mode 100644 index d5e029e7..00000000 --- a/HARP-2024-2025/variable definitions.R +++ /dev/null @@ -1,6 +0,0 @@ -coverage_hydrocode <- "usgs_ws_01613900" -coverage_bundle <- "watershed" #type of coverage we are looking for -coverage_ftype <- 
"usgs_full_drainage" #feature type -model_version <- "cbp-6.1" -met_file <- "http://deq1.bse.vt.edu:81/met/stormVol_prism/precip/usgs_ws_01613900-PRISM-all.csv" -scenario_name <- "stormVol_prism" diff --git a/HARP-2024-2025/yearly_metrics.R b/HARP-2024-2025/yearly_metrics.R deleted file mode 100644 index f8716b53..00000000 --- a/HARP-2024-2025/yearly_metrics.R +++ /dev/null @@ -1,73 +0,0 @@ -#number of days with precip_in and longest duration storm per yr - -library(sqldf) - -#this function uses a series of SQL chunks to compute yearly data frames -#that include helpful QA metrics. -yearly_metrics <- function(df){ -# Create Daily summary to make all data daily -daily_summary <- - sqldf( - "SELECT yr, mo, da, obs_date, SUM(precip_in) AS precip_in - FROM df - GROUP BY yr, mo, da" - ) #include obs_date after SQL to data frame - -# Number of days with precip_in per yr -rainfall_hist <- ggplot(data = daily_summary, mapping = aes(x = obs_date, y = precip_in))+ - geom_histogram() -rainfall_days <- sqldf("SELECT yr, COUNT(*) - AS DaysWithRainfall - FROM daily_summary - WHERE precip_in > 0.01 - GROUP BY yr") - -# # Longest duration storm per yr -# this marks the storm groups by separating due to days with no rainfall -#the NWS states that >0.01 is "measurable precipitation". -#this changes our numbers by ~20 days for the Hogue creek gage -storm_events <- sqldf("WITH RainEvents AS ( - SELECT obs_date, precip_in, yr, - (SELECT SUM(precip_in <= 0.01) - FROM daily_summary rd2 - WHERE rd2.obs_date <= rd1.obs_date) - AS StormGroup - FROM daily_summary rd1), - StormDurations AS ( - SELECT yr, StormGroup, COUNT(*) - AS StormDuration - FROM RainEvents - WHERE precip_in > 0.01 - GROUP BY yr, StormGroup) - SELECT yr, MAX(StormDuration) AS LongestStorm - FROM StormDurations - GROUP BY yr") - -# RainEvents <- sqldf(" -# SELECT obs_date, precip_in, yr, -# (SELECT SUM(precip_in <= 0.01) -# FROM daily_summary AS rd2 -# WHERE rd2.obs_date <= rd1.obs_date) -# AS StormGroup -# FROM daily_summary AS rd1") - -# Printing results -#merge(rainfall_days) -return(merge(rainfall_days,storm_events)) -return(RainEvents) -} -## It seems like NLDAS has much higher results... - -yearly_metrics(prism) -yearly_metrics(nldas2) - - -daily_summary <- - sqldf( - "SELECT yr, mo, da, obs_date, SUM(precip_in) AS precip_in - FROM df - GROUP BY yr, mo, da" - ) - -rainfall_hist <- ggplot(data = daily_summary, mapping = aes(x = precip_in))+ - geom_histogram(binwidth = 0.01)