From f0ce31085ce0dbaf6fb9313ce20fb4f35fc8e31e Mon Sep 17 00:00:00 2001 From: James Hagan Date: Thu, 10 Aug 2023 17:05:15 +0200 Subject: [PATCH] update vignettes so that users can acess the raw databases --- .../03_create_reference_database.R | 1 - ..._create_preservation_correction_database.R | 20 +++ database/preservation_correction_database.rds | Bin 0 -> 1996 bytes vignettes/InvTraitR_output_description.Rmd | 17 +- vignettes/InvTraitR_view_database.Rmd | 152 ++++++++++++++++++ 5 files changed, 181 insertions(+), 9 deletions(-) create mode 100644 companion_scripts/02_create_database/04_create_preservation_correction_database.R create mode 100644 database/preservation_correction_database.rds create mode 100644 vignettes/InvTraitR_view_database.Rmd diff --git a/companion_scripts/02_create_database/03_create_reference_database.R b/companion_scripts/02_create_database/03_create_reference_database.R index c139d23..44e8374 100644 --- a/companion_scripts/02_create_database/03_create_reference_database.R +++ b/companion_scripts/02_create_database/03_create_reference_database.R @@ -6,7 +6,6 @@ #' # load relevant libraries -library(dplyr) library(readxl) # load the reference list diff --git a/companion_scripts/02_create_database/04_create_preservation_correction_database.R b/companion_scripts/02_create_database/04_create_preservation_correction_database.R new file mode 100644 index 0000000..e82e39f --- /dev/null +++ b/companion_scripts/02_create_database/04_create_preservation_correction_database.R @@ -0,0 +1,20 @@ +#' @title preservation correction factors +#' @description create a data.frame with the preservation correction factor data +#' @author James G. 
Hagan (james_hagan(at)outlook.com) +#' + +# load relevant libraries +library(readxl) + +# load the reference list +pre_dat <- readxl::read_xlsx(path = "C:/Users/james/OneDrive/PhD_Gothenburg/Chapter_4_FreshInvTraitR/data/allometry_database_ver4/dry_biomass_correction_data.xlsx") +head(pre_dat) + +# check the database +View(pre_dat) + +# replace the NA characters with true NAs +pre_dat[pre_dat == "NA"] <- NA + +# export the database as a .rds file +saveRDS(pre_dat, "database/preservation_correction_database.rds") diff --git a/database/preservation_correction_database.rds b/database/preservation_correction_database.rds new file mode 100644 index 0000000000000000000000000000000000000000..87c518420364a36595de5f9fc1a2506ef5227440 GIT binary patch literal 1996 zcmV;-2Q&B|iwFP!000002JKr-a~roA-dd9NVL5hVyQ!x+U^<@0$(R%=iMH${4JnEe zl~R@}F>$9C4Y&((5n+K17Nks{^b*GvyRW~TSH@9tukA0jC- zk~nt69nlY8;Qa;`Uxa_RU>L@fF@0^yn3{pF#>{8s&BPj%-2vhjeBOf3d!Kx71W(q` zIH&Qp#=9DSuJK)sNaHUxKG66pjSn?`pz)E$q{fuSw8mwPD;ifdW;8z5_(bEH#-|#e zY5Y**M;brY_-l=yXv{`<5@Jr%IZdx?dR^0bP3J=zp4;$w&zLsm0q1tGEg7NwHKk`u zm|NIpLy-$FD6xpN1#@)Cd{G=blrbXv6(914yFsKQlnP*eiLscOW!$$-;*p!0u29b(XBv#N5Od%yP6|n-Y3O0`t%I`ry zIe`k~8+Rp`m5s1~RI4E2O)O6=Pb^O?UzlZgn^#$#aFayy)&aqumV=VLcF`T8Y+=C+ z#}O=szdQMZ&$J<%K+&45f~l{Bfjdx^x2Y6dICT%XtnLU8zLb(z&TcrV{HCzES;&_U z(C#M6mGZmA-BMz2zi?34|2$XT-hGbXuUssk7lrMuV%g_9*xf0h^}_RVaXXK;pMPH1 zFBjJLbLGN;4&`|vY)RjCtA!gR@b|dop(h*y9mR13|J$E~K2#xG+0;ZGed&_4hT(tF zQ!8kMKT9?fLHe<0C#sSVzeJltu%HQEiB5|Me_024Gkhfy6N}@rc%j{R{6V~VFHU8H z!0yeVobprWn0h3l7EB!e@RUUHD*ubtD*si-MqqrF-tbN| zX_yfj<}Z6a)*j&-p>!aR+Jbs?WBSZc4IqNW(2CY zd_OCLtq#LAo*cEe@T~3yZxXo}_EwA7O^b|CVstIzPk zCEI-+rsPx(7k4njrT{i<{-AI!k6hRjtl5!-(2rIXqB4no|LG=g(WF#ah)!@SCFo#SP%q8!c@YnGaP&Vvwhg~ zzw{3ZNKIl?O(M~_nA$sgg8K~sZG;i7R%t88KMhg}A?kCK*wuv`_ZzLr
1$-TA| z`R5rNRJ39pRJqWq)Fp3iv8Jm8@LAN2&u-q{AxctVa^kBw;ZhWpyZlY>D z332qBsNpg;va@>sD|r5tUDKHPD#X=l@Wl30E5Vap(Rx4D^^yEX8YBN6gnA;Q+2ToO z9-=(GYjiwuyGD6UY21$D(-`IL)2=*q<-hyMu4_NjfAzyz&946^jO)YF-~A`MrQ-_Y zUCrjd)p0Eb^+xAtbw8a-rcl-Gz%n!;Ja5*81PEJY!$Z>Hc` zO+Q_k$WCM@vR7KReV|KcmJ_K|CXrfRP9X66I=f%*Wq6K?EL|*zw9l^~r z*r%ZIKW`wS5N1jAF#U8b^FBJZVbhHKruC2#!EGy7iU#cPEd-OpYp-wI`M9cip6L5= z>pS=OGkp!d|0Fu#km5MWvE$#oGH$}>+6nvveBX94&O}W9D0wS7I?-2Vso;lI2u#RB zYU#SgDwUo(#C2uI$0^fIg5z@veRC$#uc%_tQQci8%;4wTLyLH(q^|!x(%XI|DLPMT zUES$*Br>D$%b0gsVt%B*-)!?gcKlvj;VHed&;*Bd8GFq|aA~bGRpHr8n5wDhq(xQO ep;gt?6i|`xe$Pl&C8QUq|Nb8!fh%IuIsgD(=H_7l literal 0 HcmV?d00001 diff --git a/vignettes/InvTraitR_output_description.Rmd b/vignettes/InvTraitR_output_description.Rmd index 4be933d..79c9896 100644 --- a/vignettes/InvTraitR_output_description.Rmd +++ b/vignettes/InvTraitR_output_description.Rmd @@ -2,10 +2,8 @@ title: "Detailed description of the output" author: "James G. Hagan" date: "`r Sys.Date()`" -vignette: > - %\VignetteIndexEntry{Detailed description of the output} - %\VignetteEngine{knitr::rmarkdown} - %\VignetteEncoding{UTF-8} +vignette: "%\\VignetteIndexEntry{Detailed description of the output} %\\VignetteEngine{knitr::rmarkdown} + %\\VignetteEncoding{UTF-8}\n" --- ```{r, include = FALSE} @@ -15,8 +13,9 @@ knitr::opts_chunk$set( ) ``` -The *InvTraitR* R-package exports a single function called *get_trait_from_taxon()*. +### Overview +The *InvTraitR* R-package exports a single function called *get_trait_from_taxon()*. ```{r eval=FALSE} get_trait_from_taxon( data, # data.frame with at least five columns: target taxon, life stage, latitude (dd), longitude (dd) and body size (mm) if trait == "equation" @@ -34,7 +33,8 @@ get_trait_from_taxon( This generates a relatively complex output and this vignette gives a detailed description of the different outputs. The output of *get_trait_from_taxon()* is a list with two data.frames as named elements: "data" and "decision_data". 
We start with the columns in the "data" element: -*workflow1* and *workflow2* + +### *workflow1* and *workflow2* + row [int] - variable specifying the row number from the original, input data.frame @@ -92,7 +92,7 @@ This generates a relatively complex output and this vignette gives a detailed de + equation_form [chr] - the database supports two types of equation: "model1" - the log-log linear equation and "model2" - non-linear equation. See documentation for details of these models -+ log_base [num] - base of log for model1 equations ++ log_base [num] - base of log for "model1" equations + a [num] - a parameter for the "model1" and "model2" equations. See documentation for details of these models @@ -104,7 +104,8 @@ lm_correction_type [chr] - the specific correction factor used for "model1" equa dry_biomass_scale [num] - multiplier to convert the equation output to mg -*workflow2*: + +### *workflow2*: dry_biomass_mg [num] - dry biomass (mg) derived for the body size given in the original, input data using the chosen equation diff --git a/vignettes/InvTraitR_view_database.Rmd b/vignettes/InvTraitR_view_database.Rmd new file mode 100644 index 0000000..528213a --- /dev/null +++ b/vignettes/InvTraitR_view_database.Rmd @@ -0,0 +1,152 @@ +--- +title: "View database" +author: "James G. Hagan" +date: "`r Sys.Date()`" +vignette: "%\\VignetteIndexEntry{View database} %\\VignetteEngine{knitr::rmarkdown} + %\\VignetteEncoding{UTF-8}\n" +--- + +```{r, include = FALSE} +knitr::opts_chunk$set( + collapse = TRUE, + comment = "#>" +) +``` + +### Equation database + +The code below loads the equation database so that users can simply browse the database and examine where the raw data comes from. 
+ +```{r} +equ_dat <- readRDS(url("https://raw.githubusercontent.com/haganjam/InvTraitR/main/database/equation_database.rds")) +print(equ_dat) +``` + +The metadata for the equation database is: + ++ db_taxon [chr] - taxon name associated with the equation in the equation database from the original publication + ++ db_life_stage [chr] - life-stage of the db_taxon according to the life-stage classification used in *InvTraitR* (see Hagan et al. for details) + ++ equation_id [num] - unique numerical id for each equation in the database + ++ preservation [chr] - method used to preserve the specimens that were used to generate the equation: "none" - no preservation, "ethanol" - preserved in ethanol and "formaldehyde" - preserved in formaldehyde + ++ preservation_percentage [num] - if the specimens were preserved in ethanol or formaldehyde, this provides the concentration of the preservation material + ++ correction_percentage [num] - preliminary percentage correction for the preservation method (currently not implemented in *InvTraitR*) + ++ correction_factor_id [chr] - unique numerical id for each preservation correction factor used (see section *Preservation correction database* below) + ++ body_size_meas [chr] - linear body size dimension that the equation was developed for, according to the classification used in *InvTraitR* (see Hagan et al. for details) + ++ body_size_unit [chr] - unit of measurement of the linear body size dimension + ++ body_size_min [num] - minimum body size that the equation was developed for + ++ body_size_max [num] - maximum body size that the equation was developed for + ++ equation_form [chr] - the database supports two types of equation: "model1" - the log-log linear equation and "model2" - non-linear equation. See documentation for details of these models + ++ log_base [num] - base of log for "model1" equations + ++ a [num] - a parameter for the "model1" and "model2" equations. 
See documentation for details of these models + ++ b [num] - b parameter for the "model1" and "model2" equations. See documentation for details of these models + ++ dry_biomass_scale [num] - multiplier to convert the equation output to mg + ++ dry_biomass_min [num] - minimum dry biomass that the equation was developed with + ++ dry_biomass_max [num] - maximum dry biomass that the equation was developed with + ++ dry_biomass_unit [chr] - unit of measurement of dry biomass used in the equation + ++ RMS [num] - residual mean square of the equation + ++ n [num] - sample size + ++ r2 [num] - coefficient of determination + ++ lm_corrrection [num] - corrections for "model1" equations to remove the bias associated with back-transforming predictions made on the log-log scale to the natural scale. See documentation for details. + ++ lm_correction_type [chr] - the specific correction factor used for "model1" equations which are based on the availability of information like the mean squared errors from the original papers. See documentation for details. + ++ lm_reference [chr] - first author and year of the publication from which the formula for the correction factor was obtained + ++ reference [num] - unique numerical id of the publication from which the equation was obtained. Full details of each publication are provided as a separate database (see section *Reference database* below) + +### Reference database + +The code below loads the reference database which contains the full citation of the publications from which the equations were gathered to generate the database. + +```{r} +ref_dat <- readRDS(url("https://raw.githubusercontent.com/haganjam/InvTraitR/main/database/reference_database.rds")) +print(ref_dat) +``` + +The metadata for the reference database is: + ++ reference_id [num] - unique numerical id of the publication from which the equation was obtained. This numerical id matches the numeric id in the equation database. 
+ ++ first_author [chr] - first author of the publication + ++ year [num] - publication year + ++ journal [chr] - journal in which the publication was published + ++ title [chr] - title of the publication + ++ location_description - description of the place in the publication where the equation was found + ++ doi_url - doi or url of the publication + ++ notes - additional notes on the publication that are relevant to how the equation data were gathered + +### Geographic/habitat similarity database + +The code below loads the database that holds the biogeographical realm, major habitat type and ecoregion data for each equation in the equation database derived from the coordinates associated with the equation and Abell et al.'s (2008) freshwater ecoregion map (www.feow.org). + +```{r} +eco_dat <- readRDS(url("https://raw.githubusercontent.com/haganjam/InvTraitR/main/database/freshwater_ecoregion_data.rds")) +print(eco_dat) +``` + +The metadata for the geographic/habitat similarity data is: + ++ database [chr] - describes the data type which, in this case, is the equation database. Future versions of *InvTraitR* may, however, incorporate other trait databases. + ++ id [num] - unique numerical id associated with the equation in the database + ++ accuracy [chr] - whether the coordinates associated with the equation are exact (i.e. describe the exact spot where specimens were collected to generate the equation) or approximate (i.e. 
describe the rough area where the specimens were collected to generate the equation) + ++ lat_dd [chr] - latitude in decimal degrees (WGS84) + ++ lon_dd [chr] - longitude in decimal degrees (WGS84) + ++ habitat_id - numerical id of the ecoregion from Abell et al.'s (2008) freshwater ecoregion map (www.feow.org) + ++ area_km2 [num] - area ($km^2$) of the ecoregion + ++ realm [chr] - biogeographical realm in which the coordinates are located (Afrotropic, Australasia, Indo-Malay, Nearctic, Neotropic, Oceania, Palearctic) + ++ major_habitat_type [chr] - description of the general freshwater habitat type at the location of the coordinates (see Abell et al. 2008 and www.feow.org for details) + ++ ecoregion [chr] - ecoregion at location of the coordinates (see Abell et al. 2008 and www.feow.org for details) + + +### Preservation correction database + +In the equation database, we provide correction factors for the equations that were generated using preserved specimens. Several papers have shown that generating length-biomass equations on preserved specimens can lead to biased estimates of dry biomass because specimens often lose weight during preservation. + +We have compiled a database of correction factors and included suggested correction factors in the equation database. These are, however, not currently incorporated in *InvTraitR* but may be in a future version. + +The code below loads the preservation correction database so that users can see where the suggested correction factors in the equation database come from. + +```{r} +pre_dat <- readRDS(url("https://raw.githubusercontent.com/haganjam/InvTraitR/main/database/preservation_correction_database.rds")) +print(pre_dat) +``` + + +