From 1a56d5db0e7bd799a89bb037414588d2467ed457 Mon Sep 17 00:00:00 2001 From: James Hagan Date: Fri, 28 Jul 2023 11:24:07 +0200 Subject: [PATCH] update the user guide vignette with some new test data --- database/InvTraitR_vignette_data.csv | 41 +++++ database/dolmans_MSc_2022.csv | 163 ------------------ ..._vignette.Rmd => InvTraitR_user_guide.Rmd} | 153 ++++++++-------- 3 files changed, 118 insertions(+), 239 deletions(-) create mode 100644 database/InvTraitR_vignette_data.csv delete mode 100644 database/dolmans_MSc_2022.csv rename vignettes/{InvTraitR_vignette.Rmd => InvTraitR_user_guide.Rmd} (51%) diff --git a/database/InvTraitR_vignette_data.csv b/database/InvTraitR_vignette_data.csv new file mode 100644 index 0000000..5cc8df2 --- /dev/null +++ b/database/InvTraitR_vignette_data.csv @@ -0,0 +1,41 @@ +target_taxon,life_stage,lat,lon,length_mm +Acilius sulcatus,adult,51.122197,5.33172,16.3 +Acilius sulcatus,larva,51.122197,5.33172,8.1 +Agabus striolatus,adult,51.122197,5.33172,10.11 +Ceratopogonidae,pupa,51.122197,5.33172,13.33 +Chaoborus,larva,51.122197,5.33172,13.09 +Chironomidae,pupa,51.122197,5.33172,9.25 +Chrysomelidae,larva,51.122197,5.33172,9.81 +Chydorus sphaericus,adult,51.122197,5.33172,0.459 +Cloeon dipterum,nymph,51.122197,5.33172,15.31 +Coenagrion ornatum,nymph,51.122197,5.33172,17.87 +Culex territans,larva,51.122197,5.33172,2 +Culicidae,pupa,51.122197,5.33172,8.77 +Daphnia obtusa,adult,51.122197,5.33172,2.65 +Dixella aestivalis,larva,51.122197,5.33172,10.2 +Dixidae,pupa,51.122197,5.33172,7.08 +Dytiscus marginalis,larva,51.122197,5.33172,40.24 +Epidalea calamita,larva,51.122197,5.33172,19.51 +Gamasida,adult,51.122197,5.33172,0.467 +Gerris lacustris,adult,51.122197,5.33172,9.25 +Gyrinus natator,adult,51.122197,5.33172,7.73 +Gyrinus natator,larva,51.122197,5.33172,8.32 +Haliplus,larva,51.122197,5.33172,10.21 +Hesperocorixa linnaei,adult,51.122197,5.33172,6.27 +Hydraenidae,adult,51.122197,5.33172,2.3 +Hydrochara caraboides 
,larva,51.122197,5.33172,13.62 +Hydroporus palustris,adult,51.122197,5.33172,3.5 +Hydroporus palustris,larva,51.122197,5.33172,4.7 +Ilybius quadriguttatus,adult,51.122197,5.33172,8.94 +Laccophilus,larva,51.122197,5.33172,14.37 +Lumbriculidae,adult,51.122197,5.33172,1.04 +Lumbriculidae,adult,51.122197,5.33172,7.31 +Mesocyclops,adult,51.122197,5.33172,0.92 +Notenectida,adult,51.122197,5.33172,8.32 +Notonecta glauca,adult,51.122197,5.33172,16.55 +Notonecta obliqua,adult,51.122197,5.33172,14.16 +Orthocladiinae,larva,51.122197,5.33172,10.58 +Pisidium casertanum,adult,51.122197,5.33172,9.66 +Rhantus suturalis,adult,51.122197,5.33172,12.83 +Rhantus suturalis,larva,51.122197,5.33172,10.3 +Triops cancriformis,adult,51.122197,5.33172,65 diff --git a/database/dolmans_MSc_2022.csv b/database/dolmans_MSc_2022.csv deleted file mode 100644 index 75a335f..0000000 --- a/database/dolmans_MSc_2022.csv +++ /dev/null @@ -1,163 +0,0 @@ -taxon,life_stage,lat,lon,country_code_XXX,location,body_dimension,length_mm -Chironominae,larva,8.79037,-3.7672,CIV,Comoe_National_Park,BL,3.2 -Chironominae,larva,8.79037,-3.7672,CIV,Comoe_National_Park,BL,3.2 -Chironominae,larva,8.79037,-3.7672,CIV,Comoe_National_Park,BL,3.2 -Chironominae,larva,8.79037,-3.7672,CIV,Comoe_National_Park,BL,3.2 -Chironomini,larva,8.79037,-3.7672,CIV,Comoe_National_Park,BL,3.2 -Chironomus,larva,8.79037,-3.7672,CIV,Comoe_National_Park,BL,3.2 -Chironomus,larva,8.79037,-3.7672,CIV,Comoe_National_Park,BL,3.2 -Chironomus,larva,8.79037,-3.7672,CIV,Comoe_National_Park,BL,3.2 -Chironomus,larva,8.79037,-3.7672,CIV,Comoe_National_Park,BL,3.2 -Chironomus,larva,8.79037,-3.7672,CIV,Comoe_National_Park,BL,3.2 -Culicidae,pupa,8.79037,-3.7672,CIV,Comoe_National_Park,BL,5.95 -Anopheles,larva,8.79037,-3.7672,CIV,Comoe_National_Park,BL,6.19 -Aedes,larva,8.79037,-3.7672,CIV,Comoe_National_Park,BL,6.19 -Aedes vexans,larva,8.79037,-3.7672,CIV,Comoe_National_Park,BL,6.19 -Aedes,pupa,8.79037,-3.7672,CIV,Comoe_National_Park,BL,5.95 
-Culex,pupa,8.79037,-3.7672,CIV,Comoe_National_Park,BL,5.95 -Macrothrix,adult,8.79037,-3.7672,CIV,Comoe_National_Park,BL,0.415 -Macrothrix breviseta,adult,8.79037,-3.7672,CIV,Comoe_National_Park,BL,0.415 -Pleuroxus,adult,8.79037,-3.7672,CIV,Comoe_National_Park,BL,0.52 -Diaphanosoma excisum,adult,8.79037,-3.7672,CIV,Comoe_National_Park,BL,1.03 -Diaphanosoma,adult,8.79037,-3.7672,CIV,Comoe_National_Park,BL,1.09 -Mesostoma,none,8.79037,-3.7672,CIV,Comoe_National_Park,BL,0.32 -Ostracoda,adult,8.79037,-3.7672,CIV,Comoe_National_Park,BL,1.34 -Ostracoda,adult,8.79037,-3.7672,CIV,Comoe_National_Park,BL,1.34 -Ostracoda,adult,8.79037,-3.7672,CIV,Comoe_National_Park,BL,1.34 -Ostracoda,adult,8.79037,-3.7672,CIV,Comoe_National_Park,BL,1.34 -Ostracoda,adult,8.79037,-3.7672,CIV,Comoe_National_Park,BL,1.34 -Ostracoda,adult,8.79037,-3.7672,CIV,Comoe_National_Park,BL,1.34 -Ostracoda,adult,8.79037,-3.7672,CIV,Comoe_National_Park,BL,1.34 -Ostracoda,adult,8.79037,-3.7672,CIV,Comoe_National_Park,BL,1.34 -Ostracoda,adult,8.79037,-3.7672,CIV,Comoe_National_Park,BL,1.34 -Ostracoda,adult,8.79037,-3.7672,CIV,Comoe_National_Park,BL,1.34 -Nematoda,none,8.79037,-3.7672,CIV,Comoe_National_Park,BL,1.47 -Nematoda,none,8.79037,-3.7672,CIV,Comoe_National_Park,BL,1.47 -Nematoda,none,8.79037,-3.7672,CIV,Comoe_National_Park,BL,1.47 -Nematoda,none,8.79037,-3.7672,CIV,Comoe_National_Park,BL,1.47 -Micronectidae,adult,8.79037,-3.7672,CIV,Comoe_National_Park,BL,1.9 -Micronectidae,nymph,8.79037,-3.7672,CIV,Comoe_National_Park,BL,1.9 -Micronectidae,nymph,8.79037,-3.7672,CIV,Comoe_National_Park,BL,1.9 -Culiseta longiareolata,pupa,-30.20086,17.939,ZAF,Kamiesberge,BL,6.1 -Ceratopogonidae,pupa,-30.20086,17.939,ZAF,Kamiesberge,BL,3.48 -Dasyhelea,larva,-30.20086,17.939,ZAF,Kamiesberge,BL,6.92 -Dasyhelea,pupa,-30.20086,17.939,ZAF,Kamiesberge,BL,3.48 -Dasyhelea,pupa,-30.20086,17.939,ZAF,Kamiesberge,BL,3.48 -Macrothrix,adult,-30.20086,17.939,ZAF,Kamiesberge,BL,1.16 
-Macrothrix,adult,-30.20086,17.939,ZAF,Kamiesberge,BL,1.03 -Camptocercus,adult,-30.20086,17.939,ZAF,Kamiesberge,BL,0.595 -Alona rectangula,adult,-30.20086,17.939,ZAF,Kamiesberge,BL,0.595 -Alona,adult,-30.20086,17.939,ZAF,Kamiesberge,BL,0.595 -Alona,adult,-30.20086,17.939,ZAF,Kamiesberge,BL,0.595 -Alona,adult,-30.20086,17.939,ZAF,Kamiesberge,BL,0.595 -Leberis,adult,-30.20086,17.939,ZAF,Kamiesberge,BL,0.82 -Biapertura affinis,adult,-30.20086,17.939,ZAF,Kamiesberge,BL,0.595 -Ceriodaphnia quadrangula,adult,-30.20086,17.939,ZAF,Kamiesberge,BL,0.89 -Turbellaria,none,-30.20086,17.939,ZAF,Kamiesberge,BL,1.82 -Turbellaria,none,-30.20086,17.939,ZAF,Kamiesberge,BL,1.82 -Turbellaria,none,-30.20086,17.939,ZAF,Kamiesberge,BL,1.82 -Turbellaria,none,-30.20086,17.939,ZAF,Kamiesberge,BL,1.82 -Turbellaria,none,-30.20086,17.939,ZAF,Kamiesberge,BL,1.82 -Turbellaria,none,-30.20086,17.939,ZAF,Kamiesberge,BL,1.82 -Cypridopsis,adult,-30.20086,17.939,ZAF,Kamiesberge,BL,0.55 -Heterocypris,adult,-30.20086,17.939,ZAF,Kamiesberge,BL,1.43 -Zonocypris,adult,-30.20086,17.939,ZAF,Kamiesberge,BL,0.495 -Cypridoidea,adult,-30.20086,17.939,ZAF,Kamiesberge,BL,0.656 -Cypridopsinae,adult,-30.20086,17.939,ZAF,Kamiesberge,BL,0.55 -Ramotha,adult,-30.20086,17.939,ZAF,Kamiesberge,BL,0.55 -Cypricercus,adult,-30.20086,17.939,ZAF,Kamiesberge,BL,0.656 -Diaptomidae,adult,-30.20086,17.939,ZAF,Kamiesberge,BL,2.8 -Paradiaptomus,adult,-30.20086,17.939,ZAF,Kamiesberge,BL,2.2 -Ectocyclops phaleratus,adult,-30.20086,17.939,ZAF,Kamiesberge,BL,0.935 -Thermocyclops,adult,-30.20086,17.939,ZAF,Kamiesberge,BL,0.935 -Nematoda,none,-30.20086,17.939,ZAF,Kamiesberge,BL,3.68 -Streptocephalus purcelli,adult,-30.20086,17.939,ZAF,Kamiesberge,BL,13.16 -Sminthuridae,adult,-30.20086,17.939,ZAF,Kamiesberge,BL,0.97 -Sminthuridae,adult,-30.20086,17.939,ZAF,Kamiesberge,BL,0.75 -Sminthuridae,adult,-30.20086,17.939,ZAF,Kamiesberge,BL,0.59 -Anisops sardea,adult,-30.20086,17.939,ZAF,Kamiesberge,BL,7.34 
-Anisops,adult,-30.20086,17.939,ZAF,Kamiesberge,BL,7.34 -Corixidae,adult,-30.20086,17.939,ZAF,Kamiesberge,BL,5.15 -Corixidae,adult,-30.20086,17.939,ZAF,Kamiesberge,BL,5.15 -Corixidae,adult,-30.20086,17.939,ZAF,Kamiesberge,BL,5.15 -Hydroporinae,larva,-30.20086,17.939,ZAF,Kamiesberge,BL,2.93 -Hydroporinae,larva,-30.20086,17.939,ZAF,Kamiesberge,BL,1.955 -Hydroporinae,larva,-30.20086,17.939,ZAF,Kamiesberge,BL,5.05 -Elminae,adult,-30.20086,17.939,ZAF,Kamiesberge,BL,4.25 -Oligochaeta,none,-30.20086,17.939,ZAF,Kamiesberge,BL,0.475 -Oligochaeta,none,-30.20086,17.939,ZAF,Kamiesberge,BL,2.17 -Oligochaeta,none,-30.20086,17.939,ZAF,Kamiesberge,BL,1.875 -Hirudinea,none,-30.20086,17.939,ZAF,Kamiesberge,BL,2 -Hirudinea,none,-30.20086,17.939,ZAF,Kamiesberge,BL,8.86 -Macrothrix spinosa,adult,-28.85361,27.2308,ZAF,Korannaberg,BL,0.5 -Alona,adult,-28.85361,27.2308,ZAF,Korannaberg,BL,0.54 -Chydorus sphaericus,adult,-28.85361,27.2308,ZAF,Korannaberg,BL,0.37 -Chydorus,adult,-28.85361,27.2308,ZAF,Korannaberg,BL,0.37 -Simocephalus serrulatus,adult,-28.85361,27.2308,ZAF,Korannaberg,BL,1.5 -Simocephalus vetulus,adult,-28.85361,27.2308,ZAF,Korannaberg,BL,1.5 -Leptestheria rubidgei,adult,-28.85361,27.2308,ZAF,Korannaberg,BL,6.35 -Heterocypris incongruens,adult,-28.85361,27.2308,ZAF,Korannaberg,BL,1.17 -Tanytarsus,larva,41.93337,3.209,ESP,Begur,BL,4 -Bezzia,pupa,41.93337,3.209,ESP,Begur,BL,4 -Dasyhelea,larva,41.93337,3.209,ESP,Begur,BL,4.72 -Alona elegans,adult,41.93337,3.209,ESP,Begur,BL,0.47 -Daphnia pulicaria,adult,41.93337,3.209,ESP,Begur,BL,1.63 -Daphnia obtusa,adult,41.93337,3.209,ESP,Begur,BL,1.2 -Mesostoma,none,41.93337,3.209,ESP,Begur,BL,0.244 -Dugesiidae,none,41.93337,3.209,ESP,Begur,BL,0.4 -Eucypris virens,adult,41.93337,3.209,ESP,Begur,BL,0.75 -Heterocypris incongruens,adult,41.93337,3.209,ESP,Begur,BL,1.39 -Nematoda,none,41.93337,3.209,ESP,Begur,BL,2.48 -Tanymastix stagnalis,adult,41.93337,3.209,ESP,Begur,BL,9.25 -Sminthuridae,adult,41.93337,3.209,ESP,Begur,BL,0.5 
-Hypogastruridae,adult,41.93337,3.209,ESP,Begur,BL,0.84 -Isotomidae,adult,41.93337,3.209,ESP,Begur,BL,1.52 -Oligochaeta,none,41.93337,3.209,ESP,Begur,BL,0.66 -Oligochaeta,none,41.93337,3.209,ESP,Begur,BL,0.66 -Oligochaeta,none,41.93337,3.209,ESP,Begur,BL,0.66 -Amphipoda,adult,41.93337,3.209,ESP,Begur,BL,4.45 -Chironominae,larva,-32.31833,119.1519,AUS,South_West,BL,4.26 -Podonominae,larva,-32.31833,119.1519,AUS,South_West,BL,5.55 -Podonomus,larva,-32.31833,119.1519,AUS,South_West,BL,5.55 -Dasyhelea,larva,-32.31833,119.1519,AUS,South_West,BL,5.42 -Neothrix armata,adult,-32.31833,119.1519,AUS,South_West,BL,0.57 -Macrothrix hardingi,adult,-32.31833,119.1519,AUS,South_West,BL,1.3 -Pleuroxus jugosus,adult,-32.31833,119.1519,AUS,South_West,BL,0.84 -Plurispina,adult,-32.31833,119.1519,AUS,South_West,BL,1.14 -Ephemeroporus,adult,-32.31833,119.1519,AUS,South_West,BL,0.369 -Alona rigidicaudis,adult,-32.31833,119.1519,AUS,South_West,BL,0.803 -Alona,adult,-32.31833,119.1519,AUS,South_West,BL,0.38 -Leberis,adult,-32.31833,119.1519,AUS,South_West,BL,0.5 -Daphnia jollyi,adult,-32.31833,119.1519,AUS,South_West,BL,1.8 -Ceriodaphnia dubia,adult,-32.31833,119.1519,AUS,South_West,BL,0.675 -Ceriodaphnia reticula,adult,-32.31833,119.1519,AUS,South_West,BL,0.675 -Ceriodaphnia,adult,-32.31833,119.1519,AUS,South_West,BL,0.675 -Caenestheriella mariae,adult,-32.31833,119.1519,AUS,South_West,BL,4.65 -Limnadia badia,adult,-32.31833,119.1519,AUS,South_West,BL,5.2 -Turbellaria,none,-32.31833,119.1519,AUS,South_West,BL,2.34 -Turbellaria,none,-32.31833,119.1519,AUS,South_West,BL,2.33 -Turbellaria,none,-32.31833,119.1519,AUS,South_West,BL,0.26 -Eucypris,adult,-32.31833,119.1519,AUS,South_West,BL,0.63 -Cypretta,adult,-32.31833,119.1519,AUS,South_West,BL,0.58 -Cypretta,adult,-32.31833,119.1519,AUS,South_West,BL,0.58 -Limnocythere,adult,-32.31833,119.1519,AUS,South_West,BL,0.25 -Limnocythere,adult,-32.31833,119.1519,AUS,South_West,BL,0.25 -Candonocypris,adult,-32.31833,119.1519,AUS,South_West,BL,1.62 
-Candonocypris,adult,-32.31833,119.1519,AUS,South_West,BL,1.62 -Candonocypris,adult,-32.31833,119.1519,AUS,South_West,BL,1.62 -Ilyodromus,adult,-32.31833,119.1519,AUS,South_West,BL,1.13 -Ilyodromus,adult,-32.31833,119.1519,AUS,South_West,BL,0.76 -Nematoda,none,-32.31833,119.1519,AUS,South_West,BL,2.26 -Branchinella longirostris,adult,-32.31833,119.1519,AUS,South_West,BL,8.48 -Sminthuridae,adult,-32.31833,119.1519,AUS,South_West,BL,0.46 -Sminthuridae,adult,-32.31833,119.1519,AUS,South_West,BL,0.46 -Sminthuridae,adult,-32.31833,119.1519,AUS,South_West,BL,0.46 -Sminthuridae,adult,-32.31833,119.1519,AUS,South_West,BL,0.46 -Hypogastruridae,adult,-32.31833,119.1519,AUS,South_West,BL,0.96 -Hypogastruridae,adult,-32.31833,119.1519,AUS,South_West,BL,0.96 -Isotomidae,adult,-32.31833,119.1519,AUS,South_West,BL,0.67 -Anisops,adult,-32.31833,119.1519,AUS,South_West,BL,2 -Hemicordulia tau,nymph,-32.31833,119.1519,AUS,South_West,BL,2.2 -Triplectides australis,larva,-32.31833,119.1519,AUS,South_West,BL,6.22 -Daphnia magna,adult,60.49808,18.4285,SWE,Sweden,BL,3.46 -Daphnia longispina,adult,60.49808,18.4285,SWE,Sweden,BL,1.87 diff --git a/vignettes/InvTraitR_vignette.Rmd b/vignettes/InvTraitR_user_guide.Rmd similarity index 51% rename from vignettes/InvTraitR_vignette.Rmd rename to vignettes/InvTraitR_user_guide.Rmd index 1ced7ae..7ceb299 100644 --- a/vignettes/InvTraitR_vignette.Rmd +++ b/vignettes/InvTraitR_user_guide.Rmd @@ -19,10 +19,6 @@ knitr::opts_chunk$set( library(InvTraitR) ``` -To see how we can use *InvTraitR* to obtain estimates of the dry biomass of freshwater invertebrates using taxon names and geographical coordinates, we will start by loading some relevant packages and some test data. The test data consist of a set of 89 invertebrate taxa collected in freshwater rockpools on six rocky outcrops (often called inselbergs) in five countries. 
- -In addition, the data consist of latitude-longitude coordinates for the samples as well as measured body lengths for all the taxa (some body lenghts were compiled from the literature) - ```{r} # until we can simply load the package, I'll load the functions and files manually @@ -53,25 +49,22 @@ library(tidyr) library(ggplot2) library(assertthat) library(testthat) +``` +To see how we can use *InvTraitR* to obtain estimates of the dry biomass of freshwater invertebrates using taxon names and geographical coordinates, we will start by loading some relevant packages and some test data. The test data consist of invertebrate taxa collected in six temporary ponds in a nature reserve in Belgium. In addition, the data consist of latitude-longitude coordinates for the samples as well as measured body lengths for all the taxa. +```{r} # load the test data -dol_df <- readr::read_csv("database/dolmans_MSc_2022.csv") -head(dol_df) +pond_df <- readr::read_csv("database/InvTraitR_vignette_data.csv") +head(pond_df) # how many unique taxa are there? -length(unique(dol_df$taxon)) - -# how many locations? -length(unique(dol_df$location)) - -# how many countries? -length(unique(dol_df$country_code_XXX)) +length(unique(pond_df$target_taxon)) # check the summary statistics -summary(dol_df) +summary(pond_df) ``` -We want to use the body length data to estimate the dry biomass for each of these samples using length-dry biomass allometric equations. The *InvTraitR* package houses a database of more than 350 length-dry biomass allometric equations that represent most major groups of freshwater invertebrates globally. We can use *InvTraitR* to search this database for appropriate length-dry biomass equations for the samples in our test data (`dol_df`). +We want to use the body length data to estimate the dry biomass for each of these samples using length-dry biomass allometric equations. 
The *InvTraitR* package houses a database of more than 350 length-dry biomass allometric equations that represent most major groups of freshwater invertebrates globally. We can use *InvTraitR* to search this database for appropriate length-dry biomass equations for the samples in our test data (`pond_df`). To do this, we have two options both of which rely on the `get_trait_from_taxon()` function. This function (see below) takes a data.frame with at least five columns. These columns are the taxon name for which dry biomass data are required, the life stage of the taxon, the latitude and longitude coordinates from which the taxon was sampled and body size of the taxon: @@ -98,17 +91,17 @@ Let's see how this works: ```{r} # we specify the dol_df into the function -dol_df_equ <- get_trait_from_taxon(data = dol_df, - target_taxon = "taxon", - life_stage = "life_stage", - latitude_dd = "lat", - longitude_dd = "lon", - body_size = "length_mm", - workflow = "workflow1", - trait = "equation", - max_tax_dist = 3.5, - gen_sp_dist = 0.25) - +pond_equ1 <- get_trait_from_taxon(data = pond_df, + target_taxon = "target_taxon", + life_stage = "life_stage", + latitude_dd = "lat", + longitude_dd = "lon", + body_size = "length_mm", + workflow = "workflow1", + trait = "equation", + max_tax_dist = 3.5, + gen_sp_dist = 0.25 + ) ``` @@ -117,8 +110,8 @@ The output is a list of length two with two different data.frames: the data with ```{r} # look at the first few rows of both the output data.frames in the list -head(dol_df_equ$data) -head(dol_df_equ$decision_data) +head(pond_equ1$data) +head(pond_equ1$decision_data) ``` @@ -126,7 +119,7 @@ We'll start by looking at the outputted data with the equations. 
To do this, let ```{r} # check the first row of the data that we fed into the function -print(dol_df[1,]) +print(pond_df[1,]) ``` @@ -134,30 +127,28 @@ Then, let us compare this with what the function outputs: ```{r} # check the first row of the output data with the equations -print(dplyr::filter(dol_df_equ$data, row == 1)) +print(dplyr::filter(pond_equ1$data, row == 1)) ``` -In this output, we have severals rows for each input row (these are labelled with the row column i.e. input row 1 is labelled has a 1 in the row column). Each row from the input data, there are several rows in this output data. These different rows correspond to different equations from the database that are within the maximum taxonomic distance that we set (i.e. 3.5). - -The idea with *workflow1* is for us to now use our discretion to choose appropriate equations. For example, we can use the *tax_distance* column *body_size_range_match* columns to make an initial decision. We want low taxonomic distances and we want the length of our target data to be within the body size range of the equation in the database (which is specified by the *body_size_range_match* column). Based on these criteria, there is a clear winner: equation id = 12 (*id*). The taxonomic distance is zero, it is within the body size range and the equation has an r2 value of 0.93. One issue is that it does not come from the same geographic/habitat conditions (i.e. *realm_match*, *major_habitat_type_match* and *ecoregion_match*). +In this output, we have several rows for each input row (these are labelled with the row column i.e. input row 1 is labelled as '1' in the row column). For each row from the input data, there are several rows in this output data. These different rows correspond to different equations from the database that are within the maximum taxonomic distance that we set (i.e. 3.5). -We can also take guidance from the decision data that is also outputted with workflow1. 
+The idea with *workflow1* is for us to now use our discretion to choose appropriate equations. To look in more detail at the output, we can also use the *View()* function: ```{r} -# check the first row of the decision data -print(dplyr::filter(dol_df_equ$decision_data, row == 1)) +View(dplyr::filter(pond_equ1$data, row == 1)) ``` -From this, we can see that the equation we chose (i.e. id = 12) is both recommended by *InvTraitR* (*recommend* column) and would have been the choice made by *workflow2* had we selected it. +The most important columns to consider are the *tax_distance* column, the *body_size_range_match* column and the *life_stage_match* column to make an initial decision. We want low taxonomic distances, we want the length of our target data to be within the body size range of the equation in the database (which is specified by the *body_size_range_match* column) and, very importantly, we want the life-stage of our target to match the life-stage of the equation. There is only one equation that matches the life-stage (id = 169). Given that the target taxon (*Acilius sulcatus*) is a beetle, life-stage is crucially important. The problem, however, with id = 169 is that the body size range does not match. Indeed, the equation was developed for body sizes between 1.4 and 5.2 mm (*db_min_body_size_mm* and *db_max_body_size_mm* columns) and the length in our data is 16.3 mm. Thus, using this equation would be a considerable extrapolation. Given the non-linearity of many allometric equations, this may be a problem. + +Nonetheless, because the other equations within the appropriate taxonomic distance do not have the appropriate life-stage, this is the best that we can do; we should then use our expert knowledge to check that the biomass value that we obtain is reasonable for this taxon (e.g. using other literature). These choices are, of course, subjective to a certain degree. Choosing between id = 167 or id = 168 is at some level subjective. 
The point of *workflow1* is to provide the relevant information to make informed decisions. -Given that we have made our decision, we can now extract the relevant parameters that we need to calculate the dry biomass. +Based on this information, we will choose id = 169 because it is the only equation with the appropriate life-stage information. ```{r} ex1 <- - dol_df_equ$data |> - dplyr::filter(row == 1, id == 12) |> - dplyr::select(row, taxon, life_stage, lat, lon, country_code_XXX, - location, length_mm, + pond_equ1$data |> + dplyr::filter(row == 1, id == 169) |> + dplyr::select(row, target_taxon, life_stage, lat, lon, length_mm, equation_form, log_base, a, b, lm_correction, dry_biomass_scale) ``` @@ -165,7 +156,8 @@ Using these data, we can simply calculate the expected dry biomass based on the ```{r} # use with so we can directly reference the variables -dry_biomass_mg <- +ex1_biomass_mg <- + with(ex1, { # calculate the raw prediction on the log-scale @@ -182,9 +174,11 @@ dry_biomass_mg <- }) -print(paste0("Expected dry biomass (mg) = ", round(dry_biomass_mg, 2) )) +print(paste0("Expected dry biomass (mg) = ", round(ex1_biomass_mg, 2) )) ``` +Checking the literature, I found that ca. 50 mg is not atypical for a dytiscid diving beetle adult, which provides confidence in this estimate (Klecka and Boukal 2013, Journal of Animal Ecology). + We've developed a helper function to automatically perform these calculations based on the following variables in a data.frame. 
@@ -194,66 +188,75 @@ The second option for obtaining dry biomass estimates for a set of taxa is workf ```{r} # we specify the dol_df into the function and select workflow2 -dol_df_equ <- get_trait_from_taxon(data = dol_df, - target_taxon = "taxon", - life_stage = "life_stage", - latitude_dd = "lat", - longitude_dd = "lon", - body_size = "length_mm", - workflow = "workflow2", - trait = "equation", - max_tax_dist = 3.5, - gen_sp_dist = 0.25) +pond_equ2 <- get_trait_from_taxon(data = pond_df, + target_taxon = "target_taxon", + life_stage = "life_stage", + latitude_dd = "lat", + longitude_dd = "lon", + body_size = "length_mm", + workflow = "workflow2", + trait = "equation", + max_tax_dist = 3.5, + gen_sp_dist = 0.25) ``` -Unlike with *workflow1*, when we open the output data, it has the same number of rows as the input data (i.e. *dol_df*). That is because *InvTraitR* automatically selected an appropriate equation for each row where an appropriate equation was available in the database. Moreover, *InvTraitR* directly calculated the expected dry biomass (see *dry_biomass_mg* column) For example, let's look at the first row: +Unlike with *workflow1*, when we open the output data, it has the same number of rows as the input data (i.e. *pond_df*). That is because *InvTraitR* automatically selected an appropriate equation for each row where an appropriate equation was available in the database. Moreover, *InvTraitR* directly calculated the expected dry biomass (see *dry_biomass_mg* column). For example, let's look at the first row: ```{r} # check the first row of the output data with the equations -print(dplyr::filter(dol_df_equ$data, row == 1)) +print(dplyr::filter(pond_equ2$data, row == 1)) ``` -But, as we can see if we look at the whole output, not all rows have equations and this is because *InvTraitR* could not find suitable equations for all input datapoints. +In this case, all the columns are filled with NA values. 
This is because there is no appropriate equation for this taxon according to the InvTraitR *workflow2* criteria. For an equation to be selected with *workflow2*, it has to meet three conditions: + +1. The taxonomic distance must be less than the maximum taxonomic distance (in this case 3.5 as we set it) + +2. The target body size must be within the body size range of the equation + +3. The life-stage of the target taxon and the equation must match + +As was clear from looking at this example with *workflow1*, there were no equations that met all three of these criteria. Therefore, we ended up using an equation where the target body size was not within the body size range of the equation. + +Indeed, if we look at the whole output, many of the rows do not have equations, which means that *InvTraitR* was unable to find appropriate equations in the database based on these three criteria. ```{r} -print(dol_df_equ$data) +View(pond_equ2$data) ``` -In fact, in this dataset, *InvTraitR* was only able to find equations for 22 % of the data points and 23 % of the unique taxa. +In fact, in this dataset, *InvTraitR* was only able to find equations for 25 % of the data points and 29 % of the unique taxa. ```{r} # proportion of data points for which appropriate equations were found -sum(!is.na(dol_df_equ$data$id))/nrow(dol_df_equ$data) +sum(!is.na(pond_equ2$data$id))/nrow(pond_equ2$data) # proportion of unique taxa for which appropriate equations were found n <- - dplyr::filter(dol_df_equ$data, !is.na(id)) |> - dplyr::pull(taxon) |> + dplyr::filter(pond_equ2$data, !is.na(id)) |> + dplyr::pull(target_taxon) |> unique() |> length() -n/length(unique(dol_df_equ$data$taxon)) +n/length(unique(pond_equ2$data$target_taxon)) ``` -Let's have a look in more detail at the output: +If we want to get more insight into why *InvTraitR* was unable to find appropriate equations for the different rows, we can look at the second output which is the *decision_data*. 
For each row, it shows all the equations that were considered and then in the column called *explanation*, it explains why a given equation was not chosen. ```{r} -View(dol_df_equ$data) -View(dol_df_equ$decision_data) +View(pond_equ2$decision_data) ``` -However, we specified certain levels of maximum taxonomic distance in the function. What happens if we relax this and increase the acceptable maximum taxonomic distance and increase it to 4 (instead of 3.5)? +However, in this case, we specified certain levels of maximum taxonomic distance in the function. What happens if we relax this and increase the acceptable maximum taxonomic distance and increase it to 4 (instead of 3.5)? ```{r} # we specify the dol_df into the function and select workflow2 -dol_df_equ <- get_trait_from_taxon(data = dol_df, - target_taxon = "taxon", +pond_equ2 <- get_trait_from_taxon(data = pond_df, + target_taxon = "target_taxon", life_stage = "life_stage", latitude_dd = "lat", longitude_dd = "lon", body_size = "length_mm", workflow = "workflow2", trait = "equation", - max_tax_dist = 4.25, + max_tax_dist = 4, gen_sp_dist = 0.25) ``` @@ -261,17 +264,15 @@ Do we get a higher proportion of datapoint and taxa with appropriate equations? ```{r} # proportion of data points for which appropriate equations were found -sum(!is.na(dol_df_equ$data$id))/nrow(dol_df_equ$data) +sum(!is.na(pond_equ2$data$id))/nrow(pond_equ2$data) # proportion of unique taxa for which appropriate equations were found n <- - dplyr::filter(dol_df_equ$data, !is.na(id)) |> - dplyr::pull(taxon) |> + dplyr::filter(pond_equ2$data, !is.na(id)) |> + dplyr::pull(target_taxon) |> unique() |> length() -n/length(unique(dol_df_equ$data$taxon)) +n/length(unique(pond_equ2$data$target_taxon)) ``` - In this case, we do not. If this occurs, a user can see which datapoints appropriate equations were not given for and then use the decision data to see if there are appropriate equations for taxa that *InvTraitR* missed. 
Otherwise, if no good equation can be found, one might have to use order-level equations instead which are available in many research papers (e.g. Benke et al. 1999). -