diff --git a/DESCRIPTION b/DESCRIPTION index bb194a2..7c5eb04 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -47,4 +47,8 @@ Imports: magrittr, rlang, stringr, - tidyr + tidyr, + tidyselect, + RColorBrewer, + ggplot2, + plotly diff --git a/NAMESPACE b/NAMESPACE index c89e790..5658f7b 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -1,8 +1,11 @@ # Generated by roxygen2: do not edit by hand export("%>%") +export(estimateAquPercentCover) export(estimatePheDurationByTag) export(estimatePheTransByTag) +export(joinAquClipHarvest) +export(joinAquPointCount) export(joinRootChem) export(scaleRootMass) export(stackPlantPresence) diff --git a/R/estimateAquPercentCover.R b/R/estimateAquPercentCover.R new file mode 100644 index 0000000..41e322c --- /dev/null +++ b/R/estimateAquPercentCover.R @@ -0,0 +1,259 @@ +#' @title Estimate NEON aquatic plant, bryophyte, lichen, and macroalgae percent cover in wadeable streams +#' +#' @author Madaline Ritter \email{ritterm1@battelleecology.org} \cr +#' +#' @description Data inputs are NEON Aquatic Plant, Bryophyte, Lichen, and Macroalgae Point Counts in Wadeable Streams (DP1.20072.001) in list format retrieved using the neonUtilities::loadByProduct() function (preferred), data tables downloaded from the NEON Data Portal, or input data tables with an equivalent structure and representing the same site x month combinations. The estimateAquPercentCover() function joins taxonomy information across point count tables and aggregates occurrence data to estimate percent cover at the transect level. +#' +#' @details Input data may be provided either as a list generated from the neonUtilities::laodByProduct() function or as individual tables. However, if both list and table inputs are provided at the same time the function will error. +#' +#' Percent cover is calculated using the equation from Bowden et al. 2006: +#' +#' \deqn{percentCover_i = (N_i / N_t) * 100} +#' +#' Where: +#' +#' - \eqn{N_i} is the number of observed points in a transect that match class type ā€œiā€ (i.e., a particular taxonID or substrate) +#' - \eqn{N_t} is the total number of points observed in the transect +#' +#' Note: This calculation can generate percent cover values >100% if there is vertical stacking of plants, or values <100% if 'targetTaxaPresent' is unknown. +#' +#' @param inputDataList A list object comprised of Aquatic Plant, Bryophyte, Lichen, and Macroalgae Point Count tables (DP1.20072.001) downloaded using the neonUtilities::loadByProduct() function. If list input is provided, the table input arguments must all be NA; similarly, if list input is missing, table inputs must be provided. [list] +#' +#' @param inputPoint The 'apc_pointTransect' table for the site x month combination(s) of interest (defaults to NA). If table input is provided, the 'inputDataList' argument must be missing. [data.frame] +#' +#' @param inputPerTax The 'apc_perTaxon' table for the site x month combination(s) of interest (defaults to NA). If table input is provided, the 'inputDataList' argument must be missing. [data.frame] +#' +#' @param inputTaxProc The 'apc_taxonomyProcessed' table for the site x month combination(s) of interest (defaults to NA). If table input is provided, the 'inputDataList' argument must be missing. [data.frame] +#' +#' @param inputMorph The 'apc_morphospecies' table for the site x month combination(s) of interest (defaults to NA). If table input is provided, the 'inputDataList' argument must be missing. [data.frame] +#' +#' @param barPlots If TRUE, will produce a list of plots, one for each site/date in the data provided. +#' +#' @return Two tables are produced containing point count summary data. The first "percentCover" table contains estimated percent cover for each observed species and/or substrate class on aquatic plant transects. This table includes a 'type' column indicating whether the estimate corresponds to a taxon or substrate class, and a 'substrateOrTaxonID' column which provides the corresponding taxonID or substrate identifier. +#' +#' The second "transectMetrics" table contains summary information including the length, habitatType, and total number of points sampled at each transect. +#' +#' If barPlots = TRUE, a list containing plots for each site x date combination will also be produced. +#' +#' @references +#' License: GNU AFFERO GENERAL PUBLIC LICENSE Version 3, 19 November 2007 +#' +#' @examples +#' \dontrun{ +#' # Obtain NEON Aquatic Plant Point Count data +#' apc <- neonUtilities::loadByProduct( +#' dpID = "DP1.20072.001", +#' site = "all", +#' startdate = "2018-07", +#' enddate = "2018-08", +#' tabl = "all", +#' check.size = FALSE +#' ) +#' +#' # Calculate percent cover for downloaded data +#' list <- neonPlants::estimateAquPercentCover( +#' inputDataList = apc, +#' inputPoint = NA, +#' inputPerTax = NA, +#' inputTaxProc = NA, +#' inputMorph = NA, +#' barPlots = FALSE +#' ) +#' +#' } +#' @export estimateAquPercentCover + + + + +estimateAquPercentCover <- function(inputDataList, + inputPoint = NA, + inputPerTax = NA, + inputTaxProc = NA, + inputMorph = NA, + barPlots = FALSE) { + + ### Join taxonomy data #### + + if(!missing(inputDataList)){ + + joinPointCounts <- neonPlants::joinAquPointCount(inputDataList = inputDataList) + # joinPointCounts <- joinAquPointCount(inputDataList = apc) + + + } else { + + joinPointCounts <- neonPlants::joinAquPointCount(inputPoint = inputPoint, + inputPerTax = inputPerTax, + inputTaxProc = inputTaxProc, + inputMorph = inputMorph) + } + + ### Remove SI Records #### + joinPointCounts <- joinPointCounts %>% dplyr::filter(is.na(.data$samplingImpractical)) + + ### Calculate Percent Cover #### + + # Calculate total number of sampling points per transect + total_points_df <- joinPointCounts %>% + dplyr::distinct(.data$siteID, .data$namedLocation, .data$collectDate, .data$pointNumber) %>% + dplyr::group_by(.data$siteID, .data$namedLocation, .data$collectDate) %>% + dplyr::summarise(totalPoints = dplyr::n(), .groups = "drop") + + + # Percent cover by substrate + cover_substrate <- joinPointCounts %>% + dplyr::filter(.data$targetTaxaPresent == "N") %>% + dplyr::group_by(.data$siteID, .data$collectDate, .data$namedLocation, .data$substrate) %>% + dplyr::summarise(count = dplyr::n(), .groups = "drop") %>% + dplyr::left_join(total_points_df, by = c("siteID", "namedLocation", "collectDate")) %>% + dplyr::mutate( + percent_cover = round(100 * .data$count / .data$totalPoints, 2), + type = "substrate", + substrateOrTaxonID = .data$substrate + ) %>% + dplyr::select("siteID", "collectDate", "namedLocation", "type", "substrateOrTaxonID", "percent_cover") + + # Percent cover by taxon + cover_taxon <- joinPointCounts %>% + dplyr::filter(.data$targetTaxaPresent != "N") %>% # "Y" and "U" + dplyr::group_by(.data$siteID, .data$collectDate, .data$namedLocation, .data$acceptedTaxonID, .data$aquaticPlantType, .data$targetTaxaPresent) %>% #, .data$scientificName + dplyr::summarise(count = dplyr::n(), .groups = "drop") %>% + dplyr::left_join(total_points_df, by = c("siteID", "namedLocation", "collectDate")) %>% + dplyr::mutate( + percent_cover = round(100 * .data$count / .data$totalPoints, 2), + # type = "taxon", + type = .data$aquaticPlantType, + substrateOrTaxonID = .data$acceptedTaxonID, + type = dplyr::case_when( + targetTaxaPresent == 'U' & is.na(type) ~ 'unknown', + TRUE ~ type + ), + substrateOrTaxonID = dplyr::case_when( + type == 'unknown' ~ 'UNKNOWN', + TRUE ~ .data$substrateOrTaxonID + ) + ) %>% + dplyr::select("siteID", "collectDate", "namedLocation", "type", "substrateOrTaxonID", "percent_cover") #"scientificName", + + # Combine results + percent_cover <- dplyr::bind_rows(cover_substrate, cover_taxon) %>% + dplyr::arrange(.data$collectDate, .data$namedLocation) + + + + ### Calculate Transect metrics #### + + transect_metrics <- joinPointCounts %>% + dplyr::distinct(.data$domainID, .data$siteID, .data$namedLocation, .data$collectDate, .data$boutNumber, .data$habitatType, .data$pointNumber, .data$transectDistance, .data$targetTaxaPresent) %>% + dplyr::group_by(.data$domainID, .data$siteID, .data$namedLocation, .data$collectDate, .data$boutNumber, .data$habitatType) %>% + dplyr::summarise( + transectMax = max(.data$transectDistance), + transectMin = min(.data$transectDistance), + totalPoints = dplyr::n(), + pointsWithTaxaPresent = sum(.data$targetTaxaPresent == "Y"), + .groups = "drop" + ) %>% + dplyr::mutate(transectLength_m = .data$transectMax - .data$transectMin) %>% + dplyr::select("domainID", "siteID", "namedLocation", "collectDate", "boutNumber", "habitatType", "transectLength_m", "totalPoints", "pointsWithTaxaPresent") + + returnList <- list(percentCover=percent_cover, transectMetrics=transect_metrics) + + + ### Optionally Plot Percent Cover by Site/Date #### + if(barPlots){ + + # Create a unique ID for each siteID + collectDate + percent_cover$boutID <- paste(percent_cover$siteID, substr(percent_cover$collectDate, 1, 10), sep = "_") + + # Simplify transectID and ensure consistent ordering + percent_cover$transectID <- stringr::str_extract(percent_cover$namedLocation, "(?<=transect\\.)\\w+") + percent_cover$transectID <- stringr::str_replace(percent_cover$transectID, "^0+", "") #strip leading zeros + + percent_cover <- percent_cover %>% + dplyr::mutate( + transectID = as.character(.data$transectID), + transect_num = as.numeric(stringr::str_extract(.data$transectID, "\\d+")), + transect_suffix = stringr::str_extract(.data$transectID, "[a-zA-Z]*") + ) + + ordered_ids <- percent_cover %>% + dplyr::distinct(.data$transectID, .data$transect_num, .data$transect_suffix) %>% + dplyr::arrange(.data$transect_num, .data$transect_suffix) %>% + dplyr::pull(.data$transectID) + + percent_cover <- percent_cover %>% + dplyr::mutate(transectID = factor(.data$transectID, levels = ordered_ids)) + + # Create custom order for plotting + substrate_ids <- percent_cover %>% + dplyr::filter(.data$type == "substrate" | .data$type == 'unknown') %>% + dplyr::pull(.data$substrateOrTaxonID) %>% + unique() + + taxon_ids <- percent_cover %>% + dplyr::filter(.data$type == "macroalgae" | .data$type == "plant") %>% + dplyr::pull(.data$substrateOrTaxonID) %>% + unique() + + stack_order <- c(substrate_ids, taxon_ids) + percent_cover$substrateOrTaxonID <- factor(percent_cover$substrateOrTaxonID, levels = stack_order) + + + plot_ids <- unique(percent_cover$boutID) + + # Create a consistent color palette + + # Separate substrate and taxon IDs + substrate_ids <- unique(cover_substrate$substrateOrTaxonID) + taxon_ids <- unique(cover_taxon$substrateOrTaxonID) + + # Assign greyscale colors to substrates + greys <- grDevices::gray.colors(length(substrate_ids), start = 0.3, end = 0.8) + + # Assign colorful palette to taxa + taxon_colors <- RColorBrewer::brewer.pal(min(length(taxon_ids), 8), "Set2") + if (length(taxon_ids) > 8) { + extra_colors <- grDevices::colors()[grep('gr(a|e)y', grDevices::colors(), invert = TRUE)] + taxon_colors <- c(taxon_colors, sample(extra_colors, length(taxon_ids) - 8)) + } + + # Combine into one palette + color_palette <- c(stats::setNames(greys, substrate_ids), stats::setNames(taxon_colors, taxon_ids)) + + + # Plotting function + plot_grid <- function(plot_id) { + plotly::ggplotly( + ggplot2::ggplot( + # subset(percent_cover, .data$boutID == plot_id), + percent_cover[percent_cover$boutID == plot_id, ], + ggplot2::aes( + x = .data$transectID, + y = .data$percent_cover, + fill = .data$substrateOrTaxonID)) + + ggplot2::geom_bar(stat = "identity", position = "stack") + + ggplot2::scale_fill_manual(values = color_palette) + # Apply consistent colors + ggplot2::labs( + title = gsub(" ", " on ", plot_id), + x = "Transect Number", + y = "Percent Cover", + fill = "Substrate/Taxon" + ) + + ggplot2::theme_minimal() + ) + } + + plot_list <- lapply(plot_ids, plot_grid) + names(plot_list) <- plot_ids + + # Bind df and plots + returnList$plot_list <- plot_list + + } + + return(returnList) + +} #function closer + + \ No newline at end of file diff --git a/R/joinAquClipHarvest.R b/R/joinAquClipHarvest.R new file mode 100644 index 0000000..bbac5a3 --- /dev/null +++ b/R/joinAquClipHarvest.R @@ -0,0 +1,596 @@ +#' @title Join taxonomic identifications in NEON aquatic plant clip harvest data to field and biomass tables + +#' @author Madaline Ritter \email{ritterm1@battelleecology.org} \cr + +#' @description Join the 'apl_clipHarvest', 'apl_biomass', 'apl_taxonomyProcessed' and 'apc_morphospecies' tables to generate two joined output tables that contain clip harvest data with merged taxonomic identifications. Data inputs are NEON Aquatic Plant Bryophyte Macroalgae Clip Harvest (DP1.20066.001) in list format retrieved using the neonUtilities::loadByProduct() function (preferred), data tables downloaded from the NEON Data Portal, or input data tables with an equivalent structure and representing the same site x month combinations. +#' +#' @details Input data may be provided either as a list or as individual tables. However, if both list and table inputs are provided at the same time the function will error out. For table joining to be successful, inputs must contain data from the same site x month combination(s) for all tables. +#' +#' Only data from bout 2 (midsummer sampling) is returned in the joined output tables, as other bouts do not include taxonomy data. If the input does not include any bout 2 data, the function will error out. +#' +#' In the joined output tables, the 'acceptedTaxonID' and associated taxonomic fields are populated from the first available identification in the following order: 'apl_taxonomyProcessed', 'apl_biomass', or 'apc_morphospecies'. For samples identified both in the field and by an expert taxonomist, the expert identification is retained in the output. A new field, 'taxonIDSourceTable', is included in the output and indicates the source table for each sample's identification. +#' +#' @param inputDataList A list object comprised of Aquatic Plant Bryophyte Macroalgae Clip Harvest tables (DP1.20066.001) downloaded using the neonUtilities::loadByProduct() function. If list input is provided, the table input arguments must all be NA; similarly, if list input is missing, table inputs must be provided. [list] +#' +#' @param inputBio The 'apl_biomass' table for the site x month combination(s) of interest (defaults to NA). If table input is provided, the 'inputDataList' argument must be missing. [data.frame] +#' +#' @param inputClip The 'apl_clipHarvest' table for the site x month combination(s) of interest (defaults to NA). If table input is provided, the 'inputDataList' argument must be missing. [data.frame] +#' +#' @param inputTaxProc The 'apl_taxonomyProcessed' table for the site x month combination(s) of interest (defaults to NA). If table input is provided, the 'inputDataList' argument must be missing. [data.frame] +#' +#' @param inputMorph The 'apc_morphospecies' table for the site x month combination(s) of interest (defaults to NA). If table input is provided, the 'inputDataList' argument must be missing. [data.frame] +#' +#' @return Two tables are produced containing joined clip harvest data. The first "joinedBiomass" table contains one row per sampleID in 'apl_biomass'. A single sampleID in 'apl_biomass' may correspond to more than one taxa in 'apl_taxonomyProcessed'. When tables are joined, the taxon with the greatest 'algalParameterValue' in 'apl_taxonomyProcessed' will be listed as the 'acceptedTaxonID' and a new field, 'additionalTaxa', appears in the output and includes all other taxa associated with the sampleID. If more than one taxon shares the same max 'algalParameterValue', the first row in the input table is returned as the 'acceptedTaxonID'. Detailed taxonomic information for any additionalTaxa can be found in the input 'apl_taxonomyProcessed' table. +#' +#' The second "fieldTaxonomy" table joins taxonomic identifications across tables to 'apl_clipHarvest' by field ID. Joining may result in one or many unique rows per sampleID. +#' +#' @references +#' License: GNU AFFERO GENERAL PUBLIC LICENSE Version 3, 19 November 2007 +#' +#' @examples +#' \dontrun{ +#' # Obtain NEON Aquatic Plant Clip Harvest data +#' apl <- neonUtilities::loadByProduct( +#' dpID = "DP1.20066.001", +#' site = "all", +#' startdate = "2018-07", +#' enddate = "2018-08", +#' tabl = "all", +#' check.size = FALSE +#' ) +#' +#' # Join downloaded clip harvest data +#' list <- neonPlants::joinAquClipHarvest( +#' inputDataList = apl, +#' inputBio = NA, +#' inputClip = NA, +#' inputTaxProc = NA, +#' inputMorph = NA +#' ) +#' +#' } +#' +#' @export joinAquClipHarvest + + + +joinAquClipHarvest <- function(inputDataList, + inputBio = NA, + inputClip = NA, + inputTaxProc = NA, + inputMorph = NA) { + + ### Test that user has supplied arguments as required by function #### + + ### Verify user-supplied inputDataList object contains correct data if not NA + if (!missing(inputDataList)) { + + # Check that input is a list + if (!inherits(inputDataList, "list")) { + stop( + glue::glue( + "Argument 'inputDataList' must be a list object from neonUtilities::loadByProduct(); + supplied input object is {class(inputDataList)}" + ) + ) + } + + # Check that required tables within list match expected names + listExpNames <- c("apl_biomass", "apl_clipHarvest") + + # Determine dataType or stop with appropriate message + if (length(setdiff(listExpNames, names(inputDataList))) > 0) { + stop( + glue::glue( + "Required tables missing from 'inputDataList':", + '{paste(setdiff(listExpNames, names(inputDataList)), collapse = ", ")}', + .sep = " " + ) + ) + } + } else { + + inputDataList <- NULL + + } # end missing conditional + + + ### Verify table inputs are NA if inputDataList is supplied + if (!is.null(inputDataList)) { + if (!isTRUE(is.na(inputBio)) || !isTRUE(is.na(inputClip)) || + !isTRUE(is.na(inputTaxProc)) || !isTRUE(is.na(inputMorph))) { + stop("When 'inputDataList' is supplied, all table input arguments must be NA.") + } + } + + + ### Verify all table inputs are data frames if inputDataList is NA + if (is.null(inputDataList) & + ( + !is.data.frame(inputBio) || !is.data.frame(inputClip) + )) { + stop("Data frames must be supplied for table inputs if 'inputDataList' is missing") + + } + + + ### Conditionally define input tables #### + if (inherits(inputDataList, "list")) { + apBio <- inputDataList$apl_biomass + apClip <- inputDataList$apl_clipHarvest + if (!is.null(inputDataList$apl_taxonomyProcessed)) { + apTaxProc <- inputDataList$apl_taxonomyProcessed + } else{ + apTaxProc <- NA + } + if (!is.null(inputDataList$apc_morphospecies)) { + apMorph <- inputDataList$apc_morphospecies + } else{ + apMorph <- NA + } + + } else { + apBio <- inputBio + apClip <- inputClip + apTaxProc <- inputTaxProc + apMorph <- inputMorph + + } + + + + ### Verify input tables contain required columns and data #### + + ### Verify 'apBio' table contains required data + # Check for required columns + bioExpCols <- c( + "sampleID", "taxonID", "scientificName", "morphospeciesID", "identifiedDate", + "sampleCondition", "identificationHistoryID", "dataQF", "publicationDate", + "release", "division", "class", "order", "family", "genus", "section", + "specificEpithet", "scientificNameAuthorship", "identificationQualifier", + "identificationReferences", "remarks", "identifiedBy", "uid" + ) + + + if (length(setdiff(bioExpCols, colnames(apBio))) > 0) { + stop( + glue::glue( + "Required columns missing from 'inputBio':", + '{paste(setdiff(bioExpCols, colnames(apBio)), collapse = ", ")}', + .sep = " " + ) + ) + } + + # Check for data + if (nrow(apBio) == 0) { + stop(glue::glue("Table 'inputBio' has no data.")) + } + + + + ### Verify 'apClip' table contains required data + # Check for required columns + clipExpCols <- c( + "namedLocation", "eventID", "boutNumber", "fieldID", "benthicArea", "domainID", + "siteID", "startDate", "collectDate", "fieldIDCode", "recordedBy", "remarks" + ) + + if (length(setdiff(clipExpCols, colnames(apClip))) > 0) { + stop( + glue::glue( + "Required columns missing from 'inputClip':", + '{paste(setdiff(clipExpCols, colnames(apClip)), collapse = ", ")}', + .sep = " " + ) + ) + } + + # Check for data + if (nrow(apClip) == 0) { + stop(glue::glue("Table 'inputClip' has no data.")) + } + + # Check for bout 2 data + if (nrow(apClip %>% dplyr::filter(.data$boutNumber == '2')) == 0) { + stop( + glue::glue( + "The input data does not contain any bout 2 records. No taxonomy data to join." + ) + ) + } + + + ### Verify 'apTaxProc' table contains required data if data exists + taxProcExpCols <- c( + "sampleID", "taxonID", "identifiedDate", "sampleCondition", + "identificationHistoryID", "dataQF", "publicationDate", "release", + "division", "class", "order", "family", "genus", "section", "specificEpithet", + "scientificNameAuthorship", "identificationQualifier", + "identificationReferences", "remarks", "identifiedBy", "morphospeciesID", + "uid", "domainID", "siteID", "namedLocation", "collectDate", "sampleCode" + ) + + # Check for data + if (is.data.frame(apTaxProc)) { + if(nrow(apTaxProc) == 0) { + message( + glue::glue( + "Warning: Table 'inputTaxProc' has no data. Join will not include processed taxonomy data." + ) + ) + } else { + # Check for required columns if data exists + if (length(setdiff(taxProcExpCols, colnames(apTaxProc))) > 0) { + stop( + glue::glue( + "Required columns missing from 'inputTaxProc':", + '{paste(setdiff(taxProcExpCols, colnames(apTaxProc)), collapse = ", ")}', + .sep = " " + ) + ) + } + } + } + + + ### Verify 'apMorph' table contains required data if data exists + morphExpCols <- c( + "morphospeciesID", "taxonID", "scientificName", "identificationQualifier", + "identificationReferences", "identifiedBy", "dataQF" + ) + + + # Check for data + if (is.data.frame(apMorph)){ + if(nrow(apMorph) == 0) { + message( + "Warning: Table 'inputMorph' has no data. Joined output does not include identifications from the morphospecies table." + ) + } else { + # Check for required columns if data exists + if (length(setdiff(morphExpCols, colnames(apMorph))) > 0) { + stop( + glue::glue( + "Required columns missing from 'inputMorph':", + '{paste(setdiff(morphExpCols, colnames(apMorph)), collapse = ", ")}', + .sep = " " + ) + ) + } + } + } + + + + ### Join apBio and apTaxProc tables using sampleID #### + + if (is.data.frame(apTaxProc) && nrow(apTaxProc) > 0) { + + # Select needed columns from apTaxProc + apTaxProc <- apTaxProc %>% + dplyr::select( + -"uid", + -"domainID", + -"siteID", + -"namedLocation", + -"collectDate", + -"morphospeciesID", + -"sampleCode" + ) #%>% + # dplyr::mutate(identifiedDate = as.character(identifiedDate)) #biomass identifiedDate is character, not date + + # Columns conditionally replaced with taxProc data + join1_cols <- c( + "division", "class", "order", "family", + "genus", "section", "specificEpithet", + "scientificNameAuthorship", "identificationQualifier", "identificationReferences", + "taxonRank", "identifiedBy", "identifiedDate" + ) + + # Update expert taxonomist identifications + apJoin1 <- apBio %>% + dplyr::left_join( + apTaxProc, + by = "sampleID", + suffix = c("_bio", "_taxProc"), + relationship = "many-to-many" + ) %>% + dplyr::mutate( + + sampleCondition = dplyr::case_when( + !is.na(.data$sampleCondition_bio) & !is.na(.data$sampleCondition_taxProc) ~ + paste0("biomass ", .data$sampleCondition_bio," | taxProcessed ", .data$sampleCondition_taxProc), + !is.na(.data$sampleCondition_bio) & is.na(.data$sampleCondition_taxProc) ~ + paste0("biomass ", .data$sampleCondition_bio), + is.na(.data$sampleCondition_bio) & !is.na(.data$sampleCondition_taxProc) ~ + paste0("taxProcessed ", .data$sampleCondition_taxProc), + TRUE ~ NA + ), + + taxonIDSourceTable = dplyr::case_when( + !is.na(.data$taxonID_taxProc) ~ "apl_taxonomyProcessed", + is.na(.data$taxonID_taxProc) & !is.na(.data$taxonID_bio) ~ "apl_biomass", + TRUE ~ NA + ), + + tempTaxonID = dplyr::if_else( + !is.na(.data$taxonID_taxProc), .data$taxonID_taxProc, .data$taxonID_bio + ), + + scientificName = dplyr::if_else( + !is.na(.data$taxonID_taxProc), + .data$scientificName_taxProc, + .data$scientificName_bio + ), + + identificationHistoryID = dplyr::case_when( + !is.na(.data$identificationHistoryID_bio) & !is.na(.data$identificationHistoryID_taxProc) ~ + paste0(.data$identificationHistoryID_bio," | ",.data$identificationHistoryID_taxProc), + is.na(.data$identificationHistoryID_taxProc) & !is.na(.data$identificationHistoryID_bio) ~ + .data$identificationHistoryID_bio, + !is.na(.data$identificationHistoryID_taxProc) & is.na(.data$identificationHistoryID_bio) ~ + .data$identificationHistoryID_taxProc, + TRUE ~ NA + ), + + biomassDataQF = .data$dataQF_bio, + taxProcessedDataQF = .data$dataQF_taxProc, + biomassPublicationDate = .data$publicationDate_bio, + taxProcessedPublicationDate = .data$publicationDate_taxProc, + biomassRelease = .data$release_bio, + taxProcessedRelease = .data$release_taxProc, + + remarks = dplyr::case_when( + !is.na(.data$remarks_bio) & !is.na(.data$remarks_taxProc) ~ + paste0( "biomass remarks - ", .data$remarks_bio, " | taxProcessed remarks - ", .data$remarks_taxProc), + is.na(.data$remarks_taxProc) & !is.na(.data$remarks_bio) ~ + paste0("biomass remarks - ", .data$remarks_bio), + !is.na(.data$remarks_taxProc) & is.na(.data$remarks_bio) ~ + paste0("taxProcessed remarks - ", .data$remarks_taxProc), + TRUE ~ NA + ) + ) + + for (col in join1_cols) { + taxProc_col <- paste0(col, "_taxProc") + bio_col <- paste0(col, "_bio") + apJoin1[[col]] <- dplyr::if_else( + !is.na(apJoin1$taxonID_taxProc), + if (taxProc_col %in% names(apJoin1)) as.character(apJoin1[[taxProc_col]]) else NA_character_, + if (bio_col %in% names(apJoin1)) as.character(apJoin1[[bio_col]]) else NA_character_ + ) + } + apJoin1 <- apJoin1 %>% + dplyr::select(-"uid", -"targetTaxaPresent", + -dplyr::matches("_taxProc"),-dplyr::matches("_bio")) + + + } else { + message("Output tables do not include identifications from the expert taxonomists.\nProvide the 'apl_taxonomyProcessed' table to join expert identifications.") + # rename columns if no taxProc join + apJoin1 <- apBio %>% + dplyr::mutate( + startDate = NA, + tempTaxonID = .data$taxonID, + remarks = dplyr::if_else(is.na(.data$remarks), NA, paste0("biomass remarks - ", .data$remarks)), + biomassRelease = .data$release, + taxonIDSourceTable = dplyr::if_else(is.na(.data$taxonID), NA, "apl_biomass"), + biomassDataQF = .data$dataQF, + biomassPublicationDate = .data$publicationDate + ) %>% + dplyr::select(-"taxonID", + -"release", + -"dataQF", + -"publicationDate", + -"uid") + } + + + ### Join apJoin1 and apMorph tables #### + + # Select needed columns from apMorph + if (is.data.frame(apMorph) && nrow(apMorph) > 0) { + # message("Join morphospecies taxonomic identifications.") + apMorph <- apMorph %>% + dplyr::select( + "taxonID", "scientificName", "morphospeciesID", "identificationQualifier", + "identificationReferences", "identifiedBy", "morphospeciesResolvedDate", + ## Uncomment next two lines once morph table has been updated + # "phylum", "division", "class", "order", "family", "genus", "section", + # "specificEpithet", "infraspecificEpithet", "variety", "form", "taxonRank", + "dataQF" + )%>% + dplyr::rename(identifiedDate="morphospeciesResolvedDate") + + # Update morphospecies taxon identifications + apJoin2 <- apJoin1 %>% + dplyr::mutate( + morphospeciesID = dplyr::if_else( + !is.na(.data$morphospeciesID), + paste0(.data$morphospeciesID, ".", substr(.data$collectDate, 1, 4)), + .data$morphospeciesID + ) + ) %>% + dplyr::left_join(apMorph, by="morphospeciesID", suffix=c("_bio","_morph")) %>% + dplyr::mutate( + taxonIDSourceTable = dplyr::if_else( + !is.na(.data$taxonID) & .data$tempTaxonID %in% c('2PLANT', 'UNKALG'), + "apc_morphospecies", .data$taxonIDSourceTable), + + acceptedTaxonID = dplyr::if_else( + !is.na(.data$taxonID) & .data$tempTaxonID %in% c('2PLANT', 'UNKALG'), + .data$taxonID, .data$tempTaxonID), + + morphospeciesDataQF = .data$dataQF + ) + + # Columns conditionally replaced with morph data + join2_cols <- c( + "scientificName", "identificationQualifier", "identificationReferences", + "identifiedBy", "identifiedDate" + ## Uncomment next two lines once morph table has been updated + # , "phylum", "division", "class", "order", + # "family", "genus", "section", "specificEpithet", "infraspecificEpithet", + # "variety", "form", "taxonRank" + ) + + for (col in join2_cols) { + morph_col <- paste0(col, "_morph") + bio_col <- paste0(col, "_bio") + apJoin2[[col]] <- dplyr::if_else( + !is.na(apJoin2$taxonID) & apJoin2$tempTaxonID %in% c("2PLANT", "UNKALG"), + if (morph_col %in% names(apJoin2)) as.character(apJoin2[[morph_col]]) else NA_character_, + if (bio_col %in% names(apJoin2)) as.character(apJoin2[[bio_col]]) else NA_character_ + ) + } + + apJoin2 <- apJoin2 %>% + dplyr::select( + -"taxonID", -"tempTaxonID", -"dataQF", + -dplyr::matches("_morph"),-dplyr::matches("_bio")) + + + } else { + message("No data joined from apc_morphospecies table.") + + apJoin2 <- apJoin1 %>% + dplyr::mutate(acceptedTaxonID = .data$tempTaxonID) %>% + dplyr::select(-"tempTaxonID") + } + + + + + + ### Join apClip and apBio tables #### + + finalJoin <- apClip %>% + dplyr::left_join( + apJoin2 %>% dplyr::select( + -"benthicArea", -"namedLocation", -"domainID", -"siteID", + -"startDate", -"collectDate", -"fieldIDCode"), # + by = "fieldID", suffix = c("_clip", "_bio")) %>% + dplyr::mutate( + remarks = dplyr::case_when( + !is.na(.data$remarks_bio) & + !is.na(.data$remarks_clip) ~ paste0( + "clipHarvest remarks - ", + .data$remarks_clip, + " | ", + .data$remarks_bio + ),!is.na(.data$remarks_bio) & + is.na(.data$remarks_clip) ~ .data$remarks_bio, + is.na(.data$remarks_bio) & + !is.na(.data$remarks_clip) ~ paste0("clipHarvest remarks - ", .data$remarks_clip), + TRUE ~ NA + ), + recordedBy = dplyr::if_else( + !is.na(.data$recordedBy_clip), + .data$recordedBy_clip, + .data$recordedBy_bio + ), + clipDataQF = .data$dataQF, + clipPublicationDate = .data$publicationDate, + clipRelease = .data$release + ) %>% + dplyr::select(-"dataQF", + -"publicationDate", + -"release",-dplyr::matches("_bio"),-dplyr::matches("_clip")) + + + ### Filter out bout 1 and 3 data #### + finalJoin <- finalJoin %>% dplyr::filter(.data$boutNumber == '2') + + + ### Re-format date columns #### + + finalJoin$processingDate <- as.Date(finalJoin$processingDate) + finalJoin$identifiedDate <- as.Date(finalJoin$identifiedDate) + finalJoin$collectDate <- as.POSIXct(finalJoin$collectDate, + format = "%Y-%m-%d %H:%M:%S", tz = "UTC") + finalJoin$startDate <- as.POSIXct(finalJoin$startDate, + format = "%Y-%m-%d %H:%M:%S", tz = "UTC") + finalJoin$biomassPublicationDate <- as.POSIXct(finalJoin$biomassPublicationDate, + format = "%Y%m%dT%H%M%SZ", tz = "UTC") + finalJoin$clipPublicationDate <- as.POSIXct(finalJoin$clipPublicationDate, + format = "%Y%m%dT%H%M%SZ", tz = "UTC") + if (is.data.frame(apTaxProc) && nrow(apTaxProc) > 0){ + finalJoin$taxProcessedPublicationDate <- as.POSIXct(finalJoin$taxProcessedPublicationDate, + format = "%Y%m%dT%H%M%SZ", tz = "UTC") + } + + + ### Create joinedBiomass output table #### + if (is.data.frame(apTaxProc) && nrow(apTaxProc) > 0) { + joinedBiomass <- finalJoin %>% + dplyr::filter(.data$targetTaxaPresent=='Y') %>% #only include records where taxa present + dplyr::group_by(.data$sampleID) %>% + # Arrange so the highest algalParameterValue comes first + dplyr::arrange(dplyr::desc(.data$algalParameterValue), .by_group = TRUE) %>% + # Create new column with all acceptedTaxonIDs except the first + dplyr::mutate( + additionalTaxa = { + unique_taxa <- unique(.data$acceptedTaxonID) + if (length(unique_taxa) > 1) { + other_taxa <- .data$acceptedTaxonID[-1] # all but the first + other_taxa <- other_taxa[!is.na(other_taxa)] # remove NAs + if (length(other_taxa) > 0) paste(other_taxa, collapse = "|") + else NA_character_ + } else { + NA_character_ + } + } + ) %>% + # Keep only the first row per sampleID + dplyr::slice(1) %>% + dplyr::ungroup() %>% + dplyr::relocate('additionalTaxa', .after = 'acceptedTaxonID') + } else { + joinedBiomass <- finalJoin %>% + dplyr::filter(.data$targetTaxaPresent=='Y') %>% #only include records where taxa present + # Create empty column for acceptedTaxonID + dplyr::mutate( + additionalTaxa = NA_character_ + ) %>% + dplyr::relocate('additionalTaxa', .after = 'acceptedTaxonID') + } + + + ### Create fieldTaxonomy output table #### + if (is.data.frame(apTaxProc) && nrow(apTaxProc) > 0) { + fieldTaxCols <- setdiff( + c(names(apClip), "fieldID", "sampleID", "sampleCode", "sampleCondition", + "acceptedTaxonID", "scientificName", "scientificNameAuthorship", + "identificationQualifier", "identificationHistoryID", "identificationReferences", + "identifiedBy", "identifiedDate", "taxonIDSourceTable", + "algalParameter", "algalParameterValue", "algalParameterUnit", "testMethod", "method", + "subspecies", "variety", "subvariety", "form", "subform", "speciesGroup", + "taxonDatabaseName", "taxonDatabaseID", + "division", "class", "order", "family", "genus", "section", "specificEpithet", + "taxonRank","clipDataQF", "taxProcessedDataQF", "clipPublicationDate", "taxProcessedPublicationDate"), + c("release", "dataQF", "publicationDate") + ) + } else { + fieldTaxCols <- setdiff( + c(names(apClip), "fieldID", "sampleID", "sampleCode", "sampleCondition", + "acceptedTaxonID", "scientificName", "scientificNameAuthorship", + "identificationQualifier", "identificationHistoryID", "identificationReferences", + "identifiedBy", "identifiedDate", "taxonIDSourceTable", + "division", "class", "order", "family", "genus", "section", "specificEpithet", + "taxonRank","clipDataQF", "clipPublicationDate"), + c("release", "dataQF", "publicationDate") + ) + } + + fieldTaxonomy <- finalJoin %>% + dplyr::select(tidyselect::all_of(fieldTaxCols)) + + + ### Create final output list #### + + joinClipHarvest <- list('joinedBiomass' = joinedBiomass, 'fieldTaxonomy' = fieldTaxonomy) + + return(joinClipHarvest) + +} #function closer diff --git a/R/joinAquPointCount.R b/R/joinAquPointCount.R new file mode 100644 index 0000000..5dd49df --- /dev/null +++ b/R/joinAquPointCount.R @@ -0,0 +1,580 @@ +#' @title Join NEON aquatic plant point count data into a single table with merged taxonomic identifications + +#' @author Madaline Ritter \email{ritterm1@battelleecology.org} \cr + +#' @description Join the 'apc_pointTransect', 'apc_perTaxon', 'apc_taxonomyProcessed' and 'apc_morphospecies' tables to generate a single table that contains point count data with taxonomic identifications for each sampleID. Data inputs are NEON Aquatic Plant, Bryophyte, Lichen, and Macroalgae Point Counts in Wadeable Streams (DP1.20072.001) in list format retrieved using the neonUtilities::loadByProduct() function (preferred), data tables downloaded from the NEON Data Portal, or input data tables with an equivalent structure and representing the same site x month combinations. +#' +#' @details Input data may be provided either as a list or as individual tables. However, if both list and table inputs are provided at the same time the function will error out. For table joining to be successful, inputs must contain data from the same site x month combination(s) for all tables. +#' +#' In the joined output table, the 'acceptedTaxonID' and associated taxonomic fields are populated from the first available identification in the following order: 'apc_taxonomyProcessed', 'apc_perTaxon', or 'apc_morphospecies'. For samples identified both in the field and by an expert taxonomist, the expert identification is retained in the output. A new field, 'taxonIDSourceTable', is included in the output and indicates the source table for each sample's identification. +#' +#' If a single sample in 'apc_taxonomyProcessed' contains multiple macroalgae species, each species will be represented as a separate row in 'apc_pointTransect' for every point associated with that sampleID. +#' +#' @param inputDataList A list object comprised of Aquatic Plant, Bryophyte, Lichen, and Macroalgae Point Count tables (DP1.20072.001) downloaded using the neonUtilities::loadByProduct() function. If list input is provided, the table input arguments must all be NA; similarly, if list input is missing, table inputs must be provided. [list] +#' +#' @param inputPoint The 'apc_pointTransect' table for the site x month combination(s) of interest (defaults to NA). If table input is provided, the 'inputDataList' argument must be missing. [data.frame] +#' +#' @param inputPerTax The 'apc_perTaxon' table for the site x month combination(s) of interest (defaults to NA). If table input is provided, the 'inputDataList' argument must be missing. [data.frame] +#' +#' @param inputTaxProc The 'apc_taxonomyProcessed' table for the site x month combination(s) of interest (defaults to NA). If table input is provided, the 'inputDataList' argument must be missing. [data.frame] +#' +#' @param inputMorph The 'apc_morphospecies' table for the site x month combination(s) of interest (defaults to NA). If table input is provided, the 'inputDataList' argument must be missing. [data.frame] +#' +#' @return A table containing point transect data with all associated taxonomic information for each point where targetTaxaPresent == 'Y'. +#' +#' @references +#' License: GNU AFFERO GENERAL PUBLIC LICENSE Version 3, 19 November 2007 +#' +#' @examples +#' \dontrun{ +#' # Obtain NEON Aquatic Plant Point Count data +#' apc <- neonUtilities::loadByProduct( +#' dpID = "DP1.20072.001", +#' site = "all", +#' startdate = "2018-03", +#' enddate = "2018-05", +#' tabl = "all", +#' package = 'expanded', +#' check.size = FALSE +#' ) +#' +#' # Join downloaded point count data +#' df <- neonPlants::joinAquPointCount( +#' inputDataList = apc, +#' inputPoint = NA, +#' inputPerTax = NA, +#' inputTaxProc = NA, +#' inputMorph = NA +#' ) +#' +#' } +#' +#' @export joinAquPointCount + + + +joinAquPointCount <- function(inputDataList, + inputPoint = NA, + inputPerTax = NA, + inputTaxProc = NA, + inputMorph = NA) { + ### Test that user has supplied arguments as required by function #### + + ### Verify user-supplied inputDataList object contains correct data if not NA + if (!missing(inputDataList)) { + # Check that input is a list + if (!inherits(inputDataList, "list")) { + stop( + glue::glue( + "Argument 'inputDataList' must be a list object from neonUtilities::loadByProduct(); + supplied input object is {class(inputDataList)}" + ) + ) + } + + # Check that required tables within list match expected names + listExpNames <- c("apc_pointTransect", "apc_perTaxon") + + # Determine dataType or stop with appropriate message + if (length(setdiff(listExpNames, names(inputDataList))) > 0) { + stop( + glue::glue( + "Required tables missing from 'inputDataList':", + '{paste(setdiff(listExpNames, names(inputDataList)), collapse = ", ")}', + .sep = " " + ) + ) + } + } else { + inputDataList <- NULL + + } # end missing conditional + + + ### Verify table inputs are NA if inputDataList is supplied + if (!is.null(inputDataList)) { + if (!isTRUE(is.na(inputPoint)) || !isTRUE(is.na(inputPerTax)) || + !isTRUE(is.na(inputTaxProc)) || !isTRUE(is.na(inputMorph))) { + stop("When 'inputDataList' is supplied, all table input arguments must be NA.") + } + } + + + ### Verify all table inputs are data frames if inputDataList is NA + if (is.null(inputDataList) & + ( + !is.data.frame(inputPoint) || !is.data.frame(inputPerTax) + )) { + stop("Data frames must be supplied for table inputs if 'inputDataList' is missing") + + } + + + ### Conditionally define input tables #### + if (inherits(inputDataList, "list")) { + apPoint <- inputDataList$apc_pointTransect + apPerTax <- inputDataList$apc_perTaxon + if (!is.null(inputDataList$apc_taxonomyProcessed)) { + apTaxProc <- inputDataList$apc_taxonomyProcessed + } else{ + apTaxProc <- NA + } + if (!is.null(inputDataList$apc_morphospecies)) { + apMorph <- inputDataList$apc_morphospecies + } else{ + apMorph <- NA + } + + } else { + apPoint <- inputPoint + apPerTax <- inputPerTax + apTaxProc <- inputTaxProc + apMorph <- inputMorph + + } + + + + ### Verify input tables contain required columns and data #### + + ### Verify 'apPoint' table contains required data + # Check for required columns + pointExpCols <- c( + "domainID", + "siteID", + "namedLocation", + "pointNumber", + "collectDate", + "eventID", + "remarks" + ) + + if (length(setdiff(pointExpCols, colnames(apPoint))) > 0) { + stop( + glue::glue( + "Required columns missing from 'inputPoint':", + '{paste(setdiff(pointExpCols, colnames(apPoint)), collapse = ", ")}', + .sep = " " + ) + ) + } + + # Check for data + if (nrow(apPoint) == 0) { + stop(glue::glue("Table 'inputPoint' has no data.")) + } + + + + ### Verify 'apPerTax' table contains required data + # Check for required columns + perTaxExpCols <- c( + "sampleID", + "taxonID", + "scientificName", + "morphospeciesID", + "sampleCondition", + "identificationHistoryID", + "dataQF", + "publicationDate", + "release", + "phylum", + "division", + "class", + "order", + "family", + "genus", + "section", + "specificEpithet", + "infraspecificEpithet", + "variety", + "form", + "scientificNameAuthorship", + "identificationQualifier", + "identificationReferences", + "taxonRank", + "remarks", + "identifiedBy", + "identifiedDate", + "uid" + ) + + if (length(setdiff(perTaxExpCols, colnames(apPerTax))) > 0) { + stop( + glue::glue( + "Required columns missing from 'inputPerTax':", + '{paste(setdiff(perTaxExpCols, colnames(apPerTax)), collapse = ", ")}', + .sep = " " + ) + ) + } + + # Check for data + if (nrow(apPerTax) == 0) { + stop(glue::glue("Table 'inputPerTax' has no data.")) + } + + + + ### Verify 'apTaxProc' table contains required data if data exists + taxProcExpCols <- c( + "sampleID", + "acceptedTaxonID", + "scientificName", + "sampleCondition", + "identificationHistoryID", + "dataQF", + "publicationDate", + "release", + "phylum", + "division", + "class", + "order", + "family", + "genus", + "section", + "specificEpithet", + "infraspecificEpithet", + "variety", + "form", + "scientificNameAuthorship", + "identificationQualifier", + "identificationReferences", + "taxonRank", + "remarks", + "identifiedBy", + "identifiedDate", + "morphospeciesID", + "uid", + "domainID", + "siteID", + "namedLocation", + "collectDate" + ) + + # Check for data + if (is.data.frame(apTaxProc)) { + if (nrow(apTaxProc) == 0) { + message( + glue::glue( + "Warning: Table 'inputTaxProc' has no data. Join will not include processed taxonomy data." + ) + ) + } else { + # Check for required columns if data exists + if (length(setdiff(taxProcExpCols, colnames(apTaxProc))) > 0) { + stop( + glue::glue( + "Required columns missing from 'inputTaxProc':", + '{paste(setdiff(taxProcExpCols, colnames(apTaxProc)), collapse = ", ")}', + .sep = " " + ) + ) + } + } + } + + + ### Verify 'apMorph' table contains required data if data exists + morphExpCols <- c( + "taxonID", + "scientificName", + "morphospeciesID", + "identificationQualifier", + "identificationReferences", + "identifiedBy", + "dataQF" + ) + + + # Check for data + if (is.data.frame(apMorph)) { + if (nrow(apMorph) == 0) { + message( + "Warning: Table 'inputMorph' has no data. Join will not include identifications from the morphospecies table." + ) + } else { + # Check for required columns if data exists + if (length(setdiff(morphExpCols, colnames(apMorph))) > 0) { + stop( + glue::glue( + "Required columns missing from 'inputMorph':", + '{paste(setdiff(morphExpCols, colnames(apMorph)), collapse = ", ")}', + .sep = " " + ) + ) + } + } + } + + + + + + ### Join apPerTax and apTaxProc tables #### + + if (is.data.frame(apTaxProc) && nrow(apTaxProc) > 0) { + # Select needed columns from apTaxProc + apTaxProc <- apTaxProc %>% + dplyr::select( + -"uid", + -"domainID", + -"siteID", + -"namedLocation", + -"collectDate", + -"morphospeciesID" + ) %>% + dplyr::rename(taxonID = "acceptedTaxonID") + + # Columns conditionally replaced with taxProc data + join1_cols <- c( + "scientificName", + "phylum", "division", "class", "order", + "family", "genus", "section", "specificEpithet", "infraspecificEpithet", + "variety", "form", "scientificNameAuthorship", "identificationQualifier", "identificationReferences", + "taxonRank", "identifiedBy", "identifiedDate" + ) + + # Update expert taxonomist identifications + apJoin1 <- apPerTax %>% + dplyr::left_join( + apTaxProc, + by = "sampleID", + suffix = c("_perTax", "_taxProc"), + relationship = "many-to-many" + ) %>% + dplyr::mutate( + sampleCondition = dplyr::case_when( + !is.na(.data$sampleCondition_perTax) & + !is.na(.data$sampleCondition_taxProc) ~ paste0( + "perTaxon ", + .data$sampleCondition_perTax, + " | taxProcessed ", + .data$sampleCondition_taxProc + ),!is.na(.data$sampleCondition_perTax) & + is.na(.data$sampleCondition_taxProc) ~ paste0("perTaxon ", .data$sampleCondition_perTax), + is.na(.data$sampleCondition_perTax) & + !is.na(.data$sampleCondition_taxProc) ~ paste0("taxProcessed ", .data$sampleCondition_taxProc), + TRUE ~ NA + ), + taxonIDSourceTable = dplyr::case_when( + !is.na(.data$taxonID_taxProc) ~ "apc_taxonomyProcessed", + is.na(.data$taxonID_taxProc) & + !is.na(.data$taxonID_perTax) ~ "apc_perTaxon", + TRUE ~ NA + ), + tempTaxonID = dplyr::if_else( + !is.na(.data$taxonID_taxProc), + .data$taxonID_taxProc, + .data$taxonID_perTax + ), + identificationHistoryID = dplyr::case_when( + !is.na(.data$identificationHistoryID_perTax) & + !is.na(.data$identificationHistoryID_taxProc) ~ paste0( + .data$identificationHistoryID_perTax, + " | ", + .data$identificationHistoryID_taxProc + ), + is.na(.data$identificationHistoryID_taxProc) & + !is.na(.data$identificationHistoryID_perTax) ~ .data$identificationHistoryID_perTax,!is.na(.data$identificationHistoryID_taxProc) & + is.na(.data$identificationHistoryID_perTax) ~ .data$identificationHistoryID_taxProc, + TRUE ~ NA + ), + perTaxonDataQF = .data$dataQF_perTax, + taxProcessedDataQF = .data$dataQF_taxProc, + perTaxonPublicationDate = .data$publicationDate_perTax, + taxProcessedPublicationDate = .data$publicationDate_taxProc, + taxProcessedRelease = .data$release_taxProc, + perTaxonRelease = .data$release_perTax, + remarks = dplyr::case_when( + !is.na(.data$remarks_perTax) & !is.na(.data$remarks_taxProc) ~ paste0( + "perTaxon remarks - ", .data$remarks_perTax, " | taxProcessed remarks - ",.data$remarks_taxProc + ), + is.na(.data$remarks_taxProc) & + !is.na(.data$remarks_perTax) ~ paste0("perTaxon remarks - ", .data$remarks_perTax),!is.na(.data$remarks_taxProc) & + is.na(.data$remarks_perTax) ~ paste0("taxProcessed remarks - ", .data$remarks_taxProc), + TRUE ~ NA + ) + ) + + for (col in join1_cols) { + taxProc_col <- paste0(col, "_taxProc") + perTax_col <- paste0(col, "_perTax") + apJoin1[[col]] <- dplyr::if_else( + !is.na(apJoin1$taxonID_taxProc), + if (taxProc_col %in% names(apJoin1)) as.character(apJoin1[[taxProc_col]]) else NA_character_, + if (perTax_col %in% names(apJoin1)) as.character(apJoin1[[perTax_col]]) else NA_character_ + ) + } + + apJoin1 <- apJoin1 %>% + dplyr::select(-"uid", -"targetTaxaPresent", + -dplyr::matches("_taxProc"),-dplyr::matches("_perTax")) + + + } else { + message("No data joined from apc_taxonomyProcessed table.") + # rename columns if no taxProc join + apJoin1 <- apPerTax %>% + dplyr::mutate( + tempTaxonID = .data$taxonID, + remarks = dplyr::if_else(is.na(.data$remarks), NA, paste0("perTaxon remarks - ", .data$remarks)), + perTaxonRelease = .data$release, + taxonIDSourceTable = dplyr::if_else(is.na(.data$taxonID), NA, "apc_perTaxon"), + perTaxonDataQF = .data$dataQF, + perTaxonPublicationDate = .data$publicationDate + ) %>% + dplyr::select(-"taxonID", + -"release", + -"dataQF", + -"publicationDate", + -"uid") + } + + + + + + ### Join apJoin1 and apMorph tables #### + + # Select needed columns from apMorph + if (is.data.frame(apMorph) && nrow(apMorph) > 0) { + apMorph <- apMorph %>% + dplyr::select( + "taxonID", + "scientificName", + "morphospeciesID", + "identificationQualifier", + "identificationReferences", + "identifiedBy", + "morphospeciesResolvedDate", + # "phylum", + # "division", + # "class", + # "order", + # "family", + # "genus", + # "section", + # "specificEpithet", + # "infraspecificEpithet", + # "variety", + # "form", + # "taxonRank" + "dataQF" + ) %>% + dplyr::rename(identifiedDate="morphospeciesResolvedDate") + + apJoin2 <- apJoin1 %>% + dplyr::mutate(morphospeciesID = dplyr::if_else( + !is.na(.data$morphospeciesID), + paste0(.data$morphospeciesID, ".", substr(.data$collectDate, 1, 4)), + .data$morphospeciesID + )) %>% + dplyr::left_join(apMorph, + by = "morphospeciesID", + suffix = c("_perTax", "_morph")) %>% + dplyr::mutate( + taxonIDSourceTable = dplyr::if_else( + !is.na(.data$taxonID) & .data$tempTaxonID %in% c("2PLANT", "UNKALG"), + "apc_morphospecies", .data$taxonIDSourceTable), + acceptedTaxonID = dplyr::if_else( + !is.na(.data$taxonID) & .data$tempTaxonID %in% c("2PLANT", "UNKALG"), + .data$taxonID, .data$tempTaxonID), + morphospeciesDataQF = .data$dataQF + ) + + # Columns conditionally replaced with morph data + join2_cols <- c( + "scientificName", "identificationQualifier", "identificationReferences", + "identifiedBy", "identifiedDate" + # , "phylum", "division", "class", "order", + # "family", "genus", "section", "specificEpithet", "infraspecificEpithet", + # "variety", "form", "taxonRank" + ) + + for (col in join2_cols) { + morph_col <- paste0(col, "_morph") + perTax_col <- paste0(col, "_perTax") + apJoin2[[col]] <- dplyr::if_else( + !is.na(apJoin2$taxonID) & apJoin2$tempTaxonID %in% c("2PLANT", "UNKALG"), + if (morph_col %in% names(apJoin2)) as.character(apJoin2[[morph_col]]) else NA_character_, + if (perTax_col %in% names(apJoin2)) as.character(apJoin2[[perTax_col]]) else NA_character_ + ) + } + + apJoin2 <- apJoin2 %>% + dplyr::select( + -"taxonID", -"tempTaxonID", -"dataQF", + -dplyr::matches("_morph"),-dplyr::matches("_perTax")) + + + } else { + message("No data joined from apc_morphospecies table.") + + apJoin2 <- apJoin1 %>% + dplyr::mutate(acceptedTaxonID = .data$tempTaxonID) %>% + dplyr::select(-"tempTaxonID") + } + + + + + + ### Join apPoint and apPerTax tables #### + + joinPointCounts <- apPoint %>% + dplyr::rename( + pointPublicationDate = "publicationDate", + pointRelease = "release", + pointDataQF = "dataQF" + ) %>% + dplyr::left_join( + apJoin2, + by = c( + "domainID", + "siteID", + "namedLocation", + "pointNumber", + "collectDate", + "eventID" + ), + suffix = c("_point", "_perTax") + ) %>% + dplyr::mutate( + remarks = dplyr::case_when( + !is.na(.data$remarks_perTax) & + !is.na(.data$remarks_point) ~ paste0( + "pointTransect remarks - ", + .data$remarks_point, + " | ", + .data$remarks_perTax + ),!is.na(.data$remarks_perTax) & + is.na(.data$remarks_point) ~ .data$remarks_perTax, + is.na(.data$remarks_perTax) & + !is.na(.data$remarks_point) ~ paste0("pointTransect remarks - ", .data$remarks_point), + TRUE ~ NA + )) %>% + dplyr::select(-"remarks_perTax", -"remarks_point") + + + ### Re-format date columns #### + joinPointCounts$identifiedDate <- as.Date(joinPointCounts$identifiedDate) + + joinPointCounts$collectDate <- as.POSIXct(joinPointCounts$collectDate, + format = "%Y-%m-%dT%H:%MZ", tz = "UTC") + + joinPointCounts$pointPublicationDate <- as.POSIXct(joinPointCounts$pointPublicationDate, + format = "%Y%m%dT%H%M%SZ", tz = "UTC") + + joinPointCounts$perTaxonPublicationDate <- as.POSIXct(joinPointCounts$perTaxonPublicationDate, + format = "%Y%m%dT%H%M%SZ", tz = "UTC") + + if (is.data.frame(apTaxProc) && nrow(apTaxProc) > 0) { + joinPointCounts$taxProcessedPublicationDate <- as.POSIXct(joinPointCounts$taxProcessedPublicationDate, + format = "%Y%m%dT%H%M%SZ", tz = "UTC") + } + + + return(joinPointCounts) + +} #function closer diff --git a/man/estimateAquPercentCover.Rd b/man/estimateAquPercentCover.Rd new file mode 100644 index 0000000..6b71831 --- /dev/null +++ b/man/estimateAquPercentCover.Rd @@ -0,0 +1,83 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/estimateAquPercentCover.R +\name{estimateAquPercentCover} +\alias{estimateAquPercentCover} +\title{Estimate NEON aquatic plant, bryophyte, lichen, and macroalgae percent cover in wadeable streams} +\usage{ +estimateAquPercentCover( + inputDataList, + inputPoint = NA, + inputPerTax = NA, + inputTaxProc = NA, + inputMorph = NA, + barPlots = FALSE +) +} +\arguments{ +\item{inputDataList}{A list object comprised of Aquatic Plant, Bryophyte, Lichen, and Macroalgae Point Count tables (DP1.20072.001) downloaded using the neonUtilities::loadByProduct() function. If list input is provided, the table input arguments must all be NA; similarly, if list input is missing, table inputs must be provided. \link{list}} + +\item{inputPoint}{The 'apc_pointTransect' table for the site x month combination(s) of interest (defaults to NA). If table input is provided, the 'inputDataList' argument must be missing. \link{data.frame}} + +\item{inputPerTax}{The 'apc_perTaxon' table for the site x month combination(s) of interest (defaults to NA). If table input is provided, the 'inputDataList' argument must be missing. \link{data.frame}} + +\item{inputTaxProc}{The 'apc_taxonomyProcessed' table for the site x month combination(s) of interest (defaults to NA). If table input is provided, the 'inputDataList' argument must be missing. \link{data.frame}} + +\item{inputMorph}{The 'apc_morphospecies' table for the site x month combination(s) of interest (defaults to NA). If table input is provided, the 'inputDataList' argument must be missing. \link{data.frame}} + +\item{barPlots}{If TRUE, will produce a list of plots, one for each site/date in the data provided.} +} +\value{ +Two tables are produced containing point count summary data. The first "percentCover" table contains estimated percent cover for each observed species and/or substrate class on aquatic plant transects. This table includes a 'type' column indicating whether the estimate corresponds to a taxon or substrate class, and a 'substrateOrTaxonID' column which provides the corresponding taxonID or substrate identifier. + +The second "transectMetrics" table contains summary information including the length, habitatType, and total number of points sampled at each transect. + +If barPlots = TRUE, a list containing plots for each site x date combination will also be produced. +} +\description{ +Data inputs are NEON Aquatic Plant, Bryophyte, Lichen, and Macroalgae Point Counts in Wadeable Streams (DP1.20072.001) in list format retrieved using the neonUtilities::loadByProduct() function (preferred), data tables downloaded from the NEON Data Portal, or input data tables with an equivalent structure and representing the same site x month combinations. The estimateAquPercentCover() function joins taxonomy information across point count tables and aggregates occurrence data to estimate percent cover at the transect level. +} +\details{ +Input data may be provided either as a list generated from the neonUtilities::laodByProduct() function or as individual tables. However, if both list and table inputs are provided at the same time the function will error. + +Percent cover is calculated using the equation from Bowden et al. 2006: + +\deqn{percentCover_i = (N_i / N_t) * 100} + +Where: +\itemize{ +\item \eqn{N_i} is the number of observed points in a transect that match class type ā€œiā€ (i.e., a particular taxonID or substrate) +\item \eqn{N_t} is the total number of points observed in the transect +} + +Note: This calculation can generate percent cover values >100\% if there is vertical stacking of plants, or values <100\% if 'targetTaxaPresent' is unknown. +} +\examples{ +\dontrun{ +# Obtain NEON Aquatic Plant Point Count data +apc <- neonUtilities::loadByProduct( +dpID = "DP1.20072.001", +site = "all", +startdate = "2018-07", +enddate = "2018-08", +tabl = "all", +check.size = FALSE +) + +# Calculate percent cover for downloaded data +list <- neonPlants::estimateAquPercentCover( +inputDataList = apc, +inputPoint = NA, +inputPerTax = NA, +inputTaxProc = NA, +inputMorph = NA, +barPlots = FALSE +) + +} +} +\references{ +License: GNU AFFERO GENERAL PUBLIC LICENSE Version 3, 19 November 2007 +} +\author{ +Madaline Ritter \email{ritterm1@battelleecology.org} \cr +} diff --git a/man/joinAquClipHarvest.Rd b/man/joinAquClipHarvest.Rd new file mode 100644 index 0000000..be52945 --- /dev/null +++ b/man/joinAquClipHarvest.Rd @@ -0,0 +1,70 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/joinAquClipHarvest.R +\name{joinAquClipHarvest} +\alias{joinAquClipHarvest} +\title{Join taxonomic identifications in NEON aquatic plant clip harvest data to field and biomass tables} +\usage{ +joinAquClipHarvest( + inputDataList, + inputBio = NA, + inputClip = NA, + inputTaxProc = NA, + inputMorph = NA +) +} +\arguments{ +\item{inputDataList}{A list object comprised of Aquatic Plant Bryophyte Macroalgae Clip Harvest tables (DP1.20066.001) downloaded using the neonUtilities::loadByProduct() function. If list input is provided, the table input arguments must all be NA; similarly, if list input is missing, table inputs must be provided. \link{list}} + +\item{inputBio}{The 'apl_biomass' table for the site x month combination(s) of interest (defaults to NA). If table input is provided, the 'inputDataList' argument must be missing. \link{data.frame}} + +\item{inputClip}{The 'apl_clipHarvest' table for the site x month combination(s) of interest (defaults to NA). If table input is provided, the 'inputDataList' argument must be missing. \link{data.frame}} + +\item{inputTaxProc}{The 'apl_taxonomyProcessed' table for the site x month combination(s) of interest (defaults to NA). If table input is provided, the 'inputDataList' argument must be missing. \link{data.frame}} + +\item{inputMorph}{The 'apc_morphospecies' table for the site x month combination(s) of interest (defaults to NA). If table input is provided, the 'inputDataList' argument must be missing. \link{data.frame}} +} +\value{ +Two tables are produced containing joined clip harvest data. The first "joinedBiomass" table contains one row per sampleID in 'apl_biomass'. A single sampleID in 'apl_biomass' may correspond to more than one taxa in 'apl_taxonomyProcessed'. When tables are joined, the taxon with the greatest 'algalParameterValue' in 'apl_taxonomyProcessed' will be listed as the 'acceptedTaxonID' and a new field, 'additionalTaxa', appears in the output and includes all other taxa associated with the sampleID. If more than one taxon shares the same max 'algalParameterValue', the first row in the input table is returned as the 'acceptedTaxonID'. Detailed taxonomic information for any additionalTaxa can be found in the input 'apl_taxonomyProcessed' table. + +The second "fieldTaxonomy" table joins taxonomic identifications across tables to 'apl_clipHarvest' by field ID. Joining may result in one or many unique rows per sampleID. +} +\description{ +Join the 'apl_clipHarvest', 'apl_biomass', 'apl_taxonomyProcessed' and 'apc_morphospecies' tables to generate two joined output tables that contain clip harvest data with merged taxonomic identifications. Data inputs are NEON Aquatic Plant Bryophyte Macroalgae Clip Harvest (DP1.20066.001) in list format retrieved using the neonUtilities::loadByProduct() function (preferred), data tables downloaded from the NEON Data Portal, or input data tables with an equivalent structure and representing the same site x month combinations. +} +\details{ +Input data may be provided either as a list or as individual tables. However, if both list and table inputs are provided at the same time the function will error out. For table joining to be successful, inputs must contain data from the same site x month combination(s) for all tables. + +Only data from bout 2 (midsummer sampling) is returned in the joined output tables, as other bouts do not include taxonomy data. If the input does not include any bout 2 data, the function will error out. + +In the joined output tables, the 'acceptedTaxonID' and associated taxonomic fields are populated from the first available identification in the following order: 'apl_taxonomyProcessed', 'apl_biomass', or 'apc_morphospecies'. For samples identified both in the field and by an expert taxonomist, the expert identification is retained in the output. A new field, 'taxonIDSourceTable', is included in the output and indicates the source table for each sample's identification. +} +\examples{ +\dontrun{ +# Obtain NEON Aquatic Plant Clip Harvest data +apl <- neonUtilities::loadByProduct( +dpID = "DP1.20066.001", +site = "all", +startdate = "2018-07", +enddate = "2018-08", +tabl = "all", +check.size = FALSE +) + +# Join downloaded clip harvest data +list <- neonPlants::joinAquClipHarvest( +inputDataList = apl, +inputBio = NA, +inputClip = NA, +inputTaxProc = NA, +inputMorph = NA +) + +} + +} +\references{ +License: GNU AFFERO GENERAL PUBLIC LICENSE Version 3, 19 November 2007 +} +\author{ +Madaline Ritter \email{ritterm1@battelleecology.org} \cr +} diff --git a/man/joinAquPointCount.Rd b/man/joinAquPointCount.Rd new file mode 100644 index 0000000..d821757 --- /dev/null +++ b/man/joinAquPointCount.Rd @@ -0,0 +1,69 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/joinAquPointCount.R +\name{joinAquPointCount} +\alias{joinAquPointCount} +\title{Join NEON aquatic plant point count data into a single table with merged taxonomic identifications} +\usage{ +joinAquPointCount( + inputDataList, + inputPoint = NA, + inputPerTax = NA, + inputTaxProc = NA, + inputMorph = NA +) +} +\arguments{ +\item{inputDataList}{A list object comprised of Aquatic Plant, Bryophyte, Lichen, and Macroalgae Point Count tables (DP1.20072.001) downloaded using the neonUtilities::loadByProduct() function. If list input is provided, the table input arguments must all be NA; similarly, if list input is missing, table inputs must be provided. \link{list}} + +\item{inputPoint}{The 'apc_pointTransect' table for the site x month combination(s) of interest (defaults to NA). If table input is provided, the 'inputDataList' argument must be missing. \link{data.frame}} + +\item{inputPerTax}{The 'apc_perTaxon' table for the site x month combination(s) of interest (defaults to NA). If table input is provided, the 'inputDataList' argument must be missing. \link{data.frame}} + +\item{inputTaxProc}{The 'apc_taxonomyProcessed' table for the site x month combination(s) of interest (defaults to NA). If table input is provided, the 'inputDataList' argument must be missing. \link{data.frame}} + +\item{inputMorph}{The 'apc_morphospecies' table for the site x month combination(s) of interest (defaults to NA). If table input is provided, the 'inputDataList' argument must be missing. \link{data.frame}} +} +\value{ +A table containing point transect data with all associated taxonomic information for each point where targetTaxaPresent == 'Y'. +} +\description{ +Join the 'apc_pointTransect', 'apc_perTaxon', 'apc_taxonomyProcessed' and 'apc_morphospecies' tables to generate a single table that contains point count data with taxonomic identifications for each sampleID. Data inputs are NEON Aquatic Plant, Bryophyte, Lichen, and Macroalgae Point Counts in Wadeable Streams (DP1.20072.001) in list format retrieved using the neonUtilities::loadByProduct() function (preferred), data tables downloaded from the NEON Data Portal, or input data tables with an equivalent structure and representing the same site x month combinations. +} +\details{ +Input data may be provided either as a list or as individual tables. However, if both list and table inputs are provided at the same time the function will error out. For table joining to be successful, inputs must contain data from the same site x month combination(s) for all tables. + +In the joined output table, the 'acceptedTaxonID' and associated taxonomic fields are populated from the first available identification in the following order: 'apc_taxonomyProcessed', 'apc_perTaxon', or 'apc_morphospecies'. For samples identified both in the field and by an expert taxonomist, the expert identification is retained in the output. A new field, 'taxonIDSourceTable', is included in the output and indicates the source table for each sample's identification. + +If a single sample in 'apc_taxonomyProcessed' contains multiple macroalgae species, each species will be represented as a separate row in 'apc_pointTransect' for every point associated with that sampleID. +} +\examples{ +\dontrun{ +# Obtain NEON Aquatic Plant Point Count data +apc <- neonUtilities::loadByProduct( +dpID = "DP1.20072.001", +site = "all", +startdate = "2018-03", +enddate = "2018-05", +tabl = "all", +package = 'expanded', +check.size = FALSE +) + +# Join downloaded point count data +df <- neonPlants::joinAquPointCount( +inputDataList = apc, +inputPoint = NA, +inputPerTax = NA, +inputTaxProc = NA, +inputMorph = NA +) + +} + +} +\references{ +License: GNU AFFERO GENERAL PUBLIC LICENSE Version 3, 19 November 2007 +} +\author{ +Madaline Ritter \email{ritterm1@battelleecology.org} \cr +} diff --git a/tests/testthat/test_estimateAquPercentCover.R b/tests/testthat/test_estimateAquPercentCover.R new file mode 100644 index 0000000..e7ca67e --- /dev/null +++ b/tests/testthat/test_estimateAquPercentCover.R @@ -0,0 +1,209 @@ +### Unit tests for estimateAquPercentCover function #### +### POC: Madaline Ritter, ritterm1@BattelleEcology.org + +# # retrieve test data +# ap <- neonUtilities::loadByProduct( +# dpID="DP1.20072.001", #APL-clip: DP1.20066.001, APC: DP1.20072.001 +# check.size=F, +# startdate = '2023-06', +# enddate = '2023-06', +# site = c("CUPE", "GUIL"), +# include.provisional = T, +# release = "LATEST", +# token = Sys.getenv('NEON_PAT')) +# +# ap$categoricalCodes_20072 <- NULL +# ap$issueLog_20072 <- NULL +# ap$readme_20072 <- NULL +# ap$validation_20072 <- NULL +# ap$variables_20072 <- NULL +# +# saveRDS(ap, "C:/Users/ritterm1/Documents/GitHub/a_neonPackages/neonPlants/tests/testthat/testdata/estimateAquPercentCover_testData_D04_202306.rds") +# testList <- readRDS("C:/Users/ritterm1/Documents/GitHub/a_neonPackages/neonPlants/tests/testthat/testdata/estimateAquPercentCover_testData_D04_202306.rds") + +### Read in test data #### +testList <- readRDS(testthat::test_path("testdata", "estimateAquPercentCover_testData_D04_202306.rds")) +testPoint <- testList$apc_pointTransect +testPerTax <- testList$apc_perTaxon +testTaxProc <- testList$apc_taxonomyProcessed + + + +## Test: Function generates expected output type #### +# Test list input +testthat::test_that(desc = "Output type list input", { + + testthat::expect_type(object = estimateAquPercentCover(inputDataList = testList), + type = "list") +}) + +# Test table input +testthat::test_that(desc = "Output type table input", { + + testthat::expect_type(object = estimateAquPercentCover(inputPoint = testPoint, + inputPerTax = testPerTax, + inputTaxProc = testTaxProc), + type = "list") +}) + + +### Test: Function generates expected output class #### +# Test list input +testthat::test_that(desc = "Output class list input", + { + desc = estimateAquPercentCover(inputDataList = testList) + + testthat::expect_s3_class(desc[[1]], class = "data.frame") + testthat::expect_s3_class(desc[[2]], class = "data.frame") +}) + +# Test table input +testthat::test_that(desc = "Output class table input", { + + desc = estimateAquPercentCover(inputPoint = testPoint, + inputPerTax = testPerTax, + inputTaxProc = testTaxProc) + + testthat::expect_s3_class(desc[[1]], class = "data.frame") + testthat::expect_s3_class(desc[[2]], class = "data.frame") +}) + + +### Test: Function generates data frame with expected dimensions using test data #### +## Test list input +# Check expected dimensions of output df 1 +testthat::test_that(desc = "Output percentCover df dimensions list input", { + + out = estimateAquPercentCover(inputDataList = testList) + + testthat::expect_identical(object = nrow(out[[1]]), + expected = as.integer(109)) + + testthat::expect_identical(object = ncol(out[[1]]), + expected = as.integer(6)) +}) + +# Check expected dimensions of output df 2 +testthat::test_that(desc = "Output transectMetrics df dimensions list input", { + + out = estimateAquPercentCover(inputDataList = testList) + + testthat::expect_identical(object = nrow(out[[2]]), + expected = as.integer(20)) + + testthat::expect_identical(object = ncol(out[[2]]), + expected = as.integer(9)) +}) + +## Test table inputs +# Check expected dimensions of output df 1 +testthat::test_that(desc = "Output percentCover df dimensions list input", { + + out = estimateAquPercentCover(inputPoint = testPoint, + inputPerTax = testPerTax, + inputTaxProc = testTaxProc) + + testthat::expect_identical(object = nrow(out[[1]]), + expected = as.integer(109)) + + testthat::expect_identical(object = ncol(out[[1]]), + expected = as.integer(6)) +}) + +# Check expected dimensions of output df 2 +testthat::test_that(desc = "Output transectMetrics df dimensions list input", { + + out = estimateAquPercentCover(inputPoint = testPoint, + inputPerTax = testPerTax, + inputTaxProc = testTaxProc) + + testthat::expect_identical(object = nrow(out[[2]]), + expected = as.integer(20)) + + testthat::expect_identical(object = ncol(out[[2]]), + expected = as.integer(9)) +}) + + +### Test: Generates expected data using test data #### +## Test percentCover output +# Check sum of all percent_cover estimates +testthat::test_that(desc = "Output data frame percent cover sum", { + + out = estimateAquPercentCover(inputDataList = testList) + + testthat::expect_identical( + object = sum(out$percentCover$percent_cover), + expected = 2050) +}) + +# Check taxa percent_cover estimates +testthat::test_that(desc = "Output data frame percent cover taxa sum", { + + out = estimateAquPercentCover(inputDataList = testList) + + testthat::expect_identical( + object = sum(out$percentCover$percent_cover[out$percentCover$type == 'macroalgae']), + expected = 200) + + testthat::expect_identical( + object = sum(out$percentCover$percent_cover[out$percentCover$type == 'plant']), + expected = 60) + + testthat::expect_identical( + object = sum(out$percentCover$percent_cover[out$percentCover$type == 'unknown']), + expected = 80) +}) + + +## Test transectMetrics output +# Check sum of all transectLengths +testthat::test_that(desc = "Output data frame transect length sum", { + + out = estimateAquPercentCover(inputDataList = testList) + + testthat::expect_identical( + object = round(sum(out$transectMetrics$transectLength_m), 2), + expected = 105.57) +}) + +# Check unique habitat types +testthat::test_that(desc = "Output data frame unique habitat types", { + + out = estimateAquPercentCover(inputDataList = testList) + + testthat::expect_identical( + object = unique(out$transectMetrics$habitatType), + expected = c("riffle", "run", "pool")) +}) + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/tests/testthat/test_joinAquClipHarvest.R b/tests/testthat/test_joinAquClipHarvest.R new file mode 100644 index 0000000..f5497cb --- /dev/null +++ b/tests/testthat/test_joinAquClipHarvest.R @@ -0,0 +1,367 @@ +### Unit tests for joinAquClipHarvest function #### +### POC: Madaline Ritter, ritterm1@BattelleEcology.org + +# testList <- readRDS("C:/Users/ritterm1/Documents/GitHub/a_neonPackages/neonPlants/tests/testthat/testdata/joinAquClipHarvest_testData_202307.rds") + +### Read in test data +testList <- readRDS(testthat::test_path("testdata", "joinAquClipHarvest_testData_202307.rds")) +testBio <- testList$apl_biomass +testClip <- testList$apl_clipHarvest +testTaxProc <- testList$apl_taxonomyProcessed +testMorph <- testList$apc_morphospecies + + + +### Test: Function generates expected output type #### +# Test list input +testthat::test_that(desc = "Output type list input", { + + testthat::expect_type(object = joinAquClipHarvest(inputDataList = testList), + type = "list") + +}) + +# Test table input +testthat::test_that(desc = "Output type table input", { + + testthat::expect_type(object = joinAquClipHarvest(inputBio = testBio, + inputClip = testClip, + inputTaxProc = testTaxProc, + inputMorph = testMorph), + type = "list") +}) + + + +### Test: Function generates expected output class #### +# Test list input +testthat::test_that(desc = "Output class list input", { + + desc = joinAquClipHarvest(inputDataList = testList) + + testthat::expect_s3_class(desc[[1]], class = "data.frame") + testthat::expect_s3_class(desc[[2]], class = "data.frame") +}) + +# Test table input +testthat::test_that(desc = "Output class table input", { + + desc = joinAquClipHarvest(inputBio = testBio, + inputClip = testClip, + inputTaxProc = testTaxProc, + inputMorph = testMorph) + + testthat::expect_s3_class(desc[[1]], class = "data.frame") + testthat::expect_s3_class(desc[[2]], class = "data.frame") +}) + + + +### Test: Function generates data frames with expected dimensions using test data #### +## Test list input +# Check expected row number of output +testthat::test_that(desc = "Output data frame row number list input", { + + out <- joinAquClipHarvest(inputDataList = testList) + + testthat::expect_identical(object = nrow(out$joinedBiomass), + expected = as.integer(7)) + testthat::expect_identical(object = nrow(out$fieldTaxonomy), + expected = as.integer(13)) +}) + + +# Check expected column number of output +testthat::test_that(desc = "Output data frame column number list input", { + + out <- joinAquClipHarvest(inputDataList = testList) + + testthat::expect_identical(object = ncol(out$joinedBiomass), + expected = as.integer(104)) + + testthat::expect_identical(object = ncol(out$fieldTaxonomy), + expected = as.integer(76)) +}) + + +## Test table inputs +# Check expected row number of output +testthat::test_that(desc = "Output data frame row number table input", { + + out <- joinAquClipHarvest(inputBio = testBio, + inputClip = testClip, + inputTaxProc = testTaxProc, + inputMorph = testMorph) + + testthat::expect_identical(object = nrow(out$joinedBiomass), + expected = as.integer(7)) + testthat::expect_identical(object = nrow(out$fieldTaxonomy), + expected = as.integer(13)) +}) + +# Check expected column number of output +testthat::test_that(desc = "Output data frame row number table input", { + + out <- joinAquClipHarvest(inputBio = testBio, + inputClip = testClip, + inputTaxProc = testTaxProc, + inputMorph = testMorph) + + testthat::expect_identical(object = ncol(out$joinedBiomass), + expected = as.integer(104)) + + testthat::expect_identical(object = ncol(out$fieldTaxonomy), + expected = as.integer(76)) +}) + + + +### Test: Function joins biomass data correctly using test data #### +## Test dataframe output +# Check 'acceptedTaxonID' is pulled from apc_taxonomyProcessed if taxProc data exists +testthat::test_that(desc = "Output data frame source: taxonomyProcessed", { + + out <- joinAquClipHarvest(inputDataList = testList) + + testthat::expect_identical(object = unique(out$joinedBiomass$taxonIDSourceTable[which(out$joinedBiomass$sampleID == 'BLUE.20230717.MACROALGAE1.Q8')]), + expected = "apl_taxonomyProcessed") + testthat::expect_identical(object = unique(out$joinedBiomass$acceptedTaxonID[which(out$joinedBiomass$sampleID == 'BLUE.20230717.MACROALGAE1.Q8')]), + expected = "NEONDREX309000") +}) + + +# Check 'acceptedTaxonID' is pulled from apc_morphospecies if identification is in morphospecies table +testthat::test_that(desc = "Output data frame source: apc_morphospecies", { + + out <- joinAquClipHarvest(inputDataList = testList) + + testthat::expect_identical(object = unique(out$joinedBiomass$taxonIDSourceTable[which(out$joinedBiomass$sampleID == 'BLUE.20230717.AP1.Q2')]), + expected = "apc_morphospecies") + testthat::expect_identical(object = unique(out$joinedBiomass$acceptedTaxonID[which(out$joinedBiomass$sampleID == 'BLUE.20230717.AP1.Q2')]), + expected = "LURE2") + + testthat::expect_identical(object = unique(out$joinedBiomass$taxonIDSourceTable[which(out$joinedBiomass$sampleID == 'FLNT.20230724.AP2.P3')]), + expected = "apc_morphospecies") + testthat::expect_identical(object = unique(out$joinedBiomass$acceptedTaxonID[which(out$joinedBiomass$sampleID == 'FLNT.20230724.AP2.P3')]), + expected = "SEAP") +}) + + +# Check 'acceptedTaxonID' is pulled from apl_biomass if identification is not in morphospecies or taxProcessed tables +testthat::test_that(desc = "Output data frame source: biomass", { + + out <- joinAquClipHarvest(inputDataList = testList) + testthat::expect_identical(object = unique(out$joinedBiomass$taxonIDSourceTable[which(out$joinedBiomass$sampleID == 'FLNT.20230724.MACROALGAE1.P1')]), + expected = "apl_biomass") + testthat::expect_identical(object = unique(out$joinedBiomass$acceptedTaxonID[which(out$joinedBiomass$sampleID == 'FLNT.20230724.MACROALGAE1.P1')]), + expected = "UNKALG") + + testthat::expect_identical(object = unique(out$joinedBiomass$taxonIDSourceTable[which(out$joinedBiomass$sampleID == 'TOOK.20230726.AP3.P6')]), + expected = "apl_taxonomyProcessed") + testthat::expect_identical(object = unique(out$joinedBiomass$acceptedTaxonID[which(out$joinedBiomass$sampleID == 'TOOK.20230726.AP3.P6')]), + expected = "NEONDREX1220001") + + testthat::expect_identical(object = unique(out$joinedBiomass$taxonIDSourceTable[which(out$joinedBiomass$sampleID == 'BLUE.20230717.AP3.Q2')]), + expected = "apl_biomass") + testthat::expect_identical(object = unique(out$joinedBiomass$acceptedTaxonID[which(out$joinedBiomass$sampleID == 'BLUE.20230717.AP3.Q2')]), + expected = "RIFL4") + + testthat::expect_identical(object = unique(out$joinedBiomass$taxonIDSourceTable[which(out$joinedBiomass$sampleID == 'BLUE.20230717.AP2.Q2')]), + expected = "apl_biomass") + testthat::expect_identical(object = unique(out$joinedBiomass$acceptedTaxonID[which(out$joinedBiomass$sampleID == 'BLUE.20230717.AP2.Q2')]), + expected = "LERI6") +}) + + + +### Test: Generate joinedBiomass dataframe with correct taxonomic IDs #### +## Test dataframe output +# Check tax info is correct when sampleID has >1 taxonID in apl_taxonomyProcessed and max algalParameterValue is unique +testthat::test_that(desc = "Output taxonomy correct: multiple taxa per sampleID, single max algalParamValue", { + + out <- joinAquClipHarvest(inputDataList = testList) + + testthat::expect_identical(object = unique(out$joinedBiomass$acceptedTaxonID[which(out$joinedBiomass$sampleID == 'TOOK.20230726.AP3.P6')]), + expected = "NEONDREX1220001") + testthat::expect_identical(object = unique(out$joinedBiomass$acceptedTaxonID[which(out$joinedBiomass$sampleID == 'BLUE.20230717.MACROALGAE1.Q8')]), + expected = "NEONDREX309000") + + testthat::expect_identical(object = unique(out$joinedBiomass$additionalTaxa[which(out$joinedBiomass$sampleID == 'TOOK.20230726.AP3.P6')]), + expected = "NEONDREX885004|AUDSP|NEONDREX920001|NITELLASP") + testthat::expect_identical(object = unique(out$joinedBiomass$additionalTaxa[which(out$joinedBiomass$sampleID == 'BLUE.20230717.MACROALGAE1.Q8')]), + expected = NA_character_) + +}) + +# Check tax info is correct when sampleID has >1 taxonID in apl_taxonomyProcessed and max algalParameterValue is unique +testthat::test_that(desc = "Output additional taxa correct: multiple taxa per sampleID, many max algalParamValue", { + + # modify test data + testList2 <- testList + testList2$apl_taxonomyProcessed <- testList2$apl_taxonomyProcessed %>% dplyr::filter(algalParameterValue != 5) + out <- joinAquClipHarvest(inputDataList = testList2) + + testthat::expect_identical(object = unique(out$joinedBiomass$acceptedTaxonID[which(out$joinedBiomass$sampleID == 'TOOK.20230726.AP3.P6')]), + expected = "NEONDREX885004") + + testthat::expect_identical(object = unique(out$joinedBiomass$additionalTaxa[which(out$joinedBiomass$sampleID == 'TOOK.20230726.AP3.P6')]), + expected = "AUDSP|NEONDREX920001|NITELLASP") +}) + + +# Check 'acceptedTaxonID' is empty when only 1 taxonID exists per sampleID in apl_taxonomyProcessed +testthat::test_that(desc = "Output additional taxa correct: single taxon per sampleID", { + + # modify test data + testList3 <- testList + testList3$apl_taxonomyProcessed <- testList3$apl_taxonomyProcessed %>% dplyr::filter(siteID == 'BLUE') + out <- joinAquClipHarvest(inputDataList = testList3) + + testthat::expect_identical(object = unique(out$joinedBiomass$additionalTaxa), + expected = NA_character_) + +}) + + + +### Test: Generate fieldTaxonomy dataframe with correct taxonomic IDs #### +## Test dataframe output +# Check each fieldID has correct number of associated taxa +testthat::test_that(desc = "Output fieldTaxonomy: multiple rows per fieldID", { + + out <- joinAquClipHarvest(inputDataList = testList) + + testthat::expect_identical(object = as.numeric(sum(out$fieldTaxonomy$fieldID == 'BLUE.20230717.QUADRAT.Q2', na.rm = TRUE)), + expected = 3) + testthat::expect_identical(object = as.numeric(sum(out$fieldTaxonomy$fieldID == 'TOOK.20230726.RAKE.P6', na.rm = TRUE)), + expected = 5) + +}) + +# Check each fieldID has correct joined taxonIDs +testthat::test_that(desc = "Output fieldTaxonomy: correct taxa per fieldID", { + + out <- joinAquClipHarvest(inputDataList = testList) + + testthat::expect_identical(object = unique(out$fieldTaxonomy$acceptedTaxonID[which(out$fieldTaxonomy$fieldID == 'BLUE.20230717.QUADRAT.Q2')]), + expected = c("RIFL4", "LERI6", "LURE2")) + testthat::expect_identical(object = unique(out$fieldTaxonomy$acceptedTaxonID[which(out$fieldTaxonomy$fieldID == 'TOOK.20230726.RAKE.P6')]), + expected = c("NEONDREX1220001", "NEONDREX885004", "AUDSP", "NEONDREX920001", "NITELLASP")) + +}) + + + +### Tests: Generate expected errors for 'inputDataList' #### +# Test 'inputDataList' is a list +testthat::test_that(desc = "Argument 'inputDataList' is list object", { + + testthat::expect_error(object = joinAquClipHarvest(inputDataList = testBio), + regexp = "Argument 'inputDataList' must be a list object") +}) + +# Test 'inputDataList' contains required tables +testthat::test_that(desc = "Required tables present in 'inputDataList' input", { + + testthat::expect_error(object = joinAquClipHarvest(inputDataList = testList[1:2]), + regexp = "Required tables missing from 'inputDataList'") +}) + +# Test table inputs are NA if 'inputDataList' supplied +testthat::test_that(desc = "Table inputs NA when required", { + + testthat::expect_error(object = joinAquClipHarvest(inputDataList = testList, + inputBio = testBio), + regexp = "When 'inputDataList' is supplied, all table input arguments must be NA") +}) + + + +### Tests: Generate expected errors with table inputs #### +testthat::test_that(desc = "Table inputs are data frames when required", { + + testthat::expect_error(object = joinAquClipHarvest(inputMorph = testMorph, + inputBio = testBio), + regexp = "Data frames must be supplied for table inputs if 'inputDataList' is missing") +}) + + + +### Test: Generate expected errors for issues with biomass table (works for inputDataList or inputBio source) #### +# Test when inputBio lacks required column +testthat::test_that(desc = "Table 'inputBio' missing column", { + + testthat::expect_error(object = joinAquClipHarvest(inputBio = testBio %>% + dplyr::select(-taxonID), + inputClip = testClip), + regexp = "Required columns missing from 'inputBio': taxonID") +}) + +# Test when inputBio has no data +testthat::test_that(desc = "Table 'inputBio' missing data", { + + testthat::expect_error(object = joinAquClipHarvest(inputBio = testBio %>% + dplyr::filter(taxonID == "coconut"), + inputClip = testClip), + regexp = "Table 'inputBio' has no data.") +}) + + +### Test: Generate expected errors for issues with clipHarvest table (works for inputDataList or inputClip source) #### +# Test when inputClip lacks required column +testthat::test_that(desc = "Table 'inputClip' missing column", { + + testthat::expect_error(object = joinAquClipHarvest(inputClip = testClip %>% + dplyr::select(-eventID), + inputBio = testBio), + regexp = "Required columns missing from 'inputClip': eventID") +}) + +# Test when inputClip has no data +testthat::test_that(desc = "Table 'inputClip' missing data", { + + testthat::expect_error(object = joinAquClipHarvest(inputClip = testClip %>% + dplyr::filter(eventID == "moon landing"), + inputBio = testBio), + regexp = "Table 'inputClip' has no data.") +}) + + + +### Test: Generate expected errors for issues with taxonomyProcessed table (works for inputDataList or inputTaxProc source) #### +# Test when inputTaxProc lacks required column +testthat::test_that(desc = "Table 'inputTaxProc' missing column", { + + testthat::expect_error(object = joinAquClipHarvest(inputTaxProc = testTaxProc %>% + dplyr::select(-taxonID), + inputBio = testBio, + inputClip = testClip), + regexp = "Required columns missing from 'inputTaxProc': taxonID") +}) + + + +### Test: Generate expected errors for issues with morphospecies table (works for inputDataList or inputMorph source) #### +# Test when inputMorph lacks required column +testthat::test_that(desc = "Table 'inputMorph' missing column", { + + testthat::expect_error(object = joinAquClipHarvest(inputMorph = testMorph %>% + dplyr::select(-taxonID), + inputBio = testBio, + inputClip = testClip), + regexp = "Required columns missing from 'inputMorph': taxonID") +}) + + +### Test: Generate expected message when apl_taxProcessed isn't provided (works for inputDataList or inputMorph source) #### +# Test when inputMorph lacks required column +testthat::test_that(desc = "Message: expert tax data not provided", { + + testthat::expect_message(object = joinAquClipHarvest(inputMorph = testMorph, + inputBio = testBio, + inputClip = testClip), + regexp = "Output tables do not include identifications from the expert taxonomists.") +}) + + + diff --git a/tests/testthat/test_joinAquPointCount.R b/tests/testthat/test_joinAquPointCount.R new file mode 100644 index 0000000..f02485b --- /dev/null +++ b/tests/testthat/test_joinAquPointCount.R @@ -0,0 +1,246 @@ +### Unit tests for joinAquPointCount function #### +### POC: Madaline Ritter, ritterm1@BattelleEcology.org + + +### Read in test data #### +testList <- readRDS(testthat::test_path("testdata", "joinAquPointCount_testData_202307.rds")) + +testPoint <- testList$apc_pointTransect +testPerTax <- testList$apc_perTaxon +testTaxProc <- testList$apc_taxonomyProcessed +testMorph <- testList$apc_morphospecies + + + +### Test: Function generates expected output type #### +# Test list input +testthat::test_that(desc = "Output type list input", { + + testthat::expect_type(object = joinAquPointCount(inputDataList = testList), + type = "list") + +}) + +# Test table input +testthat::test_that(desc = "Output type table input", { + + testthat::expect_type(object = joinAquPointCount(inputPoint = testPoint, + inputPerTax = testPerTax, + inputTaxProc = testTaxProc, + inputMorph = testMorph), + type = "list") +}) + + + +### Test: Function generates expected output class #### +# Test list input +testthat::test_that(desc = "Output class list input", { + + testthat::expect_s3_class(object = joinAquPointCount(inputDataList = testList), + class = "data.frame") +}) + +# Test table input +testthat::test_that(desc = "Output class table input", { + + testthat::expect_s3_class(object = joinAquPointCount(inputPoint = testPoint, + inputPerTax = testPerTax, + inputTaxProc = testTaxProc, + inputMorph = testMorph), + class = "data.frame") +}) + + + +### Test: Function generates data frame with expected dimensions using test data #### +## Test list input +# Check expected row number of output +testthat::test_that(desc = "Output data frame row number list input", { + + testthat::expect_identical(object = nrow(joinAquPointCount(inputDataList = testList)), + expected = as.integer(145)) +}) + + +# Check expected column number of output +testthat::test_that(desc = "Output data frame column number list input", { + + testthat::expect_identical(object = ncol(joinAquPointCount(inputDataList = testList)), + expected = as.integer(73)) +}) + + +## Test table inputs +# Check expected row number of output +testthat::test_that(desc = "Output data frame row number table input", { + + testthat::expect_identical(object = nrow(joinAquPointCount(inputPoint = testPoint, + inputPerTax = testPerTax, + inputTaxProc = testTaxProc, + inputMorph = testMorph)), + expected = as.integer(145)) +}) + +# Check expected column number of output +testthat::test_that(desc = "Output data frame row number table input", { + + testthat::expect_identical(object = ncol(joinAquPointCount(inputPoint = testPoint, + inputPerTax = testPerTax, + inputTaxProc = testTaxProc, + inputMorph = testMorph)), + expected = as.integer(73)) +}) + + + +### Test: Generates expected data using test data #### +## Test dataframe output +# Check 'acceptedTaxonID' is pulled from apc_perTaxon if identification is not in morphospecies or taxProcessed tables +testthat::test_that(desc = "Output data frame source: taxonomyProcessed", { + + outDF <- joinAquPointCount(inputDataList = testList) + testthat::expect_identical(object = unique(outDF$taxonIDSourceTable[which(outDF$sampleID == 'HOPB.20230727.AP2.1.T1')]), + expected = "apc_perTaxon") + testthat::expect_identical(object = unique(outDF$acceptedTaxonID[which(outDF$sampleID == 'HOPB.20230727.AP2.1.T1')]), + expected = "PLLE3") + + testthat::expect_identical(object = unique(outDF$taxonIDSourceTable[which(outDF$sampleID == 'POSE.20230718.AP16.1.T7')]), + expected = "apc_perTaxon") + testthat::expect_identical(object = unique(outDF$acceptedTaxonID[which(outDF$sampleID == 'POSE.20230718.AP16.1.T7')]), + expected = "2PLANT") +}) + +# Check 'acceptedTaxonID' is pulled from apc_taxonomyProcessed when expertTaxonomyRequired = 'Y' +testthat::test_that(desc = "Output data frame source: taxonomyProcessed", { + + outDF <- joinAquPointCount(inputDataList = testList) + testthat::expect_identical(object = unique(outDF$taxonIDSourceTable[which(outDF$sampleID == 'KING.20230719.MACROALGAE17.T5')]), + expected = "apc_taxonomyProcessed") + testthat::expect_identical(object = unique(outDF$acceptedTaxonID[which(outDF$sampleID == 'KING.20230719.MACROALGAE17.T5')]), + expected = c("NEONDREX1220001", "NEONDREX444000")) + + testthat::expect_identical(object = unique(outDF$taxonIDSourceTable[which(outDF$sampleID == 'WLOU.20230703.AP19.1.T7')]), + expected = "apc_taxonomyProcessed") + testthat::expect_identical(object = unique(outDF$acceptedTaxonID[which(outDF$sampleID == 'WLOU.20230703.AP19.1.T7')]), + expected = "HYOC5") + + testthat::expect_identical(object = unique(outDF$taxonIDSourceTable[which(outDF$sampleID == 'REDB.20230719.AP19.1.T4')]), + expected = "apc_taxonomyProcessed") + testthat::expect_identical(object = unique(outDF$acceptedTaxonID[which(outDF$sampleID == 'REDB.20230719.AP19.1.T4')]), + expected = "EQAR") +}) + + +# Check 'acceptedTaxonID' is pulled from apc_morphospecies if identification is in morphospecies table and not apc_taxonomyProcessed +testthat::test_that(desc = "Output data frame source: apc_morphospecies", { + + outDF <- joinAquPointCount(inputDataList = testList) + testthat::expect_identical(object = unique(outDF$taxonIDSourceTable[which(outDF$sampleID == 'KING.20230719.AP11.1.T1')]), + expected = "apc_morphospecies") + + testthat::expect_identical(object = unique(outDF$acceptedTaxonID[which(outDF$sampleID == 'KING.20230719.AP11.1.T1')]), + expected = "NAOF") +}) + + + +### Tests: Generate expected errors for 'inputDataList' #### +# Test 'inputDataList' is a list +testthat::test_that(desc = "Argument 'inputDataList' is list object", { + + testthat::expect_error(object = joinAquPointCount(inputDataList = testPoint), + regexp = "Argument 'inputDataList' must be a list object") +}) + +# Test 'inputDataList' contains required tables +testthat::test_that(desc = "Required tables present in 'inputDataList' input", { + + testthat::expect_error(object = joinAquPointCount(inputDataList = testList[1:2]), + regexp = "Required tables missing from 'inputDataList'") +}) + +# Test table inputs are NA if 'inputDataList' supplied +testthat::test_that(desc = "Table inputs NA when required", { + + testthat::expect_error(object = joinAquPointCount(inputDataList = testList, + inputPoint = testPoint), + regexp = "When 'inputDataList' is supplied, all table input arguments must be NA.") +}) + + + +### Tests: Generate expected errors with table inputs #### +testthat::test_that(desc = "Table inputs are data frames when required", { + + testthat::expect_error(object = joinAquPointCount(inputPoint = testPoint, + inputTaxProc = testTaxProc), + regexp = "Data frames must be supplied for table inputs if 'inputDataList' is missing") +}) + + + +### Test: Generate expected errors for issues with pointCount table (works for inputDataList or inputPoint source) #### +# Test when inputPoint lacks required column +testthat::test_that(desc = "Table 'inputPoint' missing column", { + + testthat::expect_error(object = joinAquPointCount(inputPoint = testPoint %>% + dplyr::select(-pointNumber), + inputPerTax = testPerTax), + regexp = "Required columns missing from 'inputPoint': pointNumber") +}) + +# Test when inputPoint has no data +testthat::test_that(desc = "Table 'inputPoint' missing data", { + + testthat::expect_error(object = joinAquPointCount(inputPoint = testPoint %>% + dplyr::filter(pointNumber == "999"), + inputPerTax = testPerTax), + regexp = "Table 'inputPoint' has no data.") +}) + + +### Test: Generate expected errors for issues with perTaxon table (works for inputDataList or inputPerTax source) #### +# Test when inputPerTax lacks required column +testthat::test_that(desc = "Table 'inputPerTax' missing column", { + + testthat::expect_error(object = joinAquPointCount(inputPerTax = testPerTax %>% + dplyr::select(-taxonID), + inputPoint = testPoint), + regexp = "Required columns missing from 'inputPerTax': taxonID") +}) + +# Test when inputPerTax has no data +testthat::test_that(desc = "Table 'inputPerTax' missing data", { + + testthat::expect_error(object = joinAquPointCount(inputPerTax = testPerTax %>% + dplyr::filter(taxonID == "coconut"), + inputPoint = testPoint), + regexp = "Table 'inputPerTax' has no data.") +}) + + + +### Test: Generate expected errors for issues with taxonomyProcessed table (works for inputDataList or inputTaxProc source) #### +# Test when inputTaxProc lacks required column +testthat::test_that(desc = "Table 'inputTaxProc' missing column", { + + testthat::expect_error(object = joinAquPointCount(inputTaxProc = testTaxProc %>% + dplyr::select(-acceptedTaxonID), + inputPoint = testPoint, + inputPerTax = testPerTax), + regexp = "Required columns missing from 'inputTaxProc': acceptedTaxonID") +}) + + + +### Test: Generate expected errors for issues with morphospecies table (works for inputDataList or inputMorph source) #### +# Test when inputMorph lacks required column +testthat::test_that(desc = "Table 'inputMorph' missing column", { + + testthat::expect_error(object = joinAquPointCount(inputMorph = testMorph %>% + dplyr::select(-taxonID), + inputPoint = testPoint, + inputPerTax = testPerTax), + regexp = "Required columns missing from 'inputMorph': taxonID") +}) diff --git a/tests/testthat/testdata/estimateAquPercentCover_testData_D04_202306.rds b/tests/testthat/testdata/estimateAquPercentCover_testData_D04_202306.rds new file mode 100644 index 0000000..2197da1 Binary files /dev/null and b/tests/testthat/testdata/estimateAquPercentCover_testData_D04_202306.rds differ diff --git a/tests/testthat/testdata/joinAquClipHarvest_testData_202307.rds b/tests/testthat/testdata/joinAquClipHarvest_testData_202307.rds new file mode 100644 index 0000000..fdead80 Binary files /dev/null and b/tests/testthat/testdata/joinAquClipHarvest_testData_202307.rds differ diff --git a/tests/testthat/testdata/joinAquPointCount_testData_202307.rds b/tests/testthat/testdata/joinAquPointCount_testData_202307.rds new file mode 100644 index 0000000..dcae32d Binary files /dev/null and b/tests/testthat/testdata/joinAquPointCount_testData_202307.rds differ