diff --git a/R/DepthProfile.R b/R/DepthProfile.R index 91fa14c9b..c8d4bf8ce 100644 --- a/R/DepthProfile.R +++ b/R/DepthProfile.R @@ -1,3 +1,142 @@ +#' Depth-parameter characteristic names +#' +#' Returns the set of characteristic names that represent depth parameters +#' (e.g., Secchi, thalweg), which are handled specially in depth consolidation +#' and plotting. +#' +#' @return Character vector of characteristic names treated as depth parameters. +#' +#' @noRd +.depth_param_names <- function() { + c( + "DEPTH, SECCHI DISK DEPTH", + "DEPTH, SECCHI DISK DEPTH (CHOICE LIST)", + "DEPTH, SECCHI DISK DEPTH REAPPEARS", + "TRANSPARENCY, SECCHI TUBE WITH DISK", + "DEPTH, DATA-LOGGER (NON-PORTED)", + "DEPTH, DATA-LOGGER (PORTED)", + "RBP STREAM DEPTH - RIFFLE", + "RBP STREAM DEPTH - RUN", + "THALWEG DEPTH", + "SAMPLING DEPTH IN" + ) +} + +#' Normalize "null" or NULL numeric inputs +#' +#' Converts character "null" (case-insensitive) or NULL to NA_real_ for +#' numeric options such as surfacevalue/bottomvalue. Leaves other values +#' unchanged. +#' +#' @param x A value expected to be numeric, the character "null", or NULL. +#' +#' @return A numeric value or NA_real_. +#' +#' @examples +#' # .normalize_null_numeric("null") -> NA_real_ +#' # .normalize_null_numeric(NULL) -> NA_real_ +#' # .normalize_null_numeric(2) -> 2 +#' +#' @noRd +.normalize_null_numeric <- function(x) { + if (is.character(x) && tolower(x) == "null") { + return(NA_real_) + } + if (is.null(x)) { + return(NA_real_) + } + x +} + +#' Ensure depth-category columns exist +#' +#' Ensures the columns produced by TADA_FlagDepthCategory are present. If +#' missing, runs TADA_FlagDepthCategory with the supplied thresholds. +#' When allow_na_thresholds is TRUE and one or both thresholds are NA, +#' the function runs with defaults and then blanks out categories that +#' cannot be determined. +#' +#' @param .data A TADA-compatible data.frame. +#' @param surfacevalue Numeric or NA. Threshold for Surface category (m). +#' @param bottomvalue Numeric or NA. Threshold for Bottom category (m). +#' @param allow_na_thresholds Logical; if TRUE, permits NA thresholds and +#' post-adjusts depth-category flags accordingly. +#' +#' @return A data.frame with TADA.ConsolidatedDepth, TADA.ConsolidatedDepth.Unit, +#' TADA.ConsolidatedDepth.Bottom, and TADA.DepthCategory.Flag present. +#' +#' @noRd +.ensure_depth_flag_columns <- function( + .data, + surfacevalue = 2, + bottomvalue = 2, + allow_na_thresholds = FALSE +) { + needed <- c( + "TADA.ConsolidatedDepth", + "TADA.ConsolidatedDepth.Unit", + "TADA.ConsolidatedDepth.Bottom", + "TADA.DepthCategory.Flag" + ) + + if (all(needed %in% names(.data))) { + message( + "TADA: Necessary columns from TADA_FlagDepthCategory function are included in the data frame." + ) + return(.data) + } + + if (allow_na_thresholds && (is.na(surfacevalue) || is.na(bottomvalue))) { + message( + "TADA: Running TADA_FlagDepthCategory to add columns; NA thresholds requested, post-adjusting flags." + ) + # run with defaults and then blank out flags that cannot be determined + tmp <- TADA_FlagDepthCategory(.data, surfacevalue = 2, bottomvalue = 2) + if (is.na(surfacevalue) && is.na(bottomvalue)) { + tmp$TADA.DepthCategory.Flag <- NA_character_ + } else if (is.na(surfacevalue)) { + tmp$TADA.DepthCategory.Flag <- ifelse( + tmp$TADA.DepthCategory.Flag %in% c("Surface", "Middle"), + NA_character_, + tmp$TADA.DepthCategory.Flag + ) + } else if (is.na(bottomvalue)) { + tmp$TADA.DepthCategory.Flag <- ifelse( + tmp$TADA.DepthCategory.Flag %in% c("Bottom", "Middle"), + NA_character_, + tmp$TADA.DepthCategory.Flag + ) + } + return(tmp) + } + + message( + "TADA: Running TADA_FlagDepthCategory function to add required columns to data frame." + ) + TADA_FlagDepthCategory( + .data, + surfacevalue = surfacevalue, + bottomvalue = bottomvalue + ) +} + +#' Drop mean-aggregated rows from data +#' +#' Removes rows that were created by dailyagg = "avg" in TADA_FlagDepthCategory, +#' identified by ResultIdentifier values prefixed with "TADA-". +#' +#' @param .data A data.frame that may include mean-aggregated rows. +#' +#' @return The input data.frame with any "TADA-" ResultIdentifier rows removed. +#' +#' @noRd +.drop_avg_aggregates <- function(.data) { + if (!"ResultIdentifier" %in% names(.data)) { + return(.data) + } + dplyr::filter(.data, !grepl("^TADA-", .data$ResultIdentifier)) +} + #' TADA_FlagDepthCategory #' #' This function creates a new column, TADA.DepthCategory.Flag with values: "No @@ -57,22 +196,23 @@ #' @param aggregatedonly Boolean argument with options TRUE or FALSE. The #' default is aggregatedonly = FALSE which means that all results are returned. #' When aggregatedonly = TRUE, only aggregate values are returned. +#' Note: aggregatedonly = TRUE has no effect when dailyagg = "none" and will raise an error +#' (no aggregates to return). #' #' @param clean Boolean argument with options TRUE or FALSE. The #' default is clean = FALSE which means that all results are returned. #' When clean = TRUE, only aggregate results which can be assigned to a depth #' category are included in the returned dataframe. #' -#' @param .data TADA dataframe -#' #' @return The same input TADA dataframe with additional columns TADA.DepthCategory.Flag, #' TADA.DepthProfileAggregation.Flag, TADA.ConsolidatedDepth, TADA.ConsolidatedDepth.Bottom, #' and TADA.ConsolidatedDepth.Unit. The consolidated depth fields are created by reviewing -#' multiple WQC columns where users may input depth information. If a daily_agg = "avg", -#' "min", or "max", aggregated values will be identified in the TADA.ResultAggregation.Flag -#' column. In the case of daily_agg = "avg", additional rows to display averages will be -#' added to the data frame. They can be identified by the prefix ("TADA-") of -#' their result identifiers. +#' multiple WQC columns where users may input depth information. If dailyagg = "avg", +#' "min", or "max", aggregation status is described in TADA.DepthProfileAggregation.Flag. +#' In the case of dailyagg = "avg", additional rows to display averages will be +#' added to the data frame. Aggregated rows are identified by ResultIdentifier prefixed +#' with "TADA-". When dailyagg = "avg", the aggregated result retains metadata from a +#' deterministically selected representative record (first by ResultIdentifier within the group). #' #' @export #' @@ -102,11 +242,13 @@ TADA_FlagDepthCategory <- function( expected_cols <- c( "TADA.ActivityDepthHeightMeasure.MeasureValue", "TADA.ResultDepthHeightMeasure.MeasureValue", + "TADA.ActivityBottomDepthHeightMeasure.MeasureValue", "ActivityRelativeDepthName", "TADA.ResultDepthHeightMeasure.MeasureUnitCode", "TADA.ActivityDepthHeightMeasure.MeasureUnitCode", "TADA.CharacteristicName", "TADA.ResultMeasure.MeasureUnitCode", + "TADA.ResultMeasureValue", "ResultIdentifier", "TADA.MonitoringLocationIdentifier", "OrganizationIdentifier", @@ -117,6 +259,36 @@ TADA_FlagDepthCategory <- function( TADA_CheckType(aggregatedonly, "logical") # check clean is boolean TADA_CheckType(clean, "logical") + # additional input and enum validation + TADA_CheckType(.data, "data.frame", "Input object") + valid_bycategory <- c("no", "all", "surface", "middle", "bottom") + if (!bycategory %in% valid_bycategory) { + stop( + "TADA_FlagDepthCategory: bycategory must be one of: 'no', 'all', 'surface', 'middle', 'bottom'." + ) + } + valid_dailyagg <- c("none", "avg", "min", "max") + if (!dailyagg %in% valid_dailyagg) { + stop( + "TADA_FlagDepthCategory: dailyagg must be one of: 'none', 'avg', 'min', 'max'." + ) + } + + # normalize 'null' and NULL inputs to NA_real_ + surfacevalue <- .normalize_null_numeric(surfacevalue) + bottomvalue <- .normalize_null_numeric(bottomvalue) + + # validate types if provided + if (!is.na(surfacevalue) && !is.numeric(surfacevalue)) { + stop( + "TADA_FlagDepthCategory: surfacevalue must be numeric, NULL, or 'null'." + ) + } + if (!is.na(bottomvalue) && !is.numeric(bottomvalue)) { + stop( + "TADA_FlagDepthCategory: bottomvalue must be numeric, NULL, or 'null'." + ) + } # execute function after checks are passed @@ -140,40 +312,25 @@ TADA_FlagDepthCategory <- function( ) |> nrow() - length.units <- c("M", "FT", "IN") - - depth.params <- c( - "DEPTH, SECCHI DISK DEPTH", - "DEPTH, SECCHI DISK DEPTH (CHOICE LIST)", - "DEPTH, SECCHI DISK DEPTH REAPPEARS", - "TRANSPARENCY, SECCHI TUBE WITH DISK", - "DEPTH, DATA-LOGGER (NON-PORTED)", - "DEPTH, DATA-LOGGER (PORTED)", - "RBP STREAM DEPTH - RIFFLE", - "RBP STREAM DEPTH - RUN", - "THALWEG DEPTH" - ) - + # derive cattype after bycategory validation if (bycategory == "no") { cattype <- "for the entire depth profile" } - if (bycategory == "all") { cattype <- "for each depth category" } - if (bycategory == "bottom") { cattype <- "for Bottom" } - if (bycategory == "middle") { cattype <- "for Middle" } - if (bycategory == "surface") { cattype <- "for Surface" } + depth.params <- .depth_param_names() + if (depth.count > 0) { message(paste( "TADA_FlagDepthCategory: checking data set for depth values. ", @@ -184,9 +341,10 @@ TADA_FlagDepthCategory <- function( message("TADA_FlagDepthCategory: assigning depth categories.") + # 1) Consolidate depth and units first .data <- .data |> - # set equal to TADA.ResultDepthHeighMeasure.MeasureValue if available, otherwise use TADA.ActivityDepthHeightMeasure.MeasureValue dplyr::mutate( + # set equal to TADA.ResultDepthHeighMeasure.MeasureValue if available, otherwise use TADA.ActivityDepthHeightMeasure.MeasureValue TADA.ConsolidatedDepth = ifelse( !is.na(TADA.ResultDepthHeightMeasure.MeasureValue), TADA.ResultDepthHeightMeasure.MeasureValue, @@ -197,6 +355,7 @@ TADA_FlagDepthCategory <- function( TADA.ResultDepthHeightMeasure.MeasureUnitCode, TADA.ActivityDepthHeightMeasure.MeasureUnitCode ), + # Override with ResultMeasureValue for depth-parameter characteristics TADA.ConsolidatedDepth = ifelse( TADA.CharacteristicName %in% depth.params, TADA.ResultMeasureValue, @@ -208,8 +367,23 @@ TADA_FlagDepthCategory <- function( TADA.ConsolidatedDepth.Unit ), TADA.ConsolidatedDepth.Unit = tolower(TADA.ConsolidatedDepth.Unit) - ) |> - # use group_by to identify profile data + ) + + # 2) Validate there is only one depth unit in use (assumes conversion already done) + units_present <- .data |> + dplyr::filter(!is.na(TADA.ConsolidatedDepth.Unit)) |> + dplyr::pull(TADA.ConsolidatedDepth.Unit) |> + unique() + + if (length(units_present) > 1) { + stop( + "TADA_FlagDepthCategory: Multiple depth units detected. Convert depth units to a single unit before categorizing." + ) + } + + # 3) Proceed to compute bottom depth and assign categories (NA-aware) + # use group_by to identify profile data + .data <- .data |> dplyr::group_by( ActivityStartDate, TADA.MonitoringLocationIdentifier, @@ -217,29 +391,53 @@ TADA_FlagDepthCategory <- function( ) |> # determine the number of Depths per group dplyr::mutate( - DepthsPerGroup = length(unique(TADA.ConsolidatedDepth)), + DepthsPerGroup = dplyr::n_distinct( + TADA.ConsolidatedDepth, + na.rm = TRUE + ), # determine bottom value using TADA.ActivityBottomDepthHeightMeasure.MeasureValue or the max depth record for profile data - TADA.ConsolidatedDepth.Bottom = ifelse( + has_depths = any(!is.na(TADA.ConsolidatedDepth)), + TADA.ConsolidatedDepth.Bottom = dplyr::case_when( + DepthsPerGroup > 1 & + is.na(TADA.ActivityBottomDepthHeightMeasure.MeasureValue) & + has_depths ~ max(TADA.ConsolidatedDepth, na.rm = TRUE), DepthsPerGroup > 1 & - is.na(TADA.ActivityBottomDepthHeightMeasure.MeasureValue), - max(TADA.ConsolidatedDepth, na.rm = TRUE), - TADA.ActivityBottomDepthHeightMeasure.MeasureValue + is.na(TADA.ActivityBottomDepthHeightMeasure.MeasureValue) & + !has_depths ~ NA_real_, + TRUE ~ TADA.ActivityBottomDepthHeightMeasure.MeasureValue ) ) |> + dplyr::select(-has_depths) |> dplyr::ungroup() |> - # assign depth categories by using depth information dplyr::mutate( + # Only assign depth categories when the needed thresholds are available TADA.DepthCategory.Flag = dplyr::case_when( - TADA.ConsolidatedDepth <= surfacevalue ~ "Surface", - TADA.ConsolidatedDepth <= TADA.ConsolidatedDepth.Bottom & + # Surface only if surfacevalue is provided + !is.na(surfacevalue) & + !is.na(TADA.ConsolidatedDepth) & + TADA.ConsolidatedDepth <= surfacevalue ~ "Surface", + + # Bottom only if bottomvalue and bottom depth are available + !is.na(bottomvalue) & + !is.na(TADA.ConsolidatedDepth.Bottom) & + !is.na(TADA.ConsolidatedDepth) & TADA.ConsolidatedDepth >= - TADA.ConsolidatedDepth.Bottom - bottomvalue ~ "Bottom", - TADA.ConsolidatedDepth > surfacevalue & + (TADA.ConsolidatedDepth.Bottom - bottomvalue) & + TADA.ConsolidatedDepth <= TADA.ConsolidatedDepth.Bottom ~ "Bottom", + + # Middle only if both surfacevalue and bottomvalue are provided (and bottom available) + !is.na(surfacevalue) & + !is.na(bottomvalue) & + !is.na(TADA.ConsolidatedDepth.Bottom) & + !is.na(TADA.ConsolidatedDepth) & + TADA.ConsolidatedDepth > surfacevalue & TADA.ConsolidatedDepth < - TADA.ConsolidatedDepth.Bottom - bottomvalue ~ "Middle" + (TADA.ConsolidatedDepth.Bottom - bottomvalue) ~ "Middle", + + TRUE ~ NA_character_ ) ) |> - # assign depth categories that could not be assigned using depth + # Join ARD reference as fallback dplyr::left_join(ard.ref, by = "ActivityRelativeDepthName") |> dplyr::mutate( TADA.DepthCategory.Flag = ifelse( @@ -271,8 +469,10 @@ TADA_FlagDepthCategory <- function( .data <- .data |> dplyr::mutate( - TADA.DepthCategory.Flag = as.character(NA), - TADA.ConsolidatedDepth = as.numeric(NA) + TADA.DepthCategory.Flag = NA_character_, + TADA.ConsolidatedDepth = as.numeric(NA), + TADA.ConsolidatedDepth.Unit = NA_character_, + TADA.ConsolidatedDepth.Bottom = as.numeric(NA) ) |> TADA_OrderCols() @@ -301,66 +501,38 @@ TADA_FlagDepthCategory <- function( ) .data <- .data - } - - if (bycategory == "no") { - message( - "TADA_FlagDepthCategory: Grouping results by TADA.MonitoringLocationIdentifier, OrganizationIdentifier, CharacteristicName, and ActivityStartDate for aggregation for entire water column." - ) - - group.list <- c( - "TADA.MonitoringLocationIdentifier", - "OrganizationIdentifier", - "TADA.CharacteristicName", - "ActivityStartDate" - ) - - .data <- .data - } - - if (bycategory == "surface") { - message( - "TADA_FlagDepthCategory: Grouping results by TADA.MonitoringLocationIdentifier, OrganizationIdentifier, CharacteristicName, and ActivityStartDate for aggregation for surface samples only." - ) - - group.list <- c( - "TADA.MonitoringLocationIdentifier", - "OrganizationIdentifier", - "TADA.CharacteristicName", - "ActivityStartDate" - ) - - .data <- .data |> dplyr::filter(TADA.DepthCategory.Flag == "Surface") - } - - if (bycategory == "middle") { - message( - "TADA_FlagDepthCategory: Grouping results by TADA.MonitoringLocationIdentifier, OrganizationIdentifier, CharacteristicName, and ActivityStartDate for aggregation for middle samples only." - ) - - group.list <- c( - "TADA.MonitoringLocationIdentifier", - "OrganizationIdentifier", - "TADA.CharacteristicName", - "ActivityStartDate" - ) - - .data <- .data |> dplyr::filter(TADA.DepthCategory.Flag == "Middle") - } - - if (bycategory == "bottom") { - message( - "TADA_FlagDepthCategory: Grouping results by TADA.MonitoringLocationIdentifier, OrganizationIdentifier, CharacteristicName, and ActivityStartDate for aggregation for bottom samples only." - ) - + } else { + # unify grouping branches + if (bycategory == "no") { + message( + "TADA_FlagDepthCategory: Grouping results by TADA.MonitoringLocationIdentifier, OrganizationIdentifier, CharacteristicName, and ActivityStartDate for aggregation for entire water column." + ) + .data <- .data + } + if (bycategory == "surface") { + message( + "TADA_FlagDepthCategory: Grouping results by TADA.MonitoringLocationIdentifier, OrganizationIdentifier, CharacteristicName, and ActivityStartDate for aggregation for surface samples only." + ) + .data <- .data |> dplyr::filter(TADA.DepthCategory.Flag == "Surface") + } + if (bycategory == "middle") { + message( + "TADA_FlagDepthCategory: Grouping results by TADA.MonitoringLocationIdentifier, OrganizationIdentifier, CharacteristicName, and ActivityStartDate for aggregation for middle samples only." + ) + .data <- .data |> dplyr::filter(TADA.DepthCategory.Flag == "Middle") + } + if (bycategory == "bottom") { + message( + "TADA_FlagDepthCategory: Grouping results by TADA.MonitoringLocationIdentifier, OrganizationIdentifier, CharacteristicName, and ActivityStartDate for aggregation for bottom samples only." + ) + .data <- .data |> dplyr::filter(TADA.DepthCategory.Flag == "Bottom") + } group.list <- c( "TADA.MonitoringLocationIdentifier", "OrganizationIdentifier", "TADA.CharacteristicName", "ActivityStartDate" ) - - .data <- .data |> dplyr::filter(TADA.DepthCategory.Flag == "Bottom") } if (dailyagg == "none") { @@ -369,11 +541,13 @@ TADA_FlagDepthCategory <- function( # add TADA.ResultValue.Aggregation.Flag, remove unecessary columns, and order columns orig.data <- .data |> dplyr::group_by_at(group.list) |> - dplyr::mutate(DepthsByGroup = length(unique(TADA.ConsolidatedDepth))) |> + dplyr::mutate( + DepthsByGroup = dplyr::n_distinct(TADA.ConsolidatedDepth, na.rm = TRUE) + ) |> dplyr::mutate( TADA.DepthProfileAggregation.Flag = ifelse( DepthsByGroup > 1, - "No aggregation perfomed", + "No aggregation performed", "No aggregation needed" ) ) |> @@ -383,7 +557,7 @@ TADA_FlagDepthCategory <- function( if (aggregatedonly == TRUE) { stop( - "Function not executed because clean cannot be TRUE while daily_agg is 'no'" + "aggregatedonly = TRUE requires dailyagg = 'avg', 'min' or 'max'; nothing to return when dailyagg = 'none'." ) } @@ -393,13 +567,15 @@ TADA_FlagDepthCategory <- function( } if ((dailyagg == "avg")) { message( - "TADA_FlagDepthCategory: Calculating mean aggregate value with randomly selected metadata." + "TADA_FlagDepthCategory: Calculating mean aggregate value with deterministically selected metadata." ) # add TADA.ResultValue.Aggregation.Flag and remove unnecessary columns in original data set orig.data <- .data |> dplyr::group_by_at(group.list) |> - dplyr::mutate(DepthsByGroup = length(unique(TADA.ConsolidatedDepth))) |> + dplyr::mutate( + DepthsByGroup = dplyr::n_distinct(TADA.ConsolidatedDepth, na.rm = TRUE) + ) |> dplyr::mutate( TADA.DepthProfileAggregation.Flag = ifelse( DepthsByGroup > 1, @@ -417,7 +593,7 @@ TADA_FlagDepthCategory <- function( ) ) - # add TADA.ResultValue.Aggregation.Flag, remove necessary columns, calculate mean result value per group, and assign random metadata from group. + # add TADA.ResultValue.Aggregation.Flag, remove necessary columns, calculate mean result value per group, and assign deterministic metadata from group. agg.data <- orig.data |> dplyr::filter( DepthsByGroup > 1, @@ -426,12 +602,14 @@ TADA_FlagDepthCategory <- function( dplyr::mutate( TADA.ResultMeasureValue1 = mean(TADA.ResultMeasureValue, na.rm = TRUE) ) |> - dplyr::slice_sample(n = 1) |> + # choose a deterministic representative row for reproducibility + dplyr::arrange(ResultIdentifier) |> + dplyr::slice(1) |> dplyr::mutate( TADA.DepthProfileAggregation.Flag = paste0( "Calculated mean aggregate value ", cattype, - ", with randomly selected metadata from a row in the aggregate group" + ", with deterministically selected metadata from a row in the aggregate group" ) ) |> dplyr::select(-TADA.ResultMeasureValue, -DepthsByGroup) |> @@ -447,7 +625,7 @@ TADA_FlagDepthCategory <- function( if (aggregatedonly == FALSE) { # combine original and aggregate data - comb.data <- plyr::rbind.fill(orig.data, agg.data) |> + comb.data <- dplyr::bind_rows(orig.data, agg.data) |> dplyr::ungroup() |> dplyr::select(-DepthsByGroup) |> TADA_OrderCols() @@ -463,7 +641,9 @@ TADA_FlagDepthCategory <- function( # add TADA.ResultValue.Aggregation.Flag and remove unnecessary columns in original data set orig.data <- .data |> dplyr::group_by_at(group.list) |> - dplyr::mutate(DepthsByGroup = length(unique(TADA.ConsolidatedDepth))) |> + dplyr::mutate( + DepthsByGroup = dplyr::n_distinct(TADA.ConsolidatedDepth, na.rm = TRUE) + ) |> dplyr::mutate( TADA.DepthProfileAggregation.Flag = ifelse( DepthsByGroup > 1, @@ -518,7 +698,7 @@ TADA_FlagDepthCategory <- function( # combine original and aggregate data comb.data <- orig.data |> dplyr::filter(!ResultIdentifier %in% agg.list) |> - plyr::rbind.fill(agg.data) |> + dplyr::bind_rows(agg.data) |> dplyr::ungroup() |> dplyr::select(-DepthsByGroup) |> TADA_OrderCols() @@ -532,10 +712,12 @@ TADA_FlagDepthCategory <- function( if ((dailyagg == "max")) { message("TADA_FlagDepthCategory: Selecting maximum aggregate value.") - # add TADA.ResultValue.Aggregation.Flag and remove unnecessary columns in original data set + # Flag all rows (in groups with >1 depth) as considered/not selected by default orig.data <- .data |> dplyr::group_by_at(group.list) |> - dplyr::mutate(DepthsByGroup = length(unique(TADA.ConsolidatedDepth))) |> + dplyr::mutate( + DepthsByGroup = dplyr::n_distinct(TADA.ConsolidatedDepth, na.rm = TRUE) + ) |> dplyr::mutate( TADA.DepthProfileAggregation.Flag = ifelse( DepthsByGroup > 1, @@ -545,10 +727,16 @@ TADA_FlagDepthCategory <- function( "but not selected" ), "No aggregation needed" + ), + # If a row is outside depth categories, mark as "No aggregation needed" + TADA.DepthProfileAggregation.Flag = ifelse( + !TADA.DepthCategory.Flag %in% depthcat.list, + "No aggregation needed", + TADA.DepthProfileAggregation.Flag ) ) - # add TADA.ResultValue.Aggregation.Flag, remove necessary columns, and select maximum result value per group. + # Select the maximum result per group (only rows in depth categories) agg.data <- orig.data |> dplyr::filter( DepthsByGroup > 1, @@ -561,44 +749,40 @@ TADA_FlagDepthCategory <- function( ) |> dplyr::mutate( TADA.DepthProfileAggregation.Flag = paste0( - "TADA_FlagDepthCategory: Selecting maximum aggregate value.", + "Selected as max aggregate value ", cattype ) ) |> - dplyr::mutate(ResultIdentifier = paste0("TADA-", ResultIdentifier)) |> dplyr::select(-DepthsByGroup) |> dplyr::ungroup() if (aggregatedonly == TRUE) { rm(orig.data) - return(agg.data) } if (aggregatedonly == FALSE) { - # create list of result identifiers for selected aggregate data + # Remove the selected rows from the original so they are not duplicated, + # then add them back with the "selected" flag applied above agg.list <- agg.data |> dplyr::ungroup() |> dplyr::select(ResultIdentifier) |> unique() |> dplyr::pull() - # combine original and aggregate data comb.data <- orig.data |> dplyr::filter(!ResultIdentifier %in% agg.list) |> - plyr::rbind.fill(agg.data) |> + dplyr::bind_rows(agg.data) |> dplyr::ungroup() |> dplyr::select(-DepthsByGroup) |> TADA_OrderCols() rm(agg.data, orig.data, agg.list) - return(comb.data) } } } - #' TADA_IDDepthProfiles #' #' This function identifies depth profiles within a data frame to assist the user in @@ -614,14 +798,15 @@ TADA_FlagDepthCategory <- function( #' #' @param .data TADA dataframe which must include the columns ActivityStartDate, #' TADA.ConsolidatedDepth, TADA.ConsolidatedDepth.Unit, TADA.ConsolidatedDepth.Bottom, -#' TADA.ResultMeasureValue, TADA.ResultMeasureValue.UnitCode, +#' TADA.ResultMeasureValue, TADA.ResultMeasure.MeasureUnitCode, #' OrganizationIdentifier, TADA.MonitoringLocationName, TADA.MonitoringLocationIdentifier, #' and TADA.ComparableDataIdentifier. #' #' @param nresults Boolean argument with options "TRUE" or "FALSE". The #' default is nresults = TRUE, which means that the number of results for each #' characteristic are added within the TADA.CharacteristicsForDepthProfile column. -#' When nresults = FALSE. +#' When nresults = FALSE, the number of results is not appended to +#' TADA.CharacteristicsForDepthProfile. #' #' @param nvalue numeric argument to specify the number of results required to identify #' a depth profile. The default is 2, which means that a depth profile will be identified @@ -644,6 +829,10 @@ TADA_FlagDepthCategory <- function( #' param, TADA.CharacteristicsForDepthProfile may or may not contain the number #' of results for each characteristic. #' +#' @details +#' Inputs nresults and aggregates must be logical scalars; non-logical values will +#' raise an error. nvalue must be a single numeric value. +#' #' @export #' #' @examples @@ -663,61 +852,23 @@ TADA_IDDepthProfiles <- function( nvalue = 2, aggregates = FALSE ) { - # check for columns created in TADA_FlagDepthCategory and run the function if they are missing - # add check that depth category flag function has been run, run it if it has not - flag.func.cols <- c( - "TADA.ConsolidatedDepth", - "TADA.ConsolidatedDepth.Unit", - "TADA.ConsolidatedDepth.Bottom, TADA.DepthCategory.Flag", - "TADA.DepthProfileAggregation.Flag" - ) - - if (all(flag.func.cols %in% colnames(.data)) == TRUE) { - message( - "TADA_IDDepthProfiles: Necessary columns from TADA_FlagDepthCategory function are included in the data frame." - ) - - .data <- .data - } - - if (any(flag.func.cols %in% colnames(.data)) == FALSE) { - message( - "TADA_IDDepthProfiles: Necessary columns are being added to the data frame using TADA_DepthCatgegory.Flag function." - ) - - .data <- TADA_FlagDepthCategory(.data) + # input type validation + TADA_CheckType(.data, "data.frame", "Input object") + TADA_CheckType(nresults, "logical", "nresults") + TADA_CheckType(aggregates, "logical", "aggregates") + if (!is.numeric(nvalue) || length(nvalue) != 1) { + stop("TADA_IDDepthProfiles: nvalue must be a single numeric value.") } - depth.params <- c( - "DEPTH, SECCHI DISK DEPTH", - "DEPTH, SECCHI DISK DEPTH (CHOICE LIST)", - "DEPTH, SECCHI DISK DEPTH REAPPEARS", - "TRANSPARENCY, SECCHI TUBE WITH DISK", - "DEPTH, DATA-LOGGER (NON-PORTED)", - "DEPTH, DATA-LOGGER (PORTED)", - "RBP STREAM DEPTH - RIFFLE", - "RBP STREAM DEPTH - RUN", - "THALWEG DEPTH" - ) - - if (aggregates == FALSE) { - if ("TADA.DepthProfileAggregation.Flag" %in% names(.data) == TRUE) { - .data <- .data |> - dplyr::filter( - TADA.DepthProfileAggregation.Flag != - c( - "Calculated mean aggregate value, with randomly selected metadata from a row in the aggregate group" - ) - ) + # check for columns created in TADA_FlagDepthCategory and run the function if they are missing + # add check that depth category flag function has been run, run it if it has not + .data <- .ensure_depth_flag_columns(.data) - if ("TADA.DepthProfileAggregation.Flag" %in% names(.data) == FALSE) { - .data <- .data - } - } + depth.params <- .depth_param_names() - if (aggregates == TRUE) { - .data <- .data - } + # when aggregates == FALSE, robust removal of mean-aggregated rows (created by avg) + if (!aggregates) { + .data <- .drop_avg_aggregates(.data) } if (nresults == TRUE) { @@ -794,7 +945,7 @@ TADA_IDDepthProfiles <- function( return(.data) } - if (nresults == FALSE) { + if (identical(nresults, FALSE)) { .data <- .data |> dplyr::select( TADA.MonitoringLocationIdentifier, @@ -814,7 +965,9 @@ TADA_IDDepthProfiles <- function( ActivityStartDate, TADA.ComparableDataIdentifier ) |> - dplyr::mutate(TADA.NResults = length(unique(TADA.ConsolidatedDepth))) |> + dplyr::mutate( + TADA.NResults = dplyr::n_distinct(TADA.ConsolidatedDepth, na.rm = TRUE) + ) |> dplyr::filter( TADA.NResults >= nvalue | TADA.CharacteristicName %in% depth.params ) |> @@ -857,6 +1010,9 @@ TADA_IDDepthProfiles <- function( return(.data) } + + # ensure function doesn’t fall through silently + stop("TADA_IDDepthProfiles: nresults must be TRUE or FALSE.") } #' Create A Three-Characteristic Depth Profile @@ -869,35 +1025,43 @@ TADA_IDDepthProfiles <- function( #' must be the same. This can be accomplished using TADA_AutoClean() or #' TADA_ConvertDepthUnits. #' -#' @param groups A vector of two identifiers from the TADA.ComparableDataIdentifier column. -#' For example, the groups could be 'DISSOLVED OXYGEN (DO)_NA_NA_UG/L' and 'PH_NA_NA_NA'. -#' These groups will be specific to your data frame. The TADA_IDDepthProfiles can be -#' used to identify available groups. +#' @param groups A vector of up to three identifiers from the TADA.ComparableDataIdentifier column. +#' For example, the groups could be 'DISSOLVED OXYGEN (DO)_NA_NA_UG/L' and 'PH_NA_NA_NA'. +#' These groups will be specific to your data frame. The TADA_IDDepthProfiles can be +#' used to identify available groups. If more than three identifiers are supplied, +#' only the first three are used and a warning is issued. #' #' @param location A single TADA.MonitoringLocationIdentifier to plot the depth profile. -#' A TADA.MonitoringLocationIdentifier must be entered or an error will be returned and -#' no depth profile will be created. +#' A TADA.MonitoringLocationIdentifier must be entered or an error will be returned and +#' no depth profile will be created. #' #' @param activity_date The date the depth profile results were collected. #' #' @param depthcat Boolean argument indicating whether delineation between depth -#' categories should be shown on the depth profile figure. depthcat = TRUE is the -#' default and displays solid black lines to delineate between surface, middle, and -#' bottom samples and labels each section of the plot. +#' categories should be shown on the depth profile figure. depthcat = TRUE is the +#' default and displays solid black lines to delineate between surface, middle, and +#' bottom samples and labels each section of the plot. +#' When depthcat = TRUE, at least one of surfacevalue or bottomvalue must be provided +#' (non-NA), otherwise the function will stop. If bottom depth cannot be determined for +#' the selection, “Bottom” and “Middle” delineations are omitted. #' #' @param bottomvalue numeric argument. The user enters how many meters from the -#' bottom should be included in the "Bottom" category. Default is -#' bottomvalue = 2. +#' bottom should be included in the "Bottom" category. Default is +#' bottomvalue = 2. #' #' @param surfacevalue numeric argument. The user enters how many meters from the -#' surface should be included in the "Surface" category. Default is surfacevalue = 2. +#' surface should be included in the "Surface" category. Default is surfacevalue = 2. #' -#' @param unit Character argument. The enters either "m" or "ft" to specify which -#' depth units should be used for the plot. Default is "m". +#' @param unit Character argument. The user enters either "m" or "ft" to specify which +#' depth units should be used for the plot. Default is "m". +#' Non-depth-parameter rows must already be in the specified unit. Depth-parameter +#' rows (e.g., Secchi) are converted to the specified unit for plotting when necessary. #' #' @return A depth profile plot displaying up to three parameters for a single -#' TADA.MonitoringLocationIdentifier. Displaying depth categories is optional with the -#' depthcat argument. +#' TADA.MonitoringLocationIdentifier. Displaying depth categories is optional with the +#' depthcat argument. The function excludes duplicate depth-parameter rows from the +#' main profile series and, if any are included via groups, plots them as single +#' horizontal reference lines in the requested unit. #' #' @export #' @@ -924,7 +1088,7 @@ TADA_IDDepthProfiles <- function( #' groups = c("PH_NONE_NONE_NONE", "DISSOLVED OXYGEN (DO)_NONE_NONE_MG/L"), #' location = "REDLAKE_WQX-JOHN", #' activity_date = "2018-07-31", -#'depthcat = FALSE +#' depthcat = FALSE #' ) #' } #' @@ -939,267 +1103,98 @@ TADA_DepthProfilePlot <- function( unit = "m" ) { # check to see if TADA.ComparableDataIdentifier column is present - if ("TADA.ComparableDataIdentifier" %in% colnames(.data)) { - .data <- .data - - if (!"TADA.ComparableDataIdentifier" %in% colnames(.data)) { - message( - "TADA.ComparableDataIdentifier column not present in data set. Run TADA_CreateComparableID to create TADA.ComparableDataIdentifier." - ) - - stop() - } + if (!"TADA.ComparableDataIdentifier" %in% colnames(.data)) { + stop( + "TADA.ComparableDataIdentifier column not present in data set. Run TADA_CreateComparableID to create TADA.ComparableDataIdentifier." + ) } # check .data is data.frame TADA_CheckType(.data, "data.frame", "Input object") - - # add check that depth category flag function has been run, run it if it has not - flag.func.cols <- c( - "TADA.ConsolidatedDepth", - "TADA.ConsolidatedDepth.Unit", - "TADA.ConsolidatedDepth.Bottom, TADA.DepthCategory.Flag" - ) - - if (all(flag.func.cols %in% colnames(.data)) == TRUE) { - message( - "TADA_DepthProfilePlot: Necessary columns from TADA_FlagDepthCategory function are included in the data frame" - ) - - .data <- .data + # validate unit and groups length + if (!unit %in% c("m", "ft")) { + stop("TADA_DepthProfilePlot: unit must be 'm' or 'ft'.") } - - if (any(flag.func.cols %in% colnames(.data)) == FALSE) { - message( - "TADA_DepthProfilePlot: Running TADA_FlagDepthCategory function to add required columns to data frame" + if (length(groups) > 3) { + warning( + "TADA_DepthProfilePlot: More than 3 groups supplied; only the first 3 will be used." ) + groups <- groups[1:3] + } - if (bottomvalue == "null" & surfacevalue == "null") { - .data <- TADA_FlagDepthCategory( - .data, - surfacevalue = 2, - bottomvalue = 2 - ) |> - dplyr::mutate(TADA.DepthCategory.Flag = NA) - } - - if (surfacevalue == "null" & is.numeric(bottomvalue)) { - .data <- TADA_FlagDepthCategory( - .data, - surfacevalue = 2, - bottomvalue = bottomvalue - ) |> - dplyr::mutate( - TADA.DepthCatgeory.Flag = ifelse( - TADA.DepthCategory.Flag %in% c("Surface", "Middle"), - NA, - TADA.DepthCategory.Flag - ) - ) - } + # Normalize "null" to NA + surfacevalue <- .normalize_null_numeric(surfacevalue) + bottomvalue <- .normalize_null_numeric(bottomvalue) - if (bottomvalue == "null" & is.numeric(surfacevalue)) { - .data <- TADA_FlagDepthCategory( - .data, - surfacevalue = surfacevalue, - bottomvalue = 2 - ) |> - dplyr::mutate( - TADA.DepthCatgeory.Flag = ifelse( - TADA.DepthCategory.Flag %in% c("Bottom", "Middle"), - NA, - TADA.DepthCategory.Flag - ) - ) - } + # Add check that depth category flag function has been run, run it if it has not + .data <- .ensure_depth_flag_columns( + .data, + surfacevalue = surfacevalue, + bottomvalue = bottomvalue, + allow_na_thresholds = TRUE + ) - if (is.numeric(bottomvalue) & is.numeric(surfacevalue)) { - .data <- TADA_FlagDepthCategory( - .data, - surfacevalue = surfacevalue, - bottomvalue = bottomvalue - ) - } - } + # Define depth-parameter characteristics (needed before unit checks) + depth.params <- .depth_param_names() - # add convert depth unit (this still needs to be added), for now print warning and stop function if units don't match + # Enforce unit consistency only across non-depth-parameter rows; depth-parameter rows will be converted later .data <- .data |> dplyr::filter(!is.na(TADA.ConsolidatedDepth)) - if (.data$TADA.ConsolidatedDepth.Unit[1] == unit) { - message( - "TADA_DepthProfilePlot: Depth unit in data set matches depth unit specified by user for plot. No conversion necessary." - ) - - .data <- .data - - if (.data$TADA.ConsolidatedDepth.Unit[1] != unit) { + non_depth_rows <- .data |> + dplyr::filter(!TADA.CharacteristicName %in% depth.params) + if (nrow(non_depth_rows) > 0) { + units_present <- unique(stats::na.omit( + non_depth_rows$TADA.ConsolidatedDepth.Unit + )) + if (length(units_present) > 1 || units_present != unit) { stop( - "TADA_DepthProfilePlot: Depth unit in data set does not match depth unit specified by user for plot. Convert units in data or specify correct unit in TADA_DepthProfilePlot function." + "TADA_DepthProfilePlot: Convert non-depth-parameter depth units to match `unit` before plotting." + ) + } else { + message( + "TADA_DepthProfilePlot: Depth unit for non-depth-parameter rows matches `unit`." ) } + } else { + message( + "TADA_DepthProfilePlot: Only depth-parameter rows detected; unit check skipped (conversion will be applied as needed)." + ) } # create ID Depth Profiles data.frame to check against params - param.check <- TADA_IDDepthProfiles(.data) - if (is.null(location)) { - message( - "TADA_DepthProfilePlot: No TADA.MonitoringLocationIdentifier selected, a depth profile cannot be generated." + # Early required-argument checks + if (is.null(location) || is.null(activity_date) || is.null(groups)) { + stop( + "TADA_DepthProfilePlot: Please supply 'location', 'activity_date', and 'groups'." ) - - stop() - - if (!location %in% param.check$TADA.MonitoringLocationIdentifier) { - message( - "TADA_DepthProfilePlot: TADA.MonitoringLocationIdentifier selected is not in data set." - ) - - stop() - } - - if (location %in% param.check$TADA.MonitoringLocationIdentifier) { - message( - "TADA_DepthProfilePlot: TADA.MonitoringLocationIdentifier selected." - ) - } } - if (is.null(activity_date)) { - message( - "TADA_DepthProfilePlot: No ActivityStartDate selected, a depth profile cannot be generated." - ) - - stop() - - if (!activity_date %in% param.check$ActivityStartDate) { - message( - "TADA_DepthProfilePlot: ActivityStartDate selected is not in data set." - ) - } - - stop() - - if (activity_date %in% param.check$ActivityStartDate) { - message("TADA_DepthProfilePlot: ActivityStartDate selected.") - } + # Validate they exist in the data + if (!location %in% .data$TADA.MonitoringLocationIdentifier) { + stop("TADA_DepthProfilePlot: `location` is not present in the data.") } - - if (is.null(groups)) { - message( - "TADA_DepthProfilePlot: No groups selected, a depth profile cannot be generated." - ) - - stop() - - if (!is.null(groups)) { - groups.length <- length(groups) - - if (groups.length > 0) { - if ( - stringr::str_detect( - param.check$TADA.CharacteristicsForDepthProfile, - groups[1] - ) == - FALSE - ) { - message( - "TADA_DepthProfilePlot: First of groups for depth profile plot does not exist in data set." - ) - } - - stop() - - if ( - stringr::str_detect( - param.check$TADA.CharacteristicsForDepthProfile, - groups[1] - ) == - TRUE - ) { - message( - "TADA:DepthProfilePlot: First of groups for depth profile exists in data set." - ) - } - } - - if (groups.length > 1) { - if ( - stringr::str_detect( - param.check$TADA.CharacteristicsForDepthProfile, - groups[2] - ) == - FALSE - ) { - message( - "TADA_DepthProfilePlot: Second of groups for depth profile plot does not exist in data set." - ) - } - - stop() - - if ( - stringr::str_detect( - param.check$TADA.CharacteristicsForDepthProfile, - groups[2] - ) == - TRUE - ) { - message( - "TADA:DepthProfilePlot: Second of groups for depth profile exists in data set." - ) - } - } - - if (groups.length > 2) { - if ( - stringr::str_detect( - param.check$TADA.CharacteristicsForDepthProfile, - groups[3] - ) == - FALSE - ) { - message( - "TADA_DepthProfilePlot: Third of groups for depth profile plot does not exist in data set." - ) - } - - stop() - - if ( - stringr::str_detect( - param.check$TADA.CharacteristicsForDepthProfile, - groups[3] - ) == - TRUE - ) { - message( - "TADA:DepthProfilePlot: Third of groups for depth profile exists in data set." - ) - } - } - } - - if (!activity_date %in% param.check$ActivityStartDate) { - message( - "TADA_DepthProfilePlot: ActivityStartDate selected is not in data set." - ) - } - - stop() - - if (activity_date %in% param.check$ActivityStartDate) { - message("TADA_DepthProfilePlot: ActivityStartDate selected.") - } - - param.check <- param.check |> - dplyr::filter(ActivityStartDate == activity_date) + if (!activity_date %in% .data$ActivityStartDate) { + stop("TADA_DepthProfilePlot: `activity_date` is not present in the data.") + } + missing_groups <- setdiff(groups, unique(.data$TADA.ComparableDataIdentifier)) + if (length(missing_groups) > 0) { + stop(paste0( + "TADA_DepthProfilePlot: The following `groups` are not present in the data: ", + paste(missing_groups, collapse = ", ") + )) } # remove param.check rm(param.check) - # list required columns + # Ensure optional datetime column exists for hover text + if (!"ActivityStartDateTime" %in% names(.data)) { + .data$ActivityStartDateTime <- NA_character_ + } + + # list required columns (include fields used in hover/name text) required_cols <- c( "TADA.ResultDepthHeightMeasure.MeasureValue", "TADA.ResultDepthHeightMeasure.MeasureUnitCode", @@ -1214,7 +1209,14 @@ TADA_DepthProfilePlot <- function( "ActivityStartDateTime", "TADA.ConsolidatedDepth", "TADA.ConsolidatedDepth.Unit", - "TADA.ConsolidatedDepth.Bottom" + "TADA.ConsolidatedDepth.Bottom", + "TADA.ActivityMediaName", + "ActivityMediaSubdivisionName", + "TADA.ComparableDataIdentifier", + "TADA.CharacteristicName", + "ActivityRelativeDepthName", + "TADA.MethodSpeciationName", + "TADA.ResultSampleFractionText" ) # check .data has required columns @@ -1222,25 +1224,14 @@ TADA_DepthProfilePlot <- function( message("TADA_DepthProfilePlot: Identifying available depth profile data.") - # identify depth profile data - depth.params <- c( - "DEPTH, SECCHI DISK DEPTH", - "DEPTH, SECCHI DISK DEPTH (CHOICE LIST)", - "DEPTH, SECCHI DISK DEPTH REAPPEARS", - "DEPTH, DATA-LOGGER (NON-PORTED)", - "DEPTH, DATA-LOGGER (PORTED)", - "TRANSPARENCY, SECCHI TUBE WITH DISK", - "RBP STREAM DEPTH - RIFFLE", - "RBP STREAM DEPTH - RUN", - "THALWEG DEPTH" - ) - + # exclude depth-parameter rows from depthprofile.avail to avoid duplication depthprofile.avail <- .data |> dplyr::filter( !is.na(TADA.ConsolidatedDepth), TADA.MonitoringLocationIdentifier %in% location, ActivityStartDate %in% activity_date, - TADA.ActivityMediaName == "WATER" + TADA.ActivityMediaName == "WATER", + !TADA.CharacteristicName %in% depth.params ) |> dplyr::group_by( TADA.ComparableDataIdentifier, @@ -1255,63 +1246,36 @@ TADA_DepthProfilePlot <- function( ActivityStartDate ) |> dplyr::mutate(N = length(TADA.ResultMeasureValue)) |> - dplyr::filter(N > 2 | TADA.CharacteristicName %in% depth.params) |> + dplyr::filter(N > 2) |> dplyr::ungroup() |> dplyr::select(-N) - depth.params.groups <- depthprofile.avail |> + depth.params.groups <- .data |> dplyr::filter( - TADA.ComparableDataIdentifier %in% groups, + TADA.MonitoringLocationIdentifier %in% location, + ActivityStartDate %in% activity_date, + TADA.ActivityMediaName == "WATER", TADA.CharacteristicName %in% depth.params ) |> dplyr::select(TADA.ComparableDataIdentifier) |> unique() |> dplyr::pull() - # identify depth unit being used in graph - fig.depth.unit <- depthprofile.avail |> - dplyr::select(TADA.ConsolidatedDepth.Unit) |> - dplyr::filter(!is.na(TADA.ConsolidatedDepth.Unit)) |> - unique() |> - dplyr::pull() + # Use user-specified depth unit for the figure + fig.depth.unit <- unit # if any depth parameter (ex: secchi) data - - if (length(intersect(groups, depth.params.groups)) == 0) { - depth.params.string <- toString(depth.params, sep = "; ") |> - stringi::stri_replace_last(" or ", fixed = "; ") - - profile.data <- depthprofile.avail - - rm(depth.params.string, depthprofile.avail) - } - if (length(intersect(groups, depth.params.groups)) > 0) { # add depth param (ex: secchi) results - depth.params.string <- toString(depth.params, sep = "; ") |> - stringi::stri_replace_last(" or ", fixed = "; ") - - depth.units <- c( - "m", - "ft", - "in", - "m", - "m", - "ft", - "ft", - "in", - "in", - "m", - "ft", - "in" - ) + depth.params.string <- paste(depth.params, collapse = "; ") depth.params.avail <- .data |> dplyr::filter( TADA.MonitoringLocationIdentifier %in% location, TADA.CharacteristicName %in% depth.params, ActivityStartDate %in% activity_date, - TADA.ActivityMediaName == "WATER" + TADA.ActivityMediaName == "WATER", + TADA.ComparableDataIdentifier %in% groups ) |> dplyr::group_by( TADA.CharacteristicName, @@ -1321,59 +1285,54 @@ TADA_DepthProfilePlot <- function( dplyr::slice_sample(n = 1) |> dplyr::ungroup() - if ( - unique(depth.params.avail$TADA.ConsolidatedDepth.Unit) == fig.depth.unit - ) { + units_match <- all( + stats::na.omit(depth.params.avail$TADA.ConsolidatedDepth.Unit) == + fig.depth.unit + ) + if (units_match) { message(paste( "TADA_DepthProfilePlot: Any results for", depth.params.string, "match the depth unit selected for the figure." )) + } else { + message(paste( + "TADA_DepthProfilePlot: Converting depth units for any results for", + depth.params.string, + "results to match depth units selected for the figure." + )) - depth.params.avail <- depth.params.avail - - if ( - unique(depth.params.avail$TADA.ConsolidatedDepth.Unit) != fig.depth.unit - ) { - message(paste( - "TADA_DepthProfilePlot: Converting depth units for any results for", - depth.params.string, - "results to match depth units selected for the figure." - )) - - depth.units <- c( + # consolidated conversion map for depth-parameter rows + conv_df <- data.frame( + TADA.ConsolidatedDepth.Unit = c( "m", "ft", "in", - "m", - "m", - "ft", "ft", "in", + "m", "in", "m", "ft", - "in" - ) - - result.units <- c( + "cm", + "cm", + "cm" + ), + YAxis.DepthUnit = c( + "m", + "m", "m", "ft", - "in", + "ft", + "ft", "ft", "in", - "m", "in", "m", "ft", - "cm", - "cm", - "cm" - ) - - convert.factor <- c( - "1", - "1", + "in" + ), + SecchiConversion = c( "1", "0.3048", "0.0254", @@ -1383,63 +1342,40 @@ TADA_DepthProfilePlot <- function( "12", "0.01", "0.032808", + "0.39", + "0.39", "0.39" - ) + ), + stringsAsFactors = FALSE + ) - secchi.conversion <- data.frame( - result.units, - depth.units, - convert.factor + depth.params.avail <- depth.params.avail |> + dplyr::mutate(YAxis.DepthUnit = fig.depth.unit) |> + dplyr::left_join( + conv_df, + by = c("TADA.ConsolidatedDepth.Unit", "YAxis.DepthUnit") ) |> - dplyr::rename( - TADA.ConsolidatedDepth.Unit = result.units, - YAxis.DepthUnit = depth.units, - SecchiConversion = convert.factor - ) - - depth.params.avail <- depth.params.avail |> - dplyr::mutate(YAxis.DepthUnit = fig.depth.unit) |> - dplyr::left_join(secchi.conversion) |> - dplyr::mutate( - TADA.ConsolidatedDepth.Unit = fig.depth.unit, - TADA.ConsolidatedDepth = TADA.ResultMeasureValue * - as.numeric(SecchiConversion) - ) |> - dplyr::select(-YAxis.DepthUnit, -SecchiConversion) - - rm( - secchi.conversion, - depth.params.string, - depth.units, - result.units, - convert.factor - ) - } + dplyr::mutate( + TADA.ConsolidatedDepth.Unit = fig.depth.unit, + TADA.ConsolidatedDepth = TADA.ResultMeasureValue * + as.numeric(SecchiConversion) + ) |> + dplyr::select(-YAxis.DepthUnit, -SecchiConversion) } - profile.data <- depthprofile.avail |> - dplyr::full_join(depth.params.avail, by = c(names(depthprofile.avail))) - + profile.data <- dplyr::bind_rows(depthprofile.avail, depth.params.avail) rm(depth.params.avail, depthprofile.avail) + } else { + # no depth-parameter groups requested; use the main profile data only + profile.data <- depthprofile.avail } # this subset must include all fields included in plot hover below plot.data <- profile.data |> - dplyr::filter(dplyr::if_any( - TADA.ComparableDataIdentifier, - ~ .x %in% groups - )) |> + dplyr::filter(TADA.ComparableDataIdentifier %in% groups) |> dplyr::select( dplyr::all_of(required_cols), - "TADA.ComparableDataIdentifier", - "ActivityStartDateTime", - "TADA.MonitoringLocationName", - "TADA.ActivityMediaName", - "ActivityMediaSubdivisionName", - "ActivityRelativeDepthName", - "TADA.CharacteristicName", - "TADA.MethodSpeciationName", - "TADA.ResultSampleFractionText" + "TADA.ComparableDataIdentifier" ) |> dplyr::mutate( TADA.ResultMeasure.MeasureUnitCode = ifelse( @@ -1449,26 +1385,34 @@ TADA_DepthProfilePlot <- function( ) ) + # Ensure there is data to plot for the selected location/date/groups + if (nrow(plot.data) == 0) { + stop( + "TADA_DepthProfilePlot: No data found for the selected location, activity_date, and groups." + ) + } + rm(profile.data) # break into subsets for each parameter param1 <- plot.data |> - dplyr::filter(dplyr::if_any( - TADA.ComparableDataIdentifier, - ~ .x %in% groups[1] - )) - + dplyr::filter(TADA.ComparableDataIdentifier %in% groups[1]) param2 <- plot.data |> - dplyr::filter(dplyr::if_any( - TADA.ComparableDataIdentifier, - ~ .x %in% groups[2] - )) - + dplyr::filter(TADA.ComparableDataIdentifier %in% groups[2]) param3 <- plot.data |> - dplyr::filter(dplyr::if_any( - TADA.ComparableDataIdentifier, - ~ .x %in% groups[3] + dplyr::filter(TADA.ComparableDataIdentifier %in% groups[3]) + + # Ensure each requested group has data for this location/date + present_groups <- plot.data |> + dplyr::count(TADA.ComparableDataIdentifier) |> + dplyr::pull(TADA.ComparableDataIdentifier) + missing_in_subset <- setdiff(groups, present_groups) + if (length(missing_in_subset) > 0) { + stop(paste0( + "TADA_DepthProfilePlot: The following `groups` have no data for the selected location and activity_date: ", + paste(missing_in_subset, collapse = ", ") )) + } # create title for figure, conditional on number of groups/characteristics selected @@ -1587,7 +1531,9 @@ TADA_DepthProfilePlot <- function( # first parameter has a depth profile if ( - length(groups) >= 1 & !param1$TADA.CharacteristicName[1] %in% depth.params + length(groups) >= 1 && + nrow(param1) > 0 && + !param1$TADA.CharacteristicName[1] %in% depth.params ) { # config options https://plotly.com/r/configuration-options/ scatterplot <- scatterplot |> @@ -1643,11 +1589,13 @@ TADA_DepthProfilePlot <- function( # first parameter has a single value where units are depth if ( - length(groups) >= 1 & param1$TADA.CharacteristicName[1] %in% depth.params + length(groups) >= 1 && + nrow(param1) > 0 && + param1$TADA.CharacteristicName[1] %in% depth.params ) { scatterplot <- scatterplot |> plotly::add_lines( - y = param1$TADA.ResultMeasureValue[1], + y = param1$TADA.ConsolidatedDepth[1], x = xrange, name = TADA_CharStringRemoveNANone(paste0( param1$TADA.ResultSampleFractionText[1], @@ -1696,7 +1644,9 @@ TADA_DepthProfilePlot <- function( # second parameter has a depth profile if ( - length(groups) >= 2 & !param2$TADA.CharacteristicName[1] %in% depth.params + length(groups) >= 2 && + nrow(param2) > 0 && + !param2$TADA.CharacteristicName[1] %in% depth.params ) { scatterplot <- scatterplot |> plotly::add_trace( @@ -1750,11 +1700,13 @@ TADA_DepthProfilePlot <- function( # second parameter has a single value where units are depth if ( - length(groups) >= 2 & param2$TADA.CharacteristicName[1] %in% depth.params + length(groups) >= 2 && + nrow(param2) > 0 && + param2$TADA.CharacteristicName[1] %in% depth.params ) { scatterplot <- scatterplot |> plotly::add_lines( - y = param2$TADA.ResultMeasureValue[1], + y = param2$TADA.ConsolidatedDepth[1], x = xrange, name = TADA_CharStringRemoveNANone(paste0( param2$TADA.ResultSampleFractionText[1], @@ -1804,7 +1756,9 @@ TADA_DepthProfilePlot <- function( # third parameter has a depth profile if ( - length(groups) >= 3 & !param3$TADA.CharacteristicName[1] %in% depth.params + length(groups) >= 3 && + nrow(param3) > 0 && + !param3$TADA.CharacteristicName[1] %in% depth.params ) { scatterplot <- scatterplot |> plotly::add_trace( @@ -1830,7 +1784,7 @@ TADA_DepthProfilePlot <- function( paste0( param3$TADA.ResultMeasureValue, " ", - param2$TADA.ResultMeasure.MeasureUnitCode + param3$TADA.ResultMeasure.MeasureUnitCode ), "
", "Activity Start Date:", @@ -1858,11 +1812,13 @@ TADA_DepthProfilePlot <- function( # third parameter has a single value where units are depth if ( - length(groups) >= 3 & param3$TADA.CharacteristicName[1] %in% depth.params + length(groups) >= 3 && + nrow(param3) > 0 && + param3$TADA.CharacteristicName[1] %in% depth.params ) { scatterplot <- scatterplot |> plotly::add_lines( - y = param3$TADA.ResultMeasureValue[1], + y = param3$TADA.ConsolidatedDepth[1], x = xrange, name = TADA_CharStringRemoveNANone(paste0( param3$TADA.ResultSampleFractionText[1], @@ -1911,16 +1867,13 @@ TADA_DepthProfilePlot <- function( } # add horizontal lines for depth profile category - if (depthcat == TRUE & is.null(surfacevalue) & is.null(bottomvalue)) { + if (isTRUE(depthcat) && is.na(surfacevalue) && is.na(bottomvalue)) { stop( - "TADA_DepthProfilePlot: No depth categories can be determined when both surfacevalue and bottomvalue are null. Supply one or both of these values and run the function again." + "TADA_DepthProfilePlot: No depth categories can be determined when both surfacevalue and bottomvalue are NA. Supply one or both values and run the function again." ) } - if ( - (depthcat == TRUE & !is.null(surfacevalue)) | - (depthcat == TRUE & !is.null(bottomvalue)) - ) { + if (isTRUE(depthcat) && (!is.na(surfacevalue) || !is.na(bottomvalue))) { # create list to store depth annotation text depth_annotations <- list() @@ -1967,49 +1920,52 @@ TADA_DepthProfilePlot <- function( } if (is.numeric(bottomvalue)) { - # find bottom depth - bot.depth <- plot.data |> - dplyr::select(TADA.ConsolidatedDepth.Bottom) |> - unique() |> - dplyr::slice_max(TADA.ConsolidatedDepth.Bottom) |> - dplyr::pull() - - message("TADA_DepthProfilePlot: Adding bottom delination to figure.") - - scatterplot <- scatterplot |> - plotly::add_lines( - y = bot.depth - bottomvalue, - x = xrange, - inherit = FALSE, - showlegend = FALSE, - line = list(color = tada.pal[1]), - hoverinfo = "text", - hovertext = paste( - round((bot.depth - bottomvalue), digits = 1), - fig.depth.unit, - sep = " " + # find bottom depth robustly; skip annotation if no finite bottom + bd <- suppressWarnings(max( + plot.data$TADA.ConsolidatedDepth.Bottom, + na.rm = TRUE + )) + if (is.finite(bd)) { + message("TADA_DepthProfilePlot: Adding bottom delineation to figure.") + scatterplot <- scatterplot |> + plotly::add_lines( + y = bd - bottomvalue, + x = xrange, + inherit = FALSE, + showlegend = FALSE, + line = list(color = tada.pal[1]), + hoverinfo = "text", + hovertext = paste( + round((bd - bottomvalue), digits = 1), + fig.depth.unit, + sep = " " + ) ) - ) - bottom_text <- list( - x = 1, - y = (ymax + (bot.depth - bottomvalue)) / 2, - xref = "paper", - yref = "y", - text = "Bottom", - showarrow = F, - align = "right", - xanchor = "left", - yanchor = "center" - ) + bottom_text <- list( + x = 1, + y = (ymax + (bd - bottomvalue)) / 2, + xref = "paper", + yref = "y", + text = "Bottom", + showarrow = F, + align = "right", + xanchor = "left", + yanchor = "center" + ) - depth_annotations <- append(depth_annotations, list(bottom_text)) + depth_annotations <- append(depth_annotations, list(bottom_text)) + } else { + message( + "TADA_DepthProfilePlot: Bottom depth is not available; bottom delineation omitted." + ) + } } - if (is.numeric(surfacevalue) & is.numeric(bottomvalue)) { + if (is.numeric(surfacevalue) & is.numeric(bottomvalue) && is.finite(bd)) { middle_text <- list( x = 1, - y = (surfacevalue + (bot.depth - bottomvalue)) / 2, + y = (surfacevalue + (bd - bottomvalue)) / 2, xref = "paper", yref = "y", text = "Middle", diff --git a/R/Utilities.R b/R/Utilities.R index 357648ab3..75c889cf4 100644 --- a/R/Utilities.R +++ b/R/Utilities.R @@ -2176,6 +2176,7 @@ TADA_CreateCSV <- function(.data) { #' @export #' #' @examples +#' \dontrun{ #' DeWitt_wqx3 <- dataRetrieval::readWQPdata( #' statecode = "Illinois", #' countycode = "DeWitt", characteristicName = "Nitrogen", @@ -2184,6 +2185,7 @@ TADA_CreateCSV <- function(.data) { #' ) #' #' DeWitt_wqx3_withlegacynames <- EPATADA::TADA_RenametoLegacy(DeWitt_wqx3) +#' } #' TADA_RenametoLegacy <- function(.data) { ## READ WQX3.0 column name schema from EPA Water Data WQP Quick Reference Guide diff --git a/man/TADA_DepthProfilePlot.Rd b/man/TADA_DepthProfilePlot.Rd index c0ddcd59c..02ed81bf7 100644 --- a/man/TADA_DepthProfilePlot.Rd +++ b/man/TADA_DepthProfilePlot.Rd @@ -24,10 +24,11 @@ and TADA.ActivityDepthHeightMeasure.MeasureValue. Units for all depth fields must be the same. This can be accomplished using TADA_AutoClean() or TADA_ConvertDepthUnits.} -\item{groups}{A vector of two identifiers from the TADA.ComparableDataIdentifier column. +\item{groups}{A vector of up to three identifiers from the TADA.ComparableDataIdentifier column. For example, the groups could be 'DISSOLVED OXYGEN (DO)_NA_NA_UG/L' and 'PH_NA_NA_NA'. These groups will be specific to your data frame. The TADA_IDDepthProfiles can be -used to identify available groups.} +used to identify available groups. If more than three identifiers are supplied, +only the first three are used and a warning is issued.} \item{location}{A single TADA.MonitoringLocationIdentifier to plot the depth profile. A TADA.MonitoringLocationIdentifier must be entered or an error will be returned and @@ -38,7 +39,10 @@ no depth profile will be created.} \item{depthcat}{Boolean argument indicating whether delineation between depth categories should be shown on the depth profile figure. depthcat = TRUE is the default and displays solid black lines to delineate between surface, middle, and -bottom samples and labels each section of the plot.} +bottom samples and labels each section of the plot. +When depthcat = TRUE, at least one of surfacevalue or bottomvalue must be provided +(non-NA), otherwise the function will stop. If bottom depth cannot be determined for +the selection, “Bottom” and “Middle” delineations are omitted.} \item{surfacevalue}{numeric argument. The user enters how many meters from the surface should be included in the "Surface" category. Default is surfacevalue = 2.} @@ -47,13 +51,17 @@ surface should be included in the "Surface" category. Default is surfacevalue = bottom should be included in the "Bottom" category. Default is bottomvalue = 2.} -\item{unit}{Character argument. The enters either "m" or "ft" to specify which -depth units should be used for the plot. Default is "m".} +\item{unit}{Character argument. The user enters either "m" or "ft" to specify which +depth units should be used for the plot. Default is "m". +Non-depth-parameter rows must already be in the specified unit. Depth-parameter +rows (e.g., Secchi) are converted to the specified unit for plotting when necessary.} } \value{ A depth profile plot displaying up to three parameters for a single TADA.MonitoringLocationIdentifier. Displaying depth categories is optional with the -depthcat argument. +depthcat argument. The function excludes duplicate depth-parameter rows from the +main profile series and, if any are included via groups, plots them as single +horizontal reference lines in the requested unit. } \description{ Create A Three-Characteristic Depth Profile diff --git a/man/TADA_FlagDepthCategory.Rd b/man/TADA_FlagDepthCategory.Rd index 5e78eecf6..31bc6f06a 100644 --- a/man/TADA_FlagDepthCategory.Rd +++ b/man/TADA_FlagDepthCategory.Rd @@ -15,7 +15,11 @@ TADA_FlagDepthCategory( ) } \arguments{ -\item{.data}{TADA dataframe} +\item{.data}{TADA dataframe which must include the columns +TADA.ActivityDepthHeightMeasure.MeasureValue, +TADA.ResultDepthHeightMeasure.MeasureValue, +TADA.ActivityBottomDepthHeightMeasure.MeasureValue, and +ActivityRelativeDepthName.} \item{bycategory}{character argument with options "no", "all", "surface", "middle", "bottom". The default is bycategory = "no" which means that any aggregate values @@ -52,7 +56,9 @@ be added to describe aggregation.} \item{aggregatedonly}{Boolean argument with options TRUE or FALSE. The default is aggregatedonly = FALSE which means that all results are returned. -When aggregatedonly = TRUE, only aggregate values are returned.} +When aggregatedonly = TRUE, only aggregate values are returned. +Note: aggregatedonly = TRUE has no effect when dailyagg = "none" and will raise an error +(no aggregates to return).} \item{clean}{Boolean argument with options TRUE or FALSE. The default is clean = FALSE which means that all results are returned. @@ -63,11 +69,12 @@ category are included in the returned dataframe.} The same input TADA dataframe with additional columns TADA.DepthCategory.Flag, TADA.DepthProfileAggregation.Flag, TADA.ConsolidatedDepth, TADA.ConsolidatedDepth.Bottom, and TADA.ConsolidatedDepth.Unit. The consolidated depth fields are created by reviewing -multiple WQC columns where users may input depth information. If a daily_agg = "avg", -"min", or "max", aggregated values will be identified in the TADA.ResultAggregation.Flag -column. In the case of daily_agg = "avg", additional rows to display averages will be -added to the data frame. They can be identified by the prefix ("TADA-") of -their result identifiers. +multiple WQC columns where users may input depth information. If dailyagg = "avg", +"min", or "max", aggregation status is described in TADA.DepthProfileAggregation.Flag. +In the case of dailyagg = "avg", additional rows to display averages will be +added to the data frame. Aggregated rows are identified by ResultIdentifier prefixed +with "TADA-". When dailyagg = "avg", the aggregated result retains metadata from a +deterministically selected representative record (first by ResultIdentifier within the group). } \description{ This function creates a new column, TADA.DepthCategory.Flag with values: "No diff --git a/man/TADA_IDDepthProfiles.Rd b/man/TADA_IDDepthProfiles.Rd index 9375454a1..5104a3240 100644 --- a/man/TADA_IDDepthProfiles.Rd +++ b/man/TADA_IDDepthProfiles.Rd @@ -9,14 +9,15 @@ TADA_IDDepthProfiles(.data, nresults = TRUE, nvalue = 2, aggregates = FALSE) \arguments{ \item{.data}{TADA dataframe which must include the columns ActivityStartDate, TADA.ConsolidatedDepth, TADA.ConsolidatedDepth.Unit, TADA.ConsolidatedDepth.Bottom, -TADA.ResultMeasureValue, TADA.ResultMeasureValue.UnitCode, +TADA.ResultMeasureValue, TADA.ResultMeasure.MeasureUnitCode, OrganizationIdentifier, TADA.MonitoringLocationName, TADA.MonitoringLocationIdentifier, and TADA.ComparableDataIdentifier.} \item{nresults}{Boolean argument with options "TRUE" or "FALSE". The default is nresults = TRUE, which means that the number of results for each characteristic are added within the TADA.CharacteristicsForDepthProfile column. -When nresults = FALSE.} +When nresults = FALSE, the number of results is not appended to +TADA.CharacteristicsForDepthProfile.} \item{nvalue}{numeric argument to specify the number of results required to identify a depth profile. The default is 2, which means that a depth profile will be identified @@ -52,6 +53,9 @@ A new column, TADA.CharacteristicsForDepthProfile, is created which lists the characteristics available for depth profile analysis. Using the, nresults param, users can specify whether characteristic names should be followed by the number of results available for the characteristic in parentheses. + +Inputs nresults and aggregates must be logical scalars; non-logical values will +raise an error. nvalue must be a single numeric value. } \examples{ # Load data frame diff --git a/man/TADA_RenametoLegacy.Rd b/man/TADA_RenametoLegacy.Rd index 523fc41b4..3875e9dc7 100644 --- a/man/TADA_RenametoLegacy.Rd +++ b/man/TADA_RenametoLegacy.Rd @@ -26,6 +26,7 @@ The function uses data.table::setnames() to rename columns in the dataframe by reference - in this case where there are beta names, rename to legacy names, and skip where there are no matches. } \examples{ +\dontrun{ DeWitt_wqx3 <- dataRetrieval::readWQPdata( statecode = "Illinois", countycode = "DeWitt", characteristicName = "Nitrogen", @@ -34,5 +35,6 @@ DeWitt_wqx3 <- dataRetrieval::readWQPdata( ) DeWitt_wqx3_withlegacynames <- EPATADA::TADA_RenametoLegacy(DeWitt_wqx3) +} } diff --git a/tests/testthat/test-DepthProfile.R b/tests/testthat/test-DepthProfile.R new file mode 100644 index 000000000..38db9cf03 --- /dev/null +++ b/tests/testthat/test-DepthProfile.R @@ -0,0 +1,444 @@ +# Combined testthat suite for depth helpers and functions + +# ------------------------- +# Internal helpers tests +# ------------------------- + +testthat::test_that(".depth_param_names returns expected vector", { + dp <- .depth_param_names() + testthat::expect_type(dp, "character") + testthat::expect_true(length(dp) >= 3) + testthat::expect_true(any(grepl("SECCHI", dp))) +}) + +testthat::test_that(".normalize_null_numeric handles inputs correctly", { + testthat::expect_true(is.na(.normalize_null_numeric("null"))) + testthat::expect_true(is.na(.normalize_null_numeric(NULL))) + testthat::expect_identical(.normalize_null_numeric(2), 2) + testthat::expect_identical(.normalize_null_numeric(NA_real_), NA_real_) + # Non-character non-NULL values are returned as-is + testthat::expect_identical(.normalize_null_numeric("2"), "2") +}) + +testthat::test_that(".drop_avg_aggregates filters TADA- rows", { + df <- data.frame( + ResultIdentifier = c("A1", "TADA-A2", "B1", "TADA-B2"), + val = 1:4, + stringsAsFactors = FALSE + ) + out <- .drop_avg_aggregates(df) + testthat::expect_setequal(out$ResultIdentifier, c("A1", "B1")) +}) + +testthat::test_that(".ensure_depth_flag_columns runs FlagDepthCategory and can blank flags for NA thresholds", { + # Minimal synthetic dataset (single temperature result with depth) + df <- tibble::tibble( + TADA.ActivityDepthHeightMeasure.MeasureValue = 1, + TADA.ResultDepthHeightMeasure.MeasureValue = NA_real_, + TADA.ActivityBottomDepthHeightMeasure.MeasureValue = NA_real_, + ActivityRelativeDepthName = NA_character_, + TADA.ResultDepthHeightMeasure.MeasureUnitCode = "m", + TADA.ActivityDepthHeightMeasure.MeasureUnitCode = "m", + TADA.CharacteristicName = "TEMPERATURE", + TADA.ResultMeasure.MeasureUnitCode = "DEG C", + TADA.ResultMeasureValue = 10, + ResultIdentifier = "R1", + TADA.MonitoringLocationIdentifier = "LOC1", + OrganizationIdentifier = "ORG1", + ActivityStartDate = as.Date("2020-01-01"), + # Extra fields used elsewhere + TADA.MonitoringLocationName = "Loc 1", + TADA.ActivityMediaName = "WATER", + ActivityStartDateTime = "2020-01-01T08:00:00Z", + ActivityMediaSubdivisionName = NA_character_, + TADA.ComparableDataIdentifier = "TEMPERATURE_NONE_NONE_DEG C", + TADA.MethodSpeciationName = "NONE", + TADA.ResultSampleFractionText = "NONE", + TADA.MonitoringLocationTypeName = "River/Stream" + ) + + out1 <- .ensure_depth_flag_columns( + df, + surfacevalue = 2, + bottomvalue = 2, + allow_na_thresholds = FALSE + ) + testthat::expect_true(all( + c( + "TADA.ConsolidatedDepth", + "TADA.ConsolidatedDepth.Unit", + "TADA.ConsolidatedDepth.Bottom", + "TADA.DepthCategory.Flag" + ) %in% + names(out1) + )) + testthat::expect_false(all(is.na(out1$TADA.DepthCategory.Flag))) # some flag assigned + + out2 <- .ensure_depth_flag_columns( + df, + surfacevalue = NA_real_, + bottomvalue = NA_real_, + allow_na_thresholds = TRUE + ) + testthat::expect_true(all(is.na(out2$TADA.DepthCategory.Flag))) +}) + +# ------------------------- +# Fixtures +# ------------------------- + +make_synth_profile_only_df <- function() { + # Three temperature rows (profile only, no depth-parameter) + tibble::tibble( + TADA.ActivityDepthHeightMeasure.MeasureValue = c(0.5, 5, 9), + TADA.ResultDepthHeightMeasure.MeasureValue = c( + NA_real_, + NA_real_, + NA_real_ + ), + TADA.ActivityBottomDepthHeightMeasure.MeasureValue = c( + NA_real_, + NA_real_, + NA_real_ + ), + ActivityRelativeDepthName = NA_character_, + TADA.ResultDepthHeightMeasure.MeasureUnitCode = c("m", "m", "m"), + TADA.ActivityDepthHeightMeasure.MeasureUnitCode = c("m", "m", "m"), + TADA.CharacteristicName = c("TEMPERATURE", "TEMPERATURE", "TEMPERATURE"), + TADA.ResultMeasure.MeasureUnitCode = c("DEG C", "DEG C", "DEG C"), + TADA.ResultMeasureValue = c(10, 5, 1), + ResultIdentifier = c("T1", "T2", "T3"), + TADA.MonitoringLocationIdentifier = c("LOC1", "LOC1", "LOC1"), + OrganizationIdentifier = c("ORG1", "ORG1", "ORG1"), + ActivityStartDate = as.Date(rep("2020-01-01", 3)), + TADA.MonitoringLocationName = "Loc 1", + TADA.ActivityMediaName = "WATER", + ActivityStartDateTime = "2020-01-01T08:00:00Z", + ActivityMediaSubdivisionName = NA_character_, + TADA.ComparableDataIdentifier = rep("TEMPERATURE_NONE_NONE_DEG C", 3), + TADA.MethodSpeciationName = "NONE", + TADA.ResultSampleFractionText = "NONE", + TADA.MonitoringLocationTypeName = "River/Stream" + ) +} + +make_synth_depth_df_meters <- function() { + # Synthetic profile: 3 depths for temperature + 1 depth-param (secchi) row (in meters) + tibble::tibble( + # Use activity depth; leave result depth NA + TADA.ActivityDepthHeightMeasure.MeasureValue = c(0.5, 5, 9, NA), + TADA.ResultDepthHeightMeasure.MeasureValue = c( + NA_real_, + NA_real_, + NA_real_, + NA_real_ + ), + TADA.ActivityBottomDepthHeightMeasure.MeasureValue = c( + NA_real_, + NA_real_, + NA_real_, + NA_real_ + ), + ActivityRelativeDepthName = NA_character_, + TADA.ResultDepthHeightMeasure.MeasureUnitCode = c("m", "m", "m", "m"), # not used (result depth NA) + TADA.ActivityDepthHeightMeasure.MeasureUnitCode = c("m", "m", "m", "m"), + TADA.CharacteristicName = c( + "TEMPERATURE", + "TEMPERATURE", + "TEMPERATURE", + "DEPTH, SECCHI DISK DEPTH" + ), + TADA.ResultMeasure.MeasureUnitCode = c("DEG C", "DEG C", "DEG C", "m"), + TADA.ResultMeasureValue = c(10, 5, 1, 1.2), # secchi in meters + ResultIdentifier = c("T1", "T2", "T3", "S1"), + TADA.MonitoringLocationIdentifier = c("LOC1", "LOC1", "LOC1", "LOC1"), + OrganizationIdentifier = c("ORG1", "ORG1", "ORG1", "ORG1"), + ActivityStartDate = as.Date(rep("2020-01-01", 4)), + # fields used elsewhere + TADA.MonitoringLocationName = "Loc 1", + TADA.ActivityMediaName = "WATER", + ActivityStartDateTime = "2020-01-01T08:00:00Z", + ActivityMediaSubdivisionName = NA_character_, + TADA.ComparableDataIdentifier = c( + "TEMPERATURE_NONE_NONE_DEG C", + "TEMPERATURE_NONE_NONE_DEG C", + "TEMPERATURE_NONE_NONE_DEG C", + "DEPTH, SECCHI DISK DEPTH_NONE_NONE_M" + ), + TADA.MethodSpeciationName = "NONE", + TADA.ResultSampleFractionText = "NONE", + TADA.MonitoringLocationTypeName = "River/Stream" + ) +} + +make_synth_depth_df_mixed_units_annotated <- function() { + # Start with meters-only, annotate with FlagDepthCategory, then mutate the Secchi row to have ft unit + df_m <- make_synth_depth_df_meters() + df_ann <- TADA_FlagDepthCategory(df_m, dailyagg = "none") + is_depth_param <- df_ann$TADA.CharacteristicName %in% .depth_param_names() + # Convert the depth-parameter row "appearance" to feet for plotting conversion path + df_ann$TADA.ResultMeasureValue[is_depth_param] <- 4 + df_ann$TADA.ResultMeasure.MeasureUnitCode[is_depth_param] <- "ft" + df_ann$TADA.ConsolidatedDepth.Unit[is_depth_param] <- "ft" # force mismatch with figure unit ("m") + df_ann$TADA.ConsolidatedDepth[is_depth_param] <- 4 # arbitrary ft value; plot will convert using ResultMeasureValue + df_ann$TADA.ComparableDataIdentifier[ + is_depth_param + ] <- "DEPTH, SECCHI DISK DEPTH_NONE_NONE_M" + df_ann +} + +# ------------------------- +# TADA_FlagDepthCategory tests +# ------------------------- + +testthat::test_that("TADA_FlagDepthCategory assigns Surface/Middle/Bottom with bycategory = 'no'", { + df <- make_synth_depth_df_meters() + out <- TADA_FlagDepthCategory( + df, + bycategory = "no", + surfacevalue = 2, + bottomvalue = 2, + dailyagg = "none" + ) + # Filter to temperature rows + temp <- out[out$TADA.CharacteristicName == "TEMPERATURE", ] + flags <- temp$TADA.DepthCategory.Flag + testthat::expect_true(all(c("Surface", "Middle", "Bottom") %in% flags)) +}) + +testthat::test_that("TADA_FlagDepthCategory filters categories with bycategory filters", { + df <- make_synth_depth_df_meters() + out_surface <- TADA_FlagDepthCategory( + df, + bycategory = "surface", + dailyagg = "none" + ) + testthat::expect_true(all(out_surface$TADA.DepthCategory.Flag == "Surface")) + out_bottom <- TADA_FlagDepthCategory( + df, + bycategory = "bottom", + dailyagg = "none" + ) + testthat::expect_true(all(out_bottom$TADA.DepthCategory.Flag == "Bottom")) +}) + +testthat::test_that("TADA_FlagDepthCategory dailyagg = 'none' with aggregatedonly = TRUE errors", { + df <- make_synth_depth_df_meters() + testthat::expect_error(TADA_FlagDepthCategory( + df, + dailyagg = "none", + aggregatedonly = TRUE + )) +}) + +testthat::test_that("TADA_FlagDepthCategory dailyagg = 'avg' returns aggregate with prefix when aggregatedonly = TRUE", { + df <- make_synth_depth_df_meters() + out <- TADA_FlagDepthCategory( + df, + bycategory = "no", + dailyagg = "avg", + aggregatedonly = TRUE + ) + testthat::expect_true(all(grepl("^TADA-", out$ResultIdentifier))) + testthat::expect_equal(nrow(out), 1L) # single group aggregate (entire water column) +}) + +testthat::test_that("TADA_FlagDepthCategory dailyagg = 'min' and 'max' select one row each", { + df <- make_synth_depth_df_meters() + out_min <- TADA_FlagDepthCategory( + df, + bycategory = "no", + dailyagg = "min", + aggregatedonly = TRUE + ) + out_max <- TADA_FlagDepthCategory( + df, + bycategory = "no", + dailyagg = "max", + aggregatedonly = TRUE + ) + testthat::expect_equal(nrow(out_min), 1L) + testthat::expect_equal(nrow(out_max), 1L) + # min should pick the lowest temperature value (1 at bottom depth) + testthat::expect_equal(out_min$TADA.ResultMeasureValue, 1) + # max should pick the highest temperature value (10 at surface) + testthat::expect_equal(out_max$TADA.ResultMeasureValue, 10) +}) + +testthat::test_that("TADA_FlagDepthCategory clean = TRUE keeps only depth categories", { + df <- make_synth_depth_df_meters() + out <- TADA_FlagDepthCategory(df, clean = TRUE) + testthat::expect_true(all( + out$TADA.DepthCategory.Flag %in% c("Surface", "Middle", "Bottom") + )) +}) + +testthat::test_that("TADA_FlagDepthCategory stops on multiple depth units", { + df <- make_synth_depth_df_meters() + # Inject a second unit in the temperature rows by populating result depth with different unit + df$TADA.ResultDepthHeightMeasure.MeasureValue <- df$TADA.ActivityDepthHeightMeasure.MeasureValue + df$TADA.ResultDepthHeightMeasure.MeasureUnitCode <- c("m", "m", "ft", "ft") + testthat::expect_error(TADA_FlagDepthCategory(df)) +}) + +testthat::test_that("TADA_FlagDepthCategory handles data with no depth info", { + df <- make_synth_depth_df_meters() + # Wipe out all depth fields so depth.count == 0 + df$TADA.ActivityDepthHeightMeasure.MeasureValue <- NA_real_ + df$TADA.ResultDepthHeightMeasure.MeasureValue <- NA_real_ + out <- TADA_FlagDepthCategory(df) + testthat::expect_true(all(is.na(out$TADA.ConsolidatedDepth))) + testthat::expect_true(all(is.na(out$TADA.ConsolidatedDepth.Bottom))) + testthat::expect_true(all(is.na(out$TADA.DepthCategory.Flag))) +}) + +# ------------------------- +# TADA_IDDepthProfiles tests +# ------------------------- + +testthat::test_that("TADA_IDDepthProfiles lists characteristics with counts (default)", { + df <- make_synth_depth_df_meters() + out <- TADA_IDDepthProfiles( + df, + nresults = TRUE, + nvalue = 2, + aggregates = FALSE + ) + testthat::expect_true(all( + c( + "TADA.MonitoringLocationIdentifier", + "TADA.MonitoringLocationName", + "OrganizationIdentifier", + "ActivityStartDate", + "TADA.CharacteristicsForDepthProfile" + ) %in% + names(out) + )) + # Should include temperature comparable ID with count "(3)" + testthat::expect_true(any(grepl( + "TEMPERATURE_NONE_NONE_DEG C \\(3\\)", + out$TADA.CharacteristicsForDepthProfile + ))) +}) + +testthat::test_that("TADA_IDDepthProfiles without counts and higher threshold", { + df <- make_synth_depth_df_meters() + # With nvalue = 3, temperature group qualifies (3 depths) + out <- TADA_IDDepthProfiles( + df, + nresults = FALSE, + nvalue = 3, + aggregates = FALSE + ) + testthat::expect_true(any(grepl( + "TEMPERATURE_NONE_NONE_DEG C", + out$TADA.CharacteristicsForDepthProfile + ))) + # With nvalue = 4, temperature is dropped; depth-parameter remains only if a profile is present + out2 <- TADA_IDDepthProfiles( + df, + nresults = FALSE, + nvalue = 4, + aggregates = FALSE + ) + # Because the function also requires MeanResults > 1 across the group, + # and only secchi has 1 depth, the whole group will be filtered out. + testthat::expect_equal(nrow(out2), 0) +}) + +testthat::test_that("TADA_IDDepthProfiles respects aggregates = FALSE by ignoring TADA- average rows", { + df <- make_synth_depth_df_meters() + + # First, annotate the base data with consolidated depth/category columns + df_annot <- TADA_FlagDepthCategory(df, dailyagg = "none") + + # Baseline: no TADA- rows present + base_out <- TADA_IDDepthProfiles( + df_annot, + nresults = TRUE, + aggregates = FALSE + ) + + # Create an averaged aggregate row from the annotated data and append + avg_only <- TADA_FlagDepthCategory( + df_annot, + bycategory = "no", + dailyagg = "avg", + aggregatedonly = TRUE + ) + df2 <- dplyr::bind_rows(df_annot, avg_only) + + # Now run IDDepthProfiles with aggregates = FALSE; TADA- row should be ignored + out <- TADA_IDDepthProfiles(df2, nresults = TRUE, aggregates = FALSE) + + # Normalize for comparison: + normalize_df <- function(x) { + x |> + dplyr::ungroup() |> + dplyr::mutate( + TADA.CharacteristicsForDepthProfile = stringr::str_squish( + TADA.CharacteristicsForDepthProfile + ) + ) |> + dplyr::arrange( + TADA.MonitoringLocationIdentifier, + OrganizationIdentifier, + ActivityStartDate, + TADA.CharacteristicsForDepthProfile + ) + } + + out_norm <- normalize_df(out) + base_norm <- normalize_df(base_out) + + testthat::expect_equal(nrow(out_norm), nrow(base_norm)) + testthat::expect_equal(names(out_norm), names(base_norm)) + testthat::expect_equal(out_norm, base_norm, ignore_attr = TRUE) +}) + +testthat::test_that("TADA_DepthProfilePlot checks non-depth-parameter units against `unit`", { + testthat::skip_if_not_installed("plotly") + # Profile-only data in meters; asking for ft should error + df <- make_synth_profile_only_df() + testthat::expect_error(TADA_DepthProfilePlot( + df, + groups = c("TEMPERATURE_NONE_NONE_DEG C"), + location = "LOC1", + activity_date = as.Date("2020-01-01"), + depthcat = FALSE, + unit = "ft" + )) +}) + +testthat::test_that("TADA_DepthProfilePlot argument validation for missing inputs", { + testthat::skip_if_not_installed("plotly") + df <- make_synth_profile_only_df() + testthat::expect_error(TADA_DepthProfilePlot(df)) # missing location/date/groups + testthat::expect_error(TADA_DepthProfilePlot( + df, + groups = c("TEMPERATURE_NONE_NONE_DEG C"), + location = "NOT_IN_DATA", + activity_date = as.Date("2020-01-01") + )) + testthat::expect_error(TADA_DepthProfilePlot( + df, + groups = c("NOT_A_GROUP"), + location = "LOC1", + activity_date = as.Date("2020-01-01") + )) +}) + +testthat::test_that("TADA_DepthProfilePlot depthcat requires at least one threshold when TRUE", { + testthat::skip_if_not_installed("plotly") + df <- make_synth_profile_only_df() + testthat::expect_error(TADA_DepthProfilePlot( + df, + groups = c("TEMPERATURE_NONE_NONE_DEG C"), + location = "LOC1", + activity_date = as.Date("2020-01-01"), + depthcat = TRUE, + surfacevalue = NA_real_, + bottomvalue = NA_real_, + unit = "m" + )) +})