diff --git a/DESCRIPTION b/DESCRIPTION index 51e3deb..e8cccc2 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,6 +1,6 @@ Package: hydrofabric3D Title: hydrofabric3D -Version: 0.1.82 +Version: 0.1.84 Authors@R: c(person("Mike", "Johnson", role = c("aut"), email = "mikecp11@gmail.com"), person("Angus", "Watters", role = c("aut", "cre"), email = "anguswatters@gmail.com"), person("Arash", "Modaresi", role = "ctb"), diff --git a/NAMESPACE b/NAMESPACE index 17bdd39..db1eaf6 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -20,8 +20,10 @@ export(adjust_transect_lengths) export(align_banks_and_bottoms) export(classify_points) export(classify_points2) +export(cluster_dissolve) export(compare_cs_validity) export(cross_section_pts) +export(cs_arrange) export(cut_cross_sections) export(cut_transect) export(drop_incomplete_cs_pts) @@ -40,6 +42,7 @@ export(get_improved_cs_pts) export(get_point_type_counts) export(get_relief) export(get_start_node) +export(get_transect_extension_distances_to_polygons) export(get_transects) export(get_unique_tmp_ids) export(get_validity_tally) @@ -157,6 +160,7 @@ importFrom(sf,st_line_sample) importFrom(sf,st_segmentize) importFrom(sf,st_set_geometry) importFrom(sf,st_transform) +importFrom(sf,st_union) importFrom(smoothr,densify) importFrom(smoothr,smooth) importFrom(stats,median) diff --git a/R/cs_eval.R b/R/cs_angles.R similarity index 100% rename from R/cs_eval.R rename to R/cs_angles.R diff --git a/R/cs_bank_attributes.R b/R/cs_bank_attributes.R new file mode 100644 index 0000000..38c8509 --- /dev/null +++ b/R/cs_bank_attributes.R @@ -0,0 +1,762 @@ + +utils::globalVariables( + c(".", "hy_id", "cs_id", "pt_id", "Z", "middle_index", "point_type", "minZ", + "maxZ", "minZ_bottom", "maxZ_left_bank", "maxZ_right_bank", "valid_left_bank", + "valid_right_bank", "bottom", "left_bank", "right_bank", "valid_banks", + "relative_distance", "cs_lengthm", "default_middle", "has_relief", + "max_relief", "braid_id", "geometry", + + "comid", "fromnode", "tonode", + 
"tocomid", "divergence", "cycle_id", "node", "braid_vector", "totdasqkm", + "changed", "relative_position", "head_distance", "tail_distance", + "component_id", "cs_measure", "ds_distance", "along_channel", "euclid_dist", + "sinuosity", "points_per_cs", "Z_at_bottom", "lower_bound", "upper_bound", + "ge_bottom", "is_near_bottom", "pts_near_bottom", "total_valid_pts", + "pct_near_bottom", + "member_braids", "braid_members", "diff_pts", "is_extended", + "new_cs_id", "split_braid_ids", + + "braid_length", + "crosswalk_id", + "lengthm", + "check_z_values", + "geom", + "is_same_Z", + "is_multibraid", + "channel", "unique_count", + "left_bank_count", "right_bank_count", "channel_count", "bottom_count", + "terminalID", + "tmp_id", + "make_geoms_to_cut_plot", + "Y", "improved", "length_vector_col", "median", "min_ch", "new_validity_score", + "old_validity_score", "transects", "validity_score", "x", + "A", "DEPTH", "DINGMAN_R", "TW", "X", "X_end", "X_start", "Y_end", "Y_start", + "ahg_a", "ahg_index", "ahg_x", "ahg_y", + "bottom_end", "bottom_length", "bottom_midpoint", + "bottom_start", "cs_partition", "distance_interval", "fixed_TW", + "has_new_DEPTH", "has_new_TW", "ind", "is_dem_point", "left_max", + "left_start", "max_right_position", "new_DEPTH", "new_TW", "next_X_is_missing", "next_Y_is_missing", + "parabola", "partition", "prev_X_is_missing", + "prev_Y_is_missing", "right_start", "right_start_max", "start_or_end", "start_pt_id", + "cs_source", + "partition_lengthm", "left_fema_index", "right_fema_index", + "left_is_within_fema", "right_is_within_fema", "left_distance", "right_distance", + "new_cs_lengthm", "polygon_index", + "crosswalk_id", "extend_invalid_transects2", + "anchors", "deriv_type", "edge", "extension_distance", + "left_is_extended", "right_is_extended", "to_node", "verbose", + "toindid", "indid", "toid", "is", "internal_is_braided2" + ) +) + + +#' @title Get the count of each point type in a set of cross section points +#' @description 
#' get_point_type_counts() will create a dataframe providing the counts of every point_type for each hy_id/cs_id in a set of classified cross section points (output of classify_pts())
#' @param classified_pts dataframe or sf dataframe, cross section points with a "hy_id", and "cs_id" columns as well as a 'point_type' column containing the values: "bottom", "left_bank", "right_bank", and "channel"
#' @param crosswalk_id character, ID column
#' @return dataframe or sf dataframe with hy_id, cs_id, and _count columns for each point_type
#' @importFrom sf st_drop_geometry
#' @importFrom dplyr group_by count ungroup summarize filter n_distinct select slice left_join relocate all_of last_col
#' @importFrom tidyr pivot_wider pivot_longer
#' @export
get_point_type_counts <- function(classified_pts, crosswalk_id = NULL) {

  # make a unique ID if one is not given (NULL 'crosswalk_id')
  if (is.null(crosswalk_id)) {
    crosswalk_id <- "hydrofabric_id"
  }

  REQUIRED_COLS <- c(crosswalk_id, "cs_id", "point_type")

  if (!all(REQUIRED_COLS %in% names(classified_pts))) {
    missing_cols <- REQUIRED_COLS[!REQUIRED_COLS %in% names(classified_pts)]
    stop("'classified_pts' is missing one or more of the required columns:\n > ",
         paste0(missing_cols, collapse = "\n > "))
  }

  # type checking - inherits() is the idiomatic check (class(x) can be a
  # multi-element vector); toString() renders every class in the error message
  if (!inherits(classified_pts, c("sf", "tbl_df", "tbl", "data.frame"))) {
    stop("Invalid argument type, 'classified_pts' must be of type 'sf', 'tbl_df', 'tbl' or 'data.frame', given type was '",
         toString(class(classified_pts)), "'")
  }

  # create a copy of the input dataset, add a tmp_id column (crosswalk_id + cs_id)
  stage_df <-
    classified_pts %>%
    sf::st_drop_geometry() %>%
    hydrofabric3D::add_tmp_id(x = crosswalk_id)

  # count the point_types in each cross section; the factor levels guarantee
  # that a point_type absent from a cross section still gets a column (NA)
  # in the pivot_wider() below
  point_type_counts <-
    stage_df %>%
    dplyr::group_by(tmp_id, point_type) %>%
    dplyr::count() %>%
    dplyr::ungroup() %>%
    dplyr::mutate(
      point_type = factor(point_type, levels = c("left_bank", "bottom", "right_bank", "channel"))
    )

  # pivot wide then back to long to materialize implicitly-missing
  # tmp_id/point_type combinations, then replace the resulting NAs with 0
  point_type_counts <-
    point_type_counts %>%
    tidyr::pivot_wider(
      names_from = point_type,
      values_from = n,
      names_expand = TRUE
    ) %>%
    tidyr::pivot_longer(
      cols = c(bottom, channel, right_bank, left_bank),
      names_to = "point_type",
      values_to = "n"
    ) %>%
    dplyr::mutate(n = ifelse(is.na(n), 0, n))

  # sanity check: every cross section must now have all 4 point types
  check_counts <-
    point_type_counts %>%
    dplyr::group_by(tmp_id) %>%
    dplyr::summarize(unique_count = dplyr::n_distinct(point_type)) %>%
    dplyr::filter(unique_count == 4)

  if (length(unique(stage_df$tmp_id)) != nrow(check_counts)) {
    stop("Error validating each hy_id/cs_id cross section contains exactly 4 distinct values in the 'point_type' column")
  }

  # crosswalk each tmp_id back to its crosswalk_id/cs_id pair
  stage_df <-
    stage_df %>%
    dplyr::select(tmp_id, dplyr::any_of(crosswalk_id), cs_id) %>%
    dplyr::group_by(tmp_id) %>%
    dplyr::slice(1) %>%
    dplyr::ungroup()

  # spread the long counts into one "<point_type>_count" column per point type
  point_type_counts <-
    point_type_counts %>%
    tidyr::pivot_wider(names_from = point_type,
                       names_glue = "{point_type}_count",
                       values_from = n) %>%
    dplyr::left_join(
      stage_df,
      by = "tmp_id"
    ) %>%
    dplyr::select(
      dplyr::any_of(crosswalk_id),
      cs_id,
      left_bank_count, right_bank_count, channel_count, bottom_count
    )

  return(point_type_counts)

}


#' @title Add the count of each point type as a column to a dataframe of section points
#' @description add_point_type_counts() will add columns to the input dataframe with the counts of every point_type for each hy_id/cs_id in the input dataframe of classified cross section points (output of classify_pts())
#' @param classified_pts dataframe or sf dataframe, cross section points with a "hy_id", and "cs_id" columns as well as a 'point_type' column containing the values: "bottom", "left_bank", "right_bank", and "channel"
#' @param crosswalk_id character, ID column
#' @return dataframe or sf dataframe with "_count" columns added
#' @importFrom sf st_drop_geometry
#' @importFrom dplyr group_by count ungroup summarize filter n_distinct select slice left_join relocate all_of last_col
#' @importFrom tidyr pivot_wider pivot_longer
#' @export
add_point_type_counts <- function(classified_pts, crosswalk_id = NULL) {

  # make a unique ID if one is not given (NULL 'crosswalk_id')
  if (is.null(crosswalk_id)) {
    crosswalk_id <- "hydrofabric_id"
  }

  # type checking
  if (!inherits(classified_pts, c("sf", "tbl_df", "tbl", "data.frame"))) {
    stop("Invalid argument type, 'classified_pts' must be of type 'sf', 'tbl_df', 'tbl' or 'data.frame', given type was '",
         toString(class(classified_pts)), "'")
  }

  # Delegate the per cross-section counting to get_point_type_counts() instead
  # of duplicating its entire pipeline here; it validates the required columns,
  # drops geometries internally, and returns one row per crosswalk_id/cs_id
  # with the 4 "<point_type>_count" columns
  point_type_counts <- get_point_type_counts(classified_pts, crosswalk_id = crosswalk_id)

  # Join the point type counts back onto the original (possibly sf) input
  classified_pts <-
    classified_pts %>%
    dplyr::left_join(
      point_type_counts,
      by = c(crosswalk_id, "cs_id")
    )

  # check if any of the columns in 'classified_pts' are geometry types and move them to the end column if they do exist
  classified_pts <- move_geometry_to_last(classified_pts)

  return(classified_pts)
}

#' @title Adds attributes about the banks of each cross section in a dataframe of cross section points
#' Function adds "bottom", "left_bank", "right_bank" columns that are
#' the Z values
#' of the "lowest" bottom point, and the "highest" left and right bank Z values, respectively.
#' Also adds a "valid_banks" column that is TRUE if the hy_id/cs_id set of cross section points has at least 1 bottom point with
#' at least 1 left bank point AND 1 right bank point that are above the lowest "bottom" point.
#' @param classified_pts sf or dataframe of points with "hy_id", "cs_id", and "point_type" columns. Output of hydrofabric3D::classify_pts()
#' @return sf or dataframe with added "bottom", "left_bank", "right_bank", and "valid_banks" columns
#' @importFrom dplyr mutate case_when filter select group_by summarise ungroup left_join
#' @importFrom tidyr pivot_wider
add_bank_attributes <- function(
    classified_pts
) {

  # type checking, throw an error if not "sf", "tbl_df", "tbl", or "data.frame"
  if (!any(class(classified_pts) %in% c("sf", "tbl_df", "tbl", "data.frame"))) {
    stop("Invalid argument type, 'classified_pts' must be of type 'sf', 'tbl_df', 'tbl' or 'data.frame', given type was '",
         class(classified_pts), "'")
  }

  # Add columns with the counts of point types
  # NOTE(review): no crosswalk_id is passed here, so add_point_type_counts()
  # falls back to its default ID even though this function groups by hy_id
  # below -- confirm add_tmp_id() handles that fallback for hy_id-keyed data
  classified_pts <- hydrofabric3D::add_point_type_counts(classified_pts)

  # TODO: Need to add code that will just set aside the geometries and add them back to the final output dataset
  # For now we will just drop geometries as safety precaution (as to not summarize() on a massive number of sf geometries)
  classified_pts <- sf::st_drop_geometry(classified_pts)

  # Add a valid_count column which is TRUE
  # if a hy_id/cs_id has a bottom point AND at least 1 left and right bank point
  classified_pts <-
    classified_pts %>%
    dplyr::mutate(
      valid_count = dplyr::case_when(
        (bottom_count > 0 &
         left_bank_count > 0 &
         right_bank_count > 0) ~ TRUE,
        TRUE ~ FALSE
      )
    )

  # Minimum bottom Z and max left/right bank Z per cross section, pivoted so
  # each cross section is one row with bottom / left_bank / right_bank columns
  bank_validity <-
    classified_pts %>%
    dplyr::filter(point_type %in% c("bottom", "left_bank", "right_bank")) %>%
    dplyr::select(hy_id, cs_id, pt_id, Z, point_type) %>%
    dplyr::group_by(hy_id, cs_id, point_type) %>%
    dplyr::summarise(
      minZ = min(Z, na.rm = TRUE),
      maxZ = max(Z, na.rm = TRUE)
    ) %>%
    dplyr::ungroup() %>%
    tidyr::pivot_wider(
      names_from = point_type,
      values_from = c(minZ, maxZ)
    ) %>%
    dplyr::select(hy_id, cs_id,
                  bottom = minZ_bottom,
                  left_bank = maxZ_left_bank,
                  right_bank = maxZ_right_bank
    )

  # A bank is "valid" when its highest point is strictly above the lowest
  # bottom point AND is not NA (NA left/bank/bottom Z values are NOT zeroed
  # out -- the explicit !is.na() guards handle them instead); valid_banks
  # requires BOTH banks to be valid
  bank_validity <-
    bank_validity %>%
    dplyr::mutate(
      valid_left_bank = dplyr::case_when(
        (left_bank > bottom) & (!is.na(left_bank)) ~ TRUE,
        TRUE ~ FALSE
      ),
      valid_right_bank = dplyr::case_when(
        (right_bank > bottom) & (!is.na(right_bank)) ~ TRUE,
        TRUE ~ FALSE
      ),
      valid_banks = valid_left_bank & valid_right_bank
    )

  # Join the per cross-section bank attributes back onto the points:
  # bottom      - numeric, Z of the lowest "bottom" point
  # left_bank   - numeric, Z of the highest "left_bank" point (NA if none exist)
  # right_bank  - numeric, Z of the highest "right_bank" point (NA if none exist)
  # valid_banks - logical, TRUE when both banks rise above the lowest bottom point
  classified_pts <-
    classified_pts %>%
    dplyr::left_join(
      dplyr::select(bank_validity,
                    hy_id, cs_id,
                    bottom, left_bank, right_bank,
                    valid_left_bank, valid_right_bank, valid_banks
      ),
      by = c("hy_id", "cs_id")
    )

  # subset to the point columns plus the added bank attribute columns
  classified_pts <-
    classified_pts %>%
    dplyr::select(hy_id, cs_id, pt_id, Z,
                  relative_distance, cs_lengthm,
                  class, point_type,
                  bottom, left_bank, right_bank, valid_banks)

  # check if any of the columns in 'classified_pts' are geometry types and move them to the end column if they do exist
  classified_pts <- move_geometry_to_last(classified_pts)

  return(classified_pts)

}

#' @title Get attributes about the banks of each cross section in a dataframe of cross section points
#' Given a set of cross section points with point_type column, return a dataframe of the unique hy_id/cs_ids with the following calculated columns:
#' "bottom", "left_bank", "right_bank" columns which are the Z values of the "lowest" bottom point, and the "highest" left and right bank Z values, respectively.
#' And a "valid_banks" column indicating whether the hy_id/cs_id set of cross section points has at least a single bottom point with
#' at least 1 left bank point AND 1 right bank point that are above the lowest "bottom" point.
#' @param classified_pts sf or dataframe of points with "hy_id", "cs_id", and "point_type" columns. Output of hydrofabric3D::classify_pts()
#' @param crosswalk_id character, ID column
#' @return dataframe with each row being a unique hy_id/cs_id with "bottom", "left_bank", "right_bank", and "valid_banks" values for each hy_id/cs_id.
#' @importFrom dplyr mutate case_when filter select group_by summarise ungroup left_join rename any_of across bind_rows
#' @importFrom tidyr pivot_wider
#' @export
get_bank_attributes <- function(
    classified_pts,
    crosswalk_id = NULL
) {

  # type checking, throw an error if not "sf", "tbl_df", "tbl", or "data.frame"
  if (!any(class(classified_pts) %in% c("sf", "tbl_df", "tbl", "data.frame"))) {
    stop("Invalid argument type, 'classified_pts' must be of type 'sf', 'tbl_df', 'tbl' or 'data.frame', given type was '",
         class(classified_pts), "'")
  }

  # Add columns with the counts of point types
  classified_pts <- add_point_type_counts(classified_pts, crosswalk_id)

  # TODO: Need to add code that will just set aside the geometries and add them back to the final output dataset
  # For now we will just drop geometries as safety precaution (as to not summarize() on a massive number of sf geometries)
  classified_pts <- sf::st_drop_geometry(classified_pts)

  # Add a valid_count column which is TRUE
  # if a hy_id/cs_id has a bottom point AND at least 1 left and right bank point
  classified_pts <-
    classified_pts %>%
    dplyr::mutate(
      valid_count = dplyr::case_when(
        (bottom_count > 0 &
         left_bank_count > 0 &
         right_bank_count > 0) ~ TRUE,
        TRUE ~ FALSE
      )
    )

  # Per cross section: min bottom Z and max left/right bank Z, grouped by the
  # caller-supplied crosswalk_id (across()/any_of() keep the pipeline ID-agnostic)
  bank_validity <-
    classified_pts %>%
    dplyr::filter(point_type %in% c("bottom", "left_bank", "right_bank")) %>%
    dplyr::select(dplyr::any_of(crosswalk_id), cs_id, pt_id, Z, point_type) %>%
    dplyr::group_by(dplyr::across(dplyr::any_of(c(crosswalk_id, "cs_id", "point_type")))) %>%
    dplyr::summarise(
      minZ = min(Z, na.rm = TRUE),
      maxZ = max(Z, na.rm = TRUE)
    ) %>%
    dplyr::ungroup() %>%
    tidyr::pivot_wider(
      names_from = point_type,
      values_from = c(minZ, maxZ)
    ) %>%
    # any_of() because a point type that is absent from the whole input
    # produces NO minZ_/maxZ_ column in the pivot above
    dplyr::select(
      dplyr::any_of(
        c(
          crosswalk_id,
          "cs_id",
          "minZ_bottom",
          "maxZ_left_bank",
          "maxZ_right_bank"
        ))
    ) %>%
    dplyr::rename(
      dplyr::any_of(c(
        bottom = "minZ_bottom",
        left_bank = "maxZ_left_bank",
        right_bank = "maxZ_right_bank"
      ))
    )

  # make sure that all the required columns are present, if a column is missing, add that column and set the values to NA
  required_pt_cols <- c("bottom", "left_bank", "right_bank")

  for (col in required_pt_cols) {
    if (!col %in% names(bank_validity)) {
      bank_validity[[col]] <- NA
    }
  }

  # A bank is "valid" when its highest point is strictly above the lowest
  # bottom point AND is not NA (NA Z values are left as NA rather than zeroed,
  # the explicit !is.na() guards handle them); valid_banks requires BOTH banks
  bank_validity <-
    bank_validity %>%
    dplyr::mutate(
      valid_left_bank = dplyr::case_when(
        (left_bank > bottom) & (!is.na(left_bank)) ~ TRUE,
        TRUE ~ FALSE
      ),
      valid_right_bank = dplyr::case_when(
        (right_bank > bottom) & (!is.na(right_bank)) ~ TRUE,
        TRUE ~ FALSE
      ),
      valid_banks = valid_left_bank & valid_right_bank
    )

  # set default column values for any IDs that didn't have 'left_bank', 'right_bank', or 'bottom' point_types
  bank_validity_tmp_ids <- add_tmp_id(bank_validity, x = crosswalk_id)$tmp_id

  default_bank_attrs <-
    classified_pts %>%
    add_tmp_id(x = crosswalk_id) %>%
    dplyr::filter(
      !tmp_id %in% bank_validity_tmp_ids
    ) %>%
    dplyr::select(dplyr::any_of(crosswalk_id), cs_id, tmp_id) %>%
    dplyr::group_by(tmp_id) %>%
    dplyr::slice(1) %>%
    dplyr::ungroup() %>%
    dplyr::select(-tmp_id) %>%
    add_default_bank_attributes()

  # subset to just crosswalk_id/cs_id and the added bank attributes to
  # return a dataframe with one row per unique crosswalk_id/cs_id
  bank_validity <-
    bank_validity %>%
    dplyr::select(
      dplyr::any_of(crosswalk_id),
      cs_id,
      bottom, left_bank, right_bank, valid_banks
    ) %>%
    dplyr::bind_rows(
      default_bank_attrs
    )

  return(bank_validity)

}

#' Add "bottom", "left_bank", "right_bank", and "valid_banks" column defaults to a dataframe
#' Internal helper function for get_bank_attributes()
#' @param df dataframe, tibble, or sf dataframe
#'
#' @return dataframe, tibble, or sf dataframe
#' @noRd
#' @keywords internal
add_default_bank_attributes <- function(df) {
  # NA bank/bottom values + valid_banks = FALSE for cross sections that had
  # none of the three bank/bottom point types
  bank_attrs_cols <- c("bottom", "left_bank", "right_bank")

  for (col in bank_attrs_cols) {
    df[[col]] <- NA
  }

  df$valid_banks <- FALSE

  return(df)

}


# TODO: DELETE add_point_type_counts2()

#' @title Add the count of each point type as a column to a dataframe of section points
#' @description add_point_type_counts() will add columns to the input dataframe with the counts of every point_type for each hy_id/cs_id in the input dataframe of classified cross section points (output of classify_pts())
#' @param classified_pts dataframe or sf dataframe, cross section points with a "hy_id", and
#' "cs_id" columns as well as a 'point_type' column containing the values: "bottom", "left_bank", "right_bank", and "channel"
#' @return dataframe or sf dataframe with "_count" columns added
#' @importFrom sf st_drop_geometry
#' @importFrom dplyr group_by count ungroup summarize filter n_distinct select slice left_join relocate all_of last_col
#' @importFrom tidyr pivot_wider pivot_longer
#' @noRd
#' @keywords internal
add_point_type_counts2 <- function(classified_pts) {
  # NOTE(review): legacy hy_id-hardcoded version of add_point_type_counts(),
  # flagged for deletion by the TODO above

  # type checking
  if (!any(class(classified_pts) %in% c("sf", "tbl_df", "tbl", "data.frame"))) {
    stop("Invalid argument type, 'classified_pts' must be of type 'sf', 'tbl_df', 'tbl' or 'data.frame', given type was '",
         class(classified_pts), "'")
  }

  # create a copy of the input dataset, add a tmp_id column
  stage_df <-
    classified_pts %>%
    sf::st_drop_geometry() %>%
    hydrofabric3D::add_tmp_id()

  # get a count of the point_types in each hy_id/cs_id group (i.e. each cross section)
  point_type_counts <-
    stage_df %>%
    dplyr::group_by(tmp_id, point_type) %>%
    dplyr::count() %>%
    dplyr::ungroup() %>%
    dplyr::mutate(
      # add levels to the point_type column so if a given point_type
      # is NOT in the cross section points, then it will be added with NAs in the subsequent pivot_wider
      point_type = factor(point_type, levels = c("left_bank", "bottom", "right_bank", "channel"))
    )

  # pivot data wider to get implicit missing groups with NA values
  point_type_counts <-
    point_type_counts %>%
    tidyr::pivot_wider(
      names_from = point_type,
      values_from = n,
      names_expand = TRUE
    )

  # pivot back to long and replace the NAs for missing point types with 0
  point_type_counts <-
    point_type_counts %>%
    tidyr::pivot_longer(
      cols = c(bottom, channel, right_bank, left_bank),
      names_to = "point_type",
      values_to = "n"
    ) %>%
    dplyr::mutate(n = ifelse(is.na(n), 0, n))

  # make sure that all tmp_id groups have all 4 point types
  check_counts <-
    point_type_counts %>%
    dplyr::group_by(tmp_id) %>%
    dplyr::summarize(unique_count = dplyr::n_distinct(point_type)) %>%
    dplyr::filter(unique_count == 4)

  # if the number of distinct point types in each cross section is not 4, raise an error
  if (length(unique(stage_df$tmp_id)) != nrow(check_counts)) {
    stop("Error validating each hy_id/cs_id cross section contains exactly 4 distinct values in the 'point_type' column")
  }

  # get the hy_id, cs_id for each tmp_id to cross walk back to just using hy_id/cs_id
  stage_df <-
    stage_df %>%
    dplyr::select(tmp_id, hy_id, cs_id) %>%
    dplyr::group_by(tmp_id) %>%
    dplyr::slice(1) %>%
    dplyr::ungroup()

  # convert the column of point types to be a column for each point type that
  # has the point type count for each hy_id/cs_id (cross section)
  point_type_counts <-
    point_type_counts %>%
    tidyr::pivot_wider(names_from = point_type,
                       names_glue = "{point_type}_count",
                       values_from = n) %>%
    dplyr::left_join(
      stage_df,
      by = "tmp_id"
    ) %>%
    dplyr::select(hy_id, cs_id, left_bank_count, right_bank_count, channel_count, bottom_count)

  # Join the point type counts to the original dataframe
  classified_pts <-
    classified_pts %>%
    dplyr::left_join(
      point_type_counts,
      by = c("hy_id", "cs_id")
    )

  # check if any of the columns in 'classified_pts' are geometry types and move them to the end column if they do exist
  classified_pts <- move_geometry_to_last(classified_pts)

  return(classified_pts)
}


#' Calculate the length between the leftmost and rightmost bottom point in each cross section
#'
#' @param cross_section_pts dataframe, or sf dataframe of cross section points
#' @param crosswalk_id character, ID column
#' @importFrom dplyr select mutate case_when group_by lag ungroup filter summarise left_join across any_of
#' @return summarized dataframe of input cross_section_pts dataframe with a bottom_length value for each hy_id/cs_id
#' @export
get_cs_bottom_length <-
function(cross_section_pts,
+                                 crosswalk_id = NULL) {
+
+  # make a unique ID if one is not given (NULL 'crosswalk_id')
+  if(is.null(crosswalk_id)) {
+    # x <- add_hydrofabric_id(x)
+    crosswalk_id  <- 'hydrofabric_id'
+  }
+
+  REQUIRED_COLS <- c(crosswalk_id, "cs_id", "pt_id", "relative_distance", "point_type")
+
+  # validate input graph
+  is_valid <- validate_df(cross_section_pts, REQUIRED_COLS, "cross_section_pts")
+
+  # get the distance between cross section pts in each cross section,
+  # this will be used as a default for bottom length in case bottom length is 0
+  interval_distances <-
+    cross_section_pts %>%
+    dplyr::select(dplyr::any_of(crosswalk_id), cs_id, pt_id, relative_distance) %>%
+    dplyr::group_by(dplyr::across(dplyr::any_of(c(crosswalk_id, "cs_id")))) %>%
+    # dplyr::select(hy_id, cs_id, pt_id, relative_distance) %>%
+    # dplyr::group_by(hy_id, cs_id) %>%
+    dplyr::mutate(
+      distance_interval = relative_distance - dplyr::lag(relative_distance)
+    ) %>%
+    dplyr::summarise(
+      distance_interval = ceiling(mean(distance_interval, na.rm = TRUE)) # TODO: round up to make sure we are not underestimating
+      # the interval, we're going to use this value to
+      # derive a new Top width for each cross section if
+      # the cross section length is less than the prescribed top width
+    ) %>%
+    dplyr::ungroup()
+
+  # get the distance from the first and last bottom points, substitute any bottom lengths == 0
+  # with the interval between points distance
+  bottom_lengths <-
+    cross_section_pts %>%
+    dplyr::filter(point_type == "bottom") %>%
+    dplyr::select(dplyr::any_of(crosswalk_id), cs_id, pt_id, relative_distance) %>%
+    dplyr::group_by(dplyr::across(dplyr::any_of(c(crosswalk_id, "cs_id")))) %>%
+    # dplyr::select(hy_id, cs_id, pt_id, relative_distance) %>%
+    # dplyr::group_by(hy_id, cs_id) %>%
+    dplyr::summarise(
+      bottom_start = min(relative_distance, na.rm = TRUE),
+      bottom_end   = max(relative_distance, na.rm = TRUE)
+    ) %>%
+    dplyr::left_join(
+      interval_distances,
+      by = 
c(crosswalk_id, "cs_id") + # by = c("hy_id", "cs_id") + ) %>% + dplyr::group_by(dplyr::across(dplyr::any_of(c(crosswalk_id, "cs_id")))) %>% + # dplyr::group_by(hy_id, cs_id) %>% + dplyr::mutate( + bottom_length = bottom_end - bottom_start + ) %>% + dplyr::ungroup() %>% + dplyr::mutate( + bottom_length = dplyr::case_when( + floor(bottom_length) == 0 ~ distance_interval, + TRUE ~ bottom_length + ) + ) %>% + dplyr::select(dplyr::any_of(crosswalk_id), cs_id, bottom_length) + # dplyr::select(hy_id, cs_id, bottom_length) + + return(bottom_lengths) + +} #' Given provide inchannel widths and depths to a set of cross section points and derive estimated shapes #' @description diff --git a/R/cs_improvements.R b/R/cs_improvements.R index 3db2651..74a5727 100644 --- a/R/cs_improvements.R +++ b/R/cs_improvements.R @@ -51,6 +51,304 @@ utils::globalVariables( # TODO: Finalize version 3 that extends from BOTH directions instead of individually extending LEFT and RIGHT +#' Check if dataset X has all the same unique tmp_ids as dataset Y +#' Internal helper function for keeping track of IDs when they should be identical between 2 datasets +#' @param x tibble, dataframe, or sf dataframe +#' @param y tibble, dataframe, or sf dataframe +#' @param crosswalk_id character, unique ID column +#' +#' @return logical, TRUE if all id / cs_id combos are contained in both dataset x and y +#' @noRd +#' @keywords internal +has_same_unique_tmp_ids <- function(x, y, crosswalk_id = NULL) { + + if(is.null(crosswalk_id)) { + crosswalk_id = "hydrofabric_id" + } + + start_ids <- get_unique_tmp_ids(df = x, x = crosswalk_id, y = "cs_id") + end_ids <- get_unique_tmp_ids(df = y, x = crosswalk_id, y = "cs_id") + + # all IDs are in x AND y and same number of ids + same_unique_ids <- all(start_ids %in% end_ids) && all(end_ids %in% start_ids) && length(start_ids) == length(end_ids) + + return(same_unique_ids) +} + +#' Check if dataset X has all the same unique tmp_ids as dataset Y +#' Internal helper function for 
keeping track of IDs when they should be identical between 2 datasets +#' @param x tibble, dataframe, or sf dataframe +#' @param y tibble, dataframe, or sf dataframe +#' @param crosswalk_id character, unique ID column +#' +#' @return logical, TRUE if all id / cs_id combos are contained in both dataset x and y +#' @noRd +#' @keywords internal +has_same_uids <- function(x, y, crosswalk_id = NULL) { + + if(is.null(crosswalk_id)) { + crosswalk_id = "hydrofabric_id" + } + + x_uids <- get_unique_tmp_ids(df = x, x = crosswalk_id, y = "cs_id") + y_uids <- get_unique_tmp_ids(df = y, x = crosswalk_id, y = "cs_id") + + # all IDs are in x AND y and same number of ids + same_unique_ids <- all(x_uids %in% y_uids) && all(y_uids %in% x_uids) && length(x_uids) == length(y_uids) + # same_unique_ids <- all(x_uids %in% y_uids) && all(y_uids %in% x_uids) + + return(same_unique_ids) + +} + +#' Join 'valid_banks' and 'has_relief' columns to transects dataset from corresponding cross section points +#' +#' @param transects dataframe or sf dataframe of transects +#' @param cs_pts dataframe or sf dataframe cross section points corresponding to transects +#' @param crosswalk_id character, unique ID column +#' @importFrom dplyr left_join select group_by across any_of slice ungroup +#' @importFrom sf st_drop_geometry +#' @return dataframe or sf dataframe +#' @noRd +#' @keywords internal +add_cs_attributes_to_transects <- function(transects, + cs_pts, + crosswalk_id = NULL) { + # validate input datas + is_transects_valid <- validate_df(transects, + c(crosswalk_id, "cs_id"), + "transects") + + is_cs_pts_valid <- validate_df(cs_pts, + c(crosswalk_id, "cs_id", "valid_banks", "has_relief"), + "cs_pts") + + # join 'valid_banks' and 'has_relief' columns to transects from cs_pts + transects <- + transects %>% + dplyr::left_join( + cs_pts %>% + sf::st_drop_geometry() %>% + dplyr::group_by(dplyr::across(dplyr::any_of(c(crosswalk_id, "cs_id")))) %>% + dplyr::slice(1) %>% + dplyr::ungroup() %>% + 
dplyr::select(dplyr::any_of(crosswalk_id), cs_id, valid_banks, has_relief), + by = c(crosswalk_id, "cs_id") + ) + + return(transects) + +} + +#' Add an extension_distance column based off valid_banks and has_relief attributes +#' +#' @param transects dataframe, tibble or sf dataframe with length_col, "valid_banks", and "has_relief" columns +#' @param scale numeric, percentage of current transect line length to extend transects in transects_to_extend by. Default is 0.5 (50% of the transect length) +#' @param length_col character, name of the column with the numeric cross section length +#' +#' @return dataframe, tibble or sf dataframe +#' @importFrom dplyr mutate case_when +add_attribute_based_extension_distances <- function(transects, + scale = 0.5, + length_col = NULL +) { + # transects <- + # transects %>% + # dplyr::mutate( + # has_relief = TRUE, + # valid_banks = FALSE + # ) + + # scale = 0.5 + # length_col = NULL + + + if(!inherits(scale, "numeric")) { + stop("Invalid 'scale' value, scale must be an integer or float numeric value") + } + + if (scale < 0) { + stop("Invalid 'scale' value, scale must be numeric value greater than or equal to 0") + } + + if(is.null(length_col)) { + stop("Missing 'length_col' character input indicating which column in 'transects' is a numeric vector of the lengths of each transect") + } + + REQUIRED_COLS <- c(length_col, "valid_banks", "has_relief") + + # validate input graph + is_valid <- validate_df(transects, REQUIRED_COLS, "transects") + + # TODO: this should be reviewed + # NOTE: --> setting a default of FALSE for NA valid_banks and NA has_relief values + transects <- + transects %>% + dplyr::mutate( + valid_banks = dplyr::case_when( + is.na(valid_banks) ~ FALSE, + TRUE ~ valid_banks + ), + has_relief = dplyr::case_when( + is.na(has_relief) ~ FALSE, + TRUE ~ has_relief + ) + ) + + # add distances to extend for the left and right side of a transect + # for any of the the already "valid transects", we just set an extension 
distance of 0 + # on both sides and these transects will be KEPT AS IS + transects <- + transects %>% + dplyr::mutate( + extension_distance = dplyr::case_when( + !valid_banks | !has_relief ~ (((scale)*(.data[[length_col]])) / 2), + TRUE ~ 0 + ) + ) + + return(transects) + +} + +#' Set valid_banks and has_relief values to TRUE if an NA exists in either column +#' +#' @param x dataframe or sf dataframe with valid_banks and has_relief columns +#' +#' @importFrom dplyr mutate case_when +#' +#' @return dataframe or sf dataframe with valid_banks and has_relief set to TRUE +#' @noRd +#' @keywords internal +fill_missing_cs_attributes <- function(x) { + + # validate input datas + is_valid <- validate_df(x, + c("valid_banks", "has_relief"), + "x") + x <- + x %>% + dplyr::mutate( + valid_banks = dplyr::case_when( + is.na(valid_banks) | is.na(has_relief) ~ TRUE, + TRUE ~ valid_banks + ), + has_relief = dplyr::case_when( + is.na(valid_banks) | is.na(has_relief) ~ TRUE, + TRUE ~ has_relief + ) + # valid_banks = ifelse(is.na(valid_banks), TRUE, valid_banks), + # has_relief = ifelse(is.na(has_relief), TRUE, has_relief) + ) + + + return(x) + +} + +#' Add a flagged and extension distance columns to set of transects with CS attributes based on new cross section points data +#' +#' @param x sf dataframe of transects +#' @param crosswalk_id character, unique ID column +#' @param points_per_cs numeric +#' @param min_pts_per_cs numeric +#' @param dem character +#' @param pct_of_length_for_relief numeric +#' @param na.rm logical, whether to remove NAs from the given cross section points and any NA comparison points pulled from the dem. 
Default is TRUE +#' @importFrom hydroloom rename_geometry +#' @importFrom dplyr left_join mutate any_of select case_when +#' +#' @return sf dataframe of transects with updated geometries +#' @export +flag_transects_for_change <- function( + x, + crosswalk_id = NULL, + points_per_cs = NULL, + min_pts_per_cs = 10, + dem = "/vsicurl/https://prd-tnm.s3.amazonaws.com/StagedProducts/Elevation/13/TIFF/USGS_Seamless_DEM_13.vrt", + pct_of_length_for_relief = 0.01, + na.rm = TRUE +) { + + # ----------------------------------------------------------- + # test data + # ----------------------------------------------------------- + + # x <- extended + # crosswalk_id = CROSSWALK_ID + # points_per_cs = POINTS_PER_CS + # min_pts_per_cs = MIN_PTS_PER_CS + # dem = DEM_PATH + # pct_of_length_for_relief = PCT_LENGTH_OF_CROSS_SECTION_FOR_RELIEF + + # crosswalk_id = "hy_id" + # points_per_cs = NULL + # min_pts_per_cs = 10 + # dem = "/vsicurl/https://prd-tnm.s3.amazonaws.com/StagedProducts/Elevation/13/TIFF/USGS_Seamless_DEM_13.vrt" + # pct_of_length_for_relief = 0.01 + + # ----------------------------------------------------------- + # set geometry column names at beginning + x <- hydroloom::rename_geometry(x, "geometry") + + # validate input datas + is_valid_df <- validate_df(x, + c(crosswalk_id, "cs_id", + "cs_lengthm", + + "initial_length", + "left_distance", "right_distance", + + "cs_measure", + "valid_banks", "has_relief", "geometry"), + "x") + + # get cross section point elevations`` + new_cs_pts <- cross_section_pts( + cs = x, + crosswalk_id = crosswalk_id, + points_per_cs = points_per_cs, + min_pts_per_cs = min_pts_per_cs, + dem = dem + ) %>% + # drop_incomplete_cs_pts(crosswalk_id) %>% + classify_points( + crosswalk_id = crosswalk_id, + pct_of_length_for_relief = pct_of_length_for_relief, + na.rm = na.rm + ) %>% + add_tmp_id(crosswalk_id) %>% + fill_missing_cs_attributes() + + # compare validity scores between initial validity values and new ones + cs_validities <- 
compare_cs_validity(cs_pts1 = x, + cs_pts2 = new_cs_pts, + crosswalk_id = crosswalk_id + ) + + # identify transects to shorten back to original length and provide a distance to shorten by (extension_distance) + x <- + x %>% + add_tmp_id(crosswalk_id) %>% + dplyr::left_join( + cs_validities %>% + dplyr::select(dplyr::any_of(crosswalk_id), + cs_id, is_improved), + by = c(crosswalk_id, "cs_id") + ) %>% + dplyr::mutate( + flagged = (!is_improved) & ((initial_length < cs_lengthm) | (left_distance > 0) | (right_distance > 0)), + extension_distance = ((cs_lengthm - initial_length) / 2) + # extension_distance = pmin(left_distance, right_distance) + + # flagged = (!is_improved) & (initial_length < cs_lengthm), + # extension_distance = ((cs_lengthm - initial_length) / 2) + ) + + return(x) + +} + #' @title Check and fix cross section points with limited variation in Z values (without removing any flowlines) #' @description Duplicate process as rectify_cs() but does NOT remove any cross sections, only attempts to extend transects and improve cross sections. This function takes in a set of cross section points (cs_pts), a flowline network (net) and a set of transects lines for that flowline network (transects). #' This function assumes the cross section points have been classified via "classify_points()" and have "has_relief" and "valid_banks" logical columns. @@ -1532,162 +1830,6 @@ get_improved_cs_pts = function( # } -#' @title Fix IDs in a dataframe -#' -#' @description -#' This function renumbers cross section IDs in a dataframe to ensure each crosswalk_id has cross sections -#' numbered from 1 to the total number of cross sections on the crosswalk_id. -#' -#' @param df A dataframe containing crosswalk_id and cs_id columns. -#' @param crosswalk_id crosswalk_id character, name of primary ID column -#' @return The input dataframe with renumbered cs_id values. 
-#' @importFrom dplyr select group_by slice ungroup mutate n left_join rename relocate -#' @importFrom sf st_drop_geometry -#' @export -renumber_cs_ids <- function(df, crosswalk_id = NULL) { - # df <- out_transect_lines - # df <- fixed_cs_pts - # df <- - # df %>% - # sf::st_drop_geometry() %>% - # dplyr::filter(id %in% c("wb-2415765", "wb-2416657")) %>% - # # dplyr::filter(id %in% c("wb-2416492", "wb-2414869")) %>% - # dplyr::select(id, cs_id, pt_id, cs_measure) - - # seq(0, 100, length.out = 10) - # c(seq(0, 100, length.out = 10), - # seq(0, 100, length.out = 10)) - # c(rep("A", 10), rep("B", 10)) - # - # df <- data.frame( - # id = c(rep("A", 10), - # rep("B", 10)), - # cs_id = c(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, - # 1, 2, 3, 4, 5, 6, 7, 8, 9, 10 - # ), - # cs_measure = c(seq(0, 100, length.out = 10), - # seq(0, 100, length.out = 10)) - # ) - - # df <- data.frame( - # id = c(rep("A", 10), - # rep("B", 9)), - # cs_id = c(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, - # 1, 2, 3, 4, 5, 6, 7, 9, 10 - # ), - # cs_measure = c(seq(0, 100, length.out = 10), - # seq(0, 100, length.out = 9)) - # ) - - # df <- data.frame( - # id = c(rep("A", 10), - # rep("B", 8)), - # cs_id = c(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, - # 1, 3, 4, 5, 6, 7, 9, 10 - # ), - # cs_measure = c(seq(0, 100, length.out = 10), - # seq(0, 100, length.out = 8)) - # ) - # - # df <- data.frame( - # id = c(rep("A", 10), - # rep("B", 8)), - # cs_id = c(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, - # 1, 3, 4, 5, 6, 9, 7, 10 - # ), - # cs_measure = c(seq(0, 100, length.out = 10), - # c(0.00000 , 14.2857, 28.57143, 42.85714, 57.14286, 85.7142, 71.42857, 100.00000) - # # seq(0, 100, length.out = 8) - # ) - # ) - - is_valid_df <- validate_df(df, - c(crosswalk_id, "cs_id", "cs_measure"), - "df") - - # set to the default unique crosswalk ID if NULL is given - if(is.null(crosswalk_id)) { - crosswalk_id <- 'hydrofabric_id' - } - - if (!crosswalk_id %in% names(df)) { - stop("'crosswalk_id' ", crosswalk_id, " is not a column in input dataframe.") - } - - if 
(!"cs_id" %in% names(df)) { - stop("'cs_id' is not a column in input dataframe. Input dataframe must have a 'cs_id' column to uniquely identfy each cross section within each 'crosswalk_id'") - } - - if (length(unique(df[[crosswalk_id]])) == 0) { - stop("'crosswalk_id' ", crosswalk_id, " contains only empty values") - } - - if (length(unique(df$cs_id)) == 0) { - stop("'cs_id' contains only empty values") - } - - if (any(is.na(df[[crosswalk_id]]))) { - stop("'crosswalk_id' ", crosswalk_id, " column contains NA values") - } - - if (any(is.na(df$cs_id))) { - stop("'cs_id' column contains NA values") - } - - # make a dataframe that has a new_cs_id column that has - # the cs_id renumbered to fill in any missing IDs, - # so each hy_id has cs_ids that go from 1 - number of cross sections on hy_id - # The dataframe below will be used to join the "new_cs_id" with - # the original "cs_ids" in the final_pts output data - renumbered_ids <- - df %>% - sf::st_drop_geometry() %>% - dplyr::select( - # hy_id, - dplyr::any_of(crosswalk_id), - cs_id, - cs_measure - # cs_id, pt_id, cs_measure - ) %>% - dplyr::group_by(dplyr::across(dplyr::any_of(c(crosswalk_id, "cs_id")))) %>% - # dplyr::group_by(hy_id, cs_id) %>% - # dplyr::arrange(cs_measure, .by_group = TRUE) %>% - dplyr::slice(1) %>% - dplyr::ungroup() %>% - dplyr::group_by(dplyr::across(dplyr::any_of(c(crosswalk_id)))) %>% - # dplyr::group_by(hy_id) %>% - dplyr::arrange(cs_measure, .by_group = TRUE) %>% - dplyr::mutate( - new_cs_id = 1:dplyr::n() - # tmp_id = paste0(hy_id, "_", cs_id) - ) %>% - add_tmp_id(x = crosswalk_id, y = "cs_id") %>% - dplyr::ungroup() %>% - dplyr::select(new_cs_id, tmp_id) - # - # renumbered_ids %>% - # dplyr::filter(new_cs_id != cs_id) - - # Join the new cs_ids back with the final output data to replace the old cs_ids - df <- dplyr::left_join( - add_tmp_id(df, x = crosswalk_id, y = "cs_id"), - # dplyr::mutate(df,tmp_id = paste0(hy_id, "_", cs_id)), - renumbered_ids, - by = "tmp_id" - ) %>% - 
dplyr::select(-cs_id, -tmp_id) %>% - dplyr::rename("cs_id" = "new_cs_id") %>% - dplyr::relocate(dplyr::any_of(crosswalk_id), cs_id) - # dplyr::relocate(hy_id, cs_id) - - # df %>% - # # dplyr::group_by(dplyr::across(dplyr::any_of(c(crosswalk_id, "cs_id")))) - # dplyr::group_by(dplyr::across(dplyr::any_of(c(crosswalk_id)))) %>% - # dplyr::arrange(cs_measure, .by_group = TRUE) - - - return(df) -} # TODO: Delete @@ -2651,167 +2793,7 @@ match_transects_to_extended_cs_pts <- function(transect_lines, return(out_transect_lines) } -#' Check if dataset X has all the same unique tmp_ids as dataset Y -#' Internal helper function for keeping track of IDs when they should be identical between 2 datasets -#' @param x tibble, dataframe, or sf dataframe -#' @param y tibble, dataframe, or sf dataframe -#' @param crosswalk_id character, unique ID column -#' -#' @return logical, TRUE if all id / cs_id combos are contained in both dataset x and y -#' @noRd -#' @keywords internal -has_same_unique_tmp_ids <- function(x, y, crosswalk_id = NULL) { - - if(is.null(crosswalk_id)) { - crosswalk_id = "hydrofabric_id" - } - - start_ids <- get_unique_tmp_ids(df = x, x = crosswalk_id, y = "cs_id") - end_ids <- get_unique_tmp_ids(df = y, x = crosswalk_id, y = "cs_id") - - # all IDs are in x AND y and same number of ids - same_unique_ids <- all(start_ids %in% end_ids) && all(end_ids %in% start_ids) && length(start_ids) == length(end_ids) - - return(same_unique_ids) -} -#' Set valid_banks and has_relief values to TRUE if an NA exists in either column -#' -#' @param x dataframe or sf dataframe with valid_banks and has_relief columns -#' -#' @importFrom dplyr mutate case_when -#' -#' @return dataframe or sf dataframe with valid_banks and has_relief set to TRUE -#' @noRd -#' @keywords internal -fill_missing_cs_attributes <- function(x) { - - # validate input datas - is_valid <- validate_df(x, - c("valid_banks", "has_relief"), - "x") - x <- - x %>% - dplyr::mutate( - valid_banks = dplyr::case_when( - 
is.na(valid_banks) | is.na(has_relief) ~ TRUE, - TRUE ~ valid_banks - ), - has_relief = dplyr::case_when( - is.na(valid_banks) | is.na(has_relief) ~ TRUE, - TRUE ~ has_relief - ) - # valid_banks = ifelse(is.na(valid_banks), TRUE, valid_banks), - # has_relief = ifelse(is.na(has_relief), TRUE, has_relief) - ) - - - return(x) - -} - -#' Add a flagged and extension distance columns to set of transects with CS attributes based on new cross section points data -#' -#' @param x sf dataframe of transects -#' @param crosswalk_id character, unique ID column -#' @param points_per_cs numeric -#' @param min_pts_per_cs numeric -#' @param dem character -#' @param pct_of_length_for_relief numeric -#' @param na.rm logical, whether to remove NAs from the given cross section points and any NA comparison points pulled from the dem. Default is TRUE -#' @importFrom hydroloom rename_geometry -#' @importFrom dplyr left_join mutate any_of select case_when -#' -#' @return sf dataframe of transects with updated geometries -#' @export -flag_transects_for_change <- function( - x, - crosswalk_id = NULL, - points_per_cs = NULL, - min_pts_per_cs = 10, - dem = "/vsicurl/https://prd-tnm.s3.amazonaws.com/StagedProducts/Elevation/13/TIFF/USGS_Seamless_DEM_13.vrt", - pct_of_length_for_relief = 0.01, - na.rm = TRUE -) { - - # ----------------------------------------------------------- - # test data - # ----------------------------------------------------------- - - # x <- extended - # crosswalk_id = CROSSWALK_ID - # points_per_cs = POINTS_PER_CS - # min_pts_per_cs = MIN_PTS_PER_CS - # dem = DEM_PATH - # pct_of_length_for_relief = PCT_LENGTH_OF_CROSS_SECTION_FOR_RELIEF - - # crosswalk_id = "hy_id" - # points_per_cs = NULL - # min_pts_per_cs = 10 - # dem = "/vsicurl/https://prd-tnm.s3.amazonaws.com/StagedProducts/Elevation/13/TIFF/USGS_Seamless_DEM_13.vrt" - # pct_of_length_for_relief = 0.01 - - # ----------------------------------------------------------- - # set geometry column names at beginning - x 
<- hydroloom::rename_geometry(x, "geometry") - - # validate input datas - is_valid_df <- validate_df(x, - c(crosswalk_id, "cs_id", - "cs_lengthm", - - "initial_length", - "left_distance", "right_distance", - - "cs_measure", - "valid_banks", "has_relief", "geometry"), - "x") - - # get cross section point elevations`` - new_cs_pts <- cross_section_pts( - cs = x, - crosswalk_id = crosswalk_id, - points_per_cs = points_per_cs, - min_pts_per_cs = min_pts_per_cs, - dem = dem - ) %>% - # drop_incomplete_cs_pts(crosswalk_id) %>% - classify_points( - crosswalk_id = crosswalk_id, - pct_of_length_for_relief = pct_of_length_for_relief, - na.rm = na.rm - ) %>% - add_tmp_id(crosswalk_id) %>% - fill_missing_cs_attributes() - - # compare validity scores between initial validity values and new ones - cs_validities <- compare_cs_validity(cs_pts1 = x, - cs_pts2 = new_cs_pts, - crosswalk_id = crosswalk_id - ) - - # identify transects to shorten back to original length and provide a distance to shorten by (extension_distance) - x <- - x %>% - add_tmp_id(crosswalk_id) %>% - dplyr::left_join( - cs_validities %>% - dplyr::select(dplyr::any_of(crosswalk_id), - cs_id, is_improved), - by = c(crosswalk_id, "cs_id") - ) %>% - dplyr::mutate( - flagged = (!is_improved) & ((initial_length < cs_lengthm) | (left_distance > 0) | (right_distance > 0)), - extension_distance = ((cs_lengthm - initial_length) / 2) - # extension_distance = pmin(left_distance, right_distance) - - # flagged = (!is_improved) & (initial_length < cs_lengthm), - # extension_distance = ((cs_lengthm - initial_length) / 2) - ) - - return(x) - -} #Check for flat cross sections and try to update these values by extending the original cross sections and reextracting DEM values diff --git a/R/cs_ordering.R b/R/cs_ordering.R new file mode 100644 index 0000000..bc78af7 --- /dev/null +++ b/R/cs_ordering.R @@ -0,0 +1,272 @@ +utils::globalVariables( + c(".", "hy_id", "cs_id", "pt_id", "Z", "middle_index", "point_type", "minZ", + "maxZ", 
"minZ_bottom", "maxZ_left_bank", "maxZ_right_bank", "valid_left_bank", + "valid_right_bank", "bottom", "left_bank", "right_bank", "valid_banks", + "relative_distance", "cs_lengthm", "default_middle", "has_relief", + "max_relief", "braid_id", "geometry", + + "comid", "fromnode", "tonode", + "tocomid", "divergence", "cycle_id", "node", "braid_vector", "totdasqkm", + "changed", "relative_position", "head_distance", "tail_distance", + "component_id", "cs_measure", "ds_distance", "along_channel", "euclid_dist", + "sinuosity", "points_per_cs", "Z_at_bottom", "lower_bound", "upper_bound", + "ge_bottom", "is_near_bottom", "pts_near_bottom", "total_valid_pts", + "pct_near_bottom", + "member_braids", "braid_members", "diff_pts", "is_extended", + "new_cs_id", "split_braid_ids", + + "braid_length", + "crosswalk_id", + "lengthm", + "check_z_values", + "geom", + "is_same_Z", + "is_multibraid", + "channel", "unique_count", + "left_bank_count", "right_bank_count", "channel_count", "bottom_count", + "terminalID", + "tmp_id", + "make_geoms_to_cut_plot", + "Y", "improved", "length_vector_col", "median", "min_ch", "new_validity_score", + "old_validity_score", "transects", "validity_score", "x", + "A", "DEPTH", "DINGMAN_R", "TW", "X", "X_end", "X_start", "Y_end", "Y_start", + "ahg_a", "ahg_index", "ahg_x", "ahg_y", + "bottom_end", "bottom_length", "bottom_midpoint", + "bottom_start", "cs_partition", "distance_interval", "fixed_TW", + "has_new_DEPTH", "has_new_TW", "ind", "is_dem_point", "left_max", + "left_start", "max_right_position", "new_DEPTH", "new_TW", "next_X_is_missing", "next_Y_is_missing", + "parabola", "partition", "prev_X_is_missing", + "prev_Y_is_missing", "right_start", "right_start_max", "start_or_end", "start_pt_id", + "cs_source", + "partition_lengthm", "left_fema_index", "right_fema_index", + "left_is_within_fema", "right_is_within_fema", "left_distance", "right_distance", + "new_cs_lengthm", "polygon_index", + "crosswalk_id", "extend_invalid_transects2", + "anchors", 
"deriv_type", "edge", "extension_distance", + "left_is_extended", "right_is_extended", "to_node", "verbose", + "toindid", "indid", "toid", "is", "internal_is_braided2" + ) +) + +#' Add a 1:number of rows 'initial_order' column +#' Internal helper function for readability +#' +#' @param x dataframe, sf dataframe or tibble +#' @importFrom dplyr mutate n +#' @return dataframe, sf dataframe or tibble with an added 'initial_order' column +#' @noRd +#' @keywords internal +add_initial_order <- function(x) { + + x <- + x %>% + dplyr::mutate( + initial_order = 1:dplyr::n() + ) + + return(x) + +} + +#' Rearrange transects / cross sections in order from upstream to downstream +#' +#' @param x dataframe, sf dataframe or tibble +#' @param crosswalk_id character, unique ID column +#' @param order_by character, either "cs_id" or "cs_measure" +#' @importFrom dplyr mutate group_by across any_of arrange n ungroup +#' @return dataframe, sf dataframe or tibble with an added 'cs_id' column +#' @export +cs_arrange <- function(x, + crosswalk_id = NULL, + order_by = c("cs_id", "cs_measure") + ) { + + # evaluate order_by choices + order_by <- match.arg(order_by) + + is_x_valid <- validate_df(x, + c(crosswalk_id, order_by), + "x") + + x <- + x %>% + dplyr::group_by(dplyr::across(dplyr::any_of(c(crosswalk_id)))) %>% + dplyr::arrange(.data[[order_by]], .by_group = TRUE) %>% + dplyr::ungroup() + + return(x) + +} + +#' Add a 1:number of cross sections 'cs_id' for each crosswalk_id by cs_measure +#' +#' @param x dataframe, sf dataframe or tibble +#' @param crosswalk_id character, unique ID column +#' @importFrom dplyr mutate group_by across any_of arrange n ungroup +#' @return dataframe, sf dataframe or tibble with an added 'cs_id' column +#' @export +add_cs_id_sequence <- function(x, crosswalk_id = NULL) { + is_x_valid <- validate_df(x, + c(crosswalk_id, "cs_measure"), + "x") + + x <- + x %>% + dplyr::group_by(dplyr::across(dplyr::any_of(c(crosswalk_id)))) %>% + dplyr::arrange(cs_measure, 
.by_group = TRUE) %>% + dplyr::mutate( + cs_id = 1:dplyr::n() + ) %>% + dplyr::ungroup() + + return(x) + +} + +#' Get the initial ordering of crosswalk IDs in a dataframe of transects +#' +#' @param x dataframe or sf dataframe +#' @param crosswalk_id character +#' +#' @importFrom dplyr mutate any_of select group_by slice_min ungroup n +#' @importFrom sf st_drop_geometry +#' @return dataframe, sf dataframe or tibble of crosswalk_id with initial_order column +#' @noRd +#' @keywords internal +get_transect_initial_order <- function(x, crosswalk_id = NULL) { + + is_x_valid <- validate_df(x, + c(crosswalk_id, "cs_id"), + "x") + + x[[crosswalk_id]] <- factor(x[[crosswalk_id]], levels = unique(x[[crosswalk_id]])) + + x_order <- + x %>% + sf::st_drop_geometry() %>% + dplyr::group_by(dplyr::across(dplyr::any_of(c(crosswalk_id)))) %>% + dplyr::slice_min(cs_id, n = 1, with_ties = FALSE) %>% + # dplyr::filter(cs_id == min(cs_id)) %>% + dplyr::ungroup() %>% + dplyr::mutate( + initial_order = 1:dplyr::n() + ) %>% + # hydrofabric3D:::add_initial_order() %>% + dplyr::select(dplyr::any_of(crosswalk_id), initial_order) + + # t_order[[crosswalk_id]] <- as.character(t_order[[crosswalk_id]]) + + return(x_order) + +} + +#' @title Fix IDs in a dataframe +#' +#' @description +#' This function renumbers cross section IDs in a dataframe to ensure each crosswalk_id has cross sections +#' numbered from 1 to the total number of cross sections on the crosswalk_id. +#' +#' @param df A dataframe containing crosswalk_id and cs_id columns. +#' @param crosswalk_id crosswalk_id character, name of primary ID column +#' @return The input dataframe with renumbered cs_id values. 
+#' @importFrom dplyr select group_by slice ungroup mutate n left_join rename relocate +#' @importFrom sf st_drop_geometry +#' @export +renumber_cs_ids <- function(df, crosswalk_id = NULL) { + # df <- data.frame( + # id = c(rep("A", 10), + # rep("B", 10)), + # cs_id = c(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, + # 1, 2, 3, 4, 5, 6, 7, 8, 9, 10 + # ), + # cs_measure = c(seq(0, 100, length.out = 10), + # seq(0, 100, length.out = 10)) + # ) + + is_valid_df <- validate_df(df, + c(crosswalk_id, "cs_id", "cs_measure"), + "df") + + # set to the default unique crosswalk ID if NULL is given + if(is.null(crosswalk_id)) { + crosswalk_id <- 'hydrofabric_id' + } + + if (!crosswalk_id %in% names(df)) { + stop("'crosswalk_id' ", crosswalk_id, " is not a column in input dataframe.") + } + + if (!"cs_id" %in% names(df)) { + stop("'cs_id' is not a column in input dataframe. Input dataframe must have a 'cs_id' column to uniquely identfy each cross section within each 'crosswalk_id'") + } + + if (length(unique(df[[crosswalk_id]])) == 0) { + stop("'crosswalk_id' ", crosswalk_id, " contains only empty values") + } + + if (length(unique(df$cs_id)) == 0) { + stop("'cs_id' contains only empty values") + } + + if (any(is.na(df[[crosswalk_id]]))) { + stop("'crosswalk_id' ", crosswalk_id, " column contains NA values") + } + + if (any(is.na(df$cs_id))) { + stop("'cs_id' column contains NA values") + } + + # make a dataframe that has a new_cs_id column that has + # the cs_id renumbered to fill in any missing IDs, + # so each hy_id has cs_ids that go from 1 - number of cross sections on hy_id + # The dataframe below will be used to join the "new_cs_id" with + # the original "cs_ids" in the final_pts output data + renumbered_ids <- + df %>% + sf::st_drop_geometry() %>% + dplyr::select( + # hy_id, + dplyr::any_of(crosswalk_id), + cs_id, + cs_measure + # cs_id, pt_id, cs_measure + ) %>% + dplyr::group_by(dplyr::across(dplyr::any_of(c(crosswalk_id, "cs_id")))) %>% + # dplyr::group_by(hy_id, cs_id) %>% + # 
dplyr::arrange(cs_measure, .by_group = TRUE) %>% + dplyr::slice(1) %>% + dplyr::ungroup() %>% + dplyr::group_by(dplyr::across(dplyr::any_of(c(crosswalk_id)))) %>% + # dplyr::group_by(hy_id) %>% + dplyr::arrange(cs_measure, .by_group = TRUE) %>% + dplyr::mutate( + new_cs_id = 1:dplyr::n() + # tmp_id = paste0(hy_id, "_", cs_id) + ) %>% + add_tmp_id(x = crosswalk_id, y = "cs_id") %>% + dplyr::ungroup() %>% + dplyr::select(new_cs_id, tmp_id) + # + # renumbered_ids %>% + # dplyr::filter(new_cs_id != cs_id) + + # Join the new cs_ids back with the final output data to replace the old cs_ids + df <- dplyr::left_join( + add_tmp_id(df, x = crosswalk_id, y = "cs_id"), + # dplyr::mutate(df,tmp_id = paste0(hy_id, "_", cs_id)), + renumbered_ids, + by = "tmp_id" + ) %>% + dplyr::select(-cs_id, -tmp_id) %>% + dplyr::rename("cs_id" = "new_cs_id") %>% + dplyr::relocate(dplyr::any_of(crosswalk_id), cs_id) + # dplyr::relocate(hy_id, cs_id) + + # df %>% + # # dplyr::group_by(dplyr::across(dplyr::any_of(c(crosswalk_id, "cs_id")))) + # dplyr::group_by(dplyr::across(dplyr::any_of(c(crosswalk_id)))) %>% + # dplyr::arrange(cs_measure, .by_group = TRUE) + + + return(df) +} diff --git a/R/cs_pts.R b/R/cs_pts.R index c94bd17..67d19ef 100644 --- a/R/cs_pts.R +++ b/R/cs_pts.R @@ -1131,6 +1131,80 @@ classify_points <- function( # ) # ) +#' Remove entire cross sections that have any NA Z (depth) values +#' +#' @param cross_section_pts cs points dataframe, tibble, or sf dataframe +#' @param crosswalk_id unique ID for flowline +#' @importFrom dplyr group_by across any_of ungroup filter +#' @return cross_section_pts dataframe / tibble / sf dataframe with removed cross sections +#' @export +drop_incomplete_cs_pts <- function(cross_section_pts, crosswalk_id = NULL) { + # make a unique ID if one is not given (NULL 'crosswalk_id') + if(is.null(crosswalk_id)) { + crosswalk_id <- 'hydrofabric_id' + } + + cross_section_pts <- + cross_section_pts %>% + 
dplyr::group_by(dplyr::across(dplyr::any_of(c(crosswalk_id, "cs_id")))) %>% + dplyr::filter(!any(is.na(Z))) %>% + dplyr::ungroup() + + return(cross_section_pts) + +} + +#' Add an is_missing_depth flag to cross sections points +#' Any cross section point that has missing Z (depth = NA) values is flagged as is_missing_depth = TRUE +#' +#' @param cs_pts cs points dataframe, tibble, or sf dataframe +#' @importFrom dplyr mutate +#' @return cross_section_pts dataframe / tibble / sf dataframe with cross section points missing depths flag added +#' @export +add_is_missing_depth_flag <- function(cs_pts) { + + cs_pts <- + cs_pts %>% + # dplyr::group_by(dplyr::across(dplyr::any_of(c(crosswalk_id, "cs_id")))) %>% + dplyr::mutate( + is_missing_depth = is.na(Z) + ) + # dplyr::group_by(dplyr::across(dplyr::any_of(c(crosswalk_id, "cs_id")))) %>% + # dplyr::mutate( + # is_missing_depth = any(is.na(Z)) + # ) %>% + # dplyr::ungroup() + + return(cs_pts) + +} + +#' Add an is_complete_cs flag to cross sections points +#' Any cross section that does NOT have ANY NA Z (depth) values is flagged as is_complete_cs = TRUE +#' +#' @param cs_pts cs points dataframe, tibble, or sf dataframe +#' @param crosswalk_id unique ID for flowline +#' @importFrom dplyr group_by across any_of ungroup mutate +#' @return cross_section_pts dataframe / tibble / sf dataframe with cross section points with is_complete_cs flag added +#' @export +add_is_complete_cs_flag <- function(cs_pts, crosswalk_id = NULL) { + # make a unique ID if one is not given (NULL 'crosswalk_id') + if(is.null(crosswalk_id)) { + crosswalk_id <- 'hydrofabric_id' + } + + cs_pts <- + cs_pts %>% + dplyr::group_by(dplyr::across(dplyr::any_of(c(crosswalk_id, "cs_id")))) %>% + dplyr::mutate( + is_complete_cs = !any(is.na(Z)) + ) %>% + dplyr::ungroup() + + return(cs_pts) + +} + #' Classify banks and bottoms #' #' @param num_of_pts integer diff --git a/R/cs_relief.R b/R/cs_relief.R new file mode 100644 index 0000000..bd98d53 --- 
/dev/null +++ b/R/cs_relief.R @@ -0,0 +1,396 @@ + +utils::globalVariables( + c(".", "hy_id", "cs_id", "pt_id", "Z", "middle_index", "point_type", "minZ", + "maxZ", "minZ_bottom", "maxZ_left_bank", "maxZ_right_bank", "valid_left_bank", + "valid_right_bank", "bottom", "left_bank", "right_bank", "valid_banks", + "relative_distance", "cs_lengthm", "default_middle", "has_relief", + "max_relief", "braid_id", "geometry", + + "comid", "fromnode", "tonode", + "tocomid", "divergence", "cycle_id", "node", "braid_vector", "totdasqkm", + "changed", "relative_position", "head_distance", "tail_distance", + "component_id", "cs_measure", "ds_distance", "along_channel", "euclid_dist", + "sinuosity", "points_per_cs", "Z_at_bottom", "lower_bound", "upper_bound", + "ge_bottom", "is_near_bottom", "pts_near_bottom", "total_valid_pts", + "pct_near_bottom", + "member_braids", "braid_members", "diff_pts", "is_extended", + "new_cs_id", "split_braid_ids", + + "braid_length", + "crosswalk_id", + "lengthm", + "check_z_values", + "geom", + "is_same_Z", + "is_multibraid", + "channel", "unique_count", + "left_bank_count", "right_bank_count", "channel_count", "bottom_count", + "terminalID", + "tmp_id", + "make_geoms_to_cut_plot", + "Y", "improved", "length_vector_col", "median", "min_ch", "new_validity_score", + "old_validity_score", "transects", "validity_score", "x", + "A", "DEPTH", "DINGMAN_R", "TW", "X", "X_end", "X_start", "Y_end", "Y_start", + "ahg_a", "ahg_index", "ahg_x", "ahg_y", + "bottom_end", "bottom_length", "bottom_midpoint", + "bottom_start", "cs_partition", "distance_interval", "fixed_TW", + "has_new_DEPTH", "has_new_TW", "ind", "is_dem_point", "left_max", + "left_start", "max_right_position", "new_DEPTH", "new_TW", "next_X_is_missing", "next_Y_is_missing", + "parabola", "partition", "prev_X_is_missing", + "prev_Y_is_missing", "right_start", "right_start_max", "start_or_end", "start_pt_id", + "cs_source", + "partition_lengthm", "left_fema_index", "right_fema_index", + 
"left_is_within_fema", "right_is_within_fema", "left_distance", "right_distance", + "new_cs_lengthm", "polygon_index", + "crosswalk_id", "extend_invalid_transects2", + "anchors", "deriv_type", "edge", "extension_distance", + "left_is_extended", "right_is_extended", "to_node", "verbose", + "toindid", "indid", "toid", "is", "internal_is_braided2" + ) +) + +#' @title Add relief attributes to a dataframe of cross sections points +#' Given a set of cross section points (derived from hydrofabric3D::cross_section_pts() and hydrofabric3D::classify_points()) add a "has_relief" logical +#' value to data. The "has_relief" value indicates whether a cross section "has relief". +#' Relief is determined by checking whether each set of cross section points has a left OR right bank that +#' has a depth difference from the bottom that is greater than or equal to a percentage of the cross section length (e.g. Assuming a 'pct_of_length_for_relief' of 0.01 (1%) of a 100m cross section would have a relief depth threshold of 1m) +#' @param classified_pts sf or dataframe of points with "hy_id", "cs_id", "cs_lengthm", and "point_type" columns. Output of hydrofabric3D::classify_points() +#' @param crosswalk_id character, ID column +#' @param pct_of_length_for_relief numeric, percent of cs_lengthm to use as the threshold depth for classifying whether a cross section has "relief". Default is 0.01 (1% of the cross sections length). 
+#' @return sf or dataframe with added "has_relief" columns or a dataframe of unique hy_id/cs_id and "has_relief" +#' @importFrom dplyr select group_by slice ungroup mutate filter summarise left_join case_when all_of relocate last_col +#' @importFrom tidyr pivot_wider +#' @export +add_relief <- function( + classified_pts, + crosswalk_id = NULL, + pct_of_length_for_relief = 0.01 +) { + + # make a unique ID if one is not given (NULL 'crosswalk_id') + if(is.null(crosswalk_id)) { + # cs <- add_hydrofabric_id(cs) + crosswalk_id <- 'hydrofabric_id' + } + + REQUIRED_COLS <- c(crosswalk_id, "cs_id", "pt_id", "cs_lengthm", "Z", "point_type") + + # validate input dataframe has correct columns + is_valid <- validate_df(classified_pts, REQUIRED_COLS, "classified_pts") + + # type checking + if (!any(class(classified_pts) %in% c("sf", "tbl_df", "tbl", "data.frame"))) { + stop("Invalid argument type, 'classified_pts' must be of type 'sf', 'tbl_df', 'tbl' or 'data.frame', given type was '", + class(classified_pts), "'") + } + + # type checking + if (!is.numeric(pct_of_length_for_relief)) { + stop("Invalid argument type, 'pct_of_length_for_relief' must be of type 'numeric', given type was '", + class(pct_of_length_for_relief), "'") + } + + # Make sure pct_of_length_for_relief is valid percentage value (greater than 0) + if (pct_of_length_for_relief < 0 ) { + stop("Invalid value 'pct_of_length_for_relief' of ", pct_of_length_for_relief, ", 'pct_of_length_for_relief' must be greater than or equal to 0") + } + + # TODO: Need to add code that will just set aside the geometries and add them back to the final output dataset + # For now we will just drop geometries as safety precaution (as to not summarize() on a massive number of sf geometries) + classified_pts <- sf::st_drop_geometry(classified_pts) + + # store the cross section lengths and calculate the depth threshold as a percent of the cross sections length + cs_lengths <- + classified_pts %>% + 
dplyr::select(dplyr::any_of(crosswalk_id), cs_id, cs_lengthm) %>% + dplyr::group_by(dplyr::across(dplyr::any_of(c(crosswalk_id, "cs_id")))) %>% + # dplyr::group_by(hy_id, cs_id) %>% + dplyr::slice(1) %>% + dplyr::ungroup() %>% + dplyr::mutate( + depth_threshold = round(cs_lengthm * pct_of_length_for_relief, 3) # maybe use floor() here + ) + + # get the minimum bottom point and maximum left and right bank points + relief <- + classified_pts %>% + # dplyr::filter(point_type %in% c("left_bank", "right_bank")) %>% + dplyr::filter(point_type %in% c("bottom", "left_bank", "right_bank")) %>% + dplyr::select(dplyr::any_of(crosswalk_id), cs_id, pt_id, Z, point_type) %>% + dplyr::group_by(dplyr::across(dplyr::any_of(c(crosswalk_id, "cs_id", "point_type")))) %>% + # dplyr::select(hy_id, cs_id, pt_id, Z, point_type) %>% + # dplyr::group_by(hy_id, cs_id, point_type) %>% + dplyr::summarise( + minZ = min(Z, na.rm = TRUE), + maxZ = max(Z, na.rm = TRUE) + ) %>% + dplyr::ungroup() %>% + tidyr::pivot_wider( + names_from = point_type, + values_from = c(minZ, maxZ) + ) %>% + dplyr::select( + dplyr::any_of(crosswalk_id), cs_id, + bottom = minZ_bottom, + left_bank = maxZ_left_bank, + right_bank = maxZ_right_bank + ) + + # join lengths and depth threshold back with relief table and + # calculate if the max difference between left/right bank vs bottom is + # greater than or equal to the depth threshold + relief <- + relief %>% + dplyr::left_join( + cs_lengths, + by = c(crosswalk_id, "cs_id") + ) %>% + dplyr::group_by(dplyr::across(dplyr::any_of(c(crosswalk_id, "cs_id")))) %>% + # dplyr::group_by(hy_id, cs_id) %>% + dplyr::mutate( + depth_diff = max(c(round(right_bank - bottom, 3), + round(left_bank - bottom, 3)), + na.rm = TRUE) # TODO: removing NAs might not be the right call, + # removing them might set has_relief to TRUE and + # says "there IS relief but no valid banks" + ) %>% + dplyr::ungroup() %>% + dplyr::mutate( + has_relief = dplyr::case_when( + depth_diff >= depth_threshold ~ 
TRUE, + TRUE ~ FALSE + ) + ) + + # add the new point type columns to the original dataframe + # Join the point type counts to the original dataframe + classified_pts <- + classified_pts %>% + dplyr::left_join( + dplyr::select(relief, + dplyr::any_of(crosswalk_id), cs_id, has_relief), + by = c(crosswalk_id, "cs_id") + ) + + # check if any of the columns in 'classified_pts' are geometry types and move them to the end column if they do exist + classified_pts <- move_geometry_to_last(classified_pts) + + return(classified_pts) + +} + +#' @title Get relief attributes from a dataframe of cross sections points +#' Generate a dataframe from a set of classified cross section points indicating whether a cross section "has relief". +#' Relief is determined by checking whether each set of cross section points has a left OR right bank that has a depth difference from the bottom that is +#' greater than or equal to a percentage of the cross section length (e.g. Assuming a 'pct_of_length_for_relief' of 0.01 (1%) of a 100m cross section would have a relief depth threshold of 1m) +#' @param classified_pts sf or dataframe of points with "hy_id", "cs_id", "cs_lengthm", and "point_type" columns. Output of hydrofabric3D::classify_points() +#' @param crosswalk_id character, ID column +#' @param pct_of_length_for_relief numeric, percent of cs_lengthm to use as the threshold depth for classifying whether a cross section has "relief". Default is 0.01 (1% of the cross sections length). +#' @param detailed logical, whether to return only the "has_relief" column or +#' include all derived relief based columns such as "max_relief" and the "pct_of_length_for_relief" used. Default is FALSE and returns a dataframe with only "hy_id", "cs_id", and "has_relief". +#' @return dataframe with each row being a unique hy_id/cs_id with a "has_relief" value for each hy_id/cs_id. 
If detailed = TRUE, then the output dataframe will include the following additional columns: "cs_lengthm", "max_relief", "pct_of_length_for_relief". +#' @importFrom dplyr select group_by slice ungroup mutate filter summarise left_join case_when all_of relocate last_col any_of across +#' @importFrom tidyr pivot_wider +#' @export +get_relief <- function( + classified_pts, + crosswalk_id = NULL, + pct_of_length_for_relief = 0.01, + detailed = FALSE +) { + + # ------------------------------------------------------------------------ + # ------------------------------------------------------------------------ + # crosswalk_id <- "hy_id" + # REQ_COLS <- c(crosswalk_id, "cs_id", "pt_id", "cs_lengthm", "Z", "point_type") + # + # pct_of_length_for_relief <- 0.01 + # CS_LENGTHM <- 100 + # MIN_REQ_RELIEF <- CS_LENGTHM * pct_of_length_for_relief + # detailed <- FALSE + + # classified_pts <- + # data.frame( + # hy_id = c("A", "A", "A", "A", "A"), + # cs_id = c(1, 1, 1, 1, 1), + # pt_id = c(1, 2, 3, 4, 5), + # cs_lengthm = c(CS_LENGTHM), + # point_type = c('left_bank', 'bottom', 'bottom', 'bottom', 'right_bank'), + # Z = c(100, 10, 10, 10, 100) + # ) + # ------------------------------------------------------------------------ + # ------------------------------------------------------------------------ + + # make a unique ID if one is not given (NULL 'crosswalk_id') + if(is.null(crosswalk_id)) { + # cs <- add_hydrofabric_id(cs) + crosswalk_id <- 'hydrofabric_id' + } + + REQUIRED_COLS <- c(crosswalk_id, "cs_id", "pt_id", "cs_lengthm", "Z", "point_type") + + # validate input dataframe has correct columns + is_valid <- validate_df(classified_pts, REQUIRED_COLS, "classified_pts") + + # type checking + if (!any(class(classified_pts) %in% c("sf", "tbl_df", "tbl", "data.frame"))) { + stop("Invalid argument type, 'classified_pts' must be of type 'sf', 'tbl_df', 'tbl' or 'data.frame', given type was '", class(classified_pts), "'") + } + + # type checking + if 
(!is.numeric(pct_of_length_for_relief)) { + stop("Invalid argument type, 'pct_of_length_for_relief' must be of type 'numeric', given type was '", class(pct_of_length_for_relief), "'") + } + + # type checking + if (!is.logical(detailed)) { + stop("Invalid argument type, 'detailed' must be of type 'logical', given type was '", class(detailed), "'") + } + + # drop geometries as safety precaution + classified_pts <- sf::st_drop_geometry(classified_pts) + + # store the cross section lengths and calculate the depth threshold as a percent of the cross sections length + cs_lengths <- + classified_pts %>% + dplyr::select(dplyr::any_of(crosswalk_id), cs_id, cs_lengthm) %>% + dplyr::group_by(dplyr::across(dplyr::any_of(c(crosswalk_id, "cs_id")))) %>% + # dplyr::select(hy_id, cs_id, cs_lengthm) %>% + # dplyr::group_by(hy_id, cs_id) %>% + dplyr::slice(1) %>% + dplyr::ungroup() %>% + dplyr::mutate( + depth_threshold = round(cs_lengthm * pct_of_length_for_relief, 3) # maybe use floor() here + ) + + # get the minimum bottom point and maximum left and right bank points + relief <- + classified_pts %>% + # dplyr::filter(point_type %in% c("left_bank", "right_bank")) %>% + dplyr::filter(point_type %in% c("bottom", "left_bank", "right_bank")) %>% + dplyr::select(dplyr::any_of(crosswalk_id), cs_id, pt_id, Z, point_type) %>% + dplyr::group_by(dplyr::across(dplyr::any_of(c(crosswalk_id, "cs_id", "point_type")))) %>% + # dplyr::select(hy_id, cs_id, pt_id, Z, point_type) %>% + # dplyr::group_by(hy_id, cs_id, point_type) %>% + dplyr::summarise( + minZ = min(Z, na.rm = TRUE), + maxZ = max(Z, na.rm = TRUE) + ) %>% + dplyr::ungroup() %>% + tidyr::pivot_wider( + names_from = point_type, + values_from = c(minZ, maxZ) + ) %>% + # dplyr::select( + # dplyr::any_of(crosswalk_id), + # cs_id, + # bottom = minZ_bottom, + # left_bank = maxZ_left_bank, + # right_bank = maxZ_right_bank + # ) + dplyr::select( + dplyr::any_of( + c( + crosswalk_id, + "cs_id", + "minZ_bottom", + "maxZ_left_bank", + 
"maxZ_right_bank" + )) + ) %>% + dplyr::rename( + dplyr::any_of(c( + bottom = "minZ_bottom", + left_bank = "maxZ_left_bank", + right_bank = "maxZ_right_bank" + )) + ) + + # make sure that all the required columns are present, if a column is missing, add that column and set the values to NA + required_pt_cols <- c("bottom", "left_bank", "right_bank") + + for (col in required_pt_cols) { + if (!col %in% names(relief)) { + # message("Missing ", col, " in relief, adding default NA") + relief[[col]] <- NA + } + } + + # join lengths and depth threshold back with relief table and + # calculate if the max difference between left/right bank vs bottom is + # greater than or equal to the depth threshold + relief <- + relief %>% + dplyr::left_join( + cs_lengths, + by = c(crosswalk_id, "cs_id") + # by = c("hy_id", "cs_id") + ) %>% + dplyr::group_by(dplyr::across(dplyr::any_of(c(crosswalk_id, "cs_id")))) %>% + # dplyr::group_by(hy_id, cs_id) %>% + dplyr::mutate( + max_relief = max( + c( + round(right_bank - bottom, 3), + round(left_bank - bottom, 3) + ), + na.rm = TRUE + ), # TODO: removing NAs might not be the right call, removing them might set has_relief to TRUE and says "there IS relief but no valid banks" + + # TODO: if both left AND right bank are NA, then we get an -Inf which we will just set to 0 (i.e. relief of 0) + max_relief = dplyr::case_when( + is.infinite(max_relief) ~ 0, + TRUE ~ max_relief + ) + ) %>% + dplyr::ungroup() %>% + dplyr::mutate( + # TODO: if a cross section does NOT have proper left/right banks, it by default can NOT have relief (i.e. 
has_relief = FALSE) + has_missing_banks = is.na(left_bank) | is.na(right_bank), + has_relief = dplyr::case_when( + (max_relief >= depth_threshold) & !has_missing_banks ~ TRUE, + TRUE ~ FALSE + ), + # has_relief = dplyr::case_when( + # max_relief >= depth_threshold ~ TRUE, + # TRUE ~ FALSE + # ), + pct_of_length_for_relief = pct_of_length_for_relief + ) + # dplyr::select(-has_missing_banks) + + # if detailed set of data is specified, return the relief dataframe with additional columns + if(detailed) { + relief <- + relief %>% + dplyr::group_by(dplyr::across(dplyr::any_of(c(crosswalk_id, "cs_id")))) %>% + dplyr::mutate( + max_relief = dplyr::case_when( + has_missing_banks ~ 0, + TRUE ~ max_relief + ) + ) %>% + dplyr::ungroup() %>% + dplyr::select( + dplyr::any_of(crosswalk_id), + cs_id, cs_lengthm, + has_relief, + max_relief, + pct_of_length_for_relief + ) + + return(relief) + + } + + # return dataframe with just hy_id/cs_id, and has_relief + relief <- + relief %>% + dplyr::select( + dplyr::any_of(crosswalk_id), + cs_id, + has_relief + ) + + return(relief) +} diff --git a/R/cs_validity_scores.R b/R/cs_validity_scores.R new file mode 100644 index 0000000..0f681dc --- /dev/null +++ b/R/cs_validity_scores.R @@ -0,0 +1,167 @@ +utils::globalVariables( + c(".", "hy_id", "cs_id", "pt_id", "Z", "middle_index", "point_type", "minZ", + "maxZ", "minZ_bottom", "maxZ_left_bank", "maxZ_right_bank", "valid_left_bank", + "valid_right_bank", "bottom", "left_bank", "right_bank", "valid_banks", + "relative_distance", "cs_lengthm", "default_middle", "has_relief", + "max_relief", "braid_id", "geometry", + + "comid", "fromnode", "tonode", + "tocomid", "divergence", "cycle_id", "node", "braid_vector", "totdasqkm", + "changed", "relative_position", "head_distance", "tail_distance", + "component_id", "cs_measure", "ds_distance", "along_channel", "euclid_dist", + "sinuosity", "points_per_cs", "Z_at_bottom", "lower_bound", "upper_bound", + "ge_bottom", "is_near_bottom", "pts_near_bottom", 
"total_valid_pts", + "pct_near_bottom", + "member_braids", "braid_members", "diff_pts", "is_extended", + "new_cs_id", "split_braid_ids", + + "braid_length", + "crosswalk_id", + "lengthm", + "check_z_values", + "geom", + "is_same_Z", + "is_multibraid", + "channel", "unique_count", + "left_bank_count", "right_bank_count", "channel_count", "bottom_count", + "terminalID", + "tmp_id", + "make_geoms_to_cut_plot", + "Y", "improved", "length_vector_col", "median", "min_ch", "new_validity_score", + "old_validity_score", "transects", "validity_score", "x", + "A", "DEPTH", "DINGMAN_R", "TW", "X", "X_end", "X_start", "Y_end", "Y_start", + "ahg_a", "ahg_index", "ahg_x", "ahg_y", + "bottom_end", "bottom_length", "bottom_midpoint", + "bottom_start", "cs_partition", "distance_interval", "fixed_TW", + "has_new_DEPTH", "has_new_TW", "ind", "is_dem_point", "left_max", + "left_start", "max_right_position", "new_DEPTH", "new_TW", "next_X_is_missing", "next_Y_is_missing", + "parabola", "partition", "prev_X_is_missing", + "prev_Y_is_missing", "right_start", "right_start_max", "start_or_end", "start_pt_id", + "cs_source", + "partition_lengthm", "left_fema_index", "right_fema_index", + "left_is_within_fema", "right_is_within_fema", "left_distance", "right_distance", + "new_cs_lengthm", "polygon_index", + "crosswalk_id", "extend_invalid_transects2", + "anchors", "deriv_type", "edge", "extension_distance", + "left_is_extended", "right_is_extended", "to_node", "verbose", + "toindid", "indid", "toid", "is", "internal_is_braided2" + ) +) + +#' Get a total count of the validity attributes +#' +#' @param x dataframe or sf dataframe with crosswalk_id, has_relief, and valid_banks columns +#' @param crosswalk_id character unique ID column +#' +#' @importFrom sf st_drop_geometry +#' @importFrom dplyr select any_of group_by across slice ungroup count +#' @return dataframe or tibble +#' @export +get_validity_tally <- function(x, crosswalk_id = NULL) { + # x <- classified_pts + # crosswalk_id = "hy_id" 
+ + validity_tally <- + x %>% + sf::st_drop_geometry() %>% + dplyr::select(dplyr::any_of(crosswalk_id), cs_id, valid_banks, has_relief) %>% + dplyr::group_by(dplyr::across(dplyr::any_of(c(crosswalk_id, "cs_id")))) %>% + dplyr::slice(1) %>% + dplyr::ungroup() %>% + dplyr::count(valid_banks, has_relief) + + return(validity_tally) + +} + +#' Calculates a validity score column based on valid_banks and has_relief columns in a set of cross section points +#' +#' @param cs_to_validate dataframe +#' @param crosswalk_id character, ID column +#' @param validity_col_name name of the output validity score column +#' @importFrom sf st_drop_geometry +#' @importFrom dplyr group_by slice ungroup mutate select any_of +#' @return dataframe with added validity_score column +calc_validity_scores <- function(cs_to_validate, + crosswalk_id = NULL, + validity_col_name = "validity_score") { + + scores <- + cs_to_validate %>% + sf::st_drop_geometry() %>% + hydrofabric3D::add_tmp_id(x = crosswalk_id) %>% + dplyr::group_by(tmp_id) %>% + dplyr::slice(1) %>% + dplyr::ungroup() %>% + dplyr::mutate( + validity_score = valid_banks + has_relief + ) %>% + dplyr::select( + # hy_id, + dplyr::any_of(crosswalk_id), + cs_id, valid_banks, has_relief, validity_score) + + names(scores) <- c(crosswalk_id, "cs_id", "valid_banks", "has_relief", validity_col_name) + + return(scores) + +} + +#' Compare valid_banks and has_relief between 2 sets of cross section points +#' +#' @param cs_pts1 dataframe or sf dataframe of CS pts +#' @param cs_pts2 dataframe or sf dataframe of CS pts +#' @param crosswalk_id character unique ID +#' @importFrom dplyr rename filter any_of mutate select left_join case_when +#' @return dataframe, tibble +#' @export +compare_cs_validity <- function(cs_pts1, + cs_pts2, + crosswalk_id = NULL +) { + + # cs_pts1 <- x + # cs_pts2 <- new_cs_pts + + # validity_scores1$tmp_id[!validity_scores1$tmp_id %in% validity_scores2$tmp_id] + + validity_scores1 <- + cs_pts1 %>% + 
calc_validity_scores(crosswalk_id) %>% + add_tmp_id(crosswalk_id) %>% + dplyr::rename(score1 = validity_score) + + validity_scores2 <- + cs_pts2 %>% + calc_validity_scores(crosswalk_id) %>% + add_tmp_id(crosswalk_id) %>% + dplyr::rename(score2 = validity_score) + + # mark as "improved" for any hy_id/cs_ids that increased "validity score" after extending + check_for_improvement <- dplyr::left_join( + # OLD SCORES + validity_scores1 %>% + dplyr::filter( + tmp_id %in% unique(validity_scores2$tmp_id) + ) %>% + dplyr::select(dplyr::any_of(crosswalk_id), cs_id, score1), + + # NEW SCORES + validity_scores2 %>% + dplyr::select(dplyr::any_of(crosswalk_id), cs_id, score2), + by = c(crosswalk_id, "cs_id") + ) %>% + dplyr::mutate( + is_improved = dplyr::case_when( + score2 > score1 ~ TRUE, + TRUE ~ FALSE + ) + ) %>% + dplyr::select(dplyr::any_of(crosswalk_id), cs_id, + score1, score2, + is_improved + ) + + return(check_for_improvement) + +} diff --git a/R/transect_extenders.R b/R/transect_extenders.R deleted file mode 100644 index bc228fd..0000000 --- a/R/transect_extenders.R +++ /dev/null @@ -1,474 +0,0 @@ -utils::globalVariables( - c(".", "hy_id", "cs_id", "pt_id", "Z", "middle_index", "point_type", "minZ", - "maxZ", "minZ_bottom", "maxZ_left_bank", "maxZ_right_bank", "valid_left_bank", - "valid_right_bank", "bottom", "left_bank", "right_bank", "valid_banks", - "relative_distance", "cs_lengthm", "default_middle", "has_relief", - "max_relief", "braid_id", "geometry", - - "comid", "fromnode", "tonode", - "tocomid", "divergence", "cycle_id", "node", "braid_vector", "totdasqkm", - "changed", "relative_position", "head_distance", "tail_distance", - "component_id", "cs_measure", "ds_distance", "along_channel", "euclid_dist", - "sinuosity", "points_per_cs", "Z_at_bottom", "lower_bound", "upper_bound", - "ge_bottom", "is_near_bottom", "pts_near_bottom", "total_valid_pts", - "pct_near_bottom", - "member_braids", "braid_members", "diff_pts", "is_extended", - "new_cs_id", 
"split_braid_ids", - - "braid_length", - "crosswalk_id", - "lengthm", - "check_z_values", - "geom", - "is_same_Z", - "is_multibraid", - "channel", "unique_count", - "left_bank_count", "right_bank_count", "channel_count", "bottom_count", - "terminalID", - "tmp_id", - "make_geoms_to_cut_plot", - "Y", "improved", "length_vector_col", "median", "min_ch", "new_validity_score", - "old_validity_score", "transects", "validity_score", "x", - "A", "DEPTH", "DINGMAN_R", "TW", "X", "X_end", "X_start", "Y_end", "Y_start", - "ahg_a", "ahg_index", "ahg_x", "ahg_y", - "bottom_end", "bottom_length", "bottom_midpoint", - "bottom_start", "cs_partition", "distance_interval", "fixed_TW", - "has_new_DEPTH", "has_new_TW", "ind", "is_dem_point", "left_max", - "left_start", "max_right_position", "new_DEPTH", "new_TW", "next_X_is_missing", "next_Y_is_missing", - "parabola", "partition", "prev_X_is_missing", - "prev_Y_is_missing", "right_start", "right_start_max", "start_or_end", "start_pt_id", - "cs_source", - "partition_lengthm", "left_fema_index", "right_fema_index", - "left_is_within_fema", "right_is_within_fema", "left_distance", "right_distance", - "new_cs_lengthm", - "crosswalk_id", "extend_invalid_transects2", - "anchors", "deriv_type", "edge", "extension_distance", - "left_is_extended", "right_is_extended", "to_node", "verbose", - "toindid", "indid", "toid", "is", "internal_is_braided2" - ) -) - - -# TODO Deprecated, can be deleted soon (replaced by get_improved_cs_pts()) - -#' @title Extend a set of transects by a percentage based on banks and relief -#' Given a set of transect lines with valid_banks and has_relief columns (derived from DEM extracted cross section points), extend any transects -#' by a percentage of the transects length if the transect does NOT have valid banks (valid_banks == FALSE) OR it does NOT have relief (has_relief == FALSE). -#' @param transects_to_check sf linestrings, set of all transects in the network. 
Requires the following columns: "hy_id", "cs_id", "cs_lengthm" (length of geometry in meters), "valid_banks", and "has_relief" -#' @param net sf linestrings, flowline network that transects were generated from, requires "id" column (where "id" equals the "hy_id" columns in 'transects_to_check' and 'transects' ) -#' @param scale numeric, percentage of current transect line length to extend transects in transects_to_extend by. Default is 0.5 (50% of the transect length) -#' @param verbose logical, whether to show a progress bar and progress messages or not. Default is TRUE. -#' @return sf linestring dataframe containing the the original transects with extensions performed on transects without valid_banks OR has_relief (a "is_extended" flag denotes if the geometry was extended by "scale" % or not) -#' @importFrom geos as_geos_geometry geos_intersection geos_type geos_intersects -#' @importFrom sf st_geometry st_as_sf -#' @importFrom dplyr filter bind_rows -#' @noRd -#' @keywords internal -extend_invalid_transects <- function( - transects_to_check, - net, - scale = 0.5, - verbose = TRUE -) { - # ---------------------------------------- - # ---------------------------------------- - - # transects_to_check = transects - # net = net - # scale = scale - # verbose = verbose - - # ---------------------------------------- - # ---------------------------------------- - - # Create an "is_extended" flag to identify which transects were extended and updated - transects_to_check$is_extended <- FALSE - - # split input transects into invalid and valid sets (valid == has valid banks AND has relief) - invalid_transects <- dplyr::filter(transects_to_check, !valid_banks | !has_relief) - valid_transects <- dplyr::filter(transects_to_check, valid_banks & has_relief) - - # keep track of any transects that having missing values in either valid_banks/has_relief columns, - # these get added back to the updated data at the end - missing_bank_or_relief_data <- - transects_to_check %>% - 
dplyr::filter(is.na(valid_banks) | is.na(has_relief)) - - # TODO: Probably remove this - count_check <- nrow(valid_transects) + nrow(invalid_transects) == nrow(transects_to_check) - # count_check <- nrow(valid_transects) + nrow(invalid_transects) == nrow(transects_to_check) - nrow(missing_bank_or_relief_data) - - if(!count_check) { - warning(paste0(nrow(missing_bank_or_relief_data), " transects have NA values in either 'valid_banks' or 'has_relief' columns")) - # warning(paste0("Different number of transects after splitting data by 'valid_banks' and 'has_relief' columns, ", nrow(missing_bank_or_relief_data), " transects have NA values in either 'valid_banks' or 'has_relief' columns")) - # stop("Mismatch in number of points after splitting data by the 'valid_banks' and 'has_relief' columns, likely a missing value in either 'valid_banks' or 'has_relief' columns") - } - - if(verbose) { message(paste0("Extending ", nrow(invalid_transects), " transects without valid banks or relief by ", scale * 100, "%...")) } - - # Extend the transects by a scale % value - extended_trans <- extend_by_percent(invalid_transects, scale, "cs_lengthm") - - # Store the identifying information to use in for loop to subset data using IDs - fline_id_array <- net$id - hy_id_array <- extended_trans$hy_id - cs_id_array <- extended_trans$cs_id - - check_hy_id_array <- transects_to_check$hy_id - check_cs_id_array <- transects_to_check$cs_id - - # Convert extended transects to geos - extended_trans <- geos::as_geos_geometry(extended_trans) - transects_to_check_geos <- geos::as_geos_geometry(transects_to_check) - - # Convert the net object into a geos_geometry - net_geos <- geos::as_geos_geometry(net) - - # if(verbose) { message(paste0("Iterating through extended geometries and checking validity...")) } - - # Convert the original transect lines to geos_geometries and when - # a valid extension comes up in the below for loop, replace the old geometry with the newly extended one - geos_list <- 
geos::as_geos_geometry(invalid_transects$geom) - - # Preallocate vectors to store the "is_extended" flag and the new lengths after extensions: - # - if an extension is VALID (checked in the loop below), then - # set the "is_extended" flag to TRUE and update the cross section length - # to use the new extended length - extended_flag <- rep(FALSE, length(extended_trans)) - length_list <- invalid_transects$cs_lengthm - - make_progress <- make_progress_bar(verbose, length(extended_trans)) - - # loop through geometries that might need to be extended, try to extend, and then update - # the 'to_extend' values IF the extended transectr does NOT violate any intersection rules - for (i in 1:length(extended_trans)) { - - # Get the current transect, hy_id, cs_id - # current_trans <- extended_trans[i] - current_hy_id <- hy_id_array[i] - current_cs_id <- cs_id_array[i] - - # # Make sure that newly extended transect line only intersects its origin flowline at MOST 1 time - # # AND that the newly extended transect does NOT intersect with any previously computed transect lines - # fline_intersect <- geos::geos_intersection(extended_trans[i], current_fline) - - # Check that the extended transect lines only intersect a single flowline in the network only ONCE - intersects_with_flowlines <- geos::geos_intersection( - extended_trans[i], - net_geos[fline_id_array == current_hy_id] - ) - - # Check that newly extended cross section only intersects its origin flowline at MOST 1 time - # (This value will be a "MULTIPOINT" if it intersects more than once and will evaluate to FALSE) - intersects_flowline_only_once <- sum(geos::geos_type(intersects_with_flowlines) == "point") == 1 && - sum(geos::geos_type(intersects_with_flowlines) == "multipoint") == 0 - - if(!intersects_flowline_only_once) { - # message(" -> Skipping iteration because extended transect intersects flowline more than once") - next - } - - # Check that extended transect doesn't intersect with any of the original cross sections 
on this "hy_id" - is_intersecting_other_transects <- any(geos::geos_intersects( - extended_trans[i], - # AKA neighbor_transects - transects_to_check_geos[check_hy_id_array == current_hy_id & check_cs_id_array != current_cs_id] - ) - ) - - if (is_intersecting_other_transects) { - # message(" --> Skipping iteration because extended transect intersects another (UNEXTENDED) neighoring transect") - next - } - # Check that extended transect doesn't intersect with any of the NEWLY EXTENDED cross sections - is_intersecting_other_extended_transects <- any(geos::geos_intersects(extended_trans[i], extended_trans[-i])) - - if (is_intersecting_other_extended_transects) { - # message(" -----> Skipping iteration because extended transect intersects another (EXTENDED) neighoring transect") - next - } - - # If all of these conditions are TRUE then the currently extended transect will get inserted into "to_extend" - # - Newly extended transect intersects with its flowlines AT MOST 1 time - # - Newly extended transect does NOT intersect with any of the other NEWLY EXTENDED transect lines - # - Newly extended transect does NOT intersect with any of the ORIGINAL transect lines - if ( - # Check that newly extended cross section only intersects its origin flowline at MOST 1 time - # (This value will be a "MULTIPOINT" if it intersects more than once and will evaluate to FALSE) - intersects_flowline_only_once && - # geos::geos_type(fline_intersect) == "point" && - - # Check that extended transect doesn't intersect with any of the NEWLY EXTENDED cross sections - !is_intersecting_other_extended_transects && - # !any(geos::geos_intersects(extended_trans[i], extended_trans[-i])) && - - # Check that extended transect doesn't intersect with any of the original cross sections on this "hy_id" - !is_intersecting_other_transects - # !any(geos::geos_intersects(extended_trans[i], neighbor_transects)) - ) { - - # # Calculate the updated cross section length to align with the newly extended cross 
section for this row - updated_cs_length <- (length_list[i] * scale) + length_list[i] - # updated_cs_length <- (current_length * scale) + current_length - - # copy the current cross section length - length_list[i] <- updated_cs_length - # length_list <- vctrs::vec_c(length_list, updated_cs_length) - - # Update the transect geometry with the newly extended transect - geos_list[i] <- extended_trans[i] - # geos_list <- vctrs::vec_c(geos_list, extended_trans[i]) - - # Set the extended flag to TRUE for this transect - extended_flag[i] <- TRUE - # extended_flag <- vctrs::vec_c(extended_flag, TRUE) - - } - - make_progress() - } - - if(verbose) { message(paste0("Complete!")) } - - # Update the "invalid_transects" with new geos geometries ("geos_list") - sf::st_geometry(invalid_transects) <- sf::st_geometry(sf::st_as_sf(geos_list)) - - # update the "is_extended" flag and the cross section lengths to reflect any extensions - invalid_transects$is_extended <- extended_flag - invalid_transects$cs_lengthm <- length_list - - # Combine the valid_transects with the UPDATED invalid_transects (updated by attempting extension) to get the final output dataset - extended_transects <- dplyr::bind_rows( - valid_transects, - invalid_transects - ) - - # add back any transects that were missing banks/relief values - extended_transects <- dplyr::bind_rows( - extended_transects, - dplyr::select(missing_bank_or_relief_data, - dplyr::any_of(names(extended_transects))) - ) - - # check to make sure all unique hy_id/cs_id in the INPUT are in the OUTPUT, - # and raise an error if they're are missing hy_id/cs_ids - input_uids <- unique(hydrofabric3D::add_tmp_id(transects_to_check)$tmp_id) - output_uids <- unique(hydrofabric3D::add_tmp_id(extended_transects)$tmp_id) - - has_all_uids <- all(output_uids %in% input_uids) - - # throw an error if NOT all hy_id/cs_ids are the same in the input and output data - if(!has_all_uids) { - stop("Missing unique hy_id/cs_id from input transects in the output 
transects") - } - - return(extended_transects) -} - -# # @title Extend a set of transects by a percentage based on banks and relief -# # Given a set of transect lines with valid_banks and has_relief columns (derived from DEM extracted cross section points), extend any transects -# # by a percentage of the transects length if the transect does NOT have valid banks (valid_banks == FALSE) OR it does NOT have relief (has_relief == FALSE). -# # @param transects_to_check sf linestrings, set of all transects in the network. Requires the following columns: "hy_id", "cs_id", "cs_lengthm" (length of geometry in meters), "valid_banks", and "has_relief" -# # @param net sf linestrings, flowline network that transects were generated from, requires "id" column (where "id" equals the "hy_id" columns in 'transects_to_check' and 'transects' ) -# # @param crosswalk_id character, column name that connects features in transects to net -# # @param scale numeric, percentage of current transect line length to extend transects in transects_to_extend by. Default is 0.5 (50% of the transect length) -# # @param verbose logical, whether to show a progress bar and progress messages or not. Default is TRUE. 
-# # @return sf linestring dataframe containing the the original transects with extensions performed on transects without valid_banks OR has_relief (a "is_extended" flag denotes if the geometry was extended by "scale" % or not) -# # @importFrom geos as_geos_geometry geos_intersection geos_type geos_intersects -# # @importFrom sf st_geometry st_as_sf -# # @importFrom dplyr filter bind_rows -# # @importFrom hydroloom rename_geometry -# # @export -# extend_invalid_transects2 <- function( -# transects_to_check, -# net, -# crosswalk_id, -# scale = 0.5, -# verbose = TRUE -# ) { -# # ---------------------------------------- -# # ---------------------------------------- -# -# # transects_to_check = transects -# # net = net -# # crosswalk_id = "hy_id" -# # scale = scale -# # verbose = verbose -# -# # ---------------------------------------- -# # ---------------------------------------- -# -# # ---------------------------------------------------------------------------------- -# # ----------- Input checking ------ -# # ---------------------------------------------------------------------------------- -# if(!crosswalk_id %in% names(net)) { -# stop("crosswalk_id '", crosswalk_id, "' is not a column in 'net' input,\n", -# "Please provide a valid 'crosswalk_id' that crosswalks 'net' to 'transects_to_check'") -# } -# -# if(!crosswalk_id %in% names(transects_to_check)) { -# stop("crosswalk_id '", crosswalk_id, "' is not a column in 'transects_to_check' input,\n", -# "Please provide a valid 'crosswalk_id' that crosswalks the 'transects_to_check' to 'net'") -# } -# -# # set geometry coluimn name as beginning -# transects_to_check <- hydroloom::rename_geometry(transects_to_check, "geometry") -# -# # check for necessary columns -# req_cols <- c(crosswalk_id, "cs_id", "cs_lengthm", "valid_banks", "has_relief", "geometry") -# start_cols <- names(transects_to_check) -# -# if (!all(req_cols %in% start_cols)) { -# missing_cols <- req_cols[which(!req_cols %in% start_cols)] -# 
stop("'transects_to_check' is missing the following required columns: \n > ", -# paste0(missing_cols, collapse = "\n > ")) -# } -# -# # Create an "is_extended" flag to identify which transects were extended and updated -# transects_to_check$is_extended <- FALSE -# -# # # split input transects into invalid and valid sets (valid == has valid banks AND has relief) -# # invalid_transects <- dplyr::filter(transects_to_check, !valid_banks | !has_relief) -# # valid_transects <- dplyr::filter(transects_to_check, valid_banks & has_relief) -# -# # keep track of any transects that having missing values in either valid_banks/has_relief columns, -# # these get added back to the updated data at the end -# missing_bank_or_relief_data <- -# transects_to_check %>% -# dplyr::filter(is.na(valid_banks) | is.na(has_relief)) -# -# # TODO: Probably remove this -# count_check <- nrow(dplyr::filter(transects_to_check, valid_banks & has_relief)) + -# nrow(dplyr::filter(transects_to_check, !valid_banks | !has_relief)) == -# nrow(transects_to_check) - nrow(missing_bank_or_relief_data) -# -# # count_check <- nrow(valid_transects) + nrow(invalid_transects) == nrow(transects_to_check) -# # count_check <- nrow(valid_transects) + nrow(invalid_transects) == nrow(transects_to_check) - nrow(missing_bank_or_relief_data) -# -# if(!count_check) { -# warning(paste0(nrow(missing_bank_or_relief_data), " transects have NA values in either 'valid_banks' or 'has_relief' columns")) -# # warning(paste0("Different number of transects after splitting data by 'valid_banks' and 'has_relief' columns, ", nrow(missing_bank_or_relief_data), " transects have NA values in either 'valid_banks' or 'has_relief' columns")) -# # stop("Mismatch in number of points after splitting data by the 'valid_banks' and 'has_relief' columns, likely a missing value in either 'valid_banks' or 'has_relief' columns") -# } -# -# if(verbose) { message(paste0("Extending ", nrow(transects_to_check), -# " transects without valid banks or relief 
by ", -# scale * 100, "%...")) } -# -# # add distances to extend for the left and right side of a transect -# # for any of the the already "valid transects", we just set an extension distance of 0 -# # on both sides and these transects will be KEPT AS IS -# transects_to_check <- -# transects_to_check %>% -# dplyr::mutate( -# left_distance = dplyr::case_when( -# !valid_banks | !has_relief ~ (((scale)*(cs_lengthm)) / 2), -# TRUE ~ 0 -# ), -# right_distance = dplyr::case_when( -# !valid_banks | !has_relief ~ (((scale)*(cs_lengthm)) / 2), -# TRUE ~ 0 -# ) -# ) %>% -# dplyr::relocate(cs_lengthm, left_distance, right_distance) -# # dplyr::filter(left_distance == 0, right_distance == 0) -# -# -# # system.time({ -# -# extended_transects <- extend_transects_by_distances( -# transects = transects_to_check, -# flowlines = net, -# crosswalk_id = crosswalk_id, -# cs_id = "cs_id", -# grouping_id = crosswalk_id -# ) -# -# # }) -# -# # Set the is_extended flag based on if either the left OR the right side were extended -# extended_transects <- -# extended_transects %>% -# hydroloom::rename_geometry("geometry") %>% -# dplyr::mutate( -# is_extended = dplyr::case_when( -# left_is_extended | right_is_extended ~ TRUE, -# TRUE ~ FALSE -# ) -# ) %>% -# dplyr::select( -# -left_distance, -# -right_distance, -# -left_is_extended, -# -right_is_extended -# ) -# -# # add back any transects that were missing banks/relief values -# extended_transects <- dplyr::bind_rows( -# extended_transects, -# dplyr::select(missing_bank_or_relief_data, -# dplyr::any_of(names(extended_transects)) -# ) -# ) -# -# # Try and fix any transects that cross multiple -# is_multi_intersecting <- lengths(sf::st_intersects(extended_transects)) != 1 -# -# # replace any extended geoms that have multiple intersections with the original UNEXTENDED version of those same transects -# sf::st_geometry(extended_transects[is_multi_intersecting, ]) <- sf::st_geometry(transects_to_check[is_multi_intersecting, ]) -# -# # update the 
lengths and is_extended flag to align with the above replacement of geometries -# extended_transects[is_multi_intersecting, ]$cs_lengthm <- transects_to_check[is_multi_intersecting, ]$cs_lengthm -# extended_transects[is_multi_intersecting, ]$is_extended <- transects_to_check[is_multi_intersecting, ]$is_extended -# -# # TODO: -# # TODO: this won't work as expected currently, in case any transects were removed by the self intersection removal above -# # TODO: if any were removed, then "transects_to_check" is not guarenteed to have the same indices so the below logical\ -# # TODO: won't work as desired -# is_multi_intersecting_flowlines <- lengths(sf::st_intersects(extended_transects, net)) != 1 -# -# # replace any extended geoms that have multiple intersections with any flowlines (replacing with the original set of transects) -# sf::st_geometry(extended_transects[is_multi_intersecting_flowlines, ]) <- sf::st_geometry(transects_to_check[is_multi_intersecting_flowlines, ]) -# -# # update the lengths and is_extended flag to align with the above replacement of geometries -# extended_transects[is_multi_intersecting_flowlines, ]$cs_lengthm <- transects_to_check[is_multi_intersecting_flowlines, ]$cs_lengthm -# extended_transects[is_multi_intersecting_flowlines, ]$is_extended <- transects_to_check[is_multi_intersecting_flowlines, ]$is_extended -# -# # remove transects that intersect with OTHER TRANSECTS -# extended_transects <- -# extended_transects[lengths(sf::st_intersects(extended_transects)) == 1, ] %>% -# dplyr::group_by(dplyr::across(dplyr::any_of(crosswalk_id))) %>% -# # dplyr::group_by(hy_id) -# # dplyr::mutate(cs_id = 1:dplyr::n()) %>% -# dplyr::ungroup() -# -# # remove transects that intersect multiple flowlines -# extended_transects <- -# extended_transects[lengths(sf::st_intersects(extended_transects, net)) == 1, ] %>% -# dplyr::group_by(dplyr::across(dplyr::any_of(crosswalk_id))) %>% -# # dplyr::mutate(cs_id = 1:dplyr::n()) %>% -# dplyr::ungroup() -# -# # check 
to make sure all unique hy_id/cs_id in the INPUT are in the OUTPUT, -# # and raise an error if they're are missing hy_id/cs_ids -# input_uids <- unique(hydrofabric3D::add_tmp_id(transects_to_check)$tmp_id) -# output_uids <- unique(hydrofabric3D::add_tmp_id(extended_transects)$tmp_id) -# -# # missing_inputs <- -# # transects_to_check %>% -# # dplyr::filter(tmp_id %in% input_uids[!input_uids %in% output_uids]) -# # missing_outputs <- -# # extended_transects2 %>% -# # dplyr::filter(tmp_id %in% output_uids[!input_uids %in% output_uids]) -# # mapview::mapview(missing_inputs, color = "red") + -# # mapview::mapview(missing_outputs, color = "green") -# -# has_all_uids <- all(output_uids %in% input_uids) -# -# # throw an error if NOT all hy_id/cs_ids are the same in the input and output data -# if(!has_all_uids) { -# stop("Missing unique hy_id/cs_id from input transects in the output transects") -# } -# -# return(extended_transects) -# } \ No newline at end of file diff --git a/R/transect_extenders_with_network.R b/R/transect_extenders_with_network.R index ed41c80..6d02917 100644 --- a/R/transect_extenders_with_network.R +++ b/R/transect_extenders_with_network.R @@ -2560,6 +2560,223 @@ extend_transects_by_length <- function( # TODO: DELETE SOON +# TODO Deprecated, can be deleted soon (replaced by get_improved_cs_pts(), and then that was replaced by flagging methods) + +#' @title Extend a set of transects by a percentage based on banks and relief +#' Given a set of transect lines with valid_banks and has_relief columns (derived from DEM extracted cross section points), extend any transects +#' by a percentage of the transects length if the transect does NOT have valid banks (valid_banks == FALSE) OR it does NOT have relief (has_relief == FALSE). +#' @param transects_to_check sf linestrings, set of all transects in the network. 
Requires the following columns: "hy_id", "cs_id", "cs_lengthm" (length of geometry in meters), "valid_banks", and "has_relief" +#' @param net sf linestrings, flowline network that transects were generated from, requires "id" column (where "id" equals the "hy_id" columns in 'transects_to_check' and 'transects' ) +#' @param scale numeric, percentage of current transect line length to extend transects in transects_to_extend by. Default is 0.5 (50% of the transect length) +#' @param verbose logical, whether to show a progress bar and progress messages or not. Default is TRUE. +#' @return sf linestring dataframe containing the the original transects with extensions performed on transects without valid_banks OR has_relief (a "is_extended" flag denotes if the geometry was extended by "scale" % or not) +#' @importFrom geos as_geos_geometry geos_intersection geos_type geos_intersects +#' @importFrom sf st_geometry st_as_sf +#' @importFrom dplyr filter bind_rows +#' @noRd +#' @keywords internal +extend_invalid_transects <- function( + transects_to_check, + net, + scale = 0.5, + verbose = TRUE +) { + # ---------------------------------------- + # ---------------------------------------- + + # transects_to_check = transects + # net = net + # scale = scale + # verbose = verbose + + # ---------------------------------------- + # ---------------------------------------- + + # Create an "is_extended" flag to identify which transects were extended and updated + transects_to_check$is_extended <- FALSE + + # split input transects into invalid and valid sets (valid == has valid banks AND has relief) + invalid_transects <- dplyr::filter(transects_to_check, !valid_banks | !has_relief) + valid_transects <- dplyr::filter(transects_to_check, valid_banks & has_relief) + + # keep track of any transects that having missing values in either valid_banks/has_relief columns, + # these get added back to the updated data at the end + missing_bank_or_relief_data <- + transects_to_check %>% + 
dplyr::filter(is.na(valid_banks) | is.na(has_relief)) + + # TODO: Probably remove this + count_check <- nrow(valid_transects) + nrow(invalid_transects) == nrow(transects_to_check) + # count_check <- nrow(valid_transects) + nrow(invalid_transects) == nrow(transects_to_check) - nrow(missing_bank_or_relief_data) + + if(!count_check) { + warning(paste0(nrow(missing_bank_or_relief_data), " transects have NA values in either 'valid_banks' or 'has_relief' columns")) + # warning(paste0("Different number of transects after splitting data by 'valid_banks' and 'has_relief' columns, ", nrow(missing_bank_or_relief_data), " transects have NA values in either 'valid_banks' or 'has_relief' columns")) + # stop("Mismatch in number of points after splitting data by the 'valid_banks' and 'has_relief' columns, likely a missing value in either 'valid_banks' or 'has_relief' columns") + } + + if(verbose) { message(paste0("Extending ", nrow(invalid_transects), " transects without valid banks or relief by ", scale * 100, "%...")) } + + # Extend the transects by a scale % value + extended_trans <- extend_by_percent(invalid_transects, scale, "cs_lengthm") + + # Store the identifying information to use in for loop to subset data using IDs + fline_id_array <- net$id + hy_id_array <- extended_trans$hy_id + cs_id_array <- extended_trans$cs_id + + check_hy_id_array <- transects_to_check$hy_id + check_cs_id_array <- transects_to_check$cs_id + + # Convert extended transects to geos + extended_trans <- geos::as_geos_geometry(extended_trans) + transects_to_check_geos <- geos::as_geos_geometry(transects_to_check) + + # Convert the net object into a geos_geometry + net_geos <- geos::as_geos_geometry(net) + + # if(verbose) { message(paste0("Iterating through extended geometries and checking validity...")) } + + # Convert the original transect lines to geos_geometries and when + # a valid extension comes up in the below for loop, replace the old geometry with the newly extended one + geos_list <- 
geos::as_geos_geometry(invalid_transects$geom) + + # Preallocate vectors to store the "is_extended" flag and the new lengths after extensions: + # - if an extension is VALID (checked in the loop below), then + # set the "is_extended" flag to TRUE and update the cross section length + # to use the new extended length + extended_flag <- rep(FALSE, length(extended_trans)) + length_list <- invalid_transects$cs_lengthm + + make_progress <- make_progress_bar(verbose, length(extended_trans)) + + # loop through geometries that might need to be extended, try to extend, and then update + # the 'to_extend' values IF the extended transect does NOT violate any intersection rules + for (i in 1:length(extended_trans)) { + + # Get the current transect, hy_id, cs_id + # current_trans <- extended_trans[i] + current_hy_id <- hy_id_array[i] + current_cs_id <- cs_id_array[i] + + # # Make sure that newly extended transect line only intersects its origin flowline at MOST 1 time + # # AND that the newly extended transect does NOT intersect with any previously computed transect lines + # fline_intersect <- geos::geos_intersection(extended_trans[i], current_fline) + + # Check that the extended transect lines only intersect a single flowline in the network only ONCE + intersects_with_flowlines <- geos::geos_intersection( + extended_trans[i], + net_geos[fline_id_array == current_hy_id] + ) + + # Check that newly extended cross section only intersects its origin flowline at MOST 1 time + # (This value will be a "MULTIPOINT" if it intersects more than once and will evaluate to FALSE) + intersects_flowline_only_once <- sum(geos::geos_type(intersects_with_flowlines) == "point") == 1 && + sum(geos::geos_type(intersects_with_flowlines) == "multipoint") == 0 + + if(!intersects_flowline_only_once) { + # message(" -> Skipping iteration because extended transect intersects flowline more than once") + next + } + + # Check that extended transect doesn't intersect with any of the original cross sections 
on this "hy_id" + is_intersecting_other_transects <- any(geos::geos_intersects( + extended_trans[i], + # AKA neighbor_transects + transects_to_check_geos[check_hy_id_array == current_hy_id & check_cs_id_array != current_cs_id] + ) + ) + + if (is_intersecting_other_transects) { + # message(" --> Skipping iteration because extended transect intersects another (UNEXTENDED) neighboring transect") + next + } + # Check that extended transect doesn't intersect with any of the NEWLY EXTENDED cross sections + is_intersecting_other_extended_transects <- any(geos::geos_intersects(extended_trans[i], extended_trans[-i])) + + if (is_intersecting_other_extended_transects) { + # message(" -----> Skipping iteration because extended transect intersects another (EXTENDED) neighboring transect") + next + } + + # If all of these conditions are TRUE then the currently extended transect will get inserted into "to_extend" + # - Newly extended transect intersects with its flowlines AT MOST 1 time + # - Newly extended transect does NOT intersect with any of the other NEWLY EXTENDED transect lines + # - Newly extended transect does NOT intersect with any of the ORIGINAL transect lines + if ( + # Check that newly extended cross section only intersects its origin flowline at MOST 1 time + # (This value will be a "MULTIPOINT" if it intersects more than once and will evaluate to FALSE) + intersects_flowline_only_once && + # geos::geos_type(fline_intersect) == "point" && + + # Check that extended transect doesn't intersect with any of the NEWLY EXTENDED cross sections + !is_intersecting_other_extended_transects && + # !any(geos::geos_intersects(extended_trans[i], extended_trans[-i])) && + + # Check that extended transect doesn't intersect with any of the original cross sections on this "hy_id" + !is_intersecting_other_transects + # !any(geos::geos_intersects(extended_trans[i], neighbor_transects)) + ) { + + # # Calculate the updated cross section length to align with the newly extended cross 
section for this row + updated_cs_length <- (length_list[i] * scale) + length_list[i] + # updated_cs_length <- (current_length * scale) + current_length + + # copy the current cross section length + length_list[i] <- updated_cs_length + # length_list <- vctrs::vec_c(length_list, updated_cs_length) + + # Update the transect geometry with the newly extended transect + geos_list[i] <- extended_trans[i] + # geos_list <- vctrs::vec_c(geos_list, extended_trans[i]) + + # Set the extended flag to TRUE for this transect + extended_flag[i] <- TRUE + # extended_flag <- vctrs::vec_c(extended_flag, TRUE) + + } + + make_progress() + } + + if(verbose) { message(paste0("Complete!")) } + + # Update the "invalid_transects" with new geos geometries ("geos_list") + sf::st_geometry(invalid_transects) <- sf::st_geometry(sf::st_as_sf(geos_list)) + + # update the "is_extended" flag and the cross section lengths to reflect any extensions + invalid_transects$is_extended <- extended_flag + invalid_transects$cs_lengthm <- length_list + + # Combine the valid_transects with the UPDATED invalid_transects (updated by attempting extension) to get the final output dataset + extended_transects <- dplyr::bind_rows( + valid_transects, + invalid_transects + ) + + # add back any transects that were missing banks/relief values + extended_transects <- dplyr::bind_rows( + extended_transects, + dplyr::select(missing_bank_or_relief_data, + dplyr::any_of(names(extended_transects))) + ) + + # check to make sure all unique hy_id/cs_id in the INPUT are in the OUTPUT, + # and raise an error if they're are missing hy_id/cs_ids + input_uids <- unique(hydrofabric3D::add_tmp_id(transects_to_check)$tmp_id) + output_uids <- unique(hydrofabric3D::add_tmp_id(extended_transects)$tmp_id) + + has_all_uids <- all(output_uids %in% input_uids) + + # throw an error if NOT all hy_id/cs_ids are the same in the input and output data + if(!has_all_uids) { + stop("Missing unique hy_id/cs_id from input transects in the output 
transects") + } + + return(extended_transects) +} + + # # @title Extend a set of transects by a percentage based on banks and relief # # Given a set of transect lines with valid_banks and has_relief columns (derived from DEM extracted cross section points), extend any transects # # by a percentage of the transects length if the transect does NOT have valid banks (valid_banks == FALSE) OR it does NOT have relief (has_relief == FALSE). diff --git a/R/transect_to_polygon_extensions.R b/R/transect_to_polygon_extender.R similarity index 89% rename from R/transect_to_polygon_extensions.R rename to R/transect_to_polygon_extender.R index fc019c3..b704eba 100644 --- a/R/transect_to_polygon_extensions.R +++ b/R/transect_to_polygon_extender.R @@ -173,7 +173,7 @@ extend_transects_to_polygons2 <- function( polygons_subset <- subset_polygons_in_transects(transect_lines, polygons) # get a dataframe that tells you how far to extend each line in either direction - extensions_by_id <- get_extensions_by_id(transect_subset, polygons_subset, crosswalk_id, max_extension_distance) + extensions_by_id <- get_transect_extension_distances_to_polygons(transect_subset, polygons_subset, crosswalk_id, max_extension_distance) # pid <- "wb-1002059" # pcsid <- 4 @@ -258,11 +258,13 @@ extend_transects_to_polygons2 <- function( #' @param reindex_cs_ids logical, whether to reindex the cs_ids to ensure each crosswalk_id has cs_ids of 1-number of transects. Default is TRUE, which makes sure if any cross sections were removed from a crosswalk_id, #' then the cs_ids are renumbered so there are no gaps between cs_ids within a crosswalk_id. #' Setting this to FALSE will make sure crosswalk_id/cs_ids remain untouched as they were given in the input data. +#' @param verbose logical, whether to output messages or not. 
Default is TRUE, and messages will output +#' #' @return sf linestring, with extended transect lines #' @importFrom rmapshaper ms_simplify #' @importFrom geos as_geos_geometry geos_intersects_matrix geos_simplify_preserve_topology geos_within_matrix geos_empty geos_point_start geos_point_end #' @importFrom sf st_as_sf st_cast st_segmentize st_length st_drop_geometry st_geometry -#' @importFrom dplyr mutate case_when select left_join relocate n any_of +#' @importFrom dplyr mutate case_when select left_join relocate n any_of group_by ungroup arrange across #' @importFrom lwgeom st_linesubstring #' @importFrom wk wk_crs #' @importFrom vctrs vec_c @@ -271,12 +273,13 @@ extend_transects_to_polygons <- function( transect_lines, polygons, flowlines, - crosswalk_id = 'hy_id', + crosswalk_id = NULL, grouping_id = 'mainstem', max_extension_distance = 3000, tolerance = NULL, keep_lengths = FALSE, - reindex_cs_ids = TRUE + reindex_cs_ids = TRUE, + verbose = TRUE ) { # ---------------------------------------------------------------------------------- # ----------- TEST DATA ------ @@ -356,6 +359,9 @@ extend_transects_to_polygons <- function( # ) # } + # standardize geometry name + flowlines <- hydroloom::rename_geometry(flowlines, "geometry") + is_valid_flowlines <- validate_df(flowlines, c(crosswalk_id, grouping_id, "geometry"), "flowlines") @@ -377,19 +383,20 @@ extend_transects_to_polygons <- function( sf::st_drop_geometry() %>% dplyr::select(dplyr::any_of(crosswalk_id), cs_id, initial_length) - # # preserve the initial ordering - # starting_order <- get_transect_initial_order(transect_lines, crosswalk_id) + # # # preserve the initial ordering + # starting_order <- get_transect_initial_order(transect_lines, crosswalk_id) # get only the relevent polygons/transects transect_subset <- subset_transects_in_polygons(transect_lines, polygons) polygons_subset <- subset_polygons_in_transects(transect_lines, polygons) # get a dataframe that tells you how far to extend each line in 
either direction - extensions_by_id <- get_extensions_by_id(transects = transect_subset, + extensions_by_id <- get_transect_extension_distances_to_polygons(transects = transect_subset, polygons = polygons_subset, crosswalk_id = crosswalk_id, max_extension_distance = max_extension_distance, - tolerance = tolerance + tolerance = tolerance, + verbose = verbose ) # pid <- "wb-1002059" @@ -528,6 +535,12 @@ extend_transects_to_polygons <- function( # ---- Final reorder ---- # ---------------------------------- + # transect_lines <- + # transect_lines %>% + # dplyr::group_by(dplyr::across(dplyr::any_of(c(crosswalk_id, "cs_id")))) %>% + # dplyr::arrange(cs_id, .by_group = TRUE) %>% + # dplyr::ungroup() + # transect_lines <- # transect_lines %>% # dplyr::left_join( @@ -924,6 +937,128 @@ partition_transects_for_extension <- function(transects, polygons_subset, dir = #' @param crosswalk_id character #' @param max_extension_distance numeric #' @param tolerance A minimum distance to use for simplification on polygons. Use a higher value for more simplification on the polygons. Default is NULL which will apply no simplification to polygons. +#' @param verbose logical, whether to output messages or not. 
Default is TRUE, and messages will output +#' +#' @return data.frame or tibble +#' @export +get_transect_extension_distances_to_polygons <- function( + transects, + polygons, + crosswalk_id, + max_extension_distance, + tolerance = NULL, + verbose = TRUE +) { + + # split transects into left and right partitions + left_partition <- partition_transects_for_extension( + transects, + polygons, + dir = "left" + ) %>% + wrangle_paritioned_transects( + dir = "left", + crosswalk_id = crosswalk_id + ) + + right_partition <- partition_transects_for_extension( + transects, + polygons, + dir = "right" + ) %>% + wrangle_paritioned_transects( + dir = "right", + crosswalk_id = crosswalk_id + ) + + # Convert the polygon to a MULTILINESTRING geometry for checking extension distances + mls <- sf_polygons_to_geos_multilinestrings(polygons, tolerance) + # mls <- sf_polygons_to_geos_multilinestrings(polygons, 100) + + message_if_verbose("Generating left side transects distances to polygons... ", verbose = verbose) + + left_distances <- calc_extension_distances( + geos_geoms = geos::as_geos_geometry(left_partition), + ids = left_partition$tmp_id, + lines_to_cut = mls, + lines_to_cut_indices = left_partition$polygon_index, + direction = "head", + max_extension_distance = max_extension_distance + ) + + message_if_verbose("Generating right side transects distances to polygons... 
", verbose = verbose) + + right_distances <- calc_extension_distances( + geos_geoms = geos::as_geos_geometry(right_partition), + ids = right_partition$tmp_id, + lines_to_cut = mls, + lines_to_cut_indices = right_partition$polygon_index, + direction = "tail", + max_extension_distance = max_extension_distance + ) + + left_partition$left_distance <- left_distances + right_partition$right_distance <- right_distances + + # Distance to extend LEFT and/or RIGHT for each hy_id/cs_id + extensions_by_id <- dplyr::left_join( + sf::st_drop_geometry( + dplyr::select(left_partition, + dplyr::any_of(crosswalk_id), + cs_id, + left_distance + ) + ), + sf::st_drop_geometry( + dplyr::select(right_partition, + dplyr::any_of(crosswalk_id), + cs_id, + right_distance + ) + ), + by = c(crosswalk_id, "cs_id") + ) + + # add any missing crosswalk_id/cs_id that DID NOT have any extension distance w/ values of 0 + extensions_by_id <- dplyr::bind_rows( + extensions_by_id, + transects %>% + sf::st_drop_geometry() %>% + hydrofabric3D::add_tmp_id(x = crosswalk_id) %>% + dplyr::filter(!tmp_id %in% hydrofabric3D::add_tmp_id(extensions_by_id, x = crosswalk_id)$tmp_id) %>% + dplyr::mutate( + left_distance = 0, + right_distance = 0 + ) %>% + dplyr::select( + dplyr::any_of(crosswalk_id), cs_id, + left_distance, right_distance + ) + ) + # # TODO: i want to make any NA left/right distances set to 0 instead of having both NAs AND 0 values, need to fix tests for this function first + # extensions_by_id <- + # extensions_by_id %>% + # dplyr::mutate( + # left_distance = dplyr::case_when( + # is.na(left_distance) ~ 0, + # TRUE ~ left_distance + # ), + # right_distance = dplyr::case_when( + # is.na(right_distance) ~ 0, + # TRUE ~ right_distance + # ) + # ) + + return(extensions_by_id) +} + +#' Get the left and right extension distances for a set of transects out to a set of polygons +#' Deprecated, new name is get_transect_extension_distances_to_polygons() +#' @param transects sf linestring dataframe +#' @param 
polygons sf polygon dataframe +#' @param crosswalk_id character +#' @param max_extension_distance numeric +#' @param tolerance A minimum distance to use for simplification on polygons. Use a higher value for more simplification on the polygons. Default is NULL which will apply no simplification to polygons. #' #' @return data.frame or tibble #' @noRd @@ -1112,73 +1247,167 @@ get_line_node_pts <- function( #' Trim a set of transects to the bounds of polygons #' #' @param transect_lines sf dataframe -#' @param crosswalk_id character unique ID #' @param flowlines sf dataframe #' @param polygons sf dataframe -#' @param polygon_id character, unique ID column in polygons -#' @importFrom dplyr filter select any_of mutate bind_rows +#' @param crosswalk_id character unique ID +#' @importFrom dplyr filter select any_of mutate bind_rows n slice_max group_by ungroup across distinct #' @importFrom rmapshaper ms_explode #' @importFrom sf st_intersection +#' @importFrom hydroloom rename_geometry #' @return sf dataframe #' @export trim_transects_by_polygons <- function(transect_lines, - crosswalk_id = NULL, flowlines, - polygons, - polygon_id = NULL) { + polygons, + crosswalk_id = NULL + ) { + + # transect_lines = ext_trans + # flowlines = sf::read_sf(testthat::test_path("testdata", "flowlines.gpkg")) %>% + # dplyr::slice(c(9, 10)) + # polygons = small_polygons + # crosswalk_id = CROSSWALK_ID + # transect_lines = ext_trans + # # flowlines = flowlines, + # flowlines = sf::read_sf(testthat::test_path("testdata", "flowlines.gpkg")) %>% + # dplyr::slice(c(9, 10)) + # polygons = small_polygons + # crosswalk_id = CROSSWALK_ID + + # transect_lines = ext_trans + # flowlines = flowlines + # # flowlines = sf::read_sf(testthat::test_path("testdata", "flowlines.gpkg")) %>% + # # dplyr::slice(c(9, 10)), + # polygons = small_polygons + # crosswalk_id = CROSSWALK_ID + + # TODO: this is a hacky way of doing this, can definitily be improved so user wont get an error if there transects/flowlines 
have 'polygon_id' as the crosswalk_id + # temporary ID to use for keeping track of polygon IDs + POLYGON_ID <- "polygon_id" + + if(crosswalk_id == POLYGON_ID) { + stop("Invalid crosswalk_id value of '", crosswalk_id, "'. 'crosswalk_id' can NOT be 'polygon_id'. + \nTry renaming your crosswalk_id column in the 'transect_lines' and 'flowlines' input datasets to a name that is not 'polygon_id'" + ) + } + + # standardize geometry name and drop polygon_id if its a column in any of the data + transect_lines <- + transect_lines %>% + hydroloom::rename_geometry("geometry") %>% + dplyr::select(!dplyr::any_of(POLYGON_ID)) + + flowlines <- + flowlines %>% + hydroloom::rename_geometry("geometry") %>% + dplyr::select(!dplyr::any_of(POLYGON_ID)) + + polygons <- + polygons %>% + # # sf::st_union() %>% + # sf::st_difference() %>% + # rmapshaper::ms_explode() %>% + # sf::st_as_sf() %>% + cluster_dissolve() %>% + hydroloom::rename_geometry("geometry") %>% + dplyr::mutate( + polygon_id = 1:dplyr::n() + ) %>% + dplyr::select(polygon_id, geometry) + # plot(polygons$geom) + # plot(polygons2$geometry) + + # Validate the transects / flowlines input datasets + is_valid_transects <- validate_df(transect_lines, + c(crosswalk_id, "cs_id", "geometry"), + "transect_lines") + + is_valid_flowlines <- validate_df(flowlines, + c(crosswalk_id, "geometry"), + "flowlines") + + # Figure out which transects intersect the polygons and which do NOT split_transects <- add_intersects_ids( transect_lines, polygons, - polygon_id + POLYGON_ID ) - trimmed_trans <- - split_transects %>% - dplyr::filter(!is.na(.data[[polygon_id]])) %>% - sf::st_intersection(polygons) %>% - dplyr::select(!dplyr::any_of(c(paste0(polygon_id, ".1")))) %>% - rmapshaper::ms_explode() %>% - add_intersects_ids( - flowlines %>% - dplyr::mutate( - new_id = .data[[crosswalk_id]] - ), - "new_id" - ) %>% - dplyr::filter(!is.na(new_id)) %>% - dplyr::select(-new_id) + suppressWarnings({ + + # trim any of the transects that DO hit the polygons + 
trimmed_trans <- + split_transects %>% + dplyr::filter(!is.na(.data[[POLYGON_ID]])) %>% + dplyr::select(-dplyr::any_of(POLYGON_ID)) %>% + sf::st_intersection(polygons) %>% + dplyr::select(!dplyr::any_of(c(POLYGON_ID, paste0(POLYGON_ID, ".1")))) %>% + rmapshaper::ms_explode() %>% + add_intersects_ids( + flowlines %>% + dplyr::mutate( + new_id = .data[[crosswalk_id]] + ), + "new_id" + ) %>% + # dplyr::mutate(new_id2 = strsplit(new_id, ", ")) %>% + dplyr::filter(!is.na(new_id), .data[[crosswalk_id]] %in% strsplit(new_id, ", ")) %>% + # dplyr::filter(!is.na(new_id), .data[[crosswalk_id]] == new_id ) %>% + # dplyr::filter(!is.na(new_id)) %>% + dplyr::select(-new_id) + + # make sure all of the intersection transects are distinct and then select the longest of the transects if there are any duplicates + trimmed_trans <- + trimmed_trans %>% + dplyr::distinct() %>% + # distinct.sf() %>% + dplyr::group_by(dplyr::across(dplyr::any_of(c(crosswalk_id, "cs_id")))) %>% + add_length_col("length_check") %>% + dplyr::slice_max(length_check, with_ties = FALSE) %>% + dplyr::ungroup() %>% + dplyr::select(-length_check) + + + }) - remerged <- + merged_transects <- split_transects %>% - dplyr::filter(is.na(.data[[polygon_id]])) %>% + dplyr::filter(is.na(.data[[POLYGON_ID]])) %>% dplyr::bind_rows(trimmed_trans) %>% - dplyr::select(!dplyr::any_of(c(polygon_id))) + dplyr::select(!dplyr::any_of(c(POLYGON_ID))) - # hydrofabric3D::get_unique_tmp_ids(remerged, crosswalk_id) - - # has_same_uids(remerged, transect_lines, crosswalk_id = crosswalk_id) + # has_same_uids(merged_transects, transect_lines, crosswalk_id = crosswalk_id) missing_trans <- transect_lines %>% add_tmp_id(crosswalk_id) %>% - dplyr::filter(!tmp_id %in% get_unique_tmp_ids(remerged, crosswalk_id)) %>% + dplyr::filter(!tmp_id %in% get_unique_tmp_ids(merged_transects, crosswalk_id)) %>% dplyr::select(-tmp_id) + # plot(merged_transects$geometry, add =F) + # plot(flowlines$geometry, add = T) + # TODO: - # add any missing transects 
BACK to the remerged set, - # these transects couldnt be properly dealt with so they will just be thrown back in - remerged <- - remerged %>% + # add any missing transects BACK to the merged_transects set, + # these transects COULDN'T be properly dealt with so they will just be thrown back in as is + merged_transects <- + merged_transects %>% dplyr::bind_rows(missing_trans) - has_all_same_uids <- has_same_uids(remerged, transect_lines, crosswalk_id = crosswalk_id) + has_all_same_uids <- has_same_uids(merged_transects, transect_lines, crosswalk_id = crosswalk_id) if(!has_all_same_uids) { warning("Not all unique crosswalk_id/cs_ids were retained from input...") } - return(remerged) + # put transects back in proper order + merged_transects <- + merged_transects %>% + cs_arrange(crosswalk_id = crosswalk_id, order_by = "cs_id") + + + return(merged_transects) } #' Add an ID column from 'y' if it intersects with 'x' diff --git a/R/transects.R b/R/transects.R index e86fc26..4258eae 100644 --- a/R/transects.R +++ b/R/transects.R @@ -1511,33 +1511,6 @@ force_min_npts_per_flowlines <- function(lines) { } -#' Adds a logical 'is_outlet' flag to a set of transects identifying the most downstream transect -#' -#' @param x sf dataframe linestrings -#' @param crosswalk_id character -#' @importFrom dplyr group_by across any_of mutate ungroup row_number -#' -#' @return sf dataframe of transects with added is_outlet logical column -#' @export -add_is_outlet_flag <- function(x, crosswalk_id = NULL) { - # x <- transects - - is_valid_df <- validate_df(x, c(crosswalk_id, "cs_measure"), "x") - # is_valid_df <- validate_df(x, c(crosswalk_id, "cs_id", "cs_measure"), "x") - - x <- - x %>% - dplyr::group_by(dplyr::across(dplyr::any_of(crosswalk_id))) %>% - dplyr::mutate( - is_outlet = which.max(cs_measure) == dplyr::row_number() - # is_outlet = cs_id[which.max(cs_measure)] == cs_id - ) %>% - dplyr::ungroup() - - return(x) - -} - #' Remove transect lines that intersect with more than one 
flowline #' #' @param transects sf linestring dataframe of transect lines @@ -1564,9 +1537,6 @@ rm_multiflowline_intersections <- function(transects, flowlines) { #' @export rm_multi_intersects <- function(x) { - # x <- tmp_trans - # x - while (any(lengths(sf::st_intersects(x)) > 1)) { intersect_counts <- lengths(sf::st_intersects(x)) @@ -1576,8 +1546,6 @@ rm_multi_intersects <- function(x) { # message("# intersects > 1 intersect_counts: ", sum(intersect_counts > 1)) # message("Removing ", length(max_crossings), " from x") # message(nrow(x), " rows in x remain...\n") - # mapview::mapview(x, color = "red") + - # mapview::mapview(tmp_trans, color = "green") } diff --git a/R/utils.R b/R/utils.R index 7d4e07a..ffe0269 100644 --- a/R/utils.R +++ b/R/utils.R @@ -200,154 +200,28 @@ add_length_col <- function(x, } -#' Add an extension_distance column based off valid_banks and has_relief attributes +#' Adds a logical 'is_outlet' flag to a set of transects identifying the most downstream transect #' -#' @param transects dataframe, tibble or sf dataframe with length_col, "valid_banks", and "has_relief" columns -#' @param scale numeric, percentage of current transect line length to extend transects in transects_to_extend by. 
Default is 0.5 (50% of the transect length) -#' @param length_col character, name of the column with the numeric cross section length -#' -#' @return dataframe, tibble or sf dataframe -#' @importFrom dplyr mutate case_when -add_attribute_based_extension_distances <- function(transects, - scale = 0.5, - length_col = NULL -) { - # transects <- - # transects %>% - # dplyr::mutate( - # has_relief = TRUE, - # valid_banks = FALSE - # ) - - # scale = 0.5 - # length_col = NULL - - - if(!inherits(scale, "numeric")) { - stop("Invalid 'scale' value, scale must be an integer or float numeric value") - } - - if (scale < 0) { - stop("Invalid 'scale' value, scale must be numeric value greater than or equal to 0") - } - - if(is.null(length_col)) { - stop("Missing 'length_col' character input indicating which column in 'transects' is a numeric vector of the lengths of each transect") - } - - REQUIRED_COLS <- c(length_col, "valid_banks", "has_relief") - - # validate input graph - is_valid <- validate_df(transects, REQUIRED_COLS, "transects") - - # TODO: this should be reviewed - # NOTE: --> setting a default of FALSE for NA valid_banks and NA has_relief values - transects <- - transects %>% - dplyr::mutate( - valid_banks = dplyr::case_when( - is.na(valid_banks) ~ FALSE, - TRUE ~ valid_banks - ), - has_relief = dplyr::case_when( - is.na(has_relief) ~ FALSE, - TRUE ~ has_relief - ) - ) - - # add distances to extend for the left and right side of a transect - # for any of the the already "valid transects", we just set an extension distance of 0 - # on both sides and these transects will be KEPT AS IS - transects <- - transects %>% - dplyr::mutate( - extension_distance = dplyr::case_when( - !valid_banks | !has_relief ~ (((scale)*(.data[[length_col]])) / 2), - TRUE ~ 0 - ) - ) - - return(transects) - -} - -#' Add a 1:number of rows 'initial_order' column -#' Internal helper function for readability -#' -#' @param x dataframe, sf dataframe or tibble -#' @importFrom dplyr mutate n -#' 
@return dataframe, sf dataframe or tibble with an added 'initial_order' column -#' @noRd -#' @keywords internal -add_initial_order <- function(x) { - - x <- - x %>% - dplyr::mutate( - initial_order = 1:dplyr::n() - ) - - return(x) - -} - -#' Get the initial ordering of crosswalk IDs in a dataframe of transects -#' -#' @param x dataframe or sf dataframe +#' @param x sf dataframe linestrings #' @param crosswalk_id character +#' @importFrom dplyr group_by across any_of mutate ungroup row_number #' -#' @importFrom dplyr mutate any_of select group_by slice_min ungroup n -#' @importFrom sf st_drop_geometry -#' @return dataframe, sf dataframe or tibble of crosswalk_id with initial_order column -#' @noRd -#' @keywords internal -get_transect_initial_order <- function(x, crosswalk_id = NULL) { - - is_x_valid <- validate_df(x, - c(crosswalk_id, "cs_id"), - "x") - - x[[crosswalk_id]] <- factor(x[[crosswalk_id]], levels = unique(x[[crosswalk_id]])) - - x_order <- - x %>% - sf::st_drop_geometry() %>% - dplyr::group_by(dplyr::across(dplyr::any_of(c(crosswalk_id)))) %>% - dplyr::slice_min(cs_id, n = 1, with_ties = FALSE) %>% - # dplyr::filter(cs_id == min(cs_id)) %>% - dplyr::ungroup() %>% - dplyr::mutate( - initial_order = 1:dplyr::n() - ) %>% - # hydrofabric3D:::add_initial_order() %>% - dplyr::select(dplyr::any_of(crosswalk_id), initial_order) - - # t_order[[crosswalk_id]] <- as.character(t_order[[crosswalk_id]]) - - return(x_order) - -} - -#' Add a 1:number of cross sections 'cs_id' for each crosswalk_id by cs_measure -#' -#' @param x dataframe, sf dataframe or tibble -#' @param crosswalk_id character, unique ID column -#' @importFrom dplyr mutate group_by across any_of arrange n ungroup -#' @return dataframe, sf dataframe or tibble with an added 'cs_id' column +#' @return sf dataframe of transects with added is_outlet logical column #' @export -add_cs_id_sequence <- function(x, crosswalk_id = NULL) { - is_x_valid <- validate_df(x, - c(crosswalk_id, "cs_measure"), - "x") 
+add_is_outlet_flag <- function(x, crosswalk_id = NULL) { + # x <- transects + + is_valid_df <- validate_df(x, c(crosswalk_id, "cs_measure"), "x") + # is_valid_df <- validate_df(x, c(crosswalk_id, "cs_id", "cs_measure"), "x") - x <- + x <- x %>% - dplyr::group_by(dplyr::across(dplyr::any_of(c(crosswalk_id)))) %>% - dplyr::arrange(cs_measure, .by_group = TRUE) %>% + dplyr::group_by(dplyr::across(dplyr::any_of(crosswalk_id))) %>% dplyr::mutate( - cs_id = 1:dplyr::n() + is_outlet = which.max(cs_measure) == dplyr::row_number() + # is_outlet = cs_id[which.max(cs_measure)] == cs_id ) %>% - dplyr::ungroup() + dplyr::ungroup() return(x) @@ -462,1361 +336,6 @@ reorder_cols <- function(df, start_order) { ) } -#' @title Get the count of each point type in a set of cross section points -#' @description get_point_type_counts() will create a dataframe providing the counts of every point_type for each hy_id/cs_id in a set of classified cross section points (output of classify_pts()) -#' @param classified_pts dataframe or sf dataframe, cross section points with a "hy_id", and "cs_id" columns as well as a 'point_type' column containing the values: "bottom", "left_bank", "right_bank", and "channel" -#' @param crosswalk_id character, ID column -#' @return dataframe or sf dataframe with hy_id, cs_id, and _count columns for each point_type -#' @importFrom sf st_drop_geometry -#' @importFrom dplyr group_by count ungroup summarize filter n_distinct select slice left_join relocate all_of last_col -#' @importFrom tidyr pivot_wider pivot_longer -#' @export -get_point_type_counts <- function(classified_pts, crosswalk_id = NULL) { - - # classified_pts <- cs_pts %>% hydrofabric3D::classify_points() - # add = F - # classified_pts = classified_pts2 - # add = TRUE - - # make a unique ID if one is not given (NULL 'crosswalk_id') - if(is.null(crosswalk_id)) { - crosswalk_id <- 'hydrofabric_id' - } - - REQUIRED_COLS <- c(crosswalk_id, "cs_id", "point_type") - - if (!all(REQUIRED_COLS %in% 
names(classified_pts))) { - missing_cols <- REQUIRED_COLS[which(!REQUIRED_COLS %in% names(classified_pts))] - stop("'classified_pts' is missing one or more of the required columns:\n > ", - paste0(missing_cols, collapse = "\n > ")) - } - - # type checking - if (!any(class(classified_pts) %in% c("sf", "tbl_df", "tbl", "data.frame"))) { - stop("Invalid argument type, 'classified_pts' must be of type 'sf', 'tbl_df', 'tbl' or 'data.frame', given type was '", - class(classified_pts), "'") - } - - # create a copy of the input dataset, add a tmp_id column - stage_df <- - classified_pts %>% - sf::st_drop_geometry() %>% - hydrofabric3D::add_tmp_id(x = crosswalk_id) - - # # create a reference dataframe with all possible combinations of tmp_id and point_type - # reference_df <- expand.grid( - # tmp_id = unique(stage_df$tmp_id), - # point_type = unique(stage_df$point_type) - # ) - - # get a count of the point_types in each hy_id/cs_id group (i.e. each cross section) - point_type_counts <- - stage_df %>% - dplyr::group_by(tmp_id, point_type) %>% - dplyr::count() %>% - dplyr::ungroup() %>% - dplyr::mutate( - # add levels to the point_type column so if a given point_type - # is NOT in the cross seciton points, then it will be added with NAs in the subsequent pivot_wider - point_type = factor(point_type, levels = c("left_bank", "bottom", "right_bank", "channel")) - ) - - # pivot data wider to get implicit missing groups with NA values - point_type_counts <- - point_type_counts %>% - tidyr::pivot_wider( - names_from = point_type, - values_from = n, - names_expand = TRUE - ) - - point_type_counts <- - point_type_counts %>% - tidyr::pivot_longer( - cols = c(bottom, channel, right_bank, left_bank), - names_to = "point_type", - values_to = "n" - ) %>% - dplyr::mutate(n = ifelse(is.na(n), 0, n)) - - # # Join the count of point types in each group with the reference_df to - # # get rows of NA values for any group that is missing a specific point_type - # point_type_counts <- - # 
point_type_counts %>% - # dplyr::right_join(reference_df, by = c("tmp_id", "point_type")) - - # # For any cross section group that does NOT contain a point type, - # # the point type will be NA and here we replace those NAs with 0 - # point_type_counts$n[is.na(point_type_counts$n)] <- 0 - - # # make sure that all tmp_id groups have all 4 point types - check_counts <- - point_type_counts %>% - dplyr::group_by(tmp_id) %>% - dplyr::summarize(unique_count = dplyr::n_distinct(point_type)) %>% - dplyr::filter(unique_count == 4) - - # if the number of distinct points types in each cross section is not 4, raise an error - if (length(unique(stage_df$tmp_id)) != nrow(check_counts)) { - stop("Error validating each hy_id/cs_id cross section contains exactly 4 distinct values in the 'point_type' column") - } - - # get the hy_id, cs_id for each tmp_id to cross walk back to just using hy_id/cs_id - stage_df <- - stage_df %>% - dplyr::select(tmp_id, dplyr::any_of(crosswalk_id), cs_id) %>% - # dplyr::select(tmp_id, hy_id, cs_id) %>% - dplyr::group_by(tmp_id) %>% - dplyr::slice(1) %>% - dplyr::ungroup() - - # convert the column of point types to be a column for each point type that - # has the point type count for each hy_id/cs_id (cross section) - point_type_counts <- - point_type_counts %>% - tidyr::pivot_wider(names_from = point_type, - names_glue = "{point_type}_count", - values_from = n) %>% - dplyr::left_join( - stage_df, - by = "tmp_id" - ) %>% - dplyr::select( - dplyr::any_of(crosswalk_id), - cs_id, - left_bank_count, right_bank_count, channel_count, bottom_count - ) - - # point_type_counts %>% - # dplyr::arrange(-right_bank_count) - - return(point_type_counts) - -} -#' @title Add the count of each point type as a column to a dataframe of section points -#' @description add_point_type_counts() will add columns to the input dataframe with the counts of every point_type for each hy_id/cs_id in the input dataframe of classified cross section points (output of classify_pts()) 
-#' @param classified_pts dataframe or sf dataframe, cross section points with a "hy_id", and "cs_id" columns as well as a 'point_type' column containing the values: "bottom", "left_bank", "right_bank", and "channel" -#' @return dataframe or sf dataframe with "_count" columns added -#' @importFrom sf st_drop_geometry -#' @importFrom dplyr group_by count ungroup summarize filter n_distinct select slice left_join relocate all_of last_col -#' @importFrom tidyr pivot_wider pivot_longer -#' @noRd -#' @keywords internal -add_point_type_counts2 <- function(classified_pts) { - - # classified_pts <- cs_pts %>% hydrofabric3D::classify_points() - # add = F - # classified_pts = classified_pts2 - # add = TRUE - - # type checking - if (!any(class(classified_pts) %in% c("sf", "tbl_df", "tbl", "data.frame"))) { - stop("Invalid argument type, 'classified_pts' must be of type 'sf', 'tbl_df', 'tbl' or 'data.frame', given type was '", - class(classified_pts), "'") - } - - # create a copy of the input dataset, add a tmp_id column - stage_df <- - classified_pts %>% - sf::st_drop_geometry() %>% - hydrofabric3D::add_tmp_id() - - # # create a reference dataframe with all possible combinations of tmp_id and point_type - # reference_df <- expand.grid( - # tmp_id = unique(stage_df$tmp_id), - # point_type = unique(stage_df$point_type) - # ) - - # get a count of the point_types in each hy_id/cs_id group (i.e. 
each cross section) - point_type_counts <- - stage_df %>% - dplyr::group_by(tmp_id, point_type) %>% - dplyr::count() %>% - dplyr::ungroup() %>% - dplyr::mutate( - # add levels to the point_type column so if a given point_type - # is NOT in the cross seciton points, then it will be added with NAs in the subsequent pivot_wider - point_type = factor(point_type, levels = c("left_bank", "bottom", "right_bank", "channel")) - ) - - # pivot data wider to get implicit missing groups with NA values - point_type_counts <- - point_type_counts %>% - tidyr::pivot_wider( - names_from = point_type, - values_from = n, - names_expand = TRUE - ) - - point_type_counts <- - point_type_counts %>% - tidyr::pivot_longer( - cols = c(bottom, channel, right_bank, left_bank), - names_to = "point_type", - values_to = "n" - ) %>% - dplyr::mutate(n = ifelse(is.na(n), 0, n)) - - # # Join the count of point types in each group with the reference_df to - # # get rows of NA values for any group that is missing a specific point_type - # point_type_counts <- - # point_type_counts %>% - # dplyr::right_join(reference_df, by = c("tmp_id", "point_type")) - - # # For any cross section group that does NOT contain a point type, - # # the point type will be NA and here we replace those NAs with 0 - # point_type_counts$n[is.na(point_type_counts$n)] <- 0 - - # # make sure that all tmp_id groups have all 4 point types - check_counts <- - point_type_counts %>% - dplyr::group_by(tmp_id) %>% - dplyr::summarize(unique_count = dplyr::n_distinct(point_type)) %>% - dplyr::filter(unique_count == 4) - - # if the number of distinct points types in each cross section is not 4, raise an error - if (length(unique(stage_df$tmp_id)) != nrow(check_counts)) { - stop("Error validating each hy_id/cs_id cross section contains exactly 4 distinct values in the 'point_type' column") - } - - # get the hy_id, cs_id for each tmp_id to cross walk back to just using hy_id/cs_id - stage_df <- - stage_df %>% - dplyr::select(tmp_id, hy_id, 
cs_id) %>% - dplyr::group_by(tmp_id) %>% - dplyr::slice(1) %>% - dplyr::ungroup() - - # convert the column of point types to be a column for each point type that - # has the point type count for each hy_id/cs_id (cross section) - point_type_counts <- - point_type_counts %>% - tidyr::pivot_wider(names_from = point_type, - names_glue = "{point_type}_count", - values_from = n) %>% - dplyr::left_join( - stage_df, - by = "tmp_id" - ) %>% - dplyr::select(hy_id, cs_id, left_bank_count, right_bank_count, channel_count, bottom_count) - - # Join the point type counts to the original dataframe - classified_pts <- - classified_pts %>% - dplyr::left_join( - point_type_counts, - by = c("hy_id", "cs_id") - ) - - # check if any of the columns in 'classified_pts' are geometry types and move them to the end column if they do exist - classified_pts <- move_geometry_to_last(classified_pts) - - return(classified_pts) -} - -#' @title Add the count of each point type as a column to a dataframe of section points -#' @description add_point_type_counts() will add columns to the input dataframe with the counts of every point_type for each hy_id/cs_id in the input dataframe of classified cross section points (output of classify_pts()) -#' @param classified_pts dataframe or sf dataframe, cross section points with a "hy_id", and "cs_id" columns as well as a 'point_type' column containing the values: "bottom", "left_bank", "right_bank", and "channel" -#' @param crosswalk_id character, ID column -#' @return dataframe or sf dataframe with "_count" columns added -#' @importFrom sf st_drop_geometry -#' @importFrom dplyr group_by count ungroup summarize filter n_distinct select slice left_join relocate all_of last_col -#' @importFrom tidyr pivot_wider pivot_longer -#' @export -add_point_type_counts <- function(classified_pts, crosswalk_id = NULL) { - - # classified_pts <- cs_pts %>% hydrofabric3D::classify_points() - # add = F - # classified_pts = classified_pts2 - # add = TRUE - - # make a unique ID 
if one is not given (NULL 'crosswalk_id') - if(is.null(crosswalk_id)) { - crosswalk_id <- 'hydrofabric_id' - } - - # type checking - if (!any(class(classified_pts) %in% c("sf", "tbl_df", "tbl", "data.frame"))) { - stop("Invalid argument type, 'classified_pts' must be of type 'sf', 'tbl_df', 'tbl' or 'data.frame', given type was '", - class(classified_pts), "'") - } - - # create a copy of the input dataset, add a tmp_id column - stage_df <- - classified_pts %>% - sf::st_drop_geometry() %>% - hydrofabric3D::add_tmp_id(x = crosswalk_id) - - # # create a reference dataframe with all possible combinations of tmp_id and point_type - # reference_df <- expand.grid( - # tmp_id = unique(stage_df$tmp_id), - # point_type = unique(stage_df$point_type) - # ) - - # get a count of the point_types in each hy_id/cs_id group (i.e. each cross section) - point_type_counts <- - stage_df %>% - dplyr::group_by(tmp_id, point_type) %>% - dplyr::count() %>% - dplyr::ungroup() %>% - dplyr::mutate( - # add levels to the point_type column so if a given point_type - # is NOT in the cross seciton points, then it will be added with NAs in the subsequent pivot_wider - point_type = factor(point_type, levels = c("left_bank", "bottom", "right_bank", "channel")) - ) - - # pivot data wider to get implicit missing groups with NA values - point_type_counts <- - point_type_counts %>% - tidyr::pivot_wider( - names_from = point_type, - values_from = n, - names_expand = TRUE - ) - - point_type_counts <- - point_type_counts %>% - tidyr::pivot_longer( - cols = c(bottom, channel, right_bank, left_bank), - names_to = "point_type", - values_to = "n" - ) %>% - dplyr::mutate(n = ifelse(is.na(n), 0, n)) - - # # Join the count of point types in each group with the reference_df to - # # get rows of NA values for any group that is missing a specific point_type - # point_type_counts <- - # point_type_counts %>% - # dplyr::right_join(reference_df, by = c("tmp_id", "point_type")) - - # # For any cross section group that 
does NOT contain a point type, - # # the point type will be NA and here we replace those NAs with 0 - # point_type_counts$n[is.na(point_type_counts$n)] <- 0 - - # # make sure that all tmp_id groups have all 4 point types - check_counts <- - point_type_counts %>% - dplyr::group_by(tmp_id) %>% - dplyr::summarize(unique_count = dplyr::n_distinct(point_type)) %>% - dplyr::filter(unique_count == 4) - - # if the number of distinct points types in each cross section is not 4, raise an error - if (length(unique(stage_df$tmp_id)) != nrow(check_counts)) { - stop("Error validating each hy_id/cs_id cross section contains exactly 4 distinct values in the 'point_type' column") - } - - # get the hy_id, cs_id for each tmp_id to cross walk back to just using hy_id/cs_id - stage_df <- - stage_df %>% - dplyr::select(tmp_id, dplyr::any_of(crosswalk_id), cs_id) %>% - dplyr::group_by(tmp_id) %>% - dplyr::slice(1) %>% - dplyr::ungroup() - - # convert the column of point types to be a column for each point type that - # has the point type count for each hy_id/cs_id (cross section) - point_type_counts <- - point_type_counts %>% - tidyr::pivot_wider( - names_from = point_type, - names_glue = "{point_type}_count", - values_from = n - ) %>% - dplyr::left_join( - stage_df, - by = "tmp_id" - ) %>% - dplyr::select( - dplyr::any_of(crosswalk_id), - cs_id, - left_bank_count, right_bank_count, channel_count, bottom_count - ) - - # Join the point type counts to the original dataframe - classified_pts <- - classified_pts %>% - dplyr::left_join( - point_type_counts, - by = c(crosswalk_id, "cs_id") - # by = c("hy_id", "cs_id") - ) - - # check if any of the columns in 'classified_pts' are geometry types and move them to the end column if they do exist - classified_pts <- move_geometry_to_last(classified_pts) - - return(classified_pts) -} - -#' @title Adds attributes about the banks of each cross section in a dataframe of cross section points -#' Function adds "bottom", "left_bank", "right_bank" columns 
that are -#' the Z values of the "lowest" bottom point, and the "highest" left and right bank Z values, respectively. If there are -#' And also a "valid_banks" column is added that is TRUE if the hy_id/cs_id set of cross section point has at least 1 bottom point with -#' at least 1 left bank point AND 1 right bank point that are above the lowest "bottom" point. -#' @param classified_pts sf or dataframe of points with "hy_id", "cs_id", and "point_type" columns. Output of hydrofabric3D::classify_pts() -#' @return sf or dataframe with added "bottom", "left_bank", "right_bank", and "valid_banks" columns -#' @importFrom dplyr mutate case_when filter select group_by summarise ungroup left_join -#' @importFrom tidyr pivot_wider -add_bank_attributes <- function( - classified_pts -) { - - # classified_pts <- output_pts - - # type checking, throw an error if not "sf", "tbl_df", "tbl", or "data.frame" - if (!any(class(classified_pts) %in% c("sf", "tbl_df", "tbl", "data.frame"))) { - stop("Invalid argument type, 'classified_pts' must be of type 'sf', 'tbl_df', 'tbl' or 'data.frame', given type was '", - class(classified_pts), "'") - } - - # Add columns with the counts of point types - classified_pts <- hydrofabric3D::add_point_type_counts(classified_pts) - - # TODO: Need to add code that will just set aside the geometries and add them back to the final output dataset - # For now we will just drop geometries as safety precaution (as to not summarize() on a massive number of sf geometries) - classified_pts <- sf::st_drop_geometry(classified_pts) - - # Add a valid_count column which is TRUE - # if a hy_id/cs_id has a bottom point AND atleast 1 left and right bank - classified_pts <- - classified_pts %>% - dplyr::mutate( - valid_count = dplyr::case_when( - (bottom_count > 0 & - left_bank_count > 0 & - right_bank_count > 0) ~ TRUE, - TRUE ~ FALSE - ) - ) - - # Add minimum bottom Z, max left and right bank Z, and - # flags noting if the left/right banks are "valid" (i.e. 
max left/right bank values are greater than the bottom Z) - bank_validity <- - classified_pts %>% - dplyr::filter(point_type %in% c("bottom", "left_bank", "right_bank")) %>% - # dplyr::filter(point_type %in% c("left_bank", "right_bank")) %>% - dplyr::select(hy_id, cs_id, pt_id, Z, point_type) %>% - dplyr::group_by(hy_id, cs_id, point_type) %>% - dplyr::summarise( - minZ = min(Z, na.rm = TRUE), - maxZ = max(Z, na.rm = TRUE) - ) %>% - dplyr::ungroup() %>% - tidyr::pivot_wider( - names_from = point_type, - values_from = c(minZ, maxZ) - ) %>% - dplyr::select(hy_id, cs_id, - bottom = minZ_bottom, - left_bank = maxZ_left_bank, - right_bank = maxZ_right_bank - ) - - # Get logical values of the bank validity on both sides - bank_validity <- - bank_validity %>% - dplyr::mutate( - # bottom = ifelse(is.na(bottom), 0, bottom), # Old way was to set the NA left/bank/bottom Z values to 0 but i think this could lead to problems with small number of edge cases - # right_bank = ifelse(is.na(right_bank), 0, right_bank), - # left_bank = ifelse(is.na(left_bank), 0, left_bank), - valid_left_bank = dplyr::case_when( - (left_bank > bottom) & (!is.na(left_bank)) ~ TRUE, # Old method used: left_bank > bottom ~ TRUE, - TRUE ~ FALSE - ), - valid_right_bank = dplyr::case_when( - (right_bank > bottom) & (!is.na(right_bank)) ~ TRUE, # Old method used: right_bank > bottom ~ TRUE, - TRUE ~ FALSE - ), - valid_banks = valid_left_bank & valid_right_bank - ) - # tidyr::pivot_longer(cols = c(right_bank, left_bank), - # names_to = "point_type", values_to = "max_Z_at_banks") %>% - # dplyr::mutate(max_Z_at_banks = ifelse(is.na(max_Z_at_banks), 0, max_Z_at_banks)) - - # Add the following columns to the final output data: - # bottom - numeric, max depth (depth of lowest "bottom" point) - # left_bank - numeric, min depth of left bank (depth of the highest "left_bank" point). If no left_bank points exist, value is 0. - # right_bank - numeric, min depth of right bank (depth of the highest "right_bank" point). 
If no right_bank points exist, value is 0. - # valid_banks - logical, TRUE if the hy_id/cs_id has a bottom point with atleast 1 leftbank point AND 1 rightbank point that are above the lowest "bottom" point - classified_pts <- - classified_pts %>% - dplyr::left_join( - dplyr::select(bank_validity, - hy_id, cs_id, - bottom, left_bank, right_bank, - valid_left_bank, valid_right_bank, valid_banks - ), - by = c("hy_id", "cs_id") - ) - # %>% - # dplyr::mutate(valid_banks2 = valid_left_bank & valid_right_bank) - - # # return simple dataset if add is FALSE - # if(!add) { - # # subset to just hy_id/cs_id and added bank attributes to - # # return a dataframe with unique hy_id/cs_ids for each row - # bank_validity %>% - # sf::st_drop_geometry() %>% # drop sf geometry as a safety precaution to make sure returned data is a dataframe - # dplyr::select(hy_id, cs_id, - # bottom, left_bank, right_bank, - # valid_banks) - # - # return(bank_validity) - # - # } - - # select specific rows and returns - classified_pts <- - classified_pts %>% - dplyr::select(hy_id, cs_id, pt_id, Z, - relative_distance, cs_lengthm, - class, point_type, - bottom, left_bank, right_bank, valid_banks) - - # check if any of the columns in 'classified_pts' are geometry types and move them to the end column if they do exist - classified_pts <- move_geometry_to_last(classified_pts) - - return(classified_pts) - -} - -#' @title Get attributes about the banks of each cross section in a dataframe of cross section points -#' Given a set of cross section points with point_type column, return a dataframe of the unique hy_id/cs_ids with the following calculated columns: -#' "bottom", "left_bank", "right_bank" columns which are the Z values of the "lowest" bottom point, and the "highest" left and right bank Z values, respectively. 
-#' And a "valid_banks" column indicating whether the hy_id/cs_id set of cross section point has at least a signle bottom point with -#' at least 1 left bank point AND 1 right bank point that are above the lowest "bottom" point. -#' @param classified_pts sf or dataframe of points with "hy_id", "cs_id", and "point_type" columns. Output of hydrofabric3D::classify_pts() -#' @return dataframe with each row being a unique hy_id/cs_id with "bottom", "left_bank", "right_bank", and "valid_banks" values for each hy_id/cs_id. -#' @importFrom dplyr mutate case_when filter select group_by summarise ungroup left_join -#' @importFrom tidyr pivot_wider -#' @noRd -#' @keywords internal -get_bank_attributes2 <- function( - classified_pts -) { - - # classified_pts <- output_pts - # classified_pts - # classified_pts <- classified_pts2 - - # type checking, throw an error if not "sf", "tbl_df", "tbl", or "data.frame" - if (!any(class(classified_pts) %in% c("sf", "tbl_df", "tbl", "data.frame"))) { - stop("Invalid argument type, 'classified_pts' must be of type 'sf', 'tbl_df', 'tbl' or 'data.frame', given type was '", - class(classified_pts), "'") - } - - # Add columns with the counts of point types - classified_pts <- hydrofabric3D::add_point_type_counts(classified_pts) - - # TODO: Need to add code that will just set aside the geometries and add them back to the final output dataset - # For now we will just drop geometries as safety precaution (as to not summarize() on a massive number of sf geometries) - classified_pts <- sf::st_drop_geometry(classified_pts) - - # Add a valid_count column which is TRUE - # if a hy_id/cs_id has a bottom point AND atleast 1 left and right bank - classified_pts <- - classified_pts %>% - # sf::st_drop_geometry() %>% # drop sf geometry as a safety precaution to make sure returned data is a dataframe - dplyr::mutate( - valid_count = dplyr::case_when( - (bottom_count > 0 & - left_bank_count > 0 & - right_bank_count > 0) ~ TRUE, - TRUE ~ FALSE - ) - ) - - # 
Add minimum bottom Z, max left and right bank Z, and - # flags noting if the left/right banks are "valid" (i.e. max left/right bank values are greater than the bottom Z) - bank_validity <- - classified_pts %>% - # classified_pts2 %>% - # sf::st_drop_geometry() %>% # drop sf geometry as a safety precaution to make sure returned data is a dataframe - dplyr::filter(point_type %in% c("bottom", "left_bank", "right_bank")) %>% - # dplyr::filter(point_type %in% c("left_bank", "right_bank")) %>% - dplyr::select(hy_id, cs_id, pt_id, Z, point_type) %>% - dplyr::group_by(hy_id, cs_id, point_type) %>% - dplyr::summarise( - minZ = min(Z, na.rm = TRUE), - maxZ = max(Z, na.rm = TRUE) - ) %>% - dplyr::ungroup() %>% - tidyr::pivot_wider( - names_from = point_type, - values_from = c(minZ, maxZ) - ) %>% - dplyr::select(hy_id, cs_id, - bottom = minZ_bottom, - left_bank = maxZ_left_bank, - right_bank = maxZ_right_bank - ) - - bank_validity <- - bank_validity %>% - dplyr::mutate( - # bottom = ifelse(is.na(bottom), 0, bottom), # Old way was to set the NA left/bank/bottom Z values to 0 but i think this could lead to problems with small number of edge cases - # right_bank = ifelse(is.na(right_bank), 0, right_bank), - # left_bank = ifelse(is.na(left_bank), 0, left_bank), - valid_left_bank = dplyr::case_when( - (left_bank > bottom) & (!is.na(left_bank)) ~ TRUE, # Old method used: left_bank > bottom ~ TRUE, - TRUE ~ FALSE - ), - valid_right_bank = dplyr::case_when( - (right_bank > bottom) & (!is.na(right_bank)) ~ TRUE, # Old method used: right_bank > bottom ~ TRUE, - TRUE ~ FALSE - ), - valid_banks = valid_left_bank & valid_right_bank - ) - # tidyr::pivot_longer(cols = c(right_bank, left_bank), - # names_to = "point_type", values_to = "max_Z_at_banks") %>% - # dplyr::mutate(max_Z_at_banks = ifelse(is.na(max_Z_at_banks), 0, max_Z_at_banks)) - - # Add the following columns to the final output data: - # bottom - numeric, max depth (depth of lowest "bottom" point) - # left_bank - numeric, min 
depth of left bank (depth of the highest "left_bank" point). If no left_bank points exist, value is 0. - # right_bank - numeric, min depth of right bank (depth of the highest "right_bank" point). If no right_bank points exist, value is 0. - # valid_banks - logical, TRUE if the hy_id/cs_id has a bottom point with atleast 1 leftbank point AND 1 rightbank point that are above the lowest "bottom" point - - # subset to just hy_id/cs_id and added bank attributes to - # return a dataframe with unique hy_id/cs_ids for each row - bank_validity <- - bank_validity %>% - dplyr::select(hy_id, cs_id, - bottom, left_bank, right_bank, - valid_banks) - - return(bank_validity) - -} - -#' @title Get attributes about the banks of each cross section in a dataframe of cross section points -#' Given a set of cross section points with point_type column, return a dataframe of the unique hy_id/cs_ids with the following calculated columns: -#' "bottom", "left_bank", "right_bank" columns which are the Z values of the "lowest" bottom point, and the "highest" left and right bank Z values, respectively. -#' And a "valid_banks" column indicating whether the hy_id/cs_id set of cross section point has at least a signle bottom point with -#' at least 1 left bank point AND 1 right bank point that are above the lowest "bottom" point. -#' @param classified_pts sf or dataframe of points with "hy_id", "cs_id", and "point_type" columns. Output of hydrofabric3D::classify_pts() -#' @param crosswalk_id character, ID column -#' @return dataframe with each row being a unique hy_id/cs_id with "bottom", "left_bank", "right_bank", and "valid_banks" values for each hy_id/cs_id. 
-#' @importFrom dplyr mutate case_when filter select group_by summarise ungroup left_join rename any_of across bind_rows -#' @importFrom tidyr pivot_wider -#' @export -get_bank_attributes <- function( - classified_pts, - crosswalk_id = NULL -) { - # ----------------------------------------------------- - # classified_pts <- data.frame( - # hy_id = c("A", "A", "A", "B", "B", "B"), - # cs_id = c(1, 1, 1, 1, 1, 1), - # pt_id = c(1, 2, 3, 1, 2, 3), - # point_type = c('channel', 'channel', 'channel', "left_bank", "bottom", "right_bank"), - # Z = c(1, 5, 8, 10, 2, 12) - # ) - # crosswalk_id = "hy_id" - # ----------------------------------------------------- - - # type checking, throw an error if not "sf", "tbl_df", "tbl", or "data.frame" - if (!any(class(classified_pts) %in% c("sf", "tbl_df", "tbl", "data.frame"))) { - stop("Invalid argument type, 'classified_pts' must be of type 'sf', 'tbl_df', 'tbl' or 'data.frame', given type was '", - class(classified_pts), "'") - } - - # Add columns with the counts of point types - classified_pts <- add_point_type_counts(classified_pts, crosswalk_id) - # classified_pts <- hydrofabric3D::add_point_type_counts2(classified_pts, crosswalk_id) - - # TODO: Need to add code that will just set aside the geometries and add them back to the final output dataset - # For now we will just drop geometries as safety precaution (as to not summarize() on a massive number of sf geometries) - classified_pts <- sf::st_drop_geometry(classified_pts) - - # Add a valid_count column which is TRUE - # if a hy_id/cs_id has a bottom point AND atleast 1 left and right bank - classified_pts <- - classified_pts %>% - # sf::st_drop_geometry() %>% # drop sf geometry as a safety precaution to make sure returned data is a dataframe - dplyr::mutate( - valid_count = dplyr::case_when( - (bottom_count > 0 & - left_bank_count > 0 & - right_bank_count > 0) ~ TRUE, - TRUE ~ FALSE - ) - ) - - # Add minimum bottom Z, max left and right bank Z, and - # flags noting if the 
left/right banks are "valid" (i.e. max left/right bank values are greater than the bottom Z) - bank_validity <- - classified_pts %>% - # classified_pts2 %>% - # sf::st_drop_geometry() %>% # drop sf geometry as a safety precaution to make sure returned data is a dataframe - dplyr::filter(point_type %in% c("bottom", "left_bank", "right_bank")) %>% - # dplyr::filter(point_type %in% c("left_bank", "right_bank")) %>% - dplyr::select(dplyr::any_of(crosswalk_id), cs_id, pt_id, Z, point_type) %>% - dplyr::group_by(dplyr::across(dplyr::any_of(c(crosswalk_id, "cs_id", "point_type")))) %>% - # dplyr::select(hy_id, cs_id, pt_id, Z, point_type) %>% - # dplyr::group_by(hy_id, cs_id, point_type) %>% - dplyr::summarise( - minZ = min(Z, na.rm = TRUE), - maxZ = max(Z, na.rm = TRUE) - ) %>% - dplyr::ungroup() %>% - tidyr::pivot_wider( - names_from = point_type, - values_from = c(minZ, maxZ) - ) %>% - # dplyr::select( - # dplyr::any_of(crosswalk_id), - # cs_id, - # bottom = minZ_bottom, - # left_bank = maxZ_left_bank, - # right_bank = maxZ_right_bank - # ) - dplyr::select( - dplyr::any_of( - c( - crosswalk_id, - "cs_id", - "minZ_bottom", - "maxZ_left_bank", - "maxZ_right_bank" - )) - # cs_id, - # bottom = minZ_bottom, - # left_bank = maxZ_left_bank, - # right_bank = maxZ_right_bank - ) %>% - dplyr::rename( - dplyr::any_of(c( - bottom = "minZ_bottom", - left_bank = "maxZ_left_bank", - right_bank = "maxZ_right_bank" - )) - ) - - # make sure that all the required columns are present, if a column is missing, add that column and set the values to NA - required_pt_cols <- c("bottom", "left_bank", "right_bank") - - for (col in required_pt_cols) { - if (!col %in% names(bank_validity)) { - bank_validity[[col]] <- NA - } - } - - bank_validity <- - bank_validity %>% - dplyr::mutate( - # bottom = ifelse(is.na(bottom), 0, bottom), # Old way was to set the NA left/bank/bottom Z values to 0 but i think this could lead to problems with small number of edge cases - # right_bank = 
ifelse(is.na(right_bank), 0, right_bank), - # left_bank = ifelse(is.na(left_bank), 0, left_bank), - valid_left_bank = dplyr::case_when( - (left_bank > bottom) & (!is.na(left_bank)) ~ TRUE, # Old method used: left_bank > bottom ~ TRUE, - TRUE ~ FALSE - ), - valid_right_bank = dplyr::case_when( - (right_bank > bottom) & (!is.na(right_bank)) ~ TRUE, # Old method used: right_bank > bottom ~ TRUE, - TRUE ~ FALSE - ), - valid_banks = valid_left_bank & valid_right_bank - ) - - # Add the following columns to the final output data: - # bottom - numeric, max depth (depth of lowest "bottom" point) - # left_bank - numeric, min depth of left bank (depth of the highest "left_bank" point). If no left_bank points exist, value is 0. - # right_bank - numeric, min depth of right bank (depth of the highest "right_bank" point). If no right_bank points exist, value is 0. - # valid_banks - logical, TRUE if the hy_id/cs_id has a bottom point with atleast 1 leftbank point AND 1 rightbank point that are above the lowest "bottom" point - - # set default column values for any IDs that didnt have 'left_bank', 'right_bank', or 'bottom' point_types - bank_validity_tmp_ids <- add_tmp_id(bank_validity, x = crosswalk_id)$tmp_id - - default_bank_attrs <- - classified_pts %>% - add_tmp_id(x = crosswalk_id) %>% - dplyr::filter( - !tmp_id %in% bank_validity_tmp_ids - ) %>% - dplyr::select(dplyr::any_of(crosswalk_id), cs_id, tmp_id) %>% - dplyr::group_by(tmp_id) %>% - dplyr::slice(1) %>% - dplyr::ungroup() %>% - dplyr::select(-tmp_id) %>% - add_default_bank_attributes() - - # subset to just hy_id/cs_id and added bank attributes to - # return a dataframe with unique hy_id/cs_ids for each row - bank_validity <- - bank_validity %>% - dplyr::select( - dplyr::any_of(crosswalk_id), - cs_id, - bottom, left_bank, right_bank, valid_banks - ) %>% - dplyr::bind_rows( - default_bank_attrs - ) - - return(bank_validity) - -} - -#' Add "bottom", "left_bank", "right_bank", and "valid_banks" column defaults to a 
dataframe -#' Internal helper function for get_bank_attributes() -#' @param df dataframe, tibble, or sf dataframe -#' -#' @return dataframe, tibble, or sf dataframe -#' @noRd -#' @keywords internal -add_default_bank_attributes <- function(df) { - bank_attrs_cols <- c("bottom", "left_bank", "right_bank") - - for (col in bank_attrs_cols) { - df[[col]] <- NA - } - - df$valid_banks <- FALSE - - return(df) - -} - - -#' @title Add relief attributes to a dataframe of cross sections points -#' Given a set of cross section points (derived from hydrofabric3D::cross_section_pts() and hydrofabric3D::classify_points()) add a "has_relief" logical -#' value to data. The "has_relief" value is indicating whether a cross section "has relief". -#' Relief is determined by checking each set of cross section points have a left OR right bank that -#' has a depth difference from the bottom that isgreater than or equal to a percentage of the cross section length (e.g. Assuming a 'pct_of_length_for_relief' of 0.01 (1%) of a 100m cross section would have a relief depth threshold of 1m) -#' @param classified_pts sf or dataframe of points with "hy_id", "cs_id", "cs_lengthm", and "point_type" columns. Output of hydrofabric3D::classify_points() -#' @param pct_of_length_for_relief numeric, percent of cs_lengthm to use as the threshold depth for classifying whether a cross section has "relief". Default is 0.01 (1% of the cross sections length). 
-#' @return sf or dataframe with added "has_relief" columns or a dataframe of dataframe of unique hy_id/cs_id and "has_relief" -#' @importFrom dplyr select group_by slice ungroup mutate filter summarise left_join case_when all_of relocate last_col -#' @importFrom tidyr pivot_wider -#' @export -add_relief <- function( - classified_pts, - pct_of_length_for_relief = 0.01 -) { - - # 34 * as.numeric("2.3") - # classified_pts = output_pts - # pct_of_length_for_relief = 0.01 - # classified_pts <- output_pts - # pct_of_length_for_relief = 0.01 - - # type checking - if (!any(class(classified_pts) %in% c("sf", "tbl_df", "tbl", "data.frame"))) { - stop("Invalid argument type, 'classified_pts' must be of type 'sf', 'tbl_df', 'tbl' or 'data.frame', given type was '", - class(classified_pts), "'") - } - - # type checking - if (!is.numeric(pct_of_length_for_relief)) { - stop("Invalid argument type, 'pct_of_length_for_relief' must be of type 'numeric', given type was '", - class(pct_of_length_for_relief), "'") - } - - # Make sure pct_of_length_for_relief is valid percentage value (greater than 0) - if (pct_of_length_for_relief < 0 ) { - stop("Invalid value 'pct_of_length_for_relief' of ", pct_of_length_for_relief, ", 'pct_of_length_for_relief' must be greater than or equal to 0") - } - - # TODO: Need to add code that will just set aside the geometries and add them back to the final output dataset - # For now we will just drop geometries as safety precaution (as to not summarize() on a massive number of sf geometries) - classified_pts <- sf::st_drop_geometry(classified_pts) - - # store the cross section lengths and calculate the depth threshold as a percent of the cross sections length - cs_lengths <- - classified_pts %>% - dplyr::select(hy_id, cs_id, cs_lengthm) %>% - dplyr::group_by(hy_id, cs_id) %>% - dplyr::slice(1) %>% - dplyr::ungroup() %>% - dplyr::mutate( - depth_threshold = round(cs_lengthm * pct_of_length_for_relief, 3) # maybe use floor() here - ) - - # get the minimum 
bottom point and maximum left and right bank points - relief <- - classified_pts %>% - # dplyr::filter(point_type %in% c("left_bank", "right_bank")) %>% - dplyr::filter(point_type %in% c("bottom", "left_bank", "right_bank")) %>% - dplyr::select(hy_id, cs_id, pt_id, Z, point_type) %>% - dplyr::group_by(hy_id, cs_id, point_type) %>% - dplyr::summarise( - minZ = min(Z, na.rm = TRUE), - maxZ = max(Z, na.rm = TRUE) - ) %>% - dplyr::ungroup() %>% - tidyr::pivot_wider( - names_from = point_type, - values_from = c(minZ, maxZ) - ) %>% - dplyr::select(hy_id, cs_id, - bottom = minZ_bottom, - left_bank = maxZ_left_bank, - right_bank = maxZ_right_bank - ) - - # join lengths and depth threshold back with relief table and - # calculate if the max difference between left/right bank vs bottom is - # greater than or equal to the depth threshold - relief <- - relief %>% - dplyr::left_join( - cs_lengths, - by = c("hy_id", "cs_id") - ) %>% - dplyr::group_by(hy_id, cs_id) %>% - dplyr::mutate( - depth_diff = max(c(round(right_bank - bottom, 3), - round(left_bank - bottom, 3)), - na.rm = TRUE) # TODO: removing NAs might not be the right call, - # removing them might set has_relief to TRUE and - # says "there IS relief but no valid banks" - ) %>% - dplyr::ungroup() %>% - dplyr::mutate( - has_relief = dplyr::case_when( - depth_diff >= depth_threshold ~ TRUE, - TRUE ~ FALSE - ) - ) - - # add the new point type columns to the original dataframe - # Join the point type counts to the original dataframe - classified_pts <- - classified_pts %>% - dplyr::left_join( - dplyr::select(relief, - hy_id, cs_id, has_relief), - by = c("hy_id", "cs_id") - ) - - # check if any of the columns in 'classified_pts' are geometry types and move them to the end column if they do exist - classified_pts <- move_geometry_to_last(classified_pts) - - return(classified_pts) - -} - -#' @title Get relief attributes from a dataframe of cross sections points -#' Generate a dataframe from a set of classified cross section 
points indicating whether a cross section "has relief". -#' Relief is determined by checking each set of cross section points have a left OR right bank that has a depth difference from the bottom that is -#' greater than or equal to a percentage of the cross section length (e.g. Assuming a 'pct_of_length_for_relief' of 0.01 (1%) of a 100m cross section would have a relief depth threshold of 1m) -#' @param classified_pts sf or dataframe of points with "hy_id", "cs_id", "cs_lengthm", and "point_type" columns. Output of hydrofabric3D::classify_pts() -#' @param pct_of_length_for_relief numeric, percent of cs_lengthm to use as the threshold depth for classifying whether a cross section has "relief". Default is 0.01 (1% of the cross sections length). -#' @param detailed logical, whether to return only a the "has_relief" column or -#' include all derived relief based columns such as "max_relief" and the "pct_of_length_for_relief" used. Default is FALSE and returns a dataframe with only "hy_id", "cs_id", and "has_relief". -#' @return dataframe with each row being a unique hy_id/cs_id with a "has_relief" value for each hy_id/cs_id. If detailed = TRUE, then the output dataframe will include the following additional columns: "cs_lengthm", "max_relief", "pct_of_length_for_relief". 
-#' @importFrom dplyr select group_by slice ungroup mutate filter summarise left_join case_when all_of relocate last_col -#' @importFrom tidyr pivot_wider -#' @noRd -#' @keywords internal -get_relief2 <- function( - classified_pts, - pct_of_length_for_relief = 0.01, - detailed = FALSE -) { - - # classified_pts - # pct_of_length_for_relief = pct_of_length_for_relief - # detailed = FALSE - - # classified_pts = output_pts - # pct_of_length_for_relief = 0.01 - - # type checking - if (!any(class(classified_pts) %in% c("sf", "tbl_df", "tbl", "data.frame"))) { - stop("Invalid argument type, 'classified_pts' must be of type 'sf', 'tbl_df', 'tbl' or 'data.frame', given type was '", class(classified_pts), "'") - } - - # type checking - if (!is.numeric(pct_of_length_for_relief)) { - stop("Invalid argument type, 'pct_of_length_for_relief' must be of type 'numeric', given type was '", class(pct_of_length_for_relief), "'") - } - - # type checking - if (!is.logical(detailed)) { - stop("Invalid argument type, 'detailed' must be of type 'logical', given type was '", class(detailed), "'") - } - - # drop geometries as safety precaution - classified_pts <- sf::st_drop_geometry(classified_pts) - - # store the cross section lengths and calculate the depth threshold as a percent of the cross sections length - cs_lengths <- - classified_pts %>% - # classified_pts2 %>% - dplyr::select(hy_id, cs_id, cs_lengthm) %>% - dplyr::group_by(hy_id, cs_id) %>% - dplyr::slice(1) %>% - dplyr::ungroup() %>% - dplyr::mutate( - depth_threshold = round(cs_lengthm * pct_of_length_for_relief, 3) # maybe use floor() here - ) - - # get the minimum bottom point and maximum left and right bank points - relief <- - classified_pts %>% - # dplyr::filter(point_type %in% c("left_bank", "right_bank")) %>% - dplyr::filter(point_type %in% c("bottom", "left_bank", "right_bank")) %>% - dplyr::select(hy_id, cs_id, pt_id, Z, point_type) %>% - dplyr::group_by(hy_id, cs_id, point_type) %>% - dplyr::summarise( - minZ = min(Z, 
na.rm = TRUE), - maxZ = max(Z, na.rm = TRUE) - ) %>% - dplyr::ungroup() %>% - tidyr::pivot_wider( - names_from = point_type, - values_from = c(minZ, maxZ) - ) %>% - dplyr::select(hy_id, cs_id, - bottom = minZ_bottom, - left_bank = maxZ_left_bank, - right_bank = maxZ_right_bank - ) - - # join lengths and depth threshold back with relief table and - # calculate if the max difference between left/right bank vs bottom is - # greater than or equal to the depth threshold - relief <- - relief %>% - dplyr::left_join( - cs_lengths, - by = c("hy_id", "cs_id") - ) %>% - dplyr::group_by(hy_id, cs_id) %>% - dplyr::mutate( - max_relief = max(c(round(right_bank - bottom, 3), - round(left_bank - bottom, 3)), - na.rm = TRUE) # TODO: removing NAs might not be the right call, removing them might set has_relief to TRUE and says "there IS relief but no valid banks" - ) %>% - dplyr::ungroup() %>% - dplyr::mutate( - has_relief = dplyr::case_when( - max_relief >= depth_threshold ~ TRUE, - TRUE ~ FALSE - ), - pct_of_length_for_relief = pct_of_length_for_relief - ) - - # if detailed set of data is specified, return the relief dataframe with additional columns - if(detailed) { - relief <- - relief %>% - dplyr::select(hy_id, cs_id, cs_lengthm, has_relief, max_relief, pct_of_length_for_relief) - - return(relief) - - } - - # return dataframe with just hy_id/cs_id, and has_relief - relief <- - relief %>% - dplyr::select(hy_id, cs_id, has_relief) - - return(relief) -} - -#' @title Get relief attributes from a dataframe of cross sections points -#' Generate a dataframe from a set of classified cross section points indicating whether a cross section "has relief". -#' Relief is determined by checking each set of cross section points have a left OR right bank that has a depth difference from the bottom that is -#' greater than or equal to a percentage of the cross section length (e.g. 
Assuming a 'pct_of_length_for_relief' of 0.01 (1%) of a 100m cross section would have a relief depth threshold of 1m) -#' @param classified_pts sf or dataframe of points with "hy_id", "cs_id", "cs_lengthm", and "point_type" columns. Output of hydrofabric3D::classify_pts() -#' @param crosswalk_id character, ID column -#' @param pct_of_length_for_relief numeric, percent of cs_lengthm to use as the threshold depth for classifying whether a cross section has "relief". Default is 0.01 (1% of the cross sections length). -#' @param detailed logical, whether to return only a the "has_relief" column or -#' include all derived relief based columns such as "max_relief" and the "pct_of_length_for_relief" used. Default is FALSE and returns a dataframe with only "hy_id", "cs_id", and "has_relief". -#' @return dataframe with each row being a unique hy_id/cs_id with a "has_relief" value for each hy_id/cs_id. If detailed = TRUE, then the output dataframe will include the following additional columns: "cs_lengthm", "max_relief", "pct_of_length_for_relief". 
-#' @importFrom dplyr select group_by slice ungroup mutate filter summarise left_join case_when all_of relocate last_col any_of across -#' @importFrom tidyr pivot_wider -#' @export -get_relief <- function( - classified_pts, - crosswalk_id = NULL, - pct_of_length_for_relief = 0.01, - detailed = FALSE -) { - - # ------------------------------------------------------------------------ - # ------------------------------------------------------------------------ - # crosswalk_id <- "hy_id" - # REQ_COLS <- c(crosswalk_id, "cs_id", "pt_id", "cs_lengthm", "Z", "point_type") - # - # pct_of_length_for_relief <- 0.01 - # CS_LENGTHM <- 100 - # MIN_REQ_RELIEF <- CS_LENGTHM * pct_of_length_for_relief - # detailed <- FALSE - - # classified_pts <- - # data.frame( - # hy_id = c("A", "A", "A", "A", "A"), - # cs_id = c(1, 1, 1, 1, 1), - # pt_id = c(1, 2, 3, 4, 5), - # cs_lengthm = c(CS_LENGTHM), - # point_type = c('left_bank', 'bottom', 'bottom', 'bottom', 'right_bank'), - # Z = c(100, 10, 10, 10, 100) - # ) - # - # classified_pts <- - # data.frame( - # hy_id = c("A", "A", "A", "A", "A"), - # cs_id = c(1, 1, 1, 1, 1), - # pt_id = c(1, 2, 3, 4, 5), - # cs_lengthm = c(CS_LENGTHM), - # point_type = c('channel', 'bottom', 'bottom', 'bottom', 'right_bank'), - # Z = c(100, 10, 10, 10, 100) - # ) - # - # classified_pts <- - # data.frame( - # hy_id = c("A", "A", "A", "A", "A", - # "B", "B", "B", "B", "B" - # ), - # cs_id = c(1, 1, 1, 1, 1, - # 1, 1, 1, 1, 1 - # ), - # pt_id = c(1, 2, 3, 4, 5, - # 1, 2, 3, 4, 5 - # ), - # cs_lengthm = c(CS_LENGTHM), - # point_type = c( - # 'channel', 'bottom', 'bottom', 'bottom', 'right_bank', - # 'left_bank', 'bottom', 'bottom', 'bottom', 'right_bank' - # ), - # Z = c(100, 10, 10, 10, 100, - # 100, 10, 10, 10, 100 - # ) - # ) - - # classified_pts <- - # data.frame( - # hy_id = c("A", "A", "A", "A", "A"), - # cs_id = c(1, 1, 1, 1, 1), - # pt_id = c(1, 2, 3, 4, 5), - # cs_lengthm = c(CS_LENGTHM), - # point_type = c('bottom', 'bottom', 'bottom', 'bottom', 
'bottom'), - # Z = c(100, 100, 100, 100, 100) - # ) - - # ------------------------------------------------------------------------ - # ------------------------------------------------------------------------ - - # make a unique ID if one is not given (NULL 'crosswalk_id') - if(is.null(crosswalk_id)) { - # cs <- add_hydrofabric_id(cs) - crosswalk_id <- 'hydrofabric_id' - } - - REQUIRED_COLS <- c(crosswalk_id, "cs_id", "pt_id", "cs_lengthm", "Z", "point_type") - # REQUIRED_COLS <- c(crosswalk_id, "cs_id", "pt_id", "cs_lengthm", "relative_distance") - - if (!all(REQUIRED_COLS %in% names(classified_pts))) { - missing_cols <- REQUIRED_COLS[which(!REQUIRED_COLS %in% names(classified_pts))] - stop("'classified_pts' is missing one or more of the required columns:\n > ", - paste0(missing_cols, collapse = "\n > ")) - } - - # type checking - if (!any(class(classified_pts) %in% c("sf", "tbl_df", "tbl", "data.frame"))) { - stop("Invalid argument type, 'classified_pts' must be of type 'sf', 'tbl_df', 'tbl' or 'data.frame', given type was '", class(classified_pts), "'") - } - - # type checking - if (!is.numeric(pct_of_length_for_relief)) { - stop("Invalid argument type, 'pct_of_length_for_relief' must be of type 'numeric', given type was '", class(pct_of_length_for_relief), "'") - } - - # type checking - if (!is.logical(detailed)) { - stop("Invalid argument type, 'detailed' must be of type 'logical', given type was '", class(detailed), "'") - } - - # drop geometries as safety precaution - classified_pts <- sf::st_drop_geometry(classified_pts) - - # store the cross section lengths and calculate the depth threshold as a percent of the cross sections length - cs_lengths <- - classified_pts %>% - dplyr::select(dplyr::any_of(crosswalk_id), cs_id, cs_lengthm) %>% - dplyr::group_by(dplyr::across(dplyr::any_of(c(crosswalk_id, "cs_id")))) %>% - # dplyr::select(hy_id, cs_id, cs_lengthm) %>% - # dplyr::group_by(hy_id, cs_id) %>% - dplyr::slice(1) %>% - dplyr::ungroup() %>% - dplyr::mutate( 
- depth_threshold = round(cs_lengthm * pct_of_length_for_relief, 3) # maybe use floor() here - ) - - # get the minimum bottom point and maximum left and right bank points - relief <- - classified_pts %>% - # dplyr::filter(point_type %in% c("left_bank", "right_bank")) %>% - dplyr::filter(point_type %in% c("bottom", "left_bank", "right_bank")) %>% - dplyr::select(dplyr::any_of(crosswalk_id), cs_id, pt_id, Z, point_type) %>% - dplyr::group_by(dplyr::across(dplyr::any_of(c(crosswalk_id, "cs_id", "point_type")))) %>% - # dplyr::select(hy_id, cs_id, pt_id, Z, point_type) %>% - # dplyr::group_by(hy_id, cs_id, point_type) %>% - dplyr::summarise( - minZ = min(Z, na.rm = TRUE), - maxZ = max(Z, na.rm = TRUE) - ) %>% - dplyr::ungroup() %>% - tidyr::pivot_wider( - names_from = point_type, - values_from = c(minZ, maxZ) - ) %>% - # dplyr::select( - # dplyr::any_of(crosswalk_id), - # cs_id, - # bottom = minZ_bottom, - # left_bank = maxZ_left_bank, - # right_bank = maxZ_right_bank - # ) - dplyr::select( - dplyr::any_of( - c( - crosswalk_id, - "cs_id", - "minZ_bottom", - "maxZ_left_bank", - "maxZ_right_bank" - )) - ) %>% - dplyr::rename( - dplyr::any_of(c( - bottom = "minZ_bottom", - left_bank = "maxZ_left_bank", - right_bank = "maxZ_right_bank" - )) - ) - - # make sure that all the required columns are present, if a column is missing, add that column and set the values to NA - required_pt_cols <- c("bottom", "left_bank", "right_bank") - - for (col in required_pt_cols) { - if (!col %in% names(relief)) { - # message("Missing ", col, " in relief, adding default NA") - relief[[col]] <- NA - } - } - - # join lengths and depth threshold back with relief table and - # calculate if the max difference between left/right bank vs bottom is - # greater than or equal to the depth threshold - relief <- - relief %>% - dplyr::left_join( - cs_lengths, - by = c(crosswalk_id, "cs_id") - # by = c("hy_id", "cs_id") - ) %>% - dplyr::group_by(dplyr::across(dplyr::any_of(c(crosswalk_id, "cs_id")))) %>% - 
# dplyr::group_by(hy_id, cs_id) %>% - dplyr::mutate( - max_relief = max( - c( - round(right_bank - bottom, 3), - round(left_bank - bottom, 3) - ), - na.rm = TRUE - ), # TODO: removing NAs might not be the right call, removing them might set has_relief to TRUE and says "there IS relief but no valid banks" - - # TODO: if both left AND right bank are NA, then we get an -Inf which we will just set to 0 (i.e. relief of 0) - max_relief = dplyr::case_when( - is.infinite(max_relief) ~ 0, - TRUE ~ max_relief - ) - ) %>% - dplyr::ungroup() %>% - dplyr::mutate( - # TODO: if a cross section does NOT have proper left/right banks, it by default can NOT have relief (i.e. has_relief = FALSE) - has_missing_banks = is.na(left_bank) | is.na(right_bank), - has_relief = dplyr::case_when( - (max_relief >= depth_threshold) & !has_missing_banks ~ TRUE, - TRUE ~ FALSE - ), - # has_relief = dplyr::case_when( - # max_relief >= depth_threshold ~ TRUE, - # TRUE ~ FALSE - # ), - pct_of_length_for_relief = pct_of_length_for_relief - ) - # dplyr::select(-has_missing_banks) - - # if detailed set of data is specified, return the relief dataframe with additional columns - if(detailed) { - relief <- - relief %>% - dplyr::group_by(dplyr::across(dplyr::any_of(c(crosswalk_id, "cs_id")))) %>% - dplyr::mutate( - max_relief = dplyr::case_when( - has_missing_banks ~ 0, - TRUE ~ max_relief - ) - ) %>% - dplyr::ungroup() %>% - dplyr::select( - dplyr::any_of(crosswalk_id), - cs_id, cs_lengthm, - has_relief, - max_relief, - pct_of_length_for_relief - ) - - return(relief) - - } - - # return dataframe with just hy_id/cs_id, and has_relief - relief <- - relief %>% - dplyr::select( - dplyr::any_of(crosswalk_id), - cs_id, - has_relief - ) - - return(relief) -} - -#' Join 'valid_banks' and 'has_relief' columns to transects dataset from corresponding cross section points -#' -#' @param transects dataframe or sf dataframe of transects -#' @param cs_pts dataframe or sf dataframe cross section points corresponding to 
transects -#' @param crosswalk_id character, unique ID column -#' @importFrom dplyr left_join select group_by across any_of slice ungroup -#' @importFrom sf st_drop_geometry -#' @return dataframe or sf dataframe -#' @noRd -#' @keywords internal -add_cs_attributes_to_transects <- function(transects, - cs_pts, - crosswalk_id = NULL) { - # validate input datas - is_transects_valid <- validate_df(transects, - c(crosswalk_id, "cs_id"), - "transects") - - is_cs_pts_valid <- validate_df(cs_pts, - c(crosswalk_id, "cs_id", "valid_banks", "has_relief"), - "cs_pts") - - # join 'valid_banks' and 'has_relief' columns to transects from cs_pts - transects <- - transects %>% - dplyr::left_join( - cs_pts %>% - sf::st_drop_geometry() %>% - dplyr::group_by(dplyr::across(dplyr::any_of(c(crosswalk_id, "cs_id")))) %>% - dplyr::slice(1) %>% - dplyr::ungroup() %>% - dplyr::select(dplyr::any_of(crosswalk_id), cs_id, valid_banks, has_relief), - by = c(crosswalk_id, "cs_id") - ) - - return(transects) - -} - #' Validate that a dataframe is valid type (inherits from a dataframe) and has the given required columns #' @@ -2036,155 +555,19 @@ validate_cut_cross_section_inputs <- function(net, return(NULL) } -#' Calculate the length between the leftmost and rightmost bottom point in each cross section -#' -#' @param cross_section_pts dataframe, or sf dataframe of cross section points -#' @param crosswalk_id character, ID column -#' @importFrom dplyr select mutate case_when group_by lag ungroup filter summarise left_join across any_of -#' @return summarized dataframe of input cross_section_pts dataframe with a bottom_length value for each hy_id/cs_id -#' @export -get_cs_bottom_length <- function(cross_section_pts, - crosswalk_id = NULL) { - - # make a unique ID if one is not given (NULL 'crosswalk_id') - if(is.null(crosswalk_id)) { - # x <- add_hydrofabric_id(x) - crosswalk_id <- 'hydrofabric_id' - } - - REQUIRED_COLS <- c(crosswalk_id, "cs_id", "pt_id", "relative_distance", "point_type") - - # 
validate input graph - is_valid <- validate_df(cross_section_pts, REQUIRED_COLS, "cross_section_pts") - - # get the distance between cross section pts in each cross section, - # this will be used as a default for bottom length in case bottom length is 0 - interval_distances <- - cross_section_pts %>% - dplyr::select(dplyr::any_of(crosswalk_id), cs_id, pt_id, relative_distance) %>% - dplyr::group_by(dplyr::across(dplyr::any_of(c(crosswalk_id, "cs_id")))) %>% - # dplyr::select(hy_id, cs_id, pt_id, relative_distance) %>% - # dplyr::group_by(hy_id, cs_id) %>% - dplyr::mutate( - distance_interval = relative_distance - dplyr::lag(relative_distance) - ) %>% - dplyr::summarise( - distance_interval = ceiling(mean(distance_interval, na.rm = TRUE)) # TODO: round up to make sure we are not underestimating - # the interval, we're going to use this value to - # derive a new Top width for each cross section if - # the cross section length is less than the prescribed top width - ) %>% - dplyr::ungroup() - - # get the distance from the first and last bottom points, substittue any bottom lengths == 0 - # with the interval between points distance - bottom_lengths <- - cross_section_pts %>% - dplyr::filter(point_type == "bottom") %>% - dplyr::select(dplyr::any_of(crosswalk_id), cs_id, pt_id, relative_distance) %>% - dplyr::group_by(dplyr::across(dplyr::any_of(c(crosswalk_id, "cs_id")))) %>% - # dplyr::select(hy_id, cs_id, pt_id, relative_distance) %>% - # dplyr::group_by(hy_id, cs_id) %>% - dplyr::summarise( - bottom_start = min(relative_distance, na.rm = TRUE), - bottom_end = max(relative_distance, na.rm = TRUE) - ) %>% - dplyr::left_join( - interval_distances, - by = c(crosswalk_id, "cs_id") - # by = c("hy_id", "cs_id") - ) %>% - dplyr::group_by(dplyr::across(dplyr::any_of(c(crosswalk_id, "cs_id")))) %>% - # dplyr::group_by(hy_id, cs_id) %>% - dplyr::mutate( - bottom_length = bottom_end - bottom_start - ) %>% - dplyr::ungroup() %>% - dplyr::mutate( - bottom_length = 
dplyr::case_when( - floor(bottom_length) == 0 ~ distance_interval, - TRUE ~ bottom_length - ) - ) %>% - dplyr::select(dplyr::any_of(crosswalk_id), cs_id, bottom_length) - # dplyr::select(hy_id, cs_id, bottom_length) - - return(bottom_lengths) - -} - -#' Remove entire cross sections that have any NA Z (depth) values -#' -#' @param cross_section_pts cs points dataframe, tibble, or sf dataframe -#' @param crosswalk_id unique ID for flowline -#' @importFrom dplyr group_by across any_of ungroup filter -#' @return cross_section_pts dataframe / tibble / sf dataframe with removed cross sections -#' @export -drop_incomplete_cs_pts <- function(cross_section_pts, crosswalk_id = NULL) { - # make a unique ID if one is not given (NULL 'crosswalk_id') - if(is.null(crosswalk_id)) { - crosswalk_id <- 'hydrofabric_id' - } - - cross_section_pts <- - cross_section_pts %>% - dplyr::group_by(dplyr::across(dplyr::any_of(c(crosswalk_id, "cs_id")))) %>% - dplyr::filter(!any(is.na(Z))) %>% - dplyr::ungroup() - - return(cross_section_pts) - -} - -#' Add an is_missing_depth flag to cross sections points -#' Any cross section points that has missing Z (depth = NA) values is flagged as is_missing_depth = TRUE -#' -#' @param cs_pts cs points dataframe, tibble, or sf dataframe -#' @importFrom dplyr mutate -#' @return cross_section_pts dataframe / tibble / sf dataframe with cross section points missing depths flag added -#' @export -add_is_missing_depth_flag <- function(cs_pts) { - - cs_pts <- - cs_pts %>% - # dplyr::group_by(dplyr::across(dplyr::any_of(c(crosswalk_id, "cs_id")))) %>% - dplyr::mutate( - is_missing_depth = is.na(Z) - ) - # dplyr::group_by(dplyr::across(dplyr::any_of(c(crosswalk_id, "cs_id")))) %>% - # dplyr::mutate( - # is_missing_depth = any(is.na(Z)) - # ) %>% - # dplyr::ungroup() - - return(cs_pts) - -} - -#' Add an is_complete_cs flag to cross sections points -#' Any cross section points that has does NOT have ANY NA Z (depth) values is flagged as is_complete_cs = TRUE +#' 
Output a message if verbose is TRUE, otherwise don't +#' internal helper function for outputting messages if verbose is TRUE +#' @param ... list of strings to output in message +#' @param verbose logical, whether to output message or not #' -#' @param cs_pts cs points dataframe, tibble, or sf dataframe -#' @param crosswalk_id unique ID for flowline -#' @importFrom dplyr group_by across any_of ungroup mutate -#' @return cross_section_pts dataframe / tibble / sf dataframe with cross section points with is_complete_cs flag added -#' @export -add_is_complete_cs_flag <- function(cs_pts, crosswalk_id = NULL) { - # make a unique ID if one is not given (NULL 'crosswalk_id') - if(is.null(crosswalk_id)) { - crosswalk_id <- 'hydrofabric_id' +#' @return NULL +#' @noRd +#' @keywords internal +message_if_verbose <- function(..., verbose = TRUE) { + if(verbose) { + args = paste(list(...), collapse = "") + message(args) } - - cs_pts <- - cs_pts %>% - dplyr::group_by(dplyr::across(dplyr::any_of(c(crosswalk_id, "cs_id")))) %>% - dplyr::mutate( - is_complete_cs = !any(is.na(Z)) - ) %>% - dplyr::ungroup() - - return(cs_pts) - } #' Select standard cross section point columns @@ -2269,134 +652,6 @@ select_transects <- function(transects, crosswalk_id = NULL) { return(transects) } -#' Get a total count of the validity attributes -#' -#' @param x dataframe or sf dataframe with crosswalk_id, has_relief, and valid_banks columns -#' @param crosswalk_id character unique ID column -#' -#' @importFrom sf st_drop_geometry -#' @importFrom dplyr select any_of group_by across slice ungroup count -#' @return dataframe or tibble -#' @export -get_validity_tally <- function(x, crosswalk_id = NULL) { - # x <- classified_pts - # crosswalk_id = "hy_id" - - validity_tally <- - x %>% - sf::st_drop_geometry() %>% - dplyr::select(dplyr::any_of(crosswalk_id), cs_id, valid_banks, has_relief) %>% - dplyr::group_by(dplyr::across(dplyr::any_of(c(crosswalk_id, "cs_id")))) %>% - dplyr::slice(1) %>% - 
dplyr::ungroup() %>% - dplyr::count(valid_banks, has_relief) - - return(validity_tally) - -} - -#' Calculates a validity score column based on valid_banks and has_relief columns in a set of cross section points -#' -#' @param cs_to_validate dataframe -#' @param crosswalk_id character, ID column -#' @param validity_col_name name of the output validity score column -#' @importFrom sf st_drop_geometry -#' @importFrom dplyr group_by slice ungroup mutate select any_of -#' @return dataframe with added validity_score column -calc_validity_scores <- function(cs_to_validate, - crosswalk_id = NULL, - validity_col_name = "validity_score") { - - scores <- - cs_to_validate %>% - sf::st_drop_geometry() %>% - hydrofabric3D::add_tmp_id(x = crosswalk_id) %>% - dplyr::group_by(tmp_id) %>% - dplyr::slice(1) %>% - dplyr::ungroup() %>% - dplyr::mutate( - validity_score = valid_banks + has_relief - ) %>% - dplyr::select( - # hy_id, - dplyr::any_of(crosswalk_id), - cs_id, valid_banks, has_relief, validity_score) - - names(scores) <- c(crosswalk_id, "cs_id", "valid_banks", "has_relief", validity_col_name) - - return(scores) - -} - -#' Compare valid_banks and has_relief between 2 sets of cross section points -#' -#' @param cs_pts1 dataframe or sf dataframe of CS pts -#' @param cs_pts2 dataframe or sf dataframe of CS pts -#' @param crosswalk_id character unique ID -#' @importFrom dplyr rename filter any_of mutate select left_join case_when -#' @return dataframe, tibble -#' @export -compare_cs_validity <- function(cs_pts1, - cs_pts2, - crosswalk_id = NULL -) { - - # cs_pts1 <- x - # cs_pts2 <- new_cs_pts - - # validity_scores1$tmp_id[!validity_scores1$tmp_id %in% validity_scores2$tmp_id] - - validity_scores1 <- - cs_pts1 %>% - calc_validity_scores(crosswalk_id) %>% - add_tmp_id(crosswalk_id) %>% - dplyr::rename(score1 = validity_score) - - validity_scores2 <- - cs_pts2 %>% - calc_validity_scores(crosswalk_id) %>% - add_tmp_id(crosswalk_id) %>% - dplyr::rename(score2 = validity_score) - - # 
mark as "improved" for any hy_id/cs_ids that increased "validity score" after extending - check_for_improvement <- dplyr::left_join( - # OLD SCORES - validity_scores1 %>% - dplyr::filter( - tmp_id %in% unique(validity_scores2$tmp_id) - ) %>% - dplyr::select(dplyr::any_of(crosswalk_id), cs_id, score1), - - # NEW SCORES - validity_scores2 %>% - dplyr::select(dplyr::any_of(crosswalk_id), cs_id, score2), - by = c(crosswalk_id, "cs_id") - ) %>% - dplyr::mutate( - is_improved = dplyr::case_when( - score2 > score1 ~ TRUE, - TRUE ~ FALSE - ) - ) %>% - dplyr::select(dplyr::any_of(crosswalk_id), cs_id, - score1, score2, - is_improved - ) - - return(check_for_improvement) - -} - -has_same_uids <- function(x, y, crosswalk_id = NULL) { - x_uids <- get_unique_tmp_ids(x, x = crosswalk_id) - y_uids <- get_unique_tmp_ids(y, x = crosswalk_id) - - return( - all(x_uids %in% y_uids) && all(y_uids %in% x_uids) - ) - -} - #' Check if data is an SF linestring / multilinestring #' #' @param data dataframe, tibble, sf dataframe, geometry @@ -2445,6 +700,32 @@ pts_to_XY <- function(pts) { } +#' Dissove polygons based on intersections with other polygons +#' +#' @param x sf dataframe +#' @importFrom sf st_union st_cast st_intersects +#' @importFrom dplyr group_by summarize +#' @return dissolved sf dataframe +#' @export +cluster_dissolve <- function(x) { + + cluster <- unlist( + sf::st_intersects( + x, + x %>% + sf::st_union() %>% + sf::st_cast("POLYGON") + ) + ) + + clustered <- + cbind(x, cluster) %>% + dplyr::group_by(cluster) %>% + dplyr::summarize() + + return(clustered) + +} #' Make a progress bar and return an "make_progress()" function to update the progress bar. 
#' Credit to the exactextractr team: https://github.com/isciences/exactextractr/blob/5fd17dcf02717332b125345aea586304f668cf12/R/exact_extract_helpers.R#L361 diff --git a/man/add_attribute_based_extension_distances.Rd b/man/add_attribute_based_extension_distances.Rd index 307705b..b6c4e15 100644 --- a/man/add_attribute_based_extension_distances.Rd +++ b/man/add_attribute_based_extension_distances.Rd @@ -1,5 +1,5 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/utils.R +% Please edit documentation in R/cs_improvements.R \name{add_attribute_based_extension_distances} \alias{add_attribute_based_extension_distances} \title{Add an extension_distance column based off valid_banks and has_relief attributes} diff --git a/man/add_bank_attributes.Rd b/man/add_bank_attributes.Rd index 743d556..77fe284 100644 --- a/man/add_bank_attributes.Rd +++ b/man/add_bank_attributes.Rd @@ -1,5 +1,5 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/utils.R +% Please edit documentation in R/cs_bank_attributes.R \name{add_bank_attributes} \alias{add_bank_attributes} \title{Adds attributes about the banks of each cross section in a dataframe of cross section points diff --git a/man/add_cs_bathymetry.Rd b/man/add_cs_bathymetry.Rd index 654f488..170090c 100644 --- a/man/add_cs_bathymetry.Rd +++ b/man/add_cs_bathymetry.Rd @@ -1,5 +1,5 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/ahg_estimates.R +% Please edit documentation in R/cs_bathymetry.R \name{add_cs_bathymetry} \alias{add_cs_bathymetry} \title{Given provide inchannel widths and depths to a set of cross section points and derive estimated shapes} diff --git a/man/add_cs_id_sequence.Rd b/man/add_cs_id_sequence.Rd index 851cf3a..93d5f82 100644 --- a/man/add_cs_id_sequence.Rd +++ b/man/add_cs_id_sequence.Rd @@ -1,5 +1,5 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/utils.R +% Please edit documentation in 
R/cs_ordering.R \name{add_cs_id_sequence} \alias{add_cs_id_sequence} \title{Add a 1:number of cross sections 'cs_id' for each crosswalk_id by cs_measure} diff --git a/man/add_intersects_ids.Rd b/man/add_intersects_ids.Rd index 3546ffb..ca9df1d 100644 --- a/man/add_intersects_ids.Rd +++ b/man/add_intersects_ids.Rd @@ -1,5 +1,5 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/transect_to_polygon_extensions.R +% Please edit documentation in R/transect_to_polygon_extender.R \name{add_intersects_ids} \alias{add_intersects_ids} \title{Add an ID column from 'y' if it intersects with 'x'} diff --git a/man/add_is_complete_cs_flag.Rd b/man/add_is_complete_cs_flag.Rd index d0d2d77..ab68a7c 100644 --- a/man/add_is_complete_cs_flag.Rd +++ b/man/add_is_complete_cs_flag.Rd @@ -1,5 +1,5 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/utils.R +% Please edit documentation in R/cs_pts.R \name{add_is_complete_cs_flag} \alias{add_is_complete_cs_flag} \title{Add an is_complete_cs flag to cross sections points diff --git a/man/add_is_missing_depth_flag.Rd b/man/add_is_missing_depth_flag.Rd index ab94150..8cd7faa 100644 --- a/man/add_is_missing_depth_flag.Rd +++ b/man/add_is_missing_depth_flag.Rd @@ -1,5 +1,5 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/utils.R +% Please edit documentation in R/cs_pts.R \name{add_is_missing_depth_flag} \alias{add_is_missing_depth_flag} \title{Add an is_missing_depth flag to cross sections points diff --git a/man/add_is_outlet_flag.Rd b/man/add_is_outlet_flag.Rd index 8073421..babe579 100644 --- a/man/add_is_outlet_flag.Rd +++ b/man/add_is_outlet_flag.Rd @@ -1,5 +1,5 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/transects.R +% Please edit documentation in R/utils.R \name{add_is_outlet_flag} \alias{add_is_outlet_flag} \title{Adds a logical 'is_outlet' flag to a set of transects identifying the most downstream transect} 
diff --git a/man/add_point_type_counts.Rd b/man/add_point_type_counts.Rd index 53cd125..414fdd3 100644 --- a/man/add_point_type_counts.Rd +++ b/man/add_point_type_counts.Rd @@ -1,5 +1,5 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/utils.R +% Please edit documentation in R/cs_bank_attributes.R \name{add_point_type_counts} \alias{add_point_type_counts} \title{Add the count of each point type as a column to a dataframe of section points} diff --git a/man/add_relief.Rd b/man/add_relief.Rd index 6f3dff2..b90facd 100644 --- a/man/add_relief.Rd +++ b/man/add_relief.Rd @@ -1,5 +1,5 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/utils.R +% Please edit documentation in R/cs_relief.R \name{add_relief} \alias{add_relief} \title{Add relief attributes to a dataframe of cross sections points @@ -8,11 +8,17 @@ value to data. The "has_relief" value is indicating whether a cross section "has Relief is determined by checking each set of cross section points have a left OR right bank that has a depth difference from the bottom that isgreater than or equal to a percentage of the cross section length (e.g. Assuming a 'pct_of_length_for_relief' of 0.01 (1\%) of a 100m cross section would have a relief depth threshold of 1m)} \usage{ -add_relief(classified_pts, pct_of_length_for_relief = 0.01) +add_relief( + classified_pts, + crosswalk_id = NULL, + pct_of_length_for_relief = 0.01 +) } \arguments{ \item{classified_pts}{sf or dataframe of points with "hy_id", "cs_id", "cs_lengthm", and "point_type" columns. Output of hydrofabric3D::classify_points()} +\item{crosswalk_id}{character, ID column} + \item{pct_of_length_for_relief}{numeric, percent of cs_lengthm to use as the threshold depth for classifying whether a cross section has "relief". 
Default is 0.01 (1\% of the cross sections length).} } \value{ diff --git a/man/calc_validity_scores.Rd b/man/calc_validity_scores.Rd index d31ed40..95ca035 100644 --- a/man/calc_validity_scores.Rd +++ b/man/calc_validity_scores.Rd @@ -1,5 +1,5 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/utils.R +% Please edit documentation in R/cs_validity_scores.R \name{calc_validity_scores} \alias{calc_validity_scores} \title{Calculates a validity score column based on valid_banks and has_relief columns in a set of cross section points} diff --git a/man/cluster_dissolve.Rd b/man/cluster_dissolve.Rd new file mode 100644 index 0000000..96b0b32 --- /dev/null +++ b/man/cluster_dissolve.Rd @@ -0,0 +1,17 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/utils.R +\name{cluster_dissolve} +\alias{cluster_dissolve} +\title{Dissove polygons based on intersections with other polygons} +\usage{ +cluster_dissolve(x) +} +\arguments{ +\item{x}{sf dataframe} +} +\value{ +dissolved sf dataframe +} +\description{ +Dissove polygons based on intersections with other polygons +} diff --git a/man/compare_cs_validity.Rd b/man/compare_cs_validity.Rd index 060dfde..32d144a 100644 --- a/man/compare_cs_validity.Rd +++ b/man/compare_cs_validity.Rd @@ -1,5 +1,5 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/utils.R +% Please edit documentation in R/cs_validity_scores.R \name{compare_cs_validity} \alias{compare_cs_validity} \title{Compare valid_banks and has_relief between 2 sets of cross section points} diff --git a/man/cs_arrange.Rd b/man/cs_arrange.Rd new file mode 100644 index 0000000..2baae6a --- /dev/null +++ b/man/cs_arrange.Rd @@ -0,0 +1,21 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/cs_ordering.R +\name{cs_arrange} +\alias{cs_arrange} +\title{Rearrange transects / cross sections in order from upstream to downstream} +\usage{ +cs_arrange(x, crosswalk_id = 
NULL, order_by = c("cs_id", "cs_measure")) +} +\arguments{ +\item{x}{dataframe, sf dataframe or tibble} + +\item{crosswalk_id}{character, unique ID column} + +\item{order_by}{character, either "cs_id" or "cs_measure"} +} +\value{ +dataframe, sf dataframe or tibble with an added 'cs_id' column +} +\description{ +Rearrange transects / cross sections in order from upstream to downstream +} diff --git a/man/drop_incomplete_cs_pts.Rd b/man/drop_incomplete_cs_pts.Rd index 7b5f55f..516f042 100644 --- a/man/drop_incomplete_cs_pts.Rd +++ b/man/drop_incomplete_cs_pts.Rd @@ -1,5 +1,5 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/utils.R +% Please edit documentation in R/cs_pts.R \name{drop_incomplete_cs_pts} \alias{drop_incomplete_cs_pts} \title{Remove entire cross sections that have any NA Z (depth) values} diff --git a/man/extend_transects_to_polygons.Rd b/man/extend_transects_to_polygons.Rd index 4877e46..19ba1d3 100644 --- a/man/extend_transects_to_polygons.Rd +++ b/man/extend_transects_to_polygons.Rd @@ -1,5 +1,5 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/transect_to_polygon_extensions.R +% Please edit documentation in R/transect_to_polygon_extender.R \name{extend_transects_to_polygons} \alias{extend_transects_to_polygons} \title{Give a set of transecct linestrings and poylgons and get the minimum distance to extend each transect line (from both directions, to try and reach the edge of a "polygons") @@ -9,12 +9,13 @@ extend_transects_to_polygons( transect_lines, polygons, flowlines, - crosswalk_id = "hy_id", + crosswalk_id = NULL, grouping_id = "mainstem", max_extension_distance = 3000, tolerance = NULL, keep_lengths = FALSE, - reindex_cs_ids = TRUE + reindex_cs_ids = TRUE, + verbose = TRUE ) } \arguments{ @@ -40,6 +41,8 @@ The intersect_group_id must appear as a column in both flowlines and transect_li \item{reindex_cs_ids}{logical, whether to reindex the cs_ids to ensure each crosswalk_id has 
cs_ids of 1-number of transects. Default is TRUE, which makes sure if any cross sections were removed from a crosswalk_id, then the cs_ids are renumbered so there are no gaps between cs_ids within a crosswalk_id. Setting this to FALSE will make sure crosswalk_id/cs_ids remain untouched as they were given in the input data.} + +\item{verbose}{logical, whether to output messages or not. Default is TRUE, and messages will output} } \value{ sf linestring, with extended transect lines diff --git a/man/get_bank_attributes.Rd b/man/get_bank_attributes.Rd index 044d688..95e9255 100644 --- a/man/get_bank_attributes.Rd +++ b/man/get_bank_attributes.Rd @@ -1,5 +1,5 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/utils.R +% Please edit documentation in R/cs_bank_attributes.R \name{get_bank_attributes} \alias{get_bank_attributes} \title{Get attributes about the banks of each cross section in a dataframe of cross section points diff --git a/man/get_cs_bottom_length.Rd b/man/get_cs_bottom_length.Rd index b1abb54..a478660 100644 --- a/man/get_cs_bottom_length.Rd +++ b/man/get_cs_bottom_length.Rd @@ -1,5 +1,5 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/utils.R +% Please edit documentation in R/cs_bathymetry.R \name{get_cs_bottom_length} \alias{get_cs_bottom_length} \title{Calculate the length between the leftmost and rightmost bottom point in each cross section} diff --git a/man/get_point_type_counts.Rd b/man/get_point_type_counts.Rd index c89adc7..4dc7248 100644 --- a/man/get_point_type_counts.Rd +++ b/man/get_point_type_counts.Rd @@ -1,5 +1,5 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/utils.R +% Please edit documentation in R/cs_bank_attributes.R \name{get_point_type_counts} \alias{get_point_type_counts} \title{Get the count of each point type in a set of cross section points} diff --git a/man/get_relief.Rd b/man/get_relief.Rd index ef71b4d..95f8a7f 100644 --- 
a/man/get_relief.Rd +++ b/man/get_relief.Rd @@ -1,5 +1,5 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/utils.R +% Please edit documentation in R/cs_relief.R \name{get_relief} \alias{get_relief} \title{Get relief attributes from a dataframe of cross sections points diff --git a/man/get_transect_extension_distances_to_polygons.Rd b/man/get_transect_extension_distances_to_polygons.Rd new file mode 100644 index 0000000..9cbe73e --- /dev/null +++ b/man/get_transect_extension_distances_to_polygons.Rd @@ -0,0 +1,34 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/transect_to_polygon_extender.R +\name{get_transect_extension_distances_to_polygons} +\alias{get_transect_extension_distances_to_polygons} +\title{Get the left and right extension distances for a set of transects out to a set of polygons} +\usage{ +get_transect_extension_distances_to_polygons( + transects, + polygons, + crosswalk_id, + max_extension_distance, + tolerance = NULL, + verbose = TRUE +) +} +\arguments{ +\item{transects}{sf linestring dataframe} + +\item{polygons}{sf polygon dataframe} + +\item{crosswalk_id}{character} + +\item{max_extension_distance}{numeric} + +\item{tolerance}{A minimum distance to use for simplification on polygons. Use a higher value for more simplification on the polygons. Default is NULL which will apply no simplification to polygons.} + +\item{verbose}{logical, whether to output messages or not. 
Default is TRUE, and messages will output} +} +\value{ +data.frame or tibble +} +\description{ +Get the left and right extension distances for a set of transects out to a set of polygons +} diff --git a/man/get_validity_tally.Rd b/man/get_validity_tally.Rd index 36130dd..d23c3e2 100644 --- a/man/get_validity_tally.Rd +++ b/man/get_validity_tally.Rd @@ -1,5 +1,5 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/utils.R +% Please edit documentation in R/cs_validity_scores.R \name{get_validity_tally} \alias{get_validity_tally} \title{Get a total count of the validity attributes} diff --git a/man/renumber_cs_ids.Rd b/man/renumber_cs_ids.Rd index dafcc2d..e574505 100644 --- a/man/renumber_cs_ids.Rd +++ b/man/renumber_cs_ids.Rd @@ -1,5 +1,5 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/cs_improvements.R +% Please edit documentation in R/cs_ordering.R \name{renumber_cs_ids} \alias{renumber_cs_ids} \title{Fix IDs in a dataframe} diff --git a/man/trim_transects_by_polygons.Rd b/man/trim_transects_by_polygons.Rd index 402ebd1..0546c03 100644 --- a/man/trim_transects_by_polygons.Rd +++ b/man/trim_transects_by_polygons.Rd @@ -1,27 +1,24 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/transect_to_polygon_extensions.R +% Please edit documentation in R/transect_to_polygon_extender.R \name{trim_transects_by_polygons} \alias{trim_transects_by_polygons} \title{Trim a set of transects to the bounds of polygons} \usage{ trim_transects_by_polygons( transect_lines, - crosswalk_id = NULL, flowlines, polygons, - polygon_id = NULL + crosswalk_id = NULL ) } \arguments{ \item{transect_lines}{sf dataframe} -\item{crosswalk_id}{character unique ID} - \item{flowlines}{sf dataframe} \item{polygons}{sf dataframe} -\item{polygon_id}{character, unique ID column in polygons} +\item{crosswalk_id}{character unique ID} } \value{ sf dataframe diff --git 
a/tests/testthat/test-extend-transects-to-polygons.R b/tests/testthat/test-extend-transects-to-polygons.R index 02b6d01..12d331e 100644 --- a/tests/testthat/test-extend-transects-to-polygons.R +++ b/tests/testthat/test-extend-transects-to-polygons.R @@ -11,6 +11,74 @@ source("testing_utils.R") # ------------------------------------------------------------------- # ---- hydrofabric3D::extend_transects_to_polygons() ---- # ------------------------------------------------------------------- +testthat::test_that("trim_transects_to_polygons() correct output columns - default inputs", { + # transect_lines, + # polygons, + # flowlines, + # crosswalk_id = 'hy_id', + # grouping_id = 'mainstem', + # max_extension_distance = 3000 + + flowlines <- sf::read_sf(testthat::test_path("testdata", "flowlines.gpkg")) %>% + dplyr::slice(10) + + # MIN_BF_WIDTH <- 50 + CROSSWALK_ID <- "id" + + flowlines <- + flowlines %>% + # add_powerlaw_bankful_width("tot_drainage_areasqkm", MIN_BF_WIDTH) %>% + dplyr::select( + dplyr::any_of(c(CROSSWALK_ID)), + geom + ) + + # create testiung polygons by buffering the flowlines + polygons <- + flowlines %>% + sf::st_buffer(500) + + # generate transects + transects <- cut_cross_sections( + net = flowlines, + crosswalk_id = CROSSWALK_ID, + cs_widths = 200, + num = 10 + ) %>% + dplyr::select( + dplyr::any_of(c(CROSSWALK_ID)), cs_id + ) + + # plot(polygons$geom, col = scales::alpha("pink", 0.3), add = F) + # plot(transects$geometry, add = T) + # plot(flowlines$geom, col = "blue", add = T) + + ext_trans <- hydrofabric3D::extend_transects_to_polygons( + transect_lines = transects, + polygons = polygons, + flowlines = flowlines, + crosswalk_id = CROSSWALK_ID, + grouping_id = CROSSWALK_ID, + max_extension_distance = 3000, + tolerance = NULL, + keep_lengths = FALSE, + reindex_cs_ids = FALSE, + verbose = TRUE + ) + + + # minimum expected cols + expected_cols <- c(CROSSWALK_ID, + "cs_id", + "cs_lengthm", + "left_distance", + "right_distance") + + 
has_required_output_cols <- check_required_cols(ext_trans, expected_cols = expected_cols) + testthat::expect_true(has_required_output_cols) + +}) + testthat::test_that("extend_transects_to_polygons() 2 transect lines get extended out to polygon boundaries but stop at specified max distance of 500m", { # transect_lines, # polygons, diff --git a/tests/testthat/test-get-transect-extension-distances-to-polygons.R b/tests/testthat/test-get-transect-extension-distances-to-polygons.R new file mode 100644 index 0000000..0901223 --- /dev/null +++ b/tests/testthat/test-get-transect-extension-distances-to-polygons.R @@ -0,0 +1,613 @@ + +library(testthat) +library(dplyr) +library(sf) +# # library(hydrofabric3D) + +source("testing_utils.R") +# source("tests/testthat/testing_utils.R") +# devtools::load_all() + +# ------------------------------------------------------------------- +# ---- hydrofabric3D::get_transect_extension_distances_to_polygons() ---- +# ------------------------------------------------------------------- +testthat::test_that("get_transect_extension_distances_to_polygons() correct output columns - default inputs", { + # transect_lines, + # polygons, + # flowlines, + # crosswalk_id = 'hy_id', + # grouping_id = 'mainstem', + # max_extension_distance = 3000 + + flowlines <- sf::read_sf(testthat::test_path("testdata", "flowlines.gpkg")) + + + + # flowlines <- dplyr::slice(flowlines, 1) + + # buff <- + # flowlines %>% + # sf::st_buffer(200) + + # MIN_BF_WIDTH <- 50 + CROSSWALK_ID <- "id" + + # LENGTH_COL_NAME <- "cs_lengthm" + # PCT <- 0.5 + NUM_OF_TRANSECTS <- 3 + MAX_EXT_DIST <- 3000 + BUFF_DIST <- 200 + + flowlines <- + flowlines %>% + # add_powerlaw_bankful_width("tot_drainage_areasqkm", MIN_BF_WIDTH) %>% + dplyr::select( + dplyr::any_of(c(CROSSWALK_ID)), + # tot_drainage_areasqkm, + # bf_width, + geom + ) + + # create testiung polygons by buffering the flowlines + polygons <- + flowlines %>% + sf::st_buffer(BUFF_DIST) + + # generate transects + transects <- 
cut_cross_sections( + net = flowlines, + crosswalk_id = CROSSWALK_ID, + num = NUM_OF_TRANSECTS + ) + + # transects + transects <- + transects %>% + dplyr::select( + dplyr::any_of(c(CROSSWALK_ID)), cs_id + ) + + + ext_dists <- get_transect_extension_distances_to_polygons( + transects = transects, + polygons = polygons, + crosswalk_id = CROSSWALK_ID, + max_extension_distance = MAX_EXT_DIST, + tolerance = 200 + ) + + # minimum expected cols + expected_cols <- c(CROSSWALK_ID, + "cs_id", + "left_distance", + "right_distance") + + has_required_output_cols <- check_required_cols(ext_dists, expected_cols = expected_cols) + testthat::expect_true(has_required_output_cols) + +}) + +testthat::test_that("get_transect_extension_distances_to_polygons() retains all unique transects (unique crosswalk_id / cs_id) - default inputs", { + # transect_lines, + # polygons, + # flowlines, + # crosswalk_id = 'hy_id', + # grouping_id = 'mainstem', + # max_extension_distance = 3000 + + flowlines <- sf::read_sf(testthat::test_path("testdata", "flowlines.gpkg")) + + + + # flowlines <- dplyr::slice(flowlines, 1) + + # buff <- + # flowlines %>% + # sf::st_buffer(200) + + CROSSWALK_ID <- "id" + NUM_OF_TRANSECTS <- 3 + MAX_EXT_DIST <- 3000 + BUFF_DIST <- 200 + + flowlines <- + flowlines %>% + # add_powerlaw_bankful_width("tot_drainage_areasqkm", MIN_BF_WIDTH) %>% + dplyr::select( + dplyr::any_of(c(CROSSWALK_ID )), + # tot_drainage_areasqkm, + # bf_width, + geom + ) + + # create testiung polygons by buffering the flowlines + polygons <- + flowlines %>% + sf::st_buffer(BUFF_DIST) + + # generate transects + transects <- cut_cross_sections( + net = flowlines, + crosswalk_id = CROSSWALK_ID, + num = NUM_OF_TRANSECTS + ) + + # transects + transects <- + transects %>% + dplyr::select( + dplyr::any_of(c(CROSSWALK_ID)), cs_id + ) + + + ext_dists <- hydrofabric3D::get_transect_extension_distances_to_polygons( + transects = transects, + polygons = polygons, + crosswalk_id = CROSSWALK_ID, + 
max_extension_distance = MAX_EXT_DIST, + tolerance = 200 + ) + + # hydrofabric3D:: + all_unique_ids_kept <- has_same_unique_tmp_ids(transects, ext_dists, crosswalk_id = CROSSWALK_ID) + testthat::expect_true(all_unique_ids_kept) + +}) + +testthat::test_that("get_transect_extension_distances_to_polygons() retains all unique transects (unique crosswalk_id / cs_id) - not all transects have a polygon to extend out to", { + + flowlines <- sf::read_sf(testthat::test_path("testdata", "flowlines.gpkg")) + + CROSSWALK_ID <- "id" + NUM_OF_TRANSECTS <- 3 + MAX_EXT_DIST <- 3000 + BUFF_DIST <- 200 + + flowlines <- + flowlines %>% + dplyr::select( + dplyr::any_of(c(CROSSWALK_ID )), + geom + ) + + # generate transects + transects <- cut_cross_sections( + net = flowlines, + crosswalk_id = CROSSWALK_ID, + num = NUM_OF_TRANSECTS + ) + + # transects + transects <- + transects %>% + dplyr::select( + dplyr::any_of(c(CROSSWALK_ID)), cs_id + ) + + # only 1 polygon, most transects are NOT extendable to the polygon, results in all unique crosswalk_id/cs_id IDs being kept + ext_dists <- hydrofabric3D::get_transect_extension_distances_to_polygons( + transects = transects, + polygons = sf::st_buffer( + dplyr::slice(flowlines, 1), + BUFF_DIST + ), + crosswalk_id = CROSSWALK_ID, + max_extension_distance = MAX_EXT_DIST + ) + + # hydrofabric3D:: + all_unique_ids_kept <- has_same_unique_tmp_ids(transects, ext_dists, crosswalk_id = CROSSWALK_ID) + testthat::expect_true(all_unique_ids_kept) + + # ALL transects have a corresponding polygon, still results in all unique crosswalk_id/cs_id IDs being kept + ext_dists <- hydrofabric3D::get_transect_extension_distances_to_polygons( + transects = transects, + polygons = sf::st_buffer(flowlines, BUFF_DIST), + crosswalk_id = CROSSWALK_ID, + max_extension_distance = MAX_EXT_DIST, + tolerance = 200 + ) + + # hydrofabric3D:: + all_unique_ids_kept <- has_same_unique_tmp_ids(transects, ext_dists, crosswalk_id = CROSSWALK_ID) + 
testthat::expect_true(all_unique_ids_kept) + +}) + + +testthat::test_that("get_transect_extension_distances_to_polygons() polygon is farther away than max extension distance so transect only goes to max_extension_distance", { + + flowlines <- sf::read_sf(testthat::test_path("testdata", "flowlines.gpkg")) + + CROSSWALK_ID <- "id" + NUM_OF_TRANSECTS <- 3 + MAX_EXT_DIST <- 10 + BUFF_DIST <- 1000 + + flowlines <- + flowlines %>% + dplyr::slice(1) %>% + dplyr::select( + dplyr::any_of(c(CROSSWALK_ID )), + geom + ) + + # generate transects + transects <- cut_cross_sections( + net = flowlines, + crosswalk_id = CROSSWALK_ID, + num = NUM_OF_TRANSECTS + ) %>% + dplyr::select( + dplyr::any_of(c(CROSSWALK_ID)), cs_id + ) + + # buffer flowline out by BUFF DIST + buffed_flowlines <- sf::st_buffer(flowlines, BUFF_DIST) + # mapview::mapview(buffed_flowlines) + transects + flowlines + + # only 1 polygon, most transects are NOT extendable to the polygon, results in all unique crosswalk_id/cs_id IDs being kept + ext_dists <- hydrofabric3D::get_transect_extension_distances_to_polygons( + transects = transects, + polygons = buffed_flowlines, + crosswalk_id = CROSSWALK_ID, + max_extension_distance = MAX_EXT_DIST, + tolerance = 200 + ) + + # make sure that because the polygon for the transects is WAY bigger than the max extension dist + starting transect length, + # then the transects are just extended to the max possible length because they never actually intersect the bounds of the + left_side_of_transects_is_max_extended <- all(ext_dists$left_distance == MAX_EXT_DIST) + right_side_of_transects_is_max_extended <- all(ext_dists$right_distance == MAX_EXT_DIST) + + testthat::expect_true(left_side_of_transects_is_max_extended) + testthat::expect_true(right_side_of_transects_is_max_extended) + +}) + +testthat::test_that("get_transect_extension_distances_to_polygons() max extension distance is long enough that transects reach edge of polygons ", { + + flowlines <- 
sf::read_sf(testthat::test_path("testdata", "flowlines.gpkg")) + + CROSSWALK_ID <- "id" + NUM_OF_TRANSECTS <- 3 + CS_WIDTH <- 100 + MAX_EXT_DIST <- 1200 + BUFF_DIST <- 1000 + + flowlines <- + flowlines %>% + dplyr::slice(1) %>% + dplyr::select( + dplyr::any_of(c(CROSSWALK_ID )), + geom + ) + + # generate transects + transects <- cut_cross_sections( + net = flowlines, + crosswalk_id = CROSSWALK_ID, + num = NUM_OF_TRANSECTS, + cs_widths = CS_WIDTH + ) %>% + dplyr::select( + dplyr::any_of(c(CROSSWALK_ID)), cs_id + ) + + # buffer flowline out by BUFF DIST + buffed_flowlines <- sf::st_buffer(flowlines, BUFF_DIST) + # mapview::mapview(buffed_flowlines) + transects + flowlines + + # only 1 polygon, most transects are NOT extendable to the polygon, results in all unique crosswalk_id/cs_id IDs being kept + ext_dists <- hydrofabric3D::get_transect_extension_distances_to_polygons( + transects = transects, + polygons = buffed_flowlines, + crosswalk_id = CROSSWALK_ID, + max_extension_distance = MAX_EXT_DIST, + tolerance = 200 + ) + + # make sure that the transects get an extension distance less than the max because the max starting transect length + the max extension distance is much bigger + # than the actual distance that the polygon is from either end of the transects + # (i.e. 
the left/right side of the transects do NOT need to go the full MAX EXTENSION DISTANCE to hit the edge of the polygon) + left_side_of_transects_is_less_than_max_extended <- all(ext_dists$left_distance < MAX_EXT_DIST) + right_side_of_transects_is_less_than_max_extended <- all(ext_dists$right_distance < MAX_EXT_DIST) + + testthat::expect_true(left_side_of_transects_is_less_than_max_extended) + testthat::expect_true(right_side_of_transects_is_less_than_max_extended) + +}) + +testthat::test_that("get_transect_extension_distances_to_polygons() transects that are already longer than and reach outside of the edge of a polygon get extension distance of 0", { + + flowlines <- sf::read_sf(testthat::test_path("testdata", "flowlines.gpkg")) + + CROSSWALK_ID <- "id" + NUM_OF_TRANSECTS <- 3 + CS_WIDTH <- 100 + MAX_EXT_DIST <- 200 + BUFF_DIST <- 10 + + flowlines <- + flowlines %>% + dplyr::slice(1) %>% + dplyr::select( + dplyr::any_of(c(CROSSWALK_ID )), + geom + ) + + # generate transects + transects <- cut_cross_sections( + net = flowlines, + crosswalk_id = CROSSWALK_ID, + num = NUM_OF_TRANSECTS, + cs_widths = CS_WIDTH + ) %>% + dplyr::select( + dplyr::any_of(c(CROSSWALK_ID)), cs_id + ) + + # buffer flowline out by BUFF DIST + buffed_flowlines <- sf::st_buffer(flowlines, BUFF_DIST) + # mapview::mapview(buffed_flowlines) + transects + flowlines + + # only 1 polygon, most transects are NOT extendable to the polygon, results in all unique crosswalk_id/cs_id IDs being kept + ext_dists <- hydrofabric3D::get_transect_extension_distances_to_polygons( + transects = transects, + polygons = buffed_flowlines, + crosswalk_id = CROSSWALK_ID, + max_extension_distance = MAX_EXT_DIST, + tolerance = 200 + ) + + # make sure that transects that are already longer than a polygon they intersect with, do NOT get extended + # TODO: maybe an ideal world would give a negative distance (i.e. 
shorten the transect lines) + no_left_side_extension <- all(ext_dists$left_distance == 0) + no_right_side_extension <- all(ext_dists$right_distance == 0) + + testthat::expect_true(no_left_side_extension) + testthat::expect_true(no_right_side_extension) + +}) + +testthat::test_that("get_transect_extension_distances_to_polygons() transects intersect with 2 overlapping polygons (i.e. 2 layered polygons, 1 entirely encompassing the other, both fully encompass the transect lines)", { + + flowlines <- sf::read_sf(testthat::test_path("testdata", "flowlines.gpkg")) + + CROSSWALK_ID <- "id" + NUM_OF_TRANSECTS <- 3 + CS_WIDTH <- 100 + MAX_EXT_DIST <- 1200 + BUFF_DIST_1 <- 1000 + BUFF_DIST_2 <- BUFF_DIST_1 / 2 + + + flowlines <- + flowlines %>% + dplyr::slice(1) %>% + dplyr::select( + dplyr::any_of(c(CROSSWALK_ID )), + geom + ) + + # generate transects + transects <- cut_cross_sections( + net = flowlines, + crosswalk_id = CROSSWALK_ID, + num = NUM_OF_TRANSECTS, + cs_widths = CS_WIDTH + ) %>% + dplyr::select( + dplyr::any_of(c(CROSSWALK_ID)), cs_id + ) + + # buffer flowline out by 2 different distances and merge them back together + big_polygons <- sf::st_buffer(flowlines, BUFF_DIST_1) + small_polygons <- sf::st_buffer(flowlines, BUFF_DIST_2) + + buffed_flowlines <- dplyr::bind_rows( + big_polygons, + small_polygons + ) + + # mapview::mapview(buffered_flowlines) + transects + flowlines + small_ext_dists <- hydrofabric3D::get_transect_extension_distances_to_polygons( + transects = transects, + polygons = small_polygons, + crosswalk_id = CROSSWALK_ID, + max_extension_distance = MAX_EXT_DIST + ) + + all_ext_dists <- hydrofabric3D::get_transect_extension_distances_to_polygons( + transects = transects, + polygons = buffed_flowlines, + crosswalk_id = CROSSWALK_ID, + max_extension_distance = MAX_EXT_DIST, + tolerance = 200 + ) + + # hardcoded correct extension distances to the smaller polygon + expected_left_distance <- c(475, 585, 328) + expected_right_distance <- c(420, 441, 383) + 
 + + # make sure the algo just extends out to the smaller of the overlayed polygons + left_side_transects_extend_only_to_closer_of_overlayed_polygons <- all(expected_left_distance == all_ext_dists$left_distance) + right_side_transects_extend_only_to_closer_of_overlayed_polygons <- all(expected_right_distance == all_ext_dists$right_distance) + + testthat::expect_true(left_side_transects_extend_only_to_closer_of_overlayed_polygons) + testthat::expect_true(right_side_transects_extend_only_to_closer_of_overlayed_polygons) + + # TODO: Not sure if this is bad practice for tests to use the function they are testing to check for correctness.... + # TODO: these tests get the extension distances to JUST the smaller polygons and compare those distances to the extension distances calculated for the OVERLAYED polygons + # TODO: They should be the same extension distances because the extension algo will use the first set of polygon edges it reaches + # left_side_extensions_match_extensions_for_only_smaller_polygons <- all(small_ext_dists$left_distance == all_ext_dists$left_distance) + # right_side_extensions_match_extensions_for_only_smaller_polygons <- all(small_ext_dists$right_distance == all_ext_dists$right_distance) + # + # testthat::expect_true(left_side_extensions_match_extensions_for_only_smaller_polygons) + # testthat::expect_true(right_side_extensions_match_extensions_for_only_smaller_polygons) + +}) + +testthat::test_that("get_transect_extension_distances_to_polygons() transects intersect with 2 overlapping polygons (i.e. 
2 layered polygons, 1 entirely encompassing the other, neither encompass the transect lines) should result in 0 extension distance", { + + flowlines <- sf::read_sf(testthat::test_path("testdata", "flowlines.gpkg")) + + CROSSWALK_ID <- "id" + NUM_OF_TRANSECTS <- 3 + CS_WIDTH <- 100 + MAX_EXT_DIST <- 1200 + + BUFF_DIST_1 <- 10 + BUFF_DIST_2 <- BUFF_DIST_1 / 2 + + + flowlines <- + flowlines %>% + dplyr::slice(1) %>% + dplyr::select( + dplyr::any_of(c(CROSSWALK_ID )), + geom + ) + + # generate transects + transects <- cut_cross_sections( + net = flowlines, + crosswalk_id = CROSSWALK_ID, + num = NUM_OF_TRANSECTS, + cs_widths = CS_WIDTH + ) %>% + dplyr::select( + dplyr::any_of(c(CROSSWALK_ID)), cs_id + ) + + # buffer flowline out by 2 different distances and merge them back together + big_polygons <- sf::st_buffer(flowlines, BUFF_DIST_1) + small_polygons <- sf::st_buffer(flowlines, BUFF_DIST_2) + + buffed_flowlines <- dplyr::bind_rows( + big_polygons, + small_polygons + ) + + # mapview::mapview(buffed_flowlines) + transects + flowlines + + small_ext_dists <- hydrofabric3D::get_transect_extension_distances_to_polygons( + transects = transects, + polygons = small_polygons, + crosswalk_id = CROSSWALK_ID, + max_extension_distance = MAX_EXT_DIST + ) + + all_ext_dists <- hydrofabric3D::get_transect_extension_distances_to_polygons( + transects = transects, + polygons = buffed_flowlines, + crosswalk_id = CROSSWALK_ID, + max_extension_distance = MAX_EXT_DIST, + tolerance = 200 + ) + + # hardcoded correct extension distances to the smaller polygon + expected_left_distance <- c(0, 0, 0) + expected_right_distance <- c(0, 0, 0) + + # make sure the algo just extends out to the smaller of the overlayed polygons + left_side_transects_extend_only_to_closer_of_overlayed_polygons <- all(expected_left_distance == all_ext_dists$left_distance) + right_side_transects_extend_only_to_closer_of_overlayed_polygons <- all(expected_right_distance == all_ext_dists$right_distance) + + 
testthat::expect_true(left_side_transects_extend_only_to_closer_of_overlayed_polygons) + testthat::expect_true(right_side_transects_extend_only_to_closer_of_overlayed_polygons) + + # TODO: Not sure if this is bad practice for tests to use the function they are testing to check for correctness.... + # TODO: these tests get the extension distances to JUST the smaller polygons and compare those distances to the extension distances calculated for the OVERLAYED polygons + # TODO: They should be the same extension distances because the extension algo will use the first set of polygon edges it reaches + # left_side_extensions_match_extensions_for_only_smaller_polygons <- all(small_ext_dists$left_distance == all_ext_dists$left_distance) + # right_side_extensions_match_extensions_for_only_smaller_polygons <- all(small_ext_dists$right_distance == all_ext_dists$right_distance) + # + # testthat::expect_true(left_side_extensions_match_extensions_for_only_smaller_polygons) + # testthat::expect_true(right_side_extensions_match_extensions_for_only_smaller_polygons) + +}) + +testthat::test_that("get_transect_extension_distances_to_polygons() transects intersect with 2 overlapping polygons (i.e. 
2 layered polygons, 1 entirely encompassing the other, 1 polygon fully encompasses the transect lines, the other polygon does NOT, and is thus shorter than the transect lines) should result in transect lines getting extended to the bigger of the overlayed polygons", { + + flowlines <- sf::read_sf(testthat::test_path("testdata", "flowlines.gpkg")) + + CROSSWALK_ID <- "id" + NUM_OF_TRANSECTS <- 3 + CS_WIDTH <- 100 + MAX_EXT_DIST <- 1200 + + BUFF_DIST_1 <- 1000 + BUFF_DIST_2 <- 10 + + + flowlines <- + flowlines %>% + dplyr::slice(1) %>% + dplyr::select( + dplyr::any_of(c(CROSSWALK_ID )), + geom + ) + + # generate transects + transects <- cut_cross_sections( + net = flowlines, + crosswalk_id = CROSSWALK_ID, + num = NUM_OF_TRANSECTS, + cs_widths = CS_WIDTH + ) %>% + dplyr::select( + dplyr::any_of(c(CROSSWALK_ID)), cs_id + ) + + # buffer flowline out by 2 different distances and merge them back together + big_polygons <- sf::st_buffer(flowlines, BUFF_DIST_1) + small_polygons <- sf::st_buffer(flowlines, BUFF_DIST_2) + + buffed_flowlines <- dplyr::bind_rows( + big_polygons, + small_polygons + ) + + # mapview::mapview(buffed_flowlines) + transects + flowlines + + small_ext_dists <- hydrofabric3D::get_transect_extension_distances_to_polygons( + transects = transects, + polygons = small_polygons, + crosswalk_id = CROSSWALK_ID, + max_extension_distance = MAX_EXT_DIST + ) + + all_ext_dists <- hydrofabric3D::get_transect_extension_distances_to_polygons( + transects = transects, + polygons = buffed_flowlines, + crosswalk_id = CROSSWALK_ID, + max_extension_distance = MAX_EXT_DIST, + tolerance = 200 + ) + + # hardcoded correct extension distances to the smaller polygon + expected_left_distance <- c(955, 952, 882) + expected_right_distance <- c(937, 939, 952) + + # make sure the algo extends out to the LARGER of the overlayed polygons + left_side_transects_extend_only_to_closer_of_overlayed_polygons <- all(expected_left_distance == all_ext_dists$left_distance) + 
right_side_transects_extend_only_to_closer_of_overlayed_polygons <- all(expected_right_distance == all_ext_dists$right_distance) + + testthat::expect_true(left_side_transects_extend_only_to_closer_of_overlayed_polygons) + testthat::expect_true(right_side_transects_extend_only_to_closer_of_overlayed_polygons) + + # TODO: Not sure if this is bad practice for tests to use the function they are testing to check for correctness.... + # TODO: these tests get the extension distances to JUST the smaller polygons and compare those distances to the extension distances calculated for the OVERLAYED polygons + # TODO: They should be the same extension distances because the extenssion algo will use the first set of polygon edges it reaches + # left_side_extensions_match_extensions_for_only_smaller_polygons <- all(small_ext_dists$left_distance == all_ext_dists$left_distance) + # right_side_extensions_match_extensions_for_only_smaller_polygons <- all(small_ext_dists$right_distance == all_ext_dists$right_distance) + # + # testthat::expect_true(left_side_extensions_match_extensions_for_only_smaller_polygons) + # testthat::expect_true(right_side_extensions_match_extensions_for_only_smaller_polygons) + +}) + +# buff <- +# flowlines %>% +# sf::st_buffer(200) \ No newline at end of file diff --git a/tests/testthat/test-trim-transects-to-polygons.R b/tests/testthat/test-trim-transects-to-polygons.R new file mode 100644 index 0000000..c873537 --- /dev/null +++ b/tests/testthat/test-trim-transects-to-polygons.R @@ -0,0 +1,1309 @@ + +library(testthat) +library(dplyr) +library(sf) +# # library(hydrofabric3D) + +source("testing_utils.R") +# source("tests/testthat/testing_utils.R") +# devtools::load_all() + +# ------------------------------------------------------------------- +# ---- hydrofabric3D::trim_transects_to_polygons() ---- +# ------------------------------------------------------------------- +testthat::test_that("trim_transects_to_polygons() correct output columns - default inputs", 
{ + + flowlines <- sf::read_sf(testthat::test_path("testdata", "flowlines.gpkg")) %>% + dplyr::slice(10) + + # MIN_BF_WIDTH <- 50 + CROSSWALK_ID <- "id" + + flowlines <- + flowlines %>% + # add_powerlaw_bankful_width("tot_drainage_areasqkm", MIN_BF_WIDTH) %>% + dplyr::select( + dplyr::any_of(c(CROSSWALK_ID)), + geom + ) + + # # create testiung polygons by buffering the flowlines + # create testiung polygons by buffering the flowlines + big_polygons <- + flowlines %>% + sf::st_buffer(500) %>% + dplyr::mutate( + polygon_id = 1:dplyr::n() + ) + + small_polygons <- + flowlines %>% + sf::st_buffer(250) %>% + dplyr::mutate( + polygon_id = 1:dplyr::n() + ) + + # generate transects + transects <- cut_cross_sections( + net = flowlines, + crosswalk_id = CROSSWALK_ID, + cs_widths = 200, + num = 10 + ) %>% + dplyr::select( + dplyr::any_of(c(CROSSWALK_ID)), cs_id + ) + + # plot(polygons$geom, col = scales::alpha("pink", 0.3), add = F) + # plot(transects$geometry, add = T) + # plot(flowlines$geom, col = "blue", add = T) + + ext_trans <- hydrofabric3D::extend_transects_to_polygons( + transect_lines = transects, + polygons = big_polygons, + flowlines = flowlines, + crosswalk_id = CROSSWALK_ID, + grouping_id = CROSSWALK_ID, + max_extension_distance = 3000, + tolerance = NULL, + keep_lengths = FALSE, + reindex_cs_ids = FALSE, + verbose = TRUE + ) %>% + dplyr::select( + dplyr::any_of(CROSSWALK_ID), + cs_id + ) + + # plot(big_polygons$geom, col = scales::alpha("pink", 0.3), add = F) + # plot(small_polygons$geom, col = scales::alpha("blue", 0.3), add = T) + # plot(ext_trans$geometry, col = "green", lwd = 2, add = T) + # plot(transects$geometry, col = "red",lwd = 2, add = T) + # plot(flowlines$geom, col = "blue", add = T) + + trimmed_trans <- hydrofabric3D::trim_transects_by_polygons( + transect_lines = ext_trans, + flowlines = flowlines, + polygons = small_polygons, + crosswalk_id = CROSSWALK_ID + ) + + # plot(big_polygons$geom, col = scales::alpha("pink", 0.3), add = F) + # 
plot(small_polygons$geom, col = scales::alpha("blue", 0.3), add = T) + # plot(ext_trans$geometry, col = "green", lwd = 4, add = T) + # plot(trimmed_trans$geometry, col = "gold",lwd = 5, add = T) + # plot(transects$geometry, col = "red",lwd = 4, add = T) + # plot(flowlines$geom, col = "blue", add = T) + + # # plot(big_polygons$geom, col = scales::alpha("pink", 0.3), add = F) + # plot(small_polygons$geom, col = scales::alpha("blue", 0.3), add = F) + # plot(ext_trans$geometry, col = "green", lwd = 4, add = T) + # plot(trimmed_trans$geometry, col = "gold",lwd = 5, add = T) + # plot(transects$geometry, col = "red",lwd = 4, add = T) + # plot(flowlines$geom, col = "blue", add = T) + + # minimum expected cols + expected_cols <- c(CROSSWALK_ID, + "cs_id" + ) + + has_required_output_cols <- check_required_cols(trimmed_trans, expected_cols = expected_cols) + testthat::expect_true(has_required_output_cols) + +}) + +testthat::test_that("trim_transects_to_polygons() all transects are within polygons and thus get trimmed down and all are retained with correct IDs", { + + flowlines <- sf::read_sf(testthat::test_path("testdata", "flowlines.gpkg")) %>% + dplyr::slice(10) + + # MIN_BF_WIDTH <- 50 + CROSSWALK_ID <- "id" + + flowlines <- + flowlines %>% + # add_powerlaw_bankful_width("tot_drainage_areasqkm", MIN_BF_WIDTH) %>% + dplyr::select( + dplyr::any_of(c(CROSSWALK_ID)), + geom + ) + + # # create testiung polygons by buffering the flowlines + # create testiung polygons by buffering the flowlines + big_polygons <- + flowlines %>% + sf::st_buffer(500) %>% + dplyr::mutate( + polygon_id = 1:dplyr::n() + ) + + small_polygons <- + flowlines %>% + sf::st_buffer(250) %>% + dplyr::mutate( + polygon_id = 1:dplyr::n() + ) + + # generate transects + transects <- cut_cross_sections( + net = flowlines, + crosswalk_id = CROSSWALK_ID, + cs_widths = 200, + num = 10 + ) %>% + dplyr::select( + dplyr::any_of(c(CROSSWALK_ID)), cs_id, cs_measure + ) + + # plot(big_polygons$geom, col = 
scales::alpha("pink", 0.3), add = F) + # plot(small_polygons$geom, col = scales::alpha("blue", 0.3), add = T) + # plot(transects$geometry, add = T) + # plot(flowlines$geom, col = "blue", add = T) + + ext_trans <- hydrofabric3D::extend_transects_to_polygons( + transect_lines = transects, + polygons = big_polygons, + flowlines = flowlines, + crosswalk_id = CROSSWALK_ID, + grouping_id = CROSSWALK_ID, + max_extension_distance = 3000, + tolerance = NULL, + keep_lengths = FALSE, + reindex_cs_ids = FALSE, + verbose = TRUE + ) %>% + dplyr::select( + dplyr::any_of(CROSSWALK_ID), + cs_id, + cs_lengthm, + cs_measure + ) + + + # plot(big_polygons$geom, col = scales::alpha("pink", 0.3), add = F) + # plot(small_polygons$geom, col = scales::alpha("blue", 0.3), add = T) + # plot(ext_trans$geometry, col = "green", lwd = 2, add = T) + # plot(transects$geometry, col = "red",lwd = 2, add = T) + # plot(flowlines$geom, col = "blue", add = T) + + trimmed_trans <- hydrofabric3D::trim_transects_by_polygons( + transect_lines = ext_trans, + flowlines = flowlines, + polygons = small_polygons, + crosswalk_id = CROSSWALK_ID + ) + + # plot(big_polygons$geom, col = scales::alpha("pink", 0.3), add = F) + # plot(small_polygons$geom, col = scales::alpha("blue", 0.3), add = T) + # plot(ext_trans$geometry, col = "green", lwd = 4, add = T) + # plot(trimmed_trans$geometry, col = "gold",lwd = 5, add = T) + # plot(transects$geometry, col = "red",lwd = 4, add = T) + # plot(flowlines$geom, col = "blue", add = T) + + # test all UIDs are retained as desired + testthat::expect_true( + has_same_uids(transects, trimmed_trans, crosswalk_id = CROSSWALK_ID) + ) + + testthat::expect_true( + has_same_uids(ext_trans, trimmed_trans, crosswalk_id = CROSSWALK_ID) + ) + + # test to make sure transects only intersect one flowline and once + all_transects_intersect_flowlines_only_once <- all(lengths(sf::st_intersects(trimmed_trans, flowlines) ) == 1) + testthat::expect_true( + all_transects_intersect_flowlines_only_once + ) 
+ + # test to make sure transects DON'T intersect any other flowlines (except self) + all_transects_intersect_no_other_transects <- all(lengths(sf::st_intersects(trimmed_trans)) == 1) + testthat::expect_true( + all_transects_intersect_no_other_transects + ) + + +}) + + +testthat::test_that("trim_transects_to_polygons() one set of transects has all trims occur, another set has no trimming, and all the IDs get retained and trimming works as expected ", { + + flowlines <- sf::read_sf(testthat::test_path("testdata", "flowlines.gpkg")) %>% + dplyr::slice(c(9, 10)) + + # MIN_BF_WIDTH <- 50 + CROSSWALK_ID <- "id" + + flowlines <- + flowlines %>% + # add_powerlaw_bankful_width("tot_drainage_areasqkm", MIN_BF_WIDTH) %>% + dplyr::select( + dplyr::any_of(c(CROSSWALK_ID)), + geom + ) + + # # create testiung polygons by buffering the flowlines + # create testiung polygons by buffering the flowlines + big_polygons <- + flowlines %>% + dplyr::slice(2) %>% + sf::st_buffer(200) %>% + dplyr::mutate( + polygon_id = 1:dplyr::n() + ) + + small_polygons <- + flowlines %>% + dplyr::slice(2) %>% + sf::st_buffer(150) %>% + dplyr::mutate( + polygon_id = 1:dplyr::n() + ) + + # generate transects + transects <- cut_cross_sections( + net = flowlines, + crosswalk_id = CROSSWALK_ID, + cs_widths = 200, + num = 10 + ) %>% + dplyr::select( + dplyr::any_of(c(CROSSWALK_ID)), cs_id, cs_measure + ) + + # plot(flowlines$geom, col = "blue", add = F) + # plot(big_polygons$geom, col = scales::alpha("pink", 0.3), add = T) + # plot(small_polygons$geom, col = scales::alpha("blue", 0.3), add = T) + # plot(transects$geometry, add = T) + # + ext_trans <- hydrofabric3D::extend_transects_to_polygons( + transect_lines = transects, + polygons = big_polygons, + flowlines = flowlines, + crosswalk_id = CROSSWALK_ID, + grouping_id = CROSSWALK_ID, + max_extension_distance = 3000, + tolerance = NULL, + keep_lengths = FALSE, + reindex_cs_ids = FALSE, + verbose = TRUE + ) %>% + dplyr::select( + dplyr::any_of(CROSSWALK_ID), 
+ cs_id, + cs_lengthm, + cs_measure + ) + + # plot(flowlines$geom, col = "blue", add = F) + # plot(big_polygons$geom, col = scales::alpha("pink", 0.3), add = T) + # plot(small_polygons$geom, col = scales::alpha("blue", 0.3), add = T) + # plot(ext_trans$geometry, col = "green", lwd = 2, add = T) + # plot(transects$geometry, col = "red",lwd = 2, add = T) + # + # mapview::mapview(ext_trans, color = "green") + + # mapview::mapview(transects, color = "red") + + # mapview::mapview(flowlines, color = "dodgerblue") + + # mapview::mapview(big_polygons, col.regions = "white") + + + trimmed_trans <- hydrofabric3D::trim_transects_by_polygons( + transect_lines = ext_trans, + flowlines = flowlines, + polygons = small_polygons, + crosswalk_id = CROSSWALK_ID + ) + + # plot(flowlines$geom, col = "blue", add = F) + # plot(big_polygons$geom, col = scales::alpha("pink", 0.3), add = T) + # plot(small_polygons$geom, col = scales::alpha("blue", 0.3), add = T) + # plot(ext_trans$geometry, col = "green", lwd = 2, add = T) + # plot(trimmed_trans$geometry, col = "gold",lwd = 5, add = T) + # plot(transects$geometry, col = "red",lwd = 2, add = T) + + # test all UIDs are retained as desired + testthat::expect_true( + has_same_uids(transects, trimmed_trans, crosswalk_id = CROSSWALK_ID) + ) + + testthat::expect_true( + has_same_uids(ext_trans, trimmed_trans, crosswalk_id = CROSSWALK_ID) + ) + + # test to make sure transects only intersect one flowline and once + all_transects_intersect_flowlines_only_once <- all(lengths(sf::st_intersects(trimmed_trans, flowlines) ) == 1) + testthat::expect_true( + all_transects_intersect_flowlines_only_once + ) + + # test to make sure transects DON'T intersect any other flowlines (except self) + all_transects_intersect_no_other_transects <- all(lengths(sf::st_intersects(trimmed_trans)) == 1) + testthat::expect_true( + all_transects_intersect_no_other_transects + ) + + +}) + +testthat::test_that("trim_transects_to_polygons() 2 sets of transects lines get trimmed, 
one set has all its transects in polygons, + the other set has 3 transect line that intersects with one of the polygons from the other flowline + one of the transects is entirely in the polygon, the other 2 are partitually in the polygon, + none of these transects get extended nor trimmed", { + + flowlines <- sf::read_sf(testthat::test_path("testdata", "flowlines.gpkg")) %>% + dplyr::slice(c(9, 10)) + + # MIN_BF_WIDTH <- 50 + CROSSWALK_ID <- "id" + + flowlines <- + flowlines %>% + # add_powerlaw_bankful_width("tot_drainage_areasqkm", MIN_BF_WIDTH) %>% + dplyr::select( + dplyr::any_of(c(CROSSWALK_ID)), + geom + ) + + # # create testiung polygons by buffering the flowlines + # create testiung polygons by buffering the flowlines + big_polygons <- + flowlines %>% + dplyr::slice(2) %>% + sf::st_buffer(500) %>% + dplyr::mutate( + polygon_id = 1:dplyr::n() + ) + + small_polygons <- + flowlines %>% + dplyr::slice(2) %>% + sf::st_buffer(200) %>% + dplyr::mutate( + polygon_id = 1:dplyr::n() + ) + + # generate transects + transects <- cut_cross_sections( + net = flowlines, + crosswalk_id = CROSSWALK_ID, + cs_widths = 200, + num = 10 + ) %>% + dplyr::select( + dplyr::any_of(c(CROSSWALK_ID)), cs_id, cs_measure + ) + + # plot(flowlines$geom, col = "blue", add = F) + # plot(big_polygons$geom, col = scales::alpha("pink", 0.3), add = T) + # plot(small_polygons$geom, col = scales::alpha("blue", 0.3), add = T) + # plot(transects$geometry, add = T) + + ext_trans <- hydrofabric3D::extend_transects_to_polygons( + transect_lines = transects, + polygons = big_polygons, + flowlines = flowlines, + crosswalk_id = CROSSWALK_ID, + grouping_id = CROSSWALK_ID, + max_extension_distance = 3000, + tolerance = NULL, + keep_lengths = FALSE, + reindex_cs_ids = FALSE, + verbose = TRUE + ) %>% + dplyr::select( + dplyr::any_of(CROSSWALK_ID), + cs_id, + cs_lengthm, + cs_measure + ) + + # plot(flowlines$geom, col = "blue", add = F) + # plot(big_polygons$geom, col = scales::alpha("pink", 0.3), add = T) + # 
plot(small_polygons$geom, col = scales::alpha("blue", 0.3), add = T)
  # (debug) overlay plots / mapview views of ext_trans, transects, flowlines,
  # and big_polygons omitted here — uncomment equivalents locally to inspect.

  # trim the extended transects back to the boundaries of the smaller polygons
  trimmed_trans <- hydrofabric3D::trim_transects_by_polygons(
    transect_lines = ext_trans,
    flowlines = flowlines,
    polygons = small_polygons,
    crosswalk_id = CROSSWALK_ID
  )

  # test all UIDs are retained as desired
  testthat::expect_true(
    has_same_uids(transects, trimmed_trans, crosswalk_id = CROSSWALK_ID)
  )

  testthat::expect_true(
    has_same_uids(ext_trans, trimmed_trans, crosswalk_id = CROSSWALK_ID)
  )

  # test to make sure transects only intersect one flowline and once
  all_transects_intersect_flowlines_only_once <- all(lengths(sf::st_intersects(trimmed_trans, flowlines)) == 1)
  testthat::expect_true(
    all_transects_intersect_flowlines_only_once
  )

  # test to make sure transects DON'T intersect any other transects (except self)
  all_transects_intersect_no_other_transects <- all(lengths(sf::st_intersects(trimmed_trans)) == 1)
  testthat::expect_true(
    all_transects_intersect_no_other_transects
  )

})

testthat::test_that("trim_transects_to_polygons() a single set of transects with one flowline, and a poylgon for extending/trimming which is NOT entirely covering the flowline, 
                    2 transects get extended, but only 1 gets trimmed because that one is intersecting the 'trim polygon', the other extension does NOT intersect the trim polygon", {

  # single flowline (fixture row 9) used to generate transects
  flowlines <- sf::read_sf(testthat::test_path("testdata", "flowlines.gpkg")) %>%
    dplyr::slice(c(9))

  # MIN_BF_WIDTH <- 50
  CROSSWALK_ID <- "id"

  # keep only the crosswalk ID column and the geometry
  flowlines <-
    flowlines %>%
    # add_powerlaw_bankful_width("tot_drainage_areasqkm", MIN_BF_WIDTH) %>%
    dplyr::select(
      dplyr::any_of(c(CROSSWALK_ID)),
      geom
    )

  # create testing polygons by buffering a NEIGHBORING flowline (fixture row 10),
  # so the polygons only partially cover the flowline used for the transects
  big_polygons <-
    sf::read_sf(testthat::test_path("testdata", "flowlines.gpkg")) %>%
    dplyr::slice(c(10)) %>%
    sf::st_buffer(500) %>%
    dplyr::mutate(
      polygon_id = 1:dplyr::n()
    )

  small_polygons <-
    sf::read_sf(testthat::test_path("testdata", "flowlines.gpkg")) %>%
    dplyr::slice(c(10)) %>%
    sf::st_buffer(200) %>%
    dplyr::mutate(
      polygon_id = 1:dplyr::n()
    )

  # generate transects
  transects <- cut_cross_sections(
    net = flowlines,
    crosswalk_id = CROSSWALK_ID,
    cs_widths = 200,
    num = 10
  ) %>%
    dplyr::select(
      dplyr::any_of(c(CROSSWALK_ID)), cs_id, cs_measure
    )

  # extend the transects out to the big polygons; flag which ones were extended
  # on either side (left_distance / right_distance are added by the extender)
  ext_trans <- hydrofabric3D::extend_transects_to_polygons(
    transect_lines = transects,
    polygons = big_polygons,
    flowlines = flowlines,
    crosswalk_id = CROSSWALK_ID,
    grouping_id = CROSSWALK_ID,
    max_extension_distance = 3000,
    tolerance = NULL,
    keep_lengths = FALSE,
    reindex_cs_ids = FALSE,
    verbose = TRUE
  ) %>%
    dplyr::mutate(
      is_extended = left_distance > 0 | right_distance > 0
    )

  # trim the extended transects back using the smaller polygons
  trimmed_trans <- hydrofabric3D::trim_transects_by_polygons(
    transect_lines = ext_trans,
    flowlines = flowlines,
    polygons = small_polygons,
    crosswalk_id = CROSSWALK_ID
  )

  # (debug) plot/mapview visualizations of the extended vs. trimmed transects omitted

  # test all UIDs are retained as desired
  testthat::expect_true(
    has_same_uids(transects, trimmed_trans, crosswalk_id = CROSSWALK_ID)
  )

  testthat::expect_true(
    has_same_uids(ext_trans, trimmed_trans, crosswalk_id = CROSSWALK_ID)
  )

  # test to make sure transects only intersect one flowline and once
  all_transects_intersect_flowlines_only_once <- all(lengths(sf::st_intersects(trimmed_trans, flowlines)) == 1)
  testthat::expect_true(
    all_transects_intersect_flowlines_only_once
  )

  # test to make sure transects DON'T intersect any other transects (except self)
  all_transects_intersect_no_other_transects <- all(lengths(sf::st_intersects(trimmed_trans)) == 1)
  testthat::expect_true(
    all_transects_intersect_no_other_transects
  )

  # tests that make sure specific transects were and were NOT trimmed:
  # cs_id 9 should keep its length, cs_id 10 should have been shortened
  was_not_trimmed <-
    trimmed_trans %>%
    dplyr::filter(id == "wb-1003265", cs_id == 9) %>%
    hydrofabric3D::add_length_col("new_length")

  was_trimmed <-
    trimmed_trans %>%
    dplyr::filter(id == "wb-1003265", cs_id == 10) %>%
    hydrofabric3D::add_length_col("new_length")

  testthat::expect_equal(
    was_not_trimmed$cs_lengthm,
    was_not_trimmed$new_length
  )

  actually_trimmed_transects_are_not_equal_length <- was_trimmed$cs_lengthm != was_trimmed$new_length
  testthat::expect_true(
    actually_trimmed_transects_are_not_equal_length
  )

})

testthat::test_that("trim_transects_to_polygons() a single set of transects with one flowline, 
                    but during trim, there are more flowlines than used to generate transects 
                    but the trim still happens for transects the are cross other flowlines, 
                    this is the case when the given set of transects is violating the multiple flowline intersections property", {

  # single flowline (fixture row 9) used to generate transects
  flowlines <- sf::read_sf(testthat::test_path("testdata", "flowlines.gpkg")) %>%
    dplyr::slice(c(9))

  # MIN_BF_WIDTH <- 50
  CROSSWALK_ID <- "id"

  flowlines <-
    flowlines %>%
    # add_powerlaw_bankful_width("tot_drainage_areasqkm", MIN_BF_WIDTH) %>%
    dplyr::select(
      dplyr::any_of(c(CROSSWALK_ID)),
      geom
    )

  # create testing polygons by buffering the NEIGHBORING flowline (fixture row 10)
  big_polygons <-
    sf::read_sf(testthat::test_path("testdata", "flowlines.gpkg")) %>%
    dplyr::slice(c(10)) %>%
    sf::st_buffer(500) %>%
    dplyr::mutate(
      polygon_id = 1:dplyr::n()
    )

  small_polygons <-
    sf::read_sf(testthat::test_path("testdata", "flowlines.gpkg")) %>%
    dplyr::slice(c(10)) %>%
    sf::st_buffer(400) %>%
    dplyr::mutate(
      polygon_id = 1:dplyr::n()
    )

  # generate transects
  transects <- cut_cross_sections(
    net = flowlines,
    crosswalk_id = CROSSWALK_ID,
    cs_widths = 200,
    num = 10
  ) %>%
    dplyr::select(
      dplyr::any_of(c(CROSSWALK_ID)), cs_id, cs_measure
    )

  ext_trans <- hydrofabric3D::extend_transects_to_polygons(
    transect_lines = transects,
    polygons = big_polygons,
    flowlines = flowlines,
    crosswalk_id = CROSSWALK_ID,
    grouping_id = CROSSWALK_ID,
    max_extension_distance = 3000,
    tolerance = NULL,
    keep_lengths = FALSE,
    reindex_cs_ids = FALSE,
    verbose = TRUE
  )

  # NOTE: the trim step is given MORE flowlines (rows 9 AND 10) than were used to
  # generate the transects, to exercise the multiple-flowline-intersection case
  trimmed_trans <- trim_transects_by_polygons(
    transect_lines = ext_trans,
    flowlines = sf::read_sf(testthat::test_path("testdata", "flowlines.gpkg")) %>%
      dplyr::slice(c(9, 10)),
    polygons = small_polygons,
    crosswalk_id = CROSSWALK_ID
  )

  # (debug) plot/mapview visualizations of extended vs. trimmed transects omitted

  # test all UIDs are retained as desired
  testthat::expect_true(
    has_same_uids(transects, trimmed_trans, crosswalk_id = CROSSWALK_ID)
  )

  testthat::expect_true(
    has_same_uids(ext_trans, trimmed_trans, crosswalk_id = CROSSWALK_ID)
  )

  # test to make sure transects only intersect one flowline and once
  all_transects_intersect_flowlines_only_once <- all(lengths(sf::st_intersects(trimmed_trans, flowlines)) == 1)
  testthat::expect_true(
    all_transects_intersect_flowlines_only_once
  )

  # test to make sure transects DON'T intersect any other transects (except self)
  all_transects_intersect_no_other_transects <- all(lengths(sf::st_intersects(trimmed_trans)) == 1)
  testthat::expect_true(
    all_transects_intersect_no_other_transects
  )

  # tests that make sure specific transects were and were NOT trimmed:
  # cs_id 3 keeps its length, cs_ids 9 and 10 must both have been shortened
  was_not_trimmed <-
    trimmed_trans %>%
    dplyr::filter(id == "wb-1003265", cs_id == 3) %>%
    hydrofabric3D::add_length_col("new_length")

  testthat::expect_equal(
    was_not_trimmed$cs_lengthm,
    was_not_trimmed$new_length
  )

  was_trimmed <-
    trimmed_trans %>%
    dplyr::filter(id == "wb-1003265", cs_id %in% c(9, 10)) %>%
    hydrofabric3D::add_length_col("new_length")

  actually_trimmed_transects_are_not_equal_length <- all(was_trimmed$cs_lengthm != was_trimmed$new_length)

  testthat::expect_true(
    actually_trimmed_transects_are_not_equal_length
  )

})
testthat::test_that("trim_transects_to_polygons() complex junction flowlines with all flowlines having a polygon to trim against", {

  flowlines <- sf::read_sf(testthat::test_path("testdata", "junction_flowlines.gpkg"))
  # plot(flowlines$geom)

  # MIN_BF_WIDTH <- 50
  CROSSWALK_ID <- "id"

  flowlines <-
    flowlines %>%
    # add_powerlaw_bankful_width("tot_drainage_areasqkm", MIN_BF_WIDTH) %>%
    dplyr::select(
      dplyr::any_of(c(CROSSWALK_ID)),
      geom
    )

  # create testing polygons by buffering EVERY flowline in the junction network
  big_polygons <-
    flowlines %>%
    sf::st_buffer(300) %>%
    dplyr::mutate(
      polygon_id = 1:dplyr::n()
    )

  small_polygons <-
    flowlines %>%
    sf::st_buffer(200) %>%
    dplyr::mutate(
      polygon_id = 1:dplyr::n()
    )

  # generate transects
  transects <- cut_cross_sections(
    net = flowlines,
    crosswalk_id = CROSSWALK_ID,
    cs_widths = 200,
    num = 10
  ) %>%
    dplyr::select(
      dplyr::any_of(c(CROSSWALK_ID)), cs_id, cs_measure
    )

  ext_trans <- hydrofabric3D::extend_transects_to_polygons(
    transect_lines = transects,
    polygons = big_polygons,
    flowlines = flowlines,
    crosswalk_id = CROSSWALK_ID,
    grouping_id = CROSSWALK_ID,
    max_extension_distance = 3000,
    tolerance = NULL,
    keep_lengths = FALSE,
    reindex_cs_ids = FALSE,
    verbose = TRUE
  )

  trimmed_trans <- trim_transects_by_polygons(
    transect_lines = ext_trans,
    flowlines = flowlines,
    polygons = small_polygons,
    crosswalk_id = CROSSWALK_ID
  )

  # (debug) plot/mapview visualizations of extended vs. trimmed transects omitted

  # test all UIDs are retained as desired
  testthat::expect_true(
    has_same_uids(transects, trimmed_trans, crosswalk_id = CROSSWALK_ID)
  )

  testthat::expect_true(
    has_same_uids(ext_trans, trimmed_trans, crosswalk_id = CROSSWALK_ID)
  )

  # test to make sure transects only intersect one flowline and once
  all_transects_intersect_flowlines_only_once <- all(lengths(sf::st_intersects(trimmed_trans, flowlines)) == 1)
  testthat::expect_true(
    all_transects_intersect_flowlines_only_once
  )

  # test to make sure transects DON'T intersect any other transects (except self)
  all_transects_intersect_no_other_transects <- all(lengths(sf::st_intersects(trimmed_trans)) == 1)
  testthat::expect_true(
    all_transects_intersect_no_other_transects
  )

  # tests that make sure specific transects were and were NOT trimmed
  was_not_trimmed <-
    trimmed_trans %>%
    dplyr::filter(id == "wb-1003265", cs_id == 3) %>%
    hydrofabric3D::add_length_col("new_length")

  testthat::expect_equal(
    was_not_trimmed$cs_lengthm,
    was_not_trimmed$new_length
  )

  was_trimmed <-
    trimmed_trans %>%
    dplyr::filter(id == "wb-1003265", cs_id %in% c(9, 10)) %>%
    hydrofabric3D::add_length_col("new_length")

  actually_trimmed_transects_are_not_equal_length <- all(was_trimmed$cs_lengthm != was_trimmed$new_length)

  testthat::expect_true(
    actually_trimmed_transects_are_not_equal_length
  )

})

testthat::test_that("trim_transects_to_polygons() complex junction flowlines with some flowlines having polygons and somenot having polygons", {

  flowlines <- sf::read_sf(testthat::test_path("testdata", "junction_flowlines.gpkg"))
  # plot(flowlines$geom)

  # MIN_BF_WIDTH <- 50
  CROSSWALK_ID <- "id"

  flowlines <-
    flowlines %>%
    # add_powerlaw_bankful_width("tot_drainage_areasqkm", MIN_BF_WIDTH) %>%
    dplyr::select(
      dplyr::any_of(c(CROSSWALK_ID)),
      geom
    )

  # create testing polygons by buffering ONLY the first 2 flowlines;
  # the remaining flowlines have no polygon to extend/trim against
  big_polygons <-
    flowlines %>%
    dplyr::slice(c(1, 2)) %>%
    sf::st_buffer(500) %>%
    dplyr::mutate(
      polygon_id = 1:dplyr::n()
    )

  small_polygons <-
    flowlines %>%
    dplyr::slice(c(1, 2)) %>%
    sf::st_buffer(400) %>%
    dplyr::mutate(
      polygon_id = 1:dplyr::n()
    )

  # generate transects
  transects <- cut_cross_sections(
    net = flowlines,
    crosswalk_id = CROSSWALK_ID,
    cs_widths = 200,
    num = 10
  ) %>%
    dplyr::select(
      dplyr::any_of(c(CROSSWALK_ID)), cs_id, cs_measure
    )

  ext_trans <- hydrofabric3D::extend_transects_to_polygons(
    transect_lines = transects,
    polygons = big_polygons,
    flowlines = flowlines,
    crosswalk_id = CROSSWALK_ID,
    grouping_id = CROSSWALK_ID,
    max_extension_distance = 3000,
    tolerance = NULL,
    keep_lengths = FALSE,
    reindex_cs_ids = FALSE,
    verbose = TRUE
  )

  trimmed_trans <- trim_transects_by_polygons(
    transect_lines = ext_trans,
    flowlines = flowlines,
    polygons = small_polygons,
    crosswalk_id = CROSSWALK_ID
  )

  # (debug) plot/mapview visualizations of extended vs. trimmed transects omitted

  # test all UIDs are retained as desired
  testthat::expect_true(
    has_same_uids(transects, trimmed_trans, crosswalk_id = CROSSWALK_ID)
  )

  testthat::expect_true(
    has_same_uids(ext_trans, trimmed_trans, crosswalk_id = CROSSWALK_ID)
  )

  # test to make sure transects only intersect one flowline and once
  all_transects_intersect_flowlines_only_once <- all(lengths(sf::st_intersects(trimmed_trans, flowlines)) == 1)
  testthat::expect_true(
    all_transects_intersect_flowlines_only_once
  )

  # test to make sure transects DON'T intersect any other transects (except self)
  all_transects_intersect_no_other_transects <- all(lengths(sf::st_intersects(trimmed_trans)) == 1)
  testthat::expect_true(
    all_transects_intersect_no_other_transects
  )

  # tests that make sure specific transects were and were NOT trimmed
  # NOTE(review): the next two pipelines are leftover inspection calls —
  # their results are not assigned or asserted
  trimmed_trans %>%
    dplyr::filter(id == "wb-1007682")
  transects %>%
    dplyr::filter(id == "wb-1007682")

  # a flowline with NO polygon ("wb-1007682") must keep identical geometries
  was_not_trimmed_has_same_geoms <- all(
    lengths(
      sf::st_equals_exact(
        trimmed_trans %>% dplyr::filter(id == "wb-1007682"),
        transects %>% dplyr::filter(id == "wb-1007682"),
        par = 0.4
      )
    ) == 1
  )

  testthat::expect_true(
    was_not_trimmed_has_same_geoms
  )

  # a flowline WITH a polygon ("wb-1002023") must have at least one changed geometry
  was_trimmed_has_diff_geoms <- any(
    lengths(
      sf::st_equals_exact(
        trimmed_trans %>% dplyr::filter(id == "wb-1002023"),
        transects %>% dplyr::filter(id == "wb-1002023"),
        par = 0.4
      )
    ) != 1
  )

  testthat::expect_true(
    was_trimmed_has_diff_geoms
  )

})

testthat::test_that("trim_transects_to_polygons() 2 seperate networks of flowlines and transects with polygons get trimmed all IDs are kept in tact", {

  # two disconnected networks (junction fixture + plain fixture) combined as one input
  flowlines <- dplyr::bind_rows(
    sf::read_sf(testthat::test_path("testdata", "junction_flowlines.gpkg")),
    sf::read_sf(testthat::test_path("testdata", "flowlines.gpkg"))
  )
  # plot(flowlines$geom)

  # MIN_BF_WIDTH <- 50
  CROSSWALK_ID <- "id"

  flowlines <-
    flowlines %>%
    # add_powerlaw_bankful_width("tot_drainage_areasqkm", MIN_BF_WIDTH) %>%
    dplyr::select(
      dplyr::any_of(c(CROSSWALK_ID)),
      geom
    )

  # create testing polygons by buffering EVERY flowline in both networks
  big_polygons <-
    flowlines %>%
    sf::st_buffer(500) %>%
    dplyr::mutate(
      polygon_id = 1:dplyr::n()
    )

  small_polygons <-
    flowlines %>%
    sf::st_buffer(400) %>%
    dplyr::mutate(
      polygon_id = 1:dplyr::n()
    )

  # generate transects
  transects <- cut_cross_sections(
    net = flowlines,
    crosswalk_id = CROSSWALK_ID,
    cs_widths = 200,
    num = 10
  ) %>%
    dplyr::select(
      dplyr::any_of(c(CROSSWALK_ID)), cs_id, cs_measure
    )

  ext_trans <- hydrofabric3D::extend_transects_to_polygons(
    transect_lines = transects,
    polygons = big_polygons,
    flowlines = flowlines,
    crosswalk_id = CROSSWALK_ID,
    grouping_id = CROSSWALK_ID,
    max_extension_distance = 3000,
    tolerance = NULL,
    keep_lengths = FALSE,
    reindex_cs_ids = FALSE,
    verbose = TRUE
  )

  trimmed_trans <- trim_transects_by_polygons(
    transect_lines = ext_trans,
    flowlines = flowlines,
    polygons = small_polygons,
    crosswalk_id = CROSSWALK_ID
  )

  # (debug) plot/mapview visualizations of extended vs. trimmed transects omitted

  # test all UIDs are retained as desired
  testthat::expect_true(
    has_same_uids(transects, trimmed_trans, crosswalk_id = CROSSWALK_ID)
  )

  testthat::expect_true(
    has_same_uids(ext_trans, trimmed_trans, crosswalk_id = CROSSWALK_ID)
  )

  # test to make sure transects only intersect one flowline and once
  all_transects_intersect_flowlines_only_once <- all(lengths(sf::st_intersects(trimmed_trans, flowlines)) == 1)
  testthat::expect_true(
    all_transects_intersect_flowlines_only_once
  )

  # test to make sure transects DON'T intersect any other transects (except self)
  all_transects_intersect_no_other_transects <- all(lengths(sf::st_intersects(trimmed_trans)) == 1)
  testthat::expect_true(
    all_transects_intersect_no_other_transects
  )

  # tests that make sure specific transects were and were NOT trimmed:
  # selected cross sections of "wb-1010908" should be untouched
  was_not_trimmed_has_same_geoms <- all(
    lengths(
      sf::st_equals_exact(
        trimmed_trans %>% dplyr::filter(id == "wb-1010908", cs_id %in% c(1, 2, 4:8)),
        transects %>% dplyr::filter(id == "wb-1010908", cs_id %in% c(1, 2, 4:8)),
        par = 0.4
      )
    ) == 1
  )

  testthat::expect_true(
    was_not_trimmed_has_same_geoms
  )

  # each of the following IDs must show at least one changed (trimmed) geometry
  was_trimmed_has_diff_geoms <- any(
    lengths(
      sf::st_equals_exact(
        trimmed_trans %>% dplyr::filter(id == "wb-1002024"),
        transects %>% dplyr::filter(id == "wb-1002024"),
        par = 0.4
      )
    ) != 1
  )

  testthat::expect_true(
    was_trimmed_has_diff_geoms
  )

  testthat::expect_true(
    any(
      lengths(
        sf::st_equals_exact(
          trimmed_trans %>% dplyr::filter(id == "wb-1007682"),
          transects %>% dplyr::filter(id == "wb-1007682"),
          par = 0.4
        )
      ) != 1
    )
  )

  testthat::expect_true(
    any(
      lengths(
        sf::st_equals_exact(
          trimmed_trans %>% dplyr::filter(id == "wb-1002023"),
          transects %>% dplyr::filter(id == "wb-1002023"),
          par = 0.4
        )
      ) != 1
    )
  )

  testthat::expect_true(
    any(
      lengths(
        sf::st_equals_exact(
          trimmed_trans %>% dplyr::filter(id == "wb-1003263"),
          transects %>% dplyr::filter(id == "wb-1003263"),
          par = 0.4
        )
      ) != 1
    )
  )

  testthat::expect_true(
    any(
      lengths(
        sf::st_equals_exact(
          trimmed_trans %>% dplyr::filter(id == "wb-1003267"),
          transects %>% dplyr::filter(id == "wb-1003267"),
          par = 0.4
        )
      ) != 1
    )
  )

})