From 63e1c3146064d0a68144dcdca9d196c048cc7630 Mon Sep 17 00:00:00 2001 From: Amy Paguirigan Date: Sun, 24 Mar 2019 15:19:04 -0700 Subject: [PATCH] update post pixie --- NAMESPACE | 2 + R/admin.R | 77 ++++++++++++------- ...ullAnnotations.R => annotationfunctions.R} | 8 +- R/listS3Objects.R | 22 ------ R/{redcapPull.R => redcapfunctions.R} | 3 +- R/s3functions.R | 44 +++++++++++ man/annotationDictionary.Rd | 2 +- man/listS3Objects.Rd | 4 +- man/listS3RepoObjects.Rd | 6 +- man/listS3RepoSummaries.Rd | 23 ++++++ man/pullAnnotations.Rd | 2 +- man/redcapPull.Rd | 6 +- man/summarizeS3Objects.Rd | 23 ++++++ man/undefinedAnnotations.Rd | 2 +- man/usedIdentifiers.Rd | 2 +- 15 files changed, 160 insertions(+), 66 deletions(-) rename R/{pullAnnotations.R => annotationfunctions.R} (54%) delete mode 100644 R/listS3Objects.R rename R/{redcapPull.R => redcapfunctions.R} (98%) create mode 100644 R/s3functions.R create mode 100644 man/listS3RepoSummaries.Rd create mode 100644 man/summarizeS3Objects.Rd diff --git a/NAMESPACE b/NAMESPACE index 89b05d0..d57112d 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -4,9 +4,11 @@ export("%>%") export(annotationDictionary) export(listS3Objects) export(listS3RepoObjects) +export(listS3RepoSummaries) export(pullAnnotations) export(redcapPull) export(setCreds) +export(summarizeS3Objects) export(undefinedAnnotations) export(usedIdentifiers) importFrom(magrittr,"%>%") diff --git a/R/admin.R b/R/admin.R index a79df23..4a660fd 100644 --- a/R/admin.R +++ b/R/admin.R @@ -6,9 +6,12 @@ #' @return A data frame of makeMeaning. This is a template to add to the commonKnowledge dataframe with new definitions or corrections. #' @author Amy Paguirigan #' @details -#' Requires REDCap credentials to be set in the environment. +#' Requires **admin** REDCap credentials to be set in the environment. #' @export undefinedAnnotations <- function(commonKnowledge) { + if ("" %in% Sys.getenv(c("REDURI", "INT", "FCT", "MHT", "S3A", "S3SA"))) { + print("You have missing environment variables. Please set creds in env vars.")} else print("Credentials set successfully.") + print("Get all data from REDCap for variables intended to be harmonized.") sciMeta <- redcapPull(harmonizedOnly = TRUE) sciMeta <- Filter(function(x)!all(is.na(x)), sciMeta) @@ -44,9 +47,12 @@ undefinedAnnotations <- function(commonKnowledge) { #' @return Returns the list of idnetifiers that have already been used. #' @author Amy Paguirigan #' @details -#' Requires REDCap credentials to be set in the environment. +#' Requires **admin** REDCap credentials to be set in the environment. #' @export usedIdentifiers <- function(x, type) { + if ("" %in% Sys.getenv(c("REDURI", "INT", "FCT", "MHT", "S3A", "S3SA"))) { + print("You have missing environment variables. Please set creds in env vars.")} else print("Credentials set successfully.") + if (type == "biospecimen_id") { IDs <- REDCapR::redcap_read_oneshot( Sys.getenv("REDURI"), Sys.getenv("INT"), @@ -72,9 +78,12 @@ usedIdentifiers <- function(x, type) { #' @return Nothing. Creates character vectors containing the `categorical` annotations, the `truefalse` annotations, the union of these `fieldList`, and all columns in `summarizeList`. #' @author Amy Paguirigan #' @details -#' Requires REDCap credentials to be set in the environment. +#' Requires **admin** REDCap credentials to be set in the environment. #' @export annotationDictionary <- function(commonKnowledge) { + if ("" %in% Sys.getenv(c("REDURI", "INT", "FCT", "MHT", "S3A", "S3SA"))) { + print("You have missing environment variables. Please set creds in env vars.")} else print("Credentials set successfully.") + print("annotationDictionary(); setup for UI") # Get representative actual column names from REDCap by pulling one dataset INData <- REDCapR::redcap_read_oneshot( @@ -101,37 +110,51 @@ annotationDictionary <- function(commonKnowledge) { #' #' Pulls Information from the results of s3tagcrawler for TGR that are in an S3 bucket, including the object list and their tags as well as size metadata. #' -#' @param bucket The name of the S3 bucket containing the data, or "repository" if the intention is to query the whole Repository. -#' @return Returns a long form data frame of annotated objects in the S3 bucket. +#' @param bucket The name of the S3 bucket containing the data. +#' @return Returns a long form data frame of annotated objects in the S3 buckets associated with the Repository. #' @author Amy Paguirigan #' @details -#' Requires S3 credentials to be set in the environment by setCreds. +#' Requires **admin** or **app** S3 credentials to be set in the environment by setCreds. #' @export listS3RepoObjects <- function(bucket = "fh-pi-paguirigan-a-genomicsrepo") { + if ("" %in% Sys.getenv(c("REDURI", "INT", "FCT", "MHT", "S3A", "S3SA"))) { + print("You have missing environment variables. Please set creds in env vars.")} else print("Credentials set successfully.") + Sys.setenv(AWS_ACCESS_KEY_ID = Sys.getenv("S3A"), AWS_SECRET_ACCESS_KEY = Sys.getenv("S3SA"), AWS_DEFAULT_REGION = "us-west-2") - a <- get_bucket_df(bucket = bucket, prefix = "apptags/") - a$pi_bucket <- paste0("fh-pi-", sub("^([^-]*-[^-]*).*", "\\1", gsub("^[^/]*/", "", a$Key))) - tagFiles <- a[grepl("s3tags", a$Key)==T,] - sizeFiles <- a[grepl("s3sizes", a$Key)==T,] - - print("Pulling all S3 tag lists.") - s3tags <- aws.s3::s3read_using(utils::read.csv, stringsAsFactors = F, - object = "apptags/s3tags.csv", - bucket = bucket) - s3tags <- s3tags %>% dplyr::select("key", "molecular_id", "omics_sample_name", "stage", "workflowID") - s3tags$pi_bucket <- bucket - s3tags <- s3tags[s3tags$molecular_id != "" & s3tags$omics_sample_name != "" & s3tags$stage != "", ] - print("Pulling all S3 object size lists.") - s3sizes <- aws.s3::s3read_using(utils::read.table, stringsAsFactors = F, - col.names = c("dateCreated", "timeCreated", "sizeBytes", "key"), - object = "tg/apptags/s3sizes.tsv", - bucket = bucket) - s3sizes$pi_bucket <- bucket - print("Joining tags and sizes.") - allObjects <- dplyr::inner_join(s3tags, s3sizes) - return(allObjects) + a <- aws.s3::get_bucket_df(bucket = bucket, prefix = "apptags/meta/") + b <- purrr::map_dfr(a$Key, function(x) { + aws.s3::s3read_using(utils::read.csv, stringsAsFactors = F, + object = x, + bucket = bucket) + }) + return(b) } +#' Pull the summary of objects and tags in the Repository overall +#' +#' Pulls just the processed summary of object metadata for all buckets with data in the Repository. +#' +#' @param bucket The name of the S3 bucket containing the data. +#' @return Returns a long form data frame of annotated objects in the S3 bucket. +#' @author Amy Paguirigan +#' @details +#' Requires **admin** or **app** S3 credentials to be set in the environment by setCreds. +#' @export +listS3RepoSummaries <- function(bucket = "fh-pi-paguirigan-a-genomicsrepo") { + if ("" %in% Sys.getenv(c("REDURI", "INT", "FCT", "MHT", "S3A", "S3SA"))) { + print("You have missing environment variables. Please set creds in env vars.")} else print("Credentials set successfully.") + + Sys.setenv(AWS_ACCESS_KEY_ID = Sys.getenv("S3A"), + AWS_SECRET_ACCESS_KEY = Sys.getenv("S3SA"), + AWS_DEFAULT_REGION = "us-west-2") + c <- aws.s3::get_bucket_df(bucket = bucket, prefix = "apptags/summary/") + d <- purrr::map_dfr(c$Key, function(x) { + aws.s3::s3read_using(utils::read.csv, stringsAsFactors = F, + object = x, + bucket = bucket) + }) + return(d) +} diff --git a/R/pullAnnotations.R b/R/annotationfunctions.R similarity index 54% rename from R/pullAnnotations.R rename to R/annotationfunctions.R index bc13b0e..d715046 100644 --- a/R/pullAnnotations.R +++ b/R/annotationfunctions.R @@ -9,8 +9,8 @@ #' @export pullAnnotations <- function() { - print("pulling annotations from the master branch of the tgr-annotations repo") - commonKnowledge <- httr::content(httr::GET("https://raw.github.com/FredHutch/tgr-annotations/master/commonKnowledge.csv"), - as = "parsed", type = "text/csv") - return(commonKnowledge) + print("pulling annotations from the master branch of the tgr-annotations repo") + commonKnowledge <- httr::content(httr::GET("https://raw.github.com/FredHutch/tgr-annotations/master/commonKnowledge.csv"), + as = "parsed", type = "text/csv") + return(commonKnowledge) } diff --git a/R/listS3Objects.R b/R/listS3Objects.R deleted file mode 100644 index da66fb4..0000000 --- a/R/listS3Objects.R +++ /dev/null @@ -1,22 +0,0 @@ -#' Pull the list(s) of objects and tags in S3 -#' -#' Pulls Information from the results of s3tagcrawler for TGR that are in an S3 bucket, including the object list and their tags as well as size metadata. -#' -#' @param bucket A character vector containing the full names of the S3 bucket(s) containing the data to return. -#' @return Returns a long form data frame of objects in the indicated S3 bucket(s). -#' @author Amy Paguirigan -#' @details -#' Requires S3 credentials to be set in the environment by setCreds. -#' @export -listS3Objects <- function(bucket) { - if ("" %in% Sys.getenv(c("S3A", "S3SA"))) { - print("You have missing environment variables. Please setCreds().")} else print("Credentials set successfully.") - Sys.setenv(AWS_ACCESS_KEY_ID = Sys.getenv("S3A"), - AWS_SECRET_ACCESS_KEY = Sys.getenv("S3SA"), - AWS_DEFAULT_REGION = "us-west-2") - print("Pulling S3 tag list(s).") - s3tags <- purrr::map_dfr(bucket, function(x) {aws.s3::s3read_using(utils::read.csv, stringsAsFactors = F, - object = paste0("tg/apptags/", x, "-meta.csv"), - bucket = x)}) - return(s3tags) -} diff --git a/R/redcapPull.R b/R/redcapfunctions.R similarity index 98% rename from R/redcapPull.R rename to R/redcapfunctions.R index f7b2378..eb80d96 100644 --- a/R/redcapPull.R +++ b/R/redcapfunctions.R @@ -1,4 +1,4 @@ -#' Queries all TGR REDCap for metadata +#' Queries all TGR REDCap Projects for metadata #' #' #' @@ -62,3 +62,4 @@ redcapPull <- function(DAG = c("paguirigana", "bergera"), domain = "all", harmon return(results) } + diff --git a/R/s3functions.R b/R/s3functions.R new file mode 100644 index 0000000..771993f --- /dev/null +++ b/R/s3functions.R @@ -0,0 +1,44 @@ +#' Pull the list(s) of objects and tags in S3 +#' +#' Pulls Information from the results of s3tagcrawler for TGR that are in an S3 bucket, including the object list and their tags as well as size metadata. +#' +#' @param bucket A character vector containing the full names of the S3 bucket(s) containing the data to return. +#' @return Returns a long form data frame of objects in the indicated S3 bucket(s). +#' @author Amy Paguirigan +#' @details +#' Requires valid S3 credentials to be set in the environment by setCreds. +#' @export +listS3Objects <- function(bucket) { + if ("" %in% Sys.getenv(c("S3A", "S3SA"))) { + print("You have missing environment variables. Please setCreds().")} else print("Credentials set successfully.") + Sys.setenv(AWS_ACCESS_KEY_ID = Sys.getenv("S3A"), + AWS_SECRET_ACCESS_KEY = Sys.getenv("S3SA"), + AWS_DEFAULT_REGION = "us-west-2") + print("Pulling S3 tag list(s).") + s3tags <- purrr::map_dfr(bucket, function(x) {aws.s3::s3read_using(utils::read.csv, stringsAsFactors = F, + object = paste0("tg/apptags/", x, "-meta.csv"), + bucket = x)}) + return(s3tags) +} +#' Pull a summary of objects in S3 +#' +#' Pulls summary information from the apptags prefix in S3 for a given S3 bucket(s). +#' +#' @param bucket A character vector containing the full names of the S3 bucket(s) containing the data to return. +#' @return Returns a data frame containing a summary of what objects are in the indicated S3 bucket(s). +#' @author Amy Paguirigan +#' @details +#' Requires valid S3 credentials to be set in the environment by setCreds. +#' @export +summarizeS3Objects <- function(bucket) { + if ("" %in% Sys.getenv(c("S3A", "S3SA"))) { + print("You have missing environment variables. Please setCreds().")} else print("Credentials set successfully.") + Sys.setenv(AWS_ACCESS_KEY_ID = Sys.getenv("S3A"), + AWS_SECRET_ACCESS_KEY = Sys.getenv("S3SA"), + AWS_DEFAULT_REGION = "us-west-2") + print("Pulling S3 object summary.") + s3summary <- purrr::map_dfr(bucket, function(x) {aws.s3::s3read_using(utils::read.csv, stringsAsFactors = F, + object = paste0("tg/apptags/", x, "-summary.csv"), + bucket = x)}) + return(s3summary) +} diff --git a/man/annotationDictionary.Rd b/man/annotationDictionary.Rd index 754fb46..3508f12 100644 --- a/man/annotationDictionary.Rd +++ b/man/annotationDictionary.Rd @@ -16,7 +16,7 @@ Nothing. Creates character vectors containing the `categorical` annotations, th Pulls sample data down from REDCap in order to generate example column lists for use in Shiny UI's. } \details{ -Requires REDCap credentials to be set in the environment. +Requires **admin** REDCap credentials to be set in the environment. } \author{ Amy Paguirigan diff --git a/man/listS3Objects.Rd b/man/listS3Objects.Rd index 0f47ff8..8dfc31c 100644 --- a/man/listS3Objects.Rd +++ b/man/listS3Objects.Rd @@ -1,5 +1,5 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/listS3Objects.R +% Please edit documentation in R/s3functions.R \name{listS3Objects} \alias{listS3Objects} \title{Pull the list(s) of objects and tags in S3} @@ -16,7 +16,7 @@ Returns a long form data frame of objects in the indicated S3 bucket(s). Pulls Information from the results of s3tagcrawler for TGR that are in an S3 bucket, including the object list and their tags as well as size metadata. } \details{ -Requires S3 credentials to be set in the environment by setCreds. +Requires valid S3 credentials to be set in the environment by setCreds. } \author{ Amy Paguirigan diff --git a/man/listS3RepoObjects.Rd b/man/listS3RepoObjects.Rd index 4f958ce..b56bfb5 100644 --- a/man/listS3RepoObjects.Rd +++ b/man/listS3RepoObjects.Rd @@ -7,16 +7,16 @@ listS3RepoObjects(bucket = "fh-pi-paguirigan-a-genomicsrepo") } \arguments{ -\item{bucket}{The name of the S3 bucket containing the data, or "repository" if the intention is to query the whole Repository.} +\item{bucket}{The name of the S3 bucket containing the data.} } \value{ -Returns a long form data frame of annotated objects in the S3 bucket. +Returns a long form data frame of annotated objects in the S3 buckets associated with the Repository. } \description{ Pulls Information from the results of s3tagcrawler for TGR that are in an S3 bucket, including the object list and their tags as well as size metadata. } \details{ -Requires S3 credentials to be set in the environment by setCreds. +Requires **admin** or **app** S3 credentials to be set in the environment by setCreds. } \author{ Amy Paguirigan diff --git a/man/listS3RepoSummaries.Rd b/man/listS3RepoSummaries.Rd new file mode 100644 index 0000000..18780aa --- /dev/null +++ b/man/listS3RepoSummaries.Rd @@ -0,0 +1,23 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/admin.R +\name{listS3RepoSummaries} +\alias{listS3RepoSummaries} +\title{Pull the summary of objects and tags in the Repository overall} +\usage{ +listS3RepoSummaries(bucket = "fh-pi-paguirigan-a-genomicsrepo") +} +\arguments{ +\item{bucket}{The name of the S3 bucket containing the data.} +} +\value{ +Returns a long form data frame of annotated objects in the S3 bucket. +} +\description{ +Pulls just the processed summary of object metadata for all buckets with data in the Repository. +} +\details{ +Requires **admin** or **app** S3 credentials to be set in the environment by setCreds. +} +\author{ +Amy Paguirigan +} diff --git a/man/pullAnnotations.Rd b/man/pullAnnotations.Rd index 27f25e1..a17b5b2 100644 --- a/man/pullAnnotations.Rd +++ b/man/pullAnnotations.Rd @@ -1,5 +1,5 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/pullAnnotations.R +% Please edit documentation in R/annotationfunctions.R \name{pullAnnotations} \alias{pullAnnotations} \title{Pull Current TGR Annotation definitions} diff --git a/man/redcapPull.Rd b/man/redcapPull.Rd index 0fedbea..9b9a8b7 100644 --- a/man/redcapPull.Rd +++ b/man/redcapPull.Rd @@ -1,8 +1,8 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/redcapPull.R +% Please edit documentation in R/redcapfunctions.R \name{redcapPull} \alias{redcapPull} -\title{Queries all TGR REDCap for metadata} +\title{Queries all TGR REDCap Projects for metadata} \usage{ redcapPull(DAG = c("paguirigana", "bergera"), domain = "all", harmonizedOnly = FALSE, evenEmptyCols = FALSE) @@ -20,7 +20,7 @@ redcapPull(DAG = c("paguirigana", "bergera"), domain = "all", Returns a long form data frame containing the requested dataset. } \description{ -Queries all TGR REDCap for metadata +Queries all TGR REDCap Projects for metadata } \details{ Requires REDCap credentials to be set in the environment. diff --git a/man/summarizeS3Objects.Rd b/man/summarizeS3Objects.Rd new file mode 100644 index 0000000..c351de6 --- /dev/null +++ b/man/summarizeS3Objects.Rd @@ -0,0 +1,23 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/s3functions.R +\name{summarizeS3Objects} +\alias{summarizeS3Objects} +\title{Pull a summary of objects in S3} +\usage{ +summarizeS3Objects(bucket) +} +\arguments{ +\item{bucket}{A character vector containing the full names of the S3 bucket(s) containing the data to return.} +} +\value{ +Returns a data frame containing a summary of what objects are in the indicated S3 bucket(s). +} +\description{ +Pulls summary information from the apptags prefix in S3 for a given S3 bucket(s). +} +\details{ +Requires valid S3 credentials to be set in the environment by setCreds. +} +\author{ +Amy Paguirigan +} diff --git a/man/undefinedAnnotations.Rd b/man/undefinedAnnotations.Rd index bfd4639..2f4c345 100644 --- a/man/undefinedAnnotations.Rd +++ b/man/undefinedAnnotations.Rd @@ -16,7 +16,7 @@ A data frame of makeMeaning. This is a template to add to the commonKnowledge d Pulls sample data down from REDCap and compares them with the defined annotation list in GitHub and identifies variables in REDCap that need defining in GitHub. Only variables for which there is a value in at least one record are returned. } \details{ -Requires REDCap credentials to be set in the environment. +Requires **admin** REDCap credentials to be set in the environment. } \author{ Amy Paguirigan diff --git a/man/usedIdentifiers.Rd b/man/usedIdentifiers.Rd index 170f5c5..2e0b524 100644 --- a/man/usedIdentifiers.Rd +++ b/man/usedIdentifiers.Rd @@ -18,7 +18,7 @@ Returns the list of idnetifiers that have already been used. Test if identifiers have already been used in the TGR. } \details{ -Requires REDCap credentials to be set in the environment. +Requires **admin** REDCap credentials to be set in the environment. } \author{ Amy Paguirigan