Skip to content

Commit

Permalink
update post pixie
Browse files Browse the repository at this point in the history
  • Loading branch information
vortexing committed Mar 24, 2019
1 parent e114cb9 commit 63e1c31
Show file tree
Hide file tree
Showing 15 changed files with 160 additions and 66 deletions.
2 changes: 2 additions & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -4,9 +4,11 @@ export("%>%")
export(annotationDictionary)
export(listS3Objects)
export(listS3RepoObjects)
export(listS3RepoSummaries)
export(pullAnnotations)
export(redcapPull)
export(setCreds)
export(summarizeS3Objects)
export(undefinedAnnotations)
export(usedIdentifiers)
importFrom(magrittr,"%>%")
77 changes: 50 additions & 27 deletions R/admin.R
Original file line number Diff line number Diff line change
Expand Up @@ -6,9 +6,12 @@
#' @return A data frame of makeMeaning. This is a template to add to the commonKnowledge dataframe with new definitions or corrections.
#' @author Amy Paguirigan
#' @details
#' Requires REDCap credentials to be set in the environment.
#' Requires **admin** REDCap credentials to be set in the environment.
#' @export
undefinedAnnotations <- function(commonKnowledge) {
if ("" %in% Sys.getenv(c("REDURI", "INT", "FCT", "MHT", "S3A", "S3SA"))) {
print("You have missing environment variables. Please set creds in env vars.")} else print("Credentials set successfully.")

print("Get all data from REDCap for variables intended to be harmonized.")
sciMeta <- redcapPull(harmonizedOnly = TRUE)
sciMeta <- Filter(function(x)!all(is.na(x)), sciMeta)
Expand Down Expand Up @@ -44,9 +47,12 @@ undefinedAnnotations <- function(commonKnowledge) {
#' @return Returns the list of identifiers that have already been used.
#' @author Amy Paguirigan
#' @details
#' Requires REDCap credentials to be set in the environment.
#' Requires **admin** REDCap credentials to be set in the environment.
#' @export
usedIdentifiers <- function(x, type) {
if ("" %in% Sys.getenv(c("REDURI", "INT", "FCT", "MHT", "S3A", "S3SA"))) {
print("You have missing environment variables. Please set creds in env vars.")} else print("Credentials set successfully.")

if (type == "biospecimen_id") {
IDs <- REDCapR::redcap_read_oneshot(
Sys.getenv("REDURI"), Sys.getenv("INT"),
Expand All @@ -72,9 +78,12 @@ usedIdentifiers <- function(x, type) {
#' @return Nothing. Creates character vectors containing the `categorical` annotations, the `truefalse` annotations, the union of these `fieldList`, and all columns in `summarizeList`.
#' @author Amy Paguirigan
#' @details
#' Requires REDCap credentials to be set in the environment.
#' Requires **admin** REDCap credentials to be set in the environment.
#' @export
annotationDictionary <- function(commonKnowledge) {
if ("" %in% Sys.getenv(c("REDURI", "INT", "FCT", "MHT", "S3A", "S3SA"))) {
print("You have missing environment variables. Please set creds in env vars.")} else print("Credentials set successfully.")

print("annotationDictionary(); setup for UI")
# Get representative actual column names from REDCap by pulling one dataset
INData <- REDCapR::redcap_read_oneshot(
Expand All @@ -101,37 +110,51 @@ annotationDictionary <- function(commonKnowledge) {
#'
#' Pulls Information from the results of s3tagcrawler for TGR that are in an S3 bucket, including the object list and their tags as well as size metadata.
#'
#' @param bucket The name of the S3 bucket containing the data, or "repository" if the intention is to query the whole Repository.
#' @return Returns a long form data frame of annotated objects in the S3 bucket.
#' @param bucket The name of the S3 bucket containing the data.
#' @return Returns a long form data frame of annotated objects in the S3 buckets associated with the Repository.
#' @author Amy Paguirigan
#' @details
#' Requires S3 credentials to be set in the environment by setCreds.
#' Requires **admin** or **app** S3 credentials to be set in the environment by setCreds.
#' @export
# Reads s3tagcrawler output from `bucket` and returns it as a data frame.
#
# NOTE(review): this span reads like a rendered diff in which the removed and
# the added lines are interleaved. As written, the function returns at
# `return(allObjects)`, so the `apptags/meta/` block that follows it is
# unreachable. Confirm which of the two bodies is the live one before editing.
listS3RepoObjects <- function(bucket = "fh-pi-paguirigan-a-genomicsrepo") {
# Report whether any of the six credential variables is empty. Execution
# continues either way; nothing is aborted here.
if ("" %in% Sys.getenv(c("REDURI", "INT", "FCT", "MHT", "S3A", "S3SA"))) {
print("You have missing environment variables. Please set creds in env vars.")} else print("Credentials set successfully.")

# Copy the S3A/S3SA values into the AWS_* variables and set the region.
# These environment changes persist after the function returns.
Sys.setenv(AWS_ACCESS_KEY_ID = Sys.getenv("S3A"),
AWS_SECRET_ACCESS_KEY = Sys.getenv("S3SA"),
AWS_DEFAULT_REGION = "us-west-2")
# NOTE(review): `get_bucket_df` is called without the `aws.s3::` prefix here,
# unlike the later call -- confirm it is imported in the package NAMESPACE.
a <- get_bucket_df(bucket = bucket, prefix = "apptags/")
# Derive a bucket name from each key: drop everything up to the first "/",
# keep the first two hyphen-separated fields, and prefix with "fh-pi-".
a$pi_bucket <- paste0("fh-pi-", sub("^([^-]*-[^-]*).*", "\\1", gsub("^[^/]*/", "", a$Key)))
# `tagFiles` and `sizeFiles` are not referenced again in the visible code.
tagFiles <- a[grepl("s3tags", a$Key)==T,]
sizeFiles <- a[grepl("s3sizes", a$Key)==T,]

print("Pulling all S3 tag lists.")
# Read the tag list from a fixed object key in `bucket`.
s3tags <- aws.s3::s3read_using(utils::read.csv, stringsAsFactors = F,
object = "apptags/s3tags.csv",
bucket = bucket)
s3tags <- s3tags %>% dplyr::select("key", "molecular_id", "omics_sample_name", "stage", "workflowID")
s3tags$pi_bucket <- bucket
# Keep only rows where molecular_id, omics_sample_name and stage are all non-empty.
s3tags <- s3tags[s3tags$molecular_id != "" & s3tags$omics_sample_name != "" & s3tags$stage != "", ]
print("Pulling all S3 object size lists.")
# Read the size list from a fixed object key; column names are supplied explicitly.
s3sizes <- aws.s3::s3read_using(utils::read.table, stringsAsFactors = F,
col.names = c("dateCreated", "timeCreated", "sizeBytes", "key"),
object = "tg/apptags/s3sizes.tsv",
bucket = bucket)
s3sizes$pi_bucket <- bucket
print("Joining tags and sizes.")
# No `by` is given, so the join uses the columns the two data frames share.
allObjects <- dplyr::inner_join(s3tags, s3sizes)
return(allObjects)
# Unreachable as written (see the note at the top): lists every object under
# `apptags/meta/`, reads each one as CSV and row-binds the results.
a <- aws.s3::get_bucket_df(bucket = bucket, prefix = "apptags/meta/")
b <- purrr::map_dfr(a$Key, function(x) {
aws.s3::s3read_using(utils::read.csv, stringsAsFactors = F,
object = x,
bucket = bucket)
})
return(b)
}

#' Pull the summary of objects and tags in the Repository overall
#'
#' Pulls just the processed summary of object metadata for all buckets with
#' data in the Repository. Every object listed under the `apptags/summary/`
#' prefix of `bucket` is read as a CSV file and the results are row-bound into
#' a single data frame.
#'
#' @param bucket The name of the S3 bucket containing the data.
#' @return Returns a long form data frame of annotated objects in the S3 bucket.
#' @author Amy Paguirigan
#' @details
#' Requires **admin** or **app** S3 credentials to be set in the environment by setCreds.
#' Only the `S3A` and `S3SA` environment variables are read. When both are
#' non-empty they are copied into `AWS_ACCESS_KEY_ID` and
#' `AWS_SECRET_ACCESS_KEY`; `AWS_DEFAULT_REGION` is always set to `us-west-2`.
#' These environment changes persist after the call.
#' @export
listS3RepoSummaries <- function(bucket = "fh-pi-paguirigan-a-genomicsrepo") {
  # This function only talks to S3, so only the S3 key pair is checked; the
  # REDCap variables are irrelevant here and would produce a spurious warning.
  s3_creds <- Sys.getenv(c("S3A", "S3SA"))
  if ("" %in% s3_creds) {
    print("You have missing environment variables. Please set creds in env vars.")
  } else {
    print("Credentials set successfully.")
    # Export the pair only when it is complete, so an incomplete S3A/S3SA never
    # overwrites AWS credentials that are already present in the environment.
    Sys.setenv(AWS_ACCESS_KEY_ID = s3_creds[["S3A"]],
               AWS_SECRET_ACCESS_KEY = s3_creds[["S3SA"]])
  }
  Sys.setenv(AWS_DEFAULT_REGION = "us-west-2")

  summary_objects <- aws.s3::get_bucket_df(bucket = bucket,
                                           prefix = "apptags/summary/")
  # Read each listed key as CSV and stack the resulting data frames by row.
  purrr::map_dfr(summary_objects$Key, function(key) {
    aws.s3::s3read_using(utils::read.csv, stringsAsFactors = FALSE,
                         object = key,
                         bucket = bucket)
  })
}

8 changes: 4 additions & 4 deletions R/pullAnnotations.R → R/annotationfunctions.R
Original file line number Diff line number Diff line change
Expand Up @@ -9,8 +9,8 @@
#' @export

pullAnnotations <- function() {
  # Downloads commonKnowledge.csv from the master branch of the
  # FredHutch/tgr-annotations repository and returns the parsed CSV content.
  print("pulling annotations from the master branch of the tgr-annotations repo")
  response <- httr::GET("https://raw.github.com/FredHutch/tgr-annotations/master/commonKnowledge.csv")
  # Raise an error on an HTTP failure instead of parsing an error page as CSV.
  httr::stop_for_status(response)
  httr::content(response, as = "parsed", type = "text/csv")
}
22 changes: 0 additions & 22 deletions R/listS3Objects.R

This file was deleted.

3 changes: 2 additions & 1 deletion R/redcapPull.R → R/redcapfunctions.R
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
#' Queries all TGR REDCap for metadata
#' Queries all TGR REDCap Projects for metadata
#'
#'
#'
Expand Down Expand Up @@ -62,3 +62,4 @@ redcapPull <- function(DAG = c("paguirigana", "bergera"), domain = "all", harmon

return(results)
}

44 changes: 44 additions & 0 deletions R/s3functions.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
#' Pull the list(s) of objects and tags in S3
#'
#' Pulls information from the results of s3tagcrawler for TGR that are in an S3
#' bucket, including the object list and their tags as well as size metadata.
#' For each bucket name `x` in `bucket`, the object
#' `tg/apptags/<x>-meta.csv` is read from bucket `x` as a CSV file and the
#' results are row-bound into a single data frame.
#'
#' @param bucket A character vector containing the full names of the S3 bucket(s) containing the data to return.
#' @return Returns a long form data frame of objects in the indicated S3 bucket(s).
#' @author Amy Paguirigan
#' @details
#' Requires valid S3 credentials to be set in the environment by setCreds.
#' When both `S3A` and `S3SA` are non-empty they are copied into
#' `AWS_ACCESS_KEY_ID` and `AWS_SECRET_ACCESS_KEY`; `AWS_DEFAULT_REGION` is
#' always set to `us-west-2`. These environment changes persist after the call.
#' @export
listS3Objects <- function(bucket) {
  s3_creds <- Sys.getenv(c("S3A", "S3SA"))
  if ("" %in% s3_creds) {
    print("You have missing environment variables. Please setCreds().")
  } else {
    print("Credentials set successfully.")
    # Export the pair only when it is complete, so an incomplete S3A/S3SA never
    # overwrites AWS credentials that are already present in the environment.
    Sys.setenv(AWS_ACCESS_KEY_ID = s3_creds[["S3A"]],
               AWS_SECRET_ACCESS_KEY = s3_creds[["S3SA"]])
  }
  Sys.setenv(AWS_DEFAULT_REGION = "us-west-2")

  print("Pulling S3 tag list(s).")
  # One metadata CSV per bucket, stacked by row.
  purrr::map_dfr(bucket, function(bucket_name) {
    aws.s3::s3read_using(utils::read.csv, stringsAsFactors = FALSE,
                         object = paste0("tg/apptags/", bucket_name, "-meta.csv"),
                         bucket = bucket_name)
  })
}
#' Pull a summary of objects in S3
#'
#' Pulls summary information from the apptags prefix in S3 for a given S3
#' bucket(s). For each bucket name `x` in `bucket`, the object
#' `tg/apptags/<x>-summary.csv` is read from bucket `x` as a CSV file and the
#' results are row-bound into a single data frame.
#'
#' @param bucket A character vector containing the full names of the S3 bucket(s) containing the data to return.
#' @return Returns a data frame containing a summary of what objects are in the indicated S3 bucket(s).
#' @author Amy Paguirigan
#' @details
#' Requires valid S3 credentials to be set in the environment by setCreds.
#' When both `S3A` and `S3SA` are non-empty they are copied into
#' `AWS_ACCESS_KEY_ID` and `AWS_SECRET_ACCESS_KEY`; `AWS_DEFAULT_REGION` is
#' always set to `us-west-2`. These environment changes persist after the call.
#' @export
summarizeS3Objects <- function(bucket) {
  s3_creds <- Sys.getenv(c("S3A", "S3SA"))
  if ("" %in% s3_creds) {
    print("You have missing environment variables. Please setCreds().")
  } else {
    print("Credentials set successfully.")
    # Export the pair only when it is complete, so an incomplete S3A/S3SA never
    # overwrites AWS credentials that are already present in the environment.
    Sys.setenv(AWS_ACCESS_KEY_ID = s3_creds[["S3A"]],
               AWS_SECRET_ACCESS_KEY = s3_creds[["S3SA"]])
  }
  Sys.setenv(AWS_DEFAULT_REGION = "us-west-2")

  print("Pulling S3 object summary.")
  # One summary CSV per bucket, stacked by row.
  purrr::map_dfr(bucket, function(bucket_name) {
    aws.s3::s3read_using(utils::read.csv, stringsAsFactors = FALSE,
                         object = paste0("tg/apptags/", bucket_name, "-summary.csv"),
                         bucket = bucket_name)
  })
}
2 changes: 1 addition & 1 deletion man/annotationDictionary.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

4 changes: 2 additions & 2 deletions man/listS3Objects.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

6 changes: 3 additions & 3 deletions man/listS3RepoObjects.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

23 changes: 23 additions & 0 deletions man/listS3RepoSummaries.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion man/pullAnnotations.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

6 changes: 3 additions & 3 deletions man/redcapPull.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

23 changes: 23 additions & 0 deletions man/summarizeS3Objects.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion man/undefinedAnnotations.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion man/usedIdentifiers.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

0 comments on commit 63e1c31

Please sign in to comment.