Skip to content

Commit

Permalink
do not put md hyperlink in JSON output, tests for rendering
Browse files Browse the repository at this point in the history
  • Loading branch information
nuest committed Dec 6, 2024
1 parent bf84957 commit af9884d
Show file tree
Hide file tree
Showing 24 changed files with 196 additions and 74 deletions.
2 changes: 1 addition & 1 deletion DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
Package: codecheck
Title: Helper Functions for CODECHECK Project
Version: 0.11.5
Version: 0.11.6
Authors@R:
c(person(given = "Stephen",
family = "Eglen",
Expand Down
2 changes: 2 additions & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@ export(register_render)
export(set_zenodo_certificate)
export(upload_zenodo_metadata)
export(validate_codecheck_yml)
import(dplyr)
import(jsonlite)
importFrom(R.cache,addMemoization)
importFrom(R.cache,getCacheRootPath)
Expand All @@ -28,6 +29,7 @@ importFrom(httr,http_error)
importFrom(httr,http_status)
importFrom(httr,status_code)
importFrom(httr,write_disk)
importFrom(jsonlite,fromJSON)
importFrom(knitr,kable)
importFrom(osfr,osf_download)
importFrom(osfr,osf_ls_files)
Expand Down
20 changes: 13 additions & 7 deletions R/register.R
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
#'
#' @param register A `data.frame` with all required information for the register's view
#' @param outputs The output formats to create
#' @param config A list of configuration files to be sourced at the beginning of the rendering process
#'
#' @return A `data.frame` of the register enriched with information from the configuration files of respective CODECHECKs from the online repositories
#'
Expand All @@ -16,24 +17,29 @@
#' @importFrom knitr kable
#' @importFrom utils capture.output read.csv tail
#' @import jsonlite
#' @import dplyr
#'
#' @export
register_render <- function(register = read.csv("register.csv", as.is = TRUE),
filter_by = c("venues", "codecheckers"),
outputs = c("html", "md", "json")) {
# Loading the dplyr package otherwise we cant use "%>%"
library(dplyr)

# Loading config.R file
source(system.file("extdata", "config.R", package = "codecheck"))
outputs = c("html", "md", "json"),
config = c(system.file("extdata", "config.R", package = "codecheck"))) {
# Loading config.R files
for (i in seq(length(config))) {
source(config[i])
}

message("Using cache path ", R.cache::getCacheRootPath())

register_table <- preprocess_register(register, filter_by)
# Setting number of codechecks now for later use. This is done to avoid double counting codechecks
# done by multiple authors.
CONFIG$NO_CODECHECKS <- nrow(register_table)
render_cert_htmls(register_table, force_download = FALSE)

if("html" %in% outputs) {
render_cert_htmls(register_table, force_download = FALSE)
}

create_filtered_reg_csvs(register, filter_by)
create_register_files(register_table, filter_by, outputs)
create_non_register_files(register_table, filter_by)
Expand Down
8 changes: 3 additions & 5 deletions R/utils_download_certs.R
Original file line number Diff line number Diff line change
@@ -1,6 +1,3 @@
library(httr)
library(jsonlite)

#' Downloads a certificate PDF from a report link and saves it locally.
#' If the download link is a ZIP file, it extracts the PDF from
#' the archive. Returns status based on success.
Expand Down Expand Up @@ -153,6 +150,7 @@ get_osf_cert_link <- function(report_link, cert_id){
#' @param api_key (Optional) API key for Zenodo authentication if required.
#'
#' @importFrom httr GET status_code content
#' @importFrom jsonlite fromJSON
#'
#' @return The download link for the certificate file as a string if found; otherwise, NULL.
get_zenodo_cert_link <- function(report_link, cert_id, api_key = "") {
Expand All @@ -172,7 +170,7 @@ get_zenodo_cert_link <- function(report_link, cert_id, api_key = "") {
if (httr::status_code(response) == 200) {

# Parse the response
record_data <- fromJSON(httr::content(response, "text", encoding = "UTF-8"))
record_data <- jsonlite::fromJSON(httr::content(response, "text", encoding = "UTF-8"))

files_list <- record_data$entries

Expand Down Expand Up @@ -265,7 +263,7 @@ extract_cert_pdf_from_zip <- function(zip_download_url, cert_sub_dir){
#'
#' @importFrom pdftools pdf_info
#' @param cert_id The certificate identifier. This ID is used to locate the PDF and save the resulting images.
convert_cert_pdf_to_jpeg <- function(cert_id){
convert_cert_pdf_to_png <- function(cert_id){
# Checking if the certs dir exist
cert_dir <- file.path(CONFIG$CERTS_DIR[["cert"]], cert_id)

Expand Down
20 changes: 15 additions & 5 deletions R/utils_preprocess_register.R
Original file line number Diff line number Diff line change
Expand Up @@ -159,21 +159,31 @@ add_codechecker <- function(register_table, register) {
return(register_table)
}

#' Function for adding clickable links to the paper for each entry in the register table.
#' Function for adding clickable certificate links for each entry in the register table, and for adding the certificate identifier and certificate link as extra columns
#'
#' @param register_table The register table to be adjusted.
#' @return The adjusted register table with clickable Certificate links.
#' @return The adjusted register table with clickable Certificate links and new columns for certificate identifier and certificate URL
add_cert_links <- function(register_table){
ids <- c()
links <- c()

# Looping over the entries in the register
for (i in seq_len(nrow(register_table))) {

# Constructing the hyperlink
cert_id <- register_table[i, ]$Certificate
hyperlink <- paste0("[", cert_id, "](", CONFIG$HYPERLINKS[["certs"]], cert_id, "/)")

cert_link <- paste0(CONFIG$HYPERLINKS[["certs"]], cert_id, "/")

# Constructing the hyperlink
hyperlink <- paste0("[", cert_id, "](", cert_link, ")")
register_table[i, ]$Certificate <- hyperlink

ids <- c(ids, cert_id)
links <- c(links, cert_link)
}

register_table$`Certificate ID` <- ids
register_table$`Certificate Link` <- links

return(register_table)
}

Expand Down
49 changes: 28 additions & 21 deletions R/utils_render_cert_htmls.R
Original file line number Diff line number Diff line change
Expand Up @@ -10,35 +10,42 @@ render_cert_htmls <- function(register_table, force_download = FALSE){

# Loop over each cert in the register table
for (i in 1:nrow(register_table)){
download_cert_status <- NA

abstract <- get_abstract(register_table[i, ]$Repository)

# Retrieving report link and cert id
report_link <- register_table[i, ]$Report
cert_hyperlink <- register_table[i, ]$Certificate
cert_id <- sub("\\[(.*)\\]\\(.*\\)", "\\1", cert_hyperlink)

# Define paths for the certificate PDF and JPEG
pdf_path <- file.path(CONFIG$CERTS_DIR[["cert"]], cert_id, "cert.pdf")
pdf_exists <- file.exists(pdf_path)

# Download the PDF if it doesn't exist or if force_download is TRUE
if (!pdf_exists || force_download) {
download_cert_status <- download_cert_pdf(report_link, cert_id)
# Successfully downloaded cert
# Proceeding to convert pdfs to jpegs
if (download_cert_status == 1){
convert_cert_pdf_to_jpeg(cert_id)
if(CONFIG$CERT_DOWNLOAD_AND_CONVERT) {
# Define the path for the certificate PDF and check whether it already exists
pdf_path <- file.path(CONFIG$CERTS_DIR[["cert"]], cert_id, "cert.pdf")
pdf_exists <- file.exists(pdf_path)

# Download the PDF if it doesn't exist or if force_download is TRUE
if (!pdf_exists || force_download) {
download_cert_status <- download_cert_pdf(report_link, cert_id)
# Successfully downloaded cert
# Proceeding to convert PDFs to PNGs
if (download_cert_status == 1){
convert_cert_pdf_to_png(cert_id)
}

# Delaying requests to adhere to request limits
Sys.sleep(CONFIG$CERT_REQUEST_DELAY)
}

# Delaying reqwuests to adhere to request limits
Sys.sleep(CONFIG$CERT_REQUEST_DELAY)
}

# The pdf exists and force download is False
else{
download_cert_status <- 1
# The pdf exists and force download is False
else{
download_cert_status <- 1
}
} else {
# do not display a certificate
download_cert_status <- 0
}

render_cert_html(cert_id, register_table[i, ]$Repository, download_cert_status)
}
}
Expand All @@ -48,7 +55,7 @@ render_cert_htmls <- function(register_table, force_download = FALSE){
#'
#' @importFrom pdftools pdf_info
#' @param cert_id The certificate identifier. This ID is used to locate the PDF and save the resulting images.
convert_cert_pdf_to_jpeg <- function(cert_id){
convert_cert_pdf_to_png <- function(cert_id){
# Checking if the certs dir exist
cert_dir <- file.path(CONFIG$CERTS_DIR[["cert"]], cert_id)

Expand Down
5 changes: 5 additions & 0 deletions R/utils_render_cert_md.R
Original file line number Diff line number Diff line change
Expand Up @@ -204,6 +204,11 @@ add_abstract <- function(repo_link, md_content){
create_cert_md <- function(cert_id, repo_link, download_cert_status){
cert_dir <- file.path(CONFIG$CERTS_DIR[["cert"]], cert_id)

# Create the directory if it does not exist (e.g., because no PDFs are downloaded)
if (!dir.exists(cert_dir)) {
dir.create(cert_dir, recursive = TRUE)
}

# Loading the correct template based on whether cert exists
if (download_cert_status == 0) {
template_type <- "md_template_no_cert"
Expand Down
6 changes: 3 additions & 3 deletions R/utils_render_register_general.r
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
#' The function iterates through the provided output types, generates an output directory,
#' filters and adjusts the register table, and renders the original register files based on the specified formats.
create_original_register_files <- function(register_table, outputs){
filter <- "none"
filter <- NA
for (output_type in outputs){
table_details <- list(is_reg_table = TRUE)
table_details[["output_dir"]] <- generate_output_dir(filter, table_details)
Expand Down Expand Up @@ -150,7 +150,7 @@ generate_table_details <- function(table_key, table, filter, is_reg_table = TRUE
#' "csv" for CSVs, "md" for MD and "html" for HTMLs.
#'
#' @return None. The function generates a file in the specified format.
render_register <- function(register_table, table_details, filter, output_type){
render_register <- function(register_table, table_details, filter = NA, output_type){
register_table <- filter_and_drop_register_columns(register_table, filter, output_type)

switch(output_type,
Expand All @@ -175,7 +175,7 @@ generate_output_dir <- function(filter, table_details = list()) {
# We have register tables
if (table_details[["is_reg_table"]]){
# We have the original register table
if (filter=="none"){
if (is.na(filter)){
output_dir <- base_dir
}

Expand Down
4 changes: 2 additions & 2 deletions R/utils_render_register_htmls.R
Original file line number Diff line number Diff line change
Expand Up @@ -91,7 +91,7 @@ generate_href <- function(filter, table_details, href_type) {

base_url <- href_details$base_url
# For the original register
if (filter == "none") {
if (is.na(filter)) {
return(paste0(base_url, "register", href_details$ext))
}

Expand Down Expand Up @@ -167,7 +167,7 @@ render_html <- function(table, table_details, filter){
# For all registered tables besides the original we change the html
# file so that the path to the libs folder refers to the libs folder "docs/libs".
# This is done to remove duplicates of "libs" folders.
if (filter != "none"){
if (!is.na(filter)){
html_file_path <- paste0(output_dir, "index.html")
edit_html_lib_paths(html_file_path)
# Deleting the libs folder after changing the html lib path
Expand Down
12 changes: 6 additions & 6 deletions R/utils_render_register_json.R
Original file line number Diff line number Diff line change
Expand Up @@ -54,33 +54,33 @@ set_paper_title_references <- function(register_table){
#' @param table_details List containing details such as the table name, subcat name.
#' @param filter The filter
render_register_json <- function(register_table, table_details, filter) {
register_table <- add_repository_links_json(register_table)
register_table_json <- add_repository_links_json(register_table)

# Set paper titles and references
register_table <- set_paper_title_references(register_table)
register_table_json <- set_paper_title_references(register_table_json)

output_dir <- table_details[["output_dir"]]

# Keeping only those columns that are mentioned in the json columns and those that
# register table already has
columns_to_keep <- intersect(CONFIG$JSON_COLUMNS, names(register_table))
columns_to_keep <- intersect(CONFIG$JSON_COLUMNS, names(register_table_json))

jsonlite::write_json(
register_table[, columns_to_keep],
register_table_json[, columns_to_keep],
path = paste0(output_dir, "register.json"),
pretty = TRUE
)

jsonlite::write_json(
utils::tail(register_table, 10)[, columns_to_keep],
utils::tail(register_table_json, CONFIG$FEATURED_COUNT)[, columns_to_keep],
path = paste0(output_dir, "featured.json"),
pretty = TRUE
)

jsonlite::write_json(
list(
source = generate_href(filter, table_details, "json"),
cert_count = nrow(register_table)
cert_count = nrow(register_table_json)
# TODO count conferences, preprints,
# journals, etc.
),
Expand Down
17 changes: 10 additions & 7 deletions R/utils_render_register_mds.R
Original file line number Diff line number Diff line change
Expand Up @@ -7,18 +7,21 @@
#' @return The modified markdown table
add_markdown_title <- function(table_details, md_table, filter){
# The filter is in the CONFIG$MD_TITLES
if (filter %in% names(CONFIG$MD_TITLES)) {
title_fn <- NULL

if (is.na(filter)) {
title_fn <- CONFIG$MD_TITLES[["default"]]
} else if (filter %in% names(CONFIG$MD_TITLES)) {
# Loading the title function (if present) and passing the argument
title_fn <- CONFIG$MD_TITLES[[filter]]
title <- title_fn(table_details)
}

# No titles provided in the CONFIG file for the filter type
# Stopping the process
else {
} else {
# No title provided in the CONFIG file for this filter type
# Stopping the process
stop("Invalid filter provided.")
}

title <- title_fn(table_details)

md_table <- gsub("\\$title\\$", title, md_table)
return(md_table)
}
Expand Down
2 changes: 1 addition & 1 deletion R/utils_render_table_non_registers.R
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ create_non_register_files <- function(register_table, filter_by){

render_html(table, table_details, filter)

# Removing the unneccessary columns before creating html and json
# Removing the unnecessary columns before creating html and json
if (filter == "venues"){
table <- table %>% select(-`venue_slug`)
}
Expand Down
16 changes: 11 additions & 5 deletions inst/extdata/config.R
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ CONFIG$REGISTER_COLUMNS <- list(
html = c("Certificate", "Paper Title", "Type", "Venue", "Issue", "Report", "Check date"),
md = c("Certificate", "Paper Title", "Type", "Venue", "Issue", "Report", "Check date"),
csv = c("Certificate", "Repository", "Type", "Venue", "Issue", "Report", "Check date"),
json = c("Certificate", "Repository", "Type", "Venue", "Issue", "Report", "Check date")
json = c("Certificate ID", "Certificate Link", "Repository", "Type", "Venue", "Issue", "Report", "Check date")
)

CONFIG$DIR_TEMP_REGISTER_CODECHECKER <- "docs/temp_register_codechecker.csv"
Expand All @@ -41,7 +41,7 @@ CONFIG$FILTER_COLUMN_NAMES_TO_DROP <- list(
)

CONFIG$MD_TITLES <- list(
"none" = function(table_details){
"default" = function(table_details){
"CODECHECK Register"
},

Expand Down Expand Up @@ -180,9 +180,12 @@ CONFIG$FILTER_SUBCAT_COLUMNS <- list(
# OTHERS
CONFIG$DICT_ORCID_ID_NAME <- list()

# Delaying requests by 1 second to adhere to the rate limit of 60 requests/ minute for Zenodo
# Delaying requests by 1 second to adhere to the rate limit of 60 requests/minute for Zenodo
CONFIG$CERT_REQUEST_DELAY <- 1

# Number of items in the featured lists of certificates
CONFIG$FEATURED_COUNT <- 10

# CERT LINKS
CONFIG$CERT_LINKS <- list(
osf_api = "https://api.osf.io/v2/",
Expand All @@ -192,7 +195,9 @@ CONFIG$CERT_LINKS <- list(
)

CONFIG$CERTS_URL_PREFIX <- "https://doi.org/"
CONFIG$CERT_DPI <- 800
CONFIG$CERT_DPI <- 800

CONFIG$CERT_DOWNLOAD_AND_CONVERT <- TRUE

# DIRECTORIES
CONFIG$CERTS_DIR <- list(
Expand Down Expand Up @@ -239,7 +244,8 @@ CONFIG$DICT_VENUE_NAMES <- list(
# JSON FILE INFORMATION
# List specifying the columns to keep for JSON files
CONFIG$JSON_COLUMNS <- c(
"Certificate",
"Certificate ID",
"Certificate Link",
"Repository Link",
"Type",
"Venue",
Expand Down
Loading

0 comments on commit af9884d

Please sign in to comment.