do not put md hyperlink in JSON output, tests for rendering

codecheckers · Dec 6, 2024 · af9884d · af9884d
1 parent bf84957
commit af9884d
Show file tree

Hide file tree

Showing 24 changed files with 196 additions and 74 deletions.
diff --git a/DESCRIPTION b/DESCRIPTION
@@ -1,6 +1,6 @@
 Package: codecheck
 Title: Helper Functions for CODECHECK Project
-Version: 0.11.5
+Version: 0.11.6
 Authors@R: 
     c(person(given = "Stephen",
             family = "Eglen",

diff --git a/NAMESPACE b/NAMESPACE
@@ -18,6 +18,7 @@ export(register_render)
 export(set_zenodo_certificate)
 export(upload_zenodo_metadata)
 export(validate_codecheck_yml)
+import(dplyr)
 import(jsonlite)
 importFrom(R.cache,addMemoization)
 importFrom(R.cache,getCacheRootPath)
@@ -28,6 +29,7 @@ importFrom(httr,http_error)
 importFrom(httr,http_status)
 importFrom(httr,status_code)
 importFrom(httr,write_disk)
+importFrom(jsonlite,fromJSON)
 importFrom(knitr,kable)
 importFrom(osfr,osf_download)
 importFrom(osfr,osf_ls_files)

diff --git a/R/register.R b/R/register.R
@@ -7,6 +7,7 @@
 #'
 #' @param register A `data.frame` with all required information for the register's view
 #' @param outputs The output formats to create
+#' @param config A list of configuration files to be sourced at the beginning of the rendering process
 #'
 #' @return A `data.frame` of the register enriched with information from the configuration files of respective CODECHECKs from the online repositories
 #'
@@ -16,24 +17,29 @@
 #' @importFrom knitr kable
 #' @importFrom utils capture.output read.csv tail
 #' @import     jsonlite
+#' @import     dplyr
 #'
 #' @export
 register_render <- function(register = read.csv("register.csv", as.is = TRUE),
                             filter_by = c("venues", "codecheckers"),
-                            outputs = c("html", "md", "json")) {
-  # Loading the dplyr package otherwise we cant use "%>%"
-  library(dplyr)
-
-  # Loading config.R file
-  source(system.file("extdata", "config.R", package = "codecheck"))
+                            outputs = c("html", "md", "json"),
+                            config = c(system.file("extdata", "config.R", package = "codecheck"))) {
+  # Loading config.R files
+  for (i in seq(length(config))) {
+    source(config[i])
+  }
 
   message("Using cache path ", R.cache::getCacheRootPath())
 
   register_table <- preprocess_register(register, filter_by)
   # Setting number of codechecks now for later use. This is done to avoid double counting codechecks
   # done by multiple authors.
   CONFIG$NO_CODECHECKS <- nrow(register_table)
-  render_cert_htmls(register_table, force_download = FALSE)
+
+  if("html" %in% outputs) {
+    render_cert_htmls(register_table, force_download = FALSE)
+  }
+
   create_filtered_reg_csvs(register, filter_by)
   create_register_files(register_table, filter_by, outputs)
   create_non_register_files(register_table, filter_by)

diff --git a/R/utils_download_certs.R b/R/utils_download_certs.R
@@ -1,6 +1,3 @@
-library(httr)
-library(jsonlite)
-
 #' Downloads a certificate PDF from a report link and saves it locally. 
 #' If the download link is a ZIP file, it extracts the PDF from 
 #' the archive. Returns status based on success.
@@ -153,6 +150,7 @@ get_osf_cert_link <- function(report_link, cert_id){
 #' @param api_key (Optional) API key for Zenodo authentication if required.
 #' 
 #' @importFrom httr GET status_code content
+#' @importFrom jsonlite fromJSON
 #'
 #' @return The download link for the certificate file as a string if found; otherwise, NULL.
 get_zenodo_cert_link <- function(report_link, cert_id, api_key = "") {
@@ -172,7 +170,7 @@ get_zenodo_cert_link <- function(report_link, cert_id, api_key = "") {
   if (httr::status_code(response) == 200) {
 
     # Parse the response
-    record_data <- fromJSON(httr::content(response, "text", encoding = "UTF-8"))
+    record_data <- jsonlite::fromJSON(httr::content(response, "text", encoding = "UTF-8"))
 
     files_list <- record_data$entries
 
@@ -265,7 +263,7 @@ extract_cert_pdf_from_zip <- function(zip_download_url, cert_sub_dir){
 #'
 #' @importFrom pdftools pdf_info
 #' @param cert_id The certificate identifier. This ID is used to locate the PDF and save the resulting images.
-convert_cert_pdf_to_jpeg <- function(cert_id){
+convert_cert_pdf_to_png <- function(cert_id){
   # Checking if the certs dir exist
   cert_dir <- file.path(CONFIG$CERTS_DIR[["cert"]], cert_id) 
 

diff --git a/R/utils_preprocess_register.R b/R/utils_preprocess_register.R
@@ -159,21 +159,31 @@ add_codechecker <- function(register_table, register) {
   return(register_table)
 }
 
-#' Function for adding clickable links to the paper for each entry in the register table.
+#' Function for adding clickable links to the paper for each entry in the register table and adding the certificate identifier and link as extra columns
 #' 
 #' @param register_table The register table to be adjusted.
-#' @return The adjusted register table with clickable Certificate links.
+#' @return The adjusted register table with clickable Certificate links and new columns for certificate identifier and certificate URL
 add_cert_links <- function(register_table){
+  ids <- c()
+  links <- c()
+
   # Looping over the entries in the register
   for (i in seq_len(nrow(register_table))) {
 
-    # Constructing the hyperlink
     cert_id <- register_table[i, ]$Certificate
-    hyperlink <- paste0("[", cert_id, "](", CONFIG$HYPERLINKS[["certs"]], cert_id, "/)")
-
+    cert_link <- paste0(CONFIG$HYPERLINKS[["certs"]], cert_id, "/")
+
+    # Constructing the hyperlink
+    hyperlink <- paste0("[", cert_id, "](", cert_link, ")")
     register_table[i, ]$Certificate <- hyperlink
+
+    ids <- c(ids, cert_id)
+    links <- c(links, cert_link)
   }
 
+  register_table$`Certificate ID` <- ids
+  register_table$`Certificate Link` <- links
+
   return(register_table)
 }
 

diff --git a/R/utils_render_cert_htmls.R b/R/utils_render_cert_htmls.R
@@ -10,35 +10,42 @@ render_cert_htmls <- function(register_table, force_download = FALSE){
 
   # Loop over each cert in the register table
   for (i in 1:nrow(register_table)){
+    download_cert_status <- NA
+
     abstract <- get_abstract(register_table[i, ]$Repository)
 
     # Retrieving report link and cert id
     report_link <- register_table[i, ]$Report
     cert_hyperlink <- register_table[i, ]$Certificate
     cert_id <- sub("\\[(.*)\\]\\(.*\\)", "\\1", cert_hyperlink)
 
-    # Define paths for the certificate PDF and JPEG
-    pdf_path <- file.path(CONFIG$CERTS_DIR[["cert"]], cert_id, "cert.pdf")
-    pdf_exists <- file.exists(pdf_path)
-
-    # Download the PDF if it doesn't exist or if force_download is TRUE
-    if (!pdf_exists || force_download) {
-      download_cert_status <- download_cert_pdf(report_link, cert_id)
-      # Successfully downloaded cert
-      # Proceeding to convert pdfs to jpegs
-      if (download_cert_status == 1){
-        convert_cert_pdf_to_jpeg(cert_id)
+    if(CONFIG$CERT_DOWNLOAD_AND_CONVERT) {
+      # Define the path of the certificate PDF and check whether it exists
+      pdf_path <- file.path(CONFIG$CERTS_DIR[["cert"]], cert_id, "cert.pdf")
+      pdf_exists <- file.exists(pdf_path)
+
+      # Download the PDF if it doesn't exist or if force_download is TRUE
+      if (!pdf_exists || force_download) {
+        download_cert_status <- download_cert_pdf(report_link, cert_id)
+        # Successfully downloaded cert
+        # Proceeding to convert pdfs to pngs
+        if (download_cert_status == 1){
+          convert_cert_pdf_to_png(cert_id)
+        }
+
+        # Delaying requests to adhere to request limits
+        Sys.sleep(CONFIG$CERT_REQUEST_DELAY)
       }
-
-      # Delaying reqwuests to adhere to request limits
-      Sys.sleep(CONFIG$CERT_REQUEST_DELAY)
-    }
-
-    # The pdf exists and force download is False
-    else{
-      download_cert_status <- 1
+      
+      # The pdf exists and force download is False
+      else{
+        download_cert_status <- 1
+      }
+    } else {
+      # do not display a certificate
+      download_cert_status <- 0
     }
-
+    
     render_cert_html(cert_id, register_table[i, ]$Repository, download_cert_status)
   }
 }
@@ -48,7 +55,7 @@ render_cert_htmls <- function(register_table, force_download = FALSE){
 #'
 #' @importFrom pdftools pdf_info
 #' @param cert_id The certificate identifier. This ID is used to locate the PDF and save the resulting images.
-convert_cert_pdf_to_jpeg <- function(cert_id){
+convert_cert_pdf_to_png <- function(cert_id){
   # Checking if the certs dir exist
   cert_dir <- file.path(CONFIG$CERTS_DIR[["cert"]], cert_id) 
 

diff --git a/R/utils_render_cert_md.R b/R/utils_render_cert_md.R
@@ -204,6 +204,11 @@ add_abstract <- function(repo_link, md_content){
 create_cert_md <- function(cert_id, repo_link, download_cert_status){
   cert_dir <- file.path(CONFIG$CERTS_DIR[["cert"]], cert_id)
 
+  # Create the directory if it does not exist (e.g., because no PDFs are downloaded)
+  if (!dir.exists(cert_dir)) {
+    dir.create(cert_dir, recursive = TRUE) 
+  }
+
   # Loading the correct template based on whether cert exists
   if (download_cert_status == 0) {
     template_type <- "md_template_no_cert"

diff --git a/R/utils_render_register_general.r b/R/utils_render_register_general.r
@@ -6,7 +6,7 @@
 #' The function iterates through the provided output types, generates an output directory,
 #' filters and adjusts the register table, and renders the original register files based on the specified formats.
 create_original_register_files <- function(register_table, outputs){
-  filter <- "none"
+  filter <- NA
   for (output_type in outputs){
     table_details <- list(is_reg_table = TRUE)
     table_details[["output_dir"]] <- generate_output_dir(filter, table_details)
@@ -150,7 +150,7 @@ generate_table_details <- function(table_key, table, filter, is_reg_table = TRUE
 #'        "csv" for CSVs, "md" for MD and "html" for HTMLs.
 #'
 #' @return None. The function generates a file in the specified format.
-render_register <- function(register_table, table_details, filter, output_type){
+render_register <- function(register_table, table_details, filter = NA, output_type){
   register_table <- filter_and_drop_register_columns(register_table, filter, output_type)
 
   switch(output_type,
@@ -175,7 +175,7 @@ generate_output_dir <- function(filter, table_details = list()) {
   # We have register tables
   if (table_details[["is_reg_table"]]){
     # We have the original register table
-    if (filter=="none"){
+    if (is.na(filter)){
       output_dir <- base_dir
     }
 

diff --git a/R/utils_render_register_htmls.R b/R/utils_render_register_htmls.R
@@ -91,7 +91,7 @@ generate_href <- function(filter, table_details, href_type) {
 
   base_url <- href_details$base_url
   # For the original register
-  if (filter == "none") {
+  if (is.na(filter)) {
     return(paste0(base_url, "register", href_details$ext))
   } 
 
@@ -167,7 +167,7 @@ render_html <- function(table, table_details, filter){
   # For all registered tables besides the original we change the html
   # file so that the path to the libs folder refers to the libs folder "docs/libs".
   # This is done to remove duplicates of "libs" folders.
-  if (filter != "none"){
+  if (!is.na(filter)){
     html_file_path <- paste0(output_dir, "index.html")
     edit_html_lib_paths(html_file_path)
     # Deleting the libs folder after changing the html lib path

diff --git a/R/utils_render_register_json.R b/R/utils_render_register_json.R
@@ -54,33 +54,33 @@ set_paper_title_references <- function(register_table){
 #' @param table_details List containing details such as the table name, subcat name.
 #' @param filter The filter
 render_register_json <- function(register_table, table_details, filter) {
-  register_table <- add_repository_links_json(register_table)
+  register_table_json <- add_repository_links_json(register_table)
 
   # Set paper titles and references
-  register_table <- set_paper_title_references(register_table)
+  register_table_json <- set_paper_title_references(register_table_json)
 
   output_dir <- table_details[["output_dir"]]
 
   # Keeping only those columns that are mentioned in the json columns and those that 
   # register table already has
-  columns_to_keep <- intersect(CONFIG$JSON_COLUMNS, names(register_table))
+  columns_to_keep <- intersect(CONFIG$JSON_COLUMNS, names(register_table_json))
 
   jsonlite::write_json(
-    register_table[, columns_to_keep],
+    register_table_json[, columns_to_keep],
     path = paste0(output_dir, "register.json"),
     pretty = TRUE
   )
 
   jsonlite::write_json(
-    utils::tail(register_table, 10)[, columns_to_keep],
+    utils::tail(register_table_json, CONFIG$FEATURED_COUNT)[, columns_to_keep],
     path = paste0(output_dir, "featured.json"),
     pretty = TRUE
   )
 
   jsonlite::write_json(
     list(
       source = generate_href(filter, table_details, "json"),
-      cert_count = nrow(register_table)
+      cert_count = nrow(register_table_json)
       # TODO count conferences, preprints,
       # journals, etc.
     ),

diff --git a/R/utils_render_register_mds.R b/R/utils_render_register_mds.R
@@ -7,18 +7,21 @@
 #' @return The modified markdown table
 add_markdown_title <- function(table_details, md_table, filter){
   # The filter is in the CONFIG$MD_TITLES
-  if (filter %in% names(CONFIG$MD_TITLES)) {
+  title_fn <- NULL
+
+  if (is.na(filter)) {
+    title_fn <- CONFIG$MD_TITLES[["default"]]
+  } else if (filter %in% names(CONFIG$MD_TITLES)) {
     # Loading the title function (if present) and passing the argument
     title_fn <- CONFIG$MD_TITLES[[filter]]
-    title <- title_fn(table_details)
-  } 
-
-  # No titles provided in the CONFIG file for the filter type
-  # Stopping the process
-  else {
+  } else {
+    # No filter or no titles provided in the CONFIG file for the filter type
+    # Stopping the process
     stop("Invalid filter provided.")
   }
 
+  title <- title_fn(table_details)
+
   md_table <- gsub("\\$title\\$", title, md_table)
   return(md_table)
 }

diff --git a/R/utils_render_table_non_registers.R b/R/utils_render_table_non_registers.R
@@ -23,7 +23,7 @@ create_non_register_files <- function(register_table, filter_by){
 
       render_html(table, table_details, filter)
 
-      # Removing the unneccessary columns before creating html and json
+      # Removing the unnecessary columns before creating html and json
       if (filter == "venues"){
         table <- table %>% select(-`venue_slug`)
       }

diff --git a/inst/extdata/config.R b/inst/extdata/config.R
@@ -23,7 +23,7 @@ CONFIG$REGISTER_COLUMNS <- list(
   html = c("Certificate", "Paper Title", "Type", "Venue", "Issue", "Report", "Check date"),
   md = c("Certificate", "Paper Title", "Type", "Venue", "Issue", "Report", "Check date"),
   csv =   c("Certificate", "Repository", "Type", "Venue", "Issue", "Report", "Check date"),
-  json =  c("Certificate", "Repository", "Type", "Venue", "Issue", "Report", "Check date")
+  json =  c("Certificate ID", "Certificate Link", "Repository", "Type", "Venue", "Issue", "Report", "Check date")
 )
 
 CONFIG$DIR_TEMP_REGISTER_CODECHECKER <- "docs/temp_register_codechecker.csv"
@@ -41,7 +41,7 @@ CONFIG$FILTER_COLUMN_NAMES_TO_DROP <- list(
 )
 
 CONFIG$MD_TITLES <- list(
-  "none" = function(table_details){
+  "default" = function(table_details){
     "CODECHECK Register"
   },
 
@@ -180,9 +180,12 @@ CONFIG$FILTER_SUBCAT_COLUMNS <- list(
 # OTHERS
 CONFIG$DICT_ORCID_ID_NAME <- list()
 
-# Delaying requests by 1 second to adhere to the rate limit of 60 requests/ minute for Zenodo
+# Delaying requests by 1 second to adhere to the rate limit of 60 requests/minute for Zenodo
 CONFIG$CERT_REQUEST_DELAY <- 1
 
+# Number of items in the featured lists of certificates
+CONFIG$FEATURED_COUNT <- 10
+
 # CERT LINKS
 CONFIG$CERT_LINKS <- list(
   osf_api = "https://api.osf.io/v2/",
@@ -192,7 +195,9 @@ CONFIG$CERT_LINKS <- list(
 )
 
 CONFIG$CERTS_URL_PREFIX <- "https://doi.org/"
-CONFIG$CERT_DPI <- 800 
+CONFIG$CERT_DPI <- 800
+
+CONFIG$CERT_DOWNLOAD_AND_CONVERT <- TRUE
 
 # DIRECTORIES
 CONFIG$CERTS_DIR <- list(
@@ -239,7 +244,8 @@ CONFIG$DICT_VENUE_NAMES <- list(
 # JSON FILE INFORMATION
 # List specifying the columns to keep for JSON files
 CONFIG$JSON_COLUMNS <- c(
-  "Certificate",
+  "Certificate ID",
+  "Certificate Link",
   "Repository Link",
   "Type",
   "Venue",