Point to the new portalPredictions repository, portal-forecasts
portal-forecasts is a revised version of portalPredictions.
portal-forecasts stores the forecasts as zipped files.
henrykironde committed Nov 29, 2023
1 parent a35a772 commit 67c04e7
Showing 109 changed files with 317 additions and 10,750 deletions.
1 change: 1 addition & 0 deletions .gitignore
@@ -19,3 +19,4 @@ testing_dir/*
/Meta/
inst/app/models.knit.md
*.DS_Store*
.Rproj.user
8 changes: 5 additions & 3 deletions DESCRIPTION
@@ -1,6 +1,6 @@
Package: portalcasting
Title: Model and Forecast Portal Rodent Dynamics
Version: 0.60.1
Version: 0.60.2
Authors@R: c(
person(c("Juniper", "L."), "Simonis",
email = "[email protected]", role = c("aut", "cre"),
@@ -21,7 +21,7 @@ Authors@R: c(
comment = c(ORCID = "0000-0001-6728-7745")),
person(c("S. K.", "Morgan"), "Ernest", role = c("aut"),
comment = c(ORCID = "0000-0002-6026-8530")))
Description: Create a directory, populate it with data, build models, forecast rodent populations, and visualize the results. The user can spin-up a local lightweight sandbox or full-scale production environment from the same code underlying the continuously deployed Portal Predictions Project continuous integration system <https://github.com/weecology/portalpredictions> and website <https://portal.naturecast.org>.
Description: Create a directory, populate it with data, build models, forecast rodent populations, and visualize the results. The user can spin-up a local lightweight sandbox or full-scale production environment from the same code underlying the continuously deployed Portal Predictions Project continuous integration system <https://github.com/weecology/portal-forecasts> and website <https://portal.naturecast.org>.
URL: https://weecology.github.io/portalcasting/, https://github.com/weecology/portalcasting
BugReports: https://github.com/weecology/portalcasting/issues
Depends:
@@ -48,7 +48,9 @@ Imports:
shiny,
stats,
viridis,
yaml
yaml,
utils,
zipR
Suggests:
pkgdown,
testthat
1 change: 1 addition & 0 deletions NAMESPACE
@@ -186,6 +186,7 @@ export(write_models_scripts)
export(write_models_tab_html)
export(write_rodents_profiles_tab_html)
export(www_path)
export(zip_unzip)
importFrom(arrow,read_csv_arrow)
importFrom(arrow,write_csv_arrow)
importFrom(bslib,bs_theme)
7 changes: 7 additions & 0 deletions NEWS.md
@@ -2,6 +2,13 @@

Version numbers follow [Semantic Versioning](https://semver.org/).

# [portalcasting 0.60.2](https://github.com/weecology/portalcasting/releases/tag/v0.60.2)
*11-16-2023*

## Repository moved
* Store forecasts by date using the zip format
* Move from portalPredictions to portal-forecasts due to size limitations.

# [portalcasting 0.60.1](https://github.com/weecology/portalcasting/releases/tag/v0.60.1)
*5-22-2023*

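For orientation, the NEWS entry above means a forecasts directory now holds one archive per forecast date plus a zipped evaluations table, in place of the individual per-forecast csv/yaml/json files. A minimal R sketch of inspecting such a directory, assuming the layout produced by the new `zip_unzip()` further down in this commit (the path and date below are illustrative placeholders):

```r
# Illustrative only: the path and date are placeholders, not values from this commit.
forecast_path <- "forecasts"

# One archive per forecast date (forecast_id_<date>.zip) plus
# forecasts_evaluations.zip, alongside forecasts_metadata.csv.
list.files(forecast_path, pattern = "\\.zip$")

# Peek inside one date archive without extracting it.
utils::unzip(file.path(forecast_path, "forecast_id_2023-11-16.zip"), list = TRUE)
```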
8 changes: 4 additions & 4 deletions R/download.R
@@ -290,8 +290,8 @@ download_archive <- function(main = ".",
} else if (source == "github") {

url <- ifelse(version == "latest",
"https://api.github.com/repos/weecology/portalPredictions/releases/latest",
paste0("https://api.github.com/repos/weecology/portalPredictions/releases/tags/", version))
"https://api.github.com/repos/henrykironde/portal-forecasts/releases/latest",
paste0("https://api.github.com/repos/henrykironde/portal-forecasts/releases/tags/", version))

got <- GET(url = url)

@@ -308,8 +308,8 @@

}

temp <- file.path(tempdir(), "portalPredictions.zip")
final <- file.path(main, resources_sub, "portalPredictions")
temp <- file.path(tempdir(), "portal-forecasts.zip")
final <- file.path(main, resources_sub, "portal-forecasts")
version_file <- file.path(final, "version.txt")

if (!force & file.exists(version_file)) {
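The hunks above only swap which repository the release lookup targets; the control flow of `download_archive()` is unchanged. A condensed sketch of the GitHub branch of that lookup, assuming `httr` is attached as in the package and network access is available (the download-and-unpack step is paraphrased in the comments):

```r
library(httr)

version <- "latest"
url <- ifelse(version == "latest",
              "https://api.github.com/repos/henrykironde/portal-forecasts/releases/latest",
              paste0("https://api.github.com/repos/henrykironde/portal-forecasts/releases/tags/", version))

got <- GET(url = url)

# The release's zipball URL is read from the API response; download_archive()
# then saves it as portal-forecasts.zip in tempdir() and unpacks it under the
# directory's resources subdirectory.
zipball_url <- content(got)$zipball_url
```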
7 changes: 3 additions & 4 deletions R/fill_dir.R
@@ -168,11 +168,8 @@ fill_app <- function (main = ".") {
#' @export
#'
fill_resources <- function (main = ".") {

settings <- read_directory_settings(main = main)

messageq("Downloading resources ... ", quiet = settings$quiet)

download_observations(path = resources_path(main = main),
version = settings$resources$PortalData$version,
source = settings$resources$PortalData$source,
@@ -182,7 +179,7 @@ fill_resources <- function (main = ".") {
quiet = settings$quiet,
verbose = settings$verbose)

download_archive(main = main,
resources_sub = settings$subdirectories$resources,
version = settings$resources$portalPredictions$version,
source = settings$resources$portalPredictions$source,
@@ -246,6 +243,8 @@ fill_forecasts <- function (main = ".") {

messageq(paste0(" ... ", sum(copied), " files moved. "), quiet = settings$quiet)

# unzip the forecast zipped files
zip_unzip("unzip", forecast_path = forecasts_path(main = main))
invisible( )

}
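The `fill_forecasts()` hunk adds a single post-processing step: after the archived forecast files are copied into the directory, the date-keyed zip archives are expanded back into the flat per-forecast csv/yaml/json files that downstream readers expect. Roughly (a sketch of the added call, not the full function):

```r
# After the copied files are counted and reported, expand every
# forecast_id_<date>.zip in the forecasts subdirectory in place.
zip_unzip("unzip", forecast_path = forecasts_path(main = "."))
```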
66 changes: 33 additions & 33 deletions R/settings.R
@@ -69,7 +69,7 @@
#'
#' @param force `logical` indicator of whether or not existing files or folders (such as the archive) should be over-written if an up-to-date copy exists (most users should leave as `FALSE`).
#'
#' @param unzip_pause Positive `integer` or integer `numeric` seconds for pausing during steps around unzipping that require time delay.
#'
#' @param download_timeout Positive `integer` or integer `numeric` seconds for timeout on downloads. Temporarily overrides the `"timeout"` option in [`base::options`].
#'
@@ -79,8 +79,8 @@
#'
#' @param timeseries_start `Date` after which historic samples are included in the timeseries fit. Default value is `1995-01-01`, corresponding to moon 217.
#'
#' @param lead_time `integer` (or integer `numeric`) value for the number of calendar days forward a forecast will cover. \cr
#' As of version 0.51.0, the default is `365`, which when divided by 29.5 (the duration of a lunar month) gives 13. The value was previously 12; we now use 13 to align with the timestep being a lunar month, and 13 lunar months cover a full calendar year.
#'
#' @param max_lag `integer` (or integer `numeric`) maximum number of calendar days that any covariate is lagged for prediction in a model. \cr
#' Default is `365` for the logistic covariate models.
@@ -89,11 +89,11 @@
#' Default value of `60` corresponds to two additional lunar months.
#'
#' @param lead_time_buffer `integer` (or integer `numeric`) additional number of calendar days forward in time to forecast. \cr
#' Default value of `30` corresponds to one additional lunar month.
#'
#' @param confidence_level `numeric` confidence level used in summarizing model output. Must be between `0` and `1`.
#'
#' @param nsamples `integer` (or integer `numeric`) number of samples used in summarizing model output for sample-based estimates.
#' @param nsamples `integer` (or integer `numeric`) number of samples used to summarizing model output of sample-based estimates.
#'
#' @return Named `list` of settings for the directory (for `directory_settings`) or `list` of settings components (for `directory_files`, `directory_subdirectories`, and `directory_resources`).
#'
Expand Down Expand Up @@ -125,21 +125,21 @@ directory_settings <- function (files = directory_files( ),
confidence_level = 0.95,
nsamples = 1e4,
save = TRUE,
overwrite = TRUE,
force = FALSE,
overwrite = TRUE,
force = FALSE,
unzip_pause = 30,
download_timeout = getOption("timeout")) {

list(files = files,
subdirectories = subdirectories,
resources = resources,
repository = "portalPredictions",
repository = "portal-forecasts",
confidence_level = confidence_level,
nsamples = nsamples,
time = time,
save = save,
force = force,
overwrite = overwrite,
save = save,
force = force,
overwrite = overwrite,
unzip_pause = unzip_pause,
download_timeout = download_timeout)

@@ -149,7 +149,7 @@
#'
#' @export
#'
time_settings <- function (timeseries_start = as.Date("1995-01-01"),
time_settings <- function (timeseries_start = as.Date("1995-01-01"),
origin = Sys.Date( ),
forecast_date = Sys.Date( ),
lead_time = 365,
@@ -158,7 +158,7 @@ time_settings <- function (timeseries_start = as.Date("1995-01-01"),
lead_time_buffer = 30) {


timeseries_start_lagged <- timeseries_start - max_lag - lag_buffer
timeseries_start_lagged <- timeseries_start - max_lag - lag_buffer
forecast_start <- origin + 1
forecast_end <- origin + lead_time
forecast_end_buffered <- origin + lead_time + lead_time_buffer
@@ -185,7 +185,7 @@ directory_files <- function (directory_configuration = "directory_configuration.
app = "app.R",
newmoons = "newmoons.csv",
covariates = "covariates.csv",
datasets_controls = "datasets_controls.yaml",
datasets_controls = "datasets_controls.yaml",
models_controls = "models_controls.yaml",
forecasts_evaluations = "forecasts_evaluations.csv",
forecasts_results = "forecasts_results.csv",
Expand All @@ -202,7 +202,7 @@ directory_files <- function (directory_configuration = "directory_configuration.
app = app,
newmoons = newmoons,
covariates = covariates,
datasets_controls = datasets_controls,
datasets_controls = datasets_controls,
models_controls = models_controls,
forecasts_evaluations = forecasts_evaluations,
forecasts_results = forecasts_results,
@@ -222,18 +222,18 @@
#'
#' @export
#'
directory_subdirectories <- function (forecasts = "forecasts",
fits = "fits",
models = "models",
resources = "resources",
data = "data",
directory_subdirectories <- function (forecasts = "forecasts",
fits = "fits",
models = "models",
resources = "resources",
data = "data",
www = "www") {

list(forecasts = forecasts,
fits = fits,
models = models,
resources = resources,
data = data,
list(forecasts = forecasts,
fits = fits,
models = models,
resources = resources,
data = data,
www = www)

}
@@ -242,15 +242,15 @@ directory_subdirectories <- function (forecasts = "forecasts",
#'
#' @export
#'
directory_resources <- function (PortalData = list(source = "github",
directory_resources <- function (PortalData = list(source = "github",
version = "latest"),
portalPredictions = list(source = "github",
portalPredictions = list(source = "github",
version = NULL),
climate_forecasts = list(source = "NMME",
version = as.character(Sys.Date()),
climate_forecasts = list(source = "NMME",
version = as.character(Sys.Date()),
data = list(mintemp = "tasmin",
meantemp = "tasmean",
maxtemp = "tasmax",
meantemp = "tasmean",
maxtemp = "tasmax",
precipitation = "pr"))) {

list(PortalData = PortalData,
@@ -266,7 +266,7 @@ directory_resources <- function (PortalData = list(source = "github",
#'
production_settings <- function (download_timeout = max(getOption("timeout"), 600)) {

resources <- directory_resources(portalPredictions = list(source = "github",
resources <- directory_resources(portalPredictions = list(source = "github",
version = "latest"))

directory_settings(resources = resources,
@@ -279,7 +279,7 @@ production_settings <- function (download_timeout = max(getOption("timeout"), 6
#'
#' @export
#'
sandbox_settings <- function ( ) {
sandbox_settings <- function () {

directory_settings( )

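Most of this file's 66 changed lines are whitespace cleanup; the functional change is the `repository` entry in the settings list. A quick check from an R session with this version of portalcasting loaded (expected output shown as comments):

```r
library(portalcasting)

settings <- directory_settings()
settings$repository
#> [1] "portal-forecasts"

# production_settings() still pins the forecasts archive to the latest release.
production_settings()$resources$portalPredictions$version
#> [1] "latest"
```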
91 changes: 91 additions & 0 deletions R/zip_unzip_forecasts.R
@@ -0,0 +1,91 @@
#' Zip and unzip forecasts by forecast date
#'
#' @param type `character` value, either `"zip"` or `"unzip"`
#' @param forecast_path `character` path to the forecasts directory
#' @param date forecast date to zip; if `NULL`, every date in `forecasts_metadata.csv` is zipped
#'
#' @export
#'
zip_unzip <-
function(type = NULL,
forecast_path = "forecasts/",
date = NULL) {
print("Preparing forecasts files")
proj_path <- forecast_path
forecasts_metadata <- paste0(proj_path, "/forecasts_metadata.csv")

proj_path <- normalizePath(proj_path, mustWork = FALSE)
forecasts_metadata <-
normalizePath(forecasts_metadata, mustWork = FALSE)

metadata <- read.csv(forecasts_metadata)
unique_dates <- unique(metadata$forecast_date)
unique_dates <- sort(unique_dates)

if (type == "zip") {
csv_file <- "_forecast_table.csv"
yaml_file <- "_metadata.yaml"
json_file <- "_model_forecast.json"
if (!is.null(date)) {
unique_dates <- c(date)
print(paste0("Zipping forecasts files for ", date))
}
for (forecast_day in unique_dates) {
id_date_files <- c()
zipfile <-
paste0(proj_path, "/forecast_id_", forecast_day, ".zip")
# Get all the values of that particular day in a data frame
newdata <-
subset(metadata,
forecast_date == forecast_day,
select = c(forecast_id, forecast_date))
# for each forecast_id get 3 files
All_ids <- newdata$forecast_id
for (id in All_ids) {
csv_file_path <- paste0(proj_path, "/forecast_id_", id, csv_file)
yaml_file_path <- paste0(proj_path, "/forecast_id_", id, yaml_file)
json_file_path <- paste0(proj_path, "/forecast_id_", id, json_file)
id_date_files <-
c(id_date_files,
csv_file_path,
yaml_file_path,
json_file_path)
}
zipfile <- normalizePath(zipfile, mustWork = FALSE)
# First remove old zip file if exists
unlink(zipfile)
# zip all id_date_files
zipr(zipfile, id_date_files, compression_level = 9)
unlink(id_date_files)
}

# Zip forecasts_evaluations.csv
id_date_files <-
c(paste0(proj_path, "/forecasts_evaluations.csv"))
id_date_files <- normalizePath(id_date_files, mustWork = FALSE)

zipfile <- paste0(proj_path, "/forecasts_evaluations.zip")
zipfile <- normalizePath(zipfile, mustWork = FALSE)

zipr(zipfile, id_date_files, compression_level = 9)
}

if (type == "unzip") {
print("Unzipping forecasts files")
# unzip files basing on unique_dates
for (forecast_day in unique_dates) {
zipfile <- paste0(proj_path, "/forecast_id_", forecast_day, ".zip")
zipfile <- normalizePath(zipfile, mustWork = FALSE)
if (file.exists(zipfile)) {
unzip(zipfile, exdir = proj_path)
unlink(zipfile)
}
}

# Unzip forecasts_evaluations.csv
eval_zip <- paste0(proj_path, "/forecasts_evaluations.zip")
eval_zip <- normalizePath(eval_zip, mustWork = FALSE)
utils::unzip(eval_zip, exdir = proj_path)
unlink(eval_zip)
}
}
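A hedged usage sketch for the new export (the path and date are illustrative; `zip_unzip()` reads `forecasts_metadata.csv` from `forecast_path` to find the forecast dates, so it must be run inside a populated forecasts directory):

```r
# Bundle the csv/yaml/json files for one forecast date into
# forecast_id_<date>.zip, and zip forecasts_evaluations.csv.
zip_unzip("zip", forecast_path = "forecasts/", date = "2023-11-16")

# Restore the flat per-forecast files, as fill_forecasts() now does
# when it populates a directory from the archive.
zip_unzip("unzip", forecast_path = "forecasts/")
```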