Skip to content

Commit

Permalink
Merge pull request #99 from schochastics/url_clean
Browse files Browse the repository at this point in the history
fix #98
  • Loading branch information
schochastics authored Jan 4, 2024
2 parents a74515c + f28d651 commit 73bbbb4
Show file tree
Hide file tree
Showing 5 changed files with 26 additions and 12 deletions.
3 changes: 1 addition & 2 deletions NEWS.md
Original file line number Diff line number Diff line change
@@ -1,8 +1,7 @@
# webtrackR 0.1.0.9000

* reimplemented in base R removing several dependencies
* fixed #1, #81, #82, #83, #84, #87, #88

* fixed #1, #81, #82, #83, #84, #87, #88, #98

# webtrackR 0.1.0

Expand Down
13 changes: 7 additions & 6 deletions R/preprocess.R
Original file line number Diff line number Diff line change
Expand Up @@ -296,8 +296,8 @@ extract_domain <- function(wt, varname = "url") {
#' The path is defined as the part following the host but not including a
#' query (anything after a "?") or a fragment (anything after a "#").
#' @param wt webtrack data object
#' @param varname character. name of the column from which to extract the host.
#' Defaults to `"url"`.
#' @param varname character. Name of the column from which to extract the path. Defaults to `"url"`.
#' @param decode logical. Whether to decode the path (see [utils::URLdecode()]). Defaults to `TRUE`.
#' @return webtrack data.frame with the same columns as wt
#' and a new column called `'path'` (or, if varname not equal to `'url'`, `'<varname>_path'`)
#' @examples
Expand All @@ -308,10 +308,10 @@ extract_domain <- function(wt, varname = "url") {
#' wt <- extract_path(wt)
#' }
#' @export
extract_path <- function(wt, varname = "url") {
extract_path <- function(wt, varname = "url", decode = TRUE) {
abort_if_not_wtdt(wt)
vars_exist(wt, varname)
path <- adaR::ada_get_pathname(wt[[varname]])
path <- adaR::ada_get_pathname(wt[[varname]], decode = decode)
if (varname == "url") {
wt[["path"]] <- path
} else {
Expand Down Expand Up @@ -359,6 +359,7 @@ drop_query <- function(wt, varname = "url") {
#' English words (as defined by the Word Game Dictionary,
#' cf. https://cran.r-project.org/web/packages/words/index.html) are kept.
#' Support for more languages will be added in future.
#' @param decode logical. Whether to decode the path (see [utils::URLdecode()]). Defaults to `TRUE`.
#' @return webtrack data.frame with the same columns as wt
#' and a new column called `'path_split'` (or, if varname not equal to `'url'`, `'<varname>_path_split'`)
#' containing parts as a comma-separated string.
Expand All @@ -370,14 +371,14 @@ drop_query <- function(wt, varname = "url") {
#' wt <- parse_path(wt)
#' }
#' @export
parse_path <- function(wt, varname = "url", keep = "letters_only") {
parse_path <- function(wt, varname = "url", keep = "letters_only", decode = TRUE) {
abort_if_not_wtdt(wt)
vars_exist(wt, varname)
keep <- match.arg(keep, c("letters_only", "words_only"))

path_delims <- "/|-|_|\\."
if (!"path" %in% names(wt)) {
tmp <- extract_path(wt, varname)
tmp <- extract_path(wt, varname, decode = decode)
paths <- tmp[[grep("path", names(tmp))]]
} else {
paths <- wt[["path"]]
Expand Down
7 changes: 4 additions & 3 deletions man/extract_path.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

4 changes: 3 additions & 1 deletion man/parse_path.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

11 changes: 11 additions & 0 deletions tests/testthat/test-preprocess.R
Original file line number Diff line number Diff line change
Expand Up @@ -360,3 +360,14 @@ test_that("panelist_data testdt_specific", {
wt_joined <- add_panelist_data(wt, testdt_survey_w)
expect_true(round(mean(wt_joined$leftright), 2) == 4.99)
})

test_that("issue 98", {
  # Regression test for issue 98: a single visit whose URL path contains the
  # percent-escape `%AE`.
  issue_url <- "https://www.omahasteaks.com/product/Private-Reserve%AE-Boneless-Strips-00000004718"
  visits <- as.wt_dt(
    data.frame(
      panelist_id = "abc",
      timestamp = as.POSIXct("2019-05-31 12:41:59"),
      url = issue_url
    )
  )

  # With decoding switched on, parsing this URL is expected to raise an error
  # (warnings are silenced so only the error is checked).
  expect_error(suppressWarnings(parse_path(visits, decode = TRUE)))

  # With decoding switched off, the same URL must parse without error.
  expect_no_error(parse_path(visits, decode = FALSE))
})

0 comments on commit 73bbbb4

Please sign in to comment.