diff --git a/NEWS.md b/NEWS.md index fa1c81e..ca608c6 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,8 +1,29 @@ # frictionless (development version) +## For users + +* `read_resource()` now supports reading from remote zip files, thanks to support in {vroom} (1.3.0) (#291). * `resources()` is soft-deprecated, please use `resource_names()` instead (#282). * `get_schema()` is soft-deprecated, please use `schema()` instead (#282). -* `read_resource()` now supports reading from remote zip files, thanks to support in {vroom} (1.3.0) (#291). + +## Changes for developers + +* Internal frictionless properties are now _attributes_, to better separate them from public Data Package properties (#289). If you use these internal properties, then update: + + ```R + package$directory + r <- frictionless:::get_resource(package, "resource_name") # Internal function + r$read_from + ``` + + to: + + ```R + attr(package, "directory") + r <- frictionless:::resource(package, "resource_name") # Renamed! + attr(r, "data_location") # Renamed! + ``` + * frictionless now relies on R >= 4.1.0 (because of an indirect {vroom} dependency) (#291) and uses base pipes (`|>` rather than `%>%`) (#292). # frictionless 1.2.1 diff --git a/R/check_package.R b/R/check_package.R index c5df8ec..da24f22 100644 --- a/R/check_package.R +++ b/R/check_package.R @@ -44,12 +44,21 @@ check_package <- function(package) { ) } - # Check package has directory (character) - if (!is.character(package$directory)) { + # Check all resources (if any) have a name + if (purrr::some(package$resources, ~ is.null(.x$name))) { + cli::cli_abort( + "All {.field resources} in {.arg package} must have a {.field name} + property.", + class = "frictionless_error_resources_without_name" + ) + } + + # Check package has directory attribute (character) + if (!is.character(attr(package, "directory"))) { cli::cli_abort( c( general_message, - "x" = "{.arg package} is missing a {.field directory} property or it is + "x" = "{.arg package} is missing a {.field directory} attribute or it is not a character.", "i" = tip_message ), @@ -57,15 +66,6 @@ check_package <- function(package) { ) } - # Check all resources (if any) have a name - if (purrr::some(package$resources, ~ is.null(.x$name))) { - cli::cli_abort( - "All {.field resources} in {.arg package} must have a {.field name} - property.", - class = "frictionless_error_resources_without_name" - ) - } - # Warn if class is missing if (!"datapackage" %in% class(package)) { cli::cli_warn( diff --git a/R/col_types.R b/R/col_types.R index ffa9167..3894ed5 100644 --- a/R/col_types.R +++ b/R/col_types.R @@ -16,7 +16,7 @@ cols <- function(schema) { # Create col_types col_types <- purrr::map(fields, field_to_col) - # Assign names: list("name1" = , "name2" = ...) + # Add names: list("name1" = , "name2" = ...) names(col_types) <- field_names # Replicate structure of readr::col_spec @@ -44,7 +44,7 @@ field_to_col <- function(field) { bare_number <- if (field$bareNumber %||% "" != FALSE) TRUE else FALSE format <- field$format %||% "default" # Undefined => default - # Assign types and formats + # Add types and formats col_type <- switch( type, "string" = col_string(enum), diff --git a/R/create_package.R b/R/create_package.R index 4ba8be2..fb22de2 100644 --- a/R/create_package.R +++ b/R/create_package.R @@ -3,12 +3,12 @@ #' Initiates a Data Package object, either from scratch or from an existing #' list. #' This Data Package object is a list with the following characteristics: -#' - A `datapackage` subclass. #' - All properties of the original `descriptor`. #' - A `resources` property, set to an empty list if undefined. -#' - A `directory` property, set to `"."` for the current directory if +#' - A `directory` attribute, set to `"."` for the current directory if #' undefined. #' It is used as the base path to access resources with [read_resource()]. +#' - A `datapackage` subclass. #' #' See `vignette("data-package")` to learn how this function implements the #' Data Package standard. @@ -36,9 +36,11 @@ create_package <- function(descriptor = NULL) { ) } - # Add properties + # Add resources property (also creates descriptor if NULL) descriptor$resources <- descriptor$resources %||% list() - descriptor$directory <- descriptor$directory %||% "." # Current directory + + # Add directory attribute + attr(descriptor, "directory") <- attr(descriptor, "directory") %||% "." # Add datapackage class if (!"datapackage" %in% class(descriptor)) { diff --git a/R/read_from_path.R b/R/read_from_path.R index 9dc5525..ad70ab7 100644 --- a/R/read_from_path.R +++ b/R/read_from_path.R @@ -33,7 +33,11 @@ read_from_path <- function(package, resource_name, col_select) { col_types <- cols(schema) # Get dialect (can be NULL) - dialect <- read_descriptor(resource$dialect, package$directory, safe = TRUE) + dialect <- read_descriptor( + resource$dialect, + attr(package, "directory"), + safe = TRUE + ) escape_backslash <- if (dialect$escapeChar %||% "not set" == "\\") { TRUE } else { diff --git a/R/read_package.R b/R/read_package.R index 8af3fd3..7bf934c 100644 --- a/R/read_package.R +++ b/R/read_package.R @@ -45,7 +45,7 @@ read_package <- function(file = "datapackage.json") { } # Add directory - descriptor$directory <- dirname(file) # Also works for URLs + attr(descriptor, "directory") <- dirname(file) # Also works for URLs # Create package create_package(descriptor) diff --git a/R/read_resource.R b/R/read_resource.R index 2aaab6c..4d4291e 100644 --- a/R/read_resource.R +++ b/R/read_resource.R @@ -50,11 +50,12 @@ read_resource <- function(package, resource_name, col_select = NULL) { resource <- resource(package, resource_name) # Read data directly - if (resource$read_from == "df") { + data_location <- attr(resource, "data_location") + if (data_location == "df") { df <- dplyr::as_tibble(resource$data) # Read data from data - } else if (resource$read_from == "data") { + } else if (data_location == "data") { df <- do.call( function(...) rbind.data.frame(..., stringsAsFactors = FALSE), resource$data @@ -62,7 +63,7 @@ read_resource <- function(package, resource_name, col_select = NULL) { df <- dplyr::as_tibble(df) # Read data from path(s) - } else if (resource$read_from == "path" || resource$read_from == "url") { + } else if (data_location == "path" || data_location == "url") { df <- read_from_path(package, resource_name, col_select) } return(df) diff --git a/R/resource.R b/R/resource.R index 89ab98c..3c4244c 100644 --- a/R/resource.R +++ b/R/resource.R @@ -4,8 +4,8 @@ #' described `resources`. #' #' @inheritParams read_resource -#' @return List describing a Data Resource, with new property `read_from` to -#' indicate how data should be read. +#' @return List describing a Data Resource, with new attribute `data_location` +#' to indicate how the data are attached. #' If present, `path` will be updated to contain the full path(s). #' @family accessor functions #' @noRd @@ -47,25 +47,26 @@ resource <- function(package, resource_name) { ) } - # Assign read_from property (based on path, then df, then data) + # Add data_location attribute (based on path, then df, then data) if (length(resource$path) != 0) { if (all(is_url(resource$path))) { - resource$read_from <- "url" + data_location <- "url" } else { - resource$read_from <- "path" + data_location <- "path" } # Expand paths to full paths, check if file exists and check path safety, # unless those paths were willingly added by user in add_resource() if (attr(resource, "path") %||% "" != "added") { resource$path <- purrr::map_chr( - resource$path, ~ check_path(.x, package$directory, safe = TRUE) + resource$path, ~ check_path(.x, attr(package, "directory"), safe = TRUE) ) } } else if (is.data.frame(resource$data)) { - resource$read_from <- "df" + data_location <- "df" } else if (!is.null(resource$data)) { - resource$read_from <- "data" + data_location <- "data" } + attr(resource, "data_location") <- data_location return(resource) } diff --git a/R/schema.R b/R/schema.R index e60efae..9252057 100644 --- a/R/schema.R +++ b/R/schema.R @@ -39,7 +39,11 @@ schema <- function(package, resource_name) { class = "frictionless_error_resource_without_schema" ) } - schema <- read_descriptor(resource$schema, package$directory, safe = TRUE) + schema <- read_descriptor( + resource$schema, + attr(package, "directory"), + safe = TRUE + ) # Check schema check_schema(schema) diff --git a/R/write_package.R b/R/write_package.R index f23fab5..cfcc2ba 100644 --- a/R/write_package.R +++ b/R/write_package.R @@ -73,7 +73,7 @@ write_package <- function(package, directory, compress = FALSE) { return_package <- package # Needs directory to remain valid # Write datapackage.json - package$directory <- NULL + attr(package, "directory") <- NULL package_json <- jsonlite::toJSON( package, pretty = TRUE, diff --git a/R/write_resource.R b/R/write_resource.R index 7d729ce..43014c5 100644 --- a/R/write_resource.R +++ b/R/write_resource.R @@ -14,7 +14,8 @@ write_resource <- function(package, resource_name, directory = ".", resource <- resource(package, resource_name) # Resource contains new data - if (resource$read_from == "df") { + data_location <- attr(resource, "data_location") + if (data_location == "df") { if (compress) { file_name <- paste(resource_name, "csv", "gz", sep = ".") } else { @@ -28,15 +29,14 @@ write_resource <- function(package, resource_name, directory = ".", resource$mediatype <- "text/csv" resource$encoding <- "utf-8" # Enforced by readr::write_csv() resource$dialect <- NULL - resource$read_from <- NULL resource$data <- NULL # Resource originally had data property - } else if (resource$read_from == "data") { - resource$read_from <- NULL + } else if (data_location == "data") { + # Do nothing # Resource has local paths (optionally mixed with URLs) - } else if (resource$read_from == "path") { + } else if (data_location == "path") { # Download or copy file to directory, point path to file name (in that dir) # Note that existing files will not be overwritten out_paths <- vector() @@ -56,14 +56,15 @@ write_resource <- function(package, resource_name, directory = ".", } out_paths <- append(out_paths, file_name) } - resource$read_from <- NULL resource$path <- out_paths # Resource has URL paths (only) - } else if (resource$read_from == "url") { + } else if (data_location == "url") { # Don't touch file, leave URL path as is - resource$read_from <- NULL } + # Remove attributes + attr(resource, "data_location") <- NULL + return(resource) } diff --git a/man/create_package.Rd b/man/create_package.Rd index ae7ff7d..1b49a50 100644 --- a/man/create_package.Rd +++ b/man/create_package.Rd @@ -18,12 +18,12 @@ Initiates a Data Package object, either from scratch or from an existing list. This Data Package object is a list with the following characteristics: \itemize{ -\item A \code{datapackage} subclass. \item All properties of the original \code{descriptor}. \item A \code{resources} property, set to an empty list if undefined. -\item A \code{directory} property, set to \code{"."} for the current directory if +\item A \code{directory} attribute, set to \code{"."} for the current directory if undefined. It is used as the base path to access resources with \code{\link[=read_resource]{read_resource()}}. +\item A \code{datapackage} subclass. } } \details{ diff --git a/tests/testthat/test-check_package.R b/tests/testthat/test-check_package.R index 79cb7d7..8e622db 100644 --- a/tests/testthat/test-check_package.R +++ b/tests/testthat/test-check_package.R @@ -60,7 +60,7 @@ test_that("check_package() returns error on missing or incorrect directory", { expect_error( check_package(list(resources = list())), regexp = paste( - "`package` is missing a directory property or it is not a character." + "`package` is missing a directory attribute or it is not a character." ), fixed = TRUE ) @@ -89,12 +89,14 @@ test_that("check_package() returns error if resources have no name", { }) test_that("check_package() returns warning on missing 'datapackage' class", { + p_invalid <- list(resources = list()) + attr(p_invalid, "directory") <- "." expect_warning( - check_package(list(resources = list(), directory = ".")), + check_package(p_invalid), class = "frictionless_warning_package_without_class" ) expect_warning( - check_package(list(resources = list(), directory = ".")), + check_package(p_invalid), regexp = "`package` is missing a \"datapackage\" class", fixed = TRUE ) diff --git a/tests/testthat/test-check_schema.R b/tests/testthat/test-check_schema.R index 5cfeaa4..50ee881 100644 --- a/tests/testthat/test-check_schema.R +++ b/tests/testthat/test-check_schema.R @@ -4,7 +4,7 @@ test_that("check_schema() returns schema invisibly on valid Table Schema", { # Can't obtain df using read_resource(), because that function uses # check_schema() (in schema()) internally, which is what we want to test df <- suppressMessages( - readr::read_csv(file.path(p$directory, p$resources[[1]]$path)) + readr::read_csv(file.path(attr(p, "directory"), p$resources[[1]]$path)) ) # Using schema() diff --git a/tests/testthat/test-create_package.R b/tests/testthat/test-create_package.R index ff9d3a1..d0005ab 100644 --- a/tests/testthat/test-create_package.R +++ b/tests/testthat/test-create_package.R @@ -35,10 +35,12 @@ test_that("create_package() sets resources or leaves as is", { test_that("create_package() sets directory or leaves as is", { new <- create_package() - expect_identical(new$directory, ".") + expect_identical(attr(new, "directory"), ".") - existing <- create_package(list(directory = "not_default")) - expect_identical(existing$directory, "not_default") + list_with_directory <- list() + attr(list_with_directory, "directory") <- "not_default" + existing <- create_package(list_with_directory) + expect_identical(attr(existing, "directory"), "not_default") }) test_that("create_package() adds class 'datapackage'", { diff --git a/tests/testthat/test-read_package.R b/tests/testthat/test-read_package.R index 103ff71..a2a01e3 100644 --- a/tests/testthat/test-read_package.R +++ b/tests/testthat/test-read_package.R @@ -16,10 +16,10 @@ test_that("read_package() returns a valid Data Package reading from path", { # Package has correct "directory", containing root dir of datapackage.json expect_identical( - p_local$directory, + attr(p_local, "directory"), sub("/datapackage.json", "", p_path, fixed = TRUE) ) - expect_identical(p_minimal$directory, "data") + expect_identical(attr(p_minimal, "directory"), "data") }) test_that("read_package() returns a valid Data Package reading from url", { @@ -40,7 +40,7 @@ test_that("read_package() returns a valid Data Package reading from url", { # Package has correct "directory", containing root dir of datapackage.json expect_identical( - p_remote$directory, + attr(p_remote, "directory"), sub("/datapackage.json", "", p_url, fixed = TRUE) ) }) diff --git a/tests/testthat/test-read_resource.R b/tests/testthat/test-read_resource.R index 7efcada..c61cecd 100644 --- a/tests/testthat/test-read_resource.R +++ b/tests/testthat/test-read_resource.R @@ -197,7 +197,7 @@ test_that("read_resource() returns error on invalid resource", { # Add valid path p_invalid$resources[[1]]$path <- "deployments.csv" - p_invalid$directory <- dirname( + attr(p_invalid, "directory") <- dirname( system.file("extdata", "v1", "datapackage.json", package = "frictionless") ) @@ -311,7 +311,7 @@ test_that("read_resource() can read safe local and remote Table Schema, skip_if_offline() p <- example_package() resource <- read_resource(p, "deployments") - p$directory <- "." + attr(p, "directory") <- "." # Use a remote path, otherwise schema and path need to share same directory p$resources[[1]]$path <- file.path( @@ -359,7 +359,7 @@ test_that("read_resource() can read safe local and remote CSV dialect", { skip_if_offline() p <- example_package() resource <- read_resource(p, "deployments") - p$directory <- "." + attr(p, "directory") <- "." # Use a remote path, otherwise dialect and path need to share same directory p$resources[[1]]$path <- file.path( @@ -407,7 +407,7 @@ test_that("read_resource() understands CSV dialect", { # Create package with non-default dialect properties p_dialect <- p - p_dialect$directory <- "." + attr(p_dialect, "directory") <- "." p_dialect$resources[[1]]$path <- test_path("data/deployments_dialect.csv") p_dialect$resources[[1]]$dialect <- list( delimiter = "/", @@ -436,7 +436,7 @@ test_that("read_resource() understands missing values", { # Create package with non-default missing values p_missing <- p - p_missing$directory <- "." + attr(p_missing, "directory") <- "." p_missing$resources[[1]]$path <- test_path("data/deployments_missingvalues.csv") p_missing$resources[[1]]$schema$missingValues <- @@ -450,7 +450,7 @@ test_that("read_resource() understands encoding", { # Create package with non-default encoding p_encoding <- p - p_encoding$directory <- "." + attr(p_encoding, "directory") <- "." p_encoding$resources[[1]]$path <- test_path("data/deployments_encoding.csv") p_encoding$resources[[1]]$encoding <- "windows-1252" expect_identical(read_resource(p_encoding, "deployments"), resource) @@ -545,7 +545,7 @@ test_that("read_resource() handles LF and CRLF line terminator characters", { resource <- read_resource(p, "deployments") # This file has LF p_crlf <- p - p_crlf$directory <- "." + attr(p_crlf, "directory") <- "." p_crlf$resources[[1]]$path <- test_path("data/deployments_crlf.csv") # File with CRLF expect_identical(read_resource(p_crlf, "deployments"), resource) @@ -559,7 +559,7 @@ test_that("read_resource() can read compressed files", { # File created in terminal with: # zip deployments.csv.zip deployments.csv p_local_zip <- p - p_local_zip$directory <- "." + attr(p_local_zip, "directory") <- "." p_local_zip$resources[[1]]$path <- test_path("data/deployments.csv.zip") p_remote_zip <- p p_remote_zip$resources[[1]]$path <- file.path( @@ -570,7 +570,7 @@ test_that("read_resource() can read compressed files", { # File created in terminal with: # gzip deployments.csv p_local_gz <- p - p_local_gz$directory <- "." + attr(p_local_gz, "directory") <- "." p_local_gz$resources[[1]]$path <- test_path("data/deployments.csv.gz") p_remote_gz <- p p_remote_gz$resources[[1]]$path <- file.path( diff --git a/tests/testthat/test-write_package.R b/tests/testthat/test-write_package.R index 90e3e0a..9d69aa5 100644 --- a/tests/testthat/test-write_package.R +++ b/tests/testthat/test-write_package.R @@ -8,7 +8,7 @@ test_that("write_package() returns output Data Package (invisibly)", { p_from_file <- read_package(file.path(dir, "datapackage.json")) # p_from_file$directory will differ: overwrite to make the same - p_from_file$directory <- p_written$directory + attr(p_from_file, "directory") <- attr(p_written, "directory") expect_invisible(suppressMessages(write_package(p, dir))) expect_identical(p_written, p_from_file) @@ -54,8 +54,7 @@ test_that("write_package() writes unaltered datapackage.json as is", { suppressMessages(write_package(p, dir)) json_as_written <- readr::read_lines(file.path(dir, "datapackage.json")) - # Output json = input json. This also tests if custom property "directory" - # is removed and json is printed "pretty" + # Output json = input json. This also tests the json is printed "pretty" expect_identical(json_as_written, json_original) }) @@ -124,7 +123,7 @@ test_that("write_package() downloads file(s) for path = local in remote p <- example_package() # Make remote - p$directory <- file.path( + attr(p, "directory") <- file.path( "https://raw.githubusercontent.com/frictionlessdata/frictionless-r/", "main/inst/extdata/v1" ) @@ -187,7 +186,7 @@ test_that("write_package() leaves as is for path = URL in remote package", { p <- example_package() # Make remote - p$directory <- file.path( + attr(p, "directory") <- file.path( "https://raw.githubusercontent.com/frictionlessdata/frictionless-r/", "main/inst/extdata/v1" ) @@ -232,7 +231,7 @@ test_that("write_package() leaves as is for data = json in remote package", { p <- example_package() # Make remote - p$directory <- file.path( + attr(p, "directory") <- file.path( "https://raw.githubusercontent.com/frictionlessdata/frictionless-r/", "main/inst/extdata/v1" ) @@ -268,7 +267,7 @@ test_that("write_package() creates file for data = df in remote package", { p <- example_package() # Make remote - p$directory <- file.path( + attr(p, "directory") <- file.path( "https://raw.githubusercontent.com/frictionlessdata/frictionless-r/", "main/inst/extdata/v1" ) @@ -333,7 +332,6 @@ test_that("write_package() sets correct properties for data frame resources", { expect_null(resource_written$dialect) expect_identical(resource_written$schema, schema) expect_null(resource_written$data) - expect_null(resource_written$read_from) }) test_that("write_package() retains custom properties set in add_resource()", {