From 6e6b2476bbe4236d40358d7a109a5f2ef23770b8 Mon Sep 17 00:00:00 2001 From: JulianUmbhau Date: Mon, 23 Feb 2026 14:30:38 +0100 Subject: [PATCH 1/5] adding labels to calls, check_labels function, default NULL for labels and tests for check_labels function --- R/bq-parse.R | 2 +- R/bq-perform.R | 57 ++++++++++++++++++++++++++++++----- R/utils.R | 59 +++++++++++++++++++++++++++++++++++++ R/zzz.R | 3 +- tests/testthat/test-utils.R | 19 ++++++++++++ 5 files changed, 130 insertions(+), 10 deletions(-) diff --git a/R/bq-parse.R b/R/bq-parse.R index def50560..8e94850e 100644 --- a/R/bq-parse.R +++ b/R/bq-parse.R @@ -1,6 +1,6 @@ bq_parse_single <- function(value, type, ...) { field <- bq_field("", type, ...) - field_j <- jsonlite::toJSON(as_json(field)) + field_j <- jsonlite::toJSON(as_json(field), auto_unbox = TRUE) value_j <- jsonlite::toJSON(value, auto_unbox = TRUE) bq_field_init(field_j, value_j) diff --git a/R/bq-perform.R b/R/bq-perform.R index b478b1f4..5b41ea3b 100644 --- a/R/bq-perform.R +++ b/R/bq-perform.R @@ -72,6 +72,8 @@ bq_perform_extract <- function( check_bool(print_header) check_string(billing) + labels <- check_labels(getOption("bigrquery.labels")) + url <- bq_path(billing, jobs = "") body <- list( configuration = list( @@ -81,7 +83,8 @@ bq_perform_extract <- function( destinationFormat = unbox(destination_format), compression = unbox(compression), printHeader = unbox(print_header) - ) + ), + labels = labels ) ) @@ -140,6 +143,8 @@ bq_perform_upload <- function( check_string(billing) json_digits <- check_digits(json_digits) + labels <- check_labels(getOption("bigrquery.labels")) + load <- list( sourceFormat = unbox(source_format), destinationTable = tableReference(x), @@ -154,11 +159,16 @@ bq_perform_upload <- function( load$autodetect <- unbox(TRUE) } - metadata <- list(configuration = list(load = load)) + metadata <- list( + configuration = list( + load = load, + labels = labels + ) + ) metadata <- bq_body(metadata, ...) metadata <- list( "type" = "application/json; charset=UTF-8", - "content" = jsonlite::toJSON(metadata, pretty = TRUE, digits = json_digits) + "content" = jsonlite::toJSON(metadata, auto_unbox = TRUE, pretty = TRUE) ) if (source_format == "NEWLINE_DELIMITED_JSON") { @@ -261,6 +271,8 @@ bq_perform_load <- function( check_string(create_disposition) check_string(write_disposition) + labels <- check_labels(getOption("bigrquery.labels")) + load <- list( sourceUris = as.list(source_uris), sourceFormat = unbox(source_format), @@ -280,7 +292,12 @@ bq_perform_load <- function( load$autodetect <- TRUE } - body <- list(configuration = list(load = load)) + body <- list( + configuration = list( + load = load, + labels = labels + ) + ) url <- bq_path(billing, jobs = "") res <- bq_post( @@ -332,6 +349,8 @@ bq_perform_query <- function( check_bool(use_legacy_sql) check_string(priority) + labels <- check_labels(getOption("bigrquery.labels")) + query <- list( query = unbox(query), useLegacySql = unbox(use_legacy_sql), @@ -357,7 +376,12 @@ bq_perform_query <- function( } url <- bq_path(billing, jobs = "") - body <- list(configuration = list(query = query)) + body <- list( + configuration = list( + query = query, + labels = labels + ) + ) res <- bq_post( url, @@ -383,9 +407,16 @@ bq_perform_query_dry_run <- function( parameters = parameters, use_legacy_sql = use_legacy_sql ) + labels <- check_labels(getOption("bigrquery.labels")) url <- bq_path(billing, jobs = "") - body <- list(configuration = list(query = query, dryRun = unbox(TRUE))) + body <- list( + configuration = list( + query = query, + labels = labels, + dryRun = unbox(TRUE) + ) + ) res <- bq_post( url, @@ -412,8 +443,16 @@ bq_perform_query_schema <- function( use_legacy_sql = FALSE ) + labels <- check_labels(getOption("bigrquery.labels")) + url <- bq_path(billing, jobs = "") - body <- list(configuration = list(query = query, dryRun = unbox(TRUE))) + body <- list( + configuration = list( + query = query, + labels = labels, + dryRun = unbox(TRUE) + ) + ) res <- bq_post( url, @@ -463,6 +502,7 @@ bq_perform_copy <- function( ) { billing <- billing %||% dest$project url <- bq_path(billing, jobs = "") + labels <- check_labels(getOption("bigrquery.labels")) body <- list( configuration = list( @@ -471,7 +511,8 @@ bq_perform_copy <- function( destinationTable = tableReference(dest), createDisposition = unbox(create_disposition), writeDisposition = unbox(write_disposition) - ) + ), + labels = labels ) ) diff --git a/R/utils.R b/R/utils.R index 18af81d7..1c398235 100644 --- a/R/utils.R +++ b/R/utils.R @@ -91,3 +91,62 @@ cli_escape <- function(x) { x <- gsub("}", "}}", x, fixed = TRUE) x } + +check_labels <- function(labels) { + # Handle NULL, NA, or empty inputs + if ( + is.null(labels) || + length(labels) == 0 || + (length(labels) == 1 && is.na(labels)) + ) { + return(NULL) + } + + if (!is.list(labels)) { + warning( + paste0("Labels must to be a dictionary list; dropping labels"), + immediate. = TRUE + ) + return(NULL) + } + nms <- names(labels) + if (is.null(nms) || anyNA(nms) || any(nms == "")) { + warning( + "Label keys must be non-empty strings; dropping labels", + immediate. = TRUE, + call. = FALSE + ) + return(NULL) + } + for (nm in names(labels)) { + if (!is.character(labels[[nm]]) || length(labels[[nm]]) != 1) { + warning( + sprintf("Label '%s' must be a single string; dropping labels", nm), + immediate. = TRUE + ) + return(NULL) + } + if (nm != tolower(nm)) { + warning( + sprintf( + "Label key '%s' must match ^[a-z0-9_-]{0,62}$; dropping labels", + nm + ), + immediate. = TRUE + ) + return(NULL) + } + if (labels[[nm]] != tolower(labels[[nm]])) { + warning( + sprintf( + "Label value '%s' must be empty or match ^[a-z0-9_-]{0,62}$; dropping labels", + labels[[nm]] + ), + immediate. = TRUE + ) + return(NULL) + } + } + + return(labels) +} diff --git a/R/zzz.R b/R/zzz.R index e3b1c8e2..043e5675 100644 --- a/R/zzz.R +++ b/R/zzz.R @@ -23,7 +23,8 @@ op <- options() defaults <- list( bigrquery.quiet = NA, - bigrquery.page.size = 1e4 + bigrquery.page.size = 1e4, + bigrquery.labels = NULL ) toset <- !(names(defaults) %in% names(op)) if (any(toset)) { diff --git a/tests/testthat/test-utils.R b/tests/testthat/test-utils.R index 2f16dc84..d7d703fa 100644 --- a/tests/testthat/test-utils.R +++ b/tests/testthat/test-utils.R @@ -10,3 +10,22 @@ test_that("cli_escape() doubles cli braces", { expect_equal(cli_escape("no braces"), "no braces") expect_equal(cli_escape("{x}"), "{{x}}") }) + +test_that("check_labels() accepts valid labels and NULL-like inputs", { + expect_null(check_labels(NULL)) + expect_null(check_labels(NA)) + expect_null(check_labels(list())) + + expect_equal(check_labels(list(env = "prod")), list(env = "prod")) + expect_equal( + check_labels(list(env = "prod", team = "data")), + list(env = "prod", team = "data") + ) +}) + +test_that("check_labels() warns and returns NULL for invalid inputs", { + expect_warning(check_labels("not-a-list"), "dictionary list") + expect_warning(check_labels(list("no-name")), "non-empty strings") + expect_warning(check_labels(list(ENV = "prod")), "must match") + expect_warning(check_labels(list(env = "Prod")), "must be empty or match") +}) From e65a4b3f415d0e8123984daf52db39a5632ebcc9 Mon Sep 17 00:00:00 2001 From: JulianUmbhau Date: Mon, 2 Mar 2026 11:25:55 +0100 Subject: [PATCH 2/5] changing from list to named vector --- R/utils.R | 57 ++++++++----------------------------- tests/testthat/test-utils.R | 17 +++++------ 2 files changed, 19 insertions(+), 55 deletions(-) diff --git a/R/utils.R b/R/utils.R index 1c398235..f0534bb2 100644 --- a/R/utils.R +++ b/R/utils.R @@ -93,59 +93,26 @@ cli_escape <- function(x) { } check_labels <- function(labels) { - # Handle NULL, NA, or empty inputs - if ( - is.null(labels) || - length(labels) == 0 || - (length(labels) == 1 && is.na(labels)) - ) { + if (is.null(labels) || length(labels) == 0 || (length(labels) == 1 && is.na(labels))) { return(NULL) } - if (!is.list(labels)) { - warning( - paste0("Labels must to be a dictionary list; dropping labels"), - immediate. = TRUE - ) + if (!is.character(labels) || is.null(names(labels)) || anyNA(names(labels)) || any(names(labels) == "")) { + warning("Labels must be a named character vector; dropping labels", immediate. = TRUE, call. = FALSE) return(NULL) } + nms <- names(labels) - if (is.null(nms) || anyNA(nms) || any(nms == "")) { - warning( - "Label keys must be non-empty strings; dropping labels", - immediate. = TRUE, - call. = FALSE - ) + bad_keys <- nms[nms != tolower(nms)] + if (length(bad_keys) > 0) { + warning(sprintf("Label key '%s' must match ^[a-z0-9_-]{0,62}$; dropping labels", bad_keys[[1]]), immediate. = TRUE, call. = FALSE) return(NULL) } - for (nm in names(labels)) { - if (!is.character(labels[[nm]]) || length(labels[[nm]]) != 1) { - warning( - sprintf("Label '%s' must be a single string; dropping labels", nm), - immediate. = TRUE - ) - return(NULL) - } - if (nm != tolower(nm)) { - warning( - sprintf( - "Label key '%s' must match ^[a-z0-9_-]{0,62}$; dropping labels", - nm - ), - immediate. = TRUE - ) - return(NULL) - } - if (labels[[nm]] != tolower(labels[[nm]])) { - warning( - sprintf( - "Label value '%s' must be empty or match ^[a-z0-9_-]{0,62}$; dropping labels", - labels[[nm]] - ), - immediate. = TRUE - ) - return(NULL) - } + + bad_vals <- labels[labels != tolower(labels)] + if (length(bad_vals) > 0) { + warning(sprintf("Label value '%s' must be empty or match ^[a-z0-9_-]{0,62}$; dropping labels", bad_vals[[1]]), immediate. = TRUE, call. = FALSE) + return(NULL) } return(labels) diff --git a/tests/testthat/test-utils.R b/tests/testthat/test-utils.R index d7d703fa..aeda6e5c 100644 --- a/tests/testthat/test-utils.R +++ b/tests/testthat/test-utils.R @@ -14,18 +14,15 @@ test_that("cli_escape() doubles cli braces", { test_that("check_labels() accepts valid labels and NULL-like inputs", { expect_null(check_labels(NULL)) expect_null(check_labels(NA)) - expect_null(check_labels(list())) + expect_null(check_labels(character())) - expect_equal(check_labels(list(env = "prod")), list(env = "prod")) - expect_equal( - check_labels(list(env = "prod", team = "data")), - list(env = "prod", team = "data") - ) + expect_equal(check_labels(c(env = "prod")), c(env = "prod")) + expect_equal(check_labels(c(env = "prod", team = "data")), c(env = "prod", team = "data")) }) test_that("check_labels() warns and returns NULL for invalid inputs", { - expect_warning(check_labels("not-a-list"), "dictionary list") - expect_warning(check_labels(list("no-name")), "non-empty strings") - expect_warning(check_labels(list(ENV = "prod")), "must match") - expect_warning(check_labels(list(env = "Prod")), "must be empty or match") + expect_warning(check_labels(list(env = "prod")), "named character vector") + expect_warning(check_labels(c("no-name")), "named character vector") + expect_warning(check_labels(c(ENV = "prod")), "must match") + expect_warning(check_labels(c(env = "Prod")), "must be empty or match") }) From ab9f3c73b321940368fb983ea6e078b9575db036 Mon Sep 17 00:00:00 2001 From: JulianUmbhau Date: Fri, 6 Mar 2026 23:50:32 +0100 Subject: [PATCH 3/5] Corrections based on PR comments. Returned to use list, cleaned up check_labels and corrected tests --- R/bq-parse.R | 2 +- R/bq-perform.R | 2 +- R/utils.R | 22 ++++------------------ tests/testthat/test-utils.R | 16 +++++++--------- 4 files changed, 13 insertions(+), 29 deletions(-) diff --git a/R/bq-parse.R b/R/bq-parse.R index 8e94850e..def50560 100644 --- a/R/bq-parse.R +++ b/R/bq-parse.R @@ -1,6 +1,6 @@ bq_parse_single <- function(value, type, ...) { field <- bq_field("", type, ...) - field_j <- jsonlite::toJSON(as_json(field), auto_unbox = TRUE) + field_j <- jsonlite::toJSON(as_json(field)) value_j <- jsonlite::toJSON(value, auto_unbox = TRUE) bq_field_init(field_j, value_j) diff --git a/R/bq-perform.R b/R/bq-perform.R index 5b41ea3b..b9eaa8ca 100644 --- a/R/bq-perform.R +++ b/R/bq-perform.R @@ -168,7 +168,7 @@ bq_perform_upload <- function( metadata <- bq_body(metadata, ...) metadata <- list( "type" = "application/json; charset=UTF-8", - "content" = jsonlite::toJSON(metadata, auto_unbox = TRUE, pretty = TRUE) + "content" = jsonlite::toJSON(metadata, auto_unbox = TRUE, pretty = TRUE, digits = json_digits) ) if (source_format == "NEWLINE_DELIMITED_JSON") { diff --git a/R/utils.R b/R/utils.R index f0534bb2..99a7691c 100644 --- a/R/utils.R +++ b/R/utils.R @@ -93,27 +93,13 @@ cli_escape <- function(x) { } check_labels <- function(labels) { - if (is.null(labels) || length(labels) == 0 || (length(labels) == 1 && is.na(labels))) { + if (is.null(labels) || length(labels) == 0) { return(NULL) } - if (!is.character(labels) || is.null(names(labels)) || anyNA(names(labels)) || any(names(labels) == "")) { - warning("Labels must be a named character vector; dropping labels", immediate. = TRUE, call. = FALSE) - return(NULL) - } - - nms <- names(labels) - bad_keys <- nms[nms != tolower(nms)] - if (length(bad_keys) > 0) { - warning(sprintf("Label key '%s' must match ^[a-z0-9_-]{0,62}$; dropping labels", bad_keys[[1]]), immediate. = TRUE, call. = FALSE) - return(NULL) - } - - bad_vals <- labels[labels != tolower(labels)] - if (length(bad_vals) > 0) { - warning(sprintf("Label value '%s' must be empty or match ^[a-z0-9_-]{0,62}$; dropping labels", bad_vals[[1]]), immediate. = TRUE, call. = FALSE) - return(NULL) + if (!is.list(labels) || any(names2(labels) == "")) { + cli::cli_abort("Labels must be a named list.") } - return(labels) + labels } diff --git a/tests/testthat/test-utils.R b/tests/testthat/test-utils.R index aeda6e5c..698dc286 100644 --- a/tests/testthat/test-utils.R +++ b/tests/testthat/test-utils.R @@ -13,16 +13,14 @@ test_that("cli_escape() doubles cli braces", { test_that("check_labels() accepts valid labels and NULL-like inputs", { expect_null(check_labels(NULL)) - expect_null(check_labels(NA)) - expect_null(check_labels(character())) + expect_null(check_labels(list())) - expect_equal(check_labels(c(env = "prod")), c(env = "prod")) - expect_equal(check_labels(c(env = "prod", team = "data")), c(env = "prod", team = "data")) + expect_equal(check_labels(list(env = "prod")), list(env = "prod")) + expect_equal(check_labels(list(env = "prod", team = "data")), list(env = "prod", team = "data")) + expect_equal(check_labels(list(env = "")), list(env = "")) }) -test_that("check_labels() warns and returns NULL for invalid inputs", { - expect_warning(check_labels(list(env = "prod")), "named character vector") - expect_warning(check_labels(c("no-name")), "named character vector") - expect_warning(check_labels(c(ENV = "prod")), "must match") - expect_warning(check_labels(c(env = "Prod")), "must be empty or match") +test_that("check_labels() errors on invalid inputs", { + expect_error(check_labels(c(env = "prod")), "named list") + expect_error(check_labels(list("no-name")), "named list") }) From 85135aa85706e09d1bd95cfd8fd8ac05fd311ad4 Mon Sep 17 00:00:00 2001 From: Hadley Wickham Date: Thu, 30 Apr 2026 17:42:01 -0500 Subject: [PATCH 4/5] Polishing * Make it an explicit argument * Move `check_labels()` to standard structure * Add news bullet --- NEWS.md | 1 + R/bq-perform.R | 52 ++++++++++++++++----------- R/dbi-connection.R | 12 +++++-- R/dbi-driver.R | 5 ++- R/dbi-result.R | 1 + R/dplyr.R | 34 ++++++++++++++---- R/utils.R | 29 ++++++++++++--- man/api-perform.Rd | 27 ++++++++++---- man/bigquery.Rd | 9 ++++- man/bigrquery-package.Rd | 1 + man/bq_auth.Rd | 6 ++-- man/bq_auth_configure.Rd | 8 ++--- man/bq_deauth.Rd | 6 ++-- man/bq_has_token.Rd | 4 +-- man/bq_table_download.Rd | 2 +- man/bq_token.Rd | 6 ++-- man/bq_user.Rd | 4 +-- man/collect.tbl_BigQueryConnection.Rd | 4 +-- tests/testthat/_snaps/dbi-driver.md | 8 +++++ tests/testthat/_snaps/utils.md | 18 ++++++++++ tests/testthat/test-dbi-driver.R | 22 ++++++++++++ tests/testthat/test-utils.R | 18 +++++----- 22 files changed, 206 insertions(+), 71 deletions(-) create mode 100644 tests/testthat/_snaps/dbi-driver.md diff --git a/NEWS.md b/NEWS.md index 2a07273b..d1f0b1fd 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,5 +1,6 @@ # bigrquery (development version) +* `bq_perform_query()` and friends gain a `labels` argument that attaches [BigQuery labels](https://cloud.google.com/bigquery/docs/labels-intro) to the resulting job, useful for cost allocation. `dbConnect()` gains a matching `labels` argument that is forwarded to every job run on the connection. Defaults to `getOption("bigrquery.labels")` (@JulianUmbhau, #673). * BigQuery error messages containing `{` or `}` are no longer mistaken for cli expressions, so the underlying server message is shown instead of a cli parse failure (#677). * `bq_perform_upload()` and friends now default to 22 digits of accuracy, and now allow you to change this value with the new `json_digits` argument. * Always upload `POSIXt` objects with 6 digits (i.e. microsecond) precision (#660). diff --git a/R/bq-perform.R b/R/bq-perform.R index b9eaa8ca..0b2e3c90 100644 --- a/R/bq-perform.R +++ b/R/bq-perform.R @@ -56,6 +56,11 @@ NULL #' snake_case names are automatically converted to camelCase. #' @param print_header Whether to print out a header row in the results. #' @param billing Identifier of project to bill. +#' @param labels A named list of strings used to attach +#' [BigQuery labels](https://cloud.google.com/bigquery/docs/labels-intro) +#' to the resulting job, e.g. `list(env = "prod", team = "data")`. This +#' is most useful for cost allocation and other FinOps reporting. +#' Defaults to the value of `getOption("bigrquery.labels")`. bq_perform_extract <- function( x, destination_uris, @@ -63,7 +68,8 @@ bq_perform_extract <- function( compression = "NONE", ..., print_header = TRUE, - billing = x$project + billing = x$project, + labels = getOption("bigrquery.labels") ) { x <- as_bq_table(x) destination_uris <- as.character(destination_uris) # for gs_object @@ -71,8 +77,7 @@ bq_perform_extract <- function( check_string(compression) check_bool(print_header) check_string(billing) - - labels <- check_labels(getOption("bigrquery.labels")) + check_labels(labels) url <- bq_path(billing, jobs = "") body <- list( @@ -130,7 +135,8 @@ bq_perform_upload <- function( write_disposition = "WRITE_EMPTY", ..., billing = x$project, - json_digits = NULL + json_digits = NULL, + labels = getOption("bigrquery.labels") ) { x <- as_bq_table(x) if (!is.data.frame(values)) { @@ -142,8 +148,7 @@ bq_perform_upload <- function( check_string(write_disposition) check_string(billing) json_digits <- check_digits(json_digits) - - labels <- check_labels(getOption("bigrquery.labels")) + check_labels(labels) load <- list( sourceFormat = unbox(source_format), @@ -168,7 +173,12 @@ bq_perform_upload <- function( metadata <- bq_body(metadata, ...) metadata <- list( "type" = "application/json; charset=UTF-8", - "content" = jsonlite::toJSON(metadata, auto_unbox = TRUE, pretty = TRUE, digits = json_digits) + "content" = jsonlite::toJSON( + metadata, + auto_unbox = TRUE, + pretty = TRUE, + digits = json_digits + ) ) if (source_format == "NEWLINE_DELIMITED_JSON") { @@ -261,7 +271,8 @@ bq_perform_load <- function( nskip = 0, create_disposition = "CREATE_IF_NEEDED", write_disposition = "WRITE_EMPTY", - ... + ..., + labels = getOption("bigrquery.labels") ) { x <- as_bq_table(x) source_uris <- as.character(source_uris) @@ -270,8 +281,7 @@ bq_perform_load <- function( check_number_decimal(nskip, min = 0) check_string(create_disposition) check_string(write_disposition) - - labels <- check_labels(getOption("bigrquery.labels")) + check_labels(labels) load <- list( sourceUris = as.list(source_uris), @@ -339,7 +349,8 @@ bq_perform_query <- function( create_disposition = "CREATE_IF_NEEDED", write_disposition = "WRITE_EMPTY", use_legacy_sql = FALSE, - priority = "INTERACTIVE" + priority = "INTERACTIVE", + labels = getOption("bigrquery.labels") ) { query <- as_query(query) check_string(billing) @@ -348,8 +359,7 @@ bq_perform_query <- function( check_string(write_disposition) check_bool(use_legacy_sql) check_string(priority) - - labels <- check_labels(getOption("bigrquery.labels")) + check_labels(labels) query <- list( query = unbox(query), @@ -399,7 +409,8 @@ bq_perform_query_dry_run <- function( ..., default_dataset = NULL, parameters = NULL, - use_legacy_sql = FALSE + use_legacy_sql = FALSE, + labels = getOption("bigrquery.labels") ) { query <- bq_perform_query_data( query = query, @@ -407,7 +418,7 @@ bq_perform_query_dry_run <- function( parameters = parameters, use_legacy_sql = use_legacy_sql ) - labels <- check_labels(getOption("bigrquery.labels")) + check_labels(labels) url <- bq_path(billing, jobs = "") body <- list( @@ -434,7 +445,8 @@ bq_perform_query_schema <- function( billing, ..., default_dataset = NULL, - parameters = NULL + parameters = NULL, + labels = getOption("bigrquery.labels") ) { query <- bq_perform_query_data( query = query, @@ -442,8 +454,7 @@ bq_perform_query_schema <- function( parameters = parameters, use_legacy_sql = FALSE ) - - labels <- check_labels(getOption("bigrquery.labels")) + check_labels(labels) url <- bq_path(billing, jobs = "") body <- list( @@ -498,11 +509,12 @@ bq_perform_copy <- function( create_disposition = "CREATE_IF_NEEDED", write_disposition = "WRITE_EMPTY", ..., - billing = NULL + billing = NULL, + labels = getOption("bigrquery.labels") ) { billing <- billing %||% dest$project url <- bq_path(billing, jobs = "") - labels <- check_labels(getOption("bigrquery.labels")) + check_labels(labels) body <- list( configuration = list( diff --git a/R/dbi-connection.R b/R/dbi-connection.R index 5636d4c8..40d4e4ac 100644 --- a/R/dbi-connection.R +++ b/R/dbi-connection.R @@ -8,7 +8,8 @@ BigQueryConnection <- function( page_size = 1e4, quiet = NA, use_legacy_sql = FALSE, - bigint = c("integer", "integer64", "numeric", "character") + bigint = c("integer", "integer64", "numeric", "character"), + labels = NULL ) { connection_capture() @@ -20,7 +21,8 @@ BigQueryConnection <- function( page_size = as.integer(page_size), quiet = quiet, use_legacy_sql = use_legacy_sql, - bigint = match.arg(bigint) + bigint = match.arg(bigint), + labels = labels ) } @@ -36,7 +38,8 @@ setClass( use_legacy_sql = "logical", page_size = "integer", quiet = "logical", - bigint = "character" + bigint = "character", + labels = "ANY" ) ) @@ -113,6 +116,7 @@ setMethod( default_dataset = ds, quiet = conn@quiet, parameters = params, + labels = conn@labels, ... ) bq_job_wait(job, quiet = conn@quiet) @@ -256,6 +260,7 @@ dbWriteTable_bq <- function( create_disposition = create_disposition, write_disposition = write_disposition, billing = conn@billing, + labels = conn@labels, ... ) invisible(TRUE) @@ -307,6 +312,7 @@ dbAppendTable_bq <- function(conn, name, value, ..., row.names = NULL) { create_disposition = "CREATE_NEVER", write_disposition = "WRITE_APPEND", billing = conn@billing, + labels = conn@labels, ... ) on_connection_updated(conn, toString(tb)) diff --git a/R/dbi-driver.R b/R/dbi-driver.R index ef5c5f4b..f3c2a196 100644 --- a/R/dbi-driver.R +++ b/R/dbi-driver.R @@ -84,6 +84,7 @@ setMethod( quiet = NA, use_legacy_sql = FALSE, bigint = c("integer", "integer64", "numeric", "character"), + labels = getOption("bigrquery.labels"), ... ) { check_string(project) @@ -93,6 +94,7 @@ setMethod( check_bool(quiet, allow_na = TRUE) check_bool(use_legacy_sql) bigint <- arg_match(bigint) + check_labels(labels, call = quote(dbConnect())) BigQueryConnection( project = project, @@ -101,7 +103,8 @@ setMethod( page_size = page_size, quiet = quiet, use_legacy_sql = use_legacy_sql, - bigint = bigint + bigint = bigint, + labels = labels ) } ) diff --git a/R/dbi-result.R b/R/dbi-result.R index e3d38fcb..ab634b89 100644 --- a/R/dbi-result.R +++ b/R/dbi-result.R @@ -9,6 +9,7 @@ BigQueryResult <- function(conn, sql, params = NULL, ...) { default_dataset = ds, quiet = conn@quiet, parameters = params, + labels = conn@labels, ... ) diff --git a/R/dplyr.R b/R/dplyr.R index 8f63b5b9..6555c360 100644 --- a/R/dplyr.R +++ b/R/dplyr.R @@ -50,7 +50,8 @@ tbl.BigQueryConnection <- function(src, from, ...) { schema <- bq_perform_query_schema( sql, billing = src$con@billing, - default_dataset = dataset + default_dataset = dataset, + labels = src$con@labels ) vars <- map_chr(schema, "[[", "name") @@ -94,7 +95,8 @@ db_compute.BigQueryConnection <- function( tb <- bq_project_query( con@project, sql, - destination_table = destination_table + destination_table = destination_table, + labels = con@labels ) } else { ds <- bq_dataset(con@project, con@dataset) @@ -103,7 +105,8 @@ db_compute.BigQueryConnection <- function( tb <- bq_dataset_query( ds, query = sql, - destination_table = destination_table + destination_table = destination_table, + labels = con@labels ) } @@ -133,7 +136,13 @@ db_copy_to.BigQueryConnection <- function( tb <- as_bq_table(con, table) write <- if (overwrite) "WRITE_TRUNCATE" else "WRITE_EMPTY" - bq_table_upload(tb, values, fields = types, write_disposition = write) + bq_table_upload( + tb, + values, + fields = types, + write_disposition = write, + labels = con@labels + ) table } @@ -181,10 +190,23 @@ collect.tbl_BigQueryConnection <- function( sql <- dbplyr::db_sql_render(con, x) if (is.null(con@dataset)) { - tb <- bq_project_query(billing, sql, quiet = con@quiet, ...) + tb <- bq_project_query( + billing, + sql, + quiet = con@quiet, + labels = con@labels, + ... + ) } else { ds <- as_bq_dataset(con) - tb <- bq_dataset_query(ds, sql, quiet = con@quiet, billing = billing, ...) + tb <- bq_dataset_query( + ds, + sql, + quiet = con@quiet, + billing = billing, + labels = con@labels, + ... + ) } } diff --git a/R/utils.R b/R/utils.R index 99a7691c..7d34313f 100644 --- a/R/utils.R +++ b/R/utils.R @@ -92,14 +92,33 @@ cli_escape <- function(x) { x } -check_labels <- function(labels) { - if (is.null(labels) || length(labels) == 0) { - return(NULL) +check_labels <- function( + labels, + arg = caller_arg(labels), + call = caller_env() +) { + if (is.null(labels)) { + return(invisible()) } if (!is.list(labels) || any(names2(labels) == "")) { - cli::cli_abort("Labels must be a named list.") + stop_input_type( + labels, + "a named list", + allow_null = TRUE, + arg = arg, + call = call + ) + } + is_string <- vapply(labels, is_string, logical(1)) + if (!all(is_string)) { + stop_input_type( + labels, + "a named list of strings", + arg = arg, + call = call + ) } - labels + invisible() } diff --git a/man/api-perform.Rd b/man/api-perform.Rd index 6f903820..d04b36f4 100644 --- a/man/api-perform.Rd +++ b/man/api-perform.Rd @@ -18,7 +18,8 @@ bq_perform_extract( compression = "NONE", ..., print_header = TRUE, - billing = x$project + billing = x$project, + labels = getOption("bigrquery.labels") ) bq_perform_upload( @@ -30,7 +31,8 @@ bq_perform_upload( write_disposition = "WRITE_EMPTY", ..., billing = x$project, - json_digits = NULL + json_digits = NULL, + labels = getOption("bigrquery.labels") ) bq_perform_load( @@ -42,7 +44,8 @@ bq_perform_load( nskip = 0, create_disposition = "CREATE_IF_NEEDED", write_disposition = "WRITE_EMPTY", - ... + ..., + labels = getOption("bigrquery.labels") ) bq_perform_query( @@ -55,7 +58,8 @@ bq_perform_query( create_disposition = "CREATE_IF_NEEDED", write_disposition = "WRITE_EMPTY", use_legacy_sql = FALSE, - priority = "INTERACTIVE" + priority = "INTERACTIVE", + labels = getOption("bigrquery.labels") ) bq_perform_query_dry_run( @@ -64,7 +68,8 @@ bq_perform_query_dry_run( ..., default_dataset = NULL, parameters = NULL, - use_legacy_sql = FALSE + use_legacy_sql = FALSE, + labels = getOption("bigrquery.labels") ) bq_perform_query_schema( @@ -72,7 +77,8 @@ bq_perform_query_schema( billing, ..., default_dataset = NULL, - parameters = NULL + parameters = NULL, + labels = getOption("bigrquery.labels") ) bq_perform_copy( @@ -81,7 +87,8 @@ bq_perform_copy( create_disposition = "CREATE_IF_NEEDED", write_disposition = "WRITE_EMPTY", ..., - billing = NULL + billing = NULL, + labels = getOption("bigrquery.labels") ) } \arguments{ @@ -116,6 +123,12 @@ snake_case names are automatically converted to camelCase.} \item{billing}{Identifier of project to bill.} +\item{labels}{A named list of strings used to attach +\href{https://cloud.google.com/bigquery/docs/labels-intro}{BigQuery labels} +to the resulting job, e.g. \code{list(env = "prod", team = "data")}. This +is most useful for cost allocation and other FinOps reporting. +Defaults to the value of \code{getOption("bigrquery.labels")}.} + \item{values}{Data frame of values to insert.} \item{fields}{A \link{bq_fields} specification, or something coercible to it diff --git a/man/bigquery.Rd b/man/bigquery.Rd index 0e9ba80d..ffdf847c 100644 --- a/man/bigquery.Rd +++ b/man/bigquery.Rd @@ -15,6 +15,7 @@ quiet = NA, use_legacy_sql = FALSE, bigint = c("integer", "integer64", "numeric", "character"), + labels = getOption("bigrquery.labels"), ... ) } @@ -34,9 +35,15 @@ if \code{NA} picks based on whether or not you're in an interactive context.} \item{bigint}{The R type that BigQuery's 64-bit integer types should be mapped to. The default is \code{"integer"} which returns R's \code{integer} type but results in \code{NA} for -values above/below +/- 2147483647. \code{"integer64"} returns a \link[bit64:bit64-package]{bit64::integer64}, +values above/below +/- 2147483647. \code{"integer64"} returns a \link[bit64:integer64]{bit64::integer64}, which allows the full range of 64 bit integers.} +\item{labels}{A named list of strings used to attach +\href{https://cloud.google.com/bigquery/docs/labels-intro}{BigQuery labels} +to the resulting job, e.g. \code{list(env = "prod", team = "data")}. This +is most useful for cost allocation and other FinOps reporting. +Defaults to the value of \code{getOption("bigrquery.labels")}.} + \item{...}{Other arguments for compatibility with generic; currently ignored.} } \description{ diff --git a/man/bigrquery-package.Rd b/man/bigrquery-package.Rd index 3a65d473..543121f9 100644 --- a/man/bigrquery-package.Rd +++ b/man/bigrquery-package.Rd @@ -35,6 +35,7 @@ Useful links: Authors: \itemize{ + \item Hadley Wickham \email{hadley@posit.co} (\href{https://orcid.org/0000-0003-4757-117X}{ORCID}) \item Jennifer Bryan \email{jenny@posit.co} (\href{https://orcid.org/0000-0002-6983-2759}{ORCID}) } diff --git a/man/bq_auth.Rd b/man/bq_auth.Rd index 5568c3d3..1ebf9752 100644 --- a/man/bq_auth.Rd +++ b/man/bq_auth.Rd @@ -143,8 +143,8 @@ bq_auth(path = "foofy-83ee9e7c9c48.json") } \seealso{ -Other auth functions: -\code{\link{bq_auth_configure}()}, -\code{\link{bq_deauth}()} +Other auth functions: +\code{\link[=bq_auth_configure]{bq_auth_configure()}}, +\code{\link[=bq_deauth]{bq_deauth()}} } \concept{auth functions} diff --git a/man/bq_auth_configure.Rd b/man/bq_auth_configure.Rd index e9122bd8..ed5a9551 100644 --- a/man/bq_auth_configure.Rd +++ b/man/bq_auth_configure.Rd @@ -21,7 +21,7 @@ secret, in one of the forms supported for the \code{txt} argument of \value{ \itemize{ \item \code{bq_auth_configure()}: An object of R6 class -\link[gargle:AuthState-class]{gargle::AuthState}, invisibly. +\link[gargle:AuthState]{gargle::AuthState}, invisibly. \item \code{bq_oauth_client()}: the current user-configured OAuth client. } } @@ -60,8 +60,8 @@ bq_oauth_client() bq_auth_configure(client = original_client) } \seealso{ -Other auth functions: -\code{\link{bq_auth}()}, -\code{\link{bq_deauth}()} +Other auth functions: +\code{\link[=bq_auth]{bq_auth()}}, +\code{\link[=bq_deauth]{bq_deauth()}} } \concept{auth functions} diff --git a/man/bq_deauth.Rd b/man/bq_deauth.Rd index 51dcdb27..5cf52a52 100644 --- a/man/bq_deauth.Rd +++ b/man/bq_deauth.Rd @@ -20,8 +20,8 @@ bq_deauth() } } \seealso{ -Other auth functions: -\code{\link{bq_auth}()}, -\code{\link{bq_auth_configure}()} +Other auth functions: +\code{\link[=bq_auth]{bq_auth()}}, +\code{\link[=bq_auth_configure]{bq_auth_configure()}} } \concept{auth functions} diff --git a/man/bq_has_token.Rd b/man/bq_has_token.Rd index e5475c08..1e988e36 100644 --- a/man/bq_has_token.Rd +++ b/man/bq_has_token.Rd @@ -17,7 +17,7 @@ requests. bq_has_token() } \seealso{ -Other low-level API functions: -\code{\link{bq_token}()} +Other low-level API functions: +\code{\link[=bq_token]{bq_token()}} } \concept{low-level API functions} diff --git a/man/bq_table_download.Rd b/man/bq_table_download.Rd index 837efa4c..2f648c28 100644 --- a/man/bq_table_download.Rd +++ b/man/bq_table_download.Rd @@ -42,7 +42,7 @@ if \code{NA} picks based on whether or not you're in an interactive context.} \item{bigint}{The R type that BigQuery's 64-bit integer types should be mapped to. The default is \code{"integer"}, which returns R's \code{integer} type, but results in \code{NA} for values above/below +/- 2147483647. \code{"integer64"} -returns a \link[bit64:bit64-package]{bit64::integer64}, which allows the full range of 64 bit +returns a \link[bit64:integer64]{bit64::integer64}, which allows the full range of 64 bit integers.} \item{api}{Which API to use? The \code{"json"} API works where ever bigrquery diff --git a/man/bq_token.Rd b/man/bq_token.Rd index ca2df952..c0b41e16 100644 --- a/man/bq_token.Rd +++ b/man/bq_token.Rd @@ -7,7 +7,7 @@ bq_token() } \value{ -A \code{request} object (an S3 class provided by \link[httr:httr-package]{httr}). +A \code{request} object (an S3 class provided by \link[httr:httr]{httr}). } \description{ For internal use or for those programming around the BigQuery API. @@ -25,7 +25,7 @@ bq_token() } } \seealso{ -Other low-level API functions: -\code{\link{bq_has_token}()} +Other low-level API functions: +\code{\link[=bq_has_token]{bq_has_token()}} } \concept{low-level API functions} diff --git a/man/bq_user.Rd b/man/bq_user.Rd index 3d9063d8..3684cb3b 100644 --- a/man/bq_user.Rd +++ b/man/bq_user.Rd @@ -19,6 +19,6 @@ bq_user() } } \seealso{ -\code{\link[gargle:token-info]{gargle::token_userinfo()}}, \code{\link[gargle:token-info]{gargle::token_email()}}, -\code{\link[gargle:token-info]{gargle::token_tokeninfo()}} +\code{\link[gargle:token_userinfo]{gargle::token_userinfo()}}, \code{\link[gargle:token_email]{gargle::token_email()}}, +\code{\link[gargle:token_tokeninfo]{gargle::token_tokeninfo()}} } diff --git a/man/collect.tbl_BigQueryConnection.Rd b/man/collect.tbl_BigQueryConnection.Rd index ac401ae2..9067014e 100644 --- a/man/collect.tbl_BigQueryConnection.Rd +++ b/man/collect.tbl_BigQueryConnection.Rd @@ -4,7 +4,7 @@ \alias{collect.tbl_BigQueryConnection} \title{Collect a BigQuery table} \usage{ -collect.tbl_BigQueryConnection( +\method{collect}{tbl_BigQueryConnection}( x, ..., n = Inf, @@ -49,6 +49,6 @@ This collect method is specialised for BigQuery tables, generating the SQL from your dplyr commands, then calling \code{\link[=bq_project_query]{bq_project_query()}} or \code{\link[=bq_dataset_query]{bq_dataset_query()}} to run the query, then \code{\link[=bq_table_download]{bq_table_download()}} to download the results. Thus the arguments are a combination of the -arguments to \code{\link[dplyr:compute]{dplyr::collect()}}, \code{bq_project_query()}/\code{bq_dataset_query()}, +arguments to \code{\link[dplyr:collect]{dplyr::collect()}}, \code{bq_project_query()}/\code{bq_dataset_query()}, and \code{bq_table_download()}. } diff --git a/tests/testthat/_snaps/dbi-driver.md b/tests/testthat/_snaps/dbi-driver.md new file mode 100644 index 00000000..4728630a --- /dev/null +++ b/tests/testthat/_snaps/dbi-driver.md @@ -0,0 +1,8 @@ +# dbConnect() validates labels + + Code + dbConnect(bigquery(), project = bq_test_project(), labels = "oops") + Condition + Error in `dbConnect()`: + ! `labels` must be a named list or `NULL`, not the string "oops". + diff --git a/tests/testthat/_snaps/utils.md b/tests/testthat/_snaps/utils.md index 799b7953..b645bb14 100644 --- a/tests/testthat/_snaps/utils.md +++ b/tests/testthat/_snaps/utils.md @@ -6,3 +6,21 @@ Error in `bq_check_namespace()`: ! The package "invalid package name" is required to parse BigQuery 'FIELD_TYPE' fields. +# check_labels() errors on invalid inputs + + Code + check_labels(c(env = "prod")) + Condition + Error: + ! `c(env = "prod")` must be a named list or `NULL`, not the string "prod". + Code + check_labels(list("no-name")) + Condition + Error: + ! `list("no-name")` must be a named list or `NULL`, not a list. + Code + check_labels(list(env = 1)) + Condition + Error: + ! `list(env = 1)` must be a named list of strings, not a list. + diff --git a/tests/testthat/test-dbi-driver.R b/tests/testthat/test-dbi-driver.R index 5a13d0cd..a6f7315d 100644 --- a/tests/testthat/test-dbi-driver.R +++ b/tests/testthat/test-dbi-driver.R @@ -10,3 +10,25 @@ test_that("connecting yields a BigQueryConnection", { con <- dbConnect(bigquery(), project = bq_test_project()) expect_s4_class(con, "BigQueryConnection") }) + +test_that("dbConnect() captures labels", { + con <- dbConnect( + bigquery(), + project = bq_test_project(), + labels = list(env = "test") + ) + expect_equal(con@labels, list(env = "test")) +}) + +test_that("dbConnect() validates labels", { + expect_snapshot( + error = TRUE, + dbConnect(bigquery(), project = bq_test_project(), labels = "oops") + ) +}) + +test_that("dbConnect() reads bigrquery.labels option", { + withr::local_options(bigrquery.labels = list(env = "from-option")) + con <- dbConnect(bigquery(), project = bq_test_project()) + expect_equal(con@labels, list(env = "from-option")) +}) diff --git a/tests/testthat/test-utils.R b/tests/testthat/test-utils.R index 698dc286..e0a16529 100644 --- a/tests/testthat/test-utils.R +++ b/tests/testthat/test-utils.R @@ -12,15 +12,17 @@ test_that("cli_escape() doubles cli braces", { }) test_that("check_labels() accepts valid labels and NULL-like inputs", { - expect_null(check_labels(NULL)) - expect_null(check_labels(list())) - - expect_equal(check_labels(list(env = "prod")), list(env = "prod")) - expect_equal(check_labels(list(env = "prod", team = "data")), list(env = "prod", team = "data")) - expect_equal(check_labels(list(env = "")), list(env = "")) + expect_no_error(check_labels(NULL)) + expect_no_error(check_labels(list())) + expect_no_error(check_labels(list(env = "prod"))) + expect_no_error(check_labels(list(env = "prod", team = "data"))) + expect_no_error(check_labels(list(env = ""))) }) test_that("check_labels() errors on invalid inputs", { - expect_error(check_labels(c(env = "prod")), "named list") - expect_error(check_labels(list("no-name")), "named list") + expect_snapshot(error = TRUE, { + check_labels(c(env = "prod")) + check_labels(list("no-name")) + check_labels(list(env = 1)) + }) }) From 9c1289839d1d24d85bfd6cc6307ebdac62f6b005 Mon Sep 17 00:00:00 2001 From: Hadley Wickham Date: Thu, 30 Apr 2026 14:40:53 -0500 Subject: [PATCH 5/5] Escape external input in errors (#681) Fixes #677 --- R/utils.R | 6 ++++++ tests/testthat/test-utils.R | 5 +++++ 2 files changed, 11 insertions(+) diff --git a/R/utils.R b/R/utils.R index 7d34313f..93bf5094 100644 --- a/R/utils.R +++ b/R/utils.R @@ -122,3 +122,9 @@ check_labels <- function( invisible() } + +cli_escape <- function(x) { + x <- gsub("{", "{{", x, fixed = TRUE) + x <- gsub("}", "}}", x, fixed = TRUE) + x +} diff --git a/tests/testthat/test-utils.R b/tests/testthat/test-utils.R index e0a16529..988cbdbb 100644 --- a/tests/testthat/test-utils.R +++ b/tests/testthat/test-utils.R @@ -26,3 +26,8 @@ test_that("check_labels() errors on invalid inputs", { check_labels(list(env = 1)) }) }) + +test_that("cli_escape() doubles cli braces", { + expect_equal(cli_escape("no braces"), "no braces") + expect_equal(cli_escape("{x}"), "{{x}}") +})