diff --git a/.github/workflows/build-docker.yaml b/.github/workflows/build-docker.yaml index db6d3acc..4e694bce 100644 --- a/.github/workflows/build-docker.yaml +++ b/.github/workflows/build-docker.yaml @@ -13,7 +13,7 @@ on: env: GH_REGISTRY: ghcr.io GH_REGISTRY_USER: alexslemonade - AWS_REGISTRY: 997241705947.dkr.ecr.us-east-1.amazonaws.com + AWS_REGISTRY: ${{ secrets.AWS_ACCOUNT_ID }}.dkr.ecr.us-east-1.amazonaws.com AWS_PREFIX: ghcr_io jobs: @@ -49,7 +49,7 @@ jobs: if: ${{ github.event_name == 'push' }} uses: aws-actions/configure-aws-credentials@v4 with: - role-to-assume: arn:aws:iam::997241705947:role/gha-ecr-access-role + role-to-assume: arn:aws:iam::${{ secrets.AWS_ACCOUNT_ID }}:role/gha-ecr-access-role role-session-name: githubActionSession aws-region: us-east-1 @@ -136,7 +136,7 @@ jobs: uses: aws-actions/configure-aws-credentials@v4 if: ${{ github.event_name == 'push' }} with: - role-to-assume: arn:aws:iam::997241705947:role/gha-ecr-access-role + role-to-assume: arn:aws:iam::${{ secrets.AWS_ACCOUNT_ID }}:role/gha-ecr-access-role role-session-name: githubActionSession-${{ matrix.image_name }} aws-region: us-east-1 diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index e7e02ce8..2dd4f366 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -1,51 +1,22 @@ # All available hooks: https://pre-commit.com/hooks.html # R specific hooks: https://github.com/lorenzwalthert/precommit repos: + - repo: https://github.com/posit-dev/air-pre-commit + # Air version + rev: 0.9.0 + hooks: + # Run the formatter + - id: air-format + - repo: https://github.com/crate-ci/typos + rev: v1.45.0 + hooks: + - id: typos - repo: https://github.com/lorenzwalthert/precommit - rev: v0.4.3 + rev: v0.4.3.9021 hooks: - - id: style-files - args: [--style_pkg=styler, --style_fun=tidyverse_style] # codemeta must be above use-tidy-description when both are used # - id: codemeta-description-updated - id: use-tidy-description - - id: spell-check - exclude: > - (?x)^( - .*\.[rR]| - .*\.feather| - .*\.jpeg| - .*\.pdf| - .*\.png| - .*\.py| - .*\.RData| - .*\.rds| - .*\.Rds| - .*\.Rproj| - .*\.sh| - (.*/|)\.gitattributes| - (.*/|)\.gitignore| - (.*/|)\.gitlab-ci\.yml| - (.*/|)\.lintr| - (.*/|)\.pre-commit-.*| - (.*/|)\.Rbuildignore| - (.*/|)\.Renviron| - (.*/|)\.Rprofile| - (.*/|)\.travis\.yml| - (.*/|)appveyor\.yml| - (.*/|)DESCRIPTION| - (.*/|)NAMESPACE| - (.*/|)pixi\.toml| - (.*/|)pixi\.lock| - (.*/|)renv/settings\.dcf| - (.*/|)renv.*\.lock| - (.*/|)requirements.*\.in| - (.*/|)requirements.*\.txt| - (.*/|).*Dockerfile| - (.*/|)WORDLIST| - \.github/workflows/.*| - data/.*| - )$ - id: lintr - id: parsable-R - id: no-browser-statement @@ -53,9 +24,12 @@ repos: - id: deps-in-desc exclude: docker/.*|renv/.* - repo: https://github.com/pre-commit/pre-commit-hooks - rev: v4.6.0 + rev: v6.0.0 hooks: - id: check-added-large-files args: ["--maxkb=200"] - id: end-of-file-fixer exclude: '\.Rd' +ci: + autofix_prs: true # set to false if we don't want fixes automatically applied by ci + autoupdate_schedule: quarterly diff --git a/R/add_cell_mito_qc.R b/R/add_cell_mito_qc.R index ab15bf85..2595ce9b 100644 --- a/R/add_cell_mito_qc.R +++ b/R/add_cell_mito_qc.R @@ -28,7 +28,7 @@ add_cell_mito_qc <- function(sce, mito, miQC = FALSE, ...) { stop("sce must be a SingleCellExperiment object") } - # check that mito is not empty, otherwise resulting colData will be innacurate + # check that mito is not empty, otherwise resulting colData will be inaccurate if (length(mito) == 0) { stop("Mitochondrial gene list not used, cannot calculate mitochondrial metrics.") } @@ -50,7 +50,6 @@ add_cell_mito_qc <- function(sce, mito, miQC = FALSE, ...) { ... ) - if (miQC) { sce <- add_miQC(sce) } diff --git a/R/processing_pcs.R b/R/processing_pcs.R index 8040cadf..35d9f453 100644 --- a/R/processing_pcs.R +++ b/R/processing_pcs.R @@ -22,7 +22,7 @@ filter_pcs <- function(pcs, batches, rename_pcs = TRUE) { # Check dimensions after filtering if (nrow(pcs) != length(batches)) { - stop("Incompatable PC and batch information dimensions after removing NAs.") + stop("Incompatible PC and batch information dimensions after removing NAs.") } # rename PCs to batches if specified @@ -34,7 +34,6 @@ filter_pcs <- function(pcs, batches, rename_pcs = TRUE) { } - #' Downsample PCs for use in integration metric calculations #' #' @param pcs The PCs to downsample, these PCs should contain batch labels as rownames @@ -55,10 +54,7 @@ downsample_pcs <- function(pcs, frac_cells, min_cells = 50) { } # Determines rows to sample - downsampled_indices <- sample(1:num_cells, - frac_cells * num_cells, - replace = FALSE - ) + downsampled_indices <- sample(1:num_cells, frac_cells * num_cells, replace = FALSE) # Extract PCs for downsample downsampled_pcs <- pcs[downsampled_indices, , drop = FALSE] diff --git a/R/sim_sce.R b/R/sim_sce.R index 1a579a2b..0461118b 100644 --- a/R/sim_sce.R +++ b/R/sim_sce.R @@ -21,7 +21,7 @@ sim_sce <- function(n_genes = 200, n_cells = 100, n_empty = 1000, n_groups = 3) stop("n_genes must be a positive number.") } if (n_cells < 1) { - stop("n_cells must be a postive number.") + stop("n_cells must be a positive number.") } # enforce some minimums n_empty <- max(n_empty, 0) diff --git a/air.toml b/air.toml new file mode 100644 index 00000000..72b07b3e --- /dev/null +++ b/air.toml @@ -0,0 +1,6 @@ +[format] +line-width = 100 +indent-width = 2 +indent-style = "space" +line-ending = "lf" +skip = ["stopifnot"] diff --git a/docker/make-requirements.sh b/docker/make-requirements.sh index 84f4875e..163a58e2 100755 --- a/docker/make-requirements.sh +++ b/docker/make-requirements.sh @@ -2,7 +2,7 @@ set -euo pipefail # This script is used to generate or update requirements/lock files for R and Python packages # Requires that the `pip-tools` python package is installed. -# Before running, make sure that the renv.lock file and installed libaries are +# Before running, make sure that the renv.lock file and installed libraries are # consistent with renv::snapshot() or renv::restore() # To upgrade packages, set the `UPGRADE_PY` environment variable as follows: diff --git a/tests/testthat/test-add_sample_metadata.R b/tests/testthat/test-add_sample_metadata.R index a794e78f..de2a2c8d 100644 --- a/tests/testthat/test-add_sample_metadata.R +++ b/tests/testthat/test-add_sample_metadata.R @@ -9,9 +9,7 @@ sample_metadata_df <- data.frame( ) test_that("`add_sample_metadata` works as expected", { - updated_sce <- add_sample_metadata(sce, - metadata_df = sample_metadata_df - ) + updated_sce <- add_sample_metadata(sce, metadata_df = sample_metadata_df) expect_equal( metadata(updated_sce)$sample_metadata, @@ -19,7 +17,7 @@ test_that("`add_sample_metadata` works as expected", { ) }) -test_that("`add_sample_metadata` fails as exepected", { +test_that("`add_sample_metadata` fails as expected", { # missing sce expect_error(add_sample_metadata( sce = "not an sce", @@ -27,18 +25,13 @@ test_that("`add_sample_metadata` fails as exepected", { )) # incorrect format for metadata_df - expect_error(add_sample_metadata(sce, - metadata_df = "not a data frame" - )) - + expect_error(add_sample_metadata(sce, metadata_df = "not a data frame")) # incorrect sample id column incorrect_metadata <- data.frame( not_sample_id = "sample_id" ) - expect_error(add_sample_metadata(sce, - metadata_df = incorrect_metadata - )) + expect_error(add_sample_metadata(sce, metadata_df = incorrect_metadata)) # sample ids don't match metadata(sce)$sample_id <- "not a sample id" diff --git a/tests/testthat/test-integrate_sces.R b/tests/testthat/test-integrate_sces.R index 2e628bfc..13890c86 100644 --- a/tests/testthat/test-integrate_sces.R +++ b/tests/testthat/test-integrate_sces.R @@ -12,10 +12,7 @@ rownames(colData(merged_sce)) <- new_rownames # Add "sample" to colData colData(merged_sce)$sample <- batches # Add in a "covariate" column for testing -colData(merged_sce)$covariate <- sample(letters[1:4], - size = 300, - replace = TRUE -) +colData(merged_sce)$covariate <- sample(letters[1:4], size = 300, replace = TRUE) # add a logcounts assay for testing (numbers don't matter) logcounts(merged_sce) <- counts(merged_sce) @@ -29,11 +26,9 @@ batch_column <- "sample" ################################################################################ - - test_that("`integrate_fastmnn` works as expected", { suppressWarnings( - # warnings are supressed here b/c simulated data plays poorly enough with + # warnings are suppressed here b/c simulated data plays poorly enough with # algorithms to trigger warnings like: ### Warning in (function (A, nv = 5, nu = nv, maxit = 1000, work = nv + 7, reorth = TRUE, : ### You're computing too large a percentage of total singular values, use a standard svd instead. @@ -75,7 +70,6 @@ test_that("`integrate_harmony` works as expected", { }) - test_that("`integrate_harmony` fails when PCs are missing", { reducedDim(merged_sce, "PCA") <- NULL expect_error( @@ -85,10 +79,7 @@ test_that("`integrate_harmony` fails when PCs are missing", { test_that("`integrate_harmony` fails when covariate columns are missing", { expect_error( - integrate_harmony(merged_sce, - batch_column, - covariate_cols = "not_a_column" - ) + integrate_harmony(merged_sce, batch_column, covariate_cols = "not_a_column") ) }) @@ -108,7 +99,6 @@ test_that("`integrate_harmony` fails when covariates and covariate_lambda don't ################################################################################ ################################################################################ - test_that("`integrate_sces` fail as expected", { # bad sce expect_error( @@ -122,10 +112,7 @@ test_that("`integrate_sces` fail as expected", { # missing batch column expect_error( - integrate_sces(merged_sce, - "fastMNN", - batch_column = "not_a_column" - ) + integrate_sces(merged_sce, "fastMNN", batch_column = "not_a_column") ) # insufficient batches @@ -163,10 +150,7 @@ test_that("`integrate_sces` works as expected for return_corrected_expression=TR # fastmnn: suppressWarnings({ # simulated-data related numerical warnings - integrated_sce <- integrate_sces(merged_sce, - "fastMNN", - return_corrected_expression = TRUE - ) + integrated_sce <- integrate_sces(merged_sce, "fastMNN", return_corrected_expression = TRUE) }) expect_equal( @@ -186,23 +170,16 @@ test_that("`integrate_sces` works as expected for return_corrected_expression=TR }) - - - test_that("`integrate_sces` works as expected with fastmnn extra arguments", { expect_no_error( suppressWarnings({ # simulated-data related numerical warnings - integrated_sce <- integrate_sces(merged_sce, - "fastMNN", - cos.norm = FALSE - ) + integrated_sce <- integrate_sces(merged_sce, "fastMNN", cos.norm = FALSE) }) ) }) - test_that("`integrate_sces` works as expected for harmony defaults", { integrated_sce <- integrate_sces( merged_sce, diff --git a/tests/testthat/test-merge_sce_list.R b/tests/testthat/test-merge_sce_list.R index 053e1799..9516a842 100644 --- a/tests/testthat/test-merge_sce_list.R +++ b/tests/testthat/test-merge_sce_list.R @@ -101,7 +101,6 @@ test_that("`prepare_sce_for_merge` works as expected when all columns are presen }) - test_that("`prepare_sce_for_merge` works as expected when an expected column is missing, no altexps", { # REMOVE "detected" column first # It should get re-added in as all NAs @@ -136,7 +135,6 @@ test_that("merging SCEs with matching genes works as expected, no altexps", { preserve_rowdata_cols = c("gene_names") ) - # correct number of genes and cells: expect_equal(nrow(merged_sce), total_genes) expect_equal(ncol(merged_sce), total_cells) @@ -230,7 +228,7 @@ test_that("merging SCEs with multiple sample ids per library to mirror cellhash sce_list <- sce_list |> purrr::map2( c("sample4", "sample5", "sample6"), - \(sce, other_sample) { + \(sce, other_sample) { metadata(sce)$sample_id <- c(metadata(sce)$sample_id, other_sample) metadata(sce)$sample_metadata <- data.frame( sample_id = paste0(metadata(sce)$sample_id, collapse = ","), @@ -240,7 +238,6 @@ test_that("merging SCEs with multiple sample ids per library to mirror cellhash } ) - # merge merged_sce <- merge_sce_list( sce_list, @@ -276,15 +273,12 @@ test_that("merging SCEs with different genes among input SCEs works as expected, # Works as expected: merged_sce <- merge_sce_list(sce_list) - # correct number of genes and cells: expect_equal(nrow(merged_sce), 6) expect_equal(ncol(merged_sce), total_cells) }) - - test_that("merging SCEs with no matching genes fails as expected, no altexps", { # ensure different gene names entirely rownames(sce_list[[1]]) <- rownames(sce_list[[2]]) @@ -301,7 +295,6 @@ test_that("merging SCEs with no matching genes fails as expected, no altexps", { }) - test_that("merging SCEs without names works as expected, no altexps", { # First make sure it generates a warning - expect_warning( @@ -338,11 +331,12 @@ test_that("merging SCEs with library metadata fails as expected, no altexps", { ## helper function to add an altExp to a simulated SCE ---- add_sce_altexp <- function( - sce, - batch, - altexp_name, - num_altexp_features, - n_cells) { + sce, + batch, + altexp_name, + num_altexp_features, + n_cells +) { sce_alt <- sim_sce( n_genes = num_altexp_features, n_cells = n_cells, @@ -403,7 +397,6 @@ sce_list_with_altexp <- sce_list |> full_altexp_features <- rownames(altExp(sce_list_with_altexp[[1]])) - test_that("prepare_sce_for_merge() works as expected with is_altexp=TRUE", { test_altexp <- altExp(sce_list_with_altexp[[1]]) prepared_altexp <- prepare_sce_for_merge( @@ -424,11 +417,13 @@ test_that("prepare_sce_for_merge() works as expected with is_altexp=TRUE", { # column names should be unchanged expect_equal( - colnames(prepared_altexp), colnames(test_altexp) + colnames(prepared_altexp), + colnames(test_altexp) ) expect_equal( - colnames(colData(prepared_altexp)), c("batch", "cell") + colnames(colData(prepared_altexp)), + c("batch", "cell") ) }) @@ -479,9 +474,12 @@ test_that("merging SCEs with altExps has correct altExp colData names when retai # test correct altExp rowData names expected_cols <- c( "target_type", - "sce1-feature_column", "sce1-other_column", - "sce2-feature_column", "sce2-other_column", - "sce3-feature_column", "sce3-other_column" + "sce1-feature_column", + "sce1-other_column", + "sce2-feature_column", + "sce2-other_column", + "sce3-feature_column", + "sce3-other_column" ) observed_cols <- altExp(merged_sce) |> rowData() |> @@ -538,7 +536,6 @@ test_that("merging SCEs with 1 altexp and same features works as expected, with merged_altexp <- altExp(merged_sce) - expect_true(altExpNames(merged_sce) == altexp_name) expect_equal(dim(merged_altexp), c(num_altexp_features, total_cells)) expect_equal(rownames(merged_altexp), full_altexp_features) @@ -569,13 +566,10 @@ test_that("merging SCEs with 1 altexp and same features works as expected, with }) - - test_that("merging SCEs with 1 altexp but different features fails as expected, with altexps", { # keep only the first 3 features from the first SCE altExp(sce_list_with_altexp[[1]]) <- altExp(sce_list_with_altexp[[1]])[1:3, ] - expect_error( merge_sce_list( sce_list_with_altexp, @@ -589,12 +583,10 @@ test_that("merging SCEs with 1 altexp but different features fails as expected, }) - - test_that("merging SCEs where 1 altExp is missing works as expected, with altexps", { sce_list_with_altexp$sce4 <- sce_list[[1]] - # update the metdata list with sce4 name + # update the metadata list with sce4 name metadata(sce_list_with_altexp$sce4) <- list( library_id = "library-sce4", sample_id = "sample-sce4", @@ -633,7 +625,6 @@ test_that("merging SCEs where 1 altExp is missing works as expected, with altexp glue::glue("sample-{names(sce_list_with_altexp)}") ) - expect_setequal( # all but sce4 contain all metadata components altexp_metadata$library_metadata[-4] |> @@ -648,7 +639,6 @@ test_that("merging SCEs where 1 altExp is missing works as expected, with altexp c("library_id", "sample_id") ) - expect_true( is.null(altexp_metadata$library_metadata$sce4$ambient_profile) & is.null(altexp_metadata$library_metadata$sce4$mapped_reads) @@ -772,7 +762,6 @@ test_that("merging SCEs with different altExps works as expected; each SCE has 1 }) - test_that("merging SCEs with different altExps works as expected; each SCE has 2 different altExps", { other_altexp_name <- "other" other_n_features <- 3 @@ -866,17 +855,17 @@ test_that("merging SCEs with different altExps works as expected; each SCE has 2 }) - - ## Other tests ------------------ test_that("get_altexp_attributes passes when it should pass", { attribute_list <- get_altexp_attributes(sce_list_with_altexp) expect_equal( - attribute_list[["adt"]][["assays"]], c("counts", "logcounts") + attribute_list[["adt"]][["assays"]], + c("counts", "logcounts") ) expect_equal( - attribute_list[["adt"]][["features"]], full_altexp_features + attribute_list[["adt"]][["features"]], + full_altexp_features ) }) @@ -896,7 +885,7 @@ test_that("check_metadata throws an error when a field is missing", { test_that("get_altexp_metadata returns the correct values", { expected_list <- list( "library_id" = "library-sce1", - "sample_id" = "sample-sce1" + "sample_id" = "sample-sce1" ) observed_list <- get_altexp_metadata(sce_list[[1]], "MISSING_ALTEXP_NAME") expect_equal(observed_list, expected_list)