Skip to content

Get an error when impute with "condition" #10

@Freddsle

Description

@Freddsle

Here:

https://github.com/daisybio/PRONE/blob/86f4db6f9ef0ed30621f877d89810f88322ebee0/R/Imputation.R#L35C6-L35C46

Probably, because of the argument to get() is the column vector - the condition became dt$condition, not the string "condition").

How to reproduce and possibly fix:

# ---- packages ----
library(SummarizedExperiment)
library(dplyr)
library(data.table)

# ---- fake proteomics-like data ----
set.seed(1)
genes   <- paste0("P", sprintf("%05d", 1:5))                 # 5 proteins
samples <- c(paste0("lab_A_", 1:3), paste0("lab_B_", 1:3))   # 6 samples

mat <- matrix(rnorm(length(genes) * length(samples)),
              nrow = length(genes),
              dimnames = list(genes, samples))

# Induce MNAR-by-condition: proteins fully missing within a condition
mat["P00002", samples[1:3]] <- NA_real_   # missing in all A
mat["P00004", samples[4:6]] <- NA_real_   # missing in all B

# ---- sample annotation (colData) ----
cd <- data.frame(
  file      = samples,
  lab       = sub("(_.*)$", "", samples),
  condition = rep(c("A","B"), each = 3),
  Repl      = rep(1:3, times = 2),
  row.names = samples,
  stringsAsFactors = FALSE
)
cd$Column <- rownames(cd)   # join key matching colnames(se)

# ---- build SE ----
se <- SummarizedExperiment(
  assays  = list(raw = mat),
  colData = cd
)

# ---- reshape to long + join ----
ain <- "raw"
dt <- as.data.table(assays(se)[[ain]])
dt$ID <- rownames(dt)
dt <- melt(dt, id.vars = "ID", variable.name = "Column", value.name = "Intensity")

coldata <- as.data.table(colData(se))
dt <- merge(dt, coldata, by = "Column")

# sanity: keys line up
stopifnot(identical(sort(unique(dt$Column)), sort(colnames(se))))

# ---- column name ----
condition <- "condition"
message("Using condition: ", condition)

# ---- 1) Reproduce  ----
bad <- tryCatch({
  dt %>%
    group_by(ID, get(condition)) %>%           # <-- this is the bug
    summarize(NAs = all(is.na(Intensity))) %>%
    filter(NAs) %>%
    distinct(ID) %>%
    pull(ID)
}, error = function(e) {
  message("Expected error: ", e$message)
  NULL
})

# ---- 2) Fix: ----
ok <- dt %>%
  group_by(ID, !!sym(condition)) %>%         # <-- FIX
  summarize(NAs = all(is.na(Intensity)), .groups = "drop") %>%
  filter(NAs) %>%
  distinct(ID) %>%
  pull(ID)

print(ok)   # Proteins MNAR within a condition (e.g., "P00002", "P00004")

Metadata

Metadata

Assignees

No one assigned

    Labels

    No labels
    No labels

    Type

    No type

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions