-
Notifications
You must be signed in to change notification settings - Fork 3
Open
Description
Here:
Probably, because of the argument to get() is the column vector - the condition became dt$condition, not the string "condition").
How to reproduce and possibly fix:
# ---- packages ----
library(SummarizedExperiment)
library(dplyr)
library(data.table)
# ---- fake proteomics-like data ----
set.seed(1)
genes <- paste0("P", sprintf("%05d", 1:5)) # 5 proteins
samples <- c(paste0("lab_A_", 1:3), paste0("lab_B_", 1:3)) # 6 samples
mat <- matrix(rnorm(length(genes) * length(samples)),
nrow = length(genes),
dimnames = list(genes, samples))
# Induce MNAR-by-condition: proteins fully missing within a condition
mat["P00002", samples[1:3]] <- NA_real_ # missing in all A
mat["P00004", samples[4:6]] <- NA_real_ # missing in all B
# ---- sample annotation (colData) ----
cd <- data.frame(
file = samples,
lab = sub("(_.*)$", "", samples),
condition = rep(c("A","B"), each = 3),
Repl = rep(1:3, times = 2),
row.names = samples,
stringsAsFactors = FALSE
)
cd$Column <- rownames(cd) # join key matching colnames(se)
# ---- build SE ----
se <- SummarizedExperiment(
assays = list(raw = mat),
colData = cd
)
# ---- reshape to long + join ----
ain <- "raw"
dt <- as.data.table(assays(se)[[ain]])
dt$ID <- rownames(dt)
dt <- melt(dt, id.vars = "ID", variable.name = "Column", value.name = "Intensity")
coldata <- as.data.table(colData(se))
dt <- merge(dt, coldata, by = "Column")
# sanity: keys line up
stopifnot(identical(sort(unique(dt$Column)), sort(colnames(se))))
# ---- column name ----
condition <- "condition"
message("Using condition: ", condition)
# ---- 1) Reproduce ----
bad <- tryCatch({
dt %>%
group_by(ID, get(condition)) %>% # <-- this is the bug
summarize(NAs = all(is.na(Intensity))) %>%
filter(NAs) %>%
distinct(ID) %>%
pull(ID)
}, error = function(e) {
message("Expected error: ", e$message)
NULL
})
# ---- 2) Fix: ----
ok <- dt %>%
group_by(ID, !!sym(condition)) %>% # <-- FIX
summarize(NAs = all(is.na(Intensity)), .groups = "drop") %>%
filter(NAs) %>%
distinct(ID) %>%
pull(ID)
print(ok) # Proteins MNAR within a condition (e.g., "P00002", "P00004")
Reactions are currently unavailable
Metadata
Metadata
Assignees
Labels
No labels