XeniumCluster/xenium_BayesSpace.R at main · prob-ml/XeniumCluster · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
# if (!requireNamespace("BiocManager", quietly = TRUE)) {
#     install.packages("BiocManager")
#     BiocManager::install("BayesSpace")
# }

library(SingleCellExperiment)
library(ggplot2)
library(BayesSpace)
library(Matrix)

# sce <- readVisium("data/sce")

BayesSpace <- function(dataset="hBreast", SPOT_SIZE=100, init_method="mclust", num_pcs=15, K=NULL, grid_search=TRUE) {

  rowData <- read.csv(paste0("data/BayesSpace/", dataset, "_rowData_SPOT_SIZE=", SPOT_SIZE, ".csv"), stringsAsFactors=FALSE, row.names=1)
  colData <- read.csv(paste0("data/BayesSpace/", dataset, "_colData_SPOT_SIZE=", SPOT_SIZE, ".csv"), stringsAsFactors=FALSE, row.names=1)
  countsData <- t(read.csv(paste0("data/BayesSpace/", dataset, "_counts_SPOT_SIZE=", SPOT_SIZE, ".csv"), row.names=1, check.names=F, stringsAsFactors=FALSE))

  # Create unique row names from the first column, then remove it from countsData
  rownames(countsData) <- rownames(rowData)
  colnames(countsData) <- rownames(colData)

  if (grid_search) {
    gamma_list <- seq(1.0, 3.0, by = 0.25)
  } else {
    gamma_list <- c(2)
  }

  for (gamma in gamma_list) {

    is_already_log_transformed = TRUE
    if (is_already_log_transformed) {
      sce <- SingleCellExperiment(assays=list(logcounts=as(as.matrix(countsData), "dgCMatrix")),
                                  rowData=rowData,
                                  colData=colData)
    } else {
      sce <- SingleCellExperiment(assays=list(counts=as(as.matrix(countsData), "dgCMatrix")),
                                  rowData=rowData,
                                  colData=colData)
    }

    set.seed(102)

    if (dataset == "SYNTHETIC") {
      pca_data <- t(countsData)
      rownames(pca_data) <- colnames(sce)  # Ensure row names match sce colnames
      reducedDims(sce) <- list(PCA=pca_data)
    } else {
      if (is_already_log_transformed) {
        sce <- spatialPreprocess(sce, platform="ST",
                                 n.PCs=num_pcs, n.HVGs=2000, log.normalize=FALSE)
      } else {
        sce <- spatialPreprocess(sce, platform="ST",
                                 n.PCs=num_pcs, n.HVGs=2000, log.normalize=TRUE)
      }
    }

    sce <- sce[, !grepl("^(BLANK_|NegControl)", colnames(sce))]

    set.seed(149)
    if (is.null(K)) {
      sce <- qTune(sce, qs=seq(2, 15), platform="ST", d=num_pcs)
      qPlot(sce)
      q_optimal <- attr(sce, "q.logliks")[which.max(attr(sce, "q.logliks")$loglik),]$q
    }
    else
      q_optimal <- K

    NREP = 1000
    sce <- spatialCluster(sce, q=q_optimal, platform="ST", d=num_pcs,
                          init.method=init_method, model="t", gamma=gamma,
                          nrep=NREP, burn.in=100,
                          save.chain=TRUE)
    chain <- mcmcChain(sce)
    final_means <- chain[(NREP-100):NREP, 26:50]
    final_scales <- chain[(NREP-100):NREP, 1:25]

    dir_path <- paste0("results/", dataset, "/BayesSpace/", num_pcs, "/", q_optimal, "/clusters/", init_method, "/", SPOT_SIZE, "/", format(gamma, nsmall = 2))
    if (!dir.exists(dir_path)) {
      dir.create(dir_path, recursive = TRUE)
    }

    means_path <- paste0(dir_path, "/means")
    if (!dir.exists(means_path)) {
      dir.create(means_path, recursive = TRUE)
    }

    scales_path <- paste0(dir_path, "/scales")
    if (!dir.exists(scales_path)) {
      dir.create(scales_path, recursive = TRUE)
    }
    cluster_data <- data.frame(sce$spatial.cluster)
    colnames(cluster_data) <- c("BayesSpace cluster")
    write.csv(cluster_data, paste0(dir_path, "/clusters_K=", q_optimal, ".csv"), row.names = TRUE)
    write.csv(final_means, paste0(dir_path, "/means", "/clusters_K=", q_optimal, ".csv"), row.names = TRUE)
    write.csv(final_scales, paste0(dir_path, "/scales", "/clusters_K=", q_optimal, ".csv"), row.names = TRUE)

    clusterPlot(sce, palette=c("purple", "red", "blue", "yellow"), color="black") +
      theme_bw() +
      xlab("Column") +
      ylab("Row") +
      labs(fill="BayesSpace\ncluster", title="Spatial clustering of ST_mel1_rep2")
  }
  return(sce$spatial.cluster)
}

args <- commandArgs(trailingOnly = TRUE)
dataset_name <- args[1]
SPOT_SIZE <- as.numeric(args[2])
init_method <- args[3]
num_pcs <- as.numeric(args[4])
K <- ifelse(length(args) > 4, as.numeric(args[5]), NULL)
grid_search <- ifelse(length(args) > 5, as.logical(args[6]), NULL)
cat(c(SPOT_SIZE, num_pcs, K))
BayesSpace(dataset_name, SPOT_SIZE, init_method, num_pcs, K, grid_search)