-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathfinal_script.py
More file actions
91 lines (71 loc) · 2.99 KB
/
final_script.py
File metadata and controls
91 lines (71 loc) · 2.99 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
import os
import numpy as np
import pandas as pd
import scanpy as sc
import loompy as lp
# ----------------------------------------------------------------------------------------
# Convert the cleaned expression matrix of each condition (HET, KO, WT) into an
# unfiltered loom file for downstream pySCENIC analysis.
# All samples are processed identically, so the conversion lives in a single helper
# instead of one copy-pasted section per sample.
# ----------------------------------------------------------------------------------------

# Sample labels, processed in this order. Each label maps to the input
# 'expr_mat_<label>_clean.tsv' and the output 'expr_mat_<label>_clean.loom'.
SAMPLES = ("HET", "KO", "WT")


def expr_tsv_to_loom(f_exprMat, f_loom_path_unfilt):
    """Convert a tab-separated expression matrix into a loom file.

    The table is loaded with Scanpy, so its rows become ``adata.obs`` (written
    out as the loom ``CellID`` column attribute) and its columns become
    ``adata.var`` (written out as the loom ``Gene`` row attribute).

    NOTE(review): earlier comments in this script described the TSV as
    "genes x cells", which contradicts the mapping above -- confirm the
    orientation of the input files. If rows really are genes, the labels
    written to the loom file are swapped.

    Args:
        f_exprMat: Path of the input TSV file; its first column holds the
            row names.
        f_loom_path_unfilt: Path of the loom file to create.
    """
    adata = sc.read_text(f_exprMat, delimiter='\t', first_column_names=True)

    # The matrix is transposed to match the loom layout (genes x cells).
    # Transpose once and reuse it for the per-cell summaries and the main matrix.
    expr = adata.X.transpose()

    # Gene-level metadata (loom row attributes)
    row_attrs = {
        "Gene": np.array(adata.var.index),
    }

    # Cell-level metadata (loom column attributes)
    col_attrs = {
        # Unique cell identifiers
        "CellID": np.array(adata.obs.index),
        # Number of entries with non-zero expression in each cell
        "nGene": np.array(np.sum(expr > 0, axis=0)).flatten(),
        # Sum of the expression values in each cell
        "nUMI": np.array(np.sum(expr, axis=0)).flatten(),
    }

    lp.create(f_loom_path_unfilt, expr, row_attrs, col_attrs)


for sample in SAMPLES:
    expr_tsv_to_loom(
        f'expr_mat_{sample}_clean.tsv',
        f'expr_mat_{sample}_clean.loom',
    )