|
| 1 | +import numpy as np |
| 2 | +import pandas as pd |
| 3 | + |
| 4 | +try: |
| 5 | + import rpy2.robjects as robjects |
| 6 | + from rpy2.robjects import pandas2ri |
| 7 | + from rpy2.robjects.conversion import localconverter |
| 8 | + |
| 9 | + RPY2_AVAILABLE = True |
| 10 | +except ImportError: |
| 11 | + RPY2_AVAILABLE = False |
| 12 | + |
| 13 | + |
def thr_AUCell(df_path, pathway_names=None, sample_names=None):
    """
    Calculate one activity threshold per pathway using AUCell's thresholding routine.

    Every row of the input is sent to R on its own, as a one-row table, and
    passed to ``AUCell:::.auc_assignmnetThreshold_v6`` with ``plotHist=FALSE``.
    The first element of that call's ``selected`` entry is stored as the
    threshold for the row.

    Parameters
    ----------
    df_path : pd.DataFrame or np.ndarray
        Pathway activity scores (rows: pathways, columns: samples).
        The input is only read, never modified.
    pathway_names : list, optional
        Names for pathways (rows). Only used if ``df_path`` is a numpy array;
        defaults to ``pathway_0``, ``pathway_1``, ...
    sample_names : list, optional
        Names for samples (columns). Only used if ``df_path`` is a numpy
        array; defaults to ``sample_0``, ``sample_1``, ...

    Returns
    -------
    dict
        Dictionary mapping pathway names to threshold activity score.
        An input without rows yields an empty dict.

    Raises
    ------
    ImportError
        If rpy2 could not be imported.
    ValueError
        If a numpy input is not 2-D, or if pathway names are not unique
        (the result is keyed by pathway name, so duplicates cannot be
        represented).

    Notes
    -----
    Each call (re)defines ``auc_threshold_single`` in R's global environment
    and requires the R package AUCell to be installed.
    """
    if not RPY2_AVAILABLE:
        raise ImportError("rpy2 is required for AUCell thresholding.")

    # Normalise the input to a labelled DataFrame.
    if isinstance(df_path, np.ndarray):
        if df_path.ndim != 2:
            raise ValueError(
                "df_path must be a 2-D array (pathways x samples), "
                f"got an array with {df_path.ndim} dimension(s)."
            )
        n_pathways, n_samples = df_path.shape
        if pathway_names is None:
            pathway_names = [f"pathway_{i}" for i in range(n_pathways)]
        if sample_names is None:
            sample_names = [f"sample_{i}" for i in range(n_samples)]
        df = pd.DataFrame(df_path, index=pathway_names, columns=sample_names)
    else:
        # The frame is only read below, so no defensive copy is needed.
        df = df_path

    # The output dict is keyed by pathway name; fail early and clearly instead
    # of crashing later (or silently overwriting) on repeated names.
    if not df.index.is_unique:
        repeated = df.index[df.index.duplicated()].unique().tolist()
        raise ValueError(f"Pathway names must be unique; duplicated: {repeated}")

    # Define R function for AUCell thresholding (single pathway).
    # NOTE(review): the spelling "assignmnet" is kept exactly as in the
    # original call; presumably it matches the internal name inside the
    # AUCell package -- confirm before "correcting" it.
    r_code = """
    auc_threshold_single <- function(scores) {
        suppressMessages({
            library(AUCell)
        })
        res <- AUCell:::.auc_assignmnetThreshold_v6(as.matrix(scores), plotHist=FALSE)
        return(res$selected)
    }
    """
    robjects.r(r_code)
    r_auc_threshold_single = robjects.globalenv["auc_threshold_single"]

    thresholds = {}
    with localconverter(robjects.default_converter + pandas2ri.converter):
        for position, pathway in enumerate(df.index):
            # Positional access always yields exactly one row, regardless of
            # how the index labels look.
            row_values = df.iloc[position].values
            # Pass as matrix (1 row, n_samples columns)
            scores_matrix = pd.DataFrame([row_values], columns=df.columns)
            r_matrix = robjects.conversion.py2rpy(scores_matrix)
            thresholds[pathway] = r_auc_threshold_single(r_matrix)[0]
    return thresholds
0 commit comments