diff --git a/NiBAx/core/muse_qc.py b/NiBAx/core/muse_qc.py new file mode 100644 index 000000000..46bf8a24d --- /dev/null +++ b/NiBAx/core/muse_qc.py @@ -0,0 +1,145 @@ +import pandas as pd +import numpy as np +import sklearn.decomposition +from sklearn.decomposition import PCA +from sys import argv +from sklearn.preprocessing import StandardScaler + +def getMHD_from_RefROIs(ROIs, ROIsReference, n_components=3): + + ''' + Usage Information + + ROIs: pandas dataframe or numpy array with numeric values + + ROIsReference: numpy array with numeric values (will be used to calculate scaler and PCA) + + Returns: MHD result, reference-based scaler and reference-based PCA model + + ''' + + # check ROIs's type; if dataframe, convert to numpy array; if numpy array, continue; if else, exit + if isinstance(ROIs, pd.DataFrame): + ROIs = ROIs.to_numpy() + elif isinstance(ROIs, np.ndarray): + pass + else: + print("Please check input ROIs file type! Expecting pandas dataframe or numpy array!") + exit(1) + + # check if ROIs is numeric; will get error if can't be converted to numeric + ROIs = ROIs.astype(float) + + # check if the number of columns is larger than n_components + n_col = ROIs.shape[1] + if n_col <= n_components: + print("Please make sure the number of columns is larger than n_components !") + exit(1) + + # check if ROIsReference is a numpy array; if yes, then compute PCA object using ROIsRefenece as data and apply model on ROIs; if is a PCA object, apply model on ROIs + if isinstance(ROIsReference, np.ndarray): + + if ROIsReference.shape[1] > n_components: + # standardize ROIsReference and fit the model + scaler = StandardScaler() + ROIsReference_S = scaler.fit_transform(ROIsReference) + + pca = PCA(n_components = n_components,svd_solver='randomized',random_state=100) + pca_ref = pca.fit(ROIsReference_S) + + # standardize ROIs and apply model + ROIs_S = scaler.transform(ROIs) + ROIs_transformed = pca_ref.transform(ROIs_S) + + else: + print("Please check your ROIsReference!") + exit(1) + + else: + print("Please check your ROIsReference file type!") + exit(1) + + + # calculateMahalanobis + result = getMahalanobis(ROIs_transformed) + + + # return values, ref-based scaler and ref-based model + return result, scaler, pca_ref + + +def getMHD_from_RefModel(ROIs, ReferenceScaler, ReferenceModel, n_components=3): + + ''' + Usage Information + + ROIs: pandas dataframe or numpy array with numeric values + + ReferenceScaler: scaler object + + ReferenceModel: PCA object (numpy array) with column number equals to n_components + + Returns: MHD result + ''' + + # check ROIs's type; if dataframe, convert to numpy array; if numpy array, continue; if else, exit + if isinstance(ROIs, pd.DataFrame): + ROIs = ROIs.to_numpy() + elif isinstance(ROIs, np.ndarray): + pass + else: + print("Please check input ROIs file type! Expecting pandas dataframe or numpy array!") + exit(1) + + # check if ROIs is numeric; will get error if can't be converted to numeric + ROIs = ROIs.astype(float) + + # check if the number of columns is larger than n_components + n_col = ROIs.shape[1] + if n_col <= n_components: + print("Please make sure the number of columns is larger than n_components !") + exit(1) + + # check if ReferenceScaler is correct type + if isinstance(ReferenceScaler,sklearn.preprocessing._data.StandardScaler) is False: + print("Please check your ReferenceScaler!") + exit(1) + + # check if ReferenceModel is correct type + if isinstance(ReferenceModel,sklearn.decomposition._pca.PCA) is False: + print("Please check your ReferenceModel!") + exit(1) + + # standardize ROIs and apply model + ROIs_S = ReferenceScaler.fit_transform(ROIs) + ROIs_transformed = ReferenceModel.transform(ROIs_S) + + + # calculateMahalanobis + result = getMahalanobis(ROIs_transformed) + + + # return values + return result + +def getMahalanobis(ROIs_transformed): + import scipy as sp + from scipy.spatial.distance import mahalanobis + + # removing subjects with inf values + ROIs_transformed_red = ROIs_transformed[np.where((ROIs_transformed.mean(axis=1) != np.inf) & (ROIs_transformed.mean(axis=1) != np.nan))[0],:] + + # calculate mean values of the roi values + ROIs_transformed_mean = ROIs_transformed_red.mean(axis=0 ) + + # calculate inverse of the covariance matrix + VI = sp.linalg.inv( np.cov(ROIs_transformed_red, rowvar=False)) + + # calculate squared mahalanobis distance + md = np.zeros(ROIs_transformed.shape[0]) + for i in range(ROIs_transformed.shape[0]): + md[i] = mahalanobis( ROIs_transformed_mean, ROIs_transformed[i], VI) ** 2 + + return np.sqrt(md) + + diff --git a/NiBAx/tests/test_muse_qc.py b/NiBAx/tests/test_muse_qc.py new file mode 100644 index 000000000..bc721af58 --- /dev/null +++ b/NiBAx/tests/test_muse_qc.py @@ -0,0 +1,41 @@ +import pandas as pd +import numpy as np +import sklearn.decomposition +from sklearn.decomposition import PCA +from sys import argv +from sklearn.preprocessing import StandardScaler +from joblib import dump +from joblib import load +import muse_qc + +# test on v1.1/istaging.pkl.gz MUSE ROIs +data = pd.read_pickle("istaging.pkl.gz") +data_notnull=data[data.MUSE_Volume_702.notna()] +data_nutnull_muse = data_notnull.loc[:, 'MUSE_Volume_702':'MUSE_Volume_207'] +data_nutnull_muse.reset_index(inplace=True,drop=True) +data_nutnull_muse.shape + +# randomly split data into test and reference +reference=data_nutnull_muse.sample(frac=0.6,random_state=200) #random state is a seed value +test=data_nutnull_muse.drop(reference.index) + +#convert to numpy array +roi_data_prepare_np = test.to_numpy() +ref_data_prepare_np = reference.to_numpy() + +#get MHD results, reference-based scaler and reference-based PCA model by providing test ROIs and reference ROIs +res, ref_scaler, ref_model = muse_qc.getMHD_from_RefROIs(roi_data_prepare_np, ref_data_prepare_np,3) + +#save reference-based scaler and reference-based model +dump(ref_scaler, 'ref_based_scaler.joblib') +dump(ref_model, 'ref_based_PCA_model.joblib') + +#load reference-based scaler and reference-based model +ref_scaler = load('ref_based_scaler.joblib') +ref_model = load('ref_based_PCA_model.joblib') + +#get MHD results by providing test ROIs, reference-based scaler and reference-based PCA model +res2 = muse_qc.getMHD_from_RefModel(roi_data_prepare_np, ref_scaler,ref_model,3) + + +