From 89ac080d96f8ca78ec31393ebf0f2017775de1be Mon Sep 17 00:00:00 2001 From: yuhancui <43224134+yuhancui@users.noreply.github.com> Date: Mon, 10 Jan 2022 15:19:08 -0500 Subject: [PATCH 1/3] Add files via upload Add MUSE quality check test script. This script tried two functions: muse_qc.getMHD_from_RefROIs() and muse_qc.getMHD_from_RefModel() using v1.1/istaging.pkl.gz MUSE ROIs data. --- NiBAx/tests/test_muse_qc.py | 34 ++++++++++++++++++++++++++++++++++ 1 file changed, 34 insertions(+) create mode 100644 NiBAx/tests/test_muse_qc.py diff --git a/NiBAx/tests/test_muse_qc.py b/NiBAx/tests/test_muse_qc.py new file mode 100644 index 000000000..62fe6540f --- /dev/null +++ b/NiBAx/tests/test_muse_qc.py @@ -0,0 +1,34 @@ +import pandas as pd +import numpy as np +import sklearn.decomposition +from sklearn.decomposition import PCA +from sys import argv +from sklearn.preprocessing import StandardScaler +import muse_qc + +# test on v1.1/istaging.pkl.gz MUSE ROIs +data = pd.read_pickle("istaging.pkl.gz") +data_notnull=data[data.MUSE_Volume_702.notna()] +data_nutnull_muse = data_notnull.loc[:, 'MUSE_Volume_702':'MUSE_Volume_207'] +data_nutnull_muse.reset_index(inplace=True,drop=True) +data_nutnull_muse.shape + +# randomly split data into test and reference +reference=data_nutnull_muse.sample(frac=0.6,random_state=200) #random state is a seed value +test=data_nutnull_muse.drop(reference.index) + +#convert to numpy array +roi_data_prepare_np = test.to_numpy() +ref_data_prepare_np = reference.to_numpy() + +#get MHD results, reference-based scaler and reference-based PCA model by providing test ROIs and reference ROIs +res, ref_scaler, ref_model = muse_qc.getMHD_from_RefROIs(roi_data_prepare_np, ref_data_prepare_np,3) + +#get MHD results by providing test ROIs, reference-based scaler and reference-based PCA model +scaler_test = StandardScaler() +ROIsReference_test = scaler_test.fit_transform(ref_data_prepare_np) +pca = PCA(n_components = 3,svd_solver='randomized',random_state=100) +pca_ref = pca.fit(ROIsReference_test) +res2 = muse_qc.getMHD_from_RefModel(roi_data_prepare_np, scaler_test,pca_ref,3) + + From 5f78d6b9d50154707fe53b8c28044e2989bbb495 Mon Sep 17 00:00:00 2001 From: yuhancui <43224134+yuhancui@users.noreply.github.com> Date: Mon, 10 Jan 2022 15:26:53 -0500 Subject: [PATCH 2/3] Add files via upload Update muse_qc.py with 2 functions: 1. getMHD_from_RefROIs() -Accepts inputROis, refROIs, and n_components (default=3) -Returns MHD result, ref-based scaler and ref-based PCA model 2. getMHD_from_RefModel() -Accepts inputROIs, ref-based scaler, ref-based PCA model and n_components (default=3) -Returns MHD result --- NiBAx/core/muse_qc.py | 145 ++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 145 insertions(+) create mode 100644 NiBAx/core/muse_qc.py diff --git a/NiBAx/core/muse_qc.py b/NiBAx/core/muse_qc.py new file mode 100644 index 000000000..46bf8a24d --- /dev/null +++ b/NiBAx/core/muse_qc.py @@ -0,0 +1,145 @@ +import pandas as pd +import numpy as np +import sklearn.decomposition +from sklearn.decomposition import PCA +from sys import argv +from sklearn.preprocessing import StandardScaler + +def getMHD_from_RefROIs(ROIs, ROIsReference, n_components=3): + + ''' + Usage Information + + ROIs: pandas dataframe or numpy array with numeric values + + ROIsReference: numpy array with numeric values (will be used to calculate scaler and PCA) + + Returns: MHD result, reference-based scaler and reference-based PCA model + + ''' + + # check ROIs's type; if dataframe, convert to numpy array; if numpy array, continue; if else, exit + if isinstance(ROIs, pd.DataFrame): + ROIs = ROIs.to_numpy() + elif isinstance(ROIs, np.ndarray): + pass + else: + print("Please check input ROIs file type! Expecting pandas dataframe or numpy array!") + exit(1) + + # check if ROIs is numeric; will get error if can't be converted to numeric + ROIs = ROIs.astype(float) + + # check if the number of columns is larger than n_components + n_col = ROIs.shape[1] + if n_col <= n_components: + print("Please make sure the number of columns is larger than n_components !") + exit(1) + + # check if ROIsReference is a numpy array; if yes, then compute PCA object using ROIsRefenece as data and apply model on ROIs; if is a PCA object, apply model on ROIs + if isinstance(ROIsReference, np.ndarray): + + if ROIsReference.shape[1] > n_components: + # standardize ROIsReference and fit the model + scaler = StandardScaler() + ROIsReference_S = scaler.fit_transform(ROIsReference) + + pca = PCA(n_components = n_components,svd_solver='randomized',random_state=100) + pca_ref = pca.fit(ROIsReference_S) + + # standardize ROIs and apply model + ROIs_S = scaler.transform(ROIs) + ROIs_transformed = pca_ref.transform(ROIs_S) + + else: + print("Please check your ROIsReference!") + exit(1) + + else: + print("Please check your ROIsReference file type!") + exit(1) + + + # calculateMahalanobis + result = getMahalanobis(ROIs_transformed) + + + # return values, ref-based scaler and ref-based model + return result, scaler, pca_ref + + +def getMHD_from_RefModel(ROIs, ReferenceScaler, ReferenceModel, n_components=3): + + ''' + Usage Information + + ROIs: pandas dataframe or numpy array with numeric values + + ReferenceScaler: scaler object + + ReferenceModel: PCA object (numpy array) with column number equals to n_components + + Returns: MHD result + ''' + + # check ROIs's type; if dataframe, convert to numpy array; if numpy array, continue; if else, exit + if isinstance(ROIs, pd.DataFrame): + ROIs = ROIs.to_numpy() + elif isinstance(ROIs, np.ndarray): + pass + else: + print("Please check input ROIs file type! Expecting pandas dataframe or numpy array!") + exit(1) + + # check if ROIs is numeric; will get error if can't be converted to numeric + ROIs = ROIs.astype(float) + + # check if the number of columns is larger than n_components + n_col = ROIs.shape[1] + if n_col <= n_components: + print("Please make sure the number of columns is larger than n_components !") + exit(1) + + # check if ReferenceScaler is correct type + if isinstance(ReferenceScaler,sklearn.preprocessing._data.StandardScaler) is False: + print("Please check your ReferenceScaler!") + exit(1) + + # check if ReferenceModel is correct type + if isinstance(ReferenceModel,sklearn.decomposition._pca.PCA) is False: + print("Please check your ReferenceModel!") + exit(1) + + # standardize ROIs and apply model + ROIs_S = ReferenceScaler.fit_transform(ROIs) + ROIs_transformed = ReferenceModel.transform(ROIs_S) + + + # calculateMahalanobis + result = getMahalanobis(ROIs_transformed) + + + # return values + return result + +def getMahalanobis(ROIs_transformed): + import scipy as sp + from scipy.spatial.distance import mahalanobis + + # removing subjects with inf values + ROIs_transformed_red = ROIs_transformed[np.where((ROIs_transformed.mean(axis=1) != np.inf) & (ROIs_transformed.mean(axis=1) != np.nan))[0],:] + + # calculate mean values of the roi values + ROIs_transformed_mean = ROIs_transformed_red.mean(axis=0 ) + + # calculate inverse of the covariance matrix + VI = sp.linalg.inv( np.cov(ROIs_transformed_red, rowvar=False)) + + # calculate squared mahalanobis distance + md = np.zeros(ROIs_transformed.shape[0]) + for i in range(ROIs_transformed.shape[0]): + md[i] = mahalanobis( ROIs_transformed_mean, ROIs_transformed[i], VI) ** 2 + + return np.sqrt(md) + + From 550856543a1dd409770a846bf03bee2ecdc7bc81 Mon Sep 17 00:00:00 2001 From: yuhancui <43224134+yuhancui@users.noreply.github.com> Date: Tue, 11 Jan 2022 14:14:28 -0500 Subject: [PATCH 3/3] Update test_muse_qc.py Use output (ref-scaler, ref-model) from getMHD_from_RefROIs() as input for getMHD_from_RefModel(). --- NiBAx/tests/test_muse_qc.py | 17 ++++++++++++----- 1 file changed, 12 insertions(+), 5 deletions(-) diff --git a/NiBAx/tests/test_muse_qc.py b/NiBAx/tests/test_muse_qc.py index 62fe6540f..bc721af58 100644 --- a/NiBAx/tests/test_muse_qc.py +++ b/NiBAx/tests/test_muse_qc.py @@ -4,6 +4,8 @@ from sklearn.decomposition import PCA from sys import argv from sklearn.preprocessing import StandardScaler +from joblib import dump +from joblib import load import muse_qc # test on v1.1/istaging.pkl.gz MUSE ROIs @@ -24,11 +26,16 @@ #get MHD results, reference-based scaler and reference-based PCA model by providing test ROIs and reference ROIs res, ref_scaler, ref_model = muse_qc.getMHD_from_RefROIs(roi_data_prepare_np, ref_data_prepare_np,3) +#save reference-based scaler and reference-based model +dump(ref_scaler, 'ref_based_scaler.joblib') +dump(ref_model, 'ref_based_PCA_model.joblib') + +#load reference-based scaler and reference-based model +ref_scaler = load('ref_based_scaler.joblib') +ref_model = load('ref_based_PCA_model.joblib') + #get MHD results by providing test ROIs, reference-based scaler and reference-based PCA model -scaler_test = StandardScaler() -ROIsReference_test = scaler_test.fit_transform(ref_data_prepare_np) -pca = PCA(n_components = 3,svd_solver='randomized',random_state=100) -pca_ref = pca.fit(ROIsReference_test) -res2 = muse_qc.getMHD_from_RefModel(roi_data_prepare_np, scaler_test,pca_ref,3) +res2 = muse_qc.getMHD_from_RefModel(roi_data_prepare_np, ref_scaler,ref_model,3) +