From 0964f65feac5e465ec1766aae3ce7ac7074c81da Mon Sep 17 00:00:00 2001 From: marouenbg Date: Tue, 24 Mar 2026 19:16:15 -0400 Subject: [PATCH 1/2] Accept path-like objects for file inputs in PANDA and PUMA Replace 'type(x) is str' checks with 'isinstance(x, (str, os.PathLike))' so that pathlib.Path and other path-like objects are correctly recognized as file paths instead of being rejected as invalid DataFrames. This fixes the inability to mix file paths and DataFrames as inputs, which failed when path-like objects (e.g. pathlib.Path) were used. Files changed: - netZooPy/panda/io.py: load_motif() and load_expression() - netZooPy/panda/panda.py: PPI loading in processData() - netZooPy/puma/puma.py: miR file loading Fixes #351 --- netZooPy/panda/io.py | 9 +++++---- netZooPy/panda/panda.py | 4 ++-- netZooPy/puma/puma.py | 2 +- 3 files changed, 8 insertions(+), 7 deletions(-) diff --git a/netZooPy/panda/io.py b/netZooPy/panda/io.py index bff67c8a..c399fa1c 100644 --- a/netZooPy/panda/io.py +++ b/netZooPy/panda/io.py @@ -1,18 +1,19 @@ from __future__ import print_function import math +import os import numpy as np import sys import pandas as pd def load_motif(motif_file): - if type(motif_file) is str: + if isinstance(motif_file, (str, os.PathLike)): # If motif_file is a filename motif_data = pd.read_csv(motif_file, sep="\t", header=None) motif_tfs = sorted(set(motif_data[0])) motif_genes = sorted(set(motif_data[1])) - elif type(motif_file) is not str: + else: # If motif_file is an object if motif_file is None: # Computation without motif @@ -37,7 +38,7 @@ def load_motif(motif_file): def load_expression(expression_file, with_header = False, start = 1, end = None): - if type(expression_file) is str: + if isinstance(expression_file, (str, os.PathLike)): # If we pass an expression file, check if we have a 'with header' flag and read it if with_header: @@ -54,7 +55,7 @@ def load_expression(expression_file, with_header = False, start = 1, end = None) expression_genes = expression_data.index.tolist() expression_samples = expression_data.columns.astype(str) - elif type(expression_file) is not str: + else: # Pass expression as a dataframe if expression_file is not None: if not isinstance(expression_file, pd.DataFrame): diff --git a/netZooPy/panda/panda.py b/netZooPy/panda/panda.py index 34adfd42..bfc65c5c 100755 --- a/netZooPy/panda/panda.py +++ b/netZooPy/panda/panda.py @@ -357,14 +357,14 @@ def processData( ### Loading the PPI # #TODO: move this to io - if type(ppi_file) is str: + if isinstance(ppi_file, (str, os.PathLike)): with Timer("Loading PPI data ..."): self.ppi_data = pd.read_csv(ppi_file, sep="\t", header=None) self.ppi_tfs = sorted( set(pd.concat([self.ppi_data[0], self.ppi_data[1]])) ) print("Number of PPIs:", self.ppi_data.shape[0]) - elif type(ppi_file) is not str: + else: if ppi_file is not None: if not isinstance(ppi_file, pd.DataFrame): raise Exception("Please provide a pandas dataframe for PPI data.") diff --git a/netZooPy/puma/puma.py b/netZooPy/puma/puma.py index eeb2f070..f844bc46 100755 --- a/netZooPy/puma/puma.py +++ b/netZooPy/puma/puma.py @@ -118,7 +118,7 @@ def __init__( with Timer("Loading miR data ..."): # If the mir_file is a string the mir list is read from file # otherwise the input list is used directly - if type(mir_file) is str: + if isinstance(mir_file, (str, os.PathLike)): with open(mir_file, "r") as f: miR = f.read().splitlines() elif isinstance(mir_file,list): From 2e9d6b2d9661a6c3a6a8b35b481ddda5aa4b4e88 Mon Sep 17 00:00:00 2001 From: marouenbg Date: Tue, 24 Mar 2026 19:36:32 -0400 Subject: [PATCH 2/2] Fix pandas 4.x compatibility in cobra test and condor - test_cobra: use numpy array for mean computation instead of pandas Series integer access (KeyError: 0 with string column index) - condor: use .iloc for positional access on iterrows() Series (KeyError: 2 with string column labels) - condor: use ki.sum() instead of float(sum(ki)) to avoid TypeError with np.matrix column vectors in newer numpy --- netZooPy/condor/condor.py | 5 +++-- tests/test_cobra.py | 5 +++-- 2 files changed, 6 insertions(+), 4 deletions(-) diff --git a/netZooPy/condor/condor.py b/netZooPy/condor/condor.py index 0e755bd9..bc5e95c5 100644 --- a/netZooPy/condor/condor.py +++ b/netZooPy/condor/condor.py @@ -305,13 +305,14 @@ def matrices(self, c,resolution): # Computes weighted biadjacency matrix. A = np.matrix(np.zeros((p, q))) for edge in self.net.iterrows(): - A[gn[edge[1][1]], rg[edge[1][0]]] = edge[1][2] + row = edge[1] + A[gn[row.iloc[1]], rg[row.iloc[0]]] = row.iloc[2] # Computes node degrees for the nodesets. ki = A.sum(1) dj = A.sum(0) # Computes sum of edges and bimodularity matrix. - m = float(sum(ki)) + m = float(ki.sum()) B = A - resolution*((ki @ dj) / m) # d = self.index_dict diff --git a/tests/test_cobra.py b/tests/test_cobra.py index 2fe940fc..00e59e55 100644 --- a/tests/test_cobra.py +++ b/tests/test_cobra.py @@ -37,7 +37,8 @@ def test_cobra(): pd.testing.assert_frame_equal(G, G_gt, rtol=1e-10, check_exact=False) q = psi.shape[0] + X_mean = np.mean(X.to_numpy(), axis=0) for i in range(q): - C = Q.to_numpy().dot(np.mean(X, axis=0)[i] * np.diag(psi.to_numpy()[i, :])).dot(Q.to_numpy().T) - C_gt = Q_gt.to_numpy().dot(np.mean(X, axis=0)[i] * np.diag(psi_gt.to_numpy()[i, :])).dot(Q_gt.to_numpy().T) + C = Q.to_numpy().dot(X_mean[i] * np.diag(psi.to_numpy()[i, :])).dot(Q.to_numpy().T) + C_gt = Q_gt.to_numpy().dot(X_mean[i] * np.diag(psi_gt.to_numpy()[i, :])).dot(Q_gt.to_numpy().T) pd.testing.assert_frame_equal(pd.DataFrame(C), pd.DataFrame(C_gt), rtol=1e-10, check_exact=False)