v0.1.0

eporetsky · eporetsky · commit 781ec0cd41bc · 2025-03-05T22:44:53.000-08:00
diff --git a/.github/workflows/publish.yml b/.github/workflows/publish.yml
@@ -0,0 +1,38 @@
+name: Publish to PyPI
+
+on:
+  push:
+    tags:
+      - "v*"  # Trigger the workflow only when a tag starting with "v" is pushed (e.g., v0.1.0)
+
+jobs:
+  build:
+    runs-on: ubuntu-latest
+
+    steps:
+    # Step 1: Check out the repository
+    - name: Check out code
+      uses: actions/checkout@v3
+
+    # Step 2: Set up Python
+    - name: Set up Python
+      uses: actions/setup-python@v4
+      with:
+        python-version: "3.8"  # Specify the Python version
+
+    # Step 3: Install dependencies
+    - name: Install dependencies
+      run: |
+        python -m pip install --upgrade pip
+        pip install build twine
+
+    # Step 4: Build the package
+    - name: Build the package
+      run: python -m build
+
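+    # Step 5: Publish to PyPI (NOTE(review): PyPI uploads require API-token auth; confirm PYPI_USERNAME holds "__token__" and PYPI_PASSWORD holds the token)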
+    - name: Publish to PyPI
+      env:
+        TWINE_USERNAME: ${{ secrets.PYPI_USERNAME }}
+        TWINE_PASSWORD: ${{ secrets.PYPI_PASSWORD }}
+      run: twine upload dist/*
diff --git a/.gitignore b/.gitignore
@@ -0,0 +1,5 @@
+.DS_Store
+*.egg-info/
+dist/
+build/
+__pycache__/
diff --git a/mutclust/__init__.py b/mutclust/__init__.py
@@ -3,4 +3,5 @@
 MutClust: A package for mutual rank-based clustering and GO enrichment analysis.
 """
 
+# Version of the MutClust package
 __version__ = "0.1.0"
diff --git a/mutclust/annotate.py b/mutclust/annotate.py
@@ -1,8 +1,7 @@
 import pandas as pd
 
-def add_gene_annotations(cluster_df, path_annot):
-    annot = pd.read_csv(path_annot, sep="\t")
-
-    cluster_df = cluster_df.merge(annot, on="geneID", how="left")
-
-    return(cluster_df)
+def add_gene_annotations(cluster_df, annotations):
+    if isinstance(annotations, str):  # A str is read as the path of a tab-separated file; any other value is merged as given
+        annotations = pd.read_csv(annotations, sep="\t")
+    cluster_df = cluster_df.merge(annotations, on="geneID", how="left")
+    return cluster_df
diff --git a/mutclust/gene_clustering.py b/mutclust/gene_clustering.py
@@ -4,6 +4,7 @@
 
 
 def filter_to_long_array(mr_df, threshold=100):
+    mr_df = mr_df.astype(float)
     mr_df.values[np.tril_indices_from(mr_df, k=0)] = np.inf
     rows, cols = np.where(mr_df < threshold)
     values = mr_df.values[rows, cols]
diff --git a/pyproject.toml b/pyproject.toml
@@ -1,17 +1,17 @@
 [build-system]
-requires = ["setuptools", "setuptools-scm"]
+requires = ["setuptools>=61.0.0", "wheel"]
 build-backend = "setuptools.build_meta"
 
 [project]
-name = "mutclust"
+name = "MutClust"
+version = "0.1.0"
 authors = [
     {name = "Elly Poretsky", email = "eporetsky@plantapp.org"},
 ]
-description = "MutClust: Mutual rank-based clustering and GO enrichment analysis."
+description = "MutClust: Mutual rank-based coexpression, clustering and GO term enrichment analysis."
 readme = "README.md"
-requires-python = ">=3.8"
 keywords = ["bioinformatics", "coexpression", "mutual rank", "clustering", "leiden", "gene ontology", "RNA-seq", "metabolomics"]
-license = {text = "MIT"}
+license = { file = "LICENSE" }
 classifiers = [
     "Programming Language :: Python :: 3",
     "License :: OSI Approved :: MIT License",
@@ -26,15 +26,25 @@ dependencies = [
     "goatools",
     'importlib-metadata; python_version<"3.10"',
 ]
-dynamic = ["version"]
+requires-python = ">=3.8"
 
 [tool.setuptools.packages]
 # Explicitly include only the 'mutclust' directory
 find = { include = ["mutclust"]}
 
 [project.optional-dependencies]
-dev = ["pytest", "black", "flake8"]
-docs = ["sphinx", "sphinx-rtd-theme"]
+dev = [
+    "pytest",          # For running tests
+    "pytest-cov",      # For test coverage reports
+    "black",           # For code formatting
+    "flake8",          # For linting
+    "mypy",            # For type checking
+    "pre-commit"       # For managing pre-commit hooks
+]
+docs = [
+    "sphinx",          # For generating documentation
+    "sphinx-rtd-theme" # For the ReadTheDocs theme
+]
 
 [project.scripts]
 mutclust = "mutclust.__main__:main"
diff --git a/tests/test_mutclust.py b/tests/test_mutclust.py
@@ -0,0 +1,38 @@
+import pytest
+import pandas as pd
+from mutclust.gene_clustering import filter_to_long_array, filter_and_apply_decay
+from mutclust.annotate import add_gene_annotations
+
+def test_filter_to_long_array():
+    # Symmetric 3x3 mock mutual rank matrix with a zero diagonal
+    data = {
+        "GeneA": [0, 50, 200],
+        "GeneB": [50, 0, 75],
+        "GeneC": [200, 75, 0],
+    }
+    mr_df = pd.DataFrame(data, index=["GeneA", "GeneB", "GeneC"])
+    long_array = filter_to_long_array(mr_df, threshold=100)
+    assert len(long_array) == 2  # GeneA-GeneB (50) and GeneB-GeneC (75) are below 100; GeneA-GeneC (200) is not
+
+def test_filter_and_apply_decay():
+    # Mock long-format pair table; Gene1/Gene2 hold integer IDs that are keys of gene_id_mapping
+    long_array = pd.DataFrame({
+        "Gene1": [0, 1],
+        "Gene2": [1, 2],
+        "MR": [10, 20]  # MR values chosen to keep ED > 0.01 for both pairs
+    })
+    gene_id_mapping = {0: "GeneA", 1: "GeneB", 2: "GeneC"}
+    filtered_array = filter_and_apply_decay(gene_id_mapping, long_array, e_val=10)
+    assert "ED" in filtered_array.columns
+    assert len(filtered_array) == 2  # Both pairs should pass the decay filter
+
+def test_add_gene_annotations():
+    # Annotations are passed as a DataFrame (not a path string), so no file is read
+    cluster_df = pd.DataFrame({"geneID": ["GeneA", "GeneB"]})
+    annotations = pd.DataFrame({
+        "geneID": ["GeneA", "GeneB"],
+        "description": ["Protein A", "Protein B"]
+    })
+    annotated_df = add_gene_annotations(cluster_df, annotations)
+    assert "description" in annotated_df.columns
+    assert annotated_df.loc[0, "description"] == "Protein A"