Skip to content

Commit 7763e11

Browse files
committed
Add docker
1 parent 76b3233 commit 7763e11

1 file changed

Lines changed: 73 additions & 0 deletions

File tree

tests/test_pca_analysis.py

Lines changed: 73 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,73 @@
1+
import pytest
2+
import pandas as pd
3+
import numpy as np
4+
from mutclust.pca_analysis import calculate_eigen_genes
5+
6+
def test_empty_cluster_error():
    """An empty gene cluster must be rejected with a ValueError."""
    # A single gene measured across two samples.
    expression_data = pd.DataFrame(
        [[1.0, 2.0]],
        index=['Gene1'],
        columns=['Sample1', 'Sample2'],
    )

    # The second cluster is deliberately empty to trigger the error.
    gene_clusters = [['Gene1'], []]

    expected_message = "Cannot perform PCA on empty clusters"
    with pytest.raises(ValueError, match=expected_message):
        calculate_eigen_genes(expression_data, gene_clusters)
20+
21+
def test_single_gene_clusters():
    """A cluster holding one gene should reproduce that gene's expression."""
    # Rows are genes, columns are samples.
    expression_data = pd.DataFrame(
        [[1.0, 3.0], [2.0, 4.0]],
        index=['Gene1', 'Gene2'],
        columns=['Sample1', 'Sample2'],
    )

    cluster_genes = ['Gene1', 'Gene2']
    gene_clusters = [[gene] for gene in cluster_genes]

    eigen_genes = calculate_eigen_genes(expression_data, gene_clusters)

    # Each cluster column is compared against the expression row of its
    # only member gene.
    for position, gene in enumerate(cluster_genes):
        observed = eigen_genes[f'Cluster_{position}'].values
        expected = expression_data.loc[gene].values
        assert np.allclose(observed, expected, rtol=1e-5)
40+
41+
def test_large_dataset_parallel():
    """Check the output format of calculate_eigen_genes on a larger input.

    Builds a random 100-gene x 50-sample expression matrix, splits the genes
    into 10 equally sized clusters, and verifies that the result is a
    DataFrame with one row per sample and one ``Cluster_<i>`` column per
    cluster, none of which is identically zero.
    """
    n_clusters = 10
    genes_per_cluster = 10
    n_genes = n_clusters * genes_per_cluster
    n_samples = 50

    # Use a local generator so the test is reproducible without mutating
    # NumPy's global random state (which would leak into other tests).
    rng = np.random.default_rng(42)

    # Generate random expression data (rows: genes, columns: samples).
    expression_data = pd.DataFrame(
        rng.standard_normal((n_genes, n_samples)),
        index=[f'Gene{i}' for i in range(n_genes)],
        columns=[f'Sample{i}' for i in range(n_samples)],
    )

    # Consecutive, non-overlapping runs of genes form the clusters.
    gene_clusters = [
        [
            f'Gene{i}'
            for i in range(j * genes_per_cluster, (j + 1) * genes_per_cluster)
        ]
        for j in range(n_clusters)
    ]

    # Calculate eigen-genes.
    eigen_genes = calculate_eigen_genes(expression_data, gene_clusters)

    # Check the output format.
    expected_columns = [f'Cluster_{i}' for i in range(n_clusters)]
    assert isinstance(eigen_genes, pd.DataFrame)
    assert eigen_genes.shape == (n_samples, n_clusters)
    assert list(eigen_genes.columns) == expected_columns

    for column in expected_columns:
        # Each cluster's eigen-gene has one value per sample...
        assert len(eigen_genes[column]) == n_samples
        # ...and the values are not all zeros.
        assert not np.allclose(eigen_genes[column], 0)

0 commit comments

Comments
 (0)