Skip to content

Commit d447823

Browse files
authored
Merge pull request #1 from eporetsky/docker-merge
# MutClust v0.1.2

## Major Changes

- Added Docker support for easy deployment and reproducibility
- Improved test coverage and stability
- Updated documentation with Docker usage examples
- Updated dependency specifications
2 parents 6950028 + b11b867 commit d447823

5 files changed

Lines changed: 156 additions & 17 deletions

File tree

.dockerignore

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,18 @@
1+
.git
2+
.gitignore
3+
.pytest_cache
4+
__pycache__
5+
*.pyc
6+
*.pyo
7+
*.pyd
8+
.Python
9+
env
10+
venv
11+
.env
12+
.venv
13+
*.egg-info
14+
dist
15+
build
16+
.DS_Store
17+
*.swp
18+
*.swo

Dockerfile

Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,33 @@
1+
FROM ubuntu:20.04
2+
3+
# Set environment variables to avoid interactive prompts
4+
ENV DEBIAN_FRONTEND=noninteractive
5+
6+
# Install system dependencies
7+
RUN apt-get update && apt-get install -y \
8+
python3.9 \
9+
python3.9-dev \
10+
python3-pip \
11+
git \
12+
&& rm -rf /var/lib/apt/lists/*
13+
14+
# Set up Python 3.9 as default
15+
RUN ln -sf /usr/bin/python3.9 /usr/bin/python3 && \
16+
ln -sf /usr/bin/python3.9 /usr/bin/python && \
17+
ln -sf /usr/bin/pip3 /usr/bin/pip
18+
19+
# Create a working directory
20+
WORKDIR /app
21+
22+
# Copy the project files
23+
COPY . /app/
24+
25+
# Install MutClust and its dependencies
26+
RUN pip install --upgrade pip && \
27+
pip install .
28+
29+
# Create a directory for mounting data
30+
RUN mkdir /data
31+
32+
# Set the entrypoint
33+
ENTRYPOINT ["mutclust"]

README.md

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,20 @@ cd mutclust
3232
pip install .
3333
```
3434

35+
### Docker Installation
36+
37+
For users who prefer containerized deployment, MutClust can be built and run as a Docker container using the `Dockerfile` in the repository root:
38+
39+
```bash
40+
# Build the container
41+
docker build -t mutclust .
42+
43+
# Run MutClust with your data
44+
docker run -v /path/to/your/data:/data mutclust --expression /data/your_expression.tsv --output /data/results
45+
```
46+
47+
The image is based on Ubuntu 20.04 with Python 3.9 and installs MutClust together with its dependencies. Mount your data directory at `/data` inside the container so that MutClust can read your input files and write its results back to the host.
48+
3549
---
3650

3751
## Usage

pyproject.toml

Lines changed: 18 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
44

55
[project]
66
name = "MutClust"
7-
version = "0.1.1"
7+
version = "0.1.2"
88
authors = [
99
{name = "Elly Poretsky", email = "eporetsky@plantapp.org"},
1010
]
@@ -15,16 +15,17 @@ keywords = ["bioinformatics", "coexpression", "mutual rank", "clustering", "leid
1515
classifiers = [
1616
"Programming Language :: Python :: 3",
1717
"License :: OSI Approved :: MIT License",
18-
"Operating System :: OS Independent",
18+
"Operating System :: POSIX :: Linux",
1919
"Topic :: Scientific/Engineering :: Bio-Informatics"
2020
]
2121
dependencies = [
22-
"numpy",
23-
"pandas",
24-
"pynetcor",
25-
"python-igraph",
26-
"goatools",
27-
'importlib-metadata; python_version<"3.10"',
22+
"numpy==2.0.2",
23+
"pandas==2.2.3",
24+
"pynetcor==0.1.1",
25+
"python-igraph==0.11.8",
26+
"goatools==1.4.12",
27+
"scikit-learn==1.6.1",
28+
'importlib-metadata==8.6.1; python_version<"3.10"',
2829
]
2930
requires-python = ">=3.9"
3031

@@ -34,17 +35,17 @@ find = { include = ["mutclust"]}
3435

3536
[project.optional-dependencies]
3637
dev = [
37-
"pytest", # For running tests
38-
"pytest-cov", # For test coverage reports
39-
"black", # For code formatting
40-
"flake8", # For linting
41-
"mypy", # For type checking
42-
"pre-commit" # For managing pre-commit hooks
38+
"pytest>=7.0.0", # For running tests
39+
"pytest-cov>=3.0.0", # For test coverage reports
40+
"black>=22.0.0", # For code formatting
41+
"flake8>=4.0.0", # For linting
42+
"mypy>=0.900", # For type checking
43+
"pre-commit>=2.0.0" # For managing pre-commit hooks
4344
]
4445
docs = [
45-
"sphinx", # For generating documentation
46-
"sphinx-rtd-theme" # For the ReadTheDocs theme
46+
"sphinx>=4.0.0", # For generating documentation
47+
"sphinx-rtd-theme>=1.0.0" # For the ReadTheDocs theme
4748
]
4849

4950
[project.scripts]
50-
mutclust = "mutclust.__main__:main"
51+
mutclust = "mutclust.__main__:main"

tests/test_pca_analysis.py

Lines changed: 73 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,73 @@
1+
import pytest
2+
import pandas as pd
3+
import numpy as np
4+
from mutclust.pca_analysis import calculate_eigen_genes
5+
6+
def test_empty_cluster_error():
    """An empty gene list among the clusters must be rejected with ValueError."""
    # One gene measured in two samples; the second cluster has no members.
    samples = {'Sample1': [1.0], 'Sample2': [2.0]}
    expression_data = pd.DataFrame(samples, index=['Gene1'])
    gene_clusters = [['Gene1'], []]

    expected_message = "Cannot perform PCA on empty clusters"
    with pytest.raises(ValueError, match=expected_message):
        calculate_eigen_genes(expression_data, gene_clusters)
20+
21+
def test_single_gene_clusters():
    """A cluster holding one gene should reproduce that gene's expression values."""
    frame = pd.DataFrame(
        {'Sample1': [1.0, 2.0], 'Sample2': [3.0, 4.0]},
        index=['Gene1', 'Gene2'],
    )
    clusters = [['Gene1'], ['Gene2']]

    result = calculate_eigen_genes(frame, clusters)

    # Each output column is compared against the row of its only member gene.
    for column, gene in (('Cluster_0', 'Gene1'), ('Cluster_1', 'Gene2')):
        assert np.allclose(result[column].values,
                           frame.loc[gene].values, rtol=1e-5)
40+
41+
def test_large_dataset_parallel():
    """Check shape, column labels and non-zero output on a 100-gene, 50-sample dataset."""
    # Fixed seed so the random expression matrix is reproducible.
    np.random.seed(42)
    n_genes, n_samples = 100, 50

    gene_names = [f'Gene{i}' for i in range(n_genes)]
    sample_names = [f'Sample{i}' for i in range(n_samples)]
    expression_data = pd.DataFrame(
        np.random.randn(n_genes, n_samples),
        index=gene_names,
        columns=sample_names,
    )

    # Ten consecutive, non-overlapping groups of ten genes each.
    gene_clusters = [gene_names[start:start + 10] for start in range(0, n_genes, 10)]

    eigen_genes = calculate_eigen_genes(expression_data, gene_clusters)

    # Output format: one row per sample, one column per cluster.
    expected_columns = [f'Cluster_{i}' for i in range(10)]
    assert isinstance(eigen_genes, pd.DataFrame)
    assert eigen_genes.shape == (n_samples, 10)  # 50 samples, 10 clusters
    assert all(eigen_genes.columns == expected_columns)

    for name in expected_columns:
        column = eigen_genes[name]
        # Every eigen-gene spans all samples ...
        assert len(column) == n_samples
        # ... and is not identically zero.
        assert not np.allclose(column, 0)

0 commit comments

Comments
 (0)