Skip to content
Merged
Show file tree
Hide file tree
Changes from 8 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
68 changes: 39 additions & 29 deletions environment.yml
Original file line number Diff line number Diff line change
Expand Up @@ -2,37 +2,47 @@ name: spras
channels:
- conda-forge
dependencies:
- adjusttext=0.7.3.1
- bioconda::snakemake-minimal=8.17.0
- docker-py=5.0
- matplotlib=3.6
- networkx=2.8
- pandas=1.5
- numpy=1.26.4
- pre-commit=2.20 # Only required for development
- go=1.24 # Only required for development
- pytest=8.0 # Only required for development
- python=3.11
- pip=22.1
- requests=2.28
- scikit-learn=1.2
- seaborn=0.12
- spython=0.2
# for dsub
- python-dateutil<=2.9.0
- pytz<=2024.1
- pyyaml<=6.0.1
- tenacity<=8.2.3
- tabulate<=0.9.0
- adjusttext=1.3.0
- bioconda::snakemake-minimal=9.6.2
# Conda packages the PyPI `docker` library under the name `docker-py`.
- docker-py=7.1.0
- matplotlib=3.10.3
- networkx=3.5
- pandas=2.3.0
- numpy=2.3.1
- requests=2.32.4
- scikit-learn=1.7.0
- seaborn=0.13.2
- spython=0.3.14

# Only required for GraphSpace
- commonmark=0.9
- docutils=0.19
- jinja2=3.1
- mock=4.0
- recommonmark=0.7
- sphinx=6.0
- commonmark=0.9.1
- docutils=0.20.1
- jinja2=3.1.6
- mock=5.2.0
- recommonmark=0.7.1
- sphinx=7.4.7
- sphinx-rtd-theme=2.0.0
# graphspace_python is under pip

# conda-specific for dsub
- python-dateutil=2.9.0
- pytz=2025.2
- pyyaml=6.0.2
- tenacity=9.1.2
- tabulate=0.9.0

# toolchain deps
- pip=22.1
# This should be the same as requires-python minus the >=.
- python=3.11

# development dependencies
- pre-commit=4.2.0
- pytest=8.4.1
# development dependencies - conda-specific
- go=1.24

- pip:
- graphspace_python==1.3.1
- dsub==0.4.13

41 changes: 21 additions & 20 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -19,34 +19,35 @@ classifiers = [
requires-python = ">=3.11"
dependencies = [
"adjusttext==0.7.3",
# A bug was introduced in older versions of snakemake that prevents it from running; update to fix.
"snakemake==8.17.0",
"docker==5.0.3", # Switched from docker-py to docker because docker-py is not maintained in pypi. This appears to have no effect
"matplotlib==3.6",
"networkx==2.8",
"pandas==1.5",
"numpy==1.26.4",
"pip==22.1",
"requests==2.28",
"scikit-learn==1.2",
"seaborn==0.12",
"spython==0.2",
"snakemake==9.6.2",
"docker==7.1.0",
"matplotlib==3.10.3",
"networkx==3.5",
"pandas==2.3.0",
"numpy==2.3.1",
"requests==2.32.4",
"scikit-learn==1.7.0",
"seaborn==0.13.2",
"spython==0.3.14",
# Only required for GraphSpace
"commonmark==0.9",
"docutils==0.19",
"jinja2==3.1",
"mock==4.0",
"recommonmark==0.7",
"sphinx==6.0",
"graphspace_python==1.3.1",
"docutils==0.20.1",
"jinja2==3.1.6",
"mock==5.2.0",
"recommonmark==0.7.1",
"sphinx==7.4.7",
"sphinx-rtd-theme==2.0.0",
"graphspace_python==1.3.1",

# toolchain deps
"pip==22.1",
]

[project.optional-dependencies]
dev = [
# Only required for development
"pre-commit==2.20",
"pytest==8.0",
"pre-commit==4.2.0",
"pytest==8.4.1",
]

[project.urls]
Expand Down
2 changes: 1 addition & 1 deletion spras/analysis/ml.py
Original file line number Diff line number Diff line change
Expand Up @@ -303,7 +303,7 @@ def hac_horizontal(dataframe: pd.DataFrame, output_png: str, output_file: str, l

# plotting figure
plt.figure(figsize=(10, 7))
model = AgglomerativeClustering(linkage=linkage, affinity=metric,distance_threshold=0.5, n_clusters=None)
model = AgglomerativeClustering(linkage=linkage, metric=metric,distance_threshold=0.5, n_clusters=None)
model = model.fit(df)
plt.figure(figsize=(10, 7))
plt.title("Hierarchical Agglomerative Clustering Dendrogram")
Expand Down
4 changes: 4 additions & 0 deletions test/ml/expected/expected-pca-coordinates-2.tsv
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
algorithm PC1 PC2
test-data-s1 -2.006650210482033 -0.9865875190637743
test-data-s2 -1.5276508866841987 1.0799457247533237
test-data-s3 3.534301097166232 -0.0933582056895495
6 changes: 3 additions & 3 deletions test/ml/expected/expected-pca-coordinates.tsv
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
algorithm PC1 PC2
test-data-s1 -2.006650210482033 -0.9865875190637743
test-data-s2 -1.5276508866841987 1.0799457247533237
test-data-s3 3.534301097166232 -0.0933582056895495
test-data-s1 2.006650210482033 -0.9865875190637743
test-data-s2 1.5276508866841987 1.0799457247533237
test-data-s3 -3.534301097166232 -0.0933582056895495
8 changes: 6 additions & 2 deletions test/ml/test_ml.py
Original file line number Diff line number Diff line change
Expand Up @@ -82,8 +82,12 @@ def test_pca(self):

def test_pca_robustness(self):
dataframe = ml.summarize_networks([INPUT_DIR + 'test-data-s1/s1.txt', INPUT_DIR + 'test-data-s2/s2.txt', INPUT_DIR + 'test-data-s3/s3.txt'])
# The sign of the PCA components now depends on the input data, so we need two
# expected PCA coordinate files with opposite signs.
# See https://scikit-learn.org/stable/whats_new/v1.5.html#changed-models for more info.
expected = pd.read_table(EXPECT_DIR + 'expected-pca-coordinates.tsv')
expected_other = pd.read_table(EXPECT_DIR + 'expected-pca-coordinates-2.tsv')
expected = expected.round(5)
expected_other = expected_other.round(5)
for _ in range(5):
dataframe_shuffled = dataframe.sample(frac=1, axis=1) # permute the columns
ml.pca(dataframe_shuffled, OUT_DIR + 'pca-shuffled-columns.png', OUT_DIR + 'pca-shuffled-columns-variance.txt',
Expand All @@ -92,7 +96,7 @@ def test_pca_robustness(self):
coord = coord.round(5) # round values to 5 digits to account for numeric differences across machines
coord.sort_values(by='algorithm', ignore_index=True, inplace=True)

assert coord.equals(expected)
assert coord.equals(expected) or coord.equals(expected_other)

for _ in range(5):
dataframe_shuffled = dataframe.sample(frac=1, axis=0) # permute the rows
Expand All @@ -102,7 +106,7 @@ def test_pca_robustness(self):
coord = coord.round(5) # round values to 5 digits to account for numeric differences across machines
coord.sort_values(by='algorithm', ignore_index=True, inplace=True)

assert coord.equals(expected)
assert coord.equals(expected) or coord.equals(expected_other)

def test_hac_horizontal(self):
dataframe = ml.summarize_networks([INPUT_DIR + 'test-data-s1/s1.txt', INPUT_DIR + 'test-data-s2/s2.txt', INPUT_DIR + 'test-data-s3/s3.txt'])
Expand Down
Loading