Skip to content
Merged
Show file tree
Hide file tree
Changes from 15 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
54 changes: 30 additions & 24 deletions environment.yml
Original file line number Diff line number Diff line change
Expand Up @@ -2,30 +2,36 @@ name: spras
channels:
- conda-forge
dependencies:
- adjusttext=0.7.3.1
- bioconda::snakemake-minimal=8.17.0
- docker-py=5.0
- matplotlib=3.6
- networkx=2.8
- pandas=1.5
- numpy=1.26.4
- pre-commit=2.20 # Only required for development
- go=1.24 # Only required for development
- pytest=8.0 # Only required for development
- python=3.11
- adjusttext=1.3.0
- bioconda::snakemake-minimal=9.6.2
# Conda packages the PyPI `docker` library under the name docker-py.
- docker-py=7.1.0
- matplotlib=3.10.3
- networkx=3.5
- pandas=2.3.0
- numpy=2.3.1
- requests=2.32.4
- scikit-learn=1.7.0
- seaborn=0.13.2
- spython=0.3.14

# conda-specific for dsub
- python-dateutil=2.9.0
- pytz=2025.2
- pyyaml=6.0.2
- tenacity=9.1.2
- tabulate=0.9.0

# toolchain deps
- pip=22.1
- requests=2.28
- scikit-learn=1.2
- seaborn=0.12
- spython=0.2
# for dsub
- python-dateutil<=2.9.0
- pytz<=2024.1
- pyyaml<=6.0.1
- tenacity<=8.2.3
- tabulate<=0.9.0
- sphinx=6.0
- sphinx-rtd-theme=2.0.0
# Keep this in sync with requires-python in pyproject.toml (minus the >= prefix).
- python=3.11

# development dependencies
- pre-commit=4.2.0
- pytest=8.4.1
# development dependencies - conda-specific
- go=1.24

- pip:
- dsub==0.4.13

29 changes: 14 additions & 15 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -19,27 +19,26 @@ classifiers = [
requires-python = ">=3.11"
dependencies = [
"adjusttext==0.7.3",
# A bug introduced in older versions of snakemake prevents it from running; update to fix.
"snakemake==8.17.0",
"docker==5.0.3", # Switched from docker-py to docker because docker-py is not maintained in pypi. This appears to have no effect
"matplotlib==3.6",
"networkx==2.8",
"pandas==1.5",
"numpy==1.26.4",
"snakemake==9.6.2",
"docker==7.1.0",
"matplotlib==3.10.3",
"networkx==3.5",
"pandas==2.3.0",
"numpy==2.3.1",
"requests==2.32.4",
"scikit-learn==1.7.0",
"seaborn==0.13.2",
"spython==0.3.14",

# toolchain deps
"pip==22.1",
"requests==2.28",
"scikit-learn==1.2",
"seaborn==0.12",
"spython==0.2",
"sphinx==6.0",
"sphinx-rtd-theme==2.0.0",
]

[project.optional-dependencies]
dev = [
# Only required for development
"pre-commit==2.20",
"pytest==8.0",
"pre-commit==4.2.0",
"pytest==8.4.1",
]

[project.urls]
Expand Down
2 changes: 1 addition & 1 deletion spras/analysis/ml.py
Original file line number Diff line number Diff line change
Expand Up @@ -382,7 +382,7 @@ def hac_horizontal(dataframe: pd.DataFrame, output_png: str, output_file: str, l

# plotting figure
plt.figure(figsize=(10, 7))
model = AgglomerativeClustering(linkage=linkage, affinity=metric,distance_threshold=0.5, n_clusters=None)
model = AgglomerativeClustering(linkage=linkage, metric=metric,distance_threshold=0.5, n_clusters=None)
model = model.fit(df)
plt.figure(figsize=(10, 7))
plt.title("Hierarchical Agglomerative Clustering Dendrogram")
Expand Down
5 changes: 5 additions & 0 deletions test/ml/expected/expected-pca-coordinates-negated.tsv
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
datapoint_labels PC1 PC2
centroid 0.0 0.0
test-data-s1 0.94594398 -0.46508182
test-data-s2 0.72014153 0.5090913
test-data-s3 -1.66608552 -0.04400948
2 changes: 1 addition & 1 deletion test/ml/expected/expected-pca-coordinates.tsv
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
datapoint_labels PC1 PC2
centroid 0.0 0.0
test-data-s1 -0.94594398 -0.46508182
test-data-s2 -0.72014153 0.5090913
test-data-s3 1.66608552 -0.04400948
centroid 0.0 0.0
9 changes: 7 additions & 2 deletions test/ml/test_ml.py
Original file line number Diff line number Diff line change
Expand Up @@ -88,6 +88,7 @@ def test_pca_remove_empty_pathways(self):
coord = pd.read_table(OUT_DIR + 'pca-coordinates.tsv')
coord = coord.round(5) # round values to 5 digits to account for numeric differences across machines
expected = pd.read_table(EXPECT_DIR + 'expected-pca-coordinates.tsv')
pd.read_table(EXPECT_DIR + 'expected-pca-coordinates-negated.tsv')
expected = expected.round(5)

assert coord.equals(expected)
Expand All @@ -107,8 +108,12 @@ def test_pca_kernel_density(self):
def test_pca_robustness(self):
dataframe = ml.summarize_networks([INPUT_DIR + 'test-data-s1/s1.txt', INPUT_DIR + 'test-data-s2/s2.txt',
INPUT_DIR + 'test-data-s3/s3.txt'])
# The sign of the PCA components now depends on the input data, so the test must accept
# either of two differently signed PCA coordinate files.
# See https://scikit-learn.org/stable/whats_new/v1.5.html#changed-models for more info.
expected = pd.read_table(EXPECT_DIR + 'expected-pca-coordinates.tsv')
expected_other = pd.read_table(EXPECT_DIR + 'expected-pca-coordinates-negated.tsv')
expected = expected.round(5)
expected_other = expected_other.round(5)
expected.sort_values(by='datapoint_labels', ignore_index=True, inplace=True)

for _ in range(5):
Expand All @@ -118,7 +123,7 @@ def test_pca_robustness(self):
coord = pd.read_table(OUT_DIR + 'pca-shuffled-columns-coordinates.tsv')
coord = coord.round(5) # round values to 5 digits to account for numeric differences across machines
coord.sort_values(by='datapoint_labels', ignore_index=True, inplace=True)
assert coord.equals(expected)
assert coord.equals(expected) or coord.equals(expected_other)

for _ in range(5):
dataframe_shuffled = dataframe.sample(frac=1, axis=0) # permute the rows
Expand All @@ -128,7 +133,7 @@ def test_pca_robustness(self):
coord = coord.round(5) # round values to 5 digits to account for numeric differences across machines
coord.sort_values(by='datapoint_labels', ignore_index=True, inplace=True)

assert coord.equals(expected)
assert coord.equals(expected) or coord.equals(expected_other)

def test_hac_horizontal(self):
dataframe = ml.summarize_networks([INPUT_DIR + 'test-data-s1/s1.txt', INPUT_DIR + 'test-data-s2/s2.txt', INPUT_DIR + 'test-data-s3/s3.txt'])
Expand Down
Loading