Skip to content

Commit de16242

Browse files
authored
Sopa dep issues (#93)
* Set baysor config via dict
* Update comseg script for sopa 2.1.5
* Convert baysor force_2d parameter to bool
1 parent 776ac26 commit de16242

File tree

3 files changed

+44
-73
lines changed

3 files changed

+44
-73
lines changed

src/methods_transcript_assignment/baysor/config.vsh.yaml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -21,11 +21,11 @@ arguments:
2121
default: global
2222

2323
- name: --force_2d
24-
type: string
24+
type: boolean
2525
required: false
2626
description: "Ignores z-column in the data if it is provided"
2727
direction: input
28-
default: "false"
28+
default: true
2929

3030
- name: --min_molecules_per_cell
3131
type: integer

src/methods_transcript_assignment/baysor/script.py

Lines changed: 41 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,7 @@
2020
'coordinate_system': 'global',
2121
'output': './temp/methods/baysor/baysor_assigned_transcripts.zarr',
2222

23-
'force_2d': 'false',
23+
'force_2d': True, #'false',
2424
'min_molecules_per_cell': 50,
2525
'scale': -1.0, #NOTE: For parameter selection see https://github.com/gustaveroussy/sopa/tree/main/workflow/config
2626
'scale_std': "25%",
@@ -37,7 +37,7 @@
3737
TMP_DIR = Path(meta["temp_dir"] or "/tmp")
3838
TMP_DIR.mkdir(parents=True, exist_ok=True)
3939

40-
CONFIG_TOML = TMP_DIR / "config.toml"
40+
#CONFIG_TOML = TMP_DIR / "config.toml"
4141

4242

4343
##############################
@@ -70,7 +70,7 @@
7070
label_image = sdata_segm["segmentation"]["scale0"].image.to_numpy()
7171
else:
7272
label_image = sdata_segm["segmentation"].to_numpy()
73-
73+
7474
cell_id_dask_series = dask.dataframe.from_dask_array(
7575
dask.array.from_array(
7676
label_image[y_coords, x_coords], chunks=tuple(sdata[par['transcripts_key']].map_partitions(len).compute())
@@ -91,26 +91,43 @@
9191
},
9292
)
9393

94-
# Write config to toml
95-
print('Writing config to toml', flush=True)
96-
toml_str = f"""[data]
97-
x = "x"
98-
y = "y"
99-
z = "z"
100-
gene = "feature_name"
101-
force_2d = {par['force_2d']}
102-
min_molecules_per_cell = {int(par['min_molecules_per_cell'])}
103-
exclude_genes = ""
104-
105-
[segmentation]
106-
scale = {float(par['scale'])}
107-
scale_std = "{par['scale_std']}"
108-
n_clusters = {int(par['n_clusters'])}
109-
prior_segmentation_confidence = {float(par['prior_segmentation_confidence'])}
110-
"""
111-
with open(CONFIG_TOML, "w") as toml_file:
112-
toml_file.write(toml_str)
113-
94+
## Write config to toml #NOTE: led to an error since sopa v2.1.5; instead use a config dict
95+
#print('Writing config to toml', flush=True)
96+
#toml_str = f"""[data]
97+
#x = "x"
98+
#y = "y"
99+
#z = "z"
100+
#gene = "feature_name"
101+
#force_2d = {par['force_2d']}
102+
#min_molecules_per_cell = {int(par['min_molecules_per_cell'])}
103+
#exclude_genes = ""
104+
#
105+
#[segmentation]
106+
#scale = {float(par['scale'])}
107+
#scale_std = "{par['scale_std']}"
108+
#n_clusters = {int(par['n_clusters'])}
109+
#prior_segmentation_confidence = {float(par['prior_segmentation_confidence'])}
110+
#"""
111+
#with open(CONFIG_TOML, "w") as toml_file:
112+
# toml_file.write(toml_str)
113+
114+
config = {
115+
"data": {
116+
"x": "x",
117+
"y": "y",
118+
"z": "z",
119+
"gene": "feature_name",
120+
"force_2d": par['force_2d'],
121+
"min_molecules_per_cell": int(par['min_molecules_per_cell']),
122+
"exclude_genes": "",
123+
},
124+
"segmentation": {
125+
"scale": float(par['scale']),
126+
"scale_std": str(par['scale_std']),
127+
"n_clusters": int(par['n_clusters']),
128+
"prior_segmentation_confidence": float(par['prior_segmentation_confidence']),
129+
},
130+
}
114131

115132

116133
# Make transcript patches
@@ -124,7 +141,7 @@
124141
# (called with sopa -->) subprocess.CalledProcessError: Command 'baysor ...' returned non-zero exit status 139.
125142
# When reproducing the error with `baysor ...` it reports a signal (11.1) Segmentation fault Allocations: 5017730 (Pool: 5013281; Big: 4449); GC: 8
126143
os.environ['JULIA_NUM_THREADS'] = str(n_threads)
127-
sopa.segmentation.baysor(sdata_sopa, config=str(CONFIG_TOML))
144+
sopa.segmentation.baysor(sdata_sopa, config=config) #str(CONFIG_TOML))
128145

129146
# Assign transcripts to cell ids
130147
sopa.spatial.assign_transcript_to_cell(

src/methods_transcript_assignment/comseg/script.py

Lines changed: 1 addition & 47 deletions
Original file line numberDiff line numberDiff line change
@@ -25,52 +25,6 @@
2525
}
2626
## VIASH END
2727

28-
def fixed_count_transcripts_aligned(geo_df, points, value_key):
29-
"""
30-
The same function as sopa.aggregation.transcripts._count_transcripts_aligned.
31-
Minor change: the matrix X is converted to csr_matrix to avoid an error in the comseg call
32-
33-
"""
34-
from scipy.sparse import csr_matrix
35-
from anndata import AnnData
36-
from dask.diagnostics import ProgressBar
37-
from functools import partial
38-
from sopa._settings import settings
39-
import geopandas as gpd
40-
def _add_csr(X_partitions, geo_df, partition, gene_column, gene_names ):
41-
if settings.gene_exclude_pattern is not None:
42-
partition = partition[~partition[gene_column].str.match(settings.gene_exclude_pattern, case=False, na=False)]
43-
44-
points_gdf = gpd.GeoDataFrame(partition, geometry=gpd.points_from_xy(partition["x"], partition["y"]))
45-
joined = geo_df.sjoin(points_gdf)
46-
cells_indices, column_indices = joined.index, joined[gene_column].cat.codes
47-
cells_indices = cells_indices[column_indices >= 0]
48-
column_indices = column_indices[column_indices >= 0]
49-
X_partition = csr_matrix((np.full(len(cells_indices), 1), (cells_indices, column_indices)),
50-
shape=(len(geo_df), len(gene_names)),
51-
)
52-
X_partitions.append(X_partition)
53-
54-
55-
points[value_key] = points[value_key].astype("category").cat.as_known()
56-
gene_names = points[value_key].cat.categories.astype(str)
57-
X = csr_matrix((len(geo_df), len(gene_names)), dtype=int)
58-
adata = AnnData(X=X, var=pd.DataFrame(index=gene_names))
59-
adata.obs_names = geo_df.index.astype(str)
60-
geo_df = geo_df.reset_index()
61-
X_partitions = []
62-
with ProgressBar():
63-
points.map_partitions(
64-
partial(_add_csr, X_partitions, geo_df, gene_column=value_key, gene_names=gene_names),
65-
meta=(),
66-
).compute()
67-
for X_partition in X_partitions:
68-
adata.X += X_partition
69-
if settings.gene_exclude_pattern is not None:
70-
adata = adata[:, ~adata.var_names.str.match(settings.gene_exclude_pattern, case=False, na=False)].copy()
71-
return adata
72-
73-
7428

7529
# Read input files
7630
print('Reading input files', flush=True)
@@ -105,7 +59,7 @@ def _add_csr(X_partitions, geo_df, partition, gene_column, gene_names ):
10559
"gene_column": par["gene_column"],
10660
}
10761

108-
sopa.aggregation.transcripts._count_transcripts_aligned = fixed_count_transcripts_aligned
62+
10963
# sopa.settings.parallelization_backend = 'dask' #TODO: get parallelization running.
11064
sopa.segmentation.comseg(sdata, config)
11165

0 commit comments

Comments
 (0)