Mapmycells (#98)

Kraftfahrzeughaftpflichtversicherung · LouisK92 · web-flow · commit dc27e7e40b9d · 2025-12-11T19:02:05.000+01:00
* mapmycells first batch

* 2nd batch of changes

* code to test

* Assume gene symbols in both adatas in mapmycells script

* Add mapmycells to workflow and scripts

---------

Co-authored-by: LouisK92 &lt;louiskuemmerle@googlemail.com&gt;
diff --git a/scripts/run_benchmark/run_full_seqeracloud.sh b/scripts/run_benchmark/run_full_seqeracloud.sh
@@ -49,6 +49,7 @@ celltype_annotation_methods:
   - ssam
   - tacco
   - moscot
+  - mapmycells
 expression_correction_methods:
   - no_correction
   - gene_efficiency_correction
diff --git a/scripts/run_benchmark/run_test_local.sh b/scripts/run_benchmark/run_test_local.sh
@@ -52,6 +52,7 @@ celltype_annotation_methods:
   - ssam
   # - tacco
   # - moscot
+  # - mapmycells
 expression_correction_methods:
   - no_correction
   # - gene_efficiency_correction
diff --git a/src/methods_cell_type_annotation/mapmycells/config.vsh.yaml b/src/methods_cell_type_annotation/mapmycells/config.vsh.yaml
@@ -0,0 +1,35 @@
+name: mapmycells
+label: "mapmycells"
+summary: "Mapping of annotations from single-cell to spatial using moscot"
+description: "Mapping of annotations from single-cell to spatial using moscot"
+links:
+  documentation: 'https://github.com/AllenInstitute/cell_type_mapper'
+  repository: 'https://github.com/AllenInstitute/cell_type_mapper'
+references:
+  doi: "10.1038/s41586-023-06812-z"
+
+__merge__: /src/api/comp_method_cell_type_annotation.yaml
+   
+  
+resources:
+  - type: python_script
+    path: script.py
+
+engines:
+  - type: docker
+    image: openproblems/base_python:1
+    __merge__: 
+      - /src/base/setup_spatialdata_partial.yaml
+      - /src/base/setup_txsim_partial.yaml
+    setup:
+      - type: python
+        pypi: 
+          - numpy
+          - git+https://github.com/AllenInstitute/cell_type_mapper.git
+  - type: native
+
+runners:
+  - type: executable
+  - type: nextflow
+    directives:
+      label: [ hightime, midcpu, highmem]
diff --git a/src/methods_cell_type_annotation/mapmycells/script.py b/src/methods_cell_type_annotation/mapmycells/script.py
@@ -0,0 +1,106 @@
+import anndata as ad
+import os
+import subprocess
+import json
+import pandas as pd
+from pathlib import Path 
+## VIASH START
+par = {
+    'input_spatial_normalized_counts': 'resources_test/task_ist_preprocessing/mouse_brain_combined/spatial_normalized_counts.h5ad',
+    'input_scrnaseq_reference': 'resources_test/task_ist_preprocessing/mouse_brain_combined/scrnaseq_reference.h5ad',
+    'celltype_key': 'cell_type',
+    "output": 'spatial_with_celltypes.h5ad'
+}
+meta = { "temp_dir": './tmp/'}
+
+## VIASH END
+
+TMP_DIR = Path(meta["temp_dir"] or "/tmp/")
+TMP_DIR.mkdir(parents=True, exist_ok=True)
+
+adata_sp = ad.read_h5ad(par['input_spatial_normalized_counts'])
+adata_sc = ad.read_h5ad(par['input_scrnaseq_reference'])
+
+if "counts" in adata_sc.layers:
+    adata_sc.X = adata_sc.layers["counts"]
+
+adata_sp.var_names = adata_sp.var_names.astype(str)
+adata_sc.var_names = adata_sc.var_names.astype(str)
+adata_sp.var_names_make_unique()
+adata_sc.var_names_make_unique()
+
+common_genes = list(set(adata_sp.var.index).intersection(adata_sc.var.index))
+
+adata_sc = adata_sc[:, common_genes]
+sc_path = os.path.join(meta["temp_dir"],"sc_adata_processed.h5ad")
+adata_sc.write_h5ad(sc_path)
+sp_path = os.path.join(meta["temp_dir"],"sp_processed.h5ad")
+adata_sp[:, common_genes].write_h5ad(sp_path)
+
+
+
+precomputed_path = os.path.join(meta["temp_dir"],"precomputed_stats.h5ad")
+
+command = [
+    "python",
+    "-m",
+    "cell_type_mapper.cli.precompute_stats_scrattch",
+    "--h5ad_path",
+    sc_path,  
+    "--hierarchy",
+    "['cell_type']",
+    "--output_path",
+   precomputed_path
+]
+
+subprocess.run(command)
+
+data = {"None": common_genes}
+genes_file_path = os.path.join(meta["temp_dir"],"genes.json")
+with open(genes_file_path, "w") as json_file:
+        json.dump(data, json_file, indent=2)
+
+command = [
+    "python",
+    "-m",
+    "cell_type_mapper.cli.from_specified_markers",
+    "--query_path",
+    sp_path,  
+    "--type_assignment.normalization",
+    "log2CPM", 
+    "--precomputed_stats.path",
+    precomputed_path,
+    "--query_markers.serialized_lookup",
+    genes_file_path,
+    "--csv_result_path",
+    os.path.join(meta["temp_dir"],"results.csv"),
+    "--extended_result_path",
+    os.path.join(meta["temp_dir"], "extended_results.json"),
+    "--flatten",
+    "True",
+    "--type_assignment.bootstrap_iteration", 
+    "1",
+    "--type_assignment.bootstrap_factor",
+    "1.0"
+]
+
+subprocess.run(command)
+annotation_df = pd.read_csv(os.path.join(meta["temp_dir"],"results.csv"), skiprows=3)
+adata_sp.obs[par['celltype_key']] = list(annotation_df['cell_type_label'])
+
+
+
+# Delete all temporary files
+for file_path in [
+     sc_path,
+     sp_path,
+     precomputed_path,
+     genes_file_path,
+     os.path.join(meta["temp_dir"],"results.csv"),
+     os.path.join(meta["temp_dir"], "extended_results.json")
+]:
+        if os.path.isfile(file_path):
+            os.remove(file_path)
+
+
+adata_sp.write_h5ad(par['output'])
diff --git a/src/workflows/run_benchmark/config.vsh.yaml b/src/workflows/run_benchmark/config.vsh.yaml
@@ -98,7 +98,7 @@ argument_groups:
           A list of cell type annotation methods to run.
         type: string
         multiple: true
-        default: "ssam:tacco:moscot"
+        default: "ssam:tacco:moscot:mapmycells"
       - name: "--expression_correction_methods"
         description: |
           A list of expression correction methods to run.
@@ -168,6 +168,7 @@ dependencies:
   - name: methods_cell_type_annotation/ssam
   - name: methods_cell_type_annotation/tacco
   - name: methods_cell_type_annotation/moscot
+  - name: methods_cell_type_annotation/mapmycells
   - name: methods_expression_correction/no_correction
   - name: methods_expression_correction/gene_efficiency_correction
   - name: methods_expression_correction/resolvi_correction
diff --git a/src/workflows/run_benchmark/main.nf b/src/workflows/run_benchmark/main.nf
@@ -374,7 +374,8 @@ workflow run_wf {
   cta_methods = [
     ssam,
     tacco,
-    moscot
+    moscot,
+    mapmycells
   ]
   
   cta_ch = normalization_ch

Original file line number	Diff line number	Diff line change
`@@ -374,7 +374,8 @@ workflow run_wf {`
`374`	`374`	`cta_methods = [`
`375`	`375`	`ssam,`
`376`	`376`	`tacco,`
`377`		`- moscot`
	`377`	`+ moscot,`
	`378`	`+ mapmycells`
`378`	`379`	`]`
`379`	`380`
`380`	`381`	`cta_ch = normalization_ch`