diff --git a/mubench.pipeline/crossproject_create_index.py b/mubench.pipeline/crossproject_create_index.py
deleted file mode 100644
index 527bcd8f2..000000000
--- a/mubench.pipeline/crossproject_create_index.py
+++ /dev/null
@@ -1,43 +0,0 @@
-import os
-
-import sys
-
-from data.misuse import Misuse
-from data.project import Project
-from data.project_version import ProjectVersion
-from tasks.implementations.collect_misuses import CollectMisusesTask
-from tasks.implementations.collect_projects import CollectProjectsTask
-from tasks.implementations.collect_versions import CollectVersionsTask
-from tasks.task_runner import TaskRunner
-from utils.data_entity_lists import DataEntityLists
-from utils.dataset_util import get_white_list
-
-
-__MUBENCH_ROOT_PATH = os.path.normpath(os.path.join(os.path.dirname(os.path.abspath(__file__)), os.pardir))
-__MUBENCH_DATA_PATH = os.path.join(__MUBENCH_ROOT_PATH, "data")
-__MUBENCH_DATASETS_FILE = os.path.join(__MUBENCH_DATA_PATH, "datasets.yml")
-_INDEX_PATH = os.path.join(__MUBENCH_ROOT_PATH, "checkouts-xp", "index.csv")
-
-
-class PrintIndexTask:
-    def run(self, project: Project, version: ProjectVersion, misuse: Misuse):
-        print("{}\t{}\t{}\t{}\t{}\t{}\t{}".format(project.id, version.version_id, misuse.misuse_id,
-                                                  ':'.join(version.source_dirs),
-                                                  misuse.location.file, misuse.location.method,
-                                                  "\t".join(misuse.apis)), file=open(_INDEX_PATH, "a"))
-
-
-datasets = sys.argv[1:]
-
-white_list = []
-for dataset in datasets:
-    white_list.extend(get_white_list(__MUBENCH_DATASETS_FILE, dataset.lower()))
-initial_parameters = [DataEntityLists(white_list, [])]
-
-runner = TaskRunner(
-    [CollectProjectsTask(__MUBENCH_DATA_PATH), CollectVersionsTask(False), CollectMisusesTask(), PrintIndexTask()])
-
-if os.path.exists(_INDEX_PATH):
-    os.remove(_INDEX_PATH)
-
-runner.run(*initial_parameters)
diff --git a/mubench.pipeline/crossproject_create_project_list.py b/mubench.pipeline/crossproject_create_project_list.py
deleted file mode 100644
index b6564d107..000000000
--- a/mubench.pipeline/crossproject_create_project_list.py
+++ /dev/null
@@ -1,51 +0,0 @@
-import csv
-import logging
-import os
-from datetime import datetime
-from os.path import exists, join
-
-from utils.io import open_yamls_if_exists, write_yaml
-from utils.logging import IndentFormatter
-from utils.shell import Shell
-
-MUBENCH_ROOT_PATH = os.path.normpath(os.path.join(os.path.dirname(os.path.abspath(__file__)), os.pardir))
-CHECKOUTS_PATH = os.path.join(MUBENCH_ROOT_PATH, "checkouts-xp")
-INDEX_PATH = os.path.join(CHECKOUTS_PATH, "index.csv")
-
-logger = logging.getLogger()
-logger.setLevel(logging.DEBUG)
-handler = logging.StreamHandler()
-handler.setFormatter(IndentFormatter("%(asctime)s %(indent)s%(message)s"))
-handler.setLevel(logging.INFO)
-logger.addHandler(handler)
-LOG_DIR = "logs"
-if not exists(LOG_DIR):
-    os.makedirs(LOG_DIR)
-log_name = datetime.now().strftime("prepare_ex4_%Y%m%d_%H%M%S") + ".log"
-handler = logging.FileHandler(join(LOG_DIR, log_name))
-handler.setFormatter(logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s'))
-handler.setLevel(logging.DEBUG)
-logger.addHandler(handler)
-
-example_projects_by_API = {}
-with open(INDEX_PATH) as index:
-    for row in csv.reader(index, delimiter="\t"):
-        # skip blank lines, e.g., on trailing newline
-        if not row:
-            continue
-
-        target_type = row[6]
-        try:
-            if target_type not in example_projects_by_API:
-                logger.info("Preparing examples for type: %s...", target_type)
-                target_example_file = os.path.join(CHECKOUTS_PATH, target_type + ".yml")
-                example_projects = {}
-                with open_yamls_if_exists(target_example_file) as projects:
-                    for project in projects:
-                        hash = Shell.exec("cd \"{}\"; git rev-parse HEAD".format(join(MUBENCH_ROOT_PATH, project["path"])))
-                        example_projects[project["url"]] = hash.strip()
-                example_projects_by_API[target_type] = example_projects
-        except Exception as error:
-            logger.exception("failed", exc_info=error)
-
-write_yaml(example_projects_by_API, join(CHECKOUTS_PATH, "example_projects_by_API.yml"))
diff --git a/mubench.pipeline/crossproject_prepare.py b/mubench.pipeline/crossproject_prepare.py
deleted file mode 100644
index 8ae4b5c14..000000000
--- a/mubench.pipeline/crossproject_prepare.py
+++ /dev/null
@@ -1,129 +0,0 @@
-import calendar
-import csv
-import logging
-import os
-import sys
-from datetime import datetime
-from os.path import exists, join
-from typing import List
-
-from boa.BOA import BOA
-from buildtools.maven import Project
-from utils.io import write_yamls, write_yaml, is_empty
-from utils.logging import IndentFormatter
-from utils.shell import CommandFailedError
-
-MUBENCH_ROOT_PATH = os.path.normpath(os.path.join(os.path.dirname(os.path.abspath(__file__)), os.pardir))
-XP_CHECKOUTS_PATH = os.path.join(MUBENCH_ROOT_PATH, "checkouts-xp")
-CHECKOUTS_PATH = os.path.join(XP_CHECKOUTS_PATH, "checkouts")
-BOA_RESULTS_PATH = join(XP_CHECKOUTS_PATH, "boa-results")
-INDEX_PATH = os.path.join(XP_CHECKOUTS_PATH, "index.csv")
-SUBTYPES_PATH = os.path.join(XP_CHECKOUTS_PATH, "subtypes.csv")
-MAX_SUBTYPES_SAMPLE_SIZE = 25
-MAX_PROJECT_SAMPLE_SIZE = 50
-
-_SUBTYPES = {}
-
-username = sys.argv[1]
-password = sys.argv[2]
-
-now = datetime.utcnow()
-run_timestamp = calendar.timegm(now.timetuple())
-
-
-def _get_subtypes(target_type):
-    if not _SUBTYPES:
-        with open(SUBTYPES_PATH) as subtypes_file:
-            for subtypes_row in csv.reader(subtypes_file, delimiter="\t"):
-                _SUBTYPES[subtypes_row[0]] = subtypes_row[1:]
-
-    all_subtypes = _SUBTYPES.get(target_type, [])
-    subtypes_sample = [subtype for subtype in all_subtypes if "sun." not in subtype]  # filter Sun-specific types
-    return subtypes_sample
-
-
-def _get_type_and_subtypes_list(target_type):
-    return [target_type] + _get_subtypes(target_type)
-
-
-def _create_type_combinations(target_types: List):
-    if len(target_types) == 1:
-        return ([type] for type in _get_type_and_subtypes_list(target_types[0]))
-    else:
-        return ([target_type] + tail
-                for target_type in _get_type_and_subtypes_list(target_types[0])
-                for tail in _create_type_combinations(target_types[1:]))
-
-
-def _prepare_example_projects(target_types: List, boa: BOA, metadata_path: str):
-    data = []
-    for type_combination in _create_type_combinations(target_types):
-        projects = boa.query_projects_with_type_usages(target_types, type_combination)
-        for project in projects:
-            checkout = project.get_checkout(CHECKOUTS_PATH)
-            if not checkout.exists():
-                try:
-                    logger.info("  Checking out %r...", str(project))
-                    checkout.clone()
-                except CommandFailedError as error:
-                    logger.warning("    Checkout failed: %r", error)
-                    checkout.delete()
-                    continue
-            else:
-                logger.info("  Already checked out %r.", str(project))
-
-            try:
-                project_entry = {"id": project.id, "url": project.repository_url,
-                                 "path": os.path.relpath(checkout.path, MUBENCH_ROOT_PATH),
-                                 "source_paths": Project(checkout.path).get_sources_paths(),
-                                 "checkout_timestamp": run_timestamp}
-                write_yaml(project_entry)  # check for encoding problems
-                data.append(project_entry)
-            except UnicodeEncodeError:
-                logger.warning("    Illegal characters in project data.")
-
-            if len(data) >= MAX_PROJECT_SAMPLE_SIZE:
-                logger.warning("  Stopping after %r of %r example projects.", MAX_PROJECT_SAMPLE_SIZE, len(projects))
-                write_yamls(data, metadata_path)
-                return
-
-    write_yamls(data, metadata_path)
-
-
-logger = logging.getLogger()
-logger.setLevel(logging.DEBUG)
-handler = logging.StreamHandler()
-handler.setFormatter(IndentFormatter("%(asctime)s %(indent)s%(message)s"))
-handler.setLevel(logging.INFO)
-logger.addHandler(handler)
-LOG_DIR = "logs"
-if not exists(LOG_DIR):
-    os.makedirs(LOG_DIR)
-log_name = datetime.now().strftime("prepare_ex4_%Y%m%d_%H%M%S") + ".log"
-handler = logging.FileHandler(join(LOG_DIR, log_name))
-handler.setFormatter(logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s'))
-handler.setLevel(logging.DEBUG)
-logger.addHandler(handler)
-
-
-with open(INDEX_PATH) as index:
-    boa = BOA(username, password, BOA_RESULTS_PATH)
-    for row in csv.reader(index, delimiter="\t"):
-        # skip blank lines, e.g., on trailing newline
-        if not row:
-            continue
-
-        project_id = row[0]
-        version_id = row[1]
-        target_types = sorted(row[6:])
-        try:
-            target_example_file = os.path.join(XP_CHECKOUTS_PATH, "-".join(sorted(target_types)) + ".yml")
-            if not exists(target_example_file):
-                logger.info("Preparing examples for %s.%s (type(s): %s)...", project_id, version_id, target_types)
-                _prepare_example_projects(target_types, boa, target_example_file)
-            elif is_empty(target_example_file):
-                logger.info("No example projects for %s.%s (type(s): %s)", project_id, version_id, target_types)
-            else:
-                logger.info("Already prepared examples for %s.%s (type(s): %s)", project_id, version_id, target_types)
-        except Exception as error:
-            logger.exception("failed", exc_info=error)
diff --git a/mubench.pipeline/tasks/configurations/configurations.py b/mubench.pipeline/tasks/configurations/configurations.py
index 185ddcc92..61d3fc9fe 100644
--- a/mubench.pipeline/tasks/configurations/configurations.py
+++ b/mubench.pipeline/tasks/configurations/configurations.py
@@ -1,5 +1,6 @@
 from typing import List
 
+from tasks.implementations.crossproject_create_index import CrossProjectCreateIndexTask
 from tasks.implementations import stats
 from tasks.implementations.checkout import CheckoutTask
 from tasks.implementations.collect_misuses import CollectMisusesTask
@@ -7,6 +8,9 @@
 from tasks.implementations.collect_versions import CollectVersionsTask
 from tasks.implementations.compile_misuse import CompileMisuseTask
 from tasks.implementations.compile_version import CompileVersionTask
+from tasks.implementations.crossproject_create_project_list import CrossProjectCreateProjectListTask
+from tasks.implementations.crossproject_prepare import CrossProjectPrepareTask
+from tasks.implementations.crossproject_read_index import CrossProjectReadIndexTask, CrossProjectSkipReadIndexTask
 from tasks.implementations.dataset_check_misuse import MisuseCheckTask
 from tasks.implementations.dataset_check_project import ProjectCheckTask
 from tasks.implementations.dataset_check_version import VersionCheckTask
@@ -18,6 +22,7 @@
 from tasks.implementations.load_detector import LoadDetectorTask
 from tasks.implementations.publish_findings import PublishFindingsTask
 from tasks.implementations.publish_metadata import PublishMetadataTask
+from tasks.task_runner import TaskRunner
 from utils.dataset_util import get_available_datasets
 
 
@@ -103,9 +108,10 @@ def tasks(self, config) -> List:
                                          config.java_options)
         detect = DetectProvidedCorrectUsagesTask(config.findings_path, config.force_detect, config.timeout,
                                                  config.run_timestamp)
-        return [load_detector] + CheckoutTaskConfiguration().tasks(config) + [compile_version, collect_misuses,
-                                                                              filter_misuses_without_correct_usages,
-                                                                              compile_misuse, detect]
+
+        # noinspection PyTypeChecker
+        return [load_detector] + CheckoutTaskConfiguration().tasks(config) + \
+               [compile_version, collect_misuses, filter_misuses_without_correct_usages, compile_misuse] + [detect]
 
 
 class PublishProvidedPatternsExperiment(TaskConfiguration):
@@ -132,8 +138,18 @@ def tasks(self, config) -> List:
                                              config.force_compile, config.use_tmp_wrkdir)
         load_detector = LoadDetectorTask(config.detectors_path, config.detector, config.requested_release,
                                          config.java_options)
+
+        create_index = TaskRunner([CollectMisusesTask(), CrossProjectCreateIndexTask(config.xp_index_file)])
+        read_index = CrossProjectReadIndexTask(config.xp_index_file) if config.with_xp \
+            else CrossProjectSkipReadIndexTask()
+        prepare_cross_project = [create_index, read_index,
+                                 CrossProjectPrepareTask(config.root_path, config.xp_checkouts_path,
+                                                         config.run_timestamp,
+                                                         config.max_project_sample_size, config.boa_user,
+                                                         config.boa_password)]
+
         detect = DetectAllFindingsTask(config.findings_path, config.force_detect, config.timeout, config.run_timestamp)
-        return [load_detector] + CheckoutTaskConfiguration().tasks(config) + [compile_version, detect]
+        return [load_detector] + CheckoutTaskConfiguration().tasks(config) + prepare_cross_project + [compile_version, detect]
 
 
 class PublishAllFindingsExperiment(TaskConfiguration):
@@ -161,7 +177,36 @@ def tasks(self, config) -> List:
         load_detector = LoadDetectorTask(config.detectors_path, config.detector, config.requested_release,
                                          config.java_options)
         detect = DetectAllFindingsTask(config.findings_path, config.force_detect, config.timeout, config.run_timestamp)
-        return [load_detector] + CheckoutTaskConfiguration().tasks(config) + [compile_version, detect]
+
+        create_index = TaskRunner([CollectMisusesTask(), CrossProjectCreateIndexTask(config.xp_index_file)])
+        read_index = CrossProjectReadIndexTask(config.xp_index_file) if config.with_xp \
+            else CrossProjectSkipReadIndexTask()
+        prepare_cross_project = [create_index, read_index,
+                                 CrossProjectPrepareTask(config.root_path, config.xp_checkouts_path,
+                                                         config.run_timestamp,
+                                                         config.max_project_sample_size, config.boa_user,
+                                                         config.boa_password)]
+
+        # noinspection PyTypeChecker
+        return [load_detector] + CheckoutTaskConfiguration().tasks(config) + [compile_version] + \
+            prepare_cross_project + [detect]
+
+
+class RunCrossProjectPrepare(TaskConfiguration):
+    @staticmethod
+    def mode() -> str:
+        return "checkout-xp"
+
+    def tasks(self, config) -> List:
+        # The nested runner writes the cross-project index that the subsequent tasks consume.
+        create_index = TaskRunner([CollectProjectsTask(config.data_path), CollectVersionsTask(config.development_mode),
+                                   CollectMisusesTask(), CrossProjectCreateIndexTask(config.xp_index_file)])
+        read_index = CrossProjectReadIndexTask(config.xp_index_file)
+        prepare = CrossProjectPrepareTask(config.root_path, config.xp_checkouts_path, config.run_timestamp,
+                                          config.max_project_sample_size, config.boa_user, config.boa_password)
+        # noinspection PyTypeChecker
+        return [create_index, read_index, prepare,
+                CrossProjectCreateProjectListTask(config.root_path, config.xp_index_file, config.xp_checkouts_path)]
 
 
 class PublishBenchmarkExperiment(TaskConfiguration):
diff --git a/mubench.pipeline/tasks/implementations/crossproject_create_index.py b/mubench.pipeline/tasks/implementations/crossproject_create_index.py
new file mode 100644
index 000000000..8b1098265
--- /dev/null
+++ b/mubench.pipeline/tasks/implementations/crossproject_create_index.py
@@ -0,0 +1,38 @@
+import os
+
+from data.misuse import Misuse
+from data.project import Project
+from data.project_version import ProjectVersion
+from utils.io import safe_open
+
+
+class CrossProjectCreateIndexTask:
+    """
+    Writes one tab-separated index row per misuse to a per-version index file.
+
+    Rows go to `<index_file>-<version.id>`; each row holds the project id, version id, misuse id, the
+    colon-joined source dirs, the misuse's file and method, and then one column per API of the misuse.
+    """
+
+    def __init__(self, index_file: str):
+        self.index_file = index_file
+        # Ids of the versions whose index file this task instance has already started.
+        self._indexed_versions = set()
+
+        if os.path.exists(index_file):
+            os.remove(index_file)
+
+    def run(self, project: Project, version: ProjectVersion, misuse: Misuse):
+        row = "{}\t{}\t{}\t{}\t{}\t{}\t{}".format(project.id, version.version_id, misuse.misuse_id,
+                                                  ':'.join(version.source_dirs),
+                                                  misuse.location.file, misuse.location.method,
+                                                  "\t".join(misuse.apis))
+
+        # The cleanup in __init__ only covers the plain index file, not the per-version files written here. So the
+        # first row for a version truncates that version's file; otherwise rows from earlier runs would pile up.
+        mode = "a" if version.id in self._indexed_versions else "w"
+        self._indexed_versions.add(version.id)
+
+        # Close the file right away instead of leaking one open handle per misuse.
+        with safe_open(self.index_file + '-' + version.id, mode) as index:
+            print(row, file=index)
diff --git a/mubench.pipeline/tasks/implementations/crossproject_create_project_list.py b/mubench.pipeline/tasks/implementations/crossproject_create_project_list.py
new file mode 100644
index 000000000..0d0ceae8b
--- /dev/null
+++ b/mubench.pipeline/tasks/implementations/crossproject_create_project_list.py
@@ -0,0 +1,67 @@
+import csv
+import glob
+import logging
+import os
+from os.path import exists, join
+
+from utils.io import open_yamls_if_exists, write_yaml
+from utils.shell import Shell
+
+
+class CrossProjectCreateProjectListTask:
+    """
+    Summarizes the prepared example projects per API type in `example_projects_by_API.yml`.
+
+    For every distinct first target type in the index, the example projects listed in `<type>.yml` below the
+    base checkout path are mapped from their URL to the commit that is currently checked out.
+    """
+
+    def __init__(self, root_path: str, index_file: str, base_checkout_path: str):
+        self.root_path = root_path
+        self.index_file = index_file
+        self.base_checkout_path = base_checkout_path
+
+    def run(self):
+        logger = logging.getLogger("tasks.cross_project_create_project_list")
+        example_projects_by_API = {}
+
+        index_paths = self._get_index_paths()
+        if not index_paths:
+            logger.warning("No index found at %s", self.index_file)
+
+        for index_path in index_paths:
+            with open(index_path) as index:
+                for row in csv.reader(index, delimiter="\t"):
+                    # skip blank lines, e.g., on trailing newline
+                    if not row:
+                        continue
+
+                    target_type = row[6]
+                    if target_type in example_projects_by_API:
+                        continue
+
+                    try:
+                        logger.info("Preparing examples for type: %s...", target_type)
+                        # Only registered on success, so a failed type is retried on its next row.
+                        example_projects_by_API[target_type] = self._get_example_projects(target_type)
+                    except Exception as error:
+                        logger.exception("failed", exc_info=error)
+
+        write_yaml(example_projects_by_API, join(self.base_checkout_path, "example_projects_by_API.yml"))
+
+    def _get_index_paths(self):
+        """Returns the plain index file, if present, followed by all per-version `<index_file>-<id>` files."""
+        # CrossProjectCreateIndexTask writes `<index_file>-<version.id>` files, not the plain index file.
+        index_paths = [self.index_file] if exists(self.index_file) else []
+        index_paths.extend(sorted(glob.glob(glob.escape(self.index_file) + "-*")))
+        return index_paths
+
+    def _get_example_projects(self, target_type: str):
+        """Maps the URL of each project listed in `<target_type>.yml` to its checked-out commit hash."""
+        example_projects = {}
+        with open_yamls_if_exists(os.path.join(self.base_checkout_path, target_type + ".yml")) as projects:
+            for project in projects:
+                checkout_path = join(self.root_path, project["path"])
+                commit_hash = Shell.exec("cd \"{}\"; git rev-parse HEAD".format(checkout_path))
+                example_projects[project["url"]] = commit_hash.strip()
+        return example_projects
diff --git a/mubench.pipeline/tasks/implementations/crossproject_prepare.py b/mubench.pipeline/tasks/implementations/crossproject_prepare.py
new file mode 100644
index 000000000..604c8c289
--- /dev/null
+++ b/mubench.pipeline/tasks/implementations/crossproject_prepare.py
@@ -0,0 +1,149 @@
+import csv
+import logging
+import os
+from os.path import exists, join
+from typing import List
+
+from boa.BOA import BOA
+from buildtools.maven import Project
+from tasks.implementations.crossproject_read_index import CrossProjectMisuseApis
+from utils.io import write_yamls, write_yaml, is_empty
+from utils.shell import CommandFailedError
+
+
+class CrossProjectSourcesPaths:
+    """Wraps the list of source paths of the example projects that CrossProjectPrepareTask prepared."""
+
+    def __init__(self, paths: List[str]):
+        self.__paths = paths
+
+    def get(self):
+        # Hand out a copy, so that callers cannot modify the wrapped list.
+        return list(self.__paths)
+
+
+class CrossProjectPrepareTask:
+    """
+    Collects example projects that use the API types involved in the indexed misuses.
+
+    For every combination of a misuse's target types (and their subtypes from `subtypes.csv`, if present), BOA is
+    queried for projects using these types. The projects are cloned below `<checkouts_base_path>/checkouts` and
+    their metadata is written to `<checkouts_base_path>/<sorted types joined by '-'>.yml`.
+    """
+
+    def __init__(self, root_path: str, checkouts_base_path: str, timestamp: int,
+                 max_project_sample_size: int, boa_user: str, boa_password: str):
+        self.root_path = root_path
+        self.checkouts_base_path = checkouts_base_path
+        self.project_checkouts_path = join(checkouts_base_path, "checkouts")
+        self.boa_results_path = join(checkouts_base_path, "boa-results")
+        self.subtypes_path = os.path.join(checkouts_base_path, "subtypes.csv")
+        self.timestamp = timestamp
+        self.max_project_sample_size = max_project_sample_size
+        self.boa_user = boa_user
+        self.boa_password = boa_password
+
+        # Lazily filled by _get_subtypes(): maps a type to the list of its subtypes.
+        self._subtypes = {}
+
+    def run(self, apis: CrossProjectMisuseApis):
+        """
+        Prepares example projects for all given misuse APIs that have no metadata file, yet.
+
+        :return: the source paths of the example projects prepared by this call
+        """
+        logger = logging.getLogger("tasks.cross_project_prepare")
+        sources_paths = []
+
+        boa = BOA(self.boa_user, self.boa_password, self.boa_results_path)
+        for api in apis.get():
+            project_id = api.project_id
+            version_id = api.version_id
+            target_types = api.target_types
+            try:
+                # The metadata file belongs directly below the base path, where CrossProjectCreateProjectListTask
+                # reads the `<type>.yml` files; only the clones go into the "checkouts" subdirectory.
+                target_example_file = os.path.join(self.checkouts_base_path, "-".join(sorted(target_types)) + ".yml")
+                if not exists(target_example_file):
+                    logger.info("Preparing examples for %s.%s (type(s): %s)...", project_id, version_id,
+                                target_types)
+                    sources_paths.extend(self._prepare_example_projects(target_types, boa, target_example_file))
+                elif is_empty(target_example_file):
+                    logger.info("No example projects for %s.%s (type(s): %s)", project_id, version_id, target_types)
+                else:
+                    logger.info("Already prepared examples for %s.%s (type(s): %s)", project_id, version_id,
+                                target_types)
+            except Exception as error:
+                logger.exception("failed", exc_info=error)
+
+        return CrossProjectSourcesPaths(sources_paths)
+
+    def _prepare_example_projects(self, target_types: List, boa: BOA, metadata_path: str) -> List[str]:
+        """
+        Checks out up to `max_project_sample_size` projects using the target types and writes their metadata.
+
+        :return: the source paths of all projects recorded in the metadata file
+        """
+        logger = logging.getLogger("tasks.cross_project_prepare")
+        data = []
+        sources_paths = []
+        for type_combination in self._create_type_combinations(target_types):
+            projects = boa.query_projects_with_type_usages(target_types, type_combination)
+            for project in projects:
+                checkout = project.get_checkout(self.project_checkouts_path)
+                if not checkout.exists():
+                    try:
+                        logger.info("  Checking out %r...", str(project))
+                        checkout.clone()
+                    except CommandFailedError as error:
+                        logger.warning("    Checkout failed: %r", error)
+                        checkout.delete()
+                        continue
+                else:
+                    logger.info("  Already checked out %r.", str(project))
+
+                try:
+                    project_sources_paths = Project(checkout.path).get_sources_paths()
+                    project_entry = {"id": project.id, "url": project.repository_url,
+                                     "path": os.path.relpath(checkout.path, self.root_path),
+                                     "source_paths": project_sources_paths,
+                                     "checkout_timestamp": self.timestamp}
+                    write_yaml(project_entry)  # check for encoding problems
+                    data.append(project_entry)
+                    sources_paths.extend(project_sources_paths)
+                except UnicodeEncodeError:
+                    logger.warning("    Illegal characters in project data.")
+
+                if len(data) >= self.max_project_sample_size:
+                    logger.warning("  Stopping after %r of %r example projects.", self.max_project_sample_size,
+                                   len(projects))
+                    write_yamls(data, metadata_path)
+                    return sources_paths
+
+        write_yamls(data, metadata_path)
+        return sources_paths
+
+    def _get_subtypes(self, target_type):
+        """Returns the known subtypes of the type, excluding Sun-specific ones."""
+        if not self._subtypes and exists(self.subtypes_path):
+            with open(self.subtypes_path) as subtypes_file:
+                for subtypes_row in csv.reader(subtypes_file, delimiter="\t"):
+                    # skip blank lines, e.g., on trailing newline
+                    if subtypes_row:
+                        self._subtypes[subtypes_row[0]] = subtypes_row[1:]
+
+        all_subtypes = self._subtypes.get(target_type, [])
+        subtypes_sample = [subtype for subtype in all_subtypes if "sun." not in subtype]  # filter Sun-specific types
+        return subtypes_sample
+
+    def _get_type_and_subtypes_list(self, target_type):
+        return [target_type] + self._get_subtypes(target_type)
+
+    def _create_type_combinations(self, target_types: List):
+        """Yields every way to replace each target type by itself or one of its subtypes."""
+        if len(target_types) == 1:
+            return ([type_] for type_ in self._get_type_and_subtypes_list(target_types[0]))
+        else:
+            return ([target_type] + tail
+                    for target_type in self._get_type_and_subtypes_list(target_types[0])
+                    for tail in self._create_type_combinations(target_types[1:]))
diff --git a/mubench.pipeline/tasks/implementations/crossproject_read_index.py b/mubench.pipeline/tasks/implementations/crossproject_read_index.py
new file mode 100644
index 000000000..d00d915c0
--- /dev/null
+++ b/mubench.pipeline/tasks/implementations/crossproject_read_index.py
@@ -0,0 +1,55 @@
+import csv
+import logging
+from typing import List
+
+from data.project_version import ProjectVersion
+
+
+class CrossProjectMisuseApi:
+    """One row of the cross-project index, i.e., one misuse together with its sorted target types."""
+
+    def __init__(self, row):
+        self.project_id = row[0]
+        self.version_id = row[1]
+        self.misuse_id = row[2]
+        # The columns at indices 3 to 5 are not read here; every column from index 6 on names one target type.
+        self.target_types = sorted(row[6:])
+
+
+class CrossProjectMisuseApis:
+    """Wraps a list of index rows."""
+
+    def __init__(self, apis: List[CrossProjectMisuseApi]):
+        self.__apis = apis
+
+    def get(self):
+        # Hand out a copy, so that callers cannot modify the wrapped list.
+        return list(self.__apis)
+
+
+class CrossProjectReadIndexTask:
+    """Reads the rows of the per-version index file `<index_file>-<version.id>`."""
+
+    def __init__(self, index_file: str):
+        self.index = index_file
+
+    def run(self, version: ProjectVersion):
+        index_path = self.index + '-' + version.id
+        try:
+            with open(index_path) as index_file:
+                # skip blank lines, e.g., on trailing newline
+                apis = [CrossProjectMisuseApi(row) for row in csv.reader(index_file, delimiter="\t") if row]
+        except FileNotFoundError:
+            # The index file only exists if at least one misuse of the version was indexed. Without it, there is
+            # simply nothing to prepare, just like when reading the index is skipped.
+            logging.getLogger("tasks.cross_project_read_index").info("No cross-project index at %s", index_path)
+            apis = []
+
+        return CrossProjectMisuseApis(apis)
+
+
+class CrossProjectSkipReadIndexTask:
+    """Stand-in for CrossProjectReadIndexTask that yields no index rows."""
+
+    def run(self):
+        return CrossProjectMisuseApis([])
diff --git a/mubench.pipeline/tasks/implementations/detect_all_findings.py b/mubench.pipeline/tasks/implementations/detect_all_findings.py
index de4ce5bd0..aaa3e73e4 100644
--- a/mubench.pipeline/tasks/implementations/detect_all_findings.py
+++ b/mubench.pipeline/tasks/implementations/detect_all_findings.py
@@ -1,13 +1,14 @@
 import logging
 from os.path import join
-from typing import Optional
+from typing import Optional, List
 
 from data.detector import Detector
 from data.detector_run import DetectorRun
 from data.project_version import ProjectVersion
 from data.version_compile import VersionCompile
 from tasks.configurations.detector_interface_configuration import key_detector_mode, \
-    key_target_src_paths, key_target_classes_paths, key_dependency_classpath
+    key_target_src_paths, key_target_classes_paths, key_dependency_classpath, key_training_src_path
+from tasks.implementations.crossproject_prepare import CrossProjectSourcesPaths
 
 
 class DetectAllFindingsTask:
@@ -20,24 +21,29 @@ def __init__(self, findings_base_path: str, force_detect: bool, timeout: Optiona
         self.timeout = timeout
         self.current_timestamp = current_timestamp
 
-    def run(self, detector: Detector, version: ProjectVersion, version_compile: VersionCompile):
-        run = DetectorRun(detector, version, self._get_findings_path(detector, version))
+    def run(self, detector: Detector, version: ProjectVersion, version_compile: VersionCompile,
+            xp_sources_paths: CrossProjectSourcesPaths):
+        run = self._get_detector_run(detector, version)
 
-        run.ensure_executed(self._get_detector_arguments(version_compile),
+        run.ensure_executed(self._get_detector_arguments(version_compile, xp_sources_paths.get()),
                             self.timeout, self.force_detect, self.current_timestamp, version_compile.timestamp,
                             logging.getLogger("task.detect"))
 
         return run
 
+    def _get_detector_run(self, detector, version):
+        return DetectorRun(detector, version, self._get_findings_path(detector, version))
+
     def _get_findings_path(self, detector: Detector, version: ProjectVersion):
         return join(self.findings_base_path, DetectAllFindingsTask.__RUN_MODE_NAME, detector.id,
                     version.project_id, version.version_id)
 
     @staticmethod
-    def _get_detector_arguments(version_compile: VersionCompile):
-        return {
-            key_detector_mode: DetectAllFindingsTask.__DETECTOR_MODE,
-            key_target_src_paths: version_compile.original_sources_paths,
-            key_target_classes_paths: version_compile.original_classes_paths,
-            key_dependency_classpath: version_compile.get_full_classpath()
-        }
+    def _get_detector_arguments(version_compile: VersionCompile, xp_sources_paths: List[str]):
+        detector_args = {key_detector_mode: DetectAllFindingsTask.__DETECTOR_MODE,
+                         key_target_src_paths: version_compile.original_sources_paths,
+                         key_target_classes_paths: version_compile.original_classes_paths,
+                         key_dependency_classpath: version_compile.get_full_classpath()}
+        if xp_sources_paths:
+            detector_args[key_training_src_path] = xp_sources_paths
+        return detector_args
diff --git a/mubench.pipeline/tasks/implementations/detect_provided_correct_usages.py b/mubench.pipeline/tasks/implementations/detect_provided_correct_usages.py
index ee72a4b79..8bb87b0dc 100644
--- a/mubench.pipeline/tasks/implementations/detect_provided_correct_usages.py
+++ b/mubench.pipeline/tasks/implementations/detect_provided_correct_usages.py
@@ -1,6 +1,6 @@
 import logging
 from os.path import join
-from typing import Optional
+from typing import Optional, List
 
 from data.detector import Detector
 from data.detector_run import DetectorRun
@@ -9,7 +9,9 @@
 from data.project_version import ProjectVersion
 from data.version_compile import VersionCompile
 from tasks.configurations.detector_interface_configuration import key_detector_mode, \
-    key_training_src_path, key_training_classes_path, key_target_src_paths, key_target_classes_paths, key_dependency_classpath
+    key_training_src_path, key_training_classes_path, key_target_src_paths, key_target_classes_paths, \
+    key_dependency_classpath
+from tasks.implementations.crossproject_prepare import CrossProjectSourcesPaths
 
 
 class DetectProvidedCorrectUsagesTask:
@@ -24,14 +26,19 @@ def __init__(self, findings_base_path: str, force_detect: bool, timeout: Optiona
 
     def run(self, detector: Detector, version: ProjectVersion, version_compile: VersionCompile, misuse: Misuse,
             misuse_compile: MisuseCompile):
-        run = DetectorRun(detector, version, self._get_findings_path(detector, version, misuse))
+        run = self._get_detector_run(detector, misuse, version)
 
-        run.ensure_executed(self._get_detector_arguments(version_compile, misuse_compile),
+        detector_arguments = self._get_detector_arguments(version_compile, misuse_compile)
+
+        run.ensure_executed(detector_arguments,
                             self.timeout, self.force_detect, self.current_timestamp, misuse_compile.timestamp,
                             logging.getLogger("task.detect"))
 
         return run
 
+    def _get_detector_run(self, detector: Detector, misuse: Misuse, version: ProjectVersion) -> DetectorRun:
+        return DetectorRun(detector, version, self._get_findings_path(detector, version, misuse))  # path is per misuse
+
     def _get_findings_path(self, detector: Detector, version: ProjectVersion, misuse: Misuse):
         return join(self.findings_base_path, DetectProvidedCorrectUsagesTask.__RUN_MODE_NAME, detector.id,
                     version.project_id, version.version_id, misuse.misuse_id)
diff --git a/mubench.pipeline/tasks/task_runner.py b/mubench.pipeline/tasks/task_runner.py
index e772a0b88..7ca292eb0 100644
--- a/mubench.pipeline/tasks/task_runner.py
+++ b/mubench.pipeline/tasks/task_runner.py
@@ -13,15 +13,18 @@ class TaskRunner:
     def __init__(self, tasks: List):
         self.tasks = tasks
         self.logger = logging.getLogger("task_runner")
+        self.__accumulated_result = None
 
     def run(self, *initial_parameters: Tuple[Any]):
         if not self.tasks:
             return
 
+        self.__accumulated_result = None
         self.__run(0, list(initial_parameters))
         for task in self.tasks:
             if callable(getattr(task, 'end', None)):
                 task.end()
+        return self.__accumulated_result
 
     def __run(self, current_task_index: int, previous_results: List):
         task = self.tasks[current_task_index]
@@ -39,6 +42,14 @@ def __run(self, current_task_index: int, previous_results: List):
             logger.debug("Full exception:", exc_info=True)
             return
 
+        # Only the last task's results are accumulated, and only if they support `+` (e.g. int, str, list).
+        is_leaf_task = current_task_index == len(self.tasks) - 1
+        if is_leaf_task and hasattr(results, '__add__'):
+            if self.__accumulated_result is None:
+                self.__accumulated_result = results
+            else:  # `a = a + b`, not `a += b`: an in-place add would mutate a list some task returned earlier
+                self.__accumulated_result = self.__accumulated_result + results
+
         if results is None:
             results = [Continue()]
 
diff --git a/mubench.pipeline/tests/tasks/implementations/test_detect_all_findings.py b/mubench.pipeline/tests/tasks/implementations/test_detect_all_findings.py
new file mode 100644
index 000000000..59583cbe2
--- /dev/null
+++ b/mubench.pipeline/tests/tasks/implementations/test_detect_all_findings.py
@@ -0,0 +1,30 @@
+from unittest.mock import patch, MagicMock
+
+from nose.tools import assert_equals
+
+from tasks.configurations.detector_interface_configuration import key_training_src_path
+from tasks.implementations.crossproject_prepare import CrossProjectSourcesPaths
+from tasks.implementations.detect_all_findings import DetectAllFindingsTask
+from tests.data.stub_detector import StubDetector
+from tests.test_utils.data_util import create_version
+
+
+@patch("tasks.implementations.detect_all_findings.DetectAllFindingsTask._get_detector_run")
+class TestDetectAllFindingsTask:
+    """Checks the detector arguments `DetectAllFindingsTask.run` hands to a (mocked) detector run."""
+    def setup(self):
+        self.detector = StubDetector()
+        self.version = create_version("-version-", meta={})
+        self.version_compile = self.version.get_compile("-compile-")
+
+    def test_adds_xp_training_sources(self, get_detector_run_mock):
+        # The patched factory hands out its auto-created `return_value`, which records `ensure_executed`.
+        run_mock = get_detector_run_mock.return_value
+        xp_paths = CrossProjectSourcesPaths(["xp_sources1", "xp_sources2"])
+        uut = DetectAllFindingsTask("-findings-", False, None, -1)
+
+        uut.run(self.detector, self.version, self.version_compile, xp_paths)
+
+        assert_equals(1, run_mock.ensure_executed.call_count)
+        detector_args = run_mock.ensure_executed.call_args[0][0]
+        assert_equals(["xp_sources1", "xp_sources2"], detector_args[key_training_src_path])
diff --git a/mubench.pipeline/tests/tasks/test_task_runner.py b/mubench.pipeline/tests/tasks/test_task_runner.py
index 2ef2dd04b..17bc97564 100644
--- a/mubench.pipeline/tests/tasks/test_task_runner.py
+++ b/mubench.pipeline/tests/tasks/test_task_runner.py
@@ -64,13 +64,13 @@ def test_runs_subsequent_task_with_results_of_previous_tasks_in_any_order(self):
         third_task.assert_called_once_with(42, ":some string:")
 
     def test_runs_subsequent_task_with_generic_result_of_previous_task(self):
-        first_task = VoidTask([[1,2]])
+        first_task = VoidTask([[1, 2]])
         second_task = ListConsumingTask()
         uut = TaskRunner([first_task, second_task])
 
         uut.run()
 
-        second_task.assert_called_once_with([1,2])
+        second_task.assert_called_once_with([1, 2])
 
     def test_reports_if_a_task_requires_an_unavailable_parameter(self):
         first_task = VoidTask([42])
@@ -186,10 +186,28 @@ def test_handles_empty_tasks(self):
         uut = TaskRunner([])
         uut.run()
 
+    def test_does_not_attempt_to_accumulate_non_accumulable_results(self):
+        # A plain `object()` has no `__add__`, so there is nothing the runner could sum up across branches.
+        fan_out = VoidTask(['-some string-'] * 3)
+        leaf = VoidTask(object())
+
+        result = TaskRunner([fan_out, leaf]).run()
+
+        assert_equals(None, result)
+
+    def test_returns_accumulated_results_of_last_task(self):
+        # The first task yields three results, so the leaf's 42 is expected to be summed three times (126).
+        fan_out = VoidTask(['-some string-'] * 3)
+        leaf = VoidTask(42)
+
+        result = TaskRunner([fan_out, leaf]).run()
+
+        assert_equals(126, result)
+
 
 class MockTask:
-    def __init__(self, results: List = None):
-        self.results = results or []
+    def __init__(self, results: Any = None):
+        self.results = results
         self.calls = []
 
     def assert_called_once_with(self, *args):
@@ -240,7 +258,7 @@ def run(self, i: int, j: int):
 
 
 class FailingTask(MockTask):
-    def __init__(self, message: str = "", results = None):
+    def __init__(self, message: str = "", results=None):
         super().__init__(results)
         self.message = message
 
@@ -250,7 +268,7 @@ def run(self):
 
 
 class FailingStringConsumingTask(MockTask):
-    def __init__(self, message: str = "", results = None):
+    def __init__(self, message: str = "", results=None):
         super().__init__(results)
         self.message = message
 
diff --git a/mubench.pipeline/tests/utils/test_config_util.py b/mubench.pipeline/tests/utils/test_config_util.py
index 89a0b8a29..5b3784e76 100644
--- a/mubench.pipeline/tests/utils/test_config_util.py
+++ b/mubench.pipeline/tests/utils/test_config_util.py
@@ -1,4 +1,5 @@
 import sys
+from unittest.mock import patch
 
 from nose.tools import assert_raises, assert_equals, nottest
 
@@ -179,3 +180,19 @@ def test_allow_zero_limit():
 def test_fails_on_negative_limit():
     parser = _get_command_line_parser(['DemoDetector'], [], [])
     assert_raises(SystemExit, parser.parse_args, ['publish', 'ex2', 'DemoDetector', '-s', 'site', '--limit', '-1'])
+
+
+def test_run_with_xp():
+    # Long flag plus both short Boa credential options on the `run ex2` subcommand.
+    command_line = ['run', 'ex2', 'DemoDetector', '--with-xp', '-bp', 'aaa', '-bu', 'bbb']
+    result = _get_command_line_parser(['DemoDetector'], [], []).parse_args(command_line)
+    actual = (result.with_xp, result.boa_password, result.boa_user)
+    assert_equals((True, 'aaa', 'bbb'), actual)
+
+
+@patch("utils.config_util.sys")
+def test_requires_boa_credentials_on_with_xp(sys_mock):
+    # Must target an experiment that knows `--with-xp` (ex2 does); otherwise the unknown flag alone exits.
+    args = sys_mock.argv = ['run', 'ex2', 'DemoDetector', '--with-xp']
+    parser = _get_command_line_parser(['DemoDetector'], [], [])
+    assert_raises(SystemExit, parser.parse_args, args)
diff --git a/mubench.pipeline/utils/config_util.py b/mubench.pipeline/utils/config_util.py
index c0227c326..5ffb2b9e3 100644
--- a/mubench.pipeline/utils/config_util.py
+++ b/mubench.pipeline/utils/config_util.py
@@ -5,6 +5,8 @@
 from os.path import join, abspath, dirname
 from typing import List, Any
 
+import sys
+
 from data.detector import get_available_detector_ids, Detector
 from tasks.implementations import stats
 from utils.dataset_util import get_available_dataset_ids
@@ -17,6 +19,8 @@
 __FINDINGS_PATH = join(MUBENCH_ROOT_PATH, "findings")
 __DATASETS_FILE_PATH = join(MUBENCH_ROOT_PATH, 'data', 'datasets.yml')
 __DETECTORS_PATH = join(MUBENCH_ROOT_PATH, "detectors")
+__XP_CHECKOUTS_PATH = join(MUBENCH_ROOT_PATH, "checkouts-xp")
+__XP_INDEX_FILE = join(__XP_CHECKOUTS_PATH, "index")
 
 
 class SortingHelpFormatter(HelpFormatter):
@@ -68,6 +72,8 @@ def _get_command_line_parser(available_detectors: List[str], available_scripts:
 
     subparsers.required = True
 
+    parser.add_argument('--root-path', dest='root_path', default=__get_default('root-path', MUBENCH_ROOT_PATH),
+                        help=argparse.SUPPRESS)
     parser.add_argument('--use-tmp-wrkdir', dest='use_tmp_wrkdir', default=__get_default('use-tmp-wrkdir', False),
                         help=argparse.SUPPRESS, action='store_true')
     parser.add_argument('--data-path', dest='data_path', default=__get_default('data-path', __DATA_PATH),
@@ -84,6 +90,12 @@ def _get_command_line_parser(available_detectors: List[str], available_scripts:
                         default=__get_default('detectors-path', __DETECTORS_PATH), help=argparse.SUPPRESS)
     parser.add_argument('--development-mode', dest='development_mode', default=__get_default('development-mode', False),
                         help=argparse.SUPPRESS, action='store_true')
+    parser.add_argument('--xp-checkouts-path', dest='xp_checkouts_path',
+                        default=__get_default('xp-checkouts-path', __XP_CHECKOUTS_PATH), help=argparse.SUPPRESS)
+    parser.add_argument('--xp-index-file', dest='xp_index_file',
+                        default=__get_default('xp-index-file', __XP_INDEX_FILE), help=argparse.SUPPRESS)
+    parser.add_argument('--max-project-sample-size', dest='max_project_sample_size', type=int,  # CLI gives str
+                        default=__get_default('max-project-sample-size', 50), help=argparse.SUPPRESS)
 
     __add_check_subprocess(available_datasets, subparsers)
     __add_info_subprocess(available_datasets, subparsers)
@@ -92,6 +104,7 @@ def _get_command_line_parser(available_detectors: List[str], available_scripts:
     __add_run_subprocess(available_detectors, available_datasets, subparsers)
     __add_publish_subprocess(available_detectors, available_datasets, subparsers)
     __add_stats_subprocess(available_scripts, available_datasets, subparsers)
+    __add_checkout_cross_project_subprocess(available_datasets, subparsers)
 
     # Add subprocesses provided by the ./mubench script
     __add_browse_subprocess(subparsers)
@@ -226,6 +239,7 @@ def __add_run_ex2_subprocess(available_detectors: List[str], available_datasets:
     __setup_compile_arguments(experiment_parser)
     __setup_run_arguments(experiment_parser, available_detectors)
     __setup_publish_precision_arguments(experiment_parser)
+    __setup_cross_project_arguments(experiment_parser)
 
 
 def __add_run_ex3_subprocess(available_detectors: List[str], available_datasets: List[str], subparsers) -> None:
@@ -239,6 +253,22 @@ def __add_run_ex3_subprocess(available_detectors: List[str], available_datasets:
     __setup_checkout_arguments(experiment_parser)
     __setup_compile_arguments(experiment_parser)
     __setup_run_arguments(experiment_parser, available_detectors)
+    __setup_cross_project_arguments(experiment_parser)
+
+
+def __add_checkout_cross_project_subprocess(available_datasets: List[str], subparsers) -> None:
+    """Register the `checkout-xp` subcommand with the filter options and the Boa credential options."""
+    # NOTE(review): help and description are still placeholders.
+    xp_parser = subparsers.add_parser("checkout-xp", formatter_class=SortingHelpFormatter,
+                                      help="TODO", description="TODO")
+    __setup_filter_arguments(xp_parser, available_datasets)
+
+    # A credential is mandatory on the command line unless `__get_default` already provides a value for it.
+    boa_user, boa_password = __get_default('boa-user', None), __get_default('boa-password', None)
+    xp_parser.add_argument("-bu", "--boa-user", metavar="BOAUSER", help="Your boa username.",
+                           default=boa_user, required=not boa_user)
+    xp_parser.add_argument("-bp", "--boa-password", metavar="BOAPASSWORD", help="Your boa password.",
+                           default=boa_password, required=not boa_password)
 
 
 def __add_publish_subprocess(available_detectors: List[str], available_datasets: List[str], subparsers) -> None:
@@ -306,6 +336,7 @@ def __add_publish_ex2_subprocess(available_detectors: List[str], available_datas
     __setup_run_arguments(experiment_parser, available_detectors)
     __setup_publish_arguments(experiment_parser)
     __setup_publish_precision_arguments(experiment_parser)
+    __setup_cross_project_arguments(experiment_parser)
 
 
 def __add_publish_ex3_subprocess(available_detectors: List[str], available_datasets: List[str],
@@ -325,6 +356,7 @@ def __add_publish_ex3_subprocess(available_detectors: List[str], available_datas
     __setup_compile_arguments(experiment_parser)
     __setup_run_arguments(experiment_parser, available_detectors)
     __setup_publish_arguments(experiment_parser)
+    __setup_cross_project_arguments(experiment_parser)
 
 
 def __setup_filter_arguments(parser: ArgumentParser, available_datasets: List[str]) -> None:
@@ -398,5 +430,19 @@ def upload_limit(x):
                                            "Use `--limit 0` to publish only run stats.".format(default_limit))
 
 
+def __setup_cross_project_arguments(parser: ArgumentParser) -> None:
+    """Add `--with-xp` and the Boa credential options, which are mandatory only in cross-project mode."""
+    with_xp_default = __get_default('with-xp', False)
+    boa_user, boa_password = __get_default('boa-user', None), __get_default('boa-password', None)
+    # argparse cannot make an option conditionally required, so peek at argv; a truthy default enables XP too.
+    with_xp = bool(with_xp_default) or '--with-xp' in sys.argv
+    parser.add_argument('--with-xp', dest='with_xp', action='store_true', default=with_xp_default,
+                        help="use sampled projects with usages for learning.")
+    parser.add_argument("-bu", "--boa-user", metavar="BOAUSER", required=with_xp and not boa_user,
+                        default=boa_user, help="Your boa username.")
+    parser.add_argument("-bp", "--boa-password", metavar="BOAPASSWORD", required=with_xp and not boa_password,
+                        default=boa_password, help="Your boa password.")
+
+
 def __add_browse_subprocess(subparsers) -> None:
     subparsers.add_parser('browse', help="Open a Linux shell in a container mounting the MUBench Docker Volumes.")