diff --git a/.github/dependabot.yml b/.github/dependabot.yml index ea9349e302..6c3cd5c0e0 100644 --- a/.github/dependabot.yml +++ b/.github/dependabot.yml @@ -11,6 +11,6 @@ updates: interval: "weekly" - package-ecosystem: docker - directory: /heat/core/tests + directory: /tests schedule: interval: "weekly" diff --git a/.github/rd-release-config.yml b/.github/rd-release-config.yml index a45fa74a14..59953506f1 100644 --- a/.github/rd-release-config.yml +++ b/.github/rd-release-config.yml @@ -116,130 +116,130 @@ autolabeler: - '/Support.+/' - label: 'classification' files: - - 'heat/classification/**/*' + - 'src/heat/classification/**/*' - label: 'cluster' files: - - 'heat/cluster/**/*' + - 'src/heat/cluster/**/*' - label: 'core' files: - - 'heat/core/**/*' + - 'src/heat/core/**/*' - label: 'datasets' files: - - 'heat/datasets/**/*' + - 'src/heat/datasets/**/*' - label: 'decomposition' files: - - 'heat/decomposition/**/*' + - 'src/heat/decomposition/**/*' - label: 'fft' files: - - 'heat/fft/**/*' + - 'src/heat/fft/**/*' - label: 'graph' files: - - 'heat/graph/**/*' + - 'src/heat/graph/**/*' - label: 'naive bayes' files: - - 'heat/naive_bayes/**/*' + - 'src/heat/naive_bayes/**/*' - label: 'nn' files: - - 'heat/nn/**/*' + - 'src/heat/nn/**/*' - label: 'optim' files: - - 'heat/optim/**/*' + - 'src/heat/optim/**/*' - label: 'preprocessing' files: - - 'heat/preprocessing/**/*' + - 'src/heat/preprocessing/**/*' - label: 'regression' files: - - 'heat/regression/**/*' + - 'src/heat/regression/**/*' - label: 'sparse' files: - - 'heat/sparse/**/*' + - 'src/heat/sparse/**/*' - label: 'spatial' files: - - 'heat/spatial/**/*' + - 'src/heat/spatial/**/*' - label: 'utils' files: - - 'heat/utils/**/*' + - 'src/heat/utils/**/*' - label: 'linalg' files: - - 'heat/core/linalg/**/*' + - 'src/heat/core/linalg/**/*' - label: 'arithmetics' files: - - 'heat/core/arithmetics.py' + - 'src/heat/core/arithmetics.py' - label: 'base' files: - - 'heat/core/base.py' + - 'src/heat/core/base.py' - label: 'communication' files: - - 'heat/core/communication.py' + - 'src/heat/core/communication.py' - label: 'complex_math' files: - - 'heat/core/complex_math.py' + - 'src/heat/core/complex_math.py' - label: 'constants' files: - - 'heat/core/constants.py' + - 'src/heat/core/constants.py' - label: 'devices' files: - - 'heat/core/devices.py' + - 'src/heat/core/devices.py' - label: 'dndarray' files: - - 'heat/core/dndarray.py' + - 'src/heat/core/dndarray.py' - label: 'exponential' files: - - 'heat/core/exponential.py' + - 'src/heat/core/exponential.py' - label: 'indexing' files: - - 'heat/core/indexing.py' + - 'src/heat/core/indexing.py' - label: 'io' files: - - 'heat/core/io.py' + - 'src/heat/core/io.py' - label: 'logical' files: - - 'heat/core/logical.py' + - 'src/heat/core/logical.py' - label: 'manipulations' files: - - 'heat/core/manipulations.py' + - 'src/heat/core/manipulations.py' - label: 'memory' files: - - 'heat/core/memory.py' + - 'src/heat/core/memory.py' - label: 'printing' files: - - 'heat/core/printing.py' + - 'src/heat/core/printing.py' - label: 'random' files: - - 'heat/core/random.py' + - 'src/heat/core/random.py' - label: 'relational' files: - - 'heat/core/relational.py' + - 'src/heat/core/relational.py' - label: 'rounding' files: - - 'heat/core/rounding.py' + - 'src/heat/core/rounding.py' - label: 'sanitation' files: - - 'heat/core/sanitation.py' + - 'src/heat/core/sanitation.py' - label: 'signal' files: - - 'heat/core/signal.py' + - 'src/heat/core/signal.py' - label: 'statistics' files: - - 
'heat/core/statistics.py' + - 'src/heat/core/statistics.py' - label: 'stride_tricks' files: - - 'heat/core/stride_tricks.py' + - 'src/heat/core/stride_tricks.py' - label: 'tiling' files: - - 'heat/core/tiling.py' + - 'src/heat/core/tiling.py' - label: 'trigonometrics' files: - - 'heat/core/trigonometrics.py' + - 'src/heat/core/trigonometrics.py' - label: 'types' files: - - 'heat/core/types.py' + - 'src/heat/core/types.py' - label: 'version' files: - - 'heat/core/version.py' + - 'src/heat/core/version.py' - label: 'vmap' files: - - 'heat/core/vmap.py' + - 'src/heat/core/vmap.py' change-template: '- #$NUMBER $TITLE (by @$AUTHOR)' category-template: '### $TITLE' diff --git a/.github/workflows/ReceivePR.yml b/.github/workflows/ReceivePR.yml index 8d8839a56d..074ccb90e3 100644 --- a/.github/workflows/ReceivePR.yml +++ b/.github/workflows/ReceivePR.yml @@ -4,7 +4,7 @@ on: pull_request: types: [opened, synchronize, reopened, ready_for_review] paths: - - 'heat/**' + - 'src/heat/**' jobs: build: diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml index c9f7978ceb..f716aed70f 100644 --- a/.github/workflows/ci.yaml +++ b/.github/workflows/ci.yaml @@ -68,5 +68,5 @@ jobs: pip install pytest pip install ${{ matrix.pytorch-version }} --extra-index-url https://download.pytorch.org/whl/cpu pip install ${{ matrix.install-options }} - mpirun -n 3 pytest heat/ - mpirun -n 4 pytest heat/ + mpirun -n 3 pytest + mpirun -n 4 pytest diff --git a/.github/workflows/release-prep.yml b/.github/workflows/release-prep.yml index 83304025a8..16453928d8 100644 --- a/.github/workflows/release-prep.yml +++ b/.github/workflows/release-prep.yml @@ -92,10 +92,10 @@ jobs: ## ----- END Workflow to update Dockerfile Images ------- # Write on to the version.py file - sed -i "s/major: int = \([0-9]\+\)/major: int = $MAJOR/g" heat/core/version.py - sed -i "s/minor: int = \([0-9]\+\)/minor: int = $MINOR/g" heat/core/version.py - sed -i "s/micro: int = \([0-9]\+\)/micro: int = $MICRO/g" heat/core/version.py - sed -i "s/extension: str = .*/extension: str = None/g" heat/core/version.py + sed -i "s/major: int = \([0-9]\+\)/major: int = $MAJOR/g" src/heat/core/version.py + sed -i "s/minor: int = \([0-9]\+\)/minor: int = $MINOR/g" src/heat/core/version.py + sed -i "s/micro: int = \([0-9]\+\)/micro: int = $MICRO/g" src/heat/core/version.py + sed -i "s/extension: str = .*/extension: str = None/g" src/heat/core/version.py { echo -e "# v${MAJOR}.${MINOR}.${MICRO} - ${{github.event.inputs.title}}\n${{ steps.release_drafter.outputs.body}}\n"; cat CHANGELOG.md; } > tmp.md mv tmp.md CHANGELOG.md @@ -105,7 +105,7 @@ jobs: git config --global user.name "Heat Release Bot" # Commit the changes - git add heat/core/version.py CHANGELOG.md + git add src/heat/core/version.py CHANGELOG.md git commit -m "Bump version to $VERSION" # Commit Dockerfile changes diff --git a/doc/source/conf.py b/doc/source/conf.py index c2da12b04f..81757c765d 100644 --- a/doc/source/conf.py +++ b/doc/source/conf.py @@ -23,7 +23,7 @@ import sys # sys.path.insert(0, os.path.abspath('.')) -sys.path.insert(0, os.path.abspath("../../heat")) +sys.path.insert(0, os.path.abspath("../../src/heat")) # -- General configuration ------------------------------------------------ @@ -49,7 +49,7 @@ # Document Python Code autoapi_type = "python" -autoapi_dirs = ["../../heat/"] +autoapi_dirs = ["../../src/heat/"] autoapi_ignore = ["*/operations.py", "*/tests/*"] autoapi_template_dir = "_templates/autoapi" @@ -117,7 +117,7 @@ def setup(sphinx): # built documents. 
# # The short X.Y version. -sys.path.insert(0, "../../heat/core") +sys.path.insert(0, "../../src/heat/core") import version as ht_version version = f"{ht_version.major}.{ht_version.minor}.{ht_version.micro}" diff --git a/heat/utils/data/tests/test_distributed_data.py b/heat/utils/data/tests/test_distributed_data.py deleted file mode 100644 index 2b59d35c36..0000000000 --- a/heat/utils/data/tests/test_distributed_data.py +++ /dev/null @@ -1,92 +0,0 @@ -from typing import Optional -import heat as ht -from heat.utils.data.datatools import DistributedDataset, DistributedSampler -import torch -import unittest - - -class SeedEnviroment: - """ - Class to be used in a `with` Enviroment. - Changes the torch seed to the given and then resets it to the previous one when exiting. - """ - - def __init__(self, seed: Optional[int] = None): - self.seed = seed - - def __enter__(self): - self.state = torch.random.get_rng_state() - - if self.seed is not None: - torch.random.manual_seed(self.seed) - - def __exit__(self, *args, **kwargs): - torch.random.set_rng_state(self.state) - - -class TestDistbributedData(unittest.TestCase): - def test_dataset_and_sampler(self) -> bool: - - reference = ht.arange(100, dtype=torch.int32).reshape(20, 5) - - heat_array = ht.copy(reference).resplit_(0) - dset = DistributedDataset(heat_array) - dsampler = DistributedSampler(dset, shuffle=True) - dsampler._shuffle() - - # To test this, the resulting array should be balanced, have the same number of elements as the original one, and the sum of all the columns should be the same - # And the elements should not be equal to each other. - self.assertTrue(dset.dndarray.size == reference.size) - self.assertTrue(dset.dndarray.shape == reference.shape) - self.assertTrue(dset.dndarray.balanced) - - ref_col_sum = reference.sum(0) - col_sum = dset.dndarray.sum(0) - - self.assertTrue(ht.equal(col_sum, ref_col_sum)) - self.assertFalse(ht.equal(reference, dset.dndarray)) - - def test_batches(self) -> bool: - reference = ht.array( - [ - [10, 11, 12, 13, 14], - [20, 21, 22, 23, 24], - [15, 16, 17, 18, 19], - [0, 1, 2, 3, 4], - [5, 6, 7, 8, 9], - ], - split=0, - dtype=ht.int32, - ) - - with SeedEnviroment(): - arr = ht.arange(25, dtype=ht.int32, split=0).reshape(5, 5) - dset = DistributedDataset(arr) - dsampler = DistributedSampler(dset, shuffle=True, seed=42) - - dataloader = torch.utils.data.DataLoader( - dset, batch_size=1, shuffle=False, sampler=dsampler - ) - - for batch in dataloader: - found = False - for larray in reference.larray: - if not torch.isclose(batch, larray).all(): - continue - found = True - break - self.assertTrue(found) - - def test_dataset_exceptions(self) -> bool: - with self.assertRaises(TypeError): - DistributedDataset("") - with self.assertRaises(ValueError): - DistributedDataset(ht.zeros(2, split=1)) - - def test_data_sampler_exceptions(self) -> bool: - with self.assertRaises(TypeError): - DistributedSampler(ht.zeros(10)) - with self.assertRaises(TypeError): - DistributedSampler(DistributedDataset(ht.zeros(2, split=0)), shuffle="") - with self.assertRaises(TypeError): - DistributedSampler(DistributedDataset(ht.zeros(2, split=0)), shuffle=True, seed="") diff --git a/heat/utils/data/tests/test_matrixgallery.py b/heat/utils/data/tests/test_matrixgallery.py deleted file mode 100644 index 17390cb013..0000000000 --- a/heat/utils/data/tests/test_matrixgallery.py +++ /dev/null @@ -1,118 +0,0 @@ -import heat as ht -import unittest -import torch -from heat.core.tests.test_suites.basic_test import TestCase - - -class 
TestMatrixgallery(TestCase): - def __check_parter(self, parter): - self.assertEqual(parter.shape, (20, 20)) - # TODO: check for singular values of the parter matrix - - def __check_orthogonality(self, U): - U_orth_err = ( - ht.norm(U.T @ U - ht.eye(U.shape[1], dtype=U.dtype, split=U.T.split, device=U.device)) - / U.shape[1] ** 0.5 - ) - if U.dtype == ht.float64: - dtype_tol = 1e-12 - if U.dtype == ht.float32: - dtype_tol = 1e-6 - self.assertTrue(U_orth_err <= dtype_tol) - - def test_hermitian(self): - with self.assertRaises(ValueError): - ht.utils.data.matrixgallery.hermitian(10, 20) - with self.assertRaises(ValueError): - ht.utils.data.matrixgallery.hermitian(20, split=0, dtype=ht.int32) - - # test default: complex single precision, not positive definite - A = ht.utils.data.matrixgallery.hermitian(20, split=1) - A_err = ht.norm(A - A.T.conj().resplit_(A.split)) / ht.norm(A) - self.assertTrue(A_err <= 1e-6) - - for posdef in [True, False]: - if not self.is_mps: - # test complex double precision - A = ht.utils.data.matrixgallery.hermitian( - 20, dtype=ht.complex128, split=0, positive_definite=posdef - ) - A_err = ht.norm(A - A.T.conj().resplit_(A.split)) / ht.norm(A) - self.assertTrue(A.dtype == ht.complex128) - self.assertTrue(A_err <= 1e-12) - - # test real datatype - A = ht.utils.data.matrixgallery.hermitian( - 20, dtype=ht.float32, split=0, positive_definite=posdef - ) - A_err = ht.norm(A - A.T.conj().resplit_(A.split)) / ht.norm(A) - self.assertTrue(A_err <= 1e-6) - self.assertTrue(A.dtype == ht.float32) - - def test_parter(self): - parter = ht.utils.data.matrixgallery.parter(20) - self.__check_parter(parter) - - parters0 = ht.utils.data.matrixgallery.parter(20, split=0, comm=ht.MPI_WORLD) - self.__check_parter(parters0) - - parters1 = ht.utils.data.matrixgallery.parter(20, split=1, comm=ht.MPI_WORLD) - self.__check_parter(parters1) - - with self.assertRaises(ValueError): - ht.utils.data.matrixgallery.parter(20, split=2, comm=ht.MPI_WORLD) - - def test_random_orthogonal(self): - with self.assertRaises(RuntimeError): - ht.utils.data.matrixgallery.random_orthogonal(10, 20) - - Q = ht.utils.data.matrixgallery.random_orthogonal(20, 15) - # Q_orth_err = ht.norm( - # Q.T @ Q - # - ht.eye(Q.shape[1], dtype=Q.dtype, split=Q.T.split, device=Q.device) - # ) - # self.assertTrue(Q_orth_err <= 1e-6) - self.__check_orthogonality(Q) - - def test_random_known_singularvalues(self): - with self.assertRaises(RuntimeError): - ht.utils.data.matrixgallery.random_known_singularvalues(30, 20, "abc", split=1) - with self.assertRaises(RuntimeError): - ht.utils.data.matrixgallery.random_known_singularvalues(30, 20, ht.eye(20), split=1) - with self.assertRaises(RuntimeError): - ht.utils.data.matrixgallery.random_known_singularvalues(30, 20, ht.ones(50), split=1) - - svals_input = ht.ones(15) - A, SVD = ht.utils.data.matrixgallery.random_known_singularvalues( - 30, 20, svals_input, split=1 - ) - U = SVD[0] - S = SVD[1] - V = SVD[2] - if A.dtype == ht.float64: - dtype_tol = 1e-12 - if A.dtype == ht.float32: - dtype_tol = 1e-6 - self.__check_orthogonality(U) - self.__check_orthogonality(V) - self.assertTrue(ht.allclose(S, svals_input, rtol=dtype_tol)) - A_err = ht.norm(A - U @ ht.diag(S) @ V.T) / ht.norm(A) - self.assertTrue(A_err <= dtype_tol) - - def test_random_known_rank(self): - with self.assertRaises(RuntimeError): - ht.utils.data.matrixgallery.random_known_rank(30, 20, 25, split=1) - rkinput = 15 - A, SVD = ht.utils.data.matrixgallery.random_known_rank(30, 20, rkinput, split=1) - U = SVD[0] - S = SVD[1] - V = 
SVD[2] - if A.dtype == ht.float64: - dtype_tol = 1e-12 - if A.dtype == ht.float32: - dtype_tol = 1e-6 - self.__check_orthogonality(U) - self.__check_orthogonality(V) - self.assertTrue(S.shape[0] == rkinput) - A_err = ht.norm(A - U @ ht.diag(S) @ V.T) / ht.norm(A) - self.assertTrue(A_err <= dtype_tol) diff --git a/heat/utils/data/tests/test_partial_dataset.py b/heat/utils/data/tests/test_partial_dataset.py deleted file mode 100644 index 49b9f4d3b3..0000000000 --- a/heat/utils/data/tests/test_partial_dataset.py +++ /dev/null @@ -1,77 +0,0 @@ -import heat as ht -import torch -import unittest - - -@unittest.skipIf(torch.cuda.is_available() and torch.version.hip, "not supported for HIP") -class TestPartialDataset(unittest.TestCase): - @unittest.skipUnless(ht.supports_hdf5(), "Requires HDF5") - def test_partial_h5_dataset(self): - # load h5 data and get the total shape - full_data = ht.load("heat/datasets/iris.h5", dataset="data", split=None) - target_shape = full_data.shape - - class TestDataset(ht.utils.data.partial_dataset.PartialH5Dataset): - def __init__(self, file, comm, load, load_len, use_gpus=False): - super(TestDataset, self).__init__( - file, comm=comm, initial_load=load, load_length=load_len, use_gpu=use_gpus - ) - - def __getitem__(self, item): - return self.data[item] - - partial_dset = TestDataset("heat/datasets/iris.h5", full_data.comm, 30, 20) - dl = ht.utils.data.DataLoader(dataset=partial_dset, batch_size=7) - first_epoch = None - second_epoch = None - for epoch in range(2): - elems = 0 - last_batch = None - for batch in dl: - elems += batch.shape[0] - if last_batch is not None: - self.assertFalse(torch.allclose(last_batch, batch)) - self.assertEqual(batch.shape, (7, 4)) - last_batch = batch - if epoch == 0: - if first_epoch is None: - first_epoch = batch - else: - first_epoch = torch.cat((first_epoch, batch), dim=0) - else: - if second_epoch is None: - second_epoch = batch - else: - second_epoch = torch.cat((second_epoch, batch), dim=0) - self.assertTrue(elems >= (target_shape[0] - 7) // full_data.comm.size) - self.assertFalse(torch.allclose(first_epoch, second_epoch)) - - partial_dset = TestDataset("heat/datasets/iris.h5", full_data.comm, 30, 20, True) - dl = ht.utils.data.DataLoader( - dataset=partial_dset, - batch_size=7, - pin_memory=True if torch.cuda.is_available() else False, - ) - first_epoch = None - second_epoch = None - for epoch in range(2): - elems = 0 - last_batch = None - for batch in dl: - elems += batch.shape[0] - if last_batch is not None: - self.assertFalse(torch.allclose(last_batch, batch)) - self.assertEqual(batch.shape, (7, 4)) - last_batch = batch - if epoch == 0: - if first_epoch is None: - first_epoch = batch - else: - first_epoch = torch.cat((first_epoch, batch), dim=0) - else: - if second_epoch is None: - second_epoch = batch - else: - second_epoch = torch.cat((second_epoch, batch), dim=0) - self.assertTrue(elems >= (target_shape[0] - 7) // full_data.comm.size) - self.assertFalse(torch.allclose(first_epoch, second_epoch)) diff --git a/heat/utils/data/tests/test_spherical.py b/heat/utils/data/tests/test_spherical.py deleted file mode 100644 index 7850065969..0000000000 --- a/heat/utils/data/tests/test_spherical.py +++ /dev/null @@ -1,84 +0,0 @@ -import heat as ht -import unittest -import torch -from heat.core.tests.test_suites.basic_test import TestCase - - -class TestCreateClusters(TestCase): - def test_create_cluster(self): - n_samples = ht.MPI_WORLD.size * 10 + 3 - n_features = 3 - n_clusters = ht.MPI_WORLD.size - cluster_mean = 
torch.arange(n_clusters, dtype=torch.float32).repeat(n_features, 1).T - - # test case with uneven distribution of clusters over processes and variances given as vector - cluster_weight = torch.zeros(n_clusters) - cluster_weight[ht.MPI_WORLD.rank] += 0.5 - cluster_weight[0] += 0.5 - cluster_std = 0.01 * torch.ones(n_clusters) - data = ht.utils.data.spherical.create_clusters( - n_samples, n_features, n_clusters, cluster_mean, cluster_std, cluster_weight - ) - self.assertEqual(data.shape, (n_samples, n_features)) - self.assertEqual(data.dtype, ht.float32) - - # test case with even distribution of clusters over processes and variances given as matrix - cluster_weight = None - cluster_std = 0.01 * torch.rand(n_clusters, n_features, n_features) - cluster_std = torch.transpose(cluster_std, 1, 2) @ cluster_std - data = ht.utils.data.spherical.create_clusters( - n_samples, n_features, n_clusters, cluster_mean, cluster_std, cluster_weight - ) - self.assertEqual(data.shape, (n_samples, n_features)) - self.assertEqual(data.dtype, ht.float32) - - def test_if_errors_are_catched(self): - n_samples = ht.MPI_WORLD.size * 10 + 3 - n_features = 3 - n_clusters = ht.MPI_WORLD.size - cluster_mean = torch.arange(n_clusters, dtype=torch.float32).repeat(n_features, 1).T - cluster_std = 0.01 * torch.ones(n_clusters) - - with self.assertRaises(TypeError): - ht.utils.data.spherical.create_clusters( - n_samples, n_features, n_clusters, "abc", cluster_std - ) - with self.assertRaises(ValueError): - ht.utils.data.spherical.create_clusters( - n_samples, n_features, n_clusters, torch.zeros(2, 2), cluster_std - ) - with self.assertRaises(TypeError): - ht.utils.data.spherical.create_clusters( - n_samples, n_features, n_clusters, cluster_mean, "abc" - ) - with self.assertRaises(ValueError): - ht.utils.data.spherical.create_clusters( - n_samples, n_features, n_clusters, cluster_mean, torch.zeros(2, 2) - ) - with self.assertRaises(TypeError): - ht.utils.data.spherical.create_clusters( - n_samples, n_features, n_clusters, cluster_mean, cluster_std, "abc" - ) - with self.assertRaises(ValueError): - ht.utils.data.spherical.create_clusters( - n_samples, - n_features, - n_clusters, - cluster_mean, - cluster_std, - torch.ones( - n_clusters + 1, - ), - ) - with self.assertRaises(ValueError): - ht.utils.data.spherical.create_clusters( - n_samples, - n_features, - n_clusters, - cluster_mean, - cluster_std, - 2 - * torch.ones( - n_clusters, - ), - ) diff --git a/heat/utils/tests/__init__.py b/heat/utils/tests/__init__.py deleted file mode 100644 index e69de29bb2..0000000000 diff --git a/pyproject.toml b/pyproject.toml index 5168e89f48..0bf4b877f2 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,7 +1,3 @@ -[build-system] -requires = ["setuptools"] -build-backend = "setuptools.build_meta" - [project] name="heat" dynamic = ["version"] @@ -101,14 +97,17 @@ Repository = "https://github.com/helmholtz-analytics/heat" Issues = "https://github.com/helmholtz-analytics/heat/issues" Changelog = "https://github.com/helmholtz-analytics/heat/blob/main/CHANGELOG.md" +[build-system] +requires = ["setuptools"] +build-backend = "setuptools.build_meta" + [tool.setuptools.packages.find] -where = ["."] +where = ["src"] include = ["heat", "heat.*"] exclude = ["*tests*", "*benchmarks*"] - [tool.setuptools.package-data] -datasets = ["*.csv", "*.h5", "*.nc"] +"heat.datasets" = ["*.csv", "*.h5", "*.nc"] heat = ["py.typed"] [tool.setuptools.dynamic] diff --git a/heat/__init__.py b/src/heat/__init__.py similarity index 93% rename from heat/__init__.py rename 
to src/heat/__init__.py index 84c4afc11b..c99086277f 100644 --- a/heat/__init__.py +++ b/src/heat/__init__.py @@ -9,6 +9,7 @@ from . import core from . import classification from . import cluster +from . import decomposition from . import fft from . import graph from . import naive_bayes diff --git a/heat/classification/__init__.py b/src/heat/classification/__init__.py similarity index 100% rename from heat/classification/__init__.py rename to src/heat/classification/__init__.py diff --git a/heat/classification/kneighborsclassifier.py b/src/heat/classification/kneighborsclassifier.py similarity index 100% rename from heat/classification/kneighborsclassifier.py rename to src/heat/classification/kneighborsclassifier.py diff --git a/heat/cli.py b/src/heat/cli.py similarity index 100% rename from heat/cli.py rename to src/heat/cli.py diff --git a/heat/cluster/__init__.py b/src/heat/cluster/__init__.py similarity index 100% rename from heat/cluster/__init__.py rename to src/heat/cluster/__init__.py diff --git a/heat/cluster/_kcluster.py b/src/heat/cluster/_kcluster.py similarity index 100% rename from heat/cluster/_kcluster.py rename to src/heat/cluster/_kcluster.py diff --git a/heat/cluster/batchparallelclustering.py b/src/heat/cluster/batchparallelclustering.py similarity index 100% rename from heat/cluster/batchparallelclustering.py rename to src/heat/cluster/batchparallelclustering.py diff --git a/heat/cluster/kmeans.py b/src/heat/cluster/kmeans.py similarity index 100% rename from heat/cluster/kmeans.py rename to src/heat/cluster/kmeans.py diff --git a/heat/cluster/kmedians.py b/src/heat/cluster/kmedians.py similarity index 100% rename from heat/cluster/kmedians.py rename to src/heat/cluster/kmedians.py diff --git a/heat/cluster/kmedoids.py b/src/heat/cluster/kmedoids.py similarity index 100% rename from heat/cluster/kmedoids.py rename to src/heat/cluster/kmedoids.py diff --git a/heat/cluster/spectral.py b/src/heat/cluster/spectral.py similarity index 100% rename from heat/cluster/spectral.py rename to src/heat/cluster/spectral.py diff --git a/heat/core/__init__.py b/src/heat/core/__init__.py similarity index 100% rename from heat/core/__init__.py rename to src/heat/core/__init__.py diff --git a/heat/core/_config.py b/src/heat/core/_config.py similarity index 100% rename from heat/core/_config.py rename to src/heat/core/_config.py diff --git a/heat/core/_operations.py b/src/heat/core/_operations.py similarity index 100% rename from heat/core/_operations.py rename to src/heat/core/_operations.py diff --git a/heat/core/arithmetics.py b/src/heat/core/arithmetics.py similarity index 100% rename from heat/core/arithmetics.py rename to src/heat/core/arithmetics.py diff --git a/heat/core/base.py b/src/heat/core/base.py similarity index 100% rename from heat/core/base.py rename to src/heat/core/base.py diff --git a/heat/core/communication.py b/src/heat/core/communication.py similarity index 100% rename from heat/core/communication.py rename to src/heat/core/communication.py diff --git a/heat/core/complex_math.py b/src/heat/core/complex_math.py similarity index 100% rename from heat/core/complex_math.py rename to src/heat/core/complex_math.py diff --git a/heat/core/constants.py b/src/heat/core/constants.py similarity index 100% rename from heat/core/constants.py rename to src/heat/core/constants.py diff --git a/heat/core/devices.py b/src/heat/core/devices.py similarity index 100% rename from heat/core/devices.py rename to src/heat/core/devices.py diff --git a/heat/core/dndarray.py 
b/src/heat/core/dndarray.py similarity index 100% rename from heat/core/dndarray.py rename to src/heat/core/dndarray.py diff --git a/heat/core/exponential.py b/src/heat/core/exponential.py similarity index 100% rename from heat/core/exponential.py rename to src/heat/core/exponential.py diff --git a/heat/core/factories.py b/src/heat/core/factories.py similarity index 100% rename from heat/core/factories.py rename to src/heat/core/factories.py diff --git a/heat/core/indexing.py b/src/heat/core/indexing.py similarity index 100% rename from heat/core/indexing.py rename to src/heat/core/indexing.py diff --git a/heat/core/io.py b/src/heat/core/io.py similarity index 100% rename from heat/core/io.py rename to src/heat/core/io.py diff --git a/heat/core/linalg/__init__.py b/src/heat/core/linalg/__init__.py similarity index 100% rename from heat/core/linalg/__init__.py rename to src/heat/core/linalg/__init__.py diff --git a/heat/core/linalg/basics.py b/src/heat/core/linalg/basics.py similarity index 100% rename from heat/core/linalg/basics.py rename to src/heat/core/linalg/basics.py diff --git a/heat/core/linalg/eigh.py b/src/heat/core/linalg/eigh.py similarity index 100% rename from heat/core/linalg/eigh.py rename to src/heat/core/linalg/eigh.py diff --git a/heat/core/linalg/polar.py b/src/heat/core/linalg/polar.py similarity index 100% rename from heat/core/linalg/polar.py rename to src/heat/core/linalg/polar.py diff --git a/heat/core/linalg/qr.py b/src/heat/core/linalg/qr.py similarity index 100% rename from heat/core/linalg/qr.py rename to src/heat/core/linalg/qr.py diff --git a/heat/core/linalg/solver.py b/src/heat/core/linalg/solver.py similarity index 100% rename from heat/core/linalg/solver.py rename to src/heat/core/linalg/solver.py diff --git a/heat/core/linalg/svd.py b/src/heat/core/linalg/svd.py similarity index 100% rename from heat/core/linalg/svd.py rename to src/heat/core/linalg/svd.py diff --git a/heat/core/linalg/svdtools.py b/src/heat/core/linalg/svdtools.py similarity index 100% rename from heat/core/linalg/svdtools.py rename to src/heat/core/linalg/svdtools.py diff --git a/heat/core/logical.py b/src/heat/core/logical.py similarity index 100% rename from heat/core/logical.py rename to src/heat/core/logical.py diff --git a/heat/core/manipulations.py b/src/heat/core/manipulations.py similarity index 100% rename from heat/core/manipulations.py rename to src/heat/core/manipulations.py diff --git a/heat/core/memory.py b/src/heat/core/memory.py similarity index 100% rename from heat/core/memory.py rename to src/heat/core/memory.py diff --git a/heat/core/printing.py b/src/heat/core/printing.py similarity index 100% rename from heat/core/printing.py rename to src/heat/core/printing.py diff --git a/heat/core/random.py b/src/heat/core/random.py similarity index 100% rename from heat/core/random.py rename to src/heat/core/random.py diff --git a/heat/core/relational.py b/src/heat/core/relational.py similarity index 100% rename from heat/core/relational.py rename to src/heat/core/relational.py diff --git a/heat/core/rounding.py b/src/heat/core/rounding.py similarity index 100% rename from heat/core/rounding.py rename to src/heat/core/rounding.py diff --git a/heat/core/sanitation.py b/src/heat/core/sanitation.py similarity index 100% rename from heat/core/sanitation.py rename to src/heat/core/sanitation.py diff --git a/heat/core/signal.py b/src/heat/core/signal.py similarity index 100% rename from heat/core/signal.py rename to src/heat/core/signal.py diff --git a/heat/core/statistics.py 
b/src/heat/core/statistics.py similarity index 100% rename from heat/core/statistics.py rename to src/heat/core/statistics.py diff --git a/heat/core/stride_tricks.py b/src/heat/core/stride_tricks.py similarity index 100% rename from heat/core/stride_tricks.py rename to src/heat/core/stride_tricks.py diff --git a/heat/core/tiling.py b/src/heat/core/tiling.py similarity index 100% rename from heat/core/tiling.py rename to src/heat/core/tiling.py diff --git a/heat/core/trigonometrics.py b/src/heat/core/trigonometrics.py similarity index 100% rename from heat/core/trigonometrics.py rename to src/heat/core/trigonometrics.py diff --git a/heat/core/types.py b/src/heat/core/types.py similarity index 100% rename from heat/core/types.py rename to src/heat/core/types.py diff --git a/heat/core/version.py b/src/heat/core/version.py similarity index 100% rename from heat/core/version.py rename to src/heat/core/version.py diff --git a/heat/core/vmap.py b/src/heat/core/vmap.py similarity index 100% rename from heat/core/vmap.py rename to src/heat/core/vmap.py diff --git a/heat/datasets/__init__.py b/src/heat/datasets/__init__.py similarity index 100% rename from heat/datasets/__init__.py rename to src/heat/datasets/__init__.py diff --git a/heat/datasets/diabetes.h5 b/src/heat/datasets/diabetes.h5 similarity index 100% rename from heat/datasets/diabetes.h5 rename to src/heat/datasets/diabetes.h5 diff --git a/heat/datasets/iris.csv b/src/heat/datasets/iris.csv similarity index 100% rename from heat/datasets/iris.csv rename to src/heat/datasets/iris.csv diff --git a/heat/datasets/iris.h5 b/src/heat/datasets/iris.h5 similarity index 100% rename from heat/datasets/iris.h5 rename to src/heat/datasets/iris.h5 diff --git a/heat/datasets/iris.nc b/src/heat/datasets/iris.nc similarity index 100% rename from heat/datasets/iris.nc rename to src/heat/datasets/iris.nc diff --git a/heat/datasets/iris_X_test.csv b/src/heat/datasets/iris_X_test.csv similarity index 100% rename from heat/datasets/iris_X_test.csv rename to src/heat/datasets/iris_X_test.csv diff --git a/heat/datasets/iris_X_train.csv b/src/heat/datasets/iris_X_train.csv similarity index 100% rename from heat/datasets/iris_X_train.csv rename to src/heat/datasets/iris_X_train.csv diff --git a/heat/datasets/iris_labels.csv b/src/heat/datasets/iris_labels.csv similarity index 100% rename from heat/datasets/iris_labels.csv rename to src/heat/datasets/iris_labels.csv diff --git a/heat/datasets/iris_y_pred_proba.csv b/src/heat/datasets/iris_y_pred_proba.csv similarity index 100% rename from heat/datasets/iris_y_pred_proba.csv rename to src/heat/datasets/iris_y_pred_proba.csv diff --git a/heat/datasets/iris_y_test.csv b/src/heat/datasets/iris_y_test.csv similarity index 100% rename from heat/datasets/iris_y_test.csv rename to src/heat/datasets/iris_y_test.csv diff --git a/heat/datasets/iris_y_train.csv b/src/heat/datasets/iris_y_train.csv similarity index 100% rename from heat/datasets/iris_y_train.csv rename to src/heat/datasets/iris_y_train.csv diff --git a/heat/decomposition/__init__.py b/src/heat/decomposition/__init__.py similarity index 100% rename from heat/decomposition/__init__.py rename to src/heat/decomposition/__init__.py diff --git a/heat/decomposition/dmd.py b/src/heat/decomposition/dmd.py similarity index 100% rename from heat/decomposition/dmd.py rename to src/heat/decomposition/dmd.py diff --git a/heat/decomposition/pca.py b/src/heat/decomposition/pca.py similarity index 100% rename from heat/decomposition/pca.py rename to 
src/heat/decomposition/pca.py diff --git a/heat/fft/__init__.py b/src/heat/fft/__init__.py similarity index 100% rename from heat/fft/__init__.py rename to src/heat/fft/__init__.py diff --git a/heat/fft/fft.py b/src/heat/fft/fft.py similarity index 100% rename from heat/fft/fft.py rename to src/heat/fft/fft.py diff --git a/heat/graph/__init__.py b/src/heat/graph/__init__.py similarity index 100% rename from heat/graph/__init__.py rename to src/heat/graph/__init__.py diff --git a/heat/graph/laplacian.py b/src/heat/graph/laplacian.py similarity index 100% rename from heat/graph/laplacian.py rename to src/heat/graph/laplacian.py diff --git a/heat/naive_bayes/.DS_Store b/src/heat/naive_bayes/.DS_Store similarity index 100% rename from heat/naive_bayes/.DS_Store rename to src/heat/naive_bayes/.DS_Store diff --git a/heat/naive_bayes/__init__.py b/src/heat/naive_bayes/__init__.py similarity index 100% rename from heat/naive_bayes/__init__.py rename to src/heat/naive_bayes/__init__.py diff --git a/heat/naive_bayes/gaussianNB.py b/src/heat/naive_bayes/gaussianNB.py similarity index 100% rename from heat/naive_bayes/gaussianNB.py rename to src/heat/naive_bayes/gaussianNB.py diff --git a/heat/nn/__init__.py b/src/heat/nn/__init__.py similarity index 100% rename from heat/nn/__init__.py rename to src/heat/nn/__init__.py diff --git a/heat/nn/data_parallel.py b/src/heat/nn/data_parallel.py similarity index 100% rename from heat/nn/data_parallel.py rename to src/heat/nn/data_parallel.py diff --git a/heat/nn/functional.py b/src/heat/nn/functional.py similarity index 100% rename from heat/nn/functional.py rename to src/heat/nn/functional.py diff --git a/heat/optim/__init__.py b/src/heat/optim/__init__.py similarity index 100% rename from heat/optim/__init__.py rename to src/heat/optim/__init__.py diff --git a/heat/optim/dp_optimizer.py b/src/heat/optim/dp_optimizer.py similarity index 100% rename from heat/optim/dp_optimizer.py rename to src/heat/optim/dp_optimizer.py diff --git a/heat/optim/lr_scheduler.py b/src/heat/optim/lr_scheduler.py similarity index 100% rename from heat/optim/lr_scheduler.py rename to src/heat/optim/lr_scheduler.py diff --git a/heat/optim/utils.py b/src/heat/optim/utils.py similarity index 100% rename from heat/optim/utils.py rename to src/heat/optim/utils.py diff --git a/heat/preprocessing/__init__.py b/src/heat/preprocessing/__init__.py similarity index 100% rename from heat/preprocessing/__init__.py rename to src/heat/preprocessing/__init__.py diff --git a/heat/preprocessing/preprocessing.py b/src/heat/preprocessing/preprocessing.py similarity index 100% rename from heat/preprocessing/preprocessing.py rename to src/heat/preprocessing/preprocessing.py diff --git a/heat/py.typed b/src/heat/py.typed similarity index 100% rename from heat/py.typed rename to src/heat/py.typed diff --git a/heat/regression/__init__.py b/src/heat/regression/__init__.py similarity index 100% rename from heat/regression/__init__.py rename to src/heat/regression/__init__.py diff --git a/heat/regression/lasso.py b/src/heat/regression/lasso.py similarity index 100% rename from heat/regression/lasso.py rename to src/heat/regression/lasso.py diff --git a/heat/sparse/__init__.py b/src/heat/sparse/__init__.py similarity index 100% rename from heat/sparse/__init__.py rename to src/heat/sparse/__init__.py diff --git a/heat/sparse/_operations.py b/src/heat/sparse/_operations.py similarity index 100% rename from heat/sparse/_operations.py rename to src/heat/sparse/_operations.py diff --git 
a/heat/sparse/arithmetics.py b/src/heat/sparse/arithmetics.py similarity index 100% rename from heat/sparse/arithmetics.py rename to src/heat/sparse/arithmetics.py diff --git a/heat/sparse/dcsx_matrix.py b/src/heat/sparse/dcsx_matrix.py similarity index 100% rename from heat/sparse/dcsx_matrix.py rename to src/heat/sparse/dcsx_matrix.py diff --git a/heat/sparse/factories.py b/src/heat/sparse/factories.py similarity index 100% rename from heat/sparse/factories.py rename to src/heat/sparse/factories.py diff --git a/heat/sparse/manipulations.py b/src/heat/sparse/manipulations.py similarity index 100% rename from heat/sparse/manipulations.py rename to src/heat/sparse/manipulations.py diff --git a/heat/spatial/__init__.py b/src/heat/spatial/__init__.py similarity index 100% rename from heat/spatial/__init__.py rename to src/heat/spatial/__init__.py diff --git a/heat/spatial/distance.py b/src/heat/spatial/distance.py similarity index 100% rename from heat/spatial/distance.py rename to src/heat/spatial/distance.py diff --git a/heat/utils/__init__.py b/src/heat/utils/__init__.py similarity index 100% rename from heat/utils/__init__.py rename to src/heat/utils/__init__.py diff --git a/heat/utils/data/__init__.py b/src/heat/utils/data/__init__.py similarity index 100% rename from heat/utils/data/__init__.py rename to src/heat/utils/data/__init__.py diff --git a/heat/utils/data/_utils.py b/src/heat/utils/data/_utils.py similarity index 100% rename from heat/utils/data/_utils.py rename to src/heat/utils/data/_utils.py diff --git a/heat/utils/data/datatools.py b/src/heat/utils/data/datatools.py similarity index 100% rename from heat/utils/data/datatools.py rename to src/heat/utils/data/datatools.py diff --git a/heat/utils/data/matrixgallery.py b/src/heat/utils/data/matrixgallery.py similarity index 100% rename from heat/utils/data/matrixgallery.py rename to src/heat/utils/data/matrixgallery.py diff --git a/heat/utils/data/mnist.py b/src/heat/utils/data/mnist.py similarity index 100% rename from heat/utils/data/mnist.py rename to src/heat/utils/data/mnist.py diff --git a/heat/utils/data/partial_dataset.py b/src/heat/utils/data/partial_dataset.py similarity index 100% rename from heat/utils/data/partial_dataset.py rename to src/heat/utils/data/partial_dataset.py diff --git a/heat/utils/data/spherical.py b/src/heat/utils/data/spherical.py similarity index 100% rename from heat/utils/data/spherical.py rename to src/heat/utils/data/spherical.py diff --git a/heat/utils/vision_transforms.py b/src/heat/utils/vision_transforms.py similarity index 100% rename from heat/utils/vision_transforms.py rename to src/heat/utils/vision_transforms.py diff --git a/heat/classification/tests/__init__.py b/tests/classification/__init__.py similarity index 100% rename from heat/classification/tests/__init__.py rename to tests/classification/__init__.py diff --git a/heat/classification/tests/test_knn.py b/tests/classification/test_knn.py similarity index 97% rename from heat/classification/tests/test_knn.py rename to tests/classification/test_knn.py index 752778062e..41a466ffa0 100644 --- a/heat/classification/tests/test_knn.py +++ b/tests/classification/test_knn.py @@ -2,7 +2,7 @@ import heat as ht from heat.classification.kneighborsclassifier import KNeighborsClassifier -from heat.core.tests.test_suites.basic_test import TestCase +from tests.test_suites.basic_test import TestCase class TestKNN(TestCase): diff --git a/heat/cluster/tests/__init__.py b/tests/cluster/__init__.py similarity index 100% rename from 
heat/cluster/tests/__init__.py rename to tests/cluster/__init__.py diff --git a/heat/cluster/tests/test_batchparallelclustering.py b/tests/cluster/test_batchparallelclustering.py similarity index 98% rename from heat/cluster/tests/test_batchparallelclustering.py rename to tests/cluster/test_batchparallelclustering.py index 684d9d9247..9769ce1a99 100644 --- a/heat/cluster/tests/test_batchparallelclustering.py +++ b/tests/cluster/test_batchparallelclustering.py @@ -7,8 +7,8 @@ from heat.utils.data.spherical import create_spherical_dataset from mpi4py import MPI -from ...core.tests.test_suites.basic_test import TestCase -from ..batchparallelclustering import _kmex, _initialize_plus_plus, _BatchParallelKCluster +from tests.test_suites.basic_test import TestCase +from heat.cluster.batchparallelclustering import _kmex, _initialize_plus_plus, _BatchParallelKCluster # test BatchParallelKCluster base class and auxiliary functions diff --git a/heat/cluster/tests/test_kmeans.py b/tests/cluster/test_kmeans.py similarity index 92% rename from heat/cluster/tests/test_kmeans.py rename to tests/cluster/test_kmeans.py index 25eaf80518..afef8d11ad 100644 --- a/heat/cluster/tests/test_kmeans.py +++ b/tests/cluster/test_kmeans.py @@ -3,12 +3,17 @@ import numpy as np import torch import heat as ht -from heat.utils.data.spherical import create_spherical_dataset - -from ...core.tests.test_suites.basic_test import TestCase +from pathlib import Path +from heat.utils.data.spherical import create_spherical_dataset +from tests.test_suites.basic_test import TestCase class TestKMeans(TestCase): + @classmethod + def setUpClass(cls): + super(TestKMeans, cls).setUpClass() + cls.data_path = str(Path(ht.__file__).parent / "datasets" / "iris.csv") + def test_clusterer(self): kmeans = ht.cluster.KMeans() self.assertTrue(ht.is_estimator(kmeans)) @@ -31,7 +36,7 @@ def test_fit_iris_unsplit(self): oversampling=10 for split in [None, 0]: # get some test data - iris = ht.load("heat/datasets/iris.csv", sep=";", split=split) + iris = ht.load(self.data_path, sep=";", split=split) # fit the clusters k = 3 @@ -50,7 +55,7 @@ def test_fit_iris_unsplit(self): self.assertIsInstance(kmeans.cluster_centers_, ht.DNDarray) self.assertEqual(kmeans.cluster_centers_.shape, (k, iris.shape[1])) - iris = ht.load("heat/datasets/iris.csv", sep=";", split=0) + iris = ht.load(self.data_path, sep=";", split=0) # same test with init=batchparallel kmeans = ht.cluster.KMeans(n_clusters=k, init="batchparallel") kmeans.fit(iris, oversampling=oversampling) @@ -61,7 +66,7 @@ def test_fit_iris_unsplit(self): def test_exceptions(self): # get some test data - iris_split = ht.load("heat/datasets/iris.csv", sep=";", split=1) + iris_split = ht.load(self.data_path, sep=";", split=1) # build a clusterer k = 3 diff --git a/heat/cluster/tests/test_kmedians.py b/tests/cluster/test_kmedians.py similarity index 92% rename from heat/cluster/tests/test_kmedians.py rename to tests/cluster/test_kmedians.py index ee8b534e50..5a0f3873cb 100644 --- a/heat/cluster/tests/test_kmedians.py +++ b/tests/cluster/test_kmedians.py @@ -5,12 +5,18 @@ import numpy as np import torch +from pathlib import Path from heat.utils.data.spherical import create_spherical_dataset -from ...core.tests.test_suites.basic_test import TestCase +from tests.test_suites.basic_test import TestCase class TestKMedians(TestCase): + @classmethod + def setUpClass(cls): + super(TestKMedians, cls).setUpClass() + cls.data_path = str(Path(ht.__file__).parent / "datasets" / "iris.csv") + def test_clusterer(self): kmedian 
= ht.cluster.KMedians() self.assertTrue(ht.is_estimator(kmedian)) @@ -32,7 +38,7 @@ def test_get_and_set_params(self): def test_fit_iris_unsplit(self): split = 0 # get some test data - iris = ht.load("heat/datasets/iris.csv", sep=";", split=split) + iris = ht.load(self.data_path, sep=";", split=split) # fit the clusters k = 3 @@ -60,7 +66,7 @@ def test_fit_iris_unsplit(self): def test_exceptions(self): # get some test data - iris_split = ht.load("heat/datasets/iris.csv", sep=";", split=1) + iris_split = ht.load(self.data_path, sep=";", split=1) # build a clusterer k = 3 diff --git a/heat/cluster/tests/test_kmedoids.py b/tests/cluster/test_kmedoids.py similarity index 91% rename from heat/cluster/tests/test_kmedoids.py rename to tests/cluster/test_kmedoids.py index a1a261eca8..ef3bb1e21b 100644 --- a/heat/cluster/tests/test_kmedoids.py +++ b/tests/cluster/test_kmedoids.py @@ -1,11 +1,17 @@ import unittest import heat as ht +from pathlib import Path from heat.utils.data.spherical import create_spherical_dataset -from ...core.tests.test_suites.basic_test import TestCase +from tests.test_suites.basic_test import TestCase -class TestKMeans(TestCase): +class TestKMedoids(TestCase): + @classmethod + def setUpClass(cls): + super(TestKMedoids, cls).setUpClass() + cls.data_path = str(Path(ht.__file__).parent / "datasets" / "iris.csv") + def test_clusterer(self): kmedoid = ht.cluster.KMedoids() self.assertTrue(ht.is_estimator(kmedoid)) @@ -26,7 +32,7 @@ def test_get_and_set_params(self): def test_fit_iris_unsplit(self): split = 0 # get some test data - iris = ht.load("heat/datasets/iris.csv", sep=";", split=split) + iris = ht.load(self.data_path, sep=";", split=split) # fit the clusters k = 3 kmedoid = ht.cluster.KMedoids(n_clusters=k, random_state=1) @@ -51,7 +57,7 @@ def test_fit_iris_unsplit(self): def test_exceptions(self): # get some test data - iris_split = ht.load("heat/datasets/iris.csv", sep=";", split=1) + iris_split = ht.load(self.data_path, sep=";", split=1) # build a clusterer k = 3 @@ -65,7 +71,7 @@ def test_exceptions(self): kmedoid = ht.cluster.KMedoids(n_clusters=k, init="random_number") kmedoid.fit(iris_split) - iris_split = ht.load("heat/datasets/iris.csv", sep=";", split=0) + iris_split = ht.load(self.data_path, sep=";", split=0) with self.assertRaises(ValueError): kmedoid = ht.cluster.KMedoids(n_clusters=k, init="batchparallel") kmedoid.fit(iris_split) diff --git a/heat/cluster/tests/test_spectral.py b/tests/cluster/test_spectral.py similarity index 87% rename from heat/cluster/tests/test_spectral.py rename to tests/cluster/test_spectral.py index cd43433d9d..5f6c4177f0 100644 --- a/heat/cluster/tests/test_spectral.py +++ b/tests/cluster/test_spectral.py @@ -4,10 +4,16 @@ import heat as ht import torch -from ...core.tests.test_suites.basic_test import TestCase +from pathlib import Path +from tests.test_suites.basic_test import TestCase class TestSpectral(TestCase): + @classmethod + def setUpClass(cls): + super(TestSpectral, cls).setUpClass() + cls.data_path = str(Path(ht.__file__).parent / "datasets" / "iris.csv") + def test_clusterer(self): spectral = ht.cluster.Spectral() self.assertTrue(ht.is_estimator(spectral)) @@ -39,7 +45,7 @@ def test_fit_iris(self): # skip on MPS, matmul on ComplexFloat not supported as of PyTorch 2.5 if not self.is_mps: # get some test data - iris = ht.load("heat/datasets/iris.csv", sep=";", split=0) + iris = ht.load(self.data_path, sep=";", split=0) m = 10 # fit the clusters spectral = ht.cluster.Spectral( @@ -80,7 +86,7 @@ def test_fit_iris(self): with 
self.assertRaises(NotImplementedError): spectral = ht.cluster.Spectral(metric="ahalanobis", n_lanczos=m) - iris_split = ht.load("heat/datasets/iris.csv", sep=";", split=1) + iris_split = ht.load(self.data_path, sep=";", split=1) spectral = ht.cluster.Spectral(n_lanczos=20) with self.assertRaises(NotImplementedError): spectral.fit(iris_split) diff --git a/heat/core/tests/Dockerfile b/tests/core/Dockerfile similarity index 100% rename from heat/core/tests/Dockerfile rename to tests/core/Dockerfile diff --git a/heat/core/linalg/tests/__init__.py b/tests/core/__init__.py similarity index 100% rename from heat/core/linalg/tests/__init__.py rename to tests/core/__init__.py diff --git a/heat/core/tests/test_arithmetics.py b/tests/core/test_arithmetics.py similarity index 99% rename from heat/core/tests/test_arithmetics.py rename to tests/core/test_arithmetics.py index 8b8a8a902d..8b92dc53d9 100644 --- a/heat/core/tests/test_arithmetics.py +++ b/tests/core/test_arithmetics.py @@ -6,7 +6,7 @@ import numpy as np import torch -from .test_suites.basic_test import TestCase +from tests.test_suites.basic_test import TestCase class TestArithmetics(TestCase): diff --git a/heat/core/tests/test_communication.py b/tests/core/test_communication.py similarity index 99% rename from heat/core/tests/test_communication.py rename to tests/core/test_communication.py index 9ae4a95b70..ff37948f17 100644 --- a/heat/core/tests/test_communication.py +++ b/tests/core/test_communication.py @@ -6,7 +6,7 @@ import torch import heat as ht -from .test_suites.basic_test import TestCase +from tests.test_suites.basic_test import TestCase envar = os.getenv("HEAT_TEST_USE_DEVICE", "cpu") is_mps = envar == "gpu" and platform.machine() == "arm64" diff --git a/heat/core/tests/test_complex_math.py b/tests/core/test_complex_math.py similarity index 99% rename from heat/core/tests/test_complex_math.py rename to tests/core/test_complex_math.py index cc56088bce..4b679689ac 100644 --- a/heat/core/tests/test_complex_math.py +++ b/tests/core/test_complex_math.py @@ -3,7 +3,7 @@ import heat as ht import platform -from .test_suites.basic_test import TestCase +from tests.test_suites.basic_test import TestCase class TestComplex(TestCase): diff --git a/heat/core/tests/test_constants.py b/tests/core/test_constants.py similarity index 88% rename from heat/core/tests/test_constants.py rename to tests/core/test_constants.py index 31c725ae3f..5f4745a919 100644 --- a/heat/core/tests/test_constants.py +++ b/tests/core/test_constants.py @@ -1,7 +1,7 @@ import numpy as np import heat as ht -from .test_suites.basic_test import TestCase +from tests.test_suites.basic_test import TestCase class TestConstants(TestCase): diff --git a/heat/core/tests/test_devices.py b/tests/core/test_devices.py similarity index 98% rename from heat/core/tests/test_devices.py rename to tests/core/test_devices.py index e0ce2a758b..f9890f9182 100644 --- a/heat/core/tests/test_devices.py +++ b/tests/core/test_devices.py @@ -2,7 +2,7 @@ import unittest import heat as ht -from .test_suites.basic_test import TestCase +from tests.test_suites.basic_test import TestCase envar = os.getenv("HEAT_TEST_USE_DEVICE", "cpu") diff --git a/heat/core/tests/test_dndarray.py b/tests/core/test_dndarray.py similarity index 99% rename from heat/core/tests/test_dndarray.py rename to tests/core/test_dndarray.py index c6123c1cf2..9652872222 100644 --- a/heat/core/tests/test_dndarray.py +++ b/tests/core/test_dndarray.py @@ -2,7 +2,8 @@ import torch import heat as ht -from .test_suites.basic_test import 
TestCase +from tests.test_suites.basic_test import TestCase +from pathlib import Path pytorch_major_version = int(torch.__version__.split(".")[0]) @@ -354,6 +355,8 @@ def test_astype(self): self.assertIs(as_float64, data) def test_balance_and_lshape_map(self): + data_path = str(Path(ht.__file__).parent / "datasets" / "iris.csv") + data = ht.zeros((70, 20), split=0) data = data[:50] data.lshape_map @@ -382,8 +385,8 @@ def test_balance_and_lshape_map(self): data = data[:, 40:70].balance() self.assertTrue(data.is_balanced()) - data = np.loadtxt("heat/datasets/iris.csv", delimiter=";") - htdata = ht.load("heat/datasets/iris.csv", sep=";", split=0) + data = np.loadtxt(data_path, delimiter=";") + htdata = ht.load(data_path, sep=";", split=0) self.assertTrue( ht.equal(htdata, ht.array(data.astype(np.float32), split=0, dtype=ht.float)) ) diff --git a/heat/core/tests/test_exponential.py b/tests/core/test_exponential.py similarity index 99% rename from heat/core/tests/test_exponential.py rename to tests/core/test_exponential.py index b26cfe789a..5a28b6f8b6 100644 --- a/heat/core/tests/test_exponential.py +++ b/tests/core/test_exponential.py @@ -2,7 +2,7 @@ import torch import heat as ht -from .test_suites.basic_test import TestCase +from tests.test_suites.basic_test import TestCase class TestExponential(TestCase): diff --git a/heat/core/tests/test_factories.py b/tests/core/test_factories.py similarity index 99% rename from heat/core/tests/test_factories.py rename to tests/core/test_factories.py index fe17e897c4..253377cd92 100644 --- a/heat/core/tests/test_factories.py +++ b/tests/core/test_factories.py @@ -2,7 +2,7 @@ import torch import heat as ht -from .test_suites.basic_test import TestCase +from tests.test_suites.basic_test import TestCase class TestFactories(TestCase): diff --git a/heat/core/tests/test_indexing.py b/tests/core/test_indexing.py similarity index 98% rename from heat/core/tests/test_indexing.py rename to tests/core/test_indexing.py index 4707aa28ab..1f190edbee 100644 --- a/heat/core/tests/test_indexing.py +++ b/tests/core/test_indexing.py @@ -1,5 +1,5 @@ import heat as ht -from .test_suites.basic_test import TestCase +from tests.test_suites.basic_test import TestCase class TestIndexing(TestCase): diff --git a/heat/core/tests/test_io.py b/tests/core/test_io.py similarity index 96% rename from heat/core/tests/test_io.py rename to tests/core/test_io.py index 0ec1bd044a..4dccba3d27 100644 --- a/heat/core/tests/test_io.py +++ b/tests/core/test_io.py @@ -11,25 +11,26 @@ import unittest import heat as ht -from .test_suites.basic_test import TestCase +from tests.test_suites.basic_test import TestCase class TestIO(TestCase): @classmethod def setUpClass(cls): super(TestIO, cls).setUpClass() + data_path = Path(ht.__file__).parent / "datasets" pwd = os.getcwd() - cls.HDF5_PATH = os.path.join(os.getcwd(), "heat/datasets/iris.h5") + cls.HDF5_PATH = str(data_path / "iris.h5") cls.HDF5_OUT_PATH = pwd + "/test.h5" cls.HDF5_DATASET = "data" - cls.NETCDF_PATH = os.path.join(os.getcwd(), "heat/datasets/iris.nc") + cls.NETCDF_PATH = str(data_path / "iris.nc") cls.NETCDF_OUT_PATH = pwd + "/test.nc" cls.NETCDF_VARIABLE = "data" cls.NETCDF_DIMENSION = "data" # load comparison data from csv - cls.CSV_PATH = os.path.join(os.getcwd(), "heat/datasets/iris.csv") + cls.CSV_PATH = str(data_path / "iris.csv") cls.CSV_OUT_PATH = pwd + "/test.csv" cls.IRIS = ( torch.from_numpy(np.loadtxt(cls.CSV_PATH, delimiter=";")) @@ -815,12 +816,12 @@ def test_load_npy_int(self): crea_array = [] for i in range(0, 
ht.MPI_WORLD.size * 5): x = np.random.randint(1000, size=(random.randint(0, 30), 6, 11)) - np.save(os.path.join(os.getcwd(), "heat/datasets", "int_data") + str(i), x) + np.save(os.path.join(os.getcwd(), "src/heat/datasets", "int_data") + str(i), x) crea_array.append(x) int_array = np.concatenate(crea_array) ht.MPI_WORLD.Barrier() load_array = ht.load_npy_from_path( - os.path.join(os.getcwd(), "heat/datasets"), dtype=ht.int32, split=0 + os.path.join(os.getcwd(), "src/heat/datasets"), dtype=ht.int32, split=0 ) load_array_npy = load_array.numpy() @@ -828,9 +829,9 @@ def test_load_npy_int(self): self.assertEqual(load_array.dtype, ht.int32) if ht.MPI_WORLD.rank == 0: self.assertTrue((load_array_npy == int_array).all) - for file in os.listdir(os.path.join(os.getcwd(), "heat/datasets")): + for file in os.listdir(os.path.join(os.getcwd(), "src/heat/datasets")): if fnmatch.fnmatch(file, "*.npy"): - os.remove(os.path.join(os.getcwd(), "heat/datasets", file)) + os.remove(os.path.join(os.getcwd(), "src/heat/datasets", file)) def test_load_npy_float(self): # testing for float arrays and split dimension other than 0 @@ -838,7 +839,7 @@ def test_load_npy_float(self): crea_array = [] for i in range(0, ht.MPI_WORLD.size * 5 + 1): x = np.random.rand(2, random.randint(1, 10), 11) - np.save(os.path.join(os.getcwd(), "heat/datasets", "float_data") + str(i), x) + np.save(os.path.join(os.getcwd(), "src/heat/datasets", "float_data") + str(i), x) crea_array.append(x) float_array = np.concatenate(crea_array, 1) ht.MPI_WORLD.Barrier() @@ -846,7 +847,7 @@ def test_load_npy_float(self): if not self.is_mps: # float64 not supported in MPS load_array = ht.load_npy_from_path( - os.path.join(os.getcwd(), "heat/datasets"), dtype=ht.float64, split=1 + os.path.join(os.getcwd(), "src/heat/datasets"), dtype=ht.float64, split=1 ) load_array_npy = load_array.numpy() self.assertIsInstance(load_array, ht.DNDarray) @@ -854,27 +855,27 @@ def test_load_npy_float(self): if ht.MPI_WORLD.rank == 0: self.assertTrue((load_array_npy == float_array).all) if ht.MPI_WORLD.rank == 0: - for file in os.listdir(os.path.join(os.getcwd(), "heat/datasets")): + for file in os.listdir(os.path.join(os.getcwd(), "src/heat/datasets")): if fnmatch.fnmatch(file, "*.npy"): - os.remove(os.path.join(os.getcwd(), "heat/datasets", file)) + os.remove(os.path.join(os.getcwd(), "src/heat/datasets", file)) def test_load_npy_exception(self): with self.assertRaises(TypeError): ht.load_npy_from_path(path=1, split=0) with self.assertRaises(TypeError): - ht.load_npy_from_path("heat/datasets", split="ABC") + ht.load_npy_from_path("src/heat/datasets", split="ABC") with self.assertRaises(ValueError): - ht.load_npy_from_path(path="heat", dtype=ht.int64, split=0) + ht.load_npy_from_path(path="src/heat", dtype=ht.int64, split=0) if ht.MPI_WORLD.size > 1: if ht.MPI_WORLD.rank == 0: x = np.random.rand(2, random.randint(1, 10), 11) - np.save(os.path.join(os.getcwd(), "heat/datasets", "float_data"), x) + np.save(os.path.join(os.getcwd(), "src/heat/datasets", "float_data"), x) ht.MPI_WORLD.Barrier() with self.assertRaises(RuntimeError): - ht.load_npy_from_path("heat/datasets", dtype=ht.int64, split=0) + ht.load_npy_from_path("src/heat/datasets", dtype=ht.int64, split=0) ht.MPI_WORLD.Barrier() if ht.MPI_WORLD.rank == 0: - os.remove(os.path.join(os.getcwd(), "heat/datasets", "float_data.npy")) + os.remove(os.path.join(os.getcwd(), "src/heat/datasets", "float_data.npy")) def test_load_multiple_csv(self): if not ht.io.supports_pandas(): @@ -882,7 +883,7 @@ def 
test_load_multiple_csv(self): import pandas as pd - csv_path = os.path.join(os.getcwd(), "heat/datasets/csv_tests") + csv_path = os.path.join(os.getcwd(), "src/heat/datasets/csv_tests") if ht.MPI_WORLD.rank == 0: nplist = [] npdroplist = [] @@ -935,26 +936,26 @@ def test_load_multiple_csv_exception(self): with self.assertRaises(TypeError): ht.load_csv_from_folder(path=1, split=0) with self.assertRaises(TypeError): - ht.load_csv_from_folder("heat/datasets", split="ABC") + ht.load_csv_from_folder("src/heat/datasets", split="ABC") with self.assertRaises(TypeError): - ht.load_csv_from_folder(path="heat/datasets", func=1) + ht.load_csv_from_folder(path="src/heat/datasets", func=1) with self.assertRaises(ValueError): ht.load_csv_from_folder(path="heat", dtype=ht.int64, split=0) if ht.MPI_WORLD.size > 1: if ht.MPI_WORLD.rank == 0: - os.mkdir(os.path.join(os.getcwd(), "heat/datasets/csv_tests")) + os.mkdir(os.path.join(os.getcwd(), "src/heat/datasets/csv_tests")) df = pd.DataFrame({"A": [0, 0, 0]}) # noqa F821 df.to_csv( - (os.path.join(os.getcwd(), "heat/datasets/csv_tests", "fail.csv")), + (os.path.join(os.getcwd(), "src/heat/datasets/csv_tests", "fail.csv")), index=False, ) ht.MPI_WORLD.Barrier() with self.assertRaises(RuntimeError): - ht.load_csv_from_folder("heat/datasets/csv_tests", dtype=ht.int64, split=0) + ht.load_csv_from_folder("src/heat/datasets/csv_tests", dtype=ht.int64, split=0) ht.MPI_WORLD.Barrier() if ht.MPI_WORLD.rank == 0: - shutil.rmtree(os.path.join(os.getcwd(), "heat/datasets/csv_tests")) + shutil.rmtree(os.path.join(os.getcwd(), "src/heat/datasets/csv_tests")) def test_load_zarr(self): if not ht.io.supports_zarr(): diff --git a/heat/core/tests/test_logical.py b/tests/core/test_logical.py similarity index 99% rename from heat/core/tests/test_logical.py rename to tests/core/test_logical.py index c2da61d64b..9ebdbd675d 100644 --- a/heat/core/tests/test_logical.py +++ b/tests/core/test_logical.py @@ -1,7 +1,7 @@ import torch import heat as ht -from .test_suites.basic_test import TestCase +from tests.test_suites.basic_test import TestCase class TestLogical(TestCase): diff --git a/heat/core/tests/test_manipulations.py b/tests/core/test_manipulations.py similarity index 99% rename from heat/core/tests/test_manipulations.py rename to tests/core/test_manipulations.py index 30138730d1..2309620cb8 100644 --- a/heat/core/tests/test_manipulations.py +++ b/tests/core/test_manipulations.py @@ -2,7 +2,7 @@ import torch import heat as ht -from .test_suites.basic_test import TestCase +from tests.test_suites.basic_test import TestCase class TestManipulations(TestCase): diff --git a/heat/core/tests/test_memory.py b/tests/core/test_memory.py similarity index 98% rename from heat/core/tests/test_memory.py rename to tests/core/test_memory.py index bdff40ac4b..9ca81ea39b 100644 --- a/heat/core/tests/test_memory.py +++ b/tests/core/test_memory.py @@ -1,7 +1,7 @@ import torch import heat as ht -from .test_suites.basic_test import TestCase +from tests.test_suites.basic_test import TestCase class TestMemory(TestCase): diff --git a/heat/core/tests/test_operations.py b/tests/core/test_operations.py similarity index 98% rename from heat/core/tests/test_operations.py rename to tests/core/test_operations.py index 6f718a82e9..780e0a2559 100644 --- a/heat/core/tests/test_operations.py +++ b/tests/core/test_operations.py @@ -3,7 +3,7 @@ import heat as ht import numpy as np -from .test_suites.basic_test import TestCase +from tests.test_suites.basic_test import TestCase class TestOperations(TestCase): diff 
--git a/heat/core/tests/test_printing.py b/tests/core/test_printing.py similarity index 99% rename from heat/core/tests/test_printing.py rename to tests/core/test_printing.py index fd6e382e2a..ddf93546c2 100644 --- a/heat/core/tests/test_printing.py +++ b/tests/core/test_printing.py @@ -1,7 +1,7 @@ import math import heat as ht -from .test_suites.basic_test import TestCase +from tests.test_suites.basic_test import TestCase class TestPrinting(TestCase): diff --git a/heat/core/tests/test_random.py b/tests/core/test_random.py similarity index 99% rename from heat/core/tests/test_random.py rename to tests/core/test_random.py index f0bc9b1f92..842469dca7 100644 --- a/heat/core/tests/test_random.py +++ b/tests/core/test_random.py @@ -6,7 +6,7 @@ import torch import heat as ht -from .test_suites.basic_test import TestCase +from tests.test_suites.basic_test import TestCase envar = os.getenv("HEAT_TEST_USE_DEVICE", "cpu") is_mps = envar == "gpu" and platform.system() == "Darwin" diff --git a/heat/core/tests/test_relational.py b/tests/core/test_relational.py similarity index 99% rename from heat/core/tests/test_relational.py rename to tests/core/test_relational.py index f050c0d2d4..1f2fcc6f07 100644 --- a/heat/core/tests/test_relational.py +++ b/tests/core/test_relational.py @@ -1,5 +1,5 @@ import heat as ht -from .test_suites.basic_test import TestCase +from tests.test_suites.basic_test import TestCase class TestRelational(TestCase): diff --git a/heat/core/tests/test_rounding.py b/tests/core/test_rounding.py similarity index 99% rename from heat/core/tests/test_rounding.py rename to tests/core/test_rounding.py index 597cd044f9..dbd28522df 100644 --- a/heat/core/tests/test_rounding.py +++ b/tests/core/test_rounding.py @@ -4,7 +4,7 @@ import torch import heat as ht -from .test_suites.basic_test import TestCase +from tests.test_suites.basic_test import TestCase class TestRounding(TestCase): diff --git a/heat/core/tests/test_sanitation.py b/tests/core/test_sanitation.py similarity index 98% rename from heat/core/tests/test_sanitation.py rename to tests/core/test_sanitation.py index fd08a1401f..ddb7827bf0 100644 --- a/heat/core/tests/test_sanitation.py +++ b/tests/core/test_sanitation.py @@ -2,7 +2,7 @@ import torch import heat as ht -from .test_suites.basic_test import TestCase +from tests.test_suites.basic_test import TestCase class TestSanitation(TestCase): diff --git a/heat/core/tests/test_signal.py b/tests/core/test_signal.py similarity index 99% rename from heat/core/tests/test_signal.py rename to tests/core/test_signal.py index ad3ecea12a..31278e99ef 100644 --- a/heat/core/tests/test_signal.py +++ b/tests/core/test_signal.py @@ -4,7 +4,7 @@ import heat as ht from heat import manipulations -from .test_suites.basic_test import TestCase +from tests.test_suites.basic_test import TestCase class TestSignal(TestCase): diff --git a/heat/core/tests/test_statistics.py b/tests/core/test_statistics.py similarity index 98% rename from heat/core/tests/test_statistics.py rename to tests/core/test_statistics.py index 358c99e857..5e8930b841 100644 --- a/heat/core/tests/test_statistics.py +++ b/tests/core/test_statistics.py @@ -3,12 +3,18 @@ from itertools import combinations from scipy import stats as ss +from pathlib import Path import heat as ht -from .test_suites.basic_test import TestCase +from tests.test_suites.basic_test import TestCase class TestStatistics(TestCase): + @classmethod + def setUpClass(cls): + super(TestStatistics, cls).setUpClass() + cls.data_path = str(Path(ht.__file__).parent / "datasets" / 
"iris.csv") + def test_argmax(self): torch.manual_seed(1) data = ht.random.randn(3, 4, 5) @@ -417,11 +423,11 @@ def test_cov(self): actual = ht.array([[1, -1], [-1, 1]], split=0) self.assertTrue(ht.equal(cov, actual)) - data = np.loadtxt("heat/datasets/iris.csv", delimiter=";") + data = np.loadtxt(self.data_path, delimiter=";") np_cov = np.cov(data[:, 0], data[:, 1:3], rowvar=False).astype(np_dtype) # split = None tests - htdata = ht.load("heat/datasets/iris.csv", sep=";", split=None) + htdata = ht.load(self.data_path, sep=";", split=None) ht_cov = ht.cov(htdata[:, 0], htdata[:, 1:3], rowvar=False) comp = ht.array(np_cov, dtype=dtype) self.assertTrue(ht.allclose(comp - ht_cov, 0, atol=1e-4)) @@ -439,10 +445,10 @@ def test_cov(self): self.assertTrue(ht.allclose(ht.array(np_cov, dtype=dtype) - ht_cov, 0, atol=1e-4)) # split = 0 tests - data = np.loadtxt("heat/datasets/iris.csv", delimiter=";") + data = np.loadtxt(self.data_path, delimiter=";") np_cov = np.cov(data[:, 0], data[:, 1:3], rowvar=False).astype(np_dtype) - htdata = ht.load("heat/datasets/iris.csv", sep=";", split=0) + htdata = ht.load(self.data_path, sep=";", split=0) ht_cov = ht.cov(htdata[:, 0], htdata[:, 1:3], rowvar=False) comp = ht.array(np_cov, dtype=ht.float) self.assertTrue(ht.allclose(comp - ht_cov, 0, atol=1e-4)) @@ -461,18 +467,18 @@ def test_cov(self): if 1 < x.comm.size < 5: # split 1 tests - htdata = ht.load("heat/datasets/iris.csv", sep=";", split=1) + htdata = ht.load(self.data_path, sep=";", split=1) np_cov = np.cov(data, rowvar=False).astype(np_dtype) ht_cov = ht.cov(htdata, rowvar=False) self.assertTrue(ht.allclose(ht.array(np_cov, dtype=dtype), ht_cov, atol=1e-4)) np_cov = np.cov(data, data, rowvar=True).astype(np_dtype) - htdata = ht.load("heat/datasets/iris.csv", sep=";", split=0) + htdata = ht.load(self.data_path, sep=";", split=0) ht_cov = ht.cov(htdata, htdata, rowvar=True) self.assertTrue(ht.allclose(ht.array(np_cov, dtype=dtype), ht_cov, atol=1e-4)) - htdata = ht.load("heat/datasets/iris.csv", sep=";", split=0) + htdata = ht.load(self.data_path, sep=";", split=0) with self.assertRaises(RuntimeError): ht.cov(htdata[1:], rowvar=False) with self.assertRaises(RuntimeError): @@ -979,7 +985,7 @@ def test_mean(self): # values for the iris dataset mean measured by libreoffice calc ax0 = ht.array([5.84333333333333, 3.054, 3.75866666666667, 1.19866666666667]) for sp in [None, 0, 1]: - iris = ht.load("heat/datasets/iris.csv", sep=";", split=sp) + iris = ht.load(self.data_path, sep=";", split=sp) self.assertTrue(ht.allclose(ht.mean(iris), 3.46366666666667)) self.assertTrue(ht.allclose(ht.mean(iris, axis=0), ax0)) @@ -1589,5 +1595,5 @@ def test_var(self): # values for the iris dataset var measured by libreoffice calc for sp in [None, 0, 1]: - iris = ht.load("heat/datasets/iris.csv", sep=";", split=sp) + iris = ht.load(self.data_path, sep=";", split=sp) self.assertTrue(ht.allclose(ht.var(iris, bessel=True), 3.90318519755147)) diff --git a/heat/core/tests/test_stride_tricks.py b/tests/core/test_stride_tricks.py similarity index 98% rename from heat/core/tests/test_stride_tricks.py rename to tests/core/test_stride_tricks.py index 81d79eb11f..e03213bb67 100644 --- a/heat/core/tests/test_stride_tricks.py +++ b/tests/core/test_stride_tricks.py @@ -1,5 +1,5 @@ import heat as ht -from .test_suites.basic_test import TestCase +from tests.test_suites.basic_test import TestCase class TestStrideTricks(TestCase): diff --git a/heat/core/tests/test_tiling.py b/tests/core/test_tiling.py similarity index 99% rename from 
heat/core/tests/test_tiling.py rename to tests/core/test_tiling.py index b6e00c3161..24526551ce 100644 --- a/heat/core/tests/test_tiling.py +++ b/tests/core/test_tiling.py @@ -5,7 +5,7 @@ import torch import heat as ht -from .test_suites.basic_test import TestCase +from tests.test_suites.basic_test import TestCase envar = os.getenv("HEAT_TEST_USE_DEVICE", "cpu") is_mps = envar == "gpu" and platform.machine() == "arm64" diff --git a/heat/core/tests/test_trigonometrics.py b/tests/core/test_trigonometrics.py similarity index 99% rename from heat/core/tests/test_trigonometrics.py rename to tests/core/test_trigonometrics.py index 7e09472b86..706e74eee7 100644 --- a/heat/core/tests/test_trigonometrics.py +++ b/tests/core/test_trigonometrics.py @@ -2,7 +2,7 @@ import torch import heat as ht -from .test_suites.basic_test import TestCase +from tests.test_suites.basic_test import TestCase class TestTrigonometrics(TestCase): diff --git a/heat/core/tests/test_types.py b/tests/core/test_types.py similarity index 99% rename from heat/core/tests/test_types.py rename to tests/core/test_types.py index 42e0124ef2..b27d0c16e7 100644 --- a/heat/core/tests/test_types.py +++ b/tests/core/test_types.py @@ -2,7 +2,7 @@ import torch import heat as ht -from .test_suites.basic_test import TestCase +from tests.test_suites.basic_test import TestCase class TestTypes(TestCase): diff --git a/heat/core/tests/test_vmap.py b/tests/core/test_vmap.py similarity index 99% rename from heat/core/tests/test_vmap.py rename to tests/core/test_vmap.py index 0f7ba62d2e..07699518a9 100644 --- a/heat/core/tests/test_vmap.py +++ b/tests/core/test_vmap.py @@ -2,7 +2,7 @@ import torch import os -from .test_suites.basic_test import TestCase +from tests.test_suites.basic_test import TestCase class TestVmap(TestCase): diff --git a/heat/core/tests/__init__.py b/tests/decomposition/__init__.py similarity index 100% rename from heat/core/tests/__init__.py rename to tests/decomposition/__init__.py diff --git a/heat/decomposition/tests/test_dmd.py b/tests/decomposition/test_dmd.py similarity index 99% rename from heat/decomposition/tests/test_dmd.py rename to tests/decomposition/test_dmd.py index 38b3ec2b2b..a013906949 100644 --- a/heat/decomposition/tests/test_dmd.py +++ b/tests/decomposition/test_dmd.py @@ -5,7 +5,7 @@ import torch import heat as ht -from ...core.tests.test_suites.basic_test import TestCase +from tests.test_suites.basic_test import TestCase # MPS does not support non-float matrix multiplication envar = os.getenv("HEAT_TEST_USE_DEVICE", "cpu") diff --git a/heat/decomposition/tests/test_pca.py b/tests/decomposition/test_pca.py similarity index 99% rename from heat/decomposition/tests/test_pca.py rename to tests/decomposition/test_pca.py index 41300186e6..2a20cc4d19 100644 --- a/heat/decomposition/tests/test_pca.py +++ b/tests/decomposition/test_pca.py @@ -4,7 +4,7 @@ import torch import heat as ht -from ...core.tests.test_suites.basic_test import TestCase +from tests.test_suites.basic_test import TestCase class TestPCA(TestCase): diff --git a/heat/core/tests/test_suites/__init__.py b/tests/fft/__init__.py similarity index 100% rename from heat/core/tests/test_suites/__init__.py rename to tests/fft/__init__.py diff --git a/heat/fft/tests/test_fft.py b/tests/fft/test_fft.py similarity index 99% rename from heat/fft/tests/test_fft.py rename to tests/fft/test_fft.py index b0ecdc68b0..7d86f114b2 100644 --- a/heat/fft/tests/test_fft.py +++ b/tests/fft/test_fft.py @@ -5,7 +5,7 @@ import os import heat as ht -from 
heat.core.tests.test_suites.basic_test import TestCase +from tests.test_suites.basic_test import TestCase torch_ihfftn = hasattr(torch.fft, "ihfftn") diff --git a/heat/graph/tests/test_laplacian.py b/tests/graph/test_laplacian.py similarity index 97% rename from heat/graph/tests/test_laplacian.py rename to tests/graph/test_laplacian.py index 1c21764861..cf5edd6fe3 100644 --- a/heat/graph/tests/test_laplacian.py +++ b/tests/graph/test_laplacian.py @@ -3,7 +3,7 @@ import heat as ht -from heat.core.tests.test_suites.basic_test import TestCase +from tests.test_suites.basic_test import TestCase class TestLaplacian(TestCase): diff --git a/heat/decomposition/tests/__init__.py b/tests/linalg/__init__.py similarity index 100% rename from heat/decomposition/tests/__init__.py rename to tests/linalg/__init__.py diff --git a/heat/core/linalg/tests/test_basics.py b/tests/linalg/test_basics.py similarity index 99% rename from heat/core/linalg/tests/test_basics.py rename to tests/linalg/test_basics.py index 6e6ecf5374..8d913a3f32 100644 --- a/heat/core/linalg/tests/test_basics.py +++ b/tests/linalg/test_basics.py @@ -2,8 +2,8 @@ import torch import heat as ht -from ...tests.test_suites.basic_test import TestCase -from ..basics import _estimate_largest_singularvalue +from tests.test_suites.basic_test import TestCase +from heat.core.linalg.basics import _estimate_largest_singularvalue class TestLinalgBasics(TestCase): diff --git a/heat/core/linalg/tests/test_eigh.py b/tests/linalg/test_eigh.py similarity index 97% rename from heat/core/linalg/tests/test_eigh.py rename to tests/linalg/test_eigh.py index 5b6b5a0a78..18b6533790 100644 --- a/heat/core/linalg/tests/test_eigh.py +++ b/tests/linalg/test_eigh.py @@ -2,7 +2,7 @@ import unittest import numpy as np -from ...tests.test_suites.basic_test import TestCase +from tests.test_suites.basic_test import TestCase class TestEigh(TestCase): diff --git a/heat/core/linalg/tests/test_polar.py b/tests/linalg/test_polar.py similarity index 98% rename from heat/core/linalg/tests/test_polar.py rename to tests/linalg/test_polar.py index e1934d8c2b..f4fe9e8c7f 100644 --- a/heat/core/linalg/tests/test_polar.py +++ b/tests/linalg/test_polar.py @@ -3,7 +3,7 @@ import torch import numpy as np -from ...tests.test_suites.basic_test import TestCase +from tests.test_suites.basic_test import TestCase class TestZolopolar(TestCase): diff --git a/heat/core/linalg/tests/test_qr.py b/tests/linalg/test_qr.py similarity index 99% rename from heat/core/linalg/tests/test_qr.py rename to tests/linalg/test_qr.py index 0da27aea07..3c3b0a5bcc 100644 --- a/heat/core/linalg/tests/test_qr.py +++ b/tests/linalg/test_qr.py @@ -3,7 +3,7 @@ import torch import numpy as np -from ...tests.test_suites.basic_test import TestCase +from tests.test_suites.basic_test import TestCase class TestQR(TestCase): diff --git a/heat/core/linalg/tests/test_solver.py b/tests/linalg/test_solver.py similarity index 99% rename from heat/core/linalg/tests/test_solver.py rename to tests/linalg/test_solver.py index 944305b63e..ba5a64fa63 100644 --- a/heat/core/linalg/tests/test_solver.py +++ b/tests/linalg/test_solver.py @@ -4,7 +4,7 @@ import heat as ht import numpy as np -from ...tests.test_suites.basic_test import TestCase +from tests.test_suites.basic_test import TestCase class TestSolver(TestCase): diff --git a/heat/core/linalg/tests/test_svd.py b/tests/linalg/test_svd.py similarity index 99% rename from heat/core/linalg/tests/test_svd.py rename to tests/linalg/test_svd.py index 97dc5f5c77..2e79a9c37b 100644 --- 
a/heat/core/linalg/tests/test_svd.py +++ b/tests/linalg/test_svd.py @@ -3,7 +3,7 @@ import torch import numpy as np -from ...tests.test_suites.basic_test import TestCase +from tests.test_suites.basic_test import TestCase class TestTallSkinnySVD(TestCase): diff --git a/heat/core/linalg/tests/test_svdtools.py b/tests/linalg/test_svdtools.py similarity index 99% rename from heat/core/linalg/tests/test_svdtools.py rename to tests/linalg/test_svdtools.py index ea6cd9681e..7b2b1544e0 100644 --- a/heat/core/linalg/tests/test_svdtools.py +++ b/tests/linalg/test_svdtools.py @@ -5,7 +5,7 @@ import numpy as np from mpi4py import MPI -from ...tests.test_suites.basic_test import TestCase +from tests.test_suites.basic_test import TestCase class TestHSVD(TestCase): diff --git a/heat/fft/tests/__init__.py b/tests/naive_bayes/__init__.py similarity index 100% rename from heat/fft/tests/__init__.py rename to tests/naive_bayes/__init__.py diff --git a/heat/naive_bayes/tests/test_gaussiannb.py b/tests/naive_bayes/test_gaussiannb.py similarity index 92% rename from heat/naive_bayes/tests/test_gaussiannb.py rename to tests/naive_bayes/test_gaussiannb.py index 3918c6d4a0..28146cef71 100644 --- a/heat/naive_bayes/tests/test_gaussiannb.py +++ b/tests/naive_bayes/test_gaussiannb.py @@ -1,9 +1,10 @@ import os import numpy as np import torch +from pathlib import Path import heat as ht -from heat.core.tests.test_suites.basic_test import TestCase +from tests.test_suites.basic_test import TestCase class TestGaussianNB(TestCase): @@ -28,11 +29,13 @@ def test_fit_iris(self): else: dtype = ht.float64 # load sklearn train/test sets and resulting probabilities - X_train = ht.load("heat/datasets/iris_X_train.csv", sep=";", dtype=dtype) - X_test = ht.load("heat/datasets/iris_X_test.csv", sep=";", dtype=dtype) - y_train = ht.load("heat/datasets/iris_y_train.csv", sep=";", dtype=ht.int64).squeeze() - y_test = ht.load("heat/datasets/iris_y_test.csv", sep=";", dtype=ht.int64).squeeze() - y_pred_proba_sklearn = ht.load("heat/datasets/iris_y_pred_proba.csv", sep=";", dtype=dtype) + data_path = Path(ht.__file__).parent / "datasets" + + X_train = ht.load(str(data_path / "iris_X_train.csv"), sep=";", dtype=dtype) + X_test = ht.load(str(data_path / "iris_X_test.csv"), sep=";", dtype=dtype) + y_train = ht.load(str(data_path / "iris_y_train.csv"), sep=";", dtype=ht.int64).squeeze() + y_test = ht.load(str(data_path / "iris_y_test.csv"), sep=";", dtype=ht.int64).squeeze() + y_pred_proba_sklearn = ht.load(str(data_path / "iris_y_pred_proba.csv"), sep=";", dtype=dtype) # test ht.GaussianNB from heat.naive_bayes import GaussianNB diff --git a/heat/nn/tests/__init__.py b/tests/nn/__init__.py similarity index 100% rename from heat/nn/tests/__init__.py rename to tests/nn/__init__.py diff --git a/heat/nn/tests/test_data_parallel.py b/tests/nn/test_data_parallel.py similarity index 100% rename from heat/nn/tests/test_data_parallel.py rename to tests/nn/test_data_parallel.py diff --git a/heat/nn/tests/test_nn.py b/tests/nn/test_nn.py similarity index 100% rename from heat/nn/tests/test_nn.py rename to tests/nn/test_nn.py diff --git a/heat/optim/tests/__init__.py b/tests/optim/__init__.py similarity index 100% rename from heat/optim/tests/__init__.py rename to tests/optim/__init__.py diff --git a/heat/optim/tests/test_dp_optimizer.py b/tests/optim/test_dp_optimizer.py similarity index 99% rename from heat/optim/tests/test_dp_optimizer.py rename to tests/optim/test_dp_optimizer.py index 42464b7142..ed4f4ddfb3 100644 --- 
a/heat/optim/tests/test_dp_optimizer.py +++ b/tests/optim/test_dp_optimizer.py @@ -4,7 +4,7 @@ import torch import unittest -from heat.core.tests.test_suites.basic_test import TestCase +from tests.test_suites.basic_test import TestCase class TestDASO(TestCase): diff --git a/heat/optim/tests/test_optim.py b/tests/optim/test_optim.py similarity index 94% rename from heat/optim/tests/test_optim.py rename to tests/optim/test_optim.py index 0927c6a11b..c83504b8d4 100644 --- a/heat/optim/tests/test_optim.py +++ b/tests/optim/test_optim.py @@ -1,6 +1,6 @@ import heat as ht -from heat.core.tests.test_suites.basic_test import TestCase +from tests.test_suites.basic_test import TestCase class TestOptim(TestCase): diff --git a/heat/optim/tests/test_utils.py b/tests/optim/test_utils.py similarity index 97% rename from heat/optim/tests/test_utils.py rename to tests/optim/test_utils.py index 6b5f774477..cca0fc6f69 100644 --- a/heat/optim/tests/test_utils.py +++ b/tests/optim/test_utils.py @@ -3,7 +3,7 @@ import os import torch -from heat.core.tests.test_suites.basic_test import TestCase +from tests.test_suites.basic_test import TestCase class TestUtils(TestCase): diff --git a/heat/naive_bayes/tests/__init__.py b/tests/preprocessing/__init__.py similarity index 100% rename from heat/naive_bayes/tests/__init__.py rename to tests/preprocessing/__init__.py diff --git a/heat/preprocessing/tests/test_preprocessing.py b/tests/preprocessing/test_preprocessing.py similarity index 97% rename from heat/preprocessing/tests/test_preprocessing.py rename to tests/preprocessing/test_preprocessing.py index d145aa6bcb..eebe17c77e 100644 --- a/heat/preprocessing/tests/test_preprocessing.py +++ b/tests/preprocessing/test_preprocessing.py @@ -4,7 +4,7 @@ from mpi4py import MPI import os -from ...core.tests.test_suites.basic_test import TestCase +from tests.test_suites.basic_test import TestCase atol_fit = 1e-5 atol_inv = 1e-4 diff --git a/heat/preprocessing/tests/__init__.py b/tests/regression/__init__.py similarity index 100% rename from heat/preprocessing/tests/__init__.py rename to tests/regression/__init__.py diff --git a/heat/regression/tests/test_lasso.py b/tests/regression/test_lasso.py similarity index 97% rename from heat/regression/tests/test_lasso.py rename to tests/regression/test_lasso.py index 8b2ed6908f..656f6e8f43 100644 --- a/heat/regression/tests/test_lasso.py +++ b/tests/regression/test_lasso.py @@ -1,7 +1,7 @@ import os import heat as ht -from heat.core.tests.test_suites.basic_test import TestCase +from tests.test_suites.basic_test import TestCase class TestLasso(TestCase): diff --git a/heat/sparse/tests/__init__.py b/tests/sparse/__init__.py similarity index 100% rename from heat/sparse/tests/__init__.py rename to tests/sparse/__init__.py diff --git a/heat/sparse/tests/test_arithmetics_csr.py b/tests/sparse/test_arithmetics_csr.py similarity index 99% rename from heat/sparse/tests/test_arithmetics_csr.py rename to tests/sparse/test_arithmetics_csr.py index 38f23062a5..fd19e3f94c 100644 --- a/heat/sparse/tests/test_arithmetics_csr.py +++ b/tests/sparse/test_arithmetics_csr.py @@ -7,7 +7,7 @@ import platform import random -from heat.core.tests.test_suites.basic_test import TestCase +from tests.test_suites.basic_test import TestCase envar = os.getenv("HEAT_TEST_USE_DEVICE", "cpu") diff --git a/heat/sparse/tests/test_dcscmatrix.py b/tests/sparse/test_dcscmatrix.py similarity index 99% rename from heat/sparse/tests/test_dcscmatrix.py rename to tests/sparse/test_dcscmatrix.py index 22386d1444..569455fec3 
100644 --- a/heat/sparse/tests/test_dcscmatrix.py +++ b/tests/sparse/test_dcscmatrix.py @@ -4,7 +4,7 @@ import heat as ht import torch -from heat.core.tests.test_suites.basic_test import TestCase +from tests.test_suites.basic_test import TestCase from typing import Tuple diff --git a/heat/sparse/tests/test_dcsrmatrix.py b/tests/sparse/test_dcsrmatrix.py similarity index 99% rename from heat/sparse/tests/test_dcsrmatrix.py rename to tests/sparse/test_dcsrmatrix.py index 4f5b99df64..ef1df82c0b 100644 --- a/heat/sparse/tests/test_dcsrmatrix.py +++ b/tests/sparse/test_dcsrmatrix.py @@ -4,7 +4,7 @@ import heat as ht import torch -from heat.core.tests.test_suites.basic_test import TestCase +from tests.test_suites.basic_test import TestCase from typing import Tuple diff --git a/heat/sparse/tests/test_factories.py b/tests/sparse/test_factories.py similarity index 99% rename from heat/sparse/tests/test_factories.py rename to tests/sparse/test_factories.py index 84dd5e2b5d..848885b8c4 100644 --- a/heat/sparse/tests/test_factories.py +++ b/tests/sparse/test_factories.py @@ -5,7 +5,7 @@ import torch import scipy -from heat.core.tests.test_suites.basic_test import TestCase +from tests.test_suites.basic_test import TestCase envar = os.getenv("HEAT_TEST_USE_DEVICE", "cpu") is_mps = envar == "gpu" and platform.system() == "Darwin" diff --git a/heat/sparse/tests/test_manipulations.py b/tests/sparse/test_manipulations.py similarity index 99% rename from heat/sparse/tests/test_manipulations.py rename to tests/sparse/test_manipulations.py index 97b5ab5ca9..2d86cf69ea 100644 --- a/heat/sparse/tests/test_manipulations.py +++ b/tests/sparse/test_manipulations.py @@ -4,7 +4,7 @@ import heat as ht import torch -from heat.core.tests.test_suites.basic_test import TestCase +from tests.test_suites.basic_test import TestCase envar = os.getenv("HEAT_TEST_USE_DEVICE", "cpu") is_mps = envar == "gpu" and platform.system() == "Darwin" diff --git a/heat/regression/tests/__init__.py b/tests/spatial/__init__.py similarity index 100% rename from heat/regression/tests/__init__.py rename to tests/spatial/__init__.py diff --git a/heat/spatial/tests/test_distances.py b/tests/spatial/test_distances.py similarity index 99% rename from heat/spatial/tests/test_distances.py rename to tests/spatial/test_distances.py index d5769c2009..9fda8f1318 100644 --- a/heat/spatial/tests/test_distances.py +++ b/tests/spatial/test_distances.py @@ -7,7 +7,7 @@ import numpy as np import math -from heat.core.tests.test_suites.basic_test import TestCase +from tests.test_suites.basic_test import TestCase class TestDistances(TestCase): diff --git a/heat/tests/test_cli.py b/tests/test_cli.py similarity index 100% rename from heat/tests/test_cli.py rename to tests/test_cli.py diff --git a/heat/spatial/tests/__init__.py b/tests/test_suites/__init__.py similarity index 100% rename from heat/spatial/tests/__init__.py rename to tests/test_suites/__init__.py diff --git a/heat/core/tests/test_suites/basic_test.py b/tests/test_suites/basic_test.py similarity index 100% rename from heat/core/tests/test_suites/basic_test.py rename to tests/test_suites/basic_test.py diff --git a/heat/core/tests/test_suites/test_basic_test.py b/tests/test_suites/test_basic_test.py similarity index 100% rename from heat/core/tests/test_suites/test_basic_test.py rename to tests/test_suites/test_basic_test.py diff --git a/heat/utils/data/tests/__init__.py b/tests/utils/__init__.py similarity index 100% rename from heat/utils/data/tests/__init__.py rename to tests/utils/__init__.py diff 
--git a/tests/utils/data/__init__.py b/tests/utils/data/__init__.py new file mode 100644 index 0000000000..8b1c8633c5 --- /dev/null +++ b/tests/utils/data/__init__.py @@ -0,0 +1,9 @@ +""" +add data utility functions to the ht.utils.data namespace +""" + +from .datatools import * +from . import matrixgallery +from . import mnist +from .partial_dataset import * +from . import spherical diff --git a/tests/utils/data/_utils.py b/tests/utils/data/_utils.py new file mode 100644 index 0000000000..a20cd2fb09 --- /dev/null +++ b/tests/utils/data/_utils.py @@ -0,0 +1,280 @@ +""" +Data utilities module. +This file contains functions which may be useful for certain datatypes, but are not test in the heat framework +This file contains standalone utilities for data preparation which may be useful +The functions contained within are not tested, nor actively supported +""" + +import base64 +import numpy as np +import os +import struct + + +def dali_tfrecord2idx(train_dir, train_idx_dir, val_dir, val_idx_dir): + """ + WARNING: This function likely requires adjustments and it is by no means a final product !!! + this file contains standalone utilities for data preparation which may be useful + this function contained within are not tested, nor actively supported + + prepare TFRecords indexes for use with DALI. It will produce indexes for all files in the + given ``train_dir`` and ``val_dir`` directories + """ + for tv in [train_dir, val_dir]: + dir_list = os.listdir(tv) + out = train_idx_dir if tv == train_dir else val_idx_dir + for file in dir_list: + with open(file, "rb") as f, open(out + file, "w") as idx: + while True: + current = f.tell() + try: + # length + byte_len = f.read(8) + if len(byte_len) == 0: + break + # crc + f.read(4) + proto_len = struct.unpack("q", byte_len)[0] + # proto + f.read(proto_len) + # crc + f.read(4) + idx.write(str(current) + " " + str(f.tell() - current) + "\n") + except Exception: + print("Not a valid TFRecord file") + break + + +def merge_files_imagenet_tfrecord(folder_name, output_folder=None): + """ + WARNING: This function likely requires adjustments and it is by no means a final product !!! + this file contains standalone utilities for data preparation which may be useful + this function contained within are not tested, nor actively supported + + merge multiple preprocessed imagenet TFRecord files together, + result is one HDF5 file with all of the images stacked in the 0th dimension + + Parameters + ---------- + folder_name : str, optional* + folder location of the files to join, either filenames or folder_names must not be None + output_folder : str, optional + location to create the output files. Defaults to current directory + + Notes + ----- + Metadata for both the created files (`imagenet_merged.h5` and `imagenet_merged_validation.h5`): + + The datasets are the combination of all of the images in the Image-net 2012 dataset. + The data is split into training and validation. + + imagenet_merged.h5 -> training + imagenet_merged_validation.h5 -> validation + + both files have the same internal structure: + - file + * "images" : encoded ASCII string of the decoded RGB JPEG image. + - to decode: `torch.as_tensor(bytearray(base64.binascii.a2b_base64(string_repr.encode('ascii'))), dtype=torch.uint8)` + - note: the images must be reshaped using: `.reshape(file["metadata"]["image/height"], file["metadata"]["image/height"], 3)` + (3 is the number of channels, all images are RGB) + * "metadata" : the metadata for each image quotes are the titles for each column + 0. 
"image/height" + 1. "image/width" + 2. "image/channels" + 3. "image/class/label" + 4. "image/object/bbox/xmin" + 5. "image/object/bbox/xmax" + 6. "image/object/bbox/ymin" + 7. "image/object/bbox/ymax" + 8. "image/object/bbox/label" + * "file_info" : string information related to each image + 0. "image/format" + 1. "image/filename" + 2. "image/class/synset" + 3. "image/class/text" + + + The dataset was created using the preprocessed data from the script: + https://github.com/tensorflow/models/blob/master/research/inception/inception/data/download_and_preprocess_imagenet.sh + + """ + import h5py + import tensorflow as tf + + """ + labels: + image/encoded: string containing JPEG encoded image in RGB colorspace + image/height: integer, image height in pixels + image/width: integer, image width in pixels + image/colorspace: string, specifying the colorspace, always 'RGB' + image/channels: integer, specifying the number of channels, always 3 + image/format: string, specifying the format, always 'JPEG' + image/filename: string containing the basename of the image file + e.g. 'n01440764_10026.JPEG' or 'ILSVRC2012_val_00000293.JPEG' + image/class/label: integer specifying the index in a classification layer. + The label ranges from [1, 1000] where 0 is not used. + image/class/synset: string specifying the unique ID of the label, e.g. 'n01440764' + image/class/text: string specifying the human-readable version of the label + e.g. 'red fox, Vulpes vulpes' + image/object/bbox/xmin: list of integers specifying the 0+ human annotated bounding boxes + image/object/bbox/xmax: list of integers specifying the 0+ human annotated bounding boxes + image/object/bbox/ymin: list of integers specifying the 0+ human annotated bounding boxes + image/object/bbox/ymax: list of integers specifying the 0+ human annotated bounding boxes + image/object/bbox/label: integer specifying the index in a classification + layer. The label ranges from [1, 1000] where 0 is not used. 
Note this is + always identical to the image label.""" + # get the number of files from the contents of the folder + train_names = [folder_name + f for f in os.listdir(folder_name) if f.startswith("train")].sort() + val_names = [folder_name + f for f in os.listdir(folder_name) if f.startswith("val")].sort() + num_train = len(train_names) + num_val = len(val_names) + + def _find_output_name_and_stsp(num_names): + start = 0 + stop = num_names + 1 + output_name_lcl = output_folder + output_name_lcl += "imagenet_merged.h5" + return start, stop, output_name_lcl + + train_start, train_stop, output_name_lcl_train = _find_output_name_and_stsp(num_train) + val_start, val_stop, output_name_lcl_val = _find_output_name_and_stsp(num_val) + output_name_lcl_val = f"{output_name_lcl_val[:-3]}_validation.h5" + + # create the output files + train_lcl_file = h5py.File(output_name_lcl_train, "w") + dt = h5py.string_dtype(encoding="ascii") + train_lcl_file.create_dataset("images", (2502,), chunks=(1251,), maxshape=(None,), dtype=dt) + train_lcl_file.create_dataset("metadata", (2502, 9), chunks=(1251, 9), maxshape=(None, 9)) + train_lcl_file.create_dataset( + "file_info", (2502, 4), chunks=(1251, 4), maxshape=(None, 4), dtype="S10" + ) + + val_lcl_file = h5py.File(output_name_lcl_val, "w") + val_lcl_file.create_dataset("images", (50000,), chunks=True, maxshape=(None,), dtype=dt) + val_lcl_file.create_dataset("metadata", (50000, 9), chunks=True, maxshape=(None, 9)) + val_lcl_file.create_dataset( + "file_info", (50000, 4), chunks=True, maxshape=(None, 4), dtype="S10" + ) + + def __single_file_load(src): + # load a file and read it to a numpy array + dataset = tf.data.TFRecordDataset(filenames=[src]) + imgs = [] + img_meta = [[] for _ in range(9)] + file_arr = [[] for _ in range(4)] + for raw_example in iter(dataset): + parsed = tf.train.Example.FromString(raw_example.numpy()) + img_str = parsed.features.feature["image/encoded"].bytes_list.value[0] + img = tf.image.decode_jpeg(img_str, channels=3).numpy() + string_repr = base64.binascii.b2a_base64(img).decode("ascii") + imgs.append(string_repr) + # to decode: np.frombuffer(base64.binascii.a2b_base64(string_repr.encode('ascii'))) + img_meta[0].append( + tf.cast( + parsed.features.feature["image/height"].int64_list.value[0], tf.float32 + ).numpy() + ) + img_meta[1].append( + tf.cast( + parsed.features.feature["image/width"].int64_list.value[0], tf.float32 + ).numpy() + ) + img_meta[2].append( + tf.cast( + parsed.features.feature["image/channels"].int64_list.value[0], tf.float32 + ).numpy() + ) + img_meta[3].append(parsed.features.feature["image/class/label"].int64_list.value[0] - 1) + try: + bbxmin = parsed.features.feature["image/object/bbox/xmin"].float_list.value[0] + bbxmax = parsed.features.feature["image/object/bbox/xmax"].float_list.value[0] + bbymin = parsed.features.feature["image/object/bbox/ymin"].float_list.value[0] + bbymax = parsed.features.feature["image/object/bbox/ymax"].float_list.value[0] + bblabel = parsed.features.feature["image/object/bbox/label"].int64_list.value[0] - 1 + except IndexError: + bbxmin = 0.0 + bbxmax = img_meta[1][-1] + bbymin = 0.0 + bbymax = img_meta[0][-1] + bblabel = -2 + + img_meta[4].append(np.float(bbxmin)) + img_meta[5].append(np.float(bbxmax)) + img_meta[6].append(np.float(bbymin)) + img_meta[7].append(np.float(bbymax)) + img_meta[8].append(bblabel) + + file_arr[0].append(parsed.features.feature["image/format"].bytes_list.value[0]) + file_arr[1].append(parsed.features.feature["image/filename"].bytes_list.value[0]) + 
file_arr[2].append(parsed.features.feature["image/class/synset"].bytes_list.value[0]) + file_arr[3].append( + np.array(parsed.features.feature["image/class/text"].bytes_list.value[0]) + ) + # need to transpose because of the way that numpy understands nested lists + img_meta = np.array(img_meta, dtype=np.float64).T + file_arr = np.array(file_arr).T + return imgs, img_meta, file_arr + + def __write_datasets(img_outl, img_metal, file_arrl, past_sizel, file): + file["images"].resize((past_sizel + len(img_outl),)) + file["images"][past_sizel : len(img_outl) + past_sizel] = img_outl + file["metadata"].resize((past_sizel + img_metal.shape[0], 9)) + file["metadata"][past_sizel : img_metal.shape[0] + past_sizel] = img_metal + file["file_info"].resize((past_sizel + img_metal.shape[0], 4)) + file["file_info"][past_sizel : img_metal.shape[0] + past_sizel] = file_arrl + + def __load_multiple_files(train_names, train_start, train_stop, file): + loc_files = train_names[train_start:train_stop] + img_out, img_meta, file_arr = None, None, None + past_size, i = 0, 0 + for f in loc_files: # train + # print(f) + # this is where the data is created for + imgs, img_metaf, file_arrf = __single_file_load(f) + # create a larger ndarray with the results + if img_out is not None: + img_out.extend(imgs) + else: + img_out = imgs + img_meta = np.vstack((img_meta, img_metaf)) if img_meta is not None else img_metaf + file_arr = np.vstack((file_arr, file_arrf)) if file_arr is not None else file_arrf + # when 2 files are read, write to the output file + if i % 2 == 1: + print(past_size) + __write_datasets(img_out, img_meta, file_arr, past_size, file) + past_size += len(img_out) + img_out, img_meta, file_arr = None, None, None + del imgs, img_metaf, file_arrf + i += 1 + + if img_out is not None: + __write_datasets(img_out, img_meta, file_arr, past_size, file) + + __load_multiple_files(train_names, train_start, train_stop, train_lcl_file) + __load_multiple_files(val_names, val_start, val_stop, val_lcl_file) + + # add the label names to the datasets + img_list = [1, 2, 4, 7, 10, 11, 12, 13, 14] + file_list = [5, 6, 8, 9] + feature_list = [ + "image/encoded", + "image/height", + "image/width", + "image/colorspace", + "image/channels", + "image/format", + "image/filename", + "image/class/label", + "image/class/synset", + "image/class/text", + "image/object/bbox/xmin", + "image/object/bbox/xmax", + "image/object/bbox/ymin", + "image/object/bbox/ymax", + "image/object/bbox/label", + ] + + train_lcl_file["metadata"].attrs["column_names"] = [feature_list[im] for im in img_list] + train_lcl_file["file_info"].attrs["column_names"] = [feature_list[im] for im in file_list] + val_lcl_file["metadata"].attrs["column_names"] = [feature_list[im] for im in img_list] + val_lcl_file["file_info"].attrs["column_names"] = [feature_list[im] for im in file_list] diff --git a/tests/utils/data/datatools.py b/tests/utils/data/datatools.py new file mode 100644 index 0000000000..ccc79de888 --- /dev/null +++ b/tests/utils/data/datatools.py @@ -0,0 +1,799 @@ +""" +Function and classes useful for loading data into neural networks +""" + +import itertools +import random +import warnings +import mpi4py +import torch +import torch.distributed +from torch.utils import data as torch_data +from typing import Callable, List, Iterator, Literal, Union, Optional, Sized +from mpi4py import MPI +from functools import reduce + +import torch.utils +import torchvision + +from heat.dndarray import DNDarray +from heat.communication import GPU_AWARE_MPI, MPI_WORLD, 
MPICommunication +from heat.random import permutation +from . import partial_dataset + +__all__ = [ + "DataLoader", + "Dataset", + "dataset_shuffle", + "dataset_ishuffle", + "DistributedDataset", + "DistributedSampler", + "create_train_val_split", +] + + +class DataLoader: + r""" + The combines either a :func:`DNDarray ` or a torch `Dataset `_ + with a sampler. This provides an iterable over the local dataset and it will shuffle the data at the end of the + iterator. If a :func:`DNDarray ` is given, then a :func:`Dataset` will be created + internally. + + Currently, this only supports only map-style datasets with single-process loading. It uses the random + batch sampler. The rest of the ``DataLoader`` functionality mentioned in `torch.utils.data.dataloader `_ applies. + + Arguments: + dataset : :func:`Dataset`, torch `Dataset `_, :func:`heat.utils.data.partial_dataset.PartialH5Dataset` + A torch dataset from which the data will be returned by the created iterator + batch_size : int, optional + How many samples per batch to load\n + Default: 1 + num_workers : int, optional + How many subprocesses to use for data loading. 0 means that the data will be loaded in the main process.\n + Default: 0 + collate_fn : callable, optional + Merges a list of samples to form a mini-batch of torch.Tensor(s). Used when using batched loading from a + map-style dataset.\n + Default: None + pin_memory : bool, optional + If ``True``, the data loader will copy torch.Tensors into CUDA pinned memory before returning them. + If your data elements are a custom type, or your :attr:`collate_fn` returns a batch that is a custom type, + see the example below. \n + Default: False + drop_last : bool, optional + Set to ``True`` to drop the last incomplete batch, if the dataset size is not divisible by + the batch size. If ``False`` and the size of dataset is not divisible by the batch size, then + the last batch will be smaller.\n + Default: ``False`` + timeout : int or float, optional + If positive, the timeout value for collecting a batch from workers. Should always be non-negative.\n + Default: 0 + worker_init_fn : callable, optional + If not ``None``, this will be called on each worker subprocess with the worker id + (an int in ``[0, num_workers - 1]``) as input, after seeding and before data loading.\n + default: None + + Attributes + ---------- + dataset : :func:`Dataset`, torch `Dataset `_, :func:`heat.utils.data.partial_dataset.PartialH5Dataset` + The dataset created from the local data + DataLoader : `torch.utils.data.dataloader `_ + The local DataLoader object. Used in the creation of the iterable and the length + _first_iter : bool + Flag indicating if the iterator created is the first one. 
If it is not, then the data will be shuffled before + the iterator is created + last_epoch : bool + Flag indicating last epoch + """ + + def __init__( + self, + dataset: Union[torch_data.Dataset, partial_dataset.PartialH5Dataset], + batch_size: int = 1, + num_workers: int = 0, + collate_fn: Callable = None, + pin_memory: bool = False, + drop_last: bool = False, + timeout: Union[int, float] = 0, + worker_init_fn: Callable = None, + ): # noqa: D107 + if not isinstance(dataset, (torch_data.Dataset, Dataset, partial_dataset.PartialH5Dataset)): + raise TypeError( + f"dataset must be a torch Dataset, heat Dataset, heat PartialH5Dataset, currently: {type(dataset)}" + ) + self.dataset = dataset + if hasattr(self.dataset, "ishuffle"): + self.ishuffle = self.dataset.ishuffle + if isinstance(self.dataset, partial_dataset.PartialH5Dataset): + drop_last = True + + self.DataLoader = torch_data.DataLoader( + dataset=self.dataset, + batch_size=batch_size, + shuffle=True, + batch_sampler=None, + num_workers=num_workers, + collate_fn=collate_fn, + drop_last=drop_last, + pin_memory=pin_memory, + timeout=timeout, + worker_init_fn=worker_init_fn, + ) + self._first_iter = True + self.last_epoch = False + + def __iter__(self) -> Iterator: + """ + Generate a new iterator of a type dependent on the type of dataset. + Returns a :class:`partial_dataset.PartialH5DataLoaderIter` if the dataset is a :class:`partial_dataset.PartialH5Dataset` + :func:`self._full_dataset_shuffle_iter` otherwise + """ + if isinstance(self.dataset, partial_dataset.PartialH5Dataset): + return partial_dataset.PartialH5DataLoaderIter(self) + if hasattr(self, "_full_dataset_shuffle_iter") and hasattr(self.dataset, "ishuffle"): + # if it is a normal heat dataset then this is defined + self._full_dataset_shuffle_iter() + return self.DataLoader.__iter__() + + def __len__(self) -> int: + """ + Get the length of the dataloader. Returns the number of batches. + """ + return self.DataLoader.__len__() + + def _full_dataset_shuffle_iter(self): + # logic for when to shuffle the data + if not self.ishuffle: + if self._first_iter: + self._first_iter = False + else: + # shuffle after the first epoch but before the iterator is generated + self.dataset.Shuffle() + else: + # start the shuffling for the next iteration + if not self.last_epoch: + self.dataset.Ishuffle() + + if self._first_iter: + self._first_iter = False + else: + dataset_irecv(self.dataset) + + +class Dataset(torch_data.Dataset): + r""" + An abstract class representing a given dataset. This inherits from torch.utils.data.Dataset. + + This class is a general example for what should be done to create a Dataset. When creating a dataset all of the + standard attributes should be set, the ``__getitem__``, ``__len__``, and ``shuffle`` functions must be defined. + + - ``__getitem__`` : how an item is given to the network + - ``__len__`` : the number of data elements to be given to the network in total + - ``Shuffle()`` : how the data should be shuffled between the processes. The function shown below is for a dataset composed of only data and without targets. The function :func:`dataset_shuffle` abstracts this. For this function only the dataset and a list of attributes to shuffle are given.\n + - ``Ishuffle()`` : A non-blocking version of ``Shuffle()``, this is handled in the abstract function :func:`dataset_ishuffle`. It works similarly to :func:`dataset_shuffle`. 
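# Editor's illustrative sketch, not part of this patch: a minimal labelled dataset following
# the pattern described above. The import path "tests.utils.data.datatools" and every name in
# this snippet are assumptions made for the example only.
import heat as ht
from tests.utils.data.datatools import DataLoader, Dataset, dataset_shuffle

class LabelledDataset(Dataset):
    def __init__(self, data, targets, ishuffle=False):
        super().__init__(data, ishuffle=ishuffle)
        self.httargets = targets                         # global targets (DNDarray)
        self.targets = targets.larray[self._cut_slice]   # local targets, trimmed like the data

    def __getitem__(self, index):
        return self.data[index], self.targets[index]

    def Shuffle(self):
        # shuffle data and targets consistently across all processes
        dataset_shuffle(self, attrs=[["data", "htdata"], ["targets", "httargets"]])

x = ht.random.randn(100, 4, split=0)
y = ht.zeros((100, 1), split=0)  # targets kept 2-D so the same cut slice applies
loader = DataLoader(LabelledDataset(x, y), batch_size=8)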
+ + As the amount of data across processes can be non-uniform, the dataset class will slice off the remaining elements + on whichever processes have more data than the others. This should only be 1 element. + The shuffle function will shuffle all of the data on the process. + + It is recommended that for ``DNDarray`` s, the split is either 0 or None + + Parameters + ---------- + array : DNDarray + DNDarray for which to great the dataset + transform : Callable + Transformation to call before a data item is returned + ishuffle : bool, optional + flag indicating whether to use non-blocking communications for shuffling the data between epochs + Note: if ``True``, the ``Ishuffle()`` function must be defined within the class\n + Default: False + + Attributes + ---------- + These are the required attributes. + + htdata : DNDarray + Full data + _cut_slice : slice + Slice to cut off the last element to get a uniform amount of data on each process + comm : MPICommunicator + Communication object used to send the data between processes + lcl_half : int + Half of the number of data elements on the process + data : torch.Tensor + The local data to be used in training + transforms : Callable + Transform to be called during the getitem function + ishuffle : bool + Flag indicating if non-blocking communications are used for shuffling the data between epochs + """ + + def __init__( + self, + array, + transforms: Optional[Union[List, Callable]] = None, + ishuffle: Optional[bool] = False, + test_set: Optional[bool] = False, + ): # noqa: D107 + self.htdata = array + self.comm = array.comm + self.test_set = test_set + # create a slice to create a uniform amount of data on each process + min_data_split = array.gshape[array.split] // array.comm.size + self.lcl_half = min_data_split // 2 + arb_slice = [slice(None)] * array.ndim + arb_slice[array.split] = slice(min_data_split) + self._cut_slice = tuple(arb_slice) + self.data = array._DNDarray__array[self._cut_slice] + if not isinstance(transforms, (list, tuple)) and transforms is not None: + transforms = [transforms] + self.transforms = transforms + self.ishuffle = ishuffle + + def __getitem__(self, index: Union[int, slice, tuple, list, torch.Tensor]) -> torch.Tensor: + """ + Basic form of __getitem__. As the dataset is often very specific to the dataset, + this should be overwritten by the user. In this form it only gets the raw items from the data. + """ + if self.transforms: + return self.transforms[0](self.data[index]) + return self.data[index] + + def __len__(self) -> int: + """ + Get the number of items in the dataset. This should be overwritten by custom datasets + """ + return self.data.shape[0] + + def Shuffle(self): + """ + Send half of the local data to the process ``self.comm.rank + 1`` if available, else wrap around. After + receiving the new data, shuffle the local tensor. + """ + if not self.test_set: + dataset_shuffle(dataset=self, attrs=[["data", "htdata"]]) + + def Ishuffle(self): + """ + Send half of the local data to the process ``self.comm.rank + 1`` if available, else wrap around. After + receiving the new data, shuffle the local tensor. + """ + if not self.test_set: + dataset_ishuffle(dataset=self, attrs=[["data", "htdata"]]) + + +class DistributedDataset(torch_data.Dataset): + """ + A DistributedDataset for usage in PyTorch. Saves the dndarray and the larray tensor. Uses the larray tensor + for the distribution and getting the items. Intented to be used with DistributedSampler. 
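# Editor's illustrative sketch, not part of this patch: wrapping a split-0 DNDarray for use
# with PyTorch; the import path is an assumption.
import heat as ht
from tests.utils.data.datatools import DistributedDataset

x = ht.random.randn(1000, 16, split=0)  # distributed along axis 0
dset = DistributedDataset(x)            # any other split raises a ValueError
item = dset[0]                          # torch.Tensor taken from the process-local larray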
+ """ + + def __init__(self, dndarray: DNDarray, transforms: torchvision.transforms.Compose = None): + if not isinstance(dndarray, DNDarray): + raise TypeError(f"Expected DNDarray but got {type(dndarray)}") + if dndarray.split != 0: + raise ValueError("DistributedDataset only works with a DNDarray split of 0") + + self.dndarray = dndarray + self.transforms = transforms + + def __len__(self) -> int: + return len(self.dndarray.larray) + + def __getitem__(self, index): + item = self.dndarray.larray[index] + if self.transforms is not None: + return self.transforms(item) + return item + + def __getitems__(self, indices): + if self.transforms is not None: + return tuple(self.transforms(self.dndarray.larray[index]) for index in indices) + return tuple(self.dndarray.larray[index] for index in indices) + + +class DistributedSampler(torch_data.Sampler): + """ + A DistributedSampler for usage in PyTorch with Heat Arrays. Uses the nature of the Heat DNDArray + to give the locally stored data on the larray. Shuffling is done by shuffling the indices. + The given Indices corrospond to the index of the larray tensor. + Works only with DNDarray that are split on axis 0 + """ + + def __init__( + self, + dataset: DistributedDataset, + shuffle: bool = False, + seed: Optional[int] = None, + shuffle_type: Literal["global"] | Literal["local"] = "global", + correction: bool = False, + ) -> None: + """ + Parameters + ---------- + dataset : DistributedDataset + Dataset to be shuffled + shuffle : bool, optional + If the underlying DNDarray should be shuffled, by default False + seed : int, optional + seed for shuffling, by default None + shuffle_type : Literal["global"] | Literal["local"], optional + Wether to shuffle process local or get new data using by shuffling globally across all processes, by default "global" + correction : bool, optional + If index correction is wanted after an global shuffle, by default False + """ + if not isinstance(dataset, DistributedDataset): + raise TypeError(f"Expected DistributedDataset for dataset not {type(dataset)}") + if not isinstance(shuffle, bool): + raise TypeError(f"Expected bool for shuffle not {type(shuffle)}") + if not isinstance(seed, int) and seed is not None: + raise TypeError(f"Expected int or None for seed not {type(shuffle)}") + if not isinstance(shuffle_type, str): + raise TypeError("Shuffle Type needs to be an string") + if not isinstance(correction, bool): + raise TypeError("Correction Parameter needs to be an bool") + + self.dataset = dataset + self.dndarray = dataset.dndarray + self.shuffle = shuffle + self.linked_sampler = None + self.correction = correction + self.set_shuffle_type(shuffle_type) + self.set_seed(seed) + + if self.dndarray.split != 0: + raise ValueError("DistributedSampler only works with a DNDarray split of 0") + + @staticmethod + def _in_slice(idx: int, a_slice: slice) -> bool: + """Check if the given index is inside the given slice + + Parameters + ---------- + idx : int + Index to check + a_slice : slice + Slice to check + + Returns + ------- + bool + Wether index is in slice + """ + if idx < a_slice.start or idx >= a_slice.stop: + return False + step = a_slice.step if a_slice.step else 1 + if (idx - a_slice.start) % step == 0: + return True + else: + return False + + def _shuffle(self) -> None: + """Shuffles the given dndarray at creation across processes.""" + if self.shuffle_type == "local": + rand_perm = torch.randperm(self.dndarray.larray.shape[0]) + self.dndarray.larray = self.dndarray.larray[rand_perm] + return + + if 
self.shuffle_type != "global": + raise ValueError("Shuffle type is not 'local' nor 'global'") + + # TODO: Find out which implementation is better + # self.dndarray = permutation(self.dndarray) + # self.dataset.dndarray = self.dndarray + self._alltoall_shuffle() + + def _alltoall_shuffle(self) -> None: + # Exchanges the data using Indexed data types and i iaj + dtype = self.dndarray.dtype.torch_type() + comm: MPICommunication = self.dndarray.comm + rank: int = comm.rank + world_size: int = comm.size + N: int = self.dndarray.gshape[0] + mpi_type: mpi4py.MPI.Datatype = comm._MPICommunication__mpi_type_mappings[dtype] + + if rank == 0: + indices = torch.randperm(N, dtype=torch.int64) + else: + indices = torch.empty(N, dtype=torch.int64) + mpi4py.MPI.COMM_WORLD.Bcast(indices, root=0) + + indice_buffers: List[List[int]] = [list() for _ in range(world_size)] + rank_slices: List[slice] = [ + comm.chunk((N,), split=0, rank=i)[-1][0] for i in range(world_size) + ] + + block_length: int = reduce(lambda a, b: a * b, self.dndarray.gshape[1:], 1) + local_slice: slice = rank_slices[rank] + local_displacement: int = self.dndarray.counts_displs()[1][rank] * block_length + + # Now figure out which rank needs to send what to each rank and what this rank will receive + for i, idx in enumerate(indices): + idx = idx.item() + for data_send_rank, tslice in enumerate(rank_slices): + if not self._in_slice(idx, tslice): + continue + break + for data_recv_rank, tslice in enumerate(rank_slices): + if not self._in_slice(i, tslice): + continue + break + if data_recv_rank == rank: + indice_buffers[rank].append(idx) + elif data_send_rank == rank: + indice_buffers[data_recv_rank].append(idx) + + # print("RECV BUFFER creating...", flush=True) + send_elems_dtype: List[mpi4py.MPI.Datatype] = list() + local_recv_buffer: torch.Tensor = torch.empty(self.dndarray.larray.shape, dtype=dtype) + + for current_rank in range(world_size): + if current_rank == rank: + send_indice = [ + idx for idx in indice_buffers[current_rank] if self._in_slice(idx, local_slice) + ] + else: + send_indice = indice_buffers[current_rank] + displacements = [ + mpi_type.Get_size() * (disp * block_length - local_displacement) + for disp in send_indice + ] + block_lengths = [block_length] * len(displacements) + send_type = mpi_type.Create_struct( + blocklengths=block_lengths, + displacements=displacements, + datatypes=[mpi_type] * len(displacements), + ) + send_type.Commit() + send_elems_dtype.append(send_type) + + recv_counts = torch.zeros(world_size, dtype=torch.int64) + for idx in indice_buffers[rank]: + for i, tslice in enumerate(rank_slices): + if not self._in_slice(idx, tslice): + continue + recv_counts[i] += 1 + break + + send_elems = self.dndarray.larray + send_elems = send_elems if GPU_AWARE_MPI else send_elems.cpu() + + recv_types: List[mpi4py.MPI.Datatype] = [] + + total_displ = 0 + + for i in range(world_size): + if recv_counts[i] == 0: + recv_type = mpi_type.Create_contiguous(0) + else: + types = [mpi_type.Create_contiguous(block_length) for _ in range(recv_counts[i])] + + displ = torch.zeros(len(types), dtype=torch.int64) + displ[1:] = torch.cumsum(torch.tensor([t.Get_size() for t in types])[:-1], 0) + displ += total_displ + + recv_type = mpi_type.Create_struct( + blocklengths=[1] * len(types), displacements=displ.tolist(), datatypes=types + ) + total_displ += sum([t.Get_size() for t in types]) + + recv_type.Commit() + recv_types.append(recv_type) + + mpi4py.MPI.COMM_WORLD.Alltoallw( + (send_elems, send_elems_dtype), + (local_recv_buffer, 
recv_types), + ) + + for elem in itertools.chain(recv_types, send_elems_dtype): + elem.Free() + + # As MPI indirectly sorts the data according to the rank we need + # to change that to represent the permutation + if self.correction: + + def get_from_rank(idx): + for i, rslice in enumerate(rank_slices): + if self._in_slice(idx, rslice): + return i + raise RuntimeError("IDX not found in slices") + + idx_to_rank_map = [get_from_rank(idx) for idx in indices[local_slice]] + + sort_idx = torch.argsort(torch.tensor(idx_to_rank_map), stable=True) + local_slices_sorted = indices[local_slice][sort_idx] + + reverse_index = {idx.item(): i for i, idx in enumerate(indices[local_slice])} + idxmap = {i: reverse_index[idx.item()] for i, idx in enumerate(local_slices_sorted)} + + for i, dest in idxmap.items(): + self.dndarray.larray[dest] = local_recv_buffer[i].to(self.dndarray.larray.device) + else: + self.dndarray.larray = local_recv_buffer.to(self.dndarray.larray.device) + + def set_shuffle_type(self, shuffle_type: Literal["global"] | Literal["local"]) -> None: + """Sets the Shuffle type for the Sampler. + + Parameters + ---------- + shuffle_type : Literal["global"] | Literal["local"] + - Local Shuffle means the shuffle of the larray only. + - Global Shuffle means the shuffle across all processes + + Raises + ------ + TypeError + Shuffle type needs to be a string + ValueError + Only Global/Local shuffle types exist + """ + if not isinstance(shuffle_type, str): + raise TypeError("Shuffle type needs to be an string") + if not (shuffle_type == "global" or shuffle_type == "local"): + raise ValueError("only 'global' or 'local' allowed as shuffle type") + + self.shuffle_type: Literal["global"] | Literal["local"] = shuffle_type + + if self.linked_sampler is not None: + self.linked_sampler.set_shuffle_type(shuffle_type) + + def set_seed(self, value: int | None) -> None: + """Sets the seed for the torch.randperm + + Parameters + ---------- + value : int + seed to set + """ + self._seed = value + if value is not None: + torch.manual_seed(value) + if self.shuffle: + self._shuffle() + + if self.linked_sampler is not None: + self.linked_sampler.set_seed(value) + + def link(self, sampler: "DistributedSampler") -> None: + """ + Links another DistributedSampler to this one, to automatically sets the seed/shuffle_type of this and the linked one, + rather than manually setting both seperately. Usefull when one Sampler contains training data and the + linked one the label data. + """ + if not isinstance(sampler, DistributedSampler): + raise TypeError(f"Sampler of type {type(sampler)} needs to be an DistributedSampler") + self.linked_sampler = sampler + + def unlink(self) -> None: + """ + Removes an established link. For more info view :link: function + """ + self.linked_sampler = None + + def __iter__(self) -> Iterator[int]: + if self.shuffle_type == "local": + self.indices = torch.randperm(len(self.dndarray.larray)).tolist() + else: + self.indices = list(range(len(self.dndarray.larray))) + return iter(self.indices) + + def __len__(self) -> int: + return len(self.dndarray.larray) + + +def create_train_val_split( + X: DNDarray, y: DNDarray, p: float = 0.95, seed: int | None = None +) -> tuple[DNDarray, DNDarray, DNDarray, DNDarray]: + """Shuffles the data and then creates the train val split. 
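# Editor's illustrative sketch, not part of this patch: pairing two samplers so that data and
# labels are shuffled identically; the import path and all names are assumptions.
import heat as ht
from torch.utils.data import DataLoader as TorchDataLoader
from tests.utils.data.datatools import DistributedDataset, DistributedSampler

X = ht.random.randn(256, 8, split=0)
y = ht.arange(256, split=0)

X_set, y_set = DistributedDataset(X), DistributedDataset(y)
# identical seeds keep rows and labels aligned after the global shuffle
X_sampler = DistributedSampler(X_set, shuffle=True, seed=42, shuffle_type="global")
y_sampler = DistributedSampler(y_set, shuffle=True, seed=42, shuffle_type="global")
X_sampler.link(y_sampler)  # later set_seed/set_shuffle_type calls also reach y_sampler

loader = TorchDataLoader(X_set, batch_size=32, sampler=X_sampler)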
+ + Parameters + ---------- + X : DNDarray + Training Data + y : DNDarray + Training Labels + p : float, optional + How much the training should contain, by default 0.95 + seed : int | None, optional + Random Seed to be used, by default None + + Returns + ------- + tuple[DNDarray, DNDarray, DNDarray, DNDarray] + returns tuple of (train_arr, train_labels_arr, val_arr, val_labels_arr) + """ + if seed is None: + seed = random.randint(-0x8000_0000_0000_0000, 0xFFFF_FFFF_FFFF_FFFF) + + for arr in [X, y]: + dset = DistributedDataset(arr) + _ = DistributedSampler(dset, shuffle=True, seed=seed) + + train_rows = int(X.lshape[0] * p) + val_rows = X.lshape[0] - train_rows + + perm = torch.randperm(X.lshape[0]) + + train_idx = perm[:train_rows] + val_idx = perm[-val_rows:] + + assert len(train_idx) + len(val_idx) == X.lshape[0] + + comm = MPI.COMM_WORLD + + total_train_rows = comm.allreduce(train_rows, MPI.SUM) + total_val_rows = comm.allreduce(val_rows, MPI.SUM) + + train_gshape = tuple([total_train_rows, *X.gshape[1:]]) + val_gshape = tuple([total_val_rows, *X.gshape[1:]]) + + train_arr = DNDarray( + X.larray[train_idx], + train_gshape, + X.dtype, + split=0, + device=X.device, + comm=X.comm, + balanced=True, + ) + val_arr = DNDarray( + X.larray[val_idx], val_gshape, X.dtype, split=0, device=X.device, comm=X.comm, balanced=True + ) + + train_labels_gshape = tuple([total_train_rows, *y.gshape[1:]]) + val_labels_gshape = tuple([total_val_rows, *y.gshape[1:]]) + + train_labels_arr = DNDarray( + y.larray[train_idx], + train_labels_gshape, + y.dtype, + split=0, + device=y.device, + comm=y.comm, + balanced=True, + ) + val_labels_arr = DNDarray( + y.larray[val_idx], + val_labels_gshape, + y.dtype, + split=0, + device=y.device, + comm=y.comm, + balanced=True, + ) + + return train_arr, train_labels_arr, val_arr, val_labels_arr + + +def dataset_shuffle(dataset: Union[Dataset, torch_data.Dataset], attrs: List[list]): + """ + Shuffle the given attributes of a dataset across multiple processes. This will send half of the data to rank + 1. + Once the new data is received, it will be shuffled into the existing data on the process. + This function will be called by the DataLoader automatically if ``dataset.ishuffle = False``. + attrs should have the form [[torch.Tensor, DNDarray], ... i.e. [['data', 'htdata`]] assume that all of the attrs have the same dim0 shape as the local data + + Parameters + ---------- + dataset : Dataset + the dataset to shuffle + attrs : List[List[str, str], ... ] + List of lists each of which contains 2 strings. The strings are the handles corresponding to the Dataset + attributes corresponding to the global data DNDarray and the local data of that array, i.e. [["data, "htdata"],] + would shuffle the htdata around and set the correct amount of data for the ``dataset.data`` attribute. For + multiple parameters multiple lists are required. I.e. [["data", "htdata"], ["targets", "httargets"]] + + Notes + ----- + ``dataset.comm`` must be defined for this function to work. + """ + # attrs -> [[torch.Tensor, DNDarray], ...] 
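+ # Illustrative call (the attribute names below match the MNISTDataset defined later in this diff; any dataset + # exposing the same torch-tensor / DNDarray attribute pairs is handled identically): + # dataset_shuffle(dataset, attrs=[["data", "htdata"], ["targets", "httargets"]])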
+ if attrs[0][1] is not None: + prm = torch.randperm(getattr(dataset, attrs[0][1])._DNDarray__array.shape[0]) + else: + prm = torch.randperm(getattr(dataset, attrs[0][0]).shape[0]) + comm = dataset.comm + for att in attrs: + ld = getattr(dataset, att[0]) + snd = ld[: dataset.lcl_half].clone() + snd_shape, snd_dtype, snd_dev = snd.shape, snd.dtype, snd.device + dest = comm.rank + 1 if comm.rank + 1 != comm.size else 0 + # send the top half of the data to the next process + send_wait = comm.Isend(snd, dest=dest) + del snd + new_data = torch.empty(snd_shape, dtype=snd_dtype, device=snd_dev) + src = comm.rank - 1 if comm.rank != 0 else comm.size - 1 + rcv_w = comm.Irecv(new_data, source=src) + send_wait.wait() + rcv_w.wait() + # set the DNDarray data + if att[1] is not None: + getattr(dataset, att[1])._DNDarray__array[: dataset.lcl_half] = new_data + # shuffle all of the data around + shuffled = getattr(dataset, att[1])._DNDarray__array[prm] + getattr(dataset, att[1])._DNDarray__array = shuffled + # set the torch data + setattr(dataset, att[0], shuffled[dataset._cut_slice]) + else: + getattr(dataset, att[0])[: dataset.lcl_half] = new_data + # shuffle all of the data around + shuffled = getattr(dataset, att[0])[prm] + setattr(dataset, att[0], shuffled[dataset._cut_slice]) + + +def dataset_ishuffle(dataset: Union[Dataset, torch_data.Dataset], attrs: List[list]): + """ + Shuffle the given attributes of a dataset across multiple processes, using non-blocking communications. + This will send half of the data to rank + 1. The data must be received by the :func:`dataset_irecv` function. + + This function will be called by the DataLoader automatically if ``dataset.ishuffle = True``. This is set either + during the definition of the class of its initialization by a given paramete. + + Parameters + ---------- + dataset : Dataset + the dataset to shuffle + attrs : List[List[str, str], ... ] + List of lists each of which contains 2 strings. The strings are the handles corresponding to the Dataset + attributes corresponding to the global data DNDarray and the local data of that array, i.e. [["htdata, "data"],] + would shuffle the htdata around and set the correct amount of data for the ``dataset.data`` attribute. For + multiple parameters multiple lists are required. I.e. [["htdata", "data"], ["httargets", "targets"]] + + Notes + ----- + ``dataset.comm`` must be defined for this function to work. + """ + # attrs should have the form [[heat array, sliced array], [...], ...] + # i.e. [['data', 'htdata']] + # assume that all of the attrs have the same dim0 shape as the local data + comm = dataset.comm + ret_list = [] + for att in attrs: + snd = getattr(dataset, att[0])[: dataset.lcl_half].clone() + snd_shape, snd_dtype, snd_dev = snd.shape, snd.dtype, snd.device + dest = comm.rank + 1 if comm.rank + 1 != comm.size else 0 + # send the top half of the data to the next process + send_wait = comm.Isend(snd, dest=dest, tag=99999) + new_data = torch.empty(snd_shape, dtype=snd_dtype, device=snd_dev) + src = comm.rank - 1 if comm.rank != 0 else comm.size - 1 + wait = comm.Irecv(new_data, source=src, tag=99999) + ret_list.append([att, wait, new_data]) + send_wait.wait() + del snd + setattr(dataset, "rcv_list", ret_list) + + +def dataset_irecv(dataset: Union[Dataset, torch_data.Dataset]): + """ + Receive the data sent by the :func:`dataset_ishuffle` function. 
This will wait for the data and then shuffle the + received data into the existing data on the process. + + This function will be called by the DataLoader automatically if ``dataset.ishuffle = True``. This is set either + during the definition of the class or at its initialization via the corresponding parameter. + + Parameters + ---------- + dataset : Dataset + the dataset to shuffle + + Notes + ----- + ``dataset.comm`` must be defined for this function to work. + """ + setattr(dataset, "shuffle_prm", torch.randperm(dataset.data.shape[0])) + rcv_list = getattr(dataset, "rcv_list") + prm = getattr(dataset, "shuffle_prm") + for rcv in rcv_list: + rcv[1].wait() + if rcv[0][1] is not None: + getattr(dataset, rcv[0][1])._DNDarray__array[: dataset.lcl_half] = rcv[2] + # shuffle all of the data around + shuffled = getattr(dataset, rcv[0][1])._DNDarray__array[prm] + getattr(dataset, rcv[0][1])._DNDarray__array = shuffled + # set the torch data + setattr(dataset, rcv[0][0], shuffled[dataset._cut_slice]) + else: + getattr(dataset, rcv[0][0])[: dataset.lcl_half] = rcv[2] + # shuffle all of the data around + shuffled = getattr(dataset, rcv[0][0])[prm] + setattr(dataset, rcv[0][0], shuffled[dataset._cut_slice]) diff --git a/tests/utils/data/matrixgallery.py b/tests/utils/data/matrixgallery.py new file mode 100644 index 0000000000..16c0ba191a --- /dev/null +++ b/tests/utils/data/matrixgallery.py @@ -0,0 +1,204 @@ +""" +Generate matrices for specific tests and functions +""" + +from heat import core +from heat.core.dndarray import DNDarray +from heat.core.communication import Communication +from heat.core.devices import Device +from heat.core.types import datatype, heat_type_is_complexfloating, heat_type_is_exact +from heat.core.random import randn, rand +from heat.core.linalg import qr, matmul +from heat.core.manipulations import diag, sort +from heat.core.exponential import log +from typing import Type, Union, Tuple, Callable + +__all__ = ["hermitian", "parter", "random_known_singularvalues", "random_known_rank"] + + +def hermitian( + n: int, + dtype: Type[datatype] = core.complex64, + split: Union[None, int] = None, + device: Union[None, str, Device] = None, + comm: Union[None, Communication] = None, + positive_definite: bool = False, +) -> DNDarray: + r""" + Generates a random Hermitian matrix of size `(n,n)`. A Hermitian matrix is a complex square matrix that is equal to its conjugate transpose; for real data-types this routine + returns a random symmetric matrix of size `(n,n)`. + + If `positive_definite=True`, the output is given by :math:`\frac{1}{n} R R^H` with :math:`R\in\mathbb{K}^{n\times n}` having entries distributed according to the standard normal distribution. + This corresponds to sampling a random matrix according to the so-called Wishart distribution; see, e.g., [2], and also [3] for additional information regarding the asymptotic distribution of + the singular values. The output matrix will be positive definite with probability 1. + + If `positive_definite=False`, the output is :math:`R+R^H` with :math:`R` generated as above. + + Parameters + ---------- + n : int + size of the resulting square matrix + dtype: Type[datatype], optional + The desired data-type for the array, defaults to ht.complex64; only floating-point data-types allowed. + For real data-types, i.e. float32 and float64, a matrix with real entries (i.e. a symmetric one) is returned. + split: None or int, optional + The axis along which the array content is split and distributed in memory.
+ device: None or str or Device, optional + Specifies the device the tensor shall be allocated on, defaults globally set default device. + comm : Communication, optional + Handle to the nodes holding distributed parts or copies of this array. + positive_definite : bool, optional + If True, the resulting matrix is positive definite, defaults to False. + + References + ---------- + [1] https://en.wikipedia.org/wiki/Hermitian_matrix + [2] https://en.wikipedia.org/wiki/Wishart_distribution + [3] https://en.wikipedia.org/wiki/Marchenko%E2%80%93Pastur_distribution + """ + if heat_type_is_complexfloating(dtype): + real_dtype = core.float32 if dtype is core.complex64 else core.float64 + matrix = randn(n, n, dtype=real_dtype, split=split, device=device, comm=comm) + 1j * randn( + n, n, dtype=real_dtype, split=split, device=device, comm=comm + ) + elif dtype in [core.float32, core.float64]: + matrix = randn(n, n, dtype=dtype, split=split, device=device, comm=comm) + else: + raise ValueError("dtype must be floating-point data-type but is ", dtype, ".") + if positive_definite: + return 1 / n * matrix @ core.conj(matrix).T + + return matrix + core.conj(matrix).T.resplit_(split) + + +def parter( + n: int, + split: Union[None, int] = None, + device: Union[None, str, Device] = None, + comm: Union[None, Communication] = None, + dtype: Type[datatype] = core.float32, +) -> DNDarray: + """ + Generates the Parter matrix, a Toeplitz matrix that has the interesting property of having its singular values cluster at + pi. The matrix has been named so by Cleve Moler in recognition of Seymour Parter's proof of this fact. + + Parameters + ---------- + n : int + size of the resulting square matrix + split: None or int, optional + The axis along which the array content is split and distributed in memory. + device: None or str or Device, optional + Specifies the device the tensor shall be allocated on, defaults globally set default device. + comm: None or Communication, optional + Handle to the nodes holding distributed tensor chunks. + dtype: Type[datatype], optional + The desired data-type for the array, defaults to ht.float64. + + References + ---------- + [1] https://blogs.mathworks.com/cleve/2019/06/24/bohemian-matrices-in-the-matlab-gallery/ + + [2] https://blogs.mathworks.com/cleve/2014/02/03/surprising-svd-square-waves-and-pi/ + + [3] Seymour V. Parter, On the distribution of the singular values of Toeplitz matrices, Linear Algebra and its + Applications 80, 1986, 115-130, http://www.sciencedirect.com/science/article/pii/0024379586902806 + """ + if split is None: + a = core.arange(n, dtype=dtype, device=device, comm=comm) + II = a.expand_dims(0) + JJ = a.expand_dims(1) + elif split == 0: + II = core.arange(n, dtype=dtype, device=device, comm=comm).expand_dims(0) + JJ = core.arange(n, dtype=dtype, split=split, device=device, comm=comm).expand_dims(1) + elif split == 1: + II = core.arange(n, dtype=dtype, split=0, device=device, comm=comm).expand_dims(0) + JJ = core.arange(n, dtype=dtype, device=device, comm=comm).expand_dims(1) + else: + raise ValueError(f"expected split value to be either {{None,0,1}}, but was {split}") + + return 1.0 / (II - JJ + 0.5) + + +def random_orthogonal( + m: int, + n: int, + split: Union[None, int] = None, + device: Union[None, str, Device] = None, + comm: Union[None, Communication] = None, + dtype: Type[datatype] = core.float32, +) -> DNDarray: + """Auxiliary routine: creates a random mxn matrix with orthonormal columns + Caveat: this is done by QR of mxn matrices with i.i.d. 
normal entries, so this does not produce the uniform distribution on the orthogonal matrices... + """ + if n > m: + raise RuntimeError("No orthogonal matrix of shape %d x %d possible." % (m, n)) + + # TODO: if QR does not make problems anymore, replace split=None by split=split + U = randn(m, n, split=None, dtype=dtype, comm=comm, device=device) + Q, _ = qr(U) + + return Q[:, :n].resplit_(split) + + +def random_known_singularvalues( + m: int, + n: int, + singular_values: DNDarray, + split: Union[None, int] = None, + device: Union[None, str, Device] = None, + comm: Union[None, Communication] = None, + dtype: Type[datatype] = core.float32, +) -> Tuple[DNDarray, Tuple[DNDarray]]: + """ + Creates an m x n matrix with singular values given by the entries of the input array singular_values. + Caveat: if the entries of `singular_values` are not sorted, the singular value decomposition of A (returned as second output) is so as well. + The singular vectors are chosen randomly using :func:`random_orthogonal`. + """ + if not isinstance(singular_values, DNDarray): + raise RuntimeError( + f"Argument singular_values needs to be a DNDarray but is {type(singular_values)}." + ) + if singular_values.ndim != 1: + raise RuntimeError( + f"Argument singular_values needs to be a 1D array, but dimension is {singular_values.ndim}." + ) + if singular_values.shape[0] > min(m, n): + raise RuntimeError( + f"Number of given singular values must not exceed matrix dimensions. Got {singular_values.shape[0]} singular values for matrix size ({m}, {n})." + ) + + r = singular_values.shape[0] + U = random_orthogonal(m, r, split=split, device=device, comm=comm, dtype=dtype) + V = random_orthogonal(n, r, split=split, device=device, comm=comm, dtype=dtype) + + A = matmul(U, matmul(diag(singular_values), V.T)) + + return A.resplit_(split), (U, singular_values, V) + + +def random_known_rank( + m: int, + n: int, + r: int, + quantile_function: Callable = lambda x: -log(x), + split: Union[None, int] = None, + device: Union[None, str, Device] = None, + comm: Union[None, Communication] = None, + dtype: Type[datatype] = core.float32, +) -> Tuple[DNDarray, Tuple[DNDarray]]: + """ + Creates a random m x n matrix with rank r. + This routine uses :func:`random_known_singularvalues` with r singular values randomly chosen + w.r.t. the distribution with quantile function given by the input quantile_function. Default yields exponential distibution with parameter lambda=1. + Unlike in :func:`random_known_singularvalues`, here the singular values of the output are sorted in descending order. + """ + if r > min(m, n): + raise RuntimeError("rank must not exceed matrix dimensions.") + + singular_values = rand(r, dtype=dtype, comm=comm, device=device) + singular_values = sort(quantile_function(singular_values), descending=True)[0] + + return random_known_singularvalues( + m, n, singular_values, split=split, device=device, comm=comm, dtype=dtype + ) diff --git a/tests/utils/data/mnist.py b/tests/utils/data/mnist.py new file mode 100644 index 0000000000..007e493445 --- /dev/null +++ b/tests/utils/data/mnist.py @@ -0,0 +1,129 @@ +""" +File for the MNIST dataset definition in heat +""" + +import torch + +from torchvision import datasets +from typing import Callable, Union + +from heat import factories +from heat import datatools + +__all__ = ["MNISTDataset"] + + +class MNISTDataset(datasets.MNIST): + """ + Dataset wrapper for `torchvision.datasets.MNIST `_. + This implements all of the required functions mentioned in :class:`heat.utils.data.Dataset`. 
The ``__getitem__`` and ``__len__`` functions are inherited from + `torchvision.datasets.MNIST `_. + + Parameters + ---------- + root : str + Directory containing the MNIST dataset + train : bool, optional + If True (default), load the training dataset, otherwise the test dataset + transform : Callable, optional + Transform to be applied to the data in the ``__getitem__`` function, default is ``None`` + target_transform : Callable, optional + Transform to be applied to the targets in the ``__getitem__`` function, default is ``None`` + download : bool, optional + If True (default), download the data if it does not already exist in the directory + split : int, optional + The axis along which to split the data when it is loaded into a ``DNDarray`` + ishuffle : bool, optional + Flag indicating whether to use non-blocking communications for shuffling the data between epochs + Note: if True, the ``Ishuffle()`` function must be defined within the class + Default: ``False`` + test_set : bool, optional + If this dataset is the testing set then keep all of the data local + Default: ``False`` + + Attributes + ---------- + htdata : DNDarray + full data + httargets : DNDarray + full target data + comm : communication.MPICommunication + heat communicator for sending data between processes + _cut_slice : slice + slice to remove the last element if all are not equal in length + lcl_half : int + integer value of half of the data on the process + data : torch.Tensor + the local data on a process + targets : torch.Tensor + the local targets on a process + ishuffle : bool + flag indicating if non-blocking communications are used for shuffling the data between epochs + test_set : bool + if this dataset is the testing set then keep all of the data local + + Notes + ----- + For other attributes see `torchvision.datasets.MNIST `_.
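+ + Examples + -------- + A minimal sketch; the ``root`` path and batch size are placeholders, and the loader call assumes the heat + DataLoader wrapper from :func:`heat.utils.data.datatools.DataLoader`: + + >>> dset = MNISTDataset("data/mnist", train=True, split=0) + >>> loader = datatools.DataLoader(dataset=dset, batch_size=64)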
+ """ + + def __init__( + self, + root: str, + train: bool = True, + transform: Callable = None, + target_transform: Callable = None, + download: bool = True, + split: int = 0, + ishuffle: bool = False, + test_set: bool = False, + ): # noqa: D107 + super().__init__( + root, + train=train, + transform=transform, + target_transform=target_transform, + download=download, + ) + if split != 0 and split is not None: + raise ValueError("split must be 0 or None") + split = None if test_set else split + array = factories.array(self.data, split=split) + targets = factories.array(self.targets, split=split) + self.test_set = test_set + self.partial_dataset = False + self.comm = array.comm + self.htdata = array + self.httargets = targets + self.ishuffle = ishuffle + if split is not None: + min_data_split = array.gshape[0] // array.comm.size + arb_slice = slice(min_data_split) + self._cut_slice = arb_slice + self.lcl_half = min_data_split // 2 + self.data = array._DNDarray__array[self._cut_slice] + self.targets = targets._DNDarray__array[self._cut_slice] + else: + self._cut_slice = None + self.lcl_half = array.gshape[0] // 2 + self.data = array._DNDarray__array + self.targets = targets._DNDarray__array + # getitem and len are defined by torch's MNIST class + + def Shuffle(self): + """ + Uses the :func:`datatools.dataset_shuffle` function to shuffle the data between the processes + """ + if not self.test_set: + datatools.dataset_shuffle( + dataset=self, attrs=[["data", "htdata"], ["targets", "httargets"]] + ) + + def Ishuffle(self): + """ + Uses the :func:`datatools.dataset_ishuffle` function to shuffle the data between the processes + """ + if not self.test_set: + datatools.dataset_ishuffle( + dataset=self, attrs=[["data", "htdata"], ["targets", "httargets"]] + ) diff --git a/tests/utils/data/partial_dataset.py b/tests/utils/data/partial_dataset.py new file mode 100644 index 0000000000..9e1ba362b7 --- /dev/null +++ b/tests/utils/data/partial_dataset.py @@ -0,0 +1,360 @@ +""" +Tool for using a dataset which will not fit in memory with neural networks +""" + +import math +import queue +import threading +import torch +import time + +from torch.utils import data as torch_data +from typing import Callable, List, Iterator, Union + +from heat.communication import MPICommunication +from heat.communication import MPI_WORLD + +__all__ = ["PartialH5Dataset", "PartialH5DataLoaderIter"] + + +def queue_thread(q: queue.Queue): + while True: + items = q.get() + if isinstance(items, tuple): + func = items[0] + args = items[1:] + func(*args) + else: + items() + q.task_done() + + +class PartialH5Dataset(torch_data.Dataset): + """ + Create a Dataset object for a dataset which loads portions of data from an HDF5 file. Very similar to + :func:``. This will create 2 threads, one for loading the data from the target file, + and one for converting items before being passed to the network. The conversion is done by the iterator. + A portion of the data of length ``initial_load`` is loaded upon initialization, the rest of the data is loaded + after the loaded data is returned by :func:`PartialH5DataLoaderIter`. This iterator will be used by the HeAT + :func:`heat.utils.data.datatools.DataLoader` automatically with this type of dataset. + + Notes + ----- + H5 datasets require the GIL to load data. This can be a bottleneck if data needs to be loaded multiple times (e.g. + the case for using this dataset). It is recommended to find another way to preprocess the data and avoid using + H5 files for this reason. 
+ + Parameters + ---------- + file: str + H5 file to use + comm: MPICommunication + Global MPI communicator generated by HeAT + dataset_names: Union[str, List[str]], optional + Name/s of dataset/s to load from ``file``. If a string is given, it will be the only dataset loaded. + Default is "data". + transforms : List[Callable], optional + Transforms to apply to the data after it is gotten from the loaded data before it is used by the network. + This should be a list of Callable torch functions for each item returned by the ``__getitem__`` function + of the individual dataset. If a list element is ``None`` then no transform will be applied to the + corresponding element returned by ``__getitem__``. I.e. if ``__getitem__`` returns an image and a label + then the list would look like this: ``transforms = [image_transforms, None]``. If this is ``None``, no + transforms will be applied to any elements. Default is ``None``. + use_gpu : bool, optional + Use GPUs if available. Defaults to True. + validate_set : bool, optional + Load the entire dataset onto each node upon initialization and skip loaded in iterator + This is typically the case needed for validation sets when the network should be tested against the whole + dataset. Default is False. + initial_load : int, optional + How many elements to load from the file in the 0th dimension. Default is 7000 elements + load_length : int, optional + How many elements to load from the file in the iterator. Default is 1000 elements + """ + + def __init__( + self, + file: str, + comm: MPICommunication = MPI_WORLD, + dataset_names: Union[str, List[str]] = "data", + transforms: List[Callable] = None, + use_gpu: bool = True, + validate_set: bool = False, + initial_load: int = 7000, + load_length: int = 1000, + ): # noqa: D107 + import h5py + + super(PartialH5Dataset, self).__init__() + self.ishuffle = False + self.file = file + self.comm = comm + self.transforms = transforms if isinstance(transforms, (list, tuple)) else [transforms] + self.gpu = True if torch.cuda.device_count() > 0 and use_gpu else False + self.torch_device = "cpu" + if torch.cuda.is_available() and use_gpu: + dev_id = MPI_WORLD.rank % torch.cuda.device_count() + self.torch_device = torch.device(f"cuda:{str(dev_id)}") + torch.cuda.set_device(dev_id) + + f = h5py.File(file, "r") + # too much data for the process + fkeys = list(f.keys()) + + sz = f[fkeys[0]].len() + for k in fkeys[1:]: + # ensure that all of the datasets are the same length + if f[k].len() != sz: + raise ValueError(f"all datasets in {file} must be the same length") + self.total_size = sz + # how many indices will go onto each process (len) + self.lcl_full_sz = sz // comm.size + # load data that is half of of the available memory + self.local_data_start = comm.rank * self.lcl_full_sz + self.local_data_end = (comm.rank + 1) * self.lcl_full_sz + + if validate_set or initial_load > self.lcl_full_sz: + # if its the validation set then load the whole dataset for each process + self.lcl_full_sz = sz + self.local_data_start = 0 + self.local_data_end = sz + self.load_initial = sz + self.partial_dataset = False + self.load_len = 0 + self.loads_needed = 0 + else: + self.local_length = self.local_data_end - self.local_data_start + self.load_initial = initial_load + self.load_len = load_length # int(local_data_end / 3) + self.loads_needed = math.ceil(self.lcl_full_sz / self.load_len) + self.partial_dataset = True + + self.loads_left = self.loads_needed + self.load_start = self.local_data_start + self.load_end = self.local_data_start + 
self.load_initial + + # data being loaded from dataset_names parameter + if isinstance(dataset_names, str): + dataset_names = [dataset_names] + self.dataset_names = dataset_names + self.dataset_order = [] + for d in dataset_names: + hld = f[d][self.load_start : self.load_end] + self.__setattr__(d, hld) + self.load_start = self.load_end + self.load_end += self.load_len + f.close() + self.load_thread = None + self.epoch_end = False + # need the number of loads required for an epoch + self.loading_queue = queue.Queue() + self.loading_condition = threading.Condition() + threading.Thread(target=queue_thread, args=[self.loading_queue], daemon=True).start() + self.convert_queue = queue.Queue() + threading.Thread(target=queue_thread, args=[self.convert_queue], daemon=True).start() + self.used_indices = [] + + def Shuffle(self): + """ + Send half of the local data to the process ``self.comm.rank + 1`` if available, else wrap around. After + receiving the new data, shuffle the local tensor. + + Not implemented for partial dataset + """ + return NotImplementedError + + def Ishuffle(self): + """ + Send half of the local data to the process ``self.comm.rank + 1`` if available, else wrap around. After + receiving the new data, shuffle the local tensor. + + Not implemented for partial dataset + """ + return NotImplementedError + + def __getitem__(self, index: Union[int, slice, List[int], torch.Tensor]) -> torch.Tensor: + """ + Abstract __getitem__ method. + This should be defined by the user at runtime. This function needs to be designed such + that the data is in the 0th dimension and the indexes called are only in the 0th dim! + """ + raise NotImplementedError("__getitem__ must be overwritten") + + def __len__(self) -> int: + """ + Get the total length of the dataset + """ + return self.total_size + + def thread_replace_converted_batches(self): + """ + Replace the elements of the dataset with newly loaded elements. :func:'PartialH5DataLoaderIter' will + put the used indices in the ``used_indices`` parameter. This object is reset to an empty list after + these elements are overwritten with new data. + """ + import h5py + + self.loads_left = self.loads_needed + ll = self.loads_left + for _ in range(ll): + with h5py.File(self.file, "r") as f: + for d in self.dataset_names: + hld = f[d][self.load_start : self.load_end] + self.__setattr__("hold" + d, hld) + if self.load_end + self.comm.size > self.total_size: + self.load_end = 0 + self.load_start = self.load_end + self.load_end += self.load_len + + # wait for lock1 *from* convert thread + with self.loading_condition: + self.loading_condition.wait() + for d in self.dataset_names: + new = self.__getattribute__("hold" + d) + dset = self.__getattribute__(d) + new_top = new[: len(self.used_indices)] + lnew = len(new_top) + dset[self.used_indices[:lnew]] = new_top + self.__setattr__(d, dset) + self.__setattr__("hold" + d, new[lnew:]) + # give up lock / notify convert thread + self.used_indices = [] + self.loads_left -= 1 + + +class PartialH5DataLoaderIter(object): + """ + Iterator to be used with :func:'PartialH5Dataset'. It closely mirrors the standard torch iterator while loading + new data to replace the loaded batches automatically. It also pre-fetches the batches and begins their + preparation, collation, and device setting in the background. + """ + + def __init__(self, loader): # noqa: D107 + # todo: make note that h5py is required for this...move load to dataset? 
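+ # ``loader`` is expected to be the heat DataLoader wrapper: ``loader.dataset`` is the PartialH5Dataset instance and + # ``loader.DataLoader`` is the underlying torch DataLoader whose settings (collate_fn, batch size, pin_memory, ...) are mirrored below.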
+ self.dataset = loader.dataset + self._dataset_kind = loader.DataLoader._dataset_kind + self._IterableDataset_len_called = loader.DataLoader._IterableDataset_len_called + self._auto_collation = loader.DataLoader._auto_collation + self._drop_last = loader.DataLoader.drop_last + self._index_sampler = loader.DataLoader._index_sampler + self._num_workers = loader.DataLoader.num_workers + self._pin_memory = loader.DataLoader.pin_memory and torch.cuda.is_available() + self._timeout = loader.DataLoader.timeout + self._collate_fn = loader.DataLoader.collate_fn + self._sampler_iter = iter(self._index_sampler) + self._base_seed = torch.empty((), dtype=torch.int64).random_().item() + self._num_yielded = 0 + self.batch_size = loader.DataLoader.batch_size + self.comm = self.dataset.comm + rand_samp_list = torch.randperm(self.dataset.load_initial).tolist() + + # todo: support other samplers: for now its only random + if self.dataset.partial_dataset: + self.ready_batches = [] + mod_batch = self.dataset.load_len % self.batch_size + if mod_batch != 0: + self.dataset.load_len += self.batch_size - mod_batch + self.dataset.load_end = self.dataset.load_start + self.dataset.load_len + # generate all indices + index_list = [] + idx_repeats = math.ceil(self.dataset.lcl_full_sz / self.dataset.load_initial) + for _ in range(idx_repeats): + index_list.extend(torch.randperm(self.dataset.load_initial).tolist()) + # start the conversion + self.dataset.convert_queue.put((self.__thread_convert_all, index_list)) + + self.length = len(index_list) // self.batch_size + self.dataset.loading_queue.put(self.dataset.thread_replace_converted_batches) + else: + self.rand_samp_list = rand_samp_list + self.length = len(self._index_sampler) + + self._dataset_fetcher = torch_data.dataloader._DatasetKind.create_fetcher( + self._dataset_kind, + loader.DataLoader.dataset, + self._auto_collation, + self._collate_fn, + self._drop_last, + ) + + def __len__(self): + """ + Get the length of the iterator + """ + return self.length + + def _next_data(self): + # get the next batch + if not self.dataset.partial_dataset: + index = next(self._sampler_iter) # may raise StopIteration + data = self._dataset_fetcher.fetch(index) # may raise StopIteration + if self._pin_memory: + data = torch_data._utils.pin_memory.pin_memory(data) + return data + if self._num_yielded == self.__len__(): + raise StopIteration + while len(self.ready_batches) < 1: + time.sleep(0.1) + batch = self.ready_batches.pop(0) + for b in range(len(batch)): + if batch[b].device != self.dataset.torch_device: + batch[b] = batch[b].to(self.dataset.torch_device) + return batch + + def __next__(self): + """ + Get the next batch of data. Shamelessly taken from torch. + """ + # shamelessly taken from torch + data = self._next_data() + self._num_yielded += 1 + # note: the warnings raised by torch for iterable datasets were removed here, look for these in + # the base class of the single process iterator + return data + + def __iter__(self): + """ + Get a new iterator of this class + + Returns + ------- + PartialH5DataLoaderIter + """ + return self + + def __thread_convert_all(self, index_list): + # convert all of the elements, collate them into batches, and send the batches to the correct device + # this function als communicates with the data loading thread from the PartialH5Dataset to notify it + # when it has the correct amount of data to write. + converted_items = [] + for ind in index_list: + # get the desired image/target/... 
to begin composing a batch + single_item = self.dataset[ind] + if not isinstance(single_item, tuple) and self.dataset.transforms[0] is not None: + single_item = self.dataset.transforms[0](single_item) + if isinstance(single_item, tuple): + single_item = list(single_item) + for ii in range(len(single_item)): + # do transforms (have all torch stuff here) + if self.dataset.transforms[ii] is not None: + single_item[ii] = self.dataset.transforms[ii](single_item[ii]) + converted_items.append(single_item) + self.dataset.used_indices.append(ind) + if len(converted_items) == self.batch_size: + if ( + len(self.dataset.used_indices) == self.dataset.load_len + and self.dataset.loads_left > 0 + ): + with self.dataset.loading_condition: + self.dataset.loading_condition.notify() + batch = self._collate_fn(converted_items) + try: + for bb in range(2): + bb_batch = self.ready_batches[bb] + for b in range(len(batch)): + bb_batch[b] = bb_batch[b].to(self.dataset.torch_device) + self.ready_batches[bb] = bb_batch + except IndexError: + pass + self.ready_batches.append(batch) + converted_items = [] diff --git a/tests/utils/data/spherical.py b/tests/utils/data/spherical.py new file mode 100644 index 0000000000..133f25c89a --- /dev/null +++ b/tests/utils/data/spherical.py @@ -0,0 +1,160 @@ +"""Create a spherical dataset.""" + +import heat as ht +import torch + + +def create_spherical_dataset( + num_samples_cluster, radius=1.0, offset=4.0, dtype=ht.float32, random_state=1 +): + """ + Creates k=4 spherical clusters in 3D space along the space diagonal + + Parameters + ---------- + num_samples_cluster: int + Number of samples per cluster. Each process will create n // MPI_WORLD.size elements for each cluster + radius: float + Radius of the sphere + offset: float + Shift of the clusters along the axes. The 4 clusters will be centered around c1=(offset, offset, offset), + c2=(2*offset, 2*offset, 2*offset), c3=(-offset, -offset, -offset) and c4=(-2*offset, -2*offset, -2*offset) + dtype: ht.datatype + Dataset dtype + random_state: int + seed of the torch random number generator + """ + # contains num_samples + + p = ht.MPI_WORLD.size + # create k spherical clusters with n elements per cluster. Each process creates k * n/p elements + num_ele = num_samples_cluster // p + ht.random.seed(random_state) + # radius between 0 and 1 + r = ht.random.rand(num_ele, split=0) * radius + # theta between 0 and pi + theta = ht.random.rand(num_ele, split=0) * 3.1415 + # phi between 0 and 2pi + phi = ht.random.rand(num_ele, split=0) * 2 * 3.1415 + # Cartesian coordinates + x = r * ht.sin(theta) * ht.cos(phi) + x.astype(dtype, copy=False) + y = r * ht.sin(theta) * ht.sin(phi) + y.astype(dtype, copy=False) + z = r * ht.cos(theta) + z.astype(dtype, copy=False) + + cluster1 = ht.stack((x + offset, y + offset, z + offset), axis=1) + cluster2 = ht.stack((x + 2 * offset, y + 2 * offset, z + 2 * offset), axis=1) + cluster3 = ht.stack((x - offset, y - offset, z - offset), axis=1) + cluster4 = ht.stack((x - 2 * offset, y - 2 * offset, z - 2 * offset), axis=1) + + data = ht.concatenate((cluster1, cluster2, cluster3, cluster4), axis=0) + # Note: enhance when shuffle is available + return data + + +def create_clusters( + n_samples, n_features, n_clusters, cluster_mean, cluster_std, cluster_weight=None, device=None +): + """ + Creates a DNDarray of shape (n_samples, n_features), split=0, and dtype=ht.float32, that is balanced (i.e. roughly same size of samples on each process).
+ The data set consists of n_clusters clusters, each of which is sampled from a multivariate normal distribution with mean cluster_mean[k,:] and covariance matrix cluster_std[k,:,:]. + The clusters are of the same size (quantitatively) and distributed evenly over the processes, unless cluster_weight is specified. + + Parameters + ---------- + n_samples: int + Number of overall samples + n_features: int + Number of features + n_clusters: int + Number of clusters + cluster_mean: torch.Tensor of shape (n_clusters, n_features) + featurewise mean (center) of each cluster; of course not the true mean, but rather the mean according to which the elements of the cluster are sampled. + cluster_std: torch.Tensor of shape (n_clusters, n_features, n_features), or (n_clusters,) + featurewise standard deviation of each cluster from the mean value; of course not the true std, but rather the std according to which the elements of the cluster are sampled. + If shape is (n_clusters,), std is assumed to be the same in each direction for each cluster + cluster_weight: torch.Tensor of shape (n_clusters,), optional + On each process, cluster_weight is assumed to be a torch.Tensor whose entries add up to 1. The i-th entry of cluster_weight on process p specified which amount of the samples on process p + is sampled according to the distribution of cluster i. Thus, this parameter allows to distribute the n_cluster clusters unevenly over the processes. + If None, each cluster is distributed evenly over all processes. + device: Optional[str] = None, + The device on which the data is stored. If None, the default device is used. + """ + device = ht.devices.sanitize_device(device) + + if cluster_weight is None: + cluster_weight = torch.ones(n_clusters) / n_clusters + else: + if not isinstance(cluster_weight, torch.Tensor): + raise TypeError( + "cluster_weight must be None or a torch.Tensor, but is {}".format( + type(cluster_weight) + ) + ) + elif not cluster_weight.shape == (n_clusters,): + raise ValueError( + "If a torch.Tensor, cluster_weight must be of shape (n_clusters,), but is {}".format( + cluster_weight.shape + ) + ) + elif not torch.allclose(torch.sum(cluster_weight), torch.tensor(1.0)): + raise ValueError( + "If a torch.Tensor, cluster_weight must add up to 1, but adds up to {}".format( + torch.sum(cluster_weight) + ) + ) + if not isinstance(cluster_mean, torch.Tensor): + raise TypeError("cluster_mean must be a torch.Tensor, but is {}".format(type(cluster_mean))) + elif not cluster_mean.shape == (n_clusters, n_features): + raise ValueError( + "cluster_mean must be of shape (n_clusters, n_features), but is {}".format( + cluster_mean.shape + ) + ) + if not isinstance(cluster_std, torch.Tensor): + raise TypeError("cluster_std must be a torch.Tensor, but is {}".format(type(cluster_std))) + elif not cluster_std.shape == ( + n_clusters, + n_features, + n_features, + ) and not cluster_std.shape == (n_clusters,): + raise ValueError( + "cluster_std must be of shape (n_clusters, n_features, n_features) or (n_clusters,), but is {}".format( + cluster_std.shape + ) + ) + if cluster_std.shape == (n_clusters,): + cluster_std = torch.stack( + [torch.eye(n_features) * cluster_std[k] for k in range(n_clusters)], dim=0 + ) + + global_shape = (n_samples, n_features) + local_shape = ht.MPI_WORLD.chunk(global_shape, 0)[1] + local_size_of_clusters = [int(local_shape[0] * cluster_weight[k]) for k in range(n_clusters)] + if sum(local_size_of_clusters) != local_shape[0]: + local_size_of_clusters[0] += local_shape[0] - 
sum(local_size_of_clusters) + distributions = [ + torch.distributions.multivariate_normal.MultivariateNormal( + cluster_mean[k, :], cluster_std[k] + ) + for k in range(n_clusters) + ] + local_data = [ + distributions[k].sample((local_size_of_clusters[k],)).to(device.torch_device) + for k in range(n_clusters) + ] + local_data = torch.cat(local_data, dim=0) + rand_perm = torch.randperm(local_shape[0], device=device.torch_device) + local_data = local_data[rand_perm, :] + data = ht.DNDarray( + local_data, + global_shape, + dtype=ht.float32, + split=0, + device=device, + comm=ht.MPI_WORLD, + balanced=True, + ) + return data diff --git a/heat/utils/tests/test_vision_transforms.py b/tests/utils/test_vision_transforms.py similarity index 100% rename from heat/utils/tests/test_vision_transforms.py rename to tests/utils/test_vision_transforms.py