From 6daef758bf34239429f99b3b6c3f00ba2ae1a235 Mon Sep 17 00:00:00 2001
From: Andreas Huber
Date: Thu, 14 Nov 2024 05:57:03 -0800
Subject: [PATCH 01/41] feat: Introduce BackendManager and PolicyManager

fix tests
chore: remove unnecessary use of super()
fix typo
cosmetics
proposal: inject backend through decorators
add incremental basic statistics
add dbscan
add kmeans
remove BackendMixin from kmeans
rm BackendMixin
fix pyproject; remove basemixin and add covariance
add svm
add pca
fix backend import
add forest
add linear_model
add neighbors
fixup svm
refactor: unify host/dpc backend decorators into single decorator
update backend import
cleanup
update_abstractmethods fix for py3.9
fixup
fixup
fixup
fixup
fixup after rebase
decorate methods not classes - preparation for spmd
simplify backend import
align spmd
fixup
update tests
fixup
fixup
fixup
fixup: add license
improve debug message
wip: fix spmd
spmd test cases
fix more spmd tests
more spmd test fixups
further fixups
further fixups
revert error message
_get_queue -> _get_policy
fix for spmd classes that rely on batch functions
more fixes for default policy
---
 onedal/__init__.py                            |  85 ++++++---
 onedal/_device_offload.py                     |   3 +-
 onedal/basic_statistics/basic_statistics.py   |  14 +-
 .../incremental_basic_statistics.py           |  38 ++--
 .../tests/test_basic_statistics.py            |   1 -
 onedal/cluster/dbscan.py                      |  44 ++---
 onedal/cluster/kmeans.py                      |  64 ++++---
 onedal/cluster/kmeans_init.py                 |  30 ++-
 onedal/common/_backend.py                     | 143 ++++++++++++++
 onedal/common/_base.py                        |  38 ----
 onedal/common/_spmd_policy.py                 |   5 +-
 onedal/common/backend_manager.py              |  43 +++++
 onedal/common/hyperparameters.py              |   8 +-
 onedal/common/policy_manager.py               |  70 +++++++
 onedal/common/tests/test_backend_manager.py   | 121 ++++++++++++
 onedal/common/tests/test_policy.py            |  13 +-
 onedal/common/tests/test_policy_manager.py    | 136 ++++++++++++++
 onedal/common/tests/test_sycl.py              |  23 ++-
 onedal/covariance/covariance.py               |  19 +-
 onedal/covariance/incremental_covariance.py   |  31 ++--
 onedal/datatypes/_data_conversion.py          |  18 +-
 onedal/datatypes/tests/test_data.py           |  33 ++--
 onedal/decomposition/incremental_pca.py       |  31 ++--
 onedal/decomposition/pca.py                   |  30 ++-
 onedal/ensemble/forest.py                     | 110 +++++------
 .../linear_model/incremental_linear_model.py  |  65 ++++---
 onedal/linear_model/linear_model.py           |  32 ++--
 onedal/linear_model/logistic_regression.py    |  68 ++++---
 onedal/neighbors/neighbors.py                 | 175 ++++++++++--------
 onedal/primitives/get_tree.py                 |   8 +-
 onedal/primitives/kernel_functions.py         |  17 +-
 onedal/spmd/__init__.py                       |  10 +
 onedal/spmd/_base.py                          |  30 ---
 .../spmd/basic_statistics/basic_statistics.py |  17 +-
 .../incremental_basic_statistics.py           |  58 ++----
 onedal/spmd/cluster/__init__.py               |   8 +-
 onedal/spmd/cluster/dbscan.py                 |  11 +-
 onedal/spmd/cluster/kmeans.py                 |  36 ++--
 onedal/spmd/covariance/covariance.py          |  16 +-
 .../spmd/covariance/incremental_covariance.py |  69 +------
 onedal/spmd/decomposition/incremental_pca.py  | 102 ++--------
 onedal/spmd/decomposition/pca.py              |  16 +-
 onedal/spmd/ensemble/__init__.py              |   2 +-
 onedal/spmd/ensemble/forest.py                |  28 ---
 .../linear_model/incremental_linear_model.py  |  77 +-------
 onedal/spmd/linear_model/linear_model.py      |  19 +-
 .../spmd/linear_model/logistic_regression.py  |  16 +-
 onedal/spmd/neighbors/__init__.py             |   4 +-
 onedal/spmd/neighbors/neighbors.py            |  47 +++--
 onedal/svm/svm.py                             | 103 ++++++++---
 pyproject.toml                                |   7 +-
 sklearnex/__init__.py                         |   4 +-
 sklearnex/spmd/neighbors/__init__.py          |   4 +-
 sklearnex/spmd/neighbors/neighbors.py         |  25 ---
 sklearnex/tests/test_memory_usage.py          |  14 +-
 55 files changed, 1329 insertions(+), 910 deletions(-)
 create mode 100644 onedal/common/_backend.py
 delete mode 100644 onedal/common/_base.py
 create mode 100644 onedal/common/backend_manager.py
 create mode 100644 onedal/common/policy_manager.py
 create mode 100644 onedal/common/tests/test_backend_manager.py
 create mode 100644 onedal/common/tests/test_policy_manager.py
 delete mode 100644 onedal/spmd/_base.py
 delete mode 100644 onedal/spmd/ensemble/forest.py
 delete mode 100644 sklearnex/spmd/neighbors/neighbors.py

diff --git a/onedal/__init__.py b/onedal/__init__.py
index 8f7ade667c..4d2f298c11 100644
--- a/onedal/__init__.py
+++ b/onedal/__init__.py
@@ -19,6 +19,20 @@
 from daal4py.sklearn._utils import daal_check_version
 
+
+class Backend:
+    """Encapsulates a backend module and provides a unified interface to it, together with flags describing its DPC and SPMD capabilities"""
+
+    def __init__(self, backend_module, is_dpc, is_spmd):
+        self.backend = backend_module
+        self.is_dpc = is_dpc
+        self.is_spmd = is_spmd
+
+    # forward attribute access to the wrapped backend module
+    def __getattr__(self, name):
+        return getattr(self.backend, name)
+
+
 if "Windows" in platform.system():
     import os
     import site
@@ -40,44 +54,67 @@
         pass
     os.environ["PATH"] = path_to_libs + os.pathsep + os.environ["PATH"]
 
-try:
-    import onedal._onedal_py_dpc as _backend
-
-    _is_dpc_backend = True
-except ImportError:
-    import onedal._onedal_py_host as _backend
-    _is_dpc_backend = False
-
-_is_spmd_backend = False
+try:
+    # use dpc backend if available
+    import onedal._onedal_py_dpc
 
-if _is_dpc_backend:
-    try:
-        import onedal._onedal_py_spmd_dpc as _spmd_backend
+    _dpc_backend = Backend(onedal._onedal_py_dpc, is_dpc=True, is_spmd=False)
 
-        _is_spmd_backend = True
-    except ImportError:
-        _is_spmd_backend = False
+    _host_backend = None
+except ImportError:
+    # fall back to host backend
+    _dpc_backend = None
+    import onedal._onedal_py_host
 
-__all__ = ["covariance", "decomposition", "ensemble", "neighbors", "primitives", "svm"]
+    _host_backend = Backend(onedal._onedal_py_host, is_dpc=False, is_spmd=False)
 
-if _is_spmd_backend:
-    __all__.append("spmd")
+try:
+    # also load spmd backend if available
+    import onedal._onedal_py_spmd_dpc
 
+    _spmd_backend = Backend(onedal._onedal_py_spmd_dpc, is_dpc=True, is_spmd=True)
+except ImportError:
+    _spmd_backend = None
+
+# if/elif/else layout required for pylint to realize _default_backend cannot be None
+if _dpc_backend is not None:
+    _default_backend = _dpc_backend
+elif _host_backend is not None:
+    _default_backend = _host_backend
+else:
+    raise ImportError("No oneDAL backend available")
+
+# Core modules to export
+__all__ = [
+    "_host_backend",
+    "_default_backend",
+    "_dpc_backend",
+    "_spmd_backend",
+    "covariance",
+    "decomposition",
+    "ensemble",
+    "neighbors",
+    "primitives",
+    "svm",
+]
+
+# Additional features based on version checks
 if daal_check_version((2023, "P", 100)):
     __all__ += ["basic_statistics", "linear_model"]
+if daal_check_version((2023, "P", 200)):
+    __all__ += ["cluster"]
 
-    if _is_spmd_backend:
+# Exports if SPMD backend is available
+if _spmd_backend is not None:
+    __all__ += ["spmd"]
+    if daal_check_version((2023, "P", 100)):
         __all__ += [
             "spmd.basic_statistics",
             "spmd.decomposition",
             "spmd.linear_model",
             "spmd.neighbors",
         ]
-
-if daal_check_version((2023, "P", 200)):
-    __all__ += ["cluster"]
-
-    if _is_spmd_backend:
+    if daal_check_version((2023, "P", 200)):
         __all__ += ["spmd.cluster"]

diff --git a/onedal/_device_offload.py b/onedal/_device_offload.py
index
4e46592bb2..0e67aaac89 100644 --- a/onedal/_device_offload.py +++ b/onedal/_device_offload.py @@ -14,7 +14,6 @@ # limitations under the License. # ============================================================================== -import logging from collections.abc import Iterable from functools import wraps @@ -36,7 +35,7 @@ # in _get_global_queue always true for situations without the # dpc backend when `device_offload` is used. Instead, it will # fail at the policy check phase yielding a RuntimeError - SyclQueue = getattr(onedal._backend, "SyclQueue", object) + SyclQueue = getattr(onedal._dpc_backend, "SyclQueue", object) if dpnp_available: import dpnp diff --git a/onedal/basic_statistics/basic_statistics.py b/onedal/basic_statistics/basic_statistics.py index c60d1599ac..dcacc6d565 100644 --- a/onedal/basic_statistics/basic_statistics.py +++ b/onedal/basic_statistics/basic_statistics.py @@ -14,23 +14,28 @@ # limitations under the License. # ============================================================================== -import warnings from abc import ABCMeta, abstractmethod import numpy as np -from ..common._base import BaseEstimator +from ..common._backend import bind_default_backend from ..datatypes import _convert_to_supported, from_table, to_table from ..utils import _is_csr from ..utils.validation import _check_array -class BaseBasicStatistics(BaseEstimator, metaclass=ABCMeta): +class BaseBasicStatistics(metaclass=ABCMeta): @abstractmethod def __init__(self, result_options, algorithm): self.options = result_options self.algorithm = algorithm + @bind_default_backend("basic_statistics") + def _get_policy(self, queue, *data): ... + + @bind_default_backend("basic_statistics") + def compute(self, policy, params, data_table, weights_table): ... + @staticmethod def get_all_result_options(): return [ @@ -99,9 +104,8 @@ def fit(self, data, sample_weight=None, queue=None): def _compute_raw( self, data_table, weights_table, policy, dtype=np.float32, is_csr=False ): - module = self._get_backend("basic_statistics") params = self._get_onedal_params(is_csr, dtype) - result = module.compute(policy, params, data_table, weights_table) + result = self.compute(policy, params, data_table, weights_table) options = self._get_result_options(self.options).split("|") return {opt: getattr(result, opt) for opt in options} diff --git a/onedal/basic_statistics/incremental_basic_statistics.py b/onedal/basic_statistics/incremental_basic_statistics.py index 4935a57a47..4375d7bbc0 100644 --- a/onedal/basic_statistics/incremental_basic_statistics.py +++ b/onedal/basic_statistics/incremental_basic_statistics.py @@ -14,9 +14,12 @@ # limitations under the License. # ============================================================================== +from abc import abstractmethod + import numpy as np from daal4py.sklearn._utils import get_dtype +from onedal.common._backend import bind_default_backend from ..datatypes import _convert_to_supported, from_table, to_table from ..utils import _check_array @@ -69,10 +72,18 @@ def __init__(self, result_options="all"): super().__init__(result_options, algorithm="by_default") self._reset() + @bind_default_backend("basic_statistics") + def partial_compute_result(self): ... + + @bind_default_backend("basic_statistics") + def partial_compute(self, *args, **kwargs): ... + + @bind_default_backend("basic_statistics") + def finalize_compute(self, *args, **kwargs): ... 
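The `...` stub bodies in the bindings above are placeholders only: `bind_default_backend` replaces each decorated method with the corresponding native function found on the loaded backend module, so the Python body is never executed. A minimal sketch of the equivalent manual lookup, assuming a backend loaded successfully:

    from onedal import _default_backend

    # roughly what @bind_default_backend("basic_statistics") resolves for `compute`
    compute = getattr(_default_backend.basic_statistics, "compute")
    # est.compute(policy, params, data_table, weights_table) then dispatches
    # straight to this native oneDAL function; the stub body is discarded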
+ def _reset(self): - self._partial_result = self._get_backend( - "basic_statistics", None, "partial_compute_result" - ) + # get the _partial_result pointer from backend + self._partial_result = self.partial_compute_result() def partial_fit(self, X, weights=None, queue=None): """ @@ -113,15 +124,8 @@ def partial_fit(self, X, weights=None, queue=None): self._onedal_params = self._get_onedal_params(False, dtype=dtype) X_table, weights_table = to_table(X, weights) - self._partial_result = self._get_backend( - "basic_statistics", - None, - "partial_compute", - policy, - self._onedal_params, - self._partial_result, - X_table, - weights_table, + self._partial_result = self.partial_compute( + policy, self._onedal_params, self._partial_result, X_table, weights_table ) def finalize_fit(self, queue=None): @@ -145,14 +149,8 @@ def finalize_fit(self, queue=None): else: policy = self._get_policy(self._queue) - result = self._get_backend( - "basic_statistics", - None, - "finalize_compute", - policy, - self._onedal_params, - self._partial_result, - ) + result = self.finalize_compute(policy, self._onedal_params, self._partial_result) + options = self._get_result_options(self.options).split("|") for opt in options: setattr(self, opt, from_table(getattr(result, opt)).ravel()) diff --git a/onedal/basic_statistics/tests/test_basic_statistics.py b/onedal/basic_statistics/tests/test_basic_statistics.py index acdf8181b4..c3886ecffa 100644 --- a/onedal/basic_statistics/tests/test_basic_statistics.py +++ b/onedal/basic_statistics/tests/test_basic_statistics.py @@ -19,7 +19,6 @@ from numpy.testing import assert_allclose from scipy import sparse as sp -from daal4py.sklearn._utils import daal_check_version from onedal.basic_statistics import BasicStatistics from onedal.basic_statistics.tests.utils import options_and_tests from onedal.tests.utils._device_selection import get_queues diff --git a/onedal/cluster/dbscan.py b/onedal/cluster/dbscan.py index fbb6a52044..7bdc226c47 100644 --- a/onedal/cluster/dbscan.py +++ b/onedal/cluster/dbscan.py @@ -14,17 +14,19 @@ # limitations under the License. # =============================================================================== +from abc import abstractmethod + import numpy as np from daal4py.sklearn._utils import get_dtype, make2d +from onedal.common._backend import bind_default_backend -from ..common._base import BaseEstimator from ..common._mixin import ClusterMixin from ..datatypes import _convert_to_supported, from_table, to_table from ..utils import _check_array -class BaseDBSCAN(BaseEstimator, ClusterMixin): +class DBSCAN(ClusterMixin): def __init__( self, eps=0.5, @@ -46,6 +48,12 @@ def __init__( self.p = p self.n_jobs = n_jobs + @bind_default_backend("dbscan") + def _get_policy(self, queue, *data): ... + + @bind_default_backend("dbscan.clustering") + def compute(self, policy, params, data_table, weights_table): ... 
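The old `BaseDBSCAN`/`DBSCAN` split collapses into a single class whose backend entry point is bound directly to `dbscan.clustering`; the plain `fit` shown further below becomes the public method. A usage sketch on host data, assuming `DBSCAN` stays re-exported from `onedal.cluster` as before:

    import numpy as np
    from onedal.cluster import DBSCAN

    X = np.array([[1.0, 2.0], [1.5, 1.8], [8.0, 8.0], [8.2, 7.9]])
    est = DBSCAN(eps=1.0, min_samples=2).fit(X)  # queue=None -> host policy
    print(est.labels_)  # one cluster id per sample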
+ def _get_onedal_params(self, dtype=np.float32): return { "fptype": dtype, @@ -56,7 +64,7 @@ def _get_onedal_params(self, dtype=np.float32): "result_options": "core_observation_indices|responses", } - def _fit(self, X, y, sample_weight, module, queue): + def fit(self, X, y=None, sample_weight=None, queue=None): policy = self._get_policy(queue, X) X = _check_array(X, accept_sparse="csr", dtype=[np.float64, np.float32]) sample_weight = make2d(sample_weight) if sample_weight is not None else None @@ -68,7 +76,7 @@ def _fit(self, X, y, sample_weight, module, queue): X = _convert_to_supported(policy, X) dtype = get_dtype(X) params = self._get_onedal_params(dtype) - result = module.compute(policy, params, to_table(X), to_table(sample_weight)) + result = self.compute(policy, params, to_table(X), to_table(sample_weight)) self.labels_ = from_table(result.responses).ravel() if result.core_observation_indices is not None: @@ -80,31 +88,3 @@ def _fit(self, X, y, sample_weight, module, queue): self.components_ = np.take(X, self.core_sample_indices_, axis=0) self.n_features_in_ = X.shape[1] return self - - -class DBSCAN(BaseDBSCAN): - def __init__( - self, - eps=0.5, - *, - min_samples=5, - metric="euclidean", - metric_params=None, - algorithm="auto", - leaf_size=30, - p=None, - n_jobs=None, - ): - self.eps = eps - self.min_samples = min_samples - self.metric = metric - self.metric_params = metric_params - self.algorithm = algorithm - self.leaf_size = leaf_size - self.p = p - self.n_jobs = n_jobs - - def fit(self, X, y=None, sample_weight=None, queue=None): - return super()._fit( - X, y, sample_weight, self._get_backend("dbscan", "clustering", None), queue - ) diff --git a/onedal/cluster/kmeans.py b/onedal/cluster/kmeans.py index 93eadf8c6b..7e6d83b3e8 100644 --- a/onedal/cluster/kmeans.py +++ b/onedal/cluster/kmeans.py @@ -16,13 +16,14 @@ import logging import warnings -from abc import ABC +from abc import ABC, abstractmethod +from types import ModuleType import numpy as np from daal4py.sklearn._utils import daal_check_version, get_dtype -from onedal import _backend from onedal.basic_statistics import BasicStatistics +from onedal.common._backend import bind_default_backend if daal_check_version((2023, "P", 200)): from .kmeans_init import KMeansInit @@ -32,13 +33,14 @@ from sklearn.metrics.pairwise import euclidean_distances from sklearn.utils import check_random_state -from ..common._base import BaseEstimator as onedal_BaseEstimator +from onedal import _default_backend + from ..common._mixin import ClusterMixin, TransformerMixin from ..datatypes import _convert_to_supported, from_table, to_table from ..utils import _check_array, _is_arraylike_not_scalar, _is_csr -class _BaseKMeans(onedal_BaseEstimator, TransformerMixin, ClusterMixin, ABC): +class _BaseKMeans(TransformerMixin, ClusterMixin, ABC): def __init__( self, n_clusters, @@ -60,6 +62,18 @@ def __init__( self.random_state = random_state self.n_local_trials = n_local_trials + @bind_default_backend("kmeans_common") + def _get_policy(self, queue, X): ... + + @bind_default_backend("kmeans_common") + def _is_same_clustering(self, labels, best_labels, n_clusters): ... + + @bind_default_backend("kmeans.clustering") + def train(self, policy, params, X_table, centroids_table): ... + + @bind_default_backend("kmeans.clustering") + def infer(self, policy, params, model, centroids_table): ... 
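A detail these bindings rely on: the stubs are replaced by `BackendFunction` instances, which are plain callables rather than descriptors (the class defines only `__init__`, `__call__`, and `__repr__`, no `__get__`). Accessing `self.train` therefore performs no method binding, so the native function receives exactly the `(policy, params, ...)` arguments written at the call site, with no implicit `self`. A small sketch of that behavior:

    from onedal.common._backend import BackendFunction

    class Demo:
        # no __get__ on BackendFunction, so attribute access does not bind self
        method = BackendFunction(lambda policy, params: (policy, params),
                                 backend_type="host", name="demo.method")

    d = Demo()
    assert d.method("policy", "params") == ("policy", "params")  # no implicit self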
+ def _validate_center_shape(self, X, centers): """Check if centers is compatible with X and n_clusters.""" if centers.shape[0] != self.n_clusters: @@ -244,14 +258,14 @@ def _init_centroids_sklearn(self, X, init, random_state, policy, dtype=np.float3 return to_table(centers) def _fit_backend( - self, X_table, centroids_table, module, policy, dtype=np.float32, is_csr=False + self, X_table, centroids_table, policy, dtype=np.float32, is_csr=False ): params = self._get_onedal_params(is_csr, dtype) - meta = _backend.get_table_metadata(X_table) + meta = _default_backend.get_table_metadata(X_table) assert meta.get_npy_dtype(0) == dtype - result = module.train(policy, params, X_table, centroids_table) + result = self.train(policy, params, X_table, centroids_table) return ( result.responses, @@ -260,7 +274,7 @@ def _fit_backend( result.iteration_count, ) - def _fit(self, X, module, queue=None): + def _fit(self, X, queue=None): policy = self._get_policy(queue, X) is_csr = _is_csr(X) X = _check_array( @@ -272,8 +286,6 @@ def _fit(self, X, module, queue=None): self._check_params_vs_input(X_table, is_csr, policy, dtype=dtype) - params = self._get_onedal_params(is_csr, dtype) - self.n_features_in_ = X_table.column_count best_model, best_n_iter = None, None @@ -283,12 +295,10 @@ def is_better_iteration(inertia, labels): if best_inertia is None: return True else: - mod = self._get_backend("kmeans_common", None, None) better_inertia = inertia < best_inertia - same_clusters = mod._is_same_clustering( + return better_inertia and not self._is_same_clustering( labels, best_labels, self.n_clusters ) - return better_inertia and not same_clusters random_state = check_random_state(self.random_state) @@ -317,7 +327,7 @@ def is_better_iteration(inertia, labels): print("Initialization complete") labels, inertia, model, n_iter = self._fit_backend( - X_table, centroids_table, module, policy, dtype, is_csr + X_table, centroids_table, policy, dtype, is_csr ) if self.verbose: @@ -356,7 +366,7 @@ def cluster_centers_(self): centroids = self.model_.centroids self._cluster_centers_ = from_table(centroids) else: - raise NameError("This model have not been trained") + raise NameError("This model has not been trained") return self._cluster_centers_ @cluster_centers_.setter @@ -366,7 +376,6 @@ def cluster_centers_(self, cluster_centers): self.n_iter_ = 0 self.inertia_ = 0 - self.model_ = self._get_backend("kmeans", "clustering", "model") self.model_.centroids = to_table(self._cluster_centers_) self.n_features_in_ = self.model_.centroids.column_count self.labels_ = np.arange(self.model_.centroids.row_count) @@ -377,7 +386,7 @@ def cluster_centers_(self, cluster_centers): def cluster_centers_(self): del self._cluster_centers_ - def _predict(self, X, module, queue=None, result_options=None): + def _predict(self, X, queue=None, result_options=None): is_csr = _is_csr(X) policy = self._get_policy(queue, X) @@ -385,20 +394,21 @@ def _predict(self, X, module, queue=None, result_options=None): X_table = to_table(X) params = self._get_onedal_params(is_csr, X_table.dtype, result_options) - result = module.infer(policy, params, self.model_, X_table) + result = self.infer(policy, params, self.model_, X_table) - if ( - result_options == "compute_exact_objective_function" - ): # This is only set for score function - return result.objective_function_value * (-1) + if result_options == "compute_exact_objective_function": + # This is only set for score function + return -1 * result.objective_function_value else: return 
from_table(result.responses).ravel()
 
-    def _score(self, X, module, queue=None):
+    def _score(self, X, queue=None):
         result_options = "compute_exact_objective_function"
 
         return self._predict(
-            X, self._get_backend("kmeans", "clustering", None), queue, result_options
+            X,
+            queue,
+            result_options,
         )
 
     def _transform(self, X):
@@ -434,7 +444,7 @@ def __init__(
         assert self.algorithm == "lloyd"
 
     def fit(self, X, y=None, queue=None):
-        return super()._fit(X, self._get_backend("kmeans", "clustering", None), queue)
+        return self._fit(X, queue)
 
     def predict(self, X, queue=None):
         """Predict the closest cluster each sample in X belongs to.
@@ -453,7 +463,7 @@ def predict(self, X, queue=None):
         labels : ndarray of shape (n_samples,)
             Index of the cluster each sample belongs to.
         """
-        return super()._predict(X, self._get_backend("kmeans", "clustering", None), queue)
+        return self._predict(X, queue)
 
     def fit_predict(self, X, y=None, queue=None):
         """Compute cluster centers and predict cluster index for each sample.
@@ -529,7 +539,7 @@ def score(self, X, queue=None):
         score: float
             Opposite of the value of X on the K-means objective.
         """
-        return super()._score(X, self._get_backend("kmeans", "clustering", None), queue)
+        return self._score(X, queue)
 
 
 def k_means(
diff --git a/onedal/cluster/kmeans_init.py b/onedal/cluster/kmeans_init.py
index 0374be080b..4082d8e1e5 100755
--- a/onedal/cluster/kmeans_init.py
+++ b/onedal/cluster/kmeans_init.py
@@ -19,14 +19,14 @@
 from sklearn.utils import check_random_state
 
 from daal4py.sklearn._utils import daal_check_version, get_dtype
+from onedal.common._backend import bind_default_backend
 
-from ..common._base import BaseEstimator as onedal_BaseEstimator
 from ..datatypes import _convert_to_supported, from_table, to_table
 from ..utils import _check_array
 
 if daal_check_version((2023, "P", 200)):
 
-    class KMeansInit(onedal_BaseEstimator):
+    class KMeansInit:
         """
         KMeansInit oneDAL implementation.
         """
@@ -48,6 +48,12 @@ def __init__(
             else:
                 self.local_trials_count = local_trials_count
 
+        @bind_default_backend("kmeans_init")
+        def _get_policy(self, queue, *data): ...
+
+        @bind_default_backend("kmeans_init.init", lookup_name="compute")
+        def backend_compute(self, policy, params, X_table): ...
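Here `lookup_name` decouples the Python attribute name from the backend symbol: `backend_compute` resolves to the backend's `kmeans_init.init.compute`, which leaves the public, table-producing `compute(X, queue=None)` below free to keep the natural name. Roughly:

    # without lookup_name, the decorated function's own name is looked up:
    #   @bind_default_backend("kmeans_init.init")
    #   def compute(...):            -> <backend>.kmeans_init.init.compute
    # with lookup_name, the Python name and the backend symbol may differ:
    #   @bind_default_backend("kmeans_init.init", lookup_name="compute")
    #   def backend_compute(...):    -> still <backend>.kmeans_init.init.compute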
+ def _get_onedal_params(self, dtype=np.float32): return { "fptype": dtype, @@ -71,31 +77,21 @@ def _get_params_and_input(self, X, policy): params = self._get_onedal_params(dtype) return (params, to_table(X), dtype) - def _compute_raw(self, X_table, module, policy, dtype=np.float32): - params = self._get_onedal_params(dtype) - - result = module.compute(policy, params, X_table) - - return result.centroids - - def _compute(self, X, module, queue): + def compute(self, X, queue=None): policy = self._get_policy(queue, X) # oneDAL KMeans Init for sparse data does not have GPU support if issparse(X): policy = self._get_policy(None, None) _, X_table, dtype = self._get_params_and_input(X, policy) - centroids = self._compute_raw(X_table, module, policy, dtype) + centroids = self.compute_raw(X_table, policy, dtype) return from_table(centroids) def compute_raw(self, X_table, policy, dtype=np.float32): - return self._compute_raw( - X_table, self._get_backend("kmeans_init", "init", None), policy, dtype - ) - - def compute(self, X, queue=None): - return self._compute(X, self._get_backend("kmeans_init", "init", None), queue) + params = self._get_onedal_params(dtype) + result = self.backend_compute(policy, params, X_table) + return result.centroids def kmeans_plusplus( X, diff --git a/onedal/common/_backend.py b/onedal/common/_backend.py new file mode 100644 index 0000000000..70c6042eed --- /dev/null +++ b/onedal/common/_backend.py @@ -0,0 +1,143 @@ +# ============================================================================== +# Copyright 2024 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ==============================================================================
+
+import logging
+from contextlib import contextmanager
+from types import MethodType
+from typing import Any, Callable, Literal, Optional
+
+from onedal import Backend, _default_backend, _spmd_backend
+from onedal.common.policy_manager import PolicyManager
+
+from .backend_manager import BackendManager
+
+logger = logging.getLogger(__name__)
+
+default_manager = BackendManager(_default_backend)
+spmd_manager = BackendManager(_spmd_backend)
+
+# define types for backend functions: host, dpc, spmd
+BackendType = Literal["host", "dpc", "spmd"]
+
+logging.basicConfig(
+    level=logging.DEBUG,
+    format="%(asctime)s - %(name)s - %(levelname)s - %(message)s",
+)
+
+
+class BackendFunction:
+    """Wrapper around backend function to allow setting auxiliary information"""
+
+    def __init__(self, method: Callable[..., Any], backend_type: BackendType, name: str):
+        self.method = method
+        self.backend_type = backend_type
+        self.name = name
+
+    def __call__(self, *args: Any, **kwargs: Any) -> Any:
+        return self.method(*args, **kwargs)
+
+    def __repr__(self) -> str:
+        return f"BackendFunction(<{self.backend_type}_backend>.{self.name})"
+
+
+def inject_policy_manager(backend: Backend) -> Callable[..., Any]:
+    def _get_policy(self, queue: Any, *data: Any) -> Any:
+        policy_manager = PolicyManager(backend)
+        return policy_manager.get_policy(queue, *data)
+
+    return _get_policy
+
+
+@contextmanager
+def DefaultPolicyOverride(instance: Any):
+    original_method = getattr(instance, "_get_policy", None)
+    try:
+        # Inject the new _get_policy method from _default_backend
+        new_policy_method = inject_policy_manager(_default_backend)
+        bound_method = MethodType(new_policy_method, instance)
+        setattr(instance, "_get_policy", bound_method)
+        yield
+    finally:
+        # Restore the original _get_policy method
+        if original_method is not None:
+            setattr(instance, "_get_policy", original_method)
+        else:
+            delattr(instance, "_get_policy")
+
+
+def bind_default_backend(module_name: str, lookup_name: Optional[str] = None):
+    def decorator(method: Callable[..., Any]):
+        # grab the lookup_name from outer scope
+        nonlocal lookup_name
+
+        if lookup_name is None:
+            lookup_name = method.__name__
+
+        if _default_backend is None:
+            logger.debug(
+                f"Default backend unavailable, skipping decoration for '{method.__name__}'"
+            )
+            return method
+
+        if lookup_name == "_get_policy":
+            return inject_policy_manager(_default_backend)
+
+        backend_method = default_manager.get_backend_component(module_name, lookup_name)
+        wrapped_method = BackendFunction(
+            backend_method,
+            backend_type="dpc" if _default_backend.is_dpc else "host",
+            name=f"{module_name}.{method.__name__}",
+        )
+
+        backend_name = "dpc" if _default_backend.is_dpc else "host"
+        logger.debug(
+            f"Assigned method '<{backend_name}_backend>.{module_name}.{lookup_name}' to '{method.__qualname__}'"
+        )
+
+        return wrapped_method
+
+    return decorator
+
+
+def bind_spmd_backend(module_name: str, lookup_name: Optional[str] = None):
+    def decorator(method: Callable[..., Any]):
+        # grab the lookup_name from outer scope
+        nonlocal lookup_name
+
+        if lookup_name is None:
+            lookup_name = method.__name__
+
+        if _spmd_backend is None:
+            logger.debug(
+                f"SPMD backend unavailable, skipping decoration for '{method.__name__}'"
+            )
+            return method
+
+        if lookup_name == "_get_policy":
+            return inject_policy_manager(_spmd_backend)
+
+        backend_method = spmd_manager.get_backend_component(module_name, lookup_name)
+        wrapped_method = BackendFunction(
+            backend_method, backend_type="spmd", name=f"{module_name}.{method.__name__}"
+        )
+
+        logger.debug(
+            f"Assigned method '<spmd_backend>.{module_name}.{lookup_name}' to '{method.__qualname__}'"
+        )
+
+        return wrapped_method
+
+    return decorator
diff --git a/onedal/common/_base.py b/onedal/common/_base.py
deleted file mode 100644
index 3129b8d3cb..0000000000
--- a/onedal/common/_base.py
+++ /dev/null
@@ -1,38 +0,0 @@
-# ==============================================================================
-# Copyright 2024 Intel Corporation
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ==============================================================================
-
-from abc import ABC
-
-from onedal import _backend
-
-from ._policy import _get_policy
-
-
-def _get_backend(backend, module, submodule=None, method=None, *args, **kwargs):
-    result = getattr(backend, module)
-    if submodule:
-        result = getattr(result, submodule)
-    if method:
-        return getattr(result, method)(*args, **kwargs)
-    return result
-
-
-class BaseEstimator(ABC):
-    def _get_backend(self, module, submodule=None, method=None, *args, **kwargs):
-        return _get_backend(_backend, module, submodule, method, *args, **kwargs)
-
-    def _get_policy(self, queue, *data):
-        return _get_policy(queue, *data)
diff --git a/onedal/common/_spmd_policy.py b/onedal/common/_spmd_policy.py
index a9f83c8a47..9a9177f587 100644
--- a/onedal/common/_spmd_policy.py
+++ b/onedal/common/_spmd_policy.py
@@ -14,10 +14,9 @@
 # limitations under the License.
 # ==============================================================================
 
-from onedal import _is_spmd_backend
+from onedal import _spmd_backend
 
-if _is_spmd_backend:
-    from onedal import _spmd_backend
+if _spmd_backend is not None:
 
     class _SPMDDataParallelInteropPolicy(_spmd_backend.spmd_data_parallel_policy):
         def __init__(self, queue):
diff --git a/onedal/common/backend_manager.py b/onedal/common/backend_manager.py
new file mode 100644
index 0000000000..1bb8e5ec9a
--- /dev/null
+++ b/onedal/common/backend_manager.py
@@ -0,0 +1,43 @@
+# ==============================================================================
+# Copyright 2024 Intel Corporation
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+
+
+class BackendManager:
+    def __init__(self, backend_module):
+        self.backend = backend_module
+
+    def get_backend_component(self, module_name: str, component_name: str):
+        """Get a component of the backend module.
+
+        Args:
+            module_name (str): The module to get the component from.
+            component_name (str): The component to get from the module.
+
+        Returns:
+            The component of the module.
+        """
+        submodules = module_name.split(".")
+        module = getattr(self.backend, submodules[0])
+        for submodule in submodules[1:]:
+            module = getattr(module, submodule)
+
+        # component_name can be given as submodule.method; there can be an
+        # arbitrary number of submodules and methods
+        result = module
+        for part in component_name.split("."):
+            result = getattr(result, part)
+
+        return result
diff --git a/onedal/common/hyperparameters.py b/onedal/common/hyperparameters.py
index c32440cf62..9471999a57 100644
--- a/onedal/common/hyperparameters.py
+++ b/onedal/common/hyperparameters.py
@@ -19,7 +19,7 @@
 from warnings import warn
 
 from daal4py.sklearn._utils import daal_check_version
-from onedal import _backend
+from onedal import _default_backend as backend
 
 if not daal_check_version((2024, "P", 0)):
     warn("Hyperparameters are supported in oneDAL starting from 2024.0.0 version.")
@@ -98,11 +98,11 @@ def get_methods_with_prefix(obj, prefix):
     (
         "linear_regression",
         "train",
-    ): _backend.linear_model.regression.train_hyperparameters(),
-    ("covariance", "compute"): _backend.covariance.compute_hyperparameters(),
+    ): backend.linear_model.regression.train_hyperparameters(),
+    ("covariance", "compute"): backend.covariance.compute_hyperparameters(),
 }
 if daal_check_version((2024, "P", 300)):
-    df_infer_hp = _backend.decision_forest.infer_hyperparameters
+    df_infer_hp = backend.decision_forest.infer_hyperparameters
     hyperparameters_backend[("decision_forest", "infer")] = df_infer_hp()
 
 hyperparameters_map = {}
diff --git a/onedal/common/policy_manager.py b/onedal/common/policy_manager.py
new file mode 100644
index 0000000000..7f1ed8ab52
--- /dev/null
+++ b/onedal/common/policy_manager.py
@@ -0,0 +1,70 @@
+# ==============================================================================
+# Copyright 2024 Intel Corporation
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ============================================================================== + + +class Policy: + """Encapsulates backend policies for a unified interface with auxiliary information""" + + def __init__(self, policy_module, queue, is_dpc, is_spmd): + self.policy = policy_module(queue) if queue else policy_module() + self.is_dpc = is_dpc + self.is_spmd = is_spmd + if is_dpc: + if queue is None: + raise ValueError("DPC++ policy requires a queue") + self._queue = queue + + def __getattr__(self, name): + return getattr(self.policy, name) + + def __repr__(self) -> str: + return f"Policy({self.policy}, is_dpc={self.is_dpc}, is_spmd={self.is_spmd})" + + +class PolicyManager: + def __init__(self, backend): + self.backend = backend + + @staticmethod + def get_queue(*data): + if not data: + return + if iface := getattr(data[0], "__sycl_usm_array_interface__", None): + queue = iface.get("syclobj") + if not queue: + raise KeyError("No syclobj in provided data") + return queue + + def get_policy(self, provided_queue, *data): + data_queue = PolicyManager.get_queue(*data) + queue = provided_queue if provided_queue is not None else data_queue + + if not self.backend.is_dpc and queue is not None: + raise RuntimeError("Operations using queues require the DPC backend") + + if self.backend.is_spmd and queue is not None: + backend_policy = self.backend.spmd_data_parallel_policy + is_dpc = True + is_spmd = True + elif self.backend.is_dpc and queue is not None: + backend_policy = self.backend.data_parallel_policy + is_dpc = True + is_spmd = False + else: + backend_policy = self.backend.host_policy + is_dpc = False + is_spmd = False + return Policy(backend_policy, queue, is_dpc, is_spmd) diff --git a/onedal/common/tests/test_backend_manager.py b/onedal/common/tests/test_backend_manager.py new file mode 100644 index 0000000000..a03306af0b --- /dev/null +++ b/onedal/common/tests/test_backend_manager.py @@ -0,0 +1,121 @@ +# ============================================================================== +# Copyright 2024 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================== + + +import pytest + +from onedal.common.backend_manager import BackendManager + + +# Define a simple backend module for testing +class DummyBackend: + class Module: + class Submodule: + def method(self, *args, **kwargs): + return "method_result" + + def __init__(self): + self.submodule_instance = self.Submodule() + + def method(self, *args, **kwargs): + return "method_result" + + def __init__(self): + self.module_instance = self.Module() + + @property + def module(self): + return self.module_instance + + +@pytest.fixture +def backend_manager(): + backend = DummyBackend() + return BackendManager(backend) + + +def test_get_backend_component_with_method(backend_manager): + result = backend_manager.get_backend_component("module", "method") + assert result() == "method_result" + + +def test_get_backend_component_with_submodule_method(backend_manager): + result = backend_manager.get_backend_component("module.submodule_instance", "method") + assert result() == "method_result" + + +def test_get_backend_component_with_invalid_module(backend_manager): + with pytest.raises(AttributeError): + backend_manager.get_backend_component("invalid_module", "method") + + +def test_get_backend_component_with_invalid_submodule(backend_manager): + with pytest.raises(AttributeError): + backend_manager.get_backend_component("module.invalid_submodule", "method") + + +def test_get_backend_component_with_invalid_method(backend_manager): + with pytest.raises(AttributeError): + backend_manager.get_backend_component( + "module", "submodule_instance.invalid_method" + ) + + +def test_get_backend_component_with_multiple_methods(backend_manager): + class ExtendedDummyBackend(DummyBackend): + class Module(DummyBackend.Module): + class Submodule(DummyBackend.Module.Submodule): + def another_method(self, *args, **kwargs): + return "another_method_result" + + def __init__(self): + super().__init__() + self.submodule_instance = self.Submodule() + + def __init__(self): + self.module_instance = self.Module() + + backend_manager.backend = ExtendedDummyBackend() + result = backend_manager.get_backend_component( + "module.submodule_instance", "another_method" + ) + assert result() == "another_method_result" + + +def test_get_backend_component_with_deeply_nested_submodules(backend_manager): + class DeeplyNestedDummyBackend(DummyBackend): + class Module(DummyBackend.Module): + class Submodule(DummyBackend.Module.Submodule): + class DeepSubmodule: + def deep_method(self, *args, **kwargs): + return "deep_method_result" + + def __init__(self): + super().__init__() + self.deep_submodule_instance = self.DeepSubmodule() + + def __init__(self): + super().__init__() + self.submodule_instance = self.Submodule() + + def __init__(self): + self.module_instance = self.Module() + + backend_manager.backend = DeeplyNestedDummyBackend() + result = backend_manager.get_backend_component( + "module.submodule_instance.deep_submodule_instance", "deep_method" + ) + assert result() == "deep_method_result" diff --git a/onedal/common/tests/test_policy.py b/onedal/common/tests/test_policy.py index 36d9865e23..8908c2dfc4 100644 --- a/onedal/common/tests/test_policy.py +++ b/onedal/common/tests/test_policy.py @@ -17,7 +17,8 @@ import numpy as np import pytest -from onedal.common._policy import _get_policy +from onedal import _default_backend, _dpc_backend +from onedal.common.policy_manager import PolicyManager from onedal.tests.utils._device_selection import ( device_type_to_str, 
get_memory_usm, @@ -26,11 +27,13 @@ ) from onedal.utils._dpep_helpers import dpctl_available +policy_manager = PolicyManager(_dpc_backend or _default_backend) + @pytest.mark.parametrize("queue", get_queues()) def test_queue_passed_directly(queue): device_name = device_type_to_str(queue) - test_queue = _get_policy(queue) + test_queue = policy_manager.get_policy(queue) test_device_name = test_queue.get_device_name() assert test_device_name == device_name @@ -41,7 +44,7 @@ def test_with_numpy_data(queue): y = np.zeros(3) device_name = device_type_to_str(queue) - assert _get_policy(queue, X, y).get_device_name() == device_name + assert policy_manager.get_policy(queue, X, y).get_device_name() == device_name @pytest.mark.skipif(not dpctl_available, reason="depends on dpctl") @@ -58,7 +61,7 @@ def test_with_usm_ndarray_data(queue, memtype): device_name = device_type_to_str(queue) X = usm_ndarray((5, 3), buffer=memtype(5 * 3 * 8, queue=queue)) y = usm_ndarray((3,), buffer=memtype(3 * 8, queue=queue)) - assert _get_policy(None, X, y).get_device_name() == device_name + assert policy_manager.get_policy(None, X, y).get_device_name() == device_name @pytest.mark.skipif( @@ -73,4 +76,4 @@ def test_queue_parameter_with_usm_ndarray(memtype): q2 = SyclQueue("gpu") X = usm_ndarray((5, 3), buffer=memtype(5 * 3 * 8, queue=q1)) - assert _get_policy(q2, X).get_device_name() == device_type_to_str(q2) + assert policy_manager.get_policy(q2, X).get_device_name() == device_type_to_str(q2) diff --git a/onedal/common/tests/test_policy_manager.py b/onedal/common/tests/test_policy_manager.py new file mode 100644 index 0000000000..9693f642dd --- /dev/null +++ b/onedal/common/tests/test_policy_manager.py @@ -0,0 +1,136 @@ +# ============================================================================== +# Copyright 2024 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================== + +from unittest.mock import MagicMock + +import pytest + +from onedal.common.policy_manager import PolicyManager + + +# Define a simple backend module for testing +class DummyBackend: + def __init__(self, is_dpc, is_spmd): + self.is_dpc = is_dpc + self.is_spmd = is_spmd + + def data_parallel_policy(self, queue): + return f"data_parallel_policy({queue})" + + def spmd_data_parallel_policy(self, queue): + return f"spmd_data_parallel_policy({queue})" + + def host_policy(self): + return "host_policy" + + +@pytest.fixture +def backend_dpc(): + return DummyBackend(is_dpc=True, is_spmd=False) + + +@pytest.fixture +def backend_spmd(): + return DummyBackend(is_dpc=True, is_spmd=True) + + +@pytest.fixture +def backend_host(): + return DummyBackend(is_dpc=False, is_spmd=False) + + +@pytest.fixture +def policy_manager_dpc(backend_dpc): + return PolicyManager(backend_dpc) + + +@pytest.fixture +def policy_manager_spmd(backend_spmd): + return PolicyManager(backend_spmd) + + +@pytest.fixture +def policy_manager_host(backend_host): + return PolicyManager(backend_host) + + +def test_get_queue_with_sycl_usm_array_interface(): + data = [MagicMock()] + data[0].__sycl_usm_array_interface__ = {"syclobj": "queue"} + queue = PolicyManager.get_queue(*data) + assert queue == "queue" + + +def test_get_queue_without_sycl_usm_array_interface(): + data = [MagicMock()] + queue = PolicyManager.get_queue(*data) + assert queue is None + + +def test_get_policy_with_provided_queue(policy_manager_dpc): + provided_queue = MagicMock() + policy = policy_manager_dpc.get_policy(provided_queue) + assert policy.policy == "data_parallel_policy({})".format(provided_queue) + assert policy.is_dpc is True + assert policy.is_spmd is False + + +def test_get_policy_with_data_queue(policy_manager_dpc): + data = [MagicMock()] + data[0].__sycl_usm_array_interface__ = {"syclobj": MagicMock()} + policy = policy_manager_dpc.get_policy(None, *data) + assert policy.policy == "data_parallel_policy({})".format( + data[0].__sycl_usm_array_interface__["syclobj"] + ) + assert policy.is_dpc is True + assert policy.is_spmd is False + + +def test_get_policy_with_host_backend_and_queue(policy_manager_host): + provided_queue = MagicMock() + with pytest.raises( + RuntimeError, match="Operations using queues require the DPC backend" + ): + policy_manager_host.get_policy(provided_queue) + + +def test_get_policy_with_host_backend(policy_manager_host): + policy = policy_manager_host.get_policy(None) + assert policy.policy == "host_policy" + assert policy.is_dpc is False + assert policy.is_spmd is False + + +def test_get_policy_with_dpc_backend_no_queue(policy_manager_dpc): + policy = policy_manager_dpc.get_policy(None) + assert policy.policy == "host_policy" + assert policy.is_dpc is False + assert policy.is_spmd is False + + +def test_get_policy_with_spmd_backend_and_queue(policy_manager_spmd): + provided_queue = MagicMock() + policy = policy_manager_spmd.get_policy(provided_queue) + assert policy.policy == "spmd_data_parallel_policy({})".format(provided_queue) + assert policy.is_dpc is True + assert policy.is_spmd is True + + +def test_get_policy_with_spmd_backend_no_queue(policy_manager_spmd): + policy = policy_manager_spmd.get_policy(None) + assert policy.policy == "host_policy" + assert policy.is_dpc is False + assert policy.is_spmd is False diff --git a/onedal/common/tests/test_sycl.py b/onedal/common/tests/test_sycl.py index d154718349..43b1048107 100644 --- 
a/onedal/common/tests/test_sycl.py +++ b/onedal/common/tests/test_sycl.py @@ -14,16 +14,15 @@ # limitations under the License. # ============================================================================== -import numpy as np import pytest -from onedal import _backend, _is_dpc_backend +from onedal import _default_backend as backend from onedal.tests.utils._device_selection import get_queues from onedal.utils._dpep_helpers import dpctl_available @pytest.mark.skipif( - not _is_dpc_backend or not dpctl_available, reason="requires dpc backend and dpctl" + not backend.is_dpc or not dpctl_available, reason="requires dpc backend and dpctl" ) @pytest.mark.parametrize("device_type", ["cpu", "gpu"]) @pytest.mark.parametrize("device_number", [None, 0, 1, 2, 3]) @@ -32,7 +31,7 @@ def test_sycl_queue_string_creation(device_type, device_number): from dpctl import SyclQueue from dpctl._sycl_queue import SyclQueueCreationError - onedal_SyclQueue = _backend.SyclQueue + onedal_SyclQueue = backend.SyclQueue device = ( ":".join([device_type, str(device_number)]) @@ -63,14 +62,14 @@ def test_sycl_queue_string_creation(device_type, device_number): @pytest.mark.skipif( - not _is_dpc_backend or not dpctl_available, reason="requires dpc backend and dpctl" + not backend.is_dpc or not dpctl_available, reason="requires dpc backend and dpctl" ) @pytest.mark.parametrize("queue", get_queues()) def test_sycl_queue_conversion(queue): if queue is None: pytest.skip("Not a dpctl queue") SyclQueue = queue.__class__ - onedal_SyclQueue = _backend.SyclQueue + onedal_SyclQueue = backend.SyclQueue q = onedal_SyclQueue(queue) @@ -83,7 +82,7 @@ def test_sycl_queue_conversion(queue): @pytest.mark.skipif( - not _is_dpc_backend or not dpctl_available, reason="requires dpc backend and dpctl" + not backend.is_dpc or not dpctl_available, reason="requires dpc backend and dpctl" ) @pytest.mark.parametrize("queue", get_queues()) def test_sycl_device_attributes(queue): @@ -91,7 +90,7 @@ def test_sycl_device_attributes(queue): if queue is None: pytest.skip("Not a dpctl queue") - onedal_SyclQueue = _backend.SyclQueue + onedal_SyclQueue = backend.SyclQueue onedal_queue = onedal_SyclQueue(queue) @@ -107,13 +106,13 @@ def test_sycl_device_attributes(queue): assert onedal_queue.sycl_device.filter_string in queue.sycl_device.filter_string -@pytest.mark.skipif(not _is_dpc_backend, reason="requires dpc backend") +@pytest.mark.skipif(not backend.is_dpc, reason="requires dpc backend") def test_backend_queue(): - q = _backend.SyclQueue("cpu") + q = backend.SyclQueue("cpu") # verify copying via a py capsule object is functional - q2 = _backend.SyclQueue(q._get_capsule()) + q2 = backend.SyclQueue(q._get_capsule()) # verify copying via the _get_capsule attribute - q3 = _backend.SyclQueue(q) + q3 = backend.SyclQueue(q) q_array = [q, q2, q3] diff --git a/onedal/covariance/covariance.py b/onedal/covariance/covariance.py index 4d34e5db2b..5f040a090f 100644 --- a/onedal/covariance/covariance.py +++ b/onedal/covariance/covariance.py @@ -18,19 +18,25 @@ import numpy as np from daal4py.sklearn._utils import daal_check_version, get_dtype +from onedal.common._backend import bind_default_backend from onedal.utils import _check_array -from ..common._base import BaseEstimator from ..common.hyperparameters import get_hyperparameters from ..datatypes import _convert_to_supported, from_table, to_table -class BaseEmpiricalCovariance(BaseEstimator, metaclass=ABCMeta): +class BaseEmpiricalCovariance(metaclass=ABCMeta): def __init__(self, method="dense", bias=False, 
assume_centered=False): self.method = method self.bias = bias self.assume_centered = assume_centered + @bind_default_backend("covariance") + def _get_policy(self, queue, *data): ... + + @bind_default_backend("covariance") + def compute(self, *args, **kwargs): ... + def _get_onedal_params(self, dtype=np.float32): params = { "fptype": dtype, @@ -100,19 +106,14 @@ def fit(self, X, y=None, queue=None): params = self._get_onedal_params(dtype) hparams = get_hyperparameters("covariance", "compute") if hparams is not None and not hparams.is_default: - result = self._get_backend( - "covariance", - None, - "compute", + result = self.compute( policy, params, hparams.backend, to_table(X), ) else: - result = self._get_backend( - "covariance", None, "compute", policy, params, to_table(X) - ) + result = self.compute(policy, params, to_table(X)) if daal_check_version((2024, "P", 1)) or (not self.bias): self.covariance_ = from_table(result.cov_matrix) else: diff --git a/onedal/covariance/incremental_covariance.py b/onedal/covariance/incremental_covariance.py index baa6d48163..94f42fc8bc 100644 --- a/onedal/covariance/incremental_covariance.py +++ b/onedal/covariance/incremental_covariance.py @@ -13,9 +13,12 @@ # See the License for the specific language governing permissions and # limitations under the License. # =============================================================================== +from abc import abstractmethod + import numpy as np from daal4py.sklearn._utils import daal_check_version, get_dtype +from onedal.common._backend import bind_default_backend from ..datatypes import _convert_to_supported, from_table, to_table from ..utils import _check_array @@ -56,10 +59,17 @@ def __init__(self, method="dense", bias=False, assume_centered=False): super().__init__(method, bias, assume_centered) self._reset() + @bind_default_backend("covariance") + def partial_compute(self, policy, params, partial_result, X_table): ... + + @bind_default_backend("covariance") + def partial_compute_result(self): ... + + @bind_default_backend("covariance") + def finalize_compute(self, policy, params, partial_result): ... 
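The three bindings above follow the usual oneDAL incremental protocol: `partial_compute_result()` creates an empty native accumulator, each `partial_compute` folds one batch into it, and `finalize_compute` turns it into the final result. A hedged usage sketch, assuming the class name `IncrementalEmpiricalCovariance` exported from `onedal.covariance` as in this module:

    import numpy as np
    from onedal.covariance import IncrementalEmpiricalCovariance

    est = IncrementalEmpiricalCovariance()
    for batch in np.array_split(np.random.rand(1000, 5), 4):
        est.partial_fit(batch)    # backend partial_compute on each chunk
    est.finalize_fit()            # backend finalize_compute
    print(est.covariance_.shape)  # (5, 5)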
+
+
     def _reset(self):
-        self._partial_result = self._get_backend(
-            "covariance", None, "partial_compute_result"
-        )
+        self._partial_result = self.partial_compute_result()
 
     def partial_fit(self, X, y=None, queue=None):
         """
@@ -96,14 +106,8 @@ def partial_fit(self, X, y=None, queue=None):
         params = self._get_onedal_params(self._dtype)
         table_X = to_table(X)
-        self._partial_result = self._get_backend(
-            "covariance",
-            None,
-            "partial_compute",
-            policy,
-            params,
-            self._partial_result,
-            table_X,
+        self._partial_result = self.partial_compute(
+            policy, params, self._partial_result, table_X
         )
 
     def finalize_fit(self, queue=None):
@@ -127,10 +131,7 @@ def finalize_fit(self, queue=None):
         else:
             policy = self._get_policy(self._queue)
 
-        result = self._get_backend(
-            "covariance",
-            None,
-            "finalize_compute",
+        result = self.finalize_compute(
             policy,
             params,
             self._partial_result,
diff --git a/onedal/datatypes/_data_conversion.py b/onedal/datatypes/_data_conversion.py
index 8f2989eeb0..72744944de 100644
--- a/onedal/datatypes/_data_conversion.py
+++ b/onedal/datatypes/_data_conversion.py
@@ -18,7 +18,7 @@
 
 import numpy as np
 
-from onedal import _backend, _is_dpc_backend
+from onedal import _default_backend as backend
 
 
 def _apply_and_pass(func, *args, **kwargs):
@@ -29,7 +29,7 @@ def _apply_and_pass(func, *args, **kwargs):
 
 def _convert_one_to_table(arg):
     # All inputs for table conversion must be array-like or sparse, not scalars
-    return _backend.to_table(np.atleast_2d(arg) if np.isscalar(arg) else arg)
+    return backend.to_table(np.atleast_2d(arg) if np.isscalar(arg) else arg)
 
 
 def to_table(*args):
@@ -54,7 +54,7 @@ def to_table(*args):
     return _apply_and_pass(_convert_one_to_table, *args)
 
 
-if _is_dpc_backend:
+if backend.is_dpc:
 
     try:
         # try/catch is used here instead of dpep_helpers because
@@ -79,14 +79,12 @@ def _table_to_array(table, xp=None):
         def _table_to_array(table, xp=None):
             return xp.asarray(table)
 
-    from ..common._policy import _HostInteropPolicy
-
     def _convert_to_supported(policy, *data):
         def func(x):
             return x
 
-        # CPUs support FP64 by default
-        if isinstance(policy, _HostInteropPolicy):
+        if not policy.is_dpc:
+            # CPUs support FP64 by default
             return _apply_and_pass(func, *data)
 
         # It can be either SPMD or DPCPP policy
@@ -123,12 +121,12 @@ def convert_one_from_table(table, sycl_queue=None, sua_iface=None, xp=None):
             # Host tables are first converted into numpy ndarrays and then to an array from the xp
             # namespace.
return xp.asarray( - _backend.from_table(table), usm_type="device", sycl_queue=sycl_queue + backend.from_table(table), usm_type="device", sycl_queue=sycl_queue ) else: return _table_to_array(table, xp=xp) - return _backend.from_table(table) + return backend.from_table(table) else: @@ -145,7 +143,7 @@ def convert_one_from_table(table, sycl_queue=None, sua_iface=None, xp=None): raise RuntimeError( "SYCL usm array conversion from table requires the DPC backend" ) - return _backend.from_table(table) + return backend.from_table(table) def from_table(*args, sycl_queue=None, sua_iface=None, xp=None): diff --git a/onedal/datatypes/tests/test_data.py b/onedal/datatypes/tests/test_data.py index 7ce2171bbe..6784a738ea 100644 --- a/onedal/datatypes/tests/test_data.py +++ b/onedal/datatypes/tests/test_data.py @@ -18,10 +18,13 @@ import pytest from numpy.testing import assert_allclose -from onedal import _backend, _is_dpc_backend +from onedal import _default_backend, _dpc_backend +from onedal.common.policy_manager import PolicyManager from onedal.datatypes import from_table, to_table from onedal.utils._dpep_helpers import dpctl_available +backend = _dpc_backend or _default_backend + if dpctl_available: from onedal.datatypes.tests.common import ( _assert_sua_iface_fields, @@ -52,29 +55,27 @@ ORDER_DICT = {"F": np.asfortranarray, "C": np.ascontiguousarray} -if _is_dpc_backend: +if backend.is_dpc: from daal4py.sklearn._utils import get_dtype - from onedal.cluster.dbscan import BaseDBSCAN - from onedal.common._policy import _get_policy + from onedal.cluster.dbscan import DBSCAN class DummyEstimatorWithTableConversions: def fit(self, X, y=None): sua_iface, xp, _ = _get_sycl_namespace(X) - policy = _get_policy(X.sycl_queue, None) - bs_DBSCAN = BaseDBSCAN() + policy_manager = PolicyManager(_dpc_backend) + policy = policy_manager.get_policy(X.sycl_queue, None) + dbscan = DBSCAN() types = [xp.float32, xp.float64] if get_dtype(X) not in types: X = xp.astype(X, dtype=xp.float64) dtype = get_dtype(X) - params = bs_DBSCAN._get_onedal_params(dtype) + params = dbscan._get_onedal_params(dtype) X_table = to_table(X) # TODO: # check other candidates for the dummy base oneDAL func. # oneDAL backend func is needed to check result table checks. 
- result = _backend.dbscan.clustering.compute( - policy, params, X_table, to_table(None) - ) + result = dbscan.compute(policy, params, X_table, to_table(None)) result_responses_table = result.responses result_responses_df = from_table( result_responses_table, @@ -230,7 +231,7 @@ def test_conversion_to_table(dtype): reason="dpctl is required for checks.", ) @pytest.mark.skipif( - not _is_dpc_backend, + not backend.is_dpc, reason="__sycl_usm_array_interface__ support requires DPC backend.", ) @pytest.mark.parametrize( @@ -266,7 +267,7 @@ def test_input_sua_iface_zero_copy(dataframe, queue, order, dtype): reason="dpctl is required for checks.", ) @pytest.mark.skipif( - not _is_dpc_backend, + not backend.is_dpc, reason="__sycl_usm_array_interface__ support requires DPC backend.", ) @pytest.mark.parametrize( @@ -323,7 +324,7 @@ def test_table_conversions(dataframe, queue, order, data_shape, dtype): @pytest.mark.skipif( - not _is_dpc_backend, + not backend.is_dpc, reason="__sycl_usm_array_interface__ support requires DPC backend.", ) @pytest.mark.parametrize( @@ -343,7 +344,7 @@ def test_sua_iface_interop_invalid_shape(dataframe, queue, data_shape): @pytest.mark.skipif( - not _is_dpc_backend, + not backend.is_dpc, reason="__sycl_usm_array_interface__ support requires DPC backend.", ) @pytest.mark.parametrize( @@ -375,7 +376,7 @@ def test_sua_iface_interop_unsupported_dtypes(dataframe, queue, dtype): "dataframe,queue", get_dataframes_and_queues("numpy,dpctl,dpnp", "cpu,gpu") ) def test_to_table_non_contiguous_input(dataframe, queue): - if dataframe in "dpnp,dpctl" and not _is_dpc_backend: + if dataframe in "dpnp,dpctl" and not backend.is_dpc: pytest.skip("__sycl_usm_array_interface__ support requires DPC backend.") X, _ = np.mgrid[:10, :10] X = _convert_to_dataframe(X, sycl_queue=queue, target_df=dataframe) @@ -388,7 +389,7 @@ def test_to_table_non_contiguous_input(dataframe, queue): @pytest.mark.skipif( - _is_dpc_backend, + backend.is_dpc, reason="Required check should be done if no DPC backend.", ) @pytest.mark.parametrize( diff --git a/onedal/decomposition/incremental_pca.py b/onedal/decomposition/incremental_pca.py index 7199c1e1c2..48eb0554f7 100644 --- a/onedal/decomposition/incremental_pca.py +++ b/onedal/decomposition/incremental_pca.py @@ -14,9 +14,12 @@ # limitations under the License. # ============================================================================== +from abc import abstractmethod + import numpy as np from daal4py.sklearn._utils import get_dtype +from onedal.common._backend import bind_default_backend from ..datatypes import _convert_to_supported, from_table, to_table from ..utils import _check_array @@ -98,10 +101,17 @@ def __init__( self.whiten = whiten self._reset() + @bind_default_backend("decomposition.dim_reduction") + def finalize_train(self, policy, params, partial_result): ... + + @bind_default_backend("decomposition.dim_reduction") + def partial_train(self, policy, params, partial_result, X_table): ... + + @bind_default_backend("decomposition.dim_reduction") + def partial_train_result(self): ... 
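As with the other incremental estimators in this patch, `finalize_fit` can run without an explicit queue: the policy is rebuilt from the queue remembered during `partial_fit`, so the final reduction happens on the device that accumulated the partial results. A sketch, assuming dpctl, an available GPU, and that `partial_fit` records the queue in `self._queue` as the other incremental estimators here do:

    import dpctl
    import numpy as np
    from onedal.decomposition import IncrementalPCA

    q = dpctl.SyclQueue("gpu")
    est = IncrementalPCA(n_components=2)
    for batch in np.array_split(np.random.rand(300, 10), 3):
        est.partial_fit(batch, queue=q)  # runs on q's device; q is remembered
    est.finalize_fit()                   # no queue given -> reuses est._queue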
+ def _reset(self): - self._partial_result = self._get_backend( - "decomposition", "dim_reduction", "partial_train_result" - ) + self._partial_result = self.partial_train_result() if hasattr(self, "components_"): del self.components_ @@ -151,14 +161,8 @@ def partial_fit(self, X, queue): self._params = self._get_onedal_params(X) X_table = to_table(X) - self._partial_result = self._get_backend( - "decomposition", - "dim_reduction", - "partial_train", - policy, - self._params, - self._partial_result, - X_table, + self._partial_result = self.partial_train( + policy, self._params, self._partial_result, X_table ) return self @@ -181,10 +185,7 @@ def finalize_fit(self, queue=None): policy = self._get_policy(queue) else: policy = self._get_policy(self._queue) - result = self._get_backend( - "decomposition", - "dim_reduction", - "finalize_train", + result = self.finalize_train( policy, self._params, self._partial_result, diff --git a/onedal/decomposition/pca.py b/onedal/decomposition/pca.py index 9f77871dc6..7d6243953e 100644 --- a/onedal/decomposition/pca.py +++ b/onedal/decomposition/pca.py @@ -15,17 +15,18 @@ # ============================================================================== import numbers -from abc import ABCMeta +from abc import ABCMeta, abstractmethod import numpy as np from sklearn.decomposition._pca import _infer_dimension from sklearn.utils.extmath import stable_cumsum -from ..common._base import BaseEstimator +from onedal.common._backend import bind_default_backend + from ..datatypes import _convert_to_supported, from_table, to_table -class BasePCA(BaseEstimator, metaclass=ABCMeta): +class BasePCA(metaclass=ABCMeta): """ Base class for PCA oneDAL implementation. """ @@ -42,6 +43,19 @@ def __init__( self.is_deterministic = is_deterministic self.whiten = whiten + @bind_default_backend("decomposition.dim_reduction") + def _get_policy(self, queue, *data): ... + + # provides direct access to the backend model constructor + @bind_default_backend("decomposition.dim_reduction") + def model(self): ... + + @bind_default_backend("decomposition.dim_reduction") + def train(self, policy, params, X): ... + + @bind_default_backend("decomposition.dim_reduction") + def infer(self, policy, params, X, model): ... 
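The `model()` binding above exposes the backend's model constructor directly, which is what `_create_model` in the next hunk relies on. Roughly (sketch; `est` stands for a fitted BasePCA subclass):

    # Rebuild a backend model from fitted numpy attributes, mirroring
    # BasePCA._create_model below.
    m = est.model()                      # backend model constructor
    m.eigenvectors = to_table(est.components_)
    m.means = to_table(est.mean_)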
+ def _get_onedal_params(self, data, stage=None): if stage is None: n_components = self._resolve_n_components_for_training(data.shape) @@ -119,7 +133,7 @@ def _compute_noise_variance(self, n_components, n_sf_min): return 0.0 def _create_model(self): - m = self._get_backend("decomposition", "dim_reduction", "model") + m = self.model() m.eigenvectors = to_table(self.components_) m.means = to_table(self.mean_) if self.whiten: @@ -133,9 +147,7 @@ def predict(self, X, queue=None): X = _convert_to_supported(policy, X) params = self._get_onedal_params(X, stage="predict") - result = self._get_backend( - "decomposition", "dim_reduction", "infer", policy, params, model, to_table(X) - ) + result = self.infer(policy, params, model, to_table(X)) return from_table(result.transformed_data) @@ -154,9 +166,7 @@ def fit(self, X, y=None, queue=None): X = _convert_to_supported(policy, X) params = self._get_onedal_params(X) - result = self._get_backend( - "decomposition", "dim_reduction", "train", policy, params, to_table(X) - ) + result = self.train(policy, params, to_table(X)) self.mean_ = from_table(result.means).ravel() self.variances_ = from_table(result.variances) diff --git a/onedal/ensemble/forest.py b/onedal/ensemble/forest.py index 7001bf3fbe..19f7525510 100644 --- a/onedal/ensemble/forest.py +++ b/onedal/ensemble/forest.py @@ -24,9 +24,9 @@ from sklearn.utils import check_random_state from daal4py.sklearn._utils import daal_check_version +from onedal.common._backend import bind_default_backend from sklearnex import get_hyperparameters -from ..common._base import BaseEstimator from ..common._estimator_checks import _check_is_fitted from ..common._mixin import ClassifierMixin, RegressorMixin from ..datatypes import _convert_to_supported, from_table, to_table @@ -39,7 +39,7 @@ ) -class BaseForest(BaseEstimator, BaseEnsemble, metaclass=ABCMeta): +class BaseForest(BaseEnsemble, metaclass=ABCMeta): @abstractmethod def __init__( self, @@ -96,6 +96,15 @@ def __init__( self.variable_importance_mode = variable_importance_mode self.algorithm = algorithm + @bind_default_backend("decision_forest") + def _get_policy(self, queue, *data): ... + + @abstractmethod + def train(self, *args, **kwargs): ... + + @abstractmethod + def infer(self, *args, **kwargs): ... + def _to_absolute_max_features(self, n_features): if self.max_features is None: return n_features @@ -288,7 +297,7 @@ def _get_sample_weight(self, sample_weight, X): return sample_weight - def _fit(self, X, y, sample_weight, module, queue): + def _fit(self, X, y, sample_weight, queue): X, y = _check_X_y( X, y, @@ -308,7 +317,7 @@ def _fit(self, X, y, sample_weight, module, queue): policy = self._get_policy(queue, *data) data = _convert_to_supported(policy, *data) params = self._get_onedal_params(data[0]) - train_result = module.train(policy, params, *to_table(*data)) + train_result = self.train(policy, params, *to_table(*data)) self._onedal_model = train_result.model @@ -345,7 +354,7 @@ def _create_model(self, module): # upate error msg. 
raise NotImplementedError("Creating model is not supported.") - def _predict(self, X, module, queue, hparams=None): + def _predict(self, X, queue, hparams=None): _check_is_fitted(self) X = _check_array( X, dtype=[np.float64, np.float32], force_all_finite=True, accept_sparse=False @@ -357,9 +366,9 @@ def _predict(self, X, module, queue, hparams=None): X = _convert_to_supported(policy, X) params = self._get_onedal_params(X) if hparams is not None and not hparams.is_default: - result = module.infer(policy, params, hparams.backend, model, to_table(X)) + result = self.infer(policy, params, hparams.backend, model, to_table(X)) else: - result = module.infer(policy, params, model, to_table(X)) + result = self.infer(policy, params, model, to_table(X)) y = from_table(result.responses) return y @@ -377,9 +386,9 @@ def _predict_proba(self, X, module, queue, hparams=None): model = self._onedal_model if hparams is not None and not hparams.is_default: - result = module.infer(policy, params, hparams.backend, model, to_table(X)) + result = self.infer(policy, params, hparams.backend, model, to_table(X)) else: - result = module.infer(policy, params, model, to_table(X)) + result = self.infer(policy, params, model, to_table(X)) y = from_table(result.probabilities) return y @@ -443,6 +452,12 @@ def __init__( algorithm=algorithm, ) + @bind_default_backend("decision_forest.classification") + def train(self, *args, **kwargs): ... + + @bind_default_backend("decision_forest.classification") + def infer(self, *args, **kwargs): ... + def _validate_targets(self, y, dtype): y, self.class_weight_, self.classes_ = _validate_targets( y, self.class_weight, dtype @@ -456,34 +471,18 @@ def _validate_targets(self, y, dtype): return y def fit(self, X, y, sample_weight=None, queue=None): - return self._fit( - X, - y, - sample_weight, - self._get_backend("decision_forest", "classification", None), - queue, - ) + return self._fit(X, y, sample_weight, queue) def predict(self, X, queue=None): hparams = get_hyperparameters("decision_forest", "infer") - pred = super()._predict( - X, - self._get_backend("decision_forest", "classification", None), - queue, - hparams, - ) + pred = self._predict(X, queue, hparams) return np.take(self.classes_, pred.ravel().astype(np.int64, casting="unsafe")) def predict_proba(self, X, queue=None): hparams = get_hyperparameters("decision_forest", "infer") - return super()._predict_proba( - X, - self._get_backend("decision_forest", "classification", None), - queue, - hparams, - ) + return self._predict_proba(X, queue, hparams) class RandomForestRegressor(RegressorMixin, BaseForest, metaclass=ABCMeta): @@ -544,25 +543,21 @@ def __init__( algorithm=algorithm, ) + @bind_default_backend("decision_forest.regression") + def train(self, *args, **kwargs): ... + + @bind_default_backend("decision_forest.regression") + def infer(self, *args, **kwargs): ... 
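`get_hyperparameters` returns a per-algorithm settings object whose `is_default` flag gates the extra positional `hparams.backend` argument passed to `infer` above. Hypothetical usage (the tunable attribute names live in onedal/common/hyperparameters.py and are not spelled out here):

    from sklearnex import get_hyperparameters

    hparams = get_hyperparameters("decision_forest", "infer")
    if hparams is not None:           # None when no knobs are exposed
        print(hparams.is_default)     # True until a setting is changed
        # Non-default settings travel positionally before the model:
        #     self.infer(policy, params, hparams.backend, model, to_table(X))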
+ def fit(self, X, y, sample_weight=None, queue=None): if sample_weight is not None: if hasattr(sample_weight, "__array__"): sample_weight[sample_weight == 0.0] = 1.0 sample_weight = [sample_weight] - return super()._fit( - X, - y, - sample_weight, - self._get_backend("decision_forest", "regression", None), - queue, - ) + return self._fit(X, y, sample_weight, queue) def predict(self, X, queue=None): - return ( - super() - ._predict(X, self._get_backend("decision_forest", "regression", None), queue) - .ravel() - ) + return self._predict(X, queue).ravel() class ExtraTreesClassifier(ClassifierMixin, BaseForest, metaclass=ABCMeta): @@ -623,6 +618,12 @@ def __init__( algorithm=algorithm, ) + @bind_default_backend("decision_forest.classification") + def train(self, *args, **kwargs): ... + + @bind_default_backend("decision_forest.classification") + def infer(self, *args, **kwargs): ... + def _validate_targets(self, y, dtype): y, self.class_weight_, self.classes_ = _validate_targets( y, self.class_weight, dtype @@ -640,20 +641,21 @@ def fit(self, X, y, sample_weight=None, queue=None): X, y, sample_weight, - self._get_backend("decision_forest", "classification", None), queue, ) def predict(self, X, queue=None): - pred = super()._predict( - X, self._get_backend("decision_forest", "classification", None), queue + pred = self._predict( + X, + queue, ) return np.take(self.classes_, pred.ravel().astype(np.int64, casting="unsafe")) def predict_proba(self, X, queue=None): - return super()._predict_proba( - X, self._get_backend("decision_forest", "classification", None), queue + return self._predict_proba( + X, + queue, ) @@ -715,22 +717,26 @@ def __init__( algorithm=algorithm, ) + @bind_default_backend("decision_forest.regression") + def train(self, *args, **kwargs): ... + + @bind_default_backend("decision_forest.regression") + def infer(self, *args, **kwargs): ... + def fit(self, X, y, sample_weight=None, queue=None): if sample_weight is not None: if hasattr(sample_weight, "__array__"): sample_weight[sample_weight == 0.0] = 1.0 sample_weight = [sample_weight] - return super()._fit( + return self._fit( X, y, sample_weight, - self._get_backend("decision_forest", "regression", None), queue, ) def predict(self, X, queue=None): - return ( - super() - ._predict(X, self._get_backend("decision_forest", "regression", None), queue) - .ravel() - ) + return self._predict( + X, + queue, + ).ravel() diff --git a/onedal/linear_model/incremental_linear_model.py b/onedal/linear_model/incremental_linear_model.py index f0558ad973..9592a848cb 100644 --- a/onedal/linear_model/incremental_linear_model.py +++ b/onedal/linear_model/incremental_linear_model.py @@ -14,9 +14,12 @@ # limitations under the License. # ============================================================================== +from abc import abstractmethod + import numpy as np from daal4py.sklearn._utils import get_dtype +from onedal.common._backend import bind_default_backend from ..common.hyperparameters import get_hyperparameters from ..datatypes import _convert_to_supported, from_table, to_table @@ -46,10 +49,18 @@ def __init__(self, fit_intercept=True, copy_X=False, algorithm="norm_eq"): super().__init__(fit_intercept=fit_intercept, copy_X=copy_X, algorithm=algorithm) self._reset() + @bind_default_backend("linear_model.regression") + def partial_train_result(self): ... + + @bind_default_backend("linear_model.regression") + def partial_train(self, *args, **kwargs): ... 
+ + @bind_default_backend("linear_model.regression") + def finalize_train(self, *args, **kwargs): ... + def _reset(self): - self._partial_result = self._get_backend( - "linear_model", "regression", "partial_train_result" - ) + # Get the pointer to partial_result from backend + self._partial_result = self.partial_train_result() def partial_fit(self, X, y, queue=None): """ @@ -72,8 +83,6 @@ def partial_fit(self, X, y, queue=None): self : object Returns the instance itself. """ - module = self._get_backend("linear_model", "regression") - self._queue = queue policy = self._get_policy(queue, X) @@ -93,7 +102,7 @@ def partial_fit(self, X, y, queue=None): X_table, y_table = to_table(X, y) hparams = get_hyperparameters("linear_regression", "train") if hparams is not None and not hparams.is_default: - self._partial_result = module.partial_train( + self._partial_result = self.partial_train( policy, self._params, hparams.backend, @@ -102,7 +111,7 @@ def partial_fit(self, X, y, queue=None): y_table, ) else: - self._partial_result = module.partial_train( + self._partial_result = self.partial_train( policy, self._params, self._partial_result, X_table, y_table ) @@ -127,14 +136,13 @@ def finalize_fit(self, queue=None): else: policy = self._get_policy(self._queue) - module = self._get_backend("linear_model", "regression") hparams = get_hyperparameters("linear_regression", "train") if hparams is not None and not hparams.is_default: - result = module.finalize_train( + result = self.finalize_train( policy, self._params, hparams.backend, self._partial_result ) else: - result = module.finalize_train(policy, self._params, self._partial_result) + result = self.finalize_train(policy, self._params, self._partial_result) self._onedal_model = result.model @@ -171,15 +179,22 @@ class IncrementalRidge(BaseLinearRegression): """ def __init__(self, alpha=1.0, fit_intercept=True, copy_X=False, algorithm="norm_eq"): - module = self._get_backend("linear_model", "regression") super().__init__( fit_intercept=fit_intercept, alpha=alpha, copy_X=copy_X, algorithm=algorithm ) - self._partial_result = module.partial_train_result() + self._reset() def _reset(self): - module = self._get_backend("linear_model", "regression") - self._partial_result = module.partial_train_result() + self._partial_result = self.partial_train_result() + + @bind_default_backend("linear_model.regression") + def partial_train_result(self): ... + + @bind_default_backend("linear_model.regression") + def partial_train(self, *args, **kwargs): ... + + @bind_default_backend("linear_model.regression") + def finalize_train(self, *args, **kwargs): ... def partial_fit(self, X, y, queue=None): """ @@ -202,8 +217,6 @@ def partial_fit(self, X, y, queue=None): self : object Returns the instance itself. 
""" - module = self._get_backend("linear_model", "regression") - self._queue = queue policy = self._get_policy(queue, X) @@ -221,9 +234,20 @@ def partial_fit(self, X, y, queue=None): self.n_features_in_ = _num_features(X, fallback_1d=True) X_table, y_table = to_table(X, y) - self._partial_result = module.partial_train( - policy, self._params, self._partial_result, X_table, y_table - ) + hparams = get_hyperparameters("linear_regression", "train") + if hparams is not None and not hparams.is_default: + self._partial_result = self.partial_train( + policy, + self._params, + hparams.backend, + self._partial_result, + X_table, + y_table, + ) + else: + self._partial_result = self.partial_train( + policy, self._params, self._partial_result, X_table, y_table + ) def finalize_fit(self, queue=None): """ @@ -240,12 +264,11 @@ def finalize_fit(self, queue=None): self : object Returns the instance itself. """ - module = self._get_backend("linear_model", "regression") if queue is not None: policy = self._get_policy(queue) else: policy = self._get_policy(self._queue) - result = module.finalize_train(policy, self._params, self._partial_result) + result = self.finalize_train(policy, self._params, self._partial_result) self._onedal_model = result.model diff --git a/onedal/linear_model/linear_model.py b/onedal/linear_model/linear_model.py index e87fabff82..48c5033547 100755 --- a/onedal/linear_model/linear_model.py +++ b/onedal/linear_model/linear_model.py @@ -20,15 +20,15 @@ import numpy as np from daal4py.sklearn._utils import daal_check_version, get_dtype, make2d +from onedal.common._backend import bind_default_backend -from ..common._base import BaseEstimator from ..common._estimator_checks import _check_is_fitted from ..common.hyperparameters import get_hyperparameters from ..datatypes import _convert_to_supported, from_table, to_table from ..utils import _check_array, _check_n_features, _check_X_y, _num_features -class BaseLinearRegression(BaseEstimator, metaclass=ABCMeta): +class BaseLinearRegression(metaclass=ABCMeta): """ Base class for LinearRegression oneDAL implementation. """ @@ -40,6 +40,19 @@ def __init__(self, fit_intercept, copy_X, algorithm, alpha=0.0): self.copy_X = copy_X self.algorithm = algorithm + @bind_default_backend("linear_model") + def _get_policy(self, queue, *data): ... + + @bind_default_backend("linear_model.regression") + def train(self, *args, **kwargs): ... + + @bind_default_backend("linear_model.regression") + def infer(self, policy, params, model, X): ... + + # direct access to the backend model class + @bind_default_backend("linear_model.regression") + def model(self): ... + def _get_onedal_params(self, dtype=np.float32): intercept = "intercept|" if self.fit_intercept else "" params = { @@ -54,8 +67,7 @@ def _get_onedal_params(self, dtype=np.float32): return params def _create_model(self, policy): - module = self._get_backend("linear_model", "regression") - model = module.model() + model = self.model() coefficients = self.coef_ dtype = get_dtype(coefficients) @@ -115,7 +127,6 @@ def predict(self, X, queue=None): C : array, shape (n_samples, n_targets) Returns predicted values. 
""" - module = self._get_backend("linear_model", "regression") _check_is_fitted(self) @@ -136,7 +147,7 @@ def predict(self, X, queue=None): params = self._get_onedal_params(get_dtype(X)) X_table = to_table(X) - result = module.infer(policy, params, model, X_table) + result = self.infer(policy, params, model, X_table) y = from_table(result.responses) if y.shape[1] == 1 and self.coef_.ndim == 1: @@ -192,7 +203,6 @@ def fit(self, X, y, queue=None): self : object Fitted Estimator. """ - module = self._get_backend("linear_model", "regression") # TODO Fix _check_X_y to make sure this conversion is there if not isinstance(X, np.ndarray): @@ -217,9 +227,9 @@ def fit(self, X, y, queue=None): hparams = get_hyperparameters("linear_regression", "train") if hparams is not None and not hparams.is_default: - result = module.train(policy, params, hparams.backend, X_table, y_table) + result = self.train(policy, params, hparams.backend, X_table, y_table) else: - result = module.train(policy, params, X_table, y_table) + result = self.train(policy, params, X_table, y_table) self._onedal_model = result.model @@ -291,8 +301,6 @@ def fit(self, X, y, queue=None): self : object Fitted Estimator. """ - module = self._get_backend("linear_model", "regression") - X = _check_array( X, dtype=[np.float64, np.float32], @@ -313,7 +321,7 @@ def fit(self, X, y, queue=None): params = self._get_onedal_params(get_dtype(X)) X_table, y_table = to_table(X, y) - result = module.train(policy, params, X_table, y_table) + result = self.train(policy, params, X_table, y_table) self._onedal_model = result.model packed_coefficients = from_table(result.model.packed_coefficients) diff --git a/onedal/linear_model/logistic_regression.py b/onedal/linear_model/logistic_regression.py index 5df38daa48..78010cec8c 100644 --- a/onedal/linear_model/logistic_regression.py +++ b/onedal/linear_model/logistic_regression.py @@ -20,8 +20,8 @@ import numpy as np from daal4py.sklearn._utils import daal_check_version, get_dtype, make2d +from onedal.common._backend import bind_default_backend -from ..common._base import BaseEstimator as onedal_BaseEstimator from ..common._estimator_checks import _check_is_fitted from ..common._mixin import ClassifierMixin from ..datatypes import _convert_to_supported, from_table, to_table @@ -35,7 +35,7 @@ ) -class BaseLogisticRegression(onedal_BaseEstimator, metaclass=ABCMeta): +class BaseLogisticRegression(metaclass=ABCMeta): @abstractmethod def __init__(self, tol, C, fit_intercept, solver, max_iter, algorithm): self.tol = tol @@ -45,6 +45,19 @@ def __init__(self, tol, C, fit_intercept, solver, max_iter, algorithm): self.max_iter = max_iter self.algorithm = algorithm + @bind_default_backend("logistic_regression") + def _get_policy(self, queue, *data): ... + + @abstractmethod + def train(self, policy, params, X, y): ... + + @abstractmethod + def infer(self, policy, params, X): ... + + # direct access to the backend model constructor + @abstractmethod + def model(self): ... 
+ def _get_onedal_params(self, is_csr, dtype=np.float32): intercept = "intercept|" if self.fit_intercept else "" return { @@ -62,7 +75,7 @@ def _get_onedal_params(self, is_csr, dtype=np.float32): ), } - def _fit(self, X, y, module, queue): + def _fit(self, X, y, queue): sparsity_enabled = daal_check_version((2024, "P", 700)) X, y = _check_X_y( X, @@ -87,7 +100,7 @@ def _fit(self, X, y, module, queue): params = self._get_onedal_params(is_csr, get_dtype(X)) X_table, y_table = to_table(X, y) - result = module.train(policy, params, X_table, y_table) + result = self.train(policy, params, X_table, y_table) self._onedal_model = result.model self.n_iter_ = np.array([result.iterations_count]) @@ -101,8 +114,8 @@ def _fit(self, X, y, module, queue): return self - def _create_model(self, module, policy): - m = module.model() + def _create_model(self, policy): + m = self.model() coefficients = self.coef_ dtype = get_dtype(coefficients) @@ -152,7 +165,7 @@ def _create_model(self, module, policy): return m - def _infer(self, X, module, queue): + def _infer(self, X, queue): _check_is_fitted(self) sparsity_enabled = daal_check_version((2024, "P", 700)) @@ -173,30 +186,30 @@ def _infer(self, X, module, queue): if hasattr(self, "_onedal_model"): model = self._onedal_model else: - model = self._create_model(module, policy) + model = self._create_model(policy) X = _convert_to_supported(policy, X) params = self._get_onedal_params(is_csr, get_dtype(X)) X_table = to_table(X) - result = module.infer(policy, params, model, X_table) + result = self.infer(policy, params, model, X_table) return result - def _predict(self, X, module, queue): - result = self._infer(X, module, queue) + def _predict(self, X, queue): + result = self._infer(X, queue) y = from_table(result.responses) y = np.take(self.classes_, y.ravel(), axis=0) return y - def _predict_proba(self, X, module, queue): - result = self._infer(X, module, queue) + def _predict_proba(self, X, queue): + result = self._infer(X, queue) y = from_table(result.probabilities) y = y.reshape(-1, 1) return np.hstack([1 - y, y]) - def _predict_log_proba(self, X, module, queue): - y_proba = self._predict_proba(X, module, queue) + def _predict_log_proba(self, X, queue): + y_proba = self._predict_proba(X, queue) return np.log(y_proba) @@ -225,25 +238,26 @@ def __init__( algorithm=algorithm, ) + @bind_default_backend("logistic_regression.classification") + def train(self, policy, params, X, y): ... + + @bind_default_backend("logistic_regression.classification") + def infer(self, policy, params, X, model): ... + + @bind_default_backend("logistic_regression.classification") + def model(self): ... 
+ def fit(self, X, y, queue=None): - return super()._fit( - X, y, self._get_backend("logistic_regression", "classification", None), queue - ) + return self._fit(X, y, queue) def predict(self, X, queue=None): - y = super()._predict( - X, self._get_backend("logistic_regression", "classification", None), queue - ) + y = self._predict(X, queue) return y def predict_proba(self, X, queue=None): - y = super()._predict_proba( - X, self._get_backend("logistic_regression", "classification", None), queue - ) + y = self._predict_proba(X, queue) return y def predict_log_proba(self, X, queue=None): - y = super()._predict_log_proba( - X, self._get_backend("logistic_regression", "classification", None), queue - ) + y = self._predict_log_proba(X, queue) return y diff --git a/onedal/neighbors/neighbors.py b/onedal/neighbors/neighbors.py index 6f794736b1..d2235b87a5 100755 --- a/onedal/neighbors/neighbors.py +++ b/onedal/neighbors/neighbors.py @@ -14,7 +14,7 @@ # limitations under the License. # ============================================================================== -from abc import ABCMeta +from abc import ABCMeta, abstractmethod from numbers import Integral import numpy as np @@ -27,8 +27,8 @@ kdtree_knn_classification_prediction, kdtree_knn_classification_training, ) +from onedal.common._backend import bind_default_backend -from ..common._base import BaseEstimator from ..common._estimator_checks import _check_is_fitted, _is_classifier, _is_regressor from ..common._mixin import ClassifierMixin, RegressorMixin from ..datatypes import _convert_to_supported, from_table, to_table @@ -42,7 +42,18 @@ ) -class NeighborsCommonBase(BaseEstimator, metaclass=ABCMeta): +class NeighborsCommonBase(metaclass=ABCMeta): + def __init__(self): + self.requires_y = False + self.n_neighbors = None + self.metric = None + self.classes_ = None + self.effective_metric_ = None + self._fit_method = None + self.radius = None + self.effective_metric_params_ = None + self._onedal_model = None + def _parse_auto_method(self, method, n_samples, n_features): result_method = method @@ -60,8 +71,20 @@ def _parse_auto_method(self, method, n_samples, n_features): return result_method + @bind_default_backend("neighbors") + def _get_policy(self, queue, *data): ... + + @abstractmethod + def train(self, *args, **kwargs): ... + + @abstractmethod + def infer(self, *args, **kwargs): ... + + @abstractmethod + def _onedal_fit(self, X, y, queue): ... 
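`_get_policy` is bound like any other backend symbol, so the host, DPC and SPMD builds each supply their own policy factory behind a common name. Conceptually (sketch of the call pattern used throughout this patch):

    # queue is None         -> host policy (CPU execution)
    # queue is a SyclQueue  -> data-parallel policy on that device
    policy = est._get_policy(queue, X)
    X = _convert_to_supported(policy, X)  # cast to dtypes the device supports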
+
     def _validate_data(
-        self, X, y=None, reset=True, validate_separately=False, **check_params
+        self, X, y=None, reset=True, validate_separately=None, **check_params
     ):
         if y is None:
             if self.requires_y:
@@ -188,10 +211,10 @@ def _validate_targets(self, y, dtype):
         return arr
 
     def _validate_n_classes(self):
-        if len(self.classes_) < 2:
+        length = 0 if self.classes_ is None else len(self.classes_)
+        if length < 2:
             raise ValueError(
-                "The number of classes has to be greater than one; got %d"
-                " class" % len(self.classes_)
+                f"The number of classes has to be greater than one; got {length}"
             )
 
     def _fit(self, X, y, queue):
@@ -206,7 +229,7 @@ def _fit(self, X, y, queue):
 
         if y is not None or self.requires_y:
             shape = getattr(y, "shape", None)
-            X, y = super()._validate_data(
+            X, y = self._validate_data(
                 X, y, dtype=[np.float64, np.float32], accept_sparse="csr"
             )
             self._shape = shape if shape is not None else y.shape
@@ -233,7 +256,7 @@ def _fit(self, X, y, queue):
             else:
                 self._y = y
         else:
-            X, _ = super()._validate_data(X, dtype=[np.float64, np.float32])
+            X, _ = self._validate_data(X, dtype=[np.float64, np.float32])
             self.n_samples_fit_ = X.shape[0]
             self.n_features_in_ = X.shape[1]
 
@@ -248,7 +271,7 @@ def _fit(self, X, y, queue):
                 "enter integer value" % type(self.n_neighbors)
             )
 
-        self._fit_method = super()._parse_auto_method(
+        self._fit_method = self._parse_auto_method(
             self.algorithm, self.n_samples_fit_, self.n_features_in_
         )
 
@@ -316,13 +339,13 @@ def _kneighbors(self, X=None, n_neighbors=None, return_distance=True, queue=None
         )
 
         chunked_results = None
-        method = super()._parse_auto_method(
+        method = self._parse_auto_method(
             self._fit_method, self.n_samples_fit_, n_features
         )
 
-        if (
-            type(self._onedal_model) is kdtree_knn_classification_model
-            or type(self._onedal_model) is bf_knn_classification_model
+        if type(self._onedal_model) in (
+            kdtree_knn_classification_model,
+            bf_knn_classification_model,
         ):
             params = super()._get_daal_params(X, n_neighbors=n_neighbors)
             prediction_results = self._onedal_predict(
@@ -408,6 +431,16 @@ def __init__(
         )
         self.weights = weights
 
+    # direct access to the backend model constructor
+    @bind_default_backend("neighbors.classification")
+    def model(self): ...
+
+    @bind_default_backend("neighbors.classification")
+    def train(self, *args, **kwargs): ...
+
+    @bind_default_backend("neighbors.classification")
+    def infer(self, *args, **kwargs): ...
+ def _get_daal_params(self, data): params = super()._get_daal_params(data) params["resultsToEvaluate"] = "computeClassLabels" @@ -425,41 +459,29 @@ def _onedal_fit(self, X, y, queue): train_alg = kdtree_knn_classification_training return train_alg(**params).compute(X, y).model - - policy = self._get_policy(queue, X, y) - X, y = _convert_to_supported(policy, X, y) - params = self._get_onedal_params(X, y) - train_alg = self._get_backend( - "neighbors", "classification", "train", policy, params, *to_table(X, y) - ) - - return train_alg.model + else: + policy = self._get_policy(queue, X, y) + X, y = _convert_to_supported(policy, X, y) + params = self._get_onedal_params(X, y) + return self.train(policy, params, *to_table(X, y)).model def _onedal_predict(self, model, X, params, queue): if type(self._onedal_model) is kdtree_knn_classification_model: return kdtree_knn_classification_prediction(**params).compute(X, model) elif type(self._onedal_model) is bf_knn_classification_model: return bf_knn_classification_prediction(**params).compute(X, model) - - policy = self._get_policy(queue, X) - X = _convert_to_supported(policy, X) - if hasattr(self, "_onedal_model"): - model = self._onedal_model else: - model = self._create_model( - self._get_backend("neighbors", "classification", None) - ) - if "responses" not in params["result_option"]: - params["result_option"] += "|responses" - params["fptype"] = X.dtype - result = self._get_backend( - "neighbors", "classification", "infer", policy, params, model, to_table(X) - ) + policy = self._get_policy(queue, X) + X = _convert_to_supported(policy, X) + if "responses" not in params["result_option"]: + params["result_option"] += "|responses" + params["fptype"] = X.dtype + result = self.infer(policy, params, model, to_table(X)) - return result + return result def fit(self, X, y, queue=None): - return super()._fit(X, y, queue=queue) + return self._fit(X, y, queue=queue) def predict(self, X, queue=None): X = _check_array(X, accept_sparse="csr", dtype=[np.float64, np.float32]) @@ -478,7 +500,7 @@ def predict(self, X, queue=None): _check_is_fitted(self) - self._fit_method = super()._parse_auto_method( + self._fit_method = self._parse_auto_method( self.algorithm, n_samples_fit_, n_features ) @@ -537,7 +559,7 @@ def predict_proba(self, X, queue=None): return probabilities def kneighbors(self, X=None, n_neighbors=None, return_distance=True, queue=None): - return super()._kneighbors(X, n_neighbors, return_distance, queue=queue) + return self._kneighbors(X, n_neighbors, return_distance, queue=queue) class KNeighborsRegressor(NeighborsBase, RegressorMixin): @@ -562,9 +584,17 @@ def __init__( ) self.weights = weights - def _get_onedal_params(self, X, y=None): - params = super()._get_onedal_params(X, y) - return params + @bind_default_backend("neighbors.search", lookup_name="train") + def train_search(self, *args, **kwargs): ... + + @bind_default_backend("neighbors.search", lookup_name="infer") + def infer_search(self, *args, **kwargs): ... + + @bind_default_backend("neighbors.regression") + def train(self, *args, **kwargs): ... + + @bind_default_backend("neighbors.regression") + def infer(self, *args, **kwargs): ... 
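`lookup_name` decouples the Python-side method name from the backend symbol: `train_search` and `infer_search` above resolve to the `train`/`infer` functions of the `neighbors.search` submodule, while the plain `train`/`infer` bind to `neighbors.regression`. Spelled out (assuming the resolution sketched earlier):

    # self.train_search(...)  -> backend.neighbors.search.train(...)
    # self.infer_search(...)  -> backend.neighbors.search.infer(...)
    # self.train(...)         -> backend.neighbors.regression.train(...)
    # self.infer(...)         -> backend.neighbors.regression.infer(...)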
    def _get_daal_params(self, data):
        params = super()._get_daal_params(data)
@@ -578,7 +608,6 @@ def _onedal_fit(self, X, y, queue):
             params = self._get_daal_params(X)
             if self._fit_method == "brute":
                 train_alg = bf_knn_classification_training
-
             else:
                 train_alg = kdtree_knn_classification_training
 
@@ -587,14 +616,15 @@
         policy = self._get_policy(queue, X, y)
         X, y = _convert_to_supported(policy, X, y)
         params = self._get_onedal_params(X, y)
-        train_alg_regr = self._get_backend("neighbors", "regression", None)
-        train_alg_srch = self._get_backend("neighbors", "search", None)
 
         if gpu_device:
-            return train_alg_regr.train(policy, params, *to_table(X, y)).model
-        return train_alg_srch.train(policy, params, to_table(X)).model
+            return self.train(policy, params, *to_table(X, y)).model
+        else:
+            return self.train_search(policy, params, to_table(X)).model
 
     def _onedal_predict(self, model, X, params, queue):
+        assert self._onedal_model is not None, "Model is not trained"
+
         if type(model) is kdtree_knn_classification_model:
             return kdtree_knn_classification_prediction(**params).compute(X, model)
         elif type(model) is bf_knn_classification_model:
@@ -603,28 +633,21 @@ def _onedal_predict(self, model, X, params, queue):
         gpu_device = queue is not None and queue.sycl_device.is_gpu
         policy = self._get_policy(queue, X)
         X = _convert_to_supported(policy, X)
-        backend = (
-            self._get_backend("neighbors", "regression", None)
-            if gpu_device
-            else self._get_backend("neighbors", "search", None)
-        )
-        if hasattr(self, "_onedal_model"):
-            model = self._onedal_model
-        else:
-            model = self._create_model(backend)
 
         if "responses" not in params["result_option"] and gpu_device:
             params["result_option"] += "|responses"
         params["fptype"] = X.dtype
-        result = backend.infer(policy, params, model, to_table(X))
 
-        return result
+        if gpu_device:
+            return self.infer(policy, params, self._onedal_model, to_table(X))
+        else:
+            return self.infer_search(policy, params, self._onedal_model, to_table(X))
 
     def fit(self, X, y, queue=None):
-        return super()._fit(X, y, queue=queue)
+        return self._fit(X, y, queue=queue)
 
     def kneighbors(self, X=None, n_neighbors=None, return_distance=True, queue=None):
-        return super()._kneighbors(X, n_neighbors, return_distance, queue=queue)
+        return self._kneighbors(X, n_neighbors, return_distance, queue=queue)
 
     def _predict_gpu(self, X, queue=None):
         X = _check_array(X, accept_sparse="csr", dtype=[np.float64, np.float32])
@@ -643,7 +667,7 @@ def _predict_gpu(self, X, queue=None):
 
         _check_is_fitted(self)
 
-        self._fit_method = super()._parse_auto_method(
+        self._fit_method = self._parse_auto_method(
             self.algorithm, n_samples_fit_, n_features
         )
@@ -711,6 +735,12 @@ def __init__(
         )
         self.weights = weights
 
+    @bind_default_backend("neighbors.search")
+    def train(self, *args, **kwargs): ...
+
+    @bind_default_backend("neighbors.search")
+    def infer(self, *args, **kwargs): ...
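A smoke test for the refactored search path (sketch; assumes the onedal batch estimator is instantiable directly and runs on the host when no queue is given):

    import numpy as np

    from onedal.neighbors import NearestNeighbors

    X = np.random.rand(20, 3)
    nn = NearestNeighbors(n_neighbors=3).fit(X, None)  # queue=None -> host policy
    dist, idx = nn.kneighbors(X[:2])                   # return_distance defaults to True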
+
     def _get_daal_params(self, data):
         params = super()._get_daal_params(data)
         params["resultsToCompute"] = "computeIndicesOfNeighbors|computeDistances"
@@ -731,14 +761,11 @@ def _onedal_fit(self, X, y, queue):
 
             return train_alg(**params).compute(X, y).model
 
-        policy = self._get_policy(queue, X, y)
-        X, y = _convert_to_supported(policy, X, y)
-        params = self._get_onedal_params(X, y)
-        train_alg = self._get_backend(
-            "neighbors", "search", "train", policy, params, to_table(X)
-        )
-
-        return train_alg.model
+        else:
+            policy = self._get_policy(queue, X, y)
+            X, y = _convert_to_supported(policy, X, y)
+            params = self._get_onedal_params(X, y)
+            return self.train(policy, params, to_table(X)).model
 
     def _onedal_predict(self, model, X, params, queue):
         if type(self._onedal_model) is kdtree_knn_classification_model:
@@ -748,19 +775,13 @@ def _onedal_predict(self, model, X, params, queue):
         policy = self._get_policy(queue, X)
         X = _convert_to_supported(policy, X)
-        if hasattr(self, "_onedal_model"):
-            model = self._onedal_model
-        else:
-            model = self._create_model(self._get_backend("neighbors", "search", None))
 
         params["fptype"] = X.dtype
-        result = self._get_backend(
-            "neighbors", "search", "infer", policy, params, model, to_table(X)
-        )
+        result = self.infer(policy, params, model, to_table(X))
 
         return result
 
     def fit(self, X, y, queue=None):
-        return super()._fit(X, y, queue=queue)
+        return self._fit(X, y, queue=queue)
 
     def kneighbors(self, X=None, n_neighbors=None, return_distance=True, queue=None):
-        return super()._kneighbors(X, n_neighbors, return_distance, queue=queue)
+        return self._kneighbors(X, n_neighbors, return_distance, queue=queue)
diff --git a/onedal/primitives/get_tree.py b/onedal/primitives/get_tree.py
index 688d5d2983..4bd3b984df 100644
--- a/onedal/primitives/get_tree.py
+++ b/onedal/primitives/get_tree.py
@@ -14,12 +14,14 @@
 # limitations under the License.
# ============================================================================== -from onedal import _backend +from onedal import _default_backend, _dpc_backend + +backend = _dpc_backend or _default_backend def get_tree_state_cls(model, iTree, n_classes): - return _backend.get_tree.classification.get_tree_state(model, iTree, n_classes) + return backend.get_tree.classification.get_tree_state(model, iTree, n_classes) def get_tree_state_reg(model, iTree, n_classes): - return _backend.get_tree.regression.get_tree_state(model, iTree, n_classes) + return backend.get_tree.regression.get_tree_state(model, iTree, n_classes) diff --git a/onedal/primitives/kernel_functions.py b/onedal/primitives/kernel_functions.py index 083d91960f..ecdb48bebf 100644 --- a/onedal/primitives/kernel_functions.py +++ b/onedal/primitives/kernel_functions.py @@ -16,12 +16,15 @@ import numpy as np -from onedal import _backend +from onedal import _default_backend, _dpc_backend -from ..common._policy import _get_policy +from ..common.policy_manager import PolicyManager from ..datatypes import _convert_to_supported, from_table, to_table from ..utils import _check_array +backend = _dpc_backend or _default_backend +policy_manager = PolicyManager(backend) + def _check_inputs(X, Y): def check_input(data): @@ -33,7 +36,7 @@ def check_input(data): def _compute_kernel(params, submodule, X, Y, queue): - policy = _get_policy(queue, X, Y) + policy = policy_manager.get_policy(queue, X, Y) X, Y = _convert_to_supported(policy, X, Y) params["fptype"] = X.dtype X, Y = to_table(X, Y) @@ -61,7 +64,7 @@ def linear_kernel(X, Y=None, scale=1.0, shift=0.0, queue=None): X, Y = _check_inputs(X, Y) return _compute_kernel( {"method": "dense", "scale": scale, "shift": shift}, - _backend.linear_kernel, + backend.linear_kernel, X, Y, queue, @@ -92,7 +95,7 @@ def rbf_kernel(X, Y=None, gamma=None, queue=None): sigma = np.sqrt(0.5 / gamma) return _compute_kernel( - {"method": "dense", "sigma": sigma}, _backend.rbf_kernel, X, Y, queue + {"method": "dense", "sigma": sigma}, backend.rbf_kernel, X, Y, queue ) @@ -118,7 +121,7 @@ def poly_kernel(X, Y=None, gamma=1.0, coef0=0.0, degree=3, queue=None): X, Y = _check_inputs(X, Y) return _compute_kernel( {"method": "dense", "scale": gamma, "shift": coef0, "degree": degree}, - _backend.polynomial_kernel, + backend.polynomial_kernel, X, Y, queue, @@ -146,7 +149,7 @@ def sigmoid_kernel(X, Y=None, gamma=1.0, coef0=0.0, queue=None): X, Y = _check_inputs(X, Y) return _compute_kernel( {"method": "dense", "scale": gamma, "shift": coef0}, - _backend.sigmoid_kernel, + backend.sigmoid_kernel, X, Y, queue, diff --git a/onedal/spmd/__init__.py b/onedal/spmd/__init__.py index 2c60cc2353..cbe5944f67 100644 --- a/onedal/spmd/__init__.py +++ b/onedal/spmd/__init__.py @@ -14,6 +14,16 @@ # limitations under the License. # ============================================================================== +from . import ( + basic_statistics, + cluster, + covariance, + decomposition, + ensemble, + linear_model, + neighbors, +) + __all__ = [ "basic_statistics", "cluster", diff --git a/onedal/spmd/_base.py b/onedal/spmd/_base.py deleted file mode 100644 index 52307ddb34..0000000000 --- a/onedal/spmd/_base.py +++ /dev/null @@ -1,30 +0,0 @@ -# ============================================================================== -# Copyright 2024 Intel Corporation -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== - -from abc import ABC - -from onedal import _spmd_backend - -from ..common._base import _get_backend -from ..common._spmd_policy import _get_spmd_policy - - -class BaseEstimatorSPMD(ABC): - def _get_backend(self, module, submodule=None, method=None, *args, **kwargs): - return _get_backend(_spmd_backend, module, submodule, method, *args, **kwargs) - - def _get_policy(self, queue, *data): - return _get_spmd_policy(queue) diff --git a/onedal/spmd/basic_statistics/basic_statistics.py b/onedal/spmd/basic_statistics/basic_statistics.py index 8253aa6628..047c7d9773 100644 --- a/onedal/spmd/basic_statistics/basic_statistics.py +++ b/onedal/spmd/basic_statistics/basic_statistics.py @@ -14,17 +14,18 @@ # limitations under the License. # ============================================================================== -from onedal.basic_statistics import BasicStatistics as BasicStatistics_Batch - from ..._device_offload import support_input_format -from .._base import BaseEstimatorSPMD +from ...basic_statistics import BasicStatistics as BasicStatistics_Batch +from ...common._backend import bind_spmd_backend -class BasicStatistics(BaseEstimatorSPMD, BasicStatistics_Batch): - @support_input_format() - def compute(self, data, weights=None, queue=None): - return super().compute(data, weights=weights, queue=queue) +class BasicStatistics(BasicStatistics_Batch): + @bind_spmd_backend("basic_statistics") + def _get_policy(self, queue, *data): ... + + @bind_spmd_backend("basic_statistics") + def compute(self, data, weights=None, queue=None): ... @support_input_format() def fit(self, data, sample_weight=None, queue=None): - return super().fit(data, sample_weight=sample_weight, queue=queue) + return super().fit(data, sample_weight, queue) diff --git a/onedal/spmd/basic_statistics/incremental_basic_statistics.py b/onedal/spmd/basic_statistics/incremental_basic_statistics.py index a0bd62868a..b46582e645 100644 --- a/onedal/spmd/basic_statistics/incremental_basic_statistics.py +++ b/onedal/spmd/basic_statistics/incremental_basic_statistics.py @@ -19,51 +19,29 @@ from ...basic_statistics import ( IncrementalBasicStatistics as base_IncrementalBasicStatistics, ) +from ...common._backend import bind_default_backend, bind_spmd_backend from ...datatypes import _convert_to_supported, to_table -from .._base import BaseEstimatorSPMD -class IncrementalBasicStatistics(BaseEstimatorSPMD, base_IncrementalBasicStatistics): - def _reset(self): - self._partial_result = super(base_IncrementalBasicStatistics, self)._get_backend( - "basic_statistics", None, "partial_compute_result" - ) +class IncrementalBasicStatistics(base_IncrementalBasicStatistics): + @bind_default_backend("basic_statistics", lookup_name="_get_policy") + def _get_default_policy(self, queue, *data): ... - def partial_fit(self, X, weights=None, queue=None): - """ - Computes partial data for basic statistics - from data batch X and saves it to `_partial_result`. + @bind_spmd_backend("basic_statistics", lookup_name="_get_policy") + def _get_spmd_policy(self, queue, *data): ... 
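The class keeps both policy getters and swaps `self._get_policy` per phase: partial updates run under the host/DPC policy, while the cross-rank reduction in `finalize_fit` needs the SPMD policy (see the overrides just below). Intended per-rank usage under MPI (sketch; `q` is the rank's SYCL queue and `local_batches` its share of the data):

    ibs = IncrementalBasicStatistics()
    for X_batch in local_batches:
        ibs.partial_fit(X_batch, queue=q)  # default policy: rank-local partials
    ibs.finalize_fit(queue=q)              # SPMD policy: cross-rank reduction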
- Parameters - ---------- - X : array-like of shape (n_samples, n_features) - Training data batch, where `n_samples` is the number of samples - in the batch, and `n_features` is the number of features. + @bind_spmd_backend("basic_statistics") + def compute(self, *args, **kwargs): ... - queue : dpctl.SyclQueue - If not None, use this queue for computations. + @bind_spmd_backend("basic_statistics") + def finalize_compute(self, *args, **kwargs): ... - Returns - ------- - self : object - Returns the instance itself. - """ - self._queue = queue - policy = super(base_IncrementalBasicStatistics, self)._get_policy(queue, X) - X, weights = _convert_to_supported(policy, X, weights) + def partial_fit(self, *args, **kwargs): + # base class partial_fit is using `compute()`, which requires host or parallel policy, but not SPMD + self._get_policy = self._get_default_policy + return super().partial_fit(*args, **kwargs) - if not hasattr(self, "_onedal_params"): - dtype = get_dtype(X) - self._onedal_params = self._get_onedal_params(False, dtype=dtype) - - X_table, weights_table = to_table(X, weights) - self._partial_result = super(base_IncrementalBasicStatistics, self)._get_backend( - "basic_statistics", - None, - "partial_compute", - policy, - self._onedal_params, - self._partial_result, - X_table, - weights_table, - ) + def finalize_fit(self, *args, **kwargs): + # base class finalize_fit is using `finalize_compute()`, which requires SPMD policy + self._get_policy = self._get_spmd_policy + return super().finalize_fit(*args, **kwargs) diff --git a/onedal/spmd/cluster/__init__.py b/onedal/spmd/cluster/__init__.py index bb7a0b3a06..94b5385367 100644 --- a/onedal/spmd/cluster/__init__.py +++ b/onedal/spmd/cluster/__init__.py @@ -18,11 +18,9 @@ from .dbscan import DBSCAN +__all__ = ["DBSCAN"] + if daal_check_version((2023, "P", 200)): from .kmeans import KMeans - __all__ = ["DBSCAN", "KMeans"] -else: - __all__ = [ - "DBSCAN", - ] + __all__ += ["KMeans"] diff --git a/onedal/spmd/cluster/dbscan.py b/onedal/spmd/cluster/dbscan.py index 1460ed6533..a9fe89d7c8 100644 --- a/onedal/spmd/cluster/dbscan.py +++ b/onedal/spmd/cluster/dbscan.py @@ -14,10 +14,13 @@ # limitations under the License. # ============================================================================== -from onedal.cluster import DBSCAN as DBSCAN_Batch +from ...cluster import DBSCAN as DBSCAN_Batch +from ...common._backend import bind_spmd_backend -from .._base import BaseEstimatorSPMD +class DBSCAN(DBSCAN_Batch): + @bind_spmd_backend("dbscan") + def _get_policy(self, queue, *data): ... -class DBSCAN(BaseEstimatorSPMD, DBSCAN_Batch): - pass + @bind_spmd_backend("dbscan.clustering") + def compute(self, policy, params, data_table, weights_table): ... diff --git a/onedal/spmd/cluster/kmeans.py b/onedal/spmd/cluster/kmeans.py index 3f552a353b..45c2404dd8 100644 --- a/onedal/spmd/cluster/kmeans.py +++ b/onedal/spmd/cluster/kmeans.py @@ -14,32 +14,44 @@ # limitations under the License. 
# ============================================================================== -from onedal.cluster import KMeans as KMeans_Batch -from onedal.cluster import KMeansInit as KMeansInit_Batch -from onedal.spmd.basic_statistics import BasicStatistics - from ..._device_offload import support_input_format -from .._base import BaseEstimatorSPMD +from ...cluster import KMeans as KMeans_Batch +from ...cluster import KMeansInit as KMeansInit_Batch +from ...common._backend import bind_default_backend, bind_spmd_backend +from ...spmd.basic_statistics import BasicStatistics -class KMeansInit(BaseEstimatorSPMD, KMeansInit_Batch): +class KMeansInit(KMeansInit_Batch): """ KMeansInit oneDAL implementation for SPMD iface. """ - pass + @bind_spmd_backend("kmeans_init") + def _get_policy(self, queue, *data): ... + + @bind_spmd_backend("kmeans_init.init", lookup_name="compute") + def backend_compute(self, policy, params, data): ... -class KMeans(BaseEstimatorSPMD, KMeans_Batch): +class KMeans(KMeans_Batch): def _get_basic_statistics_backend(self, result_options): return BasicStatistics(result_options) def _get_kmeans_init(self, cluster_count, seed, algorithm): return KMeansInit(cluster_count=cluster_count, seed=seed, algorithm=algorithm) + @bind_spmd_backend("kmeans") + def _get_policy(self, queue, X): ... + + @bind_spmd_backend("kmeans.clustering") + def train(self, policy, params, X_table, centroids_table): ... + + @bind_spmd_backend("kmeans.clustering") + def infer(self, policy, params, model, centroids_table): ... + @support_input_format() def fit(self, X, y=None, queue=None): - return super().fit(X, queue=queue) + return super().fit(X, y, queue=queue) @support_input_format() def predict(self, X, queue=None): @@ -48,9 +60,3 @@ def predict(self, X, queue=None): @support_input_format() def fit_predict(self, X, y=None, queue=None): return super().fit_predict(X, queue=queue) - - def transform(self, X): - return super().transform(X) - - def fit_transform(self, X, queue=None): - return super().fit_transform(X, queue=queue) diff --git a/onedal/spmd/covariance/covariance.py b/onedal/spmd/covariance/covariance.py index fe746b0993..6808073aba 100644 --- a/onedal/spmd/covariance/covariance.py +++ b/onedal/spmd/covariance/covariance.py @@ -14,13 +14,21 @@ # limitations under the License. # ============================================================================== -from onedal.covariance import EmpiricalCovariance as EmpiricalCovariance_Batch - from ..._device_offload import support_input_format -from .._base import BaseEstimatorSPMD +from ...common._backend import bind_spmd_backend +from ...covariance import EmpiricalCovariance as EmpiricalCovariance_Batch + + +class EmpiricalCovariance(EmpiricalCovariance_Batch): + @bind_spmd_backend("covariance") + def _get_policy(self, queue, *data): ... + + @bind_spmd_backend("covariance") + def compute(self, *args, **kwargs): ... + @bind_spmd_backend("covariance") + def finalize_compute(self, policy, params, partial_result): ... -class EmpiricalCovariance(BaseEstimatorSPMD, EmpiricalCovariance_Batch): @support_input_format() def fit(self, X, y=None, queue=None): return super().fit(X, queue=queue) diff --git a/onedal/spmd/covariance/incremental_covariance.py b/onedal/spmd/covariance/incremental_covariance.py index e0840c3ac6..dae7b91afe 100644 --- a/onedal/spmd/covariance/incremental_covariance.py +++ b/onedal/spmd/covariance/incremental_covariance.py @@ -14,69 +14,20 @@ # limitations under the License. 
# ============================================================================== -import numpy as np - -from daal4py.sklearn._utils import get_dtype - +from ...common._backend import DefaultPolicyOverride, bind_spmd_backend from ...covariance import ( IncrementalEmpiricalCovariance as base_IncrementalEmpiricalCovariance, ) -from ...datatypes import _convert_to_supported, to_table -from ...utils import _check_array -from .._base import BaseEstimatorSPMD - - -class IncrementalEmpiricalCovariance( - BaseEstimatorSPMD, base_IncrementalEmpiricalCovariance -): - def _reset(self): - self._partial_result = super( - base_IncrementalEmpiricalCovariance, self - )._get_backend("covariance", None, "partial_compute_result") - - def partial_fit(self, X, y=None, queue=None): - """ - Computes partial data for the covariance matrix - from data batch X and saves it to `_partial_result`. - Parameters - ---------- - X : array-like of shape (n_samples, n_features) - Training data batch, where `n_samples` is the number of samples - in the batch, and `n_features` is the number of features. - y : Ignored - Not used, present for API consistency by convention. +class IncrementalEmpiricalCovariance(base_IncrementalEmpiricalCovariance): + @bind_spmd_backend("covariance") + def _get_policy(self, queue, *data): ... - queue : dpctl.SyclQueue - If not None, use this queue for computations. + @bind_spmd_backend("covariance") + def finalize_compute(self, policy, params, partial_result): ... - Returns - ------- - self : object - Returns the instance itself. - """ - X = _check_array(X, dtype=[np.float64, np.float32], ensure_2d=True) - - self._queue = queue - - policy = super(base_IncrementalEmpiricalCovariance, self)._get_policy(queue, X) - - X = _convert_to_supported(policy, X) - - if not hasattr(self, "_dtype"): - self._dtype = get_dtype(X) - - params = self._get_onedal_params(self._dtype) - table_X = to_table(X) - self._partial_result = super( - base_IncrementalEmpiricalCovariance, self - )._get_backend( - "covariance", - None, - "partial_compute", - policy, - params, - self._partial_result, - table_X, - ) + def partial_fit(self, X, y=None, queue=None): + # partial fit performed by parent backend, therefore default policy required + with DefaultPolicyOverride(self): + return super().partial_fit(X, y, queue) diff --git a/onedal/spmd/decomposition/incremental_pca.py b/onedal/spmd/decomposition/incremental_pca.py index 6f82a1ac37..cdbcdb2235 100644 --- a/onedal/spmd/decomposition/incremental_pca.py +++ b/onedal/spmd/decomposition/incremental_pca.py @@ -14,15 +14,11 @@ # limitations under the License. # ============================================================================== -from daal4py.sklearn._utils import get_dtype - -from ...datatypes import _convert_to_supported, from_table, to_table +from ...common._backend import DefaultPolicyOverride, bind_spmd_backend from ...decomposition import IncrementalPCA as base_IncrementalPCA -from ...utils import _check_array -from .._base import BaseEstimatorSPMD -class IncrementalPCA(BaseEstimatorSPMD, base_IncrementalPCA): +class IncrementalPCA(base_IncrementalPCA): """ Distributed incremental estimator for PCA based on oneDAL implementation. Allows for distributed PCA computation if data is split into batches. 
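`DefaultPolicyOverride` is the context-manager counterpart of that swap: inside the `with` block the estimator's `_get_policy` temporarily yields the default (host/DPC) policy instead of the SPMD one, then is restored. A minimal sketch of what such a context manager could look like (illustrative, not the actual onedal/common/_backend.py; it assumes a `_get_default_policy` binding like the one used by IncrementalBasicStatistics above):

    class DefaultPolicyOverride:
        # Temporarily route estimator._get_policy to the default backend.
        def __init__(self, estimator):
            self._estimator = estimator
            self._saved = None

        def __enter__(self):
            self._saved = self._estimator._get_policy
            self._estimator._get_policy = self._estimator._get_default_policy
            return self._estimator

        def __exit__(self, exc_type, exc, tb):
            self._estimator._get_policy = self._saved
            return False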
@@ -30,88 +26,18 @@ class IncrementalPCA(BaseEstimatorSPMD, base_IncrementalPCA): API is the same as for `onedal.decomposition.IncrementalPCA` """ - def _reset(self): - self._partial_result = super(base_IncrementalPCA, self)._get_backend( - "decomposition", "dim_reduction", "partial_train_result" - ) - if hasattr(self, "components_"): - del self.components_ - - def partial_fit(self, X, y=None, queue=None): - """Incremental fit with X. All of X is processed as a single batch. - - Parameters - ---------- - X : array-like of shape (n_samples, n_features) - Training data, where `n_samples` is the number of samples and - `n_features` is the number of features. - - y : Ignored - Not used, present for API consistency by convention. - - Returns - ------- - self : object - Returns the instance itself. - """ - X = _check_array(X) - n_samples, n_features = X.shape - - first_pass = not hasattr(self, "components_") - if first_pass: - self.components_ = None - self.n_samples_seen_ = n_samples - self.n_features_in_ = n_features - else: - self.n_samples_seen_ += n_samples - - if self.n_components is None: - if self.components_ is None: - self.n_components_ = min(n_samples, n_features) - else: - self.n_components_ = self.components_.shape[0] - else: - self.n_components_ = self.n_components - - self._queue = queue - - policy = super(base_IncrementalPCA, self)._get_policy(queue, X) - X = _convert_to_supported(policy, X) - - if not hasattr(self, "_dtype"): - self._dtype = get_dtype(X) - self._params = self._get_onedal_params(X) - - X_table = to_table(X) - self._partial_result = super(base_IncrementalPCA, self)._get_backend( - "decomposition", - "dim_reduction", - "partial_train", - policy, - self._params, - self._partial_result, - X_table, - ) - return self + @bind_spmd_backend("decomposition") + def _get_policy(self, queue, *data): ... - def _create_model(self): - m = super(base_IncrementalPCA, self)._get_backend( - "decomposition", "dim_reduction", "model" - ) - m.eigenvectors = to_table(self.components_) - m.means = to_table(self.mean_) - if self.whiten: - m.eigenvalues = to_table(self.explained_variance_) - self._onedal_model = m - return m + @bind_spmd_backend("decomposition.dim_reduction") + def finalize_train(self, policy, params, partial_result): ... - def predict(self, X, queue=None): - policy = super(base_IncrementalPCA, self)._get_policy(queue, X) - model = self._create_model() - X = _convert_to_supported(policy, X) - params = self._get_onedal_params(X, stage="predict") + def partial_fit(self, X, queue): + # partial fit performed by parent backend, therefore default policy required + with DefaultPolicyOverride(self): + return super().partial_fit(X, queue) - result = super(base_IncrementalPCA, self)._get_backend( - "decomposition", "dim_reduction", "infer", policy, params, model, to_table(X) - ) - return from_table(result.transformed_data) + def infer(self, policy, params, X, model): + # infer runs in parent backend, therefore default policy required + with DefaultPolicyOverride(self): + return super().infer(policy, params, X, model) diff --git a/onedal/spmd/decomposition/pca.py b/onedal/spmd/decomposition/pca.py index 55f242f782..eadcd1c0be 100644 --- a/onedal/spmd/decomposition/pca.py +++ b/onedal/spmd/decomposition/pca.py @@ -14,13 +14,21 @@ # limitations under the License. 
# ============================================================================== -from onedal.decomposition.pca import PCA as PCABatch - from ..._device_offload import support_input_format -from .._base import BaseEstimatorSPMD +from ...common._backend import bind_spmd_backend +from ...decomposition.pca import PCA as PCABatch + + +class PCA(PCABatch): + @bind_spmd_backend("decomposition.dim_reduction") + def _get_policy(self, queue, *data): ... + + @bind_spmd_backend("decomposition.dim_reduction") + def train(self, policy, params, X): ... + @bind_spmd_backend("decomposition.dim_reduction") + def finalize_train(self, *args, **kwargs): ... -class PCA(BaseEstimatorSPMD, PCABatch): @support_input_format() def fit(self, X, y=None, queue=None): return super().fit(X, queue=queue) diff --git a/onedal/spmd/ensemble/__init__.py b/onedal/spmd/ensemble/__init__.py index 9068c7b255..caa541f9d5 100644 --- a/onedal/spmd/ensemble/__init__.py +++ b/onedal/spmd/ensemble/__init__.py @@ -14,6 +14,6 @@ # limitations under the License. # ============================================================================== -from .forest import RandomForestClassifier, RandomForestRegressor +from ...ensemble import RandomForestClassifier, RandomForestRegressor __all__ = ["RandomForestClassifier", "RandomForestRegressor"] diff --git a/onedal/spmd/ensemble/forest.py b/onedal/spmd/ensemble/forest.py deleted file mode 100644 index 90a3f924db..0000000000 --- a/onedal/spmd/ensemble/forest.py +++ /dev/null @@ -1,28 +0,0 @@ -# ============================================================================== -# Copyright 2023 Intel Corporation -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== - -from onedal.ensemble import RandomForestClassifier as RandomForestClassifier_Batch -from onedal.ensemble import RandomForestRegressor as RandomForestRegressor_Batch - -from .._base import BaseEstimatorSPMD - - -class RandomForestClassifier(BaseEstimatorSPMD, RandomForestClassifier_Batch): - pass - - -class RandomForestRegressor(BaseEstimatorSPMD, RandomForestRegressor_Batch): - pass diff --git a/onedal/spmd/linear_model/incremental_linear_model.py b/onedal/spmd/linear_model/incremental_linear_model.py index a60d311266..6470173a9c 100644 --- a/onedal/spmd/linear_model/incremental_linear_model.py +++ b/onedal/spmd/linear_model/incremental_linear_model.py @@ -14,84 +14,27 @@ # limitations under the License. 
# ============================================================================== -import numpy as np -from daal4py.sklearn._utils import get_dtype - -from ...common.hyperparameters import get_hyperparameters -from ...datatypes import _convert_to_supported, to_table +from ...common._backend import DefaultPolicyOverride, bind_spmd_backend from ...linear_model import ( IncrementalLinearRegression as base_IncrementalLinearRegression, ) -from ...utils import _check_X_y, _num_features -from .._base import BaseEstimatorSPMD -class IncrementalLinearRegression(BaseEstimatorSPMD, base_IncrementalLinearRegression): +class IncrementalLinearRegression(base_IncrementalLinearRegression): """ Distributed incremental Linear Regression oneDAL implementation. API is the same as for `onedal.linear_model.IncrementalLinearRegression`. """ - def _reset(self): - self._partial_result = super(base_IncrementalLinearRegression, self)._get_backend( - "linear_model", "regression", "partial_train_result" - ) - - def partial_fit(self, X, y, queue=None): - """ - Computes partial data for linear regression - from data batch X and saves it to `_partial_result`. - Parameters - ---------- - X : array-like of shape (n_samples, n_features) - Training data batch, where `n_samples` is the number of samples - in the batch, and `n_features` is the number of features. - - y: array-like of shape (n_samples,) or (n_samples, n_targets) in - case of multiple targets - Responses for training data. - - queue : dpctl.SyclQueue - If not None, use this queue for computations. - Returns - ------- - self : object - Returns the instance itself. - """ - module = super(base_IncrementalLinearRegression, self)._get_backend( - "linear_model", "regression" - ) - - self._queue = queue - policy = super(base_IncrementalLinearRegression, self)._get_policy(queue, X) - - X, y = _convert_to_supported(policy, X, y) - - if not hasattr(self, "_dtype"): - self._dtype = get_dtype(X) - self._params = self._get_onedal_params(self._dtype) - - y = np.asarray(y, dtype=self._dtype) + @bind_spmd_backend("linear_model") + def _get_policy(self): ... - X, y = _check_X_y( - X, y, dtype=[np.float64, np.float32], accept_2d_y=True, force_all_finite=False - ) + @bind_spmd_backend("linear_model.regression") + def finalize_train(self, *args, **kwargs): ... - self.n_features_in_ = _num_features(X, fallback_1d=True) - X_table, y_table = to_table(X, y) - hparams = get_hyperparameters("linear_regression", "train") - if hparams is not None and not hparams.is_default: - self._partial_result = module.partial_train( - policy, - self._params, - hparams.backend, - self._partial_result, - X_table, - y_table, - ) - else: - self._partial_result = module.partial_train( - policy, self._params, self._partial_result, X_table, y_table - ) + def partial_fit(self, X, y, queue): + # partial fit performed by parent backend, therefore default policy required + with DefaultPolicyOverride(self): + return super().partial_fit(X, y, queue) diff --git a/onedal/spmd/linear_model/linear_model.py b/onedal/spmd/linear_model/linear_model.py index 11d9cbe0e8..0317e74071 100644 --- a/onedal/spmd/linear_model/linear_model.py +++ b/onedal/spmd/linear_model/linear_model.py @@ -14,13 +14,24 @@ # limitations under the License. 
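Only `finalize_train` is bound to the SPMD backend here; the per-batch accumulation reuses the batch implementation under the default policy. A usage sketch, assuming an SPMD-style launch with a SYCL queue `q` and an iterable `batches` of `(X, y)` blocks (both names illustrative):

    from onedal.spmd.linear_model import IncrementalLinearRegression

    est = IncrementalLinearRegression()
    for X_block, y_block in batches:
        # accumulation runs through the batch backend (default policy)
        est.partial_fit(X_block, y_block, q)
    # the cross-rank reduction goes through the SPMD finalize_train
    est.finalize_fit(queue=q)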
# ============================================================================== -from onedal.linear_model import LinearRegression as LinearRegression_Batch - from ..._device_offload import support_input_format -from .._base import BaseEstimatorSPMD +from ...common._backend import bind_spmd_backend +from ...linear_model import LinearRegression as LinearRegression_Batch + + +class LinearRegression(LinearRegression_Batch): + @bind_spmd_backend("linear_model") + def _get_policy(self, queue, *data): ... + + @bind_spmd_backend("linear_model.regression") + def train(self, *args, **kwargs): ... + + @bind_spmd_backend("linear_model.regression") + def finalize_train(self, *args, **kwargs): ... + @bind_spmd_backend("linear_model.regression") + def infer(self, policy, params, model, X): ... -class LinearRegression(BaseEstimatorSPMD, LinearRegression_Batch): @support_input_format() def fit(self, X, y, queue=None): return super().fit(X, y, queue=queue) diff --git a/onedal/spmd/linear_model/logistic_regression.py b/onedal/spmd/linear_model/logistic_regression.py index 38529eaef7..749374c5bc 100644 --- a/onedal/spmd/linear_model/logistic_regression.py +++ b/onedal/spmd/linear_model/logistic_regression.py @@ -14,13 +14,21 @@ # limitations under the License. # ============================================================================== -from onedal.linear_model import LogisticRegression as LogisticRegression_Batch - from ..._device_offload import support_input_format -from .._base import BaseEstimatorSPMD +from ...common._backend import bind_spmd_backend +from ...linear_model import LogisticRegression as LogisticRegression_Batch + + +class LogisticRegression(LogisticRegression_Batch): + @bind_spmd_backend("logistic_regression") + def _get_policy(self): ... + + @bind_spmd_backend("logistic_regression.classification") + def train(self, policy, params, X, y): ... + @bind_spmd_backend("logistic_regression.classification") + def infer(self, policy, params, X, model): ... -class LogisticRegression(BaseEstimatorSPMD, LogisticRegression_Batch): @support_input_format() def fit(self, X, y, queue=None): return super().fit(X, y, queue=queue) diff --git a/onedal/spmd/neighbors/__init__.py b/onedal/spmd/neighbors/__init__.py index 8036511d9f..1aa6247605 100644 --- a/onedal/spmd/neighbors/__init__.py +++ b/onedal/spmd/neighbors/__init__.py @@ -14,6 +14,6 @@ # limitations under the License. # ============================================================================== -from .neighbors import KNeighborsClassifier, KNeighborsRegressor, NearestNeighbors +from .neighbors import KNeighborsClassifier, KNeighborsRegressor -__all__ = ["KNeighborsClassifier", "KNeighborsRegressor", "NearestNeighbors"] +__all__ = ["KNeighborsClassifier", "KNeighborsRegressor"] diff --git a/onedal/spmd/neighbors/neighbors.py b/onedal/spmd/neighbors/neighbors.py index 87004e1a77..46a030863f 100644 --- a/onedal/spmd/neighbors/neighbors.py +++ b/onedal/spmd/neighbors/neighbors.py @@ -14,14 +14,22 @@ # limitations under the License. 
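The dotted strings handed to `bind_spmd_backend` ("linear_model.regression", "logistic_regression.classification", and so on) are resolved attribute by attribute against the backend module; a minimal equivalent of that lookup:

    from functools import reduce

    def resolve(backend, dotted_name):
        # "logistic_regression.classification" ->
        # backend.logistic_regression.classification
        return reduce(getattr, dotted_name.split("."), backend)

The bound method name (e.g. `train`) is then fetched from the resolved submodule, which is why the decorated stub bodies can stay empty.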
# ============================================================================== -from onedal.neighbors import KNeighborsClassifier as KNeighborsClassifier_Batch -from onedal.neighbors import KNeighborsRegressor as KNeighborsRegressor_Batch - from ..._device_offload import support_input_format -from .._base import BaseEstimatorSPMD +from ...common._backend import bind_spmd_backend +from ...neighbors import KNeighborsClassifier as KNeighborsClassifier_Batch +from ...neighbors import KNeighborsRegressor as KNeighborsRegressor_Batch + + +class KNeighborsClassifier(KNeighborsClassifier_Batch): + @bind_spmd_backend("neighbors") + def _get_policy(self): ... + @bind_spmd_backend("neighbors.classification") + def train(self, *args, **kwargs): ... + + @bind_spmd_backend("neighbors.classification") + def infer(self, *args, **kwargs): ... -class KNeighborsClassifier(BaseEstimatorSPMD, KNeighborsClassifier_Batch): @support_input_format() def fit(self, X, y, queue=None): return super().fit(X, y, queue=queue) @@ -39,11 +47,26 @@ def kneighbors(self, X=None, n_neighbors=None, return_distance=True, queue=None) return super().kneighbors(X, n_neighbors, return_distance, queue=queue) -class KNeighborsRegressor(BaseEstimatorSPMD, KNeighborsRegressor_Batch): +class KNeighborsRegressor(KNeighborsRegressor_Batch): + @bind_spmd_backend("neighbors") + def _get_policy(self): ... + + @bind_spmd_backend("neighbors.search", lookup_name="train") + def train_search(self, *args, **kwargs): ... + + @bind_spmd_backend("neighbors.search", lookup_name="infer") + def infer_search(self, *args, **kwargs): ... + + @bind_spmd_backend("neighbors.regression") + def train(self, *args, **kwargs): ... + + @bind_spmd_backend("neighbors.regression") + def infer(self, *args, **kwargs): ... + @support_input_format() def fit(self, X, y, queue=None): if queue is not None and queue.sycl_device.is_gpu: - return super()._fit(X, y, queue=queue) + return self._fit(X, y, queue=queue) else: raise ValueError( "SPMD version of kNN is not implemented for " @@ -63,13 +86,3 @@ def _get_onedal_params(self, X, y=None): if "responses" not in params["result_option"]: params["result_option"] += "|responses" return params - - -class NearestNeighbors(BaseEstimatorSPMD): - @support_input_format() - def fit(self, X, y, queue=None): - return super().fit(X, y, queue=queue) - - @support_input_format() - def kneighbors(self, X=None, n_neighbors=None, return_distance=True, queue=None): - return super().kneighbors(X, n_neighbors, return_distance, queue=queue) diff --git a/onedal/svm/svm.py b/onedal/svm/svm.py index f4184a40ac..3332f33069 100644 --- a/onedal/svm/svm.py +++ b/onedal/svm/svm.py @@ -20,11 +20,10 @@ import numpy as np from scipy import sparse as sp -from onedal import _backend +from onedal.common._backend import bind_default_backend from ..common._estimator_checks import _check_is_fitted from ..common._mixin import ClassifierMixin, RegressorMixin -from ..common._policy import _get_policy from ..datatypes import _convert_to_supported, from_table, to_table from ..utils import ( _check_array, @@ -84,6 +83,19 @@ def __init__( self.algorithm = algorithm self.svm_type = svm_type + @bind_default_backend("svm") + def _get_policy(self, queue, *data): ... + + @abstractmethod + def train(self, *args, **kwargs): ... + + @abstractmethod + def infer(self, *args, **kwargs): ... 
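`BaseSVM` now declares `train` and `infer` as abstract and lets every concrete estimator bind them to its own backend module, so the shared `_fit`/`_predict` code loses its `module` argument. A minimal sketch of the shape of this pattern, with a stand-in in place of `bind_default_backend`:

    from abc import ABC, abstractmethod

    class Base(ABC):
        @abstractmethod
        def train(self, *args, **kwargs): ...

        def _fit(self, X):
            # shared code just calls self.train; the subclass decides
            # which backend module actually serves the call
            return self.train(X)

    class Classifier(Base):
        def train(self, *args, **kwargs):
            return ("svm.classification", args)  # stand-in backend call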
+ + def _is_classification(self): + """helper function to determine if infer method was loaded from a classification module""" + return hasattr(self.infer, "name") and "classification" in self.infer.name + def _validate_targets(self, y, dtype): self.class_weight_ = None self.classes_ = None @@ -114,7 +126,7 @@ def _get_onedal_params(self, data): "cache_size": self.cache_size, } - def _fit(self, X, y, sample_weight, module, queue): + def _fit(self, X, y, sample_weight, queue): if hasattr(self, "decision_function_shape"): if self.decision_function_shape not in ("ovr", "ovo", None): raise ValueError( @@ -173,10 +185,10 @@ def _fit(self, X, y, sample_weight, module, queue): _gamma = self.gamma self._scale_, self._sigma_ = _gamma, np.sqrt(0.5 / _gamma) - policy = _get_policy(queue, *data) + policy = self._get_policy(queue, *data) X = _convert_to_supported(policy, X) params = self._get_onedal_params(X) - result = module.train(policy, params, *to_table(*data)) + result = self.train(policy, params, *to_table(*data)) if self._sparse: self.dual_coef_ = sp.csr_matrix(from_table(result.coeffs).T) @@ -200,8 +212,8 @@ def _fit(self, X, y, sample_weight, module, queue): self._onedal_model = result.model return self - def _create_model(self, module): - m = module.model() + def _create_model(self): + m = self.model() m.support_vectors = to_table(self.support_vectors_) m.coeffs = to_table(self.dual_coef_.T) @@ -211,14 +223,14 @@ def _create_model(self, module): m.first_class_response, m.second_class_response = 0, 1 return m - def _predict(self, X, module, queue): + def _predict(self, X, queue): _check_is_fitted(self) if self.break_ties and self.decision_function_shape == "ovo": raise ValueError( "break_ties must be False when " "decision_function_shape is 'ovo'" ) - if module in [_backend.svm.classification, _backend.svm.nu_classification]: + if self._is_classification(): sv = self.support_vectors_ if not self._sparse and sv.size > 0 and self._n_support.sum() != sv.shape[0]: raise ValueError( @@ -252,15 +264,15 @@ def _predict(self, X, module, queue): % type(self).__name__ ) - policy = _get_policy(queue, X) + policy = self._get_policy(queue, X) X = _convert_to_supported(policy, X) params = self._get_onedal_params(X) if hasattr(self, "_onedal_model"): model = self._onedal_model else: - model = self._create_model(module) - result = module.infer(policy, params, model, to_table(X)) + model = self._create_model() + result = self.infer(policy, params, model, to_table(X)) y = from_table(result.responses) return y @@ -283,7 +295,7 @@ def _ovr_decision_function(self, predictions, confidences, n_classes): ) return votes + transformed_confidences - def _decision_function(self, X, module, queue): + def _decision_function(self, X, queue): _check_is_fitted(self) X = _check_array( X, dtype=[np.float64, np.float32], force_all_finite=True, accept_sparse="csr" @@ -301,7 +313,7 @@ def _decision_function(self, X, module, queue): % type(self).__name__ ) - if module in [_backend.svm.classification, _backend.svm.nu_classification]: + if self._is_classification(): sv = self.support_vectors_ if not self._sparse and sv.size > 0 and self._n_support.sum() != sv.shape[0]: raise ValueError( @@ -309,15 +321,15 @@ def _decision_function(self, X, module, queue): f"of {self.__class__.__name__} was altered" ) - policy = _get_policy(queue, X) + policy = self._get_policy(queue, X) X = _convert_to_supported(policy, X) params = self._get_onedal_params(X) if hasattr(self, "_onedal_model"): model = self._onedal_model else: - model = 
self._create_model(module) - result = module.infer(policy, params, model, to_table(X)) + model = self._create_model() + result = self.infer(policy, params, model, to_table(X)) decision_function = from_table(result.decision_function) if len(self.classes_) == 2: @@ -372,11 +384,20 @@ def __init__( ) self.svm_type = SVMtype.epsilon_svr + @bind_default_backend("svm.regression") + def train(self, *args, **kwargs): ... + + @bind_default_backend("svm.regression") + def infer(self, *args, **kwargs): ... + + @bind_default_backend("svm.regression") + def model(self): ... + def fit(self, X, y, sample_weight=None, queue=None): - return super()._fit(X, y, sample_weight, _backend.svm.regression, queue) + return self._fit(X, y, sample_weight, queue) def predict(self, X, queue=None): - y = super()._predict(X, _backend.svm.regression, queue) + y = self._predict(X, queue) return y.ravel() @@ -424,6 +445,15 @@ def __init__( ) self.svm_type = SVMtype.c_svc + @bind_default_backend("svm.classification") + def train(self, *args, **kwargs): ... + + @bind_default_backend("svm.classification") + def infer(self, *args, **kwargs): ... + + @bind_default_backend("svm.classification") + def model(self): ... + def _validate_targets(self, y, dtype): y, self.class_weight_, self.classes_ = _validate_targets( y, self.class_weight, dtype @@ -431,16 +461,16 @@ def _validate_targets(self, y, dtype): return y def fit(self, X, y, sample_weight=None, queue=None): - return super()._fit(X, y, sample_weight, _backend.svm.classification, queue) + return self._fit(X, y, sample_weight, queue) def predict(self, X, queue=None): - y = super()._predict(X, _backend.svm.classification, queue) + y = self._predict(X, queue) if len(self.classes_) == 2: y = y.ravel() return self.classes_.take(np.asarray(y, dtype=np.intp)).ravel() def decision_function(self, X, queue=None): - return super()._decision_function(X, _backend.svm.classification, queue) + return self._decision_function(X, queue) class NuSVR(RegressorMixin, BaseSVM): @@ -485,12 +515,20 @@ def __init__( ) self.svm_type = SVMtype.nu_svr + @bind_default_backend("svm.nu_regression") + def train(self, *args, **kwargs): ... + + @bind_default_backend("svm.nu_regression") + def infer(self, *args, **kwargs): ... + + @bind_default_backend("svm.nu_regression") + def model(self): ... + def fit(self, X, y, sample_weight=None, queue=None): - return super()._fit(X, y, sample_weight, _backend.svm.nu_regression, queue) + return self._fit(X, y, sample_weight, queue) def predict(self, X, queue=None): - y = super()._predict(X, _backend.svm.nu_regression, queue) - return y.ravel() + return self._predict(X, queue).ravel() class NuSVC(ClassifierMixin, BaseSVM): @@ -537,6 +575,15 @@ def __init__( ) self.svm_type = SVMtype.nu_svc + @bind_default_backend("svm.nu_classification") + def train(self, *args, **kwargs): ... + + @bind_default_backend("svm.nu_classification") + def infer(self, *args, **kwargs): ... + + @bind_default_backend("svm.nu_classification") + def model(self): ... 
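The `_is_classification` helper earlier in this file works because the binder wraps each backend callable in an object that records its lookup path in a `name` attribute; a tiny model of that contract:

    class BoundFunction:
        def __init__(self, fn, name):
            self.fn, self.name = fn, name

        def __call__(self, *args, **kwargs):
            return self.fn(*args, **kwargs)

    infer = BoundFunction(lambda *a, **kw: None, "svm.nu_classification.infer")
    assert "classification" in infer.name  # the check _is_classification performs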
+ def _validate_targets(self, y, dtype): y, self.class_weight_, self.classes_ = _validate_targets( y, self.class_weight, dtype @@ -544,13 +591,13 @@ def _validate_targets(self, y, dtype): return y def fit(self, X, y, sample_weight=None, queue=None): - return super()._fit(X, y, sample_weight, _backend.svm.nu_classification, queue) + return self._fit(X, y, sample_weight, queue) def predict(self, X, queue=None): - y = super()._predict(X, _backend.svm.nu_classification, queue) + y = self._predict(X, queue) if len(self.classes_) == 2: y = y.ravel() return self.classes_.take(np.asarray(y, dtype=np.intp)).ravel() def decision_function(self, X, queue=None): - return super()._decision_function(X, _backend.svm.nu_classification, queue) + return self._decision_function(X, queue) diff --git a/pyproject.toml b/pyproject.toml index 3255e3fa58..5f3df61b24 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -18,8 +18,13 @@ [tool.black] line-length = 90 target-version = ['py39', 'py310', 'py311', 'py312'] -extend-ignore = 'E203' [tool.isort] profile = "black" line_length = 90 + +[tool.pytest.ini_options] +log_cli = false +log_cli_level = "INFO" +log_cli_format = "%(asctime)s [%(levelname)8s] %(message)s (%(filename)s:%(lineno)s)" +log_cli_date_format = "%Y-%m-%d %H:%M:%S" diff --git a/sklearnex/__init__.py b/sklearnex/__init__.py index 677e681f8d..50b325ed08 100755 --- a/sklearnex/__init__.py +++ b/sklearnex/__init__.py @@ -54,10 +54,10 @@ ] onedal_iface_flag = os.environ.get("OFF_ONEDAL_IFACE", "0") if onedal_iface_flag == "0": - from onedal import _is_spmd_backend + from onedal import _spmd_backend from onedal.common.hyperparameters import get_hyperparameters - if _is_spmd_backend: + if _spmd_backend is not None: __all__.append("spmd") diff --git a/sklearnex/spmd/neighbors/__init__.py b/sklearnex/spmd/neighbors/__init__.py index 8036511d9f..7b1f9f646c 100644 --- a/sklearnex/spmd/neighbors/__init__.py +++ b/sklearnex/spmd/neighbors/__init__.py @@ -14,6 +14,6 @@ # limitations under the License. # ============================================================================== -from .neighbors import KNeighborsClassifier, KNeighborsRegressor, NearestNeighbors +from onedal.spmd.neighbors import KNeighborsClassifier, KNeighborsRegressor -__all__ = ["KNeighborsClassifier", "KNeighborsRegressor", "NearestNeighbors"] +__all__ = ["KNeighborsClassifier", "KNeighborsRegressor"] diff --git a/sklearnex/spmd/neighbors/neighbors.py b/sklearnex/spmd/neighbors/neighbors.py deleted file mode 100644 index 5b569c8e1f..0000000000 --- a/sklearnex/spmd/neighbors/neighbors.py +++ /dev/null @@ -1,25 +0,0 @@ -# ============================================================================== -# Copyright 2023 Intel Corporation -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== - -from onedal.spmd.neighbors import ( - KNeighborsClassifier, - KNeighborsRegressor, - NearestNeighbors, -) - -# TODO: -# Currently it uses `onedal` module interface. 
-# Add sklearnex dispatching. diff --git a/sklearnex/tests/test_memory_usage.py b/sklearnex/tests/test_memory_usage.py index 6e7fdb72b5..f35287ec7b 100644 --- a/sklearnex/tests/test_memory_usage.py +++ b/sklearnex/tests/test_memory_usage.py @@ -18,7 +18,6 @@ import logging import os import tracemalloc -import types import warnings from inspect import isclass @@ -29,7 +28,7 @@ from sklearn.datasets import make_classification from sklearn.model_selection import KFold -from onedal import _is_dpc_backend +from onedal import _default_backend as backend from onedal.tests.utils._dataframes_support import ( _convert_to_dataframe, get_dataframes_and_queues, @@ -47,9 +46,6 @@ if dpnp_available: import dpnp -if _is_dpc_backend: - from onedal import _backend - CPU_SKIP_LIST = ( "TSNE", # too slow for using in testing on common data size @@ -132,7 +128,7 @@ def gen_functions(functions): ORDER_DICT = {"F": np.asfortranarray, "C": np.ascontiguousarray} -if _is_dpc_backend: +if backend.is_dpc: from sklearn.utils.validation import check_is_fitted @@ -181,8 +177,8 @@ def gen_clsf_data(n_samples, n_features, dtype=None): def get_traced_memory(queue=None): - if _is_dpc_backend and queue and queue.sycl_device.is_gpu: - return _backend.get_used_memory(queue) + if backend.is_dpc and queue and queue.sycl_device.is_gpu: + return backend.get_used_memory(queue) else: return tracemalloc.get_traced_memory()[0] @@ -352,7 +348,7 @@ def test_gpu_memory_leaks(estimator, queue, order, data_shape): @pytest.mark.skipif( - not _is_dpc_backend, + not backend.is_dpc, reason="__sycl_usm_array_interface__ support requires DPC backend.", ) @pytest.mark.parametrize( From ec58015362187803cbf065dfdef1b26841e331df Mon Sep 17 00:00:00 2001 From: Andreas Huber Date: Sat, 23 Nov 2024 01:00:32 -0800 Subject: [PATCH 02/41] fixup --- onedal/ensemble/forest.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/onedal/ensemble/forest.py b/onedal/ensemble/forest.py index 19f7525510..1250576fe5 100644 --- a/onedal/ensemble/forest.py +++ b/onedal/ensemble/forest.py @@ -373,7 +373,7 @@ def _predict(self, X, queue, hparams=None): y = from_table(result.responses) return y - def _predict_proba(self, X, module, queue, hparams=None): + def _predict_proba(self, X, queue, hparams=None): _check_is_fitted(self) X = _check_array( X, dtype=[np.float64, np.float32], force_all_finite=True, accept_sparse=False @@ -482,7 +482,7 @@ def predict(self, X, queue=None): def predict_proba(self, X, queue=None): hparams = get_hyperparameters("decision_forest", "infer") - return self._predict_proba(X, queue, hparams) + return super()._predict_proba(X, queue, hparams) class RandomForestRegressor(RegressorMixin, BaseForest, metaclass=ABCMeta): @@ -653,7 +653,7 @@ def predict(self, X, queue=None): return np.take(self.classes_, pred.ravel().astype(np.int64, casting="unsafe")) def predict_proba(self, X, queue=None): - return self._predict_proba( + return super()._predict_proba( X, queue, ) From 1ddcc34afdca9efd8c0bac487343b1b6b2b04f32 Mon Sep 17 00:00:00 2001 From: Andreas Huber Date: Mon, 25 Nov 2024 09:23:34 -0800 Subject: [PATCH 03/41] Remove _get_policy() and use queue directly instead --- onedal/__init__.py | 3 + onedal/_device_offload.py | 128 +++++++++++++----- onedal/basic_statistics/basic_statistics.py | 17 +-- .../incremental_basic_statistics.py | 20 +-- onedal/cluster/dbscan.py | 10 +- onedal/cluster/kmeans.py | 86 +++++------- onedal/cluster/kmeans_init.py | 21 +-- onedal/common/_backend.py | 76 +++++------ onedal/covariance/covariance.py | 12 
+-
 onedal/covariance/incremental_covariance.py   | 22 +---
 onedal/datatypes/_data_conversion.py          | 28 ++--
 onedal/decomposition/incremental_pca.py       | 21 +--
 onedal/decomposition/pca.py                   | 17 +--
 onedal/ensemble/forest.py                     | 42 +++---
 .../linear_model/incremental_linear_model.py  | 41 ++----
 onedal/linear_model/linear_model.py           | 33 ++---
 onedal/linear_model/logistic_regression.py    | 27 ++--
 onedal/neighbors/neighbors.py                 | 57 +++-----
 onedal/primitives/kernel_functions.py         |  2 +-
 .../spmd/basic_statistics/basic_statistics.py |  3 -
 .../incremental_basic_statistics.py           | 20 +--
 onedal/spmd/cluster/dbscan.py                 |  5 +-
 onedal/spmd/cluster/kmeans.py                 | 12 +-
 onedal/spmd/covariance/covariance.py          |  6 +-
 .../spmd/covariance/incremental_covariance.py | 11 +-
 onedal/spmd/decomposition/incremental_pca.py  | 18 +--
 onedal/spmd/decomposition/pca.py              |  6 +-
 .../linear_model/incremental_linear_model.py  | 13 +-
 onedal/spmd/linear_model/linear_model.py      |  8 +-
 .../spmd/linear_model/logistic_regression.py  |  6 +-
 onedal/spmd/neighbors/neighbors.py            | 16 +--
 onedal/svm/svm.py                             | 34 ++---
 sklearnex/_device_offload.py                  |  2 +-
 33 files changed, 342 insertions(+), 481 deletions(-)

diff --git a/onedal/__init__.py b/onedal/__init__.py
index 4d2f298c11..083fb5a676 100644
--- a/onedal/__init__.py
+++ b/onedal/__init__.py
@@ -32,6 +32,9 @@ def __init__(self, backend_module, is_dpc, is_spmd):
     def __getattr__(self, name):
         return getattr(self.backend, name)
 
+    def __repr__(self) -> str:
+        return f"Backend({self.backend}, is_dpc={self.is_dpc}, is_spmd={self.is_spmd})"
+
 
 if "Windows" in platform.system():
     import os
diff --git a/onedal/_device_offload.py b/onedal/_device_offload.py
index 0e67aaac89..b929223db0 100644
--- a/onedal/_device_offload.py
+++ b/onedal/_device_offload.py
@@ -16,6 +16,7 @@
 
 from collections.abc import Iterable
 from functools import wraps
+from typing import Any, Optional
 
 import numpy as np
 from sklearn import get_config
@@ -25,17 +26,89 @@
 from .utils._dpep_helpers import dpctl_available, dpnp_available
 
 if dpctl_available:
-    from dpctl import SyclQueue
+    from dpctl import SyclQueue as SyclQueueImplementation
     from dpctl.memory import MemoryUSMDevice, as_usm_memory
     from dpctl.tensor import usm_ndarray
 else:
-    import onedal
+    from onedal import _dpc_backend
+
+    SyclQueueImplementation = getattr(_dpc_backend, "SyclQueue", None)
+
+
+class SyclQueue:
+    def __init__(self, target=None):
+        if target and SyclQueueImplementation is not None:
+            self.implementation = SyclQueueImplementation(target)
+        else:
+            self.implementation = object()
+
+    @property
+    def sycl_device(self):
+        return getattr(self.implementation, "sycl_device", None)
+
+
+class SyclQueueManager:
+    """Manage global and data SyclQueues"""
+
+    # single instance of global queue
+    __global_queue = None
+
+    @staticmethod
+    def get_global_queue() -> Optional[SyclQueue]:
+        """Get the global queue.
Retrieve it from the config if not set.""" + if SyclQueueManager.__global_queue is not None: + return SyclQueueManager.__global_queue + + target = _get_config()["target_offload"] + + if target == "auto": + # queue will be created from the provided data to each function call + return None + + if isinstance(target, (str, int)): + q = SyclQueue(target) + else: + q = target + + SyclQueueManager.__global_queue = q + return q + + @staticmethod + def update_global_queue(queue): + """Update the global queue.""" + SyclQueueManager.__global_queue = queue + + @staticmethod + def update_global_queue_from_data(*data): + """Extract the queue from the provided data and update the global queue.""" + queue = SyclQueueManager.from_data(*data) + SyclQueueManager.update_global_queue(queue) # redundant, but explicit + + @staticmethod + def from_data(*data) -> Optional[SyclQueue]: + """Extract the queue from provided data. This updates the global queue as well.""" + for item in data: + # iterate through all data objects, extract the queue, and verify that all data objects are on the same device + usm_iface = getattr(item, "__sycl_usm_array_interface__", None) + if usm_iface is None: + # no interface found - try next data object + continue + + # extract the queue, verify it aligns with the global queue + global_queue = SyclQueueManager.get_global_queue() + data_queue = SyclQueue(usm_iface["syclobj"]) + if global_queue is None: + SyclQueueManager.update_global_queue(data_queue) + global_queue = data_queue + + # global queue must coincide with data queue + if global_queue.sycl_device != data_queue.sycl_device: + raise ValueError( + "Data objects are located on different target devices or not on selected device." + ) + + # after we went through the data, global queue is updated and verified + return SyclQueueManager.get_global_queue() - # setting fallback to `object` will make if isinstance call - # in _get_global_queue always true for situations without the - # dpc backend when `device_offload` is used. 
Instead, it will
-    # fail at the policy check phase yielding a RuntimeError
-    SyclQueue = getattr(onedal._dpc_backend, "SyclQueue", object)
 
 if dpnp_available:
     import dpnp
@@ -68,7 +141,7 @@ def _copy_to_usm(queue, array):
     return array
 
 
-def _transfer_to_host(queue, *data):
+def _transfer_to_host(*data):
     has_usm_data, has_host_data = False, False
 
     host_data = []
@@ -81,13 +154,6 @@ def _transfer_to_host(queue, *data):
                 "dpctl need to be installed to work "
                 "with __sycl_usm_array_interface__"
             )
-            if queue is not None:
-                if queue.sycl_device != usm_iface["syclobj"].sycl_device:
-                    raise RuntimeError(
-                        "Input data shall be located " "on single target device"
-                    )
-            else:
-                queue = usm_iface["syclobj"]
 
             buffer = as_usm_memory(item).copy_to_host()
             order = "C"
@@ -116,25 +182,14 @@ def _transfer_to_host(queue, *data):
                 raise RuntimeError("Input data shall be located on single target device")
         host_data.append(item)
 
-    return has_usm_data, queue, host_data
-
-
-def _get_global_queue():
-    target = _get_config()["target_offload"]
-
-    if target != "auto":
-        if isinstance(target, SyclQueue):
-            return target
-        return SyclQueue(target)
-    return None
+    return has_usm_data, host_data
 
 
 def _get_host_inputs(*args, **kwargs):
-    q = _get_global_queue()
-    _, q, hostargs = _transfer_to_host(q, *args)
-    _, q, hostvalues = _transfer_to_host(q, *kwargs.values())
+    _, hostargs = _transfer_to_host(*args)
+    _, hostvalues = _transfer_to_host(*kwargs.values())
     hostkwargs = dict(zip(kwargs.keys(), hostvalues))
-    return q, hostargs, hostkwargs
+    return hostargs, hostkwargs
 
 
 def _run_on_device(func, obj=None, *args, **kwargs):
@@ -163,21 +218,22 @@ def decorator(func):
         def wrapper_impl(obj, *args, **kwargs):
             if len(args) == 0 and len(kwargs) == 0:
                 return _run_on_device(func, obj, *args, **kwargs)
+
+            hostargs, hostkwargs = _get_host_inputs(*args, **kwargs)
             data = (*args, *kwargs.values())
-            data_queue, hostargs, hostkwargs = _get_host_inputs(*args, **kwargs)
-            if queue_param and not (
-                "queue" in hostkwargs and hostkwargs["queue"] is not None
-            ):
+            data_queue = SyclQueueManager.from_data(*data)
+            if queue_param and hostkwargs.get("queue") is None:
                 hostkwargs["queue"] = data_queue
             result = _run_on_device(func, obj, *hostargs, **hostkwargs)
+
             usm_iface = getattr(data[0], "__sycl_usm_array_interface__", None)
             if usm_iface is not None:
                 result = _copy_to_usm(data_queue, result)
                 if dpnp_available and isinstance(data[0], dpnp.ndarray):
                     result = _convert_to_dpnp(result)
                 return result
-            config = get_config()
-            if not ("transform_output" in config and config["transform_output"]):
+
+            if not get_config().get("transform_output"):
                 input_array_api = getattr(data[0], "__array_namespace__", lambda: None)()
                 if input_array_api:
                     input_array_api_device = data[0].device
diff --git a/onedal/basic_statistics/basic_statistics.py b/onedal/basic_statistics/basic_statistics.py
index dcacc6d565..7196b46877 100644
--- a/onedal/basic_statistics/basic_statistics.py
+++ b/onedal/basic_statistics/basic_statistics.py
@@ -31,10 +31,7 @@ def __init__(self, result_options, algorithm):
         self.algorithm = algorithm
 
     @bind_default_backend("basic_statistics")
-    def _get_policy(self, queue, *data): ...
-
-    @bind_default_backend("basic_statistics")
-    def compute(self, policy, params, data_table, weights_table): ...
+    def compute(self, params, data_table, weights_table, queue=None): ...
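With `_get_policy` removed, estimator code passes the queue as a keyword and leaves policy construction to the backend wrapper. A condensed sketch of the dispatch rule (it mirrors `BackendFunction.__call__` in onedal/common/_backend.py further below; names abbreviated):

    def dispatch(backend, method, *args, queue=None):
        # host execution unless a queue requests a device policy
        if queue is None:
            policy = backend.host_policy()
        elif backend.is_spmd:
            policy = backend.spmd_data_parallel_policy(queue)
        elif backend.is_dpc:
            policy = backend.data_parallel_policy(queue)
        else:
            raise RuntimeError("Operations using queues require the DPC/SPMD backend")
        return method(policy, *args)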
@staticmethod def get_all_result_options(): @@ -77,8 +74,6 @@ def __init__(self, result_options="all", algorithm="by_default"): super().__init__(result_options, algorithm) def fit(self, data, sample_weight=None, queue=None): - policy = self._get_policy(queue, data, sample_weight) - is_csr = _is_csr(data) if data is not None and not is_csr: @@ -86,12 +81,14 @@ def fit(self, data, sample_weight=None, queue=None): if sample_weight is not None: sample_weight = _check_array(sample_weight, ensure_2d=False) - data, sample_weight = _convert_to_supported(policy, data, sample_weight) + data, sample_weight = _convert_to_supported(data, sample_weight) is_single_dim = data.ndim == 1 data_table, weights_table = to_table(data, sample_weight) dtype = data.dtype - raw_result = self._compute_raw(data_table, weights_table, policy, dtype, is_csr) + raw_result = self._compute_raw( + data_table, weights_table, dtype, is_csr, queue=queue + ) for opt, raw_value in raw_result.items(): value = from_table(raw_value).ravel() if is_single_dim: @@ -102,10 +99,10 @@ def fit(self, data, sample_weight=None, queue=None): return self def _compute_raw( - self, data_table, weights_table, policy, dtype=np.float32, is_csr=False + self, data_table, weights_table, dtype=np.float32, is_csr=False, queue=None ): params = self._get_onedal_params(is_csr, dtype) - result = self.compute(policy, params, data_table, weights_table) + result = self.compute(params, data_table, weights_table, queue=queue) options = self._get_result_options(self.options).split("|") return {opt: getattr(result, opt) for opt in options} diff --git a/onedal/basic_statistics/incremental_basic_statistics.py b/onedal/basic_statistics/incremental_basic_statistics.py index 4375d7bbc0..50453a86d6 100644 --- a/onedal/basic_statistics/incremental_basic_statistics.py +++ b/onedal/basic_statistics/incremental_basic_statistics.py @@ -76,10 +76,10 @@ def __init__(self, result_options="all"): def partial_compute_result(self): ... @bind_default_backend("basic_statistics") - def partial_compute(self, *args, **kwargs): ... + def partial_compute(self, *args, queue=None, **kwargs): ... @bind_default_backend("basic_statistics") - def finalize_compute(self, *args, **kwargs): ... + def finalize_compute(self, *args, queue=None, **kwargs): ... def _reset(self): # get the _partial_result pointer from backend @@ -104,9 +104,7 @@ def partial_fit(self, X, weights=None, queue=None): self : object Returns the instance itself. """ - self._queue = queue - policy = self._get_policy(queue, X) - X, weights = _convert_to_supported(policy, X, weights) + X, weights = _convert_to_supported(X, weights) X = _check_array( X, dtype=[np.float64, np.float32], ensure_2d=False, force_all_finite=False @@ -125,7 +123,7 @@ def partial_fit(self, X, weights=None, queue=None): X_table, weights_table = to_table(X, weights) self._partial_result = self.partial_compute( - policy, self._onedal_params, self._partial_result, X_table, weights_table + self._onedal_params, self._partial_result, X_table, weights_table, queue=queue ) def finalize_fit(self, queue=None): @@ -143,13 +141,9 @@ def finalize_fit(self, queue=None): self : object Returns the instance itself. 
""" - - if queue is not None: - policy = self._get_policy(queue) - else: - policy = self._get_policy(self._queue) - - result = self.finalize_compute(policy, self._onedal_params, self._partial_result) + result = self.finalize_compute( + self._onedal_params, self._partial_result, queue=queue + ) options = self._get_result_options(self.options).split("|") for opt in options: diff --git a/onedal/cluster/dbscan.py b/onedal/cluster/dbscan.py index 7bdc226c47..1ef8539c8c 100644 --- a/onedal/cluster/dbscan.py +++ b/onedal/cluster/dbscan.py @@ -48,11 +48,8 @@ def __init__( self.p = p self.n_jobs = n_jobs - @bind_default_backend("dbscan") - def _get_policy(self, queue, *data): ... - @bind_default_backend("dbscan.clustering") - def compute(self, policy, params, data_table, weights_table): ... + def compute(self, params, data_table, weights_table, queue=None): ... def _get_onedal_params(self, dtype=np.float32): return { @@ -65,7 +62,6 @@ def _get_onedal_params(self, dtype=np.float32): } def fit(self, X, y=None, sample_weight=None, queue=None): - policy = self._get_policy(queue, X) X = _check_array(X, accept_sparse="csr", dtype=[np.float64, np.float32]) sample_weight = make2d(sample_weight) if sample_weight is not None else None X = make2d(X) @@ -73,10 +69,10 @@ def fit(self, X, y=None, sample_weight=None, queue=None): types = [np.float32, np.float64] if get_dtype(X) not in types: X = X.astype(np.float64) - X = _convert_to_supported(policy, X) + X = _convert_to_supported(X) dtype = get_dtype(X) params = self._get_onedal_params(dtype) - result = self.compute(policy, params, to_table(X), to_table(sample_weight)) + result = self.compute(params, to_table(X), to_table(sample_weight), queue=queue) self.labels_ = from_table(result.responses).ravel() if result.core_observation_indices is not None: diff --git a/onedal/cluster/kmeans.py b/onedal/cluster/kmeans.py index 7e6d83b3e8..d8b069ce17 100644 --- a/onedal/cluster/kmeans.py +++ b/onedal/cluster/kmeans.py @@ -63,16 +63,13 @@ def __init__( self.n_local_trials = n_local_trials @bind_default_backend("kmeans_common") - def _get_policy(self, queue, X): ... - - @bind_default_backend("kmeans_common") - def _is_same_clustering(self, labels, best_labels, n_clusters): ... + def _is_same_clustering(self, labels, best_labels, n_clusters, queue=None): ... @bind_default_backend("kmeans.clustering") - def train(self, policy, params, X_table, centroids_table): ... + def train(self, params, X_table, centroids_table, queue=None): ... @bind_default_backend("kmeans.clustering") - def infer(self, policy, params, model, centroids_table): ... + def infer(self, params, model, centroids_table, queue=None): ... 
def _validate_center_shape(self, X, centers): """Check if centers is compatible with X and n_clusters.""" @@ -94,7 +91,7 @@ def _get_kmeans_init(self, cluster_count, seed, algorithm): def _get_basic_statistics_backend(self, result_options): return BasicStatistics(result_options) - def _tolerance(self, X_table, rtol, is_csr, policy, dtype): + def _tolerance(self, X_table, rtol, is_csr, dtype, queue=None): """Compute absolute tolerance from the relative tolerance""" if rtol == 0.0: return rtol @@ -102,13 +99,13 @@ def _tolerance(self, X_table, rtol, is_csr, policy, dtype): bs = self._get_basic_statistics_backend("variance") - res = bs._compute_raw(X_table, dummy, policy, dtype, is_csr) + res = bs._compute_raw(X_table, dummy, dtype, is_csr, queue=queue) mean_var = from_table(res["variance"]).mean() return mean_var * rtol def _check_params_vs_input( - self, X_table, is_csr, policy, default_n_init=10, dtype=np.float32 + self, X_table, is_csr, default_n_init=10, dtype=np.float32, queue=None ): # n_clusters if X_table.shape[0] < self.n_clusters: @@ -117,7 +114,7 @@ def _check_params_vs_input( ) # tol - self._tol = self._tolerance(X_table, self.tol, is_csr, policy, dtype) + self._tol = self._tolerance(X_table, self.tol, is_csr, dtype, queue=queue) # n-init # TODO(1.4): Remove @@ -173,42 +170,25 @@ def _init_centroids_onedal( X_table, init, random_seed, - policy, is_csr, dtype=np.float32, n_centroids=None, + queue=None, ): n_clusters = self.n_clusters if n_centroids is None else n_centroids - # Use host policy for KMeans init, only for csr data - # as oneDAL KMeansInit for CSR data is not implemented on GPU - if is_csr: - init_policy = self._get_policy(None, None) - logging.getLogger("sklearnex").info("Running Sparse KMeansInit on CPU") - else: - init_policy = policy if isinstance(init, str) and init == "k-means++": - if not is_csr: - alg = self._get_kmeans_init( - cluster_count=n_clusters, - seed=random_seed, - algorithm="plus_plus_dense", - ) - else: - alg = self._get_kmeans_init( - cluster_count=n_clusters, seed=random_seed, algorithm="plus_plus_csr" - ) - centers_table = alg.compute_raw(X_table, init_policy, dtype) + algorithm = "plus_plus_dense" if not is_csr else "plus_plus_csr" + alg = self._get_kmeans_init( + cluster_count=n_clusters, seed=random_seed, algorithm=algorithm + ) + centers_table = alg.compute_raw(X_table, dtype, queue=queue) elif isinstance(init, str) and init == "random": - if not is_csr: - alg = self._get_kmeans_init( - cluster_count=n_clusters, seed=random_seed, algorithm="random_dense" - ) - else: - alg = self._get_kmeans_init( - cluster_count=n_clusters, seed=random_seed, algorithm="random_csr" - ) - centers_table = alg.compute_raw(X_table, init_policy, dtype) + algorithm = "random_dense" if not is_csr else "random_csr" + alg = self._get_kmeans_init( + cluster_count=n_clusters, seed=random_seed, algorithm=algorithm + ) + centers_table = alg.compute_raw(X_table, dtype, queue=queue) elif _is_arraylike_not_scalar(init): if _is_csr(init): # oneDAL KMeans only supports Dense Centroids @@ -217,16 +197,14 @@ def _init_centroids_onedal( centers = np.asarray(init) assert centers.shape[0] == n_clusters assert centers.shape[1] == X_table.column_count - # KMeans is implemented on both CPU and GPU for Dense and CSR data - # The original policy can be used here - centers = _convert_to_supported(policy, centers) + centers = _convert_to_supported(centers) centers_table = to_table(centers) else: raise TypeError("Unsupported type of the `init` value") return centers_table - def 
_init_centroids_sklearn(self, X, init, random_state, policy, dtype=np.float32): + def _init_centroids_sklearn(self, X, init, random_state, dtype=np.float32): # For oneDAL versions < 2023.2 or callable init, # using the scikit-learn implementation logging.getLogger("sklearnex").info("Computing KMeansInit with Stock sklearn") @@ -254,18 +232,18 @@ def _init_centroids_sklearn(self, X, init, random_state, policy, dtype=np.float3 f"callable, got '{ init }' instead." ) - centers = _convert_to_supported(policy, centers) + centers = _convert_to_supported(centers) return to_table(centers) def _fit_backend( - self, X_table, centroids_table, policy, dtype=np.float32, is_csr=False + self, X_table, centroids_table, dtype=np.float32, is_csr=False, queue=None ): params = self._get_onedal_params(is_csr, dtype) meta = _default_backend.get_table_metadata(X_table) assert meta.get_npy_dtype(0) == dtype - result = self.train(policy, params, X_table, centroids_table) + result = self.train(params, X_table, centroids_table, queue=queue) return ( result.responses, @@ -275,16 +253,15 @@ def _fit_backend( ) def _fit(self, X, queue=None): - policy = self._get_policy(queue, X) is_csr = _is_csr(X) X = _check_array( X, dtype=[np.float64, np.float32], accept_sparse="csr", force_all_finite=False ) - X = _convert_to_supported(policy, X) + X = _convert_to_supported(X) dtype = get_dtype(X) X_table = to_table(X) - self._check_params_vs_input(X_table, is_csr, policy, dtype=dtype) + self._check_params_vs_input(X_table, is_csr, dtype=dtype, queue=queue) self.n_features_in_ = X_table.column_count @@ -316,18 +293,18 @@ def is_better_iteration(inertia, labels): if use_onedal_init: random_seed = random_state.randint(np.iinfo("i").max) centroids_table = self._init_centroids_onedal( - X_table, init, random_seed, policy, is_csr, dtype=dtype + X_table, init, random_seed, is_csr, dtype=dtype, queue=queue ) else: centroids_table = self._init_centroids_sklearn( - X, init, random_state, policy, dtype=dtype + X, init, random_state, dtype=dtype ) if self.verbose: print("Initialization complete") labels, inertia, model, n_iter = self._fit_backend( - X_table, centroids_table, policy, dtype, is_csr + X_table, centroids_table, dtype, is_csr, queue=queue ) if self.verbose: @@ -389,12 +366,11 @@ def cluster_centers_(self): def _predict(self, X, queue=None, result_options=None): is_csr = _is_csr(X) - policy = self._get_policy(queue, X) - X = _convert_to_supported(policy, X) + X = _convert_to_supported(X) X_table = to_table(X) - params = self._get_onedal_params(is_csr, X_table.dtype, result_options) + params = self._get_onedal_params(is_csr, X.dtype, result_options) - result = self.infer(policy, params, self.model_, X_table) + result = self.infer(params, self.model_, X_table, queue=queue) if result_options == "compute_exact_objective_function": # This is only set for score function diff --git a/onedal/cluster/kmeans_init.py b/onedal/cluster/kmeans_init.py index 4082d8e1e5..5c6efbd3ad 100755 --- a/onedal/cluster/kmeans_init.py +++ b/onedal/cluster/kmeans_init.py @@ -48,11 +48,8 @@ def __init__( else: self.local_trials_count = local_trials_count - @bind_default_backend("kmeans_init") - def _get_policy(self, policy, params, X_table): ... - @bind_default_backend("kmeans_init.init", lookup_name="compute") - def backend_compute(self, policy, params, X_table): ... + def backend_compute(self, params, X_table, queue=None): ... 
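`lookup_name="compute"` above binds the backend's `compute` symbol under the Python name `backend_compute`, keeping the estimator's public `compute` method free. A minimal sketch of a binder with such aliasing (illustrative, not the exact onedal/common/_backend.py implementation):

    def bind(backend_module, lookup_name=None):
        def decorator(stub):
            impl = getattr(backend_module, lookup_name or stub.__name__)

            def wrapper(self, *args, **kwargs):
                # the "..." stub body is discarded; calls go to the backend
                return impl(*args, **kwargs)

            wrapper.__name__ = stub.__name__
            return wrapper

        return decorator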
def _get_onedal_params(self, dtype=np.float32): return { @@ -63,7 +60,7 @@ def _get_onedal_params(self, dtype=np.float32): "cluster_count": self.cluster_count, } - def _get_params_and_input(self, X, policy): + def _get_params_and_input(self, X): X = _check_array( X, dtype=[np.float64, np.float32], @@ -71,26 +68,22 @@ def _get_params_and_input(self, X, policy): force_all_finite=False, ) - X = _convert_to_supported(policy, X) + X = _convert_to_supported(X) dtype = get_dtype(X) params = self._get_onedal_params(dtype) return (params, to_table(X), dtype) def compute(self, X, queue=None): - policy = self._get_policy(queue, X) - # oneDAL KMeans Init for sparse data does not have GPU support - if issparse(X): - policy = self._get_policy(None, None) - _, X_table, dtype = self._get_params_and_input(X, policy) + _, X_table, dtype = self._get_params_and_input(X) - centroids = self.compute_raw(X_table, policy, dtype) + centroids = self.compute_raw(X_table, dtype, queue=queue) return from_table(centroids) - def compute_raw(self, X_table, policy, dtype=np.float32): + def compute_raw(self, X_table, dtype=np.float32, queue=None): params = self._get_onedal_params(dtype) - result = self.backend_compute(policy, params, X_table) + result = self.backend_compute(params, X_table, queue=queue) return result.centroids def kmeans_plusplus( diff --git a/onedal/common/_backend.py b/onedal/common/_backend.py index 70c6042eed..df9b0667b7 100644 --- a/onedal/common/_backend.py +++ b/onedal/common/_backend.py @@ -20,6 +20,7 @@ from typing import Any, Callable, Literal, Optional from onedal import Backend, _default_backend, _spmd_backend +from onedal._device_offload import SyclQueueManager from onedal.common.policy_manager import PolicyManager from .backend_manager import BackendManager @@ -41,41 +42,44 @@ class BackendFunction: """Wrapper around backend function to allow setting auxiliary information""" - def __init__(self, method: Callable[..., Any], backend_type: BackendType, name: str): + def __init__( + self, + method: Callable[..., Any], + backend: Backend, + name: str, + ): self.method = method - self.backend_type = backend_type self.name = name + self.backend = backend + + def __call__(self, *args: Any, queue=None, **kwargs: Any) -> Any: + """Dispatch to backend function with the appropriate policy which is determined from the provided or global queue""" + if not args: + # immediate dispatching without args, i.e. 
without data + return self.method(**kwargs) + + if queue is None: + # use globally configured queue (from `target_offload` configuration or provided data) + queue = SyclQueueManager.get_global_queue() + + if queue is not None and not (self.backend.is_dpc or self.backend.is_spmd): + raise RuntimeError("Operations using queues require the DPC/SPMD backend") + + # craft the correct policy including the device queue + if queue is None: + policy = self.backend.host_policy() + elif self.backend.is_spmd: + policy = self.backend.spmd_data_parallel_policy(queue) + elif self.backend.is_dpc: + policy = self.backend.data_parallel_policy(queue) + else: + policy = self.backend.host_policy() - def __call__(self, *args: Any, **kwargs: Any) -> Any: - return self.method(*args, **kwargs) + # dispatch to backend function + return self.method(policy, *args, **kwargs) def __repr__(self) -> str: - return f"BackendFunction(<{self.backend_type}_backend>.{self.name})" - - -def inject_policy_manager(backend: Backend) -> Callable[..., Any]: - def _get_policy(self, queue: Any, *data: Any) -> Any: - policy_manager = PolicyManager(backend) - return policy_manager.get_policy(queue, *data) - - return _get_policy - - -@contextmanager -def DefaultPolicyOverride(instance: Any): - original_method = getattr(instance, "_get_policy", None) - try: - # Inject the new _get_policy method from _default_backend - new_policy_method = inject_policy_manager(_default_backend) - bound_method = MethodType(new_policy_method, instance) - setattr(instance, "_get_policy", bound_method) - yield - finally: - # Restore the original _get_policy method - if original_method is not None: - setattr(instance, "_get_policy", original_method) - else: - delattr(instance, "_get_policy") + return f"BackendFunction({self.backend}.{self.name})" def bind_default_backend(module_name: str, lookup_name: Optional[str] = None): @@ -92,13 +96,10 @@ def decorator(method: Callable[..., Any]): ) return method - if lookup_name == "_get_policy": - return inject_policy_manager(_default_backend) - backend_method = default_manager.get_backend_component(module_name, lookup_name) wrapped_method = BackendFunction( backend_method, - backend_type="dpc" if _default_backend.is_dpc else "host", + _default_backend, name=f"{module_name}.{method.__name__}", ) @@ -126,12 +127,11 @@ def decorator(method: Callable[..., Any]): ) return method - if lookup_name == "_get_policy": - return inject_policy_manager(_spmd_backend) - backend_method = spmd_manager.get_backend_component(module_name, lookup_name) wrapped_method = BackendFunction( - backend_method, backend_type="spmd", name=f"{module_name}.{method.__name__}" + backend_method, + _spmd_backend, + name=f"{module_name}.{method.__name__}", ) logger.debug( diff --git a/onedal/covariance/covariance.py b/onedal/covariance/covariance.py index 5f040a090f..9935ed2752 100644 --- a/onedal/covariance/covariance.py +++ b/onedal/covariance/covariance.py @@ -32,10 +32,7 @@ def __init__(self, method="dense", bias=False, assume_centered=False): self.assume_centered = assume_centered @bind_default_backend("covariance") - def _get_policy(self, queue, *data): ... - - @bind_default_backend("covariance") - def compute(self, *args, **kwargs): ... + def compute(self, *args, queue=None, **kwargs): ... def _get_onedal_params(self, dtype=np.float32): params = { @@ -99,21 +96,20 @@ def fit(self, X, y=None, queue=None): self : object Returns the instance itself. 
""" - policy = self._get_policy(queue, X) X = _check_array(X, dtype=[np.float64, np.float32]) - X = _convert_to_supported(policy, X) + X = _convert_to_supported(X) dtype = get_dtype(X) params = self._get_onedal_params(dtype) hparams = get_hyperparameters("covariance", "compute") if hparams is not None and not hparams.is_default: result = self.compute( - policy, params, hparams.backend, to_table(X), + queue=queue, ) else: - result = self.compute(policy, params, to_table(X)) + result = self.compute(params, to_table(X), queue=queue) if daal_check_version((2024, "P", 1)) or (not self.bias): self.covariance_ = from_table(result.cov_matrix) else: diff --git a/onedal/covariance/incremental_covariance.py b/onedal/covariance/incremental_covariance.py index 94f42fc8bc..4ffe73cea4 100644 --- a/onedal/covariance/incremental_covariance.py +++ b/onedal/covariance/incremental_covariance.py @@ -60,13 +60,13 @@ def __init__(self, method="dense", bias=False, assume_centered=False): self._reset() @bind_default_backend("covariance") - def partial_compute(self, policy, params, partial_result, X_table): ... + def partial_compute(self, params, partial_result, X_table, queue=None): ... @bind_default_backend("covariance") def partial_compute_result(self): ... @bind_default_backend("covariance") - def finalize_compute(self, policy, params, partial_result): ... + def finalize_compute(self, params, partial_result, queue=None): ... def _reset(self): self._partial_result = self.partial_compute_result() @@ -95,11 +95,7 @@ def partial_fit(self, X, y=None, queue=None): """ X = _check_array(X, dtype=[np.float64, np.float32], ensure_2d=True) - self._queue = queue - - policy = self._get_policy(queue, X) - - X = _convert_to_supported(policy, X) + X = _convert_to_supported(X) if not hasattr(self, "_dtype"): self._dtype = get_dtype(X) @@ -107,7 +103,7 @@ def partial_fit(self, X, y=None, queue=None): params = self._get_onedal_params(self._dtype) table_X = to_table(X) self._partial_result = self.partial_compute( - policy, params, self._partial_result, table_X + params, self._partial_result, table_X, queue=queue ) def finalize_fit(self, queue=None): @@ -126,16 +122,8 @@ def finalize_fit(self, queue=None): Returns the instance itself. 
""" params = self._get_onedal_params(self._dtype) - if queue is not None: - policy = self._get_policy(queue) - else: - policy = self._get_policy(self._queue) - result = self.finalize_compute( - policy, - params, - self._partial_result, - ) + result = self.finalize_compute(params, self._partial_result, queue=queue) if daal_check_version((2024, "P", 1)) or (not self.bias): self.covariance_ = from_table(result.cov_matrix) else: diff --git a/onedal/datatypes/_data_conversion.py b/onedal/datatypes/_data_conversion.py index 72744944de..82dfe6928f 100644 --- a/onedal/datatypes/_data_conversion.py +++ b/onedal/datatypes/_data_conversion.py @@ -19,6 +19,7 @@ import numpy as np from onedal import _default_backend as backend +from onedal._device_offload import SyclQueueManager def _apply_and_pass(func, *args, **kwargs): @@ -79,17 +80,10 @@ def _table_to_array(table, xp=None): def _table_to_array(table, xp=None): return xp.asarray(table) - def _convert_to_supported(policy, *data): - def func(x): + def _convert_to_supported(*data): + def identity(x): return x - if not policy.is_dpc: - # CPUs support FP64 by default - return _apply_and_pass(func, *data) - - # It can be either SPMD or DPCPP policy - device = policy._queue.sycl_device - def convert_or_pass(x): if (x is not None) and (x.dtype == np.float64): warnings.warn( @@ -101,10 +95,14 @@ def convert_or_pass(x): else: return x - if not device.has_aspect_fp64: - func = convert_or_pass + # find the device we're running on + queue = SyclQueueManager.from_data(data) + device = queue.sycl_device if queue else None - return _apply_and_pass(func, *data) + if device and not device.has_aspect_fp64: + return _apply_and_pass(convert_or_pass, *data) + else: + return _apply_and_pass(identity, *data) def convert_one_from_table(table, sycl_queue=None, sua_iface=None, xp=None): # Currently only `__sycl_usm_array_interface__` protocol used to @@ -130,11 +128,11 @@ def convert_one_from_table(table, sycl_queue=None, sua_iface=None, xp=None): else: - def _convert_to_supported(policy, *data): - def func(x): + def _convert_to_supported(*data): + def identity(x): return x - return _apply_and_pass(func, *data) + return _apply_and_pass(identity, *data) def convert_one_from_table(table, sycl_queue=None, sua_iface=None, xp=None): # Currently only `__sycl_usm_array_interface__` protocol used to diff --git a/onedal/decomposition/incremental_pca.py b/onedal/decomposition/incremental_pca.py index 48eb0554f7..b4b0d9c670 100644 --- a/onedal/decomposition/incremental_pca.py +++ b/onedal/decomposition/incremental_pca.py @@ -102,10 +102,10 @@ def __init__( self._reset() @bind_default_backend("decomposition.dim_reduction") - def finalize_train(self, policy, params, partial_result): ... + def finalize_train(self, params, partial_result, queue=None): ... @bind_default_backend("decomposition.dim_reduction") - def partial_train(self, policy, params, partial_result, X_table): ... + def partial_train(self, params, partial_result, X_table, queue=None): ... @bind_default_backend("decomposition.dim_reduction") def partial_train_result(self): ... 
@@ -151,10 +151,7 @@ def partial_fit(self, X, queue): else: self.n_components_ = self.n_components - self._queue = queue - - policy = self._get_policy(queue, X) - X = _convert_to_supported(policy, X) + X = _convert_to_supported(X) if not hasattr(self, "_dtype"): self._dtype = get_dtype(X) @@ -162,7 +159,7 @@ def partial_fit(self, X, queue): X_table = to_table(X) self._partial_result = self.partial_train( - policy, self._params, self._partial_result, X_table + self._params, self._partial_result, X_table, queue=queue ) return self @@ -181,15 +178,7 @@ def finalize_fit(self, queue=None): self : object Returns the instance itself. """ - if queue is not None: - policy = self._get_policy(queue) - else: - policy = self._get_policy(self._queue) - result = self.finalize_train( - policy, - self._params, - self._partial_result, - ) + result = self.finalize_train(self._params, self._partial_result, queue=queue) self.mean_ = from_table(result.means).ravel() self.var_ = from_table(result.variances).ravel() self.components_ = from_table(result.eigenvectors) diff --git a/onedal/decomposition/pca.py b/onedal/decomposition/pca.py index 7d6243953e..b296ee8508 100644 --- a/onedal/decomposition/pca.py +++ b/onedal/decomposition/pca.py @@ -43,18 +43,15 @@ def __init__( self.is_deterministic = is_deterministic self.whiten = whiten - @bind_default_backend("decomposition.dim_reduction") - def _get_policy(self, queue, *data): ... - # provides direct access to the backend model constructor @bind_default_backend("decomposition.dim_reduction") def model(self): ... @bind_default_backend("decomposition.dim_reduction") - def train(self, policy, params, X): ... + def train(self, params, X, queue=None): ... @bind_default_backend("decomposition.dim_reduction") - def infer(self, policy, params, X, model): ... + def infer(self, params, X, model, queue=None): ... def _get_onedal_params(self, data, stage=None): if stage is None: @@ -142,12 +139,11 @@ def _create_model(self): return m def predict(self, X, queue=None): - policy = self._get_policy(queue, X) model = self._create_model() - X = _convert_to_supported(policy, X) + X = _convert_to_supported(X) params = self._get_onedal_params(X, stage="predict") - result = self.infer(policy, params, model, to_table(X)) + result = self.infer(params, model, to_table(X), queue=queue) return from_table(result.transformed_data) @@ -158,15 +154,14 @@ def fit(self, X, y=None, queue=None): n_sf_min = min(n_samples, n_features) self._validate_n_components(self.n_components, n_samples, n_features) - policy = self._get_policy(queue, X) # TODO: investigate why np.ndarray with OWNDATA=FALSE flag # fails to be converted to oneDAL table if isinstance(X, np.ndarray) and not X.flags["OWNDATA"]: X = X.copy() - X = _convert_to_supported(policy, X) + X = _convert_to_supported(X) params = self._get_onedal_params(X) - result = self.train(policy, params, to_table(X)) + result = self.train(params, to_table(X), queue=queue) self.mean_ = from_table(result.means).ravel() self.variances_ = from_table(result.variances) diff --git a/onedal/ensemble/forest.py b/onedal/ensemble/forest.py index 1250576fe5..8a2fedf564 100644 --- a/onedal/ensemble/forest.py +++ b/onedal/ensemble/forest.py @@ -96,14 +96,11 @@ def __init__( self.variable_importance_mode = variable_importance_mode self.algorithm = algorithm - @bind_default_backend("decision_forest") - def _get_policy(self, queue, *data): ... - @abstractmethod - def train(self, *args, **kwargs): ... + def train(self, *args, queue=None, **kwargs): ... 
@abstractmethod - def infer(self, *args, **kwargs): ... + def infer(self, *args, queue=None, **kwargs): ... def _to_absolute_max_features(self, n_features): if self.max_features is None: @@ -314,10 +311,9 @@ def _fit(self, X, y, sample_weight, queue): data = (X, y, sample_weight) else: data = (X, y) - policy = self._get_policy(queue, *data) - data = _convert_to_supported(policy, *data) + data = _convert_to_supported(*data) params = self._get_onedal_params(data[0]) - train_result = self.train(policy, params, *to_table(*data)) + train_result = self.train(params, *to_table(*data), queue=queue) self._onedal_model = train_result.model @@ -360,15 +356,14 @@ def _predict(self, X, queue, hparams=None): X, dtype=[np.float64, np.float32], force_all_finite=True, accept_sparse=False ) _check_n_features(self, X, False) - policy = self._get_policy(queue, X) model = self._onedal_model - X = _convert_to_supported(policy, X) + X = _convert_to_supported(X) params = self._get_onedal_params(X) if hparams is not None and not hparams.is_default: - result = self.infer(policy, params, hparams.backend, model, to_table(X)) + result = self.infer(params, hparams.backend, model, to_table(X), queue=queue) else: - result = self.infer(policy, params, model, to_table(X)) + result = self.infer(params, model, to_table(X), queue=queue) y = from_table(result.responses) return y @@ -379,16 +374,15 @@ def _predict_proba(self, X, queue, hparams=None): X, dtype=[np.float64, np.float32], force_all_finite=True, accept_sparse=False ) _check_n_features(self, X, False) - policy = self._get_policy(queue, X) - X = _convert_to_supported(policy, X) + X = _convert_to_supported(X) params = self._get_onedal_params(X) params["infer_mode"] = "class_probabilities" model = self._onedal_model if hparams is not None and not hparams.is_default: - result = self.infer(policy, params, hparams.backend, model, to_table(X)) + result = self.infer(params, hparams.backend, model, to_table(X), queue=queue) else: - result = self.infer(policy, params, model, to_table(X)) + result = self.infer(params, model, to_table(X), queue=queue) y = from_table(result.probabilities) return y @@ -453,10 +447,10 @@ def __init__( ) @bind_default_backend("decision_forest.classification") - def train(self, *args, **kwargs): ... + def train(self, *args, queue=None, **kwargs): ... @bind_default_backend("decision_forest.classification") - def infer(self, *args, **kwargs): ... + def infer(self, *args, queue=None, **kwargs): ... def _validate_targets(self, y, dtype): y, self.class_weight_, self.classes_ = _validate_targets( @@ -544,10 +538,10 @@ def __init__( ) @bind_default_backend("decision_forest.regression") - def train(self, *args, **kwargs): ... + def train(self, *args, queue=None, **kwargs): ... @bind_default_backend("decision_forest.regression") - def infer(self, *args, **kwargs): ... + def infer(self, *args, queue=None, **kwargs): ... def fit(self, X, y, sample_weight=None, queue=None): if sample_weight is not None: @@ -619,10 +613,10 @@ def __init__( ) @bind_default_backend("decision_forest.classification") - def train(self, *args, **kwargs): ... + def train(self, *args, queue=None, **kwargs): ... @bind_default_backend("decision_forest.classification") - def infer(self, *args, **kwargs): ... + def infer(self, *args, queue=None, **kwargs): ... def _validate_targets(self, y, dtype): y, self.class_weight_, self.classes_ = _validate_targets( @@ -718,10 +712,10 @@ def __init__( ) @bind_default_backend("decision_forest.regression") - def train(self, *args, **kwargs): ... 
+ def train(self, *args, queue=None, **kwargs): ... @bind_default_backend("decision_forest.regression") - def infer(self, *args, **kwargs): ... + def infer(self, *args, queue=None, **kwargs): ... def fit(self, X, y, sample_weight=None, queue=None): if sample_weight is not None: diff --git a/onedal/linear_model/incremental_linear_model.py b/onedal/linear_model/incremental_linear_model.py index 9592a848cb..cfe4428788 100644 --- a/onedal/linear_model/incremental_linear_model.py +++ b/onedal/linear_model/incremental_linear_model.py @@ -53,10 +53,10 @@ def __init__(self, fit_intercept=True, copy_X=False, algorithm="norm_eq"): def partial_train_result(self): ... @bind_default_backend("linear_model.regression") - def partial_train(self, *args, **kwargs): ... + def partial_train(self, *args, queue=None, **kwargs): ... @bind_default_backend("linear_model.regression") - def finalize_train(self, *args, **kwargs): ... + def finalize_train(self, *args, queue=None, **kwargs): ... def _reset(self): # Get the pointer to partial_result from backend @@ -83,10 +83,7 @@ def partial_fit(self, X, y, queue=None): self : object Returns the instance itself. """ - self._queue = queue - policy = self._get_policy(queue, X) - - X, y = _convert_to_supported(policy, X, y) + X, y = _convert_to_supported(X, y) if not hasattr(self, "_dtype"): self._dtype = get_dtype(X) @@ -103,16 +100,16 @@ def partial_fit(self, X, y, queue=None): hparams = get_hyperparameters("linear_regression", "train") if hparams is not None and not hparams.is_default: self._partial_result = self.partial_train( - policy, self._params, hparams.backend, self._partial_result, X_table, y_table, + queue=queue, ) else: self._partial_result = self.partial_train( - policy, self._params, self._partial_result, X_table, y_table + self._params, self._partial_result, X_table, y_table, queue=queue ) def finalize_fit(self, queue=None): @@ -131,18 +128,13 @@ def finalize_fit(self, queue=None): Returns the instance itself. """ - if queue is not None: - policy = self._get_policy(queue) - else: - policy = self._get_policy(self._queue) - hparams = get_hyperparameters("linear_regression", "train") if hparams is not None and not hparams.is_default: result = self.finalize_train( - policy, self._params, hparams.backend, self._partial_result + self._params, hparams.backend, self._partial_result, queue=queue ) else: - result = self.finalize_train(policy, self._params, self._partial_result) + result = self.finalize_train(self._params, self._partial_result, queue=queue) self._onedal_model = result.model @@ -191,10 +183,10 @@ def _reset(self): def partial_train_result(self): ... @bind_default_backend("linear_model.regression") - def partial_train(self, *args, **kwargs): ... + def partial_train(self, *args, queue=None, **kwargs): ... @bind_default_backend("linear_model.regression") - def finalize_train(self, *args, **kwargs): ... + def finalize_train(self, *args, queue=None, **kwargs): ... def partial_fit(self, X, y, queue=None): """ @@ -217,10 +209,7 @@ def partial_fit(self, X, y, queue=None): self : object Returns the instance itself. 
""" - self._queue = queue - policy = self._get_policy(queue, X) - - X, y = _convert_to_supported(policy, X, y) + X, y = _convert_to_supported(X, y) if not hasattr(self, "_dtype"): self._dtype = get_dtype(X) @@ -237,16 +226,16 @@ def partial_fit(self, X, y, queue=None): hparams = get_hyperparameters("linear_regression", "train") if hparams is not None and not hparams.is_default: self._partial_result = self.partial_train( - policy, self._params, hparams.backend, self._partial_result, X_table, y_table, + queue=queue, ) else: self._partial_result = self.partial_train( - policy, self._params, self._partial_result, X_table, y_table + self._params, self._partial_result, X_table, y_table, queue=queue ) def finalize_fit(self, queue=None): @@ -264,11 +253,7 @@ def finalize_fit(self, queue=None): self : object Returns the instance itself. """ - if queue is not None: - policy = self._get_policy(queue) - else: - policy = self._get_policy(self._queue) - result = self.finalize_train(policy, self._params, self._partial_result) + result = self.finalize_train(self._params, self._partial_result, queue=queue) self._onedal_model = result.model diff --git a/onedal/linear_model/linear_model.py b/onedal/linear_model/linear_model.py index 48c5033547..68f7011406 100755 --- a/onedal/linear_model/linear_model.py +++ b/onedal/linear_model/linear_model.py @@ -40,14 +40,11 @@ def __init__(self, fit_intercept, copy_X, algorithm, alpha=0.0): self.copy_X = copy_X self.algorithm = algorithm - @bind_default_backend("linear_model") - def _get_policy(self, queue, *data): ... - @bind_default_backend("linear_model.regression") - def train(self, *args, **kwargs): ... + def train(self, *args, queue=None, **kwargs): ... @bind_default_backend("linear_model.regression") - def infer(self, policy, params, model, X): ... + def infer(self, params, model, X, queue=None): ... 
# direct access to the backend model class @bind_default_backend("linear_model.regression") @@ -66,7 +63,7 @@ def _get_onedal_params(self, dtype=np.float32): return params - def _create_model(self, policy): + def _create_model(self): model = self.model() coefficients = self.coef_ @@ -103,7 +100,7 @@ def _create_model(self, policy): if self.fit_intercept: packed_coefficients[:, 0][:, np.newaxis] = intercept - packed_coefficients = _convert_to_supported(policy, packed_coefficients) + packed_coefficients = _convert_to_supported(packed_coefficients) model.packed_coefficients = to_table(packed_coefficients) @@ -130,8 +127,6 @@ def predict(self, X, queue=None): _check_is_fitted(self) - policy = self._get_policy(queue, X) - X = _check_array( X, dtype=[np.float64, np.float32], force_all_finite=False, ensure_2d=False ) @@ -140,14 +135,14 @@ def predict(self, X, queue=None): if hasattr(self, "_onedal_model"): model = self._onedal_model else: - model = self._create_model(policy) + model = self._create_model() X = make2d(X) - X = _convert_to_supported(policy, X) + X = _convert_to_supported(X) params = self._get_onedal_params(get_dtype(X)) X_table = to_table(X) - result = self.infer(policy, params, model, X_table) + result = self.infer(params, model, X_table, queue=queue) y = from_table(result.responses) if y.shape[1] == 1 and self.coef_.ndim == 1: @@ -217,19 +212,17 @@ def fit(self, X, y, queue=None): X, y = _check_X_y(X, y, force_all_finite=False, accept_2d_y=True) - policy = self._get_policy(queue, X, y) - self.n_features_in_ = _num_features(X, fallback_1d=True) - X, y = _convert_to_supported(policy, X, y) + X, y = _convert_to_supported(X, y) params = self._get_onedal_params(get_dtype(X)) X_table, y_table = to_table(X, y) hparams = get_hyperparameters("linear_regression", "train") if hparams is not None and not hparams.is_default: - result = self.train(policy, params, hparams.backend, X_table, y_table) + result = self.train(params, hparams.backend, X_table, y_table, queue=queue) else: - result = self.train(policy, params, X_table, y_table) + result = self.train(params, X_table, y_table, queue=queue) self._onedal_model = result.model @@ -313,15 +306,13 @@ def fit(self, X, y, queue=None): X, y = _check_X_y(X, y, force_all_finite=False, accept_2d_y=True) - policy = self._get_policy(queue, X, y) - self.n_features_in_ = _num_features(X, fallback_1d=True) - X, y = _convert_to_supported(policy, X, y) + X, y = _convert_to_supported(X, y) params = self._get_onedal_params(get_dtype(X)) X_table, y_table = to_table(X, y) - result = self.train(policy, params, X_table, y_table) + result = self.train(params, X_table, y_table, queue=queue) self._onedal_model = result.model packed_coefficients = from_table(result.model.packed_coefficients) diff --git a/onedal/linear_model/logistic_regression.py b/onedal/linear_model/logistic_regression.py index 78010cec8c..5ddf44e83f 100644 --- a/onedal/linear_model/logistic_regression.py +++ b/onedal/linear_model/logistic_regression.py @@ -45,14 +45,11 @@ def __init__(self, tol, C, fit_intercept, solver, max_iter, algorithm): self.max_iter = max_iter self.algorithm = algorithm - @bind_default_backend("logistic_regression") - def _get_policy(self, queue, *data): ... - @abstractmethod - def train(self, policy, params, X, y): ... + def train(self, params, X, y, queue=None): ... @abstractmethod - def infer(self, policy, params, X): ... + def infer(self, params, X, queue=None): ... 
# direct access to the backend model constructor @abstractmethod @@ -95,12 +92,11 @@ def _fit(self, X, y, queue): self.classes_, y = np.unique(y, return_inverse=True) y = y.astype(dtype=np.int32) - policy = self._get_policy(queue, X, y) - X, y = _convert_to_supported(policy, X, y) + X, y = _convert_to_supported(X, y) params = self._get_onedal_params(is_csr, get_dtype(X)) X_table, y_table = to_table(X, y) - result = self.train(policy, params, X_table, y_table) + result = self.train(params, X_table, y_table, queue=queue) self._onedal_model = result.model self.n_iter_ = np.array([result.iterations_count]) @@ -114,7 +110,7 @@ def _fit(self, X, y, queue): return self - def _create_model(self, policy): + def _create_model(self): m = self.model() coefficients = self.coef_ @@ -157,7 +153,7 @@ def _create_model(self, policy): if self.fit_intercept: packed_coefficients[:, 0][:, np.newaxis] = intercept - packed_coefficients = _convert_to_supported(policy, packed_coefficients) + packed_coefficients = _convert_to_supported(packed_coefficients) m.packed_coefficients = to_table(packed_coefficients) @@ -181,18 +177,17 @@ def _infer(self, X, queue): _check_n_features(self, X, False) X = make2d(X) - policy = self._get_policy(queue, X) if hasattr(self, "_onedal_model"): model = self._onedal_model else: - model = self._create_model(policy) + model = self._create_model() - X = _convert_to_supported(policy, X) + X = _convert_to_supported(X) params = self._get_onedal_params(is_csr, get_dtype(X)) X_table = to_table(X) - result = self.infer(policy, params, model, X_table) + result = self.infer(params, model, X_table, queue=queue) return result def _predict(self, X, queue): @@ -239,10 +234,10 @@ def __init__( ) @bind_default_backend("logistic_regression.classification") - def train(self, policy, params, X, y): ... + def train(self, params, X, y, queue=None): ... @bind_default_backend("logistic_regression.classification") - def infer(self, policy, params, X, model): ... + def infer(self, params, X, model, queue=None): ... @bind_default_backend("logistic_regression.classification") def model(self): ... diff --git a/onedal/neighbors/neighbors.py b/onedal/neighbors/neighbors.py index d2235b87a5..e904467e2f 100755 --- a/onedal/neighbors/neighbors.py +++ b/onedal/neighbors/neighbors.py @@ -71,9 +71,6 @@ def _parse_auto_method(self, method, n_samples, n_features): return result_method - @bind_default_backend("neighbors") - def _get_policy(self, queue, *data): ... - @abstractmethod def train(self, *args, **kwargs): ... @@ -437,10 +434,10 @@ def model(self): ... # direct access to the backend model constructor @bind_default_backend("neighbors.classification") - def train(self, *args, **kwargs): ... + def train(self, *args, queue=None, **kwargs): ... @bind_default_backend("neighbors.classification") - def infer(self, *args, **kwargs): ... + def infer(self, *args, queue=None, **kwargs): ... 
def _get_daal_params(self, data): params = super()._get_daal_params(data) @@ -460,10 +457,9 @@ def _onedal_fit(self, X, y, queue): return train_alg(**params).compute(X, y).model else: - policy = self._get_policy(queue, X, y) - X, y = _convert_to_supported(policy, X, y) + X, y = _convert_to_supported(X, y) params = self._get_onedal_params(X, y) - return self.train(policy, params, *to_table(X, y)).model + return self.train(params, *to_table(X, y), queue=queue).model def _onedal_predict(self, model, X, params, queue): if type(self._onedal_model) is kdtree_knn_classification_model: @@ -471,12 +467,11 @@ def _onedal_predict(self, model, X, params, queue): elif type(self._onedal_model) is bf_knn_classification_model: return bf_knn_classification_prediction(**params).compute(X, model) else: - policy = self._get_policy(queue, X) - X = _convert_to_supported(policy, X) + X = _convert_to_supported(X) if "responses" not in params["result_option"]: params["result_option"] += "|responses" params["fptype"] = X.dtype - result = self.infer(policy, params, model, to_table(X)) + result = self.infer(params, model, to_table(X), queue=queue) return result @@ -585,16 +580,16 @@ def __init__( self.weights = weights @bind_default_backend("neighbors.search", lookup_name="train") - def train_search(self, *args, **kwargs): ... + def train_search(self, *args, queue=None, **kwargs): ... @bind_default_backend("neighbors.search", lookup_name="infer") - def infer_search(self, *args, **kwargs): ... + def infer_search(self, *args, queue=None, **kwargs): ... @bind_default_backend("neighbors.regression") - def train(self, *args, **kwargs): ... + def train(self, *args, queue=None, **kwargs): ... @bind_default_backend("neighbors.regression") - def infer(self, *args, **kwargs): ... + def infer(self, *args, queue=None, **kwargs): ... 
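The `lookup_name` argument used above decouples the Python-visible method name from the native entry point; condensed from the hunk, both stubs below resolve to backend functions literally named `train`/`infer`, just in the `neighbors.search` submodule:

    @bind_default_backend("neighbors.search", lookup_name="train")
    def train_search(self, *args, queue=None, **kwargs): ...

    @bind_default_backend("neighbors.search", lookup_name="infer")
    def infer_search(self, *args, queue=None, **kwargs): ...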
def _get_daal_params(self, data): params = super()._get_daal_params(data) @@ -613,14 +608,13 @@ def _onedal_fit(self, X, y, queue): return train_alg(**params).compute(X, y).model - policy = self._get_policy(queue, X, y) - X, y = _convert_to_supported(policy, X, y) + X, y = _convert_to_supported(X, y) params = self._get_onedal_params(X, y) if gpu_device: - return self.train(policy, params, *to_table(X, y)).model + return self.train(params, *to_table(X, y), queue=queue).model else: - return self.train_search(policy, params, to_table(X)).model + return self.train_search(params, to_table(X), queue=queue).model def _onedal_predict(self, model, X, params, queue): assert self._onedal_model is not None, "Model is not trained" @@ -631,8 +625,7 @@ def _onedal_predict(self, model, X, params, queue): return bf_knn_classification_prediction(**params).compute(X, model) gpu_device = queue is not None and queue.sycl_device.is_gpu - policy = self._get_policy(queue, X) - X = _convert_to_supported(policy, X) + X = _convert_to_supported(X) if "responses" not in params["result_option"] and gpu_device: params["result_option"] += "|responses" @@ -640,9 +633,8 @@ def _onedal_predict(self, model, X, params, queue): - result = backend.infer(policy, params, model, to_table(X)) if gpu_device: - return self.infer(policy, params, self._onedal_model, to_table(X)) + return self.infer(params, self._onedal_model, to_table(X), queue=queue) else: - return self.infer_search(policy, params, self._onedal_model, to_table(X)) + return self.infer_search(params, self._onedal_model, to_table(X), queue=queue) def fit(self, X, y, queue=None): return self._fit(X, y, queue=queue) @@ -736,10 +729,10 @@ def __init__( self.weights = weights @bind_default_backend("neighbors.search") - def train(self, *args, **kwargs): ... + def train(self, *args, queue=None, **kwargs): ... @bind_default_backend("neighbors.search") - def infer(self, *args, **kwargs): ... + def infer(self, *args, queue=None, **kwargs): ...
def _get_daal_params(self, data): params = super()._get_daal_params(data) @@ -762,10 +755,9 @@ def _onedal_fit(self, X, y, queue): return train_alg(**params).compute(X, y).model else: - policy = self._get_policy(queue, X, y) - X, y = _convert_to_supported(policy, X, y) + X, y = _convert_to_supported(X, y) params = self._get_onedal_params(X, y) - return self.train(policy, params, to_table(X)).model + return self.train(params, to_table(X), queue=queue).model def _onedal_predict(self, model, X, params, queue): if type(self._onedal_model) is kdtree_knn_classification_model: @@ -773,15 +765,10 @@ def _onedal_predict(self, model, X, params, queue): elif type(self._onedal_model) is bf_knn_classification_model: return bf_knn_classification_prediction(**params).compute(X, model) - policy = self._get_policy(queue, X) - X = _convert_to_supported(policy, X) + X = _convert_to_supported(X) params["fptype"] = X.dtype - result = self._get_backend( - "neighbors", "search", "infer", policy, params, model, to_table(X) - ) - - return result + return self.infer(params, model, to_table(X), queue=queue) def fit(self, X, y, queue=None): return self._fit(X, y, queue=queue) diff --git a/onedal/primitives/kernel_functions.py b/onedal/primitives/kernel_functions.py index ecdb48bebf..41e4a7ac35 100644 --- a/onedal/primitives/kernel_functions.py +++ b/onedal/primitives/kernel_functions.py @@ -37,7 +37,7 @@ def check_input(data): def _compute_kernel(params, submodule, X, Y, queue): policy = policy_manager.get_policy(queue, X, Y) - X, Y = _convert_to_supported(policy, X, Y) + X, Y = _convert_to_supported(X, Y) params["fptype"] = X.dtype X, Y = to_table(X, Y) result = submodule.compute(policy, params, X, Y) diff --git a/onedal/spmd/basic_statistics/basic_statistics.py b/onedal/spmd/basic_statistics/basic_statistics.py index 047c7d9773..cb685a2084 100644 --- a/onedal/spmd/basic_statistics/basic_statistics.py +++ b/onedal/spmd/basic_statistics/basic_statistics.py @@ -20,9 +20,6 @@ class BasicStatistics(BasicStatistics_Batch): - @bind_spmd_backend("basic_statistics") - def _get_policy(self, queue, *data): ... - @bind_spmd_backend("basic_statistics") def compute(self, data, weights=None, queue=None): ... diff --git a/onedal/spmd/basic_statistics/incremental_basic_statistics.py b/onedal/spmd/basic_statistics/incremental_basic_statistics.py index b46582e645..ab85c7ada8 100644 --- a/onedal/spmd/basic_statistics/incremental_basic_statistics.py +++ b/onedal/spmd/basic_statistics/incremental_basic_statistics.py @@ -24,24 +24,8 @@ class IncrementalBasicStatistics(base_IncrementalBasicStatistics): - @bind_default_backend("basic_statistics", lookup_name="_get_policy") - def _get_default_policy(self, queue, *data): ... - - @bind_spmd_backend("basic_statistics", lookup_name="_get_policy") - def _get_spmd_policy(self, queue, *data): ... - @bind_spmd_backend("basic_statistics") - def compute(self, *args, **kwargs): ... + def compute(self, *args, queue=None, **kwargs): ... @bind_spmd_backend("basic_statistics") - def finalize_compute(self, *args, **kwargs): ... 
- - def partial_fit(self, *args, **kwargs): - # base class partial_fit is using `compute()`, which requires host or parallel policy, but not SPMD - self._get_policy = self._get_default_policy - return super().partial_fit(*args, **kwargs) - - def finalize_fit(self, *args, **kwargs): - # base class finalize_fit is using `finalize_compute()`, which requires SPMD policy - self._get_policy = self._get_spmd_policy - return super().finalize_fit(*args, **kwargs) + def finalize_compute(self, *args, queue=None, **kwargs): ... diff --git a/onedal/spmd/cluster/dbscan.py b/onedal/spmd/cluster/dbscan.py index a9fe89d7c8..d7be30cc01 100644 --- a/onedal/spmd/cluster/dbscan.py +++ b/onedal/spmd/cluster/dbscan.py @@ -19,8 +19,5 @@ class DBSCAN(DBSCAN_Batch): - @bind_spmd_backend("dbscan") - def _get_policy(self, queue, *data): ... - @bind_spmd_backend("dbscan.clustering") - def compute(self, policy, params, data_table, weights_table): ... + def compute(self, params, data_table, weights_table, queue=None): ... diff --git a/onedal/spmd/cluster/kmeans.py b/onedal/spmd/cluster/kmeans.py index 45c2404dd8..ad1dcd0e43 100644 --- a/onedal/spmd/cluster/kmeans.py +++ b/onedal/spmd/cluster/kmeans.py @@ -26,11 +26,8 @@ class KMeansInit(KMeansInit_Batch): KMeansInit oneDAL implementation for SPMD iface. """ - @bind_spmd_backend("kmeans_init") - def _get_policy(self, queue, *data): ... - @bind_spmd_backend("kmeans_init.init", lookup_name="compute") - def backend_compute(self, policy, params, data): ... + def backend_compute(self, params, data, queue=None): ... class KMeans(KMeans_Batch): @@ -40,14 +37,11 @@ def _get_basic_statistics_backend(self, result_options): def _get_kmeans_init(self, cluster_count, seed, algorithm): return KMeansInit(cluster_count=cluster_count, seed=seed, algorithm=algorithm) - @bind_spmd_backend("kmeans") - def _get_policy(self, queue, X): ... - @bind_spmd_backend("kmeans.clustering") - def train(self, policy, params, X_table, centroids_table): ... + def train(self, params, X_table, centroids_table, queue=None): ... @bind_spmd_backend("kmeans.clustering") - def infer(self, policy, params, model, centroids_table): ... + def infer(self, params, model, centroids_table, queue=None): ... @support_input_format() def fit(self, X, y=None, queue=None): diff --git a/onedal/spmd/covariance/covariance.py b/onedal/spmd/covariance/covariance.py index 6808073aba..77b477fe25 100644 --- a/onedal/spmd/covariance/covariance.py +++ b/onedal/spmd/covariance/covariance.py @@ -20,14 +20,12 @@ class EmpiricalCovariance(EmpiricalCovariance_Batch): - @bind_spmd_backend("covariance") - def _get_policy(self, queue, *data): ... @bind_spmd_backend("covariance") - def compute(self, *args, **kwargs): ... + def compute(self, *args, queue=None, **kwargs): ... @bind_spmd_backend("covariance") - def finalize_compute(self, policy, params, partial_result): ... + def finalize_compute(self, params, partial_result, queue=None): ... @support_input_format() def fit(self, X, y=None, queue=None): diff --git a/onedal/spmd/covariance/incremental_covariance.py b/onedal/spmd/covariance/incremental_covariance.py index dae7b91afe..6beac5a2fb 100644 --- a/onedal/spmd/covariance/incremental_covariance.py +++ b/onedal/spmd/covariance/incremental_covariance.py @@ -14,20 +14,13 @@ # limitations under the License. 
# ============================================================================== -from ...common._backend import DefaultPolicyOverride, bind_spmd_backend +from ...common._backend import bind_spmd_backend from ...covariance import ( IncrementalEmpiricalCovariance as base_IncrementalEmpiricalCovariance, ) class IncrementalEmpiricalCovariance(base_IncrementalEmpiricalCovariance): - @bind_spmd_backend("covariance") - def _get_policy(self, queue, *data): ... @bind_spmd_backend("covariance") - def finalize_compute(self, policy, params, partial_result): ... - - def partial_fit(self, X, y=None, queue=None): - # partial fit performed by parent backend, therefore default policy required - with DefaultPolicyOverride(self): - return super().partial_fit(X, y, queue) + def finalize_compute(self, params, partial_result, queue=None): ... diff --git a/onedal/spmd/decomposition/incremental_pca.py b/onedal/spmd/decomposition/incremental_pca.py index cdbcdb2235..bb7c03930b 100644 --- a/onedal/spmd/decomposition/incremental_pca.py +++ b/onedal/spmd/decomposition/incremental_pca.py @@ -14,7 +14,8 @@ # limitations under the License. # ============================================================================== -from ...common._backend import DefaultPolicyOverride, bind_spmd_backend +from onedal.common._backend import bind_spmd_backend + from ...decomposition import IncrementalPCA as base_IncrementalPCA @@ -26,18 +27,5 @@ class IncrementalPCA(base_IncrementalPCA): API is the same as for `onedal.decomposition.IncrementalPCA` """ - @bind_spmd_backend("decomposition") - def _get_policy(self, queue, *data): ... - @bind_spmd_backend("decomposition.dim_reduction") - def finalize_train(self, policy, params, partial_result): ... - - def partial_fit(self, X, queue): - # partial fit performed by parent backend, therefore default policy required - with DefaultPolicyOverride(self): - return super().partial_fit(X, queue) - - def infer(self, policy, params, X, model): - # infer runs in parent backend, therefore default policy required - with DefaultPolicyOverride(self): - return super().infer(policy, params, X, model) + def finalize_train(self, params, partial_result, queue=None): ... diff --git a/onedal/spmd/decomposition/pca.py b/onedal/spmd/decomposition/pca.py index eadcd1c0be..7bc2bcce71 100644 --- a/onedal/spmd/decomposition/pca.py +++ b/onedal/spmd/decomposition/pca.py @@ -20,14 +20,12 @@ class PCA(PCABatch): - @bind_spmd_backend("decomposition.dim_reduction") - def _get_policy(self, queue, *data): ... @bind_spmd_backend("decomposition.dim_reduction") - def train(self, policy, params, X): ... + def train(self, params, X, queue=None): ... @bind_spmd_backend("decomposition.dim_reduction") - def finalize_train(self, *args, **kwargs): ... + def finalize_train(self, *args, queue=None, **kwargs): ... 
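With `DefaultPolicyOverride` gone, an SPMD estimator now re-binds only its collective steps and inherits everything else from the batch parent; the dispatcher selects the SPMD policy from the queue attached to the data. Condensed sketch of the resulting class shape:

    class IncrementalPCA(base_IncrementalPCA):
        # only the distributed finalize step binds to the SPMD backend;
        # partial_train and infer come from the batch implementation
        @bind_spmd_backend("decomposition.dim_reduction")
        def finalize_train(self, params, partial_result, queue=None): ...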
@support_input_format() def fit(self, X, y=None, queue=None): diff --git a/onedal/spmd/linear_model/incremental_linear_model.py b/onedal/spmd/linear_model/incremental_linear_model.py index 6470173a9c..cf276bc0b1 100644 --- a/onedal/spmd/linear_model/incremental_linear_model.py +++ b/onedal/spmd/linear_model/incremental_linear_model.py @@ -15,7 +15,8 @@ # ============================================================================== -from ...common._backend import DefaultPolicyOverride, bind_spmd_backend +from onedal.common._backend import bind_spmd_backend + from ...linear_model import ( IncrementalLinearRegression as base_IncrementalLinearRegression, ) @@ -28,13 +29,5 @@ class IncrementalLinearRegression(base_IncrementalLinearRegression): API is the same as for `onedal.linear_model.IncrementalLinearRegression`. """ - @bind_spmd_backend("linear_model") - def _get_policy(self): ... - @bind_spmd_backend("linear_model.regression") - def finalize_train(self, *args, **kwargs): ... - - def partial_fit(self, X, y, queue): - # partial fit performed by parent backend, therefore default policy required - with DefaultPolicyOverride(self): - return super().partial_fit(X, y, queue) + def finalize_train(self, *args, queue=None, **kwargs): ... diff --git a/onedal/spmd/linear_model/linear_model.py b/onedal/spmd/linear_model/linear_model.py index 0317e74071..911763247e 100644 --- a/onedal/spmd/linear_model/linear_model.py +++ b/onedal/spmd/linear_model/linear_model.py @@ -20,17 +20,15 @@ class LinearRegression(LinearRegression_Batch): - @bind_spmd_backend("linear_model") - def _get_policy(self, queue, *data): ... @bind_spmd_backend("linear_model.regression") - def train(self, *args, **kwargs): ... + def train(self, *args, queue=None, **kwargs): ... @bind_spmd_backend("linear_model.regression") - def finalize_train(self, *args, **kwargs): ... + def finalize_train(self, *args, queue=None, **kwargs): ... @bind_spmd_backend("linear_model.regression") - def infer(self, policy, params, model, X): ... + def infer(self, params, model, X, queue=None): ... @support_input_format() def fit(self, X, y, queue=None): diff --git a/onedal/spmd/linear_model/logistic_regression.py b/onedal/spmd/linear_model/logistic_regression.py index 749374c5bc..ff0e546abb 100644 --- a/onedal/spmd/linear_model/logistic_regression.py +++ b/onedal/spmd/linear_model/logistic_regression.py @@ -20,14 +20,12 @@ class LogisticRegression(LogisticRegression_Batch): - @bind_spmd_backend("logistic_regression") - def _get_policy(self): ... @bind_spmd_backend("logistic_regression.classification") - def train(self, policy, params, X, y): ... + def train(self, params, X, y, queue=None): ... @bind_spmd_backend("logistic_regression.classification") - def infer(self, policy, params, X, model): ... + def infer(self, params, X, model, queue=None): ... @support_input_format() def fit(self, X, y, queue=None): diff --git a/onedal/spmd/neighbors/neighbors.py b/onedal/spmd/neighbors/neighbors.py index 46a030863f..007b197992 100644 --- a/onedal/spmd/neighbors/neighbors.py +++ b/onedal/spmd/neighbors/neighbors.py @@ -21,14 +21,12 @@ class KNeighborsClassifier(KNeighborsClassifier_Batch): - @bind_spmd_backend("neighbors") - def _get_policy(self): ... @bind_spmd_backend("neighbors.classification") - def train(self, *args, **kwargs): ... + def train(self, *args, queue=None, **kwargs): ... @bind_spmd_backend("neighbors.classification") - def infer(self, *args, **kwargs): ... + def infer(self, *args, queue=None, **kwargs): ... 
@support_input_format() def fit(self, X, y, queue=None): @@ -48,20 +46,18 @@ def kneighbors(self, X=None, n_neighbors=None, return_distance=True, queue=None) class KNeighborsRegressor(KNeighborsRegressor_Batch): - @bind_spmd_backend("neighbors") - def _get_policy(self): ... @bind_spmd_backend("neighbors.search", lookup_name="train") - def train_search(self, *args, **kwargs): ... + def train_search(self, *args, queue=None, **kwargs): ... @bind_spmd_backend("neighbors.search", lookup_name="infer") - def infer_search(self, *args, **kwargs): ... + def infer_search(self, *args, queue=None, **kwargs): ... @bind_spmd_backend("neighbors.regression") - def train(self, *args, **kwargs): ... + def train(self, *args, queue=None, **kwargs): ... @bind_spmd_backend("neighbors.regression") - def infer(self, *args, **kwargs): ... + def infer(self, *args, queue=None, **kwargs): ... @support_input_format() def fit(self, X, y, queue=None): diff --git a/onedal/svm/svm.py b/onedal/svm/svm.py index 3332f33069..d0c81065bd 100644 --- a/onedal/svm/svm.py +++ b/onedal/svm/svm.py @@ -83,9 +83,6 @@ def __init__( self.algorithm = algorithm self.svm_type = svm_type - @bind_default_backend("svm") - def _get_policy(self, queue, *data): ... - @abstractmethod def train(self, *args, **kwargs): ... @@ -185,10 +182,9 @@ def _fit(self, X, y, sample_weight, queue): _gamma = self.gamma self._scale_, self._sigma_ = _gamma, np.sqrt(0.5 / _gamma) - policy = self._get_policy(queue, *data) - X = _convert_to_supported(policy, X) + X = _convert_to_supported(X) params = self._get_onedal_params(X) - result = self.train(policy, params, *to_table(*data)) + result = self.train(params, *to_table(*data), queue=queue) if self._sparse: self.dual_coef_ = sp.csr_matrix(from_table(result.coeffs).T) @@ -264,15 +260,14 @@ def _predict(self, X, queue): % type(self).__name__ ) - policy = self._get_policy(queue, X) - X = _convert_to_supported(policy, X) + X = _convert_to_supported(X) params = self._get_onedal_params(X) if hasattr(self, "_onedal_model"): model = self._onedal_model else: model = self._create_model() - result = self.infer(policy, params, model, to_table(X)) + result = self.infer(params, model, to_table(X), queue=queue) y = from_table(result.responses) return y @@ -321,15 +316,14 @@ def _decision_function(self, X, queue): f"of {self.__class__.__name__} was altered" ) - policy = self._get_policy(queue, X) - X = _convert_to_supported(policy, X) + X = _convert_to_supported(X) params = self._get_onedal_params(X) if hasattr(self, "_onedal_model"): model = self._onedal_model else: model = self._create_model() - result = self.infer(policy, params, model, to_table(X)) + result = self.infer(params, model, to_table(X), queue=queue) decision_function = from_table(result.decision_function) if len(self.classes_) == 2: @@ -385,10 +379,10 @@ def __init__( self.svm_type = SVMtype.epsilon_svr @bind_default_backend("svm.regression") - def train(self, *args, **kwargs): ... + def train(self, *args, queue=None, **kwargs): ... @bind_default_backend("svm.regression") - def infer(self, *args, **kwargs): ... + def infer(self, *args, queue=None, **kwargs): ... @bind_default_backend("svm.regression") def model(self): ... @@ -446,10 +440,10 @@ def __init__( self.svm_type = SVMtype.c_svc @bind_default_backend("svm.classification") - def train(self, *args, **kwargs): ... + def train(self, *args, queue=None, **kwargs): ... @bind_default_backend("svm.classification") - def infer(self, *args, **kwargs): ... + def infer(self, *args, queue=None, **kwargs): ... 
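The SVM hunks around this point follow the same scheme; note in particular that `_create_model()` elsewhere in this series lost its policy parameter because `_convert_to_supported` now locates the device from the coefficient arrays themselves, e.g.:

    packed_coefficients = _convert_to_supported(packed_coefficients)
    model.packed_coefficients = to_table(packed_coefficients)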
@bind_default_backend("svm.classification") def model(self): ... @@ -516,10 +510,10 @@ def __init__( self.svm_type = SVMtype.nu_svr @bind_default_backend("svm.nu_regression") - def train(self, *args, **kwargs): ... + def train(self, *args, queue=None, **kwargs): ... @bind_default_backend("svm.nu_regression") - def infer(self, *args, **kwargs): ... + def infer(self, *args, queue=None, **kwargs): ... @bind_default_backend("svm.nu_regression") def model(self): ... @@ -576,10 +570,10 @@ def __init__( self.svm_type = SVMtype.nu_svc @bind_default_backend("svm.nu_classification") - def train(self, *args, **kwargs): ... + def train(self, *args, queue=None, **kwargs): ... @bind_default_backend("svm.nu_classification") - def infer(self, *args, **kwargs): ... + def infer(self, *args, queue=None, **kwargs): ... @bind_default_backend("svm.nu_classification") def model(self): ... diff --git a/sklearnex/_device_offload.py b/sklearnex/_device_offload.py index 7e299f07e0..e5f1122217 100644 --- a/sklearnex/_device_offload.py +++ b/sklearnex/_device_offload.py @@ -16,7 +16,7 @@ from functools import wraps -from onedal._device_offload import _copy_to_usm, _get_global_queue, _transfer_to_host +from onedal._device_offload import _copy_to_usm, _transfer_to_host from onedal.utils._array_api import _asarray from onedal.utils._dpep_helpers import dpnp_available From 57155d143a300a962eda535c3b15abb8cd21a41d Mon Sep 17 00:00:00 2001 From: Andreas Huber Date: Tue, 26 Nov 2024 03:17:17 -0800 Subject: [PATCH 04/41] Only use a global queue, which is set by user-facing functions --- onedal/_device_offload.py | 49 ++++++-- onedal/basic_statistics/basic_statistics.py | 15 ++- .../incremental_basic_statistics.py | 13 +- .../tests/test_basic_statistics.py | 12 +- .../test_incremental_basic_statistics.py | 16 +-- onedal/cluster/dbscan.py | 6 +- onedal/cluster/kmeans.py | 50 ++++---- onedal/cluster/kmeans_init.py | 9 +- onedal/common/_backend.py | 86 ++++++------- onedal/common/backend_manager.py | 13 ++ onedal/covariance/covariance.py | 13 +- onedal/covariance/incremental_covariance.py | 13 +- onedal/decomposition/incremental_pca.py | 13 +- onedal/decomposition/pca.py | 11 +- onedal/ensemble/forest.py | 84 ++++++------- .../linear_model/incremental_linear_model.py | 37 +++--- onedal/linear_model/linear_model.py | 16 ++- onedal/linear_model/logistic_regression.py | 40 ++++--- onedal/neighbors/neighbors.py | 113 ++++++++++-------- .../spmd/basic_statistics/basic_statistics.py | 4 +- .../incremental_basic_statistics.py | 4 +- onedal/spmd/cluster/dbscan.py | 2 +- onedal/spmd/cluster/kmeans.py | 6 +- onedal/spmd/covariance/covariance.py | 4 +- .../spmd/covariance/incremental_covariance.py | 2 +- onedal/spmd/decomposition/pca.py | 2 +- .../linear_model/incremental_linear_model.py | 2 +- onedal/spmd/linear_model/linear_model.py | 6 +- .../spmd/linear_model/logistic_regression.py | 4 +- onedal/spmd/neighbors/neighbors.py | 20 ++-- onedal/svm/svm.py | 22 ++-- 31 files changed, 375 insertions(+), 312 deletions(-) diff --git a/onedal/_device_offload.py b/onedal/_device_offload.py index b929223db0..a02536c7c3 100644 --- a/onedal/_device_offload.py +++ b/onedal/_device_offload.py @@ -37,9 +37,12 @@ class SyclQueue: def __init__(self, target=None): - if target and SyclQueueImplementation is not None: + if target and isinstance(target, SyclQueueImplementation): + self.implementation = target + elif target and SyclQueueImplementation is not None: self.implementation = SyclQueueImplementation(target) - self.implementation = object() + else: + 
self.implementation = None @property def sycl_device(self): @@ -55,26 +58,31 @@ class SyclQueueManager: @staticmethod def get_global_queue() -> Optional[SyclQueue]: """Get the global queue. Retrieve it from the config if not set.""" - if SyclQueueManager.__global_queue is not None: - return SyclQueueManager.__global_queue + if (queue := SyclQueueManager.__global_queue) is not None: + if not isinstance(queue, SyclQueue): + raise ValueError("Global queue is not a SyclQueue object.") + return queue target = _get_config()["target_offload"] if target == "auto": # queue will be created from the provided data to each function call - return None + return SyclQueue(None) if isinstance(target, (str, int)): q = SyclQueue(target) else: q = target - SyclQueueManager.__global_queue = q + SyclQueueManager.update_global_queue(q) return q @staticmethod def update_global_queue(queue): """Update the global queue.""" + if not isinstance(queue, SyclQueue): + # could be a device ID or selector string + queue = SyclQueue(queue) SyclQueueManager.__global_queue = queue @staticmethod @@ -100,16 +108,39 @@ def from_data(*data) -> Optional[SyclQueue]: SyclQueueManager.update_global_queue(data_queue) global_queue = data_queue - # global queue must coincide with data queue - if global_queue.sycl_device != data_queue.sycl_device: + # if the data item is on device, assert it's compatible with global queue + if ( + data_queue.sycl_device is not None + and data_queue.sycl_device != global_queue.sycl_device + ): raise ValueError( "Data objects are located on different target devices or not on selected device." ) - # after we went through the data, global queue is updated and verified + # after we went through the data, global queue is updated and verified (if any queue found) return SyclQueueManager.get_global_queue() +def supports_queue(func): + """ + Decorator that updates the global queue based on provided queue and global configuration. + If a `queue` keyword argument is provided in the decorated function, its value will be used globally. + If no queue is provided, the global queue will be updated from the provided data. + In either case, all data objects are verified to be on the same device (or on host). + """ + + @wraps(func) + def wrapper(self, *args, **kwargs): + if (queue := kwargs.get("queue", None)) is not None: + # update the global queue with what is provided + SyclQueueManager.update_global_queue(queue) + # find the queues in data using SyclQueueManager to verify that all data objects are on the same device + kwargs["queue"] = SyclQueueManager.from_data(*args) + return func(self, *args, **kwargs) + + return wrapper + + if dpnp_available: import dpnp diff --git a/onedal/basic_statistics/basic_statistics.py b/onedal/basic_statistics/basic_statistics.py index 7196b46877..6aed8ae8db 100644 --- a/onedal/basic_statistics/basic_statistics.py +++ b/onedal/basic_statistics/basic_statistics.py @@ -18,6 +18,8 @@ import numpy as np +from onedal._device_offload import supports_queue + from ..common._backend import bind_default_backend from ..datatypes import _convert_to_supported, from_table, to_table from ..utils import _is_csr @@ -31,7 +33,7 @@ def __init__(self, result_options, algorithm): self.algorithm = algorithm @bind_default_backend("basic_statistics") - def compute(self, params, data_table, weights_table, queue=None): ... + def compute(self, params, data_table, weights_table): ... 
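`supports_queue` becomes the single choke point for queue handling at the user-facing API; a hedged sketch of its contract:

    from onedal._device_offload import supports_queue

    class Estimator:
        @supports_queue
        def fit(self, X, y=None, queue=None):
            # by the time the body runs, `queue` has been reconciled with
            # the global queue, and all arrays in the positional arguments
            # were verified to live on the same device (or on host)
            ...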
@staticmethod def get_all_result_options(): @@ -73,6 +75,7 @@ class BasicStatistics(BaseBasicStatistics): def __init__(self, result_options="all", algorithm="by_default"): super().__init__(result_options, algorithm) + @supports_queue def fit(self, data, sample_weight=None, queue=None): is_csr = _is_csr(data) @@ -86,9 +89,7 @@ def fit(self, data, sample_weight=None, queue=None): data_table, weights_table = to_table(data, sample_weight) dtype = data.dtype - raw_result = self._compute_raw( - data_table, weights_table, dtype, is_csr, queue=queue - ) + raw_result = self._compute_raw(data_table, weights_table, dtype, is_csr) for opt, raw_value in raw_result.items(): value = from_table(raw_value).ravel() if is_single_dim: @@ -98,11 +99,9 @@ def fit(self, data, sample_weight=None, queue=None): return self - def _compute_raw( - self, data_table, weights_table, dtype=np.float32, is_csr=False, queue=None - ): + def _compute_raw(self, data_table, weights_table, dtype=np.float32, is_csr=False): params = self._get_onedal_params(is_csr, dtype) - result = self.compute(params, data_table, weights_table, queue=queue) + result = self.compute(params, data_table, weights_table) options = self._get_result_options(self.options).split("|") return {opt: getattr(result, opt) for opt in options} diff --git a/onedal/basic_statistics/incremental_basic_statistics.py b/onedal/basic_statistics/incremental_basic_statistics.py index 50453a86d6..fed4de7037 100644 --- a/onedal/basic_statistics/incremental_basic_statistics.py +++ b/onedal/basic_statistics/incremental_basic_statistics.py @@ -19,6 +19,7 @@ import numpy as np from daal4py.sklearn._utils import get_dtype +from onedal._device_offload import supports_queue from onedal.common._backend import bind_default_backend from ..datatypes import _convert_to_supported, from_table, to_table @@ -76,15 +77,16 @@ def __init__(self, result_options="all"): def partial_compute_result(self): ... @bind_default_backend("basic_statistics") - def partial_compute(self, *args, queue=None, **kwargs): ... + def partial_compute(self, *args, **kwargs): ... @bind_default_backend("basic_statistics") - def finalize_compute(self, *args, queue=None, **kwargs): ... + def finalize_compute(self, *args, **kwargs): ... def _reset(self): # get the _partial_result pointer from backend self._partial_result = self.partial_compute_result() + @supports_queue def partial_fit(self, X, weights=None, queue=None): """ Computes partial data for basic statistics @@ -123,9 +125,10 @@ def partial_fit(self, X, weights=None, queue=None): X_table, weights_table = to_table(X, weights) self._partial_result = self.partial_compute( - self._onedal_params, self._partial_result, X_table, weights_table, queue=queue + self._onedal_params, self._partial_result, X_table, weights_table ) + @supports_queue def finalize_fit(self, queue=None): """ Finalizes basic statistics computation and obtains result @@ -141,9 +144,7 @@ def finalize_fit(self, queue=None): self : object Returns the instance itself. 
""" - result = self.finalize_compute( - self._onedal_params, self._partial_result, queue=queue - ) + result = self.finalize_compute(self._onedal_params, self._partial_result) options = self._get_result_options(self.options).split("|") for opt in options: diff --git a/onedal/basic_statistics/tests/test_basic_statistics.py b/onedal/basic_statistics/tests/test_basic_statistics.py index c3886ecffa..a80ee2198d 100644 --- a/onedal/basic_statistics/tests/test_basic_statistics.py +++ b/onedal/basic_statistics/tests/test_basic_statistics.py @@ -54,7 +54,7 @@ def test_single_option_on_random_data( basicstat = BasicStatistics(result_options=result_option) - result = basicstat.fit(data, sample_weight=weights, queue=queue) + result = basicstat.fit(data, sample_weight=weights) res = getattr(result, result_option) if weighted: @@ -86,7 +86,7 @@ def test_multiple_options_on_random_data(queue, row_count, column_count, weighte basicstat = BasicStatistics(result_options=["mean", "max", "sum"]) - result = basicstat.fit(data, sample_weight=weights, queue=queue) + result = basicstat.fit(data, sample_weight=weights) res_mean, res_max, res_sum = result.mean, result.max, result.sum if weighted: @@ -127,7 +127,7 @@ def test_all_option_on_random_data(queue, row_count, column_count, weighted, dty basicstat = BasicStatistics(result_options="all") - result = basicstat.fit(data, sample_weight=weights, queue=queue) + result = basicstat.fit(data, sample_weight=weights) if weighted: weighted_data = np.diag(weights) @ data @@ -165,7 +165,7 @@ def test_1d_input_on_random_data(queue, result_option, data_size, weighted, dtyp basicstat = BasicStatistics(result_options=result_option) - result = basicstat.fit(data, sample_weight=weights, queue=queue) + result = basicstat.fit(data, sample_weight=weights) res = getattr(result, result_option) if weighted: @@ -196,7 +196,7 @@ def test_basic_csr(queue, dtype): ) basicstat = BasicStatistics(result_options="mean") - result = basicstat.fit(data, queue=queue) + result = basicstat.fit(data) res_mean = result.mean gtr_mean = data.mean(axis=0) @@ -229,7 +229,7 @@ def test_options_csr(queue, option, dtype): ) basicstat = BasicStatistics(result_options=result_option) - result = basicstat.fit(data, queue=queue) + result = basicstat.fit(data) res = getattr(result, result_option) func = getattr(data, function) diff --git a/onedal/basic_statistics/tests/test_incremental_basic_statistics.py b/onedal/basic_statistics/tests/test_incremental_basic_statistics.py index 4d18e42ce1..c5d33f5119 100644 --- a/onedal/basic_statistics/tests/test_incremental_basic_statistics.py +++ b/onedal/basic_statistics/tests/test_incremental_basic_statistics.py @@ -38,9 +38,9 @@ def test_multiple_options_on_gold_data(queue, weighted, dtype): incbs = IncrementalBasicStatistics() for i in range(2): if weighted: - incbs.partial_fit(X_split[i], weights_split[i], queue=queue) + incbs.partial_fit(X_split[i], weights_split[i]) else: - incbs.partial_fit(X_split[i], queue=queue) + incbs.partial_fit(X_split[i]) result = incbs.finalize_fit() @@ -85,9 +85,9 @@ def test_single_option_on_random_data( for i in range(num_batches): if weighted: - incbs.partial_fit(data_split[i], weights_split[i], queue=queue) + incbs.partial_fit(data_split[i], weights_split[i]) else: - incbs.partial_fit(data_split[i], queue=queue) + incbs.partial_fit(data_split[i]) result = incbs.finalize_fit() res = getattr(result, result_option) @@ -123,9 +123,9 @@ def test_multiple_options_on_random_data( for i in range(num_batches): if weighted: - 
incbs.partial_fit(data_split[i], weights_split[i], queue=queue) + incbs.partial_fit(data_split[i], weights_split[i]) else: - incbs.partial_fit(data_split[i], queue=queue) + incbs.partial_fit(data_split[i]) result = incbs.finalize_fit() res_mean, res_max, res_sum = result.mean, result.max, result.sum @@ -171,9 +171,9 @@ def test_all_option_on_random_data( for i in range(num_batches): if weighted: - incbs.partial_fit(data_split[i], weights_split[i], queue=queue) + incbs.partial_fit(data_split[i], weights_split[i]) else: - incbs.partial_fit(data_split[i], queue=queue) + incbs.partial_fit(data_split[i]) result = incbs.finalize_fit() if weighted: diff --git a/onedal/cluster/dbscan.py b/onedal/cluster/dbscan.py index 1ef8539c8c..af920f50f5 100644 --- a/onedal/cluster/dbscan.py +++ b/onedal/cluster/dbscan.py @@ -19,6 +19,7 @@ import numpy as np from daal4py.sklearn._utils import get_dtype, make2d +from onedal._device_offload import supports_queue from onedal.common._backend import bind_default_backend from ..common._mixin import ClusterMixin @@ -49,7 +50,7 @@ def __init__( self.n_jobs = n_jobs @bind_default_backend("dbscan.clustering") - def compute(self, params, data_table, weights_table, queue=None): ... + def compute(self, params, data_table, weights_table): ... def _get_onedal_params(self, dtype=np.float32): return { @@ -61,6 +62,7 @@ def _get_onedal_params(self, dtype=np.float32): "result_options": "core_observation_indices|responses", } + @supports_queue def fit(self, X, y=None, sample_weight=None, queue=None): X = _check_array(X, accept_sparse="csr", dtype=[np.float64, np.float32]) sample_weight = make2d(sample_weight) if sample_weight is not None else None @@ -72,7 +74,7 @@ def fit(self, X, y=None, sample_weight=None, queue=None): X = _convert_to_supported(X) dtype = get_dtype(X) params = self._get_onedal_params(dtype) - result = self.compute(params, to_table(X), to_table(sample_weight), queue=queue) + result = self.compute(params, to_table(X), to_table(sample_weight)) self.labels_ = from_table(result.responses).ravel() if result.core_observation_indices is not None: diff --git a/onedal/cluster/kmeans.py b/onedal/cluster/kmeans.py index d8b069ce17..b6b77f092b 100644 --- a/onedal/cluster/kmeans.py +++ b/onedal/cluster/kmeans.py @@ -22,6 +22,7 @@ import numpy as np from daal4py.sklearn._utils import daal_check_version, get_dtype +from onedal._device_offload import supports_queue from onedal.basic_statistics import BasicStatistics from onedal.common._backend import bind_default_backend @@ -63,13 +64,13 @@ def __init__( self.n_local_trials = n_local_trials @bind_default_backend("kmeans_common") - def _is_same_clustering(self, labels, best_labels, n_clusters, queue=None): ... + def _is_same_clustering(self, labels, best_labels, n_clusters): ... @bind_default_backend("kmeans.clustering") - def train(self, params, X_table, centroids_table, queue=None): ... + def train(self, params, X_table, centroids_table): ... @bind_default_backend("kmeans.clustering") - def infer(self, params, model, centroids_table, queue=None): ... + def infer(self, params, model, centroids_table): ... 
def _validate_center_shape(self, X, centers): """Check if centers is compatible with X and n_clusters.""" @@ -91,7 +92,7 @@ def _get_kmeans_init(self, cluster_count, seed, algorithm): def _get_basic_statistics_backend(self, result_options): return BasicStatistics(result_options) - def _tolerance(self, X_table, rtol, is_csr, dtype, queue=None): + def _tolerance(self, X_table, rtol, is_csr, dtype): """Compute absolute tolerance from the relative tolerance""" if rtol == 0.0: return rtol @@ -99,13 +100,13 @@ def _tolerance(self, X_table, rtol, is_csr, dtype, queue=None): bs = self._get_basic_statistics_backend("variance") - res = bs._compute_raw(X_table, dummy, dtype, is_csr, queue=queue) + res = bs._compute_raw(X_table, dummy, dtype, is_csr) mean_var = from_table(res["variance"]).mean() return mean_var * rtol def _check_params_vs_input( - self, X_table, is_csr, default_n_init=10, dtype=np.float32, queue=None + self, X_table, is_csr, default_n_init=10, dtype=np.float32 ): # n_clusters if X_table.shape[0] < self.n_clusters: @@ -114,7 +115,7 @@ def _check_params_vs_input( ) # tol - self._tol = self._tolerance(X_table, self.tol, is_csr, dtype, queue=queue) + self._tol = self._tolerance(X_table, self.tol, is_csr, dtype) # n-init # TODO(1.4): Remove @@ -173,7 +174,6 @@ def _init_centroids_onedal( is_csr, dtype=np.float32, n_centroids=None, - queue=None, ): n_clusters = self.n_clusters if n_centroids is None else n_centroids @@ -182,13 +182,13 @@ def _init_centroids_onedal( alg = self._get_kmeans_init( cluster_count=n_clusters, seed=random_seed, algorithm=algorithm ) - centers_table = alg.compute_raw(X_table, dtype, queue=queue) + centers_table = alg.compute_raw(X_table, dtype) elif isinstance(init, str) and init == "random": algorithm = "random_dense" if not is_csr else "random_csr" alg = self._get_kmeans_init( cluster_count=n_clusters, seed=random_seed, algorithm=algorithm ) - centers_table = alg.compute_raw(X_table, dtype, queue=queue) + centers_table = alg.compute_raw(X_table, dtype) elif _is_arraylike_not_scalar(init): if _is_csr(init): # oneDAL KMeans only supports Dense Centroids @@ -235,15 +235,13 @@ def _init_centroids_sklearn(self, X, init, random_state, dtype=np.float32): centers = _convert_to_supported(centers) return to_table(centers) - def _fit_backend( - self, X_table, centroids_table, dtype=np.float32, is_csr=False, queue=None - ): + def _fit_backend(self, X_table, centroids_table, dtype=np.float32, is_csr=False): params = self._get_onedal_params(is_csr, dtype) meta = _default_backend.get_table_metadata(X_table) assert meta.get_npy_dtype(0) == dtype - result = self.train(params, X_table, centroids_table, queue=queue) + result = self.train(params, X_table, centroids_table) return ( result.responses, @@ -252,7 +250,7 @@ def _fit_backend( result.iteration_count, ) - def _fit(self, X, queue=None): + def _fit(self, X): is_csr = _is_csr(X) X = _check_array( X, dtype=[np.float64, np.float32], accept_sparse="csr", force_all_finite=False @@ -261,7 +259,7 @@ def _fit(self, X, queue=None): dtype = get_dtype(X) X_table = to_table(X) - self._check_params_vs_input(X_table, is_csr, dtype=dtype, queue=queue) + self._check_params_vs_input(X_table, is_csr, dtype=dtype) self.n_features_in_ = X_table.column_count @@ -293,7 +291,7 @@ def is_better_iteration(inertia, labels): if use_onedal_init: random_seed = random_state.randint(np.iinfo("i").max) centroids_table = self._init_centroids_onedal( - X_table, init, random_seed, is_csr, dtype=dtype, queue=queue + X_table, init, random_seed, is_csr, dtype=dtype 
) else: centroids_table = self._init_centroids_sklearn( @@ -304,7 +302,7 @@ def is_better_iteration(inertia, labels): print("Initialization complete") labels, inertia, model, n_iter = self._fit_backend( - X_table, centroids_table, dtype, is_csr, queue=queue + X_table, centroids_table, dtype, is_csr ) if self.verbose: @@ -363,14 +361,14 @@ def cluster_centers_(self, cluster_centers): def cluster_centers_(self): del self._cluster_centers_ - def _predict(self, X, queue=None, result_options=None): + def _predict(self, X, result_options=None): is_csr = _is_csr(X) X = _convert_to_supported(X) X_table = to_table(X) params = self._get_onedal_params(is_csr, X.dtype, result_options) - result = self.infer(params, self.model_, X_table, queue=queue) + result = self.infer(params, self.model_, X_table) if result_options == "compute_exact_objective_function": # This is only set for score function @@ -378,12 +376,11 @@ def _predict(self, X, queue=None, result_options=None): else: return from_table(result.responses).ravel() - def _score(self, X, queue=None): + def _score(self, X): result_options = "compute_exact_objective_function" return self._predict( X, - queue, result_options, ) @@ -419,9 +416,11 @@ def __init__( self.algorithm = algorithm assert self.algorithm == "lloyd" + @supports_queue def fit(self, X, y=None, queue=None): - return self._fit(X, queue) + return self._fit(X) + @supports_queue def predict(self, X, queue=None): """Predict the closest cluster each sample in X belongs to. @@ -439,7 +438,7 @@ def predict(self, X, queue=None): labels : ndarray of shape (n_samples,) Index of the cluster each sample belongs to. """ - return self._predict(X, queue) + return self._predict(X) def fit_predict(self, X, y=None, queue=None): """Compute cluster centers and predict cluster index for each sample. @@ -502,6 +501,7 @@ def transform(self, X): return self._transform(X) + @supports_queue def score(self, X, queue=None): """Opposite of the value of X on the K-means objective. @@ -515,7 +515,7 @@ def score(self, X, queue=None): score: float Opposite of the value of X on the K-means objective. """ - return self._score(X, queue) + return self._score(X) def k_means( diff --git a/onedal/cluster/kmeans_init.py b/onedal/cluster/kmeans_init.py index 5c6efbd3ad..e9db6855f6 100755 --- a/onedal/cluster/kmeans_init.py +++ b/onedal/cluster/kmeans_init.py @@ -19,6 +19,7 @@ from sklearn.utils import check_random_state from daal4py.sklearn._utils import daal_check_version, get_dtype +from onedal._device_offload import supports_queue from onedal.common._backend import bind_default_backend from ..datatypes import _convert_to_supported, from_table, to_table @@ -49,7 +50,7 @@ def __init__( self.local_trials_count = local_trials_count @bind_default_backend("kmeans_init.init", lookup_name="compute") - def backend_compute(self, params, X_table, queue=None): ... + def backend_compute(self, params, X_table): ... 
def _get_onedal_params(self, dtype=np.float32):
         return {
@@ -74,16 +75,18 @@ def _get_params_and_input(self, X):
         params = self._get_onedal_params(dtype)
         return (params, to_table(X), dtype)
 
+    @supports_queue
     def compute(self, X, queue=None):
         _, X_table, dtype = self._get_params_and_input(X)
 
-        centroids = self.compute_raw(X_table, dtype, queue=queue)
+        centroids = self.compute_raw(X_table, dtype)
 
         return from_table(centroids)
 
+    @supports_queue
     def compute_raw(self, X_table, dtype=np.float32, queue=None):
         params = self._get_onedal_params(dtype)
-        result = self.backend_compute(params, X_table, queue=queue)
+        result = self.backend_compute(params, X_table)
         return result.centroids
 
 
 def kmeans_plusplus(
diff --git a/onedal/common/_backend.py b/onedal/common/_backend.py
index df9b0667b7..f67c02863a 100644
--- a/onedal/common/_backend.py
+++ b/onedal/common/_backend.py
@@ -15,13 +15,10 @@
 # ==============================================================================
 
 import logging
-from contextlib import contextmanager
-from types import MethodType
 from typing import Any, Callable, Literal, Optional
 
 from onedal import Backend, _default_backend, _spmd_backend
 from onedal._device_offload import SyclQueueManager
-from onedal.common.policy_manager import PolicyManager
 
 from .backend_manager import BackendManager
 
@@ -52,15 +49,14 @@ def __init__(
         self.name = name
         self.backend = backend
 
-    def __call__(self, *args: Any, queue=None, **kwargs: Any) -> Any:
-        """Dispatch to backend function with the appropriate policy which is determined from the provided or global queue"""
-        if not args:
-            # immediate dispatching without args, i.e. without data
-            return self.method(**kwargs)
+    def __call__(self, *args: Any, **kwargs: Any) -> Any:
+        """Dispatch to the backend function with the appropriate policy, which is determined from the global queue."""
+        if not args and not kwargs:
+            # immediate dispatching without any arguments, in particular no policy
+            return self.method()
 
-        if queue is None:
-            # use globally configured queue (from `target_offload` configuration or provided data)
-            queue = SyclQueueManager.get_global_queue()
+        # use globally configured queue (from `target_offload` configuration or provided data)
+        queue = getattr(SyclQueueManager.get_global_queue(), "implementation", None)
 
         if queue is not None and not (self.backend.is_dpc or self.backend.is_spmd):
             raise RuntimeError("Operations using queues require the DPC/SPMD backend")
@@ -76,39 +72,57 @@ def __call__(self, *args: Any, queue=None, **kwargs: Any) -> Any:
             policy = self.backend.host_policy()
 
         # dispatch to backend function
-        return self.method(policy, *args, **kwargs)
+        try:
+            return self.method(policy, *args, **kwargs)
+        except Exception as err:
+            raise RuntimeError(
+                f"Error in dispatching to backend function for device {policy.get_device_id()} ({policy.get_device_name()})"
+            ) from err
 
     def __repr__(self) -> str:
         return f"BackendFunction({self.backend}.{self.name})"
 
 
+def __decorator(
+    method: Callable[..., Any],
+    backend_manager: BackendManager,
+    module_name: str,
+    lookup_name: Optional[str],
+) -> Callable[..., Any]:
+    """Bind a method to the specified backend."""
+    if lookup_name is None:
+        lookup_name = method.__name__
+
+    if backend_manager.get_backend_type() == "none":
+        raise RuntimeError("Internal __decorator() should not be called with no backend")
+
+    backend_method = backend_manager.get_backend_component(module_name, lookup_name)
+    wrapped_method = BackendFunction(
+        backend_method,
+        backend_manager.backend,
+        name=f"{module_name}.{method.__name__}",
+    )
+
+    backend_type = backend_manager.get_backend_type()
+    logger.debug(
+        f"Assigned method '<{backend_type}_backend>.{module_name}.{lookup_name}' to '{method.__qualname__}'"
+    )
+
+    return wrapped_method
+
+
 def bind_default_backend(module_name: str, lookup_name: Optional[str] = None):
     def decorator(method: Callable[..., Any]):
         # grab the lookup_name from outer scope
         nonlocal lookup_name
 
-        if lookup_name is None:
-            lookup_name = method.__name__
-
         if _default_backend is None:
             logger.debug(
                 f"Default backend unavailable, skipping decoration for '{method.__name__}'"
             )
             return method
 
-        backend_method = default_manager.get_backend_component(module_name, lookup_name)
-        wrapped_method = BackendFunction(
-            backend_method,
-            _default_backend,
-            name=f"{module_name}.{method.__name__}",
-        )
-
-        backend_name = "dpc" if _default_backend.is_dpc else "host"
-        logger.debug(
-            f"Assigned method '<{backend_name}_backend>.{module_name}.{lookup_name}' to '{method.__qualname__}'"
-        )
-
-        return wrapped_method
+        return __decorator(method, default_manager, module_name, lookup_name)
 
     return decorator
 
@@ -118,26 +132,12 @@ def decorator(method: Callable[..., Any]):
         # grab the lookup_name from outer scope
         nonlocal lookup_name
 
-        if lookup_name is None:
-            lookup_name = method.__name__
-
         if _spmd_backend is None:
             logger.debug(
                 f"SPMD backend unavailable, skipping decoration for '{method.__name__}'"
            )
             return method
 
-        backend_method = spmd_manager.get_backend_component(module_name, lookup_name)
-        wrapped_method = BackendFunction(
-            backend_method,
-            _spmd_backend,
-            name=f"{module_name}.{method.__name__}",
-        )
-
-        logger.debug(
-            f"Assigned method '<spmd_backend>.{module_name}.{lookup_name}' to '{method.__qualname__}'"
-        )
-
-        return wrapped_method
+        return __decorator(method, spmd_manager, module_name, lookup_name)
 
     return decorator
 
diff --git a/onedal/common/backend_manager.py b/onedal/common/backend_manager.py
index 1bb8e5ec9a..ba6da992c5 100644
--- a/onedal/common/backend_manager.py
+++ b/onedal/common/backend_manager.py
@@ -14,11 +14,24 @@
 # limitations under the License.
 # ==============================================================================
 
+from typing import Literal
+
+BackendType = Literal["none", "host", "dpc", "spmd"]
+
 
 class BackendManager:
     def __init__(self, backend_module):
         self.backend = backend_module
 
+    def get_backend_type(self) -> BackendType:
+        if self.backend is None:
+            return "none"
+        if self.backend.is_spmd:
+            return "spmd"
+        if self.backend.is_dpc:
+            return "dpc"
+        return "host"
+
     def get_backend_component(self, module_name: str, component_name: str):
         """Get a component of the backend module.
 
diff --git a/onedal/covariance/covariance.py b/onedal/covariance/covariance.py
index 9935ed2752..0e1593e325 100644
--- a/onedal/covariance/covariance.py
+++ b/onedal/covariance/covariance.py
@@ -18,6 +18,7 @@
 import numpy as np
 
 from daal4py.sklearn._utils import daal_check_version, get_dtype
+from onedal._device_offload import supports_queue
 from onedal.common._backend import bind_default_backend
 from onedal.utils import _check_array
 
@@ -32,7 +33,7 @@ def __init__(self, method="dense", bias=False, assume_centered=False):
         self.assume_centered = assume_centered
 
     @bind_default_backend("covariance")
-    def compute(self, *args, queue=None, **kwargs): ...
+    def compute(self, *args, **kwargs): ...
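
For readers unfamiliar with the stub-binding idiom that `bind_default_backend`/`bind_spmd_backend` implement: the class declares a `...`-bodied placeholder, and the decorator replaces it at class-definition time with a callable looked up from the backend module. Here is a toy sketch of that mechanism under those assumptions; `DummyBackend` and `bind_backend` are invented for illustration and stand in for the pybind11 extension modules.

```python
class DummyBackend:
    """Stand-in for the pybind11 extension module."""
    class covariance:                      # mimics a backend submodule
        @staticmethod
        def compute(params, data):
            return {"cov_matrix": (params, data)}

def bind_backend(module_name, lookup_name=None):
    def decorator(method):
        name = lookup_name or method.__name__
        component = getattr(DummyBackend, module_name)   # e.g. DummyBackend.covariance
        backend_func = getattr(component, name)
        def bound(self, *args, **kwargs):                # replaces the stub entirely
            return backend_func(*args, **kwargs)
        return bound
    return decorator

class EmpiricalCovariance:
    @bind_backend("covariance")
    def compute(self, params, data): ...   # stub body never runs

print(EmpiricalCovariance().compute({"method": "dense"}, [[1.0, 2.0]]))
```
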
def _get_onedal_params(self, dtype=np.float32): params = { @@ -76,6 +77,7 @@ class EmpiricalCovariance(BaseEmpiricalCovariance): Estimated covariance matrix """ + @supports_queue def fit(self, X, y=None, queue=None): """Fit the sample covariance matrix of X. @@ -102,14 +104,9 @@ def fit(self, X, y=None, queue=None): params = self._get_onedal_params(dtype) hparams = get_hyperparameters("covariance", "compute") if hparams is not None and not hparams.is_default: - result = self.compute( - params, - hparams.backend, - to_table(X), - queue=queue, - ) + result = self.compute(params, hparams.backend, to_table(X)) else: - result = self.compute(params, to_table(X), queue=queue) + result = self.compute(params, to_table(X)) if daal_check_version((2024, "P", 1)) or (not self.bias): self.covariance_ = from_table(result.cov_matrix) else: diff --git a/onedal/covariance/incremental_covariance.py b/onedal/covariance/incremental_covariance.py index 4ffe73cea4..8dfa72607b 100644 --- a/onedal/covariance/incremental_covariance.py +++ b/onedal/covariance/incremental_covariance.py @@ -18,6 +18,7 @@ import numpy as np from daal4py.sklearn._utils import daal_check_version, get_dtype +from onedal._device_offload import supports_queue from onedal.common._backend import bind_default_backend from ..datatypes import _convert_to_supported, from_table, to_table @@ -60,17 +61,18 @@ def __init__(self, method="dense", bias=False, assume_centered=False): self._reset() @bind_default_backend("covariance") - def partial_compute(self, params, partial_result, X_table, queue=None): ... + def partial_compute(self, params, partial_result, X_table): ... @bind_default_backend("covariance") def partial_compute_result(self): ... @bind_default_backend("covariance") - def finalize_compute(self, params, partial_result, queue=None): ... + def finalize_compute(self, params, partial_result): ... def _reset(self): self._partial_result = self.partial_compute_result() + @supports_queue def partial_fit(self, X, y=None, queue=None): """ Computes partial data for the covariance matrix @@ -102,10 +104,9 @@ def partial_fit(self, X, y=None, queue=None): params = self._get_onedal_params(self._dtype) table_X = to_table(X) - self._partial_result = self.partial_compute( - params, self._partial_result, table_X, queue=queue - ) + self._partial_result = self.partial_compute(params, self._partial_result, table_X) + @supports_queue def finalize_fit(self, queue=None): """ Finalizes covariance matrix and obtains `covariance_` and `location_` @@ -123,7 +124,7 @@ def finalize_fit(self, queue=None): """ params = self._get_onedal_params(self._dtype) - result = self.finalize_compute(params, self._partial_result, queue=queue) + result = self.finalize_compute(params, self._partial_result) if daal_check_version((2024, "P", 1)) or (not self.bias): self.covariance_ = from_table(result.cov_matrix) else: diff --git a/onedal/decomposition/incremental_pca.py b/onedal/decomposition/incremental_pca.py index b4b0d9c670..510d343ff9 100644 --- a/onedal/decomposition/incremental_pca.py +++ b/onedal/decomposition/incremental_pca.py @@ -19,6 +19,7 @@ import numpy as np from daal4py.sklearn._utils import get_dtype +from onedal._device_offload import supports_queue from onedal.common._backend import bind_default_backend from ..datatypes import _convert_to_supported, from_table, to_table @@ -102,10 +103,10 @@ def __init__( self._reset() @bind_default_backend("decomposition.dim_reduction") - def finalize_train(self, params, partial_result, queue=None): ... 
+ def finalize_train(self, params, partial_result): ... @bind_default_backend("decomposition.dim_reduction") - def partial_train(self, params, partial_result, X_table, queue=None): ... + def partial_train(self, params, partial_result, X_table): ... @bind_default_backend("decomposition.dim_reduction") def partial_train_result(self): ... @@ -115,7 +116,8 @@ def _reset(self): if hasattr(self, "components_"): del self.components_ - def partial_fit(self, X, queue): + @supports_queue + def partial_fit(self, X, queue=None): """Incremental fit with X. All of X is processed as a single batch. Parameters @@ -159,10 +161,11 @@ def partial_fit(self, X, queue): X_table = to_table(X) self._partial_result = self.partial_train( - self._params, self._partial_result, X_table, queue=queue + self._params, self._partial_result, X_table ) return self + @supports_queue def finalize_fit(self, queue=None): """ Finalizes principal components computation and obtains resulting @@ -178,7 +181,7 @@ def finalize_fit(self, queue=None): self : object Returns the instance itself. """ - result = self.finalize_train(self._params, self._partial_result, queue=queue) + result = self.finalize_train(self._params, self._partial_result) self.mean_ = from_table(result.means).ravel() self.var_ = from_table(result.variances).ravel() self.components_ = from_table(result.eigenvectors) diff --git a/onedal/decomposition/pca.py b/onedal/decomposition/pca.py index b296ee8508..d5e72c866e 100644 --- a/onedal/decomposition/pca.py +++ b/onedal/decomposition/pca.py @@ -21,6 +21,7 @@ from sklearn.decomposition._pca import _infer_dimension from sklearn.utils.extmath import stable_cumsum +from onedal._device_offload import supports_queue from onedal.common._backend import bind_default_backend from ..datatypes import _convert_to_supported, from_table, to_table @@ -48,10 +49,10 @@ def __init__( def model(self): ... @bind_default_backend("decomposition.dim_reduction") - def train(self, params, X, queue=None): ... + def train(self, params, X): ... @bind_default_backend("decomposition.dim_reduction") - def infer(self, params, X, model, queue=None): ... + def infer(self, params, X, model): ... 
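
The incremental estimators touched above (covariance, PCA) all follow the same two-phase protocol: `partial_train` accumulates a backend partial result per batch, and `finalize_train` reduces it exactly once. A hedged usage sketch follows, assuming the class is importable as shown and that host execution needs no queue argument.

```python
import numpy as np

from onedal.covariance import IncrementalEmpiricalCovariance  # import path assumed

est = IncrementalEmpiricalCovariance()
rng = np.random.default_rng(0)
for _ in range(4):                 # stream four batches of 250 samples each
    batch = rng.standard_normal((250, 10))
    est.partial_fit(batch)         # accumulates a backend partial result
est.finalize_fit()                 # single reduction to covariance_ / location_
print(est.covariance_.shape)       # expected: (10, 10)
```
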
def _get_onedal_params(self, data, stage=None):
         if stage is None:
@@ -138,17 +139,19 @@ def _create_model(self):
         self._onedal_model = m
         return m
 
+    @supports_queue
     def predict(self, X, queue=None):
         model = self._create_model()
 
         X = _convert_to_supported(X)
         params = self._get_onedal_params(X, stage="predict")
-        result = self.infer(params, model, to_table(X), queue=queue)
+        result = self.infer(params, model, to_table(X))
 
         return from_table(result.transformed_data)
 
 
 class PCA(BasePCA):
+    @supports_queue
     def fit(self, X, y=None, queue=None):
         n_samples, n_features = X.shape
         n_sf_min = min(n_samples, n_features)
@@ -161,7 +164,7 @@ def fit(self, X, y=None, queue=None):
 
         X = _convert_to_supported(X)
         params = self._get_onedal_params(X)
-        result = self.train(params, to_table(X), queue=queue)
+        result = self.train(params, to_table(X))
 
         self.mean_ = from_table(result.means).ravel()
         self.variances_ = from_table(result.variances)
diff --git a/onedal/ensemble/forest.py b/onedal/ensemble/forest.py
index 8a2fedf564..b32188d136 100644
--- a/onedal/ensemble/forest.py
+++ b/onedal/ensemble/forest.py
@@ -24,6 +24,7 @@
 from sklearn.utils import check_random_state
 
 from daal4py.sklearn._utils import daal_check_version
+from onedal._device_offload import supports_queue
 from onedal.common._backend import bind_default_backend
 from sklearnex import get_hyperparameters
 
@@ -97,10 +98,10 @@ def __init__(
         self.algorithm = algorithm
 
     @abstractmethod
-    def train(self, *args, queue=None, **kwargs): ...
+    def train(self, *args, **kwargs): ...
 
     @abstractmethod
-    def infer(self, *args, queue=None, **kwargs): ...
+    def infer(self, *args, **kwargs): ...
 
     def _to_absolute_max_features(self, n_features):
         if self.max_features is None:
@@ -294,7 +295,7 @@ def _get_sample_weight(self, sample_weight, X):
 
         return sample_weight
 
-    def _fit(self, X, y, sample_weight, queue):
+    def _fit(self, X, y, sample_weight):
         X, y = _check_X_y(
             X,
             y,
@@ -313,7 +314,7 @@ def _fit(self, X, y, sample_weight):
         data = (X, y)
         data = _convert_to_supported(*data)
         params = self._get_onedal_params(data[0])
-        train_result = self.train(params, *to_table(*data), queue=queue)
+        train_result = self.train(params, *to_table(*data))
 
         self._onedal_model = train_result.model
 
@@ -350,7 +351,7 @@ def _create_model(self, module):
         # update error msg.
        raise NotImplementedError("Creating model is not supported.")
 
-    def _predict(self, X, queue, hparams=None):
+    def _predict(self, X, hparams=None):
         _check_is_fitted(self)
         X = _check_array(
             X, dtype=[np.float64, np.float32], force_all_finite=True, accept_sparse=False
@@ -361,14 +362,14 @@ def _predict(self, X, hparams=None):
         X = _convert_to_supported(X)
         params = self._get_onedal_params(X)
         if hparams is not None and not hparams.is_default:
-            result = self.infer(params, hparams.backend, model, to_table(X), queue=queue)
+            result = self.infer(params, hparams.backend, model, to_table(X))
         else:
-            result = self.infer(params, model, to_table(X), queue=queue)
+            result = self.infer(params, model, to_table(X))
 
         y = from_table(result.responses)
         return y
 
-    def _predict_proba(self, X, queue, hparams=None):
+    def _predict_proba(self, X, hparams=None):
         _check_is_fitted(self)
         X = _check_array(
             X, dtype=[np.float64, np.float32], force_all_finite=True, accept_sparse=False
@@ -380,9 +381,9 @@ def _predict_proba(self, X, hparams=None):
         model = self._onedal_model
 
         if hparams is not None and not hparams.is_default:
-            result = self.infer(params, hparams.backend, model, to_table(X), queue=queue)
+            result = self.infer(params, hparams.backend, model, to_table(X))
         else:
-            result = self.infer(params, model, to_table(X), queue=queue)
+            result = self.infer(params, model, to_table(X))
 
         y = from_table(result.probabilities)
         return y
@@ -447,10 +448,10 @@ def __init__(
         )
 
     @bind_default_backend("decision_forest.classification")
-    def train(self, *args, queue=None, **kwargs): ...
+    def train(self, *args, **kwargs): ...
 
     @bind_default_backend("decision_forest.classification")
-    def infer(self, *args, queue=None, **kwargs): ...
+    def infer(self, *args, **kwargs): ...
 
     def _validate_targets(self, y, dtype):
         y, self.class_weight_, self.classes_ = _validate_targets(
@@ -464,19 +465,22 @@ def _validate_targets(self, y, dtype):
         # self.n_classes_ = self.classes_
         return y
 
+    @supports_queue
     def fit(self, X, y, sample_weight=None, queue=None):
-        return self._fit(X, y, sample_weight, queue)
+        return self._fit(X, y, sample_weight)
 
+    @supports_queue
     def predict(self, X, queue=None):
         hparams = get_hyperparameters("decision_forest", "infer")
 
-        pred = self._predict(X, queue, hparams)
+        pred = self._predict(X, hparams)
 
         return np.take(self.classes_, pred.ravel().astype(np.int64, casting="unsafe"))
 
+    @supports_queue
     def predict_proba(self, X, queue=None):
         hparams = get_hyperparameters("decision_forest", "infer")
 
-        return super()._predict_proba(X, queue, hparams)
+        return super()._predict_proba(X, hparams)
 
 
 class RandomForestRegressor(RegressorMixin, BaseForest, metaclass=ABCMeta):
@@ -538,18 +542,20 @@ def __init__(
         )
 
     @bind_default_backend("decision_forest.regression")
-    def train(self, *args, queue=None, **kwargs): ...
+    def train(self, *args, **kwargs): ...
 
     @bind_default_backend("decision_forest.regression")
-    def infer(self, *args, queue=None, **kwargs): ...
+    def infer(self, *args, **kwargs): ...
 
+    @supports_queue
     def fit(self, X, y, sample_weight=None, queue=None):
         if sample_weight is not None:
             if hasattr(sample_weight, "__array__"):
                 sample_weight[sample_weight == 0.0] = 1.0
             sample_weight = [sample_weight]
-        return self._fit(X, y, sample_weight, queue)
+        return self._fit(X, y, sample_weight)
 
+    @supports_queue
     def predict(self, X, queue=None):
-        return self._predict(X, queue).ravel()
+        return self._predict(X).ravel()
 
@@ -613,10 +619,10 @@ def __init__(
         )
 
     @bind_default_backend("decision_forest.classification")
-    def train(self, *args, queue=None, **kwargs): ...
+ def train(self, *args, **kwargs): ... @bind_default_backend("decision_forest.classification") - def infer(self, *args, queue=None, **kwargs): ... + def infer(self, *args, **kwargs): ... def _validate_targets(self, y, dtype): y, self.class_weight_, self.classes_ = _validate_targets( @@ -630,27 +636,19 @@ def _validate_targets(self, y, dtype): # self.n_classes_ = self.classes_ return y + @supports_queue def fit(self, X, y, sample_weight=None, queue=None): - return self._fit( - X, - y, - sample_weight, - queue, - ) + return self._fit(X, y, sample_weight) + @supports_queue def predict(self, X, queue=None): - pred = self._predict( - X, - queue, - ) + pred = self._predict(X) return np.take(self.classes_, pred.ravel().astype(np.int64, casting="unsafe")) + @supports_queue def predict_proba(self, X, queue=None): - return super()._predict_proba( - X, - queue, - ) + return super()._predict_proba(X) class ExtraTreesRegressor(RegressorMixin, BaseForest, metaclass=ABCMeta): @@ -712,25 +710,19 @@ def __init__( ) @bind_default_backend("decision_forest.regression") - def train(self, *args, queue=None, **kwargs): ... + def train(self, *args, **kwargs): ... @bind_default_backend("decision_forest.regression") - def infer(self, *args, queue=None, **kwargs): ... + def infer(self, *args, **kwargs): ... + @supports_queue def fit(self, X, y, sample_weight=None, queue=None): if sample_weight is not None: if hasattr(sample_weight, "__array__"): sample_weight[sample_weight == 0.0] = 1.0 sample_weight = [sample_weight] - return self._fit( - X, - y, - sample_weight, - queue, - ) + return self._fit(X, y, sample_weight) + @supports_queue def predict(self, X, queue=None): - return self._predict( - X, - queue, - ).ravel() + return self._predict(X).ravel() diff --git a/onedal/linear_model/incremental_linear_model.py b/onedal/linear_model/incremental_linear_model.py index cfe4428788..ba32687581 100644 --- a/onedal/linear_model/incremental_linear_model.py +++ b/onedal/linear_model/incremental_linear_model.py @@ -19,6 +19,7 @@ import numpy as np from daal4py.sklearn._utils import get_dtype +from onedal._device_offload import supports_queue from onedal.common._backend import bind_default_backend from ..common.hyperparameters import get_hyperparameters @@ -53,15 +54,16 @@ def __init__(self, fit_intercept=True, copy_X=False, algorithm="norm_eq"): def partial_train_result(self): ... @bind_default_backend("linear_model.regression") - def partial_train(self, *args, queue=None, **kwargs): ... + def partial_train(self, *args, **kwargs): ... @bind_default_backend("linear_model.regression") - def finalize_train(self, *args, queue=None, **kwargs): ... + def finalize_train(self, *args, **kwargs): ... 
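
The `partial_train`/`finalize_train` calls above also show the hyperparameter-dispatch convention used throughout these files: when non-default training hyperparameters are configured, their native handle is threaded in as an extra positional argument right after `params`. A self-contained sketch of that branch; `Hyperparameters` and `train` here are illustrative stubs, not the real onedal objects.

```python
class Hyperparameters:
    """Illustrative stand-in for the native hyperparameter handle wrapper."""
    def __init__(self, **overrides):
        self.overrides = overrides
        self.backend = ("native-hparams", overrides)  # stub native handle

    @property
    def is_default(self):
        return not self.overrides

def train(params, *args):
    return f"backend train received {len(args)} positional args"

def fit(params, X_table, y_table, hparams=None):
    # mirrors the branch used above: the handle goes right after `params`
    if hparams is not None and not hparams.is_default:
        return train(params, hparams.backend, X_table, y_table)
    return train(params, X_table, y_table)

print(fit({}, "X", "y"))                                       # 2 positional args
print(fit({}, "X", "y", Hyperparameters(cpu_macro_block=64)))  # 3 positional args
```
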
def _reset(self): # Get the pointer to partial_result from backend self._partial_result = self.partial_train_result() + @supports_queue def partial_fit(self, X, y, queue=None): """ Computes partial data for linear regression @@ -100,18 +102,14 @@ def partial_fit(self, X, y, queue=None): hparams = get_hyperparameters("linear_regression", "train") if hparams is not None and not hparams.is_default: self._partial_result = self.partial_train( - self._params, - hparams.backend, - self._partial_result, - X_table, - y_table, - queue=queue, + self._params, hparams.backend, self._partial_result, X_table, y_table ) else: self._partial_result = self.partial_train( - self._params, self._partial_result, X_table, y_table, queue=queue + self._params, self._partial_result, X_table, y_table ) + @supports_queue def finalize_fit(self, queue=None): """ Finalizes linear regression computation and obtains coefficients @@ -131,10 +129,10 @@ def finalize_fit(self, queue=None): hparams = get_hyperparameters("linear_regression", "train") if hparams is not None and not hparams.is_default: result = self.finalize_train( - self._params, hparams.backend, self._partial_result, queue=queue + self._params, hparams.backend, self._partial_result ) else: - result = self.finalize_train(self._params, self._partial_result, queue=queue) + result = self.finalize_train(self._params, self._partial_result) self._onedal_model = result.model @@ -183,11 +181,12 @@ def _reset(self): def partial_train_result(self): ... @bind_default_backend("linear_model.regression") - def partial_train(self, *args, queue=None, **kwargs): ... + def partial_train(self, *args, **kwargs): ... @bind_default_backend("linear_model.regression") - def finalize_train(self, *args, queue=None, **kwargs): ... + def finalize_train(self, *args, **kwargs): ... + @supports_queue def partial_fit(self, X, y, queue=None): """ Computes partial data for ridge regression @@ -226,18 +225,14 @@ def partial_fit(self, X, y, queue=None): hparams = get_hyperparameters("linear_regression", "train") if hparams is not None and not hparams.is_default: self._partial_result = self.partial_train( - self._params, - hparams.backend, - self._partial_result, - X_table, - y_table, - queue=queue, + self._params, hparams.backend, self._partial_result, X_table, y_table ) else: self._partial_result = self.partial_train( - self._params, self._partial_result, X_table, y_table, queue=queue + self._params, self._partial_result, X_table, y_table ) + @supports_queue def finalize_fit(self, queue=None): """ Finalizes ridge regression computation and obtains coefficients @@ -253,7 +248,7 @@ def finalize_fit(self, queue=None): self : object Returns the instance itself. 
""" - result = self.finalize_train(self._params, self._partial_result, queue=queue) + result = self.finalize_train(self._params, self._partial_result) self._onedal_model = result.model diff --git a/onedal/linear_model/linear_model.py b/onedal/linear_model/linear_model.py index 68f7011406..e59279581e 100755 --- a/onedal/linear_model/linear_model.py +++ b/onedal/linear_model/linear_model.py @@ -20,6 +20,7 @@ import numpy as np from daal4py.sklearn._utils import daal_check_version, get_dtype, make2d +from onedal._device_offload import supports_queue from onedal.common._backend import bind_default_backend from ..common._estimator_checks import _check_is_fitted @@ -41,10 +42,10 @@ def __init__(self, fit_intercept, copy_X, algorithm, alpha=0.0): self.algorithm = algorithm @bind_default_backend("linear_model.regression") - def train(self, *args, queue=None, **kwargs): ... + def train(self, *args, **kwargs): ... @bind_default_backend("linear_model.regression") - def infer(self, params, model, X, queue=None): ... + def infer(self, params, model, X): ... # direct access to the backend model class @bind_default_backend("linear_model.regression") @@ -108,6 +109,7 @@ def _create_model(self): return model + @supports_queue def predict(self, X, queue=None): """ Predict using the linear model. @@ -142,7 +144,7 @@ def predict(self, X, queue=None): params = self._get_onedal_params(get_dtype(X)) X_table = to_table(X) - result = self.infer(params, model, X_table, queue=queue) + result = self.infer(params, model, X_table) y = from_table(result.responses) if y.shape[1] == 1 and self.coef_.ndim == 1: @@ -179,6 +181,7 @@ def __init__( ): super().__init__(fit_intercept=fit_intercept, copy_X=copy_X, algorithm=algorithm) + @supports_queue def fit(self, X, y, queue=None): """ Fit linear model. @@ -220,9 +223,9 @@ def fit(self, X, y, queue=None): hparams = get_hyperparameters("linear_regression", "train") if hparams is not None and not hparams.is_default: - result = self.train(params, hparams.backend, X_table, y_table, queue=queue) + result = self.train(params, hparams.backend, X_table, y_table) else: - result = self.train(params, X_table, y_table, queue=queue) + result = self.train(params, X_table, y_table) self._onedal_model = result.model @@ -275,6 +278,7 @@ def __init__( fit_intercept=fit_intercept, alpha=alpha, copy_X=copy_X, algorithm=algorithm ) + @supports_queue def fit(self, X, y, queue=None): """ Fit linear model. @@ -312,7 +316,7 @@ def fit(self, X, y, queue=None): params = self._get_onedal_params(get_dtype(X)) X_table, y_table = to_table(X, y) - result = self.train(params, X_table, y_table, queue=queue) + result = self.train(params, X_table, y_table) self._onedal_model = result.model packed_coefficients = from_table(result.model.packed_coefficients) diff --git a/onedal/linear_model/logistic_regression.py b/onedal/linear_model/logistic_regression.py index 5ddf44e83f..25683aa6bd 100644 --- a/onedal/linear_model/logistic_regression.py +++ b/onedal/linear_model/logistic_regression.py @@ -20,6 +20,7 @@ import numpy as np from daal4py.sklearn._utils import daal_check_version, get_dtype, make2d +from onedal._device_offload import supports_queue from onedal.common._backend import bind_default_backend from ..common._estimator_checks import _check_is_fitted @@ -46,10 +47,10 @@ def __init__(self, tol, C, fit_intercept, solver, max_iter, algorithm): self.algorithm = algorithm @abstractmethod - def train(self, params, X, y, queue=None): ... + def train(self, params, X, y): ... 
@abstractmethod - def infer(self, params, X, queue=None): ... + def infer(self, params, X): ... # direct access to the backend model constructor @abstractmethod @@ -72,7 +73,7 @@ def _get_onedal_params(self, is_csr, dtype=np.float32): ), } - def _fit(self, X, y, queue): + def _fit(self, X, y): sparsity_enabled = daal_check_version((2024, "P", 700)) X, y = _check_X_y( X, @@ -96,7 +97,7 @@ def _fit(self, X, y, queue): params = self._get_onedal_params(is_csr, get_dtype(X)) X_table, y_table = to_table(X, y) - result = self.train(params, X_table, y_table, queue=queue) + result = self.train(params, X_table, y_table) self._onedal_model = result.model self.n_iter_ = np.array([result.iterations_count]) @@ -161,7 +162,7 @@ def _create_model(self): return m - def _infer(self, X, queue): + def _infer(self, X): _check_is_fitted(self) sparsity_enabled = daal_check_version((2024, "P", 700)) @@ -187,24 +188,24 @@ def _infer(self, X, queue): params = self._get_onedal_params(is_csr, get_dtype(X)) X_table = to_table(X) - result = self.infer(params, model, X_table, queue=queue) + result = self.infer(params, model, X_table) return result - def _predict(self, X, queue): - result = self._infer(X, queue) + def _predict(self, X): + result = self._infer(X) y = from_table(result.responses) y = np.take(self.classes_, y.ravel(), axis=0) return y - def _predict_proba(self, X, queue): - result = self._infer(X, queue) + def _predict_proba(self, X): + result = self._infer(X) y = from_table(result.probabilities) y = y.reshape(-1, 1) return np.hstack([1 - y, y]) - def _predict_log_proba(self, X, queue): - y_proba = self._predict_proba(X, queue) + def _predict_log_proba(self, X): + y_proba = self._predict_proba(X) return np.log(y_proba) @@ -242,17 +243,18 @@ def infer(self, params, X, model, queue=None): ... @bind_default_backend("logistic_regression.classification") def model(self): ... + @supports_queue def fit(self, X, y, queue=None): - return self._fit(X, y, queue) + return self._fit(X, y) + @supports_queue def predict(self, X, queue=None): - y = self._predict(X, queue) - return y + return self._predict(X) + @supports_queue def predict_proba(self, X, queue=None): - y = self._predict_proba(X, queue) - return y + return self._predict_proba(X) + @supports_queue def predict_log_proba(self, X, queue=None): - y = self._predict_log_proba(X, queue) - return y + return self._predict_log_proba(X) diff --git a/onedal/neighbors/neighbors.py b/onedal/neighbors/neighbors.py index e904467e2f..42f90d0552 100755 --- a/onedal/neighbors/neighbors.py +++ b/onedal/neighbors/neighbors.py @@ -27,6 +27,7 @@ kdtree_knn_classification_prediction, kdtree_knn_classification_training, ) +from onedal._device_offload import SyclQueueManager, supports_queue from onedal.common._backend import bind_default_backend from ..common._estimator_checks import _check_is_fitted, _is_classifier, _is_regressor @@ -78,7 +79,7 @@ def train(self, *args, **kwargs): ... def infer(self, *args, **kwargs): ... @abstractmethod - def _onedal_fit(self, X, y, queue): ... + def _onedal_fit(self, X, y): ... 
def _validate_data( self, X, y=None, reset=True, validate_separately=None, **check_params @@ -214,7 +215,7 @@ def _validate_n_classes(self): f"The number of classes has to be greater than one; got {length}" ) - def _fit(self, X, y, queue): + def _fit(self, X, y): self._onedal_model = None self._tree = None self._shape = None @@ -273,11 +274,13 @@ def _fit(self, X, y, queue): ) _fit_y = None + # global queue is set as per user configuration (`target_offload`) or from data prior to calling this internal function + queue = SyclQueueManager.get_global_queue() gpu_device = queue is not None and queue.sycl_device.is_gpu if _is_classifier(self) or (_is_regressor(self) and gpu_device): _fit_y = self._validate_targets(self._y, X.dtype).reshape((-1, 1)) - result = self._onedal_fit(X, _fit_y, queue) + result = self._onedal_fit(X, _fit_y) if y is not None and _is_regressor(self): self._y = y if self._shape is None else y.reshape(self._shape) @@ -287,7 +290,7 @@ def _fit(self, X, y, queue): return result - def _kneighbors(self, X=None, n_neighbors=None, return_distance=True, queue=None): + def _kneighbors(self, X=None, n_neighbors=None, return_distance=True): n_features = getattr(self, "n_features_in_", None) shape = getattr(X, "shape", None) if n_features and shape and len(shape) > 1 and shape[1] != n_features: @@ -345,16 +348,12 @@ def _kneighbors(self, X=None, n_neighbors=None, return_distance=True, queue=None bf_knn_classification_model, ): params = super()._get_daal_params(X, n_neighbors=n_neighbors) - prediction_results = self._onedal_predict( - self._onedal_model, X, params, queue=queue - ) + prediction_results = self._onedal_predict(self._onedal_model, X, params) distances = prediction_results.distances indices = prediction_results.indices else: params = super()._get_onedal_params(X, n_neighbors=n_neighbors) - prediction_results = self._onedal_predict( - self._onedal_model, X, params, queue=queue - ) + prediction_results = self._onedal_predict(self._onedal_model, X, params) distances = from_table(prediction_results.distances) indices = from_table(prediction_results.indices) @@ -434,10 +433,10 @@ def model(self): ... # direct access to the backend model constructor @bind_default_backend("neighbors.classification") - def train(self, *args, queue=None, **kwargs): ... + def train(self, *args, **kwargs): ... @bind_default_backend("neighbors.classification") - def infer(self, *args, queue=None, **kwargs): ... + def infer(self, *args, **kwargs): ... 
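
Several internal neighbors functions now read the queue from `SyclQueueManager.get_global_queue()` instead of taking it as a parameter, then branch on the device to choose between the daal4py host path and the oneDAL backend path. Below is a stripped-down sketch of that dispatch, with stub objects standing in for SYCL queues; only the branching logic is taken from the code above.

```python
class _Device:
    def __init__(self, is_gpu):
        self.is_gpu = is_gpu

class _Queue:
    """Stub standing in for a SYCL queue."""
    def __init__(self, is_gpu):
        self.sycl_device = _Device(is_gpu)

_current_queue = _Queue(is_gpu=True)   # would be set via target_offload or input data

def get_global_queue():
    return _current_queue

def onedal_fit(X, effective_metric="euclidean"):
    queue = get_global_queue()
    gpu_device = queue is not None and queue.sycl_device.is_gpu
    if effective_metric == "euclidean" and not gpu_device:
        return "daal4py host path"
    return "oneDAL backend path"

print(onedal_fit([[0.0, 1.0]]))        # -> "oneDAL backend path" (GPU queue set)
```
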
def _get_daal_params(self, data): params = super()._get_daal_params(data) @@ -445,7 +444,9 @@ def _get_daal_params(self, data): params["resultsToCompute"] = "" return params - def _onedal_fit(self, X, y, queue): + def _onedal_fit(self, X, y): + # global queue is set as per user configuration (`target_offload`) or from data prior to calling this internal function + queue = SyclQueueManager.get_global_queue() gpu_device = queue is not None and queue.sycl_device.is_gpu if self.effective_metric_ == "euclidean" and not gpu_device: params = self._get_daal_params(X) @@ -459,9 +460,9 @@ def _onedal_fit(self, X, y, queue): else: X, y = _convert_to_supported(X, y) params = self._get_onedal_params(X, y) - return self.train(params, *to_table(X, y), queue=queue).model + return self.train(params, *to_table(X, y)).model - def _onedal_predict(self, model, X, params, queue): + def _onedal_predict(self, model, X, params): if type(self._onedal_model) is kdtree_knn_classification_model: return kdtree_knn_classification_prediction(**params).compute(X, model) elif type(self._onedal_model) is bf_knn_classification_model: @@ -471,13 +472,15 @@ def _onedal_predict(self, model, X, params, queue): if "responses" not in params["result_option"]: params["result_option"] += "|responses" params["fptype"] = X.dtype - result = self.infer(params, model, to_table(X), queue=queue) + result = self.infer(params, model, to_table(X)) return result + @supports_queue def fit(self, X, y, queue=None): - return self._fit(X, y, queue=queue) + return self._fit(X, y) + @supports_queue def predict(self, X, queue=None): X = _check_array(X, accept_sparse="csr", dtype=[np.float64, np.float32]) onedal_model = getattr(self, "_onedal_model", None) @@ -506,16 +509,17 @@ def predict(self, X, queue=None): or type(onedal_model) is bf_knn_classification_model ): params = self._get_daal_params(X) - prediction_result = self._onedal_predict(onedal_model, X, params, queue=queue) + prediction_result = self._onedal_predict(onedal_model, X, params) responses = prediction_result.prediction else: params = self._get_onedal_params(X) - prediction_result = self._onedal_predict(onedal_model, X, params, queue=queue) + prediction_result = self._onedal_predict(onedal_model, X, params) responses = from_table(prediction_result.responses) result = self.classes_.take(np.asarray(responses.ravel(), dtype=np.intp)) return result + @supports_queue def predict_proba(self, X, queue=None): neigh_dist, neigh_ind = self.kneighbors(X, queue=queue) @@ -553,8 +557,9 @@ def predict_proba(self, X, queue=None): return probabilities + @supports_queue def kneighbors(self, X=None, n_neighbors=None, return_distance=True, queue=None): - return self._kneighbors(X, n_neighbors, return_distance, queue=queue) + return self._kneighbors(X, n_neighbors, return_distance) class KNeighborsRegressor(NeighborsBase, RegressorMixin): @@ -580,16 +585,16 @@ def __init__( self.weights = weights @bind_default_backend("neighbors.search", lookup_name="train") - def train_search(self, *args, queue=None, **kwargs): ... + def train_search(self, *args, **kwargs): ... @bind_default_backend("neighbors.search", lookup_name="infer") - def infer_search(self, *args, queue=None, **kwargs): ... + def infer_search(self, *args, **kwargs): ... @bind_default_backend("neighbors.regression") - def train(self, *args, queue=None, **kwargs): ... + def train(self, *args, **kwargs): ... @bind_default_backend("neighbors.regression") - def infer(self, *args, queue=None, **kwargs): ... + def infer(self, *args, **kwargs): ... 
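
The `lookup_name` argument used for `train_search`/`infer_search` above exists because one class needs the same backend entry point bound under two different method names. A minimal sketch, with an invented registry in place of the real pybind11 modules:

```python
BACKEND = {                                   # invented registry for the sketch
    "neighbors.regression": {"train": lambda p, X, y: "regression.train"},
    "neighbors.search": {"train": lambda p, X: "search.train"},
}

def bind(module_name, lookup_name=None):
    def decorator(method):
        backend_func = BACKEND[module_name][lookup_name or method.__name__]
        return lambda self, *args: backend_func(*args)
    return decorator

class KNeighborsRegressor:
    @bind("neighbors.regression")
    def train(self, params, X, y): ...        # looked up as "train"

    @bind("neighbors.search", lookup_name="train")
    def train_search(self, params, X): ...    # different name, same entry point

knn = KNeighborsRegressor()
print(knn.train({}, "X", "y"))                # -> regression.train
print(knn.train_search({}, "X"))              # -> search.train
```
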
def _get_daal_params(self, data):
         params = super()._get_daal_params(data)
@@ -597,7 +602,9 @@ def _get_daal_params(self, data):
         params["resultsToEvaluate"] = "none"
         return params
 
-    def _onedal_fit(self, X, y, queue):
+    def _onedal_fit(self, X, y):
+        # global queue is set as per user configuration (`target_offload`) or from data prior to calling this internal function
+        queue = SyclQueueManager.get_global_queue()
         gpu_device = queue is not None and queue.sycl_device.is_gpu
         if self.effective_metric_ == "euclidean" and not gpu_device:
             params = self._get_daal_params(X)
@@ -612,11 +619,11 @@ def _onedal_fit(self, X, y):
             params = self._get_onedal_params(X, y)
 
             if gpu_device:
-                return self.train(params, *to_table(X, y), queue=queue).model
+                return self.train(params, *to_table(X, y)).model
             else:
-                return self.train_search(params, to_table(X), queue=queue).model
+                return self.train_search(params, to_table(X)).model
 
-    def _onedal_predict(self, model, X, params, queue):
+    def _onedal_predict(self, model, X, params):
         assert self._onedal_model is not None, "Model is not trained"
 
         if type(model) is kdtree_knn_classification_model:
@@ -624,6 +631,8 @@ def _onedal_predict(self, model, X, params):
         elif type(model) is bf_knn_classification_model:
             return bf_knn_classification_prediction(**params).compute(X, model)
 
+        # global queue is set as per user configuration (`target_offload`) or from data prior to calling this internal function
+        queue = SyclQueueManager.get_global_queue()
         gpu_device = queue is not None and queue.sycl_device.is_gpu
 
         X = _convert_to_supported(X)
@@ -633,17 +642,19 @@
-        result = backend.infer(policy, params, model, to_table(X))
-
         if gpu_device:
-            return self.infer(params, self._onedal_model, to_table(X), queue=queue)
+            return self.infer(params, self._onedal_model, to_table(X))
         else:
-            return self.infer_search(params, self._onedal_model, to_table(X), queue=queue)
+            return self.infer_search(params, self._onedal_model, to_table(X))
 
+    @supports_queue
     def fit(self, X, y, queue=None):
-        return self._fit(X, y, queue=queue)
+        return self._fit(X, y)
 
+    @supports_queue
     def kneighbors(self, X=None, n_neighbors=None, return_distance=True, queue=None):
-        return self._kneighbors(X, n_neighbors, return_distance, queue=queue)
+        return self._kneighbors(X, n_neighbors, return_distance)
 
-    def _predict_gpu(self, X, queue=None):
+    def _predict_gpu(self, X):
         X = _check_array(X, accept_sparse="csr", dtype=[np.float64, np.float32])
         onedal_model = getattr(self, "_onedal_model", None)
         n_features = getattr(self, "n_features_in_", None)
@@ -666,14 +677,14 @@ def _predict_gpu(self, X):
 
         params = self._get_onedal_params(X)
 
-        prediction_result = self._onedal_predict(onedal_model, X, params, queue=queue)
+        prediction_result = self._onedal_predict(onedal_model, X, params)
         responses = from_table(prediction_result.responses)
 
         result = responses.ravel()
 
         return result
 
-    def _predict_skl(self, X, queue=None):
-        neigh_dist, neigh_ind = self.kneighbors(X, queue=queue)
+    def _predict_skl(self, X):
+        neigh_dist, neigh_ind = self.kneighbors(X)
 
         weights = self._get_weights(neigh_dist, self.weights)
 
@@ -696,14 +707,14 @@ def _predict_skl(self, X):
 
         return y_pred
 
+    @supports_queue
     def predict(self, X, queue=None):
         gpu_device = queue is not None and queue.sycl_device.is_gpu
         is_uniform_weights = getattr(self, "weights", "uniform") == "uniform"
 
-        return (
-            self._predict_gpu(X, queue=queue)
-            if gpu_device and is_uniform_weights
-            else self._predict_skl(X, queue=queue)
-        )
+        if gpu_device and is_uniform_weights:
+            return self._predict_gpu(X)
+        else:
+            return self._predict_skl(X)
 
 
 class NearestNeighbors(NeighborsBase):
@@ -729,10 +740,10 @@ def __init__(
         self.weights = weights
 
     @bind_default_backend("neighbors.search")
-    def train(self, *args, queue=None, **kwargs): ...
+    def train(self, *args, **kwargs): ...
 
     @bind_default_backend("neighbors.search")
-    def infer(self, *arg, queue=None, **kwargs): ...
+    def infer(self, *args, **kwargs): ...
 
     def _get_daal_params(self, data):
         params = super()._get_daal_params(data)
@@ -742,7 +753,9 @@ def _get_daal_params(self, data):
         )
         return params
 
-    def _onedal_fit(self, X, y, queue):
+    def _onedal_fit(self, X, y):
+        # global queue is set as per user configuration (`target_offload`) or from data prior to calling this internal function
+        queue = SyclQueueManager.get_global_queue()
         gpu_device = queue is not None and queue.sycl_device.is_gpu
         if self.effective_metric_ == "euclidean" and not gpu_device:
             params = self._get_daal_params(X)
@@ -757,9 +770,9 @@ def _onedal_fit(self, X, y):
         else:
             X, y = _convert_to_supported(X, y)
             params = self._get_onedal_params(X, y)
-            return self.train(params, to_table(X), queue=queue).model
+            return self.train(params, to_table(X)).model
 
-    def _onedal_predict(self, model, X, params, queue):
+    def _onedal_predict(self, model, X, params):
         if type(self._onedal_model) is kdtree_knn_classification_model:
             return kdtree_knn_classification_prediction(**params).compute(X, model)
         elif type(self._onedal_model) is bf_knn_classification_model:
@@ -768,10 +781,12 @@ def _onedal_predict(self, model, X, params):
 
         X = _convert_to_supported(X)
         params["fptype"] = X.dtype
-        return self.infer(params, model, to_table(X), queue=queue)
+        return self.infer(params, model, to_table(X))
 
+    @supports_queue
     def fit(self, X, y, queue=None):
-        return self._fit(X, y, queue=queue)
+        return self._fit(X, y)
 
+    @supports_queue
     def kneighbors(self, X=None, n_neighbors=None, return_distance=True, queue=None):
-        return self._kneighbors(X, n_neighbors, return_distance, queue=queue)
+        return self._kneighbors(X, n_neighbors, return_distance)
diff --git a/onedal/spmd/basic_statistics/basic_statistics.py b/onedal/spmd/basic_statistics/basic_statistics.py
index cb685a2084..17125fd939 100644
--- a/onedal/spmd/basic_statistics/basic_statistics.py
+++ b/onedal/spmd/basic_statistics/basic_statistics.py
@@ -14,14 +14,14 @@
 # limitations under the License.
 # ==============================================================================
 
-from ..._device_offload import support_input_format
+from ..._device_offload import support_input_format, supports_queue
 from ...basic_statistics import BasicStatistics as BasicStatistics_Batch
 from ...common._backend import bind_spmd_backend
 
 
 class BasicStatistics(BasicStatistics_Batch):
 
     @bind_spmd_backend("basic_statistics")
-    def compute(self, data, weights=None, queue=None): ...
+    def compute(self, data, weights=None): ...
 
     @support_input_format()
     def fit(self, data, sample_weight=None, queue=None):
diff --git a/onedal/spmd/basic_statistics/incremental_basic_statistics.py b/onedal/spmd/basic_statistics/incremental_basic_statistics.py
index ab85c7ada8..e394e7d298 100644
--- a/onedal/spmd/basic_statistics/incremental_basic_statistics.py
+++ b/onedal/spmd/basic_statistics/incremental_basic_statistics.py
@@ -25,7 +25,7 @@
 
 class IncrementalBasicStatistics(base_IncrementalBasicStatistics):
     @bind_spmd_backend("basic_statistics")
-    def compute(self, *args, queue=None, **kwargs): ...
+ def compute(self, *args, **kwargs): ... @bind_spmd_backend("basic_statistics") - def finalize_compute(self, *args, queue=None, **kwargs): ... + def finalize_compute(self, *args, **kwargs): ... diff --git a/onedal/spmd/cluster/dbscan.py b/onedal/spmd/cluster/dbscan.py index d7be30cc01..0281b5b1bd 100644 --- a/onedal/spmd/cluster/dbscan.py +++ b/onedal/spmd/cluster/dbscan.py @@ -20,4 +20,4 @@ class DBSCAN(DBSCAN_Batch): @bind_spmd_backend("dbscan.clustering") - def compute(self, params, data_table, weights_table, queue=None): ... + def compute(self, params, data_table, weights_table): ... diff --git a/onedal/spmd/cluster/kmeans.py b/onedal/spmd/cluster/kmeans.py index ad1dcd0e43..54213ae8db 100644 --- a/onedal/spmd/cluster/kmeans.py +++ b/onedal/spmd/cluster/kmeans.py @@ -27,7 +27,7 @@ class KMeansInit(KMeansInit_Batch): """ @bind_spmd_backend("kmeans_init.init", lookup_name="compute") - def backend_compute(self, params, data, queue=None): ... + def backend_compute(self, params, data): ... class KMeans(KMeans_Batch): @@ -38,10 +38,10 @@ def _get_kmeans_init(self, cluster_count, seed, algorithm): return KMeansInit(cluster_count=cluster_count, seed=seed, algorithm=algorithm) @bind_spmd_backend("kmeans.clustering") - def train(self, params, X_table, centroids_table, queue=None): ... + def train(self, params, X_table, centroids_table): ... @bind_spmd_backend("kmeans.clustering") - def infer(self, params, model, centroids_table, queue=None): ... + def infer(self, params, model, centroids_table): ... @support_input_format() def fit(self, X, y=None, queue=None): diff --git a/onedal/spmd/covariance/covariance.py b/onedal/spmd/covariance/covariance.py index 77b477fe25..43600cbe73 100644 --- a/onedal/spmd/covariance/covariance.py +++ b/onedal/spmd/covariance/covariance.py @@ -22,10 +22,10 @@ class EmpiricalCovariance(EmpiricalCovariance_Batch): @bind_spmd_backend("covariance") - def compute(self, *args, queue=None, **kwargs): ... + def compute(self, *args, **kwargs): ... @bind_spmd_backend("covariance") - def finalize_compute(self, params, partial_result, queue=None): ... + def finalize_compute(self, params, partial_result): ... @support_input_format() def fit(self, X, y=None, queue=None): diff --git a/onedal/spmd/covariance/incremental_covariance.py b/onedal/spmd/covariance/incremental_covariance.py index 6beac5a2fb..6255ae2dc5 100644 --- a/onedal/spmd/covariance/incremental_covariance.py +++ b/onedal/spmd/covariance/incremental_covariance.py @@ -23,4 +23,4 @@ class IncrementalEmpiricalCovariance(base_IncrementalEmpiricalCovariance): @bind_spmd_backend("covariance") - def finalize_compute(self, params, partial_result, queue=None): ... + def finalize_compute(self, params, partial_result): ... diff --git a/onedal/spmd/decomposition/pca.py b/onedal/spmd/decomposition/pca.py index 7bc2bcce71..571038afab 100644 --- a/onedal/spmd/decomposition/pca.py +++ b/onedal/spmd/decomposition/pca.py @@ -25,7 +25,7 @@ class PCA(PCABatch): def train(self, params, X, queue=None): ... @bind_spmd_backend("decomposition.dim_reduction") - def finalize_train(self, *args, queue=None, **kwargs): ... + def finalize_train(self, *args, **kwargs): ... 
@support_input_format() def fit(self, X, y=None, queue=None): diff --git a/onedal/spmd/linear_model/incremental_linear_model.py b/onedal/spmd/linear_model/incremental_linear_model.py index cf276bc0b1..bfdc00c4b7 100644 --- a/onedal/spmd/linear_model/incremental_linear_model.py +++ b/onedal/spmd/linear_model/incremental_linear_model.py @@ -30,4 +30,4 @@ class IncrementalLinearRegression(base_IncrementalLinearRegression): """ @bind_spmd_backend("linear_model.regression") - def finalize_train(self, *args, queue=None, **kwargs): ... + def finalize_train(self, *args, **kwargs): ... diff --git a/onedal/spmd/linear_model/linear_model.py b/onedal/spmd/linear_model/linear_model.py index 911763247e..b6146cbec7 100644 --- a/onedal/spmd/linear_model/linear_model.py +++ b/onedal/spmd/linear_model/linear_model.py @@ -22,13 +22,13 @@ class LinearRegression(LinearRegression_Batch): @bind_spmd_backend("linear_model.regression") - def train(self, *args, queue=None, **kwargs): ... + def train(self, *args, **kwargs): ... @bind_spmd_backend("linear_model.regression") - def finalize_train(self, *args, queue=None, **kwargs): ... + def finalize_train(self, *args, **kwargs): ... @bind_spmd_backend("linear_model.regression") - def infer(self, params, model, X, queue=None): ... + def infer(self, params, model, X): ... @support_input_format() def fit(self, X, y, queue=None): diff --git a/onedal/spmd/linear_model/logistic_regression.py b/onedal/spmd/linear_model/logistic_regression.py index ff0e546abb..ca35dc1fbd 100644 --- a/onedal/spmd/linear_model/logistic_regression.py +++ b/onedal/spmd/linear_model/logistic_regression.py @@ -22,10 +22,10 @@ class LogisticRegression(LogisticRegression_Batch): @bind_spmd_backend("logistic_regression.classification") - def train(self, params, X, y, queue=None): ... + def train(self, params, X, y): ... @bind_spmd_backend("logistic_regression.classification") - def infer(self, params, X, model, queue=None): ... + def infer(self, params, X, model): ... @support_input_format() def fit(self, X, y, queue=None): diff --git a/onedal/spmd/neighbors/neighbors.py b/onedal/spmd/neighbors/neighbors.py index 007b197992..838b0a8e21 100644 --- a/onedal/spmd/neighbors/neighbors.py +++ b/onedal/spmd/neighbors/neighbors.py @@ -14,7 +14,7 @@ # limitations under the License. # ============================================================================== -from ..._device_offload import support_input_format +from ..._device_offload import support_input_format, supports_queue from ...common._backend import bind_spmd_backend from ...neighbors import KNeighborsClassifier as KNeighborsClassifier_Batch from ...neighbors import KNeighborsRegressor as KNeighborsRegressor_Batch @@ -23,10 +23,10 @@ class KNeighborsClassifier(KNeighborsClassifier_Batch): @bind_spmd_backend("neighbors.classification") - def train(self, *args, queue=None, **kwargs): ... + def train(self, *args, **kwargs): ... @bind_spmd_backend("neighbors.classification") - def infer(self, *args, queue=None, **kwargs): ... + def infer(self, *args, **kwargs): ... @support_input_format() def fit(self, X, y, queue=None): @@ -48,21 +48,22 @@ def kneighbors(self, X=None, n_neighbors=None, return_distance=True, queue=None) class KNeighborsRegressor(KNeighborsRegressor_Batch): @bind_spmd_backend("neighbors.search", lookup_name="train") - def train_search(self, *args, queue=None, **kwargs): ... + def train_search(self, *args, **kwargs): ... @bind_spmd_backend("neighbors.search", lookup_name="infer") - def infer_search(self, *args, queue=None, **kwargs): ... 
+ def infer_search(self, *args, **kwargs): ... @bind_spmd_backend("neighbors.regression") - def train(self, *args, queue=None, **kwargs): ... + def train(self, *args, **kwargs): ... @bind_spmd_backend("neighbors.regression") - def infer(self, *args, queue=None, **kwargs): ... + def infer(self, *args, **kwargs): ... @support_input_format() + @supports_queue def fit(self, X, y, queue=None): if queue is not None and queue.sycl_device.is_gpu: - return self._fit(X, y, queue=queue) + return self._fit(X, y) else: raise ValueError( "SPMD version of kNN is not implemented for " @@ -74,8 +75,9 @@ def kneighbors(self, X=None, n_neighbors=None, return_distance=True, queue=None) return super().kneighbors(X, n_neighbors, return_distance, queue=queue) @support_input_format() + @supports_queue def predict(self, X, queue=None): - return self._predict_gpu(X, queue=queue) + return self._predict_gpu(X) def _get_onedal_params(self, X, y=None): params = super()._get_onedal_params(X, y) diff --git a/onedal/svm/svm.py b/onedal/svm/svm.py index d0c81065bd..139aab9f60 100644 --- a/onedal/svm/svm.py +++ b/onedal/svm/svm.py @@ -184,7 +184,7 @@ def _fit(self, X, y, sample_weight, queue): X = _convert_to_supported(X) params = self._get_onedal_params(X) - result = self.train(params, *to_table(*data), queue=queue) + result = self.train(params, *to_table(*data)) if self._sparse: self.dual_coef_ = sp.csr_matrix(from_table(result.coeffs).T) @@ -267,7 +267,7 @@ def _predict(self, X, queue): model = self._onedal_model else: model = self._create_model() - result = self.infer(params, model, to_table(X), queue=queue) + result = self.infer(params, model, to_table(X)) y = from_table(result.responses) return y @@ -323,7 +323,7 @@ def _decision_function(self, X, queue): model = self._onedal_model else: model = self._create_model() - result = self.infer(params, model, to_table(X), queue=queue) + result = self.infer(params, model, to_table(X)) decision_function = from_table(result.decision_function) if len(self.classes_) == 2: @@ -379,10 +379,10 @@ def __init__( self.svm_type = SVMtype.epsilon_svr @bind_default_backend("svm.regression") - def train(self, *args, queue=None, **kwargs): ... + def train(self, *args, **kwargs): ... @bind_default_backend("svm.regression") - def infer(self, *args, queue=None, **kwargs): ... + def infer(self, *args, **kwargs): ... @bind_default_backend("svm.regression") def model(self): ... @@ -440,10 +440,10 @@ def __init__( self.svm_type = SVMtype.c_svc @bind_default_backend("svm.classification") - def train(self, *args, queue=None, **kwargs): ... + def train(self, *args, **kwargs): ... @bind_default_backend("svm.classification") - def infer(self, *args, queue=None, **kwargs): ... + def infer(self, *args, **kwargs): ... @bind_default_backend("svm.classification") def model(self): ... @@ -510,10 +510,10 @@ def __init__( self.svm_type = SVMtype.nu_svr @bind_default_backend("svm.nu_regression") - def train(self, *args, queue=None, **kwargs): ... + def train(self, *args, **kwargs): ... @bind_default_backend("svm.nu_regression") - def infer(self, *args, queue=None, **kwargs): ... + def infer(self, *args, **kwargs): ... @bind_default_backend("svm.nu_regression") def model(self): ... @@ -570,10 +570,10 @@ def __init__( self.svm_type = SVMtype.nu_svc @bind_default_backend("svm.nu_classification") - def train(self, *args, queue=None, **kwargs): ... + def train(self, *args, **kwargs): ... @bind_default_backend("svm.nu_classification") - def infer(self, *args, queue=None, **kwargs): ... 
+ def infer(self, *args, **kwargs): ... @bind_default_backend("svm.nu_classification") def model(self): ... From f1ec9676caa1bf003f6fe81096625b007785f3a6 Mon Sep 17 00:00:00 2001 From: Andreas Huber Date: Tue, 26 Nov 2024 03:42:01 -0800 Subject: [PATCH 05/41] remove policy manager --- onedal/common/policy_manager.py | 70 ----------- onedal/common/tests/test_policy.py | 12 -- onedal/common/tests/test_policy_manager.py | 136 --------------------- onedal/datatypes/tests/test_data.py | 5 +- onedal/primitives/kernel_functions.py | 2 - 5 files changed, 1 insertion(+), 224 deletions(-) delete mode 100644 onedal/common/policy_manager.py delete mode 100644 onedal/common/tests/test_policy_manager.py diff --git a/onedal/common/policy_manager.py b/onedal/common/policy_manager.py deleted file mode 100644 index 7f1ed8ab52..0000000000 --- a/onedal/common/policy_manager.py +++ /dev/null @@ -1,70 +0,0 @@ -# ============================================================================== -# Copyright 2024 Intel Corporation -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== - - -class Policy: - """Encapsulates backend policies for a unified interface with auxiliary information""" - - def __init__(self, policy_module, queue, is_dpc, is_spmd): - self.policy = policy_module(queue) if queue else policy_module() - self.is_dpc = is_dpc - self.is_spmd = is_spmd - if is_dpc: - if queue is None: - raise ValueError("DPC++ policy requires a queue") - self._queue = queue - - def __getattr__(self, name): - return getattr(self.policy, name) - - def __repr__(self) -> str: - return f"Policy({self.policy}, is_dpc={self.is_dpc}, is_spmd={self.is_spmd})" - - -class PolicyManager: - def __init__(self, backend): - self.backend = backend - - @staticmethod - def get_queue(*data): - if not data: - return - if iface := getattr(data[0], "__sycl_usm_array_interface__", None): - queue = iface.get("syclobj") - if not queue: - raise KeyError("No syclobj in provided data") - return queue - - def get_policy(self, provided_queue, *data): - data_queue = PolicyManager.get_queue(*data) - queue = provided_queue if provided_queue is not None else data_queue - - if not self.backend.is_dpc and queue is not None: - raise RuntimeError("Operations using queues require the DPC backend") - - if self.backend.is_spmd and queue is not None: - backend_policy = self.backend.spmd_data_parallel_policy - is_dpc = True - is_spmd = True - elif self.backend.is_dpc and queue is not None: - backend_policy = self.backend.data_parallel_policy - is_dpc = True - is_spmd = False - else: - backend_policy = self.backend.host_policy - is_dpc = False - is_spmd = False - return Policy(backend_policy, queue, is_dpc, is_spmd) diff --git a/onedal/common/tests/test_policy.py b/onedal/common/tests/test_policy.py index 8908c2dfc4..24e62e4904 100644 --- a/onedal/common/tests/test_policy.py +++ b/onedal/common/tests/test_policy.py @@ -17,8 +17,6 @@ import numpy as np import pytest -from onedal import 
_default_backend, _dpc_backend -from onedal.common.policy_manager import PolicyManager from onedal.tests.utils._device_selection import ( device_type_to_str, get_memory_usm, @@ -27,16 +25,6 @@ ) from onedal.utils._dpep_helpers import dpctl_available -policy_manager = PolicyManager(_dpc_backend or _default_backend) - - -@pytest.mark.parametrize("queue", get_queues()) -def test_queue_passed_directly(queue): - device_name = device_type_to_str(queue) - test_queue = policy_manager.get_policy(queue) - test_device_name = test_queue.get_device_name() - assert test_device_name == device_name - @pytest.mark.parametrize("queue", get_queues()) def test_with_numpy_data(queue): diff --git a/onedal/common/tests/test_policy_manager.py b/onedal/common/tests/test_policy_manager.py deleted file mode 100644 index 9693f642dd..0000000000 --- a/onedal/common/tests/test_policy_manager.py +++ /dev/null @@ -1,136 +0,0 @@ -# ============================================================================== -# Copyright 2024 Intel Corporation -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== - -from unittest.mock import MagicMock - -import pytest - -from onedal.common.policy_manager import PolicyManager - - -# Define a simple backend module for testing -class DummyBackend: - def __init__(self, is_dpc, is_spmd): - self.is_dpc = is_dpc - self.is_spmd = is_spmd - - def data_parallel_policy(self, queue): - return f"data_parallel_policy({queue})" - - def spmd_data_parallel_policy(self, queue): - return f"spmd_data_parallel_policy({queue})" - - def host_policy(self): - return "host_policy" - - -@pytest.fixture -def backend_dpc(): - return DummyBackend(is_dpc=True, is_spmd=False) - - -@pytest.fixture -def backend_spmd(): - return DummyBackend(is_dpc=True, is_spmd=True) - - -@pytest.fixture -def backend_host(): - return DummyBackend(is_dpc=False, is_spmd=False) - - -@pytest.fixture -def policy_manager_dpc(backend_dpc): - return PolicyManager(backend_dpc) - - -@pytest.fixture -def policy_manager_spmd(backend_spmd): - return PolicyManager(backend_spmd) - - -@pytest.fixture -def policy_manager_host(backend_host): - return PolicyManager(backend_host) - - -def test_get_queue_with_sycl_usm_array_interface(): - data = [MagicMock()] - data[0].__sycl_usm_array_interface__ = {"syclobj": "queue"} - queue = PolicyManager.get_queue(*data) - assert queue == "queue" - - -def test_get_queue_without_sycl_usm_array_interface(): - data = [MagicMock()] - queue = PolicyManager.get_queue(*data) - assert queue is None - - -def test_get_policy_with_provided_queue(policy_manager_dpc): - provided_queue = MagicMock() - policy = policy_manager_dpc.get_policy(provided_queue) - assert policy.policy == "data_parallel_policy({})".format(provided_queue) - assert policy.is_dpc is True - assert policy.is_spmd is False - - -def test_get_policy_with_data_queue(policy_manager_dpc): - data = [MagicMock()] - data[0].__sycl_usm_array_interface__ = {"syclobj": MagicMock()} - policy = 
policy_manager_dpc.get_policy(None, *data) - assert policy.policy == "data_parallel_policy({})".format( - data[0].__sycl_usm_array_interface__["syclobj"] - ) - assert policy.is_dpc is True - assert policy.is_spmd is False - - -def test_get_policy_with_host_backend_and_queue(policy_manager_host): - provided_queue = MagicMock() - with pytest.raises( - RuntimeError, match="Operations using queues require the DPC backend" - ): - policy_manager_host.get_policy(provided_queue) - - -def test_get_policy_with_host_backend(policy_manager_host): - policy = policy_manager_host.get_policy(None) - assert policy.policy == "host_policy" - assert policy.is_dpc is False - assert policy.is_spmd is False - - -def test_get_policy_with_dpc_backend_no_queue(policy_manager_dpc): - policy = policy_manager_dpc.get_policy(None) - assert policy.policy == "host_policy" - assert policy.is_dpc is False - assert policy.is_spmd is False - - -def test_get_policy_with_spmd_backend_and_queue(policy_manager_spmd): - provided_queue = MagicMock() - policy = policy_manager_spmd.get_policy(provided_queue) - assert policy.policy == "spmd_data_parallel_policy({})".format(provided_queue) - assert policy.is_dpc is True - assert policy.is_spmd is True - - -def test_get_policy_with_spmd_backend_no_queue(policy_manager_spmd): - policy = policy_manager_spmd.get_policy(None) - assert policy.policy == "host_policy" - assert policy.is_dpc is False - assert policy.is_spmd is False diff --git a/onedal/datatypes/tests/test_data.py b/onedal/datatypes/tests/test_data.py index 6784a738ea..75395230d4 100644 --- a/onedal/datatypes/tests/test_data.py +++ b/onedal/datatypes/tests/test_data.py @@ -19,7 +19,6 @@ from numpy.testing import assert_allclose from onedal import _default_backend, _dpc_backend -from onedal.common.policy_manager import PolicyManager from onedal.datatypes import from_table, to_table from onedal.utils._dpep_helpers import dpctl_available @@ -63,8 +62,6 @@ class DummyEstimatorWithTableConversions: def fit(self, X, y=None): sua_iface, xp, _ = _get_sycl_namespace(X) - policy_manager = PolicyManager(_dpc_backend) - policy = policy_manager.get_policy(X.sycl_queue, None) dbscan = DBSCAN() types = [xp.float32, xp.float64] if get_dtype(X) not in types: @@ -75,7 +72,7 @@ def fit(self, X, y=None): # TODO: # check other candidates for the dummy base oneDAL func. # oneDAL backend func is needed to check result table checks. 
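Throughout this patch, explicit policy construction such as the deleted `policy_manager.get_policy(X.sycl_queue, None)` above gives way to queue discovery from the data itself: a SYCL queue travels with USM-backed arrays via the `__sycl_usm_array_interface__` (SUA) protocol. A minimal sketch of that extraction, mirroring the deleted `PolicyManager.get_queue` (the helper name is illustrative, not part of the codebase):

def queue_from_sua(*data):
    # the SUA protocol dict stores the owning SYCL queue under "syclobj";
    # return the queue of the first data object exposing the interface
    for item in data:
        iface = getattr(item, "__sycl_usm_array_interface__", None)
        if iface is not None:
            return iface.get("syclobj")
    # plain host data (e.g. numpy arrays) carries no queue
    return None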
- result = dbscan.compute(policy, params, X_table, to_table(None)) + result = dbscan.compute(params, X_table, to_table(None)) result_responses_table = result.responses result_responses_df = from_table( result_responses_table, diff --git a/onedal/primitives/kernel_functions.py b/onedal/primitives/kernel_functions.py index 41e4a7ac35..07d95c373d 100644 --- a/onedal/primitives/kernel_functions.py +++ b/onedal/primitives/kernel_functions.py @@ -18,12 +18,10 @@ from onedal import _default_backend, _dpc_backend -from ..common.policy_manager import PolicyManager from ..datatypes import _convert_to_supported, from_table, to_table from ..utils import _check_array backend = _dpc_backend or _default_backend -policy_manager = PolicyManager(backend) def _check_inputs(X, Y): From a36097978ef3ff13ed892f6256997de080b218ee Mon Sep 17 00:00:00 2001 From: Andreas Huber Date: Wed, 27 Nov 2024 02:46:45 -0800 Subject: [PATCH 06/41] wip: fixes related to global queue --- onedal/_device_offload.py | 32 +++++---- onedal/cluster/kmeans_init.py | 5 +- onedal/common/_backend.py | 7 +- onedal/common/tests/test_policy.py | 67 ------------------- onedal/ensemble/forest.py | 2 +- onedal/neighbors/neighbors.py | 10 +-- onedal/primitives/kernel_functions.py | 35 +++++----- .../primitives/tests/test_kernel_functions.py | 8 +-- onedal/svm/svm.py | 3 +- onedal/svm/tests/test_csr_svm.py | 17 +++-- onedal/svm/tests/test_nusvr.py | 16 ++--- onedal/svm/tests/test_svc.py | 5 +- onedal/svm/tests/test_svr.py | 20 +++--- onedal/tests/utils/_device_selection.py | 14 +--- sklearnex/_device_offload.py | 25 +++---- 15 files changed, 85 insertions(+), 181 deletions(-) delete mode 100644 onedal/common/tests/test_policy.py diff --git a/onedal/_device_offload.py b/onedal/_device_offload.py index a02536c7c3..958b4f8b31 100644 --- a/onedal/_device_offload.py +++ b/onedal/_device_offload.py @@ -16,7 +16,7 @@ from collections.abc import Iterable from functools import wraps -from typing import Any, Optional +from typing import Optional import numpy as np from sklearn import get_config @@ -77,20 +77,19 @@ def get_global_queue() -> Optional[SyclQueue]: SyclQueueManager.update_global_queue(q) return q + @staticmethod + def remove_global_queue(): + """Remove the global queue.""" + SyclQueueManager.__global_queue = None + @staticmethod def update_global_queue(queue): """Update the global queue.""" - if not isinstance(queue, SyclQueue): + if queue is not None and not isinstance(queue, SyclQueue): # could be a device ID or selector string queue = SyclQueue(queue) SyclQueueManager.__global_queue = queue - @staticmethod - def update_global_queue_from_data(*data): - """Extract the queue from the provided data and update the global queue.""" - queue = SyclQueueManager.from_data(*data) - SyclQueueManager.update_global_queue(queue) # redundant, but explicit - @staticmethod def from_data(*data) -> Optional[SyclQueue]: """Extract the queue from provided data. 
This updates the global queue as well.""" for item in data: # iterate through all data objects, extract the queue, and verify that all data objects are on the same device usm_iface = getattr(item, "__sycl_usm_array_interface__", None) if usm_iface is None: # no interface found - try next data object continue @@ -108,11 +107,10 @@ def from_data(*data) -> Optional[SyclQueue]: SyclQueueManager.update_global_queue(data_queue) global_queue = data_queue - # if the data item is on device, assert it's compatible with global queue - if ( - data_queue.sycl_device is not None - and data_queue.sycl_device != global_queue.sycl_device - ): + # if the data item is on device, assert it's compatible with the device of the global queue + data_device = data_queue.sycl_device + global_device = global_queue.sycl_device + if data_device is not None and data_device != global_device: raise ValueError( "Data objects are located on different target devices or not on selected device." ) @@ -131,9 +129,9 @@ def supports_queue(func): @wraps(func) def wrapper(self, *args, **kwargs): - if (queue := kwargs.get("queue", None)) is not None: - # update the global queue with what is provided - SyclQueueManager.update_global_queue(queue) + queue = kwargs.get("queue", None) + # update the global queue with what is provided; it may be None, in which case it is derived from the provided data + SyclQueueManager.update_global_queue(queue) # find the queues in data using SyclQueueManager to verify that all data objects are on the same device kwargs["queue"] = SyclQueueManager.from_data(*args) return func(self, *args, **kwargs) @@ -252,7 +250,7 @@ def wrapper_impl(obj, *args, **kwargs): hostargs, hostkwargs = _get_host_inputs(*args, **kwargs) data = (*args, *kwargs.values()) - data_queue = SyclQueue.from_data(*data) + data_queue = SyclQueueManager.from_data(*data) if queue_param and hostkwargs.get("queue") is None: hostkwargs["queue"] = data_queue result = _run_on_device(func, obj, *hostargs, **hostkwargs) diff --git a/onedal/cluster/kmeans_init.py b/onedal/cluster/kmeans_init.py index e9db6855f6..74d20caf1a 100755 --- a/onedal/cluster/kmeans_init.py +++ b/onedal/cluster/kmeans_init.py @@ -75,11 +75,10 @@ def _get_params_and_input(self, X): params = self._get_onedal_params(dtype) return (params, to_table(X), dtype) - @supports_queue def compute(self, X, queue=None): _, X_table, dtype = self._get_params_and_input(X) - centroids = self.compute_raw(X_table, dtype) + centroids = self.compute_raw(X_table, dtype, queue=queue) return from_table(centroids) @@ -102,6 +101,6 @@ def kmeans_plusplus( return ( KMeansInit( n_clusters, seed=random_seed, local_trials_count=n_local_trials - ).compute(X, queue), + ).compute(X, queue=queue), np.full(n_clusters, -1), ) diff --git a/onedal/common/_backend.py b/onedal/common/_backend.py index f67c02863a..e733a9fa43 100644 --- a/onedal/common/_backend.py +++ b/onedal/common/_backend.py @@ -72,12 +72,7 @@ def __call__(self, *args: Any, **kwargs: Any) -> Any: policy = self.backend.host_policy() # dispatch to backend function - try: - return self.method(policy, *args, **kwargs) - except: - raise RuntimeError( - f"Error in dispatching to backend function for device {policy.get_device_id()} ({policy.get_device_name()})" - ) + return self.method(policy, *args, **kwargs) def __repr__(self) -> str: return f"BackendFunction({self.backend}.{self.name})" diff --git a/onedal/common/tests/test_policy.py b/onedal/common/tests/test_policy.py deleted file mode 100644 index 24e62e4904..0000000000 --- a/onedal/common/tests/test_policy.py +++ /dev/null @@ -1,67 +0,0 @@ -# ============================================================================== -# Copyright 2021 Intel Corporation -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance
with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== - -import numpy as np -import pytest - -from onedal.tests.utils._device_selection import ( - device_type_to_str, - get_memory_usm, - get_queues, - is_dpctl_device_available, -) -from onedal.utils._dpep_helpers import dpctl_available - - -@pytest.mark.parametrize("queue", get_queues()) -def test_with_numpy_data(queue): - X = np.zeros((5, 3)) - y = np.zeros(3) - - device_name = device_type_to_str(queue) - assert policy_manager.get_policy(queue, X, y).get_device_name() == device_name - - -@pytest.mark.skipif(not dpctl_available, reason="depends on dpctl") -@pytest.mark.parametrize("queue", get_queues("cpu,gpu")) -@pytest.mark.parametrize("memtype", get_memory_usm()) -def test_with_usm_ndarray_data(queue, memtype): - if queue is None: - pytest.skip( - "dpctl Memory object with queue=None uses cached default (gpu if available)" - ) - - from dpctl.tensor import usm_ndarray - - device_name = device_type_to_str(queue) - X = usm_ndarray((5, 3), buffer=memtype(5 * 3 * 8, queue=queue)) - y = usm_ndarray((3,), buffer=memtype(3 * 8, queue=queue)) - assert policy_manager.get_policy(None, X, y).get_device_name() == device_name - - -@pytest.mark.skipif( - not is_dpctl_device_available(["cpu", "gpu"]), reason="test uses multiple devices" -) -@pytest.mark.parametrize("memtype", get_memory_usm()) -def test_queue_parameter_with_usm_ndarray(memtype): - from dpctl import SyclQueue - from dpctl.tensor import usm_ndarray - - q1 = SyclQueue("cpu") - q2 = SyclQueue("gpu") - - X = usm_ndarray((5, 3), buffer=memtype(5 * 3 * 8, queue=q1)) - assert policy_manager.get_policy(q2, X).get_device_name() == device_type_to_str(q2) diff --git a/onedal/ensemble/forest.py b/onedal/ensemble/forest.py index b32188d136..1ce08eb4e1 100644 --- a/onedal/ensemble/forest.py +++ b/onedal/ensemble/forest.py @@ -557,7 +557,7 @@ def fit(self, X, y, sample_weight=None, queue=None): @supports_queue def predict(self, X, queue=None): - return self._predict(X, queue).ravel() + return self._predict(X).ravel() class ExtraTreesClassifier(ClassifierMixin, BaseForest, metaclass=ABCMeta): diff --git a/onedal/neighbors/neighbors.py b/onedal/neighbors/neighbors.py index 42f90d0552..554dfbb98e 100755 --- a/onedal/neighbors/neighbors.py +++ b/onedal/neighbors/neighbors.py @@ -275,7 +275,7 @@ def _fit(self, X, y): _fit_y = None # global queue is set as per user configuration (`target_offload`) or from data prior to calling this internal function - queue = SyclQueueManager.get_global_queue() + queue = getattr(SyclQueueManager.get_global_queue(), "implementation") gpu_device = queue is not None and queue.sycl_device.is_gpu if _is_classifier(self) or (_is_regressor(self) and gpu_device): @@ -446,7 +446,7 @@ def _get_daal_params(self, data): def _onedal_fit(self, X, y): # global queue is set as per user configuration (`target_offload`) or from data prior to calling this internal function - queue = SyclQueueManager.get_global_queue() + queue = getattr(SyclQueueManager.get_global_queue(), "implementation") gpu_device = queue is not None and 
queue.sycl_device.is_gpu if self.effective_metric_ == "euclidean" and not gpu_device: params = self._get_daal_params(X) @@ -604,7 +604,7 @@ def _get_daal_params(self, data): def _onedal_fit(self, X, y): # global queue is set as per user configuration (`target_offload`) or from data prior to calling this internal function - queue = SyclQueueManager.get_global_queue() + queue = getattr(SyclQueueManager.get_global_queue(), "implementation") gpu_device = queue is not None and queue.sycl_device.is_gpu if self.effective_metric_ == "euclidean" and not gpu_device: params = self._get_daal_params(X) @@ -632,7 +632,7 @@ def _onedal_predict(self, model, X, params): return bf_knn_classification_prediction(**params).compute(X, model) # global queue is set as per user configuration (`target_offload`) or from data prior to calling this internal function - queue = SyclQueueManager.get_global_queue() + queue = getattr(SyclQueueManager.get_global_queue(), "implementation") gpu_device = queue is not None and queue.sycl_device.is_gpu X = _convert_to_supported(X) @@ -755,7 +755,7 @@ def _get_daal_params(self, data): def _onedal_fit(self, X, y): # global queue is set as per user configuration (`target_offload`) or from data prior to calling this internal function - queue = SyclQueueManager.get_global_queue() + queue = getattr(SyclQueueManager.get_global_queue(), "implementation") gpu_device = queue is not None and queue.sycl_device.is_gpu if self.effective_metric_ == "euclidean" and not gpu_device: params = self._get_daal_params(X) diff --git a/onedal/primitives/kernel_functions.py b/onedal/primitives/kernel_functions.py index 07d95c373d..e0d336dced 100644 --- a/onedal/primitives/kernel_functions.py +++ b/onedal/primitives/kernel_functions.py @@ -16,13 +16,13 @@ import numpy as np -from onedal import _default_backend, _dpc_backend +from onedal import _default_backend as backend +from onedal._device_offload import supports_queue +from onedal.common._backend import BackendFunction from ..datatypes import _convert_to_supported, from_table, to_table from ..utils import _check_array -backend = _dpc_backend or _default_backend - def _check_inputs(X, Y): def check_input(data): @@ -33,15 +33,18 @@ def check_input(data): return X, Y -def _compute_kernel(params, submodule, X, Y, queue): - policy = policy_manager.get_policy(queue, X, Y) +def _compute_kernel(params, submodule, X, Y): + # get policy for direct backend calls + X, Y = _convert_to_supported(X, Y) params["fptype"] = X.dtype X, Y = to_table(X, Y) - result = submodule.compute(policy, params, X, Y) + compute_method = BackendFunction(submodule.compute, backend, "compute") + result = compute_method(params, X, Y) return from_table(result.values) +@supports_queue def linear_kernel(X, Y=None, scale=1.0, shift=0.0, queue=None): """ Compute the linear kernel between X and Y: @@ -61,14 +64,11 @@ def linear_kernel(X, Y=None, scale=1.0, shift=0.0, queue=None): """ X, Y = _check_inputs(X, Y) return _compute_kernel( - {"method": "dense", "scale": scale, "shift": shift}, - backend.linear_kernel, - X, - Y, - queue, + {"method": "dense", "scale": scale, "shift": shift}, backend.linear_kernel, X, Y ) +@supports_queue def rbf_kernel(X, Y=None, gamma=None, queue=None): """ Compute the rbf (gaussian) kernel between X and Y: @@ -92,11 +92,10 @@ def rbf_kernel(X, Y=None, gamma=None, queue=None): gamma = 1.0 / X.shape[1] if gamma is None else gamma sigma = np.sqrt(0.5 / gamma) - return _compute_kernel( - {"method": "dense", "sigma": sigma}, backend.rbf_kernel, X, Y, queue - ) + return 
_compute_kernel({"method": "dense", "sigma": sigma}, backend.rbf_kernel, X, Y) +@supports_queue def poly_kernel(X, Y=None, gamma=1.0, coef0=0.0, degree=3, queue=None): """ Compute the poly kernel between X and Y: @@ -122,10 +121,10 @@ def poly_kernel(X, Y=None, gamma=1.0, coef0=0.0, degree=3, queue=None): backend.polynomial_kernel, X, Y, - queue, ) +@supports_queue def sigmoid_kernel(X, Y=None, gamma=1.0, coef0=0.0, queue=None): """ Compute the sigmoid kernel between X and Y: @@ -146,9 +145,5 @@ def sigmoid_kernel(X, Y=None, gamma=1.0, coef0=0.0, queue=None): X, Y = _check_inputs(X, Y) return _compute_kernel( - {"method": "dense", "scale": gamma, "shift": coef0}, - backend.sigmoid_kernel, - X, - Y, - queue, + {"method": "dense", "scale": gamma, "shift": coef0}, backend.sigmoid_kernel, X, Y ) diff --git a/onedal/primitives/tests/test_kernel_functions.py b/onedal/primitives/tests/test_kernel_functions.py index 22a8f562cb..9becc976b4 100644 --- a/onedal/primitives/tests/test_kernel_functions.py +++ b/onedal/primitives/tests/test_kernel_functions.py @@ -91,7 +91,7 @@ def test_dense_small_rbf_kernel(queue, gamma, dtype): _test_dense_small_rbf_kernel(queue, gamma, dtype) -@pass_if_not_implemented_for_gpu(reason="poly kernel is not implemented") +@pass_if_not_implemented_for_gpu(reason="Polynomial kernel is not implemented for GPU") @pytest.mark.parametrize("queue", get_queues()) def test_dense_self_poly_kernel(queue): rng = np.random.RandomState(0) @@ -116,7 +116,7 @@ def _test_dense_small_poly_kernel(queue, gamma, coef0, degree, dtype): assert_allclose(result, expected, rtol=tol) -@pass_if_not_implemented_for_gpu(reason="poly kernel is not implemented") +@pass_if_not_implemented_for_gpu(reason="Polynomial kernel is not implemented for GPU") @pytest.mark.parametrize("queue", get_queues()) @pytest.mark.parametrize("gamma", [0.1, 1.0]) @pytest.mark.parametrize("coef0", [0.0, 1.0]) @@ -126,7 +126,7 @@ def test_dense_small_poly_kernel(queue, gamma, coef0, degree, dtype): _test_dense_small_poly_kernel(queue, gamma, coef0, degree, dtype) -@pass_if_not_implemented_for_gpu(reason="sigmoid kernel is not implemented") +@pass_if_not_implemented_for_gpu(reason="Sigmoid kernel is not implemented for GPU") @pytest.mark.parametrize("queue", get_queues()) def test_dense_self_sigmoid_kernel(queue): rng = np.random.RandomState(0) @@ -150,7 +150,7 @@ def _test_dense_small_sigmoid_kernel(queue, gamma, coef0, dtype): assert_allclose(result, expected, rtol=tol) -@pass_if_not_implemented_for_gpu(reason="sigmoid kernel is not implemented") +@pass_if_not_implemented_for_gpu(reason="Sigmoid kernel is not implemented for GPU") @pytest.mark.parametrize("queue", get_queues()) @pytest.mark.parametrize("gamma", [0.1, 1.0, 2.4]) @pytest.mark.parametrize("coef0", [0.0, 1.0, 5.5]) diff --git a/onedal/svm/svm.py b/onedal/svm/svm.py index 139aab9f60..2054eb31ae 100644 --- a/onedal/svm/svm.py +++ b/onedal/svm/svm.py @@ -175,8 +175,7 @@ def _fit(self, X, y, sample_weight, queue): _gamma = 1.0 / X.shape[1] else: raise ValueError( - "When 'gamma' is a string, it should be either 'scale' or " - "'auto'. Got '{}' instead.".format(self.gamma) + f"When 'gamma' is a string, it should be either 'scale' or 'auto'. Got '{self.gamma}' instead." 
) else: _gamma = self.gamma diff --git a/onedal/svm/tests/test_csr_svm.py b/onedal/svm/tests/test_csr_svm.py index e4a05a030e..f133c4eafb 100644 --- a/onedal/svm/tests/test_csr_svm.py +++ b/onedal/svm/tests/test_csr_svm.py @@ -74,7 +74,7 @@ def _test_simple_dataset(queue, kernel): check_svm_model_equal(queue, clf0, clf1, *dataset) -@pass_if_not_implemented_for_gpu(reason="csr svm is not implemented") +@pass_if_not_implemented_for_gpu(reason="not implemented") @pytest.mark.parametrize( "queue", get_queues("cpu") @@ -82,8 +82,7 @@ def _test_simple_dataset(queue, kernel): pytest.param( get_queues("gpu"), marks=pytest.mark.xfail( - reason="raises UnknownError instead of RuntimeError " - "with unimplemented message" + reason="raises UnknownError instead of RuntimeError with unimplemented message" ), ) ], @@ -103,7 +102,7 @@ def _test_binary_dataset(queue, kernel): check_svm_model_equal(queue, clf0, clf1, *dataset) -@pass_if_not_implemented_for_gpu(reason="csr svm is not implemented") +@pass_if_not_implemented_for_gpu(reason="not implemented") @pytest.mark.parametrize( "queue", get_queues("cpu") @@ -111,9 +110,11 @@ def _test_binary_dataset(queue, kernel): pytest.param( get_queues("gpu"), marks=pytest.mark.xfail( - reason="raises UnknownError for linear and rbf, " - "Unimplemented error with inconsistent error message " - "for poly and sigmoid" + reason=( + "raises UnknownError for linear and rbf, " + "Unimplemented error with inconsistent error message " + "for poly and sigmoid" + ) ), ) ], @@ -138,7 +139,6 @@ def _test_iris(queue, kernel): check_svm_model_equal(queue, clf0, clf1, *dataset, decimal=2) -@pass_if_not_implemented_for_gpu(reason="csr svm is not implemented") @pytest.mark.parametrize("queue", get_queues()) @pytest.mark.parametrize("kernel", ["linear", "rbf", "poly", "sigmoid"]) def test_iris(queue, kernel): @@ -158,7 +158,6 @@ def _test_diabetes(queue, kernel): check_svm_model_equal(queue, clf0, clf1, *dataset) -@pass_if_not_implemented_for_gpu(reason="csr svm is not implemented") @pytest.mark.parametrize("queue", get_queues()) @pytest.mark.parametrize("kernel", ["linear", "rbf", "poly", "sigmoid"]) def test_diabetes(queue, kernel): diff --git a/onedal/svm/tests/test_nusvr.py b/onedal/svm/tests/test_nusvr.py index 1bec991961..47ef17b870 100644 --- a/onedal/svm/tests/test_nusvr.py +++ b/onedal/svm/tests/test_nusvr.py @@ -30,7 +30,7 @@ synth_params = {"n_samples": 500, "n_features": 100, "random_state": 42} -@pass_if_not_implemented_for_gpu(reason="nusvr is not implemented") +@pass_if_not_implemented_for_gpu(reason="Regression nuSVM is not implemented for GPU") @pytest.mark.parametrize("queue", get_queues()) def test_diabetes_simple(queue): diabetes = datasets.load_diabetes() @@ -39,7 +39,7 @@ def test_diabetes_simple(queue): assert clf.score(diabetes.data, diabetes.target, queue=queue) > 0.02 -@pass_if_not_implemented_for_gpu(reason="nusvr is not implemented") +@pass_if_not_implemented_for_gpu(reason="Regression nuSVM is not implemented for GPU") @pytest.mark.parametrize("queue", get_queues()) def test_input_format_for_diabetes(queue): diabetes = datasets.load_diabetes() @@ -67,7 +67,7 @@ def test_input_format_for_diabetes(queue): assert_allclose(res_c_contiguous_numpy, res_f_contiguous_numpy) -@pass_if_not_implemented_for_gpu(reason="nusvr is not implemented") +@pass_if_not_implemented_for_gpu(reason="Regression nuSVM is not implemented for GPU") @pytest.mark.parametrize("queue", get_queues()) def test_predict(queue): iris = datasets.load_iris() @@ -105,7 +105,7 @@ def 
_test_diabetes_compare_with_sklearn(queue, kernel): assert_allclose(clf_sklearn.dual_coef_, clf_onedal.dual_coef_, atol=1e-2) -@pass_if_not_implemented_for_gpu(reason="nusvr is not implemented") +@pass_if_not_implemented_for_gpu(reason="Regression nuSVM is not implemented for GPU") @pytest.mark.parametrize("queue", get_queues()) @pytest.mark.parametrize("kernel", ["linear", "rbf", "poly", "sigmoid"]) def test_diabetes_compare_with_sklearn(queue, kernel): @@ -129,7 +129,7 @@ def _test_synth_rbf_compare_with_sklearn(queue, C, nu, gamma): assert abs(result - expected) < 1e-3 -@pass_if_not_implemented_for_gpu(reason="nusvr is not implemented") +@pass_if_not_implemented_for_gpu(reason="Regression nuSVM is not implemented for GPU") @pytest.mark.parametrize("queue", get_queues()) @pytest.mark.parametrize("gamma", ["scale", "auto"]) @pytest.mark.parametrize("C", [100.0, 1000.0]) @@ -155,7 +155,7 @@ def _test_synth_linear_compare_with_sklearn(queue, C, nu): assert abs(result - expected) < 1e-3 -@pass_if_not_implemented_for_gpu(reason="nusvr is not implemented") +@pass_if_not_implemented_for_gpu(reason="Regression nuSVM is not implemented for GPU") @pytest.mark.parametrize("queue", get_queues()) @pytest.mark.parametrize("C", [0.001, 0.1]) @pytest.mark.parametrize("nu", [0.25, 0.75]) @@ -178,7 +178,7 @@ def _test_synth_poly_compare_with_sklearn(queue, params): assert abs(result - expected) < 1e-3 -@pass_if_not_implemented_for_gpu(reason="nusvr is not implemented") +@pass_if_not_implemented_for_gpu(reason="Regression nuSVM is not implemented for GPU") @pytest.mark.parametrize("queue", get_queues()) @pytest.mark.parametrize( "params", @@ -191,7 +191,7 @@ def test_synth_poly_compare_with_sklearn(queue, params): _test_synth_poly_compare_with_sklearn(queue, params) -@pass_if_not_implemented_for_gpu(reason="nusvr is not implemented") +@pass_if_not_implemented_for_gpu(reason="Regression nuSVM is not implemented for GPU") @pytest.mark.parametrize("queue", get_queues()) def test_pickle(queue): diabetes = datasets.load_diabetes() diff --git a/onedal/svm/tests/test_svc.py b/onedal/svm/tests/test_svc.py index 9f7eaa4810..4afd7846a4 100644 --- a/onedal/svm/tests/test_svc.py +++ b/onedal/svm/tests/test_svc.py @@ -98,7 +98,6 @@ def test_decision_function(queue): assert_array_almost_equal(dec.ravel(), clf.decision_function(X, queue=queue)) -@pass_if_not_implemented_for_gpu(reason="multiclass svm is not implemented") @pytest.mark.parametrize("queue", get_queues()) def test_iris(queue): iris = datasets.load_iris() @@ -107,7 +106,6 @@ def test_iris(queue): assert_array_equal(clf.classes_, np.sort(clf.classes_)) -@pass_if_not_implemented_for_gpu(reason="multiclass svm is not implemented") @pytest.mark.parametrize("queue", get_queues()) def test_decision_function_shape(queue): X, y = make_blobs(n_samples=80, centers=5, random_state=0) @@ -124,7 +122,6 @@ def test_decision_function_shape(queue): SVC(decision_function_shape="bad").fit(X_train, y_train, queue=queue) -@pass_if_not_implemented_for_gpu(reason="multiclass svm is not implemented") @pytest.mark.parametrize("queue", get_queues()) def test_pickle(queue): iris = datasets.load_iris() @@ -149,7 +146,7 @@ def test_pickle(queue): pytest.param( get_queues("gpu"), marks=pytest.mark.xfail( - reason="raises Unimplemented error " "with inconsistent error message" + reason="raises Unimplemented error with inconsistent error message" ), ) ], diff --git a/onedal/svm/tests/test_svr.py b/onedal/svm/tests/test_svr.py index a9000ff5f7..8432fb09b3 100644 --- 
a/onedal/svm/tests/test_svr.py +++ b/onedal/svm/tests/test_svr.py @@ -30,7 +30,7 @@ synth_params = {"n_samples": 500, "n_features": 100, "random_state": 42} -@pass_if_not_implemented_for_gpu(reason="svr is not implemented") +@pass_if_not_implemented_for_gpu(reason="Regression SVM is not implemented for GPU") @pytest.mark.parametrize("queue", get_queues()) def test_run_to_run_fit(queue): diabetes = datasets.load_diabetes() @@ -45,7 +45,7 @@ def test_run_to_run_fit(queue): assert_allclose(clf_first.dual_coef_, clf.dual_coef_) -@pass_if_not_implemented_for_gpu(reason="svr is not implemented") +@pass_if_not_implemented_for_gpu(reason="Regression SVM is not implemented for GPU") @pytest.mark.parametrize("queue", get_queues()) def test_diabetes_simple(queue): diabetes = datasets.load_diabetes() @@ -54,7 +54,7 @@ def test_diabetes_simple(queue): assert clf.score(diabetes.data, diabetes.target, queue=queue) > 0.02 -@pass_if_not_implemented_for_gpu(reason="svr is not implemented") +@pass_if_not_implemented_for_gpu(reason="Regression SVM is not implemented for GPU") @pytest.mark.parametrize("queue", get_queues()) def test_input_format_for_diabetes(queue): diabetes = datasets.load_diabetes() @@ -82,7 +82,7 @@ def test_input_format_for_diabetes(queue): assert_allclose(res_c_contiguous_numpy, res_f_contiguous_numpy) -@pass_if_not_implemented_for_gpu(reason="svr is not implemented") +@pass_if_not_implemented_for_gpu(reason="Regression SVM is not implemented for GPU") @pytest.mark.parametrize("queue", get_queues()) def test_predict(queue): iris = datasets.load_iris() @@ -120,7 +120,7 @@ def _test_diabetes_compare_with_sklearn(queue, kernel): assert_allclose(clf_sklearn.dual_coef_, clf_onedal.dual_coef_, atol=1e-1) -@pass_if_not_implemented_for_gpu(reason="svr is not implemented") +@pass_if_not_implemented_for_gpu(reason="Regression SVM is not implemented for GPU") @pytest.mark.parametrize("queue", get_queues()) @pytest.mark.parametrize("kernel", ["linear", "rbf", "poly", "sigmoid"]) def test_diabetes_compare_with_sklearn(queue, kernel): @@ -143,7 +143,7 @@ def _test_synth_rbf_compare_with_sklearn(queue, C, gamma): assert result > expected - 1e-5 -@pass_if_not_implemented_for_gpu(reason="svr is not implemented") +@pass_if_not_implemented_for_gpu(reason="Regression SVM is not implemented for GPU") @pytest.mark.parametrize("queue", get_queues()) @pytest.mark.parametrize("gamma", ["scale", "auto"]) @pytest.mark.parametrize("C", [100.0, 1000.0]) @@ -167,7 +167,7 @@ def _test_synth_linear_compare_with_sklearn(queue, C): assert result > expected - 1e-3 -@pass_if_not_implemented_for_gpu(reason="svr is not implemented") +@pass_if_not_implemented_for_gpu(reason="Regression SVM is not implemented for GPU") @pytest.mark.parametrize("queue", get_queues()) @pytest.mark.parametrize("C", [0.001, 0.1]) def test_synth_linear_compare_with_sklearn(queue, C): @@ -188,7 +188,7 @@ def _test_synth_poly_compare_with_sklearn(queue, params): assert result > expected - 1e-5 -@pass_if_not_implemented_for_gpu(reason="svr is not implemented") +@pass_if_not_implemented_for_gpu(reason="Regression SVM is not implemented for GPU") @pytest.mark.parametrize("queue", get_queues()) @pytest.mark.parametrize( "params", @@ -201,7 +201,7 @@ def test_synth_poly_compare_with_sklearn(queue, params): _test_synth_poly_compare_with_sklearn(queue, params) -@pass_if_not_implemented_for_gpu(reason="svr is not implemented") +@pass_if_not_implemented_for_gpu(reason="Regression SVM is not implemented for GPU") @pytest.mark.parametrize("queue", get_queues()) 
def test_sided_sample_weight(queue): clf = SVR(C=1e-2, kernel="linear") @@ -225,7 +225,7 @@ def test_sided_sample_weight(queue): assert y_pred == pytest.approx(1.5) -@pass_if_not_implemented_for_gpu(reason="svr is not implemented") +@pass_if_not_implemented_for_gpu(reason="Regression SVM is not implemented for GPU") @pytest.mark.parametrize("queue", get_queues()) def test_pickle(queue): diabetes = datasets.load_diabetes() diff --git a/onedal/tests/utils/_device_selection.py b/onedal/tests/utils/_device_selection.py index f1b29ab3b9..bdbe27d4eb 100644 --- a/onedal/tests/utils/_device_selection.py +++ b/onedal/tests/utils/_device_selection.py @@ -73,18 +73,6 @@ def is_dpctl_device_available(targets): return False -def device_type_to_str(queue): - if queue is None: - return "cpu" - - if dpctl_available: - if queue.sycl_device.is_cpu: - return "cpu" - if queue.sycl_device.is_gpu: - return "gpu" - return "unknown" - - def pass_if_not_implemented_for_gpu(reason=""): assert reason @@ -92,7 +80,7 @@ def decorator(test): @functools.wraps(test) def wrapper(queue, *args, **kwargs): if queue is not None and queue.sycl_device.is_gpu: - with pytest.raises(RuntimeError, match="is not implemented for GPU"): + with pytest.raises(RuntimeError, match=reason): test(queue, *args, **kwargs) else: test(queue, *args, **kwargs) diff --git a/sklearnex/_device_offload.py b/sklearnex/_device_offload.py index e5f1122217..3bfe1b464c 100644 --- a/sklearnex/_device_offload.py +++ b/sklearnex/_device_offload.py @@ -16,7 +16,7 @@ from functools import wraps -from onedal._device_offload import _copy_to_usm, _transfer_to_host +from onedal._device_offload import SyclQueueManager, _copy_to_usm, _transfer_to_host from onedal.utils._array_api import _asarray from onedal.utils._dpep_helpers import dpnp_available @@ -27,43 +27,44 @@ from ._config import get_config -def _get_backend(obj, queue, method_name, *data): +def _get_backend(obj, method_name, *data): + queue = SyclQueueManager.get_global_queue() cpu_device = queue is None or queue.sycl_device.is_cpu gpu_device = queue is not None and queue.sycl_device.is_gpu if cpu_device: patching_status = obj._onedal_cpu_supported(method_name, *data) if patching_status.get_status(): - return "onedal", queue, patching_status + return "onedal", patching_status else: - return "sklearn", None, patching_status + return "sklearn", patching_status allow_fallback_to_host = get_config()["allow_fallback_to_host"] if gpu_device: patching_status = obj._onedal_gpu_supported(method_name, *data) if patching_status.get_status(): - return "onedal", queue, patching_status + return "onedal", patching_status else: + SyclQueueManager.remove_global_queue() if allow_fallback_to_host: patching_status = obj._onedal_cpu_supported(method_name, *data) if patching_status.get_status(): - return "onedal", None, patching_status + return "onedal", patching_status else: - return "sklearn", None, patching_status + return "sklearn", patching_status else: - return "sklearn", None, patching_status + return "sklearn", patching_status raise RuntimeError("Device support is not implemented") def dispatch(obj, method_name, branches, *args, **kwargs): - q = _get_global_queue() - has_usm_data_for_args, q, hostargs = _transfer_to_host(q, *args) - has_usm_data_for_kwargs, q, hostvalues = _transfer_to_host(q, *kwargs.values()) + has_usm_data_for_args, hostargs = _transfer_to_host(*args) + has_usm_data_for_kwargs, hostvalues = _transfer_to_host(*kwargs.values()) hostkwargs = dict(zip(kwargs.keys(), hostvalues)) - backend, q, 
patching_status = _get_backend(obj, q, method_name, *hostargs) + backend, patching_status = _get_backend(obj, method_name, *hostargs) has_usm_data = has_usm_data_for_args or has_usm_data_for_kwargs if backend == "onedal": # Host args only used before onedal backend call. From 86d1fbdf0a8558d906928277c38f193c3ad53b38 Mon Sep 17 00:00:00 2001 From: Andreas Huber Date: Wed, 27 Nov 2024 03:54:46 -0800 Subject: [PATCH 07/41] fixup is_cpu --- sklearnex/_device_offload.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sklearnex/_device_offload.py b/sklearnex/_device_offload.py index 3bfe1b464c..0c284d9b2e 100644 --- a/sklearnex/_device_offload.py +++ b/sklearnex/_device_offload.py @@ -29,8 +29,8 @@ def _get_backend(obj, method_name, *data): queue = SyclQueueManager.get_global_queue() - cpu_device = queue is None or queue.sycl_device.is_cpu - gpu_device = queue is not None and queue.sycl_device.is_gpu + cpu_device = queue is None or getattr(queue.sycl_device, "is_cpu", True) + gpu_device = queue is not None and getattr(queue.sycl_device, "is_gpu", False) if cpu_device: patching_status = obj._onedal_cpu_supported(method_name, *data) From 6c3b2423d2e4a2a003df4e0213440c578ca4459a Mon Sep 17 00:00:00 2001 From: Andreas Huber Date: Wed, 27 Nov 2024 08:01:54 -0800 Subject: [PATCH 08/41] fixup dispatch --- sklearnex/_device_offload.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/sklearnex/_device_offload.py b/sklearnex/_device_offload.py index 0c284d9b2e..39ca30e0a1 100644 --- a/sklearnex/_device_offload.py +++ b/sklearnex/_device_offload.py @@ -60,6 +60,7 @@ def _get_backend(obj, method_name, *data): def dispatch(obj, method_name, branches, *args, **kwargs): + queue = getattr(SyclQueueManager.get_global_queue(), "implementation", None) has_usm_data_for_args, hostargs = _transfer_to_host(*args) has_usm_data_for_kwargs, hostvalues = _transfer_to_host(*kwargs.values()) hostkwargs = dict(zip(kwargs.keys(), hostvalues)) @@ -69,8 +70,8 @@ def dispatch(obj, method_name, branches, *args, **kwargs): if backend == "onedal": # Host args only used before onedal backend call. # Device will be offloaded when onedal backend will be called. 
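The `is_cpu` fixup above hardens device detection with `getattr` defaults, so a queue whose `sycl_device` does not expose the dpctl flags (e.g. a stub queue in a build without dpctl) degrades to CPU semantics instead of raising. The pattern in isolation (function names are illustrative):

def is_on_cpu(queue):
    # no queue, or a device without an `is_cpu` flag, defaults to CPU
    return queue is None or getattr(queue.sycl_device, "is_cpu", True)

def is_on_gpu(queue):
    # a device without an `is_gpu` flag is conservatively treated as non-GPU
    return queue is not None and getattr(queue.sycl_device, "is_gpu", False)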
- patching_status.write_log(queue=q, transferred_to_host=False) - return branches[backend](obj, *hostargs, **hostkwargs, queue=q) + patching_status.write_log(queue=queue, transferred_to_host=False) + return branches[backend](obj, *hostargs, **hostkwargs, queue=queue) if backend == "sklearn": if ( "array_api_dispatch" in get_config() From 2001b48afc0205958b2a63f2896b555c257e97da Mon Sep 17 00:00:00 2001 From: Andreas Huber Date: Thu, 28 Nov 2024 01:10:12 -0800 Subject: [PATCH 09/41] fixup queue as kwarg, is_gpu --- onedal/neighbors/neighbors.py | 12 +++--- .../spmd/basic_statistics/basic_statistics.py | 2 +- onedal/svm/svm.py | 37 ++++++++++++------- .../basic_statistics/basic_statistics.py | 2 +- sklearnex/linear_model/incremental_ridge.py | 4 +- sklearnex/linear_model/tests/test_logreg.py | 4 +- sklearnex/neighbors/_lof.py | 2 +- .../preview/decomposition/incremental_pca.py | 4 +- 8 files changed, 39 insertions(+), 28 deletions(-) diff --git a/onedal/neighbors/neighbors.py b/onedal/neighbors/neighbors.py index 554dfbb98e..1ffcb23f0b 100755 --- a/onedal/neighbors/neighbors.py +++ b/onedal/neighbors/neighbors.py @@ -276,7 +276,7 @@ def _fit(self, X, y): _fit_y = None # global queue is set as per user configuration (`target_offload`) or from data prior to calling this internal function queue = getattr(SyclQueueManager.get_global_queue(), "implementation") - gpu_device = queue is not None and queue.sycl_device.is_gpu + gpu_device = queue is not None and getattr(queue.sycl_device, "is_gpu", False) if _is_classifier(self) or (_is_regressor(self) and gpu_device): _fit_y = self._validate_targets(self._y, X.dtype).reshape((-1, 1)) @@ -447,7 +447,7 @@ def _get_daal_params(self, data): def _onedal_fit(self, X, y): # global queue is set as per user configuration (`target_offload`) or from data prior to calling this internal function queue = getattr(SyclQueueManager.get_global_queue(), "implementation") - gpu_device = queue is not None and queue.sycl_device.is_gpu + gpu_device = queue is not None and getattr(queue.sycl_device, "is_gpu", False) if self.effective_metric_ == "euclidean" and not gpu_device: params = self._get_daal_params(X) if self._fit_method == "brute": @@ -605,7 +605,7 @@ def _get_daal_params(self, data): def _onedal_fit(self, X, y): # global queue is set as per user configuration (`target_offload`) or from data prior to calling this internal function queue = getattr(SyclQueueManager.get_global_queue(), "implementation") - gpu_device = queue is not None and queue.sycl_device.is_gpu + gpu_device = queue is not None and getattr(queue.sycl_device, "is_gpu", False) if self.effective_metric_ == "euclidean" and not gpu_device: params = self._get_daal_params(X) if self._fit_method == "brute": @@ -633,7 +633,7 @@ def _onedal_predict(self, model, X, params): # global queue is set as per user configuration (`target_offload`) or from data prior to calling this internal function queue = getattr(SyclQueueManager.get_global_queue(), "implementation") - gpu_device = queue is not None and queue.sycl_device.is_gpu + gpu_device = queue is not None and getattr(queue.sycl_device, "is_gpu", False) X = _convert_to_supported(X) if "responses" not in params["result_option"] and gpu_device: @@ -709,7 +709,7 @@ def _predict_skl(self, X): @supports_queue def predict(self, X, queue=None): - gpu_device = queue is not None and queue.sycl_device.is_gpu + gpu_device = queue is not None and getattr(queue.sycl_device, "is_gpu", False) is_uniform_weights = getattr(self, "weights", "uniform") == "uniform" if gpu_device 
and is_uniform_weights: return self._predict_gpu(X) @@ -756,7 +756,7 @@ def _get_daal_params(self, data): def _onedal_fit(self, X, y): # global queue is set as per user configuration (`target_offload`) or from data prior to calling this internal function queue = getattr(SyclQueueManager.get_global_queue(), "implementation") - gpu_device = queue is not None and queue.sycl_device.is_gpu + gpu_device = queue is not None and getattr(queue.sycl_device, "is_gpu", False) if self.effective_metric_ == "euclidean" and not gpu_device: params = self._get_daal_params(X) if self._fit_method == "brute": diff --git a/onedal/spmd/basic_statistics/basic_statistics.py b/onedal/spmd/basic_statistics/basic_statistics.py index 17125fd939..f519bb225f 100644 --- a/onedal/spmd/basic_statistics/basic_statistics.py +++ b/onedal/spmd/basic_statistics/basic_statistics.py @@ -25,4 +25,4 @@ def compute(self, data, weights=None): ... @support_input_format() def fit(self, data, sample_weight=None, queue=None): - return super().fit(data, sample_weight, queue) + return super().fit(data, sample_weight, queue=queue) diff --git a/onedal/svm/svm.py b/onedal/svm/svm.py index 2054eb31ae..d418876948 100644 --- a/onedal/svm/svm.py +++ b/onedal/svm/svm.py @@ -20,6 +20,7 @@ import numpy as np from scipy import sparse as sp +from onedal._device_offload import supports_queue from onedal.common._backend import bind_default_backend from ..common._estimator_checks import _check_is_fitted @@ -123,7 +124,7 @@ def _get_onedal_params(self, data): "cache_size": self.cache_size, } - def _fit(self, X, y, sample_weight, queue): + def _fit(self, X, y, sample_weight): if hasattr(self, "decision_function_shape"): if self.decision_function_shape not in ("ovr", "ovo", None): raise ValueError( @@ -218,7 +219,7 @@ def _create_model(self): m.first_class_response, m.second_class_response = 0, 1 return m - def _predict(self, X, queue): + def _predict(self, X): _check_is_fitted(self) if self.break_ties and self.decision_function_shape == "ovo": raise ValueError( @@ -289,7 +290,7 @@ def _ovr_decision_function(self, predictions, confidences, n_classes): ) return votes + transformed_confidences - def _decision_function(self, X, queue): + def _decision_function(self, X): _check_is_fitted(self) X = _check_array( X, dtype=[np.float64, np.float32], force_all_finite=True, accept_sparse="csr" @@ -386,11 +387,13 @@ def infer(self, *args, **kwargs): ... @bind_default_backend("svm.regression") def model(self): ... + @supports_queue def fit(self, X, y, sample_weight=None, queue=None): - return self._fit(X, y, sample_weight, queue) + return self._fit(X, y, sample_weight) + @supports_queue def predict(self, X, queue=None): - y = self._predict(X, queue) + y = self._predict(X) return y.ravel() @@ -453,17 +456,20 @@ def _validate_targets(self, y, dtype): ) return y + @supports_queue def fit(self, X, y, sample_weight=None, queue=None): - return self._fit(X, y, sample_weight, queue) + return self._fit(X, y, sample_weight) + @supports_queue def predict(self, X, queue=None): - y = self._predict(X, queue) + y = self._predict(X) if len(self.classes_) == 2: y = y.ravel() return self.classes_.take(np.asarray(y, dtype=np.intp)).ravel() + @supports_queue def decision_function(self, X, queue=None): - return self._decision_function(X, queue) + return self._decision_function(X) class NuSVR(RegressorMixin, BaseSVM): @@ -517,11 +523,13 @@ def infer(self, *args, **kwargs): ... @bind_default_backend("svm.nu_regression") def model(self): ... 
+ @supports_queue def fit(self, X, y, sample_weight=None, queue=None): - return self._fit(X, y, sample_weight, queue) + return self._fit(X, y, sample_weight) + @supports_queue def predict(self, X, queue=None): - return self._predict(X, queue).ravel() + return self._predict(X).ravel() class NuSVC(ClassifierMixin, BaseSVM): @@ -583,14 +591,17 @@ def _validate_targets(self, y, dtype): ) return y + @supports_queue def fit(self, X, y, sample_weight=None, queue=None): - return self._fit(X, y, sample_weight, queue) + return self._fit(X, y, sample_weight) + @supports_queue def predict(self, X, queue=None): - y = self._predict(X, queue) + y = self._predict(X) if len(self.classes_) == 2: y = y.ravel() return self.classes_.take(np.asarray(y, dtype=np.intp)).ravel() + @supports_queue def decision_function(self, X, queue=None): - return self._decision_function(X, queue) + return self._decision_function(X) diff --git a/sklearnex/basic_statistics/basic_statistics.py b/sklearnex/basic_statistics/basic_statistics.py index da82e3bd82..4f860c9e4c 100644 --- a/sklearnex/basic_statistics/basic_statistics.py +++ b/sklearnex/basic_statistics/basic_statistics.py @@ -193,7 +193,7 @@ def _onedal_fit(self, X, sample_weight=None, queue=None): if not hasattr(self, "_onedal_estimator"): self._onedal_estimator = self._onedal_basic_statistics(**onedal_params) - self._onedal_estimator.fit(X, sample_weight, queue) + self._onedal_estimator.fit(X, sample_weight, queue=queue) self._save_attributes() self.n_features_in_ = X.shape[1] if len(X.shape) > 1 else 1 diff --git a/sklearnex/linear_model/incremental_ridge.py b/sklearnex/linear_model/incremental_ridge.py index e750491ef9..232e6da8ab 100644 --- a/sklearnex/linear_model/incremental_ridge.py +++ b/sklearnex/linear_model/incremental_ridge.py @@ -137,7 +137,7 @@ def _onedal_predict(self, X, queue=None): assert hasattr(self, "_onedal_estimator") if self._need_to_finalize: self._onedal_finalize_fit() - return self._onedal_estimator.predict(X, queue) + return self._onedal_estimator.predict(X, queue=queue) def _onedal_score(self, X, y, sample_weight=None, queue=None): return r2_score( @@ -177,7 +177,7 @@ def _onedal_partial_fit(self, X, y, check_input=True, queue=None): } if not hasattr(self, "_onedal_estimator"): self._onedal_estimator = self._onedal_incremental_ridge(**onedal_params) - self._onedal_estimator.partial_fit(X, y, queue) + self._onedal_estimator.partial_fit(X, y, queue=queue) self._need_to_finalize = True def _onedal_finalize_fit(self): diff --git a/sklearnex/linear_model/tests/test_logreg.py b/sklearnex/linear_model/tests/test_logreg.py index 65c7ea5d0f..6c30760a46 100755 --- a/sklearnex/linear_model/tests/test_logreg.py +++ b/sklearnex/linear_model/tests/test_logreg.py @@ -49,7 +49,7 @@ def test_sklearnex_multiclass_classification(dataframe, queue): from sklearnex.linear_model import LogisticRegression X, y = load_iris(return_X_y=True) - X_train, X_test, y_train, y_test = prepare_input(X, y, dataframe, queue) + X_train, X_test, y_train, y_test = prepare_input(X, y, dataframe, queue=queue) logreg = LogisticRegression(fit_intercept=True, solver="lbfgs", max_iter=200).fit( X_train, y_train @@ -72,7 +72,7 @@ def test_sklearnex_binary_classification(dataframe, queue): from sklearnex.linear_model import LogisticRegression X, y = load_breast_cancer(return_X_y=True) - X_train, X_test, y_train, y_test = prepare_input(X, y, dataframe, queue) + X_train, X_test, y_train, y_test = prepare_input(X, y, dataframe, queue=queue) logreg = LogisticRegression(fit_intercept=True, 
solver="newton-cg", max_iter=100).fit( X_train, y_train diff --git a/sklearnex/neighbors/_lof.py b/sklearnex/neighbors/_lof.py index 1e42f8db0d..ec2f0c7747 100644 --- a/sklearnex/neighbors/_lof.py +++ b/sklearnex/neighbors/_lof.py @@ -59,7 +59,7 @@ def _onedal_fit(self, X, y, queue=None): if sklearn_check_version("1.2"): self._validate_params() - self._onedal_knn_fit(X, y, queue) + self._onedal_knn_fit(X, y, queue=queue) if self.contamination != "auto": if not (0.0 < self.contamination <= 0.5): diff --git a/sklearnex/preview/decomposition/incremental_pca.py b/sklearnex/preview/decomposition/incremental_pca.py index aa8d7e78f1..ae13274ae0 100644 --- a/sklearnex/preview/decomposition/incremental_pca.py +++ b/sklearnex/preview/decomposition/incremental_pca.py @@ -57,9 +57,9 @@ def __init__(self, n_components=None, *, whiten=False, copy=True, batch_size=Non def _onedal_transform(self, X, queue=None): assert hasattr(self, "_onedal_estimator") if self._need_to_finalize: - self._onedal_finalize_fit() + self._onedal_finalize_fit(queue) X = check_array(X, dtype=[np.float64, np.float32]) - return self._onedal_estimator.predict(X, queue) + return self._onedal_estimator.predict(X, queue=queue) def _onedal_fit_transform(self, X, queue=None): self._onedal_fit(X, queue) From 089d23df62b5bbb3ff43776ecbb2706aa8a8b1d7 Mon Sep 17 00:00:00 2001 From: Andreas Huber Date: Thu, 28 Nov 2024 08:42:32 -0800 Subject: [PATCH 10/41] handle SUA interface errors --- onedal/_device_offload.py | 13 +++- onedal/cluster/kmeans.py | 2 +- onedal/common/_backend.py | 65 ++++++++++++++++++-- onedal/datatypes/utils/sua_iface_helpers.cpp | 2 +- 4 files changed, 75 insertions(+), 7 deletions(-) diff --git a/onedal/_device_offload.py b/onedal/_device_offload.py index 958b4f8b31..13b385d4f9 100644 --- a/onedal/_device_offload.py +++ b/onedal/_device_offload.py @@ -95,7 +95,18 @@ def from_data(*data) -> Optional[SyclQueue]: """Extract the queue from provided data. This updates the global queue as well.""" for item in data: # iterate through all data objects, extract the queue, and verify that all data objects are on the same device - usm_iface = getattr(item, "__sycl_usm_array_interface__", None) + + # get the `usm_interface` - the C++ implementation might throw an exception if the data type is not supported + try: + usm_iface = getattr(item, "__sycl_usm_array_interface__", None) + except RuntimeError as e: + if "SUA interface" in str(e): + # ignore SUA interface errors and move on + continue + else: + # unexpected, re-raise + raise e + if usm_iface is None: # no interface found - try next data object continue diff --git a/onedal/cluster/kmeans.py b/onedal/cluster/kmeans.py index b6b77f092b..e3c5736165 100644 --- a/onedal/cluster/kmeans.py +++ b/onedal/cluster/kmeans.py @@ -63,7 +63,7 @@ def __init__( self.random_state = random_state self.n_local_trials = n_local_trials - @bind_default_backend("kmeans_common") + @bind_default_backend("kmeans_common", no_policy=True) def _is_same_clustering(self, labels, best_labels, n_clusters): ... 
@bind_default_backend("kmeans.clustering") diff --git a/onedal/common/_backend.py b/onedal/common/_backend.py index e733a9fa43..8a917b39d7 100644 --- a/onedal/common/_backend.py +++ b/onedal/common/_backend.py @@ -44,10 +44,12 @@ def __init__( method: Callable[..., Any], backend: Backend, name: str, + no_policy: bool, ): self.method = method self.name = name self.backend = backend + self.no_policy = no_policy def __call__(self, *args: Any, **kwargs: Any) -> Any: """Dispatch to backend function with the appropriate policy which is determined from the global queue""" @@ -55,6 +57,9 @@ def __call__(self, *args: Any, **kwargs: Any) -> Any: # immediate dispatching without any arguments, in particular no policy return self.method() + if self.no_policy: + return self.method(*args, **kwargs) + # use globally configured queue (from `target_offload` configuration or provided data) queue = getattr(SyclQueueManager.get_global_queue(), "implementation", None) @@ -83,6 +88,7 @@ def __decorator( backend_manager: BackendManager, module_name: str, lookup_name: Optional[str], + no_policy: bool, ) -> Callable[..., Any]: """Decorator to bind a method to the specified backend""" if lookup_name is None: @@ -96,6 +102,7 @@ def __decorator( backend_method, backend_manager.backend, name=f"{module_name}.{method.__name__}", + no_policy=no_policy, ) backend_type = backend_manager.get_backend_type() @@ -106,7 +113,32 @@ def __decorator( return wrapped_method -def bind_default_backend(module_name: str, lookup_name: Optional[str] = None): +def bind_default_backend( + module_name: str, lookup_name: Optional[str] = None, no_policy=False +): + """ + Decorator to bind a method from the default backend to a class. + + This decorator binds a method implementation from the default backend (host/dpc). + If the default backend is unavailable, the method is returned without modification. + + Parameters: + ---------- + module_name : str + The name of the module where the target function is located (e.g. `covariance`). + lookup_name : Optional[str], optional + The name of the method to look up in the backend module. If not provided, + the name of the decorated method is used. + no_policy : bool, optional + If True, the method will be decorated without a policy. Default is False. + + Returns: + ------- + Callable[..., Any] + The decorated method bound to the implementation in default backend, or the original + method if the default backend is unavailable. + """ + def decorator(method: Callable[..., Any]): # grab the lookup_name from outer scope nonlocal lookup_name @@ -117,12 +149,37 @@ def decorator(method: Callable[..., Any]): ) return method - return __decorator(method, default_manager, module_name, lookup_name) + return __decorator(method, default_manager, module_name, lookup_name, no_policy) return decorator -def bind_spmd_backend(module_name: str, lookup_name: Optional[str] = None): +def bind_spmd_backend( + module_name: str, lookup_name: Optional[str] = None, no_policy=False +): + """ + Decorator to bind a method from the SPMD backend to a class. + + This decorator binds a method implementation from the SPMD backend. + If the SPMD backend is unavailable, the method is returned without modification. + + Parameters: + ---------- + module_name : str + The name of the module where the target function is located (e.g. `covariance`). + lookup_name : Optional[str], optional + The name of the method to look up in the backend module. If not provided, + the name of the decorated method is used. 
+ no_policy : bool, optional + If True, the method will be decorated without a policy. Default is False. + + Returns: + ------- + Callable[..., Any] + The decorated method bound to the implementation in SPMD backend, or the original + method if the SPMD backend is unavailable. + """ + def decorator(method: Callable[..., Any]): # grab the lookup_name from outer scope nonlocal lookup_name @@ -133,6 +190,6 @@ def decorator(method: Callable[..., Any]): ) return method - __decorator(method, spmd_manager, module_name, lookup_name) + __decorator(method, spmd_manager, module_name, lookup_name, no_policy) return decorator diff --git a/onedal/datatypes/utils/sua_iface_helpers.cpp b/onedal/datatypes/utils/sua_iface_helpers.cpp index d345a35645..931f0f4362 100644 --- a/onedal/datatypes/utils/sua_iface_helpers.cpp +++ b/onedal/datatypes/utils/sua_iface_helpers.cpp @@ -167,7 +167,7 @@ dal::data_layout get_sua_iface_layout(const py::dict& sua_dict, } } else { - throw std::runtime_error("Unsupporterd data shape.`"); + throw std::runtime_error("Unsupported data shape."); } } From aadbd72cc2717597bfcc64c89e8bdde783b0929c Mon Sep 17 00:00:00 2001 From: Andreas Huber Date: Fri, 29 Nov 2024 00:28:44 -0800 Subject: [PATCH 11/41] fix BackendFunction in kernel_functions.py --- onedal/primitives/kernel_functions.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/onedal/primitives/kernel_functions.py b/onedal/primitives/kernel_functions.py index e0d336dced..1bc37d58c6 100644 --- a/onedal/primitives/kernel_functions.py +++ b/onedal/primitives/kernel_functions.py @@ -39,7 +39,9 @@ def _compute_kernel(params, submodule, X, Y): X, Y = _convert_to_supported(X, Y) params["fptype"] = X.dtype X, Y = to_table(X, Y) - compute_method = BackendFunction(submodule.compute, backend, "compute") + compute_method = BackendFunction( + submodule.compute, backend, "compute", no_policy=False + ) result = compute_method(params, X, Y) return from_table(result.values) From 423902507b5bbe043d2b47f502605a66bb1546bc Mon Sep 17 00:00:00 2001 From: Andreas Huber Date: Fri, 29 Nov 2024 00:56:11 -0800 Subject: [PATCH 12/41] undo accidental changes to tests --- .../tests/test_basic_statistics.py | 13 +- .../test_incremental_basic_statistics.py | 16 +-- onedal/common/_backend.py | 48 ++++++- onedal/common/backend_manager.py | 56 -------- onedal/common/tests/test_backend_manager.py | 121 ------------------ 5 files changed, 57 insertions(+), 197 deletions(-) delete mode 100644 onedal/common/backend_manager.py delete mode 100644 onedal/common/tests/test_backend_manager.py diff --git a/onedal/basic_statistics/tests/test_basic_statistics.py b/onedal/basic_statistics/tests/test_basic_statistics.py index a80ee2198d..acdf8181b4 100644 --- a/onedal/basic_statistics/tests/test_basic_statistics.py +++ b/onedal/basic_statistics/tests/test_basic_statistics.py @@ -19,6 +19,7 @@ from numpy.testing import assert_allclose from scipy import sparse as sp +from daal4py.sklearn._utils import daal_check_version from onedal.basic_statistics import BasicStatistics from onedal.basic_statistics.tests.utils import options_and_tests from onedal.tests.utils._device_selection import get_queues @@ -54,7 +55,7 @@ def test_single_option_on_random_data( basicstat = BasicStatistics(result_options=result_option) - result = basicstat.fit(data, sample_weight=weights) + result = basicstat.fit(data, sample_weight=weights, queue=queue) res = getattr(result, result_option) if weighted: @@ -86,7 +87,7 @@ def test_multiple_options_on_random_data(queue, row_count,
column_count, weighte basicstat = BasicStatistics(result_options=["mean", "max", "sum"]) - result = basicstat.fit(data, sample_weight=weights) + result = basicstat.fit(data, sample_weight=weights, queue=queue) res_mean, res_max, res_sum = result.mean, result.max, result.sum if weighted: @@ -127,7 +128,7 @@ def test_all_option_on_random_data(queue, row_count, column_count, weighted, dty basicstat = BasicStatistics(result_options="all") - result = basicstat.fit(data, sample_weight=weights) + result = basicstat.fit(data, sample_weight=weights, queue=queue) if weighted: weighted_data = np.diag(weights) @ data @@ -165,7 +166,7 @@ def test_1d_input_on_random_data(queue, result_option, data_size, weighted, dtyp basicstat = BasicStatistics(result_options=result_option) - result = basicstat.fit(data, sample_weight=weights) + result = basicstat.fit(data, sample_weight=weights, queue=queue) res = getattr(result, result_option) if weighted: @@ -196,7 +197,7 @@ def test_basic_csr(queue, dtype): ) basicstat = BasicStatistics(result_options="mean") - result = basicstat.fit(data) + result = basicstat.fit(data, queue=queue) res_mean = result.mean gtr_mean = data.mean(axis=0) @@ -229,7 +230,7 @@ def test_options_csr(queue, option, dtype): ) basicstat = BasicStatistics(result_options=result_option) - result = basicstat.fit(data) + result = basicstat.fit(data, queue=queue) res = getattr(result, result_option) func = getattr(data, function) diff --git a/onedal/basic_statistics/tests/test_incremental_basic_statistics.py b/onedal/basic_statistics/tests/test_incremental_basic_statistics.py index c5d33f5119..4d18e42ce1 100644 --- a/onedal/basic_statistics/tests/test_incremental_basic_statistics.py +++ b/onedal/basic_statistics/tests/test_incremental_basic_statistics.py @@ -38,9 +38,9 @@ def test_multiple_options_on_gold_data(queue, weighted, dtype): incbs = IncrementalBasicStatistics() for i in range(2): if weighted: - incbs.partial_fit(X_split[i], weights_split[i]) + incbs.partial_fit(X_split[i], weights_split[i], queue=queue) else: - incbs.partial_fit(X_split[i]) + incbs.partial_fit(X_split[i], queue=queue) result = incbs.finalize_fit() @@ -85,9 +85,9 @@ def test_single_option_on_random_data( for i in range(num_batches): if weighted: - incbs.partial_fit(data_split[i], weights_split[i]) + incbs.partial_fit(data_split[i], weights_split[i], queue=queue) else: - incbs.partial_fit(data_split[i]) + incbs.partial_fit(data_split[i], queue=queue) result = incbs.finalize_fit() res = getattr(result, result_option) @@ -123,9 +123,9 @@ def test_multiple_options_on_random_data( for i in range(num_batches): if weighted: - incbs.partial_fit(data_split[i], weights_split[i]) + incbs.partial_fit(data_split[i], weights_split[i], queue=queue) else: - incbs.partial_fit(data_split[i]) + incbs.partial_fit(data_split[i], queue=queue) result = incbs.finalize_fit() res_mean, res_max, res_sum = result.mean, result.max, result.sum @@ -171,9 +171,9 @@ def test_all_option_on_random_data( for i in range(num_batches): if weighted: - incbs.partial_fit(data_split[i], weights_split[i]) + incbs.partial_fit(data_split[i], weights_split[i], queue=queue) else: - incbs.partial_fit(data_split[i]) + incbs.partial_fit(data_split[i], queue=queue) result = incbs.finalize_fit() if weighted: diff --git a/onedal/common/_backend.py b/onedal/common/_backend.py index 8a917b39d7..6fb5e9c84d 100644 --- a/onedal/common/_backend.py +++ b/onedal/common/_backend.py @@ -20,15 +20,10 @@ from onedal import Backend, _default_backend, _spmd_backend from 
onedal._device_offload import SyclQueueManager
 
-from .backend_manager import BackendManager
-
 logger = logging.getLogger(__name__)
 
-default_manager = BackendManager(_default_backend)
-spmd_manager = BackendManager(_spmd_backend)
-
 # define types for backend functions: default, dpc, spmd
-BackendType = Literal["host", "dpc", "spmd"]
+BackendType = Literal["none", "host", "dpc", "spmd"]
 
 logging.basicConfig(
     level=logging.DEBUG,
@@ -36,6 +31,47 @@
 )
 
 
+class BackendManager:
+    def __init__(self, backend_module):
+        self.backend = backend_module
+
+    def get_backend_type(self) -> BackendType:
+        if self.backend is None:
+            return "none"
+        if self.backend.is_spmd:
+            return "spmd"
+        if self.backend.is_dpc:
+            return "dpc"
+        return "host"
+
+    def get_backend_component(self, module_name: str, component_name: str):
+        """Get a component of the backend module.
+
+        Args:
+            module_name (str): The module to get the component from.
+            component_name (str): The component to get from that module.
+
+        Returns:
+            The requested component of the module.
+        """
+        submodules = module_name.split(".")
+        module = getattr(self.backend, submodules[0])
+        for submodule in submodules[1:]:
+            module = getattr(module, submodule)
+
+        # the component can be given as `submodule.method`, with an arbitrary
+        # number of nested submodules and methods
+        result = module
+        for part in component_name.split("."):
+            result = getattr(result, part)
+
+        return result
+
+
+default_manager = BackendManager(_default_backend)
+spmd_manager = BackendManager(_spmd_backend)
+
+
 class BackendFunction:
     """Wrapper around backend function to allow setting auxiliary information"""
 
diff --git a/onedal/common/backend_manager.py b/onedal/common/backend_manager.py
deleted file mode 100644
index ba6da992c5..0000000000
--- a/onedal/common/backend_manager.py
+++ /dev/null
@@ -1,56 +0,0 @@
-# ==============================================================================
-# Copyright 2024 Intel Corporation
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ==============================================================================
-
-from typing import Literal
-
-BackendType = Literal["none", "host", "dpc", "spmd"]
-
-
-class BackendManager:
-    def __init__(self, backend_module):
-        self.backend = backend_module
-
-    def get_backend_type(self) -> BackendType:
-        if self.backend is None:
-            return "none"
-        if self.backend.is_spmd:
-            return "spmd"
-        if self.backend.is_dpc:
-            return "dpc"
-        return "host"
-
-    def get_backend_component(self, module_name: str, component_name: str):
-        """Get a component of the backend module.
-
-        Args:
-            module(str): The module to get the component from.
-            component: The component to get from the module.
-
-        Returns:
-            The component of the module.
- """ - submodules = module_name.split(".") - module = getattr(self.backend, submodules[0]) - for submodule in submodules[1:]: - module = getattr(module, submodule) - - # component can be provided like submodule.method, there can be arbitrary number of submodules - # and methods - result = module - for part in component_name.split("."): - result = getattr(result, part) - - return result diff --git a/onedal/common/tests/test_backend_manager.py b/onedal/common/tests/test_backend_manager.py deleted file mode 100644 index a03306af0b..0000000000 --- a/onedal/common/tests/test_backend_manager.py +++ /dev/null @@ -1,121 +0,0 @@ -# ============================================================================== -# Copyright 2024 Intel Corporation -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== - - -import pytest - -from onedal.common.backend_manager import BackendManager - - -# Define a simple backend module for testing -class DummyBackend: - class Module: - class Submodule: - def method(self, *args, **kwargs): - return "method_result" - - def __init__(self): - self.submodule_instance = self.Submodule() - - def method(self, *args, **kwargs): - return "method_result" - - def __init__(self): - self.module_instance = self.Module() - - @property - def module(self): - return self.module_instance - - -@pytest.fixture -def backend_manager(): - backend = DummyBackend() - return BackendManager(backend) - - -def test_get_backend_component_with_method(backend_manager): - result = backend_manager.get_backend_component("module", "method") - assert result() == "method_result" - - -def test_get_backend_component_with_submodule_method(backend_manager): - result = backend_manager.get_backend_component("module.submodule_instance", "method") - assert result() == "method_result" - - -def test_get_backend_component_with_invalid_module(backend_manager): - with pytest.raises(AttributeError): - backend_manager.get_backend_component("invalid_module", "method") - - -def test_get_backend_component_with_invalid_submodule(backend_manager): - with pytest.raises(AttributeError): - backend_manager.get_backend_component("module.invalid_submodule", "method") - - -def test_get_backend_component_with_invalid_method(backend_manager): - with pytest.raises(AttributeError): - backend_manager.get_backend_component( - "module", "submodule_instance.invalid_method" - ) - - -def test_get_backend_component_with_multiple_methods(backend_manager): - class ExtendedDummyBackend(DummyBackend): - class Module(DummyBackend.Module): - class Submodule(DummyBackend.Module.Submodule): - def another_method(self, *args, **kwargs): - return "another_method_result" - - def __init__(self): - super().__init__() - self.submodule_instance = self.Submodule() - - def __init__(self): - self.module_instance = self.Module() - - backend_manager.backend = ExtendedDummyBackend() - result = backend_manager.get_backend_component( - "module.submodule_instance", "another_method" - ) - assert 
result() == "another_method_result" - - -def test_get_backend_component_with_deeply_nested_submodules(backend_manager): - class DeeplyNestedDummyBackend(DummyBackend): - class Module(DummyBackend.Module): - class Submodule(DummyBackend.Module.Submodule): - class DeepSubmodule: - def deep_method(self, *args, **kwargs): - return "deep_method_result" - - def __init__(self): - super().__init__() - self.deep_submodule_instance = self.DeepSubmodule() - - def __init__(self): - super().__init__() - self.submodule_instance = self.Submodule() - - def __init__(self): - self.module_instance = self.Module() - - backend_manager.backend = DeeplyNestedDummyBackend() - result = backend_manager.get_backend_component( - "module.submodule_instance.deep_submodule_instance", "deep_method" - ) - assert result() == "deep_method_result" From 44ba5e0b8d00f46e02ca9e8862572e3943f6f60a Mon Sep 17 00:00:00 2001 From: Andreas Huber Date: Fri, 29 Nov 2024 03:15:24 -0800 Subject: [PATCH 13/41] fixup delete _policy.py; fix assert_all_finite from latest main --- onedal/common/_policy.py | 55 -------------------------------------- onedal/utils/validation.py | 16 +++++++---- 2 files changed, 11 insertions(+), 60 deletions(-) delete mode 100644 onedal/common/_policy.py diff --git a/onedal/common/_policy.py b/onedal/common/_policy.py deleted file mode 100644 index 0d7d8ca6a3..0000000000 --- a/onedal/common/_policy.py +++ /dev/null @@ -1,55 +0,0 @@ -# ============================================================================== -# Copyright 2021 Intel Corporation -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# ============================================================================== - -import sys - -from onedal import _backend, _is_dpc_backend - - -def _get_policy(queue, *data): - data_queue = _get_queue(*data) - if _is_dpc_backend: - if queue is None: - if data_queue is None: - return _HostInteropPolicy() - return _DataParallelInteropPolicy(data_queue) - return _DataParallelInteropPolicy(queue) - else: - if not (data_queue is None and queue is None): - raise RuntimeError( - "Operation using the requested SYCL queue requires the DPC backend" - ) - return _HostInteropPolicy() - - -def _get_queue(*data): - if len(data) > 0 and hasattr(data[0], "__sycl_usm_array_interface__"): - # Assume that all data reside on the same device - return data[0].__sycl_usm_array_interface__["syclobj"] - return None - - -class _HostInteropPolicy(_backend.host_policy): - def __init__(self): - super().__init__() - - -if _is_dpc_backend: - - class _DataParallelInteropPolicy(_backend.data_parallel_policy): - def __init__(self, queue): - self._queue = queue - super().__init__(self._queue) diff --git a/onedal/utils/validation.py b/onedal/utils/validation.py index 145e44b107..a5ab409e6b 100644 --- a/onedal/utils/validation.py +++ b/onedal/utils/validation.py @@ -21,6 +21,9 @@ import numpy as np from scipy import sparse as sp +from onedal._device_offload import supports_queue +from onedal.common._backend import BackendFunction + if np.lib.NumpyVersion(np.__version__) >= np.lib.NumpyVersion("2.0.0a0"): # numpy_version >= 2.0 from numpy.exceptions import VisibleDeprecationWarning @@ -34,8 +37,7 @@ from daal4py.sklearn.utils.validation import ( _assert_all_finite as _daal4py_assert_all_finite, ) -from onedal import _backend -from onedal.common._policy import _get_policy +from onedal import _default_backend as backend from onedal.datatypes import _convert_to_supported, to_table @@ -437,25 +439,29 @@ def _is_csr(x): def _assert_all_finite(X, allow_nan=False, input_name=""): - policy = _get_policy(None, X) - X_t = to_table(_convert_to_supported(policy, X)) + X_t = to_table(_convert_to_supported(X)) params = { "fptype": X_t.dtype, "method": "dense", "allow_nan": allow_nan, } - if not _backend.finiteness_checker.compute.compute(policy, params, X_t).finite: + backend_method = BackendFunction( + backend.finiteness_checker.compute.compute, backend, "compute", no_policy=False + ) + if not backend_method(params, X_t).finite: type_err = "infinity" if allow_nan else "NaN, infinity" padded_input_name = input_name + " " if input_name else "" msg_err = f"Input {padded_input_name}contains {type_err}." 
raise ValueError(msg_err) +@supports_queue def assert_all_finite( X, *, allow_nan=False, input_name="", + queue=None, ): _assert_all_finite( X.data if sp.issparse(X) else X, From 1a7dadcfef3015b6688bd52f580cbf02963ca64a Mon Sep 17 00:00:00 2001 From: Andreas Huber Date: Tue, 10 Dec 2024 01:43:31 -0800 Subject: [PATCH 14/41] remove utils/__init__.py --- onedal/basic_statistics/basic_statistics.py | 3 +- .../incremental_basic_statistics.py | 2 +- onedal/cluster/dbscan.py | 2 +- onedal/cluster/kmeans.py | 2 +- onedal/cluster/kmeans_init.py | 2 +- onedal/covariance/covariance.py | 2 +- onedal/covariance/incremental_covariance.py | 2 +- onedal/decomposition/incremental_pca.py | 2 +- .../linear_model/incremental_linear_model.py | 2 +- onedal/linear_model/linear_model.py | 2 +- onedal/primitives/kernel_functions.py | 2 +- onedal/utils/__init__.py | 49 ------------------- 12 files changed, 11 insertions(+), 61 deletions(-) delete mode 100644 onedal/utils/__init__.py diff --git a/onedal/basic_statistics/basic_statistics.py b/onedal/basic_statistics/basic_statistics.py index 6aed8ae8db..43a097b9c4 100644 --- a/onedal/basic_statistics/basic_statistics.py +++ b/onedal/basic_statistics/basic_statistics.py @@ -22,8 +22,7 @@ from ..common._backend import bind_default_backend from ..datatypes import _convert_to_supported, from_table, to_table -from ..utils import _is_csr -from ..utils.validation import _check_array +from ..utils.validation import _check_array, _is_csr class BaseBasicStatistics(metaclass=ABCMeta): diff --git a/onedal/basic_statistics/incremental_basic_statistics.py b/onedal/basic_statistics/incremental_basic_statistics.py index fed4de7037..c603e14430 100644 --- a/onedal/basic_statistics/incremental_basic_statistics.py +++ b/onedal/basic_statistics/incremental_basic_statistics.py @@ -23,7 +23,7 @@ from onedal.common._backend import bind_default_backend from ..datatypes import _convert_to_supported, from_table, to_table -from ..utils import _check_array +from ..utils.validation import _check_array from .basic_statistics import BaseBasicStatistics diff --git a/onedal/cluster/dbscan.py b/onedal/cluster/dbscan.py index af920f50f5..28cf1f50f9 100644 --- a/onedal/cluster/dbscan.py +++ b/onedal/cluster/dbscan.py @@ -24,7 +24,7 @@ from ..common._mixin import ClusterMixin from ..datatypes import _convert_to_supported, from_table, to_table -from ..utils import _check_array +from ..utils.validation import _check_array class DBSCAN(ClusterMixin): diff --git a/onedal/cluster/kmeans.py b/onedal/cluster/kmeans.py index e3c5736165..43e6bce78c 100644 --- a/onedal/cluster/kmeans.py +++ b/onedal/cluster/kmeans.py @@ -38,7 +38,7 @@ from ..common._mixin import ClusterMixin, TransformerMixin from ..datatypes import _convert_to_supported, from_table, to_table -from ..utils import _check_array, _is_arraylike_not_scalar, _is_csr +from ..utils.validation import _check_array, _is_arraylike_not_scalar, _is_csr class _BaseKMeans(TransformerMixin, ClusterMixin, ABC): diff --git a/onedal/cluster/kmeans_init.py b/onedal/cluster/kmeans_init.py index 74d20caf1a..9ebd0dd09a 100755 --- a/onedal/cluster/kmeans_init.py +++ b/onedal/cluster/kmeans_init.py @@ -23,7 +23,7 @@ from onedal.common._backend import bind_default_backend from ..datatypes import _convert_to_supported, from_table, to_table -from ..utils import _check_array +from ..utils.validation import _check_array if daal_check_version((2023, "P", 200)): diff --git a/onedal/covariance/covariance.py b/onedal/covariance/covariance.py index 0e1593e325..0e2f9966a5 100644 --- 
a/onedal/covariance/covariance.py +++ b/onedal/covariance/covariance.py @@ -20,7 +20,7 @@ from daal4py.sklearn._utils import daal_check_version, get_dtype from onedal._device_offload import supports_queue from onedal.common._backend import bind_default_backend -from onedal.utils import _check_array +from onedal.utils.validation import _check_array from ..common.hyperparameters import get_hyperparameters from ..datatypes import _convert_to_supported, from_table, to_table diff --git a/onedal/covariance/incremental_covariance.py b/onedal/covariance/incremental_covariance.py index 8dfa72607b..b3fc87df2c 100644 --- a/onedal/covariance/incremental_covariance.py +++ b/onedal/covariance/incremental_covariance.py @@ -22,7 +22,7 @@ from onedal.common._backend import bind_default_backend from ..datatypes import _convert_to_supported, from_table, to_table -from ..utils import _check_array +from ..utils.validation import _check_array from .covariance import BaseEmpiricalCovariance diff --git a/onedal/decomposition/incremental_pca.py b/onedal/decomposition/incremental_pca.py index 510d343ff9..89df290fcd 100644 --- a/onedal/decomposition/incremental_pca.py +++ b/onedal/decomposition/incremental_pca.py @@ -23,7 +23,7 @@ from onedal.common._backend import bind_default_backend from ..datatypes import _convert_to_supported, from_table, to_table -from ..utils import _check_array +from ..utils.validation import _check_array from .pca import BasePCA diff --git a/onedal/linear_model/incremental_linear_model.py b/onedal/linear_model/incremental_linear_model.py index ba32687581..1f3d53b5e6 100644 --- a/onedal/linear_model/incremental_linear_model.py +++ b/onedal/linear_model/incremental_linear_model.py @@ -24,7 +24,7 @@ from ..common.hyperparameters import get_hyperparameters from ..datatypes import _convert_to_supported, from_table, to_table -from ..utils import _check_X_y, _num_features +from ..utils.validation import _check_X_y, _num_features from .linear_model import BaseLinearRegression diff --git a/onedal/linear_model/linear_model.py b/onedal/linear_model/linear_model.py index e59279581e..6a5d8c3711 100755 --- a/onedal/linear_model/linear_model.py +++ b/onedal/linear_model/linear_model.py @@ -26,7 +26,7 @@ from ..common._estimator_checks import _check_is_fitted from ..common.hyperparameters import get_hyperparameters from ..datatypes import _convert_to_supported, from_table, to_table -from ..utils import _check_array, _check_n_features, _check_X_y, _num_features +from ..utils.validation import _check_array, _check_n_features, _check_X_y, _num_features class BaseLinearRegression(metaclass=ABCMeta): diff --git a/onedal/primitives/kernel_functions.py b/onedal/primitives/kernel_functions.py index 1bc37d58c6..b7832af318 100644 --- a/onedal/primitives/kernel_functions.py +++ b/onedal/primitives/kernel_functions.py @@ -21,7 +21,7 @@ from onedal.common._backend import BackendFunction from ..datatypes import _convert_to_supported, from_table, to_table -from ..utils import _check_array +from ..utils.validation import _check_array def _check_inputs(X, Y): diff --git a/onedal/utils/__init__.py b/onedal/utils/__init__.py deleted file mode 100644 index 0a1b05fbc2..0000000000 --- a/onedal/utils/__init__.py +++ /dev/null @@ -1,49 +0,0 @@ -# ============================================================================== -# Copyright 2023 Intel Corporation -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== - -from .validation import ( - _check_array, - _check_classification_targets, - _check_n_features, - _check_X_y, - _column_or_1d, - _is_arraylike, - _is_arraylike_not_scalar, - _is_csr, - _is_integral_float, - _is_multilabel, - _num_features, - _num_samples, - _type_of_target, - _validate_targets, -) - -__all__ = [ - "_column_or_1d", - "_validate_targets", - "_check_X_y", - "_check_array", - "_check_classification_targets", - "_type_of_target", - "_is_integral_float", - "_is_multilabel", - "_check_n_features", - "_num_features", - "_num_samples", - "_is_arraylike", - "_is_arraylike_not_scalar", - "_is_csr", -] From 81f8285ce2d403cd86a3419a8bd423f26fddb5b0 Mon Sep 17 00:00:00 2001 From: Andreas Huber Date: Tue, 10 Dec 2024 04:54:58 -0800 Subject: [PATCH 15/41] fix some errors after validation cleanup --- onedal/basic_statistics/incremental_basic_statistics.py | 2 ++ onedal/covariance/incremental_covariance.py | 2 +- onedal/decomposition/incremental_pca.py | 1 - onedal/ensemble/forest.py | 2 +- onedal/linear_model/logistic_regression.py | 2 +- onedal/neighbors/neighbors.py | 2 +- onedal/svm/svm.py | 2 +- sklearnex/cluster/k_means.py | 2 +- sklearnex/ensemble/_forest.py | 2 +- sklearnex/linear_model/linear.py | 3 +-- sklearnex/linear_model/logistic_regression.py | 2 +- sklearnex/linear_model/ridge.py | 2 +- sklearnex/neighbors/common.py | 2 +- sklearnex/svm/_common.py | 2 +- 14 files changed, 14 insertions(+), 14 deletions(-) diff --git a/onedal/basic_statistics/incremental_basic_statistics.py b/onedal/basic_statistics/incremental_basic_statistics.py index a37989fbdf..70f8e22f7a 100644 --- a/onedal/basic_statistics/incremental_basic_statistics.py +++ b/onedal/basic_statistics/incremental_basic_statistics.py @@ -138,6 +138,8 @@ def partial_fit(self, X, weights=None, queue=None): self._onedal_params, self._partial_result, X_table, weights_table ) + self._need_to_finalize = True + @supports_queue def finalize_fit(self, queue=None): """ diff --git a/onedal/covariance/incremental_covariance.py b/onedal/covariance/incremental_covariance.py index 045c365cf7..1b37a99729 100644 --- a/onedal/covariance/incremental_covariance.py +++ b/onedal/covariance/incremental_covariance.py @@ -71,7 +71,7 @@ def finalize_compute(self, params, partial_result): ... 
def _reset(self): self._need_to_finalize = False - self.partial_compute_result() + self._partial_result = self.partial_compute_result() def __getstate__(self): # Since finalize_fit can't be dispatched without directly provided queue diff --git a/onedal/decomposition/incremental_pca.py b/onedal/decomposition/incremental_pca.py index d110930a05..b6f464eb3b 100644 --- a/onedal/decomposition/incremental_pca.py +++ b/onedal/decomposition/incremental_pca.py @@ -114,7 +114,6 @@ def _reset(self): self._partial_result = self.partial_train_result() if hasattr(self, "components_"): del self.components_ - self._partial_result = module.partial_train_result() def __getstate__(self): # Since finalize_fit can't be dispatched without directly provided queue diff --git a/onedal/ensemble/forest.py b/onedal/ensemble/forest.py index 1ce08eb4e1..3fade910a6 100644 --- a/onedal/ensemble/forest.py +++ b/onedal/ensemble/forest.py @@ -31,7 +31,7 @@ from ..common._estimator_checks import _check_is_fitted from ..common._mixin import ClassifierMixin, RegressorMixin from ..datatypes import _convert_to_supported, from_table, to_table -from ..utils import ( +from ..utils.validation import ( _check_array, _check_n_features, _check_X_y, diff --git a/onedal/linear_model/logistic_regression.py b/onedal/linear_model/logistic_regression.py index 25683aa6bd..47396ad68b 100644 --- a/onedal/linear_model/logistic_regression.py +++ b/onedal/linear_model/logistic_regression.py @@ -26,7 +26,7 @@ from ..common._estimator_checks import _check_is_fitted from ..common._mixin import ClassifierMixin from ..datatypes import _convert_to_supported, from_table, to_table -from ..utils import ( +from ..utils.validation import ( _check_array, _check_n_features, _check_X_y, diff --git a/onedal/neighbors/neighbors.py b/onedal/neighbors/neighbors.py index 1ffcb23f0b..ecd20a7a99 100755 --- a/onedal/neighbors/neighbors.py +++ b/onedal/neighbors/neighbors.py @@ -33,7 +33,7 @@ from ..common._estimator_checks import _check_is_fitted, _is_classifier, _is_regressor from ..common._mixin import ClassifierMixin, RegressorMixin from ..datatypes import _convert_to_supported, from_table, to_table -from ..utils import ( +from ..utils.validation import ( _check_array, _check_classification_targets, _check_n_features, diff --git a/onedal/svm/svm.py b/onedal/svm/svm.py index d418876948..efe8fa8ce4 100644 --- a/onedal/svm/svm.py +++ b/onedal/svm/svm.py @@ -26,7 +26,7 @@ from ..common._estimator_checks import _check_is_fitted from ..common._mixin import ClassifierMixin, RegressorMixin from ..datatypes import _convert_to_supported, from_table, to_table -from ..utils import ( +from ..utils.validation import ( _check_array, _check_n_features, _check_X_y, diff --git a/sklearnex/cluster/k_means.py b/sklearnex/cluster/k_means.py index 4ba75ca5b8..91eeada386 100644 --- a/sklearnex/cluster/k_means.py +++ b/sklearnex/cluster/k_means.py @@ -36,7 +36,7 @@ from daal4py.sklearn._n_jobs_support import control_n_jobs from daal4py.sklearn._utils import sklearn_check_version from onedal.cluster import KMeans as onedal_KMeans - from onedal.utils import _is_csr + from onedal.utils.validation import _is_csr from .._device_offload import dispatch, wrap_output_data from .._utils import PatchingConditionsChain diff --git a/sklearnex/ensemble/_forest.py b/sklearnex/ensemble/_forest.py index 2a04962645..57bf3e08e0 100644 --- a/sklearnex/ensemble/_forest.py +++ b/sklearnex/ensemble/_forest.py @@ -56,7 +56,7 @@ from onedal.ensemble import RandomForestClassifier as onedal_RandomForestClassifier 
from onedal.ensemble import RandomForestRegressor as onedal_RandomForestRegressor from onedal.primitives import get_tree_state_cls, get_tree_state_reg -from onedal.utils import _num_features, _num_samples +from onedal.utils.validation import _num_features, _num_samples from sklearnex import get_hyperparameters from sklearnex._utils import register_hyperparameters diff --git a/sklearnex/linear_model/linear.py b/sklearnex/linear_model/linear.py index fb7eca8cf1..4b0a2b7454 100644 --- a/sklearnex/linear_model/linear.py +++ b/sklearnex/linear_model/linear.py @@ -15,7 +15,6 @@ # =============================================================================== import logging -from abc import ABC import numpy as np from sklearn.linear_model import LinearRegression as _sklearn_LinearRegression @@ -37,7 +36,7 @@ from onedal.common.hyperparameters import get_hyperparameters from onedal.linear_model import LinearRegression as onedal_LinearRegression -from onedal.utils import _num_features, _num_samples +from onedal.utils.validation import _num_features, _num_samples if sklearn_check_version("1.6"): from sklearn.utils.validation import validate_data diff --git a/sklearnex/linear_model/logistic_regression.py b/sklearnex/linear_model/logistic_regression.py index 01e944c74f..7af9555cf0 100644 --- a/sklearnex/linear_model/logistic_regression.py +++ b/sklearnex/linear_model/logistic_regression.py @@ -34,7 +34,7 @@ from daal4py.sklearn._utils import sklearn_check_version from daal4py.sklearn.linear_model.logistic_path import daal4py_fit, daal4py_predict from onedal.linear_model import LogisticRegression as onedal_LogisticRegression - from onedal.utils import _num_samples + from onedal.utils.validation import _num_samples from .._config import get_config from .._device_offload import dispatch, wrap_output_data diff --git a/sklearnex/linear_model/ridge.py b/sklearnex/linear_model/ridge.py index 85d6714905..be71985b1c 100644 --- a/sklearnex/linear_model/ridge.py +++ b/sklearnex/linear_model/ridge.py @@ -35,7 +35,7 @@ from sklearn.utils import check_scalar from onedal.linear_model import Ridge as onedal_Ridge - from onedal.utils import _num_features, _num_samples + from onedal.utils.validation import _num_features, _num_samples from .._device_offload import dispatch, wrap_output_data from .._utils import PatchingConditionsChain diff --git a/sklearnex/neighbors/common.py b/sklearnex/neighbors/common.py index 0ad5a62dd1..3348f06dd1 100644 --- a/sklearnex/neighbors/common.py +++ b/sklearnex/neighbors/common.py @@ -25,7 +25,7 @@ from sklearn.utils.validation import check_is_fitted from daal4py.sklearn._utils import sklearn_check_version -from onedal.utils import _check_array, _num_features, _num_samples +from onedal.utils.validation import _check_array, _num_features, _num_samples from .._utils import PatchingConditionsChain from ..utils._array_api import get_namespace diff --git a/sklearnex/svm/_common.py b/sklearnex/svm/_common.py index 4b481314ae..bd31336edb 100644 --- a/sklearnex/svm/_common.py +++ b/sklearnex/svm/_common.py @@ -26,7 +26,7 @@ from sklearn.preprocessing import LabelEncoder from daal4py.sklearn._utils import sklearn_check_version -from onedal.utils import _check_array, _check_X_y, _column_or_1d +from onedal.utils.validation import _check_array, _check_X_y, _column_or_1d from .._config import config_context, get_config from .._utils import PatchingConditionsChain From e040cdd48378ab6f5c625bdfc8845f492ecb141b Mon Sep 17 00:00:00 2001 From: Andreas Huber Date: Tue, 10 Dec 2024 08:37:54 -0800 Subject: 
[PATCH 16/41] compare only non-cpu devices --- onedal/_device_offload.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/onedal/_device_offload.py b/onedal/_device_offload.py index 13b385d4f9..3d83f4bcd5 100644 --- a/onedal/_device_offload.py +++ b/onedal/_device_offload.py @@ -119,9 +119,9 @@ def from_data(*data) -> Optional[SyclQueue]: global_queue = data_queue # if the data item is on device, assert it's compatible with device in global queue - data_device = data_queue.sycl_device - global_device = global_queue.sycl_device - if data_device is not None and data_device != global_device: + data_dev = data_queue.sycl_device + global_dev = global_queue.sycl_device + if (data_dev and global_dev) is not None and data_dev != global_dev: raise ValueError( "Data objects are located on different target devices or not on selected device." ) From 511b44ff9378907eeddcf600eed77e5729a19176 Mon Sep 17 00:00:00 2001 From: Andreas Huber Date: Tue, 10 Dec 2024 23:17:41 -0800 Subject: [PATCH 17/41] fix after merging main --- onedal/neighbors/neighbors.py | 1 - 1 file changed, 1 deletion(-) diff --git a/onedal/neighbors/neighbors.py b/onedal/neighbors/neighbors.py index ecd20a7a99..3d979b4001 100755 --- a/onedal/neighbors/neighbors.py +++ b/onedal/neighbors/neighbors.py @@ -639,7 +639,6 @@ def _onedal_predict(self, model, X, params): if "responses" not in params["result_option"] and gpu_device: params["result_option"] += "|responses" params["fptype"] = X.dtype - result = backend.infer(policy, params, model, to_table(X)) if gpu_device: return self.infer(params, self._onedal_model, to_table(X)) From b39b852f9a448f07e2fe9ff5353486dc29bbd930 Mon Sep 17 00:00:00 2001 From: Andreas Huber Date: Wed, 11 Dec 2024 01:00:35 -0800 Subject: [PATCH 18/41] simplify SyclQueue --- onedal/_device_offload.py | 43 +++++++++++++++++++++-------------- onedal/common/_backend.py | 2 +- onedal/neighbors/neighbors.py | 10 ++++---- 3 files changed, 32 insertions(+), 23 deletions(-) diff --git a/onedal/_device_offload.py b/onedal/_device_offload.py index 3d83f4bcd5..1ff5caa693 100644 --- a/onedal/_device_offload.py +++ b/onedal/_device_offload.py @@ -32,21 +32,19 @@ else: from onedal import _dpc_backend - SyclQueueImplementation = getattr(_dpc_backend, "SyclQueue", None) + SyclQueueImplementation = getattr(_dpc_backend, "SyclQueue", object) -class SyclQueue: +class SyclQueue(SyclQueueImplementation): def __init__(self, target=None): - if target and isinstance(target, SyclQueueImplementation): - self.implementation = target - elif target and SyclQueueImplementation is not None: - self.implementation = SyclQueueImplementation(target) + if target is None: + super().__init__() else: - self.implementation = None + super().__init__(target) @property def sycl_device(self): - return getattr(self.implementation, "sycl_device", None) + return getattr(super(), "sycl_device", None) class SyclQueueManager: @@ -67,7 +65,7 @@ def get_global_queue() -> Optional[SyclQueue]: if target == "auto": # queue will be created from the provided data to each function call - return SyclQueue(None) + return None if isinstance(target, (str, int)): q = SyclQueue(target) @@ -111,14 +109,19 @@ def from_data(*data) -> Optional[SyclQueue]: # no interface found - try next data object continue - # extract the queue, verify it aligns with the global queue + # extract the queue global_queue = SyclQueueManager.get_global_queue() - data_queue = SyclQueue(usm_iface["syclobj"]) + data_queue = usm_iface["syclobj"] + if not data_queue: + # no queue, i.e. 
host data, no more work to do + continue + + # update the global queue if not set if global_queue is None: SyclQueueManager.update_global_queue(data_queue) global_queue = data_queue - # if the data item is on device, assert it's compatible with device in global queue + # if either queue points to a device, assert it's always the same device data_dev = data_queue.sycl_device global_dev = global_queue.sycl_device if (data_dev and global_dev) is not None and data_dev != global_dev: @@ -260,14 +263,20 @@ def wrapper_impl(obj, *args, **kwargs): return _run_on_device(func, obj, *args, **kwargs) hostargs, hostkwargs = _get_host_inputs(*args, **kwargs) + if hostkwargs.get("queue") is None: + # no queue provided, get it from the data + data_queue = SyclQueueManager.from_data(*hostargs) + if queue_param: + # if queue_param requested, add it to the hostkwargs + hostkwargs["queue"] = data_queue + else: + # use the provided queue + data_queue = hostkwargs["queue"] + data = (*args, *kwargs.values()) - data_queue = SyclQueueManager.from_data(*data) - if queue_param and hostkwargs.get("queue") is None: - hostkwargs["queue"] = data_queue result = _run_on_device(func, obj, *hostargs, **hostkwargs) - usm_iface = getattr(data[0], "__sycl_usm_array_interface__", None) - if usm_iface is not None: + if data_queue is not None: result = _copy_to_usm(data_queue, result) if dpnp_available and isinstance(data[0], dpnp.ndarray): result = _convert_to_dpnp(result) diff --git a/onedal/common/_backend.py b/onedal/common/_backend.py index 6fb5e9c84d..a81d4a31cf 100644 --- a/onedal/common/_backend.py +++ b/onedal/common/_backend.py @@ -97,7 +97,7 @@ def __call__(self, *args: Any, **kwargs: Any) -> Any: return self.method(*args, **kwargs) # use globally configured queue (from `target_offload` configuration or provided data) - queue = getattr(SyclQueueManager.get_global_queue(), "implementation", None) + queue = SyclQueueManager.get_global_queue() if queue is not None and not (self.backend.is_dpc or self.backend.is_spmd): raise RuntimeError("Operations using queues require the DPC/SPMD backend") diff --git a/onedal/neighbors/neighbors.py b/onedal/neighbors/neighbors.py index 3d979b4001..1f7f6f8986 100755 --- a/onedal/neighbors/neighbors.py +++ b/onedal/neighbors/neighbors.py @@ -275,7 +275,7 @@ def _fit(self, X, y): _fit_y = None # global queue is set as per user configuration (`target_offload`) or from data prior to calling this internal function - queue = getattr(SyclQueueManager.get_global_queue(), "implementation") + queue = SyclQueueManager.get_global_queue() gpu_device = queue is not None and getattr(queue.sycl_device, "is_gpu", False) if _is_classifier(self) or (_is_regressor(self) and gpu_device): @@ -446,7 +446,7 @@ def _get_daal_params(self, data): def _onedal_fit(self, X, y): # global queue is set as per user configuration (`target_offload`) or from data prior to calling this internal function - queue = getattr(SyclQueueManager.get_global_queue(), "implementation") + queue = SyclQueueManager.get_global_queue() gpu_device = queue is not None and getattr(queue.sycl_device, "is_gpu", False) if self.effective_metric_ == "euclidean" and not gpu_device: params = self._get_daal_params(X) @@ -604,7 +604,7 @@ def _get_daal_params(self, data): def _onedal_fit(self, X, y): # global queue is set as per user configuration (`target_offload`) or from data prior to calling this internal function - queue = getattr(SyclQueueManager.get_global_queue(), "implementation") + queue = SyclQueueManager.get_global_queue() gpu_device = queue 
is not None and getattr(queue.sycl_device, "is_gpu", False) if self.effective_metric_ == "euclidean" and not gpu_device: params = self._get_daal_params(X) @@ -632,7 +632,7 @@ def _onedal_predict(self, model, X, params): return bf_knn_classification_prediction(**params).compute(X, model) # global queue is set as per user configuration (`target_offload`) or from data prior to calling this internal function - queue = getattr(SyclQueueManager.get_global_queue(), "implementation") + queue = SyclQueueManager.get_global_queue() gpu_device = queue is not None and getattr(queue.sycl_device, "is_gpu", False) X = _convert_to_supported(X) @@ -754,7 +754,7 @@ def _get_daal_params(self, data): def _onedal_fit(self, X, y): # global queue is set as per user configuration (`target_offload`) or from data prior to calling this internal function - queue = getattr(SyclQueueManager.get_global_queue(), "implementation") + queue = SyclQueueManager.get_global_queue() gpu_device = queue is not None and getattr(queue.sycl_device, "is_gpu", False) if self.effective_metric_ == "euclidean" and not gpu_device: params = self._get_daal_params(X) From a5cb8191c85604b97864a7566517212919b3320e Mon Sep 17 00:00:00 2001 From: Andreas Huber Date: Wed, 11 Dec 2024 02:08:16 -0800 Subject: [PATCH 19/41] further simplify and align SyclQueue handling --- onedal/_device_offload.py | 26 +++++++++++++++++--------- 1 file changed, 17 insertions(+), 9 deletions(-) diff --git a/onedal/_device_offload.py b/onedal/_device_offload.py index 1ff5caa693..d7ad7252db 100644 --- a/onedal/_device_offload.py +++ b/onedal/_device_offload.py @@ -42,6 +42,21 @@ def __init__(self, target=None): else: super().__init__(target) + @staticmethod + def from_implementation(queue): + # extract the device descriptor and create a new queue + return SyclQueue(queue.sycl_device.filter_string) + + @staticmethod + def from_implementation_or_device_selector(value): + if value is None: + return SyclQueue() + if isinstance(value, SyclQueueImplementation): + return SyclQueue.from_implementation(value) + if isinstance(value, (str, int)): + return SyclQueue(value) + raise ValueError(f"Invalid queue or device selector {value=}.") + @property def sycl_device(self): return getattr(super(), "sycl_device", None) @@ -62,16 +77,11 @@ def get_global_queue() -> Optional[SyclQueue]: return queue target = _get_config()["target_offload"] - if target == "auto": # queue will be created from the provided data to each function call return None - if isinstance(target, (str, int)): - q = SyclQueue(target) - else: - q = target - + q = SyclQueue.from_implementation_or_device_selector(target) SyclQueueManager.update_global_queue(q) return q @@ -83,9 +93,7 @@ def remove_global_queue(): @staticmethod def update_global_queue(queue): """Update the global queue.""" - if queue is not None and not isinstance(queue, SyclQueue): - # could be a device ID or selector string - queue = SyclQueue(queue) + queue = SyclQueue.from_implementation_or_device_selector(queue) SyclQueueManager.__global_queue = queue @staticmethod From 891700fb3c7b8cc907ee21330e5c6e2c2ebaf247 Mon Sep 17 00:00:00 2001 From: Andreas Huber Date: Wed, 11 Dec 2024 04:56:45 -0800 Subject: [PATCH 20/41] fix missing return --- onedal/common/_backend.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/onedal/common/_backend.py b/onedal/common/_backend.py index a81d4a31cf..782ca90727 100644 --- a/onedal/common/_backend.py +++ b/onedal/common/_backend.py @@ -226,6 +226,6 @@ def decorator(method: Callable[..., Any]): ) return 
method - __decorator(method, spmd_manager, module_name, lookup_name, no_policy) + return __decorator(method, spmd_manager, module_name, lookup_name, no_policy) return decorator From a23ea0d2039173ffd0bc54d0c2b5c91edd4444be Mon Sep 17 00:00:00 2001 From: Andreas Huber Date: Wed, 11 Dec 2024 06:28:37 -0800 Subject: [PATCH 21/41] remove intermediate SyclQueue class --- onedal/_device_offload.py | 52 ++++++++++++++------------------------- 1 file changed, 19 insertions(+), 33 deletions(-) diff --git a/onedal/_device_offload.py b/onedal/_device_offload.py index d7ad7252db..8398799be0 100644 --- a/onedal/_device_offload.py +++ b/onedal/_device_offload.py @@ -26,40 +26,13 @@ from .utils._dpep_helpers import dpctl_available, dpnp_available if dpctl_available: - from dpctl import SyclQueue as SyclQueueImplementation + from dpctl import SyclQueue from dpctl.memory import MemoryUSMDevice, as_usm_memory from dpctl.tensor import usm_ndarray else: from onedal import _dpc_backend - SyclQueueImplementation = getattr(_dpc_backend, "SyclQueue", object) - - -class SyclQueue(SyclQueueImplementation): - def __init__(self, target=None): - if target is None: - super().__init__() - else: - super().__init__(target) - - @staticmethod - def from_implementation(queue): - # extract the device descriptor and create a new queue - return SyclQueue(queue.sycl_device.filter_string) - - @staticmethod - def from_implementation_or_device_selector(value): - if value is None: - return SyclQueue() - if isinstance(value, SyclQueueImplementation): - return SyclQueue.from_implementation(value) - if isinstance(value, (str, int)): - return SyclQueue(value) - raise ValueError(f"Invalid queue or device selector {value=}.") - - @property - def sycl_device(self): - return getattr(super(), "sycl_device", None) + SyclQueue = getattr(_dpc_backend, "SyclQueue", None) class SyclQueueManager: @@ -69,7 +42,20 @@ class SyclQueueManager: __global_queue = None @staticmethod - def get_global_queue() -> Optional[SyclQueue]: + def __create_sycl_queue(target): + if SyclQueue is None: + # we don't have SyclQueue support + return None + if target is None: + return SyclQueue() + if isinstance(target, SyclQueue): + return target + if isinstance(target, (str, int)): + return SyclQueue(target) + raise ValueError(f"Invalid queue or device selector {target=}.") + + @staticmethod + def get_global_queue(): """Get the global queue. Retrieve it from the config if not set.""" if (queue := SyclQueueManager.__global_queue) is not None: if not isinstance(queue, SyclQueue): @@ -81,7 +67,7 @@ def get_global_queue() -> Optional[SyclQueue]: # queue will be created from the provided data to each function call return None - q = SyclQueue.from_implementation_or_device_selector(target) + q = SyclQueueManager.__create_sycl_queue(target) SyclQueueManager.update_global_queue(q) return q @@ -93,11 +79,11 @@ def remove_global_queue(): @staticmethod def update_global_queue(queue): """Update the global queue.""" - queue = SyclQueue.from_implementation_or_device_selector(queue) + queue = SyclQueueManager.__create_sycl_queue(queue) SyclQueueManager.__global_queue = queue @staticmethod - def from_data(*data) -> Optional[SyclQueue]: + def from_data(*data): """Extract the queue from provided data. 
This updates the global queue as well.""" for item in data: # iterate through all data objects, extract the queue, and verify that all data objects are on the same device From c7eef38d1d402b4299bb8536557bb55e6b10f93c Mon Sep 17 00:00:00 2001 From: Andreas Huber Date: Thu, 12 Dec 2024 01:23:02 -0800 Subject: [PATCH 22/41] introduce manage_global_queue context manager --- onedal/_device_offload.py | 44 ++++++++-- onedal/datatypes/_data_conversion.py | 2 +- sklearnex/_device_offload.py | 116 +++++++++++++-------------- 3 files changed, 97 insertions(+), 65 deletions(-) diff --git a/onedal/_device_offload.py b/onedal/_device_offload.py index 8398799be0..b7495e5c0b 100644 --- a/onedal/_device_offload.py +++ b/onedal/_device_offload.py @@ -15,6 +15,7 @@ # ============================================================================== from collections.abc import Iterable +from contextlib import contextmanager from functools import wraps from typing import Optional @@ -47,7 +48,7 @@ def __create_sycl_queue(target): # we don't have SyclQueue support return None if target is None: - return SyclQueue() + return None if isinstance(target, SyclQueue): return target if isinstance(target, (str, int)): @@ -126,6 +127,38 @@ def from_data(*data): # after we went through the data, global queue is updated and verified (if any queue found) return SyclQueueManager.get_global_queue() + @staticmethod + @contextmanager + def manage_global_queue(queue, *args): + """ + Context manager to manage the global SyclQueue. + + This context manager updates the global queue with the provided queue, + verifies that all data objects are on the same device, and restores the + original queue after work is done. + Note: For most applications, the original queue should be `None`, but + if there are nested calls to `manage_global_queue()`, it is + important to restore the outer queue, rather than setting it to + `None`. + + Parameters: + queue (SyclQueue or None): The queue to set as the global queue. If None, + the global queue will be determined from the provided data. + *args: Additional data objects to verify their device placement. + + Yields: + SyclQueue: The global queue after verification. 
+ """ + original_queue = SyclQueueManager.get_global_queue() + try: + # update the global queue with what is provided, it can be None, then we will get it from provided data + SyclQueueManager.update_global_queue(queue) + # find the queues in data using SyclQueueManager to verify that all data objects are on the same device + yield SyclQueueManager.from_data(*args) + finally: + # restore the original queue + SyclQueueManager.update_global_queue(original_queue) + def supports_queue(func): """ @@ -138,11 +171,10 @@ def supports_queue(func): @wraps(func) def wrapper(self, *args, **kwargs): queue = kwargs.get("queue", None) - # update the global queue with what is provided, it can be None, then we will get it from provided data - SyclQueueManager.update_global_queue(queue) - # find the queues in data using SyclQueueManager to verify that all data objects are on the same device - kwargs["queue"] = SyclQueueManager.from_data(*args) - return func(self, *args, **kwargs) + with SyclQueueManager.manage_global_queue(queue, *args) as queue: + kwargs["queue"] = queue + result = func(self, *args, **kwargs) + return result return wrapper diff --git a/onedal/datatypes/_data_conversion.py b/onedal/datatypes/_data_conversion.py index 82dfe6928f..62009564ef 100644 --- a/onedal/datatypes/_data_conversion.py +++ b/onedal/datatypes/_data_conversion.py @@ -96,7 +96,7 @@ def convert_or_pass(x): return x # find the device we're running on - queue = SyclQueueManager.from_data(data) + queue = SyclQueueManager.from_data(*data) device = queue.sycl_device if queue else None if device and not device.has_aspect_fp64: diff --git a/sklearnex/_device_offload.py b/sklearnex/_device_offload.py index 39ca30e0a1..094542cea7 100644 --- a/sklearnex/_device_offload.py +++ b/sklearnex/_device_offload.py @@ -28,71 +28,71 @@ def _get_backend(obj, method_name, *data): - queue = SyclQueueManager.get_global_queue() - cpu_device = queue is None or getattr(queue.sycl_device, "is_cpu", True) - gpu_device = queue is not None and getattr(queue.sycl_device, "is_gpu", False) - - if cpu_device: - patching_status = obj._onedal_cpu_supported(method_name, *data) - if patching_status.get_status(): - return "onedal", patching_status - else: - return "sklearn", patching_status - - allow_fallback_to_host = get_config()["allow_fallback_to_host"] - - if gpu_device: - patching_status = obj._onedal_gpu_supported(method_name, *data) - if patching_status.get_status(): - return "onedal", patching_status - else: - SyclQueueManager.remove_global_queue() - if allow_fallback_to_host: - patching_status = obj._onedal_cpu_supported(method_name, *data) - if patching_status.get_status(): - return "onedal", patching_status - else: - return "sklearn", patching_status + with SyclQueueManager.manage_global_queue(None, *data) as queue: + cpu_device = queue is None or getattr(queue.sycl_device, "is_cpu", True) + gpu_device = queue is not None and getattr(queue.sycl_device, "is_gpu", False) + + if cpu_device: + patching_status = obj._onedal_cpu_supported(method_name, *data) + if patching_status.get_status(): + return "onedal", patching_status else: return "sklearn", patching_status + allow_fallback_to_host = get_config()["allow_fallback_to_host"] + + if gpu_device: + patching_status = obj._onedal_gpu_supported(method_name, *data) + if patching_status.get_status(): + return "onedal", patching_status + else: + SyclQueueManager.remove_global_queue() + if allow_fallback_to_host: + patching_status = obj._onedal_cpu_supported(method_name, *data) + if patching_status.get_status(): 
+ return "onedal", patching_status + else: + return "sklearn", patching_status + else: + return "sklearn", patching_status + raise RuntimeError("Device support is not implemented") def dispatch(obj, method_name, branches, *args, **kwargs): - queue = getattr(SyclQueueManager.get_global_queue(), "implementation", None) - has_usm_data_for_args, hostargs = _transfer_to_host(*args) - has_usm_data_for_kwargs, hostvalues = _transfer_to_host(*kwargs.values()) - hostkwargs = dict(zip(kwargs.keys(), hostvalues)) - - backend, patching_status = _get_backend(obj, method_name, *hostargs) - has_usm_data = has_usm_data_for_args or has_usm_data_for_kwargs - if backend == "onedal": - # Host args only used before onedal backend call. - # Device will be offloaded when onedal backend will be called. - patching_status.write_log(queue=queue, transferred_to_host=False) - return branches[backend](obj, *hostargs, **hostkwargs, queue=queue) - if backend == "sklearn": - if ( - "array_api_dispatch" in get_config() - and get_config()["array_api_dispatch"] - and "array_api_support" in obj._get_tags() - and obj._get_tags()["array_api_support"] - and not has_usm_data - ): - # USM ndarrays are also excluded for the fallback Array API. Currently, DPNP.ndarray is - # not compliant with the Array API standard, and DPCTL usm_ndarray Array API is compliant, - # except for the linalg module. There is no guarantee that stock scikit-learn will - # work with such input data. The condition will be updated after DPNP.ndarray and - # DPCTL usm_ndarray enabling for conformance testing and these arrays supportance - # of the fallback cases. - # If `array_api_dispatch` enabled and array api is supported for the stock scikit-learn, - # then raw inputs are used for the fallback. - patching_status.write_log(transferred_to_host=False) - return branches[backend](obj, *args, **kwargs) - else: - patching_status.write_log() - return branches[backend](obj, *hostargs, **hostkwargs) + with SyclQueueManager.manage_global_queue(None, *args) as queue: + has_usm_data_for_args, hostargs = _transfer_to_host(*args) + has_usm_data_for_kwargs, hostvalues = _transfer_to_host(*kwargs.values()) + hostkwargs = dict(zip(kwargs.keys(), hostvalues)) + + backend, patching_status = _get_backend(obj, method_name, *hostargs) + has_usm_data = has_usm_data_for_args or has_usm_data_for_kwargs + if backend == "onedal": + # Host args only used before onedal backend call. + # Device will be offloaded when onedal backend will be called. + patching_status.write_log(queue=queue, transferred_to_host=False) + return branches[backend](obj, *hostargs, **hostkwargs, queue=queue) + if backend == "sklearn": + if ( + "array_api_dispatch" in get_config() + and get_config()["array_api_dispatch"] + and "array_api_support" in obj._get_tags() + and obj._get_tags()["array_api_support"] + and not has_usm_data + ): + # USM ndarrays are also excluded for the fallback Array API. Currently, DPNP.ndarray is + # not compliant with the Array API standard, and DPCTL usm_ndarray Array API is compliant, + # except for the linalg module. There is no guarantee that stock scikit-learn will + # work with such input data. The condition will be updated after DPNP.ndarray and + # DPCTL usm_ndarray enabling for conformance testing and these arrays supportance + # of the fallback cases. + # If `array_api_dispatch` enabled and array api is supported for the stock scikit-learn, + # then raw inputs are used for the fallback. 
+ patching_status.write_log(transferred_to_host=False) + return branches[backend](obj, *args, **kwargs) + else: + patching_status.write_log() + return branches[backend](obj, *hostargs, **hostkwargs) raise RuntimeError( f"Undefined backend {backend} in " f"{obj.__class__.__name__}.{method_name}" ) From 50f6d1b3890d4a07e76262ec56cf70599cc5dc4c Mon Sep 17 00:00:00 2001 From: Andreas Huber Date: Thu, 12 Dec 2024 05:06:39 -0800 Subject: [PATCH 23/41] bring back underscore methods --- onedal/cluster/kmeans_init.py | 24 ++++++++++++++++-------- 1 file changed, 16 insertions(+), 8 deletions(-) diff --git a/onedal/cluster/kmeans_init.py b/onedal/cluster/kmeans_init.py index 9ebd0dd09a..e455cde2b4 100755 --- a/onedal/cluster/kmeans_init.py +++ b/onedal/cluster/kmeans_init.py @@ -15,12 +15,10 @@ # ============================================================================== import numpy as np -from scipy.sparse import issparse -from sklearn.utils import check_random_state - from daal4py.sklearn._utils import daal_check_version, get_dtype from onedal._device_offload import supports_queue from onedal.common._backend import bind_default_backend +from sklearn.utils import check_random_state from ..datatypes import _convert_to_supported, from_table, to_table from ..utils.validation import _check_array @@ -75,18 +73,28 @@ def _get_params_and_input(self, X): params = self._get_onedal_params(dtype) return (params, to_table(X), dtype) - def compute(self, X, queue=None): + def _compute_raw(self, X_table, dtype=np.float32): + params = self._get_onedal_params(dtype) + + result = self.backend_compute(params, X_table) + + return result.centroids + + def _compute(self, X): + # oneDAL KMeans Init for sparse data does not have GPU support _, X_table, dtype = self._get_params_and_input(X) - centroids = self.compute_raw(X_table, dtype, queue=queue) + centroids = self._compute_raw(X_table, dtype) return from_table(centroids) @supports_queue def compute_raw(self, X_table, dtype=np.float32, queue=None): - params = self._get_onedal_params(dtype) - result = self.backend_compute(params, X_table) - return result.centroids + return self._compute_raw(X_table, dtype) + + @supports_queue + def compute(self, X, queue=None): + return self._compute(X) def kmeans_plusplus( X, From d5e94251fd68f0d1359d884976e6e75e770e1449 Mon Sep 17 00:00:00 2001 From: Andreas Huber Date: Thu, 12 Dec 2024 05:57:11 -0800 Subject: [PATCH 24/41] kmeans init compute_raw does not support queue --- onedal/cluster/kmeans_init.py | 6 ------ onedal/common/_backend.py | 4 ++++ 2 files changed, 4 insertions(+), 6 deletions(-) diff --git a/onedal/cluster/kmeans_init.py b/onedal/cluster/kmeans_init.py index e455cde2b4..523891a86a 100755 --- a/onedal/cluster/kmeans_init.py +++ b/onedal/cluster/kmeans_init.py @@ -75,20 +75,14 @@ def _get_params_and_input(self, X): def _compute_raw(self, X_table, dtype=np.float32): params = self._get_onedal_params(dtype) - result = self.backend_compute(params, X_table) - return result.centroids def _compute(self, X): - # oneDAL KMeans Init for sparse data does not have GPU support _, X_table, dtype = self._get_params_and_input(X) - centroids = self._compute_raw(X_table, dtype) - return from_table(centroids) - @supports_queue def compute_raw(self, X_table, dtype=np.float32, queue=None): return self._compute_raw(X_table, dtype) diff --git a/onedal/common/_backend.py b/onedal/common/_backend.py index 782ca90727..88e4a14e86 100644 --- a/onedal/common/_backend.py +++ b/onedal/common/_backend.py @@ -112,6 +112,10 @@ def __call__(self, 
*args: Any, **kwargs: Any) -> Any: else: policy = self.backend.host_policy() + logger.debug( + f"Dispatching function '{self.name}' with policy {policy} to {self.backend}" + ) + # dispatch to backend function return self.method(policy, *args, **kwargs) From 35a344c831f7e18da1279eb42bd2fcd65b30a52c Mon Sep 17 00:00:00 2001 From: Andreas Huber Date: Thu, 12 Dec 2024 10:57:14 -0800 Subject: [PATCH 25/41] cleanup @support_input_format --- onedal/_device_offload.py | 92 +++++++------------ .../incremental_basic_statistics.py | 13 ++- onedal/common/_backend.py | 3 + onedal/covariance/incremental_covariance.py | 15 +-- onedal/datatypes/_data_conversion.py | 5 +- onedal/decomposition/incremental_pca.py | 13 ++- .../linear_model/incremental_linear_model.py | 36 +++++--- .../spmd/basic_statistics/basic_statistics.py | 4 +- onedal/spmd/cluster/kmeans.py | 8 +- onedal/spmd/covariance/covariance.py | 2 +- onedal/spmd/decomposition/pca.py | 2 +- onedal/spmd/linear_model/linear_model.py | 4 +- .../spmd/linear_model/logistic_regression.py | 8 +- onedal/spmd/neighbors/neighbors.py | 14 +-- .../incremental_basic_statistics.py | 13 ++- .../covariance/incremental_covariance.py | 17 ++-- .../tests/test_incremental_covariance.py | 13 ++- sklearnex/linear_model/coordinate_descent.py | 12 +-- sklearnex/linear_model/incremental_linear.py | 15 ++- sklearnex/linear_model/ridge.py | 9 +- sklearnex/manifold/t_sne.py | 4 +- sklearnex/metrics/pairwise.py | 4 +- sklearnex/metrics/ranking.py | 2 +- sklearnex/model_selection/split.py | 4 +- .../preview/decomposition/incremental_pca.py | 13 ++- .../tests/test_basic_statistics_spmd.py | 4 +- .../spmd/cluster/tests/test_kmeans_spmd.py | 1 - 27 files changed, 158 insertions(+), 172 deletions(-) diff --git a/onedal/_device_offload.py b/onedal/_device_offload.py index b7495e5c0b..019652eca9 100644 --- a/onedal/_device_offload.py +++ b/onedal/_device_offload.py @@ -14,10 +14,10 @@ # limitations under the License. # ============================================================================== +import inspect from collections.abc import Iterable from contextlib import contextmanager from functools import wraps -from typing import Optional import numpy as np from sklearn import get_config @@ -261,74 +261,50 @@ def _get_host_inputs(*args, **kwargs): return hostargs, hostkwargs -def _run_on_device(func, obj=None, *args, **kwargs): - if obj is not None: - return func(obj, *args, **kwargs) - return func(*args, **kwargs) - - -def support_input_format(freefunc=False, queue_param=True): +def support_input_format(func): """ Converts and moves the output arrays of the decorated function to match the input array type and device. Puts SYCLQueue from data to decorated function arguments. + """ - Parameters - ---------- - freefunc (bool) : Set to True if decorates free function. - queue_param (bool) : Set to False if the decorated function has no `queue` parameter + def invoke_func(self_or_None, *args, **kwargs): + if self_or_None is None: + return func(*args, **kwargs) + else: + return func(self_or_None, *args, **kwargs) - Notes - ----- - Queue will not be changed if provided explicitly. - """ + def wrapper_impl(*args, **kwargs): + # remove self from args if it is a class method + if inspect.isfunction(func) and "." 
in func.__qualname__: + self = args[0] + args = args[1:] + else: + self = None - def decorator(func): - def wrapper_impl(obj, *args, **kwargs): - if len(args) == 0 and len(kwargs) == 0: - return _run_on_device(func, obj, *args, **kwargs) + if len(args) == 0 and len(kwargs) == 0: + return invoke_func(self, *args, **kwargs) + data = (*args, *kwargs.values()) + # get and set the global queue from the kwarg or data + with SyclQueueManager.manage_global_queue(kwargs.get("queue"), *args) as queue: hostargs, hostkwargs = _get_host_inputs(*args, **kwargs) - if hostkwargs.get("queue") is None: - # no queue provided, get it from the data - data_queue = SyclQueueManager.from_data(*hostargs) - if queue_param: - # if queue_param requested, add it to the hostkwargs - hostkwargs["queue"] = data_queue - else: - # use the provided queue - data_queue = hostkwargs["queue"] - - data = (*args, *kwargs.values()) - result = _run_on_device(func, obj, *hostargs, **hostkwargs) - - if data_queue is not None: - result = _copy_to_usm(data_queue, result) + if "queue" in inspect.signature(func).parameters: + # set the queue if it's expected by func + hostkwargs["queue"] = queue + result = invoke_func(self, *hostargs, **hostkwargs) + + if queue is not None: + result = _copy_to_usm(queue, result) if dpnp_available and isinstance(data[0], dpnp.ndarray): result = _convert_to_dpnp(result) return result - if not get_config().get("transform_output"): - input_array_api = getattr(data[0], "__array_namespace__", lambda: None)() - if input_array_api: - input_array_api_device = data[0].device - result = _asarray( - result, input_array_api, device=input_array_api_device - ) - return result - - if freefunc: - - @wraps(func) - def wrapper_free(*args, **kwargs): - return wrapper_impl(None, *args, **kwargs) - - return wrapper_free - - @wraps(func) - def wrapper_with_self(self, *args, **kwargs): - return wrapper_impl(self, *args, **kwargs) - - return wrapper_with_self + if not get_config().get("transform_output"): + input_array_api = getattr(data[0], "__array_namespace__", lambda: None)() + if input_array_api: + input_array_api_device = data[0].device + result = _asarray(result, input_array_api, device=input_array_api_device) + return result - return decorator + return wrapper_impl diff --git a/onedal/basic_statistics/incremental_basic_statistics.py b/onedal/basic_statistics/incremental_basic_statistics.py index 70f8e22f7a..173775ab1d 100644 --- a/onedal/basic_statistics/incremental_basic_statistics.py +++ b/onedal/basic_statistics/incremental_basic_statistics.py @@ -17,9 +17,8 @@ from abc import abstractmethod import numpy as np - from daal4py.sklearn._utils import get_dtype -from onedal._device_offload import supports_queue +from onedal._device_offload import SyclQueueManager, supports_queue from onedal.common._backend import bind_default_backend from ..datatypes import _convert_to_supported, from_table, to_table @@ -72,6 +71,7 @@ class IncrementalBasicStatistics(BaseBasicStatistics): def __init__(self, result_options="all"): super().__init__(result_options, algorithm="by_default") self._reset() + self._queue = None @bind_default_backend("basic_statistics") def partial_compute_result(self): ... @@ -84,6 +84,7 @@ def finalize_compute(self, *args, **kwargs): ... 
def _reset(self): self._need_to_finalize = False + self._queue = None # get the _partial_result pointer from backend self._partial_result = self.partial_compute_result() @@ -97,6 +98,7 @@ def __getstate__(self): return data + @supports_queue def partial_fit(self, X, weights=None, queue=None): """ Computes partial data for basic statistics @@ -139,9 +141,9 @@ def partial_fit(self, X, weights=None, queue=None): ) self._need_to_finalize = True + self._queue = queue - @supports_queue - def finalize_fit(self, queue=None): + def finalize_fit(self): """ Finalizes basic statistics computation and obtains result attributes from the current `_partial_result`. @@ -157,7 +159,8 @@ def finalize_fit(self, queue=None): Returns the instance itself. """ if self._need_to_finalize: - result = self.finalize_compute(self._onedal_params, self._partial_result) + with SyclQueueManager.manage_global_queue(self._queue): + result = self.finalize_compute(self._onedal_params, self._partial_result) options = self._get_result_options(self.options).split("|") for opt in options: diff --git a/onedal/common/_backend.py b/onedal/common/_backend.py index 88e4a14e86..61e72c4eee 100644 --- a/onedal/common/_backend.py +++ b/onedal/common/_backend.py @@ -102,6 +102,9 @@ def __call__(self, *args: Any, **kwargs: Any) -> Any: if queue is not None and not (self.backend.is_dpc or self.backend.is_spmd): raise RuntimeError("Operations using queues require the DPC/SPMD backend") + if self.backend.is_spmd and queue is None: + raise RuntimeError("Executing functions from SPMD backend requires a queue") + # craft the correct policy including the device queue if queue is None: policy = self.backend.host_policy() diff --git a/onedal/covariance/incremental_covariance.py b/onedal/covariance/incremental_covariance.py index 1b37a99729..47de26a21e 100644 --- a/onedal/covariance/incremental_covariance.py +++ b/onedal/covariance/incremental_covariance.py @@ -13,12 +13,10 @@ # See the License for the specific language governing permissions and # limitations under the License. # =============================================================================== -from abc import abstractmethod import numpy as np - from daal4py.sklearn._utils import daal_check_version, get_dtype -from onedal._device_offload import supports_queue +from onedal._device_offload import SyclQueueManager, supports_queue from onedal.common._backend import bind_default_backend from ..datatypes import _convert_to_supported, from_table, to_table @@ -59,6 +57,7 @@ class IncrementalEmpiricalCovariance(BaseEmpiricalCovariance): def __init__(self, method="dense", bias=False, assume_centered=False): super().__init__(method, bias, assume_centered) self._reset() + self._queue = None @bind_default_backend("covariance") def partial_compute(self, params, partial_result, X_table): ... @@ -71,6 +70,7 @@ def finalize_compute(self, params, partial_result): ... 
def _reset(self): self._need_to_finalize = False + self._queue = None self._partial_result = self.partial_compute_result() def __getstate__(self): @@ -84,6 +84,7 @@ def __getstate__(self): return data + @supports_queue def partial_fit(self, X, y=None, queue=None): """ Computes partial data for the covariance matrix @@ -117,9 +118,10 @@ def partial_fit(self, X, y=None, queue=None): table_X = to_table(X) self._partial_result = self.partial_compute(params, self._partial_result, table_X) self._need_to_finalize = True + # store the queue for when we finalize + self._queue = queue - @supports_queue - def finalize_fit(self, queue=None): + def finalize_fit(self): """ Finalizes covariance matrix and obtains `covariance_` and `location_` attributes from the current `_partial_result`. @@ -136,8 +138,9 @@ def finalize_fit(self, queue=None): """ if self._need_to_finalize: params = self._get_onedal_params(self._dtype) + with SyclQueueManager.manage_global_queue(self._queue): + result = self.finalize_compute(params, self._partial_result) - result = self.finalize_compute(params, self._partial_result) if daal_check_version((2024, "P", 1)) or (not self.bias): self.covariance_ = from_table(result.cov_matrix) else: diff --git a/onedal/datatypes/_data_conversion.py b/onedal/datatypes/_data_conversion.py index 62009564ef..58835c609e 100644 --- a/onedal/datatypes/_data_conversion.py +++ b/onedal/datatypes/_data_conversion.py @@ -17,7 +17,6 @@ import warnings import numpy as np - from onedal import _default_backend as backend from onedal._device_offload import SyclQueueManager @@ -96,8 +95,8 @@ def convert_or_pass(x): return x # find the device we're running on - queue = SyclQueueManager.from_data(*data) - device = queue.sycl_device if queue else None + with SyclQueueManager.manage_global_queue(None, *data) as queue: + device = queue.sycl_device if queue else None if device and not device.has_aspect_fp64: return _apply_and_pass(convert_or_pass, *data) diff --git a/onedal/decomposition/incremental_pca.py b/onedal/decomposition/incremental_pca.py index b6f464eb3b..4ccb2cff00 100644 --- a/onedal/decomposition/incremental_pca.py +++ b/onedal/decomposition/incremental_pca.py @@ -15,9 +15,8 @@ # ============================================================================== import numpy as np - from daal4py.sklearn._utils import get_dtype -from onedal._device_offload import supports_queue +from onedal._device_offload import SyclQueueManager, supports_queue from onedal.common._backend import bind_default_backend from ..datatypes import _convert_to_supported, from_table, to_table @@ -98,6 +97,7 @@ def __init__( self.method = method self.is_deterministic = is_deterministic self.whiten = whiten + self._queue = None self._reset() @bind_default_backend("decomposition.dim_reduction") @@ -111,6 +111,7 @@ def partial_train_result(self): ... def _reset(self): self._need_to_finalize = False + self._queue = None self._partial_result = self.partial_train_result() if hasattr(self, "components_"): del self.components_ @@ -174,10 +175,10 @@ def partial_fit(self, X, queue=None): self._params, self._partial_result, X_table ) self._need_to_finalize = True + self._queue = queue return self - @supports_queue - def finalize_fit(self, queue=None): + def finalize_fit(self): """ Finalizes principal components computation and obtains resulting attributes from the current `_partial_result`. @@ -193,7 +194,8 @@ def finalize_fit(self, queue=None): Returns the instance itself. 
""" if self._need_to_finalize: - result = self.finalize_train(self._params, self._partial_result) + with SyclQueueManager.manage_global_queue(self._queue): + result = self.finalize_train(self._params, self._partial_result) self.mean_ = from_table(result.means).ravel() self.var_ = from_table(result.variances).ravel() self.components_ = from_table(result.eigenvectors) @@ -210,5 +212,6 @@ def finalize_fit(self, queue=None): self.n_components_, min(self.n_samples_seen_, self.n_features_in_) ) self._need_to_finalize = False + self._queue = None return self diff --git a/onedal/linear_model/incremental_linear_model.py b/onedal/linear_model/incremental_linear_model.py index 1f3d53b5e6..36d38ba1e8 100644 --- a/onedal/linear_model/incremental_linear_model.py +++ b/onedal/linear_model/incremental_linear_model.py @@ -17,9 +17,8 @@ from abc import abstractmethod import numpy as np - from daal4py.sklearn._utils import get_dtype -from onedal._device_offload import supports_queue +from onedal._device_offload import SyclQueueManager, supports_queue from onedal.common._backend import bind_default_backend from ..common.hyperparameters import get_hyperparameters @@ -48,6 +47,7 @@ class IncrementalLinearRegression(BaseLinearRegression): def __init__(self, fit_intercept=True, copy_X=False, algorithm="norm_eq"): super().__init__(fit_intercept=fit_intercept, copy_X=copy_X, algorithm=algorithm) + self._queue = None self._reset() @bind_default_backend("linear_model.regression") @@ -61,6 +61,7 @@ def finalize_train(self, *args, **kwargs): ... def _reset(self): # Get the pointer to partial_result from backend + self._queue = None self._partial_result = self.partial_train_result() @supports_queue @@ -108,9 +109,9 @@ def partial_fit(self, X, y, queue=None): self._partial_result = self.partial_train( self._params, self._partial_result, X_table, y_table ) + self._queue = queue - @supports_queue - def finalize_fit(self, queue=None): + def finalize_fit(self): """ Finalizes linear regression computation and obtains coefficients from the current `_partial_result`. @@ -127,12 +128,13 @@ def finalize_fit(self, queue=None): """ hparams = get_hyperparameters("linear_regression", "train") - if hparams is not None and not hparams.is_default: - result = self.finalize_train( - self._params, hparams.backend, self._partial_result - ) - else: - result = self.finalize_train(self._params, self._partial_result) + with SyclQueueManager.manage_global_queue(self._queue): + if hparams is not None and not hparams.is_default: + result = self.finalize_train( + self._params, hparams.backend, self._partial_result + ) + else: + result = self.finalize_train(self._params, self._partial_result) self._onedal_model = result.model @@ -142,6 +144,8 @@ def finalize_fit(self, queue=None): packed_coefficients[:, 0].squeeze(), ) + self._queue = None + return self @@ -172,9 +176,11 @@ def __init__(self, alpha=1.0, fit_intercept=True, copy_X=False, algorithm="norm_ super().__init__( fit_intercept=fit_intercept, alpha=alpha, copy_X=copy_X, algorithm=algorithm ) + self._queue = None self._reset() def _reset(self): + self._queue = None self._partial_result = self.partial_train_result() @bind_default_backend("linear_model.regression") @@ -232,8 +238,9 @@ def partial_fit(self, X, y, queue=None): self._params, self._partial_result, X_table, y_table ) - @supports_queue - def finalize_fit(self, queue=None): + self._queue = queue + + def finalize_fit(self): """ Finalizes ridge regression computation and obtains coefficients from the current `_partial_result`. 
@@ -248,7 +255,8 @@ def finalize_fit(self, queue=None): self : object Returns the instance itself. """ - result = self.finalize_train(self._params, self._partial_result) + with SyclQueueManager.manage_global_queue(self._queue): + result = self.finalize_train(self._params, self._partial_result) self._onedal_model = result.model @@ -258,4 +266,6 @@ def finalize_fit(self, queue=None): packed_coefficients[:, 0].squeeze(), ) + self._queue = None + return self diff --git a/onedal/spmd/basic_statistics/basic_statistics.py b/onedal/spmd/basic_statistics/basic_statistics.py index f519bb225f..72df4d778e 100644 --- a/onedal/spmd/basic_statistics/basic_statistics.py +++ b/onedal/spmd/basic_statistics/basic_statistics.py @@ -14,7 +14,7 @@ # limitations under the License. # ============================================================================== -from ..._device_offload import support_input_format, supports_queue +from ..._device_offload import support_input_format from ...basic_statistics import BasicStatistics as BasicStatistics_Batch from ...common._backend import bind_spmd_backend @@ -23,6 +23,6 @@ class BasicStatistics(BasicStatistics_Batch): @bind_spmd_backend("basic_statistics") def compute(self, data, weights=None): ... - @support_input_format() + @support_input_format def fit(self, data, sample_weight=None, queue=None): return super().fit(data, sample_weight, queue=queue) diff --git a/onedal/spmd/cluster/kmeans.py b/onedal/spmd/cluster/kmeans.py index 54213ae8db..ebd0c55827 100644 --- a/onedal/spmd/cluster/kmeans.py +++ b/onedal/spmd/cluster/kmeans.py @@ -17,7 +17,7 @@ from ..._device_offload import support_input_format from ...cluster import KMeans as KMeans_Batch from ...cluster import KMeansInit as KMeansInit_Batch -from ...common._backend import bind_default_backend, bind_spmd_backend +from ...common._backend import bind_spmd_backend from ...spmd.basic_statistics import BasicStatistics @@ -43,14 +43,14 @@ def train(self, params, X_table, centroids_table): ... @bind_spmd_backend("kmeans.clustering") def infer(self, params, model, centroids_table): ... - @support_input_format() + @support_input_format def fit(self, X, y=None, queue=None): return super().fit(X, y, queue=queue) - @support_input_format() + @support_input_format def predict(self, X, queue=None): return super().predict(X, queue=queue) - @support_input_format() + @support_input_format def fit_predict(self, X, y=None, queue=None): return super().fit_predict(X, queue=queue) diff --git a/onedal/spmd/covariance/covariance.py b/onedal/spmd/covariance/covariance.py index 43600cbe73..d007cb88d7 100644 --- a/onedal/spmd/covariance/covariance.py +++ b/onedal/spmd/covariance/covariance.py @@ -27,6 +27,6 @@ def compute(self, *args, **kwargs): ... @bind_spmd_backend("covariance") def finalize_compute(self, params, partial_result): ... - @support_input_format() + @support_input_format def fit(self, X, y=None, queue=None): return super().fit(X, queue=queue) diff --git a/onedal/spmd/decomposition/pca.py b/onedal/spmd/decomposition/pca.py index 571038afab..d1442af0cc 100644 --- a/onedal/spmd/decomposition/pca.py +++ b/onedal/spmd/decomposition/pca.py @@ -27,6 +27,6 @@ def train(self, params, X, queue=None): ... @bind_spmd_backend("decomposition.dim_reduction") def finalize_train(self, *args, **kwargs): ... 
- @support_input_format() + @support_input_format def fit(self, X, y=None, queue=None): return super().fit(X, queue=queue) diff --git a/onedal/spmd/linear_model/linear_model.py b/onedal/spmd/linear_model/linear_model.py index b6146cbec7..cbe3af8dc0 100644 --- a/onedal/spmd/linear_model/linear_model.py +++ b/onedal/spmd/linear_model/linear_model.py @@ -30,10 +30,10 @@ def finalize_train(self, *args, **kwargs): ... @bind_spmd_backend("linear_model.regression") def infer(self, params, model, X): ... - @support_input_format() + @support_input_format def fit(self, X, y, queue=None): return super().fit(X, y, queue=queue) - @support_input_format() + @support_input_format def predict(self, X, queue=None): return super().predict(X, queue=queue) diff --git a/onedal/spmd/linear_model/logistic_regression.py b/onedal/spmd/linear_model/logistic_regression.py index ca35dc1fbd..5dfed76b59 100644 --- a/onedal/spmd/linear_model/logistic_regression.py +++ b/onedal/spmd/linear_model/logistic_regression.py @@ -27,18 +27,18 @@ def train(self, params, X, y): ... @bind_spmd_backend("logistic_regression.classification") def infer(self, params, X, model): ... - @support_input_format() + @support_input_format def fit(self, X, y, queue=None): return super().fit(X, y, queue=queue) - @support_input_format() + @support_input_format def predict(self, X, queue=None): return super().predict(X, queue=queue) - @support_input_format() + @support_input_format def predict_proba(self, X, queue=None): return super().predict_proba(X, queue=queue) - @support_input_format() + @support_input_format def predict_log_proba(self, X, queue=None): return super().predict_log_proba(X, queue=queue) diff --git a/onedal/spmd/neighbors/neighbors.py b/onedal/spmd/neighbors/neighbors.py index 838b0a8e21..b9f5f98d18 100644 --- a/onedal/spmd/neighbors/neighbors.py +++ b/onedal/spmd/neighbors/neighbors.py @@ -28,19 +28,19 @@ def train(self, *args, **kwargs): ... @bind_spmd_backend("neighbors.classification") def infer(self, *args, **kwargs): ... - @support_input_format() + @support_input_format def fit(self, X, y, queue=None): return super().fit(X, y, queue=queue) - @support_input_format() + @support_input_format def predict(self, X, queue=None): return super().predict(X, queue=queue) - @support_input_format() + @support_input_format def predict_proba(self, X, queue=None): raise NotImplementedError("predict_proba not supported in distributed mode.") - @support_input_format() + @support_input_format def kneighbors(self, X=None, n_neighbors=None, return_distance=True, queue=None): return super().kneighbors(X, n_neighbors, return_distance, queue=queue) @@ -59,7 +59,7 @@ def train(self, *args, **kwargs): ... @bind_spmd_backend("neighbors.regression") def infer(self, *args, **kwargs): ... - @support_input_format() + @support_input_format @supports_queue def fit(self, X, y, queue=None): if queue is not None and queue.sycl_device.is_gpu: @@ -70,11 +70,11 @@ def fit(self, X, y, queue=None): "CPU. Consider running on it on GPU." 
) - @support_input_format() + @support_input_format def kneighbors(self, X=None, n_neighbors=None, return_distance=True, queue=None): return super().kneighbors(X, n_neighbors, return_distance, queue=queue) - @support_input_format() + @support_input_format @supports_queue def predict(self, X, queue=None): return self._predict_gpu(X) diff --git a/sklearnex/basic_statistics/incremental_basic_statistics.py b/sklearnex/basic_statistics/incremental_basic_statistics.py index d1ddcd55dc..3f99900e1e 100644 --- a/sklearnex/basic_statistics/incremental_basic_statistics.py +++ b/sklearnex/basic_statistics/incremental_basic_statistics.py @@ -15,15 +15,14 @@ # ============================================================================== import numpy as np -from sklearn.base import BaseEstimator -from sklearn.utils import check_array, gen_batches -from sklearn.utils.validation import _check_sample_weight - from daal4py.sklearn._n_jobs_support import control_n_jobs from daal4py.sklearn._utils import sklearn_check_version from onedal.basic_statistics import ( IncrementalBasicStatistics as onedal_IncrementalBasicStatistics, ) +from sklearn.base import BaseEstimator +from sklearn.utils import check_array, gen_batches +from sklearn.utils.validation import _check_sample_weight from .._device_offload import dispatch from .._utils import IntelEstimator, PatchingConditionsChain @@ -186,9 +185,9 @@ def _get_onedal_result_options(self, options): assert isinstance(onedal_options, str) return options - def _onedal_finalize_fit(self, queue=None): + def _onedal_finalize_fit(self): assert hasattr(self, "_onedal_estimator") - self._onedal_estimator.finalize_fit(queue=queue) + self._onedal_estimator.finalize_fit() self._need_to_finalize = False def _onedal_partial_fit(self, X, sample_weight=None, queue=None, check_input=True): @@ -258,7 +257,7 @@ def _onedal_fit(self, X, sample_weight=None, queue=None): self.n_features_in_ = X.shape[1] - self._onedal_finalize_fit(queue=queue) + self._onedal_finalize_fit() return self diff --git a/sklearnex/covariance/incremental_covariance.py b/sklearnex/covariance/incremental_covariance.py index 89ed92b601..ea1cffd5c4 100644 --- a/sklearnex/covariance/incremental_covariance.py +++ b/sklearnex/covariance/incremental_covariance.py @@ -18,18 +18,17 @@ import warnings import numpy as np +from daal4py.sklearn._n_jobs_support import control_n_jobs +from daal4py.sklearn._utils import daal_check_version, sklearn_check_version +from onedal.covariance import ( + IncrementalEmpiricalCovariance as onedal_IncrementalEmpiricalCovariance, +) from scipy import linalg from sklearn.base import BaseEstimator, clone from sklearn.covariance import EmpiricalCovariance as _sklearn_EmpiricalCovariance from sklearn.covariance import log_likelihood from sklearn.utils import check_array, gen_batches from sklearn.utils.validation import _num_features, check_is_fitted - -from daal4py.sklearn._n_jobs_support import control_n_jobs -from daal4py.sklearn._utils import daal_check_version, sklearn_check_version -from onedal.covariance import ( - IncrementalEmpiricalCovariance as onedal_IncrementalEmpiricalCovariance, -) from sklearnex import config_context from .._device_offload import dispatch, wrap_output_data @@ -145,9 +144,9 @@ def _onedal_supported(self, method_name, *data): ) return patching_status - def _onedal_finalize_fit(self, queue=None): + def _onedal_finalize_fit(self): assert hasattr(self, "_onedal_estimator") - self._onedal_estimator.finalize_fit(queue=queue) + self._onedal_estimator.finalize_fit() 
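+        # finalize_fit needs no queue argument here: the onedal estimator
+        # replays the queue it captured during partial_fit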
self._need_to_finalize = False if not daal_check_version((2024, "P", 400)) and self.assume_centered: @@ -363,7 +362,7 @@ def _onedal_fit(self, X, queue=None): X_batch = X[batch] self._onedal_partial_fit(X_batch, queue=queue, check_input=False) - self._onedal_finalize_fit(queue=queue) + self._onedal_finalize_fit() return self diff --git a/sklearnex/covariance/tests/test_incremental_covariance.py b/sklearnex/covariance/tests/test_incremental_covariance.py index 68272ced9e..50c6e16aa4 100644 --- a/sklearnex/covariance/tests/test_incremental_covariance.py +++ b/sklearnex/covariance/tests/test_incremental_covariance.py @@ -16,8 +16,14 @@ import numpy as np import pytest +from daal4py.sklearn._utils import daal_check_version from numpy.linalg import slogdet from numpy.testing import assert_allclose +from onedal.tests.utils._dataframes_support import ( + _as_numpy, + _convert_to_dataframe, + get_dataframes_and_queues, +) from scipy.linalg import pinvh from sklearn.covariance.tests.test_covariance import ( test_covariance, @@ -26,13 +32,6 @@ from sklearn.datasets import load_diabetes from sklearn.decomposition import PCA -from daal4py.sklearn._utils import daal_check_version -from onedal.tests.utils._dataframes_support import ( - _as_numpy, - _convert_to_dataframe, - get_dataframes_and_queues, -) - @pytest.mark.parametrize("dataframe,queue", get_dataframes_and_queues()) @pytest.mark.parametrize("dtype", [np.float32, np.float64]) diff --git a/sklearnex/linear_model/coordinate_descent.py b/sklearnex/linear_model/coordinate_descent.py index abe594ad29..cd7b2bcdca 100644 --- a/sklearnex/linear_model/coordinate_descent.py +++ b/sklearnex/linear_model/coordinate_descent.py @@ -19,12 +19,12 @@ # Note: `sklearnex.linear_model.ElasticNet` only has functional # sycl GPU support. No GPU device will be offloaded. -ElasticNet.fit = support_input_format(queue_param=False)(ElasticNet.fit) -ElasticNet.predict = support_input_format(queue_param=False)(ElasticNet.predict) -ElasticNet.score = support_input_format(queue_param=False)(ElasticNet.score) +ElasticNet.fit = support_input_format(ElasticNet.fit) +ElasticNet.predict = support_input_format(ElasticNet.predict) +ElasticNet.score = support_input_format(ElasticNet.score) # Note: `sklearnex.linear_model.Lasso` only has functional # sycl GPU support. No GPU device will be offloaded. 
-Lasso.fit = support_input_format(queue_param=False)(Lasso.fit) -Lasso.predict = support_input_format(queue_param=False)(Lasso.predict) -Lasso.score = support_input_format(queue_param=False)(Lasso.score) +Lasso.fit = support_input_format(Lasso.fit) +Lasso.predict = support_input_format(Lasso.predict) +Lasso.score = support_input_format(Lasso.score) diff --git a/sklearnex/linear_model/incremental_linear.py b/sklearnex/linear_model/incremental_linear.py index c52be49ca6..b8a72f47ef 100644 --- a/sklearnex/linear_model/incremental_linear.py +++ b/sklearnex/linear_model/incremental_linear.py @@ -18,16 +18,15 @@ import warnings import numpy as np -from sklearn.base import BaseEstimator, MultiOutputMixin, RegressorMixin -from sklearn.metrics import r2_score -from sklearn.utils import check_array, gen_batches -from sklearn.utils.validation import check_is_fitted - from daal4py.sklearn._n_jobs_support import control_n_jobs from daal4py.sklearn._utils import sklearn_check_version from onedal.linear_model import ( IncrementalLinearRegression as onedal_IncrementalLinearRegression, ) +from sklearn.base import BaseEstimator, MultiOutputMixin, RegressorMixin +from sklearn.metrics import r2_score +from sklearn.utils import check_array, gen_batches +from sklearn.utils.validation import check_is_fitted if sklearn_check_version("1.2"): from sklearn.utils._param_validation import Interval @@ -221,14 +220,14 @@ def _onedal_partial_fit(self, X, y, check_input=True, queue=None): self._onedal_estimator.partial_fit(X, y, queue=queue) self._need_to_finalize = True - def _onedal_finalize_fit(self, queue=None): + def _onedal_finalize_fit(self): assert hasattr(self, "_onedal_estimator") is_underdetermined = self.n_samples_seen_ < self.n_features_in_ + int( self.fit_intercept ) if is_underdetermined: raise ValueError("Not enough samples to finalize") - self._onedal_estimator.finalize_fit(queue=queue) + self._onedal_estimator.finalize_fit() self._need_to_finalize = False def _onedal_fit(self, X, y, queue=None): @@ -288,7 +287,7 @@ def _onedal_fit(self, X, y, queue=None): "Only one sample available. 
You may want to reshape your data array" ) - self._onedal_finalize_fit(queue=queue) + self._onedal_finalize_fit() return self @property diff --git a/sklearnex/linear_model/ridge.py b/sklearnex/linear_model/ridge.py index be71985b1c..672bf307f0 100644 --- a/sklearnex/linear_model/ridge.py +++ b/sklearnex/linear_model/ridge.py @@ -22,13 +22,12 @@ import numbers import numpy as np + from daal4py.sklearn._n_jobs_support import control_n_jobs from scipy.sparse import issparse from sklearn.linear_model import Ridge as _sklearn_Ridge from sklearn.metrics import r2_score from sklearn.utils.validation import check_is_fitted - from daal4py.sklearn._n_jobs_support import control_n_jobs - if not sklearn_check_version("1.2"): from sklearn.linear_model._base import _deprecate_normalize if sklearn_check_version("1.1") and not sklearn_check_version("1.2"): @@ -383,8 +382,8 @@ def _save_attributes(self): from daal4py.sklearn.linear_model import Ridge from onedal._device_offload import support_input_format - Ridge.fit = support_input_format(queue_param=False)(Ridge.fit) - Ridge.predict = support_input_format(queue_param=False)(Ridge.predict) - Ridge.score = support_input_format(queue_param=False)(Ridge.score) + Ridge.fit = support_input_format(Ridge.fit) + Ridge.predict = support_input_format(Ridge.predict) + Ridge.score = support_input_format(Ridge.score) logging.warning("Ridge requires oneDAL version >= 2024.6 but it was not found") diff --git a/sklearnex/manifold/t_sne.py b/sklearnex/manifold/t_sne.py index 0aa8d7df4f..4dea01bc6b 100755 --- a/sklearnex/manifold/t_sne.py +++ b/sklearnex/manifold/t_sne.py @@ -17,5 +17,5 @@ from daal4py.sklearn.manifold import TSNE from onedal._device_offload import support_input_format -TSNE.fit = support_input_format(queue_param=False)(TSNE.fit) -TSNE.fit_transform = support_input_format(queue_param=False)(TSNE.fit_transform) +TSNE.fit = support_input_format(TSNE.fit) +TSNE.fit_transform = support_input_format(TSNE.fit_transform) diff --git a/sklearnex/metrics/pairwise.py b/sklearnex/metrics/pairwise.py index 8ad789dce1..ffcc136e1d 100755 --- a/sklearnex/metrics/pairwise.py +++ b/sklearnex/metrics/pairwise.py @@ -17,6 +17,4 @@ from daal4py.sklearn.metrics import pairwise_distances from onedal._device_offload import support_input_format -pairwise_distances = support_input_format(freefunc=True, queue_param=False)( - pairwise_distances -) +pairwise_distances = support_input_format(pairwise_distances) diff --git a/sklearnex/metrics/ranking.py b/sklearnex/metrics/ranking.py index 753be6d0cd..4a4fdb8d65 100755 --- a/sklearnex/metrics/ranking.py +++ b/sklearnex/metrics/ranking.py @@ -17,4 +17,4 @@ from daal4py.sklearn.metrics import roc_auc_score from onedal._device_offload import support_input_format -roc_auc_score = support_input_format(freefunc=True, queue_param=False)(roc_auc_score) +roc_auc_score = support_input_format(roc_auc_score) diff --git a/sklearnex/model_selection/split.py b/sklearnex/model_selection/split.py index 59153114b9..5ed44c7428 100755 --- a/sklearnex/model_selection/split.py +++ b/sklearnex/model_selection/split.py @@ -17,6 +17,4 @@ from daal4py.sklearn.model_selection import train_test_split from onedal._device_offload import support_input_format -train_test_split = support_input_format(freefunc=True, queue_param=False)( - train_test_split -) +train_test_split = support_input_format(train_test_split) diff --git a/sklearnex/preview/decomposition/incremental_pca.py b/sklearnex/preview/decomposition/incremental_pca.py index c7ea7fc1f3..f47be3ee81 100644 --- 
a/sklearnex/preview/decomposition/incremental_pca.py +++ b/sklearnex/preview/decomposition/incremental_pca.py @@ -15,12 +15,11 @@ # =============================================================================== import numpy as np -from sklearn.decomposition import IncrementalPCA as _sklearn_IncrementalPCA -from sklearn.utils import check_array, gen_batches - from daal4py.sklearn._n_jobs_support import control_n_jobs from daal4py.sklearn._utils import sklearn_check_version from onedal.decomposition import IncrementalPCA as onedal_IncrementalPCA +from sklearn.decomposition import IncrementalPCA as _sklearn_IncrementalPCA +from sklearn.utils import check_array, gen_batches from ..._device_offload import dispatch, wrap_output_data from ..._utils import PatchingConditionsChain @@ -57,7 +56,7 @@ def __init__(self, n_components=None, *, whiten=False, copy=True, batch_size=Non def _onedal_transform(self, X, queue=None): assert hasattr(self, "_onedal_estimator") if self._need_to_finalize: - self._onedal_finalize_fit(queue) + self._onedal_finalize_fit() X = check_array(X, dtype=[np.float64, np.float32]) return self._onedal_estimator.predict(X, queue=queue) @@ -114,9 +113,9 @@ def _onedal_partial_fit(self, X, check_input=True, queue=None): self._onedal_estimator.partial_fit(X, queue=queue) self._need_to_finalize = True - def _onedal_finalize_fit(self, queue=None): + def _onedal_finalize_fit(self): assert hasattr(self, "_onedal_estimator") - self._onedal_estimator.finalize_fit(queue=queue) + self._onedal_estimator.finalize_fit() self._need_to_finalize = False def _onedal_fit(self, X, queue=None): @@ -147,7 +146,7 @@ def _onedal_fit(self, X, queue=None): X_batch = X[batch] self._onedal_partial_fit(X_batch, queue=queue) - self._onedal_finalize_fit(queue=queue) + self._onedal_finalize_fit() return self diff --git a/sklearnex/spmd/basic_statistics/tests/test_basic_statistics_spmd.py b/sklearnex/spmd/basic_statistics/tests/test_basic_statistics_spmd.py index d2b0cc5704..2963b38d73 100644 --- a/sklearnex/spmd/basic_statistics/tests/test_basic_statistics_spmd.py +++ b/sklearnex/spmd/basic_statistics/tests/test_basic_statistics_spmd.py @@ -17,7 +17,6 @@ import numpy as np import pytest from numpy.testing import assert_allclose - from onedal.basic_statistics.tests.utils import options_and_tests from onedal.tests.utils._dataframes_support import ( _convert_to_dataframe, @@ -62,7 +61,8 @@ def test_basic_stats_spmd_gold(dataframe, queue): ) # Ensure results of batch algo match spmd - spmd_result = BasicStatistics_SPMD().fit(local_dpt_data) + spmd = BasicStatistics_SPMD() + spmd_result = spmd.fit(local_dpt_data) batch_result = BasicStatistics_Batch().fit(data) for option in options_and_tests: diff --git a/sklearnex/spmd/cluster/tests/test_kmeans_spmd.py b/sklearnex/spmd/cluster/tests/test_kmeans_spmd.py index 18fc570873..1054d141ad 100644 --- a/sklearnex/spmd/cluster/tests/test_kmeans_spmd.py +++ b/sklearnex/spmd/cluster/tests/test_kmeans_spmd.py @@ -17,7 +17,6 @@ import numpy as np import pytest from numpy.testing import assert_allclose - from onedal.tests.utils._dataframes_support import ( _convert_to_dataframe, get_dataframes_and_queues, From 7168aa2fadb598f64b0cdaddb97f4314ecbff169 Mon Sep 17 00:00:00 2001 From: Andreas Huber Date: Fri, 13 Dec 2024 00:15:33 -0800 Subject: [PATCH 26/41] allow for onedal prefix in patching check --- sklearnex/tests/test_monkeypatch.py | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/sklearnex/tests/test_monkeypatch.py 
b/sklearnex/tests/test_monkeypatch.py index 995fab29e2..3a4dfdda8c 100755 --- a/sklearnex/tests/test_monkeypatch.py +++ b/sklearnex/tests/test_monkeypatch.py @@ -42,8 +42,9 @@ def test_monkey_patching(): n = _classes[i][1] class_module = getattr(p, n).__module__ - assert class_module.startswith("daal4py") or class_module.startswith( - "sklearnex" + assert any( + class_module.startswith(prefix) + for prefix in ["daal4py", "sklearnex", "onedal"] ), "Patching has completed with error." for i, _ in enumerate(_tokens): @@ -87,8 +88,9 @@ def test_monkey_patching(): sklearnex.patch_sklearn(t) class_module = getattr(p, n).__module__ - assert class_module.startswith("daal4py") or class_module.startswith( - "sklearnex" + assert any( + class_module.startswith(prefix) + for prefix in ["daal4py", "sklearnex", "onedal"] ), "Patching has completed with error." finally: sklearnex.unpatch_sklearn() From f0ca14bb1137804a6b20ad11c9daed5d9f669668 Mon Sep 17 00:00:00 2001 From: Andreas Huber Date: Fri, 13 Dec 2024 05:39:19 -0800 Subject: [PATCH 27/41] add missing wraps(func) --- onedal/_device_offload.py | 1 + 1 file changed, 1 insertion(+) diff --git a/onedal/_device_offload.py b/onedal/_device_offload.py index 019652eca9..675f333dfe 100644 --- a/onedal/_device_offload.py +++ b/onedal/_device_offload.py @@ -274,6 +274,7 @@ def invoke_func(self_or_None, *args, **kwargs): else: return func(self_or_None, *args, **kwargs) + @wraps(func) def wrapper_impl(*args, **kwargs): # remove self from args if it is a class method if inspect.isfunction(func) and "." in func.__qualname__: From 720e447f17e85b75795b4b6e4199c1f1c99ed0a8 Mon Sep 17 00:00:00 2001 From: Andreas Huber Date: Fri, 13 Dec 2024 08:15:33 -0800 Subject: [PATCH 28/41] fixup --- onedal/svm/tests/test_svc.py | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) diff --git a/onedal/svm/tests/test_svc.py b/onedal/svm/tests/test_svc.py index 3ed2777d43..f81b60cb13 100644 --- a/onedal/svm/tests/test_svc.py +++ b/onedal/svm/tests/test_svc.py @@ -26,15 +26,16 @@ import pytest import sklearn.utils.estimator_checks from numpy.testing import assert_array_almost_equal, assert_array_equal +from sklearn import datasets +from sklearn.datasets import make_blobs +from sklearn.metrics.pairwise import rbf_kernel +from sklearn.model_selection import train_test_split + from onedal.svm import SVC from onedal.tests.utils._device_selection import ( get_queues, pass_if_not_implemented_for_gpu, ) -from sklearn import datasets -from sklearn.datasets import make_blobs -from sklearn.metrics.pairwise import rbf_kernel -from sklearn.model_selection import train_test_split def _test_libsvm_parameters(queue, array_constr, dtype): @@ -105,7 +106,7 @@ def test_decision_function(queue): assert_array_almost_equal(dec.ravel(), clf.decision_function(X, queue=queue)) -@pass_if_not_implemented_for_gpu(reason="not implemented") +@pass_if_not_implemented_for_gpu(reason="multiclass svm is not implemented") @pytest.mark.parametrize("queue", get_queues()) def test_iris(queue): iris = datasets.load_iris() @@ -114,7 +115,7 @@ def test_iris(queue): assert_array_equal(clf.classes_, np.sort(clf.classes_)) -@pass_if_not_implemented_for_gpu(reason="not implemented") +@pass_if_not_implemented_for_gpu(reason="multiclass svm is not implemented") @pytest.mark.parametrize("queue", get_queues()) def test_decision_function_shape(queue): X, y = make_blobs(n_samples=80, centers=5, random_state=0) @@ -131,7 +132,7 @@ def test_decision_function_shape(queue): 
SVC(decision_function_shape="bad").fit(X_train, y_train, queue=queue) -@pass_if_not_implemented_for_gpu(reason="not implemented") +@pass_if_not_implemented_for_gpu(reason="multiclass svm is not implemented") @pytest.mark.parametrize("queue", get_queues()) def test_pickle(queue): iris = datasets.load_iris() @@ -156,7 +157,7 @@ def test_pickle(queue): pytest.param( get_queues("gpu"), marks=pytest.mark.xfail( - reason="raises Unimplemented error with inconsistent error message" + reason="raises Unimplemented error " "with inconsistent error message" ), ) ], From ff20f769725574422e655b14080f2f2362b60e2c Mon Sep 17 00:00:00 2001 From: Andreas Huber Date: Mon, 16 Dec 2024 00:26:10 -0800 Subject: [PATCH 29/41] fix neighbors --- onedal/neighbors/neighbors.py | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/onedal/neighbors/neighbors.py b/onedal/neighbors/neighbors.py index 1bf893c273..83e8957fd5 100755 --- a/onedal/neighbors/neighbors.py +++ b/onedal/neighbors/neighbors.py @@ -19,6 +19,7 @@ from os import X_OK, XATTR_SIZE_MAX import numpy as np + from daal4py import ( bf_knn_classification_model, bf_knn_classification_prediction, @@ -227,7 +228,7 @@ def _fit(self, X, y): if y is not None or self.requires_y: shape = getattr(y, "shape", None) - X, y = self._validate_data( + X, y = super()._validate_data( X, y, dtype=[np.float64, np.float32], accept_sparse="csr" ) self._shape = shape if shape is not None else y.shape @@ -254,7 +255,7 @@ def _fit(self, X, y): else: self._y = y else: - X, _ = self._validate_data(X, dtype=[np.float64, np.float32]) + X, _ = super()._validate_data(X, dtype=[np.float64, np.float32]) self.n_samples_fit_ = X.shape[0] self.n_features_in_ = X.shape[1] @@ -269,14 +270,13 @@ def _fit(self, X, y): "enter integer value" % type(self.n_neighbors) ) - self._fit_method = self._parse_auto_method( + self._fit_method = super()._parse_auto_method( self.algorithm, self.n_samples_fit_, self.n_features_in_ ) _fit_y = None - # global queue is set as per user configuration (`target_offload`) or from data prior to calling this internal function queue = SyclQueueManager.get_global_queue() - gpu_device = queue is not None and getattr(queue.sycl_device, "is_gpu", False) + gpu_device = queue is not None and queue.sycl_device.is_gpu if _is_classifier(self) or (_is_regressor(self) and gpu_device): _fit_y = self._validate_targets(self._y, X.dtype).reshape((-1, 1)) @@ -615,13 +615,13 @@ def _onedal_fit(self, X, y): return train_alg(**params).compute(X, y).model - X, y = to_table(X, y, queue=queue) - params = self._get_onedal_params(X, y) + X_table, y_table = to_table(X, y, queue=queue) + params = self._get_onedal_params(X_table, y) if gpu_device: - return self.train(params, X, y).model + return self.train(params, X_table, y_table).model else: - return self.train_search(params, X).model + return self.train_search(params, X_table).model def _onedal_predict(self, model, X, params): assert self._onedal_model is not None, "Model is not trained" From 6307b24819d8218e25b3c329bc56b36c68325247 Mon Sep 17 00:00:00 2001 From: Andreas Huber Date: Tue, 17 Dec 2024 03:20:53 -0800 Subject: [PATCH 30/41] debug print --- onedal/_device_offload.py | 1 + 1 file changed, 1 insertion(+) diff --git a/onedal/_device_offload.py b/onedal/_device_offload.py index 675f333dfe..cdadcf9bf2 100644 --- a/onedal/_device_offload.py +++ b/onedal/_device_offload.py @@ -206,6 +206,7 @@ def _copy_to_usm(queue, array): return _copy_to_usm(queue, array.astype(np.float32)) else: if isinstance(array, 
Iterable): + print(f"Array is iterable - recurse {array}") array = [_copy_to_usm(queue, i) for i in array] return array From e5d109c5e54c85bf2043ba7cad54068792ad24d6 Mon Sep 17 00:00:00 2001 From: Andreas Huber Date: Tue, 17 Dec 2024 05:02:18 -0800 Subject: [PATCH 31/41] debug output --- onedal/_device_offload.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/onedal/_device_offload.py b/onedal/_device_offload.py index cdadcf9bf2..06f047242c 100644 --- a/onedal/_device_offload.py +++ b/onedal/_device_offload.py @@ -186,6 +186,10 @@ def wrapper(self, *args, **kwargs): def _copy_to_usm(queue, array): + print(f"_copy_to_usm: {type(array)=}") + if shape := getattr(array, "shape", None): + print(f"_copy_to_usm: {shape=}") + print(f"_copy_to_usm: array=<{array}>") if not dpctl_available: raise RuntimeError( "dpctl need to be installed to work " "with __sycl_usm_array_interface__" @@ -206,7 +210,6 @@ def _copy_to_usm(queue, array): return _copy_to_usm(queue, array.astype(np.float32)) else: if isinstance(array, Iterable): - print(f"Array is iterable - recurse {array}") array = [_copy_to_usm(queue, i) for i in array] return array @@ -296,6 +299,8 @@ def wrapper_impl(*args, **kwargs): hostkwargs["queue"] = queue result = invoke_func(self, *hostargs, **hostkwargs) + # Is this even required? + # wrap_output_data does copy to device with usm, so are we copying back here? if queue is not None: result = _copy_to_usm(queue, result) if dpnp_available and isinstance(data[0], dpnp.ndarray): From 039d89e4f3e41f5472ebfc0eeb1229f2191ab04b Mon Sep 17 00:00:00 2001 From: Andreas Huber Date: Tue, 17 Dec 2024 06:01:43 -0800 Subject: [PATCH 32/41] Add new logic for sparse matrix in _copy_to_usm --- onedal/_device_offload.py | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/onedal/_device_offload.py b/onedal/_device_offload.py index 06f047242c..d588fa6b49 100644 --- a/onedal/_device_offload.py +++ b/onedal/_device_offload.py @@ -20,6 +20,7 @@ from functools import wraps import numpy as np +from scipy import sparse as sp from sklearn import get_config from ._config import _get_config @@ -195,6 +196,12 @@ def _copy_to_usm(queue, array): "dpctl need to be installed to work " "with __sycl_usm_array_interface__" ) + if sp.issparse(array): + data = _copy_to_usm(queue, array.data) + indices = _copy_to_usm(queue, array.indices) + indptr = _copy_to_usm(queue, array.indptr) + return array.__class__((data, indices, indptr), shape=array.shape) + if hasattr(array, "__array__"): try: From 2f37e085b6e97139d2576b00910f3c636e7dd14b Mon Sep 17 00:00:00 2001 From: Andreas Huber Date: Tue, 17 Dec 2024 07:09:23 -0800 Subject: [PATCH 33/41] only _copy_to_usm with usm_iface --- onedal/_device_offload.py | 15 ++------------- 1 file changed, 2 insertions(+), 13 deletions(-) diff --git a/onedal/_device_offload.py b/onedal/_device_offload.py index d588fa6b49..7b0a327fd2 100644 --- a/onedal/_device_offload.py +++ b/onedal/_device_offload.py @@ -187,21 +187,11 @@ def wrapper(self, *args, **kwargs): def _copy_to_usm(queue, array): - print(f"_copy_to_usm: {type(array)=}") - if shape := getattr(array, "shape", None): - print(f"_copy_to_usm: {shape=}") - print(f"_copy_to_usm: array=<{array}>") if not dpctl_available: raise RuntimeError( "dpctl need to be installed to work " "with __sycl_usm_array_interface__" ) - if sp.issparse(array): - data = _copy_to_usm(queue, array.data) - indices = _copy_to_usm(queue, array.indices) - indptr = _copy_to_usm(queue, array.indptr) - return array.__class__((data, indices, indptr), 
shape=array.shape) - if hasattr(array, "__array__"): try: @@ -306,9 +296,8 @@ def wrapper_impl(*args, **kwargs): hostkwargs["queue"] = queue result = invoke_func(self, *hostargs, **hostkwargs) - # Is this even required? - # wrap_output_data does copy to device with usm, so are we copying back here? - if queue is not None: + usm_iface = getattr(data[0], "__sycl_usm_array_interface__", None) + if queue is not None and usm_iface is not None: result = _copy_to_usm(queue, result) if dpnp_available and isinstance(data[0], dpnp.ndarray): result = _convert_to_dpnp(result) From e40cb3f80c8eddd3eeb60f8e9cab2752e8451d45 Mon Sep 17 00:00:00 2001 From: Andreas Huber Date: Tue, 17 Dec 2024 08:45:37 -0800 Subject: [PATCH 34/41] lint --- onedal/basic_statistics/basic_statistics.py | 1 + .../incremental_basic_statistics.py | 1 + onedal/cluster/dbscan.py | 1 + onedal/cluster/kmeans.py | 4 +++- onedal/cluster/kmeans_init.py | 3 ++- onedal/covariance/covariance.py | 1 + onedal/covariance/incremental_covariance.py | 1 + onedal/datatypes/_data_conversion.py | 1 + onedal/datatypes/tests/test_data.py | 1 + onedal/decomposition/incremental_pca.py | 1 + onedal/decomposition/pca.py | 5 +++-- onedal/ensemble/forest.py | 5 +++-- onedal/linear_model/incremental_linear_model.py | 1 + onedal/linear_model/linear_model.py | 1 + onedal/linear_model/logistic_regression.py | 1 + onedal/primitives/kernel_functions.py | 1 + onedal/svm/svm.py | 12 +++++++++--- onedal/svm/tests/test_csr_svm.py | 7 ++++--- onedal/svm/tests/test_nusvc.py | 11 ++++++----- onedal/svm/tests/test_nusvr.py | 7 ++++--- onedal/utils/validation.py | 8 +++++--- .../incremental_basic_statistics.py | 7 ++++--- sklearnex/covariance/incremental_covariance.py | 11 ++++++----- .../covariance/tests/test_incremental_covariance.py | 13 +++++++------ sklearnex/linear_model/ridge.py | 3 ++- sklearnex/preview/decomposition/incremental_pca.py | 5 +++-- .../tests/test_basic_statistics_spmd.py | 1 + sklearnex/spmd/cluster/tests/test_kmeans_spmd.py | 1 + 28 files changed, 75 insertions(+), 40 deletions(-) diff --git a/onedal/basic_statistics/basic_statistics.py b/onedal/basic_statistics/basic_statistics.py index 99c99f0f6b..524091d078 100644 --- a/onedal/basic_statistics/basic_statistics.py +++ b/onedal/basic_statistics/basic_statistics.py @@ -17,6 +17,7 @@ from abc import ABCMeta, abstractmethod import numpy as np + from onedal._device_offload import supports_queue from ..common._backend import bind_default_backend diff --git a/onedal/basic_statistics/incremental_basic_statistics.py b/onedal/basic_statistics/incremental_basic_statistics.py index 50767d4c56..a5da78e5f1 100644 --- a/onedal/basic_statistics/incremental_basic_statistics.py +++ b/onedal/basic_statistics/incremental_basic_statistics.py @@ -15,6 +15,7 @@ # ============================================================================== import numpy as np + from daal4py.sklearn._utils import get_dtype from onedal._device_offload import SyclQueueManager, supports_queue from onedal.common._backend import bind_default_backend diff --git a/onedal/cluster/dbscan.py b/onedal/cluster/dbscan.py index dd4efed612..6c009f4669 100644 --- a/onedal/cluster/dbscan.py +++ b/onedal/cluster/dbscan.py @@ -15,6 +15,7 @@ # =============================================================================== import numpy as np + from daal4py.sklearn._utils import get_dtype, make2d from onedal._device_offload import supports_queue from onedal.common._backend import bind_default_backend diff --git a/onedal/cluster/kmeans.py 
b/onedal/cluster/kmeans.py index 4516a79add..de2c2012b0 100644 --- a/onedal/cluster/kmeans.py +++ b/onedal/cluster/kmeans.py @@ -19,6 +19,7 @@ from abc import ABC import numpy as np + from daal4py.sklearn._utils import daal_check_version from onedal._device_offload import SyclQueueManager, supports_queue from onedal.basic_statistics import BasicStatistics @@ -27,12 +28,13 @@ if daal_check_version((2023, "P", 200)): from .kmeans_init import KMeansInit -from onedal import _default_backend from sklearn.cluster._kmeans import _kmeans_plusplus from sklearn.exceptions import ConvergenceWarning from sklearn.metrics.pairwise import euclidean_distances from sklearn.utils import check_random_state +from onedal import _default_backend + from ..common._mixin import ClusterMixin, TransformerMixin from ..datatypes import from_table, to_table from ..utils.validation import _check_array, _is_arraylike_not_scalar, _is_csr diff --git a/onedal/cluster/kmeans_init.py b/onedal/cluster/kmeans_init.py index 047ed78ce5..58797ea70a 100755 --- a/onedal/cluster/kmeans_init.py +++ b/onedal/cluster/kmeans_init.py @@ -15,10 +15,11 @@ # ============================================================================== import numpy as np +from sklearn.utils import check_random_state + from daal4py.sklearn._utils import daal_check_version from onedal._device_offload import SyclQueueManager, supports_queue from onedal.common._backend import bind_default_backend -from sklearn.utils import check_random_state from ..datatypes import from_table, to_table from ..utils.validation import _check_array diff --git a/onedal/covariance/covariance.py b/onedal/covariance/covariance.py index c2efd97787..bc9445c3ef 100644 --- a/onedal/covariance/covariance.py +++ b/onedal/covariance/covariance.py @@ -16,6 +16,7 @@ from abc import ABCMeta import numpy as np + from daal4py.sklearn._utils import daal_check_version, get_dtype from onedal._device_offload import supports_queue from onedal.common._backend import bind_default_backend diff --git a/onedal/covariance/incremental_covariance.py b/onedal/covariance/incremental_covariance.py index ff429b42e5..2fd189e8d6 100644 --- a/onedal/covariance/incremental_covariance.py +++ b/onedal/covariance/incremental_covariance.py @@ -15,6 +15,7 @@ # =============================================================================== import numpy as np + from daal4py.sklearn._utils import daal_check_version from onedal._device_offload import SyclQueueManager, supports_queue from onedal.common._backend import bind_default_backend diff --git a/onedal/datatypes/_data_conversion.py b/onedal/datatypes/_data_conversion.py index 6269632711..6c2e10cd9c 100644 --- a/onedal/datatypes/_data_conversion.py +++ b/onedal/datatypes/_data_conversion.py @@ -15,6 +15,7 @@ # ============================================================================== import numpy as np + from onedal import _default_backend as backend diff --git a/onedal/datatypes/tests/test_data.py b/onedal/datatypes/tests/test_data.py index 66e0730ce8..55426a889e 100644 --- a/onedal/datatypes/tests/test_data.py +++ b/onedal/datatypes/tests/test_data.py @@ -18,6 +18,7 @@ import pytest import scipy.sparse as sp from numpy.testing import assert_allclose + from onedal import _default_backend, _dpc_backend from onedal.datatypes import from_table, to_table from onedal.utils._dpep_helpers import dpctl_available diff --git a/onedal/decomposition/incremental_pca.py b/onedal/decomposition/incremental_pca.py index f60e902f1f..0fa5be5f95 100644 --- 
a/onedal/decomposition/incremental_pca.py +++ b/onedal/decomposition/incremental_pca.py @@ -15,6 +15,7 @@ # ============================================================================== import numpy as np + from onedal._device_offload import SyclQueueManager, supports_queue from onedal.common._backend import bind_default_backend diff --git a/onedal/decomposition/pca.py b/onedal/decomposition/pca.py index 9955bc0e0b..c3769d90e3 100644 --- a/onedal/decomposition/pca.py +++ b/onedal/decomposition/pca.py @@ -18,11 +18,12 @@ from abc import ABCMeta import numpy as np -from onedal._device_offload import supports_queue -from onedal.common._backend import bind_default_backend from sklearn.decomposition._pca import _infer_dimension from sklearn.utils.extmath import stable_cumsum +from onedal._device_offload import supports_queue +from onedal.common._backend import bind_default_backend + from ..datatypes import from_table, to_table diff --git a/onedal/ensemble/forest.py b/onedal/ensemble/forest.py index 81725681a6..87167133a2 100644 --- a/onedal/ensemble/forest.py +++ b/onedal/ensemble/forest.py @@ -20,11 +20,12 @@ from math import ceil import numpy as np +from sklearn.ensemble import BaseEnsemble +from sklearn.utils import check_random_state + from daal4py.sklearn._utils import daal_check_version from onedal._device_offload import SyclQueueManager, supports_queue from onedal.common._backend import bind_default_backend -from sklearn.ensemble import BaseEnsemble -from sklearn.utils import check_random_state from sklearnex import get_hyperparameters from ..common._estimator_checks import _check_is_fitted diff --git a/onedal/linear_model/incremental_linear_model.py b/onedal/linear_model/incremental_linear_model.py index 4411cb60b8..a52992e8c6 100644 --- a/onedal/linear_model/incremental_linear_model.py +++ b/onedal/linear_model/incremental_linear_model.py @@ -15,6 +15,7 @@ # ============================================================================== import numpy as np + from daal4py.sklearn._utils import get_dtype from onedal._device_offload import SyclQueueManager, supports_queue from onedal.common._backend import bind_default_backend diff --git a/onedal/linear_model/linear_model.py b/onedal/linear_model/linear_model.py index a3831163ff..41b7114ae8 100755 --- a/onedal/linear_model/linear_model.py +++ b/onedal/linear_model/linear_model.py @@ -18,6 +18,7 @@ from numbers import Number import numpy as np + from daal4py.sklearn._utils import daal_check_version, get_dtype, make2d from onedal._device_offload import SyclQueueManager, supports_queue from onedal.common._backend import bind_default_backend diff --git a/onedal/linear_model/logistic_regression.py b/onedal/linear_model/logistic_regression.py index bc3b6af2d3..7175e797f0 100644 --- a/onedal/linear_model/logistic_regression.py +++ b/onedal/linear_model/logistic_regression.py @@ -18,6 +18,7 @@ from numbers import Number import numpy as np + from daal4py.sklearn._utils import daal_check_version, get_dtype, make2d from onedal._device_offload import SyclQueueManager, supports_queue from onedal.common._backend import bind_default_backend diff --git a/onedal/primitives/kernel_functions.py b/onedal/primitives/kernel_functions.py index be6edeab02..5d4240ef40 100644 --- a/onedal/primitives/kernel_functions.py +++ b/onedal/primitives/kernel_functions.py @@ -17,6 +17,7 @@ import queue import numpy as np + from onedal import _default_backend as backend from onedal._device_offload import SyclQueueManager, supports_queue from onedal.common._backend import 
BackendFunction diff --git a/onedal/svm/svm.py b/onedal/svm/svm.py index 8ed29ed943..b0adfe14d6 100644 --- a/onedal/svm/svm.py +++ b/onedal/svm/svm.py @@ -18,15 +18,21 @@ from enum import Enum import numpy as np +from scipy import sparse as sp + from onedal._device_offload import SyclQueueManager, supports_queue from onedal.common._backend import bind_default_backend -from scipy import sparse as sp from ..common._estimator_checks import _check_is_fitted from ..common._mixin import ClassifierMixin, RegressorMixin from ..datatypes import from_table, to_table -from ..utils.validation import (_check_array, _check_n_features, _check_X_y, - _column_or_1d, _validate_targets) +from ..utils.validation import ( + _check_array, + _check_n_features, + _check_X_y, + _column_or_1d, + _validate_targets, +) class SVMtype(Enum): diff --git a/onedal/svm/tests/test_csr_svm.py b/onedal/svm/tests/test_csr_svm.py index f74cf200c1..d7da6d404c 100644 --- a/onedal/svm/tests/test_csr_svm.py +++ b/onedal/svm/tests/test_csr_svm.py @@ -17,15 +17,16 @@ import numpy as np import pytest from numpy.testing import assert_array_almost_equal, assert_array_equal +from scipy import sparse as sp +from sklearn import datasets +from sklearn.datasets import make_classification + from onedal.common._mixin import ClassifierMixin from onedal.svm import SVC, SVR from onedal.tests.utils._device_selection import ( get_queues, pass_if_not_implemented_for_gpu, ) -from scipy import sparse as sp -from sklearn import datasets -from sklearn.datasets import make_classification def check_svm_model_equal( diff --git a/onedal/svm/tests/test_nusvc.py b/onedal/svm/tests/test_nusvc.py index bd7c75f695..29e8d2272f 100644 --- a/onedal/svm/tests/test_nusvc.py +++ b/onedal/svm/tests/test_nusvc.py @@ -17,17 +17,18 @@ import numpy as np import pytest from numpy.testing import assert_array_almost_equal, assert_array_equal -from onedal.svm import NuSVC -from onedal.tests.utils._device_selection import ( - get_queues, - pass_if_not_implemented_for_gpu, -) from sklearn import datasets from sklearn.datasets import make_blobs from sklearn.metrics.pairwise import rbf_kernel from sklearn.model_selection import train_test_split from sklearn.svm import NuSVC as SklearnNuSVC +from onedal.svm import NuSVC +from onedal.tests.utils._device_selection import ( + get_queues, + pass_if_not_implemented_for_gpu, +) + def _test_libsvm_parameters(queue, array_constr, dtype): X = array_constr([[-2, -1], [-1, -1], [-1, -2], [1, 1], [1, 2], [2, 1]], dtype=dtype) diff --git a/onedal/svm/tests/test_nusvr.py b/onedal/svm/tests/test_nusvr.py index 369ef19728..6bcc04e9f4 100644 --- a/onedal/svm/tests/test_nusvr.py +++ b/onedal/svm/tests/test_nusvr.py @@ -17,14 +17,15 @@ import numpy as np import pytest from numpy.testing import assert_allclose, assert_array_almost_equal, assert_array_equal +from sklearn import datasets +from sklearn.metrics.pairwise import rbf_kernel +from sklearn.svm import NuSVR as SklearnNuSVR + from onedal.svm import NuSVR from onedal.tests.utils._device_selection import ( get_queues, pass_if_not_implemented_for_gpu, ) -from sklearn import datasets -from sklearn.metrics.pairwise import rbf_kernel -from sklearn.svm import NuSVR as SklearnNuSVR synth_params = {"n_samples": 500, "n_features": 100, "random_state": 42} diff --git a/onedal/utils/validation.py b/onedal/utils/validation.py index 5833a25de4..9049767d4a 100644 --- a/onedal/utils/validation.py +++ b/onedal/utils/validation.py @@ -19,9 +19,10 @@ from numbers import Integral import numpy as np +from scipy 
import sparse as sp + from onedal._device_offload import supports_queue from onedal.common._backend import BackendFunction -from scipy import sparse as sp if np.lib.NumpyVersion(np.__version__) >= np.lib.NumpyVersion("2.0.0a0"): # numpy_version >= 2.0 @@ -30,13 +31,14 @@ # numpy_version < 2.0 from numpy import VisibleDeprecationWarning +from sklearn.preprocessing import LabelEncoder +from sklearn.utils.validation import check_array + from daal4py.sklearn.utils.validation import ( _assert_all_finite as _daal4py_assert_all_finite, ) from onedal import _default_backend as backend from onedal.datatypes import to_table -from sklearn.preprocessing import LabelEncoder -from sklearn.utils.validation import check_array class DataConversionWarning(UserWarning): diff --git a/sklearnex/basic_statistics/incremental_basic_statistics.py b/sklearnex/basic_statistics/incremental_basic_statistics.py index 3f99900e1e..d3671e3602 100644 --- a/sklearnex/basic_statistics/incremental_basic_statistics.py +++ b/sklearnex/basic_statistics/incremental_basic_statistics.py @@ -15,14 +15,15 @@ # ============================================================================== import numpy as np +from sklearn.base import BaseEstimator +from sklearn.utils import check_array, gen_batches +from sklearn.utils.validation import _check_sample_weight + from daal4py.sklearn._n_jobs_support import control_n_jobs from daal4py.sklearn._utils import sklearn_check_version from onedal.basic_statistics import ( IncrementalBasicStatistics as onedal_IncrementalBasicStatistics, ) -from sklearn.base import BaseEstimator -from sklearn.utils import check_array, gen_batches -from sklearn.utils.validation import _check_sample_weight from .._device_offload import dispatch from .._utils import IntelEstimator, PatchingConditionsChain diff --git a/sklearnex/covariance/incremental_covariance.py b/sklearnex/covariance/incremental_covariance.py index ea1cffd5c4..bbf9744933 100644 --- a/sklearnex/covariance/incremental_covariance.py +++ b/sklearnex/covariance/incremental_covariance.py @@ -18,17 +18,18 @@ import warnings import numpy as np -from daal4py.sklearn._n_jobs_support import control_n_jobs -from daal4py.sklearn._utils import daal_check_version, sklearn_check_version -from onedal.covariance import ( - IncrementalEmpiricalCovariance as onedal_IncrementalEmpiricalCovariance, -) from scipy import linalg from sklearn.base import BaseEstimator, clone from sklearn.covariance import EmpiricalCovariance as _sklearn_EmpiricalCovariance from sklearn.covariance import log_likelihood from sklearn.utils import check_array, gen_batches from sklearn.utils.validation import _num_features, check_is_fitted + +from daal4py.sklearn._n_jobs_support import control_n_jobs +from daal4py.sklearn._utils import daal_check_version, sklearn_check_version +from onedal.covariance import ( + IncrementalEmpiricalCovariance as onedal_IncrementalEmpiricalCovariance, +) from sklearnex import config_context from .._device_offload import dispatch, wrap_output_data diff --git a/sklearnex/covariance/tests/test_incremental_covariance.py b/sklearnex/covariance/tests/test_incremental_covariance.py index d1ea8fd475..e42373cf84 100644 --- a/sklearnex/covariance/tests/test_incremental_covariance.py +++ b/sklearnex/covariance/tests/test_incremental_covariance.py @@ -27,14 +27,8 @@ import numpy as np import pytest -from daal4py.sklearn._utils import daal_check_version from numpy.linalg import slogdet from numpy.testing import assert_allclose -from onedal.tests.utils._dataframes_support import 
( - _as_numpy, - _convert_to_dataframe, - get_dataframes_and_queues, -) from scipy.linalg import pinvh from sklearn.covariance.tests.test_covariance import ( test_covariance, @@ -43,6 +37,13 @@ from sklearn.datasets import load_diabetes from sklearn.decomposition import PCA +from daal4py.sklearn._utils import daal_check_version +from onedal.tests.utils._dataframes_support import ( + _as_numpy, + _convert_to_dataframe, + get_dataframes_and_queues, +) + @pytest.mark.parametrize("dataframe,queue", get_dataframes_and_queues()) @pytest.mark.parametrize("dtype", [np.float32, np.float64]) diff --git a/sklearnex/linear_model/ridge.py b/sklearnex/linear_model/ridge.py index 672bf307f0..b5c135219c 100644 --- a/sklearnex/linear_model/ridge.py +++ b/sklearnex/linear_model/ridge.py @@ -22,12 +22,13 @@ import numbers import numpy as np - from daal4py.sklearn._n_jobs_support import control_n_jobs from scipy.sparse import issparse from sklearn.linear_model import Ridge as _sklearn_Ridge from sklearn.metrics import r2_score from sklearn.utils.validation import check_is_fitted + from daal4py.sklearn._n_jobs_support import control_n_jobs + if not sklearn_check_version("1.2"): from sklearn.linear_model._base import _deprecate_normalize if sklearn_check_version("1.1") and not sklearn_check_version("1.2"): diff --git a/sklearnex/preview/decomposition/incremental_pca.py b/sklearnex/preview/decomposition/incremental_pca.py index f47be3ee81..949ae5ec40 100644 --- a/sklearnex/preview/decomposition/incremental_pca.py +++ b/sklearnex/preview/decomposition/incremental_pca.py @@ -15,11 +15,12 @@ # =============================================================================== import numpy as np +from sklearn.decomposition import IncrementalPCA as _sklearn_IncrementalPCA +from sklearn.utils import check_array, gen_batches + from daal4py.sklearn._n_jobs_support import control_n_jobs from daal4py.sklearn._utils import sklearn_check_version from onedal.decomposition import IncrementalPCA as onedal_IncrementalPCA -from sklearn.decomposition import IncrementalPCA as _sklearn_IncrementalPCA -from sklearn.utils import check_array, gen_batches from ..._device_offload import dispatch, wrap_output_data from ..._utils import PatchingConditionsChain diff --git a/sklearnex/spmd/basic_statistics/tests/test_basic_statistics_spmd.py b/sklearnex/spmd/basic_statistics/tests/test_basic_statistics_spmd.py index 2963b38d73..29c5ad8154 100644 --- a/sklearnex/spmd/basic_statistics/tests/test_basic_statistics_spmd.py +++ b/sklearnex/spmd/basic_statistics/tests/test_basic_statistics_spmd.py @@ -17,6 +17,7 @@ import numpy as np import pytest from numpy.testing import assert_allclose + from onedal.basic_statistics.tests.utils import options_and_tests from onedal.tests.utils._dataframes_support import ( _convert_to_dataframe, diff --git a/sklearnex/spmd/cluster/tests/test_kmeans_spmd.py b/sklearnex/spmd/cluster/tests/test_kmeans_spmd.py index 1054d141ad..18fc570873 100644 --- a/sklearnex/spmd/cluster/tests/test_kmeans_spmd.py +++ b/sklearnex/spmd/cluster/tests/test_kmeans_spmd.py @@ -17,6 +17,7 @@ import numpy as np import pytest from numpy.testing import assert_allclose + from onedal.tests.utils._dataframes_support import ( _convert_to_dataframe, get_dataframes_and_queues, From cbf0d3504a0817c164582091792b6868c28871e9 Mon Sep 17 00:00:00 2001 From: Andreas Huber Date: Tue, 17 Dec 2024 13:55:19 -0800 Subject: [PATCH 35/41] remove accidental import --- onedal/neighbors/neighbors.py | 1 - 1 file changed, 1 deletion(-) diff --git 
a/onedal/neighbors/neighbors.py b/onedal/neighbors/neighbors.py index 83e8957fd5..815ab7148c 100755 --- a/onedal/neighbors/neighbors.py +++ b/onedal/neighbors/neighbors.py @@ -16,7 +16,6 @@ from abc import ABCMeta, abstractmethod from numbers import Integral -from os import X_OK, XATTR_SIZE_MAX import numpy as np From c80c972f8cc52a0cf3807cdbeb10aa94ac0e8fb9 Mon Sep 17 00:00:00 2001 From: Andreas Huber Date: Fri, 21 Feb 2025 06:51:14 -0800 Subject: [PATCH 36/41] fix some tests --- onedal/covariance/incremental_covariance.py | 2 +- .../linear_model/incremental_linear_model.py | 22 +++++++++++++++++++ .../basic_statistics/basic_statistics.py | 2 +- 3 files changed, 24 insertions(+), 2 deletions(-) diff --git a/onedal/covariance/incremental_covariance.py b/onedal/covariance/incremental_covariance.py index 403a70c7ca..5246c3acb0 100644 --- a/onedal/covariance/incremental_covariance.py +++ b/onedal/covariance/incremental_covariance.py @@ -117,7 +117,7 @@ def partial_fit(self, X, y=None, queue=None): self._dtype = X_table.dtype params = self._get_onedal_params(self._dtype) - self._partial_result = self.partial_compute(params, self._partial_result, table_X) + self._partial_result = self.partial_compute(params, self._partial_result, X_table) self._need_to_finalize = True # store the queue for when we finalize self._queue = queue diff --git a/onedal/linear_model/incremental_linear_model.py b/onedal/linear_model/incremental_linear_model.py index d6276f15d7..b874e1ad1a 100644 --- a/onedal/linear_model/incremental_linear_model.py +++ b/onedal/linear_model/incremental_linear_model.py @@ -63,6 +63,17 @@ def _reset(self): self._queue = None self._partial_result = self.partial_train_result() + def __getstate__(self): + # Since finalize_fit can't be dispatched without a directly provided queue + # and the dispatching policy can't be serialized, the computation is finalized + # here, and the policy is not saved in the serialized data. + + self.finalize_fit() + data = self.__dict__.copy() + data.pop("_queue", None) + + return data + @supports_queue def partial_fit(self, X, y, queue=None): """ @@ -186,6 +197,17 @@ def _reset(self): self._queue = None self._partial_result = self.partial_train_result() + def __getstate__(self): + # Since finalize_fit can't be dispatched without a directly provided queue + # and the dispatching policy can't be serialized, the computation is finalized + # here, and the policy is not saved in the serialized data. + + self.finalize_fit() + data = self.__dict__.copy() + data.pop("_queue", None) + + return data + @bind_default_backend("linear_model.regression") def partial_train_result(self): ... 
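[editor's sketch] The __getstate__ hunks above implement a finalize-before-pickle pattern: pending partial results are reduced to final model state, and the unpicklable queue handle is dropped from the serialized dict. Below is a minimal, self-contained Python sketch of that pattern; the class and attribute names are illustrative only, not the actual onedal classes.

    import pickle

    class _IncrementalEstimatorSketch:
        # Toy model of the pattern: finalize pending work before pickling,
        # then strip the unpicklable queue handle from the state.

        def __init__(self):
            self._queue = object()      # stand-in for an unpicklable SYCL queue
            self._partial = [1, 2, 3]   # stand-in for a backend partial result
            self.result_ = None

        def finalize_fit(self):
            # Collapse accumulated partial results into final model state.
            if self.result_ is None:
                self.result_ = sum(self._partial)
            return self

        def __getstate__(self):
            # Finalize first so no work is lost, then drop the queue so the
            # remaining state pickles cleanly.
            self.finalize_fit()
            data = self.__dict__.copy()
            data.pop("_queue", None)
            return data

    restored = pickle.loads(pickle.dumps(_IncrementalEstimatorSketch()))
    assert restored.result_ == 6 and "_queue" not in restored.__dict__

Because unpickling restores __dict__ directly from the returned state, the round-tripped estimator carries the finalized result but no queue, matching the behavior the patch comments describe.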
diff --git a/sklearnex/basic_statistics/basic_statistics.py b/sklearnex/basic_statistics/basic_statistics.py index 4cb7cab017..0582c231a2 100644 --- a/sklearnex/basic_statistics/basic_statistics.py +++ b/sklearnex/basic_statistics/basic_statistics.py @@ -25,7 +25,7 @@ from daal4py.sklearn._n_jobs_support import control_n_jobs from daal4py.sklearn._utils import daal_check_version, sklearn_check_version from onedal.basic_statistics import BasicStatistics as onedal_BasicStatistics -from onedal.utils import _is_csr +from onedal.utils.validation import _is_csr from .._device_offload import dispatch from .._utils import IntelEstimator, PatchingConditionsChain From 2920ae421883384a68ec57e71dce15b3bee5a341 Mon Sep 17 00:00:00 2001 From: Andreas Huber Date: Fri, 21 Feb 2025 10:36:05 -0800 Subject: [PATCH 37/41] properly use queue from data in sklearnex --- sklearnex/_device_offload.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/sklearnex/_device_offload.py b/sklearnex/_device_offload.py index 094542cea7..e949487e50 100644 --- a/sklearnex/_device_offload.py +++ b/sklearnex/_device_offload.py @@ -27,8 +27,8 @@ from ._config import get_config -def _get_backend(obj, method_name, *data): - with SyclQueueManager.manage_global_queue(None, *data) as queue: +def _get_backend(obj, queue, method_name, *data): + with SyclQueueManager.manage_global_queue(queue, *data) as queue: cpu_device = queue is None or getattr(queue.sycl_device, "is_cpu", True) gpu_device = queue is not None and getattr(queue.sycl_device, "is_gpu", False) @@ -65,7 +65,7 @@ def dispatch(obj, method_name, branches, *args, **kwargs): has_usm_data_for_kwargs, hostvalues = _transfer_to_host(*kwargs.values()) hostkwargs = dict(zip(kwargs.keys(), hostvalues)) - backend, patching_status = _get_backend(obj, method_name, *hostargs) + backend, patching_status = _get_backend(obj, queue, method_name, *hostargs) has_usm_data = has_usm_data_for_args or has_usm_data_for_kwargs if backend == "onedal": # Host args only used before onedal backend call. From fd584900993b41a7aaf81c24f424d5b39105c459 Mon Sep 17 00:00:00 2001 From: Andreas Huber Date: Fri, 21 Feb 2025 11:11:47 -0800 Subject: [PATCH 38/41] fixup --- onedal/datatypes/tests/test_data.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/onedal/datatypes/tests/test_data.py b/onedal/datatypes/tests/test_data.py index 6c67e20ef2..d1d847aed4 100644 --- a/onedal/datatypes/tests/test_data.py +++ b/onedal/datatypes/tests/test_data.py @@ -454,7 +454,7 @@ def test_non_array(X, queue): @pytest.mark.skipif( - not _is_dpc_backend, reason="Requires DPC backend for dtype conversion" + not backend.is_dpc, reason="Requires DPC backend for dtype conversion" ) @pytest.mark.parametrize("X", [None, 5, "test", True, [], np.pi, lambda: None]) def test_low_precision_non_array(X): From 0ca7107752e576540063211525825f99b2a6549c Mon Sep 17 00:00:00 2001 From: Andreas Huber Date: Fri, 21 Feb 2025 12:21:45 -0800 Subject: [PATCH 39/41] fixup --- onedal/linear_model/incremental_linear_model.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/onedal/linear_model/incremental_linear_model.py b/onedal/linear_model/incremental_linear_model.py index b874e1ad1a..500bdeb7f4 100644 --- a/onedal/linear_model/incremental_linear_model.py +++ b/onedal/linear_model/incremental_linear_model.py @@ -59,6 +59,7 @@ def partial_train(self, *args, **kwargs): ... def finalize_train(self, *args, **kwargs): ... 
def _reset(self): + self._need_to_finalize = False # Get the pointer to partial_result from backend self._queue = None self._partial_result = self.partial_train_result() @@ -194,6 +195,7 @@ def __init__(self, alpha=1.0, fit_intercept=True, copy_X=False, algorithm="norm_ self._reset() def _reset(self): + self._need_to_finalize = False self._queue = None self._partial_result = self.partial_train_result() From 07ae0d27637a4d60e43800d80bdee3e6946a50a9 Mon Sep 17 00:00:00 2001 From: Andreas Huber Date: Mon, 24 Feb 2025 05:30:22 -0800 Subject: [PATCH 40/41] use utf-8 encoding --- tests/run_examples.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/run_examples.py b/tests/run_examples.py index b8a0d4562c..afaa06765b 100755 --- a/tests/run_examples.py +++ b/tests/run_examples.py @@ -246,7 +246,7 @@ def run(exdir, logdir, args): ) else: logfn = jp(logdir, script.replace(".py", ".res")) - with open(logfn, "w") as logfile: + with open(logfn, "w", encoding="utf-8") as logfile: print("\n##### " + jp(dirpath, script)) execute_string = get_exe_cmd(jp(dirpath, script), args) if execute_string: @@ -267,7 +267,7 @@ def run(exdir, logdir, args): proc.kill() out = proc.communicate()[0] print("Process has timed out: " + str(execute_string)) - logfile.write(out.decode("ascii")) + logfile.write(out.decode("utf-8")) if proc.returncode: print(out) print( From f4fc506c02ae17f7a56d3b4533d9d93fb1cb1344 Mon Sep 17 00:00:00 2001 From: Andreas Huber Date: Tue, 25 Feb 2025 00:24:38 -0800 Subject: [PATCH 41/41] use queue from data for finiteness check --- onedal/utils/validation.py | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/onedal/utils/validation.py b/onedal/utils/validation.py index 9049767d4a..ed97c28365 100644 --- a/onedal/utils/validation.py +++ b/onedal/utils/validation.py @@ -448,11 +448,13 @@ def _assert_all_finite(X, allow_nan=False, input_name=""): "method": "dense", "allow_nan": allow_nan, } - if not backend_method(params, X_t).finite: - type_err = "infinity" if allow_nan else "NaN, infinity" - padded_input_name = input_name + " " if input_name else "" - msg_err = f"Input {padded_input_name}contains {type_err}." - raise ValueError(msg_err) + with SyclQueueManager.manage_global_queue(None, X): + # Must use the queue provided by X + if not backend_method(params, X_t).finite: + type_err = "infinity" if allow_nan else "NaN, infinity" + padded_input_name = input_name + " " if input_name else "" + msg_err = f"Input {padded_input_name}contains {type_err}." + raise ValueError(msg_err) @supports_queue
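[editor's sketch] PATCH 41 wraps the finiteness check in SyclQueueManager.manage_global_queue(None, X) so the check executes on the queue carried by the input data rather than an implicit default. The snippet below is a rough, hypothetical stand-in for that queue-selection idea, assuming nothing about the real SyclQueueManager beyond the call shape shown above; manage_queue_sketch and its fallback logic are illustrative, not the actual API.

    from contextlib import contextmanager

    import numpy as np

    @contextmanager
    def manage_queue_sketch(queue, *data):
        # Hypothetical stand-in for SyclQueueManager.manage_global_queue:
        # when no queue is passed explicitly, fall back to the queue carried
        # by the first USM-backed argument (dpctl tensors expose a
        # `sycl_queue` attribute). Plain numpy arrays carry none, so the
        # computation stays on the host.
        if queue is None:
            for item in data:
                queue = getattr(item, "sycl_queue", None)
                if queue is not None:
                    break
        yield queue

    X = np.ones((4, 2))
    with manage_queue_sketch(None, X) as q:
        print(q)  # None for numpy input -> the check runs on the host

The design point of the patch is the same as in this sketch: device placement follows the data, so a finiteness check on a GPU-resident array runs on that device's queue instead of silently defaulting to the host.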