[experiment] ENH: using only raw inputs for onedal backend #2153

Status: Open. Wants to merge 88 commits into base: main, from samir-nasibli's enh/raw_inputs branch.

The diff below shows changes from 1 commit (f0d92ae) out of the 88 listed here.
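For context: the branch threads a new use_raw_input option through the onedal backend's config. When it is enabled, estimators skip the sklearn-style validation helpers (_check_array, _check_X_y) and pass device arrays (dpctl/dpnp) straight into oneDAL tables, inferring the SYCL queue from the data itself. A minimal usage sketch, assuming the option ends up exposed through sklearnex's config_context as the test-related commits below suggest; the surrounding data setup is illustrative only:

import dpctl.tensor as dpt
from sklearnex import config_context
from sklearnex.linear_model import LinearRegression

# X and y already live on a SYCL device; with use_raw_input=True the backend
# consumes them as-is instead of round-tripping through host-side checks.
X = dpt.reshape(dpt.arange(20.0, device="gpu"), (10, 2))
y = dpt.arange(10.0, device="gpu")

with config_context(use_raw_input=True):
    model = LinearRegression().fit(X, y)
    pred = model.predict(X)  # expected to come back as a usm_ndarray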
Commits (88):

daed528  ENH: using only raw inputs for onedal backend (samir-nasibli, Nov 5, 2024)
1be2ffb  minor fix (samir-nasibli, Nov 5, 2024)
a23b677  lin (samir-nasibli, Nov 5, 2024)
664e140  fix usw_raw_input True/False with dpctl tensor on device (ahuber21, Nov 5, 2024)
518dceb  Add hacks to kmeans (ahuber21, Nov 5, 2024)
df9d930  Basic statistics online (samir-nasibli, Nov 5, 2024)
2954913  Merge branch 'enh/raw_inputs' of https://github.com/samir-nasibli/sci… (samir-nasibli, Nov 5, 2024)
3ef345c  Covariance support (ethanglaser, Nov 5, 2024)
f1c9233  Merge branch 'enh/raw_inputs' of https://github.com/samir-nasibli/sci… (ethanglaser, Nov 5, 2024)
66d7b2d  DBSCAN support (samir-nasibli, Nov 5, 2024)
c5d26a4  Merge branch 'enh/raw_inputs' of https://github.com/samir-nasibli/sci… (samir-nasibli, Nov 5, 2024)
1350c10  minor fix for dbscan (samir-nasibli, Nov 5, 2024)
8aaaa70  minor fix for DBSCAN (samir-nasibli, Nov 5, 2024)
f0d92ae  Apply raw input for batch linear and logistic regression (Alexsandruss, Nov 5, 2024)
3b58beb  Apply linters (Alexsandruss, Nov 5, 2024)
d7f2c3c  fix for DBSCAN (samir-nasibli, Nov 5, 2024)
1aca420  support for Random Forest (samir-nasibli, Nov 5, 2024)
362930a  PCA support (batch) (ethanglaser, Nov 5, 2024)
bc37391  Merge branch 'enh/raw_inputs' of https://github.com/samir-nasibli/sci… (ethanglaser, Nov 5, 2024)
102dcae  minor fix for dbscan and rf (samir-nasibli, Nov 5, 2024)
6edab5b  fully fixed DBSCAN (samir-nasibli, Nov 6, 2024)
e153a28  Add Incremental Linear Regression (Alexsandruss, Nov 6, 2024)
37d32c9  Linting (Alexsandruss, Nov 6, 2024)
71c5135  add modification to knn (ahuber21, Nov 6, 2024)
db9f021  minor update for RF (samir-nasibli, Nov 6, 2024)
bc353da  fix for RandomForestClassifier (samir-nasibli, Nov 7, 2024)
e873205  minor for RF (samir-nasibli, Nov 7, 2024)
fe3222a  Update online algos (olegkkruglov, Nov 7, 2024)
5b3ad17  Merge branch 'enh/raw_inputs' of https://github.com/samir-nasibli/sci… (samir-nasibli, Nov 7, 2024)
eaaab32  fix for RF regressor (samir-nasibli, Nov 7, 2024)
a7f0c2d  fix workaround for knn (ahuber21, Nov 7, 2024)
d9a2966  kmeans predict support (ethanglaser, Nov 12, 2024)
3562c69  Merge remote-tracking branch 'origin/main' into enh/raw_inputs (ahuber21, Dec 16, 2024)
42c3614  fix merge errors (ahuber21, Dec 16, 2024)
53bcc7b  fix some tests (ahuber21, Dec 17, 2024)
9964c5a  fixup (ahuber21, Dec 17, 2024)
84afb62  undo more changes that broke tests (ahuber21, Dec 17, 2024)
cf5b736  format (ahuber21, Dec 17, 2024)
92393b9  restore original behavior when running without raw inputs (ahuber21, Dec 18, 2024)
13471e5  restore original behavior when running without raw inputs (ahuber21, Dec 18, 2024)
a8f3f19  align code (ahuber21, Dec 18, 2024)
2b07c00  restore original from_table (ahuber21, Dec 19, 2024)
6104736  add use_raw_input tests for incremental covariance (ahuber21, Dec 19, 2024)
df03233  Add basic statistics testing (ahuber21, Dec 19, 2024)
8a166b7  add incremental basic statistics (ahuber21, Dec 19, 2024)
fb5f5fa  add dbscan (ahuber21, Dec 19, 2024)
7072041  Merge remote-tracking branch 'origin/main' into dev/ahuber/raw-inputs… (ahuber21, Dec 19, 2024)
91384ed  add kmeans (ahuber21, Dec 20, 2024)
6dec57d  add covariance (ahuber21, Dec 20, 2024)
529a7b8  align get_config() import and use_raw_input retrieval (ahuber21, Dec 20, 2024)
9f78cbd  add incremental_pca (ahuber21, Dec 20, 2024)
658ccc1  add pca (ahuber21, Dec 20, 2024)
5e74a54  add incremental linear (ahuber21, Dec 20, 2024)
dfbf223  add linear_model (ahuber21, Dec 22, 2024)
c4094fb  Merge branch 'dev/ahuber/raw-inputs-dispatching' into enh/raw_inputs (ahuber21, Dec 22, 2024)
bb5206f  raw inputs updates for functional forest predict (ethanglaser, Jan 9, 2025)
8211a23  fixes for logreg predict_proba, knnreg, inc cov, inc pca (ethanglaser, Jan 18, 2025)
e3425bf  dbscan + inc linreg changes (ethanglaser, Jan 20, 2025)
0630bc1  Merge 'upstream/main' into enh/raw_inputs (ethanglaser, Jan 20, 2025)
52ba18a  black (ethanglaser, Jan 20, 2025)
90b7175  temporary for CI (ethanglaser, Jan 21, 2025)
f4d18cd  isorted (ethanglaser, Jan 21, 2025)
d84a559  tuple indices safeguarding (ethanglaser, Jan 22, 2025)
2daeeb7  incremental bs fit fixes (ethanglaser, Jan 22, 2025)
fb3d0bc  dbscan CI fixes (ethanglaser, Jan 22, 2025)
a7bd2cd  use xp to take samples to avoid data copying (ahuber21, Jan 29, 2025)
d64c6fe  align setting of use_raw_input (ethanglaser, Feb 4, 2025)
7dbf8df  Merge remote-tracking branch 'upstream/main' into enh/raw_inputs (ethanglaser, Feb 13, 2025)
4e0ec33  isort (ethanglaser, Feb 13, 2025)
a8c9fe0  unify and clean up onedal4py changes, remove raw ridge (ethanglaser, Feb 13, 2025)
83342f7  remove unnecessary sklearnex raw inputs, move tests to spmd (ethanglaser, Feb 14, 2025)
87746b2  minor followup (ethanglaser, Feb 14, 2025)
12e0f08  cleanup of remaining sklearnex changes (ethanglaser, Feb 14, 2025)
bb9bdec  oops (ethanglaser, Feb 15, 2025)
8b7725f  Merge remote-tracking branch 'upstream/main' into enh/raw_inputs (ethanglaser, Feb 17, 2025)
b84c129  switch to config_context only on spmd estimator in tests (ethanglaser, Feb 18, 2025)
1249d44  CI fixes for PCA and linreg (ethanglaser, Feb 19, 2025)
d77a6b2  logistic regression CI fixes (ethanglaser, Feb 19, 2025)
5f28a69  fix forest CI and unify logreg and forest n_classes_ (ethanglaser, Feb 20, 2025)
7af5977  switch back to dpep for dpnp tests (ethanglaser, Feb 20, 2025)
e155695  forest cleanup and accuracy issue resolution (ethanglaser, Feb 25, 2025)
17d152f  add back astype conditions (ethanglaser, Feb 25, 2025)
e3b09cc  more attempted ci fixes (logreg np and forest) (ethanglaser, Feb 25, 2025)
5848248  format (ethanglaser, Feb 25, 2025)
6bdd227  remove unnecessary online attributes (ethanglaser, Feb 25, 2025)
3a83708  knnreg workaround (ethanglaser, Feb 25, 2025)
aba7e40  cleanup (ethanglaser, Feb 26, 2025)
f7f9fe8  add queue setting to rf predict_proba (ethanglaser, Feb 26, 2025)
Apply raw input for batch linear and logistic regression
Alexsandruss committed Nov 5, 2024
Verified: this commit was created on GitHub.com and signed with GitHub's verified signature (the key has since expired).
commit f0d92aecce0bc08688675ce48a10122b05c2b585
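Before the file-by-file diff, it helps to see the one pattern this commit applies everywhere: probe the input for a SYCL USM namespace, read use_raw_input from the config, gate all host-side validation behind it, and thread sua_iface/sycl_queue/xp through to_table/from_table so results come back in the caller's array namespace. A condensed sketch of that skeleton follows; the wrapper function and its signature are invented for illustration, while the helper calls mirror the diffs below:

import numpy as np

from onedal._config import _get_config
from onedal.datatypes import from_table, to_table
from onedal.utils._array_api import _get_sycl_namespace


def raw_aware_infer(module, policy, params, model, X, queue=None):
    # _get_sycl_namespace returns a (sua_iface, xp, is_sua) triple; sua_iface
    # is None for plain host arrays, in which case we fall back to numpy.
    sua_iface, xp, _ = _get_sycl_namespace(X)
    if xp is None:
        xp = np
    if _get_config().get("use_raw_input") is True and sua_iface is not None:
        queue = X.sycl_queue  # reuse the queue the data already lives on

    X_table = to_table(X, sua_iface=sua_iface)  # zero-copy for USM-backed arrays
    result = module.infer(policy, params, model, X_table)
    # Hand the result back in the same namespace the input arrived in.
    return from_table(result.responses, sua_iface=sua_iface, sycl_queue=queue, xp=xp)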
87 changes: 56 additions & 31 deletions onedal/linear_model/linear_model.py
@@ -21,11 +21,13 @@

 from daal4py.sklearn._utils import daal_check_version, get_dtype, make2d

+from .._config import _get_config
 from ..common._base import BaseEstimator
 from ..common._estimator_checks import _check_is_fitted
 from ..common.hyperparameters import get_hyperparameters
 from ..datatypes import _convert_to_supported, from_table, to_table
 from ..utils import _check_array, _check_n_features, _check_X_y, _num_features
+from ..utils._array_api import _get_sycl_namespace


 class BaseLinearRegression(BaseEstimator, metaclass=ABCMeta):
@@ -119,28 +121,35 @@ def predict(self, X, queue=None):

         _check_is_fitted(self)

+        sua_iface, xp, _ = _get_sycl_namespace(X)
+        if xp is None:
+            xp = np
+        use_raw_input = _get_config().get("use_raw_input") is True
+
         policy = self._get_policy(queue, X)

-        X = _check_array(
-            X, dtype=[np.float64, np.float32], force_all_finite=False, ensure_2d=False
-        )
+        if not use_raw_input:
+            X = _check_array(
+                X, dtype=[np.float64, np.float32], force_all_finite=False, ensure_2d=False
+            )
+            X = make2d(X)

         _check_n_features(self, X, False)

         if hasattr(self, "_onedal_model"):
             model = self._onedal_model
         else:
             model = self._create_model(policy)

-        X = make2d(X)
         X = _convert_to_supported(policy, X)
         params = self._get_onedal_params(get_dtype(X))

-        X_table = to_table(X)
+        X_table = to_table(X, sua_iface=sua_iface)
         result = module.infer(policy, params, model, X_table)
-        y = from_table(result.responses)
+        y = from_table(result.responses, sua_iface=sua_iface, sycl_queue=queue, xp=xp)

         if y.shape[1] == 1 and self.coef_.ndim == 1:
-            return y.ravel()
+            return xp.reshape(y, (-1,))
         else:
             return y
@@ -194,26 +203,32 @@ def fit(self, X, y, queue=None):
         """
         module = self._get_backend("linear_model", "regression")

-        # TODO Fix _check_X_y to make sure this conversion is there
-        if not isinstance(X, np.ndarray):
-            X = np.asarray(X)
+        sua_iface, xp, _ = _get_sycl_namespace(X)
+        if xp is None:
+            xp = np
+        use_raw_input = _get_config().get("use_raw_input") is True
+
+        if not use_raw_input:
+            # TODO Fix _check_X_y to make sure this conversion is there
+            if not isinstance(X, np.ndarray):
+                X = np.asarray(X)

-        dtype = get_dtype(X)
-        if dtype not in [np.float32, np.float64]:
-            dtype = np.float64
-            X = X.astype(dtype, copy=self.copy_X)
+            dtype = get_dtype(X)
+            if dtype not in [np.float32, np.float64]:
+                dtype = np.float64
+                X = X.astype(dtype, copy=self.copy_X)

-        y = np.asarray(y).astype(dtype=dtype)
+            y = np.asarray(y).astype(dtype=dtype)

-        X, y = _check_X_y(X, y, force_all_finite=False, accept_2d_y=True)
+            X, y = _check_X_y(X, y, force_all_finite=False, accept_2d_y=True)

         policy = self._get_policy(queue, X, y)

         self.n_features_in_ = _num_features(X, fallback_1d=True)

         X, y = _convert_to_supported(policy, X, y)
         params = self._get_onedal_params(get_dtype(X))
-        X_table, y_table = to_table(X, y)
+        X_table, y_table = to_table(X, y, sua_iface=sua_iface)

         hparams = get_hyperparameters("linear_regression", "train")
         if hparams is not None and not hparams.is_default:
@@ -223,14 +238,16 @@ def fit(self, X, y, queue=None):

         self._onedal_model = result.model

-        packed_coefficients = from_table(result.model.packed_coefficients)
+        packed_coefficients = from_table(
+            result.model.packed_coefficients, sua_iface=sua_iface, sycl_queue=queue, xp=xp
+        )
         self.coef_, self.intercept_ = (
             packed_coefficients[:, 1:],
             packed_coefficients[:, 0],
         )

         if self.coef_.shape[0] == 1 and y.ndim == 1:
-            self.coef_ = self.coef_.ravel()
+            self.coef_ = xp.reshape(self.coef_, (-1,))
             self.intercept_ = self.intercept_[0]

         return self
@@ -293,37 +310,45 @@ def fit(self, X, y, queue=None):
         """
         module = self._get_backend("linear_model", "regression")

-        X = _check_array(
-            X,
-            dtype=[np.float64, np.float32],
-            force_all_finite=False,
-            ensure_2d=False,
-            copy=self.copy_X,
-        )
+        sua_iface, xp, _ = _get_sycl_namespace(X)
+        if xp is None:
+            xp = np
+        use_raw_input = _get_config().get("use_raw_input") is True

-        y = np.asarray(y).astype(dtype=get_dtype(X))
+        if not use_raw_input:
+            X = _check_array(
+                X,
+                dtype=[np.float64, np.float32],
+                force_all_finite=False,
+                ensure_2d=False,
+                copy=self.copy_X,
+            )

-        X, y = _check_X_y(X, y, force_all_finite=False, accept_2d_y=True)
+            y = np.asarray(y).astype(dtype=get_dtype(X))
+
+            X, y = _check_X_y(X, y, force_all_finite=False, accept_2d_y=True)

         policy = self._get_policy(queue, X, y)

         self.n_features_in_ = _num_features(X, fallback_1d=True)

         X, y = _convert_to_supported(policy, X, y)
         params = self._get_onedal_params(get_dtype(X))
-        X_table, y_table = to_table(X, y)
+        X_table, y_table = to_table(X, y, sua_iface=sua_iface)

         result = module.train(policy, params, X_table, y_table)
         self._onedal_model = result.model

-        packed_coefficients = from_table(result.model.packed_coefficients)
+        packed_coefficients = from_table(
+            result.model.packed_coefficients, sua_iface=sua_iface, sycl_queue=queue, xp=xp
+        )
         self.coef_, self.intercept_ = (
             packed_coefficients[:, 1:],
             packed_coefficients[:, 0],
         )

         if self.coef_.shape[0] == 1 and y.ndim == 1:
-            self.coef_ = self.coef_.ravel()
+            self.coef_ = xp.reshape(self.coef_, (-1,))
             self.intercept_ = self.intercept_[0]

         return self
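A small but recurring detail in the hunks above: every .ravel() becomes xp.reshape(..., (-1,)). ravel is a NumPy method that dpctl's usm_ndarray does not provide, whereas reshape with a -1 dimension is part of the array API standard, so the same line works for every namespace xp can be here (numpy, dpnp, dpctl.tensor). A quick numpy-only illustration; the dpctl lines in the comment are assumptions about the device case:

import numpy as np

y = np.arange(6.0).reshape(3, 2)
flat = np.reshape(y, (-1,))  # array-API-friendly spelling of y.ravel()
assert flat.shape == (6,)

# The identical call works with a device namespace, e.g.:
#   import dpctl.tensor as xp
#   flat = xp.reshape(xp.asarray(y), (-1,))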
109 changes: 73 additions & 36 deletions onedal/linear_model/logistic_regression.py
@@ -21,6 +21,7 @@

 from daal4py.sklearn._utils import daal_check_version, get_dtype, make2d

+from .._config import _get_config
 from ..common._base import BaseEstimator as onedal_BaseEstimator
 from ..common._estimator_checks import _check_is_fitted
 from ..common._mixin import ClassifierMixin
@@ -33,6 +34,8 @@
     _num_features,
     _type_of_target,
 )
+from ..utils._array_api import _get_sycl_namespace
+from ..utils._dpep_helpers import get_unique_values_with_dpep


 class BaseLogisticRegression(onedal_BaseEstimator, metaclass=ABCMeta):
@@ -63,29 +66,38 @@ def _get_onedal_params(self, is_csr, dtype=np.float32):
         }

     def _fit(self, X, y, module, queue):
+        use_raw_input = _get_config().get("use_raw_input") is True
+        if use_raw_input and _get_sycl_namespace(X)[0] is not None:
+            queue = X.sycl_queue
+
         sparsity_enabled = daal_check_version((2024, "P", 700))
-        X, y = _check_X_y(
-            X,
-            y,
-            accept_sparse=sparsity_enabled,
-            force_all_finite=True,
-            accept_2d_y=False,
-            dtype=[np.float64, np.float32],
-        )
-        is_csr = _is_csr(X)
+        if not use_raw_input:
+            X, y = _check_X_y(
+                X,
+                y,
+                accept_sparse=sparsity_enabled,
+                force_all_finite=True,
+                accept_2d_y=False,
+                dtype=[np.float64, np.float32],
+            )
+            if _type_of_target(y) != "binary":
+                raise ValueError("Only binary classification is supported")
+
+            self.classes_, y = np.unique(y, return_inverse=True)
+            y = y.astype(dtype=np.int32)
+        else:
+            self.classes_ = get_unique_values_with_dpep(y)
+            n_classes = len(self.classes_)
+            if n_classes != 2:
+                raise ValueError("Only binary classification is supported")

         self.n_features_in_ = _num_features(X, fallback_1d=True)

-        if _type_of_target(y) != "binary":
-            raise ValueError("Only binary classification is supported")
-
-        self.classes_, y = np.unique(y, return_inverse=True)
-        y = y.astype(dtype=np.int32)
-
+        is_csr = _is_csr(X)
         policy = self._get_policy(queue, X, y)
         X, y = _convert_to_supported(policy, X, y)
         params = self._get_onedal_params(is_csr, get_dtype(X))
-        X_table, y_table = to_table(X, y)
+        sua_iface = _get_sycl_namespace(X, y)[0]
+        X_table, y_table = to_table(X, y, sua_iface=sua_iface)

         result = module.train(policy, params, X_table, y_table)

@@ -152,22 +164,29 @@ def _create_model(self, module, policy):

         return m

-    def _infer(self, X, module, queue):
+    def _infer(self, X, module, queue, sua_iface):
         _check_is_fitted(self)

+        use_raw_input = _get_config().get("use_raw_input") is True
+        if use_raw_input and _get_sycl_namespace(X)[0] is not None:
+            queue = X.sycl_queue
+
         sparsity_enabled = daal_check_version((2024, "P", 700))

-        X = _check_array(
-            X,
-            dtype=[np.float64, np.float32],
-            accept_sparse=sparsity_enabled,
-            force_all_finite=True,
-            ensure_2d=False,
-            accept_large_sparse=sparsity_enabled,
-        )
-        is_csr = _is_csr(X)
+        if not use_raw_input:
+            X = _check_array(
+                X,
+                dtype=[np.float64, np.float32],
+                accept_sparse=sparsity_enabled,
+                force_all_finite=True,
+                ensure_2d=False,
+                accept_large_sparse=sparsity_enabled,
+            )
+            X = make2d(X)

         _check_n_features(self, X, False)
+        is_csr = _is_csr(X)

-        X = make2d(X)
         policy = self._get_policy(queue, X)

         if hasattr(self, "_onedal_model"):
@@ -178,26 +197,44 @@ def _infer(self, X, module, queue):
         X = _convert_to_supported(policy, X)
         params = self._get_onedal_params(is_csr, get_dtype(X))

-        X_table = to_table(X)
+        X_table = to_table(X, sua_iface=sua_iface)

         result = module.infer(policy, params, model, X_table)
         return result

     def _predict(self, X, module, queue):
-        result = self._infer(X, module, queue)
-        y = from_table(result.responses)
-        y = np.take(self.classes_, y.ravel(), axis=0)
+        use_raw_input = _get_config().get("use_raw_input") is True
+        sua_iface, xp, _ = _get_sycl_namespace(X)
+        if xp is None:
+            xp = np
+        if use_raw_input and sua_iface is not None:
+            queue = X.sycl_queue
+
+        result = self._infer(X, module, queue, sua_iface)
+        y = from_table(result.responses, sua_iface=sua_iface, sycl_queue=queue, xp=xp)
+        y = xp.take(xp.asarray(self.classes_), xp.reshape(y, (-1,)), axis=0)
         return y

     def _predict_proba(self, X, module, queue):
-        result = self._infer(X, module, queue)
+        use_raw_input = _get_config().get("use_raw_input") is True
+        sua_iface, xp, _ = _get_sycl_namespace(X)
+        if xp is None:
+            xp = np
+        if use_raw_input and sua_iface is not None:
+            queue = X.sycl_queue
+
+        result = self._infer(X, module, queue, sua_iface)

-        y = from_table(result.probabilities)
+        y = from_table(result.probabilities, sua_iface=sua_iface, sycl_queue=queue, xp=xp)
         y = y.reshape(-1, 1)
-        return np.hstack([1 - y, y])
+        return xp.hstack([1 - y, y])

     def _predict_log_proba(self, X, module, queue):
+        _, xp, _ = _get_sycl_namespace(X)
+        if xp is None:
+            xp = np
         y_proba = self._predict_proba(X, module, queue)
-        return np.log(y_proba)
+        return xp.log(y_proba)


 class LogisticRegression(ClassifierMixin, BaseLogisticRegression):
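One behavioral asymmetry worth flagging in _fit above: the validated path re-encodes labels via np.unique(..., return_inverse=True), while the raw-input path only collects self.classes_ with get_unique_values_with_dpep(y) and forwards y unmodified, which implies callers in raw mode are expected to supply labels already encoded as {0, 1}. A small numpy-only illustration of the difference (the label values are hypothetical):

import numpy as np

y = np.array([3, 7, 7, 3])

# Validated path: classes_ are extracted AND y is re-encoded to 0..n_classes-1.
classes_, y_enc = np.unique(y, return_inverse=True)  # classes_=[3, 7], y_enc=[0, 1, 1, 0]

# Raw-input path: only the class values are collected (via dpnp/dpctl in the
# real code); y itself reaches oneDAL as-is, so [3, 7, 7, 3] would be wrong
# unless the caller encodes it to [0, 1, 1, 0] first.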
15 changes: 15 additions & 0 deletions onedal/utils/_dpep_helpers.py
@@ -54,3 +54,18 @@ def is_dpnp_available(version=None):

 dpctl_available = is_dpctl_available()
 dpnp_available = is_dpnp_available()
+
+
+if dpnp_available:
+    import dpnp
+if dpctl_available:
+    import dpctl.tensor as dpt
+
+
+def get_unique_values_with_dpep(X):
+    if dpnp_available:
+        return dpnp.unique(X)
+    elif dpctl_available:
+        return dpt.unique_values(X)
+    else:
+        raise RuntimeError("No DPEP package available to provide `unique` function.")
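A hedged usage sketch of the new helper. Assumption: dpctl is installed and dpnp is not, so the dpt.unique_values branch runs; note the dpnp-first ordering means that on a system with both packages installed even a dpctl tensor is routed through dpnp.unique:

import dpctl.tensor as dpt

from onedal.utils._dpep_helpers import get_unique_values_with_dpep

y = dpt.asarray([0, 1, 1, 0], dtype="int32")
classes = get_unique_values_with_dpep(y)  # dpt.unique_values(y) -> [0, 1] on the device
assert len(classes) == 2  # the binary check in _fit counts these values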