Skip to content

Commit

Permalink
[enhancement] remove string comparison from data type oneDAL offload (#2184)
Browse files Browse the repository at this point in the history

* add dtype to table object

* Update table.cpp

* clarify fptype

* fix

* fix2

* fix3

* fix4

* missed some

* switch to using table in a circumstance

* Update neighbors.py

* fix int issue in testing

* Update neighbors.py
  • Loading branch information
icfaust authored Nov 25, 2024
1 parent 935c56b commit 8bc9ca0
Show file tree
Hide file tree
Showing 15 changed files with 29 additions and 22 deletions.
2 changes: 1 addition & 1 deletion onedal/basic_statistics/basic_statistics.py
Original file line number Diff line number Diff line change
Expand Up @@ -57,7 +57,7 @@ def _get_result_options(self, options):
def _get_onedal_params(self, is_csr, dtype=np.float32):
options = self._get_result_options(self.options)
return {
"fptype": "float" if dtype == np.float32 else "double",
"fptype": dtype,
"method": "sparse" if is_csr else self.algorithm,
"result_option": options,
}
Expand Down
2 changes: 1 addition & 1 deletion onedal/cluster/dbscan.py
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,7 @@ def __init__(

def _get_onedal_params(self, dtype=np.float32):
return {
"fptype": "float" if dtype == np.float32 else "double",
"fptype": dtype,
"method": "by_default",
"min_observations": int(self.min_samples),
"epsilon": float(self.eps),
Expand Down
6 changes: 3 additions & 3 deletions onedal/cluster/kmeans.py
Original file line number Diff line number Diff line change
Expand Up @@ -145,7 +145,7 @@ def _check_params_vs_input(
def _get_onedal_params(self, is_csr=False, dtype=np.float32, result_options=None):
thr = self._tol if hasattr(self, "_tol") else self.tol
return {
"fptype": "float" if dtype == np.float32 else "double",
"fptype": dtype,
"method": "lloyd_csr" if is_csr else "by_default",
"seed": -1,
"max_iteration_count": self.max_iter,
Expand Down Expand Up @@ -382,8 +382,8 @@ def _predict(self, X, module, queue=None, result_options=None):

policy = self._get_policy(queue, X)
X = _convert_to_supported(policy, X)
X_table, dtype = to_table(X), X.dtype
params = self._get_onedal_params(is_csr, dtype, result_options)
X_table = to_table(X)
params = self._get_onedal_params(is_csr, X_table.dtype, result_options)

result = module.infer(policy, params, self.model_, X_table)

Expand Down
2 changes: 1 addition & 1 deletion onedal/cluster/kmeans_init.py
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,7 @@ def __init__(

def _get_onedal_params(self, dtype=np.float32):
return {
"fptype": "float" if dtype == np.float32 else "double",
"fptype": dtype,
"local_trials_count": self.local_trials_count,
"method": self.algorithm,
"seed": self.seed,
Expand Down
8 changes: 5 additions & 3 deletions onedal/common/dispatch_utils.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
#pragma once

#include <pybind11/pybind11.h>
#include <pybind11/numpy.h>

#include "onedal/version.hpp"

Expand Down Expand Up @@ -44,9 +45,10 @@ struct fptype2t {
fptype2t(const Ops& ops) : ops(ops) {}

auto operator()(const pybind11::dict& params) {
const auto fptype = params["fptype"].cast<std::string>();
ONEDAL_PARAM_DISPATCH_VALUE(fptype, "float", ops, float);
ONEDAL_PARAM_DISPATCH_VALUE(fptype, "double", ops, double);
// fptype needs to be a numpy dtype, which uses pybind11-native dtype checking
const auto fptype = params["fptype"].cast<pybind11::dtype>().num();
ONEDAL_PARAM_DISPATCH_VALUE(fptype, pybind11::detail::npy_api::NPY_FLOAT_, ops, float);
ONEDAL_PARAM_DISPATCH_VALUE(fptype, pybind11::detail::npy_api::NPY_DOUBLE_, ops, double);
ONEDAL_PARAM_DISPATCH_THROW_INVALID_VALUE(fptype);
}

Expand Down
2 changes: 1 addition & 1 deletion onedal/covariance/covariance.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@ def __init__(self, method="dense", bias=False, assume_centered=False):

def _get_onedal_params(self, dtype=np.float32):
params = {
"fptype": "float" if dtype == np.float32 else "double",
"fptype": dtype,
"method": self.method,
}
if daal_check_version((2024, "P", 1)):
Expand Down
4 changes: 4 additions & 0 deletions onedal/datatypes/table.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -72,6 +72,10 @@ ONEDAL_PY_INIT_MODULE(table) {
const auto column_count = t.get_column_count();
return py::make_tuple(row_count, column_count);
});
table_obj.def_property_readonly("dtype", [](const table& t){
// returns a numpy dtype, even if source was not from numpy
return py::dtype(convert_dal_to_npy_type(t.get_metadata().get_data_type(0)));
});

#ifdef ONEDAL_DATA_PARALLEL
define_sycl_usm_array_property(table_obj);
Expand Down
2 changes: 1 addition & 1 deletion onedal/decomposition/pca.py
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,7 @@ def _get_onedal_params(self, data, stage=None):
elif stage == "predict":
n_components = self.n_components_
return {
"fptype": "float" if data.dtype == np.float32 else "double",
"fptype": data.dtype,
"method": self.method,
"n_components": n_components,
"is_deterministic": self.is_deterministic,
Expand Down
2 changes: 1 addition & 1 deletion onedal/ensemble/forest.py
Original file line number Diff line number Diff line change
Expand Up @@ -158,7 +158,7 @@ def _get_onedal_params(self, data):
seed = rs.randint(0, np.iinfo("i").max)

onedal_params = {
"fptype": "float" if data.dtype == np.float32 else "double",
"fptype": data.dtype,
"method": self.algorithm,
"infer_mode": self.infer_mode,
"voting_mode": self.voting_mode,
Expand Down
2 changes: 1 addition & 1 deletion onedal/linear_model/linear_model.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,7 @@ def __init__(self, fit_intercept, copy_X, algorithm, alpha=0.0):
def _get_onedal_params(self, dtype=np.float32):
intercept = "intercept|" if self.fit_intercept else ""
params = {
"fptype": "float" if dtype == np.float32 else "double",
"fptype": dtype,
"method": self.algorithm,
"intercept": self.fit_intercept,
"result_option": (intercept + "coefficients"),
Expand Down
2 changes: 1 addition & 1 deletion onedal/linear_model/logistic_regression.py
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,7 @@ def __init__(self, tol, C, fit_intercept, solver, max_iter, algorithm):
def _get_onedal_params(self, is_csr, dtype=np.float32):
intercept = "intercept|" if self.fit_intercept else ""
return {
"fptype": "float" if dtype == np.float32 else "double",
"fptype": dtype,
"method": "sparse" if is_csr else self.algorithm,
"intercept": self.fit_intercept,
"tol": self.tol,
Expand Down
8 changes: 4 additions & 4 deletions onedal/neighbors/neighbors.py
Original file line number Diff line number Diff line change
Expand Up @@ -130,7 +130,7 @@ def _get_onedal_params(self, X, y=None, n_neighbors=None):
else:
p = self.p
return {
"fptype": "float" if X.dtype == np.float32 else "double",
"fptype": X.dtype,
"vote_weights": "uniform" if weights == "uniform" else "distance",
"method": self._fit_method,
"radius": self.radius,
Expand Down Expand Up @@ -451,7 +451,7 @@ def _onedal_predict(self, model, X, params, queue):
)
if "responses" not in params["result_option"]:
params["result_option"] += "|responses"
params["fptype"] = "float" if X.dtype == np.float32 else "double"
params["fptype"] = X.dtype
result = self._get_backend(
"neighbors", "classification", "infer", policy, params, model, to_table(X)
)
Expand Down Expand Up @@ -615,7 +615,7 @@ def _onedal_predict(self, model, X, params, queue):
model = self._create_model(backend)
if "responses" not in params["result_option"] and gpu_device:
params["result_option"] += "|responses"
params["fptype"] = "float" if X.dtype == np.float32 else "double"
params["fptype"] = X.dtype
result = backend.infer(policy, params, model, to_table(X))

return result
Expand Down Expand Up @@ -753,7 +753,7 @@ def _onedal_predict(self, model, X, params, queue):
else:
model = self._create_model(self._get_backend("neighbors", "search", None))

params["fptype"] = "float" if X.dtype == np.float32 else "double"
params["fptype"] = X.dtype
result = self._get_backend(
"neighbors", "search", "infer", policy, params, model, to_table(X)
)
Expand Down
2 changes: 1 addition & 1 deletion onedal/primitives/kernel_functions.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@ def check_input(data):
def _compute_kernel(params, submodule, X, Y, queue):
policy = _get_policy(queue, X, Y)
X, Y = _convert_to_supported(policy, X, Y)
params["fptype"] = "float" if X.dtype == np.float32 else "double"
params["fptype"] = X.dtype
X, Y = to_table(X, Y)
result = submodule.compute(policy, params, X, Y)
return from_table(result.values)
Expand Down
2 changes: 1 addition & 1 deletion onedal/svm/svm.py
Original file line number Diff line number Diff line change
Expand Up @@ -96,7 +96,7 @@ def _get_onedal_params(self, data):
self.n_iter_ = 1 if max_iter < 1 else max_iter
class_count = 0 if self.classes_ is None else len(self.classes_)
return {
"fptype": "float" if data.dtype == np.float32 else "double",
"fptype": data.dtype,
"method": self.algorithm,
"kernel": self.kernel,
"c": self.C,
Expand Down
5 changes: 3 additions & 2 deletions sklearnex/spmd/cluster/tests/test_kmeans_spmd.py
Original file line number Diff line number Diff line change
Expand Up @@ -59,9 +59,10 @@ def test_kmeans_spmd_gold(dataframe, queue):
[2, 2],
[1, 3],
[2, 2],
]
],
dtype=np.float64,
)
X_test = np.array([[0, 0], [12, 3], [2, 2], [7, 8]])
X_test = np.array([[0, 0], [12, 3], [2, 2], [7, 8]], dtype=np.float64)

local_dpt_X_train = _convert_to_dataframe(
_get_local_tensor(X_train), sycl_queue=queue, target_df=dataframe
Expand Down

0 comments on commit 8bc9ca0

Please sign in to comment.