diff --git a/2023.2/.buildinfo b/2023.2/.buildinfo
new file mode 100644
index 0000000000..a41243969f
--- /dev/null
+++ b/2023.2/.buildinfo
@@ -0,0 +1,4 @@
+# Sphinx build info version 1
+# This file hashes the configuration used when building these files. When it is not found, a full rebuild will be done.
+config: 6800a6ffac59c178770a0c0cc83712a8
+tags: 645f666f9bcd5a90fca523b33c5a78b7
diff --git a/2023.2/.doctrees/404.doctree b/2023.2/.doctrees/404.doctree
new file mode 100644
index 0000000000..4b1df81808
Binary files /dev/null and b/2023.2/.doctrees/404.doctree differ
diff --git a/2023.2/.doctrees/acceleration.doctree b/2023.2/.doctrees/acceleration.doctree
new file mode 100644
index 0000000000..163213c792
Binary files /dev/null and b/2023.2/.doctrees/acceleration.doctree differ
diff --git a/2023.2/.doctrees/algorithms.doctree b/2023.2/.doctrees/algorithms.doctree
new file mode 100644
index 0000000000..eefc4d9661
Binary files /dev/null and b/2023.2/.doctrees/algorithms.doctree differ
diff --git a/2023.2/.doctrees/blogs.doctree b/2023.2/.doctrees/blogs.doctree
new file mode 100644
index 0000000000..dd4eaf8039
Binary files /dev/null and b/2023.2/.doctrees/blogs.doctree differ
diff --git a/2023.2/.doctrees/contribute.doctree b/2023.2/.doctrees/contribute.doctree
new file mode 100644
index 0000000000..dce1438af1
Binary files /dev/null and b/2023.2/.doctrees/contribute.doctree differ
diff --git a/2023.2/.doctrees/distributed-mode.doctree b/2023.2/.doctrees/distributed-mode.doctree
new file mode 100644
index 0000000000..4191be22a4
Binary files /dev/null and b/2023.2/.doctrees/distributed-mode.doctree differ
diff --git a/2023.2/.doctrees/environment.pickle b/2023.2/.doctrees/environment.pickle
new file mode 100644
index 0000000000..83430f7389
Binary files /dev/null and b/2023.2/.doctrees/environment.pickle differ
diff --git a/2023.2/.doctrees/global-patching.doctree b/2023.2/.doctrees/global-patching.doctree
new file mode 100644
index 0000000000..8ca7293348
Binary files /dev/null and b/2023.2/.doctrees/global-patching.doctree differ
diff --git a/2023.2/.doctrees/guide/acceleration.doctree b/2023.2/.doctrees/guide/acceleration.doctree
new file mode 100644
index 0000000000..33e3f9d478
Binary files /dev/null and b/2023.2/.doctrees/guide/acceleration.doctree differ
diff --git a/2023.2/.doctrees/index.doctree b/2023.2/.doctrees/index.doctree
new file mode 100644
index 0000000000..fe88497b70
Binary files /dev/null and b/2023.2/.doctrees/index.doctree differ
diff --git a/2023.2/.doctrees/installation.doctree b/2023.2/.doctrees/installation.doctree
new file mode 100644
index 0000000000..e51e9002ad
Binary files /dev/null and b/2023.2/.doctrees/installation.doctree differ
diff --git a/2023.2/.doctrees/kaggle.doctree b/2023.2/.doctrees/kaggle.doctree
new file mode 100644
index 0000000000..b14befb14e
Binary files /dev/null and b/2023.2/.doctrees/kaggle.doctree differ
diff --git a/2023.2/.doctrees/kaggle/automl.doctree b/2023.2/.doctrees/kaggle/automl.doctree
new file mode 100644
index 0000000000..8a3b3fcd5d
Binary files /dev/null and b/2023.2/.doctrees/kaggle/automl.doctree differ
diff --git a/2023.2/.doctrees/kaggle/classification.doctree b/2023.2/.doctrees/kaggle/classification.doctree
new file mode 100644
index 0000000000..7146fd3b80
Binary files /dev/null and b/2023.2/.doctrees/kaggle/classification.doctree differ
diff --git a/2023.2/.doctrees/kaggle/regression.doctree b/2023.2/.doctrees/kaggle/regression.doctree
new file mode 100644
index 0000000000..7d0912d121
Binary files /dev/null and b/2023.2/.doctrees/kaggle/regression.doctree differ
diff --git a/2023.2/.doctrees/memory-requirements.doctree b/2023.2/.doctrees/memory-requirements.doctree
new file mode 100644
index 0000000000..e802368688
Binary files /dev/null and b/2023.2/.doctrees/memory-requirements.doctree differ
diff --git a/2023.2/.doctrees/nbsphinx/samples/ElasticNet.ipynb b/2023.2/.doctrees/nbsphinx/samples/ElasticNet.ipynb
new file mode 100644
index 0000000000..29e5dca851
--- /dev/null
+++ b/2023.2/.doctrees/nbsphinx/samples/ElasticNet.ipynb
@@ -0,0 +1,386 @@
+{
+ "cells": [
+ {
+ "cell_type": "markdown",
+ "id": "3768ec43",
+ "metadata": {},
+ "source": [
+ "# Intel® Extension for Scikit-learn ElasticNet for Airlines DepDelay dataset"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 1,
+ "id": "b1b922d1",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "from timeit import default_timer as timer\n",
+ "from sklearn import metrics\n",
+ "from sklearn.model_selection import train_test_split\n",
+ "import warnings\n",
+ "from sklearn.datasets import fetch_openml\n",
+ "from sklearn.preprocessing import LabelEncoder\n",
+ "from IPython.display import HTML\n",
+ "\n",
+ "warnings.filterwarnings(\"ignore\")"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "34e460a7",
+ "metadata": {},
+ "source": [
+ "### Download the data"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 2,
+ "id": "00c2277b",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "x, y = fetch_openml(name=\"Airlines_DepDelay_10M\", return_X_y=True)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "06d309c0",
+ "metadata": {},
+ "source": [
+ "### Preprocessing\n",
+ "Let's encode categorical features with LabelEncoder"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 3,
+ "id": "2ff35bc2",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "for col in [\"UniqueCarrier\", \"Origin\", \"Dest\"]:\n",
+ " le = LabelEncoder().fit(x[col])\n",
+ " x[col] = le.transform(x[col])"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "38637349",
+ "metadata": {},
+ "source": [
+ "Split the data into train and test sets"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 4,
+ "id": "0d332789",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "((9000000, 9), (1000000, 9), (9000000,), (1000000,))"
+ ]
+ },
+ "execution_count": 4,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.1, random_state=0)\n",
+ "x_train.shape, x_test.shape, y_train.shape, y_test.shape"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "246f819f",
+ "metadata": {},
+ "source": [
+ "Normalize the data"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 5,
+ "id": "454a341c",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "from sklearn.preprocessing import StandardScaler\n",
+ "\n",
+ "scaler_y = StandardScaler()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 6,
+ "id": "df400504",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "y_train = y_train.to_numpy().reshape(-1, 1)\n",
+ "y_test = y_test.to_numpy().reshape(-1, 1)\n",
+ "\n",
+ "scaler_y.fit(y_train)\n",
+ "y_train = scaler_y.transform(y_train).ravel()\n",
+ "y_test = scaler_y.transform(y_test).ravel()"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "fe1d4fac",
+ "metadata": {},
+ "source": [
+ "### Patch original Scikit-learn with Intel® Extension for Scikit-learn\n",
+ "Intel® Extension for Scikit-learn (previously known as daal4py) contains drop-in replacement functionality for the stock Scikit-learn package. You can take advantage of the performance optimizations of Intel® Extension for Scikit-learn by adding just two lines of code before the usual Scikit-learn imports:"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 7,
+ "id": "ef6938df",
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "Intel(R) Extension for Scikit-learn* enabled (https://github.com/intel/scikit-learn-intelex)\n"
+ ]
+ }
+ ],
+ "source": [
+ "from sklearnex import patch_sklearn\n",
+ "\n",
+ "patch_sklearn()"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "20c5ab48",
+ "metadata": {},
+ "source": [
+ "Intel® Extension for Scikit-learn patching affects performance of specific Scikit-learn functionality. Refer to the [list of supported algorithms and parameters](https://intel.github.io/scikit-learn-intelex/latest/algorithms.html) for details. When unsupported parameters are used, the package falls back to the original Scikit-learn. If the patching does not cover your scenarios, [submit an issue on GitHub](https://github.com/intel/scikit-learn-intelex/issues)."
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "f80273e7",
+ "metadata": {},
+ "source": [
+ "Training of the ElasticNet algorithm with Intel® Extension for Scikit-learn for Airlines DepDelay dataset"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 8,
+ "id": "a4dd1c7e",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "'Intel® extension for Scikit-learn time: 0.28 s'"
+ ]
+ },
+ "execution_count": 8,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "from sklearn.linear_model import ElasticNet\n",
+ "\n",
+ "params = {\n",
+ " \"alpha\": 0.3,\n",
+ " \"fit_intercept\": False,\n",
+ " \"l1_ratio\": 0.7,\n",
+ " \"random_state\": 0,\n",
+ " \"copy_X\": False,\n",
+ "}\n",
+ "start = timer()\n",
+ "model = ElasticNet(**params).fit(x_train, y_train)\n",
+ "train_patched = timer() - start\n",
+ "f\"Intel® extension for Scikit-learn time: {train_patched:.2f} s\""
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "f10b51fc",
+ "metadata": {},
+ "source": [
+ "Predict and get a result of the ElasticNet algorithm with Intel® Extension for Scikit-learn"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 9,
+ "id": "d4295a26",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "'Patched Scikit-learn MSE: 1.0109113399224974'"
+ ]
+ },
+ "execution_count": 9,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "y_predict = model.predict(x_test)\n",
+ "mse_metric_opt = metrics.mean_squared_error(y_test, y_predict)\n",
+ "f\"Patched Scikit-learn MSE: {mse_metric_opt}\""
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "cbe6db0d",
+ "metadata": {},
+ "source": [
+ "### Train the same algorithm with original Scikit-learn\n",
+ "In order to cancel optimizations, we use *unpatch_sklearn* and reimport the class ElasticNet"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 10,
+ "id": "6f64ba97",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "from sklearnex import unpatch_sklearn\n",
+ "\n",
+ "unpatch_sklearn()"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "f242c6da",
+ "metadata": {},
+ "source": [
+ "Training of the ElasticNet algorithm with original Scikit-learn library for Airlines DepDelay dataset"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 11,
+ "id": "67243849",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "'Original Scikit-learn time: 3.96 s'"
+ ]
+ },
+ "execution_count": 11,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "from sklearn.linear_model import ElasticNet\n",
+ "\n",
+ "start = timer()\n",
+ "model = ElasticNet(**params).fit(x_train, y_train)\n",
+ "train_unpatched = timer() - start\n",
+ "f\"Original Scikit-learn time: {train_unpatched:.2f} s\""
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "c85a125c",
+ "metadata": {},
+ "source": [
+ "Predict and get a result of the ElasticNet algorithm with original Scikit-learn"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 12,
+ "id": "cd9e726c",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "'Original Scikit-learn MSE: 1.0109113399545733'"
+ ]
+ },
+ "execution_count": 12,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "y_predict = model.predict(x_test)\n",
+ "mse_metric_original = metrics.mean_squared_error(y_test, y_predict)\n",
+ "f\"Original Scikit-learn MSE: {mse_metric_original}\""
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 13,
+ "id": "a2edbb65",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "
Compare MSE metric of patched Scikit-learn and original MSE metric of patched Scikit-learn: 1.0109113399224974 MSE metric of unpatched Scikit-learn: 1.0109113399545733 Metrics ratio: 0.9999999999682703 With Scikit-learn-intelex patching you can: Use your Scikit-learn code for training and prediction with minimal changes (a couple of lines of code); Fast execution training and prediction of Scikit-learn models; Get the similar quality Get speedup in 14.2 times. "
+ ],
+ "text/plain": [
+ ""
+ ]
+ },
+ "execution_count": 13,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "HTML(\n",
+ " f\"Compare MSE metric of patched Scikit-learn and original \"\n",
+ " f\"MSE metric of patched Scikit-learn: {mse_metric_opt} \"\n",
+ " f\"MSE metric of unpatched Scikit-learn: {mse_metric_original} \"\n",
+ " f\"Metrics ratio: {mse_metric_opt/mse_metric_original} \"\n",
+ " f\"With Scikit-learn-intelex patching you can: \"\n",
+ " f\"\"\n",
+ " f\"Use your Scikit-learn code for training and prediction with minimal changes (a couple of lines of code); \"\n",
+ " f\"Fast execution training and prediction of Scikit-learn models; \"\n",
+ " f\"Get the similar quality \"\n",
+ " f\"Get speedup in {(train_unpatched/train_patched):.1f} times. \"\n",
+ " f\" \"\n",
+ ")"
+ ]
+ }
+ ],
+ "metadata": {
+ "kernelspec": {
+ "display_name": "Python 3 (ipykernel)",
+ "language": "python",
+ "name": "python3"
+ },
+ "language_info": {
+ "codemirror_mode": {
+ "name": "ipython",
+ "version": 3
+ },
+ "file_extension": ".py",
+ "mimetype": "text/x-python",
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython3",
+ "version": "3.8.12"
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}
diff --git a/2023.2/.doctrees/nbsphinx/samples/daal4py_data_science.ipynb b/2023.2/.doctrees/nbsphinx/samples/daal4py_data_science.ipynb
new file mode 100644
index 0000000000..9336772cb3
--- /dev/null
+++ b/2023.2/.doctrees/nbsphinx/samples/daal4py_data_science.ipynb
@@ -0,0 +1,650 @@
+{
+ "cells": [
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "# Utilizing daal4py in Data Science Workflows\n",
+ "\n",
+ "The notebook below has been made to demonstrate daal4py in a data science context. It utilizes a Cycling Dataset for pyworkout-toolkit, and attempts to create a linear regression model from the 5 features collected for telemetry to predict the user's Power output in the absence of a power meter."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 1,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "'3.8.10 (default, May 19 2021, 18:05:58) \\n[GCC 7.3.0]'"
+ ]
+ },
+ "execution_count": 1,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "import pandas as pd\n",
+ "import matplotlib.pyplot as plt\n",
+ "import glob\n",
+ "import sys\n",
+ "\n",
+ "%matplotlib inline\n",
+ "sys.version"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "This example will be exploring workout data pulled from Strava, processed into a CSV for Pandas and daal4py usage. Below, we utilize pandas to read in the CSV file, and look at the head of the dataframe with .head()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 2,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " altitude \n",
+ " cadence \n",
+ " distance \n",
+ " hr \n",
+ " latitude \n",
+ " longitude \n",
+ " power \n",
+ " speed \n",
+ " time \n",
+ " \n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 \n",
+ " 185.800003 \n",
+ " 51 \n",
+ " 3.46 \n",
+ " 81 \n",
+ " 30.313309 \n",
+ " -97.732711 \n",
+ " 45 \n",
+ " 3.459 \n",
+ " 2016-10-20T22:01:26.000Z \n",
+ " \n",
+ " \n",
+ " 1 \n",
+ " 185.800003 \n",
+ " 68 \n",
+ " 7.17 \n",
+ " 82 \n",
+ " 30.313277 \n",
+ " -97.732715 \n",
+ " 0 \n",
+ " 3.710 \n",
+ " 2016-10-20T22:01:27.000Z \n",
+ " \n",
+ " \n",
+ " 2 \n",
+ " 186.399994 \n",
+ " 38 \n",
+ " 11.04 \n",
+ " 82 \n",
+ " 30.313243 \n",
+ " -97.732717 \n",
+ " 42 \n",
+ " 3.874 \n",
+ " 2016-10-20T22:01:28.000Z \n",
+ " \n",
+ " \n",
+ " 3 \n",
+ " 186.800003 \n",
+ " 38 \n",
+ " 15.18 \n",
+ " 83 \n",
+ " 30.313212 \n",
+ " -97.732720 \n",
+ " 5 \n",
+ " 4.135 \n",
+ " 2016-10-20T22:01:29.000Z \n",
+ " \n",
+ " \n",
+ " 4 \n",
+ " 186.600006 \n",
+ " 38 \n",
+ " 19.43 \n",
+ " 83 \n",
+ " 30.313172 \n",
+ " -97.732723 \n",
+ " 1 \n",
+ " 4.250 \n",
+ " 2016-10-20T22:01:30.000Z \n",
+ " \n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " altitude cadence distance hr latitude longitude power speed \\\n",
+ "0 185.800003 51 3.46 81 30.313309 -97.732711 45 3.459 \n",
+ "1 185.800003 68 7.17 82 30.313277 -97.732715 0 3.710 \n",
+ "2 186.399994 38 11.04 82 30.313243 -97.732717 42 3.874 \n",
+ "3 186.800003 38 15.18 83 30.313212 -97.732720 5 4.135 \n",
+ "4 186.600006 38 19.43 83 30.313172 -97.732723 1 4.250 \n",
+ "\n",
+ " time \n",
+ "0 2016-10-20T22:01:26.000Z \n",
+ "1 2016-10-20T22:01:27.000Z \n",
+ "2 2016-10-20T22:01:28.000Z \n",
+ "3 2016-10-20T22:01:29.000Z \n",
+ "4 2016-10-20T22:01:30.000Z "
+ ]
+ },
+ "execution_count": 2,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "workout_data_dd = pd.read_csv(\"data/cycling_dataset.csv\", index_col=0)\n",
+ "workout_data_dd.head()"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "The data above has several key features that would be of great use here. \n",
+ "- Altitude can affect performance, so it might be a useful feature. \n",
+ "- Cadence is the revolutions per minute of the crank, and may have possible influence. \n",
+ "- Heart Rate is a measure of the body's workout strain, and would have a high possibility of influence.\n",
+ "- Distance may have a loose correlation as it is highly route dependent, but might be possible.\n",
+ "- Speed has possible correlations as it ties directly into power."
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## Explore and visualize some of the data"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "In general, we are trying to predict on the 'power' in Watts to see if we can generate a model that can predict one's power output without the usage of a cycling power meter. Below are some basic scatterplots as we explore the data. Scatterplots are great for looking for patterns and correlation in the data itself. Below, we can see that cadence and speed are positively correlated. "
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 3,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "image/png": "",
+ "text/plain": [
+ ""
+ ]
+ },
+ "metadata": {
+ "needs_background": "light"
+ },
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "image/png": "",
+ "text/plain": [
+ ""
+ ]
+ },
+ "metadata": {
+ "needs_background": "light"
+ },
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "image/png": "",
+ "text/plain": [
+ ""
+ ]
+ },
+ "metadata": {
+ "needs_background": "light"
+ },
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "image/png": "",
+ "text/plain": [
+ ""
+ ]
+ },
+ "metadata": {
+ "needs_background": "light"
+ },
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "image/png": "",
+ "text/plain": [
+ ""
+ ]
+ },
+ "metadata": {
+ "needs_background": "light"
+ },
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "image/png": "",
+ "text/plain": [
+ ""
+ ]
+ },
+ "metadata": {
+ "needs_background": "light"
+ },
+ "output_type": "display_data"
+ }
+ ],
+ "source": [
+ "workout_data_dd.plot.scatter(\"cadence\", \"power\")\n",
+ "plt.show()\n",
+ "workout_data_dd.plot.scatter(\"hr\", \"power\")\n",
+ "plt.show()\n",
+ "workout_data_dd.plot.scatter(\"cadence\", \"speed\")\n",
+ "plt.show()\n",
+ "workout_data_dd.plot.scatter(\"speed\", \"power\")\n",
+ "plt.show()\n",
+ "workout_data_dd.plot.scatter(\"altitude\", \"power\")\n",
+ "plt.show()\n",
+ "workout_data_dd.plot.scatter(\"distance\", \"power\")\n",
+ "plt.show()"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## Using daal4py for Machine Learning tasks\n",
+ "\n",
+ "In the sections below, we will be using daal4py directly. After importing the library, we will arrange the data into separate independent and dependent dataframes, then use daal4py's training and prediction classes to generate a workable model."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 4,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "import daal4py as d4p"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "It is now the time to split the dataset into train and test sets. This is demonstrated below."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 5,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "(3902, 9)\n",
+ "(3000, 9) (902, 9)\n"
+ ]
+ }
+ ],
+ "source": [
+ "print(workout_data_dd.shape)\n",
+ "train_set = workout_data_dd[0:3000]\n",
+ "test_set = workout_data_dd[3000:]\n",
+ "print(train_set.shape, test_set.shape)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 6,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Reduce the dataset, create X. We drop the target, and other non-essential features.\n",
+ "reduced_dataset = train_set.drop([\"time\", \"power\", \"latitude\", \"longitude\"], axis=1)\n",
+ "# Get the target, create Y\n",
+ "target = train_set.power.values.reshape((-1, 1))\n",
+ "# This is essentially doing np.array(dataset.power.values, ndmin=2).T\n",
+ "# as it needs to force a 2 dimensional array as we only have 1 target"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "X is 5 features by 3k rows, Y is 3k rows by 1 column"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 7,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "(3000, 5) (3000, 1)\n"
+ ]
+ }
+ ],
+ "source": [
+ "print(reduced_dataset.values.shape, target.shape)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## Training the model"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "Create the Linear Regression Model, and train the model with the data. We utilize daal4py's linear_regression_training class to create the model, then call .compute() with the independent and dependent data as the parameters."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 8,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "d4p_lm = d4p.linear_regression_training(interceptFlag=True)\n",
+ "lm_trained = d4p_lm.compute(reduced_dataset.values, target)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 9,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Model has this number of features: 5\n"
+ ]
+ }
+ ],
+ "source": [
+ "print(\"Model has this number of features: \", lm_trained.model.NumberOfFeatures)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## Prediction (inference) with the trained model"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "Now that the model is trained, we can test it with the test part of the dataset. We drop the same features to match that of the trained model, and put it into daal4py's linear_regression_prediction class."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 10,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "subset = test_set.drop([\"time\", \"power\", \"latitude\", \"longitude\"], axis=1)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "Now we can create the Prediction object and use the reduced dataset for prediction. The class's arguments use the independent data and the trained model from above as the parameters."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 11,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "lm_predictor_component = d4p.linear_regression_prediction()\n",
+ "result = lm_predictor_component.compute(subset.values, lm_trained.model)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 12,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "image/png": "",
+ "text/plain": [
+ ""
+ ]
+ },
+ "metadata": {
+ "needs_background": "light"
+ },
+ "output_type": "display_data"
+ }
+ ],
+ "source": [
+ "plt.plot(result.prediction[0:300])\n",
+ "plt.plot(test_set.power.values[0:300])\n",
+ "plt.show()"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "The graph above shows the Orange (original data) plotted over the Blue (predicted) result. This data is notoriously sparse in features, leading to a difficult-to-predict target!"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## Model properties\n",
+ "Another aspect of the model is the trained model's properties, which are explored below."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 13,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Betas: [[ 1.51003501e+01 -1.25075548e-01 1.32249115e+00 1.64363922e-03\n",
+ " 8.53155955e-01 -1.09595022e+01]]\n",
+ "Number of betas: 6\n",
+ "Number of Features: 5\n"
+ ]
+ }
+ ],
+ "source": [
+ "print(\"Betas:\", lm_trained.model.Beta)\n",
+ "print(\"Number of betas:\", lm_trained.model.NumberOfBetas)\n",
+ "print(\"Number of Features:\", lm_trained.model.NumberOfFeatures)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## Additional metrics\n",
+ "We can generate metrics on the independent data with daal4py's low_order_moments() class."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 14,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "array([[1.90063975e+01, 3.75882355e+01, 4.98258371e+03, 2.41394741e+01,\n",
+ " 1.81623064e+00]])"
+ ]
+ },
+ "execution_count": 14,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "metrics_processor = d4p.low_order_moments()\n",
+ "data = metrics_processor.compute(reduced_dataset.values)\n",
+ "data.standardDeviation"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## Migrating the trained model for inference on external systems\n",
+ "\n",
+ "Occasionally one may need to migrate the trained model to another system for inference only--this use case allows the training on a much more powerful machine with a larger dataset, and placing the trained model for inference-only on a smaller machine."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 15,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "import pickle"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 16,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "with open(\"trained_model2.pickle\", \"wb\") as model_pi:\n",
+ " pickle.dump(lm_trained.model, model_pi)\n",
+ " model_pi.close"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "The trained model file above can be moved to an inference-only or embedded system. This is useful if the training is extremely heavy or compute-limited. "
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 17,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "with open(\"trained_model2.pickle\", \"rb\") as model_import:\n",
+ " lm_import = pickle.load(model_import)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "The imported model from file is now usable again. We can check the betas from the model to ensure that the trained model is present."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 18,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "array([[ 1.51003501e+01, -1.25075548e-01, 1.32249115e+00,\n",
+ " 1.64363922e-03, 8.53155955e-01, -1.09595022e+01]])"
+ ]
+ },
+ "execution_count": 18,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "lm_import.Beta"
+ ]
+ }
+ ],
+ "metadata": {
+ "kernelspec": {
+ "display_name": "Python 3",
+ "language": "python",
+ "name": "python3"
+ },
+ "language_info": {
+ "codemirror_mode": {
+ "name": "ipython",
+ "version": 3
+ },
+ "file_extension": ".py",
+ "mimetype": "text/x-python",
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython3",
+ "version": "3.10.9 (tags/v3.10.9:1dd9be6, Dec 6 2022, 20:01:21) [MSC v.1934 64 bit (AMD64)]"
+ },
+ "vscode": {
+ "interpreter": {
+ "hash": "8837a6bc722950b4562ef1f8ddb3cf1e2be71cad9580dda11136095ace1c488e"
+ }
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}
diff --git a/2023.2/.doctrees/nbsphinx/samples/dbscan.ipynb b/2023.2/.doctrees/nbsphinx/samples/dbscan.ipynb
new file mode 100644
index 0000000000..d6e5c92653
--- /dev/null
+++ b/2023.2/.doctrees/nbsphinx/samples/dbscan.ipynb
@@ -0,0 +1,344 @@
+{
+ "cells": [
+ {
+ "cell_type": "markdown",
+ "id": "3768ec43",
+ "metadata": {},
+ "source": [
+ "# Intel® Extension for Scikit-learn DBSCAN for spoken arabic digit dataset"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 1,
+ "id": "b1b922d1",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "from timeit import default_timer as timer\n",
+ "from sklearn.model_selection import train_test_split\n",
+ "from sklearn.metrics import davies_bouldin_score\n",
+ "from sklearn.datasets import fetch_openml\n",
+ "from IPython.display import HTML\n",
+ "import warnings\n",
+ "\n",
+ "warnings.filterwarnings(\"ignore\")"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "be391256",
+ "metadata": {},
+ "source": [
+ "### Download the data"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 2,
+ "id": "7e73dc65",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "x, y = fetch_openml(name=\"spoken-arabic-digit\", return_X_y=True)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "246f819f",
+ "metadata": {},
+ "source": [
+ "### Preprocessing\n",
+ "Split the data into train and test sets"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 3,
+ "id": "6fd95eeb",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.3, random_state=0)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "33da61da",
+ "metadata": {},
+ "source": [
+ "Normalize the data"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 4,
+ "id": "454a341c",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "from sklearn.preprocessing import MinMaxScaler\n",
+ "\n",
+ "scaler_x = MinMaxScaler()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 5,
+ "id": "02a779e9",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "scaler_x.fit(x_train)\n",
+ "x_train = scaler_x.transform(x_train)\n",
+ "x_test = scaler_x.transform(x_test)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "fe1d4fac",
+ "metadata": {},
+ "source": [
+ "### Patch original Scikit-learn with Intel® Extension for Scikit-learn\n",
+ "Intel® Extension for Scikit-learn (previously known as daal4py) contains drop-in replacement functionality for the stock Scikit-learn package. You can take advantage of the performance optimizations of Intel® Extension for Scikit-learn by adding just two lines of code before the usual Scikit-learn imports:"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 6,
+ "id": "ef6938df",
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "Intel(R) Extension for Scikit-learn* enabled (https://github.com/intel/scikit-learn-intelex)\n"
+ ]
+ }
+ ],
+ "source": [
+ "from sklearnex import patch_sklearn\n",
+ "\n",
+ "patch_sklearn()"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "20c5ab48",
+ "metadata": {},
+ "source": [
+ "Intel® Extension for Scikit-learn patching affects performance of specific Scikit-learn functionality. Refer to the [list of supported algorithms and parameters](https://intel.github.io/scikit-learn-intelex/latest/algorithms.html) for details. In cases when unsupported parameters are used, the package falls back to the original Scikit-learn. If the patching does not cover your scenarios, [submit an issue on GitHub](https://github.com/intel/scikit-learn-intelex/issues)."
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "f80273e7",
+ "metadata": {},
+ "source": [
+ "Training of the DBSCAN algorithm with Intel® Extension for Scikit-learn for spoken arabic digit dataset"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 7,
+ "id": "1ffc93c7",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "'Intel® extension for Scikit-learn time: 6.37 s'"
+ ]
+ },
+ "execution_count": 7,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "from sklearn.cluster import DBSCAN\n",
+ "\n",
+ "params = {\n",
+ " \"n_jobs\": -1,\n",
+ "}\n",
+ "start = timer()\n",
+ "y_pred = DBSCAN(**params).fit_predict(x_train)\n",
+ "train_patched = timer() - start\n",
+ "f\"Intel® extension for Scikit-learn time: {train_patched:.2f} s\""
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "f10b51fc",
+ "metadata": {},
+ "source": [
+ "Let's take a look at the Davies-Bouldin score of the DBSCAN algorithm with Intel® Extension for Scikit-learn"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 8,
+ "id": "d4295a26",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "'Intel® extension for Scikit-learn Davies-Bouldin score: 0.8542652084275848'"
+ ]
+ },
+ "execution_count": 8,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "score_opt = davies_bouldin_score(x_train, y_pred)\n",
+ "f\"Intel® extension for Scikit-learn Davies-Bouldin score: {score_opt}\""
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "cbe6db0d",
+ "metadata": {},
+ "source": [
+ "### Train the same algorithm with original Scikit-learn\n",
+ "In order to cancel optimizations, we use *unpatch_sklearn* and reimport the class DBSCAN"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 9,
+ "id": "6f64ba97",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "from sklearnex import unpatch_sklearn\n",
+ "\n",
+ "unpatch_sklearn()"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "f242c6da",
+ "metadata": {},
+ "source": [
+ "Training of the DBSCAN algorithm with original Scikit-learn library for spoken arabic digit dataset"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 10,
+ "id": "67243849",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "'Original Scikit-learn time: 469.21 s'"
+ ]
+ },
+ "execution_count": 10,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "from sklearn.cluster import DBSCAN\n",
+ "\n",
+ "start = timer()\n",
+ "y_pred = DBSCAN(**params).fit_predict(x_train)\n",
+ "train_unpatched = timer() - start\n",
+ "f\"Original Scikit-learn time: {train_unpatched:.2f} s\""
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "c85a125c",
+ "metadata": {},
+ "source": [
+ "Let's take a look at the Davies-Bouldin score of the DBSCAN algorithm with original Scikit-learn"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 11,
+ "id": "cd9e726c",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "'Original Scikit-learn Davies-Bouldin score: 0.8542652084275848'"
+ ]
+ },
+ "execution_count": 11,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "score_original = davies_bouldin_score(x_train, y_pred)\n",
+ "f\"Original Scikit-learn Davies-Bouldin score: {score_original}\""
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 12,
+ "id": "3639eef9",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "Compare Davies-Bouldin score of patched Scikit-learn and original Davies-Bouldin score of patched Scikit-learn: 0.8542652084275848 Davies-Bouldin score of unpatched Scikit-learn: 0.8542652084275848 Metrics ratio: 1.0 With Scikit-learn-intelex patching you can: Use your Scikit-learn code for training and prediction with minimal changes (a couple of lines of code); Fast execution training and prediction of Scikit-learn models; Get the similar quality Get speedup in 73.6 times. "
+ ],
+ "text/plain": [
+ ""
+ ]
+ },
+ "execution_count": 12,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "HTML(\n",
+ " f\"Compare Davies-Bouldin score of patched Scikit-learn and original \"\n",
+ " f\"Davies-Bouldin score of patched Scikit-learn: {score_opt} \"\n",
+ " f\"Davies-Bouldin score of unpatched Scikit-learn: {score_original} \"\n",
+ " f\"Metrics ratio: {score_opt/score_original} \"\n",
+ " f\"With Scikit-learn-intelex patching you can: \"\n",
+ " f\"\"\n",
+ " f\"Use your Scikit-learn code for training and prediction with minimal changes (a couple of lines of code); \"\n",
+ " f\"Fast execution training and prediction of Scikit-learn models; \"\n",
+ " f\"Get the similar quality \"\n",
+ " f\"Get speedup in {(train_unpatched/train_patched):.1f} times. \"\n",
+ " f\" \"\n",
+ ")"
+ ]
+ }
+ ],
+ "metadata": {
+ "kernelspec": {
+ "display_name": "Python 3 (ipykernel)",
+ "language": "python",
+ "name": "python3"
+ },
+ "language_info": {
+ "codemirror_mode": {
+ "name": "ipython",
+ "version": 3
+ },
+ "file_extension": ".py",
+ "mimetype": "text/x-python",
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython3",
+ "version": "3.8.12"
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}
diff --git a/2023.2/.doctrees/nbsphinx/samples/kmeans.ipynb b/2023.2/.doctrees/nbsphinx/samples/kmeans.ipynb
new file mode 100644
index 0000000000..df09f8ded5
--- /dev/null
+++ b/2023.2/.doctrees/nbsphinx/samples/kmeans.ipynb
@@ -0,0 +1,362 @@
+{
+ "cells": [
+ {
+ "cell_type": "markdown",
+ "id": "3768ec43",
+ "metadata": {},
+ "source": [
+ "# Intel® Extension for Scikit-learn Kmeans for spoken arabic digit dataset"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 1,
+ "id": "b1b922d1",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "from timeit import default_timer as timer\n",
+ "from sklearn.model_selection import train_test_split\n",
+ "from sklearn.datasets import fetch_openml\n",
+ "from IPython.display import HTML\n",
+ "import warnings\n",
+ "\n",
+ "warnings.filterwarnings(\"ignore\")"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "be391256",
+ "metadata": {},
+ "source": [
+ "### Download the data"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 2,
+ "id": "7e73dc65",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "x, y = fetch_openml(name=\"spoken-arabic-digit\", return_X_y=True)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "0cdcb77d",
+ "metadata": {},
+ "source": [
+ "### Preprocessing\n",
+ "Split the data into train and test sets"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 3,
+ "id": "0d332789",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "((236930, 14), (26326, 14), (236930,), (26326,))"
+ ]
+ },
+ "execution_count": 3,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.1, random_state=123)\n",
+ "x_train.shape, x_test.shape, y_train.shape, y_test.shape"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "246f819f",
+ "metadata": {},
+ "source": [
+ "Normalize the data"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 4,
+ "id": "454a341c",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "from sklearn.preprocessing import MinMaxScaler\n",
+ "\n",
+ "scaler_x = MinMaxScaler()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 5,
+ "id": "02a779e9",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "scaler_x.fit(x_train)\n",
+ "x_train = scaler_x.transform(x_train)\n",
+ "x_test = scaler_x.transform(x_test)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "fe1d4fac",
+ "metadata": {},
+ "source": [
+ "### Patch original Scikit-learn with Intel® Extension for Scikit-learn\n",
+ "Intel® Extension for Scikit-learn (previously known as daal4py) contains drop-in replacement functionality for the stock Scikit-learn package. You can take advantage of the performance optimizations of Intel® Extension for Scikit-learn by adding just two lines of code before the usual Scikit-learn imports:"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 6,
+ "id": "ef6938df",
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "Intel(R) Extension for Scikit-learn* enabled (https://github.com/intel/scikit-learn-intelex)\n"
+ ]
+ }
+ ],
+ "source": [
+ "from sklearnex import patch_sklearn\n",
+ "\n",
+ "patch_sklearn()"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "20c5ab48",
+ "metadata": {},
+ "source": [
+ "Intel® Extension for Scikit-learn patching affects performance of specific Scikit-learn functionality. Refer to the [list of supported algorithms and parameters](https://intel.github.io/scikit-learn-intelex/latest/algorithms.html) for details. In cases when unsupported parameters are used, the package falls back to the original Scikit-learn. If the patching does not cover your scenarios, [submit an issue on GitHub](https://github.com/intel/scikit-learn-intelex/issues)."
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "f80273e7",
+ "metadata": {},
+ "source": [
+ "Training of the KMeans algorithm with Intel® Extension for Scikit-learn for spoken arabic digit dataset"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 7,
+ "id": "1ffc93c7",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "'Intel® extension for Scikit-learn time: 7.36 s'"
+ ]
+ },
+ "execution_count": 7,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "from sklearn.cluster import KMeans\n",
+ "\n",
+ "params = {\n",
+ " \"n_clusters\": 128,\n",
+ " \"random_state\": 123,\n",
+ " \"copy_x\": False,\n",
+ "}\n",
+ "start = timer()\n",
+ "model = KMeans(**params).fit(x_train, y_train)\n",
+ "train_patched = timer() - start\n",
+ "f\"Intel® extension for Scikit-learn time: {train_patched:.2f} s\""
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "f10b51fc",
+ "metadata": {},
+ "source": [
+ "Let's take a look at inertia and number of iterations of the KMeans algorithm with Intel® Extension for Scikit-learn"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 8,
+ "id": "d4295a26",
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Intel® extension for Scikit-learn inertia: 13346.641333761074\n",
+ "Intel® extension for Scikit-learn number of iterations: 274\n"
+ ]
+ }
+ ],
+ "source": [
+ "inertia_opt = model.inertia_\n",
+ "n_iter_opt = model.n_iter_\n",
+ "print(f\"Intel® extension for Scikit-learn inertia: {inertia_opt}\")\n",
+ "print(f\"Intel® extension for Scikit-learn number of iterations: {n_iter_opt}\")"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "cbe6db0d",
+ "metadata": {},
+ "source": [
+ "### Train the same algorithm with original Scikit-learn\n",
+ "In order to cancel optimizations, we use *unpatch_sklearn* and reimport the class KMeans"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 9,
+ "id": "6f64ba97",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "from sklearnex import unpatch_sklearn\n",
+ "\n",
+ "unpatch_sklearn()"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "f242c6da",
+ "metadata": {},
+ "source": [
+ "Training of the KMeans algorithm with original Scikit-learn library for spoken arabic digit dataset"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 10,
+ "id": "67243849",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "'Original Scikit-learn time: 192.14 s'"
+ ]
+ },
+ "execution_count": 10,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "from sklearn.cluster import KMeans\n",
+ "\n",
+ "start = timer()\n",
+ "model = KMeans(**params).fit(x_train, y_train)\n",
+ "train_unpatched = timer() - start\n",
+ "f\"Original Scikit-learn time: {train_unpatched:.2f} s\""
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "c85a125c",
+ "metadata": {},
+ "source": [
+ "Let's take a look at inertia and number of iterations of the KMeans algorithm with original Scikit-learn"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 11,
+ "id": "cd9e726c",
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Original Scikit-learn inertia: 13352.813785961785\n",
+ "Original Scikit-learn number of iterations: 212\n"
+ ]
+ }
+ ],
+ "source": [
+ "inertia_original = model.inertia_\n",
+ "n_iter_original = model.n_iter_\n",
+ "print(f\"Original Scikit-learn inertia: {inertia_original}\")\n",
+ "print(f\"Original Scikit-learn number of iterations: {n_iter_original}\")"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 12,
+ "id": "3639eef9",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "Compare inertia and number of iterations of patched Scikit-learn and original Inertia: Patched Scikit-learn: 13346.641333761074 Unpatched Scikit-learn: 13352.813785961785 Ratio: 0.9995377414603653 Number of iterations: Patched Scikit-learn: 274 Unpatched Scikit-learn: 212 Ratio: 1.29 Number of iterations is bigger but algorithm is much faster and inertia is lowerWith Scikit-learn-intelex patching you can: Use your Scikit-learn code for training and prediction with minimal changes (a couple of lines of code); Fast execution training and prediction of Scikit-learn models; Get speedup in 26.1 times. "
+ ],
+ "text/plain": [
+ ""
+ ]
+ },
+ "execution_count": 12,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "HTML(\n",
+ " f\"Compare inertia and number of iterations of patched Scikit-learn and original \"\n",
+ " f\"Inertia: \"\n",
+ " f\"Patched Scikit-learn: {inertia_opt} \"\n",
+ " f\"Unpatched Scikit-learn: {inertia_original} \"\n",
+ " f\"Ratio: {inertia_opt/inertia_original} \"\n",
+ " f\"Number of iterations: \"\n",
+ " f\"Patched Scikit-learn: {n_iter_opt} \"\n",
+ " f\"Unpatched Scikit-learn: {n_iter_original} \"\n",
+ " f\"Ratio: {(n_iter_opt/n_iter_original):.2f} \"\n",
+ " f\"Number of iterations is bigger but algorithm is much faster and inertia is lower\"\n",
+ " f\"With Scikit-learn-intelex patching you can: \"\n",
+ " f\"\"\n",
+ " f\"Use your Scikit-learn code for training and prediction with minimal changes (a couple of lines of code); \"\n",
+ " f\"Fast execution training and prediction of Scikit-learn models; \"\n",
+ " f\"Get speedup in {(train_unpatched/train_patched):.1f} times. \"\n",
+ " f\" \"\n",
+ ")"
+ ]
+ }
+ ],
+ "metadata": {
+ "kernelspec": {
+ "display_name": "Python 3 (ipykernel)",
+ "language": "python",
+ "name": "python3"
+ },
+ "language_info": {
+ "codemirror_mode": {
+ "name": "ipython",
+ "version": 3
+ },
+ "file_extension": ".py",
+ "mimetype": "text/x-python",
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython3",
+ "version": "3.8.12"
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}
diff --git a/2023.2/.doctrees/nbsphinx/samples/knn_mnist.ipynb b/2023.2/.doctrees/nbsphinx/samples/knn_mnist.ipynb
new file mode 100644
index 0000000000..b8604d70f0
--- /dev/null
+++ b/2023.2/.doctrees/nbsphinx/samples/knn_mnist.ipynb
@@ -0,0 +1,333 @@
+{
+ "cells": [
+ {
+ "cell_type": "markdown",
+ "id": "f5c4abc0",
+ "metadata": {},
+ "source": [
+ "# Intel® Extension for Scikit-learn KNN for MNIST dataset"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 1,
+ "id": "23512089",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "from timeit import default_timer as timer\n",
+ "from IPython.display import HTML\n",
+ "from sklearn import metrics\n",
+ "from sklearn.datasets import fetch_openml\n",
+ "from sklearn.model_selection import train_test_split"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "b6e359f6",
+ "metadata": {},
+ "source": [
+ "### Download the data "
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 2,
+ "id": "27b99b44",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "x, y = fetch_openml(name=\"mnist_784\", return_X_y=True)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "6259f584",
+ "metadata": {},
+ "source": [
+ "Split the data into train and test sets"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 3,
+ "id": "96e14dd7",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "((56000, 784), (14000, 784), (56000,), (14000,))"
+ ]
+ },
+ "execution_count": 3,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.2, random_state=72)\n",
+ "x_train.shape, x_test.shape, y_train.shape, y_test.shape"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "0341cac9",
+ "metadata": {},
+ "source": [
+ "### Patch original Scikit-learn with Intel® Extension for Scikit-learn\n",
+ "Intel® Extension for Scikit-learn (previously known as daal4py) contains drop-in replacement functionality for the stock Scikit-learn package. You can take advantage of the performance optimizations of Intel® Extension for Scikit-learn by adding just two lines of code before the usual Scikit-learn imports:"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 4,
+ "id": "244c5bc9",
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "Intel(R) Extension for Scikit-learn* enabled (https://github.com/intel/scikit-learn-intelex)\n"
+ ]
+ }
+ ],
+ "source": [
+ "from sklearnex import patch_sklearn\n",
+ "\n",
+ "patch_sklearn()"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "6bb14ac8",
+ "metadata": {},
+ "source": [
+ "Intel® Extension for Scikit-learn patching affects performance of specific Scikit-learn functionality. Refer to the [list of supported algorithms and parameters](https://intel.github.io/scikit-learn-intelex/latest/algorithms.html) for details. In cases when unsupported parameters are used, the package falls back to the original Scikit-learn. If the patching does not cover your scenarios, [submit an issue on GitHub](https://github.com/intel/scikit-learn-intelex/issues)."
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "693b4e26",
+ "metadata": {},
+ "source": [
+ "Train the KNN algorithm and predict with Intel® Extension for Scikit-learn for the MNIST dataset"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 5,
+ "id": "e9b8f06b",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "'Intel® extension for Scikit-learn time: 1.45 s'"
+ ]
+ },
+ "execution_count": 5,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "from sklearn.neighbors import KNeighborsClassifier\n",
+ "\n",
+ "params = {\"n_neighbors\": 40, \"weights\": \"distance\", \"n_jobs\": -1}\n",
+ "start = timer()\n",
+ "knn = KNeighborsClassifier(**params).fit(x_train, y_train)\n",
+ "predicted = knn.predict(x_test)\n",
+ "time_opt = timer() - start\n",
+ "f\"Intel® extension for Scikit-learn time: {time_opt:.2f} s\""
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 6,
+ "id": "8ca549ae",
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Classification report for Intel® extension for Scikit-learn KNN:\n",
+ " precision recall f1-score support\n",
+ "\n",
+ " 0 0.97 0.99 0.98 1365\n",
+ " 1 0.93 0.99 0.96 1637\n",
+ " 2 0.99 0.94 0.96 1401\n",
+ " 3 0.96 0.95 0.96 1455\n",
+ " 4 0.98 0.96 0.97 1380\n",
+ " 5 0.95 0.95 0.95 1219\n",
+ " 6 0.96 0.99 0.97 1317\n",
+ " 7 0.94 0.95 0.95 1420\n",
+ " 8 0.99 0.90 0.94 1379\n",
+ " 9 0.92 0.94 0.93 1427\n",
+ "\n",
+ " accuracy 0.96 14000\n",
+ " macro avg 0.96 0.96 0.96 14000\n",
+ "weighted avg 0.96 0.96 0.96 14000\n",
+ "\n",
+ "\n"
+ ]
+ }
+ ],
+ "source": [
+ "report = metrics.classification_report(y_test, predicted)\n",
+ "print(f\"Classification report for Intel® extension for Scikit-learn KNN:\\n{report}\\n\")"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "bd8e7b0b",
+ "metadata": {},
+ "source": [
+ "*The first column of the classification report above is the class labels.* \n",
+ " \n",
+ "### Train the same algorithm with original Scikit-learn\n",
+ "In order to cancel optimizations, we use *unpatch_sklearn* and reimport the class KNeighborsClassifier."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 7,
+ "id": "5bb884d5",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "from sklearnex import unpatch_sklearn\n",
+ "\n",
+ "unpatch_sklearn()"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "8cfa0dba",
+ "metadata": {},
+ "source": [
+ "Train the KNN algorithm and predict with the original Scikit-learn library for the MNIST dataset"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 8,
+ "id": "ae421d8e",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "'Original Scikit-learn time: 36.15 s'"
+ ]
+ },
+ "execution_count": 8,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "from sklearn.neighbors import KNeighborsClassifier\n",
+ "\n",
+ "\n",
+ "start = timer()\n",
+ "knn = KNeighborsClassifier(**params).fit(x_train, y_train)\n",
+ "predicted = knn.predict(x_test)\n",
+ "time_original = timer() - start\n",
+ "f\"Original Scikit-learn time: {time_original:.2f} s\""
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 9,
+ "id": "33da9fd1",
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Classification report for original Scikit-learn KNN:\n",
+ " precision recall f1-score support\n",
+ "\n",
+ " 0 0.97 0.99 0.98 1365\n",
+ " 1 0.93 0.99 0.96 1637\n",
+ " 2 0.99 0.94 0.96 1401\n",
+ " 3 0.96 0.95 0.96 1455\n",
+ " 4 0.98 0.96 0.97 1380\n",
+ " 5 0.95 0.95 0.95 1219\n",
+ " 6 0.96 0.99 0.97 1317\n",
+ " 7 0.94 0.95 0.95 1420\n",
+ " 8 0.99 0.90 0.94 1379\n",
+ " 9 0.92 0.94 0.93 1427\n",
+ "\n",
+ " accuracy 0.96 14000\n",
+ " macro avg 0.96 0.96 0.96 14000\n",
+ "weighted avg 0.96 0.96 0.96 14000\n",
+ "\n",
+ "\n"
+ ]
+ }
+ ],
+ "source": [
+ "report = metrics.classification_report(y_test, predicted)\n",
+ "print(f\"Classification report for original Scikit-learn KNN:\\n{report}\\n\")"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 10,
+ "id": "ffd79e96",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "With scikit-learn-intelex patching you can: Use your Scikit-learn code for training and prediction with minimal changes (a couple of lines of code); Fast execution training and prediction of Scikit-learn models; Get the similar quality Get speedup in 24.9 times. "
+ ],
+ "text/plain": [
+ ""
+ ]
+ },
+ "execution_count": 10,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "HTML(\n",
+ " f\"With scikit-learn-intelex patching you can: \"\n",
+ " f\"\"\n",
+ " f\"Use your Scikit-learn code for training and prediction with minimal changes (a couple of lines of code); \"\n",
+ " f\"Fast execution training and prediction of Scikit-learn models; \"\n",
+ " f\"Get the similar quality \"\n",
+ " f\"Get speedup in {(time_original/time_opt):.1f} times. \"\n",
+ " f\" \"\n",
+ ")"
+ ]
+ }
+ ],
+ "metadata": {
+ "kernelspec": {
+ "display_name": "Python 3 (ipykernel)",
+ "language": "python",
+ "name": "python3"
+ },
+ "language_info": {
+ "codemirror_mode": {
+ "name": "ipython",
+ "version": 3
+ },
+ "file_extension": ".py",
+ "mimetype": "text/x-python",
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython3",
+ "version": "3.8.12"
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}
diff --git a/2023.2/.doctrees/nbsphinx/samples/lasso_regression.ipynb b/2023.2/.doctrees/nbsphinx/samples/lasso_regression.ipynb
new file mode 100644
index 0000000000..967d0d4e54
--- /dev/null
+++ b/2023.2/.doctrees/nbsphinx/samples/lasso_regression.ipynb
@@ -0,0 +1,383 @@
+{
+ "cells": [
+ {
+ "cell_type": "markdown",
+ "id": "3768ec43",
+ "metadata": {},
+ "source": [
+ "# Intel® Extension for Scikit-learn Lasso Regression for YearPredictionMSD dataset"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 1,
+ "id": "b1b922d1",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "from timeit import default_timer as timer\n",
+ "from sklearn import metrics\n",
+ "from sklearn.model_selection import train_test_split\n",
+ "import pandas as pd\n",
+ "import numpy as np\n",
+ "import requests\n",
+ "import warnings\n",
+ "import os\n",
+ "from IPython.display import HTML\n",
+ "\n",
+ "warnings.filterwarnings(\"ignore\")"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "db2d1c39",
+ "metadata": {},
+ "source": [
+ "### Download the data"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 2,
+ "id": "e58a6e28",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "dataset_dir = \"data\"\n",
+ "dataset_name = \"year_prediction_msd\"\n",
+ "url = \"https://archive.ics.uci.edu/ml/machine-learning-databases/00203/YearPredictionMSD.txt.zip\"\n",
+ "\n",
+ "os.makedirs(dataset_dir, exist_ok=True)\n",
+ "local_url = os.path.join(dataset_dir, os.path.basename(url))\n",
+ "\n",
+ "if not os.path.isfile(local_url):\n",
+ " response = requests.get(url, stream=True)\n",
+ " with open(local_url, \"wb+\") as file:\n",
+ " for data in response.iter_content(8192):\n",
+ " file.write(data)\n",
+ "\n",
+ "year = pd.read_csv(local_url, header=None)\n",
+ "x = year.iloc[:, 1:].to_numpy(dtype=np.float32)\n",
+ "y = year.iloc[:, 0].to_numpy(dtype=np.float32)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "532874ab",
+ "metadata": {},
+ "source": [
+ "Split the data into train and test sets"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 3,
+ "id": "0d332789",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "((463810, 90), (51535, 90), (463810,), (51535,))"
+ ]
+ },
+ "execution_count": 3,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.1, random_state=0)\n",
+ "x_train.shape, x_test.shape, y_train.shape, y_test.shape"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "246f819f",
+ "metadata": {},
+ "source": [
+ "### Normalize the data"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 4,
+ "id": "454a341c",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "from sklearn.preprocessing import MinMaxScaler, StandardScaler\n",
+ "\n",
+ "scaler_x = MinMaxScaler()\n",
+ "scaler_y = StandardScaler()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 5,
+ "id": "df400504",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "scaler_x.fit(x_train)\n",
+ "x_train = scaler_x.transform(x_train)\n",
+ "x_test = scaler_x.transform(x_test)\n",
+ "\n",
+ "scaler_y.fit(y_train.reshape(-1, 1))\n",
+ "y_train = scaler_y.transform(y_train.reshape(-1, 1)).ravel()\n",
+ "y_test = scaler_y.transform(y_test.reshape(-1, 1)).ravel()"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "fe1d4fac",
+ "metadata": {},
+ "source": [
+ "### Patch original Scikit-learn with Intel® Extension for Scikit-learn\n",
+ "Intel® Extension for Scikit-learn (previously known as daal4py) contains drop-in replacement functionality for the stock Scikit-learn package. You can take advantage of the performance optimizations of Intel® Extension for Scikit-learn by adding just two lines of code before the usual Scikit-learn imports:"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 6,
+ "id": "ef6938df",
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "Intel(R) Extension for Scikit-learn* enabled (https://github.com/intel/scikit-learn-intelex)\n"
+ ]
+ }
+ ],
+ "source": [
+ "from sklearnex import patch_sklearn\n",
+ "\n",
+ "patch_sklearn()"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "20c5ab48",
+ "metadata": {},
+ "source": [
+ "Intel® Extension for Scikit-learn patching affects performance of specific Scikit-learn functionality. Refer to the [list of supported algorithms and parameters](https://intel.github.io/scikit-learn-intelex/latest/algorithms.html) for details. In cases when unsupported parameters are used, the package falls back to the original Scikit-learn. If the patching does not cover your scenarios, [submit an issue on GitHub](https://github.com/intel/scikit-learn-intelex/issues)."
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "f80273e7",
+ "metadata": {},
+ "source": [
+ "Training of the Lasso algorithm with Intel® Extension for Scikit-learn for YearPredictionMSD dataset"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 7,
+ "id": "a4dd1c7e",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "'Intel® extension for Scikit-learn time: 0.06 s'"
+ ]
+ },
+ "execution_count": 7,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "from sklearn.linear_model import Lasso\n",
+ "\n",
+ "params = {\n",
+ " \"alpha\": 0.01,\n",
+ " \"fit_intercept\": False,\n",
+ " \"random_state\": 0,\n",
+ " \"copy_X\": False,\n",
+ "}\n",
+ "start = timer()\n",
+ "model = Lasso(**params).fit(x_train, y_train)\n",
+ "train_patched = timer() - start\n",
+ "f\"Intel® extension for Scikit-learn time: {train_patched:.2f} s\""
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "f10b51fc",
+ "metadata": {},
+ "source": [
+ "Predict and get a result of the Lasso algorithm with Intel® Extension for Scikit-learn"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 8,
+ "id": "d4295a26",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "'Patched Scikit-learn MSE: 0.9676607251167297'"
+ ]
+ },
+ "execution_count": 8,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "y_predict = model.predict(x_test)\n",
+ "mse_metric_opt = metrics.mean_squared_error(y_test, y_predict)\n",
+ "f\"Patched Scikit-learn MSE: {mse_metric_opt}\""
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "cbe6db0d",
+ "metadata": {},
+ "source": [
+ "### Train the same algorithm with original Scikit-learn\n",
+ "In order to cancel optimizations, we use *unpatch_sklearn* and reimport the class Lasso"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 9,
+ "id": "6f64ba97",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "from sklearnex import unpatch_sklearn\n",
+ "\n",
+ "unpatch_sklearn()"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "f242c6da",
+ "metadata": {},
+ "source": [
+ "Training of the Lasso algorithm with original Scikit-learn library for YearPredictionMSD dataset"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 10,
+ "id": "67243849",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "'Original Scikit-learn time: 0.83 s'"
+ ]
+ },
+ "execution_count": 10,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "from sklearn.linear_model import Lasso\n",
+ "\n",
+ "start = timer()\n",
+ "model = Lasso(**params).fit(x_train, y_train)\n",
+ "train_unpatched = timer() - start\n",
+ "f\"Original Scikit-learn time: {train_unpatched:.2f} s\""
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "c85a125c",
+ "metadata": {},
+ "source": [
+ "Predict and get a result of the Lasso algorithm with original Scikit-learn"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 11,
+ "id": "cd9e726c",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "'Original Scikit-learn MSE: 0.9676599502563477'"
+ ]
+ },
+ "execution_count": 11,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "y_predict = model.predict(x_test)\n",
+ "mse_metric_original = metrics.mean_squared_error(y_test, y_predict)\n",
+ "f\"Original Scikit-learn MSE: {mse_metric_original}\""
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 12,
+ "id": "13c86289",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "Compare MSE metric of patched Scikit-learn and original MSE metric of patched Scikit-learn: 0.9676607251167297 MSE metric of unpatched Scikit-learn: 0.9676599502563477 Metrics ratio: 1.0000008344650269 With Scikit-learn-intelex patching you can: Use your Scikit-learn code for training and prediction with minimal changes (a couple of lines of code); Fast execution training and prediction of Scikit-learn models; Get the similar quality Get speedup in 13.7 times. "
+ ],
+ "text/plain": [
+ ""
+ ]
+ },
+ "execution_count": 12,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "HTML(\n",
+ " f\"Compare MSE metric of patched Scikit-learn and original \"\n",
+ " f\"MSE metric of patched Scikit-learn: {mse_metric_opt} \"\n",
+ " f\"MSE metric of unpatched Scikit-learn: {mse_metric_original} \"\n",
+ " f\"Metrics ratio: {mse_metric_opt/mse_metric_original} \"\n",
+ " f\"With Scikit-learn-intelex patching you can: \"\n",
+ " f\"\"\n",
+ " f\"Use your Scikit-learn code for training and prediction with minimal changes (a couple of lines of code); \"\n",
+ " f\"Fast execution training and prediction of Scikit-learn models; \"\n",
+ " f\"Get the similar quality \"\n",
+ " f\"Get speedup in {(train_unpatched/train_patched):.1f} times. \"\n",
+ " f\" \"\n",
+ ")"
+ ]
+ }
+ ],
+ "metadata": {
+ "kernelspec": {
+ "display_name": "Python 3 (ipykernel)",
+ "language": "python",
+ "name": "python3"
+ },
+ "language_info": {
+ "codemirror_mode": {
+ "name": "ipython",
+ "version": 3
+ },
+ "file_extension": ".py",
+ "mimetype": "text/x-python",
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython3",
+ "version": "3.8.12"
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}
diff --git a/2023.2/.doctrees/nbsphinx/samples/linear_regression.ipynb b/2023.2/.doctrees/nbsphinx/samples/linear_regression.ipynb
new file mode 100644
index 0000000000..508ee06d8c
--- /dev/null
+++ b/2023.2/.doctrees/nbsphinx/samples/linear_regression.ipynb
@@ -0,0 +1,378 @@
+{
+ "cells": [
+ {
+ "cell_type": "markdown",
+ "id": "3768ec43",
+ "metadata": {},
+ "source": [
+ "# Intel® Extension for Scikit-learn Linear Regression for YearPredictionMSD dataset"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 1,
+ "id": "b1b922d1",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "from timeit import default_timer as timer\n",
+ "from sklearn import metrics\n",
+ "from sklearn.model_selection import train_test_split\n",
+ "import pandas as pd\n",
+ "import numpy as np\n",
+ "import os\n",
+ "import requests\n",
+ "import warnings\n",
+ "from IPython.display import HTML\n",
+ "\n",
+ "warnings.filterwarnings(\"ignore\")"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "ad7ce109",
+ "metadata": {},
+ "source": [
+ "### Download the data"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 2,
+ "id": "801ea6cd",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "dataset_dir = \"data\"\n",
+ "dataset_name = \"year_prediction_msd\"\n",
+ "url = \"https://archive.ics.uci.edu/ml/machine-learning-databases/00203/YearPredictionMSD.txt.zip\"\n",
+ "\n",
+ "os.makedirs(dataset_dir, exist_ok=True)\n",
+ "local_url = os.path.join(dataset_dir, os.path.basename(url))\n",
+ "\n",
+ "if not os.path.isfile(local_url):\n",
+ " response = requests.get(url, stream=True)\n",
+ " with open(local_url, \"wb+\") as file:\n",
+ " for data in response.iter_content(8192):\n",
+ " file.write(data)\n",
+ "\n",
+ "year = pd.read_csv(local_url, header=None)\n",
+ "x = year.iloc[:, 1:].to_numpy(dtype=np.float32)\n",
+ "y = year.iloc[:, 0].to_numpy(dtype=np.float32)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "03431aec",
+ "metadata": {},
+ "source": [
+ "Split the data into train and test sets"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 3,
+ "id": "0d332789",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "((463810, 90), (51535, 90), (463810,), (51535,))"
+ ]
+ },
+ "execution_count": 3,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.1, random_state=0)\n",
+ "x_train.shape, x_test.shape, y_train.shape, y_test.shape"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "246f819f",
+ "metadata": {},
+ "source": [
+ "### Normalize the data"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 4,
+ "id": "454a341c",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "from sklearn.preprocessing import MinMaxScaler, StandardScaler\n",
+ "\n",
+ "scaler_x = MinMaxScaler()\n",
+ "scaler_y = StandardScaler()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 5,
+ "id": "df400504",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "scaler_x.fit(x_train)\n",
+ "x_train = scaler_x.transform(x_train)\n",
+ "x_test = scaler_x.transform(x_test)\n",
+ "\n",
+ "scaler_y.fit(y_train.reshape(-1, 1))\n",
+ "y_train = scaler_y.transform(y_train.reshape(-1, 1)).ravel()\n",
+ "y_test = scaler_y.transform(y_test.reshape(-1, 1)).ravel()"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "fe1d4fac",
+ "metadata": {},
+ "source": [
+ "### Patch original Scikit-learn with Intel® Extension for Scikit-learn\n",
+ "Intel® Extension for Scikit-learn (previously known as daal4py) contains drop-in replacement functionality for the stock Scikit-learn package. You can take advantage of the performance optimizations of Intel® Extension for Scikit-learn by adding just two lines of code before the usual Scikit-learn imports:"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 6,
+ "id": "ef6938df",
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "Intel(R) Extension for Scikit-learn* enabled (https://github.com/intel/scikit-learn-intelex)\n"
+ ]
+ }
+ ],
+ "source": [
+ "from sklearnex import patch_sklearn\n",
+ "\n",
+ "patch_sklearn()"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "20c5ab48",
+ "metadata": {},
+ "source": [
+ "Intel® Extension for Scikit-learn patching affects performance of specific Scikit-learn functionality. Refer to the [list of supported algorithms and parameters](https://intel.github.io/scikit-learn-intelex/latest/algorithms.html) for details. In cases when unsupported parameters are used, the package falls back to the original Scikit-learn. If the patching does not cover your scenarios, [submit an issue on GitHub](https://github.com/intel/scikit-learn-intelex/issues)."
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "f80273e7",
+ "metadata": {},
+ "source": [
+ "Training of the Linear Regression algorithm with Intel® Extension for Scikit-learn for YearPredictionMSD dataset"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 7,
+ "id": "a4dd1c7e",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "'Intel® extension for Scikit-learn time: 0.03 s'"
+ ]
+ },
+ "execution_count": 7,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "from sklearn.linear_model import LinearRegression\n",
+ "\n",
+ "params = {\"n_jobs\": -1, \"copy_X\": False}\n",
+ "start = timer()\n",
+ "model = LinearRegression(**params).fit(x_train, y_train)\n",
+ "train_patched = timer() - start\n",
+ "f\"Intel® extension for Scikit-learn time: {train_patched:.2f} s\""
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "f10b51fc",
+ "metadata": {},
+ "source": [
+ "Predict and get a result of the Linear Regression algorithm with Intel® Extension for Scikit-learn"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 8,
+ "id": "d4295a26",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "'Patched Scikit-learn MSE: 0.7716818451881409'"
+ ]
+ },
+ "execution_count": 8,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "y_predict = model.predict(x_test)\n",
+ "mse_metric_opt = metrics.mean_squared_error(y_test, y_predict)\n",
+ "f\"Patched Scikit-learn MSE: {mse_metric_opt}\""
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "cbe6db0d",
+ "metadata": {},
+ "source": [
+ "### Train the same algorithm with original Scikit-learn\n",
+ "In order to cancel optimizations, we use *unpatch_sklearn* and reimport the class LinearRegression"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 9,
+ "id": "6f64ba97",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "from sklearnex import unpatch_sklearn\n",
+ "\n",
+ "unpatch_sklearn()"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "f242c6da",
+ "metadata": {},
+ "source": [
+ "Training of the Linear Regression algorithm with original Scikit-learn library for YearPredictionMSD dataset"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 10,
+ "id": "67243849",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "'Original Scikit-learn time: 0.53 s'"
+ ]
+ },
+ "execution_count": 10,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "from sklearn.linear_model import LinearRegression\n",
+ "\n",
+ "start = timer()\n",
+ "model = LinearRegression(**params).fit(x_train, y_train)\n",
+ "train_unpatched = timer() - start\n",
+ "f\"Original Scikit-learn time: {train_unpatched:.2f} s\""
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "c85a125c",
+ "metadata": {},
+ "source": [
+ "Predict and get a result of the Linear Regression algorithm with original Scikit-learn"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 11,
+ "id": "cd9e726c",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "'Original Scikit-learn MSE: 0.7716856598854065'"
+ ]
+ },
+ "execution_count": 11,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "y_predict = model.predict(x_test)\n",
+ "mse_metric_original = metrics.mean_squared_error(y_test, y_predict)\n",
+ "f\"Original Scikit-learn MSE: {mse_metric_original}\""
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 12,
+ "id": "91fb14e4",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "Compare MSE metric of patched Scikit-learn and original MSE metric of patched Scikit-learn: 0.7716818451881409 MSE metric of unpatched Scikit-learn: 0.7716856598854065 Metrics ratio: 0.9999950528144836 With Scikit-learn-intelex patching you can: Use your Scikit-learn code for training and prediction with minimal changes (a couple of lines of code); Fast execution training and prediction of Scikit-learn models; Get the similar quality Get speedup in 18.4 times. "
+ ],
+ "text/plain": [
+ ""
+ ]
+ },
+ "execution_count": 12,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "HTML(\n",
+ " f\"Compare MSE metric of patched Scikit-learn and original \"\n",
+ " f\"MSE metric of patched Scikit-learn: {mse_metric_opt} \"\n",
+ " f\"MSE metric of unpatched Scikit-learn: {mse_metric_original} \"\n",
+ " f\"Metrics ratio: {mse_metric_opt/mse_metric_original} \"\n",
+ " f\"With Scikit-learn-intelex patching you can: \"\n",
+ " f\"\"\n",
+ " f\"Use your Scikit-learn code for training and prediction with minimal changes (a couple of lines of code); \"\n",
+ " f\"Fast execution training and prediction of Scikit-learn models; \"\n",
+ " f\"Get the similar quality \"\n",
+ " f\"Get speedup in {(train_unpatched/train_patched):.1f} times. \"\n",
+ " f\" \"\n",
+ ")"
+ ]
+ }
+ ],
+ "metadata": {
+ "kernelspec": {
+ "display_name": "Python 3 (ipykernel)",
+ "language": "python",
+ "name": "python3"
+ },
+ "language_info": {
+ "codemirror_mode": {
+ "name": "ipython",
+ "version": 3
+ },
+ "file_extension": ".py",
+ "mimetype": "text/x-python",
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython3",
+ "version": "3.8.12"
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}
diff --git a/2023.2/.doctrees/nbsphinx/samples/logistictic_regression_cifar.ipynb b/2023.2/.doctrees/nbsphinx/samples/logistictic_regression_cifar.ipynb
new file mode 100644
index 0000000000..43727804d7
--- /dev/null
+++ b/2023.2/.doctrees/nbsphinx/samples/logistictic_regression_cifar.ipynb
@@ -0,0 +1,329 @@
+{
+ "cells": [
+ {
+ "cell_type": "markdown",
+ "id": "f5c4abc0",
+ "metadata": {},
+ "source": [
+ "# Intel® Extension for Scikit-learn Logistic Regression for Cifar dataset"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 1,
+ "id": "23512089",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "from timeit import default_timer as timer\n",
+ "from sklearn import metrics\n",
+ "from sklearn.model_selection import train_test_split\n",
+ "import warnings\n",
+ "from IPython.display import HTML\n",
+ "\n",
+ "warnings.filterwarnings(\"ignore\")"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "fbb52aca",
+ "metadata": {},
+ "source": [
+ "### Download the data"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 2,
+ "id": "27b99b44",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "from sklearn.datasets import fetch_openml\n",
+ "\n",
+ "x, y = fetch_openml(name=\"CIFAR-100\", return_X_y=True)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "bc8ba7c8",
+ "metadata": {},
+ "source": [
+ "Split the data into train and test sets"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 3,
+ "id": "96e14dd7",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "((54000, 3072), (6000, 3072), (54000,))"
+ ]
+ },
+ "execution_count": 3,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.1, random_state=43)\n",
+ "x_train.shape, x_test.shape, y_train.shape"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "0341cac9",
+ "metadata": {},
+ "source": [
+ "### Patch original Scikit-learn with Intel® Extension for Scikit-learn\n",
+ "Intel® Extension for Scikit-learn (previously known as daal4py) contains drop-in replacement functionality for the stock Scikit-learn package. You can take advantage of the performance optimizations of Intel® Extension for Scikit-learn by adding just two lines of code before the usual Scikit-learn imports:"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 4,
+ "id": "244c5bc9",
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "Intel(R) Extension for Scikit-learn* enabled (https://github.com/intel/scikit-learn-intelex)\n"
+ ]
+ }
+ ],
+ "source": [
+ "from sklearnex import patch_sklearn\n",
+ "\n",
+ "patch_sklearn()"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "6bb14ac8",
+ "metadata": {},
+ "source": [
+ "Intel® Extension for Scikit-learn patching affects performance of specific Scikit-learn functionality. Refer to the [list of supported algorithms and parameters](https://intel.github.io/scikit-learn-intelex/latest/algorithms.html) for details. In cases when unsupported parameters are used, the package falls back to the original Scikit-learn. If the patching does not cover your scenarios, [submit an issue on GitHub](https://github.com/intel/scikit-learn-intelex/issues)."
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "693b4e26",
+ "metadata": {},
+ "source": [
+ "Training of the Logistic Regression algorithm with Intel® Extension for Scikit-learn for CIFAR dataset"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 5,
+ "id": "e9b8f06b",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "'Intel® extension for Scikit-learn time: 24.82 s'"
+ ]
+ },
+ "execution_count": 5,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "from sklearn.linear_model import LogisticRegression\n",
+ "\n",
+ "params = {\n",
+ " \"C\": 0.1,\n",
+ " \"solver\": \"lbfgs\",\n",
+ " \"multi_class\": \"multinomial\",\n",
+ " \"n_jobs\": -1,\n",
+ "}\n",
+ "start = timer()\n",
+ "classifier = LogisticRegression(**params).fit(x_train, y_train)\n",
+ "train_patched = timer() - start\n",
+ "f\"Intel® extension for Scikit-learn time: {train_patched:.2f} s\""
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "d01cdabc",
+ "metadata": {},
+ "source": [
+ "Predict probability and get a result of the Logistic Regression algorithm with Intel® Extension for Scikit-learn"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 6,
+ "id": "9ead2a44",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "'Intel® extension for Scikit-learn Log Loss: 3.7073530800931587 s'"
+ ]
+ },
+ "execution_count": 6,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "y_predict = classifier.predict_proba(x_test)\n",
+ "log_loss_opt = metrics.log_loss(y_test, y_predict)\n",
+ "f\"Intel® extension for Scikit-learn Log Loss: {log_loss_opt} s\""
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "bd8e7b0b",
+ "metadata": {},
+ "source": [
+ "### Train the same algorithm with original Scikit-learn\n",
+ "In order to cancel optimizations, we use *unpatch_sklearn* and reimport the class LogisticRegression"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 7,
+ "id": "5bb884d5",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "from sklearnex import unpatch_sklearn\n",
+ "\n",
+ "unpatch_sklearn()"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "8cfa0dba",
+ "metadata": {},
+ "source": [
+ "Training of the Logistic Regression algorithm with original Scikit-learn library for CIFAR dataset"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 8,
+ "id": "ae421d8e",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "'Original Scikit-learn time: 395.03 s'"
+ ]
+ },
+ "execution_count": 8,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "from sklearn.linear_model import LogisticRegression\n",
+ "\n",
+ "start = timer()\n",
+ "classifier = LogisticRegression(**params).fit(x_train, y_train)\n",
+ "train_unpatched = timer() - start\n",
+ "f\"Original Scikit-learn time: {train_unpatched:.2f} s\""
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "2d38dfb5",
+ "metadata": {},
+ "source": [
+ "Predict probability and get a result of the Logistic Regression algorithm with original Scikit-learn"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 9,
+ "id": "7644999d",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "'Original Scikit-learn Log Loss: 3.7140870590578428 s'"
+ ]
+ },
+ "execution_count": 9,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "y_predict = classifier.predict_proba(x_test)\n",
+ "log_loss_original = metrics.log_loss(y_test, y_predict)\n",
+ "f\"Original Scikit-learn Log Loss: {log_loss_original} s\""
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 10,
+ "id": "b7d17e2f",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "Compare Log Loss metric of patched Scikit-learn and original Log Loss metric of patched Scikit-learn: 3.7073530800931587 Log Loss metric of unpatched Scikit-learn: 3.7140870590578428 Metrics ratio: 0.9981869086917978 With Scikit-learn-intelex patching you can: Use your Scikit-learn code for training and prediction with minimal changes (a couple of lines of code); Fast execution training and prediction of Scikit-learn models; Get the similar quality Get speedup in 15.9 times. "
+ ],
+ "text/plain": [
+ ""
+ ]
+ },
+ "execution_count": 10,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "HTML(\n",
+ " f\"Compare Log Loss metric of patched Scikit-learn and original \"\n",
+ " f\"Log Loss metric of patched Scikit-learn: {log_loss_opt} \"\n",
+ " f\"Log Loss metric of unpatched Scikit-learn: {log_loss_original} \"\n",
+ " f\"Metrics ratio: {log_loss_opt/log_loss_original} \"\n",
+ " f\"With Scikit-learn-intelex patching you can: \"\n",
+ " f\"\"\n",
+ " f\"Use your Scikit-learn code for training and prediction with minimal changes (a couple of lines of code); \"\n",
+ " f\"Fast execution training and prediction of Scikit-learn models; \"\n",
+ " f\"Get the similar quality \"\n",
+ " f\"Get speedup in {(train_unpatched/train_patched):.1f} times. \"\n",
+ " f\" \"\n",
+ ")"
+ ]
+ }
+ ],
+ "metadata": {
+ "kernelspec": {
+ "display_name": "Python 3 (ipykernel)",
+ "language": "python",
+ "name": "python3"
+ },
+ "language_info": {
+ "codemirror_mode": {
+ "name": "ipython",
+ "version": 3
+ },
+ "file_extension": ".py",
+ "mimetype": "text/x-python",
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython3",
+ "version": "3.8.12"
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}
diff --git a/2023.2/.doctrees/nbsphinx/samples/nusvr_medical_charges.ipynb b/2023.2/.doctrees/nbsphinx/samples/nusvr_medical_charges.ipynb
new file mode 100644
index 0000000000..8c72c1b71d
--- /dev/null
+++ b/2023.2/.doctrees/nbsphinx/samples/nusvr_medical_charges.ipynb
@@ -0,0 +1,354 @@
+{
+ "cells": [
+ {
+ "cell_type": "markdown",
+ "id": "f5c4abc0",
+ "metadata": {},
+ "source": [
+ "# Intel® Extension for Scikit-learn NuSVR for Medical Charges dataset"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 1,
+ "id": "27b99b44",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "from timeit import default_timer as timer\n",
+ "from sklearn.datasets import fetch_openml\n",
+ "from sklearn.model_selection import train_test_split\n",
+ "from IPython.display import HTML\n",
+ "import warnings\n",
+ "\n",
+ "warnings.filterwarnings(\"ignore\")"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "adf9ffe9",
+ "metadata": {},
+ "source": [
+ "### Download the data"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 2,
+ "id": "a9b315cc",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "x, y = fetch_openml(name=\"medical_charges_nominal\", return_X_y=True)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "49fbf604",
+ "metadata": {},
+ "source": [
+ "### Preprocessing"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "fafea10b",
+ "metadata": {},
+ "source": [
+ "Encode categorical features"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 3,
+ "id": "f77c30f2",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "cat_columns = x.select_dtypes([\"category\"]).columns\n",
+ "x[cat_columns] = x[cat_columns].apply(lambda x: x.cat.codes)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "cd8d3b6d",
+ "metadata": {},
+ "source": [
+ "Split the data into train and test sets"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 4,
+ "id": "96e14dd7",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "((48919, 11), (114146, 11), (48919,), (114146,))"
+ ]
+ },
+ "execution_count": 4,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "x_train, x_test, y_train, y_test = train_test_split(x, y, train_size=0.3, random_state=42)\n",
+ "x_train.shape, x_test.shape, y_train.shape, y_test.shape"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "0341cac9",
+ "metadata": {},
+ "source": [
+ "### Patch original Scikit-learn with Intel® Extension for Scikit-learn\n",
+ "Intel® Extension for Scikit-learn (previously known as daal4py) contains drop-in replacement functionality for the stock Scikit-learn package. You can take advantage of the performance optimizations of Intel® Extension for Scikit-learn by adding just two lines of code before the usual Scikit-learn imports:"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 5,
+ "id": "244c5bc9",
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "Intel(R) Extension for Scikit-learn* enabled (https://github.com/intel/scikit-learn-intelex)\n"
+ ]
+ }
+ ],
+ "source": [
+ "from sklearnex import patch_sklearn\n",
+ "\n",
+ "patch_sklearn()"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "6bb14ac8",
+ "metadata": {},
+ "source": [
+ "Intel® Extension for Scikit-learn patching affects performance of specific Scikit-learn functionality. Refer to the [list of supported algorithms and parameters](https://intel.github.io/scikit-learn-intelex/latest/algorithms.html) for details. In cases when unsupported parameters are used, the package falls back to the original Scikit-learn. If the patching does not cover your scenarios, [submit an issue on GitHub](https://github.com/intel/scikit-learn-intelex/issues)."
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "693b4e26",
+ "metadata": {},
+ "source": [
+ "Training of the NuSVR algorithm with Intel® Extension for Scikit-learn for Medical Charges dataset"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 6,
+ "id": "e9b8f06b",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "'Intel® extension for Scikit-learn time: 24.69 s'"
+ ]
+ },
+ "execution_count": 6,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "from sklearn.svm import NuSVR\n",
+ "\n",
+ "params = {\n",
+ " \"nu\": 0.4,\n",
+ " \"C\": y_train.mean(),\n",
+ " \"degree\": 2,\n",
+ " \"kernel\": \"poly\",\n",
+ "}\n",
+ "start = timer()\n",
+ "nusvr = NuSVR(**params).fit(x_train, y_train)\n",
+ "train_patched = timer() - start\n",
+ "f\"Intel® extension for Scikit-learn time: {train_patched:.2f} s\""
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "d01cdabc",
+ "metadata": {},
+ "source": [
+ "Predict and get a result of the NuSVR algorithm with Intel® Extension for Scikit-learn"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 7,
+ "id": "9ead2a44",
+ "metadata": {
+ "scrolled": true
+ },
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "'Intel® extension for Scikit-learn R2 score: 0.8635974264586637'"
+ ]
+ },
+ "execution_count": 7,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "score_opt = nusvr.score(x_test, y_test)\n",
+ "f\"Intel® extension for Scikit-learn R2 score: {score_opt}\""
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "bd8e7b0b",
+ "metadata": {},
+ "source": [
+ "### Train the same algorithm with original Scikit-learn\n",
+ "In order to cancel optimizations, we use *unpatch_sklearn* and reimport the class NuSVR"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 8,
+ "id": "5bb884d5",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "from sklearnex import unpatch_sklearn\n",
+ "\n",
+ "unpatch_sklearn()"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "8cfa0dba",
+ "metadata": {},
+ "source": [
+ "Training of the NuSVR algorithm with original Scikit-learn library for Medical Charges dataset"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 9,
+ "id": "ae421d8e",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "'Original Scikit-learn time: 331.85 s'"
+ ]
+ },
+ "execution_count": 9,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "from sklearn.svm import NuSVR\n",
+ "\n",
+ "start = timer()\n",
+ "nusvr = NuSVR(**params).fit(x_train, y_train)\n",
+ "train_unpatched = timer() - start\n",
+ "f\"Original Scikit-learn time: {train_unpatched:.2f} s\""
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "23b8faa6",
+ "metadata": {},
+ "source": [
+ "Predict and get a result of the NuSVR algorithm with original Scikit-learn"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 10,
+ "id": "7644999d",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "'Original Scikit-learn R2 score: 0.8636031741516902'"
+ ]
+ },
+ "execution_count": 10,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "score_original = nusvr.score(x_test, y_test)\n",
+ "f\"Original Scikit-learn R2 score: {score_original}\""
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 11,
+ "id": "3a704d51",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "Compare R2 score of patched Scikit-learn and original R2 score of patched Scikit-learn: 0.8635974264586637 R2 score of unpatched Scikit-learn: 0.8636031741516902 Metrics ratio: 0.999993344520726 With Scikit-learn-intelex patching you can: Use your Scikit-learn code for training and prediction with minimal changes (a couple of lines of code); Fast execution training and prediction of Scikit-learn models; Get the similar quality Get speedup in 13.4 times. "
+ ],
+ "text/plain": [
+ ""
+ ]
+ },
+ "execution_count": 11,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "HTML(\n",
+ " f\"Compare R2 score of patched Scikit-learn and original \"\n",
+ " f\"R2 score of patched Scikit-learn: {score_opt} \"\n",
+ " f\"R2 score of unpatched Scikit-learn: {score_original} \"\n",
+ " f\"Metrics ratio: {score_opt/score_original} \"\n",
+ " f\"With Scikit-learn-intelex patching you can: \"\n",
+ " f\"\"\n",
+ " f\"Use your Scikit-learn code for training and prediction with minimal changes (a couple of lines of code); \"\n",
+ " f\"Fast execution training and prediction of Scikit-learn models; \"\n",
+ " f\"Get the similar quality \"\n",
+ " f\"Get speedup in {(train_unpatched/train_patched):.1f} times. \"\n",
+ " f\" \"\n",
+ ")"
+ ]
+ }
+ ],
+ "metadata": {
+ "kernelspec": {
+ "display_name": "Python 3 (ipykernel)",
+ "language": "python",
+ "name": "python3"
+ },
+ "language_info": {
+ "codemirror_mode": {
+ "name": "ipython",
+ "version": 3
+ },
+ "file_extension": ".py",
+ "mimetype": "text/x-python",
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython3",
+ "version": "3.8.12"
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}
diff --git a/2023.2/.doctrees/nbsphinx/samples/random_forest_yolanda.ipynb b/2023.2/.doctrees/nbsphinx/samples/random_forest_yolanda.ipynb
new file mode 100644
index 0000000000..276284ed9b
--- /dev/null
+++ b/2023.2/.doctrees/nbsphinx/samples/random_forest_yolanda.ipynb
@@ -0,0 +1,320 @@
+{
+ "cells": [
+ {
+ "cell_type": "markdown",
+ "id": "f5c4abc0",
+ "metadata": {},
+ "source": [
+ "# Intel® Extension for Scikit-learn Random Forest for Yolanda dataset"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 1,
+ "id": "23512089",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "from timeit import default_timer as timer\n",
+ "from sklearn import metrics\n",
+ "from IPython.display import HTML\n",
+ "from sklearn.datasets import fetch_openml\n",
+ "from sklearn.model_selection import train_test_split"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "7d0b6bb9",
+ "metadata": {},
+ "source": [
+ "### Download the data"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 2,
+ "id": "27b99b44",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "x, y = fetch_openml(name=\"Yolanda\", return_X_y=True)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "5b3a2483",
+ "metadata": {},
+ "source": [
+ "Split the data into train and test sets"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 3,
+ "id": "96e14dd7",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "((280000, 100), (120000, 100), (280000,), (120000,))"
+ ]
+ },
+ "execution_count": 3,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.3, random_state=72)\n",
+ "x_train.shape, x_test.shape, y_train.shape, y_test.shape"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "0341cac9",
+ "metadata": {},
+ "source": [
+ "### Patch original Scikit-learn with Intel® Extension for Scikit-learn\n",
+ "Intel® Extension for Scikit-learn (previously known as daal4py) contains drop-in replacement functionality for the stock Scikit-learn package. You can take advantage of the performance optimizations of Intel® Extension for Scikit-learn by adding just two lines of code before the usual Scikit-learn imports:"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 4,
+ "id": "244c5bc9",
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "Intel(R) Extension for Scikit-learn* enabled (https://github.com/intel/scikit-learn-intelex)\n"
+ ]
+ }
+ ],
+ "source": [
+ "from sklearnex import patch_sklearn\n",
+ "\n",
+ "patch_sklearn()"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "6bb14ac8",
+ "metadata": {},
+ "source": [
+ "Intel® Extension for Scikit-learn patching affects performance of specific Scikit-learn functionality. Refer to the [list of supported algorithms and parameters](https://intel.github.io/scikit-learn-intelex/latest/algorithms.html) for details. In cases when unsupported parameters are used, the package falls back to the original Scikit-learn. If the patching does not cover your scenarios, [submit an issue on GitHub](https://github.com/intel/scikit-learn-intelex/issues)."
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "693b4e26",
+ "metadata": {},
+ "source": [
+ "Training Random Forest algorithm with Intel® Extension for Scikit-learn for Yolanda dataset"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 5,
+ "id": "8fecbbb1",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "'Intel® extension for Scikit-learn time: 42.56 s'"
+ ]
+ },
+ "execution_count": 5,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "from sklearn.ensemble import RandomForestRegressor\n",
+ "\n",
+ "params = {\"n_estimators\": 150, \"random_state\": 44, \"n_jobs\": -1}\n",
+ "start = timer()\n",
+ "rf = RandomForestRegressor(**params).fit(x_train, y_train)\n",
+ "train_patched = timer() - start\n",
+ "f\"Intel® extension for Scikit-learn time: {train_patched:.2f} s\""
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "d9279181",
+ "metadata": {},
+ "source": [
+ "Predict and get a result of the Random Forest algorithm with Intel® Extension for Scikit-learn"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 6,
+ "id": "d05bc57b",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "'Intel® extension for Scikit-learn Mean Squared Error: 83.62232345666878'"
+ ]
+ },
+ "execution_count": 6,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "y_pred = rf.predict(x_test)\n",
+ "mse_opt = metrics.mean_squared_error(y_test, y_pred)\n",
+ "f\"Intel® extension for Scikit-learn Mean Squared Error: {mse_opt}\""
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "bd8e7b0b",
+ "metadata": {},
+ "source": [
+ "### Train the same algorithm with original Scikit-learn\n",
+ "In order to cancel optimizations, we use *unpatch_sklearn* and reimport the class RandomForestRegressor."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 7,
+ "id": "5bb884d5",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "from sklearnex import unpatch_sklearn\n",
+ "\n",
+ "unpatch_sklearn()"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "8cfa0dba",
+ "metadata": {},
+ "source": [
+ "Training Random Forest algorithm with original Scikit-learn library for Yolanda dataset"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 8,
+ "id": "76a8d5f1",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "'Original Scikit-learn time: 123.34 s'"
+ ]
+ },
+ "execution_count": 8,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "from sklearn.ensemble import RandomForestRegressor\n",
+ "\n",
+ "start = timer()\n",
+ "rf = RandomForestRegressor(**params).fit(x_train, y_train)\n",
+ "train_unpatched = timer() - start\n",
+ "f\"Original Scikit-learn time: {train_unpatched:.2f} s\""
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "f162fe6b",
+ "metadata": {},
+ "source": [
+ "Predict and get a result of the Random Forest algorithm with original Scikit-learn"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 9,
+ "id": "d5b5e45c",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "'Original Scikit-learn Mean Squared Error: 83.80131297814816'"
+ ]
+ },
+ "execution_count": 9,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "y_pred = rf.predict(x_test)\n",
+ "mse_original = metrics.mean_squared_error(y_test, y_pred)\n",
+ "f\"Original Scikit-learn Mean Squared Error: {mse_original}\""
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 10,
+ "id": "e255e563",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "Compare MSE metric of patched Scikit-learn and original MSE metric of patched Scikit-learn: 83.62232345666878 MSE metric of unpatched Scikit-learn: 83.80131297814816 Metrics ratio: 0.9978641203208111 With Scikit-learn-intelex patching you can: Use your Scikit-learn code for training and prediction with minimal changes (a couple of lines of code); Fast execution training and prediction of Scikit-learn models; Get the similar quality Get speedup in 2.9 times. "
+ ],
+ "text/plain": [
+ ""
+ ]
+ },
+ "execution_count": 10,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "HTML(\n",
+ " f\"Compare MSE metric of patched Scikit-learn and original \"\n",
+ " f\"MSE metric of patched Scikit-learn: {mse_opt} \"\n",
+ " f\"MSE metric of unpatched Scikit-learn: {mse_original} \"\n",
+ " f\"Metrics ratio: {mse_opt/mse_original} \"\n",
+ " f\"With Scikit-learn-intelex patching you can: \"\n",
+ " f\"\"\n",
+ " f\"Use your Scikit-learn code for training and prediction with minimal changes (a couple of lines of code); \"\n",
+ " f\"Fast execution training and prediction of Scikit-learn models; \"\n",
+ " f\"Get the similar quality \"\n",
+ " f\"Get speedup in {(train_unpatched/train_patched):.1f} times. \"\n",
+ " f\" \"\n",
+ ")"
+ ]
+ }
+ ],
+ "metadata": {
+ "kernelspec": {
+ "display_name": "Python 3 (ipykernel)",
+ "language": "python",
+ "name": "python3"
+ },
+ "language_info": {
+ "codemirror_mode": {
+ "name": "ipython",
+ "version": 3
+ },
+ "file_extension": ".py",
+ "mimetype": "text/x-python",
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython3",
+ "version": "3.8.12"
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}
diff --git a/2023.2/.doctrees/nbsphinx/samples/ridge_regression.ipynb b/2023.2/.doctrees/nbsphinx/samples/ridge_regression.ipynb
new file mode 100644
index 0000000000..1c159a13ae
--- /dev/null
+++ b/2023.2/.doctrees/nbsphinx/samples/ridge_regression.ipynb
@@ -0,0 +1,390 @@
+{
+ "cells": [
+ {
+ "cell_type": "markdown",
+ "id": "3768ec43",
+ "metadata": {},
+ "source": [
+ "# Intel® Extension for Scikit-learn Ridge Regression for Airlines DepDelay dataset"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 1,
+ "id": "b1b922d1",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "from timeit import default_timer as timer\n",
+ "from sklearn import metrics\n",
+ "from sklearn.model_selection import train_test_split\n",
+ "import warnings\n",
+ "from sklearn.datasets import fetch_openml\n",
+ "from sklearn.preprocessing import LabelEncoder\n",
+ "from IPython.display import HTML\n",
+ "\n",
+ "warnings.filterwarnings(\"ignore\")"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "2a1a9234",
+ "metadata": {},
+ "source": [
+ "### Download the data"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 2,
+ "id": "7e73dc65",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "x, y = fetch_openml(name=\"Airlines_DepDelay_10M\", return_X_y=True)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "f852cad8",
+ "metadata": {},
+ "source": [
+ "### Preprocessing\n",
+ "Let's encode categorical features with LabelEncoder"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 3,
+ "id": "27ebb377",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "for col in [\"UniqueCarrier\", \"Origin\", \"Dest\"]:\n",
+ " le = LabelEncoder().fit(x[col])\n",
+ " x[col] = le.transform(x[col])"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "147b3e82",
+ "metadata": {},
+ "source": [
+ "Split the data into train and test sets"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 4,
+ "id": "0d332789",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "((9000000, 9), (1000000, 9), (9000000,), (1000000,))"
+ ]
+ },
+ "execution_count": 4,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.1, random_state=0)\n",
+ "x_train.shape, x_test.shape, y_train.shape, y_test.shape"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "246f819f",
+ "metadata": {},
+ "source": [
+ "Normalize the data"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 5,
+ "id": "454a341c",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "from sklearn.preprocessing import MinMaxScaler, StandardScaler\n",
+ "\n",
+ "scaler_x = MinMaxScaler()\n",
+ "scaler_y = StandardScaler()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 6,
+ "id": "df400504",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "y_train = y_train.to_numpy().reshape(-1, 1)\n",
+ "y_test = y_test.to_numpy().reshape(-1, 1)\n",
+ "\n",
+ "scaler_x.fit(x_train)\n",
+ "x_train = scaler_x.transform(x_train)\n",
+ "x_test = scaler_x.transform(x_test)\n",
+ "\n",
+ "scaler_y.fit(y_train)\n",
+ "y_train = scaler_y.transform(y_train).ravel()\n",
+ "y_test = scaler_y.transform(y_test).ravel()"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "fe1d4fac",
+ "metadata": {},
+ "source": [
+ "### Patch original Scikit-learn with Intel® Extension for Scikit-learn\n",
+ "Intel® Extension for Scikit-learn (previously known as daal4py) contains drop-in replacement functionality for the stock Scikit-learn package. You can take advantage of the performance optimizations of Intel® Extension for Scikit-learn by adding just two lines of code before the usual Scikit-learn imports:"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 7,
+ "id": "ef6938df",
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "Intel(R) Extension for Scikit-learn* enabled (https://github.com/intel/scikit-learn-intelex)\n"
+ ]
+ }
+ ],
+ "source": [
+ "from sklearnex import patch_sklearn\n",
+ "\n",
+ "patch_sklearn()"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "20c5ab48",
+ "metadata": {},
+ "source": [
+ "Intel® Extension for Scikit-learn patching affects performance of specific Scikit-learn functionality. Refer to the [list of supported algorithms and parameters](https://intel.github.io/scikit-learn-intelex/latest/algorithms.html) for details. In cases when unsupported parameters are used, the package falls back to the original Scikit-learn. If the patching does not cover your scenarios, [submit an issue on GitHub](https://github.com/intel/scikit-learn-intelex/issues)."
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "f80273e7",
+ "metadata": {},
+ "source": [
+ "Training of the Ridge Regression algorithm with Intel® Extension for Scikit-learn for Airlines DepDelay dataset"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 8,
+ "id": "a4dd1c7e",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "'Intel® extension for Scikit-learn time: 0.06 s'"
+ ]
+ },
+ "execution_count": 8,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "from sklearn.linear_model import Ridge\n",
+ "\n",
+ "params = {\n",
+ " \"alpha\": 0.3,\n",
+ " \"fit_intercept\": False,\n",
+ " \"random_state\": 0,\n",
+ " \"copy_X\": False,\n",
+ "}\n",
+ "start = timer()\n",
+ "model = Ridge(random_state=0).fit(x_train, y_train)\n",
+ "train_patched = timer() - start\n",
+ "f\"Intel® extension for Scikit-learn time: {train_patched:.2f} s\""
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "f10b51fc",
+ "metadata": {},
+ "source": [
+ "Predict and get a result of the Ridge Regression algorithm with Intel® Extension for Scikit-learn"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 9,
+ "id": "d4295a26",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "'Patched Scikit-learn MSE: 1.0014288520708046'"
+ ]
+ },
+ "execution_count": 9,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "y_predict = model.predict(x_test)\n",
+ "mse_metric_opt = metrics.mean_squared_error(y_test, y_predict)\n",
+ "f\"Patched Scikit-learn MSE: {mse_metric_opt}\""
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "cbe6db0d",
+ "metadata": {},
+ "source": [
+ "### Train the same algorithm with original Scikit-learn\n",
+ "In order to cancel optimizations, we use *unpatch_sklearn* and reimport the class Ridge"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 10,
+ "id": "6f64ba97",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "from sklearnex import unpatch_sklearn\n",
+ "\n",
+ "unpatch_sklearn()"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "f242c6da",
+ "metadata": {},
+ "source": [
+ "Training of the Ridge Regression algorithm with original Scikit-learn library for Airlines DepDelay dataset"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 11,
+ "id": "67243849",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "'Original Scikit-learn time: 0.70 s'"
+ ]
+ },
+ "execution_count": 11,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "from sklearn.linear_model import Ridge\n",
+ "\n",
+ "start = timer()\n",
+ "model = Ridge(random_state=0).fit(x_train, y_train)\n",
+ "train_unpatched = timer() - start\n",
+ "f\"Original Scikit-learn time: {train_unpatched:.2f} s\""
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "c85a125c",
+ "metadata": {},
+ "source": [
+ "Predict and get a result of the Ridge Regression algorithm with original Scikit-learn"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 12,
+ "id": "cd9e726c",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "'Original Scikit-learn MSE: 1.0014288520708057'"
+ ]
+ },
+ "execution_count": 12,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "y_predict = model.predict(x_test)\n",
+ "mse_metric_original = metrics.mean_squared_error(y_test, y_predict)\n",
+ "f\"Original Scikit-learn MSE: {mse_metric_original}\""
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 13,
+ "id": "1bde360d",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "Compare MSE metric of patched Scikit-learn and original MSE metric of patched Scikit-learn: 1.0014288520708046 MSE metric of unpatched Scikit-learn: 1.0014288520708057 Metrics ratio: 0.9999999999999989 With Scikit-learn-intelex patching you can: Use your Scikit-learn code for training and prediction with minimal changes (a couple of lines of code); Fast execution training and prediction of Scikit-learn models; Get the similar quality Get speedup in 10.9 times. "
+ ],
+ "text/plain": [
+ ""
+ ]
+ },
+ "execution_count": 13,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "HTML(\n",
+ " f\"Compare MSE metric of patched Scikit-learn and original \"\n",
+ " f\"MSE metric of patched Scikit-learn: {mse_metric_opt} \"\n",
+ " f\"MSE metric of unpatched Scikit-learn: {mse_metric_original} \"\n",
+ " f\"Metrics ratio: {mse_metric_opt/mse_metric_original} \"\n",
+ " f\"With Scikit-learn-intelex patching you can: \"\n",
+ " f\"\"\n",
+ " f\"Use your Scikit-learn code for training and prediction with minimal changes (a couple of lines of code); \"\n",
+ " f\"Fast execution training and prediction of Scikit-learn models; \"\n",
+ " f\"Get the similar quality \"\n",
+ " f\"Get speedup in {(train_unpatched/train_patched):.1f} times. \"\n",
+ " f\" \"\n",
+ ")"
+ ]
+ }
+ ],
+ "metadata": {
+ "kernelspec": {
+ "display_name": "Python 3 (ipykernel)",
+ "language": "python",
+ "name": "python3"
+ },
+ "language_info": {
+ "codemirror_mode": {
+ "name": "ipython",
+ "version": 3
+ },
+ "file_extension": ".py",
+ "mimetype": "text/x-python",
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython3",
+ "version": "3.8.12"
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}
diff --git a/2023.2/.doctrees/nbsphinx/samples/svc_adult.ipynb b/2023.2/.doctrees/nbsphinx/samples/svc_adult.ipynb
new file mode 100644
index 0000000000..9e49bcfecd
--- /dev/null
+++ b/2023.2/.doctrees/nbsphinx/samples/svc_adult.ipynb
@@ -0,0 +1,322 @@
+{
+ "cells": [
+ {
+ "cell_type": "markdown",
+ "id": "f5c4abc0",
+ "metadata": {},
+ "source": [
+ "# Intel® Extension for Scikit-learn SVC for Adult dataset"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 1,
+ "id": "23512089",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "from timeit import default_timer as timer\n",
+ "from IPython.display import HTML\n",
+ "from sklearn import metrics\n",
+ "from sklearn.datasets import fetch_openml\n",
+ "from sklearn.model_selection import train_test_split"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "2cdcbfa6",
+ "metadata": {},
+ "source": [
+ "### Download the data"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 2,
+ "id": "27b99b44",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "x, y = fetch_openml(name=\"a9a\", return_X_y=True)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "3a6df301",
+ "metadata": {},
+ "source": [
+ "Split the data into train and test sets"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 3,
+ "id": "96e14dd7",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.2, random_state=42)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "0341cac9",
+ "metadata": {},
+ "source": [
+ "### Patch original Scikit-learn with Intel® Extension for Scikit-learn\n",
+ "Intel® Extension for Scikit-learn (previously known as daal4py) contains drop-in replacement functionality for the stock Scikit-learn package. You can take advantage of the performance optimizations of Intel® Extension for Scikit-learn by adding just two lines of code before the usual Scikit-learn imports:"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 4,
+ "id": "244c5bc9",
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "Intel(R) Extension for Scikit-learn* enabled (https://github.com/intel/scikit-learn-intelex)\n"
+ ]
+ }
+ ],
+ "source": [
+ "from sklearnex import patch_sklearn\n",
+ "\n",
+ "patch_sklearn()"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "6bb14ac8",
+ "metadata": {},
+ "source": [
+ "Intel® Extension for Scikit-learn patching affects performance of specific Scikit-learn functionality. Refer to the [list of supported algorithms and parameters](https://intel.github.io/scikit-learn-intelex/algorithms.html) for details. In cases when unsupported parameters are used, the package falls back to the original Scikit-learn. If the patching does not cover your scenarios, [submit an issue on GitHub](https://github.com/intel/scikit-learn-intelex/issues)."
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "693b4e26",
+ "metadata": {},
+ "source": [
+ "Training of the SVC algorithm with Intel® Extension for Scikit-learn for Adult dataset"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 5,
+ "id": "e9b8f06b",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "'Intel® extension for Scikit-learn time: 14.08 s'"
+ ]
+ },
+ "execution_count": 5,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "from sklearn.svm import SVC\n",
+ "\n",
+ "params = {\"C\": 100.0, \"kernel\": \"rbf\", \"gamma\": \"scale\"}\n",
+ "start = timer()\n",
+ "classifier = SVC(**params).fit(x_train, y_train)\n",
+ "train_patched = timer() - start\n",
+ "f\"Intel® extension for Scikit-learn time: {train_patched:.2f} s\""
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "d01cdabc",
+ "metadata": {},
+ "source": [
+ "Predict and get a result of the SVC algorithm with Intel® Extension for Scikit-learn"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 6,
+ "id": "9ead2a44",
+ "metadata": {
+ "scrolled": true
+ },
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Classification report for Intel® extension for Scikit-learn SVC:\n",
+ " precision recall f1-score support\n",
+ "\n",
+ " -1.0 0.87 0.90 0.88 7414\n",
+ " 1.0 0.64 0.58 0.61 2355\n",
+ "\n",
+ " accuracy 0.82 9769\n",
+ " macro avg 0.76 0.74 0.75 9769\n",
+ "weighted avg 0.82 0.82 0.82 9769\n",
+ "\n",
+ "\n"
+ ]
+ }
+ ],
+ "source": [
+ "predicted = classifier.predict(x_test)\n",
+ "report = metrics.classification_report(y_test, predicted)\n",
+ "print(f\"Classification report for Intel® extension for Scikit-learn SVC:\\n{report}\\n\")"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "bd8e7b0b",
+ "metadata": {},
+ "source": [
+ "*The first column of the classification report above is the class labels.* \n",
+ " \n",
+ "### Train the same algorithm with original Scikit-learn\n",
+ "In order to cancel optimizations, we use *unpatch_sklearn* and reimport the class SVC."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 7,
+ "id": "5bb884d5",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "from sklearnex import unpatch_sklearn\n",
+ "\n",
+ "unpatch_sklearn()"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "8cfa0dba",
+ "metadata": {},
+ "source": [
+ "Training of the SVC algorithm with original Scikit-learn library for Adult dataset"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 8,
+ "id": "ae421d8e",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "'Original Scikit-learn time: 803.06 s'"
+ ]
+ },
+ "execution_count": 8,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "from sklearn.svm import SVC\n",
+ "\n",
+ "start = timer()\n",
+ "classifier = SVC(**params).fit(x_train, y_train)\n",
+ "train_unpatched = timer() - start\n",
+ "f\"Original Scikit-learn time: {train_unpatched:.2f} s\""
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "c0a7a747",
+ "metadata": {},
+ "source": [
+ "Predict and get a result of the SVC algorithm with original Scikit-learn"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 9,
+ "id": "7644999d",
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Classification report for original Scikit-learn SVC:\n",
+ " precision recall f1-score support\n",
+ "\n",
+ " -1.0 0.87 0.90 0.88 7414\n",
+ " 1.0 0.64 0.58 0.61 2355\n",
+ "\n",
+ " accuracy 0.82 9769\n",
+ " macro avg 0.76 0.74 0.75 9769\n",
+ "weighted avg 0.82 0.82 0.82 9769\n",
+ "\n",
+ "\n"
+ ]
+ }
+ ],
+ "source": [
+ "predicted = classifier.predict(x_test)\n",
+ "report = metrics.classification_report(y_test, predicted)\n",
+ "print(f\"Classification report for original Scikit-learn SVC:\\n{report}\\n\")"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 10,
+ "id": "fc992182",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "With scikit-learn-intelex patching you can: Use your Scikit-learn code for training and prediction with minimal changes (a couple of lines of code); Fast execution training and prediction of Scikit-learn models; Get the similar quality Get speedup in 57.0 times. "
+ ],
+ "text/plain": [
+ ""
+ ]
+ },
+ "execution_count": 10,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "HTML(\n",
+ " f\"With scikit-learn-intelex patching you can: \"\n",
+ " f\"\"\n",
+ " f\"Use your Scikit-learn code for training and prediction with minimal changes (a couple of lines of code); \"\n",
+ " f\"Fast execution training and prediction of Scikit-learn models; \"\n",
+ " f\"Get the similar quality \"\n",
+ " f\"Get speedup in {(train_unpatched/train_patched):.1f} times. \"\n",
+ " f\" \"\n",
+ ")"
+ ]
+ }
+ ],
+ "metadata": {
+ "kernelspec": {
+ "display_name": "Python 3 (ipykernel)",
+ "language": "python",
+ "name": "python3"
+ },
+ "language_info": {
+ "codemirror_mode": {
+ "name": "ipython",
+ "version": 3
+ },
+ "file_extension": ".py",
+ "mimetype": "text/x-python",
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython3",
+ "version": "3.8.12"
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}
diff --git a/2023.2/.doctrees/nbsphinx/samples/tsne.ipynb b/2023.2/.doctrees/nbsphinx/samples/tsne.ipynb
new file mode 100644
index 0000000000..99ad8fcefd
--- /dev/null
+++ b/2023.2/.doctrees/nbsphinx/samples/tsne.ipynb
@@ -0,0 +1,285 @@
+{
+ "cells": [
+ {
+ "cell_type": "markdown",
+ "id": "f5c4abc0",
+ "metadata": {},
+ "source": [
+ "# Intel® Extension for Scikit-learn TSNE example"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 1,
+ "id": "23512089",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "from timeit import default_timer as timer\n",
+ "from sklearn import metrics\n",
+ "from sklearn.datasets import make_blobs\n",
+ "import matplotlib.pyplot as plt\n",
+ "\n",
+ "%matplotlib inline\n",
+ "\n",
+ "import warnings\n",
+ "\n",
+ "warnings.filterwarnings(\"ignore\")"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "b6e359f6",
+ "metadata": {},
+ "source": [
+ "### Generate the data \n",
+ "Generate isotropic Gaussian blobs for clustering.\n",
+ " \n",
+ "With the number of samples: 20k \n",
+ "Number of features: 100 \n",
+ "Number of blobs: 4 \n",
+ "Source: \n",
+ "https://scikit-learn.org/stable/modules/generated/sklearn.datasets.make_blobs.html\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 2,
+ "id": "27b99b44",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "x, y = make_blobs(n_samples=20000, centers=4, n_features=100, random_state=0)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "0341cac9",
+ "metadata": {},
+ "source": [
+ "### Patch original Scikit-learn with Intel® Extension for Scikit-learn\n",
+ "Intel® Extension for Scikit-learn (previously known as daal4py) contains drop-in replacement functionality for the stock Scikit-learn package. You can take advantage of the performance optimizations of Intel® Extension for Scikit-learn by adding just two lines of code before the usual Scikit-learn imports:"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 3,
+ "id": "244c5bc9",
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "Intel(R) Extension for Scikit-learn* enabled (https://github.com/intel/scikit-learn-intelex)\n"
+ ]
+ }
+ ],
+ "source": [
+ "from sklearnex import patch_sklearn\n",
+ "\n",
+ "patch_sklearn()"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "6bb14ac8",
+ "metadata": {},
+ "source": [
+ "Intel® Extension for Scikit-learn patching affects performance of specific Scikit-learn functionality. Refer to the [list of supported algorithms and parameters](https://intel.github.io/scikit-learn-intelex/algorithms.html) for details. In cases when unsupported parameters are used, the package falls back to the original Scikit-learn. If the patching does not cover your scenarios, [submit an issue on GitHub](https://github.com/intel/scikit-learn-intelex/issues)."
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "693b4e26",
+ "metadata": {},
+ "source": [
+ "Training TSNE algorithm with Intel® Extension for Scikit-learn for generated dataset"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 4,
+ "id": "e9b8f06b",
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Intel® extension for Scikit-learn time: 12.63 s\n",
+ "Intel® Extension for scikit-learn. Divergence: 4.289110606110757\n"
+ ]
+ }
+ ],
+ "source": [
+ "from sklearn.manifold import TSNE\n",
+ "\n",
+ "params = {\"n_components\": 2, \"random_state\": 42}\n",
+ "start = timer()\n",
+ "tsne = TSNE(**params)\n",
+ "embedding_intelex = tsne.fit_transform(x)\n",
+ "time_opt = timer() - start\n",
+ "\n",
+ "print(f\"Intel® extension for Scikit-learn time: {time_opt:.2f} s\")\n",
+ "print(f\"Intel® Extension for scikit-learn. Divergence: {tsne.kl_divergence_}\")"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "bd8e7b0b",
+ "metadata": {},
+ "source": [
+ " ### Train the same algorithm with original Scikit-learn\n",
+ "In order to cancel optimizations, we use *unpatch_sklearn* and reimport the class TSNE."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 5,
+ "id": "5bb884d5",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "from sklearnex import unpatch_sklearn\n",
+ "\n",
+ "unpatch_sklearn()"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "8cfa0dba",
+ "metadata": {},
+ "source": [
+ "Training TSNE algorithm with original Scikit-learn library for generated dataset"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 6,
+ "id": "ae421d8e",
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Original Scikit-learn time: 37.66 s\n",
+ "Original Scikit-learn. Divergence: 4.2955403327941895\n"
+ ]
+ }
+ ],
+ "source": [
+ "from sklearn.manifold import TSNE\n",
+ "\n",
+ "params = {\"n_components\": 2, \"random_state\": 42}\n",
+ "start = timer()\n",
+ "tsne = TSNE(**params)\n",
+ "embedding_original = tsne.fit_transform(x)\n",
+ "time_original = timer() - start\n",
+ "\n",
+ "print(f\"Original Scikit-learn time: {time_original:.2f} s\")\n",
+ "print(f\"Original Scikit-learn. Divergence: {tsne.kl_divergence_}\")"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "8358d7c2",
+ "metadata": {},
+ "source": [
+ "### Plot embeddings of original Scikit-learn and Intel® Extension for Scikit-learn"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 7,
+ "id": "43ab1a96",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "colors = [int(m) for m in y]"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 8,
+ "id": "35147d24",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "image/png": "\n",
+ "text/plain": [
+ ""
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "image/png": "\n",
+ "text/plain": [
+ ""
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ }
+ ],
+ "source": [
+ "for emb, title in zip(\n",
+ " [embedding_intelex, embedding_original],\n",
+ " [\"Intel® Extension for scikit-learn\", \"Original scikit-learn\"],\n",
+ "):\n",
+ " plt.scatter(emb[:, 0], emb[:, 1], c=colors)\n",
+ " plt.title(title)\n",
+ " plt.xlabel(\"x\")\n",
+ " plt.ylabel(\"y\")\n",
+ " plt.show()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 9,
+ "id": "ffd79e96",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "'Speedup for this run: 3.0'"
+ ]
+ },
+ "execution_count": 9,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "f\"Speedup for this run: {(time_original/time_opt):.1f}\""
+ ]
+ }
+ ],
+ "metadata": {
+ "kernelspec": {
+ "display_name": "Python 3 (ipykernel)",
+ "language": "python",
+ "name": "python3"
+ },
+ "language_info": {
+ "codemirror_mode": {
+ "name": "ipython",
+ "version": 3
+ },
+ "file_extension": ".py",
+ "mimetype": "text/x-python",
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython3",
+ "version": "3.9.15"
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}
diff --git a/2023.2/.doctrees/nbsphinx/samples_daal4py_data_science_24_0.png b/2023.2/.doctrees/nbsphinx/samples_daal4py_data_science_24_0.png
new file mode 100644
index 0000000000..6c7d082563
Binary files /dev/null and b/2023.2/.doctrees/nbsphinx/samples_daal4py_data_science_24_0.png differ
diff --git a/2023.2/.doctrees/nbsphinx/samples_daal4py_data_science_7_0.png b/2023.2/.doctrees/nbsphinx/samples_daal4py_data_science_7_0.png
new file mode 100644
index 0000000000..cc9df20282
Binary files /dev/null and b/2023.2/.doctrees/nbsphinx/samples_daal4py_data_science_7_0.png differ
diff --git a/2023.2/.doctrees/nbsphinx/samples_daal4py_data_science_7_1.png b/2023.2/.doctrees/nbsphinx/samples_daal4py_data_science_7_1.png
new file mode 100644
index 0000000000..5e5d8b1d1a
Binary files /dev/null and b/2023.2/.doctrees/nbsphinx/samples_daal4py_data_science_7_1.png differ
diff --git a/2023.2/.doctrees/nbsphinx/samples_daal4py_data_science_7_2.png b/2023.2/.doctrees/nbsphinx/samples_daal4py_data_science_7_2.png
new file mode 100644
index 0000000000..e79ca5011f
Binary files /dev/null and b/2023.2/.doctrees/nbsphinx/samples_daal4py_data_science_7_2.png differ
diff --git a/2023.2/.doctrees/nbsphinx/samples_daal4py_data_science_7_3.png b/2023.2/.doctrees/nbsphinx/samples_daal4py_data_science_7_3.png
new file mode 100644
index 0000000000..b7c37519c2
Binary files /dev/null and b/2023.2/.doctrees/nbsphinx/samples_daal4py_data_science_7_3.png differ
diff --git a/2023.2/.doctrees/nbsphinx/samples_daal4py_data_science_7_4.png b/2023.2/.doctrees/nbsphinx/samples_daal4py_data_science_7_4.png
new file mode 100644
index 0000000000..87ce1f07e7
Binary files /dev/null and b/2023.2/.doctrees/nbsphinx/samples_daal4py_data_science_7_4.png differ
diff --git a/2023.2/.doctrees/nbsphinx/samples_daal4py_data_science_7_5.png b/2023.2/.doctrees/nbsphinx/samples_daal4py_data_science_7_5.png
new file mode 100644
index 0000000000..0a059e8d7f
Binary files /dev/null and b/2023.2/.doctrees/nbsphinx/samples_daal4py_data_science_7_5.png differ
diff --git a/2023.2/.doctrees/nbsphinx/samples_tsne_15_0.png b/2023.2/.doctrees/nbsphinx/samples_tsne_15_0.png
new file mode 100644
index 0000000000..5a1138f6bc
Binary files /dev/null and b/2023.2/.doctrees/nbsphinx/samples_tsne_15_0.png differ
diff --git a/2023.2/.doctrees/nbsphinx/samples_tsne_15_1.png b/2023.2/.doctrees/nbsphinx/samples_tsne_15_1.png
new file mode 100644
index 0000000000..49b9be9ae0
Binary files /dev/null and b/2023.2/.doctrees/nbsphinx/samples_tsne_15_1.png differ
diff --git a/2023.2/.doctrees/oneapi-gpu.doctree b/2023.2/.doctrees/oneapi-gpu.doctree
new file mode 100644
index 0000000000..4bd48cfe68
Binary files /dev/null and b/2023.2/.doctrees/oneapi-gpu.doctree differ
diff --git a/2023.2/.doctrees/quick-start.doctree b/2023.2/.doctrees/quick-start.doctree
new file mode 100644
index 0000000000..5b9576b904
Binary files /dev/null and b/2023.2/.doctrees/quick-start.doctree differ
diff --git a/2023.2/.doctrees/samples.doctree b/2023.2/.doctrees/samples.doctree
new file mode 100644
index 0000000000..43b6ad5c7f
Binary files /dev/null and b/2023.2/.doctrees/samples.doctree differ
diff --git a/2023.2/.doctrees/samples/ElasticNet.doctree b/2023.2/.doctrees/samples/ElasticNet.doctree
new file mode 100644
index 0000000000..35d385a0a1
Binary files /dev/null and b/2023.2/.doctrees/samples/ElasticNet.doctree differ
diff --git a/2023.2/.doctrees/samples/daal4py_data_science.doctree b/2023.2/.doctrees/samples/daal4py_data_science.doctree
new file mode 100644
index 0000000000..09a4c48947
Binary files /dev/null and b/2023.2/.doctrees/samples/daal4py_data_science.doctree differ
diff --git a/2023.2/.doctrees/samples/dbscan.doctree b/2023.2/.doctrees/samples/dbscan.doctree
new file mode 100644
index 0000000000..e28baa2e14
Binary files /dev/null and b/2023.2/.doctrees/samples/dbscan.doctree differ
diff --git a/2023.2/.doctrees/samples/kmeans.doctree b/2023.2/.doctrees/samples/kmeans.doctree
new file mode 100644
index 0000000000..3e537dc53c
Binary files /dev/null and b/2023.2/.doctrees/samples/kmeans.doctree differ
diff --git a/2023.2/.doctrees/samples/knn_mnist.doctree b/2023.2/.doctrees/samples/knn_mnist.doctree
new file mode 100644
index 0000000000..ab5294ead3
Binary files /dev/null and b/2023.2/.doctrees/samples/knn_mnist.doctree differ
diff --git a/2023.2/.doctrees/samples/lasso_regression.doctree b/2023.2/.doctrees/samples/lasso_regression.doctree
new file mode 100644
index 0000000000..a964237c2d
Binary files /dev/null and b/2023.2/.doctrees/samples/lasso_regression.doctree differ
diff --git a/2023.2/.doctrees/samples/linear_regression.doctree b/2023.2/.doctrees/samples/linear_regression.doctree
new file mode 100644
index 0000000000..e97b9174a5
Binary files /dev/null and b/2023.2/.doctrees/samples/linear_regression.doctree differ
diff --git a/2023.2/.doctrees/samples/logistictic_regression_cifar.doctree b/2023.2/.doctrees/samples/logistictic_regression_cifar.doctree
new file mode 100644
index 0000000000..bd8477f82e
Binary files /dev/null and b/2023.2/.doctrees/samples/logistictic_regression_cifar.doctree differ
diff --git a/2023.2/.doctrees/samples/nusvr_medical_charges.doctree b/2023.2/.doctrees/samples/nusvr_medical_charges.doctree
new file mode 100644
index 0000000000..4cacb1651a
Binary files /dev/null and b/2023.2/.doctrees/samples/nusvr_medical_charges.doctree differ
diff --git a/2023.2/.doctrees/samples/random_forest_yolanda.doctree b/2023.2/.doctrees/samples/random_forest_yolanda.doctree
new file mode 100644
index 0000000000..ddb199ccd4
Binary files /dev/null and b/2023.2/.doctrees/samples/random_forest_yolanda.doctree differ
diff --git a/2023.2/.doctrees/samples/ridge_regression.doctree b/2023.2/.doctrees/samples/ridge_regression.doctree
new file mode 100644
index 0000000000..a69fe0f98a
Binary files /dev/null and b/2023.2/.doctrees/samples/ridge_regression.doctree differ
diff --git a/2023.2/.doctrees/samples/svc_adult.doctree b/2023.2/.doctrees/samples/svc_adult.doctree
new file mode 100644
index 0000000000..3a1179726a
Binary files /dev/null and b/2023.2/.doctrees/samples/svc_adult.doctree differ
diff --git a/2023.2/.doctrees/samples/tsne.doctree b/2023.2/.doctrees/samples/tsne.doctree
new file mode 100644
index 0000000000..0593fc9945
Binary files /dev/null and b/2023.2/.doctrees/samples/tsne.doctree differ
diff --git a/2023.2/.doctrees/support.doctree b/2023.2/.doctrees/support.doctree
new file mode 100644
index 0000000000..7423be5d2e
Binary files /dev/null and b/2023.2/.doctrees/support.doctree differ
diff --git a/2023.2/.doctrees/system-requirements.doctree b/2023.2/.doctrees/system-requirements.doctree
new file mode 100644
index 0000000000..35f77751c6
Binary files /dev/null and b/2023.2/.doctrees/system-requirements.doctree differ
diff --git a/2023.2/.doctrees/verbose.doctree b/2023.2/.doctrees/verbose.doctree
new file mode 100644
index 0000000000..0de9443fc5
Binary files /dev/null and b/2023.2/.doctrees/verbose.doctree differ
diff --git a/2023.2/.doctrees/what-is-patching.doctree b/2023.2/.doctrees/what-is-patching.doctree
new file mode 100644
index 0000000000..3614b0516a
Binary files /dev/null and b/2023.2/.doctrees/what-is-patching.doctree differ
diff --git a/2023.2/.nojekyll b/2023.2/.nojekyll
new file mode 100644
index 0000000000..e69de29bb2
diff --git a/2023.2/404.html b/2023.2/404.html
index 8395fa5de2..f1296f725f 100644
--- a/2023.2/404.html
+++ b/2023.2/404.html
@@ -1,28 +1,32 @@
+
+
-
+
Page not found — Intel(R) Extension for Scikit-learn* 2023.2 documentation
-
-
-
-
-
-
+
+
+
+
+
-
-
-
-
+
+
+
+
+
+
+
+
+
+ Choose version
+
+
+
+
+
+
+ About
-Features
-
-Releases
-
-Installation Guide
-
-Examples
+Get Started
-Performance
+Developer Guide
-Blogs & Publications
-
-How to contribute
+Community
-License
-
+
@@ -151,19 +160,7 @@ Page not found