diff --git a/driver_ranking/feature_store.yaml b/driver_ranking/feature_store.yaml
index f8dac74..c2d0a74 100644
--- a/driver_ranking/feature_store.yaml
+++ b/driver_ranking/feature_store.yaml
@@ -1,3 +1,3 @@
project: driver_ranking
registry: data/registry.db
-provider: local
\ No newline at end of file
+provider: gcp
diff --git a/notebooks/Driver_Ranking_Tutorial.ipynb b/notebooks/Driver_Ranking_Tutorial.ipynb
new file mode 100644
index 0000000..8d37c59
--- /dev/null
+++ b/notebooks/Driver_Ranking_Tutorial.ipynb
@@ -0,0 +1,776 @@
+{
+ "nbformat": 4,
+ "nbformat_minor": 0,
+ "metadata": {
+ "colab": {
+ "name": "Driver Ranking Tutorial",
+ "provenance": [],
+ "collapsed_sections": [],
+ "authorship_tag": "ABX9TyPxfJo7miJVPRo4IDVQvAqB",
+ "include_colab_link": true
+ },
+ "kernelspec": {
+ "name": "python3",
+ "display_name": "Python 3"
+ },
+ "language_info": {
+ "name": "python"
+ }
+ },
+ "cells": [
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "view-in-github",
+ "colab_type": "text"
+ },
+ "source": [
+ "
"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "A7ffktm_Ty80"
+ },
+ "source": [
+ ""
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "qa-41097T0vH"
+ },
+ "source": [
+ "### Overview\n",
+ "Making a prediction using a linear regression model is a common use case in ML. In this guide tutorial, we build the model that predicts if a driver will complete a trip based on a number of features ingested into Feast.\n",
+ "\n",
+ "The basic local mode gives you ability to quickly try Feast, while the advanced mode shows how you can use Feast in a production setting, in particular for the Google Cloud Platform (GCP) cloud.\n",
+ "\n",
+ "This tutorial uses Feast with scikit learn to:\n",
+ "\n",
+ "* Train a model locally using data from BigQuery\n",
+ "* Test the model for online inference using SQLite (for fast iteration)\n",
+ "* Test the model for online inference using Firestore (to represent production)\n",
+ " "
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "j1Qipu_GUYdA"
+ },
+ "source": [
+ "## Step 1: Install feast, scikit-learn\n",
+ "\n",
+ "Install feast, gcp dependencies and scikit-learn\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/",
+ "height": 1000
+ },
+ "id": "gxuVxKG3Ua6z",
+ "outputId": "09ba5826-ef68-400c-a7a5-6a2ac5b9ebb5"
+ },
+ "source": [
+ "!pip install feast scikit-learn 'feast[gcp]'"
+ ],
+ "execution_count": 1,
+ "outputs": [
+ {
+ "output_type": "stream",
+ "text": [
+ "Collecting feast\n",
+ " Downloading feast-0.11.0-py3-none-any.whl (190 kB)\n",
+ "\u001b[?25l\r\u001b[K |█▊ | 10 kB 27.9 MB/s eta 0:00:01\r\u001b[K |███▌ | 20 kB 28.6 MB/s eta 0:00:01\r\u001b[K |█████▏ | 30 kB 12.4 MB/s eta 0:00:01\r\u001b[K |███████ | 40 kB 9.2 MB/s eta 0:00:01\r\u001b[K |████████▋ | 51 kB 4.2 MB/s eta 0:00:01\r\u001b[K |██████████▍ | 61 kB 4.5 MB/s eta 0:00:01\r\u001b[K |████████████ | 71 kB 4.6 MB/s eta 0:00:01\r\u001b[K |█████████████▉ | 81 kB 4.7 MB/s eta 0:00:01\r\u001b[K |███████████████▌ | 92 kB 4.9 MB/s eta 0:00:01\r\u001b[K |█████████████████▎ | 102 kB 5.0 MB/s eta 0:00:01\r\u001b[K |███████████████████ | 112 kB 5.0 MB/s eta 0:00:01\r\u001b[K |████████████████████▊ | 122 kB 5.0 MB/s eta 0:00:01\r\u001b[K |██████████████████████▍ | 133 kB 5.0 MB/s eta 0:00:01\r\u001b[K |████████████████████████▏ | 143 kB 5.0 MB/s eta 0:00:01\r\u001b[K |█████████████████████████▉ | 153 kB 5.0 MB/s eta 0:00:01\r\u001b[K |███████████████████████████▋ | 163 kB 5.0 MB/s eta 0:00:01\r\u001b[K |█████████████████████████████▎ | 174 kB 5.0 MB/s eta 0:00:01\r\u001b[K |███████████████████████████████ | 184 kB 5.0 MB/s eta 0:00:01\r\u001b[K |████████████████████████████████| 190 kB 5.0 MB/s \n",
+ "\u001b[?25hRequirement already satisfied: scikit-learn in /usr/local/lib/python3.7/dist-packages (0.22.2.post1)\n",
+ "Requirement already satisfied: pandas>=1.0.0 in /usr/local/lib/python3.7/dist-packages (from feast) (1.1.5)\n",
+ "Requirement already satisfied: Click==7.* in /usr/local/lib/python3.7/dist-packages (from feast) (7.1.2)\n",
+ "Requirement already satisfied: protobuf>=3.10 in /usr/local/lib/python3.7/dist-packages (from feast) (3.17.3)\n",
+ "Requirement already satisfied: tabulate==0.8.* in /usr/local/lib/python3.7/dist-packages (from feast) (0.8.9)\n",
+ "Collecting fastavro>=1.1.0\n",
+ " Downloading fastavro-1.4.4-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (2.3 MB)\n",
+ "\u001b[K |████████████████████████████████| 2.3 MB 23.0 MB/s \n",
+ "\u001b[?25hCollecting tenacity>=7.*\n",
+ " Downloading tenacity-8.0.1-py3-none-any.whl (24 kB)\n",
+ "Collecting pydantic>=1.0.0\n",
+ " Downloading pydantic-1.8.2-cp37-cp37m-manylinux2014_x86_64.whl (10.1 MB)\n",
+ "\u001b[K |████████████████████████████████| 10.1 MB 26.1 MB/s \n",
+ "\u001b[?25hCollecting mmh3\n",
+ " Downloading mmh3-3.0.0-cp37-cp37m-manylinux2010_x86_64.whl (50 kB)\n",
+ "\u001b[K |████████████████████████████████| 50 kB 6.5 MB/s \n",
+ "\u001b[?25hRequirement already satisfied: toml==0.10.* in /usr/local/lib/python3.7/dist-packages (from feast) (0.10.2)\n",
+ "Collecting colorama>=0.3.9\n",
+ " Downloading colorama-0.4.4-py2.py3-none-any.whl (16 kB)\n",
+ "Collecting PyYAML==5.3.*\n",
+ " Downloading PyYAML-5.3.1.tar.gz (269 kB)\n",
+ "\u001b[K |████████████████████████████████| 269 kB 52.1 MB/s \n",
+ "\u001b[?25hRequirement already satisfied: google-api-core>=1.23.0 in /usr/local/lib/python3.7/dist-packages (from feast) (1.26.3)\n",
+ "Requirement already satisfied: Jinja2>=2.0.0 in /usr/local/lib/python3.7/dist-packages (from feast) (2.11.3)\n",
+ "Requirement already satisfied: grpcio>=1.34.0 in /usr/local/lib/python3.7/dist-packages (from feast) (1.34.1)\n",
+ "Collecting googleapis-common-protos==1.52.*\n",
+ " Downloading googleapis_common_protos-1.52.0-py2.py3-none-any.whl (100 kB)\n",
+ "\u001b[K |████████████████████████████████| 100 kB 7.6 MB/s \n",
+ "\u001b[?25hRequirement already satisfied: jsonschema in /usr/local/lib/python3.7/dist-packages (from feast) (2.6.0)\n",
+ "Requirement already satisfied: tqdm==4.* in /usr/local/lib/python3.7/dist-packages (from feast) (4.41.1)\n",
+ "Collecting pandavro==1.5.*\n",
+ " Downloading pandavro-1.5.2.tar.gz (3.8 kB)\n",
+ "Requirement already satisfied: pyarrow>=2.0.0 in /usr/local/lib/python3.7/dist-packages (from feast) (3.0.0)\n",
+ "Requirement already satisfied: numpy>=1.7.0 in /usr/local/lib/python3.7/dist-packages (from pandavro==1.5.*->feast) (1.19.5)\n",
+ "Requirement already satisfied: six>=1.9 in /usr/local/lib/python3.7/dist-packages (from pandavro==1.5.*->feast) (1.15.0)\n",
+ "Requirement already satisfied: setuptools>=40.3.0 in /usr/local/lib/python3.7/dist-packages (from google-api-core>=1.23.0->feast) (57.2.0)\n",
+ "Requirement already satisfied: packaging>=14.3 in /usr/local/lib/python3.7/dist-packages (from google-api-core>=1.23.0->feast) (21.0)\n",
+ "Requirement already satisfied: pytz in /usr/local/lib/python3.7/dist-packages (from google-api-core>=1.23.0->feast) (2018.9)\n",
+ "Requirement already satisfied: requests<3.0.0dev,>=2.18.0 in /usr/local/lib/python3.7/dist-packages (from google-api-core>=1.23.0->feast) (2.23.0)\n",
+ "Requirement already satisfied: google-auth<2.0dev,>=1.21.1 in /usr/local/lib/python3.7/dist-packages (from google-api-core>=1.23.0->feast) (1.32.1)\n",
+ "Requirement already satisfied: cachetools<5.0,>=2.0.0 in /usr/local/lib/python3.7/dist-packages (from google-auth<2.0dev,>=1.21.1->google-api-core>=1.23.0->feast) (4.2.2)\n",
+ "Requirement already satisfied: pyasn1-modules>=0.2.1 in /usr/local/lib/python3.7/dist-packages (from google-auth<2.0dev,>=1.21.1->google-api-core>=1.23.0->feast) (0.2.8)\n",
+ "Requirement already satisfied: rsa<5,>=3.1.4 in /usr/local/lib/python3.7/dist-packages (from google-auth<2.0dev,>=1.21.1->google-api-core>=1.23.0->feast) (4.7.2)\n",
+ "Requirement already satisfied: MarkupSafe>=0.23 in /usr/local/lib/python3.7/dist-packages (from Jinja2>=2.0.0->feast) (2.0.1)\n",
+ "Requirement already satisfied: pyparsing>=2.0.2 in /usr/local/lib/python3.7/dist-packages (from packaging>=14.3->google-api-core>=1.23.0->feast) (2.4.7)\n",
+ "Requirement already satisfied: python-dateutil>=2.7.3 in /usr/local/lib/python3.7/dist-packages (from pandas>=1.0.0->feast) (2.8.1)\n",
+ "Requirement already satisfied: pyasn1<0.5.0,>=0.4.6 in /usr/local/lib/python3.7/dist-packages (from pyasn1-modules>=0.2.1->google-auth<2.0dev,>=1.21.1->google-api-core>=1.23.0->feast) (0.4.8)\n",
+ "Requirement already satisfied: typing-extensions>=3.7.4.3 in /usr/local/lib/python3.7/dist-packages (from pydantic>=1.0.0->feast) (3.7.4.3)\n",
+ "Requirement already satisfied: chardet<4,>=3.0.2 in /usr/local/lib/python3.7/dist-packages (from requests<3.0.0dev,>=2.18.0->google-api-core>=1.23.0->feast) (3.0.4)\n",
+ "Requirement already satisfied: urllib3!=1.25.0,!=1.25.1,<1.26,>=1.21.1 in /usr/local/lib/python3.7/dist-packages (from requests<3.0.0dev,>=2.18.0->google-api-core>=1.23.0->feast) (1.24.3)\n",
+ "Requirement already satisfied: idna<3,>=2.5 in /usr/local/lib/python3.7/dist-packages (from requests<3.0.0dev,>=2.18.0->google-api-core>=1.23.0->feast) (2.10)\n",
+ "Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.7/dist-packages (from requests<3.0.0dev,>=2.18.0->google-api-core>=1.23.0->feast) (2021.5.30)\n",
+ "Requirement already satisfied: joblib>=0.11 in /usr/local/lib/python3.7/dist-packages (from scikit-learn) (1.0.1)\n",
+ "Requirement already satisfied: scipy>=0.17.0 in /usr/local/lib/python3.7/dist-packages (from scikit-learn) (1.4.1)\n",
+ "Collecting google-cloud-core==1.4.*\n",
+ " Downloading google_cloud_core-1.4.4-py2.py3-none-any.whl (27 kB)\n",
+ "Collecting google-cloud-datastore>=2.1.*\n",
+ " Downloading google_cloud_datastore-2.1.5-py2.py3-none-any.whl (127 kB)\n",
+ "\u001b[K |████████████████████████████████| 127 kB 58.5 MB/s \n",
+ "\u001b[?25hCollecting google-cloud-storage>=1.20.*\n",
+ " Downloading google_cloud_storage-1.41.1-py2.py3-none-any.whl (105 kB)\n",
+ "\u001b[K |████████████████████████████████| 105 kB 54.2 MB/s \n",
+ "\u001b[?25hCollecting google-cloud-bigquery-storage>=2.0.0\n",
+ " Downloading google_cloud_bigquery_storage-2.6.1-py2.py3-none-any.whl (125 kB)\n",
+ "\u001b[K |████████████████████████████████| 125 kB 60.5 MB/s \n",
+ "\u001b[?25hCollecting google-cloud-bigquery>=2.0.*\n",
+ " Downloading google_cloud_bigquery-2.22.1-py2.py3-none-any.whl (195 kB)\n",
+ "\u001b[K |████████████████████████████████| 195 kB 57.9 MB/s \n",
+ "\u001b[?25hCollecting grpcio>=1.34.0\n",
+ " Downloading grpcio-1.39.0-cp37-cp37m-manylinux2014_x86_64.whl (4.3 MB)\n",
+ "\u001b[K |████████████████████████████████| 4.3 MB 53.3 MB/s \n",
+ "\u001b[?25hCollecting proto-plus>=1.10.0\n",
+ " Downloading proto_plus-1.19.0-py3-none-any.whl (42 kB)\n",
+ "\u001b[K |████████████████████████████████| 42 kB 1.3 MB/s \n",
+ "\u001b[?25hCollecting google-resumable-media<3.0dev,>=0.6.0\n",
+ " Downloading google_resumable_media-1.3.1-py2.py3-none-any.whl (75 kB)\n",
+ "\u001b[K |████████████████████████████████| 75 kB 4.8 MB/s \n",
+ "\u001b[?25hCollecting google-api-core[grpc]<3.0.0dev,>=1.29.0\n",
+ " Downloading google_api_core-1.31.0-py2.py3-none-any.whl (93 kB)\n",
+ "\u001b[K |████████████████████████████████| 93 kB 1.4 MB/s \n",
+ "\u001b[?25hCollecting libcst>=0.2.5\n",
+ " Downloading libcst-0.3.19-py3-none-any.whl (513 kB)\n",
+ "\u001b[K |████████████████████████████████| 513 kB 50.1 MB/s \n",
+ "\u001b[?25hCollecting google-cloud-storage>=1.20.*\n",
+ " Downloading google_cloud_storage-1.41.0-py2.py3-none-any.whl (104 kB)\n",
+ "\u001b[K |████████████████████████████████| 104 kB 64.4 MB/s \n",
+ "\u001b[?25h Downloading google_cloud_storage-1.40.0-py2.py3-none-any.whl (104 kB)\n",
+ "\u001b[K |████████████████████████████████| 104 kB 75.0 MB/s \n",
+ "\u001b[?25hCollecting google-crc32c<2.0dev,>=1.0\n",
+ " Downloading google_crc32c-1.1.2-cp37-cp37m-manylinux2014_x86_64.whl (38 kB)\n",
+ "Requirement already satisfied: cffi>=1.0.0 in /usr/local/lib/python3.7/dist-packages (from google-crc32c<2.0dev,>=1.0->google-resumable-media<3.0dev,>=0.6.0->google-cloud-bigquery>=2.0.*->feast) (1.14.6)\n",
+ "Requirement already satisfied: pycparser in /usr/local/lib/python3.7/dist-packages (from cffi>=1.0.0->google-crc32c<2.0dev,>=1.0->google-resumable-media<3.0dev,>=0.6.0->google-cloud-bigquery>=2.0.*->feast) (2.20)\n",
+ "Collecting typing-inspect>=0.4.0\n",
+ " Downloading typing_inspect-0.7.1-py3-none-any.whl (8.4 kB)\n",
+ "Collecting mypy-extensions>=0.3.0\n",
+ " Downloading mypy_extensions-0.4.3-py2.py3-none-any.whl (4.5 kB)\n",
+ "Building wheels for collected packages: pandavro, PyYAML\n",
+ " Building wheel for pandavro (setup.py) ... \u001b[?25l\u001b[?25hdone\n",
+ " Created wheel for pandavro: filename=pandavro-1.5.2-py3-none-any.whl size=2953 sha256=ef61d7c0b4e22b55a5c39c933ee0a88fe71974f888751b75a8baca95690ea171\n",
+ " Stored in directory: /root/.cache/pip/wheels/33/3f/96/9f1b46a9f7f6043ff4741b1aa1a7b249ba33be4dc1d08843e4\n",
+ " Building wheel for PyYAML (setup.py) ... \u001b[?25l\u001b[?25hdone\n",
+ " Created wheel for PyYAML: filename=PyYAML-5.3.1-cp37-cp37m-linux_x86_64.whl size=44636 sha256=f01413855e330051c5c6b3f9d8bac9a478552572803aa3418838bd1d1f4cd756\n",
+ " Stored in directory: /root/.cache/pip/wheels/5e/03/1e/e1e954795d6f35dfc7b637fe2277bff021303bd9570ecea653\n",
+ "Successfully built pandavro PyYAML\n",
+ "Installing collected packages: mypy-extensions, googleapis-common-protos, typing-inspect, PyYAML, grpcio, google-crc32c, google-api-core, fastavro, tenacity, pydantic, proto-plus, pandavro, mmh3, libcst, google-resumable-media, google-cloud-core, colorama, google-cloud-storage, google-cloud-datastore, google-cloud-bigquery-storage, google-cloud-bigquery, feast\n",
+ " Attempting uninstall: googleapis-common-protos\n",
+ " Found existing installation: googleapis-common-protos 1.53.0\n",
+ " Uninstalling googleapis-common-protos-1.53.0:\n",
+ " Successfully uninstalled googleapis-common-protos-1.53.0\n",
+ " Attempting uninstall: PyYAML\n",
+ " Found existing installation: PyYAML 3.13\n",
+ " Uninstalling PyYAML-3.13:\n",
+ " Successfully uninstalled PyYAML-3.13\n",
+ " Attempting uninstall: grpcio\n",
+ " Found existing installation: grpcio 1.34.1\n",
+ " Uninstalling grpcio-1.34.1:\n",
+ " Successfully uninstalled grpcio-1.34.1\n",
+ " Attempting uninstall: google-api-core\n",
+ " Found existing installation: google-api-core 1.26.3\n",
+ " Uninstalling google-api-core-1.26.3:\n",
+ " Successfully uninstalled google-api-core-1.26.3\n",
+ " Attempting uninstall: google-resumable-media\n",
+ " Found existing installation: google-resumable-media 0.4.1\n",
+ " Uninstalling google-resumable-media-0.4.1:\n",
+ " Successfully uninstalled google-resumable-media-0.4.1\n",
+ " Attempting uninstall: google-cloud-core\n",
+ " Found existing installation: google-cloud-core 1.0.3\n",
+ " Uninstalling google-cloud-core-1.0.3:\n",
+ " Successfully uninstalled google-cloud-core-1.0.3\n",
+ " Attempting uninstall: google-cloud-storage\n",
+ " Found existing installation: google-cloud-storage 1.18.1\n",
+ " Uninstalling google-cloud-storage-1.18.1:\n",
+ " Successfully uninstalled google-cloud-storage-1.18.1\n",
+ " Attempting uninstall: google-cloud-datastore\n",
+ " Found existing installation: google-cloud-datastore 1.8.0\n",
+ " Uninstalling google-cloud-datastore-1.8.0:\n",
+ " Successfully uninstalled google-cloud-datastore-1.8.0\n",
+ " Attempting uninstall: google-cloud-bigquery-storage\n",
+ " Found existing installation: google-cloud-bigquery-storage 1.1.0\n",
+ " Uninstalling google-cloud-bigquery-storage-1.1.0:\n",
+ " Successfully uninstalled google-cloud-bigquery-storage-1.1.0\n",
+ " Attempting uninstall: google-cloud-bigquery\n",
+ " Found existing installation: google-cloud-bigquery 1.21.0\n",
+ " Uninstalling google-cloud-bigquery-1.21.0:\n",
+ " Successfully uninstalled google-cloud-bigquery-1.21.0\n",
+ "\u001b[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.\n",
+ "tensorflow 2.5.0 requires grpcio~=1.34.0, but you have grpcio 1.39.0 which is incompatible.\n",
+ "pandas-gbq 0.13.3 requires google-cloud-bigquery[bqstorage,pandas]<2.0.0dev,>=1.11.1, but you have google-cloud-bigquery 2.22.1 which is incompatible.\u001b[0m\n",
+ "Successfully installed PyYAML-5.3.1 colorama-0.4.4 fastavro-1.4.4 feast-0.11.0 google-api-core-1.31.0 google-cloud-bigquery-2.22.1 google-cloud-bigquery-storage-2.6.1 google-cloud-core-1.4.4 google-cloud-datastore-2.1.5 google-cloud-storage-1.40.0 google-crc32c-1.1.2 google-resumable-media-1.3.1 googleapis-common-protos-1.52.0 grpcio-1.39.0 libcst-0.3.19 mmh3-3.0.0 mypy-extensions-0.4.3 pandavro-1.5.2 proto-plus-1.19.0 pydantic-1.8.2 tenacity-8.0.1 typing-inspect-0.7.1\n"
+ ],
+ "name": "stdout"
+ },
+ {
+ "output_type": "display_data",
+ "data": {
+ "application/vnd.colab-display-data+json": {
+ "pip_warning": {
+ "packages": [
+ "google"
+ ]
+ }
+ }
+ },
+ "metadata": {
+ "tags": []
+ }
+ }
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "P8pFSVUp34W5"
+ },
+ "source": [
+ "#### Check feast version"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "soTYiMPXcNco"
+ },
+ "source": [
+ ""
+ ]
+ },
+ {
+ "cell_type": "code",
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/"
+ },
+ "id": "cOSAfdZiUnFa",
+ "outputId": "2462ce77-242b-4018-b5d6-fd0baa239836"
+ },
+ "source": [
+ "!feast version "
+ ],
+ "execution_count": 1,
+ "outputs": [
+ {
+ "output_type": "stream",
+ "text": [
+ "Feast is an open source project that collects anonymized error reporting and usage statistics. To opt out or learn more see https://docs.feast.dev/reference/usage\n",
+ "Feast SDK Version: \"feast 0.15.1\"\n"
+ ],
+ "name": "stdout"
+ }
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "pC4AzJ_b396l"
+ },
+ "source": [
+ "## Step 2: Clone the Git repo\n",
+ "\n",
+ "Clone the Driver Ranking Git repo into your Colab Folder"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/"
+ },
+ "id": "4Qim_qbtUyGA",
+ "outputId": "6556ee99-aac3-468c-a9d5-8a643387712d"
+ },
+ "source": [
+ "!git clone https://github.com/feast-dev/feast-driver-ranking-tutorial.git"
+ ],
+ "execution_count": 2,
+ "outputs": [
+ {
+ "output_type": "stream",
+ "text": [
+ "Cloning into 'feast-driver-ranking-tutorial'...\n",
+ "remote: Enumerating objects: 34, done.\u001b[K\n",
+ "remote: Counting objects: 100% (34/34), done.\u001b[K\n",
+ "remote: Compressing objects: 100% (24/24), done.\u001b[K\n",
+ "remote: Total 34 (delta 13), reused 28 (delta 8), pack-reused 0\u001b[K\n",
+ "Unpacking objects: 100% (34/34), done.\n"
+ ],
+ "name": "stdout"
+ }
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "tLnM3IOy5C5l"
+ },
+ "source": [
+ "## Step 3: Set up your Goggle Cloud Platform (GCP) Configurations"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "KiNaOTKzWIcb"
+ },
+ "source": [
+ "## Authenticate into GCP\n",
+ "This will allow you to do the advanced section of the tutorial, where you materialize remotely on a GCP\n",
+ "Feast spins up infrastructure on GCP using the credentials in our environment. Run the following cell to log into GCP:"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "metadata": {
+ "id": "8Tj3MUPHWPTF"
+ },
+ "source": [
+ "from google.colab import auth\n",
+ "auth.authenticate_user()"
+ ],
+ "execution_count": 3,
+ "outputs": []
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "d8yzazB-Wfqc"
+ },
+ "source": [
+ "Set configurations\n",
+ "Set the following configuration, which we'll be using throughout the tutorial:\n",
+ "\n",
+ "PROJECT_ID: Your project.\n",
+ "BUCKET_NAME: The name of a bucket which will be used to store the feature store registry and model artifacts.\n",
+ "BIGQUERY_DATASET_NAME: The name of a dataset which will be used to create tables containing features.\n",
+ "AI_PLATFORM_MODEL_NAME: The name of a model name which will be created in AI Platform."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/"
+ },
+ "id": "zV0sgfOTWeXz",
+ "outputId": "d324609f-83e9-444c-c581-f2ecfd103bb6"
+ },
+ "source": [
+ "PROJECT_ID= \"kf-feast\" #@param {type:\"string\"}\n",
+ "BUCKET_NAME= \"driver_ranking_tutorial\" #@param {type:\"string\"} custom\n",
+ "BIGQUERY_DATASET_NAME=\"feast_driver_ranking_tutorial\" #@param {type:\"string\"} custom\n",
+ "AI_PLATFORM_MODEL_NAME=\"feast_driver_rankin_jsd_model\" #@param {type:\"string\"\n",
+ "\n",
+ "! gcloud config set project $PROJECT_ID\n",
+ "%env GOOGLE_CLOUD_PROJECT=$PROJECT_ID\n",
+ "!echo project_id = $PROJECT_ID > ~/.bigqueryrc"
+ ],
+ "execution_count": 7,
+ "outputs": [
+ {
+ "output_type": "stream",
+ "text": [
+ "Updated property [core/project].\n",
+ "env: GOOGLE_CLOUD_PROJECT=kf-feast\n"
+ ],
+ "name": "stdout"
+ }
+ ]
+ },
+ {
+ "cell_type": "code",
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/"
+ },
+ "id": "6Cs_SYr2gOjR",
+ "outputId": "4b3d811b-6685-46f8-9830-b0378962bbef"
+ },
+ "source": [
+ "# Only run if your bucket doesn't already exist!\n",
+ "! gsutil mb gs://$BUCKET_NAME"
+ ],
+ "execution_count": 8,
+ "outputs": [
+ {
+ "output_type": "stream",
+ "text": [
+ "Creating gs://driver_ranking_tutorial/...\n"
+ ],
+ "name": "stdout"
+ }
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "ohWMCVhS5PPN"
+ },
+ "source": [
+ "## Step 4: Apply and deploy feature definitions\n",
+ "\n",
+ "`feast apply` scans python files in the current directory for feature definitions and deploys infrastructure according to `feature_store.yaml`"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/"
+ },
+ "id": "izhTk0WWX3Tx",
+ "outputId": "444a11d3-db43-4170-c28b-51d6ea618660"
+ },
+ "source": [
+ "%%shell\n",
+ "cd /content/feast-driver-ranking-tutorial/driver_ranking/\n",
+ "feast apply"
+ ],
+ "execution_count": 9,
+ "outputs": [
+ {
+ "output_type": "stream",
+ "text": [
+ "Registered entity \u001b[1m\u001b[32mdriver_id\u001b[0m\n",
+ "Registered feature view \u001b[1m\u001b[32mdriver_hourly_stats\u001b[0m\n",
+ "Deploying infrastructure for \u001b[1m\u001b[32mdriver_hourly_stats\u001b[0m\n"
+ ],
+ "name": "stdout"
+ },
+ {
+ "output_type": "execute_result",
+ "data": {
+ "text/plain": [
+ ""
+ ]
+ },
+ "metadata": {
+ "tags": []
+ },
+ "execution_count": 9
+ }
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "lJlrf2Iu53BR"
+ },
+ "source": [
+ "### Inspect the files created under your local folder"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/"
+ },
+ "id": "IrJ6gqtdmKk7",
+ "outputId": "db32950c-9a73-4c06-fde3-52c753929c9b"
+ },
+ "source": [
+ "%%shell\n",
+ "cd /content/feast-driver-ranking-tutorial/driver_ranking/data/\n",
+ "ls -l "
+ ],
+ "execution_count": 10,
+ "outputs": [
+ {
+ "output_type": "stream",
+ "text": [
+ "total 20\n",
+ "-rw-r--r-- 1 root root 16384 Jul 26 20:43 online.db\n",
+ "-rw-r--r-- 1 root root 310 Jul 26 20:43 registry.db\n"
+ ],
+ "name": "stdout"
+ },
+ {
+ "output_type": "execute_result",
+ "data": {
+ "text/plain": [
+ ""
+ ]
+ },
+ "metadata": {
+ "tags": []
+ },
+ "execution_count": 10
+ }
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "bHBfTEau6Qt9"
+ },
+ "source": [
+ "## Step 5: Train your model"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/"
+ },
+ "id": "F-Pc4Jo4kzBL",
+ "outputId": "6414795f-9330-44b3-ee5c-992c9dd55db7"
+ },
+ "source": [
+ "import feast\n",
+ "from joblib import dump\n",
+ "import pandas as pd\n",
+ "from sklearn.linear_model import LinearRegression\n",
+ "\n",
+ "# Load driver order data\n",
+ "orders = pd.read_csv(\"/content/feast-driver-ranking-tutorial/driver_orders.csv\", sep=\"\\t\")\n",
+ "orders[\"event_timestamp\"] = pd.to_datetime(orders[\"event_timestamp\"])\n",
+ "\n",
+ "# Connect to your feature store provider\n",
+ "fs = feast.FeatureStore(repo_path=\"/content/feast-driver-ranking-tutorial/driver_ranking\")\n",
+ " \n",
+ "# Retrieve training data from BigQuery\n",
+ "training_df = fs.get_historical_features(\n",
+ " entity_df=orders,\n",
+ " feature_refs=[\n",
+ " \"driver_hourly_stats:conv_rate\",\n",
+ " \"driver_hourly_stats:acc_rate\",\n",
+ " \"driver_hourly_stats:avg_daily_trips\",\n",
+ " ],\n",
+ ").to_df()\n",
+ "\n",
+ "print(\"----- Feature schema -----\\n\")\n",
+ "print(training_df.info())\n",
+ "\n",
+ "print()\n",
+ "print(\"----- Example features -----\\n\")\n",
+ "print(training_df.head())\n",
+ "\n",
+ "# Train model\n",
+ "target = \"trip_completed\"\n",
+ "\n",
+ "reg = LinearRegression()\n",
+ "train_X = training_df[training_df.columns.drop(target).drop(\"event_timestamp\")]\n",
+ "train_Y = training_df.loc[:, target]\n",
+ "reg.fit(train_X[sorted(train_X)], train_Y)\n",
+ "\n",
+ "# Save model\n",
+ "dump(reg, \"driver_model.bin\")"
+ ],
+ "execution_count": 13,
+ "outputs": [
+ {
+ "output_type": "stream",
+ "text": [
+ "----- Feature schema -----\n",
+ "\n",
+ "\n",
+ "RangeIndex: 10 entries, 0 to 9\n",
+ "Data columns (total 6 columns):\n",
+ " # Column Non-Null Count Dtype \n",
+ "--- ------ -------------- ----- \n",
+ " 0 event_timestamp 10 non-null datetime64[ns, UTC]\n",
+ " 1 driver_id 10 non-null int64 \n",
+ " 2 trip_completed 10 non-null int64 \n",
+ " 3 driver_hourly_stats__conv_rate 10 non-null float64 \n",
+ " 4 driver_hourly_stats__acc_rate 10 non-null float64 \n",
+ " 5 driver_hourly_stats__avg_daily_trips 10 non-null int64 \n",
+ "dtypes: datetime64[ns, UTC](1), float64(2), int64(3)\n",
+ "memory usage: 608.0 bytes\n",
+ "None\n",
+ "\n",
+ "----- Example features -----\n",
+ "\n",
+ " event_timestamp ... driver_hourly_stats__avg_daily_trips\n",
+ "0 2021-04-17 04:29:28+00:00 ... 982\n",
+ "1 2021-04-18 04:29:28+00:00 ... 982\n",
+ "2 2021-04-19 04:29:28+00:00 ... 982\n",
+ "3 2021-04-17 12:29:28+00:00 ... 551\n",
+ "4 2021-04-18 12:29:28+00:00 ... 551\n",
+ "\n",
+ "[5 rows x 6 columns]\n"
+ ],
+ "name": "stdout"
+ },
+ {
+ "output_type": "execute_result",
+ "data": {
+ "text/plain": [
+ "['driver_model.bin']"
+ ]
+ },
+ "metadata": {
+ "tags": []
+ },
+ "execution_count": 13
+ }
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "HpHacyo47Are"
+ },
+ "source": [
+ "## Step 6: Materialize your online store\n",
+ "Apply and materialize data to Firestore"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/"
+ },
+ "id": "67627WRSajIk",
+ "outputId": "056ce886-36a5-48dc-dcbb-23e032695708"
+ },
+ "source": [
+ "!cd /content/feast-driver-ranking-tutorial/driver_ranking/ && feast materialize-incremental 2022-01-01T00:00:00"
+ ],
+ "execution_count": 14,
+ "outputs": [
+ {
+ "output_type": "stream",
+ "text": [
+ "Materializing \u001b[1m\u001b[32m1\u001b[0m feature views to \u001b[1m\u001b[32m2022-01-01 00:00:00+00:00\u001b[0m into the \u001b[1m\u001b[32mdatastore\u001b[0m online store.\n",
+ "\n",
+ "\u001b[1m\u001b[32mdriver_hourly_stats\u001b[0m from \u001b[1m\u001b[32m2020-07-27 20:45:14+00:00\u001b[0m to \u001b[1m\u001b[32m2022-01-01 00:00:00+00:00\u001b[0m:\n",
+ "100%|███████████████████████████████████████████████████████████████| 10/10 [00:01<00:00, 6.16it/s]\n"
+ ],
+ "name": "stdout"
+ }
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "-869cxQO2ana"
+ },
+ "source": [
+ "### Step 7: Make Prediction"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "metadata": {
+ "id": "VP85XeGFzNYl"
+ },
+ "source": [
+ "import pandas as pd\n",
+ "import feast\n",
+ "from joblib import load\n",
+ "\n",
+ "\n",
+ "class DriverRankingModel:\n",
+ " def __init__(self):\n",
+ " # Load model\n",
+ " self.model = load(\"/content/driver_model.bin\")\n",
+ "\n",
+ " # Set up feature store\n",
+ " self.fs = feast.FeatureStore(repo_path=\"/content/feast-driver-ranking-tutorial/driver_ranking/\")\n",
+ "\n",
+ " def predict(self, driver_ids):\n",
+ " # Read features from Feast\n",
+ " driver_features = self.fs.get_online_features(\n",
+ " entity_rows=[{\"driver_id\": driver_id} for driver_id in driver_ids],\n",
+ " features=[\n",
+ " \"driver_hourly_stats:conv_rate\",\n",
+ " \"driver_hourly_stats:acc_rate\",\n",
+ " \"driver_hourly_stats:avg_daily_trips\",\n",
+ " ],\n",
+ " )\n",
+ " df = pd.DataFrame.from_dict(driver_features.to_dict())\n",
+ "\n",
+ " # Make prediction\n",
+ " df[\"prediction\"] = self.model.predict(df[sorted(df)])\n",
+ "\n",
+ " # Choose best driver\n",
+ " best_driver_id = df[\"driver_id\"].iloc[df[\"prediction\"].argmax()]\n",
+ "\n",
+ " # return best driver\n",
+ " return best_driver_id"
+ ],
+ "execution_count": 19,
+ "outputs": []
+ },
+ {
+ "cell_type": "code",
+ "metadata": {
+ "id": "f9AJ842Rk3E9"
+ },
+ "source": [
+ "def make_drivers_prediction():\n",
+ " drivers = [1001, 1002, 1003, 1004]\n",
+ " model = DriverRankingModel()\n",
+ " best_driver = model.predict(drivers)\n",
+ " print(f\"Prediction for best driver id: {best_driver}\")"
+ ],
+ "execution_count": 20,
+ "outputs": []
+ },
+ {
+ "cell_type": "code",
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/"
+ },
+ "id": "lq2TNXfjbb8e",
+ "outputId": "7c163361-491b-4eb7-87e0-6b68eccc9030"
+ },
+ "source": [
+ "make_drivers_prediction()"
+ ],
+ "execution_count": 21,
+ "outputs": [
+ {
+ "output_type": "stream",
+ "text": [
+ "Prediction for best driver id: 1001\n"
+ ],
+ "name": "stdout"
+ }
+ ]
+ }
+ ]
+}