diff --git a/examples/evaluation/Evaluate_RAG_with_LlamaIndex.ipynb b/examples/evaluation/Evaluate_RAG_with_LlamaIndex.ipynb index f54e4872a1..e4d1b76235 100644 --- a/examples/evaluation/Evaluate_RAG_with_LlamaIndex.ipynb +++ b/examples/evaluation/Evaluate_RAG_with_LlamaIndex.ipynb @@ -86,12 +86,12 @@ "metadata": {}, "outputs": [], "source": [ - "!pip install llama-index" + "!pip install llama-index==0.12.8" ] }, { "cell_type": "code", - "execution_count": 1, + "execution_count": null, "metadata": { "id": "t1NdWoBI_OFR" }, @@ -100,19 +100,18 @@ "# The nest_asyncio module enables the nesting of asynchronous functions within an already running async loop.\n", "# This is necessary because Jupyter notebooks inherently operate in an asynchronous loop.\n", "# By applying nest_asyncio, we can run additional async functions within this existing loop without conflicts.\n", + "import os\n", + "import pandas as pd\n", "import nest_asyncio\n", "\n", "nest_asyncio.apply()\n", "\n", - "from llama_index.evaluation import generate_question_context_pairs\n", - "from llama_index import VectorStoreIndex, SimpleDirectoryReader, ServiceContext\n", - "from llama_index.node_parser import SimpleNodeParser\n", - "from llama_index.evaluation import generate_question_context_pairs\n", - "from llama_index.evaluation import RetrieverEvaluator\n", - "from llama_index.llms import OpenAI\n", - "\n", - "import os\n", - "import pandas as pd" + "from llama_index.core.indices import VectorStoreIndex\n", + "from llama_index.core.readers import SimpleDirectoryReader\n", + "from llama_index.core.node_parser import SimpleNodeParser\n", + "from llama_index.core.evaluation import generate_question_context_pairs\n", + "from llama_index.core.evaluation.retrieval.evaluator import RetrieverEvaluator\n", + "from llama_index.llms.openai import OpenAI" ] }, { @@ -126,7 +125,7 @@ }, { "cell_type": "code", - "execution_count": 2, + "execution_count": null, "metadata": { "id": "bocDlS3FrP8L" }, @@ -156,7 +155,7 @@ 
}, { "cell_type": "code", - "execution_count": 3, + "execution_count": null, "metadata": { "colab": { "base_uri": "https://localhost:8080/" @@ -164,20 +163,10 @@ "id": "UUOKqSSeCkEN", "outputId": "17c6b9f6-f6f6-4d9f-d75f-a197133f15f1" }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - " % Total % Received % Xferd Average Speed Time Time Time Current\n", - " Dload Upload Total Spent Left Speed\n", - "100 75042 100 75042 0 0 190k 0 --:--:-- --:--:-- --:--:-- 190k--:-- 0:00:03 24586\n" - ] - } - ], + "outputs": [], "source": [ "!mkdir -p 'data/paul_graham/'\n", - "!curl 'https://raw.githubusercontent.com/run-llama/llama_index/main/docs/examples/data/paul_graham/paul_graham_essay.txt' -o 'data/paul_graham/paul_graham_essay.txt'" + "!curl 'https://raw.githubusercontent.com/run-llama/llama_index/9b28893d0bd26014862d275d3b81a2918b3d05ff/docs/docs/examples/data/paul_graham/paul_graham_essay.txt' -o 'data/paul_graham/paul_graham_essay.txt'" ] }, { @@ -191,7 +180,7 @@ }, { "cell_type": "code", - "execution_count": 4, + "execution_count": null, "metadata": { "colab": { "base_uri": "https://localhost:8080/" @@ -201,15 +190,18 @@ }, "outputs": [], "source": [ - "documents = SimpleDirectoryReader(\"./data/paul_graham/\").load_data()\n", + "from llama_index.core.settings import Settings\n", + "from llama_index.embeddings.openai.base import OpenAIEmbedding\n", "\n", + "documents = SimpleDirectoryReader(\"./data/paul_graham/\").load_data()\n", "# Define an LLM\n", - "llm = OpenAI(model=\"gpt-4\")\n", + "llm = OpenAI(model=\"gpt-4o\")\n", "\n", - "# Build index with a chunk_size of 512\n", - "node_parser = SimpleNodeParser.from_defaults(chunk_size=512)\n", + "node_parser = SimpleNodeParser(chunk_size=512)\n", "nodes = node_parser.get_nodes_from_documents(documents)\n", - "vector_index = VectorStoreIndex(nodes)" + "\n", + "vector_index = VectorStoreIndex(nodes)\n", + "print(vector_index._embed_model)" ] }, { @@ -223,7 +215,7 @@ }, { "cell_type": 
"code", - "execution_count": 5, + "execution_count": null, "metadata": { "id": "kOZBy--R_m3I" }, @@ -234,7 +226,7 @@ }, { "cell_type": "code", - "execution_count": 6, + "execution_count": null, "metadata": { "id": "G7NVP-N4_rXF" }, @@ -254,7 +246,7 @@ }, { "cell_type": "code", - "execution_count": 7, + "execution_count": null, "metadata": { "colab": { "base_uri": "https://localhost:8080/", @@ -267,10 +259,10 @@ { "data": { "text/plain": [ - "'The author wrote short stories and worked on programming, specifically on an IBM 1401 computer using an early version of Fortran.'" + "'The author worked on writing short stories and programming, particularly on an IBM 1401 computer using an early version of Fortran in 9th grade.'" ] }, - "execution_count": 7, + "execution_count": null, "metadata": {}, "output_type": "execute_result" } @@ -293,7 +285,7 @@ }, { "cell_type": "code", - "execution_count": 8, + "execution_count": null, "metadata": { "colab": { "base_uri": "https://localhost:8080/" @@ -308,7 +300,7 @@ "'What I Worked On\\n\\nFebruary 2021\\n\\nBefore college the two main things I worked on, outside of school, were writing and programming. I didn\\'t write essays. I wrote what beginning writers were supposed to write then, and probably still are: short stories. My stories were awful. They had hardly any plot, just characters with strong feelings, which I imagined made them deep.\\n\\nThe first programs I tried writing were on the IBM 1401 that our school district used for what was then called \"data processing.\" This was in 9th grade, so I was 13 or 14. The school district\\'s 1401 happened to be in the basement of our junior high school, and my friend Rich Draves and I got permission to use it. It was like a mini Bond villain\\'s lair down there, with all these alien-looking machines — CPU, disk drives, printer, card reader — sitting up on a raised floor under bright fluorescent lights.\\n\\nThe language we used was an early version of Fortran. 
You had to type programs on punch cards, then stack them in the card reader and press a button to load the program into memory and run it. The result would ordinarily be to print something on the spectacularly loud printer.\\n\\nI was puzzled by the 1401. I couldn\\'t figure out what to do with it. And in retrospect there\\'s not much I could have done with it. The only form of input to programs was data stored on punched cards, and I didn\\'t have any data stored on punched cards. The only other option was to do things that didn\\'t rely on any input, like calculate approximations of pi, but I didn\\'t know enough math to do anything interesting of that type. So I\\'m not surprised I can\\'t remember any programs I wrote, because they can\\'t have done much. My clearest memory is of the moment I learned it was possible for programs not to terminate, when one of mine didn\\'t. On a machine without time-sharing, this was a social as well as a technical error, as the data center manager\\'s expression made clear.\\n\\nWith microcomputers, everything changed.'" ] }, - "execution_count": 8, + "execution_count": null, "metadata": {}, "output_type": "execute_result" } @@ -320,16 +312,16 @@ }, { "cell_type": "code", - "execution_count": 9, + "execution_count": null, "metadata": {}, "outputs": [ { "data": { "text/plain": [ - "\"It felt like I was doing life right. I remember that because I was slightly dismayed at how novel it felt. The good news is that I had more moments like this over the next few years.\\n\\nIn the summer of 2016 we moved to England. We wanted our kids to see what it was like living in another country, and since I was a British citizen by birth, that seemed the obvious choice. We only meant to stay for a year, but we liked it so much that we still live there. So most of Bel was written in England.\\n\\nIn the fall of 2019, Bel was finally finished. 
Like McCarthy's original Lisp, it's a spec rather than an implementation, although like McCarthy's Lisp it's a spec expressed as code.\\n\\nNow that I could write essays again, I wrote a bunch about topics I'd had stacked up. I kept writing essays through 2020, but I also started to think about other things I could work on. How should I choose what to do? Well, how had I chosen what to work on in the past? I wrote an essay for myself to answer that question, and I was surprised how long and messy the answer turned out to be. If this surprised me, who'd lived it, then I thought perhaps it would be interesting to other people, and encouraging to those with similarly messy lives. So I wrote a more detailed version for others to read, and this is the last sentence of it.\\n\\n\\n\\n\\n\\n\\n\\n\\n\\nNotes\\n\\n[1] My experience skipped a step in the evolution of computers: time-sharing machines with interactive OSes. I went straight from batch processing to microcomputers, which made microcomputers seem all the more exciting.\\n\\n[2] Italian words for abstract concepts can nearly always be predicted from their English cognates (except for occasional traps like polluzione). It's the everyday words that differ. So if you string together a lot of abstract concepts with a few simple verbs, you can make a little Italian go a long way.\\n\\n[3] I lived at Piazza San Felice 4, so my walk to the Accademia went straight down the spine of old Florence: past the Pitti, across the bridge, past Orsanmichele, between the Duomo and the Baptistery, and then up Via Ricasoli to Piazza San Marco.\"" + "\"I also worked on spam filters, and did some more painting. I used to have dinners for a group of friends every thursday night, which taught me how to cook for groups. And I bought another building in Cambridge, a former candy factory (and later, twas said, porn studio), to use as an office.\\n\\nOne night in October 2003 there was a big party at my house. 
It was a clever idea of my friend Maria Daniels, who was one of the thursday diners. Three separate hosts would all invite their friends to one party. So for every guest, two thirds of the other guests would be people they didn't know but would probably like. One of the guests was someone I didn't know but would turn out to like a lot: a woman called Jessica Livingston. A couple days later I asked her out.\\n\\nJessica was in charge of marketing at a Boston investment bank. This bank thought it understood startups, but over the next year, as she met friends of mine from the startup world, she was surprised how different reality was. And how colorful their stories were. So she decided to compile a book of interviews with startup founders.\\n\\nWhen the bank had financial problems and she had to fire half her staff, she started looking for a new job. In early 2005 she interviewed for a marketing job at a Boston VC firm. It took them weeks to make up their minds, and during this time I started telling her about all the things that needed to be fixed about venture capital. They should make a larger number of smaller investments instead of a handful of giant ones, they should be funding younger, more technical founders instead of MBAs, they should let the founders remain as CEO, and so on.\\n\\nOne of my tricks for writing essays had always been to give talks. The prospect of having to stand up in front of a group of people and tell them something that won't waste their time is a great spur to the imagination. When the Harvard Computer Society, the undergrad computer club, asked me to give a talk, I decided I would tell them how to start a startup. 
Maybe they'd be able to avoid the worst of the mistakes we'd made.\"" ] }, - "execution_count": 9, + "execution_count": null, "metadata": {}, "output_type": "execute_result" } @@ -381,7 +373,7 @@ }, { "cell_type": "code", - "execution_count": 10, + "execution_count": null, "metadata": { "colab": { "base_uri": "https://localhost:8080/" @@ -394,7 +386,7 @@ "name": "stderr", "output_type": "stream", "text": [ - "100%|██████████| 58/58 [06:26<00:00, 6.67s/it]\n" + "100%|██████████| 61/61 [01:24<00:00, 1.39s/it]\n" ] } ], @@ -431,7 +423,7 @@ }, { "cell_type": "code", - "execution_count": 11, + "execution_count": null, "metadata": { "id": "fV9IdnwLM_aw" }, @@ -461,7 +453,7 @@ }, { "cell_type": "code", - "execution_count": 12, + "execution_count": null, "metadata": { "id": "H6V_LCxrPQzp" }, @@ -474,7 +466,7 @@ }, { "cell_type": "code", - "execution_count": 13, + "execution_count": null, "metadata": { "id": "NYFgmnpRPX-x" }, @@ -495,7 +487,7 @@ }, { "cell_type": "code", - "execution_count": 14, + "execution_count": null, "metadata": { "id": "S9T268MhRNxp" }, @@ -518,69 +510,7 @@ " {\"Retriever Name\": [name], \"Hit Rate\": [hit_rate], \"MRR\": [mrr]}\n", " )\n", "\n", - " return metric_df" - ] - }, - { - "cell_type": "code", - "execution_count": 15, - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/", - "height": 81 - }, - "id": "A1eESYN-RRgl", - "outputId": "ff27adb0-d189-4b7d-8998-6df15b6a2014" - }, - "outputs": [ - { - "data": { - "text/html": [ - "<div>\n", - "<style scoped>\n", - " .dataframe tbody tr th:only-of-type {\n", - " vertical-align: middle;\n", - " }\n", - "\n", - " .dataframe tbody tr th {\n", - " vertical-align: top;\n", - " }\n", - "\n", - " .dataframe thead th {\n", - " text-align: right;\n", - " }\n", - "</style>\n", - "<table border=\"1\" class=\"dataframe\">\n", - " <thead>\n", - " <tr style=\"text-align: right;\">\n", - " <th></th>\n", - " <th>Retriever Name</th>\n", - " <th>Hit Rate</th>\n", - " <th>MRR</th>\n", - " </tr>\n", - " 
</thead>\n", - " <tbody>\n", - " <tr>\n", - " <th>0</th>\n", - " <td>OpenAI Embedding Retriever</td>\n", - " <td>0.758621</td>\n", - " <td>0.62069</td>\n", - " </tr>\n", - " </tbody>\n", - "</table>\n", - "</div>" - ], - "text/plain": [ - " Retriever Name Hit Rate MRR\n", - "0 OpenAI Embedding Retriever 0.758621 0.62069" - ] - }, - "execution_count": 15, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ + " return metric_df\n", "display_results(\"OpenAI Embedding Retriever\", eval_results)" ] }, @@ -608,14 +538,13 @@ }, { "cell_type": "code", - "execution_count": 16, + "execution_count": null, "metadata": { "id": "-zMMJAQvRS8H" }, "outputs": [], "source": [ "# Get the list of queries from the above created dataset\n", - "\n", "queries = list(qa_dataset.queries.values())" ] }, @@ -643,26 +572,9 @@ "id": "e3ITPhWVrjvP" }, "source": [ - "We will use `gpt-3.5-turbo` for generating response for a given query and `gpt-4` for evaluation.\n", + "We will use `gpt-4o-mini` for generating responses for a given query and `gpt-4o` for evaluation.\n", "\n", - "Let's create service_context seperately for `gpt-3.5-turbo` and `gpt-4`." - ] - }, - { - "cell_type": "code", - "execution_count": 17, - "metadata": { - "id": "t-yuVS1iv84q" - }, - "outputs": [], - "source": [ - "# gpt-3.5-turbo\n", - "gpt35 = OpenAI(temperature=0, model=\"gpt-3.5-turbo\")\n", - "service_context_gpt35 = ServiceContext.from_defaults(llm=gpt35)\n", - "\n", - "# gpt-4\n", - "gpt4 = OpenAI(temperature=0, model=\"gpt-4\")\n", - "service_context_gpt4 = ServiceContext.from_defaults(llm=gpt4)" + "Let's configure the `gpt-4o-mini` and `gpt-4o` LLMs separately." ] }, { @@ -671,19 +583,23 @@ "id": "mXdRv7pIt8nw" }, "source": [ - "Create a `QueryEngine` with `gpt-3.5-turbo` service_context to generate response for the query." + "Create a `QueryEngine` with `gpt-4o-mini` as the LLM to generate a response for the query."
] }, { "cell_type": "code", - "execution_count": 18, + "execution_count": null, "metadata": { "id": "lrrD5n6w3Oet" }, "outputs": [], "source": [ - "vector_index = VectorStoreIndex(nodes, service_context = service_context_gpt35)\n", - "query_engine = vector_index.as_query_engine()" + "# gpt-4o-mini\n", + "gpt4mini = OpenAI(temperature=0, model=\"gpt-4o-mini\")\n", + "Settings.llm = gpt4mini\n", + "\n", + "vector_index = VectorStoreIndex(nodes)\n", + "query_engine_4mini = vector_index.as_query_engine()" ] }, { @@ -699,14 +615,16 @@ }, { "cell_type": "code", - "execution_count": 19, + "execution_count": null, "metadata": { "id": "dbvXvcFnU09s" }, "outputs": [], "source": [ - "from llama_index.evaluation import FaithfulnessEvaluator\n", - "faithfulness_gpt4 = FaithfulnessEvaluator(service_context=service_context_gpt4)" + "from llama_index.core.evaluation import FaithfulnessEvaluator\n", + "\n", + "gpt4o = OpenAI(temperature=0, model=\"gpt-4o\")\n", + "faithfulness_gpt4 = FaithfulnessEvaluator(llm=gpt4o)" ] }, { @@ -721,7 +639,7 @@ }, { "cell_type": "code", - "execution_count": 20, + "execution_count": null, "metadata": { "id": "9lfyhUuDz6cd" }, @@ -729,10 +647,10 @@ { "data": { "text/plain": [ - "\"Based on the author's experience and observations, why did he consider the AI practices during his first year of grad school as a hoax? 
Provide specific examples from the text to support your answer.\"" + "'What realization did the author come to during their first year of grad school regarding the limitations of AI programs like SHRDLU, and how did this influence their academic focus?'" ] }, - "execution_count": 20, + "execution_count": null, "metadata": {}, "output_type": "execute_result" } @@ -753,29 +671,28 @@ }, { "cell_type": "code", - "execution_count": 21, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ - "response_vector = query_engine.query(eval_query)" + "response_vector = query_engine_4mini.query(eval_query)" ] }, { "cell_type": "code", - "execution_count": 22, + "execution_count": null, "metadata": { "id": "MZ6lvmRf3j8i" }, "outputs": [], "source": [ "# Compute faithfulness evaluation\n", - "\n", "eval_result = faithfulness_gpt4.evaluate_response(response=response_vector)" ] }, { "cell_type": "code", - "execution_count": 23, + "execution_count": null, "metadata": { "colab": { "base_uri": "https://localhost:8080/" @@ -783,18 +700,7 @@ "id": "Jj79Rq-gn3cv", "outputId": "5078aeb7-c620-45d6-dc1f-215e716f4e59" }, - "outputs": [ - { - "data": { - "text/plain": [ - "True" - ] - }, - "execution_count": 23, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "# You can check passing parameter in eval_result if it passed the evaluation.\n", "eval_result.passing" @@ -829,15 +735,15 @@ }, { "cell_type": "code", - "execution_count": 24, + "execution_count": null, "metadata": { "id": "Qw5X_hMB24kC" }, "outputs": [], "source": [ - "from llama_index.evaluation import RelevancyEvaluator\n", + "from llama_index.core.evaluation import RelevancyEvaluator\n", "\n", - "relevancy_gpt4 = RelevancyEvaluator(service_context=service_context_gpt4)" + "relevancy_gpt4 = RelevancyEvaluator(llm=gpt4o)" ] }, { @@ -851,16 +757,16 @@ }, { "cell_type": "code", - "execution_count": 25, + "execution_count": null, "metadata": {}, "outputs": [ { "data": { 
"text/plain": [ - "\"Based on the author's experience and observations, why did he consider the AI practices during his first year of grad school as a hoax? Provide specific examples from the text to support your answer.\"" + "'What realization did the author come to during their first year of grad school regarding the limitations of AI programs like SHRDLU, and how did this influence their academic focus?'" ] }, - "execution_count": 25, + "execution_count": null, "metadata": {}, "output_type": "execute_result" } @@ -874,7 +780,7 @@ }, { "cell_type": "code", - "execution_count": 26, + "execution_count": null, "metadata": { "id": "r9FwcImG3cV0" }, @@ -882,7 +788,7 @@ "source": [ "# Generate response.\n", "# response_vector has response and source nodes (retrieved context)\n", - "response_vector = query_engine.query(query)\n", + "response_vector = query_engine_4mini.query(query)\n", "\n", "# Relevancy evaluation\n", "eval_result = relevancy_gpt4.evaluate_response(\n", @@ -892,7 +798,7 @@ }, { "cell_type": "code", - "execution_count": 27, + "execution_count": null, "metadata": { "colab": { "base_uri": "https://localhost:8080/" @@ -900,18 +806,7 @@ "id": "71j-t0DX3gh4", "outputId": "087ca15f-ac6f-449a-8f48-ef257a6d4b0d" }, - "outputs": [ - { - "data": { - "text/plain": [ - "True" - ] - }, - "execution_count": 27, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "# You can check passing parameter in eval_result if it passed the evaluation.\n", "eval_result.passing" @@ -919,7 +814,7 @@ }, { "cell_type": "code", - "execution_count": 28, + "execution_count": null, "metadata": { "colab": { "base_uri": "https://localhost:8080/", @@ -928,18 +823,7 @@ "id": "cW5-6T67w_VF", "outputId": "5051e37d-f506-4e2f-885a-2ed2ee4cfac7" }, - "outputs": [ - { - "data": { - "text/plain": [ - "'YES'" - ] - }, - "execution_count": 28, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "# You can get the feedback for 
the evaluation.\n", "eval_result.feedback" @@ -958,13 +842,13 @@ }, { "cell_type": "code", - "execution_count": 29, + "execution_count": null, "metadata": { "id": "-t6Hxrc93jla" }, "outputs": [], "source": [ - "from llama_index.evaluation import BatchEvalRunner\n", + "from llama_index.core.evaluation import BatchEvalRunner\n", "\n", "# Let's pick top 10 queries to do evaluation\n", "batch_eval_queries = queries[:10]\n", @@ -972,18 +856,18 @@ "# Initiate BatchEvalRunner to compute FaithFulness and Relevancy Evaluation.\n", "runner = BatchEvalRunner(\n", " {\"faithfulness\": faithfulness_gpt4, \"relevancy\": relevancy_gpt4},\n", - " workers=8,\n", + " workers=1,\n", ")\n", "\n", "# Compute evaluation\n", "eval_results = await runner.aevaluate_queries(\n", - " query_engine, queries=batch_eval_queries\n", + " query_engine_4mini, queries=batch_eval_queries\n", ")" ] }, { "cell_type": "code", - "execution_count": 30, + "execution_count": null, "metadata": { "colab": { "base_uri": "https://localhost:8080/" @@ -991,18 +875,7 @@ "id": "cAxrc5NF4T1r", "outputId": "f80c105c-9d4b-4e10-8707-e4bad2bed9c0" }, - "outputs": [ - { - "data": { - "text/plain": [ - "1.0" - ] - }, - "execution_count": 30, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "# Let's get faithfulness score\n", "\n", @@ -1013,7 +886,7 @@ }, { "cell_type": "code", - "execution_count": 31, + "execution_count": null, "metadata": { "colab": { "base_uri": "https://localhost:8080/" @@ -1021,18 +894,7 @@ "id": "AGU3_QHW4ajS", "outputId": "0e67a5f7-da94-40c4-8aa0-cd8874bb7ae9" }, - "outputs": [ - { - "data": { - "text/plain": [ - "1.0" - ] - }, - "execution_count": 31, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "# Let's get relevancy score\n", "\n", @@ -1079,7 +941,7 @@ "provenance": [] }, "kernelspec": { - "display_name": "Python 3", + "display_name": "base", "language": "python", "name": "python3" }, @@ -1093,12 +955,7 @@ 
"name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.11.3 (main, Apr 7 2023, 19:08:44) [Clang 13.0.0 (clang-1300.0.29.30)]" - }, - "vscode": { - "interpreter": { - "hash": "b0fa6594d8f4cbf19f97940f81e996739fb7646882a419484c72d19e05852a7e" - } + "version": "3.10.16" } }, "nbformat": 4, diff --git a/examples/third_party/financial_document_analysis_with_llamaindex.ipynb b/examples/third_party/financial_document_analysis_with_llamaindex.ipynb index b68c4da3eb..8fea4df9eb 100644 --- a/examples/third_party/financial_document_analysis_with_llamaindex.ipynb +++ b/examples/third_party/financial_document_analysis_with_llamaindex.ipynb @@ -95,7 +95,7 @@ }, "outputs": [], "source": [ - "!pip install llama-index pypdf" + "!pip install llama-index==0.12.8 pypdf" ] }, { @@ -109,20 +109,21 @@ }, { "cell_type": "code", - "execution_count": 22, + "execution_count": null, "id": "09fbec4c-1864-4d76-9dbf-3d213ba58fc8", "metadata": { "tags": [] }, "outputs": [], "source": [ - "from langchain import OpenAI\n", - "\n", - "from llama_index import SimpleDirectoryReader, ServiceContext, VectorStoreIndex\n", - "from llama_index import set_global_service_context\n", - "from llama_index.response.pprint_utils import pprint_response\n", - "from llama_index.tools import QueryEngineTool, ToolMetadata\n", - "from llama_index.query_engine import SubQuestionQueryEngine" + "import os\n", + "from llama_index.llms.openai import OpenAI\n", + "from llama_index.core.readers import SimpleDirectoryReader\n", + "from llama_index.core.indices import VectorStoreIndex\n", + "from llama_index.core.tools import QueryEngineTool, ToolMetadata\n", + "from llama_index.core.query_engine import SubQuestionQueryEngine\n", + "from llama_index.core.settings import Settings\n", + "from llama_index.embeddings.openai.base import OpenAIEmbedding" ] }, { @@ -134,41 +135,42 @@ }, "source": [ "Before we start, we can configure the LLM provider and model that will power our RAG system. 
\n", - "Here, we pick `gpt-3.5-turbo-instruct` from OpenAI. " + "Here, we pick `gpt-4o-mini` from OpenAI. " ] }, { "cell_type": "code", - "execution_count": 26, + "execution_count": null, "id": "c4ec8b0a-d5fa-4f74-a2cc-5cc52e009bc6", "metadata": { "tags": [] }, "outputs": [], "source": [ - "llm = OpenAI(temperature=0, model_name=\"gpt-3.5-turbo-instruct\", max_tokens=-1)" + "os.environ['OPENAI_API_KEY'] = 'YOUR OPENAI API KEY'\n", + "llm = OpenAI(temperature=0, model=\"gpt-4o-mini\")" ] }, { - "attachments": {}, "cell_type": "markdown", - "id": "c2810e8c-1c88-49f5-aada-c49eccded166", + "id": "6cb32a50", "metadata": {}, "source": [ - "We construct a `ServiceContext` and set it as the global default, so all subsequent operations that depends on LLM calls will use the model we configured here." + "Set the LLM to the previously configured OpenAI model and the embedding model to the OpenAI embedding model using `Settings`." ] }, { "cell_type": "code", - "execution_count": 28, + "execution_count": null, "id": "05e016f9-2055-4885-8416-cc3aa2968242", "metadata": { "tags": [] }, "outputs": [], "source": [ - "service_context = ServiceContext.from_defaults(llm=llm)\n", - "set_global_service_context(service_context=service_context)" + "Settings.llm = llm\n", + "Settings.embed_model = OpenAIEmbedding()\n", + "Settings.chunk_size = 1024" ] }, { @@ -198,7 +200,7 @@ }, { "cell_type": "code", - "execution_count": 30, + "execution_count": null, "id": "dd0ba028-1e70-4164-8af1-5f1df0ea76a9", "metadata": { "tags": [] @@ -211,7 +213,7 @@ }, { "cell_type": "code", - "execution_count": 31, + "execution_count": null, "id": "d026ef11-ebc5-4ec3-9aab-8e065cd7f8a9", "metadata": { "tags": [] @@ -244,7 +246,7 @@ }, { "cell_type": "code", - "execution_count": 33, + "execution_count": null, "id": "1e0b6e4c-2255-42cf-be88-0fe75a945d85", "metadata": { "tags": [] @@ -282,7 +284,7 @@ }, { "cell_type": "code", - "execution_count": 34, + "execution_count": null, "id": 
"82466534-c3d8-4619-ab1b-4abcd05c8ba7", "metadata": { "tags": [] @@ -294,7 +296,7 @@ }, { "cell_type": "code", - "execution_count": 35, + "execution_count": null, "id": "ff449977-2c7c-433f-b303-ff1d7b66c7b3", "metadata": { "tags": [] @@ -317,7 +319,7 @@ }, { "cell_type": "code", - "execution_count": 79, + "execution_count": null, "id": "18df061f-238d-4a27-8fd6-1037b0098ae8", "metadata": { "tags": [] @@ -329,7 +331,7 @@ }, { "cell_type": "code", - "execution_count": 80, + "execution_count": null, "id": "0e2ab622-e76f-43b6-aea3-122c8a6946de", "metadata": { "tags": [] @@ -339,8 +341,7 @@ "name": "stdout", "output_type": "stream", "text": [ - "\n", - "$3,208.3 million (page 63)\n" + "The revenue of Lyft in 2021 was $3.21 billion. (Page reference: 79)\n" ] } ], @@ -350,7 +351,7 @@ }, { "cell_type": "code", - "execution_count": 81, + "execution_count": null, "id": "2e101199-454b-4aca-913b-20c9631909b8", "metadata": { "tags": [] @@ -362,7 +363,7 @@ }, { "cell_type": "code", - "execution_count": 82, + "execution_count": null, "id": "82b9cced-f7cf-49e4-965a-ee7c45baae7f", "metadata": { "tags": [] @@ -372,8 +373,7 @@ "name": "stdout", "output_type": "stream", "text": [ - "\n", - "$17,455 (page 53)\n" + "The revenue of Uber in 2021 was $17,455 million. 
(Page reference: 98)\n" ] } ], @@ -408,7 +408,7 @@ }, { "cell_type": "code", - "execution_count": 83, + "execution_count": null, "id": "8775650f-b164-478c-8129-9a8e6a0cdc97", "metadata": { "tags": [] @@ -442,7 +442,7 @@ }, { "cell_type": "code", - "execution_count": 84, + "execution_count": null, "id": "edd4bbb7-eef9-4b53-b05d-f91033635ac2", "metadata": { "tags": [] @@ -453,19 +453,14 @@ "output_type": "stream", "text": [ "Generated 4 sub questions.\n", - "\u001b[36;1m\u001b[1;3m[uber_10k] Q: What customer segments grew the fastest for Uber\n", - "\u001b[0m\u001b[36;1m\u001b[1;3m[uber_10k] A: in 2021?\n", - "\n", - "The customer segments that grew the fastest for Uber in 2021 were its Mobility Drivers, Couriers, Riders, and Eaters. These segments experienced growth due to the continued stay-at-home order demand related to COVID-19, as well as Uber's introduction of its Uber One, Uber Pass, Eats Pass, and Rides Pass membership programs. Additionally, Uber's marketplace-centric advertising helped to connect merchants and brands with its platform network, further driving growth.\n", - "\u001b[0m\u001b[33;1m\u001b[1;3m[uber_10k] Q: What geographies grew the fastest for Uber\n", - "\u001b[0m\u001b[33;1m\u001b[1;3m[uber_10k] A: \n", - "Based on the context information, it appears that Uber experienced the most growth in large metropolitan areas, such as Chicago, Miami, New York City, Sao Paulo, and London. Additionally, Uber experienced growth in suburban and rural areas, as well as in countries such as Argentina, Germany, Italy, Japan, South Korea, and Spain.\n", - "\u001b[0m\u001b[38;5;200m\u001b[1;3m[lyft_10k] Q: What customer segments grew the fastest for Lyft\n", - "\u001b[0m\u001b[38;5;200m\u001b[1;3m[lyft_10k] A: \n", - "The customer segments that grew the fastest for Lyft were ridesharing, light vehicles, and public transit. 
Ridesharing grew as Lyft was able to predict demand and proactively incentivize drivers to be available for rides in the right place at the right time. Light vehicles grew as users were looking for options that were more active, usually lower-priced, and often more efficient for short trips during heavy traffic. Public transit grew as Lyft integrated third-party public transit data into the Lyft App to offer users a robust view of transportation options around them.\n", - "\u001b[0m\u001b[32;1m\u001b[1;3m[lyft_10k] Q: What geographies grew the fastest for Lyft\n", - "\u001b[0m\u001b[32;1m\u001b[1;3m[lyft_10k] A: \n", - "It is not possible to answer this question with the given context information.\n", + "\u001b[1;3;38;2;237;90;200m[lyft_10k] Q: What were the customer segments that grew the fastest for Lyft in 2021?\n", + "\u001b[0m\u001b[1;3;38;2;90;149;237m[lyft_10k] Q: What were the geographies that grew the fastest for Lyft in 2021?\n", + "\u001b[0m\u001b[1;3;38;2;11;159;203m[uber_10k] Q: What were the customer segments that grew the fastest for Uber in 2021?\n", + "\u001b[0m\u001b[1;3;38;2;155;135;227m[uber_10k] Q: What were the geographies that grew the fastest for Uber in 2021?\n", + "\u001b[0m\u001b[1;3;38;2;155;135;227m[uber_10k] A: Chicago, Miami, New York City in the United States, Sao Paulo in Brazil, and London in the United Kingdom were the geographies that grew the fastest for Uber in 2021.\n", + "\u001b[0m\u001b[1;3;38;2;11;159;203m[uber_10k] A: The customer segments that grew the fastest for Uber in 2021 were the membership programs, specifically Uber One, Uber Pass, Eats Pass, and Rides Pass.\n", + "\u001b[0m\u001b[1;3;38;2;90;149;237m[lyft_10k] A: The geographies that grew the fastest for Lyft in 2021 were those where vaccines were more widely distributed and communities fully reopened, resulting in a 36% increase in revenue compared to the prior year. 
Additionally, the number of Active Riders increased by 49.2% in the fourth quarter of 2021 compared to the fourth quarter of 2020.\n", + "\u001b[0m\u001b[1;3;38;2;237;90;200m[lyft_10k] A: The customer segment that grew the fastest for Lyft in 2021 was the number of Active Riders, which increased by 49.2% in the fourth quarter of 2021 compared to the fourth quarter of 2020.\n", "\u001b[0m" ] } @@ -476,7 +471,7 @@ }, { "cell_type": "code", - "execution_count": 85, + "execution_count": null, "id": "b631d68b-dd17-4afd-9ed7-da0131041c8b", "metadata": { "tags": [] @@ -486,12 +481,9 @@ "name": "stdout", "output_type": "stream", "text": [ + "The customer segments that grew the fastest for Lyft in 2021 were the Active Riders, with a notable increase of 49.2% in the fourth quarter of 2021 compared to the same period in 2020. On the other hand, Uber experienced the fastest growth in membership programs such as Uber One, Uber Pass, Eats Pass, and Rides Pass.\n", "\n", - "The customer segments that grew the fastest for Uber in 2021 were its Mobility Drivers, Couriers, Riders, and Eaters. These segments experienced growth due to the continued stay-at-home order demand related to COVID-19, as well as Uber's introduction of its Uber One, Uber Pass, Eats Pass, and Rides Pass membership programs. Additionally, Uber's marketplace-centric advertising helped to connect merchants and brands with its platform network, further driving growth. Uber experienced the most growth in large metropolitan areas, such as Chicago, Miami, New York City, Sao Paulo, and London. Additionally, Uber experienced growth in suburban and rural areas, as well as in countries such as Argentina, Germany, Italy, Japan, South Korea, and Spain.\n", - "\n", - "The customer segments that grew the fastest for Lyft were ridesharing, light vehicles, and public transit. Ridesharing grew as Lyft was able to predict demand and proactively incentivize drivers to be available for rides in the right place at the right time. 
Light vehicles grew as users were looking for options that were more active, usually lower-priced, and often more efficient for short trips during heavy traffic. Public transit grew as Lyft integrated third-party public transit data into the Lyft App to offer users a robust view of transportation options around them. It is not possible to answer the question of which geographies grew the fastest for Lyft with the given context information.\n", - "\n", - "In summary, Uber and Lyft both experienced growth in customer segments related to mobility, couriers, riders, and eaters. Uber experienced the most growth in large metropolitan areas, as well as in suburban and rural areas, and in countries such as Argentina, Germany, Italy, Japan, South Korea, and Spain. Lyft experienced the most growth in ridesharing, light vehicles, and public transit. It is not possible to answer the question of which geographies grew the fastest for Lyft with the given context information.\n" + "In terms of geographies, Lyft saw the fastest growth in areas where vaccines were widely distributed and communities fully reopened, leading to a 36% increase in revenue compared to the previous year. 
For Uber, the fastest-growing geographies in 2021 were Chicago, Miami, New York City in the United States, Sao Paulo in Brazil, and London in the United Kingdom.\n" ] } ], @@ -501,7 +493,7 @@ }, { "cell_type": "code", - "execution_count": 86, + "execution_count": null, "id": "6bbbdd5b-0076-48c8-b233-e2ba43d7a6de", "metadata": { "tags": [] @@ -511,13 +503,15 @@ "name": "stdout", "output_type": "stream", "text": [ - "Generated 2 sub questions.\n", - "\u001b[36;1m\u001b[1;3m[uber_10k] Q: What is the revenue growth of Uber from 2020 to 2021\n", - "\u001b[0m\u001b[36;1m\u001b[1;3m[uber_10k] A: \n", - "The revenue growth of Uber from 2020 to 2021 was 57%, or 54% on a constant currency basis.\n", - "\u001b[0m\u001b[33;1m\u001b[1;3m[lyft_10k] Q: What is the revenue growth of Lyft from 2020 to 2021\n", - "\u001b[0m\u001b[33;1m\u001b[1;3m[lyft_10k] A: \n", - "The revenue growth of Lyft from 2020 to 2021 is 36%, increasing from $2,364,681 thousand to $3,208,323 thousand.\n", + "Generated 4 sub questions.\n", + "\u001b[1;3;38;2;237;90;200m[uber_10k] Q: What was the revenue of Uber in 2020?\n", + "\u001b[0m\u001b[1;3;38;2;90;149;237m[uber_10k] Q: What was the revenue of Uber in 2021?\n", + "\u001b[0m\u001b[1;3;38;2;11;159;203m[lyft_10k] Q: What was the revenue of Lyft in 2020?\n", + "\u001b[0m\u001b[1;3;38;2;155;135;227m[lyft_10k] Q: What was the revenue of Lyft in 2021?\n", + "\u001b[0m\u001b[1;3;38;2;90;149;237m[uber_10k] A: $17,455\n", + "\u001b[0m\u001b[1;3;38;2;237;90;200m[uber_10k] A: The revenue of Uber in 2020 was $11,139 million.\n", + "\u001b[0m\u001b[1;3;38;2;155;135;227m[lyft_10k] A: The revenue of Lyft in 2021 was $3,208,323,000.\n", + "\u001b[0m\u001b[1;3;38;2;11;159;203m[lyft_10k] A: Lyft's revenue in 2020 was $2,364,681.\n", "\u001b[0m" ] } @@ -528,7 +522,7 @@ }, { "cell_type": "code", - "execution_count": 87, + "execution_count": null, "id": "fadf421e-5938-4031-81df-cfbfd347b674", "metadata": { "tags": [] @@ -538,8 +532,7 @@ "name": "stdout", "output_type": 
"stream", "text": [ - "\n", - "The revenue growth of Uber from 2020 to 2021 was 57%, or 54% on a constant currency basis, while the revenue growth of Lyft from 2020 to 2021 was 36%. This means that Uber had a higher revenue growth than Lyft from 2020 to 2021.\n" + "The revenue growth of Uber from 2020 to 2021 was $6,316 million, while the revenue growth of Lyft during the same period was $843,642,000.\n" ] } ], @@ -550,7 +543,7 @@ ], "metadata": { "kernelspec": { - "display_name": "Python 3 (ipykernel)", + "display_name": "base", "language": "python", "name": "python3" }, @@ -564,7 +557,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.9.16" + "version": "3.10.16" } }, "nbformat": 4,