diff --git "a/notebooks/07-RAG_Improve_Chunking.ipynb" "b/notebooks/07-RAG_Improve_Chunking.ipynb"
--- "a/notebooks/07-RAG_Improve_Chunking.ipynb"
+++ "b/notebooks/07-RAG_Improve_Chunking.ipynb"
@@ -1,1121 +1,300 @@
 {
-  "cells": [
-    {
-      "cell_type": "markdown",
-      "metadata": {
-        "colab_type": "text",
-        "id": "view-in-github"
-      },
-      "source": [
-        "<a href=\"https://colab.research.google.com/github/towardsai/ai-tutor-rag-system/blob/main/notebooks/07-RAG_Improve_Chunking.ipynb\" target=\"_parent\"><img src=\"https://colab.research.google.com/assets/colab-badge.svg\" alt=\"Open In Colab\"/></a>"
-      ]
-    },
-    {
-      "cell_type": "markdown",
-      "metadata": {
-        "id": "-zE1h0uQV7uT"
-      },
-      "source": [
-        "# Install Packages and Setup Variables"
-      ]
-    },
-    {
-      "cell_type": "code",
-      "execution_count": 1,
-      "metadata": {
-        "colab": {
-          "base_uri": "https://localhost:8080/"
-        },
-        "id": "QPJzr-I9XQ7l",
-        "outputId": "1b699f15-bd3f-473d-dd37-74257e6d263e"
-      },
-      "outputs": [],
-      "source": [
-        "!pip install -q llama-index==0.9.21 openai==1.6.0 tiktoken==0.5.2 chromadb==0.4.21 kaleido==0.2.1 python-multipart==0.0.6 cohere==4.39"
-      ]
-    },
-    {
-      "cell_type": "code",
-      "execution_count": 2,
-      "metadata": {
-        "id": "riuXwpSPcvWC"
-      },
-      "outputs": [],
-      "source": [
-        "import os\n",
-        "\n",
-        "# Set the \"OPENAI_API_KEY\" in the Python environment. Will be used by OpenAI client later.\n",
-        "os.environ[\"OPENAI_API_KEY\"] = \"\""
-      ]
-    },
-    {
-      "cell_type": "code",
-      "execution_count": 3,
-      "metadata": {
-        "id": "jIEeZzqLbz0J"
-      },
-      "outputs": [],
-      "source": [
-        "import nest_asyncio\n",
-        "\n",
-        "nest_asyncio.apply()"
-      ]
-    },
-    {
-      "cell_type": "markdown",
-      "metadata": {
-        "id": "Bkgi2OrYzF7q"
-      },
-      "source": [
-        "# Load a Model"
-      ]
-    },
-    {
-      "cell_type": "code",
-      "execution_count": 4,
-      "metadata": {
-        "id": "9oGT6crooSSj"
-      },
-      "outputs": [],
-      "source": [
-        "from llama_index.llms import OpenAI\n",
-        "\n",
-        "llm = OpenAI(temperature=0.9, model=\"gpt-3.5-turbo\", max_tokens=512)"
-      ]
-    },
-    {
-      "cell_type": "markdown",
-      "metadata": {
-        "id": "0BwVuJXlzHVL"
-      },
-      "source": [
-        "# Create a VectoreStore"
-      ]
-    },
-    {
-      "cell_type": "code",
-      "execution_count": 5,
-      "metadata": {
-        "id": "SQP87lHczHKc"
-      },
-      "outputs": [],
-      "source": [
-        "import chromadb\n",
-        "\n",
-        "# create client and a new collection\n",
-        "# chromadb.EphemeralClient saves data in-memory.\n",
-        "chroma_client = chromadb.PersistentClient(path=\"./mini-llama-articles\")\n",
-        "chroma_collection = chroma_client.create_collection(\"mini-llama-articles\")"
-      ]
-    },
-    {
-      "cell_type": "code",
-      "execution_count": 6,
-      "metadata": {
-        "id": "zAaGcYMJzHAN"
-      },
-      "outputs": [],
-      "source": [
-        "from llama_index.vector_stores import ChromaVectorStore\n",
-        "\n",
-        "# Define a storage context object using the created vector database.\n",
-        "vector_store = ChromaVectorStore(chroma_collection=chroma_collection)"
-      ]
-    },
-    {
-      "cell_type": "markdown",
-      "metadata": {
-        "id": "I9JbAzFcjkpn"
-      },
-      "source": [
-        "# Load the Dataset (CSV)"
-      ]
+  "nbformat": 4,
+  "nbformat_minor": 0,
+  "metadata": {
+    "colab": {
+      "provenance": [],
+      "authorship_tag": "ABX9TyMPh4RbxOzA/0Wh6s+3gc9P",
+      "include_colab_link": true
     },
-    {
-      "cell_type": "markdown",
-      "metadata": {
-        "id": "ceveDuYdWCYk"
-      },
-      "source": [
-        "## Download"
-      ]
+    "kernelspec": {
+      "name": "python3",
+      "display_name": "Python 3"
     },
-    {
-      "cell_type": "markdown",
-      "metadata": {
-        "id": "eZwf6pv7WFmD"
-      },
-      "source": [
-        "The dataset includes several articles from the TowardsAI blog, which provide an in-depth explanation of the LLaMA2 model. Read the dataset as a long string."
-      ]
+    "language_info": {
+      "name": "python"
     },
-    {
-      "cell_type": "code",
-      "execution_count": 7,
-      "metadata": {
-        "colab": {
-          "base_uri": "https://localhost:8080/"
+    "widgets": {
+      "application/vnd.jupyter.widget-state+json": {
+        "7a469b6821ed458d99a1ed57e72b3d68": {
+          "model_module": "@jupyter-widgets/controls",
+          "model_name": "HBoxModel",
+          "model_module_version": "1.5.0",
+          "state": {
+            "_dom_classes": [],
+            "_model_module": "@jupyter-widgets/controls",
+            "_model_module_version": "1.5.0",
+            "_model_name": "HBoxModel",
+            "_view_count": null,
+            "_view_module": "@jupyter-widgets/controls",
+            "_view_module_version": "1.5.0",
+            "_view_name": "HBoxView",
+            "box_style": "",
+            "children": [
+              "IPY_MODEL_8c556c8c8ce941c6b433780fd4a6ae54",
+              "IPY_MODEL_626b1ba98c374987913a7a4384f19fa1",
+              "IPY_MODEL_a4fad4d11a8941f8b90abb3099e9a090"
+            ],
+            "layout": "IPY_MODEL_c3a4b958e4814294801495226697bce2"
+          }
         },
-        "id": "wl_pbPvMlv1h",
-        "outputId": "38f73ac6-b824-4a5b-9385-e7b1afbd2cc8"
-      },
-      "outputs": [
-        {
-          "name": "stdout",
-          "output_type": "stream",
-          "text": [
-            "  % Total    % Received % Xferd  Average Speed   Time    Time     Time  Current\n",
-            "                                 Dload  Upload   Total   Spent    Left  Speed\n",
-            "100  169k  100  169k    0     0   868k      0 --:--:-- --:--:-- --:--:--  869k\n"
-          ]
-        }
-      ],
-      "source": [
-        "!wget https://raw.githubusercontent.com/AlaFalaki/tutorial_notebooks/main/data/mini-llama-articles.csv\n",
-        "# !curl -o ./mini-llama-articles.csv https://raw.githubusercontent.com/AlaFalaki/tutorial_notebooks/main/data/mini-llama-articles.csv"
-      ]
-    },
-    {
-      "cell_type": "markdown",
-      "metadata": {
-        "id": "VWBLtDbUWJfA"
-      },
-      "source": [
-        "## Read File"
-      ]
-    },
-    {
-      "cell_type": "code",
-      "execution_count": 8,
-      "metadata": {
-        "colab": {
-          "base_uri": "https://localhost:8080/"
+        "8c556c8c8ce941c6b433780fd4a6ae54": {
+          "model_module": "@jupyter-widgets/controls",
+          "model_name": "HTMLModel",
+          "model_module_version": "1.5.0",
+          "state": {
+            "_dom_classes": [],
+            "_model_module": "@jupyter-widgets/controls",
+            "_model_module_version": "1.5.0",
+            "_model_name": "HTMLModel",
+            "_view_count": null,
+            "_view_module": "@jupyter-widgets/controls",
+            "_view_module_version": "1.5.0",
+            "_view_name": "HTMLView",
+            "description": "",
+            "description_tooltip": null,
+            "layout": "IPY_MODEL_2e939db189424ab7b5f9095932f2c99f",
+            "placeholder": "​",
+            "style": "IPY_MODEL_fd6a36e947ec451a938d266117dab12e",
+            "value": "Parsing nodes: 100%"
+          }
         },
-        "id": "0Q9sxuW0g3Gd",
-        "outputId": "6bd4f786-f888-4d3b-d324-95230ef5f544"
-      },
-      "outputs": [
-        {
-          "data": {
-            "text/plain": [
-              "14"
-            ]
-          },
-          "execution_count": 8,
-          "metadata": {},
-          "output_type": "execute_result"
-        }
-      ],
-      "source": [
-        "import csv\n",
-        "\n",
-        "rows = []\n",
-        "\n",
-        "# Load the file as a JSON\n",
-        "with open(\"./mini-llama-articles.csv\", mode=\"r\", encoding=\"utf-8\") as file:\n",
-        "  csv_reader = csv.reader(file)\n",
-        "\n",
-        "  for idx, row in enumerate( csv_reader ):\n",
-        "    if idx == 0: continue; # Skip header row\n",
-        "    rows.append( row )\n",
-        "\n",
-        "# The number of characters in the dataset.\n",
-        "len( rows )"
-      ]
-    },
-    {
-      "cell_type": "markdown",
-      "metadata": {
-        "id": "S17g2RYOjmf2"
-      },
-      "source": [
-        "# Convert to Document obj"
-      ]
-    },
-    {
-      "cell_type": "code",
-      "execution_count": 9,
-      "metadata": {
-        "id": "YizvmXPejkJE"
-      },
-      "outputs": [],
-      "source": [
-        "from llama_index import Document\n",
-        "\n",
-        "# Convert the chunks to Document objects so the LlamaIndex framework can process them.\n",
-        "documents = [Document(text=row[1], metadata={\"title\": row[0], \"url\": row[2], \"source_name\": row[3]}) for row in rows]\n",
-        "\n",
-        "# By default, the node/chunks ids are set to random uuids. To ensure same id's per run, we manually set them.\n",
-        "for idx, doc in enumerate(documents):\n",
-        "    doc.id_ = f\"doc_{idx}\""
-      ]
-    },
-    {
-      "cell_type": "markdown",
-      "metadata": {
-        "id": "qjuLbmFuWsyl"
-      },
-      "source": [
-        "# Transforming"
-      ]
-    },
-    {
-      "cell_type": "code",
-      "execution_count": 10,
-      "metadata": {
-        "id": "9z3t70DGWsjO"
-      },
-      "outputs": [],
-      "source": [
-        "from llama_index.text_splitter import TokenTextSplitter\n",
-        "from llama_index.schema import BaseNode\n",
-        "import hashlib\n",
-        "\n",
-        "def deterministic_id_func(i: int, doc: BaseNode) -> str:\n",
-        "    \"\"\"Deterministic ID function for the text splitter.\n",
-        "    This will be used to generate a unique repeatable identifier for each node.\"\"\"\n",
-        "    unique_identifier = doc.id_ + str(i)\n",
-        "    hasher = hashlib.sha256()\n",
-        "    hasher.update(unique_identifier.encode('utf-8')) \n",
-        "    return hasher.hexdigest()\n",
-        "\n",
-        "text_splitter = TokenTextSplitter(separator=\" \", chunk_size=512, chunk_overlap=128, id_func=deterministic_id_func)"
-      ]
-    },
-    {
-      "cell_type": "code",
-      "execution_count": 11,
-      "metadata": {
-        "colab": {
-          "base_uri": "https://localhost:8080/",
-          "height": 385,
-          "referenced_widgets": [
-            "7a469b6821ed458d99a1ed57e72b3d68",
-            "8c556c8c8ce941c6b433780fd4a6ae54",
-            "626b1ba98c374987913a7a4384f19fa1",
-            "a4fad4d11a8941f8b90abb3099e9a090",
-            "c3a4b958e4814294801495226697bce2",
-            "2e939db189424ab7b5f9095932f2c99f",
-            "fd6a36e947ec451a938d266117dab12e",
-            "e4413564a300469d86c3abc567f24701",
-            "64167ae99cd24c729435aefc1ea13519",
-            "2634e510d3c844d88891a98661beb6a9",
-            "6b3d2afb949f4de691ceac601bd96d0e",
-            "8cc800fbe6bc4f4da5dd6b93d4a5143a",
-            "812d5d9b04f74592b850b3eb32f88c04",
-            "ed22c91e813c4351ab1d3eb7e174796c",
-            "de2088a425104f05b52b7a3236c7baa9",
-            "6f9f666836084de7894aa2e65c8dbe07",
-            "63a3dcff335349deacf4abb9b68d76ab",
-            "99eb83f4b8904e20b45573bab84aa5f4",
-            "2c8aef5e8ec848c0a23c72581e5f4b1e",
-            "7d54abb8f3784a789fd042c2ed2dd685",
-            "a1a88448b188407b8e4aa2af86fb9345",
-            "6a4cc229f5774cb0b4d3def7eee8b56e"
-          ]
-        },
-        "id": "P9LDJ7o-Wsc-",
-        "outputId": "2e27e965-fd4c-4754-94f5-3a6e33a72dea"
-      },
-      "outputs": [
-        {
-          "name": "stderr",
-          "output_type": "stream",
-          "text": [
-            "/Users/omar/Documents/ai_repos/ai-tutor-rag-system/env/lib/python3.11/site-packages/tqdm/auto.py:21: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html\n",
-            "  from .autonotebook import tqdm as notebook_tqdm\n",
-            "Parsing nodes: 100%|██████████| 14/14 [00:00<00:00, 14.26it/s]\n",
-            "100%|██████████| 108/108 [00:39<00:00,  2.70it/s]\n",
-            "100%|██████████| 108/108 [00:54<00:00,  1.99it/s]\n",
-            "100%|██████████| 108/108 [00:28<00:00,  3.82it/s]\n",
-            "Generating embeddings: 100%|██████████| 108/108 [00:02<00:00, 45.21it/s]\n"
-          ]
-        }
-      ],
-      "source": [
-        "from llama_index.extractors import (\n",
-        "    SummaryExtractor,\n",
-        "    QuestionsAnsweredExtractor,\n",
-        "    KeywordExtractor,\n",
-        ")\n",
-        "from llama_index.embeddings import OpenAIEmbedding\n",
-        "from llama_index.ingestion import IngestionPipeline\n",
-        "\n",
-        "pipeline = IngestionPipeline(\n",
-        "    transformations=[\n",
-        "        text_splitter,\n",
-        "        QuestionsAnsweredExtractor(questions=3, llm=llm),\n",
-        "        SummaryExtractor(summaries=[\"prev\", \"self\"], llm=llm),\n",
-        "        KeywordExtractor(keywords=10, llm=llm),\n",
-        "        OpenAIEmbedding(),\n",
-        "    ],\n",
-        "    vector_store=vector_store\n",
-        ")\n",
-        "\n",
-        "nodes = pipeline.run(documents=documents, show_progress=True);"
-      ]
-    },
-    {
-      "cell_type": "code",
-      "execution_count": 12,
-      "metadata": {
-        "colab": {
-          "base_uri": "https://localhost:8080/"
-        },
-        "id": "mPGa85hM2P3P",
-        "outputId": "c106c463-2459-4b11-bbae-5bd5e2246011"
-      },
-      "outputs": [
-        {
-          "data": {
-            "text/plain": [
-              "108"
-            ]
-          },
-          "execution_count": 12,
-          "metadata": {},
-          "output_type": "execute_result"
-        }
-      ],
-      "source": [
-        "len( nodes )"
-      ]
-    },
-    {
-      "cell_type": "markdown",
-      "metadata": {
-        "id": "OWaT6rL7ksp8"
-      },
-      "source": [
-        "# Load Indexes"
-      ]
-    },
-    {
-      "cell_type": "code",
-      "execution_count": 13,
-      "metadata": {
-        "id": "mXi56KTXk2sp"
-      },
-      "outputs": [],
-      "source": [
-        "# Create your index\n",
-        "db = chromadb.PersistentClient(path=\"./mini-llama-articles\")\n",
-        "chroma_collection = db.get_or_create_collection(\"mini-llama-articles\")\n",
-        "vector_store = ChromaVectorStore(chroma_collection=chroma_collection)"
-      ]
-    },
-    {
-      "cell_type": "code",
-      "execution_count": 14,
-      "metadata": {
-        "id": "jKXURvLtkuTS"
-      },
-      "outputs": [],
-      "source": [
-        "# Create your index\n",
-        "from llama_index import VectorStoreIndex\n",
-        "\n",
-        "index = VectorStoreIndex.from_vector_store(vector_store)"
-      ]
-    },
-    {
-      "cell_type": "markdown",
-      "metadata": {
-        "id": "8JPD8yAinVSq"
-      },
-      "source": [
-        "# Query Dataset"
-      ]
-    },
-    {
-      "cell_type": "code",
-      "execution_count": 15,
-      "metadata": {
-        "id": "b0gue7cyctt1"
-      },
-      "outputs": [],
-      "source": [
-        "# Define a query engine that is responsible for retrieving related pieces of text,\n",
-        "# and using a LLM to formulate the final answer.\n",
-        "query_engine = index.as_query_engine()\n",
-        "\n",
-        "res = query_engine.query(\"How many parameters LLaMA2 model has?\")"
-      ]
-    },
-    {
-      "cell_type": "code",
-      "execution_count": 16,
-      "metadata": {
-        "colab": {
-          "base_uri": "https://localhost:8080/",
-          "height": 35
-        },
-        "id": "VKK3jMprctre",
-        "outputId": "3503d4e1-3d1d-4ec2-c593-4eb7306cc370"
-      },
-      "outputs": [
-        {
-          "data": {
-            "text/plain": [
-              "'The Llama 2 model is available in four different sizes: 7 billion, 13 billion, 34 billion, and 70 billion parameters.'"
-            ]
-          },
-          "execution_count": 16,
-          "metadata": {},
-          "output_type": "execute_result"
-        }
-      ],
-      "source": [
-        "res.response"
-      ]
-    },
-    {
-      "cell_type": "code",
-      "execution_count": 17,
-      "metadata": {
-        "colab": {
-          "base_uri": "https://localhost:8080/"
-        },
-        "id": "465dH4yQc7Ct",
-        "outputId": "38bbc97b-1a07-427b-d3d4-0a5215b85358"
-      },
-      "outputs": [
-        {
-          "name": "stdout",
-          "output_type": "stream",
-          "text": [
-            "Node ID\t f707756065d1f788b41fb97fcef81979e1fd241dbfa4034a24bec8e57b648482\n",
-            "Title\t Meta's Llama 2: Revolutionizing Open Source Language Models for Commercial Use\n",
-            "Text\t I. Llama 2: Revolutionizing Commercial Use Unlike its predecessor Llama 1, which was limited to research use, Llama 2 represents a major advancement as an open-source commercial model. Businesses can now integrate Llama 2 into products to create AI-powered applications. Availability on Azure and AWS facilitates fine-tuning and adoption. However, restrictions apply to prevent exploitation. Companies with over 700 million active daily users cannot use Llama 2. Additionally, its output cannot be used to improve other language models.  II. Llama 2 Model Flavors Llama 2 is available in four different model sizes: 7 billion, 13 billion, 34 billion, and 70 billion parameters. While 7B, 13B, and 70B have already been released, the 34B model is still awaited. The pretrained variant, trained on a whopping 2 trillion tokens, boasts a context window of 4096 tokens, twice the size of its predecessor Llama 1. Meta also released a Llama 2 fine-tuned model for chat applications that was trained on over 1 million human annotations. Such extensive training comes at a cost, with the 70B model taking a staggering 1720320 GPU hours to train. The context window's length determines the amount of content the model can process at once, making Llama 2 a powerful language model in terms of scale and efficiency.  III. Safety Considerations: A Top Priority for Meta Meta's commitment to safety and alignment shines through in Llama 2's design. The model demonstrates exceptionally low AI safety violation percentages, surpassing even ChatGPT in safety benchmarks. Finding the right balance between helpfulness and safety when optimizing a model poses significant challenges. While a highly helpful model may be capable of answering any question, including sensitive ones like \"How do I build a bomb?\", it also raises concerns about potential misuse. Thus, striking the perfect equilibrium between providing useful information and ensuring safety is paramount. 
However, prioritizing safety to an extreme extent can lead to a model that struggles to effectively address a diverse range of questions. This limitation could hinder the model's practical applicability and user experience. Thus, achieving\n",
-            "Score\t 0.699388273978391\n",
-            "-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_\n",
-            "Node ID\t 636f98cf8754c3a4759da02aa11a3f2aa7cdeb848a4980ec99300ece4a2e92fd\n",
-            "Title\t Meta's Llama 2: Revolutionizing Open Source Language Models for Commercial Use\n",
-            "Text\t The model demonstrates exceptionally low AI safety violation percentages, surpassing even ChatGPT in safety benchmarks. Finding the right balance between helpfulness and safety when optimizing a model poses significant challenges. While a highly helpful model may be capable of answering any question, including sensitive ones like \"How do I build a bomb?\", it also raises concerns about potential misuse. Thus, striking the perfect equilibrium between providing useful information and ensuring safety is paramount. However, prioritizing safety to an extreme extent can lead to a model that struggles to effectively address a diverse range of questions. This limitation could hinder the model's practical applicability and user experience. Thus, achieving an optimum balance that allows the model to be both helpful and safe is of utmost importance. To strike the right balance between helpfulness and safety, Meta employed two reward models - one for helpfulness and another for safety - to optimize the model's responses. The 34B parameter model has reported higher safety violations than other variants, possibly contributing to the delay in its release.  IV. Helpfulness Comparison: Llama 2 Outperforms Competitors Llama 2 emerges as a strong contender in the open-source language model arena, outperforming its competitors in most categories. The 70B parameter model outperforms all other open-source models, while the 7B and 34B models outshine Falcon in all categories and MPT in all categories except coding. Despite being smaller, Llam a2's performance rivals that of Chat GPT 3.5, a significantly larger closed-source model. While GPT 4 and PalM-2-L, with their larger size, outperform Llama 2, this is expected due to their capacity for handling complex language tasks. Llama 2's impressive ability to compete with larger models highlights its efficiency and potential in the market. 
However, Llama 2 does face challenges in coding and math problems, where models like Chat GPT 4 excel, given their significantly larger size. Chat GPT 4 performed significantly better than Llama 2 for coding (HumanEval benchmark)and math problem tasks (GSM8k benchmark). Open-source AI technologies, like Llama 2, continue to advance, offering\n",
-            "Score\t 0.6986276122119972\n",
-            "-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_\n"
-          ]
-        }
-      ],
-      "source": [
-        "for src in res.source_nodes:\n",
-        "  print(\"Node ID\\t\", src.node_id)\n",
-        "  print(\"Title\\t\", src.metadata['title'])\n",
-        "  print(\"Text\\t\", src.text)\n",
-        "  print(\"Score\\t\", src.score)\n",
-        "  print(\"-_\"*20)"
-      ]
-    },
-    {
-      "cell_type": "markdown",
-      "metadata": {
-        "id": "GrqBq8Dfidw6"
-      },
-      "source": [
-        "### Trying a different Query"
-      ]
-    },
-    {
-      "cell_type": "code",
-      "execution_count": 18,
-      "metadata": {
-        "id": "MMBQJcPaigA0"
-      },
-      "outputs": [],
-      "source": [
-        "res = query_engine.query(\"Can LLaMA2 do NLU?\")"
-      ]
-    },
-    {
-      "cell_type": "code",
-      "execution_count": 19,
-      "metadata": {
-        "colab": {
-          "base_uri": "https://localhost:8080/",
-          "height": 35
+        "626b1ba98c374987913a7a4384f19fa1": {
+          "model_module": "@jupyter-widgets/controls",
+          "model_name": "FloatProgressModel",
+          "model_module_version": "1.5.0",
+          "state": {
+            "_dom_classes": [],
+            "_model_module": "@jupyter-widgets/controls",
+            "_model_module_version": "1.5.0",
+            "_model_name": "FloatProgressModel",
+            "_view_count": null,
+            "_view_module": "@jupyter-widgets/controls",
+            "_view_module_version": "1.5.0",
+            "_view_name": "ProgressView",
+            "bar_style": "success",
+            "description": "",
+            "description_tooltip": null,
+            "layout": "IPY_MODEL_e4413564a300469d86c3abc567f24701",
+            "max": 14,
+            "min": 0,
+            "orientation": "horizontal",
+            "style": "IPY_MODEL_64167ae99cd24c729435aefc1ea13519",
+            "value": 14
+          }
         },
-        "id": "N2QbpT0skT75",
-        "outputId": "18ddac02-218d-432d-8f00-da96e93c8326"
-      },
-      "outputs": [
-        {
-          "data": {
-            "text/plain": [
-              "'Yes, LLaMA2 can do NLU (Natural Language Understanding) as mentioned in the context information.'"
-            ]
-          },
-          "execution_count": 19,
-          "metadata": {},
-          "output_type": "execute_result"
-        }
-      ],
-      "source": [
-        "res.response"
-      ]
-    },
-    {
-      "cell_type": "code",
-      "execution_count": 20,
-      "metadata": {
-        "colab": {
-          "base_uri": "https://localhost:8080/"
+        "a4fad4d11a8941f8b90abb3099e9a090": {
+          "model_module": "@jupyter-widgets/controls",
+          "model_name": "HTMLModel",
+          "model_module_version": "1.5.0",
+          "state": {
+            "_dom_classes": [],
+            "_model_module": "@jupyter-widgets/controls",
+            "_model_module_version": "1.5.0",
+            "_model_name": "HTMLModel",
+            "_view_count": null,
+            "_view_module": "@jupyter-widgets/controls",
+            "_view_module_version": "1.5.0",
+            "_view_name": "HTMLView",
+            "description": "",
+            "description_tooltip": null,
+            "layout": "IPY_MODEL_2634e510d3c844d88891a98661beb6a9",
+            "placeholder": "​",
+            "style": "IPY_MODEL_6b3d2afb949f4de691ceac601bd96d0e",
+            "value": " 14/14 [00:00&lt;00:00, 34.02it/s]"
+          }
         },
-        "id": "f9HPdfMjqsbQ",
-        "outputId": "ef558e20-9625-4228-b057-5e1990752d43"
-      },
-      "outputs": [
-        {
-          "name": "stdout",
-          "output_type": "stream",
-          "text": [
-            "Node ID\t 8cf94b9369ba8da18d02172b9cbf885afb60cddd0a2381a86a81ca8e6a9b10f9\n",
-            "Title\t Exploring Large Language Models -Part 3\n",
-            "Text\t LM model training via UnSupervised learning). Note that this model was loaded in 4-bit, making it runnable on a single T4 GPU and trained with QLoRa. With QLoRA, only a fraction of the adapter weights are trained and summed with the existing frozen pre-trained weights of the model during inference. Here is an illustrative Colab notebook. You can see that training the model with just the text as is, does not result in proper output to questions. The answers are not affected by the training data. Take 2: Instruct Fine-tuning with QLoRa Instruction Tuning concept is a higher-level training concept introduced by this paper FineTuned Language Models Are Zero shot Learners (FLAN) We leverage the intuition that NLP tasks can be described via natural language instructions, such as \"Is the sentiment of this movie review positive or negative?\" or \"Translate 'how are you' into Chinese.\" We take a pre-trained language model of 137B parameters and perform instruction tuning ... Since we use QLoRa we are effectively closely following this paper - QLORA: Efficient Finetuning of Quantized LLMs concerning the training data set, the format that the authors used to train their Gauanco model This is the format for the Llama2 model and will be different for others. One of the hardest problems of training is finding or creating a good quality data set to train. In our case, converting the available training data set to the instruction data set. Since our use case is Closed Book QA, we need to convert this to a QA format. Using older NLP methods like NER (Named Entity Recognition) and then using that to create a QA dataset was not effective. This is where the Self-instruct concept could be used However previous to Llama2, the best-performing model was the GPT 3/4 model via ChatGPT or its API and using these models to do the same was expensive. 
The 7 billion model of Llama2 has sufficient NLU (Natural Language Understanding) to create output based on a particular format. Running this in 4-bit mode via Quantisation makes it feasible compute-wise to run this on a large data set and convert it to a QA dataset. This was the prompt used. The\n",
-            "Score\t 0.7171179965716512\n",
-            "-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_\n",
-            "Node ID\t e97bbe3d37bacb34902b4db67351799f1309541d4879e53b97fad08a4417304f\n",
-            "Title\t LLaMA by Meta leaked by an anonymous forum: Questions Arises on Meta\n",
-            "Text\t LLaMA: Meta's new AI tool According to the official release, LLaMA is a foundational language model developed to assist 'researchers and academics' in their work (as opposed to the average web user) to understand and study these NLP models. Leveraging AI in such a way could give researchers an edge in terms of time spent. You may not know this, but this would be Meta's third LLM after Blender Bot 3 and Galactica. However, the two LLMs were shut down soon, and Meta stopped their further development, as it produced erroneous results. Before moving further, it is important to emphasize that LLaMA is NOT a chatbot like ChatGPT. As I mentioned before, it is a 'research tool' for researchers. We can expect the initial versions of LLaMA to be a bit more technical and indirect to use as opposed to the case with ChatGPT, which was very direct, interactive, and a lot easy to use. \"Smaller, more performant models such as LLaMA enable ... research community who don't have access to large amounts of infrastructure to study these models.. further democratizing access in this important, fast-changing field,\" said Meta in its official blog. Meta's effort of \"democratizing\" access to the public could shed light on one of the critical issues of Generative AI - toxicity and bias. ChatGPT and other LLMs (obviously, I am referring to Bing) have a track record of responding in a way that is toxic and, well... evil. The Verge and major critics have covered it in much detail. Oh and the community did get the access, but not in the way Meta anticipated. On March 3rd, a downloadable torrent of the LLaMA system was posted on 4chan. 4chan is an anonymous online forum known for its controversial content and diverse range of discussions, which has nearly 222 million unique monthly visitors. LLaMA is currently not in use on any of Meta's products. But Meta has plans to make it available to researchers before they can use them in their own products. 
It's worth mentioning that Meta did not release\n",
-            "Score\t 0.7109836688235909\n",
-            "-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_\n"
-          ]
-        }
-      ],
-      "source": [
-        "for src in res.source_nodes:\n",
-        "  print(\"Node ID\\t\", src.node_id)\n",
-        "  print(\"Title\\t\", src.metadata['title'])\n",
-        "  print(\"Text\\t\", src.text)\n",
-        "  print(\"Score\\t\", src.score)\n",
-        "  print(\"-_\"*20)"
-      ]
-    },
-    {
-      "cell_type": "markdown",
-      "metadata": {
-        "id": "TmkI8BV8rATi"
-      },
-      "source": [
-        "From the articles:\n",
-        "  \n",
-        "> [...]The 7 billion model of Llama2 has sufficient NLU (Natural Language Understanding) to create output based on a particular format[...]\n",
-        "\n"
-      ]
-    },
-    {
-      "cell_type": "markdown",
-      "metadata": {
-        "id": "6Wx-IPSMbSwC"
-      },
-      "source": [
-        "# No Metadata"
-      ]
-    },
-    {
-      "cell_type": "code",
-      "execution_count": 21,
-      "metadata": {
-        "id": "oGunPKGRbT6H"
-      },
-      "outputs": [],
-      "source": [
-        "documents_no_meta = [Document(text=row[1]) for row in rows]\n",
-        "\n",
-        "# By default, the node/chunks ids are set to random uuids. To ensure same id's per run, we manually set them.\n",
-        "for idx, doc in enumerate(documents_no_meta):\n",
-        "    doc.id_ = f\"doc_{idx}\""
-      ]
-    },
-    {
-      "cell_type": "code",
-      "execution_count": 22,
-      "metadata": {
-        "colab": {
-          "base_uri": "https://localhost:8080/",
-          "height": 331,
-          "referenced_widgets": [
-            "bd4c5bc2c7ee443999058f7f232c50f9",
-            "74cda13649844f24a2e6ebce82213865",
-            "dc498ad680d44d1e8e6fd2df2541a8ba",
-            "0bf0c22fbb024723b3a51dbe6d684c79",
-            "34fea76878874d67baae4946b8d9b1da",
-            "bfda4d80ca4f4805be90772690d26fe0",
-            "a6876009a1fb4bcc83f779eab7a4e3b7",
-            "4211db3192514c8189db0430779d660a",
-            "9a5ad060a90c4f14ba05527fdcfe8a72",
-            "b0c77210699e4f30ae2a2a97860de7bb",
-            "e31244d1c2b345a9950de74aac576290",
-            "c58ea3f8afc64b17a553aecfe07b375d",
-            "da5417a69cb5466db258defea0a70f7c",
-            "45a7725a8e8b45c1937eca9dffe650d3",
-            "d33e03cfb6c340bf9c1d661e633afc2e",
-            "6dc4da2c822c460ca0c2a11266806504",
-            "b26896dfe0ba4779bf753602039ece5a",
-            "033ed4123cec43868ada3795d974d895",
-            "00a715d98c584ca1b540187546128d93",
-            "5c59aed5b5b244f1bdf80a08837e4bf5",
-            "e301611efb2b4a19b08c13c76ceb8ab5",
-            "d39554575910469cb65078ea82c988b6"
-          ]
+        "c3a4b958e4814294801495226697bce2": {
+          "model_module": "@jupyter-widgets/base",
+          "model_name": "LayoutModel",
+          "model_module_version": "1.2.0",
+          "state": {
+            "_model_module": "@jupyter-widgets/base",
+            "_model_module_version": "1.2.0",
+            "_model_name": "LayoutModel",
+            "_view_count": null,
+            "_view_module": "@jupyter-widgets/base",
+            "_view_module_version": "1.2.0",
+            "_view_name": "LayoutView",
+            "align_content": null,
+            "align_items": null,
+            "align_self": null,
+            "border": null,
+            "bottom": null,
+            "display": null,
+            "flex": null,
+            "flex_flow": null,
+            "grid_area": null,
+            "grid_auto_columns": null,
+            "grid_auto_flow": null,
+            "grid_auto_rows": null,
+            "grid_column": null,
+            "grid_gap": null,
+            "grid_row": null,
+            "grid_template_areas": null,
+            "grid_template_columns": null,
+            "grid_template_rows": null,
+            "height": null,
+            "justify_content": null,
+            "justify_items": null,
+            "left": null,
+            "margin": null,
+            "max_height": null,
+            "max_width": null,
+            "min_height": null,
+            "min_width": null,
+            "object_fit": null,
+            "object_position": null,
+            "order": null,
+            "overflow": null,
+            "overflow_x": null,
+            "overflow_y": null,
+            "padding": null,
+            "right": null,
+            "top": null,
+            "visibility": null,
+            "width": null
+          }
         },
-        "id": "Hxf4jT6afiZt",
-        "outputId": "2dbf4606-8a4a-45f5-8969-a45744cd388e"
-      },
-      "outputs": [
-        {
-          "name": "stderr",
-          "output_type": "stream",
-          "text": [
-            "Parsing nodes: 100%|██████████| 14/14 [00:00<00:00, 16.34it/s]\n",
-            "Generating embeddings: 100%|██████████| 94/94 [00:01<00:00, 67.81it/s]\n"
-          ]
-        }
-      ],
-      "source": [
-        "from llama_index.embeddings import OpenAIEmbedding\n",
-        "from llama_index.ingestion import IngestionPipeline\n",
-        "\n",
-        "pipeline = IngestionPipeline(\n",
-        "    transformations=[\n",
-        "        text_splitter,\n",
-        "        OpenAIEmbedding(),\n",
-        "    ]\n",
-        ")\n",
-        "\n",
-        "nodes_no_meta = pipeline.run(documents=documents_no_meta, show_progress=True)"
-      ]
-    },
-    {
-      "cell_type": "code",
-      "execution_count": 23,
-      "metadata": {
-        "id": "A39Y1Rv6fiXE"
-      },
-      "outputs": [],
-      "source": [
-        "from llama_index import ServiceContext\n",
-        "\n",
-        "index_no_metadata = VectorStoreIndex(\n",
-        "    nodes=nodes_no_meta,\n",
-        "    service_context=ServiceContext.from_defaults(llm=OpenAI(model=\"gpt-3.5-turbo\")),\n",
-        ")"
-      ]
-    },
-    {
-      "cell_type": "code",
-      "execution_count": 24,
-      "metadata": {
-        "id": "BOpdZdQufiUu"
-      },
-      "outputs": [],
-      "source": [
-        "query_engine_no_metadata = index_no_metadata.as_query_engine()"
-      ]
-    },
-    {
-      "cell_type": "code",
-      "execution_count": 25,
-      "metadata": {
-        "id": "2U2NIE2Yfz8E"
-      },
-      "outputs": [],
-      "source": [
-        "res = query_engine_no_metadata.query(\"Can LLaMA2 do NLU?\")"
-      ]
-    },
-    {
-      "cell_type": "code",
-      "execution_count": 26,
-      "metadata": {
-        "colab": {
-          "base_uri": "https://localhost:8080/",
-          "height": 89
+        "2e939db189424ab7b5f9095932f2c99f": {
+          "model_module": "@jupyter-widgets/base",
+          "model_name": "LayoutModel",
+          "model_module_version": "1.2.0",
+          "state": {
+            "_model_module": "@jupyter-widgets/base",
+            "_model_module_version": "1.2.0",
+            "_model_name": "LayoutModel",
+            "_view_count": null,
+            "_view_module": "@jupyter-widgets/base",
+            "_view_module_version": "1.2.0",
+            "_view_name": "LayoutView",
+            "align_content": null,
+            "align_items": null,
+            "align_self": null,
+            "border": null,
+            "bottom": null,
+            "display": null,
+            "flex": null,
+            "flex_flow": null,
+            "grid_area": null,
+            "grid_auto_columns": null,
+            "grid_auto_flow": null,
+            "grid_auto_rows": null,
+            "grid_column": null,
+            "grid_gap": null,
+            "grid_row": null,
+            "grid_template_areas": null,
+            "grid_template_columns": null,
+            "grid_template_rows": null,
+            "height": null,
+            "justify_content": null,
+            "justify_items": null,
+            "left": null,
+            "margin": null,
+            "max_height": null,
+            "max_width": null,
+            "min_height": null,
+            "min_width": null,
+            "object_fit": null,
+            "object_position": null,
+            "order": null,
+            "overflow": null,
+            "overflow_x": null,
+            "overflow_y": null,
+            "padding": null,
+            "right": null,
+            "top": null,
+            "visibility": null,
+            "width": null
+          }
         },
-        "id": "mxT7_IJ7f1gU",
-        "outputId": "8580ffab-c32c-4cdb-8125-a0165ed4b164"
-      },
-      "outputs": [
-        {
-          "data": {
-            "text/plain": [
-              "'LLaMA2 is a foundational language model developed by Meta. While the context does not explicitly mention whether LLaMA2 can do Natural Language Understanding (NLU), it is described as a research tool for researchers and academics to understand and study NLP models. Therefore, it is possible that LLaMA2 has capabilities related to NLU, but without further information, it cannot be definitively stated.'"
-            ]
-          },
-          "execution_count": 26,
-          "metadata": {},
-          "output_type": "execute_result"
-        }
-      ],
-      "source": [
-        "res.response"
-      ]
-    },
-    {
-      "cell_type": "code",
-      "execution_count": 27,
-      "metadata": {
-        "colab": {
-          "base_uri": "https://localhost:8080/"
+        "fd6a36e947ec451a938d266117dab12e": {
+          "model_module": "@jupyter-widgets/controls",
+          "model_name": "DescriptionStyleModel",
+          "model_module_version": "1.5.0",
+          "state": {
+            "_model_module": "@jupyter-widgets/controls",
+            "_model_module_version": "1.5.0",
+            "_model_name": "DescriptionStyleModel",
+            "_view_count": null,
+            "_view_module": "@jupyter-widgets/base",
+            "_view_module_version": "1.2.0",
+            "_view_name": "StyleView",
+            "description_width": ""
+          }
         },
-        "id": "GD5SQ7VEf2wR",
-        "outputId": "e1f159e2-f718-493d-c65c-51a370c1702d"
-      },
-      "outputs": [
-        {
-          "name": "stdout",
-          "output_type": "stream",
-          "text": [
-            "Node ID\t e97bbe3d37bacb34902b4db67351799f1309541d4879e53b97fad08a4417304f\n",
-            "Text\t LLaMA: Meta's new AI tool According to the official release, LLaMA is a foundational language model developed to assist 'researchers and academics' in their work (as opposed to the average web user) to understand and study these NLP models. Leveraging AI in such a way could give researchers an edge in terms of time spent. You may not know this, but this would be Meta's third LLM after Blender Bot 3 and Galactica. However, the two LLMs were shut down soon, and Meta stopped their further development, as it produced erroneous results. Before moving further, it is important to emphasize that LLaMA is NOT a chatbot like ChatGPT. As I mentioned before, it is a 'research tool' for researchers. We can expect the initial versions of LLaMA to be a bit more technical and indirect to use as opposed to the case with ChatGPT, which was very direct, interactive, and a lot easy to use. \"Smaller, more performant models such as LLaMA enable ... research community who don't have access to large amounts of infrastructure to study these models.. further democratizing access in this important, fast-changing field,\" said Meta in its official blog. Meta's effort of \"democratizing\" access to the public could shed light on one of the critical issues of Generative AI - toxicity and bias. ChatGPT and other LLMs (obviously, I am referring to Bing) have a track record of responding in a way that is toxic and, well... evil. The Verge and major critics have covered it in much detail. Oh and the community did get the access, but not in the way Meta anticipated. On March 3rd, a downloadable torrent of the LLaMA system was posted on 4chan. 4chan is an anonymous online forum known for its controversial content and diverse range of discussions, which has nearly 222 million unique monthly visitors. LLaMA is currently not in use on any of Meta's products. But Meta has plans to make it available to researchers before they can use them in their own products. 
It's worth mentioning that Meta did not release LLaMA as a public chatbot. LLaMA is more of an open-source package that can be accessed by trusted authorities upon request.  Powerful LLMs: What to hope Whether to agree with Ladish's views or not is debatable. Personally, I feel open-sourcing AI models could only benefit\n",
-            "Score\t 0.8511842082572946\n",
-            "-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_\n",
-            "Node ID\t ab651375c4bf52b30d0d709c5c1ac7c52e75399b0cdc1f1139c3d54cda15d0f4\n",
-            "Text\t for its controversial content and diverse range of discussions, which has nearly 222 million unique monthly visitors. LLaMA is currently not in use on any of Meta's products. But Meta has plans to make it available to researchers before they can use them in their own products. It's worth mentioning that Meta did not release LLaMA as a public chatbot. LLaMA is more of an open-source package that can be accessed by trusted authorities upon request.  Powerful LLMs: What to hope Whether to agree with Ladish's views or not is debatable. Personally, I feel open-sourcing AI models could only benefit the AI community to scrutinize the model and improve them for the better. What do you think? After all, one of LLaMA's major goals is to 'democratize' access to such models. But this access in the form of a leak put Meta into question - how it handles its tools and conducts release in public? Most of the users that got the leaked copies soon discovered that LLaMA was not at all similar to ChatGPT. \"Downloading\" LLaMA is going to do very little for the average internet user because it's a \"raw\" AI system that needs a decent amount of technical expertise to get up and running. However, as I am writing this, Meta hasn't acknowledged the leak to the public yet. Neither did they comment on it. There are both positive and negative consequences to this leak. On the one hand, unrestricted access to Llama could help researchers understand how and why large language models work, which could lead to improvements in robustness, bias, and the toxic nature of LLMs. This could really help in reducing the potential for generating misinformation by these troublesome machines. On the other hand, however, the leak could lead to people misusing the model itself. It is not yet perfect. Hence Meta hasn't released it fully to the public yet. Risks such as spam and phishing could be really hard to tackle if such superintelligent machines are put to the test. 
Thus, much safeguard must be applied to the use of these models. We can see such tools, like OpenAI Text Classifier, emerging. So there is a positive hope for this. AI is exciting, no doubt. But a lot scarier if we lose our control over it.\n",
-            "Score\t 0.8494642767398203\n",
-            "-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_\n"
-          ]
-        }
-      ],
-      "source": [
-        "for src in res.source_nodes:\n",
-        "  print(\"Node ID\\t\", src.node_id)\n",
-        "  print(\"Text\\t\", src.text)\n",
-        "  print(\"Score\\t\", src.score)\n",
-        "  print(\"-_\"*20)"
-      ]
-    },
-    {
-      "cell_type": "markdown",
-      "metadata": {
-        "id": "iMkpzH7vvb09"
-      },
-      "source": [
-        "# Evaluate"
-      ]
-    },
-    {
-      "cell_type": "code",
-      "execution_count": 28,
-      "metadata": {
-        "id": "H8a3eKgKvckU"
-      },
-      "outputs": [
-        {
-          "name": "stderr",
-          "output_type": "stream",
-          "text": [
-            "100%|██████████| 108/108 [05:05<00:00,  2.83s/it]\n"
-          ]
-        }
-      ],
-      "source": [
-        "from llama_index.evaluation import generate_question_context_pairs\n",
-        "from llama_index.llms import OpenAI\n",
-        "\n",
-        "llm = OpenAI(model=\"gpt-3.5-turbo\")\n",
-        "rag_eval_dataset = generate_question_context_pairs(\n",
-        "    nodes,\n",
-        "    llm=llm,\n",
-        "    num_questions_per_chunk=1\n",
-        ")\n",
-        "\n",
-        "# We can save the dataset as a json file for later use.\n",
-        "rag_eval_dataset.save_json(\"./rag_eval_dataset.json\")"
-      ]
-    },
-    {
-      "cell_type": "code",
-      "execution_count": 29,
-      "metadata": {
-        "id": "3sA1K84U254o"
-      },
-      "outputs": [],
-      "source": [
-        "from llama_index.finetuning.embeddings.common import (\n",
-        "    EmbeddingQAFinetuneDataset,\n",
-        ")\n",
-        "rag_eval_dataset = EmbeddingQAFinetuneDataset.from_json(\n",
-        "    \"./rag_eval_dataset.json\"\n",
-        ")"
-      ]
-    },
-    {
-      "cell_type": "code",
-      "execution_count": 30,
-      "metadata": {
-        "id": "H7ubvcbk27vr"
-      },
-      "outputs": [],
-      "source": [
-        "import pandas as pd\n",
-        "\n",
-        "def display_results_retriever(name, eval_results):\n",
-        "    \"\"\"Display results from evaluate.\"\"\"\n",
-        "\n",
-        "    metric_dicts = []\n",
-        "    for eval_result in eval_results:\n",
-        "        metric_dict = eval_result.metric_vals_dict\n",
-        "        metric_dicts.append(metric_dict)\n",
-        "\n",
-        "    full_df = pd.DataFrame(metric_dicts)\n",
-        "\n",
-        "    hit_rate = full_df[\"hit_rate\"].mean()\n",
-        "    mrr = full_df[\"mrr\"].mean()\n",
-        "\n",
-        "    metric_df = pd.DataFrame(\n",
-        "        {\"Retriever Name\": [name], \"Hit Rate\": [hit_rate], \"MRR\": [mrr]}\n",
-        "    )\n",
-        "\n",
-        "    return metric_df"
-      ]
-    },
-    {
-      "cell_type": "code",
-      "execution_count": 31,
-      "metadata": {
-        "colab": {
-          "base_uri": "https://localhost:8080/"
+        "e4413564a300469d86c3abc567f24701": {
+          "model_module": "@jupyter-widgets/base",
+          "model_name": "LayoutModel",
+          "model_module_version": "1.2.0",
+          "state": {
+            "_model_module": "@jupyter-widgets/base",
+            "_model_module_version": "1.2.0",
+            "_model_name": "LayoutModel",
+            "_view_count": null,
+            "_view_module": "@jupyter-widgets/base",
+            "_view_module_version": "1.2.0",
+            "_view_name": "LayoutView",
+            "align_content": null,
+            "align_items": null,
+            "align_self": null,
+            "border": null,
+            "bottom": null,
+            "display": null,
+            "flex": null,
+            "flex_flow": null,
+            "grid_area": null,
+            "grid_auto_columns": null,
+            "grid_auto_flow": null,
+            "grid_auto_rows": null,
+            "grid_column": null,
+            "grid_gap": null,
+            "grid_row": null,
+            "grid_template_areas": null,
+            "grid_template_columns": null,
+            "grid_template_rows": null,
+            "height": null,
+            "justify_content": null,
+            "justify_items": null,
+            "left": null,
+            "margin": null,
+            "max_height": null,
+            "max_width": null,
+            "min_height": null,
+            "min_width": null,
+            "object_fit": null,
+            "object_position": null,
+            "order": null,
+            "overflow": null,
+            "overflow_x": null,
+            "overflow_y": null,
+            "padding": null,
+            "right": null,
+            "top": null,
+            "visibility": null,
+            "width": null
+          }
         },
-        "id": "uNLxDxoc2-Ac",
-        "outputId": "ea09c887-3b82-4f59-8818-8bd6d7f0e1e3"
-      },
-      "outputs": [
-        {
-          "name": "stdout",
-          "output_type": "stream",
-          "text": [
-            "    Retriever Name  Hit Rate       MRR\n",
-            "0  Retriever top_2  0.638646  0.523472\n",
-            "    Retriever Name  Hit Rate       MRR\n",
-            "0  Retriever top_4  0.776201  0.564683\n",
-            "    Retriever Name  Hit Rate       MRR\n",
-            "0  Retriever top_6  0.819869  0.572726\n",
-            "    Retriever Name  Hit Rate       MRR\n",
-            "0  Retriever top_8  0.840611  0.575494\n",
-            "     Retriever Name  Hit Rate       MRR\n",
-            "0  Retriever top_10  0.854803  0.576937\n"
-          ]
-        }
-      ],
-      "source": [
-        "from llama_index.evaluation import RetrieverEvaluator\n",
-        "\n",
-        "# We can evaluate the retievers with different top_k values.\n",
-        "for i in [2, 4, 6, 8, 10]:\n",
-        "    retriever = index.as_retriever(similarity_top_k=i)\n",
-        "    retriever_evaluator = RetrieverEvaluator.from_metric_names(\n",
-        "        [\"mrr\", \"hit_rate\"], retriever=retriever\n",
-        "    )\n",
-        "    eval_results = await retriever_evaluator.aevaluate_dataset(rag_eval_dataset)\n",
-        "    print(display_results_retriever(f\"Retriever top_{i}\", eval_results))"
-      ]
-    },
-    {
-      "cell_type": "code",
-      "execution_count": 32,
-      "metadata": {
-        "colab": {
-          "base_uri": "https://localhost:8080/"
+        "64167ae99cd24c729435aefc1ea13519": {
+          "model_module": "@jupyter-widgets/controls",
+          "model_name": "ProgressStyleModel",
+          "model_module_version": "1.5.0",
+          "state": {
+            "_model_module": "@jupyter-widgets/controls",
+            "_model_module_version": "1.5.0",
+            "_model_name": "ProgressStyleModel",
+            "_view_count": null,
+            "_view_module": "@jupyter-widgets/base",
+            "_view_module_version": "1.2.0",
+            "_view_name": "StyleView",
+            "bar_color": null,
+            "description_width": ""
+          }
         },
-        "id": "3ukkWC9R2_0J",
-        "outputId": "8d93822c-ec27-4103-d2b4-f63405ace512"
-      },
-      "outputs": [
-        {
-          "name": "stdout",
-          "output_type": "stream",
-          "text": [
-            "top_2 faithfulness_score: 0.95\n",
-            "top_2 relevancy_score: 0.95\n",
-            "-_-_-_-_-_-_-_-_-_-_\n",
-            "top_4 faithfulness_score: 1.0\n",
-            "top_4 relevancy_score: 1.0\n",
-            "-_-_-_-_-_-_-_-_-_-_\n",
-            "top_6 faithfulness_score: 1.0\n",
-            "top_6 relevancy_score: 1.0\n",
-            "-_-_-_-_-_-_-_-_-_-_\n",
-            "top_8 faithfulness_score: 1.0\n",
-            "top_8 relevancy_score: 1.0\n",
-            "-_-_-_-_-_-_-_-_-_-_\n",
-            "top_10 faithfulness_score: 1.0\n",
-            "top_10 relevancy_score: 1.0\n",
-            "-_-_-_-_-_-_-_-_-_-_\n"
-          ]
-        }
-      ],
-      "source": [
-        "from llama_index.evaluation import RelevancyEvaluator, FaithfulnessEvaluator, BatchEvalRunner\n",
-        "from llama_index import ServiceContext\n",
-        "from llama_index.llms import OpenAI\n",
-        "\n",
-        "for i in [2, 4, 6, 8, 10]:\n",
-        "    # Set Faithfulness and Relevancy evaluators\n",
-        "    query_engine = index.as_query_engine(similarity_top_k=i)\n",
-        "\n",
-        "    # While we use GPT3.5-Turbo to answer questions, we can use GPT4 to evaluate the answers.\n",
-        "    llm_gpt4 = OpenAI(temperature=0, model=\"gpt-4-1106-preview\")\n",
-        "    service_context_gpt4 = ServiceContext.from_defaults(llm=llm_gpt4)\n",
-        "\n",
-        "    faithfulness_evaluator = FaithfulnessEvaluator(service_context=service_context_gpt4)\n",
-        "    relevancy_evaluator = RelevancyEvaluator(service_context=service_context_gpt4)\n",
-        "\n",
-        "    # Run evaluation\n",
-        "    queries = list(rag_eval_dataset.queries.values())\n",
-        "    batch_eval_queries = queries[:20]\n",
-        "\n",
-        "    runner = BatchEvalRunner(\n",
-        "    {\"faithfulness\": faithfulness_evaluator, \"relevancy\": relevancy_evaluator},\n",
-        "    workers=8,\n",
-        "    )\n",
-        "    eval_results = await runner.aevaluate_queries(\n",
-        "        query_engine, queries=batch_eval_queries\n",
-        "    )\n",
-        "    faithfulness_score = sum(result.passing for result in eval_results['faithfulness']) / len(eval_results['faithfulness'])\n",
-        "    print(f\"top_{i} faithfulness_score: {faithfulness_score}\")\n",
-        "\n",
-        "    relevancy_score = sum(result.passing for result in eval_results['faithfulness']) / len(eval_results['relevancy'])\n",
-        "    print(f\"top_{i} relevancy_score: {relevancy_score}\")\n",
-        "    print(\"-_\"*10)"
-      ]
-    },
-    {
-      "cell_type": "markdown",
-      "metadata": {},
-      "source": [
-        "# Evaluate No Metadata"
-      ]
-    },
-    {
-      "cell_type": "code",
-      "execution_count": 33,
-      "metadata": {
-        "id": "1MB1YD1E3EKM"
-      },
-      "outputs": [
-        {
-          "name": "stdout",
-          "output_type": "stream",
-          "text": [
-            "    Retriever Name  Hit Rate       MRR\n",
-            "0  Retriever top_2  0.394105  0.324236\n",
-            "    Retriever Name  Hit Rate       MRR\n",
-            "0  Retriever top_4  0.529476  0.364447\n",
-            "    Retriever Name  Hit Rate       MRR\n",
-            "0  Retriever top_6  0.575328  0.372889\n",
-            "    Retriever Name  Hit Rate       MRR\n",
-            "0  Retriever top_8  0.605895  0.376944\n",
-            "     Retriever Name  Hit Rate       MRR\n",
-            "0  Retriever top_10  0.624454  0.378897\n"
-          ]
-        }
-      ],
-      "source": [
-        "from llama_index.evaluation import RetrieverEvaluator\n",
-        "\n",
-        "# We can evaluate the retievers with different top_k values.\n",
-        "for i in [2, 4, 6, 8, 10]:\n",
-        "    retriever = index_no_metadata.as_retriever(similarity_top_k=i)\n",
-        "    retriever_evaluator = RetrieverEvaluator.from_metric_names(\n",
-        "        [\"mrr\", \"hit_rate\"], retriever=retriever\n",
-        "    )\n",
-        "    eval_results = await retriever_evaluator.aevaluate_dataset(rag_eval_dataset)\n",
-        "    print(display_results_retriever(f\"Retriever top_{i}\", eval_results))"
-      ]
-    },
-    {
-      "cell_type": "code",
-      "execution_count": 34,
-      "metadata": {},
-      "outputs": [
-        {
-          "name": "stdout",
-          "output_type": "stream",
-          "text": [
-            "top_2 faithfulness_score: 1.0\n",
-            "top_2 relevancy_score: 1.0\n",
-            "-_-_-_-_-_-_-_-_-_-_\n",
-            "top_4 faithfulness_score: 1.0\n",
-            "top_4 relevancy_score: 1.0\n",
-            "-_-_-_-_-_-_-_-_-_-_\n",
-            "top_6 faithfulness_score: 1.0\n",
-            "top_6 relevancy_score: 1.0\n",
-            "-_-_-_-_-_-_-_-_-_-_\n",
-            "top_8 faithfulness_score: 1.0\n",
-            "top_8 relevancy_score: 1.0\n",
-            "-_-_-_-_-_-_-_-_-_-_\n",
-            "top_10 faithfulness_score: 0.95\n",
-            "top_10 relevancy_score: 0.95\n",
-            "-_-_-_-_-_-_-_-_-_-_\n"
-          ]
-        }
-      ],
-      "source": [
-        "from llama_index.evaluation import RelevancyEvaluator, FaithfulnessEvaluator, BatchEvalRunner\n",
-        "from llama_index import ServiceContext\n",
-        "from llama_index.llms import OpenAI\n",
-        "\n",
-        "for i in [2, 4, 6, 8, 10]:\n",
-        "    # Set Faithfulness and Relevancy evaluators\n",
-        "    query_engine = index_no_metadata.as_query_engine(similarity_top_k=i)\n",
-        "\n",
-        "    # While we use GPT3.5-Turbo to answer questions, we can use GPT4 to evaluate the answers.\n",
-        "    llm_gpt4 = OpenAI(temperature=0, model=\"gpt-4-1106-preview\")\n",
-        "    service_context_gpt4 = ServiceContext.from_defaults(llm=llm_gpt4)\n",
-        "\n",
-        "    faithfulness_evaluator = FaithfulnessEvaluator(service_context=service_context_gpt4)\n",
-        "    relevancy_evaluator = RelevancyEvaluator(service_context=service_context_gpt4)\n",
-        "\n",
-        "    # Run evaluation\n",
-        "    queries = list(rag_eval_dataset.queries.values())\n",
-        "    batch_eval_queries = queries[:20]\n",
-        "\n",
-        "    runner = BatchEvalRunner(\n",
-        "    {\"faithfulness\": faithfulness_evaluator, \"relevancy\": relevancy_evaluator},\n",
-        "    workers=8,\n",
-        "    )\n",
-        "    eval_results = await runner.aevaluate_queries(\n",
-        "        query_engine, queries=batch_eval_queries\n",
-        "    )\n",
-        "    faithfulness_score = sum(result.passing for result in eval_results['faithfulness']) / len(eval_results['faithfulness'])\n",
-        "    print(f\"top_{i} faithfulness_score: {faithfulness_score}\")\n",
-        "\n",
-        "    relevancy_score = sum(result.passing for result in eval_results['faithfulness']) / len(eval_results['relevancy'])\n",
-        "    print(f\"top_{i} relevancy_score: {relevancy_score}\")\n",
-        "    print(\"-_\"*10)"
-      ]
-    }
-  ],
-  "metadata": {
-    "colab": {
-      "authorship_tag": "ABX9TyNlJV4zbpjtN6glOumdzocl",
-      "collapsed_sections": [
-        "6Wx-IPSMbSwC"
-      ],
-      "include_colab_link": true,
-      "provenance": []
-    },
-    "kernelspec": {
-      "display_name": "Python 3",
-      "name": "python3"
-    },
-    "language_info": {
-      "codemirror_mode": {
-        "name": "ipython",
-        "version": 3
-      },
-      "file_extension": ".py",
-      "mimetype": "text/x-python",
-      "name": "python",
-      "nbconvert_exporter": "python",
-      "pygments_lexer": "ipython3",
-      "version": "3.11.7"
-    },
-    "widgets": {
-      "application/vnd.jupyter.widget-state+json": {
-        "00a715d98c584ca1b540187546128d93": {
+        "2634e510d3c844d88891a98661beb6a9": {
           "model_module": "@jupyter-widgets/base",
-          "model_module_version": "1.2.0",
           "model_name": "LayoutModel",
+          "model_module_version": "1.2.0",
           "state": {
             "_model_module": "@jupyter-widgets/base",
             "_model_module_version": "1.2.0",
@@ -1164,10 +343,10 @@
             "width": null
           }
         },
-        "033ed4123cec43868ada3795d974d895": {
+        "6b3d2afb949f4de691ceac601bd96d0e": {
           "model_module": "@jupyter-widgets/controls",
-          "model_module_version": "1.5.0",
           "model_name": "DescriptionStyleModel",
+          "model_module_version": "1.5.0",
           "state": {
             "_model_module": "@jupyter-widgets/controls",
             "_model_module_version": "1.5.0",
@@ -1179,10 +358,32 @@
             "description_width": ""
           }
         },
-        "0bf0c22fbb024723b3a51dbe6d684c79": {
+        "8cc800fbe6bc4f4da5dd6b93d4a5143a": {
           "model_module": "@jupyter-widgets/controls",
+          "model_name": "HBoxModel",
           "model_module_version": "1.5.0",
+          "state": {
+            "_dom_classes": [],
+            "_model_module": "@jupyter-widgets/controls",
+            "_model_module_version": "1.5.0",
+            "_model_name": "HBoxModel",
+            "_view_count": null,
+            "_view_module": "@jupyter-widgets/controls",
+            "_view_module_version": "1.5.0",
+            "_view_name": "HBoxView",
+            "box_style": "",
+            "children": [
+              "IPY_MODEL_812d5d9b04f74592b850b3eb32f88c04",
+              "IPY_MODEL_ed22c91e813c4351ab1d3eb7e174796c",
+              "IPY_MODEL_de2088a425104f05b52b7a3236c7baa9"
+            ],
+            "layout": "IPY_MODEL_6f9f666836084de7894aa2e65c8dbe07"
+          }
+        },
+        "812d5d9b04f74592b850b3eb32f88c04": {
+          "model_module": "@jupyter-widgets/controls",
           "model_name": "HTMLModel",
+          "model_module_version": "1.5.0",
           "state": {
             "_dom_classes": [],
             "_model_module": "@jupyter-widgets/controls",
@@ -1194,16 +395,61 @@
             "_view_name": "HTMLView",
             "description": "",
             "description_tooltip": null,
-            "layout": "IPY_MODEL_b0c77210699e4f30ae2a2a97860de7bb",
+            "layout": "IPY_MODEL_63a3dcff335349deacf4abb9b68d76ab",
             "placeholder": "​",
-            "style": "IPY_MODEL_e31244d1c2b345a9950de74aac576290",
-            "value": " 14/14 [00:00&lt;00:00, 21.91it/s]"
+            "style": "IPY_MODEL_99eb83f4b8904e20b45573bab84aa5f4",
+            "value": "Generating embeddings: 100%"
           }
         },
-        "2634e510d3c844d88891a98661beb6a9": {
+        "ed22c91e813c4351ab1d3eb7e174796c": {
+          "model_module": "@jupyter-widgets/controls",
+          "model_name": "FloatProgressModel",
+          "model_module_version": "1.5.0",
+          "state": {
+            "_dom_classes": [],
+            "_model_module": "@jupyter-widgets/controls",
+            "_model_module_version": "1.5.0",
+            "_model_name": "FloatProgressModel",
+            "_view_count": null,
+            "_view_module": "@jupyter-widgets/controls",
+            "_view_module_version": "1.5.0",
+            "_view_name": "ProgressView",
+            "bar_style": "success",
+            "description": "",
+            "description_tooltip": null,
+            "layout": "IPY_MODEL_2c8aef5e8ec848c0a23c72581e5f4b1e",
+            "max": 108,
+            "min": 0,
+            "orientation": "horizontal",
+            "style": "IPY_MODEL_7d54abb8f3784a789fd042c2ed2dd685",
+            "value": 108
+          }
+        },
+        "de2088a425104f05b52b7a3236c7baa9": {
+          "model_module": "@jupyter-widgets/controls",
+          "model_name": "HTMLModel",
+          "model_module_version": "1.5.0",
+          "state": {
+            "_dom_classes": [],
+            "_model_module": "@jupyter-widgets/controls",
+            "_model_module_version": "1.5.0",
+            "_model_name": "HTMLModel",
+            "_view_count": null,
+            "_view_module": "@jupyter-widgets/controls",
+            "_view_module_version": "1.5.0",
+            "_view_name": "HTMLView",
+            "description": "",
+            "description_tooltip": null,
+            "layout": "IPY_MODEL_a1a88448b188407b8e4aa2af86fb9345",
+            "placeholder": "​",
+            "style": "IPY_MODEL_6a4cc229f5774cb0b4d3def7eee8b56e",
+            "value": " 108/108 [00:04&lt;00:00, 22.53it/s]"
+          }
+        },
+        "6f9f666836084de7894aa2e65c8dbe07": {
           "model_module": "@jupyter-widgets/base",
-          "model_module_version": "1.2.0",
           "model_name": "LayoutModel",
+          "model_module_version": "1.2.0",
           "state": {
             "_model_module": "@jupyter-widgets/base",
             "_model_module_version": "1.2.0",
@@ -1252,10 +498,10 @@
             "width": null
           }
         },
-        "2c8aef5e8ec848c0a23c72581e5f4b1e": {
+        "63a3dcff335349deacf4abb9b68d76ab": {
           "model_module": "@jupyter-widgets/base",
-          "model_module_version": "1.2.0",
           "model_name": "LayoutModel",
+          "model_module_version": "1.2.0",
           "state": {
             "_model_module": "@jupyter-widgets/base",
             "_model_module_version": "1.2.0",
@@ -1304,62 +550,25 @@
             "width": null
           }
         },
-        "2e939db189424ab7b5f9095932f2c99f": {
-          "model_module": "@jupyter-widgets/base",
-          "model_module_version": "1.2.0",
-          "model_name": "LayoutModel",
+        "99eb83f4b8904e20b45573bab84aa5f4": {
+          "model_module": "@jupyter-widgets/controls",
+          "model_name": "DescriptionStyleModel",
+          "model_module_version": "1.5.0",
           "state": {
-            "_model_module": "@jupyter-widgets/base",
-            "_model_module_version": "1.2.0",
-            "_model_name": "LayoutModel",
+            "_model_module": "@jupyter-widgets/controls",
+            "_model_module_version": "1.5.0",
+            "_model_name": "DescriptionStyleModel",
             "_view_count": null,
             "_view_module": "@jupyter-widgets/base",
             "_view_module_version": "1.2.0",
-            "_view_name": "LayoutView",
-            "align_content": null,
-            "align_items": null,
-            "align_self": null,
-            "border": null,
-            "bottom": null,
-            "display": null,
-            "flex": null,
-            "flex_flow": null,
-            "grid_area": null,
-            "grid_auto_columns": null,
-            "grid_auto_flow": null,
-            "grid_auto_rows": null,
-            "grid_column": null,
-            "grid_gap": null,
-            "grid_row": null,
-            "grid_template_areas": null,
-            "grid_template_columns": null,
-            "grid_template_rows": null,
-            "height": null,
-            "justify_content": null,
-            "justify_items": null,
-            "left": null,
-            "margin": null,
-            "max_height": null,
-            "max_width": null,
-            "min_height": null,
-            "min_width": null,
-            "object_fit": null,
-            "object_position": null,
-            "order": null,
-            "overflow": null,
-            "overflow_x": null,
-            "overflow_y": null,
-            "padding": null,
-            "right": null,
-            "top": null,
-            "visibility": null,
-            "width": null
+            "_view_name": "StyleView",
+            "description_width": ""
           }
         },
-        "34fea76878874d67baae4946b8d9b1da": {
+        "2c8aef5e8ec848c0a23c72581e5f4b1e": {
           "model_module": "@jupyter-widgets/base",
-          "model_module_version": "1.2.0",
           "model_name": "LayoutModel",
+          "model_module_version": "1.2.0",
           "state": {
             "_model_module": "@jupyter-widgets/base",
             "_model_module_version": "1.2.0",
@@ -1408,10 +617,26 @@
             "width": null
           }
         },
-        "4211db3192514c8189db0430779d660a": {
+        "7d54abb8f3784a789fd042c2ed2dd685": {
+          "model_module": "@jupyter-widgets/controls",
+          "model_name": "ProgressStyleModel",
+          "model_module_version": "1.5.0",
+          "state": {
+            "_model_module": "@jupyter-widgets/controls",
+            "_model_module_version": "1.5.0",
+            "_model_name": "ProgressStyleModel",
+            "_view_count": null,
+            "_view_module": "@jupyter-widgets/base",
+            "_view_module_version": "1.2.0",
+            "_view_name": "StyleView",
+            "bar_color": null,
+            "description_width": ""
+          }
+        },
+        "a1a88448b188407b8e4aa2af86fb9345": {
           "model_module": "@jupyter-widgets/base",
-          "model_module_version": "1.2.0",
           "model_name": "LayoutModel",
+          "model_module_version": "1.2.0",
           "state": {
             "_model_module": "@jupyter-widgets/base",
             "_model_module_version": "1.2.0",
@@ -1460,50 +685,68 @@
             "width": null
           }
         },
-        "45a7725a8e8b45c1937eca9dffe650d3": {
+        "6a4cc229f5774cb0b4d3def7eee8b56e": {
           "model_module": "@jupyter-widgets/controls",
+          "model_name": "DescriptionStyleModel",
+          "model_module_version": "1.5.0",
+          "state": {
+            "_model_module": "@jupyter-widgets/controls",
+            "_model_module_version": "1.5.0",
+            "_model_name": "DescriptionStyleModel",
+            "_view_count": null,
+            "_view_module": "@jupyter-widgets/base",
+            "_view_module_version": "1.2.0",
+            "_view_name": "StyleView",
+            "description_width": ""
+          }
+        },
+        "b10233c49dcc4a2f89de5389309d4fb4": {
+          "model_module": "@jupyter-widgets/controls",
+          "model_name": "HBoxModel",
           "model_module_version": "1.5.0",
-          "model_name": "FloatProgressModel",
           "state": {
             "_dom_classes": [],
             "_model_module": "@jupyter-widgets/controls",
             "_model_module_version": "1.5.0",
-            "_model_name": "FloatProgressModel",
+            "_model_name": "HBoxModel",
             "_view_count": null,
             "_view_module": "@jupyter-widgets/controls",
             "_view_module_version": "1.5.0",
-            "_view_name": "ProgressView",
-            "bar_style": "success",
-            "description": "",
-            "description_tooltip": null,
-            "layout": "IPY_MODEL_00a715d98c584ca1b540187546128d93",
-            "max": 94,
-            "min": 0,
-            "orientation": "horizontal",
-            "style": "IPY_MODEL_5c59aed5b5b244f1bdf80a08837e4bf5",
-            "value": 94
+            "_view_name": "HBoxView",
+            "box_style": "",
+            "children": [
+              "IPY_MODEL_c617a0bc420b453693bb697a235e50d7",
+              "IPY_MODEL_f14f74d98f824013b562c82fb251ac26",
+              "IPY_MODEL_19f8baa6c24e4c7a8888f73f3cb7e3f8"
+            ],
+            "layout": "IPY_MODEL_19c0bf2b745640b3adf6478738ba02ea"
           }
         },
-        "5c59aed5b5b244f1bdf80a08837e4bf5": {
+        "c617a0bc420b453693bb697a235e50d7": {
           "model_module": "@jupyter-widgets/controls",
+          "model_name": "HTMLModel",
           "model_module_version": "1.5.0",
-          "model_name": "ProgressStyleModel",
           "state": {
+            "_dom_classes": [],
             "_model_module": "@jupyter-widgets/controls",
             "_model_module_version": "1.5.0",
-            "_model_name": "ProgressStyleModel",
+            "_model_name": "HTMLModel",
             "_view_count": null,
-            "_view_module": "@jupyter-widgets/base",
-            "_view_module_version": "1.2.0",
-            "_view_name": "StyleView",
-            "bar_color": null,
-            "description_width": ""
+            "_view_module": "@jupyter-widgets/controls",
+            "_view_module_version": "1.5.0",
+            "_view_name": "HTMLView",
+            "description": "",
+            "description_tooltip": null,
+            "layout": "IPY_MODEL_0258a4a4bdc24404aa005c3b4d1235ee",
+            "placeholder": "​",
+            "style": "IPY_MODEL_8da878f475de494fac3f7acf29e4e7f0",
+            "value": "Parsing nodes: 100%"
           }
         },
-        "626b1ba98c374987913a7a4384f19fa1": {
+        "f14f74d98f824013b562c82fb251ac26": {
           "model_module": "@jupyter-widgets/controls",
-          "model_module_version": "1.5.0",
           "model_name": "FloatProgressModel",
+          "model_module_version": "1.5.0",
           "state": {
             "_dom_classes": [],
             "_model_module": "@jupyter-widgets/controls",
@@ -1516,18 +759,91 @@
             "bar_style": "success",
             "description": "",
             "description_tooltip": null,
-            "layout": "IPY_MODEL_e4413564a300469d86c3abc567f24701",
+            "layout": "IPY_MODEL_dc5b9ea6aeea42dfae978e4a8961b03a",
             "max": 14,
             "min": 0,
             "orientation": "horizontal",
-            "style": "IPY_MODEL_64167ae99cd24c729435aefc1ea13519",
+            "style": "IPY_MODEL_aefce46940904fce9c4e439784cbc28c",
             "value": 14
           }
         },
-        "63a3dcff335349deacf4abb9b68d76ab": {
+        "19f8baa6c24e4c7a8888f73f3cb7e3f8": {
+          "model_module": "@jupyter-widgets/controls",
+          "model_name": "HTMLModel",
+          "model_module_version": "1.5.0",
+          "state": {
+            "_dom_classes": [],
+            "_model_module": "@jupyter-widgets/controls",
+            "_model_module_version": "1.5.0",
+            "_model_name": "HTMLModel",
+            "_view_count": null,
+            "_view_module": "@jupyter-widgets/controls",
+            "_view_module_version": "1.5.0",
+            "_view_name": "HTMLView",
+            "description": "",
+            "description_tooltip": null,
+            "layout": "IPY_MODEL_dcfeadeb1cc2483399e8194ec43f2eee",
+            "placeholder": "​",
+            "style": "IPY_MODEL_7cec42608a51413796ec41250e0eed6d",
+            "value": " 14/14 [00:00&lt;00:00, 22.42it/s]"
+          }
+        },
+        "19c0bf2b745640b3adf6478738ba02ea": {
+          "model_module": "@jupyter-widgets/base",
+          "model_name": "LayoutModel",
+          "model_module_version": "1.2.0",
+          "state": {
+            "_model_module": "@jupyter-widgets/base",
+            "_model_module_version": "1.2.0",
+            "_model_name": "LayoutModel",
+            "_view_count": null,
+            "_view_module": "@jupyter-widgets/base",
+            "_view_module_version": "1.2.0",
+            "_view_name": "LayoutView",
+            "align_content": null,
+            "align_items": null,
+            "align_self": null,
+            "border": null,
+            "bottom": null,
+            "display": null,
+            "flex": null,
+            "flex_flow": null,
+            "grid_area": null,
+            "grid_auto_columns": null,
+            "grid_auto_flow": null,
+            "grid_auto_rows": null,
+            "grid_column": null,
+            "grid_gap": null,
+            "grid_row": null,
+            "grid_template_areas": null,
+            "grid_template_columns": null,
+            "grid_template_rows": null,
+            "height": null,
+            "justify_content": null,
+            "justify_items": null,
+            "left": null,
+            "margin": null,
+            "max_height": null,
+            "max_width": null,
+            "min_height": null,
+            "min_width": null,
+            "object_fit": null,
+            "object_position": null,
+            "order": null,
+            "overflow": null,
+            "overflow_x": null,
+            "overflow_y": null,
+            "padding": null,
+            "right": null,
+            "top": null,
+            "visibility": null,
+            "width": null
+          }
+        },
+        "0258a4a4bdc24404aa005c3b4d1235ee": {
           "model_module": "@jupyter-widgets/base",
-          "model_module_version": "1.2.0",
           "model_name": "LayoutModel",
+          "model_module_version": "1.2.0",
           "state": {
             "_model_module": "@jupyter-widgets/base",
             "_model_module_version": "1.2.0",
@@ -1576,41 +892,10 @@
             "width": null
           }
         },
-        "64167ae99cd24c729435aefc1ea13519": {
-          "model_module": "@jupyter-widgets/controls",
-          "model_module_version": "1.5.0",
-          "model_name": "ProgressStyleModel",
-          "state": {
-            "_model_module": "@jupyter-widgets/controls",
-            "_model_module_version": "1.5.0",
-            "_model_name": "ProgressStyleModel",
-            "_view_count": null,
-            "_view_module": "@jupyter-widgets/base",
-            "_view_module_version": "1.2.0",
-            "_view_name": "StyleView",
-            "bar_color": null,
-            "description_width": ""
-          }
-        },
-        "6a4cc229f5774cb0b4d3def7eee8b56e": {
+        "8da878f475de494fac3f7acf29e4e7f0": {
           "model_module": "@jupyter-widgets/controls",
-          "model_module_version": "1.5.0",
           "model_name": "DescriptionStyleModel",
-          "state": {
-            "_model_module": "@jupyter-widgets/controls",
-            "_model_module_version": "1.5.0",
-            "_model_name": "DescriptionStyleModel",
-            "_view_count": null,
-            "_view_module": "@jupyter-widgets/base",
-            "_view_module_version": "1.2.0",
-            "_view_name": "StyleView",
-            "description_width": ""
-          }
-        },
-        "6b3d2afb949f4de691ceac601bd96d0e": {
-          "model_module": "@jupyter-widgets/controls",
           "model_module_version": "1.5.0",
-          "model_name": "DescriptionStyleModel",
           "state": {
             "_model_module": "@jupyter-widgets/controls",
             "_model_module_version": "1.5.0",
@@ -1622,10 +907,10 @@
             "description_width": ""
           }
         },
-        "6dc4da2c822c460ca0c2a11266806504": {
+        "dc5b9ea6aeea42dfae978e4a8961b03a": {
           "model_module": "@jupyter-widgets/base",
-          "model_module_version": "1.2.0",
           "model_name": "LayoutModel",
+          "model_module_version": "1.2.0",
           "state": {
             "_model_module": "@jupyter-widgets/base",
             "_model_module_version": "1.2.0",
@@ -1674,10 +959,26 @@
             "width": null
           }
         },
-        "6f9f666836084de7894aa2e65c8dbe07": {
+        "aefce46940904fce9c4e439784cbc28c": {
+          "model_module": "@jupyter-widgets/controls",
+          "model_name": "ProgressStyleModel",
+          "model_module_version": "1.5.0",
+          "state": {
+            "_model_module": "@jupyter-widgets/controls",
+            "_model_module_version": "1.5.0",
+            "_model_name": "ProgressStyleModel",
+            "_view_count": null,
+            "_view_module": "@jupyter-widgets/base",
+            "_view_module_version": "1.2.0",
+            "_view_name": "StyleView",
+            "bar_color": null,
+            "description_width": ""
+          }
+        },
+        "dcfeadeb1cc2483399e8194ec43f2eee": {
           "model_module": "@jupyter-widgets/base",
-          "model_module_version": "1.2.0",
           "model_name": "LayoutModel",
+          "model_module_version": "1.2.0",
           "state": {
             "_model_module": "@jupyter-widgets/base",
             "_model_module_version": "1.2.0",
@@ -1726,31 +1027,25 @@
             "width": null
           }
         },
-        "74cda13649844f24a2e6ebce82213865": {
+        "7cec42608a51413796ec41250e0eed6d": {
           "model_module": "@jupyter-widgets/controls",
+          "model_name": "DescriptionStyleModel",
           "model_module_version": "1.5.0",
-          "model_name": "HTMLModel",
           "state": {
-            "_dom_classes": [],
             "_model_module": "@jupyter-widgets/controls",
             "_model_module_version": "1.5.0",
-            "_model_name": "HTMLModel",
+            "_model_name": "DescriptionStyleModel",
             "_view_count": null,
-            "_view_module": "@jupyter-widgets/controls",
-            "_view_module_version": "1.5.0",
-            "_view_name": "HTMLView",
-            "description": "",
-            "description_tooltip": null,
-            "layout": "IPY_MODEL_bfda4d80ca4f4805be90772690d26fe0",
-            "placeholder": "​",
-            "style": "IPY_MODEL_a6876009a1fb4bcc83f779eab7a4e3b7",
-            "value": "Parsing nodes: 100%"
+            "_view_module": "@jupyter-widgets/base",
+            "_view_module_version": "1.2.0",
+            "_view_name": "StyleView",
+            "description_width": ""
           }
         },
-        "7a469b6821ed458d99a1ed57e72b3d68": {
+        "036ae37776684a46a1a1f9e3c018a87e": {
           "model_module": "@jupyter-widgets/controls",
-          "model_module_version": "1.5.0",
           "model_name": "HBoxModel",
+          "model_module_version": "1.5.0",
           "state": {
             "_dom_classes": [],
             "_model_module": "@jupyter-widgets/controls",
@@ -1762,33 +1057,17 @@
             "_view_name": "HBoxView",
             "box_style": "",
             "children": [
-              "IPY_MODEL_8c556c8c8ce941c6b433780fd4a6ae54",
-              "IPY_MODEL_626b1ba98c374987913a7a4384f19fa1",
-              "IPY_MODEL_a4fad4d11a8941f8b90abb3099e9a090"
+              "IPY_MODEL_de5e18d6629d4cd0abf9e5c72d07ac73",
+              "IPY_MODEL_29ff5f2d9c114e8bb1b7461dbae2fdb8",
+              "IPY_MODEL_0f79e4f5836f4ebf80af47c8e100b012"
             ],
-            "layout": "IPY_MODEL_c3a4b958e4814294801495226697bce2"
-          }
-        },
-        "7d54abb8f3784a789fd042c2ed2dd685": {
-          "model_module": "@jupyter-widgets/controls",
-          "model_module_version": "1.5.0",
-          "model_name": "ProgressStyleModel",
-          "state": {
-            "_model_module": "@jupyter-widgets/controls",
-            "_model_module_version": "1.5.0",
-            "_model_name": "ProgressStyleModel",
-            "_view_count": null,
-            "_view_module": "@jupyter-widgets/base",
-            "_view_module_version": "1.2.0",
-            "_view_name": "StyleView",
-            "bar_color": null,
-            "description_width": ""
+            "layout": "IPY_MODEL_99a5712bb6b64f68b30b9a1dbbc803fb"
           }
         },
-        "812d5d9b04f74592b850b3eb32f88c04": {
+        "de5e18d6629d4cd0abf9e5c72d07ac73": {
           "model_module": "@jupyter-widgets/controls",
-          "model_module_version": "1.5.0",
           "model_name": "HTMLModel",
+          "model_module_version": "1.5.0",
           "state": {
             "_dom_classes": [],
             "_model_module": "@jupyter-widgets/controls",
@@ -1800,90 +1079,113 @@
             "_view_name": "HTMLView",
             "description": "",
             "description_tooltip": null,
-            "layout": "IPY_MODEL_63a3dcff335349deacf4abb9b68d76ab",
+            "layout": "IPY_MODEL_24fe3fb4e04546b3a17377d3e6ff61d6",
             "placeholder": "​",
-            "style": "IPY_MODEL_99eb83f4b8904e20b45573bab84aa5f4",
+            "style": "IPY_MODEL_931b9be975234aa79ae55aa12629f661",
             "value": "Generating embeddings: 100%"
           }
         },
-        "8c556c8c8ce941c6b433780fd4a6ae54": {
+        "29ff5f2d9c114e8bb1b7461dbae2fdb8": {
           "model_module": "@jupyter-widgets/controls",
+          "model_name": "FloatProgressModel",
           "model_module_version": "1.5.0",
-          "model_name": "HTMLModel",
           "state": {
             "_dom_classes": [],
             "_model_module": "@jupyter-widgets/controls",
             "_model_module_version": "1.5.0",
-            "_model_name": "HTMLModel",
+            "_model_name": "FloatProgressModel",
             "_view_count": null,
             "_view_module": "@jupyter-widgets/controls",
             "_view_module_version": "1.5.0",
-            "_view_name": "HTMLView",
+            "_view_name": "ProgressView",
+            "bar_style": "success",
             "description": "",
             "description_tooltip": null,
-            "layout": "IPY_MODEL_2e939db189424ab7b5f9095932f2c99f",
-            "placeholder": "​",
-            "style": "IPY_MODEL_fd6a36e947ec451a938d266117dab12e",
-            "value": "Parsing nodes: 100%"
+            "layout": "IPY_MODEL_63bf1ccee3ad4101920f74bb2410bfe6",
+            "max": 94,
+            "min": 0,
+            "orientation": "horizontal",
+            "style": "IPY_MODEL_8f353fecd64a4e18be6fe2eb4fea3f9d",
+            "value": 94
           }
         },
-        "8cc800fbe6bc4f4da5dd6b93d4a5143a": {
+        "0f79e4f5836f4ebf80af47c8e100b012": {
           "model_module": "@jupyter-widgets/controls",
+          "model_name": "HTMLModel",
           "model_module_version": "1.5.0",
-          "model_name": "HBoxModel",
           "state": {
             "_dom_classes": [],
             "_model_module": "@jupyter-widgets/controls",
             "_model_module_version": "1.5.0",
-            "_model_name": "HBoxModel",
+            "_model_name": "HTMLModel",
             "_view_count": null,
             "_view_module": "@jupyter-widgets/controls",
             "_view_module_version": "1.5.0",
-            "_view_name": "HBoxView",
-            "box_style": "",
-            "children": [
-              "IPY_MODEL_812d5d9b04f74592b850b3eb32f88c04",
-              "IPY_MODEL_ed22c91e813c4351ab1d3eb7e174796c",
-              "IPY_MODEL_de2088a425104f05b52b7a3236c7baa9"
-            ],
-            "layout": "IPY_MODEL_6f9f666836084de7894aa2e65c8dbe07"
-          }
-        },
-        "99eb83f4b8904e20b45573bab84aa5f4": {
-          "model_module": "@jupyter-widgets/controls",
-          "model_module_version": "1.5.0",
-          "model_name": "DescriptionStyleModel",
-          "state": {
-            "_model_module": "@jupyter-widgets/controls",
-            "_model_module_version": "1.5.0",
-            "_model_name": "DescriptionStyleModel",
-            "_view_count": null,
-            "_view_module": "@jupyter-widgets/base",
-            "_view_module_version": "1.2.0",
-            "_view_name": "StyleView",
-            "description_width": ""
+            "_view_name": "HTMLView",
+            "description": "",
+            "description_tooltip": null,
+            "layout": "IPY_MODEL_d190edde40f04461ba066bc7f10b9d31",
+            "placeholder": "​",
+            "style": "IPY_MODEL_3114d5176097487bb1313cd49867680f",
+            "value": " 94/94 [00:13&lt;00:00,  8.05it/s]"
           }
         },
-        "9a5ad060a90c4f14ba05527fdcfe8a72": {
-          "model_module": "@jupyter-widgets/controls",
-          "model_module_version": "1.5.0",
-          "model_name": "ProgressStyleModel",
+        "99a5712bb6b64f68b30b9a1dbbc803fb": {
+          "model_module": "@jupyter-widgets/base",
+          "model_name": "LayoutModel",
+          "model_module_version": "1.2.0",
           "state": {
-            "_model_module": "@jupyter-widgets/controls",
-            "_model_module_version": "1.5.0",
-            "_model_name": "ProgressStyleModel",
+            "_model_module": "@jupyter-widgets/base",
+            "_model_module_version": "1.2.0",
+            "_model_name": "LayoutModel",
             "_view_count": null,
             "_view_module": "@jupyter-widgets/base",
             "_view_module_version": "1.2.0",
-            "_view_name": "StyleView",
-            "bar_color": null,
-            "description_width": ""
+            "_view_name": "LayoutView",
+            "align_content": null,
+            "align_items": null,
+            "align_self": null,
+            "border": null,
+            "bottom": null,
+            "display": null,
+            "flex": null,
+            "flex_flow": null,
+            "grid_area": null,
+            "grid_auto_columns": null,
+            "grid_auto_flow": null,
+            "grid_auto_rows": null,
+            "grid_column": null,
+            "grid_gap": null,
+            "grid_row": null,
+            "grid_template_areas": null,
+            "grid_template_columns": null,
+            "grid_template_rows": null,
+            "height": null,
+            "justify_content": null,
+            "justify_items": null,
+            "left": null,
+            "margin": null,
+            "max_height": null,
+            "max_width": null,
+            "min_height": null,
+            "min_width": null,
+            "object_fit": null,
+            "object_position": null,
+            "order": null,
+            "overflow": null,
+            "overflow_x": null,
+            "overflow_y": null,
+            "padding": null,
+            "right": null,
+            "top": null,
+            "visibility": null,
+            "width": null
           }
         },
-        "a1a88448b188407b8e4aa2af86fb9345": {
+        "24fe3fb4e04546b3a17377d3e6ff61d6": {
           "model_module": "@jupyter-widgets/base",
-          "model_module_version": "1.2.0",
           "model_name": "LayoutModel",
+          "model_module_version": "1.2.0",
           "state": {
             "_model_module": "@jupyter-widgets/base",
             "_model_module_version": "1.2.0",
@@ -1932,31 +1234,10 @@
             "width": null
           }
         },
-        "a4fad4d11a8941f8b90abb3099e9a090": {
-          "model_module": "@jupyter-widgets/controls",
-          "model_module_version": "1.5.0",
-          "model_name": "HTMLModel",
-          "state": {
-            "_dom_classes": [],
-            "_model_module": "@jupyter-widgets/controls",
-            "_model_module_version": "1.5.0",
-            "_model_name": "HTMLModel",
-            "_view_count": null,
-            "_view_module": "@jupyter-widgets/controls",
-            "_view_module_version": "1.5.0",
-            "_view_name": "HTMLView",
-            "description": "",
-            "description_tooltip": null,
-            "layout": "IPY_MODEL_2634e510d3c844d88891a98661beb6a9",
-            "placeholder": "​",
-            "style": "IPY_MODEL_6b3d2afb949f4de691ceac601bd96d0e",
-            "value": " 14/14 [00:00&lt;00:00, 34.02it/s]"
-          }
-        },
-        "a6876009a1fb4bcc83f779eab7a4e3b7": {
+        "931b9be975234aa79ae55aa12629f661": {
           "model_module": "@jupyter-widgets/controls",
-          "model_module_version": "1.5.0",
           "model_name": "DescriptionStyleModel",
+          "model_module_version": "1.5.0",
           "state": {
             "_model_module": "@jupyter-widgets/controls",
             "_model_module_version": "1.5.0",
@@ -1968,10 +1249,10 @@
             "description_width": ""
           }
         },
-        "b0c77210699e4f30ae2a2a97860de7bb": {
+        "63bf1ccee3ad4101920f74bb2410bfe6": {
           "model_module": "@jupyter-widgets/base",
-          "model_module_version": "1.2.0",
           "model_name": "LayoutModel",
+          "model_module_version": "1.2.0",
           "state": {
             "_model_module": "@jupyter-widgets/base",
             "_model_module_version": "1.2.0",
@@ -2020,10 +1301,26 @@
             "width": null
           }
         },
-        "b26896dfe0ba4779bf753602039ece5a": {
+        "8f353fecd64a4e18be6fe2eb4fea3f9d": {
+          "model_module": "@jupyter-widgets/controls",
+          "model_name": "ProgressStyleModel",
+          "model_module_version": "1.5.0",
+          "state": {
+            "_model_module": "@jupyter-widgets/controls",
+            "_model_module_version": "1.5.0",
+            "_model_name": "ProgressStyleModel",
+            "_view_count": null,
+            "_view_module": "@jupyter-widgets/base",
+            "_view_module_version": "1.2.0",
+            "_view_name": "StyleView",
+            "bar_color": null,
+            "description_width": ""
+          }
+        },
+        "d190edde40f04461ba066bc7f10b9d31": {
           "model_module": "@jupyter-widgets/base",
-          "model_module_version": "1.2.0",
           "model_name": "LayoutModel",
+          "model_module_version": "1.2.0",
           "state": {
             "_model_module": "@jupyter-widgets/base",
             "_model_module_version": "1.2.0",
@@ -2072,417 +1369,1211 @@
             "width": null
           }
         },
-        "bd4c5bc2c7ee443999058f7f232c50f9": {
-          "model_module": "@jupyter-widgets/controls",
-          "model_module_version": "1.5.0",
-          "model_name": "HBoxModel",
-          "state": {
-            "_dom_classes": [],
-            "_model_module": "@jupyter-widgets/controls",
-            "_model_module_version": "1.5.0",
-            "_model_name": "HBoxModel",
-            "_view_count": null,
-            "_view_module": "@jupyter-widgets/controls",
-            "_view_module_version": "1.5.0",
-            "_view_name": "HBoxView",
-            "box_style": "",
-            "children": [
-              "IPY_MODEL_74cda13649844f24a2e6ebce82213865",
-              "IPY_MODEL_dc498ad680d44d1e8e6fd2df2541a8ba",
-              "IPY_MODEL_0bf0c22fbb024723b3a51dbe6d684c79"
+        "3114d5176097487bb1313cd49867680f": {
+          "model_module": "@jupyter-widgets/controls",
+          "model_name": "DescriptionStyleModel",
+          "model_module_version": "1.5.0",
+          "state": {
+            "_model_module": "@jupyter-widgets/controls",
+            "_model_module_version": "1.5.0",
+            "_model_name": "DescriptionStyleModel",
+            "_view_count": null,
+            "_view_module": "@jupyter-widgets/base",
+            "_view_module_version": "1.2.0",
+            "_view_name": "StyleView",
+            "description_width": ""
+          }
+        }
+      }
+    }
+  },
+  "cells": [
+    {
+      "cell_type": "markdown",
+      "metadata": {
+        "id": "view-in-github",
+        "colab_type": "text"
+      },
+      "source": [
+        "<a href=\"https://colab.research.google.com/github/towardsai/ai-tutor-rag-system/blob/main/notebooks/07-RAG_Improve_Chunking.ipynb\" target=\"_parent\"><img src=\"https://colab.research.google.com/assets/colab-badge.svg\" alt=\"Open In Colab\"/></a>"
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "source": [
+        "# Install Packages and Setup Variables"
+      ],
+      "metadata": {
+        "id": "-zE1h0uQV7uT"
+      }
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {
+        "id": "QPJzr-I9XQ7l",
+        "colab": {
+          "base_uri": "https://localhost:8080/"
+        },
+        "outputId": "34a040a3-c044-4348-ef4c-d8cc61364c90"
+      },
+      "outputs": [
+        {
+          "output_type": "stream",
+          "name": "stdout",
+          "text": [
+            "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m15.7/15.7 MB\u001b[0m \u001b[31m27.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+            "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m225.4/225.4 kB\u001b[0m \u001b[31m18.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+            "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m2.0/2.0 MB\u001b[0m \u001b[31m39.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+            "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m508.6/508.6 kB\u001b[0m \u001b[31m29.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+            "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m79.9/79.9 MB\u001b[0m \u001b[31m11.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+            "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m45.7/45.7 kB\u001b[0m \u001b[31m4.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+            "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m51.7/51.7 kB\u001b[0m \u001b[31m4.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+            "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m75.9/75.9 kB\u001b[0m \u001b[31m8.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+            "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m2.4/2.4 MB\u001b[0m \u001b[31m59.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+            "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m92.1/92.1 kB\u001b[0m \u001b[31m8.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+            "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m60.7/60.7 kB\u001b[0m \u001b[31m5.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+            "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m41.1/41.1 kB\u001b[0m \u001b[31m3.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+            "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m5.4/5.4 MB\u001b[0m \u001b[31m70.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+            "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m6.8/6.8 MB\u001b[0m \u001b[31m65.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+            "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m57.9/57.9 kB\u001b[0m \u001b[31m6.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+            "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m105.6/105.6 kB\u001b[0m \u001b[31m9.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+            "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m67.3/67.3 kB\u001b[0m \u001b[31m6.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+            "\u001b[?25h  Installing build dependencies ... \u001b[?25l\u001b[?25hdone\n",
+            "  Getting requirements to build wheel ... \u001b[?25l\u001b[?25hdone\n",
+            "  Preparing metadata (pyproject.toml) ... \u001b[?25l\u001b[?25hdone\n",
+            "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m698.9/698.9 kB\u001b[0m \u001b[31m41.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+            "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m1.6/1.6 MB\u001b[0m \u001b[31m64.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+            "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m67.6/67.6 kB\u001b[0m \u001b[31m7.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+            "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m3.1/3.1 MB\u001b[0m \u001b[31m71.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+            "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m71.5/71.5 kB\u001b[0m \u001b[31m7.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+            "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━���━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m76.9/76.9 kB\u001b[0m \u001b[31m8.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+            "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m58.3/58.3 kB\u001b[0m \u001b[31m6.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+            "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m46.0/46.0 kB\u001b[0m \u001b[31m4.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+            "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m50.8/50.8 kB\u001b[0m \u001b[31m5.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+            "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m341.4/341.4 kB\u001b[0m \u001b[31m30.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+            "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m3.4/3.4 MB\u001b[0m \u001b[31m73.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+            "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m1.3/1.3 MB\u001b[0m \u001b[31m59.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+            "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m130.2/130.2 kB\u001b[0m \u001b[31m13.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+            "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m49.4/49.4 kB\u001b[0m \u001b[31m5.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+            "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m86.8/86.8 kB\u001b[0m \u001b[31m10.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+            "\u001b[?25h  Building wheel for pypika (pyproject.toml) ... \u001b[?25l\u001b[?25hdone\n"
+          ]
+        }
+      ],
+      "source": [
+        "!pip install -q llama-index==0.9.21 openai==1.6.0 tiktoken==0.5.2 chromadb==0.4.21 kaleido==0.2.1 python-multipart==0.0.6 cohere==4.39"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "import os\n",
+        "\n",
+        "# Set the \"OPENAI_API_KEY\" in the Python environment. Will be used by OpenAI client later.\n",
+        "os.environ[\"OPENAI_API_KEY\"] = \"<YOUR_OPENAI_KEY>\""
+      ],
+      "metadata": {
+        "id": "riuXwpSPcvWC"
+      },
+      "execution_count": null,
+      "outputs": []
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "import nest_asyncio\n",
+        "\n",
+        "nest_asyncio.apply()"
+      ],
+      "metadata": {
+        "id": "jIEeZzqLbz0J"
+      },
+      "execution_count": null,
+      "outputs": []
+    },
+    {
+      "cell_type": "markdown",
+      "source": [
+        "# Load a Model"
+      ],
+      "metadata": {
+        "id": "Bkgi2OrYzF7q"
+      }
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "from llama_index.llms import OpenAI\n",
+        "\n",
+        "llm = OpenAI(temperature=0.9, model=\"gpt-3.5-turbo\", max_tokens=512)"
+      ],
+      "metadata": {
+        "id": "9oGT6crooSSj"
+      },
+      "execution_count": null,
+      "outputs": []
+    },
+    {
+      "cell_type": "markdown",
+      "source": [
+        "# Create a Vector Store"
+      ],
+      "metadata": {
+        "id": "0BwVuJXlzHVL"
+      }
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "import chromadb\n",
+        "\n",
+        "# create client and a new collection\n",
+        "# chromadb.PersistentClient saves data on disk at the given path (chromadb.EphemeralClient would keep it in-memory only).\n",
+        "chroma_client = chromadb.PersistentClient(path=\"./mini-llama-articles\")\n",
+        "chroma_collection = chroma_client.create_collection(\"mini-llama-articles\")"
+      ],
+      "metadata": {
+        "id": "SQP87lHczHKc"
+      },
+      "execution_count": null,
+      "outputs": []
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "from llama_index.vector_stores import ChromaVectorStore\n",
+        "\n",
+        "# Define a vector store object backed by the created Chroma collection.\n",
+        "vector_store = ChromaVectorStore(chroma_collection=chroma_collection)"
+      ],
+      "metadata": {
+        "id": "zAaGcYMJzHAN"
+      },
+      "execution_count": null,
+      "outputs": []
+    },
+    {
+      "cell_type": "markdown",
+      "source": [
+        "# Load the Dataset (CSV)"
+      ],
+      "metadata": {
+        "id": "I9JbAzFcjkpn"
+      }
+    },
+    {
+      "cell_type": "markdown",
+      "source": [
+        "## Download"
+      ],
+      "metadata": {
+        "id": "ceveDuYdWCYk"
+      }
+    },
+    {
+      "cell_type": "markdown",
+      "source": [
+        "The dataset includes several articles from the TowardsAI blog, which provide an in-depth explanation of the LLaMA2 model. The CSV file is downloaded below and then read row by row."
+      ],
+      "metadata": {
+        "id": "eZwf6pv7WFmD"
+      }
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "!wget https://raw.githubusercontent.com/AlaFalaki/tutorial_notebooks/main/data/mini-llama-articles.csv"
+      ],
+      "metadata": {
+        "colab": {
+          "base_uri": "https://localhost:8080/"
+        },
+        "id": "wl_pbPvMlv1h",
+        "outputId": "02651edb-4a76-4bf4-e72f-92219f994292"
+      },
+      "execution_count": null,
+      "outputs": [
+        {
+          "output_type": "stream",
+          "name": "stdout",
+          "text": [
+            "--2024-02-06 19:59:09--  https://raw.githubusercontent.com/AlaFalaki/tutorial_notebooks/main/data/mini-llama-articles.csv\n",
+            "Resolving raw.githubusercontent.com (raw.githubusercontent.com)... 185.199.108.133, 185.199.111.133, 185.199.110.133, ...\n",
+            "Connecting to raw.githubusercontent.com (raw.githubusercontent.com)|185.199.108.133|:443... connected.\n",
+            "HTTP request sent, awaiting response... 200 OK\n",
+            "Length: 173646 (170K) [text/plain]\n",
+            "Saving to: ‘mini-llama-articles.csv’\n",
+            "\n",
+            "\rmini-llama-articles   0%[                    ]       0  --.-KB/s               \rmini-llama-articles 100%[===================>] 169.58K  --.-KB/s    in 0.02s   \n",
+            "\n",
+            "2024-02-06 19:59:09 (7.18 MB/s) - ‘mini-llama-articles.csv’ saved [173646/173646]\n",
+            "\n"
+          ]
+        }
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "source": [
+        "## Read File"
+      ],
+      "metadata": {
+        "id": "VWBLtDbUWJfA"
+      }
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "import csv\n",
+        "\n",
+        "rows = []\n",
+        "\n",
+        "# Load the file as a CSV\n",
+        "with open(\"./mini-llama-articles.csv\", mode=\"r\", encoding=\"utf-8\") as file:\n",
+        "  csv_reader = csv.reader(file)\n",
+        "\n",
+        "  for idx, row in enumerate( csv_reader ):\n",
+        "    if idx == 0: continue; # Skip header row\n",
+        "    rows.append( row )\n",
+        "\n",
+        "# The number of rows (excluding the header) in the dataset.\n",
+        "len( rows )"
+      ],
+      "metadata": {
+        "id": "0Q9sxuW0g3Gd",
+        "colab": {
+          "base_uri": "https://localhost:8080/"
+        },
+        "outputId": "b74eb24b-a956-404a-b343-4f961aca883f"
+      },
+      "execution_count": null,
+      "outputs": [
+        {
+          "output_type": "execute_result",
+          "data": {
+            "text/plain": [
+              "14"
+            ]
+          },
+          "metadata": {},
+          "execution_count": 19
+        }
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "source": [
+        "# Convert to Document Objects"
+      ],
+      "metadata": {
+        "id": "S17g2RYOjmf2"
+      }
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "from llama_index import Document\n",
+        "\n",
+        "# Convert the rows to Document objects so the LlamaIndex framework can process them.\n",
+        "documents = [Document(text=row[1], metadata={\"title\": row[0], \"url\": row[2], \"source_name\": row[3]}) for row in rows]"
+      ],
+      "metadata": {
+        "id": "YizvmXPejkJE"
+      },
+      "execution_count": null,
+      "outputs": []
+    },
+    {
+      "cell_type": "markdown",
+      "source": [
+        "# Transforming"
+      ],
+      "metadata": {
+        "id": "qjuLbmFuWsyl"
+      }
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "from llama_index.text_splitter import TokenTextSplitter\n",
+        "\n",
+        "# Define the splitter object that splits the text into segments of 512 tokens,\n",
+        "# with a 128-token overlap between the segments.\n",
+        "text_splitter = TokenTextSplitter(\n",
+        "    separator=\" \", chunk_size=512, chunk_overlap=128\n",
+        ")"
+      ],
+      "metadata": {
+        "id": "9z3t70DGWsjO"
+      },
+      "execution_count": null,
+      "outputs": []
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "from llama_index.extractors import (\n",
+        "    SummaryExtractor,\n",
+        "    QuestionsAnsweredExtractor,\n",
+        "    KeywordExtractor,\n",
+        ")\n",
+        "from llama_index.embeddings import OpenAIEmbedding\n",
+        "from llama_index.ingestion import IngestionPipeline\n",
+        "\n",
+        "# Create the pipeline to apply the transformation on each chunk,\n",
+        "# and store the transformed text in the chroma vector store.\n",
+        "pipeline = IngestionPipeline(\n",
+        "    transformations=[\n",
+        "        text_splitter,\n",
+        "        QuestionsAnsweredExtractor(questions=3, llm=llm),\n",
+        "        SummaryExtractor(summaries=[\"prev\", \"self\"], llm=llm),\n",
+        "        KeywordExtractor(keywords=10, llm=llm),\n",
+        "        OpenAIEmbedding(),\n",
+        "    ],\n",
+        "    vector_store=vector_store\n",
+        ")\n",
+        "\n",
+        "# Run the transformation pipeline.\n",
+        "nodes = pipeline.run(documents=documents, show_progress=True);"
+      ],
+      "metadata": {
+        "colab": {
+          "base_uri": "https://localhost:8080/",
+          "height": 385,
+          "referenced_widgets": [
+            "7a469b6821ed458d99a1ed57e72b3d68",
+            "8c556c8c8ce941c6b433780fd4a6ae54",
+            "626b1ba98c374987913a7a4384f19fa1",
+            "a4fad4d11a8941f8b90abb3099e9a090",
+            "c3a4b958e4814294801495226697bce2",
+            "2e939db189424ab7b5f9095932f2c99f",
+            "fd6a36e947ec451a938d266117dab12e",
+            "e4413564a300469d86c3abc567f24701",
+            "64167ae99cd24c729435aefc1ea13519",
+            "2634e510d3c844d88891a98661beb6a9",
+            "6b3d2afb949f4de691ceac601bd96d0e",
+            "8cc800fbe6bc4f4da5dd6b93d4a5143a",
+            "812d5d9b04f74592b850b3eb32f88c04",
+            "ed22c91e813c4351ab1d3eb7e174796c",
+            "de2088a425104f05b52b7a3236c7baa9",
+            "6f9f666836084de7894aa2e65c8dbe07",
+            "63a3dcff335349deacf4abb9b68d76ab",
+            "99eb83f4b8904e20b45573bab84aa5f4",
+            "2c8aef5e8ec848c0a23c72581e5f4b1e",
+            "7d54abb8f3784a789fd042c2ed2dd685",
+            "a1a88448b188407b8e4aa2af86fb9345",
+            "6a4cc229f5774cb0b4d3def7eee8b56e"
+          ]
+        },
+        "id": "P9LDJ7o-Wsc-",
+        "outputId": "2e27e965-fd4c-4754-94f5-3a6e33a72dea"
+      },
+      "execution_count": null,
+      "outputs": [
+        {
+          "output_type": "display_data",
+          "data": {
+            "text/plain": [
+              "Parsing nodes:   0%|          | 0/14 [00:00<?, ?it/s]"
             ],
-            "layout": "IPY_MODEL_34fea76878874d67baae4946b8d9b1da"
-          }
+            "application/vnd.jupyter.widget-view+json": {
+              "version_major": 2,
+              "version_minor": 0,
+              "model_id": "7a469b6821ed458d99a1ed57e72b3d68"
+            }
+          },
+          "metadata": {}
         },
-        "bfda4d80ca4f4805be90772690d26fe0": {
-          "model_module": "@jupyter-widgets/base",
-          "model_module_version": "1.2.0",
-          "model_name": "LayoutModel",
-          "state": {
-            "_model_module": "@jupyter-widgets/base",
-            "_model_module_version": "1.2.0",
-            "_model_name": "LayoutModel",
-            "_view_count": null,
-            "_view_module": "@jupyter-widgets/base",
-            "_view_module_version": "1.2.0",
-            "_view_name": "LayoutView",
-            "align_content": null,
-            "align_items": null,
-            "align_self": null,
-            "border": null,
-            "bottom": null,
-            "display": null,
-            "flex": null,
-            "flex_flow": null,
-            "grid_area": null,
-            "grid_auto_columns": null,
-            "grid_auto_flow": null,
-            "grid_auto_rows": null,
-            "grid_column": null,
-            "grid_gap": null,
-            "grid_row": null,
-            "grid_template_areas": null,
-            "grid_template_columns": null,
-            "grid_template_rows": null,
-            "height": null,
-            "justify_content": null,
-            "justify_items": null,
-            "left": null,
-            "margin": null,
-            "max_height": null,
-            "max_width": null,
-            "min_height": null,
-            "min_width": null,
-            "object_fit": null,
-            "object_position": null,
-            "order": null,
-            "overflow": null,
-            "overflow_x": null,
-            "overflow_y": null,
-            "padding": null,
-            "right": null,
-            "top": null,
-            "visibility": null,
-            "width": null
-          }
+        {
+          "output_type": "stream",
+          "name": "stdout",
+          "text": [
+            "464\n",
+            "452\n",
+            "457\n",
+            "465\n",
+            "448\n",
+            "468\n",
+            "434\n",
+            "447\n",
+            "455\n",
+            "445\n",
+            "449\n",
+            "455\n",
+            "431\n",
+            "453\n"
+          ]
         },
-        "c3a4b958e4814294801495226697bce2": {
-          "model_module": "@jupyter-widgets/base",
-          "model_module_version": "1.2.0",
-          "model_name": "LayoutModel",
-          "state": {
-            "_model_module": "@jupyter-widgets/base",
-            "_model_module_version": "1.2.0",
-            "_model_name": "LayoutModel",
-            "_view_count": null,
-            "_view_module": "@jupyter-widgets/base",
-            "_view_module_version": "1.2.0",
-            "_view_name": "LayoutView",
-            "align_content": null,
-            "align_items": null,
-            "align_self": null,
-            "border": null,
-            "bottom": null,
-            "display": null,
-            "flex": null,
-            "flex_flow": null,
-            "grid_area": null,
-            "grid_auto_columns": null,
-            "grid_auto_flow": null,
-            "grid_auto_rows": null,
-            "grid_column": null,
-            "grid_gap": null,
-            "grid_row": null,
-            "grid_template_areas": null,
-            "grid_template_columns": null,
-            "grid_template_rows": null,
-            "height": null,
-            "justify_content": null,
-            "justify_items": null,
-            "left": null,
-            "margin": null,
-            "max_height": null,
-            "max_width": null,
-            "min_height": null,
-            "min_width": null,
-            "object_fit": null,
-            "object_position": null,
-            "order": null,
-            "overflow": null,
-            "overflow_x": null,
-            "overflow_y": null,
-            "padding": null,
-            "right": null,
-            "top": null,
-            "visibility": null,
-            "width": null
-          }
+        {
+          "output_type": "stream",
+          "name": "stderr",
+          "text": [
+            "100%|██████████| 108/108 [00:42<00:00,  2.57it/s]\n",
+            "100%|██████████| 108/108 [00:58<00:00,  1.85it/s]\n",
+            "100%|██████████| 108/108 [00:43<00:00,  2.51it/s]\n"
+          ]
         },
-        "c58ea3f8afc64b17a553aecfe07b375d": {
-          "model_module": "@jupyter-widgets/controls",
-          "model_module_version": "1.5.0",
-          "model_name": "HBoxModel",
-          "state": {
-            "_dom_classes": [],
-            "_model_module": "@jupyter-widgets/controls",
-            "_model_module_version": "1.5.0",
-            "_model_name": "HBoxModel",
-            "_view_count": null,
-            "_view_module": "@jupyter-widgets/controls",
-            "_view_module_version": "1.5.0",
-            "_view_name": "HBoxView",
-            "box_style": "",
-            "children": [
-              "IPY_MODEL_da5417a69cb5466db258defea0a70f7c",
-              "IPY_MODEL_45a7725a8e8b45c1937eca9dffe650d3",
-              "IPY_MODEL_d33e03cfb6c340bf9c1d661e633afc2e"
+        {
+          "output_type": "display_data",
+          "data": {
+            "text/plain": [
+              "Generating embeddings:   0%|          | 0/108 [00:00<?, ?it/s]"
             ],
-            "layout": "IPY_MODEL_6dc4da2c822c460ca0c2a11266806504"
-          }
+            "application/vnd.jupyter.widget-view+json": {
+              "version_major": 2,
+              "version_minor": 0,
+              "model_id": "8cc800fbe6bc4f4da5dd6b93d4a5143a"
+            }
+          },
+          "metadata": {}
+        }
+      ]
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "len( nodes )"
+      ],
+      "metadata": {
+        "colab": {
+          "base_uri": "https://localhost:8080/"
         },
-        "d33e03cfb6c340bf9c1d661e633afc2e": {
-          "model_module": "@jupyter-widgets/controls",
-          "model_module_version": "1.5.0",
-          "model_name": "HTMLModel",
-          "state": {
-            "_dom_classes": [],
-            "_model_module": "@jupyter-widgets/controls",
-            "_model_module_version": "1.5.0",
-            "_model_name": "HTMLModel",
-            "_view_count": null,
-            "_view_module": "@jupyter-widgets/controls",
-            "_view_module_version": "1.5.0",
-            "_view_name": "HTMLView",
-            "description": "",
-            "description_tooltip": null,
-            "layout": "IPY_MODEL_e301611efb2b4a19b08c13c76ceb8ab5",
-            "placeholder": "​",
-            "style": "IPY_MODEL_d39554575910469cb65078ea82c988b6",
-            "value": " 94/94 [00:03&lt;00:00, 26.26it/s]"
-          }
+        "id": "mPGa85hM2P3P",
+        "outputId": "c106c463-2459-4b11-bbae-5bd5e2246011"
+      },
+      "execution_count": null,
+      "outputs": [
+        {
+          "output_type": "execute_result",
+          "data": {
+            "text/plain": [
+              "108"
+            ]
+          },
+          "metadata": {},
+          "execution_count": 109
+        }
+      ]
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "# Compress the vector store directory to a zip file to be able to download and use later.\n",
+        "!zip -r vectorstore.zip mini-llama-articles"
+      ],
+      "metadata": {
+        "id": "23x20bL3_jRb"
+      },
+      "execution_count": null,
+      "outputs": []
+    },
+    {
+      "cell_type": "markdown",
+      "source": [
+        "# Load Indexes"
+      ],
+      "metadata": {
+        "id": "OWaT6rL7ksp8"
+      }
+    },
+    {
+      "cell_type": "markdown",
+      "source": [
+        "If you have already uploaded the zip file for the vector store checkpoint, please uncomment the code in the following cell block to extract its contents. After doing so, you will be able to load the dataset from local storage."
+      ],
+      "metadata": {
+        "id": "xnShapZMdlqD"
+      }
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "# !unzip vectorstore.zip"
+      ],
+      "metadata": {
+        "colab": {
+          "base_uri": "https://localhost:8080/"
         },
-        "d39554575910469cb65078ea82c988b6": {
-          "model_module": "@jupyter-widgets/controls",
-          "model_module_version": "1.5.0",
-          "model_name": "DescriptionStyleModel",
-          "state": {
-            "_model_module": "@jupyter-widgets/controls",
-            "_model_module_version": "1.5.0",
-            "_model_name": "DescriptionStyleModel",
-            "_view_count": null,
-            "_view_module": "@jupyter-widgets/base",
-            "_view_module_version": "1.2.0",
-            "_view_name": "StyleView",
-            "description_width": ""
-          }
+        "id": "SodY2Xpf_kxg",
+        "outputId": "d60906e8-d08c-4f80-fa30-006bcb732f0d"
+      },
+      "execution_count": null,
+      "outputs": [
+        {
+          "output_type": "stream",
+          "name": "stdout",
+          "text": [
+            "Archive:  vectorstore.zip\n",
+            "   creating: mini-llama-articles/\n",
+            "   creating: mini-llama-articles/a361e92f-9895-41b6-ba72-4ad38e9875bd/\n",
+            "  inflating: mini-llama-articles/a361e92f-9895-41b6-ba72-4ad38e9875bd/data_level0.bin  \n",
+            "  inflating: mini-llama-articles/a361e92f-9895-41b6-ba72-4ad38e9875bd/header.bin  \n",
+            " extracting: mini-llama-articles/a361e92f-9895-41b6-ba72-4ad38e9875bd/link_lists.bin  \n",
+            "  inflating: mini-llama-articles/a361e92f-9895-41b6-ba72-4ad38e9875bd/length.bin  \n",
+            "  inflating: mini-llama-articles/chroma.sqlite3  \n"
+          ]
+        }
+      ]
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "# Load the vector store from the local storage.\n",
+        "db = chromadb.PersistentClient(path=\"./mini-llama-articles\")\n",
+        "chroma_collection = db.get_or_create_collection(\"mini-llama-articles\")\n",
+        "vector_store = ChromaVectorStore(chroma_collection=chroma_collection)"
+      ],
+      "metadata": {
+        "id": "mXi56KTXk2sp"
+      },
+      "execution_count": null,
+      "outputs": []
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "from llama_index import VectorStoreIndex\n",
+        "\n",
+        "# Create the index based on the vector store.\n",
+        "index = VectorStoreIndex.from_vector_store(vector_store)"
+      ],
+      "metadata": {
+        "id": "jKXURvLtkuTS"
+      },
+      "execution_count": null,
+      "outputs": []
+    },
+    {
+      "cell_type": "markdown",
+      "source": [
+        "# Query Dataset"
+      ],
+      "metadata": {
+        "id": "8JPD8yAinVSq"
+      }
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "# Define a query engine that is responsible for retrieving related pieces of text,\n",
+        "# and using an LLM to formulate the final answer.\n",
+        "query_engine = index.as_query_engine()\n",
+        "\n",
+        "res = query_engine.query(\"How many parameters LLaMA2 model has?\")"
+      ],
+      "metadata": {
+        "id": "b0gue7cyctt1"
+      },
+      "execution_count": null,
+      "outputs": []
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "res.response"
+      ],
+      "metadata": {
+        "colab": {
+          "base_uri": "https://localhost:8080/",
+          "height": 53
         },
-        "da5417a69cb5466db258defea0a70f7c": {
-          "model_module": "@jupyter-widgets/controls",
-          "model_module_version": "1.5.0",
-          "model_name": "HTMLModel",
-          "state": {
-            "_dom_classes": [],
-            "_model_module": "@jupyter-widgets/controls",
-            "_model_module_version": "1.5.0",
-            "_model_name": "HTMLModel",
-            "_view_count": null,
-            "_view_module": "@jupyter-widgets/controls",
-            "_view_module_version": "1.5.0",
-            "_view_name": "HTMLView",
-            "description": "",
-            "description_tooltip": null,
-            "layout": "IPY_MODEL_b26896dfe0ba4779bf753602039ece5a",
-            "placeholder": "​",
-            "style": "IPY_MODEL_033ed4123cec43868ada3795d974d895",
-            "value": "Generating embeddings: 100%"
-          }
+        "id": "VKK3jMprctre",
+        "outputId": "93cfbd8f-d0ee-4070-b557-5ae1fff4aeeb"
+      },
+      "execution_count": null,
+      "outputs": [
+        {
+          "output_type": "execute_result",
+          "data": {
+            "text/plain": [
+              "'The Llama 2 model is available in four different sizes: 7 billion, 13 billion, 34 billion, and 70 billion parameters.'"
+            ],
+            "application/vnd.google.colaboratory.intrinsic+json": {
+              "type": "string"
+            }
+          },
+          "metadata": {},
+          "execution_count": 13
+        }
+      ]
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "# Show the retrieved nodes\n",
+        "for src in res.source_nodes:\n",
+        "  print(\"Node ID\\t\", src.node_id)\n",
+        "  print(\"Title\\t\", src.metadata['title'])\n",
+        "  print(\"Text\\t\", src.text)\n",
+        "  print(\"Score\\t\", src.score)\n",
+        "  print(\"-_\"*20)"
+      ],
+      "metadata": {
+        "colab": {
+          "base_uri": "https://localhost:8080/"
         },
-        "dc498ad680d44d1e8e6fd2df2541a8ba": {
-          "model_module": "@jupyter-widgets/controls",
-          "model_module_version": "1.5.0",
-          "model_name": "FloatProgressModel",
-          "state": {
-            "_dom_classes": [],
-            "_model_module": "@jupyter-widgets/controls",
-            "_model_module_version": "1.5.0",
-            "_model_name": "FloatProgressModel",
-            "_view_count": null,
-            "_view_module": "@jupyter-widgets/controls",
-            "_view_module_version": "1.5.0",
-            "_view_name": "ProgressView",
-            "bar_style": "success",
-            "description": "",
-            "description_tooltip": null,
-            "layout": "IPY_MODEL_4211db3192514c8189db0430779d660a",
-            "max": 14,
-            "min": 0,
-            "orientation": "horizontal",
-            "style": "IPY_MODEL_9a5ad060a90c4f14ba05527fdcfe8a72",
-            "value": 14
-          }
+        "id": "465dH4yQc7Ct",
+        "outputId": "85af1ac6-4ece-4c84-ee1d-675cff3080ee"
+      },
+      "execution_count": null,
+      "outputs": [
+        {
+          "output_type": "stream",
+          "name": "stdout",
+          "text": [
+            "Node ID\t d6f533e5-fef8-469c-a313-def19fd38efe\n",
+            "Title\t Meta's Llama 2: Revolutionizing Open Source Language Models for Commercial Use\n",
+            "Text\t I. Llama 2: Revolutionizing Commercial Use Unlike its predecessor Llama 1, which was limited to research use, Llama 2 represents a major advancement as an open-source commercial model. Businesses can now integrate Llama 2 into products to create AI-powered applications. Availability on Azure and AWS facilitates fine-tuning and adoption. However, restrictions apply to prevent exploitation. Companies with over 700 million active daily users cannot use Llama 2. Additionally, its output cannot be used to improve other language models.  II. Llama 2 Model Flavors Llama 2 is available in four different model sizes: 7 billion, 13 billion, 34 billion, and 70 billion parameters. While 7B, 13B, and 70B have already been released, the 34B model is still awaited. The pretrained variant, trained on a whopping 2 trillion tokens, boasts a context window of 4096 tokens, twice the size of its predecessor Llama 1. Meta also released a Llama 2 fine-tuned model for chat applications that was trained on over 1 million human annotations. Such extensive training comes at a cost, with the 70B model taking a staggering 1720320 GPU hours to train. The context window's length determines the amount of content the model can process at once, making Llama 2 a powerful language model in terms of scale and efficiency.  III. Safety Considerations: A Top Priority for Meta Meta's commitment to safety and alignment shines through in Llama 2's design. The model demonstrates exceptionally low AI safety violation percentages, surpassing even ChatGPT in safety benchmarks. Finding the right balance between helpfulness and safety when optimizing a model poses significant challenges. While a highly helpful model may be capable of answering any question, including sensitive ones like \"How do I build a bomb?\", it also raises concerns about potential misuse. Thus, striking the perfect equilibrium between providing useful information and ensuring safety is paramount. 
However, prioritizing safety to an extreme extent can lead to a model that struggles to effectively address a diverse range of questions. This limitation could hinder the model's practical applicability and user experience. Thus, achieving\n",
+            "Score\t 0.7078549032318474\n",
+            "-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_\n",
+            "Node ID\t 2f3b7c34-8fd0-4134-af38-ef1b77e32cd8\n",
+            "Title\t Meta's Llama 2: Revolutionizing Open Source Language Models for Commercial Use\n",
+            "Text\t The model demonstrates exceptionally low AI safety violation percentages, surpassing even ChatGPT in safety benchmarks. Finding the right balance between helpfulness and safety when optimizing a model poses significant challenges. While a highly helpful model may be capable of answering any question, including sensitive ones like \"How do I build a bomb?\", it also raises concerns about potential misuse. Thus, striking the perfect equilibrium between providing useful information and ensuring safety is paramount. However, prioritizing safety to an extreme extent can lead to a model that struggles to effectively address a diverse range of questions. This limitation could hinder the model's practical applicability and user experience. Thus, achieving an optimum balance that allows the model to be both helpful and safe is of utmost importance. To strike the right balance between helpfulness and safety, Meta employed two reward models - one for helpfulness and another for safety - to optimize the model's responses. The 34B parameter model has reported higher safety violations than other variants, possibly contributing to the delay in its release.  IV. Helpfulness Comparison: Llama 2 Outperforms Competitors Llama 2 emerges as a strong contender in the open-source language model arena, outperforming its competitors in most categories. The 70B parameter model outperforms all other open-source models, while the 7B and 34B models outshine Falcon in all categories and MPT in all categories except coding. Despite being smaller, Llam a2's performance rivals that of Chat GPT 3.5, a significantly larger closed-source model. While GPT 4 and PalM-2-L, with their larger size, outperform Llama 2, this is expected due to their capacity for handling complex language tasks. Llama 2's impressive ability to compete with larger models highlights its efficiency and potential in the market. 
However, Llama 2 does face challenges in coding and math problems, where models like Chat GPT 4 excel, given their significantly larger size. Chat GPT 4 performed significantly better than Llama 2 for coding (HumanEval benchmark)and math problem tasks (GSM8k benchmark). Open-source AI technologies, like Llama 2, continue to advance, offering\n",
+            "Score\t 0.7026792232112851\n",
+            "-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_\n"
+          ]
+        }
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "source": [
+        "### Trying a different Query"
+      ],
+      "metadata": {
+        "id": "GrqBq8Dfidw6"
+      }
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "res = query_engine.query(\"Does GQA helped LLaMA performance?\")"
+      ],
+      "metadata": {
+        "id": "MMBQJcPaigA0"
+      },
+      "execution_count": null,
+      "outputs": []
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "res.response"
+      ],
+      "metadata": {
+        "colab": {
+          "base_uri": "https://localhost:8080/",
+          "height": 53
         },
-        "de2088a425104f05b52b7a3236c7baa9": {
-          "model_module": "@jupyter-widgets/controls",
-          "model_module_version": "1.5.0",
-          "model_name": "HTMLModel",
-          "state": {
-            "_dom_classes": [],
-            "_model_module": "@jupyter-widgets/controls",
-            "_model_module_version": "1.5.0",
-            "_model_name": "HTMLModel",
-            "_view_count": null,
-            "_view_module": "@jupyter-widgets/controls",
-            "_view_module_version": "1.5.0",
-            "_view_name": "HTMLView",
-            "description": "",
-            "description_tooltip": null,
-            "layout": "IPY_MODEL_a1a88448b188407b8e4aa2af86fb9345",
-            "placeholder": "​",
-            "style": "IPY_MODEL_6a4cc229f5774cb0b4d3def7eee8b56e",
-            "value": " 108/108 [00:04&lt;00:00, 22.53it/s]"
-          }
+        "id": "N2QbpT0skT75",
+        "outputId": "c80a09e3-2d1b-464b-bb3e-547c23571b34"
+      },
+      "execution_count": null,
+      "outputs": [
+        {
+          "output_type": "execute_result",
+          "data": {
+            "text/plain": [
+              "'No, the context does not provide any information about whether GQA (Generalized Question Answering) helped improve the performance of LLaMA.'"
+            ],
+            "application/vnd.google.colaboratory.intrinsic+json": {
+              "type": "string"
+            }
+          },
+          "metadata": {},
+          "execution_count": 68
+        }
+      ]
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "for src in res.source_nodes:\n",
+        "  print(\"Node ID\\t\", src.node_id)\n",
+        "  print(\"Title\\t\", src.metadata['title'])\n",
+        "  print(\"Text\\t\", src.text)\n",
+        "  print(\"Score\\t\", src.score)\n",
+        "  print(\"-_\"*20)"
+      ],
+      "metadata": {
+        "colab": {
+          "base_uri": "https://localhost:8080/"
         },
-        "e301611efb2b4a19b08c13c76ceb8ab5": {
-          "model_module": "@jupyter-widgets/base",
-          "model_module_version": "1.2.0",
-          "model_name": "LayoutModel",
-          "state": {
-            "_model_module": "@jupyter-widgets/base",
-            "_model_module_version": "1.2.0",
-            "_model_name": "LayoutModel",
-            "_view_count": null,
-            "_view_module": "@jupyter-widgets/base",
-            "_view_module_version": "1.2.0",
-            "_view_name": "LayoutView",
-            "align_content": null,
-            "align_items": null,
-            "align_self": null,
-            "border": null,
-            "bottom": null,
-            "display": null,
-            "flex": null,
-            "flex_flow": null,
-            "grid_area": null,
-            "grid_auto_columns": null,
-            "grid_auto_flow": null,
-            "grid_auto_rows": null,
-            "grid_column": null,
-            "grid_gap": null,
-            "grid_row": null,
-            "grid_template_areas": null,
-            "grid_template_columns": null,
-            "grid_template_rows": null,
-            "height": null,
-            "justify_content": null,
-            "justify_items": null,
-            "left": null,
-            "margin": null,
-            "max_height": null,
-            "max_width": null,
-            "min_height": null,
-            "min_width": null,
-            "object_fit": null,
-            "object_position": null,
-            "order": null,
-            "overflow": null,
-            "overflow_x": null,
-            "overflow_y": null,
-            "padding": null,
-            "right": null,
-            "top": null,
-            "visibility": null,
-            "width": null
-          }
+        "id": "f9HPdfMjqsbQ",
+        "outputId": "8ac496a2-90ff-490f-d67c-46ff544faa39"
+      },
+      "execution_count": null,
+      "outputs": [
+        {
+          "output_type": "stream",
+          "name": "stdout",
+          "text": [
+            "Node ID\t 1fd37a6f-bf45-4b03-ae54-95f4c84796cb\n",
+            "Title\t Exploring Large Language Models -Part 3\n",
+            "Text\t and then using that to create a QA dataset was not effective. This is where the Self-instruct concept could be used However previous to Llama2, the best-performing model was the GPT 3/4 model via ChatGPT or its API and using these models to do the same was expensive. The 7 billion model of Llama2 has sufficient NLU (Natural Language Understanding) to create output based on a particular format. Running this in 4-bit mode via Quantisation makes it feasible compute-wise to run this on a large data set and convert it to a QA dataset. This was the prompt used. The context was a sliding window from the text dataset. Some minimal parsing and finetuning were done on the output of the model, and we could generate a QA dataset of the format below. This was fed to the QLoRA-based fine-tuning (Colab Notebook). We can see that the output from a fine-tuned 4-bit quantized llama2 7 B model is pretty good. Colab Notebook Trying to reduce hallucination via fine-tuning In the generated dataset, I added a specific tag `Source:8989REF`. The idea was that via attention, this token will be somehow associated with the text that we were training on. And then to use this hash somehow to tweak the prompt to control hallucination. Something like \"[INST] <<SYS>>\\nYou are a helpful Question Answering Assistant. Please only answer from this reference Source:8989REF\" However, that turned out to be a very naive attempt. Also, note that the generated QA missed transforming training data related to Professor Thiersch's method to a proper QA dataset. These and other improvements need to be experimented with, as well as to train with some completely new data that the model has not seen to test more effectively. Update: Training with new data was done by writing an imaginary story with ChatGPT help and then creating an instruction tuning data set (colab notebook). The model was then trained and tested (colab notebook) with this generated instruct dataset. 
The results confirm that the model learns via Instruct tuning, not only the fed questions but other details and relations of the domain. Problems with hallucinations remain (Bordor, Lila characters who are\n",
+            "Score\t 0.7046179109299758\n",
+            "-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_\n",
+            "Node ID\t 6906e3b8-4c42-453c-9b60-9f5e4b1d3304\n",
+            "Title\t LLaMA by Meta leaked by an anonymous forum: Questions Arises on Meta\n",
+            "Text\t LLaMA: Meta's new AI tool According to the official release, LLaMA is a foundational language model developed to assist 'researchers and academics' in their work (as opposed to the average web user) to understand and study these NLP models. Leveraging AI in such a way could give researchers an edge in terms of time spent. You may not know this, but this would be Meta's third LLM after Blender Bot 3 and Galactica. However, the two LLMs were shut down soon, and Meta stopped their further development, as it produced erroneous results. Before moving further, it is important to emphasize that LLaMA is NOT a chatbot like ChatGPT. As I mentioned before, it is a 'research tool' for researchers. We can expect the initial versions of LLaMA to be a bit more technical and indirect to use as opposed to the case with ChatGPT, which was very direct, interactive, and a lot easy to use. \"Smaller, more performant models such as LLaMA enable ... research community who don't have access to large amounts of infrastructure to study these models.. further democratizing access in this important, fast-changing field,\" said Meta in its official blog. Meta's effort of \"democratizing\" access to the public could shed light on one of the critical issues of Generative AI - toxicity and bias. ChatGPT and other LLMs (obviously, I am referring to Bing) have a track record of responding in a way that is toxic and, well... evil. The Verge and major critics have covered it in much detail. Oh and the community did get the access, but not in the way Meta anticipated. On March 3rd, a downloadable torrent of the LLaMA system was posted on 4chan. 4chan is an anonymous online forum known for its controversial content and diverse range of discussions, which has nearly 222 million unique monthly visitors. LLaMA is currently not in use on any of Meta's products. But Meta has plans to make it available to researchers before they can use them in their own products. 
It's worth mentioning that Meta did not release\n",
+            "Score\t 0.6920492401808848\n",
+            "-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_\n"
+          ]
+        }
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "source": [
+        "From the articles:\n",
+        "  \n",
+        "> [...]The 7 billion model of Llama2 has sufficient NLU (Natural Language Understanding) to create output based on a particular format[...]\n",
+        "\n"
+      ],
+      "metadata": {
+        "id": "TmkI8BV8rATi"
+      }
+    },
+    {
+      "cell_type": "markdown",
+      "source": [
+        "# No Metadata"
+      ],
+      "metadata": {
+        "id": "6Wx-IPSMbSwC"
+      }
+    },
+    {
+      "cell_type": "markdown",
+      "source": [
+        "Now, let's evaluate how the query engine performs without the generated metadata (such as extracted keywords or summaries)."
+      ],
+      "metadata": {
+        "id": "h8QUcGEgeNsD"
+      }
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "from llama_index import Document\n",
+        "\n",
+        "documents_no_meta = [Document(text=row[1]) for row in rows]"
+      ],
+      "metadata": {
+        "id": "oGunPKGRbT6H"
+      },
+      "execution_count": null,
+      "outputs": []
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "from llama_index.embeddings import OpenAIEmbedding\n",
+        "from llama_index.ingestion import IngestionPipeline\n",
+        "\n",
+        "pipeline = IngestionPipeline(\n",
+        "    transformations=[\n",
+        "        text_splitter,\n",
+        "        OpenAIEmbedding(),\n",
+        "    ]\n",
+        ")\n",
+        "\n",
+        "nodes_no_meta = pipeline.run(documents=documents_no_meta, show_progress=True)"
+      ],
+      "metadata": {
+        "colab": {
+          "base_uri": "https://localhost:8080/",
+          "height": 331,
+          "referenced_widgets": [
+            "b10233c49dcc4a2f89de5389309d4fb4",
+            "c617a0bc420b453693bb697a235e50d7",
+            "f14f74d98f824013b562c82fb251ac26",
+            "19f8baa6c24e4c7a8888f73f3cb7e3f8",
+            "19c0bf2b745640b3adf6478738ba02ea",
+            "0258a4a4bdc24404aa005c3b4d1235ee",
+            "8da878f475de494fac3f7acf29e4e7f0",
+            "dc5b9ea6aeea42dfae978e4a8961b03a",
+            "aefce46940904fce9c4e439784cbc28c",
+            "dcfeadeb1cc2483399e8194ec43f2eee",
+            "7cec42608a51413796ec41250e0eed6d",
+            "036ae37776684a46a1a1f9e3c018a87e",
+            "de5e18d6629d4cd0abf9e5c72d07ac73",
+            "29ff5f2d9c114e8bb1b7461dbae2fdb8",
+            "0f79e4f5836f4ebf80af47c8e100b012",
+            "99a5712bb6b64f68b30b9a1dbbc803fb",
+            "24fe3fb4e04546b3a17377d3e6ff61d6",
+            "931b9be975234aa79ae55aa12629f661",
+            "63bf1ccee3ad4101920f74bb2410bfe6",
+            "8f353fecd64a4e18be6fe2eb4fea3f9d",
+            "d190edde40f04461ba066bc7f10b9d31",
+            "3114d5176097487bb1313cd49867680f"
+          ]
         },
-        "e31244d1c2b345a9950de74aac576290": {
-          "model_module": "@jupyter-widgets/controls",
-          "model_module_version": "1.5.0",
-          "model_name": "DescriptionStyleModel",
-          "state": {
-            "_model_module": "@jupyter-widgets/controls",
-            "_model_module_version": "1.5.0",
-            "_model_name": "DescriptionStyleModel",
-            "_view_count": null,
-            "_view_module": "@jupyter-widgets/base",
-            "_view_module_version": "1.2.0",
-            "_view_name": "StyleView",
-            "description_width": ""
-          }
+        "id": "Hxf4jT6afiZt",
+        "outputId": "48b34670-17cf-494f-9d39-58ae9c47822a"
+      },
+      "execution_count": null,
+      "outputs": [
+        {
+          "output_type": "display_data",
+          "data": {
+            "text/plain": [
+              "Parsing nodes:   0%|          | 0/14 [00:00<?, ?it/s]"
+            ],
+            "application/vnd.jupyter.widget-view+json": {
+              "version_major": 2,
+              "version_minor": 0,
+              "model_id": "b10233c49dcc4a2f89de5389309d4fb4"
+            }
+          },
+          "metadata": {}
         },
-        "e4413564a300469d86c3abc567f24701": {
-          "model_module": "@jupyter-widgets/base",
-          "model_module_version": "1.2.0",
-          "model_name": "LayoutModel",
-          "state": {
-            "_model_module": "@jupyter-widgets/base",
-            "_model_module_version": "1.2.0",
-            "_model_name": "LayoutModel",
-            "_view_count": null,
-            "_view_module": "@jupyter-widgets/base",
-            "_view_module_version": "1.2.0",
-            "_view_name": "LayoutView",
-            "align_content": null,
-            "align_items": null,
-            "align_self": null,
-            "border": null,
-            "bottom": null,
-            "display": null,
-            "flex": null,
-            "flex_flow": null,
-            "grid_area": null,
-            "grid_auto_columns": null,
-            "grid_auto_flow": null,
-            "grid_auto_rows": null,
-            "grid_column": null,
-            "grid_gap": null,
-            "grid_row": null,
-            "grid_template_areas": null,
-            "grid_template_columns": null,
-            "grid_template_rows": null,
-            "height": null,
-            "justify_content": null,
-            "justify_items": null,
-            "left": null,
-            "margin": null,
-            "max_height": null,
-            "max_width": null,
-            "min_height": null,
-            "min_width": null,
-            "object_fit": null,
-            "object_position": null,
-            "order": null,
-            "overflow": null,
-            "overflow_x": null,
-            "overflow_y": null,
-            "padding": null,
-            "right": null,
-            "top": null,
-            "visibility": null,
-            "width": null
-          }
+        {
+          "output_type": "stream",
+          "name": "stdout",
+          "text": [
+            "510\n",
+            "510\n",
+            "510\n",
+            "510\n",
+            "510\n",
+            "510\n",
+            "510\n",
+            "510\n",
+            "510\n",
+            "510\n",
+            "510\n",
+            "510\n",
+            "510\n",
+            "510\n"
+          ]
+        },
+        {
+          "output_type": "display_data",
+          "data": {
+            "text/plain": [
+              "Generating embeddings:   0%|          | 0/94 [00:00<?, ?it/s]"
+            ],
+            "application/vnd.jupyter.widget-view+json": {
+              "version_major": 2,
+              "version_minor": 0,
+              "model_id": "036ae37776684a46a1a1f9e3c018a87e"
+            }
+          },
+          "metadata": {}
+        }
+      ]
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "from llama_index import ServiceContext\n",
+        "\n",
+        "index_no_metadata = VectorStoreIndex(\n",
+        "    nodes=nodes_no_meta,\n",
+        "    service_context=ServiceContext.from_defaults(llm=OpenAI(model=\"gpt-3.5-turbo\")),\n",
+        ")"
+      ],
+      "metadata": {
+        "id": "A39Y1Rv6fiXE"
+      },
+      "execution_count": null,
+      "outputs": []
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "query_engine_no_metadata = index_no_metadata.as_query_engine()"
+      ],
+      "metadata": {
+        "id": "BOpdZdQufiUu"
+      },
+      "execution_count": null,
+      "outputs": []
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "res = query_engine_no_metadata.query(\"Does GQA helped LLaMA performance?\")"
+      ],
+      "metadata": {
+        "id": "2U2NIE2Yfz8E"
+      },
+      "execution_count": null,
+      "outputs": []
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "res.response"
+      ],
+      "metadata": {
+        "colab": {
+          "base_uri": "https://localhost:8080/",
+          "height": 125
         },
-        "ed22c91e813c4351ab1d3eb7e174796c": {
-          "model_module": "@jupyter-widgets/controls",
-          "model_module_version": "1.5.0",
-          "model_name": "FloatProgressModel",
-          "state": {
-            "_dom_classes": [],
-            "_model_module": "@jupyter-widgets/controls",
-            "_model_module_version": "1.5.0",
-            "_model_name": "FloatProgressModel",
-            "_view_count": null,
-            "_view_module": "@jupyter-widgets/controls",
-            "_view_module_version": "1.5.0",
-            "_view_name": "ProgressView",
-            "bar_style": "success",
-            "description": "",
-            "description_tooltip": null,
-            "layout": "IPY_MODEL_2c8aef5e8ec848c0a23c72581e5f4b1e",
-            "max": 108,
-            "min": 0,
-            "orientation": "horizontal",
-            "style": "IPY_MODEL_7d54abb8f3784a789fd042c2ed2dd685",
-            "value": 108
-          }
+        "id": "mxT7_IJ7f1gU",
+        "outputId": "1453e5c3-2637-4d33-f958-832723fd7bea"
+      },
+      "execution_count": null,
+      "outputs": [
+        {
+          "output_type": "execute_result",
+          "data": {
+            "text/plain": [
+              "'Yes, GQA (Good Quality Answer) helped improve the performance of LLaMA (Large Language Model). The context mentions that one of the hardest problems in training LLaMA is finding or creating a good quality dataset. In this case, the available training dataset was converted to a QA (Question Answering) format, which was then used to fine-tune the LLaMA model. This fine-tuning process, along with the use of QLoRa (Quantization and LoRa), resulted in a good performance of the 4-bit quantized LLaMA model. Therefore, it can be inferred that GQA, in the form of a QA dataset, played a role in improving the performance of LLaMA.'"
+            ],
+            "application/vnd.google.colaboratory.intrinsic+json": {
+              "type": "string"
+            }
+          },
+          "metadata": {},
+          "execution_count": 65
+        }
+      ]
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "for src in res.source_nodes:\n",
+        "  print(\"Node ID\\t\", src.node_id)\n",
+        "  print(\"Text\\t\", src.text)\n",
+        "  print(\"Score\\t\", src.score)\n",
+        "  print(\"-_\"*20)"
+      ],
+      "metadata": {
+        "colab": {
+          "base_uri": "https://localhost:8080/"
         },
-        "fd6a36e947ec451a938d266117dab12e": {
-          "model_module": "@jupyter-widgets/controls",
-          "model_module_version": "1.5.0",
-          "model_name": "DescriptionStyleModel",
-          "state": {
-            "_model_module": "@jupyter-widgets/controls",
-            "_model_module_version": "1.5.0",
-            "_model_name": "DescriptionStyleModel",
-            "_view_count": null,
-            "_view_module": "@jupyter-widgets/base",
-            "_view_module_version": "1.2.0",
-            "_view_name": "StyleView",
-            "description_width": ""
-          }
+        "id": "GD5SQ7VEf2wR",
+        "outputId": "b31499f2-fdb9-41e3-ca93-ccdfced3209f"
+      },
+      "execution_count": null,
+      "outputs": [
+        {
+          "output_type": "stream",
+          "name": "stdout",
+          "text": [
+            "Node ID\t 7ba16ba4-1088-4296-ae94-f40e274c66f5\n",
+            "Text\t 137B parameters and perform instruction tuning ... Since we use QLoRa we are effectively closely following this paper - QLORA: Efficient Finetuning of Quantized LLMs concerning the training data set, the format that the authors used to train their Gauanco model This is the format for the Llama2 model and will be different for others. One of the hardest problems of training is finding or creating a good quality data set to train. In our case, converting the available training data set to the instruction data set. Since our use case is Closed Book QA, we need to convert this to a QA format. Using older NLP methods like NER (Named Entity Recognition) and then using that to create a QA dataset was not effective. This is where the Self-instruct concept could be used However previous to Llama2, the best-performing model was the GPT 3/4 model via ChatGPT or its API and using these models to do the same was expensive. The 7 billion model of Llama2 has sufficient NLU (Natural Language Understanding) to create output based on a particular format. Running this in 4-bit mode via Quantisation makes it feasible compute-wise to run this on a large data set and convert it to a QA dataset. This was the prompt used. The context was a sliding window from the text dataset. Some minimal parsing and finetuning were done on the output of the model, and we could generate a QA dataset of the format below. This was fed to the QLoRA-based fine-tuning (Colab Notebook). We can see that the output from a fine-tuned 4-bit quantized llama2 7 B model is pretty good. Colab Notebook Trying to reduce hallucination via fine-tuning In the generated dataset, I added a specific tag `Source:8989REF`. The idea was that via attention, this token will be somehow associated with the text that we were training on. And then to use this hash somehow to tweak the prompt to control hallucination. Something like \"[INST] <<SYS>>\\nYou are a helpful Question Answering Assistant. 
Please only answer from this reference Source:8989REF\" However, that turned out to be a very naive attempt. Also, note that the generated QA missed transforming training data related to Professor Thiersch's method to a proper QA dataset. These and other improvements need to be experimented with, as well as to train with some completely new data that the model has not seen\n",
+            "Score\t 0.8218537826347032\n",
+            "-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_\n",
+            "Node ID\t fb383f04-a605-4e26-87dc-8fd5d6090334\n",
+            "Text\t run the 7 billion Lamma2 pre-trained model open-sourced recently by Meta Research. Imagine the compressed knowledge and an NLU (Natural Language Understanding) model running on your local laptop. This is still a smallish model, but it's still capable of understanding and has sufficient world knowledge embedded in it to be quite useful. Imagine what a model like this or better models in the future could do if it could run in small servers or in cars, and leverage its causal reasoning and world model knowledge to supervise lower-level/specialist AI/ML systems. So we have now a way to fit reasonably large models (7B or more) in a single GPU, via Quantisation and then train them in a parameter-efficient way via LoRa/QLoRa. Take 1: Un-supervised Training Fine-tuning with QLoRa Using the small training data and QLoRA, I first tried to train a large 7B Lamma2 model by feeding in the training text as is (Causal LM model training via UnSupervised learning). Note that this model was loaded in 4-bit, making it runnable on a single T4 GPU and trained with QLoRa. With QLoRA, only a fraction of the adapter weights are trained and summed with the existing frozen pre-trained weights of the model during inference. Here is an illustrative Colab notebook. You can see that training the model with just the text as is, does not result in proper output to questions. The answers are not affected by the training data. Take 2: Instruct Fine-tuning with QLoRa Instruction Tuning concept is a higher-level training concept introduced by this paper FineTuned Language Models Are Zero shot Learners (FLAN) We leverage the intuition that NLP tasks can be described via natural language instructions, such as \"Is the sentiment of this movie review positive or negative?\" or \"Translate 'how are you' into Chinese.\" We take a pre-trained language model of 137B parameters and perform instruction tuning ... 
Since we use QLoRa we are effectively closely following this paper - QLORA: Efficient Finetuning of Quantized LLMs concerning the training data set, the format that the authors used to train their Gauanco model This is the format for the Llama2 model and will be different for others. One of the hardest problems of training is finding or creating a good quality data set to train. In our case, converting the available training data set to the instruction data set.\n",
+            "Score\t 0.8203676280171278\n",
+            "-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_\n"
+          ]
         }
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "source": [
+        "# Evaluate"
+      ],
+      "metadata": {
+        "id": "iMkpzH7vvb09"
+      }
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "from llama_index.evaluation import generate_question_context_pairs\n",
+        "from llama_index.llms import OpenAI\n",
+        "\n",
+        "# Generate one question per node with the LLM. The resulting dataset is\n",
+        "# used later to check whether the retriever returns the node that each\n",
+        "# question was created for.\n",
+        "llm = OpenAI(model=\"gpt-3.5-turbo\")\n",
+        "rag_eval_dataset = generate_question_context_pairs(\n",
+        "    nodes, llm=llm, num_questions_per_chunk=1\n",
+        ")\n",
+        "\n",
+        "# Persist the evaluation dataset as JSON so it can be reloaded later.\n",
+        "rag_eval_dataset.save_json(\"./rag_eval_dataset.json\")"
+      ],
+      "metadata": {
+        "id": "H8a3eKgKvckU"
+      },
+      "execution_count": null,
+      "outputs": []
+    },
+    {
+      "cell_type": "markdown",
+      "source": [
+        "If you already have a previously generated `rag_eval_dataset.json` file (for example, one you uploaded to this session), uncomment the code in the next cell to load it. This lets you skip the question-generation cell above, saving time and effort."
+      ],
+      "metadata": {
+        "id": "eNP3cmiOe_xS"
       }
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "# from llama_index.finetuning.embeddings.common import (\n",
+        "#     EmbeddingQAFinetuneDataset,\n",
+        "# )\n",
+        "# rag_eval_dataset = EmbeddingQAFinetuneDataset.from_json(\n",
+        "#     \"./rag_eval_dataset.json\"\n",
+        "# )"
+      ],
+      "metadata": {
+        "id": "3sA1K84U254o"
+      },
+      "execution_count": null,
+      "outputs": []
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "import pandas as pd\n",
+        "\n",
+        "#  A simple function to show the evaluation result.\n",
+        "def display_results_retriever(name, eval_results):\n",
+        "    \"\"\"Display results from evaluate.\"\"\"\n",
+        "\n",
+        "    metric_dicts = []\n",
+        "    for eval_result in eval_results:\n",
+        "        metric_dict = eval_result.metric_vals_dict\n",
+        "        metric_dicts.append(metric_dict)\n",
+        "\n",
+        "    full_df = pd.DataFrame(metric_dicts)\n",
+        "\n",
+        "    hit_rate = full_df[\"hit_rate\"].mean()\n",
+        "    mrr = full_df[\"mrr\"].mean()\n",
+        "\n",
+        "    metric_df = pd.DataFrame(\n",
+        "        {\"Retriever Name\": [name], \"Hit Rate\": [hit_rate], \"MRR\": [mrr]}\n",
+        "    )\n",
+        "\n",
+        "    return metric_df"
+      ],
+      "metadata": {
+        "id": "H7ubvcbk27vr"
+      },
+      "execution_count": null,
+      "outputs": []
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "from llama_index.evaluation import RetrieverEvaluator\n",
+        "\n",
+        "# We can evaluate the retrievers with different top_k values.\n",
+        "for top_k in (2, 4, 6, 8, 10):\n",
+        "    # Build a retriever that returns the top_k most similar nodes and\n",
+        "    # score it on MRR and hit rate over the whole evaluation dataset.\n",
+        "    retriever = index.as_retriever(similarity_top_k=top_k)\n",
+        "    retriever_evaluator = RetrieverEvaluator.from_metric_names(\n",
+        "        [\"mrr\", \"hit_rate\"],\n",
+        "        retriever=retriever,\n",
+        "    )\n",
+        "    eval_results = await retriever_evaluator.aevaluate_dataset(rag_eval_dataset)\n",
+        "    summary = display_results_retriever(f\"Retriever top_{top_k}\", eval_results)\n",
+        "    print(summary)"
+      ],
+      "metadata": {
+        "colab": {
+          "base_uri": "https://localhost:8080/"
+        },
+        "id": "uNLxDxoc2-Ac",
+        "outputId": "4084d5d0-21b6-4f0e-aec3-4aab1c8c8c44"
+      },
+      "execution_count": null,
+      "outputs": [
+        {
+          "output_type": "stream",
+          "name": "stdout",
+          "text": [
+            "    Retriever Name  Hit Rate       MRR\n",
+            "0  Retriever top_2  0.650589  0.538049\n",
+            "    Retriever Name  Hit Rate       MRR\n",
+            "0  Retriever top_4  0.765273  0.572615\n",
+            "    Retriever Name  Hit Rate      MRR\n",
+            "0  Retriever top_6   0.81672  0.58278\n",
+            "    Retriever Name  Hit Rate       MRR\n",
+            "0  Retriever top_8  0.846731  0.586084\n",
+            "     Retriever Name  Hit Rate       MRR\n",
+            "0  Retriever top_10  0.861736  0.587795\n"
+          ]
+        }
+      ]
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "from llama_index.evaluation import RelevancyEvaluator, FaithfulnessEvaluator, BatchEvalRunner\n",
+        "from llama_index import ServiceContext\n",
+        "from llama_index.llms import OpenAI\n",
+        "\n",
+        "# While we use GPT3.5-Turbo to answer questions, we can use GPT4 to evaluate the answers.\n",
+        "# The judge LLM and both evaluators do not depend on top_k, so they are\n",
+        "# created once instead of on every loop iteration.\n",
+        "llm_gpt4 = OpenAI(temperature=0, model=\"gpt-4-1106-preview\")\n",
+        "service_context_gpt4 = ServiceContext.from_defaults(llm=llm_gpt4)\n",
+        "\n",
+        "faithfulness_evaluator = FaithfulnessEvaluator(service_context=service_context_gpt4)\n",
+        "relevancy_evaluator = RelevancyEvaluator(service_context=service_context_gpt4)\n",
+        "\n",
+        "# Only the first 20 generated questions are evaluated.\n",
+        "queries = list(rag_eval_dataset.queries.values())\n",
+        "batch_eval_queries = queries[:20]\n",
+        "\n",
+        "for i in [2, 4, 6, 8, 10]:\n",
+        "    # Query engine that retrieves the top-i nodes before answering.\n",
+        "    query_engine = index.as_query_engine(similarity_top_k=i)\n",
+        "\n",
+        "    # Run both evaluators over the selected queries.\n",
+        "    runner = BatchEvalRunner(\n",
+        "        {\"faithfulness\": faithfulness_evaluator, \"relevancy\": relevancy_evaluator},\n",
+        "        workers=8,\n",
+        "    )\n",
+        "    eval_results = await runner.aevaluate_queries(\n",
+        "        query_engine, queries=batch_eval_queries\n",
+        "    )\n",
+        "\n",
+        "    # Each score is the fraction of evaluated answers marked as passing.\n",
+        "    faithfulness_results = eval_results[\"faithfulness\"]\n",
+        "    faithfulness_score = sum(result.passing for result in faithfulness_results) / len(faithfulness_results)\n",
+        "    print(f\"top_{i} faithfulness_score: {faithfulness_score}\")\n",
+        "\n",
+        "    # Relevancy must be computed from the \"relevancy\" results, not from the\n",
+        "    # \"faithfulness\" ones; otherwise it just repeats the faithfulness score.\n",
+        "    relevancy_results = eval_results[\"relevancy\"]\n",
+        "    relevancy_score = sum(result.passing for result in relevancy_results) / len(relevancy_results)\n",
+        "    print(f\"top_{i} relevancy_score: {relevancy_score}\")\n",
+        "    print(\"-_\"*10)"
+      ],
+      "metadata": {
+        "colab": {
+          "base_uri": "https://localhost:8080/"
+        },
+        "id": "3ukkWC9R2_0J",
+        "outputId": "ccde96d4-e431-4f9a-f83c-63678de56a93"
+      },
+      "execution_count": null,
+      "outputs": [
+        {
+          "output_type": "stream",
+          "name": "stdout",
+          "text": [
+            "top_2 faithfulness_score: 1.0\n",
+            "top_2 relevancy_score: 1.0\n",
+            "-_-_-_-_-_-_-_-_-_-_\n",
+            "top_4 faithfulness_score: 1.0\n",
+            "top_4 relevancy_score: 1.0\n",
+            "-_-_-_-_-_-_-_-_-_-_\n",
+            "top_6 faithfulness_score: 1.0\n",
+            "top_6 relevancy_score: 1.0\n",
+            "-_-_-_-_-_-_-_-_-_-_\n",
+            "top_8 faithfulness_score: 1.0\n",
+            "top_8 relevancy_score: 1.0\n",
+            "-_-_-_-_-_-_-_-_-_-_\n",
+            "top_10 faithfulness_score: 1.0\n",
+            "top_10 relevancy_score: 1.0\n",
+            "-_-_-_-_-_-_-_-_-_-_\n"
+          ]
+        }
+      ]
+    },
+    {
+      "cell_type": "code",
+      "source": [],
+      "metadata": {
+        "id": "1MB1YD1E3EKM"
+      },
+      "execution_count": null,
+      "outputs": []
     }
-  },
-  "nbformat": 4,
-  "nbformat_minor": 0
-}
+  ]
+}
\ No newline at end of file