{
  "cells": [
    {
      "cell_type": "markdown",
      "metadata": {
        "colab_type": "text",
        "id": "view-in-github"
      },
      "source": [
        "<a href=\"https://colab.research.google.com/github/towardsai/ai-tutor-rag-system/blob/main/notebooks/Advanced_Retriever.ipynb\" target=\"_parent\"><img src=\"https://colab.research.google.com/assets/colab-badge.svg\" alt=\"Open In Colab\"/></a>\n"
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {
        "id": "UwtfgR2TAiLM"
      },
      "source": [
        "# Install Packages and Setup Variables\n"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": 1,
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "id": "sbO5PUR3AL-i",
        "outputId": "84609394-7c68-4a5b-e00a-ae8ac09a1bb9"
      },
      "outputs": [
        {
          "name": "stdout",
          "output_type": "stream",
          "text": [
            "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m226.7/226.7 kB\u001b[0m \u001b[31m2.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
            "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m1.8/1.8 MB\u001b[0m \u001b[31m11.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
            "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m15.4/15.4 MB\u001b[0m \u001b[31m17.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
            "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m2.0/2.0 MB\u001b[0m \u001b[31m45.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
            "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m75.6/75.6 kB\u001b[0m \u001b[31m5.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
            "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m526.8/526.8 kB\u001b[0m \u001b[31m27.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
            "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m2.4/2.4 MB\u001b[0m \u001b[31m53.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
            "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m92.0/92.0 kB\u001b[0m \u001b[31m6.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
            "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m62.4/62.4 kB\u001b[0m \u001b[31m4.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
            "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m41.3/41.3 kB\u001b[0m \u001b[31m2.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
            "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m6.8/6.8 MB\u001b[0m \u001b[31m52.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
            "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m59.9/59.9 kB\u001b[0m \u001b[31m3.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
            "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m107.0/107.0 kB\u001b[0m \u001b[31m10.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
            "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m67.3/67.3 kB\u001b[0m \u001b[31m1.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
            "\u001b[?25h  Installing build dependencies ... \u001b[?25l\u001b[?25hdone\n",
            "  Getting requirements to build wheel ... \u001b[?25l\u001b[?25hdone\n",
            "  Preparing metadata (pyproject.toml) ... \u001b[?25l\u001b[?25hdone\n",
            "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m283.7/283.7 kB\u001b[0m \u001b[31m13.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
            "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m1.6/1.6 MB\u001b[0m \u001b[31m40.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
            "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m67.6/67.6 kB\u001b[0m \u001b[31m4.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
            "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m142.5/142.5 kB\u001b[0m \u001b[31m1.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
            "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m77.9/77.9 kB\u001b[0m \u001b[31m6.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
            "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m58.3/58.3 kB\u001b[0m \u001b[31m4.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
            "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m141.9/141.9 kB\u001b[0m \u001b[31m7.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
            "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m290.4/290.4 kB\u001b[0m \u001b[31m13.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
            "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m71.9/71.9 kB\u001b[0m \u001b[31m6.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
            "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m53.6/53.6 kB\u001b[0m \u001b[31m4.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
            "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m46.0/46.0 kB\u001b[0m \u001b[31m4.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
            "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m52.5/52.5 kB\u001b[0m \u001b[31m4.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
            "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m130.5/130.5 kB\u001b[0m \u001b[31m13.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
            "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m341.4/341.4 kB\u001b[0m \u001b[31m23.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
            "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m3.4/3.4 MB\u001b[0m \u001b[31m64.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
            "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m1.2/1.2 MB\u001b[0m \u001b[31m52.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
            "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m130.2/130.2 kB\u001b[0m \u001b[31m11.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
            "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m49.3/49.3 kB\u001b[0m \u001b[31m4.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
            "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m307.7/307.7 kB\u001b[0m \u001b[31m20.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
            "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m47.2/47.2 kB\u001b[0m \u001b[31m3.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
            "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m86.8/86.8 kB\u001b[0m \u001b[31m5.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
            "\u001b[?25h  Building wheel for pypika (pyproject.toml) ... \u001b[?25l\u001b[?25hdone\n",
            "\u001b[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.\n",
            "spacy 3.7.4 requires typer<0.10.0,>=0.3.0, but you have typer 0.12.3 which is incompatible.\n",
            "weasel 0.3.4 requires typer<0.10.0,>=0.3.0, but you have typer 0.12.3 which is incompatible.\u001b[0m\u001b[31m\n",
            "\u001b[0m"
          ]
        }
      ],
      "source": [
        "!pip install -q llama-index==0.10.49 llama-index-vector-stores-chroma==0.1.9 llama-index-llms-gemini==0.1.11 google-generativeai==0.5.4 openai==1.35.3 chromadb==0.5.3"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": 2,
      "metadata": {
        "id": "39OAU5OlByI0"
      },
      "outputs": [],
      "source": [
        "import os\n",
        "\n",
        "# Set the following API Keys in the Python environment. Will be used later.\n",
        "os.environ[\"OPENAI_API_KEY\"] = \"[OPENAI_API_KEY]\"\n",
        "os.environ[\"GOOGLE_API_KEY\"] = \"<YOUR_API_KEY>\""
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {
        "id": "B2UvE-i9Nzon"
      },
      "source": [
        "# Create a Vector Store\n"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": 3,
      "metadata": {
        "id": "O2haexSAByDD"
      },
      "outputs": [],
      "source": [
        "import chromadb\n",
        "\n",
        "# create client and a new collection\n",
        "# chromadb.EphemeralClient saves data in-memory.\n",
        "chroma_client = chromadb.PersistentClient(path=\"./mini-llama-articles\")\n",
        "chroma_collection = chroma_client.create_collection(\"mini-llama-articles\")"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": 4,
      "metadata": {
        "id": "OHO6a-zaBxeG"
      },
      "outputs": [],
      "source": [
        "from llama_index.vector_stores.chroma import ChromaVectorStore\n",
        "from llama_index.core.storage.storage_context import StorageContext\n",
        "\n",
        "# Define a storage context object using the created vector database.\n",
        "vector_store = ChromaVectorStore(chroma_collection=chroma_collection)\n",
        "storage_context = StorageContext.from_defaults(vector_store=vector_store)"
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {
        "id": "hZz9_ZYNN4Kv"
      },
      "source": [
        "# Load the Dataset (CSV)\n"
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {
        "id": "uvOjzNNAN4wg"
      },
      "source": [
        "## Download\n"
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {
        "id": "z5jGj4cRN7ou"
      },
      "source": [
        "The dataset includes several articles from the TowardsAI blog, which provide an in-depth explanation of the LLaMA2 model. Read the dataset as a long string.\n"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": 5,
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "id": "x4llz2lHN2ij",
        "outputId": "d0cd17b8-eca9-45f0-ae14-846ab0d624e0"
      },
      "outputs": [
        {
          "name": "stdout",
          "output_type": "stream",
          "text": [
            "--2024-06-03 22:16:45--  https://raw.githubusercontent.com/AlaFalaki/tutorial_notebooks/main/data/mini-llama-articles.csv\n",
            "Resolving raw.githubusercontent.com (raw.githubusercontent.com)... 185.199.108.133, 185.199.109.133, 185.199.110.133, ...\n",
            "Connecting to raw.githubusercontent.com (raw.githubusercontent.com)|185.199.108.133|:443... connected.\n",
            "HTTP request sent, awaiting response... 200 OK\n",
            "Length: 173646 (170K) [text/plain]\n",
            "Saving to: ‘mini-llama-articles.csv’\n",
            "\n",
            "\rmini-llama-articles   0%[                    ]       0  --.-KB/s               \rmini-llama-articles 100%[===================>] 169.58K  --.-KB/s    in 0.03s   \n",
            "\n",
            "2024-06-03 22:16:45 (5.09 MB/s) - ‘mini-llama-articles.csv’ saved [173646/173646]\n",
            "\n"
          ]
        }
      ],
      "source": [
        "!wget https://raw.githubusercontent.com/AlaFalaki/tutorial_notebooks/main/data/mini-llama-articles.csv"
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {
        "id": "V-ezlgFaN-5u"
      },
      "source": [
        "# Read File\n"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": 6,
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "id": "_M-0-D4fN2fc",
        "outputId": "1bfc497f-0653-4231-86c9-cfeff34e2182"
      },
      "outputs": [
        {
          "data": {
            "text/plain": [
              "14"
            ]
          },
          "execution_count": 6,
          "metadata": {},
          "output_type": "execute_result"
        }
      ],
      "source": [
        "import csv\n",
        "\n",
        "rows = []\n",
        "\n",
        "# Load the file as a JSON\n",
        "with open(\"./mini-llama-articles.csv\", mode=\"r\", encoding=\"utf-8\") as file:\n",
        "    csv_reader = csv.reader(file)\n",
        "\n",
        "    for idx, row in enumerate(csv_reader):\n",
        "        if idx == 0:\n",
        "            continue\n",
        "            # Skip header row\n",
        "        rows.append(row)\n",
        "\n",
        "# The number of characters in the dataset.\n",
        "len(rows)"
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {
        "id": "PBimOJVwOCjl"
      },
      "source": [
        "# Convert to Document obj\n"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": 7,
      "metadata": {
        "id": "Ie--Y_3wN2c8"
      },
      "outputs": [],
      "source": [
        "from llama_index.core.schema import Document\n",
        "\n",
        "# Convert the chunks to Document objects so the LlamaIndex framework can process them.\n",
        "documents = [\n",
        "    Document(\n",
        "        text=row[1], metadata={\"title\": row[0], \"url\": row[2], \"source_name\": row[3]}\n",
        "    )\n",
        "    for row in rows\n",
        "]"
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {
        "id": "lqQpen6bOEza"
      },
      "source": [
        "# Transforming\n"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": 8,
      "metadata": {
        "id": "zVBkAg6eN2an"
      },
      "outputs": [],
      "source": [
        "from llama_index.core.node_parser import SentenceWindowNodeParser\n",
        "\n",
        "# create the sentence window node parser\n",
        "node_parser = SentenceWindowNodeParser.from_defaults(\n",
        "    window_size=3,\n",
        "    include_metadata=True,\n",
        "    window_metadata_key=\"window\",\n",
        "    original_text_metadata_key=\"original_text\",\n",
        ")"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": 9,
      "metadata": {
        "id": "KiDwIXFxN2YK"
      },
      "outputs": [],
      "source": [
        "nodes = node_parser.get_nodes_from_documents(documents)"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": 10,
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "id": "f1aZ4wYVN2V1",
        "outputId": "e3ef377a-a195-44e3-a67a-554fcff29e67"
      },
      "outputs": [
        {
          "data": {
            "text/plain": [
              "TextNode(id_='20a4754c-3ab9-4d64-9aa3-e1379c37074e', embedding=None, metadata={'window': \"LLM Variants and Meta's Open Source Before shedding light on four major trends, I'd share the latest Meta's Llama 2 and Code Llama.  Meta's Llama 2 represents a sophisticated evolution in LLMs.  This suite spans models pretrained and fine-tuned across a parameter spectrum of 7 billion to 70 billion.  A specialized derivative, Llama 2-Chat, has been engineered explicitly for dialogue-centric applications. \", 'original_text': \"LLM Variants and Meta's Open Source Before shedding light on four major trends, I'd share the latest Meta's Llama 2 and Code Llama. \", 'title': \"Beyond GPT-4: What's New?\", 'url': 'https://pub.towardsai.net/beyond-gpt-4-whats-new-cbd61a448eb9#dda8', 'source_name': 'towards_ai'}, excluded_embed_metadata_keys=['window', 'original_text'], excluded_llm_metadata_keys=['window', 'original_text'], relationships={<NodeRelationship.SOURCE: '1'>: RelatedNodeInfo(node_id='1773f54a-0742-41dd-a645-ba7c07ff8f75', node_type=<ObjectType.DOCUMENT: '4'>, metadata={'title': \"Beyond GPT-4: What's New?\", 'url': 'https://pub.towardsai.net/beyond-gpt-4-whats-new-cbd61a448eb9#dda8', 'source_name': 'towards_ai'}, hash='3b095b0e25cdf965d950cdbd7feb8024030e7645998c1a33dc4427affca624ab'), <NodeRelationship.NEXT: '3'>: RelatedNodeInfo(node_id='1ac96425-5144-4897-9f7b-182156d3470c', node_type=<ObjectType.TEXT: '1'>, metadata={'window': \"LLM Variants and Meta's Open Source Before shedding light on four major trends, I'd share the latest Meta's Llama 2 and Code Llama.  Meta's Llama 2 represents a sophisticated evolution in LLMs.  This suite spans models pretrained and fine-tuned across a parameter spectrum of 7 billion to 70 billion.  A specialized derivative, Llama 2-Chat, has been engineered explicitly for dialogue-centric applications.  Benchmarking revealed Llama 2's superior performance over most extant open-source chat models. \", 'original_text': \"Meta's Llama 2 represents a sophisticated evolution in LLMs. \"}, hash='e06ffff4f5927a7e2252b2785825ad4b0dafdeb09355258be50a13bc170d7a5b')}, text=\"LLM Variants and Meta's Open Source Before shedding light on four major trends, I'd share the latest Meta's Llama 2 and Code Llama. \", start_char_idx=0, end_char_idx=132, text_template='{metadata_str}\\n\\n{content}', metadata_template='{key}: {value}', metadata_seperator='\\n')"
            ]
          },
          "execution_count": 10,
          "metadata": {},
          "output_type": "execute_result"
        }
      ],
      "source": [
        "nodes[0]"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": 11,
      "metadata": {
        "id": "moNbizWrN2Tu"
      },
      "outputs": [],
      "source": [
        "from llama_index.core import VectorStoreIndex\n",
        "\n",
        "# Add the documents to the database and create Index / embeddings\n",
        "index = VectorStoreIndex(nodes, storage_context=storage_context)"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": 12,
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "id": "nz6dQtXzyWqK",
        "outputId": "b636525e-47cc-4f57-cfa3-70b9cb17f7e0"
      },
      "outputs": [
        {
          "name": "stdout",
          "output_type": "stream",
          "text": [
            "  adding: mini-llama-articles/ (stored 0%)\n",
            "  adding: mini-llama-articles/f4ee5232-8d1e-4e11-899e-02eafe4527df/ (stored 0%)\n",
            "  adding: mini-llama-articles/f4ee5232-8d1e-4e11-899e-02eafe4527df/index_metadata.pickle (deflated 38%)\n",
            "  adding: mini-llama-articles/f4ee5232-8d1e-4e11-899e-02eafe4527df/link_lists.bin (deflated 88%)\n",
            "  adding: mini-llama-articles/f4ee5232-8d1e-4e11-899e-02eafe4527df/data_level0.bin (deflated 18%)\n",
            "  adding: mini-llama-articles/f4ee5232-8d1e-4e11-899e-02eafe4527df/length.bin (deflated 43%)\n",
            "  adding: mini-llama-articles/f4ee5232-8d1e-4e11-899e-02eafe4527df/header.bin (deflated 56%)\n",
            "  adding: mini-llama-articles/chroma.sqlite3 (deflated 69%)\n"
          ]
        }
      ],
      "source": [
        "# Compress the vector store directory to a zip file to be able to download and use later.\n",
        "!zip -r vectorstore-windowed.zip mini-llama-articles"
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {
        "id": "7qZY6xOYyjIX"
      },
      "source": [
        "# Load Indexes\n"
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {
        "id": "zo9kamyEykI6"
      },
      "source": [
        "If you have already uploaded the zip file for the vector store checkpoint, please uncomment the code in the following cell block to extract its contents. After doing so, you will be able to load the dataset from local storage.\n"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": 13,
      "metadata": {
        "id": "wS-V6NhMymx8"
      },
      "outputs": [],
      "source": [
        "# !unzip vectorstore-windowed.zip"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": 14,
      "metadata": {
        "id": "fH2myF120oMi"
      },
      "outputs": [],
      "source": [
        "from llama_index.core.postprocessor import MetadataReplacementPostProcessor\n",
        "from llama_index.llms.gemini import Gemini\n",
        "\n",
        "llm = Gemini(model=\"models/gemini-1.5-flash\", temperature=1, max_tokens=512)\n",
        "\n",
        "query_engine = index.as_query_engine(\n",
        "    llm=llm,\n",
        "    # the target key defaults to `window` to match the node_parser's default\n",
        "    node_postprocessors=[\n",
        "        MetadataReplacementPostProcessor(target_metadata_key=\"window\")\n",
        "    ],\n",
        ")"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": 15,
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "id": "EqNreFmE0vRb",
        "outputId": "bb5204c5-3ab8-460b-9702-5cf2f2b32f73"
      },
      "outputs": [
        {
          "name": "stdout",
          "output_type": "stream",
          "text": [
            "The Llama 2 model is available in four different sizes: 7 billion, 13 billion, 34 billion, and 70 billion parameters.\n"
          ]
        }
      ],
      "source": [
        "response = query_engine.query(\"How many parameters LLaMA2 model has?\")\n",
        "print(response)"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": 22,
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "id": "whdPLhVaMfOS",
        "outputId": "7b7ea07d-d93c-41a0-bd7b-6a9e8d8b18f7"
      },
      "outputs": [
        {
          "name": "stdout",
          "output_type": "stream",
          "text": [
            "Source  1\n",
            "Original Text: Llama 2 Model Flavors Llama 2 is available in four different model sizes: 7 billion, 13 billion, 34 billion, and 70 billion parameters. \n",
            "Window: Companies with over 700 million active daily users cannot use Llama 2.  Additionally, its output cannot be used to improve other language models.   II.  Llama 2 Model Flavors Llama 2 is available in four different model sizes: 7 billion, 13 billion, 34 billion, and 70 billion parameters.  While 7B, 13B, and 70B have already been released, the 34B model is still awaited.  The pretrained variant, trained on a whopping 2 trillion tokens, boasts a context window of 4096 tokens, twice the size of its predecessor Llama 1.  Meta also released a Llama 2 fine-tuned model for chat applications that was trained on over 1 million human annotations. \n",
            "----\n",
            "Source  2\n",
            "Original Text: The 70B parameter model outperforms all other open-source models, while the 7B and 34B models outshine Falcon in all categories and MPT in all categories except coding. \n",
            "Window: The 34B parameter model has reported higher safety violations than other variants, possibly contributing to the delay in its release.   IV.  Helpfulness Comparison: Llama 2 Outperforms Competitors Llama 2 emerges as a strong contender in the open-source language model arena, outperforming its competitors in most categories.  The 70B parameter model outperforms all other open-source models, while the 7B and 34B models outshine Falcon in all categories and MPT in all categories except coding.  Despite being smaller, Llam a2's performance rivals that of Chat GPT 3.5, a significantly larger closed-source model.  While GPT 4 and PalM-2-L, with their larger size, outperform Llama 2, this is expected due to their capacity for handling complex language tasks.  Llama 2's impressive ability to compete with larger models highlights its efficiency and potential in the market. \n",
            "----\n"
          ]
        }
      ],
      "source": [
        "for idx, item in enumerate(response.source_nodes):\n",
        "    print(\"Source \", idx + 1)\n",
        "    print(\"Original Text:\", item.node.metadata[\"original_text\"])\n",
        "    print(\"Window:\", item.node.metadata[\"window\"])\n",
        "    print(\"----\")"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "id": "dQBrOUYrLA76"
      },
      "outputs": [],
      "source": []
    }
  ],
  "metadata": {
    "colab": {
      "authorship_tag": "ABX9TyMcPZHiexcHnmM/BQzkTZ9Y",
      "include_colab_link": true,
      "provenance": []
    },
    "kernelspec": {
      "display_name": "Python 3",
      "name": "python3"
    },
    "language_info": {
      "name": "python",
      "version": "3.12.4"
    }
  },
  "nbformat": 4,
  "nbformat_minor": 0
}