{ "cells": [ { "cell_type": "markdown", "metadata": { "colab_type": "text", "id": "view-in-github" }, "source": [ "\"Open" ] }, { "cell_type": "markdown", "metadata": { "id": "-zE1h0uQV7uT" }, "source": [ "# Install Packages and Setup Variables" ] }, { "cell_type": "code", "execution_count": 14, "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "QPJzr-I9XQ7l", "outputId": "8e808cc4-4c21-474b-c5b7-f6841ee08020" }, "outputs": [], "source": [ "!pip install -q llama-index==0.10.11 openai==1.12.0 llama-index-finetuning llama-index-embeddings-huggingface llama-index-embeddings-cohere llama-index-readers-web cohere==4.47 tiktoken==0.6.0 chromadb==0.4.22 pandas==2.2.0 html2text sentence_transformers pydantic" ] }, { "cell_type": "code", "execution_count": 11, "metadata": { "id": "riuXwpSPcvWC" }, "outputs": [], "source": [ "import os\n", "\n", "# Set the \"OPENAI_API_KEY\" and the \"CO_API_KEY\" (Cohere) in the Python environment. Will be used by OpenAI client later.\n", "os.environ[\"OPENAI_API_KEY\"] = \"\"\n", "os.environ[\"CO_API_KEY\"] = \"\"" ] }, { "cell_type": "code", "execution_count": 2, "metadata": { "id": "jIEeZzqLbz0J" }, "outputs": [], "source": [ "# Allows running asyncio in environments with an existing event loop, like Jupyter notebooks.\n", "\n", "import nest_asyncio\n", "\n", "nest_asyncio.apply()" ] }, { "cell_type": "markdown", "metadata": { "id": "Bkgi2OrYzF7q" }, "source": [ "# Load a Model" ] }, { "cell_type": "code", "execution_count": 3, "metadata": { "id": "9oGT6crooSSj" }, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "/Users/louis/Documents/GitHub/ai-tutor-rag-system/.conda/lib/python3.11/site-packages/tqdm/auto.py:21: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html\n", " from .autonotebook import tqdm as notebook_tqdm\n" ] } ], "source": [ "from llama_index.llms.openai import OpenAI\n", "\n", "llm = OpenAI(temperature=0.9, model=\"gpt-3.5-turbo-0125\", max_tokens=512)" ] }, { "cell_type": "markdown", "metadata": { "id": "0BwVuJXlzHVL" }, "source": [ "# Create a VectoreStore" ] }, { "cell_type": "code", "execution_count": 4, "metadata": { "id": "SQP87lHczHKc" }, "outputs": [], "source": [ "import chromadb\n", "\n", "# create client and a new collection\n", "# chromadb.EphemeralClient saves data in-memory.\n", "chroma_client = chromadb.PersistentClient(path=\"./mini-llama-articles\")\n", "chroma_collection = chroma_client.create_collection(\"mini-llama-articles\")" ] }, { "cell_type": "code", "execution_count": 5, "metadata": { "id": "zAaGcYMJzHAN" }, "outputs": [], "source": [ "from llama_index.vector_stores.chroma import ChromaVectorStore\n", "\n", "# Define a storage context object using the created vector database.\n", "vector_store = ChromaVectorStore(chroma_collection=chroma_collection)" ] }, { "cell_type": "markdown", "metadata": { "id": "I9JbAzFcjkpn" }, "source": [ "# Load the Dataset (CSV)" ] }, { "cell_type": "markdown", "metadata": { "id": "ceveDuYdWCYk" }, "source": [ "## Download" ] }, { "cell_type": "markdown", "metadata": { "id": "eZwf6pv7WFmD" }, "source": [ "The dataset includes several articles from the TowardsAI blog, which provide an in-depth explanation of the LLaMA2 model. Read the dataset as a long string." ] }, { "cell_type": "code", "execution_count": 6, "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "wl_pbPvMlv1h", "outputId": "bc9a0415-a1fb-4e89-a2b4-165420106b34" }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ " % Total % Received % Xferd Average Speed Time Time Time Current\n", " Dload Upload Total Spent Left Speed\n", "100 169k 100 169k 0 0 856k 0 --:--:-- --:--:-- --:--:-- 860k\n" ] } ], "source": [ "!curl -o ./mini-llama-articles.csv https://raw.githubusercontent.com/AlaFalaki/tutorial_notebooks/main/data/mini-llama-articles.csv" ] }, { "cell_type": "markdown", "metadata": { "id": "VWBLtDbUWJfA" }, "source": [ "## Read File" ] }, { "cell_type": "code", "execution_count": 7, "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "0Q9sxuW0g3Gd", "outputId": "a8361aa6-522d-4def-e49b-ed08d9c8e7d1" }, "outputs": [ { "data": { "text/plain": [ "14" ] }, "execution_count": 7, "metadata": {}, "output_type": "execute_result" } ], "source": [ "import csv\n", "\n", "rows = []\n", "\n", "# Load the file as a JSON\n", "with open(\"./mini-llama-articles.csv\", mode=\"r\", encoding=\"utf-8\") as file:\n", " csv_reader = csv.reader(file)\n", "\n", " for idx, row in enumerate( csv_reader ):\n", " if idx == 0: continue; # Skip header row\n", " rows.append( row )\n", "\n", "# The number of characters in the dataset.\n", "len( rows )" ] }, { "cell_type": "markdown", "metadata": { "id": "S17g2RYOjmf2" }, "source": [ "# Convert to Document obj" ] }, { "cell_type": "code", "execution_count": 8, "metadata": { "id": "YizvmXPejkJE" }, "outputs": [], "source": [ "from llama_index.core import Document\n", "\n", "# Convert the chunks to Document objects so the LlamaIndex framework can process them.\n", "documents = [Document(text=row[1], metadata={\"title\": row[0], \"url\": row[2], \"source_name\": row[3]}) for row in rows]" ] }, { "cell_type": "markdown", "metadata": { "id": "qjuLbmFuWsyl" }, "source": [ "# Transforming" ] }, { "cell_type": "code", "execution_count": 9, "metadata": { "id": "9z3t70DGWsjO" }, "outputs": [], "source": [ "from llama_index.core.text_splitter import TokenTextSplitter\n", "\n", "# Define the splitter object that split the text into segments with 512 tokens,\n", "# with a 128 overlap between the segments.\n", "text_splitter = TokenTextSplitter(\n", " separator=\" \", chunk_size=512, chunk_overlap=128\n", ")" ] }, { "cell_type": "markdown", "metadata": { "id": "y28yMy0GxfGR" }, "source": [ "There are two options to use the Cohere embeddings:\n", "\n", "- input_type=\"search_document\": Employ this option for texts (documents) intended for storage in your vector database.\n", "\n", "- input_type=\"search_query\": Use this when issuing search queries to locate the most related documents within your vector database." ] }, { "cell_type": "code", "execution_count": 12, "metadata": { "colab": { "base_uri": "https://localhost:8080/", "height": 385, "referenced_widgets": [ "2b1095050bb847c48855e3b74ae18b19", "a0a1c543115c4764b4150c5d0216370c", "23675bffa00749849ec944f84986ff52", "9e86b288110f4d418fd9761f59f5637f", "d6a4fd2a9cf7431b8bf738d9da0e2a7c", "700a1ffb298c4dd799c44fcee540b74c", "06e7a0370c8c46dd9a47c72a474212d1", "268f6f0800164e0ab7f8f31718f7f9be", "4001b95bd48147fb876b37a644e70dec", "22024efa09cb4330ab68a8c2bdbf92ac", "c14678e2b8c546fc9123c94fa47b924d", "9dda1537424142e0b7f2fdd5f9c1b98d", "1db171d1920d432283f9e1795c4c0c80", "23e0caeaf15546f0b5c62aa263c99e09", "03b8aded009343f288f0945b64d1f41c", "4d922a99035d45c59ce9868a4ef73d68", "aea6b63cbced40619bf32b1a2c350259", "c89c9dd46b454181aadaf82c7296cdae", "bec71553390b44879accb638a5b4873f", "97e4316196e84c7a82a2dd3e4698bc55", "b2ab2dc287a9421ca812074389ee31a7", "fa5c2f509ec54c5695a406160ab0626a" ] }, "id": "P9LDJ7o-Wsc-", "outputId": "cd49bff2-b0da-4722-8baa-6a07f1023b39" }, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "Parsing nodes: 100%|██████████| 14/14 [00:00<00:00, 30.35it/s]\n", "100%|██████████| 108/108 [01:01<00:00, 1.76it/s]\n", "100%|██████████| 108/108 [01:13<00:00, 1.47it/s]\n", "100%|██████████| 108/108 [00:30<00:00, 3.59it/s]\n", "Generating embeddings: 100%|██████████| 108/108 [00:04<00:00, 26.11it/s]\n" ] } ], "source": [ "from llama_index.core.extractors import (\n", " SummaryExtractor,\n", " QuestionsAnsweredExtractor,\n", " KeywordExtractor,\n", ")\n", "from llama_index.embeddings.cohere import CohereEmbedding\n", "from llama_index.core.ingestion import IngestionPipeline\n", "\n", "# Create the pipeline to apply the transformation on each chunk,\n", "# and store the transformed text in the chroma vector store.\n", "pipeline = IngestionPipeline(\n", " transformations=[\n", " text_splitter,\n", " QuestionsAnsweredExtractor(questions=3, llm=llm),\n", " SummaryExtractor(summaries=[\"prev\", \"self\"], llm=llm),\n", " KeywordExtractor(keywords=10, llm=llm),\n", " CohereEmbedding(model_name=\"embed-english-v3.0\", input_type=\"search_document\"),\n", " ],\n", " vector_store=vector_store\n", ")\n", "\n", "# Run the transformation pipeline.\n", "nodes = pipeline.run(documents=documents, show_progress=True);" ] }, { "cell_type": "code", "execution_count": 13, "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "mPGa85hM2P3P", "outputId": "9d7811ba-1e10-4098-b6eb-77a4e7d37457" }, "outputs": [ { "data": { "text/plain": [ "108" ] }, "execution_count": 13, "metadata": {}, "output_type": "execute_result" } ], "source": [ "len( nodes )" ] }, { "cell_type": "code", "execution_count": 14, "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "jjnmscmq2cXK", "outputId": "5f6fa176-4e09-4cc7-bd17-8236b061ad17" }, "outputs": [ { "data": { "text/plain": [ "1024" ] }, "execution_count": 14, "metadata": {}, "output_type": "execute_result" } ], "source": [ "len( nodes[0].embedding )" ] }, { "cell_type": "code", "execution_count": 15, "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "hV9G0lSUJJSa", "outputId": "453a4ea3-dfda-4da1-ac29-929834c83b40" }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ " adding: mini-llama-articles/ (stored 0%)\n", " adding: mini-llama-articles/63fe4276-8624-43c7-8c23-32dbfedb2285/ (stored 0%)\n", " adding: mini-llama-articles/63fe4276-8624-43c7-8c23-32dbfedb2285/data_level0.bin (deflated 100%)\n", " adding: mini-llama-articles/63fe4276-8624-43c7-8c23-32dbfedb2285/length.bin (deflated 25%)\n", " adding: mini-llama-articles/63fe4276-8624-43c7-8c23-32dbfedb2285/link_lists.bin (stored 0%)\n", " adding: mini-llama-articles/63fe4276-8624-43c7-8c23-32dbfedb2285/header.bin (deflated 61%)\n", " adding: mini-llama-articles/chroma.sqlite3 (deflated 70%)\n" ] } ], "source": [ "# Compress the vector store directory to a zip file to be able to download and use later.\n", "!zip -r vectorstore_cohere.zip mini-llama-articles" ] }, { "cell_type": "markdown", "metadata": { "id": "OWaT6rL7ksp8" }, "source": [ "# Load Indexes" ] }, { "cell_type": "markdown", "metadata": { "id": "B4w8xP2Ggrvf" }, "source": [ "If you have already uploaded the zip file for the vector store checkpoint, please uncomment the code in the following cell block to extract its contents. After doing so, you will be able to load the dataset from local storage." ] }, { "cell_type": "code", "execution_count": 16, "metadata": { "id": "EF-wobGAJRgL" }, "outputs": [], "source": [ "# !unzip vectorstore_cohere.zip" ] }, { "cell_type": "code", "execution_count": 17, "metadata": { "id": "mXi56KTXk2sp" }, "outputs": [], "source": [ "# Load the vector store from the local storage.\n", "db = chromadb.PersistentClient(path=\"./mini-llama-articles\")\n", "chroma_collection = db.get_or_create_collection(\"mini-llama-articles\")\n", "vector_store = ChromaVectorStore(chroma_collection=chroma_collection)" ] }, { "cell_type": "code", "execution_count": 19, "metadata": { "id": "9l0PaY230syE" }, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "/var/folders/l7/9qcp7g5x5rl9x8ltw0t85qym0000gn/T/ipykernel_74455/3981499771.py:11: DeprecationWarning: Call to deprecated class method from_defaults. (ServiceContext is deprecated, please use `llama_index.settings.Settings` instead.) -- Deprecated since version 0.10.0.\n", " service_context = ServiceContext.from_defaults(\n" ] } ], "source": [ "from llama_index.core import ServiceContext\n", "\n", "# Define the Cohere Embedding Model\n", "embed_model = CohereEmbedding(\n", " model_name=\"embed-english-v3.0\",\n", " input_type=\"search_query\",\n", ")\n", "\n", "# Define the ServiceCotext object to tie the LLM for generating final answer,\n", "# and the embedding model to help with retrieving related nodes.\n", "service_context = ServiceContext.from_defaults(\n", " llm=llm, embed_model=embed_model\n", ")" ] }, { "cell_type": "code", "execution_count": 21, "metadata": { "id": "jKXURvLtkuTS" }, "outputs": [], "source": [ "from llama_index.core import VectorStoreIndex\n", "\n", "# Create the index based on the vector store.\n", "index = VectorStoreIndex.from_vector_store(vector_store, service_context=service_context)" ] }, { "cell_type": "markdown", "metadata": { "id": "8JPD8yAinVSq" }, "source": [ "# Query Dataset" ] }, { "cell_type": "code", "execution_count": 22, "metadata": { "id": "b0gue7cyctt1" }, "outputs": [], "source": [ "# Define a query engine that is responsible for retrieving related pieces of text,\n", "# and using a LLM to formulate the final answer.\n", "query_engine = index.as_query_engine()\n", "\n", "res = query_engine.query(\"How many parameters LLaMA2 model has?\")" ] }, { "cell_type": "code", "execution_count": 23, "metadata": { "colab": { "base_uri": "https://localhost:8080/", "height": 53 }, "id": "VKK3jMprctre", "outputId": "cb85d598-d1bc-49e9-818f-c7bbde465864" }, "outputs": [ { "data": { "text/plain": [ "'LLaMA2 model has a total of 2 trillion parameters.'" ] }, "execution_count": 23, "metadata": {}, "output_type": "execute_result" } ], "source": [ "res.response" ] }, { "cell_type": "code", "execution_count": 24, "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "465dH4yQc7Ct", "outputId": "3d2b3ce2-7705-41bb-80e3-4fe6b390dcef" }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Node ID\t 0a3368de-02cc-4cb2-8579-3379e9c68101\n", "Title\t Fine-Tuning a Llama-2 7B Model for Python Code Generation\n", "Text\t New Llama-2 model In mid-July, Meta released its new family of pre-trained and finetuned models called Llama-2, with an open source and commercial character to facilitate its use and expansion. The base model was released with a chat version and sizes 7B, 13B, and 70B. Together with the models, the corresponding papers were published describing their characteristics and relevant points of the learning process, which provide very interesting information on the subject. For pre-training, 40% more tokens were used, reaching 2T, the context length was doubled and the grouped-query attention (GQA) technique was applied to speed up inference on the heavier 70B model. On the standard transformer architecture, RMSNorm normalization, SwiGLU activation, and rotatory positional embedding are used, the context length reaches 4096 tokens, and an Adam optimizer is applied with a cosine learning rate schedule, a weight decay of 0.1 and gradient clipping. The dataset for tuning For our tuning process, we will take a dataset containing about 18,000 examples where the model is asked to build a Python code that solves a given task. This is an extraction of the original dataset [2], where only the Python language examples are selected. Each row contains the description of the task to be solved, an example of data input to the task if applicable, and the generated code fragment that solves the task is provided [3]. Creating the prompt To carry out an instruction fine-tuning, we must transform each one of our data examples as if it were an instruction, outlining its main sections as follows: Output: Fine-tuning the model To carry out this stage, we have used the Google Colab environment, where we have developed a notebook that allows us to run the training in an interactive way and also a Python script to run the training in unattended mode. For the first test runs, a T4 instance with a high RAM capacity is enough, but when it comes to running the whole dataset and epochs, we have opted to use an A100 instance in order to speed up the training and ensure that its execution time is reasonable. In order to be able to\n", "Score\t 0.4173821910560196\n", "-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_\n", "Node ID\t b2b33887-2da1-4838-903e-8e126224095d\n", "Title\t Fine-Tuning a Llama-2 7B Model for Python Code Generation\n", "Text\t if it were an instruction, outlining its main sections as follows: Output: Fine-tuning the model To carry out this stage, we have used the Google Colab environment, where we have developed a notebook that allows us to run the training in an interactive way and also a Python script to run the training in unattended mode. For the first test runs, a T4 instance with a high RAM capacity is enough, but when it comes to running the whole dataset and epochs, we have opted to use an A100 instance in order to speed up the training and ensure that its execution time is reasonable. In order to be able to share the model, we will log in to the Huggingface hub using the appropriate token, so that at the end of the whole process, we will upload the model files so that they can be shared with the rest of the users. Fine-tuning techniques: PEFT, Lora, and QLora In recent months, some papers have appeared showing how PEFT techniques can be used to train large language models with a drastic reduction of RAM requirements and consequently allowing fine-tuning of these models on a single GPU of reasonable size. The usual steps to train an LLM consist, first, an intensive pre-training on billions or trillions of tokens to obtain a foundation model, and then a fine-tuning is performed on this model to specialize it on a downstream task. In this fine-tuning phase is where the PEFT technique has its purpose. Parameter Efficient Fine-Tuning (PEFT) allows us to considerably reduce RAM and storage requirements by only fine-tuning a small number of additional parameters, with virtually all model parameters remaining frozen. PEFT has been found to produce good generalization with relatively low-volume datasets. Furthermore, it enhances the reusability and portability of the model, as the small checkpoints obtained can be easily added to the base model, and the base model can be easily fine-tuned and reused in multiple scenarios by adding the PEFT parameters. Finally, since the base model is not adjusted, all the knowledge acquired in the pre-training phase is preserved, thus avoiding catastrophic forgetting. Most widely used PEFT techniques aim to keep the pre-trained base model untouched\n", "Score\t 0.4013547787636657\n", "-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_\n" ] } ], "source": [ "# Show the retrieved nodes\n", "for src in res.source_nodes:\n", " print(\"Node ID\\t\", src.node_id)\n", " print(\"Title\\t\", src.metadata['title'])\n", " print(\"Text\\t\", src.text)\n", " print(\"Score\\t\", src.score)\n", " print(\"-_\"*20)" ] }, { "cell_type": "markdown", "metadata": { "id": "iMkpzH7vvb09" }, "source": [ "# Evaluate" ] }, { "cell_type": "code", "execution_count": 26, "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "H8a3eKgKvckU", "outputId": "85b0765e-5a42-4f60-ccff-fc4bc688f65a" }, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "100%|██████████| 108/108 [06:43<00:00, 3.74s/it]\n" ] } ], "source": [ "from llama_index.core.evaluation import generate_question_context_pairs\n", "from llama_index.llms.openai import OpenAI\n", "\n", "# Create questions for each segment. These questions will be used to\n", "# assess whether the retriever can accurately identify and return the\n", "# corresponding segment when queried.\n", "llm = OpenAI(model=\"gpt-3.5-turbo-0125\")\n", "rag_eval_dataset = generate_question_context_pairs(\n", " nodes,\n", " llm=llm,\n", " num_questions_per_chunk=1\n", ")\n", "\n", "# We can save the evaluation dataset as a json file for later use.\n", "rag_eval_dataset.save_json(\"./rag_eval_dataset_cohere.json\")" ] }, { "cell_type": "markdown", "metadata": { "id": "998nNEGYhKhu" }, "source": [ "If you have uploaded the generated question JSON file, please uncomment the code in the next cell block. This will avoid the need to generate the questions manually, saving you time and effort." ] }, { "cell_type": "code", "execution_count": 27, "metadata": { "id": "3sA1K84U254o" }, "outputs": [], "source": [ "# from llama_index.finetuning.embeddings.common import (\n", "# EmbeddingQAFinetuneDataset,\n", "# )\n", "# rag_eval_dataset = EmbeddingQAFinetuneDataset.from_json(\n", "# \"./rag_eval_dataset_cohere.json\"\n", "# )" ] }, { "cell_type": "code", "execution_count": 28, "metadata": { "id": "H7ubvcbk27vr" }, "outputs": [], "source": [ "import pandas as pd\n", "\n", "# A simple function to show the evaluation result.\n", "def display_results_retriever(name, eval_results):\n", " \"\"\"Display results from evaluate.\"\"\"\n", "\n", " metric_dicts = []\n", " for eval_result in eval_results:\n", " metric_dict = eval_result.metric_vals_dict\n", " metric_dicts.append(metric_dict)\n", "\n", " full_df = pd.DataFrame(metric_dicts)\n", "\n", " hit_rate = full_df[\"hit_rate\"].mean()\n", " mrr = full_df[\"mrr\"].mean()\n", "\n", " metric_df = pd.DataFrame(\n", " {\"Retriever Name\": [name], \"Hit Rate\": [hit_rate], \"MRR\": [mrr]}\n", " )\n", "\n", " return metric_df" ] }, { "cell_type": "code", "execution_count": 29, "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "uNLxDxoc2-Ac", "outputId": "8a2df94d-99b5-4aa4-a31e-b6c94256d1bb" }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ " Retriever Name Hit Rate MRR\n", "0 Retriever top_2 0.677355 0.562124\n", " Retriever Name Hit Rate MRR\n", "0 Retriever top_4 0.815631 0.606045\n", " Retriever Name Hit Rate MRR\n", "0 Retriever top_6 0.865731 0.615331\n", " Retriever Name Hit Rate MRR\n", "0 Retriever top_8 0.887776 0.618301\n", " Retriever Name Hit Rate MRR\n", "0 Retriever top_10 0.8998 0.619592\n" ] } ], "source": [ "from llama_index.core.evaluation import RetrieverEvaluator\n", "\n", "# We can evaluate the retievers with different top_k values.\n", "for i in [2, 4, 6, 8, 10]:\n", " retriever = index.as_retriever(similarity_top_k=i)\n", " retriever_evaluator = RetrieverEvaluator.from_metric_names(\n", " [\"mrr\", \"hit_rate\"], retriever=retriever\n", " )\n", " eval_results = await retriever_evaluator.aevaluate_dataset(rag_eval_dataset)\n", " print(display_results_retriever(f\"Retriever top_{i}\", eval_results))" ] }, { "cell_type": "code", "execution_count": 30, "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "3ukkWC9R2_0J", "outputId": "d177c25d-a163-4b71-97f4-2af468737bbb" }, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "/var/folders/l7/9qcp7g5x5rl9x8ltw0t85qym0000gn/T/ipykernel_74455/1546854213.py:11: DeprecationWarning: Call to deprecated class method from_defaults. (ServiceContext is deprecated, please use `llama_index.settings.Settings` instead.) -- Deprecated since version 0.10.0.\n", " service_context_gpt4 = ServiceContext.from_defaults(llm=llm_gpt4)\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "top_2 faithfulness_score: 1.0\n", "top_2 relevancy_score: 1.0\n", "-_-_-_-_-_-_-_-_-_-_\n", "top_4 faithfulness_score: 1.0\n", "top_4 relevancy_score: 1.0\n", "-_-_-_-_-_-_-_-_-_-_\n", "top_6 faithfulness_score: 1.0\n", "top_6 relevancy_score: 1.0\n", "-_-_-_-_-_-_-_-_-_-_\n", "top_8 faithfulness_score: 0.45\n", "top_8 relevancy_score: 0.45\n", "-_-_-_-_-_-_-_-_-_-_\n", "top_10 faithfulness_score: 0.65\n", "top_10 relevancy_score: 0.65\n", "-_-_-_-_-_-_-_-_-_-_\n" ] } ], "source": [ "from llama_index.core.evaluation import RelevancyEvaluator, FaithfulnessEvaluator, BatchEvalRunner\n", "from llama_index.core import ServiceContext\n", "from llama_index.llms.openai import OpenAI\n", "\n", "for i in [2, 4, 6, 8, 10]:\n", " # Set Faithfulness and Relevancy evaluators\n", " query_engine = index.as_query_engine(similarity_top_k=i)\n", "\n", " # While we use GPT3.5-Turbo to answer questions, we can use GPT4 to evaluate the answers.\n", " llm_gpt4 = OpenAI(temperature=0, model=\"gpt-4-0125-preview\")\n", " service_context_gpt4 = ServiceContext.from_defaults(llm=llm_gpt4)\n", "\n", " faithfulness_evaluator = FaithfulnessEvaluator(service_context=service_context_gpt4)\n", " relevancy_evaluator = RelevancyEvaluator(service_context=service_context_gpt4)\n", "\n", " # Run evaluation\n", " queries = list(rag_eval_dataset.queries.values())\n", " batch_eval_queries = queries[:20]\n", "\n", " runner = BatchEvalRunner(\n", " {\"faithfulness\": faithfulness_evaluator, \"relevancy\": relevancy_evaluator},\n", " workers=8,\n", " )\n", " eval_results = await runner.aevaluate_queries(\n", " query_engine, queries=batch_eval_queries\n", " )\n", " faithfulness_score = sum(result.passing for result in eval_results['faithfulness']) / len(eval_results['faithfulness'])\n", " print(f\"top_{i} faithfulness_score: {faithfulness_score}\")\n", "\n", " relevancy_score = sum(result.passing for result in eval_results['faithfulness']) / len(eval_results['relevancy'])\n", " print(f\"top_{i} relevancy_score: {relevancy_score}\")\n", " print(\"-_\"*10)" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "id": "1MB1YD1E3EKM" }, "outputs": [], "source": [] } ], "metadata": { "colab": { "authorship_tag": "ABX9TyMx3DkzJEgLiO/6oTdKzS6v", "include_colab_link": true, "provenance": [] }, "kernelspec": { "display_name": "Python 3", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.11.8" }, "widgets": { "application/vnd.jupyter.widget-state+json": { "03b8aded009343f288f0945b64d1f41c": { "model_module": "@jupyter-widgets/controls", "model_module_version": "1.5.0", "model_name": "HTMLModel", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "HTMLModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "HTMLView", "description": "", "description_tooltip": null, "layout": "IPY_MODEL_b2ab2dc287a9421ca812074389ee31a7", "placeholder": "​", "style": "IPY_MODEL_fa5c2f509ec54c5695a406160ab0626a", "value": " 108/108 [00:03<00:00, 30.08it/s]" } }, "06e7a0370c8c46dd9a47c72a474212d1": { "model_module": "@jupyter-widgets/controls", "model_module_version": "1.5.0", "model_name": "DescriptionStyleModel", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "DescriptionStyleModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "StyleView", "description_width": "" } }, "1db171d1920d432283f9e1795c4c0c80": { "model_module": "@jupyter-widgets/controls", "model_module_version": "1.5.0", "model_name": "HTMLModel", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "HTMLModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "HTMLView", "description": "", "description_tooltip": null, "layout": "IPY_MODEL_aea6b63cbced40619bf32b1a2c350259", "placeholder": "​", "style": "IPY_MODEL_c89c9dd46b454181aadaf82c7296cdae", "value": "Generating embeddings: 100%" } }, "22024efa09cb4330ab68a8c2bdbf92ac": { "model_module": "@jupyter-widgets/base", "model_module_version": "1.2.0", "model_name": "LayoutModel", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "23675bffa00749849ec944f84986ff52": { "model_module": "@jupyter-widgets/controls", "model_module_version": "1.5.0", "model_name": "FloatProgressModel", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "FloatProgressModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "ProgressView", "bar_style": "success", "description": "", "description_tooltip": null, "layout": "IPY_MODEL_268f6f0800164e0ab7f8f31718f7f9be", "max": 14, "min": 0, "orientation": "horizontal", "style": "IPY_MODEL_4001b95bd48147fb876b37a644e70dec", "value": 14 } }, "23e0caeaf15546f0b5c62aa263c99e09": { "model_module": "@jupyter-widgets/controls", "model_module_version": "1.5.0", "model_name": "FloatProgressModel", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "FloatProgressModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "ProgressView", "bar_style": "success", "description": "", "description_tooltip": null, "layout": "IPY_MODEL_bec71553390b44879accb638a5b4873f", "max": 108, "min": 0, "orientation": "horizontal", "style": "IPY_MODEL_97e4316196e84c7a82a2dd3e4698bc55", "value": 108 } }, "268f6f0800164e0ab7f8f31718f7f9be": { "model_module": "@jupyter-widgets/base", "model_module_version": "1.2.0", "model_name": "LayoutModel", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "2b1095050bb847c48855e3b74ae18b19": { "model_module": "@jupyter-widgets/controls", "model_module_version": "1.5.0", "model_name": "HBoxModel", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "HBoxModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "HBoxView", "box_style": "", "children": [ "IPY_MODEL_a0a1c543115c4764b4150c5d0216370c", "IPY_MODEL_23675bffa00749849ec944f84986ff52", "IPY_MODEL_9e86b288110f4d418fd9761f59f5637f" ], "layout": "IPY_MODEL_d6a4fd2a9cf7431b8bf738d9da0e2a7c" } }, "4001b95bd48147fb876b37a644e70dec": { "model_module": "@jupyter-widgets/controls", "model_module_version": "1.5.0", "model_name": "ProgressStyleModel", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "ProgressStyleModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "StyleView", "bar_color": null, "description_width": "" } }, "4d922a99035d45c59ce9868a4ef73d68": { "model_module": "@jupyter-widgets/base", "model_module_version": "1.2.0", "model_name": "LayoutModel", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "700a1ffb298c4dd799c44fcee540b74c": { "model_module": "@jupyter-widgets/base", "model_module_version": "1.2.0", "model_name": "LayoutModel", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "97e4316196e84c7a82a2dd3e4698bc55": { "model_module": "@jupyter-widgets/controls", "model_module_version": "1.5.0", "model_name": "ProgressStyleModel", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "ProgressStyleModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "StyleView", "bar_color": null, "description_width": "" } }, "9dda1537424142e0b7f2fdd5f9c1b98d": { "model_module": "@jupyter-widgets/controls", "model_module_version": "1.5.0", "model_name": "HBoxModel", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "HBoxModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "HBoxView", "box_style": "", "children": [ "IPY_MODEL_1db171d1920d432283f9e1795c4c0c80", "IPY_MODEL_23e0caeaf15546f0b5c62aa263c99e09", "IPY_MODEL_03b8aded009343f288f0945b64d1f41c" ], "layout": "IPY_MODEL_4d922a99035d45c59ce9868a4ef73d68" } }, "9e86b288110f4d418fd9761f59f5637f": { "model_module": "@jupyter-widgets/controls", "model_module_version": "1.5.0", "model_name": "HTMLModel", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "HTMLModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "HTMLView", "description": "", "description_tooltip": null, "layout": "IPY_MODEL_22024efa09cb4330ab68a8c2bdbf92ac", "placeholder": "​", "style": "IPY_MODEL_c14678e2b8c546fc9123c94fa47b924d", "value": " 14/14 [00:00<00:00, 13.27it/s]" } }, "a0a1c543115c4764b4150c5d0216370c": { "model_module": "@jupyter-widgets/controls", "model_module_version": "1.5.0", "model_name": "HTMLModel", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "HTMLModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "HTMLView", "description": "", "description_tooltip": null, "layout": "IPY_MODEL_700a1ffb298c4dd799c44fcee540b74c", "placeholder": "​", "style": "IPY_MODEL_06e7a0370c8c46dd9a47c72a474212d1", "value": "Parsing nodes: 100%" } }, "aea6b63cbced40619bf32b1a2c350259": { "model_module": "@jupyter-widgets/base", "model_module_version": "1.2.0", "model_name": "LayoutModel", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "b2ab2dc287a9421ca812074389ee31a7": { "model_module": "@jupyter-widgets/base", "model_module_version": "1.2.0", "model_name": "LayoutModel", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "bec71553390b44879accb638a5b4873f": { "model_module": "@jupyter-widgets/base", "model_module_version": "1.2.0", "model_name": "LayoutModel", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "c14678e2b8c546fc9123c94fa47b924d": { "model_module": "@jupyter-widgets/controls", "model_module_version": "1.5.0", "model_name": "DescriptionStyleModel", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "DescriptionStyleModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "StyleView", "description_width": "" } }, "c89c9dd46b454181aadaf82c7296cdae": { "model_module": "@jupyter-widgets/controls", "model_module_version": "1.5.0", "model_name": "DescriptionStyleModel", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "DescriptionStyleModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "StyleView", "description_width": "" } }, "d6a4fd2a9cf7431b8bf738d9da0e2a7c": { "model_module": "@jupyter-widgets/base", "model_module_version": "1.2.0", "model_name": "LayoutModel", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "fa5c2f509ec54c5695a406160ab0626a": { "model_module": "@jupyter-widgets/controls", "model_module_version": "1.5.0", "model_name": "DescriptionStyleModel", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "DescriptionStyleModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "StyleView", "description_width": "" } } } } }, "nbformat": 4, "nbformat_minor": 0 }