{
"cells": [
{
"cell_type": "markdown",
"metadata": {
"colab_type": "text",
"id": "view-in-github"
},
"source": [
"\n"
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "-zE1h0uQV7uT"
},
"source": [
"# Install Packages and Setup Variables\n"
]
},
{
"cell_type": "code",
"execution_count": 1,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "QPJzr-I9XQ7l",
"outputId": "5d48c88b-a0a9-49ff-d788-e076d1cb4ead"
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"\n",
"\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m A new release of pip is available: \u001b[0m\u001b[31;49m23.1.2\u001b[0m\u001b[39;49m -> \u001b[0m\u001b[32;49m24.2\u001b[0m\n",
"\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m To update, run: \u001b[0m\u001b[32;49mpip install --upgrade pip\u001b[0m\n"
]
}
],
"source": [
"!pip install -q llama-index==0.10.57 openai==1.37.0 cohere==5.6.2 tiktoken==0.7.0 chromadb==0.5.5 html2text sentence_transformers pydantic llama-index-vector-stores-chroma==0.1.10 kaleido==0.2.1 llama-index-llms-gemini==0.1.11"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {
"id": "riuXwpSPcvWC"
},
"outputs": [],
"source": [
"import os\n",
"\n",
"# Set the following API Keys in the Python environment. Will be used later.\n",
"os.environ[\"OPENAI_API_KEY\"] = \"\"\n",
"os.environ[\"GOOGLE_API_KEY\"] = \"\""
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {
"id": "jIEeZzqLbz0J"
},
"outputs": [],
"source": [
"# Allows running asyncio in environments with an existing event loop, like Jupyter notebooks.\n",
"import nest_asyncio\n",
"\n",
"nest_asyncio.apply()"
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "Bkgi2OrYzF7q"
},
"source": [
"# Load a Model\n"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {
"id": "9oGT6crooSSj"
},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"/Users/fabio/Desktop/ai-tutor-rag-system/venv_ai_tutor/lib/python3.11/site-packages/tqdm/auto.py:21: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html\n",
" from .autonotebook import tqdm as notebook_tqdm\n",
"WARNING: All log messages before absl::InitializeLog() is called are written to STDERR\n",
"I0000 00:00:1723471002.830383 5318658 config.cc:230] gRPC experiments enabled: call_status_override_on_cancellation, event_engine_dns, event_engine_listener, http2_stats_fix, monitoring_experiment, pick_first_new, trace_record_callops, work_serializer_clears_time_cache\n",
"I0000 00:00:1723471002.837404 5318658 check_gcp_environment_no_op.cc:29] ALTS: Platforms other than Linux and Windows are not supported\n"
]
}
],
"source": [
"from llama_index.llms.gemini import Gemini\n",
"\n",
"llm = Gemini(model=\"models/gemini-1.5-flash\", temperature=1, max_tokens=512)"
]
},
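{
"cell_type": "markdown",
"metadata": {},
"source": [
"Optional sanity check: the cell below is a minimal sketch (assuming the `GOOGLE_API_KEY` set above is valid) that sends a short prompt to confirm the Gemini model responds before building the pipeline.\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Optional: send a short prompt to verify the Gemini model is reachable.\n",
"# Assumes the GOOGLE_API_KEY environment variable set above is valid.\n",
"print(llm.complete(\"In one sentence, what is a large language model?\").text)"
]
},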
{
"cell_type": "markdown",
"metadata": {
"id": "0BwVuJXlzHVL"
},
"source": [
"# Create a VectoreStore\n"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {
"id": "SQP87lHczHKc"
},
"outputs": [],
"source": [
"import chromadb\n",
"from chromadb.utils.embedding_functions import OpenAIEmbeddingFunction\n",
"from llama_index.vector_stores.chroma import ChromaVectorStore\n",
"\n",
"# create client and a new collection\n",
"# chromadb.EphemeralClient saves data in-memory.\n",
"chroma_client = chromadb.PersistentClient(path=\"./mini-llama-articles\")\n",
"chroma_collection = chroma_client.get_or_create_collection(\n",
" \"mini-llama-articles\",\n",
" embedding_function=OpenAIEmbeddingFunction(api_key=os.environ[\"OPENAI_API_KEY\"], model_name=\"text-embedding-3-small\")\n",
")\n",
"vector_store = ChromaVectorStore(chroma_collection=chroma_collection)"
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "I9JbAzFcjkpn"
},
"source": [
"# Load the Dataset (CSV)\n"
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "ceveDuYdWCYk"
},
"source": [
"## Download\n"
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "eZwf6pv7WFmD"
},
"source": [
"The dataset includes several articles from the TowardsAI blog, which provide an in-depth explanation of the LLaMA2 model. Read the dataset as a long string.\n"
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "wl_pbPvMlv1h",
"outputId": "a453b612-20a8-4396-d22b-b19d2bc47816"
},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"I0000 00:00:1723471003.927906 5318658 work_stealing_thread_pool.cc:320] WorkStealingThreadPoolImpl::PrepareFork\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
" % Total % Received % Xferd Average Speed Time Time Time Current\n",
" Dload Upload Total Spent Left Speed\n",
"100 169k 100 169k 0 0 506k 0 --:--:-- --:--:-- --:--:-- 506k\n"
]
}
],
"source": [
"!curl -o ./mini-llama-articles.csv https://raw.githubusercontent.com/AlaFalaki/tutorial_notebooks/main/data/mini-llama-articles.csv"
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "VWBLtDbUWJfA"
},
"source": [
"## Read File\n"
]
},
{
"cell_type": "code",
"execution_count": 9,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "0Q9sxuW0g3Gd",
"outputId": "49b27d8a-1f96-4e8d-fa0f-27afbf2c395c"
},
"outputs": [
{
"data": {
"text/plain": [
"14"
]
},
"execution_count": 9,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"import csv\n",
"\n",
"rows = []\n",
"\n",
"# Load the file as a JSON\n",
"with open(\"./mini-llama-articles.csv\", mode=\"r\", encoding=\"utf-8\") as file:\n",
" csv_reader = csv.reader(file)\n",
"\n",
" for idx, row in enumerate(csv_reader):\n",
" if idx == 0:\n",
" continue\n",
" # Skip header row\n",
" rows.append(row)\n",
"\n",
"# The number of characters in the dataset.\n",
"len(rows)"
]
},
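{
"cell_type": "markdown",
"metadata": {},
"source": [
"A quick, optional look at one row's layout. The column order (title, content, url, source) is inferred from the Document conversion in the next section; this cell only prints a short preview.\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Optional: preview one row. The column order (title, content, url, source)\n",
"# is inferred from the Document conversion cell below.\n",
"print(\"Title :\", rows[0][0])\n",
"print(\"URL   :\", rows[0][2])\n",
"print(\"Source:\", rows[0][3])\n",
"print(\"Text  :\", rows[0][1][:100], \"...\")"
]
},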
{
"cell_type": "markdown",
"metadata": {
"id": "S17g2RYOjmf2"
},
"source": [
"# Convert to Document obj\n"
]
},
{
"cell_type": "code",
"execution_count": 10,
"metadata": {
"id": "YizvmXPejkJE"
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Doc ID: 8908a7bc-6918-4725-9859-6e6a7788f865\n",
"Text: LLM Variants and Meta's Open Source Before shedding light on\n",
"four major trends, I'd share the latest Meta's Llama 2 and Code Llama.\n",
"Meta's Llama 2 represents a sophisticated evolution in LLMs. This\n",
"suite spans models pretrained and fine-tuned across a parameter\n",
"spectrum of 7 billion to 70 billion. A specialized derivative, Llama\n",
"2-Chat, has been...\n"
]
}
],
"source": [
"from llama_index.core import Document\n",
"\n",
"# Convert the chunks to Document objects so the LlamaIndex framework can process them.\n",
"documents = [\n",
" Document(\n",
" text=row[1], metadata={\"title\": row[0], \"url\": row[2], \"source_name\": row[3]}\n",
" )\n",
" for row in rows\n",
"]\n",
"print(documents[0])"
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "qjuLbmFuWsyl"
},
"source": [
"# Transforming\n"
]
},
{
"cell_type": "code",
"execution_count": 11,
"metadata": {
"id": "9z3t70DGWsjO"
},
"outputs": [],
"source": [
"from llama_index.core.text_splitter import TokenTextSplitter\n",
"\n",
"text_splitter = TokenTextSplitter(separator=\" \", chunk_size=512, chunk_overlap=128)"
]
},
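{
"cell_type": "markdown",
"metadata": {},
"source": [
"Optionally, you can preview how the splitter chunks a single article before running the full pipeline. The cell below is a minimal sketch that uses the `documents` list created above.\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Optional: split the first article on its own to see how many 512-token\n",
"# chunks (with 128-token overlap) it produces.\n",
"sample_chunks = text_splitter.split_text(documents[0].text)\n",
"print(len(sample_chunks), \"chunks\")\n",
"print(sample_chunks[0][:150], \"...\")"
]
},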
{
"cell_type": "code",
"execution_count": 12,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
"height": 331,
"referenced_widgets": [
"3fbabd8a8660461ba5e7bc08ef39139a",
"df2365556ae242a2ab1a119f9a31a561",
"5f4b9d32df8f446e858e4c289dc282f9",
"5b588f83a15d42d9aca888e06bbd95ff",
"ad073bca655540809e39f26538d2ec0d",
"13b9c5395bca4c3ba21265240cb936cf",
"47a4586384274577a726c57605e7f8d9",
"96a3bdece738481db57e811ccb74a974",
"5c7973afd79349ed997a69120d0629b2",
"af9b6ae927dd4764b9692507791bc67e",
"134210510d49476e959dd7d032bbdbdc",
"5f9bb065c2b74d2e8ded32e1306a7807",
"73a06bc546a64f7f99a9e4a135319dcd",
"ce48deaf4d8c49cdae92bfdbb3a78df0",
"4a172e8c6aa44e41a42fc1d9cf714fd0",
"0245f2604e4d49c8bd0210302746c47b",
"e956dfab55084a9cbe33c8e331b511e7",
"cb394578badd43a89850873ad2526542",
"193aef33d9184055bb9223f56d456de6",
"abfc9aa911ce4a5ea81c7c451f08295f",
"e7937a1bc68441a080374911a6563376",
"e532ed7bfef34f67b5fcacd9534eb789"
]
},
"id": "P9LDJ7o-Wsc-",
"outputId": "01070c1f-dffa-4ab7-ad71-b07b76b12e03"
},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"I0000 00:00:1723471005.241134 5318658 work_stealing_thread_pool.cc:320] WorkStealingThreadPoolImpl::PrepareFork\n",
"Parsing nodes: 100%|██████████| 14/14 [00:00<00:00, 51.60it/s]\n",
" 0%| | 0/108 [00:00, ?it/s]I0000 00:00:1723471005.538637 5318658 check_gcp_environment_no_op.cc:29] ALTS: Platforms other than Linux and Windows are not supported\n",
"100%|██████████| 108/108 [04:51<00:00, 2.70s/it] \n",
"100%|██████████| 108/108 [05:05<00:00, 2.83s/it] \n",
"100%|██████████| 108/108 [03:39<00:00, 2.04s/it] \n",
"Generating embeddings: 0%| | 0/108 [00:00, ?it/s]I0000 00:00:1723471822.110812 5318658 work_stealing_thread_pool.cc:320] WorkStealingThreadPoolImpl::PrepareFork\n",
"Generating embeddings: 100%|██████████| 108/108 [00:03<00:00, 31.65it/s]\n"
]
}
],
"source": [
"from llama_index.core.extractors import (\n",
" SummaryExtractor,\n",
" QuestionsAnsweredExtractor,\n",
" KeywordExtractor,\n",
")\n",
"from llama_index.embeddings.openai import OpenAIEmbedding\n",
"from llama_index.core.ingestion import IngestionPipeline\n",
"\n",
"pipeline = IngestionPipeline(\n",
" transformations=[\n",
" text_splitter,\n",
" QuestionsAnsweredExtractor(questions=3, llm=llm),\n",
" SummaryExtractor(summaries=[\"prev\", \"self\"], llm=llm),\n",
" KeywordExtractor(keywords=10, llm=llm),\n",
" OpenAIEmbedding(model=\"text-embedding-3-small\", mode=\"text_search\"),\n",
" ],\n",
" vector_store=vector_store,\n",
")\n",
"\n",
"nodes = pipeline.run(documents=documents, show_progress=True)"
]
},
{
"cell_type": "code",
"execution_count": 13,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "mPGa85hM2P3P",
"outputId": "c106c463-2459-4b11-bbae-5bd5e2246011"
},
"outputs": [
{
"data": {
"text/plain": [
"108"
]
},
"execution_count": 13,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"len(nodes)"
]
},
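{
"cell_type": "markdown",
"metadata": {},
"source": [
"To see what the extractors in the pipeline added, you can inspect the metadata of a single node; the cell below only prints the metadata keys of the first node.\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Inspect the metadata fields produced by the extractors for one node.\n",
"print(list(nodes[0].metadata.keys()))"
]
},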
{
"cell_type": "code",
"execution_count": 14,
"metadata": {
"id": "23x20bL3_jRb"
},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"I0000 00:00:1723471826.032425 5318658 work_stealing_thread_pool.cc:320] WorkStealingThreadPoolImpl::PrepareFork\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"updating: mini-llama-articles/ (stored 0%)\n",
"updating: mini-llama-articles/chroma.sqlite3 (deflated 66%)\n",
"updating: mini-llama-articles/6fc7339a-e4bb-4707-8db9-a8a5d4e2b37c/ (stored 0%)\n",
"updating: mini-llama-articles/6fc7339a-e4bb-4707-8db9-a8a5d4e2b37c/data_level0.bin (deflated 100%)\n",
"updating: mini-llama-articles/6fc7339a-e4bb-4707-8db9-a8a5d4e2b37c/length.bin (deflated 99%)\n",
"updating: mini-llama-articles/6fc7339a-e4bb-4707-8db9-a8a5d4e2b37c/link_lists.bin (stored 0%)\n",
"updating: mini-llama-articles/6fc7339a-e4bb-4707-8db9-a8a5d4e2b37c/header.bin (deflated 61%)\n"
]
}
],
"source": [
"!zip -r vectorstore.zip mini-llama-articles"
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "OWaT6rL7ksp8"
},
"source": [
"# Load Indexes\n"
]
},
{
"cell_type": "code",
"execution_count": 15,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "SodY2Xpf_kxg",
"outputId": "9f8b7153-ea58-4824-8363-c47e922612a8"
},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"I0000 00:00:1723471826.688310 5318658 work_stealing_thread_pool.cc:320] WorkStealingThreadPoolImpl::PrepareFork\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"Archive: vectorstore.zip\n",
" inflating: mini-llama-articles/chroma.sqlite3 \n",
" inflating: mini-llama-articles/6fc7339a-e4bb-4707-8db9-a8a5d4e2b37c/data_level0.bin \n",
" inflating: mini-llama-articles/6fc7339a-e4bb-4707-8db9-a8a5d4e2b37c/length.bin \n",
" extracting: mini-llama-articles/6fc7339a-e4bb-4707-8db9-a8a5d4e2b37c/link_lists.bin \n",
" inflating: mini-llama-articles/6fc7339a-e4bb-4707-8db9-a8a5d4e2b37c/header.bin \n"
]
}
],
"source": [
"!unzip -o vectorstore.zip"
]
},
{
"cell_type": "code",
"execution_count": 16,
"metadata": {
"id": "mXi56KTXk2sp"
},
"outputs": [],
"source": [
"import chromadb\n",
"from llama_index.vector_stores.chroma import ChromaVectorStore\n",
"\n",
"# Create your index\n",
"db = chromadb.PersistentClient(path=\"./mini-llama-articles\")\n",
"chroma_collection = db.get_or_create_collection(\"mini-llama-articles\")\n",
"vector_store = ChromaVectorStore(chroma_collection=chroma_collection)"
]
},
{
"cell_type": "code",
"execution_count": 17,
"metadata": {
"id": "jKXURvLtkuTS"
},
"outputs": [],
"source": [
"# Create your index\n",
"from llama_index.core import VectorStoreIndex\n",
"\n",
"vector_index = VectorStoreIndex.from_vector_store(vector_store)"
]
},
{
"cell_type": "code",
"execution_count": 18,
"metadata": {},
"outputs": [],
"source": [
"from llama_index.embeddings.openai import OpenAIEmbedding\n",
"\n",
"llama_query_engine = vector_index.as_query_engine(\n",
" llm=llm,\n",
" similarity_top_k=3,\n",
" embed_model=OpenAIEmbedding(model=\"text-embedding-3-small\", mode=\"text_search\"),\n",
")"
]
},
{
"cell_type": "code",
"execution_count": 19,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Llama is a family of large language models developed by Meta. \n",
"\n"
]
}
],
"source": [
"res = llama_query_engine.query(\"What is the LLama model?\")\n",
"print(res.response)"
]
},
{
"cell_type": "code",
"execution_count": 20,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"'Llama is a family of large language models developed by Meta. \\n'"
]
},
"execution_count": 20,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"res.response"
]
},
{
"cell_type": "code",
"execution_count": 21,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Node ID\t 18dcfeee-ebbc-476f-a4d9-042b26c38aa2\n",
"Title\t Beyond GPT-4: What's New?\n",
"Text\t LLM Variants and Meta's Open Source Before shedding light on four major trends, I'd share the latest Meta's Llama 2 and Code Llama. Meta's Llama 2 represents a sophisticated evolution in LLMs. This suite spans models pretrained and fine-tuned across a parameter spectrum of 7 billion to 70 billion. A specialized derivative, Llama 2-Chat, has been engineered explicitly for dialogue-centric applications. Benchmarking revealed Llama 2's superior performance over most extant open-source chat models. Human-centric evaluations, focusing on safety and utility metrics, positioned Llama 2-Chat as a potential contender against proprietary, closed-source counterparts. The development trajectory of Llama 2 emphasized rigorous fine-tuning methodologies. Meta's transparent delineation of these processes aims to catalyze community-driven advancements in LLMs, underscoring a commitment to collaborative and responsible AI development. Code Llama is built on top of Llama 2 and is available in three models: Code Llama, the foundational code model;Codel Llama - Python specialized for Python;and Code Llama - Instruct, which is fine-tuned for understanding natural language instructions. Based on its benchmark testing, Code Llama outperformed state-of-the-art publicly available LLMs (except GPT-4) on code tasks. Llama 2, Llama 2-Chat, and Code Llama are key steps in LLM development but still have a way to go compared to GPT-4. Meta's open access and commitment to improving these models promise transparent and faster LLM progress in the future. Please refer to the LLM and Llama variants below: From LLMs to Multimodal LLMs, like OpenAI's ChatGPT (GPT-3.5), primarily focus on understanding and generating human language. They've been instrumental in tasks like text generation, translation, and even creative writing. However, their scope is limited to text. Enter multimodal models like GPT-4. These are a new breed of AI models that can understand and generate not just text, but also images, sounds, and potentially other types of data. The term \"multimodal\" refers to their ability to process multiple modes or\n",
"Score\t 0.3794056870856778\n",
"Metadata\t {'title': \"Beyond GPT-4: What's New?\", 'url': 'https://pub.towardsai.net/beyond-gpt-4-whats-new-cbd61a448eb9#dda8', 'source_name': 'towards_ai', 'questions_this_excerpt_can_answer': \"Here are three questions this context can answer specifically, unlikely to be found elsewhere:\\n\\n1. **What are the key differences in training methodologies and fine-tuning processes used for Llama 2 compared to previous versions, and how do these advancements contribute to its improved performance?** This question dives deeper into the specific technical details of Llama 2's development, which are highlighted in the text but not readily available elsewhere.\\n2. **What are the specific safety and utility metrics used to evaluate Llama 2-Chat against proprietary chat models, and how do these metrics differ from those used for evaluating other LLMs?** This question focuses on the unique aspects of evaluating chat models, a specific niche within LLM development, and delves into the methods used for comparing Llama 2-Chat against commercial counterparts.\\n3. **How does Code Llama's specialization in Python differ from its foundational model, and how does this specialization impact its performance on code tasks compared to other LLMs?** This question probes the specific advantages and limitations of Code Llama's Python specialization, offering insight into the practical implications of fine-tuning for specific programming languages.\", 'section_summary': \"Summary: \\n\\nThis section focuses on the advancements in large language models (LLMs) beyond GPT-4, particularly highlighting Meta's open-source contributions: Llama 2 and Code Llama. \\n\\n**Key Topics:**\\n\\n* **Llama 2:** A suite of LLMs spanning 7 billion to 70 billion parameters, trained and fine-tuned for improved performance. \\n * **Llama 2-Chat:** A specialized variant optimized for dialogue-centric applications. \\n* **Code Llama:** A code-focused LLM built upon Llama 2, available in three versions:\\n * **Code Llama (foundational)**\\n * **Code Llama - Python (specialized for Python)**\\n * **Code Llama - Instruct (fine-tuned for natural language instructions)**\\n* **Open-source AI:** Meta's commitment to open-sourcing these models fosters community-driven innovation and faster LLM development.\\n* **Multimodal LLMs:** The introduction of models like GPT-4 that can handle multiple data types (text, images, sound) expands the capabilities of LLMs.\\n\\n**Key Entities:**\\n\\n* **Meta:** The company behind Llama 2 and Code Llama.\\n* **GPT-4:** OpenAI's proprietary, state-of-the-art multimodal LLM.\\n* **Llama 2, Llama 2-Chat, Code Llama:** The specific LLM variants discussed in the excerpt.\", 'excerpt_keywords': 'Keywords: LLMs, Llama 2, Code Llama, GPT-4, Meta, Open Source, Fine-tuning, Multimodal, Dialogue, Python'}\n",
"-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_\n",
"Node ID\t 72d99052-2337-4e2a-8b03-a39e1d01a6ef\n",
"Title\t Meta's Llama 2: Revolutionizing Open Source Language Models for Commercial Use\n",
"Text\t The model demonstrates exceptionally low AI safety violation percentages, surpassing even ChatGPT in safety benchmarks. Finding the right balance between helpfulness and safety when optimizing a model poses significant challenges. While a highly helpful model may be capable of answering any question, including sensitive ones like \"How do I build a bomb?\", it also raises concerns about potential misuse. Thus, striking the perfect equilibrium between providing useful information and ensuring safety is paramount. However, prioritizing safety to an extreme extent can lead to a model that struggles to effectively address a diverse range of questions. This limitation could hinder the model's practical applicability and user experience. Thus, achieving an optimum balance that allows the model to be both helpful and safe is of utmost importance. To strike the right balance between helpfulness and safety, Meta employed two reward models - one for helpfulness and another for safety - to optimize the model's responses. The 34B parameter model has reported higher safety violations than other variants, possibly contributing to the delay in its release. IV. Helpfulness Comparison: Llama 2 Outperforms Competitors Llama 2 emerges as a strong contender in the open-source language model arena, outperforming its competitors in most categories. The 70B parameter model outperforms all other open-source models, while the 7B and 34B models outshine Falcon in all categories and MPT in all categories except coding. Despite being smaller, Llam a2's performance rivals that of Chat GPT 3.5, a significantly larger closed-source model. While GPT 4 and PalM-2-L, with their larger size, outperform Llama 2, this is expected due to their capacity for handling complex language tasks. Llama 2's impressive ability to compete with larger models highlights its efficiency and potential in the market. However, Llama 2 does face challenges in coding and math problems, where models like Chat GPT 4 excel, given their significantly larger size. Chat GPT 4 performed significantly better than Llama 2 for coding (HumanEval benchmark)and math problem tasks (GSM8k benchmark). Open-source AI technologies, like Llama 2, continue to advance, offering\n",
"Score\t 0.372375859380259\n",
"Metadata\t {'title': \"Meta's Llama 2: Revolutionizing Open Source Language Models for Commercial Use\", 'url': 'https://pub.towardsai.net/metas-llama-2-revolutionizing-open-source-language-models-for-commercial-use-1492bec112b#148f', 'source_name': 'towards_ai', 'questions_this_excerpt_can_answer': \"Here are 3 questions this context can answer specifically, drawing on the provided information:\\n\\n1. **How did Meta balance the trade-off between helpfulness and safety in Llama 2, and what specific methods did they use?** This question delves into the core of Llama 2's development, exploring the technical strategies employed to optimize for both helpfulness and safety, which is a key challenge in large language model design.\\n2. **How does the 34B parameter model of Llama 2 compare in terms of safety violations to other variants, and what potential reasons are given for its delayed release?** The text hints at differences in safety performance between the various Llama 2 models. This question seeks clarification on the specific performance discrepancies and potential explanations for the 34B model's delayed release.\\n3. **What are the specific benchmarks and categories where Llama 2 outperforms other open-source language models, like Falcon and MPT?** The context mentions Llama 2's impressive performance compared to competitors. This question focuses on the specific benchmarks used to assess these capabilities and the categories where Llama 2 demonstrates its superior performance.\", 'prev_section_summary': \"## Summary: \\n\\nThis section focuses on Meta's release of Llama 2, a powerful open-source language model with significant implications for commercial use. \\n\\n**Key Topics:**\\n\\n* **Open-Source and Commercial Use:** Llama 2 is the first open-source language model designed for commercial applications, allowing businesses to integrate it into their products and services. This opens doors for AI-powered applications.\\n* **Model Variations:** Llama 2 comes in four sizes (7B, 13B, 34B, and 70B parameters), with varying performance based on their training data and resources. \\n* **Training and Resources:** The 70B model was trained on a massive dataset of 2 trillion tokens, consuming vast GPU hours. Its chat version was further refined with human annotations. \\n* **Safety and Alignment:** Meta prioritized safety and alignment in Llama 2's development, resulting in better performance on safety benchmarks compared to ChatGPT. However, achieving the right balance between helpfulness and safety is a continuous challenge in large language model development. \\n\\n**Key Entities:**\\n\\n* **Llama 2:** The focus of the article, a new open-source language model from Meta.\\n* **Meta:** The company behind Llama 2.\\n* **ChatGPT:** A leading chatbot and language model used for comparison with Llama 2.\\n* **Azure and AWS:** Cloud platforms where Llama 2 is available.\", 'section_summary': \"Summary: \\n\\nThis section focuses on Meta's Llama 2, an open-source language model. The key topics are:\\n\\n**Safety and Helpfulness:** \\n* The article discusses the challenge of balancing helpfulness and safety in large language models. 
\\n* Meta used two reward models (one for helpfulness, one for safety) to optimize Llama 2.\\n* The 34B parameter model has higher safety violations than other variants, which may be a reason for its delayed release.\\n\\n**Performance Comparison:**\\n* Llama 2 outperforms its competitors (Falcon and MPT) in most categories.\\n* The 70B parameter model surpasses all other open-source models, while the 7B and 34B models beat Falcon and MPT in most categories.\\n* Llama 2 performs well even compared to larger, closed-source models like ChatGPT 3.5, though it struggles with coding and math problems compared to GPT-4 and PaLM-2-L.\\n\\n**Key Entities:**\\n* **Llama 2**: The open-source language model developed by Meta.\\n* **ChatGPT**: A closed-source language model from OpenAI.\\n* **Falcon and MPT**: Other open-source language models. \\n* **GPT-4 and PaLM-2-L**: Larger, closed-source language models.\\n* **HumanEval and GSM8k**: Benchmarks used to evaluate coding and math problem-solving abilities.\", 'excerpt_keywords': 'Keywords: Llama 2, open-source, language model, Meta, ChatGPT, safety, helpfulness, Falcon, MPT, benchmark'}\n",
"-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_\n",
"Node ID\t 910478c2-4c5e-477f-b7ed-553c47f29c31\n",
"Title\t Fine-Tuning a Llama-2 7B Model for Python Code Generation\n",
"Text\t only fine-tuning a small number of additional parameters, with virtually all model parameters remaining frozen. PEFT has been found to produce good generalization with relatively low-volume datasets. Furthermore, it enhances the reusability and portability of the model, as the small checkpoints obtained can be easily added to the base model, and the base model can be easily fine-tuned and reused in multiple scenarios by adding the PEFT parameters. Finally, since the base model is not adjusted, all the knowledge acquired in the pre-training phase is preserved, thus avoiding catastrophic forgetting. Most widely used PEFT techniques aim to keep the pre-trained base model untouched and add new layers or parameters on top of it. These layers are called \"Adapters\" and the technique of their adjustment \"adapter-tuning\", we add these layers to the pre-trained base model and only train the parameters of these new layers. However, a serious problem with this approach is that these layers lead to increased latency in the inference phase, which makes the process inefficient in many scenarios.In the LoRa technique, a Low-Rank Adaptation of Large Language Models, the idea is not to include new layers but to add values to the parameters in a way that avoids this scary problem of latency in the inference phase. LoRa trains and stores the changes of the additional weights while freezing all the weights of the pre-trained model. Therefore, we train a new weights matrix with the changes in the pre-trained model matrix, and this new matrix is decomposed into 2 Low-rank matrices as explained here: Merge the base model and the adapter weights As we mention, we have trained \"modification weights\" on the base model, our final model requires merging the pretrained model and the adapters in a single model. You can find and download the model in my Hugging Face account edumunozsala/llama-27b-int4-python-code-20k. Give it a try! Inferencing or generating Python code And finally, we will show you how you can download the model from the Hugging Face Hub and call the model to generate an accurate result: Thanks to Maxime Labonne for an excellent article [9] and Philipp Schmid who provides an inspiring\n",
"Score\t 0.37138571268973897\n",
"Metadata\t {'title': 'Fine-Tuning a Llama-2 7B Model for Python Code Generation', 'url': 'https://pub.towardsai.net/fine-tuning-a-llama-2-7b-model-for-python-code-generation-865453afdf73#bf4e', 'source_name': 'towards_ai', 'questions_this_excerpt_can_answer': 'Here are 3 questions that the provided context can answer specifically, which are unlikely to be found elsewhere:\\n\\n1. **How does LoRa address the latency issue associated with adapter-tuning methods in fine-tuning large language models?** \\n * The context explains that LoRa avoids adding new layers, instead focusing on adding weights to existing parameters, thus preserving the base model and avoiding inference latency. \\n2. **What is the specific Hugging Face model name and location for the fine-tuned Llama-2 7B model for Python code generation discussed in the article?** \\n * The context explicitly mentions the model is available on Hugging Face under the name \"edumunozsala/llama-27b-int4-python-code-20k\".\\n3. **What are the advantages of using PEFT (Parameter-Efficient Fine-Tuning) techniques, particularly in the context of this fine-tuned Llama-2 model for Python code generation?**\\n * The context highlights the benefits of PEFT including good generalization with small datasets, model reusability and portability, and preservation of pre-training knowledge, all of which are relevant to the specific application of Python code generation.', 'prev_section_summary': 'Summary:\\n\\nThis section focuses on the fine-tuning of a Llama-2 7B model for Python code generation. It highlights two key techniques:\\n\\n**1. Environment and Tools:**\\n* The fine-tuning process utilizes Google Colab environment and a Python script for interactive and unattended training respectively.\\n* The training process requires significant computational resources, utilizing a T4 instance for initial runs and an A100 instance for full dataset and epoch training.\\n* The trained model is uploaded to the Huggingface hub for sharing with other users.\\n\\n**2. Fine-tuning Techniques:**\\n* The section emphasizes Parameter-Efficient Fine-Tuning (PEFT) techniques, particularly Lora and QLora, as efficient methods for training large language models.\\n* PEFT methods significantly reduce RAM and storage requirements by only fine-tuning a small subset of model parameters, allowing for training on a single GPU.\\n* This approach offers advantages like improved generalization, reusability, portability, and prevention of catastrophic forgetting, while preserving the knowledge acquired during the pre-training phase.', 'section_summary': 'Summary: \\n\\nThis section discusses the fine-tuning of a Llama-2 7B model for Python code generation, highlighting the use of Parameter-Efficient Fine-Tuning (PEFT) techniques, particularly the LoRa method. \\n\\n* **PEFT Techniques:** The article emphasizes the benefits of PEFT, including good generalization with small datasets, model reusability and portability, and preservation of pre-training knowledge. \\n* **LoRa:** The section explains how LoRa addresses the latency issue associated with adapter-tuning methods by adding weights to existing parameters, rather than adding new layers. 
This approach avoids inference latency while still enabling fine-tuning.\\n* **Hugging Face Model:** The fine-tuned Llama-2 7B model for Python code generation is available on Hugging Face under the name \"edumunozsala/llama-27b-int4-python-code-20k\".\\n* **Inference:** The article concludes by providing instructions on how to download the model from Hugging Face and use it to generate Python code.', 'excerpt_keywords': 'Keywords: Fine-tuning, Llama-2, Python Code Generation, Parameter-Efficient Fine-Tuning (PEFT), LoRa, Hugging Face, Inference, Latency, Model Reusability, Catastrophic Forgetting.'}\n",
"-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_\n"
]
}
],
"source": [
"for src in res.source_nodes:\n",
" print(\"Node ID\\t\", src.node_id)\n",
" print(\"Title\\t\", src.metadata[\"title\"])\n",
" print(\"Text\\t\", src.text)\n",
" print(\"Score\\t\", src.score)\n",
" print(\"Metadata\\t\", src.metadata)\n",
" print(\"-_\" * 20)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Router\n",
"\n",
"Routers are modules that take in a user query and a set of “choices” (defined by metadata), and returns one or more selected choices.\n",
"\n",
"They can be used for the following use cases and more:\n",
"\n",
"- Selecting the right data source among a diverse range of data sources\n",
"\n",
"- Deciding whether to do summarization (e.g. using summary index query engine) or semantic search (e.g. using vector index query engine)\n",
"\n",
"- Deciding whether to “try” out a bunch of choices at once and combine the results (using multi-routing capabilities).\n"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Lets create a different query engine with Mistral AI information\n"
]
},
{
"cell_type": "code",
"execution_count": 22,
"metadata": {},
"outputs": [],
"source": [
"from pathlib import Path\n",
"import requests\n",
"\n",
"wiki_titles = [\n",
" \"Mistral AI\",\n",
"]\n",
"\n",
"data_path = Path(\"data_wiki\")\n",
"\n",
"for title in wiki_titles:\n",
" response = requests.get(\n",
" \"https://en.wikipedia.org/w/api.php\",\n",
" params={\n",
" \"action\": \"query\",\n",
" \"format\": \"json\",\n",
" \"titles\": title,\n",
" \"prop\": \"extracts\",\n",
" \"explaintext\": True,\n",
" },\n",
" ).json()\n",
" page = next(iter(response[\"query\"][\"pages\"].values()))\n",
" wiki_text = page[\"extract\"]\n",
"\n",
" if not data_path.exists():\n",
" Path.mkdir(data_path)\n",
"\n",
" with open(data_path / f\"mistral_ai.txt\", \"w\") as fp:\n",
" fp.write(wiki_text)"
]
},
{
"cell_type": "code",
"execution_count": 23,
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"100%|██████████| 5/5 [00:14<00:00, 2.86s/it]\n",
"100%|██████████| 5/5 [00:14<00:00, 2.92s/it]\n",
"100%|██████████| 5/5 [00:09<00:00, 1.95s/it]\n"
]
}
],
"source": [
"from llama_index.core import VectorStoreIndex, SimpleDirectoryReader\n",
"\n",
"# Assuming you have prepared a directory for Mistral data\n",
"documents = SimpleDirectoryReader(\"data_wiki\").load_data()\n",
"\n",
"transformations = [\n",
" text_splitter,\n",
" QuestionsAnsweredExtractor(questions=3, llm=llm),\n",
" SummaryExtractor(summaries=[\"prev\", \"self\"], llm=llm),\n",
" KeywordExtractor(keywords=10, llm=llm),\n",
" OpenAIEmbedding(model=\"text-embedding-3-small\", mode=\"text_search\"),\n",
"]\n",
"\n",
"mistral_index = VectorStoreIndex.from_documents(\n",
" documents=documents, llm=llm, transformations=transformations\n",
")\n",
"\n",
"mistral_query = mistral_index.as_query_engine(\n",
" llm=llm,\n",
" similarity_top_k=2,\n",
" embed_model=OpenAIEmbedding(model=\"text-embedding-3-small\", mode=\"text_search\"),\n",
")"
]
},
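{
"cell_type": "markdown",
"metadata": {},
"source": [
"An optional quick check that the new query engine answers from the Wikipedia text before wiring it into the router; the question below is only an example.\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Optional: query the Mistral index directly before adding it to the router.\n",
"mistral_res = mistral_query.query(\"When was Mistral AI founded?\")\n",
"print(mistral_res.response)"
]
},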
{
"cell_type": "code",
"execution_count": 24,
"metadata": {},
"outputs": [],
"source": [
"from llama_index.core.query_engine import RouterQueryEngine\n",
"from llama_index.core.selectors import PydanticSingleSelector, LLMSingleSelector\n",
"from llama_index.core.tools import QueryEngineTool\n",
"from llama_index.core import VectorStoreIndex, SummaryIndex\n",
"\n",
"# initialize tools\n",
"llama_tool = QueryEngineTool.from_defaults(\n",
" query_engine=llama_query_engine,\n",
" description=\"Useful for questions about the LLama LLM created by Meta\",\n",
")\n",
"mistral_tool = QueryEngineTool.from_defaults(\n",
" query_engine=mistral_query,\n",
" description=\"Useful for questions about the Mistral LLM created by Mistral AI\",\n",
")\n",
"\n",
"# initialize router query engine (single selection, pydantic)\n",
"query_engine = RouterQueryEngine(\n",
" selector=PydanticSingleSelector.from_defaults(),\n",
" query_engine_tools=[\n",
" llama_tool,\n",
" mistral_tool,\n",
" ],\n",
")"
]
},
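{
"cell_type": "markdown",
"metadata": {},
"source": [
"The router above uses single selection. As noted in the Router section, a router can also “try” several choices at once and combine the results. The cell below is a minimal multi-routing sketch, assuming `PydanticMultiSelector` is available alongside the single selector; it is not required for the rest of the notebook.\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# A minimal multi-routing sketch: the multi selector can pick more than one\n",
"# tool, and the router combines the results of the selected query engines.\n",
"from llama_index.core.selectors import PydanticMultiSelector\n",
"\n",
"multi_query_engine = RouterQueryEngine(\n",
"    selector=PydanticMultiSelector.from_defaults(),\n",
"    query_engine_tools=[llama_tool, mistral_tool],\n",
")\n",
"\n",
"# Example usage (hypothetical question spanning both sources):\n",
"# multi_res = multi_query_engine.query(\"Compare the Llama 2 and Mistral 7B models.\")\n",
"# print(multi_res.response)"
]
},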
{
"cell_type": "code",
"execution_count": 25,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"'Llama 2 is a suite of large language models, spanning 7 billion to 70 billion parameters, trained and fine-tuned for improved performance. \\n'"
]
},
"execution_count": 25,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"res = query_engine.query(\n",
" \"What is the LLama model?\",\n",
")\n",
"print(res.response)"
]
},
{
"cell_type": "code",
"execution_count": 26,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Node ID\t 18dcfeee-ebbc-476f-a4d9-042b26c38aa2\n",
"Title\t Beyond GPT-4: What's New?\n",
"Text\t LLM Variants and Meta's Open Source Before shedding light on four major trends, I'd share the latest Meta's Llama 2 and Code Llama. Meta's Llama 2 represents a sophisticated evolution in LLMs. This suite spans models pretrained and fine-tuned across a parameter spectrum of 7 billion to 70 billion. A specialized derivative, Llama 2-Chat, has been engineered explicitly for dialogue-centric applications. Benchmarking revealed Llama 2's superior performance over most extant open-source chat models. Human-centric evaluations, focusing on safety and utility metrics, positioned Llama 2-Chat as a potential contender against proprietary, closed-source counterparts. The development trajectory of Llama 2 emphasized rigorous fine-tuning methodologies. Meta's transparent delineation of these processes aims to catalyze community-driven advancements in LLMs, underscoring a commitment to collaborative and responsible AI development. Code Llama is built on top of Llama 2 and is available in three models: Code Llama, the foundational code model;Codel Llama - Python specialized for Python;and Code Llama - Instruct, which is fine-tuned for understanding natural language instructions. Based on its benchmark testing, Code Llama outperformed state-of-the-art publicly available LLMs (except GPT-4) on code tasks. Llama 2, Llama 2-Chat, and Code Llama are key steps in LLM development but still have a way to go compared to GPT-4. Meta's open access and commitment to improving these models promise transparent and faster LLM progress in the future. Please refer to the LLM and Llama variants below: From LLMs to Multimodal LLMs, like OpenAI's ChatGPT (GPT-3.5), primarily focus on understanding and generating human language. They've been instrumental in tasks like text generation, translation, and even creative writing. However, their scope is limited to text. Enter multimodal models like GPT-4. These are a new breed of AI models that can understand and generate not just text, but also images, sounds, and potentially other types of data. The term \"multimodal\" refers to their ability to process multiple modes or\n",
"Score\t 0.37941382833585724\n",
"Metadata\t {'title': \"Beyond GPT-4: What's New?\", 'url': 'https://pub.towardsai.net/beyond-gpt-4-whats-new-cbd61a448eb9#dda8', 'source_name': 'towards_ai', 'questions_this_excerpt_can_answer': \"Here are three questions this context can answer specifically, unlikely to be found elsewhere:\\n\\n1. **What are the key differences in training methodologies and fine-tuning processes used for Llama 2 compared to previous versions, and how do these advancements contribute to its improved performance?** This question dives deeper into the specific technical details of Llama 2's development, which are highlighted in the text but not readily available elsewhere.\\n2. **What are the specific safety and utility metrics used to evaluate Llama 2-Chat against proprietary chat models, and how do these metrics differ from those used for evaluating other LLMs?** This question focuses on the unique aspects of evaluating chat models, a specific niche within LLM development, and delves into the methods used for comparing Llama 2-Chat against commercial counterparts.\\n3. **How does Code Llama's specialization in Python differ from its foundational model, and how does this specialization impact its performance on code tasks compared to other LLMs?** This question probes the specific advantages and limitations of Code Llama's Python specialization, offering insight into the practical implications of fine-tuning for specific programming languages.\", 'section_summary': \"Summary: \\n\\nThis section focuses on the advancements in large language models (LLMs) beyond GPT-4, particularly highlighting Meta's open-source contributions: Llama 2 and Code Llama. \\n\\n**Key Topics:**\\n\\n* **Llama 2:** A suite of LLMs spanning 7 billion to 70 billion parameters, trained and fine-tuned for improved performance. \\n * **Llama 2-Chat:** A specialized variant optimized for dialogue-centric applications. \\n* **Code Llama:** A code-focused LLM built upon Llama 2, available in three versions:\\n * **Code Llama (foundational)**\\n * **Code Llama - Python (specialized for Python)**\\n * **Code Llama - Instruct (fine-tuned for natural language instructions)**\\n* **Open-source AI:** Meta's commitment to open-sourcing these models fosters community-driven innovation and faster LLM development.\\n* **Multimodal LLMs:** The introduction of models like GPT-4 that can handle multiple data types (text, images, sound) expands the capabilities of LLMs.\\n\\n**Key Entities:**\\n\\n* **Meta:** The company behind Llama 2 and Code Llama.\\n* **GPT-4:** OpenAI's proprietary, state-of-the-art multimodal LLM.\\n* **Llama 2, Llama 2-Chat, Code Llama:** The specific LLM variants discussed in the excerpt.\", 'excerpt_keywords': 'Keywords: LLMs, Llama 2, Code Llama, GPT-4, Meta, Open Source, Fine-tuning, Multimodal, Dialogue, Python'}\n",
"-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_\n",
"Node ID\t 72d99052-2337-4e2a-8b03-a39e1d01a6ef\n",
"Title\t Meta's Llama 2: Revolutionizing Open Source Language Models for Commercial Use\n",
"Text\t The model demonstrates exceptionally low AI safety violation percentages, surpassing even ChatGPT in safety benchmarks. Finding the right balance between helpfulness and safety when optimizing a model poses significant challenges. While a highly helpful model may be capable of answering any question, including sensitive ones like \"How do I build a bomb?\", it also raises concerns about potential misuse. Thus, striking the perfect equilibrium between providing useful information and ensuring safety is paramount. However, prioritizing safety to an extreme extent can lead to a model that struggles to effectively address a diverse range of questions. This limitation could hinder the model's practical applicability and user experience. Thus, achieving an optimum balance that allows the model to be both helpful and safe is of utmost importance. To strike the right balance between helpfulness and safety, Meta employed two reward models - one for helpfulness and another for safety - to optimize the model's responses. The 34B parameter model has reported higher safety violations than other variants, possibly contributing to the delay in its release. IV. Helpfulness Comparison: Llama 2 Outperforms Competitors Llama 2 emerges as a strong contender in the open-source language model arena, outperforming its competitors in most categories. The 70B parameter model outperforms all other open-source models, while the 7B and 34B models outshine Falcon in all categories and MPT in all categories except coding. Despite being smaller, Llam a2's performance rivals that of Chat GPT 3.5, a significantly larger closed-source model. While GPT 4 and PalM-2-L, with their larger size, outperform Llama 2, this is expected due to their capacity for handling complex language tasks. Llama 2's impressive ability to compete with larger models highlights its efficiency and potential in the market. However, Llama 2 does face challenges in coding and math problems, where models like Chat GPT 4 excel, given their significantly larger size. Chat GPT 4 performed significantly better than Llama 2 for coding (HumanEval benchmark)and math problem tasks (GSM8k benchmark). Open-source AI technologies, like Llama 2, continue to advance, offering\n",
"Score\t 0.37239497005046795\n",
"Metadata\t {'title': \"Meta's Llama 2: Revolutionizing Open Source Language Models for Commercial Use\", 'url': 'https://pub.towardsai.net/metas-llama-2-revolutionizing-open-source-language-models-for-commercial-use-1492bec112b#148f', 'source_name': 'towards_ai', 'questions_this_excerpt_can_answer': \"Here are 3 questions this context can answer specifically, drawing on the provided information:\\n\\n1. **How did Meta balance the trade-off between helpfulness and safety in Llama 2, and what specific methods did they use?** This question delves into the core of Llama 2's development, exploring the technical strategies employed to optimize for both helpfulness and safety, which is a key challenge in large language model design.\\n2. **How does the 34B parameter model of Llama 2 compare in terms of safety violations to other variants, and what potential reasons are given for its delayed release?** The text hints at differences in safety performance between the various Llama 2 models. This question seeks clarification on the specific performance discrepancies and potential explanations for the 34B model's delayed release.\\n3. **What are the specific benchmarks and categories where Llama 2 outperforms other open-source language models, like Falcon and MPT?** The context mentions Llama 2's impressive performance compared to competitors. This question focuses on the specific benchmarks used to assess these capabilities and the categories where Llama 2 demonstrates its superior performance.\", 'prev_section_summary': \"## Summary: \\n\\nThis section focuses on Meta's release of Llama 2, a powerful open-source language model with significant implications for commercial use. \\n\\n**Key Topics:**\\n\\n* **Open-Source and Commercial Use:** Llama 2 is the first open-source language model designed for commercial applications, allowing businesses to integrate it into their products and services. This opens doors for AI-powered applications.\\n* **Model Variations:** Llama 2 comes in four sizes (7B, 13B, 34B, and 70B parameters), with varying performance based on their training data and resources. \\n* **Training and Resources:** The 70B model was trained on a massive dataset of 2 trillion tokens, consuming vast GPU hours. Its chat version was further refined with human annotations. \\n* **Safety and Alignment:** Meta prioritized safety and alignment in Llama 2's development, resulting in better performance on safety benchmarks compared to ChatGPT. However, achieving the right balance between helpfulness and safety is a continuous challenge in large language model development. \\n\\n**Key Entities:**\\n\\n* **Llama 2:** The focus of the article, a new open-source language model from Meta.\\n* **Meta:** The company behind Llama 2.\\n* **ChatGPT:** A leading chatbot and language model used for comparison with Llama 2.\\n* **Azure and AWS:** Cloud platforms where Llama 2 is available.\", 'section_summary': \"Summary: \\n\\nThis section focuses on Meta's Llama 2, an open-source language model. The key topics are:\\n\\n**Safety and Helpfulness:** \\n* The article discusses the challenge of balancing helpfulness and safety in large language models. 
\\n* Meta used two reward models (one for helpfulness, one for safety) to optimize Llama 2.\\n* The 34B parameter model has higher safety violations than other variants, which may be a reason for its delayed release.\\n\\n**Performance Comparison:**\\n* Llama 2 outperforms its competitors (Falcon and MPT) in most categories.\\n* The 70B parameter model surpasses all other open-source models, while the 7B and 34B models beat Falcon and MPT in most categories.\\n* Llama 2 performs well even compared to larger, closed-source models like ChatGPT 3.5, though it struggles with coding and math problems compared to GPT-4 and PaLM-2-L.\\n\\n**Key Entities:**\\n* **Llama 2**: The open-source language model developed by Meta.\\n* **ChatGPT**: A closed-source language model from OpenAI.\\n* **Falcon and MPT**: Other open-source language models. \\n* **GPT-4 and PaLM-2-L**: Larger, closed-source language models.\\n* **HumanEval and GSM8k**: Benchmarks used to evaluate coding and math problem-solving abilities.\", 'excerpt_keywords': 'Keywords: Llama 2, open-source, language model, Meta, ChatGPT, safety, helpfulness, Falcon, MPT, benchmark'}\n",
"-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_\n",
"Node ID\t 910478c2-4c5e-477f-b7ed-553c47f29c31\n",
"Title\t Fine-Tuning a Llama-2 7B Model for Python Code Generation\n",
"Text\t only fine-tuning a small number of additional parameters, with virtually all model parameters remaining frozen. PEFT has been found to produce good generalization with relatively low-volume datasets. Furthermore, it enhances the reusability and portability of the model, as the small checkpoints obtained can be easily added to the base model, and the base model can be easily fine-tuned and reused in multiple scenarios by adding the PEFT parameters. Finally, since the base model is not adjusted, all the knowledge acquired in the pre-training phase is preserved, thus avoiding catastrophic forgetting. Most widely used PEFT techniques aim to keep the pre-trained base model untouched and add new layers or parameters on top of it. These layers are called \"Adapters\" and the technique of their adjustment \"adapter-tuning\", we add these layers to the pre-trained base model and only train the parameters of these new layers. However, a serious problem with this approach is that these layers lead to increased latency in the inference phase, which makes the process inefficient in many scenarios.In the LoRa technique, a Low-Rank Adaptation of Large Language Models, the idea is not to include new layers but to add values to the parameters in a way that avoids this scary problem of latency in the inference phase. LoRa trains and stores the changes of the additional weights while freezing all the weights of the pre-trained model. Therefore, we train a new weights matrix with the changes in the pre-trained model matrix, and this new matrix is decomposed into 2 Low-rank matrices as explained here: Merge the base model and the adapter weights As we mention, we have trained \"modification weights\" on the base model, our final model requires merging the pretrained model and the adapters in a single model. You can find and download the model in my Hugging Face account edumunozsala/llama-27b-int4-python-code-20k. Give it a try! Inferencing or generating Python code And finally, we will show you how you can download the model from the Hugging Face Hub and call the model to generate an accurate result: Thanks to Maxime Labonne for an excellent article [9] and Philipp Schmid who provides an inspiring\n",
"Score\t 0.37140718553455054\n",
"Metadata\t {'title': 'Fine-Tuning a Llama-2 7B Model for Python Code Generation', 'url': 'https://pub.towardsai.net/fine-tuning-a-llama-2-7b-model-for-python-code-generation-865453afdf73#bf4e', 'source_name': 'towards_ai', 'questions_this_excerpt_can_answer': 'Here are 3 questions that the provided context can answer specifically, which are unlikely to be found elsewhere:\\n\\n1. **How does LoRa address the latency issue associated with adapter-tuning methods in fine-tuning large language models?** \\n * The context explains that LoRa avoids adding new layers, instead focusing on adding weights to existing parameters, thus preserving the base model and avoiding inference latency. \\n2. **What is the specific Hugging Face model name and location for the fine-tuned Llama-2 7B model for Python code generation discussed in the article?** \\n * The context explicitly mentions the model is available on Hugging Face under the name \"edumunozsala/llama-27b-int4-python-code-20k\".\\n3. **What are the advantages of using PEFT (Parameter-Efficient Fine-Tuning) techniques, particularly in the context of this fine-tuned Llama-2 model for Python code generation?**\\n * The context highlights the benefits of PEFT including good generalization with small datasets, model reusability and portability, and preservation of pre-training knowledge, all of which are relevant to the specific application of Python code generation.', 'prev_section_summary': 'Summary:\\n\\nThis section focuses on the fine-tuning of a Llama-2 7B model for Python code generation. It highlights two key techniques:\\n\\n**1. Environment and Tools:**\\n* The fine-tuning process utilizes Google Colab environment and a Python script for interactive and unattended training respectively.\\n* The training process requires significant computational resources, utilizing a T4 instance for initial runs and an A100 instance for full dataset and epoch training.\\n* The trained model is uploaded to the Huggingface hub for sharing with other users.\\n\\n**2. Fine-tuning Techniques:**\\n* The section emphasizes Parameter-Efficient Fine-Tuning (PEFT) techniques, particularly Lora and QLora, as efficient methods for training large language models.\\n* PEFT methods significantly reduce RAM and storage requirements by only fine-tuning a small subset of model parameters, allowing for training on a single GPU.\\n* This approach offers advantages like improved generalization, reusability, portability, and prevention of catastrophic forgetting, while preserving the knowledge acquired during the pre-training phase.', 'section_summary': 'Summary: \\n\\nThis section discusses the fine-tuning of a Llama-2 7B model for Python code generation, highlighting the use of Parameter-Efficient Fine-Tuning (PEFT) techniques, particularly the LoRa method. \\n\\n* **PEFT Techniques:** The article emphasizes the benefits of PEFT, including good generalization with small datasets, model reusability and portability, and preservation of pre-training knowledge. \\n* **LoRa:** The section explains how LoRa addresses the latency issue associated with adapter-tuning methods by adding weights to existing parameters, rather than adding new layers. 
This approach avoids inference latency while still enabling fine-tuning.\\n* **Hugging Face Model:** The fine-tuned Llama-2 7B model for Python code generation is available on Hugging Face under the name \"edumunozsala/llama-27b-int4-python-code-20k\".\\n* **Inference:** The article concludes by providing instructions on how to download the model from Hugging Face and use it to generate Python code.', 'excerpt_keywords': 'Keywords: Fine-tuning, Llama-2, Python Code Generation, Parameter-Efficient Fine-Tuning (PEFT), LoRa, Hugging Face, Inference, Latency, Model Reusability, Catastrophic Forgetting.'}\n",
"-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_\n"
]
}
],
"source": [
"for src in res.source_nodes:\n",
" print(\"Node ID\\t\", src.node_id)\n",
" print(\"Title\\t\", src.metadata[\"title\"])\n",
" print(\"Text\\t\", src.text)\n",
" print(\"Score\\t\", src.score)\n",
" print(\"Metadata\\t\", src.metadata)\n",
" print(\"-_\" * 20)"
]
},
{
"cell_type": "code",
"execution_count": 27,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"'Mistral is a French startup that specializes in developing language models. They have released a variety of models, some open-source and some accessible only through an API. Their models are known for their efficiency and strong performance, particularly in multilingual capabilities and instruction following. \\n'"
]
},
"execution_count": 27,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"res = query_engine.query(\"What is the Mistral model?\")\n",
"print(res.response)"
]
},
{
"cell_type": "code",
"execution_count": 28,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Node ID\t a5dcf99d-0e3f-4c9a-b003-9766cdbad1c1\n",
"Text\t fundraising of €105 million ($117 million) with investors including the American fund Lightspeed Venture Partners, Eric Schmidt, Xavier Niel and JCDecaux. The valuation is then estimated by the Financial Times at €240 million ($267 million).\n",
"On 27 September 2023, the company made its language processing model “Mistral 7B” available under the free Apache 2.0 license. This model has 7 billion parameters, a small size compared to its competitors.\n",
"On 10 December 2023, Mistral AI announced that it had raised €385 million ($428 million) as part of its second fundraising. This round of financing notably involves the Californian fund Andreessen Horowitz, BNP Paribas and the software publisher Salesforce.\n",
"On 11 December 2023, the company released the Mixtral 8x7B model with 46.7 billion parameters but using only 12.9 billion per token thanks to the mixture of experts architecture. The model masters 5 languages (French, Spanish, Italian, English and German) and outperforms, according to its developers' tests, the \"LLama 2 70B\" model from Meta. A version trained to follow instructions and called “Mixtral 8x7B Instruct” is also offered.\n",
"On 26 February 2024, Microsoft announced a new partnership with the company to expand its presence in the rapidly evolving artificial intelligence industry. Under the agreement, Mistral's rich language models will be available on Microsoft's Azure cloud, while the multilingual conversational assistant \"Le Chat\" will be launched in the style of ChatGPT.\n",
"On 10 April 2024, the company released the mixture of expert models, Mixtral 8x22B, offering high performance on various benchmarks compared to other open models.\n",
"On 16 April 2024, reporting revealed that Mistral was in talks to raise €500 million, a deal that would more than double its current valuation to at least €5 billion.\n",
"\n",
"\n",
"== Models ==\n",
"\n",
"\n",
"=== Open Weight Models ===\n",
"\n",
"\n",
"==== Mistral 7B ====\n",
"Mistral 7B is a 7.3B parameter language model using the transformers architecture. Officially released on September 27, 2023, via a BitTorrent magnet link, and Hugging Face. The model was released under the Apache\n",
"Score\t 0.5644012181648649\n",
"-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_\n",
"Node ID\t bea70bf8-5da1-4ce4-a0e2-7e89019c4ebf\n",
"Text\t Commercial purposes.\n",
"Mathstral 7B\n",
"Another 7B model that Mistral released on 16 July 2024. it's focused on STEM subjects, it achieved 56.6% on MATH benchmark and 63.47% on MMLU. it was released under Apache 2.0 license. it has a context length of 32k tokens. it was produced in collaboration with Project Numina.\n",
"Codestral Mamba 7B\n",
"Codestral Mamba is based on Mamba 2 architecture, which allows it to generate responses even with longer input. and unlike Codestral, it was released under Apache 2.0 license. only instruct version was released.\n",
"\n",
"\n",
"=== API-Only Models ===\n",
"Unlike Mistral 7B, Mixtral 8x7B and Mixtral 8x22B, the following models are closed-source and only available through the Mistral API.\n",
"\n",
"\n",
"==== Mistral Large ====\n",
"Mistral Large was launched on February 26, 2024, and Mistral claims it is second in the world only to OpenAI's GPT-4.\n",
"It is fluent in English, French, Spanish, German, and Italian, with Mistral claiming understanding of both grammar and cultural context, and provides coding capabilities. As of early 2024, it is Mistral's flagship AI. It is also available on Microsoft Azure.\n",
"Mistral Large 2 was released in July 2024, with 123B parameters and 128k context window. It is available for free with a Mistral Research Licence, and with a commercial licence for commercial purposes. \n",
"\n",
"\n",
"==== Mistral Medium ====\n",
"Mistral Medium is trained in various languages including English, French, Italian, German, Spanish and code with a score of 8.6 on MT-Bench. It is ranked in performance above Claude and below GPT-4 on the LMSys ELO Arena benchmark. \n",
"The number of parameters, and architecture of Mistral Medium is not known as Mistral has not published public information about it.\n",
"\n",
"\n",
"==== Mistral Small ====\n",
"Like the Large model, Small was launched on February 26, 2024. It is intended to be a light-weight model for low latency, with better performance than Mixtral 8x7B.\n",
"\n",
"\n",
"== References ==\n",
"\n",
"\n",
"== External links ==\n",
"Official website\n",
"Score\t 0.5546834502171653\n",
"-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_\n"
]
}
],
"source": [
"for src in res.source_nodes:\n",
" print(\"Node ID\\t\", src.node_id)\n",
" print(\"Text\\t\", src.text)\n",
" print(\"Score\\t\", src.score)\n",
" print(\"-_\" * 20)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# OpenAI Agent"
]
},
{
"cell_type": "code",
"execution_count": 29,
"metadata": {},
"outputs": [],
"source": [
"from llama_index.agent.openai import OpenAIAgent"
]
},
{
"cell_type": "code",
"execution_count": 30,
"metadata": {},
"outputs": [],
"source": [
"system_message_openai_agent = \"\"\"You are an AI teacher, answering questions from students of an applied AI course on Large Language Models (LLMs or llm) and Retrieval Augmented Generation (RAG) for LLMs. Topics covered include training models, fine-tuning models, giving memory to LLMs, prompting tips, hallucinations and bias, vector databases, transformer architectures, embeddings, RAG frameworks, Langchain, LlamaIndex, making LLMs interact with tools, AI agents, reinforcement learning with human feedback. Questions should be understood in this context.\n",
"\n",
"Your answers are aimed to teach students, so they should be complete, clear, and easy to understand.\n",
"\n",
"Use the available tools to gather insights pertinent to the field of AI. Always use two tools at the same time. These tools accept a string (a user query rewritten as a statement) and return informative content regarding the domain of AI.\n",
"e.g:\n",
"User question: 'How can I fine-tune an LLM?'\n",
"Input to the tool: 'Fine-tuning an LLM'\n",
"\n",
"User question: How can quantize an LLM?\n",
"Input to the tool: 'Quantization for LLMs'\n",
"\n",
"User question: 'Teach me how to build an AI agent\"'\n",
"Input to the tool: 'Building an AI Agent'\n",
"\n",
"Only some information returned by the tools might be relevant to the question, so ignore the irrelevant part and answer the question with what you have.\n",
"\n",
"Your responses are exclusively based on the output provided by the tools. Refrain from incorporating information not directly obtained from the tool's responses.\n",
"\n",
"When the conversation deepens or shifts focus within a topic, adapt your input to the tools to reflect these nuances. This means if a user requests further elaboration on a specific aspect of a previously discussed topic, you should reformulate your input to the tool to capture this new angle or more profound layer of inquiry.\n",
"\n",
"Provide comprehensive answers, ideally structured in multiple paragraphs, drawing from the tool's variety of relevant details. The depth and breadth of your responses should align with the scope and specificity of the information retrieved.\n",
"\n",
"Should the tools repository lack information on the queried topic, politely inform the user that the question transcends the bounds of your current knowledge base, citing the absence of relevant content in the tool's documentation.\n",
"\n",
"At the end of your answers, always invite the students to ask deeper questions about the topic if they have any. Make sure to reformulate the question to the tool to capture this new angle or more profound layer of inquiry.\n",
"\n",
"Do not refer to the documentation directly, but use the information provided within it to answer questions.\n",
"\n",
"If code is provided in the information, share it with the students. It's important to provide complete code blocks so they can execute the code when they copy and paste them.\n",
"\n",
"Make sure to format your answers in Markdown format, including code blocks and snippets.\n",
"\n",
"Politely reject questions not related to AI, while being cautious not to reject unfamiliar terms or acronyms too quickly.\"\"\""
]
},
{
"cell_type": "code",
"execution_count": 31,
"metadata": {},
"outputs": [],
"source": [
"from llama_index.llms.openai import OpenAI\n",
"\n",
"llm = OpenAI(model=\"gpt-4o\")\n",
"\n",
"agent = OpenAIAgent.from_tools(\n",
" llm=llm,\n",
" tools=[llama_tool, mistral_tool],\n",
" system_prompt=system_message_openai_agent,\n",
")"
]
},
{
"cell_type": "code",
"execution_count": 32,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"The **LLaMA (Large Language Model Meta AI)** is a large language model developed by Meta AI. It is designed to perform a variety of natural language processing tasks by leveraging a vast amount of training data and sophisticated neural network architectures.\n",
"\n",
"### Key Points about LLaMA:\n",
"\n",
"1. **Developer**: Meta AI, the artificial intelligence research division of Meta (formerly Facebook).\n",
"2. **Model Size**: One of the notable versions is the LLaMA 70B, which indicates it has 70 billion parameters.\n",
"3. **Performance**: The LLaMA models are designed to be competitive with other state-of-the-art language models. However, in some benchmarks, models like Mistral AI's Mixtral 8x7B have been noted to outperform LLaMA 70B.\n",
"4. **Variants**: There are different versions of the LLaMA model, including LLaMA-2, which represents an evolution or improvement over the original LLaMA models.\n",
"\n",
"LLaMA models are part of the broader trend in AI research to develop increasingly powerful and capable language models that can understand and generate human-like text. These models are used in a variety of applications, from chatbots to advanced research tools.\n",
"\n",
"Feel free to ask more specific questions about LLaMA or any other related topics!\n"
]
}
],
"source": [
"response = agent.chat(\"What is the LLama model?\")\n",
"print(response.response)"
]
},
{
"cell_type": "code",
"execution_count": 33,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"The **Mistral model** refers to a range of large language models (LLMs) developed by Mistral AI. These models are designed to perform a variety of natural language processing tasks and are available in both open-source and API-only formats.\n",
"\n",
"### Key Points about Mistral Models:\n",
"\n",
"1. **Developer**: Mistral AI, a company focused on developing advanced language models.\n",
"2. **Model Variants**:\n",
" - **Open-Source Models**:\n",
" - **Mistral 7B**: A general-purpose language model.\n",
" - **Mixtral 8x7B**: A model that combines multiple smaller models to enhance performance.\n",
" - **Mixtral 8x22B**: Another composite model with a larger parameter count.\n",
" - **Mathstral 7B**: Specialized for tasks related to STEM (Science, Technology, Engineering, and Mathematics).\n",
" - **Codestral Mamba 7B**: Tailored for code generation tasks.\n",
" - **API-Only Models**:\n",
" - **Mistral Small**\n",
" - **Mistral Medium**\n",
" - **Mistral Large**\n",
" \n",
"3. **Performance**: Mistral models are designed to be competitive with other leading LLMs such as LLaMA and GPT-3.5. They have shown strong performance in various benchmarks, reflecting their robustness and versatility.\n",
"\n",
"4. **Specialization**: Some models are specialized for specific tasks, such as code generation or STEM-related tasks, while others are more general-purpose.\n",
"\n",
"Mistral AI's commitment to both open-source innovation and performance excellence is evident in the diversity and capabilities of their models. These models cater to a wide range of applications, from general text generation to specialized domains.\n",
"\n",
"If you have more specific questions about any of the Mistral models or their applications, feel free to ask!\n"
]
}
],
"source": [
"response = agent.chat(\"What is the Mistral model?\")\n",
"print(response.response)"
]
},
{
"cell_type": "code",
"execution_count": 34,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"I'm here to help with questions related to AI, particularly in the context of Large Language Models (LLMs) and related technologies. If you have any questions about these topics, feel free to ask!\n",
"\n",
"For non-AI related queries, such as recipes, you might want to consult a cooking website or a recipe book. If you have any questions about AI, please let me know!\n"
]
}
],
"source": [
"response = agent.chat(\"Write the recipe for a chocolate cake.\")\n",
"print(response.response)"
]
},
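{
"cell_type": "markdown",
"metadata": {},
"source": [
"The `chat` calls above return the whole answer at once. If you want to display tokens as they are produced, `OpenAIAgent` also supports streaming. The next cell is a minimal sketch of that pattern, assuming the `stream_chat` method and its `response_gen` token generator are available in this LlamaIndex version; it reuses the `agent` created above."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Minimal streaming sketch (assumes OpenAIAgent.stream_chat and response_gen are available\n",
"# in this LlamaIndex version). Reuses the `agent` created above.\n",
"streaming_response = agent.stream_chat(\"What is the LLama model?\")\n",
"\n",
"# Print each token as soon as it is generated.\n",
"for token in streaming_response.response_gen:\n",
"    print(token, end=\"\")"
]
},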
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Code related questions to GPT-4o, the remaining questions to Gemini"
]
},
{
"cell_type": "code",
"execution_count": 48,
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"I0000 00:00:1723473141.439669 5318658 check_gcp_environment_no_op.cc:29] ALTS: Platforms other than Linux and Windows are not supported\n"
]
}
],
"source": [
"from llama_index.agent.openai import OpenAIAgent\n",
"from llama_index.llms.openai import OpenAI\n",
"from llama_index.llms.gemini import Gemini\n",
"from llama_index.core.query_engine import RouterQueryEngine\n",
"from llama_index.core.selectors import PydanticSingleSelector\n",
"from llama_index.core.tools import QueryEngineTool\n",
"\n",
"# initialize LLMs\n",
"gpt_4o_llm = OpenAI(model=\"gpt-4o\")\n",
"gemini_llm = Gemini(model=\"models/gemini-1.5-flash\", temperature=1, max_tokens=512)\n",
"\n",
"# define query engines\n",
"llama_query_engine_code = vector_index.as_query_engine(\n",
" llm=gpt_4o_llm,\n",
" similarity_top_k=3,\n",
" embed_model=OpenAIEmbedding(model=\"text-embedding-3-small\", mode=\"text_search\"),\n",
")\n",
"llama_query_engine_rest = vector_index.as_query_engine(\n",
" llm=gemini_llm,\n",
" similarity_top_k=3,\n",
" embed_model=OpenAIEmbedding(model=\"text-embedding-3-small\", mode=\"text_search\"),\n",
")\n",
"\n",
"# define tools for Llama\n",
"llama_tool_code = QueryEngineTool.from_defaults(\n",
" query_engine=llama_query_engine_code,\n",
" description=\"Useful for code-related questions about the LLama LLM created by Meta\",\n",
" name=\"LLamaCodeTool\",\n",
")\n",
"llama_tool_rest = QueryEngineTool.from_defaults(\n",
" query_engine=llama_query_engine_rest,\n",
" description=\"Useful for non-code-related questions about the LLama LLM created by Meta\",\n",
" name=\"LLamaGeneralTool\",\n",
")\n",
"\n",
"# Initialize OpenAIAgent with the system message and the router query engine\n",
"agent = OpenAIAgent.from_tools(\n",
" llm=gpt_4o_llm, # The base LLM, used only if no other tools apply\n",
" tools=[llama_tool_code, llama_tool_rest],\n",
" system_prompt=system_message_openai_agent,\n",
")"
]
},
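{
"cell_type": "markdown",
"metadata": {},
"source": [
"In this setup the `OpenAIAgent` picks a tool based on the tool descriptions. The `RouterQueryEngine` and `PydanticSingleSelector` imported above offer an alternative without an agent loop: route each query directly to one of the two query engines. The next cell is a minimal sketch of that alternative rather than the configuration used in this notebook; it assumes these constructor arguments are valid for this LlamaIndex version and reuses the tools and LLMs defined above."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Hedged alternative sketch: route between the two query engines with a RouterQueryEngine\n",
"# instead of an agent. Reuses `llama_tool_code`, `llama_tool_rest`, and `gpt_4o_llm` from above.\n",
"# Imports are repeated here so the cell stands alone.\n",
"from llama_index.core.query_engine import RouterQueryEngine\n",
"from llama_index.core.selectors import PydanticSingleSelector\n",
"\n",
"router_query_engine = RouterQueryEngine(\n",
"    selector=PydanticSingleSelector.from_defaults(llm=gpt_4o_llm),\n",
"    query_engine_tools=[llama_tool_code, llama_tool_rest],\n",
")\n",
"\n",
"res = router_query_engine.query(\"How do I fine-tune the LLama model? Write the code for it.\")\n",
"print(res.response)"
]
},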
{
"cell_type": "code",
"execution_count": 53,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"LLamaGeneralTool\n",
"LLamaCodeTool\n"
]
}
],
"source": [
"# Test the agent with a code-related question\n",
"response = agent.chat(\"How do I fine-tune the LLama model? Write the code for it.\")\n",
"for source in response.sources:\n",
" print(source.tool_name)"
]
},
{
"cell_type": "code",
"execution_count": 54,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"LLamaGeneralTool\n"
]
}
],
"source": [
"# Test the agent with a code-related question\n",
"response = agent.chat(\"What is the relationship between Llama and Meta?\")\n",
"for source in response.sources:\n",
" print(source.tool_name)"
]
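},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"The two cells above only show which tool the agent selected. To check the routing together with the generated answer, you can also print `response.response`, as in the small sketch below, which simply reuses the `response` from the previous cell."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Show the tools that were called for the last question together with the generated answer.\n",
"# Reuses the `response` object from the previous cell.\n",
"print(\"Tools used:\", [source.tool_name for source in response.sources])\n",
"print(\"Answer:\\n\", response.response)"
]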
}
],
"metadata": {
"colab": {
"authorship_tag": "ABX9TyMcBonOXFUEEHJsKREchiOp",
"include_colab_link": true,
"provenance": []
},
"kernelspec": {
"display_name": "Python 3",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.11.4"
},
"widgets": {
"application/vnd.jupyter.widget-state+json": {
"0245f2604e4d49c8bd0210302746c47b": {
"model_module": "@jupyter-widgets/base",
"model_module_version": "1.2.0",
"model_name": "LayoutModel",
"state": {
"_model_module": "@jupyter-widgets/base",
"_model_module_version": "1.2.0",
"_model_name": "LayoutModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "LayoutView",
"align_content": null,
"align_items": null,
"align_self": null,
"border": null,
"bottom": null,
"display": null,
"flex": null,
"flex_flow": null,
"grid_area": null,
"grid_auto_columns": null,
"grid_auto_flow": null,
"grid_auto_rows": null,
"grid_column": null,
"grid_gap": null,
"grid_row": null,
"grid_template_areas": null,
"grid_template_columns": null,
"grid_template_rows": null,
"height": null,
"justify_content": null,
"justify_items": null,
"left": null,
"margin": null,
"max_height": null,
"max_width": null,
"min_height": null,
"min_width": null,
"object_fit": null,
"object_position": null,
"order": null,
"overflow": null,
"overflow_x": null,
"overflow_y": null,
"padding": null,
"right": null,
"top": null,
"visibility": null,
"width": null
}
},
"134210510d49476e959dd7d032bbdbdc": {
"model_module": "@jupyter-widgets/controls",
"model_module_version": "1.5.0",
"model_name": "DescriptionStyleModel",
"state": {
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "DescriptionStyleModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "StyleView",
"description_width": ""
}
},
"13b9c5395bca4c3ba21265240cb936cf": {
"model_module": "@jupyter-widgets/base",
"model_module_version": "1.2.0",
"model_name": "LayoutModel",
"state": {
"_model_module": "@jupyter-widgets/base",
"_model_module_version": "1.2.0",
"_model_name": "LayoutModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "LayoutView",
"align_content": null,
"align_items": null,
"align_self": null,
"border": null,
"bottom": null,
"display": null,
"flex": null,
"flex_flow": null,
"grid_area": null,
"grid_auto_columns": null,
"grid_auto_flow": null,
"grid_auto_rows": null,
"grid_column": null,
"grid_gap": null,
"grid_row": null,
"grid_template_areas": null,
"grid_template_columns": null,
"grid_template_rows": null,
"height": null,
"justify_content": null,
"justify_items": null,
"left": null,
"margin": null,
"max_height": null,
"max_width": null,
"min_height": null,
"min_width": null,
"object_fit": null,
"object_position": null,
"order": null,
"overflow": null,
"overflow_x": null,
"overflow_y": null,
"padding": null,
"right": null,
"top": null,
"visibility": null,
"width": null
}
},
"193aef33d9184055bb9223f56d456de6": {
"model_module": "@jupyter-widgets/base",
"model_module_version": "1.2.0",
"model_name": "LayoutModel",
"state": {
"_model_module": "@jupyter-widgets/base",
"_model_module_version": "1.2.0",
"_model_name": "LayoutModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "LayoutView",
"align_content": null,
"align_items": null,
"align_self": null,
"border": null,
"bottom": null,
"display": null,
"flex": null,
"flex_flow": null,
"grid_area": null,
"grid_auto_columns": null,
"grid_auto_flow": null,
"grid_auto_rows": null,
"grid_column": null,
"grid_gap": null,
"grid_row": null,
"grid_template_areas": null,
"grid_template_columns": null,
"grid_template_rows": null,
"height": null,
"justify_content": null,
"justify_items": null,
"left": null,
"margin": null,
"max_height": null,
"max_width": null,
"min_height": null,
"min_width": null,
"object_fit": null,
"object_position": null,
"order": null,
"overflow": null,
"overflow_x": null,
"overflow_y": null,
"padding": null,
"right": null,
"top": null,
"visibility": null,
"width": null
}
},
"3fbabd8a8660461ba5e7bc08ef39139a": {
"model_module": "@jupyter-widgets/controls",
"model_module_version": "1.5.0",
"model_name": "HBoxModel",
"state": {
"_dom_classes": [],
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "HBoxModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/controls",
"_view_module_version": "1.5.0",
"_view_name": "HBoxView",
"box_style": "",
"children": [
"IPY_MODEL_df2365556ae242a2ab1a119f9a31a561",
"IPY_MODEL_5f4b9d32df8f446e858e4c289dc282f9",
"IPY_MODEL_5b588f83a15d42d9aca888e06bbd95ff"
],
"layout": "IPY_MODEL_ad073bca655540809e39f26538d2ec0d"
}
},
"47a4586384274577a726c57605e7f8d9": {
"model_module": "@jupyter-widgets/controls",
"model_module_version": "1.5.0",
"model_name": "DescriptionStyleModel",
"state": {
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "DescriptionStyleModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "StyleView",
"description_width": ""
}
},
"4a172e8c6aa44e41a42fc1d9cf714fd0": {
"model_module": "@jupyter-widgets/controls",
"model_module_version": "1.5.0",
"model_name": "HTMLModel",
"state": {
"_dom_classes": [],
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "HTMLModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/controls",
"_view_module_version": "1.5.0",
"_view_name": "HTMLView",
"description": "",
"description_tooltip": null,
"layout": "IPY_MODEL_e7937a1bc68441a080374911a6563376",
"placeholder": "",
"style": "IPY_MODEL_e532ed7bfef34f67b5fcacd9534eb789",
"value": " 108/108 [00:03<00:00, 33.70it/s]"
}
},
"5b588f83a15d42d9aca888e06bbd95ff": {
"model_module": "@jupyter-widgets/controls",
"model_module_version": "1.5.0",
"model_name": "HTMLModel",
"state": {
"_dom_classes": [],
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "HTMLModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/controls",
"_view_module_version": "1.5.0",
"_view_name": "HTMLView",
"description": "",
"description_tooltip": null,
"layout": "IPY_MODEL_af9b6ae927dd4764b9692507791bc67e",
"placeholder": "",
"style": "IPY_MODEL_134210510d49476e959dd7d032bbdbdc",
"value": " 14/14 [00:00<00:00, 21.41it/s]"
}
},
"5c7973afd79349ed997a69120d0629b2": {
"model_module": "@jupyter-widgets/controls",
"model_module_version": "1.5.0",
"model_name": "ProgressStyleModel",
"state": {
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "ProgressStyleModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "StyleView",
"bar_color": null,
"description_width": ""
}
},
"5f4b9d32df8f446e858e4c289dc282f9": {
"model_module": "@jupyter-widgets/controls",
"model_module_version": "1.5.0",
"model_name": "FloatProgressModel",
"state": {
"_dom_classes": [],
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "FloatProgressModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/controls",
"_view_module_version": "1.5.0",
"_view_name": "ProgressView",
"bar_style": "success",
"description": "",
"description_tooltip": null,
"layout": "IPY_MODEL_96a3bdece738481db57e811ccb74a974",
"max": 14,
"min": 0,
"orientation": "horizontal",
"style": "IPY_MODEL_5c7973afd79349ed997a69120d0629b2",
"value": 14
}
},
"5f9bb065c2b74d2e8ded32e1306a7807": {
"model_module": "@jupyter-widgets/controls",
"model_module_version": "1.5.0",
"model_name": "HBoxModel",
"state": {
"_dom_classes": [],
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "HBoxModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/controls",
"_view_module_version": "1.5.0",
"_view_name": "HBoxView",
"box_style": "",
"children": [
"IPY_MODEL_73a06bc546a64f7f99a9e4a135319dcd",
"IPY_MODEL_ce48deaf4d8c49cdae92bfdbb3a78df0",
"IPY_MODEL_4a172e8c6aa44e41a42fc1d9cf714fd0"
],
"layout": "IPY_MODEL_0245f2604e4d49c8bd0210302746c47b"
}
},
"73a06bc546a64f7f99a9e4a135319dcd": {
"model_module": "@jupyter-widgets/controls",
"model_module_version": "1.5.0",
"model_name": "HTMLModel",
"state": {
"_dom_classes": [],
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "HTMLModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/controls",
"_view_module_version": "1.5.0",
"_view_name": "HTMLView",
"description": "",
"description_tooltip": null,
"layout": "IPY_MODEL_e956dfab55084a9cbe33c8e331b511e7",
"placeholder": "",
"style": "IPY_MODEL_cb394578badd43a89850873ad2526542",
"value": "Generating embeddings: 100%"
}
},
"96a3bdece738481db57e811ccb74a974": {
"model_module": "@jupyter-widgets/base",
"model_module_version": "1.2.0",
"model_name": "LayoutModel",
"state": {
"_model_module": "@jupyter-widgets/base",
"_model_module_version": "1.2.0",
"_model_name": "LayoutModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "LayoutView",
"align_content": null,
"align_items": null,
"align_self": null,
"border": null,
"bottom": null,
"display": null,
"flex": null,
"flex_flow": null,
"grid_area": null,
"grid_auto_columns": null,
"grid_auto_flow": null,
"grid_auto_rows": null,
"grid_column": null,
"grid_gap": null,
"grid_row": null,
"grid_template_areas": null,
"grid_template_columns": null,
"grid_template_rows": null,
"height": null,
"justify_content": null,
"justify_items": null,
"left": null,
"margin": null,
"max_height": null,
"max_width": null,
"min_height": null,
"min_width": null,
"object_fit": null,
"object_position": null,
"order": null,
"overflow": null,
"overflow_x": null,
"overflow_y": null,
"padding": null,
"right": null,
"top": null,
"visibility": null,
"width": null
}
},
"abfc9aa911ce4a5ea81c7c451f08295f": {
"model_module": "@jupyter-widgets/controls",
"model_module_version": "1.5.0",
"model_name": "ProgressStyleModel",
"state": {
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "ProgressStyleModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "StyleView",
"bar_color": null,
"description_width": ""
}
},
"ad073bca655540809e39f26538d2ec0d": {
"model_module": "@jupyter-widgets/base",
"model_module_version": "1.2.0",
"model_name": "LayoutModel",
"state": {
"_model_module": "@jupyter-widgets/base",
"_model_module_version": "1.2.0",
"_model_name": "LayoutModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "LayoutView",
"align_content": null,
"align_items": null,
"align_self": null,
"border": null,
"bottom": null,
"display": null,
"flex": null,
"flex_flow": null,
"grid_area": null,
"grid_auto_columns": null,
"grid_auto_flow": null,
"grid_auto_rows": null,
"grid_column": null,
"grid_gap": null,
"grid_row": null,
"grid_template_areas": null,
"grid_template_columns": null,
"grid_template_rows": null,
"height": null,
"justify_content": null,
"justify_items": null,
"left": null,
"margin": null,
"max_height": null,
"max_width": null,
"min_height": null,
"min_width": null,
"object_fit": null,
"object_position": null,
"order": null,
"overflow": null,
"overflow_x": null,
"overflow_y": null,
"padding": null,
"right": null,
"top": null,
"visibility": null,
"width": null
}
},
"af9b6ae927dd4764b9692507791bc67e": {
"model_module": "@jupyter-widgets/base",
"model_module_version": "1.2.0",
"model_name": "LayoutModel",
"state": {
"_model_module": "@jupyter-widgets/base",
"_model_module_version": "1.2.0",
"_model_name": "LayoutModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "LayoutView",
"align_content": null,
"align_items": null,
"align_self": null,
"border": null,
"bottom": null,
"display": null,
"flex": null,
"flex_flow": null,
"grid_area": null,
"grid_auto_columns": null,
"grid_auto_flow": null,
"grid_auto_rows": null,
"grid_column": null,
"grid_gap": null,
"grid_row": null,
"grid_template_areas": null,
"grid_template_columns": null,
"grid_template_rows": null,
"height": null,
"justify_content": null,
"justify_items": null,
"left": null,
"margin": null,
"max_height": null,
"max_width": null,
"min_height": null,
"min_width": null,
"object_fit": null,
"object_position": null,
"order": null,
"overflow": null,
"overflow_x": null,
"overflow_y": null,
"padding": null,
"right": null,
"top": null,
"visibility": null,
"width": null
}
},
"cb394578badd43a89850873ad2526542": {
"model_module": "@jupyter-widgets/controls",
"model_module_version": "1.5.0",
"model_name": "DescriptionStyleModel",
"state": {
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "DescriptionStyleModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "StyleView",
"description_width": ""
}
},
"ce48deaf4d8c49cdae92bfdbb3a78df0": {
"model_module": "@jupyter-widgets/controls",
"model_module_version": "1.5.0",
"model_name": "FloatProgressModel",
"state": {
"_dom_classes": [],
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "FloatProgressModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/controls",
"_view_module_version": "1.5.0",
"_view_name": "ProgressView",
"bar_style": "success",
"description": "",
"description_tooltip": null,
"layout": "IPY_MODEL_193aef33d9184055bb9223f56d456de6",
"max": 108,
"min": 0,
"orientation": "horizontal",
"style": "IPY_MODEL_abfc9aa911ce4a5ea81c7c451f08295f",
"value": 108
}
},
"df2365556ae242a2ab1a119f9a31a561": {
"model_module": "@jupyter-widgets/controls",
"model_module_version": "1.5.0",
"model_name": "HTMLModel",
"state": {
"_dom_classes": [],
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "HTMLModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/controls",
"_view_module_version": "1.5.0",
"_view_name": "HTMLView",
"description": "",
"description_tooltip": null,
"layout": "IPY_MODEL_13b9c5395bca4c3ba21265240cb936cf",
"placeholder": "",
"style": "IPY_MODEL_47a4586384274577a726c57605e7f8d9",
"value": "Parsing nodes: 100%"
}
},
"e532ed7bfef34f67b5fcacd9534eb789": {
"model_module": "@jupyter-widgets/controls",
"model_module_version": "1.5.0",
"model_name": "DescriptionStyleModel",
"state": {
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "DescriptionStyleModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "StyleView",
"description_width": ""
}
},
"e7937a1bc68441a080374911a6563376": {
"model_module": "@jupyter-widgets/base",
"model_module_version": "1.2.0",
"model_name": "LayoutModel",
"state": {
"_model_module": "@jupyter-widgets/base",
"_model_module_version": "1.2.0",
"_model_name": "LayoutModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "LayoutView",
"align_content": null,
"align_items": null,
"align_self": null,
"border": null,
"bottom": null,
"display": null,
"flex": null,
"flex_flow": null,
"grid_area": null,
"grid_auto_columns": null,
"grid_auto_flow": null,
"grid_auto_rows": null,
"grid_column": null,
"grid_gap": null,
"grid_row": null,
"grid_template_areas": null,
"grid_template_columns": null,
"grid_template_rows": null,
"height": null,
"justify_content": null,
"justify_items": null,
"left": null,
"margin": null,
"max_height": null,
"max_width": null,
"min_height": null,
"min_width": null,
"object_fit": null,
"object_position": null,
"order": null,
"overflow": null,
"overflow_x": null,
"overflow_y": null,
"padding": null,
"right": null,
"top": null,
"visibility": null,
"width": null
}
},
"e956dfab55084a9cbe33c8e331b511e7": {
"model_module": "@jupyter-widgets/base",
"model_module_version": "1.2.0",
"model_name": "LayoutModel",
"state": {
"_model_module": "@jupyter-widgets/base",
"_model_module_version": "1.2.0",
"_model_name": "LayoutModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "LayoutView",
"align_content": null,
"align_items": null,
"align_self": null,
"border": null,
"bottom": null,
"display": null,
"flex": null,
"flex_flow": null,
"grid_area": null,
"grid_auto_columns": null,
"grid_auto_flow": null,
"grid_auto_rows": null,
"grid_column": null,
"grid_gap": null,
"grid_row": null,
"grid_template_areas": null,
"grid_template_columns": null,
"grid_template_rows": null,
"height": null,
"justify_content": null,
"justify_items": null,
"left": null,
"margin": null,
"max_height": null,
"max_width": null,
"min_height": null,
"min_width": null,
"object_fit": null,
"object_position": null,
"order": null,
"overflow": null,
"overflow_x": null,
"overflow_y": null,
"padding": null,
"right": null,
"top": null,
"visibility": null,
"width": null
}
}
}
}
},
"nbformat": 4,
"nbformat_minor": 0
}