# Install Packages and Setup Variables


In [None]:
!pip install -q llama-index==0.10.57 openai==1.37.0 cohere==5.6.2 tiktoken==0.7.0 chromadb==0.5.5 html2text sentence_transformers pydantic llama-index-vector-stores-chroma==0.1.10 kaleido==0.2.1 llama-index-llms-gemini==0.1.11

In [15]:
import os

# Set the following API keys in the Python environment. They are used later.
# setdefault keeps a key that is already exported in the environment instead of
# overwriting it with the placeholder; replace "" with your key if none is set.
os.environ.setdefault("OPENAI_API_KEY", "")
os.environ.setdefault("GOOGLE_API_KEY", "")

In [14]:
# Allows running asyncio in environments with an existing event loop, like Jupyter notebooks.
import nest_asyncio

# Activate the behaviour described above for the rest of the session.
nest_asyncio.apply()

# Load a Model


In [None]:
from llama_index.llms.gemini import Gemini

# Gemini LLM shared by the metadata extractors and the query engines defined in later cells.
# NOTE(review): presumably authenticates via the GOOGLE_API_KEY environment variable set above — confirm.
llm = Gemini(model="models/gemini-1.5-flash", temperature=1, max_tokens=512)

# Create a VectorStore


In [None]:
import chromadb

# Create the client and the collection that will hold the embedded nodes.
# chromadb.PersistentClient stores the data on disk under `path`
# (chromadb.EphemeralClient would keep it in memory only).
chroma_client = chromadb.PersistentClient(path="./mini-llama-articles")
# get_or_create_collection keeps this cell re-runnable: create_collection fails
# once the collection already exists on disk from a previous run.
# Note that re-running the ingestion cell afterwards adds nodes to the existing collection.
chroma_collection = chroma_client.get_or_create_collection("mini-llama-articles")

In [None]:
from llama_index.vector_stores.chroma import ChromaVectorStore

# Wrap the Chroma collection created above in a LlamaIndex vector store object.
vector_store = ChromaVectorStore(chroma_collection=chroma_collection)

# Load the Dataset (CSV)


## Download


The dataset includes several articles from the TowardsAI blog, which provide an in-depth explanation of the LLaMA2 model. The CSV file is downloaded below and then read row by row.


In [None]:
!curl -o ./mini-llama-articles.csv https://raw.githubusercontent.com/AlaFalaki/tutorial_notebooks/main/data/mini-llama-articles.csv

## Read File


In [None]:
import csv

rows = []

# Load the CSV file into a list of rows (each row is a list of column values).
# newline="" lets the csv module handle line endings itself, so quoted fields
# that contain line breaks are parsed correctly.
with open("./mini-llama-articles.csv", mode="r", encoding="utf-8", newline="") as file:
    csv_reader = csv.reader(file)

    # Skip the header row; the default avoids StopIteration on an empty file.
    next(csv_reader, None)
    rows.extend(csv_reader)

# The number of rows in the dataset (header excluded).
len(rows)

# Convert to Document obj


In [None]:
from llama_index.core import Document

# Wrap every CSV row in a LlamaIndex Document: column 1 is the body text,
# columns 0, 2 and 3 become the title / url / source_name metadata.
documents = [
    Document(
        text=row[1],
        metadata=dict(title=row[0], url=row[2], source_name=row[3]),
    )
    for row in rows
]
print(documents[0])

# Transforming


In [None]:
from llama_index.core.text_splitter import TokenTextSplitter

# Splitter used as the first step of the ingestion pipeline below:
# chunk_size=512 with chunk_overlap=128, splitting on spaces.
text_splitter = TokenTextSplitter(separator=" ", chunk_size=512, chunk_overlap=128)

In [None]:
from llama_index.core.extractors import (
 SummaryExtractor,
 QuestionsAnsweredExtractor,
 KeywordExtractor,
)
from llama_index.embeddings.openai import OpenAIEmbedding
from llama_index.core.ingestion import IngestionPipeline

# Ingestion pipeline: the transformations are listed in the order
# split -> three LLM-based metadata extractors -> embedding.
pipeline = IngestionPipeline(
 transformations=[
 text_splitter,
 # The three extractors below all use the Gemini `llm` defined earlier.
 QuestionsAnsweredExtractor(questions=3, llm=llm),
 SummaryExtractor(summaries=["prev", "self"], llm=llm),
 KeywordExtractor(keywords=10, llm=llm),
 # The same embedding model is passed to the query engines later on.
 OpenAIEmbedding(model="text-embedding-3-small", mode="text_search"),
 ],
 # NOTE(review): presumably the processed nodes are written into this Chroma-backed store — confirm.
 vector_store=vector_store,
)

# Run the pipeline on the CSV-derived documents and keep the resulting nodes.
nodes = pipeline.run(documents=documents, show_progress=True)

In [None]:
# Number of nodes returned by the ingestion pipeline.
len(nodes)

In [None]:
# Archive the on-disk Chroma directory (the PersistentClient path) as vectorstore.zip.
!zip -r vectorstore.zip mini-llama-articles

# Load Indexes


In [None]:
# !unzip vectorstore.zip

In [1]:
import chromadb
from llama_index.vector_stores.chroma import ChromaVectorStore

# Open the persisted Chroma database and wrap its collection in a vector store.
db = chromadb.PersistentClient(path="./mini-llama-articles")
chroma_collection = db.get_or_create_collection("mini-llama-articles")
vector_store = ChromaVectorStore(chroma_collection=chroma_collection)

In [2]:
# Create your index on top of the existing vector store (no documents are passed in here).
from llama_index.core import VectorStoreIndex

vector_index = VectorStoreIndex.from_vector_store(vector_store)

In [5]:
from llama_index.embeddings.openai import OpenAIEmbedding

# Query engine over the LLaMA articles index.
# `llm` comes from the "Load a Model" cell, which must have been run first.
# The embedding model matches the one used in the ingestion pipeline.
llama_query_engine = vector_index.as_query_engine(
 llm=llm,
 similarity_top_k=3,
 embed_model=OpenAIEmbedding(model="text-embedding-3-small", mode="text_search"),
)

In [6]:
# Ask the LLaMA query engine a question; the result is inspected in the next cells.
res = llama_query_engine.query("What is the LLama model?")

In [7]:
# Display the generated answer text.
res.response

'The Llama model is an open-source language model developed by Meta that is designed for commercial use. It comes in different sizes ranging from 7 billion to 70 billion parameters and is known for its efficiency and potential in the market. The model incorporates features like Ghost Attention, which enhances conversational continuity, and a groundbreaking temporal capability that organizes information based on time relevance for more contextually accurate responses.'

In [8]:
# Print the id, title, text, score and metadata of every source node behind the answer.
for src in res.source_nodes:
    print(f"Node ID\t {src.node_id}")
    print(f"Title\t {src.metadata['title']}")
    print(f"Text\t {src.text}")
    print(f"Score\t {src.score}")
    print(f"Metadata\t {src.metadata}")
    print(20 * "-_")

Node ID	 5c465508-45c6-4ae0-ae61-9d8c1e38e35c
Title	 Meta's Llama 2: Revolutionizing Open Source Language Models for Commercial Use
Text	 with their larger size, outperform Llama 2, this is expected due to their capacity for handling complex language tasks. Llama 2's impressive ability to compete with larger models highlights its efficiency and potential in the market. However, Llama 2 does face challenges in coding and math problems, where models like Chat GPT 4 excel, given their significantly larger size. Chat GPT 4 performed significantly better than Llama 2 for coding (HumanEval benchmark)and math problem tasks (GSM8k benchmark). Open-source AI technologies, like Llama 2, continue to advance, offering strong competition to closed-source models. V. Ghost Attention: Enhancing Conversational Continuity One unique feature in Llama 2 is Ghost Attention, which ensures continuity in conversations. This means that even after multiple interactions, the model remembers its initial instructi

# Router

Routers are modules that take in a user query and a set of “choices” (defined by metadata), and return one or more selected choices.

They can be used for the following use cases and more:

- Selecting the right data source among a diverse range of data sources

- Deciding whether to do summarization (e.g. using summary index query engine) or semantic search (e.g. using vector index query engine)

- Deciding whether to “try” out a bunch of choices at once and combine the results (using multi-routing capabilities).


## Let's create a different query engine with Mistral AI information


In [9]:
from pathlib import Path
import requests

# Wikipedia page titles to download; each one is saved as a plain-text file.
wiki_titles = [
    "Mistral AI",
]

data_path = Path("data_wiki")
# Create the output directory once, up front; no error if it already exists.
data_path.mkdir(parents=True, exist_ok=True)

for title in wiki_titles:
    # Ask the MediaWiki API for the plain-text extract of the page.
    http_response = requests.get(
        "https://en.wikipedia.org/w/api.php",
        params={
            "action": "query",
            "format": "json",
            "titles": title,
            "prop": "extracts",
            "explaintext": True,
        },
        timeout=30,  # do not hang the notebook forever on a stalled connection
    )
    # Fail loudly on an HTTP error instead of trying to parse an error page.
    http_response.raise_for_status()
    response = http_response.json()
    page = next(iter(response["query"]["pages"].values()))
    wiki_text = page["extract"]

    # Derive the file name from the title ("Mistral AI" -> "mistral_ai.txt") so that
    # several titles do not overwrite each other.
    file_name = title.lower().replace(" ", "_") + ".txt"
    # Explicit UTF-8: the article text contains non-ASCII characters.
    with open(data_path / file_name, "w", encoding="utf-8") as fp:
        fp.write(wiki_text)

In [12]:
from llama_index.core import VectorStoreIndex, SimpleDirectoryReader

# Load the files saved under data_wiki.
# Note: this rebinds `documents`, replacing the CSV-derived list built earlier in the notebook.
documents = SimpleDirectoryReader("data_wiki").load_data()

In [11]:
from llama_index.core.text_splitter import TokenTextSplitter

# Same splitter settings as the one defined for the LLaMA articles earlier in the notebook.
text_splitter = TokenTextSplitter(separator=" ", chunk_size=512, chunk_overlap=128)

In [16]:
from llama_index.core.extractors import (
 SummaryExtractor,
 QuestionsAnsweredExtractor,
 KeywordExtractor,
)
from llama_index.embeddings.openai import OpenAIEmbedding
from llama_index.core.ingestion import IngestionPipeline

# Same transformation chain as the LLaMA ingestion pipeline:
# split -> three LLM-based metadata extractors -> embedding.
transformations = [
 text_splitter,
 QuestionsAnsweredExtractor(questions=3, llm=llm),
 SummaryExtractor(summaries=["prev", "self"], llm=llm),
 KeywordExtractor(keywords=10, llm=llm),
 OpenAIEmbedding(model="text-embedding-3-small", mode="text_search"),
]

# Build an index directly from the Wikipedia documents; no vector store is passed here.
# NOTE(review): IngestionPipeline is imported but not used in this cell.
mistral_index = VectorStoreIndex.from_documents(
 documents=documents, llm=llm, transformations=transformations
)

100%|██████████| 3/3 [00:02<00:00, 1.12it/s]
100%|██████████| 3/3 [00:03<00:00, 1.01s/it]
100%|██████████| 3/3 [00:01<00:00, 2.72it/s]


In [17]:
# Query engine over the Mistral AI index; same LLM and embedding model as the
# LLaMA query engine, but with similarity_top_k=2 instead of 3.
mistral_query = mistral_index.as_query_engine(
 llm=llm,
 similarity_top_k=2,
 embed_model=OpenAIEmbedding(model="text-embedding-3-small", mode="text_search"),
)

In [18]:
from llama_index.core.query_engine import RouterQueryEngine
from llama_index.core.selectors import PydanticSingleSelector
from llama_index.core.tools import QueryEngineTool
from llama_index.core import VectorStoreIndex, SummaryIndex

# initialize tools
# Each tool wraps one of the query engines built above and carries a description of what it covers.
llama_tool = QueryEngineTool.from_defaults(
 query_engine=llama_query_engine,
 description="Useful for questions about the LLama LLM create by Meta",
)
mistral_tool = QueryEngineTool.from_defaults(
 query_engine=mistral_query,
 description="Useful for questions about the Mistral LLM create by Mistral AI",
)

# initialize router query engine (single selection, pydantic)
# NOTE(review): no llm is passed to the selector or the router, so library defaults apply —
# presumably an OpenAI model using OPENAI_API_KEY; confirm.
query_engine = RouterQueryEngine(
 selector=PydanticSingleSelector.from_defaults(),
 query_engine_tools=[
 llama_tool,
 mistral_tool,
 ],
)

Response(response='The Llama model is an open-source language model developed by Meta that is designed for commercial use. It comes in different sizes with varying parameters, such as 7 billion, 13 billion, 34 billion, and 70 billion parameters. The model is known for its efficiency and potential in the market, as well as its unique features like Ghost Attention for enhancing conversational continuity and a groundbreaking temporal capability for organizing information based on time relevance. The model prioritizes safety considerations in its design and aims to strike a balance between providing useful information and ensuring safety in its responses.', source_nodes=[NodeWithScore(node=TextNode(id_='5c465508-45c6-4ae0-ae61-9d8c1e38e35c', embedding=None, metadata={'title': "Meta's Llama 2: Revolutionizing Open Source Language Models for Commercial Use", 'url': 'https://pub.towardsai.net/metas-llama-2-revolutionizing-open-source-language-models-for-commercial-use-1492bec112b#148f', 'sour

In [19]:
# Send a LLaMA question through the router and display the answer text.
res = query_engine.query(
 "what is the LLama model?",
)
res.response

'The LLama model is an open-source language model developed by Meta that is designed for commercial use. It comes in different model sizes, ranging from 7 billion to 70 billion parameters, each with varying training times. The model prioritizes safety considerations in its design, aiming to strike a balance between providing helpful information and ensuring safety in responses. LLama 2 features unique capabilities such as Ghost Attention, which enhances conversational continuity, and a groundbreaking temporal capability that organizes information based on time relevance for more contextually accurate responses.'

In [20]:
# Print the id, title, text, score and metadata of every source node behind the routed answer.
for src in res.source_nodes:
    print(f"Node ID\t {src.node_id}")
    print(f"Title\t {src.metadata['title']}")
    print(f"Text\t {src.text}")
    print(f"Score\t {src.score}")
    print(f"Metadata\t {src.metadata}")
    print(20 * "-_")

Node ID	 5c465508-45c6-4ae0-ae61-9d8c1e38e35c
Title	 Meta's Llama 2: Revolutionizing Open Source Language Models for Commercial Use
Text	 with their larger size, outperform Llama 2, this is expected due to their capacity for handling complex language tasks. Llama 2's impressive ability to compete with larger models highlights its efficiency and potential in the market. However, Llama 2 does face challenges in coding and math problems, where models like Chat GPT 4 excel, given their significantly larger size. Chat GPT 4 performed significantly better than Llama 2 for coding (HumanEval benchmark)and math problem tasks (GSM8k benchmark). Open-source AI technologies, like Llama 2, continue to advance, offering strong competition to closed-source models. V. Ghost Attention: Enhancing Conversational Continuity One unique feature in Llama 2 is Ghost Attention, which ensures continuity in conversations. This means that even after multiple interactions, the model remembers its initial instructi

In [21]:
# Send a Mistral question through the router and display the answer text.
res = query_engine.query("what is the Mistral model?")
res.response

'The Mistral model is a 7.3B parameter language model that was officially released on September 27, 2023. It uses the transformers architecture and was made available under the Apache 2.0 license. The model outperforms LLaMA 2 13B on various benchmarks and is on par with LLaMA 34B on many benchmarks. Mistral 7B incorporates Grouped-query attention (GQA) for faster inference and Sliding Window Attention (SWA) to handle longer sequences efficiently.'

In [22]:
# Print the id, text and score of every source node behind the routed answer.
for src in res.source_nodes:
    print(f"Node ID\t {src.node_id}")
    print(f"Text\t {src.text}")
    print(f"Score\t {src.score}")
    print(20 * "-_")

Node ID	 db3ce17d-a8db-45d7-89f8-c83a346e743a
Text	 Mistral AI is a French company in artificial intelligence. It was founded in April 2023 by researchers previously employed by Meta and Google DeepMind: Arthur Mensch, Timothée Lacroix and Guillaume Lample. It has raised 385 million euros, or about $415 million in October 2023. In December 2023, it attained a valuation of more than $2 billion.It produces open large language models, citing the foundational importance of open-source software, and as a response to proprietary models.As of December 2023, two models have been published, and are available as weights. Another prototype "Mistral Medium" is available via API only.


== History ==
Mistral AI was co-founded in April 2023 by Arthur Mensch, Guillaume Lample and Timothée Lacroix.
Prior to co-founding Mistral AI, Arthur Mensch worked at DeepMind, Google's artificial intelligence laboratory, while Guillaume Lample and Timothée Lacroix worked at Meta.In June 2023, the start-up carried 