Omar Solano
committed on
Commit
·
13f3498
1
Parent(s):
84bd9c0
replace gpt-3.5 with gemini-1.5-flash
Browse files
notebooks/04-RAG_with_VectorStore.ipynb
CHANGED
@@ -20,27 +20,38 @@
|
|
20 |
},
|
21 |
{
|
22 |
"cell_type": "code",
|
23 |
-
"execution_count":
|
24 |
"metadata": {
|
25 |
"id": "QPJzr-I9XQ7l"
|
26 |
},
|
27 |
"outputs": [],
|
28 |
"source": [
|
29 |
-
"!pip install -q llama-index==0.10.
|
30 |
]
|
31 |
},
|
32 |
{
|
33 |
"cell_type": "code",
|
34 |
-
"execution_count":
|
35 |
"metadata": {
|
36 |
"id": "riuXwpSPcvWC"
|
37 |
},
|
38 |
"outputs": [],
|
39 |
"source": [
|
40 |
"import os\n",
|
|
|
41 |
"\n",
|
42 |
-
"
|
43 |
-
"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
44 |
]
|
45 |
},
|
46 |
{
|
@@ -72,7 +83,7 @@
|
|
72 |
},
|
73 |
{
|
74 |
"cell_type": "code",
|
75 |
-
"execution_count":
|
76 |
"metadata": {
|
77 |
"colab": {
|
78 |
"base_uri": "https://localhost:8080/"
|
@@ -87,7 +98,7 @@
|
|
87 |
"text": [
|
88 |
" % Total % Received % Xferd Average Speed Time Time Time Current\n",
|
89 |
" Dload Upload Total Spent Left Speed\n",
|
90 |
-
"100 169k 100 169k 0 0
|
91 |
]
|
92 |
}
|
93 |
],
|
@@ -106,7 +117,7 @@
|
|
106 |
},
|
107 |
{
|
108 |
"cell_type": "code",
|
109 |
-
"execution_count":
|
110 |
"metadata": {
|
111 |
"colab": {
|
112 |
"base_uri": "https://localhost:8080/"
|
@@ -116,14 +127,11 @@
|
|
116 |
},
|
117 |
"outputs": [
|
118 |
{
|
119 |
-
"
|
120 |
-
|
121 |
-
|
122 |
-
|
123 |
-
|
124 |
-
"execution_count": 4,
|
125 |
-
"metadata": {},
|
126 |
-
"output_type": "execute_result"
|
127 |
}
|
128 |
],
|
129 |
"source": [
|
@@ -132,14 +140,16 @@
|
|
132 |
"text = \"\"\n",
|
133 |
"\n",
|
134 |
"# Load the file as a CSV\n",
|
135 |
-
"with open(\"./mini-dataset.csv\", mode=\"r\", encoding=\"
|
136 |
-
"
|
137 |
"\n",
|
138 |
-
"
|
139 |
-
"
|
|
|
|
|
140 |
"\n",
|
141 |
"# The number of characters in the dataset.\n",
|
142 |
-
"len(
|
143 |
]
|
144 |
},
|
145 |
{
|
@@ -153,7 +163,7 @@
|
|
153 |
},
|
154 |
{
|
155 |
"cell_type": "code",
|
156 |
-
"execution_count":
|
157 |
"metadata": {
|
158 |
"colab": {
|
159 |
"base_uri": "https://localhost:8080/"
|
@@ -163,14 +173,11 @@
|
|
163 |
},
|
164 |
"outputs": [
|
165 |
{
|
166 |
-
"
|
167 |
-
|
168 |
-
|
169 |
-
|
170 |
-
|
171 |
-
"execution_count": 5,
|
172 |
-
"metadata": {},
|
173 |
-
"output_type": "execute_result"
|
174 |
}
|
175 |
],
|
176 |
"source": [
|
@@ -179,9 +186,9 @@
|
|
179 |
"\n",
|
180 |
"# Split the long text into smaller manageable chunks of 512 characters.\n",
|
181 |
"for i in range(0, len(text), chunk_size):\n",
|
182 |
-
" chunks.append(text[i:i + chunk_size])\n",
|
183 |
"\n",
|
184 |
-
"len(
|
185 |
]
|
186 |
},
|
187 |
{
|
@@ -243,6 +250,7 @@
|
|
243 |
"source": [
|
244 |
"from llama_index.vector_stores.chroma import ChromaVectorStore\n",
|
245 |
"from llama_index.core import StorageContext\n",
|
|
|
246 |
"# Define a storage context object using the created vector database.\n",
|
247 |
"vector_store = ChromaVectorStore(chroma_collection=chroma_collection)\n",
|
248 |
"storage_context = StorageContext.from_defaults(vector_store=vector_store)"
|
@@ -254,13 +262,29 @@
|
|
254 |
"metadata": {
|
255 |
"id": "WsD52wtrlESi"
|
256 |
},
|
257 |
-
"outputs": [
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
258 |
"source": [
|
259 |
"from llama_index.core import VectorStoreIndex\n",
|
|
|
|
|
260 |
"\n",
|
261 |
-
"#
|
262 |
"index = VectorStoreIndex.from_documents(\n",
|
263 |
-
" documents
|
|
|
|
|
|
|
264 |
")"
|
265 |
]
|
266 |
},
|
@@ -281,12 +305,14 @@
|
|
281 |
},
|
282 |
"outputs": [],
|
283 |
"source": [
|
284 |
-
"from llama_index.llms.openai import OpenAI\n",
|
285 |
"# Define a query engine that is responsible for retrieving related pieces of text,\n",
|
286 |
"# and using a LLM to formulate the final answer.\n",
|
287 |
"\n",
|
288 |
-
"
|
289 |
-
"
|
|
|
|
|
|
|
290 |
]
|
291 |
},
|
292 |
{
|
@@ -304,14 +330,13 @@
|
|
304 |
"name": "stdout",
|
305 |
"output_type": "stream",
|
306 |
"text": [
|
307 |
-
"The LLaMA2 model has 7 billion parameters
|
|
|
308 |
]
|
309 |
}
|
310 |
],
|
311 |
"source": [
|
312 |
-
"response = query_engine.query(\n",
|
313 |
-
" \"How many parameters LLaMA2 model has?\"\n",
|
314 |
-
")\n",
|
315 |
"print(response)"
|
316 |
]
|
317 |
},
|
@@ -333,6 +358,7 @@
|
|
333 |
"outputs": [],
|
334 |
"source": [
|
335 |
"from langchain.schema.document import Document\n",
|
|
|
336 |
"# Convert the chunks to Document objects so the LangChain framework can process them.\n",
|
337 |
"documents = [Document(page_content=t) for t in chunks]"
|
338 |
]
|
@@ -356,6 +382,7 @@
|
|
356 |
"source": [
|
357 |
"from langchain_chroma import Chroma\n",
|
358 |
"from langchain_openai import OpenAIEmbeddings\n",
|
|
|
359 |
"# Add the documents to chroma DB and create Index / embeddings\n",
|
360 |
"\n",
|
361 |
"embeddings = OpenAIEmbeddings(model=\"text-embedding-ada-002\")\n",
|
@@ -363,7 +390,7 @@
|
|
363 |
" documents=documents,\n",
|
364 |
" embedding=embeddings,\n",
|
365 |
" persist_directory=\"./mini-chunked-dataset\",\n",
|
366 |
-
" collection_name=\"mini-chunked-dataset\"
|
367 |
")"
|
368 |
]
|
369 |
},
|
@@ -378,20 +405,21 @@
|
|
378 |
},
|
379 |
{
|
380 |
"cell_type": "code",
|
381 |
-
"execution_count":
|
382 |
"metadata": {
|
383 |
"id": "-H64YLxshM2b"
|
384 |
},
|
385 |
"outputs": [],
|
386 |
"source": [
|
387 |
"from langchain_openai import ChatOpenAI\n",
|
|
|
388 |
"# Initializing the LLM model\n",
|
389 |
"llm = ChatOpenAI(temperature=0, model=\"gpt-3.5-turbo\", max_tokens=512)"
|
390 |
]
|
391 |
},
|
392 |
{
|
393 |
"cell_type": "code",
|
394 |
-
"execution_count":
|
395 |
"metadata": {
|
396 |
"colab": {
|
397 |
"base_uri": "https://localhost:8080/"
|
@@ -399,24 +427,15 @@
|
|
399 |
"id": "AxBqPNtthPaa",
|
400 |
"outputId": "93c9ad64-1cd1-4f52-c51e-6f3ec5d6542d"
|
401 |
},
|
402 |
-
"outputs": [
|
403 |
-
{
|
404 |
-
"name": "stdout",
|
405 |
-
"output_type": "stream",
|
406 |
-
"text": [
|
407 |
-
"The LLaMA-2 model has 7 billion parameters.\n"
|
408 |
-
]
|
409 |
-
}
|
410 |
-
],
|
411 |
"source": [
|
412 |
"from langchain.chains import RetrievalQA\n",
|
|
|
413 |
"query = \"How many parameters LLaMA2 model has?\"\n",
|
414 |
"retriever = chroma_db.as_retriever(search_kwargs={\"k\": 2})\n",
|
415 |
"# Define a RetrievalQA chain that is responsible for retrieving related pieces of text,\n",
|
416 |
"# and using a LLM to formulate the final answer.\n",
|
417 |
-
"chain = RetrievalQA.from_chain_type(llm=llm
|
418 |
-
" chain_type=\"stuff\",\n",
|
419 |
-
" retriever=retriever)\n",
|
420 |
"\n",
|
421 |
"response = chain(query)\n",
|
422 |
"print(response[\"result\"])"
|
@@ -441,7 +460,7 @@
|
|
441 |
"name": "python",
|
442 |
"nbconvert_exporter": "python",
|
443 |
"pygments_lexer": "ipython3",
|
444 |
-
"version": "3.
|
445 |
}
|
446 |
},
|
447 |
"nbformat": 4,
|
|
|
20 |
},
|
21 |
{
|
22 |
"cell_type": "code",
|
23 |
+
"execution_count": null,
|
24 |
"metadata": {
|
25 |
"id": "QPJzr-I9XQ7l"
|
26 |
},
|
27 |
"outputs": [],
|
28 |
"source": [
|
29 |
+
"!pip install -q llama-index==0.10.49 llama-index-vector-stores-chroma==0.1.9 llama-index-llms-gemini==0.1.11 google-generativeai==0.5.4 langchain==0.1.17 langchain-chroma==0.1.0 langchain_openai==0.1.5 openai==1.35.3 chromadb==0.5.3"
|
30 |
]
|
31 |
},
|
32 |
{
|
33 |
"cell_type": "code",
|
34 |
+
"execution_count": 1,
|
35 |
"metadata": {
|
36 |
"id": "riuXwpSPcvWC"
|
37 |
},
|
38 |
"outputs": [],
|
39 |
"source": [
|
40 |
"import os\n",
|
41 |
+
"from dotenv import load_dotenv\n",
|
42 |
"\n",
|
43 |
+
"load_dotenv(\".env\")\n",
|
44 |
+
"\n",
|
45 |
+
"# Here we look for the OPENAI_API_KEY in the environment variables\n",
|
46 |
+
"OPENAI_API_KEY = os.getenv(\"OPENAI_API_KEY\")\n",
|
47 |
+
"if not OPENAI_API_KEY:\n",
|
48 |
+
" # If it's not found, you can set it manually\n",
|
49 |
+
" os.environ[\"OPENAI_API_KEY\"] = \"<YOUR_OPENAI_KEY>\"\n",
|
50 |
+
"\n",
|
51 |
+
"# Get your GOOGLE_API_KEY from https://aistudio.google.com/app/apikey\n",
|
52 |
+
"GOOGLE_API_KEY = os.getenv(\"GOOGLE_API_KEY\")\n",
|
53 |
+
"if not GOOGLE_API_KEY:\n",
|
54 |
+
" os.environ[\"GOOGLE_API_KEY\"] = \"<YOUR_GOOGLE_KEY>\""
|
55 |
]
|
56 |
},
|
57 |
{
|
|
|
83 |
},
|
84 |
{
|
85 |
"cell_type": "code",
|
86 |
+
"execution_count": 2,
|
87 |
"metadata": {
|
88 |
"colab": {
|
89 |
"base_uri": "https://localhost:8080/"
|
|
|
98 |
"text": [
|
99 |
" % Total % Received % Xferd Average Speed Time Time Time Current\n",
|
100 |
" Dload Upload Total Spent Left Speed\n",
|
101 |
+
"100 169k 100 169k 0 0 1581k 0 --:--:-- --:--:-- --:--:-- 1584k\n"
|
102 |
]
|
103 |
}
|
104 |
],
|
|
|
117 |
},
|
118 |
{
|
119 |
"cell_type": "code",
|
120 |
+
"execution_count": 3,
|
121 |
"metadata": {
|
122 |
"colab": {
|
123 |
"base_uri": "https://localhost:8080/"
|
|
|
127 |
},
|
128 |
"outputs": [
|
129 |
{
|
130 |
+
"name": "stdout",
|
131 |
+
"output_type": "stream",
|
132 |
+
"text": [
|
133 |
+
"171044\n"
|
134 |
+
]
|
|
|
|
|
|
|
135 |
}
|
136 |
],
|
137 |
"source": [
|
|
|
140 |
"text = \"\"\n",
|
141 |
"\n",
|
142 |
"# Load the file as a CSV\n",
|
143 |
+
"with open(\"./mini-dataset.csv\", mode=\"r\", encoding=\"utf-8\") as file:\n",
|
144 |
+
" csv_reader = csv.reader(file)\n",
|
145 |
"\n",
|
146 |
+
" for idx, row in enumerate(csv_reader):\n",
|
147 |
+
" if idx == 0:\n",
|
148 |
+
" continue\n",
|
149 |
+
" text += row[1]\n",
|
150 |
"\n",
|
151 |
"# The number of characters in the dataset.\n",
|
152 |
+
"print(len(text))"
|
153 |
]
|
154 |
},
|
155 |
{
|
|
|
163 |
},
|
164 |
{
|
165 |
"cell_type": "code",
|
166 |
+
"execution_count": 4,
|
167 |
"metadata": {
|
168 |
"colab": {
|
169 |
"base_uri": "https://localhost:8080/"
|
|
|
173 |
},
|
174 |
"outputs": [
|
175 |
{
|
176 |
+
"name": "stdout",
|
177 |
+
"output_type": "stream",
|
178 |
+
"text": [
|
179 |
+
"335\n"
|
180 |
+
]
|
|
|
|
|
|
|
181 |
}
|
182 |
],
|
183 |
"source": [
|
|
|
186 |
"\n",
|
187 |
"# Split the long text into smaller manageable chunks of 512 characters.\n",
|
188 |
"for i in range(0, len(text), chunk_size):\n",
|
189 |
+
" chunks.append(text[i : i + chunk_size])\n",
|
190 |
"\n",
|
191 |
+
"print(len(chunks))"
|
192 |
]
|
193 |
},
|
194 |
{
|
|
|
250 |
"source": [
|
251 |
"from llama_index.vector_stores.chroma import ChromaVectorStore\n",
|
252 |
"from llama_index.core import StorageContext\n",
|
253 |
+
"\n",
|
254 |
"# Define a storage context object using the created vector database.\n",
|
255 |
"vector_store = ChromaVectorStore(chroma_collection=chroma_collection)\n",
|
256 |
"storage_context = StorageContext.from_defaults(vector_store=vector_store)"
|
|
|
262 |
"metadata": {
|
263 |
"id": "WsD52wtrlESi"
|
264 |
},
|
265 |
+
"outputs": [
|
266 |
+
{
|
267 |
+
"name": "stderr",
|
268 |
+
"output_type": "stream",
|
269 |
+
"text": [
|
270 |
+
"/Users/omar/Documents/ai_repos/ai-tutor-rag-system/env/lib/python3.12/site-packages/tqdm/auto.py:21: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html\n",
|
271 |
+
" from .autonotebook import tqdm as notebook_tqdm\n",
|
272 |
+
"Parsing nodes: 100%|██████████| 335/335 [00:00<00:00, 8031.85it/s]\n",
|
273 |
+
"Generating embeddings: 100%|██████████| 335/335 [00:03<00:00, 97.24it/s] \n"
|
274 |
+
]
|
275 |
+
}
|
276 |
+
],
|
277 |
"source": [
|
278 |
"from llama_index.core import VectorStoreIndex\n",
|
279 |
+
"from llama_index.core.node_parser import SentenceSplitter\n",
|
280 |
+
"from llama_index.embeddings.openai import OpenAIEmbedding\n",
|
281 |
"\n",
|
282 |
+
"# Build index / generate embeddings using OpenAI embedding model\n",
|
283 |
"index = VectorStoreIndex.from_documents(\n",
|
284 |
+
" documents,\n",
|
285 |
+
" embed_model=OpenAIEmbedding(model=\"text-embedding-3-small\"),\n",
|
286 |
+
" storage_context=storage_context,\n",
|
287 |
+
" show_progress=True,\n",
|
288 |
")"
|
289 |
]
|
290 |
},
|
|
|
305 |
},
|
306 |
"outputs": [],
|
307 |
"source": [
|
|
|
308 |
"# Define a query engine that is responsible for retrieving related pieces of text,\n",
|
309 |
"# and using a LLM to formulate the final answer.\n",
|
310 |
"\n",
|
311 |
+
"from llama_index.llms.gemini import Gemini\n",
|
312 |
+
"\n",
|
313 |
+
"llm = Gemini(model=\"models/gemini-1.5-flash\", temperature=1, max_tokens=512)\n",
|
314 |
+
"\n",
|
315 |
+
"query_engine = index.as_query_engine(llm=llm, similarity_top_k=5)"
|
316 |
]
|
317 |
},
|
318 |
{
|
|
|
330 |
"name": "stdout",
|
331 |
"output_type": "stream",
|
332 |
"text": [
|
333 |
+
"The LLaMA2 model has four different sizes: 7 billion, 13 billion, 34 billion, and 70 billion parameters. \n",
|
334 |
+
"\n"
|
335 |
]
|
336 |
}
|
337 |
],
|
338 |
"source": [
|
339 |
+
"response = query_engine.query(\"How many parameters LLaMA2 model has?\")\n",
|
|
|
|
|
340 |
"print(response)"
|
341 |
]
|
342 |
},
|
|
|
358 |
"outputs": [],
|
359 |
"source": [
|
360 |
"from langchain.schema.document import Document\n",
|
361 |
+
"\n",
|
362 |
"# Convert the chunks to Document objects so the LangChain framework can process them.\n",
|
363 |
"documents = [Document(page_content=t) for t in chunks]"
|
364 |
]
|
|
|
382 |
"source": [
|
383 |
"from langchain_chroma import Chroma\n",
|
384 |
"from langchain_openai import OpenAIEmbeddings\n",
|
385 |
+
"\n",
|
386 |
"# Add the documents to chroma DB and create Index / embeddings\n",
|
387 |
"\n",
|
388 |
"embeddings = OpenAIEmbeddings(model=\"text-embedding-ada-002\")\n",
|
|
|
390 |
" documents=documents,\n",
|
391 |
" embedding=embeddings,\n",
|
392 |
" persist_directory=\"./mini-chunked-dataset\",\n",
|
393 |
+
" collection_name=\"mini-chunked-dataset\",\n",
|
394 |
")"
|
395 |
]
|
396 |
},
|
|
|
405 |
},
|
406 |
{
|
407 |
"cell_type": "code",
|
408 |
+
"execution_count": null,
|
409 |
"metadata": {
|
410 |
"id": "-H64YLxshM2b"
|
411 |
},
|
412 |
"outputs": [],
|
413 |
"source": [
|
414 |
"from langchain_openai import ChatOpenAI\n",
|
415 |
+
"\n",
|
416 |
"# Initializing the LLM model\n",
|
417 |
"llm = ChatOpenAI(temperature=0, model=\"gpt-3.5-turbo\", max_tokens=512)"
|
418 |
]
|
419 |
},
|
420 |
{
|
421 |
"cell_type": "code",
|
422 |
+
"execution_count": null,
|
423 |
"metadata": {
|
424 |
"colab": {
|
425 |
"base_uri": "https://localhost:8080/"
|
|
|
427 |
"id": "AxBqPNtthPaa",
|
428 |
"outputId": "93c9ad64-1cd1-4f52-c51e-6f3ec5d6542d"
|
429 |
},
|
430 |
+
"outputs": [],
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
431 |
"source": [
|
432 |
"from langchain.chains import RetrievalQA\n",
|
433 |
+
"\n",
|
434 |
"query = \"How many parameters LLaMA2 model has?\"\n",
|
435 |
"retriever = chroma_db.as_retriever(search_kwargs={\"k\": 2})\n",
|
436 |
"# Define a RetrievalQA chain that is responsible for retrieving related pieces of text,\n",
|
437 |
"# and using a LLM to formulate the final answer.\n",
|
438 |
+
"chain = RetrievalQA.from_chain_type(llm=llm, chain_type=\"stuff\", retriever=retriever)\n",
|
|
|
|
|
439 |
"\n",
|
440 |
"response = chain(query)\n",
|
441 |
"print(response[\"result\"])"
|
|
|
460 |
"name": "python",
|
461 |
"nbconvert_exporter": "python",
|
462 |
"pygments_lexer": "ipython3",
|
463 |
+
"version": "3.12.3"
|
464 |
}
|
465 |
},
|
466 |
"nbformat": 4,
|