GaoDalie committed on
Commit
3199278
•
1 Parent(s): 41726e0

create new notebook for cohere better embedding

Browse files
notebooks/{09-Better_Embedding_Model.ipynb → Cohere_Better_Embedding_Model.ipynb} RENAMED
@@ -3,42 +3,111 @@
3
  {
4
  "cell_type": "markdown",
5
  "metadata": {
6
- "colab_type": "text",
7
- "id": "view-in-github"
8
  },
9
  "source": [
10
- "<a href=\"https://colab.research.google.com/github/towardsai/ai-tutor-rag-system/blob/main/notebooks/09-Better_Embedding_Model.ipynb\" target=\"_parent\"><img src=\"https://colab.research.google.com/assets/colab-badge.svg\" alt=\"Open In Colab\"/></a>\n"
11
- ]
12
- },
13
- {
14
- "cell_type": "markdown",
15
- "metadata": {
16
- "id": "-zE1h0uQV7uT"
17
- },
18
- "source": [
19
- "# Install Packages and Setup Variables\n"
20
  ]
21
  },
22
  {
23
  "cell_type": "code",
24
- "execution_count": 14,
25
  "metadata": {
26
  "colab": {
27
  "base_uri": "https://localhost:8080/"
28
  },
29
- "id": "QPJzr-I9XQ7l",
30
- "outputId": "8e808cc4-4c21-474b-c5b7-f6841ee08020"
31
  },
32
- "outputs": [],
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
33
  "source": [
34
- "!pip install -q llama-index==0.10.57 openai==1.37.0 llama-index-finetuning llama-index-embeddings-huggingface llama-index-embeddings-cohere llama-index-readers-web cohere==5.6.2 tiktoken==0.7.0 chromadb==0.5.5 html2text sentence_transformers pydantic llama-index-vector-stores-chroma==0.1.10 llama-index-llms-gemini==0.1.11"
35
  ]
36
  },
37
  {
38
  "cell_type": "code",
39
- "execution_count": 11,
40
  "metadata": {
41
- "id": "riuXwpSPcvWC"
42
  },
43
  "outputs": [],
44
  "source": [
@@ -46,15 +115,15 @@
46
  "\n",
47
  "# Set the following API Keys in the Python environment. Will be used later.\n",
48
  "os.environ[\"OPENAI_API_KEY\"] = \"<YOUR_OPENAI_KEY>\"\n",
49
- "os.environ[\"CO_API_KEY\"] = \"<YOUR_COHERE_KEY>\"\n",
50
- "os.environ[\"GOOGLE_API_KEY\"] = \"<YOUR_API_KEY>\""
51
  ]
52
  },
53
  {
54
  "cell_type": "code",
55
- "execution_count": 2,
56
  "metadata": {
57
- "id": "jIEeZzqLbz0J"
58
  },
59
  "outputs": [],
60
  "source": [
@@ -68,28 +137,19 @@
68
  {
69
  "cell_type": "markdown",
70
  "metadata": {
71
- "id": "Bkgi2OrYzF7q"
72
  },
73
  "source": [
74
- "# Load a Model\n"
75
  ]
76
  },
77
  {
78
  "cell_type": "code",
79
- "execution_count": 3,
80
  "metadata": {
81
- "id": "9oGT6crooSSj"
82
  },
83
- "outputs": [
84
- {
85
- "name": "stderr",
86
- "output_type": "stream",
87
- "text": [
88
- "/Users/louis/Documents/GitHub/ai-tutor-rag-system/.conda/lib/python3.11/site-packages/tqdm/auto.py:21: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html\n",
89
- " from .autonotebook import tqdm as notebook_tqdm\n"
90
- ]
91
- }
92
- ],
93
  "source": [
94
  "from llama_index.llms.gemini import Gemini\n",
95
  "\n",
@@ -99,78 +159,79 @@
99
  {
100
  "cell_type": "markdown",
101
  "metadata": {
102
- "id": "0BwVuJXlzHVL"
103
  },
104
  "source": [
105
- "# Create a VectorStore\n"
106
  ]
107
  },
108
  {
109
  "cell_type": "code",
110
- "execution_count": 4,
111
  "metadata": {
112
- "id": "SQP87lHczHKc"
113
  },
114
  "outputs": [],
115
  "source": [
116
  "import chromadb\n",
117
  "\n",
118
- "# create client and a new collection\n",
119
  "# chromadb.EphemeralClient saves data in-memory.\n",
 
120
  "chroma_client = chromadb.PersistentClient(path=\"./mini-llama-articles\")\n",
121
  "chroma_collection = chroma_client.create_collection(\"mini-llama-articles\")"
122
  ]
123
  },
124
  {
125
  "cell_type": "code",
126
- "execution_count": 5,
127
  "metadata": {
128
- "id": "zAaGcYMJzHAN"
129
  },
130
  "outputs": [],
131
  "source": [
132
  "from llama_index.vector_stores.chroma import ChromaVectorStore\n",
133
  "\n",
134
  "# Define a storage context object using the created vector database.\n",
135
- "vector_store = ChromaVectorStore(chroma_collection=chroma_collection)"
136
  ]
137
  },
138
  {
139
  "cell_type": "markdown",
140
  "metadata": {
141
- "id": "I9JbAzFcjkpn"
142
  },
143
  "source": [
144
- "# Load the Dataset (CSV)\n"
145
  ]
146
  },
147
  {
148
  "cell_type": "markdown",
149
  "metadata": {
150
- "id": "ceveDuYdWCYk"
151
  },
152
  "source": [
153
- "## Download\n"
154
  ]
155
  },
156
  {
157
  "cell_type": "markdown",
158
  "metadata": {
159
- "id": "eZwf6pv7WFmD"
160
  },
161
  "source": [
162
- "The dataset includes several articles from the TowardsAI blog, which provide an in-depth explanation of the LLaMA2 model. Read the dataset as a long string.\n"
163
  ]
164
  },
165
  {
166
  "cell_type": "code",
167
- "execution_count": 6,
168
  "metadata": {
169
  "colab": {
170
  "base_uri": "https://localhost:8080/"
171
  },
172
- "id": "wl_pbPvMlv1h",
173
- "outputId": "bc9a0415-a1fb-4e89-a2b4-165420106b34"
174
  },
175
  "outputs": [
176
  {
@@ -179,7 +240,7 @@
179
  "text": [
180
  " % Total % Received % Xferd Average Speed Time Time Time Current\n",
181
  " Dload Upload Total Spent Left Speed\n",
182
- "100 169k 100 169k 0 0 856k 0 --:--:-- --:--:-- --:--:-- 860k\n"
183
  ]
184
  }
185
  ],
@@ -190,21 +251,21 @@
190
  {
191
  "cell_type": "markdown",
192
  "metadata": {
193
- "id": "VWBLtDbUWJfA"
194
  },
195
  "source": [
196
- "## Read File\n"
197
  ]
198
  },
199
  {
200
  "cell_type": "code",
201
- "execution_count": 7,
202
  "metadata": {
203
  "colab": {
204
  "base_uri": "https://localhost:8080/"
205
  },
206
- "id": "0Q9sxuW0g3Gd",
207
- "outputId": "a8361aa6-522d-4def-e49b-ed08d9c8e7d1"
208
  },
209
  "outputs": [
210
  {
@@ -213,7 +274,7 @@
213
  "14"
214
  ]
215
  },
216
- "execution_count": 7,
217
  "metadata": {},
218
  "output_type": "execute_result"
219
  }
@@ -240,17 +301,17 @@
240
  {
241
  "cell_type": "markdown",
242
  "metadata": {
243
- "id": "S17g2RYOjmf2"
244
  },
245
  "source": [
246
- "# Convert to Document obj\n"
247
  ]
248
  },
249
  {
250
  "cell_type": "code",
251
- "execution_count": 8,
252
  "metadata": {
253
- "id": "YizvmXPejkJE"
254
  },
255
  "outputs": [],
256
  "source": [
@@ -268,17 +329,17 @@
268
  {
269
  "cell_type": "markdown",
270
  "metadata": {
271
- "id": "qjuLbmFuWsyl"
272
  },
273
  "source": [
274
- "# Transforming\n"
275
  ]
276
  },
277
  {
278
  "cell_type": "code",
279
- "execution_count": 9,
280
  "metadata": {
281
- "id": "9z3t70DGWsjO"
282
  },
283
  "outputs": [],
284
  "source": [
@@ -292,62 +353,88 @@
292
  {
293
  "cell_type": "markdown",
294
  "metadata": {
295
- "id": "y28yMy0GxfGR"
296
  },
297
  "source": [
298
  "There are two options to use the Cohere embeddings:\n",
299
  "\n",
300
  "- input_type=\"search_document\": Employ this option for texts (documents) intended for storage in your vector database.\n",
301
  "\n",
302
- "- input_type=\"search_query\": Use this when issuing search queries to locate the most related documents within your vector database.\n"
303
  ]
304
  },
305
  {
306
  "cell_type": "code",
307
- "execution_count": 12,
308
  "metadata": {
309
  "colab": {
310
  "base_uri": "https://localhost:8080/",
311
- "height": 385,
312
  "referenced_widgets": [
313
- "2b1095050bb847c48855e3b74ae18b19",
314
- "a0a1c543115c4764b4150c5d0216370c",
315
- "23675bffa00749849ec944f84986ff52",
316
- "9e86b288110f4d418fd9761f59f5637f",
317
- "d6a4fd2a9cf7431b8bf738d9da0e2a7c",
318
- "700a1ffb298c4dd799c44fcee540b74c",
319
- "06e7a0370c8c46dd9a47c72a474212d1",
320
- "268f6f0800164e0ab7f8f31718f7f9be",
321
- "4001b95bd48147fb876b37a644e70dec",
322
- "22024efa09cb4330ab68a8c2bdbf92ac",
323
- "c14678e2b8c546fc9123c94fa47b924d",
324
- "9dda1537424142e0b7f2fdd5f9c1b98d",
325
- "1db171d1920d432283f9e1795c4c0c80",
326
- "23e0caeaf15546f0b5c62aa263c99e09",
327
- "03b8aded009343f288f0945b64d1f41c",
328
- "4d922a99035d45c59ce9868a4ef73d68",
329
- "aea6b63cbced40619bf32b1a2c350259",
330
- "c89c9dd46b454181aadaf82c7296cdae",
331
- "bec71553390b44879accb638a5b4873f",
332
- "97e4316196e84c7a82a2dd3e4698bc55",
333
- "b2ab2dc287a9421ca812074389ee31a7",
334
- "fa5c2f509ec54c5695a406160ab0626a"
335
  ]
336
  },
337
- "id": "P9LDJ7o-Wsc-",
338
- "outputId": "cd49bff2-b0da-4722-8baa-6a07f1023b39"
339
  },
340
  "outputs": [
 
 
 
 
 
 
 
 
 
 
 
 
 
 
341
  {
342
  "name": "stderr",
343
  "output_type": "stream",
344
  "text": [
345
- "Parsing nodes: 100%|██████████| 14/14 [00:00<00:00, 30.35it/s]\n",
346
- "100%|██████████| 108/108 [01:01<00:00, 1.76it/s]\n",
347
- "100%|██████████| 108/108 [01:13<00:00, 1.47it/s]\n",
348
- "100%|██████████| 108/108 [00:30<00:00, 3.59it/s]\n",
349
- "Generating embeddings: 100%|██████████| 108/108 [00:04<00:00, 26.11it/s]\n"
350
  ]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
351
  }
352
  ],
353
  "source": [
@@ -359,6 +446,7 @@
359
  "from llama_index.embeddings.cohere import CohereEmbedding\n",
360
  "from llama_index.core.ingestion import IngestionPipeline\n",
361
  "\n",
 
362
  "# Create the pipeline to apply the transformation on each chunk,\n",
363
  "# and store the transformed text in the chroma vector store.\n",
364
  "pipeline = IngestionPipeline(\n",
@@ -378,13 +466,13 @@
378
  },
379
  {
380
  "cell_type": "code",
381
- "execution_count": 13,
382
  "metadata": {
383
  "colab": {
384
  "base_uri": "https://localhost:8080/"
385
  },
386
- "id": "mPGa85hM2P3P",
387
- "outputId": "9d7811ba-1e10-4098-b6eb-77a4e7d37457"
388
  },
389
  "outputs": [
390
  {
@@ -393,7 +481,7 @@
393
  "108"
394
  ]
395
  },
396
- "execution_count": 13,
397
  "metadata": {},
398
  "output_type": "execute_result"
399
  }
@@ -404,13 +492,13 @@
404
  },
405
  {
406
  "cell_type": "code",
407
- "execution_count": 14,
408
  "metadata": {
409
  "colab": {
410
  "base_uri": "https://localhost:8080/"
411
  },
412
- "id": "jjnmscmq2cXK",
413
- "outputId": "5f6fa176-4e09-4cc7-bd17-8236b061ad17"
414
  },
415
  "outputs": [
416
  {
@@ -419,7 +507,7 @@
419
  "1024"
420
  ]
421
  },
422
- "execution_count": 14,
423
  "metadata": {},
424
  "output_type": "execute_result"
425
  }
@@ -430,13 +518,13 @@
430
  },
431
  {
432
  "cell_type": "code",
433
- "execution_count": 15,
434
  "metadata": {
435
  "colab": {
436
  "base_uri": "https://localhost:8080/"
437
  },
438
- "id": "hV9G0lSUJJSa",
439
- "outputId": "453a4ea3-dfda-4da1-ac29-929834c83b40"
440
  },
441
  "outputs": [
442
  {
@@ -444,12 +532,12 @@
444
  "output_type": "stream",
445
  "text": [
446
  " adding: mini-llama-articles/ (stored 0%)\n",
447
- " adding: mini-llama-articles/63fe4276-8624-43c7-8c23-32dbfedb2285/ (stored 0%)\n",
448
- " adding: mini-llama-articles/63fe4276-8624-43c7-8c23-32dbfedb2285/data_level0.bin (deflated 100%)\n",
449
- " adding: mini-llama-articles/63fe4276-8624-43c7-8c23-32dbfedb2285/length.bin (deflated 25%)\n",
450
- " adding: mini-llama-articles/63fe4276-8624-43c7-8c23-32dbfedb2285/link_lists.bin (stored 0%)\n",
451
- " adding: mini-llama-articles/63fe4276-8624-43c7-8c23-32dbfedb2285/header.bin (deflated 61%)\n",
452
- " adding: mini-llama-articles/chroma.sqlite3 (deflated 70%)\n"
453
  ]
454
  }
455
  ],
@@ -461,37 +549,26 @@
461
  {
462
  "cell_type": "markdown",
463
  "metadata": {
464
- "id": "OWaT6rL7ksp8"
465
  },
466
  "source": [
467
- "# Load Indexes\n"
468
  ]
469
  },
470
  {
471
  "cell_type": "markdown",
472
  "metadata": {
473
- "id": "B4w8xP2Ggrvf"
474
  },
475
  "source": [
476
- "If you have already uploaded the zip file for the vector store checkpoint, please uncomment the code in the following cell block to extract its contents. After doing so, you will be able to load the dataset from local storage.\n"
477
  ]
478
  },
479
  {
480
  "cell_type": "code",
481
- "execution_count": 16,
482
- "metadata": {
483
- "id": "EF-wobGAJRgL"
484
- },
485
- "outputs": [],
486
- "source": [
487
- "# !unzip vectorstore_cohere.zip"
488
- ]
489
- },
490
- {
491
- "cell_type": "code",
492
- "execution_count": 17,
493
  "metadata": {
494
- "id": "mXi56KTXk2sp"
495
  },
496
  "outputs": [],
497
  "source": [
@@ -503,17 +580,21 @@
503
  },
504
  {
505
  "cell_type": "code",
506
- "execution_count": 19,
507
  "metadata": {
508
- "id": "9l0PaY230syE"
 
 
 
 
509
  },
510
  "outputs": [
511
  {
512
  "name": "stderr",
513
  "output_type": "stream",
514
  "text": [
515
- "/var/folders/l7/9qcp7g5x5rl9x8ltw0t85qym0000gn/T/ipykernel_74455/3981499771.py:11: DeprecationWarning: Call to deprecated class method from_defaults. (ServiceContext is deprecated, please use `llama_index.settings.Settings` instead.) -- Deprecated since version 0.10.0.\n",
516
- " service_context = ServiceContext.from_defaults(\n"
517
  ]
518
  }
519
  ],
@@ -526,16 +607,17 @@
526
  " input_type=\"search_query\",\n",
527
  ")\n",
528
  "\n",
529
- "# Define the ServiceContext object to tie the LLM for generating final answer,\n",
530
  "# and the embedding model to help with retrieving related nodes.\n",
531
- "service_context = ServiceContext.from_defaults(llm=llm, embed_model=embed_model)"
 
532
  ]
533
  },
534
  {
535
  "cell_type": "code",
536
- "execution_count": 21,
537
  "metadata": {
538
- "id": "jKXURvLtkuTS"
539
  },
540
  "outputs": [],
541
  "source": [
@@ -550,17 +632,17 @@
550
  {
551
  "cell_type": "markdown",
552
  "metadata": {
553
- "id": "8JPD8yAinVSq"
554
  },
555
  "source": [
556
- "# Query Dataset\n"
557
  ]
558
  },
559
  {
560
  "cell_type": "code",
561
- "execution_count": 22,
562
  "metadata": {
563
- "id": "b0gue7cyctt1"
564
  },
565
  "outputs": [],
566
  "source": [
@@ -573,23 +655,26 @@
573
  },
574
  {
575
  "cell_type": "code",
576
- "execution_count": 23,
577
  "metadata": {
578
  "colab": {
579
  "base_uri": "https://localhost:8080/",
580
- "height": 53
581
  },
582
- "id": "VKK3jMprctre",
583
- "outputId": "cb85d598-d1bc-49e9-818f-c7bbde465864"
584
  },
585
  "outputs": [
586
  {
587
  "data": {
 
 
 
588
  "text/plain": [
589
- "'LLaMA2 model has a total of 2 trillion parameters.'"
590
  ]
591
  },
592
- "execution_count": 23,
593
  "metadata": {},
594
  "output_type": "execute_result"
595
  }
@@ -600,28 +685,43 @@
600
  },
601
  {
602
  "cell_type": "code",
603
- "execution_count": 24,
604
  "metadata": {
605
  "colab": {
606
  "base_uri": "https://localhost:8080/"
607
  },
608
- "id": "465dH4yQc7Ct",
609
- "outputId": "3d2b3ce2-7705-41bb-80e3-4fe6b390dcef"
610
  },
611
  "outputs": [
612
  {
613
  "name": "stdout",
614
  "output_type": "stream",
615
  "text": [
616
- "Node ID\t 0a3368de-02cc-4cb2-8579-3379e9c68101\n",
 
 
 
 
 
617
  "Title\t Fine-Tuning a Llama-2 7B Model for Python Code Generation\n",
618
  "Text\t New Llama-2 model In mid-July, Meta released its new family of pre-trained and finetuned models called Llama-2, with an open source and commercial character to facilitate its use and expansion. The base model was released with a chat version and sizes 7B, 13B, and 70B. Together with the models, the corresponding papers were published describing their characteristics and relevant points of the learning process, which provide very interesting information on the subject. For pre-training, 40% more tokens were used, reaching 2T, the context length was doubled and the grouped-query attention (GQA) technique was applied to speed up inference on the heavier 70B model. On the standard transformer architecture, RMSNorm normalization, SwiGLU activation, and rotatory positional embedding are used, the context length reaches 4096 tokens, and an Adam optimizer is applied with a cosine learning rate schedule, a weight decay of 0.1 and gradient clipping. The dataset for tuning For our tuning process, we will take a dataset containing about 18,000 examples where the model is asked to build a Python code that solves a given task. This is an extraction of the original dataset [2], where only the Python language examples are selected. Each row contains the description of the task to be solved, an example of data input to the task if applicable, and the generated code fragment that solves the task is provided [3]. Creating the prompt To carry out an instruction fine-tuning, we must transform each one of our data examples as if it were an instruction, outlining its main sections as follows: Output: Fine-tuning the model To carry out this stage, we have used the Google Colab environment, where we have developed a notebook that allows us to run the training in an interactive way and also a Python script to run the training in unattended mode. 
For the first test runs, a T4 instance with a high RAM capacity is enough, but when it comes to running the whole dataset and epochs, we have opted to use an A100 instance in order to speed up the training and ensure that its execution time is reasonable. In order to be able to\n",
619
- "Score\t 0.4173821910560196\n",
620
  "-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_\n",
621
- "Node ID\t b2b33887-2da1-4838-903e-8e126224095d\n",
622
  "Title\t Fine-Tuning a Llama-2 7B Model for Python Code Generation\n",
623
  "Text\t if it were an instruction, outlining its main sections as follows: Output: Fine-tuning the model To carry out this stage, we have used the Google Colab environment, where we have developed a notebook that allows us to run the training in an interactive way and also a Python script to run the training in unattended mode. For the first test runs, a T4 instance with a high RAM capacity is enough, but when it comes to running the whole dataset and epochs, we have opted to use an A100 instance in order to speed up the training and ensure that its execution time is reasonable. In order to be able to share the model, we will log in to the Huggingface hub using the appropriate token, so that at the end of the whole process, we will upload the model files so that they can be shared with the rest of the users. Fine-tuning techniques: PEFT, Lora, and QLora In recent months, some papers have appeared showing how PEFT techniques can be used to train large language models with a drastic reduction of RAM requirements and consequently allowing fine-tuning of these models on a single GPU of reasonable size. The usual steps to train an LLM consist, first, an intensive pre-training on billions or trillions of tokens to obtain a foundation model, and then a fine-tuning is performed on this model to specialize it on a downstream task. In this fine-tuning phase is where the PEFT technique has its purpose. Parameter Efficient Fine-Tuning (PEFT) allows us to considerably reduce RAM and storage requirements by only fine-tuning a small number of additional parameters, with virtually all model parameters remaining frozen. PEFT has been found to produce good generalization with relatively low-volume datasets. Furthermore, it enhances the reusability and portability of the model, as the small checkpoints obtained can be easily added to the base model, and the base model can be easily fine-tuned and reused in multiple scenarios by adding the PEFT parameters. 
Finally, since the base model is not adjusted, all the knowledge acquired in the pre-training phase is preserved, thus avoiding catastrophic forgetting. Most widely used PEFT techniques aim to keep the pre-trained base model untouched\n",
624
- "Score\t 0.4013547787636657\n",
 
 
 
 
 
 
 
 
 
 
625
  "-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_\n"
626
  ]
627
  }
@@ -639,28 +739,29 @@
639
  {
640
  "cell_type": "markdown",
641
  "metadata": {
642
- "id": "iMkpzH7vvb09"
643
  },
644
  "source": [
645
- "# Evaluate\n"
646
  ]
647
  },
648
  {
649
  "cell_type": "code",
650
- "execution_count": 26,
651
  "metadata": {
652
  "colab": {
653
- "base_uri": "https://localhost:8080/"
 
654
  },
655
- "id": "H8a3eKgKvckU",
656
- "outputId": "85b0765e-5a42-4f60-ccff-fc4bc688f65a"
657
  },
658
  "outputs": [
659
  {
660
  "name": "stderr",
661
  "output_type": "stream",
662
  "text": [
663
- "100%|██████████| 108/108 [06:43<00:00, 3.74s/it]\n"
664
  ]
665
  }
666
  ],
@@ -683,33 +784,17 @@
683
  {
684
  "cell_type": "markdown",
685
  "metadata": {
686
- "id": "998nNEGYhKhu"
687
  },
688
  "source": [
689
- "If you have uploaded the generated question JSON file, please uncomment the code in the next cell block. This will avoid the need to generate the questions manually, saving you time and effort.\n"
690
  ]
691
  },
692
  {
693
  "cell_type": "code",
694
- "execution_count": 27,
695
- "metadata": {
696
- "id": "3sA1K84U254o"
697
- },
698
- "outputs": [],
699
- "source": [
700
- "# from llama_index.finetuning.embeddings.common import (\n",
701
- "# EmbeddingQAFinetuneDataset,\n",
702
- "# )\n",
703
- "# rag_eval_dataset = EmbeddingQAFinetuneDataset.from_json(\n",
704
- "# \"./rag_eval_dataset_cohere.json\"\n",
705
- "# )"
706
- ]
707
- },
708
- {
709
- "cell_type": "code",
710
- "execution_count": 28,
711
  "metadata": {
712
- "id": "H7ubvcbk27vr"
713
  },
714
  "outputs": [],
715
  "source": [
@@ -739,13 +824,13 @@
739
  },
740
  {
741
  "cell_type": "code",
742
- "execution_count": 29,
743
  "metadata": {
744
  "colab": {
745
  "base_uri": "https://localhost:8080/"
746
  },
747
- "id": "uNLxDxoc2-Ac",
748
- "outputId": "8a2df94d-99b5-4aa4-a31e-b6c94256d1bb"
749
  },
750
  "outputs": [
751
  {
@@ -753,15 +838,15 @@
753
  "output_type": "stream",
754
  "text": [
755
  " Retriever Name Hit Rate MRR\n",
756
- "0 Retriever top_2 0.677355 0.562124\n",
757
  " Retriever Name Hit Rate MRR\n",
758
- "0 Retriever top_4 0.815631 0.606045\n",
759
  " Retriever Name Hit Rate MRR\n",
760
- "0 Retriever top_6 0.865731 0.615331\n",
761
- " Retriever Name Hit Rate MRR\n",
762
- "0 Retriever top_8 0.887776 0.618301\n",
763
  " Retriever Name Hit Rate MRR\n",
764
- "0 Retriever top_10 0.8998 0.619592\n"
765
  ]
766
  }
767
  ],
@@ -780,38 +865,39 @@
780
  },
781
  {
782
  "cell_type": "code",
783
- "execution_count": 30,
784
  "metadata": {
785
  "colab": {
786
- "base_uri": "https://localhost:8080/"
 
787
  },
788
- "id": "3ukkWC9R2_0J",
789
- "outputId": "d177c25d-a163-4b71-97f4-2af468737bbb"
790
  },
791
  "outputs": [
792
  {
793
  "name": "stderr",
794
  "output_type": "stream",
795
  "text": [
796
- "/var/folders/l7/9qcp7g5x5rl9x8ltw0t85qym0000gn/T/ipykernel_74455/1546854213.py:11: DeprecationWarning: Call to deprecated class method from_defaults. (ServiceContext is deprecated, please use `llama_index.settings.Settings` instead.) -- Deprecated since version 0.10.0.\n",
797
- " service_context_gpt4 = ServiceContext.from_defaults(llm=llm_gpt4)\n"
798
  ]
799
  },
800
  {
801
  "name": "stdout",
802
  "output_type": "stream",
803
  "text": [
804
- "top_2 faithfulness_score: 1.0\n",
805
- "top_2 relevancy_score: 1.0\n",
806
  "-_-_-_-_-_-_-_-_-_-_\n",
807
- "top_4 faithfulness_score: 1.0\n",
808
- "top_4 relevancy_score: 1.0\n",
809
  "-_-_-_-_-_-_-_-_-_-_\n",
810
- "top_6 faithfulness_score: 1.0\n",
811
- "top_6 relevancy_score: 1.0\n",
812
  "-_-_-_-_-_-_-_-_-_-_\n",
813
- "top_8 faithfulness_score: 0.45\n",
814
- "top_8 relevancy_score: 0.45\n",
815
  "-_-_-_-_-_-_-_-_-_-_\n",
816
  "top_10 faithfulness_score: 0.65\n",
817
  "top_10 relevancy_score: 0.65\n",
@@ -833,11 +919,11 @@
833
  " query_engine = index.as_query_engine(similarity_top_k=i, llm=llm)\n",
834
  "\n",
835
  " # While we use GPT3.5-Turbo to answer questions, we can use GPT4 to evaluate the answers.\n",
836
- " llm_gpt4 = OpenAI(temperature=1, model=\"gpt-4o\")\n",
837
- " service_context_gpt4 = ServiceContext.from_defaults(llm=llm_gpt4)\n",
838
  "\n",
839
- " faithfulness_evaluator = FaithfulnessEvaluator(service_context=service_context_gpt4)\n",
840
- " relevancy_evaluator = RelevancyEvaluator(service_context=service_context_gpt4)\n",
841
  "\n",
842
  " # Run evaluation\n",
843
  " queries = list(rag_eval_dataset.queries.values())\n",
@@ -861,21 +947,12 @@
861
  " print(f\"top_{i} relevancy_score: {relevancy_score}\")\n",
862
  " print(\"-_\" * 10)"
863
  ]
864
- },
865
- {
866
- "cell_type": "code",
867
- "execution_count": null,
868
- "metadata": {
869
- "id": "1MB1YD1E3EKM"
870
- },
871
- "outputs": [],
872
- "source": []
873
  }
874
  ],
875
  "metadata": {
 
876
  "colab": {
877
- "authorship_tag": "ABX9TyMx3DkzJEgLiO/6oTdKzS6v",
878
- "include_colab_link": true,
879
  "provenance": []
880
  },
881
  "kernelspec": {
@@ -883,20 +960,11 @@
883
  "name": "python3"
884
  },
885
  "language_info": {
886
- "codemirror_mode": {
887
- "name": "ipython",
888
- "version": 3
889
- },
890
- "file_extension": ".py",
891
- "mimetype": "text/x-python",
892
- "name": "python",
893
- "nbconvert_exporter": "python",
894
- "pygments_lexer": "ipython3",
895
- "version": "3.12.4"
896
  },
897
  "widgets": {
898
  "application/vnd.jupyter.widget-state+json": {
899
- "03b8aded009343f288f0945b64d1f41c": {
900
  "model_module": "@jupyter-widgets/controls",
901
  "model_module_version": "1.5.0",
902
  "model_name": "HTMLModel",
@@ -911,13 +979,13 @@
911
  "_view_name": "HTMLView",
912
  "description": "",
913
  "description_tooltip": null,
914
- "layout": "IPY_MODEL_b2ab2dc287a9421ca812074389ee31a7",
915
  "placeholder": "​",
916
- "style": "IPY_MODEL_fa5c2f509ec54c5695a406160ab0626a",
917
- "value": " 108/108 [00:03&lt;00:00, 30.08it/s]"
918
  }
919
  },
920
- "06e7a0370c8c46dd9a47c72a474212d1": {
921
  "model_module": "@jupyter-widgets/controls",
922
  "model_module_version": "1.5.0",
923
  "model_name": "DescriptionStyleModel",
@@ -932,28 +1000,29 @@
932
  "description_width": ""
933
  }
934
  },
935
- "1db171d1920d432283f9e1795c4c0c80": {
936
  "model_module": "@jupyter-widgets/controls",
937
  "model_module_version": "1.5.0",
938
- "model_name": "HTMLModel",
939
  "state": {
940
  "_dom_classes": [],
941
  "_model_module": "@jupyter-widgets/controls",
942
  "_model_module_version": "1.5.0",
943
- "_model_name": "HTMLModel",
944
  "_view_count": null,
945
  "_view_module": "@jupyter-widgets/controls",
946
  "_view_module_version": "1.5.0",
947
- "_view_name": "HTMLView",
948
- "description": "",
949
- "description_tooltip": null,
950
- "layout": "IPY_MODEL_aea6b63cbced40619bf32b1a2c350259",
951
- "placeholder": "​",
952
- "style": "IPY_MODEL_c89c9dd46b454181aadaf82c7296cdae",
953
- "value": "Generating embeddings: 100%"
 
954
  }
955
  },
956
- "22024efa09cb4330ab68a8c2bdbf92ac": {
957
  "model_module": "@jupyter-widgets/base",
958
  "model_module_version": "1.2.0",
959
  "model_name": "LayoutModel",
@@ -1005,55 +1074,7 @@
1005
  "width": null
1006
  }
1007
  },
1008
- "23675bffa00749849ec944f84986ff52": {
1009
- "model_module": "@jupyter-widgets/controls",
1010
- "model_module_version": "1.5.0",
1011
- "model_name": "FloatProgressModel",
1012
- "state": {
1013
- "_dom_classes": [],
1014
- "_model_module": "@jupyter-widgets/controls",
1015
- "_model_module_version": "1.5.0",
1016
- "_model_name": "FloatProgressModel",
1017
- "_view_count": null,
1018
- "_view_module": "@jupyter-widgets/controls",
1019
- "_view_module_version": "1.5.0",
1020
- "_view_name": "ProgressView",
1021
- "bar_style": "success",
1022
- "description": "",
1023
- "description_tooltip": null,
1024
- "layout": "IPY_MODEL_268f6f0800164e0ab7f8f31718f7f9be",
1025
- "max": 14,
1026
- "min": 0,
1027
- "orientation": "horizontal",
1028
- "style": "IPY_MODEL_4001b95bd48147fb876b37a644e70dec",
1029
- "value": 14
1030
- }
1031
- },
1032
- "23e0caeaf15546f0b5c62aa263c99e09": {
1033
- "model_module": "@jupyter-widgets/controls",
1034
- "model_module_version": "1.5.0",
1035
- "model_name": "FloatProgressModel",
1036
- "state": {
1037
- "_dom_classes": [],
1038
- "_model_module": "@jupyter-widgets/controls",
1039
- "_model_module_version": "1.5.0",
1040
- "_model_name": "FloatProgressModel",
1041
- "_view_count": null,
1042
- "_view_module": "@jupyter-widgets/controls",
1043
- "_view_module_version": "1.5.0",
1044
- "_view_name": "ProgressView",
1045
- "bar_style": "success",
1046
- "description": "",
1047
- "description_tooltip": null,
1048
- "layout": "IPY_MODEL_bec71553390b44879accb638a5b4873f",
1049
- "max": 108,
1050
- "min": 0,
1051
- "orientation": "horizontal",
1052
- "style": "IPY_MODEL_97e4316196e84c7a82a2dd3e4698bc55",
1053
- "value": 108
1054
- }
1055
- },
1056
- "268f6f0800164e0ab7f8f31718f7f9be": {
1057
  "model_module": "@jupyter-widgets/base",
1058
  "model_module_version": "1.2.0",
1059
  "model_name": "LayoutModel",
@@ -1105,45 +1126,7 @@
1105
  "width": null
1106
  }
1107
  },
1108
- "2b1095050bb847c48855e3b74ae18b19": {
1109
- "model_module": "@jupyter-widgets/controls",
1110
- "model_module_version": "1.5.0",
1111
- "model_name": "HBoxModel",
1112
- "state": {
1113
- "_dom_classes": [],
1114
- "_model_module": "@jupyter-widgets/controls",
1115
- "_model_module_version": "1.5.0",
1116
- "_model_name": "HBoxModel",
1117
- "_view_count": null,
1118
- "_view_module": "@jupyter-widgets/controls",
1119
- "_view_module_version": "1.5.0",
1120
- "_view_name": "HBoxView",
1121
- "box_style": "",
1122
- "children": [
1123
- "IPY_MODEL_a0a1c543115c4764b4150c5d0216370c",
1124
- "IPY_MODEL_23675bffa00749849ec944f84986ff52",
1125
- "IPY_MODEL_9e86b288110f4d418fd9761f59f5637f"
1126
- ],
1127
- "layout": "IPY_MODEL_d6a4fd2a9cf7431b8bf738d9da0e2a7c"
1128
- }
1129
- },
1130
- "4001b95bd48147fb876b37a644e70dec": {
1131
- "model_module": "@jupyter-widgets/controls",
1132
- "model_module_version": "1.5.0",
1133
- "model_name": "ProgressStyleModel",
1134
- "state": {
1135
- "_model_module": "@jupyter-widgets/controls",
1136
- "_model_module_version": "1.5.0",
1137
- "_model_name": "ProgressStyleModel",
1138
- "_view_count": null,
1139
- "_view_module": "@jupyter-widgets/base",
1140
- "_view_module_version": "1.2.0",
1141
- "_view_name": "StyleView",
1142
- "bar_color": null,
1143
- "description_width": ""
1144
- }
1145
- },
1146
- "4d922a99035d45c59ce9868a4ef73d68": {
1147
  "model_module": "@jupyter-widgets/base",
1148
  "model_module_version": "1.2.0",
1149
  "model_name": "LayoutModel",
@@ -1195,7 +1178,7 @@
1195
  "width": null
1196
  }
1197
  },
1198
- "700a1ffb298c4dd799c44fcee540b74c": {
1199
  "model_module": "@jupyter-widgets/base",
1200
  "model_module_version": "1.2.0",
1201
  "model_name": "LayoutModel",
@@ -1247,66 +1230,7 @@
1247
  "width": null
1248
  }
1249
  },
1250
- "97e4316196e84c7a82a2dd3e4698bc55": {
1251
- "model_module": "@jupyter-widgets/controls",
1252
- "model_module_version": "1.5.0",
1253
- "model_name": "ProgressStyleModel",
1254
- "state": {
1255
- "_model_module": "@jupyter-widgets/controls",
1256
- "_model_module_version": "1.5.0",
1257
- "_model_name": "ProgressStyleModel",
1258
- "_view_count": null,
1259
- "_view_module": "@jupyter-widgets/base",
1260
- "_view_module_version": "1.2.0",
1261
- "_view_name": "StyleView",
1262
- "bar_color": null,
1263
- "description_width": ""
1264
- }
1265
- },
1266
- "9dda1537424142e0b7f2fdd5f9c1b98d": {
1267
- "model_module": "@jupyter-widgets/controls",
1268
- "model_module_version": "1.5.0",
1269
- "model_name": "HBoxModel",
1270
- "state": {
1271
- "_dom_classes": [],
1272
- "_model_module": "@jupyter-widgets/controls",
1273
- "_model_module_version": "1.5.0",
1274
- "_model_name": "HBoxModel",
1275
- "_view_count": null,
1276
- "_view_module": "@jupyter-widgets/controls",
1277
- "_view_module_version": "1.5.0",
1278
- "_view_name": "HBoxView",
1279
- "box_style": "",
1280
- "children": [
1281
- "IPY_MODEL_1db171d1920d432283f9e1795c4c0c80",
1282
- "IPY_MODEL_23e0caeaf15546f0b5c62aa263c99e09",
1283
- "IPY_MODEL_03b8aded009343f288f0945b64d1f41c"
1284
- ],
1285
- "layout": "IPY_MODEL_4d922a99035d45c59ce9868a4ef73d68"
1286
- }
1287
- },
1288
- "9e86b288110f4d418fd9761f59f5637f": {
1289
- "model_module": "@jupyter-widgets/controls",
1290
- "model_module_version": "1.5.0",
1291
- "model_name": "HTMLModel",
1292
- "state": {
1293
- "_dom_classes": [],
1294
- "_model_module": "@jupyter-widgets/controls",
1295
- "_model_module_version": "1.5.0",
1296
- "_model_name": "HTMLModel",
1297
- "_view_count": null,
1298
- "_view_module": "@jupyter-widgets/controls",
1299
- "_view_module_version": "1.5.0",
1300
- "_view_name": "HTMLView",
1301
- "description": "",
1302
- "description_tooltip": null,
1303
- "layout": "IPY_MODEL_22024efa09cb4330ab68a8c2bdbf92ac",
1304
- "placeholder": "​",
1305
- "style": "IPY_MODEL_c14678e2b8c546fc9123c94fa47b924d",
1306
- "value": " 14/14 [00:00&lt;00:00, 13.27it/s]"
1307
- }
1308
- },
1309
- "a0a1c543115c4764b4150c5d0216370c": {
1310
  "model_module": "@jupyter-widgets/controls",
1311
  "model_module_version": "1.5.0",
1312
  "model_name": "HTMLModel",
@@ -1321,13 +1245,13 @@
1321
  "_view_name": "HTMLView",
1322
  "description": "",
1323
  "description_tooltip": null,
1324
- "layout": "IPY_MODEL_700a1ffb298c4dd799c44fcee540b74c",
1325
  "placeholder": "​",
1326
- "style": "IPY_MODEL_06e7a0370c8c46dd9a47c72a474212d1",
1327
- "value": "Parsing nodes: 100%"
1328
  }
1329
  },
1330
- "aea6b63cbced40619bf32b1a2c350259": {
1331
  "model_module": "@jupyter-widgets/base",
1332
  "model_module_version": "1.2.0",
1333
  "model_name": "LayoutModel",
@@ -1379,7 +1303,28 @@
1379
  "width": null
1380
  }
1381
  },
1382
- "b2ab2dc287a9421ca812074389ee31a7": {
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1383
  "model_module": "@jupyter-widgets/base",
1384
  "model_module_version": "1.2.0",
1385
  "model_name": "LayoutModel",
@@ -1431,7 +1376,55 @@
1431
  "width": null
1432
  }
1433
  },
1434
- "bec71553390b44879accb638a5b4873f": {
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1435
  "model_module": "@jupyter-widgets/base",
1436
  "model_module_version": "1.2.0",
1437
  "model_name": "LayoutModel",
@@ -1483,7 +1476,7 @@
1483
  "width": null
1484
  }
1485
  },
1486
- "c14678e2b8c546fc9123c94fa47b924d": {
1487
  "model_module": "@jupyter-widgets/controls",
1488
  "model_module_version": "1.5.0",
1489
  "model_name": "DescriptionStyleModel",
@@ -1498,22 +1491,66 @@
1498
  "description_width": ""
1499
  }
1500
  },
1501
- "c89c9dd46b454181aadaf82c7296cdae": {
1502
  "model_module": "@jupyter-widgets/controls",
1503
  "model_module_version": "1.5.0",
1504
- "model_name": "DescriptionStyleModel",
1505
  "state": {
 
1506
  "_model_module": "@jupyter-widgets/controls",
1507
  "_model_module_version": "1.5.0",
1508
- "_model_name": "DescriptionStyleModel",
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1509
  "_view_count": null,
1510
  "_view_module": "@jupyter-widgets/base",
1511
  "_view_module_version": "1.2.0",
1512
  "_view_name": "StyleView",
 
1513
  "description_width": ""
1514
  }
1515
  },
1516
- "d6a4fd2a9cf7431b8bf738d9da0e2a7c": {
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1517
  "model_module": "@jupyter-widgets/base",
1518
  "model_module_version": "1.2.0",
1519
  "model_name": "LayoutModel",
@@ -1565,7 +1602,38 @@
1565
  "width": null
1566
  }
1567
  },
1568
- "fa5c2f509ec54c5695a406160ab0626a": {
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1569
  "model_module": "@jupyter-widgets/controls",
1570
  "model_module_version": "1.5.0",
1571
  "model_name": "DescriptionStyleModel",
 
3
  {
4
  "cell_type": "markdown",
5
  "metadata": {
6
+ "id": "_x0uP9idZ_DJ"
 
7
  },
8
  "source": [
9
+ "**Install Packages and Setup Variables**"
 
 
 
 
 
 
 
 
 
10
  ]
11
  },
12
  {
13
  "cell_type": "code",
14
+ "execution_count": null,
15
  "metadata": {
16
  "colab": {
17
  "base_uri": "https://localhost:8080/"
18
  },
19
+ "id": "-XXRTyEPB9yF",
20
+ "outputId": "6533c67b-c692-40d5-8a9d-a6bef97e4d55"
21
  },
22
+ "outputs": [
23
+ {
24
+ "name": "stdout",
25
+ "output_type": "stream",
26
+ "text": [
27
+ "\u001b[?25l \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m0.0/56.5 kB\u001b[0m \u001b[31m?\u001b[0m eta \u001b[36m-:--:--\u001b[0m\r\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m56.5/56.5 kB\u001b[0m \u001b[31m945.1 kB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
28
+ "\u001b[?25h Preparing metadata (setup.py) ... \u001b[?25l\u001b[?25hdone\n",
29
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m67.3/67.3 kB\u001b[0m \u001b[31m3.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
30
+ "\u001b[?25h Installing build dependencies ... \u001b[?25l\u001b[?25hdone\n",
31
+ " Getting requirements to build wheel ... \u001b[?25l\u001b[?25hdone\n",
32
+ " Preparing metadata (pyproject.toml) ... \u001b[?25l\u001b[?25hdone\n",
33
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m50.4/50.4 kB\u001b[0m \u001b[31m3.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
34
+ "\u001b[?25h Preparing metadata (setup.py) ... \u001b[?25l\u001b[?25hdone\n",
35
+ " Preparing metadata (setup.py) ... \u001b[?25l\u001b[?25hdone\n",
36
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m7.4/7.4 MB\u001b[0m \u001b[31m43.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
37
+ "\u001b[?25h Preparing metadata (setup.py) ... \u001b[?25l\u001b[?25hdone\n",
38
+ " Preparing metadata (setup.py) ... \u001b[?25l\u001b[?25hdone\n",
39
+ " Preparing metadata (setup.py) ... \u001b[?25l\u001b[?25hdone\n",
40
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m337.0/337.0 kB\u001b[0m \u001b[31m18.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
41
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m177.4/177.4 kB\u001b[0m \u001b[31m8.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
42
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m1.1/1.1 MB\u001b[0m \u001b[31m38.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
43
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m584.3/584.3 kB\u001b[0m \u001b[31m25.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
44
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m2.4/2.4 MB\u001b[0m \u001b[31m50.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
45
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m15.5/15.5 MB\u001b[0m \u001b[31m20.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
46
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m72.9/72.9 kB\u001b[0m \u001b[31m5.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
47
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m171.5/171.5 kB\u001b[0m \u001b[31m8.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
48
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m273.8/273.8 kB\u001b[0m \u001b[31m16.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
49
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m139.2/139.2 kB\u001b[0m \u001b[31m8.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
50
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m93.5/93.5 kB\u001b[0m \u001b[31m6.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
51
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m3.1/3.1 MB\u001b[0m \u001b[31m49.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
52
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m150.7/150.7 kB\u001b[0m \u001b[31m9.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
53
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m679.1/679.1 kB\u001b[0m \u001b[31m32.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
54
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m76.4/76.4 kB\u001b[0m \u001b[31m5.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
55
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m77.9/77.9 kB\u001b[0m \u001b[31m5.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
56
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m1.7/1.7 MB\u001b[0m \u001b[31m44.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
57
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m1.2/1.2 MB\u001b[0m \u001b[31m39.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
58
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m67.6/67.6 kB\u001b[0m \u001b[31m3.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
59
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m211.1/211.1 kB\u001b[0m \u001b[31m7.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
60
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m13.2/13.2 MB\u001b[0m \u001b[31m58.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
61
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m64.0/64.0 kB\u001b[0m \u001b[31m4.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
62
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m52.5/52.5 kB\u001b[0m \u001b[31m3.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
63
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m149.7/149.7 kB\u001b[0m \u001b[31m10.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
64
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m110.5/110.5 kB\u001b[0m \u001b[31m7.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
65
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m141.9/141.9 kB\u001b[0m \u001b[31m6.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
66
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m4.5/4.5 MB\u001b[0m \u001b[31m53.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
67
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m37.9/37.9 MB\u001b[0m \u001b[31m16.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
68
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m53.0/53.0 kB\u001b[0m \u001b[31m3.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
69
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m9.6/9.6 MB\u001b[0m \u001b[31m59.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
70
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m62.8/62.8 kB\u001b[0m \u001b[31m4.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
71
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m12.5/12.5 MB\u001b[0m \u001b[31m58.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
72
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m81.3/81.3 kB\u001b[0m \u001b[31m5.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
73
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m410.7/410.7 kB\u001b[0m \u001b[31m21.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
74
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m58.3/58.3 kB\u001b[0m \u001b[31m3.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
75
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m341.4/341.4 kB\u001b[0m \u001b[31m18.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
76
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m180.2/180.2 kB\u001b[0m \u001b[31m10.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
77
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m861.9/861.9 kB\u001b[0m \u001b[31m26.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
78
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m216.3/216.3 kB\u001b[0m \u001b[31m12.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
79
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m295.8/295.8 kB\u001b[0m \u001b[31m15.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
80
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m229.9/229.9 kB\u001b[0m \u001b[31m11.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
81
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m82.7/82.7 kB\u001b[0m \u001b[31m6.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
82
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m71.4/71.4 kB\u001b[0m \u001b[31m5.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
83
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m97.6/97.6 kB\u001b[0m \u001b[31m6.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
84
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m476.0/476.0 kB\u001b[0m \u001b[31m23.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
85
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m3.4/3.4 MB\u001b[0m \u001b[31m54.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
86
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m425.7/425.7 kB\u001b[0m \u001b[31m21.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
87
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m157.3/157.3 kB\u001b[0m \u001b[31m8.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
88
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m46.0/46.0 kB\u001b[0m \u001b[31m2.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
89
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m137.6/137.6 kB\u001b[0m \u001b[31m9.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
90
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m86.8/86.8 kB\u001b[0m \u001b[31m6.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
91
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m49.3/49.3 kB\u001b[0m \u001b[31m3.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
92
+ "\u001b[?25h Building wheel for html2text (setup.py) ... \u001b[?25l\u001b[?25hdone\n",
93
+ " Building wheel for tinysegmenter (setup.py) ... \u001b[?25l\u001b[?25hdone\n",
94
+ " Building wheel for pypika (pyproject.toml) ... \u001b[?25l\u001b[?25hdone\n",
95
+ " Building wheel for spider-client (setup.py) ... \u001b[?25l\u001b[?25hdone\n",
96
+ " Building wheel for feedfinder2 (setup.py) ... \u001b[?25l\u001b[?25hdone\n",
97
+ " Building wheel for jieba3k (setup.py) ... \u001b[?25l\u001b[?25hdone\n",
98
+ " Building wheel for sgmllib3k (setup.py) ... \u001b[?25l\u001b[?25hdone\n"
99
+ ]
100
+ }
101
+ ],
102
  "source": [
103
+ "!pip install -q llama-index==0.10.57 openai==1.37.0 llama-index-finetuning llama-index-embeddings-huggingface llama-index-embeddings-cohere llama-index-readers-web cohere==5.6.2 tiktoken==0.7.0 chromadb==0.5.5 html2text sentence_transformers pydantic llama-index-vector-stores-chroma==0.1.10 llama-index-llms-gemini==0.1.11 llama-index-embeddings-instructor"
104
  ]
105
  },
106
  {
107
  "cell_type": "code",
108
+ "execution_count": null,
109
  "metadata": {
110
+ "id": "CZxVvBx2CFak"
111
  },
112
  "outputs": [],
113
  "source": [
 
115
  "\n",
116
  "# Set the following API Keys in the Python environment. Will be used later.\n",
117
  "os.environ[\"OPENAI_API_KEY\"] = \"<YOUR_OPENAI_KEY>\"\n",
118
+ "os.environ[\"COHERE_API_KEY\"] = \"<YOUR_COHERE_KEY>\"\n",
119
+ "os.environ[\"GOOGLE_API_KEY\"] = \"<YOUR_GOOGLE_KEY>\""
120
  ]
121
  },
122
  {
123
  "cell_type": "code",
124
+ "execution_count": null,
125
  "metadata": {
126
+ "id": "b5wNUYxuCHBE"
127
  },
128
  "outputs": [],
129
  "source": [
 
137
  {
138
  "cell_type": "markdown",
139
  "metadata": {
140
+ "id": "E3o6KHBoaNr0"
141
  },
142
  "source": [
143
+ "**Load a Model**"
144
  ]
145
  },
146
  {
147
  "cell_type": "code",
148
+ "execution_count": null,
149
  "metadata": {
150
+ "id": "xHQU21OECJVb"
151
  },
152
+ "outputs": [],
 
 
 
 
 
 
 
 
 
153
  "source": [
154
  "from llama_index.llms.gemini import Gemini\n",
155
  "\n",
 
159
  {
160
  "cell_type": "markdown",
161
  "metadata": {
162
+ "id": "VaifBRDcaU7X"
163
  },
164
  "source": [
165
+ "**Create a VectorStore**"
166
  ]
167
  },
168
  {
169
  "cell_type": "code",
170
+ "execution_count": null,
171
  "metadata": {
172
+ "id": "IeOaYoQGZPNc"
173
  },
174
  "outputs": [],
175
  "source": [
176
  "import chromadb\n",
177
  "\n",
178
+ "# create a client and a new collection\n",
179
  "# chromadb.EphemeralClient saves data in-memory.\n",
180
+ "\n",
181
  "chroma_client = chromadb.PersistentClient(path=\"./mini-llama-articles\")\n",
182
  "chroma_collection = chroma_client.create_collection(\"mini-llama-articles\")"
183
  ]
184
  },
185
  {
186
  "cell_type": "code",
187
+ "execution_count": null,
188
  "metadata": {
189
+ "id": "tNr31jBoCLX3"
190
  },
191
  "outputs": [],
192
  "source": [
193
  "from llama_index.vector_stores.chroma import ChromaVectorStore\n",
194
  "\n",
195
  "# Define a storage context object using the created vector database.\n",
196
+ "vector_store = ChromaVectorStore(chroma_collection=chroma_collection)\n"
197
  ]
198
  },
199
  {
200
  "cell_type": "markdown",
201
  "metadata": {
202
+ "id": "bU6I4qXmaZiM"
203
  },
204
  "source": [
205
+ "**Load the Dataset (CSV)**"
206
  ]
207
  },
208
  {
209
  "cell_type": "markdown",
210
  "metadata": {
211
+ "id": "tXv5ZnLqahac"
212
  },
213
  "source": [
214
+ "Download\n"
215
  ]
216
  },
217
  {
218
  "cell_type": "markdown",
219
  "metadata": {
220
+ "id": "FzRmU_DZaqX1"
221
  },
222
  "source": [
223
+ "The dataset includes several articles from the TowardsAI blog, which provide an in-depth explanation of the LLaMA2 model. Read the dataset as a long string."
224
  ]
225
  },
226
  {
227
  "cell_type": "code",
228
+ "execution_count": null,
229
  "metadata": {
230
  "colab": {
231
  "base_uri": "https://localhost:8080/"
232
  },
233
+ "id": "O0n2I1OZKrQ8",
234
+ "outputId": "04874625-30f2-487e-c8e5-fbd963b40c4d"
235
  },
236
  "outputs": [
237
  {
 
240
  "text": [
241
  " % Total % Received % Xferd Average Speed Time Time Time Current\n",
242
  " Dload Upload Total Spent Left Speed\n",
243
+ "100 169k 100 169k 0 0 770k 0 --:--:-- --:--:-- --:--:-- 770k\n"
244
  ]
245
  }
246
  ],
 
251
  {
252
  "cell_type": "markdown",
253
  "metadata": {
254
+ "id": "XpGJU4tIbCw6"
255
  },
256
  "source": [
257
+ "**Read File**"
258
  ]
259
  },
260
  {
261
  "cell_type": "code",
262
+ "execution_count": null,
263
  "metadata": {
264
  "colab": {
265
  "base_uri": "https://localhost:8080/"
266
  },
267
+ "id": "v0SNZediCNBi",
268
+ "outputId": "917a3deb-5cf8-4ca3-c5b4-2539531c6a90"
269
  },
270
  "outputs": [
271
  {
 
274
  "14"
275
  ]
276
  },
277
+ "execution_count": 8,
278
  "metadata": {},
279
  "output_type": "execute_result"
280
  }
 
301
  {
302
  "cell_type": "markdown",
303
  "metadata": {
304
+ "id": "qRobYkGPbLjO"
305
  },
306
  "source": [
307
+ "**Convert to Document obj**"
308
  ]
309
  },
310
  {
311
  "cell_type": "code",
312
+ "execution_count": null,
313
  "metadata": {
314
+ "id": "U4c1Ym5YCPCs"
315
  },
316
  "outputs": [],
317
  "source": [
 
329
  {
330
  "cell_type": "markdown",
331
  "metadata": {
332
+ "id": "pgn3S1JGbX3w"
333
  },
334
  "source": [
335
+ "**Transforming**"
336
  ]
337
  },
338
  {
339
  "cell_type": "code",
340
+ "execution_count": null,
341
  "metadata": {
342
+ "id": "005zALbTCQrH"
343
  },
344
  "outputs": [],
345
  "source": [
 
353
  {
354
  "cell_type": "markdown",
355
  "metadata": {
356
+ "id": "aB2biw88bmxE"
357
  },
358
  "source": [
359
  "There are two options to use the Cohere embeddings:\n",
360
  "\n",
361
  "- input_type=\"search_document\": Employ this option for texts (documents) intended for storage in your vector database.\n",
362
  "\n",
363
+ "- input_type=\"search_query\": Use this when issuing search queries to locate the most related documents within your vector database."
364
  ]
365
  },
366
  {
367
  "cell_type": "code",
368
+ "execution_count": null,
369
  "metadata": {
370
  "colab": {
371
  "base_uri": "https://localhost:8080/",
372
+ "height": 133,
373
  "referenced_widgets": [
374
+ "372216089f0e417ab26f6656e28964ec",
375
+ "0ee84219dec44116b8c90e60e93914c6",
376
+ "7d2b37edbf8d450fad9c46629e8680a1",
377
+ "69b8fbcb928b4b83ad6e84d2982e6fa8",
378
+ "4372ee01eafd4281bf40c8633ed71049",
379
+ "bac4fe58bd124d18aba31249d8055563",
380
+ "bfc9451b9ef543538e2eee56cd75d6b8",
381
+ "6c936d3880c14d4d8b16b60cb300f256",
382
+ "bbc6027350a54663954c564fac062722",
383
+ "3b301c805f84461bad4dcdecc234e40b",
384
+ "f5f332f68fb64d9a86fefcbc92fdc63e",
385
+ "a2b8557313734b9d9f5e1d4a04cb75a5",
386
+ "588ede4f435949639143886c6b191551",
387
+ "8457fabd84ee49beb3f4dc8024e5d592",
388
+ "b267053ae5ef4bba8e6ae961ae40c7ad",
389
+ "552d5ddd91594b4699bbaa6071169ffe",
390
+ "384cf40fde6040039cb3db0df7acaca0",
391
+ "35886b9a6fbb4550b6a33df4994958bd",
392
+ "9549a447edaf4b20aab7652a5615b652",
393
+ "b21b90fb603643a484717b794c2f7f75",
394
+ "605934eac55b4facbb036f9add32c6c9",
395
+ "9e72548d899447c59b57cd4b9fbc2e7b"
396
  ]
397
  },
398
+ "id": "9b4r3raRCWr2",
399
+ "outputId": "a7916387-1640-4cdd-dbce-0d7ab46a6188"
400
  },
401
  "outputs": [
402
+ {
403
+ "data": {
404
+ "application/vnd.jupyter.widget-view+json": {
405
+ "model_id": "372216089f0e417ab26f6656e28964ec",
406
+ "version_major": 2,
407
+ "version_minor": 0
408
+ },
409
+ "text/plain": [
410
+ "Parsing nodes: 0%| | 0/14 [00:00<?, ?it/s]"
411
+ ]
412
+ },
413
+ "metadata": {},
414
+ "output_type": "display_data"
415
+ },
416
  {
417
  "name": "stderr",
418
  "output_type": "stream",
419
  "text": [
420
+ "100%|██████████| 108/108 [04:35<00:00, 2.55s/it] \n",
421
+ "100%|██████████| 108/108 [04:49<00:00, 2.68s/it] \n",
422
+ "100%|██████████| 108/108 [02:05<00:00, 1.16s/it] \n"
 
 
423
  ]
424
+ },
425
+ {
426
+ "data": {
427
+ "application/vnd.jupyter.widget-view+json": {
428
+ "model_id": "a2b8557313734b9d9f5e1d4a04cb75a5",
429
+ "version_major": 2,
430
+ "version_minor": 0
431
+ },
432
+ "text/plain": [
433
+ "Generating embeddings: 0%| | 0/108 [00:00<?, ?it/s]"
434
+ ]
435
+ },
436
+ "metadata": {},
437
+ "output_type": "display_data"
438
  }
439
  ],
440
  "source": [
 
446
  "from llama_index.embeddings.cohere import CohereEmbedding\n",
447
  "from llama_index.core.ingestion import IngestionPipeline\n",
448
  "\n",
449
+ "\n",
450
  "# Create the pipeline to apply the transformation on each chunk,\n",
451
  "# and store the transformed text in the chroma vector store.\n",
452
  "pipeline = IngestionPipeline(\n",
 
466
  },
467
  {
468
  "cell_type": "code",
469
+ "execution_count": null,
470
  "metadata": {
471
  "colab": {
472
  "base_uri": "https://localhost:8080/"
473
  },
474
+ "id": "va2wcjo2CXPI",
475
+ "outputId": "8a30244c-2849-4b44-d634-3fd3977e7b6f"
476
  },
477
  "outputs": [
478
  {
 
481
  "108"
482
  ]
483
  },
484
+ "execution_count": 12,
485
  "metadata": {},
486
  "output_type": "execute_result"
487
  }
 
492
  },
493
  {
494
  "cell_type": "code",
495
+ "execution_count": null,
496
  "metadata": {
497
  "colab": {
498
  "base_uri": "https://localhost:8080/"
499
  },
500
+ "id": "w5ucyprJCZIg",
501
+ "outputId": "b8bc10cb-8cdb-45d5-a438-c9d9548e54d6"
502
  },
503
  "outputs": [
504
  {
 
507
  "1024"
508
  ]
509
  },
510
+ "execution_count": 13,
511
  "metadata": {},
512
  "output_type": "execute_result"
513
  }
 
518
  },
519
  {
520
  "cell_type": "code",
521
+ "execution_count": null,
522
  "metadata": {
523
  "colab": {
524
  "base_uri": "https://localhost:8080/"
525
  },
526
+ "id": "xBHheNAUCnmE",
527
+ "outputId": "e487b1b7-1656-4e3d-ff01-e168fea7bb67"
528
  },
529
  "outputs": [
530
  {
 
532
  "output_type": "stream",
533
  "text": [
534
  " adding: mini-llama-articles/ (stored 0%)\n",
535
+ " adding: mini-llama-articles/chroma.sqlite3 (deflated 73%)\n",
536
+ " adding: mini-llama-articles/47511796-2ec4-4cea-848a-09c15db482e6/ (stored 0%)\n",
537
+ " adding: mini-llama-articles/47511796-2ec4-4cea-848a-09c15db482e6/data_level0.bin (deflated 100%)\n",
538
+ " adding: mini-llama-articles/47511796-2ec4-4cea-848a-09c15db482e6/header.bin (deflated 61%)\n",
539
+ " adding: mini-llama-articles/47511796-2ec4-4cea-848a-09c15db482e6/link_lists.bin (stored 0%)\n",
540
+ " adding: mini-llama-articles/47511796-2ec4-4cea-848a-09c15db482e6/length.bin (deflated 36%)\n"
541
  ]
542
  }
543
  ],
 
549
  {
550
  "cell_type": "markdown",
551
  "metadata": {
552
+ "id": "0waG5hWdb1D7"
553
  },
554
  "source": [
555
+ "**Load Indexes**"
556
  ]
557
  },
558
  {
559
  "cell_type": "markdown",
560
  "metadata": {
561
+ "id": "U8BATim6b270"
562
  },
563
  "source": [
564
+ "If you have already uploaded the zip file for the vector store checkpoint, please uncomment the code in the following cell block to extract its contents. After doing so, you will be able to load the dataset from local storage."
565
  ]
566
  },
567
  {
568
  "cell_type": "code",
569
+ "execution_count": null,
 
 
 
 
 
 
 
 
 
 
 
570
  "metadata": {
571
+ "id": "KBdgyY_9CoGM"
572
  },
573
  "outputs": [],
574
  "source": [
 
580
  },
581
  {
582
  "cell_type": "code",
583
+ "execution_count": null,
584
  "metadata": {
585
+ "colab": {
586
+ "base_uri": "https://localhost:8080/"
587
+ },
588
+ "id": "-aAgX8HjCrfN",
589
+ "outputId": "506f49e3-3710-45d8-a798-05e168b6a610"
590
  },
591
  "outputs": [
592
  {
593
  "name": "stderr",
594
  "output_type": "stream",
595
  "text": [
596
+ "<ipython-input-16-ea3c2eb48ce1>:12: DeprecationWarning: Call to deprecated class method from_defaults. (ServiceContext is deprecated, please use `llama_index.settings.Settings` instead.) -- Deprecated since version 0.10.0.\n",
597
+ " service_context = ServiceContext.from_defaults(llm=llm, embed_model=embed_model)\n"
598
  ]
599
  }
600
  ],
 
607
  " input_type=\"search_query\",\n",
608
  ")\n",
609
  "\n",
610
+ "# Define the ServiceContext object to tie the LLM for generating the final answer,\n",
611
  "# and the embedding model to help with retrieving related nodes.\n",
612
+ "service_context = ServiceContext.from_defaults(llm=llm, embed_model=embed_model)\n",
613
+ "\n"
614
  ]
615
  },
616
  {
617
  "cell_type": "code",
618
+ "execution_count": null,
619
  "metadata": {
620
+ "id": "ZrA6oDtGoylO"
621
  },
622
  "outputs": [],
623
  "source": [
 
632
  {
633
  "cell_type": "markdown",
634
  "metadata": {
635
+ "id": "RF5Z_QCZcEwl"
636
  },
637
  "source": [
638
+ "**Query Dataset**"
639
  ]
640
  },
641
  {
642
  "cell_type": "code",
643
+ "execution_count": null,
644
  "metadata": {
645
+ "id": "isHNAqSbCtTf"
646
  },
647
  "outputs": [],
648
  "source": [
 
655
  },
656
  {
657
  "cell_type": "code",
658
+ "execution_count": null,
659
  "metadata": {
660
  "colab": {
661
  "base_uri": "https://localhost:8080/",
662
+ "height": 36
663
  },
664
+ "id": "zJjCN_VNT9s3",
665
+ "outputId": "d88d6310-28e9-4c11-a965-fe2a20e8b6f6"
666
  },
667
  "outputs": [
668
  {
669
  "data": {
670
+ "application/vnd.google.colaboratory.intrinsic+json": {
671
+ "type": "string"
672
+ },
673
  "text/plain": [
674
+ "'LLaMA2 has four different model sizes: 7 billion, 13 billion, 34 billion, and 70 billion parameters. \\n'"
675
  ]
676
  },
677
+ "execution_count": 19,
678
  "metadata": {},
679
  "output_type": "execute_result"
680
  }
 
685
  },
686
  {
687
  "cell_type": "code",
688
+ "execution_count": null,
689
  "metadata": {
690
  "colab": {
691
  "base_uri": "https://localhost:8080/"
692
  },
693
+ "id": "-uFFe60NCw5S",
694
+ "outputId": "d90bf0b3-716e-42aa-e1ac-4f92dd5b194e"
695
  },
696
  "outputs": [
697
  {
698
  "name": "stdout",
699
  "output_type": "stream",
700
  "text": [
701
+ "Node ID\t a6cd1b7d-8df7-49ba-a853-5be4f315619f\n",
702
+ "Title\t Meta's Llama 2: Revolutionizing Open Source Language Models for Commercial Use\n",
703
+ "Text\t The model demonstrates exceptionally low AI safety violation percentages, surpassing even ChatGPT in safety benchmarks. Finding the right balance between helpfulness and safety when optimizing a model poses significant challenges. While a highly helpful model may be capable of answering any question, including sensitive ones like \"How do I build a bomb?\", it also raises concerns about potential misuse. Thus, striking the perfect equilibrium between providing useful information and ensuring safety is paramount. However, prioritizing safety to an extreme extent can lead to a model that struggles to effectively address a diverse range of questions. This limitation could hinder the model's practical applicability and user experience. Thus, achieving an optimum balance that allows the model to be both helpful and safe is of utmost importance. To strike the right balance between helpfulness and safety, Meta employed two reward models - one for helpfulness and another for safety - to optimize the model's responses. The 34B parameter model has reported higher safety violations than other variants, possibly contributing to the delay in its release. IV. Helpfulness Comparison: Llama 2 Outperforms Competitors Llama 2 emerges as a strong contender in the open-source language model arena, outperforming its competitors in most categories. The 70B parameter model outperforms all other open-source models, while the 7B and 34B models outshine Falcon in all categories and MPT in all categories except coding. Despite being smaller, Llam a2's performance rivals that of Chat GPT 3.5, a significantly larger closed-source model. While GPT 4 and PalM-2-L, with their larger size, outperform Llama 2, this is expected due to their capacity for handling complex language tasks. Llama 2's impressive ability to compete with larger models highlights its efficiency and potential in the market. 
However, Llama 2 does face challenges in coding and math problems, where models like Chat GPT 4 excel, given their significantly larger size. Chat GPT 4 performed significantly better than Llama 2 for coding (HumanEval benchmark)and math problem tasks (GSM8k benchmark). Open-source AI technologies, like Llama 2, continue to advance, offering\n",
704
+ "Score\t 0.42966435674678427\n",
705
+ "-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_\n",
706
+ "Node ID\t 037a4773-87e1-4b75-8672-0255b89820e4\n",
707
  "Title\t Fine-Tuning a Llama-2 7B Model for Python Code Generation\n",
708
  "Text\t New Llama-2 model In mid-July, Meta released its new family of pre-trained and finetuned models called Llama-2, with an open source and commercial character to facilitate its use and expansion. The base model was released with a chat version and sizes 7B, 13B, and 70B. Together with the models, the corresponding papers were published describing their characteristics and relevant points of the learning process, which provide very interesting information on the subject. For pre-training, 40% more tokens were used, reaching 2T, the context length was doubled and the grouped-query attention (GQA) technique was applied to speed up inference on the heavier 70B model. On the standard transformer architecture, RMSNorm normalization, SwiGLU activation, and rotatory positional embedding are used, the context length reaches 4096 tokens, and an Adam optimizer is applied with a cosine learning rate schedule, a weight decay of 0.1 and gradient clipping. The dataset for tuning For our tuning process, we will take a dataset containing about 18,000 examples where the model is asked to build a Python code that solves a given task. This is an extraction of the original dataset [2], where only the Python language examples are selected. Each row contains the description of the task to be solved, an example of data input to the task if applicable, and the generated code fragment that solves the task is provided [3]. Creating the prompt To carry out an instruction fine-tuning, we must transform each one of our data examples as if it were an instruction, outlining its main sections as follows: Output: Fine-tuning the model To carry out this stage, we have used the Google Colab environment, where we have developed a notebook that allows us to run the training in an interactive way and also a Python script to run the training in unattended mode. 
For the first test runs, a T4 instance with a high RAM capacity is enough, but when it comes to running the whole dataset and epochs, we have opted to use an A100 instance in order to speed up the training and ensure that its execution time is reasonable. In order to be able to\n",
709
+ "Score\t 0.3929280739950982\n",
710
  "-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_\n",
711
+ "Node ID\t a5ee37f3-abe9-4376-b228-1a47b58ab50f\n",
712
  "Title\t Fine-Tuning a Llama-2 7B Model for Python Code Generation\n",
713
  "Text\t if it were an instruction, outlining its main sections as follows: Output: Fine-tuning the model To carry out this stage, we have used the Google Colab environment, where we have developed a notebook that allows us to run the training in an interactive way and also a Python script to run the training in unattended mode. For the first test runs, a T4 instance with a high RAM capacity is enough, but when it comes to running the whole dataset and epochs, we have opted to use an A100 instance in order to speed up the training and ensure that its execution time is reasonable. In order to be able to share the model, we will log in to the Huggingface hub using the appropriate token, so that at the end of the whole process, we will upload the model files so that they can be shared with the rest of the users. Fine-tuning techniques: PEFT, Lora, and QLora In recent months, some papers have appeared showing how PEFT techniques can be used to train large language models with a drastic reduction of RAM requirements and consequently allowing fine-tuning of these models on a single GPU of reasonable size. The usual steps to train an LLM consist, first, an intensive pre-training on billions or trillions of tokens to obtain a foundation model, and then a fine-tuning is performed on this model to specialize it on a downstream task. In this fine-tuning phase is where the PEFT technique has its purpose. Parameter Efficient Fine-Tuning (PEFT) allows us to considerably reduce RAM and storage requirements by only fine-tuning a small number of additional parameters, with virtually all model parameters remaining frozen. PEFT has been found to produce good generalization with relatively low-volume datasets. Furthermore, it enhances the reusability and portability of the model, as the small checkpoints obtained can be easily added to the base model, and the base model can be easily fine-tuned and reused in multiple scenarios by adding the PEFT parameters. 
Finally, since the base model is not adjusted, all the knowledge acquired in the pre-training phase is preserved, thus avoiding catastrophic forgetting. Most widely used PEFT techniques aim to keep the pre-trained base model untouched\n",
714
+ "Score\t 0.3878582651558103\n",
715
+ "-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_\n",
716
+ "Node ID\t 43633dff-3ec3-4b23-83eb-35857b1fccf4\n",
717
+ "Title\t Meta's Llama 2: Revolutionizing Open Source Language Models for Commercial Use\n",
718
+ "Text\t I. Llama 2: Revolutionizing Commercial Use Unlike its predecessor Llama 1, which was limited to research use, Llama 2 represents a major advancement as an open-source commercial model. Businesses can now integrate Llama 2 into products to create AI-powered applications. Availability on Azure and AWS facilitates fine-tuning and adoption. However, restrictions apply to prevent exploitation. Companies with over 700 million active daily users cannot use Llama 2. Additionally, its output cannot be used to improve other language models. II. Llama 2 Model Flavors Llama 2 is available in four different model sizes: 7 billion, 13 billion, 34 billion, and 70 billion parameters. While 7B, 13B, and 70B have already been released, the 34B model is still awaited. The pretrained variant, trained on a whopping 2 trillion tokens, boasts a context window of 4096 tokens, twice the size of its predecessor Llama 1. Meta also released a Llama 2 fine-tuned model for chat applications that was trained on over 1 million human annotations. Such extensive training comes at a cost, with the 70B model taking a staggering 1720320 GPU hours to train. The context window's length determines the amount of content the model can process at once, making Llama 2 a powerful language model in terms of scale and efficiency. III. Safety Considerations: A Top Priority for Meta Meta's commitment to safety and alignment shines through in Llama 2's design. The model demonstrates exceptionally low AI safety violation percentages, surpassing even ChatGPT in safety benchmarks. Finding the right balance between helpfulness and safety when optimizing a model poses significant challenges. While a highly helpful model may be capable of answering any question, including sensitive ones like \"How do I build a bomb?\", it also raises concerns about potential misuse. Thus, striking the perfect equilibrium between providing useful information and ensuring safety is paramount. 
However, prioritizing safety to an extreme extent can lead to a model that struggles to effectively address a diverse range of questions. This limitation could hinder the model's practical applicability and user experience. Thus, achieving\n",
719
+ "Score\t 0.3768255201026545\n",
720
+ "-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_\n",
721
+ "Node ID\t 184d34e2-c7cb-4051-b3a6-159b36da52f0\n",
722
+ "Title\t Fine-Tuning a Llama-2 7B Model for Python Code Generation\n",
723
+ "Text\t only fine-tuning a small number of additional parameters, with virtually all model parameters remaining frozen. PEFT has been found to produce good generalization with relatively low-volume datasets. Furthermore, it enhances the reusability and portability of the model, as the small checkpoints obtained can be easily added to the base model, and the base model can be easily fine-tuned and reused in multiple scenarios by adding the PEFT parameters. Finally, since the base model is not adjusted, all the knowledge acquired in the pre-training phase is preserved, thus avoiding catastrophic forgetting. Most widely used PEFT techniques aim to keep the pre-trained base model untouched and add new layers or parameters on top of it. These layers are called \"Adapters\" and the technique of their adjustment \"adapter-tuning\", we add these layers to the pre-trained base model and only train the parameters of these new layers. However, a serious problem with this approach is that these layers lead to increased latency in the inference phase, which makes the process inefficient in many scenarios.In the LoRa technique, a Low-Rank Adaptation of Large Language Models, the idea is not to include new layers but to add values to the parameters in a way that avoids this scary problem of latency in the inference phase. LoRa trains and stores the changes of the additional weights while freezing all the weights of the pre-trained model. Therefore, we train a new weights matrix with the changes in the pre-trained model matrix, and this new matrix is decomposed into 2 Low-rank matrices as explained here: Merge the base model and the adapter weights As we mention, we have trained \"modification weights\" on the base model, our final model requires merging the pretrained model and the adapters in a single model. You can find and download the model in my Hugging Face account edumunozsala/llama-27b-int4-python-code-20k. Give it a try! 
Inferencing or generating Python code And finally, we will show you how you can download the model from the Hugging Face Hub and call the model to generate an accurate result: Thanks to Maxime Labonne for an excellent article [9] and Philipp Schmid who provides an inspiring\n",
724
+ "Score\t 0.3720018118964866\n",
725
  "-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_\n"
726
  ]
727
  }
 
739
  {
740
  "cell_type": "markdown",
741
  "metadata": {
742
+ "id": "WDb7WHnRcne3"
743
  },
744
  "source": [
745
+ "**Evaluate**"
746
  ]
747
  },
748
  {
749
  "cell_type": "code",
750
+ "execution_count": null,
751
  "metadata": {
752
  "colab": {
753
+ "base_uri": "https://localhost:8080/",
754
+ "height": 34
755
  },
756
+ "id": "lQUQEYrHCxXQ",
757
+ "outputId": "5c6c2f3a-1ed6-4ae7-f4a8-2adef8373b03"
758
  },
759
  "outputs": [
760
  {
761
  "name": "stderr",
762
  "output_type": "stream",
763
  "text": [
764
+ "100%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ| 108/108 [02:40<00:00, 1.49s/it]\n"
765
  ]
766
  }
767
  ],
 
784
  {
785
  "cell_type": "markdown",
786
  "metadata": {
787
+ "id": "JmlbVifNcv82"
788
  },
789
  "source": [
790
+ "If you have already uploaded the generated questions JSON file, uncomment the code in the next cell. This avoids having to generate the questions again, saving you time and effort."
791
  ]
792
  },
793
  {
794
  "cell_type": "code",
795
+ "execution_count": null,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
796
  "metadata": {
797
+ "id": "88bv7AerC2Al"
798
  },
799
  "outputs": [],
800
  "source": [
 
824
  },
825
  {
826
  "cell_type": "code",
827
+ "execution_count": null,
828
  "metadata": {
829
  "colab": {
830
  "base_uri": "https://localhost:8080/"
831
  },
832
+ "id": "jRgN3aSiC5mQ",
833
+ "outputId": "fe584c8f-2744-4259-e0f3-ffb936c9b9cb"
834
  },
835
  "outputs": [
836
  {
 
838
  "output_type": "stream",
839
  "text": [
840
  " Retriever Name Hit Rate MRR\n",
841
+ "0 Retriever top_2 0.046296 0.032407\n",
842
  " Retriever Name Hit Rate MRR\n",
843
+ "0 Retriever top_4 0.101852 0.048611\n",
844
  " Retriever Name Hit Rate MRR\n",
845
+ "0 Retriever top_6 0.138889 0.055093\n",
846
+ " Retriever Name Hit Rate MRR\n",
847
+ "0 Retriever top_8 0.148148 0.05625\n",
848
  " Retriever Name Hit Rate MRR\n",
849
+ "0 Retriever top_10 0.157407 0.057176\n"
850
  ]
851
  }
852
  ],
 
865
  },
866
  {
867
  "cell_type": "code",
868
+ "execution_count": null,
869
  "metadata": {
870
  "colab": {
871
+ "base_uri": "https://localhost:8080/",
872
+ "height": 332
873
  },
874
+ "id": "S2DcdLmHU82p",
875
+ "outputId": "4f1ebcd5-8a41-474e-b455-80fe356f4451"
876
  },
877
  "outputs": [
878
  {
879
  "name": "stderr",
880
  "output_type": "stream",
881
  "text": [
882
+ "<ipython-input-24-1c5678dd03e2>:15: DeprecationWarning: Call to deprecated class method from_defaults. (ServiceContext is deprecated, please use `llama_index.settings.Settings` instead.) -- Deprecated since version 0.10.0.\n",
883
+ " service_context_gemini_flash = ServiceContext.from_defaults(llm=llm_gemini_flash)\n"
884
  ]
885
  },
886
  {
887
  "name": "stdout",
888
  "output_type": "stream",
889
  "text": [
890
+ "top_2 faithfulness_score: 0.55\n",
891
+ "top_2 relevancy_score: 0.55\n",
892
  "-_-_-_-_-_-_-_-_-_-_\n",
893
+ "top_4 faithfulness_score: 0.65\n",
894
+ "top_4 relevancy_score: 0.65\n",
895
  "-_-_-_-_-_-_-_-_-_-_\n",
896
+ "top_6 faithfulness_score: 0.65\n",
897
+ "top_6 relevancy_score: 0.65\n",
898
  "-_-_-_-_-_-_-_-_-_-_\n",
899
+ "top_8 faithfulness_score: 0.9\n",
900
+ "top_8 relevancy_score: 0.9\n",
901
  "-_-_-_-_-_-_-_-_-_-_\n",
902
  "top_10 faithfulness_score: 0.65\n",
903
  "top_10 relevancy_score: 0.65\n",
 
919
  " query_engine = index.as_query_engine(similarity_top_k=i, llm=llm)\n",
920
  "\n",
921
  " # The LLM that answers the questions (llm) and the LLM that evaluates the answers are configured separately; here Gemini 1.5 Flash is used as the evaluator.\n",
922
+ " llm_gemini_flash = Gemini(model=\"models/gemini-1.5-flash\", temperature=1, max_tokens=512)\n",
923
+ " service_context_gemini_flash = ServiceContext.from_defaults(llm=llm_gemini_flash)\n",
924
  "\n",
925
+ " faithfulness_evaluator = FaithfulnessEvaluator(service_context=service_context_gemini_flash)\n",
926
+ " relevancy_evaluator = RelevancyEvaluator(service_context=service_context_gemini_flash)\n",
927
  "\n",
928
  " # Run evaluation\n",
929
  " queries = list(rag_eval_dataset.queries.values())\n",
 
947
  " print(f\"top_{i} relevancy_score: {relevancy_score}\")\n",
948
  " print(\"-_\" * 10)"
949
  ]
 
 
 
 
 
 
 
 
 
950
  }
951
  ],
952
  "metadata": {
953
+ "accelerator": "GPU",
954
  "colab": {
955
+ "gpuType": "T4",
 
956
  "provenance": []
957
  },
958
  "kernelspec": {
 
960
  "name": "python3"
961
  },
962
  "language_info": {
963
+ "name": "python"
 
 
 
 
 
 
 
 
 
964
  },
965
  "widgets": {
966
  "application/vnd.jupyter.widget-state+json": {
967
+ "0ee84219dec44116b8c90e60e93914c6": {
968
  "model_module": "@jupyter-widgets/controls",
969
  "model_module_version": "1.5.0",
970
  "model_name": "HTMLModel",
 
979
  "_view_name": "HTMLView",
980
  "description": "",
981
  "description_tooltip": null,
982
+ "layout": "IPY_MODEL_bac4fe58bd124d18aba31249d8055563",
983
  "placeholder": "​",
984
+ "style": "IPY_MODEL_bfc9451b9ef543538e2eee56cd75d6b8",
985
+ "value": "Parsing nodes: 100%"
986
  }
987
  },
988
+ "35886b9a6fbb4550b6a33df4994958bd": {
989
  "model_module": "@jupyter-widgets/controls",
990
  "model_module_version": "1.5.0",
991
  "model_name": "DescriptionStyleModel",
 
1000
  "description_width": ""
1001
  }
1002
  },
1003
+ "372216089f0e417ab26f6656e28964ec": {
1004
  "model_module": "@jupyter-widgets/controls",
1005
  "model_module_version": "1.5.0",
1006
+ "model_name": "HBoxModel",
1007
  "state": {
1008
  "_dom_classes": [],
1009
  "_model_module": "@jupyter-widgets/controls",
1010
  "_model_module_version": "1.5.0",
1011
+ "_model_name": "HBoxModel",
1012
  "_view_count": null,
1013
  "_view_module": "@jupyter-widgets/controls",
1014
  "_view_module_version": "1.5.0",
1015
+ "_view_name": "HBoxView",
1016
+ "box_style": "",
1017
+ "children": [
1018
+ "IPY_MODEL_0ee84219dec44116b8c90e60e93914c6",
1019
+ "IPY_MODEL_7d2b37edbf8d450fad9c46629e8680a1",
1020
+ "IPY_MODEL_69b8fbcb928b4b83ad6e84d2982e6fa8"
1021
+ ],
1022
+ "layout": "IPY_MODEL_4372ee01eafd4281bf40c8633ed71049"
1023
  }
1024
  },
1025
+ "384cf40fde6040039cb3db0df7acaca0": {
1026
  "model_module": "@jupyter-widgets/base",
1027
  "model_module_version": "1.2.0",
1028
  "model_name": "LayoutModel",
 
1074
  "width": null
1075
  }
1076
  },
1077
+ "3b301c805f84461bad4dcdecc234e40b": {
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1078
  "model_module": "@jupyter-widgets/base",
1079
  "model_module_version": "1.2.0",
1080
  "model_name": "LayoutModel",
 
1126
  "width": null
1127
  }
1128
  },
1129
+ "4372ee01eafd4281bf40c8633ed71049": {
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1130
  "model_module": "@jupyter-widgets/base",
1131
  "model_module_version": "1.2.0",
1132
  "model_name": "LayoutModel",
 
1178
  "width": null
1179
  }
1180
  },
1181
+ "552d5ddd91594b4699bbaa6071169ffe": {
1182
  "model_module": "@jupyter-widgets/base",
1183
  "model_module_version": "1.2.0",
1184
  "model_name": "LayoutModel",
 
1230
  "width": null
1231
  }
1232
  },
1233
+ "588ede4f435949639143886c6b191551": {
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1234
  "model_module": "@jupyter-widgets/controls",
1235
  "model_module_version": "1.5.0",
1236
  "model_name": "HTMLModel",
 
1245
  "_view_name": "HTMLView",
1246
  "description": "",
1247
  "description_tooltip": null,
1248
+ "layout": "IPY_MODEL_384cf40fde6040039cb3db0df7acaca0",
1249
  "placeholder": "​",
1250
+ "style": "IPY_MODEL_35886b9a6fbb4550b6a33df4994958bd",
1251
+ "value": "Generating embeddings: 100%"
1252
  }
1253
  },
1254
+ "605934eac55b4facbb036f9add32c6c9": {
1255
  "model_module": "@jupyter-widgets/base",
1256
  "model_module_version": "1.2.0",
1257
  "model_name": "LayoutModel",
 
1303
  "width": null
1304
  }
1305
  },
1306
+ "69b8fbcb928b4b83ad6e84d2982e6fa8": {
1307
+ "model_module": "@jupyter-widgets/controls",
1308
+ "model_module_version": "1.5.0",
1309
+ "model_name": "HTMLModel",
1310
+ "state": {
1311
+ "_dom_classes": [],
1312
+ "_model_module": "@jupyter-widgets/controls",
1313
+ "_model_module_version": "1.5.0",
1314
+ "_model_name": "HTMLModel",
1315
+ "_view_count": null,
1316
+ "_view_module": "@jupyter-widgets/controls",
1317
+ "_view_module_version": "1.5.0",
1318
+ "_view_name": "HTMLView",
1319
+ "description": "",
1320
+ "description_tooltip": null,
1321
+ "layout": "IPY_MODEL_3b301c805f84461bad4dcdecc234e40b",
1322
+ "placeholder": "​",
1323
+ "style": "IPY_MODEL_f5f332f68fb64d9a86fefcbc92fdc63e",
1324
+ "value": " 14/14 [00:00&lt;00:00, 16.28it/s]"
1325
+ }
1326
+ },
1327
+ "6c936d3880c14d4d8b16b60cb300f256": {
1328
  "model_module": "@jupyter-widgets/base",
1329
  "model_module_version": "1.2.0",
1330
  "model_name": "LayoutModel",
 
1376
  "width": null
1377
  }
1378
  },
1379
+ "7d2b37edbf8d450fad9c46629e8680a1": {
1380
+ "model_module": "@jupyter-widgets/controls",
1381
+ "model_module_version": "1.5.0",
1382
+ "model_name": "FloatProgressModel",
1383
+ "state": {
1384
+ "_dom_classes": [],
1385
+ "_model_module": "@jupyter-widgets/controls",
1386
+ "_model_module_version": "1.5.0",
1387
+ "_model_name": "FloatProgressModel",
1388
+ "_view_count": null,
1389
+ "_view_module": "@jupyter-widgets/controls",
1390
+ "_view_module_version": "1.5.0",
1391
+ "_view_name": "ProgressView",
1392
+ "bar_style": "success",
1393
+ "description": "",
1394
+ "description_tooltip": null,
1395
+ "layout": "IPY_MODEL_6c936d3880c14d4d8b16b60cb300f256",
1396
+ "max": 14,
1397
+ "min": 0,
1398
+ "orientation": "horizontal",
1399
+ "style": "IPY_MODEL_bbc6027350a54663954c564fac062722",
1400
+ "value": 14
1401
+ }
1402
+ },
1403
+ "8457fabd84ee49beb3f4dc8024e5d592": {
1404
+ "model_module": "@jupyter-widgets/controls",
1405
+ "model_module_version": "1.5.0",
1406
+ "model_name": "FloatProgressModel",
1407
+ "state": {
1408
+ "_dom_classes": [],
1409
+ "_model_module": "@jupyter-widgets/controls",
1410
+ "_model_module_version": "1.5.0",
1411
+ "_model_name": "FloatProgressModel",
1412
+ "_view_count": null,
1413
+ "_view_module": "@jupyter-widgets/controls",
1414
+ "_view_module_version": "1.5.0",
1415
+ "_view_name": "ProgressView",
1416
+ "bar_style": "success",
1417
+ "description": "",
1418
+ "description_tooltip": null,
1419
+ "layout": "IPY_MODEL_9549a447edaf4b20aab7652a5615b652",
1420
+ "max": 108,
1421
+ "min": 0,
1422
+ "orientation": "horizontal",
1423
+ "style": "IPY_MODEL_b21b90fb603643a484717b794c2f7f75",
1424
+ "value": 108
1425
+ }
1426
+ },
1427
+ "9549a447edaf4b20aab7652a5615b652": {
1428
  "model_module": "@jupyter-widgets/base",
1429
  "model_module_version": "1.2.0",
1430
  "model_name": "LayoutModel",
 
1476
  "width": null
1477
  }
1478
  },
1479
+ "9e72548d899447c59b57cd4b9fbc2e7b": {
1480
  "model_module": "@jupyter-widgets/controls",
1481
  "model_module_version": "1.5.0",
1482
  "model_name": "DescriptionStyleModel",
 
1491
  "description_width": ""
1492
  }
1493
  },
1494
+ "a2b8557313734b9d9f5e1d4a04cb75a5": {
1495
  "model_module": "@jupyter-widgets/controls",
1496
  "model_module_version": "1.5.0",
1497
+ "model_name": "HBoxModel",
1498
  "state": {
1499
+ "_dom_classes": [],
1500
  "_model_module": "@jupyter-widgets/controls",
1501
  "_model_module_version": "1.5.0",
1502
+ "_model_name": "HBoxModel",
1503
+ "_view_count": null,
1504
+ "_view_module": "@jupyter-widgets/controls",
1505
+ "_view_module_version": "1.5.0",
1506
+ "_view_name": "HBoxView",
1507
+ "box_style": "",
1508
+ "children": [
1509
+ "IPY_MODEL_588ede4f435949639143886c6b191551",
1510
+ "IPY_MODEL_8457fabd84ee49beb3f4dc8024e5d592",
1511
+ "IPY_MODEL_b267053ae5ef4bba8e6ae961ae40c7ad"
1512
+ ],
1513
+ "layout": "IPY_MODEL_552d5ddd91594b4699bbaa6071169ffe"
1514
+ }
1515
+ },
1516
+ "b21b90fb603643a484717b794c2f7f75": {
1517
+ "model_module": "@jupyter-widgets/controls",
1518
+ "model_module_version": "1.5.0",
1519
+ "model_name": "ProgressStyleModel",
1520
+ "state": {
1521
+ "_model_module": "@jupyter-widgets/controls",
1522
+ "_model_module_version": "1.5.0",
1523
+ "_model_name": "ProgressStyleModel",
1524
  "_view_count": null,
1525
  "_view_module": "@jupyter-widgets/base",
1526
  "_view_module_version": "1.2.0",
1527
  "_view_name": "StyleView",
1528
+ "bar_color": null,
1529
  "description_width": ""
1530
  }
1531
  },
1532
+ "b267053ae5ef4bba8e6ae961ae40c7ad": {
1533
+ "model_module": "@jupyter-widgets/controls",
1534
+ "model_module_version": "1.5.0",
1535
+ "model_name": "HTMLModel",
1536
+ "state": {
1537
+ "_dom_classes": [],
1538
+ "_model_module": "@jupyter-widgets/controls",
1539
+ "_model_module_version": "1.5.0",
1540
+ "_model_name": "HTMLModel",
1541
+ "_view_count": null,
1542
+ "_view_module": "@jupyter-widgets/controls",
1543
+ "_view_module_version": "1.5.0",
1544
+ "_view_name": "HTMLView",
1545
+ "description": "",
1546
+ "description_tooltip": null,
1547
+ "layout": "IPY_MODEL_605934eac55b4facbb036f9add32c6c9",
1548
+ "placeholder": "​",
1549
+ "style": "IPY_MODEL_9e72548d899447c59b57cd4b9fbc2e7b",
1550
+ "value": " 108/108 [00:04&lt;00:00, 23.97it/s]"
1551
+ }
1552
+ },
1553
+ "bac4fe58bd124d18aba31249d8055563": {
1554
  "model_module": "@jupyter-widgets/base",
1555
  "model_module_version": "1.2.0",
1556
  "model_name": "LayoutModel",
 
1602
  "width": null
1603
  }
1604
  },
1605
+ "bbc6027350a54663954c564fac062722": {
1606
+ "model_module": "@jupyter-widgets/controls",
1607
+ "model_module_version": "1.5.0",
1608
+ "model_name": "ProgressStyleModel",
1609
+ "state": {
1610
+ "_model_module": "@jupyter-widgets/controls",
1611
+ "_model_module_version": "1.5.0",
1612
+ "_model_name": "ProgressStyleModel",
1613
+ "_view_count": null,
1614
+ "_view_module": "@jupyter-widgets/base",
1615
+ "_view_module_version": "1.2.0",
1616
+ "_view_name": "StyleView",
1617
+ "bar_color": null,
1618
+ "description_width": ""
1619
+ }
1620
+ },
1621
+ "bfc9451b9ef543538e2eee56cd75d6b8": {
1622
+ "model_module": "@jupyter-widgets/controls",
1623
+ "model_module_version": "1.5.0",
1624
+ "model_name": "DescriptionStyleModel",
1625
+ "state": {
1626
+ "_model_module": "@jupyter-widgets/controls",
1627
+ "_model_module_version": "1.5.0",
1628
+ "_model_name": "DescriptionStyleModel",
1629
+ "_view_count": null,
1630
+ "_view_module": "@jupyter-widgets/base",
1631
+ "_view_module_version": "1.2.0",
1632
+ "_view_name": "StyleView",
1633
+ "description_width": ""
1634
+ }
1635
+ },
1636
+ "f5f332f68fb64d9a86fefcbc92fdc63e": {
1637
  "model_module": "@jupyter-widgets/controls",
1638
  "model_module_version": "1.5.0",
1639
  "model_name": "DescriptionStyleModel",