AlaFalaki committed on
Commit
f11cae7
·
1 Parent(s): bfd8e61

Created using Colab

Browse files
Files changed (1) hide show
  1. notebooks/Advanced_Retriever.ipynb +105 -70
notebooks/Advanced_Retriever.ipynb CHANGED
@@ -4,7 +4,7 @@
4
  "metadata": {
5
  "colab": {
6
  "provenance": [],
7
- "authorship_tag": "ABX9TyPGmFGPV+qS77r0Crzw/+Ag",
8
  "include_colab_link": true
9
  },
10
  "kernelspec": {
@@ -37,56 +37,57 @@
37
  },
38
  {
39
  "cell_type": "code",
40
- "execution_count": null,
41
  "metadata": {
42
  "colab": {
43
  "base_uri": "https://localhost:8080/"
44
  },
45
  "id": "sbO5PUR3AL-i",
46
- "outputId": "7653ffd0-bb98-49b0-d381-e049d3efc6be"
47
  },
48
  "outputs": [
49
  {
50
  "output_type": "stream",
51
  "name": "stdout",
52
  "text": [
53
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m226.7/226.7 kB\u001b[0m \u001b[31m3.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
54
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m1.8/1.8 MB\u001b[0m \u001b[31m19.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
55
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m15.4/15.4 MB\u001b[0m \u001b[31m25.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
56
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m2.0/2.0 MB\u001b[0m \u001b[31m32.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
57
  "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m75.6/75.6 kB\u001b[0m \u001b[31m5.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
58
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m526.8/526.8 kB\u001b[0m \u001b[31m29.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
59
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m2.4/2.4 MB\u001b[0m \u001b[31m51.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
60
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m92.0/92.0 kB\u001b[0m \u001b[31m5.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
61
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m62.4/62.4 kB\u001b[0m \u001b[31m4.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
62
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m41.3/41.3 kB\u001b[0m \u001b[31m3.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
63
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m6.8/6.8 MB\u001b[0m \u001b[31m42.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
64
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m60.1/60.1 kB\u001b[0m \u001b[31m4.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
65
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m106.1/106.1 kB\u001b[0m \u001b[31m9.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
66
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m67.3/67.3 kB\u001b[0m \u001b[31m3.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
67
  "\u001b[?25h Installing build dependencies ... \u001b[?25l\u001b[?25hdone\n",
68
  " Getting requirements to build wheel ... \u001b[?25l\u001b[?25hdone\n",
69
  " Preparing metadata (pyproject.toml) ... \u001b[?25l\u001b[?25hdone\n",
70
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m283.7/283.7 kB\u001b[0m \u001b[31m12.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
71
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m1.6/1.6 MB\u001b[0m \u001b[31m34.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
72
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m67.6/67.6 kB\u001b[0m \u001b[31m5.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
73
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m142.5/142.5 kB\u001b[0m \u001b[31m4.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
74
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m77.9/77.9 kB\u001b[0m \u001b[31m6.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
75
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m58.3/58.3 kB\u001b[0m \u001b[31m3.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
76
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m141.9/141.9 kB\u001b[0m \u001b[31m11.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
77
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m290.4/290.4 kB\u001b[0m \u001b[31m26.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
78
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m71.9/71.9 kB\u001b[0m \u001b[31m8.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
79
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m53.6/53.6 kB\u001b[0m \u001b[31m6.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
80
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m46.0/46.0 kB\u001b[0m \u001b[31m5.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
81
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m50.8/50.8 kB\u001b[0m \u001b[31m5.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
82
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m341.4/341.4 kB\u001b[0m \u001b[31m32.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
83
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m3.4/3.4 MB\u001b[0m \u001b[31m75.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
84
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m1.2/1.2 MB\u001b[0m \u001b[31m64.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
85
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m130.2/130.2 kB\u001b[0m \u001b[31m14.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
86
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m49.3/49.3 kB\u001b[0m \u001b[31m6.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
87
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m307.7/307.7 kB\u001b[0m \u001b[31m29.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
88
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m47.2/47.2 kB\u001b[0m \u001b[31m5.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
89
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m86.8/86.8 kB\u001b[0m \u001b[31m9.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
 
90
  "\u001b[?25h Building wheel for pypika (pyproject.toml) ... \u001b[?25l\u001b[?25hdone\n",
91
  "\u001b[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.\n",
92
  "spacy 3.7.4 requires typer<0.10.0,>=0.3.0, but you have typer 0.12.3 which is incompatible.\n",
@@ -110,7 +111,7 @@
110
  "metadata": {
111
  "id": "39OAU5OlByI0"
112
  },
113
- "execution_count": null,
114
  "outputs": []
115
  },
116
  {
@@ -135,7 +136,7 @@
135
  "metadata": {
136
  "id": "O2haexSAByDD"
137
  },
138
- "execution_count": null,
139
  "outputs": []
140
  },
141
  {
@@ -151,7 +152,7 @@
151
  "metadata": {
152
  "id": "OHO6a-zaBxeG"
153
  },
154
- "execution_count": null,
155
  "outputs": []
156
  },
157
  {
@@ -191,24 +192,24 @@
191
  "base_uri": "https://localhost:8080/"
192
  },
193
  "id": "x4llz2lHN2ij",
194
- "outputId": "02a5d8c6-efde-4dd1-e0aa-37517e393961"
195
  },
196
- "execution_count": null,
197
  "outputs": [
198
  {
199
  "output_type": "stream",
200
  "name": "stdout",
201
  "text": [
202
- "--2024-05-28 18:43:39-- https://raw.githubusercontent.com/AlaFalaki/tutorial_notebooks/main/data/mini-llama-articles.csv\n",
203
- "Resolving raw.githubusercontent.com (raw.githubusercontent.com)... 185.199.109.133, 185.199.110.133, 185.199.111.133, ...\n",
204
- "Connecting to raw.githubusercontent.com (raw.githubusercontent.com)|185.199.109.133|:443... connected.\n",
205
  "HTTP request sent, awaiting response... 200 OK\n",
206
  "Length: 173646 (170K) [text/plain]\n",
207
  "Saving to: ‘mini-llama-articles.csv’\n",
208
  "\n",
209
  "\rmini-llama-articles 0%[ ] 0 --.-KB/s \rmini-llama-articles 100%[===================>] 169.58K --.-KB/s in 0.03s \n",
210
  "\n",
211
- "2024-05-28 18:43:40 (5.02 MB/s) - ‘mini-llama-articles.csv’ saved [173646/173646]\n",
212
  "\n"
213
  ]
214
  }
@@ -246,9 +247,9 @@
246
  "base_uri": "https://localhost:8080/"
247
  },
248
  "id": "_M-0-D4fN2fc",
249
- "outputId": "4db9fffa-873a-4a52-d22f-9fa35b3d440f"
250
  },
251
- "execution_count": null,
252
  "outputs": [
253
  {
254
  "output_type": "execute_result",
@@ -258,7 +259,7 @@
258
  ]
259
  },
260
  "metadata": {},
261
- "execution_count": 7
262
  }
263
  ]
264
  },
@@ -282,7 +283,7 @@
282
  "metadata": {
283
  "id": "Ie--Y_3wN2c8"
284
  },
285
- "execution_count": null,
286
  "outputs": []
287
  },
288
  {
@@ -311,7 +312,7 @@
311
  "metadata": {
312
  "id": "zVBkAg6eN2an"
313
  },
314
- "execution_count": null,
315
  "outputs": []
316
  },
317
  {
@@ -322,7 +323,7 @@
322
  "metadata": {
323
  "id": "KiDwIXFxN2YK"
324
  },
325
- "execution_count": null,
326
  "outputs": []
327
  },
328
  {
@@ -335,19 +336,19 @@
335
  "base_uri": "https://localhost:8080/"
336
  },
337
  "id": "f1aZ4wYVN2V1",
338
- "outputId": "e2de8e4a-c84f-45b9-adc2-0cda415719cd"
339
  },
340
- "execution_count": null,
341
  "outputs": [
342
  {
343
  "output_type": "execute_result",
344
  "data": {
345
  "text/plain": [
346
- "TextNode(id_='f99b43ed-8262-4e7f-8a03-927e48607f90', embedding=None, metadata={'window': \"LLM Variants and Meta's Open Source Before shedding light on four major trends, I'd share the latest Meta's Llama 2 and Code Llama. Meta's Llama 2 represents a sophisticated evolution in LLMs. This suite spans models pretrained and fine-tuned across a parameter spectrum of 7 billion to 70 billion. A specialized derivative, Llama 2-Chat, has been engineered explicitly for dialogue-centric applications. \", 'original_text': \"LLM Variants and Meta's Open Source Before shedding light on four major trends, I'd share the latest Meta's Llama 2 and Code Llama. \", 'title': \"Beyond GPT-4: What's New?\", 'url': 'https://pub.towardsai.net/beyond-gpt-4-whats-new-cbd61a448eb9#dda8', 'source_name': 'towards_ai'}, excluded_embed_metadata_keys=['window', 'original_text'], excluded_llm_metadata_keys=['window', 'original_text'], relationships={<NodeRelationship.SOURCE: '1'>: RelatedNodeInfo(node_id='d8501fa1-3869-4cde-9a22-6bdc351921f4', node_type=<ObjectType.DOCUMENT: '4'>, metadata={'title': \"Beyond GPT-4: What's New?\", 'url': 'https://pub.towardsai.net/beyond-gpt-4-whats-new-cbd61a448eb9#dda8', 'source_name': 'towards_ai'}, hash='3b095b0e25cdf965d950cdbd7feb8024030e7645998c1a33dc4427affca624ab'), <NodeRelationship.NEXT: '3'>: RelatedNodeInfo(node_id='178fa654-945b-4f5a-9b22-a65751ba5242', node_type=<ObjectType.TEXT: '1'>, metadata={'window': \"LLM Variants and Meta's Open Source Before shedding light on four major trends, I'd share the latest Meta's Llama 2 and Code Llama. Meta's Llama 2 represents a sophisticated evolution in LLMs. This suite spans models pretrained and fine-tuned across a parameter spectrum of 7 billion to 70 billion. A specialized derivative, Llama 2-Chat, has been engineered explicitly for dialogue-centric applications. Benchmarking revealed Llama 2's superior performance over most extant open-source chat models. 
\", 'original_text': \"Meta's Llama 2 represents a sophisticated evolution in LLMs. \"}, hash='e06ffff4f5927a7e2252b2785825ad4b0dafdeb09355258be50a13bc170d7a5b')}, text=\"LLM Variants and Meta's Open Source Before shedding light on four major trends, I'd share the latest Meta's Llama 2 and Code Llama. \", start_char_idx=0, end_char_idx=132, text_template='{metadata_str}\\n\\n{content}', metadata_template='{key}: {value}', metadata_seperator='\\n')"
347
  ]
348
  },
349
  "metadata": {},
350
- "execution_count": 14
351
  }
352
  ]
353
  },
@@ -364,7 +365,7 @@
364
  "metadata": {
365
  "id": "moNbizWrN2Tu"
366
  },
367
- "execution_count": null,
368
  "outputs": []
369
  },
370
  {
@@ -378,22 +379,22 @@
378
  "base_uri": "https://localhost:8080/"
379
  },
380
  "id": "nz6dQtXzyWqK",
381
- "outputId": "dce85bdf-fe5b-422a-89ea-3ea89562013b"
382
  },
383
- "execution_count": null,
384
  "outputs": [
385
  {
386
  "output_type": "stream",
387
  "name": "stdout",
388
  "text": [
389
  " adding: mini-llama-articles/ (stored 0%)\n",
390
- " adding: mini-llama-articles/chroma.sqlite3 (deflated 69%)\n",
391
- " adding: mini-llama-articles/896c1816-3e4f-47a3-9c6f-21bee4542f13/ (stored 0%)\n",
392
- " adding: mini-llama-articles/896c1816-3e4f-47a3-9c6f-21bee4542f13/length.bin (deflated 44%)\n",
393
- " adding: mini-llama-articles/896c1816-3e4f-47a3-9c6f-21bee4542f13/index_metadata.pickle (deflated 38%)\n",
394
- " adding: mini-llama-articles/896c1816-3e4f-47a3-9c6f-21bee4542f13/header.bin (deflated 56%)\n",
395
- " adding: mini-llama-articles/896c1816-3e4f-47a3-9c6f-21bee4542f13/link_lists.bin (deflated 88%)\n",
396
- " adding: mini-llama-articles/896c1816-3e4f-47a3-9c6f-21bee4542f13/data_level0.bin (deflated 17%)\n"
397
  ]
398
  }
399
  ]
@@ -424,7 +425,7 @@
424
  "metadata": {
425
  "id": "wS-V6NhMymx8"
426
  },
427
- "execution_count": null,
428
  "outputs": []
429
  },
430
  {
@@ -442,7 +443,7 @@
442
  "metadata": {
443
  "id": "fH2myF120oMi"
444
  },
445
- "execution_count": null,
446
  "outputs": []
447
  },
448
  {
@@ -458,9 +459,9 @@
458
  "base_uri": "https://localhost:8080/"
459
  },
460
  "id": "EqNreFmE0vRb",
461
- "outputId": "2dc9b058-b02c-4765-ce74-1e0bbb0b7a61"
462
  },
463
- "execution_count": null,
464
  "outputs": [
465
  {
466
  "output_type": "stream",
@@ -471,11 +472,45 @@
471
  }
472
  ]
473
  },
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
474
  {
475
  "cell_type": "code",
476
  "source": [],
477
  "metadata": {
478
- "id": "s6I5HgHMyQaz"
479
  },
480
  "execution_count": null,
481
  "outputs": []
 
4
  "metadata": {
5
  "colab": {
6
  "provenance": [],
7
+ "authorship_tag": "ABX9TyMcPZHiexcHnmM/BQzkTZ9Y",
8
  "include_colab_link": true
9
  },
10
  "kernelspec": {
 
37
  },
38
  {
39
  "cell_type": "code",
40
+ "execution_count": 1,
41
  "metadata": {
42
  "colab": {
43
  "base_uri": "https://localhost:8080/"
44
  },
45
  "id": "sbO5PUR3AL-i",
46
+ "outputId": "84609394-7c68-4a5b-e00a-ae8ac09a1bb9"
47
  },
48
  "outputs": [
49
  {
50
  "output_type": "stream",
51
  "name": "stdout",
52
  "text": [
53
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m226.7/226.7 kB\u001b[0m \u001b[31m2.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
54
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m1.8/1.8 MB\u001b[0m \u001b[31m11.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
55
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m15.4/15.4 MB\u001b[0m \u001b[31m17.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
56
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m2.0/2.0 MB\u001b[0m \u001b[31m45.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
57
  "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m75.6/75.6 kB\u001b[0m \u001b[31m5.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
58
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m526.8/526.8 kB\u001b[0m \u001b[31m27.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
59
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m2.4/2.4 MB\u001b[0m \u001b[31m53.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
60
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m92.0/92.0 kB\u001b[0m \u001b[31m6.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
61
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m62.4/62.4 kB\u001b[0m \u001b[31m4.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
62
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m41.3/41.3 kB\u001b[0m \u001b[31m2.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
63
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m6.8/6.8 MB\u001b[0m \u001b[31m52.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
64
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m59.9/59.9 kB\u001b[0m \u001b[31m3.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
65
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m107.0/107.0 kB\u001b[0m \u001b[31m10.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
66
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m67.3/67.3 kB\u001b[0m \u001b[31m1.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
67
  "\u001b[?25h Installing build dependencies ... \u001b[?25l\u001b[?25hdone\n",
68
  " Getting requirements to build wheel ... \u001b[?25l\u001b[?25hdone\n",
69
  " Preparing metadata (pyproject.toml) ... \u001b[?25l\u001b[?25hdone\n",
70
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m283.7/283.7 kB\u001b[0m \u001b[31m13.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
71
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m1.6/1.6 MB\u001b[0m \u001b[31m40.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
72
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m67.6/67.6 kB\u001b[0m \u001b[31m4.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
73
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m142.5/142.5 kB\u001b[0m \u001b[31m1.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
74
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m77.9/77.9 kB\u001b[0m \u001b[31m6.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
75
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m58.3/58.3 kB\u001b[0m \u001b[31m4.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
76
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m141.9/141.9 kB\u001b[0m \u001b[31m7.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
77
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m290.4/290.4 kB\u001b[0m \u001b[31m13.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
78
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m71.9/71.9 kB\u001b[0m \u001b[31m6.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
79
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m53.6/53.6 kB\u001b[0m \u001b[31m4.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
80
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m46.0/46.0 kB\u001b[0m \u001b[31m4.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
81
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m52.5/52.5 kB\u001b[0m \u001b[31m4.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
82
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m130.5/130.5 kB\u001b[0m \u001b[31m13.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
83
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m341.4/341.4 kB\u001b[0m \u001b[31m23.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
84
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m3.4/3.4 MB\u001b[0m \u001b[31m64.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
85
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m1.2/1.2 MB\u001b[0m \u001b[31m52.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
86
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m130.2/130.2 kB\u001b[0m \u001b[31m11.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
87
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m49.3/49.3 kB\u001b[0m \u001b[31m4.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
88
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m307.7/307.7 kB\u001b[0m \u001b[31m20.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
89
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m47.2/47.2 kB\u001b[0m \u001b[31m3.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
90
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m86.8/86.8 kB\u001b[0m \u001b[31m5.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
91
  "\u001b[?25h Building wheel for pypika (pyproject.toml) ... \u001b[?25l\u001b[?25hdone\n",
92
  "\u001b[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.\n",
93
  "spacy 3.7.4 requires typer<0.10.0,>=0.3.0, but you have typer 0.12.3 which is incompatible.\n",
 
111
  "metadata": {
112
  "id": "39OAU5OlByI0"
113
  },
114
+ "execution_count": 2,
115
  "outputs": []
116
  },
117
  {
 
136
  "metadata": {
137
  "id": "O2haexSAByDD"
138
  },
139
+ "execution_count": 3,
140
  "outputs": []
141
  },
142
  {
 
152
  "metadata": {
153
  "id": "OHO6a-zaBxeG"
154
  },
155
+ "execution_count": 4,
156
  "outputs": []
157
  },
158
  {
 
192
  "base_uri": "https://localhost:8080/"
193
  },
194
  "id": "x4llz2lHN2ij",
195
+ "outputId": "d0cd17b8-eca9-45f0-ae14-846ab0d624e0"
196
  },
197
+ "execution_count": 5,
198
  "outputs": [
199
  {
200
  "output_type": "stream",
201
  "name": "stdout",
202
  "text": [
203
+ "--2024-06-03 22:16:45-- https://raw.githubusercontent.com/AlaFalaki/tutorial_notebooks/main/data/mini-llama-articles.csv\n",
204
+ "Resolving raw.githubusercontent.com (raw.githubusercontent.com)... 185.199.108.133, 185.199.109.133, 185.199.110.133, ...\n",
205
+ "Connecting to raw.githubusercontent.com (raw.githubusercontent.com)|185.199.108.133|:443... connected.\n",
206
  "HTTP request sent, awaiting response... 200 OK\n",
207
  "Length: 173646 (170K) [text/plain]\n",
208
  "Saving to: ‘mini-llama-articles.csv’\n",
209
  "\n",
210
  "\rmini-llama-articles 0%[ ] 0 --.-KB/s \rmini-llama-articles 100%[===================>] 169.58K --.-KB/s in 0.03s \n",
211
  "\n",
212
+ "2024-06-03 22:16:45 (5.09 MB/s) - ‘mini-llama-articles.csv’ saved [173646/173646]\n",
213
  "\n"
214
  ]
215
  }
 
247
  "base_uri": "https://localhost:8080/"
248
  },
249
  "id": "_M-0-D4fN2fc",
250
+ "outputId": "1bfc497f-0653-4231-86c9-cfeff34e2182"
251
  },
252
+ "execution_count": 6,
253
  "outputs": [
254
  {
255
  "output_type": "execute_result",
 
259
  ]
260
  },
261
  "metadata": {},
262
+ "execution_count": 6
263
  }
264
  ]
265
  },
 
283
  "metadata": {
284
  "id": "Ie--Y_3wN2c8"
285
  },
286
+ "execution_count": 7,
287
  "outputs": []
288
  },
289
  {
 
312
  "metadata": {
313
  "id": "zVBkAg6eN2an"
314
  },
315
+ "execution_count": 8,
316
  "outputs": []
317
  },
318
  {
 
323
  "metadata": {
324
  "id": "KiDwIXFxN2YK"
325
  },
326
+ "execution_count": 9,
327
  "outputs": []
328
  },
329
  {
 
336
  "base_uri": "https://localhost:8080/"
337
  },
338
  "id": "f1aZ4wYVN2V1",
339
+ "outputId": "e3ef377a-a195-44e3-a67a-554fcff29e67"
340
  },
341
+ "execution_count": 10,
342
  "outputs": [
343
  {
344
  "output_type": "execute_result",
345
  "data": {
346
  "text/plain": [
347
+ "TextNode(id_='20a4754c-3ab9-4d64-9aa3-e1379c37074e', embedding=None, metadata={'window': \"LLM Variants and Meta's Open Source Before shedding light on four major trends, I'd share the latest Meta's Llama 2 and Code Llama. Meta's Llama 2 represents a sophisticated evolution in LLMs. This suite spans models pretrained and fine-tuned across a parameter spectrum of 7 billion to 70 billion. A specialized derivative, Llama 2-Chat, has been engineered explicitly for dialogue-centric applications. \", 'original_text': \"LLM Variants and Meta's Open Source Before shedding light on four major trends, I'd share the latest Meta's Llama 2 and Code Llama. \", 'title': \"Beyond GPT-4: What's New?\", 'url': 'https://pub.towardsai.net/beyond-gpt-4-whats-new-cbd61a448eb9#dda8', 'source_name': 'towards_ai'}, excluded_embed_metadata_keys=['window', 'original_text'], excluded_llm_metadata_keys=['window', 'original_text'], relationships={<NodeRelationship.SOURCE: '1'>: RelatedNodeInfo(node_id='1773f54a-0742-41dd-a645-ba7c07ff8f75', node_type=<ObjectType.DOCUMENT: '4'>, metadata={'title': \"Beyond GPT-4: What's New?\", 'url': 'https://pub.towardsai.net/beyond-gpt-4-whats-new-cbd61a448eb9#dda8', 'source_name': 'towards_ai'}, hash='3b095b0e25cdf965d950cdbd7feb8024030e7645998c1a33dc4427affca624ab'), <NodeRelationship.NEXT: '3'>: RelatedNodeInfo(node_id='1ac96425-5144-4897-9f7b-182156d3470c', node_type=<ObjectType.TEXT: '1'>, metadata={'window': \"LLM Variants and Meta's Open Source Before shedding light on four major trends, I'd share the latest Meta's Llama 2 and Code Llama. Meta's Llama 2 represents a sophisticated evolution in LLMs. This suite spans models pretrained and fine-tuned across a parameter spectrum of 7 billion to 70 billion. A specialized derivative, Llama 2-Chat, has been engineered explicitly for dialogue-centric applications. Benchmarking revealed Llama 2's superior performance over most extant open-source chat models. 
\", 'original_text': \"Meta's Llama 2 represents a sophisticated evolution in LLMs. \"}, hash='e06ffff4f5927a7e2252b2785825ad4b0dafdeb09355258be50a13bc170d7a5b')}, text=\"LLM Variants and Meta's Open Source Before shedding light on four major trends, I'd share the latest Meta's Llama 2 and Code Llama. \", start_char_idx=0, end_char_idx=132, text_template='{metadata_str}\\n\\n{content}', metadata_template='{key}: {value}', metadata_seperator='\\n')"
348
  ]
349
  },
350
  "metadata": {},
351
+ "execution_count": 10
352
  }
353
  ]
354
  },
 
365
  "metadata": {
366
  "id": "moNbizWrN2Tu"
367
  },
368
+ "execution_count": 11,
369
  "outputs": []
370
  },
371
  {
 
379
  "base_uri": "https://localhost:8080/"
380
  },
381
  "id": "nz6dQtXzyWqK",
382
+ "outputId": "b636525e-47cc-4f57-cfa3-70b9cb17f7e0"
383
  },
384
+ "execution_count": 12,
385
  "outputs": [
386
  {
387
  "output_type": "stream",
388
  "name": "stdout",
389
  "text": [
390
  " adding: mini-llama-articles/ (stored 0%)\n",
391
+ " adding: mini-llama-articles/f4ee5232-8d1e-4e11-899e-02eafe4527df/ (stored 0%)\n",
392
+ " adding: mini-llama-articles/f4ee5232-8d1e-4e11-899e-02eafe4527df/index_metadata.pickle (deflated 38%)\n",
393
+ " adding: mini-llama-articles/f4ee5232-8d1e-4e11-899e-02eafe4527df/link_lists.bin (deflated 88%)\n",
394
+ " adding: mini-llama-articles/f4ee5232-8d1e-4e11-899e-02eafe4527df/data_level0.bin (deflated 18%)\n",
395
+ " adding: mini-llama-articles/f4ee5232-8d1e-4e11-899e-02eafe4527df/length.bin (deflated 43%)\n",
396
+ " adding: mini-llama-articles/f4ee5232-8d1e-4e11-899e-02eafe4527df/header.bin (deflated 56%)\n",
397
+ " adding: mini-llama-articles/chroma.sqlite3 (deflated 69%)\n"
398
  ]
399
  }
400
  ]
 
425
  "metadata": {
426
  "id": "wS-V6NhMymx8"
427
  },
428
+ "execution_count": 13,
429
  "outputs": []
430
  },
431
  {
 
443
  "metadata": {
444
  "id": "fH2myF120oMi"
445
  },
446
+ "execution_count": 14,
447
  "outputs": []
448
  },
449
  {
 
459
  "base_uri": "https://localhost:8080/"
460
  },
461
  "id": "EqNreFmE0vRb",
462
+ "outputId": "bb5204c5-3ab8-460b-9702-5cf2f2b32f73"
463
  },
464
+ "execution_count": 15,
465
  "outputs": [
466
  {
467
  "output_type": "stream",
 
472
  }
473
  ]
474
  },
475
+ {
476
+ "cell_type": "code",
477
+ "source": [
478
+ "for idx, item in enumerate( response.source_nodes ):\n",
479
+ " print(\"Source \", idx+1)\n",
480
+ " print(\"Original Text:\", item.node.metadata[\"original_text\"])\n",
481
+ " print(\"Window:\", item.node.metadata[\"window\"])\n",
482
+ " print(\"----\")"
483
+ ],
484
+ "metadata": {
485
+ "colab": {
486
+ "base_uri": "https://localhost:8080/"
487
+ },
488
+ "id": "whdPLhVaMfOS",
489
+ "outputId": "7b7ea07d-d93c-41a0-bd7b-6a9e8d8b18f7"
490
+ },
491
+ "execution_count": 22,
492
+ "outputs": [
493
+ {
494
+ "output_type": "stream",
495
+ "name": "stdout",
496
+ "text": [
497
+ "Source 1\n",
498
+ "Original Text: Llama 2 Model Flavors Llama 2 is available in four different model sizes: 7 billion, 13 billion, 34 billion, and 70 billion parameters. \n",
499
+ "Window: Companies with over 700 million active daily users cannot use Llama 2. Additionally, its output cannot be used to improve other language models. II. Llama 2 Model Flavors Llama 2 is available in four different model sizes: 7 billion, 13 billion, 34 billion, and 70 billion parameters. While 7B, 13B, and 70B have already been released, the 34B model is still awaited. The pretrained variant, trained on a whopping 2 trillion tokens, boasts a context window of 4096 tokens, twice the size of its predecessor Llama 1. Meta also released a Llama 2 fine-tuned model for chat applications that was trained on over 1 million human annotations. \n",
500
+ "----\n",
501
+ "Source 2\n",
502
+ "Original Text: The 70B parameter model outperforms all other open-source models, while the 7B and 34B models outshine Falcon in all categories and MPT in all categories except coding. \n",
503
+ "Window: The 34B parameter model has reported higher safety violations than other variants, possibly contributing to the delay in its release. IV. Helpfulness Comparison: Llama 2 Outperforms Competitors Llama 2 emerges as a strong contender in the open-source language model arena, outperforming its competitors in most categories. The 70B parameter model outperforms all other open-source models, while the 7B and 34B models outshine Falcon in all categories and MPT in all categories except coding. Despite being smaller, Llam a2's performance rivals that of Chat GPT 3.5, a significantly larger closed-source model. While GPT 4 and PalM-2-L, with their larger size, outperform Llama 2, this is expected due to their capacity for handling complex language tasks. Llama 2's impressive ability to compete with larger models highlights its efficiency and potential in the market. \n",
504
+ "----\n"
505
+ ]
506
+ }
507
+ ]
508
+ },
509
  {
510
  "cell_type": "code",
511
  "source": [],
512
  "metadata": {
513
+ "id": "dQBrOUYrLA76"
514
  },
515
  "execution_count": null,
516
  "outputs": []