Created using Colab
Browse files- notebooks/Advanced_Retriever.ipynb +105 -70
notebooks/Advanced_Retriever.ipynb
CHANGED
@@ -4,7 +4,7 @@
|
|
4 |
"metadata": {
|
5 |
"colab": {
|
6 |
"provenance": [],
|
7 |
-
"authorship_tag": "
|
8 |
"include_colab_link": true
|
9 |
},
|
10 |
"kernelspec": {
|
@@ -37,56 +37,57 @@
|
|
37 |
},
|
38 |
{
|
39 |
"cell_type": "code",
|
40 |
-
"execution_count":
|
41 |
"metadata": {
|
42 |
"colab": {
|
43 |
"base_uri": "https://localhost:8080/"
|
44 |
},
|
45 |
"id": "sbO5PUR3AL-i",
|
46 |
-
"outputId": "
|
47 |
},
|
48 |
"outputs": [
|
49 |
{
|
50 |
"output_type": "stream",
|
51 |
"name": "stdout",
|
52 |
"text": [
|
53 |
-
"\u001b[2K \u001b[90mββββββββββββββββββββββββββββββββββββββββ\u001b[0m \u001b[32m226.7/226.7 kB\u001b[0m \u001b[
|
54 |
-
"\u001b[2K \u001b[90mββββββββββββββββββββββββββββββββββββββββ\u001b[0m \u001b[32m1.8/1.8 MB\u001b[0m \u001b[
|
55 |
-
"\u001b[2K \u001b[90mββββββββββββββββββββββββββββββββββββββββ\u001b[0m \u001b[32m15.4/15.4 MB\u001b[0m \u001b[
|
56 |
-
"\u001b[2K \u001b[90mββββββββββββββββββββββββββββββββββββββββ\u001b[0m \u001b[32m2.0/2.0 MB\u001b[0m \u001b[
|
57 |
"\u001b[2K \u001b[90mββββββββββββββββββββββββββββββββββββββββ\u001b[0m \u001b[32m75.6/75.6 kB\u001b[0m \u001b[31m5.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
|
58 |
-
"\u001b[2K \u001b[90mββββββββββββββββββββββββββββββββββββββββ\u001b[0m \u001b[32m526.8/526.8 kB\u001b[0m \u001b[
|
59 |
-
"\u001b[2K \u001b[90mββββββββββββββββββββββββββββββββββββββββ\u001b[0m \u001b[32m2.4/2.4 MB\u001b[0m \u001b[
|
60 |
-
"\u001b[2K \u001b[90mββββββββββββββββββββββββββββββββββββββββ\u001b[0m \u001b[32m92.0/92.0 kB\u001b[0m \u001b[
|
61 |
-
"\u001b[2K \u001b[90mββββββββββββββββββββββββββββββββββββββββ\u001b[0m \u001b[32m62.4/62.4 kB\u001b[0m \u001b[31m4.
|
62 |
-
"\u001b[2K \u001b[90mββββββββββββββββββββββββββββββββββββββββ\u001b[0m \u001b[32m41.3/41.3 kB\u001b[0m \u001b[
|
63 |
-
"\u001b[2K \u001b[90mββββββββββββββββββββββββββββββββββββββββ\u001b[0m \u001b[32m6.8/6.8 MB\u001b[0m \u001b[
|
64 |
-
"\u001b[2K \u001b[90mββββββββββββββββββββββββββββββββββββββββ\u001b[0m \u001b[
|
65 |
-
"\u001b[2K \u001b[90m
|
66 |
-
"\u001b[2K \u001b[90mββββββββββββββββββββββββββββββββββββββββ\u001b[0m \u001b[32m67.3/67.3 kB\u001b[0m \u001b[
|
67 |
"\u001b[?25h Installing build dependencies ... \u001b[?25l\u001b[?25hdone\n",
|
68 |
" Getting requirements to build wheel ... \u001b[?25l\u001b[?25hdone\n",
|
69 |
" Preparing metadata (pyproject.toml) ... \u001b[?25l\u001b[?25hdone\n",
|
70 |
-
"\u001b[2K \u001b[90mββββββββββββββββββββββββββββββββββββββββ\u001b[0m \u001b[32m283.7/283.7 kB\u001b[0m \u001b[
|
71 |
-
"\u001b[2K \u001b[90mββββββββββββββββββββββββββββββββββββββββ\u001b[0m \u001b[32m1.6/1.6 MB\u001b[0m \u001b[
|
72 |
-
"\u001b[2K \u001b[90mββββββββββββββββββββββββββββββββββββββββ\u001b[0m \u001b[32m67.6/67.6 kB\u001b[0m \u001b[
|
73 |
-
"\u001b[2K \u001b[90mββββββββββββββββββββββββββββββββββββββββ\u001b[0m \u001b[32m142.5/142.5 kB\u001b[0m \u001b[
|
74 |
-
"\u001b[2K \u001b[90mββββββββββββββββββββββββββββββββββββββββ\u001b[0m \u001b[32m77.9/77.9 kB\u001b[0m \u001b[31m6.
|
75 |
-
"\u001b[2K \u001b[90mββββββββββββββββββββββββββββββββββββββββ\u001b[0m \u001b[32m58.3/58.3 kB\u001b[0m \u001b[
|
76 |
-
"\u001b[2K \u001b[90mββββββββββββββββββββββββββββββββββββββββ\u001b[0m \u001b[32m141.9/141.9 kB\u001b[0m \u001b[
|
77 |
-
"\u001b[2K \u001b[90m
|
78 |
-
"\u001b[2K \u001b[90mββββββββββββββββββββββββββββββββββββββββ\u001b[0m \u001b[32m71.9/71.9 kB\u001b[0m \u001b[
|
79 |
-
"\u001b[2K \u001b[90mββββββββββββββββββββββββββββββββββββββββ\u001b[0m \u001b[32m53.6/53.6 kB\u001b[0m \u001b[
|
80 |
-
"\u001b[2K \u001b[90mββββββββββββββββββββββββββββββββββββββββ\u001b[0m \u001b[32m46.0/46.0 kB\u001b[0m \u001b[
|
81 |
-
"\u001b[2K \u001b[90mββββββββββββββββββββββββββββββββββββββββ\u001b[0m \u001b[
|
82 |
-
"\u001b[2K \u001b[90mββββββββββββββββββββββββββββββββββββββββ\u001b[0m \u001b[
|
83 |
-
"\u001b[2K \u001b[90mββββββββββββββββββββββββββββββββββββββββ\u001b[0m \u001b[
|
84 |
-
"\u001b[2K \u001b[90mββββββββββββββββββββββββββββββββββββββββ\u001b[0m \u001b[
|
85 |
-
"\u001b[2K \u001b[90mββββββββββββββββββββββββββββββββββββββββ\u001b[0m \u001b[
|
86 |
-
"\u001b[2K \u001b[90mββββββββββββββββββββββββββββββββββββββββ\u001b[0m \u001b[
|
87 |
-
"\u001b[2K \u001b[90mββββββββββββββββββββββββββββββββββββββββ\u001b[0m \u001b[
|
88 |
-
"\u001b[2K \u001b[90mββββββββββββββββββββββββββββββββββββββββ\u001b[0m \u001b[
|
89 |
-
"\u001b[2K \u001b[90mββββββββββββββββββββββββββββββββββββββββ\u001b[0m \u001b[
|
|
|
90 |
"\u001b[?25h Building wheel for pypika (pyproject.toml) ... \u001b[?25l\u001b[?25hdone\n",
|
91 |
"\u001b[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.\n",
|
92 |
"spacy 3.7.4 requires typer<0.10.0,>=0.3.0, but you have typer 0.12.3 which is incompatible.\n",
|
@@ -110,7 +111,7 @@
|
|
110 |
"metadata": {
|
111 |
"id": "39OAU5OlByI0"
|
112 |
},
|
113 |
-
"execution_count":
|
114 |
"outputs": []
|
115 |
},
|
116 |
{
|
@@ -135,7 +136,7 @@
|
|
135 |
"metadata": {
|
136 |
"id": "O2haexSAByDD"
|
137 |
},
|
138 |
-
"execution_count":
|
139 |
"outputs": []
|
140 |
},
|
141 |
{
|
@@ -151,7 +152,7 @@
|
|
151 |
"metadata": {
|
152 |
"id": "OHO6a-zaBxeG"
|
153 |
},
|
154 |
-
"execution_count":
|
155 |
"outputs": []
|
156 |
},
|
157 |
{
|
@@ -191,24 +192,24 @@
|
|
191 |
"base_uri": "https://localhost:8080/"
|
192 |
},
|
193 |
"id": "x4llz2lHN2ij",
|
194 |
-
"outputId": "
|
195 |
},
|
196 |
-
"execution_count":
|
197 |
"outputs": [
|
198 |
{
|
199 |
"output_type": "stream",
|
200 |
"name": "stdout",
|
201 |
"text": [
|
202 |
-
"--2024-
|
203 |
-
"Resolving raw.githubusercontent.com (raw.githubusercontent.com)... 185.199.
|
204 |
-
"Connecting to raw.githubusercontent.com (raw.githubusercontent.com)|185.199.
|
205 |
"HTTP request sent, awaiting response... 200 OK\n",
|
206 |
"Length: 173646 (170K) [text/plain]\n",
|
207 |
"Saving to: βmini-llama-articles.csvβ\n",
|
208 |
"\n",
|
209 |
"\rmini-llama-articles 0%[ ] 0 --.-KB/s \rmini-llama-articles 100%[===================>] 169.58K --.-KB/s in 0.03s \n",
|
210 |
"\n",
|
211 |
-
"2024-
|
212 |
"\n"
|
213 |
]
|
214 |
}
|
@@ -246,9 +247,9 @@
|
|
246 |
"base_uri": "https://localhost:8080/"
|
247 |
},
|
248 |
"id": "_M-0-D4fN2fc",
|
249 |
-
"outputId": "
|
250 |
},
|
251 |
-
"execution_count":
|
252 |
"outputs": [
|
253 |
{
|
254 |
"output_type": "execute_result",
|
@@ -258,7 +259,7 @@
|
|
258 |
]
|
259 |
},
|
260 |
"metadata": {},
|
261 |
-
"execution_count":
|
262 |
}
|
263 |
]
|
264 |
},
|
@@ -282,7 +283,7 @@
|
|
282 |
"metadata": {
|
283 |
"id": "Ie--Y_3wN2c8"
|
284 |
},
|
285 |
-
"execution_count":
|
286 |
"outputs": []
|
287 |
},
|
288 |
{
|
@@ -311,7 +312,7 @@
|
|
311 |
"metadata": {
|
312 |
"id": "zVBkAg6eN2an"
|
313 |
},
|
314 |
-
"execution_count":
|
315 |
"outputs": []
|
316 |
},
|
317 |
{
|
@@ -322,7 +323,7 @@
|
|
322 |
"metadata": {
|
323 |
"id": "KiDwIXFxN2YK"
|
324 |
},
|
325 |
-
"execution_count":
|
326 |
"outputs": []
|
327 |
},
|
328 |
{
|
@@ -335,19 +336,19 @@
|
|
335 |
"base_uri": "https://localhost:8080/"
|
336 |
},
|
337 |
"id": "f1aZ4wYVN2V1",
|
338 |
-
"outputId": "
|
339 |
},
|
340 |
-
"execution_count":
|
341 |
"outputs": [
|
342 |
{
|
343 |
"output_type": "execute_result",
|
344 |
"data": {
|
345 |
"text/plain": [
|
346 |
-
"TextNode(id_='
|
347 |
]
|
348 |
},
|
349 |
"metadata": {},
|
350 |
-
"execution_count":
|
351 |
}
|
352 |
]
|
353 |
},
|
@@ -364,7 +365,7 @@
|
|
364 |
"metadata": {
|
365 |
"id": "moNbizWrN2Tu"
|
366 |
},
|
367 |
-
"execution_count":
|
368 |
"outputs": []
|
369 |
},
|
370 |
{
|
@@ -378,22 +379,22 @@
|
|
378 |
"base_uri": "https://localhost:8080/"
|
379 |
},
|
380 |
"id": "nz6dQtXzyWqK",
|
381 |
-
"outputId": "
|
382 |
},
|
383 |
-
"execution_count":
|
384 |
"outputs": [
|
385 |
{
|
386 |
"output_type": "stream",
|
387 |
"name": "stdout",
|
388 |
"text": [
|
389 |
" adding: mini-llama-articles/ (stored 0%)\n",
|
390 |
-
" adding: mini-llama-articles/
|
391 |
-
" adding: mini-llama-articles/
|
392 |
-
" adding: mini-llama-articles/
|
393 |
-
" adding: mini-llama-articles/
|
394 |
-
" adding: mini-llama-articles/
|
395 |
-
" adding: mini-llama-articles/
|
396 |
-
" adding: mini-llama-articles/
|
397 |
]
|
398 |
}
|
399 |
]
|
@@ -424,7 +425,7 @@
|
|
424 |
"metadata": {
|
425 |
"id": "wS-V6NhMymx8"
|
426 |
},
|
427 |
-
"execution_count":
|
428 |
"outputs": []
|
429 |
},
|
430 |
{
|
@@ -442,7 +443,7 @@
|
|
442 |
"metadata": {
|
443 |
"id": "fH2myF120oMi"
|
444 |
},
|
445 |
-
"execution_count":
|
446 |
"outputs": []
|
447 |
},
|
448 |
{
|
@@ -458,9 +459,9 @@
|
|
458 |
"base_uri": "https://localhost:8080/"
|
459 |
},
|
460 |
"id": "EqNreFmE0vRb",
|
461 |
-
"outputId": "
|
462 |
},
|
463 |
-
"execution_count":
|
464 |
"outputs": [
|
465 |
{
|
466 |
"output_type": "stream",
|
@@ -471,11 +472,45 @@
|
|
471 |
}
|
472 |
]
|
473 |
},
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
474 |
{
|
475 |
"cell_type": "code",
|
476 |
"source": [],
|
477 |
"metadata": {
|
478 |
-
"id": "
|
479 |
},
|
480 |
"execution_count": null,
|
481 |
"outputs": []
|
|
|
4 |
"metadata": {
|
5 |
"colab": {
|
6 |
"provenance": [],
|
7 |
+
"authorship_tag": "ABX9TyMcPZHiexcHnmM/BQzkTZ9Y",
|
8 |
"include_colab_link": true
|
9 |
},
|
10 |
"kernelspec": {
|
|
|
37 |
},
|
38 |
{
|
39 |
"cell_type": "code",
|
40 |
+
"execution_count": 1,
|
41 |
"metadata": {
|
42 |
"colab": {
|
43 |
"base_uri": "https://localhost:8080/"
|
44 |
},
|
45 |
"id": "sbO5PUR3AL-i",
|
46 |
+
"outputId": "84609394-7c68-4a5b-e00a-ae8ac09a1bb9"
|
47 |
},
|
48 |
"outputs": [
|
49 |
{
|
50 |
"output_type": "stream",
|
51 |
"name": "stdout",
|
52 |
"text": [
|
53 |
+
"\u001b[2K \u001b[90mββββββββββββββββββββββββββββββββββββββββ\u001b[0m \u001b[32m226.7/226.7 kB\u001b[0m \u001b[31m2.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
|
54 |
+
"\u001b[2K \u001b[90mββββββββββββββββββββββββββββββββββββββββ\u001b[0m \u001b[32m1.8/1.8 MB\u001b[0m \u001b[31m11.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
|
55 |
+
"\u001b[2K \u001b[90mββββββββββββββββββββββββββββββββββββββββ\u001b[0m \u001b[32m15.4/15.4 MB\u001b[0m \u001b[31m17.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
|
56 |
+
"\u001b[2K \u001b[90mββββββββββββββββββββββββββββββββββββββββ\u001b[0m \u001b[32m2.0/2.0 MB\u001b[0m \u001b[31m45.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
|
57 |
"\u001b[2K \u001b[90mββββββββββββββββββββββββββββββββββββββββ\u001b[0m \u001b[32m75.6/75.6 kB\u001b[0m \u001b[31m5.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
|
58 |
+
"\u001b[2K \u001b[90mββββββββββββββββββββββββββββββββββββββββ\u001b[0m \u001b[32m526.8/526.8 kB\u001b[0m \u001b[31m27.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
|
59 |
+
"\u001b[2K \u001b[90mββββββββββββββββββββββββββββββββββββββββ\u001b[0m \u001b[32m2.4/2.4 MB\u001b[0m \u001b[31m53.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
|
60 |
+
"\u001b[2K \u001b[90mββββββββββββββββββββββββββββββββββββββββ\u001b[0m \u001b[32m92.0/92.0 kB\u001b[0m \u001b[31m6.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
|
61 |
+
"\u001b[2K \u001b[90mββββββββββββββββββββββββββββββββββββββββ\u001b[0m \u001b[32m62.4/62.4 kB\u001b[0m \u001b[31m4.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
|
62 |
+
"\u001b[2K \u001b[90mββββββββββββββββββββββββββββββββββββββββ\u001b[0m \u001b[32m41.3/41.3 kB\u001b[0m \u001b[31m2.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
|
63 |
+
"\u001b[2K \u001b[90mββββββββββββββββββββββββββββββββββββββββ\u001b[0m \u001b[32m6.8/6.8 MB\u001b[0m \u001b[31m52.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
|
64 |
+
"\u001b[2K \u001b[90mββββββββββββββββββββββββββββββββββββββββ\u001b[0m \u001b[32m59.9/59.9 kB\u001b[0m \u001b[31m3.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
|
65 |
+
"\u001b[2K \u001b[90mββββββββββββββββββββββββββββββββββββββββ\u001b[0m \u001b[32m107.0/107.0 kB\u001b[0m \u001b[31m10.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
|
66 |
+
"\u001b[2K \u001b[90mββββββββββββββββββββββββββββββββββββββββ\u001b[0m \u001b[32m67.3/67.3 kB\u001b[0m \u001b[31m1.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
|
67 |
"\u001b[?25h Installing build dependencies ... \u001b[?25l\u001b[?25hdone\n",
|
68 |
" Getting requirements to build wheel ... \u001b[?25l\u001b[?25hdone\n",
|
69 |
" Preparing metadata (pyproject.toml) ... \u001b[?25l\u001b[?25hdone\n",
|
70 |
+
"\u001b[2K \u001b[90mββββββββββββββββββββββββββββββββββββββββ\u001b[0m \u001b[32m283.7/283.7 kB\u001b[0m \u001b[31m13.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
|
71 |
+
"\u001b[2K \u001b[90mββββββββββββββββββββββββββββββββββββββββ\u001b[0m \u001b[32m1.6/1.6 MB\u001b[0m \u001b[31m40.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
|
72 |
+
"\u001b[2K \u001b[90mββββββββββββββββββββββββββββββββββββββββ\u001b[0m \u001b[32m67.6/67.6 kB\u001b[0m \u001b[31m4.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
|
73 |
+
"\u001b[2K \u001b[90mββββββββββββββββββββββββββββββββββββββββ\u001b[0m \u001b[32m142.5/142.5 kB\u001b[0m \u001b[31m1.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
|
74 |
+
"\u001b[2K \u001b[90mββββββββββββββββββββββββββββββββββββββββ\u001b[0m \u001b[32m77.9/77.9 kB\u001b[0m \u001b[31m6.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
|
75 |
+
"\u001b[2K \u001b[90mββββββββββββββββββββββββββββββββββββββββ\u001b[0m \u001b[32m58.3/58.3 kB\u001b[0m \u001b[31m4.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
|
76 |
+
"\u001b[2K \u001b[90mββββββββββββββββββββββββββββββββββββββββ\u001b[0m \u001b[32m141.9/141.9 kB\u001b[0m \u001b[31m7.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
|
77 |
+
"\u001b[2K \u001b[90mββββββββββββββββββββββοΏ½οΏ½οΏ½βββββββββββββββββ\u001b[0m \u001b[32m290.4/290.4 kB\u001b[0m \u001b[31m13.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
|
78 |
+
"\u001b[2K \u001b[90mββββββββββββββββββββββββββββββββββββββββ\u001b[0m \u001b[32m71.9/71.9 kB\u001b[0m \u001b[31m6.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
|
79 |
+
"\u001b[2K \u001b[90mββββββββββββββββββββββββββββββββββββββββ\u001b[0m \u001b[32m53.6/53.6 kB\u001b[0m \u001b[31m4.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
|
80 |
+
"\u001b[2K \u001b[90mββββββββββββββββββββββββββββββββββββββββ\u001b[0m \u001b[32m46.0/46.0 kB\u001b[0m \u001b[31m4.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
|
81 |
+
"\u001b[2K \u001b[90mββββββββββββββββββββββββββββββββββββββββ\u001b[0m \u001b[32m52.5/52.5 kB\u001b[0m \u001b[31m4.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
|
82 |
+
"\u001b[2K \u001b[90mββββββββββββββββββββββββββββββββββββββββ\u001b[0m \u001b[32m130.5/130.5 kB\u001b[0m \u001b[31m13.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
|
83 |
+
"\u001b[2K \u001b[90mββββββββββββββββββββββββββββββββββββββββ\u001b[0m \u001b[32m341.4/341.4 kB\u001b[0m \u001b[31m23.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
|
84 |
+
"\u001b[2K \u001b[90mββββββββββββββββββββββββββββββββββββββββ\u001b[0m \u001b[32m3.4/3.4 MB\u001b[0m \u001b[31m64.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
|
85 |
+
"\u001b[2K \u001b[90mββββββββββββββββββββββββββββββββββββββββ\u001b[0m \u001b[32m1.2/1.2 MB\u001b[0m \u001b[31m52.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
|
86 |
+
"\u001b[2K \u001b[90mββββββββββββββββββββββββββββββββββββββββ\u001b[0m \u001b[32m130.2/130.2 kB\u001b[0m \u001b[31m11.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
|
87 |
+
"\u001b[2K \u001b[90mββββββββββββββββββββββββββββββββββββββββ\u001b[0m \u001b[32m49.3/49.3 kB\u001b[0m \u001b[31m4.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
|
88 |
+
"\u001b[2K \u001b[90mββββββββββββββββββββββββββββββββββββββββ\u001b[0m \u001b[32m307.7/307.7 kB\u001b[0m \u001b[31m20.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
|
89 |
+
"\u001b[2K \u001b[90mββββββββββββββββββββββββββββββββββββββββ\u001b[0m \u001b[32m47.2/47.2 kB\u001b[0m \u001b[31m3.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
|
90 |
+
"\u001b[2K \u001b[90mββββββββββββββββββββββββββββββββββββββββ\u001b[0m \u001b[32m86.8/86.8 kB\u001b[0m \u001b[31m5.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
|
91 |
"\u001b[?25h Building wheel for pypika (pyproject.toml) ... \u001b[?25l\u001b[?25hdone\n",
|
92 |
"\u001b[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.\n",
|
93 |
"spacy 3.7.4 requires typer<0.10.0,>=0.3.0, but you have typer 0.12.3 which is incompatible.\n",
|
|
|
111 |
"metadata": {
|
112 |
"id": "39OAU5OlByI0"
|
113 |
},
|
114 |
+
"execution_count": 2,
|
115 |
"outputs": []
|
116 |
},
|
117 |
{
|
|
|
136 |
"metadata": {
|
137 |
"id": "O2haexSAByDD"
|
138 |
},
|
139 |
+
"execution_count": 3,
|
140 |
"outputs": []
|
141 |
},
|
142 |
{
|
|
|
152 |
"metadata": {
|
153 |
"id": "OHO6a-zaBxeG"
|
154 |
},
|
155 |
+
"execution_count": 4,
|
156 |
"outputs": []
|
157 |
},
|
158 |
{
|
|
|
192 |
"base_uri": "https://localhost:8080/"
|
193 |
},
|
194 |
"id": "x4llz2lHN2ij",
|
195 |
+
"outputId": "d0cd17b8-eca9-45f0-ae14-846ab0d624e0"
|
196 |
},
|
197 |
+
"execution_count": 5,
|
198 |
"outputs": [
|
199 |
{
|
200 |
"output_type": "stream",
|
201 |
"name": "stdout",
|
202 |
"text": [
|
203 |
+
"--2024-06-03 22:16:45-- https://raw.githubusercontent.com/AlaFalaki/tutorial_notebooks/main/data/mini-llama-articles.csv\n",
|
204 |
+
"Resolving raw.githubusercontent.com (raw.githubusercontent.com)... 185.199.108.133, 185.199.109.133, 185.199.110.133, ...\n",
|
205 |
+
"Connecting to raw.githubusercontent.com (raw.githubusercontent.com)|185.199.108.133|:443... connected.\n",
|
206 |
"HTTP request sent, awaiting response... 200 OK\n",
|
207 |
"Length: 173646 (170K) [text/plain]\n",
|
208 |
"Saving to: βmini-llama-articles.csvβ\n",
|
209 |
"\n",
|
210 |
"\rmini-llama-articles 0%[ ] 0 --.-KB/s \rmini-llama-articles 100%[===================>] 169.58K --.-KB/s in 0.03s \n",
|
211 |
"\n",
|
212 |
+
"2024-06-03 22:16:45 (5.09 MB/s) - βmini-llama-articles.csvβ saved [173646/173646]\n",
|
213 |
"\n"
|
214 |
]
|
215 |
}
|
|
|
247 |
"base_uri": "https://localhost:8080/"
|
248 |
},
|
249 |
"id": "_M-0-D4fN2fc",
|
250 |
+
"outputId": "1bfc497f-0653-4231-86c9-cfeff34e2182"
|
251 |
},
|
252 |
+
"execution_count": 6,
|
253 |
"outputs": [
|
254 |
{
|
255 |
"output_type": "execute_result",
|
|
|
259 |
]
|
260 |
},
|
261 |
"metadata": {},
|
262 |
+
"execution_count": 6
|
263 |
}
|
264 |
]
|
265 |
},
|
|
|
283 |
"metadata": {
|
284 |
"id": "Ie--Y_3wN2c8"
|
285 |
},
|
286 |
+
"execution_count": 7,
|
287 |
"outputs": []
|
288 |
},
|
289 |
{
|
|
|
312 |
"metadata": {
|
313 |
"id": "zVBkAg6eN2an"
|
314 |
},
|
315 |
+
"execution_count": 8,
|
316 |
"outputs": []
|
317 |
},
|
318 |
{
|
|
|
323 |
"metadata": {
|
324 |
"id": "KiDwIXFxN2YK"
|
325 |
},
|
326 |
+
"execution_count": 9,
|
327 |
"outputs": []
|
328 |
},
|
329 |
{
|
|
|
336 |
"base_uri": "https://localhost:8080/"
|
337 |
},
|
338 |
"id": "f1aZ4wYVN2V1",
|
339 |
+
"outputId": "e3ef377a-a195-44e3-a67a-554fcff29e67"
|
340 |
},
|
341 |
+
"execution_count": 10,
|
342 |
"outputs": [
|
343 |
{
|
344 |
"output_type": "execute_result",
|
345 |
"data": {
|
346 |
"text/plain": [
|
347 |
+
"TextNode(id_='20a4754c-3ab9-4d64-9aa3-e1379c37074e', embedding=None, metadata={'window': \"LLM Variants and Meta's Open Source Before shedding light on four major trends, I'd share the latest Meta's Llama 2 and Code Llama. Meta's Llama 2 represents a sophisticated evolution in LLMs. This suite spans models pretrained and fine-tuned across a parameter spectrum of 7 billion to 70 billion. A specialized derivative, Llama 2-Chat, has been engineered explicitly for dialogue-centric applications. \", 'original_text': \"LLM Variants and Meta's Open Source Before shedding light on four major trends, I'd share the latest Meta's Llama 2 and Code Llama. \", 'title': \"Beyond GPT-4: What's New?\", 'url': 'https://pub.towardsai.net/beyond-gpt-4-whats-new-cbd61a448eb9#dda8', 'source_name': 'towards_ai'}, excluded_embed_metadata_keys=['window', 'original_text'], excluded_llm_metadata_keys=['window', 'original_text'], relationships={<NodeRelationship.SOURCE: '1'>: RelatedNodeInfo(node_id='1773f54a-0742-41dd-a645-ba7c07ff8f75', node_type=<ObjectType.DOCUMENT: '4'>, metadata={'title': \"Beyond GPT-4: What's New?\", 'url': 'https://pub.towardsai.net/beyond-gpt-4-whats-new-cbd61a448eb9#dda8', 'source_name': 'towards_ai'}, hash='3b095b0e25cdf965d950cdbd7feb8024030e7645998c1a33dc4427affca624ab'), <NodeRelationship.NEXT: '3'>: RelatedNodeInfo(node_id='1ac96425-5144-4897-9f7b-182156d3470c', node_type=<ObjectType.TEXT: '1'>, metadata={'window': \"LLM Variants and Meta's Open Source Before shedding light on four major trends, I'd share the latest Meta's Llama 2 and Code Llama. Meta's Llama 2 represents a sophisticated evolution in LLMs. This suite spans models pretrained and fine-tuned across a parameter spectrum of 7 billion to 70 billion. A specialized derivative, Llama 2-Chat, has been engineered explicitly for dialogue-centric applications. Benchmarking revealed Llama 2's superior performance over most extant open-source chat models. \", 'original_text': \"Meta's Llama 2 represents a sophisticated evolution in LLMs. \"}, hash='e06ffff4f5927a7e2252b2785825ad4b0dafdeb09355258be50a13bc170d7a5b')}, text=\"LLM Variants and Meta's Open Source Before shedding light on four major trends, I'd share the latest Meta's Llama 2 and Code Llama. \", start_char_idx=0, end_char_idx=132, text_template='{metadata_str}\\n\\n{content}', metadata_template='{key}: {value}', metadata_seperator='\\n')"
|
348 |
]
|
349 |
},
|
350 |
"metadata": {},
|
351 |
+
"execution_count": 10
|
352 |
}
|
353 |
]
|
354 |
},
|
|
|
365 |
"metadata": {
|
366 |
"id": "moNbizWrN2Tu"
|
367 |
},
|
368 |
+
"execution_count": 11,
|
369 |
"outputs": []
|
370 |
},
|
371 |
{
|
|
|
379 |
"base_uri": "https://localhost:8080/"
|
380 |
},
|
381 |
"id": "nz6dQtXzyWqK",
|
382 |
+
"outputId": "b636525e-47cc-4f57-cfa3-70b9cb17f7e0"
|
383 |
},
|
384 |
+
"execution_count": 12,
|
385 |
"outputs": [
|
386 |
{
|
387 |
"output_type": "stream",
|
388 |
"name": "stdout",
|
389 |
"text": [
|
390 |
" adding: mini-llama-articles/ (stored 0%)\n",
|
391 |
+
" adding: mini-llama-articles/f4ee5232-8d1e-4e11-899e-02eafe4527df/ (stored 0%)\n",
|
392 |
+
" adding: mini-llama-articles/f4ee5232-8d1e-4e11-899e-02eafe4527df/index_metadata.pickle (deflated 38%)\n",
|
393 |
+
" adding: mini-llama-articles/f4ee5232-8d1e-4e11-899e-02eafe4527df/link_lists.bin (deflated 88%)\n",
|
394 |
+
" adding: mini-llama-articles/f4ee5232-8d1e-4e11-899e-02eafe4527df/data_level0.bin (deflated 18%)\n",
|
395 |
+
" adding: mini-llama-articles/f4ee5232-8d1e-4e11-899e-02eafe4527df/length.bin (deflated 43%)\n",
|
396 |
+
" adding: mini-llama-articles/f4ee5232-8d1e-4e11-899e-02eafe4527df/header.bin (deflated 56%)\n",
|
397 |
+
" adding: mini-llama-articles/chroma.sqlite3 (deflated 69%)\n"
|
398 |
]
|
399 |
}
|
400 |
]
|
|
|
425 |
"metadata": {
|
426 |
"id": "wS-V6NhMymx8"
|
427 |
},
|
428 |
+
"execution_count": 13,
|
429 |
"outputs": []
|
430 |
},
|
431 |
{
|
|
|
443 |
"metadata": {
|
444 |
"id": "fH2myF120oMi"
|
445 |
},
|
446 |
+
"execution_count": 14,
|
447 |
"outputs": []
|
448 |
},
|
449 |
{
|
|
|
459 |
"base_uri": "https://localhost:8080/"
|
460 |
},
|
461 |
"id": "EqNreFmE0vRb",
|
462 |
+
"outputId": "bb5204c5-3ab8-460b-9702-5cf2f2b32f73"
|
463 |
},
|
464 |
+
"execution_count": 15,
|
465 |
"outputs": [
|
466 |
{
|
467 |
"output_type": "stream",
|
|
|
472 |
}
|
473 |
]
|
474 |
},
|
475 |
+
{
|
476 |
+
"cell_type": "code",
|
477 |
+
"source": [
|
478 |
+
"for idx, item in enumerate( response.source_nodes ):\n",
|
479 |
+
" print(\"Source \", idx+1)\n",
|
480 |
+
" print(\"Original Text:\", item.node.metadata[\"original_text\"])\n",
|
481 |
+
" print(\"Window:\", item.node.metadata[\"window\"])\n",
|
482 |
+
" print(\"----\")"
|
483 |
+
],
|
484 |
+
"metadata": {
|
485 |
+
"colab": {
|
486 |
+
"base_uri": "https://localhost:8080/"
|
487 |
+
},
|
488 |
+
"id": "whdPLhVaMfOS",
|
489 |
+
"outputId": "7b7ea07d-d93c-41a0-bd7b-6a9e8d8b18f7"
|
490 |
+
},
|
491 |
+
"execution_count": 22,
|
492 |
+
"outputs": [
|
493 |
+
{
|
494 |
+
"output_type": "stream",
|
495 |
+
"name": "stdout",
|
496 |
+
"text": [
|
497 |
+
"Source 1\n",
|
498 |
+
"Original Text: Llama 2 Model Flavors Llama 2 is available in four different model sizes: 7 billion, 13 billion, 34 billion, and 70 billion parameters. \n",
|
499 |
+
"Window: Companies with over 700 million active daily users cannot use Llama 2. Additionally, its output cannot be used to improve other language models. II. Llama 2 Model Flavors Llama 2 is available in four different model sizes: 7 billion, 13 billion, 34 billion, and 70 billion parameters. While 7B, 13B, and 70B have already been released, the 34B model is still awaited. The pretrained variant, trained on a whopping 2 trillion tokens, boasts a context window of 4096 tokens, twice the size of its predecessor Llama 1. Meta also released a Llama 2 fine-tuned model for chat applications that was trained on over 1 million human annotations. \n",
|
500 |
+
"----\n",
|
501 |
+
"Source 2\n",
|
502 |
+
"Original Text: The 70B parameter model outperforms all other open-source models, while the 7B and 34B models outshine Falcon in all categories and MPT in all categories except coding. \n",
|
503 |
+
"Window: The 34B parameter model has reported higher safety violations than other variants, possibly contributing to the delay in its release. IV. Helpfulness Comparison: Llama 2 Outperforms Competitors Llama 2 emerges as a strong contender in the open-source language model arena, outperforming its competitors in most categories. The 70B parameter model outperforms all other open-source models, while the 7B and 34B models outshine Falcon in all categories and MPT in all categories except coding. Despite being smaller, Llam a2's performance rivals that of Chat GPT 3.5, a significantly larger closed-source model. While GPT 4 and PalM-2-L, with their larger size, outperform Llama 2, this is expected due to their capacity for handling complex language tasks. Llama 2's impressive ability to compete with larger models highlights its efficiency and potential in the market. \n",
|
504 |
+
"----\n"
|
505 |
+
]
|
506 |
+
}
|
507 |
+
]
|
508 |
+
},
|
509 |
{
|
510 |
"cell_type": "code",
|
511 |
"source": [],
|
512 |
"metadata": {
|
513 |
+
"id": "dQBrOUYrLA76"
|
514 |
},
|
515 |
"execution_count": null,
|
516 |
"outputs": []
|