AlaFalaki commited on
Commit
5cf14c1
•
1 Parent(s): 40ddf7d

Created using Colab

Browse files
Files changed (1) hide show
  1. notebooks/Metadata_Filtering.ipynb +134 -105
notebooks/Metadata_Filtering.ipynb CHANGED
@@ -4,7 +4,7 @@
4
  "metadata": {
5
  "colab": {
6
  "provenance": [],
7
- "authorship_tag": "ABX9TyPZ72CXWSfdpg1zYB6i8Csa",
8
  "include_colab_link": true
9
  },
10
  "kernelspec": {
@@ -16,7 +16,7 @@
16
  },
17
  "widgets": {
18
  "application/vnd.jupyter.widget-state+json": {
19
- "1350b1f2c9df4113bde94c59ad0db010": {
20
  "model_module": "@jupyter-widgets/controls",
21
  "model_name": "HBoxModel",
22
  "model_module_version": "1.5.0",
@@ -31,14 +31,14 @@
31
  "_view_name": "HBoxView",
32
  "box_style": "",
33
  "children": [
34
- "IPY_MODEL_7f3d8a3d4f614b4d99b048c021c1615c",
35
- "IPY_MODEL_bf68ea9156ea438da61f8ca1fc529f7d",
36
- "IPY_MODEL_1fd460ecbf644d049ed1faf88322649f"
37
  ],
38
- "layout": "IPY_MODEL_36f78ffa8f9847149c5cab414fac1226"
39
  }
40
  },
41
- "7f3d8a3d4f614b4d99b048c021c1615c": {
42
  "model_module": "@jupyter-widgets/controls",
43
  "model_name": "HTMLModel",
44
  "model_module_version": "1.5.0",
@@ -53,13 +53,13 @@
53
  "_view_name": "HTMLView",
54
  "description": "",
55
  "description_tooltip": null,
56
- "layout": "IPY_MODEL_0862d91ad42b48e4b0de4997aa3a1dd8",
57
  "placeholder": "​",
58
- "style": "IPY_MODEL_277704b916aa40759b6db36688d1d151",
59
  "value": "Parsing nodes: 100%"
60
  }
61
  },
62
- "bf68ea9156ea438da61f8ca1fc529f7d": {
63
  "model_module": "@jupyter-widgets/controls",
64
  "model_name": "FloatProgressModel",
65
  "model_module_version": "1.5.0",
@@ -75,15 +75,15 @@
75
  "bar_style": "success",
76
  "description": "",
77
  "description_tooltip": null,
78
- "layout": "IPY_MODEL_2f3166628904417f97438e7c852f3b82",
79
  "max": 14,
80
  "min": 0,
81
  "orientation": "horizontal",
82
- "style": "IPY_MODEL_0eec40171bba48268b176b55e7a85fff",
83
  "value": 14
84
  }
85
  },
86
- "1fd460ecbf644d049ed1faf88322649f": {
87
  "model_module": "@jupyter-widgets/controls",
88
  "model_name": "HTMLModel",
89
  "model_module_version": "1.5.0",
@@ -98,13 +98,13 @@
98
  "_view_name": "HTMLView",
99
  "description": "",
100
  "description_tooltip": null,
101
- "layout": "IPY_MODEL_f51b2e545d794ccead531822085f1a2d",
102
  "placeholder": "​",
103
- "style": "IPY_MODEL_c9840d43284a419face648ce394a8e80",
104
- "value": " 14/14 [00:00<00:00, 30.79it/s]"
105
  }
106
  },
107
- "36f78ffa8f9847149c5cab414fac1226": {
108
  "model_module": "@jupyter-widgets/base",
109
  "model_name": "LayoutModel",
110
  "model_module_version": "1.2.0",
@@ -156,7 +156,7 @@
156
  "width": null
157
  }
158
  },
159
- "0862d91ad42b48e4b0de4997aa3a1dd8": {
160
  "model_module": "@jupyter-widgets/base",
161
  "model_name": "LayoutModel",
162
  "model_module_version": "1.2.0",
@@ -208,7 +208,7 @@
208
  "width": null
209
  }
210
  },
211
- "277704b916aa40759b6db36688d1d151": {
212
  "model_module": "@jupyter-widgets/controls",
213
  "model_name": "DescriptionStyleModel",
214
  "model_module_version": "1.5.0",
@@ -223,7 +223,7 @@
223
  "description_width": ""
224
  }
225
  },
226
- "2f3166628904417f97438e7c852f3b82": {
227
  "model_module": "@jupyter-widgets/base",
228
  "model_name": "LayoutModel",
229
  "model_module_version": "1.2.0",
@@ -275,7 +275,7 @@
275
  "width": null
276
  }
277
  },
278
- "0eec40171bba48268b176b55e7a85fff": {
279
  "model_module": "@jupyter-widgets/controls",
280
  "model_name": "ProgressStyleModel",
281
  "model_module_version": "1.5.0",
@@ -291,7 +291,7 @@
291
  "description_width": ""
292
  }
293
  },
294
- "f51b2e545d794ccead531822085f1a2d": {
295
  "model_module": "@jupyter-widgets/base",
296
  "model_name": "LayoutModel",
297
  "model_module_version": "1.2.0",
@@ -343,7 +343,7 @@
343
  "width": null
344
  }
345
  },
346
- "c9840d43284a419face648ce394a8e80": {
347
  "model_module": "@jupyter-widgets/controls",
348
  "model_name": "DescriptionStyleModel",
349
  "model_module_version": "1.5.0",
@@ -358,7 +358,7 @@
358
  "description_width": ""
359
  }
360
  },
361
- "3039a33537694d60a5321035fd0bbcc5": {
362
  "model_module": "@jupyter-widgets/controls",
363
  "model_name": "HBoxModel",
364
  "model_module_version": "1.5.0",
@@ -373,14 +373,14 @@
373
  "_view_name": "HBoxView",
374
  "box_style": "",
375
  "children": [
376
- "IPY_MODEL_082a58bc5d1c4b9e902384a7aa8357c7",
377
- "IPY_MODEL_e78bfb57ba894a1790129b747a280176",
378
- "IPY_MODEL_6a7ece36105542e59a308b16c13c1a47"
379
  ],
380
- "layout": "IPY_MODEL_57a79b4c6aca4dd7b861d736b7181d5f"
381
  }
382
  },
383
- "082a58bc5d1c4b9e902384a7aa8357c7": {
384
  "model_module": "@jupyter-widgets/controls",
385
  "model_name": "HTMLModel",
386
  "model_module_version": "1.5.0",
@@ -395,13 +395,13 @@
395
  "_view_name": "HTMLView",
396
  "description": "",
397
  "description_tooltip": null,
398
- "layout": "IPY_MODEL_f11c9bb9afb245cc8141f1fd606ba659",
399
  "placeholder": "​",
400
- "style": "IPY_MODEL_508c726834b54f1b8605d2319d0fdcc0",
401
  "value": "Generating embeddings: 100%"
402
  }
403
  },
404
- "e78bfb57ba894a1790129b747a280176": {
405
  "model_module": "@jupyter-widgets/controls",
406
  "model_name": "FloatProgressModel",
407
  "model_module_version": "1.5.0",
@@ -417,15 +417,15 @@
417
  "bar_style": "success",
418
  "description": "",
419
  "description_tooltip": null,
420
- "layout": "IPY_MODEL_9b16832c651b4be8b53237c8de248661",
421
  "max": 108,
422
  "min": 0,
423
  "orientation": "horizontal",
424
- "style": "IPY_MODEL_0db958902d654bcc8cd5dd922335a017",
425
  "value": 108
426
  }
427
  },
428
- "6a7ece36105542e59a308b16c13c1a47": {
429
  "model_module": "@jupyter-widgets/controls",
430
  "model_name": "HTMLModel",
431
  "model_module_version": "1.5.0",
@@ -440,13 +440,13 @@
440
  "_view_name": "HTMLView",
441
  "description": "",
442
  "description_tooltip": null,
443
- "layout": "IPY_MODEL_afe9cbdfd8814f6281d3b3757c528110",
444
  "placeholder": "​",
445
- "style": "IPY_MODEL_9581c6ec8da8419e8d95a00932060fed",
446
- "value": " 108/108 [00:01<00:00, 96.77it/s]"
447
  }
448
  },
449
- "57a79b4c6aca4dd7b861d736b7181d5f": {
450
  "model_module": "@jupyter-widgets/base",
451
  "model_name": "LayoutModel",
452
  "model_module_version": "1.2.0",
@@ -498,7 +498,7 @@
498
  "width": null
499
  }
500
  },
501
- "f11c9bb9afb245cc8141f1fd606ba659": {
502
  "model_module": "@jupyter-widgets/base",
503
  "model_name": "LayoutModel",
504
  "model_module_version": "1.2.0",
@@ -550,7 +550,7 @@
550
  "width": null
551
  }
552
  },
553
- "508c726834b54f1b8605d2319d0fdcc0": {
554
  "model_module": "@jupyter-widgets/controls",
555
  "model_name": "DescriptionStyleModel",
556
  "model_module_version": "1.5.0",
@@ -565,7 +565,7 @@
565
  "description_width": ""
566
  }
567
  },
568
- "9b16832c651b4be8b53237c8de248661": {
569
  "model_module": "@jupyter-widgets/base",
570
  "model_name": "LayoutModel",
571
  "model_module_version": "1.2.0",
@@ -617,7 +617,7 @@
617
  "width": null
618
  }
619
  },
620
- "0db958902d654bcc8cd5dd922335a017": {
621
  "model_module": "@jupyter-widgets/controls",
622
  "model_name": "ProgressStyleModel",
623
  "model_module_version": "1.5.0",
@@ -633,7 +633,7 @@
633
  "description_width": ""
634
  }
635
  },
636
- "afe9cbdfd8814f6281d3b3757c528110": {
637
  "model_module": "@jupyter-widgets/base",
638
  "model_name": "LayoutModel",
639
  "model_module_version": "1.2.0",
@@ -685,7 +685,7 @@
685
  "width": null
686
  }
687
  },
688
- "9581c6ec8da8419e8d95a00932060fed": {
689
  "model_module": "@jupyter-widgets/controls",
690
  "model_name": "DescriptionStyleModel",
691
  "model_module_version": "1.5.0",
@@ -731,28 +731,28 @@
731
  "colab": {
732
  "base_uri": "https://localhost:8080/"
733
  },
734
- "outputId": "08f002fb-9243-473e-db2e-346ec1138d2c"
735
  },
736
  "outputs": [
737
  {
738
  "output_type": "stream",
739
  "name": "stdout",
740
  "text": [
741
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m226.7/226.7 kB\u001b[0m \u001b[31m3.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
742
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m1.8/1.8 MB\u001b[0m \u001b[31m10.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
743
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m15.4/15.4 MB\u001b[0m \u001b[31m34.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
744
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m2.0/2.0 MB\u001b[0m \u001b[31m44.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
745
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m75.6/75.6 kB\u001b[0m \u001b[31m5.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
746
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m254.1/254.1 kB\u001b[0m \u001b[31m17.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
747
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m77.9/77.9 kB\u001b[0m \u001b[31m3.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
748
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m58.3/58.3 kB\u001b[0m \u001b[31m5.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
749
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m130.8/130.8 kB\u001b[0m \u001b[31m13.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
750
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m141.9/141.9 kB\u001b[0m \u001b[31m13.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
751
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m290.4/290.4 kB\u001b[0m \u001b[31m17.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
752
  "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m2.3/2.3 MB\u001b[0m \u001b[31m17.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
753
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m309.3/309.3 kB\u001b[0m \u001b[31m13.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
754
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m57.5/57.5 kB\u001b[0m \u001b[31m5.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
755
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m49.2/49.2 kB\u001b[0m \u001b[31m3.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
756
  "\u001b[?25h\u001b[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.\n",
757
  "cudf-cu12 24.4.1 requires protobuf<5,>=3.20, but you have protobuf 5.27.2 which is incompatible.\n",
758
  "google-ai-generativelanguage 0.6.4 requires protobuf!=3.20.0,!=3.20.1,!=4.21.0,!=4.21.1,!=4.21.2,!=4.21.3,!=4.21.4,!=4.21.5,<5.0.0dev,>=3.19.5, but you have protobuf 5.27.2 which is incompatible.\n",
@@ -789,7 +789,7 @@
789
  "metadata": {
790
  "id": "riuXwpSPcvWC"
791
  },
792
- "execution_count": null,
793
  "outputs": []
794
  },
795
  {
@@ -802,7 +802,7 @@
802
  "metadata": {
803
  "id": "jIEeZzqLbz0J"
804
  },
805
- "execution_count": null,
806
  "outputs": []
807
  },
808
  {
@@ -824,7 +824,7 @@
824
  "metadata": {
825
  "id": "9oGT6crooSSj"
826
  },
827
- "execution_count": null,
828
  "outputs": []
829
  },
830
  {
@@ -849,7 +849,7 @@
849
  "metadata": {
850
  "id": "aNY6mrk6BF7V"
851
  },
852
- "execution_count": null,
853
  "outputs": []
854
  },
855
  {
@@ -862,7 +862,7 @@
862
  "metadata": {
863
  "id": "Z109ur9OC7U_"
864
  },
865
- "execution_count": null,
866
  "outputs": []
867
  },
868
  {
@@ -902,24 +902,24 @@
902
  "base_uri": "https://localhost:8080/"
903
  },
904
  "id": "wl_pbPvMlv1h",
905
- "outputId": "70f543d9-5adb-49ce-cfea-2697f2f67582"
906
  },
907
- "execution_count": null,
908
  "outputs": [
909
  {
910
  "output_type": "stream",
911
  "name": "stdout",
912
  "text": [
913
- "--2024-07-03 19:38:17-- https://raw.githubusercontent.com/AlaFalaki/tutorial_notebooks/main/data/mini-llama-articles.csv\n",
914
- "Resolving raw.githubusercontent.com (raw.githubusercontent.com)... 185.199.111.133, 185.199.108.133, 185.199.110.133, ...\n",
915
- "Connecting to raw.githubusercontent.com (raw.githubusercontent.com)|185.199.111.133|:443... connected.\n",
916
  "HTTP request sent, awaiting response... 200 OK\n",
917
  "Length: 173646 (170K) [text/plain]\n",
918
  "Saving to: ‘mini-llama-articles.csv’\n",
919
  "\n",
920
- "\rmini-llama-articles 0%[ ] 0 --.-KB/s \rmini-llama-articles 100%[===================>] 169.58K --.-KB/s in 0.02s \n",
921
  "\n",
922
- "2024-07-03 19:38:17 (6.99 MB/s) - ‘mini-llama-articles.csv’ saved [173646/173646]\n",
923
  "\n"
924
  ]
925
  }
@@ -957,9 +957,9 @@
957
  "colab": {
958
  "base_uri": "https://localhost:8080/"
959
  },
960
- "outputId": "e5942cc4-db86-435d-aa1e-e50872c54978"
961
  },
962
- "execution_count": null,
963
  "outputs": [
964
  {
965
  "output_type": "execute_result",
@@ -993,7 +993,7 @@
993
  "metadata": {
994
  "id": "YizvmXPejkJE"
995
  },
996
- "execution_count": null,
997
  "outputs": []
998
  },
999
  {
@@ -1019,7 +1019,7 @@
1019
  "metadata": {
1020
  "id": "9z3t70DGWsjO"
1021
  },
1022
- "execution_count": null,
1023
  "outputs": []
1024
  },
1025
  {
@@ -1050,34 +1050,34 @@
1050
  "base_uri": "https://localhost:8080/",
1051
  "height": 116,
1052
  "referenced_widgets": [
1053
- "1350b1f2c9df4113bde94c59ad0db010",
1054
- "7f3d8a3d4f614b4d99b048c021c1615c",
1055
- "bf68ea9156ea438da61f8ca1fc529f7d",
1056
- "1fd460ecbf644d049ed1faf88322649f",
1057
- "36f78ffa8f9847149c5cab414fac1226",
1058
- "0862d91ad42b48e4b0de4997aa3a1dd8",
1059
- "277704b916aa40759b6db36688d1d151",
1060
- "2f3166628904417f97438e7c852f3b82",
1061
- "0eec40171bba48268b176b55e7a85fff",
1062
- "f51b2e545d794ccead531822085f1a2d",
1063
- "c9840d43284a419face648ce394a8e80",
1064
- "3039a33537694d60a5321035fd0bbcc5",
1065
- "082a58bc5d1c4b9e902384a7aa8357c7",
1066
- "e78bfb57ba894a1790129b747a280176",
1067
- "6a7ece36105542e59a308b16c13c1a47",
1068
- "57a79b4c6aca4dd7b861d736b7181d5f",
1069
- "f11c9bb9afb245cc8141f1fd606ba659",
1070
- "508c726834b54f1b8605d2319d0fdcc0",
1071
- "9b16832c651b4be8b53237c8de248661",
1072
- "0db958902d654bcc8cd5dd922335a017",
1073
- "afe9cbdfd8814f6281d3b3757c528110",
1074
- "9581c6ec8da8419e8d95a00932060fed"
1075
  ]
1076
  },
1077
  "id": "P9LDJ7o-Wsc-",
1078
- "outputId": "8fcb2a02-6322-40d8-9852-2b2814b6b9c6"
1079
  },
1080
- "execution_count": null,
1081
  "outputs": [
1082
  {
1083
  "output_type": "display_data",
@@ -1088,7 +1088,7 @@
1088
  "application/vnd.jupyter.widget-view+json": {
1089
  "version_major": 2,
1090
  "version_minor": 0,
1091
- "model_id": "1350b1f2c9df4113bde94c59ad0db010"
1092
  }
1093
  },
1094
  "metadata": {}
@@ -1097,7 +1097,7 @@
1097
  "output_type": "stream",
1098
  "name": "stderr",
1099
  "text": [
1100
- "100%|██████████| 108/108 [00:35<00:00, 3.07it/s]\n"
1101
  ]
1102
  },
1103
  {
@@ -1109,7 +1109,7 @@
1109
  "application/vnd.jupyter.widget-view+json": {
1110
  "version_major": 2,
1111
  "version_minor": 0,
1112
- "model_id": "3039a33537694d60a5321035fd0bbcc5"
1113
  }
1114
  },
1115
  "metadata": {}
@@ -1133,9 +1133,9 @@
1133
  "base_uri": "https://localhost:8080/"
1134
  },
1135
  "id": "mPGa85hM2P3P",
1136
- "outputId": "6c176992-8405-4702-a1e5-c27c975b29c2"
1137
  },
1138
- "execution_count": null,
1139
  "outputs": [
1140
  {
1141
  "output_type": "execute_result",
@@ -1149,6 +1149,35 @@
1149
  }
1150
  ]
1151
  },
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1152
  {
1153
  "cell_type": "code",
1154
  "source": [
 
4
  "metadata": {
5
  "colab": {
6
  "provenance": [],
7
+ "authorship_tag": "ABX9TyPf/kwiH4yYyYbpjHG8mnc3",
8
  "include_colab_link": true
9
  },
10
  "kernelspec": {
 
16
  },
17
  "widgets": {
18
  "application/vnd.jupyter.widget-state+json": {
19
+ "d3a4fe4269ed45dcab07c9068d8d4ea8": {
20
  "model_module": "@jupyter-widgets/controls",
21
  "model_name": "HBoxModel",
22
  "model_module_version": "1.5.0",
 
31
  "_view_name": "HBoxView",
32
  "box_style": "",
33
  "children": [
34
+ "IPY_MODEL_766140d81c3246849fabd1c7f38a25cf",
35
+ "IPY_MODEL_c0ec83c50bef4785a803505390f99de5",
36
+ "IPY_MODEL_d1af5e772b954edfa6e4da4df8069e07"
37
  ],
38
+ "layout": "IPY_MODEL_e07f5e5042cb4530b2b6bc4ba3c7baa5"
39
  }
40
  },
41
+ "766140d81c3246849fabd1c7f38a25cf": {
42
  "model_module": "@jupyter-widgets/controls",
43
  "model_name": "HTMLModel",
44
  "model_module_version": "1.5.0",
 
53
  "_view_name": "HTMLView",
54
  "description": "",
55
  "description_tooltip": null,
56
+ "layout": "IPY_MODEL_218d93e956a24e399b9c587fd4aaaaae",
57
  "placeholder": "​",
58
+ "style": "IPY_MODEL_28cfd4c07624419099097b0624eddb20",
59
  "value": "Parsing nodes: 100%"
60
  }
61
  },
62
+ "c0ec83c50bef4785a803505390f99de5": {
63
  "model_module": "@jupyter-widgets/controls",
64
  "model_name": "FloatProgressModel",
65
  "model_module_version": "1.5.0",
 
75
  "bar_style": "success",
76
  "description": "",
77
  "description_tooltip": null,
78
+ "layout": "IPY_MODEL_22601ab55f4845269c4392bdfcd6c779",
79
  "max": 14,
80
  "min": 0,
81
  "orientation": "horizontal",
82
+ "style": "IPY_MODEL_586d67e1afae42ca9c65a17ae13573d8",
83
  "value": 14
84
  }
85
  },
86
+ "d1af5e772b954edfa6e4da4df8069e07": {
87
  "model_module": "@jupyter-widgets/controls",
88
  "model_name": "HTMLModel",
89
  "model_module_version": "1.5.0",
 
98
  "_view_name": "HTMLView",
99
  "description": "",
100
  "description_tooltip": null,
101
+ "layout": "IPY_MODEL_6a459a1e65494dcba3a86dd30071909a",
102
  "placeholder": "​",
103
+ "style": "IPY_MODEL_e74876ff3dd049f8bc2e0fbe71dcd255",
104
+ "value": " 14/14 [00:01&lt;00:00, 13.21it/s]"
105
  }
106
  },
107
+ "e07f5e5042cb4530b2b6bc4ba3c7baa5": {
108
  "model_module": "@jupyter-widgets/base",
109
  "model_name": "LayoutModel",
110
  "model_module_version": "1.2.0",
 
156
  "width": null
157
  }
158
  },
159
+ "218d93e956a24e399b9c587fd4aaaaae": {
160
  "model_module": "@jupyter-widgets/base",
161
  "model_name": "LayoutModel",
162
  "model_module_version": "1.2.0",
 
208
  "width": null
209
  }
210
  },
211
+ "28cfd4c07624419099097b0624eddb20": {
212
  "model_module": "@jupyter-widgets/controls",
213
  "model_name": "DescriptionStyleModel",
214
  "model_module_version": "1.5.0",
 
223
  "description_width": ""
224
  }
225
  },
226
+ "22601ab55f4845269c4392bdfcd6c779": {
227
  "model_module": "@jupyter-widgets/base",
228
  "model_name": "LayoutModel",
229
  "model_module_version": "1.2.0",
 
275
  "width": null
276
  }
277
  },
278
+ "586d67e1afae42ca9c65a17ae13573d8": {
279
  "model_module": "@jupyter-widgets/controls",
280
  "model_name": "ProgressStyleModel",
281
  "model_module_version": "1.5.0",
 
291
  "description_width": ""
292
  }
293
  },
294
+ "6a459a1e65494dcba3a86dd30071909a": {
295
  "model_module": "@jupyter-widgets/base",
296
  "model_name": "LayoutModel",
297
  "model_module_version": "1.2.0",
 
343
  "width": null
344
  }
345
  },
346
+ "e74876ff3dd049f8bc2e0fbe71dcd255": {
347
  "model_module": "@jupyter-widgets/controls",
348
  "model_name": "DescriptionStyleModel",
349
  "model_module_version": "1.5.0",
 
358
  "description_width": ""
359
  }
360
  },
361
+ "4f34f259d011412f8caab28a8bacfab5": {
362
  "model_module": "@jupyter-widgets/controls",
363
  "model_name": "HBoxModel",
364
  "model_module_version": "1.5.0",
 
373
  "_view_name": "HBoxView",
374
  "box_style": "",
375
  "children": [
376
+ "IPY_MODEL_99530a50cd02411dacdcdc64bfa780cc",
377
+ "IPY_MODEL_1e07dbafe07c4801913827784f5ce01e",
378
+ "IPY_MODEL_33744ece0c6c4fb18ca4918514e51e71"
379
  ],
380
+ "layout": "IPY_MODEL_be4df739b8fc49e2b65bb9cf68f17422"
381
  }
382
  },
383
+ "99530a50cd02411dacdcdc64bfa780cc": {
384
  "model_module": "@jupyter-widgets/controls",
385
  "model_name": "HTMLModel",
386
  "model_module_version": "1.5.0",
 
395
  "_view_name": "HTMLView",
396
  "description": "",
397
  "description_tooltip": null,
398
+ "layout": "IPY_MODEL_215e22dbf804407281752569e78672c4",
399
  "placeholder": "​",
400
+ "style": "IPY_MODEL_355241154533492daa560f1f9269693e",
401
  "value": "Generating embeddings: 100%"
402
  }
403
  },
404
+ "1e07dbafe07c4801913827784f5ce01e": {
405
  "model_module": "@jupyter-widgets/controls",
406
  "model_name": "FloatProgressModel",
407
  "model_module_version": "1.5.0",
 
417
  "bar_style": "success",
418
  "description": "",
419
  "description_tooltip": null,
420
+ "layout": "IPY_MODEL_2990c7fc63f243bc8e984f7953f66c7e",
421
  "max": 108,
422
  "min": 0,
423
  "orientation": "horizontal",
424
+ "style": "IPY_MODEL_157d589b1b6c46b68b9457b117908fdd",
425
  "value": 108
426
  }
427
  },
428
+ "33744ece0c6c4fb18ca4918514e51e71": {
429
  "model_module": "@jupyter-widgets/controls",
430
  "model_name": "HTMLModel",
431
  "model_module_version": "1.5.0",
 
440
  "_view_name": "HTMLView",
441
  "description": "",
442
  "description_tooltip": null,
443
+ "layout": "IPY_MODEL_865e3622a8ff4ad0b457bd6dfc1aa703",
444
  "placeholder": "​",
445
+ "style": "IPY_MODEL_e3ef913013ca48feabe77c39af71a17e",
446
+ "value": " 108/108 [00:02&lt;00:00, 39.82it/s]"
447
  }
448
  },
449
+ "be4df739b8fc49e2b65bb9cf68f17422": {
450
  "model_module": "@jupyter-widgets/base",
451
  "model_name": "LayoutModel",
452
  "model_module_version": "1.2.0",
 
498
  "width": null
499
  }
500
  },
501
+ "215e22dbf804407281752569e78672c4": {
502
  "model_module": "@jupyter-widgets/base",
503
  "model_name": "LayoutModel",
504
  "model_module_version": "1.2.0",
 
550
  "width": null
551
  }
552
  },
553
+ "355241154533492daa560f1f9269693e": {
554
  "model_module": "@jupyter-widgets/controls",
555
  "model_name": "DescriptionStyleModel",
556
  "model_module_version": "1.5.0",
 
565
  "description_width": ""
566
  }
567
  },
568
+ "2990c7fc63f243bc8e984f7953f66c7e": {
569
  "model_module": "@jupyter-widgets/base",
570
  "model_name": "LayoutModel",
571
  "model_module_version": "1.2.0",
 
617
  "width": null
618
  }
619
  },
620
+ "157d589b1b6c46b68b9457b117908fdd": {
621
  "model_module": "@jupyter-widgets/controls",
622
  "model_name": "ProgressStyleModel",
623
  "model_module_version": "1.5.0",
 
633
  "description_width": ""
634
  }
635
  },
636
+ "865e3622a8ff4ad0b457bd6dfc1aa703": {
637
  "model_module": "@jupyter-widgets/base",
638
  "model_name": "LayoutModel",
639
  "model_module_version": "1.2.0",
 
685
  "width": null
686
  }
687
  },
688
+ "e3ef913013ca48feabe77c39af71a17e": {
689
  "model_module": "@jupyter-widgets/controls",
690
  "model_name": "DescriptionStyleModel",
691
  "model_module_version": "1.5.0",
 
731
  "colab": {
732
  "base_uri": "https://localhost:8080/"
733
  },
734
+ "outputId": "8719014d-1535-44ec-afb9-a29eb154e737"
735
  },
736
  "outputs": [
737
  {
738
  "output_type": "stream",
739
  "name": "stdout",
740
  "text": [
741
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m226.7/226.7 kB\u001b[0m \u001b[31m3.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
742
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m1.8/1.8 MB\u001b[0m \u001b[31m16.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
743
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m15.4/15.4 MB\u001b[0m \u001b[31m47.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
744
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m2.0/2.0 MB\u001b[0m \u001b[31m46.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
745
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m75.6/75.6 kB\u001b[0m \u001b[31m6.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
746
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m254.1/254.1 kB\u001b[0m \u001b[31m10.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
747
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m77.9/77.9 kB\u001b[0m \u001b[31m4.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
748
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m58.3/58.3 kB\u001b[0m \u001b[31m3.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
749
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m130.8/130.8 kB\u001b[0m \u001b[31m10.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
750
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m141.9/141.9 kB\u001b[0m \u001b[31m9.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
751
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m290.4/290.4 kB\u001b[0m \u001b[31m8.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
752
  "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m2.3/2.3 MB\u001b[0m \u001b[31m17.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
753
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m309.3/309.3 kB\u001b[0m \u001b[31m13.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
754
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m57.5/57.5 kB\u001b[0m \u001b[31m5.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
755
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m49.2/49.2 kB\u001b[0m \u001b[31m3.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
756
  "\u001b[?25h\u001b[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.\n",
757
  "cudf-cu12 24.4.1 requires protobuf<5,>=3.20, but you have protobuf 5.27.2 which is incompatible.\n",
758
  "google-ai-generativelanguage 0.6.4 requires protobuf!=3.20.0,!=3.20.1,!=4.21.0,!=4.21.1,!=4.21.2,!=4.21.3,!=4.21.4,!=4.21.5,<5.0.0dev,>=3.19.5, but you have protobuf 5.27.2 which is incompatible.\n",
 
789
  "metadata": {
790
  "id": "riuXwpSPcvWC"
791
  },
792
+ "execution_count": 2,
793
  "outputs": []
794
  },
795
  {
 
802
  "metadata": {
803
  "id": "jIEeZzqLbz0J"
804
  },
805
+ "execution_count": 3,
806
  "outputs": []
807
  },
808
  {
 
824
  "metadata": {
825
  "id": "9oGT6crooSSj"
826
  },
827
+ "execution_count": 4,
828
  "outputs": []
829
  },
830
  {
 
849
  "metadata": {
850
  "id": "aNY6mrk6BF7V"
851
  },
852
+ "execution_count": 5,
853
  "outputs": []
854
  },
855
  {
 
862
  "metadata": {
863
  "id": "Z109ur9OC7U_"
864
  },
865
+ "execution_count": 6,
866
  "outputs": []
867
  },
868
  {
 
902
  "base_uri": "https://localhost:8080/"
903
  },
904
  "id": "wl_pbPvMlv1h",
905
+ "outputId": "9d052275-9b9d-4976-f2f5-1a1d111bbb60"
906
  },
907
+ "execution_count": 7,
908
  "outputs": [
909
  {
910
  "output_type": "stream",
911
  "name": "stdout",
912
  "text": [
913
+ "--2024-07-10 17:06:11-- https://raw.githubusercontent.com/AlaFalaki/tutorial_notebooks/main/data/mini-llama-articles.csv\n",
914
+ "Resolving raw.githubusercontent.com (raw.githubusercontent.com)... 185.199.109.133, 185.199.108.133, 185.199.111.133, ...\n",
915
+ "Connecting to raw.githubusercontent.com (raw.githubusercontent.com)|185.199.109.133|:443... connected.\n",
916
  "HTTP request sent, awaiting response... 200 OK\n",
917
  "Length: 173646 (170K) [text/plain]\n",
918
  "Saving to: ‘mini-llama-articles.csv’\n",
919
  "\n",
920
+ "mini-llama-articles 100%[===================>] 169.58K --.-KB/s in 0.1s \n",
921
  "\n",
922
+ "2024-07-10 17:06:12 (1.52 MB/s) - ‘mini-llama-articles.csv’ saved [173646/173646]\n",
923
  "\n"
924
  ]
925
  }
 
957
  "colab": {
958
  "base_uri": "https://localhost:8080/"
959
  },
960
+ "outputId": "9efce6d4-760f-435b-9e6e-44a993d11442"
961
  },
962
+ "execution_count": 8,
963
  "outputs": [
964
  {
965
  "output_type": "execute_result",
 
993
  "metadata": {
994
  "id": "YizvmXPejkJE"
995
  },
996
+ "execution_count": 9,
997
  "outputs": []
998
  },
999
  {
 
1019
  "metadata": {
1020
  "id": "9z3t70DGWsjO"
1021
  },
1022
+ "execution_count": 10,
1023
  "outputs": []
1024
  },
1025
  {
 
1050
  "base_uri": "https://localhost:8080/",
1051
  "height": 116,
1052
  "referenced_widgets": [
1053
+ "d3a4fe4269ed45dcab07c9068d8d4ea8",
1054
+ "766140d81c3246849fabd1c7f38a25cf",
1055
+ "c0ec83c50bef4785a803505390f99de5",
1056
+ "d1af5e772b954edfa6e4da4df8069e07",
1057
+ "e07f5e5042cb4530b2b6bc4ba3c7baa5",
1058
+ "218d93e956a24e399b9c587fd4aaaaae",
1059
+ "28cfd4c07624419099097b0624eddb20",
1060
+ "22601ab55f4845269c4392bdfcd6c779",
1061
+ "586d67e1afae42ca9c65a17ae13573d8",
1062
+ "6a459a1e65494dcba3a86dd30071909a",
1063
+ "e74876ff3dd049f8bc2e0fbe71dcd255",
1064
+ "4f34f259d011412f8caab28a8bacfab5",
1065
+ "99530a50cd02411dacdcdc64bfa780cc",
1066
+ "1e07dbafe07c4801913827784f5ce01e",
1067
+ "33744ece0c6c4fb18ca4918514e51e71",
1068
+ "be4df739b8fc49e2b65bb9cf68f17422",
1069
+ "215e22dbf804407281752569e78672c4",
1070
+ "355241154533492daa560f1f9269693e",
1071
+ "2990c7fc63f243bc8e984f7953f66c7e",
1072
+ "157d589b1b6c46b68b9457b117908fdd",
1073
+ "865e3622a8ff4ad0b457bd6dfc1aa703",
1074
+ "e3ef913013ca48feabe77c39af71a17e"
1075
  ]
1076
  },
1077
  "id": "P9LDJ7o-Wsc-",
1078
+ "outputId": "139c0860-71d6-47a4-a569-61499a173f9b"
1079
  },
1080
+ "execution_count": 11,
1081
  "outputs": [
1082
  {
1083
  "output_type": "display_data",
 
1088
  "application/vnd.jupyter.widget-view+json": {
1089
  "version_major": 2,
1090
  "version_minor": 0,
1091
+ "model_id": "d3a4fe4269ed45dcab07c9068d8d4ea8"
1092
  }
1093
  },
1094
  "metadata": {}
 
1097
  "output_type": "stream",
1098
  "name": "stderr",
1099
  "text": [
1100
+ "100%|██████████| 108/108 [00:57<00:00, 1.89it/s]\n"
1101
  ]
1102
  },
1103
  {
 
1109
  "application/vnd.jupyter.widget-view+json": {
1110
  "version_major": 2,
1111
  "version_minor": 0,
1112
+ "model_id": "4f34f259d011412f8caab28a8bacfab5"
1113
  }
1114
  },
1115
  "metadata": {}
 
1133
  "base_uri": "https://localhost:8080/"
1134
  },
1135
  "id": "mPGa85hM2P3P",
1136
+ "outputId": "de62aba3-ad82-4b78-a800-1aee72546c0f"
1137
  },
1138
+ "execution_count": 12,
1139
  "outputs": [
1140
  {
1141
  "output_type": "execute_result",
 
1149
  }
1150
  ]
1151
  },
1152
+ {
1153
+ "cell_type": "code",
1154
+ "source": [
1155
+ "nodes[0].metadata"
1156
+ ],
1157
+ "metadata": {
1158
+ "id": "03xtKcBanBDL",
1159
+ "outputId": "cea4fca1-f193-4f22-eade-b638888139b6",
1160
+ "colab": {
1161
+ "base_uri": "https://localhost:8080/"
1162
+ }
1163
+ },
1164
+ "execution_count": 15,
1165
+ "outputs": [
1166
+ {
1167
+ "output_type": "execute_result",
1168
+ "data": {
1169
+ "text/plain": [
1170
+ "{'title': \"Beyond GPT-4: What's New?\",\n",
1171
+ " 'url': 'https://pub.towardsai.net/beyond-gpt-4-whats-new-cbd61a448eb9#dda8',\n",
1172
+ " 'source_name': 'towards_ai',\n",
1173
+ " 'excerpt_keywords': 'Meta, Llama 2, Llama 2-Chat, Code Llama, LLMs, GPT-4, open-source, fine-tuning, benchmark, multimodal models, dialogue-centric applications, human-centric evaluations, AI development, code tasks, transparency.'}"
1174
+ ]
1175
+ },
1176
+ "metadata": {},
1177
+ "execution_count": 15
1178
+ }
1179
+ ]
1180
+ },
1181
  {
1182
  "cell_type": "code",
1183
  "source": [