Akshat1000 commited on
Commit
6c598c9
1 Parent(s): 761e513

Upload llllmmm.ipynb

Browse files
Files changed (1) hide show
  1. llllmmm.ipynb +303 -1199
llllmmm.ipynb CHANGED
@@ -13,11 +13,10 @@
13
  "source": [
14
  "import torch\n",
15
  "from transformers import AutoTokenizer, AutoModelForSeq2SeqLM, TrainingArguments\n",
16
- "device = torch.device(\"cuda\" if torch.cuda.is_available() else \"cpu\")\n",
17
- "modelName=\"google/flan-t5-xl\""
18
  ],
19
  "outputs": [],
20
- "execution_count": 11
21
  },
22
  {
23
  "cell_type": "code",
@@ -29,71 +28,38 @@
29
  },
30
  "colab": {
31
  "base_uri": "https://localhost:8080/",
32
- "height": 336,
33
  "referenced_widgets": [
34
- "862cf57104f9417c89304dd4dabe423b",
35
- "dd0486901a544b3b9a2241a7c2687e45",
36
- "83db99b45e07443f93323b0d4a2b6123",
37
- "1f870de8520646e48f81158c0e91cc69",
38
- "fbd62b548faa4b57900ad114f616b8be",
39
- "e5831ea8ddb24c13a58206c3b2105289",
40
- "6211f1d590884b3ea13a5b3b327e5ce4",
41
- "baa5bc3b299c4d92a40cc57dde604234",
42
- "26c35a5e2404485b9edafd208e591ec4",
43
- "be8461f2a274440fb986b0cab0ee7294",
44
- "a32d56f15ca14ee983428a17a98b6428",
45
- "ea1b0a22fbc1463aba924049c9e14f01",
46
- "b75e1eaeaeb34482bd9ad362fa7234df",
47
- "1464d10b061d4141a80c11bb8f43d421",
48
- "d319f231bd3b463799b5ccff4d0e2991",
49
- "d98d861b836b4ac7b90d032d0e626126",
50
- "65339806b36248609c0cdf5b2058da99",
51
- "811774b3b16d40b1ab35ac374c2b5141",
52
- "21e139b2a50f4eb0a4bfbfd9da79b4cc",
53
- "8bdc1bc6d52445d99454f4c8b1903044",
54
- "8b0c04b3fe7546858d0555144cf40e3b",
55
- "892bd307f77b45c885c1a61bfd229439",
56
- "aee8a770b18341de904a7e2bbb019424",
57
- "e5d7d41c1e67451cae59b992706c0592",
58
- "ebb9e9e05ac9452292bf734dbb2e707d",
59
- "35e46cf2dca748368c95703794d7c0ed",
60
- "becd058ba229403d85b906c49db69e61",
61
- "ab406b8adf4146078bc5c57d31f6f405",
62
- "19a48a7be73d46858f0d3d35d97e5c1d",
63
- "e445e34f32944a0e88df4d7bb88251f3",
64
- "322ad5c458884148b410aedc7b076d47",
65
- "4d5e46b60ae1462d9d9d00b55342bd06",
66
- "8065efbe037946b5a99a92df63bb65d1",
67
- "b4835d10c5cf45b7b5d00cc1f4c7224b",
68
- "d5dc3aa40ba747e48d40fc6c7e1c9ab9",
69
- "94b13990da8140d289eb48460afb239b",
70
- "cd7fe2f3f3a945a1bde15a24b9bc124b",
71
- "69b58423c5da48b985f6365a0c3be9ce",
72
- "1bc829b42be24648908dc8e93bb7d589",
73
- "c7acf97b800a4e5d8c4ba5197ecb650b",
74
- "ae5ed23ebf514ffbb4d2ab0b32dc0875",
75
- "3bd82485e1ae44dabb4c07de0adbb1a2",
76
- "d16ffd33b7434c9f8a9132438a7c55cb",
77
- "eb794ab5bfd2413c86380b5dc5f31494",
78
- "698856b6905d4f82b59e95851d9ae534",
79
- "de6c4520d29146eab51a49976107f172",
80
- "a6ce2d53ad8f462f9c169708f6d5c73d",
81
- "d560b2a5ff664d12956c742d51c1646f",
82
- "b205eb1e6d3446f5b75f1d783bfbbcf6",
83
- "c2e186e1bbfa4ff0a4b1e679011dc920",
84
- "a49d8021b0074bafabd89995b0e5a0e7",
85
- "eb2bfb8ebb2b45b6a67247db552d1df6",
86
- "920c4484283c4355a209bfb5188b6a6e",
87
- "3b0cf610ae764791805a2ceafc61adc4",
88
- "a2493b9c90f3421db9b727c1a7396e8d"
89
  ]
90
  },
91
  "id": "df5ce2489db64f8d",
92
- "outputId": "837fe222-f0e5-4c9a-bc94-bb3d83414c29"
93
  },
94
  "source": [
95
- "tokenizer = AutoTokenizer.from_pretrained(modelName)\n",
96
- "model = AutoModelForSeq2SeqLM.from_pretrained(modelName).to(device)"
97
  ],
98
  "outputs": [
99
  {
@@ -105,8 +71,6 @@
105
  "To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.\n",
106
  "You will be able to reuse this secret in all of your notebooks.\n",
107
  "Please note that authentication is recommended but still optional to access public models or datasets.\n",
108
- " warnings.warn(\n",
109
- "/usr/local/lib/python3.10/dist-packages/huggingface_hub/file_download.py:1132: FutureWarning: `resume_download` is deprecated and will be removed in version 1.0.0. Downloads always resume when possible. If you want to force a new download, use `force_download=True`.\n",
110
  " warnings.warn(\n"
111
  ]
112
  },
@@ -114,40 +78,12 @@
114
  "output_type": "display_data",
115
  "data": {
116
  "text/plain": [
117
- "config.json: 0%| | 0.00/1.21k [00:00<?, ?B/s]"
118
- ],
119
- "application/vnd.jupyter.widget-view+json": {
120
- "version_major": 2,
121
- "version_minor": 0,
122
- "model_id": "862cf57104f9417c89304dd4dabe423b"
123
- }
124
- },
125
- "metadata": {}
126
- },
127
- {
128
- "output_type": "display_data",
129
- "data": {
130
- "text/plain": [
131
- "spiece.model: 0%| | 0.00/792k [00:00<?, ?B/s]"
132
- ],
133
- "application/vnd.jupyter.widget-view+json": {
134
- "version_major": 2,
135
- "version_minor": 0,
136
- "model_id": "ea1b0a22fbc1463aba924049c9e14f01"
137
- }
138
- },
139
- "metadata": {}
140
- },
141
- {
142
- "output_type": "display_data",
143
- "data": {
144
- "text/plain": [
145
- "tokenizer.json: 0%| | 0.00/1.39M [00:00<?, ?B/s]"
146
  ],
147
  "application/vnd.jupyter.widget-view+json": {
148
  "version_major": 2,
149
  "version_minor": 0,
150
- "model_id": "aee8a770b18341de904a7e2bbb019424"
151
  }
152
  },
153
  "metadata": {}
@@ -156,32 +92,215 @@
156
  "output_type": "display_data",
157
  "data": {
158
  "text/plain": [
159
- "model.safetensors: 0%| | 0.00/2.95G [00:00<?, ?B/s]"
160
  ],
161
  "application/vnd.jupyter.widget-view+json": {
162
  "version_major": 2,
163
  "version_minor": 0,
164
- "model_id": "b4835d10c5cf45b7b5d00cc1f4c7224b"
165
  }
166
  },
167
  "metadata": {}
168
- },
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
169
  {
170
- "output_type": "display_data",
171
  "data": {
172
  "text/plain": [
173
- "generation_config.json: 0%| | 0.00/147 [00:00<?, ?B/s]"
174
- ],
175
- "application/vnd.jupyter.widget-view+json": {
176
- "version_major": 2,
177
- "version_minor": 0,
178
- "model_id": "698856b6905d4f82b59e95851d9ae534"
179
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
180
  },
181
- "metadata": {}
 
182
  }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
183
  ],
184
- "execution_count": 2
 
 
 
 
 
185
  },
186
  {
187
  "cell_type": "code",
@@ -194,14 +313,14 @@
194
  "id": "7ce8ee88e61ac738"
195
  },
196
  "source": [
197
- "def get_llama2_response(prompt, max_new_tokens=100):\n",
198
- " inputs = tokenizer(prompt, return_tensors=\"pt\")\n",
199
- " outputs = model.generate(**inputs, max_new_tokens=max_new_tokens, temperature= 1)\n",
200
- " response = tokenizer.decode(outputs[0], skip_special_tokens=True ,clean_up_tokenization_spaces=True) # Use indexing instead of calling\n",
201
  " return response"
202
  ],
203
  "outputs": [],
204
- "execution_count": 7
205
  },
206
  {
207
  "metadata": {
@@ -210,15 +329,15 @@
210
  },
211
  "colab": {
212
  "base_uri": "https://localhost:8080/",
213
- "height": 70
214
  },
215
  "id": "de9f0fcc6dc9fa82",
216
- "outputId": "c860d3e9-d630-491c-a837-dc84ff8be5ac"
217
  },
218
  "cell_type": "code",
219
  "source": [
220
- "prompt =\"Explain How TO Ride A Horse:\"\n",
221
- "get_llama2_response(prompt, max_new_tokens=100)"
222
  ],
223
  "id": "de9f0fcc6dc9fa82",
224
  "outputs": [
@@ -226,23 +345,32 @@
226
  "output_type": "execute_result",
227
  "data": {
228
  "text/plain": [
229
- "'- How TO RIDE A Horse: How TO How TO Ride A Horse: How TO Ride A Horse: How TO Ride A Horse: How TO Ride A Horse: How TO Ride A Horse: How TO Ride A Horse: How TO Ride A Horse: How TO Ride A Horse: How TO Ride A Horse:. Explain How TO Ride Explain How TO Ride A Horse:::::::: Explain How'"
230
  ],
231
  "application/vnd.google.colaboratory.intrinsic+json": {
232
  "type": "string"
233
  }
234
  },
235
  "metadata": {},
236
- "execution_count": 10
237
  }
238
  ],
239
- "execution_count": 10
 
 
 
 
 
 
 
 
 
 
240
  }
241
  ],
242
  "metadata": {
243
  "kernelspec": {
244
- "display_name": "Python 3 (ipykernel)",
245
- "language": "python",
246
  "name": "python3"
247
  },
248
  "language_info": {
@@ -258,11 +386,12 @@
258
  "version": "3.12.4"
259
  },
260
  "colab": {
261
- "provenance": []
 
262
  },
263
  "widgets": {
264
  "application/vnd.jupyter.widget-state+json": {
265
- "862cf57104f9417c89304dd4dabe423b": {
266
  "model_module": "@jupyter-widgets/controls",
267
  "model_name": "HBoxModel",
268
  "model_module_version": "1.5.0",
@@ -277,14 +406,14 @@
277
  "_view_name": "HBoxView",
278
  "box_style": "",
279
  "children": [
280
- "IPY_MODEL_dd0486901a544b3b9a2241a7c2687e45",
281
- "IPY_MODEL_83db99b45e07443f93323b0d4a2b6123",
282
- "IPY_MODEL_1f870de8520646e48f81158c0e91cc69"
283
  ],
284
- "layout": "IPY_MODEL_fbd62b548faa4b57900ad114f616b8be"
285
  }
286
  },
287
- "dd0486901a544b3b9a2241a7c2687e45": {
288
  "model_module": "@jupyter-widgets/controls",
289
  "model_name": "HTMLModel",
290
  "model_module_version": "1.5.0",
@@ -299,13 +428,13 @@
299
  "_view_name": "HTMLView",
300
  "description": "",
301
  "description_tooltip": null,
302
- "layout": "IPY_MODEL_e5831ea8ddb24c13a58206c3b2105289",
303
  "placeholder": "​",
304
- "style": "IPY_MODEL_6211f1d590884b3ea13a5b3b327e5ce4",
305
- "value": "config.json: 100%"
306
  }
307
  },
308
- "83db99b45e07443f93323b0d4a2b6123": {
309
  "model_module": "@jupyter-widgets/controls",
310
  "model_name": "FloatProgressModel",
311
  "model_module_version": "1.5.0",
@@ -321,15 +450,15 @@
321
  "bar_style": "success",
322
  "description": "",
323
  "description_tooltip": null,
324
- "layout": "IPY_MODEL_baa5bc3b299c4d92a40cc57dde604234",
325
- "max": 1209,
326
  "min": 0,
327
  "orientation": "horizontal",
328
- "style": "IPY_MODEL_26c35a5e2404485b9edafd208e591ec4",
329
- "value": 1209
330
  }
331
  },
332
- "1f870de8520646e48f81158c0e91cc69": {
333
  "model_module": "@jupyter-widgets/controls",
334
  "model_name": "HTMLModel",
335
  "model_module_version": "1.5.0",
@@ -344,13 +473,13 @@
344
  "_view_name": "HTMLView",
345
  "description": "",
346
  "description_tooltip": null,
347
- "layout": "IPY_MODEL_be8461f2a274440fb986b0cab0ee7294",
348
  "placeholder": "​",
349
- "style": "IPY_MODEL_a32d56f15ca14ee983428a17a98b6428",
350
- "value": " 1.21k/1.21k [00:00&lt;00:00, 75.6kB/s]"
351
  }
352
  },
353
- "fbd62b548faa4b57900ad114f616b8be": {
354
  "model_module": "@jupyter-widgets/base",
355
  "model_name": "LayoutModel",
356
  "model_module_version": "1.2.0",
@@ -402,7 +531,7 @@
402
  "width": null
403
  }
404
  },
405
- "e5831ea8ddb24c13a58206c3b2105289": {
406
  "model_module": "@jupyter-widgets/base",
407
  "model_name": "LayoutModel",
408
  "model_module_version": "1.2.0",
@@ -454,7 +583,7 @@
454
  "width": null
455
  }
456
  },
457
- "6211f1d590884b3ea13a5b3b327e5ce4": {
458
  "model_module": "@jupyter-widgets/controls",
459
  "model_name": "DescriptionStyleModel",
460
  "model_module_version": "1.5.0",
@@ -469,7 +598,7 @@
469
  "description_width": ""
470
  }
471
  },
472
- "baa5bc3b299c4d92a40cc57dde604234": {
473
  "model_module": "@jupyter-widgets/base",
474
  "model_name": "LayoutModel",
475
  "model_module_version": "1.2.0",
@@ -521,7 +650,7 @@
521
  "width": null
522
  }
523
  },
524
- "26c35a5e2404485b9edafd208e591ec4": {
525
  "model_module": "@jupyter-widgets/controls",
526
  "model_name": "ProgressStyleModel",
527
  "model_module_version": "1.5.0",
@@ -537,7 +666,7 @@
537
  "description_width": ""
538
  }
539
  },
540
- "be8461f2a274440fb986b0cab0ee7294": {
541
  "model_module": "@jupyter-widgets/base",
542
  "model_name": "LayoutModel",
543
  "model_module_version": "1.2.0",
@@ -589,7 +718,7 @@
589
  "width": null
590
  }
591
  },
592
- "a32d56f15ca14ee983428a17a98b6428": {
593
  "model_module": "@jupyter-widgets/controls",
594
  "model_name": "DescriptionStyleModel",
595
  "model_module_version": "1.5.0",
@@ -604,7 +733,7 @@
604
  "description_width": ""
605
  }
606
  },
607
- "ea1b0a22fbc1463aba924049c9e14f01": {
608
  "model_module": "@jupyter-widgets/controls",
609
  "model_name": "HBoxModel",
610
  "model_module_version": "1.5.0",
@@ -619,14 +748,14 @@
619
  "_view_name": "HBoxView",
620
  "box_style": "",
621
  "children": [
622
- "IPY_MODEL_b75e1eaeaeb34482bd9ad362fa7234df",
623
- "IPY_MODEL_1464d10b061d4141a80c11bb8f43d421",
624
- "IPY_MODEL_d319f231bd3b463799b5ccff4d0e2991"
625
  ],
626
- "layout": "IPY_MODEL_d98d861b836b4ac7b90d032d0e626126"
627
  }
628
  },
629
- "b75e1eaeaeb34482bd9ad362fa7234df": {
630
  "model_module": "@jupyter-widgets/controls",
631
  "model_name": "HTMLModel",
632
  "model_module_version": "1.5.0",
@@ -641,13 +770,13 @@
641
  "_view_name": "HTMLView",
642
  "description": "",
643
  "description_tooltip": null,
644
- "layout": "IPY_MODEL_65339806b36248609c0cdf5b2058da99",
645
  "placeholder": "​",
646
- "style": "IPY_MODEL_811774b3b16d40b1ab35ac374c2b5141",
647
- "value": "spiece.model: 100%"
648
  }
649
  },
650
- "1464d10b061d4141a80c11bb8f43d421": {
651
  "model_module": "@jupyter-widgets/controls",
652
  "model_name": "FloatProgressModel",
653
  "model_module_version": "1.5.0",
@@ -663,15 +792,15 @@
663
  "bar_style": "success",
664
  "description": "",
665
  "description_tooltip": null,
666
- "layout": "IPY_MODEL_21e139b2a50f4eb0a4bfbfd9da79b4cc",
667
- "max": 791656,
668
  "min": 0,
669
  "orientation": "horizontal",
670
- "style": "IPY_MODEL_8bdc1bc6d52445d99454f4c8b1903044",
671
- "value": 791656
672
  }
673
  },
674
- "d319f231bd3b463799b5ccff4d0e2991": {
675
  "model_module": "@jupyter-widgets/controls",
676
  "model_name": "HTMLModel",
677
  "model_module_version": "1.5.0",
@@ -686,13 +815,13 @@
686
  "_view_name": "HTMLView",
687
  "description": "",
688
  "description_tooltip": null,
689
- "layout": "IPY_MODEL_8b0c04b3fe7546858d0555144cf40e3b",
690
  "placeholder": "​",
691
- "style": "IPY_MODEL_892bd307f77b45c885c1a61bfd229439",
692
- "value": " 792k/792k [00:00&lt;00:00, 10.4MB/s]"
693
  }
694
  },
695
- "d98d861b836b4ac7b90d032d0e626126": {
696
  "model_module": "@jupyter-widgets/base",
697
  "model_name": "LayoutModel",
698
  "model_module_version": "1.2.0",
@@ -744,7 +873,7 @@
744
  "width": null
745
  }
746
  },
747
- "65339806b36248609c0cdf5b2058da99": {
748
  "model_module": "@jupyter-widgets/base",
749
  "model_name": "LayoutModel",
750
  "model_module_version": "1.2.0",
@@ -796,7 +925,7 @@
796
  "width": null
797
  }
798
  },
799
- "811774b3b16d40b1ab35ac374c2b5141": {
800
  "model_module": "@jupyter-widgets/controls",
801
  "model_name": "DescriptionStyleModel",
802
  "model_module_version": "1.5.0",
@@ -811,7 +940,7 @@
811
  "description_width": ""
812
  }
813
  },
814
- "21e139b2a50f4eb0a4bfbfd9da79b4cc": {
815
  "model_module": "@jupyter-widgets/base",
816
  "model_name": "LayoutModel",
817
  "model_module_version": "1.2.0",
@@ -863,7 +992,7 @@
863
  "width": null
864
  }
865
  },
866
- "8bdc1bc6d52445d99454f4c8b1903044": {
867
  "model_module": "@jupyter-widgets/controls",
868
  "model_name": "ProgressStyleModel",
869
  "model_module_version": "1.5.0",
@@ -879,214 +1008,7 @@
879
  "description_width": ""
880
  }
881
  },
882
- "8b0c04b3fe7546858d0555144cf40e3b": {
883
- "model_module": "@jupyter-widgets/base",
884
- "model_name": "LayoutModel",
885
- "model_module_version": "1.2.0",
886
- "state": {
887
- "_model_module": "@jupyter-widgets/base",
888
- "_model_module_version": "1.2.0",
889
- "_model_name": "LayoutModel",
890
- "_view_count": null,
891
- "_view_module": "@jupyter-widgets/base",
892
- "_view_module_version": "1.2.0",
893
- "_view_name": "LayoutView",
894
- "align_content": null,
895
- "align_items": null,
896
- "align_self": null,
897
- "border": null,
898
- "bottom": null,
899
- "display": null,
900
- "flex": null,
901
- "flex_flow": null,
902
- "grid_area": null,
903
- "grid_auto_columns": null,
904
- "grid_auto_flow": null,
905
- "grid_auto_rows": null,
906
- "grid_column": null,
907
- "grid_gap": null,
908
- "grid_row": null,
909
- "grid_template_areas": null,
910
- "grid_template_columns": null,
911
- "grid_template_rows": null,
912
- "height": null,
913
- "justify_content": null,
914
- "justify_items": null,
915
- "left": null,
916
- "margin": null,
917
- "max_height": null,
918
- "max_width": null,
919
- "min_height": null,
920
- "min_width": null,
921
- "object_fit": null,
922
- "object_position": null,
923
- "order": null,
924
- "overflow": null,
925
- "overflow_x": null,
926
- "overflow_y": null,
927
- "padding": null,
928
- "right": null,
929
- "top": null,
930
- "visibility": null,
931
- "width": null
932
- }
933
- },
934
- "892bd307f77b45c885c1a61bfd229439": {
935
- "model_module": "@jupyter-widgets/controls",
936
- "model_name": "DescriptionStyleModel",
937
- "model_module_version": "1.5.0",
938
- "state": {
939
- "_model_module": "@jupyter-widgets/controls",
940
- "_model_module_version": "1.5.0",
941
- "_model_name": "DescriptionStyleModel",
942
- "_view_count": null,
943
- "_view_module": "@jupyter-widgets/base",
944
- "_view_module_version": "1.2.0",
945
- "_view_name": "StyleView",
946
- "description_width": ""
947
- }
948
- },
949
- "aee8a770b18341de904a7e2bbb019424": {
950
- "model_module": "@jupyter-widgets/controls",
951
- "model_name": "HBoxModel",
952
- "model_module_version": "1.5.0",
953
- "state": {
954
- "_dom_classes": [],
955
- "_model_module": "@jupyter-widgets/controls",
956
- "_model_module_version": "1.5.0",
957
- "_model_name": "HBoxModel",
958
- "_view_count": null,
959
- "_view_module": "@jupyter-widgets/controls",
960
- "_view_module_version": "1.5.0",
961
- "_view_name": "HBoxView",
962
- "box_style": "",
963
- "children": [
964
- "IPY_MODEL_e5d7d41c1e67451cae59b992706c0592",
965
- "IPY_MODEL_ebb9e9e05ac9452292bf734dbb2e707d",
966
- "IPY_MODEL_35e46cf2dca748368c95703794d7c0ed"
967
- ],
968
- "layout": "IPY_MODEL_becd058ba229403d85b906c49db69e61"
969
- }
970
- },
971
- "e5d7d41c1e67451cae59b992706c0592": {
972
- "model_module": "@jupyter-widgets/controls",
973
- "model_name": "HTMLModel",
974
- "model_module_version": "1.5.0",
975
- "state": {
976
- "_dom_classes": [],
977
- "_model_module": "@jupyter-widgets/controls",
978
- "_model_module_version": "1.5.0",
979
- "_model_name": "HTMLModel",
980
- "_view_count": null,
981
- "_view_module": "@jupyter-widgets/controls",
982
- "_view_module_version": "1.5.0",
983
- "_view_name": "HTMLView",
984
- "description": "",
985
- "description_tooltip": null,
986
- "layout": "IPY_MODEL_ab406b8adf4146078bc5c57d31f6f405",
987
- "placeholder": "​",
988
- "style": "IPY_MODEL_19a48a7be73d46858f0d3d35d97e5c1d",
989
- "value": "tokenizer.json: 100%"
990
- }
991
- },
992
- "ebb9e9e05ac9452292bf734dbb2e707d": {
993
- "model_module": "@jupyter-widgets/controls",
994
- "model_name": "FloatProgressModel",
995
- "model_module_version": "1.5.0",
996
- "state": {
997
- "_dom_classes": [],
998
- "_model_module": "@jupyter-widgets/controls",
999
- "_model_module_version": "1.5.0",
1000
- "_model_name": "FloatProgressModel",
1001
- "_view_count": null,
1002
- "_view_module": "@jupyter-widgets/controls",
1003
- "_view_module_version": "1.5.0",
1004
- "_view_name": "ProgressView",
1005
- "bar_style": "success",
1006
- "description": "",
1007
- "description_tooltip": null,
1008
- "layout": "IPY_MODEL_e445e34f32944a0e88df4d7bb88251f3",
1009
- "max": 1389353,
1010
- "min": 0,
1011
- "orientation": "horizontal",
1012
- "style": "IPY_MODEL_322ad5c458884148b410aedc7b076d47",
1013
- "value": 1389353
1014
- }
1015
- },
1016
- "35e46cf2dca748368c95703794d7c0ed": {
1017
- "model_module": "@jupyter-widgets/controls",
1018
- "model_name": "HTMLModel",
1019
- "model_module_version": "1.5.0",
1020
- "state": {
1021
- "_dom_classes": [],
1022
- "_model_module": "@jupyter-widgets/controls",
1023
- "_model_module_version": "1.5.0",
1024
- "_model_name": "HTMLModel",
1025
- "_view_count": null,
1026
- "_view_module": "@jupyter-widgets/controls",
1027
- "_view_module_version": "1.5.0",
1028
- "_view_name": "HTMLView",
1029
- "description": "",
1030
- "description_tooltip": null,
1031
- "layout": "IPY_MODEL_4d5e46b60ae1462d9d9d00b55342bd06",
1032
- "placeholder": "​",
1033
- "style": "IPY_MODEL_8065efbe037946b5a99a92df63bb65d1",
1034
- "value": " 1.39M/1.39M [00:00&lt;00:00, 37.2MB/s]"
1035
- }
1036
- },
1037
- "becd058ba229403d85b906c49db69e61": {
1038
- "model_module": "@jupyter-widgets/base",
1039
- "model_name": "LayoutModel",
1040
- "model_module_version": "1.2.0",
1041
- "state": {
1042
- "_model_module": "@jupyter-widgets/base",
1043
- "_model_module_version": "1.2.0",
1044
- "_model_name": "LayoutModel",
1045
- "_view_count": null,
1046
- "_view_module": "@jupyter-widgets/base",
1047
- "_view_module_version": "1.2.0",
1048
- "_view_name": "LayoutView",
1049
- "align_content": null,
1050
- "align_items": null,
1051
- "align_self": null,
1052
- "border": null,
1053
- "bottom": null,
1054
- "display": null,
1055
- "flex": null,
1056
- "flex_flow": null,
1057
- "grid_area": null,
1058
- "grid_auto_columns": null,
1059
- "grid_auto_flow": null,
1060
- "grid_auto_rows": null,
1061
- "grid_column": null,
1062
- "grid_gap": null,
1063
- "grid_row": null,
1064
- "grid_template_areas": null,
1065
- "grid_template_columns": null,
1066
- "grid_template_rows": null,
1067
- "height": null,
1068
- "justify_content": null,
1069
- "justify_items": null,
1070
- "left": null,
1071
- "margin": null,
1072
- "max_height": null,
1073
- "max_width": null,
1074
- "min_height": null,
1075
- "min_width": null,
1076
- "object_fit": null,
1077
- "object_position": null,
1078
- "order": null,
1079
- "overflow": null,
1080
- "overflow_x": null,
1081
- "overflow_y": null,
1082
- "padding": null,
1083
- "right": null,
1084
- "top": null,
1085
- "visibility": null,
1086
- "width": null
1087
- }
1088
- },
1089
- "ab406b8adf4146078bc5c57d31f6f405": {
1090
  "model_module": "@jupyter-widgets/base",
1091
  "model_name": "LayoutModel",
1092
  "model_module_version": "1.2.0",
@@ -1138,826 +1060,7 @@
1138
  "width": null
1139
  }
1140
  },
1141
- "19a48a7be73d46858f0d3d35d97e5c1d": {
1142
- "model_module": "@jupyter-widgets/controls",
1143
- "model_name": "DescriptionStyleModel",
1144
- "model_module_version": "1.5.0",
1145
- "state": {
1146
- "_model_module": "@jupyter-widgets/controls",
1147
- "_model_module_version": "1.5.0",
1148
- "_model_name": "DescriptionStyleModel",
1149
- "_view_count": null,
1150
- "_view_module": "@jupyter-widgets/base",
1151
- "_view_module_version": "1.2.0",
1152
- "_view_name": "StyleView",
1153
- "description_width": ""
1154
- }
1155
- },
1156
- "e445e34f32944a0e88df4d7bb88251f3": {
1157
- "model_module": "@jupyter-widgets/base",
1158
- "model_name": "LayoutModel",
1159
- "model_module_version": "1.2.0",
1160
- "state": {
1161
- "_model_module": "@jupyter-widgets/base",
1162
- "_model_module_version": "1.2.0",
1163
- "_model_name": "LayoutModel",
1164
- "_view_count": null,
1165
- "_view_module": "@jupyter-widgets/base",
1166
- "_view_module_version": "1.2.0",
1167
- "_view_name": "LayoutView",
1168
- "align_content": null,
1169
- "align_items": null,
1170
- "align_self": null,
1171
- "border": null,
1172
- "bottom": null,
1173
- "display": null,
1174
- "flex": null,
1175
- "flex_flow": null,
1176
- "grid_area": null,
1177
- "grid_auto_columns": null,
1178
- "grid_auto_flow": null,
1179
- "grid_auto_rows": null,
1180
- "grid_column": null,
1181
- "grid_gap": null,
1182
- "grid_row": null,
1183
- "grid_template_areas": null,
1184
- "grid_template_columns": null,
1185
- "grid_template_rows": null,
1186
- "height": null,
1187
- "justify_content": null,
1188
- "justify_items": null,
1189
- "left": null,
1190
- "margin": null,
1191
- "max_height": null,
1192
- "max_width": null,
1193
- "min_height": null,
1194
- "min_width": null,
1195
- "object_fit": null,
1196
- "object_position": null,
1197
- "order": null,
1198
- "overflow": null,
1199
- "overflow_x": null,
1200
- "overflow_y": null,
1201
- "padding": null,
1202
- "right": null,
1203
- "top": null,
1204
- "visibility": null,
1205
- "width": null
1206
- }
1207
- },
1208
- "322ad5c458884148b410aedc7b076d47": {
1209
- "model_module": "@jupyter-widgets/controls",
1210
- "model_name": "ProgressStyleModel",
1211
- "model_module_version": "1.5.0",
1212
- "state": {
1213
- "_model_module": "@jupyter-widgets/controls",
1214
- "_model_module_version": "1.5.0",
1215
- "_model_name": "ProgressStyleModel",
1216
- "_view_count": null,
1217
- "_view_module": "@jupyter-widgets/base",
1218
- "_view_module_version": "1.2.0",
1219
- "_view_name": "StyleView",
1220
- "bar_color": null,
1221
- "description_width": ""
1222
- }
1223
- },
1224
- "4d5e46b60ae1462d9d9d00b55342bd06": {
1225
- "model_module": "@jupyter-widgets/base",
1226
- "model_name": "LayoutModel",
1227
- "model_module_version": "1.2.0",
1228
- "state": {
1229
- "_model_module": "@jupyter-widgets/base",
1230
- "_model_module_version": "1.2.0",
1231
- "_model_name": "LayoutModel",
1232
- "_view_count": null,
1233
- "_view_module": "@jupyter-widgets/base",
1234
- "_view_module_version": "1.2.0",
1235
- "_view_name": "LayoutView",
1236
- "align_content": null,
1237
- "align_items": null,
1238
- "align_self": null,
1239
- "border": null,
1240
- "bottom": null,
1241
- "display": null,
1242
- "flex": null,
1243
- "flex_flow": null,
1244
- "grid_area": null,
1245
- "grid_auto_columns": null,
1246
- "grid_auto_flow": null,
1247
- "grid_auto_rows": null,
1248
- "grid_column": null,
1249
- "grid_gap": null,
1250
- "grid_row": null,
1251
- "grid_template_areas": null,
1252
- "grid_template_columns": null,
1253
- "grid_template_rows": null,
1254
- "height": null,
1255
- "justify_content": null,
1256
- "justify_items": null,
1257
- "left": null,
1258
- "margin": null,
1259
- "max_height": null,
1260
- "max_width": null,
1261
- "min_height": null,
1262
- "min_width": null,
1263
- "object_fit": null,
1264
- "object_position": null,
1265
- "order": null,
1266
- "overflow": null,
1267
- "overflow_x": null,
1268
- "overflow_y": null,
1269
- "padding": null,
1270
- "right": null,
1271
- "top": null,
1272
- "visibility": null,
1273
- "width": null
1274
- }
1275
- },
1276
- "8065efbe037946b5a99a92df63bb65d1": {
1277
- "model_module": "@jupyter-widgets/controls",
1278
- "model_name": "DescriptionStyleModel",
1279
- "model_module_version": "1.5.0",
1280
- "state": {
1281
- "_model_module": "@jupyter-widgets/controls",
1282
- "_model_module_version": "1.5.0",
1283
- "_model_name": "DescriptionStyleModel",
1284
- "_view_count": null,
1285
- "_view_module": "@jupyter-widgets/base",
1286
- "_view_module_version": "1.2.0",
1287
- "_view_name": "StyleView",
1288
- "description_width": ""
1289
- }
1290
- },
1291
- "b4835d10c5cf45b7b5d00cc1f4c7224b": {
1292
- "model_module": "@jupyter-widgets/controls",
1293
- "model_name": "HBoxModel",
1294
- "model_module_version": "1.5.0",
1295
- "state": {
1296
- "_dom_classes": [],
1297
- "_model_module": "@jupyter-widgets/controls",
1298
- "_model_module_version": "1.5.0",
1299
- "_model_name": "HBoxModel",
1300
- "_view_count": null,
1301
- "_view_module": "@jupyter-widgets/controls",
1302
- "_view_module_version": "1.5.0",
1303
- "_view_name": "HBoxView",
1304
- "box_style": "",
1305
- "children": [
1306
- "IPY_MODEL_d5dc3aa40ba747e48d40fc6c7e1c9ab9",
1307
- "IPY_MODEL_94b13990da8140d289eb48460afb239b",
1308
- "IPY_MODEL_cd7fe2f3f3a945a1bde15a24b9bc124b"
1309
- ],
1310
- "layout": "IPY_MODEL_69b58423c5da48b985f6365a0c3be9ce"
1311
- }
1312
- },
1313
- "d5dc3aa40ba747e48d40fc6c7e1c9ab9": {
1314
- "model_module": "@jupyter-widgets/controls",
1315
- "model_name": "HTMLModel",
1316
- "model_module_version": "1.5.0",
1317
- "state": {
1318
- "_dom_classes": [],
1319
- "_model_module": "@jupyter-widgets/controls",
1320
- "_model_module_version": "1.5.0",
1321
- "_model_name": "HTMLModel",
1322
- "_view_count": null,
1323
- "_view_module": "@jupyter-widgets/controls",
1324
- "_view_module_version": "1.5.0",
1325
- "_view_name": "HTMLView",
1326
- "description": "",
1327
- "description_tooltip": null,
1328
- "layout": "IPY_MODEL_1bc829b42be24648908dc8e93bb7d589",
1329
- "placeholder": "​",
1330
- "style": "IPY_MODEL_c7acf97b800a4e5d8c4ba5197ecb650b",
1331
- "value": "model.safetensors: 100%"
1332
- }
1333
- },
1334
- "94b13990da8140d289eb48460afb239b": {
1335
- "model_module": "@jupyter-widgets/controls",
1336
- "model_name": "FloatProgressModel",
1337
- "model_module_version": "1.5.0",
1338
- "state": {
1339
- "_dom_classes": [],
1340
- "_model_module": "@jupyter-widgets/controls",
1341
- "_model_module_version": "1.5.0",
1342
- "_model_name": "FloatProgressModel",
1343
- "_view_count": null,
1344
- "_view_module": "@jupyter-widgets/controls",
1345
- "_view_module_version": "1.5.0",
1346
- "_view_name": "ProgressView",
1347
- "bar_style": "success",
1348
- "description": "",
1349
- "description_tooltip": null,
1350
- "layout": "IPY_MODEL_ae5ed23ebf514ffbb4d2ab0b32dc0875",
1351
- "max": 2950736730,
1352
- "min": 0,
1353
- "orientation": "horizontal",
1354
- "style": "IPY_MODEL_3bd82485e1ae44dabb4c07de0adbb1a2",
1355
- "value": 2950736730
1356
- }
1357
- },
1358
- "cd7fe2f3f3a945a1bde15a24b9bc124b": {
1359
- "model_module": "@jupyter-widgets/controls",
1360
- "model_name": "HTMLModel",
1361
- "model_module_version": "1.5.0",
1362
- "state": {
1363
- "_dom_classes": [],
1364
- "_model_module": "@jupyter-widgets/controls",
1365
- "_model_module_version": "1.5.0",
1366
- "_model_name": "HTMLModel",
1367
- "_view_count": null,
1368
- "_view_module": "@jupyter-widgets/controls",
1369
- "_view_module_version": "1.5.0",
1370
- "_view_name": "HTMLView",
1371
- "description": "",
1372
- "description_tooltip": null,
1373
- "layout": "IPY_MODEL_d16ffd33b7434c9f8a9132438a7c55cb",
1374
- "placeholder": "​",
1375
- "style": "IPY_MODEL_eb794ab5bfd2413c86380b5dc5f31494",
1376
- "value": " 2.95G/2.95G [02:38&lt;00:00, 20.8MB/s]"
1377
- }
1378
- },
1379
- "69b58423c5da48b985f6365a0c3be9ce": {
1380
- "model_module": "@jupyter-widgets/base",
1381
- "model_name": "LayoutModel",
1382
- "model_module_version": "1.2.0",
1383
- "state": {
1384
- "_model_module": "@jupyter-widgets/base",
1385
- "_model_module_version": "1.2.0",
1386
- "_model_name": "LayoutModel",
1387
- "_view_count": null,
1388
- "_view_module": "@jupyter-widgets/base",
1389
- "_view_module_version": "1.2.0",
1390
- "_view_name": "LayoutView",
1391
- "align_content": null,
1392
- "align_items": null,
1393
- "align_self": null,
1394
- "border": null,
1395
- "bottom": null,
1396
- "display": null,
1397
- "flex": null,
1398
- "flex_flow": null,
1399
- "grid_area": null,
1400
- "grid_auto_columns": null,
1401
- "grid_auto_flow": null,
1402
- "grid_auto_rows": null,
1403
- "grid_column": null,
1404
- "grid_gap": null,
1405
- "grid_row": null,
1406
- "grid_template_areas": null,
1407
- "grid_template_columns": null,
1408
- "grid_template_rows": null,
1409
- "height": null,
1410
- "justify_content": null,
1411
- "justify_items": null,
1412
- "left": null,
1413
- "margin": null,
1414
- "max_height": null,
1415
- "max_width": null,
1416
- "min_height": null,
1417
- "min_width": null,
1418
- "object_fit": null,
1419
- "object_position": null,
1420
- "order": null,
1421
- "overflow": null,
1422
- "overflow_x": null,
1423
- "overflow_y": null,
1424
- "padding": null,
1425
- "right": null,
1426
- "top": null,
1427
- "visibility": null,
1428
- "width": null
1429
- }
1430
- },
1431
- "1bc829b42be24648908dc8e93bb7d589": {
1432
- "model_module": "@jupyter-widgets/base",
1433
- "model_name": "LayoutModel",
1434
- "model_module_version": "1.2.0",
1435
- "state": {
1436
- "_model_module": "@jupyter-widgets/base",
1437
- "_model_module_version": "1.2.0",
1438
- "_model_name": "LayoutModel",
1439
- "_view_count": null,
1440
- "_view_module": "@jupyter-widgets/base",
1441
- "_view_module_version": "1.2.0",
1442
- "_view_name": "LayoutView",
1443
- "align_content": null,
1444
- "align_items": null,
1445
- "align_self": null,
1446
- "border": null,
1447
- "bottom": null,
1448
- "display": null,
1449
- "flex": null,
1450
- "flex_flow": null,
1451
- "grid_area": null,
1452
- "grid_auto_columns": null,
1453
- "grid_auto_flow": null,
1454
- "grid_auto_rows": null,
1455
- "grid_column": null,
1456
- "grid_gap": null,
1457
- "grid_row": null,
1458
- "grid_template_areas": null,
1459
- "grid_template_columns": null,
1460
- "grid_template_rows": null,
1461
- "height": null,
1462
- "justify_content": null,
1463
- "justify_items": null,
1464
- "left": null,
1465
- "margin": null,
1466
- "max_height": null,
1467
- "max_width": null,
1468
- "min_height": null,
1469
- "min_width": null,
1470
- "object_fit": null,
1471
- "object_position": null,
1472
- "order": null,
1473
- "overflow": null,
1474
- "overflow_x": null,
1475
- "overflow_y": null,
1476
- "padding": null,
1477
- "right": null,
1478
- "top": null,
1479
- "visibility": null,
1480
- "width": null
1481
- }
1482
- },
1483
- "c7acf97b800a4e5d8c4ba5197ecb650b": {
1484
- "model_module": "@jupyter-widgets/controls",
1485
- "model_name": "DescriptionStyleModel",
1486
- "model_module_version": "1.5.0",
1487
- "state": {
1488
- "_model_module": "@jupyter-widgets/controls",
1489
- "_model_module_version": "1.5.0",
1490
- "_model_name": "DescriptionStyleModel",
1491
- "_view_count": null,
1492
- "_view_module": "@jupyter-widgets/base",
1493
- "_view_module_version": "1.2.0",
1494
- "_view_name": "StyleView",
1495
- "description_width": ""
1496
- }
1497
- },
1498
- "ae5ed23ebf514ffbb4d2ab0b32dc0875": {
1499
- "model_module": "@jupyter-widgets/base",
1500
- "model_name": "LayoutModel",
1501
- "model_module_version": "1.2.0",
1502
- "state": {
1503
- "_model_module": "@jupyter-widgets/base",
1504
- "_model_module_version": "1.2.0",
1505
- "_model_name": "LayoutModel",
1506
- "_view_count": null,
1507
- "_view_module": "@jupyter-widgets/base",
1508
- "_view_module_version": "1.2.0",
1509
- "_view_name": "LayoutView",
1510
- "align_content": null,
1511
- "align_items": null,
1512
- "align_self": null,
1513
- "border": null,
1514
- "bottom": null,
1515
- "display": null,
1516
- "flex": null,
1517
- "flex_flow": null,
1518
- "grid_area": null,
1519
- "grid_auto_columns": null,
1520
- "grid_auto_flow": null,
1521
- "grid_auto_rows": null,
1522
- "grid_column": null,
1523
- "grid_gap": null,
1524
- "grid_row": null,
1525
- "grid_template_areas": null,
1526
- "grid_template_columns": null,
1527
- "grid_template_rows": null,
1528
- "height": null,
1529
- "justify_content": null,
1530
- "justify_items": null,
1531
- "left": null,
1532
- "margin": null,
1533
- "max_height": null,
1534
- "max_width": null,
1535
- "min_height": null,
1536
- "min_width": null,
1537
- "object_fit": null,
1538
- "object_position": null,
1539
- "order": null,
1540
- "overflow": null,
1541
- "overflow_x": null,
1542
- "overflow_y": null,
1543
- "padding": null,
1544
- "right": null,
1545
- "top": null,
1546
- "visibility": null,
1547
- "width": null
1548
- }
1549
- },
1550
- "3bd82485e1ae44dabb4c07de0adbb1a2": {
1551
- "model_module": "@jupyter-widgets/controls",
1552
- "model_name": "ProgressStyleModel",
1553
- "model_module_version": "1.5.0",
1554
- "state": {
1555
- "_model_module": "@jupyter-widgets/controls",
1556
- "_model_module_version": "1.5.0",
1557
- "_model_name": "ProgressStyleModel",
1558
- "_view_count": null,
1559
- "_view_module": "@jupyter-widgets/base",
1560
- "_view_module_version": "1.2.0",
1561
- "_view_name": "StyleView",
1562
- "bar_color": null,
1563
- "description_width": ""
1564
- }
1565
- },
1566
- "d16ffd33b7434c9f8a9132438a7c55cb": {
1567
- "model_module": "@jupyter-widgets/base",
1568
- "model_name": "LayoutModel",
1569
- "model_module_version": "1.2.0",
1570
- "state": {
1571
- "_model_module": "@jupyter-widgets/base",
1572
- "_model_module_version": "1.2.0",
1573
- "_model_name": "LayoutModel",
1574
- "_view_count": null,
1575
- "_view_module": "@jupyter-widgets/base",
1576
- "_view_module_version": "1.2.0",
1577
- "_view_name": "LayoutView",
1578
- "align_content": null,
1579
- "align_items": null,
1580
- "align_self": null,
1581
- "border": null,
1582
- "bottom": null,
1583
- "display": null,
1584
- "flex": null,
1585
- "flex_flow": null,
1586
- "grid_area": null,
1587
- "grid_auto_columns": null,
1588
- "grid_auto_flow": null,
1589
- "grid_auto_rows": null,
1590
- "grid_column": null,
1591
- "grid_gap": null,
1592
- "grid_row": null,
1593
- "grid_template_areas": null,
1594
- "grid_template_columns": null,
1595
- "grid_template_rows": null,
1596
- "height": null,
1597
- "justify_content": null,
1598
- "justify_items": null,
1599
- "left": null,
1600
- "margin": null,
1601
- "max_height": null,
1602
- "max_width": null,
1603
- "min_height": null,
1604
- "min_width": null,
1605
- "object_fit": null,
1606
- "object_position": null,
1607
- "order": null,
1608
- "overflow": null,
1609
- "overflow_x": null,
1610
- "overflow_y": null,
1611
- "padding": null,
1612
- "right": null,
1613
- "top": null,
1614
- "visibility": null,
1615
- "width": null
1616
- }
1617
- },
1618
- "eb794ab5bfd2413c86380b5dc5f31494": {
1619
- "model_module": "@jupyter-widgets/controls",
1620
- "model_name": "DescriptionStyleModel",
1621
- "model_module_version": "1.5.0",
1622
- "state": {
1623
- "_model_module": "@jupyter-widgets/controls",
1624
- "_model_module_version": "1.5.0",
1625
- "_model_name": "DescriptionStyleModel",
1626
- "_view_count": null,
1627
- "_view_module": "@jupyter-widgets/base",
1628
- "_view_module_version": "1.2.0",
1629
- "_view_name": "StyleView",
1630
- "description_width": ""
1631
- }
1632
- },
1633
- "698856b6905d4f82b59e95851d9ae534": {
1634
- "model_module": "@jupyter-widgets/controls",
1635
- "model_name": "HBoxModel",
1636
- "model_module_version": "1.5.0",
1637
- "state": {
1638
- "_dom_classes": [],
1639
- "_model_module": "@jupyter-widgets/controls",
1640
- "_model_module_version": "1.5.0",
1641
- "_model_name": "HBoxModel",
1642
- "_view_count": null,
1643
- "_view_module": "@jupyter-widgets/controls",
1644
- "_view_module_version": "1.5.0",
1645
- "_view_name": "HBoxView",
1646
- "box_style": "",
1647
- "children": [
1648
- "IPY_MODEL_de6c4520d29146eab51a49976107f172",
1649
- "IPY_MODEL_a6ce2d53ad8f462f9c169708f6d5c73d",
1650
- "IPY_MODEL_d560b2a5ff664d12956c742d51c1646f"
1651
- ],
1652
- "layout": "IPY_MODEL_b205eb1e6d3446f5b75f1d783bfbbcf6"
1653
- }
1654
- },
1655
- "de6c4520d29146eab51a49976107f172": {
1656
- "model_module": "@jupyter-widgets/controls",
1657
- "model_name": "HTMLModel",
1658
- "model_module_version": "1.5.0",
1659
- "state": {
1660
- "_dom_classes": [],
1661
- "_model_module": "@jupyter-widgets/controls",
1662
- "_model_module_version": "1.5.0",
1663
- "_model_name": "HTMLModel",
1664
- "_view_count": null,
1665
- "_view_module": "@jupyter-widgets/controls",
1666
- "_view_module_version": "1.5.0",
1667
- "_view_name": "HTMLView",
1668
- "description": "",
1669
- "description_tooltip": null,
1670
- "layout": "IPY_MODEL_c2e186e1bbfa4ff0a4b1e679011dc920",
1671
- "placeholder": "​",
1672
- "style": "IPY_MODEL_a49d8021b0074bafabd89995b0e5a0e7",
1673
- "value": "generation_config.json: 100%"
1674
- }
1675
- },
1676
- "a6ce2d53ad8f462f9c169708f6d5c73d": {
1677
- "model_module": "@jupyter-widgets/controls",
1678
- "model_name": "FloatProgressModel",
1679
- "model_module_version": "1.5.0",
1680
- "state": {
1681
- "_dom_classes": [],
1682
- "_model_module": "@jupyter-widgets/controls",
1683
- "_model_module_version": "1.5.0",
1684
- "_model_name": "FloatProgressModel",
1685
- "_view_count": null,
1686
- "_view_module": "@jupyter-widgets/controls",
1687
- "_view_module_version": "1.5.0",
1688
- "_view_name": "ProgressView",
1689
- "bar_style": "success",
1690
- "description": "",
1691
- "description_tooltip": null,
1692
- "layout": "IPY_MODEL_eb2bfb8ebb2b45b6a67247db552d1df6",
1693
- "max": 147,
1694
- "min": 0,
1695
- "orientation": "horizontal",
1696
- "style": "IPY_MODEL_920c4484283c4355a209bfb5188b6a6e",
1697
- "value": 147
1698
- }
1699
- },
1700
- "d560b2a5ff664d12956c742d51c1646f": {
1701
- "model_module": "@jupyter-widgets/controls",
1702
- "model_name": "HTMLModel",
1703
- "model_module_version": "1.5.0",
1704
- "state": {
1705
- "_dom_classes": [],
1706
- "_model_module": "@jupyter-widgets/controls",
1707
- "_model_module_version": "1.5.0",
1708
- "_model_name": "HTMLModel",
1709
- "_view_count": null,
1710
- "_view_module": "@jupyter-widgets/controls",
1711
- "_view_module_version": "1.5.0",
1712
- "_view_name": "HTMLView",
1713
- "description": "",
1714
- "description_tooltip": null,
1715
- "layout": "IPY_MODEL_3b0cf610ae764791805a2ceafc61adc4",
1716
- "placeholder": "​",
1717
- "style": "IPY_MODEL_a2493b9c90f3421db9b727c1a7396e8d",
1718
- "value": " 147/147 [00:00&lt;00:00, 7.28kB/s]"
1719
- }
1720
- },
1721
- "b205eb1e6d3446f5b75f1d783bfbbcf6": {
1722
- "model_module": "@jupyter-widgets/base",
1723
- "model_name": "LayoutModel",
1724
- "model_module_version": "1.2.0",
1725
- "state": {
1726
- "_model_module": "@jupyter-widgets/base",
1727
- "_model_module_version": "1.2.0",
1728
- "_model_name": "LayoutModel",
1729
- "_view_count": null,
1730
- "_view_module": "@jupyter-widgets/base",
1731
- "_view_module_version": "1.2.0",
1732
- "_view_name": "LayoutView",
1733
- "align_content": null,
1734
- "align_items": null,
1735
- "align_self": null,
1736
- "border": null,
1737
- "bottom": null,
1738
- "display": null,
1739
- "flex": null,
1740
- "flex_flow": null,
1741
- "grid_area": null,
1742
- "grid_auto_columns": null,
1743
- "grid_auto_flow": null,
1744
- "grid_auto_rows": null,
1745
- "grid_column": null,
1746
- "grid_gap": null,
1747
- "grid_row": null,
1748
- "grid_template_areas": null,
1749
- "grid_template_columns": null,
1750
- "grid_template_rows": null,
1751
- "height": null,
1752
- "justify_content": null,
1753
- "justify_items": null,
1754
- "left": null,
1755
- "margin": null,
1756
- "max_height": null,
1757
- "max_width": null,
1758
- "min_height": null,
1759
- "min_width": null,
1760
- "object_fit": null,
1761
- "object_position": null,
1762
- "order": null,
1763
- "overflow": null,
1764
- "overflow_x": null,
1765
- "overflow_y": null,
1766
- "padding": null,
1767
- "right": null,
1768
- "top": null,
1769
- "visibility": null,
1770
- "width": null
1771
- }
1772
- },
1773
- "c2e186e1bbfa4ff0a4b1e679011dc920": {
1774
- "model_module": "@jupyter-widgets/base",
1775
- "model_name": "LayoutModel",
1776
- "model_module_version": "1.2.0",
1777
- "state": {
1778
- "_model_module": "@jupyter-widgets/base",
1779
- "_model_module_version": "1.2.0",
1780
- "_model_name": "LayoutModel",
1781
- "_view_count": null,
1782
- "_view_module": "@jupyter-widgets/base",
1783
- "_view_module_version": "1.2.0",
1784
- "_view_name": "LayoutView",
1785
- "align_content": null,
1786
- "align_items": null,
1787
- "align_self": null,
1788
- "border": null,
1789
- "bottom": null,
1790
- "display": null,
1791
- "flex": null,
1792
- "flex_flow": null,
1793
- "grid_area": null,
1794
- "grid_auto_columns": null,
1795
- "grid_auto_flow": null,
1796
- "grid_auto_rows": null,
1797
- "grid_column": null,
1798
- "grid_gap": null,
1799
- "grid_row": null,
1800
- "grid_template_areas": null,
1801
- "grid_template_columns": null,
1802
- "grid_template_rows": null,
1803
- "height": null,
1804
- "justify_content": null,
1805
- "justify_items": null,
1806
- "left": null,
1807
- "margin": null,
1808
- "max_height": null,
1809
- "max_width": null,
1810
- "min_height": null,
1811
- "min_width": null,
1812
- "object_fit": null,
1813
- "object_position": null,
1814
- "order": null,
1815
- "overflow": null,
1816
- "overflow_x": null,
1817
- "overflow_y": null,
1818
- "padding": null,
1819
- "right": null,
1820
- "top": null,
1821
- "visibility": null,
1822
- "width": null
1823
- }
1824
- },
1825
- "a49d8021b0074bafabd89995b0e5a0e7": {
1826
- "model_module": "@jupyter-widgets/controls",
1827
- "model_name": "DescriptionStyleModel",
1828
- "model_module_version": "1.5.0",
1829
- "state": {
1830
- "_model_module": "@jupyter-widgets/controls",
1831
- "_model_module_version": "1.5.0",
1832
- "_model_name": "DescriptionStyleModel",
1833
- "_view_count": null,
1834
- "_view_module": "@jupyter-widgets/base",
1835
- "_view_module_version": "1.2.0",
1836
- "_view_name": "StyleView",
1837
- "description_width": ""
1838
- }
1839
- },
1840
- "eb2bfb8ebb2b45b6a67247db552d1df6": {
1841
- "model_module": "@jupyter-widgets/base",
1842
- "model_name": "LayoutModel",
1843
- "model_module_version": "1.2.0",
1844
- "state": {
1845
- "_model_module": "@jupyter-widgets/base",
1846
- "_model_module_version": "1.2.0",
1847
- "_model_name": "LayoutModel",
1848
- "_view_count": null,
1849
- "_view_module": "@jupyter-widgets/base",
1850
- "_view_module_version": "1.2.0",
1851
- "_view_name": "LayoutView",
1852
- "align_content": null,
1853
- "align_items": null,
1854
- "align_self": null,
1855
- "border": null,
1856
- "bottom": null,
1857
- "display": null,
1858
- "flex": null,
1859
- "flex_flow": null,
1860
- "grid_area": null,
1861
- "grid_auto_columns": null,
1862
- "grid_auto_flow": null,
1863
- "grid_auto_rows": null,
1864
- "grid_column": null,
1865
- "grid_gap": null,
1866
- "grid_row": null,
1867
- "grid_template_areas": null,
1868
- "grid_template_columns": null,
1869
- "grid_template_rows": null,
1870
- "height": null,
1871
- "justify_content": null,
1872
- "justify_items": null,
1873
- "left": null,
1874
- "margin": null,
1875
- "max_height": null,
1876
- "max_width": null,
1877
- "min_height": null,
1878
- "min_width": null,
1879
- "object_fit": null,
1880
- "object_position": null,
1881
- "order": null,
1882
- "overflow": null,
1883
- "overflow_x": null,
1884
- "overflow_y": null,
1885
- "padding": null,
1886
- "right": null,
1887
- "top": null,
1888
- "visibility": null,
1889
- "width": null
1890
- }
1891
- },
1892
- "920c4484283c4355a209bfb5188b6a6e": {
1893
- "model_module": "@jupyter-widgets/controls",
1894
- "model_name": "ProgressStyleModel",
1895
- "model_module_version": "1.5.0",
1896
- "state": {
1897
- "_model_module": "@jupyter-widgets/controls",
1898
- "_model_module_version": "1.5.0",
1899
- "_model_name": "ProgressStyleModel",
1900
- "_view_count": null,
1901
- "_view_module": "@jupyter-widgets/base",
1902
- "_view_module_version": "1.2.0",
1903
- "_view_name": "StyleView",
1904
- "bar_color": null,
1905
- "description_width": ""
1906
- }
1907
- },
1908
- "3b0cf610ae764791805a2ceafc61adc4": {
1909
- "model_module": "@jupyter-widgets/base",
1910
- "model_name": "LayoutModel",
1911
- "model_module_version": "1.2.0",
1912
- "state": {
1913
- "_model_module": "@jupyter-widgets/base",
1914
- "_model_module_version": "1.2.0",
1915
- "_model_name": "LayoutModel",
1916
- "_view_count": null,
1917
- "_view_module": "@jupyter-widgets/base",
1918
- "_view_module_version": "1.2.0",
1919
- "_view_name": "LayoutView",
1920
- "align_content": null,
1921
- "align_items": null,
1922
- "align_self": null,
1923
- "border": null,
1924
- "bottom": null,
1925
- "display": null,
1926
- "flex": null,
1927
- "flex_flow": null,
1928
- "grid_area": null,
1929
- "grid_auto_columns": null,
1930
- "grid_auto_flow": null,
1931
- "grid_auto_rows": null,
1932
- "grid_column": null,
1933
- "grid_gap": null,
1934
- "grid_row": null,
1935
- "grid_template_areas": null,
1936
- "grid_template_columns": null,
1937
- "grid_template_rows": null,
1938
- "height": null,
1939
- "justify_content": null,
1940
- "justify_items": null,
1941
- "left": null,
1942
- "margin": null,
1943
- "max_height": null,
1944
- "max_width": null,
1945
- "min_height": null,
1946
- "min_width": null,
1947
- "object_fit": null,
1948
- "object_position": null,
1949
- "order": null,
1950
- "overflow": null,
1951
- "overflow_x": null,
1952
- "overflow_y": null,
1953
- "padding": null,
1954
- "right": null,
1955
- "top": null,
1956
- "visibility": null,
1957
- "width": null
1958
- }
1959
- },
1960
- "a2493b9c90f3421db9b727c1a7396e8d": {
1961
  "model_module": "@jupyter-widgets/controls",
1962
  "model_name": "DescriptionStyleModel",
1963
  "model_module_version": "1.5.0",
@@ -1973,7 +1076,8 @@
1973
  }
1974
  }
1975
  }
1976
- }
 
1977
  },
1978
  "nbformat": 4,
1979
  "nbformat_minor": 5
 
13
  "source": [
14
  "import torch\n",
15
  "from transformers import AutoTokenizer, AutoModelForSeq2SeqLM, TrainingArguments\n",
16
+ "device = torch.device(\"cuda\" if torch.cuda.is_available() else \"cpu\")"
 
17
  ],
18
  "outputs": [],
19
+ "execution_count": 2
20
  },
21
  {
22
  "cell_type": "code",
 
28
  },
29
  "colab": {
30
  "base_uri": "https://localhost:8080/",
31
+ "height": 205,
32
  "referenced_widgets": [
33
+ "6ecdc71d497b4ab7bc6dca2ace0bd656",
34
+ "2af56a7c045d4bc294c6cb6d362a8120",
35
+ "f5611ea1eab5406eb5796ffed1218a0c",
36
+ "68855bdfbeed46e7b7e3d82a9b4c0988",
37
+ "24b9e5603be54e72b2bcf99be716b97d",
38
+ "64c42ce2cab04fb0aed8e65efebdfd11",
39
+ "85fc022f4f1841e480af3f5496f36fe0",
40
+ "2e384f5f20db45579dc708ede8b15c87",
41
+ "c9409991f35d4966989cf936f88fe99a",
42
+ "0f4ba7cacc6d49599f3178b738092e09",
43
+ "ddf9f8decaf948bbb67b8f71610c31b1",
44
+ "46756e51804c48ebbabee753ea455457",
45
+ "f98f7a5726d6434d9087fe521e136795",
46
+ "cc3c582d99d24da1b7bf1f8168961749",
47
+ "81b3bc430cc7409f9f51cd222fb0ca0e",
48
+ "f66bc695f26e4603ac38f23168e65763",
49
+ "d4a74b31d8064a5bba43849ed4b6658b",
50
+ "72731803b79343b0b6a61d722009b6b6",
51
+ "0ff317589550455f8eef5021912dd27c",
52
+ "7e110f5c777442c4aab4ea06a27a3dc1",
53
+ "ae1843d43b0f4199ba61e8c9e962f2b3",
54
+ "d72cd66e520a4eea83c06d7df23ccc0e"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
55
  ]
56
  },
57
  "id": "df5ce2489db64f8d",
58
+ "outputId": "34a739e5-75cd-4f03-b6b2-15e4b767bb64"
59
  },
60
  "source": [
61
+ "tokenizer = AutoTokenizer.from_pretrained(\"google/flan-t5-xl\")\n",
62
+ "model = AutoModelForSeq2SeqLM.from_pretrained(\"google/flan-t5-xl\")"
63
  ],
64
  "outputs": [
65
  {
 
71
  "To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.\n",
72
  "You will be able to reuse this secret in all of your notebooks.\n",
73
  "Please note that authentication is recommended but still optional to access public models or datasets.\n",
 
 
74
  " warnings.warn(\n"
75
  ]
76
  },
 
78
  "output_type": "display_data",
79
  "data": {
80
  "text/plain": [
81
+ "Loading checkpoint shards: 0%| | 0/2 [00:00<?, ?it/s]"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
82
  ],
83
  "application/vnd.jupyter.widget-view+json": {
84
  "version_major": 2,
85
  "version_minor": 0,
86
+ "model_id": "6ecdc71d497b4ab7bc6dca2ace0bd656"
87
  }
88
  },
89
  "metadata": {}
 
92
  "output_type": "display_data",
93
  "data": {
94
  "text/plain": [
95
+ "generation_config.json: 0%| | 0.00/147 [00:00<?, ?B/s]"
96
  ],
97
  "application/vnd.jupyter.widget-view+json": {
98
  "version_major": 2,
99
  "version_minor": 0,
100
+ "model_id": "46756e51804c48ebbabee753ea455457"
101
  }
102
  },
103
  "metadata": {}
104
+ }
105
+ ],
106
+ "execution_count": 3
107
+ },
108
+ {
109
+ "cell_type": "code",
110
+ "source": [
111
+ "model.to(device)"
112
+ ],
113
+ "metadata": {
114
+ "collapsed": true,
115
+ "id": "WJ623uJ0XR60",
116
+ "outputId": "54381bb6-1b49-4478-e5b8-b20b03dc388a",
117
+ "colab": {
118
+ "base_uri": "https://localhost:8080/"
119
+ }
120
+ },
121
+ "id": "WJ623uJ0XR60",
122
+ "execution_count": 4,
123
+ "outputs": [
124
  {
125
+ "output_type": "execute_result",
126
  "data": {
127
  "text/plain": [
128
+ "T5ForConditionalGeneration(\n",
129
+ " (shared): Embedding(32128, 2048)\n",
130
+ " (encoder): T5Stack(\n",
131
+ " (embed_tokens): Embedding(32128, 2048)\n",
132
+ " (block): ModuleList(\n",
133
+ " (0): T5Block(\n",
134
+ " (layer): ModuleList(\n",
135
+ " (0): T5LayerSelfAttention(\n",
136
+ " (SelfAttention): T5Attention(\n",
137
+ " (q): Linear(in_features=2048, out_features=2048, bias=False)\n",
138
+ " (k): Linear(in_features=2048, out_features=2048, bias=False)\n",
139
+ " (v): Linear(in_features=2048, out_features=2048, bias=False)\n",
140
+ " (o): Linear(in_features=2048, out_features=2048, bias=False)\n",
141
+ " (relative_attention_bias): Embedding(32, 32)\n",
142
+ " )\n",
143
+ " (layer_norm): T5LayerNorm()\n",
144
+ " (dropout): Dropout(p=0.1, inplace=False)\n",
145
+ " )\n",
146
+ " (1): T5LayerFF(\n",
147
+ " (DenseReluDense): T5DenseGatedActDense(\n",
148
+ " (wi_0): Linear(in_features=2048, out_features=5120, bias=False)\n",
149
+ " (wi_1): Linear(in_features=2048, out_features=5120, bias=False)\n",
150
+ " (wo): Linear(in_features=5120, out_features=2048, bias=False)\n",
151
+ " (dropout): Dropout(p=0.1, inplace=False)\n",
152
+ " (act): NewGELUActivation()\n",
153
+ " )\n",
154
+ " (layer_norm): T5LayerNorm()\n",
155
+ " (dropout): Dropout(p=0.1, inplace=False)\n",
156
+ " )\n",
157
+ " )\n",
158
+ " )\n",
159
+ " (1-23): 23 x T5Block(\n",
160
+ " (layer): ModuleList(\n",
161
+ " (0): T5LayerSelfAttention(\n",
162
+ " (SelfAttention): T5Attention(\n",
163
+ " (q): Linear(in_features=2048, out_features=2048, bias=False)\n",
164
+ " (k): Linear(in_features=2048, out_features=2048, bias=False)\n",
165
+ " (v): Linear(in_features=2048, out_features=2048, bias=False)\n",
166
+ " (o): Linear(in_features=2048, out_features=2048, bias=False)\n",
167
+ " )\n",
168
+ " (layer_norm): T5LayerNorm()\n",
169
+ " (dropout): Dropout(p=0.1, inplace=False)\n",
170
+ " )\n",
171
+ " (1): T5LayerFF(\n",
172
+ " (DenseReluDense): T5DenseGatedActDense(\n",
173
+ " (wi_0): Linear(in_features=2048, out_features=5120, bias=False)\n",
174
+ " (wi_1): Linear(in_features=2048, out_features=5120, bias=False)\n",
175
+ " (wo): Linear(in_features=5120, out_features=2048, bias=False)\n",
176
+ " (dropout): Dropout(p=0.1, inplace=False)\n",
177
+ " (act): NewGELUActivation()\n",
178
+ " )\n",
179
+ " (layer_norm): T5LayerNorm()\n",
180
+ " (dropout): Dropout(p=0.1, inplace=False)\n",
181
+ " )\n",
182
+ " )\n",
183
+ " )\n",
184
+ " )\n",
185
+ " (final_layer_norm): T5LayerNorm()\n",
186
+ " (dropout): Dropout(p=0.1, inplace=False)\n",
187
+ " )\n",
188
+ " (decoder): T5Stack(\n",
189
+ " (embed_tokens): Embedding(32128, 2048)\n",
190
+ " (block): ModuleList(\n",
191
+ " (0): T5Block(\n",
192
+ " (layer): ModuleList(\n",
193
+ " (0): T5LayerSelfAttention(\n",
194
+ " (SelfAttention): T5Attention(\n",
195
+ " (q): Linear(in_features=2048, out_features=2048, bias=False)\n",
196
+ " (k): Linear(in_features=2048, out_features=2048, bias=False)\n",
197
+ " (v): Linear(in_features=2048, out_features=2048, bias=False)\n",
198
+ " (o): Linear(in_features=2048, out_features=2048, bias=False)\n",
199
+ " (relative_attention_bias): Embedding(32, 32)\n",
200
+ " )\n",
201
+ " (layer_norm): T5LayerNorm()\n",
202
+ " (dropout): Dropout(p=0.1, inplace=False)\n",
203
+ " )\n",
204
+ " (1): T5LayerCrossAttention(\n",
205
+ " (EncDecAttention): T5Attention(\n",
206
+ " (q): Linear(in_features=2048, out_features=2048, bias=False)\n",
207
+ " (k): Linear(in_features=2048, out_features=2048, bias=False)\n",
208
+ " (v): Linear(in_features=2048, out_features=2048, bias=False)\n",
209
+ " (o): Linear(in_features=2048, out_features=2048, bias=False)\n",
210
+ " )\n",
211
+ " (layer_norm): T5LayerNorm()\n",
212
+ " (dropout): Dropout(p=0.1, inplace=False)\n",
213
+ " )\n",
214
+ " (2): T5LayerFF(\n",
215
+ " (DenseReluDense): T5DenseGatedActDense(\n",
216
+ " (wi_0): Linear(in_features=2048, out_features=5120, bias=False)\n",
217
+ " (wi_1): Linear(in_features=2048, out_features=5120, bias=False)\n",
218
+ " (wo): Linear(in_features=5120, out_features=2048, bias=False)\n",
219
+ " (dropout): Dropout(p=0.1, inplace=False)\n",
220
+ " (act): NewGELUActivation()\n",
221
+ " )\n",
222
+ " (layer_norm): T5LayerNorm()\n",
223
+ " (dropout): Dropout(p=0.1, inplace=False)\n",
224
+ " )\n",
225
+ " )\n",
226
+ " )\n",
227
+ " (1-23): 23 x T5Block(\n",
228
+ " (layer): ModuleList(\n",
229
+ " (0): T5LayerSelfAttention(\n",
230
+ " (SelfAttention): T5Attention(\n",
231
+ " (q): Linear(in_features=2048, out_features=2048, bias=False)\n",
232
+ " (k): Linear(in_features=2048, out_features=2048, bias=False)\n",
233
+ " (v): Linear(in_features=2048, out_features=2048, bias=False)\n",
234
+ " (o): Linear(in_features=2048, out_features=2048, bias=False)\n",
235
+ " )\n",
236
+ " (layer_norm): T5LayerNorm()\n",
237
+ " (dropout): Dropout(p=0.1, inplace=False)\n",
238
+ " )\n",
239
+ " (1): T5LayerCrossAttention(\n",
240
+ " (EncDecAttention): T5Attention(\n",
241
+ " (q): Linear(in_features=2048, out_features=2048, bias=False)\n",
242
+ " (k): Linear(in_features=2048, out_features=2048, bias=False)\n",
243
+ " (v): Linear(in_features=2048, out_features=2048, bias=False)\n",
244
+ " (o): Linear(in_features=2048, out_features=2048, bias=False)\n",
245
+ " )\n",
246
+ " (layer_norm): T5LayerNorm()\n",
247
+ " (dropout): Dropout(p=0.1, inplace=False)\n",
248
+ " )\n",
249
+ " (2): T5LayerFF(\n",
250
+ " (DenseReluDense): T5DenseGatedActDense(\n",
251
+ " (wi_0): Linear(in_features=2048, out_features=5120, bias=False)\n",
252
+ " (wi_1): Linear(in_features=2048, out_features=5120, bias=False)\n",
253
+ " (wo): Linear(in_features=5120, out_features=2048, bias=False)\n",
254
+ " (dropout): Dropout(p=0.1, inplace=False)\n",
255
+ " (act): NewGELUActivation()\n",
256
+ " )\n",
257
+ " (layer_norm): T5LayerNorm()\n",
258
+ " (dropout): Dropout(p=0.1, inplace=False)\n",
259
+ " )\n",
260
+ " )\n",
261
+ " )\n",
262
+ " )\n",
263
+ " (final_layer_norm): T5LayerNorm()\n",
264
+ " (dropout): Dropout(p=0.1, inplace=False)\n",
265
+ " )\n",
266
+ " (lm_head): Linear(in_features=2048, out_features=32128, bias=False)\n",
267
+ ")"
268
+ ]
269
  },
270
+ "metadata": {},
271
+ "execution_count": 4
272
  }
273
+ ]
274
+ },
275
+ {
276
+ "cell_type": "code",
277
+ "source": [
278
+ "from transformers import Trainer, TrainingArguments\n",
279
+ "\n",
280
+ "# Define training arguments\n",
281
+ "training_args = TrainingArguments(\n",
282
+ " output_dir=\"./output\",\n",
283
+ " num_train_epochs=3,\n",
284
+ " per_device_train_batch_size=8,\n",
285
+ " # ... other training arguments\n",
286
+ ")\n",
287
+ "\n",
288
+ "# Create a Trainer object\n",
289
+ "trainer = Trainer(\n",
290
+ " model=model,\n",
291
+ " args=training_args,\n",
292
+ " train_dataset=Financial.csv, # Load your training dataset\n",
293
+ ")\n",
294
+ "\n",
295
+ "# Start training\n",
296
+ "trainer.train()"
297
  ],
298
+ "metadata": {
299
+ "id": "j_Y33_fMfGd9"
300
+ },
301
+ "id": "j_Y33_fMfGd9",
302
+ "execution_count": null,
303
+ "outputs": []
304
  },
305
  {
306
  "cell_type": "code",
 
313
  "id": "7ce8ee88e61ac738"
314
  },
315
  "source": [
316
+ "def get_response(prompt, max_new_tokens=50):\n",
317
+ " inputs = tokenizer(prompt, return_tensors=\"pt\").to(device)\n",
318
+ " outputs = model.generate(**inputs, max_new_tokens=max_new_tokens, temperature= 0.0001, do_sample=True)\n",
319
+ " response = tokenizer.decode(outputs[0], skip_special_tokens=True) # Use indexing instead of calling\n",
320
  " return response"
321
  ],
322
  "outputs": [],
323
+ "execution_count": 15
324
  },
325
  {
326
  "metadata": {
 
329
  },
330
  "colab": {
331
  "base_uri": "https://localhost:8080/",
332
+ "height": 36
333
  },
334
  "id": "de9f0fcc6dc9fa82",
335
+ "outputId": "91cc9e78-4e4e-4f29-99b4-94fdaedccd1b"
336
  },
337
  "cell_type": "code",
338
  "source": [
339
+ "prompt ='What is capital of Madhya Pradesh'\n",
340
+ "get_response(prompt, max_new_tokens=50)"
341
  ],
342
  "id": "de9f0fcc6dc9fa82",
343
  "outputs": [
 
345
  "output_type": "execute_result",
346
  "data": {
347
  "text/plain": [
348
+ "'bhopal'"
349
  ],
350
  "application/vnd.google.colaboratory.intrinsic+json": {
351
  "type": "string"
352
  }
353
  },
354
  "metadata": {},
355
+ "execution_count": 34
356
  }
357
  ],
358
+ "execution_count": 34
359
+ },
360
+ {
361
+ "cell_type": "code",
362
+ "source": [],
363
+ "metadata": {
364
+ "id": "s77Nu-3UaeZ9"
365
+ },
366
+ "id": "s77Nu-3UaeZ9",
367
+ "execution_count": null,
368
+ "outputs": []
369
  }
370
  ],
371
  "metadata": {
372
  "kernelspec": {
373
+ "display_name": "Python 3",
 
374
  "name": "python3"
375
  },
376
  "language_info": {
 
386
  "version": "3.12.4"
387
  },
388
  "colab": {
389
+ "provenance": [],
390
+ "gpuType": "T4"
391
  },
392
  "widgets": {
393
  "application/vnd.jupyter.widget-state+json": {
394
+ "6ecdc71d497b4ab7bc6dca2ace0bd656": {
395
  "model_module": "@jupyter-widgets/controls",
396
  "model_name": "HBoxModel",
397
  "model_module_version": "1.5.0",
 
406
  "_view_name": "HBoxView",
407
  "box_style": "",
408
  "children": [
409
+ "IPY_MODEL_2af56a7c045d4bc294c6cb6d362a8120",
410
+ "IPY_MODEL_f5611ea1eab5406eb5796ffed1218a0c",
411
+ "IPY_MODEL_68855bdfbeed46e7b7e3d82a9b4c0988"
412
  ],
413
+ "layout": "IPY_MODEL_24b9e5603be54e72b2bcf99be716b97d"
414
  }
415
  },
416
+ "2af56a7c045d4bc294c6cb6d362a8120": {
417
  "model_module": "@jupyter-widgets/controls",
418
  "model_name": "HTMLModel",
419
  "model_module_version": "1.5.0",
 
428
  "_view_name": "HTMLView",
429
  "description": "",
430
  "description_tooltip": null,
431
+ "layout": "IPY_MODEL_64c42ce2cab04fb0aed8e65efebdfd11",
432
  "placeholder": "​",
433
+ "style": "IPY_MODEL_85fc022f4f1841e480af3f5496f36fe0",
434
+ "value": "Loading checkpoint shards: 100%"
435
  }
436
  },
437
+ "f5611ea1eab5406eb5796ffed1218a0c": {
438
  "model_module": "@jupyter-widgets/controls",
439
  "model_name": "FloatProgressModel",
440
  "model_module_version": "1.5.0",
 
450
  "bar_style": "success",
451
  "description": "",
452
  "description_tooltip": null,
453
+ "layout": "IPY_MODEL_2e384f5f20db45579dc708ede8b15c87",
454
+ "max": 2,
455
  "min": 0,
456
  "orientation": "horizontal",
457
+ "style": "IPY_MODEL_c9409991f35d4966989cf936f88fe99a",
458
+ "value": 2
459
  }
460
  },
461
+ "68855bdfbeed46e7b7e3d82a9b4c0988": {
462
  "model_module": "@jupyter-widgets/controls",
463
  "model_name": "HTMLModel",
464
  "model_module_version": "1.5.0",
 
473
  "_view_name": "HTMLView",
474
  "description": "",
475
  "description_tooltip": null,
476
+ "layout": "IPY_MODEL_0f4ba7cacc6d49599f3178b738092e09",
477
  "placeholder": "​",
478
+ "style": "IPY_MODEL_ddf9f8decaf948bbb67b8f71610c31b1",
479
+ "value": " 2/2 [00:52&lt;00:00, 23.08s/it]"
480
  }
481
  },
482
+ "24b9e5603be54e72b2bcf99be716b97d": {
483
  "model_module": "@jupyter-widgets/base",
484
  "model_name": "LayoutModel",
485
  "model_module_version": "1.2.0",
 
531
  "width": null
532
  }
533
  },
534
+ "64c42ce2cab04fb0aed8e65efebdfd11": {
535
  "model_module": "@jupyter-widgets/base",
536
  "model_name": "LayoutModel",
537
  "model_module_version": "1.2.0",
 
583
  "width": null
584
  }
585
  },
586
+ "85fc022f4f1841e480af3f5496f36fe0": {
587
  "model_module": "@jupyter-widgets/controls",
588
  "model_name": "DescriptionStyleModel",
589
  "model_module_version": "1.5.0",
 
598
  "description_width": ""
599
  }
600
  },
601
+ "2e384f5f20db45579dc708ede8b15c87": {
602
  "model_module": "@jupyter-widgets/base",
603
  "model_name": "LayoutModel",
604
  "model_module_version": "1.2.0",
 
650
  "width": null
651
  }
652
  },
653
+ "c9409991f35d4966989cf936f88fe99a": {
654
  "model_module": "@jupyter-widgets/controls",
655
  "model_name": "ProgressStyleModel",
656
  "model_module_version": "1.5.0",
 
666
  "description_width": ""
667
  }
668
  },
669
+ "0f4ba7cacc6d49599f3178b738092e09": {
670
  "model_module": "@jupyter-widgets/base",
671
  "model_name": "LayoutModel",
672
  "model_module_version": "1.2.0",
 
718
  "width": null
719
  }
720
  },
721
+ "ddf9f8decaf948bbb67b8f71610c31b1": {
722
  "model_module": "@jupyter-widgets/controls",
723
  "model_name": "DescriptionStyleModel",
724
  "model_module_version": "1.5.0",
 
733
  "description_width": ""
734
  }
735
  },
736
+ "46756e51804c48ebbabee753ea455457": {
737
  "model_module": "@jupyter-widgets/controls",
738
  "model_name": "HBoxModel",
739
  "model_module_version": "1.5.0",
 
748
  "_view_name": "HBoxView",
749
  "box_style": "",
750
  "children": [
751
+ "IPY_MODEL_f98f7a5726d6434d9087fe521e136795",
752
+ "IPY_MODEL_cc3c582d99d24da1b7bf1f8168961749",
753
+ "IPY_MODEL_81b3bc430cc7409f9f51cd222fb0ca0e"
754
  ],
755
+ "layout": "IPY_MODEL_f66bc695f26e4603ac38f23168e65763"
756
  }
757
  },
758
+ "f98f7a5726d6434d9087fe521e136795": {
759
  "model_module": "@jupyter-widgets/controls",
760
  "model_name": "HTMLModel",
761
  "model_module_version": "1.5.0",
 
770
  "_view_name": "HTMLView",
771
  "description": "",
772
  "description_tooltip": null,
773
+ "layout": "IPY_MODEL_d4a74b31d8064a5bba43849ed4b6658b",
774
  "placeholder": "​",
775
+ "style": "IPY_MODEL_72731803b79343b0b6a61d722009b6b6",
776
+ "value": "generation_config.json: 100%"
777
  }
778
  },
779
+ "cc3c582d99d24da1b7bf1f8168961749": {
780
  "model_module": "@jupyter-widgets/controls",
781
  "model_name": "FloatProgressModel",
782
  "model_module_version": "1.5.0",
 
792
  "bar_style": "success",
793
  "description": "",
794
  "description_tooltip": null,
795
+ "layout": "IPY_MODEL_0ff317589550455f8eef5021912dd27c",
796
+ "max": 147,
797
  "min": 0,
798
  "orientation": "horizontal",
799
+ "style": "IPY_MODEL_7e110f5c777442c4aab4ea06a27a3dc1",
800
+ "value": 147
801
  }
802
  },
803
+ "81b3bc430cc7409f9f51cd222fb0ca0e": {
804
  "model_module": "@jupyter-widgets/controls",
805
  "model_name": "HTMLModel",
806
  "model_module_version": "1.5.0",
 
815
  "_view_name": "HTMLView",
816
  "description": "",
817
  "description_tooltip": null,
818
+ "layout": "IPY_MODEL_ae1843d43b0f4199ba61e8c9e962f2b3",
819
  "placeholder": "​",
820
+ "style": "IPY_MODEL_d72cd66e520a4eea83c06d7df23ccc0e",
821
+ "value": " 147/147 [00:00&lt;00:00, 7.99kB/s]"
822
  }
823
  },
824
+ "f66bc695f26e4603ac38f23168e65763": {
825
  "model_module": "@jupyter-widgets/base",
826
  "model_name": "LayoutModel",
827
  "model_module_version": "1.2.0",
 
873
  "width": null
874
  }
875
  },
876
+ "d4a74b31d8064a5bba43849ed4b6658b": {
877
  "model_module": "@jupyter-widgets/base",
878
  "model_name": "LayoutModel",
879
  "model_module_version": "1.2.0",
 
925
  "width": null
926
  }
927
  },
928
+ "72731803b79343b0b6a61d722009b6b6": {
929
  "model_module": "@jupyter-widgets/controls",
930
  "model_name": "DescriptionStyleModel",
931
  "model_module_version": "1.5.0",
 
940
  "description_width": ""
941
  }
942
  },
943
+ "0ff317589550455f8eef5021912dd27c": {
944
  "model_module": "@jupyter-widgets/base",
945
  "model_name": "LayoutModel",
946
  "model_module_version": "1.2.0",
 
992
  "width": null
993
  }
994
  },
995
+ "7e110f5c777442c4aab4ea06a27a3dc1": {
996
  "model_module": "@jupyter-widgets/controls",
997
  "model_name": "ProgressStyleModel",
998
  "model_module_version": "1.5.0",
 
1008
  "description_width": ""
1009
  }
1010
  },
1011
+ "ae1843d43b0f4199ba61e8c9e962f2b3": {
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1012
  "model_module": "@jupyter-widgets/base",
1013
  "model_name": "LayoutModel",
1014
  "model_module_version": "1.2.0",
 
1060
  "width": null
1061
  }
1062
  },
1063
+ "d72cd66e520a4eea83c06d7df23ccc0e": {
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1064
  "model_module": "@jupyter-widgets/controls",
1065
  "model_name": "DescriptionStyleModel",
1066
  "model_module_version": "1.5.0",
 
1076
  }
1077
  }
1078
  }
1079
+ },
1080
+ "accelerator": "GPU"
1081
  },
1082
  "nbformat": 4,
1083
  "nbformat_minor": 5