Upload RecurrentGemma_colab.ipynb

#1
by reach-vb HF staff - opened
Files changed (1) hide show
  1. RecurrentGemma_colab.ipynb +1308 -0
RecurrentGemma_colab.ipynb ADDED
@@ -0,0 +1,1308 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "nbformat": 4,
3
+ "nbformat_minor": 0,
4
+ "metadata": {
5
+ "colab": {
6
+ "provenance": [],
7
+ "gpuType": "T4"
8
+ },
9
+ "kernelspec": {
10
+ "name": "python3",
11
+ "display_name": "Python 3"
12
+ },
13
+ "language_info": {
14
+ "name": "python"
15
+ },
16
+ "accelerator": "GPU",
17
+ "widgets": {
18
+ "application/vnd.jupyter.widget-state+json": {
19
+ "e6f2b94e3bb345859811226dc345a6e3": {
20
+ "model_module": "@jupyter-widgets/controls",
21
+ "model_name": "HBoxModel",
22
+ "model_module_version": "1.5.0",
23
+ "state": {
24
+ "_dom_classes": [],
25
+ "_model_module": "@jupyter-widgets/controls",
26
+ "_model_module_version": "1.5.0",
27
+ "_model_name": "HBoxModel",
28
+ "_view_count": null,
29
+ "_view_module": "@jupyter-widgets/controls",
30
+ "_view_module_version": "1.5.0",
31
+ "_view_name": "HBoxView",
32
+ "box_style": "",
33
+ "children": [
34
+ "IPY_MODEL_ce12a18f666740eabe1f71be5152e7d8",
35
+ "IPY_MODEL_76a4282443194092af1039de43c523c6",
36
+ "IPY_MODEL_3775878f2e484e84b6cb0971d2d35a7f"
37
+ ],
38
+ "layout": "IPY_MODEL_df936531829f4c1580c04719de77059c"
39
+ }
40
+ },
41
+ "ce12a18f666740eabe1f71be5152e7d8": {
42
+ "model_module": "@jupyter-widgets/controls",
43
+ "model_name": "HTMLModel",
44
+ "model_module_version": "1.5.0",
45
+ "state": {
46
+ "_dom_classes": [],
47
+ "_model_module": "@jupyter-widgets/controls",
48
+ "_model_module_version": "1.5.0",
49
+ "_model_name": "HTMLModel",
50
+ "_view_count": null,
51
+ "_view_module": "@jupyter-widgets/controls",
52
+ "_view_module_version": "1.5.0",
53
+ "_view_name": "HTMLView",
54
+ "description": "",
55
+ "description_tooltip": null,
56
+ "layout": "IPY_MODEL_ab60c88077984531aa08150c72fa7ab5",
57
+ "placeholder": "​",
58
+ "style": "IPY_MODEL_ad9901a17d3d4b1b898941d16c16f1cb",
59
+ "value": "tokenizer_config.json: 100%"
60
+ }
61
+ },
62
+ "76a4282443194092af1039de43c523c6": {
63
+ "model_module": "@jupyter-widgets/controls",
64
+ "model_name": "FloatProgressModel",
65
+ "model_module_version": "1.5.0",
66
+ "state": {
67
+ "_dom_classes": [],
68
+ "_model_module": "@jupyter-widgets/controls",
69
+ "_model_module_version": "1.5.0",
70
+ "_model_name": "FloatProgressModel",
71
+ "_view_count": null,
72
+ "_view_module": "@jupyter-widgets/controls",
73
+ "_view_module_version": "1.5.0",
74
+ "_view_name": "ProgressView",
75
+ "bar_style": "success",
76
+ "description": "",
77
+ "description_tooltip": null,
78
+ "layout": "IPY_MODEL_f2fbf227d64e451681083a0ca189405c",
79
+ "max": 40529,
80
+ "min": 0,
81
+ "orientation": "horizontal",
82
+ "style": "IPY_MODEL_72364dfb4d994115bfd049dae5f53423",
83
+ "value": 40529
84
+ }
85
+ },
86
+ "3775878f2e484e84b6cb0971d2d35a7f": {
87
+ "model_module": "@jupyter-widgets/controls",
88
+ "model_name": "HTMLModel",
89
+ "model_module_version": "1.5.0",
90
+ "state": {
91
+ "_dom_classes": [],
92
+ "_model_module": "@jupyter-widgets/controls",
93
+ "_model_module_version": "1.5.0",
94
+ "_model_name": "HTMLModel",
95
+ "_view_count": null,
96
+ "_view_module": "@jupyter-widgets/controls",
97
+ "_view_module_version": "1.5.0",
98
+ "_view_name": "HTMLView",
99
+ "description": "",
100
+ "description_tooltip": null,
101
+ "layout": "IPY_MODEL_fcbe52122eeb466d91b44011f7f9bc47",
102
+ "placeholder": "​",
103
+ "style": "IPY_MODEL_c0915cfb93634ede86568eface4115d8",
104
+ "value": " 40.5k/40.5k [00:00<00:00, 670kB/s]"
105
+ }
106
+ },
107
+ "df936531829f4c1580c04719de77059c": {
108
+ "model_module": "@jupyter-widgets/base",
109
+ "model_name": "LayoutModel",
110
+ "model_module_version": "1.2.0",
111
+ "state": {
112
+ "_model_module": "@jupyter-widgets/base",
113
+ "_model_module_version": "1.2.0",
114
+ "_model_name": "LayoutModel",
115
+ "_view_count": null,
116
+ "_view_module": "@jupyter-widgets/base",
117
+ "_view_module_version": "1.2.0",
118
+ "_view_name": "LayoutView",
119
+ "align_content": null,
120
+ "align_items": null,
121
+ "align_self": null,
122
+ "border": null,
123
+ "bottom": null,
124
+ "display": null,
125
+ "flex": null,
126
+ "flex_flow": null,
127
+ "grid_area": null,
128
+ "grid_auto_columns": null,
129
+ "grid_auto_flow": null,
130
+ "grid_auto_rows": null,
131
+ "grid_column": null,
132
+ "grid_gap": null,
133
+ "grid_row": null,
134
+ "grid_template_areas": null,
135
+ "grid_template_columns": null,
136
+ "grid_template_rows": null,
137
+ "height": null,
138
+ "justify_content": null,
139
+ "justify_items": null,
140
+ "left": null,
141
+ "margin": null,
142
+ "max_height": null,
143
+ "max_width": null,
144
+ "min_height": null,
145
+ "min_width": null,
146
+ "object_fit": null,
147
+ "object_position": null,
148
+ "order": null,
149
+ "overflow": null,
150
+ "overflow_x": null,
151
+ "overflow_y": null,
152
+ "padding": null,
153
+ "right": null,
154
+ "top": null,
155
+ "visibility": null,
156
+ "width": null
157
+ }
158
+ },
159
+ "ab60c88077984531aa08150c72fa7ab5": {
160
+ "model_module": "@jupyter-widgets/base",
161
+ "model_name": "LayoutModel",
162
+ "model_module_version": "1.2.0",
163
+ "state": {
164
+ "_model_module": "@jupyter-widgets/base",
165
+ "_model_module_version": "1.2.0",
166
+ "_model_name": "LayoutModel",
167
+ "_view_count": null,
168
+ "_view_module": "@jupyter-widgets/base",
169
+ "_view_module_version": "1.2.0",
170
+ "_view_name": "LayoutView",
171
+ "align_content": null,
172
+ "align_items": null,
173
+ "align_self": null,
174
+ "border": null,
175
+ "bottom": null,
176
+ "display": null,
177
+ "flex": null,
178
+ "flex_flow": null,
179
+ "grid_area": null,
180
+ "grid_auto_columns": null,
181
+ "grid_auto_flow": null,
182
+ "grid_auto_rows": null,
183
+ "grid_column": null,
184
+ "grid_gap": null,
185
+ "grid_row": null,
186
+ "grid_template_areas": null,
187
+ "grid_template_columns": null,
188
+ "grid_template_rows": null,
189
+ "height": null,
190
+ "justify_content": null,
191
+ "justify_items": null,
192
+ "left": null,
193
+ "margin": null,
194
+ "max_height": null,
195
+ "max_width": null,
196
+ "min_height": null,
197
+ "min_width": null,
198
+ "object_fit": null,
199
+ "object_position": null,
200
+ "order": null,
201
+ "overflow": null,
202
+ "overflow_x": null,
203
+ "overflow_y": null,
204
+ "padding": null,
205
+ "right": null,
206
+ "top": null,
207
+ "visibility": null,
208
+ "width": null
209
+ }
210
+ },
211
+ "ad9901a17d3d4b1b898941d16c16f1cb": {
212
+ "model_module": "@jupyter-widgets/controls",
213
+ "model_name": "DescriptionStyleModel",
214
+ "model_module_version": "1.5.0",
215
+ "state": {
216
+ "_model_module": "@jupyter-widgets/controls",
217
+ "_model_module_version": "1.5.0",
218
+ "_model_name": "DescriptionStyleModel",
219
+ "_view_count": null,
220
+ "_view_module": "@jupyter-widgets/base",
221
+ "_view_module_version": "1.2.0",
222
+ "_view_name": "StyleView",
223
+ "description_width": ""
224
+ }
225
+ },
226
+ "f2fbf227d64e451681083a0ca189405c": {
227
+ "model_module": "@jupyter-widgets/base",
228
+ "model_name": "LayoutModel",
229
+ "model_module_version": "1.2.0",
230
+ "state": {
231
+ "_model_module": "@jupyter-widgets/base",
232
+ "_model_module_version": "1.2.0",
233
+ "_model_name": "LayoutModel",
234
+ "_view_count": null,
235
+ "_view_module": "@jupyter-widgets/base",
236
+ "_view_module_version": "1.2.0",
237
+ "_view_name": "LayoutView",
238
+ "align_content": null,
239
+ "align_items": null,
240
+ "align_self": null,
241
+ "border": null,
242
+ "bottom": null,
243
+ "display": null,
244
+ "flex": null,
245
+ "flex_flow": null,
246
+ "grid_area": null,
247
+ "grid_auto_columns": null,
248
+ "grid_auto_flow": null,
249
+ "grid_auto_rows": null,
250
+ "grid_column": null,
251
+ "grid_gap": null,
252
+ "grid_row": null,
253
+ "grid_template_areas": null,
254
+ "grid_template_columns": null,
255
+ "grid_template_rows": null,
256
+ "height": null,
257
+ "justify_content": null,
258
+ "justify_items": null,
259
+ "left": null,
260
+ "margin": null,
261
+ "max_height": null,
262
+ "max_width": null,
263
+ "min_height": null,
264
+ "min_width": null,
265
+ "object_fit": null,
266
+ "object_position": null,
267
+ "order": null,
268
+ "overflow": null,
269
+ "overflow_x": null,
270
+ "overflow_y": null,
271
+ "padding": null,
272
+ "right": null,
273
+ "top": null,
274
+ "visibility": null,
275
+ "width": null
276
+ }
277
+ },
278
+ "72364dfb4d994115bfd049dae5f53423": {
279
+ "model_module": "@jupyter-widgets/controls",
280
+ "model_name": "ProgressStyleModel",
281
+ "model_module_version": "1.5.0",
282
+ "state": {
283
+ "_model_module": "@jupyter-widgets/controls",
284
+ "_model_module_version": "1.5.0",
285
+ "_model_name": "ProgressStyleModel",
286
+ "_view_count": null,
287
+ "_view_module": "@jupyter-widgets/base",
288
+ "_view_module_version": "1.2.0",
289
+ "_view_name": "StyleView",
290
+ "bar_color": null,
291
+ "description_width": ""
292
+ }
293
+ },
294
+ "fcbe52122eeb466d91b44011f7f9bc47": {
295
+ "model_module": "@jupyter-widgets/base",
296
+ "model_name": "LayoutModel",
297
+ "model_module_version": "1.2.0",
298
+ "state": {
299
+ "_model_module": "@jupyter-widgets/base",
300
+ "_model_module_version": "1.2.0",
301
+ "_model_name": "LayoutModel",
302
+ "_view_count": null,
303
+ "_view_module": "@jupyter-widgets/base",
304
+ "_view_module_version": "1.2.0",
305
+ "_view_name": "LayoutView",
306
+ "align_content": null,
307
+ "align_items": null,
308
+ "align_self": null,
309
+ "border": null,
310
+ "bottom": null,
311
+ "display": null,
312
+ "flex": null,
313
+ "flex_flow": null,
314
+ "grid_area": null,
315
+ "grid_auto_columns": null,
316
+ "grid_auto_flow": null,
317
+ "grid_auto_rows": null,
318
+ "grid_column": null,
319
+ "grid_gap": null,
320
+ "grid_row": null,
321
+ "grid_template_areas": null,
322
+ "grid_template_columns": null,
323
+ "grid_template_rows": null,
324
+ "height": null,
325
+ "justify_content": null,
326
+ "justify_items": null,
327
+ "left": null,
328
+ "margin": null,
329
+ "max_height": null,
330
+ "max_width": null,
331
+ "min_height": null,
332
+ "min_width": null,
333
+ "object_fit": null,
334
+ "object_position": null,
335
+ "order": null,
336
+ "overflow": null,
337
+ "overflow_x": null,
338
+ "overflow_y": null,
339
+ "padding": null,
340
+ "right": null,
341
+ "top": null,
342
+ "visibility": null,
343
+ "width": null
344
+ }
345
+ },
346
+ "c0915cfb93634ede86568eface4115d8": {
347
+ "model_module": "@jupyter-widgets/controls",
348
+ "model_name": "DescriptionStyleModel",
349
+ "model_module_version": "1.5.0",
350
+ "state": {
351
+ "_model_module": "@jupyter-widgets/controls",
352
+ "_model_module_version": "1.5.0",
353
+ "_model_name": "DescriptionStyleModel",
354
+ "_view_count": null,
355
+ "_view_module": "@jupyter-widgets/base",
356
+ "_view_module_version": "1.2.0",
357
+ "_view_name": "StyleView",
358
+ "description_width": ""
359
+ }
360
+ },
361
+ "401cecca00fb42b29f1ec3fd5cfa4396": {
362
+ "model_module": "@jupyter-widgets/controls",
363
+ "model_name": "HBoxModel",
364
+ "model_module_version": "1.5.0",
365
+ "state": {
366
+ "_dom_classes": [],
367
+ "_model_module": "@jupyter-widgets/controls",
368
+ "_model_module_version": "1.5.0",
369
+ "_model_name": "HBoxModel",
370
+ "_view_count": null,
371
+ "_view_module": "@jupyter-widgets/controls",
372
+ "_view_module_version": "1.5.0",
373
+ "_view_name": "HBoxView",
374
+ "box_style": "",
375
+ "children": [
376
+ "IPY_MODEL_e98c3904a7e346a5870c5ac768cd6a98",
377
+ "IPY_MODEL_d908df5a5a8945dc88f8d0f147245bbd",
378
+ "IPY_MODEL_f1a29b9608244a9db3cc919ad149ef48"
379
+ ],
380
+ "layout": "IPY_MODEL_d19c6905802d46c4beb6fe8886cb6e8c"
381
+ }
382
+ },
383
+ "e98c3904a7e346a5870c5ac768cd6a98": {
384
+ "model_module": "@jupyter-widgets/controls",
385
+ "model_name": "HTMLModel",
386
+ "model_module_version": "1.5.0",
387
+ "state": {
388
+ "_dom_classes": [],
389
+ "_model_module": "@jupyter-widgets/controls",
390
+ "_model_module_version": "1.5.0",
391
+ "_model_name": "HTMLModel",
392
+ "_view_count": null,
393
+ "_view_module": "@jupyter-widgets/controls",
394
+ "_view_module_version": "1.5.0",
395
+ "_view_name": "HTMLView",
396
+ "description": "",
397
+ "description_tooltip": null,
398
+ "layout": "IPY_MODEL_f7d7bc20d2ba40eeb576e8865cdbb8ec",
399
+ "placeholder": "​",
400
+ "style": "IPY_MODEL_1a431c4a814941169c8feff1b4741052",
401
+ "value": "Downloading shards: 100%"
402
+ }
403
+ },
404
+ "d908df5a5a8945dc88f8d0f147245bbd": {
405
+ "model_module": "@jupyter-widgets/controls",
406
+ "model_name": "FloatProgressModel",
407
+ "model_module_version": "1.5.0",
408
+ "state": {
409
+ "_dom_classes": [],
410
+ "_model_module": "@jupyter-widgets/controls",
411
+ "_model_module_version": "1.5.0",
412
+ "_model_name": "FloatProgressModel",
413
+ "_view_count": null,
414
+ "_view_module": "@jupyter-widgets/controls",
415
+ "_view_module_version": "1.5.0",
416
+ "_view_name": "ProgressView",
417
+ "bar_style": "success",
418
+ "description": "",
419
+ "description_tooltip": null,
420
+ "layout": "IPY_MODEL_21a66f11f21e4913a1a5a975727916f0",
421
+ "max": 2,
422
+ "min": 0,
423
+ "orientation": "horizontal",
424
+ "style": "IPY_MODEL_b931a5080c154b7dbbdcebf1a48aa9a3",
425
+ "value": 2
426
+ }
427
+ },
428
+ "f1a29b9608244a9db3cc919ad149ef48": {
429
+ "model_module": "@jupyter-widgets/controls",
430
+ "model_name": "HTMLModel",
431
+ "model_module_version": "1.5.0",
432
+ "state": {
433
+ "_dom_classes": [],
434
+ "_model_module": "@jupyter-widgets/controls",
435
+ "_model_module_version": "1.5.0",
436
+ "_model_name": "HTMLModel",
437
+ "_view_count": null,
438
+ "_view_module": "@jupyter-widgets/controls",
439
+ "_view_module_version": "1.5.0",
440
+ "_view_name": "HTMLView",
441
+ "description": "",
442
+ "description_tooltip": null,
443
+ "layout": "IPY_MODEL_6cc6f7e129fc47c9ac57d38f713c50ea",
444
+ "placeholder": "​",
445
+ "style": "IPY_MODEL_896598f7441c4e84b4c7963b520d6daf",
446
+ "value": " 2/2 [00:00<00:00,  7.70it/s]"
447
+ }
448
+ },
449
+ "d19c6905802d46c4beb6fe8886cb6e8c": {
450
+ "model_module": "@jupyter-widgets/base",
451
+ "model_name": "LayoutModel",
452
+ "model_module_version": "1.2.0",
453
+ "state": {
454
+ "_model_module": "@jupyter-widgets/base",
455
+ "_model_module_version": "1.2.0",
456
+ "_model_name": "LayoutModel",
457
+ "_view_count": null,
458
+ "_view_module": "@jupyter-widgets/base",
459
+ "_view_module_version": "1.2.0",
460
+ "_view_name": "LayoutView",
461
+ "align_content": null,
462
+ "align_items": null,
463
+ "align_self": null,
464
+ "border": null,
465
+ "bottom": null,
466
+ "display": null,
467
+ "flex": null,
468
+ "flex_flow": null,
469
+ "grid_area": null,
470
+ "grid_auto_columns": null,
471
+ "grid_auto_flow": null,
472
+ "grid_auto_rows": null,
473
+ "grid_column": null,
474
+ "grid_gap": null,
475
+ "grid_row": null,
476
+ "grid_template_areas": null,
477
+ "grid_template_columns": null,
478
+ "grid_template_rows": null,
479
+ "height": null,
480
+ "justify_content": null,
481
+ "justify_items": null,
482
+ "left": null,
483
+ "margin": null,
484
+ "max_height": null,
485
+ "max_width": null,
486
+ "min_height": null,
487
+ "min_width": null,
488
+ "object_fit": null,
489
+ "object_position": null,
490
+ "order": null,
491
+ "overflow": null,
492
+ "overflow_x": null,
493
+ "overflow_y": null,
494
+ "padding": null,
495
+ "right": null,
496
+ "top": null,
497
+ "visibility": null,
498
+ "width": null
499
+ }
500
+ },
501
+ "f7d7bc20d2ba40eeb576e8865cdbb8ec": {
502
+ "model_module": "@jupyter-widgets/base",
503
+ "model_name": "LayoutModel",
504
+ "model_module_version": "1.2.0",
505
+ "state": {
506
+ "_model_module": "@jupyter-widgets/base",
507
+ "_model_module_version": "1.2.0",
508
+ "_model_name": "LayoutModel",
509
+ "_view_count": null,
510
+ "_view_module": "@jupyter-widgets/base",
511
+ "_view_module_version": "1.2.0",
512
+ "_view_name": "LayoutView",
513
+ "align_content": null,
514
+ "align_items": null,
515
+ "align_self": null,
516
+ "border": null,
517
+ "bottom": null,
518
+ "display": null,
519
+ "flex": null,
520
+ "flex_flow": null,
521
+ "grid_area": null,
522
+ "grid_auto_columns": null,
523
+ "grid_auto_flow": null,
524
+ "grid_auto_rows": null,
525
+ "grid_column": null,
526
+ "grid_gap": null,
527
+ "grid_row": null,
528
+ "grid_template_areas": null,
529
+ "grid_template_columns": null,
530
+ "grid_template_rows": null,
531
+ "height": null,
532
+ "justify_content": null,
533
+ "justify_items": null,
534
+ "left": null,
535
+ "margin": null,
536
+ "max_height": null,
537
+ "max_width": null,
538
+ "min_height": null,
539
+ "min_width": null,
540
+ "object_fit": null,
541
+ "object_position": null,
542
+ "order": null,
543
+ "overflow": null,
544
+ "overflow_x": null,
545
+ "overflow_y": null,
546
+ "padding": null,
547
+ "right": null,
548
+ "top": null,
549
+ "visibility": null,
550
+ "width": null
551
+ }
552
+ },
553
+ "1a431c4a814941169c8feff1b4741052": {
554
+ "model_module": "@jupyter-widgets/controls",
555
+ "model_name": "DescriptionStyleModel",
556
+ "model_module_version": "1.5.0",
557
+ "state": {
558
+ "_model_module": "@jupyter-widgets/controls",
559
+ "_model_module_version": "1.5.0",
560
+ "_model_name": "DescriptionStyleModel",
561
+ "_view_count": null,
562
+ "_view_module": "@jupyter-widgets/base",
563
+ "_view_module_version": "1.2.0",
564
+ "_view_name": "StyleView",
565
+ "description_width": ""
566
+ }
567
+ },
568
+ "21a66f11f21e4913a1a5a975727916f0": {
569
+ "model_module": "@jupyter-widgets/base",
570
+ "model_name": "LayoutModel",
571
+ "model_module_version": "1.2.0",
572
+ "state": {
573
+ "_model_module": "@jupyter-widgets/base",
574
+ "_model_module_version": "1.2.0",
575
+ "_model_name": "LayoutModel",
576
+ "_view_count": null,
577
+ "_view_module": "@jupyter-widgets/base",
578
+ "_view_module_version": "1.2.0",
579
+ "_view_name": "LayoutView",
580
+ "align_content": null,
581
+ "align_items": null,
582
+ "align_self": null,
583
+ "border": null,
584
+ "bottom": null,
585
+ "display": null,
586
+ "flex": null,
587
+ "flex_flow": null,
588
+ "grid_area": null,
589
+ "grid_auto_columns": null,
590
+ "grid_auto_flow": null,
591
+ "grid_auto_rows": null,
592
+ "grid_column": null,
593
+ "grid_gap": null,
594
+ "grid_row": null,
595
+ "grid_template_areas": null,
596
+ "grid_template_columns": null,
597
+ "grid_template_rows": null,
598
+ "height": null,
599
+ "justify_content": null,
600
+ "justify_items": null,
601
+ "left": null,
602
+ "margin": null,
603
+ "max_height": null,
604
+ "max_width": null,
605
+ "min_height": null,
606
+ "min_width": null,
607
+ "object_fit": null,
608
+ "object_position": null,
609
+ "order": null,
610
+ "overflow": null,
611
+ "overflow_x": null,
612
+ "overflow_y": null,
613
+ "padding": null,
614
+ "right": null,
615
+ "top": null,
616
+ "visibility": null,
617
+ "width": null
618
+ }
619
+ },
620
+ "b931a5080c154b7dbbdcebf1a48aa9a3": {
621
+ "model_module": "@jupyter-widgets/controls",
622
+ "model_name": "ProgressStyleModel",
623
+ "model_module_version": "1.5.0",
624
+ "state": {
625
+ "_model_module": "@jupyter-widgets/controls",
626
+ "_model_module_version": "1.5.0",
627
+ "_model_name": "ProgressStyleModel",
628
+ "_view_count": null,
629
+ "_view_module": "@jupyter-widgets/base",
630
+ "_view_module_version": "1.2.0",
631
+ "_view_name": "StyleView",
632
+ "bar_color": null,
633
+ "description_width": ""
634
+ }
635
+ },
636
+ "6cc6f7e129fc47c9ac57d38f713c50ea": {
637
+ "model_module": "@jupyter-widgets/base",
638
+ "model_name": "LayoutModel",
639
+ "model_module_version": "1.2.0",
640
+ "state": {
641
+ "_model_module": "@jupyter-widgets/base",
642
+ "_model_module_version": "1.2.0",
643
+ "_model_name": "LayoutModel",
644
+ "_view_count": null,
645
+ "_view_module": "@jupyter-widgets/base",
646
+ "_view_module_version": "1.2.0",
647
+ "_view_name": "LayoutView",
648
+ "align_content": null,
649
+ "align_items": null,
650
+ "align_self": null,
651
+ "border": null,
652
+ "bottom": null,
653
+ "display": null,
654
+ "flex": null,
655
+ "flex_flow": null,
656
+ "grid_area": null,
657
+ "grid_auto_columns": null,
658
+ "grid_auto_flow": null,
659
+ "grid_auto_rows": null,
660
+ "grid_column": null,
661
+ "grid_gap": null,
662
+ "grid_row": null,
663
+ "grid_template_areas": null,
664
+ "grid_template_columns": null,
665
+ "grid_template_rows": null,
666
+ "height": null,
667
+ "justify_content": null,
668
+ "justify_items": null,
669
+ "left": null,
670
+ "margin": null,
671
+ "max_height": null,
672
+ "max_width": null,
673
+ "min_height": null,
674
+ "min_width": null,
675
+ "object_fit": null,
676
+ "object_position": null,
677
+ "order": null,
678
+ "overflow": null,
679
+ "overflow_x": null,
680
+ "overflow_y": null,
681
+ "padding": null,
682
+ "right": null,
683
+ "top": null,
684
+ "visibility": null,
685
+ "width": null
686
+ }
687
+ },
688
+ "896598f7441c4e84b4c7963b520d6daf": {
689
+ "model_module": "@jupyter-widgets/controls",
690
+ "model_name": "DescriptionStyleModel",
691
+ "model_module_version": "1.5.0",
692
+ "state": {
693
+ "_model_module": "@jupyter-widgets/controls",
694
+ "_model_module_version": "1.5.0",
695
+ "_model_name": "DescriptionStyleModel",
696
+ "_view_count": null,
697
+ "_view_module": "@jupyter-widgets/base",
698
+ "_view_module_version": "1.2.0",
699
+ "_view_name": "StyleView",
700
+ "description_width": ""
701
+ }
702
+ },
703
+ "b391b63b204848009b051b9c9a5062a3": {
704
+ "model_module": "@jupyter-widgets/controls",
705
+ "model_name": "HBoxModel",
706
+ "model_module_version": "1.5.0",
707
+ "state": {
708
+ "_dom_classes": [],
709
+ "_model_module": "@jupyter-widgets/controls",
710
+ "_model_module_version": "1.5.0",
711
+ "_model_name": "HBoxModel",
712
+ "_view_count": null,
713
+ "_view_module": "@jupyter-widgets/controls",
714
+ "_view_module_version": "1.5.0",
715
+ "_view_name": "HBoxView",
716
+ "box_style": "",
717
+ "children": [
718
+ "IPY_MODEL_0eee8063610d46139d7576ef02ddc228",
719
+ "IPY_MODEL_5d3bd17d0aa44d84a91d3ac8255dc296",
720
+ "IPY_MODEL_84981495b59f46009bde2cdbec478a5f"
721
+ ],
722
+ "layout": "IPY_MODEL_f51d9c821e3b4f558ad76706f99d76a6"
723
+ }
724
+ },
725
+ "0eee8063610d46139d7576ef02ddc228": {
726
+ "model_module": "@jupyter-widgets/controls",
727
+ "model_name": "HTMLModel",
728
+ "model_module_version": "1.5.0",
729
+ "state": {
730
+ "_dom_classes": [],
731
+ "_model_module": "@jupyter-widgets/controls",
732
+ "_model_module_version": "1.5.0",
733
+ "_model_name": "HTMLModel",
734
+ "_view_count": null,
735
+ "_view_module": "@jupyter-widgets/controls",
736
+ "_view_module_version": "1.5.0",
737
+ "_view_name": "HTMLView",
738
+ "description": "",
739
+ "description_tooltip": null,
740
+ "layout": "IPY_MODEL_07e7af89197e489b877e309189e6ea53",
741
+ "placeholder": "​",
742
+ "style": "IPY_MODEL_66e1d6e6e45146a7a56d3e935f56ad51",
743
+ "value": "Loading checkpoint shards: 100%"
744
+ }
745
+ },
746
+ "5d3bd17d0aa44d84a91d3ac8255dc296": {
747
+ "model_module": "@jupyter-widgets/controls",
748
+ "model_name": "FloatProgressModel",
749
+ "model_module_version": "1.5.0",
750
+ "state": {
751
+ "_dom_classes": [],
752
+ "_model_module": "@jupyter-widgets/controls",
753
+ "_model_module_version": "1.5.0",
754
+ "_model_name": "FloatProgressModel",
755
+ "_view_count": null,
756
+ "_view_module": "@jupyter-widgets/controls",
757
+ "_view_module_version": "1.5.0",
758
+ "_view_name": "ProgressView",
759
+ "bar_style": "success",
760
+ "description": "",
761
+ "description_tooltip": null,
762
+ "layout": "IPY_MODEL_85b6c5d6fdc745d4a533004de3c97408",
763
+ "max": 2,
764
+ "min": 0,
765
+ "orientation": "horizontal",
766
+ "style": "IPY_MODEL_9f5ddcf6583246af9ec1ebe7f23446d6",
767
+ "value": 2
768
+ }
769
+ },
770
+ "84981495b59f46009bde2cdbec478a5f": {
771
+ "model_module": "@jupyter-widgets/controls",
772
+ "model_name": "HTMLModel",
773
+ "model_module_version": "1.5.0",
774
+ "state": {
775
+ "_dom_classes": [],
776
+ "_model_module": "@jupyter-widgets/controls",
777
+ "_model_module_version": "1.5.0",
778
+ "_model_name": "HTMLModel",
779
+ "_view_count": null,
780
+ "_view_module": "@jupyter-widgets/controls",
781
+ "_view_module_version": "1.5.0",
782
+ "_view_name": "HTMLView",
783
+ "description": "",
784
+ "description_tooltip": null,
785
+ "layout": "IPY_MODEL_58b61a060918476c82be882ed6d5cc10",
786
+ "placeholder": "​",
787
+ "style": "IPY_MODEL_7a2fce54921c4062a739fb690387f156",
788
+ "value": " 2/2 [00:19<00:00,  8.21s/it]"
789
+ }
790
+ },
791
+ "f51d9c821e3b4f558ad76706f99d76a6": {
792
+ "model_module": "@jupyter-widgets/base",
793
+ "model_name": "LayoutModel",
794
+ "model_module_version": "1.2.0",
795
+ "state": {
796
+ "_model_module": "@jupyter-widgets/base",
797
+ "_model_module_version": "1.2.0",
798
+ "_model_name": "LayoutModel",
799
+ "_view_count": null,
800
+ "_view_module": "@jupyter-widgets/base",
801
+ "_view_module_version": "1.2.0",
802
+ "_view_name": "LayoutView",
803
+ "align_content": null,
804
+ "align_items": null,
805
+ "align_self": null,
806
+ "border": null,
807
+ "bottom": null,
808
+ "display": null,
809
+ "flex": null,
810
+ "flex_flow": null,
811
+ "grid_area": null,
812
+ "grid_auto_columns": null,
813
+ "grid_auto_flow": null,
814
+ "grid_auto_rows": null,
815
+ "grid_column": null,
816
+ "grid_gap": null,
817
+ "grid_row": null,
818
+ "grid_template_areas": null,
819
+ "grid_template_columns": null,
820
+ "grid_template_rows": null,
821
+ "height": null,
822
+ "justify_content": null,
823
+ "justify_items": null,
824
+ "left": null,
825
+ "margin": null,
826
+ "max_height": null,
827
+ "max_width": null,
828
+ "min_height": null,
829
+ "min_width": null,
830
+ "object_fit": null,
831
+ "object_position": null,
832
+ "order": null,
833
+ "overflow": null,
834
+ "overflow_x": null,
835
+ "overflow_y": null,
836
+ "padding": null,
837
+ "right": null,
838
+ "top": null,
839
+ "visibility": null,
840
+ "width": null
841
+ }
842
+ },
843
+ "07e7af89197e489b877e309189e6ea53": {
844
+ "model_module": "@jupyter-widgets/base",
845
+ "model_name": "LayoutModel",
846
+ "model_module_version": "1.2.0",
847
+ "state": {
848
+ "_model_module": "@jupyter-widgets/base",
849
+ "_model_module_version": "1.2.0",
850
+ "_model_name": "LayoutModel",
851
+ "_view_count": null,
852
+ "_view_module": "@jupyter-widgets/base",
853
+ "_view_module_version": "1.2.0",
854
+ "_view_name": "LayoutView",
855
+ "align_content": null,
856
+ "align_items": null,
857
+ "align_self": null,
858
+ "border": null,
859
+ "bottom": null,
860
+ "display": null,
861
+ "flex": null,
862
+ "flex_flow": null,
863
+ "grid_area": null,
864
+ "grid_auto_columns": null,
865
+ "grid_auto_flow": null,
866
+ "grid_auto_rows": null,
867
+ "grid_column": null,
868
+ "grid_gap": null,
869
+ "grid_row": null,
870
+ "grid_template_areas": null,
871
+ "grid_template_columns": null,
872
+ "grid_template_rows": null,
873
+ "height": null,
874
+ "justify_content": null,
875
+ "justify_items": null,
876
+ "left": null,
877
+ "margin": null,
878
+ "max_height": null,
879
+ "max_width": null,
880
+ "min_height": null,
881
+ "min_width": null,
882
+ "object_fit": null,
883
+ "object_position": null,
884
+ "order": null,
885
+ "overflow": null,
886
+ "overflow_x": null,
887
+ "overflow_y": null,
888
+ "padding": null,
889
+ "right": null,
890
+ "top": null,
891
+ "visibility": null,
892
+ "width": null
893
+ }
894
+ },
895
+ "66e1d6e6e45146a7a56d3e935f56ad51": {
896
+ "model_module": "@jupyter-widgets/controls",
897
+ "model_name": "DescriptionStyleModel",
898
+ "model_module_version": "1.5.0",
899
+ "state": {
900
+ "_model_module": "@jupyter-widgets/controls",
901
+ "_model_module_version": "1.5.0",
902
+ "_model_name": "DescriptionStyleModel",
903
+ "_view_count": null,
904
+ "_view_module": "@jupyter-widgets/base",
905
+ "_view_module_version": "1.2.0",
906
+ "_view_name": "StyleView",
907
+ "description_width": ""
908
+ }
909
+ },
910
+ "85b6c5d6fdc745d4a533004de3c97408": {
911
+ "model_module": "@jupyter-widgets/base",
912
+ "model_name": "LayoutModel",
913
+ "model_module_version": "1.2.0",
914
+ "state": {
915
+ "_model_module": "@jupyter-widgets/base",
916
+ "_model_module_version": "1.2.0",
917
+ "_model_name": "LayoutModel",
918
+ "_view_count": null,
919
+ "_view_module": "@jupyter-widgets/base",
920
+ "_view_module_version": "1.2.0",
921
+ "_view_name": "LayoutView",
922
+ "align_content": null,
923
+ "align_items": null,
924
+ "align_self": null,
925
+ "border": null,
926
+ "bottom": null,
927
+ "display": null,
928
+ "flex": null,
929
+ "flex_flow": null,
930
+ "grid_area": null,
931
+ "grid_auto_columns": null,
932
+ "grid_auto_flow": null,
933
+ "grid_auto_rows": null,
934
+ "grid_column": null,
935
+ "grid_gap": null,
936
+ "grid_row": null,
937
+ "grid_template_areas": null,
938
+ "grid_template_columns": null,
939
+ "grid_template_rows": null,
940
+ "height": null,
941
+ "justify_content": null,
942
+ "justify_items": null,
943
+ "left": null,
944
+ "margin": null,
945
+ "max_height": null,
946
+ "max_width": null,
947
+ "min_height": null,
948
+ "min_width": null,
949
+ "object_fit": null,
950
+ "object_position": null,
951
+ "order": null,
952
+ "overflow": null,
953
+ "overflow_x": null,
954
+ "overflow_y": null,
955
+ "padding": null,
956
+ "right": null,
957
+ "top": null,
958
+ "visibility": null,
959
+ "width": null
960
+ }
961
+ },
962
+ "9f5ddcf6583246af9ec1ebe7f23446d6": {
963
+ "model_module": "@jupyter-widgets/controls",
964
+ "model_name": "ProgressStyleModel",
965
+ "model_module_version": "1.5.0",
966
+ "state": {
967
+ "_model_module": "@jupyter-widgets/controls",
968
+ "_model_module_version": "1.5.0",
969
+ "_model_name": "ProgressStyleModel",
970
+ "_view_count": null,
971
+ "_view_module": "@jupyter-widgets/base",
972
+ "_view_module_version": "1.2.0",
973
+ "_view_name": "StyleView",
974
+ "bar_color": null,
975
+ "description_width": ""
976
+ }
977
+ },
978
+ "58b61a060918476c82be882ed6d5cc10": {
979
+ "model_module": "@jupyter-widgets/base",
980
+ "model_name": "LayoutModel",
981
+ "model_module_version": "1.2.0",
982
+ "state": {
983
+ "_model_module": "@jupyter-widgets/base",
984
+ "_model_module_version": "1.2.0",
985
+ "_model_name": "LayoutModel",
986
+ "_view_count": null,
987
+ "_view_module": "@jupyter-widgets/base",
988
+ "_view_module_version": "1.2.0",
989
+ "_view_name": "LayoutView",
990
+ "align_content": null,
991
+ "align_items": null,
992
+ "align_self": null,
993
+ "border": null,
994
+ "bottom": null,
995
+ "display": null,
996
+ "flex": null,
997
+ "flex_flow": null,
998
+ "grid_area": null,
999
+ "grid_auto_columns": null,
1000
+ "grid_auto_flow": null,
1001
+ "grid_auto_rows": null,
1002
+ "grid_column": null,
1003
+ "grid_gap": null,
1004
+ "grid_row": null,
1005
+ "grid_template_areas": null,
1006
+ "grid_template_columns": null,
1007
+ "grid_template_rows": null,
1008
+ "height": null,
1009
+ "justify_content": null,
1010
+ "justify_items": null,
1011
+ "left": null,
1012
+ "margin": null,
1013
+ "max_height": null,
1014
+ "max_width": null,
1015
+ "min_height": null,
1016
+ "min_width": null,
1017
+ "object_fit": null,
1018
+ "object_position": null,
1019
+ "order": null,
1020
+ "overflow": null,
1021
+ "overflow_x": null,
1022
+ "overflow_y": null,
1023
+ "padding": null,
1024
+ "right": null,
1025
+ "top": null,
1026
+ "visibility": null,
1027
+ "width": null
1028
+ }
1029
+ },
1030
+ "7a2fce54921c4062a739fb690387f156": {
1031
+ "model_module": "@jupyter-widgets/controls",
1032
+ "model_name": "DescriptionStyleModel",
1033
+ "model_module_version": "1.5.0",
1034
+ "state": {
1035
+ "_model_module": "@jupyter-widgets/controls",
1036
+ "_model_module_version": "1.5.0",
1037
+ "_model_name": "DescriptionStyleModel",
1038
+ "_view_count": null,
1039
+ "_view_module": "@jupyter-widgets/base",
1040
+ "_view_module_version": "1.2.0",
1041
+ "_view_name": "StyleView",
1042
+ "description_width": ""
1043
+ }
1044
+ }
1045
+ }
1046
+ }
1047
+ },
1048
+ "cells": [
1049
+ {
1050
+ "cell_type": "markdown",
1051
+ "source": [
1052
+ "# RecurrentGemma - 2B & 2B-it\n",
1053
+ "\n",
1054
+ "RecurrentGemma is a family of open language models built on a novel recurrent architecture developed at Google. Both pre-trained (2B) and instruction-tuned (2B-it) versions are available in English.\n",
1055
+ "\n",
1056
+ "Like Gemma, [RecurrentGemma](https://huggingface.co/google/recurrentgemma-2b-it) models are well-suited for a variety of text generation tasks, including question answering, summarization, and reasoning. Because of its novel architecture, RecurrentGemma requires less memory than Gemma and achieves faster inference when generating long sequences."
1057
+ ],
1058
+ "metadata": {
1059
+ "id": "MVkIfH6Cg7Fx"
1060
+ }
1061
+ },
1062
+ {
1063
+ "cell_type": "code",
1064
+ "execution_count": null,
1065
+ "metadata": {
1066
+ "colab": {
1067
+ "base_uri": "https://localhost:8080/"
1068
+ },
1069
+ "id": "ahVaTC6rEIVI",
1070
+ "outputId": "2036392c-b381-4ca0-80ba-16ba8c87cde3"
1071
+ },
1072
+ "outputs": [
1073
+ {
1074
+ "output_type": "stream",
1075
+ "name": "stdout",
1076
+ "text": [
1077
+ "Collecting transformers==4.40.0.dev0\n",
1078
+ " Downloading https://huggingface.co/datasets/reach-vb/random-wheels/resolve/main/transformers-4.40.0.dev0-py3-none-any.whl (8.8 MB)\n",
1079
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m8.8/8.8 MB\u001b[0m \u001b[31m30.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
1080
+ "\u001b[?25hRequirement already satisfied: filelock in /usr/local/lib/python3.10/dist-packages (from transformers==4.40.0.dev0) (3.13.3)\n",
1081
+ "Requirement already satisfied: huggingface-hub<1.0,>=0.19.3 in /usr/local/lib/python3.10/dist-packages (from transformers==4.40.0.dev0) (0.20.3)\n",
1082
+ "Requirement already satisfied: numpy>=1.17 in /usr/local/lib/python3.10/dist-packages (from transformers==4.40.0.dev0) (1.25.2)\n",
1083
+ "Requirement already satisfied: packaging>=20.0 in /usr/local/lib/python3.10/dist-packages (from transformers==4.40.0.dev0) (24.0)\n",
1084
+ "Requirement already satisfied: pyyaml>=5.1 in /usr/local/lib/python3.10/dist-packages (from transformers==4.40.0.dev0) (6.0.1)\n",
1085
+ "Requirement already satisfied: regex!=2019.12.17 in /usr/local/lib/python3.10/dist-packages (from transformers==4.40.0.dev0) (2023.12.25)\n",
1086
+ "Requirement already satisfied: requests in /usr/local/lib/python3.10/dist-packages (from transformers==4.40.0.dev0) (2.31.0)\n",
1087
+ "Requirement already satisfied: tokenizers<0.19,>=0.14 in /usr/local/lib/python3.10/dist-packages (from transformers==4.40.0.dev0) (0.15.2)\n",
1088
+ "Requirement already satisfied: safetensors>=0.4.1 in /usr/local/lib/python3.10/dist-packages (from transformers==4.40.0.dev0) (0.4.2)\n",
1089
+ "Requirement already satisfied: tqdm>=4.27 in /usr/local/lib/python3.10/dist-packages (from transformers==4.40.0.dev0) (4.66.2)\n",
1090
+ "Requirement already satisfied: fsspec>=2023.5.0 in /usr/local/lib/python3.10/dist-packages (from huggingface-hub<1.0,>=0.19.3->transformers==4.40.0.dev0) (2023.6.0)\n",
1091
+ "Requirement already satisfied: typing-extensions>=3.7.4.3 in /usr/local/lib/python3.10/dist-packages (from huggingface-hub<1.0,>=0.19.3->transformers==4.40.0.dev0) (4.10.0)\n",
1092
+ "Requirement already satisfied: charset-normalizer<4,>=2 in /usr/local/lib/python3.10/dist-packages (from requests->transformers==4.40.0.dev0) (3.3.2)\n",
1093
+ "Requirement already satisfied: idna<4,>=2.5 in /usr/local/lib/python3.10/dist-packages (from requests->transformers==4.40.0.dev0) (3.6)\n",
1094
+ "Requirement already satisfied: urllib3<3,>=1.21.1 in /usr/local/lib/python3.10/dist-packages (from requests->transformers==4.40.0.dev0) (2.0.7)\n",
1095
+ "Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.10/dist-packages (from requests->transformers==4.40.0.dev0) (2024.2.2)\n",
1096
+ "Installing collected packages: transformers\n",
1097
+ " Attempting uninstall: transformers\n",
1098
+ " Found existing installation: transformers 4.38.2\n",
1099
+ " Uninstalling transformers-4.38.2:\n",
1100
+ " Successfully uninstalled transformers-4.38.2\n",
1101
+ "Successfully installed transformers-4.40.0.dev0\n"
1102
+ ]
1103
+ }
1104
+ ],
1105
+ "source": [
1106
+ "!pip install git+https://github.com/huggingface/transformers.git"
1107
+ ]
1108
+ },
1109
+ {
1110
+ "cell_type": "markdown",
1111
+ "source": [
1112
+ "## Load the model checkpoints\n",
1113
+ "\n",
1114
+ "Make sure to accept the terms and conditions for the model before running the code further here: https://huggingface.co/google/recurrentgemma-2b-it.\n"
1115
+ ],
1116
+ "metadata": {
1117
+ "id": "FZK4T_zHhL9Q"
1118
+ }
1119
+ },
1120
+ {
1121
+ "cell_type": "code",
1122
+ "source": [
1123
+ "import torch\n",
1124
+ "from transformers import AutoTokenizer, AutoModelForCausalLM\n",
1125
+ "\n",
1126
+ "tokenizer = AutoTokenizer.from_pretrained(\"google/recurrentgemma-2b-it\")\n",
1127
+ "model = AutoModelForCausalLM.from_pretrained(\"google/recurrentgemma-2b-it\", torch_dtype=torch.float16).to(\"cuda:0\")"
1128
+ ],
1129
+ "metadata": {
1130
+ "colab": {
1131
+ "base_uri": "https://localhost:8080/",
1132
+ "height": 129,
1133
+ "referenced_widgets": [
1134
+ "e6f2b94e3bb345859811226dc345a6e3",
1135
+ "ce12a18f666740eabe1f71be5152e7d8",
1136
+ "76a4282443194092af1039de43c523c6",
1137
+ "3775878f2e484e84b6cb0971d2d35a7f",
1138
+ "df936531829f4c1580c04719de77059c",
1139
+ "ab60c88077984531aa08150c72fa7ab5",
1140
+ "ad9901a17d3d4b1b898941d16c16f1cb",
1141
+ "f2fbf227d64e451681083a0ca189405c",
1142
+ "72364dfb4d994115bfd049dae5f53423",
1143
+ "fcbe52122eeb466d91b44011f7f9bc47",
1144
+ "c0915cfb93634ede86568eface4115d8",
1145
+ "401cecca00fb42b29f1ec3fd5cfa4396",
1146
+ "e98c3904a7e346a5870c5ac768cd6a98",
1147
+ "d908df5a5a8945dc88f8d0f147245bbd",
1148
+ "f1a29b9608244a9db3cc919ad149ef48",
1149
+ "d19c6905802d46c4beb6fe8886cb6e8c",
1150
+ "f7d7bc20d2ba40eeb576e8865cdbb8ec",
1151
+ "1a431c4a814941169c8feff1b4741052",
1152
+ "21a66f11f21e4913a1a5a975727916f0",
1153
+ "b931a5080c154b7dbbdcebf1a48aa9a3",
1154
+ "6cc6f7e129fc47c9ac57d38f713c50ea",
1155
+ "896598f7441c4e84b4c7963b520d6daf",
1156
+ "b391b63b204848009b051b9c9a5062a3",
1157
+ "0eee8063610d46139d7576ef02ddc228",
1158
+ "5d3bd17d0aa44d84a91d3ac8255dc296",
1159
+ "84981495b59f46009bde2cdbec478a5f",
1160
+ "f51d9c821e3b4f558ad76706f99d76a6",
1161
+ "07e7af89197e489b877e309189e6ea53",
1162
+ "66e1d6e6e45146a7a56d3e935f56ad51",
1163
+ "85b6c5d6fdc745d4a533004de3c97408",
1164
+ "9f5ddcf6583246af9ec1ebe7f23446d6",
1165
+ "58b61a060918476c82be882ed6d5cc10",
1166
+ "7a2fce54921c4062a739fb690387f156"
1167
+ ]
1168
+ },
1169
+ "id": "XItA_HZ-EPIR",
1170
+ "outputId": "22b1edbc-c6d7-4ad0-b992-0f59682a30ce"
1171
+ },
1172
+ "execution_count": null,
1173
+ "outputs": [
1174
+ {
1175
+ "output_type": "display_data",
1176
+ "data": {
1177
+ "text/plain": [
1178
+ "tokenizer_config.json: 0%| | 0.00/40.5k [00:00<?, ?B/s]"
1179
+ ],
1180
+ "application/vnd.jupyter.widget-view+json": {
1181
+ "version_major": 2,
1182
+ "version_minor": 0,
1183
+ "model_id": "e6f2b94e3bb345859811226dc345a6e3"
1184
+ }
1185
+ },
1186
+ "metadata": {}
1187
+ },
1188
+ {
1189
+ "output_type": "display_data",
1190
+ "data": {
1191
+ "text/plain": [
1192
+ "Downloading shards: 0%| | 0/2 [00:00<?, ?it/s]"
1193
+ ],
1194
+ "application/vnd.jupyter.widget-view+json": {
1195
+ "version_major": 2,
1196
+ "version_minor": 0,
1197
+ "model_id": "401cecca00fb42b29f1ec3fd5cfa4396"
1198
+ }
1199
+ },
1200
+ "metadata": {}
1201
+ },
1202
+ {
1203
+ "output_type": "display_data",
1204
+ "data": {
1205
+ "text/plain": [
1206
+ "Loading checkpoint shards: 0%| | 0/2 [00:00<?, ?it/s]"
1207
+ ],
1208
+ "application/vnd.jupyter.widget-view+json": {
1209
+ "version_major": 2,
1210
+ "version_minor": 0,
1211
+ "model_id": "b391b63b204848009b051b9c9a5062a3"
1212
+ }
1213
+ },
1214
+ "metadata": {}
1215
+ }
1216
+ ]
1217
+ },
1218
+ {
1219
+ "cell_type": "markdown",
1220
+ "source": [
1221
+ "## Prepare our input text with chat template.\n",
1222
+ "\n",
1223
+ "The instruction-tuned models use a chat template that must be adhered to for conversational use. The easiest way to apply it is using the tokenizer's built-in chat template, as shown in the following snippet.\n",
1224
+ "\n",
1225
+ "Let's load the model and apply the chat template to a conversation. In this example, we'll start with a single user interaction:"
1226
+ ],
1227
+ "metadata": {
1228
+ "id": "u3hoYG18hmHS"
1229
+ }
1230
+ },
1231
+ {
1232
+ "cell_type": "code",
1233
+ "source": [
1234
+ "chat = [\n",
1235
+ " { \"role\": \"user\", \"content\": \"Write a hello world program\" },\n",
1236
+ "]\n",
1237
+ "prompt = tokenizer.apply_chat_template(chat, tokenize=False, add_generation_prompt=True)"
1238
+ ],
1239
+ "metadata": {
1240
+ "id": "IrVmrmVpkgN3"
1241
+ },
1242
+ "execution_count": null,
1243
+ "outputs": []
1244
+ },
1245
+ {
1246
+ "cell_type": "markdown",
1247
+ "source": [
1248
+ "## Tokenize the inputs"
1249
+ ],
1250
+ "metadata": {
1251
+ "id": "fDAnnNAYsghB"
1252
+ }
1253
+ },
1254
+ {
1255
+ "cell_type": "code",
1256
+ "source": [
1257
+ "inputs = tokenizer.encode(prompt, add_special_tokens=False, return_tensors=\"pt\").to(model.device)"
1258
+ ],
1259
+ "metadata": {
1260
+ "id": "dlokCV7isgEI"
1261
+ },
1262
+ "execution_count": null,
1263
+ "outputs": []
1264
+ },
1265
+ {
1266
+ "cell_type": "markdown",
1267
+ "source": [
1268
+ "## Pass the input through the model and generate."
1269
+ ],
1270
+ "metadata": {
1271
+ "id": "pBXeqctLhuGy"
1272
+ }
1273
+ },
1274
+ {
1275
+ "cell_type": "code",
1276
+ "source": [
1277
+ "outputs = model.generate(input_ids=inputs.to(model.device), max_new_tokens=150)\n",
1278
+ "print(tokenizer.batch_decode(outputs, skip_special_tokens=True))"
1279
+ ],
1280
+ "metadata": {
1281
+ "colab": {
1282
+ "base_uri": "https://localhost:8080/"
1283
+ },
1284
+ "id": "odmrX7pQeRo6",
1285
+ "outputId": "3d2f4189-0f7e-4536-eee1-e5265915c657"
1286
+ },
1287
+ "execution_count": null,
1288
+ "outputs": [
1289
+ {
1290
+ "output_type": "stream",
1291
+ "name": "stdout",
1292
+ "text": [
1293
+ "['<start_of_turn>user\\nWrite a hello world program<end_of_turn>\\n<start_of_turn>model\\n```python\\nprint(\"Hello, world!\")\\n```\\n\\nThis program will print the message \"Hello, world!\" to the console.\\n\\n**Explanation:**\\n\\n* `print()` is a built-in Python function that prints the given argument to the console.\\n* `\"Hello, world!\"` is the string that will be printed.\\n\\n**Output:**\\n\\n```\\nHello, world!\\n```']\n"
1294
+ ]
1295
+ }
1296
+ ]
1297
+ },
1298
+ {
1299
+ "cell_type": "markdown",
1300
+ "source": [
1301
+ "Enjoy! There's much more you can do to maximise the output of your generation. Check out this guide: https://huggingface.co/docs/transformers/generation_strategies"
1302
+ ],
1303
+ "metadata": {
1304
+ "id": "rkpXJ5sHwmMH"
1305
+ }
1306
+ }
1307
+ ]
1308
+ }