{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "a61725a4-1744-40af-8895-c092b447f315",
   "metadata": {},
   "outputs": [],
   "source": [
    "#pip install transformers datasets evaluate seqeval"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "id": "bfbe03a5-8199-4fa6-851f-bdb3bc2069bd",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "5299508821ac4190be635084237878d1",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "VBox(children=(HTML(value='<center> <img\\nsrc=https://huggingface.co/front/assets/huggingface_logo-noborder.sv…"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "from huggingface_hub import notebook_login\n",
    "\n",
    "notebook_login()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "id": "b33ad12c-0487-4eff-995d-d3e27756c1af",
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Downloading builder script: 100%|█████████████████████████████████████████████████████████| 7.46k/7.46k [00:00<?, ?B/s]\n",
      "Downloading metadata: 100%|███████████████████████████████████████████████████████████████| 4.28k/4.28k [00:00<?, ?B/s]\n",
      "Downloading readme: 100%|█████████████████████████████████████████████████████████████████| 9.05k/9.05k [00:00<?, ?B/s]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Downloading and preparing dataset wnut_17/wnut_17 to C:/Users/Vadim/.cache/huggingface/datasets/wnut_17/wnut_17/1.0.0/077c7f08b8dbc800692e8c9186cdf3606d5849ab0e7be662e6135bb10eba54f9...\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Downloading data files:   0%|                                                                    | 0/3 [00:00<?, ?it/s]\n",
      "Downloading data: 494kB [00:00, 6.32MB/s]                                                                              \u001b[A\n",
      "Downloading data files:  33%|████████████████████                                        | 1/3 [00:00<00:01,  1.57it/s]\n",
      "Downloading data: 115kB [00:00, 38.2MB/s]                                                                              \u001b[A\n",
      "Downloading data files:  67%|████████████████████████████████████████                    | 2/3 [00:01<00:00,  1.88it/s]\n",
      "Downloading data: 192kB [00:00, 3.27MB/s]                                                                              \u001b[A\n",
      "Downloading data files: 100%|████████████████████████████████████████████████████████████| 3/3 [00:01<00:00,  1.80it/s]\n",
      "Extracting data files: 100%|█████████████████████████████████████████████████████████████████████| 3/3 [00:00<?, ?it/s]\n",
      "                                                                                                                       \r"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Dataset wnut_17 downloaded and prepared to C:/Users/Vadim/.cache/huggingface/datasets/wnut_17/wnut_17/1.0.0/077c7f08b8dbc800692e8c9186cdf3606d5849ab0e7be662e6135bb10eba54f9. Subsequent calls will reuse this data.\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "100%|███████████████████████████████████████████████████████████████████████████████████| 3/3 [00:00<00:00, 179.69it/s]\n"
     ]
    }
   ],
   "source": [
    "from datasets import load_dataset\n",
    "wnut = load_dataset(\"wnut_17\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "id": "66c9ae43-ac1b-496b-893e-b916c3df0f2f",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "{'id': '0',\n",
       " 'tokens': ['@paulwalk',\n",
       "  'It',\n",
       "  \"'s\",\n",
       "  'the',\n",
       "  'view',\n",
       "  'from',\n",
       "  'where',\n",
       "  'I',\n",
       "  \"'m\",\n",
       "  'living',\n",
       "  'for',\n",
       "  'two',\n",
       "  'weeks',\n",
       "  '.',\n",
       "  'Empire',\n",
       "  'State',\n",
       "  'Building',\n",
       "  '=',\n",
       "  'ESB',\n",
       "  '.',\n",
       "  'Pretty',\n",
       "  'bad',\n",
       "  'storm',\n",
       "  'here',\n",
       "  'last',\n",
       "  'evening',\n",
       "  '.'],\n",
       " 'ner_tags': [0,\n",
       "  0,\n",
       "  0,\n",
       "  0,\n",
       "  0,\n",
       "  0,\n",
       "  0,\n",
       "  0,\n",
       "  0,\n",
       "  0,\n",
       "  0,\n",
       "  0,\n",
       "  0,\n",
       "  0,\n",
       "  7,\n",
       "  8,\n",
       "  8,\n",
       "  0,\n",
       "  7,\n",
       "  0,\n",
       "  0,\n",
       "  0,\n",
       "  0,\n",
       "  0,\n",
       "  0,\n",
       "  0,\n",
       "  0]}"
      ]
     },
     "execution_count": 5,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "wnut[\"train\"][0]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "id": "63a65eb5-eee0-451e-ad7c-a974f8e1835c",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "['O',\n",
       " 'B-corporation',\n",
       " 'I-corporation',\n",
       " 'B-creative-work',\n",
       " 'I-creative-work',\n",
       " 'B-group',\n",
       " 'I-group',\n",
       " 'B-location',\n",
       " 'I-location',\n",
       " 'B-person',\n",
       " 'I-person',\n",
       " 'B-product',\n",
       " 'I-product']"
      ]
     },
     "execution_count": 7,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "label_list = wnut[\"train\"].features[f\"ner_tags\"].feature.names\n",
    "label_list"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "id": "5ca8689a-addf-4a10-a964-88f73e87b599",
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Downloading (…)okenizer_config.json: 100%|██████████████████████████████████████████████████| 28.0/28.0 [00:00<?, ?B/s]\n",
      "C:\\Users\\Vadim\\AppData\\Local\\Programs\\Python\\Python310\\lib\\site-packages\\huggingface_hub\\file_download.py:133: UserWarning: `huggingface_hub` cache-system uses symlinks by default to efficiently store duplicated files but your machine does not support them in C:\\Users\\Vadim\\.cache\\huggingface\\hub. Caching files will still work but in a degraded version that might require more space on your disk. This warning can be disabled by setting the `HF_HUB_DISABLE_SYMLINKS_WARNING` environment variable. For more details, see https://huggingface.co/docs/huggingface_hub/how-to-cache#limitations.\n",
      "To support symlinks on Windows, you either need to activate Developer Mode or to run Python as an administrator. In order to see activate developer mode, see this article: https://docs.microsoft.com/en-us/windows/apps/get-started/enable-your-device-for-development\n",
      "  warnings.warn(message)\n",
      "Downloading (…)lve/main/config.json: 100%|████████████████████████████████████████████████████| 483/483 [00:00<?, ?B/s]\n",
      "Downloading (…)solve/main/vocab.txt: 100%|██████████████████████████████████████████| 232k/232k [00:00<00:00, 1.48MB/s]\n",
      "Downloading (…)/main/tokenizer.json: 100%|██████████████████████████████████████████| 466k/466k [00:00<00:00, 5.94MB/s]\n"
     ]
    }
   ],
   "source": [
    "from transformers import AutoTokenizer\n",
    "tokenizer = AutoTokenizer.from_pretrained(\"distilbert-base-uncased\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "id": "ca02c0f1-25b0-4f8b-9f46-d4feb4e0c005",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "['[CLS]',\n",
       " '@',\n",
       " 'paul',\n",
       " '##walk',\n",
       " 'it',\n",
       " \"'\",\n",
       " 's',\n",
       " 'the',\n",
       " 'view',\n",
       " 'from',\n",
       " 'where',\n",
       " 'i',\n",
       " \"'\",\n",
       " 'm',\n",
       " 'living',\n",
       " 'for',\n",
       " 'two',\n",
       " 'weeks',\n",
       " '.',\n",
       " 'empire',\n",
       " 'state',\n",
       " 'building',\n",
       " '=',\n",
       " 'es',\n",
       " '##b',\n",
       " '.',\n",
       " 'pretty',\n",
       " 'bad',\n",
       " 'storm',\n",
       " 'here',\n",
       " 'last',\n",
       " 'evening',\n",
       " '.',\n",
       " '[SEP]']"
      ]
     },
     "execution_count": 9,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "example = wnut[\"train\"][0]\n",
    "\n",
    "tokenized_input = tokenizer(example[\"tokens\"], is_split_into_words=True)\n",
    "\n",
    "tokens = tokenizer.convert_ids_to_tokens(tokenized_input[\"input_ids\"])\n",
    "\n",
    "tokens"
   ]
  },
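  {
   "cell_type": "markdown",
   "id": "word-ids-note",
   "metadata": {},
   "source": [
    "A quick look at `word_ids()`, which maps each subtoken back to the index of the word it came from (`None` for special tokens). The alignment function in the next cell relies on exactly this mapping."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "word-ids-demo",
   "metadata": {},
   "outputs": [],
   "source": [
    "# None marks [CLS]/[SEP]; a repeated index marks subtokens of the same word\n",
    "# (e.g. 'es' and '##b' both map back to the word 'ESB').\n",
    "tokenized_input.word_ids()"
   ]
  },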
  {
   "cell_type": "code",
   "execution_count": 10,
   "id": "05f09ac8-b94d-4a58-b767-7ddbebc8febf",
   "metadata": {},
   "outputs": [],
   "source": [
    "def tokenize_and_align_labels(examples):\n",
    "    tokenized_inputs = tokenizer(examples[\"tokens\"], truncation=True, is_split_into_words=True)\n",
    "    labels = []\n",
    "    for i, label in enumerate(examples[f\"ner_tags\"]):\n",
    "        word_ids = tokenized_inputs.word_ids(batch_index=i)  # Map tokens to their respective word.\n",
    "        previous_word_idx = None\n",
    "        label_ids = []\n",
    "        for word_idx in word_ids:  # Set the special tokens to -100.\n",
    "            if word_idx is None:\n",
    "                label_ids.append(-100)\n",
    "            elif word_idx != previous_word_idx:  # Only label the first token of a given word.\n",
    "                label_ids.append(label[word_idx])\n",
    "            else:\n",
    "                label_ids.append(-100)\n",
    "            previous_word_idx = word_idx\n",
    "        labels.append(label_ids)\n",
    "    tokenized_inputs[\"labels\"] = labels\n",
    "    return tokenized_inputs"
   ]
  },
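  {
   "cell_type": "markdown",
   "id": "align-check-note",
   "metadata": {},
   "source": [
    "A small sanity check (illustrative, not part of the original pipeline): run the alignment on the first training example and confirm that special tokens and word continuations get `-100`, so they are ignored by the loss."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "align-check-demo",
   "metadata": {},
   "outputs": [],
   "source": [
    "# wnut[\"train\"][:1] yields a batch-of-one dict, matching the batched=True signature.\n",
    "aligned = tokenize_and_align_labels(wnut[\"train\"][:1])\n",
    "list(zip(tokenizer.convert_ids_to_tokens(aligned[\"input_ids\"][0]), aligned[\"labels\"][0]))"
   ]
  },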
  {
   "cell_type": "code",
   "execution_count": 11,
   "id": "9835d91b-2811-47dc-93ae-f7e491f5f1cb",
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "                                                                                                                       \r"
     ]
    }
   ],
   "source": [
    "tokenized_wnut = wnut.map(tokenize_and_align_labels, batched=True)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "id": "fb7c2ed2-7254-46c0-a1bf-49c598590b38",
   "metadata": {},
   "outputs": [],
   "source": [
    "from transformers import DataCollatorForTokenClassification\n",
    "data_collator = DataCollatorForTokenClassification(tokenizer=tokenizer)"
   ]
  },
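  {
   "cell_type": "markdown",
   "id": "collator-demo-note",
   "metadata": {},
   "source": [
    "An illustrative check of dynamic padding: the collator pads a small batch to its longest sequence and fills the padded label positions with `-100`. The key filtering below just drops the string columns (`id`, `tokens`, `ner_tags`) the collator does not handle."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "collator-demo",
   "metadata": {},
   "outputs": [],
   "source": [
    "keys = (\"input_ids\", \"attention_mask\", \"labels\")\n",
    "features = [{k: tokenized_wnut[\"train\"][i][k] for k in keys} for i in range(2)]\n",
    "batch = data_collator(features)\n",
    "batch[\"labels\"]  # padded to a common length with -100"
   ]
  },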
  {
   "cell_type": "code",
   "execution_count": 13,
   "id": "3203fca2-fd00-4356-831d-df30a0b3f3ca",
   "metadata": {},
   "outputs": [],
   "source": [
    "from transformers import DataCollatorForTokenClassification\n",
    "data_collator = DataCollatorForTokenClassification(tokenizer=tokenizer, return_tensors=\"tf\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "id": "0e730d06-b1a6-4a47-a6ed-3ea23e229e3d",
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Downloading builder script: 100%|█████████████████████████████████████████████████| 6.34k/6.34k [00:00<00:00, 1.41MB/s]\n"
     ]
    }
   ],
   "source": [
    "import evaluate\n",
    "seqeval = evaluate.load(\"seqeval\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "id": "aadb66dc-eab3-43c2-947f-4630d3320ed6",
   "metadata": {},
   "outputs": [],
   "source": [
    "import numpy as np\n",
    "labels = [label_list[i] for i in example[f\"ner_tags\"]]\n",
    "\n",
    "def compute_metrics(p):\n",
    "    predictions, labels = p\n",
    "    predictions = np.argmax(predictions, axis=2)\n",
    "    true_predictions = [\n",
    "        [label_list[p] for (p, l) in zip(prediction, label) if l != -100]\n",
    "        for prediction, label in zip(predictions, labels)\n",
    "    ]\n",
    "    true_labels = [\n",
    "        [label_list[l] for (p, l) in zip(prediction, label) if l != -100]\n",
    "        for prediction, label in zip(predictions, labels)\n",
    "    ]\n",
    "\n",
    "    results = seqeval.compute(predictions=true_predictions, references=true_labels)\n",
    "    return {\n",
    "        \"precision\": results[\"overall_precision\"],\n",
    "        \"recall\": results[\"overall_recall\"],\n",
    "        \"f1\": results[\"overall_f1\"],\n",
    "        \"accuracy\": results[\"overall_accuracy\"],\n",
    "    }"
   ]
  },
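  {
   "cell_type": "markdown",
   "id": "seqeval-demo-note",
   "metadata": {},
   "source": [
    "A toy call (illustrative) showing the input format `seqeval` expects: lists of IOB2 tag sequences, one per sentence, for both predictions and references."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "seqeval-demo",
   "metadata": {},
   "outputs": [],
   "source": [
    "# The predicted span is shorter than the reference span, so the entity does not\n",
    "# match exactly and entity-level precision/recall are 0 (seqeval scores whole entities).\n",
    "seqeval.compute(\n",
    "    predictions=[[\"O\", \"B-location\", \"O\", \"O\"]],\n",
    "    references=[[\"O\", \"B-location\", \"I-location\", \"O\"]],\n",
    ")"
   ]
  },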
  {
   "cell_type": "code",
   "execution_count": 16,
   "id": "22de9a50-d138-49a5-9604-b02bbcac291d",
   "metadata": {},
   "outputs": [],
   "source": [
    "id2label = {\n",
    "    0: \"O\",\n",
    "    1: \"B-corporation\",\n",
    "    2: \"I-corporation\",\n",
    "    3: \"B-creative-work\",\n",
    "    4: \"I-creative-work\",\n",
    "    5: \"B-group\",\n",
    "    6: \"I-group\",\n",
    "    7: \"B-location\",\n",
    "    8: \"I-location\",\n",
    "    9: \"B-person\",\n",
    "    10: \"I-person\",\n",
    "    11: \"B-product\",\n",
    "    12: \"I-product\",\n",
    "}"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 17,
   "id": "d3c554a6-6a04-41ca-ac83-98fb20f605c8",
   "metadata": {},
   "outputs": [],
   "source": [
    "label2id = {\n",
    "    \"O\": 0,\n",
    "    \"B-corporation\": 1,\n",
    "    \"I-corporation\": 2,\n",
    "    \"B-creative-work\": 3,\n",
    "    \"I-creative-work\": 4,\n",
    "    \"B-group\": 5,\n",
    "    \"I-group\": 6,\n",
    "    \"B-location\": 7,\n",
    "    \"I-location\": 8,\n",
    "    \"B-person\": 9,\n",
    "    \"I-person\": 10,\n",
    "    \"B-product\": 11,\n",
    "    \"I-product\": 12,\n",
    "}"
   ]
  },
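  {
   "cell_type": "markdown",
   "id": "derived-maps-note",
   "metadata": {},
   "source": [
    "The two mappings above can equivalently be derived from `label_list` instead of being typed out by hand; the one-liners below produce identical dictionaries."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "derived-maps",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Same content as the hand-written dicts, derived from the dataset's own label order.\n",
    "assert id2label == dict(enumerate(label_list))\n",
    "assert label2id == {label: i for i, label in enumerate(label_list)}"
   ]
  },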
  {
   "cell_type": "code",
   "execution_count": 18,
   "id": "5bc96c56-d36c-4e0f-af8a-b0495e6a118a",
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Downloading pytorch_model.bin: 100%|████████████████████████████████████████████████| 268M/268M [00:23<00:00, 11.4MB/s]\n",
      "Some weights of the model checkpoint at distilbert-base-uncased were not used when initializing DistilBertForTokenClassification: ['vocab_layer_norm.weight', 'vocab_transform.bias', 'vocab_projector.bias', 'vocab_projector.weight', 'vocab_transform.weight', 'vocab_layer_norm.bias']\n",
      "- This IS expected if you are initializing DistilBertForTokenClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).\n",
      "- This IS NOT expected if you are initializing DistilBertForTokenClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).\n",
      "Some weights of DistilBertForTokenClassification were not initialized from the model checkpoint at distilbert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']\n",
      "You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.\n"
     ]
    }
   ],
   "source": [
    "from transformers import AutoModelForTokenClassification, TrainingArguments, Trainer\n",
    "\n",
    "model = AutoModelForTokenClassification.from_pretrained(\n",
    "    \"distilbert-base-uncased\", num_labels=13, id2label=id2label, label2id=label2id\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 26,
   "id": "6a26e0b3-6432-44f4-b4ea-7c628402cc1f",
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "PyTorch: setting up devices\n",
      "The default value for the training argument `--report_to` will change in v5 (from all installed integrations to none). In v5, you will need to use `--report_to all` to get the same behavior as now. You should start updating your code and make this info disappear :-).\n"
     ]
    }
   ],
   "source": [
    "training_args = TrainingArguments(\n",
    "    output_dir=\"my_awesome_wnut_model\",\n",
    "    learning_rate=2e-5,\n",
    "    per_device_train_batch_size=16,\n",
    "    per_device_eval_batch_size=16,\n",
    "    num_train_epochs=2,\n",
    "    weight_decay=0.01,\n",
    "    evaluation_strategy=\"epoch\",\n",
    "    save_strategy=\"epoch\",\n",
    "    load_best_model_at_end=True,\n",
    "    push_to_hub=False,\n",
    ")\n",
    "\n",
    "trainer = Trainer(\n",
    "    model=model,\n",
    "    args=training_args,\n",
    "    train_dataset=tokenized_wnut[\"train\"],\n",
    "    eval_dataset=tokenized_wnut[\"test\"],\n",
    "    tokenizer=tokenizer,\n",
    "    data_collator=data_collator,\n",
    "    compute_metrics=compute_metrics,\n",
    ")\n",
    "trainer.train()"
   ]
  },
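  {
   "cell_type": "markdown",
   "id": "eval-note",
   "metadata": {},
   "source": [
    "After training, the final metrics on the evaluation split can be reported with `trainer.evaluate()` (a sketch; it reuses the `eval_dataset` and `compute_metrics` configured above)."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "eval-demo",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Runs a full pass over eval_dataset and returns the seqeval metrics.\n",
    "trainer.evaluate()"
   ]
  },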
  {
   "cell_type": "code",
   "execution_count": 28,
   "id": "5016c257-74e3-4aad-b6cc-7706b4fc367d",
   "metadata": {},
   "outputs": [
    {
     "ename": "LocalTokenNotFoundError",
     "evalue": "Token is required (`token=True`), but no token found. You need to provide a token or be logged in to Hugging Face with `huggingface-cli login` or `huggingface_hub.login`. See https://huggingface.co/settings/tokens.",
     "output_type": "error",
     "traceback": [
      "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m",
      "\u001b[1;31mLocalTokenNotFoundError\u001b[0m                   Traceback (most recent call last)",
      "Input \u001b[1;32mIn [28]\u001b[0m, in \u001b[0;36m<cell line: 1>\u001b[1;34m()\u001b[0m\n\u001b[1;32m----> 1\u001b[0m \u001b[43mtrainer\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mpush_to_hub\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\n",
      "File \u001b[1;32m~\\AppData\\Local\\Programs\\Python\\Python310\\lib\\site-packages\\transformers\\trainer.py:3431\u001b[0m, in \u001b[0;36mTrainer.push_to_hub\u001b[1;34m(self, commit_message, blocking, **kwargs)\u001b[0m\n\u001b[0;32m   3428\u001b[0m \u001b[38;5;66;03m# If a user calls manually `push_to_hub` with `self.args.push_to_hub = False`, we try to create the repo but\u001b[39;00m\n\u001b[0;32m   3429\u001b[0m \u001b[38;5;66;03m# it might fail.\u001b[39;00m\n\u001b[0;32m   3430\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28mhasattr\u001b[39m(\u001b[38;5;28mself\u001b[39m, \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mrepo\u001b[39m\u001b[38;5;124m\"\u001b[39m):\n\u001b[1;32m-> 3431\u001b[0m     \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43minit_git_repo\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\n\u001b[0;32m   3433\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39margs\u001b[38;5;241m.\u001b[39mshould_save:\n\u001b[0;32m   3434\u001b[0m     \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39margs\u001b[38;5;241m.\u001b[39mhub_model_id \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m:\n",
      "File \u001b[1;32m~\\AppData\\Local\\Programs\\Python\\Python310\\lib\\site-packages\\transformers\\trainer.py:3281\u001b[0m, in \u001b[0;36mTrainer.init_git_repo\u001b[1;34m(self, at_init)\u001b[0m\n\u001b[0;32m   3279\u001b[0m     repo_name \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39margs\u001b[38;5;241m.\u001b[39mhub_model_id\n\u001b[0;32m   3280\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124m/\u001b[39m\u001b[38;5;124m\"\u001b[39m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;129;01min\u001b[39;00m repo_name:\n\u001b[1;32m-> 3281\u001b[0m     repo_name \u001b[38;5;241m=\u001b[39m \u001b[43mget_full_repo_name\u001b[49m\u001b[43m(\u001b[49m\u001b[43mrepo_name\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mtoken\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43margs\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mhub_token\u001b[49m\u001b[43m)\u001b[49m\n\u001b[0;32m   3283\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[0;32m   3284\u001b[0m     \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mrepo \u001b[38;5;241m=\u001b[39m Repository(\n\u001b[0;32m   3285\u001b[0m         \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39margs\u001b[38;5;241m.\u001b[39moutput_dir,\n\u001b[0;32m   3286\u001b[0m         clone_from\u001b[38;5;241m=\u001b[39mrepo_name,\n\u001b[0;32m   3287\u001b[0m         use_auth_token\u001b[38;5;241m=\u001b[39muse_auth_token,\n\u001b[0;32m   3288\u001b[0m         private\u001b[38;5;241m=\u001b[39m\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39margs\u001b[38;5;241m.\u001b[39mhub_private_repo,\n\u001b[0;32m   3289\u001b[0m     )\n",
      "File \u001b[1;32m~\\AppData\\Local\\Programs\\Python\\Python310\\lib\\site-packages\\transformers\\utils\\hub.py:801\u001b[0m, in \u001b[0;36mget_full_repo_name\u001b[1;34m(model_id, organization, token)\u001b[0m\n\u001b[0;32m    799\u001b[0m     token \u001b[38;5;241m=\u001b[39m HfFolder\u001b[38;5;241m.\u001b[39mget_token()\n\u001b[0;32m    800\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m organization \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m:\n\u001b[1;32m--> 801\u001b[0m     username \u001b[38;5;241m=\u001b[39m \u001b[43mwhoami\u001b[49m\u001b[43m(\u001b[49m\u001b[43mtoken\u001b[49m\u001b[43m)\u001b[49m[\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mname\u001b[39m\u001b[38;5;124m\"\u001b[39m]\n\u001b[0;32m    802\u001b[0m     \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;132;01m{\u001b[39;00musername\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m/\u001b[39m\u001b[38;5;132;01m{\u001b[39;00mmodel_id\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m\"\u001b[39m\n\u001b[0;32m    803\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n",
      "File \u001b[1;32m~\\AppData\\Local\\Programs\\Python\\Python310\\lib\\site-packages\\huggingface_hub\\utils\\_validators.py:118\u001b[0m, in \u001b[0;36mvalidate_hf_hub_args.<locals>._inner_fn\u001b[1;34m(*args, **kwargs)\u001b[0m\n\u001b[0;32m    115\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m check_use_auth_token:\n\u001b[0;32m    116\u001b[0m     kwargs \u001b[38;5;241m=\u001b[39m smoothly_deprecate_use_auth_token(fn_name\u001b[38;5;241m=\u001b[39mfn\u001b[38;5;241m.\u001b[39m\u001b[38;5;18m__name__\u001b[39m, has_token\u001b[38;5;241m=\u001b[39mhas_token, kwargs\u001b[38;5;241m=\u001b[39mkwargs)\n\u001b[1;32m--> 118\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m fn(\u001b[38;5;241m*\u001b[39margs, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs)\n",
      "File \u001b[1;32m~\\AppData\\Local\\Programs\\Python\\Python310\\lib\\site-packages\\huggingface_hub\\hf_api.py:925\u001b[0m, in \u001b[0;36mHfApi.whoami\u001b[1;34m(self, token)\u001b[0m\n\u001b[0;32m    913\u001b[0m \u001b[38;5;129m@validate_hf_hub_args\u001b[39m\n\u001b[0;32m    914\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21mwhoami\u001b[39m(\u001b[38;5;28mself\u001b[39m, token: Optional[\u001b[38;5;28mstr\u001b[39m] \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;01mNone\u001b[39;00m) \u001b[38;5;241m-\u001b[39m\u001b[38;5;241m>\u001b[39m Dict:\n\u001b[0;32m    915\u001b[0m     \u001b[38;5;124;03m\"\"\"\u001b[39;00m\n\u001b[0;32m    916\u001b[0m \u001b[38;5;124;03m    Call HF API to know \"whoami\".\u001b[39;00m\n\u001b[0;32m    917\u001b[0m \n\u001b[1;32m   (...)\u001b[0m\n\u001b[0;32m    921\u001b[0m \u001b[38;5;124;03m            not provided.\u001b[39;00m\n\u001b[0;32m    922\u001b[0m \u001b[38;5;124;03m    \"\"\"\u001b[39;00m\n\u001b[0;32m    923\u001b[0m     r \u001b[38;5;241m=\u001b[39m get_session()\u001b[38;5;241m.\u001b[39mget(\n\u001b[0;32m    924\u001b[0m         \u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;132;01m{\u001b[39;00m\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mendpoint\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m/api/whoami-v2\u001b[39m\u001b[38;5;124m\"\u001b[39m,\n\u001b[1;32m--> 925\u001b[0m         headers\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_build_hf_headers\u001b[49m\u001b[43m(\u001b[49m\n\u001b[0;32m    926\u001b[0m \u001b[43m            \u001b[49m\u001b[38;5;66;43;03m# If `token` is provided and not `None`, it will be used by default.\u001b[39;49;00m\n\u001b[0;32m    927\u001b[0m \u001b[43m            \u001b[49m\u001b[38;5;66;43;03m# Otherwise, the token must be retrieved from cache or env variable.\u001b[39;49;00m\n\u001b[0;32m    928\u001b[0m \u001b[43m            \u001b[49m\u001b[43mtoken\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43m(\u001b[49m\u001b[43mtoken\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;129;43;01mor\u001b[39;49;00m\u001b[43m \u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mtoken\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;129;43;01mor\u001b[39;49;00m\u001b[43m \u001b[49m\u001b[38;5;28;43;01mTrue\u001b[39;49;00m\u001b[43m)\u001b[49m\u001b[43m,\u001b[49m\n\u001b[0;32m    929\u001b[0m \u001b[43m        \u001b[49m\u001b[43m)\u001b[49m,\n\u001b[0;32m    930\u001b[0m     )\n\u001b[0;32m    931\u001b[0m     \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[0;32m    932\u001b[0m         hf_raise_for_status(r)\n",
      "File \u001b[1;32m~\\AppData\\Local\\Programs\\Python\\Python310\\lib\\site-packages\\huggingface_hub\\hf_api.py:4992\u001b[0m, in \u001b[0;36mHfApi._build_hf_headers\u001b[1;34m(self, token, is_write_action, library_name, library_version, user_agent)\u001b[0m\n\u001b[0;32m   4989\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m token \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m:\n\u001b[0;32m   4990\u001b[0m     \u001b[38;5;66;03m# Cannot do `token = token or self.token` as token can be `False`.\u001b[39;00m\n\u001b[0;32m   4991\u001b[0m     token \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mtoken\n\u001b[1;32m-> 4992\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mbuild_hf_headers\u001b[49m\u001b[43m(\u001b[49m\n\u001b[0;32m   4993\u001b[0m \u001b[43m    \u001b[49m\u001b[43mtoken\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mtoken\u001b[49m\u001b[43m,\u001b[49m\n\u001b[0;32m   4994\u001b[0m \u001b[43m    \u001b[49m\u001b[43mis_write_action\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mis_write_action\u001b[49m\u001b[43m,\u001b[49m\n\u001b[0;32m   4995\u001b[0m \u001b[43m    \u001b[49m\u001b[43mlibrary_name\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mlibrary_name\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;129;43;01mor\u001b[39;49;00m\u001b[43m \u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mlibrary_name\u001b[49m\u001b[43m,\u001b[49m\n\u001b[0;32m   4996\u001b[0m \u001b[43m    \u001b[49m\u001b[43mlibrary_version\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mlibrary_version\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;129;43;01mor\u001b[39;49;00m\u001b[43m \u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mlibrary_version\u001b[49m\u001b[43m,\u001b[49m\n\u001b[0;32m   4997\u001b[0m \u001b[43m    \u001b[49m\u001b[43muser_agent\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43muser_agent\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;129;43;01mor\u001b[39;49;00m\u001b[43m \u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43muser_agent\u001b[49m\u001b[43m,\u001b[49m\n\u001b[0;32m   4998\u001b[0m \u001b[43m\u001b[49m\u001b[43m)\u001b[49m\n",
      "File \u001b[1;32m~\\AppData\\Local\\Programs\\Python\\Python310\\lib\\site-packages\\huggingface_hub\\utils\\_validators.py:118\u001b[0m, in \u001b[0;36mvalidate_hf_hub_args.<locals>._inner_fn\u001b[1;34m(*args, **kwargs)\u001b[0m\n\u001b[0;32m    115\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m check_use_auth_token:\n\u001b[0;32m    116\u001b[0m     kwargs \u001b[38;5;241m=\u001b[39m smoothly_deprecate_use_auth_token(fn_name\u001b[38;5;241m=\u001b[39mfn\u001b[38;5;241m.\u001b[39m\u001b[38;5;18m__name__\u001b[39m, has_token\u001b[38;5;241m=\u001b[39mhas_token, kwargs\u001b[38;5;241m=\u001b[39mkwargs)\n\u001b[1;32m--> 118\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m fn(\u001b[38;5;241m*\u001b[39margs, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs)\n",
      "File \u001b[1;32m~\\AppData\\Local\\Programs\\Python\\Python310\\lib\\site-packages\\huggingface_hub\\utils\\_headers.py:121\u001b[0m, in \u001b[0;36mbuild_hf_headers\u001b[1;34m(token, is_write_action, library_name, library_version, user_agent)\u001b[0m\n\u001b[0;32m     48\u001b[0m \u001b[38;5;124;03m\"\"\"\u001b[39;00m\n\u001b[0;32m     49\u001b[0m \u001b[38;5;124;03mBuild headers dictionary to send in a HF Hub call.\u001b[39;00m\n\u001b[0;32m     50\u001b[0m \n\u001b[1;32m   (...)\u001b[0m\n\u001b[0;32m    118\u001b[0m \u001b[38;5;124;03m        If `token=True` but token is not saved locally.\u001b[39;00m\n\u001b[0;32m    119\u001b[0m \u001b[38;5;124;03m\"\"\"\u001b[39;00m\n\u001b[0;32m    120\u001b[0m \u001b[38;5;66;03m# Get auth token to send\u001b[39;00m\n\u001b[1;32m--> 121\u001b[0m token_to_send \u001b[38;5;241m=\u001b[39m \u001b[43mget_token_to_send\u001b[49m\u001b[43m(\u001b[49m\u001b[43mtoken\u001b[49m\u001b[43m)\u001b[49m\n\u001b[0;32m    122\u001b[0m _validate_token_to_send(token_to_send, is_write_action\u001b[38;5;241m=\u001b[39mis_write_action)\n\u001b[0;32m    124\u001b[0m \u001b[38;5;66;03m# Combine headers\u001b[39;00m\n",
      "File \u001b[1;32m~\\AppData\\Local\\Programs\\Python\\Python310\\lib\\site-packages\\huggingface_hub\\utils\\_headers.py:153\u001b[0m, in \u001b[0;36mget_token_to_send\u001b[1;34m(token)\u001b[0m\n\u001b[0;32m    151\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m token \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;28;01mTrue\u001b[39;00m:\n\u001b[0;32m    152\u001b[0m     \u001b[38;5;28;01mif\u001b[39;00m cached_token \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m:\n\u001b[1;32m--> 153\u001b[0m         \u001b[38;5;28;01mraise\u001b[39;00m LocalTokenNotFoundError(\n\u001b[0;32m    154\u001b[0m             \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mToken is required (`token=True`), but no token found. You\u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[0;32m    155\u001b[0m             \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124m need to provide a token or be logged in to Hugging Face with\u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[0;32m    156\u001b[0m             \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124m `huggingface-cli login` or `huggingface_hub.login`. See\u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[0;32m    157\u001b[0m             \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124m https://huggingface.co/settings/tokens.\u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[0;32m    158\u001b[0m         )\n\u001b[0;32m    159\u001b[0m     \u001b[38;5;28;01mreturn\u001b[39;00m cached_token\n\u001b[0;32m    161\u001b[0m \u001b[38;5;66;03m# Case implicit use of the token is forbidden by env variable\u001b[39;00m\n",
      "\u001b[1;31mLocalTokenNotFoundError\u001b[0m: Token is required (`token=True`), but no token found. You need to provide a token or be logged in to Hugging Face with `huggingface-cli login` or `huggingface_hub.login`. See https://huggingface.co/settings/tokens."
     ]
    }
   ],
   "source": [
    "trainer.push_to_hub()"
   ]
  },
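  {
   "cell_type": "markdown",
   "id": "inference-note",
   "metadata": {},
   "source": [
    "A minimal inference sketch, assuming the fine-tuned model has been saved locally to `my_awesome_wnut_model` (e.g. with `trainer.save_model(\"my_awesome_wnut_model\")`); the sample sentence and `aggregation_strategy` are illustrative."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "inference-demo",
   "metadata": {},
   "outputs": [],
   "source": [
    "from transformers import pipeline\n",
    "\n",
    "# \"simple\" groups consecutive B-/I- subtokens into whole entities.\n",
    "ner = pipeline(\"token-classification\", model=\"my_awesome_wnut_model\", aggregation_strategy=\"simple\")\n",
    "ner(\"The Empire State Building is in New York City.\")"
   ]
  }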
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.10.5"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}