Rodrigo1771 committed on
Commit
ab81982
·
verified ·
1 Parent(s): 3887b64

End of training

Browse files
README.md CHANGED
@@ -1,9 +1,10 @@
1
  ---
2
  base_model: IVN-RIN/bioBIT
3
  tags:
 
4
  - generated_from_trainer
5
  datasets:
6
- - drugtemist-it-ner
7
  metrics:
8
  - precision
9
  - recall
@@ -16,8 +17,8 @@ model-index:
16
  name: Token Classification
17
  type: token-classification
18
  dataset:
19
- name: drugtemist-it-ner
20
- type: drugtemist-it-ner
21
  config: DrugTEMIST Italian NER
22
  split: validation
23
  args: DrugTEMIST Italian NER
@@ -41,7 +42,7 @@ should probably proofread and complete it, then remove this comment. -->
41
 
42
  # output
43
 
44
- This model is a fine-tuned version of [IVN-RIN/bioBIT](https://huggingface.co/IVN-RIN/bioBIT) on the drugtemist-it-ner dataset.
45
  It achieves the following results on the evaluation set:
46
  - Loss: 0.0067
47
  - Precision: 0.9328
 
1
  ---
2
  base_model: IVN-RIN/bioBIT
3
  tags:
4
+ - token-classification
5
  - generated_from_trainer
6
  datasets:
7
+ - Rodrigo1771/drugtemist-it-ner
8
  metrics:
9
  - precision
10
  - recall
 
17
  name: Token Classification
18
  type: token-classification
19
  dataset:
20
+ name: Rodrigo1771/drugtemist-it-ner
21
+ type: Rodrigo1771/drugtemist-it-ner
22
  config: DrugTEMIST Italian NER
23
  split: validation
24
  args: DrugTEMIST Italian NER
 
42
 
43
  # output
44
 
45
+ This model is a fine-tuned version of [IVN-RIN/bioBIT](https://huggingface.co/IVN-RIN/bioBIT) on the Rodrigo1771/drugtemist-it-ner dataset.
46
  It achieves the following results on the evaluation set:
47
  - Loss: 0.0067
48
  - Precision: 0.9328
all_results.json CHANGED
@@ -1,26 +1,26 @@
1
  {
2
  "epoch": 10.0,
3
- "eval_accuracy": 0.9986953367008066,
4
- "eval_f1": 0.9314045730284647,
5
- "eval_loss": 0.005624314770102501,
6
- "eval_precision": 0.9327102803738317,
7
- "eval_recall": 0.9301025163094129,
8
- "eval_runtime": 13.3976,
9
- "eval_samples": 6946,
10
- "eval_samples_per_second": 518.45,
11
- "eval_steps_per_second": 64.862,
12
- "predict_accuracy": 0.9986842934577083,
13
- "predict_f1": 0.9213546039742514,
14
- "predict_loss": 0.005766334943473339,
15
- "predict_precision": 0.8892490545651,
16
- "predict_recall": 0.9558652729384437,
17
- "predict_runtime": 26.2719,
18
- "predict_samples_per_second": 560.104,
19
- "predict_steps_per_second": 70.037,
20
- "total_flos": 1.0996932656642544e+16,
21
- "train_loss": 0.003382195293697344,
22
- "train_runtime": 1039.0596,
23
- "train_samples": 27768,
24
- "train_samples_per_second": 267.242,
25
- "train_steps_per_second": 4.177
26
  }
 
1
  {
2
  "epoch": 10.0,
3
+ "eval_accuracy": 0.9988184887042326,
4
+ "eval_f1": 0.936867469879518,
5
+ "eval_loss": 0.006724909413605928,
6
+ "eval_precision": 0.9328214971209213,
7
+ "eval_recall": 0.9409486931268151,
8
+ "eval_runtime": 14.1827,
9
+ "eval_samples": 6798,
10
+ "eval_samples_per_second": 479.317,
11
+ "eval_steps_per_second": 59.932,
12
+ "predict_accuracy": 0.9981367644802958,
13
+ "predict_f1": 0.8965517241379309,
14
+ "predict_loss": 0.010722821578383446,
15
+ "predict_precision": 0.8768736616702355,
16
+ "predict_recall": 0.9171332586786114,
17
+ "predict_runtime": 27.7085,
18
+ "predict_samples_per_second": 527.095,
19
+ "predict_steps_per_second": 65.9,
20
+ "total_flos": 1.4262694978690116e+16,
21
+ "train_loss": 0.0022696754537961062,
22
+ "train_runtime": 1261.5031,
23
+ "train_samples": 27198,
24
+ "train_samples_per_second": 215.6,
25
+ "train_steps_per_second": 3.369
26
  }
eval_results.json CHANGED
@@ -1,12 +1,12 @@
1
  {
2
  "epoch": 10.0,
3
- "eval_accuracy": 0.9986953367008066,
4
- "eval_f1": 0.9314045730284647,
5
- "eval_loss": 0.005624314770102501,
6
- "eval_precision": 0.9327102803738317,
7
- "eval_recall": 0.9301025163094129,
8
- "eval_runtime": 13.3976,
9
- "eval_samples": 6946,
10
- "eval_samples_per_second": 518.45,
11
- "eval_steps_per_second": 64.862
12
  }
 
1
  {
2
  "epoch": 10.0,
3
+ "eval_accuracy": 0.9988184887042326,
4
+ "eval_f1": 0.936867469879518,
5
+ "eval_loss": 0.006724909413605928,
6
+ "eval_precision": 0.9328214971209213,
7
+ "eval_recall": 0.9409486931268151,
8
+ "eval_runtime": 14.1827,
9
+ "eval_samples": 6798,
10
+ "eval_samples_per_second": 479.317,
11
+ "eval_steps_per_second": 59.932
12
  }
predict_results.json CHANGED
@@ -1,10 +1,10 @@
1
  {
2
- "predict_accuracy": 0.9986842934577083,
3
- "predict_f1": 0.9213546039742514,
4
- "predict_loss": 0.005766334943473339,
5
- "predict_precision": 0.8892490545651,
6
- "predict_recall": 0.9558652729384437,
7
- "predict_runtime": 26.2719,
8
- "predict_samples_per_second": 560.104,
9
- "predict_steps_per_second": 70.037
10
  }
 
1
  {
2
+ "predict_accuracy": 0.9981367644802958,
3
+ "predict_f1": 0.8965517241379309,
4
+ "predict_loss": 0.010722821578383446,
5
+ "predict_precision": 0.8768736616702355,
6
+ "predict_recall": 0.9171332586786114,
7
+ "predict_runtime": 27.7085,
8
+ "predict_samples_per_second": 527.095,
9
+ "predict_steps_per_second": 65.9
10
  }
predictions.txt CHANGED
The diff for this file is too large to render. See raw diff
 
tb/events.out.tfevents.1725056175.6b97e535edda.43233.1 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:80aa2e40ff46b611632e40117a4acd4e5f3d41a9a035d30842811f76e0b6b31b
3
+ size 560
train.log CHANGED
@@ -1336,3 +1336,51 @@ Training completed. Do not forget to share your model on huggingface.co/models =
1336
  {'eval_loss': 0.006724909413605928, 'eval_precision': 0.9328214971209213, 'eval_recall': 0.9409486931268151, 'eval_f1': 0.936867469879518, 'eval_accuracy': 0.9988184887042326, 'eval_runtime': 14.3451, 'eval_samples_per_second': 473.891, 'eval_steps_per_second': 59.254, 'epoch': 10.0}
1337
  {'train_runtime': 1261.5031, 'train_samples_per_second': 215.6, 'train_steps_per_second': 3.369, 'train_loss': 0.0022696754537961062, 'epoch': 10.0}
1338
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1339
  0%| | 0/850 [00:00<?, ?it/s]
1340
  1%| | 9/850 [00:00<00:09, 88.19it/s]
1341
  2%|▏ | 18/850 [00:00<00:10, 81.56it/s]
1342
  3%|▎ | 27/850 [00:00<00:10, 80.93it/s]
1343
  4%|▍ | 36/850 [00:00<00:09, 84.26it/s]
1344
  5%|▌ | 45/850 [00:00<00:09, 85.17it/s]
1345
  6%|▋ | 54/850 [00:00<00:10, 77.34it/s]
1346
  7%|▋ | 63/850 [00:00<00:10, 78.32it/s]
1347
  8%|▊ | 71/850 [00:00<00:10, 74.58it/s]
1348
  9%|▉ | 79/850 [00:01<00:10, 75.01it/s]
1349
  10%|█ | 88/850 [00:01<00:09, 76.98it/s]
1350
  11%|█▏ | 97/850 [00:01<00:09, 79.71it/s]
1351
  13%|█▎ | 107/850 [00:01<00:08, 83.23it/s]
1352
  14%|█▎ | 116/850 [00:01<00:08, 83.47it/s]
1353
  15%|█▍ | 125/850 [00:01<00:08, 83.94it/s]
1354
  16%|█▌ | 134/850 [00:01<00:08, 81.91it/s]
1355
  17%|█▋ | 144/850 [00:01<00:08, 84.36it/s]
1356
  18%|█▊ | 153/850 [00:01<00:08, 79.26it/s]
1357
  19%|█▉ | 162/850 [00:02<00:08, 81.90it/s]
1358
  20%|██ | 171/850 [00:02<00:08, 83.12it/s]
1359
  21%|██ | 180/850 [00:02<00:07, 84.98it/s]
1360
  22%|██▏ | 189/850 [00:02<00:08, 82.59it/s]
1361
  23%|██▎ | 198/850 [00:02<00:07, 83.32it/s]
1362
  24%|██▍ | 207/850 [00:02<00:07, 82.06it/s]
1363
  25%|██▌ | 216/850 [00:02<00:07, 80.40it/s]
1364
  27%|██▋ | 226/850 [00:02<00:07, 83.55it/s]
1365
  28%|██▊ | 236/850 [00:02<00:07, 85.79it/s]
1366
  29%|██▉ | 245/850 [00:02<00:07, 84.04it/s]
1367
  30%|██▉ | 254/850 [00:03<00:06, 85.38it/s]
1368
  31%|███ | 263/850 [00:03<00:06, 85.08it/s]
1369
  32%|███▏ | 272/850 [00:03<00:06, 86.46it/s]
1370
  33%|███▎ | 281/850 [00:03<00:06, 87.20it/s]
1371
  34%|███▍ | 290/850 [00:03<00:06, 87.65it/s]
1372
  35%|███▌ | 299/850 [00:03<00:06, 85.40it/s]
1373
  36%|███▌ | 308/850 [00:03<00:06, 85.40it/s]
1374
  37%|███▋ | 318/850 [00:03<00:06, 87.55it/s]
1375
  38%|███▊ | 327/850 [00:03<00:06, 85.57it/s]
1376
  40%|███▉ | 336/850 [00:04<00:05, 86.18it/s]
1377
  41%|████ | 345/850 [00:04<00:05, 85.61it/s]
1378
  42%|████▏ | 355/850 [00:04<00:05, 87.78it/s]
1379
  43%|████▎ | 364/850 [00:04<00:05, 84.15it/s]
1380
  44%|████▍ | 373/850 [00:04<00:05, 84.84it/s]
1381
  45%|████▍ | 382/850 [00:04<00:05, 80.74it/s]
1382
  46%|████▌ | 391/850 [00:04<00:05, 80.97it/s]
1383
  47%|████▋ | 400/850 [00:04<00:05, 78.77it/s]
1384
  48%|████▊ | 410/850 [00:04<00:05, 83.43it/s]
1385
  49%|████▉ | 420/850 [00:05<00:05, 85.76it/s]
1386
  51%|█████ | 430/850 [00:05<00:04, 86.88it/s]
1387
  52%|█████▏ | 439/850 [00:05<00:04, 85.98it/s]
1388
  53%|█████▎ | 448/850 [00:05<00:04, 81.98it/s]
1389
  54%|█████▍ | 457/850 [00:05<00:04, 83.43it/s]
1390
  55%|█████▍ | 467/850 [00:05<00:04, 86.56it/s]
1391
  56%|█████▌ | 477/850 [00:05<00:04, 88.16it/s]
1392
  57%|█████▋ | 486/850 [00:05<00:04, 85.36it/s]
1393
  58%|█████▊ | 496/850 [00:05<00:04, 87.52it/s]
1394
  60%|█████▉ | 506/850 [00:06<00:03, 90.01it/s]
1395
  61%|██████ | 516/850 [00:06<00:03, 89.02it/s]
1396
  62%|██████▏ | 525/850 [00:06<00:03, 88.20it/s]
1397
  63%|██████▎ | 534/850 [00:06<00:03, 85.08it/s]
1398
  64%|██████▍ | 543/850 [00:06<00:03, 83.29it/s]
1399
  65%|██████▍ | 552/850 [00:06<00:03, 81.94it/s]
1400
  66%|██████▌ | 561/850 [00:06<00:03, 80.66it/s]
1401
  67%|██████▋ | 570/850 [00:06<00:03, 79.12it/s]
1402
  68%|██████▊ | 578/850 [00:06<00:03, 78.66it/s]
1403
  69%|██████▉ | 587/850 [00:07<00:03, 80.62it/s]
1404
  70%|███████ | 596/850 [00:07<00:03, 81.35it/s]
1405
  71%|███████ | 605/850 [00:07<00:03, 81.20it/s]
1406
  72%|███████▏ | 615/850 [00:07<00:02, 82.94it/s]
1407
  74%|███████▎ | 625/850 [00:07<00:02, 84.80it/s]
1408
  75%|███████▍ | 634/850 [00:07<00:02, 84.22it/s]
1409
  76%|███████▌ | 643/850 [00:07<00:02, 84.53it/s]
1410
  77%|███████▋ | 653/850 [00:07<00:02, 87.76it/s]
1411
  78%|███████▊ | 662/850 [00:07<00:02, 87.53it/s]
1412
  79%|███████▉ | 671/850 [00:08<00:02, 87.19it/s]
1413
  80%|████████ | 680/850 [00:08<00:01, 86.07it/s]
1414
  81%|████████ | 689/850 [00:08<00:01, 84.91it/s]
1415
  82%|████████▏ | 698/850 [00:08<00:01, 84.44it/s]
1416
  83%|████████▎ | 707/850 [00:08<00:01, 81.34it/s]
1417
  84%|████████▍ | 716/850 [00:08<00:01, 81.52it/s]
1418
  85%|████████▌ | 725/850 [00:08<00:01, 78.53it/s]
1419
  86%|████████▋ | 734/850 [00:08<00:01, 81.34it/s]
1420
  87%|████████▋ | 743/850 [00:08<00:01, 79.59it/s]
1421
  88%|████████▊ | 752/850 [00:09<00:01, 78.93it/s]
1422
  90%|████████▉ | 761/850 [00:09<00:01, 81.25it/s]
1423
  91%|█████████ | 770/850 [00:09<00:00, 81.25it/s]
1424
  92%|█████████▏| 779/850 [00:09<00:00, 80.11it/s]
1425
  93%|█████████▎| 788/850 [00:09<00:00, 78.07it/s]
1426
  94%|█████████▍| 798/850 [00:09<00:00, 82.72it/s]
1427
  95%|█████████▍| 807/850 [00:09<00:00, 81.30it/s]
1428
  96%|█████████▌| 816/850 [00:09<00:00, 80.69it/s]
1429
  97%|█████████▋| 826/850 [00:09<00:00, 82.05it/s]
1430
  98%|█████████▊| 835/850 [00:10<00:00, 82.91it/s]
1431
  99%|█████████▉| 844/850 [00:10<00:00, 83.44it/s]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1432
  0%| | 0/1826 [00:00<?, ?it/s]
1433
  1%| | 10/1826 [00:00<00:18, 99.46it/s]
1434
  1%| | 20/1826 [00:00<00:22, 80.10it/s]
1435
  2%|▏ | 29/1826 [00:00<00:21, 82.82it/s]
1436
  2%|▏ | 38/1826 [00:00<00:21, 82.91it/s]
1437
  3%|▎ | 47/1826 [00:00<00:20, 85.19it/s]
1438
  3%|▎ | 56/1826 [00:00<00:20, 85.50it/s]
1439
  4%|▎ | 65/1826 [00:00<00:21, 82.13it/s]
1440
  4%|▍ | 75/1826 [00:00<00:20, 84.79it/s]
1441
  5%|▍ | 84/1826 [00:00<00:20, 86.10it/s]
1442
  5%|▌ | 93/1826 [00:01<00:19, 87.04it/s]
1443
  6%|▌ | 103/1826 [00:01<00:19, 89.92it/s]
1444
  6%|▌ | 113/1826 [00:01<00:19, 89.70it/s]
1445
  7%|▋ | 122/1826 [00:01<00:19, 86.11it/s]
1446
  7%|▋ | 131/1826 [00:01<00:19, 84.81it/s]
1447
  8%|▊ | 140/1826 [00:01<00:19, 85.70it/s]
1448
  8%|▊ | 150/1826 [00:01<00:19, 87.70it/s]
1449
  9%|▊ | 159/1826 [00:01<00:20, 82.63it/s]
1450
  9%|▉ | 168/1826 [00:01<00:20, 82.89it/s]
1451
  10%|▉ | 178/1826 [00:02<00:19, 86.63it/s]
1452
  10%|█ | 187/1826 [00:02<00:19, 86.22it/s]
1453
  11%|█ | 197/1826 [00:02<00:18, 86.83it/s]
1454
  11%|█▏ | 206/1826 [00:02<00:18, 85.81it/s]
1455
  12%|█▏ | 215/1826 [00:02<00:19, 84.24it/s]
1456
  12%|█▏ | 224/1826 [00:02<00:18, 85.72it/s]
1457
  13%|█▎ | 233/1826 [00:02<00:19, 82.32it/s]
1458
  13%|█▎ | 242/1826 [00:02<00:19, 81.78it/s]
1459
  14%|█▎ | 251/1826 [00:02<00:19, 81.73it/s]
1460
  14%|█▍ | 260/1826 [00:03<00:19, 81.82it/s]
1461
  15%|█▍ | 270/1826 [00:03<00:18, 84.77it/s]
1462
  15%|█▌ | 280/1826 [00:03<00:17, 88.25it/s]
1463
  16%|█▌ | 289/1826 [00:03<00:17, 86.31it/s]
1464
  16%|█▋ | 298/1826 [00:03<00:17, 86.81it/s]
1465
  17%|█▋ | 307/1826 [00:03<00:17, 86.16it/s]
1466
  17%|█▋ | 316/1826 [00:03<00:17, 85.47it/s]
1467
  18%|█▊ | 326/1826 [00:03<00:17, 87.31it/s]
1468
  18%|█▊ | 335/1826 [00:03<00:17, 86.45it/s]
1469
  19%|█▉ | 345/1826 [00:04<00:16, 88.96it/s]
1470
  19%|█▉ | 354/1826 [00:04<00:17, 84.12it/s]
1471
  20%|█▉ | 364/1826 [00:04<00:16, 86.21it/s]
1472
  20%|██ | 374/1826 [00:04<00:16, 87.29it/s]
1473
  21%|██ | 383/1826 [00:04<00:16, 87.07it/s]
1474
  21%|██▏ | 392/1826 [00:04<00:17, 82.64it/s]
1475
  22%|██▏ | 401/1826 [00:04<00:16, 84.60it/s]
1476
  22%|██▏ | 410/1826 [00:04<00:16, 85.41it/s]
1477
  23%|██▎ | 419/1826 [00:04<00:16, 84.65it/s]
1478
  23%|██▎ | 428/1826 [00:05<00:16, 83.56it/s]
1479
  24%|██▍ | 437/1826 [00:05<00:16, 84.51it/s]
1480
  24%|██▍ | 446/1826 [00:05<00:16, 84.92it/s]
1481
  25%|██▍ | 455/1826 [00:05<00:15, 85.69it/s]
1482
  25%|██▌ | 465/1826 [00:05<00:15, 88.89it/s]
1483
  26%|██▌ | 474/1826 [00:05<00:16, 84.46it/s]
1484
  26%|██▋ | 483/1826 [00:05<00:15, 84.49it/s]
1485
  27%|██▋ | 492/1826 [00:05<00:16, 82.42it/s]
1486
  27%|██▋ | 502/1826 [00:05<00:15, 84.22it/s]
1487
  28%|██▊ | 512/1826 [00:05<00:15, 86.43it/s]
1488
  29%|██▊ | 522/1826 [00:06<00:14, 88.67it/s]
1489
  29%|██▉ | 531/1826 [00:06<00:14, 86.95it/s]
1490
  30%|██▉ | 540/1826 [00:06<00:14, 87.11it/s]
1491
  30%|███ | 549/1826 [00:06<00:14, 85.25it/s]
1492
  31%|███ | 558/1826 [00:06<00:15, 83.01it/s]
1493
  31%|███ | 568/1826 [00:06<00:14, 85.14it/s]
1494
  32%|███▏ | 577/1826 [00:06<00:14, 84.98it/s]
1495
  32%|███▏ | 586/1826 [00:06<00:15, 78.67it/s]
1496
  33%|███▎ | 595/1826 [00:06<00:15, 80.71it/s]
1497
  33%|███▎ | 604/1826 [00:07<00:14, 82.23it/s]
1498
  34%|███▎ | 613/1826 [00:07<00:14, 81.40it/s]
1499
  34%|███▍ | 622/1826 [00:07<00:14, 82.83it/s]
1500
  35%|███▍ | 632/1826 [00:07<00:13, 86.62it/s]
1501
  35%|███▌ | 642/1826 [00:07<00:13, 87.79it/s]
1502
  36%|███▌ | 651/1826 [00:07<00:13, 84.88it/s]
1503
  36%|███▌ | 660/1826 [00:07<00:13, 86.28it/s]
1504
  37%|███▋ | 670/1826 [00:07<00:13, 87.60it/s]
1505
  37%|███▋ | 679/1826 [00:07<00:13, 87.43it/s]
1506
  38%|███▊ | 689/1826 [00:08<00:12, 88.58it/s]
1507
  38%|███▊ | 699/1826 [00:08<00:12, 90.30it/s]
1508
  39%|███▉ | 709/1826 [00:08<00:12, 91.17it/s]
1509
  39%|███▉ | 719/1826 [00:08<00:11, 92.57it/s]
1510
  40%|███▉ | 729/1826 [00:08<00:11, 93.00it/s]
1511
  40%|████ | 739/1826 [00:08<00:12, 90.46it/s]
1512
  41%|████ | 749/1826 [00:08<00:11, 91.75it/s]
1513
  42%|████▏ | 759/1826 [00:08<00:11, 91.80it/s]
1514
  42%|████▏ | 769/1826 [00:08<00:11, 93.48it/s]
1515
  43%|████▎ | 779/1826 [00:09<00:11, 90.59it/s]
1516
  43%|████▎ | 789/1826 [00:09<00:11, 89.82it/s]
1517
  44%|████▍ | 799/1826 [00:09<00:11, 88.45it/s]
1518
  44%|████▍ | 809/1826 [00:09<00:11, 88.97it/s]
1519
  45%|████▍ | 819/1826 [00:09<00:11, 89.58it/s]
1520
  45%|████▌ | 829/1826 [00:09<00:10, 90.95it/s]
1521
  46%|████▌ | 839/1826 [00:09<00:11, 87.82it/s]
1522
  46%|████▋ | 849/1826 [00:09<00:10, 90.77it/s]
1523
  47%|████▋ | 859/1826 [00:09<00:10, 93.15it/s]
1524
  48%|████▊ | 869/1826 [00:10<00:10, 90.43it/s]
1525
  48%|████▊ | 879/1826 [00:10<00:10, 89.07it/s]
1526
  49%|████▊ | 888/1826 [00:10<00:10, 88.03it/s]
1527
  49%|████▉ | 898/1826 [00:10<00:10, 89.92it/s]
1528
  50%|████▉ | 908/1826 [00:10<00:10, 91.01it/s]
1529
  50%|█████ | 918/1826 [00:10<00:09, 91.51it/s]
1530
  51%|█████ | 928/1826 [00:10<00:09, 91.40it/s]
1531
  51%|█████▏ | 938/1826 [00:10<00:10, 88.19it/s]
1532
  52%|█████▏ | 947/1826 [00:10<00:10, 84.32it/s]
1533
  52%|█████▏ | 956/1826 [00:11<00:10, 85.23it/s]
1534
  53%|█████▎ | 966/1826 [00:11<00:09, 86.86it/s]
1535
  53%|█████▎ | 975/1826 [00:11<00:09, 85.98it/s]
1536
  54%|█████▍ | 985/1826 [00:11<00:09, 87.66it/s]
1537
  54%|█████▍ | 994/1826 [00:11<00:09, 85.02it/s]
1538
  55%|█████▍ | 1003/1826 [00:11<00:09, 83.12it/s]
1539
  55%|█████▌ | 1012/1826 [00:11<00:09, 84.61it/s]
1540
  56%|█████▌ | 1022/1826 [00:11<00:09, 88.01it/s]
1541
  57%|█████▋ | 1032/1826 [00:11<00:08, 90.28it/s]
1542
  57%|█████▋ | 1042/1826 [00:12<00:09, 86.92it/s]
1543
  58%|█████▊ | 1051/1826 [00:12<00:09, 85.51it/s]
1544
  58%|█████▊ | 1060/1826 [00:12<00:08, 86.69it/s]
1545
  59%|█████▊ | 1070/1826 [00:12<00:08, 89.80it/s]
1546
  59%|█████▉ | 1080/1826 [00:12<00:08, 91.79it/s]
1547
  60%|█████▉ | 1090/1826 [00:12<00:07, 92.06it/s]
1548
  60%|██████ | 1100/1826 [00:12<00:07, 92.64it/s]
1549
  61%|██████ | 1110/1826 [00:12<00:08, 89.48it/s]
1550
  61%|██████▏ | 1119/1826 [00:12<00:08, 87.58it/s]
1551
  62%|██████▏ | 1129/1826 [00:13<00:07, 89.36it/s]
1552
  62%|██████▏ | 1138/1826 [00:13<00:07, 88.55it/s]
1553
  63%|██████▎ | 1147/1826 [00:13<00:07, 87.81it/s]
1554
  63%|██████▎ | 1156/1826 [00:13<00:07, 87.34it/s]
1555
  64%|██████▍ | 1165/1826 [00:13<00:07, 83.98it/s]
1556
  64%|██████▍ | 1174/1826 [00:13<00:07, 84.35it/s]
1557
  65%|██████▍ | 1183/1826 [00:13<00:07, 82.32it/s]
1558
  65%|██████▌ | 1193/1826 [00:13<00:07, 84.81it/s]
1559
  66%|██████▌ | 1202/1826 [00:13<00:07, 85.14it/s]
1560
  66%|██████▋ | 1211/1826 [00:13<00:07, 83.20it/s]
1561
  67%|██████▋ | 1220/1826 [00:14<00:07, 81.78it/s]
1562
  67%|██████▋ | 1230/1826 [00:14<00:06, 85.50it/s]
1563
  68%|██████▊ | 1240/1826 [00:14<00:06, 86.53it/s]
1564
  68%|██████▊ | 1250/1826 [00:14<00:06, 85.10it/s]
1565
  69%|██████▉ | 1260/1826 [00:14<00:06, 86.63it/s]
1566
  69%|██████▉ | 1269/1826 [00:14<00:06, 87.13it/s]
1567
  70%|██████▉ | 1278/1826 [00:14<00:06, 87.73it/s]
1568
  70%|███████ | 1287/1826 [00:14<00:06, 88.07it/s]
1569
  71%|███████ | 1297/1826 [00:14<00:05, 90.99it/s]
1570
  72%|███████▏ | 1307/1826 [00:15<00:05, 91.31it/s]
1571
  72%|███████▏ | 1317/1826 [00:15<00:05, 93.37it/s]
1572
  73%|███████▎ | 1327/1826 [00:15<00:05, 92.71it/s]
1573
  73%|███████▎ | 1337/1826 [00:15<00:05, 88.69it/s]
1574
  74%|███████▎ | 1346/1826 [00:15<00:05, 85.16it/s]
1575
  74%|███████▍ | 1355/1826 [00:15<00:05, 86.29it/s]
1576
  75%|███████▍ | 1364/1826 [00:15<00:05, 87.05it/s]
1577
  75%|███████▌ | 1373/1826 [00:15<00:05, 85.50it/s]
1578
  76%|███████▌ | 1383/1826 [00:15<00:05, 88.26it/s]
1579
  76%|███████▋ | 1393/1826 [00:16<00:04, 89.45it/s]
1580
  77%|███████▋ | 1403/1826 [00:16<00:04, 91.42it/s]
1581
  77%|███████▋ | 1413/1826 [00:16<00:04, 92.32it/s]
1582
  78%|███████▊ | 1423/1826 [00:16<00:04, 89.99it/s]
1583
  78%|███████▊ | 1433/1826 [00:16<00:04, 90.52it/s]
1584
  79%|███████▉ | 1443/1826 [00:16<00:04, 85.95it/s]
1585
  80%|███████▉ | 1453/1826 [00:16<00:04, 88.09it/s]
1586
  80%|████████ | 1462/1826 [00:16<00:04, 88.33it/s]
1587
  81%|████████ | 1471/1826 [00:16<00:04, 84.47it/s]
1588
  81%|████████ | 1480/1826 [00:17<00:04, 84.58it/s]
1589
  82%|████████▏ | 1489/1826 [00:17<00:04, 80.66it/s]
1590
  82%|████████▏ | 1498/1826 [00:17<00:04, 81.96it/s]
1591
  83%|████████▎ | 1507/1826 [00:17<00:03, 82.12it/s]
1592
  83%|████████▎ | 1517/1826 [00:17<00:03, 84.73it/s]
1593
  84%|████████▎ | 1526/1826 [00:17<00:03, 82.53it/s]
1594
  84%|████████▍ | 1535/1826 [00:17<00:03, 81.76it/s]
1595
  85%|████████▍ | 1544/1826 [00:17<00:03, 82.99it/s]
1596
  85%|████████▌ | 1553/1826 [00:17<00:03, 83.81it/s]
1597
  86%|████████▌ | 1562/1826 [00:18<00:03, 82.96it/s]
1598
  86%|████████▌ | 1571/1826 [00:18<00:03, 81.80it/s]
1599
  87%|████████▋ | 1580/1826 [00:18<00:02, 83.72it/s]
1600
  87%|████████▋ | 1589/1826 [00:18<00:02, 82.90it/s]
1601
  88%|████████▊ | 1598/1826 [00:18<00:02, 84.03it/s]
1602
  88%|████████▊ | 1608/1826 [00:18<00:02, 86.25it/s]
1603
  89%|████████▊ | 1617/1826 [00:18<00:02, 85.18it/s]
1604
  89%|████████▉ | 1626/1826 [00:18<00:02, 77.48it/s]
1605
  90%|████████▉ | 1636/1826 [00:18<00:02, 80.59it/s]
1606
  90%|█████████ | 1645/1826 [00:19<00:02, 82.51it/s]
1607
  91%|█████████ | 1655/1826 [00:19<00:02, 85.30it/s]
1608
  91%|█████████ | 1664/1826 [00:19<00:01, 84.48it/s]
1609
  92%|█████████▏| 1673/1826 [00:19<00:01, 85.56it/s]
1610
  92%|█████████▏| 1683/1826 [00:19<00:01, 88.32it/s]
1611
  93%|█████████▎| 1692/1826 [00:19<00:01, 86.33it/s]
1612
  93%|█████████▎| 1701/1826 [00:19<00:01, 85.33it/s]
1613
  94%|█████████▎| 1711/1826 [00:19<00:01, 87.29it/s]
1614
  94%|█████████▍| 1720/1826 [00:19<00:01, 84.39it/s]
1615
  95%|█████████▍| 1730/1826 [00:20<00:01, 87.06it/s]
1616
  95%|█████████▌| 1740/1826 [00:20<00:00, 87.99it/s]
1617
  96%|█████████▌| 1750/1826 [00:20<00:00, 88.28it/s]
1618
  96%|█████████▋| 1759/1826 [00:20<00:00, 87.89it/s]
1619
  97%|█████████▋| 1768/1826 [00:20<00:00, 87.33it/s]
1620
  97%|█████████▋| 1777/1826 [00:20<00:00, 88.09it/s]
1621
  98%|█████████▊| 1787/1826 [00:20<00:00, 88.65it/s]
1622
  98%|█████████▊| 1796/1826 [00:20<00:00, 83.22it/s]
1623
  99%|█████████▉| 1806/1826 [00:20<00:00, 86.07it/s]
1624
  99%|█████████▉| 1815/1826 [00:20<00:00, 86.85it/s]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1336
  {'eval_loss': 0.006724909413605928, 'eval_precision': 0.9328214971209213, 'eval_recall': 0.9409486931268151, 'eval_f1': 0.936867469879518, 'eval_accuracy': 0.9988184887042326, 'eval_runtime': 14.3451, 'eval_samples_per_second': 473.891, 'eval_steps_per_second': 59.254, 'epoch': 10.0}
1337
  {'train_runtime': 1261.5031, 'train_samples_per_second': 215.6, 'train_steps_per_second': 3.369, 'train_loss': 0.0022696754537961062, 'epoch': 10.0}
1338
 
1339
+ ***** train metrics *****
1340
+ epoch = 10.0
1341
+ total_flos = 13283169GF
1342
+ train_loss = 0.0023
1343
+ train_runtime = 0:21:01.50
1344
+ train_samples = 27198
1345
+ train_samples_per_second = 215.6
1346
+ train_steps_per_second = 3.369
1347
+ 08/30/2024 22:16:00 - INFO - __main__ - *** Evaluate ***
1348
+ [INFO|trainer.py:805] 2024-08-30 22:16:00,920 >> The following columns in the evaluation set don't have a corresponding argument in `BertForTokenClassification.forward` and have been ignored: id, tokens, ner_tags. If id, tokens, ner_tags are not expected by `BertForTokenClassification.forward`, you can safely ignore this message.
1349
+ [INFO|trainer.py:3788] 2024-08-30 22:16:00,922 >>
1350
+ ***** Running Evaluation *****
1351
+ [INFO|trainer.py:3790] 2024-08-30 22:16:00,923 >> Num examples = 6798
1352
+ [INFO|trainer.py:3793] 2024-08-30 22:16:00,923 >> Batch size = 8
1353
+
1354
  0%| | 0/850 [00:00<?, ?it/s]
1355
  1%| | 9/850 [00:00<00:09, 88.19it/s]
1356
  2%|▏ | 18/850 [00:00<00:10, 81.56it/s]
1357
  3%|▎ | 27/850 [00:00<00:10, 80.93it/s]
1358
  4%|▍ | 36/850 [00:00<00:09, 84.26it/s]
1359
  5%|▌ | 45/850 [00:00<00:09, 85.17it/s]
1360
  6%|▋ | 54/850 [00:00<00:10, 77.34it/s]
1361
  7%|▋ | 63/850 [00:00<00:10, 78.32it/s]
1362
  8%|▊ | 71/850 [00:00<00:10, 74.58it/s]
1363
  9%|▉ | 79/850 [00:01<00:10, 75.01it/s]
1364
  10%|█ | 88/850 [00:01<00:09, 76.98it/s]
1365
  11%|█▏ | 97/850 [00:01<00:09, 79.71it/s]
1366
  13%|█▎ | 107/850 [00:01<00:08, 83.23it/s]
1367
  14%|█▎ | 116/850 [00:01<00:08, 83.47it/s]
1368
  15%|█▍ | 125/850 [00:01<00:08, 83.94it/s]
1369
  16%|█▌ | 134/850 [00:01<00:08, 81.91it/s]
1370
  17%|█▋ | 144/850 [00:01<00:08, 84.36it/s]
1371
  18%|█▊ | 153/850 [00:01<00:08, 79.26it/s]
1372
  19%|█▉ | 162/850 [00:02<00:08, 81.90it/s]
1373
  20%|██ | 171/850 [00:02<00:08, 83.12it/s]
1374
  21%|██ | 180/850 [00:02<00:07, 84.98it/s]
1375
  22%|██▏ | 189/850 [00:02<00:08, 82.59it/s]
1376
  23%|██▎ | 198/850 [00:02<00:07, 83.32it/s]
1377
  24%|██▍ | 207/850 [00:02<00:07, 82.06it/s]
1378
  25%|██▌ | 216/850 [00:02<00:07, 80.40it/s]
1379
  27%|██▋ | 226/850 [00:02<00:07, 83.55it/s]
1380
  28%|██▊ | 236/850 [00:02<00:07, 85.79it/s]
1381
  29%|██▉ | 245/850 [00:02<00:07, 84.04it/s]
1382
  30%|██▉ | 254/850 [00:03<00:06, 85.38it/s]
1383
  31%|███ | 263/850 [00:03<00:06, 85.08it/s]
1384
  32%|███▏ | 272/850 [00:03<00:06, 86.46it/s]
1385
  33%|███▎ | 281/850 [00:03<00:06, 87.20it/s]
1386
  34%|███▍ | 290/850 [00:03<00:06, 87.65it/s]
1387
  35%|███▌ | 299/850 [00:03<00:06, 85.40it/s]
1388
  36%|███▌ | 308/850 [00:03<00:06, 85.40it/s]
1389
  37%|███▋ | 318/850 [00:03<00:06, 87.55it/s]
1390
  38%|███▊ | 327/850 [00:03<00:06, 85.57it/s]
1391
  40%|███▉ | 336/850 [00:04<00:05, 86.18it/s]
1392
  41%|████ | 345/850 [00:04<00:05, 85.61it/s]
1393
  42%|████▏ | 355/850 [00:04<00:05, 87.78it/s]
1394
  43%|████▎ | 364/850 [00:04<00:05, 84.15it/s]
1395
  44%|████▍ | 373/850 [00:04<00:05, 84.84it/s]
1396
  45%|████▍ | 382/850 [00:04<00:05, 80.74it/s]
1397
  46%|████▌ | 391/850 [00:04<00:05, 80.97it/s]
1398
  47%|████▋ | 400/850 [00:04<00:05, 78.77it/s]
1399
  48%|████▊ | 410/850 [00:04<00:05, 83.43it/s]
1400
  49%|████▉ | 420/850 [00:05<00:05, 85.76it/s]
1401
  51%|█████ | 430/850 [00:05<00:04, 86.88it/s]
1402
  52%|█████▏ | 439/850 [00:05<00:04, 85.98it/s]
1403
  53%|█████▎ | 448/850 [00:05<00:04, 81.98it/s]
1404
  54%|█████▍ | 457/850 [00:05<00:04, 83.43it/s]
1405
  55%|█████▍ | 467/850 [00:05<00:04, 86.56it/s]
1406
  56%|█████▌ | 477/850 [00:05<00:04, 88.16it/s]
1407
  57%|█████▋ | 486/850 [00:05<00:04, 85.36it/s]
1408
  58%|█████▊ | 496/850 [00:05<00:04, 87.52it/s]
1409
  60%|█████▉ | 506/850 [00:06<00:03, 90.01it/s]
1410
  61%|██████ | 516/850 [00:06<00:03, 89.02it/s]
1411
  62%|██████▏ | 525/850 [00:06<00:03, 88.20it/s]
1412
  63%|██████▎ | 534/850 [00:06<00:03, 85.08it/s]
1413
  64%|██████▍ | 543/850 [00:06<00:03, 83.29it/s]
1414
  65%|██████▍ | 552/850 [00:06<00:03, 81.94it/s]
1415
  66%|██████▌ | 561/850 [00:06<00:03, 80.66it/s]
1416
  67%|██████▋ | 570/850 [00:06<00:03, 79.12it/s]
1417
  68%|██████▊ | 578/850 [00:06<00:03, 78.66it/s]
1418
  69%|██████▉ | 587/850 [00:07<00:03, 80.62it/s]
1419
  70%|███████ | 596/850 [00:07<00:03, 81.35it/s]
1420
  71%|███████ | 605/850 [00:07<00:03, 81.20it/s]
1421
  72%|███████▏ | 615/850 [00:07<00:02, 82.94it/s]
1422
  74%|███████▎ | 625/850 [00:07<00:02, 84.80it/s]
1423
  75%|███████▍ | 634/850 [00:07<00:02, 84.22it/s]
1424
  76%|███████▌ | 643/850 [00:07<00:02, 84.53it/s]
1425
  77%|███████▋ | 653/850 [00:07<00:02, 87.76it/s]
1426
  78%|███████▊ | 662/850 [00:07<00:02, 87.53it/s]
1427
  79%|███████▉ | 671/850 [00:08<00:02, 87.19it/s]
1428
  80%|████████ | 680/850 [00:08<00:01, 86.07it/s]
1429
  81%|████████ | 689/850 [00:08<00:01, 84.91it/s]
1430
  82%|████████▏ | 698/850 [00:08<00:01, 84.44it/s]
1431
  83%|████████▎ | 707/850 [00:08<00:01, 81.34it/s]
1432
  84%|████████▍ | 716/850 [00:08<00:01, 81.52it/s]
1433
  85%|████████▌ | 725/850 [00:08<00:01, 78.53it/s]
1434
  86%|████████▋ | 734/850 [00:08<00:01, 81.34it/s]
1435
  87%|████████▋ | 743/850 [00:08<00:01, 79.59it/s]
1436
  88%|████████▊ | 752/850 [00:09<00:01, 78.93it/s]
1437
  90%|████████▉ | 761/850 [00:09<00:01, 81.25it/s]
1438
  91%|█████████ | 770/850 [00:09<00:00, 81.25it/s]
1439
  92%|█████████▏| 779/850 [00:09<00:00, 80.11it/s]
1440
  93%|█████████▎| 788/850 [00:09<00:00, 78.07it/s]
1441
  94%|█████████▍| 798/850 [00:09<00:00, 82.72it/s]
1442
  95%|█████████▍| 807/850 [00:09<00:00, 81.30it/s]
1443
  96%|█████████▌| 816/850 [00:09<00:00, 80.69it/s]
1444
  97%|█████████▋| 826/850 [00:09<00:00, 82.05it/s]
1445
  98%|█████████▊| 835/850 [00:10<00:00, 82.91it/s]
1446
  99%|█████████▉| 844/850 [00:10<00:00, 83.44it/s]
1447
+ ***** eval metrics *****
1448
+ epoch = 10.0
1449
+ eval_accuracy = 0.9988
1450
+ eval_f1 = 0.9369
1451
+ eval_loss = 0.0067
1452
+ eval_precision = 0.9328
1453
+ eval_recall = 0.9409
1454
+ eval_runtime = 0:00:14.18
1455
+ eval_samples = 6798
1456
+ eval_samples_per_second = 479.317
1457
+ eval_steps_per_second = 59.932
1458
+ 08/30/2024 22:16:15 - INFO - __main__ - *** Predict ***
1459
+ [INFO|trainer.py:805] 2024-08-30 22:16:15,108 >> The following columns in the test set don't have a corresponding argument in `BertForTokenClassification.forward` and have been ignored: id, tokens, ner_tags. If id, tokens, ner_tags are not expected by `BertForTokenClassification.forward`, you can safely ignore this message.
1460
+ [INFO|trainer.py:3788] 2024-08-30 22:16:15,110 >>
1461
+ ***** Running Prediction *****
1462
+ [INFO|trainer.py:3790] 2024-08-30 22:16:15,110 >> Num examples = 14605
1463
+ [INFO|trainer.py:3793] 2024-08-30 22:16:15,110 >> Batch size = 8
1464
+
1465
  0%| | 0/1826 [00:00<?, ?it/s]
1466
  1%| | 10/1826 [00:00<00:18, 99.46it/s]
1467
  1%| | 20/1826 [00:00<00:22, 80.10it/s]
1468
  2%|▏ | 29/1826 [00:00<00:21, 82.82it/s]
1469
  2%|▏ | 38/1826 [00:00<00:21, 82.91it/s]
1470
  3%|▎ | 47/1826 [00:00<00:20, 85.19it/s]
1471
  3%|▎ | 56/1826 [00:00<00:20, 85.50it/s]
1472
  4%|▎ | 65/1826 [00:00<00:21, 82.13it/s]
1473
  4%|▍ | 75/1826 [00:00<00:20, 84.79it/s]
1474
  5%|▍ | 84/1826 [00:00<00:20, 86.10it/s]
1475
  5%|▌ | 93/1826 [00:01<00:19, 87.04it/s]
1476
  6%|▌ | 103/1826 [00:01<00:19, 89.92it/s]
1477
  6%|▌ | 113/1826 [00:01<00:19, 89.70it/s]
1478
  7%|▋ | 122/1826 [00:01<00:19, 86.11it/s]
1479
  7%|▋ | 131/1826 [00:01<00:19, 84.81it/s]
1480
  8%|▊ | 140/1826 [00:01<00:19, 85.70it/s]
1481
  8%|▊ | 150/1826 [00:01<00:19, 87.70it/s]
1482
  9%|▊ | 159/1826 [00:01<00:20, 82.63it/s]
1483
  9%|▉ | 168/1826 [00:01<00:20, 82.89it/s]
1484
  10%|▉ | 178/1826 [00:02<00:19, 86.63it/s]
1485
  10%|█ | 187/1826 [00:02<00:19, 86.22it/s]
1486
  11%|█ | 197/1826 [00:02<00:18, 86.83it/s]
1487
  11%|█▏ | 206/1826 [00:02<00:18, 85.81it/s]
1488
  12%|█▏ | 215/1826 [00:02<00:19, 84.24it/s]
1489
  12%|█▏ | 224/1826 [00:02<00:18, 85.72it/s]
1490
  13%|█▎ | 233/1826 [00:02<00:19, 82.32it/s]
1491
  13%|█▎ | 242/1826 [00:02<00:19, 81.78it/s]
1492
  14%|█▎ | 251/1826 [00:02<00:19, 81.73it/s]
1493
  14%|█▍ | 260/1826 [00:03<00:19, 81.82it/s]
1494
  15%|█▍ | 270/1826 [00:03<00:18, 84.77it/s]
1495
  15%|█▌ | 280/1826 [00:03<00:17, 88.25it/s]
1496
  16%|█▌ | 289/1826 [00:03<00:17, 86.31it/s]
1497
  16%|█▋ | 298/1826 [00:03<00:17, 86.81it/s]
1498
  17%|█▋ | 307/1826 [00:03<00:17, 86.16it/s]
1499
  17%|█▋ | 316/1826 [00:03<00:17, 85.47it/s]
1500
  18%|█▊ | 326/1826 [00:03<00:17, 87.31it/s]
1501
  18%|█▊ | 335/1826 [00:03<00:17, 86.45it/s]
1502
  19%|█▉ | 345/1826 [00:04<00:16, 88.96it/s]
1503
  19%|█▉ | 354/1826 [00:04<00:17, 84.12it/s]
1504
  20%|█▉ | 364/1826 [00:04<00:16, 86.21it/s]
1505
  20%|██ | 374/1826 [00:04<00:16, 87.29it/s]
1506
  21%|██ | 383/1826 [00:04<00:16, 87.07it/s]
1507
  21%|██▏ | 392/1826 [00:04<00:17, 82.64it/s]
1508
  22%|██▏ | 401/1826 [00:04<00:16, 84.60it/s]
1509
  22%|██▏ | 410/1826 [00:04<00:16, 85.41it/s]
1510
  23%|██▎ | 419/1826 [00:04<00:16, 84.65it/s]
1511
  23%|██▎ | 428/1826 [00:05<00:16, 83.56it/s]
1512
  24%|██▍ | 437/1826 [00:05<00:16, 84.51it/s]
1513
  24%|██▍ | 446/1826 [00:05<00:16, 84.92it/s]
1514
  25%|██▍ | 455/1826 [00:05<00:15, 85.69it/s]
1515
  25%|██▌ | 465/1826 [00:05<00:15, 88.89it/s]
1516
  26%|██▌ | 474/1826 [00:05<00:16, 84.46it/s]
1517
  26%|██▋ | 483/1826 [00:05<00:15, 84.49it/s]
1518
  27%|██▋ | 492/1826 [00:05<00:16, 82.42it/s]
1519
  27%|██▋ | 502/1826 [00:05<00:15, 84.22it/s]
1520
  28%|██▊ | 512/1826 [00:05<00:15, 86.43it/s]
1521
  29%|██▊ | 522/1826 [00:06<00:14, 88.67it/s]
1522
  29%|██▉ | 531/1826 [00:06<00:14, 86.95it/s]
1523
  30%|██▉ | 540/1826 [00:06<00:14, 87.11it/s]
1524
  30%|███ | 549/1826 [00:06<00:14, 85.25it/s]
1525
  31%|███ | 558/1826 [00:06<00:15, 83.01it/s]
1526
  31%|███ | 568/1826 [00:06<00:14, 85.14it/s]
1527
  32%|███▏ | 577/1826 [00:06<00:14, 84.98it/s]
1528
  32%|███▏ | 586/1826 [00:06<00:15, 78.67it/s]
1529
  33%|███▎ | 595/1826 [00:06<00:15, 80.71it/s]
1530
  33%|███▎ | 604/1826 [00:07<00:14, 82.23it/s]
1531
  34%|███▎ | 613/1826 [00:07<00:14, 81.40it/s]
1532
  34%|███▍ | 622/1826 [00:07<00:14, 82.83it/s]
1533
  35%|███▍ | 632/1826 [00:07<00:13, 86.62it/s]
1534
  35%|███▌ | 642/1826 [00:07<00:13, 87.79it/s]
1535
  36%|███▌ | 651/1826 [00:07<00:13, 84.88it/s]
1536
  36%|███▌ | 660/1826 [00:07<00:13, 86.28it/s]
1537
  37%|███▋ | 670/1826 [00:07<00:13, 87.60it/s]
1538
  37%|███▋ | 679/1826 [00:07<00:13, 87.43it/s]
1539
  38%|███▊ | 689/1826 [00:08<00:12, 88.58it/s]
1540
  38%|███▊ | 699/1826 [00:08<00:12, 90.30it/s]
1541
  39%|███▉ | 709/1826 [00:08<00:12, 91.17it/s]
1542
  39%|███▉ | 719/1826 [00:08<00:11, 92.57it/s]
1543
  40%|███▉ | 729/1826 [00:08<00:11, 93.00it/s]
1544
  40%|████ | 739/1826 [00:08<00:12, 90.46it/s]
1545
  41%|████ | 749/1826 [00:08<00:11, 91.75it/s]
1546
  42%|████▏ | 759/1826 [00:08<00:11, 91.80it/s]
1547
  42%|████▏ | 769/1826 [00:08<00:11, 93.48it/s]
1548
  43%|████▎ | 779/1826 [00:09<00:11, 90.59it/s]
1549
  43%|████▎ | 789/1826 [00:09<00:11, 89.82it/s]
1550
  44%|████▍ | 799/1826 [00:09<00:11, 88.45it/s]
1551
  44%|████▍ | 809/1826 [00:09<00:11, 88.97it/s]
1552
  45%|████▍ | 819/1826 [00:09<00:11, 89.58it/s]
1553
  45%|████▌ | 829/1826 [00:09<00:10, 90.95it/s]
1554
  46%|████▌ | 839/1826 [00:09<00:11, 87.82it/s]
1555
  46%|████▋ | 849/1826 [00:09<00:10, 90.77it/s]
1556
  47%|████▋ | 859/1826 [00:09<00:10, 93.15it/s]
1557
  48%|████▊ | 869/1826 [00:10<00:10, 90.43it/s]
1558
  48%|████▊ | 879/1826 [00:10<00:10, 89.07it/s]
1559
  49%|████▊ | 888/1826 [00:10<00:10, 88.03it/s]
1560
  49%|████▉ | 898/1826 [00:10<00:10, 89.92it/s]
1561
  50%|████▉ | 908/1826 [00:10<00:10, 91.01it/s]
1562
  50%|█████ | 918/1826 [00:10<00:09, 91.51it/s]
1563
  51%|█████ | 928/1826 [00:10<00:09, 91.40it/s]
1564
  51%|█████▏ | 938/1826 [00:10<00:10, 88.19it/s]
1565
  52%|█████▏ | 947/1826 [00:10<00:10, 84.32it/s]
1566
  52%|█████▏ | 956/1826 [00:11<00:10, 85.23it/s]
1567
  53%|█████▎ | 966/1826 [00:11<00:09, 86.86it/s]
1568
  53%|█████▎ | 975/1826 [00:11<00:09, 85.98it/s]
1569
  54%|█████▍ | 985/1826 [00:11<00:09, 87.66it/s]
1570
  54%|█████▍ | 994/1826 [00:11<00:09, 85.02it/s]
1571
  55%|█████▍ | 1003/1826 [00:11<00:09, 83.12it/s]
1572
  55%|█████▌ | 1012/1826 [00:11<00:09, 84.61it/s]
1573
  56%|█████▌ | 1022/1826 [00:11<00:09, 88.01it/s]
1574
  57%|█████▋ | 1032/1826 [00:11<00:08, 90.28it/s]
1575
  57%|█████▋ | 1042/1826 [00:12<00:09, 86.92it/s]
1576
  58%|█████▊ | 1051/1826 [00:12<00:09, 85.51it/s]
1577
  58%|█████▊ | 1060/1826 [00:12<00:08, 86.69it/s]
1578
  59%|█████▊ | 1070/1826 [00:12<00:08, 89.80it/s]
1579
  59%|█████▉ | 1080/1826 [00:12<00:08, 91.79it/s]
1580
  60%|█████▉ | 1090/1826 [00:12<00:07, 92.06it/s]
1581
  60%|██████ | 1100/1826 [00:12<00:07, 92.64it/s]
1582
  61%|██████ | 1110/1826 [00:12<00:08, 89.48it/s]
1583
  61%|██████▏ | 1119/1826 [00:12<00:08, 87.58it/s]
1584
  62%|██████▏ | 1129/1826 [00:13<00:07, 89.36it/s]
1585
  62%|██████▏ | 1138/1826 [00:13<00:07, 88.55it/s]
1586
  63%|██████▎ | 1147/1826 [00:13<00:07, 87.81it/s]
1587
  63%|██████▎ | 1156/1826 [00:13<00:07, 87.34it/s]
1588
  64%|██████▍ | 1165/1826 [00:13<00:07, 83.98it/s]
1589
  64%|██████▍ | 1174/1826 [00:13<00:07, 84.35it/s]
1590
  65%|██████▍ | 1183/1826 [00:13<00:07, 82.32it/s]
1591
  65%|██████▌ | 1193/1826 [00:13<00:07, 84.81it/s]
1592
  66%|██████▌ | 1202/1826 [00:13<00:07, 85.14it/s]
1593
  66%|██████▋ | 1211/1826 [00:13<00:07, 83.20it/s]
1594
  67%|██████▋ | 1220/1826 [00:14<00:07, 81.78it/s]
1595
  67%|██████▋ | 1230/1826 [00:14<00:06, 85.50it/s]
1596
  68%|██████▊ | 1240/1826 [00:14<00:06, 86.53it/s]
1597
  68%|██████▊ | 1250/1826 [00:14<00:06, 85.10it/s]
1598
  69%|██████▉ | 1260/1826 [00:14<00:06, 86.63it/s]
1599
  69%|██████▉ | 1269/1826 [00:14<00:06, 87.13it/s]
1600
  70%|██████▉ | 1278/1826 [00:14<00:06, 87.73it/s]
1601
  70%|███████ | 1287/1826 [00:14<00:06, 88.07it/s]
1602
  71%|███████ | 1297/1826 [00:14<00:05, 90.99it/s]
1603
  72%|███████▏ | 1307/1826 [00:15<00:05, 91.31it/s]
1604
  72%|███████▏ | 1317/1826 [00:15<00:05, 93.37it/s]
1605
  73%|███████▎ | 1327/1826 [00:15<00:05, 92.71it/s]
1606
  73%|███████▎ | 1337/1826 [00:15<00:05, 88.69it/s]
1607
  74%|███████▎ | 1346/1826 [00:15<00:05, 85.16it/s]
1608
  74%|███████▍ | 1355/1826 [00:15<00:05, 86.29it/s]
1609
  75%|███████▍ | 1364/1826 [00:15<00:05, 87.05it/s]
1610
  75%|███████▌ | 1373/1826 [00:15<00:05, 85.50it/s]
1611
  76%|███████▌ | 1383/1826 [00:15<00:05, 88.26it/s]
1612
  76%|███████▋ | 1393/1826 [00:16<00:04, 89.45it/s]
1613
  77%|███████▋ | 1403/1826 [00:16<00:04, 91.42it/s]
1614
  77%|███████▋ | 1413/1826 [00:16<00:04, 92.32it/s]
1615
  78%|███████▊ | 1423/1826 [00:16<00:04, 89.99it/s]
1616
  78%|███████▊ | 1433/1826 [00:16<00:04, 90.52it/s]
1617
  79%|███████▉ | 1443/1826 [00:16<00:04, 85.95it/s]
1618
  80%|███████▉ | 1453/1826 [00:16<00:04, 88.09it/s]
1619
  80%|████████ | 1462/1826 [00:16<00:04, 88.33it/s]
1620
  81%|████████ | 1471/1826 [00:16<00:04, 84.47it/s]
1621
  81%|████████ | 1480/1826 [00:17<00:04, 84.58it/s]
1622
  82%|████████▏ | 1489/1826 [00:17<00:04, 80.66it/s]
1623
  82%|████████▏ | 1498/1826 [00:17<00:04, 81.96it/s]
1624
  83%|████████▎ | 1507/1826 [00:17<00:03, 82.12it/s]
1625
  83%|████████▎ | 1517/1826 [00:17<00:03, 84.73it/s]
1626
  84%|████████▎ | 1526/1826 [00:17<00:03, 82.53it/s]
1627
  84%|████████▍ | 1535/1826 [00:17<00:03, 81.76it/s]
1628
  85%|████████▍ | 1544/1826 [00:17<00:03, 82.99it/s]
1629
  85%|████████▌ | 1553/1826 [00:17<00:03, 83.81it/s]
1630
  86%|████████▌ | 1562/1826 [00:18<00:03, 82.96it/s]
1631
  86%|████████▌ | 1571/1826 [00:18<00:03, 81.80it/s]
1632
  87%|████████▋ | 1580/1826 [00:18<00:02, 83.72it/s]
1633
  87%|████████▋ | 1589/1826 [00:18<00:02, 82.90it/s]
1634
  88%|████████▊ | 1598/1826 [00:18<00:02, 84.03it/s]
1635
  88%|████████▊ | 1608/1826 [00:18<00:02, 86.25it/s]
1636
  89%|████████▊ | 1617/1826 [00:18<00:02, 85.18it/s]
1637
  89%|████████▉ | 1626/1826 [00:18<00:02, 77.48it/s]
1638
  90%|████████▉ | 1636/1826 [00:18<00:02, 80.59it/s]
1639
  90%|█████████ | 1645/1826 [00:19<00:02, 82.51it/s]
1640
  91%|█████████ | 1655/1826 [00:19<00:02, 85.30it/s]
1641
  91%|█████████ | 1664/1826 [00:19<00:01, 84.48it/s]
1642
  92%|█████████▏| 1673/1826 [00:19<00:01, 85.56it/s]
1643
  92%|█████████▏| 1683/1826 [00:19<00:01, 88.32it/s]
1644
  93%|█████████▎| 1692/1826 [00:19<00:01, 86.33it/s]
1645
  93%|█████████▎| 1701/1826 [00:19<00:01, 85.33it/s]
1646
  94%|█████████▎| 1711/1826 [00:19<00:01, 87.29it/s]
1647
  94%|█████████▍| 1720/1826 [00:19<00:01, 84.39it/s]
1648
  95%|█████████▍| 1730/1826 [00:20<00:01, 87.06it/s]
1649
  95%|█████████▌| 1740/1826 [00:20<00:00, 87.99it/s]
1650
  96%|█████████▌| 1750/1826 [00:20<00:00, 88.28it/s]
1651
  96%|█████████▋| 1759/1826 [00:20<00:00, 87.89it/s]
1652
  97%|█████████▋| 1768/1826 [00:20<00:00, 87.33it/s]
1653
  97%|█████████▋| 1777/1826 [00:20<00:00, 88.09it/s]
1654
  98%|█████████▊| 1787/1826 [00:20<00:00, 88.65it/s]
1655
  98%|█████████▊| 1796/1826 [00:20<00:00, 83.22it/s]
1656
  99%|█████████▉| 1806/1826 [00:20<00:00, 86.07it/s]
1657
  99%|█████████▉| 1815/1826 [00:20<00:00, 86.85it/s]
1658
+ [INFO|trainer.py:3478] 2024-08-30 22:16:43,468 >> Saving model checkpoint to /content/dissertation/scripts/ner/output
1659
+ [INFO|configuration_utils.py:472] 2024-08-30 22:16:43,469 >> Configuration saved in /content/dissertation/scripts/ner/output/config.json
1660
+ [INFO|modeling_utils.py:2690] 2024-08-30 22:16:44,625 >> Model weights saved in /content/dissertation/scripts/ner/output/model.safetensors
1661
+ [INFO|tokenization_utils_base.py:2574] 2024-08-30 22:16:44,626 >> tokenizer config file saved in /content/dissertation/scripts/ner/output/tokenizer_config.json
1662
+ [INFO|tokenization_utils_base.py:2583] 2024-08-30 22:16:44,627 >> Special tokens file saved in /content/dissertation/scripts/ner/output/special_tokens_map.json
1663
+ ***** predict metrics *****
1664
+ predict_accuracy = 0.9981
1665
+ predict_f1 = 0.8966
1666
+ predict_loss = 0.0107
1667
+ predict_precision = 0.8769
1668
+ predict_recall = 0.9171
1669
+ predict_runtime = 0:00:27.70
1670
+ predict_samples_per_second = 527.095
1671
+ predict_steps_per_second = 65.9
1672
+
train_results.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "epoch": 10.0,
3
- "total_flos": 1.0996932656642544e+16,
4
- "train_loss": 0.003382195293697344,
5
- "train_runtime": 1039.0596,
6
- "train_samples": 27768,
7
- "train_samples_per_second": 267.242,
8
- "train_steps_per_second": 4.177
9
  }
 
1
  {
2
  "epoch": 10.0,
3
+ "total_flos": 1.4262694978690116e+16,
4
+ "train_loss": 0.0022696754537961062,
5
+ "train_runtime": 1261.5031,
6
+ "train_samples": 27198,
7
+ "train_samples_per_second": 215.6,
8
+ "train_steps_per_second": 3.369
9
  }
trainer_state.json CHANGED
@@ -1,201 +1,201 @@
1
  {
2
- "best_metric": 0.9314045730284647,
3
- "best_model_checkpoint": "/content/dissertation/scripts/ner/output/checkpoint-3038",
4
  "epoch": 10.0,
5
  "eval_steps": 500,
6
- "global_step": 4340,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
10
  "log_history": [
11
  {
12
  "epoch": 1.0,
13
- "eval_accuracy": 0.9980813775011861,
14
- "eval_f1": 0.8937558247903076,
15
- "eval_loss": 0.005673006176948547,
16
- "eval_precision": 0.8937558247903076,
17
- "eval_recall": 0.8937558247903076,
18
- "eval_runtime": 13.5502,
19
- "eval_samples_per_second": 512.611,
20
- "eval_steps_per_second": 64.132,
21
- "step": 434
22
- },
23
- {
24
- "epoch": 1.1520737327188941,
25
- "grad_norm": 0.324382483959198,
26
- "learning_rate": 4.423963133640553e-05,
27
- "loss": 0.0182,
28
  "step": 500
29
  },
30
  {
31
  "epoch": 2.0,
32
- "eval_accuracy": 0.9985418469009014,
33
- "eval_f1": 0.9160165213400643,
34
- "eval_loss": 0.004419892560690641,
35
- "eval_precision": 0.9023508137432188,
36
- "eval_recall": 0.9301025163094129,
37
- "eval_runtime": 13.4449,
38
- "eval_samples_per_second": 516.626,
39
- "eval_steps_per_second": 64.634,
40
- "step": 868
41
- },
42
- {
43
- "epoch": 2.3041474654377883,
44
- "grad_norm": 0.0693276971578598,
45
- "learning_rate": 3.847926267281106e-05,
46
- "loss": 0.0039,
47
  "step": 1000
48
  },
49
  {
50
  "epoch": 3.0,
51
- "eval_accuracy": 0.9986534758462869,
52
- "eval_f1": 0.9205175600739371,
53
- "eval_loss": 0.0044819144532084465,
54
- "eval_precision": 0.9129239230064161,
55
- "eval_recall": 0.9282385834109972,
56
- "eval_runtime": 13.422,
57
- "eval_samples_per_second": 517.51,
58
- "eval_steps_per_second": 64.745,
59
- "step": 1302
60
- },
61
- {
62
- "epoch": 3.456221198156682,
63
- "grad_norm": 0.06998981535434723,
64
- "learning_rate": 3.271889400921659e-05,
65
- "loss": 0.0024,
66
  "step": 1500
67
  },
68
  {
69
  "epoch": 4.0,
70
- "eval_accuracy": 0.9983255658192169,
71
- "eval_f1": 0.9076923076923078,
72
- "eval_loss": 0.005129755008965731,
73
- "eval_precision": 0.882145998240985,
74
- "eval_recall": 0.934762348555452,
75
- "eval_runtime": 13.6729,
76
- "eval_samples_per_second": 508.013,
77
- "eval_steps_per_second": 63.556,
78
- "step": 1736
79
- },
80
- {
81
- "epoch": 4.6082949308755765,
82
- "grad_norm": 0.0027600331231951714,
83
- "learning_rate": 2.6958525345622122e-05,
84
- "loss": 0.0017,
85
  "step": 2000
86
  },
87
  {
88
  "epoch": 5.0,
89
- "eval_accuracy": 0.9986464990372004,
90
- "eval_f1": 0.9285051067780873,
91
- "eval_loss": 0.0056963409297168255,
92
- "eval_precision": 0.9250693802035153,
93
- "eval_recall": 0.9319664492078286,
94
- "eval_runtime": 13.4387,
95
- "eval_samples_per_second": 516.864,
96
- "eval_steps_per_second": 64.664,
97
- "step": 2170
98
- },
99
- {
100
- "epoch": 5.76036866359447,
101
- "grad_norm": 0.018086101859807968,
102
- "learning_rate": 2.1198156682027652e-05,
103
- "loss": 0.0012,
104
  "step": 2500
105
  },
106
  {
107
  "epoch": 6.0,
108
- "eval_accuracy": 0.9984023107191695,
109
- "eval_f1": 0.9116835326586937,
110
- "eval_loss": 0.006062328349798918,
111
- "eval_precision": 0.9000908265213442,
112
- "eval_recall": 0.923578751164958,
113
- "eval_runtime": 13.3783,
114
- "eval_samples_per_second": 519.199,
115
- "eval_steps_per_second": 64.956,
116
- "step": 2604
117
- },
118
- {
119
- "epoch": 6.912442396313364,
120
- "grad_norm": 0.009607589803636074,
121
- "learning_rate": 1.543778801843318e-05,
122
- "loss": 0.0009,
123
- "step": 3000
124
  },
125
  {
126
  "epoch": 7.0,
127
- "eval_accuracy": 0.9986953367008066,
128
- "eval_f1": 0.9314045730284647,
129
- "eval_loss": 0.005624314770102501,
130
- "eval_precision": 0.9327102803738317,
131
- "eval_recall": 0.9301025163094129,
132
- "eval_runtime": 13.399,
133
- "eval_samples_per_second": 518.397,
134
- "eval_steps_per_second": 64.856,
135
- "step": 3038
 
 
 
 
 
 
 
136
  },
137
  {
138
  "epoch": 8.0,
139
- "eval_accuracy": 0.9985837077554209,
140
- "eval_f1": 0.9231477220432582,
141
- "eval_loss": 0.006831143982708454,
142
- "eval_precision": 0.9118181818181819,
143
- "eval_recall": 0.934762348555452,
144
- "eval_runtime": 13.3718,
145
- "eval_samples_per_second": 519.45,
146
- "eval_steps_per_second": 64.987,
147
- "step": 3472
148
- },
149
- {
150
- "epoch": 8.064516129032258,
151
- "grad_norm": 0.0005392630700953305,
152
- "learning_rate": 9.67741935483871e-06,
153
- "loss": 0.0006,
154
  "step": 3500
155
  },
156
  {
157
  "epoch": 9.0,
158
- "eval_accuracy": 0.9986813830826333,
159
- "eval_f1": 0.9288702928870293,
160
- "eval_loss": 0.0071532572619616985,
161
- "eval_precision": 0.9267161410018553,
162
- "eval_recall": 0.9310344827586207,
163
- "eval_runtime": 13.5255,
164
- "eval_samples_per_second": 513.548,
165
- "eval_steps_per_second": 64.249,
166
- "step": 3906
167
- },
168
- {
169
- "epoch": 9.216589861751151,
170
- "grad_norm": 0.0045371875166893005,
171
- "learning_rate": 3.9170506912442395e-06,
172
- "loss": 0.0004,
173
  "step": 4000
174
  },
175
  {
176
  "epoch": 10.0,
177
- "eval_accuracy": 0.998618591800854,
178
- "eval_f1": 0.9259944495837187,
179
- "eval_loss": 0.007294897455722094,
180
- "eval_precision": 0.9191919191919192,
181
- "eval_recall": 0.9328984156570364,
182
- "eval_runtime": 13.8041,
183
- "eval_samples_per_second": 503.184,
184
- "eval_steps_per_second": 62.952,
185
- "step": 4340
186
  },
187
  {
188
  "epoch": 10.0,
189
- "step": 4340,
190
- "total_flos": 1.0996932656642544e+16,
191
- "train_loss": 0.003382195293697344,
192
- "train_runtime": 1039.0596,
193
- "train_samples_per_second": 267.242,
194
- "train_steps_per_second": 4.177
195
  }
196
  ],
197
  "logging_steps": 500,
198
- "max_steps": 4340,
199
  "num_input_tokens_seen": 0,
200
  "num_train_epochs": 10,
201
  "save_steps": 500,
@@ -211,7 +211,7 @@
211
  "attributes": {}
212
  }
213
  },
214
- "total_flos": 1.0996932656642544e+16,
215
  "train_batch_size": 32,
216
  "trial_name": null,
217
  "trial_params": null
 
1
  {
2
+ "best_metric": 0.936867469879518,
3
+ "best_model_checkpoint": "/content/dissertation/scripts/ner/output/checkpoint-4250",
4
  "epoch": 10.0,
5
  "eval_steps": 500,
6
+ "global_step": 4250,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
10
  "log_history": [
11
  {
12
  "epoch": 1.0,
13
+ "eval_accuracy": 0.9981303557517528,
14
+ "eval_f1": 0.8939962476547841,
15
+ "eval_loss": 0.00556989898905158,
16
+ "eval_precision": 0.8671519563239308,
17
+ "eval_recall": 0.9225556631171346,
18
+ "eval_runtime": 14.25,
19
+ "eval_samples_per_second": 477.051,
20
+ "eval_steps_per_second": 59.649,
21
+ "step": 425
22
+ },
23
+ {
24
+ "epoch": 1.1764705882352942,
25
+ "grad_norm": 0.1311497986316681,
26
+ "learning_rate": 4.411764705882353e-05,
27
+ "loss": 0.0104,
28
  "step": 500
29
  },
30
  {
31
  "epoch": 2.0,
32
+ "eval_accuracy": 0.9985782913528953,
33
+ "eval_f1": 0.9216722729456991,
34
+ "eval_loss": 0.0041933078318834305,
35
+ "eval_precision": 0.9150763358778626,
36
+ "eval_recall": 0.9283639883833494,
37
+ "eval_runtime": 14.1751,
38
+ "eval_samples_per_second": 479.575,
39
+ "eval_steps_per_second": 59.965,
40
+ "step": 850
41
+ },
42
+ {
43
+ "epoch": 2.3529411764705883,
44
+ "grad_norm": 0.002636878052726388,
45
+ "learning_rate": 3.8235294117647055e-05,
46
+ "loss": 0.0034,
47
  "step": 1000
48
  },
49
  {
50
  "epoch": 3.0,
51
+ "eval_accuracy": 0.9985133731498312,
52
+ "eval_f1": 0.9155339805825242,
53
+ "eval_loss": 0.004266700241714716,
54
+ "eval_precision": 0.9182083739045764,
55
+ "eval_recall": 0.9128751210067764,
56
+ "eval_runtime": 14.285,
57
+ "eval_samples_per_second": 475.883,
58
+ "eval_steps_per_second": 59.503,
59
+ "step": 1275
60
+ },
61
+ {
62
+ "epoch": 3.5294117647058822,
63
+ "grad_norm": 0.08669757843017578,
64
+ "learning_rate": 3.235294117647059e-05,
65
+ "loss": 0.0022,
66
  "step": 1500
67
  },
68
  {
69
  "epoch": 4.0,
70
+ "eval_accuracy": 0.9985847831732018,
71
+ "eval_f1": 0.9250367466927977,
72
+ "eval_loss": 0.0043651387095451355,
73
+ "eval_precision": 0.9365079365079365,
74
+ "eval_recall": 0.9138431752178122,
75
+ "eval_runtime": 14.5173,
76
+ "eval_samples_per_second": 468.27,
77
+ "eval_steps_per_second": 58.551,
78
+ "step": 1700
79
+ },
80
+ {
81
+ "epoch": 4.705882352941177,
82
+ "grad_norm": 0.27693310379981995,
83
+ "learning_rate": 2.647058823529412e-05,
84
+ "loss": 0.0012,
85
  "step": 2000
86
  },
87
  {
88
  "epoch": 5.0,
89
+ "eval_accuracy": 0.9985393404310569,
90
+ "eval_f1": 0.919463087248322,
91
+ "eval_loss": 0.006118799094110727,
92
+ "eval_precision": 0.9107312440645774,
93
+ "eval_recall": 0.9283639883833494,
94
+ "eval_runtime": 14.2824,
95
+ "eval_samples_per_second": 475.97,
96
+ "eval_steps_per_second": 59.514,
97
+ "step": 2125
98
+ },
99
+ {
100
+ "epoch": 5.882352941176471,
101
+ "grad_norm": 0.008007431402802467,
102
+ "learning_rate": 2.058823529411765e-05,
103
+ "loss": 0.0009,
104
  "step": 2500
105
  },
106
  {
107
  "epoch": 6.0,
108
+ "eval_accuracy": 0.9986626850168787,
109
+ "eval_f1": 0.9221213569039655,
110
+ "eval_loss": 0.005954863503575325,
111
+ "eval_precision": 0.910377358490566,
112
+ "eval_recall": 0.9341723136495643,
113
+ "eval_runtime": 14.2494,
114
+ "eval_samples_per_second": 477.072,
115
+ "eval_steps_per_second": 59.652,
116
+ "step": 2550
 
 
 
 
 
 
 
117
  },
118
  {
119
  "epoch": 7.0,
120
+ "eval_accuracy": 0.9986691768371851,
121
+ "eval_f1": 0.9314148681055155,
122
+ "eval_loss": 0.006543714087456465,
123
+ "eval_precision": 0.9230038022813688,
124
+ "eval_recall": 0.9399806389157793,
125
+ "eval_runtime": 14.386,
126
+ "eval_samples_per_second": 472.542,
127
+ "eval_steps_per_second": 59.085,
128
+ "step": 2975
129
+ },
130
+ {
131
+ "epoch": 7.0588235294117645,
132
+ "grad_norm": 0.0017305670771747828,
133
+ "learning_rate": 1.4705882352941177e-05,
134
+ "loss": 0.0005,
135
+ "step": 3000
136
  },
137
  {
138
  "epoch": 8.0,
139
+ "eval_accuracy": 0.9986886522981044,
140
+ "eval_f1": 0.9280540801545147,
141
+ "eval_loss": 0.005883762612938881,
142
+ "eval_precision": 0.9258188824662813,
143
+ "eval_recall": 0.9303000968054211,
144
+ "eval_runtime": 14.3169,
145
+ "eval_samples_per_second": 474.822,
146
+ "eval_steps_per_second": 59.37,
147
+ "step": 3400
148
+ },
149
+ {
150
+ "epoch": 8.235294117647058,
151
+ "grad_norm": 0.00020609228522516787,
152
+ "learning_rate": 8.823529411764707e-06,
153
+ "loss": 0.0004,
154
  "step": 3500
155
  },
156
  {
157
  "epoch": 9.0,
158
+ "eval_accuracy": 0.9987276032199429,
159
+ "eval_f1": 0.9317307692307693,
160
+ "eval_loss": 0.00656876852735877,
161
+ "eval_precision": 0.9255014326647565,
162
+ "eval_recall": 0.9380445304937076,
163
+ "eval_runtime": 14.5715,
164
+ "eval_samples_per_second": 466.526,
165
+ "eval_steps_per_second": 58.333,
166
+ "step": 3825
167
+ },
168
+ {
169
+ "epoch": 9.411764705882353,
170
+ "grad_norm": 0.00026785818045027554,
171
+ "learning_rate": 2.9411764705882355e-06,
172
+ "loss": 0.0001,
173
  "step": 4000
174
  },
175
  {
176
  "epoch": 10.0,
177
+ "eval_accuracy": 0.9988184887042326,
178
+ "eval_f1": 0.936867469879518,
179
+ "eval_loss": 0.006724909413605928,
180
+ "eval_precision": 0.9328214971209213,
181
+ "eval_recall": 0.9409486931268151,
182
+ "eval_runtime": 14.3451,
183
+ "eval_samples_per_second": 473.891,
184
+ "eval_steps_per_second": 59.254,
185
+ "step": 4250
186
  },
187
  {
188
  "epoch": 10.0,
189
+ "step": 4250,
190
+ "total_flos": 1.4262694978690116e+16,
191
+ "train_loss": 0.0022696754537961062,
192
+ "train_runtime": 1261.5031,
193
+ "train_samples_per_second": 215.6,
194
+ "train_steps_per_second": 3.369
195
  }
196
  ],
197
  "logging_steps": 500,
198
+ "max_steps": 4250,
199
  "num_input_tokens_seen": 0,
200
  "num_train_epochs": 10,
201
  "save_steps": 500,
 
211
  "attributes": {}
212
  }
213
  },
214
+ "total_flos": 1.4262694978690116e+16,
215
  "train_batch_size": 32,
216
  "trial_name": null,
217
  "trial_params": null