Rodrigo1771 committed on
Commit 5f87f70
1 Parent(s): cbf661b

End of training

README.md CHANGED
@@ -2,9 +2,10 @@
 license: apache-2.0
 base_model: PlanTL-GOB-ES/bsc-bio-ehr-es
 tags:
+- token-classification
 - generated_from_trainer
 datasets:
-- drugtemist-ner
+- Rodrigo1771/drugtemist-ner
 metrics:
 - precision
 - recall
@@ -17,24 +18,24 @@ model-index:
       name: Token Classification
       type: token-classification
     dataset:
-      name: drugtemist-ner
-      type: drugtemist-ner
+      name: Rodrigo1771/drugtemist-ner
+      type: Rodrigo1771/drugtemist-ner
       config: DrugTEMIST NER
       split: validation
       args: DrugTEMIST NER
     metrics:
     - name: Precision
       type: precision
-      value: 0.9437386569872959
+      value: 0.9430379746835443
     - name: Recall
       type: recall
-      value: 0.9558823529411765
+      value: 0.9586397058823529
     - name: F1
       type: f1
-      value: 0.949771689497717
+      value: 0.95077484047402
     - name: Accuracy
       type: accuracy
-      value: 0.9990250667691949
+      value: 0.9990319324961724
 ---
 
 <!-- This model card has been generated automatically according to the information the Trainer had access to. You
@@ -42,12 +43,12 @@ should probably proofread and complete it, then remove this comment. -->
 
 # output
 
-This model is a fine-tuned version of [PlanTL-GOB-ES/bsc-bio-ehr-es](https://huggingface.co/PlanTL-GOB-ES/bsc-bio-ehr-es) on the drugtemist-ner dataset.
+This model is a fine-tuned version of [PlanTL-GOB-ES/bsc-bio-ehr-es](https://huggingface.co/PlanTL-GOB-ES/bsc-bio-ehr-es) on the Rodrigo1771/drugtemist-ner dataset.
 It achieves the following results on the evaluation set:
-- Loss: 0.0065
-- Precision: 0.9437
-- Recall: 0.9559
-- F1: 0.9498
+- Loss: 0.0058
+- Precision: 0.9430
+- Recall: 0.9586
+- F1: 0.9508
 - Accuracy: 0.9990
 
 ## Model description
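
As a quick usage sketch for the checkpoint this commit finalises (not part of the diff itself): the snippet below loads the fine-tuned model with a `transformers` token-classification pipeline. The repository id is a placeholder assumption; substitute the actual Hub repo, or the local `/content/dissertation/scripts/ner/output` directory referenced in train.log.

```python
# Usage sketch only. The model id below is a placeholder assumption, not taken
# from this commit; point it at the actual Hub repo or the local output directory.
from transformers import AutoModelForTokenClassification, AutoTokenizer, pipeline

model_id = "Rodrigo1771/output"  # hypothetical; replace with the real repo id

tokenizer = AutoTokenizer.from_pretrained(model_id)
model = AutoModelForTokenClassification.from_pretrained(model_id)

# Merge sub-word tokens back into whole drug-mention spans.
ner = pipeline("token-classification", model=model, tokenizer=tokenizer,
               aggregation_strategy="simple")

print(ner("Se administró paracetamol 500 mg al paciente."))
```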
all_results.json CHANGED
@@ -1,19 +1,26 @@
 {
-    "eval_accuracy": 0.028794858943639246,
-    "eval_f1": 0.00559299062744574,
-    "eval_loss": 1.2929713726043701,
-    "eval_precision": 0.0028258395540381536,
-    "eval_recall": 0.2693014705882353,
-    "eval_runtime": 16.5731,
+    "epoch": 9.997061416397296,
+    "eval_accuracy": 0.9990319324961724,
+    "eval_f1": 0.95077484047402,
+    "eval_loss": 0.0058256350457668304,
+    "eval_precision": 0.9430379746835443,
+    "eval_recall": 0.9586397058823529,
+    "eval_runtime": 15.6142,
     "eval_samples": 6807,
-    "eval_samples_per_second": 410.727,
-    "eval_steps_per_second": 51.348,
-    "predict_accuracy": 0.028794858943639246,
-    "predict_f1": 0.00559299062744574,
-    "predict_loss": 1.2929713726043701,
-    "predict_precision": 0.0028258395540381536,
-    "predict_recall": 0.2693014705882353,
-    "predict_runtime": 15.7694,
-    "predict_samples_per_second": 431.659,
-    "predict_steps_per_second": 53.965
+    "eval_samples_per_second": 435.949,
+    "eval_steps_per_second": 54.502,
+    "predict_accuracy": 0.9990319324961724,
+    "predict_f1": 0.95077484047402,
+    "predict_loss": 0.0058256350457668304,
+    "predict_precision": 0.9430379746835443,
+    "predict_recall": 0.9586397058823529,
+    "predict_runtime": 15.6972,
+    "predict_samples_per_second": 433.645,
+    "predict_steps_per_second": 54.214,
+    "total_flos": 6700358578825584.0,
+    "train_loss": 0.0023016003105682196,
+    "train_runtime": 3387.0101,
+    "train_samples": 27224,
+    "train_samples_per_second": 80.378,
+    "train_steps_per_second": 5.022
 }
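
The precision/recall/F1/accuracy values above are entity-level seqeval metrics written by the `transformers` Trainer. A small self-contained sketch (assuming a local copy of `all_results.json`) for pulling the eval and predict splits apart when comparing runs:

```python
import json

# Load the combined metrics file from this commit (downloaded locally).
with open("all_results.json") as f:
    results = json.load(f)

# The Trainer prefixes each metric with its split name.
eval_metrics = {k: v for k, v in results.items() if k.startswith("eval_")}
predict_metrics = {k: v for k, v in results.items() if k.startswith("predict_")}

for name, value in sorted(eval_metrics.items()):
    print(f"{name:30s} {value}")
```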
eval_results.json CHANGED
@@ -1,11 +1,12 @@
 {
-    "eval_accuracy": 0.028794858943639246,
-    "eval_f1": 0.00559299062744574,
-    "eval_loss": 1.2929713726043701,
-    "eval_precision": 0.0028258395540381536,
-    "eval_recall": 0.2693014705882353,
-    "eval_runtime": 16.5731,
+    "epoch": 9.997061416397296,
+    "eval_accuracy": 0.9990319324961724,
+    "eval_f1": 0.95077484047402,
+    "eval_loss": 0.0058256350457668304,
+    "eval_precision": 0.9430379746835443,
+    "eval_recall": 0.9586397058823529,
+    "eval_runtime": 15.6142,
     "eval_samples": 6807,
-    "eval_samples_per_second": 410.727,
-    "eval_steps_per_second": 51.348
+    "eval_samples_per_second": 435.949,
+    "eval_steps_per_second": 54.502
 }
predict_results.json CHANGED
@@ -1,10 +1,10 @@
 {
-    "predict_accuracy": 0.028794858943639246,
-    "predict_f1": 0.00559299062744574,
-    "predict_loss": 1.2929713726043701,
-    "predict_precision": 0.0028258395540381536,
-    "predict_recall": 0.2693014705882353,
-    "predict_runtime": 15.7694,
-    "predict_samples_per_second": 431.659,
-    "predict_steps_per_second": 53.965
+    "predict_accuracy": 0.9990319324961724,
+    "predict_f1": 0.95077484047402,
+    "predict_loss": 0.0058256350457668304,
+    "predict_precision": 0.9430379746835443,
+    "predict_recall": 0.9586397058823529,
+    "predict_runtime": 15.6972,
+    "predict_samples_per_second": 433.645,
+    "predict_steps_per_second": 54.214
 }
predictions.txt CHANGED
The diff for this file is too large to render. See raw diff
 
tb/events.out.tfevents.1715786919.61af03e56d14.5022.1 ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:a27f137b8259d61cfd2526684bc8f5a7589382dd45db907e2e5fc45dc56b2e14
+size 569
train.log CHANGED
@@ -1590,3 +1590,51 @@ Training completed. Do not forget to share your model on huggingface.co/models =
1590
  [INFO|modeling_utils.py:2590] 2024-05-15 15:28:19,154 >> Model weights saved in /content/dissertation/scripts/ner/output/model.safetensors
1591
  [INFO|tokenization_utils_base.py:2488] 2024-05-15 15:28:19,155 >> tokenizer config file saved in /content/dissertation/scripts/ner/output/tokenizer_config.json
1592
  [INFO|tokenization_utils_base.py:2497] 2024-05-15 15:28:19,156 >> Special tokens file saved in /content/dissertation/scripts/ner/output/special_tokens_map.json
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1593
  0%| | 0/851 [00:00<?, ?it/s]
1594
  1%| | 10/851 [00:00<00:09, 90.51it/s]
1595
  2%|▏ | 20/851 [00:00<00:10, 77.83it/s]
1596
  3%|▎ | 28/851 [00:00<00:11, 74.40it/s]
1597
  4%|▍ | 36/851 [00:00<00:11, 73.24it/s]
1598
  5%|▌ | 44/851 [00:00<00:10, 73.71it/s]
1599
  6%|▌ | 52/851 [00:00<00:10, 74.57it/s]
1600
  7%|▋ | 60/851 [00:00<00:10, 75.32it/s]
1601
  8%|▊ | 68/851 [00:00<00:11, 69.52it/s]
1602
  9%|▉ | 76/851 [00:01<00:11, 69.83it/s]
1603
  10%|▉ | 84/851 [00:01<00:10, 70.07it/s]
1604
  11%|█ | 92/851 [00:01<00:10, 69.97it/s]
1605
  12%|█▏ | 100/851 [00:01<00:10, 69.61it/s]
1606
  13%|█▎ | 107/851 [00:01<00:10, 68.65it/s]
1607
  14%|█▎ | 115/851 [00:01<00:10, 68.72it/s]
1608
  15%|█▍ | 124/851 [00:01<00:10, 72.49it/s]
1609
  16%|█▌ | 132/851 [00:01<00:10, 65.39it/s]
1610
  16%|█▋ | 139/851 [00:01<00:10, 65.90it/s]
1611
  17%|█▋ | 147/851 [00:02<00:10, 67.50it/s]
1612
  18%|█▊ | 154/851 [00:02<00:10, 67.04it/s]
1613
  19%|█▉ | 161/851 [00:02<00:10, 66.91it/s]
1614
  20%|█▉ | 169/851 [00:02<00:10, 67.78it/s]
1615
  21%|██ | 177/851 [00:02<00:09, 69.96it/s]
1616
  22%|██▏ | 185/851 [00:02<00:09, 71.55it/s]
1617
  23%|██▎ | 193/851 [00:02<00:09, 72.55it/s]
1618
  24%|██▎ | 201/851 [00:02<00:08, 74.28it/s]
1619
  25%|██▍ | 209/851 [00:02<00:09, 70.56it/s]
1620
  25%|██▌ | 217/851 [00:03<00:09, 68.99it/s]
1621
  26%|██▋ | 224/851 [00:03<00:09, 69.14it/s]
1622
  27%|██▋ | 232/851 [00:03<00:08, 71.37it/s]
1623
  28%|██▊ | 240/851 [00:03<00:08, 68.79it/s]
1624
  29%|██▉ | 247/851 [00:03<00:09, 66.73it/s]
1625
  30%|██▉ | 255/851 [00:03<00:08, 69.91it/s]
1626
  31%|███ | 263/851 [00:03<00:08, 72.69it/s]
1627
  32%|███▏ | 271/851 [00:03<00:08, 71.09it/s]
1628
  33%|███▎ | 279/851 [00:03<00:07, 73.46it/s]
1629
  34%|███▎ | 287/851 [00:04<00:07, 72.07it/s]
1630
  35%|███▍ | 295/851 [00:04<00:07, 72.11it/s]
1631
  36%|███▌ | 303/851 [00:04<00:07, 73.81it/s]
1632
  37%|███▋ | 311/851 [00:04<00:07, 68.28it/s]
1633
  38%|███▊ | 320/851 [00:04<00:07, 71.75it/s]
1634
  39%|███▊ | 328/851 [00:04<00:07, 72.02it/s]
1635
  39%|███▉ | 336/851 [00:04<00:07, 71.36it/s]
1636
  40%|████ | 344/851 [00:04<00:06, 73.18it/s]
1637
  41%|████▏ | 352/851 [00:04<00:06, 72.55it/s]
1638
  42%|████▏ | 360/851 [00:05<00:07, 66.67it/s]
1639
  43%|████▎ | 368/851 [00:05<00:07, 68.29it/s]
1640
  44%|████▍ | 375/851 [00:05<00:06, 68.41it/s]
1641
  45%|████▍ | 382/851 [00:05<00:07, 66.01it/s]
1642
  46%|████▌ | 390/851 [00:05<00:06, 69.04it/s]
1643
  47%|████▋ | 398/851 [00:05<00:06, 70.03it/s]
1644
  48%|████▊ | 406/851 [00:05<00:06, 65.84it/s]
1645
  49%|████▊ | 414/851 [00:05<00:06, 66.93it/s]
1646
  50%|████▉ | 422/851 [00:06<00:06, 69.24it/s]
1647
  50%|█████ | 429/851 [00:06<00:06, 67.99it/s]
1648
  51%|█████▏ | 437/851 [00:06<00:05, 70.32it/s]
1649
  52%|█████▏ | 445/851 [00:06<00:05, 71.78it/s]
1650
  53%|█████▎ | 453/851 [00:06<00:05, 71.61it/s]
1651
  54%|█████▍ | 461/851 [00:06<00:05, 69.59it/s]
1652
  55%|█████▍ | 468/851 [00:06<00:05, 67.35it/s]
1653
  56%|█████▌ | 475/851 [00:06<00:06, 62.52it/s]
1654
  57%|█████▋ | 482/851 [00:06<00:05, 63.14it/s]
1655
  57%|█████▋ | 489/851 [00:07<00:05, 63.97it/s]
1656
  59%|█████▊ | 498/851 [00:07<00:05, 69.11it/s]
1657
  59%|█████▉ | 506/851 [00:07<00:04, 71.64it/s]
1658
  60%|██████ | 514/851 [00:07<00:04, 70.85it/s]
1659
  61%|██████▏ | 522/851 [00:07<00:04, 66.68it/s]
1660
  62%|██████▏ | 529/851 [00:07<00:04, 65.77it/s]
1661
  63%|██████▎ | 537/851 [00:07<00:04, 68.38it/s]
1662
  64%|██████▍ | 545/851 [00:07<00:04, 69.93it/s]
1663
  65%|██████▍ | 553/851 [00:07<00:04, 67.90it/s]
1664
  66%|██████▌ | 561/851 [00:08<00:04, 70.98it/s]
1665
  67%|██████▋ | 569/851 [00:08<00:03, 73.27it/s]
1666
  68%|██████▊ | 577/851 [00:08<00:03, 73.20it/s]
1667
  69%|██████▊ | 585/851 [00:08<00:03, 68.88it/s]
1668
  70%|██████▉ | 592/851 [00:08<00:03, 67.31it/s]
1669
  70%|███████ | 599/851 [00:08<00:03, 67.73it/s]
1670
  71%|███████▏ | 607/851 [00:08<00:03, 68.80it/s]
1671
  72%|███████▏ | 614/851 [00:08<00:03, 66.33it/s]
1672
  73%|███████▎ | 622/851 [00:08<00:03, 67.71it/s]
1673
  74%|███████▍ | 629/851 [00:09<00:03, 63.90it/s]
1674
  75%|███████▍ | 637/851 [00:09<00:03, 66.80it/s]
1675
  76%|███████▌ | 644/851 [00:09<00:03, 62.23it/s]
1676
  77%|███████▋ | 652/851 [00:09<00:03, 65.75it/s]
1677
  78%|███████▊ | 660/851 [00:09<00:02, 68.73it/s]
1678
  78%|███████▊ | 668/851 [00:09<00:02, 70.08it/s]
1679
  79%|███████▉ | 676/851 [00:09<00:02, 69.11it/s]
1680
  80%|████████ | 684/851 [00:09<00:02, 70.02it/s]
1681
  81%|████████▏ | 693/851 [00:09<00:02, 73.25it/s]
1682
  82%|████████▏ | 701/851 [00:10<00:02, 73.96it/s]
1683
  83%|████████▎ | 710/851 [00:10<00:01, 75.96it/s]
1684
  84%|████████▍ | 718/851 [00:10<00:01, 73.07it/s]
1685
  85%|████████▌ | 726/851 [00:10<00:01, 74.63it/s]
1686
  86%|████████▋ | 734/851 [00:10<00:01, 75.56it/s]
1687
  87%|████████▋ | 742/851 [00:10<00:01, 75.66it/s]
1688
  88%|████████▊ | 750/851 [00:10<00:01, 74.81it/s]
1689
  89%|████████▉ | 758/851 [00:10<00:01, 75.25it/s]
1690
  90%|█████████ | 766/851 [00:10<00:01, 71.07it/s]
1691
  91%|█████████ | 774/851 [00:11<00:01, 71.24it/s]
1692
  92%|█████████▏| 782/851 [00:11<00:00, 69.20it/s]
1693
  93%|█████████▎| 790/851 [00:11<00:00, 70.20it/s]
1694
  94%|█████████▍| 798/851 [00:11<00:00, 70.94it/s]
1695
  95%|█████████▍| 806/851 [00:11<00:00, 72.86it/s]
1696
  96%|█████████▌| 814/851 [00:11<00:00, 69.90it/s]
1697
  97%|█████████▋| 822/851 [00:11<00:00, 70.42it/s]
1698
  98%|█████████▊| 830/851 [00:11<00:00, 70.64it/s]
1699
  98%|█████████▊| 838/851 [00:11<00:00, 71.14it/s]
1700
  99%|█████████▉| 846/851 [00:12<00:00, 67.21it/s]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1701
  0%| | 0/851 [00:00<?, ?it/s]
1702
  1%| | 9/851 [00:00<00:09, 89.28it/s]
1703
  2%|▏ | 18/851 [00:00<00:10, 76.32it/s]
1704
  3%|▎ | 26/851 [00:00<00:11, 74.84it/s]
1705
  4%|▍ | 34/851 [00:00<00:11, 73.37it/s]
1706
  5%|▍ | 42/851 [00:00<00:11, 72.40it/s]
1707
  6%|▌ | 50/851 [00:00<00:10, 73.86it/s]
1708
  7%|▋ | 58/851 [00:00<00:10, 73.78it/s]
1709
  8%|▊ | 66/851 [00:00<00:10, 71.71it/s]
1710
  9%|▊ | 74/851 [00:01<00:11, 67.99it/s]
1711
  10%|▉ | 82/851 [00:01<00:11, 69.81it/s]
1712
  11%|█ | 90/851 [00:01<00:10, 71.29it/s]
1713
  12%|█▏ | 98/851 [00:01<00:10, 69.84it/s]
1714
  12%|█▏ | 106/851 [00:01<00:10, 70.25it/s]
1715
  13%|█▎ | 114/851 [00:01<00:10, 67.45it/s]
1716
  14%|█▍ | 122/851 [00:01<00:10, 70.51it/s]
1717
  15%|█▌ | 130/851 [00:01<00:11, 65.54it/s]
1718
  16%|█▌ | 137/851 [00:01<00:11, 64.79it/s]
1719
  17%|█▋ | 145/851 [00:02<00:10, 66.82it/s]
1720
  18%|█▊ | 152/851 [00:02<00:10, 65.49it/s]
1721
  19%|█▉ | 160/851 [00:02<00:09, 69.23it/s]
1722
  20%|█▉ | 168/851 [00:02<00:09, 70.13it/s]
1723
  21%|██ | 176/851 [00:02<00:09, 71.41it/s]
1724
  22%|██▏ | 184/851 [00:02<00:09, 71.13it/s]
1725
  23%|██▎ | 192/851 [00:02<00:09, 71.88it/s]
1726
  24%|██▎ | 200/851 [00:02<00:08, 73.17it/s]
1727
  24%|██▍ | 208/851 [00:02<00:09, 69.02it/s]
1728
  25%|██▌ | 215/851 [00:03<00:09, 67.30it/s]
1729
  26%|██▌ | 223/851 [00:03<00:09, 68.83it/s]
1730
  27%|██▋ | 231/851 [00:03<00:08, 69.85it/s]
1731
  28%|██▊ | 239/851 [00:03<00:09, 67.99it/s]
1732
  29%|██▉ | 246/851 [00:03<00:09, 65.91it/s]
1733
  30%|██▉ | 254/851 [00:03<00:08, 69.57it/s]
1734
  31%|███ | 262/851 [00:03<00:08, 71.81it/s]
1735
  32%|███▏ | 270/851 [00:03<00:08, 70.28it/s]
1736
  33%|███▎ | 278/851 [00:03<00:07, 72.94it/s]
1737
  34%|███▎ | 286/851 [00:04<00:07, 73.83it/s]
1738
  35%|███▍ | 294/851 [00:04<00:07, 70.51it/s]
1739
  36%|███▌ | 303/851 [00:04<00:07, 73.46it/s]
1740
  37%|███▋ | 311/851 [00:04<00:07, 70.10it/s]
1741
  37%|███▋ | 319/851 [00:04<00:07, 72.11it/s]
1742
  38%|███▊ | 327/851 [00:04<00:07, 71.11it/s]
1743
  39%|███▉ | 335/851 [00:04<00:07, 70.55it/s]
1744
  40%|████ | 343/851 [00:04<00:07, 72.12it/s]
1745
  41%|████ | 351/851 [00:04<00:06, 72.42it/s]
1746
  42%|████▏ | 359/851 [00:05<00:07, 66.81it/s]
1747
  43%|████▎ | 367/851 [00:05<00:07, 68.19it/s]
1748
  44%|████▍ | 374/851 [00:05<00:07, 67.00it/s]
1749
  45%|████▍ | 381/851 [00:05<00:07, 64.83it/s]
1750
  46%|████▌ | 389/851 [00:05<00:06, 68.13it/s]
1751
  47%|████▋ | 397/851 [00:05<00:06, 69.09it/s]
1752
  47%|████▋ | 404/851 [00:05<00:06, 64.36it/s]
1753
  48%|████▊ | 412/851 [00:05<00:06, 65.60it/s]
1754
  49%|████▉ | 420/851 [00:06<00:06, 68.49it/s]
1755
  50%|█████ | 427/851 [00:06<00:06, 65.71it/s]
1756
  51%|█████ | 435/851 [00:06<00:06, 68.37it/s]
1757
  52%|█████▏ | 443/851 [00:06<00:05, 70.54it/s]
1758
  53%|█████▎ | 451/851 [00:06<00:05, 71.45it/s]
1759
  54%|█████▍ | 459/851 [00:06<00:05, 72.02it/s]
1760
  55%|█████▍ | 467/851 [00:06<00:05, 66.48it/s]
1761
  56%|█████▌ | 474/851 [00:06<00:06, 62.19it/s]
1762
  57%|█████▋ | 481/851 [00:06<00:05, 63.17it/s]
1763
  57%|█████▋ | 488/851 [00:07<00:05, 64.90it/s]
1764
  58%|█████▊ | 496/851 [00:07<00:05, 67.97it/s]
1765
  59%|█████▉ | 504/851 [00:07<00:04, 71.24it/s]
1766
  60%|██████ | 512/851 [00:07<00:04, 70.23it/s]
1767
  61%|██████ | 520/851 [00:07<00:04, 71.31it/s]
1768
  62%|██████▏ | 528/851 [00:07<00:04, 65.51it/s]
1769
  63%|██████▎ | 536/851 [00:07<00:04, 67.75it/s]
1770
  64%|██████▍ | 544/851 [00:07<00:04, 69.64it/s]
1771
  65%|██████▍ | 552/851 [00:07<00:04, 67.32it/s]
1772
  66%|██████▌ | 560/851 [00:08<00:04, 69.83it/s]
1773
  67%|██████▋ | 568/851 [00:08<00:03, 72.10it/s]
1774
  68%|██████▊ | 576/851 [00:08<00:03, 72.39it/s]
1775
  69%|██████▊ | 584/851 [00:08<00:03, 68.89it/s]
1776
  69%|██████▉ | 591/851 [00:08<00:03, 66.82it/s]
1777
  70%|███████ | 598/851 [00:08<00:03, 67.43it/s]
1778
  71%|███████ | 605/851 [00:08<00:03, 67.25it/s]
1779
  72%|███████▏ | 612/851 [00:08<00:03, 65.87it/s]
1780
  73%|███████▎ | 619/851 [00:08<00:03, 65.34it/s]
1781
  74%|███████▎ | 626/851 [00:09<00:03, 64.84it/s]
1782
  74%|███████▍ | 633/851 [00:09<00:03, 65.27it/s]
1783
  75%|███████▌ | 640/851 [00:09<00:03, 64.95it/s]
1784
  76%|███████▌ | 647/851 [00:09<00:03, 63.18it/s]
1785
  77%|███████▋ | 655/851 [00:09<00:02, 66.34it/s]
1786
  78%|███████▊ | 663/851 [00:09<00:02, 67.96it/s]
1787
  79%|███████▊ | 670/851 [00:09<00:02, 68.44it/s]
1788
  80%|███████▉ | 677/851 [00:09<00:02, 68.31it/s]
1789
  80%|████████ | 685/851 [00:09<00:02, 69.35it/s]
1790
  81%|████████▏ | 693/851 [00:10<00:02, 72.25it/s]
1791
  82%|████████▏ | 701/851 [00:10<00:02, 72.73it/s]
1792
  83%|████████▎ | 709/851 [00:10<00:01, 74.24it/s]
1793
  84%|████████▍ | 717/851 [00:10<00:01, 73.24it/s]
1794
  85%|████████▌ | 725/851 [00:10<00:01, 73.55it/s]
1795
  86%|████████▌ | 733/851 [00:10<00:01, 74.45it/s]
1796
  87%|████████▋ | 741/851 [00:10<00:01, 75.51it/s]
1797
  88%|████████▊ | 749/851 [00:10<00:01, 74.86it/s]
1798
  89%|████████▉ | 757/851 [00:10<00:01, 74.62it/s]
1799
  90%|████████▉ | 765/851 [00:11<00:01, 73.45it/s]
1800
  91%|█████████ | 773/851 [00:11<00:01, 70.77it/s]
1801
  92%|█████████▏| 781/851 [00:11<00:01, 67.45it/s]
1802
  93%|█████████▎| 789/851 [00:11<00:00, 69.14it/s]
1803
  94%|█████████▎| 797/851 [00:11<00:00, 70.53it/s]
1804
  95%|█████████▍| 805/851 [00:11<00:00, 73.00it/s]
1805
  96%|█████████▌| 813/851 [00:11<00:00, 69.66it/s]
1806
  96%|█████████▋| 821/851 [00:11<00:00, 70.69it/s]
1807
  97%|█████████▋| 829/851 [00:11<00:00, 71.60it/s]
1808
  98%|█████████▊| 837/851 [00:12<00:00, 72.30it/s]
1809
  99%|█████████▉| 845/851 [00:12<00:00, 66.40it/s]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1590
  [INFO|modeling_utils.py:2590] 2024-05-15 15:28:19,154 >> Model weights saved in /content/dissertation/scripts/ner/output/model.safetensors
1591
  [INFO|tokenization_utils_base.py:2488] 2024-05-15 15:28:19,155 >> tokenizer config file saved in /content/dissertation/scripts/ner/output/tokenizer_config.json
1592
  [INFO|tokenization_utils_base.py:2497] 2024-05-15 15:28:19,156 >> Special tokens file saved in /content/dissertation/scripts/ner/output/special_tokens_map.json
1593
+ {'eval_loss': 0.006454338785260916, 'eval_precision': 0.9437386569872959, 'eval_recall': 0.9558823529411765, 'eval_f1': 0.949771689497717, 'eval_accuracy': 0.9990250667691949, 'eval_runtime': 15.7656, 'eval_samples_per_second': 431.764, 'eval_steps_per_second': 53.978, 'epoch': 10.0}
1594
+ {'train_runtime': 3387.0101, 'train_samples_per_second': 80.378, 'train_steps_per_second': 5.022, 'train_loss': 0.0023016003105682196, 'epoch': 10.0}
1595
+ ***** train metrics *****
1596
+ epoch = 9.9971
1597
+ total_flos = 6240195GF
1598
+ train_loss = 0.0023
1599
+ train_runtime = 0:56:27.01
1600
+ train_samples = 27224
1601
+ train_samples_per_second = 80.378
1602
+ train_steps_per_second = 5.022
1603
+ 05/15/2024 15:28:23 - INFO - __main__ - *** Evaluate ***
1604
+ [INFO|trainer.py:786] 2024-05-15 15:28:23,892 >> The following columns in the evaluation set don't have a corresponding argument in `RobertaForTokenClassification.forward` and have been ignored: ner_tags, tokens, id. If ner_tags, tokens, id are not expected by `RobertaForTokenClassification.forward`, you can safely ignore this message.
1605
+ [INFO|trainer.py:3614] 2024-05-15 15:28:23,895 >> ***** Running Evaluation *****
1606
+ [INFO|trainer.py:3616] 2024-05-15 15:28:23,895 >> Num examples = 6807
1607
+ [INFO|trainer.py:3619] 2024-05-15 15:28:23,895 >> Batch size = 8
1608
+
1609
  0%| | 0/851 [00:00<?, ?it/s]
1610
  1%| | 10/851 [00:00<00:09, 90.51it/s]
1611
  2%|▏ | 20/851 [00:00<00:10, 77.83it/s]
1612
  3%|▎ | 28/851 [00:00<00:11, 74.40it/s]
1613
  4%|▍ | 36/851 [00:00<00:11, 73.24it/s]
1614
  5%|▌ | 44/851 [00:00<00:10, 73.71it/s]
1615
  6%|▌ | 52/851 [00:00<00:10, 74.57it/s]
1616
  7%|▋ | 60/851 [00:00<00:10, 75.32it/s]
1617
  8%|▊ | 68/851 [00:00<00:11, 69.52it/s]
1618
  9%|▉ | 76/851 [00:01<00:11, 69.83it/s]
1619
  10%|▉ | 84/851 [00:01<00:10, 70.07it/s]
1620
  11%|█ | 92/851 [00:01<00:10, 69.97it/s]
1621
  12%|█▏ | 100/851 [00:01<00:10, 69.61it/s]
1622
  13%|█▎ | 107/851 [00:01<00:10, 68.65it/s]
1623
  14%|█▎ | 115/851 [00:01<00:10, 68.72it/s]
1624
  15%|█▍ | 124/851 [00:01<00:10, 72.49it/s]
1625
  16%|█▌ | 132/851 [00:01<00:10, 65.39it/s]
1626
  16%|█▋ | 139/851 [00:01<00:10, 65.90it/s]
1627
  17%|█▋ | 147/851 [00:02<00:10, 67.50it/s]
1628
  18%|█▊ | 154/851 [00:02<00:10, 67.04it/s]
1629
  19%|█▉ | 161/851 [00:02<00:10, 66.91it/s]
1630
  20%|█▉ | 169/851 [00:02<00:10, 67.78it/s]
1631
  21%|██ | 177/851 [00:02<00:09, 69.96it/s]
1632
  22%|██▏ | 185/851 [00:02<00:09, 71.55it/s]
1633
  23%|██▎ | 193/851 [00:02<00:09, 72.55it/s]
1634
  24%|██▎ | 201/851 [00:02<00:08, 74.28it/s]
1635
  25%|██▍ | 209/851 [00:02<00:09, 70.56it/s]
1636
  25%|██▌ | 217/851 [00:03<00:09, 68.99it/s]
1637
  26%|██▋ | 224/851 [00:03<00:09, 69.14it/s]
1638
  27%|██▋ | 232/851 [00:03<00:08, 71.37it/s]
1639
  28%|██▊ | 240/851 [00:03<00:08, 68.79it/s]
1640
  29%|██▉ | 247/851 [00:03<00:09, 66.73it/s]
1641
  30%|██▉ | 255/851 [00:03<00:08, 69.91it/s]
1642
  31%|███ | 263/851 [00:03<00:08, 72.69it/s]
1643
  32%|███▏ | 271/851 [00:03<00:08, 71.09it/s]
1644
  33%|███▎ | 279/851 [00:03<00:07, 73.46it/s]
1645
  34%|███▎ | 287/851 [00:04<00:07, 72.07it/s]
1646
  35%|███▍ | 295/851 [00:04<00:07, 72.11it/s]
1647
  36%|███▌ | 303/851 [00:04<00:07, 73.81it/s]
1648
  37%|███▋ | 311/851 [00:04<00:07, 68.28it/s]
1649
  38%|███▊ | 320/851 [00:04<00:07, 71.75it/s]
1650
  39%|███▊ | 328/851 [00:04<00:07, 72.02it/s]
1651
  39%|███▉ | 336/851 [00:04<00:07, 71.36it/s]
1652
  40%|████ | 344/851 [00:04<00:06, 73.18it/s]
1653
  41%|████▏ | 352/851 [00:04<00:06, 72.55it/s]
1654
  42%|████▏ | 360/851 [00:05<00:07, 66.67it/s]
1655
  43%|████▎ | 368/851 [00:05<00:07, 68.29it/s]
1656
  44%|████▍ | 375/851 [00:05<00:06, 68.41it/s]
1657
  45%|████▍ | 382/851 [00:05<00:07, 66.01it/s]
1658
  46%|████▌ | 390/851 [00:05<00:06, 69.04it/s]
1659
  47%|████▋ | 398/851 [00:05<00:06, 70.03it/s]
1660
  48%|████▊ | 406/851 [00:05<00:06, 65.84it/s]
1661
  49%|████▊ | 414/851 [00:05<00:06, 66.93it/s]
1662
  50%|████▉ | 422/851 [00:06<00:06, 69.24it/s]
1663
  50%|█████ | 429/851 [00:06<00:06, 67.99it/s]
1664
  51%|█████▏ | 437/851 [00:06<00:05, 70.32it/s]
1665
  52%|█████▏ | 445/851 [00:06<00:05, 71.78it/s]
1666
  53%|█████▎ | 453/851 [00:06<00:05, 71.61it/s]
1667
  54%|█████▍ | 461/851 [00:06<00:05, 69.59it/s]
1668
  55%|█████▍ | 468/851 [00:06<00:05, 67.35it/s]
1669
  56%|█████▌ | 475/851 [00:06<00:06, 62.52it/s]
1670
  57%|█████▋ | 482/851 [00:06<00:05, 63.14it/s]
1671
  57%|█████▋ | 489/851 [00:07<00:05, 63.97it/s]
1672
  59%|█████▊ | 498/851 [00:07<00:05, 69.11it/s]
1673
  59%|█████▉ | 506/851 [00:07<00:04, 71.64it/s]
1674
  60%|██████ | 514/851 [00:07<00:04, 70.85it/s]
1675
  61%|██████▏ | 522/851 [00:07<00:04, 66.68it/s]
1676
  62%|██████▏ | 529/851 [00:07<00:04, 65.77it/s]
1677
  63%|██████▎ | 537/851 [00:07<00:04, 68.38it/s]
1678
  64%|██████▍ | 545/851 [00:07<00:04, 69.93it/s]
1679
  65%|██████▍ | 553/851 [00:07<00:04, 67.90it/s]
1680
  66%|██████▌ | 561/851 [00:08<00:04, 70.98it/s]
1681
  67%|██████▋ | 569/851 [00:08<00:03, 73.27it/s]
1682
  68%|██████▊ | 577/851 [00:08<00:03, 73.20it/s]
1683
  69%|██████▊ | 585/851 [00:08<00:03, 68.88it/s]
1684
  70%|██████▉ | 592/851 [00:08<00:03, 67.31it/s]
1685
  70%|███████ | 599/851 [00:08<00:03, 67.73it/s]
1686
  71%|███████▏ | 607/851 [00:08<00:03, 68.80it/s]
1687
  72%|███████▏ | 614/851 [00:08<00:03, 66.33it/s]
1688
  73%|███████▎ | 622/851 [00:08<00:03, 67.71it/s]
1689
  74%|███████▍ | 629/851 [00:09<00:03, 63.90it/s]
1690
  75%|███████▍ | 637/851 [00:09<00:03, 66.80it/s]
1691
  76%|███████▌ | 644/851 [00:09<00:03, 62.23it/s]
1692
  77%|███████▋ | 652/851 [00:09<00:03, 65.75it/s]
1693
  78%|███████▊ | 660/851 [00:09<00:02, 68.73it/s]
1694
  78%|███████▊ | 668/851 [00:09<00:02, 70.08it/s]
1695
  79%|███████▉ | 676/851 [00:09<00:02, 69.11it/s]
1696
  80%|████████ | 684/851 [00:09<00:02, 70.02it/s]
1697
  81%|████████▏ | 693/851 [00:09<00:02, 73.25it/s]
1698
  82%|████████▏ | 701/851 [00:10<00:02, 73.96it/s]
1699
  83%|████████▎ | 710/851 [00:10<00:01, 75.96it/s]
1700
  84%|████████▍ | 718/851 [00:10<00:01, 73.07it/s]
1701
  85%|████████▌ | 726/851 [00:10<00:01, 74.63it/s]
1702
  86%|████████▋ | 734/851 [00:10<00:01, 75.56it/s]
1703
  87%|████████▋ | 742/851 [00:10<00:01, 75.66it/s]
1704
  88%|████████▊ | 750/851 [00:10<00:01, 74.81it/s]
1705
  89%|████████▉ | 758/851 [00:10<00:01, 75.25it/s]
1706
  90%|█████████ | 766/851 [00:10<00:01, 71.07it/s]
1707
  91%|█████████ | 774/851 [00:11<00:01, 71.24it/s]
1708
  92%|█████████▏| 782/851 [00:11<00:00, 69.20it/s]
1709
  93%|█████████▎| 790/851 [00:11<00:00, 70.20it/s]
1710
  94%|█████████▍| 798/851 [00:11<00:00, 70.94it/s]
1711
  95%|█████████▍| 806/851 [00:11<00:00, 72.86it/s]
1712
  96%|█████████▌| 814/851 [00:11<00:00, 69.90it/s]
1713
  97%|█████████▋| 822/851 [00:11<00:00, 70.42it/s]
1714
  98%|█████████▊| 830/851 [00:11<00:00, 70.64it/s]
1715
  98%|█████████▊| 838/851 [00:11<00:00, 71.14it/s]
1716
  99%|█████████▉| 846/851 [00:12<00:00, 67.21it/s]
1717
+ ***** eval metrics *****
1718
+ epoch = 9.9971
1719
+ eval_accuracy = 0.999
1720
+ eval_f1 = 0.9508
1721
+ eval_loss = 0.0058
1722
+ eval_precision = 0.943
1723
+ eval_recall = 0.9586
1724
+ eval_runtime = 0:00:15.61
1725
+ eval_samples = 6807
1726
+ eval_samples_per_second = 435.949
1727
+ eval_steps_per_second = 54.502
1728
+ 05/15/2024 15:28:39 - INFO - __main__ - *** Predict ***
1729
+ [INFO|trainer.py:786] 2024-05-15 15:28:39,512 >> The following columns in the test set don't have a corresponding argument in `RobertaForTokenClassification.forward` and have been ignored: ner_tags, tokens, id. If ner_tags, tokens, id are not expected by `RobertaForTokenClassification.forward`, you can safely ignore this message.
1730
+ [INFO|trainer.py:3614] 2024-05-15 15:28:39,514 >> ***** Running Prediction *****
1731
+ [INFO|trainer.py:3616] 2024-05-15 15:28:39,514 >> Num examples = 6807
1732
+ [INFO|trainer.py:3619] 2024-05-15 15:28:39,514 >> Batch size = 8
1733
+
1734
  0%| | 0/851 [00:00<?, ?it/s]
1735
  1%| | 9/851 [00:00<00:09, 89.28it/s]
1736
  2%|▏ | 18/851 [00:00<00:10, 76.32it/s]
1737
  3%|▎ | 26/851 [00:00<00:11, 74.84it/s]
1738
  4%|▍ | 34/851 [00:00<00:11, 73.37it/s]
1739
  5%|▍ | 42/851 [00:00<00:11, 72.40it/s]
1740
  6%|▌ | 50/851 [00:00<00:10, 73.86it/s]
1741
  7%|▋ | 58/851 [00:00<00:10, 73.78it/s]
1742
  8%|▊ | 66/851 [00:00<00:10, 71.71it/s]
1743
  9%|▊ | 74/851 [00:01<00:11, 67.99it/s]
1744
  10%|▉ | 82/851 [00:01<00:11, 69.81it/s]
1745
  11%|█ | 90/851 [00:01<00:10, 71.29it/s]
1746
  12%|█▏ | 98/851 [00:01<00:10, 69.84it/s]
1747
  12%|█▏ | 106/851 [00:01<00:10, 70.25it/s]
1748
  13%|█▎ | 114/851 [00:01<00:10, 67.45it/s]
1749
  14%|█▍ | 122/851 [00:01<00:10, 70.51it/s]
1750
  15%|█▌ | 130/851 [00:01<00:11, 65.54it/s]
1751
  16%|█▌ | 137/851 [00:01<00:11, 64.79it/s]
1752
  17%|█▋ | 145/851 [00:02<00:10, 66.82it/s]
1753
  18%|█▊ | 152/851 [00:02<00:10, 65.49it/s]
1754
  19%|█▉ | 160/851 [00:02<00:09, 69.23it/s]
1755
  20%|█▉ | 168/851 [00:02<00:09, 70.13it/s]
1756
  21%|██ | 176/851 [00:02<00:09, 71.41it/s]
1757
  22%|██▏ | 184/851 [00:02<00:09, 71.13it/s]
1758
  23%|██▎ | 192/851 [00:02<00:09, 71.88it/s]
1759
  24%|██▎ | 200/851 [00:02<00:08, 73.17it/s]
1760
  24%|██▍ | 208/851 [00:02<00:09, 69.02it/s]
1761
  25%|██▌ | 215/851 [00:03<00:09, 67.30it/s]
1762
  26%|██▌ | 223/851 [00:03<00:09, 68.83it/s]
1763
  27%|██▋ | 231/851 [00:03<00:08, 69.85it/s]
1764
  28%|██▊ | 239/851 [00:03<00:09, 67.99it/s]
1765
  29%|██▉ | 246/851 [00:03<00:09, 65.91it/s]
1766
  30%|██▉ | 254/851 [00:03<00:08, 69.57it/s]
1767
  31%|███ | 262/851 [00:03<00:08, 71.81it/s]
1768
  32%|███▏ | 270/851 [00:03<00:08, 70.28it/s]
1769
  33%|███▎ | 278/851 [00:03<00:07, 72.94it/s]
1770
  34%|███▎ | 286/851 [00:04<00:07, 73.83it/s]
1771
  35%|███▍ | 294/851 [00:04<00:07, 70.51it/s]
1772
  36%|███▌ | 303/851 [00:04<00:07, 73.46it/s]
1773
  37%|███▋ | 311/851 [00:04<00:07, 70.10it/s]
1774
  37%|███▋ | 319/851 [00:04<00:07, 72.11it/s]
1775
  38%|███▊ | 327/851 [00:04<00:07, 71.11it/s]
1776
  39%|███▉ | 335/851 [00:04<00:07, 70.55it/s]
1777
  40%|████ | 343/851 [00:04<00:07, 72.12it/s]
1778
  41%|████ | 351/851 [00:04<00:06, 72.42it/s]
1779
  42%|████▏ | 359/851 [00:05<00:07, 66.81it/s]
1780
  43%|████▎ | 367/851 [00:05<00:07, 68.19it/s]
1781
  44%|████▍ | 374/851 [00:05<00:07, 67.00it/s]
1782
  45%|████▍ | 381/851 [00:05<00:07, 64.83it/s]
1783
  46%|████▌ | 389/851 [00:05<00:06, 68.13it/s]
1784
  47%|████▋ | 397/851 [00:05<00:06, 69.09it/s]
1785
  47%|████▋ | 404/851 [00:05<00:06, 64.36it/s]
1786
  48%|████▊ | 412/851 [00:05<00:06, 65.60it/s]
1787
  49%|████▉ | 420/851 [00:06<00:06, 68.49it/s]
1788
  50%|█████ | 427/851 [00:06<00:06, 65.71it/s]
1789
  51%|█████ | 435/851 [00:06<00:06, 68.37it/s]
1790
  52%|█████▏ | 443/851 [00:06<00:05, 70.54it/s]
1791
  53%|█████▎ | 451/851 [00:06<00:05, 71.45it/s]
1792
  54%|█████▍ | 459/851 [00:06<00:05, 72.02it/s]
1793
  55%|█████▍ | 467/851 [00:06<00:05, 66.48it/s]
1794
  56%|█████▌ | 474/851 [00:06<00:06, 62.19it/s]
1795
  57%|█████▋ | 481/851 [00:06<00:05, 63.17it/s]
1796
  57%|█████▋ | 488/851 [00:07<00:05, 64.90it/s]
1797
  58%|█████▊ | 496/851 [00:07<00:05, 67.97it/s]
1798
  59%|█████▉ | 504/851 [00:07<00:04, 71.24it/s]
1799
  60%|██████ | 512/851 [00:07<00:04, 70.23it/s]
1800
  61%|██████ | 520/851 [00:07<00:04, 71.31it/s]
1801
  62%|██████▏ | 528/851 [00:07<00:04, 65.51it/s]
1802
  63%|██████▎ | 536/851 [00:07<00:04, 67.75it/s]
1803
  64%|██████▍ | 544/851 [00:07<00:04, 69.64it/s]
1804
  65%|██████▍ | 552/851 [00:07<00:04, 67.32it/s]
1805
  66%|██████▌ | 560/851 [00:08<00:04, 69.83it/s]
1806
  67%|██████▋ | 568/851 [00:08<00:03, 72.10it/s]
1807
  68%|██████▊ | 576/851 [00:08<00:03, 72.39it/s]
1808
  69%|██████▊ | 584/851 [00:08<00:03, 68.89it/s]
1809
  69%|██████▉ | 591/851 [00:08<00:03, 66.82it/s]
1810
  70%|███████ | 598/851 [00:08<00:03, 67.43it/s]
1811
  71%|███████ | 605/851 [00:08<00:03, 67.25it/s]
1812
  72%|███████▏ | 612/851 [00:08<00:03, 65.87it/s]
1813
  73%|███████▎ | 619/851 [00:08<00:03, 65.34it/s]
1814
  74%|███████▎ | 626/851 [00:09<00:03, 64.84it/s]
1815
  74%|███████▍ | 633/851 [00:09<00:03, 65.27it/s]
1816
  75%|███████▌ | 640/851 [00:09<00:03, 64.95it/s]
1817
  76%|███████▌ | 647/851 [00:09<00:03, 63.18it/s]
1818
  77%|███████▋ | 655/851 [00:09<00:02, 66.34it/s]
1819
  78%|███████▊ | 663/851 [00:09<00:02, 67.96it/s]
1820
  79%|███████▊ | 670/851 [00:09<00:02, 68.44it/s]
1821
  80%|███████▉ | 677/851 [00:09<00:02, 68.31it/s]
1822
  80%|████████ | 685/851 [00:09<00:02, 69.35it/s]
1823
  81%|████████▏ | 693/851 [00:10<00:02, 72.25it/s]
1824
  82%|████████▏ | 701/851 [00:10<00:02, 72.73it/s]
1825
  83%|████████▎ | 709/851 [00:10<00:01, 74.24it/s]
1826
  84%|████████▍ | 717/851 [00:10<00:01, 73.24it/s]
1827
  85%|████████▌ | 725/851 [00:10<00:01, 73.55it/s]
1828
  86%|████████▌ | 733/851 [00:10<00:01, 74.45it/s]
1829
  87%|████████▋ | 741/851 [00:10<00:01, 75.51it/s]
1830
  88%|████████▊ | 749/851 [00:10<00:01, 74.86it/s]
1831
  89%|████████▉ | 757/851 [00:10<00:01, 74.62it/s]
1832
  90%|████████▉ | 765/851 [00:11<00:01, 73.45it/s]
1833
  91%|█████████ | 773/851 [00:11<00:01, 70.77it/s]
1834
  92%|█████████▏| 781/851 [00:11<00:01, 67.45it/s]
1835
  93%|█████████▎| 789/851 [00:11<00:00, 69.14it/s]
1836
  94%|█████████▎| 797/851 [00:11<00:00, 70.53it/s]
1837
  95%|█████████▍| 805/851 [00:11<00:00, 73.00it/s]
1838
  96%|█████████▌| 813/851 [00:11<00:00, 69.66it/s]
1839
  96%|█████████▋| 821/851 [00:11<00:00, 70.69it/s]
1840
  97%|█████████▋| 829/851 [00:11<00:00, 71.60it/s]
1841
  98%|█████████▊| 837/851 [00:12<00:00, 72.30it/s]
1842
  99%|█████████▉| 845/851 [00:12<00:00, 66.40it/s]
1843
+ [INFO|trainer.py:3305] 2024-05-15 15:28:55,509 >> Saving model checkpoint to /content/dissertation/scripts/ner/output
1844
+ [INFO|configuration_utils.py:471] 2024-05-15 15:28:55,510 >> Configuration saved in /content/dissertation/scripts/ner/output/config.json
1845
+ [INFO|modeling_utils.py:2590] 2024-05-15 15:28:56,772 >> Model weights saved in /content/dissertation/scripts/ner/output/model.safetensors
1846
+ [INFO|tokenization_utils_base.py:2488] 2024-05-15 15:28:56,773 >> tokenizer config file saved in /content/dissertation/scripts/ner/output/tokenizer_config.json
1847
+ [INFO|tokenization_utils_base.py:2497] 2024-05-15 15:28:56,774 >> Special tokens file saved in /content/dissertation/scripts/ner/output/special_tokens_map.json
1848
+ ***** predict metrics *****
1849
+ predict_accuracy = 0.999
1850
+ predict_f1 = 0.9508
1851
+ predict_loss = 0.0058
1852
+ predict_precision = 0.943
1853
+ predict_recall = 0.9586
1854
+ predict_runtime = 0:00:15.69
1855
+ predict_samples_per_second = 433.645
1856
+ predict_steps_per_second = 54.214
1857
+
train_results.json ADDED
@@ -0,0 +1,9 @@
+{
+    "epoch": 9.997061416397296,
+    "total_flos": 6700358578825584.0,
+    "train_loss": 0.0023016003105682196,
+    "train_runtime": 3387.0101,
+    "train_samples": 27224,
+    "train_samples_per_second": 80.378,
+    "train_steps_per_second": 5.022
+}
trainer_state.json ADDED
@@ -0,0 +1,388 @@
+{
+  "best_metric": 0.95077484047402,
+  "best_model_checkpoint": "/content/dissertation/scripts/ner/output/checkpoint-13612",
+  "epoch": 9.997061416397296,
+  "eval_steps": 500,
+  "global_step": 17010,
+  "is_hyper_param_search": false,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "epoch": 0.29385836027034967,
+      "grad_norm": 0.008281867019832134,
+      "learning_rate": 4.853027630805409e-05,
+      "loss": 0.0278,
+      "step": 500
+    },
+    {
+      "epoch": 0.5877167205406993,
+      "grad_norm": 0.7454735040664673,
+      "learning_rate": 4.7060552616108174e-05,
+      "loss": 0.0082,
+      "step": 1000
+    },
+    {
+      "epoch": 0.8815750808110491,
+      "grad_norm": 0.028717944398522377,
+      "learning_rate": 4.559082892416226e-05,
+      "loss": 0.0055,
+      "step": 1500
+    },
+    {
+      "epoch": 0.9997061416397296,
+      "eval_accuracy": 0.9984002856142422,
+      "eval_f1": 0.9022907900888265,
+      "eval_loss": 0.003938445821404457,
+      "eval_precision": 0.9181731684110371,
+      "eval_recall": 0.8869485294117647,
+      "eval_runtime": 15.693,
+      "eval_samples_per_second": 433.761,
+      "eval_steps_per_second": 54.228,
+      "step": 1701
+    },
+    {
+      "epoch": 1.1754334410813987,
+      "grad_norm": 0.007521205581724644,
+      "learning_rate": 4.4121105232216346e-05,
+      "loss": 0.0042,
+      "step": 2000
+    },
+    {
+      "epoch": 1.4692918013517484,
+      "grad_norm": 0.0010779600124806166,
+      "learning_rate": 4.265138154027043e-05,
+      "loss": 0.0036,
+      "step": 2500
+    },
+    {
+      "epoch": 1.7631501616220981,
+      "grad_norm": 0.0016530421562492847,
+      "learning_rate": 4.118165784832452e-05,
+      "loss": 0.0037,
+      "step": 3000
+    },
+    {
+      "epoch": 2.0,
+      "eval_accuracy": 0.9981599851700297,
+      "eval_f1": 0.9000469263256687,
+      "eval_loss": 0.005709913093596697,
+      "eval_precision": 0.9194630872483222,
+      "eval_recall": 0.8814338235294118,
+      "eval_runtime": 15.7138,
+      "eval_samples_per_second": 433.187,
+      "eval_steps_per_second": 54.156,
+      "step": 3403
+    },
+    {
+      "epoch": 2.0570085218924477,
+      "grad_norm": 0.7208561897277832,
+      "learning_rate": 3.971193415637861e-05,
+      "loss": 0.0032,
+      "step": 3500
+    },
+    {
+      "epoch": 2.3508668821627974,
+      "grad_norm": 0.010068068280816078,
+      "learning_rate": 3.824221046443269e-05,
+      "loss": 0.0023,
+      "step": 4000
+    },
+    {
+      "epoch": 2.644725242433147,
+      "grad_norm": 0.017267288640141487,
+      "learning_rate": 3.677248677248677e-05,
+      "loss": 0.0029,
+      "step": 4500
+    },
+    {
+      "epoch": 2.938583602703497,
+      "grad_norm": 0.14932678639888763,
+      "learning_rate": 3.530276308054086e-05,
+      "loss": 0.0024,
+      "step": 5000
+    },
+    {
+      "epoch": 2.9997061416397295,
+      "eval_accuracy": 0.9990113353152399,
+      "eval_f1": 0.9414965986394559,
+      "eval_loss": 0.0036316141486167908,
+      "eval_precision": 0.9292748433303492,
+      "eval_recall": 0.9540441176470589,
+      "eval_runtime": 15.718,
+      "eval_samples_per_second": 433.069,
+      "eval_steps_per_second": 54.142,
+      "step": 5104
+    },
+    {
+      "epoch": 3.2324419629738466,
+      "grad_norm": 0.0006910681258887053,
+      "learning_rate": 3.3833039388594945e-05,
+      "loss": 0.0019,
+      "step": 5500
+    },
+    {
+      "epoch": 3.5263003232441963,
+      "grad_norm": 0.07551395893096924,
+      "learning_rate": 3.2363315696649034e-05,
+      "loss": 0.0017,
+      "step": 6000
+    },
+    {
+      "epoch": 3.820158683514546,
+      "grad_norm": 0.035475652664899826,
+      "learning_rate": 3.0893592004703116e-05,
+      "loss": 0.0013,
+      "step": 6500
+    },
+    {
+      "epoch": 4.0,
+      "eval_accuracy": 0.9989358123184874,
+      "eval_f1": 0.9402501157943493,
+      "eval_loss": 0.005335611291229725,
+      "eval_precision": 0.9477124183006536,
+      "eval_recall": 0.9329044117647058,
+      "eval_runtime": 15.7659,
+      "eval_samples_per_second": 431.754,
+      "eval_steps_per_second": 53.977,
+      "step": 6806
+    },
+    {
+      "epoch": 4.114017043784895,
+      "grad_norm": 0.0003611127322074026,
+      "learning_rate": 2.9423868312757202e-05,
+      "loss": 0.0011,
+      "step": 7000
+    },
+    {
+      "epoch": 4.407875404055245,
+      "grad_norm": 0.000422955141402781,
+      "learning_rate": 2.795414462081129e-05,
+      "loss": 0.0007,
+      "step": 7500
+    },
+    {
+      "epoch": 4.701733764325595,
+      "grad_norm": 0.00019804929615929723,
+      "learning_rate": 2.648442092886537e-05,
+      "loss": 0.0009,
+      "step": 8000
+    },
+    {
+      "epoch": 4.9955921245959445,
+      "grad_norm": 0.00024289640714414418,
+      "learning_rate": 2.501469723691946e-05,
+      "loss": 0.0012,
+      "step": 8500
+    },
+    {
+      "epoch": 4.99970614163973,
+      "eval_accuracy": 0.9988808865026674,
+      "eval_f1": 0.9406350667280257,
+      "eval_loss": 0.006314857862889767,
+      "eval_precision": 0.9419354838709677,
+      "eval_recall": 0.9393382352941176,
+      "eval_runtime": 15.9896,
+      "eval_samples_per_second": 425.713,
+      "eval_steps_per_second": 53.222,
+      "step": 8507
+    },
+    {
+      "epoch": 5.289450484866294,
+      "grad_norm": 0.0022946298122406006,
+      "learning_rate": 2.3544973544973546e-05,
+      "loss": 0.0014,
+      "step": 9000
+    },
+    {
+      "epoch": 5.583308845136644,
+      "grad_norm": 0.0001970727025764063,
+      "learning_rate": 2.2075249853027632e-05,
+      "loss": 0.0008,
+      "step": 9500
+    },
+    {
+      "epoch": 5.877167205406994,
+      "grad_norm": 1.1225332021713257,
+      "learning_rate": 2.0605526161081718e-05,
+      "loss": 0.0007,
+      "step": 10000
+    },
+    {
+      "epoch": 6.0,
+      "eval_accuracy": 0.9989426780454649,
+      "eval_f1": 0.9480401093892434,
+      "eval_loss": 0.0047547053545713425,
+      "eval_precision": 0.9403254972875226,
+      "eval_recall": 0.9558823529411765,
+      "eval_runtime": 15.7168,
+      "eval_samples_per_second": 433.104,
+      "eval_steps_per_second": 54.146,
+      "step": 10209
+    },
+    {
+      "epoch": 6.171025565677343,
+      "grad_norm": 0.0004107556596864015,
+      "learning_rate": 1.91358024691358e-05,
+      "loss": 0.0005,
+      "step": 10500
+    },
+    {
+      "epoch": 6.464883925947693,
+      "grad_norm": 1.6069244146347046,
+      "learning_rate": 1.766607877718989e-05,
+      "loss": 0.0005,
+      "step": 11000
+    },
+    {
+      "epoch": 6.758742286218043,
+      "grad_norm": 0.00012789235915988684,
+      "learning_rate": 1.6196355085243976e-05,
+      "loss": 0.0005,
+      "step": 11500
+    },
+    {
+      "epoch": 6.99970614163973,
+      "eval_accuracy": 0.9989220808645324,
+      "eval_f1": 0.9445983379501385,
+      "eval_loss": 0.00655418960377574,
+      "eval_precision": 0.9489795918367347,
+      "eval_recall": 0.9402573529411765,
+      "eval_runtime": 15.8136,
+      "eval_samples_per_second": 430.452,
+      "eval_steps_per_second": 53.814,
+      "step": 11910
+    },
+    {
+      "epoch": 7.052600646488393,
+      "grad_norm": 0.00015387983876280487,
+      "learning_rate": 1.472663139329806e-05,
+      "loss": 0.0002,
+      "step": 12000
+    },
+    {
+      "epoch": 7.346459006758742,
+      "grad_norm": 0.00010029759141616523,
+      "learning_rate": 1.3256907701352148e-05,
+      "loss": 0.0002,
+      "step": 12500
+    },
+    {
+      "epoch": 7.640317367029092,
+      "grad_norm": 9.027074702316895e-05,
+      "learning_rate": 1.1787184009406232e-05,
+      "loss": 0.0002,
+      "step": 13000
+    },
+    {
+      "epoch": 7.934175727299442,
+      "grad_norm": 8.488987077726051e-05,
+      "learning_rate": 1.0317460317460318e-05,
+      "loss": 0.0002,
+      "step": 13500
+    },
+    {
+      "epoch": 8.0,
+      "eval_accuracy": 0.9990319324961724,
+      "eval_f1": 0.95077484047402,
+      "eval_loss": 0.0058256350457668304,
+      "eval_precision": 0.9430379746835443,
+      "eval_recall": 0.9586397058823529,
+      "eval_runtime": 15.7444,
+      "eval_samples_per_second": 432.344,
+      "eval_steps_per_second": 54.051,
+      "step": 13612
+    },
+    {
+      "epoch": 8.22803408756979,
+      "grad_norm": 0.0003942732000723481,
+      "learning_rate": 8.847736625514404e-06,
+      "loss": 0.0002,
+      "step": 14000
+    },
+    {
+      "epoch": 8.521892447840141,
+      "grad_norm": 0.006602809764444828,
+      "learning_rate": 7.37801293356849e-06,
+      "loss": 0.0001,
+      "step": 14500
+    },
+    {
+      "epoch": 8.81575080811049,
+      "grad_norm": 8.021829853532836e-05,
+      "learning_rate": 5.908289241622575e-06,
+      "loss": 0.0001,
+      "step": 15000
+    },
+    {
+      "epoch": 8.999706141639729,
+      "eval_accuracy": 0.9990250667691949,
+      "eval_f1": 0.9494305239179955,
+      "eval_loss": 0.006341648753732443,
+      "eval_precision": 0.941282746160795,
+      "eval_recall": 0.9577205882352942,
+      "eval_runtime": 16.0762,
+      "eval_samples_per_second": 423.422,
+      "eval_steps_per_second": 52.936,
+      "step": 15313
+    },
+    {
+      "epoch": 9.10960916838084,
+      "grad_norm": 6.85813938616775e-05,
+      "learning_rate": 4.438565549676661e-06,
+      "loss": 0.0001,
+      "step": 15500
+    },
+    {
+      "epoch": 9.40346752865119,
+      "grad_norm": 0.0018187027890235186,
+      "learning_rate": 2.9688418577307467e-06,
+      "loss": 0.0001,
+      "step": 16000
+    },
+    {
+      "epoch": 9.69732588892154,
+      "grad_norm": 0.000292234995868057,
+      "learning_rate": 1.4991181657848325e-06,
+      "loss": 0.0,
+      "step": 16500
+    },
+    {
+      "epoch": 9.991184249191889,
+      "grad_norm": 6.29909336566925e-05,
+      "learning_rate": 2.9394473838918286e-08,
+      "loss": 0.0,
+      "step": 17000
+    },
+    {
+      "epoch": 9.997061416397296,
+      "eval_accuracy": 0.9990250667691949,
+      "eval_f1": 0.949771689497717,
+      "eval_loss": 0.006454338785260916,
+      "eval_precision": 0.9437386569872959,
+      "eval_recall": 0.9558823529411765,
+      "eval_runtime": 15.7656,
+      "eval_samples_per_second": 431.764,
+      "eval_steps_per_second": 53.978,
+      "step": 17010
+    },
+    {
+      "epoch": 9.997061416397296,
+      "step": 17010,
+      "total_flos": 6700358578825584.0,
+      "train_loss": 0.0023016003105682196,
+      "train_runtime": 3387.0101,
+      "train_samples_per_second": 80.378,
+      "train_steps_per_second": 5.022
+    }
+  ],
+  "logging_steps": 500,
+  "max_steps": 17010,
+  "num_input_tokens_seen": 0,
+  "num_train_epochs": 10,
+  "save_steps": 500,
+  "total_flos": 6700358578825584.0,
+  "train_batch_size": 4,
+  "trial_name": null,
+  "trial_params": null
+}
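
`log_history` above interleaves training-loss entries (every 500 steps, per `logging_steps`) with one evaluation entry per epoch; `best_model_checkpoint` points at step 13612, where eval F1 peaks at 0.9508. A small sketch, assuming a local copy of `trainer_state.json`, that reproduces that reading:

```python
import json

# Load the trainer state added in this commit (local copy assumed).
with open("trainer_state.json") as f:
    state = json.load(f)

# Per-epoch evaluation entries are the ones carrying an "eval_f1" key.
evals = [e for e in state["log_history"] if "eval_f1" in e]

for e in evals:
    print(f"epoch {e['epoch']:8.4f}  step {e['step']:5d}  eval_f1 {e['eval_f1']:.4f}")

best = max(evals, key=lambda e: e["eval_f1"])
print("best step:", best["step"])                     # expected: 13612
print("best_model_checkpoint:", state["best_model_checkpoint"])
```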