Rodrigo1771 committed on
Commit
e66fdad
1 Parent(s): 734f37d

End of training

Browse files
README.md CHANGED
@@ -2,9 +2,10 @@
2
  license: apache-2.0
3
  base_model: PlanTL-GOB-ES/bsc-bio-ehr-es
4
  tags:
 
5
  - generated_from_trainer
6
  datasets:
7
- - multi-train-distemist-dev-ner
8
  metrics:
9
  - precision
10
  - recall
@@ -17,24 +18,24 @@ model-index:
17
  name: Token Classification
18
  type: token-classification
19
  dataset:
20
- name: multi-train-distemist-dev-ner
21
- type: multi-train-distemist-dev-ner
22
  config: MultiTrainDisTEMISTDevNER
23
  split: validation
24
  args: MultiTrainDisTEMISTDevNER
25
  metrics:
26
  - name: Precision
27
  type: precision
28
- value: 0.3145413870246085
29
  - name: Recall
30
  type: recall
31
- value: 0.8224145999064109
32
  - name: F1
33
  type: f1
34
- value: 0.45504563402161957
35
  - name: Accuracy
36
  type: accuracy
37
- value: 0.8526409018818958
38
  ---
39
 
40
  <!-- This model card has been generated automatically according to the information the Trainer had access to. You
@@ -42,13 +43,13 @@ should probably proofread and complete it, then remove this comment. -->
42
 
43
  # output
44
 
45
- This model is a fine-tuned version of [PlanTL-GOB-ES/bsc-bio-ehr-es](https://huggingface.co/PlanTL-GOB-ES/bsc-bio-ehr-es) on the multi-train-distemist-dev-ner dataset.
46
  It achieves the following results on the evaluation set:
47
- - Loss: 1.1188
48
- - Precision: 0.3145
49
- - Recall: 0.8224
50
- - F1: 0.4550
51
- - Accuracy: 0.8526
52
 
53
  ## Model description
54
 
 
2
  license: apache-2.0
3
  base_model: PlanTL-GOB-ES/bsc-bio-ehr-es
4
  tags:
5
+ - token-classification
6
  - generated_from_trainer
7
  datasets:
8
+ - Rodrigo1771/multi-train-distemist-dev-ner
9
  metrics:
10
  - precision
11
  - recall
 
18
  name: Token Classification
19
  type: token-classification
20
  dataset:
21
+ name: Rodrigo1771/multi-train-distemist-dev-ner
22
+ type: Rodrigo1771/multi-train-distemist-dev-ner
23
  config: MultiTrainDisTEMISTDevNER
24
  split: validation
25
  args: MultiTrainDisTEMISTDevNER
26
  metrics:
27
  - name: Precision
28
  type: precision
29
+ value: 0.32143181611701643
30
  - name: Recall
31
  type: recall
32
+ value: 0.8277959756668226
33
  - name: F1
34
  type: f1
35
+ value: 0.46305870034683594
36
  - name: Accuracy
37
  type: accuracy
38
+ value: 0.8559776451929613
39
  ---
40
 
41
  <!-- This model card has been generated automatically according to the information the Trainer had access to. You
 
43
 
44
  # output
45
 
46
+ This model is a fine-tuned version of [PlanTL-GOB-ES/bsc-bio-ehr-es](https://huggingface.co/PlanTL-GOB-ES/bsc-bio-ehr-es) on the Rodrigo1771/multi-train-distemist-dev-ner dataset.
47
  It achieves the following results on the evaluation set:
48
+ - Loss: 0.9499
49
+ - Precision: 0.3214
50
+ - Recall: 0.8278
51
+ - F1: 0.4631
52
+ - Accuracy: 0.8560
53
 
54
  ## Model description
55
 
all_results.json CHANGED
@@ -1,19 +1,26 @@
1
  {
2
- "eval_accuracy": 0.018743434648577764,
3
- "eval_f1": 0.008761828065230522,
4
- "eval_loss": 2.3735408782958984,
5
- "eval_precision": 0.004537076421380973,
6
- "eval_recall": 0.1272812353766963,
7
- "eval_runtime": 16.9354,
 
8
  "eval_samples": 6807,
9
- "eval_samples_per_second": 401.939,
10
- "eval_steps_per_second": 50.25,
11
- "predict_accuracy": 0.018743434648577764,
12
- "predict_f1": 0.008761828065230522,
13
- "predict_loss": 2.3735408782958984,
14
- "predict_precision": 0.004537076421380973,
15
- "predict_recall": 0.1272812353766963,
16
- "predict_runtime": 16.1794,
17
- "predict_samples_per_second": 420.719,
18
- "predict_steps_per_second": 52.598
 
 
 
 
 
 
19
  }
 
1
  {
2
+ "epoch": 9.997061416397296,
3
+ "eval_accuracy": 0.8559776451929613,
4
+ "eval_f1": 0.46305870034683594,
5
+ "eval_loss": 0.9498798847198486,
6
+ "eval_precision": 0.32143181611701643,
7
+ "eval_recall": 0.8277959756668226,
8
+ "eval_runtime": 16.1603,
9
  "eval_samples": 6807,
10
+ "eval_samples_per_second": 421.218,
11
+ "eval_steps_per_second": 52.66,
12
+ "predict_accuracy": 0.8559776451929613,
13
+ "predict_f1": 0.46305870034683594,
14
+ "predict_loss": 0.9498798847198486,
15
+ "predict_precision": 0.32143181611701643,
16
+ "predict_recall": 0.8277959756668226,
17
+ "predict_runtime": 16.4367,
18
+ "predict_samples_per_second": 414.134,
19
+ "predict_steps_per_second": 51.774,
20
+ "total_flos": 6700722040732752.0,
21
+ "train_loss": 0.08913132946046923,
22
+ "train_runtime": 3311.208,
23
+ "train_samples": 27224,
24
+ "train_samples_per_second": 82.218,
25
+ "train_steps_per_second": 5.137
26
  }
eval_results.json CHANGED
@@ -1,11 +1,12 @@
1
  {
2
- "eval_accuracy": 0.018743434648577764,
3
- "eval_f1": 0.008761828065230522,
4
- "eval_loss": 2.3735408782958984,
5
- "eval_precision": 0.004537076421380973,
6
- "eval_recall": 0.1272812353766963,
7
- "eval_runtime": 16.9354,
 
8
  "eval_samples": 6807,
9
- "eval_samples_per_second": 401.939,
10
- "eval_steps_per_second": 50.25
11
  }
 
1
  {
2
+ "epoch": 9.997061416397296,
3
+ "eval_accuracy": 0.8559776451929613,
4
+ "eval_f1": 0.46305870034683594,
5
+ "eval_loss": 0.9498798847198486,
6
+ "eval_precision": 0.32143181611701643,
7
+ "eval_recall": 0.8277959756668226,
8
+ "eval_runtime": 16.1603,
9
  "eval_samples": 6807,
10
+ "eval_samples_per_second": 421.218,
11
+ "eval_steps_per_second": 52.66
12
  }
predict_results.json CHANGED
@@ -1,10 +1,10 @@
1
  {
2
- "predict_accuracy": 0.018743434648577764,
3
- "predict_f1": 0.008761828065230522,
4
- "predict_loss": 2.3735408782958984,
5
- "predict_precision": 0.004537076421380973,
6
- "predict_recall": 0.1272812353766963,
7
- "predict_runtime": 16.1794,
8
- "predict_samples_per_second": 420.719,
9
- "predict_steps_per_second": 52.598
10
  }
 
1
  {
2
+ "predict_accuracy": 0.8559776451929613,
3
+ "predict_f1": 0.46305870034683594,
4
+ "predict_loss": 0.9498798847198486,
5
+ "predict_precision": 0.32143181611701643,
6
+ "predict_recall": 0.8277959756668226,
7
+ "predict_runtime": 16.4367,
8
+ "predict_samples_per_second": 414.134,
9
+ "predict_steps_per_second": 51.774
10
  }
predictions.txt CHANGED
The diff for this file is too large to render. See raw diff
 
tb/events.out.tfevents.1715603564.dff07dfba241.5879.1 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d165a43ac9f9c5f183a1937575bd2f2d93ca6034ab66002728aec974fcf320a2
3
+ size 569
train.log CHANGED
@@ -1640,3 +1640,53 @@ Training completed. Do not forget to share your model on huggingface.co/models =
1640
  [INFO|modeling_utils.py:2590] 2024-05-13 12:32:23,238 >> Model weights saved in /content/dissertation/scripts/ner/output/model.safetensors
1641
  [INFO|tokenization_utils_base.py:2488] 2024-05-13 12:32:23,239 >> tokenizer config file saved in /content/dissertation/scripts/ner/output/tokenizer_config.json
1642
  [INFO|tokenization_utils_base.py:2497] 2024-05-13 12:32:23,239 >> Special tokens file saved in /content/dissertation/scripts/ner/output/special_tokens_map.json
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1643
  0%| | 0/851 [00:00<?, ?it/s]
1644
  1%| | 10/851 [00:00<00:09, 92.31it/s]
1645
  2%|▏ | 20/851 [00:00<00:10, 77.97it/s]
1646
  3%|▎ | 28/851 [00:00<00:11, 73.38it/s]
1647
  4%|▍ | 36/851 [00:00<00:11, 72.05it/s]
1648
  5%|▌ | 44/851 [00:00<00:11, 73.19it/s]
1649
  6%|▌ | 52/851 [00:00<00:10, 73.90it/s]
1650
  7%|▋ | 60/851 [00:00<00:10, 74.36it/s]
1651
  8%|▊ | 68/851 [00:00<00:11, 68.74it/s]
1652
  9%|▉ | 75/851 [00:01<00:11, 68.45it/s]
1653
  10%|▉ | 83/851 [00:01<00:11, 69.37it/s]
1654
  11%|█ | 91/851 [00:01<00:10, 70.51it/s]
1655
  12%|█▏ | 99/851 [00:01<00:10, 69.26it/s]
1656
  13%|█▎ | 107/851 [00:01<00:10, 67.76it/s]
1657
  13%|█▎ | 114/851 [00:01<00:10, 67.63it/s]
1658
  14%|█▍ | 122/851 [00:01<00:10, 70.32it/s]
1659
  15%|█▌ | 130/851 [00:01<00:11, 64.72it/s]
1660
  16%|█▌ | 137/851 [00:01<00:11, 64.04it/s]
1661
  17%|█▋ | 145/851 [00:02<00:10, 66.06it/s]
1662
  18%|█▊ | 152/851 [00:02<00:10, 64.75it/s]
1663
  19%|█▉ | 160/851 [00:02<00:10, 68.49it/s]
1664
  20%|█▉ | 167/851 [00:02<00:09, 68.88it/s]
1665
  21%|██ | 175/851 [00:02<00:09, 70.11it/s]
1666
  22%|██▏ | 183/851 [00:02<00:09, 70.82it/s]
1667
  22%|██▏ | 191/851 [00:02<00:09, 71.05it/s]
1668
  23%|██▎ | 199/851 [00:02<00:09, 72.33it/s]
1669
  24%|██▍ | 207/851 [00:02<00:09, 71.19it/s]
1670
  25%|██▌ | 215/851 [00:03<00:09, 67.78it/s]
1671
  26%|██▌ | 223/851 [00:03<00:09, 68.86it/s]
1672
  27%|██▋ | 231/851 [00:03<00:08, 69.60it/s]
1673
  28%|██▊ | 238/851 [00:03<00:09, 66.60it/s]
1674
  29%|██▉ | 245/851 [00:03<00:09, 64.21it/s]
1675
  30%|██▉ | 253/851 [00:03<00:08, 67.82it/s]
1676
  31%|███ | 261/851 [00:03<00:08, 70.71it/s]
1677
  32%|███▏ | 269/851 [00:03<00:08, 69.23it/s]
1678
  33%|███▎ | 277/851 [00:03<00:07, 71.99it/s]
1679
  33%|███▎ | 285/851 [00:04<00:07, 73.42it/s]
1680
  34%|███▍ | 293/851 [00:04<00:07, 71.22it/s]
1681
  35%|███▌ | 301/851 [00:04<00:07, 72.04it/s]
1682
  36%|███▋ | 310/851 [00:04<00:07, 74.21it/s]
1683
  37%|███▋ | 318/851 [00:04<00:07, 70.47it/s]
1684
  38%|███▊ | 326/851 [00:04<00:07, 69.69it/s]
1685
  39%|███▉ | 334/851 [00:04<00:07, 69.49it/s]
1686
  40%|████ | 342/851 [00:04<00:07, 70.77it/s]
1687
  41%|████ | 350/851 [00:05<00:07, 70.56it/s]
1688
  42%|████▏ | 358/851 [00:05<00:07, 65.68it/s]
1689
  43%|████▎ | 366/851 [00:05<00:07, 66.28it/s]
1690
  44%|████▍ | 373/851 [00:05<00:07, 66.55it/s]
1691
  45%|████▍ | 380/851 [00:05<00:07, 64.58it/s]
1692
  46%|████▌ | 388/851 [00:05<00:06, 67.42it/s]
1693
  46%|████▋ | 395/851 [00:05<00:06, 67.26it/s]
1694
  47%|████▋ | 402/851 [00:05<00:06, 67.54it/s]
1695
  48%|████▊ | 409/851 [00:05<00:06, 64.14it/s]
1696
  49%|████▉ | 416/851 [00:06<00:06, 65.62it/s]
1697
  50%|████▉ | 424/851 [00:06<00:06, 67.91it/s]
1698
  51%|█████ | 431/851 [00:06<00:06, 66.25it/s]
1699
  52%|█████▏ | 439/851 [00:06<00:06, 68.51it/s]
1700
  52%|█████▏ | 446/851 [00:06<00:05, 67.97it/s]
1701
  53%|█████▎ | 454/851 [00:06<00:05, 68.93it/s]
1702
  54%|█████▍ | 461/851 [00:06<00:05, 68.17it/s]
1703
  55%|█████▍ | 468/851 [00:06<00:05, 65.16it/s]
1704
  56%|█████▌ | 475/851 [00:06<00:06, 60.96it/s]
1705
  57%|█████▋ | 482/851 [00:07<00:05, 61.68it/s]
1706
  57%|█████▋ | 489/851 [00:07<00:05, 61.89it/s]
1707
  58%|█████▊ | 497/851 [00:07<00:05, 66.34it/s]
1708
  59%|█████▉ | 505/851 [00:07<00:04, 69.56it/s]
1709
  60%|██████ | 513/851 [00:07<00:04, 68.23it/s]
1710
  61%|██████ | 521/851 [00:07<00:04, 66.68it/s]
1711
  62%|██████▏ | 528/851 [00:07<00:05, 63.84it/s]
1712
  63%|██████▎ | 536/851 [00:07<00:04, 66.25it/s]
1713
  64%|██████▍ | 544/851 [00:07<00:04, 68.40it/s]
1714
  65%|██████▍ | 551/851 [00:08<00:04, 65.47it/s]
1715
  66%|██████▌ | 558/851 [00:08<00:04, 66.51it/s]
1716
  67%|██████▋ | 566/851 [00:08<00:04, 69.07it/s]
1717
  67%|██████▋ | 573/851 [00:08<00:04, 69.29it/s]
1718
  68%|██████▊ | 580/851 [00:08<00:04, 67.14it/s]
1719
  69%|██████▉ | 587/851 [00:08<00:04, 63.62it/s]
1720
  70%|██████▉ | 594/851 [00:08<00:03, 65.03it/s]
1721
  71%|███████ | 601/851 [00:08<00:03, 64.50it/s]
1722
  71%|███████▏ | 608/851 [00:08<00:03, 65.17it/s]
1723
  72%|███████▏ | 615/851 [00:09<00:03, 60.27it/s]
1724
  73%|███████▎ | 623/851 [00:09<00:03, 64.78it/s]
1725
  74%|███████▍ | 630/851 [00:09<00:03, 61.49it/s]
1726
  75%|███████▍ | 638/851 [00:09<00:03, 64.58it/s]
1727
  76%|███████▌ | 645/851 [00:09<00:03, 59.82it/s]
1728
  77%|███████▋ | 653/851 [00:09<00:03, 62.97it/s]
1729
  78%|███████▊ | 660/851 [00:09<00:02, 64.81it/s]
1730
  78%|███████▊ | 667/851 [00:09<00:02, 65.46it/s]
1731
  79%|███████▉ | 674/851 [00:09<00:02, 64.18it/s]
1732
  80%|████████ | 681/851 [00:10<00:02, 65.23it/s]
1733
  81%|████████ | 689/851 [00:10<00:02, 69.23it/s]
1734
  82%|████████▏ | 697/851 [00:10<00:02, 70.21it/s]
1735
  83%|████████▎ | 705/851 [00:10<00:02, 71.66it/s]
1736
  84%|████████▍ | 713/851 [00:10<00:01, 72.76it/s]
1737
  85%|████████▍ | 721/851 [00:10<00:01, 69.62it/s]
1738
  86%|████████▌ | 729/851 [00:10<00:01, 70.75it/s]
1739
  87%|████████▋ | 737/851 [00:10<00:01, 71.16it/s]
1740
  88%|████████▊ | 745/851 [00:10<00:01, 71.47it/s]
1741
  88%|████████▊ | 753/851 [00:11<00:01, 70.42it/s]
1742
  89%|████████▉ | 761/851 [00:11<00:01, 72.99it/s]
1743
  90%|█████████ | 769/851 [00:11<00:01, 67.29it/s]
1744
  91%|█████████ | 776/851 [00:11<00:01, 67.64it/s]
1745
  92%|█████████▏| 783/851 [00:11<00:01, 65.98it/s]
1746
  93%|█████████▎| 790/851 [00:11<00:00, 66.89it/s]
1747
  94%|█████████▎| 797/851 [00:11<00:00, 67.62it/s]
1748
  95%|█████████▍| 805/851 [00:11<00:00, 70.55it/s]
1749
  96%|█████████▌| 813/851 [00:11<00:00, 67.17it/s]
1750
  96%|█████████▋| 820/851 [00:12<00:00, 67.58it/s]
1751
  97%|█████████▋| 828/851 [00:12<00:00, 68.89it/s]
1752
  98%|█████████▊| 835/851 [00:12<00:00, 68.68it/s]
1753
  99%|█████████▉| 842/851 [00:12<00:00, 65.37it/s]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1754
  0%| | 0/851 [00:00<?, ?it/s]
1755
  1%| | 10/851 [00:00<00:09, 91.52it/s]
1756
  2%|▏ | 20/851 [00:00<00:10, 77.24it/s]
1757
  3%|▎ | 28/851 [00:00<00:11, 73.46it/s]
1758
  4%|▍ | 36/851 [00:00<00:11, 71.95it/s]
1759
  5%|▌ | 44/851 [00:00<00:11, 72.24it/s]
1760
  6%|▌ | 52/851 [00:00<00:10, 72.93it/s]
1761
  7%|▋ | 60/851 [00:00<00:10, 73.18it/s]
1762
  8%|▊ | 68/851 [00:00<00:11, 67.50it/s]
1763
  9%|▉ | 75/851 [00:01<00:11, 67.33it/s]
1764
  10%|▉ | 83/851 [00:01<00:11, 68.35it/s]
1765
  11%|█ | 91/851 [00:01<00:10, 69.50it/s]
1766
  12%|█▏ | 98/851 [00:01<00:11, 68.01it/s]
1767
  12%|█▏ | 105/851 [00:01<00:10, 68.42it/s]
1768
  13%|█▎ | 112/851 [00:01<00:11, 66.54it/s]
1769
  14%|█▍ | 119/851 [00:01<00:10, 67.31it/s]
1770
  15%|█▍ | 126/851 [00:01<00:11, 63.93it/s]
1771
  16%|█▌ | 133/851 [00:01<00:11, 64.45it/s]
1772
  16%|█▋ | 140/851 [00:02<00:11, 64.55it/s]
1773
  17%|█▋ | 147/851 [00:02<00:10, 65.85it/s]
1774
  18%|█▊ | 154/851 [00:02<00:10, 64.70it/s]
1775
  19%|█▉ | 162/851 [00:02<00:10, 66.76it/s]
1776
  20%|█▉ | 170/851 [00:02<00:10, 67.85it/s]
1777
  21%|██ | 178/851 [00:02<00:09, 69.49it/s]
1778
  22%|██▏ | 186/851 [00:02<00:09, 69.84it/s]
1779
  23%|██▎ | 194/851 [00:02<00:09, 70.57it/s]
1780
  24%|██▍ | 203/851 [00:02<00:08, 72.61it/s]
1781
  25%|██▍ | 211/851 [00:03<00:09, 69.42it/s]
1782
  26%|██▌ | 218/851 [00:03<00:09, 66.85it/s]
1783
  27%|██▋ | 226/851 [00:03<00:09, 68.61it/s]
1784
  27%|██▋ | 234/851 [00:03<00:08, 69.75it/s]
1785
  28%|██▊ | 241/851 [00:03<00:09, 64.31it/s]
1786
  29%|██▉ | 249/851 [00:03<00:09, 66.20it/s]
1787
  30%|███ | 257/851 [00:03<00:08, 69.02it/s]
1788
  31%|███ | 265/851 [00:03<00:08, 71.69it/s]
1789
  32%|███▏ | 273/851 [00:03<00:08, 69.07it/s]
1790
  33%|███▎ | 281/851 [00:04<00:07, 71.84it/s]
1791
  34%|███▍ | 289/851 [00:04<00:08, 69.07it/s]
1792
  35%|███▍ | 297/851 [00:04<00:07, 70.91it/s]
1793
  36%|███▌ | 305/851 [00:04<00:07, 73.25it/s]
1794
  37%|███▋ | 313/851 [00:04<00:07, 68.13it/s]
1795
  38%|███▊ | 322/851 [00:04<00:07, 71.73it/s]
1796
  39%|███▉ | 330/851 [00:04<00:07, 69.46it/s]
1797
  40%|███▉ | 338/851 [00:04<00:07, 69.29it/s]
1798
  41%|████ | 346/851 [00:05<00:07, 69.47it/s]
1799
  41%|████▏ | 353/851 [00:05<00:07, 67.00it/s]
1800
  42%|████▏ | 360/851 [00:05<00:07, 65.48it/s]
1801
  43%|████▎ | 368/851 [00:05<00:07, 66.78it/s]
1802
  44%|████▍ | 375/851 [00:05<00:07, 66.05it/s]
1803
  45%|████▍ | 382/851 [00:05<00:07, 64.23it/s]
1804
  46%|████▌ | 390/851 [00:05<00:06, 67.18it/s]
1805
  47%|████▋ | 398/851 [00:05<00:06, 67.83it/s]
1806
  48%|████▊ | 405/851 [00:05<00:07, 62.69it/s]
1807
  48%|████▊ | 412/851 [00:06<00:06, 63.96it/s]
1808
  49%|████▉ | 420/851 [00:06<00:06, 67.12it/s]
1809
  50%|█████ | 427/851 [00:06<00:06, 64.71it/s]
1810
  51%|█████ | 435/851 [00:06<00:06, 67.20it/s]
1811
  52%|█████▏ | 443/851 [00:06<00:05, 68.92it/s]
1812
  53%|█████▎ | 451/851 [00:06<00:05, 69.97it/s]
1813
  54%|█████▍ | 459/851 [00:06<00:05, 70.79it/s]
1814
  55%|█████▍ | 467/851 [00:06<00:05, 64.99it/s]
1815
  56%|█████▌ | 474/851 [00:06<00:06, 60.17it/s]
1816
  57%|█████▋ | 481/851 [00:07<00:06, 61.07it/s]
1817
  57%|█████▋ | 488/851 [00:07<00:05, 62.68it/s]
1818
  58%|█████▊ | 496/851 [00:07<00:05, 65.90it/s]
1819
  59%|█████▉ | 504/851 [00:07<00:05, 69.35it/s]
1820
  60%|██████ | 512/851 [00:07<00:05, 67.73it/s]
1821
  61%|██████ | 520/851 [00:07<00:04, 68.81it/s]
1822
  62%|██████▏ | 527/851 [00:07<00:05, 62.95it/s]
1823
  63%|██████▎ | 535/851 [00:07<00:04, 65.45it/s]
1824
  64%|██████▍ | 543/851 [00:07<00:04, 67.58it/s]
1825
  65%|██████▍ | 550/851 [00:08<00:04, 65.74it/s]
1826
  65%|██████▌ | 557/851 [00:08<00:04, 66.86it/s]
1827
  66%|██████▋ | 565/851 [00:08<00:04, 70.26it/s]
1828
  67%|██████▋ | 573/851 [00:08<00:03, 70.03it/s]
1829
  68%|██████▊ | 581/851 [00:08<00:03, 68.47it/s]
1830
  69%|██████▉ | 588/851 [00:08<00:04, 64.15it/s]
1831
  70%|███████ | 596/851 [00:08<00:03, 65.33it/s]
1832
  71%|███████ | 604/851 [00:08<00:03, 66.81it/s]
1833
  72%|███████▏ | 611/851 [00:09<00:03, 66.06it/s]
1834
  73%|███████▎ | 618/851 [00:09<00:03, 61.98it/s]
1835
  73%|███████▎ | 625/851 [00:09<00:03, 63.84it/s]
1836
  74%|███████▍ | 632/851 [00:09<00:03, 62.09it/s]
1837
  75%|███████▌ | 639/851 [00:09<00:03, 62.91it/s]
1838
  76%|███████▌ | 646/851 [00:09<00:03, 60.42it/s]
1839
  77%|███████▋ | 654/851 [00:09<00:03, 63.78it/s]
1840
  78%|███████▊ | 661/851 [00:09<00:02, 64.94it/s]
1841
  78%|███████▊ | 668/851 [00:09<00:02, 66.04it/s]
1842
  79%|███████▉ | 675/851 [00:10<00:02, 64.72it/s]
1843
  80%|████████ | 682/851 [00:10<00:02, 65.98it/s]
1844
  81%|████████ | 690/851 [00:10<00:02, 69.12it/s]
1845
  82%|████████▏ | 698/851 [00:10<00:02, 70.40it/s]
1846
  83%|████████▎ | 706/851 [00:10<00:02, 71.97it/s]
1847
  84%|████████▍ | 714/851 [00:10<00:01, 71.53it/s]
1848
  85%|████████▍ | 722/851 [00:10<00:01, 69.93it/s]
1849
  86%|████████▌ | 730/851 [00:10<00:01, 71.53it/s]
1850
  87%|████████▋ | 738/851 [00:10<00:01, 72.49it/s]
1851
  88%|████████▊ | 746/851 [00:11<00:01, 71.54it/s]
1852
  89%|████████▊ | 754/851 [00:11<00:01, 71.10it/s]
1853
  90%|████████▉ | 762/851 [00:11<00:01, 72.35it/s]
1854
  90%|█████████ | 770/851 [00:11<00:01, 67.47it/s]
1855
  91%|█████████▏| 777/851 [00:11<00:01, 68.02it/s]
1856
  92%|█████████▏| 784/851 [00:11<00:01, 66.04it/s]
1857
  93%|█████████▎| 791/851 [00:11<00:00, 66.57it/s]
1858
  94%|█████████▍| 798/851 [00:11<00:00, 67.52it/s]
1859
  95%|█████████▍| 806/851 [00:11<00:00, 69.97it/s]
1860
  96%|█████████▌| 814/851 [00:12<00:00, 67.07it/s]
1861
  97%|█████████▋| 822/851 [00:12<00:00, 68.50it/s]
1862
  97%|█████████▋| 829/851 [00:12<00:00, 68.73it/s]
1863
  98%|█████████▊| 836/851 [00:12<00:00, 68.89it/s]
1864
  99%|█████████▉| 843/851 [00:12<00:00, 64.90it/s]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1640
  [INFO|modeling_utils.py:2590] 2024-05-13 12:32:23,238 >> Model weights saved in /content/dissertation/scripts/ner/output/model.safetensors
1641
  [INFO|tokenization_utils_base.py:2488] 2024-05-13 12:32:23,239 >> tokenizer config file saved in /content/dissertation/scripts/ner/output/tokenizer_config.json
1642
  [INFO|tokenization_utils_base.py:2497] 2024-05-13 12:32:23,239 >> Special tokens file saved in /content/dissertation/scripts/ner/output/special_tokens_map.json
1643
+ {'eval_loss': 1.1188451051712036, 'eval_precision': 0.3145413870246085, 'eval_recall': 0.8224145999064109, 'eval_f1': 0.45504563402161957, 'eval_accuracy': 0.8526409018818958, 'eval_runtime': 16.3165, 'eval_samples_per_second': 417.185, 'eval_steps_per_second': 52.156, 'epoch': 10.0}
1644
+ {'train_runtime': 3311.208, 'train_samples_per_second': 82.218, 'train_steps_per_second': 5.137, 'train_loss': 0.08913132946046923, 'epoch': 10.0}
1645
+ ***** train metrics *****
1646
+ epoch = 9.9971
1647
+ total_flos = 6240533GF
1648
+ train_loss = 0.0891
1649
+ train_runtime = 0:55:11.20
1650
+ train_samples = 27224
1651
+ train_samples_per_second = 82.218
1652
+ train_steps_per_second = 5.137
1653
+ 05/13/2024 12:32:28 - INFO - __main__ - *** Evaluate ***
1654
+ [INFO|trainer.py:786] 2024-05-13 12:32:28,316 >> The following columns in the evaluation set don't have a corresponding argument in `RobertaForTokenClassification.forward` and have been ignored: tokens, ner_tags, id. If tokens, ner_tags, id are not expected by `RobertaForTokenClassification.forward`, you can safely ignore this message.
1655
+ [INFO|trainer.py:3614] 2024-05-13 12:32:28,318 >> ***** Running Evaluation *****
1656
+ [INFO|trainer.py:3616] 2024-05-13 12:32:28,318 >> Num examples = 6807
1657
+ [INFO|trainer.py:3619] 2024-05-13 12:32:28,318 >> Batch size = 8
1658
+
1659
  0%| | 0/851 [00:00<?, ?it/s]
1660
  1%| | 10/851 [00:00<00:09, 92.31it/s]
1661
  2%|▏ | 20/851 [00:00<00:10, 77.97it/s]
1662
  3%|▎ | 28/851 [00:00<00:11, 73.38it/s]
1663
  4%|▍ | 36/851 [00:00<00:11, 72.05it/s]
1664
  5%|▌ | 44/851 [00:00<00:11, 73.19it/s]
1665
  6%|▌ | 52/851 [00:00<00:10, 73.90it/s]
1666
  7%|▋ | 60/851 [00:00<00:10, 74.36it/s]
1667
  8%|▊ | 68/851 [00:00<00:11, 68.74it/s]
1668
  9%|▉ | 75/851 [00:01<00:11, 68.45it/s]
1669
  10%|▉ | 83/851 [00:01<00:11, 69.37it/s]
1670
  11%|█ | 91/851 [00:01<00:10, 70.51it/s]
1671
  12%|█▏ | 99/851 [00:01<00:10, 69.26it/s]
1672
  13%|█▎ | 107/851 [00:01<00:10, 67.76it/s]
1673
  13%|█▎ | 114/851 [00:01<00:10, 67.63it/s]
1674
  14%|█▍ | 122/851 [00:01<00:10, 70.32it/s]
1675
  15%|█▌ | 130/851 [00:01<00:11, 64.72it/s]
1676
  16%|█▌ | 137/851 [00:01<00:11, 64.04it/s]
1677
  17%|█▋ | 145/851 [00:02<00:10, 66.06it/s]
1678
  18%|█▊ | 152/851 [00:02<00:10, 64.75it/s]
1679
  19%|█▉ | 160/851 [00:02<00:10, 68.49it/s]
1680
  20%|█▉ | 167/851 [00:02<00:09, 68.88it/s]
1681
  21%|██ | 175/851 [00:02<00:09, 70.11it/s]
1682
  22%|██▏ | 183/851 [00:02<00:09, 70.82it/s]
1683
  22%|██▏ | 191/851 [00:02<00:09, 71.05it/s]
1684
  23%|██▎ | 199/851 [00:02<00:09, 72.33it/s]
1685
  24%|██▍ | 207/851 [00:02<00:09, 71.19it/s]
1686
  25%|██▌ | 215/851 [00:03<00:09, 67.78it/s]
1687
  26%|██▌ | 223/851 [00:03<00:09, 68.86it/s]
1688
  27%|██▋ | 231/851 [00:03<00:08, 69.60it/s]
1689
  28%|██▊ | 238/851 [00:03<00:09, 66.60it/s]
1690
  29%|██▉ | 245/851 [00:03<00:09, 64.21it/s]
1691
  30%|██▉ | 253/851 [00:03<00:08, 67.82it/s]
1692
  31%|███ | 261/851 [00:03<00:08, 70.71it/s]
1693
  32%|███▏ | 269/851 [00:03<00:08, 69.23it/s]
1694
  33%|███▎ | 277/851 [00:03<00:07, 71.99it/s]
1695
  33%|███▎ | 285/851 [00:04<00:07, 73.42it/s]
1696
  34%|███▍ | 293/851 [00:04<00:07, 71.22it/s]
1697
  35%|███▌ | 301/851 [00:04<00:07, 72.04it/s]
1698
  36%|███▋ | 310/851 [00:04<00:07, 74.21it/s]
1699
  37%|███▋ | 318/851 [00:04<00:07, 70.47it/s]
1700
  38%|███▊ | 326/851 [00:04<00:07, 69.69it/s]
1701
  39%|███▉ | 334/851 [00:04<00:07, 69.49it/s]
1702
  40%|████ | 342/851 [00:04<00:07, 70.77it/s]
1703
  41%|████ | 350/851 [00:05<00:07, 70.56it/s]
1704
  42%|████▏ | 358/851 [00:05<00:07, 65.68it/s]
1705
  43%|████▎ | 366/851 [00:05<00:07, 66.28it/s]
1706
  44%|████▍ | 373/851 [00:05<00:07, 66.55it/s]
1707
  45%|████▍ | 380/851 [00:05<00:07, 64.58it/s]
1708
  46%|████▌ | 388/851 [00:05<00:06, 67.42it/s]
1709
  46%|████▋ | 395/851 [00:05<00:06, 67.26it/s]
1710
  47%|████▋ | 402/851 [00:05<00:06, 67.54it/s]
1711
  48%|████▊ | 409/851 [00:05<00:06, 64.14it/s]
1712
  49%|████▉ | 416/851 [00:06<00:06, 65.62it/s]
1713
  50%|████▉ | 424/851 [00:06<00:06, 67.91it/s]
1714
  51%|█████ | 431/851 [00:06<00:06, 66.25it/s]
1715
  52%|█████▏ | 439/851 [00:06<00:06, 68.51it/s]
1716
  52%|█████▏ | 446/851 [00:06<00:05, 67.97it/s]
1717
  53%|█████▎ | 454/851 [00:06<00:05, 68.93it/s]
1718
  54%|█████▍ | 461/851 [00:06<00:05, 68.17it/s]
1719
  55%|█████▍ | 468/851 [00:06<00:05, 65.16it/s]
1720
  56%|█████▌ | 475/851 [00:06<00:06, 60.96it/s]
1721
  57%|█████▋ | 482/851 [00:07<00:05, 61.68it/s]
1722
  57%|█████▋ | 489/851 [00:07<00:05, 61.89it/s]
1723
  58%|█████▊ | 497/851 [00:07<00:05, 66.34it/s]
1724
  59%|█████▉ | 505/851 [00:07<00:04, 69.56it/s]
1725
  60%|██████ | 513/851 [00:07<00:04, 68.23it/s]
1726
  61%|██████ | 521/851 [00:07<00:04, 66.68it/s]
1727
  62%|██████▏ | 528/851 [00:07<00:05, 63.84it/s]
1728
  63%|██████▎ | 536/851 [00:07<00:04, 66.25it/s]
1729
  64%|██████▍ | 544/851 [00:07<00:04, 68.40it/s]
1730
  65%|██████▍ | 551/851 [00:08<00:04, 65.47it/s]
1731
  66%|██████▌ | 558/851 [00:08<00:04, 66.51it/s]
1732
  67%|██████▋ | 566/851 [00:08<00:04, 69.07it/s]
1733
  67%|██████▋ | 573/851 [00:08<00:04, 69.29it/s]
1734
  68%|██████▊ | 580/851 [00:08<00:04, 67.14it/s]
1735
  69%|██████▉ | 587/851 [00:08<00:04, 63.62it/s]
1736
  70%|██████▉ | 594/851 [00:08<00:03, 65.03it/s]
1737
  71%|███████ | 601/851 [00:08<00:03, 64.50it/s]
1738
  71%|███████▏ | 608/851 [00:08<00:03, 65.17it/s]
1739
  72%|███████▏ | 615/851 [00:09<00:03, 60.27it/s]
1740
  73%|███████▎ | 623/851 [00:09<00:03, 64.78it/s]
1741
  74%|███████▍ | 630/851 [00:09<00:03, 61.49it/s]
1742
  75%|███████▍ | 638/851 [00:09<00:03, 64.58it/s]
1743
  76%|███████▌ | 645/851 [00:09<00:03, 59.82it/s]
1744
  77%|███████▋ | 653/851 [00:09<00:03, 62.97it/s]
1745
  78%|███████▊ | 660/851 [00:09<00:02, 64.81it/s]
1746
  78%|███████▊ | 667/851 [00:09<00:02, 65.46it/s]
1747
  79%|███████▉ | 674/851 [00:09<00:02, 64.18it/s]
1748
  80%|████████ | 681/851 [00:10<00:02, 65.23it/s]
1749
  81%|████████ | 689/851 [00:10<00:02, 69.23it/s]
1750
  82%|████████▏ | 697/851 [00:10<00:02, 70.21it/s]
1751
  83%|████████▎ | 705/851 [00:10<00:02, 71.66it/s]
1752
  84%|████████▍ | 713/851 [00:10<00:01, 72.76it/s]
1753
  85%|████████▍ | 721/851 [00:10<00:01, 69.62it/s]
1754
  86%|████████▌ | 729/851 [00:10<00:01, 70.75it/s]
1755
  87%|████████▋ | 737/851 [00:10<00:01, 71.16it/s]
1756
  88%|████████▊ | 745/851 [00:10<00:01, 71.47it/s]
1757
  88%|████████▊ | 753/851 [00:11<00:01, 70.42it/s]
1758
  89%|████████▉ | 761/851 [00:11<00:01, 72.99it/s]
1759
  90%|█████████ | 769/851 [00:11<00:01, 67.29it/s]
1760
  91%|█████████ | 776/851 [00:11<00:01, 67.64it/s]
1761
  92%|█████████▏| 783/851 [00:11<00:01, 65.98it/s]
1762
  93%|█████████▎| 790/851 [00:11<00:00, 66.89it/s]
1763
  94%|█████████▎| 797/851 [00:11<00:00, 67.62it/s]
1764
  95%|█████████▍| 805/851 [00:11<00:00, 70.55it/s]
1765
  96%|█████████▌| 813/851 [00:11<00:00, 67.17it/s]
1766
  96%|█████████▋| 820/851 [00:12<00:00, 67.58it/s]
1767
  97%|█████████▋| 828/851 [00:12<00:00, 68.89it/s]
1768
  98%|█████████▊| 835/851 [00:12<00:00, 68.68it/s]
1769
  99%|█████████▉| 842/851 [00:12<00:00, 65.37it/s]
1770
+ _warn_prf(average, modifier, msg_start, len(result))
1771
+
1772
+ ***** eval metrics *****
1773
+ epoch = 9.9971
1774
+ eval_accuracy = 0.856
1775
+ eval_f1 = 0.4631
1776
+ eval_loss = 0.9499
1777
+ eval_precision = 0.3214
1778
+ eval_recall = 0.8278
1779
+ eval_runtime = 0:00:16.16
1780
+ eval_samples = 6807
1781
+ eval_samples_per_second = 421.218
1782
+ eval_steps_per_second = 52.66
1783
+ 05/13/2024 12:32:44 - INFO - __main__ - *** Predict ***
1784
+ [INFO|trainer.py:786] 2024-05-13 12:32:44,481 >> The following columns in the test set don't have a corresponding argument in `RobertaForTokenClassification.forward` and have been ignored: tokens, ner_tags, id. If tokens, ner_tags, id are not expected by `RobertaForTokenClassification.forward`, you can safely ignore this message.
1785
+ [INFO|trainer.py:3614] 2024-05-13 12:32:44,483 >> ***** Running Prediction *****
1786
+ [INFO|trainer.py:3616] 2024-05-13 12:32:44,484 >> Num examples = 6807
1787
+ [INFO|trainer.py:3619] 2024-05-13 12:32:44,484 >> Batch size = 8
1788
+
1789
  0%| | 0/851 [00:00<?, ?it/s]
1790
  1%| | 10/851 [00:00<00:09, 91.52it/s]
1791
  2%|▏ | 20/851 [00:00<00:10, 77.24it/s]
1792
  3%|▎ | 28/851 [00:00<00:11, 73.46it/s]
1793
  4%|▍ | 36/851 [00:00<00:11, 71.95it/s]
1794
  5%|▌ | 44/851 [00:00<00:11, 72.24it/s]
1795
  6%|▌ | 52/851 [00:00<00:10, 72.93it/s]
1796
  7%|▋ | 60/851 [00:00<00:10, 73.18it/s]
1797
  8%|▊ | 68/851 [00:00<00:11, 67.50it/s]
1798
  9%|▉ | 75/851 [00:01<00:11, 67.33it/s]
1799
  10%|▉ | 83/851 [00:01<00:11, 68.35it/s]
1800
  11%|█ | 91/851 [00:01<00:10, 69.50it/s]
1801
  12%|█▏ | 98/851 [00:01<00:11, 68.01it/s]
1802
  12%|█▏ | 105/851 [00:01<00:10, 68.42it/s]
1803
  13%|█▎ | 112/851 [00:01<00:11, 66.54it/s]
1804
  14%|█▍ | 119/851 [00:01<00:10, 67.31it/s]
1805
  15%|█▍ | 126/851 [00:01<00:11, 63.93it/s]
1806
  16%|█▌ | 133/851 [00:01<00:11, 64.45it/s]
1807
  16%|█▋ | 140/851 [00:02<00:11, 64.55it/s]
1808
  17%|█▋ | 147/851 [00:02<00:10, 65.85it/s]
1809
  18%|█▊ | 154/851 [00:02<00:10, 64.70it/s]
1810
  19%|█▉ | 162/851 [00:02<00:10, 66.76it/s]
1811
  20%|█▉ | 170/851 [00:02<00:10, 67.85it/s]
1812
  21%|██ | 178/851 [00:02<00:09, 69.49it/s]
1813
  22%|██▏ | 186/851 [00:02<00:09, 69.84it/s]
1814
  23%|██▎ | 194/851 [00:02<00:09, 70.57it/s]
1815
  24%|██▍ | 203/851 [00:02<00:08, 72.61it/s]
1816
  25%|██▍ | 211/851 [00:03<00:09, 69.42it/s]
1817
  26%|██▌ | 218/851 [00:03<00:09, 66.85it/s]
1818
  27%|██▋ | 226/851 [00:03<00:09, 68.61it/s]
1819
  27%|██▋ | 234/851 [00:03<00:08, 69.75it/s]
1820
  28%|██▊ | 241/851 [00:03<00:09, 64.31it/s]
1821
  29%|██▉ | 249/851 [00:03<00:09, 66.20it/s]
1822
  30%|███ | 257/851 [00:03<00:08, 69.02it/s]
1823
  31%|███ | 265/851 [00:03<00:08, 71.69it/s]
1824
  32%|███▏ | 273/851 [00:03<00:08, 69.07it/s]
1825
  33%|███▎ | 281/851 [00:04<00:07, 71.84it/s]
1826
  34%|███▍ | 289/851 [00:04<00:08, 69.07it/s]
1827
  35%|███▍ | 297/851 [00:04<00:07, 70.91it/s]
1828
  36%|███▌ | 305/851 [00:04<00:07, 73.25it/s]
1829
  37%|███▋ | 313/851 [00:04<00:07, 68.13it/s]
1830
  38%|███▊ | 322/851 [00:04<00:07, 71.73it/s]
1831
  39%|███▉ | 330/851 [00:04<00:07, 69.46it/s]
1832
  40%|███▉ | 338/851 [00:04<00:07, 69.29it/s]
1833
  41%|████ | 346/851 [00:05<00:07, 69.47it/s]
1834
  41%|████▏ | 353/851 [00:05<00:07, 67.00it/s]
1835
  42%|████▏ | 360/851 [00:05<00:07, 65.48it/s]
1836
  43%|████▎ | 368/851 [00:05<00:07, 66.78it/s]
1837
  44%|████▍ | 375/851 [00:05<00:07, 66.05it/s]
1838
  45%|████▍ | 382/851 [00:05<00:07, 64.23it/s]
1839
  46%|████▌ | 390/851 [00:05<00:06, 67.18it/s]
1840
  47%|████▋ | 398/851 [00:05<00:06, 67.83it/s]
1841
  48%|████▊ | 405/851 [00:05<00:07, 62.69it/s]
1842
  48%|████▊ | 412/851 [00:06<00:06, 63.96it/s]
1843
  49%|████▉ | 420/851 [00:06<00:06, 67.12it/s]
1844
  50%|█████ | 427/851 [00:06<00:06, 64.71it/s]
1845
  51%|█████ | 435/851 [00:06<00:06, 67.20it/s]
1846
  52%|█████▏ | 443/851 [00:06<00:05, 68.92it/s]
1847
  53%|█████▎ | 451/851 [00:06<00:05, 69.97it/s]
1848
  54%|█████▍ | 459/851 [00:06<00:05, 70.79it/s]
1849
  55%|█████▍ | 467/851 [00:06<00:05, 64.99it/s]
1850
  56%|█████▌ | 474/851 [00:06<00:06, 60.17it/s]
1851
  57%|█████▋ | 481/851 [00:07<00:06, 61.07it/s]
1852
  57%|█████▋ | 488/851 [00:07<00:05, 62.68it/s]
1853
  58%|█████▊ | 496/851 [00:07<00:05, 65.90it/s]
1854
  59%|█████▉ | 504/851 [00:07<00:05, 69.35it/s]
1855
  60%|██████ | 512/851 [00:07<00:05, 67.73it/s]
1856
  61%|██████ | 520/851 [00:07<00:04, 68.81it/s]
1857
  62%|██████▏ | 527/851 [00:07<00:05, 62.95it/s]
1858
  63%|██████▎ | 535/851 [00:07<00:04, 65.45it/s]
1859
  64%|██████▍ | 543/851 [00:07<00:04, 67.58it/s]
1860
  65%|██████▍ | 550/851 [00:08<00:04, 65.74it/s]
1861
  65%|██████▌ | 557/851 [00:08<00:04, 66.86it/s]
1862
  66%|██████▋ | 565/851 [00:08<00:04, 70.26it/s]
1863
  67%|██████▋ | 573/851 [00:08<00:03, 70.03it/s]
1864
  68%|██████▊ | 581/851 [00:08<00:03, 68.47it/s]
1865
  69%|██████▉ | 588/851 [00:08<00:04, 64.15it/s]
1866
  70%|███████ | 596/851 [00:08<00:03, 65.33it/s]
1867
  71%|███████ | 604/851 [00:08<00:03, 66.81it/s]
1868
  72%|███████▏ | 611/851 [00:09<00:03, 66.06it/s]
1869
  73%|███████▎ | 618/851 [00:09<00:03, 61.98it/s]
1870
  73%|███████▎ | 625/851 [00:09<00:03, 63.84it/s]
1871
  74%|███████▍ | 632/851 [00:09<00:03, 62.09it/s]
1872
  75%|███████▌ | 639/851 [00:09<00:03, 62.91it/s]
1873
  76%|███████▌ | 646/851 [00:09<00:03, 60.42it/s]
1874
  77%|███████▋ | 654/851 [00:09<00:03, 63.78it/s]
1875
  78%|███████▊ | 661/851 [00:09<00:02, 64.94it/s]
1876
  78%|███████▊ | 668/851 [00:09<00:02, 66.04it/s]
1877
  79%|███████▉ | 675/851 [00:10<00:02, 64.72it/s]
1878
  80%|████████ | 682/851 [00:10<00:02, 65.98it/s]
1879
  81%|████████ | 690/851 [00:10<00:02, 69.12it/s]
1880
  82%|████████▏ | 698/851 [00:10<00:02, 70.40it/s]
1881
  83%|████████▎ | 706/851 [00:10<00:02, 71.97it/s]
1882
  84%|████████▍ | 714/851 [00:10<00:01, 71.53it/s]
1883
  85%|████████▍ | 722/851 [00:10<00:01, 69.93it/s]
1884
  86%|████████▌ | 730/851 [00:10<00:01, 71.53it/s]
1885
  87%|████████▋ | 738/851 [00:10<00:01, 72.49it/s]
1886
  88%|████████▊ | 746/851 [00:11<00:01, 71.54it/s]
1887
  89%|████████▊ | 754/851 [00:11<00:01, 71.10it/s]
1888
  90%|████████▉ | 762/851 [00:11<00:01, 72.35it/s]
1889
  90%|█████████ | 770/851 [00:11<00:01, 67.47it/s]
1890
  91%|█████████▏| 777/851 [00:11<00:01, 68.02it/s]
1891
  92%|█████████▏| 784/851 [00:11<00:01, 66.04it/s]
1892
  93%|█████████▎| 791/851 [00:11<00:00, 66.57it/s]
1893
  94%|█████████▍| 798/851 [00:11<00:00, 67.52it/s]
1894
  95%|█████████▍| 806/851 [00:11<00:00, 69.97it/s]
1895
  96%|█████████▌| 814/851 [00:12<00:00, 67.07it/s]
1896
  97%|█████████▋| 822/851 [00:12<00:00, 68.50it/s]
1897
  97%|█████████▋| 829/851 [00:12<00:00, 68.73it/s]
1898
  98%|█████████▊| 836/851 [00:12<00:00, 68.89it/s]
1899
  99%|█████████▉| 843/851 [00:12<00:00, 64.90it/s]
1900
+ [INFO|trainer.py:3305] 2024-05-13 12:33:01,240 >> Saving model checkpoint to /content/dissertation/scripts/ner/output
1901
+ [INFO|configuration_utils.py:471] 2024-05-13 12:33:01,241 >> Configuration saved in /content/dissertation/scripts/ner/output/config.json
1902
+ [INFO|modeling_utils.py:2590] 2024-05-13 12:33:02,436 >> Model weights saved in /content/dissertation/scripts/ner/output/model.safetensors
1903
+ [INFO|tokenization_utils_base.py:2488] 2024-05-13 12:33:02,437 >> tokenizer config file saved in /content/dissertation/scripts/ner/output/tokenizer_config.json
1904
+ [INFO|tokenization_utils_base.py:2497] 2024-05-13 12:33:02,437 >> Special tokens file saved in /content/dissertation/scripts/ner/output/special_tokens_map.json
1905
+ ***** predict metrics *****
1906
+ predict_accuracy = 0.856
1907
+ predict_f1 = 0.4631
1908
+ predict_loss = 0.9499
1909
+ predict_precision = 0.3214
1910
+ predict_recall = 0.8278
1911
+ predict_runtime = 0:00:16.43
1912
+ predict_samples_per_second = 414.134
1913
+ predict_steps_per_second = 51.774
1914
+
train_results.json ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 9.997061416397296,
3
+ "total_flos": 6700722040732752.0,
4
+ "train_loss": 0.08913132946046923,
5
+ "train_runtime": 3311.208,
6
+ "train_samples": 27224,
7
+ "train_samples_per_second": 82.218,
8
+ "train_steps_per_second": 5.137
9
+ }
trainer_state.json ADDED
@@ -0,0 +1,388 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": 0.46305870034683594,
3
+ "best_model_checkpoint": "/content/dissertation/scripts/ner/output/checkpoint-11910",
4
+ "epoch": 9.997061416397296,
5
+ "eval_steps": 500,
6
+ "global_step": 17010,
7
+ "is_hyper_param_search": false,
8
+ "is_local_process_zero": true,
9
+ "is_world_process_zero": true,
10
+ "log_history": [
11
+ {
12
+ "epoch": 0.29385836027034967,
13
+ "grad_norm": 1.9668159484863281,
14
+ "learning_rate": 4.853027630805409e-05,
15
+ "loss": 0.4174,
16
+ "step": 500
17
+ },
18
+ {
19
+ "epoch": 0.5877167205406993,
20
+ "grad_norm": 3.246731758117676,
21
+ "learning_rate": 4.7060552616108174e-05,
22
+ "loss": 0.2765,
23
+ "step": 1000
24
+ },
25
+ {
26
+ "epoch": 0.8815750808110491,
27
+ "grad_norm": 2.936720609664917,
28
+ "learning_rate": 4.559082892416226e-05,
29
+ "loss": 0.2596,
30
+ "step": 1500
31
+ },
32
+ {
33
+ "epoch": 0.9997061416397296,
34
+ "eval_accuracy": 0.8358953937837708,
35
+ "eval_f1": 0.3926881971617123,
36
+ "eval_loss": 0.431875616312027,
37
+ "eval_precision": 0.26165460347108727,
38
+ "eval_recall": 0.786616752456715,
39
+ "eval_runtime": 16.2639,
40
+ "eval_samples_per_second": 418.535,
41
+ "eval_steps_per_second": 52.325,
42
+ "step": 1701
43
+ },
44
+ {
45
+ "epoch": 1.1754334410813987,
46
+ "grad_norm": 2.302980661392212,
47
+ "learning_rate": 4.4121105232216346e-05,
48
+ "loss": 0.2135,
49
+ "step": 2000
50
+ },
51
+ {
52
+ "epoch": 1.4692918013517484,
53
+ "grad_norm": 3.7150967121124268,
54
+ "learning_rate": 4.265138154027043e-05,
55
+ "loss": 0.1839,
56
+ "step": 2500
57
+ },
58
+ {
59
+ "epoch": 1.7631501616220981,
60
+ "grad_norm": 1.2019191980361938,
61
+ "learning_rate": 4.118165784832452e-05,
62
+ "loss": 0.1853,
63
+ "step": 3000
64
+ },
65
+ {
66
+ "epoch": 2.0,
67
+ "eval_accuracy": 0.8644911466450625,
68
+ "eval_f1": 0.44846535316981634,
69
+ "eval_loss": 0.3841294050216675,
70
+ "eval_precision": 0.3142374154770849,
71
+ "eval_recall": 0.7828731867103416,
72
+ "eval_runtime": 16.2484,
73
+ "eval_samples_per_second": 418.933,
74
+ "eval_steps_per_second": 52.374,
75
+ "step": 3403
76
+ },
77
+ {
78
+ "epoch": 2.0570085218924477,
79
+ "grad_norm": 2.090308904647827,
80
+ "learning_rate": 3.971193415637861e-05,
81
+ "loss": 0.1684,
82
+ "step": 3500
83
+ },
84
+ {
85
+ "epoch": 2.3508668821627974,
86
+ "grad_norm": 0.9995286464691162,
87
+ "learning_rate": 3.824221046443269e-05,
88
+ "loss": 0.1222,
89
+ "step": 4000
90
+ },
91
+ {
92
+ "epoch": 2.644725242433147,
93
+ "grad_norm": 1.3526027202606201,
94
+ "learning_rate": 3.677248677248677e-05,
95
+ "loss": 0.1277,
96
+ "step": 4500
97
+ },
98
+ {
99
+ "epoch": 2.938583602703497,
100
+ "grad_norm": 2.2816500663757324,
101
+ "learning_rate": 3.530276308054086e-05,
102
+ "loss": 0.1254,
103
+ "step": 5000
104
+ },
105
+ {
106
+ "epoch": 2.9997061416397295,
107
+ "eval_accuracy": 0.843626202360437,
108
+ "eval_f1": 0.4435178651613316,
109
+ "eval_loss": 0.6409665942192078,
110
+ "eval_precision": 0.30552364118426867,
111
+ "eval_recall": 0.8088441740758072,
112
+ "eval_runtime": 16.2306,
113
+ "eval_samples_per_second": 419.394,
114
+ "eval_steps_per_second": 52.432,
115
+ "step": 5104
116
+ },
117
+ {
118
+ "epoch": 3.2324419629738466,
119
+ "grad_norm": 0.6440290808677673,
120
+ "learning_rate": 3.3833039388594945e-05,
121
+ "loss": 0.0929,
122
+ "step": 5500
123
+ },
124
+ {
125
+ "epoch": 3.5263003232441963,
126
+ "grad_norm": 3.7906153202056885,
127
+ "learning_rate": 3.2363315696649034e-05,
128
+ "loss": 0.0896,
129
+ "step": 6000
130
+ },
131
+ {
132
+ "epoch": 3.820158683514546,
133
+ "grad_norm": 1.0953032970428467,
134
+ "learning_rate": 3.0893592004703116e-05,
135
+ "loss": 0.0823,
136
+ "step": 6500
137
+ },
138
+ {
139
+ "epoch": 4.0,
140
+ "eval_accuracy": 0.843598739452527,
141
+ "eval_f1": 0.4336244267135767,
142
+ "eval_loss": 0.7241775393486023,
143
+ "eval_precision": 0.2964012711500472,
144
+ "eval_recall": 0.8074403369209172,
145
+ "eval_runtime": 16.5937,
146
+ "eval_samples_per_second": 410.216,
147
+ "eval_steps_per_second": 51.285,
148
+ "step": 6806
149
+ },
150
+ {
151
+ "epoch": 4.114017043784895,
152
+ "grad_norm": 1.7823286056518555,
153
+ "learning_rate": 2.9423868312757202e-05,
154
+ "loss": 0.0761,
155
+ "step": 7000
156
+ },
157
+ {
158
+ "epoch": 4.407875404055245,
159
+ "grad_norm": 1.165720820426941,
160
+ "learning_rate": 2.795414462081129e-05,
161
+ "loss": 0.0603,
162
+ "step": 7500
163
+ },
164
+ {
165
+ "epoch": 4.701733764325595,
166
+ "grad_norm": 4.1130452156066895,
167
+ "learning_rate": 2.648442092886537e-05,
168
+ "loss": 0.0589,
169
+ "step": 8000
170
+ },
171
+ {
172
+ "epoch": 4.9955921245959445,
173
+ "grad_norm": 0.3957385718822479,
174
+ "learning_rate": 2.501469723691946e-05,
175
+ "loss": 0.0597,
176
+ "step": 8500
177
+ },
178
+ {
179
+ "epoch": 4.99970614163973,
180
+ "eval_accuracy": 0.8501829716239504,
181
+ "eval_f1": 0.4494872643069798,
182
+ "eval_loss": 0.7755796909332275,
183
+ "eval_precision": 0.3133474771700028,
184
+ "eval_recall": 0.7948058025269069,
185
+ "eval_runtime": 16.1833,
186
+ "eval_samples_per_second": 420.619,
187
+ "eval_steps_per_second": 52.585,
188
+ "step": 8507
189
+ },
190
+ {
191
+ "epoch": 5.289450484866294,
192
+ "grad_norm": 1.0087623596191406,
193
+ "learning_rate": 2.3544973544973546e-05,
194
+ "loss": 0.0423,
195
+ "step": 9000
196
+ },
197
+ {
198
+ "epoch": 5.583308845136644,
199
+ "grad_norm": 2.162200450897217,
200
+ "learning_rate": 2.2075249853027632e-05,
201
+ "loss": 0.043,
202
+ "step": 9500
203
+ },
204
+ {
205
+ "epoch": 5.877167205406994,
206
+ "grad_norm": 1.1820895671844482,
207
+ "learning_rate": 2.0605526161081718e-05,
208
+ "loss": 0.0446,
209
+ "step": 10000
210
+ },
211
+ {
212
+ "epoch": 6.0,
213
+ "eval_accuracy": 0.8483223596130476,
214
+ "eval_f1": 0.4512908099586152,
215
+ "eval_loss": 0.8560824394226074,
216
+ "eval_precision": 0.31372728103023106,
217
+ "eval_recall": 0.8036967711745437,
218
+ "eval_runtime": 16.2581,
219
+ "eval_samples_per_second": 418.684,
220
+ "eval_steps_per_second": 52.343,
221
+ "step": 10209
222
+ },
223
+ {
224
+ "epoch": 6.171025565677343,
225
+ "grad_norm": 2.190476894378662,
226
+ "learning_rate": 1.91358024691358e-05,
227
+ "loss": 0.0333,
228
+ "step": 10500
229
+ },
230
+ {
231
+ "epoch": 6.464883925947693,
232
+ "grad_norm": 1.5180469751358032,
233
+ "learning_rate": 1.766607877718989e-05,
234
+ "loss": 0.0333,
235
+ "step": 11000
236
+ },
237
+ {
238
+ "epoch": 6.758742286218043,
239
+ "grad_norm": 1.4279770851135254,
240
+ "learning_rate": 1.6196355085243976e-05,
241
+ "loss": 0.0325,
242
+ "step": 11500
243
+ },
244
+ {
245
+ "epoch": 6.99970614163973,
246
+ "eval_accuracy": 0.8559776451929613,
247
+ "eval_f1": 0.46305870034683594,
248
+ "eval_loss": 0.9498798847198486,
249
+ "eval_precision": 0.32143181611701643,
250
+ "eval_recall": 0.8277959756668226,
251
+ "eval_runtime": 16.1891,
252
+ "eval_samples_per_second": 420.467,
253
+ "eval_steps_per_second": 52.566,
254
+ "step": 11910
255
+ },
256
+ {
257
+ "epoch": 7.052600646488393,
258
+ "grad_norm": 0.6558970212936401,
259
+ "learning_rate": 1.472663139329806e-05,
260
+ "loss": 0.0289,
261
+ "step": 12000
262
+ },
263
+ {
264
+ "epoch": 7.346459006758742,
265
+ "grad_norm": 0.26189878582954407,
266
+ "learning_rate": 1.3256907701352148e-05,
267
+ "loss": 0.0224,
268
+ "step": 12500
269
+ },
270
+ {
271
+ "epoch": 7.640317367029092,
272
+ "grad_norm": 1.370686650276184,
273
+ "learning_rate": 1.1787184009406232e-05,
274
+ "loss": 0.0231,
275
+ "step": 13000
276
+ },
277
+ {
278
+ "epoch": 7.934175727299442,
279
+ "grad_norm": 0.36619672179222107,
280
+ "learning_rate": 1.0317460317460318e-05,
281
+ "loss": 0.022,
282
+ "step": 13500
283
+ },
284
+ {
285
+ "epoch": 8.0,
286
+ "eval_accuracy": 0.851041187496138,
287
+ "eval_f1": 0.45333161323614785,
288
+ "eval_loss": 1.045249104499817,
289
+ "eval_precision": 0.3129397096802921,
290
+ "eval_recall": 0.8221806270472625,
291
+ "eval_runtime": 16.479,
292
+ "eval_samples_per_second": 413.071,
293
+ "eval_steps_per_second": 51.641,
294
+ "step": 13612
295
+ },
296
+ {
297
+ "epoch": 8.22803408756979,
298
+ "grad_norm": 1.0557399988174438,
299
+ "learning_rate": 8.847736625514404e-06,
300
+ "loss": 0.0164,
301
+ "step": 14000
302
+ },
303
+ {
304
+ "epoch": 8.521892447840141,
305
+ "grad_norm": 4.32920503616333,
306
+ "learning_rate": 7.37801293356849e-06,
307
+ "loss": 0.0164,
308
+ "step": 14500
309
+ },
310
+ {
311
+ "epoch": 8.81575080811049,
312
+ "grad_norm": 0.1674884408712387,
313
+ "learning_rate": 5.908289241622575e-06,
314
+ "loss": 0.017,
315
+ "step": 15000
316
+ },
317
+ {
318
+ "epoch": 8.999706141639729,
319
+ "eval_accuracy": 0.8523937357107058,
320
+ "eval_f1": 0.45305514157973176,
321
+ "eval_loss": 1.102479100227356,
322
+ "eval_precision": 0.31328972130119187,
323
+ "eval_recall": 0.8179691155825924,
324
+ "eval_runtime": 16.1914,
325
+ "eval_samples_per_second": 420.409,
326
+ "eval_steps_per_second": 52.559,
327
+ "step": 15313
328
+ },
329
+ {
330
+ "epoch": 9.10960916838084,
331
+ "grad_norm": 3.3734261989593506,
332
+ "learning_rate": 4.438565549676661e-06,
333
+ "loss": 0.0171,
334
+ "step": 15500
335
+ },
336
+ {
337
+ "epoch": 9.40346752865119,
338
+ "grad_norm": 0.19828377664089203,
339
+ "learning_rate": 2.9688418577307467e-06,
340
+ "loss": 0.013,
341
+ "step": 16000
342
+ },
343
+ {
344
+ "epoch": 9.69732588892154,
345
+ "grad_norm": 1.491190791130066,
346
+ "learning_rate": 1.4991181657848325e-06,
347
+ "loss": 0.0133,
348
+ "step": 16500
349
+ },
350
+ {
351
+ "epoch": 9.991184249191889,
352
+ "grad_norm": 2.5641109943389893,
353
+ "learning_rate": 2.9394473838918286e-08,
354
+ "loss": 0.0135,
355
+ "step": 17000
356
+ },
357
+ {
358
+ "epoch": 9.997061416397296,
359
+ "eval_accuracy": 0.8526409018818958,
360
+ "eval_f1": 0.45504563402161957,
361
+ "eval_loss": 1.1188451051712036,
362
+ "eval_precision": 0.3145413870246085,
363
+ "eval_recall": 0.8224145999064109,
364
+ "eval_runtime": 16.3165,
365
+ "eval_samples_per_second": 417.185,
366
+ "eval_steps_per_second": 52.156,
367
+ "step": 17010
368
+ },
369
+ {
370
+ "epoch": 9.997061416397296,
371
+ "step": 17010,
372
+ "total_flos": 6700722040732752.0,
373
+ "train_loss": 0.08913132946046923,
374
+ "train_runtime": 3311.208,
375
+ "train_samples_per_second": 82.218,
376
+ "train_steps_per_second": 5.137
377
+ }
378
+ ],
379
+ "logging_steps": 500,
380
+ "max_steps": 17010,
381
+ "num_input_tokens_seen": 0,
382
+ "num_train_epochs": 10,
383
+ "save_steps": 500,
384
+ "total_flos": 6700722040732752.0,
385
+ "train_batch_size": 4,
386
+ "trial_name": null,
387
+ "trial_params": null
388
+ }