napsternxg committed on
Commit
8902673
1 Parent(s): ad79c0e

End of training

Browse files
README.md ADDED
@@ -0,0 +1,81 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ base_model: napsternxg/gte-small-L3-ingredient-v2
3
+ tags:
4
+ - generated_from_trainer
5
+ datasets:
6
+ - nyt_ingredients
7
+ model-index:
8
+ - name: nyt_ingredients-crf-tagger-gte-small-L3-ingredient-v2
9
+ results: []
10
+ ---
11
+
12
+ <!-- This model card has been generated automatically according to the information the Trainer had access to. You
13
+ should probably proofread and complete it, then remove this comment. -->
14
+
15
+ # nyt_ingredients-crf-tagger-gte-small-L3-ingredient-v2
16
+
17
+ This model is a fine-tuned version of [napsternxg/gte-small-L3-ingredient-v2](https://huggingface.co/napsternxg/gte-small-L3-ingredient-v2) on the nyt_ingredients dataset.
18
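+ It achieves the following results on the evaluation set (the same values are recorded in `all_results.json` and `test_results.json`; the intermediate validation metrics in the training results table below were computed on a smaller set of examples):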
19
+ - Loss: 1.6099
20
+ - Comment: {'precision': 0.042328042328042326, 'recall': 0.015407896546980328, 'f1': 0.022592032274331823, 'number': 7269}
21
+ - Name: {'precision': 0.1018981018981019, 'recall': 0.03297769156159069, 'f1': 0.049829018075232046, 'number': 9279}
22
+ - Qty: {'precision': 0.15665304220758594, 'recall': 0.9842980705256155, 'f1': 0.27028903423831624, 'number': 7515}
23
+ - Range End: {'precision': 0.0, 'recall': 0.0, 'f1': 0.0, 'number': 90}
24
+ - Unit: {'precision': 0.5, 'recall': 0.00016485328058028355, 'f1': 0.00032959789057350036, 'number': 6066}
25
+ - Overall Precision: 0.1478
26
+ - Overall Recall: 0.2586
27
+ - Overall F1: 0.1881
28
+ - Overall Accuracy: 0.1721
29
+
30
+ ## Model description
31
+
32
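+ A CRF-based token tagger (`PretrainedCRFModel`) built on a 3-layer BERT-type encoder (hidden size 384, 12 attention heads, vocabulary size 30522). It predicts BIO tags (`O` plus `B-`/`I-` prefixed labels) for five entity types: COMMENT, NAME, QTY, RANGE_END and UNIT.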
33
+
34
+ ## Intended uses & limitations
35
+
36
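+ More information needed. Note that the reported scores are low (overall F1 0.1881, overall accuracy 0.1721; RANGE_END and UNIT F1 are approximately 0), so predictions should be validated before being relied on.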
37
+
38
+ ## Training and evaluation data
39
+
40
+ More information needed
41
+
42
+ ## Training procedure
43
+
44
+ ### Training hyperparameters
45
+
46
+ The following hyperparameters were used during training:
47
+ - learning_rate: 5e-05
48
+ - train_batch_size: 32
49
+ - eval_batch_size: 32
50
+ - seed: 42
51
+ - optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
52
+ - lr_scheduler_type: linear
53
+ - num_epochs: 3
54
+
55
+ ### Training results
56
+
57
+ | Training Loss | Epoch | Step | Validation Loss | Comment | Name | Qty | Range End | Unit | Overall Precision | Overall Recall | Overall F1 | Overall Accuracy |
58
+ |:-------------:|:-----:|:-----:|:---------------:|:---------------------------------------------------------------------------------------------------------------:|:-------------------------------------------------------------------------------------------------------------:|:-----------------------------------------------------------------------------------------------------------:|:----------------------------------------------------------:|:------------------------------------------------------------:|:-----------------:|:--------------:|:----------:|:----------------:|
59
+ | 5.3925 | 0.19 | 1000 | 4.7348 | {'precision': 0.040214477211796246, 'recall': 0.010744985673352435, 'f1': 0.016958733747880157, 'number': 1396} | {'precision': 0.10684931506849316, 'recall': 0.04377104377104377, 'f1': 0.06210191082802548, 'number': 1782} | {'precision': 0.15598917211820437, 'recall': 0.987152034261242, 'f1': 0.26940683744034283, 'number': 1401} | {'precision': 0.0, 'recall': 0.0, 'f1': 0.0, 'number': 15} | {'precision': 0.0, 'recall': 0.0, 'f1': 0.0, 'number': 1093} | 0.1481 | 0.2595 | 0.1886 | 0.1706 |
60
+ | 4.0783 | 0.38 | 2000 | 3.8413 | {'precision': 0.038560411311053984, 'recall': 0.010744985673352435, 'f1': 0.01680672268907563, 'number': 1396} | {'precision': 0.10653409090909091, 'recall': 0.04208754208754209, 'f1': 0.06033789219629928, 'number': 1782} | {'precision': 0.15589396503102088, 'recall': 0.9864382583868665, 'f1': 0.2692382622248198, 'number': 1401} | {'precision': 0.0, 'recall': 0.0, 'f1': 0.0, 'number': 15} | {'precision': 0.0, 'recall': 0.0, 'f1': 0.0, 'number': 1093} | 0.1478 | 0.2588 | 0.1882 | 0.1701 |
61
+ | 3.6055 | 0.57 | 3000 | 3.3592 | {'precision': 0.038461538461538464, 'recall': 0.012177650429799427, 'f1': 0.018498367791077257, 'number': 1396} | {'precision': 0.10407876230661041, 'recall': 0.04152637485970819, 'f1': 0.05936622543120738, 'number': 1782} | {'precision': 0.1565937181086291, 'recall': 0.9857244825124911, 'f1': 0.2702544031311155, 'number': 1401} | {'precision': 0.0, 'recall': 0.0, 'f1': 0.0, 'number': 15} | {'precision': 0.0, 'recall': 0.0, 'f1': 0.0, 'number': 1093} | 0.1476 | 0.2588 | 0.1880 | 0.1708 |
62
+ | 3.2433 | 0.76 | 4000 | 3.0284 | {'precision': 0.03762376237623762, 'recall': 0.013610315186246419, 'f1': 0.019989479221462388, 'number': 1396} | {'precision': 0.10084033613445378, 'recall': 0.04040404040404041, 'f1': 0.057692307692307696, 'number': 1782} | {'precision': 0.1578404401650619, 'recall': 0.9828693790149893, 'f1': 0.272, 'number': 1401} | {'precision': 0.0, 'recall': 0.0, 'f1': 0.0, 'number': 15} | {'precision': 0.0, 'recall': 0.0, 'f1': 0.0, 'number': 1093} | 0.1476 | 0.2581 | 0.1878 | 0.1722 |
63
+ | 2.8283 | 0.95 | 5000 | 2.7775 | {'precision': 0.034545454545454546, 'recall': 0.013610315186246419, 'f1': 0.019527235354573486, 'number': 1396} | {'precision': 0.10086455331412104, 'recall': 0.03928170594837262, 'f1': 0.05654281098546042, 'number': 1782} | {'precision': 0.1570031832651205, 'recall': 0.9857244825124911, 'f1': 0.2708639796018437, 'number': 1401} | {'precision': 0.0, 'recall': 0.0, 'f1': 0.0, 'number': 15} | {'precision': 0.0, 'recall': 0.0, 'f1': 0.0, 'number': 1093} | 0.1464 | 0.2585 | 0.1869 | 0.1707 |
64
+ | 2.5028 | 1.14 | 6000 | 2.4982 | {'precision': 0.03616636528028933, 'recall': 0.014326647564469915, 'f1': 0.02052334530528476, 'number': 1396} | {'precision': 0.10099573257467995, 'recall': 0.03984287317620651, 'f1': 0.057142857142857134, 'number': 1782} | {'precision': 0.157134735645253, 'recall': 0.9864382583868665, 'f1': 0.2710867006669282, 'number': 1401} | {'precision': 0.0, 'recall': 0.0, 'f1': 0.0, 'number': 15} | {'precision': 0.0, 'recall': 0.0, 'f1': 0.0, 'number': 1093} | 0.1466 | 0.2590 | 0.1872 | 0.1716 |
65
+ | 2.3731 | 1.33 | 7000 | 2.3250 | {'precision': 0.037698412698412696, 'recall': 0.013610315186246419, 'f1': 0.02, 'number': 1396} | {'precision': 0.09957924263674614, 'recall': 0.03984287317620651, 'f1': 0.05691382765531061, 'number': 1782} | {'precision': 0.15701254275940707, 'recall': 0.9828693790149893, 'f1': 0.27076983580768854, 'number': 1401} | {'precision': 0.0, 'recall': 0.0, 'f1': 0.0, 'number': 15} | {'precision': 0.0, 'recall': 0.0, 'f1': 0.0, 'number': 1093} | 0.1469 | 0.2580 | 0.1872 | 0.1711 |
66
+ | 2.1459 | 1.52 | 8000 | 2.1464 | {'precision': 0.04007285974499089, 'recall': 0.015759312320916905, 'f1': 0.02262210796915167, 'number': 1396} | {'precision': 0.10235131396957123, 'recall': 0.04152637485970819, 'f1': 0.0590818363273453, 'number': 1782} | {'precision': 0.15672235481304694, 'recall': 0.9842969307637401, 'f1': 0.2703921568627451, 'number': 1401} | {'precision': 0.0, 'recall': 0.0, 'f1': 0.0, 'number': 15} | {'precision': 0.0, 'recall': 0.0, 'f1': 0.0, 'number': 1093} | 0.1465 | 0.2594 | 0.1872 | 0.1714 |
67
+ | 1.9918 | 1.71 | 9000 | 1.9713 | {'precision': 0.04070796460176991, 'recall': 0.0164756446991404, 'f1': 0.023457419683834777, 'number': 1396} | {'precision': 0.09900990099009901, 'recall': 0.03928170594837262, 'f1': 0.05624748895138609, 'number': 1782} | {'precision': 0.15779205875602478, 'recall': 0.9814418272662384, 'f1': 0.2718734552644587, 'number': 1401} | {'precision': 0.0, 'recall': 0.0, 'f1': 0.0, 'number': 15} | {'precision': 0.0, 'recall': 0.0, 'f1': 0.0, 'number': 1093} | 0.1470 | 0.2581 | 0.1873 | 0.1728 |
68
+ | 1.954 | 1.9 | 10000 | 1.8380 | {'precision': 0.04, 'recall': 0.0164756446991404, 'f1': 0.02333840690005073, 'number': 1396} | {'precision': 0.10164835164835165, 'recall': 0.04152637485970819, 'f1': 0.058964143426294816, 'number': 1782} | {'precision': 0.1576777739608382, 'recall': 0.9828693790149893, 'f1': 0.27175843694493784, 'number': 1401} | {'precision': 0.0, 'recall': 0.0, 'f1': 0.0, 'number': 15} | {'precision': 0.0, 'recall': 0.0, 'f1': 0.0, 'number': 1093} | 0.1469 | 0.2592 | 0.1875 | 0.1726 |
69
+ | 1.6977 | 2.09 | 11000 | 1.7403 | {'precision': 0.04013377926421405, 'recall': 0.017191977077363897, 'f1': 0.02407221664994985, 'number': 1396} | {'precision': 0.10339943342776203, 'recall': 0.0409652076318743, 'f1': 0.058681672025723476, 'number': 1782} | {'precision': 0.15731749114589283, 'recall': 0.9828693790149893, 'f1': 0.27122316328540474, 'number': 1401} | {'precision': 0.0, 'recall': 0.0, 'f1': 0.0, 'number': 15} | {'precision': 0.0, 'recall': 0.0, 'f1': 0.0, 'number': 1093} | 0.1466 | 0.2592 | 0.1872 | 0.1715 |
70
+ | 1.5499 | 2.28 | 12000 | 1.6569 | {'precision': 0.03861788617886179, 'recall': 0.013610315186246419, 'f1': 0.0201271186440678, 'number': 1396} | {'precision': 0.10152990264255911, 'recall': 0.0409652076318743, 'f1': 0.058376649340263896, 'number': 1782} | {'precision': 0.15768394553152534, 'recall': 0.9835831548893648, 'f1': 0.27179487179487183, 'number': 1401} | {'precision': 0.0, 'recall': 0.0, 'f1': 0.0, 'number': 15} | {'precision': 0.0, 'recall': 0.0, 'f1': 0.0, 'number': 1093} | 0.1477 | 0.2585 | 0.1880 | 0.1733 |
71
+ | 1.5793 | 2.47 | 13000 | 1.5988 | {'precision': 0.033582089552238806, 'recall': 0.012893982808022923, 'f1': 0.018633540372670808, 'number': 1396} | {'precision': 0.10198300283286119, 'recall': 0.04040404040404041, 'f1': 0.057877813504823156, 'number': 1782} | {'precision': 0.1576962632841961, 'recall': 0.9850107066381156, 'f1': 0.27186761229314416, 'number': 1401} | {'precision': 0.0, 'recall': 0.0, 'f1': 0.0, 'number': 15} | {'precision': 0.0, 'recall': 0.0, 'f1': 0.0, 'number': 1093} | 0.1471 | 0.2585 | 0.1875 | 0.1736 |
72
+ | 1.405 | 2.66 | 14000 | 1.5497 | {'precision': 0.03512396694214876, 'recall': 0.012177650429799427, 'f1': 0.018085106382978725, 'number': 1396} | {'precision': 0.10198300283286119, 'recall': 0.04040404040404041, 'f1': 0.057877813504823156, 'number': 1782} | {'precision': 0.15643407340280924, 'recall': 0.9857244825124911, 'f1': 0.2700166194153876, 'number': 1401} | {'precision': 0.0, 'recall': 0.0, 'f1': 0.0, 'number': 15} | {'precision': 0.0, 'recall': 0.0, 'f1': 0.0, 'number': 1093} | 0.1467 | 0.2585 | 0.1872 | 0.1713 |
73
+ | 1.4509 | 2.85 | 15000 | 1.5233 | {'precision': 0.03985507246376811, 'recall': 0.015759312320916905, 'f1': 0.022587268993839834, 'number': 1396} | {'precision': 0.10198300283286119, 'recall': 0.04040404040404041, 'f1': 0.057877813504823156, 'number': 1782} | {'precision': 0.1578525641025641, 'recall': 0.9842969307637401, 'f1': 0.2720726053072901, 'number': 1401} | {'precision': 0.0, 'recall': 0.0, 'f1': 0.0, 'number': 15} | {'precision': 0.0, 'recall': 0.0, 'f1': 0.0, 'number': 1093} | 0.1474 | 0.2590 | 0.1879 | 0.1727 |
74
+
75
+
76
+ ### Framework versions
77
+
78
+ - Transformers 4.34.0
79
+ - Pytorch 2.0.1+cu118
80
+ - Datasets 2.14.5
81
+ - Tokenizers 0.14.1
added_tokens.json ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ {
2
+ "[CLS]": 101,
3
+ "[MASK]": 103,
4
+ "[PAD]": 0,
5
+ "[SEP]": 102,
6
+ "[UNK]": 100
7
+ }
all_results.json ADDED
@@ -0,0 +1,41 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 3.0,
3
+ "eval_COMMENT": {
4
+ "f1": 0.022592032274331823,
5
+ "number": 7269,
6
+ "precision": 0.042328042328042326,
7
+ "recall": 0.015407896546980328
8
+ },
9
+ "eval_NAME": {
10
+ "f1": 0.049829018075232046,
11
+ "number": 9279,
12
+ "precision": 0.1018981018981019,
13
+ "recall": 0.03297769156159069
14
+ },
15
+ "eval_QTY": {
16
+ "f1": 0.27028903423831624,
17
+ "number": 7515,
18
+ "precision": 0.15665304220758594,
19
+ "recall": 0.9842980705256155
20
+ },
21
+ "eval_RANGE_END": {
22
+ "f1": 0.0,
23
+ "number": 90,
24
+ "precision": 0.0,
25
+ "recall": 0.0
26
+ },
27
+ "eval_UNIT": {
28
+ "f1": 0.00032959789057350036,
29
+ "number": 6066,
30
+ "precision": 0.5,
31
+ "recall": 0.00016485328058028355
32
+ },
33
+ "eval_loss": 1.6098747253417969,
34
+ "eval_overall_accuracy": 0.17205696773030468,
35
+ "eval_overall_f1": 0.1881356136191313,
36
+ "eval_overall_precision": 0.14783431057310384,
37
+ "eval_overall_recall": 0.25864522320394456,
38
+ "eval_runtime": 13.1011,
39
+ "eval_samples_per_second": 683.452,
40
+ "eval_steps_per_second": 21.372
41
+ }
config.json ADDED
@@ -0,0 +1,51 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "napsternxg/gte-small-L3-ingredient-v2",
3
+ "architectures": [
4
+ "PretrainedCRFModel"
5
+ ],
6
+ "attention_probs_dropout_prob": 0.1,
7
+ "classifier_dropout": null,
8
+ "hidden_act": "gelu",
9
+ "hidden_dropout_prob": 0.1,
10
+ "hidden_size": 384,
11
+ "id2label": {
12
+ "0": "O",
13
+ "1": "B-COMMENT",
14
+ "2": "I-COMMENT",
15
+ "3": "B-NAME",
16
+ "4": "I-NAME",
17
+ "5": "B-RANGE_END",
18
+ "6": "I-RANGE_END",
19
+ "7": "B-QTY",
20
+ "8": "I-QTY",
21
+ "9": "B-UNIT",
22
+ "10": "I-UNIT"
23
+ },
24
+ "initializer_range": 0.02,
25
+ "intermediate_size": 1536,
26
+ "label2id": {
27
+ "B-COMMENT": 1,
28
+ "B-NAME": 3,
29
+ "B-QTY": 7,
30
+ "B-RANGE_END": 5,
31
+ "B-UNIT": 9,
32
+ "I-COMMENT": 2,
33
+ "I-NAME": 4,
34
+ "I-QTY": 8,
35
+ "I-RANGE_END": 6,
36
+ "I-UNIT": 10,
37
+ "O": 0
38
+ },
39
+ "layer_norm_eps": 1e-12,
40
+ "max_position_embeddings": 512,
41
+ "model_type": "bert",
42
+ "num_attention_heads": 12,
43
+ "num_hidden_layers": 3,
44
+ "pad_token_id": 0,
45
+ "position_embedding_type": "absolute",
46
+ "torch_dtype": "float32",
47
+ "transformers_version": "4.34.0",
48
+ "type_vocab_size": 2,
49
+ "use_cache": true,
50
+ "vocab_size": 30522
51
+ }
pytorch_model.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a39bba0dcf8bd261a2deee2815a6746567d9daeca9426ae6e4f9202475e2b6dc
3
+ size 69005087
special_tokens_map.json ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ {
2
+ "cls_token": "[CLS]",
3
+ "mask_token": "[MASK]",
4
+ "pad_token": "[PAD]",
5
+ "sep_token": "[SEP]",
6
+ "unk_token": "[UNK]"
7
+ }
test_results.json ADDED
@@ -0,0 +1,41 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 3.0,
3
+ "eval_COMMENT": {
4
+ "f1": 0.022592032274331823,
5
+ "number": 7269,
6
+ "precision": 0.042328042328042326,
7
+ "recall": 0.015407896546980328
8
+ },
9
+ "eval_NAME": {
10
+ "f1": 0.049829018075232046,
11
+ "number": 9279,
12
+ "precision": 0.1018981018981019,
13
+ "recall": 0.03297769156159069
14
+ },
15
+ "eval_QTY": {
16
+ "f1": 0.27028903423831624,
17
+ "number": 7515,
18
+ "precision": 0.15665304220758594,
19
+ "recall": 0.9842980705256155
20
+ },
21
+ "eval_RANGE_END": {
22
+ "f1": 0.0,
23
+ "number": 90,
24
+ "precision": 0.0,
25
+ "recall": 0.0
26
+ },
27
+ "eval_UNIT": {
28
+ "f1": 0.00032959789057350036,
29
+ "number": 6066,
30
+ "precision": 0.5,
31
+ "recall": 0.00016485328058028355
32
+ },
33
+ "eval_loss": 1.6098747253417969,
34
+ "eval_overall_accuracy": 0.17205696773030468,
35
+ "eval_overall_f1": 0.1881356136191313,
36
+ "eval_overall_precision": 0.14783431057310384,
37
+ "eval_overall_recall": 0.25864522320394456,
38
+ "eval_runtime": 13.1011,
39
+ "eval_samples_per_second": 683.452,
40
+ "eval_steps_per_second": 21.372
41
+ }
tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
tokenizer_config.json ADDED
@@ -0,0 +1,65 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "added_tokens_decoder": {
3
+ "0": {
4
+ "content": "[PAD]",
5
+ "lstrip": false,
6
+ "normalized": false,
7
+ "rstrip": false,
8
+ "single_word": false,
9
+ "special": true
10
+ },
11
+ "100": {
12
+ "content": "[UNK]",
13
+ "lstrip": false,
14
+ "normalized": false,
15
+ "rstrip": false,
16
+ "single_word": false,
17
+ "special": true
18
+ },
19
+ "101": {
20
+ "content": "[CLS]",
21
+ "lstrip": false,
22
+ "normalized": false,
23
+ "rstrip": false,
24
+ "single_word": false,
25
+ "special": true
26
+ },
27
+ "102": {
28
+ "content": "[SEP]",
29
+ "lstrip": false,
30
+ "normalized": false,
31
+ "rstrip": false,
32
+ "single_word": false,
33
+ "special": true
34
+ },
35
+ "103": {
36
+ "content": "[MASK]",
37
+ "lstrip": false,
38
+ "normalized": false,
39
+ "rstrip": false,
40
+ "single_word": false,
41
+ "special": true
42
+ }
43
+ },
44
+ "additional_special_tokens": [],
45
+ "clean_up_tokenization_spaces": true,
46
+ "cls_token": "[CLS]",
47
+ "do_basic_tokenize": true,
48
+ "do_lower_case": true,
49
+ "mask_token": "[MASK]",
50
+ "max_length": 128,
51
+ "model_max_length": 1000000000000000019884624838656,
52
+ "never_split": null,
53
+ "pad_to_multiple_of": null,
54
+ "pad_token": "[PAD]",
55
+ "pad_token_type_id": 0,
56
+ "padding_side": "right",
57
+ "sep_token": "[SEP]",
58
+ "stride": 0,
59
+ "strip_accents": null,
60
+ "tokenize_chinese_chars": true,
61
+ "tokenizer_class": "BertTokenizer",
62
+ "truncation_side": "right",
63
+ "truncation_strategy": "longest_first",
64
+ "unk_token": "[UNK]"
65
+ }
train_results.json ADDED
@@ -0,0 +1,41 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 3.0,
3
+ "eval_COMMENT": {
4
+ "f1": 0.02639274279615795,
5
+ "number": 135123,
6
+ "precision": 0.04730569849073206,
7
+ "recall": 0.018301843505546797
8
+ },
9
+ "eval_NAME": {
10
+ "f1": 0.05007133958937708,
11
+ "number": 174739,
12
+ "precision": 0.10144978165938864,
13
+ "recall": 0.033238143745815185
14
+ },
15
+ "eval_QTY": {
16
+ "f1": 0.27320922349688087,
17
+ "number": 141548,
18
+ "precision": 0.15865637377387948,
19
+ "recall": 0.9828326786673072
20
+ },
21
+ "eval_RANGE_END": {
22
+ "f1": 0.0,
23
+ "number": 1759,
24
+ "precision": 0.0,
25
+ "recall": 0.0
26
+ },
27
+ "eval_UNIT": {
28
+ "f1": 0.0003348076618090187,
29
+ "number": 113394,
30
+ "precision": 0.18269230769230768,
31
+ "recall": 0.00016755736635095333
32
+ },
33
+ "eval_loss": 1.2795538902282715,
34
+ "eval_overall_accuracy": 0.17505487764888336,
35
+ "eval_overall_f1": 0.18984382294138288,
36
+ "eval_overall_precision": 0.1494381042938442,
37
+ "eval_overall_recall": 0.260197012512289,
38
+ "eval_runtime": 205.4105,
39
+ "eval_samples_per_second": 819.856,
40
+ "eval_steps_per_second": 25.622
41
+ }
trainer_state.json ADDED
@@ -0,0 +1,844 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": null,
3
+ "best_model_checkpoint": null,
4
+ "epoch": 3.0,
5
+ "eval_steps": 1000,
6
+ "global_step": 15789,
7
+ "is_hyper_param_search": false,
8
+ "is_local_process_zero": true,
9
+ "is_world_process_zero": true,
10
+ "log_history": [
11
+ {
12
+ "epoch": 0.1,
13
+ "learning_rate": 4.8416619165241625e-05,
14
+ "loss": 8.4421,
15
+ "step": 500
16
+ },
17
+ {
18
+ "epoch": 0.19,
19
+ "learning_rate": 4.683323833048325e-05,
20
+ "loss": 5.3925,
21
+ "step": 1000
22
+ },
23
+ {
24
+ "epoch": 0.19,
25
+ "eval_COMMENT": {
26
+ "f1": 0.016958733747880157,
27
+ "number": 1396,
28
+ "precision": 0.040214477211796246,
29
+ "recall": 0.010744985673352435
30
+ },
31
+ "eval_NAME": {
32
+ "f1": 0.06210191082802548,
33
+ "number": 1782,
34
+ "precision": 0.10684931506849316,
35
+ "recall": 0.04377104377104377
36
+ },
37
+ "eval_QTY": {
38
+ "f1": 0.26940683744034283,
39
+ "number": 1401,
40
+ "precision": 0.15598917211820437,
41
+ "recall": 0.987152034261242
42
+ },
43
+ "eval_RANGE_END": {
44
+ "f1": 0.0,
45
+ "number": 15,
46
+ "precision": 0.0,
47
+ "recall": 0.0
48
+ },
49
+ "eval_UNIT": {
50
+ "f1": 0.0,
51
+ "number": 1093,
52
+ "precision": 0.0,
53
+ "recall": 0.0
54
+ },
55
+ "eval_loss": 4.734821796417236,
56
+ "eval_overall_accuracy": 0.17059665871121718,
57
+ "eval_overall_f1": 0.1885539090444558,
58
+ "eval_overall_precision": 0.14805898284682517,
59
+ "eval_overall_recall": 0.25953930015825566,
60
+ "eval_runtime": 2.992,
61
+ "eval_samples_per_second": 568.847,
62
+ "eval_steps_per_second": 18.048,
63
+ "step": 1000
64
+ },
65
+ {
66
+ "epoch": 0.29,
67
+ "learning_rate": 4.524985749572487e-05,
68
+ "loss": 4.7949,
69
+ "step": 1500
70
+ },
71
+ {
72
+ "epoch": 0.38,
73
+ "learning_rate": 4.36664766609665e-05,
74
+ "loss": 4.0783,
75
+ "step": 2000
76
+ },
77
+ {
78
+ "epoch": 0.38,
79
+ "eval_COMMENT": {
80
+ "f1": 0.01680672268907563,
81
+ "number": 1396,
82
+ "precision": 0.038560411311053984,
83
+ "recall": 0.010744985673352435
84
+ },
85
+ "eval_NAME": {
86
+ "f1": 0.06033789219629928,
87
+ "number": 1782,
88
+ "precision": 0.10653409090909091,
89
+ "recall": 0.04208754208754209
90
+ },
91
+ "eval_QTY": {
92
+ "f1": 0.2692382622248198,
93
+ "number": 1401,
94
+ "precision": 0.15589396503102088,
95
+ "recall": 0.9864382583868665
96
+ },
97
+ "eval_RANGE_END": {
98
+ "f1": 0.0,
99
+ "number": 15,
100
+ "precision": 0.0,
101
+ "recall": 0.0
102
+ },
103
+ "eval_UNIT": {
104
+ "f1": 0.0,
105
+ "number": 1093,
106
+ "precision": 0.0,
107
+ "recall": 0.0
108
+ },
109
+ "eval_loss": 3.8412649631500244,
110
+ "eval_overall_accuracy": 0.17011933174224345,
111
+ "eval_overall_f1": 0.1881751358261425,
112
+ "eval_overall_precision": 0.14782084755975095,
113
+ "eval_overall_recall": 0.2588359416212414,
114
+ "eval_runtime": 2.449,
115
+ "eval_samples_per_second": 694.983,
116
+ "eval_steps_per_second": 22.05,
117
+ "step": 2000
118
+ },
119
+ {
120
+ "epoch": 0.48,
121
+ "learning_rate": 4.2083095826208124e-05,
122
+ "loss": 3.7302,
123
+ "step": 2500
124
+ },
125
+ {
126
+ "epoch": 0.57,
127
+ "learning_rate": 4.049971499144975e-05,
128
+ "loss": 3.6055,
129
+ "step": 3000
130
+ },
131
+ {
132
+ "epoch": 0.57,
133
+ "eval_COMMENT": {
134
+ "f1": 0.018498367791077257,
135
+ "number": 1396,
136
+ "precision": 0.038461538461538464,
137
+ "recall": 0.012177650429799427
138
+ },
139
+ "eval_NAME": {
140
+ "f1": 0.05936622543120738,
141
+ "number": 1782,
142
+ "precision": 0.10407876230661041,
143
+ "recall": 0.04152637485970819
144
+ },
145
+ "eval_QTY": {
146
+ "f1": 0.2702544031311155,
147
+ "number": 1401,
148
+ "precision": 0.1565937181086291,
149
+ "recall": 0.9857244825124911
150
+ },
151
+ "eval_RANGE_END": {
152
+ "f1": 0.0,
153
+ "number": 15,
154
+ "precision": 0.0,
155
+ "recall": 0.0
156
+ },
157
+ "eval_UNIT": {
158
+ "f1": 0.0,
159
+ "number": 1093,
160
+ "precision": 0.0,
161
+ "recall": 0.0
162
+ },
163
+ "eval_loss": 3.3591551780700684,
164
+ "eval_overall_accuracy": 0.17078758949880668,
165
+ "eval_overall_f1": 0.1880068969921451,
166
+ "eval_overall_precision": 0.14761331728840754,
167
+ "eval_overall_recall": 0.2588359416212414,
168
+ "eval_runtime": 3.2795,
169
+ "eval_samples_per_second": 518.989,
170
+ "eval_steps_per_second": 16.466,
171
+ "step": 3000
172
+ },
173
+ {
174
+ "epoch": 0.67,
175
+ "learning_rate": 3.891633415669137e-05,
176
+ "loss": 3.4413,
177
+ "step": 3500
178
+ },
179
+ {
180
+ "epoch": 0.76,
181
+ "learning_rate": 3.733295332193299e-05,
182
+ "loss": 3.2433,
183
+ "step": 4000
184
+ },
185
+ {
186
+ "epoch": 0.76,
187
+ "eval_COMMENT": {
188
+ "f1": 0.019989479221462388,
189
+ "number": 1396,
190
+ "precision": 0.03762376237623762,
191
+ "recall": 0.013610315186246419
192
+ },
193
+ "eval_NAME": {
194
+ "f1": 0.057692307692307696,
195
+ "number": 1782,
196
+ "precision": 0.10084033613445378,
197
+ "recall": 0.04040404040404041
198
+ },
199
+ "eval_QTY": {
200
+ "f1": 0.272,
201
+ "number": 1401,
202
+ "precision": 0.1578404401650619,
203
+ "recall": 0.9828693790149893
204
+ },
205
+ "eval_RANGE_END": {
206
+ "f1": 0.0,
207
+ "number": 15,
208
+ "precision": 0.0,
209
+ "recall": 0.0
210
+ },
211
+ "eval_UNIT": {
212
+ "f1": 0.0,
213
+ "number": 1093,
214
+ "precision": 0.0,
215
+ "recall": 0.0
216
+ },
217
+ "eval_loss": 3.0283868312835693,
218
+ "eval_overall_accuracy": 0.1722195704057279,
219
+ "eval_overall_f1": 0.18784388995521434,
220
+ "eval_overall_precision": 0.14764155687418284,
221
+ "eval_overall_recall": 0.2581325830842272,
222
+ "eval_runtime": 2.572,
223
+ "eval_samples_per_second": 661.731,
224
+ "eval_steps_per_second": 20.995,
225
+ "step": 4000
226
+ },
227
+ {
228
+ "epoch": 0.86,
229
+ "learning_rate": 3.5749572487174616e-05,
230
+ "loss": 2.9769,
231
+ "step": 4500
232
+ },
233
+ {
234
+ "epoch": 0.95,
235
+ "learning_rate": 3.416619165241624e-05,
236
+ "loss": 2.8283,
237
+ "step": 5000
238
+ },
239
+ {
240
+ "epoch": 0.95,
241
+ "eval_COMMENT": {
242
+ "f1": 0.019527235354573486,
243
+ "number": 1396,
244
+ "precision": 0.034545454545454546,
245
+ "recall": 0.013610315186246419
246
+ },
247
+ "eval_NAME": {
248
+ "f1": 0.05654281098546042,
249
+ "number": 1782,
250
+ "precision": 0.10086455331412104,
251
+ "recall": 0.03928170594837262
252
+ },
253
+ "eval_QTY": {
254
+ "f1": 0.2708639796018437,
255
+ "number": 1401,
256
+ "precision": 0.1570031832651205,
257
+ "recall": 0.9857244825124911
258
+ },
259
+ "eval_RANGE_END": {
260
+ "f1": 0.0,
261
+ "number": 15,
262
+ "precision": 0.0,
263
+ "recall": 0.0
264
+ },
265
+ "eval_UNIT": {
266
+ "f1": 0.0,
267
+ "number": 1093,
268
+ "precision": 0.0,
269
+ "recall": 0.0
270
+ },
271
+ "eval_loss": 2.7775347232818604,
272
+ "eval_overall_accuracy": 0.17069212410501194,
273
+ "eval_overall_f1": 0.186939657913143,
274
+ "eval_overall_precision": 0.14641434262948208,
275
+ "eval_overall_recall": 0.2584842623527343,
276
+ "eval_runtime": 2.5326,
277
+ "eval_samples_per_second": 672.042,
278
+ "eval_steps_per_second": 21.322,
279
+ "step": 5000
280
+ },
281
+ {
282
+ "epoch": 1.05,
283
+ "learning_rate": 3.258281081765787e-05,
284
+ "loss": 2.845,
285
+ "step": 5500
286
+ },
287
+ {
288
+ "epoch": 1.14,
289
+ "learning_rate": 3.0999429982899484e-05,
290
+ "loss": 2.5028,
291
+ "step": 6000
292
+ },
293
+ {
294
+ "epoch": 1.14,
295
+ "eval_COMMENT": {
296
+ "f1": 0.02052334530528476,
297
+ "number": 1396,
298
+ "precision": 0.03616636528028933,
299
+ "recall": 0.014326647564469915
300
+ },
301
+ "eval_NAME": {
302
+ "f1": 0.057142857142857134,
303
+ "number": 1782,
304
+ "precision": 0.10099573257467995,
305
+ "recall": 0.03984287317620651
306
+ },
307
+ "eval_QTY": {
308
+ "f1": 0.2710867006669282,
309
+ "number": 1401,
310
+ "precision": 0.157134735645253,
311
+ "recall": 0.9864382583868665
312
+ },
313
+ "eval_RANGE_END": {
314
+ "f1": 0.0,
315
+ "number": 15,
316
+ "precision": 0.0,
317
+ "recall": 0.0
318
+ },
319
+ "eval_UNIT": {
320
+ "f1": 0.0,
321
+ "number": 1093,
322
+ "precision": 0.0,
323
+ "recall": 0.0
324
+ },
325
+ "eval_loss": 2.49822735786438,
326
+ "eval_overall_accuracy": 0.17155131264916468,
327
+ "eval_overall_f1": 0.18719024018299657,
328
+ "eval_overall_precision": 0.14655258183265346,
329
+ "eval_overall_recall": 0.259011781255495,
330
+ "eval_runtime": 4.0514,
331
+ "eval_samples_per_second": 420.1,
332
+ "eval_steps_per_second": 13.329,
333
+ "step": 6000
334
+ },
335
+ {
336
+ "epoch": 1.24,
337
+ "learning_rate": 2.941604914814111e-05,
338
+ "loss": 2.515,
339
+ "step": 6500
340
+ },
341
+ {
342
+ "epoch": 1.33,
343
+ "learning_rate": 2.7832668313382737e-05,
344
+ "loss": 2.3731,
345
+ "step": 7000
346
+ },
347
+ {
348
+ "epoch": 1.33,
349
+ "eval_COMMENT": {
350
+ "f1": 0.02,
351
+ "number": 1396,
352
+ "precision": 0.037698412698412696,
353
+ "recall": 0.013610315186246419
354
+ },
355
+ "eval_NAME": {
356
+ "f1": 0.05691382765531061,
357
+ "number": 1782,
358
+ "precision": 0.09957924263674614,
359
+ "recall": 0.03984287317620651
360
+ },
361
+ "eval_QTY": {
362
+ "f1": 0.27076983580768854,
363
+ "number": 1401,
364
+ "precision": 0.15701254275940707,
365
+ "recall": 0.9828693790149893
366
+ },
367
+ "eval_RANGE_END": {
368
+ "f1": 0.0,
369
+ "number": 15,
370
+ "precision": 0.0,
371
+ "recall": 0.0
372
+ },
373
+ "eval_UNIT": {
374
+ "f1": 0.0,
375
+ "number": 1093,
376
+ "precision": 0.0,
377
+ "recall": 0.0
378
+ },
379
+ "eval_loss": 2.3250303268432617,
380
+ "eval_overall_accuracy": 0.17107398568019094,
381
+ "eval_overall_f1": 0.18718897537322954,
382
+ "eval_overall_precision": 0.14689095824571943,
383
+ "eval_overall_recall": 0.2579567434499736,
384
+ "eval_runtime": 2.5162,
385
+ "eval_samples_per_second": 676.415,
386
+ "eval_steps_per_second": 21.461,
387
+ "step": 7000
388
+ },
389
+ {
390
+ "epoch": 1.43,
391
+ "learning_rate": 2.6249287478624363e-05,
392
+ "loss": 2.3351,
393
+ "step": 7500
394
+ },
395
+ {
396
+ "epoch": 1.52,
397
+ "learning_rate": 2.4665906643865983e-05,
398
+ "loss": 2.1459,
399
+ "step": 8000
400
+ },
401
+ {
402
+ "epoch": 1.52,
403
+ "eval_COMMENT": {
404
+ "f1": 0.02262210796915167,
405
+ "number": 1396,
406
+ "precision": 0.04007285974499089,
407
+ "recall": 0.015759312320916905
408
+ },
409
+ "eval_NAME": {
410
+ "f1": 0.0590818363273453,
411
+ "number": 1782,
412
+ "precision": 0.10235131396957123,
413
+ "recall": 0.04152637485970819
414
+ },
415
+ "eval_QTY": {
416
+ "f1": 0.2703921568627451,
417
+ "number": 1401,
418
+ "precision": 0.15672235481304694,
419
+ "recall": 0.9842969307637401
420
+ },
421
+ "eval_RANGE_END": {
422
+ "f1": 0.0,
423
+ "number": 15,
424
+ "precision": 0.0,
425
+ "recall": 0.0
426
+ },
427
+ "eval_UNIT": {
428
+ "f1": 0.0,
429
+ "number": 1093,
430
+ "precision": 0.0,
431
+ "recall": 0.0
432
+ },
433
+ "eval_loss": 2.1463518142700195,
434
+ "eval_overall_accuracy": 0.17136038186157518,
435
+ "eval_overall_f1": 0.1872064982865846,
436
+ "eval_overall_precision": 0.1464601330553073,
437
+ "eval_overall_recall": 0.2593634605240021,
438
+ "eval_runtime": 3.7102,
439
+ "eval_samples_per_second": 458.735,
440
+ "eval_steps_per_second": 14.554,
441
+ "step": 8000
442
+ },
443
+ {
444
+ "epoch": 1.62,
445
+ "learning_rate": 2.308252580910761e-05,
446
+ "loss": 2.0879,
447
+ "step": 8500
448
+ },
449
+ {
450
+ "epoch": 1.71,
451
+ "learning_rate": 2.1499144974349232e-05,
452
+ "loss": 1.9918,
453
+ "step": 9000
454
+ },
455
+ {
456
+ "epoch": 1.71,
457
+ "eval_COMMENT": {
458
+ "f1": 0.023457419683834777,
459
+ "number": 1396,
460
+ "precision": 0.04070796460176991,
461
+ "recall": 0.0164756446991404
462
+ },
463
+ "eval_NAME": {
464
+ "f1": 0.05624748895138609,
465
+ "number": 1782,
466
+ "precision": 0.09900990099009901,
467
+ "recall": 0.03928170594837262
468
+ },
469
+ "eval_QTY": {
470
+ "f1": 0.2718734552644587,
471
+ "number": 1401,
472
+ "precision": 0.15779205875602478,
473
+ "recall": 0.9814418272662384
474
+ },
475
+ "eval_RANGE_END": {
476
+ "f1": 0.0,
477
+ "number": 15,
478
+ "precision": 0.0,
479
+ "recall": 0.0
480
+ },
481
+ "eval_UNIT": {
482
+ "f1": 0.0,
483
+ "number": 1093,
484
+ "precision": 0.0,
485
+ "recall": 0.0
486
+ },
487
+ "eval_loss": 1.9712719917297363,
488
+ "eval_overall_accuracy": 0.1727923627684964,
489
+ "eval_overall_f1": 0.18732852676577555,
490
+ "eval_overall_precision": 0.14700580813138395,
491
+ "eval_overall_recall": 0.2581325830842272,
492
+ "eval_runtime": 2.4728,
493
+ "eval_samples_per_second": 688.282,
494
+ "eval_steps_per_second": 21.837,
495
+ "step": 9000
496
+ },
497
+ {
498
+ "epoch": 1.81,
499
+ "learning_rate": 1.9915764139590855e-05,
500
+ "loss": 1.8722,
501
+ "step": 9500
502
+ },
503
+ {
504
+ "epoch": 1.9,
505
+ "learning_rate": 1.8332383304832478e-05,
506
+ "loss": 1.954,
507
+ "step": 10000
508
+ },
509
+ {
510
+ "epoch": 1.9,
511
+ "eval_COMMENT": {
512
+ "f1": 0.02333840690005073,
513
+ "number": 1396,
514
+ "precision": 0.04,
515
+ "recall": 0.0164756446991404
516
+ },
517
+ "eval_NAME": {
518
+ "f1": 0.058964143426294816,
519
+ "number": 1782,
520
+ "precision": 0.10164835164835165,
521
+ "recall": 0.04152637485970819
522
+ },
523
+ "eval_QTY": {
524
+ "f1": 0.27175843694493784,
525
+ "number": 1401,
526
+ "precision": 0.1576777739608382,
527
+ "recall": 0.9828693790149893
528
+ },
529
+ "eval_RANGE_END": {
530
+ "f1": 0.0,
531
+ "number": 15,
532
+ "precision": 0.0,
533
+ "recall": 0.0
534
+ },
535
+ "eval_UNIT": {
536
+ "f1": 0.0,
537
+ "number": 1093,
538
+ "precision": 0.0,
539
+ "recall": 0.0
540
+ },
541
+ "eval_loss": 1.8380228281021118,
542
+ "eval_overall_accuracy": 0.1726014319809069,
543
+ "eval_overall_f1": 0.18749602493162884,
544
+ "eval_overall_precision": 0.14687126345157434,
545
+ "eval_overall_recall": 0.25918762088974856,
546
+ "eval_runtime": 3.3595,
547
+ "eval_samples_per_second": 506.628,
548
+ "eval_steps_per_second": 16.074,
549
+ "step": 10000
550
+ },
551
+ {
552
+ "epoch": 2.0,
553
+ "learning_rate": 1.6749002470074104e-05,
554
+ "loss": 1.8542,
555
+ "step": 10500
556
+ },
557
+ {
558
+ "epoch": 2.09,
559
+ "learning_rate": 1.5165621635315727e-05,
560
+ "loss": 1.6977,
561
+ "step": 11000
562
+ },
563
+ {
564
+ "epoch": 2.09,
565
+ "eval_COMMENT": {
566
+ "f1": 0.02407221664994985,
567
+ "number": 1396,
568
+ "precision": 0.04013377926421405,
569
+ "recall": 0.017191977077363897
570
+ },
571
+ "eval_NAME": {
572
+ "f1": 0.058681672025723476,
573
+ "number": 1782,
574
+ "precision": 0.10339943342776203,
575
+ "recall": 0.0409652076318743
576
+ },
577
+ "eval_QTY": {
578
+ "f1": 0.27122316328540474,
579
+ "number": 1401,
580
+ "precision": 0.15731749114589283,
581
+ "recall": 0.9828693790149893
582
+ },
583
+ "eval_RANGE_END": {
584
+ "f1": 0.0,
585
+ "number": 15,
586
+ "precision": 0.0,
587
+ "recall": 0.0
588
+ },
589
+ "eval_UNIT": {
590
+ "f1": 0.0,
591
+ "number": 1093,
592
+ "precision": 0.0,
593
+ "recall": 0.0
594
+ },
595
+ "eval_loss": 1.7402697801589966,
596
+ "eval_overall_accuracy": 0.17145584725536994,
597
+ "eval_overall_f1": 0.18724593495934957,
598
+ "eval_overall_precision": 0.14656458188326538,
599
+ "eval_overall_recall": 0.25918762088974856,
600
+ "eval_runtime": 3.1606,
601
+ "eval_samples_per_second": 538.504,
602
+ "eval_steps_per_second": 17.085,
603
+ "step": 11000
604
+ },
605
+ {
606
+ "epoch": 2.19,
607
+ "learning_rate": 1.358224080055735e-05,
608
+ "loss": 1.568,
609
+ "step": 11500
610
+ },
611
+ {
612
+ "epoch": 2.28,
613
+ "learning_rate": 1.1998859965798973e-05,
614
+ "loss": 1.5499,
615
+ "step": 12000
616
+ },
617
+ {
618
+ "epoch": 2.28,
619
+ "eval_COMMENT": {
620
+ "f1": 0.0201271186440678,
621
+ "number": 1396,
622
+ "precision": 0.03861788617886179,
623
+ "recall": 0.013610315186246419
624
+ },
625
+ "eval_NAME": {
626
+ "f1": 0.058376649340263896,
627
+ "number": 1782,
628
+ "precision": 0.10152990264255911,
629
+ "recall": 0.0409652076318743
630
+ },
631
+ "eval_QTY": {
632
+ "f1": 0.27179487179487183,
633
+ "number": 1401,
634
+ "precision": 0.15768394553152534,
635
+ "recall": 0.9835831548893648
636
+ },
637
+ "eval_RANGE_END": {
638
+ "f1": 0.0,
639
+ "number": 15,
640
+ "precision": 0.0,
641
+ "recall": 0.0
642
+ },
643
+ "eval_UNIT": {
644
+ "f1": 0.0,
645
+ "number": 1093,
646
+ "precision": 0.0,
647
+ "recall": 0.0
648
+ },
649
+ "eval_loss": 1.656908392906189,
650
+ "eval_overall_accuracy": 0.17326968973747017,
651
+ "eval_overall_f1": 0.1880156040161156,
652
+ "eval_overall_precision": 0.14773869346733667,
653
+ "eval_overall_recall": 0.2584842623527343,
654
+ "eval_runtime": 2.4637,
655
+ "eval_samples_per_second": 690.825,
656
+ "eval_steps_per_second": 21.918,
657
+ "step": 12000
658
+ },
659
+ {
660
+ "epoch": 2.38,
661
+ "learning_rate": 1.0415479131040598e-05,
662
+ "loss": 1.6363,
663
+ "step": 12500
664
+ },
665
+ {
666
+ "epoch": 2.47,
667
+ "learning_rate": 8.832098296282222e-06,
668
+ "loss": 1.5793,
669
+ "step": 13000
670
+ },
671
+ {
672
+ "epoch": 2.47,
673
+ "eval_COMMENT": {
674
+ "f1": 0.018633540372670808,
675
+ "number": 1396,
676
+ "precision": 0.033582089552238806,
677
+ "recall": 0.012893982808022923
678
+ },
679
+ "eval_NAME": {
680
+ "f1": 0.057877813504823156,
681
+ "number": 1782,
682
+ "precision": 0.10198300283286119,
683
+ "recall": 0.04040404040404041
684
+ },
685
+ "eval_QTY": {
686
+ "f1": 0.27186761229314416,
687
+ "number": 1401,
688
+ "precision": 0.1576962632841961,
689
+ "recall": 0.9850107066381156
690
+ },
691
+ "eval_RANGE_END": {
692
+ "f1": 0.0,
693
+ "number": 15,
694
+ "precision": 0.0,
695
+ "recall": 0.0
696
+ },
697
+ "eval_UNIT": {
698
+ "f1": 0.0,
699
+ "number": 1093,
700
+ "precision": 0.0,
701
+ "recall": 0.0
702
+ },
703
+ "eval_loss": 1.598816156387329,
704
+ "eval_overall_accuracy": 0.1735560859188544,
705
+ "eval_overall_f1": 0.18749999999999997,
706
+ "eval_overall_precision": 0.1471029720804563,
707
+ "eval_overall_recall": 0.2584842623527343,
708
+ "eval_runtime": 3.8605,
709
+ "eval_samples_per_second": 440.877,
710
+ "eval_steps_per_second": 13.988,
711
+ "step": 13000
712
+ },
713
+ {
714
+ "epoch": 2.57,
715
+ "learning_rate": 7.248717461523846e-06,
716
+ "loss": 1.5645,
717
+ "step": 13500
718
+ },
719
+ {
720
+ "epoch": 2.66,
721
+ "learning_rate": 5.66533662676547e-06,
722
+ "loss": 1.405,
723
+ "step": 14000
724
+ },
725
+ {
726
+ "epoch": 2.66,
727
+ "eval_COMMENT": {
728
+ "f1": 0.018085106382978725,
729
+ "number": 1396,
730
+ "precision": 0.03512396694214876,
731
+ "recall": 0.012177650429799427
732
+ },
733
+ "eval_NAME": {
734
+ "f1": 0.057877813504823156,
735
+ "number": 1782,
736
+ "precision": 0.10198300283286119,
737
+ "recall": 0.04040404040404041
738
+ },
739
+ "eval_QTY": {
740
+ "f1": 0.2700166194153876,
741
+ "number": 1401,
742
+ "precision": 0.15643407340280924,
743
+ "recall": 0.9857244825124911
744
+ },
745
+ "eval_RANGE_END": {
746
+ "f1": 0.0,
747
+ "number": 15,
748
+ "precision": 0.0,
749
+ "recall": 0.0
750
+ },
751
+ "eval_UNIT": {
752
+ "f1": 0.0,
753
+ "number": 1093,
754
+ "precision": 0.0,
755
+ "recall": 0.0
756
+ },
757
+ "eval_loss": 1.549748420715332,
758
+ "eval_overall_accuracy": 0.17126491646778044,
759
+ "eval_overall_f1": 0.18720152817574023,
760
+ "eval_overall_precision": 0.14673587542423638,
761
+ "eval_overall_recall": 0.2584842623527343,
762
+ "eval_runtime": 2.4896,
763
+ "eval_samples_per_second": 683.639,
764
+ "eval_steps_per_second": 21.69,
765
+ "step": 14000
766
+ },
767
+ {
768
+ "epoch": 2.76,
769
+ "learning_rate": 4.081955792007094e-06,
770
+ "loss": 1.4811,
771
+ "step": 14500
772
+ },
773
+ {
774
+ "epoch": 2.85,
775
+ "learning_rate": 2.4985749572487175e-06,
776
+ "loss": 1.4509,
777
+ "step": 15000
778
+ },
779
+ {
780
+ "epoch": 2.85,
781
+ "eval_COMMENT": {
782
+ "f1": 0.022587268993839834,
783
+ "number": 1396,
784
+ "precision": 0.03985507246376811,
785
+ "recall": 0.015759312320916905
786
+ },
787
+ "eval_NAME": {
788
+ "f1": 0.057877813504823156,
789
+ "number": 1782,
790
+ "precision": 0.10198300283286119,
791
+ "recall": 0.04040404040404041
792
+ },
793
+ "eval_QTY": {
794
+ "f1": 0.2720726053072901,
795
+ "number": 1401,
796
+ "precision": 0.1578525641025641,
797
+ "recall": 0.9842969307637401
798
+ },
799
+ "eval_RANGE_END": {
800
+ "f1": 0.0,
801
+ "number": 15,
802
+ "precision": 0.0,
803
+ "recall": 0.0
804
+ },
805
+ "eval_UNIT": {
806
+ "f1": 0.0,
807
+ "number": 1093,
808
+ "precision": 0.0,
809
+ "recall": 0.0
810
+ },
811
+ "eval_loss": 1.5232961177825928,
812
+ "eval_overall_accuracy": 0.17269689737470167,
813
+ "eval_overall_f1": 0.18787067151329637,
814
+ "eval_overall_precision": 0.1473884330598359,
815
+ "eval_overall_recall": 0.259011781255495,
816
+ "eval_runtime": 3.8156,
817
+ "eval_samples_per_second": 446.059,
818
+ "eval_steps_per_second": 14.152,
819
+ "step": 15000
820
+ },
821
+ {
822
+ "epoch": 2.95,
823
+ "learning_rate": 9.151941224903414e-07,
824
+ "loss": 1.4787,
825
+ "step": 15500
826
+ },
827
+ {
828
+ "epoch": 3.0,
829
+ "step": 15789,
830
+ "total_flos": 486353449919844.0,
831
+ "train_loss": 2.636403929357464,
832
+ "train_runtime": 881.1788,
833
+ "train_samples_per_second": 573.347,
834
+ "train_steps_per_second": 17.918
835
+ }
836
+ ],
837
+ "logging_steps": 500,
838
+ "max_steps": 15789,
839
+ "num_train_epochs": 3,
840
+ "save_steps": 500,
841
+ "total_flos": 486353449919844.0,
842
+ "trial_name": null,
843
+ "trial_params": null
844
+ }
training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2e5b422e9ca9f0ecedbcc66ba4e6cbb0826f8ad3b2f17ce70a5b6d864ef26cee
3
+ size 4155
validation_results.json ADDED
@@ -0,0 +1,41 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 3.0,
3
+ "eval_COMMENT": {
4
+ "f1": 0.022587268993839834,
5
+ "number": 1396,
6
+ "precision": 0.03985507246376811,
7
+ "recall": 0.015759312320916905
8
+ },
9
+ "eval_NAME": {
10
+ "f1": 0.057877813504823156,
11
+ "number": 1782,
12
+ "precision": 0.10198300283286119,
13
+ "recall": 0.04040404040404041
14
+ },
15
+ "eval_QTY": {
16
+ "f1": 0.2720726053072901,
17
+ "number": 1401,
18
+ "precision": 0.1578525641025641,
19
+ "recall": 0.9842969307637401
20
+ },
21
+ "eval_RANGE_END": {
22
+ "f1": 0.0,
23
+ "number": 15,
24
+ "precision": 0.0,
25
+ "recall": 0.0
26
+ },
27
+ "eval_UNIT": {
28
+ "f1": 0.0,
29
+ "number": 1093,
30
+ "precision": 0.0,
31
+ "recall": 0.0
32
+ },
33
+ "eval_loss": 1.5058764219284058,
34
+ "eval_overall_accuracy": 0.17269689737470167,
35
+ "eval_overall_f1": 0.18787067151329637,
36
+ "eval_overall_precision": 0.1473884330598359,
37
+ "eval_overall_recall": 0.259011781255495,
38
+ "eval_runtime": 2.9379,
39
+ "eval_samples_per_second": 579.318,
40
+ "eval_steps_per_second": 18.38
41
+ }
vocab.txt ADDED
The diff for this file is too large to render. See raw diff