pranaydeeps committed on
Commit 64884ff
1 Parent(s): 7b1ee53

Upload folder using huggingface_hub

.gitattributes CHANGED
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
  *.zip filter=lfs diff=lfs merge=lfs -text
  *.zst filter=lfs diff=lfs merge=lfs -text
  *tfevents* filter=lfs diff=lfs merge=lfs -text
+ tokenizer.json filter=lfs diff=lfs merge=lfs -text
README.md ADDED
@@ -0,0 +1,108 @@
+ ---
+ license: mit
+ tags:
+ - generated_from_trainer
+ metrics:
+ - precision
+ - recall
+ - f1
+ - accuracy
+ model-index:
+ - name: pos_final_xlm_en
+ results: []
+ ---
+
+ <!-- This model card has been generated automatically according to the information the Trainer had access to. You
+ should probably proofread and complete it, then remove this comment. -->
+
+ # pos_final_xlm_en
+
+ This model is a fine-tuned version of [xlm-roberta-base](https://huggingface.co/xlm-roberta-base) on the None dataset.
+ It achieves the following results on the evaluation set:
+ - Loss: 0.0719
+ - Precision: 0.9686
+ - Recall: 0.9705
+ - F1: 0.9695
+ - Accuracy: 0.9790
+
+ ## Model description
+
+ More information needed
+
+ ## Intended uses & limitations
+
+ More information needed
+
+ ## Training and evaluation data
+
+ More information needed
+
+ ## Training procedure
+
+ ### Training hyperparameters
+
+ The following hyperparameters were used during training:
+ - learning_rate: 5e-05
+ - train_batch_size: 256
+ - eval_batch_size: 256
+ - seed: 42
+ - gradient_accumulation_steps: 4
+ - total_train_batch_size: 1024
+ - optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
+ - lr_scheduler_type: linear
+ - lr_scheduler_warmup_steps: 500
+ - num_epochs: 40.0
+ - mixed_precision_training: Native AMP
+
+ ### Training results
+
+ | Training Loss | Epoch | Step | Validation Loss | Precision | Recall | F1 | Accuracy |
+ |:-------------:|:-----:|:----:|:---------------:|:---------:|:------:|:------:|:--------:|
+ | No log | 0.99 | 60 | 3.0062 | 0.2412 | 0.1720 | 0.2008 | 0.3036 |
+ | No log | 1.99 | 120 | 0.5353 | 0.8699 | 0.8553 | 0.8625 | 0.8970 |
+ | No log | 2.99 | 180 | 0.1312 | 0.9578 | 0.9553 | 0.9566 | 0.9691 |
+ | No log | 3.99 | 240 | 0.0981 | 0.9621 | 0.9628 | 0.9625 | 0.9737 |
+ | No log | 4.99 | 300 | 0.0853 | 0.9652 | 0.9659 | 0.9655 | 0.9760 |
+ | No log | 5.99 | 360 | 0.0788 | 0.9656 | 0.9676 | 0.9666 | 0.9769 |
+ | No log | 6.99 | 420 | 0.0745 | 0.9664 | 0.9689 | 0.9677 | 0.9775 |
+ | No log | 7.99 | 480 | 0.0718 | 0.9675 | 0.9689 | 0.9682 | 0.9780 |
+ | 0.7956 | 8.99 | 540 | 0.0707 | 0.9679 | 0.9683 | 0.9681 | 0.9779 |
+ | 0.7956 | 9.99 | 600 | 0.0686 | 0.9682 | 0.9698 | 0.9690 | 0.9786 |
+ | 0.7956 | 10.99 | 660 | 0.0686 | 0.9689 | 0.9694 | 0.9692 | 0.9787 |
+ | 0.7956 | 11.99 | 720 | 0.0680 | 0.9679 | 0.9707 | 0.9693 | 0.9787 |
+ | 0.7956 | 12.99 | 780 | 0.0685 | 0.9683 | 0.9706 | 0.9694 | 0.9789 |
+ | 0.7956 | 13.99 | 840 | 0.0695 | 0.9689 | 0.9700 | 0.9694 | 0.9788 |
+ | 0.7956 | 14.99 | 900 | 0.0703 | 0.9682 | 0.9699 | 0.9690 | 0.9786 |
+ | 0.7956 | 15.99 | 960 | 0.0719 | 0.9686 | 0.9705 | 0.9695 | 0.9790 |
+ | 0.051 | 16.99 | 1020 | 0.0735 | 0.9687 | 0.9701 | 0.9694 | 0.9788 |
+ | 0.051 | 17.99 | 1080 | 0.0747 | 0.9684 | 0.9701 | 0.9692 | 0.9787 |
+ | 0.051 | 18.99 | 1140 | 0.0761 | 0.9685 | 0.9697 | 0.9691 | 0.9786 |
+ | 0.051 | 19.99 | 1200 | 0.0774 | 0.9678 | 0.9698 | 0.9688 | 0.9784 |
+ | 0.051 | 20.99 | 1260 | 0.0796 | 0.9685 | 0.9694 | 0.9690 | 0.9785 |
+ | 0.051 | 21.99 | 1320 | 0.0796 | 0.9681 | 0.9701 | 0.9691 | 0.9786 |
+ | 0.051 | 22.99 | 1380 | 0.0820 | 0.9684 | 0.9690 | 0.9687 | 0.9784 |
+ | 0.051 | 23.99 | 1440 | 0.0829 | 0.9679 | 0.9688 | 0.9683 | 0.9781 |
+ | 0.0318 | 24.99 | 1500 | 0.0854 | 0.9681 | 0.9690 | 0.9686 | 0.9782 |
+ | 0.0318 | 25.99 | 1560 | 0.0881 | 0.9677 | 0.9692 | 0.9684 | 0.9782 |
+ | 0.0318 | 26.99 | 1620 | 0.0893 | 0.9679 | 0.9690 | 0.9685 | 0.9783 |
+ | 0.0318 | 27.99 | 1680 | 0.0910 | 0.9676 | 0.9691 | 0.9683 | 0.9781 |
+ | 0.0318 | 28.99 | 1740 | 0.0919 | 0.9684 | 0.9686 | 0.9685 | 0.9783 |
+ | 0.0318 | 29.99 | 1800 | 0.0933 | 0.9678 | 0.9686 | 0.9682 | 0.9781 |
+ | 0.0318 | 30.99 | 1860 | 0.0947 | 0.9677 | 0.9688 | 0.9683 | 0.9781 |
+ | 0.0318 | 31.99 | 1920 | 0.0966 | 0.9678 | 0.9694 | 0.9686 | 0.9783 |
+ | 0.0318 | 32.99 | 1980 | 0.0974 | 0.9677 | 0.9689 | 0.9683 | 0.9781 |
+ | 0.0211 | 33.99 | 2040 | 0.0981 | 0.9684 | 0.9693 | 0.9688 | 0.9784 |
+ | 0.0211 | 34.99 | 2100 | 0.0989 | 0.9681 | 0.9690 | 0.9686 | 0.9783 |
+ | 0.0211 | 35.99 | 2160 | 0.1008 | 0.9679 | 0.9695 | 0.9687 | 0.9784 |
+ | 0.0211 | 36.99 | 2220 | 0.1015 | 0.9681 | 0.9689 | 0.9685 | 0.9782 |
+ | 0.0211 | 37.99 | 2280 | 0.1015 | 0.9677 | 0.9689 | 0.9683 | 0.9781 |
+ | 0.0211 | 38.99 | 2340 | 0.1024 | 0.9679 | 0.9690 | 0.9684 | 0.9782 |
+ | 0.0211 | 39.99 | 2400 | 0.1022 | 0.9680 | 0.9690 | 0.9685 | 0.9782 |
+
+
+ ### Framework versions
+
+ - Transformers 4.25.1
+ - Pytorch 1.12.0
+ - Datasets 2.18.0
+ - Tokenizers 0.13.2
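
The card above does not yet include a usage snippet; the sketch below shows one way to run the uploaded checkpoint for POS tagging with the Transformers `token-classification` pipeline. The repo id `pranaydeeps/pos_final_xlm_en` is an assumption pieced together from the committer and model name; substitute the real Hub path if it differs.

```python
from transformers import pipeline

# Assumed Hub path (committer + model name); replace if the repository lives elsewhere.
tagger = pipeline("token-classification", model="pranaydeeps/pos_final_xlm_en")

# Each prediction carries the sub-word piece, its PTB-style tag (DT, JJ, NN, VBZ, ...) and a score.
for pred in tagger("The quick brown fox jumps over the lazy dog ."):
    print(pred["word"], pred["entity"], round(pred["score"], 3))
```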
all_results.json ADDED
@@ -0,0 +1,17 @@
+ {
+ "epoch": 39.99,
+ "eval_accuracy": 0.9790089140228122,
+ "eval_f1": 0.9695497407877142,
+ "eval_loss": 0.07188576459884644,
+ "eval_precision": 0.9686181737446121,
+ "eval_recall": 0.97048310142215,
+ "eval_runtime": 9.3965,
+ "eval_samples": 2072,
+ "eval_samples_per_second": 735.381,
+ "eval_steps_per_second": 2.873,
+ "train_loss": 0.19011780440807344,
+ "train_runtime": 1964.637,
+ "train_samples": 62189,
+ "train_samples_per_second": 1266.168,
+ "train_steps_per_second": 1.222
+ }
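
As a quick consistency check, the reported `eval_f1` is the harmonic mean of the precision and recall stored in the same file:

```python
# Recompute F1 from the precision/recall values in all_results.json.
p = 0.9686181737446121
r = 0.97048310142215

f1 = 2 * p * r / (p + r)
print(f1)  # ~0.96955, matching eval_f1 = 0.9695497407877142
```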
config.json ADDED
@@ -0,0 +1,145 @@
+ {
+ "_name_or_path": "xlm-roberta-base",
+ "architectures": [
+ "XLMRobertaForTokenClassification"
+ ],
+ "attention_probs_dropout_prob": 0.1,
+ "bos_token_id": 0,
+ "classifier_dropout": null,
+ "eos_token_id": 2,
+ "finetuning_task": "pos",
+ "hidden_act": "gelu",
+ "hidden_dropout_prob": 0.1,
+ "hidden_size": 768,
+ "id2label": {
+ "0": "`",
+ "1": "\tSYM",
+ "2": "IN",
+ "3": "$",
+ "4": "WDT",
+ "5": "LS",
+ "6": "\tDT",
+ "7": "VBZ",
+ "8": "CD",
+ "9": "SYM",
+ "10": "UH",
+ "11": "VB",
+ "12": "``",
+ "13": "VBN",
+ "14": "NN",
+ "15": "sleepy\t#",
+ "16": "terrorist\t#",
+ "17": "TO",
+ "18": "POS",
+ "19": "it",
+ "20": "NNP",
+ "21": "(",
+ "22": ".",
+ "23": "CC",
+ "24": ":",
+ "25": "RBR",
+ "26": "''",
+ "27": "#",
+ "28": "FW",
+ "29": "MD",
+ "30": "DT",
+ "31": "WP$",
+ "32": ",",
+ "33": "RBS",
+ "34": ")",
+ "35": "RB",
+ "36": "WP",
+ "37": "WRB",
+ "38": "@",
+ "39": "NNS",
+ "40": "PRP$",
+ "41": "JJS",
+ "42": "Ready\t#",
+ "43": "EX",
+ "44": "U",
+ "45": "NNPS",
+ "46": "\tPRP",
+ "47": "hero\t#",
+ "48": "JJR",
+ "49": "JJ",
+ "50": "PRP",
+ "51": "PDT",
+ "52": "VBD",
+ "53": "RP",
+ "54": "VBP",
+ "55": "VBG"
+ },
+ "initializer_range": 0.02,
+ "intermediate_size": 3072,
+ "label2id": {
+ "\tDT": 6,
+ "\tPRP": 46,
+ "\tSYM": 1,
+ "#": 27,
+ "$": 3,
+ "''": 26,
+ "(": 21,
+ ")": 34,
+ ",": 32,
+ ".": 22,
+ ":": 24,
+ "@": 38,
+ "CC": 23,
+ "CD": 8,
+ "DT": 30,
+ "EX": 43,
+ "FW": 28,
+ "IN": 2,
+ "JJ": 49,
+ "JJR": 48,
+ "JJS": 41,
+ "LS": 5,
+ "MD": 29,
+ "NN": 14,
+ "NNP": 20,
+ "NNPS": 45,
+ "NNS": 39,
+ "PDT": 51,
+ "POS": 18,
+ "PRP": 50,
+ "PRP$": 40,
+ "RB": 35,
+ "RBR": 25,
+ "RBS": 33,
+ "RP": 53,
+ "Ready\t#": 42,
+ "SYM": 9,
+ "TO": 17,
+ "U": 44,
+ "UH": 10,
+ "VB": 11,
+ "VBD": 52,
+ "VBG": 55,
+ "VBN": 13,
+ "VBP": 54,
+ "VBZ": 7,
+ "WDT": 4,
+ "WP": 36,
+ "WP$": 31,
+ "WRB": 37,
+ "`": 0,
+ "``": 12,
+ "hero\t#": 47,
+ "it": 19,
+ "sleepy\t#": 15,
+ "terrorist\t#": 16
+ },
+ "layer_norm_eps": 1e-05,
+ "max_position_embeddings": 514,
+ "model_type": "xlm-roberta",
+ "num_attention_heads": 12,
+ "num_hidden_layers": 12,
+ "output_past": true,
+ "pad_token_id": 1,
+ "position_embedding_type": "absolute",
+ "torch_dtype": "float32",
+ "transformers_version": "4.25.1",
+ "type_vocab_size": 1,
+ "use_cache": true,
+ "vocab_size": 250002
+ }
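
The `id2label` / `label2id` maps above are what turn the classifier's 56 output indices into PTB-style tags at inference time (a few entries, such as "\tSYM" or "sleepy\t#", look like stray tokens that leaked into the label inventory rather than standard tags). A small sketch for inspecting the mapping, assuming the same hypothetical repo id as in the usage example above; a local directory containing this config.json works as well:

```python
from transformers import AutoConfig

# Assumed Hub path; adjust to the actual repository or a local checkpoint directory.
config = AutoConfig.from_pretrained("pranaydeeps/pos_final_xlm_en")

print(config.num_labels)       # 56 classes (ids 0-55)
print(config.id2label[14])     # "NN"
print(config.label2id["VBZ"])  # 7
```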
eval_results.json ADDED
@@ -0,0 +1,12 @@
+ {
+ "epoch": 39.99,
+ "eval_accuracy": 0.9790089140228122,
+ "eval_f1": 0.9695497407877142,
+ "eval_loss": 0.07188576459884644,
+ "eval_precision": 0.9686181737446121,
+ "eval_recall": 0.97048310142215,
+ "eval_runtime": 9.3965,
+ "eval_samples": 2072,
+ "eval_samples_per_second": 735.381,
+ "eval_steps_per_second": 2.873
+ }
pytorch_model.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:60d6d87ae3bbd38bd10f9e4dd805847032e26dc05af8f15e035615ee175b3a47
+ size 1110055537
sentencepiece.bpe.model ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:cfc8146abe2a0488e9e2a0c56de7952f7c11ab059eca145a0a727afce0db2865
+ size 5069051
special_tokens_map.json ADDED
@@ -0,0 +1,15 @@
+ {
+ "bos_token": "<s>",
+ "cls_token": "<s>",
+ "eos_token": "</s>",
+ "mask_token": {
+ "content": "<mask>",
+ "lstrip": true,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false
+ },
+ "pad_token": "<pad>",
+ "sep_token": "</s>",
+ "unk_token": "<unk>"
+ }
tokenizer.json ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:f2c509a525eb51aebb33fb59c24ee923c1d4c1db23c3ae81fe05ccf354084f7b
+ size 17082758
tokenizer_config.json ADDED
@@ -0,0 +1,21 @@
+ {
+ "bos_token": "<s>",
+ "cls_token": "<s>",
+ "eos_token": "</s>",
+ "mask_token": {
+ "__type": "AddedToken",
+ "content": "<mask>",
+ "lstrip": true,
+ "normalized": true,
+ "rstrip": false,
+ "single_word": false
+ },
+ "model_max_length": 512,
+ "name_or_path": "xlm-roberta-base",
+ "pad_token": "<pad>",
+ "sep_token": "</s>",
+ "special_tokens_map_file": null,
+ "token": null,
+ "tokenizer_class": "XLMRobertaTokenizer",
+ "unk_token": "<unk>"
+ }
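
Because `XLMRobertaTokenizer` segments words into SentencePiece pieces, word-level POS labels have to be aligned to sub-tokens during training and when reading raw (non-aggregated) pipeline output. A minimal sketch of that segmentation, using the stock `xlm-roberta-base` tokenizer that the uploaded tokenizer files appear to mirror:

```python
from transformers import AutoTokenizer

# name_or_path in tokenizer_config.json points at xlm-roberta-base, so the base tokenizer
# illustrates the sub-word behaviour; the uploaded files should behave the same way.
tok = AutoTokenizer.from_pretrained("xlm-roberta-base")

ids = tok("unbelievably")["input_ids"]
print(tok.convert_ids_to_tokens(ids))
# Prints <s>/</s> plus SentencePiece pieces (e.g. something like '▁un', 'believ', 'ably');
# typically only the first piece of each word keeps the word's POS label during training.
```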
train_results.json ADDED
@@ -0,0 +1,8 @@
+ {
+ "epoch": 39.99,
+ "train_loss": 0.19011780440807344,
+ "train_runtime": 1964.637,
+ "train_samples": 62189,
+ "train_samples_per_second": 1266.168,
+ "train_steps_per_second": 1.222
+ }
trainer_state.json ADDED
@@ -0,0 +1,529 @@
+ {
+ "best_metric": 0.9695497407877142,
+ "best_model_checkpoint": "models/pos_final_xlm_en/checkpoint-960",
+ "epoch": 39.98765432098765,
+ "global_step": 2400,
+ "is_hyper_param_search": false,
+ "is_local_process_zero": true,
+ "is_world_process_zero": true,
+ "log_history": [
+ {
+ "epoch": 0.99,
+ "eval_accuracy": 0.3035943640371897,
+ "eval_f1": 0.2008264425810438,
+ "eval_loss": 3.0061752796173096,
+ "eval_precision": 0.24116944979086247,
+ "eval_recall": 0.17204639246429285,
+ "eval_runtime": 8.5419,
+ "eval_samples_per_second": 808.95,
+ "eval_steps_per_second": 3.161,
+ "step": 60
+ },
+ {
+ "epoch": 1.99,
+ "eval_accuracy": 0.8970254640723346,
+ "eval_f1": 0.8625090892213438,
+ "eval_loss": 0.5353450775146484,
+ "eval_precision": 0.8698569221887629,
+ "eval_recall": 0.8552843532822976,
+ "eval_runtime": 8.8286,
+ "eval_samples_per_second": 782.684,
+ "eval_steps_per_second": 3.058,
+ "step": 120
+ },
+ {
+ "epoch": 2.99,
+ "eval_accuracy": 0.9690916642704239,
+ "eval_f1": 0.9565594734295436,
+ "eval_loss": 0.13116228580474854,
+ "eval_precision": 0.9577732320280538,
+ "eval_recall": 0.955348787260482,
+ "eval_runtime": 8.7122,
+ "eval_samples_per_second": 793.137,
+ "eval_steps_per_second": 3.099,
+ "step": 180
+ },
+ {
+ "epoch": 3.99,
+ "eval_accuracy": 0.9736924502380268,
+ "eval_f1": 0.9624591697465074,
+ "eval_loss": 0.09810493141412735,
+ "eval_precision": 0.9620755729286427,
+ "eval_recall": 0.962843072580274,
+ "eval_runtime": 8.7346,
+ "eval_samples_per_second": 791.107,
+ "eval_steps_per_second": 3.091,
+ "step": 240
+ },
+ {
+ "epoch": 4.99,
+ "eval_accuracy": 0.976024793124381,
+ "eval_f1": 0.9655468564286207,
+ "eval_loss": 0.08534899353981018,
+ "eval_precision": 0.9652285898261429,
+ "eval_recall": 0.9658653329855944,
+ "eval_runtime": 9.6188,
+ "eval_samples_per_second": 718.383,
+ "eval_steps_per_second": 2.807,
+ "step": 300
+ },
+ {
+ "epoch": 5.99,
+ "eval_accuracy": 0.9768554905907537,
+ "eval_f1": 0.9665754810234248,
+ "eval_loss": 0.07884209603071213,
+ "eval_precision": 0.9655618493570116,
+ "eval_recall": 0.9675912431155362,
+ "eval_runtime": 8.6507,
+ "eval_samples_per_second": 798.781,
+ "eval_steps_per_second": 3.121,
+ "step": 360
+ },
+ {
+ "epoch": 6.99,
+ "eval_accuracy": 0.9775456084858941,
+ "eval_f1": 0.9676831206836455,
+ "eval_loss": 0.0745365098118782,
+ "eval_precision": 0.9664282162120806,
+ "eval_recall": 0.9689412883727352,
+ "eval_runtime": 8.6592,
+ "eval_samples_per_second": 797.994,
+ "eval_steps_per_second": 3.118,
+ "step": 420
+ },
+ {
+ "epoch": 7.99,
+ "eval_accuracy": 0.9780376369852072,
+ "eval_f1": 0.9681889956921002,
+ "eval_loss": 0.07183900475502014,
+ "eval_precision": 0.9675067024128686,
+ "eval_recall": 0.9688722519675376,
+ "eval_runtime": 8.714,
+ "eval_samples_per_second": 792.979,
+ "eval_steps_per_second": 3.098,
+ "step": 480
+ },
+ {
+ "epoch": 8.33,
+ "learning_rate": 4.99e-05,
+ "loss": 0.7956,
+ "step": 500
+ },
+ {
+ "epoch": 8.99,
+ "eval_accuracy": 0.9779226173360172,
+ "eval_f1": 0.9680838417498475,
+ "eval_loss": 0.07068216055631638,
+ "eval_precision": 0.9679168168329358,
+ "eval_recall": 0.9682509243207584,
+ "eval_runtime": 8.6148,
+ "eval_samples_per_second": 802.109,
+ "eval_steps_per_second": 3.134,
+ "step": 540
+ },
+ {
+ "epoch": 9.99,
+ "eval_accuracy": 0.9785999552701364,
+ "eval_f1": 0.9690196980148693,
+ "eval_loss": 0.06864651292562485,
+ "eval_precision": 0.9682478900853156,
+ "eval_recall": 0.9697927373701732,
+ "eval_runtime": 9.3629,
+ "eval_samples_per_second": 738.021,
+ "eval_steps_per_second": 2.884,
+ "step": 600
+ },
+ {
+ "epoch": 10.99,
+ "eval_accuracy": 0.9786830250167737,
+ "eval_f1": 0.9691608673146603,
+ "eval_loss": 0.06855177879333496,
+ "eval_precision": 0.9688896725672537,
+ "eval_recall": 0.9694322139208076,
+ "eval_runtime": 9.7452,
+ "eval_samples_per_second": 709.065,
+ "eval_steps_per_second": 2.771,
+ "step": 660
+ },
+ {
+ "epoch": 11.99,
+ "eval_accuracy": 0.9787149749193265,
+ "eval_f1": 0.9692986487728651,
+ "eval_loss": 0.06800223141908646,
+ "eval_precision": 0.9679492082918993,
+ "eval_recall": 0.9706518570792998,
+ "eval_runtime": 8.6538,
+ "eval_samples_per_second": 798.493,
+ "eval_steps_per_second": 3.12,
+ "step": 720
+ },
+ {
+ "epoch": 12.99,
+ "eval_accuracy": 0.97886833445158,
+ "eval_f1": 0.9694478391289856,
+ "eval_loss": 0.06851476430892944,
+ "eval_precision": 0.968346050526928,
+ "eval_recall": 0.9705521378273476,
+ "eval_runtime": 8.6111,
+ "eval_samples_per_second": 802.455,
+ "eval_steps_per_second": 3.135,
+ "step": 780
+ },
+ {
+ "epoch": 13.99,
+ "eval_accuracy": 0.9788299945685166,
+ "eval_f1": 0.9694149394930217,
+ "eval_loss": 0.06948242336511612,
+ "eval_precision": 0.9688690015554006,
+ "eval_recall": 0.969961493027323,
+ "eval_runtime": 8.5894,
+ "eval_samples_per_second": 804.476,
+ "eval_steps_per_second": 3.143,
+ "step": 840
+ },
+ {
+ "epoch": 14.99,
+ "eval_accuracy": 0.9786382951531998,
+ "eval_f1": 0.9690144083384428,
+ "eval_loss": 0.07028312981128693,
+ "eval_precision": 0.9681685222904575,
+ "eval_recall": 0.9698617737753709,
+ "eval_runtime": 8.8189,
+ "eval_samples_per_second": 783.547,
+ "eval_steps_per_second": 3.062,
+ "step": 900
+ },
+ {
+ "epoch": 15.99,
+ "eval_accuracy": 0.9790089140228122,
+ "eval_f1": 0.9695497407877142,
+ "eval_loss": 0.07188576459884644,
+ "eval_precision": 0.9686181737446121,
+ "eval_recall": 0.97048310142215,
+ "eval_runtime": 8.5817,
+ "eval_samples_per_second": 805.199,
+ "eval_steps_per_second": 3.146,
+ "step": 960
+ },
+ {
+ "epoch": 16.66,
+ "learning_rate": 3.686842105263158e-05,
+ "loss": 0.051,
+ "step": 1000
+ },
+ {
+ "epoch": 16.99,
+ "eval_accuracy": 0.978823604588006,
+ "eval_f1": 0.9694240468488908,
+ "eval_loss": 0.07346100360155106,
+ "eval_precision": 0.9687112241302716,
+ "eval_recall": 0.9701379193961616,
+ "eval_runtime": 9.6099,
+ "eval_samples_per_second": 719.048,
+ "eval_steps_per_second": 2.81,
+ "step": 1020
+ },
+ {
+ "epoch": 17.99,
+ "eval_accuracy": 0.9786830250167737,
+ "eval_f1": 0.9692255223920633,
+ "eval_loss": 0.07468883693218231,
+ "eval_precision": 0.9683683392420959,
+ "eval_recall": 0.9700842244143412,
+ "eval_runtime": 8.7732,
+ "eval_samples_per_second": 787.628,
+ "eval_steps_per_second": 3.078,
+ "step": 1080
+ },
+ {
+ "epoch": 18.99,
+ "eval_accuracy": 0.9785743953480942,
+ "eval_f1": 0.9691213222329547,
+ "eval_loss": 0.07609081268310547,
+ "eval_precision": 0.9685273432113142,
+ "eval_recall": 0.9697160302532869,
+ "eval_runtime": 8.6582,
+ "eval_samples_per_second": 798.088,
+ "eval_steps_per_second": 3.118,
+ "step": 1140
+ },
+ {
+ "epoch": 19.99,
+ "eval_accuracy": 0.9784210358158407,
+ "eval_f1": 0.9688278250741372,
+ "eval_loss": 0.07741989195346832,
+ "eval_precision": 0.9678266327811629,
+ "eval_recall": 0.9698310909286163,
+ "eval_runtime": 8.7584,
+ "eval_samples_per_second": 788.959,
+ "eval_steps_per_second": 3.083,
+ "step": 1200
+ },
+ {
+ "epoch": 20.99,
+ "eval_accuracy": 0.9784977155819675,
+ "eval_f1": 0.9689622916379138,
+ "eval_loss": 0.0795513391494751,
+ "eval_precision": 0.9685464216189702,
+ "eval_recall": 0.9693785189389872,
+ "eval_runtime": 8.6744,
+ "eval_samples_per_second": 796.596,
+ "eval_steps_per_second": 3.113,
+ "step": 1260
+ },
+ {
+ "epoch": 21.99,
+ "eval_accuracy": 0.9785999552701364,
+ "eval_f1": 0.9690767468323875,
+ "eval_loss": 0.07958221435546875,
+ "eval_precision": 0.9680789987369388,
+ "eval_recall": 0.9700765537026526,
+ "eval_runtime": 8.6133,
+ "eval_samples_per_second": 802.246,
+ "eval_steps_per_second": 3.135,
+ "step": 1320
+ },
+ {
+ "epoch": 22.99,
+ "eval_accuracy": 0.9783954758937985,
+ "eval_f1": 0.9686965590754671,
+ "eval_loss": 0.08197388052940369,
+ "eval_precision": 0.968436627924806,
+ "eval_recall": 0.9689566297961125,
+ "eval_runtime": 8.7277,
+ "eval_samples_per_second": 791.73,
+ "eval_steps_per_second": 3.094,
+ "step": 1380
+ },
+ {
+ "epoch": 23.99,
+ "eval_accuracy": 0.978127096712355,
+ "eval_f1": 0.9683426871530653,
+ "eval_loss": 0.08289676904678345,
+ "eval_precision": 0.9678825963675377,
+ "eval_recall": 0.9688032155623398,
+ "eval_runtime": 8.6527,
+ "eval_samples_per_second": 798.592,
+ "eval_steps_per_second": 3.12,
+ "step": 1440
+ },
+ {
+ "epoch": 24.99,
+ "learning_rate": 2.3710526315789475e-05,
+ "loss": 0.0318,
+ "step": 1500
+ },
+ {
+ "epoch": 24.99,
+ "eval_accuracy": 0.9782101664589923,
+ "eval_f1": 0.9685605958957412,
+ "eval_loss": 0.08542540669441223,
+ "eval_precision": 0.96811894087443,
+ "eval_recall": 0.9690026540662443,
+ "eval_runtime": 8.7516,
+ "eval_samples_per_second": 789.565,
+ "eval_steps_per_second": 3.085,
+ "step": 1500
+ },
+ {
+ "epoch": 25.99,
+ "eval_accuracy": 0.9781526566343972,
+ "eval_f1": 0.9684325094947744,
+ "eval_loss": 0.08812534809112549,
+ "eval_precision": 0.9676870878552774,
+ "eval_recall": 0.9691790804350827,
+ "eval_runtime": 9.0034,
+ "eval_samples_per_second": 767.488,
+ "eval_steps_per_second": 2.999,
+ "step": 1560
+ },
+ {
+ "epoch": 26.99,
+ "eval_accuracy": 0.9782548963225662,
+ "eval_f1": 0.9684803649117427,
+ "eval_loss": 0.08933103829622269,
+ "eval_precision": 0.9679127176886124,
+ "eval_recall": 0.9690486783363761,
+ "eval_runtime": 8.7744,
+ "eval_samples_per_second": 787.518,
+ "eval_steps_per_second": 3.077,
+ "step": 1620
+ },
+ {
+ "epoch": 27.99,
+ "eval_accuracy": 0.9780951468098023,
+ "eval_f1": 0.9683360927152317,
+ "eval_loss": 0.090970478951931,
+ "eval_precision": 0.9676092584366048,
+ "eval_recall": 0.9690640197597533,
+ "eval_runtime": 8.5971,
+ "eval_samples_per_second": 803.763,
+ "eval_steps_per_second": 3.141,
+ "step": 1680
+ },
+ {
+ "epoch": 28.99,
+ "eval_accuracy": 0.9782612863030767,
+ "eval_f1": 0.9684535086171853,
+ "eval_loss": 0.09189366549253464,
+ "eval_precision": 0.9683569544143813,
+ "eval_recall": 0.9685500820766151,
+ "eval_runtime": 8.7669,
+ "eval_samples_per_second": 788.195,
+ "eval_steps_per_second": 3.08,
+ "step": 1740
+ },
+ {
+ "epoch": 29.99,
+ "eval_accuracy": 0.9780759768682705,
+ "eval_f1": 0.9681861749031936,
+ "eval_loss": 0.09329535067081451,
+ "eval_precision": 0.9678225410841305,
+ "eval_recall": 0.9685500820766151,
+ "eval_runtime": 8.9984,
+ "eval_samples_per_second": 767.912,
+ "eval_steps_per_second": 3.001,
+ "step": 1800
+ },
+ {
+ "epoch": 30.99,
+ "eval_accuracy": 0.9780887568292916,
+ "eval_f1": 0.968276115855809,
+ "eval_loss": 0.0947079062461853,
+ "eval_precision": 0.967741935483871,
+ "eval_recall": 0.9688108862740286,
+ "eval_runtime": 9.134,
+ "eval_samples_per_second": 756.516,
+ "eval_steps_per_second": 2.956,
+ "step": 1860
+ },
+ {
+ "epoch": 31.99,
+ "eval_accuracy": 0.9782804562446085,
+ "eval_f1": 0.9685832975657613,
+ "eval_loss": 0.09658045321702957,
+ "eval_precision": 0.9677970255326318,
+ "eval_recall": 0.9693708482272986,
+ "eval_runtime": 8.9466,
+ "eval_samples_per_second": 772.361,
+ "eval_steps_per_second": 3.018,
+ "step": 1920
+ },
+ {
+ "epoch": 32.99,
+ "eval_accuracy": 0.9780951468098023,
+ "eval_f1": 0.9683005734261446,
+ "eval_loss": 0.09742453694343567,
+ "eval_precision": 0.967721916611759,
+ "eval_recall": 0.9688799226792262,
+ "eval_runtime": 9.0098,
+ "eval_samples_per_second": 766.942,
+ "eval_steps_per_second": 2.997,
+ "step": 1980
+ },
+ {
+ "epoch": 33.33,
+ "learning_rate": 1.055263157894737e-05,
+ "loss": 0.0211,
+ "step": 2000
+ },
+ {
+ "epoch": 33.99,
+ "eval_accuracy": 0.9784274257963513,
+ "eval_f1": 0.9688261902936441,
+ "eval_loss": 0.09810397773981094,
+ "eval_precision": 0.968351035296642,
+ "eval_recall": 0.9693018118221008,
+ "eval_runtime": 8.8808,
+ "eval_samples_per_second": 778.087,
+ "eval_steps_per_second": 3.04,
+ "step": 2040
+ },
+ {
+ "epoch": 34.99,
+ "eval_accuracy": 0.978286846225119,
+ "eval_f1": 0.9685722171959579,
+ "eval_loss": 0.09894430637359619,
+ "eval_precision": 0.9681268488573487,
+ "eval_recall": 0.9690179954896215,
+ "eval_runtime": 9.0496,
+ "eval_samples_per_second": 763.573,
+ "eval_steps_per_second": 2.984,
+ "step": 2100
+ },
+ {
+ "epoch": 35.99,
+ "eval_accuracy": 0.9783507460302246,
+ "eval_f1": 0.9687094017421564,
+ "eval_loss": 0.10078005492687225,
+ "eval_precision": 0.9679341374688876,
+ "eval_recall": 0.969485908902628,
+ "eval_runtime": 8.9188,
+ "eval_samples_per_second": 774.772,
+ "eval_steps_per_second": 3.027,
+ "step": 2160
+ },
+ {
+ "epoch": 36.99,
+ "eval_accuracy": 0.9782229464200134,
+ "eval_f1": 0.9684928880880267,
+ "eval_loss": 0.10152223706245422,
+ "eval_precision": 0.9681291390728477,
+ "eval_recall": 0.9688569105441602,
+ "eval_runtime": 8.9289,
+ "eval_samples_per_second": 773.895,
+ "eval_steps_per_second": 3.024,
+ "step": 2220
+ },
+ {
+ "epoch": 37.99,
+ "eval_accuracy": 0.97806958688776,
+ "eval_f1": 0.9682817728476643,
+ "eval_loss": 0.10151796787977219,
+ "eval_precision": 0.9676920130243248,
+ "eval_recall": 0.9688722519675376,
+ "eval_runtime": 9.2785,
+ "eval_samples_per_second": 744.734,
+ "eval_steps_per_second": 2.91,
+ "step": 2280
+ },
+ {
+ "epoch": 38.99,
+ "eval_accuracy": 0.9781654365954184,
+ "eval_f1": 0.9684071725914399,
+ "eval_loss": 0.10238787531852722,
+ "eval_precision": 0.9678506849734898,
+ "eval_recall": 0.9689643005078011,
+ "eval_runtime": 8.88,
+ "eval_samples_per_second": 778.153,
+ "eval_steps_per_second": 3.041,
+ "step": 2340
+ },
+ {
+ "epoch": 39.99,
+ "eval_accuracy": 0.9782165564395029,
+ "eval_f1": 0.9685044199615122,
+ "eval_loss": 0.10218308120965958,
+ "eval_precision": 0.9680220083374204,
+ "eval_recall": 0.968987312642867,
+ "eval_runtime": 9.7749,
+ "eval_samples_per_second": 706.909,
+ "eval_steps_per_second": 2.762,
+ "step": 2400
+ },
+ {
+ "epoch": 39.99,
+ "step": 2400,
+ "total_flos": 1.3719917000335334e+17,
+ "train_loss": 0.19011780440807344,
+ "train_runtime": 1964.637,
+ "train_samples_per_second": 1266.168,
+ "train_steps_per_second": 1.222
+ }
+ ],
+ "max_steps": 2400,
+ "num_train_epochs": 40,
+ "total_flos": 1.3719917000335334e+17,
+ "trial_name": null,
+ "trial_params": null
+ }
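
The step counts in this state file line up with the hyperparameters in the README: 62,189 training samples with an effective batch of 256 × 4 = 1024 give about 60 optimizer steps per epoch, so 40 epochs produce the 2400 total steps (and the eval entries every 60 steps) logged above. A quick arithmetic sketch, assuming incomplete accumulation batches are dropped:

```python
# Sanity-check the schedule implied by trainer_state.json and the README hyperparameters.
train_samples = 62189
per_device_batch = 256
grad_accum = 4
epochs = 40

effective_batch = per_device_batch * grad_accum      # 1024 (total_train_batch_size in the README)
steps_per_epoch = train_samples // effective_batch   # 60, assuming the last partial batch is dropped
total_steps = steps_per_epoch * epochs               # 2400, matching max_steps / global_step

print(effective_batch, steps_per_epoch, total_steps)
```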
training_args.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:cfe30f9636ac9afcec5398df38fe874cf42b23a0ee7f4e32aab9bbb6aae85244
+ size 3439