adalbertojunior commited on
Commit
ba43331
·
verified ·
1 Parent(s): c05b2f1

Upload 8 files

Browse files
.gitattributes CHANGED
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ tokenizer.json filter=lfs diff=lfs merge=lfs -text
config.json ADDED
@@ -0,0 +1,37 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "adalbertojunior/distill-bge-retromae",
3
+ "architectures": [
4
+ "XLMRobertaForTokenClassification"
5
+ ],
6
+ "attention_probs_dropout_prob": 0.1,
7
+ "bos_token_id": 0,
8
+ "classifier_dropout": null,
9
+ "eos_token_id": 2,
10
+ "finetuning_task": "ner",
11
+ "hidden_act": "gelu",
12
+ "hidden_dropout_prob": 0.1,
13
+ "hidden_size": 1024,
14
+ "id2label": {
15
+ "0": "B-Segmento",
16
+ "1": "I-Segmento"
17
+ },
18
+ "initializer_range": 0.02,
19
+ "intermediate_size": 4096,
20
+ "label2id": {
21
+ "B-Segmento": 0,
22
+ "I-Segmento": 1
23
+ },
24
+ "layer_norm_eps": 1e-05,
25
+ "max_position_embeddings": 8194,
26
+ "model_type": "xlm-roberta",
27
+ "num_attention_heads": 16,
28
+ "num_hidden_layers": 12,
29
+ "output_past": true,
30
+ "pad_token_id": 1,
31
+ "position_embedding_type": "absolute",
32
+ "torch_dtype": "bfloat16",
33
+ "transformers_version": "4.46.1",
34
+ "type_vocab_size": 1,
35
+ "use_cache": true,
36
+ "vocab_size": 250002
37
+ }
model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9dfb87c1efad6a2ff5360926a9f0e7141f16dfe73d012faf1926d916ecaacb1b
3
+ size 831129388
sentencepiece.bpe.model ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cfc8146abe2a0488e9e2a0c56de7952f7c11ab059eca145a0a727afce0db2865
3
+ size 5069051
special_tokens_map.json ADDED
@@ -0,0 +1,51 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "bos_token": {
3
+ "content": "<s>",
4
+ "lstrip": false,
5
+ "normalized": false,
6
+ "rstrip": false,
7
+ "single_word": false
8
+ },
9
+ "cls_token": {
10
+ "content": "<s>",
11
+ "lstrip": false,
12
+ "normalized": false,
13
+ "rstrip": false,
14
+ "single_word": false
15
+ },
16
+ "eos_token": {
17
+ "content": "</s>",
18
+ "lstrip": false,
19
+ "normalized": false,
20
+ "rstrip": false,
21
+ "single_word": false
22
+ },
23
+ "mask_token": {
24
+ "content": "<mask>",
25
+ "lstrip": true,
26
+ "normalized": false,
27
+ "rstrip": false,
28
+ "single_word": false
29
+ },
30
+ "pad_token": {
31
+ "content": "<pad>",
32
+ "lstrip": false,
33
+ "normalized": false,
34
+ "rstrip": false,
35
+ "single_word": false
36
+ },
37
+ "sep_token": {
38
+ "content": "</s>",
39
+ "lstrip": false,
40
+ "normalized": false,
41
+ "rstrip": false,
42
+ "single_word": false
43
+ },
44
+ "unk_token": {
45
+ "content": "<unk>",
46
+ "lstrip": false,
47
+ "normalized": false,
48
+ "rstrip": false,
49
+ "single_word": false
50
+ }
51
+ }
tokenizer.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:95c87c2955c3edd5cce0a9d3ea8d1e49b444b9f75b82ab9e68f7005cd6371142
3
+ size 17082898
tokenizer_config.json ADDED
@@ -0,0 +1,61 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "added_tokens_decoder": {
3
+ "0": {
4
+ "content": "<s>",
5
+ "lstrip": false,
6
+ "normalized": false,
7
+ "rstrip": false,
8
+ "single_word": false,
9
+ "special": true
10
+ },
11
+ "1": {
12
+ "content": "<pad>",
13
+ "lstrip": false,
14
+ "normalized": false,
15
+ "rstrip": false,
16
+ "single_word": false,
17
+ "special": true
18
+ },
19
+ "2": {
20
+ "content": "</s>",
21
+ "lstrip": false,
22
+ "normalized": false,
23
+ "rstrip": false,
24
+ "single_word": false,
25
+ "special": true
26
+ },
27
+ "3": {
28
+ "content": "<unk>",
29
+ "lstrip": false,
30
+ "normalized": false,
31
+ "rstrip": false,
32
+ "single_word": false,
33
+ "special": true
34
+ },
35
+ "250001": {
36
+ "content": "<mask>",
37
+ "lstrip": true,
38
+ "normalized": false,
39
+ "rstrip": false,
40
+ "single_word": false,
41
+ "special": true
42
+ }
43
+ },
44
+ "bos_token": "<s>",
45
+ "clean_up_tokenization_spaces": true,
46
+ "cls_token": "<s>",
47
+ "eos_token": "</s>",
48
+ "mask_token": "<mask>",
49
+ "max_length": 1024,
50
+ "model_max_length": 8192,
51
+ "pad_to_multiple_of": null,
52
+ "pad_token": "<pad>",
53
+ "pad_token_type_id": 0,
54
+ "padding_side": "right",
55
+ "sep_token": "</s>",
56
+ "stride": 0,
57
+ "tokenizer_class": "XLMRobertaTokenizer",
58
+ "truncation_side": "right",
59
+ "truncation_strategy": "longest_first",
60
+ "unk_token": "<unk>"
61
+ }
trainer_state.json ADDED
@@ -0,0 +1,1038 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": null,
3
+ "best_model_checkpoint": null,
4
+ "epoch": 0.9118125313435558,
5
+ "eval_steps": 100,
6
+ "global_step": 7500,
7
+ "is_hyper_param_search": false,
8
+ "is_local_process_zero": true,
9
+ "is_world_process_zero": true,
10
+ "log_history": [
11
+ {
12
+ "epoch": 0.012157500417914077,
13
+ "eval_accuracy": 0.9982292163897571,
14
+ "eval_f1": 0.5714285714285715,
15
+ "eval_loss": NaN,
16
+ "eval_precision": 0.5992217898832685,
17
+ "eval_recall": 0.5460992907801419,
18
+ "eval_runtime": 4.2246,
19
+ "eval_samples_per_second": 12.072,
20
+ "eval_steps_per_second": 12.072,
21
+ "step": 100
22
+ },
23
+ {
24
+ "epoch": 0.024315000835828153,
25
+ "eval_accuracy": 0.9979520502594583,
26
+ "eval_f1": 0.561128526645768,
27
+ "eval_loss": NaN,
28
+ "eval_precision": 0.5028089887640449,
29
+ "eval_recall": 0.6347517730496454,
30
+ "eval_runtime": 4.0691,
31
+ "eval_samples_per_second": 12.534,
32
+ "eval_steps_per_second": 12.534,
33
+ "step": 200
34
+ },
35
+ {
36
+ "epoch": 0.03647250125374223,
37
+ "eval_accuracy": 0.9982908088631569,
38
+ "eval_f1": 0.6003401360544217,
39
+ "eval_loss": NaN,
40
+ "eval_precision": 0.576797385620915,
41
+ "eval_recall": 0.625886524822695,
42
+ "eval_runtime": 3.8202,
43
+ "eval_samples_per_second": 13.35,
44
+ "eval_steps_per_second": 13.35,
45
+ "step": 300
46
+ },
47
+ {
48
+ "epoch": 0.048630001671656306,
49
+ "eval_accuracy": 0.9983370032182067,
50
+ "eval_f1": 0.5949579831932772,
51
+ "eval_loss": NaN,
52
+ "eval_precision": 0.5654952076677316,
53
+ "eval_recall": 0.6276595744680851,
54
+ "eval_runtime": 3.892,
55
+ "eval_samples_per_second": 13.104,
56
+ "eval_steps_per_second": 13.104,
57
+ "step": 400
58
+ },
59
+ {
60
+ "epoch": 0.060787502089570386,
61
+ "grad_norm": 0.56640625,
62
+ "learning_rate": 9.39209726443769e-05,
63
+ "loss": 0.0667,
64
+ "step": 500
65
+ },
66
+ {
67
+ "epoch": 0.060787502089570386,
68
+ "eval_accuracy": 0.9980906333246077,
69
+ "eval_f1": 0.5690789473684211,
70
+ "eval_loss": NaN,
71
+ "eval_precision": 0.5306748466257669,
72
+ "eval_recall": 0.6134751773049646,
73
+ "eval_runtime": 3.9741,
74
+ "eval_samples_per_second": 12.833,
75
+ "eval_steps_per_second": 12.833,
76
+ "step": 500
77
+ },
78
+ {
79
+ "epoch": 0.07294500250748447,
80
+ "eval_accuracy": 0.9981522257980074,
81
+ "eval_f1": 0.5753646677471637,
82
+ "eval_loss": NaN,
83
+ "eval_precision": 0.5298507462686567,
84
+ "eval_recall": 0.6294326241134752,
85
+ "eval_runtime": 3.9469,
86
+ "eval_samples_per_second": 12.922,
87
+ "eval_steps_per_second": 12.922,
88
+ "step": 600
89
+ },
90
+ {
91
+ "epoch": 0.08510250292539853,
92
+ "eval_accuracy": 0.9983524013365567,
93
+ "eval_f1": 0.608108108108108,
94
+ "eval_loss": NaN,
95
+ "eval_precision": 0.5806451612903226,
96
+ "eval_recall": 0.6382978723404256,
97
+ "eval_runtime": 4.9419,
98
+ "eval_samples_per_second": 10.32,
99
+ "eval_steps_per_second": 10.32,
100
+ "step": 700
101
+ },
102
+ {
103
+ "epoch": 0.09726000334331261,
104
+ "eval_accuracy": 0.9978904577860586,
105
+ "eval_f1": 0.5460317460317461,
106
+ "eval_loss": NaN,
107
+ "eval_precision": 0.4942528735632184,
108
+ "eval_recall": 0.6099290780141844,
109
+ "eval_runtime": 3.9464,
110
+ "eval_samples_per_second": 12.923,
111
+ "eval_steps_per_second": 12.923,
112
+ "step": 800
113
+ },
114
+ {
115
+ "epoch": 0.10941750376122669,
116
+ "eval_accuracy": 0.9981984201530573,
117
+ "eval_f1": 0.5831960461285007,
118
+ "eval_loss": NaN,
119
+ "eval_precision": 0.5446153846153846,
120
+ "eval_recall": 0.6276595744680851,
121
+ "eval_runtime": 3.9207,
122
+ "eval_samples_per_second": 13.008,
123
+ "eval_steps_per_second": 13.008,
124
+ "step": 900
125
+ },
126
+ {
127
+ "epoch": 0.12157500417914077,
128
+ "grad_norm": 0.49609375,
129
+ "learning_rate": 8.78419452887538e-05,
130
+ "loss": 0.0273,
131
+ "step": 1000
132
+ },
133
+ {
134
+ "epoch": 0.12157500417914077,
135
+ "eval_accuracy": 0.9981368276796575,
136
+ "eval_f1": 0.5692821368948245,
137
+ "eval_loss": NaN,
138
+ "eval_precision": 0.5378548895899053,
139
+ "eval_recall": 0.6046099290780141,
140
+ "eval_runtime": 3.9151,
141
+ "eval_samples_per_second": 13.026,
142
+ "eval_steps_per_second": 13.026,
143
+ "step": 1000
144
+ },
145
+ {
146
+ "epoch": 0.13373250459705485,
147
+ "eval_accuracy": 0.9985679749934558,
148
+ "eval_f1": 0.6378091872791519,
149
+ "eval_loss": NaN,
150
+ "eval_precision": 0.6355633802816901,
151
+ "eval_recall": 0.6400709219858156,
152
+ "eval_runtime": 3.9167,
153
+ "eval_samples_per_second": 13.021,
154
+ "eval_steps_per_second": 13.021,
155
+ "step": 1100
156
+ },
157
+ {
158
+ "epoch": 0.14589000501496893,
159
+ "eval_accuracy": 0.9982446145081071,
160
+ "eval_f1": 0.5888324873096447,
161
+ "eval_loss": NaN,
162
+ "eval_precision": 0.5631067961165048,
163
+ "eval_recall": 0.6170212765957447,
164
+ "eval_runtime": 3.9347,
165
+ "eval_samples_per_second": 12.962,
166
+ "eval_steps_per_second": 12.962,
167
+ "step": 1200
168
+ },
169
+ {
170
+ "epoch": 0.158047505432883,
171
+ "eval_accuracy": 0.9982908088631569,
172
+ "eval_f1": 0.6126418152350082,
173
+ "eval_loss": NaN,
174
+ "eval_precision": 0.564179104477612,
175
+ "eval_recall": 0.6702127659574468,
176
+ "eval_runtime": 3.9694,
177
+ "eval_samples_per_second": 12.848,
178
+ "eval_steps_per_second": 12.848,
179
+ "step": 1300
180
+ },
181
+ {
182
+ "epoch": 0.17020500585079706,
183
+ "eval_accuracy": 0.9983524013365567,
184
+ "eval_f1": 0.6128500823723229,
185
+ "eval_loss": NaN,
186
+ "eval_precision": 0.5723076923076923,
187
+ "eval_recall": 0.6595744680851063,
188
+ "eval_runtime": 3.9587,
189
+ "eval_samples_per_second": 12.883,
190
+ "eval_steps_per_second": 12.883,
191
+ "step": 1400
192
+ },
193
+ {
194
+ "epoch": 0.18236250626871114,
195
+ "grad_norm": 1.453125,
196
+ "learning_rate": 8.17629179331307e-05,
197
+ "loss": 0.0198,
198
+ "step": 1500
199
+ },
200
+ {
201
+ "epoch": 0.18236250626871114,
202
+ "eval_accuracy": 0.9984139938099564,
203
+ "eval_f1": 0.6216216216216217,
204
+ "eval_loss": NaN,
205
+ "eval_precision": 0.5935483870967742,
206
+ "eval_recall": 0.6524822695035462,
207
+ "eval_runtime": 3.9699,
208
+ "eval_samples_per_second": 12.847,
209
+ "eval_steps_per_second": 12.847,
210
+ "step": 1500
211
+ },
212
+ {
213
+ "epoch": 0.19452000668662522,
214
+ "eval_accuracy": 0.9983985956916065,
215
+ "eval_f1": 0.6113013698630136,
216
+ "eval_loss": NaN,
217
+ "eval_precision": 0.5910596026490066,
218
+ "eval_recall": 0.6329787234042553,
219
+ "eval_runtime": 3.9634,
220
+ "eval_samples_per_second": 12.868,
221
+ "eval_steps_per_second": 12.868,
222
+ "step": 1600
223
+ },
224
+ {
225
+ "epoch": 0.2066775071045393,
226
+ "eval_accuracy": 0.998537178756756,
227
+ "eval_f1": 0.6450511945392491,
228
+ "eval_loss": NaN,
229
+ "eval_precision": 0.6217105263157895,
230
+ "eval_recall": 0.6702127659574468,
231
+ "eval_runtime": 3.9348,
232
+ "eval_samples_per_second": 12.961,
233
+ "eval_steps_per_second": 12.961,
234
+ "step": 1700
235
+ },
236
+ {
237
+ "epoch": 0.21883500752245338,
238
+ "eval_accuracy": 0.9985063825200561,
239
+ "eval_f1": 0.61101243339254,
240
+ "eval_loss": NaN,
241
+ "eval_precision": 0.6120996441281139,
242
+ "eval_recall": 0.6099290780141844,
243
+ "eval_runtime": 3.9403,
244
+ "eval_samples_per_second": 12.943,
245
+ "eval_steps_per_second": 12.943,
246
+ "step": 1800
247
+ },
248
+ {
249
+ "epoch": 0.23099250794036746,
250
+ "eval_accuracy": 0.9985987712301557,
251
+ "eval_f1": 0.638888888888889,
252
+ "eval_loss": NaN,
253
+ "eval_precision": 0.6258503401360545,
254
+ "eval_recall": 0.6524822695035462,
255
+ "eval_runtime": 3.8792,
256
+ "eval_samples_per_second": 13.147,
257
+ "eval_steps_per_second": 13.147,
258
+ "step": 1900
259
+ },
260
+ {
261
+ "epoch": 0.24315000835828154,
262
+ "grad_norm": 0.337890625,
263
+ "learning_rate": 7.56838905775076e-05,
264
+ "loss": 0.015,
265
+ "step": 2000
266
+ },
267
+ {
268
+ "epoch": 0.24315000835828154,
269
+ "eval_accuracy": 0.9983677994549066,
270
+ "eval_f1": 0.6179966044142615,
271
+ "eval_loss": NaN,
272
+ "eval_precision": 0.5928338762214984,
273
+ "eval_recall": 0.6453900709219859,
274
+ "eval_runtime": 3.8941,
275
+ "eval_samples_per_second": 13.097,
276
+ "eval_steps_per_second": 13.097,
277
+ "step": 2000
278
+ },
279
+ {
280
+ "epoch": 0.2553075087761956,
281
+ "eval_accuracy": 0.998521780638406,
282
+ "eval_f1": 0.6296928327645053,
283
+ "eval_loss": NaN,
284
+ "eval_precision": 0.6069078947368421,
285
+ "eval_recall": 0.6542553191489362,
286
+ "eval_runtime": 3.8421,
287
+ "eval_samples_per_second": 13.274,
288
+ "eval_steps_per_second": 13.274,
289
+ "step": 2100
290
+ },
291
+ {
292
+ "epoch": 0.2674650091941097,
293
+ "eval_accuracy": 0.9984755862833562,
294
+ "eval_f1": 0.6352739726027398,
295
+ "eval_loss": NaN,
296
+ "eval_precision": 0.6142384105960265,
297
+ "eval_recall": 0.6578014184397163,
298
+ "eval_runtime": 3.8929,
299
+ "eval_samples_per_second": 13.101,
300
+ "eval_steps_per_second": 13.101,
301
+ "step": 2200
302
+ },
303
+ {
304
+ "epoch": 0.2796225096120238,
305
+ "eval_accuracy": 0.9985679749934558,
306
+ "eval_f1": 0.6507666098807496,
307
+ "eval_loss": NaN,
308
+ "eval_precision": 0.6262295081967213,
309
+ "eval_recall": 0.6773049645390071,
310
+ "eval_runtime": 4.0354,
311
+ "eval_samples_per_second": 12.638,
312
+ "eval_steps_per_second": 12.638,
313
+ "step": 2300
314
+ },
315
+ {
316
+ "epoch": 0.29178001002993786,
317
+ "eval_accuracy": 0.998275410744807,
318
+ "eval_f1": 0.5949152542372882,
319
+ "eval_loss": NaN,
320
+ "eval_precision": 0.5698051948051948,
321
+ "eval_recall": 0.6223404255319149,
322
+ "eval_runtime": 3.9548,
323
+ "eval_samples_per_second": 12.896,
324
+ "eval_steps_per_second": 12.896,
325
+ "step": 2400
326
+ },
327
+ {
328
+ "epoch": 0.30393751044785194,
329
+ "grad_norm": 0.5,
330
+ "learning_rate": 6.96048632218845e-05,
331
+ "loss": 0.0122,
332
+ "step": 2500
333
+ },
334
+ {
335
+ "epoch": 0.30393751044785194,
336
+ "eval_accuracy": 0.9983677994549066,
337
+ "eval_f1": 0.6200657894736843,
338
+ "eval_loss": NaN,
339
+ "eval_precision": 0.5782208588957055,
340
+ "eval_recall": 0.6684397163120568,
341
+ "eval_runtime": 3.9191,
342
+ "eval_samples_per_second": 13.013,
343
+ "eval_steps_per_second": 13.013,
344
+ "step": 2500
345
+ },
346
+ {
347
+ "epoch": 0.316095010865766,
348
+ "eval_accuracy": 0.998537178756756,
349
+ "eval_f1": 0.6379310344827586,
350
+ "eval_loss": NaN,
351
+ "eval_precision": 0.6208053691275168,
352
+ "eval_recall": 0.6560283687943262,
353
+ "eval_runtime": 3.9133,
354
+ "eval_samples_per_second": 13.033,
355
+ "eval_steps_per_second": 13.033,
356
+ "step": 2600
357
+ },
358
+ {
359
+ "epoch": 0.32825251128368005,
360
+ "eval_accuracy": 0.9984293919283064,
361
+ "eval_f1": 0.6284722222222222,
362
+ "eval_loss": NaN,
363
+ "eval_precision": 0.6156462585034014,
364
+ "eval_recall": 0.6418439716312057,
365
+ "eval_runtime": 3.9458,
366
+ "eval_samples_per_second": 12.925,
367
+ "eval_steps_per_second": 12.925,
368
+ "step": 2700
369
+ },
370
+ {
371
+ "epoch": 0.34041001170159413,
372
+ "eval_accuracy": 0.9984293919283064,
373
+ "eval_f1": 0.6254295532646049,
374
+ "eval_loss": NaN,
375
+ "eval_precision": 0.6066666666666667,
376
+ "eval_recall": 0.6453900709219859,
377
+ "eval_runtime": 5.0134,
378
+ "eval_samples_per_second": 10.173,
379
+ "eval_steps_per_second": 10.173,
380
+ "step": 2800
381
+ },
382
+ {
383
+ "epoch": 0.3525675121195082,
384
+ "eval_accuracy": 0.9984601881650063,
385
+ "eval_f1": 0.6185567010309279,
386
+ "eval_loss": NaN,
387
+ "eval_precision": 0.6,
388
+ "eval_recall": 0.6382978723404256,
389
+ "eval_runtime": 3.9121,
390
+ "eval_samples_per_second": 13.037,
391
+ "eval_steps_per_second": 13.037,
392
+ "step": 2900
393
+ },
394
+ {
395
+ "epoch": 0.3647250125374223,
396
+ "grad_norm": 0.47265625,
397
+ "learning_rate": 6.352583586626139e-05,
398
+ "loss": 0.0102,
399
+ "step": 3000
400
+ },
401
+ {
402
+ "epoch": 0.3647250125374223,
403
+ "eval_accuracy": 0.9984601881650063,
404
+ "eval_f1": 0.6175438596491228,
405
+ "eval_loss": NaN,
406
+ "eval_precision": 0.6111111111111112,
407
+ "eval_recall": 0.624113475177305,
408
+ "eval_runtime": 3.9032,
409
+ "eval_samples_per_second": 13.066,
410
+ "eval_steps_per_second": 13.066,
411
+ "step": 3000
412
+ },
413
+ {
414
+ "epoch": 0.37688251295533637,
415
+ "eval_accuracy": 0.9983985956916065,
416
+ "eval_f1": 0.6067615658362988,
417
+ "eval_loss": NaN,
418
+ "eval_precision": 0.6089285714285714,
419
+ "eval_recall": 0.6046099290780141,
420
+ "eval_runtime": 3.9167,
421
+ "eval_samples_per_second": 13.021,
422
+ "eval_steps_per_second": 13.021,
423
+ "step": 3100
424
+ },
425
+ {
426
+ "epoch": 0.38904001337325045,
427
+ "eval_accuracy": 0.9981984201530573,
428
+ "eval_f1": 0.5792163543441226,
429
+ "eval_loss": NaN,
430
+ "eval_precision": 0.5573770491803278,
431
+ "eval_recall": 0.6028368794326241,
432
+ "eval_runtime": 3.9349,
433
+ "eval_samples_per_second": 12.961,
434
+ "eval_steps_per_second": 12.961,
435
+ "step": 3200
436
+ },
437
+ {
438
+ "epoch": 0.40119751379116453,
439
+ "eval_accuracy": 0.998275410744807,
440
+ "eval_f1": 0.6016528925619835,
441
+ "eval_loss": NaN,
442
+ "eval_precision": 0.5634674922600619,
443
+ "eval_recall": 0.6453900709219859,
444
+ "eval_runtime": 5.1154,
445
+ "eval_samples_per_second": 9.97,
446
+ "eval_steps_per_second": 9.97,
447
+ "step": 3300
448
+ },
449
+ {
450
+ "epoch": 0.4133550142090786,
451
+ "eval_accuracy": 0.9983985956916065,
452
+ "eval_f1": 0.6064735945485519,
453
+ "eval_loss": NaN,
454
+ "eval_precision": 0.5836065573770491,
455
+ "eval_recall": 0.6312056737588653,
456
+ "eval_runtime": 3.9386,
457
+ "eval_samples_per_second": 12.949,
458
+ "eval_steps_per_second": 12.949,
459
+ "step": 3400
460
+ },
461
+ {
462
+ "epoch": 0.4255125146269927,
463
+ "grad_norm": 0.63671875,
464
+ "learning_rate": 5.744680851063831e-05,
465
+ "loss": 0.0087,
466
+ "step": 3500
467
+ },
468
+ {
469
+ "epoch": 0.4255125146269927,
470
+ "eval_accuracy": 0.9984601881650063,
471
+ "eval_f1": 0.6166950596252129,
472
+ "eval_loss": NaN,
473
+ "eval_precision": 0.5934426229508196,
474
+ "eval_recall": 0.6418439716312057,
475
+ "eval_runtime": 3.9507,
476
+ "eval_samples_per_second": 12.909,
477
+ "eval_steps_per_second": 12.909,
478
+ "step": 3500
479
+ },
480
+ {
481
+ "epoch": 0.43767001504490677,
482
+ "eval_accuracy": 0.9985525768751059,
483
+ "eval_f1": 0.6412859560067682,
484
+ "eval_loss": NaN,
485
+ "eval_precision": 0.6132686084142395,
486
+ "eval_recall": 0.6719858156028369,
487
+ "eval_runtime": 3.9696,
488
+ "eval_samples_per_second": 12.848,
489
+ "eval_steps_per_second": 12.848,
490
+ "step": 3600
491
+ },
492
+ {
493
+ "epoch": 0.44982751546282085,
494
+ "eval_accuracy": 0.998521780638406,
495
+ "eval_f1": 0.638655462184874,
496
+ "eval_loss": NaN,
497
+ "eval_precision": 0.6070287539936102,
498
+ "eval_recall": 0.6737588652482269,
499
+ "eval_runtime": 3.9563,
500
+ "eval_samples_per_second": 12.891,
501
+ "eval_steps_per_second": 12.891,
502
+ "step": 3700
503
+ },
504
+ {
505
+ "epoch": 0.46198501588073493,
506
+ "eval_accuracy": 0.9984909844017061,
507
+ "eval_f1": 0.6237288135593221,
508
+ "eval_loss": NaN,
509
+ "eval_precision": 0.5974025974025974,
510
+ "eval_recall": 0.6524822695035462,
511
+ "eval_runtime": 3.9225,
512
+ "eval_samples_per_second": 13.002,
513
+ "eval_steps_per_second": 13.002,
514
+ "step": 3800
515
+ },
516
+ {
517
+ "epoch": 0.474142516298649,
518
+ "eval_accuracy": 0.9984601881650063,
519
+ "eval_f1": 0.6193656093489149,
520
+ "eval_loss": NaN,
521
+ "eval_precision": 0.5851735015772871,
522
+ "eval_recall": 0.6578014184397163,
523
+ "eval_runtime": 3.8167,
524
+ "eval_samples_per_second": 13.362,
525
+ "eval_steps_per_second": 13.362,
526
+ "step": 3900
527
+ },
528
+ {
529
+ "epoch": 0.4863000167165631,
530
+ "grad_norm": 0.2578125,
531
+ "learning_rate": 5.13677811550152e-05,
532
+ "loss": 0.0074,
533
+ "step": 4000
534
+ },
535
+ {
536
+ "epoch": 0.4863000167165631,
537
+ "eval_accuracy": 0.9984293919283064,
538
+ "eval_f1": 0.6102564102564102,
539
+ "eval_loss": NaN,
540
+ "eval_precision": 0.5891089108910891,
541
+ "eval_recall": 0.6329787234042553,
542
+ "eval_runtime": 3.9064,
543
+ "eval_samples_per_second": 13.055,
544
+ "eval_steps_per_second": 13.055,
545
+ "step": 4000
546
+ },
547
+ {
548
+ "epoch": 0.49845751713447717,
549
+ "eval_accuracy": 0.9983677994549066,
550
+ "eval_f1": 0.5982905982905984,
551
+ "eval_loss": NaN,
552
+ "eval_precision": 0.5775577557755776,
553
+ "eval_recall": 0.6205673758865248,
554
+ "eval_runtime": 3.8733,
555
+ "eval_samples_per_second": 13.167,
556
+ "eval_steps_per_second": 13.167,
557
+ "step": 4100
558
+ },
559
+ {
560
+ "epoch": 0.5106150175523912,
561
+ "eval_accuracy": 0.9985525768751059,
562
+ "eval_f1": 0.6456558773424191,
563
+ "eval_loss": NaN,
564
+ "eval_precision": 0.6213114754098361,
565
+ "eval_recall": 0.6719858156028369,
566
+ "eval_runtime": 3.9945,
567
+ "eval_samples_per_second": 12.767,
568
+ "eval_steps_per_second": 12.767,
569
+ "step": 4200
570
+ },
571
+ {
572
+ "epoch": 0.5227725179703053,
573
+ "eval_accuracy": 0.9984755862833562,
574
+ "eval_f1": 0.6348408710217756,
575
+ "eval_loss": NaN,
576
+ "eval_precision": 0.6015873015873016,
577
+ "eval_recall": 0.6719858156028369,
578
+ "eval_runtime": 3.9449,
579
+ "eval_samples_per_second": 12.928,
580
+ "eval_steps_per_second": 12.928,
581
+ "step": 4300
582
+ },
583
+ {
584
+ "epoch": 0.5349300183882194,
585
+ "eval_accuracy": 0.9984447900466563,
586
+ "eval_f1": 0.6231155778894472,
587
+ "eval_loss": NaN,
588
+ "eval_precision": 0.5904761904761905,
589
+ "eval_recall": 0.6595744680851063,
590
+ "eval_runtime": 3.9223,
591
+ "eval_samples_per_second": 13.003,
592
+ "eval_steps_per_second": 13.003,
593
+ "step": 4400
594
+ },
595
+ {
596
+ "epoch": 0.5470875188061335,
597
+ "grad_norm": 0.796875,
598
+ "learning_rate": 4.52887537993921e-05,
599
+ "loss": 0.0069,
600
+ "step": 4500
601
+ },
602
+ {
603
+ "epoch": 0.5470875188061335,
604
+ "eval_accuracy": 0.9983985956916065,
605
+ "eval_f1": 0.6139767054908487,
606
+ "eval_loss": NaN,
607
+ "eval_precision": 0.5783699059561128,
608
+ "eval_recall": 0.6542553191489362,
609
+ "eval_runtime": 3.8802,
610
+ "eval_samples_per_second": 13.144,
611
+ "eval_steps_per_second": 13.144,
612
+ "step": 4500
613
+ },
614
+ {
615
+ "epoch": 0.5592450192240476,
616
+ "eval_accuracy": 0.9983216050998568,
617
+ "eval_f1": 0.5902192242833052,
618
+ "eval_loss": NaN,
619
+ "eval_precision": 0.5627009646302251,
620
+ "eval_recall": 0.6205673758865248,
621
+ "eval_runtime": 3.8007,
622
+ "eval_samples_per_second": 13.418,
623
+ "eval_steps_per_second": 13.418,
624
+ "step": 4600
625
+ },
626
+ {
627
+ "epoch": 0.5714025196419616,
628
+ "eval_accuracy": 0.9984755862833562,
629
+ "eval_f1": 0.6243739565943239,
630
+ "eval_loss": NaN,
631
+ "eval_precision": 0.5899053627760252,
632
+ "eval_recall": 0.6631205673758865,
633
+ "eval_runtime": 5.2589,
634
+ "eval_samples_per_second": 9.698,
635
+ "eval_steps_per_second": 9.698,
636
+ "step": 4700
637
+ },
638
+ {
639
+ "epoch": 0.5835600200598757,
640
+ "eval_accuracy": 0.9984755862833562,
641
+ "eval_f1": 0.6231155778894472,
642
+ "eval_loss": NaN,
643
+ "eval_precision": 0.5904761904761905,
644
+ "eval_recall": 0.6595744680851063,
645
+ "eval_runtime": 3.9226,
646
+ "eval_samples_per_second": 13.001,
647
+ "eval_steps_per_second": 13.001,
648
+ "step": 4800
649
+ },
650
+ {
651
+ "epoch": 0.5957175204777898,
652
+ "eval_accuracy": 0.9984755862833562,
653
+ "eval_f1": 0.6298157453936348,
654
+ "eval_loss": NaN,
655
+ "eval_precision": 0.5968253968253968,
656
+ "eval_recall": 0.6666666666666666,
657
+ "eval_runtime": 3.9435,
658
+ "eval_samples_per_second": 12.933,
659
+ "eval_steps_per_second": 12.933,
660
+ "step": 4900
661
+ },
662
+ {
663
+ "epoch": 0.6078750208957039,
664
+ "grad_norm": 0.298828125,
665
+ "learning_rate": 3.9209726443769e-05,
666
+ "loss": 0.0065,
667
+ "step": 5000
668
+ },
669
+ {
670
+ "epoch": 0.6078750208957039,
671
+ "eval_accuracy": 0.9984293919283064,
672
+ "eval_f1": 0.6074450084602369,
673
+ "eval_loss": NaN,
674
+ "eval_precision": 0.580906148867314,
675
+ "eval_recall": 0.6365248226950354,
676
+ "eval_runtime": 3.9158,
677
+ "eval_samples_per_second": 13.024,
678
+ "eval_steps_per_second": 13.024,
679
+ "step": 5000
680
+ },
681
+ {
682
+ "epoch": 0.620032521313618,
683
+ "eval_accuracy": 0.9984909844017061,
684
+ "eval_f1": 0.6282271944922547,
685
+ "eval_loss": NaN,
686
+ "eval_precision": 0.6103678929765887,
687
+ "eval_recall": 0.6471631205673759,
688
+ "eval_runtime": 3.9051,
689
+ "eval_samples_per_second": 13.06,
690
+ "eval_steps_per_second": 13.06,
691
+ "step": 5100
692
+ },
693
+ {
694
+ "epoch": 0.632190021731532,
695
+ "eval_accuracy": 0.9984139938099564,
696
+ "eval_f1": 0.6184873949579831,
697
+ "eval_loss": NaN,
698
+ "eval_precision": 0.5878594249201278,
699
+ "eval_recall": 0.6524822695035462,
700
+ "eval_runtime": 3.9165,
701
+ "eval_samples_per_second": 13.022,
702
+ "eval_steps_per_second": 13.022,
703
+ "step": 5200
704
+ },
705
+ {
706
+ "epoch": 0.6443475221494461,
707
+ "eval_accuracy": 0.9984139938099564,
708
+ "eval_f1": 0.6115843270868824,
709
+ "eval_loss": NaN,
710
+ "eval_precision": 0.5885245901639344,
711
+ "eval_recall": 0.6365248226950354,
712
+ "eval_runtime": 3.9234,
713
+ "eval_samples_per_second": 12.999,
714
+ "eval_steps_per_second": 12.999,
715
+ "step": 5300
716
+ },
717
+ {
718
+ "epoch": 0.6565050225673601,
719
+ "eval_accuracy": 0.9985063825200561,
720
+ "eval_f1": 0.6408094435075886,
721
+ "eval_loss": NaN,
722
+ "eval_precision": 0.6109324758842444,
723
+ "eval_recall": 0.6737588652482269,
724
+ "eval_runtime": 3.9194,
725
+ "eval_samples_per_second": 13.012,
726
+ "eval_steps_per_second": 13.012,
727
+ "step": 5400
728
+ },
729
+ {
730
+ "epoch": 0.6686625229852742,
731
+ "grad_norm": 0.40625,
732
+ "learning_rate": 3.31306990881459e-05,
733
+ "loss": 0.0059,
734
+ "step": 5500
735
+ },
736
+ {
737
+ "epoch": 0.6686625229852742,
738
+ "eval_accuracy": 0.998537178756756,
739
+ "eval_f1": 0.6480541455160745,
740
+ "eval_loss": NaN,
741
+ "eval_precision": 0.6197411003236246,
742
+ "eval_recall": 0.6790780141843972,
743
+ "eval_runtime": 3.9129,
744
+ "eval_samples_per_second": 13.034,
745
+ "eval_steps_per_second": 13.034,
746
+ "step": 5500
747
+ },
748
+ {
749
+ "epoch": 0.6808200234031883,
750
+ "eval_accuracy": 0.9984601881650063,
751
+ "eval_f1": 0.6254295532646049,
752
+ "eval_loss": NaN,
753
+ "eval_precision": 0.6066666666666667,
754
+ "eval_recall": 0.6453900709219859,
755
+ "eval_runtime": 5.213,
756
+ "eval_samples_per_second": 9.783,
757
+ "eval_steps_per_second": 9.783,
758
+ "step": 5600
759
+ },
760
+ {
761
+ "epoch": 0.6929775238211023,
762
+ "eval_accuracy": 0.9984909844017061,
763
+ "eval_f1": 0.626465661641541,
764
+ "eval_loss": NaN,
765
+ "eval_precision": 0.5936507936507937,
766
+ "eval_recall": 0.6631205673758865,
767
+ "eval_runtime": 3.9034,
768
+ "eval_samples_per_second": 13.066,
769
+ "eval_steps_per_second": 13.066,
770
+ "step": 5700
771
+ },
772
+ {
773
+ "epoch": 0.7051350242390164,
774
+ "eval_accuracy": 0.9984601881650063,
775
+ "eval_f1": 0.6192893401015229,
776
+ "eval_loss": NaN,
777
+ "eval_precision": 0.5922330097087378,
778
+ "eval_recall": 0.648936170212766,
779
+ "eval_runtime": 3.9098,
780
+ "eval_samples_per_second": 13.044,
781
+ "eval_steps_per_second": 13.044,
782
+ "step": 5800
783
+ },
784
+ {
785
+ "epoch": 0.7172925246569305,
786
+ "eval_accuracy": 0.9984909844017061,
787
+ "eval_f1": 0.6323777403035413,
788
+ "eval_loss": NaN,
789
+ "eval_precision": 0.6028938906752411,
790
+ "eval_recall": 0.6648936170212766,
791
+ "eval_runtime": 3.9284,
792
+ "eval_samples_per_second": 12.982,
793
+ "eval_steps_per_second": 12.982,
794
+ "step": 5900
795
+ },
796
+ {
797
+ "epoch": 0.7294500250748446,
798
+ "grad_norm": 0.41796875,
799
+ "learning_rate": 2.7051671732522798e-05,
800
+ "loss": 0.0058,
801
+ "step": 6000
802
+ },
803
+ {
804
+ "epoch": 0.7294500250748446,
805
+ "eval_accuracy": 0.9984293919283064,
806
+ "eval_f1": 0.6126279863481229,
807
+ "eval_loss": NaN,
808
+ "eval_precision": 0.5904605263157895,
809
+ "eval_recall": 0.6365248226950354,
810
+ "eval_runtime": 5.2952,
811
+ "eval_samples_per_second": 9.631,
812
+ "eval_steps_per_second": 9.631,
813
+ "step": 6000
814
+ },
815
+ {
816
+ "epoch": 0.7416075254927587,
817
+ "eval_accuracy": 0.9984755862833562,
818
+ "eval_f1": 0.6201022146507666,
819
+ "eval_loss": NaN,
820
+ "eval_precision": 0.5967213114754099,
821
+ "eval_recall": 0.6453900709219859,
822
+ "eval_runtime": 5.2342,
823
+ "eval_samples_per_second": 9.744,
824
+ "eval_steps_per_second": 9.744,
825
+ "step": 6100
826
+ },
827
+ {
828
+ "epoch": 0.7537650259106727,
829
+ "eval_accuracy": 0.9984909844017061,
830
+ "eval_f1": 0.6306913996627319,
831
+ "eval_loss": NaN,
832
+ "eval_precision": 0.6012861736334405,
833
+ "eval_recall": 0.6631205673758865,
834
+ "eval_runtime": 3.8769,
835
+ "eval_samples_per_second": 13.155,
836
+ "eval_steps_per_second": 13.155,
837
+ "step": 6200
838
+ },
839
+ {
840
+ "epoch": 0.7659225263285868,
841
+ "eval_accuracy": 0.9984909844017061,
842
+ "eval_f1": 0.6281833616298812,
843
+ "eval_loss": NaN,
844
+ "eval_precision": 0.6026058631921825,
845
+ "eval_recall": 0.6560283687943262,
846
+ "eval_runtime": 3.8104,
847
+ "eval_samples_per_second": 13.384,
848
+ "eval_steps_per_second": 13.384,
849
+ "step": 6300
850
+ },
851
+ {
852
+ "epoch": 0.7780800267465009,
853
+ "eval_accuracy": 0.9984601881650063,
854
+ "eval_f1": 0.6302521008403361,
855
+ "eval_loss": NaN,
856
+ "eval_precision": 0.5990415335463258,
857
+ "eval_recall": 0.6648936170212766,
858
+ "eval_runtime": 3.9796,
859
+ "eval_samples_per_second": 12.815,
860
+ "eval_steps_per_second": 12.815,
861
+ "step": 6400
862
+ },
863
+ {
864
+ "epoch": 0.790237527164415,
865
+ "grad_norm": 0.5390625,
866
+ "learning_rate": 2.0972644376899697e-05,
867
+ "loss": 0.0055,
868
+ "step": 6500
869
+ },
870
+ {
871
+ "epoch": 0.790237527164415,
872
+ "eval_accuracy": 0.9984601881650063,
873
+ "eval_f1": 0.6302521008403361,
874
+ "eval_loss": NaN,
875
+ "eval_precision": 0.5990415335463258,
876
+ "eval_recall": 0.6648936170212766,
877
+ "eval_runtime": 3.9236,
878
+ "eval_samples_per_second": 12.998,
879
+ "eval_steps_per_second": 12.998,
880
+ "step": 6500
881
+ },
882
+ {
883
+ "epoch": 0.8023950275823291,
884
+ "eval_accuracy": 0.9985063825200561,
885
+ "eval_f1": 0.6256410256410256,
886
+ "eval_loss": NaN,
887
+ "eval_precision": 0.6039603960396039,
888
+ "eval_recall": 0.648936170212766,
889
+ "eval_runtime": 3.9065,
890
+ "eval_samples_per_second": 13.055,
891
+ "eval_steps_per_second": 13.055,
892
+ "step": 6600
893
+ },
894
+ {
895
+ "epoch": 0.8145525280002431,
896
+ "eval_accuracy": 0.9984755862833562,
897
+ "eval_f1": 0.6209262435677531,
898
+ "eval_loss": NaN,
899
+ "eval_precision": 0.6013289036544851,
900
+ "eval_recall": 0.6418439716312057,
901
+ "eval_runtime": 3.883,
902
+ "eval_samples_per_second": 13.134,
903
+ "eval_steps_per_second": 13.134,
904
+ "step": 6700
905
+ },
906
+ {
907
+ "epoch": 0.8267100284181572,
908
+ "eval_accuracy": 0.9984293919283064,
909
+ "eval_f1": 0.6214405360134003,
910
+ "eval_loss": NaN,
911
+ "eval_precision": 0.5888888888888889,
912
+ "eval_recall": 0.6578014184397163,
913
+ "eval_runtime": 3.8972,
914
+ "eval_samples_per_second": 13.086,
915
+ "eval_steps_per_second": 13.086,
916
+ "step": 6800
917
+ },
918
+ {
919
+ "epoch": 0.8388675288360713,
920
+ "eval_accuracy": 0.9984293919283064,
921
+ "eval_f1": 0.6121416526138279,
922
+ "eval_loss": NaN,
923
+ "eval_precision": 0.5836012861736335,
924
+ "eval_recall": 0.6436170212765957,
925
+ "eval_runtime": 3.9065,
926
+ "eval_samples_per_second": 13.055,
927
+ "eval_steps_per_second": 13.055,
928
+ "step": 6900
929
+ },
930
+ {
931
+ "epoch": 0.8510250292539854,
932
+ "grad_norm": 0.5234375,
933
+ "learning_rate": 1.4893617021276596e-05,
934
+ "loss": 0.0052,
935
+ "step": 7000
936
+ },
937
+ {
938
+ "epoch": 0.8510250292539854,
939
+ "eval_accuracy": 0.9984601881650063,
940
+ "eval_f1": 0.6235294117647059,
941
+ "eval_loss": NaN,
942
+ "eval_precision": 0.5926517571884984,
943
+ "eval_recall": 0.6578014184397163,
944
+ "eval_runtime": 4.6336,
945
+ "eval_samples_per_second": 11.007,
946
+ "eval_steps_per_second": 11.007,
947
+ "step": 7000
948
+ },
949
+ {
950
+ "epoch": 0.8631825296718995,
951
+ "eval_accuracy": 0.9984601881650063,
952
+ "eval_f1": 0.6302521008403361,
953
+ "eval_loss": NaN,
954
+ "eval_precision": 0.5990415335463258,
955
+ "eval_recall": 0.6648936170212766,
956
+ "eval_runtime": 3.9703,
957
+ "eval_samples_per_second": 12.845,
958
+ "eval_steps_per_second": 12.845,
959
+ "step": 7100
960
+ },
961
+ {
962
+ "epoch": 0.8753400300898135,
963
+ "eval_accuracy": 0.9984293919283064,
964
+ "eval_f1": 0.6214405360134003,
965
+ "eval_loss": NaN,
966
+ "eval_precision": 0.5888888888888889,
967
+ "eval_recall": 0.6578014184397163,
968
+ "eval_runtime": 3.9965,
969
+ "eval_samples_per_second": 12.761,
970
+ "eval_steps_per_second": 12.761,
971
+ "step": 7200
972
+ },
973
+ {
974
+ "epoch": 0.8874975305077276,
975
+ "eval_accuracy": 0.998537178756756,
976
+ "eval_f1": 0.637137989778535,
977
+ "eval_loss": NaN,
978
+ "eval_precision": 0.6131147540983607,
979
+ "eval_recall": 0.6631205673758865,
980
+ "eval_runtime": 4.0553,
981
+ "eval_samples_per_second": 12.576,
982
+ "eval_steps_per_second": 12.576,
983
+ "step": 7300
984
+ },
985
+ {
986
+ "epoch": 0.8996550309256417,
987
+ "eval_accuracy": 0.9985525768751059,
988
+ "eval_f1": 0.6417657045840408,
989
+ "eval_loss": NaN,
990
+ "eval_precision": 0.6156351791530945,
991
+ "eval_recall": 0.6702127659574468,
992
+ "eval_runtime": 4.0275,
993
+ "eval_samples_per_second": 12.663,
994
+ "eval_steps_per_second": 12.663,
995
+ "step": 7400
996
+ },
997
+ {
998
+ "epoch": 0.9118125313435558,
999
+ "grad_norm": 0.2412109375,
1000
+ "learning_rate": 8.814589665653496e-06,
1001
+ "loss": 0.0052,
1002
+ "step": 7500
1003
+ },
1004
+ {
1005
+ "epoch": 0.9118125313435558,
1006
+ "eval_accuracy": 0.9985525768751059,
1007
+ "eval_f1": 0.6417657045840408,
1008
+ "eval_loss": NaN,
1009
+ "eval_precision": 0.6156351791530945,
1010
+ "eval_recall": 0.6702127659574468,
1011
+ "eval_runtime": 4.1063,
1012
+ "eval_samples_per_second": 12.42,
1013
+ "eval_steps_per_second": 12.42,
1014
+ "step": 7500
1015
+ }
1016
+ ],
1017
+ "logging_steps": 500,
1018
+ "max_steps": 8225,
1019
+ "num_input_tokens_seen": 0,
1020
+ "num_train_epochs": 1,
1021
+ "save_steps": 500,
1022
+ "stateful_callbacks": {
1023
+ "TrainerControl": {
1024
+ "args": {
1025
+ "should_epoch_stop": false,
1026
+ "should_evaluate": false,
1027
+ "should_log": false,
1028
+ "should_save": true,
1029
+ "should_training_stop": false
1030
+ },
1031
+ "attributes": {}
1032
+ }
1033
+ },
1034
+ "total_flos": 6.941757589813818e+16,
1035
+ "train_batch_size": 1,
1036
+ "trial_name": null,
1037
+ "trial_params": null
1038
+ }
training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c11918fc93cdd5b1d48fbaa6f39e87e39fb16810854310c513e8c4296922bc5e
3
+ size 5216