sdonoso committed on
Commit
9a0d740
1 Parent(s): c514f0a

best albert-tiny model finetuned on ner

Browse files
all_results.json CHANGED
@@ -1,17 +1,17 @@
1
  {
2
- "epoch": 3.0,
3
- "eval_accuracy": 0.9462037830356554,
4
- "eval_f1": 0.7145563624881592,
5
- "eval_loss": 0.17879623174667358,
6
- "eval_precision": 0.7064148587482441,
7
- "eval_recall": 0.7228877176169941,
8
- "eval_runtime": 2.3471,
9
  "eval_samples": 1916,
10
- "eval_samples_per_second": 816.317,
11
- "eval_steps_per_second": 51.126,
12
- "train_loss": 0.1592878557059983,
13
- "train_runtime": 37.0086,
14
  "train_samples": 8324,
15
- "train_samples_per_second": 674.762,
16
- "train_steps_per_second": 42.233
17
  }
 
1
  {
2
+ "epoch": 4.0,
3
+ "eval_accuracy": 0.9473241942924933,
4
+ "eval_f1": 0.7241245438310427,
5
+ "eval_loss": 0.1745937019586563,
6
+ "eval_precision": 0.7045935327893623,
7
+ "eval_recall": 0.7447692061970931,
8
+ "eval_runtime": 1.4947,
9
  "eval_samples": 1916,
10
+ "eval_samples_per_second": 1281.891,
11
+ "eval_steps_per_second": 80.285,
12
+ "train_loss": 0.1328041532904539,
13
+ "train_runtime": 90.5157,
14
  "train_samples": 8324,
15
+ "train_samples_per_second": 367.848,
16
+ "train_steps_per_second": 23.024
17
  }
eval_results.json CHANGED
@@ -1,12 +1,12 @@
1
  {
2
- "epoch": 3.0,
3
- "eval_accuracy": 0.9462037830356554,
4
- "eval_f1": 0.7145563624881592,
5
- "eval_loss": 0.17879623174667358,
6
- "eval_precision": 0.7064148587482441,
7
- "eval_recall": 0.7228877176169941,
8
- "eval_runtime": 2.3471,
9
  "eval_samples": 1916,
10
- "eval_samples_per_second": 816.317,
11
- "eval_steps_per_second": 51.126
12
  }
 
1
  {
2
+ "epoch": 4.0,
3
+ "eval_accuracy": 0.9473241942924933,
4
+ "eval_f1": 0.7241245438310427,
5
+ "eval_loss": 0.1745937019586563,
6
+ "eval_precision": 0.7045935327893623,
7
+ "eval_recall": 0.7447692061970931,
8
+ "eval_runtime": 1.4947,
9
  "eval_samples": 1916,
10
+ "eval_samples_per_second": 1281.891,
11
+ "eval_steps_per_second": 80.285
12
  }
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:895b4e0ce37706a7e4f38ed1afdaf174c3ba1e67c9bd4636341a766c10a2e7b1
3
  size 21011223
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1ee5bc516deaa9037d742f40795bb6b274bd5ece153efe88ed66b62a57c9ff79
3
  size 21011223
train_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
- "epoch": 3.0,
3
- "train_loss": 0.1592878557059983,
4
- "train_runtime": 37.0086,
5
  "train_samples": 8324,
6
- "train_samples_per_second": 674.762,
7
- "train_steps_per_second": 42.233
8
  }
 
1
  {
2
+ "epoch": 4.0,
3
+ "train_loss": 0.1328041532904539,
4
+ "train_runtime": 90.5157,
5
  "train_samples": 8324,
6
+ "train_samples_per_second": 367.848,
7
+ "train_steps_per_second": 23.024
8
  }
trainer_state.json CHANGED
@@ -1,43 +1,541 @@
1
  {
2
- "best_metric": null,
3
- "best_model_checkpoint": null,
4
- "epoch": 3.0,
5
- "global_step": 1563,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
9
  "log_history": [
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
10
  {
11
  "epoch": 0.96,
12
- "learning_rate": 3.4101087651951375e-05,
13
- "loss": 0.274,
14
  "step": 500
15
  },
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
16
  {
17
  "epoch": 1.92,
18
- "learning_rate": 1.8106206014075498e-05,
19
- "loss": 0.1249,
20
  "step": 1000
21
  },
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
22
  {
23
  "epoch": 2.88,
24
- "learning_rate": 2.1113243761996164e-06,
25
- "loss": 0.0879,
26
  "step": 1500
27
  },
28
  {
29
- "epoch": 3.0,
30
- "step": 1563,
31
- "total_flos": 15403812867840.0,
32
- "train_loss": 0.1592878557059983,
33
- "train_runtime": 37.0086,
34
- "train_samples_per_second": 674.762,
35
- "train_steps_per_second": 42.233
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
36
  }
37
  ],
38
- "max_steps": 1563,
39
- "num_train_epochs": 3,
40
- "total_flos": 15403812867840.0,
41
  "trial_name": null,
42
  "trial_params": null
43
  }
 
1
  {
2
+ "best_metric": 0.1745937019586563,
3
+ "best_model_checkpoint": "/home/sdonoso/data/all_results/ner-c/albert-tiny/epochs_4_bs_16_lr_5e-5/checkpoint-1400",
4
+ "epoch": 4.0,
5
+ "global_step": 2084,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
9
  "log_history": [
10
+ {
11
+ "epoch": 0.1,
12
+ "eval_accuracy": 0.863771172477427,
13
+ "eval_f1": 0.24746022618363045,
14
+ "eval_loss": 0.4690728187561035,
15
+ "eval_precision": 0.3093697579678888,
16
+ "eval_recall": 0.20619709311611564,
17
+ "eval_runtime": 1.4686,
18
+ "eval_samples_per_second": 1304.616,
19
+ "eval_steps_per_second": 81.709,
20
+ "step": 50
21
+ },
22
+ {
23
+ "epoch": 0.19,
24
+ "eval_accuracy": 0.9084722862980293,
25
+ "eval_f1": 0.5024801196756161,
26
+ "eval_loss": 0.3399835526943207,
27
+ "eval_precision": 0.4954968944099379,
28
+ "eval_recall": 0.5096629931320875,
29
+ "eval_runtime": 1.4666,
30
+ "eval_samples_per_second": 1306.423,
31
+ "eval_steps_per_second": 81.822,
32
+ "step": 100
33
+ },
34
+ {
35
+ "epoch": 0.29,
36
+ "eval_accuracy": 0.9171060436301325,
37
+ "eval_f1": 0.5566067486597288,
38
+ "eval_loss": 0.29001280665397644,
39
+ "eval_precision": 0.5495874202086253,
40
+ "eval_recall": 0.5638076984507268,
41
+ "eval_runtime": 1.5018,
42
+ "eval_samples_per_second": 1275.795,
43
+ "eval_steps_per_second": 79.904,
44
+ "step": 150
45
+ },
46
+ {
47
+ "epoch": 0.38,
48
+ "eval_accuracy": 0.9215053054768338,
49
+ "eval_f1": 0.5868562353475006,
50
+ "eval_loss": 0.26263922452926636,
51
+ "eval_precision": 0.5573111174949728,
52
+ "eval_recall": 0.6197093116115636,
53
+ "eval_runtime": 1.4723,
54
+ "eval_samples_per_second": 1301.379,
55
+ "eval_steps_per_second": 81.506,
56
+ "step": 200
57
+ },
58
+ {
59
+ "epoch": 0.48,
60
+ "eval_accuracy": 0.9255750346009358,
61
+ "eval_f1": 0.6203032300593275,
62
+ "eval_loss": 0.25319042801856995,
63
+ "eval_precision": 0.6406808510638298,
64
+ "eval_recall": 0.6011819198211148,
65
+ "eval_runtime": 1.469,
66
+ "eval_samples_per_second": 1304.326,
67
+ "eval_steps_per_second": 81.691,
68
+ "step": 250
69
+ },
70
+ {
71
+ "epoch": 0.58,
72
+ "eval_accuracy": 0.9309793712515653,
73
+ "eval_f1": 0.6298792914249882,
74
+ "eval_loss": 0.22614283859729767,
75
+ "eval_precision": 0.6184392796675389,
76
+ "eval_recall": 0.6417505190864079,
77
+ "eval_runtime": 1.4706,
78
+ "eval_samples_per_second": 1302.847,
79
+ "eval_steps_per_second": 81.598,
80
+ "step": 300
81
+ },
82
+ {
83
+ "epoch": 0.67,
84
+ "eval_accuracy": 0.9334838199433204,
85
+ "eval_f1": 0.6383647798742139,
86
+ "eval_loss": 0.21767833828926086,
87
+ "eval_precision": 0.6285802755844558,
88
+ "eval_recall": 0.6484587126657083,
89
+ "eval_runtime": 1.47,
90
+ "eval_samples_per_second": 1303.418,
91
+ "eval_steps_per_second": 81.634,
92
+ "step": 350
93
+ },
94
+ {
95
+ "epoch": 0.77,
96
+ "eval_accuracy": 0.9345383246556384,
97
+ "eval_f1": 0.6418154877953678,
98
+ "eval_loss": 0.21528242528438568,
99
+ "eval_precision": 0.6270953977445901,
100
+ "eval_recall": 0.657243251876697,
101
+ "eval_runtime": 1.5017,
102
+ "eval_samples_per_second": 1275.883,
103
+ "eval_steps_per_second": 79.909,
104
+ "step": 400
105
+ },
106
+ {
107
+ "epoch": 0.86,
108
+ "eval_accuracy": 0.9356422592763461,
109
+ "eval_f1": 0.6500153704272977,
110
+ "eval_loss": 0.21207766234874725,
111
+ "eval_precision": 0.626425714708932,
112
+ "eval_recall": 0.6754512058776553,
113
+ "eval_runtime": 1.4752,
114
+ "eval_samples_per_second": 1298.836,
115
+ "eval_steps_per_second": 81.347,
116
+ "step": 450
117
+ },
118
  {
119
  "epoch": 0.96,
120
+ "learning_rate": 3.807581573896353e-05,
121
+ "loss": 0.2732,
122
  "step": 500
123
  },
124
+ {
125
+ "epoch": 0.96,
126
+ "eval_accuracy": 0.9400415211230475,
127
+ "eval_f1": 0.6775403935982847,
128
+ "eval_loss": 0.19518034160137177,
129
+ "eval_precision": 0.6507796410709032,
130
+ "eval_recall": 0.7065963903529787,
131
+ "eval_runtime": 1.4979,
132
+ "eval_samples_per_second": 1279.083,
133
+ "eval_steps_per_second": 80.11,
134
+ "step": 500
135
+ },
136
+ {
137
+ "epoch": 1.06,
138
+ "eval_accuracy": 0.9415738482831345,
139
+ "eval_f1": 0.68428873074216,
140
+ "eval_loss": 0.18678122758865356,
141
+ "eval_precision": 0.6703953417100827,
142
+ "eval_recall": 0.6987701645104616,
143
+ "eval_runtime": 1.473,
144
+ "eval_samples_per_second": 1300.753,
145
+ "eval_steps_per_second": 81.467,
146
+ "step": 550
147
+ },
148
+ {
149
+ "epoch": 1.15,
150
+ "eval_accuracy": 0.9421670071838133,
151
+ "eval_f1": 0.6916773778920309,
152
+ "eval_loss": 0.19731050729751587,
153
+ "eval_precision": 0.6958138031356069,
154
+ "eval_recall": 0.6875898418782942,
155
+ "eval_runtime": 1.4718,
156
+ "eval_samples_per_second": 1301.838,
157
+ "eval_steps_per_second": 81.535,
158
+ "step": 600
159
+ },
160
+ {
161
+ "epoch": 1.25,
162
+ "eval_accuracy": 0.9407500164766361,
163
+ "eval_f1": 0.6780254777070065,
164
+ "eval_loss": 0.1952008754014969,
165
+ "eval_precision": 0.6759803143356088,
166
+ "eval_recall": 0.6800830538252676,
167
+ "eval_runtime": 1.5038,
168
+ "eval_samples_per_second": 1274.147,
169
+ "eval_steps_per_second": 79.8,
170
+ "step": 650
171
+ },
172
+ {
173
+ "epoch": 1.34,
174
+ "eval_accuracy": 0.9399097080340078,
175
+ "eval_f1": 0.689172175007869,
176
+ "eval_loss": 0.19703227281570435,
177
+ "eval_precision": 0.679230649914689,
178
+ "eval_recall": 0.6994090400894426,
179
+ "eval_runtime": 1.4697,
180
+ "eval_samples_per_second": 1303.698,
181
+ "eval_steps_per_second": 81.651,
182
+ "step": 700
183
+ },
184
+ {
185
+ "epoch": 1.44,
186
+ "eval_accuracy": 0.9425789230870626,
187
+ "eval_f1": 0.697266881028939,
188
+ "eval_loss": 0.19310028851032257,
189
+ "eval_precision": 0.7018935102767438,
190
+ "eval_recall": 0.6927008465101422,
191
+ "eval_runtime": 1.4705,
192
+ "eval_samples_per_second": 1302.989,
193
+ "eval_steps_per_second": 81.607,
194
+ "step": 750
195
+ },
196
+ {
197
+ "epoch": 1.54,
198
+ "eval_accuracy": 0.9420351940947737,
199
+ "eval_f1": 0.689247483623582,
200
+ "eval_loss": 0.1976134181022644,
201
+ "eval_precision": 0.6894677960684034,
202
+ "eval_recall": 0.6890273119310014,
203
+ "eval_runtime": 1.4714,
204
+ "eval_samples_per_second": 1302.156,
205
+ "eval_steps_per_second": 81.555,
206
+ "step": 800
207
+ },
208
+ {
209
+ "epoch": 1.63,
210
+ "eval_accuracy": 0.9442265867000593,
211
+ "eval_f1": 0.6969575339166734,
212
+ "eval_loss": 0.18812265992164612,
213
+ "eval_precision": 0.7006132989025178,
214
+ "eval_recall": 0.6933397220891231,
215
+ "eval_runtime": 1.4736,
216
+ "eval_samples_per_second": 1300.193,
217
+ "eval_steps_per_second": 81.432,
218
+ "step": 850
219
+ },
220
+ {
221
+ "epoch": 1.73,
222
+ "eval_accuracy": 0.9432050352600013,
223
+ "eval_f1": 0.6990461049284578,
224
+ "eval_loss": 0.18850077688694,
225
+ "eval_precision": 0.69583794904257,
226
+ "eval_recall": 0.702283980194857,
227
+ "eval_runtime": 1.5056,
228
+ "eval_samples_per_second": 1272.624,
229
+ "eval_steps_per_second": 79.705,
230
+ "step": 900
231
+ },
232
+ {
233
+ "epoch": 1.82,
234
+ "eval_accuracy": 0.9444078296974889,
235
+ "eval_f1": 0.7056041617403641,
236
+ "eval_loss": 0.18260571360588074,
237
+ "eval_precision": 0.6965452847805789,
238
+ "eval_recall": 0.7149017728797317,
239
+ "eval_runtime": 1.4756,
240
+ "eval_samples_per_second": 1298.431,
241
+ "eval_steps_per_second": 81.321,
242
+ "step": 950
243
+ },
244
  {
245
  "epoch": 1.92,
246
+ "learning_rate": 2.607965451055662e-05,
247
+ "loss": 0.1235,
248
  "step": 1000
249
  },
250
+ {
251
+ "epoch": 1.92,
252
+ "eval_accuracy": 0.9451163250510776,
253
+ "eval_f1": 0.7066519546027743,
254
+ "eval_loss": 0.18071572482585907,
255
+ "eval_precision": 0.6975260619262487,
256
+ "eval_recall": 0.7160198051429484,
257
+ "eval_runtime": 1.4701,
258
+ "eval_samples_per_second": 1303.293,
259
+ "eval_steps_per_second": 81.626,
260
+ "step": 1000
261
+ },
262
+ {
263
+ "epoch": 2.02,
264
+ "eval_accuracy": 0.9455282409543267,
265
+ "eval_f1": 0.7122369446609509,
266
+ "eval_loss": 0.17818446457386017,
267
+ "eval_precision": 0.6955396559598113,
268
+ "eval_recall": 0.7297556300910397,
269
+ "eval_runtime": 1.4738,
270
+ "eval_samples_per_second": 1300.036,
271
+ "eval_steps_per_second": 81.422,
272
+ "step": 1050
273
+ },
274
+ {
275
+ "epoch": 2.11,
276
+ "eval_accuracy": 0.9441277268832795,
277
+ "eval_f1": 0.7015257469802925,
278
+ "eval_loss": 0.18732576072216034,
279
+ "eval_precision": 0.6980863514154674,
280
+ "eval_recall": 0.7049992014055263,
281
+ "eval_runtime": 1.4745,
282
+ "eval_samples_per_second": 1299.388,
283
+ "eval_steps_per_second": 81.381,
284
+ "step": 1100
285
+ },
286
+ {
287
+ "epoch": 2.21,
288
+ "eval_accuracy": 0.9422493903644632,
289
+ "eval_f1": 0.6907414521705724,
290
+ "eval_loss": 0.19080577790737152,
291
+ "eval_precision": 0.6655315368670418,
292
+ "eval_recall": 0.7179364318798914,
293
+ "eval_runtime": 1.5005,
294
+ "eval_samples_per_second": 1276.934,
295
+ "eval_steps_per_second": 79.975,
296
+ "step": 1150
297
+ },
298
+ {
299
+ "epoch": 2.3,
300
+ "eval_accuracy": 0.945956633493706,
301
+ "eval_f1": 0.712525347059741,
302
+ "eval_loss": 0.181381955742836,
303
+ "eval_precision": 0.696235329980186,
304
+ "eval_recall": 0.7295959111962945,
305
+ "eval_runtime": 1.4739,
306
+ "eval_samples_per_second": 1299.963,
307
+ "eval_steps_per_second": 81.417,
308
+ "step": 1200
309
+ },
310
+ {
311
+ "epoch": 2.4,
312
+ "eval_accuracy": 0.9461873063995254,
313
+ "eval_f1": 0.7136230091232412,
314
+ "eval_loss": 0.17689131200313568,
315
+ "eval_precision": 0.6915929866626704,
316
+ "eval_recall": 0.7371026992493211,
317
+ "eval_runtime": 1.472,
318
+ "eval_samples_per_second": 1301.631,
319
+ "eval_steps_per_second": 81.522,
320
+ "step": 1250
321
+ },
322
+ {
323
+ "epoch": 2.5,
324
+ "eval_accuracy": 0.9468463718447242,
325
+ "eval_f1": 0.7252625760088447,
326
+ "eval_loss": 0.1800052374601364,
327
+ "eval_precision": 0.7172758512964699,
328
+ "eval_recall": 0.7334291646701805,
329
+ "eval_runtime": 1.4724,
330
+ "eval_samples_per_second": 1301.261,
331
+ "eval_steps_per_second": 81.499,
332
+ "step": 1300
333
+ },
334
+ {
335
+ "epoch": 2.59,
336
+ "eval_accuracy": 0.9469617082976339,
337
+ "eval_f1": 0.7245991396167384,
338
+ "eval_loss": 0.17777691781520844,
339
+ "eval_precision": 0.7099938687921521,
340
+ "eval_recall": 0.7398179204599904,
341
+ "eval_runtime": 1.4757,
342
+ "eval_samples_per_second": 1298.39,
343
+ "eval_steps_per_second": 81.319,
344
+ "step": 1350
345
+ },
346
+ {
347
+ "epoch": 2.69,
348
+ "eval_accuracy": 0.9473241942924933,
349
+ "eval_f1": 0.7241245438310427,
350
+ "eval_loss": 0.1745937019586563,
351
+ "eval_precision": 0.7045935327893623,
352
+ "eval_recall": 0.7447692061970931,
353
+ "eval_runtime": 1.5088,
354
+ "eval_samples_per_second": 1269.882,
355
+ "eval_steps_per_second": 79.533,
356
+ "step": 1400
357
+ },
358
+ {
359
+ "epoch": 2.78,
360
+ "eval_accuracy": 0.9480985961906018,
361
+ "eval_f1": 0.7267085687007716,
362
+ "eval_loss": 0.17996077239513397,
363
+ "eval_precision": 0.724001268230818,
364
+ "eval_recall": 0.7294361923015493,
365
+ "eval_runtime": 1.4708,
366
+ "eval_samples_per_second": 1302.711,
367
+ "eval_steps_per_second": 81.589,
368
+ "step": 1450
369
+ },
370
  {
371
  "epoch": 2.88,
372
+ "learning_rate": 1.4083493282149713e-05,
373
+ "loss": 0.083,
374
  "step": 1500
375
  },
376
  {
377
+ "epoch": 2.88,
378
+ "eval_accuracy": 0.9467475120279444,
379
+ "eval_f1": 0.714475847490801,
380
+ "eval_loss": 0.1799446940422058,
381
+ "eval_precision": 0.7007063882063882,
382
+ "eval_recall": 0.7287973167225683,
383
+ "eval_runtime": 1.4712,
384
+ "eval_samples_per_second": 1302.299,
385
+ "eval_steps_per_second": 81.564,
386
+ "step": 1500
387
+ },
388
+ {
389
+ "epoch": 2.98,
390
+ "eval_accuracy": 0.9470440914782838,
391
+ "eval_f1": 0.7189552956257009,
392
+ "eval_loss": 0.18403704464435577,
393
+ "eval_precision": 0.7212666773830574,
394
+ "eval_recall": 0.7166586807219294,
395
+ "eval_runtime": 1.4732,
396
+ "eval_samples_per_second": 1300.543,
397
+ "eval_steps_per_second": 81.454,
398
+ "step": 1550
399
+ },
400
+ {
401
+ "epoch": 3.07,
402
+ "eval_accuracy": 0.9481150728267317,
403
+ "eval_f1": 0.7218129404228059,
404
+ "eval_loss": 0.18616726994514465,
405
+ "eval_precision": 0.7237835233659868,
406
+ "eval_recall": 0.7198530586168344,
407
+ "eval_runtime": 1.4726,
408
+ "eval_samples_per_second": 1301.059,
409
+ "eval_steps_per_second": 81.486,
410
+ "step": 1600
411
+ },
412
+ {
413
+ "epoch": 3.17,
414
+ "eval_accuracy": 0.9484446055493311,
415
+ "eval_f1": 0.7236070954129874,
416
+ "eval_loss": 0.18031327426433563,
417
+ "eval_precision": 0.7083843329253366,
418
+ "eval_recall": 0.7394984826704999,
419
+ "eval_runtime": 1.5062,
420
+ "eval_samples_per_second": 1272.051,
421
+ "eval_steps_per_second": 79.669,
422
+ "step": 1650
423
+ },
424
+ {
425
+ "epoch": 3.26,
426
+ "eval_accuracy": 0.9472582877479734,
427
+ "eval_f1": 0.7165758509042092,
428
+ "eval_loss": 0.1847413182258606,
429
+ "eval_precision": 0.7086847860043737,
430
+ "eval_recall": 0.7246446254591918,
431
+ "eval_runtime": 1.4724,
432
+ "eval_samples_per_second": 1301.279,
433
+ "eval_steps_per_second": 81.5,
434
+ "step": 1700
435
+ },
436
+ {
437
+ "epoch": 3.36,
438
+ "eval_accuracy": 0.9482139326435115,
439
+ "eval_f1": 0.7222266150075118,
440
+ "eval_loss": 0.18235260248184204,
441
+ "eval_precision": 0.7151581584716568,
442
+ "eval_recall": 0.7294361923015493,
443
+ "eval_runtime": 1.4742,
444
+ "eval_samples_per_second": 1299.703,
445
+ "eval_steps_per_second": 81.401,
446
+ "step": 1750
447
+ },
448
+ {
449
+ "epoch": 3.45,
450
+ "eval_accuracy": 0.9486917550912806,
451
+ "eval_f1": 0.7286566227244193,
452
+ "eval_loss": 0.17917561531066895,
453
+ "eval_precision": 0.7161807805028536,
454
+ "eval_recall": 0.7415748283021881,
455
+ "eval_runtime": 1.4683,
456
+ "eval_samples_per_second": 1304.936,
457
+ "eval_steps_per_second": 81.729,
458
+ "step": 1800
459
+ },
460
+ {
461
+ "epoch": 3.55,
462
+ "eval_accuracy": 0.9481480260989916,
463
+ "eval_f1": 0.7228443887540136,
464
+ "eval_loss": 0.1824788898229599,
465
+ "eval_precision": 0.7091272280270436,
466
+ "eval_recall": 0.7371026992493211,
467
+ "eval_runtime": 1.471,
468
+ "eval_samples_per_second": 1302.492,
469
+ "eval_steps_per_second": 81.576,
470
+ "step": 1850
471
+ },
472
+ {
473
+ "epoch": 3.65,
474
+ "eval_accuracy": 0.9487411849996704,
475
+ "eval_f1": 0.7265369649805447,
476
+ "eval_loss": 0.17811530828475952,
477
+ "eval_precision": 0.708453483077857,
478
+ "eval_recall": 0.7455678006708194,
479
+ "eval_runtime": 1.5062,
480
+ "eval_samples_per_second": 1272.042,
481
+ "eval_steps_per_second": 79.669,
482
+ "step": 1900
483
+ },
484
+ {
485
+ "epoch": 3.74,
486
+ "eval_accuracy": 0.9482963158241613,
487
+ "eval_f1": 0.7255009823182711,
488
+ "eval_loss": 0.18192069232463837,
489
+ "eval_precision": 0.7141089108910891,
490
+ "eval_recall": 0.7372624181440665,
491
+ "eval_runtime": 1.4716,
492
+ "eval_samples_per_second": 1301.966,
493
+ "eval_steps_per_second": 81.543,
494
+ "step": 1950
495
+ },
496
+ {
497
+ "epoch": 3.84,
498
+ "learning_rate": 2.0873320537428026e-06,
499
+ "loss": 0.0639,
500
+ "step": 2000
501
+ },
502
+ {
503
+ "epoch": 3.84,
504
+ "eval_accuracy": 0.9487576616358004,
505
+ "eval_f1": 0.7273722057214319,
506
+ "eval_loss": 0.17967551946640015,
507
+ "eval_precision": 0.7122302158273381,
508
+ "eval_recall": 0.7431720172496407,
509
+ "eval_runtime": 1.4718,
510
+ "eval_samples_per_second": 1301.808,
511
+ "eval_steps_per_second": 81.533,
512
+ "step": 2000
513
+ },
514
+ {
515
+ "epoch": 3.93,
516
+ "eval_accuracy": 0.9482468859157714,
517
+ "eval_f1": 0.7234075810863619,
518
+ "eval_loss": 0.18143223226070404,
519
+ "eval_precision": 0.7082950719314356,
520
+ "eval_recall": 0.7391790448810094,
521
+ "eval_runtime": 1.4715,
522
+ "eval_samples_per_second": 1302.101,
523
+ "eval_steps_per_second": 81.551,
524
+ "step": 2050
525
+ },
526
+ {
527
+ "epoch": 4.0,
528
+ "step": 2084,
529
+ "total_flos": 20529237663360.0,
530
+ "train_loss": 0.1328041532904539,
531
+ "train_runtime": 90.5157,
532
+ "train_samples_per_second": 367.848,
533
+ "train_steps_per_second": 23.024
534
  }
535
  ],
536
+ "max_steps": 2084,
537
+ "num_train_epochs": 4,
538
+ "total_flos": 20529237663360.0,
539
  "trial_name": null,
540
  "trial_params": null
541
  }
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:048fe57b149f1e34227a7230e59e34e3922583d18f960913a8be4e8cdb47eda1
3
  size 2863
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:415dd26e4c9bb86cdb797bec4f0ea4d492d7142400b45ffc20323abb9907ca27
3
  size 2863