chizhik committed on
Commit
2724fbb
1 Parent(s): b78d151

re-trained model after eliminating annotation errors

Browse files
Files changed (8) hide show
  1. README.md +0 -4
  2. optimizer.pt +1 -1
  3. pytorch_model.bin +1 -1
  4. rng_state.pth +2 -2
  5. scheduler.pt +1 -1
  6. tokenizer.json +6 -1
  7. trainer_state.json +404 -294
  8. training_args.bin +1 -1
README.md DELETED
@@ -1,4 +0,0 @@
1
- ---
2
- license: afl-3.0
3
- inference: false
4
- ---
 
 
 
 
 
optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ebb137e877bb4f592417f468e35466c05294f419aabe8caca2d8707adfedd6d4
3
  size 924893085
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9da9dddf0cfbf346edd2ca654882f56561949189ca0c9b562cef512f86ccfb4d
3
  size 924893085
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:7539ebd87020112228b621477123c3a9606ce46f30ea21b470a6c797f221788f
3
  size 462459309
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a393efa30dd905fa9d799bd88308d72d2ef5859e8b9d02229af7c534d5a84f76
3
  size 462459309
rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:fb9a82346993ff4d4270db79f4c3f93be6a696b9a7ff41e62ddbad8e1099f69f
3
- size 14439
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:be9a74d5f00cf5a029abab89255469fc995f5fe9e1de9b33c8e29137e5472212
3
+ size 14567
scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:a829f1b672b920ee77b0097d3441199318b1e0b1f4de9df50935e3325f55daa6
3
  size 623
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:947f2a3c7397d694c3a87a2fa268dc849fc1c15ee2221ef0434c5ad05dcef178
3
  size 623
tokenizer.json CHANGED
@@ -1,6 +1,11 @@
1
  {
2
  "version": "1.0",
3
- "truncation": null,
 
 
 
 
 
4
  "padding": null,
5
  "added_tokens": [
6
  {
 
1
  {
2
  "version": "1.0",
3
+ "truncation": {
4
+ "direction": "Right",
5
+ "max_length": 512,
6
+ "strategy": "LongestFirst",
7
+ "stride": 0
8
+ },
9
  "padding": null,
10
  "added_tokens": [
11
  {
trainer_state.json CHANGED
@@ -1,396 +1,506 @@
1
  {
2
- "best_metric": 0.6252569868800233,
3
- "best_model_checkpoint": "./CARES/checkpoints/bio-ber-stratified/run-1/checkpoint-3408",
4
- "epoch": 24.170212765957448,
5
- "global_step": 3408,
6
  "is_hyper_param_search": true,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
9
  "log_history": [
10
  {
11
- "epoch": 1.01,
12
- "eval_loss": 0.2541882395744324,
13
- "eval_macro_f1": 0.026901004304160685,
14
- "eval_macro_precision": 0.048828125,
15
- "eval_macro_recall": 0.018564356435643563,
16
- "eval_micro_f1": 0.17543859649122806,
17
- "eval_micro_precision": 0.78125,
18
- "eval_micro_recall": 0.09881422924901186,
19
- "eval_runtime": 9.8993,
20
- "eval_samples_per_second": 98.088,
21
- "eval_steps_per_second": 3.132,
22
  "step": 142
23
  },
24
  {
25
- "epoch": 2.01,
26
- "eval_loss": 0.22154481709003448,
27
- "eval_macro_f1": 0.10623663256202004,
28
- "eval_macro_precision": 0.18484581306311026,
29
- "eval_macro_recall": 0.09432864376066943,
30
- "eval_micro_f1": 0.41890639481000924,
31
- "eval_micro_precision": 0.70625,
32
- "eval_micro_recall": 0.2977602108036891,
33
- "eval_runtime": 9.9434,
34
- "eval_samples_per_second": 97.653,
35
- "eval_steps_per_second": 3.118,
36
  "step": 284
37
  },
38
  {
39
- "epoch": 3.02,
40
- "eval_loss": 0.18873167037963867,
41
- "eval_macro_f1": 0.20450440261358283,
42
- "eval_macro_precision": 0.3100231872943203,
43
- "eval_macro_recall": 0.1738961275122085,
44
- "eval_micro_f1": 0.568944099378882,
45
- "eval_micro_precision": 0.7658862876254181,
46
- "eval_micro_recall": 0.4525691699604743,
47
- "eval_runtime": 9.9071,
48
- "eval_samples_per_second": 98.011,
49
- "eval_steps_per_second": 3.129,
50
  "step": 426
51
  },
52
  {
53
- "epoch": 3.55,
54
- "learning_rate": 4.1013980584550545e-05,
55
- "loss": 0.2716,
56
  "step": 500
57
  },
58
  {
59
- "epoch": 4.03,
60
- "eval_loss": 0.17541566491127014,
61
- "eval_macro_f1": 0.2919005682162893,
62
- "eval_macro_precision": 0.3812561939105582,
63
- "eval_macro_recall": 0.25249891158193927,
64
- "eval_micro_f1": 0.6301369863013698,
65
- "eval_micro_precision": 0.7762777242044359,
66
- "eval_micro_recall": 0.5303030303030303,
67
- "eval_runtime": 9.944,
68
- "eval_samples_per_second": 97.647,
69
- "eval_steps_per_second": 3.117,
70
  "step": 568
71
  },
72
  {
73
- "epoch": 5.04,
74
- "eval_loss": 0.1692640334367752,
75
- "eval_macro_f1": 0.4143233067428529,
76
- "eval_macro_precision": 0.5073151086851961,
77
- "eval_macro_recall": 0.3678263076835473,
78
- "eval_micro_f1": 0.6415811478525276,
79
- "eval_micro_precision": 0.7583108715184187,
80
- "eval_micro_recall": 0.5559947299077734,
81
- "eval_runtime": 9.9093,
82
- "eval_samples_per_second": 97.989,
83
- "eval_steps_per_second": 3.128,
84
  "step": 710
85
  },
86
  {
87
- "epoch": 6.04,
88
- "eval_loss": 0.1651107370853424,
89
- "eval_macro_f1": 0.4093940822021235,
90
- "eval_macro_precision": 0.5308122229847152,
91
- "eval_macro_recall": 0.358915994433547,
92
- "eval_micro_f1": 0.6615737203972498,
93
- "eval_micro_precision": 0.7872727272727272,
94
- "eval_micro_recall": 0.5704874835309618,
95
- "eval_runtime": 9.8986,
96
- "eval_samples_per_second": 98.095,
97
- "eval_steps_per_second": 3.132,
98
  "step": 852
99
  },
100
  {
101
- "epoch": 7.05,
102
- "eval_loss": 0.1729801893234253,
103
- "eval_macro_f1": 0.42556225648133283,
104
- "eval_macro_precision": 0.58718674661248,
105
- "eval_macro_recall": 0.3723531112715551,
106
- "eval_micro_f1": 0.6549062844542448,
107
- "eval_micro_precision": 0.7406483790523691,
108
- "eval_micro_recall": 0.5869565217391305,
109
- "eval_runtime": 9.912,
110
- "eval_samples_per_second": 97.962,
111
- "eval_steps_per_second": 3.128,
112
  "step": 994
113
  },
114
  {
115
- "epoch": 7.09,
116
- "learning_rate": 3.950611365129501e-05,
117
- "loss": 0.1249,
118
  "step": 1000
119
  },
120
  {
121
- "epoch": 8.06,
122
- "eval_loss": 0.17469000816345215,
123
- "eval_macro_f1": 0.4273735214628892,
124
- "eval_macro_precision": 0.6238596091113585,
125
- "eval_macro_recall": 0.375472703111614,
126
- "eval_micro_f1": 0.6517101875689592,
127
- "eval_micro_precision": 0.7377185678601166,
128
- "eval_micro_recall": 0.5836627140974967,
129
- "eval_runtime": 9.8904,
130
- "eval_samples_per_second": 98.176,
131
- "eval_steps_per_second": 3.134,
132
  "step": 1136
133
  },
134
  {
135
- "epoch": 9.06,
136
- "eval_loss": 0.17877863347530365,
137
- "eval_macro_f1": 0.475201225203798,
138
- "eval_macro_precision": 0.5739651831374122,
139
- "eval_macro_recall": 0.4350214360169413,
140
- "eval_micro_f1": 0.6738227146814404,
141
- "eval_micro_precision": 0.7102189781021898,
142
- "eval_micro_recall": 0.6409749670619236,
143
- "eval_runtime": 9.9151,
144
- "eval_samples_per_second": 97.932,
145
- "eval_steps_per_second": 3.127,
146
  "step": 1278
147
  },
148
  {
149
- "epoch": 10.07,
150
- "eval_loss": 0.17759235203266144,
151
- "eval_macro_f1": 0.47628022362120415,
152
- "eval_macro_precision": 0.6058635009097308,
153
- "eval_macro_recall": 0.42583522254336503,
154
- "eval_micro_f1": 0.6828591256072173,
155
- "eval_micro_precision": 0.7214076246334311,
156
- "eval_micro_recall": 0.6482213438735178,
157
- "eval_runtime": 9.9061,
158
- "eval_samples_per_second": 98.021,
159
- "eval_steps_per_second": 3.129,
160
  "step": 1420
161
  },
162
  {
163
- "epoch": 10.64,
164
- "learning_rate": 3.7998246718039476e-05,
165
- "loss": 0.0762,
166
  "step": 1500
167
  },
168
  {
169
- "epoch": 11.08,
170
- "eval_loss": 0.1776473969221115,
171
- "eval_macro_f1": 0.5089905336706273,
172
- "eval_macro_precision": 0.591647351441912,
173
- "eval_macro_recall": 0.47047423582847886,
174
- "eval_micro_f1": 0.688728024819028,
175
- "eval_micro_precision": 0.7223427331887202,
176
- "eval_micro_recall": 0.658102766798419,
177
- "eval_runtime": 9.9428,
178
- "eval_samples_per_second": 97.658,
179
- "eval_steps_per_second": 3.118,
180
  "step": 1562
181
  },
182
  {
183
- "epoch": 12.09,
184
- "eval_loss": 0.18189238011837006,
185
- "eval_macro_f1": 0.48004944819202805,
186
- "eval_macro_precision": 0.6458220963071541,
187
- "eval_macro_recall": 0.41943684599646036,
188
- "eval_micro_f1": 0.6883162725026473,
189
- "eval_micro_precision": 0.7414448669201521,
190
- "eval_micro_recall": 0.642292490118577,
191
- "eval_runtime": 9.9058,
192
- "eval_samples_per_second": 98.024,
193
- "eval_steps_per_second": 3.129,
194
  "step": 1704
195
  },
196
  {
197
- "epoch": 13.09,
198
- "eval_loss": 0.19002576172351837,
199
- "eval_macro_f1": 0.5199688644954614,
200
- "eval_macro_precision": 0.5810572518525183,
201
- "eval_macro_recall": 0.49026284720966035,
202
- "eval_micro_f1": 0.6896090878717006,
203
- "eval_micro_precision": 0.6996610169491525,
204
- "eval_micro_recall": 0.6798418972332015,
205
- "eval_runtime": 9.9018,
206
- "eval_samples_per_second": 98.063,
207
- "eval_steps_per_second": 3.131,
208
  "step": 1846
209
  },
210
  {
211
- "epoch": 14.1,
212
- "eval_loss": 0.19194385409355164,
213
- "eval_macro_f1": 0.520691157744967,
214
- "eval_macro_precision": 0.5966466597800141,
215
- "eval_macro_recall": 0.47615158217927345,
216
- "eval_micro_f1": 0.6891228070175438,
217
- "eval_micro_precision": 0.7372372372372372,
218
- "eval_micro_recall": 0.6469038208168643,
219
- "eval_runtime": 9.9153,
220
- "eval_samples_per_second": 97.93,
221
- "eval_steps_per_second": 3.126,
222
  "step": 1988
223
  },
224
  {
225
- "epoch": 14.18,
226
- "learning_rate": 3.649037978478394e-05,
227
- "loss": 0.0449,
228
  "step": 2000
229
  },
230
  {
231
- "epoch": 15.11,
232
- "eval_loss": 0.192080557346344,
233
- "eval_macro_f1": 0.5605505080291033,
234
- "eval_macro_precision": 0.6623700312301602,
235
- "eval_macro_recall": 0.5087278128204601,
236
- "eval_micro_f1": 0.6957736639888229,
237
- "eval_micro_precision": 0.7405204460966542,
238
- "eval_micro_recall": 0.6561264822134387,
239
- "eval_runtime": 9.9139,
240
- "eval_samples_per_second": 97.943,
241
- "eval_steps_per_second": 3.127,
242
  "step": 2130
243
  },
244
  {
245
- "epoch": 16.11,
246
- "eval_loss": 0.1960616409778595,
247
- "eval_macro_f1": 0.53306733167955,
248
- "eval_macro_precision": 0.5965001445588101,
249
- "eval_macro_recall": 0.49021955336676715,
250
- "eval_micro_f1": 0.698961937716263,
251
- "eval_micro_precision": 0.7361516034985423,
252
- "eval_micro_recall": 0.6653491436100132,
253
- "eval_runtime": 9.942,
254
- "eval_samples_per_second": 97.667,
255
- "eval_steps_per_second": 3.118,
256
  "step": 2272
257
  },
258
  {
259
- "epoch": 17.12,
260
- "eval_loss": 0.20587308704853058,
261
- "eval_macro_f1": 0.5199165971603507,
262
- "eval_macro_precision": 0.5554999099415838,
263
- "eval_macro_recall": 0.501101816922742,
264
- "eval_micro_f1": 0.6840148698884758,
265
- "eval_micro_precision": 0.7022900763358778,
266
- "eval_micro_recall": 0.6666666666666666,
267
- "eval_runtime": 9.9026,
268
- "eval_samples_per_second": 98.055,
269
- "eval_steps_per_second": 3.131,
270
  "step": 2414
271
  },
272
  {
273
- "epoch": 17.73,
274
- "learning_rate": 3.498251285152841e-05,
275
- "loss": 0.0256,
276
  "step": 2500
277
  },
278
  {
279
- "epoch": 18.13,
280
- "eval_loss": 0.20113714039325714,
281
- "eval_macro_f1": 0.529618760993627,
282
- "eval_macro_precision": 0.6032995030406657,
283
- "eval_macro_recall": 0.4871012589073676,
284
- "eval_micro_f1": 0.6982167352537724,
285
- "eval_micro_precision": 0.7281831187410587,
286
- "eval_micro_recall": 0.6706192358366272,
287
- "eval_runtime": 9.899,
288
- "eval_samples_per_second": 98.091,
289
- "eval_steps_per_second": 3.132,
290
  "step": 2556
291
  },
292
  {
293
- "epoch": 19.13,
294
- "eval_loss": 0.20477713644504547,
295
- "eval_macro_f1": 0.5176514067210329,
296
- "eval_macro_precision": 0.5857117797598407,
297
- "eval_macro_recall": 0.47641342794341435,
298
- "eval_micro_f1": 0.6941015089163237,
299
- "eval_micro_precision": 0.7238912732474965,
300
- "eval_micro_recall": 0.6666666666666666,
301
- "eval_runtime": 9.9038,
302
- "eval_samples_per_second": 98.043,
303
- "eval_steps_per_second": 3.13,
304
  "step": 2698
305
  },
306
  {
307
- "epoch": 20.14,
308
- "eval_loss": 0.2074529379606247,
309
- "eval_macro_f1": 0.5704867306049771,
310
- "eval_macro_precision": 0.6916588415347732,
311
- "eval_macro_recall": 0.5189916858370269,
312
- "eval_micro_f1": 0.7134187457855697,
313
- "eval_micro_precision": 0.7306629834254144,
314
- "eval_micro_recall": 0.696969696969697,
315
- "eval_runtime": 9.8989,
316
- "eval_samples_per_second": 98.092,
317
- "eval_steps_per_second": 3.132,
318
  "step": 2840
319
  },
320
  {
321
- "epoch": 21.15,
322
- "eval_loss": 0.21805770695209503,
323
- "eval_macro_f1": 0.5306833888134577,
324
- "eval_macro_precision": 0.6107555764603518,
325
- "eval_macro_recall": 0.4823641731674541,
326
- "eval_micro_f1": 0.709366391184573,
327
- "eval_micro_precision": 0.7431457431457431,
328
- "eval_micro_recall": 0.6785243741765481,
329
- "eval_runtime": 9.95,
330
- "eval_samples_per_second": 97.588,
331
- "eval_steps_per_second": 3.116,
332
  "step": 2982
333
  },
334
  {
335
- "epoch": 21.28,
336
- "learning_rate": 3.347464591827287e-05,
337
- "loss": 0.015,
338
  "step": 3000
339
  },
340
  {
341
- "epoch": 22.16,
342
- "eval_loss": 0.21425750851631165,
343
- "eval_macro_f1": 0.5900896049733363,
344
- "eval_macro_precision": 0.7164448328913706,
345
- "eval_macro_recall": 0.5339604602685475,
346
- "eval_micro_f1": 0.7065292096219932,
347
- "eval_micro_precision": 0.7385057471264368,
348
- "eval_micro_recall": 0.6772068511198946,
349
- "eval_runtime": 9.9181,
350
- "eval_samples_per_second": 97.901,
351
- "eval_steps_per_second": 3.126,
352
  "step": 3124
353
  },
354
  {
355
- "epoch": 23.16,
356
- "eval_loss": 0.21841417253017426,
357
- "eval_macro_f1": 0.5731327712532719,
358
- "eval_macro_precision": 0.6766904212874656,
359
- "eval_macro_recall": 0.5216860842370524,
360
- "eval_micro_f1": 0.7131730443616662,
361
- "eval_micro_precision": 0.7337979094076655,
362
- "eval_micro_recall": 0.6936758893280632,
363
- "eval_runtime": 9.8943,
364
- "eval_samples_per_second": 98.137,
365
- "eval_steps_per_second": 3.133,
366
  "step": 3266
367
  },
368
  {
369
- "epoch": 24.17,
370
- "eval_loss": 0.22420497238636017,
371
- "eval_macro_f1": 0.6252569868800233,
372
- "eval_macro_precision": 0.7327575817423139,
373
- "eval_macro_recall": 0.5742398722065618,
374
- "eval_micro_f1": 0.7081471295978001,
375
- "eval_micro_precision": 0.7404744787922358,
376
- "eval_micro_recall": 0.6785243741765481,
377
- "eval_runtime": 9.961,
378
- "eval_samples_per_second": 97.48,
379
- "eval_steps_per_second": 3.112,
380
  "step": 3408
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
381
  }
382
  ],
383
- "max_steps": 14100,
384
  "num_train_epochs": 100,
385
- "total_flos": 1.1986190383959552e+16,
386
  "trial_name": null,
387
  "trial_params": {
388
- "adam_epsilon": 1.241521755885265e-07,
389
- "learning_rate": 4.181013432530947e-05,
390
  "per_device_eval_batch_size": 32,
391
- "per_device_train_batch_size": 16,
392
- "seed": 321,
393
- "warmup_steps": 236,
394
- "weight_decay": 3.190500833235664e-11
395
  }
396
  }
 
1
  {
2
+ "best_metric": 0.7364675967036229,
3
+ "best_model_checkpoint": "./CARES/checkpoints/bio-ber-stratified/run-3/checkpoint-4402",
4
+ "epoch": 62.0,
5
+ "global_step": 4402,
6
  "is_hyper_param_search": true,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
9
  "log_history": [
10
  {
11
+ "epoch": 2.0,
12
+ "eval_loss": 0.3057152032852173,
13
+ "eval_macro_f1": 0.0,
14
+ "eval_macro_precision": 0.0,
15
+ "eval_macro_recall": 0.0,
16
+ "eval_micro_f1": 0.0,
17
+ "eval_micro_precision": 0.0,
18
+ "eval_micro_recall": 0.0,
19
+ "eval_runtime": 2.8103,
20
+ "eval_samples_per_second": 343.733,
21
+ "eval_steps_per_second": 11.031,
22
  "step": 142
23
  },
24
  {
25
+ "epoch": 4.0,
26
+ "eval_loss": 0.2555946707725525,
27
+ "eval_macro_f1": 0.030103995621237,
28
+ "eval_macro_precision": 0.11450892857142857,
29
+ "eval_macro_recall": 0.018604569384720493,
30
+ "eval_micro_f1": 0.06492679821769573,
31
+ "eval_micro_precision": 0.9444444444444444,
32
+ "eval_micro_recall": 0.03361898483849703,
33
+ "eval_runtime": 2.814,
34
+ "eval_samples_per_second": 343.287,
35
+ "eval_steps_per_second": 11.016,
36
  "step": 284
37
  },
38
  {
39
+ "epoch": 6.0,
40
+ "eval_loss": 0.20202794671058655,
41
+ "eval_macro_f1": 0.20731282317116273,
42
+ "eval_macro_precision": 0.31073371688318574,
43
+ "eval_macro_recall": 0.17971428447401858,
44
+ "eval_micro_f1": 0.5260370697263901,
45
+ "eval_micro_precision": 0.7957276368491322,
46
+ "eval_micro_recall": 0.3928806855636124,
47
+ "eval_runtime": 2.8153,
48
+ "eval_samples_per_second": 343.125,
49
+ "eval_steps_per_second": 11.011,
50
  "step": 426
51
  },
52
  {
53
+ "epoch": 7.04,
54
+ "learning_rate": 3.3448027530200134e-05,
55
+ "loss": 0.3166,
56
  "step": 500
57
  },
58
  {
59
+ "epoch": 8.0,
60
+ "eval_loss": 0.16987857222557068,
61
+ "eval_macro_f1": 0.33747696352729556,
62
+ "eval_macro_precision": 0.5216496556534496,
63
+ "eval_macro_recall": 0.3003629712640149,
64
+ "eval_micro_f1": 0.6596736596736597,
65
+ "eval_micro_precision": 0.8032166508987701,
66
+ "eval_micro_recall": 0.5596572181938035,
67
+ "eval_runtime": 2.8142,
68
+ "eval_samples_per_second": 343.259,
69
+ "eval_steps_per_second": 11.016,
70
  "step": 568
71
  },
72
  {
73
+ "epoch": 10.0,
74
+ "eval_loss": 0.15612231194972992,
75
+ "eval_macro_f1": 0.3962525677685097,
76
+ "eval_macro_precision": 0.5709879791429361,
77
+ "eval_macro_recall": 0.34789915051543907,
78
+ "eval_micro_f1": 0.6866002214839424,
79
+ "eval_micro_precision": 0.7802013422818792,
80
+ "eval_micro_recall": 0.6130520764667106,
81
+ "eval_runtime": 2.8176,
82
+ "eval_samples_per_second": 342.846,
83
+ "eval_steps_per_second": 11.002,
84
  "step": 710
85
  },
86
  {
87
+ "epoch": 12.0,
88
+ "eval_loss": 0.15700845420360565,
89
+ "eval_macro_f1": 0.5033118074728004,
90
+ "eval_macro_precision": 0.6788170359469108,
91
+ "eval_macro_recall": 0.4359827145940205,
92
+ "eval_micro_f1": 0.7075812274368232,
93
+ "eval_micro_precision": 0.7821229050279329,
94
+ "eval_micro_recall": 0.6460118655240606,
95
+ "eval_runtime": 2.9435,
96
+ "eval_samples_per_second": 328.182,
97
+ "eval_steps_per_second": 10.532,
98
  "step": 852
99
  },
100
  {
101
+ "epoch": 14.0,
102
+ "eval_loss": 0.14791876077651978,
103
+ "eval_macro_f1": 0.5201462693304842,
104
+ "eval_macro_precision": 0.6808348875622964,
105
+ "eval_macro_recall": 0.45464762510361434,
106
+ "eval_micro_f1": 0.7195077813970322,
107
+ "eval_micro_precision": 0.797752808988764,
108
+ "eval_micro_recall": 0.6552406064601186,
109
+ "eval_runtime": 2.814,
110
+ "eval_samples_per_second": 343.289,
111
+ "eval_steps_per_second": 11.017,
112
  "step": 994
113
  },
114
  {
115
+ "epoch": 14.08,
116
+ "learning_rate": 3.091408605063951e-05,
117
+ "loss": 0.1146,
118
  "step": 1000
119
  },
120
  {
121
+ "epoch": 16.0,
122
+ "eval_loss": 0.15235331654548645,
123
+ "eval_macro_f1": 0.560037439235141,
124
+ "eval_macro_precision": 0.6537080668718289,
125
+ "eval_macro_recall": 0.5093665280810307,
126
+ "eval_micro_f1": 0.7277091906721537,
127
+ "eval_micro_precision": 0.7583988563259471,
128
+ "eval_micro_recall": 0.6994067237969677,
129
+ "eval_runtime": 2.8165,
130
+ "eval_samples_per_second": 342.975,
131
+ "eval_steps_per_second": 11.006,
132
  "step": 1136
133
  },
134
  {
135
+ "epoch": 18.0,
136
+ "eval_loss": 0.1468934863805771,
137
+ "eval_macro_f1": 0.5752898959236763,
138
+ "eval_macro_precision": 0.6785878888299093,
139
+ "eval_macro_recall": 0.5191620319354175,
140
+ "eval_micro_f1": 0.7432950191570881,
141
+ "eval_micro_precision": 0.7880354505169868,
142
+ "eval_micro_recall": 0.7033618984838497,
143
+ "eval_runtime": 2.8153,
144
+ "eval_samples_per_second": 343.13,
145
+ "eval_steps_per_second": 11.011,
146
  "step": 1278
147
  },
148
  {
149
+ "epoch": 20.0,
150
+ "eval_loss": 0.15318024158477783,
151
+ "eval_macro_f1": 0.5614807929032789,
152
+ "eval_macro_precision": 0.6891794820810673,
153
+ "eval_macro_recall": 0.5011545072485702,
154
+ "eval_micro_f1": 0.7351351351351351,
155
+ "eval_micro_precision": 0.8108108108108109,
156
+ "eval_micro_recall": 0.6723796967699407,
157
+ "eval_runtime": 2.8153,
158
+ "eval_samples_per_second": 343.128,
159
+ "eval_steps_per_second": 11.011,
160
  "step": 1420
161
  },
162
  {
163
+ "epoch": 21.13,
164
+ "learning_rate": 2.83801445710789e-05,
165
+ "loss": 0.0497,
166
  "step": 1500
167
  },
168
  {
169
+ "epoch": 22.0,
170
+ "eval_loss": 0.15478584170341492,
171
+ "eval_macro_f1": 0.5700524565361512,
172
+ "eval_macro_precision": 0.6592231713192768,
173
+ "eval_macro_recall": 0.5224052472361357,
174
+ "eval_micro_f1": 0.7377900935226879,
175
+ "eval_micro_precision": 0.7773722627737226,
176
+ "eval_micro_recall": 0.7020435069215557,
177
+ "eval_runtime": 2.8156,
178
+ "eval_samples_per_second": 343.089,
179
+ "eval_steps_per_second": 11.01,
180
  "step": 1562
181
  },
182
  {
183
+ "epoch": 24.0,
184
+ "eval_loss": 0.16469129920005798,
185
+ "eval_macro_f1": 0.5893809890315513,
186
+ "eval_macro_precision": 0.7250493831689879,
187
+ "eval_macro_recall": 0.5372381591153548,
188
+ "eval_micro_f1": 0.7337398373983739,
189
+ "eval_micro_precision": 0.7547038327526132,
190
+ "eval_micro_recall": 0.7139090309822017,
191
+ "eval_runtime": 2.8148,
192
+ "eval_samples_per_second": 343.185,
193
+ "eval_steps_per_second": 11.013,
194
  "step": 1704
195
  },
196
  {
197
+ "epoch": 26.0,
198
+ "eval_loss": 0.1553143858909607,
199
+ "eval_macro_f1": 0.5787478878813199,
200
+ "eval_macro_precision": 0.6623158627962805,
201
+ "eval_macro_recall": 0.5281203519575204,
202
+ "eval_micro_f1": 0.7432293452176895,
203
+ "eval_micro_precision": 0.7742857142857142,
204
+ "eval_micro_recall": 0.7145682267633487,
205
+ "eval_runtime": 2.8124,
206
+ "eval_samples_per_second": 343.477,
207
+ "eval_steps_per_second": 11.023,
208
  "step": 1846
209
  },
210
  {
211
+ "epoch": 28.0,
212
+ "eval_loss": 0.1586785614490509,
213
+ "eval_macro_f1": 0.5883554607456316,
214
+ "eval_macro_precision": 0.7177006367498535,
215
+ "eval_macro_recall": 0.5324948845496986,
216
+ "eval_micro_f1": 0.7415575465196417,
217
+ "eval_micro_precision": 0.7768953068592058,
218
+ "eval_micro_recall": 0.7092946605141727,
219
+ "eval_runtime": 2.8164,
220
+ "eval_samples_per_second": 342.99,
221
+ "eval_steps_per_second": 11.007,
222
  "step": 1988
223
  },
224
  {
225
+ "epoch": 28.17,
226
+ "learning_rate": 2.5846203091518283e-05,
227
+ "loss": 0.0242,
228
  "step": 2000
229
  },
230
  {
231
+ "epoch": 30.0,
232
+ "eval_loss": 0.16228000819683075,
233
+ "eval_macro_f1": 0.6422229361530924,
234
+ "eval_macro_precision": 0.790092149978405,
235
+ "eval_macro_recall": 0.5761555188160953,
236
+ "eval_micro_f1": 0.75272599366866,
237
+ "eval_micro_precision": 0.8069381598793364,
238
+ "eval_micro_recall": 0.7053394858272907,
239
+ "eval_runtime": 2.8172,
240
+ "eval_samples_per_second": 342.897,
241
+ "eval_steps_per_second": 11.004,
242
  "step": 2130
243
  },
244
  {
245
+ "epoch": 32.0,
246
+ "eval_loss": 0.15960222482681274,
247
+ "eval_macro_f1": 0.6566298525236257,
248
+ "eval_macro_precision": 0.74358025279367,
249
+ "eval_macro_recall": 0.6057522843683747,
250
+ "eval_micro_f1": 0.7514529914529915,
251
+ "eval_micro_precision": 0.7805397727272727,
252
+ "eval_micro_recall": 0.7244561634805537,
253
+ "eval_runtime": 2.8157,
254
+ "eval_samples_per_second": 343.071,
255
+ "eval_steps_per_second": 11.01,
256
  "step": 2272
257
  },
258
  {
259
+ "epoch": 34.0,
260
+ "eval_loss": 0.1677185595035553,
261
+ "eval_macro_f1": 0.6556730308579793,
262
+ "eval_macro_precision": 0.7959295044168222,
263
+ "eval_macro_recall": 0.5949893771939523,
264
+ "eval_micro_f1": 0.7518796992481203,
265
+ "eval_micro_precision": 0.7806955287437899,
266
+ "eval_micro_recall": 0.7251153592617007,
267
+ "eval_runtime": 2.8129,
268
+ "eval_samples_per_second": 343.412,
269
+ "eval_steps_per_second": 11.02,
270
  "step": 2414
271
  },
272
  {
273
+ "epoch": 35.21,
274
+ "learning_rate": 2.331226161195767e-05,
275
+ "loss": 0.0135,
276
  "step": 2500
277
  },
278
  {
279
+ "epoch": 36.0,
280
+ "eval_loss": 0.16759739816188812,
281
+ "eval_macro_f1": 0.6573674747389432,
282
+ "eval_macro_precision": 0.771138294992533,
283
+ "eval_macro_recall": 0.595599674775744,
284
+ "eval_micro_f1": 0.7523187907935418,
285
+ "eval_micro_precision": 0.7855093256814921,
286
+ "eval_micro_recall": 0.7218193803559657,
287
+ "eval_runtime": 2.8149,
288
+ "eval_samples_per_second": 343.173,
289
+ "eval_steps_per_second": 11.013,
290
  "step": 2556
291
  },
292
  {
293
+ "epoch": 38.0,
294
+ "eval_loss": 0.1679902970790863,
295
+ "eval_macro_f1": 0.6639964013713953,
296
+ "eval_macro_precision": 0.758530396471428,
297
+ "eval_macro_recall": 0.6080264740396573,
298
+ "eval_micro_f1": 0.7524888431170615,
299
+ "eval_micro_precision": 0.7851002865329513,
300
+ "eval_micro_recall": 0.7224785761371127,
301
+ "eval_runtime": 2.8145,
302
+ "eval_samples_per_second": 343.228,
303
+ "eval_steps_per_second": 11.015,
304
  "step": 2698
305
  },
306
  {
307
+ "epoch": 40.0,
308
+ "eval_loss": 0.17791299521923065,
309
+ "eval_macro_f1": 0.706771883149178,
310
+ "eval_macro_precision": 0.8182896833237783,
311
+ "eval_macro_recall": 0.6422691238382328,
312
+ "eval_micro_f1": 0.7502562350529552,
313
+ "eval_micro_precision": 0.7787234042553192,
314
+ "eval_micro_recall": 0.7237969676994067,
315
+ "eval_runtime": 2.8149,
316
+ "eval_samples_per_second": 343.168,
317
+ "eval_steps_per_second": 11.013,
318
  "step": 2840
319
  },
320
  {
321
+ "epoch": 42.0,
322
+ "eval_loss": 0.17416273057460785,
323
+ "eval_macro_f1": 0.7042921705265328,
324
+ "eval_macro_precision": 0.8549297777881784,
325
+ "eval_macro_recall": 0.6327733363764919,
326
+ "eval_micro_f1": 0.7576791808873721,
327
+ "eval_micro_precision": 0.7855626326963907,
328
+ "eval_micro_recall": 0.7317073170731707,
329
+ "eval_runtime": 2.8128,
330
+ "eval_samples_per_second": 343.433,
331
+ "eval_steps_per_second": 11.021,
332
  "step": 2982
333
  },
334
  {
335
+ "epoch": 42.25,
336
+ "learning_rate": 2.077832013239705e-05,
337
+ "loss": 0.0089,
338
  "step": 3000
339
  },
340
  {
341
+ "epoch": 44.0,
342
+ "eval_loss": 0.17721112072467804,
343
+ "eval_macro_f1": 0.7133417842190977,
344
+ "eval_macro_precision": 0.8405823975317646,
345
+ "eval_macro_recall": 0.6551419828069049,
346
+ "eval_micro_f1": 0.7551782682512733,
347
+ "eval_micro_precision": 0.7787114845938375,
348
+ "eval_micro_recall": 0.7330257086354647,
349
+ "eval_runtime": 2.8136,
350
+ "eval_samples_per_second": 343.328,
351
+ "eval_steps_per_second": 11.018,
352
  "step": 3124
353
  },
354
  {
355
+ "epoch": 46.0,
356
+ "eval_loss": 0.17867934703826904,
357
+ "eval_macro_f1": 0.7067733545432416,
358
+ "eval_macro_precision": 0.8456299689876698,
359
+ "eval_macro_recall": 0.6424972968645987,
360
+ "eval_micro_f1": 0.7581743869209809,
361
+ "eval_micro_precision": 0.7843551797040169,
362
+ "eval_micro_recall": 0.7336849044166117,
363
+ "eval_runtime": 2.8132,
364
+ "eval_samples_per_second": 343.385,
365
+ "eval_steps_per_second": 11.02,
366
  "step": 3266
367
  },
368
  {
369
+ "epoch": 48.0,
370
+ "eval_loss": 0.18271668255329132,
371
+ "eval_macro_f1": 0.7082847532013592,
372
+ "eval_macro_precision": 0.8433782766054858,
373
+ "eval_macro_recall": 0.641905005936743,
374
+ "eval_micro_f1": 0.7553444180522565,
375
+ "eval_micro_precision": 0.7783216783216783,
376
+ "eval_micro_recall": 0.7336849044166117,
377
+ "eval_runtime": 2.8144,
378
+ "eval_samples_per_second": 343.24,
379
+ "eval_steps_per_second": 11.015,
380
  "step": 3408
381
+ },
382
+ {
383
+ "epoch": 49.3,
384
+ "learning_rate": 1.8244378652836435e-05,
385
+ "loss": 0.0062,
386
+ "step": 3500
387
+ },
388
+ {
389
+ "epoch": 50.0,
390
+ "eval_loss": 0.18576982617378235,
391
+ "eval_macro_f1": 0.7214487691145703,
392
+ "eval_macro_precision": 0.8418509428350153,
393
+ "eval_macro_recall": 0.6584015327988482,
394
+ "eval_micro_f1": 0.7551299589603283,
395
+ "eval_micro_precision": 0.7846481876332623,
396
+ "eval_micro_recall": 0.7277521423862887,
397
+ "eval_runtime": 2.8151,
398
+ "eval_samples_per_second": 343.147,
399
+ "eval_steps_per_second": 11.012,
400
+ "step": 3550
401
+ },
402
+ {
403
+ "epoch": 52.0,
404
+ "eval_loss": 0.18477760255336761,
405
+ "eval_macro_f1": 0.7358242812719007,
406
+ "eval_macro_precision": 0.8363784298641037,
407
+ "eval_macro_recall": 0.682320650811777,
408
+ "eval_micro_f1": 0.7623529411764705,
409
+ "eval_micro_precision": 0.7777777777777778,
410
+ "eval_micro_recall": 0.7475280158206987,
411
+ "eval_runtime": 2.8152,
412
+ "eval_samples_per_second": 343.143,
413
+ "eval_steps_per_second": 11.012,
414
+ "step": 3692
415
+ },
416
+ {
417
+ "epoch": 54.0,
418
+ "eval_loss": 0.18875756859779358,
419
+ "eval_macro_f1": 0.7155614476825691,
420
+ "eval_macro_precision": 0.8418373811449409,
421
+ "eval_macro_recall": 0.6438779359984136,
422
+ "eval_micro_f1": 0.7593035165585524,
423
+ "eval_micro_precision": 0.7875354107648725,
424
+ "eval_micro_recall": 0.7330257086354647,
425
+ "eval_runtime": 2.8151,
426
+ "eval_samples_per_second": 343.149,
427
+ "eval_steps_per_second": 11.012,
428
+ "step": 3834
429
+ },
430
+ {
431
+ "epoch": 56.0,
432
+ "eval_loss": 0.18950717151165009,
433
+ "eval_macro_f1": 0.7224219338762576,
434
+ "eval_macro_precision": 0.8143469166127995,
435
+ "eval_macro_recall": 0.6672755163012613,
436
+ "eval_micro_f1": 0.7565011820330969,
437
+ "eval_micro_precision": 0.775623268698061,
438
+ "eval_micro_recall": 0.7382992748846408,
439
+ "eval_runtime": 2.8155,
440
+ "eval_samples_per_second": 343.104,
441
+ "eval_steps_per_second": 11.011,
442
+ "step": 3976
443
+ },
444
+ {
445
+ "epoch": 56.34,
446
+ "learning_rate": 1.5710437173275818e-05,
447
+ "loss": 0.0047,
448
+ "step": 4000
449
+ },
450
+ {
451
+ "epoch": 58.0,
452
+ "eval_loss": 0.19801756739616394,
453
+ "eval_macro_f1": 0.7241508401060731,
454
+ "eval_macro_precision": 0.8273979735617242,
455
+ "eval_macro_recall": 0.6621892130462943,
456
+ "eval_micro_f1": 0.7576676777890125,
457
+ "eval_micro_precision": 0.7751724137931034,
458
+ "eval_micro_recall": 0.7409360580092288,
459
+ "eval_runtime": 2.8137,
460
+ "eval_samples_per_second": 343.316,
461
+ "eval_steps_per_second": 11.017,
462
+ "step": 4118
463
+ },
464
+ {
465
+ "epoch": 60.0,
466
+ "eval_loss": 0.194396510720253,
467
+ "eval_macro_f1": 0.7287972344248086,
468
+ "eval_macro_precision": 0.8203998323639319,
469
+ "eval_macro_recall": 0.6775029786499779,
470
+ "eval_micro_f1": 0.7583222370173102,
471
+ "eval_micro_precision": 0.7659717552118359,
472
+ "eval_micro_recall": 0.7508239947264338,
473
+ "eval_runtime": 2.8142,
474
+ "eval_samples_per_second": 343.264,
475
+ "eval_steps_per_second": 11.016,
476
+ "step": 4260
477
+ },
478
+ {
479
+ "epoch": 62.0,
480
+ "eval_loss": 0.19370318949222565,
481
+ "eval_macro_f1": 0.7364675967036229,
482
+ "eval_macro_precision": 0.8295903604136947,
483
+ "eval_macro_recall": 0.6817263379239634,
484
+ "eval_micro_f1": 0.7599051811716898,
485
+ "eval_micro_precision": 0.7813370473537604,
486
+ "eval_micro_recall": 0.7396176664469347,
487
+ "eval_runtime": 2.8163,
488
+ "eval_samples_per_second": 343.005,
489
+ "eval_steps_per_second": 11.007,
490
+ "step": 4402
491
  }
492
  ],
493
+ "max_steps": 7100,
494
  "num_train_epochs": 100,
495
+ "total_flos": 3.3093548336434176e+16,
496
  "trial_name": null,
497
  "trial_params": {
498
+ "adam_epsilon": 3.039596615397574e-08,
499
+ "learning_rate": 3.349363847683222e-05,
500
  "per_device_eval_batch_size": 32,
501
+ "per_device_train_batch_size": 32,
502
+ "seed": 326,
503
+ "warmup_steps": 491,
504
+ "weight_decay": 0.01214452830676255
505
  }
506
  }
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:67f59b494d3ca6dddabd8ddbd8a1784f33689fc7327a434b7a6651a9dfcc91ab
3
  size 3439
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:89e8d0d99ac817af410528cfbc62521472f947089cd55e91f497ac31b9c15ffc
3
  size 3439