chizhik committed on
Commit
4c29c9d
1 Parent(s): 3038abc

re-trained model after eliminating annotation errors

Browse files
Files changed (7) hide show
  1. README.md +0 -4
  2. optimizer.pt +1 -1
  3. pytorch_model.bin +1 -1
  4. rng_state.pth +2 -2
  5. scheduler.pt +1 -1
  6. trainer_state.json +201 -517
  7. training_args.bin +1 -1
README.md DELETED
@@ -1,4 +0,0 @@
1
- ---
2
- license: afl-3.0
3
- inference: false
4
- ---
 
 
 
 
 
optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d5c5cbc732dc61f382394ca38be8c180004f09ebf568bf5f6c366f50c08d16a9
3
  size 1008039837
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9ec1ce3a63b75c6aa0715c8710c405d13c678f4147cedc324bcbac332a02aca8
3
  size 1008039837
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:72d93baa2b2bd83e9ad3d5a6dee38be0e6fdc3537acb4f533d9871c48a5cffca
3
  size 504033325
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c4a34342441087075352a656fa4d85e95c5aa11fc4407c5367b8c0a76c1d5996
3
  size 504033325
rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:4c81a7f6b3c22226527f74224efa5bfd6812e7643711f3363571e33b7048a5d4
3
- size 14567
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fa069ecc27b6d86cf02d066f167af631efe8c187ca951ac5ef9de95cfbf526f7
3
+ size 14503
scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:501d80737344dcd2274196fc2df8440bccd24c52b8c2e1bc8bdc49a42cbf6651
3
  size 623
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7dc4a8d7215a4f34ed15795f0b52176bdc71ccf4c978bd45cb3ccfcf2a049c02
3
  size 623
trainer_state.json CHANGED
@@ -1,588 +1,272 @@
1
  {
2
- "best_metric": 0.7370102490601179,
3
- "best_model_checkpoint": "./CARES/checkpoints/roberta-stratified/run-4/checkpoint-5112",
4
- "epoch": 36.255319148936174,
5
- "global_step": 5112,
6
  "is_hyper_param_search": true,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
9
  "log_history": [
10
  {
11
- "epoch": 1.01,
12
- "eval_loss": 0.3023780882358551,
13
- "eval_macro_f1": 0.04276761517615176,
14
- "eval_macro_precision": 0.03250514933058703,
15
- "eval_macro_recall": 0.0625,
16
- "eval_micro_f1": 0.4057854560064283,
17
- "eval_micro_precision": 0.5200823892893924,
18
- "eval_micro_recall": 0.3326745718050066,
19
- "eval_runtime": 2.6706,
20
- "eval_samples_per_second": 363.595,
21
- "eval_steps_per_second": 22.842,
22
  "step": 142
23
  },
24
  {
25
- "epoch": 2.01,
26
- "eval_loss": 0.2355797439813614,
27
- "eval_macro_f1": 0.0478700416730977,
28
- "eval_macro_precision": 0.10847007722007722,
29
- "eval_macro_recall": 0.04781835896355593,
30
- "eval_micro_f1": 0.3766552231486022,
31
- "eval_micro_precision": 0.7370441458733206,
32
- "eval_micro_recall": 0.25296442687747034,
33
- "eval_runtime": 2.6743,
34
- "eval_samples_per_second": 363.08,
35
- "eval_steps_per_second": 22.809,
36
  "step": 284
37
  },
38
  {
39
- "epoch": 3.02,
40
- "eval_loss": 0.20839156210422516,
41
- "eval_macro_f1": 0.1270678623977507,
42
- "eval_macro_precision": 0.15146020789583886,
43
- "eval_macro_recall": 0.11056835837347367,
44
- "eval_micro_f1": 0.5080789946140036,
45
- "eval_micro_precision": 0.7971830985915493,
46
- "eval_micro_recall": 0.37285902503293805,
47
- "eval_runtime": 2.6737,
48
- "eval_samples_per_second": 363.172,
49
- "eval_steps_per_second": 22.815,
50
  "step": 426
51
  },
52
  {
53
- "epoch": 3.55,
54
- "learning_rate": 4.760888771678367e-05,
55
- "loss": 0.3067,
56
  "step": 500
57
  },
58
  {
59
- "epoch": 4.03,
60
- "eval_loss": 0.17458897829055786,
61
- "eval_macro_f1": 0.2134043623340923,
62
- "eval_macro_precision": 0.2555480902740095,
63
- "eval_macro_recall": 0.1956241201896679,
64
- "eval_micro_f1": 0.652869972666927,
65
- "eval_micro_precision": 0.8015340364333653,
66
- "eval_micro_recall": 0.5507246376811594,
67
- "eval_runtime": 2.6714,
68
- "eval_samples_per_second": 363.484,
69
- "eval_steps_per_second": 22.835,
70
  "step": 568
71
  },
72
  {
73
- "epoch": 5.04,
74
- "eval_loss": 0.1594998985528946,
75
- "eval_macro_f1": 0.322947699403185,
76
- "eval_macro_precision": 0.33082406427783345,
77
- "eval_macro_recall": 0.31592054350156284,
78
- "eval_micro_f1": 0.7086383601756956,
79
- "eval_micro_precision": 0.7973640856672158,
80
- "eval_micro_recall": 0.6376811594202898,
81
- "eval_runtime": 2.6774,
82
- "eval_samples_per_second": 362.663,
83
- "eval_steps_per_second": 22.783,
84
  "step": 710
85
  },
86
  {
87
- "epoch": 6.04,
88
- "eval_loss": 0.14328011870384216,
89
- "eval_macro_f1": 0.35552541662372633,
90
- "eval_macro_precision": 0.4154000509380286,
91
- "eval_macro_recall": 0.3384352474665031,
92
- "eval_micro_f1": 0.730332967435053,
93
- "eval_micro_precision": 0.8213991769547325,
94
- "eval_micro_recall": 0.6574440052700923,
95
- "eval_runtime": 2.6759,
96
- "eval_samples_per_second": 362.867,
97
- "eval_steps_per_second": 22.796,
98
  "step": 852
99
  },
100
  {
101
- "epoch": 7.05,
102
- "eval_loss": 0.1372288167476654,
103
- "eval_macro_f1": 0.35769698646328396,
104
- "eval_macro_precision": 0.45729587282684003,
105
- "eval_macro_recall": 0.340956446807801,
106
- "eval_micro_f1": 0.747014115092291,
107
- "eval_micro_precision": 0.8289156626506025,
108
- "eval_micro_recall": 0.6798418972332015,
109
- "eval_runtime": 2.6665,
110
- "eval_samples_per_second": 364.154,
111
- "eval_steps_per_second": 22.877,
112
  "step": 994
113
  },
114
  {
115
- "epoch": 7.09,
116
- "learning_rate": 4.585856096249015e-05,
117
- "loss": 0.1366,
118
  "step": 1000
119
  },
120
  {
121
- "epoch": 8.06,
122
- "eval_loss": 0.13587501645088196,
123
- "eval_macro_f1": 0.4519999262177494,
124
- "eval_macro_precision": 0.6383482484756009,
125
- "eval_macro_recall": 0.4120606473790193,
126
- "eval_micro_f1": 0.7476635514018692,
127
- "eval_micro_precision": 0.8227848101265823,
128
- "eval_micro_recall": 0.6851119894598156,
129
- "eval_runtime": 2.6678,
130
- "eval_samples_per_second": 363.976,
131
- "eval_steps_per_second": 22.866,
132
  "step": 1136
133
  },
134
  {
135
- "epoch": 9.06,
136
- "eval_loss": 0.1254434585571289,
137
- "eval_macro_f1": 0.5752781473187552,
138
- "eval_macro_precision": 0.6682540412600851,
139
- "eval_macro_recall": 0.5248832453258128,
140
- "eval_micro_f1": 0.7869884575026234,
141
- "eval_micro_precision": 0.8389261744966443,
142
- "eval_micro_recall": 0.741106719367589,
143
- "eval_runtime": 2.6676,
144
- "eval_samples_per_second": 364.002,
145
- "eval_steps_per_second": 22.867,
146
  "step": 1278
147
  },
148
  {
149
- "epoch": 10.07,
150
- "eval_loss": 0.12716087698936462,
151
- "eval_macro_f1": 0.5783160913404322,
152
- "eval_macro_precision": 0.6669120855288475,
153
- "eval_macro_recall": 0.5333612306839322,
154
- "eval_micro_f1": 0.7954701441317777,
155
- "eval_micro_precision": 0.8302292263610315,
156
- "eval_micro_recall": 0.7635046113306982,
157
- "eval_runtime": 2.6683,
158
- "eval_samples_per_second": 363.896,
159
- "eval_steps_per_second": 22.861,
160
  "step": 1420
161
  },
162
  {
163
- "epoch": 10.64,
164
- "learning_rate": 4.410823420819664e-05,
165
- "loss": 0.0661,
166
  "step": 1500
167
  },
168
  {
169
- "epoch": 11.08,
170
- "eval_loss": 0.12210354208946228,
171
- "eval_macro_f1": 0.6129525814973475,
172
- "eval_macro_precision": 0.694661979564102,
173
- "eval_macro_recall": 0.5604968051311103,
174
- "eval_micro_f1": 0.8090971743625087,
175
- "eval_micro_precision": 0.8482658959537572,
176
- "eval_micro_recall": 0.7733860342555995,
177
- "eval_runtime": 2.6691,
178
- "eval_samples_per_second": 363.79,
179
- "eval_steps_per_second": 22.854,
180
  "step": 1562
181
  },
182
  {
183
- "epoch": 12.09,
184
- "eval_loss": 0.13834641873836517,
185
- "eval_macro_f1": 0.59964421079272,
186
- "eval_macro_precision": 0.6259545070217613,
187
- "eval_macro_recall": 0.5849130461175929,
188
- "eval_micro_f1": 0.7874705287975748,
189
- "eval_micro_precision": 0.8056512749827704,
190
- "eval_micro_recall": 0.7700922266139657,
191
- "eval_runtime": 2.6674,
192
- "eval_samples_per_second": 364.02,
193
- "eval_steps_per_second": 22.868,
194
  "step": 1704
195
  },
196
  {
197
- "epoch": 13.09,
198
- "eval_loss": 0.13302326202392578,
199
- "eval_macro_f1": 0.6249414362192053,
200
- "eval_macro_precision": 0.6603169616331872,
201
- "eval_macro_recall": 0.6008160113233191,
202
- "eval_micro_f1": 0.8105579685933846,
203
- "eval_micro_precision": 0.8223728813559322,
204
- "eval_micro_recall": 0.7990777338603425,
205
- "eval_runtime": 2.6684,
206
- "eval_samples_per_second": 363.887,
207
- "eval_steps_per_second": 22.86,
208
  "step": 1846
209
  },
210
  {
211
- "epoch": 14.1,
212
- "eval_loss": 0.13799144327640533,
213
- "eval_macro_f1": 0.6256821918613437,
214
- "eval_macro_precision": 0.6740205274811021,
215
- "eval_macro_recall": 0.5938987995613589,
216
- "eval_micro_f1": 0.8119891008174386,
217
- "eval_micro_precision": 0.840620592383639,
218
- "eval_micro_recall": 0.7852437417654808,
219
- "eval_runtime": 2.6691,
220
- "eval_samples_per_second": 363.79,
221
- "eval_steps_per_second": 22.854,
222
  "step": 1988
223
  },
224
  {
225
- "epoch": 14.18,
226
- "learning_rate": 4.235790745390312e-05,
227
- "loss": 0.0324,
228
  "step": 2000
229
  },
230
  {
231
- "epoch": 15.11,
232
- "eval_loss": 0.13957080245018005,
233
- "eval_macro_f1": 0.6541379860188454,
234
- "eval_macro_precision": 0.7002594602789083,
235
- "eval_macro_recall": 0.6359217043250158,
236
- "eval_micro_f1": 0.803843605036448,
237
- "eval_micro_precision": 0.8086666666666666,
238
- "eval_micro_recall": 0.7990777338603425,
239
- "eval_runtime": 2.6709,
240
- "eval_samples_per_second": 363.55,
241
- "eval_steps_per_second": 22.839,
242
  "step": 2130
243
  },
244
  {
245
- "epoch": 16.11,
246
- "eval_loss": 0.13600043952465057,
247
- "eval_macro_f1": 0.6528569810495737,
248
- "eval_macro_precision": 0.7119806310239326,
249
- "eval_macro_recall": 0.6199612374678921,
250
- "eval_micro_f1": 0.8169491525423729,
251
- "eval_micro_precision": 0.8414804469273743,
252
- "eval_micro_recall": 0.7938076416337286,
253
- "eval_runtime": 2.6686,
254
- "eval_samples_per_second": 363.864,
255
- "eval_steps_per_second": 22.859,
256
  "step": 2272
257
- },
258
- {
259
- "epoch": 17.12,
260
- "eval_loss": 0.1411595642566681,
261
- "eval_macro_f1": 0.6780053870985077,
262
- "eval_macro_precision": 0.7825949490722317,
263
- "eval_macro_recall": 0.6371295063080809,
264
- "eval_micro_f1": 0.8134328358208954,
265
- "eval_micro_precision": 0.8384615384615385,
266
- "eval_micro_recall": 0.7898550724637681,
267
- "eval_runtime": 2.6685,
268
- "eval_samples_per_second": 363.87,
269
- "eval_steps_per_second": 22.859,
270
- "step": 2414
271
- },
272
- {
273
- "epoch": 17.73,
274
- "learning_rate": 4.06075806996096e-05,
275
- "loss": 0.0173,
276
- "step": 2500
277
- },
278
- {
279
- "epoch": 18.13,
280
- "eval_loss": 0.14683738350868225,
281
- "eval_macro_f1": 0.6538188838769178,
282
- "eval_macro_precision": 0.7058131112592007,
283
- "eval_macro_recall": 0.628120629850237,
284
- "eval_micro_f1": 0.8044280442804428,
285
- "eval_micro_precision": 0.8195488721804511,
286
- "eval_micro_recall": 0.7898550724637681,
287
- "eval_runtime": 2.6704,
288
- "eval_samples_per_second": 363.611,
289
- "eval_steps_per_second": 22.843,
290
- "step": 2556
291
- },
292
- {
293
- "epoch": 19.13,
294
- "eval_loss": 0.14477181434631348,
295
- "eval_macro_f1": 0.7213223353389469,
296
- "eval_macro_precision": 0.7931053666626622,
297
- "eval_macro_recall": 0.6989155005450692,
298
- "eval_micro_f1": 0.8122731771692511,
299
- "eval_micro_precision": 0.813615333773959,
300
- "eval_micro_recall": 0.810935441370224,
301
- "eval_runtime": 2.6682,
302
- "eval_samples_per_second": 363.916,
303
- "eval_steps_per_second": 22.862,
304
- "step": 2698
305
- },
306
- {
307
- "epoch": 20.14,
308
- "eval_loss": 0.1553182750940323,
309
- "eval_macro_f1": 0.6767777822180807,
310
- "eval_macro_precision": 0.7296284296772766,
311
- "eval_macro_recall": 0.6640188299255232,
312
- "eval_micro_f1": 0.8082867477803354,
313
- "eval_micro_precision": 0.8069599474720945,
314
- "eval_micro_recall": 0.8096179183135704,
315
- "eval_runtime": 2.6706,
316
- "eval_samples_per_second": 363.587,
317
- "eval_steps_per_second": 22.841,
318
- "step": 2840
319
- },
320
- {
321
- "epoch": 21.15,
322
- "eval_loss": 0.14831620454788208,
323
- "eval_macro_f1": 0.6951923518777028,
324
- "eval_macro_precision": 0.8479068478364982,
325
- "eval_macro_recall": 0.6493756779822191,
326
- "eval_micro_f1": 0.8177506775067751,
327
- "eval_micro_precision": 0.8417015341701534,
328
- "eval_micro_recall": 0.7951251646903821,
329
- "eval_runtime": 2.6679,
330
- "eval_samples_per_second": 363.953,
331
- "eval_steps_per_second": 22.864,
332
- "step": 2982
333
- },
334
- {
335
- "epoch": 21.28,
336
- "learning_rate": 3.885725394531609e-05,
337
- "loss": 0.0121,
338
- "step": 3000
339
- },
340
- {
341
- "epoch": 22.16,
342
- "eval_loss": 0.1529681533575058,
343
- "eval_macro_f1": 0.7215214471733791,
344
- "eval_macro_precision": 0.840496134606828,
345
- "eval_macro_recall": 0.6777226713039917,
346
- "eval_micro_f1": 0.8099395567494962,
347
- "eval_micro_precision": 0.826027397260274,
348
- "eval_micro_recall": 0.7944664031620553,
349
- "eval_runtime": 2.6724,
350
- "eval_samples_per_second": 363.343,
351
- "eval_steps_per_second": 22.826,
352
- "step": 3124
353
- },
354
- {
355
- "epoch": 23.16,
356
- "eval_loss": 0.15208803117275238,
357
- "eval_macro_f1": 0.7282532116551124,
358
- "eval_macro_precision": 0.8178655579947314,
359
- "eval_macro_recall": 0.702450635375965,
360
- "eval_micro_f1": 0.8099009900990098,
361
- "eval_micro_precision": 0.8115079365079365,
362
- "eval_micro_recall": 0.808300395256917,
363
- "eval_runtime": 2.6735,
364
- "eval_samples_per_second": 363.196,
365
- "eval_steps_per_second": 22.817,
366
- "step": 3266
367
- },
368
- {
369
- "epoch": 24.17,
370
- "eval_loss": 0.17097046971321106,
371
- "eval_macro_f1": 0.6781929633024913,
372
- "eval_macro_precision": 0.7664743620916477,
373
- "eval_macro_recall": 0.647287649907105,
374
- "eval_micro_f1": 0.8018836192398252,
375
- "eval_micro_precision": 0.8192439862542955,
376
- "eval_micro_recall": 0.7852437417654808,
377
- "eval_runtime": 2.6679,
378
- "eval_samples_per_second": 363.956,
379
- "eval_steps_per_second": 22.864,
380
- "step": 3408
381
- },
382
- {
383
- "epoch": 24.82,
384
- "learning_rate": 3.710692719102257e-05,
385
- "loss": 0.0086,
386
- "step": 3500
387
- },
388
- {
389
- "epoch": 25.18,
390
- "eval_loss": 0.16351111233234406,
391
- "eval_macro_f1": 0.7009939961760294,
392
- "eval_macro_precision": 0.8049793687947511,
393
- "eval_macro_recall": 0.6631263784729529,
394
- "eval_micro_f1": 0.8080672268907563,
395
- "eval_micro_precision": 0.8249828414550446,
396
- "eval_micro_recall": 0.7918313570487484,
397
- "eval_runtime": 2.6758,
398
- "eval_samples_per_second": 362.882,
399
- "eval_steps_per_second": 22.797,
400
- "step": 3550
401
- },
402
- {
403
- "epoch": 26.18,
404
- "eval_loss": 0.1703951209783554,
405
- "eval_macro_f1": 0.728858993061112,
406
- "eval_macro_precision": 0.8293166501604144,
407
- "eval_macro_recall": 0.6848344575219967,
408
- "eval_micro_f1": 0.8101945003353456,
409
- "eval_micro_precision": 0.825136612021858,
410
- "eval_micro_recall": 0.7957839262187089,
411
- "eval_runtime": 2.6853,
412
- "eval_samples_per_second": 361.599,
413
- "eval_steps_per_second": 22.716,
414
- "step": 3692
415
- },
416
- {
417
- "epoch": 27.19,
418
- "eval_loss": 0.1729104220867157,
419
- "eval_macro_f1": 0.7246800474910258,
420
- "eval_macro_precision": 0.8088443156400115,
421
- "eval_macro_recall": 0.6938957592472167,
422
- "eval_micro_f1": 0.8112827400940228,
423
- "eval_micro_precision": 0.8273972602739726,
424
- "eval_micro_recall": 0.7957839262187089,
425
- "eval_runtime": 2.6704,
426
- "eval_samples_per_second": 363.619,
427
- "eval_steps_per_second": 22.843,
428
- "step": 3834
429
- },
430
- {
431
- "epoch": 28.2,
432
- "eval_loss": 0.17421075701713562,
433
- "eval_macro_f1": 0.721641920467399,
434
- "eval_macro_precision": 0.7953793533738551,
435
- "eval_macro_recall": 0.6939586213926427,
436
- "eval_micro_f1": 0.8118745830553702,
437
- "eval_micro_precision": 0.8222972972972973,
438
- "eval_micro_recall": 0.8017127799736495,
439
- "eval_runtime": 2.6674,
440
- "eval_samples_per_second": 364.019,
441
- "eval_steps_per_second": 22.868,
442
- "step": 3976
443
- },
444
- {
445
- "epoch": 28.37,
446
- "learning_rate": 3.535660043672905e-05,
447
- "loss": 0.0054,
448
- "step": 4000
449
- },
450
- {
451
- "epoch": 29.21,
452
- "eval_loss": 0.17936377227306366,
453
- "eval_macro_f1": 0.7165402537125084,
454
- "eval_macro_precision": 0.7314012003015316,
455
- "eval_macro_recall": 0.7193045979731636,
456
- "eval_micro_f1": 0.8045826513911619,
457
- "eval_micro_precision": 0.7996096291476903,
458
- "eval_micro_recall": 0.8096179183135704,
459
- "eval_runtime": 2.669,
460
- "eval_samples_per_second": 363.809,
461
- "eval_steps_per_second": 22.855,
462
- "step": 4118
463
- },
464
- {
465
- "epoch": 30.21,
466
- "eval_loss": 0.184128999710083,
467
- "eval_macro_f1": 0.7249069877656021,
468
- "eval_macro_precision": 0.7920679958383108,
469
- "eval_macro_recall": 0.6986794530201526,
470
- "eval_micro_f1": 0.8046822742474916,
471
- "eval_micro_precision": 0.8172554347826086,
472
- "eval_micro_recall": 0.7924901185770751,
473
- "eval_runtime": 2.6699,
474
- "eval_samples_per_second": 363.687,
475
- "eval_steps_per_second": 22.847,
476
- "step": 4260
477
- },
478
- {
479
- "epoch": 31.22,
480
- "eval_loss": 0.18270088732242584,
481
- "eval_macro_f1": 0.7341637485973148,
482
- "eval_macro_precision": 0.8378115033399074,
483
- "eval_macro_recall": 0.6892808840460984,
484
- "eval_micro_f1": 0.8134680134680136,
485
- "eval_micro_precision": 0.8319559228650137,
486
- "eval_micro_recall": 0.7957839262187089,
487
- "eval_runtime": 2.6693,
488
- "eval_samples_per_second": 363.77,
489
- "eval_steps_per_second": 22.853,
490
- "step": 4402
491
- },
492
- {
493
- "epoch": 31.91,
494
- "learning_rate": 3.3606273682435536e-05,
495
- "loss": 0.0035,
496
- "step": 4500
497
- },
498
- {
499
- "epoch": 32.23,
500
- "eval_loss": 0.18414482474327087,
501
- "eval_macro_f1": 0.7314425663595913,
502
- "eval_macro_precision": 0.8099477622958757,
503
- "eval_macro_recall": 0.6986944147838622,
504
- "eval_micro_f1": 0.8134003350083752,
505
- "eval_micro_precision": 0.8275391956373551,
506
- "eval_micro_recall": 0.7997364953886693,
507
- "eval_runtime": 2.6686,
508
- "eval_samples_per_second": 363.857,
509
- "eval_steps_per_second": 22.858,
510
- "step": 4544
511
- },
512
- {
513
- "epoch": 33.23,
514
- "eval_loss": 0.18519891798496246,
515
- "eval_macro_f1": 0.7352936873025266,
516
- "eval_macro_precision": 0.8265688293622399,
517
- "eval_macro_recall": 0.6958315897622773,
518
- "eval_micro_f1": 0.8212722988892629,
519
- "eval_micro_precision": 0.8396421197522368,
520
- "eval_micro_recall": 0.8036890645586298,
521
- "eval_runtime": 2.6687,
522
- "eval_samples_per_second": 363.853,
523
- "eval_steps_per_second": 22.858,
524
- "step": 4686
525
- },
526
- {
527
- "epoch": 34.24,
528
- "eval_loss": 0.1865757256746292,
529
- "eval_macro_f1": 0.7207303759640109,
530
- "eval_macro_precision": 0.7610227795725935,
531
- "eval_macro_recall": 0.6996289765073358,
532
- "eval_micro_f1": 0.8130245048674052,
533
- "eval_micro_precision": 0.8288843258042436,
534
- "eval_micro_recall": 0.7977602108036891,
535
- "eval_runtime": 2.6701,
536
- "eval_samples_per_second": 363.659,
537
- "eval_steps_per_second": 22.846,
538
- "step": 4828
539
- },
540
- {
541
- "epoch": 35.25,
542
- "eval_loss": 0.19158615171909332,
543
- "eval_macro_f1": 0.7231833700345036,
544
- "eval_macro_precision": 0.759857266859788,
545
- "eval_macro_recall": 0.7038379676451936,
546
- "eval_micro_f1": 0.8071928071928073,
547
- "eval_micro_precision": 0.8161616161616162,
548
- "eval_micro_recall": 0.7984189723320159,
549
- "eval_runtime": 2.6696,
550
- "eval_samples_per_second": 363.724,
551
- "eval_steps_per_second": 22.85,
552
- "step": 4970
553
- },
554
- {
555
- "epoch": 35.46,
556
- "learning_rate": 3.185594692814201e-05,
557
- "loss": 0.0025,
558
- "step": 5000
559
- },
560
- {
561
- "epoch": 36.26,
562
- "eval_loss": 0.18590226769447327,
563
- "eval_macro_f1": 0.7370102490601179,
564
- "eval_macro_precision": 0.8141501549264045,
565
- "eval_macro_recall": 0.7143003391573518,
566
- "eval_micro_f1": 0.8030253206182177,
567
- "eval_micro_precision": 0.8017071569271176,
568
- "eval_micro_recall": 0.8043478260869565,
569
- "eval_runtime": 2.6693,
570
- "eval_samples_per_second": 363.761,
571
- "eval_steps_per_second": 22.852,
572
- "step": 5112
573
  }
574
  ],
575
- "max_steps": 14100,
576
  "num_train_epochs": 100,
577
- "total_flos": 1.9741043739581184e+16,
578
  "trial_name": null,
579
  "trial_params": {
580
- "adam_epsilon": 2.7636948844125687e-08,
581
- "learning_rate": 4.7699904708006934e-05,
582
- "per_device_eval_batch_size": 16,
583
- "per_device_train_batch_size": 16,
584
- "seed": 320,
585
- "warmup_steps": 474,
586
- "weight_decay": 0.08343382340090989
587
  }
588
  }
 
1
  {
2
+ "best_metric": 0.8328207869559483,
3
+ "best_model_checkpoint": "./CARES/checkpoints/roberta-stratified/run-9/checkpoint-2272",
4
+ "epoch": 32.0,
5
+ "global_step": 2272,
6
  "is_hyper_param_search": true,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
9
  "log_history": [
10
  {
11
+ "epoch": 2.0,
12
+ "eval_loss": 0.29745519161224365,
13
+ "eval_macro_f1": 0.0,
14
+ "eval_macro_precision": 0.0,
15
+ "eval_macro_recall": 0.0,
16
+ "eval_micro_f1": 0.0,
17
+ "eval_micro_precision": 0.0,
18
+ "eval_micro_recall": 0.0,
19
+ "eval_runtime": 2.6698,
20
+ "eval_samples_per_second": 361.828,
21
+ "eval_steps_per_second": 11.611,
22
  "step": 142
23
  },
24
  {
25
+ "epoch": 4.0,
26
+ "eval_loss": 0.2143602967262268,
27
+ "eval_macro_f1": 0.20760247776727883,
28
+ "eval_macro_precision": 0.2427021619684663,
29
+ "eval_macro_recall": 0.19452630677199997,
30
+ "eval_micro_f1": 0.5696619950535862,
31
+ "eval_micro_precision": 0.7601760176017601,
32
+ "eval_micro_recall": 0.45550428477257743,
33
+ "eval_runtime": 2.6814,
34
+ "eval_samples_per_second": 360.254,
35
+ "eval_steps_per_second": 11.561,
36
  "step": 284
37
  },
38
  {
39
+ "epoch": 6.0,
40
+ "eval_loss": 0.15995128452777863,
41
+ "eval_macro_f1": 0.32385167310781526,
42
+ "eval_macro_precision": 0.42763364413607774,
43
+ "eval_macro_recall": 0.2931228227099484,
44
+ "eval_micro_f1": 0.6900908014212397,
45
+ "eval_micro_precision": 0.860236220472441,
46
+ "eval_micro_recall": 0.5761371127224786,
47
+ "eval_runtime": 2.674,
48
+ "eval_samples_per_second": 361.255,
49
+ "eval_steps_per_second": 11.593,
50
  "step": 426
51
  },
52
  {
53
+ "epoch": 7.04,
54
+ "learning_rate": 4.40741152818786e-05,
55
+ "loss": 0.2773,
56
  "step": 500
57
  },
58
  {
59
+ "epoch": 8.0,
60
+ "eval_loss": 0.13520723581314087,
61
+ "eval_macro_f1": 0.4425371105794407,
62
+ "eval_macro_precision": 0.49169850541474464,
63
+ "eval_macro_recall": 0.41330740987880255,
64
+ "eval_micro_f1": 0.7547309833024118,
65
+ "eval_micro_precision": 0.8633276740237691,
66
+ "eval_micro_recall": 0.6704021094264997,
67
+ "eval_runtime": 2.6984,
68
+ "eval_samples_per_second": 357.994,
69
+ "eval_steps_per_second": 11.488,
70
  "step": 568
71
  },
72
  {
73
+ "epoch": 10.0,
74
+ "eval_loss": 0.10962910205125809,
75
+ "eval_macro_f1": 0.5655053778813368,
76
+ "eval_macro_precision": 0.6696587937430376,
77
+ "eval_macro_recall": 0.5338063461136978,
78
+ "eval_micro_f1": 0.8189806678383128,
79
+ "eval_micro_precision": 0.8772590361445783,
80
+ "eval_micro_recall": 0.7679630850362558,
81
+ "eval_runtime": 2.6744,
82
+ "eval_samples_per_second": 361.208,
83
+ "eval_steps_per_second": 11.592,
84
  "step": 710
85
  },
86
  {
87
+ "epoch": 12.0,
88
+ "eval_loss": 0.0956677794456482,
89
+ "eval_macro_f1": 0.6602463081601572,
90
+ "eval_macro_precision": 0.799820247637967,
91
+ "eval_macro_recall": 0.5970945139878616,
92
+ "eval_micro_f1": 0.8437173686042465,
93
+ "eval_micro_precision": 0.8938053097345132,
94
+ "eval_micro_recall": 0.7989452867501649,
95
+ "eval_runtime": 2.6891,
96
+ "eval_samples_per_second": 359.223,
97
+ "eval_steps_per_second": 11.528,
98
  "step": 852
99
  },
100
  {
101
+ "epoch": 14.0,
102
+ "eval_loss": 0.09418635815382004,
103
+ "eval_macro_f1": 0.709171253471717,
104
+ "eval_macro_precision": 0.8508463365856882,
105
+ "eval_macro_recall": 0.6566230286073916,
106
+ "eval_micro_f1": 0.8551865799383773,
107
+ "eval_micro_precision": 0.8896011396011396,
108
+ "eval_micro_recall": 0.8233355306526038,
109
+ "eval_runtime": 2.6733,
110
+ "eval_samples_per_second": 361.353,
111
+ "eval_steps_per_second": 11.596,
112
  "step": 994
113
  },
114
  {
115
+ "epoch": 14.08,
116
+ "learning_rate": 4.073516715446356e-05,
117
+ "loss": 0.068,
118
  "step": 1000
119
  },
120
  {
121
+ "epoch": 16.0,
122
+ "eval_loss": 0.09525582939386368,
123
+ "eval_macro_f1": 0.776452344525862,
124
+ "eval_macro_precision": 0.8422867152488994,
125
+ "eval_macro_recall": 0.7394950574942071,
126
+ "eval_micro_f1": 0.8662207357859532,
127
+ "eval_micro_precision": 0.8791581805838425,
128
+ "eval_micro_recall": 0.8536585365853658,
129
+ "eval_runtime": 2.6899,
130
+ "eval_samples_per_second": 359.119,
131
+ "eval_steps_per_second": 11.525,
132
  "step": 1136
133
  },
134
  {
135
+ "epoch": 18.0,
136
+ "eval_loss": 0.09120669960975647,
137
+ "eval_macro_f1": 0.7799395687308482,
138
+ "eval_macro_precision": 0.8259713713716451,
139
+ "eval_macro_recall": 0.7560566435810081,
140
+ "eval_micro_f1": 0.8660743665679499,
141
+ "eval_micro_precision": 0.8646517739816032,
142
+ "eval_micro_recall": 0.8675016479894528,
143
+ "eval_runtime": 2.6882,
144
+ "eval_samples_per_second": 359.349,
145
+ "eval_steps_per_second": 11.532,
146
  "step": 1278
147
  },
148
  {
149
+ "epoch": 20.0,
150
+ "eval_loss": 0.09322977066040039,
151
+ "eval_macro_f1": 0.7717526983031062,
152
+ "eval_macro_precision": 0.8213810289257493,
153
+ "eval_macro_recall": 0.747806253729357,
154
+ "eval_micro_f1": 0.8603205757278378,
155
+ "eval_micro_precision": 0.8538961038961039,
156
+ "eval_micro_recall": 0.8668424522083059,
157
+ "eval_runtime": 2.6911,
158
+ "eval_samples_per_second": 358.966,
159
+ "eval_steps_per_second": 11.52,
160
  "step": 1420
161
  },
162
  {
163
+ "epoch": 21.13,
164
+ "learning_rate": 3.739621902704851e-05,
165
+ "loss": 0.0222,
166
  "step": 1500
167
  },
168
  {
169
+ "epoch": 22.0,
170
+ "eval_loss": 0.10442250967025757,
171
+ "eval_macro_f1": 0.765921285849591,
172
+ "eval_macro_precision": 0.7782427042161657,
173
+ "eval_macro_recall": 0.774471020810195,
174
+ "eval_micro_f1": 0.8516003879728419,
175
+ "eval_micro_precision": 0.8356598984771574,
176
+ "eval_micro_recall": 0.8681608437705999,
177
+ "eval_runtime": 2.6921,
178
+ "eval_samples_per_second": 358.824,
179
+ "eval_steps_per_second": 11.515,
180
  "step": 1562
181
  },
182
  {
183
+ "epoch": 24.0,
184
+ "eval_loss": 0.09634574502706528,
185
+ "eval_macro_f1": 0.805432074935829,
186
+ "eval_macro_precision": 0.9021531097855335,
187
+ "eval_macro_recall": 0.7712696478949495,
188
+ "eval_micro_f1": 0.8704318936877077,
189
+ "eval_micro_precision": 0.8774279973208305,
190
+ "eval_micro_recall": 0.8635464733025708,
191
+ "eval_runtime": 2.6753,
192
+ "eval_samples_per_second": 361.083,
193
+ "eval_steps_per_second": 11.588,
194
  "step": 1704
195
  },
196
  {
197
+ "epoch": 26.0,
198
+ "eval_loss": 0.09631907194852829,
199
+ "eval_macro_f1": 0.8054621279402976,
200
+ "eval_macro_precision": 0.8819456215653025,
201
+ "eval_macro_recall": 0.782841919580731,
202
+ "eval_micro_f1": 0.8722700198544011,
203
+ "eval_micro_precision": 0.8757475083056478,
204
+ "eval_micro_recall": 0.8688200395517469,
205
+ "eval_runtime": 2.6758,
206
+ "eval_samples_per_second": 361.016,
207
+ "eval_steps_per_second": 11.585,
208
  "step": 1846
209
  },
210
  {
211
+ "epoch": 28.0,
212
+ "eval_loss": 0.09827031195163727,
213
+ "eval_macro_f1": 0.824449032097833,
214
+ "eval_macro_precision": 0.8941626132550853,
215
+ "eval_macro_recall": 0.7913713438345684,
216
+ "eval_micro_f1": 0.8718459495351926,
217
+ "eval_micro_precision": 0.8782608695652174,
218
+ "eval_micro_recall": 0.8655240606460118,
219
+ "eval_runtime": 2.6758,
220
+ "eval_samples_per_second": 361.008,
221
+ "eval_steps_per_second": 11.585,
222
  "step": 1988
223
  },
224
  {
225
+ "epoch": 28.17,
226
+ "learning_rate": 3.4057270899633464e-05,
227
+ "loss": 0.011,
228
  "step": 2000
229
  },
230
  {
231
+ "epoch": 30.0,
232
+ "eval_loss": 0.10059890896081924,
233
+ "eval_macro_f1": 0.8165877105799546,
234
+ "eval_macro_precision": 0.884478008536637,
235
+ "eval_macro_recall": 0.7957804401453603,
236
+ "eval_micro_f1": 0.8693088765149033,
237
+ "eval_micro_precision": 0.8639322916666666,
238
+ "eval_micro_recall": 0.8747528015820699,
239
+ "eval_runtime": 2.6759,
240
+ "eval_samples_per_second": 361.006,
241
+ "eval_steps_per_second": 11.585,
242
  "step": 2130
243
  },
244
  {
245
+ "epoch": 32.0,
246
+ "eval_loss": 0.10830199718475342,
247
+ "eval_macro_f1": 0.8328207869559483,
248
+ "eval_macro_precision": 0.8739781063350807,
249
+ "eval_macro_recall": 0.8212976019561394,
250
+ "eval_micro_f1": 0.8676422764227643,
251
+ "eval_micro_precision": 0.8562259306803595,
252
+ "eval_micro_recall": 0.8793671720500988,
253
+ "eval_runtime": 2.674,
254
+ "eval_samples_per_second": 361.259,
255
+ "eval_steps_per_second": 11.593,
256
  "step": 2272
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
257
  }
258
  ],
259
+ "max_steps": 7100,
260
  "num_train_epochs": 100,
261
+ "total_flos": 1.673289313217472e+16,
262
  "trial_name": null,
263
  "trial_params": {
264
+ "adam_epsilon": 1.724439344881123e-07,
265
+ "learning_rate": 4.540969453284462e-05,
266
+ "per_device_eval_batch_size": 32,
267
+ "per_device_train_batch_size": 32,
268
+ "seed": 324,
269
+ "warmup_steps": 300,
270
+ "weight_decay": 0.00598936569463419
271
  }
272
  }
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ad2c884e186c33efca82a69390d8020a7e0d5a1fa537e67f0f88b9f3eb3c34c9
3
  size 3439
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ac58073aa21113afba4497d625fe9f04a3f41c55ebf5e736db1a27388505e1c5
3
  size 3439