Upload folder using huggingface_hub

#2
Files changed (7) hide show
  1. config.json +12 -10
  2. optimizer.pt +3 -0
  3. pytorch_model.bin +2 -2
  4. rng_state.pth +3 -0
  5. scheduler.pt +3 -0
  6. trainer_state.json +650 -0
  7. training_args.bin +2 -2
config.json CHANGED
@@ -8,14 +8,15 @@
8
  "hidden_dropout_prob": 0.1,
9
  "hidden_size": 768,
10
  "id2label": {
11
- "0": "1",
12
- "1": "2",
13
- "2": "3",
14
- "3": "4",
15
- "4": "5",
16
- "5": "6",
17
- "6": "7",
18
- "7": "8"
 
19
  },
20
  "initializer_range": 0.02,
21
  "intermediate_size": 3072,
@@ -27,7 +28,8 @@
27
  "LABEL_4": 4,
28
  "LABEL_5": 5,
29
  "LABEL_6": 6,
30
- "LABEL_7": 7
 
31
  },
32
  "layer_norm_eps": 1e-07,
33
  "max_position_embeddings": 512,
@@ -49,7 +51,7 @@
49
  "relative_attention": true,
50
  "share_att_key": true,
51
  "torch_dtype": "float32",
52
- "transformers_version": "4.29.2",
53
  "type_vocab_size": 0,
54
  "vocab_size": 128100
55
  }
 
8
  "hidden_dropout_prob": 0.1,
9
  "hidden_size": 768,
10
  "id2label": {
11
+ "0": "LABEL_0",
12
+ "1": "LABEL_1",
13
+ "2": "LABEL_2",
14
+ "3": "LABEL_3",
15
+ "4": "LABEL_4",
16
+ "5": "LABEL_5",
17
+ "6": "LABEL_6",
18
+ "7": "LABEL_7",
19
+ "8": "LABEL_8"
20
  },
21
  "initializer_range": 0.02,
22
  "intermediate_size": 3072,
 
28
  "LABEL_4": 4,
29
  "LABEL_5": 5,
30
  "LABEL_6": 6,
31
+ "LABEL_7": 7,
32
+ "LABEL_8": 8
33
  },
34
  "layer_norm_eps": 1e-07,
35
  "max_position_embeddings": 512,
 
51
  "relative_attention": true,
52
  "share_att_key": true,
53
  "torch_dtype": "float32",
54
+ "transformers_version": "4.31.0",
55
  "type_vocab_size": 0,
56
  "vocab_size": 128100
57
  }
optimizer.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:23c32774f045051599e224ffdc1af9e9714d98a366bf5cd8e703d88663efa38a
3
+ size 1475550981
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:0240dc7a901ffe350f144b6f693cea82b9e3c537d25d14d918bd9b8ef4d5b047
3
- size 737787193
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e66aeda1581b9ec935bfc043d16fe252be9c12fb47acaebf5158d2ac9157ca0b
3
+ size 737785845
rng_state.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7accdc88685b15ec75ad9685ebdfbcbcb719085daac37c1f0e03703b75aa0efe
3
+ size 14575
scheduler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:55c42c6a07c8319456d8d4702714c375b84c69d0eb73081a20d0cad687db3316
3
+ size 627
trainer_state.json ADDED
@@ -0,0 +1,650 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": 1.0422136783599854,
3
+ "best_model_checkpoint": "models/deberta-v3-base/1691009101/checkpoint-470",
4
+ "epoch": 4.986737400530504,
5
+ "global_step": 470,
6
+ "is_hyper_param_search": false,
7
+ "is_local_process_zero": true,
8
+ "is_world_process_zero": true,
9
+ "log_history": [
10
+ {
11
+ "epoch": 0.05,
12
+ "learning_rate": 1.9787234042553193e-05,
13
+ "loss": 2.1681,
14
+ "step": 5
15
+ },
16
+ {
17
+ "epoch": 0.11,
18
+ "learning_rate": 1.9574468085106384e-05,
19
+ "loss": 2.1189,
20
+ "step": 10
21
+ },
22
+ {
23
+ "epoch": 0.16,
24
+ "learning_rate": 1.9361702127659575e-05,
25
+ "loss": 2.163,
26
+ "step": 15
27
+ },
28
+ {
29
+ "epoch": 0.21,
30
+ "learning_rate": 1.914893617021277e-05,
31
+ "loss": 2.111,
32
+ "step": 20
33
+ },
34
+ {
35
+ "epoch": 0.27,
36
+ "learning_rate": 1.893617021276596e-05,
37
+ "loss": 2.1833,
38
+ "step": 25
39
+ },
40
+ {
41
+ "epoch": 0.32,
42
+ "learning_rate": 1.872340425531915e-05,
43
+ "loss": 2.0812,
44
+ "step": 30
45
+ },
46
+ {
47
+ "epoch": 0.37,
48
+ "learning_rate": 1.8510638297872342e-05,
49
+ "loss": 2.1295,
50
+ "step": 35
51
+ },
52
+ {
53
+ "epoch": 0.42,
54
+ "learning_rate": 1.8297872340425533e-05,
55
+ "loss": 2.0143,
56
+ "step": 40
57
+ },
58
+ {
59
+ "epoch": 0.48,
60
+ "learning_rate": 1.8085106382978724e-05,
61
+ "loss": 2.1968,
62
+ "step": 45
63
+ },
64
+ {
65
+ "epoch": 0.53,
66
+ "learning_rate": 1.7872340425531915e-05,
67
+ "loss": 1.995,
68
+ "step": 50
69
+ },
70
+ {
71
+ "epoch": 0.58,
72
+ "learning_rate": 1.765957446808511e-05,
73
+ "loss": 2.0799,
74
+ "step": 55
75
+ },
76
+ {
77
+ "epoch": 0.64,
78
+ "learning_rate": 1.74468085106383e-05,
79
+ "loss": 1.9155,
80
+ "step": 60
81
+ },
82
+ {
83
+ "epoch": 0.69,
84
+ "learning_rate": 1.723404255319149e-05,
85
+ "loss": 1.9628,
86
+ "step": 65
87
+ },
88
+ {
89
+ "epoch": 0.74,
90
+ "learning_rate": 1.7021276595744682e-05,
91
+ "loss": 1.9793,
92
+ "step": 70
93
+ },
94
+ {
95
+ "epoch": 0.8,
96
+ "learning_rate": 1.6808510638297873e-05,
97
+ "loss": 1.8198,
98
+ "step": 75
99
+ },
100
+ {
101
+ "epoch": 0.85,
102
+ "learning_rate": 1.6595744680851064e-05,
103
+ "loss": 1.6043,
104
+ "step": 80
105
+ },
106
+ {
107
+ "epoch": 0.9,
108
+ "learning_rate": 1.6382978723404255e-05,
109
+ "loss": 1.6209,
110
+ "step": 85
111
+ },
112
+ {
113
+ "epoch": 0.95,
114
+ "learning_rate": 1.6170212765957446e-05,
115
+ "loss": 1.5354,
116
+ "step": 90
117
+ },
118
+ {
119
+ "epoch": 1.0,
120
+ "eval_dignity_contempt_accuracy": 0.797153024911032,
121
+ "eval_loss": 1.4699641466140747,
122
+ "eval_mae": 0.5818505338078291,
123
+ "eval_n_samples": 670,
124
+ "eval_runtime": 13.4181,
125
+ "eval_samples_per_second": 49.933,
126
+ "eval_scorable_accuracy": 0.8776119402985074,
127
+ "eval_steps_per_second": 6.26,
128
+ "eval_top_1_accuracy": 0.5223880597014925,
129
+ "eval_top_2_accuracy": 0.8843416370106761,
130
+ "step": 94
131
+ },
132
+ {
133
+ "epoch": 1.01,
134
+ "learning_rate": 1.595744680851064e-05,
135
+ "loss": 1.5383,
136
+ "step": 95
137
+ },
138
+ {
139
+ "epoch": 1.06,
140
+ "learning_rate": 1.5744680851063832e-05,
141
+ "loss": 1.5934,
142
+ "step": 100
143
+ },
144
+ {
145
+ "epoch": 1.11,
146
+ "learning_rate": 1.5531914893617023e-05,
147
+ "loss": 1.5562,
148
+ "step": 105
149
+ },
150
+ {
151
+ "epoch": 1.17,
152
+ "learning_rate": 1.5319148936170214e-05,
153
+ "loss": 1.4006,
154
+ "step": 110
155
+ },
156
+ {
157
+ "epoch": 1.22,
158
+ "learning_rate": 1.5106382978723407e-05,
159
+ "loss": 1.4296,
160
+ "step": 115
161
+ },
162
+ {
163
+ "epoch": 1.27,
164
+ "learning_rate": 1.4893617021276596e-05,
165
+ "loss": 1.3508,
166
+ "step": 120
167
+ },
168
+ {
169
+ "epoch": 1.33,
170
+ "learning_rate": 1.4680851063829789e-05,
171
+ "loss": 1.3246,
172
+ "step": 125
173
+ },
174
+ {
175
+ "epoch": 1.38,
176
+ "learning_rate": 1.4468085106382981e-05,
177
+ "loss": 1.2573,
178
+ "step": 130
179
+ },
180
+ {
181
+ "epoch": 1.43,
182
+ "learning_rate": 1.425531914893617e-05,
183
+ "loss": 1.4491,
184
+ "step": 135
185
+ },
186
+ {
187
+ "epoch": 1.49,
188
+ "learning_rate": 1.4042553191489363e-05,
189
+ "loss": 1.2537,
190
+ "step": 140
191
+ },
192
+ {
193
+ "epoch": 1.54,
194
+ "learning_rate": 1.3829787234042556e-05,
195
+ "loss": 1.3947,
196
+ "step": 145
197
+ },
198
+ {
199
+ "epoch": 1.59,
200
+ "learning_rate": 1.3617021276595745e-05,
201
+ "loss": 1.282,
202
+ "step": 150
203
+ },
204
+ {
205
+ "epoch": 1.64,
206
+ "learning_rate": 1.3404255319148938e-05,
207
+ "loss": 1.3167,
208
+ "step": 155
209
+ },
210
+ {
211
+ "epoch": 1.7,
212
+ "learning_rate": 1.3191489361702127e-05,
213
+ "loss": 1.3456,
214
+ "step": 160
215
+ },
216
+ {
217
+ "epoch": 1.75,
218
+ "learning_rate": 1.297872340425532e-05,
219
+ "loss": 1.2924,
220
+ "step": 165
221
+ },
222
+ {
223
+ "epoch": 1.8,
224
+ "learning_rate": 1.2765957446808513e-05,
225
+ "loss": 1.4235,
226
+ "step": 170
227
+ },
228
+ {
229
+ "epoch": 1.86,
230
+ "learning_rate": 1.2553191489361702e-05,
231
+ "loss": 1.157,
232
+ "step": 175
233
+ },
234
+ {
235
+ "epoch": 1.91,
236
+ "learning_rate": 1.2340425531914895e-05,
237
+ "loss": 1.1315,
238
+ "step": 180
239
+ },
240
+ {
241
+ "epoch": 1.96,
242
+ "learning_rate": 1.2127659574468087e-05,
243
+ "loss": 1.2265,
244
+ "step": 185
245
+ },
246
+ {
247
+ "epoch": 1.99,
248
+ "eval_dignity_contempt_accuracy": 0.8087248322147651,
249
+ "eval_loss": 1.1996970176696777,
250
+ "eval_mae": 0.5134228187919463,
251
+ "eval_n_samples": 670,
252
+ "eval_runtime": 13.4177,
253
+ "eval_samples_per_second": 49.934,
254
+ "eval_scorable_accuracy": 0.9313432835820895,
255
+ "eval_steps_per_second": 6.26,
256
+ "eval_top_1_accuracy": 0.5492537313432836,
257
+ "eval_top_2_accuracy": 0.9328859060402684,
258
+ "step": 188
259
+ },
260
+ {
261
+ "epoch": 2.02,
262
+ "learning_rate": 1.1914893617021277e-05,
263
+ "loss": 1.1528,
264
+ "step": 190
265
+ },
266
+ {
267
+ "epoch": 2.07,
268
+ "learning_rate": 1.170212765957447e-05,
269
+ "loss": 1.1732,
270
+ "step": 195
271
+ },
272
+ {
273
+ "epoch": 2.12,
274
+ "learning_rate": 1.1489361702127662e-05,
275
+ "loss": 1.1946,
276
+ "step": 200
277
+ },
278
+ {
279
+ "epoch": 2.18,
280
+ "learning_rate": 1.1276595744680851e-05,
281
+ "loss": 1.0079,
282
+ "step": 205
283
+ },
284
+ {
285
+ "epoch": 2.23,
286
+ "learning_rate": 1.1063829787234044e-05,
287
+ "loss": 0.9267,
288
+ "step": 210
289
+ },
290
+ {
291
+ "epoch": 2.28,
292
+ "learning_rate": 1.0851063829787233e-05,
293
+ "loss": 1.06,
294
+ "step": 215
295
+ },
296
+ {
297
+ "epoch": 2.33,
298
+ "learning_rate": 1.0638297872340426e-05,
299
+ "loss": 1.0085,
300
+ "step": 220
301
+ },
302
+ {
303
+ "epoch": 2.39,
304
+ "learning_rate": 1.0425531914893619e-05,
305
+ "loss": 1.0053,
306
+ "step": 225
307
+ },
308
+ {
309
+ "epoch": 2.44,
310
+ "learning_rate": 1.0212765957446808e-05,
311
+ "loss": 1.0857,
312
+ "step": 230
313
+ },
314
+ {
315
+ "epoch": 2.49,
316
+ "learning_rate": 1e-05,
317
+ "loss": 0.8903,
318
+ "step": 235
319
+ },
320
+ {
321
+ "epoch": 2.55,
322
+ "learning_rate": 9.787234042553192e-06,
323
+ "loss": 1.1211,
324
+ "step": 240
325
+ },
326
+ {
327
+ "epoch": 2.6,
328
+ "learning_rate": 9.574468085106385e-06,
329
+ "loss": 1.0351,
330
+ "step": 245
331
+ },
332
+ {
333
+ "epoch": 2.65,
334
+ "learning_rate": 9.361702127659576e-06,
335
+ "loss": 0.9636,
336
+ "step": 250
337
+ },
338
+ {
339
+ "epoch": 2.71,
340
+ "learning_rate": 9.148936170212767e-06,
341
+ "loss": 0.9087,
342
+ "step": 255
343
+ },
344
+ {
345
+ "epoch": 2.76,
346
+ "learning_rate": 8.936170212765958e-06,
347
+ "loss": 0.9624,
348
+ "step": 260
349
+ },
350
+ {
351
+ "epoch": 2.81,
352
+ "learning_rate": 8.72340425531915e-06,
353
+ "loss": 0.9403,
354
+ "step": 265
355
+ },
356
+ {
357
+ "epoch": 2.86,
358
+ "learning_rate": 8.510638297872341e-06,
359
+ "loss": 0.9479,
360
+ "step": 270
361
+ },
362
+ {
363
+ "epoch": 2.92,
364
+ "learning_rate": 8.297872340425532e-06,
365
+ "loss": 0.8829,
366
+ "step": 275
367
+ },
368
+ {
369
+ "epoch": 2.97,
370
+ "learning_rate": 8.085106382978723e-06,
371
+ "loss": 0.992,
372
+ "step": 280
373
+ },
374
+ {
375
+ "epoch": 2.99,
376
+ "eval_dignity_contempt_accuracy": 0.8266199649737302,
377
+ "eval_loss": 1.091895580291748,
378
+ "eval_mae": 0.4658493870402802,
379
+ "eval_n_samples": 670,
380
+ "eval_runtime": 13.4128,
381
+ "eval_samples_per_second": 49.952,
382
+ "eval_scorable_accuracy": 0.9059701492537313,
383
+ "eval_steps_per_second": 6.263,
384
+ "eval_top_1_accuracy": 0.5776119402985075,
385
+ "eval_top_2_accuracy": 0.9316987740805605,
386
+ "step": 282
387
+ },
388
+ {
389
+ "epoch": 3.02,
390
+ "learning_rate": 7.872340425531916e-06,
391
+ "loss": 0.9552,
392
+ "step": 285
393
+ },
394
+ {
395
+ "epoch": 3.08,
396
+ "learning_rate": 7.659574468085107e-06,
397
+ "loss": 0.8174,
398
+ "step": 290
399
+ },
400
+ {
401
+ "epoch": 3.13,
402
+ "learning_rate": 7.446808510638298e-06,
403
+ "loss": 0.766,
404
+ "step": 295
405
+ },
406
+ {
407
+ "epoch": 3.18,
408
+ "learning_rate": 7.234042553191491e-06,
409
+ "loss": 0.7773,
410
+ "step": 300
411
+ },
412
+ {
413
+ "epoch": 3.24,
414
+ "learning_rate": 7.021276595744682e-06,
415
+ "loss": 0.9016,
416
+ "step": 305
417
+ },
418
+ {
419
+ "epoch": 3.29,
420
+ "learning_rate": 6.808510638297873e-06,
421
+ "loss": 0.8505,
422
+ "step": 310
423
+ },
424
+ {
425
+ "epoch": 3.34,
426
+ "learning_rate": 6.595744680851064e-06,
427
+ "loss": 0.9318,
428
+ "step": 315
429
+ },
430
+ {
431
+ "epoch": 3.4,
432
+ "learning_rate": 6.382978723404256e-06,
433
+ "loss": 0.7373,
434
+ "step": 320
435
+ },
436
+ {
437
+ "epoch": 3.45,
438
+ "learning_rate": 6.170212765957447e-06,
439
+ "loss": 0.7445,
440
+ "step": 325
441
+ },
442
+ {
443
+ "epoch": 3.5,
444
+ "learning_rate": 5.957446808510638e-06,
445
+ "loss": 0.8805,
446
+ "step": 330
447
+ },
448
+ {
449
+ "epoch": 3.55,
450
+ "learning_rate": 5.744680851063831e-06,
451
+ "loss": 0.7931,
452
+ "step": 335
453
+ },
454
+ {
455
+ "epoch": 3.61,
456
+ "learning_rate": 5.531914893617022e-06,
457
+ "loss": 0.8024,
458
+ "step": 340
459
+ },
460
+ {
461
+ "epoch": 3.66,
462
+ "learning_rate": 5.319148936170213e-06,
463
+ "loss": 0.7106,
464
+ "step": 345
465
+ },
466
+ {
467
+ "epoch": 3.71,
468
+ "learning_rate": 5.106382978723404e-06,
469
+ "loss": 0.7161,
470
+ "step": 350
471
+ },
472
+ {
473
+ "epoch": 3.77,
474
+ "learning_rate": 4.893617021276596e-06,
475
+ "loss": 0.7998,
476
+ "step": 355
477
+ },
478
+ {
479
+ "epoch": 3.82,
480
+ "learning_rate": 4.680851063829788e-06,
481
+ "loss": 0.6772,
482
+ "step": 360
483
+ },
484
+ {
485
+ "epoch": 3.87,
486
+ "learning_rate": 4.468085106382979e-06,
487
+ "loss": 0.7802,
488
+ "step": 365
489
+ },
490
+ {
491
+ "epoch": 3.93,
492
+ "learning_rate": 4.255319148936171e-06,
493
+ "loss": 0.7525,
494
+ "step": 370
495
+ },
496
+ {
497
+ "epoch": 3.98,
498
+ "learning_rate": 4.042553191489362e-06,
499
+ "loss": 0.7967,
500
+ "step": 375
501
+ },
502
+ {
503
+ "epoch": 4.0,
504
+ "eval_dignity_contempt_accuracy": 0.8305084745762712,
505
+ "eval_loss": 1.0538846254348755,
506
+ "eval_mae": 0.43559322033898307,
507
+ "eval_n_samples": 670,
508
+ "eval_runtime": 13.4019,
509
+ "eval_samples_per_second": 49.993,
510
+ "eval_scorable_accuracy": 0.926865671641791,
511
+ "eval_steps_per_second": 6.268,
512
+ "eval_top_1_accuracy": 0.6044776119402985,
513
+ "eval_top_2_accuracy": 0.9372881355932203,
514
+ "step": 377
515
+ },
516
+ {
517
+ "epoch": 4.03,
518
+ "learning_rate": 3.8297872340425535e-06,
519
+ "loss": 0.7263,
520
+ "step": 380
521
+ },
522
+ {
523
+ "epoch": 4.08,
524
+ "learning_rate": 3.6170212765957453e-06,
525
+ "loss": 0.7746,
526
+ "step": 385
527
+ },
528
+ {
529
+ "epoch": 4.14,
530
+ "learning_rate": 3.4042553191489363e-06,
531
+ "loss": 0.7231,
532
+ "step": 390
533
+ },
534
+ {
535
+ "epoch": 4.19,
536
+ "learning_rate": 3.191489361702128e-06,
537
+ "loss": 0.7129,
538
+ "step": 395
539
+ },
540
+ {
541
+ "epoch": 4.24,
542
+ "learning_rate": 2.978723404255319e-06,
543
+ "loss": 0.7609,
544
+ "step": 400
545
+ },
546
+ {
547
+ "epoch": 4.3,
548
+ "learning_rate": 2.765957446808511e-06,
549
+ "loss": 0.7708,
550
+ "step": 405
551
+ },
552
+ {
553
+ "epoch": 4.35,
554
+ "learning_rate": 2.553191489361702e-06,
555
+ "loss": 0.6699,
556
+ "step": 410
557
+ },
558
+ {
559
+ "epoch": 4.4,
560
+ "learning_rate": 2.340425531914894e-06,
561
+ "loss": 0.5928,
562
+ "step": 415
563
+ },
564
+ {
565
+ "epoch": 4.46,
566
+ "learning_rate": 2.1276595744680853e-06,
567
+ "loss": 0.6845,
568
+ "step": 420
569
+ },
570
+ {
571
+ "epoch": 4.51,
572
+ "learning_rate": 1.9148936170212767e-06,
573
+ "loss": 0.6752,
574
+ "step": 425
575
+ },
576
+ {
577
+ "epoch": 4.56,
578
+ "learning_rate": 1.7021276595744682e-06,
579
+ "loss": 0.5874,
580
+ "step": 430
581
+ },
582
+ {
583
+ "epoch": 4.62,
584
+ "learning_rate": 1.4893617021276596e-06,
585
+ "loss": 0.6781,
586
+ "step": 435
587
+ },
588
+ {
589
+ "epoch": 4.67,
590
+ "learning_rate": 1.276595744680851e-06,
591
+ "loss": 0.6023,
592
+ "step": 440
593
+ },
594
+ {
595
+ "epoch": 4.72,
596
+ "learning_rate": 1.0638297872340427e-06,
597
+ "loss": 0.5626,
598
+ "step": 445
599
+ },
600
+ {
601
+ "epoch": 4.77,
602
+ "learning_rate": 8.510638297872341e-07,
603
+ "loss": 0.6321,
604
+ "step": 450
605
+ },
606
+ {
607
+ "epoch": 4.83,
608
+ "learning_rate": 6.382978723404255e-07,
609
+ "loss": 0.6318,
610
+ "step": 455
611
+ },
612
+ {
613
+ "epoch": 4.88,
614
+ "learning_rate": 4.2553191489361704e-07,
615
+ "loss": 0.5816,
616
+ "step": 460
617
+ },
618
+ {
619
+ "epoch": 4.93,
620
+ "learning_rate": 2.1276595744680852e-07,
621
+ "loss": 0.7118,
622
+ "step": 465
623
+ },
624
+ {
625
+ "epoch": 4.99,
626
+ "learning_rate": 0.0,
627
+ "loss": 0.6383,
628
+ "step": 470
629
+ },
630
+ {
631
+ "epoch": 4.99,
632
+ "eval_dignity_contempt_accuracy": 0.8398637137989778,
633
+ "eval_loss": 1.0422136783599854,
634
+ "eval_mae": 0.42759795570698467,
635
+ "eval_n_samples": 670,
636
+ "eval_runtime": 13.4143,
637
+ "eval_samples_per_second": 49.947,
638
+ "eval_scorable_accuracy": 0.9223880597014925,
639
+ "eval_steps_per_second": 6.262,
640
+ "eval_top_1_accuracy": 0.6119402985074627,
641
+ "eval_top_2_accuracy": 0.9369676320272572,
642
+ "step": 470
643
+ }
644
+ ],
645
+ "max_steps": 470,
646
+ "num_train_epochs": 5,
647
+ "total_flos": 7904494608261120.0,
648
+ "trial_name": null,
649
+ "trial_params": null
650
+ }
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:949464b0c2752939fbd03de5a8fb0cbacecb0496c11c82a489d4865feb21017a
3
- size 3963
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:29dfdab16090b73b24c6d9632b5dc452f2c65616e861cc77cef737ef36112f3c
3
+ size 4027