rabindralamsal committed on
Commit
5b12fc5
1 Parent(s): 8119854
Files changed (7) hide show
  1. config.json +28 -0
  2. model.safetensors +3 -0
  3. optimizer.pt +3 -0
  4. rng_state.pth +3 -0
  5. scheduler.pt +3 -0
  6. trainer_state.json +531 -0
  7. training_args.bin +3 -0
config.json ADDED
@@ -0,0 +1,28 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "crisistransformers/CT-M1-Complete",
3
+ "architectures": [
4
+ "RobertaForSequenceClassification"
5
+ ],
6
+ "attention_probs_dropout_prob": 0.1,
7
+ "bos_token_id": 0,
8
+ "classifier_dropout": null,
9
+ "eos_token_id": 2,
10
+ "hidden_act": "gelu",
11
+ "hidden_dropout_prob": 0.1,
12
+ "hidden_size": 768,
13
+ "initializer_range": 0.02,
14
+ "intermediate_size": 3072,
15
+ "layer_norm_eps": 1e-12,
16
+ "max_position_embeddings": 130,
17
+ "model_type": "roberta",
18
+ "num_attention_heads": 12,
19
+ "num_hidden_layers": 12,
20
+ "pad_token_id": 1,
21
+ "position_embedding_type": "absolute",
22
+ "problem_type": "single_label_classification",
23
+ "torch_dtype": "float32",
24
+ "transformers_version": "4.36.0",
25
+ "type_vocab_size": 1,
26
+ "use_cache": true,
27
+ "vocab_size": 64000
28
+ }
model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:02e39796f23e56f3c30a63d2429c859494268863b1b0b125d7a2f1c8a9ae3acc
3
+ size 539627096
optimizer.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:089c335fc0e1151bef6508ebe236293b584fafa301eed36169837cf3e7ece660
3
+ size 1079374010
rng_state.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ed88393f254cc24a46e4f171d697cc6ec90c3dfc7ac20c098d9d759b7053eee6
3
+ size 14244
scheduler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0d93fb5204d5a7bbbe18d5f1fcdb70aa4286df3e670d61d8c2a38ccfd8a3cbd5
3
+ size 1064
trainer_state.json ADDED
@@ -0,0 +1,531 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": 0.9092560719066743,
3
+ "best_model_checkpoint": "./output_1/checkpoint-1408",
4
+ "epoch": 29.53846153846154,
5
+ "eval_steps": 64,
6
+ "global_step": 1920,
7
+ "is_hyper_param_search": false,
8
+ "is_local_process_zero": true,
9
+ "is_world_process_zero": true,
10
+ "log_history": [
11
+ {
12
+ "epoch": 0.98,
13
+ "learning_rate": 9.753846153846154e-06,
14
+ "loss": 0.5876,
15
+ "step": 64
16
+ },
17
+ {
18
+ "epoch": 0.98,
19
+ "eval_f1": 0.8264750469953308,
20
+ "eval_loss": 0.426924467086792,
21
+ "eval_precision": 0.8384115884115884,
22
+ "eval_recall": 0.8222047843249156,
23
+ "eval_runtime": 1.7863,
24
+ "eval_samples_per_second": 163.47,
25
+ "eval_steps_per_second": 5.598,
26
+ "step": 64
27
+ },
28
+ {
29
+ "epoch": 1.97,
30
+ "learning_rate": 9.50769230769231e-06,
31
+ "loss": 0.331,
32
+ "step": 128
33
+ },
34
+ {
35
+ "epoch": 1.97,
36
+ "eval_f1": 0.8797480620155038,
37
+ "eval_loss": 0.3329169452190399,
38
+ "eval_precision": 0.8921407595440745,
39
+ "eval_recall": 0.8746849289009369,
40
+ "eval_runtime": 1.7913,
41
+ "eval_samples_per_second": 163.008,
42
+ "eval_steps_per_second": 5.582,
43
+ "step": 128
44
+ },
45
+ {
46
+ "epoch": 2.95,
47
+ "learning_rate": 9.261538461538461e-06,
48
+ "loss": 0.2585,
49
+ "step": 192
50
+ },
51
+ {
52
+ "epoch": 2.95,
53
+ "eval_f1": 0.8914823472996463,
54
+ "eval_loss": 0.31825584173202515,
55
+ "eval_precision": 0.8960271317829458,
56
+ "eval_recall": 0.8887382888666951,
57
+ "eval_runtime": 1.7885,
58
+ "eval_samples_per_second": 163.262,
59
+ "eval_steps_per_second": 5.591,
60
+ "step": 192
61
+ },
62
+ {
63
+ "epoch": 3.94,
64
+ "learning_rate": 9.015384615384616e-06,
65
+ "loss": 0.2016,
66
+ "step": 256
67
+ },
68
+ {
69
+ "epoch": 3.94,
70
+ "eval_f1": 0.8988811005098933,
71
+ "eval_loss": 0.3036338686943054,
72
+ "eval_precision": 0.9010176651305684,
73
+ "eval_recall": 0.8972987111808627,
74
+ "eval_runtime": 1.7803,
75
+ "eval_samples_per_second": 164.015,
76
+ "eval_steps_per_second": 5.617,
77
+ "step": 256
78
+ },
79
+ {
80
+ "epoch": 4.92,
81
+ "learning_rate": 8.76923076923077e-06,
82
+ "loss": 0.1733,
83
+ "step": 320
84
+ },
85
+ {
86
+ "epoch": 4.92,
87
+ "eval_f1": 0.8919073833036791,
88
+ "eval_loss": 0.31887006759643555,
89
+ "eval_precision": 0.8940092165898618,
90
+ "eval_recall": 0.890355257526038,
91
+ "eval_runtime": 1.7908,
92
+ "eval_samples_per_second": 163.059,
93
+ "eval_steps_per_second": 5.584,
94
+ "step": 320
95
+ },
96
+ {
97
+ "epoch": 5.91,
98
+ "learning_rate": 8.523076923076923e-06,
99
+ "loss": 0.1627,
100
+ "step": 384
101
+ },
102
+ {
103
+ "epoch": 5.91,
104
+ "eval_f1": 0.8975599724181316,
105
+ "eval_loss": 0.3417821526527405,
106
+ "eval_precision": 0.9093253968253968,
107
+ "eval_recall": 0.8924478052028344,
108
+ "eval_runtime": 1.7807,
109
+ "eval_samples_per_second": 163.98,
110
+ "eval_steps_per_second": 5.616,
111
+ "step": 384
112
+ },
113
+ {
114
+ "epoch": 6.89,
115
+ "learning_rate": 8.276923076923078e-06,
116
+ "loss": 0.1268,
117
+ "step": 448
118
+ },
119
+ {
120
+ "epoch": 6.89,
121
+ "eval_f1": 0.8907594463150018,
122
+ "eval_loss": 0.33714038133621216,
123
+ "eval_precision": 0.9003055391287207,
124
+ "eval_recall": 0.8863128358776811,
125
+ "eval_runtime": 1.7763,
126
+ "eval_samples_per_second": 164.386,
127
+ "eval_steps_per_second": 5.63,
128
+ "step": 448
129
+ },
130
+ {
131
+ "epoch": 7.88,
132
+ "learning_rate": 8.03076923076923e-06,
133
+ "loss": 0.1059,
134
+ "step": 512
135
+ },
136
+ {
137
+ "epoch": 7.88,
138
+ "eval_f1": 0.8910119800132443,
139
+ "eval_loss": 0.36000651121139526,
140
+ "eval_precision": 0.8987068965517242,
141
+ "eval_recall": 0.8871213202073525,
142
+ "eval_runtime": 1.809,
143
+ "eval_samples_per_second": 161.417,
144
+ "eval_steps_per_second": 5.528,
145
+ "step": 512
146
+ },
147
+ {
148
+ "epoch": 8.86,
149
+ "learning_rate": 7.784615384615385e-06,
150
+ "loss": 0.1036,
151
+ "step": 576
152
+ },
153
+ {
154
+ "epoch": 8.86,
155
+ "eval_f1": 0.9018817204301075,
156
+ "eval_loss": 0.3717869520187378,
157
+ "eval_precision": 0.9072715791518919,
158
+ "eval_recall": 0.8987492271840967,
159
+ "eval_runtime": 1.7837,
160
+ "eval_samples_per_second": 163.704,
161
+ "eval_steps_per_second": 5.606,
162
+ "step": 576
163
+ },
164
+ {
165
+ "epoch": 9.85,
166
+ "learning_rate": 7.538461538461539e-06,
167
+ "loss": 0.0901,
168
+ "step": 640
169
+ },
170
+ {
171
+ "epoch": 9.85,
172
+ "eval_f1": 0.8978072239720984,
173
+ "eval_loss": 0.3917124569416046,
174
+ "eval_precision": 0.9075004928050463,
175
+ "eval_recall": 0.8932562895325058,
176
+ "eval_runtime": 1.816,
177
+ "eval_samples_per_second": 160.796,
178
+ "eval_steps_per_second": 5.507,
179
+ "step": 640
180
+ },
181
+ {
182
+ "epoch": 10.83,
183
+ "learning_rate": 7.292307692307693e-06,
184
+ "loss": 0.0859,
185
+ "step": 704
186
+ },
187
+ {
188
+ "epoch": 10.83,
189
+ "eval_f1": 0.8944069431051109,
190
+ "eval_loss": 0.3997272551059723,
191
+ "eval_precision": 0.9030704986489806,
192
+ "eval_recall": 0.8901888048699291,
193
+ "eval_runtime": 1.7905,
194
+ "eval_samples_per_second": 163.086,
195
+ "eval_steps_per_second": 5.585,
196
+ "step": 704
197
+ },
198
+ {
199
+ "epoch": 11.82,
200
+ "learning_rate": 7.046153846153847e-06,
201
+ "loss": 0.0559,
202
+ "step": 768
203
+ },
204
+ {
205
+ "epoch": 11.82,
206
+ "eval_f1": 0.9027916488324536,
207
+ "eval_loss": 0.4531518518924713,
208
+ "eval_precision": 0.9027916488324536,
209
+ "eval_recall": 0.9027916488324536,
210
+ "eval_runtime": 1.7898,
211
+ "eval_samples_per_second": 163.144,
212
+ "eval_steps_per_second": 5.587,
213
+ "step": 768
214
+ },
215
+ {
216
+ "epoch": 12.8,
217
+ "learning_rate": 6.800000000000001e-06,
218
+ "loss": 0.0593,
219
+ "step": 832
220
+ },
221
+ {
222
+ "epoch": 12.8,
223
+ "eval_f1": 0.8808563748079878,
224
+ "eval_loss": 0.47470441460609436,
225
+ "eval_precision": 0.8859960169038714,
226
+ "eval_recall": 0.8779188662196225,
227
+ "eval_runtime": 1.7809,
228
+ "eval_samples_per_second": 163.966,
229
+ "eval_steps_per_second": 5.615,
230
+ "step": 832
231
+ },
232
+ {
233
+ "epoch": 13.78,
234
+ "learning_rate": 6.553846153846154e-06,
235
+ "loss": 0.0448,
236
+ "step": 896
237
+ },
238
+ {
239
+ "epoch": 13.78,
240
+ "eval_f1": 0.8958481951776287,
241
+ "eval_loss": 0.4653804302215576,
242
+ "eval_precision": 0.8958481951776287,
243
+ "eval_recall": 0.8958481951776287,
244
+ "eval_runtime": 1.7817,
245
+ "eval_samples_per_second": 163.887,
246
+ "eval_steps_per_second": 5.613,
247
+ "step": 896
248
+ },
249
+ {
250
+ "epoch": 14.77,
251
+ "learning_rate": 6.307692307692308e-06,
252
+ "loss": 0.035,
253
+ "step": 960
254
+ },
255
+ {
256
+ "epoch": 14.77,
257
+ "eval_f1": 0.9024576473395372,
258
+ "eval_loss": 0.4710945188999176,
259
+ "eval_precision": 0.9040958083832336,
260
+ "eval_recall": 0.9011746801731108,
261
+ "eval_runtime": 1.8119,
262
+ "eval_samples_per_second": 161.155,
263
+ "eval_steps_per_second": 5.519,
264
+ "step": 960
265
+ },
266
+ {
267
+ "epoch": 15.75,
268
+ "learning_rate": 6.061538461538462e-06,
269
+ "loss": 0.0372,
270
+ "step": 1024
271
+ },
272
+ {
273
+ "epoch": 15.75,
274
+ "eval_f1": 0.8839804948528084,
275
+ "eval_loss": 0.5761662125587463,
276
+ "eval_precision": 0.8915556426332288,
277
+ "eval_recall": 0.8801778665525277,
278
+ "eval_runtime": 1.7874,
279
+ "eval_samples_per_second": 163.369,
280
+ "eval_steps_per_second": 5.595,
281
+ "step": 1024
282
+ },
283
+ {
284
+ "epoch": 16.74,
285
+ "learning_rate": 5.815384615384616e-06,
286
+ "loss": 0.0341,
287
+ "step": 1088
288
+ },
289
+ {
290
+ "epoch": 16.74,
291
+ "eval_f1": 0.9020838323353293,
292
+ "eval_loss": 0.5446054339408875,
293
+ "eval_precision": 0.9060461070030448,
294
+ "eval_recall": 0.8995577115137681,
295
+ "eval_runtime": 1.7858,
296
+ "eval_samples_per_second": 163.512,
297
+ "eval_steps_per_second": 5.6,
298
+ "step": 1088
299
+ },
300
+ {
301
+ "epoch": 17.72,
302
+ "learning_rate": 5.56923076923077e-06,
303
+ "loss": 0.0321,
304
+ "step": 1152
305
+ },
306
+ {
307
+ "epoch": 17.72,
308
+ "eval_f1": 0.9058548177161077,
309
+ "eval_loss": 0.5152109265327454,
310
+ "eval_precision": 0.908026113671275,
311
+ "eval_recall": 0.9042421648356874,
312
+ "eval_runtime": 1.7852,
313
+ "eval_samples_per_second": 163.569,
314
+ "eval_steps_per_second": 5.602,
315
+ "step": 1152
316
+ },
317
+ {
318
+ "epoch": 18.71,
319
+ "learning_rate": 5.323076923076923e-06,
320
+ "loss": 0.0171,
321
+ "step": 1216
322
+ },
323
+ {
324
+ "epoch": 18.71,
325
+ "eval_f1": 0.887100671789667,
326
+ "eval_loss": 0.6121218204498291,
327
+ "eval_precision": 0.897562663766253,
328
+ "eval_recall": 0.882436866885433,
329
+ "eval_runtime": 1.7977,
330
+ "eval_samples_per_second": 162.431,
331
+ "eval_steps_per_second": 5.563,
332
+ "step": 1216
333
+ },
334
+ {
335
+ "epoch": 19.69,
336
+ "learning_rate": 5.076923076923077e-06,
337
+ "loss": 0.0261,
338
+ "step": 1280
339
+ },
340
+ {
341
+ "epoch": 19.69,
342
+ "eval_f1": 0.9024576473395372,
343
+ "eval_loss": 0.5516932010650635,
344
+ "eval_precision": 0.9040958083832336,
345
+ "eval_recall": 0.9011746801731108,
346
+ "eval_runtime": 1.7934,
347
+ "eval_samples_per_second": 162.821,
348
+ "eval_steps_per_second": 5.576,
349
+ "step": 1280
350
+ },
351
+ {
352
+ "epoch": 20.68,
353
+ "learning_rate": 4.830769230769231e-06,
354
+ "loss": 0.0153,
355
+ "step": 1344
356
+ },
357
+ {
358
+ "epoch": 20.68,
359
+ "eval_f1": 0.89848348618354,
360
+ "eval_loss": 0.5832306146621704,
361
+ "eval_precision": 0.9031007751937985,
362
+ "eval_recall": 0.89568174252152,
363
+ "eval_runtime": 1.7807,
364
+ "eval_samples_per_second": 163.978,
365
+ "eval_steps_per_second": 5.616,
366
+ "step": 1344
367
+ },
368
+ {
369
+ "epoch": 21.66,
370
+ "learning_rate": 4.5846153846153855e-06,
371
+ "loss": 0.0183,
372
+ "step": 1408
373
+ },
374
+ {
375
+ "epoch": 21.66,
376
+ "eval_f1": 0.9092560719066743,
377
+ "eval_loss": 0.5685573220252991,
378
+ "eval_precision": 0.9120123153894261,
379
+ "eval_recall": 0.9073096494982642,
380
+ "eval_runtime": 1.8067,
381
+ "eval_samples_per_second": 161.617,
382
+ "eval_steps_per_second": 5.535,
383
+ "step": 1408
384
+ },
385
+ {
386
+ "epoch": 22.65,
387
+ "learning_rate": 4.338461538461539e-06,
388
+ "loss": 0.0098,
389
+ "step": 1472
390
+ },
391
+ {
392
+ "epoch": 22.65,
393
+ "eval_f1": 0.9058548177161077,
394
+ "eval_loss": 0.5791997909545898,
395
+ "eval_precision": 0.908026113671275,
396
+ "eval_recall": 0.9042421648356874,
397
+ "eval_runtime": 1.7948,
398
+ "eval_samples_per_second": 162.694,
399
+ "eval_steps_per_second": 5.572,
400
+ "step": 1472
401
+ },
402
+ {
403
+ "epoch": 23.63,
404
+ "learning_rate": 4.0923076923076925e-06,
405
+ "loss": 0.0216,
406
+ "step": 1536
407
+ },
408
+ {
409
+ "epoch": 23.63,
410
+ "eval_f1": 0.8948732718894008,
411
+ "eval_loss": 0.6078387498855591,
412
+ "eval_precision": 0.9001797250692184,
413
+ "eval_recall": 0.8918057735292719,
414
+ "eval_runtime": 1.7897,
415
+ "eval_samples_per_second": 163.158,
416
+ "eval_steps_per_second": 5.588,
417
+ "step": 1536
418
+ },
419
+ {
420
+ "epoch": 24.62,
421
+ "learning_rate": 3.846153846153847e-06,
422
+ "loss": 0.0076,
423
+ "step": 1600
424
+ },
425
+ {
426
+ "epoch": 24.62,
427
+ "eval_f1": 0.8950898203592814,
428
+ "eval_loss": 0.6126357913017273,
429
+ "eval_precision": 0.898989898989899,
430
+ "eval_recall": 0.8926142578589432,
431
+ "eval_runtime": 1.7831,
432
+ "eval_samples_per_second": 163.755,
433
+ "eval_steps_per_second": 5.608,
434
+ "step": 1600
435
+ },
436
+ {
437
+ "epoch": 25.6,
438
+ "learning_rate": 3.6000000000000003e-06,
439
+ "loss": 0.0073,
440
+ "step": 1664
441
+ },
442
+ {
443
+ "epoch": 25.6,
444
+ "eval_f1": 0.9058548177161077,
445
+ "eval_loss": 0.6077719926834106,
446
+ "eval_precision": 0.908026113671275,
447
+ "eval_recall": 0.9042421648356874,
448
+ "eval_runtime": 1.7913,
449
+ "eval_samples_per_second": 163.011,
450
+ "eval_steps_per_second": 5.583,
451
+ "step": 1664
452
+ },
453
+ {
454
+ "epoch": 26.58,
455
+ "learning_rate": 3.353846153846154e-06,
456
+ "loss": 0.0257,
457
+ "step": 1728
458
+ },
459
+ {
460
+ "epoch": 26.58,
461
+ "eval_f1": 0.9020838323353293,
462
+ "eval_loss": 0.6265017986297607,
463
+ "eval_precision": 0.9060461070030448,
464
+ "eval_recall": 0.8995577115137681,
465
+ "eval_runtime": 1.8318,
466
+ "eval_samples_per_second": 159.409,
467
+ "eval_steps_per_second": 5.459,
468
+ "step": 1728
469
+ },
470
+ {
471
+ "epoch": 27.57,
472
+ "learning_rate": 3.1076923076923076e-06,
473
+ "loss": 0.0047,
474
+ "step": 1792
475
+ },
476
+ {
477
+ "epoch": 27.57,
478
+ "eval_f1": 0.891252898280854,
479
+ "eval_loss": 0.6706699728965759,
480
+ "eval_precision": 0.8972822910578608,
481
+ "eval_recall": 0.8879298045370239,
482
+ "eval_runtime": 1.7836,
483
+ "eval_samples_per_second": 163.716,
484
+ "eval_steps_per_second": 5.607,
485
+ "step": 1792
486
+ },
487
+ {
488
+ "epoch": 28.55,
489
+ "learning_rate": 2.8615384615384615e-06,
490
+ "loss": 0.0063,
491
+ "step": 1856
492
+ },
493
+ {
494
+ "epoch": 28.55,
495
+ "eval_f1": 0.89848348618354,
496
+ "eval_loss": 0.6434349417686462,
497
+ "eval_precision": 0.9031007751937985,
498
+ "eval_recall": 0.89568174252152,
499
+ "eval_runtime": 1.8097,
500
+ "eval_samples_per_second": 161.353,
501
+ "eval_steps_per_second": 5.526,
502
+ "step": 1856
503
+ },
504
+ {
505
+ "epoch": 29.54,
506
+ "learning_rate": 2.615384615384616e-06,
507
+ "loss": 0.0045,
508
+ "step": 1920
509
+ },
510
+ {
511
+ "epoch": 29.54,
512
+ "eval_f1": 0.9092560719066743,
513
+ "eval_loss": 0.6284749507904053,
514
+ "eval_precision": 0.9120123153894261,
515
+ "eval_recall": 0.9073096494982642,
516
+ "eval_runtime": 1.7895,
517
+ "eval_samples_per_second": 163.173,
518
+ "eval_steps_per_second": 5.588,
519
+ "step": 1920
520
+ }
521
+ ],
522
+ "logging_steps": 64,
523
+ "max_steps": 2600,
524
+ "num_input_tokens_seen": 0,
525
+ "num_train_epochs": 40,
526
+ "save_steps": 64,
527
+ "total_flos": 2249673523312320.0,
528
+ "train_batch_size": 32,
529
+ "trial_name": null,
530
+ "trial_params": null
531
+ }
training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:842dac18de31e7b25b3f64e230c94e086f188da05c6d39f8d137f30f9e831131
3
+ size 4664