Jacques7103 committed on
Commit
f05d24e
·
1 Parent(s): 4ceb749

🍻 cheers

Browse files
all_results.json CHANGED
@@ -1,12 +1,12 @@
1
  {
2
  "epoch": 4.0,
3
- "eval_accuracy": 0.9308,
4
- "eval_loss": 0.26776641607284546,
5
- "eval_runtime": 877.6964,
6
- "eval_samples_per_second": 2.848,
7
- "eval_steps_per_second": 0.357,
8
- "train_loss": 0.2672756195687917,
9
- "train_runtime": 47803.3846,
10
- "train_samples_per_second": 0.628,
11
- "train_steps_per_second": 0.039
12
  }
 
1
  {
2
  "epoch": 4.0,
3
+ "eval_accuracy": 0.98,
4
+ "eval_loss": 0.10598108917474747,
5
+ "eval_runtime": 37.1403,
6
+ "eval_samples_per_second": 2.692,
7
+ "eval_steps_per_second": 0.35,
8
+ "train_loss": 0.18094830640724727,
9
+ "train_runtime": 416.3142,
10
+ "train_samples_per_second": 0.961,
11
+ "train_steps_per_second": 0.067
12
  }
config.json CHANGED
@@ -9,30 +9,14 @@
9
  "hidden_size": 768,
10
  "id2label": {
11
  "0": "apple_pie",
12
- "1": "baby_back_ribs",
13
- "2": "baklava",
14
- "3": "beef_carpaccio",
15
- "4": "beef_tartare",
16
- "5": "beet_salad",
17
- "6": "beignets",
18
- "7": "bibimbap",
19
- "8": "bread_pudding",
20
- "9": "breakfast_burrito"
21
  },
22
  "image_size": 224,
23
  "initializer_range": 0.02,
24
  "intermediate_size": 3072,
25
  "label2id": {
26
  "apple_pie": "0",
27
- "baby_back_ribs": "1",
28
- "baklava": "2",
29
- "beef_carpaccio": "3",
30
- "beef_tartare": "4",
31
- "beet_salad": "5",
32
- "beignets": "6",
33
- "bibimbap": "7",
34
- "bread_pudding": "8",
35
- "breakfast_burrito": "9"
36
  },
37
  "layer_norm_eps": 1e-12,
38
  "model_type": "vit",
 
9
  "hidden_size": 768,
10
  "id2label": {
11
  "0": "apple_pie",
12
+ "1": "baby_back_ribs"
 
 
 
 
 
 
 
 
13
  },
14
  "image_size": 224,
15
  "initializer_range": 0.02,
16
  "intermediate_size": 3072,
17
  "label2id": {
18
  "apple_pie": "0",
19
+ "baby_back_ribs": "1"
 
 
 
 
 
 
 
 
20
  },
21
  "layer_norm_eps": 1e-12,
22
  "model_type": "vit",
eval_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "epoch": 4.0,
3
- "eval_accuracy": 0.9308,
4
- "eval_loss": 0.26776641607284546,
5
- "eval_runtime": 877.6964,
6
- "eval_samples_per_second": 2.848,
7
- "eval_steps_per_second": 0.357
8
  }
 
1
  {
2
  "epoch": 4.0,
3
+ "eval_accuracy": 0.98,
4
+ "eval_loss": 0.10598108917474747,
5
+ "eval_runtime": 37.1403,
6
+ "eval_samples_per_second": 2.692,
7
+ "eval_steps_per_second": 0.35
8
  }
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:99deb9e3e1dbbaa50ea420465fb7dfd1b3e740c3ad70faee969878a50c43a4fe
3
- size 343303786
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:83ef21ce3f8713c0bbca09103dc638a2c1618560f3f0d22b7c0b5d752c1604ac
3
+ size 343279210
runs/Dec14_21-37-11_DESKTOP-MQJAOOG/1702564650.3208292/events.out.tfevents.1702564650.DESKTOP-MQJAOOG.17968.1 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0a4b461c47f9745057cae5d70805a5734164ab911b0e5f3a0473b22f8fdfaf7c
3
+ size 4560
runs/Dec14_21-37-11_DESKTOP-MQJAOOG/events.out.tfevents.1702564650.DESKTOP-MQJAOOG.17968.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:24fac6b07696d9595866e52ccf2ff6f5e9f9ce37492be59dd7da711a17260dd7
3
+ size 3185
runs/Dec14_21-39-12_DESKTOP-MQJAOOG/1702564766.38767/events.out.tfevents.1702564766.DESKTOP-MQJAOOG.17968.3 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:eccc9a9061b21158d076db553dd31cf7f48374491575414d04899c925898f050
3
+ size 4560
runs/Dec14_21-39-12_DESKTOP-MQJAOOG/events.out.tfevents.1702564766.DESKTOP-MQJAOOG.17968.2 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b8e89a1a307e0e0509adb17b2fdac0874b66d489099f136a81eea672939faa78
3
+ size 3841
runs/Dec14_21-39-12_DESKTOP-MQJAOOG/events.out.tfevents.1702565255.DESKTOP-MQJAOOG.17968.4 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e3d68baf6d227e9c3f805a5968b07a4594e9aeac2096f6386f5fbb45db22232e
3
+ size 405
train_results.json CHANGED
@@ -1,7 +1,7 @@
1
  {
2
  "epoch": 4.0,
3
- "train_loss": 0.2672756195687917,
4
- "train_runtime": 47803.3846,
5
- "train_samples_per_second": 0.628,
6
- "train_steps_per_second": 0.039
7
  }
 
1
  {
2
  "epoch": 4.0,
3
+ "train_loss": 0.18094830640724727,
4
+ "train_runtime": 416.3142,
5
+ "train_samples_per_second": 0.961,
6
+ "train_steps_per_second": 0.067
7
  }
trainer_state.json CHANGED
@@ -1,1307 +1,35 @@
1
  {
2
- "best_metric": 0.26776641607284546,
3
- "best_model_checkpoint": "food-recognition\\checkpoint-1500",
4
  "epoch": 4.0,
5
- "global_step": 1876,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
9
  "log_history": [
10
- {
11
- "epoch": 0.02,
12
- "learning_rate": 0.00019893390191897656,
13
- "loss": 2.1647,
14
- "step": 10
15
- },
16
- {
17
- "epoch": 0.04,
18
- "learning_rate": 0.0001978678038379531,
19
- "loss": 1.7478,
20
- "step": 20
21
- },
22
- {
23
- "epoch": 0.06,
24
- "learning_rate": 0.00019680170575692965,
25
- "loss": 1.4997,
26
- "step": 30
27
- },
28
- {
29
- "epoch": 0.09,
30
- "learning_rate": 0.0001957356076759062,
31
- "loss": 1.1421,
32
- "step": 40
33
- },
34
- {
35
- "epoch": 0.11,
36
- "learning_rate": 0.00019466950959488275,
37
- "loss": 1.0115,
38
- "step": 50
39
- },
40
- {
41
- "epoch": 0.13,
42
- "learning_rate": 0.00019360341151385927,
43
- "loss": 0.8962,
44
- "step": 60
45
- },
46
- {
47
- "epoch": 0.15,
48
- "learning_rate": 0.00019253731343283584,
49
- "loss": 0.774,
50
- "step": 70
51
- },
52
- {
53
- "epoch": 0.17,
54
- "learning_rate": 0.0001914712153518124,
55
- "loss": 0.8508,
56
- "step": 80
57
- },
58
- {
59
- "epoch": 0.19,
60
- "learning_rate": 0.0001904051172707889,
61
- "loss": 0.8852,
62
- "step": 90
63
- },
64
- {
65
- "epoch": 0.21,
66
- "learning_rate": 0.00018933901918976546,
67
- "loss": 0.6346,
68
- "step": 100
69
- },
70
- {
71
- "epoch": 0.21,
72
- "eval_accuracy": 0.8136,
73
- "eval_loss": 0.6600381135940552,
74
- "eval_runtime": 910.9195,
75
- "eval_samples_per_second": 2.744,
76
- "eval_steps_per_second": 0.344,
77
- "step": 100
78
- },
79
- {
80
- "epoch": 0.23,
81
- "learning_rate": 0.00018827292110874203,
82
- "loss": 0.7183,
83
- "step": 110
84
- },
85
- {
86
- "epoch": 0.26,
87
- "learning_rate": 0.00018720682302771858,
88
- "loss": 0.7806,
89
- "step": 120
90
- },
91
- {
92
- "epoch": 0.28,
93
- "learning_rate": 0.0001861407249466951,
94
- "loss": 0.7702,
95
- "step": 130
96
- },
97
- {
98
- "epoch": 0.3,
99
- "learning_rate": 0.00018507462686567165,
100
- "loss": 0.8338,
101
- "step": 140
102
- },
103
- {
104
- "epoch": 0.32,
105
- "learning_rate": 0.0001840085287846482,
106
- "loss": 0.732,
107
- "step": 150
108
- },
109
- {
110
- "epoch": 0.34,
111
- "learning_rate": 0.00018294243070362474,
112
- "loss": 0.6746,
113
- "step": 160
114
- },
115
- {
116
- "epoch": 0.36,
117
- "learning_rate": 0.0001818763326226013,
118
- "loss": 0.5369,
119
- "step": 170
120
- },
121
- {
122
- "epoch": 0.38,
123
- "learning_rate": 0.00018081023454157784,
124
- "loss": 0.6082,
125
- "step": 180
126
- },
127
- {
128
- "epoch": 0.41,
129
- "learning_rate": 0.00017974413646055436,
130
- "loss": 0.5756,
131
- "step": 190
132
- },
133
- {
134
- "epoch": 0.43,
135
- "learning_rate": 0.00017867803837953093,
136
- "loss": 0.4988,
137
- "step": 200
138
- },
139
- {
140
- "epoch": 0.43,
141
- "eval_accuracy": 0.8212,
142
- "eval_loss": 0.5744591951370239,
143
- "eval_runtime": 901.9851,
144
- "eval_samples_per_second": 2.772,
145
- "eval_steps_per_second": 0.347,
146
- "step": 200
147
- },
148
- {
149
- "epoch": 0.45,
150
- "learning_rate": 0.00017761194029850748,
151
- "loss": 0.672,
152
- "step": 210
153
- },
154
- {
155
- "epoch": 0.47,
156
- "learning_rate": 0.00017654584221748402,
157
- "loss": 0.6575,
158
- "step": 220
159
- },
160
- {
161
- "epoch": 0.49,
162
- "learning_rate": 0.00017547974413646055,
163
- "loss": 0.5028,
164
- "step": 230
165
- },
166
- {
167
- "epoch": 0.51,
168
- "learning_rate": 0.00017441364605543712,
169
- "loss": 0.5451,
170
- "step": 240
171
- },
172
- {
173
- "epoch": 0.53,
174
- "learning_rate": 0.00017334754797441367,
175
- "loss": 0.6499,
176
- "step": 250
177
- },
178
- {
179
- "epoch": 0.55,
180
- "learning_rate": 0.0001722814498933902,
181
- "loss": 0.7127,
182
- "step": 260
183
- },
184
- {
185
- "epoch": 0.58,
186
- "learning_rate": 0.00017121535181236673,
187
- "loss": 0.538,
188
- "step": 270
189
- },
190
- {
191
- "epoch": 0.6,
192
- "learning_rate": 0.00017014925373134328,
193
- "loss": 0.6385,
194
- "step": 280
195
- },
196
- {
197
- "epoch": 0.62,
198
- "learning_rate": 0.00016908315565031986,
199
- "loss": 0.4637,
200
- "step": 290
201
- },
202
- {
203
- "epoch": 0.64,
204
- "learning_rate": 0.00016801705756929638,
205
- "loss": 0.4619,
206
- "step": 300
207
- },
208
- {
209
- "epoch": 0.64,
210
- "eval_accuracy": 0.8852,
211
- "eval_loss": 0.3846997916698456,
212
- "eval_runtime": 886.5096,
213
- "eval_samples_per_second": 2.82,
214
- "eval_steps_per_second": 0.353,
215
- "step": 300
216
- },
217
- {
218
- "epoch": 0.66,
219
- "learning_rate": 0.00016695095948827292,
220
- "loss": 0.6006,
221
- "step": 310
222
- },
223
- {
224
- "epoch": 0.68,
225
- "learning_rate": 0.00016588486140724947,
226
- "loss": 0.4678,
227
- "step": 320
228
- },
229
- {
230
- "epoch": 0.7,
231
- "learning_rate": 0.00016481876332622602,
232
- "loss": 0.5459,
233
- "step": 330
234
- },
235
- {
236
- "epoch": 0.72,
237
- "learning_rate": 0.00016375266524520257,
238
- "loss": 0.4216,
239
- "step": 340
240
- },
241
- {
242
- "epoch": 0.75,
243
- "learning_rate": 0.00016268656716417911,
244
- "loss": 0.6596,
245
- "step": 350
246
- },
247
- {
248
- "epoch": 0.77,
249
- "learning_rate": 0.00016162046908315566,
250
- "loss": 0.6491,
251
- "step": 360
252
- },
253
- {
254
- "epoch": 0.79,
255
- "learning_rate": 0.0001605543710021322,
256
- "loss": 0.5269,
257
- "step": 370
258
- },
259
- {
260
- "epoch": 0.81,
261
- "learning_rate": 0.00015948827292110876,
262
- "loss": 0.441,
263
- "step": 380
264
- },
265
- {
266
- "epoch": 0.83,
267
- "learning_rate": 0.0001584221748400853,
268
- "loss": 0.4428,
269
- "step": 390
270
- },
271
- {
272
- "epoch": 0.85,
273
- "learning_rate": 0.00015735607675906182,
274
- "loss": 0.4205,
275
- "step": 400
276
- },
277
- {
278
- "epoch": 0.85,
279
- "eval_accuracy": 0.864,
280
- "eval_loss": 0.43827134370803833,
281
- "eval_runtime": 905.4707,
282
- "eval_samples_per_second": 2.761,
283
- "eval_steps_per_second": 0.346,
284
- "step": 400
285
- },
286
- {
287
- "epoch": 0.87,
288
- "learning_rate": 0.0001562899786780384,
289
- "loss": 0.6422,
290
- "step": 410
291
- },
292
- {
293
- "epoch": 0.9,
294
- "learning_rate": 0.00015522388059701495,
295
- "loss": 0.4558,
296
- "step": 420
297
- },
298
- {
299
- "epoch": 0.92,
300
- "learning_rate": 0.0001541577825159915,
301
- "loss": 0.4575,
302
- "step": 430
303
- },
304
- {
305
- "epoch": 0.94,
306
- "learning_rate": 0.000153091684434968,
307
- "loss": 0.4454,
308
- "step": 440
309
- },
310
- {
311
- "epoch": 0.96,
312
- "learning_rate": 0.00015202558635394456,
313
- "loss": 0.4917,
314
- "step": 450
315
- },
316
- {
317
- "epoch": 0.98,
318
- "learning_rate": 0.00015095948827292113,
319
- "loss": 0.5301,
320
- "step": 460
321
- },
322
- {
323
- "epoch": 1.0,
324
- "learning_rate": 0.00014989339019189766,
325
- "loss": 0.3294,
326
- "step": 470
327
- },
328
- {
329
- "epoch": 1.02,
330
- "learning_rate": 0.0001488272921108742,
331
- "loss": 0.2967,
332
- "step": 480
333
- },
334
- {
335
- "epoch": 1.04,
336
- "learning_rate": 0.00014776119402985075,
337
- "loss": 0.3406,
338
- "step": 490
339
- },
340
- {
341
- "epoch": 1.07,
342
- "learning_rate": 0.0001466950959488273,
343
- "loss": 0.287,
344
- "step": 500
345
- },
346
- {
347
- "epoch": 1.07,
348
- "eval_accuracy": 0.8992,
349
- "eval_loss": 0.3214336931705475,
350
- "eval_runtime": 905.5537,
351
- "eval_samples_per_second": 2.761,
352
- "eval_steps_per_second": 0.346,
353
- "step": 500
354
- },
355
- {
356
- "epoch": 1.09,
357
- "learning_rate": 0.00014562899786780384,
358
- "loss": 0.3278,
359
- "step": 510
360
- },
361
- {
362
- "epoch": 1.11,
363
- "learning_rate": 0.0001445628997867804,
364
- "loss": 0.2406,
365
- "step": 520
366
- },
367
- {
368
- "epoch": 1.13,
369
- "learning_rate": 0.00014349680170575694,
370
- "loss": 0.2839,
371
- "step": 530
372
- },
373
- {
374
- "epoch": 1.15,
375
- "learning_rate": 0.0001424307036247335,
376
- "loss": 0.2843,
377
- "step": 540
378
- },
379
- {
380
- "epoch": 1.17,
381
- "learning_rate": 0.00014136460554371003,
382
- "loss": 0.2085,
383
- "step": 550
384
- },
385
- {
386
- "epoch": 1.19,
387
- "learning_rate": 0.00014029850746268658,
388
- "loss": 0.2465,
389
- "step": 560
390
- },
391
- {
392
- "epoch": 1.22,
393
- "learning_rate": 0.0001392324093816631,
394
- "loss": 0.2444,
395
- "step": 570
396
- },
397
- {
398
- "epoch": 1.24,
399
- "learning_rate": 0.00013816631130063965,
400
- "loss": 0.3261,
401
- "step": 580
402
- },
403
- {
404
- "epoch": 1.26,
405
- "learning_rate": 0.00013710021321961622,
406
- "loss": 0.1768,
407
- "step": 590
408
- },
409
- {
410
- "epoch": 1.28,
411
- "learning_rate": 0.00013603411513859277,
412
- "loss": 0.2963,
413
- "step": 600
414
- },
415
- {
416
- "epoch": 1.28,
417
- "eval_accuracy": 0.8796,
418
- "eval_loss": 0.35504186153411865,
419
- "eval_runtime": 901.3195,
420
- "eval_samples_per_second": 2.774,
421
- "eval_steps_per_second": 0.347,
422
- "step": 600
423
- },
424
- {
425
- "epoch": 1.3,
426
- "learning_rate": 0.0001349680170575693,
427
- "loss": 0.3221,
428
- "step": 610
429
- },
430
- {
431
- "epoch": 1.32,
432
- "learning_rate": 0.00013390191897654584,
433
- "loss": 0.3559,
434
- "step": 620
435
- },
436
- {
437
- "epoch": 1.34,
438
- "learning_rate": 0.0001328358208955224,
439
- "loss": 0.2589,
440
- "step": 630
441
- },
442
- {
443
- "epoch": 1.36,
444
- "learning_rate": 0.00013176972281449893,
445
- "loss": 0.317,
446
- "step": 640
447
- },
448
- {
449
- "epoch": 1.39,
450
- "learning_rate": 0.00013070362473347548,
451
- "loss": 0.2702,
452
- "step": 650
453
- },
454
- {
455
- "epoch": 1.41,
456
- "learning_rate": 0.00012963752665245203,
457
- "loss": 0.2416,
458
- "step": 660
459
- },
460
  {
461
  "epoch": 1.43,
462
  "learning_rate": 0.00012857142857142858,
463
- "loss": 0.3619,
464
- "step": 670
465
- },
466
- {
467
- "epoch": 1.45,
468
- "learning_rate": 0.00012750533049040512,
469
- "loss": 0.1777,
470
- "step": 680
471
- },
472
- {
473
- "epoch": 1.47,
474
- "learning_rate": 0.00012643923240938167,
475
- "loss": 0.3028,
476
- "step": 690
477
- },
478
- {
479
- "epoch": 1.49,
480
- "learning_rate": 0.00012537313432835822,
481
- "loss": 0.2908,
482
- "step": 700
483
- },
484
- {
485
- "epoch": 1.49,
486
- "eval_accuracy": 0.9064,
487
- "eval_loss": 0.3182031810283661,
488
- "eval_runtime": 904.1865,
489
- "eval_samples_per_second": 2.765,
490
- "eval_steps_per_second": 0.346,
491
- "step": 700
492
- },
493
- {
494
- "epoch": 1.51,
495
- "learning_rate": 0.00012430703624733474,
496
- "loss": 0.236,
497
- "step": 710
498
- },
499
- {
500
- "epoch": 1.54,
501
- "learning_rate": 0.0001232409381663113,
502
- "loss": 0.1903,
503
- "step": 720
504
- },
505
- {
506
- "epoch": 1.56,
507
- "learning_rate": 0.00012217484008528786,
508
- "loss": 0.2509,
509
- "step": 730
510
- },
511
- {
512
- "epoch": 1.58,
513
- "learning_rate": 0.00012110874200426441,
514
- "loss": 0.3468,
515
- "step": 740
516
- },
517
- {
518
- "epoch": 1.6,
519
- "learning_rate": 0.00012004264392324094,
520
- "loss": 0.4192,
521
- "step": 750
522
- },
523
- {
524
- "epoch": 1.62,
525
- "learning_rate": 0.00011897654584221749,
526
- "loss": 0.2604,
527
- "step": 760
528
- },
529
- {
530
- "epoch": 1.64,
531
- "learning_rate": 0.00011791044776119405,
532
- "loss": 0.2523,
533
- "step": 770
534
- },
535
- {
536
- "epoch": 1.66,
537
- "learning_rate": 0.00011684434968017057,
538
- "loss": 0.169,
539
- "step": 780
540
- },
541
- {
542
- "epoch": 1.68,
543
- "learning_rate": 0.00011577825159914713,
544
- "loss": 0.3698,
545
- "step": 790
546
- },
547
- {
548
- "epoch": 1.71,
549
- "learning_rate": 0.00011471215351812368,
550
- "loss": 0.3791,
551
- "step": 800
552
- },
553
- {
554
- "epoch": 1.71,
555
- "eval_accuracy": 0.9012,
556
- "eval_loss": 0.30747249722480774,
557
- "eval_runtime": 905.3757,
558
- "eval_samples_per_second": 2.761,
559
- "eval_steps_per_second": 0.346,
560
- "step": 800
561
- },
562
- {
563
- "epoch": 1.73,
564
- "learning_rate": 0.00011364605543710023,
565
- "loss": 0.4315,
566
- "step": 810
567
- },
568
- {
569
- "epoch": 1.75,
570
- "learning_rate": 0.00011257995735607676,
571
- "loss": 0.2439,
572
- "step": 820
573
- },
574
- {
575
- "epoch": 1.77,
576
- "learning_rate": 0.0001115138592750533,
577
- "loss": 0.1582,
578
- "step": 830
579
- },
580
- {
581
- "epoch": 1.79,
582
- "learning_rate": 0.00011044776119402987,
583
- "loss": 0.1514,
584
- "step": 840
585
- },
586
- {
587
- "epoch": 1.81,
588
- "learning_rate": 0.00010938166311300639,
589
- "loss": 0.302,
590
- "step": 850
591
- },
592
- {
593
- "epoch": 1.83,
594
- "learning_rate": 0.00010831556503198295,
595
- "loss": 0.1793,
596
- "step": 860
597
- },
598
- {
599
- "epoch": 1.86,
600
- "learning_rate": 0.0001072494669509595,
601
- "loss": 0.2064,
602
- "step": 870
603
- },
604
- {
605
- "epoch": 1.88,
606
- "learning_rate": 0.00010618336886993603,
607
- "loss": 0.1789,
608
- "step": 880
609
- },
610
- {
611
- "epoch": 1.9,
612
- "learning_rate": 0.00010511727078891258,
613
- "loss": 0.2323,
614
- "step": 890
615
- },
616
- {
617
- "epoch": 1.92,
618
- "learning_rate": 0.00010405117270788914,
619
- "loss": 0.1399,
620
- "step": 900
621
- },
622
- {
623
- "epoch": 1.92,
624
- "eval_accuracy": 0.9108,
625
- "eval_loss": 0.31127122044563293,
626
- "eval_runtime": 903.7995,
627
- "eval_samples_per_second": 2.766,
628
- "eval_steps_per_second": 0.346,
629
- "step": 900
630
- },
631
- {
632
- "epoch": 1.94,
633
- "learning_rate": 0.00010298507462686569,
634
- "loss": 0.2957,
635
- "step": 910
636
- },
637
- {
638
- "epoch": 1.96,
639
- "learning_rate": 0.00010191897654584222,
640
- "loss": 0.2673,
641
- "step": 920
642
- },
643
- {
644
- "epoch": 1.98,
645
- "learning_rate": 0.00010085287846481877,
646
- "loss": 0.2396,
647
- "step": 930
648
- },
649
- {
650
- "epoch": 2.0,
651
- "learning_rate": 9.978678038379531e-05,
652
- "loss": 0.1854,
653
- "step": 940
654
- },
655
- {
656
- "epoch": 2.03,
657
- "learning_rate": 9.872068230277186e-05,
658
- "loss": 0.0911,
659
- "step": 950
660
- },
661
- {
662
- "epoch": 2.05,
663
- "learning_rate": 9.76545842217484e-05,
664
- "loss": 0.0954,
665
- "step": 960
666
- },
667
- {
668
- "epoch": 2.07,
669
- "learning_rate": 9.658848614072496e-05,
670
- "loss": 0.0634,
671
- "step": 970
672
- },
673
- {
674
- "epoch": 2.09,
675
- "learning_rate": 9.552238805970149e-05,
676
- "loss": 0.1001,
677
- "step": 980
678
- },
679
- {
680
- "epoch": 2.11,
681
- "learning_rate": 9.445628997867805e-05,
682
- "loss": 0.076,
683
- "step": 990
684
- },
685
- {
686
- "epoch": 2.13,
687
- "learning_rate": 9.339019189765458e-05,
688
- "loss": 0.0501,
689
- "step": 1000
690
- },
691
- {
692
- "epoch": 2.13,
693
- "eval_accuracy": 0.9004,
694
- "eval_loss": 0.33236250281333923,
695
- "eval_runtime": 901.7876,
696
- "eval_samples_per_second": 2.772,
697
- "eval_steps_per_second": 0.347,
698
- "step": 1000
699
- },
700
- {
701
- "epoch": 2.15,
702
- "learning_rate": 9.232409381663113e-05,
703
- "loss": 0.1246,
704
- "step": 1010
705
- },
706
- {
707
- "epoch": 2.17,
708
- "learning_rate": 9.125799573560768e-05,
709
- "loss": 0.1432,
710
- "step": 1020
711
- },
712
- {
713
- "epoch": 2.2,
714
- "learning_rate": 9.019189765458423e-05,
715
- "loss": 0.1998,
716
- "step": 1030
717
- },
718
- {
719
- "epoch": 2.22,
720
- "learning_rate": 8.912579957356077e-05,
721
- "loss": 0.1226,
722
- "step": 1040
723
- },
724
- {
725
- "epoch": 2.24,
726
- "learning_rate": 8.805970149253732e-05,
727
- "loss": 0.0544,
728
- "step": 1050
729
- },
730
- {
731
- "epoch": 2.26,
732
- "learning_rate": 8.699360341151387e-05,
733
- "loss": 0.0842,
734
- "step": 1060
735
- },
736
- {
737
- "epoch": 2.28,
738
- "learning_rate": 8.59275053304904e-05,
739
- "loss": 0.063,
740
- "step": 1070
741
- },
742
- {
743
- "epoch": 2.3,
744
- "learning_rate": 8.486140724946695e-05,
745
- "loss": 0.0682,
746
- "step": 1080
747
- },
748
- {
749
- "epoch": 2.32,
750
- "learning_rate": 8.37953091684435e-05,
751
- "loss": 0.0897,
752
- "step": 1090
753
- },
754
- {
755
- "epoch": 2.35,
756
- "learning_rate": 8.272921108742005e-05,
757
- "loss": 0.0873,
758
- "step": 1100
759
- },
760
- {
761
- "epoch": 2.35,
762
- "eval_accuracy": 0.9196,
763
- "eval_loss": 0.28453874588012695,
764
- "eval_runtime": 903.3406,
765
- "eval_samples_per_second": 2.768,
766
- "eval_steps_per_second": 0.346,
767
- "step": 1100
768
- },
769
- {
770
- "epoch": 2.37,
771
- "learning_rate": 8.166311300639659e-05,
772
- "loss": 0.076,
773
- "step": 1110
774
- },
775
- {
776
- "epoch": 2.39,
777
- "learning_rate": 8.059701492537314e-05,
778
- "loss": 0.0576,
779
- "step": 1120
780
- },
781
- {
782
- "epoch": 2.41,
783
- "learning_rate": 7.953091684434967e-05,
784
- "loss": 0.0622,
785
- "step": 1130
786
- },
787
- {
788
- "epoch": 2.43,
789
- "learning_rate": 7.846481876332623e-05,
790
- "loss": 0.0799,
791
- "step": 1140
792
- },
793
- {
794
- "epoch": 2.45,
795
- "learning_rate": 7.739872068230277e-05,
796
- "loss": 0.1253,
797
- "step": 1150
798
- },
799
- {
800
- "epoch": 2.47,
801
- "learning_rate": 7.633262260127933e-05,
802
- "loss": 0.0698,
803
- "step": 1160
804
- },
805
- {
806
- "epoch": 2.49,
807
- "learning_rate": 7.526652452025586e-05,
808
- "loss": 0.1532,
809
- "step": 1170
810
- },
811
- {
812
- "epoch": 2.52,
813
- "learning_rate": 7.420042643923242e-05,
814
- "loss": 0.1728,
815
- "step": 1180
816
- },
817
- {
818
- "epoch": 2.54,
819
- "learning_rate": 7.313432835820896e-05,
820
- "loss": 0.0871,
821
- "step": 1190
822
- },
823
- {
824
- "epoch": 2.56,
825
- "learning_rate": 7.20682302771855e-05,
826
- "loss": 0.0809,
827
- "step": 1200
828
- },
829
- {
830
- "epoch": 2.56,
831
- "eval_accuracy": 0.9104,
832
- "eval_loss": 0.3186958134174347,
833
- "eval_runtime": 903.5081,
834
- "eval_samples_per_second": 2.767,
835
- "eval_steps_per_second": 0.346,
836
- "step": 1200
837
- },
838
- {
839
- "epoch": 2.58,
840
- "learning_rate": 7.100213219616205e-05,
841
- "loss": 0.1217,
842
- "step": 1210
843
- },
844
- {
845
- "epoch": 2.6,
846
- "learning_rate": 6.993603411513859e-05,
847
- "loss": 0.0469,
848
- "step": 1220
849
- },
850
- {
851
- "epoch": 2.62,
852
- "learning_rate": 6.886993603411515e-05,
853
- "loss": 0.0659,
854
- "step": 1230
855
- },
856
- {
857
- "epoch": 2.64,
858
- "learning_rate": 6.780383795309168e-05,
859
- "loss": 0.1019,
860
- "step": 1240
861
- },
862
- {
863
- "epoch": 2.67,
864
- "learning_rate": 6.673773987206824e-05,
865
- "loss": 0.046,
866
- "step": 1250
867
- },
868
- {
869
- "epoch": 2.69,
870
- "learning_rate": 6.567164179104478e-05,
871
- "loss": 0.0203,
872
- "step": 1260
873
- },
874
- {
875
- "epoch": 2.71,
876
- "learning_rate": 6.460554371002132e-05,
877
- "loss": 0.0483,
878
- "step": 1270
879
- },
880
- {
881
- "epoch": 2.73,
882
- "learning_rate": 6.353944562899787e-05,
883
- "loss": 0.0785,
884
- "step": 1280
885
- },
886
- {
887
- "epoch": 2.75,
888
- "learning_rate": 6.247334754797442e-05,
889
- "loss": 0.0358,
890
- "step": 1290
891
- },
892
- {
893
- "epoch": 2.77,
894
- "learning_rate": 6.140724946695097e-05,
895
- "loss": 0.0696,
896
- "step": 1300
897
- },
898
- {
899
- "epoch": 2.77,
900
- "eval_accuracy": 0.9184,
901
- "eval_loss": 0.3137047588825226,
902
- "eval_runtime": 902.4586,
903
- "eval_samples_per_second": 2.77,
904
- "eval_steps_per_second": 0.347,
905
- "step": 1300
906
- },
907
- {
908
- "epoch": 2.79,
909
- "learning_rate": 6.0341151385927506e-05,
910
- "loss": 0.0716,
911
- "step": 1310
912
- },
913
- {
914
- "epoch": 2.81,
915
- "learning_rate": 5.927505330490405e-05,
916
- "loss": 0.0409,
917
- "step": 1320
918
- },
919
- {
920
- "epoch": 2.84,
921
- "learning_rate": 5.82089552238806e-05,
922
- "loss": 0.0595,
923
- "step": 1330
924
  },
925
  {
926
  "epoch": 2.86,
927
  "learning_rate": 5.714285714285714e-05,
928
- "loss": 0.1415,
929
- "step": 1340
930
- },
931
- {
932
- "epoch": 2.88,
933
- "learning_rate": 5.6076759061833696e-05,
934
- "loss": 0.1064,
935
- "step": 1350
936
- },
937
- {
938
- "epoch": 2.9,
939
- "learning_rate": 5.5010660980810236e-05,
940
- "loss": 0.0783,
941
- "step": 1360
942
- },
943
- {
944
- "epoch": 2.92,
945
- "learning_rate": 5.394456289978679e-05,
946
- "loss": 0.0889,
947
- "step": 1370
948
- },
949
- {
950
- "epoch": 2.94,
951
- "learning_rate": 5.287846481876333e-05,
952
- "loss": 0.1406,
953
- "step": 1380
954
- },
955
- {
956
- "epoch": 2.96,
957
- "learning_rate": 5.181236673773987e-05,
958
- "loss": 0.0857,
959
- "step": 1390
960
- },
961
- {
962
- "epoch": 2.99,
963
- "learning_rate": 5.074626865671642e-05,
964
- "loss": 0.0684,
965
- "step": 1400
966
- },
967
- {
968
- "epoch": 2.99,
969
- "eval_accuracy": 0.9208,
970
- "eval_loss": 0.28956088423728943,
971
- "eval_runtime": 904.414,
972
- "eval_samples_per_second": 2.764,
973
- "eval_steps_per_second": 0.346,
974
- "step": 1400
975
- },
976
- {
977
- "epoch": 3.01,
978
- "learning_rate": 4.9680170575692967e-05,
979
- "loss": 0.038,
980
- "step": 1410
981
- },
982
- {
983
- "epoch": 3.03,
984
- "learning_rate": 4.861407249466951e-05,
985
- "loss": 0.0321,
986
- "step": 1420
987
- },
988
- {
989
- "epoch": 3.05,
990
- "learning_rate": 4.7547974413646055e-05,
991
- "loss": 0.0125,
992
- "step": 1430
993
- },
994
- {
995
- "epoch": 3.07,
996
- "learning_rate": 4.64818763326226e-05,
997
- "loss": 0.0197,
998
- "step": 1440
999
- },
1000
- {
1001
- "epoch": 3.09,
1002
- "learning_rate": 4.541577825159915e-05,
1003
- "loss": 0.0289,
1004
- "step": 1450
1005
- },
1006
- {
1007
- "epoch": 3.11,
1008
- "learning_rate": 4.43496801705757e-05,
1009
- "loss": 0.0273,
1010
- "step": 1460
1011
- },
1012
- {
1013
- "epoch": 3.13,
1014
- "learning_rate": 4.328358208955224e-05,
1015
- "loss": 0.0118,
1016
- "step": 1470
1017
- },
1018
- {
1019
- "epoch": 3.16,
1020
- "learning_rate": 4.2217484008528785e-05,
1021
- "loss": 0.0105,
1022
- "step": 1480
1023
- },
1024
- {
1025
- "epoch": 3.18,
1026
- "learning_rate": 4.115138592750533e-05,
1027
- "loss": 0.008,
1028
- "step": 1490
1029
- },
1030
- {
1031
- "epoch": 3.2,
1032
- "learning_rate": 4.008528784648188e-05,
1033
- "loss": 0.0084,
1034
- "step": 1500
1035
- },
1036
- {
1037
- "epoch": 3.2,
1038
- "eval_accuracy": 0.9308,
1039
- "eval_loss": 0.26776641607284546,
1040
- "eval_runtime": 908.0277,
1041
- "eval_samples_per_second": 2.753,
1042
- "eval_steps_per_second": 0.345,
1043
- "step": 1500
1044
- },
1045
- {
1046
- "epoch": 3.22,
1047
- "learning_rate": 3.901918976545843e-05,
1048
- "loss": 0.0076,
1049
- "step": 1510
1050
- },
1051
- {
1052
- "epoch": 3.24,
1053
- "learning_rate": 3.7953091684434974e-05,
1054
- "loss": 0.0087,
1055
- "step": 1520
1056
- },
1057
- {
1058
- "epoch": 3.26,
1059
- "learning_rate": 3.6886993603411515e-05,
1060
- "loss": 0.019,
1061
- "step": 1530
1062
- },
1063
- {
1064
- "epoch": 3.28,
1065
- "learning_rate": 3.582089552238806e-05,
1066
- "loss": 0.0182,
1067
- "step": 1540
1068
- },
1069
- {
1070
- "epoch": 3.3,
1071
- "learning_rate": 3.47547974413646e-05,
1072
- "loss": 0.0401,
1073
- "step": 1550
1074
- },
1075
- {
1076
- "epoch": 3.33,
1077
- "learning_rate": 3.368869936034115e-05,
1078
- "loss": 0.0461,
1079
- "step": 1560
1080
- },
1081
- {
1082
- "epoch": 3.35,
1083
- "learning_rate": 3.26226012793177e-05,
1084
- "loss": 0.0272,
1085
- "step": 1570
1086
- },
1087
- {
1088
- "epoch": 3.37,
1089
- "learning_rate": 3.1556503198294245e-05,
1090
- "loss": 0.0333,
1091
- "step": 1580
1092
- },
1093
- {
1094
- "epoch": 3.39,
1095
- "learning_rate": 3.0490405117270792e-05,
1096
- "loss": 0.0699,
1097
- "step": 1590
1098
- },
1099
- {
1100
- "epoch": 3.41,
1101
- "learning_rate": 2.9424307036247333e-05,
1102
- "loss": 0.0072,
1103
- "step": 1600
1104
- },
1105
- {
1106
- "epoch": 3.41,
1107
- "eval_accuracy": 0.9292,
1108
- "eval_loss": 0.2749463617801666,
1109
- "eval_runtime": 908.1664,
1110
- "eval_samples_per_second": 2.753,
1111
- "eval_steps_per_second": 0.345,
1112
- "step": 1600
1113
- },
1114
- {
1115
- "epoch": 3.43,
1116
- "learning_rate": 2.835820895522388e-05,
1117
- "loss": 0.0238,
1118
- "step": 1610
1119
- },
1120
- {
1121
- "epoch": 3.45,
1122
- "learning_rate": 2.7292110874200428e-05,
1123
- "loss": 0.0098,
1124
- "step": 1620
1125
- },
1126
- {
1127
- "epoch": 3.48,
1128
- "learning_rate": 2.6226012793176975e-05,
1129
- "loss": 0.0162,
1130
- "step": 1630
1131
- },
1132
- {
1133
- "epoch": 3.5,
1134
- "learning_rate": 2.515991471215352e-05,
1135
- "loss": 0.0075,
1136
- "step": 1640
1137
- },
1138
- {
1139
- "epoch": 3.52,
1140
- "learning_rate": 2.4093816631130063e-05,
1141
- "loss": 0.0326,
1142
- "step": 1650
1143
- },
1144
- {
1145
- "epoch": 3.54,
1146
- "learning_rate": 2.302771855010661e-05,
1147
- "loss": 0.0141,
1148
- "step": 1660
1149
- },
1150
- {
1151
- "epoch": 3.56,
1152
- "learning_rate": 2.1961620469083158e-05,
1153
- "loss": 0.0083,
1154
- "step": 1670
1155
- },
1156
- {
1157
- "epoch": 3.58,
1158
- "learning_rate": 2.0895522388059702e-05,
1159
- "loss": 0.011,
1160
- "step": 1680
1161
- },
1162
- {
1163
- "epoch": 3.6,
1164
- "learning_rate": 1.982942430703625e-05,
1165
- "loss": 0.0067,
1166
- "step": 1690
1167
- },
1168
- {
1169
- "epoch": 3.62,
1170
- "learning_rate": 1.8763326226012797e-05,
1171
- "loss": 0.0585,
1172
- "step": 1700
1173
- },
1174
- {
1175
- "epoch": 3.62,
1176
- "eval_accuracy": 0.9316,
1177
- "eval_loss": 0.2734415531158447,
1178
- "eval_runtime": 910.1726,
1179
- "eval_samples_per_second": 2.747,
1180
- "eval_steps_per_second": 0.344,
1181
- "step": 1700
1182
- },
1183
- {
1184
- "epoch": 3.65,
1185
- "learning_rate": 1.769722814498934e-05,
1186
- "loss": 0.007,
1187
- "step": 1710
1188
- },
1189
- {
1190
- "epoch": 3.67,
1191
- "learning_rate": 1.6631130063965885e-05,
1192
- "loss": 0.0061,
1193
- "step": 1720
1194
- },
1195
- {
1196
- "epoch": 3.69,
1197
- "learning_rate": 1.5565031982942432e-05,
1198
- "loss": 0.0061,
1199
- "step": 1730
1200
- },
1201
- {
1202
- "epoch": 3.71,
1203
- "learning_rate": 1.4498933901918976e-05,
1204
- "loss": 0.0159,
1205
- "step": 1740
1206
- },
1207
- {
1208
- "epoch": 3.73,
1209
- "learning_rate": 1.3432835820895523e-05,
1210
- "loss": 0.0154,
1211
- "step": 1750
1212
- },
1213
- {
1214
- "epoch": 3.75,
1215
- "learning_rate": 1.2366737739872069e-05,
1216
- "loss": 0.0071,
1217
- "step": 1760
1218
- },
1219
- {
1220
- "epoch": 3.77,
1221
- "learning_rate": 1.1300639658848615e-05,
1222
- "loss": 0.0067,
1223
- "step": 1770
1224
- },
1225
- {
1226
- "epoch": 3.8,
1227
- "learning_rate": 1.023454157782516e-05,
1228
- "loss": 0.0387,
1229
- "step": 1780
1230
- },
1231
- {
1232
- "epoch": 3.82,
1233
- "learning_rate": 9.168443496801706e-06,
1234
- "loss": 0.0492,
1235
- "step": 1790
1236
- },
1237
- {
1238
- "epoch": 3.84,
1239
- "learning_rate": 8.102345415778252e-06,
1240
- "loss": 0.0152,
1241
- "step": 1800
1242
- },
1243
- {
1244
- "epoch": 3.84,
1245
- "eval_accuracy": 0.9332,
1246
- "eval_loss": 0.26903361082077026,
1247
- "eval_runtime": 906.1705,
1248
- "eval_samples_per_second": 2.759,
1249
- "eval_steps_per_second": 0.345,
1250
- "step": 1800
1251
- },
1252
- {
1253
- "epoch": 3.86,
1254
- "learning_rate": 7.0362473347547975e-06,
1255
- "loss": 0.0054,
1256
- "step": 1810
1257
- },
1258
- {
1259
- "epoch": 3.88,
1260
- "learning_rate": 5.970149253731343e-06,
1261
- "loss": 0.006,
1262
- "step": 1820
1263
- },
1264
- {
1265
- "epoch": 3.9,
1266
- "learning_rate": 4.90405117270789e-06,
1267
- "loss": 0.0257,
1268
- "step": 1830
1269
- },
1270
- {
1271
- "epoch": 3.92,
1272
- "learning_rate": 3.837953091684435e-06,
1273
- "loss": 0.0076,
1274
- "step": 1840
1275
- },
1276
- {
1277
- "epoch": 3.94,
1278
- "learning_rate": 2.771855010660981e-06,
1279
- "loss": 0.0055,
1280
- "step": 1850
1281
- },
1282
- {
1283
- "epoch": 3.97,
1284
- "learning_rate": 1.7057569296375267e-06,
1285
- "loss": 0.0064,
1286
- "step": 1860
1287
- },
1288
- {
1289
- "epoch": 3.99,
1290
- "learning_rate": 6.396588486140725e-07,
1291
- "loss": 0.0054,
1292
- "step": 1870
1293
  },
1294
  {
1295
  "epoch": 4.0,
1296
- "step": 1876,
1297
  "total_flos": 0.0,
1298
- "train_loss": 0.2672756195687917,
1299
- "train_runtime": 47803.3846,
1300
- "train_samples_per_second": 0.628,
1301
- "train_steps_per_second": 0.039
1302
  }
1303
  ],
1304
- "max_steps": 1876,
1305
  "num_train_epochs": 4,
1306
  "total_flos": 0.0,
1307
  "trial_name": null,
 
1
  {
2
+ "best_metric": null,
3
+ "best_model_checkpoint": null,
4
  "epoch": 4.0,
5
+ "global_step": 28,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
9
  "log_history": [
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
10
  {
11
  "epoch": 1.43,
12
  "learning_rate": 0.00012857142857142858,
13
+ "loss": 0.3956,
14
+ "step": 10
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
15
  },
16
  {
17
  "epoch": 2.86,
18
  "learning_rate": 5.714285714285714e-05,
19
+ "loss": 0.0812,
20
+ "step": 20
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
21
  },
22
  {
23
  "epoch": 4.0,
24
+ "step": 28,
25
  "total_flos": 0.0,
26
+ "train_loss": 0.18094830640724727,
27
+ "train_runtime": 416.3142,
28
+ "train_samples_per_second": 0.961,
29
+ "train_steps_per_second": 0.067
30
  }
31
  ],
32
+ "max_steps": 28,
33
  "num_train_epochs": 4,
34
  "total_flos": 0.0,
35
  "trial_name": null,
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:a6b07bb068422c354051a5fc05e6f184ed3cfe074042bae148583b24e1b5f918
3
  size 3256
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3eb12cbc835ddd417563643373525c41ded388268170fb56d0f75ed3ddab9f7c
3
  size 3256