jane102350 committed on
Commit
5ee030d
1 Parent(s): 30539e5

End of training

Browse files
Files changed (2) hide show
  1. README.md +3 -1
  2. trainer_state.json +41 -531
README.md CHANGED
@@ -2,6 +2,8 @@
2
  license: cc-by-nc-4.0
3
  library_name: peft
4
  tags:
 
 
5
  - generated_from_trainer
6
  base_model: facebook/musicgen-melody
7
  model-index:
@@ -14,7 +16,7 @@ should probably proofread and complete it, then remove this comment. -->
14
 
15
  # musicgen-melody-lora-kk-colab
16
 
17
- This model is a fine-tuned version of [facebook/musicgen-melody](https://huggingface.co/facebook/musicgen-melody) on an unknown dataset.
18
 
19
  ## Model description
20
 
 
2
  license: cc-by-nc-4.0
3
  library_name: peft
4
  tags:
5
+ - text-to-audio
6
+ - tiny-kk
7
  - generated_from_trainer
8
  base_model: facebook/musicgen-melody
9
  model-index:
 
16
 
17
  # musicgen-melody-lora-kk-colab
18
 
19
+ This model is a fine-tuned version of [facebook/musicgen-melody](https://huggingface.co/facebook/musicgen-melody) on the jane102350/kk-colab dataset.
20
 
21
  ## Model description
22
 
trainer_state.json CHANGED
@@ -1,587 +1,97 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 15.238095238095237,
5
  "eval_steps": 500,
6
- "global_step": 160,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
10
  "log_history": [
11
  {
12
  "epoch": 0.19047619047619047,
13
- "grad_norm": 0.9942206144332886,
14
- "learning_rate": 0.00019750000000000003,
15
- "loss": 9.5613,
16
  "step": 2
17
  },
18
  {
19
  "epoch": 0.38095238095238093,
20
- "grad_norm": 1.2790788412094116,
21
- "learning_rate": 0.000195,
22
- "loss": 9.2339,
23
  "step": 4
24
  },
25
  {
26
  "epoch": 0.5714285714285714,
27
- "grad_norm": 1.9939367771148682,
28
- "learning_rate": 0.00019250000000000002,
29
- "loss": 8.7953,
30
  "step": 6
31
  },
32
  {
33
  "epoch": 0.7619047619047619,
34
- "grad_norm": 2.010485887527466,
35
- "learning_rate": 0.00019,
36
- "loss": 8.2168,
37
  "step": 8
38
  },
39
  {
40
  "epoch": 0.9523809523809523,
41
- "grad_norm": 1.352328896522522,
42
- "learning_rate": 0.0001875,
43
- "loss": 7.8941,
44
  "step": 10
45
  },
46
  {
47
  "epoch": 1.1428571428571428,
48
- "grad_norm": 0.9626594185829163,
49
- "learning_rate": 0.00018500000000000002,
50
- "loss": 7.6817,
51
  "step": 12
52
  },
53
  {
54
  "epoch": 1.3333333333333333,
55
- "grad_norm": 1.1568268537521362,
56
- "learning_rate": 0.0001825,
57
- "loss": 7.5131,
58
  "step": 14
59
  },
60
  {
61
  "epoch": 1.5238095238095237,
62
- "grad_norm": 1.0264520645141602,
63
- "learning_rate": 0.00018,
64
- "loss": 7.4247,
65
  "step": 16
66
  },
67
  {
68
  "epoch": 1.7142857142857144,
69
- "grad_norm": 0.9865540862083435,
70
- "learning_rate": 0.0001775,
71
- "loss": 7.4369,
72
  "step": 18
73
  },
74
  {
75
  "epoch": 1.9047619047619047,
76
- "grad_norm": 1.0182702541351318,
77
- "learning_rate": 0.000175,
78
- "loss": 7.3787,
79
  "step": 20
80
  },
81
  {
82
- "epoch": 2.0952380952380953,
83
- "grad_norm": 0.7922359108924866,
84
- "learning_rate": 0.00017250000000000002,
85
- "loss": 7.373,
86
- "step": 22
87
- },
88
- {
89
- "epoch": 2.2857142857142856,
90
- "grad_norm": 0.7033187747001648,
91
- "learning_rate": 0.00017,
92
- "loss": 7.3096,
93
- "step": 24
94
- },
95
- {
96
- "epoch": 2.4761904761904763,
97
- "grad_norm": 2.9758119583129883,
98
- "learning_rate": 0.0001675,
99
- "loss": 7.1991,
100
- "step": 26
101
- },
102
- {
103
- "epoch": 2.6666666666666665,
104
- "grad_norm": 0.7531760931015015,
105
- "learning_rate": 0.000165,
106
- "loss": 7.2661,
107
- "step": 28
108
- },
109
- {
110
- "epoch": 2.857142857142857,
111
- "grad_norm": 1.3790533542633057,
112
- "learning_rate": 0.00016250000000000002,
113
- "loss": 7.2782,
114
- "step": 30
115
- },
116
- {
117
- "epoch": 3.0476190476190474,
118
- "grad_norm": 0.6538093686103821,
119
- "learning_rate": 0.00016,
120
- "loss": 7.2109,
121
- "step": 32
122
- },
123
- {
124
- "epoch": 3.238095238095238,
125
- "grad_norm": 0.6145215630531311,
126
- "learning_rate": 0.0001575,
127
- "loss": 7.2192,
128
- "step": 34
129
- },
130
- {
131
- "epoch": 3.4285714285714284,
132
- "grad_norm": 0.4128475785255432,
133
- "learning_rate": 0.000155,
134
- "loss": 7.2892,
135
- "step": 36
136
- },
137
- {
138
- "epoch": 3.619047619047619,
139
- "grad_norm": 1.0160013437271118,
140
- "learning_rate": 0.0001525,
141
- "loss": 7.2049,
142
- "step": 38
143
- },
144
- {
145
- "epoch": 3.8095238095238093,
146
- "grad_norm": 0.5834835171699524,
147
- "learning_rate": 0.00015000000000000001,
148
- "loss": 7.1672,
149
- "step": 40
150
- },
151
- {
152
- "epoch": 4.0,
153
- "grad_norm": 0.4894554615020752,
154
- "learning_rate": 0.0001475,
155
- "loss": 7.1269,
156
- "step": 42
157
- },
158
- {
159
- "epoch": 4.190476190476191,
160
- "grad_norm": 0.593618631362915,
161
- "learning_rate": 0.000145,
162
- "loss": 7.0175,
163
- "step": 44
164
- },
165
- {
166
- "epoch": 4.380952380952381,
167
- "grad_norm": 1.6190487146377563,
168
- "learning_rate": 0.00014250000000000002,
169
- "loss": 7.2919,
170
- "step": 46
171
- },
172
- {
173
- "epoch": 4.571428571428571,
174
- "grad_norm": 0.755859911441803,
175
- "learning_rate": 0.00014,
176
- "loss": 7.1624,
177
- "step": 48
178
- },
179
- {
180
- "epoch": 4.761904761904762,
181
- "grad_norm": 0.46613645553588867,
182
- "learning_rate": 0.0001375,
183
- "loss": 7.2233,
184
- "step": 50
185
- },
186
- {
187
- "epoch": 4.9523809523809526,
188
- "grad_norm": 0.5973020792007446,
189
- "learning_rate": 0.00013500000000000003,
190
- "loss": 7.1642,
191
- "step": 52
192
- },
193
- {
194
- "epoch": 5.142857142857143,
195
- "grad_norm": 0.97837233543396,
196
- "learning_rate": 0.0001325,
197
- "loss": 7.1172,
198
- "step": 54
199
- },
200
- {
201
- "epoch": 5.333333333333333,
202
- "grad_norm": 0.9348046183586121,
203
- "learning_rate": 0.00013000000000000002,
204
- "loss": 7.1564,
205
- "step": 56
206
- },
207
- {
208
- "epoch": 5.523809523809524,
209
- "grad_norm": 0.6632198691368103,
210
- "learning_rate": 0.0001275,
211
- "loss": 7.0821,
212
- "step": 58
213
- },
214
- {
215
- "epoch": 5.714285714285714,
216
- "grad_norm": 0.7776179909706116,
217
- "learning_rate": 0.000125,
218
- "loss": 7.2272,
219
- "step": 60
220
- },
221
- {
222
- "epoch": 5.904761904761905,
223
- "grad_norm": 0.6282438039779663,
224
- "learning_rate": 0.00012250000000000002,
225
- "loss": 7.0926,
226
- "step": 62
227
- },
228
- {
229
- "epoch": 6.095238095238095,
230
- "grad_norm": 0.6008353233337402,
231
- "learning_rate": 0.00012,
232
- "loss": 7.1073,
233
- "step": 64
234
- },
235
- {
236
- "epoch": 6.285714285714286,
237
- "grad_norm": 0.8796420097351074,
238
- "learning_rate": 0.00011750000000000001,
239
- "loss": 7.1737,
240
- "step": 66
241
- },
242
- {
243
- "epoch": 6.476190476190476,
244
- "grad_norm": 0.6400454640388489,
245
- "learning_rate": 0.00011499999999999999,
246
- "loss": 7.0924,
247
- "step": 68
248
- },
249
- {
250
- "epoch": 6.666666666666667,
251
- "grad_norm": 0.5479526519775391,
252
- "learning_rate": 0.00011250000000000001,
253
- "loss": 7.1275,
254
- "step": 70
255
- },
256
- {
257
- "epoch": 6.857142857142857,
258
- "grad_norm": 0.5992618203163147,
259
- "learning_rate": 0.00011000000000000002,
260
- "loss": 7.0599,
261
- "step": 72
262
- },
263
- {
264
- "epoch": 7.0476190476190474,
265
- "grad_norm": 0.5336684584617615,
266
- "learning_rate": 0.0001075,
267
- "loss": 7.0206,
268
- "step": 74
269
- },
270
- {
271
- "epoch": 7.238095238095238,
272
- "grad_norm": 0.3991040289402008,
273
- "learning_rate": 0.000105,
274
- "loss": 7.0123,
275
- "step": 76
276
- },
277
- {
278
- "epoch": 7.428571428571429,
279
- "grad_norm": 1.032917857170105,
280
- "learning_rate": 0.0001025,
281
- "loss": 7.0267,
282
- "step": 78
283
- },
284
- {
285
- "epoch": 7.619047619047619,
286
- "grad_norm": 0.5554404854774475,
287
- "learning_rate": 0.0001,
288
- "loss": 7.0203,
289
- "step": 80
290
- },
291
- {
292
- "epoch": 7.809523809523809,
293
- "grad_norm": 0.7755109667778015,
294
- "learning_rate": 9.75e-05,
295
- "loss": 7.1445,
296
- "step": 82
297
- },
298
- {
299
- "epoch": 8.0,
300
- "grad_norm": 1.8295842409133911,
301
- "learning_rate": 9.5e-05,
302
- "loss": 7.0002,
303
- "step": 84
304
- },
305
- {
306
- "epoch": 8.19047619047619,
307
- "grad_norm": 1.4985620975494385,
308
- "learning_rate": 9.250000000000001e-05,
309
- "loss": 7.0613,
310
- "step": 86
311
- },
312
- {
313
- "epoch": 8.380952380952381,
314
- "grad_norm": 1.0733778476715088,
315
- "learning_rate": 9e-05,
316
- "loss": 7.0594,
317
- "step": 88
318
- },
319
- {
320
- "epoch": 8.571428571428571,
321
- "grad_norm": 0.7009026408195496,
322
- "learning_rate": 8.75e-05,
323
- "loss": 6.9432,
324
- "step": 90
325
- },
326
- {
327
- "epoch": 8.761904761904763,
328
- "grad_norm": 1.195196509361267,
329
- "learning_rate": 8.5e-05,
330
- "loss": 6.9266,
331
- "step": 92
332
- },
333
- {
334
- "epoch": 8.952380952380953,
335
- "grad_norm": 2.6835684776306152,
336
- "learning_rate": 8.25e-05,
337
- "loss": 6.9855,
338
- "step": 94
339
- },
340
- {
341
- "epoch": 9.142857142857142,
342
- "grad_norm": 0.7434377670288086,
343
- "learning_rate": 8e-05,
344
- "loss": 6.7975,
345
- "step": 96
346
- },
347
- {
348
- "epoch": 9.333333333333334,
349
- "grad_norm": 0.5993837118148804,
350
- "learning_rate": 7.75e-05,
351
- "loss": 7.0476,
352
- "step": 98
353
- },
354
- {
355
- "epoch": 9.523809523809524,
356
- "grad_norm": 0.4656153619289398,
357
- "learning_rate": 7.500000000000001e-05,
358
- "loss": 6.9894,
359
- "step": 100
360
- },
361
- {
362
- "epoch": 9.714285714285714,
363
- "grad_norm": 0.7926774621009827,
364
- "learning_rate": 7.25e-05,
365
- "loss": 6.9854,
366
- "step": 102
367
- },
368
- {
369
- "epoch": 9.904761904761905,
370
- "grad_norm": 1.0828678607940674,
371
- "learning_rate": 7e-05,
372
- "loss": 6.9185,
373
- "step": 104
374
- },
375
- {
376
- "epoch": 10.095238095238095,
377
- "grad_norm": 0.6923830509185791,
378
- "learning_rate": 6.750000000000001e-05,
379
- "loss": 6.9804,
380
- "step": 106
381
- },
382
- {
383
- "epoch": 10.285714285714286,
384
- "grad_norm": 0.5546735525131226,
385
- "learning_rate": 6.500000000000001e-05,
386
- "loss": 6.9273,
387
- "step": 108
388
- },
389
- {
390
- "epoch": 10.476190476190476,
391
- "grad_norm": 0.8265076875686646,
392
- "learning_rate": 6.25e-05,
393
- "loss": 6.9087,
394
- "step": 110
395
- },
396
- {
397
- "epoch": 10.666666666666666,
398
- "grad_norm": 0.3945198655128479,
399
- "learning_rate": 6e-05,
400
- "loss": 6.9375,
401
- "step": 112
402
- },
403
- {
404
- "epoch": 10.857142857142858,
405
- "grad_norm": 0.5948878526687622,
406
- "learning_rate": 5.7499999999999995e-05,
407
- "loss": 6.8764,
408
- "step": 114
409
- },
410
- {
411
- "epoch": 11.047619047619047,
412
- "grad_norm": 0.7741471529006958,
413
- "learning_rate": 5.500000000000001e-05,
414
- "loss": 6.7551,
415
- "step": 116
416
- },
417
- {
418
- "epoch": 11.238095238095237,
419
- "grad_norm": 0.32554784417152405,
420
- "learning_rate": 5.25e-05,
421
- "loss": 6.8862,
422
- "step": 118
423
- },
424
- {
425
- "epoch": 11.428571428571429,
426
- "grad_norm": 0.5033702850341797,
427
- "learning_rate": 5e-05,
428
- "loss": 6.7297,
429
- "step": 120
430
- },
431
- {
432
- "epoch": 11.619047619047619,
433
- "grad_norm": 0.5291158556938171,
434
- "learning_rate": 4.75e-05,
435
- "loss": 6.9826,
436
- "step": 122
437
- },
438
- {
439
- "epoch": 11.80952380952381,
440
- "grad_norm": 0.39498385787010193,
441
- "learning_rate": 4.5e-05,
442
- "loss": 6.837,
443
- "step": 124
444
- },
445
- {
446
- "epoch": 12.0,
447
- "grad_norm": 0.4402136206626892,
448
- "learning_rate": 4.25e-05,
449
- "loss": 7.0434,
450
- "step": 126
451
- },
452
- {
453
- "epoch": 12.19047619047619,
454
- "grad_norm": 0.6476764678955078,
455
- "learning_rate": 4e-05,
456
- "loss": 6.8524,
457
- "step": 128
458
- },
459
- {
460
- "epoch": 12.380952380952381,
461
- "grad_norm": 0.330609530210495,
462
- "learning_rate": 3.7500000000000003e-05,
463
- "loss": 6.8742,
464
- "step": 130
465
- },
466
- {
467
- "epoch": 12.571428571428571,
468
- "grad_norm": 0.5420040488243103,
469
- "learning_rate": 3.5e-05,
470
- "loss": 6.7931,
471
- "step": 132
472
- },
473
- {
474
- "epoch": 12.761904761904763,
475
- "grad_norm": 0.3482373356819153,
476
- "learning_rate": 3.2500000000000004e-05,
477
- "loss": 6.883,
478
- "step": 134
479
- },
480
- {
481
- "epoch": 12.952380952380953,
482
- "grad_norm": 0.3476051092147827,
483
- "learning_rate": 3e-05,
484
- "loss": 6.9857,
485
- "step": 136
486
- },
487
- {
488
- "epoch": 13.142857142857142,
489
- "grad_norm": 0.43590274453163147,
490
- "learning_rate": 2.8749999999999997e-05,
491
- "loss": 8.116,
492
- "step": 138
493
- },
494
- {
495
- "epoch": 13.333333333333334,
496
- "grad_norm": 0.2993098497390747,
497
- "learning_rate": 2.625e-05,
498
- "loss": 6.657,
499
- "step": 140
500
- },
501
- {
502
- "epoch": 13.523809523809524,
503
- "grad_norm": 0.3477262556552887,
504
- "learning_rate": 2.375e-05,
505
- "loss": 6.9781,
506
- "step": 142
507
- },
508
- {
509
- "epoch": 13.714285714285714,
510
- "grad_norm": 0.47370073199272156,
511
- "learning_rate": 2.125e-05,
512
- "loss": 6.9277,
513
- "step": 144
514
- },
515
- {
516
- "epoch": 13.904761904761905,
517
- "grad_norm": 0.3924289345741272,
518
- "learning_rate": 1.8750000000000002e-05,
519
- "loss": 6.8967,
520
- "step": 146
521
- },
522
- {
523
- "epoch": 14.095238095238095,
524
- "grad_norm": 0.5621922612190247,
525
- "learning_rate": 1.6250000000000002e-05,
526
- "loss": 6.7197,
527
- "step": 148
528
- },
529
- {
530
- "epoch": 14.285714285714286,
531
- "grad_norm": 0.3454875349998474,
532
- "learning_rate": 1.3750000000000002e-05,
533
- "loss": 6.9314,
534
- "step": 150
535
- },
536
- {
537
- "epoch": 14.476190476190476,
538
- "grad_norm": 0.3146642744541168,
539
- "learning_rate": 1.125e-05,
540
- "loss": 6.9142,
541
- "step": 152
542
- },
543
- {
544
- "epoch": 14.666666666666666,
545
- "grad_norm": 0.3762160837650299,
546
- "learning_rate": 8.75e-06,
547
- "loss": 6.8759,
548
- "step": 154
549
- },
550
- {
551
- "epoch": 14.857142857142858,
552
- "grad_norm": 0.33906954526901245,
553
- "learning_rate": 6.25e-06,
554
- "loss": 6.8712,
555
- "step": 156
556
- },
557
- {
558
- "epoch": 15.047619047619047,
559
- "grad_norm": 0.3414846360683441,
560
- "learning_rate": 3.75e-06,
561
- "loss": 6.737,
562
- "step": 158
563
- },
564
- {
565
- "epoch": 15.238095238095237,
566
- "grad_norm": 0.4463809132575989,
567
- "learning_rate": 1.25e-06,
568
- "loss": 6.9144,
569
- "step": 160
570
- },
571
- {
572
- "epoch": 15.238095238095237,
573
- "step": 160,
574
- "total_flos": 800861569170024.0,
575
- "train_loss": 7.170098584890366,
576
- "train_runtime": 677.1666,
577
- "train_samples_per_second": 3.969,
578
  "train_steps_per_second": 0.236
579
  }
580
  ],
581
  "logging_steps": 2,
582
- "max_steps": 160,
583
  "num_input_tokens_seen": 0,
584
- "num_train_epochs": 16,
585
  "save_steps": 500,
586
  "stateful_callbacks": {
587
  "TrainerControl": {
@@ -595,7 +105,7 @@
595
  "attributes": {}
596
  }
597
  },
598
- "total_flos": 800861569170024.0,
599
  "train_batch_size": 2,
600
  "trial_name": null,
601
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 1.9047619047619047,
5
  "eval_steps": 500,
6
+ "global_step": 20,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
10
  "log_history": [
11
  {
12
  "epoch": 0.19047619047619047,
13
+ "grad_norm": 2.075279951095581,
14
+ "learning_rate": 0.00018,
15
+ "loss": 9.5584,
16
  "step": 2
17
  },
18
  {
19
  "epoch": 0.38095238095238093,
20
+ "grad_norm": 1.2216403484344482,
21
+ "learning_rate": 0.00016,
22
+ "loss": 9.2621,
23
  "step": 4
24
  },
25
  {
26
  "epoch": 0.5714285714285714,
27
+ "grad_norm": 1.9454573392868042,
28
+ "learning_rate": 0.00014,
29
+ "loss": 8.8941,
30
  "step": 6
31
  },
32
  {
33
  "epoch": 0.7619047619047619,
34
+ "grad_norm": 2.4250237941741943,
35
+ "learning_rate": 0.00012,
36
+ "loss": 8.4539,
37
  "step": 8
38
  },
39
  {
40
  "epoch": 0.9523809523809523,
41
+ "grad_norm": 1.720076560974121,
42
+ "learning_rate": 0.0001,
43
+ "loss": 8.1194,
44
  "step": 10
45
  },
46
  {
47
  "epoch": 1.1428571428571428,
48
+ "grad_norm": 1.4687391519546509,
49
+ "learning_rate": 8e-05,
50
+ "loss": 7.9072,
51
  "step": 12
52
  },
53
  {
54
  "epoch": 1.3333333333333333,
55
+ "grad_norm": 1.1075403690338135,
56
+ "learning_rate": 6e-05,
57
+ "loss": 7.7305,
58
  "step": 14
59
  },
60
  {
61
  "epoch": 1.5238095238095237,
62
+ "grad_norm": 1.532745361328125,
63
+ "learning_rate": 4e-05,
64
+ "loss": 7.6572,
65
  "step": 16
66
  },
67
  {
68
  "epoch": 1.7142857142857144,
69
+ "grad_norm": 1.2055246829986572,
70
+ "learning_rate": 2e-05,
71
+ "loss": 7.6057,
72
  "step": 18
73
  },
74
  {
75
  "epoch": 1.9047619047619047,
76
+ "grad_norm": 0.9867807030677795,
77
+ "learning_rate": 0.0,
78
+ "loss": 7.5695,
79
  "step": 20
80
  },
81
  {
82
+ "epoch": 1.9047619047619047,
83
+ "step": 20,
84
+ "total_flos": 99814369025136.0,
85
+ "train_loss": 8.27579402923584,
86
+ "train_runtime": 84.8689,
87
+ "train_samples_per_second": 3.959,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
88
  "train_steps_per_second": 0.236
89
  }
90
  ],
91
  "logging_steps": 2,
92
+ "max_steps": 20,
93
  "num_input_tokens_seen": 0,
94
+ "num_train_epochs": 2,
95
  "save_steps": 500,
96
  "stateful_callbacks": {
97
  "TrainerControl": {
 
105
  "attributes": {}
106
  }
107
  },
108
+ "total_flos": 99814369025136.0,
109
  "train_batch_size": 2,
110
  "trial_name": null,
111
  "trial_params": null