|
{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 19.753086419753085,
  "eval_steps": 100,
  "global_step": 800,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.24691358024691357,
      "grad_norm": 15.961835861206055,
      "learning_rate": 1.0000000000000002e-06,
      "loss": 0.6418,
      "step": 10
    },
    {
      "epoch": 0.49382716049382713,
      "grad_norm": 8.321958541870117,
      "learning_rate": 2.0000000000000003e-06,
      "loss": 0.6123,
      "step": 20
    },
    {
      "epoch": 0.7407407407407407,
      "grad_norm": 10.797492980957031,
      "learning_rate": 3e-06,
      "loss": 0.5325,
      "step": 30
    },
    {
      "epoch": 0.9876543209876543,
      "grad_norm": 8.990424156188965,
      "learning_rate": 4.000000000000001e-06,
      "loss": 0.3937,
      "step": 40
    },
    {
      "epoch": 1.2345679012345678,
      "grad_norm": 5.069626331329346,
      "learning_rate": 5e-06,
      "loss": 0.2157,
      "step": 50
    },
    {
      "epoch": 1.4814814814814814,
      "grad_norm": 2.54313063621521,
      "learning_rate": 6e-06,
      "loss": 0.1184,
      "step": 60
    },
    {
      "epoch": 1.7283950617283952,
      "grad_norm": 1.0293046236038208,
      "learning_rate": 7e-06,
      "loss": 0.0469,
      "step": 70
    },
    {
      "epoch": 1.9753086419753085,
      "grad_norm": 0.32547664642333984,
      "learning_rate": 8.000000000000001e-06,
      "loss": 0.0176,
      "step": 80
    },
    {
      "epoch": 2.2222222222222223,
      "grad_norm": 0.20642875134944916,
      "learning_rate": 9e-06,
      "loss": 0.0088,
      "step": 90
    },
    {
      "epoch": 2.4691358024691357,
      "grad_norm": 0.16824281215667725,
      "learning_rate": 1e-05,
      "loss": 0.0056,
      "step": 100
    },
    {
      "epoch": 2.4691358024691357,
      "eval_accuracy": 1.0,
      "eval_accuracy_label_GD622:Null": 1.0,
      "eval_accuracy_label_GD622:YES": 1.0,
      "eval_f1": 1.0,
      "eval_loss": 0.004177506547421217,
      "eval_precision": 1.0,
      "eval_recall": 1.0,
      "eval_runtime": 0.7278,
      "eval_samples_per_second": 133.281,
      "eval_steps_per_second": 9.618,
      "step": 100
    },
    {
      "epoch": 2.7160493827160495,
      "grad_norm": 0.10963490605354309,
      "learning_rate": 1.1000000000000001e-05,
      "loss": 0.0038,
      "step": 110
    },
    {
      "epoch": 2.962962962962963,
      "grad_norm": 0.07338671386241913,
      "learning_rate": 1.2e-05,
      "loss": 0.0029,
      "step": 120
    },
    {
      "epoch": 3.2098765432098766,
      "grad_norm": 0.0706721693277359,
      "learning_rate": 1.3000000000000001e-05,
      "loss": 0.0024,
      "step": 130
    },
    {
      "epoch": 3.45679012345679,
      "grad_norm": 0.06313496083021164,
      "learning_rate": 1.4e-05,
      "loss": 0.002,
      "step": 140
    },
    {
      "epoch": 3.7037037037037037,
      "grad_norm": 0.063168965280056,
      "learning_rate": 1.5000000000000002e-05,
      "loss": 0.0018,
      "step": 150
    },
    {
      "epoch": 3.950617283950617,
      "grad_norm": 0.05419662222266197,
      "learning_rate": 1.6000000000000003e-05,
      "loss": 0.0016,
      "step": 160
    },
    {
      "epoch": 4.197530864197531,
      "grad_norm": 0.047441959381103516,
      "learning_rate": 1.7e-05,
      "loss": 0.0014,
      "step": 170
    },
    {
      "epoch": 4.444444444444445,
      "grad_norm": 0.04058253392577171,
      "learning_rate": 1.8e-05,
      "loss": 0.0012,
      "step": 180
    },
    {
      "epoch": 4.6913580246913575,
      "grad_norm": 0.029308538883924484,
      "learning_rate": 1.9e-05,
      "loss": 0.0011,
      "step": 190
    },
    {
      "epoch": 4.938271604938271,
      "grad_norm": 0.027978356927633286,
      "learning_rate": 2e-05,
      "loss": 0.001,
      "step": 200
    },
    {
      "epoch": 4.938271604938271,
      "eval_accuracy": 1.0,
      "eval_accuracy_label_GD622:Null": 1.0,
      "eval_accuracy_label_GD622:YES": 1.0,
      "eval_f1": 1.0,
      "eval_loss": 0.0008954937802627683,
      "eval_precision": 1.0,
      "eval_recall": 1.0,
      "eval_runtime": 0.521,
      "eval_samples_per_second": 186.189,
      "eval_steps_per_second": 13.436,
      "step": 200
    },
    {
      "epoch": 5.185185185185185,
      "grad_norm": 0.023471660912036896,
      "learning_rate": 1.9666666666666666e-05,
      "loss": 0.0009,
      "step": 210
    },
    {
      "epoch": 5.432098765432099,
      "grad_norm": 0.021547624841332436,
      "learning_rate": 1.9333333333333333e-05,
      "loss": 0.0008,
      "step": 220
    },
    {
      "epoch": 5.679012345679013,
      "grad_norm": 0.02687031961977482,
      "learning_rate": 1.9e-05,
      "loss": 0.0007,
      "step": 230
    },
    {
      "epoch": 5.925925925925926,
      "grad_norm": 0.021016767248511314,
      "learning_rate": 1.866666666666667e-05,
      "loss": 0.0006,
      "step": 240
    },
    {
      "epoch": 6.172839506172839,
      "grad_norm": 0.017553668469190598,
      "learning_rate": 1.8333333333333333e-05,
      "loss": 0.0006,
      "step": 250
    },
    {
      "epoch": 6.419753086419753,
      "grad_norm": 0.016819961369037628,
      "learning_rate": 1.8e-05,
      "loss": 0.0006,
      "step": 260
    },
    {
      "epoch": 6.666666666666667,
      "grad_norm": 0.018349776044487953,
      "learning_rate": 1.7666666666666668e-05,
      "loss": 0.0005,
      "step": 270
    },
    {
      "epoch": 6.91358024691358,
      "grad_norm": 0.01844148337841034,
      "learning_rate": 1.7333333333333336e-05,
      "loss": 0.0005,
      "step": 280
    },
    {
      "epoch": 7.160493827160494,
      "grad_norm": 0.016825733706355095,
      "learning_rate": 1.7e-05,
      "loss": 0.0005,
      "step": 290
    },
    {
      "epoch": 7.407407407407407,
      "grad_norm": 0.013994095847010612,
      "learning_rate": 1.6666666666666667e-05,
      "loss": 0.0005,
      "step": 300
    },
    {
      "epoch": 7.407407407407407,
      "eval_accuracy": 1.0,
      "eval_accuracy_label_GD622:Null": 1.0,
      "eval_accuracy_label_GD622:YES": 1.0,
      "eval_f1": 1.0,
      "eval_loss": 0.00043683411786332726,
      "eval_precision": 1.0,
      "eval_recall": 1.0,
      "eval_runtime": 0.5186,
      "eval_samples_per_second": 187.027,
      "eval_steps_per_second": 13.497,
      "step": 300
    },
    {
      "epoch": 7.654320987654321,
      "grad_norm": 0.015141790732741356,
      "learning_rate": 1.6333333333333335e-05,
      "loss": 0.0004,
      "step": 310
    },
    {
      "epoch": 7.901234567901234,
      "grad_norm": 0.013697362504899502,
      "learning_rate": 1.6000000000000003e-05,
      "loss": 0.0004,
      "step": 320
    },
    {
      "epoch": 8.148148148148149,
      "grad_norm": 0.01174458209425211,
      "learning_rate": 1.5666666666666667e-05,
      "loss": 0.0004,
      "step": 330
    },
    {
      "epoch": 8.395061728395062,
      "grad_norm": 0.013013974763453007,
      "learning_rate": 1.5333333333333334e-05,
      "loss": 0.0004,
      "step": 340
    },
    {
      "epoch": 8.641975308641975,
      "grad_norm": 0.011320522986352444,
      "learning_rate": 1.5000000000000002e-05,
      "loss": 0.0004,
      "step": 350
    },
    {
      "epoch": 8.88888888888889,
      "grad_norm": 0.010261823423206806,
      "learning_rate": 1.4666666666666666e-05,
      "loss": 0.0003,
      "step": 360
    },
    {
      "epoch": 9.135802469135802,
      "grad_norm": 0.009858865290880203,
      "learning_rate": 1.4333333333333334e-05,
      "loss": 0.0003,
      "step": 370
    },
    {
      "epoch": 9.382716049382717,
      "grad_norm": 0.009954158216714859,
      "learning_rate": 1.4e-05,
      "loss": 0.0003,
      "step": 380
    },
    {
      "epoch": 9.62962962962963,
      "grad_norm": 0.010688001289963722,
      "learning_rate": 1.3666666666666667e-05,
      "loss": 0.0003,
      "step": 390
    },
    {
      "epoch": 9.876543209876543,
      "grad_norm": 0.009526471607387066,
      "learning_rate": 1.3333333333333333e-05,
      "loss": 0.0003,
      "step": 400
    },
    {
      "epoch": 9.876543209876543,
      "eval_accuracy": 1.0,
      "eval_accuracy_label_GD622:Null": 1.0,
      "eval_accuracy_label_GD622:YES": 1.0,
      "eval_f1": 1.0,
      "eval_loss": 0.0002707206876948476,
      "eval_precision": 1.0,
      "eval_recall": 1.0,
      "eval_runtime": 0.5189,
      "eval_samples_per_second": 186.945,
      "eval_steps_per_second": 13.491,
      "step": 400
    },
    {
      "epoch": 10.123456790123457,
      "grad_norm": 0.00999557226896286,
      "learning_rate": 1.3000000000000001e-05,
      "loss": 0.0003,
      "step": 410
    },
    {
      "epoch": 10.37037037037037,
      "grad_norm": 0.009755443781614304,
      "learning_rate": 1.2666666666666667e-05,
      "loss": 0.0003,
      "step": 420
    },
    {
      "epoch": 10.617283950617283,
      "grad_norm": 0.008844558149576187,
      "learning_rate": 1.2333333333333334e-05,
      "loss": 0.0003,
      "step": 430
    },
    {
      "epoch": 10.864197530864198,
      "grad_norm": 0.00740455137565732,
      "learning_rate": 1.2e-05,
      "loss": 0.0003,
      "step": 440
    },
    {
      "epoch": 11.11111111111111,
      "grad_norm": 0.007182607427239418,
      "learning_rate": 1.1666666666666668e-05,
      "loss": 0.0002,
      "step": 450
    },
    {
      "epoch": 11.358024691358025,
      "grad_norm": 0.007493776269257069,
      "learning_rate": 1.1333333333333334e-05,
      "loss": 0.0003,
      "step": 460
    },
    {
      "epoch": 11.604938271604938,
      "grad_norm": 0.008535212837159634,
      "learning_rate": 1.1000000000000001e-05,
      "loss": 0.0002,
      "step": 470
    },
    {
      "epoch": 11.851851851851851,
      "grad_norm": 0.007039290387183428,
      "learning_rate": 1.0666666666666667e-05,
      "loss": 0.0002,
      "step": 480
    },
    {
      "epoch": 12.098765432098766,
      "grad_norm": 0.007746797055006027,
      "learning_rate": 1.0333333333333335e-05,
      "loss": 0.0002,
      "step": 490
    },
    {
      "epoch": 12.345679012345679,
      "grad_norm": 0.008360541425645351,
      "learning_rate": 1e-05,
      "loss": 0.0002,
      "step": 500
    },
    {
      "epoch": 12.345679012345679,
      "eval_accuracy": 1.0,
      "eval_accuracy_label_GD622:Null": 1.0,
      "eval_accuracy_label_GD622:YES": 1.0,
      "eval_f1": 1.0,
      "eval_loss": 0.00022185646230354905,
      "eval_precision": 1.0,
      "eval_recall": 1.0,
      "eval_runtime": 0.5185,
      "eval_samples_per_second": 187.093,
      "eval_steps_per_second": 13.502,
      "step": 500
    },
    {
      "epoch": 12.592592592592592,
      "grad_norm": 0.006791314110159874,
      "learning_rate": 9.666666666666667e-06,
      "loss": 0.0002,
      "step": 510
    },
    {
      "epoch": 12.839506172839506,
      "grad_norm": 0.007602753583341837,
      "learning_rate": 9.333333333333334e-06,
      "loss": 0.0002,
      "step": 520
    },
    {
      "epoch": 13.08641975308642,
      "grad_norm": 0.0071947514079511166,
      "learning_rate": 9e-06,
      "loss": 0.0002,
      "step": 530
    },
    {
      "epoch": 13.333333333333334,
      "grad_norm": 0.007956212386488914,
      "learning_rate": 8.666666666666668e-06,
      "loss": 0.0002,
      "step": 540
    },
    {
      "epoch": 13.580246913580247,
      "grad_norm": 0.007944190874695778,
      "learning_rate": 8.333333333333334e-06,
      "loss": 0.0002,
      "step": 550
    },
    {
      "epoch": 13.82716049382716,
      "grad_norm": 0.007252030540257692,
      "learning_rate": 8.000000000000001e-06,
      "loss": 0.0002,
      "step": 560
    },
    {
      "epoch": 14.074074074074074,
      "grad_norm": 0.007420521695166826,
      "learning_rate": 7.666666666666667e-06,
      "loss": 0.0002,
      "step": 570
    },
    {
      "epoch": 14.320987654320987,
      "grad_norm": 0.007615529000759125,
      "learning_rate": 7.333333333333333e-06,
      "loss": 0.0002,
      "step": 580
    },
    {
      "epoch": 14.567901234567902,
      "grad_norm": 0.007768448442220688,
      "learning_rate": 7e-06,
      "loss": 0.0002,
      "step": 590
    },
    {
      "epoch": 14.814814814814815,
      "grad_norm": 0.005428287200629711,
      "learning_rate": 6.666666666666667e-06,
      "loss": 0.0002,
      "step": 600
    },
    {
      "epoch": 14.814814814814815,
      "eval_accuracy": 1.0,
      "eval_accuracy_label_GD622:Null": 1.0,
      "eval_accuracy_label_GD622:YES": 1.0,
      "eval_f1": 1.0,
      "eval_loss": 0.00019342350424267352,
      "eval_precision": 1.0,
      "eval_recall": 1.0,
      "eval_runtime": 0.5188,
      "eval_samples_per_second": 186.984,
      "eval_steps_per_second": 13.494,
      "step": 600
    },
    {
      "epoch": 15.061728395061728,
      "grad_norm": 0.005406714044511318,
      "learning_rate": 6.333333333333333e-06,
      "loss": 0.0002,
      "step": 610
    },
    {
      "epoch": 15.308641975308642,
      "grad_norm": 0.007229079958051443,
      "learning_rate": 6e-06,
      "loss": 0.0002,
      "step": 620
    },
    {
      "epoch": 15.555555555555555,
      "grad_norm": 0.006031244061887264,
      "learning_rate": 5.666666666666667e-06,
      "loss": 0.0002,
      "step": 630
    },
    {
      "epoch": 15.802469135802468,
      "grad_norm": 0.0075646815821528435,
      "learning_rate": 5.333333333333334e-06,
      "loss": 0.0002,
      "step": 640
    },
    {
      "epoch": 16.049382716049383,
      "grad_norm": 0.006907324306666851,
      "learning_rate": 5e-06,
      "loss": 0.0002,
      "step": 650
    },
    {
      "epoch": 16.296296296296298,
      "grad_norm": 0.005848431494086981,
      "learning_rate": 4.666666666666667e-06,
      "loss": 0.0002,
      "step": 660
    },
    {
      "epoch": 16.54320987654321,
      "grad_norm": 0.007128111552447081,
      "learning_rate": 4.333333333333334e-06,
      "loss": 0.0002,
      "step": 670
    },
    {
      "epoch": 16.790123456790123,
      "grad_norm": 0.00657699815928936,
      "learning_rate": 4.000000000000001e-06,
      "loss": 0.0002,
      "step": 680
    },
    {
      "epoch": 17.037037037037038,
      "grad_norm": 0.005719279404729605,
      "learning_rate": 3.6666666666666666e-06,
      "loss": 0.0002,
      "step": 690
    },
    {
      "epoch": 17.28395061728395,
      "grad_norm": 0.0061570280231535435,
      "learning_rate": 3.3333333333333333e-06,
      "loss": 0.0002,
      "step": 700
    },
    {
      "epoch": 17.28395061728395,
      "eval_accuracy": 1.0,
      "eval_accuracy_label_GD622:Null": 1.0,
      "eval_accuracy_label_GD622:YES": 1.0,
      "eval_f1": 1.0,
      "eval_loss": 0.0001789480447769165,
      "eval_precision": 1.0,
      "eval_recall": 1.0,
      "eval_runtime": 0.5201,
      "eval_samples_per_second": 186.493,
      "eval_steps_per_second": 13.458,
      "step": 700
    },
    {
      "epoch": 17.530864197530864,
      "grad_norm": 0.006241227500140667,
      "learning_rate": 3e-06,
      "loss": 0.0002,
      "step": 710
    },
    {
      "epoch": 17.77777777777778,
      "grad_norm": 0.006561820395290852,
      "learning_rate": 2.666666666666667e-06,
      "loss": 0.0002,
      "step": 720
    },
    {
      "epoch": 18.02469135802469,
      "grad_norm": 0.00643093092367053,
      "learning_rate": 2.3333333333333336e-06,
      "loss": 0.0002,
      "step": 730
    },
    {
      "epoch": 18.271604938271604,
      "grad_norm": 0.005693737417459488,
      "learning_rate": 2.0000000000000003e-06,
      "loss": 0.0002,
      "step": 740
    },
    {
      "epoch": 18.51851851851852,
      "grad_norm": 0.0065653519704937935,
      "learning_rate": 1.6666666666666667e-06,
      "loss": 0.0002,
      "step": 750
    },
    {
      "epoch": 18.765432098765434,
      "grad_norm": 0.004837734624743462,
      "learning_rate": 1.3333333333333334e-06,
      "loss": 0.0002,
      "step": 760
    },
    {
      "epoch": 19.012345679012345,
      "grad_norm": 0.005498081911355257,
      "learning_rate": 1.0000000000000002e-06,
      "loss": 0.0002,
      "step": 770
    },
    {
      "epoch": 19.25925925925926,
      "grad_norm": 0.0063169412314891815,
      "learning_rate": 6.666666666666667e-07,
      "loss": 0.0002,
      "step": 780
    },
    {
      "epoch": 19.506172839506174,
      "grad_norm": 0.00681178318336606,
      "learning_rate": 3.3333333333333335e-07,
      "loss": 0.0002,
      "step": 790
    },
    {
      "epoch": 19.753086419753085,
      "grad_norm": 0.006289825774729252,
      "learning_rate": 0.0,
      "loss": 0.0002,
      "step": 800
    },
    {
      "epoch": 19.753086419753085,
      "eval_accuracy": 1.0,
      "eval_accuracy_label_GD622:Null": 1.0,
      "eval_accuracy_label_GD622:YES": 1.0,
      "eval_f1": 1.0,
      "eval_loss": 0.0001743907341733575,
      "eval_precision": 1.0,
      "eval_recall": 1.0,
      "eval_runtime": 0.5213,
      "eval_samples_per_second": 186.078,
      "eval_steps_per_second": 13.428,
      "step": 800
    },
    {
      "epoch": 19.753086419753085,
      "step": 800,
      "total_flos": 93696895492200.0,
      "train_loss": 0.032879929275804895,
      "train_runtime": 407.7795,
      "train_samples_per_second": 63.073,
      "train_steps_per_second": 1.962
    },
    {
      "epoch": 19.753086419753085,
      "eval_accuracy": 1.0,
      "eval_accuracy_label_GD622:Null": 1.0,
      "eval_accuracy_label_GD622:YES": 1.0,
      "eval_f1": 1.0,
      "eval_loss": 0.0001743907341733575,
      "eval_precision": 1.0,
      "eval_recall": 1.0,
      "eval_runtime": 0.5104,
      "eval_samples_per_second": 190.041,
      "eval_steps_per_second": 13.714,
      "step": 800
    }
  ],
  "logging_steps": 10,
  "max_steps": 800,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 20,
  "save_steps": 1000,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 93696895492200.0,
  "train_batch_size": 16,
  "trial_name": null,
  "trial_params": null
}