|
{ |
|
"best_metric": 0.39916983246803284, |
|
"best_model_checkpoint": "Action_all_10_class/checkpoint-1200", |
|
"epoch": 5.0, |
|
"eval_steps": 100, |
|
"global_step": 1245, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 0.00019839357429718877, |
|
"loss": 2.2294, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 0.00019678714859437752, |
|
"loss": 2.1036, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 0.00019518072289156628, |
|
"loss": 1.9019, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 0.00019357429718875504, |
|
"loss": 1.6396, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 0.00019196787148594377, |
|
"loss": 1.5942, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 0.00019036144578313252, |
|
"loss": 1.3722, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 0.00018875502008032128, |
|
"loss": 1.2927, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 0.00018714859437751004, |
|
"loss": 1.2947, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 0.0001855421686746988, |
|
"loss": 1.2353, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 0.00018393574297188755, |
|
"loss": 1.1348, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"eval_accuracy": 0.698005698005698, |
|
"eval_loss": 1.0964313745498657, |
|
"eval_runtime": 13.3411, |
|
"eval_samples_per_second": 52.619, |
|
"eval_steps_per_second": 6.596, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 0.0001823293172690763, |
|
"loss": 1.0305, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 0.00018072289156626507, |
|
"loss": 1.0352, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 0.00017911646586345382, |
|
"loss": 1.0957, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 0.00017751004016064258, |
|
"loss": 1.1747, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 0.00017590361445783134, |
|
"loss": 1.0863, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"learning_rate": 0.0001742971887550201, |
|
"loss": 1.0802, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"learning_rate": 0.00017269076305220885, |
|
"loss": 1.0912, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"learning_rate": 0.0001710843373493976, |
|
"loss": 1.0056, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"learning_rate": 0.00016947791164658636, |
|
"loss": 0.9641, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"learning_rate": 0.00016787148594377512, |
|
"loss": 0.9992, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"eval_accuracy": 0.7948717948717948, |
|
"eval_loss": 0.7362096309661865, |
|
"eval_runtime": 13.2093, |
|
"eval_samples_per_second": 53.145, |
|
"eval_steps_per_second": 6.662, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"learning_rate": 0.00016626506024096388, |
|
"loss": 0.9851, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"learning_rate": 0.00016465863453815263, |
|
"loss": 0.886, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"learning_rate": 0.0001630522088353414, |
|
"loss": 0.9604, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"learning_rate": 0.00016144578313253015, |
|
"loss": 0.7707, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"learning_rate": 0.00016, |
|
"loss": 0.9153, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 1.04, |
|
"learning_rate": 0.00015839357429718874, |
|
"loss": 0.7581, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 1.08, |
|
"learning_rate": 0.0001567871485943775, |
|
"loss": 0.6622, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 1.12, |
|
"learning_rate": 0.00015518072289156626, |
|
"loss": 0.7251, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 1.16, |
|
"learning_rate": 0.00015357429718875501, |
|
"loss": 0.7955, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 1.2, |
|
"learning_rate": 0.00015196787148594377, |
|
"loss": 0.8314, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 1.2, |
|
"eval_accuracy": 0.8205128205128205, |
|
"eval_loss": 0.6410418748855591, |
|
"eval_runtime": 12.6655, |
|
"eval_samples_per_second": 55.426, |
|
"eval_steps_per_second": 6.948, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 1.24, |
|
"learning_rate": 0.00015036144578313253, |
|
"loss": 0.845, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 1.29, |
|
"learning_rate": 0.00014875502008032128, |
|
"loss": 0.7132, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 1.33, |
|
"learning_rate": 0.00014714859437751004, |
|
"loss": 0.7161, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 1.37, |
|
"learning_rate": 0.0001455421686746988, |
|
"loss": 0.7773, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 1.41, |
|
"learning_rate": 0.00014393574297188756, |
|
"loss": 0.7676, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 1.45, |
|
"learning_rate": 0.0001423293172690763, |
|
"loss": 0.8516, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 1.49, |
|
"learning_rate": 0.00014072289156626507, |
|
"loss": 0.6597, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 1.53, |
|
"learning_rate": 0.00013911646586345383, |
|
"loss": 0.6998, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 1.57, |
|
"learning_rate": 0.00013751004016064258, |
|
"loss": 0.8191, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 1.61, |
|
"learning_rate": 0.00013590361445783134, |
|
"loss": 0.7359, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 1.61, |
|
"eval_accuracy": 0.8247863247863247, |
|
"eval_loss": 0.5803518891334534, |
|
"eval_runtime": 12.2034, |
|
"eval_samples_per_second": 57.525, |
|
"eval_steps_per_second": 7.211, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 1.65, |
|
"learning_rate": 0.0001342971887550201, |
|
"loss": 0.7354, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 1.69, |
|
"learning_rate": 0.00013269076305220885, |
|
"loss": 0.7507, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 1.73, |
|
"learning_rate": 0.0001310843373493976, |
|
"loss": 0.8088, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 1.77, |
|
"learning_rate": 0.00012947791164658637, |
|
"loss": 0.7279, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 1.81, |
|
"learning_rate": 0.00012787148594377512, |
|
"loss": 0.7254, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 1.85, |
|
"learning_rate": 0.00012626506024096385, |
|
"loss": 0.6707, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 1.89, |
|
"learning_rate": 0.0001246586345381526, |
|
"loss": 0.6482, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 1.93, |
|
"learning_rate": 0.00012305220883534137, |
|
"loss": 0.7238, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 1.97, |
|
"learning_rate": 0.00012144578313253012, |
|
"loss": 0.6626, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 2.01, |
|
"learning_rate": 0.00011983935742971888, |
|
"loss": 0.776, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 2.01, |
|
"eval_accuracy": 0.8376068376068376, |
|
"eval_loss": 0.5489934086799622, |
|
"eval_runtime": 12.4349, |
|
"eval_samples_per_second": 56.454, |
|
"eval_steps_per_second": 7.077, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 2.05, |
|
"learning_rate": 0.00011823293172690764, |
|
"loss": 0.8069, |
|
"step": 510 |
|
}, |
|
{ |
|
"epoch": 2.09, |
|
"learning_rate": 0.0001166265060240964, |
|
"loss": 0.5448, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 2.13, |
|
"learning_rate": 0.00011502008032128515, |
|
"loss": 0.7318, |
|
"step": 530 |
|
}, |
|
{ |
|
"epoch": 2.17, |
|
"learning_rate": 0.00011341365461847391, |
|
"loss": 0.5083, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 2.21, |
|
"learning_rate": 0.00011180722891566267, |
|
"loss": 0.6493, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 2.25, |
|
"learning_rate": 0.0001102008032128514, |
|
"loss": 0.4928, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 2.29, |
|
"learning_rate": 0.00010859437751004015, |
|
"loss": 0.5395, |
|
"step": 570 |
|
}, |
|
{ |
|
"epoch": 2.33, |
|
"learning_rate": 0.00010698795180722891, |
|
"loss": 0.5588, |
|
"step": 580 |
|
}, |
|
{ |
|
"epoch": 2.37, |
|
"learning_rate": 0.00010538152610441767, |
|
"loss": 0.5892, |
|
"step": 590 |
|
}, |
|
{ |
|
"epoch": 2.41, |
|
"learning_rate": 0.00010377510040160642, |
|
"loss": 0.614, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 2.41, |
|
"eval_accuracy": 0.8504273504273504, |
|
"eval_loss": 0.5006864070892334, |
|
"eval_runtime": 12.2103, |
|
"eval_samples_per_second": 57.493, |
|
"eval_steps_per_second": 7.207, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 2.45, |
|
"learning_rate": 0.00010216867469879518, |
|
"loss": 0.7658, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 2.49, |
|
"learning_rate": 0.00010056224899598394, |
|
"loss": 0.5152, |
|
"step": 620 |
|
}, |
|
{ |
|
"epoch": 2.53, |
|
"learning_rate": 9.89558232931727e-05, |
|
"loss": 0.4941, |
|
"step": 630 |
|
}, |
|
{ |
|
"epoch": 2.57, |
|
"learning_rate": 9.734939759036145e-05, |
|
"loss": 0.6831, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 2.61, |
|
"learning_rate": 9.574297188755021e-05, |
|
"loss": 0.5971, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 2.65, |
|
"learning_rate": 9.413654618473896e-05, |
|
"loss": 0.5842, |
|
"step": 660 |
|
}, |
|
{ |
|
"epoch": 2.69, |
|
"learning_rate": 9.253012048192772e-05, |
|
"loss": 0.5976, |
|
"step": 670 |
|
}, |
|
{ |
|
"epoch": 2.73, |
|
"learning_rate": 9.092369477911648e-05, |
|
"loss": 0.5051, |
|
"step": 680 |
|
}, |
|
{ |
|
"epoch": 2.77, |
|
"learning_rate": 8.931726907630522e-05, |
|
"loss": 0.4737, |
|
"step": 690 |
|
}, |
|
{ |
|
"epoch": 2.81, |
|
"learning_rate": 8.771084337349398e-05, |
|
"loss": 0.5484, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 2.81, |
|
"eval_accuracy": 0.8333333333333334, |
|
"eval_loss": 0.5322346091270447, |
|
"eval_runtime": 12.3228, |
|
"eval_samples_per_second": 56.968, |
|
"eval_steps_per_second": 7.141, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 2.85, |
|
"learning_rate": 8.610441767068274e-05, |
|
"loss": 0.4436, |
|
"step": 710 |
|
}, |
|
{ |
|
"epoch": 2.89, |
|
"learning_rate": 8.449799196787149e-05, |
|
"loss": 0.6452, |
|
"step": 720 |
|
}, |
|
{ |
|
"epoch": 2.93, |
|
"learning_rate": 8.289156626506025e-05, |
|
"loss": 0.5724, |
|
"step": 730 |
|
}, |
|
{ |
|
"epoch": 2.97, |
|
"learning_rate": 8.128514056224899e-05, |
|
"loss": 0.3933, |
|
"step": 740 |
|
}, |
|
{ |
|
"epoch": 3.01, |
|
"learning_rate": 7.967871485943775e-05, |
|
"loss": 0.5753, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 3.05, |
|
"learning_rate": 7.80722891566265e-05, |
|
"loss": 0.4426, |
|
"step": 760 |
|
}, |
|
{ |
|
"epoch": 3.09, |
|
"learning_rate": 7.646586345381526e-05, |
|
"loss": 0.5442, |
|
"step": 770 |
|
}, |
|
{ |
|
"epoch": 3.13, |
|
"learning_rate": 7.485943775100402e-05, |
|
"loss": 0.4839, |
|
"step": 780 |
|
}, |
|
{ |
|
"epoch": 3.17, |
|
"learning_rate": 7.325301204819278e-05, |
|
"loss": 0.3711, |
|
"step": 790 |
|
}, |
|
{ |
|
"epoch": 3.21, |
|
"learning_rate": 7.164658634538153e-05, |
|
"loss": 0.3844, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 3.21, |
|
"eval_accuracy": 0.8518518518518519, |
|
"eval_loss": 0.5012323260307312, |
|
"eval_runtime": 12.9006, |
|
"eval_samples_per_second": 54.416, |
|
"eval_steps_per_second": 6.821, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 3.25, |
|
"learning_rate": 7.004016064257029e-05, |
|
"loss": 0.513, |
|
"step": 810 |
|
}, |
|
{ |
|
"epoch": 3.29, |
|
"learning_rate": 6.843373493975905e-05, |
|
"loss": 0.3505, |
|
"step": 820 |
|
}, |
|
{ |
|
"epoch": 3.33, |
|
"learning_rate": 6.68273092369478e-05, |
|
"loss": 0.4022, |
|
"step": 830 |
|
}, |
|
{ |
|
"epoch": 3.37, |
|
"learning_rate": 6.522088353413655e-05, |
|
"loss": 0.3574, |
|
"step": 840 |
|
}, |
|
{ |
|
"epoch": 3.41, |
|
"learning_rate": 6.36144578313253e-05, |
|
"loss": 0.5703, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 3.45, |
|
"learning_rate": 6.200803212851406e-05, |
|
"loss": 0.4585, |
|
"step": 860 |
|
}, |
|
{ |
|
"epoch": 3.49, |
|
"learning_rate": 6.040160642570282e-05, |
|
"loss": 0.3114, |
|
"step": 870 |
|
}, |
|
{ |
|
"epoch": 3.53, |
|
"learning_rate": 5.8795180722891576e-05, |
|
"loss": 0.3508, |
|
"step": 880 |
|
}, |
|
{ |
|
"epoch": 3.57, |
|
"learning_rate": 5.718875502008032e-05, |
|
"loss": 0.3614, |
|
"step": 890 |
|
}, |
|
{ |
|
"epoch": 3.61, |
|
"learning_rate": 5.5582329317269076e-05, |
|
"loss": 0.5681, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 3.61, |
|
"eval_accuracy": 0.8589743589743589, |
|
"eval_loss": 0.49228861927986145, |
|
"eval_runtime": 13.1999, |
|
"eval_samples_per_second": 53.182, |
|
"eval_steps_per_second": 6.667, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 3.65, |
|
"learning_rate": 5.397590361445783e-05, |
|
"loss": 0.5074, |
|
"step": 910 |
|
}, |
|
{ |
|
"epoch": 3.69, |
|
"learning_rate": 5.236947791164659e-05, |
|
"loss": 0.3371, |
|
"step": 920 |
|
}, |
|
{ |
|
"epoch": 3.73, |
|
"learning_rate": 5.076305220883535e-05, |
|
"loss": 0.5226, |
|
"step": 930 |
|
}, |
|
{ |
|
"epoch": 3.78, |
|
"learning_rate": 4.9156626506024104e-05, |
|
"loss": 0.4462, |
|
"step": 940 |
|
}, |
|
{ |
|
"epoch": 3.82, |
|
"learning_rate": 4.7550200803212854e-05, |
|
"loss": 0.4781, |
|
"step": 950 |
|
}, |
|
{ |
|
"epoch": 3.86, |
|
"learning_rate": 4.594377510040161e-05, |
|
"loss": 0.4001, |
|
"step": 960 |
|
}, |
|
{ |
|
"epoch": 3.9, |
|
"learning_rate": 4.433734939759036e-05, |
|
"loss": 0.4788, |
|
"step": 970 |
|
}, |
|
{ |
|
"epoch": 3.94, |
|
"learning_rate": 4.273092369477912e-05, |
|
"loss": 0.4561, |
|
"step": 980 |
|
}, |
|
{ |
|
"epoch": 3.98, |
|
"learning_rate": 4.1124497991967875e-05, |
|
"loss": 0.2563, |
|
"step": 990 |
|
}, |
|
{ |
|
"epoch": 4.02, |
|
"learning_rate": 3.9518072289156625e-05, |
|
"loss": 0.4315, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 4.02, |
|
"eval_accuracy": 0.8774928774928775, |
|
"eval_loss": 0.42575880885124207, |
|
"eval_runtime": 13.0968, |
|
"eval_samples_per_second": 53.601, |
|
"eval_steps_per_second": 6.719, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 4.06, |
|
"learning_rate": 3.791164658634538e-05, |
|
"loss": 0.2898, |
|
"step": 1010 |
|
}, |
|
{ |
|
"epoch": 4.1, |
|
"learning_rate": 3.630522088353414e-05, |
|
"loss": 0.3803, |
|
"step": 1020 |
|
}, |
|
{ |
|
"epoch": 4.14, |
|
"learning_rate": 3.4698795180722896e-05, |
|
"loss": 0.2827, |
|
"step": 1030 |
|
}, |
|
{ |
|
"epoch": 4.18, |
|
"learning_rate": 3.309236947791165e-05, |
|
"loss": 0.3382, |
|
"step": 1040 |
|
}, |
|
{ |
|
"epoch": 4.22, |
|
"learning_rate": 3.14859437751004e-05, |
|
"loss": 0.3808, |
|
"step": 1050 |
|
}, |
|
{ |
|
"epoch": 4.26, |
|
"learning_rate": 2.987951807228916e-05, |
|
"loss": 0.2551, |
|
"step": 1060 |
|
}, |
|
{ |
|
"epoch": 4.3, |
|
"learning_rate": 2.827309236947791e-05, |
|
"loss": 0.3011, |
|
"step": 1070 |
|
}, |
|
{ |
|
"epoch": 4.34, |
|
"learning_rate": 2.6666666666666667e-05, |
|
"loss": 0.4091, |
|
"step": 1080 |
|
}, |
|
{ |
|
"epoch": 4.38, |
|
"learning_rate": 2.5060240963855423e-05, |
|
"loss": 0.286, |
|
"step": 1090 |
|
}, |
|
{ |
|
"epoch": 4.42, |
|
"learning_rate": 2.345381526104418e-05, |
|
"loss": 0.3804, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 4.42, |
|
"eval_accuracy": 0.8675213675213675, |
|
"eval_loss": 0.4324430227279663, |
|
"eval_runtime": 12.1257, |
|
"eval_samples_per_second": 57.894, |
|
"eval_steps_per_second": 7.257, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 4.46, |
|
"learning_rate": 2.1847389558232934e-05, |
|
"loss": 0.2672, |
|
"step": 1110 |
|
}, |
|
{ |
|
"epoch": 4.5, |
|
"learning_rate": 2.0240963855421687e-05, |
|
"loss": 0.3661, |
|
"step": 1120 |
|
}, |
|
{ |
|
"epoch": 4.54, |
|
"learning_rate": 1.863453815261044e-05, |
|
"loss": 0.3126, |
|
"step": 1130 |
|
}, |
|
{ |
|
"epoch": 4.58, |
|
"learning_rate": 1.7028112449799198e-05, |
|
"loss": 0.3455, |
|
"step": 1140 |
|
}, |
|
{ |
|
"epoch": 4.62, |
|
"learning_rate": 1.5421686746987955e-05, |
|
"loss": 0.3604, |
|
"step": 1150 |
|
}, |
|
{ |
|
"epoch": 4.66, |
|
"learning_rate": 1.3815261044176708e-05, |
|
"loss": 0.4628, |
|
"step": 1160 |
|
}, |
|
{ |
|
"epoch": 4.7, |
|
"learning_rate": 1.2208835341365463e-05, |
|
"loss": 0.4074, |
|
"step": 1170 |
|
}, |
|
{ |
|
"epoch": 4.74, |
|
"learning_rate": 1.0602409638554217e-05, |
|
"loss": 0.2512, |
|
"step": 1180 |
|
}, |
|
{ |
|
"epoch": 4.78, |
|
"learning_rate": 8.995983935742972e-06, |
|
"loss": 0.2974, |
|
"step": 1190 |
|
}, |
|
{ |
|
"epoch": 4.82, |
|
"learning_rate": 7.389558232931727e-06, |
|
"loss": 0.2887, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 4.82, |
|
"eval_accuracy": 0.8803418803418803, |
|
"eval_loss": 0.39916983246803284, |
|
"eval_runtime": 12.2383, |
|
"eval_samples_per_second": 57.361, |
|
"eval_steps_per_second": 7.191, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 4.86, |
|
"learning_rate": 5.783132530120483e-06, |
|
"loss": 0.3073, |
|
"step": 1210 |
|
}, |
|
{ |
|
"epoch": 4.9, |
|
"learning_rate": 4.176706827309238e-06, |
|
"loss": 0.2825, |
|
"step": 1220 |
|
}, |
|
{ |
|
"epoch": 4.94, |
|
"learning_rate": 2.570281124497992e-06, |
|
"loss": 0.288, |
|
"step": 1230 |
|
}, |
|
{ |
|
"epoch": 4.98, |
|
"learning_rate": 9.638554216867472e-07, |
|
"loss": 0.3198, |
|
"step": 1240 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"step": 1245, |
|
"total_flos": 1.539101261655982e+18, |
|
"train_loss": 0.6643937945844658, |
|
"train_runtime": 748.7697, |
|
"train_samples_per_second": 26.524, |
|
"train_steps_per_second": 1.663 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 1245, |
|
"num_train_epochs": 5, |
|
"save_steps": 100, |
|
"total_flos": 1.539101261655982e+18, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|