|
{ |
|
"best_metric": 0.6480256915092468, |
|
"best_model_checkpoint": "./croupier-creature-classifier/checkpoint-100", |
|
"epoch": 15.0, |
|
"global_step": 1365, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 0.00019853479853479855, |
|
"loss": 0.2843, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 0.0001970695970695971, |
|
"loss": 0.3895, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 0.00019560439560439562, |
|
"loss": 0.3592, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 0.00019413919413919413, |
|
"loss": 0.3891, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 0.00019267399267399268, |
|
"loss": 0.2862, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"learning_rate": 0.00019120879120879122, |
|
"loss": 0.4721, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"learning_rate": 0.00018974358974358974, |
|
"loss": 0.4275, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"learning_rate": 0.00018827838827838828, |
|
"loss": 0.2297, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"learning_rate": 0.00018681318681318683, |
|
"loss": 0.3175, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 1.1, |
|
"learning_rate": 0.00018534798534798537, |
|
"loss": 0.1967, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 1.1, |
|
"eval_accuracy": 0.8058823529411765, |
|
"eval_loss": 0.6480256915092468, |
|
"eval_runtime": 1.3833, |
|
"eval_samples_per_second": 122.894, |
|
"eval_steps_per_second": 15.904, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 1.21, |
|
"learning_rate": 0.0001838827838827839, |
|
"loss": 0.2783, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 1.32, |
|
"learning_rate": 0.0001824175824175824, |
|
"loss": 0.3196, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 1.43, |
|
"learning_rate": 0.00018095238095238095, |
|
"loss": 0.2813, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 1.54, |
|
"learning_rate": 0.0001794871794871795, |
|
"loss": 0.2686, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 1.65, |
|
"learning_rate": 0.00017802197802197802, |
|
"loss": 0.2325, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 1.76, |
|
"learning_rate": 0.00017655677655677656, |
|
"loss": 0.3969, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 1.87, |
|
"learning_rate": 0.0001750915750915751, |
|
"loss": 0.1645, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 1.98, |
|
"learning_rate": 0.00017362637362637365, |
|
"loss": 0.3043, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 2.09, |
|
"learning_rate": 0.00017216117216117217, |
|
"loss": 0.1483, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 2.2, |
|
"learning_rate": 0.00017069597069597068, |
|
"loss": 0.1047, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 2.2, |
|
"eval_accuracy": 0.7529411764705882, |
|
"eval_loss": 0.8702762126922607, |
|
"eval_runtime": 1.4808, |
|
"eval_samples_per_second": 114.8, |
|
"eval_steps_per_second": 14.856, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 2.31, |
|
"learning_rate": 0.00016923076923076923, |
|
"loss": 0.1261, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 2.42, |
|
"learning_rate": 0.00016776556776556777, |
|
"loss": 0.282, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 2.53, |
|
"learning_rate": 0.00016630036630036632, |
|
"loss": 0.1773, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 2.64, |
|
"learning_rate": 0.00016483516483516484, |
|
"loss": 0.1566, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 2.75, |
|
"learning_rate": 0.00016336996336996338, |
|
"loss": 0.2896, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 2.86, |
|
"learning_rate": 0.00016190476190476192, |
|
"loss": 0.2573, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 2.97, |
|
"learning_rate": 0.00016043956043956044, |
|
"loss": 0.1943, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 3.08, |
|
"learning_rate": 0.00015897435897435896, |
|
"loss": 0.1993, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 3.19, |
|
"learning_rate": 0.0001575091575091575, |
|
"loss": 0.1106, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 3.3, |
|
"learning_rate": 0.00015604395604395605, |
|
"loss": 0.2249, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 3.3, |
|
"eval_accuracy": 0.7588235294117647, |
|
"eval_loss": 0.9538877606391907, |
|
"eval_runtime": 1.4679, |
|
"eval_samples_per_second": 115.811, |
|
"eval_steps_per_second": 14.987, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 3.41, |
|
"learning_rate": 0.0001545787545787546, |
|
"loss": 0.2232, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 3.52, |
|
"learning_rate": 0.0001531135531135531, |
|
"loss": 0.084, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 3.63, |
|
"learning_rate": 0.00015164835164835165, |
|
"loss": 0.1352, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 3.74, |
|
"learning_rate": 0.0001501831501831502, |
|
"loss": 0.1632, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 3.85, |
|
"learning_rate": 0.00014871794871794872, |
|
"loss": 0.2161, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 3.96, |
|
"learning_rate": 0.00014725274725274726, |
|
"loss": 0.1823, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 4.07, |
|
"learning_rate": 0.00014578754578754578, |
|
"loss": 0.0965, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 4.18, |
|
"learning_rate": 0.00014432234432234432, |
|
"loss": 0.135, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 4.29, |
|
"learning_rate": 0.00014285714285714287, |
|
"loss": 0.0782, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 4.4, |
|
"learning_rate": 0.0001413919413919414, |
|
"loss": 0.0984, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 4.4, |
|
"eval_accuracy": 0.7529411764705882, |
|
"eval_loss": 0.9318849444389343, |
|
"eval_runtime": 1.4691, |
|
"eval_samples_per_second": 115.719, |
|
"eval_steps_per_second": 14.975, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 4.51, |
|
"learning_rate": 0.00013992673992673993, |
|
"loss": 0.1553, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 4.62, |
|
"learning_rate": 0.00013846153846153847, |
|
"loss": 0.0954, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 4.73, |
|
"learning_rate": 0.000136996336996337, |
|
"loss": 0.1225, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 4.84, |
|
"learning_rate": 0.00013553113553113554, |
|
"loss": 0.2155, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 4.95, |
|
"learning_rate": 0.00013406593406593405, |
|
"loss": 0.1496, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 5.05, |
|
"learning_rate": 0.0001326007326007326, |
|
"loss": 0.0837, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 5.16, |
|
"learning_rate": 0.00013113553113553114, |
|
"loss": 0.0693, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 5.27, |
|
"learning_rate": 0.0001296703296703297, |
|
"loss": 0.08, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 5.38, |
|
"learning_rate": 0.00012820512820512823, |
|
"loss": 0.0989, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 5.49, |
|
"learning_rate": 0.00012673992673992675, |
|
"loss": 0.086, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 5.49, |
|
"eval_accuracy": 0.7705882352941177, |
|
"eval_loss": 0.9061105251312256, |
|
"eval_runtime": 1.4763, |
|
"eval_samples_per_second": 115.156, |
|
"eval_steps_per_second": 14.903, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 5.6, |
|
"learning_rate": 0.00012527472527472527, |
|
"loss": 0.1005, |
|
"step": 510 |
|
}, |
|
{ |
|
"epoch": 5.71, |
|
"learning_rate": 0.0001238095238095238, |
|
"loss": 0.0685, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 5.82, |
|
"learning_rate": 0.00012234432234432236, |
|
"loss": 0.1174, |
|
"step": 530 |
|
}, |
|
{ |
|
"epoch": 5.93, |
|
"learning_rate": 0.00012087912087912087, |
|
"loss": 0.1728, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 6.04, |
|
"learning_rate": 0.00011941391941391942, |
|
"loss": 0.086, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 6.15, |
|
"learning_rate": 0.00011794871794871796, |
|
"loss": 0.0738, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 6.26, |
|
"learning_rate": 0.0001164835164835165, |
|
"loss": 0.04, |
|
"step": 570 |
|
}, |
|
{ |
|
"epoch": 6.37, |
|
"learning_rate": 0.00011501831501831501, |
|
"loss": 0.1145, |
|
"step": 580 |
|
}, |
|
{ |
|
"epoch": 6.48, |
|
"learning_rate": 0.00011355311355311356, |
|
"loss": 0.0571, |
|
"step": 590 |
|
}, |
|
{ |
|
"epoch": 6.59, |
|
"learning_rate": 0.0001120879120879121, |
|
"loss": 0.1164, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 6.59, |
|
"eval_accuracy": 0.8176470588235294, |
|
"eval_loss": 0.7493334412574768, |
|
"eval_runtime": 1.4541, |
|
"eval_samples_per_second": 116.908, |
|
"eval_steps_per_second": 15.129, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 6.7, |
|
"learning_rate": 0.00011062271062271063, |
|
"loss": 0.0479, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 6.81, |
|
"learning_rate": 0.00010915750915750915, |
|
"loss": 0.0899, |
|
"step": 620 |
|
}, |
|
{ |
|
"epoch": 6.92, |
|
"learning_rate": 0.0001076923076923077, |
|
"loss": 0.0746, |
|
"step": 630 |
|
}, |
|
{ |
|
"epoch": 7.03, |
|
"learning_rate": 0.00010622710622710624, |
|
"loss": 0.0826, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 7.14, |
|
"learning_rate": 0.00010476190476190477, |
|
"loss": 0.0382, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 7.25, |
|
"learning_rate": 0.00010329670329670331, |
|
"loss": 0.0724, |
|
"step": 660 |
|
}, |
|
{ |
|
"epoch": 7.36, |
|
"learning_rate": 0.00010183150183150183, |
|
"loss": 0.036, |
|
"step": 670 |
|
}, |
|
{ |
|
"epoch": 7.47, |
|
"learning_rate": 0.00010036630036630038, |
|
"loss": 0.0579, |
|
"step": 680 |
|
}, |
|
{ |
|
"epoch": 7.58, |
|
"learning_rate": 9.89010989010989e-05, |
|
"loss": 0.089, |
|
"step": 690 |
|
}, |
|
{ |
|
"epoch": 7.69, |
|
"learning_rate": 9.743589743589744e-05, |
|
"loss": 0.0518, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 7.69, |
|
"eval_accuracy": 0.7764705882352941, |
|
"eval_loss": 0.8781054615974426, |
|
"eval_runtime": 1.4674, |
|
"eval_samples_per_second": 115.852, |
|
"eval_steps_per_second": 14.993, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 7.8, |
|
"learning_rate": 9.597069597069598e-05, |
|
"loss": 0.0603, |
|
"step": 710 |
|
}, |
|
{ |
|
"epoch": 7.91, |
|
"learning_rate": 9.450549450549451e-05, |
|
"loss": 0.0565, |
|
"step": 720 |
|
}, |
|
{ |
|
"epoch": 8.02, |
|
"learning_rate": 9.304029304029304e-05, |
|
"loss": 0.0689, |
|
"step": 730 |
|
}, |
|
{ |
|
"epoch": 8.13, |
|
"learning_rate": 9.157509157509158e-05, |
|
"loss": 0.0295, |
|
"step": 740 |
|
}, |
|
{ |
|
"epoch": 8.24, |
|
"learning_rate": 9.010989010989012e-05, |
|
"loss": 0.0555, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 8.35, |
|
"learning_rate": 8.864468864468865e-05, |
|
"loss": 0.0501, |
|
"step": 760 |
|
}, |
|
{ |
|
"epoch": 8.46, |
|
"learning_rate": 8.717948717948718e-05, |
|
"loss": 0.0657, |
|
"step": 770 |
|
}, |
|
{ |
|
"epoch": 8.57, |
|
"learning_rate": 8.571428571428571e-05, |
|
"loss": 0.0479, |
|
"step": 780 |
|
}, |
|
{ |
|
"epoch": 8.68, |
|
"learning_rate": 8.424908424908426e-05, |
|
"loss": 0.0665, |
|
"step": 790 |
|
}, |
|
{ |
|
"epoch": 8.79, |
|
"learning_rate": 8.278388278388279e-05, |
|
"loss": 0.0458, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 8.79, |
|
"eval_accuracy": 0.7823529411764706, |
|
"eval_loss": 0.8851077556610107, |
|
"eval_runtime": 1.4711, |
|
"eval_samples_per_second": 115.561, |
|
"eval_steps_per_second": 14.955, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 8.9, |
|
"learning_rate": 8.131868131868132e-05, |
|
"loss": 0.0704, |
|
"step": 810 |
|
}, |
|
{ |
|
"epoch": 9.01, |
|
"learning_rate": 7.985347985347986e-05, |
|
"loss": 0.0779, |
|
"step": 820 |
|
}, |
|
{ |
|
"epoch": 9.12, |
|
"learning_rate": 7.83882783882784e-05, |
|
"loss": 0.0575, |
|
"step": 830 |
|
}, |
|
{ |
|
"epoch": 9.23, |
|
"learning_rate": 7.692307692307693e-05, |
|
"loss": 0.0541, |
|
"step": 840 |
|
}, |
|
{ |
|
"epoch": 9.34, |
|
"learning_rate": 7.545787545787546e-05, |
|
"loss": 0.0413, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 9.45, |
|
"learning_rate": 7.3992673992674e-05, |
|
"loss": 0.0367, |
|
"step": 860 |
|
}, |
|
{ |
|
"epoch": 9.56, |
|
"learning_rate": 7.252747252747253e-05, |
|
"loss": 0.0857, |
|
"step": 870 |
|
}, |
|
{ |
|
"epoch": 9.67, |
|
"learning_rate": 7.106227106227106e-05, |
|
"loss": 0.061, |
|
"step": 880 |
|
}, |
|
{ |
|
"epoch": 9.78, |
|
"learning_rate": 6.95970695970696e-05, |
|
"loss": 0.0408, |
|
"step": 890 |
|
}, |
|
{ |
|
"epoch": 9.89, |
|
"learning_rate": 6.813186813186814e-05, |
|
"loss": 0.0521, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 9.89, |
|
"eval_accuracy": 0.788235294117647, |
|
"eval_loss": 0.9448409080505371, |
|
"eval_runtime": 1.3677, |
|
"eval_samples_per_second": 124.3, |
|
"eval_steps_per_second": 16.086, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"learning_rate": 6.666666666666667e-05, |
|
"loss": 0.0452, |
|
"step": 910 |
|
}, |
|
{ |
|
"epoch": 10.11, |
|
"learning_rate": 6.52014652014652e-05, |
|
"loss": 0.0573, |
|
"step": 920 |
|
}, |
|
{ |
|
"epoch": 10.22, |
|
"learning_rate": 6.373626373626373e-05, |
|
"loss": 0.0333, |
|
"step": 930 |
|
}, |
|
{ |
|
"epoch": 10.33, |
|
"learning_rate": 6.227106227106228e-05, |
|
"loss": 0.035, |
|
"step": 940 |
|
}, |
|
{ |
|
"epoch": 10.44, |
|
"learning_rate": 6.08058608058608e-05, |
|
"loss": 0.0513, |
|
"step": 950 |
|
}, |
|
{ |
|
"epoch": 10.55, |
|
"learning_rate": 5.9340659340659345e-05, |
|
"loss": 0.0412, |
|
"step": 960 |
|
}, |
|
{ |
|
"epoch": 10.66, |
|
"learning_rate": 5.787545787545788e-05, |
|
"loss": 0.044, |
|
"step": 970 |
|
}, |
|
{ |
|
"epoch": 10.77, |
|
"learning_rate": 5.6410256410256414e-05, |
|
"loss": 0.0559, |
|
"step": 980 |
|
}, |
|
{ |
|
"epoch": 10.88, |
|
"learning_rate": 5.494505494505495e-05, |
|
"loss": 0.0889, |
|
"step": 990 |
|
}, |
|
{ |
|
"epoch": 10.99, |
|
"learning_rate": 5.347985347985348e-05, |
|
"loss": 0.0576, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 10.99, |
|
"eval_accuracy": 0.7823529411764706, |
|
"eval_loss": 0.8883835673332214, |
|
"eval_runtime": 1.3691, |
|
"eval_samples_per_second": 124.171, |
|
"eval_steps_per_second": 16.069, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 11.1, |
|
"learning_rate": 5.201465201465202e-05, |
|
"loss": 0.0591, |
|
"step": 1010 |
|
}, |
|
{ |
|
"epoch": 11.21, |
|
"learning_rate": 5.054945054945055e-05, |
|
"loss": 0.0649, |
|
"step": 1020 |
|
}, |
|
{ |
|
"epoch": 11.32, |
|
"learning_rate": 4.908424908424908e-05, |
|
"loss": 0.0425, |
|
"step": 1030 |
|
}, |
|
{ |
|
"epoch": 11.43, |
|
"learning_rate": 4.761904761904762e-05, |
|
"loss": 0.0471, |
|
"step": 1040 |
|
}, |
|
{ |
|
"epoch": 11.54, |
|
"learning_rate": 4.615384615384616e-05, |
|
"loss": 0.0366, |
|
"step": 1050 |
|
}, |
|
{ |
|
"epoch": 11.65, |
|
"learning_rate": 4.4688644688644696e-05, |
|
"loss": 0.0414, |
|
"step": 1060 |
|
}, |
|
{ |
|
"epoch": 11.76, |
|
"learning_rate": 4.322344322344323e-05, |
|
"loss": 0.0393, |
|
"step": 1070 |
|
}, |
|
{ |
|
"epoch": 11.87, |
|
"learning_rate": 4.1758241758241765e-05, |
|
"loss": 0.0551, |
|
"step": 1080 |
|
}, |
|
{ |
|
"epoch": 11.98, |
|
"learning_rate": 4.0293040293040296e-05, |
|
"loss": 0.0503, |
|
"step": 1090 |
|
}, |
|
{ |
|
"epoch": 12.09, |
|
"learning_rate": 3.8827838827838833e-05, |
|
"loss": 0.0442, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 12.09, |
|
"eval_accuracy": 0.788235294117647, |
|
"eval_loss": 0.896532416343689, |
|
"eval_runtime": 1.5128, |
|
"eval_samples_per_second": 112.377, |
|
"eval_steps_per_second": 14.543, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 12.2, |
|
"learning_rate": 3.7362637362637365e-05, |
|
"loss": 0.0595, |
|
"step": 1110 |
|
}, |
|
{ |
|
"epoch": 12.31, |
|
"learning_rate": 3.58974358974359e-05, |
|
"loss": 0.0304, |
|
"step": 1120 |
|
}, |
|
{ |
|
"epoch": 12.42, |
|
"learning_rate": 3.443223443223443e-05, |
|
"loss": 0.0365, |
|
"step": 1130 |
|
}, |
|
{ |
|
"epoch": 12.53, |
|
"learning_rate": 3.296703296703297e-05, |
|
"loss": 0.0917, |
|
"step": 1140 |
|
}, |
|
{ |
|
"epoch": 12.64, |
|
"learning_rate": 3.15018315018315e-05, |
|
"loss": 0.0549, |
|
"step": 1150 |
|
}, |
|
{ |
|
"epoch": 12.75, |
|
"learning_rate": 3.0036630036630036e-05, |
|
"loss": 0.0616, |
|
"step": 1160 |
|
}, |
|
{ |
|
"epoch": 12.86, |
|
"learning_rate": 2.857142857142857e-05, |
|
"loss": 0.0526, |
|
"step": 1170 |
|
}, |
|
{ |
|
"epoch": 12.97, |
|
"learning_rate": 2.7106227106227105e-05, |
|
"loss": 0.0297, |
|
"step": 1180 |
|
}, |
|
{ |
|
"epoch": 13.08, |
|
"learning_rate": 2.564102564102564e-05, |
|
"loss": 0.0409, |
|
"step": 1190 |
|
}, |
|
{ |
|
"epoch": 13.19, |
|
"learning_rate": 2.4175824175824177e-05, |
|
"loss": 0.0254, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 13.19, |
|
"eval_accuracy": 0.788235294117647, |
|
"eval_loss": 0.9140186905860901, |
|
"eval_runtime": 1.3923, |
|
"eval_samples_per_second": 122.1, |
|
"eval_steps_per_second": 15.801, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 13.3, |
|
"learning_rate": 2.2710622710622712e-05, |
|
"loss": 0.0373, |
|
"step": 1210 |
|
}, |
|
{ |
|
"epoch": 13.41, |
|
"learning_rate": 2.1245421245421246e-05, |
|
"loss": 0.0325, |
|
"step": 1220 |
|
}, |
|
{ |
|
"epoch": 13.52, |
|
"learning_rate": 1.978021978021978e-05, |
|
"loss": 0.0373, |
|
"step": 1230 |
|
}, |
|
{ |
|
"epoch": 13.63, |
|
"learning_rate": 1.8315018315018315e-05, |
|
"loss": 0.0545, |
|
"step": 1240 |
|
}, |
|
{ |
|
"epoch": 13.74, |
|
"learning_rate": 1.6849816849816853e-05, |
|
"loss": 0.0613, |
|
"step": 1250 |
|
}, |
|
{ |
|
"epoch": 13.85, |
|
"learning_rate": 1.5384615384615387e-05, |
|
"loss": 0.0515, |
|
"step": 1260 |
|
}, |
|
{ |
|
"epoch": 13.96, |
|
"learning_rate": 1.391941391941392e-05, |
|
"loss": 0.0708, |
|
"step": 1270 |
|
}, |
|
{ |
|
"epoch": 14.07, |
|
"learning_rate": 1.2454212454212454e-05, |
|
"loss": 0.0615, |
|
"step": 1280 |
|
}, |
|
{ |
|
"epoch": 14.18, |
|
"learning_rate": 1.0989010989010989e-05, |
|
"loss": 0.0495, |
|
"step": 1290 |
|
}, |
|
{ |
|
"epoch": 14.29, |
|
"learning_rate": 9.523809523809523e-06, |
|
"loss": 0.0426, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 14.29, |
|
"eval_accuracy": 0.788235294117647, |
|
"eval_loss": 0.927388072013855, |
|
"eval_runtime": 1.3816, |
|
"eval_samples_per_second": 123.046, |
|
"eval_steps_per_second": 15.924, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 14.4, |
|
"learning_rate": 8.058608058608059e-06, |
|
"loss": 0.0437, |
|
"step": 1310 |
|
}, |
|
{ |
|
"epoch": 14.51, |
|
"learning_rate": 6.5934065934065935e-06, |
|
"loss": 0.028, |
|
"step": 1320 |
|
}, |
|
{ |
|
"epoch": 14.62, |
|
"learning_rate": 5.128205128205128e-06, |
|
"loss": 0.0345, |
|
"step": 1330 |
|
}, |
|
{ |
|
"epoch": 14.73, |
|
"learning_rate": 3.663003663003663e-06, |
|
"loss": 0.0565, |
|
"step": 1340 |
|
}, |
|
{ |
|
"epoch": 14.84, |
|
"learning_rate": 2.197802197802198e-06, |
|
"loss": 0.0758, |
|
"step": 1350 |
|
}, |
|
{ |
|
"epoch": 14.95, |
|
"learning_rate": 7.326007326007326e-07, |
|
"loss": 0.047, |
|
"step": 1360 |
|
}, |
|
{ |
|
"epoch": 15.0, |
|
"step": 1365, |
|
"total_flos": 1.6866433871220326e+18, |
|
"train_loss": 0.11297949704276773, |
|
"train_runtime": 502.5996, |
|
"train_samples_per_second": 43.305, |
|
"train_steps_per_second": 2.716 |
|
} |
|
], |
|
"max_steps": 1365, |
|
"num_train_epochs": 15, |
|
"total_flos": 1.6866433871220326e+18, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|