{ "best_metric": 0.6480256915092468, "best_model_checkpoint": "./croupier-creature-classifier/checkpoint-100", "epoch": 15.0, "global_step": 1365, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.11, "learning_rate": 0.00019853479853479855, "loss": 0.2843, "step": 10 }, { "epoch": 0.22, "learning_rate": 0.0001970695970695971, "loss": 0.3895, "step": 20 }, { "epoch": 0.33, "learning_rate": 0.00019560439560439562, "loss": 0.3592, "step": 30 }, { "epoch": 0.44, "learning_rate": 0.00019413919413919413, "loss": 0.3891, "step": 40 }, { "epoch": 0.55, "learning_rate": 0.00019267399267399268, "loss": 0.2862, "step": 50 }, { "epoch": 0.66, "learning_rate": 0.00019120879120879122, "loss": 0.4721, "step": 60 }, { "epoch": 0.77, "learning_rate": 0.00018974358974358974, "loss": 0.4275, "step": 70 }, { "epoch": 0.88, "learning_rate": 0.00018827838827838828, "loss": 0.2297, "step": 80 }, { "epoch": 0.99, "learning_rate": 0.00018681318681318683, "loss": 0.3175, "step": 90 }, { "epoch": 1.1, "learning_rate": 0.00018534798534798537, "loss": 0.1967, "step": 100 }, { "epoch": 1.1, "eval_accuracy": 0.8058823529411765, "eval_loss": 0.6480256915092468, "eval_runtime": 1.3833, "eval_samples_per_second": 122.894, "eval_steps_per_second": 15.904, "step": 100 }, { "epoch": 1.21, "learning_rate": 0.0001838827838827839, "loss": 0.2783, "step": 110 }, { "epoch": 1.32, "learning_rate": 0.0001824175824175824, "loss": 0.3196, "step": 120 }, { "epoch": 1.43, "learning_rate": 0.00018095238095238095, "loss": 0.2813, "step": 130 }, { "epoch": 1.54, "learning_rate": 0.0001794871794871795, "loss": 0.2686, "step": 140 }, { "epoch": 1.65, "learning_rate": 0.00017802197802197802, "loss": 0.2325, "step": 150 }, { "epoch": 1.76, "learning_rate": 0.00017655677655677656, "loss": 0.3969, "step": 160 }, { "epoch": 1.87, "learning_rate": 0.0001750915750915751, "loss": 0.1645, "step": 170 }, { "epoch": 1.98, "learning_rate": 0.00017362637362637365, "loss": 0.3043, "step": 180 }, { "epoch": 2.09, "learning_rate": 0.00017216117216117217, "loss": 0.1483, "step": 190 }, { "epoch": 2.2, "learning_rate": 0.00017069597069597068, "loss": 0.1047, "step": 200 }, { "epoch": 2.2, "eval_accuracy": 0.7529411764705882, "eval_loss": 0.8702762126922607, "eval_runtime": 1.4808, "eval_samples_per_second": 114.8, "eval_steps_per_second": 14.856, "step": 200 }, { "epoch": 2.31, "learning_rate": 0.00016923076923076923, "loss": 0.1261, "step": 210 }, { "epoch": 2.42, "learning_rate": 0.00016776556776556777, "loss": 0.282, "step": 220 }, { "epoch": 2.53, "learning_rate": 0.00016630036630036632, "loss": 0.1773, "step": 230 }, { "epoch": 2.64, "learning_rate": 0.00016483516483516484, "loss": 0.1566, "step": 240 }, { "epoch": 2.75, "learning_rate": 0.00016336996336996338, "loss": 0.2896, "step": 250 }, { "epoch": 2.86, "learning_rate": 0.00016190476190476192, "loss": 0.2573, "step": 260 }, { "epoch": 2.97, "learning_rate": 0.00016043956043956044, "loss": 0.1943, "step": 270 }, { "epoch": 3.08, "learning_rate": 0.00015897435897435896, "loss": 0.1993, "step": 280 }, { "epoch": 3.19, "learning_rate": 0.0001575091575091575, "loss": 0.1106, "step": 290 }, { "epoch": 3.3, "learning_rate": 0.00015604395604395605, "loss": 0.2249, "step": 300 }, { "epoch": 3.3, "eval_accuracy": 0.7588235294117647, "eval_loss": 0.9538877606391907, "eval_runtime": 1.4679, "eval_samples_per_second": 115.811, "eval_steps_per_second": 14.987, "step": 300 }, { "epoch": 3.41, "learning_rate": 0.0001545787545787546, "loss": 0.2232, "step": 310 }, { "epoch": 3.52, "learning_rate": 0.0001531135531135531, "loss": 0.084, "step": 320 }, { "epoch": 3.63, "learning_rate": 0.00015164835164835165, "loss": 0.1352, "step": 330 }, { "epoch": 3.74, "learning_rate": 0.0001501831501831502, "loss": 0.1632, "step": 340 }, { "epoch": 3.85, "learning_rate": 0.00014871794871794872, "loss": 0.2161, "step": 350 }, { "epoch": 3.96, "learning_rate": 0.00014725274725274726, "loss": 0.1823, "step": 360 }, { "epoch": 4.07, "learning_rate": 0.00014578754578754578, "loss": 0.0965, "step": 370 }, { "epoch": 4.18, "learning_rate": 0.00014432234432234432, "loss": 0.135, "step": 380 }, { "epoch": 4.29, "learning_rate": 0.00014285714285714287, "loss": 0.0782, "step": 390 }, { "epoch": 4.4, "learning_rate": 0.0001413919413919414, "loss": 0.0984, "step": 400 }, { "epoch": 4.4, "eval_accuracy": 0.7529411764705882, "eval_loss": 0.9318849444389343, "eval_runtime": 1.4691, "eval_samples_per_second": 115.719, "eval_steps_per_second": 14.975, "step": 400 }, { "epoch": 4.51, "learning_rate": 0.00013992673992673993, "loss": 0.1553, "step": 410 }, { "epoch": 4.62, "learning_rate": 0.00013846153846153847, "loss": 0.0954, "step": 420 }, { "epoch": 4.73, "learning_rate": 0.000136996336996337, "loss": 0.1225, "step": 430 }, { "epoch": 4.84, "learning_rate": 0.00013553113553113554, "loss": 0.2155, "step": 440 }, { "epoch": 4.95, "learning_rate": 0.00013406593406593405, "loss": 0.1496, "step": 450 }, { "epoch": 5.05, "learning_rate": 0.0001326007326007326, "loss": 0.0837, "step": 460 }, { "epoch": 5.16, "learning_rate": 0.00013113553113553114, "loss": 0.0693, "step": 470 }, { "epoch": 5.27, "learning_rate": 0.0001296703296703297, "loss": 0.08, "step": 480 }, { "epoch": 5.38, "learning_rate": 0.00012820512820512823, "loss": 0.0989, "step": 490 }, { "epoch": 5.49, "learning_rate": 0.00012673992673992675, "loss": 0.086, "step": 500 }, { "epoch": 5.49, "eval_accuracy": 0.7705882352941177, "eval_loss": 0.9061105251312256, "eval_runtime": 1.4763, "eval_samples_per_second": 115.156, "eval_steps_per_second": 14.903, "step": 500 }, { "epoch": 5.6, "learning_rate": 0.00012527472527472527, "loss": 0.1005, "step": 510 }, { "epoch": 5.71, "learning_rate": 0.0001238095238095238, "loss": 0.0685, "step": 520 }, { "epoch": 5.82, "learning_rate": 0.00012234432234432236, "loss": 0.1174, "step": 530 }, { "epoch": 5.93, "learning_rate": 0.00012087912087912087, "loss": 0.1728, "step": 540 }, { "epoch": 6.04, "learning_rate": 0.00011941391941391942, "loss": 0.086, "step": 550 }, { "epoch": 6.15, "learning_rate": 0.00011794871794871796, "loss": 0.0738, "step": 560 }, { "epoch": 6.26, "learning_rate": 0.0001164835164835165, "loss": 0.04, "step": 570 }, { "epoch": 6.37, "learning_rate": 0.00011501831501831501, "loss": 0.1145, "step": 580 }, { "epoch": 6.48, "learning_rate": 0.00011355311355311356, "loss": 0.0571, "step": 590 }, { "epoch": 6.59, "learning_rate": 0.0001120879120879121, "loss": 0.1164, "step": 600 }, { "epoch": 6.59, "eval_accuracy": 0.8176470588235294, "eval_loss": 0.7493334412574768, "eval_runtime": 1.4541, "eval_samples_per_second": 116.908, "eval_steps_per_second": 15.129, "step": 600 }, { "epoch": 6.7, "learning_rate": 0.00011062271062271063, "loss": 0.0479, "step": 610 }, { "epoch": 6.81, "learning_rate": 0.00010915750915750915, "loss": 0.0899, "step": 620 }, { "epoch": 6.92, "learning_rate": 0.0001076923076923077, "loss": 0.0746, "step": 630 }, { "epoch": 7.03, "learning_rate": 0.00010622710622710624, "loss": 0.0826, "step": 640 }, { "epoch": 7.14, "learning_rate": 0.00010476190476190477, "loss": 0.0382, "step": 650 }, { "epoch": 7.25, "learning_rate": 0.00010329670329670331, "loss": 0.0724, "step": 660 }, { "epoch": 7.36, "learning_rate": 0.00010183150183150183, "loss": 0.036, "step": 670 }, { "epoch": 7.47, "learning_rate": 0.00010036630036630038, "loss": 0.0579, "step": 680 }, { "epoch": 7.58, "learning_rate": 9.89010989010989e-05, "loss": 0.089, "step": 690 }, { "epoch": 7.69, "learning_rate": 9.743589743589744e-05, "loss": 0.0518, "step": 700 }, { "epoch": 7.69, "eval_accuracy": 0.7764705882352941, "eval_loss": 0.8781054615974426, "eval_runtime": 1.4674, "eval_samples_per_second": 115.852, "eval_steps_per_second": 14.993, "step": 700 }, { "epoch": 7.8, "learning_rate": 9.597069597069598e-05, "loss": 0.0603, "step": 710 }, { "epoch": 7.91, "learning_rate": 9.450549450549451e-05, "loss": 0.0565, "step": 720 }, { "epoch": 8.02, "learning_rate": 9.304029304029304e-05, "loss": 0.0689, "step": 730 }, { "epoch": 8.13, "learning_rate": 9.157509157509158e-05, "loss": 0.0295, "step": 740 }, { "epoch": 8.24, "learning_rate": 9.010989010989012e-05, "loss": 0.0555, "step": 750 }, { "epoch": 8.35, "learning_rate": 8.864468864468865e-05, "loss": 0.0501, "step": 760 }, { "epoch": 8.46, "learning_rate": 8.717948717948718e-05, "loss": 0.0657, "step": 770 }, { "epoch": 8.57, "learning_rate": 8.571428571428571e-05, "loss": 0.0479, "step": 780 }, { "epoch": 8.68, "learning_rate": 8.424908424908426e-05, "loss": 0.0665, "step": 790 }, { "epoch": 8.79, "learning_rate": 8.278388278388279e-05, "loss": 0.0458, "step": 800 }, { "epoch": 8.79, "eval_accuracy": 0.7823529411764706, "eval_loss": 0.8851077556610107, "eval_runtime": 1.4711, "eval_samples_per_second": 115.561, "eval_steps_per_second": 14.955, "step": 800 }, { "epoch": 8.9, "learning_rate": 8.131868131868132e-05, "loss": 0.0704, "step": 810 }, { "epoch": 9.01, "learning_rate": 7.985347985347986e-05, "loss": 0.0779, "step": 820 }, { "epoch": 9.12, "learning_rate": 7.83882783882784e-05, "loss": 0.0575, "step": 830 }, { "epoch": 9.23, "learning_rate": 7.692307692307693e-05, "loss": 0.0541, "step": 840 }, { "epoch": 9.34, "learning_rate": 7.545787545787546e-05, "loss": 0.0413, "step": 850 }, { "epoch": 9.45, "learning_rate": 7.3992673992674e-05, "loss": 0.0367, "step": 860 }, { "epoch": 9.56, "learning_rate": 7.252747252747253e-05, "loss": 0.0857, "step": 870 }, { "epoch": 9.67, "learning_rate": 7.106227106227106e-05, "loss": 0.061, "step": 880 }, { "epoch": 9.78, "learning_rate": 6.95970695970696e-05, "loss": 0.0408, "step": 890 }, { "epoch": 9.89, "learning_rate": 6.813186813186814e-05, "loss": 0.0521, "step": 900 }, { "epoch": 9.89, "eval_accuracy": 0.788235294117647, "eval_loss": 0.9448409080505371, "eval_runtime": 1.3677, "eval_samples_per_second": 124.3, "eval_steps_per_second": 16.086, "step": 900 }, { "epoch": 10.0, "learning_rate": 6.666666666666667e-05, "loss": 0.0452, "step": 910 }, { "epoch": 10.11, "learning_rate": 6.52014652014652e-05, "loss": 0.0573, "step": 920 }, { "epoch": 10.22, "learning_rate": 6.373626373626373e-05, "loss": 0.0333, "step": 930 }, { "epoch": 10.33, "learning_rate": 6.227106227106228e-05, "loss": 0.035, "step": 940 }, { "epoch": 10.44, "learning_rate": 6.08058608058608e-05, "loss": 0.0513, "step": 950 }, { "epoch": 10.55, "learning_rate": 5.9340659340659345e-05, "loss": 0.0412, "step": 960 }, { "epoch": 10.66, "learning_rate": 5.787545787545788e-05, "loss": 0.044, "step": 970 }, { "epoch": 10.77, "learning_rate": 5.6410256410256414e-05, "loss": 0.0559, "step": 980 }, { "epoch": 10.88, "learning_rate": 5.494505494505495e-05, "loss": 0.0889, "step": 990 }, { "epoch": 10.99, "learning_rate": 5.347985347985348e-05, "loss": 0.0576, "step": 1000 }, { "epoch": 10.99, "eval_accuracy": 0.7823529411764706, "eval_loss": 0.8883835673332214, "eval_runtime": 1.3691, "eval_samples_per_second": 124.171, "eval_steps_per_second": 16.069, "step": 1000 }, { "epoch": 11.1, "learning_rate": 5.201465201465202e-05, "loss": 0.0591, "step": 1010 }, { "epoch": 11.21, "learning_rate": 5.054945054945055e-05, "loss": 0.0649, "step": 1020 }, { "epoch": 11.32, "learning_rate": 4.908424908424908e-05, "loss": 0.0425, "step": 1030 }, { "epoch": 11.43, "learning_rate": 4.761904761904762e-05, "loss": 0.0471, "step": 1040 }, { "epoch": 11.54, "learning_rate": 4.615384615384616e-05, "loss": 0.0366, "step": 1050 }, { "epoch": 11.65, "learning_rate": 4.4688644688644696e-05, "loss": 0.0414, "step": 1060 }, { "epoch": 11.76, "learning_rate": 4.322344322344323e-05, "loss": 0.0393, "step": 1070 }, { "epoch": 11.87, "learning_rate": 4.1758241758241765e-05, "loss": 0.0551, "step": 1080 }, { "epoch": 11.98, "learning_rate": 4.0293040293040296e-05, "loss": 0.0503, "step": 1090 }, { "epoch": 12.09, "learning_rate": 3.8827838827838833e-05, "loss": 0.0442, "step": 1100 }, { "epoch": 12.09, "eval_accuracy": 0.788235294117647, "eval_loss": 0.896532416343689, "eval_runtime": 1.5128, "eval_samples_per_second": 112.377, "eval_steps_per_second": 14.543, "step": 1100 }, { "epoch": 12.2, "learning_rate": 3.7362637362637365e-05, "loss": 0.0595, "step": 1110 }, { "epoch": 12.31, "learning_rate": 3.58974358974359e-05, "loss": 0.0304, "step": 1120 }, { "epoch": 12.42, "learning_rate": 3.443223443223443e-05, "loss": 0.0365, "step": 1130 }, { "epoch": 12.53, "learning_rate": 3.296703296703297e-05, "loss": 0.0917, "step": 1140 }, { "epoch": 12.64, "learning_rate": 3.15018315018315e-05, "loss": 0.0549, "step": 1150 }, { "epoch": 12.75, "learning_rate": 3.0036630036630036e-05, "loss": 0.0616, "step": 1160 }, { "epoch": 12.86, "learning_rate": 2.857142857142857e-05, "loss": 0.0526, "step": 1170 }, { "epoch": 12.97, "learning_rate": 2.7106227106227105e-05, "loss": 0.0297, "step": 1180 }, { "epoch": 13.08, "learning_rate": 2.564102564102564e-05, "loss": 0.0409, "step": 1190 }, { "epoch": 13.19, "learning_rate": 2.4175824175824177e-05, "loss": 0.0254, "step": 1200 }, { "epoch": 13.19, "eval_accuracy": 0.788235294117647, "eval_loss": 0.9140186905860901, "eval_runtime": 1.3923, "eval_samples_per_second": 122.1, "eval_steps_per_second": 15.801, "step": 1200 }, { "epoch": 13.3, "learning_rate": 2.2710622710622712e-05, "loss": 0.0373, "step": 1210 }, { "epoch": 13.41, "learning_rate": 2.1245421245421246e-05, "loss": 0.0325, "step": 1220 }, { "epoch": 13.52, "learning_rate": 1.978021978021978e-05, "loss": 0.0373, "step": 1230 }, { "epoch": 13.63, "learning_rate": 1.8315018315018315e-05, "loss": 0.0545, "step": 1240 }, { "epoch": 13.74, "learning_rate": 1.6849816849816853e-05, "loss": 0.0613, "step": 1250 }, { "epoch": 13.85, "learning_rate": 1.5384615384615387e-05, "loss": 0.0515, "step": 1260 }, { "epoch": 13.96, "learning_rate": 1.391941391941392e-05, "loss": 0.0708, "step": 1270 }, { "epoch": 14.07, "learning_rate": 1.2454212454212454e-05, "loss": 0.0615, "step": 1280 }, { "epoch": 14.18, "learning_rate": 1.0989010989010989e-05, "loss": 0.0495, "step": 1290 }, { "epoch": 14.29, "learning_rate": 9.523809523809523e-06, "loss": 0.0426, "step": 1300 }, { "epoch": 14.29, "eval_accuracy": 0.788235294117647, "eval_loss": 0.927388072013855, "eval_runtime": 1.3816, "eval_samples_per_second": 123.046, "eval_steps_per_second": 15.924, "step": 1300 }, { "epoch": 14.4, "learning_rate": 8.058608058608059e-06, "loss": 0.0437, "step": 1310 }, { "epoch": 14.51, "learning_rate": 6.5934065934065935e-06, "loss": 0.028, "step": 1320 }, { "epoch": 14.62, "learning_rate": 5.128205128205128e-06, "loss": 0.0345, "step": 1330 }, { "epoch": 14.73, "learning_rate": 3.663003663003663e-06, "loss": 0.0565, "step": 1340 }, { "epoch": 14.84, "learning_rate": 2.197802197802198e-06, "loss": 0.0758, "step": 1350 }, { "epoch": 14.95, "learning_rate": 7.326007326007326e-07, "loss": 0.047, "step": 1360 }, { "epoch": 15.0, "step": 1365, "total_flos": 1.6866433871220326e+18, "train_loss": 0.11297949704276773, "train_runtime": 502.5996, "train_samples_per_second": 43.305, "train_steps_per_second": 2.716 } ], "max_steps": 1365, "num_train_epochs": 15, "total_flos": 1.6866433871220326e+18, "trial_name": null, "trial_params": null }