{ "best_metric": 0.7472190257000384, "best_model_checkpoint": "vit-base-patch16-224-in21k-finetuned-crop-classification/checkpoint-1468", "epoch": 9.97275204359673, "eval_steps": 500, "global_step": 1830, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.05, "learning_rate": 2.73224043715847e-06, "loss": 1.6369, "step": 10 }, { "epoch": 0.11, "learning_rate": 5.46448087431694e-06, "loss": 1.6111, "step": 20 }, { "epoch": 0.16, "learning_rate": 8.196721311475409e-06, "loss": 1.5398, "step": 30 }, { "epoch": 0.22, "learning_rate": 1.092896174863388e-05, "loss": 1.4457, "step": 40 }, { "epoch": 0.27, "learning_rate": 1.3661202185792351e-05, "loss": 1.3015, "step": 50 }, { "epoch": 0.33, "learning_rate": 1.6393442622950818e-05, "loss": 1.1684, "step": 60 }, { "epoch": 0.38, "learning_rate": 1.912568306010929e-05, "loss": 1.0962, "step": 70 }, { "epoch": 0.44, "learning_rate": 2.185792349726776e-05, "loss": 1.0177, "step": 80 }, { "epoch": 0.49, "learning_rate": 2.459016393442623e-05, "loss": 0.9627, "step": 90 }, { "epoch": 0.54, "learning_rate": 2.7322404371584703e-05, "loss": 0.8997, "step": 100 }, { "epoch": 0.6, "learning_rate": 3.005464480874317e-05, "loss": 0.8822, "step": 110 }, { "epoch": 0.65, "learning_rate": 3.2786885245901635e-05, "loss": 0.8726, "step": 120 }, { "epoch": 0.71, "learning_rate": 3.551912568306011e-05, "loss": 0.8288, "step": 130 }, { "epoch": 0.76, "learning_rate": 3.825136612021858e-05, "loss": 0.8337, "step": 140 }, { "epoch": 0.82, "learning_rate": 4.098360655737705e-05, "loss": 0.829, "step": 150 }, { "epoch": 0.87, "learning_rate": 4.371584699453552e-05, "loss": 0.7949, "step": 160 }, { "epoch": 0.93, "learning_rate": 4.644808743169399e-05, "loss": 0.7856, "step": 170 }, { "epoch": 0.98, "learning_rate": 4.918032786885246e-05, "loss": 0.8031, "step": 180 }, { "epoch": 1.0, "eval_accuracy": 0.7050249328730341, "eval_loss": 0.7602581977844238, "eval_runtime": 93.216, "eval_samples_per_second": 27.967, "eval_steps_per_second": 0.88, "step": 183 }, { "epoch": 1.04, "learning_rate": 4.9787492410443234e-05, "loss": 0.78, "step": 190 }, { "epoch": 1.09, "learning_rate": 4.948391013964785e-05, "loss": 0.7994, "step": 200 }, { "epoch": 1.14, "learning_rate": 4.918032786885246e-05, "loss": 0.7665, "step": 210 }, { "epoch": 1.2, "learning_rate": 4.8876745598057074e-05, "loss": 0.7866, "step": 220 }, { "epoch": 1.25, "learning_rate": 4.857316332726169e-05, "loss": 0.7733, "step": 230 }, { "epoch": 1.31, "learning_rate": 4.8269581056466304e-05, "loss": 0.7504, "step": 240 }, { "epoch": 1.36, "learning_rate": 4.7965998785670915e-05, "loss": 0.7805, "step": 250 }, { "epoch": 1.42, "learning_rate": 4.766241651487553e-05, "loss": 0.7671, "step": 260 }, { "epoch": 1.47, "learning_rate": 4.7358834244080144e-05, "loss": 0.7012, "step": 270 }, { "epoch": 1.53, "learning_rate": 4.705525197328476e-05, "loss": 0.7305, "step": 280 }, { "epoch": 1.58, "learning_rate": 4.6751669702489374e-05, "loss": 0.7441, "step": 290 }, { "epoch": 1.63, "learning_rate": 4.644808743169399e-05, "loss": 0.7758, "step": 300 }, { "epoch": 1.69, "learning_rate": 4.61445051608986e-05, "loss": 0.731, "step": 310 }, { "epoch": 1.74, "learning_rate": 4.584092289010322e-05, "loss": 0.7303, "step": 320 }, { "epoch": 1.8, "learning_rate": 4.553734061930783e-05, "loss": 0.7265, "step": 330 }, { "epoch": 1.85, "learning_rate": 4.523375834851245e-05, "loss": 0.7385, "step": 340 }, { "epoch": 1.91, "learning_rate": 4.493017607771706e-05, "loss": 0.7396, "step": 350 }, { "epoch": 1.96, "learning_rate": 4.462659380692168e-05, "loss": 0.7311, "step": 360 }, { "epoch": 2.0, "eval_accuracy": 0.7249712313003452, "eval_loss": 0.7046979069709778, "eval_runtime": 93.2263, "eval_samples_per_second": 27.964, "eval_steps_per_second": 0.88, "step": 367 }, { "epoch": 2.02, "learning_rate": 4.432301153612629e-05, "loss": 0.7129, "step": 370 }, { "epoch": 2.07, "learning_rate": 4.401942926533091e-05, "loss": 0.7103, "step": 380 }, { "epoch": 2.13, "learning_rate": 4.371584699453552e-05, "loss": 0.6994, "step": 390 }, { "epoch": 2.18, "learning_rate": 4.341226472374014e-05, "loss": 0.6637, "step": 400 }, { "epoch": 2.23, "learning_rate": 4.310868245294475e-05, "loss": 0.7107, "step": 410 }, { "epoch": 2.29, "learning_rate": 4.280510018214937e-05, "loss": 0.7326, "step": 420 }, { "epoch": 2.34, "learning_rate": 4.250151791135398e-05, "loss": 0.7229, "step": 430 }, { "epoch": 2.4, "learning_rate": 4.21979356405586e-05, "loss": 0.7064, "step": 440 }, { "epoch": 2.45, "learning_rate": 4.189435336976321e-05, "loss": 0.6781, "step": 450 }, { "epoch": 2.51, "learning_rate": 4.1590771098967827e-05, "loss": 0.7207, "step": 460 }, { "epoch": 2.56, "learning_rate": 4.128718882817244e-05, "loss": 0.7244, "step": 470 }, { "epoch": 2.62, "learning_rate": 4.098360655737705e-05, "loss": 0.6972, "step": 480 }, { "epoch": 2.67, "learning_rate": 4.068002428658167e-05, "loss": 0.7148, "step": 490 }, { "epoch": 2.72, "learning_rate": 4.037644201578628e-05, "loss": 0.7168, "step": 500 }, { "epoch": 2.78, "learning_rate": 4.007285974499089e-05, "loss": 0.7146, "step": 510 }, { "epoch": 2.83, "learning_rate": 3.976927747419551e-05, "loss": 0.6583, "step": 520 }, { "epoch": 2.89, "learning_rate": 3.946569520340012e-05, "loss": 0.7119, "step": 530 }, { "epoch": 2.94, "learning_rate": 3.916211293260474e-05, "loss": 0.6891, "step": 540 }, { "epoch": 3.0, "learning_rate": 3.885853066180935e-05, "loss": 0.7144, "step": 550 }, { "epoch": 3.0, "eval_accuracy": 0.7211354046797085, "eval_loss": 0.6968041062355042, "eval_runtime": 91.8623, "eval_samples_per_second": 28.379, "eval_steps_per_second": 0.893, "step": 550 }, { "epoch": 3.05, "learning_rate": 3.8554948391013967e-05, "loss": 0.7037, "step": 560 }, { "epoch": 3.11, "learning_rate": 3.825136612021858e-05, "loss": 0.6613, "step": 570 }, { "epoch": 3.16, "learning_rate": 3.7947783849423196e-05, "loss": 0.6462, "step": 580 }, { "epoch": 3.22, "learning_rate": 3.764420157862781e-05, "loss": 0.6484, "step": 590 }, { "epoch": 3.27, "learning_rate": 3.7340619307832425e-05, "loss": 0.6747, "step": 600 }, { "epoch": 3.32, "learning_rate": 3.7037037037037037e-05, "loss": 0.7036, "step": 610 }, { "epoch": 3.38, "learning_rate": 3.6733454766241655e-05, "loss": 0.6444, "step": 620 }, { "epoch": 3.43, "learning_rate": 3.6429872495446266e-05, "loss": 0.6337, "step": 630 }, { "epoch": 3.49, "learning_rate": 3.6126290224650884e-05, "loss": 0.6715, "step": 640 }, { "epoch": 3.54, "learning_rate": 3.5822707953855495e-05, "loss": 0.6583, "step": 650 }, { "epoch": 3.6, "learning_rate": 3.551912568306011e-05, "loss": 0.663, "step": 660 }, { "epoch": 3.65, "learning_rate": 3.5215543412264725e-05, "loss": 0.6901, "step": 670 }, { "epoch": 3.71, "learning_rate": 3.491196114146934e-05, "loss": 0.6582, "step": 680 }, { "epoch": 3.76, "learning_rate": 3.4608378870673954e-05, "loss": 0.6868, "step": 690 }, { "epoch": 3.81, "learning_rate": 3.430479659987857e-05, "loss": 0.7168, "step": 700 }, { "epoch": 3.87, "learning_rate": 3.400121432908318e-05, "loss": 0.6522, "step": 710 }, { "epoch": 3.92, "learning_rate": 3.36976320582878e-05, "loss": 0.6337, "step": 720 }, { "epoch": 3.98, "learning_rate": 3.339404978749241e-05, "loss": 0.6516, "step": 730 }, { "epoch": 4.0, "eval_accuracy": 0.7376294591484465, "eval_loss": 0.6568663716316223, "eval_runtime": 93.5303, "eval_samples_per_second": 27.873, "eval_steps_per_second": 0.877, "step": 734 }, { "epoch": 4.03, "learning_rate": 3.3090467516697024e-05, "loss": 0.6547, "step": 740 }, { "epoch": 4.09, "learning_rate": 3.2786885245901635e-05, "loss": 0.6444, "step": 750 }, { "epoch": 4.14, "learning_rate": 3.248330297510625e-05, "loss": 0.6687, "step": 760 }, { "epoch": 4.2, "learning_rate": 3.2179720704310865e-05, "loss": 0.6634, "step": 770 }, { "epoch": 4.25, "learning_rate": 3.187613843351548e-05, "loss": 0.6334, "step": 780 }, { "epoch": 4.31, "learning_rate": 3.1572556162720094e-05, "loss": 0.6095, "step": 790 }, { "epoch": 4.36, "learning_rate": 3.126897389192471e-05, "loss": 0.6467, "step": 800 }, { "epoch": 4.41, "learning_rate": 3.096539162112932e-05, "loss": 0.6532, "step": 810 }, { "epoch": 4.47, "learning_rate": 3.066180935033394e-05, "loss": 0.6544, "step": 820 }, { "epoch": 4.52, "learning_rate": 3.0358227079538553e-05, "loss": 0.6161, "step": 830 }, { "epoch": 4.58, "learning_rate": 3.005464480874317e-05, "loss": 0.6092, "step": 840 }, { "epoch": 4.63, "learning_rate": 2.9751062537947782e-05, "loss": 0.6323, "step": 850 }, { "epoch": 4.69, "learning_rate": 2.94474802671524e-05, "loss": 0.6593, "step": 860 }, { "epoch": 4.74, "learning_rate": 2.9143897996357018e-05, "loss": 0.6493, "step": 870 }, { "epoch": 4.8, "learning_rate": 2.884031572556163e-05, "loss": 0.6475, "step": 880 }, { "epoch": 4.85, "learning_rate": 2.8536733454766244e-05, "loss": 0.6196, "step": 890 }, { "epoch": 4.9, "learning_rate": 2.823315118397086e-05, "loss": 0.6257, "step": 900 }, { "epoch": 4.96, "learning_rate": 2.7929568913175473e-05, "loss": 0.6371, "step": 910 }, { "epoch": 5.0, "eval_accuracy": 0.7376294591484465, "eval_loss": 0.6482810974121094, "eval_runtime": 93.1536, "eval_samples_per_second": 27.986, "eval_steps_per_second": 0.88, "step": 917 }, { "epoch": 5.01, "learning_rate": 2.7625986642380085e-05, "loss": 0.6049, "step": 920 }, { "epoch": 5.07, "learning_rate": 2.7322404371584703e-05, "loss": 0.6072, "step": 930 }, { "epoch": 5.12, "learning_rate": 2.7018822100789314e-05, "loss": 0.5548, "step": 940 }, { "epoch": 5.18, "learning_rate": 2.6715239829993932e-05, "loss": 0.5689, "step": 950 }, { "epoch": 5.23, "learning_rate": 2.6411657559198543e-05, "loss": 0.6459, "step": 960 }, { "epoch": 5.29, "learning_rate": 2.610807528840316e-05, "loss": 0.6033, "step": 970 }, { "epoch": 5.34, "learning_rate": 2.5804493017607773e-05, "loss": 0.5981, "step": 980 }, { "epoch": 5.4, "learning_rate": 2.550091074681239e-05, "loss": 0.6201, "step": 990 }, { "epoch": 5.45, "learning_rate": 2.5197328476017002e-05, "loss": 0.6121, "step": 1000 }, { "epoch": 5.5, "learning_rate": 2.4893746205221617e-05, "loss": 0.6147, "step": 1010 }, { "epoch": 5.56, "learning_rate": 2.459016393442623e-05, "loss": 0.6007, "step": 1020 }, { "epoch": 5.61, "learning_rate": 2.4286581663630846e-05, "loss": 0.6087, "step": 1030 }, { "epoch": 5.67, "learning_rate": 2.3982999392835457e-05, "loss": 0.6195, "step": 1040 }, { "epoch": 5.72, "learning_rate": 2.3679417122040072e-05, "loss": 0.5848, "step": 1050 }, { "epoch": 5.78, "learning_rate": 2.3375834851244687e-05, "loss": 0.6066, "step": 1060 }, { "epoch": 5.83, "learning_rate": 2.30722525804493e-05, "loss": 0.6386, "step": 1070 }, { "epoch": 5.89, "learning_rate": 2.2768670309653916e-05, "loss": 0.6196, "step": 1080 }, { "epoch": 5.94, "learning_rate": 2.246508803885853e-05, "loss": 0.6127, "step": 1090 }, { "epoch": 5.99, "learning_rate": 2.2161505768063146e-05, "loss": 0.6246, "step": 1100 }, { "epoch": 6.0, "eval_accuracy": 0.7364787111622555, "eval_loss": 0.6492410898208618, "eval_runtime": 92.6582, "eval_samples_per_second": 28.136, "eval_steps_per_second": 0.885, "step": 1101 }, { "epoch": 6.05, "learning_rate": 2.185792349726776e-05, "loss": 0.5467, "step": 1110 }, { "epoch": 6.1, "learning_rate": 2.1554341226472375e-05, "loss": 0.577, "step": 1120 }, { "epoch": 6.16, "learning_rate": 2.125075895567699e-05, "loss": 0.6047, "step": 1130 }, { "epoch": 6.21, "learning_rate": 2.0947176684881604e-05, "loss": 0.5822, "step": 1140 }, { "epoch": 6.27, "learning_rate": 2.064359441408622e-05, "loss": 0.605, "step": 1150 }, { "epoch": 6.32, "learning_rate": 2.0340012143290834e-05, "loss": 0.5743, "step": 1160 }, { "epoch": 6.38, "learning_rate": 2.0036429872495445e-05, "loss": 0.5658, "step": 1170 }, { "epoch": 6.43, "learning_rate": 1.973284760170006e-05, "loss": 0.5686, "step": 1180 }, { "epoch": 6.49, "learning_rate": 1.9429265330904674e-05, "loss": 0.5884, "step": 1190 }, { "epoch": 6.54, "learning_rate": 1.912568306010929e-05, "loss": 0.544, "step": 1200 }, { "epoch": 6.59, "learning_rate": 1.8822100789313904e-05, "loss": 0.5888, "step": 1210 }, { "epoch": 6.65, "learning_rate": 1.8518518518518518e-05, "loss": 0.5377, "step": 1220 }, { "epoch": 6.7, "learning_rate": 1.8214936247723133e-05, "loss": 0.5665, "step": 1230 }, { "epoch": 6.76, "learning_rate": 1.7911353976927748e-05, "loss": 0.6032, "step": 1240 }, { "epoch": 6.81, "learning_rate": 1.7607771706132362e-05, "loss": 0.5905, "step": 1250 }, { "epoch": 6.87, "learning_rate": 1.7304189435336977e-05, "loss": 0.5831, "step": 1260 }, { "epoch": 6.92, "learning_rate": 1.700060716454159e-05, "loss": 0.5762, "step": 1270 }, { "epoch": 6.98, "learning_rate": 1.6697024893746206e-05, "loss": 0.5659, "step": 1280 }, { "epoch": 7.0, "eval_accuracy": 0.7410817031070196, "eval_loss": 0.6481292843818665, "eval_runtime": 93.3242, "eval_samples_per_second": 27.935, "eval_steps_per_second": 0.879, "step": 1284 }, { "epoch": 7.03, "learning_rate": 1.6393442622950818e-05, "loss": 0.5738, "step": 1290 }, { "epoch": 7.08, "learning_rate": 1.6089860352155432e-05, "loss": 0.5339, "step": 1300 }, { "epoch": 7.14, "learning_rate": 1.5786278081360047e-05, "loss": 0.5405, "step": 1310 }, { "epoch": 7.19, "learning_rate": 1.548269581056466e-05, "loss": 0.5792, "step": 1320 }, { "epoch": 7.25, "learning_rate": 1.5179113539769276e-05, "loss": 0.5677, "step": 1330 }, { "epoch": 7.3, "learning_rate": 1.4875531268973891e-05, "loss": 0.5319, "step": 1340 }, { "epoch": 7.36, "learning_rate": 1.4571948998178509e-05, "loss": 0.5693, "step": 1350 }, { "epoch": 7.41, "learning_rate": 1.4268366727383122e-05, "loss": 0.5532, "step": 1360 }, { "epoch": 7.47, "learning_rate": 1.3964784456587737e-05, "loss": 0.565, "step": 1370 }, { "epoch": 7.52, "learning_rate": 1.3661202185792351e-05, "loss": 0.536, "step": 1380 }, { "epoch": 7.57, "learning_rate": 1.3357619914996966e-05, "loss": 0.5408, "step": 1390 }, { "epoch": 7.63, "learning_rate": 1.305403764420158e-05, "loss": 0.5835, "step": 1400 }, { "epoch": 7.68, "learning_rate": 1.2750455373406195e-05, "loss": 0.5353, "step": 1410 }, { "epoch": 7.74, "learning_rate": 1.2446873102610808e-05, "loss": 0.5568, "step": 1420 }, { "epoch": 7.79, "learning_rate": 1.2143290831815423e-05, "loss": 0.5531, "step": 1430 }, { "epoch": 7.85, "learning_rate": 1.1839708561020036e-05, "loss": 0.555, "step": 1440 }, { "epoch": 7.9, "learning_rate": 1.153612629022465e-05, "loss": 0.5478, "step": 1450 }, { "epoch": 7.96, "learning_rate": 1.1232544019429265e-05, "loss": 0.533, "step": 1460 }, { "epoch": 8.0, "eval_accuracy": 0.7472190257000384, "eval_loss": 0.6449919939041138, "eval_runtime": 93.8292, "eval_samples_per_second": 27.785, "eval_steps_per_second": 0.874, "step": 1468 }, { "epoch": 8.01, "learning_rate": 1.092896174863388e-05, "loss": 0.5692, "step": 1470 }, { "epoch": 8.07, "learning_rate": 1.0625379477838495e-05, "loss": 0.5422, "step": 1480 }, { "epoch": 8.12, "learning_rate": 1.032179720704311e-05, "loss": 0.5154, "step": 1490 }, { "epoch": 8.17, "learning_rate": 1.0018214936247722e-05, "loss": 0.5471, "step": 1500 }, { "epoch": 8.23, "learning_rate": 9.714632665452337e-06, "loss": 0.5253, "step": 1510 }, { "epoch": 8.28, "learning_rate": 9.411050394656952e-06, "loss": 0.526, "step": 1520 }, { "epoch": 8.34, "learning_rate": 9.107468123861566e-06, "loss": 0.5038, "step": 1530 }, { "epoch": 8.39, "learning_rate": 8.803885853066181e-06, "loss": 0.5109, "step": 1540 }, { "epoch": 8.45, "learning_rate": 8.500303582270796e-06, "loss": 0.4941, "step": 1550 }, { "epoch": 8.5, "learning_rate": 8.196721311475409e-06, "loss": 0.5616, "step": 1560 }, { "epoch": 8.56, "learning_rate": 7.893139040680023e-06, "loss": 0.5514, "step": 1570 }, { "epoch": 8.61, "learning_rate": 7.589556769884638e-06, "loss": 0.5203, "step": 1580 }, { "epoch": 8.66, "learning_rate": 7.2859744990892545e-06, "loss": 0.5351, "step": 1590 }, { "epoch": 8.72, "learning_rate": 6.982392228293868e-06, "loss": 0.5334, "step": 1600 }, { "epoch": 8.77, "learning_rate": 6.678809957498483e-06, "loss": 0.5162, "step": 1610 }, { "epoch": 8.83, "learning_rate": 6.375227686703098e-06, "loss": 0.5482, "step": 1620 }, { "epoch": 8.88, "learning_rate": 6.0716454159077115e-06, "loss": 0.5077, "step": 1630 }, { "epoch": 8.94, "learning_rate": 5.768063145112325e-06, "loss": 0.5392, "step": 1640 }, { "epoch": 8.99, "learning_rate": 5.46448087431694e-06, "loss": 0.5416, "step": 1650 }, { "epoch": 9.0, "eval_accuracy": 0.74530111238972, "eval_loss": 0.638206422328949, "eval_runtime": 94.5986, "eval_samples_per_second": 27.559, "eval_steps_per_second": 0.867, "step": 1651 }, { "epoch": 9.05, "learning_rate": 5.160898603521555e-06, "loss": 0.541, "step": 1660 }, { "epoch": 9.1, "learning_rate": 4.8573163327261686e-06, "loss": 0.5393, "step": 1670 }, { "epoch": 9.16, "learning_rate": 4.553734061930783e-06, "loss": 0.488, "step": 1680 }, { "epoch": 9.21, "learning_rate": 4.250151791135398e-06, "loss": 0.5104, "step": 1690 }, { "epoch": 9.26, "learning_rate": 3.946569520340012e-06, "loss": 0.5151, "step": 1700 }, { "epoch": 9.32, "learning_rate": 3.6429872495446273e-06, "loss": 0.5165, "step": 1710 }, { "epoch": 9.37, "learning_rate": 3.3394049787492415e-06, "loss": 0.5116, "step": 1720 }, { "epoch": 9.43, "learning_rate": 3.0358227079538558e-06, "loss": 0.5174, "step": 1730 }, { "epoch": 9.48, "learning_rate": 2.73224043715847e-06, "loss": 0.5321, "step": 1740 }, { "epoch": 9.54, "learning_rate": 2.4286581663630843e-06, "loss": 0.4799, "step": 1750 }, { "epoch": 9.59, "learning_rate": 2.125075895567699e-06, "loss": 0.4766, "step": 1760 }, { "epoch": 9.65, "learning_rate": 1.8214936247723136e-06, "loss": 0.5425, "step": 1770 }, { "epoch": 9.7, "learning_rate": 1.5179113539769279e-06, "loss": 0.4936, "step": 1780 }, { "epoch": 9.75, "learning_rate": 1.2143290831815421e-06, "loss": 0.5408, "step": 1790 }, { "epoch": 9.81, "learning_rate": 9.107468123861568e-07, "loss": 0.5223, "step": 1800 }, { "epoch": 9.86, "learning_rate": 6.071645415907711e-07, "loss": 0.5074, "step": 1810 }, { "epoch": 9.92, "learning_rate": 3.0358227079538554e-07, "loss": 0.5151, "step": 1820 }, { "epoch": 9.97, "learning_rate": 0.0, "loss": 0.5062, "step": 1830 }, { "epoch": 9.97, "eval_accuracy": 0.7460682777138473, "eval_loss": 0.6395400166511536, "eval_runtime": 94.039, "eval_samples_per_second": 27.723, "eval_steps_per_second": 0.872, "step": 1830 }, { "epoch": 9.97, "step": 1830, "total_flos": 1.8133380652001642e+19, "train_loss": 0.6611971782204883, "train_runtime": 13354.5549, "train_samples_per_second": 17.568, "train_steps_per_second": 0.137 } ], "logging_steps": 10, "max_steps": 1830, "num_input_tokens_seen": 0, "num_train_epochs": 10, "save_steps": 500, "total_flos": 1.8133380652001642e+19, "train_batch_size": 32, "trial_name": null, "trial_params": null }