{ "best_metric": null, "best_model_checkpoint": null, "epoch": 10.0, "global_step": 5630, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.18, "learning_rate": 6.937275985663082e-06, "loss": 0.6362, "step": 100 }, { "epoch": 0.18, "eval_accuracy": 0.7197197079658508, "eval_loss": 0.5481122136116028, "eval_runtime": 6.2072, "eval_samples_per_second": 160.941, "eval_steps_per_second": 10.149, "step": 100 }, { "epoch": 0.36, "learning_rate": 6.811827956989247e-06, "loss": 0.4264, "step": 200 }, { "epoch": 0.36, "eval_accuracy": 0.8008008003234863, "eval_loss": 0.4550396203994751, "eval_runtime": 6.2195, "eval_samples_per_second": 160.623, "eval_steps_per_second": 10.129, "step": 200 }, { "epoch": 0.53, "learning_rate": 6.6863799283154114e-06, "loss": 0.4174, "step": 300 }, { "epoch": 0.53, "eval_accuracy": 0.7867867946624756, "eval_loss": 0.452409952878952, "eval_runtime": 6.2183, "eval_samples_per_second": 160.655, "eval_steps_per_second": 10.131, "step": 300 }, { "epoch": 0.71, "learning_rate": 6.560931899641577e-06, "loss": 0.4197, "step": 400 }, { "epoch": 0.71, "eval_accuracy": 0.7917917966842651, "eval_loss": 0.4586125910282135, "eval_runtime": 6.2441, "eval_samples_per_second": 159.991, "eval_steps_per_second": 10.09, "step": 400 }, { "epoch": 0.89, "learning_rate": 6.435483870967742e-06, "loss": 0.3819, "step": 500 }, { "epoch": 0.89, "eval_accuracy": 0.8078078031539917, "eval_loss": 0.4367608428001404, "eval_runtime": 6.2213, "eval_samples_per_second": 160.577, "eval_steps_per_second": 10.126, "step": 500 }, { "epoch": 1.07, "learning_rate": 6.310035842293907e-06, "loss": 0.3558, "step": 600 }, { "epoch": 1.07, "eval_accuracy": 0.8068068027496338, "eval_loss": 0.4524727463722229, "eval_runtime": 6.2342, "eval_samples_per_second": 160.246, "eval_steps_per_second": 10.106, "step": 600 }, { "epoch": 1.24, "learning_rate": 6.184587813620071e-06, "loss": 0.2982, "step": 700 }, { "epoch": 1.24, "eval_accuracy": 0.792792797088623, "eval_loss": 0.49992287158966064, "eval_runtime": 6.206, "eval_samples_per_second": 160.973, "eval_steps_per_second": 10.151, "step": 700 }, { "epoch": 1.42, "learning_rate": 6.059139784946236e-06, "loss": 0.2885, "step": 800 }, { "epoch": 1.42, "eval_accuracy": 0.8108108043670654, "eval_loss": 0.5129059553146362, "eval_runtime": 6.2199, "eval_samples_per_second": 160.613, "eval_steps_per_second": 10.129, "step": 800 }, { "epoch": 1.6, "learning_rate": 5.933691756272401e-06, "loss": 0.253, "step": 900 }, { "epoch": 1.6, "eval_accuracy": 0.8208208084106445, "eval_loss": 0.5872611403465271, "eval_runtime": 6.2332, "eval_samples_per_second": 160.27, "eval_steps_per_second": 10.107, "step": 900 }, { "epoch": 1.78, "learning_rate": 5.8082437275985665e-06, "loss": 0.3354, "step": 1000 }, { "epoch": 1.78, "eval_accuracy": 0.8178178071975708, "eval_loss": 0.4244420826435089, "eval_runtime": 6.2275, "eval_samples_per_second": 160.417, "eval_steps_per_second": 10.116, "step": 1000 }, { "epoch": 1.95, "learning_rate": 5.682795698924731e-06, "loss": 0.3083, "step": 1100 }, { "epoch": 1.95, "eval_accuracy": 0.8058058023452759, "eval_loss": 0.4852960705757141, "eval_runtime": 6.2193, "eval_samples_per_second": 160.63, "eval_steps_per_second": 10.13, "step": 1100 }, { "epoch": 2.13, "learning_rate": 5.557347670250896e-06, "loss": 0.2301, "step": 1200 }, { "epoch": 2.13, "eval_accuracy": 0.8018018007278442, "eval_loss": 0.7208853960037231, "eval_runtime": 6.2021, "eval_samples_per_second": 161.075, "eval_steps_per_second": 10.158, "step": 1200 }, { "epoch": 2.31, "learning_rate": 5.431899641577061e-06, "loss": 0.2167, "step": 1300 }, { "epoch": 2.31, "eval_accuracy": 0.7777777910232544, "eval_loss": 0.8089737892150879, "eval_runtime": 6.2037, "eval_samples_per_second": 161.034, "eval_steps_per_second": 10.155, "step": 1300 }, { "epoch": 2.49, "learning_rate": 5.306451612903225e-06, "loss": 0.1863, "step": 1400 }, { "epoch": 2.49, "eval_accuracy": 0.8038038015365601, "eval_loss": 0.6812323927879333, "eval_runtime": 6.2398, "eval_samples_per_second": 160.102, "eval_steps_per_second": 10.097, "step": 1400 }, { "epoch": 2.66, "learning_rate": 5.181003584229391e-06, "loss": 0.2181, "step": 1500 }, { "epoch": 2.66, "eval_accuracy": 0.8138138055801392, "eval_loss": 0.6958026885986328, "eval_runtime": 6.2122, "eval_samples_per_second": 160.812, "eval_steps_per_second": 10.141, "step": 1500 }, { "epoch": 2.84, "learning_rate": 5.0555555555555555e-06, "loss": 0.2159, "step": 1600 }, { "epoch": 2.84, "eval_accuracy": 0.8118118047714233, "eval_loss": 0.6314735412597656, "eval_runtime": 6.2306, "eval_samples_per_second": 160.337, "eval_steps_per_second": 10.111, "step": 1600 }, { "epoch": 3.02, "learning_rate": 4.930107526881721e-06, "loss": 0.1828, "step": 1700 }, { "epoch": 3.02, "eval_accuracy": 0.8138138055801392, "eval_loss": 0.7173236608505249, "eval_runtime": 6.2107, "eval_samples_per_second": 160.851, "eval_steps_per_second": 10.144, "step": 1700 }, { "epoch": 3.2, "learning_rate": 4.804659498207885e-06, "loss": 0.1287, "step": 1800 }, { "epoch": 3.2, "eval_accuracy": 0.8018018007278442, "eval_loss": 0.9080932140350342, "eval_runtime": 6.2027, "eval_samples_per_second": 161.06, "eval_steps_per_second": 10.157, "step": 1800 }, { "epoch": 3.37, "learning_rate": 4.67921146953405e-06, "loss": 0.1711, "step": 1900 }, { "epoch": 3.37, "eval_accuracy": 0.8068068027496338, "eval_loss": 0.8858422040939331, "eval_runtime": 6.2188, "eval_samples_per_second": 160.641, "eval_steps_per_second": 10.131, "step": 1900 }, { "epoch": 3.55, "learning_rate": 4.553763440860215e-06, "loss": 0.1598, "step": 2000 }, { "epoch": 3.55, "eval_accuracy": 0.8028028011322021, "eval_loss": 0.7877860069274902, "eval_runtime": 6.2062, "eval_samples_per_second": 160.967, "eval_steps_per_second": 10.151, "step": 2000 }, { "epoch": 3.73, "learning_rate": 4.42831541218638e-06, "loss": 0.1467, "step": 2100 }, { "epoch": 3.73, "eval_accuracy": 0.7947947978973389, "eval_loss": 0.900332510471344, "eval_runtime": 6.2358, "eval_samples_per_second": 160.203, "eval_steps_per_second": 10.103, "step": 2100 }, { "epoch": 3.91, "learning_rate": 4.302867383512545e-06, "loss": 0.127, "step": 2200 }, { "epoch": 3.91, "eval_accuracy": 0.804804801940918, "eval_loss": 0.9066368341445923, "eval_runtime": 6.2129, "eval_samples_per_second": 160.795, "eval_steps_per_second": 10.14, "step": 2200 }, { "epoch": 4.09, "learning_rate": 4.17741935483871e-06, "loss": 0.1134, "step": 2300 }, { "epoch": 4.09, "eval_accuracy": 0.8118118047714233, "eval_loss": 0.9645766615867615, "eval_runtime": 6.2157, "eval_samples_per_second": 160.721, "eval_steps_per_second": 10.136, "step": 2300 }, { "epoch": 4.26, "learning_rate": 4.051971326164874e-06, "loss": 0.1017, "step": 2400 }, { "epoch": 4.26, "eval_accuracy": 0.804804801940918, "eval_loss": 0.9778422713279724, "eval_runtime": 6.2303, "eval_samples_per_second": 160.346, "eval_steps_per_second": 10.112, "step": 2400 }, { "epoch": 4.44, "learning_rate": 3.926523297491039e-06, "loss": 0.085, "step": 2500 }, { "epoch": 4.44, "eval_accuracy": 0.8088088035583496, "eval_loss": 1.0528582334518433, "eval_runtime": 6.238, "eval_samples_per_second": 160.149, "eval_steps_per_second": 10.099, "step": 2500 }, { "epoch": 4.62, "learning_rate": 3.801075268817204e-06, "loss": 0.0996, "step": 2600 }, { "epoch": 4.62, "eval_accuracy": 0.8058058023452759, "eval_loss": 1.0082268714904785, "eval_runtime": 6.2065, "eval_samples_per_second": 160.961, "eval_steps_per_second": 10.151, "step": 2600 }, { "epoch": 4.8, "learning_rate": 3.6756272401433694e-06, "loss": 0.1054, "step": 2700 }, { "epoch": 4.8, "eval_accuracy": 0.8108108043670654, "eval_loss": 0.9697705507278442, "eval_runtime": 6.2348, "eval_samples_per_second": 160.231, "eval_steps_per_second": 10.105, "step": 2700 }, { "epoch": 4.97, "learning_rate": 3.5501792114695336e-06, "loss": 0.1375, "step": 2800 }, { "epoch": 4.97, "eval_accuracy": 0.804804801940918, "eval_loss": 0.9333746433258057, "eval_runtime": 6.2109, "eval_samples_per_second": 160.846, "eval_steps_per_second": 10.143, "step": 2800 }, { "epoch": 5.15, "learning_rate": 3.4247311827956988e-06, "loss": 0.0487, "step": 2900 }, { "epoch": 5.15, "eval_accuracy": 0.8108108043670654, "eval_loss": 1.1273365020751953, "eval_runtime": 6.2065, "eval_samples_per_second": 160.961, "eval_steps_per_second": 10.151, "step": 2900 }, { "epoch": 5.33, "learning_rate": 3.299283154121864e-06, "loss": 0.0611, "step": 3000 }, { "epoch": 5.33, "eval_accuracy": 0.8058058023452759, "eval_loss": 1.1528337001800537, "eval_runtime": 6.2119, "eval_samples_per_second": 160.821, "eval_steps_per_second": 10.142, "step": 3000 }, { "epoch": 5.51, "learning_rate": 3.1738351254480286e-06, "loss": 0.0668, "step": 3100 }, { "epoch": 5.51, "eval_accuracy": 0.8118118047714233, "eval_loss": 1.0147671699523926, "eval_runtime": 6.2218, "eval_samples_per_second": 160.564, "eval_steps_per_second": 10.126, "step": 3100 }, { "epoch": 5.68, "learning_rate": 3.0483870967741937e-06, "loss": 0.0582, "step": 3200 }, { "epoch": 5.68, "eval_accuracy": 0.8108108043670654, "eval_loss": 1.1332666873931885, "eval_runtime": 6.2186, "eval_samples_per_second": 160.648, "eval_steps_per_second": 10.131, "step": 3200 }, { "epoch": 5.86, "learning_rate": 2.9229390681003584e-06, "loss": 0.0869, "step": 3300 }, { "epoch": 5.86, "eval_accuracy": 0.8088088035583496, "eval_loss": 1.060727596282959, "eval_runtime": 6.1932, "eval_samples_per_second": 161.305, "eval_steps_per_second": 10.172, "step": 3300 }, { "epoch": 6.04, "learning_rate": 2.797491039426523e-06, "loss": 0.0623, "step": 3400 }, { "epoch": 6.04, "eval_accuracy": 0.8068068027496338, "eval_loss": 1.1880476474761963, "eval_runtime": 6.2192, "eval_samples_per_second": 160.631, "eval_steps_per_second": 10.13, "step": 3400 }, { "epoch": 6.22, "learning_rate": 2.6720430107526883e-06, "loss": 0.0317, "step": 3500 }, { "epoch": 6.22, "eval_accuracy": 0.8008008003234863, "eval_loss": 1.2836244106292725, "eval_runtime": 6.2079, "eval_samples_per_second": 160.925, "eval_steps_per_second": 10.148, "step": 3500 }, { "epoch": 6.39, "learning_rate": 2.546594982078853e-06, "loss": 0.0546, "step": 3600 }, { "epoch": 6.39, "eval_accuracy": 0.8058058023452759, "eval_loss": 1.2147704362869263, "eval_runtime": 6.2243, "eval_samples_per_second": 160.501, "eval_steps_per_second": 10.122, "step": 3600 }, { "epoch": 6.57, "learning_rate": 2.4211469534050177e-06, "loss": 0.0486, "step": 3700 }, { "epoch": 6.57, "eval_accuracy": 0.8008008003234863, "eval_loss": 1.334807276725769, "eval_runtime": 6.1963, "eval_samples_per_second": 161.225, "eval_steps_per_second": 10.167, "step": 3700 }, { "epoch": 6.75, "learning_rate": 2.2956989247311828e-06, "loss": 0.0332, "step": 3800 }, { "epoch": 6.75, "eval_accuracy": 0.8018018007278442, "eval_loss": 1.3734461069107056, "eval_runtime": 6.3321, "eval_samples_per_second": 157.768, "eval_steps_per_second": 9.949, "step": 3800 }, { "epoch": 6.93, "learning_rate": 2.1702508960573475e-06, "loss": 0.051, "step": 3900 }, { "epoch": 6.93, "eval_accuracy": 0.7977977991104126, "eval_loss": 1.2966439723968506, "eval_runtime": 6.2073, "eval_samples_per_second": 160.94, "eval_steps_per_second": 10.149, "step": 3900 }, { "epoch": 7.1, "learning_rate": 2.044802867383512e-06, "loss": 0.0217, "step": 4000 }, { "epoch": 7.1, "eval_accuracy": 0.804804801940918, "eval_loss": 1.385273814201355, "eval_runtime": 6.2117, "eval_samples_per_second": 160.826, "eval_steps_per_second": 10.142, "step": 4000 }, { "epoch": 7.28, "learning_rate": 1.9193548387096773e-06, "loss": 0.0109, "step": 4100 }, { "epoch": 7.28, "eval_accuracy": 0.8068068027496338, "eval_loss": 1.480326533317566, "eval_runtime": 6.2106, "eval_samples_per_second": 160.854, "eval_steps_per_second": 10.144, "step": 4100 }, { "epoch": 7.46, "learning_rate": 1.793906810035842e-06, "loss": 0.0345, "step": 4200 }, { "epoch": 7.46, "eval_accuracy": 0.7997997999191284, "eval_loss": 1.4906260967254639, "eval_runtime": 6.2002, "eval_samples_per_second": 161.124, "eval_steps_per_second": 10.161, "step": 4200 }, { "epoch": 7.64, "learning_rate": 1.6684587813620071e-06, "loss": 0.0365, "step": 4300 }, { "epoch": 7.64, "eval_accuracy": 0.8028028011322021, "eval_loss": 1.4347106218338013, "eval_runtime": 6.2133, "eval_samples_per_second": 160.783, "eval_steps_per_second": 10.139, "step": 4300 }, { "epoch": 7.82, "learning_rate": 1.543010752688172e-06, "loss": 0.0265, "step": 4400 }, { "epoch": 7.82, "eval_accuracy": 0.8128128051757812, "eval_loss": 1.3976863622665405, "eval_runtime": 6.224, "eval_samples_per_second": 160.508, "eval_steps_per_second": 10.122, "step": 4400 }, { "epoch": 7.99, "learning_rate": 1.417562724014337e-06, "loss": 0.0257, "step": 4500 }, { "epoch": 7.99, "eval_accuracy": 0.8108108043670654, "eval_loss": 1.370467185974121, "eval_runtime": 6.2313, "eval_samples_per_second": 160.321, "eval_steps_per_second": 10.11, "step": 4500 }, { "epoch": 8.17, "learning_rate": 1.2921146953405017e-06, "loss": 0.0036, "step": 4600 }, { "epoch": 8.17, "eval_accuracy": 0.8168168067932129, "eval_loss": 1.4352822303771973, "eval_runtime": 6.2072, "eval_samples_per_second": 160.943, "eval_steps_per_second": 10.15, "step": 4600 }, { "epoch": 8.35, "learning_rate": 1.1666666666666666e-06, "loss": 0.0269, "step": 4700 }, { "epoch": 8.35, "eval_accuracy": 0.8068068027496338, "eval_loss": 1.4826140403747559, "eval_runtime": 6.2178, "eval_samples_per_second": 160.669, "eval_steps_per_second": 10.132, "step": 4700 }, { "epoch": 8.53, "learning_rate": 1.0412186379928315e-06, "loss": 0.0231, "step": 4800 }, { "epoch": 8.53, "eval_accuracy": 0.8118118047714233, "eval_loss": 1.4810999631881714, "eval_runtime": 6.3061, "eval_samples_per_second": 158.417, "eval_steps_per_second": 9.99, "step": 4800 }, { "epoch": 8.7, "learning_rate": 9.157706093189965e-07, "loss": 0.0204, "step": 4900 }, { "epoch": 8.7, "eval_accuracy": 0.8028028011322021, "eval_loss": 1.5245323181152344, "eval_runtime": 6.2057, "eval_samples_per_second": 160.982, "eval_steps_per_second": 10.152, "step": 4900 }, { "epoch": 8.88, "learning_rate": 7.903225806451612e-07, "loss": 0.0263, "step": 5000 }, { "epoch": 8.88, "eval_accuracy": 0.8018018007278442, "eval_loss": 1.5123308897018433, "eval_runtime": 6.2053, "eval_samples_per_second": 160.991, "eval_steps_per_second": 10.153, "step": 5000 }, { "epoch": 9.06, "learning_rate": 6.648745519713261e-07, "loss": 0.0138, "step": 5100 }, { "epoch": 9.06, "eval_accuracy": 0.8028028011322021, "eval_loss": 1.51128089427948, "eval_runtime": 6.2898, "eval_samples_per_second": 158.83, "eval_steps_per_second": 10.016, "step": 5100 }, { "epoch": 9.24, "learning_rate": 5.39426523297491e-07, "loss": 0.0089, "step": 5200 }, { "epoch": 9.24, "eval_accuracy": 0.7977977991104126, "eval_loss": 1.5846397876739502, "eval_runtime": 6.2124, "eval_samples_per_second": 160.808, "eval_steps_per_second": 10.141, "step": 5200 }, { "epoch": 9.41, "learning_rate": 4.1397849462365595e-07, "loss": 0.029, "step": 5300 }, { "epoch": 9.41, "eval_accuracy": 0.8008008003234863, "eval_loss": 1.5361814498901367, "eval_runtime": 6.2541, "eval_samples_per_second": 159.736, "eval_steps_per_second": 10.073, "step": 5300 }, { "epoch": 9.59, "learning_rate": 2.8853046594982076e-07, "loss": 0.0058, "step": 5400 }, { "epoch": 9.59, "eval_accuracy": 0.8018018007278442, "eval_loss": 1.5759379863739014, "eval_runtime": 6.221, "eval_samples_per_second": 160.585, "eval_steps_per_second": 10.127, "step": 5400 }, { "epoch": 9.77, "learning_rate": 1.6308243727598568e-07, "loss": 0.0084, "step": 5500 }, { "epoch": 9.77, "eval_accuracy": 0.8018018007278442, "eval_loss": 1.5678976774215698, "eval_runtime": 6.2009, "eval_samples_per_second": 161.105, "eval_steps_per_second": 10.16, "step": 5500 }, { "epoch": 9.95, "learning_rate": 3.763440860215054e-08, "loss": 0.0065, "step": 5600 }, { "epoch": 9.95, "eval_accuracy": 0.8028028011322021, "eval_loss": 1.568334937095642, "eval_runtime": 6.2439, "eval_samples_per_second": 159.996, "eval_steps_per_second": 10.09, "step": 5600 }, { "epoch": 10.0, "step": 5630, "total_flos": 4.193719446528e+16, "train_loss": 0.13640729715885533, "train_runtime": 2182.3127, "train_samples_per_second": 41.241, "train_steps_per_second": 2.58 } ], "max_steps": 5630, "num_train_epochs": 10, "total_flos": 4.193719446528e+16, "trial_name": null, "trial_params": null }