{ "best_metric": 0.8164658634538152, "best_model_checkpoint": "./results/checkpoint-62000", "epoch": 2.526075619295958, "global_step": 62000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.01, "learning_rate": 2.5e-05, "loss": 1.0225, "step": 250 }, { "epoch": 0.02, "learning_rate": 5e-05, "loss": 0.8271, "step": 500 }, { "epoch": 0.02, "eval_accuracy": 0.661847389558233, "eval_loss": 0.8168992400169373, "eval_runtime": 9.3368, "eval_samples_per_second": 266.686, "step": 500 }, { "epoch": 0.03, "learning_rate": 4.98290761909971e-05, "loss": 0.7821, "step": 750 }, { "epoch": 0.04, "learning_rate": 4.965815238199421e-05, "loss": 0.7463, "step": 1000 }, { "epoch": 0.04, "eval_accuracy": 0.708433734939759, "eval_loss": 0.7267000675201416, "eval_runtime": 9.0373, "eval_samples_per_second": 275.525, "step": 1000 }, { "epoch": 0.05, "learning_rate": 4.9487228572991307e-05, "loss": 0.7266, "step": 1250 }, { "epoch": 0.06, "learning_rate": 4.9316304763988406e-05, "loss": 0.7156, "step": 1500 }, { "epoch": 0.06, "eval_accuracy": 0.7180722891566265, "eval_loss": 0.6909603476524353, "eval_runtime": 9.3259, "eval_samples_per_second": 266.998, "step": 1500 }, { "epoch": 0.07, "learning_rate": 4.914538095498551e-05, "loss": 0.7193, "step": 1750 }, { "epoch": 0.08, "learning_rate": 4.897445714598261e-05, "loss": 0.7073, "step": 2000 }, { "epoch": 0.08, "eval_accuracy": 0.7176706827309237, "eval_loss": 0.6720558404922485, "eval_runtime": 9.0667, "eval_samples_per_second": 274.632, "step": 2000 }, { "epoch": 0.09, "learning_rate": 4.880353333697971e-05, "loss": 0.6817, "step": 2250 }, { "epoch": 0.1, "learning_rate": 4.8632609527976816e-05, "loss": 0.6825, "step": 2500 }, { "epoch": 0.1, "eval_accuracy": 0.7373493975903614, "eval_loss": 0.6640393137931824, "eval_runtime": 9.298, "eval_samples_per_second": 267.799, "step": 2500 }, { "epoch": 0.11, "learning_rate": 4.8461685718973915e-05, "loss": 0.6759, "step": 2750 }, { "epoch": 0.12, "learning_rate": 4.8290761909971014e-05, "loss": 0.6886, "step": 3000 }, { "epoch": 0.12, "eval_accuracy": 0.7578313253012048, "eval_loss": 0.6005652546882629, "eval_runtime": 9.0815, "eval_samples_per_second": 274.184, "step": 3000 }, { "epoch": 0.13, "learning_rate": 4.811983810096811e-05, "loss": 0.6971, "step": 3250 }, { "epoch": 0.14, "learning_rate": 4.794891429196521e-05, "loss": 0.6642, "step": 3500 }, { "epoch": 0.14, "eval_accuracy": 0.7582329317269076, "eval_loss": 0.5993230938911438, "eval_runtime": 8.992, "eval_samples_per_second": 276.912, "step": 3500 }, { "epoch": 0.15, "learning_rate": 4.777799048296232e-05, "loss": 0.664, "step": 3750 }, { "epoch": 0.16, "learning_rate": 4.760706667395942e-05, "loss": 0.671, "step": 4000 }, { "epoch": 0.16, "eval_accuracy": 0.7610441767068273, "eval_loss": 0.5954618453979492, "eval_runtime": 8.999, "eval_samples_per_second": 276.698, "step": 4000 }, { "epoch": 0.17, "learning_rate": 4.7436142864956516e-05, "loss": 0.6421, "step": 4250 }, { "epoch": 0.18, "learning_rate": 4.726521905595362e-05, "loss": 0.6768, "step": 4500 }, { "epoch": 0.18, "eval_accuracy": 0.7200803212851405, "eval_loss": 0.6515903472900391, "eval_runtime": 9.1435, "eval_samples_per_second": 272.325, "step": 4500 }, { "epoch": 0.19, "learning_rate": 4.709429524695072e-05, "loss": 0.6606, "step": 4750 }, { "epoch": 0.2, "learning_rate": 4.692337143794782e-05, "loss": 0.6511, "step": 5000 }, { "epoch": 0.2, "eval_accuracy": 0.7518072289156627, "eval_loss": 0.6293097734451294, "eval_runtime": 9.1699, "eval_samples_per_second": 271.54, "step": 5000 }, { "epoch": 0.21, "learning_rate": 4.6752447628944926e-05, "loss": 0.651, "step": 5250 }, { "epoch": 0.22, "learning_rate": 4.6581523819942026e-05, "loss": 0.6729, "step": 5500 }, { "epoch": 0.22, "eval_accuracy": 0.748995983935743, "eval_loss": 0.6285788416862488, "eval_runtime": 9.1397, "eval_samples_per_second": 272.439, "step": 5500 }, { "epoch": 0.23, "learning_rate": 4.6410600010939125e-05, "loss": 0.6573, "step": 5750 }, { "epoch": 0.24, "learning_rate": 4.623967620193623e-05, "loss": 0.6335, "step": 6000 }, { "epoch": 0.24, "eval_accuracy": 0.7485943775100402, "eval_loss": 0.6102361679077148, "eval_runtime": 9.2187, "eval_samples_per_second": 270.105, "step": 6000 }, { "epoch": 0.25, "learning_rate": 4.606875239293333e-05, "loss": 0.6171, "step": 6250 }, { "epoch": 0.26, "learning_rate": 4.589782858393043e-05, "loss": 0.6445, "step": 6500 }, { "epoch": 0.26, "eval_accuracy": 0.7578313253012048, "eval_loss": 0.5952242612838745, "eval_runtime": 9.1091, "eval_samples_per_second": 273.352, "step": 6500 }, { "epoch": 0.28, "learning_rate": 4.5726904774927535e-05, "loss": 0.6371, "step": 6750 }, { "epoch": 0.29, "learning_rate": 4.5555980965924634e-05, "loss": 0.6285, "step": 7000 }, { "epoch": 0.29, "eval_accuracy": 0.7550200803212851, "eval_loss": 0.5844168663024902, "eval_runtime": 9.3454, "eval_samples_per_second": 266.442, "step": 7000 }, { "epoch": 0.3, "learning_rate": 4.538505715692173e-05, "loss": 0.645, "step": 7250 }, { "epoch": 0.31, "learning_rate": 4.521413334791884e-05, "loss": 0.6614, "step": 7500 }, { "epoch": 0.31, "eval_accuracy": 0.7706827309236948, "eval_loss": 0.5955267548561096, "eval_runtime": 9.1185, "eval_samples_per_second": 273.072, "step": 7500 }, { "epoch": 0.32, "learning_rate": 4.504320953891594e-05, "loss": 0.6042, "step": 7750 }, { "epoch": 0.33, "learning_rate": 4.487228572991304e-05, "loss": 0.6497, "step": 8000 }, { "epoch": 0.33, "eval_accuracy": 0.7803212851405622, "eval_loss": 0.5732316970825195, "eval_runtime": 9.5547, "eval_samples_per_second": 260.605, "step": 8000 }, { "epoch": 0.34, "learning_rate": 4.470136192091014e-05, "loss": 0.6394, "step": 8250 }, { "epoch": 0.35, "learning_rate": 4.453043811190724e-05, "loss": 0.6109, "step": 8500 }, { "epoch": 0.35, "eval_accuracy": 0.7594377510040161, "eval_loss": 0.5940960049629211, "eval_runtime": 9.1183, "eval_samples_per_second": 273.077, "step": 8500 }, { "epoch": 0.36, "learning_rate": 4.4359514302904335e-05, "loss": 0.6324, "step": 8750 }, { "epoch": 0.37, "learning_rate": 4.418859049390144e-05, "loss": 0.6266, "step": 9000 }, { "epoch": 0.37, "eval_accuracy": 0.7730923694779116, "eval_loss": 0.5806799530982971, "eval_runtime": 9.6937, "eval_samples_per_second": 256.869, "step": 9000 }, { "epoch": 0.38, "learning_rate": 4.401766668489854e-05, "loss": 0.622, "step": 9250 }, { "epoch": 0.39, "learning_rate": 4.384674287589564e-05, "loss": 0.629, "step": 9500 }, { "epoch": 0.39, "eval_accuracy": 0.770281124497992, "eval_loss": 0.5808362364768982, "eval_runtime": 9.089, "eval_samples_per_second": 273.959, "step": 9500 }, { "epoch": 0.4, "learning_rate": 4.3675819066892745e-05, "loss": 0.6107, "step": 9750 }, { "epoch": 0.41, "learning_rate": 4.3504895257889844e-05, "loss": 0.6347, "step": 10000 }, { "epoch": 0.41, "eval_accuracy": 0.7650602409638554, "eval_loss": 0.5932815074920654, "eval_runtime": 9.5702, "eval_samples_per_second": 260.184, "step": 10000 }, { "epoch": 0.42, "learning_rate": 4.333397144888694e-05, "loss": 0.6441, "step": 10250 }, { "epoch": 0.43, "learning_rate": 4.316304763988405e-05, "loss": 0.6181, "step": 10500 }, { "epoch": 0.43, "eval_accuracy": 0.7815261044176707, "eval_loss": 0.5563480257987976, "eval_runtime": 9.0503, "eval_samples_per_second": 275.128, "step": 10500 }, { "epoch": 0.44, "learning_rate": 4.299212383088115e-05, "loss": 0.6133, "step": 10750 }, { "epoch": 0.45, "learning_rate": 4.282120002187825e-05, "loss": 0.6173, "step": 11000 }, { "epoch": 0.45, "eval_accuracy": 0.7775100401606426, "eval_loss": 0.5630077719688416, "eval_runtime": 9.4887, "eval_samples_per_second": 262.417, "step": 11000 }, { "epoch": 0.46, "learning_rate": 4.265027621287535e-05, "loss": 0.5979, "step": 11250 }, { "epoch": 0.47, "learning_rate": 4.247935240387245e-05, "loss": 0.6114, "step": 11500 }, { "epoch": 0.47, "eval_accuracy": 0.7666666666666667, "eval_loss": 0.5745715498924255, "eval_runtime": 9.0458, "eval_samples_per_second": 275.267, "step": 11500 }, { "epoch": 0.48, "learning_rate": 4.230842859486955e-05, "loss": 0.6168, "step": 11750 }, { "epoch": 0.49, "learning_rate": 4.213750478586666e-05, "loss": 0.602, "step": 12000 }, { "epoch": 0.49, "eval_accuracy": 0.7614457831325301, "eval_loss": 0.5948830246925354, "eval_runtime": 9.3904, "eval_samples_per_second": 265.165, "step": 12000 }, { "epoch": 0.5, "learning_rate": 4.1966580976863756e-05, "loss": 0.6268, "step": 12250 }, { "epoch": 0.51, "learning_rate": 4.1795657167860855e-05, "loss": 0.6041, "step": 12500 }, { "epoch": 0.51, "eval_accuracy": 0.7755020080321285, "eval_loss": 0.5688386559486389, "eval_runtime": 9.0467, "eval_samples_per_second": 275.238, "step": 12500 }, { "epoch": 0.52, "learning_rate": 4.162473335885796e-05, "loss": 0.6284, "step": 12750 }, { "epoch": 0.53, "learning_rate": 4.145380954985506e-05, "loss": 0.6193, "step": 13000 }, { "epoch": 0.53, "eval_accuracy": 0.7590361445783133, "eval_loss": 0.583430290222168, "eval_runtime": 9.3111, "eval_samples_per_second": 267.423, "step": 13000 }, { "epoch": 0.54, "learning_rate": 4.128288574085216e-05, "loss": 0.5877, "step": 13250 }, { "epoch": 0.55, "learning_rate": 4.1111961931849265e-05, "loss": 0.5842, "step": 13500 }, { "epoch": 0.55, "eval_accuracy": 0.7614457831325301, "eval_loss": 0.5992956161499023, "eval_runtime": 9.0426, "eval_samples_per_second": 275.365, "step": 13500 }, { "epoch": 0.56, "learning_rate": 4.0941038122846365e-05, "loss": 0.592, "step": 13750 }, { "epoch": 0.57, "learning_rate": 4.0770114313843464e-05, "loss": 0.6068, "step": 14000 }, { "epoch": 0.57, "eval_accuracy": 0.7666666666666667, "eval_loss": 0.582902193069458, "eval_runtime": 9.2607, "eval_samples_per_second": 268.878, "step": 14000 }, { "epoch": 0.58, "learning_rate": 4.059919050484056e-05, "loss": 0.6125, "step": 14250 }, { "epoch": 0.59, "learning_rate": 4.042826669583766e-05, "loss": 0.6012, "step": 14500 }, { "epoch": 0.59, "eval_accuracy": 0.7799196787148595, "eval_loss": 0.565805196762085, "eval_runtime": 9.0222, "eval_samples_per_second": 275.985, "step": 14500 }, { "epoch": 0.6, "learning_rate": 4.025734288683477e-05, "loss": 0.5845, "step": 14750 }, { "epoch": 0.61, "learning_rate": 4.008641907783187e-05, "loss": 0.6159, "step": 15000 }, { "epoch": 0.61, "eval_accuracy": 0.7538152610441767, "eval_loss": 0.6279062628746033, "eval_runtime": 9.2223, "eval_samples_per_second": 269.997, "step": 15000 }, { "epoch": 0.62, "learning_rate": 3.9915495268828966e-05, "loss": 0.5897, "step": 15250 }, { "epoch": 0.63, "learning_rate": 3.974457145982607e-05, "loss": 0.5996, "step": 15500 }, { "epoch": 0.63, "eval_accuracy": 0.7726907630522089, "eval_loss": 0.5615552067756653, "eval_runtime": 9.0848, "eval_samples_per_second": 274.085, "step": 15500 }, { "epoch": 0.64, "learning_rate": 3.957364765082317e-05, "loss": 0.5899, "step": 15750 }, { "epoch": 0.65, "learning_rate": 3.940272384182027e-05, "loss": 0.6014, "step": 16000 }, { "epoch": 0.65, "eval_accuracy": 0.7883534136546185, "eval_loss": 0.5467997193336487, "eval_runtime": 9.0085, "eval_samples_per_second": 276.404, "step": 16000 }, { "epoch": 0.66, "learning_rate": 3.9231800032817376e-05, "loss": 0.6057, "step": 16250 }, { "epoch": 0.67, "learning_rate": 3.9060876223814475e-05, "loss": 0.6118, "step": 16500 }, { "epoch": 0.67, "eval_accuracy": 0.7626506024096386, "eval_loss": 0.572376012802124, "eval_runtime": 8.9948, "eval_samples_per_second": 276.827, "step": 16500 }, { "epoch": 0.68, "learning_rate": 3.8889952414811574e-05, "loss": 0.6071, "step": 16750 }, { "epoch": 0.69, "learning_rate": 3.871902860580868e-05, "loss": 0.5986, "step": 17000 }, { "epoch": 0.69, "eval_accuracy": 0.7718875502008032, "eval_loss": 0.5707191228866577, "eval_runtime": 9.1769, "eval_samples_per_second": 271.333, "step": 17000 }, { "epoch": 0.7, "learning_rate": 3.854810479680578e-05, "loss": 0.5837, "step": 17250 }, { "epoch": 0.71, "learning_rate": 3.837718098780288e-05, "loss": 0.5946, "step": 17500 }, { "epoch": 0.71, "eval_accuracy": 0.7726907630522089, "eval_loss": 0.5725845098495483, "eval_runtime": 9.1347, "eval_samples_per_second": 272.587, "step": 17500 }, { "epoch": 0.72, "learning_rate": 3.8206257178799984e-05, "loss": 0.5858, "step": 17750 }, { "epoch": 0.73, "learning_rate": 3.8035333369797084e-05, "loss": 0.5768, "step": 18000 }, { "epoch": 0.73, "eval_accuracy": 0.763855421686747, "eval_loss": 0.5683776140213013, "eval_runtime": 9.114, "eval_samples_per_second": 273.207, "step": 18000 }, { "epoch": 0.74, "learning_rate": 3.786440956079418e-05, "loss": 0.6339, "step": 18250 }, { "epoch": 0.75, "learning_rate": 3.769348575179129e-05, "loss": 0.577, "step": 18500 }, { "epoch": 0.75, "eval_accuracy": 0.7803212851405622, "eval_loss": 0.5440804958343506, "eval_runtime": 9.2459, "eval_samples_per_second": 269.31, "step": 18500 }, { "epoch": 0.76, "learning_rate": 3.752256194278839e-05, "loss": 0.6009, "step": 18750 }, { "epoch": 0.77, "learning_rate": 3.735163813378549e-05, "loss": 0.5862, "step": 19000 }, { "epoch": 0.77, "eval_accuracy": 0.7863453815261044, "eval_loss": 0.5475257635116577, "eval_runtime": 9.1052, "eval_samples_per_second": 273.471, "step": 19000 }, { "epoch": 0.78, "learning_rate": 3.7180714324782586e-05, "loss": 0.591, "step": 19250 }, { "epoch": 0.79, "learning_rate": 3.7009790515779685e-05, "loss": 0.6009, "step": 19500 }, { "epoch": 0.79, "eval_accuracy": 0.7730923694779116, "eval_loss": 0.5383695960044861, "eval_runtime": 9.4316, "eval_samples_per_second": 264.007, "step": 19500 }, { "epoch": 0.8, "learning_rate": 3.6838866706776784e-05, "loss": 0.5783, "step": 19750 }, { "epoch": 0.81, "learning_rate": 3.666794289777389e-05, "loss": 0.5803, "step": 20000 }, { "epoch": 0.81, "eval_accuracy": 0.7799196787148595, "eval_loss": 0.5557690858840942, "eval_runtime": 9.0883, "eval_samples_per_second": 273.978, "step": 20000 }, { "epoch": 0.83, "learning_rate": 3.649701908877099e-05, "loss": 0.5826, "step": 20250 }, { "epoch": 0.84, "learning_rate": 3.632609527976809e-05, "loss": 0.5811, "step": 20500 }, { "epoch": 0.84, "eval_accuracy": 0.7650602409638554, "eval_loss": 0.5788130760192871, "eval_runtime": 9.68, "eval_samples_per_second": 257.231, "step": 20500 }, { "epoch": 0.85, "learning_rate": 3.6155171470765194e-05, "loss": 0.5937, "step": 20750 }, { "epoch": 0.86, "learning_rate": 3.598424766176229e-05, "loss": 0.597, "step": 21000 }, { "epoch": 0.86, "eval_accuracy": 0.791566265060241, "eval_loss": 0.528762698173523, "eval_runtime": 9.0827, "eval_samples_per_second": 274.147, "step": 21000 }, { "epoch": 0.87, "learning_rate": 3.581332385275939e-05, "loss": 0.5732, "step": 21250 }, { "epoch": 0.88, "learning_rate": 3.56424000437565e-05, "loss": 0.6004, "step": 21500 }, { "epoch": 0.88, "eval_accuracy": 0.7819277108433735, "eval_loss": 0.5339077711105347, "eval_runtime": 9.6053, "eval_samples_per_second": 259.233, "step": 21500 }, { "epoch": 0.89, "learning_rate": 3.54714762347536e-05, "loss": 0.5548, "step": 21750 }, { "epoch": 0.9, "learning_rate": 3.53005524257507e-05, "loss": 0.5718, "step": 22000 }, { "epoch": 0.9, "eval_accuracy": 0.7759036144578313, "eval_loss": 0.5448992848396301, "eval_runtime": 8.9793, "eval_samples_per_second": 277.305, "step": 22000 }, { "epoch": 0.91, "learning_rate": 3.51296286167478e-05, "loss": 0.5747, "step": 22250 }, { "epoch": 0.92, "learning_rate": 3.49587048077449e-05, "loss": 0.5922, "step": 22500 }, { "epoch": 0.92, "eval_accuracy": 0.7827309236947791, "eval_loss": 0.534243643283844, "eval_runtime": 9.0048, "eval_samples_per_second": 276.52, "step": 22500 }, { "epoch": 0.93, "learning_rate": 3.4787780998742e-05, "loss": 0.5665, "step": 22750 }, { "epoch": 0.94, "learning_rate": 3.461685718973911e-05, "loss": 0.5834, "step": 23000 }, { "epoch": 0.94, "eval_accuracy": 0.8012048192771084, "eval_loss": 0.517599880695343, "eval_runtime": 9.1146, "eval_samples_per_second": 273.189, "step": 23000 }, { "epoch": 0.95, "learning_rate": 3.4445933380736206e-05, "loss": 0.5759, "step": 23250 }, { "epoch": 0.96, "learning_rate": 3.4275009571733305e-05, "loss": 0.5791, "step": 23500 }, { "epoch": 0.96, "eval_accuracy": 0.7879518072289157, "eval_loss": 0.5282337665557861, "eval_runtime": 9.3392, "eval_samples_per_second": 266.619, "step": 23500 }, { "epoch": 0.97, "learning_rate": 3.410408576273041e-05, "loss": 0.5892, "step": 23750 }, { "epoch": 0.98, "learning_rate": 3.393316195372751e-05, "loss": 0.5598, "step": 24000 }, { "epoch": 0.98, "eval_accuracy": 0.7855421686746988, "eval_loss": 0.5417413711547852, "eval_runtime": 9.0517, "eval_samples_per_second": 275.086, "step": 24000 }, { "epoch": 0.99, "learning_rate": 3.376223814472461e-05, "loss": 0.5695, "step": 24250 }, { "epoch": 1.0, "learning_rate": 3.3591314335721715e-05, "loss": 0.5663, "step": 24500 }, { "epoch": 1.0, "eval_accuracy": 0.7811244979919679, "eval_loss": 0.5498207807540894, "eval_runtime": 9.2845, "eval_samples_per_second": 268.188, "step": 24500 }, { "epoch": 1.01, "learning_rate": 3.342039052671881e-05, "loss": 0.4729, "step": 24750 }, { "epoch": 1.02, "learning_rate": 3.324946671771591e-05, "loss": 0.4511, "step": 25000 }, { "epoch": 1.02, "eval_accuracy": 0.7779116465863454, "eval_loss": 0.5648518204689026, "eval_runtime": 9.0553, "eval_samples_per_second": 274.977, "step": 25000 }, { "epoch": 1.03, "learning_rate": 3.307854290871301e-05, "loss": 0.4426, "step": 25250 }, { "epoch": 1.04, "learning_rate": 3.290761909971011e-05, "loss": 0.4361, "step": 25500 }, { "epoch": 1.04, "eval_accuracy": 0.7959839357429719, "eval_loss": 0.5434826016426086, "eval_runtime": 9.2435, "eval_samples_per_second": 269.378, "step": 25500 }, { "epoch": 1.05, "learning_rate": 3.273669529070722e-05, "loss": 0.4643, "step": 25750 }, { "epoch": 1.06, "learning_rate": 3.2565771481704317e-05, "loss": 0.4686, "step": 26000 }, { "epoch": 1.06, "eval_accuracy": 0.7947791164658634, "eval_loss": 0.5344994068145752, "eval_runtime": 9.0719, "eval_samples_per_second": 274.473, "step": 26000 }, { "epoch": 1.07, "learning_rate": 3.2394847672701416e-05, "loss": 0.4505, "step": 26250 }, { "epoch": 1.08, "learning_rate": 3.222392386369852e-05, "loss": 0.4483, "step": 26500 }, { "epoch": 1.08, "eval_accuracy": 0.7983935742971887, "eval_loss": 0.5487410426139832, "eval_runtime": 9.2215, "eval_samples_per_second": 270.021, "step": 26500 }, { "epoch": 1.09, "learning_rate": 3.205300005469562e-05, "loss": 0.4497, "step": 26750 }, { "epoch": 1.1, "learning_rate": 3.188207624569272e-05, "loss": 0.4608, "step": 27000 }, { "epoch": 1.1, "eval_accuracy": 0.7879518072289157, "eval_loss": 0.5529894232749939, "eval_runtime": 9.098, "eval_samples_per_second": 273.687, "step": 27000 }, { "epoch": 1.11, "learning_rate": 3.1711152436689826e-05, "loss": 0.4549, "step": 27250 }, { "epoch": 1.12, "learning_rate": 3.1540228627686925e-05, "loss": 0.4636, "step": 27500 }, { "epoch": 1.12, "eval_accuracy": 0.7859437751004016, "eval_loss": 0.5583757162094116, "eval_runtime": 9.1565, "eval_samples_per_second": 271.938, "step": 27500 }, { "epoch": 1.13, "learning_rate": 3.1369304818684024e-05, "loss": 0.4537, "step": 27750 }, { "epoch": 1.14, "learning_rate": 3.119838100968113e-05, "loss": 0.4628, "step": 28000 }, { "epoch": 1.14, "eval_accuracy": 0.795582329317269, "eval_loss": 0.5474898815155029, "eval_runtime": 9.1471, "eval_samples_per_second": 272.217, "step": 28000 }, { "epoch": 1.15, "learning_rate": 3.102745720067823e-05, "loss": 0.4648, "step": 28250 }, { "epoch": 1.16, "learning_rate": 3.085653339167533e-05, "loss": 0.476, "step": 28500 }, { "epoch": 1.16, "eval_accuracy": 0.7911646586345381, "eval_loss": 0.5516586899757385, "eval_runtime": 8.9921, "eval_samples_per_second": 276.91, "step": 28500 }, { "epoch": 1.17, "learning_rate": 3.0685609582672434e-05, "loss": 0.4794, "step": 28750 }, { "epoch": 1.18, "learning_rate": 3.0514685773669533e-05, "loss": 0.4537, "step": 29000 }, { "epoch": 1.18, "eval_accuracy": 0.804417670682731, "eval_loss": 0.5304700136184692, "eval_runtime": 9.0138, "eval_samples_per_second": 276.242, "step": 29000 }, { "epoch": 1.19, "learning_rate": 3.0343761964666632e-05, "loss": 0.481, "step": 29250 }, { "epoch": 1.2, "learning_rate": 3.0172838155663735e-05, "loss": 0.4715, "step": 29500 }, { "epoch": 1.2, "eval_accuracy": 0.7939759036144578, "eval_loss": 0.5461502075195312, "eval_runtime": 9.0986, "eval_samples_per_second": 273.667, "step": 29500 }, { "epoch": 1.21, "learning_rate": 3.0001914346660837e-05, "loss": 0.4385, "step": 29750 }, { "epoch": 1.22, "learning_rate": 2.9830990537657933e-05, "loss": 0.4633, "step": 30000 }, { "epoch": 1.22, "eval_accuracy": 0.793574297188755, "eval_loss": 0.5552789568901062, "eval_runtime": 9.3994, "eval_samples_per_second": 264.911, "step": 30000 }, { "epoch": 1.23, "learning_rate": 2.9660066728655032e-05, "loss": 0.4748, "step": 30250 }, { "epoch": 1.24, "learning_rate": 2.9489142919652135e-05, "loss": 0.466, "step": 30500 }, { "epoch": 1.24, "eval_accuracy": 0.7907630522088354, "eval_loss": 0.5751305222511292, "eval_runtime": 9.0876, "eval_samples_per_second": 274.0, "step": 30500 }, { "epoch": 1.25, "learning_rate": 2.9318219110649237e-05, "loss": 0.4705, "step": 30750 }, { "epoch": 1.26, "learning_rate": 2.9147295301646336e-05, "loss": 0.4815, "step": 31000 }, { "epoch": 1.26, "eval_accuracy": 0.8008032128514057, "eval_loss": 0.5165457725524902, "eval_runtime": 9.6998, "eval_samples_per_second": 256.707, "step": 31000 }, { "epoch": 1.27, "learning_rate": 2.897637149264344e-05, "loss": 0.4638, "step": 31250 }, { "epoch": 1.28, "learning_rate": 2.880544768364054e-05, "loss": 0.4544, "step": 31500 }, { "epoch": 1.28, "eval_accuracy": 0.7847389558232932, "eval_loss": 0.5818995833396912, "eval_runtime": 9.0916, "eval_samples_per_second": 273.879, "step": 31500 }, { "epoch": 1.29, "learning_rate": 2.863452387463764e-05, "loss": 0.4639, "step": 31750 }, { "epoch": 1.3, "learning_rate": 2.8463600065634743e-05, "loss": 0.4626, "step": 32000 }, { "epoch": 1.3, "eval_accuracy": 0.8012048192771084, "eval_loss": 0.5314044952392578, "eval_runtime": 9.5865, "eval_samples_per_second": 259.741, "step": 32000 }, { "epoch": 1.31, "learning_rate": 2.8292676256631846e-05, "loss": 0.4502, "step": 32250 }, { "epoch": 1.32, "learning_rate": 2.8121752447628945e-05, "loss": 0.4742, "step": 32500 }, { "epoch": 1.32, "eval_accuracy": 0.8036144578313253, "eval_loss": 0.5086196064949036, "eval_runtime": 9.0534, "eval_samples_per_second": 275.036, "step": 32500 }, { "epoch": 1.33, "learning_rate": 2.7950828638626047e-05, "loss": 0.4555, "step": 32750 }, { "epoch": 1.34, "learning_rate": 2.777990482962315e-05, "loss": 0.4643, "step": 33000 }, { "epoch": 1.34, "eval_accuracy": 0.8012048192771084, "eval_loss": 0.5454714894294739, "eval_runtime": 9.453, "eval_samples_per_second": 263.409, "step": 33000 }, { "epoch": 1.35, "learning_rate": 2.760898102062025e-05, "loss": 0.4597, "step": 33250 }, { "epoch": 1.36, "learning_rate": 2.743805721161735e-05, "loss": 0.4727, "step": 33500 }, { "epoch": 1.36, "eval_accuracy": 0.7943775100401607, "eval_loss": 0.5575410723686218, "eval_runtime": 8.9953, "eval_samples_per_second": 276.812, "step": 33500 }, { "epoch": 1.38, "learning_rate": 2.7267133402614454e-05, "loss": 0.4612, "step": 33750 }, { "epoch": 1.39, "learning_rate": 2.7096209593611553e-05, "loss": 0.4646, "step": 34000 }, { "epoch": 1.39, "eval_accuracy": 0.7987951807228916, "eval_loss": 0.5220562219619751, "eval_runtime": 9.4179, "eval_samples_per_second": 264.391, "step": 34000 }, { "epoch": 1.4, "learning_rate": 2.6925285784608655e-05, "loss": 0.46, "step": 34250 }, { "epoch": 1.41, "learning_rate": 2.6754361975605758e-05, "loss": 0.4573, "step": 34500 }, { "epoch": 1.41, "eval_accuracy": 0.7939759036144578, "eval_loss": 0.5481248497962952, "eval_runtime": 9.0056, "eval_samples_per_second": 276.493, "step": 34500 }, { "epoch": 1.42, "learning_rate": 2.6583438166602857e-05, "loss": 0.4633, "step": 34750 }, { "epoch": 1.43, "learning_rate": 2.641251435759996e-05, "loss": 0.4532, "step": 35000 }, { "epoch": 1.43, "eval_accuracy": 0.793574297188755, "eval_loss": 0.5569635629653931, "eval_runtime": 9.3588, "eval_samples_per_second": 266.061, "step": 35000 }, { "epoch": 1.44, "learning_rate": 2.6241590548597055e-05, "loss": 0.4431, "step": 35250 }, { "epoch": 1.45, "learning_rate": 2.6070666739594158e-05, "loss": 0.4637, "step": 35500 }, { "epoch": 1.45, "eval_accuracy": 0.7971887550200804, "eval_loss": 0.5141083002090454, "eval_runtime": 9.013, "eval_samples_per_second": 276.266, "step": 35500 }, { "epoch": 1.46, "learning_rate": 2.5899742930591257e-05, "loss": 0.4337, "step": 35750 }, { "epoch": 1.47, "learning_rate": 2.572881912158836e-05, "loss": 0.4572, "step": 36000 }, { "epoch": 1.47, "eval_accuracy": 0.7887550200803213, "eval_loss": 0.5640882253646851, "eval_runtime": 9.316, "eval_samples_per_second": 267.283, "step": 36000 }, { "epoch": 1.48, "learning_rate": 2.5557895312585462e-05, "loss": 0.4485, "step": 36250 }, { "epoch": 1.49, "learning_rate": 2.538697150358256e-05, "loss": 0.4613, "step": 36500 }, { "epoch": 1.49, "eval_accuracy": 0.8004016064257028, "eval_loss": 0.5442628860473633, "eval_runtime": 9.0407, "eval_samples_per_second": 275.422, "step": 36500 }, { "epoch": 1.5, "learning_rate": 2.5216047694579664e-05, "loss": 0.4547, "step": 36750 }, { "epoch": 1.51, "learning_rate": 2.5045123885576766e-05, "loss": 0.4614, "step": 37000 }, { "epoch": 1.51, "eval_accuracy": 0.7987951807228916, "eval_loss": 0.5075089931488037, "eval_runtime": 9.22, "eval_samples_per_second": 270.065, "step": 37000 }, { "epoch": 1.52, "learning_rate": 2.4874200076573865e-05, "loss": 0.4545, "step": 37250 }, { "epoch": 1.53, "learning_rate": 2.4703276267570968e-05, "loss": 0.4724, "step": 37500 }, { "epoch": 1.53, "eval_accuracy": 0.8092369477911646, "eval_loss": 0.5035138726234436, "eval_runtime": 9.0797, "eval_samples_per_second": 274.237, "step": 37500 }, { "epoch": 1.54, "learning_rate": 2.453235245856807e-05, "loss": 0.4451, "step": 37750 }, { "epoch": 1.55, "learning_rate": 2.436142864956517e-05, "loss": 0.4396, "step": 38000 }, { "epoch": 1.55, "eval_accuracy": 0.7971887550200804, "eval_loss": 0.5381002426147461, "eval_runtime": 9.1884, "eval_samples_per_second": 270.995, "step": 38000 }, { "epoch": 1.56, "learning_rate": 2.4190504840562272e-05, "loss": 0.459, "step": 38250 }, { "epoch": 1.57, "learning_rate": 2.4019581031559374e-05, "loss": 0.4561, "step": 38500 }, { "epoch": 1.57, "eval_accuracy": 0.8100401606425702, "eval_loss": 0.4913768470287323, "eval_runtime": 9.0787, "eval_samples_per_second": 274.268, "step": 38500 }, { "epoch": 1.58, "learning_rate": 2.3848657222556474e-05, "loss": 0.4503, "step": 38750 }, { "epoch": 1.59, "learning_rate": 2.3677733413553576e-05, "loss": 0.4643, "step": 39000 }, { "epoch": 1.59, "eval_accuracy": 0.8092369477911646, "eval_loss": 0.5171190500259399, "eval_runtime": 8.9997, "eval_samples_per_second": 276.677, "step": 39000 }, { "epoch": 1.6, "learning_rate": 2.3506809604550675e-05, "loss": 0.4624, "step": 39250 }, { "epoch": 1.61, "learning_rate": 2.3335885795547778e-05, "loss": 0.443, "step": 39500 }, { "epoch": 1.61, "eval_accuracy": 0.7951807228915663, "eval_loss": 0.5365191102027893, "eval_runtime": 9.0082, "eval_samples_per_second": 276.415, "step": 39500 }, { "epoch": 1.62, "learning_rate": 2.3164961986544877e-05, "loss": 0.4467, "step": 39750 }, { "epoch": 1.63, "learning_rate": 2.299403817754198e-05, "loss": 0.4525, "step": 40000 }, { "epoch": 1.63, "eval_accuracy": 0.8036144578313253, "eval_loss": 0.5161953568458557, "eval_runtime": 9.0761, "eval_samples_per_second": 274.346, "step": 40000 }, { "epoch": 1.64, "learning_rate": 2.2823114368539082e-05, "loss": 0.4591, "step": 40250 }, { "epoch": 1.65, "learning_rate": 2.265219055953618e-05, "loss": 0.4618, "step": 40500 }, { "epoch": 1.65, "eval_accuracy": 0.8080321285140563, "eval_loss": 0.49769964814186096, "eval_runtime": 9.2939, "eval_samples_per_second": 267.916, "step": 40500 }, { "epoch": 1.66, "learning_rate": 2.2481266750533284e-05, "loss": 0.4434, "step": 40750 }, { "epoch": 1.67, "learning_rate": 2.2310342941530386e-05, "loss": 0.4561, "step": 41000 }, { "epoch": 1.67, "eval_accuracy": 0.8092369477911646, "eval_loss": 0.4977361559867859, "eval_runtime": 8.9616, "eval_samples_per_second": 277.852, "step": 41000 }, { "epoch": 1.68, "learning_rate": 2.2139419132527485e-05, "loss": 0.4483, "step": 41250 }, { "epoch": 1.69, "learning_rate": 2.1968495323524588e-05, "loss": 0.4512, "step": 41500 }, { "epoch": 1.69, "eval_accuracy": 0.802008032128514, "eval_loss": 0.4957820475101471, "eval_runtime": 9.0164, "eval_samples_per_second": 276.164, "step": 41500 }, { "epoch": 1.7, "learning_rate": 2.179757151452169e-05, "loss": 0.4473, "step": 41750 }, { "epoch": 1.71, "learning_rate": 2.162664770551879e-05, "loss": 0.4564, "step": 42000 }, { "epoch": 1.71, "eval_accuracy": 0.7987951807228916, "eval_loss": 0.5506803393363953, "eval_runtime": 9.1173, "eval_samples_per_second": 273.106, "step": 42000 }, { "epoch": 1.72, "learning_rate": 2.145572389651589e-05, "loss": 0.4491, "step": 42250 }, { "epoch": 1.73, "learning_rate": 2.128480008751299e-05, "loss": 0.4331, "step": 42500 }, { "epoch": 1.73, "eval_accuracy": 0.8140562248995984, "eval_loss": 0.5114361047744751, "eval_runtime": 9.554, "eval_samples_per_second": 260.625, "step": 42500 }, { "epoch": 1.74, "learning_rate": 2.1113876278510094e-05, "loss": 0.4631, "step": 42750 }, { "epoch": 1.75, "learning_rate": 2.0942952469507193e-05, "loss": 0.4598, "step": 43000 }, { "epoch": 1.75, "eval_accuracy": 0.8032128514056225, "eval_loss": 0.5167751312255859, "eval_runtime": 9.065, "eval_samples_per_second": 274.682, "step": 43000 }, { "epoch": 1.76, "learning_rate": 2.0772028660504295e-05, "loss": 0.433, "step": 43250 }, { "epoch": 1.77, "learning_rate": 2.0601104851501398e-05, "loss": 0.4711, "step": 43500 }, { "epoch": 1.77, "eval_accuracy": 0.8100401606425702, "eval_loss": 0.49471431970596313, "eval_runtime": 9.4303, "eval_samples_per_second": 264.043, "step": 43500 }, { "epoch": 1.78, "learning_rate": 2.0430181042498497e-05, "loss": 0.4638, "step": 43750 }, { "epoch": 1.79, "learning_rate": 2.02592572334956e-05, "loss": 0.4133, "step": 44000 }, { "epoch": 1.79, "eval_accuracy": 0.8144578313253013, "eval_loss": 0.5469810962677002, "eval_runtime": 9.0719, "eval_samples_per_second": 274.474, "step": 44000 }, { "epoch": 1.8, "learning_rate": 2.0088333424492702e-05, "loss": 0.4558, "step": 44250 }, { "epoch": 1.81, "learning_rate": 1.99174096154898e-05, "loss": 0.4442, "step": 44500 }, { "epoch": 1.81, "eval_accuracy": 0.8024096385542169, "eval_loss": 0.5169267654418945, "eval_runtime": 9.3219, "eval_samples_per_second": 267.114, "step": 44500 }, { "epoch": 1.82, "learning_rate": 1.97464858064869e-05, "loss": 0.4412, "step": 44750 }, { "epoch": 1.83, "learning_rate": 1.9575561997484003e-05, "loss": 0.4425, "step": 45000 }, { "epoch": 1.83, "eval_accuracy": 0.8076305220883534, "eval_loss": 0.5110898613929749, "eval_runtime": 9.0459, "eval_samples_per_second": 275.262, "step": 45000 }, { "epoch": 1.84, "learning_rate": 1.9404638188481102e-05, "loss": 0.4443, "step": 45250 }, { "epoch": 1.85, "learning_rate": 1.9233714379478204e-05, "loss": 0.4522, "step": 45500 }, { "epoch": 1.85, "eval_accuracy": 0.8032128514056225, "eval_loss": 0.527312159538269, "eval_runtime": 8.9896, "eval_samples_per_second": 276.986, "step": 45500 }, { "epoch": 1.86, "learning_rate": 1.9062790570475307e-05, "loss": 0.45, "step": 45750 }, { "epoch": 1.87, "learning_rate": 1.8891866761472406e-05, "loss": 0.4451, "step": 46000 }, { "epoch": 1.87, "eval_accuracy": 0.8132530120481928, "eval_loss": 0.4912641644477844, "eval_runtime": 9.0608, "eval_samples_per_second": 274.811, "step": 46000 }, { "epoch": 1.88, "learning_rate": 1.872094295246951e-05, "loss": 0.4321, "step": 46250 }, { "epoch": 1.89, "learning_rate": 1.855001914346661e-05, "loss": 0.463, "step": 46500 }, { "epoch": 1.89, "eval_accuracy": 0.8040160642570281, "eval_loss": 0.5067523717880249, "eval_runtime": 8.9793, "eval_samples_per_second": 277.305, "step": 46500 }, { "epoch": 1.9, "learning_rate": 1.837909533446371e-05, "loss": 0.44, "step": 46750 }, { "epoch": 1.91, "learning_rate": 1.8208171525460813e-05, "loss": 0.4173, "step": 47000 }, { "epoch": 1.91, "eval_accuracy": 0.8008032128514057, "eval_loss": 0.5309551954269409, "eval_runtime": 9.0236, "eval_samples_per_second": 275.944, "step": 47000 }, { "epoch": 1.93, "learning_rate": 1.803724771645791e-05, "loss": 0.4377, "step": 47250 }, { "epoch": 1.94, "learning_rate": 1.7866323907455014e-05, "loss": 0.4336, "step": 47500 }, { "epoch": 1.94, "eval_accuracy": 0.8036144578313253, "eval_loss": 0.5289146900177002, "eval_runtime": 9.1489, "eval_samples_per_second": 272.164, "step": 47500 }, { "epoch": 1.95, "learning_rate": 1.7695400098452113e-05, "loss": 0.4424, "step": 47750 }, { "epoch": 1.96, "learning_rate": 1.7524476289449216e-05, "loss": 0.4266, "step": 48000 }, { "epoch": 1.96, "eval_accuracy": 0.8048192771084337, "eval_loss": 0.516535758972168, "eval_runtime": 9.1038, "eval_samples_per_second": 273.513, "step": 48000 }, { "epoch": 1.97, "learning_rate": 1.735355248044632e-05, "loss": 0.4276, "step": 48250 }, { "epoch": 1.98, "learning_rate": 1.7182628671443417e-05, "loss": 0.4336, "step": 48500 }, { "epoch": 1.98, "eval_accuracy": 0.8104417670682731, "eval_loss": 0.5314404368400574, "eval_runtime": 9.1174, "eval_samples_per_second": 273.103, "step": 48500 }, { "epoch": 1.99, "learning_rate": 1.701170486244052e-05, "loss": 0.4286, "step": 48750 }, { "epoch": 2.0, "learning_rate": 1.6840781053437622e-05, "loss": 0.4342, "step": 49000 }, { "epoch": 2.0, "eval_accuracy": 0.814859437751004, "eval_loss": 0.4977148473262787, "eval_runtime": 9.12, "eval_samples_per_second": 273.026, "step": 49000 }, { "epoch": 2.01, "learning_rate": 1.666985724443472e-05, "loss": 0.3449, "step": 49250 }, { "epoch": 2.02, "learning_rate": 1.6498933435431824e-05, "loss": 0.3156, "step": 49500 }, { "epoch": 2.02, "eval_accuracy": 0.8128514056224899, "eval_loss": 0.5999274849891663, "eval_runtime": 9.1186, "eval_samples_per_second": 273.069, "step": 49500 }, { "epoch": 2.03, "learning_rate": 1.6328009626428927e-05, "loss": 0.2989, "step": 49750 }, { "epoch": 2.04, "learning_rate": 1.6157085817426022e-05, "loss": 0.3013, "step": 50000 }, { "epoch": 2.04, "eval_accuracy": 0.8028112449799196, "eval_loss": 0.6256367564201355, "eval_runtime": 9.2774, "eval_samples_per_second": 268.395, "step": 50000 }, { "epoch": 2.05, "learning_rate": 1.5986162008423125e-05, "loss": 0.2947, "step": 50250 }, { "epoch": 2.06, "learning_rate": 1.5815238199420227e-05, "loss": 0.2849, "step": 50500 }, { "epoch": 2.06, "eval_accuracy": 0.8116465863453816, "eval_loss": 0.6139395236968994, "eval_runtime": 9.009, "eval_samples_per_second": 276.389, "step": 50500 }, { "epoch": 2.07, "learning_rate": 1.5644314390417327e-05, "loss": 0.2929, "step": 50750 }, { "epoch": 2.08, "learning_rate": 1.547339058141443e-05, "loss": 0.2927, "step": 51000 }, { "epoch": 2.08, "eval_accuracy": 0.808433734939759, "eval_loss": 0.5880476236343384, "eval_runtime": 8.9456, "eval_samples_per_second": 278.35, "step": 51000 }, { "epoch": 2.09, "learning_rate": 1.530246677241153e-05, "loss": 0.2725, "step": 51250 }, { "epoch": 2.1, "learning_rate": 1.5131542963408632e-05, "loss": 0.2944, "step": 51500 }, { "epoch": 2.1, "eval_accuracy": 0.8064257028112449, "eval_loss": 0.6175798177719116, "eval_runtime": 9.0878, "eval_samples_per_second": 273.995, "step": 51500 }, { "epoch": 2.11, "learning_rate": 1.4960619154405733e-05, "loss": 0.2812, "step": 51750 }, { "epoch": 2.12, "learning_rate": 1.4789695345402834e-05, "loss": 0.2869, "step": 52000 }, { "epoch": 2.12, "eval_accuracy": 0.8100401606425702, "eval_loss": 0.5959600806236267, "eval_runtime": 9.5023, "eval_samples_per_second": 262.042, "step": 52000 }, { "epoch": 2.13, "learning_rate": 1.4618771536399937e-05, "loss": 0.2726, "step": 52250 }, { "epoch": 2.14, "learning_rate": 1.4447847727397037e-05, "loss": 0.3034, "step": 52500 }, { "epoch": 2.14, "eval_accuracy": 0.7907630522088354, "eval_loss": 0.6271839141845703, "eval_runtime": 9.0834, "eval_samples_per_second": 274.126, "step": 52500 }, { "epoch": 2.15, "learning_rate": 1.4276923918394136e-05, "loss": 0.3019, "step": 52750 }, { "epoch": 2.16, "learning_rate": 1.4106000109391237e-05, "loss": 0.279, "step": 53000 }, { "epoch": 2.16, "eval_accuracy": 0.8096385542168675, "eval_loss": 0.6030941009521484, "eval_runtime": 9.4494, "eval_samples_per_second": 263.51, "step": 53000 }, { "epoch": 2.17, "learning_rate": 1.3935076300388338e-05, "loss": 0.3079, "step": 53250 }, { "epoch": 2.18, "learning_rate": 1.376415249138544e-05, "loss": 0.2896, "step": 53500 }, { "epoch": 2.18, "eval_accuracy": 0.8068273092369478, "eval_loss": 0.6132158637046814, "eval_runtime": 9.0141, "eval_samples_per_second": 276.233, "step": 53500 }, { "epoch": 2.19, "learning_rate": 1.3593228682382541e-05, "loss": 0.2823, "step": 53750 }, { "epoch": 2.2, "learning_rate": 1.3422304873379642e-05, "loss": 0.2952, "step": 54000 }, { "epoch": 2.2, "eval_accuracy": 0.8064257028112449, "eval_loss": 0.6195886135101318, "eval_runtime": 8.9618, "eval_samples_per_second": 277.847, "step": 54000 }, { "epoch": 2.21, "learning_rate": 1.3251381064376745e-05, "loss": 0.3049, "step": 54250 }, { "epoch": 2.22, "learning_rate": 1.3080457255373846e-05, "loss": 0.2921, "step": 54500 }, { "epoch": 2.22, "eval_accuracy": 0.8076305220883534, "eval_loss": 0.6113378405570984, "eval_runtime": 9.0032, "eval_samples_per_second": 276.569, "step": 54500 }, { "epoch": 2.23, "learning_rate": 1.2909533446370946e-05, "loss": 0.2765, "step": 54750 }, { "epoch": 2.24, "learning_rate": 1.2738609637368049e-05, "loss": 0.2958, "step": 55000 }, { "epoch": 2.24, "eval_accuracy": 0.8064257028112449, "eval_loss": 0.6207754611968994, "eval_runtime": 9.0338, "eval_samples_per_second": 275.632, "step": 55000 }, { "epoch": 2.25, "learning_rate": 1.2567685828365146e-05, "loss": 0.2888, "step": 55250 }, { "epoch": 2.26, "learning_rate": 1.239676201936225e-05, "loss": 0.2996, "step": 55500 }, { "epoch": 2.26, "eval_accuracy": 0.8128514056224899, "eval_loss": 0.5894312262535095, "eval_runtime": 8.9904, "eval_samples_per_second": 276.961, "step": 55500 }, { "epoch": 2.27, "learning_rate": 1.2225838210359351e-05, "loss": 0.2994, "step": 55750 }, { "epoch": 2.28, "learning_rate": 1.205491440135645e-05, "loss": 0.288, "step": 56000 }, { "epoch": 2.28, "eval_accuracy": 0.8052208835341366, "eval_loss": 0.6171417832374573, "eval_runtime": 9.2622, "eval_samples_per_second": 268.834, "step": 56000 }, { "epoch": 2.29, "learning_rate": 1.1883990592353553e-05, "loss": 0.2902, "step": 56250 }, { "epoch": 2.3, "learning_rate": 1.1713066783350654e-05, "loss": 0.3005, "step": 56500 }, { "epoch": 2.3, "eval_accuracy": 0.8112449799196787, "eval_loss": 0.5888203978538513, "eval_runtime": 8.9926, "eval_samples_per_second": 276.894, "step": 56500 }, { "epoch": 2.31, "learning_rate": 1.1542142974347755e-05, "loss": 0.3011, "step": 56750 }, { "epoch": 2.32, "learning_rate": 1.1371219165344857e-05, "loss": 0.3082, "step": 57000 }, { "epoch": 2.32, "eval_accuracy": 0.8076305220883534, "eval_loss": 0.6049151420593262, "eval_runtime": 9.2353, "eval_samples_per_second": 269.619, "step": 57000 }, { "epoch": 2.33, "learning_rate": 1.1200295356341956e-05, "loss": 0.273, "step": 57250 }, { "epoch": 2.34, "learning_rate": 1.1029371547339059e-05, "loss": 0.2773, "step": 57500 }, { "epoch": 2.34, "eval_accuracy": 0.810843373493976, "eval_loss": 0.6248819231987, "eval_runtime": 9.0359, "eval_samples_per_second": 275.569, "step": 57500 }, { "epoch": 2.35, "learning_rate": 1.085844773833616e-05, "loss": 0.3164, "step": 57750 }, { "epoch": 2.36, "learning_rate": 1.068752392933326e-05, "loss": 0.2824, "step": 58000 }, { "epoch": 2.36, "eval_accuracy": 0.8136546184738955, "eval_loss": 0.5784918069839478, "eval_runtime": 9.2382, "eval_samples_per_second": 269.533, "step": 58000 }, { "epoch": 2.37, "learning_rate": 1.0516600120330363e-05, "loss": 0.2992, "step": 58250 }, { "epoch": 2.38, "learning_rate": 1.0345676311327464e-05, "loss": 0.293, "step": 58500 }, { "epoch": 2.38, "eval_accuracy": 0.8072289156626506, "eval_loss": 0.611821711063385, "eval_runtime": 9.0825, "eval_samples_per_second": 274.153, "step": 58500 }, { "epoch": 2.39, "learning_rate": 1.0174752502324563e-05, "loss": 0.2742, "step": 58750 }, { "epoch": 2.4, "learning_rate": 1.0003828693321665e-05, "loss": 0.2927, "step": 59000 }, { "epoch": 2.4, "eval_accuracy": 0.804417670682731, "eval_loss": 0.6135737895965576, "eval_runtime": 9.1843, "eval_samples_per_second": 271.114, "step": 59000 }, { "epoch": 2.41, "learning_rate": 9.832904884318766e-06, "loss": 0.282, "step": 59250 }, { "epoch": 2.42, "learning_rate": 9.661981075315867e-06, "loss": 0.3021, "step": 59500 }, { "epoch": 2.42, "eval_accuracy": 0.8088353413654619, "eval_loss": 0.5996263027191162, "eval_runtime": 9.1543, "eval_samples_per_second": 272.003, "step": 59500 }, { "epoch": 2.43, "learning_rate": 9.49105726631297e-06, "loss": 0.2931, "step": 59750 }, { "epoch": 2.44, "learning_rate": 9.320133457310069e-06, "loss": 0.2745, "step": 60000 }, { "epoch": 2.44, "eval_accuracy": 0.810843373493976, "eval_loss": 0.5868379473686218, "eval_runtime": 9.1158, "eval_samples_per_second": 273.152, "step": 60000 }, { "epoch": 2.45, "learning_rate": 9.149209648307171e-06, "loss": 0.2897, "step": 60250 }, { "epoch": 2.46, "learning_rate": 8.978285839304272e-06, "loss": 0.2919, "step": 60500 }, { "epoch": 2.46, "eval_accuracy": 0.8096385542168675, "eval_loss": 0.5863232016563416, "eval_runtime": 9.7927, "eval_samples_per_second": 254.27, "step": 60500 }, { "epoch": 2.48, "learning_rate": 8.807362030301373e-06, "loss": 0.2903, "step": 60750 }, { "epoch": 2.49, "learning_rate": 8.636438221298475e-06, "loss": 0.2662, "step": 61000 }, { "epoch": 2.49, "eval_accuracy": 0.804417670682731, "eval_loss": 0.636022686958313, "eval_runtime": 10.4967, "eval_samples_per_second": 237.217, "step": 61000 }, { "epoch": 2.5, "learning_rate": 8.465514412295575e-06, "loss": 0.2908, "step": 61250 }, { "epoch": 2.51, "learning_rate": 8.294590603292677e-06, "loss": 0.2977, "step": 61500 }, { "epoch": 2.51, "eval_accuracy": 0.8104417670682731, "eval_loss": 0.596953272819519, "eval_runtime": 9.4896, "eval_samples_per_second": 262.393, "step": 61500 }, { "epoch": 2.52, "learning_rate": 8.123666794289778e-06, "loss": 0.2723, "step": 61750 }, { "epoch": 2.53, "learning_rate": 7.952742985286879e-06, "loss": 0.2785, "step": 62000 }, { "epoch": 2.53, "eval_accuracy": 0.8164658634538152, "eval_loss": 0.5756428241729736, "eval_runtime": 9.105, "eval_samples_per_second": 273.476, "step": 62000 } ], "max_steps": 73632, "num_train_epochs": 3, "total_flos": 55970352570961800, "trial_name": null, "trial_params": null }