{ "best_metric": null, "best_model_checkpoint": null, "epoch": 3.0, "global_step": 87, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.03, "learning_rate": 3e-05, "loss": 2.6543, "step": 1 }, { "epoch": 0.03, "eval_accuracy": 0.05133992343294669, "eval_loss": 2.611328125, "eval_runtime": 3.8997, "eval_samples_per_second": 22.053, "eval_steps_per_second": 1.539, "step": 1 }, { "epoch": 0.07, "learning_rate": 3e-05, "loss": 2.6077, "step": 2 }, { "epoch": 0.07, "eval_accuracy": 0.05133992343294669, "eval_loss": 2.611328125, "eval_runtime": 3.9386, "eval_samples_per_second": 21.835, "eval_steps_per_second": 1.523, "step": 2 }, { "epoch": 0.1, "learning_rate": 2.9990221430845156e-05, "loss": 2.5964, "step": 3 }, { "epoch": 0.1, "eval_accuracy": 0.05192560425118565, "eval_loss": 2.560546875, "eval_runtime": 3.8896, "eval_samples_per_second": 22.11, "eval_steps_per_second": 1.543, "step": 3 }, { "epoch": 0.14, "learning_rate": 2.996089847276925e-05, "loss": 2.7302, "step": 4 }, { "epoch": 0.14, "eval_accuracy": 0.052582709559453746, "eval_loss": 2.5234375, "eval_runtime": 4.1976, "eval_samples_per_second": 20.488, "eval_steps_per_second": 1.429, "step": 4 }, { "epoch": 0.17, "learning_rate": 2.9912069357315394e-05, "loss": 2.7004, "step": 5 }, { "epoch": 0.17, "eval_accuracy": 0.05293983200959945, "eval_loss": 2.5078125, "eval_runtime": 4.2482, "eval_samples_per_second": 20.244, "eval_steps_per_second": 1.412, "step": 5 }, { "epoch": 0.21, "learning_rate": 2.9843797748334563e-05, "loss": 2.5681, "step": 6 }, { "epoch": 0.21, "eval_accuracy": 0.05318267527569853, "eval_loss": 2.494140625, "eval_runtime": 4.2937, "eval_samples_per_second": 20.029, "eval_steps_per_second": 1.397, "step": 6 }, { "epoch": 0.24, "learning_rate": 2.975617265898004e-05, "loss": 2.6404, "step": 7 }, { "epoch": 0.24, "eval_accuracy": 0.05342551854179761, "eval_loss": 2.48828125, "eval_runtime": 4.3127, "eval_samples_per_second": 19.941, "eval_steps_per_second": 1.391, "step": 7 }, { "epoch": 0.28, "learning_rate": 2.96493083356513e-05, "loss": 2.5325, "step": 8 }, { "epoch": 0.28, "eval_accuracy": 0.053554082623850065, "eval_loss": 2.48046875, "eval_runtime": 4.2885, "eval_samples_per_second": 20.053, "eval_steps_per_second": 1.399, "step": 8 }, { "epoch": 0.31, "learning_rate": 2.952334410903845e-05, "loss": 2.7205, "step": 9 }, { "epoch": 0.31, "eval_accuracy": 0.053554082623850065, "eval_loss": 2.474609375, "eval_runtime": 3.8974, "eval_samples_per_second": 22.066, "eval_steps_per_second": 1.539, "step": 9 }, { "epoch": 0.34, "learning_rate": 2.937844421246162e-05, "loss": 2.5149, "step": 10 }, { "epoch": 0.34, "eval_accuracy": 0.05329695445974516, "eval_loss": 2.46484375, "eval_runtime": 4.2968, "eval_samples_per_second": 20.015, "eval_steps_per_second": 1.396, "step": 10 }, { "epoch": 0.38, "learning_rate": 2.9214797567742036e-05, "loss": 2.5017, "step": 11 }, { "epoch": 0.38, "eval_accuracy": 0.05345408833780927, "eval_loss": 2.451171875, "eval_runtime": 4.2885, "eval_samples_per_second": 20.054, "eval_steps_per_second": 1.399, "step": 11 }, { "epoch": 0.41, "learning_rate": 2.9032617538884018e-05, "loss": 2.7026, "step": 12 }, { "epoch": 0.41, "eval_accuracy": 0.053896920175989946, "eval_loss": 2.439453125, "eval_runtime": 4.2979, "eval_samples_per_second": 20.01, "eval_steps_per_second": 1.396, "step": 12 }, { "epoch": 0.45, "learning_rate": 2.8832141653888998e-05, "loss": 2.5259, "step": 13 }, { "epoch": 0.45, "eval_accuracy": 0.05425404262613565, "eval_loss": 2.431640625, "eval_runtime": 4.2986, "eval_samples_per_second": 20.007, "eval_steps_per_second": 1.396, "step": 13 }, { "epoch": 0.48, "learning_rate": 2.861363129506436e-05, "loss": 2.563, "step": 14 }, { "epoch": 0.48, "eval_accuracy": 0.05455402548425804, "eval_loss": 2.421875, "eval_runtime": 4.2879, "eval_samples_per_second": 20.056, "eval_steps_per_second": 1.399, "step": 14 }, { "epoch": 0.52, "learning_rate": 2.8377371358230733e-05, "loss": 2.5679, "step": 15 }, { "epoch": 0.52, "eval_accuracy": 0.055011142220444544, "eval_loss": 2.4140625, "eval_runtime": 3.9042, "eval_samples_per_second": 22.027, "eval_steps_per_second": 1.537, "step": 15 }, { "epoch": 0.55, "learning_rate": 2.8123669881272247e-05, "loss": 2.3701, "step": 16 }, { "epoch": 0.55, "eval_accuracy": 0.05512542140449117, "eval_loss": 2.408203125, "eval_runtime": 3.9081, "eval_samples_per_second": 22.006, "eval_steps_per_second": 1.535, "step": 16 }, { "epoch": 0.59, "learning_rate": 2.8123669881272247e-05, "loss": 2.4739, "step": 17 }, { "epoch": 0.59, "eval_accuracy": 0.05512542140449117, "eval_loss": 2.408203125, "eval_runtime": 4.3256, "eval_samples_per_second": 19.882, "eval_steps_per_second": 1.387, "step": 17 }, { "epoch": 0.62, "learning_rate": 2.7852857642513838e-05, "loss": 2.481, "step": 18 }, { "epoch": 0.62, "eval_accuracy": 0.054839723444374606, "eval_loss": 2.40234375, "eval_runtime": 3.8988, "eval_samples_per_second": 22.058, "eval_steps_per_second": 1.539, "step": 18 }, { "epoch": 0.66, "learning_rate": 2.7565287729449473e-05, "loss": 2.5795, "step": 19 }, { "epoch": 0.66, "eval_accuracy": 0.054896863036397923, "eval_loss": 2.39453125, "eval_runtime": 4.2984, "eval_samples_per_second": 20.007, "eval_steps_per_second": 1.396, "step": 19 }, { "epoch": 0.69, "learning_rate": 2.7261335078383377e-05, "loss": 2.4902, "step": 20 }, { "epoch": 0.69, "eval_accuracy": 0.05491114793440375, "eval_loss": 2.38671875, "eval_runtime": 4.3044, "eval_samples_per_second": 19.98, "eval_steps_per_second": 1.394, "step": 20 }, { "epoch": 0.72, "learning_rate": 2.6941395985584656e-05, "loss": 2.4509, "step": 21 }, { "epoch": 0.72, "eval_accuracy": 0.055139706302497, "eval_loss": 2.380859375, "eval_runtime": 4.2949, "eval_samples_per_second": 20.024, "eval_steps_per_second": 1.397, "step": 21 }, { "epoch": 0.76, "learning_rate": 2.6605887590592547e-05, "loss": 2.6052, "step": 22 }, { "epoch": 0.76, "eval_accuracy": 0.055325409976572766, "eval_loss": 2.373046875, "eval_runtime": 4.2966, "eval_samples_per_second": 20.016, "eval_steps_per_second": 1.396, "step": 22 }, { "epoch": 0.79, "learning_rate": 2.6255247332346036e-05, "loss": 2.3323, "step": 23 }, { "epoch": 0.79, "eval_accuracy": 0.05552539854865436, "eval_loss": 2.36328125, "eval_runtime": 3.5753, "eval_samples_per_second": 24.054, "eval_steps_per_second": 1.678, "step": 23 }, { "epoch": 0.83, "learning_rate": 2.5889932378846963e-05, "loss": 2.5994, "step": 24 }, { "epoch": 0.83, "eval_accuracy": 0.05563967773270099, "eval_loss": 2.35546875, "eval_runtime": 4.2029, "eval_samples_per_second": 20.462, "eval_steps_per_second": 1.428, "step": 24 }, { "epoch": 0.86, "learning_rate": 2.5510419031100137e-05, "loss": 2.3347, "step": 25 }, { "epoch": 0.86, "eval_accuracy": 0.05558253814067768, "eval_loss": 2.34765625, "eval_runtime": 4.4126, "eval_samples_per_second": 19.49, "eval_steps_per_second": 1.36, "step": 25 }, { "epoch": 0.9, "learning_rate": 2.5117202102107707e-05, "loss": 2.421, "step": 26 }, { "epoch": 0.9, "eval_accuracy": 0.05586823610079424, "eval_loss": 2.33984375, "eval_runtime": 4.3, "eval_samples_per_second": 20.0, "eval_steps_per_second": 1.395, "step": 26 }, { "epoch": 0.93, "learning_rate": 2.4710794271727415e-05, "loss": 2.5337, "step": 27 }, { "epoch": 0.93, "eval_accuracy": 0.05603965487686418, "eval_loss": 2.3359375, "eval_runtime": 3.2828, "eval_samples_per_second": 26.197, "eval_steps_per_second": 1.828, "step": 27 }, { "epoch": 0.97, "learning_rate": 2.4291725418235848e-05, "loss": 2.4102, "step": 28 }, { "epoch": 0.97, "eval_accuracy": 0.0562539283469516, "eval_loss": 2.33203125, "eval_runtime": 4.3038, "eval_samples_per_second": 19.982, "eval_steps_per_second": 1.394, "step": 28 }, { "epoch": 1.0, "learning_rate": 2.3860541927468265e-05, "loss": 2.4309, "step": 29 }, { "epoch": 1.0, "eval_accuracy": 0.05643963202102737, "eval_loss": 2.326171875, "eval_runtime": 4.2875, "eval_samples_per_second": 20.058, "eval_steps_per_second": 1.399, "step": 29 }, { "epoch": 1.03, "learning_rate": 2.341780598043574e-05, "loss": 1.9305, "step": 30 }, { "epoch": 1.03, "eval_accuracy": 0.0563539226329924, "eval_loss": 2.322265625, "eval_runtime": 4.2962, "eval_samples_per_second": 20.017, "eval_steps_per_second": 1.397, "step": 30 }, { "epoch": 1.07, "learning_rate": 2.2964094820348302e-05, "loss": 1.8601, "step": 31 }, { "epoch": 1.07, "eval_accuracy": 0.056696760185132276, "eval_loss": 2.3203125, "eval_runtime": 4.32, "eval_samples_per_second": 19.907, "eval_steps_per_second": 1.389, "step": 31 }, { "epoch": 1.1, "learning_rate": 2.25e-05, "loss": 1.8682, "step": 32 }, { "epoch": 1.1, "eval_accuracy": 0.056425347123021545, "eval_loss": 2.328125, "eval_runtime": 4.3035, "eval_samples_per_second": 19.984, "eval_steps_per_second": 1.394, "step": 32 }, { "epoch": 1.14, "learning_rate": 2.2026126610496852e-05, "loss": 1.8657, "step": 33 }, { "epoch": 1.14, "eval_accuracy": 0.056382492429004054, "eval_loss": 2.353515625, "eval_runtime": 4.2925, "eval_samples_per_second": 20.035, "eval_steps_per_second": 1.398, "step": 33 }, { "epoch": 1.17, "learning_rate": 2.154309249233351e-05, "loss": 2.063, "step": 34 }, { "epoch": 1.17, "eval_accuracy": 0.056696760185132276, "eval_loss": 2.33984375, "eval_runtime": 3.2879, "eval_samples_per_second": 26.157, "eval_steps_per_second": 1.825, "step": 34 }, { "epoch": 1.21, "learning_rate": 2.105152742984713e-05, "loss": 1.6443, "step": 35 }, { "epoch": 1.21, "eval_accuracy": 0.05683960916519056, "eval_loss": 2.32421875, "eval_runtime": 4.3025, "eval_samples_per_second": 19.988, "eval_steps_per_second": 1.395, "step": 35 }, { "epoch": 1.24, "learning_rate": 2.055207233009872e-05, "loss": 1.7592, "step": 36 }, { "epoch": 1.24, "eval_accuracy": 0.056882463859208046, "eval_loss": 2.31640625, "eval_runtime": 4.2776, "eval_samples_per_second": 20.105, "eval_steps_per_second": 1.403, "step": 36 }, { "epoch": 1.28, "learning_rate": 2.0045378387252624e-05, "loss": 1.8981, "step": 37 }, { "epoch": 1.28, "eval_accuracy": 0.05686817896120222, "eval_loss": 2.310546875, "eval_runtime": 3.5855, "eval_samples_per_second": 23.986, "eval_steps_per_second": 1.673, "step": 37 }, { "epoch": 1.31, "learning_rate": 1.953210623354359e-05, "loss": 1.9379, "step": 38 }, { "epoch": 1.31, "eval_accuracy": 0.05728244100337124, "eval_loss": 2.3046875, "eval_runtime": 4.2888, "eval_samples_per_second": 20.052, "eval_steps_per_second": 1.399, "step": 38 }, { "epoch": 1.34, "learning_rate": 1.9012925077938318e-05, "loss": 1.6008, "step": 39 }, { "epoch": 1.34, "eval_accuracy": 0.057368150391406206, "eval_loss": 2.302734375, "eval_runtime": 4.1858, "eval_samples_per_second": 20.546, "eval_steps_per_second": 1.433, "step": 39 }, { "epoch": 1.38, "learning_rate": 1.848851183361466e-05, "loss": 1.595, "step": 40 }, { "epoch": 1.38, "eval_accuracy": 0.057453859779441174, "eval_loss": 2.302734375, "eval_runtime": 4.3045, "eval_samples_per_second": 19.979, "eval_steps_per_second": 1.394, "step": 40 }, { "epoch": 1.41, "learning_rate": 1.7959550235396002e-05, "loss": 1.7096, "step": 41 }, { "epoch": 1.41, "eval_accuracy": 0.05752528426947032, "eval_loss": 2.302734375, "eval_runtime": 4.3092, "eval_samples_per_second": 19.957, "eval_steps_per_second": 1.392, "step": 41 }, { "epoch": 1.45, "learning_rate": 1.7426729948291474e-05, "loss": 1.7245, "step": 42 }, { "epoch": 1.45, "eval_accuracy": 0.0575681389634878, "eval_loss": 2.302734375, "eval_runtime": 3.5821, "eval_samples_per_second": 24.008, "eval_steps_per_second": 1.675, "step": 42 }, { "epoch": 1.48, "learning_rate": 1.689074566830434e-05, "loss": 1.795, "step": 43 }, { "epoch": 1.48, "eval_accuracy": 0.05768241814753443, "eval_loss": 2.30078125, "eval_runtime": 4.2953, "eval_samples_per_second": 20.022, "eval_steps_per_second": 1.397, "step": 43 }, { "epoch": 1.52, "learning_rate": 1.635229621668098e-05, "loss": 1.7241, "step": 44 }, { "epoch": 1.52, "eval_accuracy": 0.057610993657505286, "eval_loss": 2.30078125, "eval_runtime": 4.3019, "eval_samples_per_second": 19.991, "eval_steps_per_second": 1.395, "step": 44 }, { "epoch": 1.55, "learning_rate": 1.5812083628781265e-05, "loss": 1.6356, "step": 45 }, { "epoch": 1.55, "eval_accuracy": 0.057639563453516944, "eval_loss": 2.298828125, "eval_runtime": 4.2961, "eval_samples_per_second": 20.018, "eval_steps_per_second": 1.397, "step": 45 }, { "epoch": 1.59, "learning_rate": 1.5270812238758407e-05, "loss": 1.77, "step": 46 }, { "epoch": 1.59, "eval_accuracy": 0.057553854065481976, "eval_loss": 2.296875, "eval_runtime": 4.3142, "eval_samples_per_second": 19.934, "eval_steps_per_second": 1.391, "step": 46 }, { "epoch": 1.62, "learning_rate": 1.4729187761241592e-05, "loss": 1.6675, "step": 47 }, { "epoch": 1.62, "eval_accuracy": 0.057668133249528596, "eval_loss": 2.29296875, "eval_runtime": 4.2943, "eval_samples_per_second": 20.026, "eval_steps_per_second": 1.397, "step": 47 }, { "epoch": 1.66, "learning_rate": 1.4187916371218739e-05, "loss": 1.6929, "step": 48 }, { "epoch": 1.66, "eval_accuracy": 0.05771098794354608, "eval_loss": 2.291015625, "eval_runtime": 3.8871, "eval_samples_per_second": 22.124, "eval_steps_per_second": 1.544, "step": 48 }, { "epoch": 1.69, "learning_rate": 1.3647703783319022e-05, "loss": 1.6635, "step": 49 }, { "epoch": 1.69, "eval_accuracy": 0.05762527855551111, "eval_loss": 2.291015625, "eval_runtime": 4.3038, "eval_samples_per_second": 19.982, "eval_steps_per_second": 1.394, "step": 49 }, { "epoch": 1.72, "learning_rate": 1.310925433169566e-05, "loss": 1.6093, "step": 50 }, { "epoch": 1.72, "eval_accuracy": 0.05781098222958688, "eval_loss": 2.291015625, "eval_runtime": 3.8811, "eval_samples_per_second": 22.159, "eval_steps_per_second": 1.546, "step": 50 }, { "epoch": 1.76, "learning_rate": 1.2573270051708529e-05, "loss": 1.7362, "step": 51 }, { "epoch": 1.76, "eval_accuracy": 0.05796811610765099, "eval_loss": 2.2890625, "eval_runtime": 4.3003, "eval_samples_per_second": 19.998, "eval_steps_per_second": 1.395, "step": 51 }, { "epoch": 1.79, "learning_rate": 1.2040449764604002e-05, "loss": 1.7015, "step": 52 }, { "epoch": 1.79, "eval_accuracy": 0.05805382549568596, "eval_loss": 2.28515625, "eval_runtime": 4.2877, "eval_samples_per_second": 20.057, "eval_steps_per_second": 1.399, "step": 52 }, { "epoch": 1.83, "learning_rate": 1.1511488166385349e-05, "loss": 1.9515, "step": 53 }, { "epoch": 1.83, "eval_accuracy": 0.05816810467973259, "eval_loss": 2.28125, "eval_runtime": 4.3062, "eval_samples_per_second": 19.971, "eval_steps_per_second": 1.393, "step": 53 }, { "epoch": 1.86, "learning_rate": 1.098707492206169e-05, "loss": 1.6494, "step": 54 }, { "epoch": 1.86, "eval_accuracy": 0.05801097080166848, "eval_loss": 2.27734375, "eval_runtime": 3.8918, "eval_samples_per_second": 22.098, "eval_steps_per_second": 1.542, "step": 54 }, { "epoch": 1.9, "learning_rate": 1.0467893766456408e-05, "loss": 1.7522, "step": 55 }, { "epoch": 1.9, "eval_accuracy": 0.05798240100565682, "eval_loss": 2.2734375, "eval_runtime": 4.1919, "eval_samples_per_second": 20.516, "eval_steps_per_second": 1.431, "step": 55 }, { "epoch": 1.93, "learning_rate": 9.954621612747371e-06, "loss": 1.7369, "step": 56 }, { "epoch": 1.93, "eval_accuracy": 0.05806811039369179, "eval_loss": 2.267578125, "eval_runtime": 4.3092, "eval_samples_per_second": 19.957, "eval_steps_per_second": 1.392, "step": 56 }, { "epoch": 1.97, "learning_rate": 9.447927669901284e-06, "loss": 1.6528, "step": 57 }, { "epoch": 1.97, "eval_accuracy": 0.058125249985715104, "eval_loss": 2.263671875, "eval_runtime": 3.8882, "eval_samples_per_second": 22.118, "eval_steps_per_second": 1.543, "step": 57 }, { "epoch": 2.0, "learning_rate": 8.948472570152874e-06, "loss": 1.51, "step": 58 }, { "epoch": 2.0, "eval_accuracy": 0.05826809896577338, "eval_loss": 2.26171875, "eval_runtime": 4.2958, "eval_samples_per_second": 20.019, "eval_steps_per_second": 1.397, "step": 58 }, { "epoch": 2.03, "learning_rate": 8.456907507666488e-06, "loss": 1.4579, "step": 59 }, { "epoch": 2.03, "eval_accuracy": 0.05845380263984915, "eval_loss": 2.263671875, "eval_runtime": 4.2959, "eval_samples_per_second": 20.019, "eval_steps_per_second": 1.397, "step": 59 }, { "epoch": 2.07, "learning_rate": 7.97387338950315e-06, "loss": 1.2645, "step": 60 }, { "epoch": 2.07, "eval_accuracy": 0.05851094223187246, "eval_loss": 2.26953125, "eval_runtime": 4.2999, "eval_samples_per_second": 20.0, "eval_steps_per_second": 1.395, "step": 60 }, { "epoch": 2.1, "learning_rate": 7.500000000000004e-06, "loss": 1.2424, "step": 61 }, { "epoch": 2.1, "eval_accuracy": 0.05839666304782584, "eval_loss": 2.27734375, "eval_runtime": 4.1932, "eval_samples_per_second": 20.51, "eval_steps_per_second": 1.431, "step": 61 }, { "epoch": 2.14, "learning_rate": 7.035905179651701e-06, "loss": 1.2117, "step": 62 }, { "epoch": 2.14, "eval_accuracy": 0.058425232843837493, "eval_loss": 2.2890625, "eval_runtime": 4.3132, "eval_samples_per_second": 19.939, "eval_steps_per_second": 1.391, "step": 62 }, { "epoch": 2.17, "learning_rate": 6.582194019564266e-06, "loss": 1.4059, "step": 63 }, { "epoch": 2.17, "eval_accuracy": 0.058039540597680135, "eval_loss": 2.30078125, "eval_runtime": 3.5756, "eval_samples_per_second": 24.052, "eval_steps_per_second": 1.678, "step": 63 }, { "epoch": 2.21, "learning_rate": 6.1394580725317366e-06, "loss": 1.328, "step": 64 }, { "epoch": 2.21, "eval_accuracy": 0.05811096508770927, "eval_loss": 2.314453125, "eval_runtime": 4.2869, "eval_samples_per_second": 20.061, "eval_steps_per_second": 1.4, "step": 64 }, { "epoch": 2.24, "learning_rate": 5.708274581764155e-06, "loss": 1.3436, "step": 65 }, { "epoch": 2.24, "eval_accuracy": 0.05795383120964517, "eval_loss": 2.328125, "eval_runtime": 3.8909, "eval_samples_per_second": 22.103, "eval_steps_per_second": 1.542, "step": 65 }, { "epoch": 2.28, "learning_rate": 5.289205728272587e-06, "loss": 1.389, "step": 66 }, { "epoch": 2.28, "eval_accuracy": 0.058039540597680135, "eval_loss": 2.337890625, "eval_runtime": 4.3156, "eval_samples_per_second": 19.928, "eval_steps_per_second": 1.39, "step": 66 }, { "epoch": 2.31, "learning_rate": 4.882797897892293e-06, "loss": 1.2127, "step": 67 }, { "epoch": 2.31, "eval_accuracy": 0.0580252556996743, "eval_loss": 2.33984375, "eval_runtime": 4.3, "eval_samples_per_second": 20.0, "eval_steps_per_second": 1.395, "step": 67 }, { "epoch": 2.34, "learning_rate": 4.4895809688998655e-06, "loss": 1.3645, "step": 68 }, { "epoch": 2.34, "eval_accuracy": 0.058096680189703445, "eval_loss": 2.341796875, "eval_runtime": 4.216, "eval_samples_per_second": 20.398, "eval_steps_per_second": 1.423, "step": 68 }, { "epoch": 2.38, "learning_rate": 4.110067621153041e-06, "loss": 1.3389, "step": 69 }, { "epoch": 2.38, "eval_accuracy": 0.05805382549568596, "eval_loss": 2.337890625, "eval_runtime": 3.9033, "eval_samples_per_second": 22.032, "eval_steps_per_second": 1.537, "step": 69 }, { "epoch": 2.41, "learning_rate": 3.744752667653965e-06, "loss": 1.2549, "step": 70 }, { "epoch": 2.41, "eval_accuracy": 0.05808239529169762, "eval_loss": 2.33203125, "eval_runtime": 3.5918, "eval_samples_per_second": 23.943, "eval_steps_per_second": 1.67, "step": 70 }, { "epoch": 2.45, "learning_rate": 3.394112409407455e-06, "loss": 1.2193, "step": 71 }, { "epoch": 2.45, "eval_accuracy": 0.05816810467973259, "eval_loss": 2.328125, "eval_runtime": 4.2985, "eval_samples_per_second": 20.007, "eval_steps_per_second": 1.396, "step": 71 }, { "epoch": 2.48, "learning_rate": 3.0586040144153436e-06, "loss": 1.3617, "step": 72 }, { "epoch": 2.48, "eval_accuracy": 0.0583252385577967, "eval_loss": 2.322265625, "eval_runtime": 3.2953, "eval_samples_per_second": 26.097, "eval_steps_per_second": 1.821, "step": 72 }, { "epoch": 2.52, "learning_rate": 2.7386649216166233e-06, "loss": 1.2336, "step": 73 }, { "epoch": 2.52, "eval_accuracy": 0.058253814067767556, "eval_loss": 2.318359375, "eval_runtime": 4.1807, "eval_samples_per_second": 20.571, "eval_steps_per_second": 1.435, "step": 73 }, { "epoch": 2.55, "learning_rate": 2.4347122705505303e-06, "loss": 1.179, "step": 74 }, { "epoch": 2.55, "eval_accuracy": 0.05829666876178504, "eval_loss": 2.314453125, "eval_runtime": 3.5945, "eval_samples_per_second": 23.926, "eval_steps_per_second": 1.669, "step": 74 }, { "epoch": 2.59, "learning_rate": 2.1471423574861643e-06, "loss": 1.2468, "step": 75 }, { "epoch": 2.59, "eval_accuracy": 0.058282383863779215, "eval_loss": 2.3125, "eval_runtime": 3.5979, "eval_samples_per_second": 23.903, "eval_steps_per_second": 1.668, "step": 75 }, { "epoch": 2.62, "learning_rate": 1.8763301187277554e-06, "loss": 1.3325, "step": 76 }, { "epoch": 2.62, "eval_accuracy": 0.05829666876178504, "eval_loss": 2.30859375, "eval_runtime": 3.9093, "eval_samples_per_second": 21.999, "eval_steps_per_second": 1.535, "step": 76 }, { "epoch": 2.66, "learning_rate": 1.6226286417692666e-06, "loss": 1.1471, "step": 77 }, { "epoch": 2.66, "eval_accuracy": 0.058339523455802525, "eval_loss": 2.306640625, "eval_runtime": 4.2929, "eval_samples_per_second": 20.033, "eval_steps_per_second": 1.398, "step": 77 }, { "epoch": 2.69, "learning_rate": 1.3863687049356465e-06, "loss": 1.3123, "step": 78 }, { "epoch": 2.69, "eval_accuracy": 0.0583252385577967, "eval_loss": 2.306640625, "eval_runtime": 4.3098, "eval_samples_per_second": 19.955, "eval_steps_per_second": 1.392, "step": 78 }, { "epoch": 2.72, "learning_rate": 1.1678583461110026e-06, "loss": 1.3285, "step": 79 }, { "epoch": 2.72, "eval_accuracy": 0.05845380263984915, "eval_loss": 2.3046875, "eval_runtime": 4.3011, "eval_samples_per_second": 19.995, "eval_steps_per_second": 1.395, "step": 79 }, { "epoch": 2.76, "learning_rate": 9.67382461115986e-07, "loss": 1.3232, "step": 80 }, { "epoch": 2.76, "eval_accuracy": 0.05836809325181418, "eval_loss": 2.302734375, "eval_runtime": 4.2978, "eval_samples_per_second": 20.01, "eval_steps_per_second": 1.396, "step": 80 }, { "epoch": 2.79, "learning_rate": 7.852024322579649e-07, "loss": 1.1228, "step": 81 }, { "epoch": 2.79, "eval_accuracy": 0.05835380835380835, "eval_loss": 2.302734375, "eval_runtime": 3.6063, "eval_samples_per_second": 23.847, "eval_steps_per_second": 1.664, "step": 81 }, { "epoch": 2.83, "learning_rate": 6.215557875383804e-07, "loss": 1.3524, "step": 82 }, { "epoch": 2.83, "eval_accuracy": 0.05839666304782584, "eval_loss": 2.302734375, "eval_runtime": 4.1961, "eval_samples_per_second": 20.495, "eval_steps_per_second": 1.43, "step": 82 }, { "epoch": 2.86, "learning_rate": 4.766558909615504e-07, "loss": 1.2042, "step": 83 }, { "epoch": 2.86, "eval_accuracy": 0.058339523455802525, "eval_loss": 2.302734375, "eval_runtime": 4.3056, "eval_samples_per_second": 19.974, "eval_steps_per_second": 1.394, "step": 83 }, { "epoch": 2.9, "learning_rate": 3.5069166434870014e-07, "loss": 1.3588, "step": 84 }, { "epoch": 2.9, "eval_accuracy": 0.058339523455802525, "eval_loss": 2.30078125, "eval_runtime": 4.1957, "eval_samples_per_second": 20.497, "eval_steps_per_second": 1.43, "step": 84 }, { "epoch": 2.93, "learning_rate": 2.438273410199598e-07, "loss": 1.2982, "step": 85 }, { "epoch": 2.93, "eval_accuracy": 0.058425232843837493, "eval_loss": 2.30078125, "eval_runtime": 3.898, "eval_samples_per_second": 22.062, "eval_steps_per_second": 1.539, "step": 85 }, { "epoch": 2.97, "learning_rate": 1.5620225166544155e-07, "loss": 1.4373, "step": 86 }, { "epoch": 2.97, "eval_accuracy": 0.05845380263984915, "eval_loss": 2.30078125, "eval_runtime": 4.3019, "eval_samples_per_second": 19.991, "eval_steps_per_second": 1.395, "step": 86 }, { "epoch": 3.0, "learning_rate": 8.793064268460605e-08, "loss": 1.3562, "step": 87 }, { "epoch": 3.0, "eval_accuracy": 0.05841094794583167, "eval_loss": 2.30078125, "eval_runtime": 4.2946, "eval_samples_per_second": 20.025, "eval_steps_per_second": 1.397, "step": 87 }, { "epoch": 3.0, "step": 87, "total_flos": 4783591391232.0, "train_loss": 1.8617539679867097, "train_runtime": 1017.4729, "train_samples_per_second": 1.353, "train_steps_per_second": 0.086 } ], "max_steps": 87, "num_train_epochs": 3, "total_flos": 4783591391232.0, "trial_name": null, "trial_params": null }