{ "best_metric": 0.5172018348623854, "best_model_checkpoint": "outputs/soft_prompt/deberta-v2-xlarge/sst2/checkpoint-3400", "epoch": 3.0, "global_step": 25257, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.02, "eval_accuracy": 0.5091743119266054, "eval_loss": 2.831559658050537, "eval_runtime": 12.4014, "eval_samples_per_second": 70.315, "eval_steps_per_second": 8.789, "step": 200 }, { "epoch": 0.05, "eval_accuracy": 0.4908256880733945, "eval_loss": 0.7252020835876465, "eval_runtime": 12.5411, "eval_samples_per_second": 69.532, "eval_steps_per_second": 8.691, "step": 400 }, { "epoch": 0.06, "learning_rate": 0.0294061052381518, "loss": 2.7746, "step": 500 }, { "epoch": 0.07, "eval_accuracy": 0.5091743119266054, "eval_loss": 6.318162441253662, "eval_runtime": 12.2524, "eval_samples_per_second": 71.17, "eval_steps_per_second": 8.896, "step": 600 }, { "epoch": 0.1, "eval_accuracy": 0.5091743119266054, "eval_loss": 1.9581830501556396, "eval_runtime": 13.2254, "eval_samples_per_second": 65.934, "eval_steps_per_second": 8.242, "step": 800 }, { "epoch": 0.12, "learning_rate": 0.0288122104763036, "loss": 2.8668, "step": 1000 }, { "epoch": 0.12, "eval_accuracy": 0.4908256880733945, "eval_loss": 0.97942715883255, "eval_runtime": 12.2204, "eval_samples_per_second": 71.356, "eval_steps_per_second": 8.92, "step": 1000 }, { "epoch": 0.14, "eval_accuracy": 0.5091743119266054, "eval_loss": 9.121758460998535, "eval_runtime": 12.6821, "eval_samples_per_second": 68.758, "eval_steps_per_second": 8.595, "step": 1200 }, { "epoch": 0.17, "eval_accuracy": 0.4908256880733945, "eval_loss": 2.1010525226593018, "eval_runtime": 13.3142, "eval_samples_per_second": 65.494, "eval_steps_per_second": 8.187, "step": 1400 }, { "epoch": 0.18, "learning_rate": 0.0282183157144554, "loss": 3.0595, "step": 1500 }, { "epoch": 0.19, "eval_accuracy": 0.4908256880733945, "eval_loss": 2.240478754043579, "eval_runtime": 11.7539, "eval_samples_per_second": 74.188, "eval_steps_per_second": 9.274, "step": 1600 }, { "epoch": 0.21, "eval_accuracy": 0.4908256880733945, "eval_loss": 2.822434663772583, "eval_runtime": 11.937, "eval_samples_per_second": 73.05, "eval_steps_per_second": 9.131, "step": 1800 }, { "epoch": 0.24, "learning_rate": 0.027624420952607195, "loss": 2.7406, "step": 2000 }, { "epoch": 0.24, "eval_accuracy": 0.5091743119266054, "eval_loss": 2.2581450939178467, "eval_runtime": 12.944, "eval_samples_per_second": 67.367, "eval_steps_per_second": 8.421, "step": 2000 }, { "epoch": 0.26, "eval_accuracy": 0.5091743119266054, "eval_loss": 5.0679168701171875, "eval_runtime": 12.8993, "eval_samples_per_second": 67.6, "eval_steps_per_second": 8.45, "step": 2200 }, { "epoch": 0.29, "eval_accuracy": 0.5091743119266054, "eval_loss": 4.079117774963379, "eval_runtime": 12.6438, "eval_samples_per_second": 68.966, "eval_steps_per_second": 8.621, "step": 2400 }, { "epoch": 0.3, "learning_rate": 0.027030526190758998, "loss": 2.341, "step": 2500 }, { "epoch": 0.31, "eval_accuracy": 0.5091743119266054, "eval_loss": 6.498974800109863, "eval_runtime": 11.7875, "eval_samples_per_second": 73.977, "eval_steps_per_second": 9.247, "step": 2600 }, { "epoch": 0.33, "eval_accuracy": 0.5091743119266054, "eval_loss": 2.2673330307006836, "eval_runtime": 12.4045, "eval_samples_per_second": 70.297, "eval_steps_per_second": 8.787, "step": 2800 }, { "epoch": 0.36, "learning_rate": 0.026436631428910798, "loss": 2.5017, "step": 3000 }, { "epoch": 0.36, "eval_accuracy": 0.4908256880733945, "eval_loss": 2.775005340576172, "eval_runtime": 12.8935, "eval_samples_per_second": 67.631, "eval_steps_per_second": 8.454, "step": 3000 }, { "epoch": 0.38, "eval_accuracy": 0.5126146788990825, "eval_loss": 0.8905919790267944, "eval_runtime": 11.7168, "eval_samples_per_second": 74.423, "eval_steps_per_second": 9.303, "step": 3200 }, { "epoch": 0.4, "eval_accuracy": 0.5172018348623854, "eval_loss": 3.1630539894104004, "eval_runtime": 12.3677, "eval_samples_per_second": 70.506, "eval_steps_per_second": 8.813, "step": 3400 }, { "epoch": 0.42, "learning_rate": 0.025842736667062594, "loss": 2.587, "step": 3500 }, { "epoch": 0.43, "eval_accuracy": 0.4908256880733945, "eval_loss": 1.8831442594528198, "eval_runtime": 12.7491, "eval_samples_per_second": 68.397, "eval_steps_per_second": 8.55, "step": 3600 }, { "epoch": 0.45, "eval_accuracy": 0.5091743119266054, "eval_loss": 1.40650475025177, "eval_runtime": 11.8781, "eval_samples_per_second": 73.412, "eval_steps_per_second": 9.177, "step": 3800 }, { "epoch": 0.48, "learning_rate": 0.025248841905214398, "loss": 2.2272, "step": 4000 }, { "epoch": 0.48, "eval_accuracy": 0.5091743119266054, "eval_loss": 5.7484235763549805, "eval_runtime": 12.2084, "eval_samples_per_second": 71.426, "eval_steps_per_second": 8.928, "step": 4000 }, { "epoch": 0.5, "eval_accuracy": 0.4908256880733945, "eval_loss": 3.051417589187622, "eval_runtime": 13.0485, "eval_samples_per_second": 66.828, "eval_steps_per_second": 8.353, "step": 4200 }, { "epoch": 0.52, "eval_accuracy": 0.5091743119266054, "eval_loss": 1.1735517978668213, "eval_runtime": 12.52, "eval_samples_per_second": 69.648, "eval_steps_per_second": 8.706, "step": 4400 }, { "epoch": 0.53, "learning_rate": 0.024654947143366194, "loss": 2.474, "step": 4500 }, { "epoch": 0.55, "eval_accuracy": 0.5091743119266054, "eval_loss": 3.5813305377960205, "eval_runtime": 12.4532, "eval_samples_per_second": 70.022, "eval_steps_per_second": 8.753, "step": 4600 }, { "epoch": 0.57, "eval_accuracy": 0.4908256880733945, "eval_loss": 3.0905096530914307, "eval_runtime": 12.1764, "eval_samples_per_second": 71.614, "eval_steps_per_second": 8.952, "step": 4800 }, { "epoch": 0.59, "learning_rate": 0.024061052381517994, "loss": 2.6958, "step": 5000 }, { "epoch": 0.59, "eval_accuracy": 0.5091743119266054, "eval_loss": 1.6613879203796387, "eval_runtime": 13.0709, "eval_samples_per_second": 66.713, "eval_steps_per_second": 8.339, "step": 5000 }, { "epoch": 0.62, "eval_accuracy": 0.5091743119266054, "eval_loss": 0.9912046194076538, "eval_runtime": 11.0684, "eval_samples_per_second": 78.783, "eval_steps_per_second": 9.848, "step": 5200 }, { "epoch": 0.64, "eval_accuracy": 0.5091743119266054, "eval_loss": 2.708740711212158, "eval_runtime": 12.3098, "eval_samples_per_second": 70.838, "eval_steps_per_second": 8.855, "step": 5400 }, { "epoch": 0.65, "learning_rate": 0.023467157619669794, "loss": 2.4571, "step": 5500 }, { "epoch": 0.67, "eval_accuracy": 0.4908256880733945, "eval_loss": 0.9453651309013367, "eval_runtime": 12.5807, "eval_samples_per_second": 69.313, "eval_steps_per_second": 8.664, "step": 5600 }, { "epoch": 0.69, "eval_accuracy": 0.4908256880733945, "eval_loss": 0.9460629820823669, "eval_runtime": 12.3033, "eval_samples_per_second": 70.875, "eval_steps_per_second": 8.859, "step": 5800 }, { "epoch": 0.71, "learning_rate": 0.022873262857821593, "loss": 2.4046, "step": 6000 }, { "epoch": 0.71, "eval_accuracy": 0.5034403669724771, "eval_loss": 0.7613060474395752, "eval_runtime": 12.922, "eval_samples_per_second": 67.482, "eval_steps_per_second": 8.435, "step": 6000 }, { "epoch": 0.74, "eval_accuracy": 0.5091743119266054, "eval_loss": 3.473540782928467, "eval_runtime": 12.5608, "eval_samples_per_second": 69.423, "eval_steps_per_second": 8.678, "step": 6200 }, { "epoch": 0.76, "eval_accuracy": 0.4908256880733945, "eval_loss": 0.8309389352798462, "eval_runtime": 12.3028, "eval_samples_per_second": 70.878, "eval_steps_per_second": 8.86, "step": 6400 }, { "epoch": 0.77, "learning_rate": 0.022279368095973393, "loss": 1.9778, "step": 6500 }, { "epoch": 0.78, "eval_accuracy": 0.5091743119266054, "eval_loss": 2.345905303955078, "eval_runtime": 12.6993, "eval_samples_per_second": 68.665, "eval_steps_per_second": 8.583, "step": 6600 }, { "epoch": 0.81, "eval_accuracy": 0.5091743119266054, "eval_loss": 1.992404818534851, "eval_runtime": 11.8953, "eval_samples_per_second": 73.306, "eval_steps_per_second": 9.163, "step": 6800 }, { "epoch": 0.83, "learning_rate": 0.02168547333412519, "loss": 1.9132, "step": 7000 }, { "epoch": 0.83, "eval_accuracy": 0.48394495412844035, "eval_loss": 0.7653124928474426, "eval_runtime": 12.5926, "eval_samples_per_second": 69.247, "eval_steps_per_second": 8.656, "step": 7000 }, { "epoch": 0.86, "eval_accuracy": 0.4908256880733945, "eval_loss": 1.6025058031082153, "eval_runtime": 12.8694, "eval_samples_per_second": 67.757, "eval_steps_per_second": 8.47, "step": 7200 }, { "epoch": 0.88, "eval_accuracy": 0.5091743119266054, "eval_loss": 2.3198583126068115, "eval_runtime": 12.2526, "eval_samples_per_second": 71.168, "eval_steps_per_second": 8.896, "step": 7400 }, { "epoch": 0.89, "learning_rate": 0.021091578572276993, "loss": 2.1041, "step": 7500 }, { "epoch": 0.9, "eval_accuracy": 0.5091743119266054, "eval_loss": 0.9014425277709961, "eval_runtime": 12.1923, "eval_samples_per_second": 71.52, "eval_steps_per_second": 8.94, "step": 7600 }, { "epoch": 0.93, "eval_accuracy": 0.4908256880733945, "eval_loss": 1.1414848566055298, "eval_runtime": 12.2534, "eval_samples_per_second": 71.164, "eval_steps_per_second": 8.896, "step": 7800 }, { "epoch": 0.95, "learning_rate": 0.020497683810428793, "loss": 2.2236, "step": 8000 }, { "epoch": 0.95, "eval_accuracy": 0.4908256880733945, "eval_loss": 0.9049626588821411, "eval_runtime": 12.5976, "eval_samples_per_second": 69.219, "eval_steps_per_second": 8.652, "step": 8000 }, { "epoch": 0.97, "eval_accuracy": 0.4908256880733945, "eval_loss": 2.53812575340271, "eval_runtime": 12.192, "eval_samples_per_second": 71.522, "eval_steps_per_second": 8.94, "step": 8200 }, { "epoch": 1.0, "eval_accuracy": 0.4908256880733945, "eval_loss": 3.9090523719787598, "eval_runtime": 12.1436, "eval_samples_per_second": 71.807, "eval_steps_per_second": 8.976, "step": 8400 }, { "epoch": 1.01, "learning_rate": 0.01990378904858059, "loss": 1.9257, "step": 8500 }, { "epoch": 1.02, "eval_accuracy": 0.44380733944954126, "eval_loss": 1.3826260566711426, "eval_runtime": 12.394, "eval_samples_per_second": 70.357, "eval_steps_per_second": 8.795, "step": 8600 }, { "epoch": 1.05, "eval_accuracy": 0.5091743119266054, "eval_loss": 0.7106401920318604, "eval_runtime": 12.254, "eval_samples_per_second": 71.161, "eval_steps_per_second": 8.895, "step": 8800 }, { "epoch": 1.07, "learning_rate": 0.019309894286732392, "loss": 1.9533, "step": 9000 }, { "epoch": 1.07, "eval_accuracy": 0.5091743119266054, "eval_loss": 1.6487476825714111, "eval_runtime": 13.5392, "eval_samples_per_second": 64.406, "eval_steps_per_second": 8.051, "step": 9000 }, { "epoch": 1.09, "eval_accuracy": 0.4908256880733945, "eval_loss": 1.4527719020843506, "eval_runtime": 12.2271, "eval_samples_per_second": 71.317, "eval_steps_per_second": 8.915, "step": 9200 }, { "epoch": 1.12, "eval_accuracy": 0.4908256880733945, "eval_loss": 1.1699163913726807, "eval_runtime": 12.1388, "eval_samples_per_second": 71.836, "eval_steps_per_second": 8.979, "step": 9400 }, { "epoch": 1.13, "learning_rate": 0.01871599952488419, "loss": 1.7969, "step": 9500 }, { "epoch": 1.14, "eval_accuracy": 0.4908256880733945, "eval_loss": 1.155882716178894, "eval_runtime": 12.1707, "eval_samples_per_second": 71.648, "eval_steps_per_second": 8.956, "step": 9600 }, { "epoch": 1.16, "eval_accuracy": 0.4908256880733945, "eval_loss": 1.0576764345169067, "eval_runtime": 12.3024, "eval_samples_per_second": 70.88, "eval_steps_per_second": 8.86, "step": 9800 }, { "epoch": 1.19, "learning_rate": 0.01812210476303599, "loss": 1.8048, "step": 10000 }, { "epoch": 1.19, "eval_accuracy": 0.4908256880733945, "eval_loss": 1.1585994958877563, "eval_runtime": 12.8412, "eval_samples_per_second": 67.906, "eval_steps_per_second": 8.488, "step": 10000 }, { "epoch": 1.21, "eval_accuracy": 0.4908256880733945, "eval_loss": 0.7068227529525757, "eval_runtime": 12.4262, "eval_samples_per_second": 70.174, "eval_steps_per_second": 8.772, "step": 10200 }, { "epoch": 1.24, "eval_accuracy": 0.4908256880733945, "eval_loss": 0.7638933658599854, "eval_runtime": 12.0016, "eval_samples_per_second": 72.657, "eval_steps_per_second": 9.082, "step": 10400 }, { "epoch": 1.25, "learning_rate": 0.01752821000118779, "loss": 2.0729, "step": 10500 }, { "epoch": 1.26, "eval_accuracy": 0.5091743119266054, "eval_loss": 1.188310146331787, "eval_runtime": 13.0443, "eval_samples_per_second": 66.849, "eval_steps_per_second": 8.356, "step": 10600 }, { "epoch": 1.28, "eval_accuracy": 0.5091743119266054, "eval_loss": 0.6973594427108765, "eval_runtime": 11.5707, "eval_samples_per_second": 75.363, "eval_steps_per_second": 9.42, "step": 10800 }, { "epoch": 1.31, "learning_rate": 0.016934315239339588, "loss": 1.7558, "step": 11000 }, { "epoch": 1.31, "eval_accuracy": 0.4908256880733945, "eval_loss": 1.8548436164855957, "eval_runtime": 13.0206, "eval_samples_per_second": 66.971, "eval_steps_per_second": 8.371, "step": 11000 }, { "epoch": 1.33, "eval_accuracy": 0.4908256880733945, "eval_loss": 0.7331790924072266, "eval_runtime": 12.107, "eval_samples_per_second": 72.024, "eval_steps_per_second": 9.003, "step": 11200 }, { "epoch": 1.35, "eval_accuracy": 0.5091743119266054, "eval_loss": 1.7065454721450806, "eval_runtime": 12.0677, "eval_samples_per_second": 72.259, "eval_steps_per_second": 9.032, "step": 11400 }, { "epoch": 1.37, "learning_rate": 0.016340420477491388, "loss": 1.931, "step": 11500 }, { "epoch": 1.38, "eval_accuracy": 0.5091743119266054, "eval_loss": 3.5782742500305176, "eval_runtime": 12.3183, "eval_samples_per_second": 70.789, "eval_steps_per_second": 8.849, "step": 11600 }, { "epoch": 1.4, "eval_accuracy": 0.4908256880733945, "eval_loss": 2.2677643299102783, "eval_runtime": 11.6938, "eval_samples_per_second": 74.569, "eval_steps_per_second": 9.321, "step": 11800 }, { "epoch": 1.43, "learning_rate": 0.015746525715643188, "loss": 1.739, "step": 12000 }, { "epoch": 1.43, "eval_accuracy": 0.5091743119266054, "eval_loss": 1.367167353630066, "eval_runtime": 12.6396, "eval_samples_per_second": 68.99, "eval_steps_per_second": 8.624, "step": 12000 }, { "epoch": 1.45, "eval_accuracy": 0.4908256880733945, "eval_loss": 1.4737409353256226, "eval_runtime": 12.3657, "eval_samples_per_second": 70.517, "eval_steps_per_second": 8.815, "step": 12200 }, { "epoch": 1.47, "eval_accuracy": 0.4908256880733945, "eval_loss": 0.8329254984855652, "eval_runtime": 12.1625, "eval_samples_per_second": 71.696, "eval_steps_per_second": 8.962, "step": 12400 }, { "epoch": 1.48, "learning_rate": 0.015152630953794988, "loss": 1.655, "step": 12500 }, { "epoch": 1.5, "eval_accuracy": 0.5091743119266054, "eval_loss": 2.237131118774414, "eval_runtime": 12.2197, "eval_samples_per_second": 71.36, "eval_steps_per_second": 8.92, "step": 12600 }, { "epoch": 1.52, "eval_accuracy": 0.4908256880733945, "eval_loss": 3.846944570541382, "eval_runtime": 12.0155, "eval_samples_per_second": 72.573, "eval_steps_per_second": 9.072, "step": 12800 }, { "epoch": 1.54, "learning_rate": 0.014558736191946788, "loss": 1.7284, "step": 13000 }, { "epoch": 1.54, "eval_accuracy": 0.5091743119266054, "eval_loss": 1.403191328048706, "eval_runtime": 12.6645, "eval_samples_per_second": 68.854, "eval_steps_per_second": 8.607, "step": 13000 }, { "epoch": 1.57, "eval_accuracy": 0.5091743119266054, "eval_loss": 1.1265949010849, "eval_runtime": 12.3046, "eval_samples_per_second": 70.868, "eval_steps_per_second": 8.859, "step": 13200 }, { "epoch": 1.59, "eval_accuracy": 0.4908256880733945, "eval_loss": 1.9130467176437378, "eval_runtime": 12.0511, "eval_samples_per_second": 72.359, "eval_steps_per_second": 9.045, "step": 13400 }, { "epoch": 1.6, "learning_rate": 0.013964841430098586, "loss": 1.5742, "step": 13500 }, { "epoch": 1.62, "eval_accuracy": 0.5091743119266054, "eval_loss": 0.832761824131012, "eval_runtime": 12.2374, "eval_samples_per_second": 71.257, "eval_steps_per_second": 8.907, "step": 13600 }, { "epoch": 1.64, "eval_accuracy": 0.5091743119266054, "eval_loss": 3.8501062393188477, "eval_runtime": 12.573, "eval_samples_per_second": 69.355, "eval_steps_per_second": 8.669, "step": 13800 }, { "epoch": 1.66, "learning_rate": 0.013370946668250385, "loss": 1.7039, "step": 14000 }, { "epoch": 1.66, "eval_accuracy": 0.4908256880733945, "eval_loss": 1.2914493083953857, "eval_runtime": 12.0326, "eval_samples_per_second": 72.47, "eval_steps_per_second": 9.059, "step": 14000 }, { "epoch": 1.69, "eval_accuracy": 0.4908256880733945, "eval_loss": 1.136923909187317, "eval_runtime": 11.7256, "eval_samples_per_second": 74.367, "eval_steps_per_second": 9.296, "step": 14200 }, { "epoch": 1.71, "eval_accuracy": 0.5091743119266054, "eval_loss": 0.7574475407600403, "eval_runtime": 12.2584, "eval_samples_per_second": 71.135, "eval_steps_per_second": 8.892, "step": 14400 }, { "epoch": 1.72, "learning_rate": 0.012777051906402184, "loss": 1.4352, "step": 14500 }, { "epoch": 1.73, "eval_accuracy": 0.5091743119266054, "eval_loss": 0.7623356580734253, "eval_runtime": 12.706, "eval_samples_per_second": 68.629, "eval_steps_per_second": 8.579, "step": 14600 }, { "epoch": 1.76, "eval_accuracy": 0.5091743119266054, "eval_loss": 1.6579828262329102, "eval_runtime": 12.16, "eval_samples_per_second": 71.71, "eval_steps_per_second": 8.964, "step": 14800 }, { "epoch": 1.78, "learning_rate": 0.012183157144553985, "loss": 1.6328, "step": 15000 }, { "epoch": 1.78, "eval_accuracy": 0.481651376146789, "eval_loss": 0.693511426448822, "eval_runtime": 12.7779, "eval_samples_per_second": 68.243, "eval_steps_per_second": 8.53, "step": 15000 }, { "epoch": 1.81, "eval_accuracy": 0.4908256880733945, "eval_loss": 0.6989684700965881, "eval_runtime": 12.0393, "eval_samples_per_second": 72.43, "eval_steps_per_second": 9.054, "step": 15200 }, { "epoch": 1.83, "eval_accuracy": 0.5091743119266054, "eval_loss": 0.7303033471107483, "eval_runtime": 12.3696, "eval_samples_per_second": 70.496, "eval_steps_per_second": 8.812, "step": 15400 }, { "epoch": 1.84, "learning_rate": 0.011589262382705785, "loss": 1.4498, "step": 15500 }, { "epoch": 1.85, "eval_accuracy": 0.4908256880733945, "eval_loss": 1.675624132156372, "eval_runtime": 12.6317, "eval_samples_per_second": 69.032, "eval_steps_per_second": 8.629, "step": 15600 }, { "epoch": 1.88, "eval_accuracy": 0.4908256880733945, "eval_loss": 1.0083491802215576, "eval_runtime": 12.349, "eval_samples_per_second": 70.613, "eval_steps_per_second": 8.827, "step": 15800 }, { "epoch": 1.9, "learning_rate": 0.010995367620857583, "loss": 1.4022, "step": 16000 }, { "epoch": 1.9, "eval_accuracy": 0.4908256880733945, "eval_loss": 0.7790195345878601, "eval_runtime": 11.9659, "eval_samples_per_second": 72.874, "eval_steps_per_second": 9.109, "step": 16000 }, { "epoch": 1.92, "eval_accuracy": 0.5091743119266054, "eval_loss": 1.296112060546875, "eval_runtime": 13.0796, "eval_samples_per_second": 66.669, "eval_steps_per_second": 8.334, "step": 16200 }, { "epoch": 1.95, "eval_accuracy": 0.4908256880733945, "eval_loss": 0.7208316326141357, "eval_runtime": 12.4652, "eval_samples_per_second": 69.955, "eval_steps_per_second": 8.744, "step": 16400 }, { "epoch": 1.96, "learning_rate": 0.010401472859009383, "loss": 1.4503, "step": 16500 }, { "epoch": 1.97, "eval_accuracy": 0.4908256880733945, "eval_loss": 0.8011333346366882, "eval_runtime": 12.77, "eval_samples_per_second": 68.285, "eval_steps_per_second": 8.536, "step": 16600 }, { "epoch": 2.0, "eval_accuracy": 0.4908256880733945, "eval_loss": 0.8194194436073303, "eval_runtime": 12.2506, "eval_samples_per_second": 71.18, "eval_steps_per_second": 8.898, "step": 16800 }, { "epoch": 2.02, "learning_rate": 0.009807578097161183, "loss": 1.3401, "step": 17000 }, { "epoch": 2.02, "eval_accuracy": 0.4908256880733945, "eval_loss": 0.9209619760513306, "eval_runtime": 12.7245, "eval_samples_per_second": 68.529, "eval_steps_per_second": 8.566, "step": 17000 }, { "epoch": 2.04, "eval_accuracy": 0.4954128440366973, "eval_loss": 2.172947645187378, "eval_runtime": 13.3907, "eval_samples_per_second": 65.12, "eval_steps_per_second": 8.14, "step": 17200 }, { "epoch": 2.07, "eval_accuracy": 0.4908256880733945, "eval_loss": 2.8217873573303223, "eval_runtime": 12.0414, "eval_samples_per_second": 72.417, "eval_steps_per_second": 9.052, "step": 17400 }, { "epoch": 2.08, "learning_rate": 0.009213683335312983, "loss": 1.515, "step": 17500 }, { "epoch": 2.09, "eval_accuracy": 0.5091743119266054, "eval_loss": 1.8880350589752197, "eval_runtime": 12.7892, "eval_samples_per_second": 68.183, "eval_steps_per_second": 8.523, "step": 17600 }, { "epoch": 2.11, "eval_accuracy": 0.4908256880733945, "eval_loss": 0.8059159517288208, "eval_runtime": 12.8563, "eval_samples_per_second": 67.827, "eval_steps_per_second": 8.478, "step": 17800 }, { "epoch": 2.14, "learning_rate": 0.008619788573464782, "loss": 1.2519, "step": 18000 }, { "epoch": 2.14, "eval_accuracy": 0.5091743119266054, "eval_loss": 1.1668144464492798, "eval_runtime": 12.0845, "eval_samples_per_second": 72.158, "eval_steps_per_second": 9.02, "step": 18000 }, { "epoch": 2.16, "eval_accuracy": 0.5091743119266054, "eval_loss": 0.8440486192703247, "eval_runtime": 11.5453, "eval_samples_per_second": 75.529, "eval_steps_per_second": 9.441, "step": 18200 }, { "epoch": 2.19, "eval_accuracy": 0.5091743119266054, "eval_loss": 1.482177972793579, "eval_runtime": 11.5533, "eval_samples_per_second": 75.476, "eval_steps_per_second": 9.435, "step": 18400 }, { "epoch": 2.2, "learning_rate": 0.008025893811616582, "loss": 1.2221, "step": 18500 }, { "epoch": 2.21, "eval_accuracy": 0.5091743119266054, "eval_loss": 0.6977333426475525, "eval_runtime": 12.3902, "eval_samples_per_second": 70.378, "eval_steps_per_second": 8.797, "step": 18600 }, { "epoch": 2.23, "eval_accuracy": 0.5091743119266054, "eval_loss": 1.3418121337890625, "eval_runtime": 12.4195, "eval_samples_per_second": 70.212, "eval_steps_per_second": 8.776, "step": 18800 }, { "epoch": 2.26, "learning_rate": 0.00743199904976838, "loss": 1.1201, "step": 19000 }, { "epoch": 2.26, "eval_accuracy": 0.4908256880733945, "eval_loss": 0.7915144562721252, "eval_runtime": 12.717, "eval_samples_per_second": 68.57, "eval_steps_per_second": 8.571, "step": 19000 }, { "epoch": 2.28, "eval_accuracy": 0.5091743119266054, "eval_loss": 0.961918830871582, "eval_runtime": 13.173, "eval_samples_per_second": 66.196, "eval_steps_per_second": 8.275, "step": 19200 }, { "epoch": 2.3, "eval_accuracy": 0.4908256880733945, "eval_loss": 1.220428228378296, "eval_runtime": 12.2958, "eval_samples_per_second": 70.918, "eval_steps_per_second": 8.865, "step": 19400 }, { "epoch": 2.32, "learning_rate": 0.006838104287920181, "loss": 1.0869, "step": 19500 }, { "epoch": 2.33, "eval_accuracy": 0.5091743119266054, "eval_loss": 1.6541168689727783, "eval_runtime": 12.5954, "eval_samples_per_second": 69.232, "eval_steps_per_second": 8.654, "step": 19600 }, { "epoch": 2.35, "eval_accuracy": 0.5091743119266054, "eval_loss": 2.7402684688568115, "eval_runtime": 12.7516, "eval_samples_per_second": 68.383, "eval_steps_per_second": 8.548, "step": 19800 }, { "epoch": 2.38, "learning_rate": 0.00624420952607198, "loss": 1.0804, "step": 20000 }, { "epoch": 2.38, "eval_accuracy": 0.4908256880733945, "eval_loss": 1.2037415504455566, "eval_runtime": 13.138, "eval_samples_per_second": 66.372, "eval_steps_per_second": 8.297, "step": 20000 }, { "epoch": 2.4, "eval_accuracy": 0.5091743119266054, "eval_loss": 0.7337152361869812, "eval_runtime": 11.6201, "eval_samples_per_second": 75.043, "eval_steps_per_second": 9.38, "step": 20200 }, { "epoch": 2.42, "eval_accuracy": 0.5091743119266054, "eval_loss": 0.8853695392608643, "eval_runtime": 13.0127, "eval_samples_per_second": 67.012, "eval_steps_per_second": 8.376, "step": 20400 }, { "epoch": 2.43, "learning_rate": 0.005650314764223779, "loss": 1.0025, "step": 20500 }, { "epoch": 2.45, "eval_accuracy": 0.5091743119266054, "eval_loss": 0.7113233804702759, "eval_runtime": 12.2168, "eval_samples_per_second": 71.377, "eval_steps_per_second": 8.922, "step": 20600 }, { "epoch": 2.47, "eval_accuracy": 0.5091743119266054, "eval_loss": 1.0583016872406006, "eval_runtime": 12.0017, "eval_samples_per_second": 72.657, "eval_steps_per_second": 9.082, "step": 20800 }, { "epoch": 2.49, "learning_rate": 0.005056420002375579, "loss": 0.9856, "step": 21000 }, { "epoch": 2.49, "eval_accuracy": 0.5091743119266054, "eval_loss": 0.7113476395606995, "eval_runtime": 12.9697, "eval_samples_per_second": 67.234, "eval_steps_per_second": 8.404, "step": 21000 }, { "epoch": 2.52, "eval_accuracy": 0.4908256880733945, "eval_loss": 0.7482680678367615, "eval_runtime": 12.7958, "eval_samples_per_second": 68.147, "eval_steps_per_second": 8.518, "step": 21200 }, { "epoch": 2.54, "eval_accuracy": 0.5091743119266054, "eval_loss": 0.6966097950935364, "eval_runtime": 12.2964, "eval_samples_per_second": 70.915, "eval_steps_per_second": 8.864, "step": 21400 }, { "epoch": 2.55, "learning_rate": 0.004462525240527379, "loss": 1.0364, "step": 21500 }, { "epoch": 2.57, "eval_accuracy": 0.4908256880733945, "eval_loss": 1.0607110261917114, "eval_runtime": 12.067, "eval_samples_per_second": 72.263, "eval_steps_per_second": 9.033, "step": 21600 }, { "epoch": 2.59, "eval_accuracy": 0.5091743119266054, "eval_loss": 1.1381345987319946, "eval_runtime": 12.2151, "eval_samples_per_second": 71.387, "eval_steps_per_second": 8.923, "step": 21800 }, { "epoch": 2.61, "learning_rate": 0.003868630478679178, "loss": 0.9683, "step": 22000 }, { "epoch": 2.61, "eval_accuracy": 0.4908256880733945, "eval_loss": 0.7138826847076416, "eval_runtime": 12.7162, "eval_samples_per_second": 68.574, "eval_steps_per_second": 8.572, "step": 22000 }, { "epoch": 2.64, "eval_accuracy": 0.4908256880733945, "eval_loss": 1.6361145973205566, "eval_runtime": 11.8698, "eval_samples_per_second": 73.464, "eval_steps_per_second": 9.183, "step": 22200 }, { "epoch": 2.66, "eval_accuracy": 0.5091743119266054, "eval_loss": 1.1421782970428467, "eval_runtime": 11.321, "eval_samples_per_second": 77.025, "eval_steps_per_second": 9.628, "step": 22400 }, { "epoch": 2.67, "learning_rate": 0.0032747357168309774, "loss": 0.908, "step": 22500 }, { "epoch": 2.68, "eval_accuracy": 0.5091743119266054, "eval_loss": 0.7317955493927002, "eval_runtime": 12.2644, "eval_samples_per_second": 71.1, "eval_steps_per_second": 8.888, "step": 22600 }, { "epoch": 2.71, "eval_accuracy": 0.5091743119266054, "eval_loss": 0.6962340474128723, "eval_runtime": 11.822, "eval_samples_per_second": 73.761, "eval_steps_per_second": 9.22, "step": 22800 }, { "epoch": 2.73, "learning_rate": 0.0026808409549827768, "loss": 0.8761, "step": 23000 }, { "epoch": 2.73, "eval_accuracy": 0.5091743119266054, "eval_loss": 0.9568504691123962, "eval_runtime": 12.3037, "eval_samples_per_second": 70.873, "eval_steps_per_second": 8.859, "step": 23000 }, { "epoch": 2.76, "eval_accuracy": 0.5091743119266054, "eval_loss": 0.9744265675544739, "eval_runtime": 11.9452, "eval_samples_per_second": 73.0, "eval_steps_per_second": 9.125, "step": 23200 }, { "epoch": 2.78, "eval_accuracy": 0.4908256880733945, "eval_loss": 1.091610312461853, "eval_runtime": 12.8562, "eval_samples_per_second": 67.827, "eval_steps_per_second": 8.478, "step": 23400 }, { "epoch": 2.79, "learning_rate": 0.0020869461931345766, "loss": 0.8209, "step": 23500 }, { "epoch": 2.8, "eval_accuracy": 0.4908256880733945, "eval_loss": 0.8106526136398315, "eval_runtime": 11.7827, "eval_samples_per_second": 74.007, "eval_steps_per_second": 9.251, "step": 23600 }, { "epoch": 2.83, "eval_accuracy": 0.5091743119266054, "eval_loss": 0.7210954427719116, "eval_runtime": 11.3303, "eval_samples_per_second": 76.962, "eval_steps_per_second": 9.62, "step": 23800 }, { "epoch": 2.85, "learning_rate": 0.001493051431286376, "loss": 0.8008, "step": 24000 }, { "epoch": 2.85, "eval_accuracy": 0.49770642201834864, "eval_loss": 0.6930689811706543, "eval_runtime": 12.4553, "eval_samples_per_second": 70.01, "eval_steps_per_second": 8.751, "step": 24000 }, { "epoch": 2.87, "eval_accuracy": 0.4908256880733945, "eval_loss": 0.7184925675392151, "eval_runtime": 12.3113, "eval_samples_per_second": 70.829, "eval_steps_per_second": 8.854, "step": 24200 }, { "epoch": 2.9, "eval_accuracy": 0.5091743119266054, "eval_loss": 0.729166567325592, "eval_runtime": 11.2952, "eval_samples_per_second": 77.201, "eval_steps_per_second": 9.65, "step": 24400 }, { "epoch": 2.91, "learning_rate": 0.0008991566694381756, "loss": 0.7738, "step": 24500 }, { "epoch": 2.92, "eval_accuracy": 0.5091743119266054, "eval_loss": 0.7178325057029724, "eval_runtime": 12.4374, "eval_samples_per_second": 70.111, "eval_steps_per_second": 8.764, "step": 24600 }, { "epoch": 2.95, "eval_accuracy": 0.5091743119266054, "eval_loss": 0.6961101293563843, "eval_runtime": 12.5193, "eval_samples_per_second": 69.652, "eval_steps_per_second": 8.707, "step": 24800 }, { "epoch": 2.97, "learning_rate": 0.00030526190758997505, "loss": 0.755, "step": 25000 }, { "epoch": 2.97, "eval_accuracy": 0.5091743119266054, "eval_loss": 0.7101095914840698, "eval_runtime": 12.5702, "eval_samples_per_second": 69.371, "eval_steps_per_second": 8.671, "step": 25000 }, { "epoch": 2.99, "eval_accuracy": 0.5091743119266054, "eval_loss": 0.7049440145492554, "eval_runtime": 11.2857, "eval_samples_per_second": 77.266, "eval_steps_per_second": 9.658, "step": 25200 }, { "epoch": 3.0, "step": 25257, "total_flos": 1.070008779996841e+17, "train_loss": 1.708119561667774, "train_runtime": 7791.0638, "train_samples_per_second": 25.933, "train_steps_per_second": 3.242 } ], "max_steps": 25257, "num_train_epochs": 3, "total_flos": 1.070008779996841e+17, "trial_name": null, "trial_params": null }