|
{ |
|
"best_metric": 0.5188751220703125, |
|
"best_model_checkpoint": "AlexWang99/byt5_add_2k/checkpoint-450", |
|
"epoch": 150.0, |
|
"eval_steps": 500, |
|
"global_step": 450, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 1.0, |
|
"eval_loss": 7.222967147827148, |
|
"eval_runtime": 10.6822, |
|
"eval_samples_per_second": 936.137, |
|
"eval_steps_per_second": 1.217, |
|
"step": 3 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_loss": 5.985061168670654, |
|
"eval_runtime": 10.9203, |
|
"eval_samples_per_second": 915.722, |
|
"eval_steps_per_second": 1.19, |
|
"step": 6 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_loss": 5.1600236892700195, |
|
"eval_runtime": 10.7228, |
|
"eval_samples_per_second": 932.591, |
|
"eval_steps_per_second": 1.212, |
|
"step": 9 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_loss": 4.493813514709473, |
|
"eval_runtime": 11.0064, |
|
"eval_samples_per_second": 908.565, |
|
"eval_steps_per_second": 1.181, |
|
"step": 12 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"eval_loss": 3.8893628120422363, |
|
"eval_runtime": 10.8904, |
|
"eval_samples_per_second": 918.238, |
|
"eval_steps_per_second": 1.194, |
|
"step": 15 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"eval_loss": 3.341691493988037, |
|
"eval_runtime": 10.8018, |
|
"eval_samples_per_second": 925.772, |
|
"eval_steps_per_second": 1.204, |
|
"step": 18 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"eval_loss": 2.9026920795440674, |
|
"eval_runtime": 10.8887, |
|
"eval_samples_per_second": 918.384, |
|
"eval_steps_per_second": 1.194, |
|
"step": 21 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"eval_loss": 2.606121063232422, |
|
"eval_runtime": 10.8106, |
|
"eval_samples_per_second": 925.018, |
|
"eval_steps_per_second": 1.203, |
|
"step": 24 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"eval_loss": 2.402636766433716, |
|
"eval_runtime": 11.0832, |
|
"eval_samples_per_second": 902.265, |
|
"eval_steps_per_second": 1.173, |
|
"step": 27 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"eval_loss": 2.1915464401245117, |
|
"eval_runtime": 10.8272, |
|
"eval_samples_per_second": 923.604, |
|
"eval_steps_per_second": 1.201, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 11.0, |
|
"eval_loss": 2.1069843769073486, |
|
"eval_runtime": 10.8305, |
|
"eval_samples_per_second": 923.32, |
|
"eval_steps_per_second": 1.2, |
|
"step": 33 |
|
}, |
|
{ |
|
"epoch": 12.0, |
|
"eval_loss": 2.0630228519439697, |
|
"eval_runtime": 10.9163, |
|
"eval_samples_per_second": 916.063, |
|
"eval_steps_per_second": 1.191, |
|
"step": 36 |
|
}, |
|
{ |
|
"epoch": 13.0, |
|
"eval_loss": 1.9474797248840332, |
|
"eval_runtime": 10.8302, |
|
"eval_samples_per_second": 923.345, |
|
"eval_steps_per_second": 1.2, |
|
"step": 39 |
|
}, |
|
{ |
|
"epoch": 14.0, |
|
"eval_loss": 1.9065855741500854, |
|
"eval_runtime": 11.0817, |
|
"eval_samples_per_second": 902.388, |
|
"eval_steps_per_second": 1.173, |
|
"step": 42 |
|
}, |
|
{ |
|
"epoch": 15.0, |
|
"eval_loss": 1.8849406242370605, |
|
"eval_runtime": 10.8362, |
|
"eval_samples_per_second": 922.835, |
|
"eval_steps_per_second": 1.2, |
|
"step": 45 |
|
}, |
|
{ |
|
"epoch": 16.0, |
|
"eval_loss": 1.7817472219467163, |
|
"eval_runtime": 10.8435, |
|
"eval_samples_per_second": 922.207, |
|
"eval_steps_per_second": 1.199, |
|
"step": 48 |
|
}, |
|
{ |
|
"epoch": 17.0, |
|
"eval_loss": 1.7877730131149292, |
|
"eval_runtime": 10.9315, |
|
"eval_samples_per_second": 914.788, |
|
"eval_steps_per_second": 1.189, |
|
"step": 51 |
|
}, |
|
{ |
|
"epoch": 18.0, |
|
"eval_loss": 1.7331796884536743, |
|
"eval_runtime": 10.8485, |
|
"eval_samples_per_second": 921.788, |
|
"eval_steps_per_second": 1.198, |
|
"step": 54 |
|
}, |
|
{ |
|
"epoch": 19.0, |
|
"eval_loss": 1.692647099494934, |
|
"eval_runtime": 11.0859, |
|
"eval_samples_per_second": 902.043, |
|
"eval_steps_per_second": 1.173, |
|
"step": 57 |
|
}, |
|
{ |
|
"epoch": 20.0, |
|
"eval_loss": 1.6727197170257568, |
|
"eval_runtime": 10.8467, |
|
"eval_samples_per_second": 921.936, |
|
"eval_steps_per_second": 1.199, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 21.0, |
|
"eval_loss": 1.66093909740448, |
|
"eval_runtime": 10.8432, |
|
"eval_samples_per_second": 922.235, |
|
"eval_steps_per_second": 1.199, |
|
"step": 63 |
|
}, |
|
{ |
|
"epoch": 22.0, |
|
"eval_loss": 1.6616569757461548, |
|
"eval_runtime": 10.9265, |
|
"eval_samples_per_second": 915.204, |
|
"eval_steps_per_second": 1.19, |
|
"step": 66 |
|
}, |
|
{ |
|
"epoch": 23.0, |
|
"eval_loss": 1.6537153720855713, |
|
"eval_runtime": 10.8532, |
|
"eval_samples_per_second": 921.386, |
|
"eval_steps_per_second": 1.198, |
|
"step": 69 |
|
}, |
|
{ |
|
"epoch": 24.0, |
|
"eval_loss": 1.645085334777832, |
|
"eval_runtime": 11.0411, |
|
"eval_samples_per_second": 905.709, |
|
"eval_steps_per_second": 1.177, |
|
"step": 72 |
|
}, |
|
{ |
|
"epoch": 25.0, |
|
"eval_loss": 1.6413605213165283, |
|
"eval_runtime": 10.7889, |
|
"eval_samples_per_second": 926.88, |
|
"eval_steps_per_second": 1.205, |
|
"step": 75 |
|
}, |
|
{ |
|
"epoch": 26.0, |
|
"eval_loss": 1.635939598083496, |
|
"eval_runtime": 10.8026, |
|
"eval_samples_per_second": 925.7, |
|
"eval_steps_per_second": 1.203, |
|
"step": 78 |
|
}, |
|
{ |
|
"epoch": 27.0, |
|
"eval_loss": 1.6321375370025635, |
|
"eval_runtime": 10.9127, |
|
"eval_samples_per_second": 916.367, |
|
"eval_steps_per_second": 1.191, |
|
"step": 81 |
|
}, |
|
{ |
|
"epoch": 28.0, |
|
"eval_loss": 1.6276355981826782, |
|
"eval_runtime": 10.8251, |
|
"eval_samples_per_second": 923.783, |
|
"eval_steps_per_second": 1.201, |
|
"step": 84 |
|
}, |
|
{ |
|
"epoch": 29.0, |
|
"eval_loss": 1.6232202053070068, |
|
"eval_runtime": 11.0643, |
|
"eval_samples_per_second": 903.805, |
|
"eval_steps_per_second": 1.175, |
|
"step": 87 |
|
}, |
|
{ |
|
"epoch": 30.0, |
|
"eval_loss": 1.618307113647461, |
|
"eval_runtime": 10.8447, |
|
"eval_samples_per_second": 922.106, |
|
"eval_steps_per_second": 1.199, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 31.0, |
|
"eval_loss": 1.612666368484497, |
|
"eval_runtime": 10.8371, |
|
"eval_samples_per_second": 922.759, |
|
"eval_steps_per_second": 1.2, |
|
"step": 93 |
|
}, |
|
{ |
|
"epoch": 32.0, |
|
"eval_loss": 1.6067509651184082, |
|
"eval_runtime": 10.9144, |
|
"eval_samples_per_second": 916.219, |
|
"eval_steps_per_second": 1.191, |
|
"step": 96 |
|
}, |
|
{ |
|
"epoch": 33.0, |
|
"eval_loss": 1.5994765758514404, |
|
"eval_runtime": 10.8587, |
|
"eval_samples_per_second": 920.924, |
|
"eval_steps_per_second": 1.197, |
|
"step": 99 |
|
}, |
|
{ |
|
"epoch": 34.0, |
|
"eval_loss": 1.590742588043213, |
|
"eval_runtime": 11.0839, |
|
"eval_samples_per_second": 902.211, |
|
"eval_steps_per_second": 1.173, |
|
"step": 102 |
|
}, |
|
{ |
|
"epoch": 35.0, |
|
"eval_loss": 1.5813907384872437, |
|
"eval_runtime": 10.8372, |
|
"eval_samples_per_second": 922.744, |
|
"eval_steps_per_second": 1.2, |
|
"step": 105 |
|
}, |
|
{ |
|
"epoch": 36.0, |
|
"eval_loss": 1.570920467376709, |
|
"eval_runtime": 10.8433, |
|
"eval_samples_per_second": 922.23, |
|
"eval_steps_per_second": 1.199, |
|
"step": 108 |
|
}, |
|
{ |
|
"epoch": 37.0, |
|
"eval_loss": 1.5595602989196777, |
|
"eval_runtime": 10.9304, |
|
"eval_samples_per_second": 914.879, |
|
"eval_steps_per_second": 1.189, |
|
"step": 111 |
|
}, |
|
{ |
|
"epoch": 38.0, |
|
"eval_loss": 1.5501924753189087, |
|
"eval_runtime": 10.8429, |
|
"eval_samples_per_second": 922.259, |
|
"eval_steps_per_second": 1.199, |
|
"step": 114 |
|
}, |
|
{ |
|
"epoch": 39.0, |
|
"eval_loss": 1.5431841611862183, |
|
"eval_runtime": 11.1003, |
|
"eval_samples_per_second": 900.873, |
|
"eval_steps_per_second": 1.171, |
|
"step": 117 |
|
}, |
|
{ |
|
"epoch": 40.0, |
|
"eval_loss": 1.5323028564453125, |
|
"eval_runtime": 10.8468, |
|
"eval_samples_per_second": 921.928, |
|
"eval_steps_per_second": 1.199, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 41.0, |
|
"eval_loss": 1.5221501588821411, |
|
"eval_runtime": 10.8395, |
|
"eval_samples_per_second": 922.551, |
|
"eval_steps_per_second": 1.199, |
|
"step": 123 |
|
}, |
|
{ |
|
"epoch": 42.0, |
|
"eval_loss": 1.5130882263183594, |
|
"eval_runtime": 10.9396, |
|
"eval_samples_per_second": 914.108, |
|
"eval_steps_per_second": 1.188, |
|
"step": 126 |
|
}, |
|
{ |
|
"epoch": 43.0, |
|
"eval_loss": 1.5008268356323242, |
|
"eval_runtime": 10.8617, |
|
"eval_samples_per_second": 920.668, |
|
"eval_steps_per_second": 1.197, |
|
"step": 129 |
|
}, |
|
{ |
|
"epoch": 44.0, |
|
"eval_loss": 1.487107515335083, |
|
"eval_runtime": 11.0741, |
|
"eval_samples_per_second": 903.011, |
|
"eval_steps_per_second": 1.174, |
|
"step": 132 |
|
}, |
|
{ |
|
"epoch": 45.0, |
|
"eval_loss": 1.4803069829940796, |
|
"eval_runtime": 10.8489, |
|
"eval_samples_per_second": 921.753, |
|
"eval_steps_per_second": 1.198, |
|
"step": 135 |
|
}, |
|
{ |
|
"epoch": 46.0, |
|
"eval_loss": 1.4744497537612915, |
|
"eval_runtime": 10.8329, |
|
"eval_samples_per_second": 923.112, |
|
"eval_steps_per_second": 1.2, |
|
"step": 138 |
|
}, |
|
{ |
|
"epoch": 47.0, |
|
"eval_loss": 1.4682186841964722, |
|
"eval_runtime": 11.0865, |
|
"eval_samples_per_second": 902.0, |
|
"eval_steps_per_second": 1.173, |
|
"step": 141 |
|
}, |
|
{ |
|
"epoch": 48.0, |
|
"eval_loss": 1.4509010314941406, |
|
"eval_runtime": 10.862, |
|
"eval_samples_per_second": 920.64, |
|
"eval_steps_per_second": 1.197, |
|
"step": 144 |
|
}, |
|
{ |
|
"epoch": 49.0, |
|
"eval_loss": 1.4542888402938843, |
|
"eval_runtime": 11.0884, |
|
"eval_samples_per_second": 901.845, |
|
"eval_steps_per_second": 1.172, |
|
"step": 147 |
|
}, |
|
{ |
|
"epoch": 50.0, |
|
"eval_loss": 1.455941081047058, |
|
"eval_runtime": 10.8598, |
|
"eval_samples_per_second": 920.824, |
|
"eval_steps_per_second": 1.197, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 51.0, |
|
"eval_loss": 1.4334802627563477, |
|
"eval_runtime": 10.8586, |
|
"eval_samples_per_second": 920.928, |
|
"eval_steps_per_second": 1.197, |
|
"step": 153 |
|
}, |
|
{ |
|
"epoch": 52.0, |
|
"eval_loss": 1.4138292074203491, |
|
"eval_runtime": 10.87, |
|
"eval_samples_per_second": 919.964, |
|
"eval_steps_per_second": 1.196, |
|
"step": 156 |
|
}, |
|
{ |
|
"epoch": 53.0, |
|
"eval_loss": 1.4225151538848877, |
|
"eval_runtime": 10.8028, |
|
"eval_samples_per_second": 925.687, |
|
"eval_steps_per_second": 1.203, |
|
"step": 159 |
|
}, |
|
{ |
|
"epoch": 54.0, |
|
"eval_loss": 1.4350632429122925, |
|
"eval_runtime": 11.0684, |
|
"eval_samples_per_second": 903.472, |
|
"eval_steps_per_second": 1.175, |
|
"step": 162 |
|
}, |
|
{ |
|
"epoch": 55.0, |
|
"eval_loss": 1.3809276819229126, |
|
"eval_runtime": 10.7633, |
|
"eval_samples_per_second": 929.082, |
|
"eval_steps_per_second": 1.208, |
|
"step": 165 |
|
}, |
|
{ |
|
"epoch": 56.0, |
|
"eval_loss": 1.370450735092163, |
|
"eval_runtime": 10.785, |
|
"eval_samples_per_second": 927.215, |
|
"eval_steps_per_second": 1.205, |
|
"step": 168 |
|
}, |
|
{ |
|
"epoch": 57.0, |
|
"eval_loss": 1.3955777883529663, |
|
"eval_runtime": 10.895, |
|
"eval_samples_per_second": 917.848, |
|
"eval_steps_per_second": 1.193, |
|
"step": 171 |
|
}, |
|
{ |
|
"epoch": 58.0, |
|
"eval_loss": 1.368485927581787, |
|
"eval_runtime": 10.819, |
|
"eval_samples_per_second": 924.297, |
|
"eval_steps_per_second": 1.202, |
|
"step": 174 |
|
}, |
|
{ |
|
"epoch": 59.0, |
|
"eval_loss": 1.33143949508667, |
|
"eval_runtime": 10.8396, |
|
"eval_samples_per_second": 922.543, |
|
"eval_steps_per_second": 1.199, |
|
"step": 177 |
|
}, |
|
{ |
|
"epoch": 60.0, |
|
"eval_loss": 1.3212394714355469, |
|
"eval_runtime": 10.8092, |
|
"eval_samples_per_second": 925.139, |
|
"eval_steps_per_second": 1.203, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 61.0, |
|
"eval_loss": 1.3334152698516846, |
|
"eval_runtime": 10.9031, |
|
"eval_samples_per_second": 917.174, |
|
"eval_steps_per_second": 1.192, |
|
"step": 183 |
|
}, |
|
{ |
|
"epoch": 62.0, |
|
"eval_loss": 1.3178024291992188, |
|
"eval_runtime": 10.8916, |
|
"eval_samples_per_second": 918.135, |
|
"eval_steps_per_second": 1.194, |
|
"step": 186 |
|
}, |
|
{ |
|
"epoch": 63.0, |
|
"eval_loss": 1.2850910425186157, |
|
"eval_runtime": 10.7826, |
|
"eval_samples_per_second": 927.417, |
|
"eval_steps_per_second": 1.206, |
|
"step": 189 |
|
}, |
|
{ |
|
"epoch": 64.0, |
|
"eval_loss": 1.2826738357543945, |
|
"eval_runtime": 11.0431, |
|
"eval_samples_per_second": 905.544, |
|
"eval_steps_per_second": 1.177, |
|
"step": 192 |
|
}, |
|
{ |
|
"epoch": 65.0, |
|
"eval_loss": 1.276419758796692, |
|
"eval_runtime": 10.8459, |
|
"eval_samples_per_second": 922.009, |
|
"eval_steps_per_second": 1.199, |
|
"step": 195 |
|
}, |
|
{ |
|
"epoch": 66.0, |
|
"eval_loss": 1.2498911619186401, |
|
"eval_runtime": 10.8147, |
|
"eval_samples_per_second": 924.671, |
|
"eval_steps_per_second": 1.202, |
|
"step": 198 |
|
}, |
|
{ |
|
"epoch": 67.0, |
|
"eval_loss": 1.2304234504699707, |
|
"eval_runtime": 10.9437, |
|
"eval_samples_per_second": 913.768, |
|
"eval_steps_per_second": 1.188, |
|
"step": 201 |
|
}, |
|
{ |
|
"epoch": 68.0, |
|
"eval_loss": 1.2450603246688843, |
|
"eval_runtime": 10.8378, |
|
"eval_samples_per_second": 922.698, |
|
"eval_steps_per_second": 1.2, |
|
"step": 204 |
|
}, |
|
{ |
|
"epoch": 69.0, |
|
"eval_loss": 1.2278028726577759, |
|
"eval_runtime": 11.123, |
|
"eval_samples_per_second": 899.041, |
|
"eval_steps_per_second": 1.169, |
|
"step": 207 |
|
}, |
|
{ |
|
"epoch": 70.0, |
|
"eval_loss": 1.2043875455856323, |
|
"eval_runtime": 10.7963, |
|
"eval_samples_per_second": 926.245, |
|
"eval_steps_per_second": 1.204, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 71.0, |
|
"eval_loss": 1.199507236480713, |
|
"eval_runtime": 10.8038, |
|
"eval_samples_per_second": 925.601, |
|
"eval_steps_per_second": 1.203, |
|
"step": 213 |
|
}, |
|
{ |
|
"epoch": 72.0, |
|
"eval_loss": 1.1909499168395996, |
|
"eval_runtime": 10.9182, |
|
"eval_samples_per_second": 915.902, |
|
"eval_steps_per_second": 1.191, |
|
"step": 216 |
|
}, |
|
{ |
|
"epoch": 73.0, |
|
"eval_loss": 1.172565221786499, |
|
"eval_runtime": 10.8472, |
|
"eval_samples_per_second": 921.898, |
|
"eval_steps_per_second": 1.198, |
|
"step": 219 |
|
}, |
|
{ |
|
"epoch": 74.0, |
|
"eval_loss": 1.1473827362060547, |
|
"eval_runtime": 11.0541, |
|
"eval_samples_per_second": 904.638, |
|
"eval_steps_per_second": 1.176, |
|
"step": 222 |
|
}, |
|
{ |
|
"epoch": 75.0, |
|
"eval_loss": 1.1195847988128662, |
|
"eval_runtime": 10.924, |
|
"eval_samples_per_second": 915.419, |
|
"eval_steps_per_second": 1.19, |
|
"step": 225 |
|
}, |
|
{ |
|
"epoch": 76.0, |
|
"eval_loss": 1.1185705661773682, |
|
"eval_runtime": 10.8085, |
|
"eval_samples_per_second": 925.199, |
|
"eval_steps_per_second": 1.203, |
|
"step": 228 |
|
}, |
|
{ |
|
"epoch": 77.0, |
|
"eval_loss": 1.1256279945373535, |
|
"eval_runtime": 11.0649, |
|
"eval_samples_per_second": 903.755, |
|
"eval_steps_per_second": 1.175, |
|
"step": 231 |
|
}, |
|
{ |
|
"epoch": 78.0, |
|
"eval_loss": 1.0834866762161255, |
|
"eval_runtime": 10.8281, |
|
"eval_samples_per_second": 923.523, |
|
"eval_steps_per_second": 1.201, |
|
"step": 234 |
|
}, |
|
{ |
|
"epoch": 79.0, |
|
"eval_loss": 1.0588449239730835, |
|
"eval_runtime": 10.7832, |
|
"eval_samples_per_second": 927.372, |
|
"eval_steps_per_second": 1.206, |
|
"step": 237 |
|
}, |
|
{ |
|
"epoch": 80.0, |
|
"eval_loss": 1.0453879833221436, |
|
"eval_runtime": 10.8835, |
|
"eval_samples_per_second": 918.819, |
|
"eval_steps_per_second": 1.194, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 81.0, |
|
"eval_loss": 1.0334476232528687, |
|
"eval_runtime": 10.8064, |
|
"eval_samples_per_second": 925.376, |
|
"eval_steps_per_second": 1.203, |
|
"step": 243 |
|
}, |
|
{ |
|
"epoch": 82.0, |
|
"eval_loss": 1.028348684310913, |
|
"eval_runtime": 10.9037, |
|
"eval_samples_per_second": 917.121, |
|
"eval_steps_per_second": 1.192, |
|
"step": 246 |
|
}, |
|
{ |
|
"epoch": 83.0, |
|
"eval_loss": 1.0091253519058228, |
|
"eval_runtime": 10.8016, |
|
"eval_samples_per_second": 925.788, |
|
"eval_steps_per_second": 1.204, |
|
"step": 249 |
|
}, |
|
{ |
|
"epoch": 84.0, |
|
"eval_loss": 0.9820342659950256, |
|
"eval_runtime": 11.0485, |
|
"eval_samples_per_second": 905.101, |
|
"eval_steps_per_second": 1.177, |
|
"step": 252 |
|
}, |
|
{ |
|
"epoch": 85.0, |
|
"eval_loss": 0.9488591551780701, |
|
"eval_runtime": 10.8216, |
|
"eval_samples_per_second": 924.074, |
|
"eval_steps_per_second": 1.201, |
|
"step": 255 |
|
}, |
|
{ |
|
"epoch": 86.0, |
|
"eval_loss": 0.9406836032867432, |
|
"eval_runtime": 10.8253, |
|
"eval_samples_per_second": 923.765, |
|
"eval_steps_per_second": 1.201, |
|
"step": 258 |
|
}, |
|
{ |
|
"epoch": 87.0, |
|
"eval_loss": 0.9392226338386536, |
|
"eval_runtime": 10.9531, |
|
"eval_samples_per_second": 912.984, |
|
"eval_steps_per_second": 1.187, |
|
"step": 261 |
|
}, |
|
{ |
|
"epoch": 88.0, |
|
"eval_loss": 0.9232249855995178, |
|
"eval_runtime": 10.835, |
|
"eval_samples_per_second": 922.938, |
|
"eval_steps_per_second": 1.2, |
|
"step": 264 |
|
}, |
|
{ |
|
"epoch": 89.0, |
|
"eval_loss": 0.902049720287323, |
|
"eval_runtime": 10.915, |
|
"eval_samples_per_second": 916.173, |
|
"eval_steps_per_second": 1.191, |
|
"step": 267 |
|
}, |
|
{ |
|
"epoch": 90.0, |
|
"eval_loss": 0.8934366106987, |
|
"eval_runtime": 10.833, |
|
"eval_samples_per_second": 923.105, |
|
"eval_steps_per_second": 1.2, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 91.0, |
|
"eval_loss": 0.8797369003295898, |
|
"eval_runtime": 10.8275, |
|
"eval_samples_per_second": 923.576, |
|
"eval_steps_per_second": 1.201, |
|
"step": 273 |
|
}, |
|
{ |
|
"epoch": 92.0, |
|
"eval_loss": 0.8834591507911682, |
|
"eval_runtime": 10.8227, |
|
"eval_samples_per_second": 923.98, |
|
"eval_steps_per_second": 1.201, |
|
"step": 276 |
|
}, |
|
{ |
|
"epoch": 93.0, |
|
"eval_loss": 0.8583576083183289, |
|
"eval_runtime": 10.8262, |
|
"eval_samples_per_second": 923.681, |
|
"eval_steps_per_second": 1.201, |
|
"step": 279 |
|
}, |
|
{ |
|
"epoch": 94.0, |
|
"eval_loss": 0.8160658478736877, |
|
"eval_runtime": 10.8597, |
|
"eval_samples_per_second": 920.834, |
|
"eval_steps_per_second": 1.197, |
|
"step": 282 |
|
}, |
|
{ |
|
"epoch": 95.0, |
|
"eval_loss": 0.7998712658882141, |
|
"eval_runtime": 10.7874, |
|
"eval_samples_per_second": 927.007, |
|
"eval_steps_per_second": 1.205, |
|
"step": 285 |
|
}, |
|
{ |
|
"epoch": 96.0, |
|
"eval_loss": 0.8268041014671326, |
|
"eval_runtime": 11.0529, |
|
"eval_samples_per_second": 904.74, |
|
"eval_steps_per_second": 1.176, |
|
"step": 288 |
|
}, |
|
{ |
|
"epoch": 97.0, |
|
"eval_loss": 0.8250266313552856, |
|
"eval_runtime": 10.8021, |
|
"eval_samples_per_second": 925.746, |
|
"eval_steps_per_second": 1.203, |
|
"step": 291 |
|
}, |
|
{ |
|
"epoch": 98.0, |
|
"eval_loss": 0.7785258293151855, |
|
"eval_runtime": 10.7519, |
|
"eval_samples_per_second": 930.071, |
|
"eval_steps_per_second": 1.209, |
|
"step": 294 |
|
}, |
|
{ |
|
"epoch": 99.0, |
|
"eval_loss": 0.7797490358352661, |
|
"eval_runtime": 10.8842, |
|
"eval_samples_per_second": 918.761, |
|
"eval_steps_per_second": 1.194, |
|
"step": 297 |
|
}, |
|
{ |
|
"epoch": 100.0, |
|
"eval_loss": 0.7988857626914978, |
|
"eval_runtime": 10.8165, |
|
"eval_samples_per_second": 924.512, |
|
"eval_steps_per_second": 1.202, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 101.0, |
|
"eval_loss": 0.7674239277839661, |
|
"eval_runtime": 10.8984, |
|
"eval_samples_per_second": 917.569, |
|
"eval_steps_per_second": 1.193, |
|
"step": 303 |
|
}, |
|
{ |
|
"epoch": 102.0, |
|
"eval_loss": 0.7283704280853271, |
|
"eval_runtime": 10.7882, |
|
"eval_samples_per_second": 926.941, |
|
"eval_steps_per_second": 1.205, |
|
"step": 306 |
|
}, |
|
{ |
|
"epoch": 103.0, |
|
"eval_loss": 0.7265847325325012, |
|
"eval_runtime": 10.7846, |
|
"eval_samples_per_second": 927.248, |
|
"eval_steps_per_second": 1.205, |
|
"step": 309 |
|
}, |
|
{ |
|
"epoch": 104.0, |
|
"eval_loss": 0.7218329906463623, |
|
"eval_runtime": 10.8432, |
|
"eval_samples_per_second": 922.237, |
|
"eval_steps_per_second": 1.199, |
|
"step": 312 |
|
}, |
|
{ |
|
"epoch": 105.0, |
|
"eval_loss": 0.7195408344268799, |
|
"eval_runtime": 10.775, |
|
"eval_samples_per_second": 928.073, |
|
"eval_steps_per_second": 1.206, |
|
"step": 315 |
|
}, |
|
{ |
|
"epoch": 106.0, |
|
"eval_loss": 0.720020592212677, |
|
"eval_runtime": 11.033, |
|
"eval_samples_per_second": 906.372, |
|
"eval_steps_per_second": 1.178, |
|
"step": 318 |
|
}, |
|
{ |
|
"epoch": 107.0, |
|
"eval_loss": 0.7001694440841675, |
|
"eval_runtime": 10.7937, |
|
"eval_samples_per_second": 926.47, |
|
"eval_steps_per_second": 1.204, |
|
"step": 321 |
|
}, |
|
{ |
|
"epoch": 108.0, |
|
"eval_loss": 0.6704011559486389, |
|
"eval_runtime": 10.9903, |
|
"eval_samples_per_second": 909.893, |
|
"eval_steps_per_second": 1.183, |
|
"step": 324 |
|
}, |
|
{ |
|
"epoch": 109.0, |
|
"eval_loss": 0.6617050766944885, |
|
"eval_runtime": 11.0129, |
|
"eval_samples_per_second": 908.026, |
|
"eval_steps_per_second": 1.18, |
|
"step": 327 |
|
}, |
|
{ |
|
"epoch": 110.0, |
|
"eval_loss": 0.6687906384468079, |
|
"eval_runtime": 10.7417, |
|
"eval_samples_per_second": 930.948, |
|
"eval_steps_per_second": 1.21, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 111.0, |
|
"eval_loss": 0.6637664437294006, |
|
"eval_runtime": 10.8682, |
|
"eval_samples_per_second": 920.118, |
|
"eval_steps_per_second": 1.196, |
|
"step": 333 |
|
}, |
|
{ |
|
"epoch": 112.0, |
|
"eval_loss": 0.6454914212226868, |
|
"eval_runtime": 10.7981, |
|
"eval_samples_per_second": 926.087, |
|
"eval_steps_per_second": 1.204, |
|
"step": 336 |
|
}, |
|
{ |
|
"epoch": 113.0, |
|
"eval_loss": 0.6308099627494812, |
|
"eval_runtime": 10.8495, |
|
"eval_samples_per_second": 921.699, |
|
"eval_steps_per_second": 1.198, |
|
"step": 339 |
|
}, |
|
{ |
|
"epoch": 114.0, |
|
"eval_loss": 0.6411617398262024, |
|
"eval_runtime": 10.9089, |
|
"eval_samples_per_second": 916.679, |
|
"eval_steps_per_second": 1.192, |
|
"step": 342 |
|
}, |
|
{ |
|
"epoch": 115.0, |
|
"eval_loss": 0.6422205567359924, |
|
"eval_runtime": 10.7825, |
|
"eval_samples_per_second": 927.428, |
|
"eval_steps_per_second": 1.206, |
|
"step": 345 |
|
}, |
|
{ |
|
"epoch": 116.0, |
|
"eval_loss": 0.6265988349914551, |
|
"eval_runtime": 10.865, |
|
"eval_samples_per_second": 920.387, |
|
"eval_steps_per_second": 1.197, |
|
"step": 348 |
|
}, |
|
{ |
|
"epoch": 117.0, |
|
"eval_loss": 0.615440845489502, |
|
"eval_runtime": 10.8031, |
|
"eval_samples_per_second": 925.657, |
|
"eval_steps_per_second": 1.203, |
|
"step": 351 |
|
}, |
|
{ |
|
"epoch": 118.0, |
|
"eval_loss": 0.6053263545036316, |
|
"eval_runtime": 10.8755, |
|
"eval_samples_per_second": 919.496, |
|
"eval_steps_per_second": 1.195, |
|
"step": 354 |
|
}, |
|
{ |
|
"epoch": 119.0, |
|
"eval_loss": 0.6083167791366577, |
|
"eval_runtime": 10.861, |
|
"eval_samples_per_second": 920.722, |
|
"eval_steps_per_second": 1.197, |
|
"step": 357 |
|
}, |
|
{ |
|
"epoch": 120.0, |
|
"eval_loss": 0.607414960861206, |
|
"eval_runtime": 10.7885, |
|
"eval_samples_per_second": 926.909, |
|
"eval_steps_per_second": 1.205, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 121.0, |
|
"eval_loss": 0.6041896343231201, |
|
"eval_runtime": 10.8677, |
|
"eval_samples_per_second": 920.161, |
|
"eval_steps_per_second": 1.196, |
|
"step": 363 |
|
}, |
|
{ |
|
"epoch": 122.0, |
|
"eval_loss": 0.5942515134811401, |
|
"eval_runtime": 10.786, |
|
"eval_samples_per_second": 927.126, |
|
"eval_steps_per_second": 1.205, |
|
"step": 366 |
|
}, |
|
{ |
|
"epoch": 123.0, |
|
"eval_loss": 0.5849249958992004, |
|
"eval_runtime": 10.8776, |
|
"eval_samples_per_second": 919.318, |
|
"eval_steps_per_second": 1.195, |
|
"step": 369 |
|
}, |
|
{ |
|
"epoch": 124.0, |
|
"eval_loss": 0.5770248770713806, |
|
"eval_runtime": 10.8665, |
|
"eval_samples_per_second": 920.261, |
|
"eval_steps_per_second": 1.196, |
|
"step": 372 |
|
}, |
|
{ |
|
"epoch": 125.0, |
|
"eval_loss": 0.5670948028564453, |
|
"eval_runtime": 10.7839, |
|
"eval_samples_per_second": 927.305, |
|
"eval_steps_per_second": 1.205, |
|
"step": 375 |
|
}, |
|
{ |
|
"epoch": 126.0, |
|
"eval_loss": 0.5637474656105042, |
|
"eval_runtime": 10.8663, |
|
"eval_samples_per_second": 920.276, |
|
"eval_steps_per_second": 1.196, |
|
"step": 378 |
|
}, |
|
{ |
|
"epoch": 127.0, |
|
"eval_loss": 0.5640723705291748, |
|
"eval_runtime": 10.7903, |
|
"eval_samples_per_second": 926.754, |
|
"eval_steps_per_second": 1.205, |
|
"step": 381 |
|
}, |
|
{ |
|
"epoch": 128.0, |
|
"eval_loss": 0.5665525197982788, |
|
"eval_runtime": 10.8832, |
|
"eval_samples_per_second": 918.844, |
|
"eval_steps_per_second": 1.194, |
|
"step": 384 |
|
}, |
|
{ |
|
"epoch": 129.0, |
|
"eval_loss": 0.5674743056297302, |
|
"eval_runtime": 10.8693, |
|
"eval_samples_per_second": 920.019, |
|
"eval_steps_per_second": 1.196, |
|
"step": 387 |
|
}, |
|
{ |
|
"epoch": 130.0, |
|
"eval_loss": 0.558768630027771, |
|
"eval_runtime": 10.7538, |
|
"eval_samples_per_second": 929.908, |
|
"eval_steps_per_second": 1.209, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 131.0, |
|
"eval_loss": 0.5466377139091492, |
|
"eval_runtime": 10.8966, |
|
"eval_samples_per_second": 917.721, |
|
"eval_steps_per_second": 1.193, |
|
"step": 393 |
|
}, |
|
{ |
|
"epoch": 132.0, |
|
"eval_loss": 0.5402641296386719, |
|
"eval_runtime": 10.8007, |
|
"eval_samples_per_second": 925.864, |
|
"eval_steps_per_second": 1.204, |
|
"step": 396 |
|
}, |
|
{ |
|
"epoch": 133.0, |
|
"eval_loss": 0.540850043296814, |
|
"eval_runtime": 10.8814, |
|
"eval_samples_per_second": 918.996, |
|
"eval_steps_per_second": 1.195, |
|
"step": 399 |
|
}, |
|
{ |
|
"epoch": 134.0, |
|
"eval_loss": 0.5402743220329285, |
|
"eval_runtime": 10.8778, |
|
"eval_samples_per_second": 919.303, |
|
"eval_steps_per_second": 1.195, |
|
"step": 402 |
|
}, |
|
{ |
|
"epoch": 135.0, |
|
"eval_loss": 0.5400083065032959, |
|
"eval_runtime": 10.7631, |
|
"eval_samples_per_second": 929.098, |
|
"eval_steps_per_second": 1.208, |
|
"step": 405 |
|
}, |
|
{ |
|
"epoch": 136.0, |
|
"eval_loss": 0.5397944450378418, |
|
"eval_runtime": 10.8575, |
|
"eval_samples_per_second": 921.018, |
|
"eval_steps_per_second": 1.197, |
|
"step": 408 |
|
}, |
|
{ |
|
"epoch": 137.0, |
|
"eval_loss": 0.5391473770141602, |
|
"eval_runtime": 10.8073, |
|
"eval_samples_per_second": 925.299, |
|
"eval_steps_per_second": 1.203, |
|
"step": 411 |
|
}, |
|
{ |
|
"epoch": 138.0, |
|
"eval_loss": 0.5366548895835876, |
|
"eval_runtime": 10.8743, |
|
"eval_samples_per_second": 919.601, |
|
"eval_steps_per_second": 1.195, |
|
"step": 414 |
|
}, |
|
{ |
|
"epoch": 139.0, |
|
"eval_loss": 0.5325959920883179, |
|
"eval_runtime": 10.8603, |
|
"eval_samples_per_second": 920.781, |
|
"eval_steps_per_second": 1.197, |
|
"step": 417 |
|
}, |
|
{ |
|
"epoch": 140.0, |
|
"eval_loss": 0.5299940705299377, |
|
"eval_runtime": 10.7821, |
|
"eval_samples_per_second": 927.467, |
|
"eval_steps_per_second": 1.206, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 141.0, |
|
"eval_loss": 0.5305985808372498, |
|
"eval_runtime": 10.8496, |
|
"eval_samples_per_second": 921.697, |
|
"eval_steps_per_second": 1.198, |
|
"step": 423 |
|
}, |
|
{ |
|
"epoch": 142.0, |
|
"eval_loss": 0.5292918682098389, |
|
"eval_runtime": 10.7928, |
|
"eval_samples_per_second": 926.542, |
|
"eval_steps_per_second": 1.205, |
|
"step": 426 |
|
}, |
|
{ |
|
"epoch": 143.0, |
|
"eval_loss": 0.5267909169197083, |
|
"eval_runtime": 10.8928, |
|
"eval_samples_per_second": 918.04, |
|
"eval_steps_per_second": 1.193, |
|
"step": 429 |
|
}, |
|
{ |
|
"epoch": 144.0, |
|
"eval_loss": 0.523544430732727, |
|
"eval_runtime": 10.8792, |
|
"eval_samples_per_second": 919.184, |
|
"eval_steps_per_second": 1.195, |
|
"step": 432 |
|
}, |
|
{ |
|
"epoch": 145.0, |
|
"eval_loss": 0.5205994248390198, |
|
"eval_runtime": 10.7887, |
|
"eval_samples_per_second": 926.892, |
|
"eval_steps_per_second": 1.205, |
|
"step": 435 |
|
}, |
|
{ |
|
"epoch": 146.0, |
|
"eval_loss": 0.519413411617279, |
|
"eval_runtime": 10.8656, |
|
"eval_samples_per_second": 920.336, |
|
"eval_steps_per_second": 1.196, |
|
"step": 438 |
|
}, |
|
{ |
|
"epoch": 147.0, |
|
"eval_loss": 0.519058108329773, |
|
"eval_runtime": 10.7991, |
|
"eval_samples_per_second": 926.005, |
|
"eval_steps_per_second": 1.204, |
|
"step": 441 |
|
}, |
|
{ |
|
"epoch": 148.0, |
|
"eval_loss": 0.5188890099525452, |
|
"eval_runtime": 10.8757, |
|
"eval_samples_per_second": 919.48, |
|
"eval_steps_per_second": 1.195, |
|
"step": 444 |
|
}, |
|
{ |
|
"epoch": 149.0, |
|
"eval_loss": 0.5188961625099182, |
|
"eval_runtime": 10.8633, |
|
"eval_samples_per_second": 920.533, |
|
"eval_steps_per_second": 1.197, |
|
"step": 447 |
|
}, |
|
{ |
|
"epoch": 150.0, |
|
"eval_loss": 0.5188751220703125, |
|
"eval_runtime": 10.8003, |
|
"eval_samples_per_second": 925.904, |
|
"eval_steps_per_second": 1.204, |
|
"step": 450 |
|
} |
|
], |
|
"logging_steps": 500, |
|
"max_steps": 450, |
|
"num_train_epochs": 150, |
|
"save_steps": 500, |
|
"total_flos": 8613277286400000.0, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|