|
{ |
|
"best_metric": 1.4208089113235474, |
|
"best_model_checkpoint": "AlexWang99/byt5_re_1k/checkpoint-362", |
|
"epoch": 181.0, |
|
"eval_steps": 500, |
|
"global_step": 362, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 1.0, |
|
"eval_loss": 7.775562286376953, |
|
"eval_runtime": 10.6848, |
|
"eval_samples_per_second": 935.909, |
|
"eval_steps_per_second": 1.217, |
|
"step": 2 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_loss": 6.888936519622803, |
|
"eval_runtime": 10.8293, |
|
"eval_samples_per_second": 923.42, |
|
"eval_steps_per_second": 1.2, |
|
"step": 4 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_loss": 6.1838483810424805, |
|
"eval_runtime": 10.6887, |
|
"eval_samples_per_second": 935.569, |
|
"eval_steps_per_second": 1.216, |
|
"step": 6 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_loss": 5.626400947570801, |
|
"eval_runtime": 10.9681, |
|
"eval_samples_per_second": 911.734, |
|
"eval_steps_per_second": 1.185, |
|
"step": 8 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"eval_loss": 5.153331279754639, |
|
"eval_runtime": 10.7369, |
|
"eval_samples_per_second": 931.367, |
|
"eval_steps_per_second": 1.211, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"eval_loss": 4.734142780303955, |
|
"eval_runtime": 11.004, |
|
"eval_samples_per_second": 908.762, |
|
"eval_steps_per_second": 1.181, |
|
"step": 12 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"eval_loss": 4.333576679229736, |
|
"eval_runtime": 10.7676, |
|
"eval_samples_per_second": 928.708, |
|
"eval_steps_per_second": 1.207, |
|
"step": 14 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"eval_loss": 3.938502788543701, |
|
"eval_runtime": 10.782, |
|
"eval_samples_per_second": 927.476, |
|
"eval_steps_per_second": 1.206, |
|
"step": 16 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"eval_loss": 3.557016134262085, |
|
"eval_runtime": 11.022, |
|
"eval_samples_per_second": 907.278, |
|
"eval_steps_per_second": 1.179, |
|
"step": 18 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"eval_loss": 3.2168831825256348, |
|
"eval_runtime": 10.7843, |
|
"eval_samples_per_second": 927.273, |
|
"eval_steps_per_second": 1.205, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 11.0, |
|
"eval_loss": 2.9309566020965576, |
|
"eval_runtime": 10.8755, |
|
"eval_samples_per_second": 919.501, |
|
"eval_steps_per_second": 1.195, |
|
"step": 22 |
|
}, |
|
{ |
|
"epoch": 12.0, |
|
"eval_loss": 2.7042219638824463, |
|
"eval_runtime": 10.7967, |
|
"eval_samples_per_second": 926.21, |
|
"eval_steps_per_second": 1.204, |
|
"step": 24 |
|
}, |
|
{ |
|
"epoch": 13.0, |
|
"eval_loss": 2.5138416290283203, |
|
"eval_runtime": 10.8666, |
|
"eval_samples_per_second": 920.248, |
|
"eval_steps_per_second": 1.196, |
|
"step": 26 |
|
}, |
|
{ |
|
"epoch": 14.0, |
|
"eval_loss": 2.3652985095977783, |
|
"eval_runtime": 10.7919, |
|
"eval_samples_per_second": 926.619, |
|
"eval_steps_per_second": 1.205, |
|
"step": 28 |
|
}, |
|
{ |
|
"epoch": 15.0, |
|
"eval_loss": 2.248030185699463, |
|
"eval_runtime": 10.8705, |
|
"eval_samples_per_second": 919.923, |
|
"eval_steps_per_second": 1.196, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 16.0, |
|
"eval_loss": 2.13440203666687, |
|
"eval_runtime": 10.7977, |
|
"eval_samples_per_second": 926.12, |
|
"eval_steps_per_second": 1.204, |
|
"step": 32 |
|
}, |
|
{ |
|
"epoch": 17.0, |
|
"eval_loss": 2.0899171829223633, |
|
"eval_runtime": 10.7912, |
|
"eval_samples_per_second": 926.685, |
|
"eval_steps_per_second": 1.205, |
|
"step": 34 |
|
}, |
|
{ |
|
"epoch": 18.0, |
|
"eval_loss": 2.100477457046509, |
|
"eval_runtime": 10.8653, |
|
"eval_samples_per_second": 920.357, |
|
"eval_steps_per_second": 1.196, |
|
"step": 36 |
|
}, |
|
{ |
|
"epoch": 19.0, |
|
"eval_loss": 2.047548532485962, |
|
"eval_runtime": 10.7885, |
|
"eval_samples_per_second": 926.915, |
|
"eval_steps_per_second": 1.205, |
|
"step": 38 |
|
}, |
|
{ |
|
"epoch": 20.0, |
|
"eval_loss": 1.996556043624878, |
|
"eval_runtime": 11.0201, |
|
"eval_samples_per_second": 907.429, |
|
"eval_steps_per_second": 1.18, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 21.0, |
|
"eval_loss": 1.9695481061935425, |
|
"eval_runtime": 10.783, |
|
"eval_samples_per_second": 927.385, |
|
"eval_steps_per_second": 1.206, |
|
"step": 42 |
|
}, |
|
{ |
|
"epoch": 22.0, |
|
"eval_loss": 1.9760867357254028, |
|
"eval_runtime": 10.8889, |
|
"eval_samples_per_second": 918.37, |
|
"eval_steps_per_second": 1.194, |
|
"step": 44 |
|
}, |
|
{ |
|
"epoch": 23.0, |
|
"eval_loss": 1.9658013582229614, |
|
"eval_runtime": 10.7879, |
|
"eval_samples_per_second": 926.962, |
|
"eval_steps_per_second": 1.205, |
|
"step": 46 |
|
}, |
|
{ |
|
"epoch": 24.0, |
|
"eval_loss": 1.9119044542312622, |
|
"eval_runtime": 10.8618, |
|
"eval_samples_per_second": 920.659, |
|
"eval_steps_per_second": 1.197, |
|
"step": 48 |
|
}, |
|
{ |
|
"epoch": 25.0, |
|
"eval_loss": 1.89186429977417, |
|
"eval_runtime": 10.7933, |
|
"eval_samples_per_second": 926.503, |
|
"eval_steps_per_second": 1.204, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 26.0, |
|
"eval_loss": 1.8932195901870728, |
|
"eval_runtime": 10.873, |
|
"eval_samples_per_second": 919.709, |
|
"eval_steps_per_second": 1.196, |
|
"step": 52 |
|
}, |
|
{ |
|
"epoch": 27.0, |
|
"eval_loss": 1.9076615571975708, |
|
"eval_runtime": 10.7917, |
|
"eval_samples_per_second": 926.637, |
|
"eval_steps_per_second": 1.205, |
|
"step": 54 |
|
}, |
|
{ |
|
"epoch": 28.0, |
|
"eval_loss": 1.8671808242797852, |
|
"eval_runtime": 10.7945, |
|
"eval_samples_per_second": 926.402, |
|
"eval_steps_per_second": 1.204, |
|
"step": 56 |
|
}, |
|
{ |
|
"epoch": 29.0, |
|
"eval_loss": 1.8664835691452026, |
|
"eval_runtime": 10.8739, |
|
"eval_samples_per_second": 919.632, |
|
"eval_steps_per_second": 1.196, |
|
"step": 58 |
|
}, |
|
{ |
|
"epoch": 30.0, |
|
"eval_loss": 1.8679856061935425, |
|
"eval_runtime": 10.7966, |
|
"eval_samples_per_second": 926.219, |
|
"eval_steps_per_second": 1.204, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 31.0, |
|
"eval_loss": 1.858332633972168, |
|
"eval_runtime": 10.8742, |
|
"eval_samples_per_second": 919.609, |
|
"eval_steps_per_second": 1.195, |
|
"step": 62 |
|
}, |
|
{ |
|
"epoch": 32.0, |
|
"eval_loss": 1.8586550951004028, |
|
"eval_runtime": 10.7775, |
|
"eval_samples_per_second": 927.859, |
|
"eval_steps_per_second": 1.206, |
|
"step": 64 |
|
}, |
|
{ |
|
"epoch": 33.0, |
|
"eval_loss": 1.836769700050354, |
|
"eval_runtime": 10.8615, |
|
"eval_samples_per_second": 920.686, |
|
"eval_steps_per_second": 1.197, |
|
"step": 66 |
|
}, |
|
{ |
|
"epoch": 34.0, |
|
"eval_loss": 1.8279472589492798, |
|
"eval_runtime": 10.7787, |
|
"eval_samples_per_second": 927.754, |
|
"eval_steps_per_second": 1.206, |
|
"step": 68 |
|
}, |
|
{ |
|
"epoch": 35.0, |
|
"eval_loss": 1.817927360534668, |
|
"eval_runtime": 10.7813, |
|
"eval_samples_per_second": 927.534, |
|
"eval_steps_per_second": 1.206, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 36.0, |
|
"eval_loss": 1.7950810194015503, |
|
"eval_runtime": 11.0203, |
|
"eval_samples_per_second": 907.419, |
|
"eval_steps_per_second": 1.18, |
|
"step": 72 |
|
}, |
|
{ |
|
"epoch": 37.0, |
|
"eval_loss": 1.7667052745819092, |
|
"eval_runtime": 10.7915, |
|
"eval_samples_per_second": 926.655, |
|
"eval_steps_per_second": 1.205, |
|
"step": 74 |
|
}, |
|
{ |
|
"epoch": 38.0, |
|
"eval_loss": 1.7852015495300293, |
|
"eval_runtime": 10.8703, |
|
"eval_samples_per_second": 919.941, |
|
"eval_steps_per_second": 1.196, |
|
"step": 76 |
|
}, |
|
{ |
|
"epoch": 39.0, |
|
"eval_loss": 1.7741429805755615, |
|
"eval_runtime": 10.7837, |
|
"eval_samples_per_second": 927.326, |
|
"eval_steps_per_second": 1.206, |
|
"step": 78 |
|
}, |
|
{ |
|
"epoch": 40.0, |
|
"eval_loss": 1.718443512916565, |
|
"eval_runtime": 10.8729, |
|
"eval_samples_per_second": 919.72, |
|
"eval_steps_per_second": 1.196, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 41.0, |
|
"eval_loss": 1.6899281740188599, |
|
"eval_runtime": 10.789, |
|
"eval_samples_per_second": 926.869, |
|
"eval_steps_per_second": 1.205, |
|
"step": 82 |
|
}, |
|
{ |
|
"epoch": 42.0, |
|
"eval_loss": 1.6984148025512695, |
|
"eval_runtime": 10.8669, |
|
"eval_samples_per_second": 920.229, |
|
"eval_steps_per_second": 1.196, |
|
"step": 84 |
|
}, |
|
{ |
|
"epoch": 43.0, |
|
"eval_loss": 1.68986177444458, |
|
"eval_runtime": 10.7878, |
|
"eval_samples_per_second": 926.976, |
|
"eval_steps_per_second": 1.205, |
|
"step": 86 |
|
}, |
|
{ |
|
"epoch": 44.0, |
|
"eval_loss": 1.643083930015564, |
|
"eval_runtime": 10.7947, |
|
"eval_samples_per_second": 926.384, |
|
"eval_steps_per_second": 1.204, |
|
"step": 88 |
|
}, |
|
{ |
|
"epoch": 45.0, |
|
"eval_loss": 1.6373401880264282, |
|
"eval_runtime": 10.8675, |
|
"eval_samples_per_second": 920.171, |
|
"eval_steps_per_second": 1.196, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 46.0, |
|
"eval_loss": 1.6289523839950562, |
|
"eval_runtime": 10.7853, |
|
"eval_samples_per_second": 927.192, |
|
"eval_steps_per_second": 1.205, |
|
"step": 92 |
|
}, |
|
{ |
|
"epoch": 47.0, |
|
"eval_loss": 1.6165605783462524, |
|
"eval_runtime": 11.0285, |
|
"eval_samples_per_second": 906.741, |
|
"eval_steps_per_second": 1.179, |
|
"step": 94 |
|
}, |
|
{ |
|
"epoch": 48.0, |
|
"eval_loss": 1.6076592206954956, |
|
"eval_runtime": 10.7945, |
|
"eval_samples_per_second": 926.401, |
|
"eval_steps_per_second": 1.204, |
|
"step": 96 |
|
}, |
|
{ |
|
"epoch": 49.0, |
|
"eval_loss": 1.604373574256897, |
|
"eval_runtime": 10.8717, |
|
"eval_samples_per_second": 919.822, |
|
"eval_steps_per_second": 1.196, |
|
"step": 98 |
|
}, |
|
{ |
|
"epoch": 50.0, |
|
"eval_loss": 1.616941213607788, |
|
"eval_runtime": 10.793, |
|
"eval_samples_per_second": 926.53, |
|
"eval_steps_per_second": 1.204, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 51.0, |
|
"eval_loss": 1.6135770082473755, |
|
"eval_runtime": 11.0422, |
|
"eval_samples_per_second": 905.614, |
|
"eval_steps_per_second": 1.177, |
|
"step": 102 |
|
}, |
|
{ |
|
"epoch": 52.0, |
|
"eval_loss": 1.584197759628296, |
|
"eval_runtime": 10.7862, |
|
"eval_samples_per_second": 927.109, |
|
"eval_steps_per_second": 1.205, |
|
"step": 104 |
|
}, |
|
{ |
|
"epoch": 53.0, |
|
"eval_loss": 1.5821086168289185, |
|
"eval_runtime": 11.0219, |
|
"eval_samples_per_second": 907.286, |
|
"eval_steps_per_second": 1.179, |
|
"step": 106 |
|
}, |
|
{ |
|
"epoch": 54.0, |
|
"eval_loss": 1.592121958732605, |
|
"eval_runtime": 10.7783, |
|
"eval_samples_per_second": 927.787, |
|
"eval_steps_per_second": 1.206, |
|
"step": 108 |
|
}, |
|
{ |
|
"epoch": 55.0, |
|
"eval_loss": 1.598443865776062, |
|
"eval_runtime": 11.0215, |
|
"eval_samples_per_second": 907.318, |
|
"eval_steps_per_second": 1.18, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 56.0, |
|
"eval_loss": 1.562904715538025, |
|
"eval_runtime": 10.8689, |
|
"eval_samples_per_second": 920.053, |
|
"eval_steps_per_second": 1.196, |
|
"step": 112 |
|
}, |
|
{ |
|
"epoch": 57.0, |
|
"eval_loss": 1.556736707687378, |
|
"eval_runtime": 10.7877, |
|
"eval_samples_per_second": 926.98, |
|
"eval_steps_per_second": 1.205, |
|
"step": 114 |
|
}, |
|
{ |
|
"epoch": 58.0, |
|
"eval_loss": 1.554081916809082, |
|
"eval_runtime": 10.8724, |
|
"eval_samples_per_second": 919.757, |
|
"eval_steps_per_second": 1.196, |
|
"step": 116 |
|
}, |
|
{ |
|
"epoch": 59.0, |
|
"eval_loss": 1.5780812501907349, |
|
"eval_runtime": 10.7882, |
|
"eval_samples_per_second": 926.94, |
|
"eval_steps_per_second": 1.205, |
|
"step": 118 |
|
}, |
|
{ |
|
"epoch": 60.0, |
|
"eval_loss": 1.5569387674331665, |
|
"eval_runtime": 10.878, |
|
"eval_samples_per_second": 919.289, |
|
"eval_steps_per_second": 1.195, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 61.0, |
|
"eval_loss": 1.5472592115402222, |
|
"eval_runtime": 10.797, |
|
"eval_samples_per_second": 926.18, |
|
"eval_steps_per_second": 1.204, |
|
"step": 122 |
|
}, |
|
{ |
|
"epoch": 62.0, |
|
"eval_loss": 1.5447337627410889, |
|
"eval_runtime": 10.7871, |
|
"eval_samples_per_second": 927.031, |
|
"eval_steps_per_second": 1.205, |
|
"step": 124 |
|
}, |
|
{ |
|
"epoch": 63.0, |
|
"eval_loss": 1.5714285373687744, |
|
"eval_runtime": 11.0266, |
|
"eval_samples_per_second": 906.896, |
|
"eval_steps_per_second": 1.179, |
|
"step": 126 |
|
}, |
|
{ |
|
"epoch": 64.0, |
|
"eval_loss": 1.572940468788147, |
|
"eval_runtime": 10.7931, |
|
"eval_samples_per_second": 926.518, |
|
"eval_steps_per_second": 1.204, |
|
"step": 128 |
|
}, |
|
{ |
|
"epoch": 65.0, |
|
"eval_loss": 1.5434839725494385, |
|
"eval_runtime": 10.8657, |
|
"eval_samples_per_second": 920.328, |
|
"eval_steps_per_second": 1.196, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 66.0, |
|
"eval_loss": 1.5406599044799805, |
|
"eval_runtime": 10.7896, |
|
"eval_samples_per_second": 926.816, |
|
"eval_steps_per_second": 1.205, |
|
"step": 132 |
|
}, |
|
{ |
|
"epoch": 67.0, |
|
"eval_loss": 1.5430865287780762, |
|
"eval_runtime": 10.868, |
|
"eval_samples_per_second": 920.137, |
|
"eval_steps_per_second": 1.196, |
|
"step": 134 |
|
}, |
|
{ |
|
"epoch": 68.0, |
|
"eval_loss": 1.5774503946304321, |
|
"eval_runtime": 10.7888, |
|
"eval_samples_per_second": 926.889, |
|
"eval_steps_per_second": 1.205, |
|
"step": 136 |
|
}, |
|
{ |
|
"epoch": 69.0, |
|
"eval_loss": 1.5680021047592163, |
|
"eval_runtime": 10.8677, |
|
"eval_samples_per_second": 920.161, |
|
"eval_steps_per_second": 1.196, |
|
"step": 138 |
|
}, |
|
{ |
|
"epoch": 70.0, |
|
"eval_loss": 1.5386120080947876, |
|
"eval_runtime": 10.7802, |
|
"eval_samples_per_second": 927.629, |
|
"eval_steps_per_second": 1.206, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 71.0, |
|
"eval_loss": 1.537217378616333, |
|
"eval_runtime": 10.7876, |
|
"eval_samples_per_second": 926.994, |
|
"eval_steps_per_second": 1.205, |
|
"step": 142 |
|
}, |
|
{ |
|
"epoch": 72.0, |
|
"eval_loss": 1.5415935516357422, |
|
"eval_runtime": 10.8784, |
|
"eval_samples_per_second": 919.255, |
|
"eval_steps_per_second": 1.195, |
|
"step": 144 |
|
}, |
|
{ |
|
"epoch": 73.0, |
|
"eval_loss": 1.5851819515228271, |
|
"eval_runtime": 10.7879, |
|
"eval_samples_per_second": 926.962, |
|
"eval_steps_per_second": 1.205, |
|
"step": 146 |
|
}, |
|
{ |
|
"epoch": 74.0, |
|
"eval_loss": 1.5615135431289673, |
|
"eval_runtime": 11.0229, |
|
"eval_samples_per_second": 907.205, |
|
"eval_steps_per_second": 1.179, |
|
"step": 148 |
|
}, |
|
{ |
|
"epoch": 75.0, |
|
"eval_loss": 1.5356107950210571, |
|
"eval_runtime": 10.7892, |
|
"eval_samples_per_second": 926.852, |
|
"eval_steps_per_second": 1.205, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 76.0, |
|
"eval_loss": 1.5345250368118286, |
|
"eval_runtime": 10.8628, |
|
"eval_samples_per_second": 920.574, |
|
"eval_steps_per_second": 1.197, |
|
"step": 152 |
|
}, |
|
{ |
|
"epoch": 77.0, |
|
"eval_loss": 1.5348494052886963, |
|
"eval_runtime": 10.7982, |
|
"eval_samples_per_second": 926.08, |
|
"eval_steps_per_second": 1.204, |
|
"step": 154 |
|
}, |
|
{ |
|
"epoch": 78.0, |
|
"eval_loss": 1.5350173711776733, |
|
"eval_runtime": 11.0234, |
|
"eval_samples_per_second": 907.165, |
|
"eval_steps_per_second": 1.179, |
|
"step": 156 |
|
}, |
|
{ |
|
"epoch": 79.0, |
|
"eval_loss": 1.5344058275222778, |
|
"eval_runtime": 10.7938, |
|
"eval_samples_per_second": 926.459, |
|
"eval_steps_per_second": 1.204, |
|
"step": 158 |
|
}, |
|
{ |
|
"epoch": 80.0, |
|
"eval_loss": 1.5319613218307495, |
|
"eval_runtime": 11.0103, |
|
"eval_samples_per_second": 908.24, |
|
"eval_steps_per_second": 1.181, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 81.0, |
|
"eval_loss": 1.5311990976333618, |
|
"eval_runtime": 10.7993, |
|
"eval_samples_per_second": 925.99, |
|
"eval_steps_per_second": 1.204, |
|
"step": 162 |
|
}, |
|
{ |
|
"epoch": 82.0, |
|
"eval_loss": 1.5308246612548828, |
|
"eval_runtime": 11.0279, |
|
"eval_samples_per_second": 906.787, |
|
"eval_steps_per_second": 1.179, |
|
"step": 164 |
|
}, |
|
{ |
|
"epoch": 83.0, |
|
"eval_loss": 1.530852198600769, |
|
"eval_runtime": 10.8812, |
|
"eval_samples_per_second": 919.019, |
|
"eval_steps_per_second": 1.195, |
|
"step": 166 |
|
}, |
|
{ |
|
"epoch": 84.0, |
|
"eval_loss": 1.528510332107544, |
|
"eval_runtime": 10.7908, |
|
"eval_samples_per_second": 926.72, |
|
"eval_steps_per_second": 1.205, |
|
"step": 168 |
|
}, |
|
{ |
|
"epoch": 85.0, |
|
"eval_loss": 1.5265289545059204, |
|
"eval_runtime": 10.8831, |
|
"eval_samples_per_second": 918.858, |
|
"eval_steps_per_second": 1.195, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 86.0, |
|
"eval_loss": 1.525810956954956, |
|
"eval_runtime": 10.7922, |
|
"eval_samples_per_second": 926.595, |
|
"eval_steps_per_second": 1.205, |
|
"step": 172 |
|
}, |
|
{ |
|
"epoch": 87.0, |
|
"eval_loss": 1.5260074138641357, |
|
"eval_runtime": 10.8658, |
|
"eval_samples_per_second": 920.317, |
|
"eval_steps_per_second": 1.196, |
|
"step": 174 |
|
}, |
|
{ |
|
"epoch": 88.0, |
|
"eval_loss": 1.523621678352356, |
|
"eval_runtime": 10.8062, |
|
"eval_samples_per_second": 925.398, |
|
"eval_steps_per_second": 1.203, |
|
"step": 176 |
|
}, |
|
{ |
|
"epoch": 89.0, |
|
"eval_loss": 1.522126317024231, |
|
"eval_runtime": 10.7844, |
|
"eval_samples_per_second": 927.263, |
|
"eval_steps_per_second": 1.205, |
|
"step": 178 |
|
}, |
|
{ |
|
"epoch": 90.0, |
|
"eval_loss": 1.5219790935516357, |
|
"eval_runtime": 10.8694, |
|
"eval_samples_per_second": 920.018, |
|
"eval_steps_per_second": 1.196, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 91.0, |
|
"eval_loss": 1.5207847356796265, |
|
"eval_runtime": 10.7979, |
|
"eval_samples_per_second": 926.103, |
|
"eval_steps_per_second": 1.204, |
|
"step": 182 |
|
}, |
|
{ |
|
"epoch": 92.0, |
|
"eval_loss": 1.517883062362671, |
|
"eval_runtime": 10.8783, |
|
"eval_samples_per_second": 919.261, |
|
"eval_steps_per_second": 1.195, |
|
"step": 184 |
|
}, |
|
{ |
|
"epoch": 93.0, |
|
"eval_loss": 1.515496015548706, |
|
"eval_runtime": 10.783, |
|
"eval_samples_per_second": 927.387, |
|
"eval_steps_per_second": 1.206, |
|
"step": 186 |
|
}, |
|
{ |
|
"epoch": 94.0, |
|
"eval_loss": 1.5140419006347656, |
|
"eval_runtime": 11.0298, |
|
"eval_samples_per_second": 906.635, |
|
"eval_steps_per_second": 1.179, |
|
"step": 188 |
|
}, |
|
{ |
|
"epoch": 95.0, |
|
"eval_loss": 1.5137407779693604, |
|
"eval_runtime": 10.8014, |
|
"eval_samples_per_second": 925.809, |
|
"eval_steps_per_second": 1.204, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 96.0, |
|
"eval_loss": 1.5128830671310425, |
|
"eval_runtime": 10.8766, |
|
"eval_samples_per_second": 919.402, |
|
"eval_steps_per_second": 1.195, |
|
"step": 192 |
|
}, |
|
{ |
|
"epoch": 97.0, |
|
"eval_loss": 1.5099443197250366, |
|
"eval_runtime": 10.809, |
|
"eval_samples_per_second": 925.158, |
|
"eval_steps_per_second": 1.203, |
|
"step": 194 |
|
}, |
|
{ |
|
"epoch": 98.0, |
|
"eval_loss": 1.506999135017395, |
|
"eval_runtime": 10.9785, |
|
"eval_samples_per_second": 910.872, |
|
"eval_steps_per_second": 1.184, |
|
"step": 196 |
|
}, |
|
{ |
|
"epoch": 99.0, |
|
"eval_loss": 1.5044018030166626, |
|
"eval_runtime": 10.883, |
|
"eval_samples_per_second": 918.868, |
|
"eval_steps_per_second": 1.195, |
|
"step": 198 |
|
}, |
|
{ |
|
"epoch": 100.0, |
|
"eval_loss": 1.5024514198303223, |
|
"eval_runtime": 10.7906, |
|
"eval_samples_per_second": 926.73, |
|
"eval_steps_per_second": 1.205, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 101.0, |
|
"eval_loss": 1.501634120941162, |
|
"eval_runtime": 10.7793, |
|
"eval_samples_per_second": 927.703, |
|
"eval_steps_per_second": 1.206, |
|
"step": 202 |
|
}, |
|
{ |
|
"epoch": 102.0, |
|
"eval_loss": 1.5009431838989258, |
|
"eval_runtime": 10.7085, |
|
"eval_samples_per_second": 933.839, |
|
"eval_steps_per_second": 1.214, |
|
"step": 204 |
|
}, |
|
{ |
|
"epoch": 103.0, |
|
"eval_loss": 1.499057650566101, |
|
"eval_runtime": 10.6874, |
|
"eval_samples_per_second": 935.68, |
|
"eval_steps_per_second": 1.216, |
|
"step": 206 |
|
}, |
|
{ |
|
"epoch": 104.0, |
|
"eval_loss": 1.4963877201080322, |
|
"eval_runtime": 10.6752, |
|
"eval_samples_per_second": 936.753, |
|
"eval_steps_per_second": 1.218, |
|
"step": 208 |
|
}, |
|
{ |
|
"epoch": 105.0, |
|
"eval_loss": 1.4944827556610107, |
|
"eval_runtime": 10.6755, |
|
"eval_samples_per_second": 936.725, |
|
"eval_steps_per_second": 1.218, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 106.0, |
|
"eval_loss": 1.4929250478744507, |
|
"eval_runtime": 10.682, |
|
"eval_samples_per_second": 936.158, |
|
"eval_steps_per_second": 1.217, |
|
"step": 212 |
|
}, |
|
{ |
|
"epoch": 107.0, |
|
"eval_loss": 1.4915369749069214, |
|
"eval_runtime": 10.6838, |
|
"eval_samples_per_second": 935.995, |
|
"eval_steps_per_second": 1.217, |
|
"step": 214 |
|
}, |
|
{ |
|
"epoch": 108.0, |
|
"eval_loss": 1.49076509475708, |
|
"eval_runtime": 10.6769, |
|
"eval_samples_per_second": 936.603, |
|
"eval_steps_per_second": 1.218, |
|
"step": 216 |
|
}, |
|
{ |
|
"epoch": 109.0, |
|
"eval_loss": 1.4891281127929688, |
|
"eval_runtime": 10.6778, |
|
"eval_samples_per_second": 936.521, |
|
"eval_steps_per_second": 1.217, |
|
"step": 218 |
|
}, |
|
{ |
|
"epoch": 110.0, |
|
"eval_loss": 1.486264944076538, |
|
"eval_runtime": 10.67, |
|
"eval_samples_per_second": 937.205, |
|
"eval_steps_per_second": 1.218, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 111.0, |
|
"eval_loss": 1.481858730316162, |
|
"eval_runtime": 10.6684, |
|
"eval_samples_per_second": 937.351, |
|
"eval_steps_per_second": 1.219, |
|
"step": 222 |
|
}, |
|
{ |
|
"epoch": 112.0, |
|
"eval_loss": 1.4790288209915161, |
|
"eval_runtime": 10.6764, |
|
"eval_samples_per_second": 936.646, |
|
"eval_steps_per_second": 1.218, |
|
"step": 224 |
|
}, |
|
{ |
|
"epoch": 113.0, |
|
"eval_loss": 1.477858066558838, |
|
"eval_runtime": 10.672, |
|
"eval_samples_per_second": 937.028, |
|
"eval_steps_per_second": 1.218, |
|
"step": 226 |
|
}, |
|
{ |
|
"epoch": 114.0, |
|
"eval_loss": 1.477932095527649, |
|
"eval_runtime": 10.6779, |
|
"eval_samples_per_second": 936.51, |
|
"eval_steps_per_second": 1.217, |
|
"step": 228 |
|
}, |
|
{ |
|
"epoch": 115.0, |
|
"eval_loss": 1.4761333465576172, |
|
"eval_runtime": 10.6829, |
|
"eval_samples_per_second": 936.078, |
|
"eval_steps_per_second": 1.217, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 116.0, |
|
"eval_loss": 1.472651481628418, |
|
"eval_runtime": 10.6687, |
|
"eval_samples_per_second": 937.324, |
|
"eval_steps_per_second": 1.219, |
|
"step": 232 |
|
}, |
|
{ |
|
"epoch": 117.0, |
|
"eval_loss": 1.4697270393371582, |
|
"eval_runtime": 10.6649, |
|
"eval_samples_per_second": 937.657, |
|
"eval_steps_per_second": 1.219, |
|
"step": 234 |
|
}, |
|
{ |
|
"epoch": 118.0, |
|
"eval_loss": 1.4677116870880127, |
|
"eval_runtime": 10.6723, |
|
"eval_samples_per_second": 937.001, |
|
"eval_steps_per_second": 1.218, |
|
"step": 236 |
|
}, |
|
{ |
|
"epoch": 119.0, |
|
"eval_loss": 1.4658899307250977, |
|
"eval_runtime": 10.6745, |
|
"eval_samples_per_second": 936.811, |
|
"eval_steps_per_second": 1.218, |
|
"step": 238 |
|
}, |
|
{ |
|
"epoch": 120.0, |
|
"eval_loss": 1.4640589952468872, |
|
"eval_runtime": 10.6687, |
|
"eval_samples_per_second": 937.323, |
|
"eval_steps_per_second": 1.219, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 121.0, |
|
"eval_loss": 1.4623686075210571, |
|
"eval_runtime": 10.6735, |
|
"eval_samples_per_second": 936.896, |
|
"eval_steps_per_second": 1.218, |
|
"step": 242 |
|
}, |
|
{ |
|
"epoch": 122.0, |
|
"eval_loss": 1.4619512557983398, |
|
"eval_runtime": 10.6694, |
|
"eval_samples_per_second": 937.256, |
|
"eval_steps_per_second": 1.218, |
|
"step": 244 |
|
}, |
|
{ |
|
"epoch": 123.0, |
|
"eval_loss": 1.4633115530014038, |
|
"eval_runtime": 10.6685, |
|
"eval_samples_per_second": 937.343, |
|
"eval_steps_per_second": 1.219, |
|
"step": 246 |
|
}, |
|
{ |
|
"epoch": 124.0, |
|
"eval_loss": 1.464633584022522, |
|
"eval_runtime": 10.6778, |
|
"eval_samples_per_second": 936.522, |
|
"eval_steps_per_second": 1.217, |
|
"step": 248 |
|
}, |
|
{ |
|
"epoch": 125.0, |
|
"eval_loss": 1.464717984199524, |
|
"eval_runtime": 10.7082, |
|
"eval_samples_per_second": 933.861, |
|
"eval_steps_per_second": 1.214, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 126.0, |
|
"eval_loss": 1.4619494676589966, |
|
"eval_runtime": 10.6743, |
|
"eval_samples_per_second": 936.83, |
|
"eval_steps_per_second": 1.218, |
|
"step": 252 |
|
}, |
|
{ |
|
"epoch": 127.0, |
|
"eval_loss": 1.4600682258605957, |
|
"eval_runtime": 10.6839, |
|
"eval_samples_per_second": 935.989, |
|
"eval_steps_per_second": 1.217, |
|
"step": 254 |
|
}, |
|
{ |
|
"epoch": 128.0, |
|
"eval_loss": 1.460466980934143, |
|
"eval_runtime": 10.6674, |
|
"eval_samples_per_second": 937.434, |
|
"eval_steps_per_second": 1.219, |
|
"step": 256 |
|
}, |
|
{ |
|
"epoch": 129.0, |
|
"eval_loss": 1.4635497331619263, |
|
"eval_runtime": 10.685, |
|
"eval_samples_per_second": 935.888, |
|
"eval_steps_per_second": 1.217, |
|
"step": 258 |
|
}, |
|
{ |
|
"epoch": 130.0, |
|
"eval_loss": 1.4629032611846924, |
|
"eval_runtime": 10.6911, |
|
"eval_samples_per_second": 935.356, |
|
"eval_steps_per_second": 1.216, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 131.0, |
|
"eval_loss": 1.4596558809280396, |
|
"eval_runtime": 10.6919, |
|
"eval_samples_per_second": 935.291, |
|
"eval_steps_per_second": 1.216, |
|
"step": 262 |
|
}, |
|
{ |
|
"epoch": 132.0, |
|
"eval_loss": 1.4560246467590332, |
|
"eval_runtime": 10.6769, |
|
"eval_samples_per_second": 936.603, |
|
"eval_steps_per_second": 1.218, |
|
"step": 264 |
|
}, |
|
{ |
|
"epoch": 133.0, |
|
"eval_loss": 1.453616738319397, |
|
"eval_runtime": 10.6767, |
|
"eval_samples_per_second": 936.622, |
|
"eval_steps_per_second": 1.218, |
|
"step": 266 |
|
}, |
|
{ |
|
"epoch": 134.0, |
|
"eval_loss": 1.4538458585739136, |
|
"eval_runtime": 10.6773, |
|
"eval_samples_per_second": 936.563, |
|
"eval_steps_per_second": 1.218, |
|
"step": 268 |
|
}, |
|
{ |
|
"epoch": 135.0, |
|
"eval_loss": 1.4554544687271118, |
|
"eval_runtime": 10.6749, |
|
"eval_samples_per_second": 936.779, |
|
"eval_steps_per_second": 1.218, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 136.0, |
|
"eval_loss": 1.458116054534912, |
|
"eval_runtime": 10.6729, |
|
"eval_samples_per_second": 936.95, |
|
"eval_steps_per_second": 1.218, |
|
"step": 272 |
|
}, |
|
{ |
|
"epoch": 137.0, |
|
"eval_loss": 1.458404541015625, |
|
"eval_runtime": 10.6614, |
|
"eval_samples_per_second": 937.965, |
|
"eval_steps_per_second": 1.219, |
|
"step": 274 |
|
}, |
|
{ |
|
"epoch": 138.0, |
|
"eval_loss": 1.4573506116867065, |
|
"eval_runtime": 10.6734, |
|
"eval_samples_per_second": 936.913, |
|
"eval_steps_per_second": 1.218, |
|
"step": 276 |
|
}, |
|
{ |
|
"epoch": 139.0, |
|
"eval_loss": 1.4543631076812744, |
|
"eval_runtime": 10.6672, |
|
"eval_samples_per_second": 937.452, |
|
"eval_steps_per_second": 1.219, |
|
"step": 278 |
|
}, |
|
{ |
|
"epoch": 140.0, |
|
"eval_loss": 1.4508144855499268, |
|
"eval_runtime": 10.6794, |
|
"eval_samples_per_second": 936.382, |
|
"eval_steps_per_second": 1.217, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 141.0, |
|
"eval_loss": 1.447538137435913, |
|
"eval_runtime": 10.6758, |
|
"eval_samples_per_second": 936.702, |
|
"eval_steps_per_second": 1.218, |
|
"step": 282 |
|
}, |
|
{ |
|
"epoch": 142.0, |
|
"eval_loss": 1.4447238445281982, |
|
"eval_runtime": 10.6659, |
|
"eval_samples_per_second": 937.568, |
|
"eval_steps_per_second": 1.219, |
|
"step": 284 |
|
}, |
|
{ |
|
"epoch": 143.0, |
|
"eval_loss": 1.4447556734085083, |
|
"eval_runtime": 10.6716, |
|
"eval_samples_per_second": 937.067, |
|
"eval_steps_per_second": 1.218, |
|
"step": 286 |
|
}, |
|
{ |
|
"epoch": 144.0, |
|
"eval_loss": 1.4442577362060547, |
|
"eval_runtime": 10.676, |
|
"eval_samples_per_second": 936.684, |
|
"eval_steps_per_second": 1.218, |
|
"step": 288 |
|
}, |
|
{ |
|
"epoch": 145.0, |
|
"eval_loss": 1.4424697160720825, |
|
"eval_runtime": 10.668, |
|
"eval_samples_per_second": 937.381, |
|
"eval_steps_per_second": 1.219, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 146.0, |
|
"eval_loss": 1.4408490657806396, |
|
"eval_runtime": 10.6762, |
|
"eval_samples_per_second": 936.664, |
|
"eval_steps_per_second": 1.218, |
|
"step": 292 |
|
}, |
|
{ |
|
"epoch": 147.0, |
|
"eval_loss": 1.4410812854766846, |
|
"eval_runtime": 10.6701, |
|
"eval_samples_per_second": 937.198, |
|
"eval_steps_per_second": 1.218, |
|
"step": 294 |
|
}, |
|
{ |
|
"epoch": 148.0, |
|
"eval_loss": 1.4422898292541504, |
|
"eval_runtime": 10.7048, |
|
"eval_samples_per_second": 934.163, |
|
"eval_steps_per_second": 1.214, |
|
"step": 296 |
|
}, |
|
{ |
|
"epoch": 149.0, |
|
"eval_loss": 1.4430429935455322, |
|
"eval_runtime": 10.7029, |
|
"eval_samples_per_second": 934.33, |
|
"eval_steps_per_second": 1.215, |
|
"step": 298 |
|
}, |
|
{ |
|
"epoch": 150.0, |
|
"eval_loss": 1.4431097507476807, |
|
"eval_runtime": 10.701, |
|
"eval_samples_per_second": 934.49, |
|
"eval_steps_per_second": 1.215, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 151.0, |
|
"eval_loss": 1.4416182041168213, |
|
"eval_runtime": 10.7066, |
|
"eval_samples_per_second": 934.002, |
|
"eval_steps_per_second": 1.214, |
|
"step": 302 |
|
}, |
|
{ |
|
"epoch": 152.0, |
|
"eval_loss": 1.4399303197860718, |
|
"eval_runtime": 10.6813, |
|
"eval_samples_per_second": 936.212, |
|
"eval_steps_per_second": 1.217, |
|
"step": 304 |
|
}, |
|
{ |
|
"epoch": 153.0, |
|
"eval_loss": 1.437761664390564, |
|
"eval_runtime": 10.6766, |
|
"eval_samples_per_second": 936.628, |
|
"eval_steps_per_second": 1.218, |
|
"step": 306 |
|
}, |
|
{ |
|
"epoch": 154.0, |
|
"eval_loss": 1.436241865158081, |
|
"eval_runtime": 10.6783, |
|
"eval_samples_per_second": 936.479, |
|
"eval_steps_per_second": 1.217, |
|
"step": 308 |
|
}, |
|
{ |
|
"epoch": 155.0, |
|
"eval_loss": 1.4360020160675049, |
|
"eval_runtime": 10.7009, |
|
"eval_samples_per_second": 934.499, |
|
"eval_steps_per_second": 1.215, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 156.0, |
|
"eval_loss": 1.4338910579681396, |
|
"eval_runtime": 10.6868, |
|
"eval_samples_per_second": 935.733, |
|
"eval_steps_per_second": 1.216, |
|
"step": 312 |
|
}, |
|
{ |
|
"epoch": 157.0, |
|
"eval_loss": 1.4325634241104126, |
|
"eval_runtime": 10.6777, |
|
"eval_samples_per_second": 936.527, |
|
"eval_steps_per_second": 1.217, |
|
"step": 314 |
|
}, |
|
{ |
|
"epoch": 158.0, |
|
"eval_loss": 1.4329997301101685, |
|
"eval_runtime": 10.6692, |
|
"eval_samples_per_second": 937.275, |
|
"eval_steps_per_second": 1.218, |
|
"step": 316 |
|
}, |
|
{ |
|
"epoch": 159.0, |
|
"eval_loss": 1.4324475526809692, |
|
"eval_runtime": 10.6817, |
|
"eval_samples_per_second": 936.178, |
|
"eval_steps_per_second": 1.217, |
|
"step": 318 |
|
}, |
|
{ |
|
"epoch": 160.0, |
|
"eval_loss": 1.4320900440216064, |
|
"eval_runtime": 10.6759, |
|
"eval_samples_per_second": 936.685, |
|
"eval_steps_per_second": 1.218, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 161.0, |
|
"eval_loss": 1.4325189590454102, |
|
"eval_runtime": 10.6696, |
|
"eval_samples_per_second": 937.244, |
|
"eval_steps_per_second": 1.218, |
|
"step": 322 |
|
}, |
|
{ |
|
"epoch": 162.0, |
|
"eval_loss": 1.4319082498550415, |
|
"eval_runtime": 10.6597, |
|
"eval_samples_per_second": 938.116, |
|
"eval_steps_per_second": 1.22, |
|
"step": 324 |
|
}, |
|
{ |
|
"epoch": 163.0, |
|
"eval_loss": 1.432405710220337, |
|
"eval_runtime": 10.6632, |
|
"eval_samples_per_second": 937.807, |
|
"eval_steps_per_second": 1.219, |
|
"step": 326 |
|
}, |
|
{ |
|
"epoch": 164.0, |
|
"eval_loss": 1.4340990781784058, |
|
"eval_runtime": 10.6793, |
|
"eval_samples_per_second": 936.393, |
|
"eval_steps_per_second": 1.217, |
|
"step": 328 |
|
}, |
|
{ |
|
"epoch": 165.0, |
|
"eval_loss": 1.4348700046539307, |
|
"eval_runtime": 10.6745, |
|
"eval_samples_per_second": 936.81, |
|
"eval_steps_per_second": 1.218, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 166.0, |
|
"eval_loss": 1.4340572357177734, |
|
"eval_runtime": 10.6758, |
|
"eval_samples_per_second": 936.694, |
|
"eval_steps_per_second": 1.218, |
|
"step": 332 |
|
}, |
|
{ |
|
"epoch": 167.0, |
|
"eval_loss": 1.432230830192566, |
|
"eval_runtime": 10.6679, |
|
"eval_samples_per_second": 937.39, |
|
"eval_steps_per_second": 1.219, |
|
"step": 334 |
|
}, |
|
{ |
|
"epoch": 168.0, |
|
"eval_loss": 1.4290224313735962, |
|
"eval_runtime": 10.6717, |
|
"eval_samples_per_second": 937.056, |
|
"eval_steps_per_second": 1.218, |
|
"step": 336 |
|
}, |
|
{ |
|
"epoch": 169.0, |
|
"eval_loss": 1.4260591268539429, |
|
"eval_runtime": 10.6646, |
|
"eval_samples_per_second": 937.683, |
|
"eval_steps_per_second": 1.219, |
|
"step": 338 |
|
}, |
|
{ |
|
"epoch": 170.0, |
|
"eval_loss": 1.4241833686828613, |
|
"eval_runtime": 10.6743, |
|
"eval_samples_per_second": 936.827, |
|
"eval_steps_per_second": 1.218, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 171.0, |
|
"eval_loss": 1.4232484102249146, |
|
"eval_runtime": 10.6668, |
|
"eval_samples_per_second": 937.489, |
|
"eval_steps_per_second": 1.219, |
|
"step": 342 |
|
}, |
|
{ |
|
"epoch": 172.0, |
|
"eval_loss": 1.4233402013778687, |
|
"eval_runtime": 10.674, |
|
"eval_samples_per_second": 936.859, |
|
"eval_steps_per_second": 1.218, |
|
"step": 344 |
|
}, |
|
{ |
|
"epoch": 173.0, |
|
"eval_loss": 1.4248952865600586, |
|
"eval_runtime": 10.6819, |
|
"eval_samples_per_second": 936.165, |
|
"eval_steps_per_second": 1.217, |
|
"step": 346 |
|
}, |
|
{ |
|
"epoch": 174.0, |
|
"eval_loss": 1.4270906448364258, |
|
"eval_runtime": 10.6759, |
|
"eval_samples_per_second": 936.692, |
|
"eval_steps_per_second": 1.218, |
|
"step": 348 |
|
}, |
|
{ |
|
"epoch": 175.0, |
|
"eval_loss": 1.4285681247711182, |
|
"eval_runtime": 10.6797, |
|
"eval_samples_per_second": 936.356, |
|
"eval_steps_per_second": 1.217, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 176.0, |
|
"eval_loss": 1.4294575452804565, |
|
"eval_runtime": 10.6749, |
|
"eval_samples_per_second": 936.78, |
|
"eval_steps_per_second": 1.218, |
|
"step": 352 |
|
}, |
|
{ |
|
"epoch": 177.0, |
|
"eval_loss": 1.428400993347168, |
|
"eval_runtime": 10.6621, |
|
"eval_samples_per_second": 937.902, |
|
"eval_steps_per_second": 1.219, |
|
"step": 354 |
|
}, |
|
{ |
|
"epoch": 178.0, |
|
"eval_loss": 1.4263620376586914, |
|
"eval_runtime": 10.6806, |
|
"eval_samples_per_second": 936.281, |
|
"eval_steps_per_second": 1.217, |
|
"step": 356 |
|
}, |
|
{ |
|
"epoch": 179.0, |
|
"eval_loss": 1.4240350723266602, |
|
"eval_runtime": 10.669, |
|
"eval_samples_per_second": 937.296, |
|
"eval_steps_per_second": 1.218, |
|
"step": 358 |
|
}, |
|
{ |
|
"epoch": 180.0, |
|
"eval_loss": 1.4221690893173218, |
|
"eval_runtime": 10.6788, |
|
"eval_samples_per_second": 936.431, |
|
"eval_steps_per_second": 1.217, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 181.0, |
|
"eval_loss": 1.4208089113235474, |
|
"eval_runtime": 10.6801, |
|
"eval_samples_per_second": 936.319, |
|
"eval_steps_per_second": 1.217, |
|
"step": 362 |
|
} |
|
], |
|
"logging_steps": 500, |
|
"max_steps": 400, |
|
"num_train_epochs": 200, |
|
"save_steps": 500, |
|
"total_flos": 5196677296128000.0, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|