|
{ |
|
"best_metric": 0.7123447060585022, |
|
"best_model_checkpoint": "/mnt/bn/qingyi-bn-lq/llama/saved-alpaca-belle13b/checkpoint-13400", |
|
"epoch": 2.8910463861920173, |
|
"global_step": 13400, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 5.9999999999999995e-05, |
|
"loss": 1.6589, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 0.00011999999999999999, |
|
"loss": 1.4071, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 0.00017999999999999998, |
|
"loss": 1.044, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 0.00023999999999999998, |
|
"loss": 0.9883, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 0.0003, |
|
"loss": 0.9659, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 0.00029956537486417964, |
|
"loss": 0.9505, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 0.00029913074972835925, |
|
"loss": 0.9205, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 0.0002986961245925389, |
|
"loss": 0.9168, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 0.0002982614994567186, |
|
"loss": 0.9117, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 0.0002978268743208982, |
|
"loss": 0.9064, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"eval_loss": 0.9033477306365967, |
|
"eval_runtime": 25.3136, |
|
"eval_samples_per_second": 79.009, |
|
"eval_steps_per_second": 1.264, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 0.00029739224918507785, |
|
"loss": 0.8981, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 0.0002969576240492575, |
|
"loss": 0.8912, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 0.0002965229989134371, |
|
"loss": 0.8875, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 0.0002960883737776168, |
|
"loss": 0.8907, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 0.00029565374864179645, |
|
"loss": 0.8753, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 0.00029521912350597606, |
|
"loss": 0.8782, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 0.0002947844983701557, |
|
"loss": 0.8697, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 0.0002943498732343354, |
|
"loss": 0.8745, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 0.000293915248098515, |
|
"loss": 0.8725, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 0.00029348062296269466, |
|
"loss": 0.8658, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"eval_loss": 0.8655584454536438, |
|
"eval_runtime": 25.3343, |
|
"eval_samples_per_second": 78.944, |
|
"eval_steps_per_second": 1.263, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 0.0002930459978268743, |
|
"loss": 0.8641, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 0.00029261137269105393, |
|
"loss": 0.8509, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 0.0002921767475552336, |
|
"loss": 0.8541, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 0.00029174212241941326, |
|
"loss": 0.8575, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 0.00029130749728359287, |
|
"loss": 0.8482, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 0.00029087287214777253, |
|
"loss": 0.8572, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 0.0002904382470119522, |
|
"loss": 0.8489, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 0.0002900036218761318, |
|
"loss": 0.8585, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 0.00028956899674031147, |
|
"loss": 0.8387, |
|
"step": 580 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 0.00028913437160449113, |
|
"loss": 0.8306, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"eval_loss": 0.8434031009674072, |
|
"eval_runtime": 25.3211, |
|
"eval_samples_per_second": 78.986, |
|
"eval_steps_per_second": 1.264, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 0.00028869974646867074, |
|
"loss": 0.8331, |
|
"step": 620 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 0.0002882651213328504, |
|
"loss": 0.8447, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 0.00028783049619703007, |
|
"loss": 0.836, |
|
"step": 660 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 0.0002873958710612097, |
|
"loss": 0.8436, |
|
"step": 680 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 0.00028696124592538934, |
|
"loss": 0.8281, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 0.000286526620789569, |
|
"loss": 0.8378, |
|
"step": 720 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 0.0002860919956537486, |
|
"loss": 0.8338, |
|
"step": 740 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 0.0002856573705179283, |
|
"loss": 0.8323, |
|
"step": 760 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 0.00028522274538210794, |
|
"loss": 0.8153, |
|
"step": 780 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 0.00028478812024628755, |
|
"loss": 0.8349, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"eval_loss": 0.8282934427261353, |
|
"eval_runtime": 25.4025, |
|
"eval_samples_per_second": 78.733, |
|
"eval_steps_per_second": 1.26, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 0.0002843534951104672, |
|
"loss": 0.8198, |
|
"step": 820 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 0.0002839188699746469, |
|
"loss": 0.8254, |
|
"step": 840 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 0.0002834842448388265, |
|
"loss": 0.8165, |
|
"step": 860 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 0.00028304961970300615, |
|
"loss": 0.8241, |
|
"step": 880 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 0.0002826149945671858, |
|
"loss": 0.814, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 0.0002821803694313654, |
|
"loss": 0.8222, |
|
"step": 920 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 0.0002817457442955451, |
|
"loss": 0.825, |
|
"step": 940 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 0.00028131111915972475, |
|
"loss": 0.8153, |
|
"step": 960 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 0.00028087649402390436, |
|
"loss": 0.8229, |
|
"step": 980 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 0.00028044186888808397, |
|
"loss": 0.8129, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"eval_loss": 0.816320538520813, |
|
"eval_runtime": 25.4153, |
|
"eval_samples_per_second": 78.693, |
|
"eval_steps_per_second": 1.259, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 0.00028000724375226363, |
|
"loss": 0.8121, |
|
"step": 1020 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 0.0002795726186164433, |
|
"loss": 0.8063, |
|
"step": 1040 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 0.0002791379934806229, |
|
"loss": 0.8097, |
|
"step": 1060 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 0.00027870336834480257, |
|
"loss": 0.8142, |
|
"step": 1080 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 0.00027826874320898223, |
|
"loss": 0.8021, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 0.00027783411807316184, |
|
"loss": 0.8014, |
|
"step": 1120 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 0.0002773994929373415, |
|
"loss": 0.8031, |
|
"step": 1140 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 0.00027696486780152117, |
|
"loss": 0.8011, |
|
"step": 1160 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 0.0002765302426657008, |
|
"loss": 0.7944, |
|
"step": 1180 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 0.00027609561752988044, |
|
"loss": 0.8071, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"eval_loss": 0.8064733147621155, |
|
"eval_runtime": 25.3901, |
|
"eval_samples_per_second": 78.771, |
|
"eval_steps_per_second": 1.26, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 0.0002756609923940601, |
|
"loss": 0.8025, |
|
"step": 1220 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 0.0002752263672582397, |
|
"loss": 0.7954, |
|
"step": 1240 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 0.0002747917421224194, |
|
"loss": 0.8013, |
|
"step": 1260 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 0.00027435711698659904, |
|
"loss": 0.7967, |
|
"step": 1280 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 0.00027392249185077865, |
|
"loss": 0.8132, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 0.0002734878667149583, |
|
"loss": 0.8017, |
|
"step": 1320 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 0.000273053241579138, |
|
"loss": 0.7964, |
|
"step": 1340 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 0.0002726186164433176, |
|
"loss": 0.8012, |
|
"step": 1360 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 0.00027218399130749725, |
|
"loss": 0.7982, |
|
"step": 1380 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 0.0002717493661716769, |
|
"loss": 0.8031, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"eval_loss": 0.798474133014679, |
|
"eval_runtime": 25.432, |
|
"eval_samples_per_second": 78.641, |
|
"eval_steps_per_second": 1.258, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 0.0002713147410358565, |
|
"loss": 0.7925, |
|
"step": 1420 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 0.0002708801159000362, |
|
"loss": 0.794, |
|
"step": 1440 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 0.00027044549076421585, |
|
"loss": 0.804, |
|
"step": 1460 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 0.00027001086562839546, |
|
"loss": 0.7942, |
|
"step": 1480 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 0.0002695762404925751, |
|
"loss": 0.7872, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 0.0002691416153567548, |
|
"loss": 0.7962, |
|
"step": 1520 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 0.0002687069902209344, |
|
"loss": 0.7898, |
|
"step": 1540 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 0.00026827236508511406, |
|
"loss": 0.7886, |
|
"step": 1560 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 0.0002678377399492937, |
|
"loss": 0.7904, |
|
"step": 1580 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 0.00026740311481347333, |
|
"loss": 0.7892, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"eval_loss": 0.7912269234657288, |
|
"eval_runtime": 25.444, |
|
"eval_samples_per_second": 78.604, |
|
"eval_steps_per_second": 1.258, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 0.000266968489677653, |
|
"loss": 0.7897, |
|
"step": 1620 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 0.00026653386454183266, |
|
"loss": 0.7927, |
|
"step": 1640 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 0.00026609923940601227, |
|
"loss": 0.7829, |
|
"step": 1660 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 0.00026566461427019193, |
|
"loss": 0.7788, |
|
"step": 1680 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 0.0002652299891343716, |
|
"loss": 0.786, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 0.0002647953639985512, |
|
"loss": 0.7828, |
|
"step": 1720 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 0.00026436073886273087, |
|
"loss": 0.7788, |
|
"step": 1740 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 0.00026392611372691053, |
|
"loss": 0.7851, |
|
"step": 1760 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 0.00026349148859109014, |
|
"loss": 0.7936, |
|
"step": 1780 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 0.0002630568634552698, |
|
"loss": 0.7758, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"eval_loss": 0.7854430675506592, |
|
"eval_runtime": 25.4734, |
|
"eval_samples_per_second": 78.513, |
|
"eval_steps_per_second": 1.256, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 0.00026262223831944947, |
|
"loss": 0.787, |
|
"step": 1820 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 0.0002621876131836291, |
|
"loss": 0.7779, |
|
"step": 1840 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 0.00026175298804780874, |
|
"loss": 0.7792, |
|
"step": 1860 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 0.0002613183629119884, |
|
"loss": 0.7728, |
|
"step": 1880 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 0.000260883737776168, |
|
"loss": 0.7844, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 0.0002604491126403477, |
|
"loss": 0.7726, |
|
"step": 1920 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 0.00026001448750452734, |
|
"loss": 0.7706, |
|
"step": 1940 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 0.00025957986236870695, |
|
"loss": 0.7659, |
|
"step": 1960 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 0.0002591452372328866, |
|
"loss": 0.7808, |
|
"step": 1980 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 0.0002587106120970663, |
|
"loss": 0.7692, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"eval_loss": 0.7800412774085999, |
|
"eval_runtime": 25.5146, |
|
"eval_samples_per_second": 78.387, |
|
"eval_steps_per_second": 1.254, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 0.0002582759869612459, |
|
"loss": 0.7665, |
|
"step": 2020 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 0.00025784136182542555, |
|
"loss": 0.7795, |
|
"step": 2040 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 0.0002574067366896052, |
|
"loss": 0.7846, |
|
"step": 2060 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 0.0002569721115537848, |
|
"loss": 0.7639, |
|
"step": 2080 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 0.0002565374864179645, |
|
"loss": 0.7827, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 0.00025610286128214415, |
|
"loss": 0.7751, |
|
"step": 2120 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 0.00025566823614632376, |
|
"loss": 0.776, |
|
"step": 2140 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 0.0002552336110105034, |
|
"loss": 0.7773, |
|
"step": 2160 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 0.0002547989858746831, |
|
"loss": 0.7757, |
|
"step": 2180 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 0.0002543643607388627, |
|
"loss": 0.7769, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"eval_loss": 0.7759379744529724, |
|
"eval_runtime": 25.4789, |
|
"eval_samples_per_second": 78.496, |
|
"eval_steps_per_second": 1.256, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 0.00025392973560304236, |
|
"loss": 0.7657, |
|
"step": 2220 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 0.000253495110467222, |
|
"loss": 0.7664, |
|
"step": 2240 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 0.00025306048533140163, |
|
"loss": 0.7774, |
|
"step": 2260 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 0.0002526258601955813, |
|
"loss": 0.7591, |
|
"step": 2280 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 0.00025219123505976096, |
|
"loss": 0.7605, |
|
"step": 2300 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 0.00025175660992394057, |
|
"loss": 0.7693, |
|
"step": 2320 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 0.00025132198478812023, |
|
"loss": 0.7702, |
|
"step": 2340 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 0.0002508873596522999, |
|
"loss": 0.7706, |
|
"step": 2360 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 0.0002504527345164795, |
|
"loss": 0.7664, |
|
"step": 2380 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 0.00025001810938065917, |
|
"loss": 0.76, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"eval_loss": 0.7723669409751892, |
|
"eval_runtime": 25.4827, |
|
"eval_samples_per_second": 78.485, |
|
"eval_steps_per_second": 1.256, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 0.00024958348424483883, |
|
"loss": 0.7702, |
|
"step": 2420 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 0.00024914885910901844, |
|
"loss": 0.7686, |
|
"step": 2440 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 0.0002487142339731981, |
|
"loss": 0.762, |
|
"step": 2460 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 0.00024827960883737777, |
|
"loss": 0.7719, |
|
"step": 2480 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 0.0002478449837015574, |
|
"loss": 0.7612, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 0.00024741035856573704, |
|
"loss": 0.7565, |
|
"step": 2520 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 0.0002469757334299167, |
|
"loss": 0.7719, |
|
"step": 2540 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 0.0002465411082940963, |
|
"loss": 0.7619, |
|
"step": 2560 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 0.000246106483158276, |
|
"loss": 0.7607, |
|
"step": 2580 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 0.00024567185802245564, |
|
"loss": 0.7564, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"eval_loss": 0.7678729295730591, |
|
"eval_runtime": 25.4455, |
|
"eval_samples_per_second": 78.599, |
|
"eval_steps_per_second": 1.258, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 0.00024523723288663525, |
|
"loss": 0.7613, |
|
"step": 2620 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 0.0002448026077508149, |
|
"loss": 0.7525, |
|
"step": 2640 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 0.0002443679826149946, |
|
"loss": 0.7563, |
|
"step": 2660 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 0.00024393335747917422, |
|
"loss": 0.7601, |
|
"step": 2680 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 0.00024349873234335383, |
|
"loss": 0.7633, |
|
"step": 2700 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 0.00024306410720753346, |
|
"loss": 0.75, |
|
"step": 2720 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 0.0002426294820717131, |
|
"loss": 0.7602, |
|
"step": 2740 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 0.00024219485693589276, |
|
"loss": 0.7546, |
|
"step": 2760 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 0.0002417602318000724, |
|
"loss": 0.7532, |
|
"step": 2780 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 0.00024132560666425203, |
|
"loss": 0.7661, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"eval_loss": 0.7649803757667542, |
|
"eval_runtime": 25.4783, |
|
"eval_samples_per_second": 78.498, |
|
"eval_steps_per_second": 1.256, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 0.0002408909815284317, |
|
"loss": 0.7587, |
|
"step": 2820 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 0.00024045635639261133, |
|
"loss": 0.7543, |
|
"step": 2840 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 0.00024002173125679097, |
|
"loss": 0.7672, |
|
"step": 2860 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 0.00023958710612097063, |
|
"loss": 0.7623, |
|
"step": 2880 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 0.00023915248098515027, |
|
"loss": 0.7487, |
|
"step": 2900 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 0.0002387178558493299, |
|
"loss": 0.75, |
|
"step": 2920 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 0.00023828323071350957, |
|
"loss": 0.7567, |
|
"step": 2940 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"learning_rate": 0.0002378486055776892, |
|
"loss": 0.7592, |
|
"step": 2960 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"learning_rate": 0.00023741398044186884, |
|
"loss": 0.7569, |
|
"step": 2980 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 0.0002369793553060485, |
|
"loss": 0.7524, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"eval_loss": 0.7613279819488525, |
|
"eval_runtime": 25.4837, |
|
"eval_samples_per_second": 78.482, |
|
"eval_steps_per_second": 1.256, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 0.00023654473017022814, |
|
"loss": 0.7593, |
|
"step": 3020 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"learning_rate": 0.00023611010503440778, |
|
"loss": 0.7516, |
|
"step": 3040 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"learning_rate": 0.00023567547989858744, |
|
"loss": 0.7525, |
|
"step": 3060 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"learning_rate": 0.00023524085476276708, |
|
"loss": 0.7583, |
|
"step": 3080 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 0.00023480622962694672, |
|
"loss": 0.7535, |
|
"step": 3100 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 0.00023437160449112638, |
|
"loss": 0.7528, |
|
"step": 3120 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"learning_rate": 0.00023393697935530602, |
|
"loss": 0.7418, |
|
"step": 3140 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"learning_rate": 0.00023350235421948565, |
|
"loss": 0.7496, |
|
"step": 3160 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 0.00023306772908366532, |
|
"loss": 0.7537, |
|
"step": 3180 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 0.00023263310394784495, |
|
"loss": 0.7569, |
|
"step": 3200 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"eval_loss": 0.7581906914710999, |
|
"eval_runtime": 25.4588, |
|
"eval_samples_per_second": 78.558, |
|
"eval_steps_per_second": 1.257, |
|
"step": 3200 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 0.0002321984788120246, |
|
"loss": 0.7465, |
|
"step": 3220 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 0.00023176385367620425, |
|
"loss": 0.7367, |
|
"step": 3240 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 0.0002313292285403839, |
|
"loss": 0.7425, |
|
"step": 3260 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"learning_rate": 0.00023089460340456353, |
|
"loss": 0.7637, |
|
"step": 3280 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"learning_rate": 0.0002304599782687432, |
|
"loss": 0.7574, |
|
"step": 3300 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"learning_rate": 0.00023002535313292283, |
|
"loss": 0.7448, |
|
"step": 3320 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"learning_rate": 0.00022959072799710246, |
|
"loss": 0.7595, |
|
"step": 3340 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"learning_rate": 0.00022915610286128213, |
|
"loss": 0.7465, |
|
"step": 3360 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"learning_rate": 0.00022872147772546176, |
|
"loss": 0.7532, |
|
"step": 3380 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"learning_rate": 0.0002282868525896414, |
|
"loss": 0.7466, |
|
"step": 3400 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"eval_loss": 0.7559078931808472, |
|
"eval_runtime": 25.464, |
|
"eval_samples_per_second": 78.542, |
|
"eval_steps_per_second": 1.257, |
|
"step": 3400 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"learning_rate": 0.00022785222745382106, |
|
"loss": 0.753, |
|
"step": 3420 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"learning_rate": 0.0002274176023180007, |
|
"loss": 0.7459, |
|
"step": 3440 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 0.00022698297718218034, |
|
"loss": 0.7519, |
|
"step": 3460 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 0.00022654835204636, |
|
"loss": 0.7451, |
|
"step": 3480 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"learning_rate": 0.00022611372691053964, |
|
"loss": 0.7468, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"learning_rate": 0.00022567910177471927, |
|
"loss": 0.7491, |
|
"step": 3520 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"learning_rate": 0.00022524447663889894, |
|
"loss": 0.7524, |
|
"step": 3540 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"learning_rate": 0.00022480985150307857, |
|
"loss": 0.7484, |
|
"step": 3560 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"learning_rate": 0.0002243752263672582, |
|
"loss": 0.7484, |
|
"step": 3580 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"learning_rate": 0.00022394060123143787, |
|
"loss": 0.7529, |
|
"step": 3600 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"eval_loss": 0.7531791925430298, |
|
"eval_runtime": 25.4572, |
|
"eval_samples_per_second": 78.563, |
|
"eval_steps_per_second": 1.257, |
|
"step": 3600 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"learning_rate": 0.0002235059760956175, |
|
"loss": 0.7475, |
|
"step": 3620 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"learning_rate": 0.00022307135095979715, |
|
"loss": 0.7518, |
|
"step": 3640 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"learning_rate": 0.0002226367258239768, |
|
"loss": 0.751, |
|
"step": 3660 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"learning_rate": 0.00022220210068815645, |
|
"loss": 0.7402, |
|
"step": 3680 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"learning_rate": 0.00022176747555233608, |
|
"loss": 0.755, |
|
"step": 3700 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"learning_rate": 0.00022133285041651575, |
|
"loss": 0.7441, |
|
"step": 3720 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"learning_rate": 0.00022089822528069538, |
|
"loss": 0.746, |
|
"step": 3740 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"learning_rate": 0.00022046360014487502, |
|
"loss": 0.7441, |
|
"step": 3760 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"learning_rate": 0.00022002897500905468, |
|
"loss": 0.7475, |
|
"step": 3780 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"learning_rate": 0.00021959434987323432, |
|
"loss": 0.7458, |
|
"step": 3800 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"eval_loss": 0.7513870596885681, |
|
"eval_runtime": 25.4906, |
|
"eval_samples_per_second": 78.46, |
|
"eval_steps_per_second": 1.255, |
|
"step": 3800 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"learning_rate": 0.00021915972473741396, |
|
"loss": 0.7436, |
|
"step": 3820 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"learning_rate": 0.00021872509960159362, |
|
"loss": 0.7451, |
|
"step": 3840 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"learning_rate": 0.00021829047446577326, |
|
"loss": 0.7475, |
|
"step": 3860 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"learning_rate": 0.0002178558493299529, |
|
"loss": 0.7424, |
|
"step": 3880 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"learning_rate": 0.00021742122419413256, |
|
"loss": 0.7503, |
|
"step": 3900 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"learning_rate": 0.0002169865990583122, |
|
"loss": 0.7334, |
|
"step": 3920 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"learning_rate": 0.00021655197392249183, |
|
"loss": 0.7436, |
|
"step": 3940 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"learning_rate": 0.0002161173487866715, |
|
"loss": 0.7453, |
|
"step": 3960 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"learning_rate": 0.00021568272365085113, |
|
"loss": 0.7424, |
|
"step": 3980 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"learning_rate": 0.00021524809851503076, |
|
"loss": 0.7509, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"eval_loss": 0.7488968968391418, |
|
"eval_runtime": 25.492, |
|
"eval_samples_per_second": 78.456, |
|
"eval_steps_per_second": 1.255, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"learning_rate": 0.00021481347337921043, |
|
"loss": 0.7445, |
|
"step": 4020 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"learning_rate": 0.00021437884824339006, |
|
"loss": 0.74, |
|
"step": 4040 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"learning_rate": 0.0002139442231075697, |
|
"loss": 0.7362, |
|
"step": 4060 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"learning_rate": 0.00021350959797174936, |
|
"loss": 0.7409, |
|
"step": 4080 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"learning_rate": 0.000213074972835929, |
|
"loss": 0.7315, |
|
"step": 4100 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"learning_rate": 0.00021264034770010864, |
|
"loss": 0.7488, |
|
"step": 4120 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"learning_rate": 0.0002122057225642883, |
|
"loss": 0.7375, |
|
"step": 4140 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"learning_rate": 0.00021177109742846794, |
|
"loss": 0.7481, |
|
"step": 4160 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"learning_rate": 0.00021133647229264757, |
|
"loss": 0.7524, |
|
"step": 4180 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"learning_rate": 0.00021092357841361823, |
|
"loss": 0.7403, |
|
"step": 4200 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"eval_loss": 0.7469983100891113, |
|
"eval_runtime": 25.4847, |
|
"eval_samples_per_second": 78.479, |
|
"eval_steps_per_second": 1.256, |
|
"step": 4200 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"learning_rate": 0.00021048895327779787, |
|
"loss": 0.7394, |
|
"step": 4220 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"learning_rate": 0.0002100543281419775, |
|
"loss": 0.7405, |
|
"step": 4240 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"learning_rate": 0.00020961970300615717, |
|
"loss": 0.7534, |
|
"step": 4260 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"learning_rate": 0.0002091850778703368, |
|
"loss": 0.7412, |
|
"step": 4280 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"learning_rate": 0.00020875045273451644, |
|
"loss": 0.7393, |
|
"step": 4300 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"learning_rate": 0.0002083158275986961, |
|
"loss": 0.7289, |
|
"step": 4320 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"learning_rate": 0.00020788120246287574, |
|
"loss": 0.7342, |
|
"step": 4340 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"learning_rate": 0.00020744657732705538, |
|
"loss": 0.7427, |
|
"step": 4360 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"learning_rate": 0.00020701195219123504, |
|
"loss": 0.7386, |
|
"step": 4380 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"learning_rate": 0.00020657732705541468, |
|
"loss": 0.7374, |
|
"step": 4400 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"eval_loss": 0.7451291680335999, |
|
"eval_runtime": 25.461, |
|
"eval_samples_per_second": 78.552, |
|
"eval_steps_per_second": 1.257, |
|
"step": 4400 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"learning_rate": 0.0002061427019195943, |
|
"loss": 0.7364, |
|
"step": 4420 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"learning_rate": 0.00020570807678377398, |
|
"loss": 0.7377, |
|
"step": 4440 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"learning_rate": 0.0002052734516479536, |
|
"loss": 0.7391, |
|
"step": 4460 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"learning_rate": 0.00020483882651213325, |
|
"loss": 0.731, |
|
"step": 4480 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"learning_rate": 0.0002044042013763129, |
|
"loss": 0.735, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"learning_rate": 0.00020396957624049255, |
|
"loss": 0.7344, |
|
"step": 4520 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"learning_rate": 0.00020353495110467219, |
|
"loss": 0.7355, |
|
"step": 4540 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"learning_rate": 0.00020310032596885185, |
|
"loss": 0.7357, |
|
"step": 4560 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"learning_rate": 0.00020266570083303149, |
|
"loss": 0.7377, |
|
"step": 4580 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"learning_rate": 0.00020223107569721112, |
|
"loss": 0.7438, |
|
"step": 4600 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"eval_loss": 0.7437875270843506, |
|
"eval_runtime": 25.5255, |
|
"eval_samples_per_second": 78.353, |
|
"eval_steps_per_second": 1.254, |
|
"step": 4600 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"learning_rate": 0.00020179645056139079, |
|
"loss": 0.7343, |
|
"step": 4620 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"learning_rate": 0.00020136182542557042, |
|
"loss": 0.7473, |
|
"step": 4640 |
|
}, |
|
{ |
|
"epoch": 1.01, |
|
"learning_rate": 0.00020092720028975006, |
|
"loss": 0.7305, |
|
"step": 4660 |
|
}, |
|
{ |
|
"epoch": 1.01, |
|
"learning_rate": 0.00020049257515392972, |
|
"loss": 0.7284, |
|
"step": 4680 |
|
}, |
|
{ |
|
"epoch": 1.01, |
|
"learning_rate": 0.00020005795001810936, |
|
"loss": 0.7335, |
|
"step": 4700 |
|
}, |
|
{ |
|
"epoch": 1.02, |
|
"learning_rate": 0.000199623324882289, |
|
"loss": 0.7282, |
|
"step": 4720 |
|
}, |
|
{ |
|
"epoch": 1.02, |
|
"learning_rate": 0.00019918869974646866, |
|
"loss": 0.7337, |
|
"step": 4740 |
|
}, |
|
{ |
|
"epoch": 1.03, |
|
"learning_rate": 0.0001987540746106483, |
|
"loss": 0.7195, |
|
"step": 4760 |
|
}, |
|
{ |
|
"epoch": 1.03, |
|
"learning_rate": 0.00019831944947482793, |
|
"loss": 0.7327, |
|
"step": 4780 |
|
}, |
|
{ |
|
"epoch": 1.04, |
|
"learning_rate": 0.0001978848243390076, |
|
"loss": 0.7259, |
|
"step": 4800 |
|
}, |
|
{ |
|
"epoch": 1.04, |
|
"eval_loss": 0.7413464188575745, |
|
"eval_runtime": 25.4959, |
|
"eval_samples_per_second": 78.444, |
|
"eval_steps_per_second": 1.255, |
|
"step": 4800 |
|
}, |
|
{ |
|
"epoch": 1.04, |
|
"learning_rate": 0.00019745019920318723, |
|
"loss": 0.7263, |
|
"step": 4820 |
|
}, |
|
{ |
|
"epoch": 1.04, |
|
"learning_rate": 0.00019701557406736687, |
|
"loss": 0.7341, |
|
"step": 4840 |
|
}, |
|
{ |
|
"epoch": 1.05, |
|
"learning_rate": 0.00019658094893154653, |
|
"loss": 0.7406, |
|
"step": 4860 |
|
}, |
|
{ |
|
"epoch": 1.05, |
|
"learning_rate": 0.00019614632379572617, |
|
"loss": 0.7309, |
|
"step": 4880 |
|
}, |
|
{ |
|
"epoch": 1.06, |
|
"learning_rate": 0.0001957116986599058, |
|
"loss": 0.7274, |
|
"step": 4900 |
|
}, |
|
{ |
|
"epoch": 1.06, |
|
"learning_rate": 0.00019527707352408547, |
|
"loss": 0.7241, |
|
"step": 4920 |
|
}, |
|
{ |
|
"epoch": 1.07, |
|
"learning_rate": 0.0001948424483882651, |
|
"loss": 0.7368, |
|
"step": 4940 |
|
}, |
|
{ |
|
"epoch": 1.07, |
|
"learning_rate": 0.00019440782325244474, |
|
"loss": 0.7445, |
|
"step": 4960 |
|
}, |
|
{ |
|
"epoch": 1.07, |
|
"learning_rate": 0.0001939731981166244, |
|
"loss": 0.7347, |
|
"step": 4980 |
|
}, |
|
{ |
|
"epoch": 1.08, |
|
"learning_rate": 0.00019353857298080404, |
|
"loss": 0.7436, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 1.08, |
|
"eval_loss": 0.7399871945381165, |
|
"eval_runtime": 25.5032, |
|
"eval_samples_per_second": 78.422, |
|
"eval_steps_per_second": 1.255, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 1.08, |
|
"learning_rate": 0.00019310394784498368, |
|
"loss": 0.7248, |
|
"step": 5020 |
|
}, |
|
{ |
|
"epoch": 1.09, |
|
"learning_rate": 0.00019266932270916334, |
|
"loss": 0.7374, |
|
"step": 5040 |
|
}, |
|
{ |
|
"epoch": 1.09, |
|
"learning_rate": 0.00019223469757334298, |
|
"loss": 0.7187, |
|
"step": 5060 |
|
}, |
|
{ |
|
"epoch": 1.1, |
|
"learning_rate": 0.00019180007243752261, |
|
"loss": 0.7381, |
|
"step": 5080 |
|
}, |
|
{ |
|
"epoch": 1.1, |
|
"learning_rate": 0.00019136544730170228, |
|
"loss": 0.7389, |
|
"step": 5100 |
|
}, |
|
{ |
|
"epoch": 1.1, |
|
"learning_rate": 0.00019093082216588191, |
|
"loss": 0.7343, |
|
"step": 5120 |
|
}, |
|
{ |
|
"epoch": 1.11, |
|
"learning_rate": 0.00019049619703006155, |
|
"loss": 0.7323, |
|
"step": 5140 |
|
}, |
|
{ |
|
"epoch": 1.11, |
|
"learning_rate": 0.00019006157189424121, |
|
"loss": 0.723, |
|
"step": 5160 |
|
}, |
|
{ |
|
"epoch": 1.12, |
|
"learning_rate": 0.00018962694675842085, |
|
"loss": 0.7236, |
|
"step": 5180 |
|
}, |
|
{ |
|
"epoch": 1.12, |
|
"learning_rate": 0.0001891923216226005, |
|
"loss": 0.7399, |
|
"step": 5200 |
|
}, |
|
{ |
|
"epoch": 1.12, |
|
"eval_loss": 0.7393975257873535, |
|
"eval_runtime": 25.6137, |
|
"eval_samples_per_second": 78.083, |
|
"eval_steps_per_second": 1.249, |
|
"step": 5200 |
|
}, |
|
{ |
|
"epoch": 1.13, |
|
"learning_rate": 0.00018875769648678015, |
|
"loss": 0.7373, |
|
"step": 5220 |
|
}, |
|
{ |
|
"epoch": 1.13, |
|
"learning_rate": 0.0001883230713509598, |
|
"loss": 0.7257, |
|
"step": 5240 |
|
}, |
|
{ |
|
"epoch": 1.13, |
|
"learning_rate": 0.00018788844621513942, |
|
"loss": 0.7261, |
|
"step": 5260 |
|
}, |
|
{ |
|
"epoch": 1.14, |
|
"learning_rate": 0.0001874538210793191, |
|
"loss": 0.7302, |
|
"step": 5280 |
|
}, |
|
{ |
|
"epoch": 1.14, |
|
"learning_rate": 0.00018701919594349872, |
|
"loss": 0.7337, |
|
"step": 5300 |
|
}, |
|
{ |
|
"epoch": 1.15, |
|
"learning_rate": 0.00018658457080767836, |
|
"loss": 0.7237, |
|
"step": 5320 |
|
}, |
|
{ |
|
"epoch": 1.15, |
|
"learning_rate": 0.00018614994567185802, |
|
"loss": 0.7238, |
|
"step": 5340 |
|
}, |
|
{ |
|
"epoch": 1.16, |
|
"learning_rate": 0.00018571532053603766, |
|
"loss": 0.7287, |
|
"step": 5360 |
|
}, |
|
{ |
|
"epoch": 1.16, |
|
"learning_rate": 0.0001852806954002173, |
|
"loss": 0.7237, |
|
"step": 5380 |
|
}, |
|
{ |
|
"epoch": 1.17, |
|
"learning_rate": 0.00018484607026439696, |
|
"loss": 0.7256, |
|
"step": 5400 |
|
}, |
|
{ |
|
"epoch": 1.17, |
|
"eval_loss": 0.7377527952194214, |
|
"eval_runtime": 25.4964, |
|
"eval_samples_per_second": 78.442, |
|
"eval_steps_per_second": 1.255, |
|
"step": 5400 |
|
}, |
|
{ |
|
"epoch": 1.17, |
|
"learning_rate": 0.0001844114451285766, |
|
"loss": 0.7279, |
|
"step": 5420 |
|
}, |
|
{ |
|
"epoch": 1.17, |
|
"learning_rate": 0.00018397681999275623, |
|
"loss": 0.7226, |
|
"step": 5440 |
|
}, |
|
{ |
|
"epoch": 1.18, |
|
"learning_rate": 0.0001835421948569359, |
|
"loss": 0.7167, |
|
"step": 5460 |
|
}, |
|
{ |
|
"epoch": 1.18, |
|
"learning_rate": 0.00018310756972111553, |
|
"loss": 0.7268, |
|
"step": 5480 |
|
}, |
|
{ |
|
"epoch": 1.19, |
|
"learning_rate": 0.00018267294458529517, |
|
"loss": 0.7398, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 1.19, |
|
"learning_rate": 0.00018223831944947483, |
|
"loss": 0.7331, |
|
"step": 5520 |
|
}, |
|
{ |
|
"epoch": 1.2, |
|
"learning_rate": 0.00018180369431365447, |
|
"loss": 0.7372, |
|
"step": 5540 |
|
}, |
|
{ |
|
"epoch": 1.2, |
|
"learning_rate": 0.0001813690691778341, |
|
"loss": 0.7321, |
|
"step": 5560 |
|
}, |
|
{ |
|
"epoch": 1.2, |
|
"learning_rate": 0.00018093444404201377, |
|
"loss": 0.7346, |
|
"step": 5580 |
|
}, |
|
{ |
|
"epoch": 1.21, |
|
"learning_rate": 0.0001804998189061934, |
|
"loss": 0.722, |
|
"step": 5600 |
|
}, |
|
{ |
|
"epoch": 1.21, |
|
"eval_loss": 0.7368175983428955, |
|
"eval_runtime": 25.5045, |
|
"eval_samples_per_second": 78.417, |
|
"eval_steps_per_second": 1.255, |
|
"step": 5600 |
|
}, |
|
{ |
|
"epoch": 1.21, |
|
"learning_rate": 0.00018006519377037304, |
|
"loss": 0.7279, |
|
"step": 5620 |
|
}, |
|
{ |
|
"epoch": 1.22, |
|
"learning_rate": 0.0001796305686345527, |
|
"loss": 0.72, |
|
"step": 5640 |
|
}, |
|
{ |
|
"epoch": 1.22, |
|
"learning_rate": 0.00017919594349873234, |
|
"loss": 0.7295, |
|
"step": 5660 |
|
}, |
|
{ |
|
"epoch": 1.23, |
|
"learning_rate": 0.00017876131836291198, |
|
"loss": 0.7245, |
|
"step": 5680 |
|
}, |
|
{ |
|
"epoch": 1.23, |
|
"learning_rate": 0.00017832669322709164, |
|
"loss": 0.7418, |
|
"step": 5700 |
|
}, |
|
{ |
|
"epoch": 1.23, |
|
"learning_rate": 0.00017789206809127128, |
|
"loss": 0.7317, |
|
"step": 5720 |
|
}, |
|
{ |
|
"epoch": 1.24, |
|
"learning_rate": 0.00017745744295545092, |
|
"loss": 0.7303, |
|
"step": 5740 |
|
}, |
|
{ |
|
"epoch": 1.24, |
|
"learning_rate": 0.00017702281781963058, |
|
"loss": 0.7332, |
|
"step": 5760 |
|
}, |
|
{ |
|
"epoch": 1.25, |
|
"learning_rate": 0.00017658819268381022, |
|
"loss": 0.7202, |
|
"step": 5780 |
|
}, |
|
{ |
|
"epoch": 1.25, |
|
"learning_rate": 0.00017615356754798983, |
|
"loss": 0.7238, |
|
"step": 5800 |
|
}, |
|
{ |
|
"epoch": 1.25, |
|
"eval_loss": 0.7348505854606628, |
|
"eval_runtime": 25.509, |
|
"eval_samples_per_second": 78.404, |
|
"eval_steps_per_second": 1.254, |
|
"step": 5800 |
|
}, |
|
{ |
|
"epoch": 1.26, |
|
"learning_rate": 0.00017571894241216946, |
|
"loss": 0.724, |
|
"step": 5820 |
|
}, |
|
{ |
|
"epoch": 1.26, |
|
"learning_rate": 0.00017528431727634913, |
|
"loss": 0.7258, |
|
"step": 5840 |
|
}, |
|
{ |
|
"epoch": 1.26, |
|
"learning_rate": 0.00017484969214052876, |
|
"loss": 0.7217, |
|
"step": 5860 |
|
}, |
|
{ |
|
"epoch": 1.27, |
|
"learning_rate": 0.0001744150670047084, |
|
"loss": 0.7209, |
|
"step": 5880 |
|
}, |
|
{ |
|
"epoch": 1.27, |
|
"learning_rate": 0.00017398044186888806, |
|
"loss": 0.7276, |
|
"step": 5900 |
|
}, |
|
{ |
|
"epoch": 1.28, |
|
"learning_rate": 0.0001735458167330677, |
|
"loss": 0.7287, |
|
"step": 5920 |
|
}, |
|
{ |
|
"epoch": 1.28, |
|
"learning_rate": 0.00017311119159724733, |
|
"loss": 0.7244, |
|
"step": 5940 |
|
}, |
|
{ |
|
"epoch": 1.29, |
|
"learning_rate": 0.000172676566461427, |
|
"loss": 0.7247, |
|
"step": 5960 |
|
}, |
|
{ |
|
"epoch": 1.29, |
|
"learning_rate": 0.00017224194132560663, |
|
"loss": 0.7191, |
|
"step": 5980 |
|
}, |
|
{ |
|
"epoch": 1.29, |
|
"learning_rate": 0.00017180731618978627, |
|
"loss": 0.7208, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 1.29, |
|
"eval_loss": 0.7340711951255798, |
|
"eval_runtime": 25.4669, |
|
"eval_samples_per_second": 78.533, |
|
"eval_steps_per_second": 1.257, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 1.3, |
|
"learning_rate": 0.00017137269105396593, |
|
"loss": 0.7285, |
|
"step": 6020 |
|
}, |
|
{ |
|
"epoch": 1.3, |
|
"learning_rate": 0.00017093806591814557, |
|
"loss": 0.7294, |
|
"step": 6040 |
|
}, |
|
{ |
|
"epoch": 1.31, |
|
"learning_rate": 0.0001705034407823252, |
|
"loss": 0.7365, |
|
"step": 6060 |
|
}, |
|
{ |
|
"epoch": 1.31, |
|
"learning_rate": 0.00017006881564650487, |
|
"loss": 0.7149, |
|
"step": 6080 |
|
}, |
|
{ |
|
"epoch": 1.32, |
|
"learning_rate": 0.0001696341905106845, |
|
"loss": 0.7229, |
|
"step": 6100 |
|
}, |
|
{ |
|
"epoch": 1.32, |
|
"learning_rate": 0.00016919956537486414, |
|
"loss": 0.7253, |
|
"step": 6120 |
|
}, |
|
{ |
|
"epoch": 1.32, |
|
"learning_rate": 0.0001687649402390438, |
|
"loss": 0.7188, |
|
"step": 6140 |
|
}, |
|
{ |
|
"epoch": 1.33, |
|
"learning_rate": 0.00016833031510322344, |
|
"loss": 0.7308, |
|
"step": 6160 |
|
}, |
|
{ |
|
"epoch": 1.33, |
|
"learning_rate": 0.00016789568996740308, |
|
"loss": 0.7186, |
|
"step": 6180 |
|
}, |
|
{ |
|
"epoch": 1.34, |
|
"learning_rate": 0.00016746106483158274, |
|
"loss": 0.7121, |
|
"step": 6200 |
|
}, |
|
{ |
|
"epoch": 1.34, |
|
"eval_loss": 0.7324739694595337, |
|
"eval_runtime": 25.5, |
|
"eval_samples_per_second": 78.431, |
|
"eval_steps_per_second": 1.255, |
|
"step": 6200 |
|
}, |
|
{ |
|
"epoch": 1.34, |
|
"learning_rate": 0.00016702643969576238, |
|
"loss": 0.7286, |
|
"step": 6220 |
|
}, |
|
{ |
|
"epoch": 1.35, |
|
"learning_rate": 0.00016659181455994202, |
|
"loss": 0.7246, |
|
"step": 6240 |
|
}, |
|
{ |
|
"epoch": 1.35, |
|
"learning_rate": 0.00016615718942412168, |
|
"loss": 0.7234, |
|
"step": 6260 |
|
}, |
|
{ |
|
"epoch": 1.35, |
|
"learning_rate": 0.00016572256428830132, |
|
"loss": 0.7245, |
|
"step": 6280 |
|
}, |
|
{ |
|
"epoch": 1.36, |
|
"learning_rate": 0.00016528793915248095, |
|
"loss": 0.7252, |
|
"step": 6300 |
|
}, |
|
{ |
|
"epoch": 1.36, |
|
"learning_rate": 0.00016485331401666062, |
|
"loss": 0.7259, |
|
"step": 6320 |
|
}, |
|
{ |
|
"epoch": 1.37, |
|
"learning_rate": 0.00016441868888084025, |
|
"loss": 0.7173, |
|
"step": 6340 |
|
}, |
|
{ |
|
"epoch": 1.37, |
|
"learning_rate": 0.0001639840637450199, |
|
"loss": 0.7222, |
|
"step": 6360 |
|
}, |
|
{ |
|
"epoch": 1.38, |
|
"learning_rate": 0.00016354943860919955, |
|
"loss": 0.7113, |
|
"step": 6380 |
|
}, |
|
{ |
|
"epoch": 1.38, |
|
"learning_rate": 0.0001631148134733792, |
|
"loss": 0.72, |
|
"step": 6400 |
|
}, |
|
{ |
|
"epoch": 1.38, |
|
"eval_loss": 0.7319995164871216, |
|
"eval_runtime": 25.5112, |
|
"eval_samples_per_second": 78.397, |
|
"eval_steps_per_second": 1.254, |
|
"step": 6400 |
|
}, |
|
{ |
|
"epoch": 1.39, |
|
"learning_rate": 0.00016268018833755883, |
|
"loss": 0.7333, |
|
"step": 6420 |
|
}, |
|
{ |
|
"epoch": 1.39, |
|
"learning_rate": 0.0001622455632017385, |
|
"loss": 0.7208, |
|
"step": 6440 |
|
}, |
|
{ |
|
"epoch": 1.39, |
|
"learning_rate": 0.00016181093806591813, |
|
"loss": 0.7161, |
|
"step": 6460 |
|
}, |
|
{ |
|
"epoch": 1.4, |
|
"learning_rate": 0.00016137631293009776, |
|
"loss": 0.7171, |
|
"step": 6480 |
|
}, |
|
{ |
|
"epoch": 1.4, |
|
"learning_rate": 0.00016094168779427743, |
|
"loss": 0.7297, |
|
"step": 6500 |
|
}, |
|
{ |
|
"epoch": 1.41, |
|
"learning_rate": 0.00016050706265845706, |
|
"loss": 0.7156, |
|
"step": 6520 |
|
}, |
|
{ |
|
"epoch": 1.41, |
|
"learning_rate": 0.0001600724375226367, |
|
"loss": 0.7175, |
|
"step": 6540 |
|
}, |
|
{ |
|
"epoch": 1.42, |
|
"learning_rate": 0.00015963781238681636, |
|
"loss": 0.7152, |
|
"step": 6560 |
|
}, |
|
{ |
|
"epoch": 1.42, |
|
"learning_rate": 0.000159203187250996, |
|
"loss": 0.7282, |
|
"step": 6580 |
|
}, |
|
{ |
|
"epoch": 1.42, |
|
"learning_rate": 0.00015876856211517564, |
|
"loss": 0.722, |
|
"step": 6600 |
|
}, |
|
{ |
|
"epoch": 1.42, |
|
"eval_loss": 0.7307416796684265, |
|
"eval_runtime": 25.4967, |
|
"eval_samples_per_second": 78.442, |
|
"eval_steps_per_second": 1.255, |
|
"step": 6600 |
|
}, |
|
{ |
|
"epoch": 1.43, |
|
"learning_rate": 0.0001583339369793553, |
|
"loss": 0.7274, |
|
"step": 6620 |
|
}, |
|
{ |
|
"epoch": 1.43, |
|
"learning_rate": 0.00015789931184353494, |
|
"loss": 0.7313, |
|
"step": 6640 |
|
}, |
|
{ |
|
"epoch": 1.44, |
|
"learning_rate": 0.00015746468670771457, |
|
"loss": 0.7209, |
|
"step": 6660 |
|
}, |
|
{ |
|
"epoch": 1.44, |
|
"learning_rate": 0.00015703006157189424, |
|
"loss": 0.7202, |
|
"step": 6680 |
|
}, |
|
{ |
|
"epoch": 1.45, |
|
"learning_rate": 0.00015659543643607387, |
|
"loss": 0.7264, |
|
"step": 6700 |
|
}, |
|
{ |
|
"epoch": 1.45, |
|
"learning_rate": 0.0001561608113002535, |
|
"loss": 0.7226, |
|
"step": 6720 |
|
}, |
|
{ |
|
"epoch": 1.45, |
|
"learning_rate": 0.00015572618616443317, |
|
"loss": 0.711, |
|
"step": 6740 |
|
}, |
|
{ |
|
"epoch": 1.46, |
|
"learning_rate": 0.0001552915610286128, |
|
"loss": 0.7216, |
|
"step": 6760 |
|
}, |
|
{ |
|
"epoch": 1.46, |
|
"learning_rate": 0.00015485693589279245, |
|
"loss": 0.7184, |
|
"step": 6780 |
|
}, |
|
{ |
|
"epoch": 1.47, |
|
"learning_rate": 0.0001544223107569721, |
|
"loss": 0.7216, |
|
"step": 6800 |
|
}, |
|
{ |
|
"epoch": 1.47, |
|
"eval_loss": 0.7297094464302063, |
|
"eval_runtime": 25.4826, |
|
"eval_samples_per_second": 78.485, |
|
"eval_steps_per_second": 1.256, |
|
"step": 6800 |
|
}, |
|
{ |
|
"epoch": 1.47, |
|
"learning_rate": 0.00015398768562115175, |
|
"loss": 0.7203, |
|
"step": 6820 |
|
}, |
|
{ |
|
"epoch": 1.48, |
|
"learning_rate": 0.00015355306048533138, |
|
"loss": 0.7184, |
|
"step": 6840 |
|
}, |
|
{ |
|
"epoch": 1.48, |
|
"learning_rate": 0.00015311843534951105, |
|
"loss": 0.7183, |
|
"step": 6860 |
|
}, |
|
{ |
|
"epoch": 1.48, |
|
"learning_rate": 0.00015268381021369068, |
|
"loss": 0.7267, |
|
"step": 6880 |
|
}, |
|
{ |
|
"epoch": 1.49, |
|
"learning_rate": 0.00015224918507787032, |
|
"loss": 0.7299, |
|
"step": 6900 |
|
}, |
|
{ |
|
"epoch": 1.49, |
|
"learning_rate": 0.00015181455994204998, |
|
"loss": 0.719, |
|
"step": 6920 |
|
}, |
|
{ |
|
"epoch": 1.5, |
|
"learning_rate": 0.00015137993480622962, |
|
"loss": 0.7229, |
|
"step": 6940 |
|
}, |
|
{ |
|
"epoch": 1.5, |
|
"learning_rate": 0.00015094530967040926, |
|
"loss": 0.7231, |
|
"step": 6960 |
|
}, |
|
{ |
|
"epoch": 1.51, |
|
"learning_rate": 0.00015051068453458892, |
|
"loss": 0.7279, |
|
"step": 6980 |
|
}, |
|
{ |
|
"epoch": 1.51, |
|
"learning_rate": 0.00015007605939876856, |
|
"loss": 0.7252, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 1.51, |
|
"eval_loss": 0.7288112640380859, |
|
"eval_runtime": 25.4887, |
|
"eval_samples_per_second": 78.466, |
|
"eval_steps_per_second": 1.255, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 1.51, |
|
"learning_rate": 0.0001496414342629482, |
|
"loss": 0.7148, |
|
"step": 7020 |
|
}, |
|
{ |
|
"epoch": 1.52, |
|
"learning_rate": 0.00014920680912712786, |
|
"loss": 0.7147, |
|
"step": 7040 |
|
}, |
|
{ |
|
"epoch": 1.52, |
|
"learning_rate": 0.0001487721839913075, |
|
"loss": 0.7209, |
|
"step": 7060 |
|
}, |
|
{ |
|
"epoch": 1.53, |
|
"learning_rate": 0.00014833755885548713, |
|
"loss": 0.724, |
|
"step": 7080 |
|
}, |
|
{ |
|
"epoch": 1.53, |
|
"learning_rate": 0.00014790293371966676, |
|
"loss": 0.7256, |
|
"step": 7100 |
|
}, |
|
{ |
|
"epoch": 1.54, |
|
"learning_rate": 0.0001474683085838464, |
|
"loss": 0.7246, |
|
"step": 7120 |
|
}, |
|
{ |
|
"epoch": 1.54, |
|
"learning_rate": 0.00014703368344802606, |
|
"loss": 0.7103, |
|
"step": 7140 |
|
}, |
|
{ |
|
"epoch": 1.54, |
|
"learning_rate": 0.0001465990583122057, |
|
"loss": 0.7223, |
|
"step": 7160 |
|
}, |
|
{ |
|
"epoch": 1.55, |
|
"learning_rate": 0.00014616443317638534, |
|
"loss": 0.7149, |
|
"step": 7180 |
|
}, |
|
{ |
|
"epoch": 1.55, |
|
"learning_rate": 0.000145729808040565, |
|
"loss": 0.7214, |
|
"step": 7200 |
|
}, |
|
{ |
|
"epoch": 1.55, |
|
"eval_loss": 0.7280930876731873, |
|
"eval_runtime": 25.4883, |
|
"eval_samples_per_second": 78.467, |
|
"eval_steps_per_second": 1.255, |
|
"step": 7200 |
|
}, |
|
{ |
|
"epoch": 1.56, |
|
"learning_rate": 0.00014529518290474464, |
|
"loss": 0.7118, |
|
"step": 7220 |
|
}, |
|
{ |
|
"epoch": 1.56, |
|
"learning_rate": 0.00014486055776892427, |
|
"loss": 0.7171, |
|
"step": 7240 |
|
}, |
|
{ |
|
"epoch": 1.57, |
|
"learning_rate": 0.00014442593263310394, |
|
"loss": 0.7191, |
|
"step": 7260 |
|
}, |
|
{ |
|
"epoch": 1.57, |
|
"learning_rate": 0.00014399130749728357, |
|
"loss": 0.7155, |
|
"step": 7280 |
|
}, |
|
{ |
|
"epoch": 1.57, |
|
"learning_rate": 0.0001435566823614632, |
|
"loss": 0.7198, |
|
"step": 7300 |
|
}, |
|
{ |
|
"epoch": 1.58, |
|
"learning_rate": 0.00014312205722564287, |
|
"loss": 0.7188, |
|
"step": 7320 |
|
}, |
|
{ |
|
"epoch": 1.58, |
|
"learning_rate": 0.0001426874320898225, |
|
"loss": 0.7236, |
|
"step": 7340 |
|
}, |
|
{ |
|
"epoch": 1.59, |
|
"learning_rate": 0.00014225280695400215, |
|
"loss": 0.712, |
|
"step": 7360 |
|
}, |
|
{ |
|
"epoch": 1.59, |
|
"learning_rate": 0.0001418181818181818, |
|
"loss": 0.7181, |
|
"step": 7380 |
|
}, |
|
{ |
|
"epoch": 1.6, |
|
"learning_rate": 0.00014138355668236145, |
|
"loss": 0.7198, |
|
"step": 7400 |
|
}, |
|
{ |
|
"epoch": 1.6, |
|
"eval_loss": 0.7276077270507812, |
|
"eval_runtime": 25.4843, |
|
"eval_samples_per_second": 78.48, |
|
"eval_steps_per_second": 1.256, |
|
"step": 7400 |
|
}, |
|
{ |
|
"epoch": 1.6, |
|
"learning_rate": 0.00014094893154654108, |
|
"loss": 0.7187, |
|
"step": 7420 |
|
}, |
|
{ |
|
"epoch": 1.61, |
|
"learning_rate": 0.00014051430641072075, |
|
"loss": 0.7153, |
|
"step": 7440 |
|
}, |
|
{ |
|
"epoch": 1.61, |
|
"learning_rate": 0.00014007968127490038, |
|
"loss": 0.7208, |
|
"step": 7460 |
|
}, |
|
{ |
|
"epoch": 1.61, |
|
"learning_rate": 0.00013964505613908002, |
|
"loss": 0.7153, |
|
"step": 7480 |
|
}, |
|
{ |
|
"epoch": 1.62, |
|
"learning_rate": 0.00013921043100325968, |
|
"loss": 0.7207, |
|
"step": 7500 |
|
}, |
|
{ |
|
"epoch": 1.62, |
|
"learning_rate": 0.00013877580586743932, |
|
"loss": 0.7167, |
|
"step": 7520 |
|
}, |
|
{ |
|
"epoch": 1.63, |
|
"learning_rate": 0.00013834118073161896, |
|
"loss": 0.7183, |
|
"step": 7540 |
|
}, |
|
{ |
|
"epoch": 1.63, |
|
"learning_rate": 0.00013792828685258964, |
|
"loss": 0.7196, |
|
"step": 7560 |
|
}, |
|
{ |
|
"epoch": 1.64, |
|
"learning_rate": 0.00013749366171676928, |
|
"loss": 0.7233, |
|
"step": 7580 |
|
}, |
|
{ |
|
"epoch": 1.64, |
|
"learning_rate": 0.00013705903658094894, |
|
"loss": 0.7237, |
|
"step": 7600 |
|
}, |
|
{ |
|
"epoch": 1.64, |
|
"eval_loss": 0.7260885238647461, |
|
"eval_runtime": 25.503, |
|
"eval_samples_per_second": 78.422, |
|
"eval_steps_per_second": 1.255, |
|
"step": 7600 |
|
}, |
|
{ |
|
"epoch": 1.64, |
|
"learning_rate": 0.00013662441144512855, |
|
"loss": 0.72, |
|
"step": 7620 |
|
}, |
|
{ |
|
"epoch": 1.65, |
|
"learning_rate": 0.0001361897863093082, |
|
"loss": 0.7094, |
|
"step": 7640 |
|
}, |
|
{ |
|
"epoch": 1.65, |
|
"learning_rate": 0.00013575516117348785, |
|
"loss": 0.7111, |
|
"step": 7660 |
|
}, |
|
{ |
|
"epoch": 1.66, |
|
"learning_rate": 0.00013532053603766749, |
|
"loss": 0.7182, |
|
"step": 7680 |
|
}, |
|
{ |
|
"epoch": 1.66, |
|
"learning_rate": 0.00013488591090184715, |
|
"loss": 0.7182, |
|
"step": 7700 |
|
}, |
|
{ |
|
"epoch": 1.67, |
|
"learning_rate": 0.00013445128576602679, |
|
"loss": 0.7183, |
|
"step": 7720 |
|
}, |
|
{ |
|
"epoch": 1.67, |
|
"learning_rate": 0.00013401666063020642, |
|
"loss": 0.7112, |
|
"step": 7740 |
|
}, |
|
{ |
|
"epoch": 1.67, |
|
"learning_rate": 0.00013358203549438609, |
|
"loss": 0.7183, |
|
"step": 7760 |
|
}, |
|
{ |
|
"epoch": 1.68, |
|
"learning_rate": 0.00013314741035856572, |
|
"loss": 0.7152, |
|
"step": 7780 |
|
}, |
|
{ |
|
"epoch": 1.68, |
|
"learning_rate": 0.00013271278522274536, |
|
"loss": 0.7233, |
|
"step": 7800 |
|
}, |
|
{ |
|
"epoch": 1.68, |
|
"eval_loss": 0.7252987027168274, |
|
"eval_runtime": 25.5066, |
|
"eval_samples_per_second": 78.411, |
|
"eval_steps_per_second": 1.255, |
|
"step": 7800 |
|
}, |
|
{ |
|
"epoch": 1.69, |
|
"learning_rate": 0.00013227816008692502, |
|
"loss": 0.7124, |
|
"step": 7820 |
|
}, |
|
{ |
|
"epoch": 1.69, |
|
"learning_rate": 0.00013184353495110466, |
|
"loss": 0.7109, |
|
"step": 7840 |
|
}, |
|
{ |
|
"epoch": 1.7, |
|
"learning_rate": 0.0001314089098152843, |
|
"loss": 0.7132, |
|
"step": 7860 |
|
}, |
|
{ |
|
"epoch": 1.7, |
|
"learning_rate": 0.00013097428467946396, |
|
"loss": 0.7157, |
|
"step": 7880 |
|
}, |
|
{ |
|
"epoch": 1.7, |
|
"learning_rate": 0.0001305396595436436, |
|
"loss": 0.7237, |
|
"step": 7900 |
|
}, |
|
{ |
|
"epoch": 1.71, |
|
"learning_rate": 0.00013010503440782323, |
|
"loss": 0.7176, |
|
"step": 7920 |
|
}, |
|
{ |
|
"epoch": 1.71, |
|
"learning_rate": 0.0001296704092720029, |
|
"loss": 0.7199, |
|
"step": 7940 |
|
}, |
|
{ |
|
"epoch": 1.72, |
|
"learning_rate": 0.00012923578413618253, |
|
"loss": 0.7119, |
|
"step": 7960 |
|
}, |
|
{ |
|
"epoch": 1.72, |
|
"learning_rate": 0.00012880115900036217, |
|
"loss": 0.717, |
|
"step": 7980 |
|
}, |
|
{ |
|
"epoch": 1.73, |
|
"learning_rate": 0.00012836653386454183, |
|
"loss": 0.7155, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 1.73, |
|
"eval_loss": 0.7248360514640808, |
|
"eval_runtime": 25.5301, |
|
"eval_samples_per_second": 78.339, |
|
"eval_steps_per_second": 1.253, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 1.73, |
|
"learning_rate": 0.00012793190872872147, |
|
"loss": 0.7085, |
|
"step": 8020 |
|
}, |
|
{ |
|
"epoch": 1.73, |
|
"learning_rate": 0.0001274972835929011, |
|
"loss": 0.7174, |
|
"step": 8040 |
|
}, |
|
{ |
|
"epoch": 1.74, |
|
"learning_rate": 0.00012706265845708077, |
|
"loss": 0.7224, |
|
"step": 8060 |
|
}, |
|
{ |
|
"epoch": 1.74, |
|
"learning_rate": 0.0001266280333212604, |
|
"loss": 0.7169, |
|
"step": 8080 |
|
}, |
|
{ |
|
"epoch": 1.75, |
|
"learning_rate": 0.00012619340818544004, |
|
"loss": 0.7191, |
|
"step": 8100 |
|
}, |
|
{ |
|
"epoch": 1.75, |
|
"learning_rate": 0.0001257587830496197, |
|
"loss": 0.7179, |
|
"step": 8120 |
|
}, |
|
{ |
|
"epoch": 1.76, |
|
"learning_rate": 0.00012532415791379934, |
|
"loss": 0.7208, |
|
"step": 8140 |
|
}, |
|
{ |
|
"epoch": 1.76, |
|
"learning_rate": 0.00012488953277797898, |
|
"loss": 0.7168, |
|
"step": 8160 |
|
}, |
|
{ |
|
"epoch": 1.76, |
|
"learning_rate": 0.00012445490764215864, |
|
"loss": 0.7101, |
|
"step": 8180 |
|
}, |
|
{ |
|
"epoch": 1.77, |
|
"learning_rate": 0.00012402028250633828, |
|
"loss": 0.7167, |
|
"step": 8200 |
|
}, |
|
{ |
|
"epoch": 1.77, |
|
"eval_loss": 0.7242170572280884, |
|
"eval_runtime": 25.4873, |
|
"eval_samples_per_second": 78.47, |
|
"eval_steps_per_second": 1.256, |
|
"step": 8200 |
|
}, |
|
{ |
|
"epoch": 1.77, |
|
"learning_rate": 0.00012358565737051791, |
|
"loss": 0.7062, |
|
"step": 8220 |
|
}, |
|
{ |
|
"epoch": 1.78, |
|
"learning_rate": 0.00012315103223469758, |
|
"loss": 0.7177, |
|
"step": 8240 |
|
}, |
|
{ |
|
"epoch": 1.78, |
|
"learning_rate": 0.00012271640709887721, |
|
"loss": 0.7035, |
|
"step": 8260 |
|
}, |
|
{ |
|
"epoch": 1.79, |
|
"learning_rate": 0.00012228178196305685, |
|
"loss": 0.7157, |
|
"step": 8280 |
|
}, |
|
{ |
|
"epoch": 1.79, |
|
"learning_rate": 0.0001218471568272365, |
|
"loss": 0.7196, |
|
"step": 8300 |
|
}, |
|
{ |
|
"epoch": 1.8, |
|
"learning_rate": 0.00012141253169141615, |
|
"loss": 0.7105, |
|
"step": 8320 |
|
}, |
|
{ |
|
"epoch": 1.8, |
|
"learning_rate": 0.00012097790655559579, |
|
"loss": 0.7105, |
|
"step": 8340 |
|
}, |
|
{ |
|
"epoch": 1.8, |
|
"learning_rate": 0.00012054328141977544, |
|
"loss": 0.7139, |
|
"step": 8360 |
|
}, |
|
{ |
|
"epoch": 1.81, |
|
"learning_rate": 0.00012010865628395509, |
|
"loss": 0.7215, |
|
"step": 8380 |
|
}, |
|
{ |
|
"epoch": 1.81, |
|
"learning_rate": 0.00011967403114813472, |
|
"loss": 0.725, |
|
"step": 8400 |
|
}, |
|
{ |
|
"epoch": 1.81, |
|
"eval_loss": 0.7237139344215393, |
|
"eval_runtime": 25.506, |
|
"eval_samples_per_second": 78.413, |
|
"eval_steps_per_second": 1.255, |
|
"step": 8400 |
|
}, |
|
{ |
|
"epoch": 1.82, |
|
"learning_rate": 0.00011923940601231437, |
|
"loss": 0.7107, |
|
"step": 8420 |
|
}, |
|
{ |
|
"epoch": 1.82, |
|
"learning_rate": 0.00011880478087649402, |
|
"loss": 0.7095, |
|
"step": 8440 |
|
}, |
|
{ |
|
"epoch": 1.83, |
|
"learning_rate": 0.00011837015574067366, |
|
"loss": 0.7061, |
|
"step": 8460 |
|
}, |
|
{ |
|
"epoch": 1.83, |
|
"learning_rate": 0.0001179355306048533, |
|
"loss": 0.716, |
|
"step": 8480 |
|
}, |
|
{ |
|
"epoch": 1.83, |
|
"learning_rate": 0.00011750090546903295, |
|
"loss": 0.7203, |
|
"step": 8500 |
|
}, |
|
{ |
|
"epoch": 1.84, |
|
"learning_rate": 0.00011706628033321258, |
|
"loss": 0.7098, |
|
"step": 8520 |
|
}, |
|
{ |
|
"epoch": 1.84, |
|
"learning_rate": 0.00011663165519739223, |
|
"loss": 0.7104, |
|
"step": 8540 |
|
}, |
|
{ |
|
"epoch": 1.85, |
|
"learning_rate": 0.00011619703006157188, |
|
"loss": 0.7051, |
|
"step": 8560 |
|
}, |
|
{ |
|
"epoch": 1.85, |
|
"learning_rate": 0.00011576240492575152, |
|
"loss": 0.7198, |
|
"step": 8580 |
|
}, |
|
{ |
|
"epoch": 1.86, |
|
"learning_rate": 0.00011532777978993117, |
|
"loss": 0.7175, |
|
"step": 8600 |
|
}, |
|
{ |
|
"epoch": 1.86, |
|
"eval_loss": 0.7230754494667053, |
|
"eval_runtime": 25.5133, |
|
"eval_samples_per_second": 78.39, |
|
"eval_steps_per_second": 1.254, |
|
"step": 8600 |
|
}, |
|
{ |
|
"epoch": 1.86, |
|
"learning_rate": 0.00011489315465411082, |
|
"loss": 0.7046, |
|
"step": 8620 |
|
}, |
|
{ |
|
"epoch": 1.86, |
|
"learning_rate": 0.00011445852951829046, |
|
"loss": 0.7176, |
|
"step": 8640 |
|
}, |
|
{ |
|
"epoch": 1.87, |
|
"learning_rate": 0.0001140239043824701, |
|
"loss": 0.7193, |
|
"step": 8660 |
|
}, |
|
{ |
|
"epoch": 1.87, |
|
"learning_rate": 0.00011358927924664976, |
|
"loss": 0.7046, |
|
"step": 8680 |
|
}, |
|
{ |
|
"epoch": 1.88, |
|
"learning_rate": 0.00011315465411082939, |
|
"loss": 0.7116, |
|
"step": 8700 |
|
}, |
|
{ |
|
"epoch": 1.88, |
|
"learning_rate": 0.00011274176023180006, |
|
"loss": 0.7152, |
|
"step": 8720 |
|
}, |
|
{ |
|
"epoch": 1.89, |
|
"learning_rate": 0.00011230713509597971, |
|
"loss": 0.7164, |
|
"step": 8740 |
|
}, |
|
{ |
|
"epoch": 1.89, |
|
"learning_rate": 0.00011187250996015936, |
|
"loss": 0.7192, |
|
"step": 8760 |
|
}, |
|
{ |
|
"epoch": 1.89, |
|
"learning_rate": 0.000111437884824339, |
|
"loss": 0.7124, |
|
"step": 8780 |
|
}, |
|
{ |
|
"epoch": 1.9, |
|
"learning_rate": 0.00011100325968851865, |
|
"loss": 0.7032, |
|
"step": 8800 |
|
}, |
|
{ |
|
"epoch": 1.9, |
|
"eval_loss": 0.7217770218849182, |
|
"eval_runtime": 25.4723, |
|
"eval_samples_per_second": 78.517, |
|
"eval_steps_per_second": 1.256, |
|
"step": 8800 |
|
}, |
|
{ |
|
"epoch": 1.9, |
|
"learning_rate": 0.0001105686345526983, |
|
"loss": 0.7157, |
|
"step": 8820 |
|
}, |
|
{ |
|
"epoch": 1.91, |
|
"learning_rate": 0.00011013400941687794, |
|
"loss": 0.7115, |
|
"step": 8840 |
|
}, |
|
{ |
|
"epoch": 1.91, |
|
"learning_rate": 0.00010969938428105759, |
|
"loss": 0.7137, |
|
"step": 8860 |
|
}, |
|
{ |
|
"epoch": 1.92, |
|
"learning_rate": 0.00010926475914523724, |
|
"loss": 0.7176, |
|
"step": 8880 |
|
}, |
|
{ |
|
"epoch": 1.92, |
|
"learning_rate": 0.00010883013400941687, |
|
"loss": 0.7081, |
|
"step": 8900 |
|
}, |
|
{ |
|
"epoch": 1.92, |
|
"learning_rate": 0.00010839550887359652, |
|
"loss": 0.7233, |
|
"step": 8920 |
|
}, |
|
{ |
|
"epoch": 1.93, |
|
"learning_rate": 0.00010796088373777617, |
|
"loss": 0.7058, |
|
"step": 8940 |
|
}, |
|
{ |
|
"epoch": 1.93, |
|
"learning_rate": 0.00010752625860195581, |
|
"loss": 0.7154, |
|
"step": 8960 |
|
}, |
|
{ |
|
"epoch": 1.94, |
|
"learning_rate": 0.00010709163346613546, |
|
"loss": 0.7135, |
|
"step": 8980 |
|
}, |
|
{ |
|
"epoch": 1.94, |
|
"learning_rate": 0.00010665700833031508, |
|
"loss": 0.7078, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 1.94, |
|
"eval_loss": 0.7215875387191772, |
|
"eval_runtime": 25.484, |
|
"eval_samples_per_second": 78.481, |
|
"eval_steps_per_second": 1.256, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 1.95, |
|
"learning_rate": 0.00010622238319449473, |
|
"loss": 0.7061, |
|
"step": 9020 |
|
}, |
|
{ |
|
"epoch": 1.95, |
|
"learning_rate": 0.00010578775805867438, |
|
"loss": 0.7174, |
|
"step": 9040 |
|
}, |
|
{ |
|
"epoch": 1.95, |
|
"learning_rate": 0.00010535313292285402, |
|
"loss": 0.7132, |
|
"step": 9060 |
|
}, |
|
{ |
|
"epoch": 1.96, |
|
"learning_rate": 0.00010491850778703367, |
|
"loss": 0.7247, |
|
"step": 9080 |
|
}, |
|
{ |
|
"epoch": 1.96, |
|
"learning_rate": 0.00010448388265121332, |
|
"loss": 0.7064, |
|
"step": 9100 |
|
}, |
|
{ |
|
"epoch": 1.97, |
|
"learning_rate": 0.00010404925751539295, |
|
"loss": 0.7098, |
|
"step": 9120 |
|
}, |
|
{ |
|
"epoch": 1.97, |
|
"learning_rate": 0.0001036146323795726, |
|
"loss": 0.708, |
|
"step": 9140 |
|
}, |
|
{ |
|
"epoch": 1.98, |
|
"learning_rate": 0.00010318000724375225, |
|
"loss": 0.7144, |
|
"step": 9160 |
|
}, |
|
{ |
|
"epoch": 1.98, |
|
"learning_rate": 0.00010274538210793189, |
|
"loss": 0.7151, |
|
"step": 9180 |
|
}, |
|
{ |
|
"epoch": 1.98, |
|
"learning_rate": 0.00010231075697211154, |
|
"loss": 0.718, |
|
"step": 9200 |
|
}, |
|
{ |
|
"epoch": 1.98, |
|
"eval_loss": 0.7208251357078552, |
|
"eval_runtime": 25.5022, |
|
"eval_samples_per_second": 78.425, |
|
"eval_steps_per_second": 1.255, |
|
"step": 9200 |
|
}, |
|
{ |
|
"epoch": 1.99, |
|
"learning_rate": 0.00010187613183629119, |
|
"loss": 0.7108, |
|
"step": 9220 |
|
}, |
|
{ |
|
"epoch": 1.99, |
|
"learning_rate": 0.00010144150670047083, |
|
"loss": 0.6952, |
|
"step": 9240 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"learning_rate": 0.00010100688156465048, |
|
"loss": 0.7013, |
|
"step": 9260 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"learning_rate": 0.00010057225642883013, |
|
"loss": 0.7013, |
|
"step": 9280 |
|
}, |
|
{ |
|
"epoch": 2.01, |
|
"learning_rate": 0.00010013763129300976, |
|
"loss": 0.7049, |
|
"step": 9300 |
|
}, |
|
{ |
|
"epoch": 2.01, |
|
"learning_rate": 9.970300615718941e-05, |
|
"loss": 0.7093, |
|
"step": 9320 |
|
}, |
|
{ |
|
"epoch": 2.02, |
|
"learning_rate": 9.926838102136906e-05, |
|
"loss": 0.713, |
|
"step": 9340 |
|
}, |
|
{ |
|
"epoch": 2.02, |
|
"learning_rate": 9.88337558855487e-05, |
|
"loss": 0.7108, |
|
"step": 9360 |
|
}, |
|
{ |
|
"epoch": 2.02, |
|
"learning_rate": 9.839913074972835e-05, |
|
"loss": 0.7115, |
|
"step": 9380 |
|
}, |
|
{ |
|
"epoch": 2.03, |
|
"learning_rate": 9.7964505613908e-05, |
|
"loss": 0.7119, |
|
"step": 9400 |
|
}, |
|
{ |
|
"epoch": 2.03, |
|
"eval_loss": 0.7202969789505005, |
|
"eval_runtime": 25.504, |
|
"eval_samples_per_second": 78.419, |
|
"eval_steps_per_second": 1.255, |
|
"step": 9400 |
|
}, |
|
{ |
|
"epoch": 2.03, |
|
"learning_rate": 9.752988047808764e-05, |
|
"loss": 0.7107, |
|
"step": 9420 |
|
}, |
|
{ |
|
"epoch": 2.04, |
|
"learning_rate": 9.709525534226729e-05, |
|
"loss": 0.7065, |
|
"step": 9440 |
|
}, |
|
{ |
|
"epoch": 2.04, |
|
"learning_rate": 9.666063020644694e-05, |
|
"loss": 0.7121, |
|
"step": 9460 |
|
}, |
|
{ |
|
"epoch": 2.05, |
|
"learning_rate": 9.622600507062657e-05, |
|
"loss": 0.7163, |
|
"step": 9480 |
|
}, |
|
{ |
|
"epoch": 2.05, |
|
"learning_rate": 9.579137993480622e-05, |
|
"loss": 0.7026, |
|
"step": 9500 |
|
}, |
|
{ |
|
"epoch": 2.05, |
|
"learning_rate": 9.535675479898587e-05, |
|
"loss": 0.7158, |
|
"step": 9520 |
|
}, |
|
{ |
|
"epoch": 2.06, |
|
"learning_rate": 9.492212966316551e-05, |
|
"loss": 0.7016, |
|
"step": 9540 |
|
}, |
|
{ |
|
"epoch": 2.06, |
|
"learning_rate": 9.448750452734516e-05, |
|
"loss": 0.7149, |
|
"step": 9560 |
|
}, |
|
{ |
|
"epoch": 2.07, |
|
"learning_rate": 9.405287939152481e-05, |
|
"loss": 0.7079, |
|
"step": 9580 |
|
}, |
|
{ |
|
"epoch": 2.07, |
|
"learning_rate": 9.361825425570445e-05, |
|
"loss": 0.709, |
|
"step": 9600 |
|
}, |
|
{ |
|
"epoch": 2.07, |
|
"eval_loss": 0.7194134593009949, |
|
"eval_runtime": 25.5286, |
|
"eval_samples_per_second": 78.343, |
|
"eval_steps_per_second": 1.253, |
|
"step": 9600 |
|
}, |
|
{ |
|
"epoch": 2.08, |
|
"learning_rate": 9.31836291198841e-05, |
|
"loss": 0.7127, |
|
"step": 9620 |
|
}, |
|
{ |
|
"epoch": 2.08, |
|
"learning_rate": 9.274900398406375e-05, |
|
"loss": 0.7037, |
|
"step": 9640 |
|
}, |
|
{ |
|
"epoch": 2.08, |
|
"learning_rate": 9.231437884824338e-05, |
|
"loss": 0.7114, |
|
"step": 9660 |
|
}, |
|
{ |
|
"epoch": 2.09, |
|
"learning_rate": 9.187975371242303e-05, |
|
"loss": 0.706, |
|
"step": 9680 |
|
}, |
|
{ |
|
"epoch": 2.09, |
|
"learning_rate": 9.144512857660268e-05, |
|
"loss": 0.7026, |
|
"step": 9700 |
|
}, |
|
{ |
|
"epoch": 2.1, |
|
"learning_rate": 9.101050344078232e-05, |
|
"loss": 0.7079, |
|
"step": 9720 |
|
}, |
|
{ |
|
"epoch": 2.1, |
|
"learning_rate": 9.057587830496197e-05, |
|
"loss": 0.7053, |
|
"step": 9740 |
|
}, |
|
{ |
|
"epoch": 2.11, |
|
"learning_rate": 9.014125316914162e-05, |
|
"loss": 0.7125, |
|
"step": 9760 |
|
}, |
|
{ |
|
"epoch": 2.11, |
|
"learning_rate": 8.970662803332126e-05, |
|
"loss": 0.7045, |
|
"step": 9780 |
|
}, |
|
{ |
|
"epoch": 2.11, |
|
"learning_rate": 8.92720028975009e-05, |
|
"loss": 0.7109, |
|
"step": 9800 |
|
}, |
|
{ |
|
"epoch": 2.11, |
|
"eval_loss": 0.7186465859413147, |
|
"eval_runtime": 25.5049, |
|
"eval_samples_per_second": 78.416, |
|
"eval_steps_per_second": 1.255, |
|
"step": 9800 |
|
}, |
|
{ |
|
"epoch": 2.12, |
|
"learning_rate": 8.883737776168056e-05, |
|
"loss": 0.7035, |
|
"step": 9820 |
|
}, |
|
{ |
|
"epoch": 2.12, |
|
"learning_rate": 8.840275262586019e-05, |
|
"loss": 0.7073, |
|
"step": 9840 |
|
}, |
|
{ |
|
"epoch": 2.13, |
|
"learning_rate": 8.796812749003983e-05, |
|
"loss": 0.7114, |
|
"step": 9860 |
|
}, |
|
{ |
|
"epoch": 2.13, |
|
"learning_rate": 8.753350235421946e-05, |
|
"loss": 0.7066, |
|
"step": 9880 |
|
}, |
|
{ |
|
"epoch": 2.14, |
|
"learning_rate": 8.709887721839911e-05, |
|
"loss": 0.7055, |
|
"step": 9900 |
|
}, |
|
{ |
|
"epoch": 2.14, |
|
"learning_rate": 8.666425208257877e-05, |
|
"loss": 0.7064, |
|
"step": 9920 |
|
}, |
|
{ |
|
"epoch": 2.14, |
|
"learning_rate": 8.62296269467584e-05, |
|
"loss": 0.7154, |
|
"step": 9940 |
|
}, |
|
{ |
|
"epoch": 2.15, |
|
"learning_rate": 8.579500181093805e-05, |
|
"loss": 0.7099, |
|
"step": 9960 |
|
}, |
|
{ |
|
"epoch": 2.15, |
|
"learning_rate": 8.53603766751177e-05, |
|
"loss": 0.7112, |
|
"step": 9980 |
|
}, |
|
{ |
|
"epoch": 2.16, |
|
"learning_rate": 8.492575153929734e-05, |
|
"loss": 0.7086, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 2.16, |
|
"eval_loss": 0.7181739211082458, |
|
"eval_runtime": 25.5087, |
|
"eval_samples_per_second": 78.405, |
|
"eval_steps_per_second": 1.254, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 2.16, |
|
"learning_rate": 8.449112640347699e-05, |
|
"loss": 0.7155, |
|
"step": 10020 |
|
}, |
|
{ |
|
"epoch": 2.17, |
|
"learning_rate": 8.405650126765664e-05, |
|
"loss": 0.7097, |
|
"step": 10040 |
|
}, |
|
{ |
|
"epoch": 2.17, |
|
"learning_rate": 8.362187613183627e-05, |
|
"loss": 0.7025, |
|
"step": 10060 |
|
}, |
|
{ |
|
"epoch": 2.17, |
|
"learning_rate": 8.318725099601592e-05, |
|
"loss": 0.7065, |
|
"step": 10080 |
|
}, |
|
{ |
|
"epoch": 2.18, |
|
"learning_rate": 8.275262586019557e-05, |
|
"loss": 0.6982, |
|
"step": 10100 |
|
}, |
|
{ |
|
"epoch": 2.18, |
|
"learning_rate": 8.231800072437521e-05, |
|
"loss": 0.7039, |
|
"step": 10120 |
|
}, |
|
{ |
|
"epoch": 2.19, |
|
"learning_rate": 8.188337558855486e-05, |
|
"loss": 0.7097, |
|
"step": 10140 |
|
}, |
|
{ |
|
"epoch": 2.19, |
|
"learning_rate": 8.144875045273451e-05, |
|
"loss": 0.7089, |
|
"step": 10160 |
|
}, |
|
{ |
|
"epoch": 2.2, |
|
"learning_rate": 8.101412531691415e-05, |
|
"loss": 0.7018, |
|
"step": 10180 |
|
}, |
|
{ |
|
"epoch": 2.2, |
|
"learning_rate": 8.05795001810938e-05, |
|
"loss": 0.7025, |
|
"step": 10200 |
|
}, |
|
{ |
|
"epoch": 2.2, |
|
"eval_loss": 0.7179592251777649, |
|
"eval_runtime": 25.4993, |
|
"eval_samples_per_second": 78.433, |
|
"eval_steps_per_second": 1.255, |
|
"step": 10200 |
|
}, |
|
{ |
|
"epoch": 2.2, |
|
"learning_rate": 8.014487504527345e-05, |
|
"loss": 0.7067, |
|
"step": 10220 |
|
}, |
|
{ |
|
"epoch": 2.21, |
|
"learning_rate": 7.971024990945308e-05, |
|
"loss": 0.71, |
|
"step": 10240 |
|
}, |
|
{ |
|
"epoch": 2.21, |
|
"learning_rate": 7.927562477363273e-05, |
|
"loss": 0.7255, |
|
"step": 10260 |
|
}, |
|
{ |
|
"epoch": 2.22, |
|
"learning_rate": 7.884099963781238e-05, |
|
"loss": 0.7065, |
|
"step": 10280 |
|
}, |
|
{ |
|
"epoch": 2.22, |
|
"learning_rate": 7.840637450199202e-05, |
|
"loss": 0.712, |
|
"step": 10300 |
|
}, |
|
{ |
|
"epoch": 2.23, |
|
"learning_rate": 7.797174936617167e-05, |
|
"loss": 0.7132, |
|
"step": 10320 |
|
}, |
|
{ |
|
"epoch": 2.23, |
|
"learning_rate": 7.753712423035132e-05, |
|
"loss": 0.7106, |
|
"step": 10340 |
|
}, |
|
{ |
|
"epoch": 2.24, |
|
"learning_rate": 7.710249909453096e-05, |
|
"loss": 0.708, |
|
"step": 10360 |
|
}, |
|
{ |
|
"epoch": 2.24, |
|
"learning_rate": 7.666787395871061e-05, |
|
"loss": 0.7054, |
|
"step": 10380 |
|
}, |
|
{ |
|
"epoch": 2.24, |
|
"learning_rate": 7.623324882289026e-05, |
|
"loss": 0.7087, |
|
"step": 10400 |
|
}, |
|
{ |
|
"epoch": 2.24, |
|
"eval_loss": 0.717901349067688, |
|
"eval_runtime": 25.4862, |
|
"eval_samples_per_second": 78.474, |
|
"eval_steps_per_second": 1.256, |
|
"step": 10400 |
|
}, |
|
{ |
|
"epoch": 2.25, |
|
"learning_rate": 7.57986236870699e-05, |
|
"loss": 0.7014, |
|
"step": 10420 |
|
}, |
|
{ |
|
"epoch": 2.25, |
|
"learning_rate": 7.536399855124954e-05, |
|
"loss": 0.7103, |
|
"step": 10440 |
|
}, |
|
{ |
|
"epoch": 2.26, |
|
"learning_rate": 7.49293734154292e-05, |
|
"loss": 0.7089, |
|
"step": 10460 |
|
}, |
|
{ |
|
"epoch": 2.26, |
|
"learning_rate": 7.449474827960883e-05, |
|
"loss": 0.704, |
|
"step": 10480 |
|
}, |
|
{ |
|
"epoch": 2.27, |
|
"learning_rate": 7.406012314378847e-05, |
|
"loss": 0.7074, |
|
"step": 10500 |
|
}, |
|
{ |
|
"epoch": 2.27, |
|
"learning_rate": 7.362549800796812e-05, |
|
"loss": 0.7094, |
|
"step": 10520 |
|
}, |
|
{ |
|
"epoch": 2.27, |
|
"learning_rate": 7.319087287214777e-05, |
|
"loss": 0.7069, |
|
"step": 10540 |
|
}, |
|
{ |
|
"epoch": 2.28, |
|
"learning_rate": 7.27562477363274e-05, |
|
"loss": 0.7081, |
|
"step": 10560 |
|
}, |
|
{ |
|
"epoch": 2.28, |
|
"learning_rate": 7.232162260050705e-05, |
|
"loss": 0.7036, |
|
"step": 10580 |
|
}, |
|
{ |
|
"epoch": 2.29, |
|
"learning_rate": 7.18869974646867e-05, |
|
"loss": 0.6984, |
|
"step": 10600 |
|
}, |
|
{ |
|
"epoch": 2.29, |
|
"eval_loss": 0.7175166010856628, |
|
"eval_runtime": 25.5016, |
|
"eval_samples_per_second": 78.426, |
|
"eval_steps_per_second": 1.255, |
|
"step": 10600 |
|
}, |
|
{ |
|
"epoch": 2.29, |
|
"learning_rate": 7.145237232886634e-05, |
|
"loss": 0.7097, |
|
"step": 10620 |
|
}, |
|
{ |
|
"epoch": 2.3, |
|
"learning_rate": 7.101774719304599e-05, |
|
"loss": 0.7143, |
|
"step": 10640 |
|
}, |
|
{ |
|
"epoch": 2.3, |
|
"learning_rate": 7.058312205722564e-05, |
|
"loss": 0.7099, |
|
"step": 10660 |
|
}, |
|
{ |
|
"epoch": 2.3, |
|
"learning_rate": 7.014849692140528e-05, |
|
"loss": 0.6994, |
|
"step": 10680 |
|
}, |
|
{ |
|
"epoch": 2.31, |
|
"learning_rate": 6.971387178558493e-05, |
|
"loss": 0.7129, |
|
"step": 10700 |
|
}, |
|
{ |
|
"epoch": 2.31, |
|
"learning_rate": 6.927924664976458e-05, |
|
"loss": 0.7067, |
|
"step": 10720 |
|
}, |
|
{ |
|
"epoch": 2.32, |
|
"learning_rate": 6.884462151394421e-05, |
|
"loss": 0.7044, |
|
"step": 10740 |
|
}, |
|
{ |
|
"epoch": 2.32, |
|
"learning_rate": 6.840999637812386e-05, |
|
"loss": 0.7092, |
|
"step": 10760 |
|
}, |
|
{ |
|
"epoch": 2.33, |
|
"learning_rate": 6.797537124230351e-05, |
|
"loss": 0.7075, |
|
"step": 10780 |
|
}, |
|
{ |
|
"epoch": 2.33, |
|
"learning_rate": 6.754074610648315e-05, |
|
"loss": 0.7073, |
|
"step": 10800 |
|
}, |
|
{ |
|
"epoch": 2.33, |
|
"eval_loss": 0.7168901562690735, |
|
"eval_runtime": 25.5153, |
|
"eval_samples_per_second": 78.384, |
|
"eval_steps_per_second": 1.254, |
|
"step": 10800 |
|
}, |
|
{ |
|
"epoch": 2.33, |
|
"learning_rate": 6.71061209706628e-05, |
|
"loss": 0.7088, |
|
"step": 10820 |
|
}, |
|
{ |
|
"epoch": 2.34, |
|
"learning_rate": 6.667149583484245e-05, |
|
"loss": 0.7046, |
|
"step": 10840 |
|
}, |
|
{ |
|
"epoch": 2.34, |
|
"learning_rate": 6.623687069902209e-05, |
|
"loss": 0.7029, |
|
"step": 10860 |
|
}, |
|
{ |
|
"epoch": 2.35, |
|
"learning_rate": 6.580224556320174e-05, |
|
"loss": 0.7055, |
|
"step": 10880 |
|
}, |
|
{ |
|
"epoch": 2.35, |
|
"learning_rate": 6.536762042738139e-05, |
|
"loss": 0.7095, |
|
"step": 10900 |
|
}, |
|
{ |
|
"epoch": 2.36, |
|
"learning_rate": 6.493299529156102e-05, |
|
"loss": 0.7057, |
|
"step": 10920 |
|
}, |
|
{ |
|
"epoch": 2.36, |
|
"learning_rate": 6.449837015574066e-05, |
|
"loss": 0.7064, |
|
"step": 10940 |
|
}, |
|
{ |
|
"epoch": 2.36, |
|
"learning_rate": 6.406374501992031e-05, |
|
"loss": 0.7039, |
|
"step": 10960 |
|
}, |
|
{ |
|
"epoch": 2.37, |
|
"learning_rate": 6.362911988409996e-05, |
|
"loss": 0.7109, |
|
"step": 10980 |
|
}, |
|
{ |
|
"epoch": 2.37, |
|
"learning_rate": 6.31944947482796e-05, |
|
"loss": 0.7051, |
|
"step": 11000 |
|
}, |
|
{ |
|
"epoch": 2.37, |
|
"eval_loss": 0.7164381146430969, |
|
"eval_runtime": 25.4817, |
|
"eval_samples_per_second": 78.488, |
|
"eval_steps_per_second": 1.256, |
|
"step": 11000 |
|
}, |
|
{ |
|
"epoch": 2.38, |
|
"learning_rate": 6.275986961245924e-05, |
|
"loss": 0.7117, |
|
"step": 11020 |
|
}, |
|
{ |
|
"epoch": 2.38, |
|
"learning_rate": 6.23252444766389e-05, |
|
"loss": 0.6972, |
|
"step": 11040 |
|
}, |
|
{ |
|
"epoch": 2.39, |
|
"learning_rate": 6.189061934081853e-05, |
|
"loss": 0.7087, |
|
"step": 11060 |
|
}, |
|
{ |
|
"epoch": 2.39, |
|
"learning_rate": 6.145599420499818e-05, |
|
"loss": 0.703, |
|
"step": 11080 |
|
}, |
|
{ |
|
"epoch": 2.39, |
|
"learning_rate": 6.1021369069177825e-05, |
|
"loss": 0.7062, |
|
"step": 11100 |
|
}, |
|
{ |
|
"epoch": 2.4, |
|
"learning_rate": 6.0586743933357475e-05, |
|
"loss": 0.7018, |
|
"step": 11120 |
|
}, |
|
{ |
|
"epoch": 2.4, |
|
"learning_rate": 6.015211879753712e-05, |
|
"loss": 0.7003, |
|
"step": 11140 |
|
}, |
|
{ |
|
"epoch": 2.41, |
|
"learning_rate": 5.971749366171676e-05, |
|
"loss": 0.7005, |
|
"step": 11160 |
|
}, |
|
{ |
|
"epoch": 2.41, |
|
"learning_rate": 5.928286852589641e-05, |
|
"loss": 0.7099, |
|
"step": 11180 |
|
}, |
|
{ |
|
"epoch": 2.42, |
|
"learning_rate": 5.8848243390076054e-05, |
|
"loss": 0.7002, |
|
"step": 11200 |
|
}, |
|
{ |
|
"epoch": 2.42, |
|
"eval_loss": 0.7161288857460022, |
|
"eval_runtime": 25.5084, |
|
"eval_samples_per_second": 78.406, |
|
"eval_steps_per_second": 1.254, |
|
"step": 11200 |
|
}, |
|
{ |
|
"epoch": 2.42, |
|
"learning_rate": 5.84136182542557e-05, |
|
"loss": 0.7071, |
|
"step": 11220 |
|
}, |
|
{ |
|
"epoch": 2.43, |
|
"learning_rate": 5.797899311843535e-05, |
|
"loss": 0.7028, |
|
"step": 11240 |
|
}, |
|
{ |
|
"epoch": 2.43, |
|
"learning_rate": 5.754436798261499e-05, |
|
"loss": 0.7199, |
|
"step": 11260 |
|
}, |
|
{ |
|
"epoch": 2.43, |
|
"learning_rate": 5.7109742846794634e-05, |
|
"loss": 0.6974, |
|
"step": 11280 |
|
}, |
|
{ |
|
"epoch": 2.44, |
|
"learning_rate": 5.6675117710974284e-05, |
|
"loss": 0.7003, |
|
"step": 11300 |
|
}, |
|
{ |
|
"epoch": 2.44, |
|
"learning_rate": 5.624049257515393e-05, |
|
"loss": 0.7079, |
|
"step": 11320 |
|
}, |
|
{ |
|
"epoch": 2.45, |
|
"learning_rate": 5.580586743933357e-05, |
|
"loss": 0.6988, |
|
"step": 11340 |
|
}, |
|
{ |
|
"epoch": 2.45, |
|
"learning_rate": 5.537124230351322e-05, |
|
"loss": 0.7047, |
|
"step": 11360 |
|
}, |
|
{ |
|
"epoch": 2.46, |
|
"learning_rate": 5.493661716769286e-05, |
|
"loss": 0.6946, |
|
"step": 11380 |
|
}, |
|
{ |
|
"epoch": 2.46, |
|
"learning_rate": 5.45019920318725e-05, |
|
"loss": 0.7096, |
|
"step": 11400 |
|
}, |
|
{ |
|
"epoch": 2.46, |
|
"eval_loss": 0.7155815958976746, |
|
"eval_runtime": 25.525, |
|
"eval_samples_per_second": 78.355, |
|
"eval_steps_per_second": 1.254, |
|
"step": 11400 |
|
}, |
|
{ |
|
"epoch": 2.46, |
|
"learning_rate": 5.406736689605215e-05, |
|
"loss": 0.709, |
|
"step": 11420 |
|
}, |
|
{ |
|
"epoch": 2.47, |
|
"learning_rate": 5.3632741760231794e-05, |
|
"loss": 0.7112, |
|
"step": 11440 |
|
}, |
|
{ |
|
"epoch": 2.47, |
|
"learning_rate": 5.319811662441144e-05, |
|
"loss": 0.6983, |
|
"step": 11460 |
|
}, |
|
{ |
|
"epoch": 2.48, |
|
"learning_rate": 5.276349148859109e-05, |
|
"loss": 0.7, |
|
"step": 11480 |
|
}, |
|
{ |
|
"epoch": 2.48, |
|
"learning_rate": 5.232886635277073e-05, |
|
"loss": 0.7006, |
|
"step": 11500 |
|
}, |
|
{ |
|
"epoch": 2.49, |
|
"learning_rate": 5.189424121695037e-05, |
|
"loss": 0.7068, |
|
"step": 11520 |
|
}, |
|
{ |
|
"epoch": 2.49, |
|
"learning_rate": 5.1459616081130023e-05, |
|
"loss": 0.7012, |
|
"step": 11540 |
|
}, |
|
{ |
|
"epoch": 2.49, |
|
"learning_rate": 5.102499094530967e-05, |
|
"loss": 0.7079, |
|
"step": 11560 |
|
}, |
|
{ |
|
"epoch": 2.5, |
|
"learning_rate": 5.059036580948931e-05, |
|
"loss": 0.7031, |
|
"step": 11580 |
|
}, |
|
{ |
|
"epoch": 2.5, |
|
"learning_rate": 5.015574067366896e-05, |
|
"loss": 0.7038, |
|
"step": 11600 |
|
}, |
|
{ |
|
"epoch": 2.5, |
|
"eval_loss": 0.7149330973625183, |
|
"eval_runtime": 25.4843, |
|
"eval_samples_per_second": 78.48, |
|
"eval_steps_per_second": 1.256, |
|
"step": 11600 |
|
}, |
|
{ |
|
"epoch": 2.51, |
|
"learning_rate": 4.97211155378486e-05, |
|
"loss": 0.6972, |
|
"step": 11620 |
|
}, |
|
{ |
|
"epoch": 2.51, |
|
"learning_rate": 4.9286490402028246e-05, |
|
"loss": 0.7039, |
|
"step": 11640 |
|
}, |
|
{ |
|
"epoch": 2.52, |
|
"learning_rate": 4.885186526620789e-05, |
|
"loss": 0.7052, |
|
"step": 11660 |
|
}, |
|
{ |
|
"epoch": 2.52, |
|
"learning_rate": 4.841724013038754e-05, |
|
"loss": 0.7045, |
|
"step": 11680 |
|
}, |
|
{ |
|
"epoch": 2.52, |
|
"learning_rate": 4.798261499456718e-05, |
|
"loss": 0.701, |
|
"step": 11700 |
|
}, |
|
{ |
|
"epoch": 2.53, |
|
"learning_rate": 4.7547989858746826e-05, |
|
"loss": 0.7084, |
|
"step": 11720 |
|
}, |
|
{ |
|
"epoch": 2.53, |
|
"learning_rate": 4.7113364722926476e-05, |
|
"loss": 0.6988, |
|
"step": 11740 |
|
}, |
|
{ |
|
"epoch": 2.54, |
|
"learning_rate": 4.667873958710612e-05, |
|
"loss": 0.7155, |
|
"step": 11760 |
|
}, |
|
{ |
|
"epoch": 2.54, |
|
"learning_rate": 4.624411445128576e-05, |
|
"loss": 0.7044, |
|
"step": 11780 |
|
}, |
|
{ |
|
"epoch": 2.55, |
|
"learning_rate": 4.5809489315465406e-05, |
|
"loss": 0.7014, |
|
"step": 11800 |
|
}, |
|
{ |
|
"epoch": 2.55, |
|
"eval_loss": 0.714367151260376, |
|
"eval_runtime": 25.4959, |
|
"eval_samples_per_second": 78.444, |
|
"eval_steps_per_second": 1.255, |
|
"step": 11800 |
|
}, |
|
{ |
|
"epoch": 2.55, |
|
"learning_rate": 4.537486417964505e-05, |
|
"loss": 0.708, |
|
"step": 11820 |
|
}, |
|
{ |
|
"epoch": 2.55, |
|
"learning_rate": 4.494023904382469e-05, |
|
"loss": 0.6976, |
|
"step": 11840 |
|
}, |
|
{ |
|
"epoch": 2.56, |
|
"learning_rate": 4.450561390800434e-05, |
|
"loss": 0.7057, |
|
"step": 11860 |
|
}, |
|
{ |
|
"epoch": 2.56, |
|
"learning_rate": 4.4070988772183986e-05, |
|
"loss": 0.7039, |
|
"step": 11880 |
|
}, |
|
{ |
|
"epoch": 2.57, |
|
"learning_rate": 4.363636363636363e-05, |
|
"loss": 0.7089, |
|
"step": 11900 |
|
}, |
|
{ |
|
"epoch": 2.57, |
|
"learning_rate": 4.320173850054328e-05, |
|
"loss": 0.7026, |
|
"step": 11920 |
|
}, |
|
{ |
|
"epoch": 2.58, |
|
"learning_rate": 4.276711336472292e-05, |
|
"loss": 0.7023, |
|
"step": 11940 |
|
}, |
|
{ |
|
"epoch": 2.58, |
|
"learning_rate": 4.2332488228902565e-05, |
|
"loss": 0.7006, |
|
"step": 11960 |
|
}, |
|
{ |
|
"epoch": 2.58, |
|
"learning_rate": 4.1897863093082215e-05, |
|
"loss": 0.7008, |
|
"step": 11980 |
|
}, |
|
{ |
|
"epoch": 2.59, |
|
"learning_rate": 4.146323795726186e-05, |
|
"loss": 0.7057, |
|
"step": 12000 |
|
}, |
|
{ |
|
"epoch": 2.59, |
|
"eval_loss": 0.7141902446746826, |
|
"eval_runtime": 25.5019, |
|
"eval_samples_per_second": 78.426, |
|
"eval_steps_per_second": 1.255, |
|
"step": 12000 |
|
}, |
|
{ |
|
"epoch": 2.59, |
|
"learning_rate": 4.10286128214415e-05, |
|
"loss": 0.7083, |
|
"step": 12020 |
|
}, |
|
{ |
|
"epoch": 2.6, |
|
"learning_rate": 4.059398768562115e-05, |
|
"loss": 0.6986, |
|
"step": 12040 |
|
}, |
|
{ |
|
"epoch": 2.6, |
|
"learning_rate": 4.0159362549800795e-05, |
|
"loss": 0.7076, |
|
"step": 12060 |
|
}, |
|
{ |
|
"epoch": 2.61, |
|
"learning_rate": 3.972473741398044e-05, |
|
"loss": 0.7071, |
|
"step": 12080 |
|
}, |
|
{ |
|
"epoch": 2.61, |
|
"learning_rate": 3.929011227816009e-05, |
|
"loss": 0.6984, |
|
"step": 12100 |
|
}, |
|
{ |
|
"epoch": 2.61, |
|
"learning_rate": 3.885548714233973e-05, |
|
"loss": 0.7096, |
|
"step": 12120 |
|
}, |
|
{ |
|
"epoch": 2.62, |
|
"learning_rate": 3.8420862006519375e-05, |
|
"loss": 0.7027, |
|
"step": 12140 |
|
}, |
|
{ |
|
"epoch": 2.62, |
|
"learning_rate": 3.7986236870699025e-05, |
|
"loss": 0.7062, |
|
"step": 12160 |
|
}, |
|
{ |
|
"epoch": 2.63, |
|
"learning_rate": 3.755161173487867e-05, |
|
"loss": 0.7049, |
|
"step": 12180 |
|
}, |
|
{ |
|
"epoch": 2.63, |
|
"learning_rate": 3.711698659905831e-05, |
|
"loss": 0.7052, |
|
"step": 12200 |
|
}, |
|
{ |
|
"epoch": 2.63, |
|
"eval_loss": 0.7140177488327026, |
|
"eval_runtime": 25.4673, |
|
"eval_samples_per_second": 78.532, |
|
"eval_steps_per_second": 1.257, |
|
"step": 12200 |
|
}, |
|
{ |
|
"epoch": 2.64, |
|
"learning_rate": 3.6682361463237955e-05, |
|
"loss": 0.7011, |
|
"step": 12220 |
|
}, |
|
{ |
|
"epoch": 2.64, |
|
"learning_rate": 3.62477363274176e-05, |
|
"loss": 0.7025, |
|
"step": 12240 |
|
}, |
|
{ |
|
"epoch": 2.65, |
|
"learning_rate": 3.581311119159725e-05, |
|
"loss": 0.7006, |
|
"step": 12260 |
|
}, |
|
{ |
|
"epoch": 2.65, |
|
"learning_rate": 3.537848605577689e-05, |
|
"loss": 0.7073, |
|
"step": 12280 |
|
}, |
|
{ |
|
"epoch": 2.65, |
|
"learning_rate": 3.4943860919956534e-05, |
|
"loss": 0.7033, |
|
"step": 12300 |
|
}, |
|
{ |
|
"epoch": 2.66, |
|
"learning_rate": 3.4509235784136184e-05, |
|
"loss": 0.6992, |
|
"step": 12320 |
|
}, |
|
{ |
|
"epoch": 2.66, |
|
"learning_rate": 3.407461064831582e-05, |
|
"loss": 0.7043, |
|
"step": 12340 |
|
}, |
|
{ |
|
"epoch": 2.67, |
|
"learning_rate": 3.363998551249547e-05, |
|
"loss": 0.7083, |
|
"step": 12360 |
|
}, |
|
{ |
|
"epoch": 2.67, |
|
"learning_rate": 3.3205360376675114e-05, |
|
"loss": 0.7086, |
|
"step": 12380 |
|
}, |
|
{ |
|
"epoch": 2.68, |
|
"learning_rate": 3.277073524085476e-05, |
|
"loss": 0.7168, |
|
"step": 12400 |
|
}, |
|
{ |
|
"epoch": 2.68, |
|
"eval_loss": 0.7138265371322632, |
|
"eval_runtime": 25.5077, |
|
"eval_samples_per_second": 78.408, |
|
"eval_steps_per_second": 1.255, |
|
"step": 12400 |
|
}, |
|
{ |
|
"epoch": 2.68, |
|
"learning_rate": 3.233611010503441e-05, |
|
"loss": 0.7026, |
|
"step": 12420 |
|
}, |
|
{ |
|
"epoch": 2.68, |
|
"learning_rate": 3.190148496921405e-05, |
|
"loss": 0.7097, |
|
"step": 12440 |
|
}, |
|
{ |
|
"epoch": 2.69, |
|
"learning_rate": 3.1466859833393694e-05, |
|
"loss": 0.7094, |
|
"step": 12460 |
|
}, |
|
{ |
|
"epoch": 2.69, |
|
"learning_rate": 3.1032234697573344e-05, |
|
"loss": 0.6971, |
|
"step": 12480 |
|
}, |
|
{ |
|
"epoch": 2.7, |
|
"learning_rate": 3.059760956175299e-05, |
|
"loss": 0.6977, |
|
"step": 12500 |
|
}, |
|
{ |
|
"epoch": 2.7, |
|
"learning_rate": 3.016298442593263e-05, |
|
"loss": 0.6945, |
|
"step": 12520 |
|
}, |
|
{ |
|
"epoch": 2.71, |
|
"learning_rate": 2.9728359290112277e-05, |
|
"loss": 0.6998, |
|
"step": 12540 |
|
}, |
|
{ |
|
"epoch": 2.71, |
|
"learning_rate": 2.929373415429192e-05, |
|
"loss": 0.7067, |
|
"step": 12560 |
|
}, |
|
{ |
|
"epoch": 2.71, |
|
"learning_rate": 2.8859109018471563e-05, |
|
"loss": 0.6935, |
|
"step": 12580 |
|
}, |
|
{ |
|
"epoch": 2.72, |
|
"learning_rate": 2.842448388265121e-05, |
|
"loss": 0.6927, |
|
"step": 12600 |
|
}, |
|
{ |
|
"epoch": 2.72, |
|
"eval_loss": 0.7132371664047241, |
|
"eval_runtime": 25.516, |
|
"eval_samples_per_second": 78.382, |
|
"eval_steps_per_second": 1.254, |
|
"step": 12600 |
|
}, |
|
{ |
|
"epoch": 2.72, |
|
"learning_rate": 2.7989858746830857e-05, |
|
"loss": 0.7025, |
|
"step": 12620 |
|
}, |
|
{ |
|
"epoch": 2.73, |
|
"learning_rate": 2.75552336110105e-05, |
|
"loss": 0.7098, |
|
"step": 12640 |
|
}, |
|
{ |
|
"epoch": 2.73, |
|
"learning_rate": 2.7120608475190147e-05, |
|
"loss": 0.6939, |
|
"step": 12660 |
|
}, |
|
{ |
|
"epoch": 2.74, |
|
"learning_rate": 2.6685983339369793e-05, |
|
"loss": 0.7038, |
|
"step": 12680 |
|
}, |
|
{ |
|
"epoch": 2.74, |
|
"learning_rate": 2.6251358203549436e-05, |
|
"loss": 0.7039, |
|
"step": 12700 |
|
}, |
|
{ |
|
"epoch": 2.74, |
|
"learning_rate": 2.5816733067729083e-05, |
|
"loss": 0.7018, |
|
"step": 12720 |
|
}, |
|
{ |
|
"epoch": 2.75, |
|
"learning_rate": 2.538210793190873e-05, |
|
"loss": 0.6943, |
|
"step": 12740 |
|
}, |
|
{ |
|
"epoch": 2.75, |
|
"learning_rate": 2.4947482796088373e-05, |
|
"loss": 0.7007, |
|
"step": 12760 |
|
}, |
|
{ |
|
"epoch": 2.76, |
|
"learning_rate": 2.4512857660268016e-05, |
|
"loss": 0.7019, |
|
"step": 12780 |
|
}, |
|
{ |
|
"epoch": 2.76, |
|
"learning_rate": 2.407823252444766e-05, |
|
"loss": 0.6957, |
|
"step": 12800 |
|
}, |
|
{ |
|
"epoch": 2.76, |
|
"eval_loss": 0.7126932144165039, |
|
"eval_runtime": 25.4915, |
|
"eval_samples_per_second": 78.458, |
|
"eval_steps_per_second": 1.255, |
|
"step": 12800 |
|
}, |
|
{ |
|
"epoch": 2.77, |
|
"learning_rate": 2.3643607388627306e-05, |
|
"loss": 0.6993, |
|
"step": 12820 |
|
}, |
|
{ |
|
"epoch": 2.77, |
|
"learning_rate": 2.3208982252806953e-05, |
|
"loss": 0.6951, |
|
"step": 12840 |
|
}, |
|
{ |
|
"epoch": 2.77, |
|
"learning_rate": 2.2774357116986596e-05, |
|
"loss": 0.7056, |
|
"step": 12860 |
|
}, |
|
{ |
|
"epoch": 2.78, |
|
"learning_rate": 2.2339731981166243e-05, |
|
"loss": 0.7153, |
|
"step": 12880 |
|
}, |
|
{ |
|
"epoch": 2.78, |
|
"learning_rate": 2.190510684534589e-05, |
|
"loss": 0.7022, |
|
"step": 12900 |
|
}, |
|
{ |
|
"epoch": 2.79, |
|
"learning_rate": 2.1470481709525532e-05, |
|
"loss": 0.7078, |
|
"step": 12920 |
|
}, |
|
{ |
|
"epoch": 2.79, |
|
"learning_rate": 2.103585657370518e-05, |
|
"loss": 0.6969, |
|
"step": 12940 |
|
}, |
|
{ |
|
"epoch": 2.8, |
|
"learning_rate": 2.0601231437884826e-05, |
|
"loss": 0.7056, |
|
"step": 12960 |
|
}, |
|
{ |
|
"epoch": 2.8, |
|
"learning_rate": 2.016660630206447e-05, |
|
"loss": 0.6975, |
|
"step": 12980 |
|
}, |
|
{ |
|
"epoch": 2.8, |
|
"learning_rate": 1.9731981166244112e-05, |
|
"loss": 0.7065, |
|
"step": 13000 |
|
}, |
|
{ |
|
"epoch": 2.8, |
|
"eval_loss": 0.7130131721496582, |
|
"eval_runtime": 25.4905, |
|
"eval_samples_per_second": 78.461, |
|
"eval_steps_per_second": 1.255, |
|
"step": 13000 |
|
}, |
|
{ |
|
"epoch": 2.81, |
|
"learning_rate": 1.9297356030423755e-05, |
|
"loss": 0.7, |
|
"step": 13020 |
|
}, |
|
{ |
|
"epoch": 2.81, |
|
"learning_rate": 1.8862730894603402e-05, |
|
"loss": 0.7144, |
|
"step": 13040 |
|
}, |
|
{ |
|
"epoch": 2.82, |
|
"learning_rate": 1.842810575878305e-05, |
|
"loss": 0.6964, |
|
"step": 13060 |
|
}, |
|
{ |
|
"epoch": 2.82, |
|
"learning_rate": 1.7993480622962692e-05, |
|
"loss": 0.6981, |
|
"step": 13080 |
|
}, |
|
{ |
|
"epoch": 2.83, |
|
"learning_rate": 1.755885548714234e-05, |
|
"loss": 0.7102, |
|
"step": 13100 |
|
}, |
|
{ |
|
"epoch": 2.83, |
|
"learning_rate": 1.7124230351321985e-05, |
|
"loss": 0.6975, |
|
"step": 13120 |
|
}, |
|
{ |
|
"epoch": 2.83, |
|
"learning_rate": 1.668960521550163e-05, |
|
"loss": 0.7062, |
|
"step": 13140 |
|
}, |
|
{ |
|
"epoch": 2.84, |
|
"learning_rate": 1.625498007968127e-05, |
|
"loss": 0.6956, |
|
"step": 13160 |
|
}, |
|
{ |
|
"epoch": 2.84, |
|
"learning_rate": 1.5820354943860918e-05, |
|
"loss": 0.71, |
|
"step": 13180 |
|
}, |
|
{ |
|
"epoch": 2.85, |
|
"learning_rate": 1.5385729808040565e-05, |
|
"loss": 0.7081, |
|
"step": 13200 |
|
}, |
|
{ |
|
"epoch": 2.85, |
|
"eval_loss": 0.7126001119613647, |
|
"eval_runtime": 25.5102, |
|
"eval_samples_per_second": 78.4, |
|
"eval_steps_per_second": 1.254, |
|
"step": 13200 |
|
}, |
|
{ |
|
"epoch": 2.85, |
|
"learning_rate": 1.495110467222021e-05, |
|
"loss": 0.6977, |
|
"step": 13220 |
|
}, |
|
{ |
|
"epoch": 2.86, |
|
"learning_rate": 1.4516479536399855e-05, |
|
"loss": 0.705, |
|
"step": 13240 |
|
}, |
|
{ |
|
"epoch": 2.86, |
|
"learning_rate": 1.4081854400579498e-05, |
|
"loss": 0.7016, |
|
"step": 13260 |
|
}, |
|
{ |
|
"epoch": 2.87, |
|
"learning_rate": 1.3647229264759143e-05, |
|
"loss": 0.6922, |
|
"step": 13280 |
|
}, |
|
{ |
|
"epoch": 2.87, |
|
"learning_rate": 1.321260412893879e-05, |
|
"loss": 0.6987, |
|
"step": 13300 |
|
}, |
|
{ |
|
"epoch": 2.87, |
|
"learning_rate": 1.2777978993118434e-05, |
|
"loss": 0.7041, |
|
"step": 13320 |
|
}, |
|
{ |
|
"epoch": 2.88, |
|
"learning_rate": 1.234335385729808e-05, |
|
"loss": 0.7101, |
|
"step": 13340 |
|
}, |
|
{ |
|
"epoch": 2.88, |
|
"learning_rate": 1.1908728721477723e-05, |
|
"loss": 0.6976, |
|
"step": 13360 |
|
}, |
|
{ |
|
"epoch": 2.89, |
|
"learning_rate": 1.147410358565737e-05, |
|
"loss": 0.7011, |
|
"step": 13380 |
|
}, |
|
{ |
|
"epoch": 2.89, |
|
"learning_rate": 1.1039478449837014e-05, |
|
"loss": 0.6973, |
|
"step": 13400 |
|
}, |
|
{ |
|
"epoch": 2.89, |
|
"eval_loss": 0.7123447060585022, |
|
"eval_runtime": 25.5029, |
|
"eval_samples_per_second": 78.422, |
|
"eval_steps_per_second": 1.255, |
|
"step": 13400 |
|
} |
|
], |
|
"max_steps": 13905, |
|
"num_train_epochs": 3, |
|
"total_flos": 6.775116663531084e+19, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|