| { |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 2.989247311827957, |
| "eval_steps": 500, |
| "global_step": 696, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.043010752688172046, |
| "grad_norm": 3.3071749210357666, |
| "learning_rate": 2.9984721919587606e-05, |
| "loss": 0.9267, |
| "num_input_tokens_seen": 26208, |
| "step": 10 |
| }, |
| { |
| "epoch": 0.08602150537634409, |
| "grad_norm": 1.9867345094680786, |
| "learning_rate": 2.9938918800982563e-05, |
| "loss": 0.1561, |
| "num_input_tokens_seen": 53152, |
| "step": 20 |
| }, |
| { |
| "epoch": 0.12903225806451613, |
| "grad_norm": 10.124670028686523, |
| "learning_rate": 2.9862683948682103e-05, |
| "loss": 0.1764, |
| "num_input_tokens_seen": 79776, |
| "step": 30 |
| }, |
| { |
| "epoch": 0.17204301075268819, |
| "grad_norm": 2.0668740272521973, |
| "learning_rate": 2.975617265898004e-05, |
| "loss": 0.1505, |
| "num_input_tokens_seen": 106496, |
| "step": 40 |
| }, |
| { |
| "epoch": 0.21505376344086022, |
| "grad_norm": 2.0054426193237305, |
| "learning_rate": 2.961960190361624e-05, |
| "loss": 0.1615, |
| "num_input_tokens_seen": 133728, |
| "step": 50 |
| }, |
| { |
| "epoch": 0.25806451612903225, |
| "grad_norm": 2.5156946182250977, |
| "learning_rate": 2.9453249887788343e-05, |
| "loss": 0.1369, |
| "num_input_tokens_seen": 159936, |
| "step": 60 |
| }, |
| { |
| "epoch": 0.3010752688172043, |
| "grad_norm": 1.1180003881454468, |
| "learning_rate": 2.925745548342631e-05, |
| "loss": 0.1253, |
| "num_input_tokens_seen": 186240, |
| "step": 70 |
| }, |
| { |
| "epoch": 0.34408602150537637, |
| "grad_norm": 13.929261207580566, |
| "learning_rate": 2.9032617538884018e-05, |
| "loss": 0.1479, |
| "num_input_tokens_seen": 212832, |
| "step": 80 |
| }, |
| { |
| "epoch": 0.3870967741935484, |
| "grad_norm": 2.071075439453125, |
| "learning_rate": 2.877919406645433e-05, |
| "loss": 0.1102, |
| "num_input_tokens_seen": 239648, |
| "step": 90 |
| }, |
| { |
| "epoch": 0.43010752688172044, |
| "grad_norm": 2.170992374420166, |
| "learning_rate": 2.84977013093626e-05, |
| "loss": 0.099, |
| "num_input_tokens_seen": 265888, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.4731182795698925, |
| "grad_norm": 5.617093086242676, |
| "learning_rate": 2.818871269013928e-05, |
| "loss": 0.107, |
| "num_input_tokens_seen": 292480, |
| "step": 110 |
| }, |
| { |
| "epoch": 0.5161290322580645, |
| "grad_norm": 6.3603010177612305, |
| "learning_rate": 2.7852857642513838e-05, |
| "loss": 0.1183, |
| "num_input_tokens_seen": 318784, |
| "step": 120 |
| }, |
| { |
| "epoch": 0.5591397849462365, |
| "grad_norm": 3.8436505794525146, |
| "learning_rate": 2.7490820329209546e-05, |
| "loss": 0.1097, |
| "num_input_tokens_seen": 346016, |
| "step": 130 |
| }, |
| { |
| "epoch": 0.6021505376344086, |
| "grad_norm": 1.736432671546936, |
| "learning_rate": 2.7103338248251055e-05, |
| "loss": 0.0946, |
| "num_input_tokens_seen": 372384, |
| "step": 140 |
| }, |
| { |
| "epoch": 0.6451612903225806, |
| "grad_norm": 8.300951957702637, |
| "learning_rate": 2.6691200730623874e-05, |
| "loss": 0.1251, |
| "num_input_tokens_seen": 399328, |
| "step": 150 |
| }, |
| { |
| "epoch": 0.6881720430107527, |
| "grad_norm": 2.540724277496338, |
| "learning_rate": 2.6255247332346036e-05, |
| "loss": 0.1069, |
| "num_input_tokens_seen": 426048, |
| "step": 160 |
| }, |
| { |
| "epoch": 0.7311827956989247, |
| "grad_norm": 1.967483639717102, |
| "learning_rate": 2.5796366124227532e-05, |
| "loss": 0.0904, |
| "num_input_tokens_seen": 452640, |
| "step": 170 |
| }, |
| { |
| "epoch": 0.7741935483870968, |
| "grad_norm": 5.206757545471191, |
| "learning_rate": 2.531549188280135e-05, |
| "loss": 0.1273, |
| "num_input_tokens_seen": 479808, |
| "step": 180 |
| }, |
| { |
| "epoch": 0.8172043010752689, |
| "grad_norm": 3.0387344360351562, |
| "learning_rate": 2.481360418611132e-05, |
| "loss": 0.1206, |
| "num_input_tokens_seen": 506176, |
| "step": 190 |
| }, |
| { |
| "epoch": 0.8602150537634409, |
| "grad_norm": 2.0281670093536377, |
| "learning_rate": 2.4291725418235848e-05, |
| "loss": 0.103, |
| "num_input_tokens_seen": 533216, |
| "step": 200 |
| }, |
| { |
| "epoch": 0.9032258064516129, |
| "grad_norm": 2.5650763511657715, |
| "learning_rate": 2.3750918686612414e-05, |
| "loss": 0.0696, |
| "num_input_tokens_seen": 561056, |
| "step": 210 |
| }, |
| { |
| "epoch": 0.946236559139785, |
| "grad_norm": 8.955713272094727, |
| "learning_rate": 2.3192285656405456e-05, |
| "loss": 0.0822, |
| "num_input_tokens_seen": 588160, |
| "step": 220 |
| }, |
| { |
| "epoch": 0.989247311827957, |
| "grad_norm": 3.034013032913208, |
| "learning_rate": 2.2616964306329183e-05, |
| "loss": 0.0913, |
| "num_input_tokens_seen": 615168, |
| "step": 230 |
| }, |
| { |
| "epoch": 1.0301075268817204, |
| "grad_norm": 2.60020112991333, |
| "learning_rate": 2.2026126610496852e-05, |
| "loss": 0.0735, |
| "num_input_tokens_seen": 639864, |
| "step": 240 |
| }, |
| { |
| "epoch": 1.0731182795698926, |
| "grad_norm": 4.891764163970947, |
| "learning_rate": 2.1420976151018813e-05, |
| "loss": 0.0752, |
| "num_input_tokens_seen": 667224, |
| "step": 250 |
| }, |
| { |
| "epoch": 1.1161290322580646, |
| "grad_norm": 1.1149002313613892, |
| "learning_rate": 2.0802745666212592e-05, |
| "loss": 0.0588, |
| "num_input_tokens_seen": 693848, |
| "step": 260 |
| }, |
| { |
| "epoch": 1.1591397849462366, |
| "grad_norm": 3.1601271629333496, |
| "learning_rate": 2.0172694539419557e-05, |
| "loss": 0.0924, |
| "num_input_tokens_seen": 720568, |
| "step": 270 |
| }, |
| { |
| "epoch": 1.2021505376344086, |
| "grad_norm": 3.555192470550537, |
| "learning_rate": 1.953210623354359e-05, |
| "loss": 0.062, |
| "num_input_tokens_seen": 746872, |
| "step": 280 |
| }, |
| { |
| "epoch": 1.2451612903225806, |
| "grad_norm": 2.235698699951172, |
| "learning_rate": 1.888228567653781e-05, |
| "loss": 0.0621, |
| "num_input_tokens_seen": 773720, |
| "step": 290 |
| }, |
| { |
| "epoch": 1.2881720430107526, |
| "grad_norm": 2.9058539867401123, |
| "learning_rate": 1.8224556603165363e-05, |
| "loss": 0.075, |
| "num_input_tokens_seen": 801464, |
| "step": 300 |
| }, |
| { |
| "epoch": 1.3311827956989246, |
| "grad_norm": 3.204787015914917, |
| "learning_rate": 1.7560258858449248e-05, |
| "loss": 0.0858, |
| "num_input_tokens_seen": 829144, |
| "step": 310 |
| }, |
| { |
| "epoch": 1.3741935483870968, |
| "grad_norm": 1.0885004997253418, |
| "learning_rate": 1.689074566830434e-05, |
| "loss": 0.0697, |
| "num_input_tokens_seen": 855672, |
| "step": 320 |
| }, |
| { |
| "epoch": 1.4172043010752688, |
| "grad_norm": 3.0750925540924072, |
| "learning_rate": 1.621738088291147e-05, |
| "loss": 0.0827, |
| "num_input_tokens_seen": 882424, |
| "step": 330 |
| }, |
| { |
| "epoch": 1.4602150537634409, |
| "grad_norm": 2.689297914505005, |
| "learning_rate": 1.5541536198449044e-05, |
| "loss": 0.0651, |
| "num_input_tokens_seen": 908792, |
| "step": 340 |
| }, |
| { |
| "epoch": 1.5032258064516129, |
| "grad_norm": 2.297851324081421, |
| "learning_rate": 1.4864588362841808e-05, |
| "loss": 0.0607, |
| "num_input_tokens_seen": 935672, |
| "step": 350 |
| }, |
| { |
| "epoch": 1.546236559139785, |
| "grad_norm": 2.712674140930176, |
| "learning_rate": 1.4187916371218739e-05, |
| "loss": 0.056, |
| "num_input_tokens_seen": 961848, |
| "step": 360 |
| }, |
| { |
| "epoch": 1.589247311827957, |
| "grad_norm": 0.8086225986480713, |
| "learning_rate": 1.3512898656793283e-05, |
| "loss": 0.0823, |
| "num_input_tokens_seen": 988600, |
| "step": 370 |
| }, |
| { |
| "epoch": 1.632258064516129, |
| "grad_norm": 2.166210174560547, |
| "learning_rate": 1.2840910282888211e-05, |
| "loss": 0.058, |
| "num_input_tokens_seen": 1014840, |
| "step": 380 |
| }, |
| { |
| "epoch": 1.675268817204301, |
| "grad_norm": 5.169621467590332, |
| "learning_rate": 1.2173320141825232e-05, |
| "loss": 0.0705, |
| "num_input_tokens_seen": 1040856, |
| "step": 390 |
| }, |
| { |
| "epoch": 1.718279569892473, |
| "grad_norm": 1.8176069259643555, |
| "learning_rate": 1.1511488166385349e-05, |
| "loss": 0.0514, |
| "num_input_tokens_seen": 1067544, |
| "step": 400 |
| }, |
| { |
| "epoch": 1.761290322580645, |
| "grad_norm": 3.424694776535034, |
| "learning_rate": 1.0856762559520605e-05, |
| "loss": 0.0834, |
| "num_input_tokens_seen": 1094584, |
| "step": 410 |
| }, |
| { |
| "epoch": 1.8043010752688171, |
| "grad_norm": 1.8838876485824585, |
| "learning_rate": 1.0210477047960303e-05, |
| "loss": 0.0583, |
| "num_input_tokens_seen": 1120760, |
| "step": 420 |
| }, |
| { |
| "epoch": 1.8473118279569891, |
| "grad_norm": 3.7757434844970703, |
| "learning_rate": 9.573948165306438e-06, |
| "loss": 0.0922, |
| "num_input_tokens_seen": 1146776, |
| "step": 430 |
| }, |
| { |
| "epoch": 1.8903225806451613, |
| "grad_norm": 3.0619328022003174, |
| "learning_rate": 8.948472570152874e-06, |
| "loss": 0.0633, |
| "num_input_tokens_seen": 1174424, |
| "step": 440 |
| }, |
| { |
| "epoch": 1.9333333333333333, |
| "grad_norm": 2.5175821781158447, |
| "learning_rate": 8.33532440469145e-06, |
| "loss": 0.0597, |
| "num_input_tokens_seen": 1201048, |
| "step": 450 |
| }, |
| { |
| "epoch": 1.9763440860215054, |
| "grad_norm": 3.6232197284698486, |
| "learning_rate": 7.735752699185711e-06, |
| "loss": 0.0491, |
| "num_input_tokens_seen": 1227576, |
| "step": 460 |
| }, |
| { |
| "epoch": 2.0172043010752687, |
| "grad_norm": 2.8846399784088135, |
| "learning_rate": 7.150978827599619e-06, |
| "loss": 0.0341, |
| "num_input_tokens_seen": 1252160, |
| "step": 470 |
| }, |
| { |
| "epoch": 2.0602150537634407, |
| "grad_norm": 2.196216106414795, |
| "learning_rate": 6.582194019564266e-06, |
| "loss": 0.0373, |
| "num_input_tokens_seen": 1279328, |
| "step": 480 |
| }, |
| { |
| "epoch": 2.1032258064516127, |
| "grad_norm": 1.4616115093231201, |
| "learning_rate": 6.0305569337509225e-06, |
| "loss": 0.0281, |
| "num_input_tokens_seen": 1306304, |
| "step": 490 |
| }, |
| { |
| "epoch": 2.146236559139785, |
| "grad_norm": 0.17581823468208313, |
| "learning_rate": 5.497191297593647e-06, |
| "loss": 0.0183, |
| "num_input_tokens_seen": 1333184, |
| "step": 500 |
| }, |
| { |
| "epoch": 2.189247311827957, |
| "grad_norm": 3.8919403553009033, |
| "learning_rate": 4.98318361816957e-06, |
| "loss": 0.0334, |
| "num_input_tokens_seen": 1359872, |
| "step": 510 |
| }, |
| { |
| "epoch": 2.232258064516129, |
| "grad_norm": 1.3041765689849854, |
| "learning_rate": 4.4895809688998655e-06, |
| "loss": 0.0282, |
| "num_input_tokens_seen": 1387328, |
| "step": 520 |
| }, |
| { |
| "epoch": 2.275268817204301, |
| "grad_norm": 1.669753074645996, |
| "learning_rate": 4.017388856580178e-06, |
| "loss": 0.0562, |
| "num_input_tokens_seen": 1414816, |
| "step": 530 |
| }, |
| { |
| "epoch": 2.318279569892473, |
| "grad_norm": 0.28061679005622864, |
| "learning_rate": 3.567569173085455e-06, |
| "loss": 0.0243, |
| "num_input_tokens_seen": 1441504, |
| "step": 540 |
| }, |
| { |
| "epoch": 2.361290322580645, |
| "grad_norm": 2.324270009994507, |
| "learning_rate": 3.1410382359217645e-06, |
| "loss": 0.044, |
| "num_input_tokens_seen": 1467680, |
| "step": 550 |
| }, |
| { |
| "epoch": 2.404301075268817, |
| "grad_norm": 2.708113670349121, |
| "learning_rate": 2.7386649216166233e-06, |
| "loss": 0.0551, |
| "num_input_tokens_seen": 1494176, |
| "step": 560 |
| }, |
| { |
| "epoch": 2.447311827956989, |
| "grad_norm": 3.16683030128479, |
| "learning_rate": 2.361268895750264e-06, |
| "loss": 0.0258, |
| "num_input_tokens_seen": 1520544, |
| "step": 570 |
| }, |
| { |
| "epoch": 2.490322580645161, |
| "grad_norm": 6.040332794189453, |
| "learning_rate": 2.0096189432334194e-06, |
| "loss": 0.0415, |
| "num_input_tokens_seen": 1547264, |
| "step": 580 |
| }, |
| { |
| "epoch": 2.533333333333333, |
| "grad_norm": 5.078160285949707, |
| "learning_rate": 1.6844314022329676e-06, |
| "loss": 0.0375, |
| "num_input_tokens_seen": 1573920, |
| "step": 590 |
| }, |
| { |
| "epoch": 2.576344086021505, |
| "grad_norm": 4.950022220611572, |
| "learning_rate": 1.3863687049356465e-06, |
| "loss": 0.0235, |
| "num_input_tokens_seen": 1600640, |
| "step": 600 |
| }, |
| { |
| "epoch": 2.6193548387096772, |
| "grad_norm": 1.7687643766403198, |
| "learning_rate": 1.116038028122413e-06, |
| "loss": 0.0354, |
| "num_input_tokens_seen": 1626848, |
| "step": 610 |
| }, |
| { |
| "epoch": 2.6623655913978492, |
| "grad_norm": 3.893580913543701, |
| "learning_rate": 8.7399005630238e-07, |
| "loss": 0.0357, |
| "num_input_tokens_seen": 1653408, |
| "step": 620 |
| }, |
| { |
| "epoch": 2.7053763440860212, |
| "grad_norm": 2.830453395843506, |
| "learning_rate": 6.607178599258268e-07, |
| "loss": 0.0512, |
| "num_input_tokens_seen": 1679968, |
| "step": 630 |
| }, |
| { |
| "epoch": 2.7483870967741937, |
| "grad_norm": 3.638772487640381, |
| "learning_rate": 4.766558909615504e-07, |
| "loss": 0.0243, |
| "num_input_tokens_seen": 1706944, |
| "step": 640 |
| }, |
| { |
| "epoch": 2.7913978494623657, |
| "grad_norm": 1.229244589805603, |
| "learning_rate": 3.22179097884579e-07, |
| "loss": 0.0367, |
| "num_input_tokens_seen": 1733888, |
| "step": 650 |
| }, |
| { |
| "epoch": 2.8344086021505377, |
| "grad_norm": 2.9207515716552734, |
| "learning_rate": 1.9760216187710788e-07, |
| "loss": 0.0377, |
| "num_input_tokens_seen": 1760448, |
| "step": 660 |
| }, |
| { |
| "epoch": 2.8774193548387097, |
| "grad_norm": 3.560971975326538, |
| "learning_rate": 1.0317885579858522e-07, |
| "loss": 0.0471, |
| "num_input_tokens_seen": 1787072, |
| "step": 670 |
| }, |
| { |
| "epoch": 2.9204301075268817, |
| "grad_norm": 0.2031625360250473, |
| "learning_rate": 3.910152723075322e-08, |
| "loss": 0.0222, |
| "num_input_tokens_seen": 1813632, |
| "step": 680 |
| }, |
| { |
| "epoch": 2.9634408602150537, |
| "grad_norm": 4.158380508422852, |
| "learning_rate": 5.50070665074065e-09, |
| "loss": 0.0454, |
| "num_input_tokens_seen": 1840384, |
| "step": 690 |
| }, |
| { |
| "epoch": 2.989247311827957, |
| "num_input_tokens_seen": 1855776, |
| "step": 696, |
| "total_flos": 7.944329136203366e+16, |
| "train_loss": 0.08561765917459097, |
| "train_runtime": 638.811, |
| "train_samples_per_second": 8.721, |
| "train_steps_per_second": 1.09 |
| } |
| ], |
| "logging_steps": 10, |
| "max_steps": 696, |
| "num_input_tokens_seen": 1855776, |
| "num_train_epochs": 3, |
| "save_steps": 1000, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": true |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 7.944329136203366e+16, |
| "train_batch_size": 4, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|