{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 2.001194743130227,
  "eval_steps": 500,
  "global_step": 1675,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.05973715651135006,
      "grad_norm": 2.8490114212036133,
      "learning_rate": 0.00016447368421052634,
      "loss": 11.6497,
      "step": 50
    },
    {
      "epoch": 0.11947431302270012,
      "grad_norm": 0.05463433265686035,
      "learning_rate": 0.0002475359342915811,
      "loss": 4.6396,
      "step": 100
    },
    {
      "epoch": 0.17921146953405018,
      "grad_norm": 0.03409096226096153,
      "learning_rate": 0.00024240246406570843,
      "loss": 4.4228,
      "step": 150
    },
    {
      "epoch": 0.23894862604540024,
      "grad_norm": 0.03413880988955498,
      "learning_rate": 0.00023726899383983574,
      "loss": 4.3988,
      "step": 200
    },
    {
      "epoch": 0.2986857825567503,
      "grad_norm": 0.033178623765707016,
      "learning_rate": 0.00023213552361396305,
      "loss": 4.3922,
      "step": 250
    },
    {
      "epoch": 0.35842293906810035,
      "grad_norm": 0.028784427791833878,
      "learning_rate": 0.00022700205338809036,
      "loss": 4.4053,
      "step": 300
    },
    {
      "epoch": 0.41816009557945044,
      "grad_norm": 0.036163728684186935,
      "learning_rate": 0.00022186858316221766,
      "loss": 4.3944,
      "step": 350
    },
    {
      "epoch": 0.4778972520908005,
      "grad_norm": 0.03532182425260544,
      "learning_rate": 0.00021673511293634497,
      "loss": 4.3938,
      "step": 400
    },
    {
      "epoch": 0.5376344086021505,
      "grad_norm": 0.03272629156708717,
      "learning_rate": 0.00021160164271047228,
      "loss": 4.3859,
      "step": 450
    },
    {
      "epoch": 0.5973715651135006,
      "grad_norm": 0.027959033846855164,
      "learning_rate": 0.0002064681724845996,
      "loss": 4.3881,
      "step": 500
    },
    {
      "epoch": 0.6571087216248507,
      "grad_norm": 0.024525364860892296,
      "learning_rate": 0.0002013347022587269,
      "loss": 4.3989,
      "step": 550
    },
    {
      "epoch": 0.7168458781362007,
      "grad_norm": 0.025551579892635345,
      "learning_rate": 0.00019620123203285423,
      "loss": 4.3802,
      "step": 600
    },
    {
      "epoch": 0.7765830346475507,
      "grad_norm": 0.03189048916101456,
      "learning_rate": 0.00019106776180698152,
      "loss": 4.4041,
      "step": 650
    },
    {
      "epoch": 0.8363201911589009,
      "grad_norm": 0.02770661748945713,
      "learning_rate": 0.00018593429158110883,
      "loss": 4.3955,
      "step": 700
    },
    {
      "epoch": 0.8960573476702509,
      "grad_norm": 0.03752126544713974,
      "learning_rate": 0.00018080082135523616,
      "loss": 4.3857,
      "step": 750
    },
    {
      "epoch": 0.955794504181601,
      "grad_norm": 0.0396958664059639,
      "learning_rate": 0.00017566735112936344,
      "loss": 4.3847,
      "step": 800
    },
    {
      "epoch": 1.015531660692951,
      "grad_norm": 0.03522910550236702,
      "learning_rate": 0.00017053388090349075,
      "loss": 4.3815,
      "step": 850
    },
    {
      "epoch": 1.075268817204301,
      "grad_norm": 0.033044200390577316,
      "learning_rate": 0.00016540041067761806,
      "loss": 4.3903,
      "step": 900
    },
    {
      "epoch": 1.135005973715651,
      "grad_norm": 0.03267841041088104,
      "learning_rate": 0.0001602669404517454,
      "loss": 4.3836,
      "step": 950
    },
    {
      "epoch": 1.194743130227001,
      "grad_norm": 0.04201454669237137,
      "learning_rate": 0.00015513347022587268,
      "loss": 4.3776,
      "step": 1000
    },
    {
      "epoch": 1.2544802867383513,
      "grad_norm": 0.047623638063669205,
      "learning_rate": 0.00015,
      "loss": 4.3734,
      "step": 1050
    },
    {
      "epoch": 1.3142174432497014,
      "grad_norm": 0.03200829401612282,
      "learning_rate": 0.00014486652977412732,
      "loss": 4.3837,
      "step": 1100
    },
    {
      "epoch": 1.3739545997610514,
      "grad_norm": 0.04358180612325668,
      "learning_rate": 0.00013973305954825463,
      "loss": 4.3815,
      "step": 1150
    },
    {
      "epoch": 1.4336917562724014,
      "grad_norm": 0.04975922778248787,
      "learning_rate": 0.0001345995893223819,
      "loss": 4.3746,
      "step": 1200
    },
    {
      "epoch": 1.4934289127837514,
      "grad_norm": 0.03673349320888519,
      "learning_rate": 0.00012946611909650925,
      "loss": 4.3755,
      "step": 1250
    },
    {
      "epoch": 1.5531660692951017,
      "grad_norm": 0.03130173310637474,
      "learning_rate": 0.00012433264887063656,
      "loss": 4.3902,
      "step": 1300
    },
    {
      "epoch": 1.6129032258064515,
      "grad_norm": 0.03993390500545502,
      "learning_rate": 0.00011919917864476385,
      "loss": 4.3852,
      "step": 1350
    },
    {
      "epoch": 1.6726403823178018,
      "grad_norm": 0.04937516897916794,
      "learning_rate": 0.00011406570841889118,
      "loss": 4.3782,
      "step": 1400
    },
    {
      "epoch": 1.7323775388291516,
      "grad_norm": 0.04578279331326485,
      "learning_rate": 0.00010893223819301848,
      "loss": 4.377,
      "step": 1450
    },
    {
      "epoch": 1.7921146953405018,
      "grad_norm": 0.048149123787879944,
      "learning_rate": 0.00010379876796714579,
      "loss": 4.3835,
      "step": 1500
    },
    {
      "epoch": 1.8518518518518519,
      "grad_norm": 0.0500078909099102,
      "learning_rate": 9.86652977412731e-05,
      "loss": 4.3806,
      "step": 1550
    },
    {
      "epoch": 1.911589008363202,
      "grad_norm": 0.040174700319767,
      "learning_rate": 9.353182751540041e-05,
      "loss": 4.3863,
      "step": 1600
    },
    {
      "epoch": 1.971326164874552,
      "grad_norm": 0.033409375697374344,
      "learning_rate": 8.839835728952772e-05,
      "loss": 4.3754,
      "step": 1650
    }
  ],
  "logging_steps": 50,
  "max_steps": 2511,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 3,
  "save_steps": 25,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": false
      },
      "attributes": {}
    }
  },
  "total_flos": 2.0486522326430515e+17,
  "train_batch_size": 6,
  "trial_name": null,
  "trial_params": null
}