| { |
| "best_global_step": null, |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 1.0, |
| "eval_steps": 500, |
| "global_step": 391, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.02557544757033248, |
| "grad_norm": 5.1979091027844655, |
| "learning_rate": 2.25e-06, |
| "loss": 0.5442, |
| "step": 10 |
| }, |
| { |
| "epoch": 0.05115089514066496, |
| "grad_norm": 1.555199471497413, |
| "learning_rate": 4.75e-06, |
| "loss": 0.4196, |
| "step": 20 |
| }, |
| { |
| "epoch": 0.07672634271099744, |
| "grad_norm": 0.8843964610571793, |
| "learning_rate": 7.25e-06, |
| "loss": 0.2928, |
| "step": 30 |
| }, |
| { |
| "epoch": 0.10230179028132992, |
| "grad_norm": 0.9254467392549297, |
| "learning_rate": 9.75e-06, |
| "loss": 0.2508, |
| "step": 40 |
| }, |
| { |
| "epoch": 0.1278772378516624, |
| "grad_norm": 0.9595475930941818, |
| "learning_rate": 9.983786540671052e-06, |
| "loss": 0.2316, |
| "step": 50 |
| }, |
| { |
| "epoch": 0.1534526854219949, |
| "grad_norm": 0.8734055150087038, |
| "learning_rate": 9.927874998629714e-06, |
| "loss": 0.2198, |
| "step": 60 |
| }, |
| { |
| "epoch": 0.17902813299232737, |
| "grad_norm": 0.8904948291987135, |
| "learning_rate": 9.83251270794707e-06, |
| "loss": 0.2047, |
| "step": 70 |
| }, |
| { |
| "epoch": 0.20460358056265984, |
| "grad_norm": 0.8941187964853939, |
| "learning_rate": 9.698463103929542e-06, |
| "loss": 0.2058, |
| "step": 80 |
| }, |
| { |
| "epoch": 0.23017902813299232, |
| "grad_norm": 0.8232662082320363, |
| "learning_rate": 9.526799338236828e-06, |
| "loss": 0.1903, |
| "step": 90 |
| }, |
| { |
| "epoch": 0.2557544757033248, |
| "grad_norm": 0.8507117163294342, |
| "learning_rate": 9.318895687625752e-06, |
| "loss": 0.1835, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.2813299232736573, |
| "grad_norm": 0.7198977204875759, |
| "learning_rate": 9.076416551997721e-06, |
| "loss": 0.1774, |
| "step": 110 |
| }, |
| { |
| "epoch": 0.3069053708439898, |
| "grad_norm": 0.6072199252093429, |
| "learning_rate": 8.801303129827352e-06, |
| "loss": 0.175, |
| "step": 120 |
| }, |
| { |
| "epoch": 0.33248081841432225, |
| "grad_norm": 0.552937855243384, |
| "learning_rate": 8.495757877643857e-06, |
| "loss": 0.1688, |
| "step": 130 |
| }, |
| { |
| "epoch": 0.35805626598465473, |
| "grad_norm": 0.5078356984647172, |
| "learning_rate": 8.162226877976886e-06, |
| "loss": 0.1679, |
| "step": 140 |
| }, |
| { |
| "epoch": 0.3836317135549872, |
| "grad_norm": 0.4569791962942034, |
| "learning_rate": 7.803380256922495e-06, |
| "loss": 0.1659, |
| "step": 150 |
| }, |
| { |
| "epoch": 0.4092071611253197, |
| "grad_norm": 0.43754448097194093, |
| "learning_rate": 7.422090808099014e-06, |
| "loss": 0.1614, |
| "step": 160 |
| }, |
| { |
| "epoch": 0.43478260869565216, |
| "grad_norm": 0.3965482560875204, |
| "learning_rate": 7.021410994121525e-06, |
| "loss": 0.1658, |
| "step": 170 |
| }, |
| { |
| "epoch": 0.46035805626598464, |
| "grad_norm": 0.40365524431178995, |
| "learning_rate": 6.6045485097126585e-06, |
| "loss": 0.162, |
| "step": 180 |
| }, |
| { |
| "epoch": 0.4859335038363171, |
| "grad_norm": 0.38726639073470187, |
| "learning_rate": 6.1748406020824115e-06, |
| "loss": 0.1603, |
| "step": 190 |
| }, |
| { |
| "epoch": 0.5115089514066496, |
| "grad_norm": 0.3760876442217351, |
| "learning_rate": 5.735727354158581e-06, |
| "loss": 0.1607, |
| "step": 200 |
| }, |
| { |
| "epoch": 0.5370843989769821, |
| "grad_norm": 0.3698546651087282, |
| "learning_rate": 5.290724144552379e-06, |
| "loss": 0.1566, |
| "step": 210 |
| }, |
| { |
| "epoch": 0.5626598465473146, |
| "grad_norm": 0.378775457895615, |
| "learning_rate": 4.8433935047346e-06, |
| "loss": 0.1617, |
| "step": 220 |
| }, |
| { |
| "epoch": 0.5882352941176471, |
| "grad_norm": 0.37700547911055976, |
| "learning_rate": 4.397316598723385e-06, |
| "loss": 0.1577, |
| "step": 230 |
| }, |
| { |
| "epoch": 0.6138107416879796, |
| "grad_norm": 0.34954174520163833, |
| "learning_rate": 3.956064553606708e-06, |
| "loss": 0.1595, |
| "step": 240 |
| }, |
| { |
| "epoch": 0.639386189258312, |
| "grad_norm": 0.357317340726361, |
| "learning_rate": 3.523169870416795e-06, |
| "loss": 0.154, |
| "step": 250 |
| }, |
| { |
| "epoch": 0.6649616368286445, |
| "grad_norm": 0.3684051566901591, |
| "learning_rate": 3.1020981442305187e-06, |
| "loss": 0.1531, |
| "step": 260 |
| }, |
| { |
| "epoch": 0.690537084398977, |
| "grad_norm": 0.3895498086957201, |
| "learning_rate": 2.6962203198941587e-06, |
| "loss": 0.1579, |
| "step": 270 |
| }, |
| { |
| "epoch": 0.7161125319693095, |
| "grad_norm": 0.3731438076456078, |
| "learning_rate": 2.308785705482982e-06, |
| "loss": 0.1586, |
| "step": 280 |
| }, |
| { |
| "epoch": 0.7416879795396419, |
| "grad_norm": 0.41906481058101075, |
| "learning_rate": 1.942895959539939e-06, |
| "loss": 0.1575, |
| "step": 290 |
| }, |
| { |
| "epoch": 0.7672634271099744, |
| "grad_norm": 0.3896314041500002, |
| "learning_rate": 1.6014802603420044e-06, |
| "loss": 0.1553, |
| "step": 300 |
| }, |
| { |
| "epoch": 0.7928388746803069, |
| "grad_norm": 0.385720584527069, |
| "learning_rate": 1.2872718559798852e-06, |
| "loss": 0.1532, |
| "step": 310 |
| }, |
| { |
| "epoch": 0.8184143222506394, |
| "grad_norm": 0.32351933058141824, |
| "learning_rate": 1.0027861829824953e-06, |
| "loss": 0.1562, |
| "step": 320 |
| }, |
| { |
| "epoch": 0.8439897698209718, |
| "grad_norm": 0.3670621953960731, |
| "learning_rate": 7.50300728660407e-07, |
| "loss": 0.155, |
| "step": 330 |
| }, |
| { |
| "epoch": 0.8695652173913043, |
| "grad_norm": 0.3829568746421653, |
| "learning_rate": 5.318367983829393e-07, |
| "loss": 0.1523, |
| "step": 340 |
| }, |
| { |
| "epoch": 0.8951406649616368, |
| "grad_norm": 0.3775945531822611, |
| "learning_rate": 3.49143333753309e-07, |
| "loss": 0.1521, |
| "step": 350 |
| }, |
| { |
| "epoch": 0.9207161125319693, |
| "grad_norm": 0.36031263149371584, |
| "learning_rate": 2.0368291122759898e-07, |
| "loss": 0.1523, |
| "step": 360 |
| }, |
| { |
| "epoch": 0.9462915601023018, |
| "grad_norm": 0.3348863130660584, |
| "learning_rate": 9.662003326740166e-08, |
| "loss": 0.1514, |
| "step": 370 |
| }, |
| { |
| "epoch": 0.9718670076726342, |
| "grad_norm": 0.360326022397128, |
| "learning_rate": 2.8811805762860578e-08, |
| "loss": 0.1501, |
| "step": 380 |
| }, |
| { |
| "epoch": 0.9974424552429667, |
| "grad_norm": 0.37241654765357973, |
| "learning_rate": 8.010763592264381e-10, |
| "loss": 0.152, |
| "step": 390 |
| }, |
| { |
| "epoch": 1.0, |
| "step": 391, |
| "total_flos": 24007129694208.0, |
| "train_loss": 0.18847447641365364, |
| "train_runtime": 1067.9437, |
| "train_samples_per_second": 46.819, |
| "train_steps_per_second": 0.366 |
| } |
| ], |
| "logging_steps": 10, |
| "max_steps": 391, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 1, |
| "save_steps": 500, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": true |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 24007129694208.0, |
| "train_batch_size": 16, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|