| { | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 6.0, | |
| "eval_steps": 500, | |
| "global_step": 1320, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.22727272727272727, | |
| "grad_norm": 0.2216796875, | |
| "learning_rate": 4.943181818181818e-05, | |
| "loss": 0.5826, | |
| "step": 50 | |
| }, | |
| { | |
| "epoch": 0.45454545454545453, | |
| "grad_norm": 0.150390625, | |
| "learning_rate": 4.886363636363637e-05, | |
| "loss": 0.3128, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 0.6818181818181818, | |
| "grad_norm": 0.1298828125, | |
| "learning_rate": 4.829545454545455e-05, | |
| "loss": 0.289, | |
| "step": 150 | |
| }, | |
| { | |
| "epoch": 0.9090909090909091, | |
| "grad_norm": 0.1435546875, | |
| "learning_rate": 4.772727272727273e-05, | |
| "loss": 0.2702, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 1.0, | |
| "eval_loss": 0.24845334887504578, | |
| "eval_runtime": 52.787, | |
| "eval_samples_per_second": 14.89, | |
| "eval_steps_per_second": 0.474, | |
| "step": 220 | |
| }, | |
| { | |
| "epoch": 1.1363636363636362, | |
| "grad_norm": 0.1396484375, | |
| "learning_rate": 4.715909090909091e-05, | |
| "loss": 0.2663, | |
| "step": 250 | |
| }, | |
| { | |
| "epoch": 1.3636363636363638, | |
| "grad_norm": 0.166015625, | |
| "learning_rate": 4.659090909090909e-05, | |
| "loss": 0.2591, | |
| "step": 300 | |
| }, | |
| { | |
| "epoch": 1.5909090909090908, | |
| "grad_norm": 0.162109375, | |
| "learning_rate": 4.602272727272727e-05, | |
| "loss": 0.2498, | |
| "step": 350 | |
| }, | |
| { | |
| "epoch": 1.8181818181818183, | |
| "grad_norm": 0.119140625, | |
| "learning_rate": 4.545454545454546e-05, | |
| "loss": 0.2557, | |
| "step": 400 | |
| }, | |
| { | |
| "epoch": 2.0, | |
| "eval_loss": 0.2350398302078247, | |
| "eval_runtime": 53.0739, | |
| "eval_samples_per_second": 14.81, | |
| "eval_steps_per_second": 0.471, | |
| "step": 440 | |
| }, | |
| { | |
| "epoch": 2.0454545454545454, | |
| "grad_norm": 0.1259765625, | |
| "learning_rate": 4.488636363636364e-05, | |
| "loss": 0.2466, | |
| "step": 450 | |
| }, | |
| { | |
| "epoch": 2.2727272727272725, | |
| "grad_norm": 0.1416015625, | |
| "learning_rate": 4.431818181818182e-05, | |
| "loss": 0.2361, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 2.5, | |
| "grad_norm": 0.1337890625, | |
| "learning_rate": 4.375e-05, | |
| "loss": 0.253, | |
| "step": 550 | |
| }, | |
| { | |
| "epoch": 2.7272727272727275, | |
| "grad_norm": 0.1513671875, | |
| "learning_rate": 4.318181818181819e-05, | |
| "loss": 0.2409, | |
| "step": 600 | |
| }, | |
| { | |
| "epoch": 2.9545454545454546, | |
| "grad_norm": 0.1640625, | |
| "learning_rate": 4.261363636363637e-05, | |
| "loss": 0.2436, | |
| "step": 650 | |
| }, | |
| { | |
| "epoch": 3.0, | |
| "eval_loss": 0.22992941737174988, | |
| "eval_runtime": 53.1353, | |
| "eval_samples_per_second": 14.792, | |
| "eval_steps_per_second": 0.47, | |
| "step": 660 | |
| }, | |
| { | |
| "epoch": 3.1818181818181817, | |
| "grad_norm": 0.16015625, | |
| "learning_rate": 4.204545454545455e-05, | |
| "loss": 0.2363, | |
| "step": 700 | |
| }, | |
| { | |
| "epoch": 3.409090909090909, | |
| "grad_norm": 0.1650390625, | |
| "learning_rate": 4.1477272727272734e-05, | |
| "loss": 0.2256, | |
| "step": 750 | |
| }, | |
| { | |
| "epoch": 3.6363636363636362, | |
| "grad_norm": 0.158203125, | |
| "learning_rate": 4.0909090909090915e-05, | |
| "loss": 0.2389, | |
| "step": 800 | |
| }, | |
| { | |
| "epoch": 3.8636363636363638, | |
| "grad_norm": 0.169921875, | |
| "learning_rate": 4.034090909090909e-05, | |
| "loss": 0.2291, | |
| "step": 850 | |
| }, | |
| { | |
| "epoch": 4.0, | |
| "eval_loss": 0.22730520367622375, | |
| "eval_runtime": 53.1918, | |
| "eval_samples_per_second": 14.777, | |
| "eval_steps_per_second": 0.47, | |
| "step": 880 | |
| }, | |
| { | |
| "epoch": 4.090909090909091, | |
| "grad_norm": 0.1708984375, | |
| "learning_rate": 3.9772727272727275e-05, | |
| "loss": 0.2318, | |
| "step": 900 | |
| }, | |
| { | |
| "epoch": 4.318181818181818, | |
| "grad_norm": 0.181640625, | |
| "learning_rate": 3.9204545454545456e-05, | |
| "loss": 0.2322, | |
| "step": 950 | |
| }, | |
| { | |
| "epoch": 4.545454545454545, | |
| "grad_norm": 0.1591796875, | |
| "learning_rate": 3.8636363636363636e-05, | |
| "loss": 0.2194, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 4.7727272727272725, | |
| "grad_norm": 0.166015625, | |
| "learning_rate": 3.8068181818181816e-05, | |
| "loss": 0.2277, | |
| "step": 1050 | |
| }, | |
| { | |
| "epoch": 5.0, | |
| "grad_norm": 0.1767578125, | |
| "learning_rate": 3.7500000000000003e-05, | |
| "loss": 0.2329, | |
| "step": 1100 | |
| }, | |
| { | |
| "epoch": 5.0, | |
| "eval_loss": 0.2264343947172165, | |
| "eval_runtime": 54.0094, | |
| "eval_samples_per_second": 14.553, | |
| "eval_steps_per_second": 0.463, | |
| "step": 1100 | |
| }, | |
| { | |
| "epoch": 5.2272727272727275, | |
| "grad_norm": 0.1806640625, | |
| "learning_rate": 3.6931818181818184e-05, | |
| "loss": 0.2234, | |
| "step": 1150 | |
| }, | |
| { | |
| "epoch": 5.454545454545454, | |
| "grad_norm": 0.19140625, | |
| "learning_rate": 3.6363636363636364e-05, | |
| "loss": 0.2154, | |
| "step": 1200 | |
| }, | |
| { | |
| "epoch": 5.681818181818182, | |
| "grad_norm": 0.173828125, | |
| "learning_rate": 3.579545454545455e-05, | |
| "loss": 0.2349, | |
| "step": 1250 | |
| }, | |
| { | |
| "epoch": 5.909090909090909, | |
| "grad_norm": 0.162109375, | |
| "learning_rate": 3.522727272727273e-05, | |
| "loss": 0.2172, | |
| "step": 1300 | |
| }, | |
| { | |
| "epoch": 6.0, | |
| "eval_loss": 0.22602269053459167, | |
| "eval_runtime": 54.0842, | |
| "eval_samples_per_second": 14.533, | |
| "eval_steps_per_second": 0.462, | |
| "step": 1320 | |
| } | |
| ], | |
| "logging_steps": 50, | |
| "max_steps": 4400, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 20, | |
| "save_steps": 500, | |
| "total_flos": 9.285391673846661e+17, | |
| "train_batch_size": 8, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |