{
  "best_metric": 0.29641835422253565,
  "best_model_checkpoint": "./summary/checkpoint-168",
  "epoch": 2.986666666666667,
  "eval_steps": 500,
  "global_step": 168,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.14222222222222222,
      "grad_norm": 8.576995849609375,
      "learning_rate": 4.761904761904762e-05,
      "loss": 6.1269,
      "step": 8
    },
    {
      "epoch": 0.28444444444444444,
      "grad_norm": 7.30542516708374,
      "learning_rate": 4.523809523809524e-05,
      "loss": 4.2774,
      "step": 16
    },
    {
      "epoch": 0.4266666666666667,
      "grad_norm": 7.0902628898620605,
      "learning_rate": 4.2857142857142856e-05,
      "loss": 3.9068,
      "step": 24
    },
    {
      "epoch": 0.5688888888888889,
      "grad_norm": 8.697046279907227,
      "learning_rate": 4.047619047619048e-05,
      "loss": 3.3875,
      "step": 32
    },
    {
      "epoch": 0.7111111111111111,
      "grad_norm": 7.340813159942627,
      "learning_rate": 3.809523809523809e-05,
      "loss": 3.8833,
      "step": 40
    },
    {
      "epoch": 0.8533333333333334,
      "grad_norm": 8.722084045410156,
      "learning_rate": 3.571428571428572e-05,
      "loss": 3.4346,
      "step": 48
    },
    {
      "epoch": 0.9955555555555555,
      "grad_norm": 7.050210952758789,
      "learning_rate": 3.3333333333333335e-05,
      "loss": 3.5247,
      "step": 56
    },
    {
      "epoch": 0.9955555555555555,
      "eval_loss": 3.147299289703369,
      "eval_rouge-1": 0.3681179246506479,
      "eval_rouge-2": 0.20816045958286267,
      "eval_rouge-l": 0.2931656876171607,
      "eval_runtime": 217.5697,
      "eval_samples_per_second": 0.23,
      "eval_steps_per_second": 0.06,
      "step": 56
    },
    {
      "epoch": 1.1377777777777778,
      "grad_norm": 6.299299240112305,
      "learning_rate": 3.095238095238095e-05,
      "loss": 3.3798,
      "step": 64
    },
    {
      "epoch": 1.28,
      "grad_norm": 8.227340698242188,
      "learning_rate": 2.857142857142857e-05,
      "loss": 3.0201,
      "step": 72
    },
    {
      "epoch": 1.4222222222222223,
      "grad_norm": 7.490102291107178,
      "learning_rate": 2.6190476190476192e-05,
      "loss": 2.7197,
      "step": 80
    },
    {
      "epoch": 1.5644444444444443,
      "grad_norm": 5.702718734741211,
      "learning_rate": 2.380952380952381e-05,
      "loss": 2.7057,
      "step": 88
    },
    {
      "epoch": 1.7066666666666666,
      "grad_norm": 7.035086631774902,
      "learning_rate": 2.1428571428571428e-05,
      "loss": 2.6395,
      "step": 96
    },
    {
      "epoch": 1.8488888888888888,
      "grad_norm": 6.97080659866333,
      "learning_rate": 1.9047619047619046e-05,
      "loss": 2.8412,
      "step": 104
    },
    {
      "epoch": 1.991111111111111,
      "grad_norm": 5.344171047210693,
      "learning_rate": 1.6666666666666667e-05,
      "loss": 2.8042,
      "step": 112
    },
    {
      "epoch": 1.991111111111111,
      "eval_loss": 2.980454683303833,
      "eval_rouge-1": 0.358032604043401,
      "eval_rouge-2": 0.20148071328827782,
      "eval_rouge-l": 0.2943552293948257,
      "eval_runtime": 218.2497,
      "eval_samples_per_second": 0.229,
      "eval_steps_per_second": 0.06,
      "step": 112
    },
    {
      "epoch": 2.1333333333333333,
      "grad_norm": 6.205347537994385,
      "learning_rate": 1.4285714285714285e-05,
      "loss": 2.497,
      "step": 120
    },
    {
      "epoch": 2.2755555555555556,
      "grad_norm": 6.757753372192383,
      "learning_rate": 1.1904761904761905e-05,
      "loss": 2.4279,
      "step": 128
    },
    {
      "epoch": 2.417777777777778,
      "grad_norm": 6.1174821853637695,
      "learning_rate": 9.523809523809523e-06,
      "loss": 2.4113,
      "step": 136
    },
    {
      "epoch": 2.56,
      "grad_norm": 5.852235317230225,
      "learning_rate": 7.142857142857143e-06,
      "loss": 2.6189,
      "step": 144
    },
    {
      "epoch": 2.7022222222222223,
      "grad_norm": 6.898237228393555,
      "learning_rate": 4.7619047619047615e-06,
      "loss": 2.4112,
      "step": 152
    },
    {
      "epoch": 2.8444444444444446,
      "grad_norm": 7.234611511230469,
      "learning_rate": 2.3809523809523808e-06,
      "loss": 2.2202,
      "step": 160
    },
    {
      "epoch": 2.986666666666667,
      "grad_norm": 6.406458377838135,
      "learning_rate": 0.0,
      "loss": 2.3927,
      "step": 168
    },
    {
      "epoch": 2.986666666666667,
      "eval_loss": 2.968492269515991,
      "eval_rouge-1": 0.372908062309785,
      "eval_rouge-2": 0.2065141199985535,
      "eval_rouge-l": 0.29641835422253565,
      "eval_runtime": 217.7138,
      "eval_samples_per_second": 0.23,
      "eval_steps_per_second": 0.06,
      "step": 168
    }
  ],
  "logging_steps": 8,
  "max_steps": 168,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 3,
  "save_steps": 500,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 654660126941184.0,
  "train_batch_size": 2,
  "trial_name": null,
  "trial_params": null
}