| { |
| "best_global_step": 1024, |
| "best_metric": 0.8984, |
| "best_model_checkpoint": "models/flan-t5-email-v2/checkpoint-1024", |
| "epoch": 4.0, |
| "eval_steps": 500, |
| "global_step": 1024, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.1953125, |
| "grad_norm": 3.140934467315674, |
| "learning_rate": 0.000288515625, |
| "loss": 2.7681, |
| "step": 50 |
| }, |
| { |
| "epoch": 0.390625, |
| "grad_norm": 3.5862669944763184, |
| "learning_rate": 0.00027679687499999997, |
| "loss": 2.0029, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.5859375, |
| "grad_norm": 3.535059928894043, |
| "learning_rate": 0.00026507812499999996, |
| "loss": 1.8103, |
| "step": 150 |
| }, |
| { |
| "epoch": 0.78125, |
| "grad_norm": 3.164299964904785, |
| "learning_rate": 0.00025335937499999995, |
| "loss": 1.7036, |
| "step": 200 |
| }, |
| { |
| "epoch": 0.9765625, |
| "grad_norm": 3.8106772899627686, |
| "learning_rate": 0.00024164062499999997, |
| "loss": 1.5879, |
| "step": 250 |
| }, |
| { |
| "epoch": 1.0, |
| "eval_action_accuracy": 0.8516, |
| "eval_category_accuracy": 0.8164, |
| "eval_gen_len": 21.1094, |
| "eval_loss": 1.4673562049865723, |
| "eval_rouge1": 0.5852, |
| "eval_rouge2": 0.3857, |
| "eval_rougeL": 0.5541, |
| "eval_rougeLsum": 0.5549, |
| "eval_runtime": 49.7378, |
| "eval_samples_per_second": 5.147, |
| "eval_steps_per_second": 0.643, |
| "step": 256 |
| }, |
| { |
| "epoch": 1.171875, |
| "grad_norm": 3.2147984504699707, |
| "learning_rate": 0.00022992187499999996, |
| "loss": 1.4613, |
| "step": 300 |
| }, |
| { |
| "epoch": 1.3671875, |
| "grad_norm": 3.4544200897216797, |
| "learning_rate": 0.00021820312499999997, |
| "loss": 1.34, |
| "step": 350 |
| }, |
| { |
| "epoch": 1.5625, |
| "grad_norm": 2.983030319213867, |
| "learning_rate": 0.00020648437499999996, |
| "loss": 1.3194, |
| "step": 400 |
| }, |
| { |
| "epoch": 1.7578125, |
| "grad_norm": 3.9024441242218018, |
| "learning_rate": 0.00019476562499999998, |
| "loss": 1.26, |
| "step": 450 |
| }, |
| { |
| "epoch": 1.953125, |
| "grad_norm": 3.664095878601074, |
| "learning_rate": 0.00018304687499999997, |
| "loss": 1.2268, |
| "step": 500 |
| }, |
| { |
| "epoch": 2.0, |
| "eval_action_accuracy": 0.8438, |
| "eval_category_accuracy": 0.8633, |
| "eval_gen_len": 21.5078, |
| "eval_loss": 1.353543996810913, |
| "eval_rouge1": 0.6192, |
| "eval_rouge2": 0.4239, |
| "eval_rougeL": 0.5915, |
| "eval_rougeLsum": 0.5911, |
| "eval_runtime": 19.3119, |
| "eval_samples_per_second": 13.256, |
| "eval_steps_per_second": 1.657, |
| "step": 512 |
| }, |
| { |
| "epoch": 2.1484375, |
| "grad_norm": 3.5792407989501953, |
| "learning_rate": 0.00017132812499999999, |
| "loss": 1.1758, |
| "step": 550 |
| }, |
| { |
| "epoch": 2.34375, |
| "grad_norm": 4.2944464683532715, |
| "learning_rate": 0.00015960937499999997, |
| "loss": 1.171, |
| "step": 600 |
| }, |
| { |
| "epoch": 2.5390625, |
| "grad_norm": 3.5692195892333984, |
| "learning_rate": 0.000147890625, |
| "loss": 1.0639, |
| "step": 650 |
| }, |
| { |
| "epoch": 2.734375, |
| "grad_norm": 2.5475616455078125, |
| "learning_rate": 0.00013617187499999998, |
| "loss": 1.049, |
| "step": 700 |
| }, |
| { |
| "epoch": 2.9296875, |
| "grad_norm": 4.432695388793945, |
| "learning_rate": 0.000124453125, |
| "loss": 1.018, |
| "step": 750 |
| }, |
| { |
| "epoch": 3.0, |
| "eval_action_accuracy": 0.8438, |
| "eval_category_accuracy": 0.875, |
| "eval_gen_len": 21.1055, |
| "eval_loss": 1.3012762069702148, |
| "eval_rouge1": 0.6324, |
| "eval_rouge2": 0.4473, |
| "eval_rougeL": 0.604, |
| "eval_rougeLsum": 0.6037, |
| "eval_runtime": 18.596, |
| "eval_samples_per_second": 13.766, |
| "eval_steps_per_second": 1.721, |
| "step": 768 |
| }, |
| { |
| "epoch": 3.125, |
| "grad_norm": 3.5918500423431396, |
| "learning_rate": 0.00011273437499999999, |
| "loss": 0.9856, |
| "step": 800 |
| }, |
| { |
| "epoch": 3.3203125, |
| "grad_norm": 2.998674154281616, |
| "learning_rate": 0.00010101562499999999, |
| "loss": 0.9834, |
| "step": 850 |
| }, |
| { |
| "epoch": 3.515625, |
| "grad_norm": 3.258812427520752, |
| "learning_rate": 8.9296875e-05, |
| "loss": 0.9978, |
| "step": 900 |
| }, |
| { |
| "epoch": 3.7109375, |
| "grad_norm": 2.94854474067688, |
| "learning_rate": 7.7578125e-05, |
| "loss": 0.91, |
| "step": 950 |
| }, |
| { |
| "epoch": 3.90625, |
| "grad_norm": 4.353010654449463, |
| "learning_rate": 6.5859375e-05, |
| "loss": 0.9243, |
| "step": 1000 |
| }, |
| { |
| "epoch": 4.0, |
| "eval_action_accuracy": 0.875, |
| "eval_category_accuracy": 0.8984, |
| "eval_gen_len": 21.3203, |
| "eval_loss": 1.279372215270996, |
| "eval_rouge1": 0.6502, |
| "eval_rouge2": 0.4636, |
| "eval_rougeL": 0.62, |
| "eval_rougeLsum": 0.6192, |
| "eval_runtime": 18.4054, |
| "eval_samples_per_second": 13.909, |
| "eval_steps_per_second": 1.739, |
| "step": 1024 |
| } |
| ], |
| "logging_steps": 50, |
| "max_steps": 1280, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 5, |
| "save_steps": 500, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": false |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 1489239994392576.0, |
| "train_batch_size": 8, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|