{ "best_metric": 1.2666095495224, "best_model_checkpoint": "/data/tianxing/PycharmProjects/Transformers/examples/conversational/few_shot_intent/file_dir/serialization_dir/checkpoint-6000", "epoch": 0.17463054724847743, "eval_steps": 1000, "global_step": 6000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.03, "grad_norm": 0.6197968125343323, "learning_rate": 0.0002, "loss": 1.7096, "step": 1000 }, { "epoch": 0.03, "eval_loss": 1.4217256307601929, "eval_runtime": 2455.1897, "eval_samples_per_second": 118.455, "eval_steps_per_second": 14.807, "step": 1000 }, { "epoch": 0.06, "grad_norm": 0.3991679549217224, "learning_rate": 0.00019400443671682956, "loss": 1.2426, "step": 2000 }, { "epoch": 0.06, "eval_loss": 1.3142809867858887, "eval_runtime": 2454.5071, "eval_samples_per_second": 118.488, "eval_steps_per_second": 14.811, "step": 2000 }, { "epoch": 0.09, "grad_norm": 0.3880527913570404, "learning_rate": 0.0001880088734336591, "loss": 1.1529, "step": 3000 }, { "epoch": 0.09, "eval_loss": 1.2872347831726074, "eval_runtime": 2455.0423, "eval_samples_per_second": 118.462, "eval_steps_per_second": 14.808, "step": 3000 }, { "epoch": 0.12, "grad_norm": 0.3351105749607086, "learning_rate": 0.00018201331015048865, "loss": 1.0968, "step": 4000 }, { "epoch": 0.12, "eval_loss": 1.2708055973052979, "eval_runtime": 2454.672, "eval_samples_per_second": 118.48, "eval_steps_per_second": 14.81, "step": 4000 }, { "epoch": 0.15, "grad_norm": 0.42601627111434937, "learning_rate": 0.0001760177468673182, "loss": 1.0568, "step": 5000 }, { "epoch": 0.15, "eval_loss": 1.2738263607025146, "eval_runtime": 2454.9426, "eval_samples_per_second": 118.467, "eval_steps_per_second": 14.808, "step": 5000 }, { "epoch": 0.17, "grad_norm": 0.3943077027797699, "learning_rate": 0.00017002218358414775, "loss": 1.0274, "step": 6000 }, { "epoch": 0.17, "eval_loss": 1.2666095495224, "eval_runtime": 2453.7183, "eval_samples_per_second": 118.526, "eval_steps_per_second": 14.816, "step": 6000 } ], "logging_steps": 1000, "max_steps": 34358, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 1000, "total_flos": 1.9766892483628237e+17, "train_batch_size": 16, "trial_name": null, "trial_params": null }