| { |
| "best_global_step": 50, |
| "best_metric": 0.7476052045822144, |
| "best_model_checkpoint": "error_analysis_results/checkpoint-50", |
| "epoch": 4.0, |
| "eval_steps": 25, |
| "global_step": 56, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.35714285714285715, |
| "grad_norm": 0.11138060879106787, |
| "learning_rate": 0.0004, |
| "loss": 1.2608, |
| "step": 5 |
| }, |
| { |
| "epoch": 0.7142857142857143, |
| "grad_norm": 0.10740821093198347, |
| "learning_rate": 0.00036862745098039214, |
| "loss": 1.0437, |
| "step": 10 |
| }, |
| { |
| "epoch": 1.0714285714285714, |
| "grad_norm": 0.07388919095804597, |
| "learning_rate": 0.0003294117647058824, |
| "loss": 0.9581, |
| "step": 15 |
| }, |
| { |
| "epoch": 1.4285714285714286, |
| "grad_norm": 0.07214144653287455, |
| "learning_rate": 0.00029019607843137256, |
| "loss": 0.8013, |
| "step": 20 |
| }, |
| { |
| "epoch": 1.7857142857142856, |
| "grad_norm": 0.08476239587638566, |
| "learning_rate": 0.00025098039215686274, |
| "loss": 0.7592, |
| "step": 25 |
| }, |
| { |
| "epoch": 1.7857142857142856, |
| "eval_accuracy": 0.784605669100989, |
| "eval_loss": 0.7806898951530457, |
| "eval_runtime": 308.1114, |
| "eval_samples_per_second": 0.523, |
| "eval_steps_per_second": 0.068, |
| "step": 25 |
| }, |
| { |
| "epoch": 2.142857142857143, |
| "grad_norm": 0.07926909066415756, |
| "learning_rate": 0.00021176470588235295, |
| "loss": 0.7113, |
| "step": 30 |
| }, |
| { |
| "epoch": 2.5, |
| "grad_norm": 0.0749471487159779, |
| "learning_rate": 0.00017254901960784316, |
| "loss": 0.6215, |
| "step": 35 |
| }, |
| { |
| "epoch": 2.857142857142857, |
| "grad_norm": 0.0814097048760763, |
| "learning_rate": 0.00013333333333333334, |
| "loss": 0.6253, |
| "step": 40 |
| }, |
| { |
| "epoch": 3.2142857142857144, |
| "grad_norm": 0.0823719010294674, |
| "learning_rate": 9.411764705882353e-05, |
| "loss": 0.5681, |
| "step": 45 |
| }, |
| { |
| "epoch": 3.571428571428571, |
| "grad_norm": 0.07773301484979464, |
| "learning_rate": 5.490196078431373e-05, |
| "loss": 0.5326, |
| "step": 50 |
| }, |
| { |
| "epoch": 3.571428571428571, |
| "eval_accuracy": 0.7972704920605544, |
| "eval_loss": 0.7476052045822144, |
| "eval_runtime": 302.9956, |
| "eval_samples_per_second": 0.531, |
| "eval_steps_per_second": 0.069, |
| "step": 50 |
| }, |
| { |
| "epoch": 3.928571428571429, |
| "grad_norm": 0.07906229843791522, |
| "learning_rate": 1.568627450980392e-05, |
| "loss": 0.5131, |
| "step": 55 |
| }, |
| { |
| "epoch": 4.0, |
| "step": 56, |
| "total_flos": 86439169622016.0, |
| "train_loss": 0.7588071397372654, |
| "train_runtime": 6802.9439, |
| "train_samples_per_second": 0.256, |
| "train_steps_per_second": 0.008 |
| } |
| ], |
| "logging_steps": 5, |
| "max_steps": 56, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 4, |
| "save_steps": 25, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": true |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 86439169622016.0, |
| "train_batch_size": 4, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|