{ "best_metric": 0.9842931937172775, "best_model_checkpoint": "model/checkpoint-94", "epoch": 7.703703703703704, "eval_steps": 500, "global_step": 104, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.59, "learning_rate": 0.00036363636363636367, "loss": 1.0999, "step": 8 }, { "epoch": 0.96, "eval_accuracy": 0.643979057591623, "eval_loss": 0.7986512780189514, "eval_runtime": 28.581, "eval_samples_per_second": 6.683, "eval_steps_per_second": 0.21, "step": 13 }, { "epoch": 1.19, "learning_rate": 0.0004731182795698925, "loss": 1.0029, "step": 16 }, { "epoch": 1.78, "learning_rate": 0.00043010752688172043, "loss": 0.6342, "step": 24 }, { "epoch": 2.0, "eval_accuracy": 0.9424083769633508, "eval_loss": 0.24137574434280396, "eval_runtime": 26.3088, "eval_samples_per_second": 7.26, "eval_steps_per_second": 0.228, "step": 27 }, { "epoch": 2.37, "learning_rate": 0.0003870967741935484, "loss": 0.5732, "step": 32 }, { "epoch": 2.96, "learning_rate": 0.0003440860215053764, "loss": 0.4882, "step": 40 }, { "epoch": 2.96, "eval_accuracy": 0.9633507853403142, "eval_loss": 0.16461053490638733, "eval_runtime": 26.318, "eval_samples_per_second": 7.257, "eval_steps_per_second": 0.228, "step": 40 }, { "epoch": 3.56, "learning_rate": 0.0003010752688172043, "loss": 0.463, "step": 48 }, { "epoch": 4.0, "eval_accuracy": 0.9424083769633508, "eval_loss": 0.25284695625305176, "eval_runtime": 26.2832, "eval_samples_per_second": 7.267, "eval_steps_per_second": 0.228, "step": 54 }, { "epoch": 4.15, "learning_rate": 0.00025806451612903227, "loss": 0.4286, "step": 56 }, { "epoch": 4.74, "learning_rate": 0.00021505376344086021, "loss": 0.4609, "step": 64 }, { "epoch": 4.96, "eval_accuracy": 0.9790575916230366, "eval_loss": 0.11301162838935852, "eval_runtime": 26.3727, "eval_samples_per_second": 7.242, "eval_steps_per_second": 0.228, "step": 67 }, { "epoch": 5.33, "learning_rate": 0.0001720430107526882, "loss": 0.4636, "step": 72 }, { "epoch": 5.93, "learning_rate": 0.00012903225806451613, "loss": 0.4251, "step": 80 }, { "epoch": 6.0, "eval_accuracy": 0.9633507853403142, "eval_loss": 0.13039104640483856, "eval_runtime": 26.3002, "eval_samples_per_second": 7.262, "eval_steps_per_second": 0.228, "step": 81 }, { "epoch": 6.52, "learning_rate": 8.60215053763441e-05, "loss": 0.3802, "step": 88 }, { "epoch": 6.96, "eval_accuracy": 0.9842931937172775, "eval_loss": 0.07386188954114914, "eval_runtime": 26.267, "eval_samples_per_second": 7.271, "eval_steps_per_second": 0.228, "step": 94 }, { "epoch": 7.11, "learning_rate": 4.301075268817205e-05, "loss": 0.378, "step": 96 }, { "epoch": 7.7, "learning_rate": 0.0, "loss": 0.4147, "step": 104 }, { "epoch": 7.7, "eval_accuracy": 0.9842931937172775, "eval_loss": 0.06754852086305618, "eval_runtime": 26.3749, "eval_samples_per_second": 7.242, "eval_steps_per_second": 0.227, "step": 104 }, { "epoch": 7.7, "step": 104, "total_flos": 1.0251773186064077e+18, "train_loss": 0.5548124955250666, "train_runtime": 4572.4853, "train_samples_per_second": 3.004, "train_steps_per_second": 0.023 } ], "logging_steps": 8, "max_steps": 104, "num_train_epochs": 8, "save_steps": 500, "total_flos": 1.0251773186064077e+18, "trial_name": null, "trial_params": null }