{ "best_metric": 0.4901912808418274, "best_model_checkpoint": "../experiments_checkpoints/LoRA/Qwen/Qwen1.5_7B_LoRA_MAdAiLab/twitter_disaster/checkpoint-250", "epoch": 0.9191176470588235, "eval_steps": 50, "global_step": 250, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.04, "grad_norm": 24.977563858032227, "learning_rate": 4.9387254901960786e-05, "loss": 1.7697, "step": 10 }, { "epoch": 0.07, "grad_norm": 142.99278259277344, "learning_rate": 4.877450980392157e-05, "loss": 1.666, "step": 20 }, { "epoch": 0.11, "grad_norm": 87.81151580810547, "learning_rate": 4.816176470588236e-05, "loss": 1.0546, "step": 30 }, { "epoch": 0.15, "grad_norm": 50.04526901245117, "learning_rate": 4.7549019607843135e-05, "loss": 0.813, "step": 40 }, { "epoch": 0.18, "grad_norm": 22.704946517944336, "learning_rate": 4.6936274509803925e-05, "loss": 0.8422, "step": 50 }, { "epoch": 0.18, "eval_accuracy": 0.7178308823529411, "eval_f1_macro": 0.6371659017461913, "eval_f1_micro": 0.7178308823529411, "eval_loss": 0.6453067660331726, "eval_runtime": 13.5072, "eval_samples_per_second": 80.55, "eval_steps_per_second": 2.517, "step": 50 }, { "epoch": 0.22, "grad_norm": 20.923370361328125, "learning_rate": 4.632352941176471e-05, "loss": 0.5966, "step": 60 }, { "epoch": 0.26, "grad_norm": 66.72351837158203, "learning_rate": 4.571078431372549e-05, "loss": 0.7947, "step": 70 }, { "epoch": 0.29, "grad_norm": 157.05340576171875, "learning_rate": 4.5098039215686275e-05, "loss": 0.8614, "step": 80 }, { "epoch": 0.33, "grad_norm": 68.06228637695312, "learning_rate": 4.448529411764706e-05, "loss": 0.6034, "step": 90 }, { "epoch": 0.37, "grad_norm": 15.797639846801758, "learning_rate": 4.387254901960784e-05, "loss": 0.6082, "step": 100 }, { "epoch": 0.37, "eval_accuracy": 0.7472426470588235, "eval_f1_macro": 0.7123361820896584, "eval_f1_micro": 0.7472426470588235, "eval_loss": 0.5489143133163452, "eval_runtime": 13.5318, "eval_samples_per_second": 80.403, "eval_steps_per_second": 2.513, "step": 100 }, { "epoch": 0.4, "grad_norm": 45.799476623535156, "learning_rate": 4.325980392156863e-05, "loss": 0.5511, "step": 110 }, { "epoch": 0.44, "grad_norm": 49.33269500732422, "learning_rate": 4.2647058823529415e-05, "loss": 0.4801, "step": 120 }, { "epoch": 0.48, "grad_norm": 36.33636474609375, "learning_rate": 4.20343137254902e-05, "loss": 0.4603, "step": 130 }, { "epoch": 0.51, "grad_norm": 82.08908081054688, "learning_rate": 4.142156862745099e-05, "loss": 0.5596, "step": 140 }, { "epoch": 0.55, "grad_norm": 13.81618595123291, "learning_rate": 4.0808823529411765e-05, "loss": 0.4305, "step": 150 }, { "epoch": 0.55, "eval_accuracy": 0.7251838235294118, "eval_f1_macro": 0.5776786815440837, "eval_f1_micro": 0.7251838235294118, "eval_loss": 0.5571720004081726, "eval_runtime": 13.5624, "eval_samples_per_second": 80.222, "eval_steps_per_second": 2.507, "step": 150 }, { "epoch": 0.59, "grad_norm": 57.01852035522461, "learning_rate": 4.0196078431372555e-05, "loss": 0.5056, "step": 160 }, { "epoch": 0.62, "grad_norm": 53.905147552490234, "learning_rate": 3.958333333333333e-05, "loss": 0.567, "step": 170 }, { "epoch": 0.66, "grad_norm": 47.106292724609375, "learning_rate": 3.897058823529412e-05, "loss": 0.5069, "step": 180 }, { "epoch": 0.7, "grad_norm": 8.995195388793945, "learning_rate": 3.8357843137254904e-05, "loss": 0.5449, "step": 190 }, { "epoch": 0.74, "grad_norm": 79.07156372070312, "learning_rate": 3.774509803921569e-05, "loss": 0.5021, "step": 200 }, { "epoch": 
0.74, "eval_accuracy": 0.7720588235294118, "eval_f1_macro": 0.7436838605490643, "eval_f1_micro": 0.7720588235294118, "eval_loss": 0.49997127056121826, "eval_runtime": 13.6431, "eval_samples_per_second": 79.747, "eval_steps_per_second": 2.492, "step": 200 }, { "epoch": 0.77, "grad_norm": 15.758883476257324, "learning_rate": 3.713235294117647e-05, "loss": 0.5018, "step": 210 }, { "epoch": 0.81, "grad_norm": 183.47061157226562, "learning_rate": 3.6519607843137254e-05, "loss": 0.616, "step": 220 }, { "epoch": 0.85, "grad_norm": 48.367374420166016, "learning_rate": 3.5906862745098044e-05, "loss": 0.4927, "step": 230 }, { "epoch": 0.88, "grad_norm": 5.8350114822387695, "learning_rate": 3.529411764705883e-05, "loss": 0.4508, "step": 240 }, { "epoch": 0.92, "grad_norm": 15.554094314575195, "learning_rate": 3.468137254901961e-05, "loss": 0.4715, "step": 250 }, { "epoch": 0.92, "eval_accuracy": 0.7766544117647058, "eval_f1_macro": 0.7450627015924902, "eval_f1_micro": 0.7766544117647058, "eval_loss": 0.4901912808418274, "eval_runtime": 13.5595, "eval_samples_per_second": 80.239, "eval_steps_per_second": 2.507, "step": 250 } ], "logging_steps": 10, "max_steps": 816, "num_input_tokens_seen": 0, "num_train_epochs": 3, "save_steps": 50, "total_flos": 4.17576786067456e+16, "train_batch_size": 16, "trial_name": null, "trial_params": null }