{ "best_metric": 0.5038631558418274, "best_model_checkpoint": "../../experiments_checkpoints/MAdAiLab/Qwen/Qwen1.5_1.8B_twitter/checkpoint-250", "epoch": 0.9191176470588235, "eval_steps": 50, "global_step": 250, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.04, "grad_norm": 109.95573425292969, "learning_rate": 4.938725490196079e-06, "loss": 1.1177, "step": 10 }, { "epoch": 0.07, "grad_norm": 117.97087860107422, "learning_rate": 4.8774509803921576e-06, "loss": 0.8448, "step": 20 }, { "epoch": 0.11, "grad_norm": 46.05353546142578, "learning_rate": 4.816176470588236e-06, "loss": 0.8301, "step": 30 }, { "epoch": 0.15, "grad_norm": 41.749080657958984, "learning_rate": 4.754901960784314e-06, "loss": 0.7069, "step": 40 }, { "epoch": 0.18, "grad_norm": 26.182971954345703, "learning_rate": 4.693627450980393e-06, "loss": 0.6585, "step": 50 }, { "epoch": 0.18, "eval_accuracy": 0.7123161764705882, "eval_f1_macro": 0.5810560943233977, "eval_f1_micro": 0.7123161764705882, "eval_loss": 0.6434972286224365, "eval_runtime": 2.8155, "eval_samples_per_second": 386.429, "eval_steps_per_second": 12.076, "step": 50 }, { "epoch": 0.22, "grad_norm": 118.7502212524414, "learning_rate": 4.632352941176471e-06, "loss": 0.6179, "step": 60 }, { "epoch": 0.26, "grad_norm": 34.24118423461914, "learning_rate": 4.571078431372549e-06, "loss": 0.6324, "step": 70 }, { "epoch": 0.29, "grad_norm": 92.65385437011719, "learning_rate": 4.509803921568628e-06, "loss": 0.5743, "step": 80 }, { "epoch": 0.33, "grad_norm": 44.45353317260742, "learning_rate": 4.448529411764706e-06, "loss": 0.4798, "step": 90 }, { "epoch": 0.37, "grad_norm": 35.91661071777344, "learning_rate": 4.3872549019607845e-06, "loss": 0.6396, "step": 100 }, { "epoch": 0.37, "eval_accuracy": 0.7297794117647058, "eval_f1_macro": 0.6997657235537935, "eval_f1_micro": 0.7297794117647058, "eval_loss": 0.6015912294387817, "eval_runtime": 2.832, "eval_samples_per_second": 384.18, "eval_steps_per_second": 12.006, "step": 100 }, { "epoch": 0.4, "grad_norm": 56.78458786010742, "learning_rate": 4.3259803921568635e-06, "loss": 0.5814, "step": 110 }, { "epoch": 0.44, "grad_norm": 53.924827575683594, "learning_rate": 4.264705882352942e-06, "loss": 0.5131, "step": 120 }, { "epoch": 0.48, "grad_norm": 74.93573760986328, "learning_rate": 4.20343137254902e-06, "loss": 0.5242, "step": 130 }, { "epoch": 0.51, "grad_norm": 26.097169876098633, "learning_rate": 4.142156862745099e-06, "loss": 0.5529, "step": 140 }, { "epoch": 0.55, "grad_norm": 78.16165161132812, "learning_rate": 4.080882352941177e-06, "loss": 0.5108, "step": 150 }, { "epoch": 0.55, "eval_accuracy": 0.7527573529411765, "eval_f1_macro": 0.6963299829940972, "eval_f1_micro": 0.7527573529411765, "eval_loss": 0.5226907134056091, "eval_runtime": 2.826, "eval_samples_per_second": 384.993, "eval_steps_per_second": 12.031, "step": 150 }, { "epoch": 0.59, "grad_norm": 19.04606056213379, "learning_rate": 4.019607843137255e-06, "loss": 0.5012, "step": 160 }, { "epoch": 0.62, "grad_norm": 127.43460845947266, "learning_rate": 3.958333333333333e-06, "loss": 0.5601, "step": 170 }, { "epoch": 0.66, "grad_norm": 73.45539093017578, "learning_rate": 3.897058823529412e-06, "loss": 0.6134, "step": 180 }, { "epoch": 0.7, "grad_norm": 59.42979049682617, "learning_rate": 3.8357843137254904e-06, "loss": 0.5447, "step": 190 }, { "epoch": 0.74, "grad_norm": 22.45537757873535, "learning_rate": 3.774509803921569e-06, "loss": 0.5065, "step": 200 }, { "epoch": 0.74, "eval_accuracy": 0.7417279411764706, "eval_f1_macro": 0.6346563132227484, "eval_f1_micro": 0.7417279411764706, "eval_loss": 0.5502642393112183, "eval_runtime": 2.8852, "eval_samples_per_second": 377.094, "eval_steps_per_second": 11.784, "step": 200 }, { "epoch": 0.77, "grad_norm": 50.938880920410156, "learning_rate": 3.7132352941176476e-06, "loss": 0.4589, "step": 210 }, { "epoch": 0.81, "grad_norm": 84.68132781982422, "learning_rate": 3.6519607843137257e-06, "loss": 0.5403, "step": 220 }, { "epoch": 0.85, "grad_norm": 27.052024841308594, "learning_rate": 3.5906862745098043e-06, "loss": 0.4618, "step": 230 }, { "epoch": 0.88, "grad_norm": 23.497787475585938, "learning_rate": 3.529411764705883e-06, "loss": 0.452, "step": 240 }, { "epoch": 0.92, "grad_norm": 15.185086250305176, "learning_rate": 3.468137254901961e-06, "loss": 0.4883, "step": 250 }, { "epoch": 0.92, "eval_accuracy": 0.7775735294117647, "eval_f1_macro": 0.7420002194942226, "eval_f1_micro": 0.7775735294117647, "eval_loss": 0.5038631558418274, "eval_runtime": 2.8371, "eval_samples_per_second": 383.494, "eval_steps_per_second": 11.984, "step": 250 } ], "logging_steps": 10, "max_steps": 816, "num_input_tokens_seen": 0, "num_train_epochs": 3, "save_steps": 50, "total_flos": 7461905891328000.0, "train_batch_size": 16, "trial_name": null, "trial_params": null }