{ "best_metric": 1.6124553680419922, "best_model_checkpoint": "../experiments_checkpoints/LoRA/Qwen/Qwen1.5_7B_LoRA_coastalcph/lex_glue/checkpoint-400", "epoch": 2.5477707006369426, "eval_steps": 50, "global_step": 400, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.06, "grad_norm": 83.9561996459961, "learning_rate": 4.893842887473461e-05, "loss": 8.475, "step": 10 }, { "epoch": 0.13, "grad_norm": 91.12702941894531, "learning_rate": 4.787685774946922e-05, "loss": 5.743, "step": 20 }, { "epoch": 0.19, "grad_norm": 89.63079071044922, "learning_rate": 4.681528662420383e-05, "loss": 4.143, "step": 30 }, { "epoch": 0.25, "grad_norm": 232.11468505859375, "learning_rate": 4.575371549893843e-05, "loss": 2.7887, "step": 40 }, { "epoch": 0.32, "grad_norm": 57.7495231628418, "learning_rate": 4.469214437367304e-05, "loss": 2.3973, "step": 50 }, { "epoch": 0.32, "eval_accuracy": 0.38, "eval_f1_macro": 0.16768233889185707, "eval_f1_micro": 0.38, "eval_loss": 2.194821357727051, "eval_runtime": 17.0388, "eval_samples_per_second": 82.165, "eval_steps_per_second": 2.582, "step": 50 }, { "epoch": 0.38, "grad_norm": 60.894527435302734, "learning_rate": 4.3630573248407646e-05, "loss": 1.8434, "step": 60 }, { "epoch": 0.45, "grad_norm": 33.58325958251953, "learning_rate": 4.256900212314226e-05, "loss": 1.732, "step": 70 }, { "epoch": 0.51, "grad_norm": 63.34943771362305, "learning_rate": 4.150743099787686e-05, "loss": 1.8363, "step": 80 }, { "epoch": 0.57, "grad_norm": 46.44338607788086, "learning_rate": 4.044585987261147e-05, "loss": 1.7555, "step": 90 }, { "epoch": 0.64, "grad_norm": 43.241111755371094, "learning_rate": 3.9384288747346076e-05, "loss": 1.6438, "step": 100 }, { "epoch": 0.64, "eval_accuracy": 0.42714285714285716, "eval_f1_macro": 0.24662591434994435, "eval_f1_micro": 0.42714285714285716, "eval_loss": 1.8118304014205933, "eval_runtime": 17.1543, "eval_samples_per_second": 81.612, "eval_steps_per_second": 2.565, "step": 100 }, { "epoch": 0.7, "grad_norm": 112.62821197509766, "learning_rate": 3.8322717622080686e-05, "loss": 1.7039, "step": 110 }, { "epoch": 0.76, "grad_norm": 59.15960693359375, "learning_rate": 3.7261146496815283e-05, "loss": 1.7463, "step": 120 }, { "epoch": 0.83, "grad_norm": 64.70002746582031, "learning_rate": 3.6199575371549894e-05, "loss": 1.6117, "step": 130 }, { "epoch": 0.89, "grad_norm": 66.38226318359375, "learning_rate": 3.51380042462845e-05, "loss": 1.6342, "step": 140 }, { "epoch": 0.96, "grad_norm": 82.49270629882812, "learning_rate": 3.407643312101911e-05, "loss": 1.7379, "step": 150 }, { "epoch": 0.96, "eval_accuracy": 0.47714285714285715, "eval_f1_macro": 0.27038446232193303, "eval_f1_micro": 0.47714285714285715, "eval_loss": 1.7119196653366089, "eval_runtime": 17.2159, "eval_samples_per_second": 81.32, "eval_steps_per_second": 2.556, "step": 150 }, { "epoch": 1.02, "grad_norm": 72.54532623291016, "learning_rate": 3.301486199575371e-05, "loss": 1.5684, "step": 160 }, { "epoch": 1.08, "grad_norm": 43.69814682006836, "learning_rate": 3.1953290870488323e-05, "loss": 1.4182, "step": 170 }, { "epoch": 1.15, "grad_norm": 101.71517181396484, "learning_rate": 3.089171974522293e-05, "loss": 1.4734, "step": 180 }, { "epoch": 1.21, "grad_norm": 75.00550842285156, "learning_rate": 2.9830148619957538e-05, "loss": 1.4867, "step": 190 }, { "epoch": 1.27, "grad_norm": 72.43107604980469, "learning_rate": 2.8768577494692145e-05, "loss": 1.409, "step": 200 }, { "epoch": 1.27, "eval_accuracy": 0.48714285714285716, "eval_f1_macro": 0.2973212519943923, "eval_f1_micro": 0.48714285714285716, "eval_loss": 1.748794674873352, "eval_runtime": 17.186, "eval_samples_per_second": 81.461, "eval_steps_per_second": 2.56, "step": 200 }, { "epoch": 1.34, "grad_norm": 53.7850456237793, "learning_rate": 2.7707006369426753e-05, "loss": 1.5008, "step": 210 }, { "epoch": 1.4, "grad_norm": 83.63137817382812, "learning_rate": 2.664543524416136e-05, "loss": 1.3074, "step": 220 }, { "epoch": 1.46, "grad_norm": 81.88774108886719, "learning_rate": 2.5583864118895967e-05, "loss": 1.3816, "step": 230 }, { "epoch": 1.53, "grad_norm": 35.77708053588867, "learning_rate": 2.4522292993630575e-05, "loss": 1.2949, "step": 240 }, { "epoch": 1.59, "grad_norm": 66.99419403076172, "learning_rate": 2.3460721868365182e-05, "loss": 1.2443, "step": 250 }, { "epoch": 1.59, "eval_accuracy": 0.5364285714285715, "eval_f1_macro": 0.33337140604249654, "eval_f1_micro": 0.5364285714285715, "eval_loss": 1.6798213720321655, "eval_runtime": 17.2342, "eval_samples_per_second": 81.234, "eval_steps_per_second": 2.553, "step": 250 }, { "epoch": 1.66, "grad_norm": 42.91401290893555, "learning_rate": 2.239915074309979e-05, "loss": 1.3525, "step": 260 }, { "epoch": 1.72, "grad_norm": 43.42304611206055, "learning_rate": 2.1337579617834397e-05, "loss": 1.324, "step": 270 }, { "epoch": 1.78, "grad_norm": 53.237369537353516, "learning_rate": 2.0276008492569004e-05, "loss": 1.2523, "step": 280 }, { "epoch": 1.85, "grad_norm": 71.94126892089844, "learning_rate": 1.921443736730361e-05, "loss": 1.3336, "step": 290 }, { "epoch": 1.91, "grad_norm": 39.801326751708984, "learning_rate": 1.8152866242038215e-05, "loss": 1.1602, "step": 300 }, { "epoch": 1.91, "eval_accuracy": 0.5242857142857142, "eval_f1_macro": 0.35727383145505925, "eval_f1_micro": 0.5242857142857142, "eval_loss": 1.6131696701049805, "eval_runtime": 17.2122, "eval_samples_per_second": 81.337, "eval_steps_per_second": 2.556, "step": 300 }, { "epoch": 1.97, "grad_norm": 40.524967193603516, "learning_rate": 1.7091295116772823e-05, "loss": 1.3412, "step": 310 }, { "epoch": 2.04, "grad_norm": 53.3381462097168, "learning_rate": 1.602972399150743e-05, "loss": 1.0643, "step": 320 }, { "epoch": 2.1, "grad_norm": 37.6945686340332, "learning_rate": 1.4968152866242039e-05, "loss": 1.0189, "step": 330 }, { "epoch": 2.17, "grad_norm": 36.366859436035156, "learning_rate": 1.3906581740976646e-05, "loss": 1.1482, "step": 340 }, { "epoch": 2.23, "grad_norm": 66.20160675048828, "learning_rate": 1.2845010615711253e-05, "loss": 1.1191, "step": 350 }, { "epoch": 2.23, "eval_accuracy": 0.5385714285714286, "eval_f1_macro": 0.3914097926983185, "eval_f1_micro": 0.5385714285714286, "eval_loss": 1.6507365703582764, "eval_runtime": 17.2629, "eval_samples_per_second": 81.099, "eval_steps_per_second": 2.549, "step": 350 }, { "epoch": 2.29, "grad_norm": 22.359830856323242, "learning_rate": 1.178343949044586e-05, "loss": 0.8946, "step": 360 }, { "epoch": 2.36, "grad_norm": 64.09331512451172, "learning_rate": 1.0721868365180468e-05, "loss": 0.9666, "step": 370 }, { "epoch": 2.42, "grad_norm": 74.93325805664062, "learning_rate": 9.660297239915075e-06, "loss": 1.0045, "step": 380 }, { "epoch": 2.48, "grad_norm": 67.56449127197266, "learning_rate": 8.598726114649681e-06, "loss": 0.9639, "step": 390 }, { "epoch": 2.55, "grad_norm": 34.843387603759766, "learning_rate": 7.537154989384289e-06, "loss": 0.8907, "step": 400 }, { "epoch": 2.55, "eval_accuracy": 0.5507142857142857, "eval_f1_macro": 0.4051157419199404, "eval_f1_micro": 0.5507142857142857, "eval_loss": 1.6124553680419922, "eval_runtime": 17.2236, "eval_samples_per_second": 81.284, "eval_steps_per_second": 2.555, "step": 400 } ], "logging_steps": 10, "max_steps": 471, "num_input_tokens_seen": 0, "num_train_epochs": 3, "save_steps": 50, "total_flos": 6.681316623908864e+16, "train_batch_size": 16, "trial_name": null, "trial_params": null }