akkky02's picture
Upload folder using huggingface_hub
4a3dcc6 verified
raw
history blame
No virus
18.9 kB
{
"best_metric": 0.4901912808418274,
"best_model_checkpoint": "../experiments_checkpoints/LoRA/Qwen/Qwen1.5_7B_LoRA_MAdAiLab/twitter_disaster/checkpoint-250",
"epoch": 3.0,
"eval_steps": 50,
"global_step": 816,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.04,
"grad_norm": 24.977563858032227,
"learning_rate": 4.9387254901960786e-05,
"loss": 1.7697,
"step": 10
},
{
"epoch": 0.07,
"grad_norm": 142.99278259277344,
"learning_rate": 4.877450980392157e-05,
"loss": 1.666,
"step": 20
},
{
"epoch": 0.11,
"grad_norm": 87.81151580810547,
"learning_rate": 4.816176470588236e-05,
"loss": 1.0546,
"step": 30
},
{
"epoch": 0.15,
"grad_norm": 50.04526901245117,
"learning_rate": 4.7549019607843135e-05,
"loss": 0.813,
"step": 40
},
{
"epoch": 0.18,
"grad_norm": 22.704946517944336,
"learning_rate": 4.6936274509803925e-05,
"loss": 0.8422,
"step": 50
},
{
"epoch": 0.18,
"eval_accuracy": 0.7178308823529411,
"eval_f1_macro": 0.6371659017461913,
"eval_f1_micro": 0.7178308823529411,
"eval_loss": 0.6453067660331726,
"eval_runtime": 13.5072,
"eval_samples_per_second": 80.55,
"eval_steps_per_second": 2.517,
"step": 50
},
{
"epoch": 0.22,
"grad_norm": 20.923370361328125,
"learning_rate": 4.632352941176471e-05,
"loss": 0.5966,
"step": 60
},
{
"epoch": 0.26,
"grad_norm": 66.72351837158203,
"learning_rate": 4.571078431372549e-05,
"loss": 0.7947,
"step": 70
},
{
"epoch": 0.29,
"grad_norm": 157.05340576171875,
"learning_rate": 4.5098039215686275e-05,
"loss": 0.8614,
"step": 80
},
{
"epoch": 0.33,
"grad_norm": 68.06228637695312,
"learning_rate": 4.448529411764706e-05,
"loss": 0.6034,
"step": 90
},
{
"epoch": 0.37,
"grad_norm": 15.797639846801758,
"learning_rate": 4.387254901960784e-05,
"loss": 0.6082,
"step": 100
},
{
"epoch": 0.37,
"eval_accuracy": 0.7472426470588235,
"eval_f1_macro": 0.7123361820896584,
"eval_f1_micro": 0.7472426470588235,
"eval_loss": 0.5489143133163452,
"eval_runtime": 13.5318,
"eval_samples_per_second": 80.403,
"eval_steps_per_second": 2.513,
"step": 100
},
{
"epoch": 0.4,
"grad_norm": 45.799476623535156,
"learning_rate": 4.325980392156863e-05,
"loss": 0.5511,
"step": 110
},
{
"epoch": 0.44,
"grad_norm": 49.33269500732422,
"learning_rate": 4.2647058823529415e-05,
"loss": 0.4801,
"step": 120
},
{
"epoch": 0.48,
"grad_norm": 36.33636474609375,
"learning_rate": 4.20343137254902e-05,
"loss": 0.4603,
"step": 130
},
{
"epoch": 0.51,
"grad_norm": 82.08908081054688,
"learning_rate": 4.142156862745099e-05,
"loss": 0.5596,
"step": 140
},
{
"epoch": 0.55,
"grad_norm": 13.81618595123291,
"learning_rate": 4.0808823529411765e-05,
"loss": 0.4305,
"step": 150
},
{
"epoch": 0.55,
"eval_accuracy": 0.7251838235294118,
"eval_f1_macro": 0.5776786815440837,
"eval_f1_micro": 0.7251838235294118,
"eval_loss": 0.5571720004081726,
"eval_runtime": 13.5624,
"eval_samples_per_second": 80.222,
"eval_steps_per_second": 2.507,
"step": 150
},
{
"epoch": 0.59,
"grad_norm": 57.01852035522461,
"learning_rate": 4.0196078431372555e-05,
"loss": 0.5056,
"step": 160
},
{
"epoch": 0.62,
"grad_norm": 53.905147552490234,
"learning_rate": 3.958333333333333e-05,
"loss": 0.567,
"step": 170
},
{
"epoch": 0.66,
"grad_norm": 47.106292724609375,
"learning_rate": 3.897058823529412e-05,
"loss": 0.5069,
"step": 180
},
{
"epoch": 0.7,
"grad_norm": 8.995195388793945,
"learning_rate": 3.8357843137254904e-05,
"loss": 0.5449,
"step": 190
},
{
"epoch": 0.74,
"grad_norm": 79.07156372070312,
"learning_rate": 3.774509803921569e-05,
"loss": 0.5021,
"step": 200
},
{
"epoch": 0.74,
"eval_accuracy": 0.7720588235294118,
"eval_f1_macro": 0.7436838605490643,
"eval_f1_micro": 0.7720588235294118,
"eval_loss": 0.49997127056121826,
"eval_runtime": 13.6431,
"eval_samples_per_second": 79.747,
"eval_steps_per_second": 2.492,
"step": 200
},
{
"epoch": 0.77,
"grad_norm": 15.758883476257324,
"learning_rate": 3.713235294117647e-05,
"loss": 0.5018,
"step": 210
},
{
"epoch": 0.81,
"grad_norm": 183.47061157226562,
"learning_rate": 3.6519607843137254e-05,
"loss": 0.616,
"step": 220
},
{
"epoch": 0.85,
"grad_norm": 48.367374420166016,
"learning_rate": 3.5906862745098044e-05,
"loss": 0.4927,
"step": 230
},
{
"epoch": 0.88,
"grad_norm": 5.8350114822387695,
"learning_rate": 3.529411764705883e-05,
"loss": 0.4508,
"step": 240
},
{
"epoch": 0.92,
"grad_norm": 15.554094314575195,
"learning_rate": 3.468137254901961e-05,
"loss": 0.4715,
"step": 250
},
{
"epoch": 0.92,
"eval_accuracy": 0.7766544117647058,
"eval_f1_macro": 0.7450627015924902,
"eval_f1_micro": 0.7766544117647058,
"eval_loss": 0.4901912808418274,
"eval_runtime": 13.5595,
"eval_samples_per_second": 80.239,
"eval_steps_per_second": 2.507,
"step": 250
},
{
"epoch": 0.96,
"grad_norm": 37.8280029296875,
"learning_rate": 3.4068627450980394e-05,
"loss": 0.4188,
"step": 260
},
{
"epoch": 0.99,
"grad_norm": 20.624736785888672,
"learning_rate": 3.345588235294118e-05,
"loss": 0.5049,
"step": 270
},
{
"epoch": 1.03,
"grad_norm": 60.454341888427734,
"learning_rate": 3.284313725490196e-05,
"loss": 0.4536,
"step": 280
},
{
"epoch": 1.07,
"grad_norm": 53.48725509643555,
"learning_rate": 3.223039215686275e-05,
"loss": 0.4097,
"step": 290
},
{
"epoch": 1.1,
"grad_norm": 18.42328453063965,
"learning_rate": 3.161764705882353e-05,
"loss": 0.3937,
"step": 300
},
{
"epoch": 1.1,
"eval_accuracy": 0.7601102941176471,
"eval_f1_macro": 0.7018342410563818,
"eval_f1_micro": 0.7601102941176471,
"eval_loss": 0.5194450616836548,
"eval_runtime": 13.566,
"eval_samples_per_second": 80.2,
"eval_steps_per_second": 2.506,
"step": 300
},
{
"epoch": 1.14,
"grad_norm": 42.46508026123047,
"learning_rate": 3.100490196078432e-05,
"loss": 0.4045,
"step": 310
},
{
"epoch": 1.18,
"grad_norm": 65.97752380371094,
"learning_rate": 3.0392156862745097e-05,
"loss": 0.3829,
"step": 320
},
{
"epoch": 1.21,
"grad_norm": 4.754347801208496,
"learning_rate": 2.9779411764705883e-05,
"loss": 0.4535,
"step": 330
},
{
"epoch": 1.25,
"grad_norm": 86.20097351074219,
"learning_rate": 2.916666666666667e-05,
"loss": 0.4082,
"step": 340
},
{
"epoch": 1.29,
"grad_norm": 17.790315628051758,
"learning_rate": 2.855392156862745e-05,
"loss": 0.4219,
"step": 350
},
{
"epoch": 1.29,
"eval_accuracy": 0.7665441176470589,
"eval_f1_macro": 0.7228498074454428,
"eval_f1_micro": 0.7665441176470589,
"eval_loss": 0.5227769017219543,
"eval_runtime": 13.5702,
"eval_samples_per_second": 80.176,
"eval_steps_per_second": 2.505,
"step": 350
},
{
"epoch": 1.32,
"grad_norm": 47.08971405029297,
"learning_rate": 2.7941176470588236e-05,
"loss": 0.4235,
"step": 360
},
{
"epoch": 1.36,
"grad_norm": 13.4403715133667,
"learning_rate": 2.732843137254902e-05,
"loss": 0.3631,
"step": 370
},
{
"epoch": 1.4,
"grad_norm": 50.05192184448242,
"learning_rate": 2.6715686274509806e-05,
"loss": 0.5085,
"step": 380
},
{
"epoch": 1.43,
"grad_norm": 97.1346206665039,
"learning_rate": 2.6102941176470593e-05,
"loss": 0.4432,
"step": 390
},
{
"epoch": 1.47,
"grad_norm": 52.068641662597656,
"learning_rate": 2.5490196078431373e-05,
"loss": 0.4315,
"step": 400
},
{
"epoch": 1.47,
"eval_accuracy": 0.7555147058823529,
"eval_f1_macro": 0.6900563751949143,
"eval_f1_micro": 0.7555147058823529,
"eval_loss": 0.5791015625,
"eval_runtime": 13.6465,
"eval_samples_per_second": 79.728,
"eval_steps_per_second": 2.491,
"step": 400
},
{
"epoch": 1.51,
"grad_norm": 4.731142997741699,
"learning_rate": 2.487745098039216e-05,
"loss": 0.4396,
"step": 410
},
{
"epoch": 1.54,
"grad_norm": 22.810226440429688,
"learning_rate": 2.4264705882352942e-05,
"loss": 0.4104,
"step": 420
},
{
"epoch": 1.58,
"grad_norm": 14.011224746704102,
"learning_rate": 2.3651960784313726e-05,
"loss": 0.3847,
"step": 430
},
{
"epoch": 1.62,
"grad_norm": 27.048315048217773,
"learning_rate": 2.303921568627451e-05,
"loss": 0.3681,
"step": 440
},
{
"epoch": 1.65,
"grad_norm": 6.750571250915527,
"learning_rate": 2.2426470588235296e-05,
"loss": 0.4134,
"step": 450
},
{
"epoch": 1.65,
"eval_accuracy": 0.7389705882352942,
"eval_f1_macro": 0.719594754017431,
"eval_f1_micro": 0.7389705882352942,
"eval_loss": 0.6182358860969543,
"eval_runtime": 13.5558,
"eval_samples_per_second": 80.261,
"eval_steps_per_second": 2.508,
"step": 450
},
{
"epoch": 1.69,
"grad_norm": 51.78306579589844,
"learning_rate": 2.181372549019608e-05,
"loss": 0.4691,
"step": 460
},
{
"epoch": 1.73,
"grad_norm": 56.603797912597656,
"learning_rate": 2.1200980392156862e-05,
"loss": 0.4194,
"step": 470
},
{
"epoch": 1.76,
"grad_norm": 24.38219451904297,
"learning_rate": 2.058823529411765e-05,
"loss": 0.3631,
"step": 480
},
{
"epoch": 1.8,
"grad_norm": 44.108612060546875,
"learning_rate": 1.9975490196078432e-05,
"loss": 0.3859,
"step": 490
},
{
"epoch": 1.84,
"grad_norm": 68.38048553466797,
"learning_rate": 1.936274509803922e-05,
"loss": 0.4173,
"step": 500
},
{
"epoch": 1.84,
"eval_accuracy": 0.7637867647058824,
"eval_f1_macro": 0.7115502256608639,
"eval_f1_micro": 0.7637867647058824,
"eval_loss": 0.5453814268112183,
"eval_runtime": 13.5612,
"eval_samples_per_second": 80.229,
"eval_steps_per_second": 2.507,
"step": 500
},
{
"epoch": 1.88,
"grad_norm": 18.69806480407715,
"learning_rate": 1.8750000000000002e-05,
"loss": 0.4323,
"step": 510
},
{
"epoch": 1.91,
"grad_norm": 15.339709281921387,
"learning_rate": 1.8137254901960785e-05,
"loss": 0.4529,
"step": 520
},
{
"epoch": 1.95,
"grad_norm": 85.60708618164062,
"learning_rate": 1.7524509803921568e-05,
"loss": 0.4462,
"step": 530
},
{
"epoch": 1.99,
"grad_norm": 42.06242752075195,
"learning_rate": 1.6911764705882355e-05,
"loss": 0.3753,
"step": 540
},
{
"epoch": 2.02,
"grad_norm": 43.271724700927734,
"learning_rate": 1.6299019607843138e-05,
"loss": 0.3278,
"step": 550
},
{
"epoch": 2.02,
"eval_accuracy": 0.7720588235294118,
"eval_f1_macro": 0.7219169329073483,
"eval_f1_micro": 0.7720588235294118,
"eval_loss": 0.5476648807525635,
"eval_runtime": 13.5763,
"eval_samples_per_second": 80.14,
"eval_steps_per_second": 2.504,
"step": 550
},
{
"epoch": 2.06,
"grad_norm": 63.24125289916992,
"learning_rate": 1.568627450980392e-05,
"loss": 0.3151,
"step": 560
},
{
"epoch": 2.1,
"grad_norm": 18.521787643432617,
"learning_rate": 1.5073529411764706e-05,
"loss": 0.2909,
"step": 570
},
{
"epoch": 2.13,
"grad_norm": 21.641969680786133,
"learning_rate": 1.4460784313725493e-05,
"loss": 0.2893,
"step": 580
},
{
"epoch": 2.17,
"grad_norm": 26.12430763244629,
"learning_rate": 1.3848039215686276e-05,
"loss": 0.2642,
"step": 590
},
{
"epoch": 2.21,
"grad_norm": 5.281397819519043,
"learning_rate": 1.323529411764706e-05,
"loss": 0.2641,
"step": 600
},
{
"epoch": 2.21,
"eval_accuracy": 0.7527573529411765,
"eval_f1_macro": 0.7152217678178568,
"eval_f1_micro": 0.7527573529411765,
"eval_loss": 0.6011173129081726,
"eval_runtime": 13.6441,
"eval_samples_per_second": 79.741,
"eval_steps_per_second": 2.492,
"step": 600
},
{
"epoch": 2.24,
"grad_norm": 32.14168167114258,
"learning_rate": 1.2622549019607843e-05,
"loss": 0.2578,
"step": 610
},
{
"epoch": 2.28,
"grad_norm": 8.231173515319824,
"learning_rate": 1.200980392156863e-05,
"loss": 0.2012,
"step": 620
},
{
"epoch": 2.32,
"grad_norm": 46.39328384399414,
"learning_rate": 1.1397058823529412e-05,
"loss": 0.2582,
"step": 630
},
{
"epoch": 2.35,
"grad_norm": 52.687957763671875,
"learning_rate": 1.0784313725490197e-05,
"loss": 0.289,
"step": 640
},
{
"epoch": 2.39,
"grad_norm": 22.3091983795166,
"learning_rate": 1.017156862745098e-05,
"loss": 0.2256,
"step": 650
},
{
"epoch": 2.39,
"eval_accuracy": 0.7601102941176471,
"eval_f1_macro": 0.6962074067417461,
"eval_f1_micro": 0.7601102941176471,
"eval_loss": 0.6484518647193909,
"eval_runtime": 13.5588,
"eval_samples_per_second": 80.243,
"eval_steps_per_second": 2.508,
"step": 650
},
{
"epoch": 2.43,
"grad_norm": 16.085580825805664,
"learning_rate": 9.558823529411764e-06,
"loss": 0.2501,
"step": 660
},
{
"epoch": 2.46,
"grad_norm": 20.8741455078125,
"learning_rate": 8.946078431372549e-06,
"loss": 0.3018,
"step": 670
},
{
"epoch": 2.5,
"grad_norm": 25.310302734375,
"learning_rate": 8.333333333333334e-06,
"loss": 0.2451,
"step": 680
},
{
"epoch": 2.54,
"grad_norm": 17.985490798950195,
"learning_rate": 7.720588235294119e-06,
"loss": 0.232,
"step": 690
},
{
"epoch": 2.57,
"grad_norm": 7.154005527496338,
"learning_rate": 7.107843137254902e-06,
"loss": 0.2544,
"step": 700
},
{
"epoch": 2.57,
"eval_accuracy": 0.7628676470588235,
"eval_f1_macro": 0.7165018421562924,
"eval_f1_micro": 0.7628676470588235,
"eval_loss": 0.6459027528762817,
"eval_runtime": 13.5807,
"eval_samples_per_second": 80.114,
"eval_steps_per_second": 2.504,
"step": 700
},
{
"epoch": 2.61,
"grad_norm": 9.307502746582031,
"learning_rate": 6.495098039215687e-06,
"loss": 0.3067,
"step": 710
},
{
"epoch": 2.65,
"grad_norm": 9.009349822998047,
"learning_rate": 5.882352941176471e-06,
"loss": 0.2225,
"step": 720
},
{
"epoch": 2.68,
"grad_norm": 14.490361213684082,
"learning_rate": 5.269607843137255e-06,
"loss": 0.2576,
"step": 730
},
{
"epoch": 2.72,
"grad_norm": 24.25351333618164,
"learning_rate": 4.65686274509804e-06,
"loss": 0.2805,
"step": 740
},
{
"epoch": 2.76,
"grad_norm": 14.948960304260254,
"learning_rate": 4.044117647058824e-06,
"loss": 0.2839,
"step": 750
},
{
"epoch": 2.76,
"eval_accuracy": 0.765625,
"eval_f1_macro": 0.7252674888969208,
"eval_f1_micro": 0.765625,
"eval_loss": 0.5921774506568909,
"eval_runtime": 13.5697,
"eval_samples_per_second": 80.178,
"eval_steps_per_second": 2.506,
"step": 750
},
{
"epoch": 2.79,
"grad_norm": 43.209163665771484,
"learning_rate": 3.431372549019608e-06,
"loss": 0.2235,
"step": 760
},
{
"epoch": 2.83,
"grad_norm": 7.3651604652404785,
"learning_rate": 2.818627450980392e-06,
"loss": 0.2487,
"step": 770
},
{
"epoch": 2.87,
"grad_norm": 21.250938415527344,
"learning_rate": 2.2058823529411767e-06,
"loss": 0.2038,
"step": 780
},
{
"epoch": 2.9,
"grad_norm": 21.40656852722168,
"learning_rate": 1.5931372549019608e-06,
"loss": 0.27,
"step": 790
},
{
"epoch": 2.94,
"grad_norm": 32.97100830078125,
"learning_rate": 9.80392156862745e-07,
"loss": 0.2634,
"step": 800
},
{
"epoch": 2.94,
"eval_accuracy": 0.7637867647058824,
"eval_f1_macro": 0.7075804081718023,
"eval_f1_micro": 0.7637867647058824,
"eval_loss": 0.6311897039413452,
"eval_runtime": 13.5748,
"eval_samples_per_second": 80.148,
"eval_steps_per_second": 2.505,
"step": 800
},
{
"epoch": 2.98,
"grad_norm": 8.623468399047852,
"learning_rate": 3.6764705882352943e-07,
"loss": 0.2572,
"step": 810
},
{
"epoch": 3.0,
"step": 816,
"total_flos": 1.3629706293215232e+17,
"train_loss": 0.4493609409706265,
"train_runtime": 1238.3624,
"train_samples_per_second": 21.076,
"train_steps_per_second": 0.659
}
],
"logging_steps": 10,
"max_steps": 816,
"num_input_tokens_seen": 0,
"num_train_epochs": 3,
"save_steps": 50,
"total_flos": 1.3629706293215232e+17,
"train_batch_size": 16,
"trial_name": null,
"trial_params": null
}