akkky02's picture
Upload folder using huggingface_hub
8f76322 verified
raw
history blame
No virus
11.2 kB
{
"best_metric": 2.0716516971588135,
"best_model_checkpoint": "../../experiments_checkpoints/MAdAiLab/microsoft/phi_2_scotus/checkpoint-400",
"epoch": 3.0,
"eval_steps": 50,
"global_step": 471,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.06,
"grad_norm": 431.59503173828125,
"learning_rate": 4.893842887473461e-06,
"loss": 3.1594,
"step": 10
},
{
"epoch": 0.13,
"grad_norm": 229.4266357421875,
"learning_rate": 4.787685774946922e-06,
"loss": 2.5938,
"step": 20
},
{
"epoch": 0.19,
"grad_norm": 349.4831848144531,
"learning_rate": 4.6815286624203824e-06,
"loss": 2.5949,
"step": 30
},
{
"epoch": 0.25,
"grad_norm": 381.1759033203125,
"learning_rate": 4.575371549893844e-06,
"loss": 2.4016,
"step": 40
},
{
"epoch": 0.32,
"grad_norm": 266.1092529296875,
"learning_rate": 4.469214437367304e-06,
"loss": 2.5187,
"step": 50
},
{
"epoch": 0.32,
"eval_accuracy": 0.21714285714285714,
"eval_f1_macro": 0.09407480701856562,
"eval_f1_micro": 0.21714285714285714,
"eval_loss": 2.465625047683716,
"eval_runtime": 6.6333,
"eval_samples_per_second": 211.056,
"eval_steps_per_second": 6.633,
"step": 50
},
{
"epoch": 0.38,
"grad_norm": 364.63336181640625,
"learning_rate": 4.3630573248407645e-06,
"loss": 2.3957,
"step": 60
},
{
"epoch": 0.45,
"grad_norm": 492.18695068359375,
"learning_rate": 4.256900212314226e-06,
"loss": 2.3813,
"step": 70
},
{
"epoch": 0.51,
"grad_norm": 341.39208984375,
"learning_rate": 4.150743099787686e-06,
"loss": 2.4027,
"step": 80
},
{
"epoch": 0.57,
"grad_norm": 281.28106689453125,
"learning_rate": 4.044585987261147e-06,
"loss": 2.2816,
"step": 90
},
{
"epoch": 0.64,
"grad_norm": 750.2114868164062,
"learning_rate": 3.938428874734608e-06,
"loss": 2.2348,
"step": 100
},
{
"epoch": 0.64,
"eval_accuracy": 0.25785714285714284,
"eval_f1_macro": 0.09797406214322793,
"eval_f1_micro": 0.25785714285714284,
"eval_loss": 2.315580368041992,
"eval_runtime": 6.6546,
"eval_samples_per_second": 210.38,
"eval_steps_per_second": 6.612,
"step": 100
},
{
"epoch": 0.7,
"grad_norm": 516.4442749023438,
"learning_rate": 3.832271762208068e-06,
"loss": 2.227,
"step": 110
},
{
"epoch": 0.76,
"grad_norm": 475.4119567871094,
"learning_rate": 3.7261146496815285e-06,
"loss": 2.2816,
"step": 120
},
{
"epoch": 0.83,
"grad_norm": 450.1903076171875,
"learning_rate": 3.6199575371549893e-06,
"loss": 2.0625,
"step": 130
},
{
"epoch": 0.89,
"grad_norm": 303.1587219238281,
"learning_rate": 3.51380042462845e-06,
"loss": 2.0875,
"step": 140
},
{
"epoch": 0.96,
"grad_norm": 450.5704345703125,
"learning_rate": 3.407643312101911e-06,
"loss": 2.2023,
"step": 150
},
{
"epoch": 0.96,
"eval_accuracy": 0.2914285714285714,
"eval_f1_macro": 0.11026826954041259,
"eval_f1_micro": 0.2914285714285714,
"eval_loss": 2.2223215103149414,
"eval_runtime": 6.6626,
"eval_samples_per_second": 210.129,
"eval_steps_per_second": 6.604,
"step": 150
},
{
"epoch": 1.02,
"grad_norm": 471.473876953125,
"learning_rate": 3.3014861995753718e-06,
"loss": 2.1367,
"step": 160
},
{
"epoch": 1.08,
"grad_norm": 415.1793212890625,
"learning_rate": 3.195329087048832e-06,
"loss": 2.0531,
"step": 170
},
{
"epoch": 1.15,
"grad_norm": 258.8355712890625,
"learning_rate": 3.089171974522293e-06,
"loss": 2.0867,
"step": 180
},
{
"epoch": 1.21,
"grad_norm": 515.2882080078125,
"learning_rate": 2.983014861995754e-06,
"loss": 2.1055,
"step": 190
},
{
"epoch": 1.27,
"grad_norm": 467.22967529296875,
"learning_rate": 2.8768577494692146e-06,
"loss": 2.1145,
"step": 200
},
{
"epoch": 1.27,
"eval_accuracy": 0.30642857142857144,
"eval_f1_macro": 0.11391156543615343,
"eval_f1_micro": 0.30642857142857144,
"eval_loss": 2.179955244064331,
"eval_runtime": 6.6599,
"eval_samples_per_second": 210.212,
"eval_steps_per_second": 6.607,
"step": 200
},
{
"epoch": 1.34,
"grad_norm": 663.5429077148438,
"learning_rate": 2.7707006369426754e-06,
"loss": 2.141,
"step": 210
},
{
"epoch": 1.4,
"grad_norm": 476.073486328125,
"learning_rate": 2.6645435244161363e-06,
"loss": 1.9898,
"step": 220
},
{
"epoch": 1.46,
"grad_norm": 424.5915222167969,
"learning_rate": 2.5583864118895966e-06,
"loss": 2.0258,
"step": 230
},
{
"epoch": 1.53,
"grad_norm": 548.7160034179688,
"learning_rate": 2.4522292993630575e-06,
"loss": 1.9828,
"step": 240
},
{
"epoch": 1.59,
"grad_norm": 205.3092041015625,
"learning_rate": 2.3460721868365183e-06,
"loss": 1.993,
"step": 250
},
{
"epoch": 1.59,
"eval_accuracy": 0.31785714285714284,
"eval_f1_macro": 0.12540622823012645,
"eval_f1_micro": 0.31785714285714284,
"eval_loss": 2.135892868041992,
"eval_runtime": 6.6619,
"eval_samples_per_second": 210.149,
"eval_steps_per_second": 6.605,
"step": 250
},
{
"epoch": 1.66,
"grad_norm": 362.8172912597656,
"learning_rate": 2.239915074309979e-06,
"loss": 2.0078,
"step": 260
},
{
"epoch": 1.72,
"grad_norm": 251.77694702148438,
"learning_rate": 2.13375796178344e-06,
"loss": 2.0547,
"step": 270
},
{
"epoch": 1.78,
"grad_norm": 262.9300537109375,
"learning_rate": 2.0276008492569003e-06,
"loss": 1.9664,
"step": 280
},
{
"epoch": 1.85,
"grad_norm": 256.0235900878906,
"learning_rate": 1.921443736730361e-06,
"loss": 2.025,
"step": 290
},
{
"epoch": 1.91,
"grad_norm": 284.8929443359375,
"learning_rate": 1.8152866242038217e-06,
"loss": 1.9609,
"step": 300
},
{
"epoch": 1.91,
"eval_accuracy": 0.3457142857142857,
"eval_f1_macro": 0.1344938590991505,
"eval_f1_micro": 0.3457142857142857,
"eval_loss": 2.1033928394317627,
"eval_runtime": 6.6623,
"eval_samples_per_second": 210.137,
"eval_steps_per_second": 6.604,
"step": 300
},
{
"epoch": 1.97,
"grad_norm": 274.1907043457031,
"learning_rate": 1.7091295116772823e-06,
"loss": 2.05,
"step": 310
},
{
"epoch": 2.04,
"grad_norm": 187.34756469726562,
"learning_rate": 1.6029723991507432e-06,
"loss": 1.9762,
"step": 320
},
{
"epoch": 2.1,
"grad_norm": 178.41671752929688,
"learning_rate": 1.496815286624204e-06,
"loss": 1.9336,
"step": 330
},
{
"epoch": 2.17,
"grad_norm": 246.0443115234375,
"learning_rate": 1.3906581740976646e-06,
"loss": 2.0461,
"step": 340
},
{
"epoch": 2.23,
"grad_norm": 277.84808349609375,
"learning_rate": 1.2845010615711254e-06,
"loss": 2.0137,
"step": 350
},
{
"epoch": 2.23,
"eval_accuracy": 0.35642857142857143,
"eval_f1_macro": 0.13816564544838333,
"eval_f1_micro": 0.35642857142857143,
"eval_loss": 2.1008036136627197,
"eval_runtime": 6.8511,
"eval_samples_per_second": 204.346,
"eval_steps_per_second": 6.422,
"step": 350
},
{
"epoch": 2.29,
"grad_norm": 272.0749816894531,
"learning_rate": 1.178343949044586e-06,
"loss": 1.9598,
"step": 360
},
{
"epoch": 2.36,
"grad_norm": 244.73826599121094,
"learning_rate": 1.0721868365180468e-06,
"loss": 1.9484,
"step": 370
},
{
"epoch": 2.42,
"grad_norm": 279.2901916503906,
"learning_rate": 9.660297239915076e-07,
"loss": 1.9512,
"step": 380
},
{
"epoch": 2.48,
"grad_norm": 230.9502410888672,
"learning_rate": 8.598726114649681e-07,
"loss": 1.968,
"step": 390
},
{
"epoch": 2.55,
"grad_norm": 266.0552673339844,
"learning_rate": 7.537154989384289e-07,
"loss": 1.9418,
"step": 400
},
{
"epoch": 2.55,
"eval_accuracy": 0.3557142857142857,
"eval_f1_macro": 0.135882859734301,
"eval_f1_micro": 0.3557142857142857,
"eval_loss": 2.0716516971588135,
"eval_runtime": 6.6647,
"eval_samples_per_second": 210.062,
"eval_steps_per_second": 6.602,
"step": 400
},
{
"epoch": 2.61,
"grad_norm": 235.61495971679688,
"learning_rate": 6.475583864118897e-07,
"loss": 1.977,
"step": 410
},
{
"epoch": 2.68,
"grad_norm": 299.27386474609375,
"learning_rate": 5.414012738853504e-07,
"loss": 2.0035,
"step": 420
},
{
"epoch": 2.74,
"grad_norm": 204.36082458496094,
"learning_rate": 4.35244161358811e-07,
"loss": 1.9215,
"step": 430
},
{
"epoch": 2.8,
"grad_norm": 285.29052734375,
"learning_rate": 3.290870488322718e-07,
"loss": 1.8539,
"step": 440
},
{
"epoch": 2.87,
"grad_norm": 301.00225830078125,
"learning_rate": 2.229299363057325e-07,
"loss": 1.8887,
"step": 450
},
{
"epoch": 2.87,
"eval_accuracy": 0.36357142857142855,
"eval_f1_macro": 0.14420660697547488,
"eval_f1_micro": 0.36357142857142855,
"eval_loss": 2.0744197368621826,
"eval_runtime": 6.6737,
"eval_samples_per_second": 209.778,
"eval_steps_per_second": 6.593,
"step": 450
},
{
"epoch": 2.93,
"grad_norm": 201.05862426757812,
"learning_rate": 1.1677282377919321e-07,
"loss": 1.9344,
"step": 460
},
{
"epoch": 2.99,
"grad_norm": 656.1539306640625,
"learning_rate": 1.0615711252653928e-08,
"loss": 2.0457,
"step": 470
},
{
"epoch": 3.0,
"step": 471,
"total_flos": 2.914106259328205e+16,
"train_loss": 2.1244609209129512,
"train_runtime": 925.7508,
"train_samples_per_second": 16.203,
"train_steps_per_second": 0.509
}
],
"logging_steps": 10,
"max_steps": 471,
"num_input_tokens_seen": 0,
"num_train_epochs": 3,
"save_steps": 50,
"total_flos": 2.914106259328205e+16,
"train_batch_size": 16,
"trial_name": null,
"trial_params": null
}