{
  "best_metric": 0.4315283000469208,
  "best_model_checkpoint": "xblock-large-patch2-224/checkpoint-5181",
  "epoch": 3.0,
  "eval_steps": 500,
  "global_step": 5181,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {"epoch": 0.01, "grad_norm": 19.092004776000977, "learning_rate": 2.2157996146435453e-06, "loss": 2.5421, "step": 25},
    {"epoch": 0.03, "grad_norm": 11.274575233459473, "learning_rate": 4.624277456647399e-06, "loss": 1.9399, "step": 50},
    {"epoch": 0.04, "grad_norm": 10.260701179504395, "learning_rate": 7.032755298651253e-06, "loss": 1.5011, "step": 75},
    {"epoch": 0.06, "grad_norm": 9.719381332397461, "learning_rate": 9.441233140655107e-06, "loss": 1.3912, "step": 100},
    {"epoch": 0.07, "grad_norm": 10.234914779663086, "learning_rate": 1.184971098265896e-05, "loss": 1.2166, "step": 125},
    {"epoch": 0.09, "grad_norm": 13.722230911254883, "learning_rate": 1.4258188824662813e-05, "loss": 1.1751, "step": 150},
    {"epoch": 0.1, "grad_norm": 17.890995025634766, "learning_rate": 1.6666666666666667e-05, "loss": 1.1862, "step": 175},
    {"epoch": 0.12, "grad_norm": 13.72986125946045, "learning_rate": 1.907514450867052e-05, "loss": 0.8828, "step": 200},
    {"epoch": 0.13, "grad_norm": 12.903642654418945, "learning_rate": 2.1483622350674377e-05, "loss": 0.9541, "step": 225},
    {"epoch": 0.14, "grad_norm": 8.937955856323242, "learning_rate": 2.3892100192678228e-05, "loss": 0.9032, "step": 250},
    {"epoch": 0.16, "grad_norm": 10.743949890136719, "learning_rate": 2.6300578034682083e-05, "loss": 1.0891, "step": 275},
    {"epoch": 0.17, "grad_norm": 9.766569137573242, "learning_rate": 2.8709055876685937e-05, "loss": 0.8951, "step": 300},
    {"epoch": 0.19, "grad_norm": 10.973127365112305, "learning_rate": 3.111753371868979e-05, "loss": 1.0709, "step": 325},
    {"epoch": 0.2, "grad_norm": 13.805391311645508, "learning_rate": 3.352601156069364e-05, "loss": 1.1141, "step": 350},
    {"epoch": 0.22, "grad_norm": 7.355842113494873, "learning_rate": 3.59344894026975e-05, "loss": 1.0031, "step": 375},
    {"epoch": 0.23, "grad_norm": 10.688377380371094, "learning_rate": 3.834296724470135e-05, "loss": 1.0344, "step": 400},
    {"epoch": 0.25, "grad_norm": 8.896137237548828, "learning_rate": 4.07514450867052e-05, "loss": 1.0991, "step": 425},
    {"epoch": 0.26, "grad_norm": 11.027874946594238, "learning_rate": 4.3159922928709055e-05, "loss": 1.0221, "step": 450},
    {"epoch": 0.28, "grad_norm": 27.792613983154297, "learning_rate": 4.556840077071291e-05, "loss": 0.8256, "step": 475},
    {"epoch": 0.29, "grad_norm": 8.005478858947754, "learning_rate": 4.7976878612716764e-05, "loss": 0.9862, "step": 500},
    {"epoch": 0.3, "grad_norm": 11.962843894958496, "learning_rate": 4.995709995709996e-05, "loss": 1.0462, "step": 525},
    {"epoch": 0.32, "grad_norm": 7.288177013397217, "learning_rate": 4.9688974688974696e-05, "loss": 0.9999, "step": 550},
    {"epoch": 0.33, "grad_norm": 3.769800901412964, "learning_rate": 4.9420849420849425e-05, "loss": 1.1262, "step": 575},
    {"epoch": 0.35, "grad_norm": 8.510008811950684, "learning_rate": 4.9152724152724154e-05, "loss": 0.944, "step": 600},
    {"epoch": 0.36, "grad_norm": 8.264263153076172, "learning_rate": 4.888459888459889e-05, "loss": 0.9652, "step": 625},
    {"epoch": 0.38, "grad_norm": 10.604584693908691, "learning_rate": 4.861647361647362e-05, "loss": 0.9246, "step": 650},
    {"epoch": 0.39, "grad_norm": 5.483927249908447, "learning_rate": 4.834834834834835e-05, "loss": 1.0557, "step": 675},
    {"epoch": 0.41, "grad_norm": 10.221104621887207, "learning_rate": 4.808022308022308e-05, "loss": 0.7906, "step": 700},
    {"epoch": 0.42, "grad_norm": 5.725340843200684, "learning_rate": 4.781209781209782e-05, "loss": 0.9, "step": 725},
    {"epoch": 0.43, "grad_norm": 7.057939052581787, "learning_rate": 4.754397254397255e-05, "loss": 0.8744, "step": 750},
    {"epoch": 0.45, "grad_norm": 9.354517936706543, "learning_rate": 4.727584727584728e-05, "loss": 1.0611, "step": 775},
    {"epoch": 0.46, "grad_norm": 4.2830491065979, "learning_rate": 4.700772200772201e-05, "loss": 0.8894, "step": 800},
    {"epoch": 0.48, "grad_norm": 10.702705383300781, "learning_rate": 4.673959673959674e-05, "loss": 1.0246, "step": 825},
    {"epoch": 0.49, "grad_norm": 4.7863359451293945, "learning_rate": 4.647147147147147e-05, "loss": 0.7967, "step": 850},
    {"epoch": 0.51, "grad_norm": 7.841278553009033, "learning_rate": 4.62033462033462e-05, "loss": 0.8616, "step": 875},
    {"epoch": 0.52, "grad_norm": 6.256266117095947, "learning_rate": 4.593522093522094e-05, "loss": 0.9735, "step": 900},
    {"epoch": 0.54, "grad_norm": 6.177362442016602, "learning_rate": 4.566709566709567e-05, "loss": 0.8287, "step": 925},
    {"epoch": 0.55, "grad_norm": 6.448288917541504, "learning_rate": 4.53989703989704e-05, "loss": 1.0062, "step": 950},
    {"epoch": 0.56, "grad_norm": 7.912018299102783, "learning_rate": 4.513084513084513e-05, "loss": 1.0174, "step": 975},
    {"epoch": 0.58, "grad_norm": 9.055561065673828, "learning_rate": 4.486271986271987e-05, "loss": 0.9616, "step": 1000},
    {"epoch": 0.59, "grad_norm": 7.433628559112549, "learning_rate": 4.4594594594594596e-05, "loss": 0.9309, "step": 1025},
    {"epoch": 0.61, "grad_norm": 3.5334954261779785, "learning_rate": 4.4326469326469325e-05, "loss": 0.807, "step": 1050},
    {"epoch": 0.62, "grad_norm": 5.620259761810303, "learning_rate": 4.405834405834406e-05, "loss": 0.8042, "step": 1075},
    {"epoch": 0.64, "grad_norm": 7.67726469039917, "learning_rate": 4.379021879021879e-05, "loss": 0.7394, "step": 1100},
    {"epoch": 0.65, "grad_norm": 9.430630683898926, "learning_rate": 4.3522093522093526e-05, "loss": 0.7895, "step": 1125},
    {"epoch": 0.67, "grad_norm": 9.241034507751465, "learning_rate": 4.3253968253968256e-05, "loss": 0.8032, "step": 1150},
    {"epoch": 0.68, "grad_norm": 7.471988201141357, "learning_rate": 4.298584298584299e-05, "loss": 0.7669, "step": 1175},
    {"epoch": 0.69, "grad_norm": 3.161353588104248, "learning_rate": 4.271771771771772e-05, "loss": 0.8795, "step": 1200},
    {"epoch": 0.71, "grad_norm": 6.6813201904296875, "learning_rate": 4.244959244959245e-05, "loss": 0.819, "step": 1225},
    {"epoch": 0.72, "grad_norm": 16.08786392211914, "learning_rate": 4.2181467181467186e-05, "loss": 0.8779, "step": 1250},
    {"epoch": 0.74, "grad_norm": 3.753849506378174, "learning_rate": 4.1913341913341915e-05, "loss": 0.9255, "step": 1275},
    {"epoch": 0.75, "grad_norm": 5.4661431312561035, "learning_rate": 4.1645216645216644e-05, "loss": 0.8028, "step": 1300},
    {"epoch": 0.77, "grad_norm": 6.559650897979736, "learning_rate": 4.137709137709138e-05, "loss": 0.7556, "step": 1325},
    {"epoch": 0.78, "grad_norm": 5.7179341316223145, "learning_rate": 4.1108966108966116e-05, "loss": 0.7076, "step": 1350},
    {"epoch": 0.8, "grad_norm": 12.687734603881836, "learning_rate": 4.0840840840840845e-05, "loss": 0.8583, "step": 1375},
    {"epoch": 0.81, "grad_norm": 5.1677398681640625, "learning_rate": 4.0572715572715574e-05, "loss": 0.8944, "step": 1400},
    {"epoch": 0.83, "grad_norm": 11.766656875610352, "learning_rate": 4.03045903045903e-05, "loss": 0.5638, "step": 1425},
    {"epoch": 0.84, "grad_norm": 4.12522554397583, "learning_rate": 4.003646503646504e-05, "loss": 0.6883, "step": 1450},
    {"epoch": 0.85, "grad_norm": 2.586186408996582, "learning_rate": 3.976833976833977e-05, "loss": 0.8784, "step": 1475},
    {"epoch": 0.87, "grad_norm": 4.075995445251465, "learning_rate": 3.95002145002145e-05, "loss": 0.7596, "step": 1500},
    {"epoch": 0.88, "grad_norm": 12.722098350524902, "learning_rate": 3.923208923208923e-05, "loss": 0.6316, "step": 1525},
    {"epoch": 0.9, "grad_norm": 12.86962890625, "learning_rate": 3.896396396396397e-05, "loss": 0.7721, "step": 1550},
    {"epoch": 0.91, "grad_norm": 10.640520095825195, "learning_rate": 3.86958386958387e-05, "loss": 0.711, "step": 1575},
    {"epoch": 0.93, "grad_norm": 7.080173015594482, "learning_rate": 3.842771342771343e-05, "loss": 0.8145, "step": 1600},
    {"epoch": 0.94, "grad_norm": 2.814232587814331, "learning_rate": 3.815958815958816e-05, "loss": 0.8509, "step": 1625},
    {"epoch": 0.96, "grad_norm": 12.209416389465332, "learning_rate": 3.789146289146289e-05, "loss": 0.8037, "step": 1650},
    {"epoch": 0.97, "grad_norm": 3.5646421909332275, "learning_rate": 3.762333762333762e-05, "loss": 0.8347, "step": 1675},
    {"epoch": 0.98, "grad_norm": 8.00243854522705, "learning_rate": 3.735521235521236e-05, "loss": 0.7534, "step": 1700},
    {"epoch": 1.0, "grad_norm": 5.955112934112549, "learning_rate": 3.708708708708709e-05, "loss": 0.9062, "step": 1725},
    {
      "epoch": 1.0,
      "eval_accuracy": 0.7815393518518519,
      "eval_f1_macro": 0.36763187534206965,
      "eval_f1_micro": 0.7815393518518519,
      "eval_f1_weighted": 0.7649698649865229,
      "eval_loss": 0.6848556399345398,
      "eval_precision_macro": 0.4527657259795588,
      "eval_precision_micro": 0.7815393518518519,
      "eval_precision_weighted": 0.7691072277887989,
      "eval_recall_macro": 0.3815255183458625,
      "eval_recall_micro": 0.7815393518518519,
      "eval_recall_weighted": 0.7815393518518519,
      "eval_runtime": 3330.8434,
      "eval_samples_per_second": 1.038,
      "eval_steps_per_second": 0.065,
      "step": 1727
    },
    {"epoch": 1.01, "grad_norm": 3.4409847259521484, "learning_rate": 3.681896181896182e-05, "loss": 0.8201, "step": 1750},
    {"epoch": 1.03, "grad_norm": 7.564268589019775, "learning_rate": 3.655083655083655e-05, "loss": 0.548, "step": 1775},
    {"epoch": 1.04, "grad_norm": 10.243828773498535, "learning_rate": 3.628271128271129e-05, "loss": 0.6348, "step": 1800},
    {"epoch": 1.06, "grad_norm": 14.57152271270752, "learning_rate": 3.6014586014586017e-05, "loss": 0.66, "step": 1825},
    {"epoch": 1.07, "grad_norm": 4.4091973304748535, "learning_rate": 3.5746460746460746e-05, "loss": 0.8169, "step": 1850},
    {"epoch": 1.09, "grad_norm": 5.423861503601074, "learning_rate": 3.547833547833548e-05, "loss": 0.7353, "step": 1875},
    {"epoch": 1.1, "grad_norm": 6.510718822479248, "learning_rate": 3.521021021021021e-05, "loss": 0.7008, "step": 1900},
    {"epoch": 1.11, "grad_norm": 2.5035436153411865, "learning_rate": 3.4942084942084947e-05, "loss": 0.5975, "step": 1925},
    {"epoch": 1.13, "grad_norm": 15.364286422729492, "learning_rate": 3.4673959673959676e-05, "loss": 0.8198, "step": 1950},
    {"epoch": 1.14, "grad_norm": 16.03240966796875, "learning_rate": 3.440583440583441e-05, "loss": 0.7089, "step": 1975},
    {"epoch": 1.16, "grad_norm": 8.521039962768555, "learning_rate": 3.413770913770914e-05, "loss": 0.6514, "step": 2000},
    {"epoch": 1.17, "grad_norm": 5.206024169921875, "learning_rate": 3.386958386958387e-05, "loss": 0.7545, "step": 2025},
    {"epoch": 1.19, "grad_norm": 7.756472110748291, "learning_rate": 3.36014586014586e-05, "loss": 0.8055, "step": 2050},
    {"epoch": 1.2, "grad_norm": 17.274944305419922, "learning_rate": 3.3333333333333335e-05, "loss": 0.7212, "step": 2075},
    {"epoch": 1.22, "grad_norm": 5.303420543670654, "learning_rate": 3.3065208065208064e-05, "loss": 0.7299, "step": 2100},
    {"epoch": 1.23, "grad_norm": 5.07370138168335, "learning_rate": 3.27970827970828e-05, "loss": 0.5858, "step": 2125},
    {"epoch": 1.24, "grad_norm": 6.2755937576293945, "learning_rate": 3.252895752895753e-05, "loss": 0.7687, "step": 2150},
    {"epoch": 1.26, "grad_norm": 12.160276412963867, "learning_rate": 3.227155727155727e-05, "loss": 0.727, "step": 2175},
    {"epoch": 1.27, "grad_norm": 3.9984447956085205, "learning_rate": 3.2003432003432e-05, "loss": 0.6697, "step": 2200},
    {"epoch": 1.29, "grad_norm": 5.756568908691406, "learning_rate": 3.173530673530674e-05, "loss": 0.7912, "step": 2225},
    {"epoch": 1.3, "grad_norm": 8.783411026000977, "learning_rate": 3.1467181467181466e-05, "loss": 0.7035, "step": 2250},
    {"epoch": 1.32, "grad_norm": 5.451704502105713, "learning_rate": 3.1199056199056196e-05, "loss": 0.7144, "step": 2275},
    {"epoch": 1.33, "grad_norm": 5.039503574371338, "learning_rate": 3.093093093093093e-05, "loss": 0.7893, "step": 2300},
    {"epoch": 1.35, "grad_norm": 12.280179023742676, "learning_rate": 3.066280566280567e-05, "loss": 0.5903, "step": 2325},
    {"epoch": 1.36, "grad_norm": 6.999240398406982, "learning_rate": 3.0394680394680397e-05, "loss": 0.651, "step": 2350},
    {"epoch": 1.38, "grad_norm": 7.355953216552734, "learning_rate": 3.012655512655513e-05, "loss": 0.878, "step": 2375},
    {"epoch": 1.39, "grad_norm": 12.737029075622559, "learning_rate": 2.9858429858429858e-05, "loss": 0.731, "step": 2400},
    {"epoch": 1.4, "grad_norm": 4.184784889221191, "learning_rate": 2.959030459030459e-05, "loss": 0.8775, "step": 2425},
    {"epoch": 1.42, "grad_norm": 9.188583374023438, "learning_rate": 2.9322179322179323e-05, "loss": 0.7259, "step": 2450},
    {"epoch": 1.43, "grad_norm": 12.98018741607666, "learning_rate": 2.906477906477907e-05, "loss": 0.6219, "step": 2475},
    {"epoch": 1.45, "grad_norm": 12.084989547729492, "learning_rate": 2.87966537966538e-05, "loss": 0.5074, "step": 2500},
    {"epoch": 1.46, "grad_norm": 5.283312797546387, "learning_rate": 2.852852852852853e-05, "loss": 0.6606, "step": 2525},
    {"epoch": 1.48, "grad_norm": 19.32860565185547, "learning_rate": 2.826040326040326e-05, "loss": 0.7651, "step": 2550},
    {"epoch": 1.49, "grad_norm": 4.0794901847839355, "learning_rate": 2.7992277992277993e-05, "loss": 0.6737, "step": 2575},
    {"epoch": 1.51, "grad_norm": 33.1405029296875, "learning_rate": 2.7724152724152726e-05, "loss": 0.654, "step": 2600},
    {"epoch": 1.52, "grad_norm": 4.921344757080078, "learning_rate": 2.7456027456027455e-05, "loss": 0.7194, "step": 2625},
    {"epoch": 1.53, "grad_norm": 5.453707695007324, "learning_rate": 2.7187902187902187e-05, "loss": 0.6851, "step": 2650},
    {"epoch": 1.55, "grad_norm": 8.766169548034668, "learning_rate": 2.6919776919776923e-05, "loss": 0.8228, "step": 2675},
    {"epoch": 1.56, "grad_norm": 8.957389831542969, "learning_rate": 2.6651651651651656e-05, "loss": 0.6645, "step": 2700},
    {"epoch": 1.58, "grad_norm": 5.715158939361572, "learning_rate": 2.6383526383526385e-05, "loss": 0.6905, "step": 2725},
    {"epoch": 1.59, "grad_norm": 6.306962490081787, "learning_rate": 2.6115401115401117e-05, "loss": 0.6924, "step": 2750},
    {"epoch": 1.61, "grad_norm": 8.548517227172852, "learning_rate": 2.5847275847275846e-05, "loss": 0.8402, "step": 2775},
    {"epoch": 1.62, "grad_norm": 7.5719895362854, "learning_rate": 2.557915057915058e-05, "loss": 0.7758, "step": 2800},
    {"epoch": 1.64, "grad_norm": 8.910326957702637, "learning_rate": 2.531102531102531e-05, "loss": 0.5645, "step": 2825},
    {"epoch": 1.65, "grad_norm": 8.304277420043945, "learning_rate": 2.504290004290004e-05, "loss": 0.7066, "step": 2850},
    {"epoch": 1.66, "grad_norm": 13.99254035949707, "learning_rate": 2.4774774774774777e-05, "loss": 0.7396, "step": 2875},
    {"epoch": 1.68, "grad_norm": 3.806931257247925, "learning_rate": 2.4506649506649506e-05, "loss": 0.6334, "step": 2900},
    {"epoch": 1.69, "grad_norm": 6.550988674163818, "learning_rate": 2.423852423852424e-05, "loss": 0.9251, "step": 2925},
    {"epoch": 1.71, "grad_norm": 3.1442198753356934, "learning_rate": 2.397039897039897e-05, "loss": 0.546, "step": 2950},
    {"epoch": 1.72, "grad_norm": 5.249305248260498, "learning_rate": 2.3702273702273703e-05, "loss": 0.6419, "step": 2975},
    {"epoch": 1.74, "grad_norm": 5.300810813903809, "learning_rate": 2.3434148434148436e-05, "loss": 0.535, "step": 3000},
    {"epoch": 1.75, "grad_norm": 8.079426765441895, "learning_rate": 2.3166023166023168e-05, "loss": 0.8142, "step": 3025},
    {"epoch": 1.77, "grad_norm": 6.737719535827637, "learning_rate": 2.28978978978979e-05, "loss": 0.5974, "step": 3050},
    {"epoch": 1.78, "grad_norm": 5.037626266479492, "learning_rate": 2.262977262977263e-05, "loss": 0.7068, "step": 3075},
    {"epoch": 1.8, "grad_norm": 3.4523470401763916, "learning_rate": 2.2361647361647362e-05, "loss": 0.5756, "step": 3100},
    {"epoch": 1.81, "grad_norm": 2.2966675758361816, "learning_rate": 2.2093522093522095e-05, "loss": 0.4941, "step": 3125},
    {"epoch": 1.82, "grad_norm": 11.497820854187012, "learning_rate": 2.1825396825396827e-05, "loss": 0.8353, "step": 3150},
    {"epoch": 1.84, "grad_norm": 11.813599586486816, "learning_rate": 2.1557271557271557e-05, "loss": 0.8303, "step": 3175},
    {"epoch": 1.85, "grad_norm": 2.504293203353882, "learning_rate": 2.128914628914629e-05, "loss": 0.5574, "step": 3200},
    {"epoch": 1.87, "grad_norm": 8.983193397521973, "learning_rate": 2.102102102102102e-05, "loss": 0.6033, "step": 3225},
    {"epoch": 1.88, "grad_norm": 7.629824161529541, "learning_rate": 2.0752895752895754e-05, "loss": 0.6305, "step": 3250},
    {"epoch": 1.9, "grad_norm": 10.86919116973877, "learning_rate": 2.0484770484770487e-05, "loss": 0.6045, "step": 3275},
    {"epoch": 1.91, "grad_norm": 6.0854387283325195, "learning_rate": 2.0216645216645216e-05, "loss": 0.6208, "step": 3300},
    {"epoch": 1.93, "grad_norm": 6.228011131286621, "learning_rate": 1.994851994851995e-05, "loss": 0.6249, "step": 3325},
    {"epoch": 1.94, "grad_norm": 7.076812744140625, "learning_rate": 1.968039468039468e-05, "loss": 0.6176, "step": 3350},
    {"epoch": 1.95, "grad_norm": 7.893978595733643, "learning_rate": 1.9412269412269413e-05, "loss": 0.7779, "step": 3375},
    {"epoch": 1.97, "grad_norm": 7.72683048248291, "learning_rate": 1.9144144144144142e-05, "loss": 0.669, "step": 3400},
    {"epoch": 1.98, "grad_norm": 3.820025682449341, "learning_rate": 1.887601887601888e-05, "loss": 0.6182, "step": 3425},
    {"epoch": 2.0, "grad_norm": 5.642152309417725, "learning_rate": 1.8607893607893607e-05, "loss": 0.6453, "step": 3450},
    {
      "epoch": 2.0,
      "eval_accuracy": 0.8454861111111112,
      "eval_f1_macro": 0.5038698715266201,
      "eval_f1_micro": 0.845486111111111,
      "eval_f1_weighted": 0.830657390454042,
      "eval_loss": 0.4813511371612549,
      "eval_precision_macro": 0.7578927995388053,
      "eval_precision_micro": 0.8454861111111112,
      "eval_precision_weighted": 0.8347843050246918,
      "eval_recall_macro": 0.456096431265928,
      "eval_recall_micro": 0.8454861111111112,
      "eval_recall_weighted": 0.8454861111111112,
      "eval_runtime": 3281.605,
      "eval_samples_per_second": 1.053,
      "eval_steps_per_second": 0.066,
      "step": 3454
    },
    {"epoch": 2.01, "grad_norm": 8.240702629089355, "learning_rate": 1.833976833976834e-05, "loss": 0.4854, "step": 3475},
    {"epoch": 2.03, "grad_norm": 7.087810039520264, "learning_rate": 1.8071643071643072e-05, "loss": 0.5459, "step": 3500},
    {"epoch": 2.04, "grad_norm": 0.7334815859794617, "learning_rate": 1.7803517803517805e-05, "loss": 0.5212, "step": 3525},
    {"epoch": 2.06, "grad_norm": 6.095980167388916, "learning_rate": 1.7535392535392538e-05, "loss": 0.5191, "step": 3550},
    {"epoch": 2.07, "grad_norm": 10.246546745300293, "learning_rate": 1.7267267267267267e-05, "loss": 0.6645, "step": 3575},
    {"epoch": 2.08, "grad_norm": 3.6809566020965576, "learning_rate": 1.6999141999142e-05, "loss": 0.5629, "step": 3600},
    {"epoch": 2.1, "grad_norm": 13.609752655029297, "learning_rate": 1.673101673101673e-05, "loss": 0.6598, "step": 3625},
    {"epoch": 2.11, "grad_norm": 5.554472923278809, "learning_rate": 1.6462891462891464e-05, "loss": 0.4935, "step": 3650},
    {"epoch": 2.13, "grad_norm": 16.72881317138672, "learning_rate": 1.6205491205491204e-05, "loss": 0.379, "step": 3675},
    {"epoch": 2.14, "grad_norm": 9.573266983032227, "learning_rate": 1.593736593736594e-05, "loss": 0.7141, "step": 3700},
    {"epoch": 2.16, "grad_norm": 5.245655059814453, "learning_rate": 1.566924066924067e-05, "loss": 0.7518, "step": 3725},
    {"epoch": 2.17, "grad_norm": 13.11945915222168, "learning_rate": 1.54011154011154e-05, "loss": 0.6588, "step": 3750},
    {"epoch": 2.19, "grad_norm": 6.219137191772461, "learning_rate": 1.5132990132990132e-05, "loss": 0.5884, "step": 3775},
    {"epoch": 2.2, "grad_norm": 6.516097545623779, "learning_rate": 1.4864864864864867e-05, "loss": 0.4572, "step": 3800},
    {"epoch": 2.21, "grad_norm": 4.23282527923584, "learning_rate": 1.4596739596739597e-05, "loss": 0.6028, "step": 3825},
    {"epoch": 2.23, "grad_norm": 2.6169815063476562, "learning_rate": 1.4328614328614328e-05, "loss": 0.6198, "step": 3850},
    {"epoch": 2.24, "grad_norm": 4.720090389251709, "learning_rate": 1.4060489060489059e-05, "loss": 0.556, "step": 3875},
    {"epoch": 2.26, "grad_norm": 7.367048263549805, "learning_rate": 1.3792363792363793e-05, "loss": 0.5812, "step": 3900},
    {"epoch": 2.27, "grad_norm": 7.3934173583984375, "learning_rate": 1.3524238524238526e-05, "loss": 0.6141, "step": 3925},
    {"epoch": 2.29, "grad_norm": 10.528743743896484, "learning_rate": 1.3256113256113257e-05, "loss": 0.6047, "step": 3950},
    {"epoch": 2.3, "grad_norm": 7.282771110534668, "learning_rate": 1.2987987987987987e-05, "loss": 0.6281, "step": 3975},
    {"epoch": 2.32, "grad_norm": 11.970826148986816, "learning_rate": 1.2719862719862722e-05, "loss": 0.4103, "step": 4000},
    {"epoch": 2.33, "grad_norm": 6.620480537414551, "learning_rate": 1.2451737451737452e-05, "loss": 0.5386, "step": 4025},
    {"epoch": 2.35, "grad_norm": 5.699476718902588, "learning_rate": 1.2183612183612183e-05, "loss": 0.6507, "step": 4050},
    {"epoch": 2.36, "grad_norm": 2.477766275405884, "learning_rate": 1.1915486915486916e-05, "loss": 0.524, "step": 4075},
    {"epoch": 2.37, "grad_norm": 6.517852306365967, "learning_rate": 1.1647361647361647e-05, "loss": 0.6979, "step": 4100},
    {"epoch": 2.39, "grad_norm": 4.506691932678223, "learning_rate": 1.1379236379236379e-05, "loss": 0.4651, "step": 4125},
    {"epoch": 2.4, "grad_norm": 6.522432804107666, "learning_rate": 1.1111111111111112e-05, "loss": 0.6845, "step": 4150},
    {"epoch": 2.42, "grad_norm": 12.015291213989258, "learning_rate": 1.0842985842985844e-05, "loss": 0.5348, "step": 4175},
    {"epoch": 2.43, "grad_norm": 7.297937393188477, "learning_rate": 1.0574860574860575e-05, "loss": 0.5412, "step": 4200},
    {"epoch": 2.45, "grad_norm": 13.665657997131348, "learning_rate": 1.0306735306735307e-05, "loss": 0.5137, "step": 4225},
    {"epoch": 2.46, "grad_norm": 11.743260383605957, "learning_rate": 1.0038610038610038e-05, "loss": 0.5738, "step": 4250},
    {"epoch": 2.48, "grad_norm": 10.24691104888916, "learning_rate": 9.77048477048477e-06, "loss": 0.5134, "step": 4275},
    {"epoch": 2.49, "grad_norm": 4.543239116668701, "learning_rate": 9.502359502359502e-06, "loss": 0.6055, "step": 4300},
    {"epoch": 2.5, "grad_norm": 7.915064811706543, "learning_rate": 9.234234234234234e-06, "loss": 0.5153, "step": 4325},
    {"epoch": 2.52, "grad_norm": 8.37210750579834, "learning_rate": 8.966108966108967e-06, "loss": 0.4754, "step": 4350},
    {"epoch": 2.53, "grad_norm": 7.3417487144470215, "learning_rate": 8.6979836979837e-06, "loss": 0.6285, "step": 4375},
    {"epoch": 2.55, "grad_norm": 9.027023315429688, "learning_rate": 8.42985842985843e-06, "loss": 0.4925, "step": 4400},
    {"epoch": 2.56, "grad_norm": 7.813179016113281, "learning_rate": 8.161733161733163e-06, "loss": 0.5089, "step": 4425},
    {"epoch": 2.58, "grad_norm": 13.45531940460205, "learning_rate": 7.893607893607893e-06, "loss": 0.4717, "step": 4450},
    {"epoch": 2.59, "grad_norm": 7.09887170791626, "learning_rate": 7.625482625482626e-06, "loss": 0.6506, "step": 4475},
    {"epoch": 2.61, "grad_norm": 4.6297383308410645, "learning_rate": 7.357357357357357e-06, "loss": 0.4828, "step": 4500},
    {"epoch": 2.62, "grad_norm": 8.980986595153809, "learning_rate": 7.089232089232089e-06, "loss": 0.4233, "step": 4525},
    {"epoch": 2.63, "grad_norm": 26.76249885559082, "learning_rate": 6.821106821106821e-06, "loss": 0.5748, "step": 4550},
    {"epoch": 2.65, "grad_norm": 8.72842788696289, "learning_rate": 6.552981552981553e-06, "loss": 0.6565, "step": 4575},
    {"epoch": 2.66, "grad_norm": 9.191315650939941, "learning_rate": 6.284856284856284e-06, "loss": 0.5332, "step": 4600},
    {"epoch": 2.68, "grad_norm": 7.631181240081787, "learning_rate": 6.016731016731017e-06, "loss": 0.4692, "step": 4625},
    {"epoch": 2.69, "grad_norm": 7.811351299285889, "learning_rate": 5.748605748605749e-06, "loss": 0.6485, "step": 4650},
    {"epoch": 2.71, "grad_norm": 8.997116088867188, "learning_rate": 5.480480480480481e-06, "loss": 0.4207, "step": 4675},
    {"epoch": 2.72, "grad_norm": 9.758033752441406, "learning_rate": 5.212355212355213e-06, "loss": 0.5205, "step": 4700},
    {"epoch": 2.74, "grad_norm": 5.98590612411499, "learning_rate": 4.944229944229944e-06, "loss": 0.6115, "step": 4725},
    {"epoch": 2.75, "grad_norm": 11.506319046020508, "learning_rate": 4.676104676104676e-06, "loss": 0.4449, "step": 4750},
    {"epoch": 2.76, "grad_norm": 7.969517230987549, "learning_rate": 4.4079794079794084e-06, "loss": 0.5384, "step": 4775},
    {"epoch": 2.78, "grad_norm": 0.8463253974914551, "learning_rate": 4.13985413985414e-06, "loss": 0.4981, "step": 4800},
    {"epoch": 2.79, "grad_norm": 12.771890640258789, "learning_rate": 3.871728871728872e-06, "loss": 0.4786, "step": 4825},
    {"epoch": 2.81, "grad_norm": 0.6047688126564026, "learning_rate": 3.603603603603604e-06, "loss": 0.4913, "step": 4850},
    {"epoch": 2.82, "grad_norm": 7.117040157318115, "learning_rate": 3.3354783354783355e-06, "loss": 0.5386, "step": 4875},
    {"epoch": 2.84, "grad_norm": 5.262890338897705, "learning_rate": 3.0673530673530676e-06, "loss": 0.6115, "step": 4900},
    {"epoch": 2.85, "grad_norm": 0.5500399470329285, "learning_rate": 2.7992277992277993e-06, "loss": 0.5285, "step": 4925},
    {"epoch": 2.87, "grad_norm": 2.5653154850006104, "learning_rate": 2.531102531102531e-06, "loss": 0.3621, "step": 4950},
    {"epoch": 2.88, "grad_norm": 5.71751594543457, "learning_rate": 2.262977262977263e-06, "loss": 0.6007, "step": 4975},
    {"epoch": 2.9, "grad_norm": 12.138904571533203, "learning_rate": 1.9948519948519947e-06, "loss": 0.5797, "step": 5000},
    {"epoch": 2.91, "grad_norm": 8.795024871826172, "learning_rate": 1.7267267267267268e-06, "loss": 0.5585, "step": 5025},
    {"epoch": 2.92, "grad_norm": 3.7619569301605225, "learning_rate": 1.4586014586014587e-06, "loss": 0.3951, "step": 5050},
    {"epoch": 2.94, "grad_norm": 2.763073205947876, "learning_rate": 1.1904761904761904e-06, "loss": 0.287, "step": 5075},
    {"epoch": 2.95, "grad_norm": 7.337412357330322, "learning_rate": 9.223509223509224e-07, "loss": 0.4416, "step": 5100},
    {"epoch": 2.97, "grad_norm": 4.266438961029053, "learning_rate": 6.542256542256542e-07, "loss": 0.5275, "step": 5125},
    {"epoch": 2.98, "grad_norm": 8.12879753112793, "learning_rate": 3.8610038610038613e-07, "loss": 0.4297, "step": 5150},
    {"epoch": 3.0, "grad_norm": 6.199108123779297, "learning_rate": 1.1797511797511798e-07, "loss": 0.4389, "step": 5175},
    {
      "epoch": 3.0,
      "eval_accuracy": 0.8602430555555556,
      "eval_f1_macro": 0.6149830093941424,
      "eval_f1_micro": 0.8602430555555556,
      "eval_f1_weighted": 0.8515059109185544,
      "eval_loss": 0.4315283000469208,
      "eval_precision_macro": 0.7610988679415244,
      "eval_precision_micro": 0.8602430555555556,
      "eval_precision_weighted": 0.8532444856848228,
      "eval_recall_macro": 0.5527145295483504,
      "eval_recall_micro": 0.8602430555555556,
      "eval_recall_weighted": 0.8602430555555556,
      "eval_runtime": 3373.7409,
      "eval_samples_per_second": 1.024,
      "eval_steps_per_second": 0.064,
      "step": 5181
    }
  ],
  "logging_steps": 25,
  "max_steps": 5181,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 3,
  "save_steps": 500,
  "total_flos": 1.135272556528692e+19,
  "train_batch_size": 8,
  "trial_name": null,
  "trial_params": null
}