{ "best_metric": 0.9319305111443131, "best_model_checkpoint": "cls_comment-phobert-base-v2-v3.2.1/checkpoint-3700", "epoch": 34.78260869565217, "eval_steps": 100, "global_step": 4000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.8695652173913043, "grad_norm": 1.2353936433792114, "learning_rate": 2.5e-06, "loss": 1.8571, "step": 100 }, { "epoch": 0.8695652173913043, "eval_accuracy": 0.3985861881457314, "eval_f1_score": 0.08142634970006665, "eval_loss": 1.6996082067489624, "eval_precision": 0.05694088402081877, "eval_recall": 0.14285714285714285, "eval_runtime": 6.7198, "eval_samples_per_second": 547.337, "eval_steps_per_second": 8.631, "step": 100 }, { "epoch": 1.7391304347826086, "grad_norm": 2.164222240447998, "learning_rate": 5e-06, "loss": 1.552, "step": 200 }, { "epoch": 1.7391304347826086, "eval_accuracy": 0.6150081566068516, "eval_f1_score": 0.25519317041574235, "eval_loss": 1.287759780883789, "eval_precision": 0.27379414130499563, "eval_recall": 0.28595668999910956, "eval_runtime": 6.6372, "eval_samples_per_second": 554.145, "eval_steps_per_second": 8.739, "step": 200 }, { "epoch": 2.608695652173913, "grad_norm": 5.309168815612793, "learning_rate": 7.500000000000001e-06, "loss": 1.1701, "step": 300 }, { "epoch": 2.608695652173913, "eval_accuracy": 0.7746057640021751, "eval_f1_score": 0.5380183437718388, "eval_loss": 0.9308958649635315, "eval_precision": 0.5797370934127978, "eval_recall": 0.5249138773909011, "eval_runtime": 6.5583, "eval_samples_per_second": 560.819, "eval_steps_per_second": 8.844, "step": 300 }, { "epoch": 3.4782608695652173, "grad_norm": 6.065152645111084, "learning_rate": 1e-05, "loss": 0.8958, "step": 400 }, { "epoch": 3.4782608695652173, "eval_accuracy": 0.8371397498640566, "eval_f1_score": 0.6099241108684058, "eval_loss": 0.7467954754829407, "eval_precision": 0.6113332602554015, "eval_recall": 0.6120619930302185, "eval_runtime": 6.7113, "eval_samples_per_second": 548.029, "eval_steps_per_second": 8.642, "step": 400 }, { "epoch": 4.3478260869565215, "grad_norm": 6.0916032791137695, "learning_rate": 9.722222222222223e-06, "loss": 0.7463, "step": 500 }, { "epoch": 4.3478260869565215, "eval_accuracy": 0.8640565524741708, "eval_f1_score": 0.6758460592010713, "eval_loss": 0.6539974808692932, "eval_precision": 0.7556366523817865, "eval_recall": 0.6741270736536388, "eval_runtime": 6.6507, "eval_samples_per_second": 553.021, "eval_steps_per_second": 8.721, "step": 500 }, { "epoch": 5.217391304347826, "grad_norm": 5.243598461151123, "learning_rate": 9.444444444444445e-06, "loss": 0.6489, "step": 600 }, { "epoch": 5.217391304347826, "eval_accuracy": 0.8866231647634584, "eval_f1_score": 0.7501544920720539, "eval_loss": 0.5884435772895813, "eval_precision": 0.7611116878989773, "eval_recall": 0.7443224665881154, "eval_runtime": 6.5994, "eval_samples_per_second": 557.321, "eval_steps_per_second": 8.789, "step": 600 }, { "epoch": 6.086956521739131, "grad_norm": 4.099198341369629, "learning_rate": 9.166666666666666e-06, "loss": 0.5604, "step": 700 }, { "epoch": 6.086956521739131, "eval_accuracy": 0.9010331702011963, "eval_f1_score": 0.8349576474350455, "eval_loss": 0.5296739339828491, "eval_precision": 0.906039613501241, "eval_recall": 0.8195735271786126, "eval_runtime": 6.5841, "eval_samples_per_second": 558.615, "eval_steps_per_second": 8.809, "step": 700 }, { "epoch": 6.956521739130435, "grad_norm": 7.3427557945251465, "learning_rate": 8.888888888888888e-06, "loss": 0.4907, "step": 800 }, { "epoch": 6.956521739130435, "eval_accuracy": 0.9170744970092441, "eval_f1_score": 0.8961939258721013, "eval_loss": 0.4928275942802429, "eval_precision": 0.9189847475576503, "eval_recall": 0.8769461858418355, "eval_runtime": 6.5949, "eval_samples_per_second": 557.707, "eval_steps_per_second": 8.795, "step": 800 }, { "epoch": 7.826086956521739, "grad_norm": 5.116893768310547, "learning_rate": 8.611111111111112e-06, "loss": 0.4428, "step": 900 }, { "epoch": 7.826086956521739, "eval_accuracy": 0.921968461120174, "eval_f1_score": 0.9048377528141078, "eval_loss": 0.46924909949302673, "eval_precision": 0.9169978450568272, "eval_recall": 0.8957923531168384, "eval_runtime": 6.6744, "eval_samples_per_second": 551.061, "eval_steps_per_second": 8.69, "step": 900 }, { "epoch": 8.695652173913043, "grad_norm": 3.862825393676758, "learning_rate": 8.333333333333334e-06, "loss": 0.4086, "step": 1000 }, { "epoch": 8.695652173913043, "eval_accuracy": 0.9235997824904839, "eval_f1_score": 0.9073197371323319, "eval_loss": 0.4600367546081543, "eval_precision": 0.8977722877003532, "eval_recall": 0.9183169396463787, "eval_runtime": 6.5594, "eval_samples_per_second": 560.722, "eval_steps_per_second": 8.842, "step": 1000 }, { "epoch": 9.565217391304348, "grad_norm": 3.6134090423583984, "learning_rate": 8.055555555555557e-06, "loss": 0.3892, "step": 1100 }, { "epoch": 9.565217391304348, "eval_accuracy": 0.9293094072865687, "eval_f1_score": 0.9155883832712851, "eval_loss": 0.45303475856781006, "eval_precision": 0.9155788758247504, "eval_recall": 0.9158764039642001, "eval_runtime": 6.5742, "eval_samples_per_second": 559.46, "eval_steps_per_second": 8.822, "step": 1100 }, { "epoch": 10.434782608695652, "grad_norm": 6.361451148986816, "learning_rate": 7.77777777777778e-06, "loss": 0.3659, "step": 1200 }, { "epoch": 10.434782608695652, "eval_accuracy": 0.9257748776508973, "eval_f1_score": 0.915368183730898, "eval_loss": 0.4573982357978821, "eval_precision": 0.9071440635058929, "eval_recall": 0.9257672878056209, "eval_runtime": 6.6383, "eval_samples_per_second": 554.054, "eval_steps_per_second": 8.737, "step": 1200 }, { "epoch": 11.304347826086957, "grad_norm": 6.6150312423706055, "learning_rate": 7.500000000000001e-06, "loss": 0.3577, "step": 1300 }, { "epoch": 11.304347826086957, "eval_accuracy": 0.9287656334964655, "eval_f1_score": 0.9158955580622885, "eval_loss": 0.45325955748558044, "eval_precision": 0.9151528325914676, "eval_recall": 0.9176855536409609, "eval_runtime": 6.6572, "eval_samples_per_second": 552.485, "eval_steps_per_second": 8.712, "step": 1300 }, { "epoch": 12.173913043478262, "grad_norm": 5.580691814422607, "learning_rate": 7.222222222222223e-06, "loss": 0.338, "step": 1400 }, { "epoch": 12.173913043478262, "eval_accuracy": 0.933931484502447, "eval_f1_score": 0.9203137031893097, "eval_loss": 0.44535157084465027, "eval_precision": 0.9127875535751154, "eval_recall": 0.9284587781330601, "eval_runtime": 6.607, "eval_samples_per_second": 556.686, "eval_steps_per_second": 8.779, "step": 1400 }, { "epoch": 13.043478260869565, "grad_norm": 3.8554763793945312, "learning_rate": 6.944444444444445e-06, "loss": 0.3302, "step": 1500 }, { "epoch": 13.043478260869565, "eval_accuracy": 0.9312126155519304, "eval_f1_score": 0.9179345627885324, "eval_loss": 0.4539467692375183, "eval_precision": 0.919620307390688, "eval_recall": 0.9172409654840804, "eval_runtime": 6.6136, "eval_samples_per_second": 556.129, "eval_steps_per_second": 8.77, "step": 1500 }, { "epoch": 13.91304347826087, "grad_norm": 11.763055801391602, "learning_rate": 6.666666666666667e-06, "loss": 0.3186, "step": 1600 }, { "epoch": 13.91304347826087, "eval_accuracy": 0.9320282762370854, "eval_f1_score": 0.9219566242363927, "eval_loss": 0.4532802700996399, "eval_precision": 0.9298453666516142, "eval_recall": 0.914622870958479, "eval_runtime": 6.6786, "eval_samples_per_second": 550.714, "eval_steps_per_second": 8.684, "step": 1600 }, { "epoch": 14.782608695652174, "grad_norm": 4.790759563446045, "learning_rate": 6.3888888888888885e-06, "loss": 0.3146, "step": 1700 }, { "epoch": 14.782608695652174, "eval_accuracy": 0.935562805872757, "eval_f1_score": 0.9245866574233509, "eval_loss": 0.4484989047050476, "eval_precision": 0.9280507721712984, "eval_recall": 0.9224481039362212, "eval_runtime": 6.5869, "eval_samples_per_second": 558.384, "eval_steps_per_second": 8.805, "step": 1700 }, { "epoch": 15.652173913043478, "grad_norm": 5.715475559234619, "learning_rate": 6.111111111111112e-06, "loss": 0.3093, "step": 1800 }, { "epoch": 15.652173913043478, "eval_accuracy": 0.9325720500271887, "eval_f1_score": 0.9193696012739166, "eval_loss": 0.45573264360427856, "eval_precision": 0.9290964969084188, "eval_recall": 0.9124598145426113, "eval_runtime": 6.6184, "eval_samples_per_second": 555.721, "eval_steps_per_second": 8.763, "step": 1800 }, { "epoch": 16.52173913043478, "grad_norm": 7.8289079666137695, "learning_rate": 5.833333333333334e-06, "loss": 0.3019, "step": 1900 }, { "epoch": 16.52173913043478, "eval_accuracy": 0.9290375203915171, "eval_f1_score": 0.9169347716468026, "eval_loss": 0.46843111515045166, "eval_precision": 0.9128246753731022, "eval_recall": 0.923370474591805, "eval_runtime": 6.6032, "eval_samples_per_second": 557.0, "eval_steps_per_second": 8.784, "step": 1900 }, { "epoch": 17.391304347826086, "grad_norm": 5.883206844329834, "learning_rate": 5.555555555555557e-06, "loss": 0.2985, "step": 2000 }, { "epoch": 17.391304347826086, "eval_accuracy": 0.9347471451876019, "eval_f1_score": 0.92475047525614, "eval_loss": 0.4544869661331177, "eval_precision": 0.9259386916015938, "eval_recall": 0.9237552045152712, "eval_runtime": 6.6237, "eval_samples_per_second": 555.275, "eval_steps_per_second": 8.756, "step": 2000 }, { "epoch": 18.26086956521739, "grad_norm": 8.993196487426758, "learning_rate": 5.2777777777777785e-06, "loss": 0.2959, "step": 2100 }, { "epoch": 18.26086956521739, "eval_accuracy": 0.9333877107123436, "eval_f1_score": 0.9219602449474701, "eval_loss": 0.46893206238746643, "eval_precision": 0.92490748541024, "eval_recall": 0.9208243882860574, "eval_runtime": 6.7132, "eval_samples_per_second": 547.875, "eval_steps_per_second": 8.64, "step": 2100 }, { "epoch": 19.130434782608695, "grad_norm": 6.87612771987915, "learning_rate": 5e-06, "loss": 0.2891, "step": 2200 }, { "epoch": 19.130434782608695, "eval_accuracy": 0.9385535617183252, "eval_f1_score": 0.9262085591552154, "eval_loss": 0.4558440148830414, "eval_precision": 0.9360271609767613, "eval_recall": 0.9180112460726695, "eval_runtime": 6.5974, "eval_samples_per_second": 557.491, "eval_steps_per_second": 8.791, "step": 2200 }, { "epoch": 20.0, "grad_norm": 8.078782081604004, "learning_rate": 4.722222222222222e-06, "loss": 0.2905, "step": 2300 }, { "epoch": 20.0, "eval_accuracy": 0.9358346927678086, "eval_f1_score": 0.9227437963525783, "eval_loss": 0.45897340774536133, "eval_precision": 0.9307760201675803, "eval_recall": 0.9162662496043394, "eval_runtime": 6.6433, "eval_samples_per_second": 553.643, "eval_steps_per_second": 8.731, "step": 2300 }, { "epoch": 20.869565217391305, "grad_norm": 6.95852518081665, "learning_rate": 4.444444444444444e-06, "loss": 0.2875, "step": 2400 }, { "epoch": 20.869565217391305, "eval_accuracy": 0.9306688417618271, "eval_f1_score": 0.9192946959387055, "eval_loss": 0.4796580672264099, "eval_precision": 0.9267958791034487, "eval_recall": 0.9145913486548557, "eval_runtime": 6.5982, "eval_samples_per_second": 557.426, "eval_steps_per_second": 8.79, "step": 2400 }, { "epoch": 21.73913043478261, "grad_norm": 5.104602336883545, "learning_rate": 4.166666666666667e-06, "loss": 0.2812, "step": 2500 }, { "epoch": 21.73913043478261, "eval_accuracy": 0.935562805872757, "eval_f1_score": 0.9246927415584684, "eval_loss": 0.46965479850769043, "eval_precision": 0.9241768322453005, "eval_recall": 0.9257346798390873, "eval_runtime": 6.6685, "eval_samples_per_second": 551.547, "eval_steps_per_second": 8.698, "step": 2500 }, { "epoch": 22.608695652173914, "grad_norm": 1.9872806072235107, "learning_rate": 3.88888888888889e-06, "loss": 0.2789, "step": 2600 }, { "epoch": 22.608695652173914, "eval_accuracy": 0.9380097879282219, "eval_f1_score": 0.9255376836601789, "eval_loss": 0.46675482392311096, "eval_precision": 0.9271185300366118, "eval_recall": 0.9250032647695392, "eval_runtime": 6.6231, "eval_samples_per_second": 555.328, "eval_steps_per_second": 8.757, "step": 2600 }, { "epoch": 23.47826086956522, "grad_norm": 4.836044788360596, "learning_rate": 3.6111111111111115e-06, "loss": 0.2785, "step": 2700 }, { "epoch": 23.47826086956522, "eval_accuracy": 0.9382816748232735, "eval_f1_score": 0.9293325929996209, "eval_loss": 0.4671032130718231, "eval_precision": 0.9288859327813286, "eval_recall": 0.9301070328166979, "eval_runtime": 6.6888, "eval_samples_per_second": 549.877, "eval_steps_per_second": 8.671, "step": 2700 }, { "epoch": 24.347826086956523, "grad_norm": 2.3744585514068604, "learning_rate": 3.3333333333333333e-06, "loss": 0.2773, "step": 2800 }, { "epoch": 24.347826086956523, "eval_accuracy": 0.9390973355084284, "eval_f1_score": 0.9293323313487988, "eval_loss": 0.46571776270866394, "eval_precision": 0.9327721009515447, "eval_recall": 0.927360363082818, "eval_runtime": 6.6343, "eval_samples_per_second": 554.389, "eval_steps_per_second": 8.742, "step": 2800 }, { "epoch": 25.217391304347824, "grad_norm": 4.463809490203857, "learning_rate": 3.055555555555556e-06, "loss": 0.2814, "step": 2900 }, { "epoch": 25.217391304347824, "eval_accuracy": 0.9361065796628603, "eval_f1_score": 0.9259199302104238, "eval_loss": 0.47015631198883057, "eval_precision": 0.924434133110186, "eval_recall": 0.9285494955527653, "eval_runtime": 6.6115, "eval_samples_per_second": 556.303, "eval_steps_per_second": 8.773, "step": 2900 }, { "epoch": 26.08695652173913, "grad_norm": 11.97252082824707, "learning_rate": 2.7777777777777783e-06, "loss": 0.2744, "step": 3000 }, { "epoch": 26.08695652173913, "eval_accuracy": 0.9352909189777052, "eval_f1_score": 0.9273773946315609, "eval_loss": 0.4731716811656952, "eval_precision": 0.9272954212892072, "eval_recall": 0.929014459015713, "eval_runtime": 6.6523, "eval_samples_per_second": 552.888, "eval_steps_per_second": 8.719, "step": 3000 }, { "epoch": 26.956521739130434, "grad_norm": 8.698735237121582, "learning_rate": 2.5e-06, "loss": 0.2772, "step": 3100 }, { "epoch": 26.956521739130434, "eval_accuracy": 0.9388254486133768, "eval_f1_score": 0.9280828388852939, "eval_loss": 0.46764281392097473, "eval_precision": 0.9264283970809257, "eval_recall": 0.9301128430609281, "eval_runtime": 6.6577, "eval_samples_per_second": 552.441, "eval_steps_per_second": 8.712, "step": 3100 }, { "epoch": 27.82608695652174, "grad_norm": 9.868401527404785, "learning_rate": 2.222222222222222e-06, "loss": 0.2736, "step": 3200 }, { "epoch": 27.82608695652174, "eval_accuracy": 0.9393692224034802, "eval_f1_score": 0.9280880031444269, "eval_loss": 0.46609246730804443, "eval_precision": 0.9325449452470522, "eval_recall": 0.9241991833476634, "eval_runtime": 6.6629, "eval_samples_per_second": 552.016, "eval_steps_per_second": 8.705, "step": 3200 }, { "epoch": 28.695652173913043, "grad_norm": 13.52723217010498, "learning_rate": 1.944444444444445e-06, "loss": 0.2754, "step": 3300 }, { "epoch": 28.695652173913043, "eval_accuracy": 0.9366503534529635, "eval_f1_score": 0.925681100132558, "eval_loss": 0.4745844602584839, "eval_precision": 0.9287802003680891, "eval_recall": 0.9233293953593443, "eval_runtime": 6.6962, "eval_samples_per_second": 549.264, "eval_steps_per_second": 8.662, "step": 3300 }, { "epoch": 29.565217391304348, "grad_norm": 0.09545432776212692, "learning_rate": 1.6666666666666667e-06, "loss": 0.2717, "step": 3400 }, { "epoch": 29.565217391304348, "eval_accuracy": 0.9380097879282219, "eval_f1_score": 0.9283345025534331, "eval_loss": 0.46884092688560486, "eval_precision": 0.9315263692536144, "eval_recall": 0.925530596287616, "eval_runtime": 6.6275, "eval_samples_per_second": 554.958, "eval_steps_per_second": 8.751, "step": 3400 }, { "epoch": 30.434782608695652, "grad_norm": 1.8131216764450073, "learning_rate": 1.3888888888888892e-06, "loss": 0.27, "step": 3500 }, { "epoch": 30.434782608695652, "eval_accuracy": 0.9388254486133768, "eval_f1_score": 0.9303895468552515, "eval_loss": 0.4696621894836426, "eval_precision": 0.9308188528131611, "eval_recall": 0.9307349860958422, "eval_runtime": 6.6391, "eval_samples_per_second": 553.987, "eval_steps_per_second": 8.736, "step": 3500 }, { "epoch": 31.304347826086957, "grad_norm": 1.2362151145935059, "learning_rate": 1.111111111111111e-06, "loss": 0.2674, "step": 3600 }, { "epoch": 31.304347826086957, "eval_accuracy": 0.9390973355084284, "eval_f1_score": 0.9273897248556044, "eval_loss": 0.466818243265152, "eval_precision": 0.9240182135116143, "eval_recall": 0.9311490745870729, "eval_runtime": 6.6874, "eval_samples_per_second": 549.992, "eval_steps_per_second": 8.673, "step": 3600 }, { "epoch": 32.17391304347826, "grad_norm": 6.612317085266113, "learning_rate": 8.333333333333333e-07, "loss": 0.2693, "step": 3700 }, { "epoch": 32.17391304347826, "eval_accuracy": 0.9407286568787384, "eval_f1_score": 0.9319305111443131, "eval_loss": 0.46566537022590637, "eval_precision": 0.9319435555053062, "eval_recall": 0.9325682397024007, "eval_runtime": 6.6594, "eval_samples_per_second": 552.298, "eval_steps_per_second": 8.709, "step": 3700 }, { "epoch": 33.04347826086956, "grad_norm": 3.6944832801818848, "learning_rate": 5.555555555555555e-07, "loss": 0.2685, "step": 3800 }, { "epoch": 33.04347826086956, "eval_accuracy": 0.9401848830886351, "eval_f1_score": 0.9298408543588726, "eval_loss": 0.46719253063201904, "eval_precision": 0.9297325884730158, "eval_recall": 0.9303720415200172, "eval_runtime": 6.6751, "eval_samples_per_second": 551.006, "eval_steps_per_second": 8.689, "step": 3800 }, { "epoch": 33.91304347826087, "grad_norm": 0.5045357346534729, "learning_rate": 2.7777777777777776e-07, "loss": 0.268, "step": 3900 }, { "epoch": 33.91304347826087, "eval_accuracy": 0.94100054377379, "eval_f1_score": 0.9316787163771599, "eval_loss": 0.46683618426322937, "eval_precision": 0.9328330606111085, "eval_recall": 0.9311209947261558, "eval_runtime": 6.6584, "eval_samples_per_second": 552.388, "eval_steps_per_second": 8.711, "step": 3900 }, { "epoch": 34.78260869565217, "grad_norm": 0.15777729451656342, "learning_rate": 0.0, "loss": 0.272, "step": 4000 }, { "epoch": 34.78260869565217, "eval_accuracy": 0.9401848830886351, "eval_f1_score": 0.9309979115269789, "eval_loss": 0.46536290645599365, "eval_precision": 0.9324573806162856, "eval_recall": 0.9300452091014509, "eval_runtime": 6.6565, "eval_samples_per_second": 552.546, "eval_steps_per_second": 8.713, "step": 4000 }, { "epoch": 34.78260869565217, "step": 4000, "total_flos": 8662131210539100.0, "train_loss": 0.4411096167564392, "train_runtime": 3432.8242, "train_samples_per_second": 149.148, "train_steps_per_second": 1.165 } ], "logging_steps": 100, "max_steps": 4000, "num_input_tokens_seen": 0, "num_train_epochs": 35, "save_steps": 100, "total_flos": 8662131210539100.0, "train_batch_size": 64, "trial_name": null, "trial_params": null }