|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 1.0, |
|
"global_step": 1750, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 4.9971428571428576e-05, |
|
"loss": 3.1194, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 4.9e-05, |
|
"loss": 3.222, |
|
"step": 35 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"eval_accuracy": 0.5, |
|
"eval_f1": 0.0, |
|
"eval_loss": 0.6869668960571289, |
|
"eval_precision": 0.0, |
|
"eval_recall": 0.0, |
|
"eval_runtime": 240.1814, |
|
"eval_samples_per_second": 12.491, |
|
"eval_steps_per_second": 1.561, |
|
"step": 35 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 4.8e-05, |
|
"loss": 1.7052, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"eval_accuracy": 0.5, |
|
"eval_f1": 0.0, |
|
"eval_loss": 2.0989105701446533, |
|
"eval_precision": 0.0, |
|
"eval_recall": 0.0, |
|
"eval_runtime": 240.275, |
|
"eval_samples_per_second": 12.486, |
|
"eval_steps_per_second": 1.561, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 4.7e-05, |
|
"loss": 1.4622, |
|
"step": 105 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"eval_accuracy": 0.5, |
|
"eval_f1": 0.6666666666666666, |
|
"eval_loss": 0.706234872341156, |
|
"eval_precision": 0.5, |
|
"eval_recall": 1.0, |
|
"eval_runtime": 240.4103, |
|
"eval_samples_per_second": 12.479, |
|
"eval_steps_per_second": 1.56, |
|
"step": 105 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 4.600000000000001e-05, |
|
"loss": 0.9345, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"eval_accuracy": 0.514, |
|
"eval_f1": 0.6729475100942127, |
|
"eval_loss": 0.7361006140708923, |
|
"eval_precision": 0.5070993914807302, |
|
"eval_recall": 1.0, |
|
"eval_runtime": 240.3366, |
|
"eval_samples_per_second": 12.482, |
|
"eval_steps_per_second": 1.56, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 4.5e-05, |
|
"loss": 1.3471, |
|
"step": 175 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"eval_accuracy": 0.5063333333333333, |
|
"eval_f1": 0.6693458361241348, |
|
"eval_loss": 0.7378450632095337, |
|
"eval_precision": 0.5031889895938234, |
|
"eval_recall": 0.9993333333333333, |
|
"eval_runtime": 240.3903, |
|
"eval_samples_per_second": 12.48, |
|
"eval_steps_per_second": 1.56, |
|
"step": 175 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 4.4000000000000006e-05, |
|
"loss": 1.0776, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"eval_accuracy": 0.502, |
|
"eval_f1": 0.6675567423230975, |
|
"eval_loss": 1.2094249725341797, |
|
"eval_precision": 0.501002004008016, |
|
"eval_recall": 1.0, |
|
"eval_runtime": 241.2524, |
|
"eval_samples_per_second": 12.435, |
|
"eval_steps_per_second": 1.554, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 4.3e-05, |
|
"loss": 0.7897, |
|
"step": 245 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"eval_accuracy": 0.818, |
|
"eval_f1": 0.8189655172413793, |
|
"eval_loss": 0.42468851804733276, |
|
"eval_precision": 0.8146437994722955, |
|
"eval_recall": 0.8233333333333334, |
|
"eval_runtime": 240.4771, |
|
"eval_samples_per_second": 12.475, |
|
"eval_steps_per_second": 1.559, |
|
"step": 245 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 4.2e-05, |
|
"loss": 0.8229, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"eval_accuracy": 0.6603333333333333, |
|
"eval_f1": 0.7405143875732112, |
|
"eval_loss": 0.7496446967124939, |
|
"eval_precision": 0.5990935311083643, |
|
"eval_recall": 0.9693333333333334, |
|
"eval_runtime": 241.2847, |
|
"eval_samples_per_second": 12.433, |
|
"eval_steps_per_second": 1.554, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 4.1e-05, |
|
"loss": 0.8056, |
|
"step": 315 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"eval_accuracy": 0.653, |
|
"eval_f1": 0.7400749063670412, |
|
"eval_loss": 0.7388193607330322, |
|
"eval_precision": 0.5916167664670658, |
|
"eval_recall": 0.988, |
|
"eval_runtime": 240.4851, |
|
"eval_samples_per_second": 12.475, |
|
"eval_steps_per_second": 1.559, |
|
"step": 315 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 4e-05, |
|
"loss": 0.831, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"eval_accuracy": 0.7976666666666666, |
|
"eval_f1": 0.7876880027981812, |
|
"eval_loss": 0.45541757345199585, |
|
"eval_precision": 0.8285504047093452, |
|
"eval_recall": 0.7506666666666667, |
|
"eval_runtime": 240.8878, |
|
"eval_samples_per_second": 12.454, |
|
"eval_steps_per_second": 1.557, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 3.9000000000000006e-05, |
|
"loss": 0.3693, |
|
"step": 385 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"eval_accuracy": 0.764, |
|
"eval_f1": 0.7972508591065293, |
|
"eval_loss": 0.567838191986084, |
|
"eval_precision": 0.6987951807228916, |
|
"eval_recall": 0.928, |
|
"eval_runtime": 240.8981, |
|
"eval_samples_per_second": 12.453, |
|
"eval_steps_per_second": 1.557, |
|
"step": 385 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 3.8e-05, |
|
"loss": 0.7178, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"eval_accuracy": 0.8273333333333334, |
|
"eval_f1": 0.832579185520362, |
|
"eval_loss": 0.4122844934463501, |
|
"eval_precision": 0.8080301129234629, |
|
"eval_recall": 0.8586666666666667, |
|
"eval_runtime": 240.7253, |
|
"eval_samples_per_second": 12.462, |
|
"eval_steps_per_second": 1.558, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 3.7e-05, |
|
"loss": 0.5769, |
|
"step": 455 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"eval_accuracy": 0.8406666666666667, |
|
"eval_f1": 0.8505315822388994, |
|
"eval_loss": 0.3792554438114166, |
|
"eval_precision": 0.800942285041225, |
|
"eval_recall": 0.9066666666666666, |
|
"eval_runtime": 240.7257, |
|
"eval_samples_per_second": 12.462, |
|
"eval_steps_per_second": 1.558, |
|
"step": 455 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 3.6e-05, |
|
"loss": 0.5896, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"eval_accuracy": 0.6713333333333333, |
|
"eval_f1": 0.7512613521695257, |
|
"eval_loss": 0.8622921705245972, |
|
"eval_precision": 0.604301948051948, |
|
"eval_recall": 0.9926666666666667, |
|
"eval_runtime": 240.7579, |
|
"eval_samples_per_second": 12.461, |
|
"eval_steps_per_second": 1.558, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 3.5e-05, |
|
"loss": 0.4586, |
|
"step": 525 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"eval_accuracy": 0.8066666666666666, |
|
"eval_f1": 0.791816223977028, |
|
"eval_loss": 0.5524763464927673, |
|
"eval_precision": 0.8576982892690513, |
|
"eval_recall": 0.7353333333333333, |
|
"eval_runtime": 241.0103, |
|
"eval_samples_per_second": 12.448, |
|
"eval_steps_per_second": 1.556, |
|
"step": 525 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 3.4000000000000007e-05, |
|
"loss": 0.5359, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"eval_accuracy": 0.5066666666666667, |
|
"eval_f1": 0.6696428571428572, |
|
"eval_loss": 2.7870869636535645, |
|
"eval_precision": 0.5033557046979866, |
|
"eval_recall": 1.0, |
|
"eval_runtime": 240.5172, |
|
"eval_samples_per_second": 12.473, |
|
"eval_steps_per_second": 1.559, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 3.3e-05, |
|
"loss": 0.4746, |
|
"step": 595 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"eval_accuracy": 0.728, |
|
"eval_f1": 0.7827476038338658, |
|
"eval_loss": 0.8008346557617188, |
|
"eval_precision": 0.651595744680851, |
|
"eval_recall": 0.98, |
|
"eval_runtime": 241.3279, |
|
"eval_samples_per_second": 12.431, |
|
"eval_steps_per_second": 1.554, |
|
"step": 595 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 3.2000000000000005e-05, |
|
"loss": 0.4525, |
|
"step": 630 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"eval_accuracy": 0.819, |
|
"eval_f1": 0.831104199066874, |
|
"eval_loss": 0.4293590486049652, |
|
"eval_precision": 0.7790087463556852, |
|
"eval_recall": 0.8906666666666667, |
|
"eval_runtime": 240.4794, |
|
"eval_samples_per_second": 12.475, |
|
"eval_steps_per_second": 1.559, |
|
"step": 630 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 3.1e-05, |
|
"loss": 0.5492, |
|
"step": 665 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"eval_accuracy": 0.8193333333333334, |
|
"eval_f1": 0.7991104521868051, |
|
"eval_loss": 0.6366798281669617, |
|
"eval_precision": 0.8998330550918197, |
|
"eval_recall": 0.7186666666666667, |
|
"eval_runtime": 240.7182, |
|
"eval_samples_per_second": 12.463, |
|
"eval_steps_per_second": 1.558, |
|
"step": 665 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 3e-05, |
|
"loss": 0.3315, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"eval_accuracy": 0.8446666666666667, |
|
"eval_f1": 0.8411724608043626, |
|
"eval_loss": 0.44469600915908813, |
|
"eval_precision": 0.8605299860529986, |
|
"eval_recall": 0.8226666666666667, |
|
"eval_runtime": 240.3073, |
|
"eval_samples_per_second": 12.484, |
|
"eval_steps_per_second": 1.561, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 2.9e-05, |
|
"loss": 0.5122, |
|
"step": 735 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"eval_accuracy": 0.723, |
|
"eval_f1": 0.7735077677841373, |
|
"eval_loss": 0.7516367435455322, |
|
"eval_precision": 0.6542185338865837, |
|
"eval_recall": 0.946, |
|
"eval_runtime": 240.4309, |
|
"eval_samples_per_second": 12.478, |
|
"eval_steps_per_second": 1.56, |
|
"step": 735 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 2.8000000000000003e-05, |
|
"loss": 0.4712, |
|
"step": 770 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"eval_accuracy": 0.7073333333333334, |
|
"eval_f1": 0.7701570680628271, |
|
"eval_loss": 0.7024486660957336, |
|
"eval_precision": 0.634051724137931, |
|
"eval_recall": 0.9806666666666667, |
|
"eval_runtime": 240.5793, |
|
"eval_samples_per_second": 12.47, |
|
"eval_steps_per_second": 1.559, |
|
"step": 770 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 2.7000000000000002e-05, |
|
"loss": 0.5167, |
|
"step": 805 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"eval_accuracy": 0.637, |
|
"eval_f1": 0.7323666748586877, |
|
"eval_loss": 1.0258432626724243, |
|
"eval_precision": 0.5799922148695991, |
|
"eval_recall": 0.9933333333333333, |
|
"eval_runtime": 240.5552, |
|
"eval_samples_per_second": 12.471, |
|
"eval_steps_per_second": 1.559, |
|
"step": 805 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 2.6000000000000002e-05, |
|
"loss": 0.3025, |
|
"step": 840 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"eval_accuracy": 0.7826666666666666, |
|
"eval_f1": 0.8170594837261503, |
|
"eval_loss": 0.5468646883964539, |
|
"eval_precision": 0.7054263565891473, |
|
"eval_recall": 0.9706666666666667, |
|
"eval_runtime": 240.9803, |
|
"eval_samples_per_second": 12.449, |
|
"eval_steps_per_second": 1.556, |
|
"step": 840 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 2.5e-05, |
|
"loss": 0.3937, |
|
"step": 875 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"eval_accuracy": 0.7956666666666666, |
|
"eval_f1": 0.8233938346297897, |
|
"eval_loss": 0.5975732207298279, |
|
"eval_precision": 0.7250126839167935, |
|
"eval_recall": 0.9526666666666667, |
|
"eval_runtime": 240.6768, |
|
"eval_samples_per_second": 12.465, |
|
"eval_steps_per_second": 1.558, |
|
"step": 875 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 2.4e-05, |
|
"loss": 0.3635, |
|
"step": 910 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"eval_accuracy": 0.7516666666666667, |
|
"eval_f1": 0.7974993204675184, |
|
"eval_loss": 0.681395411491394, |
|
"eval_precision": 0.6732446076181735, |
|
"eval_recall": 0.978, |
|
"eval_runtime": 243.1168, |
|
"eval_samples_per_second": 12.34, |
|
"eval_steps_per_second": 1.542, |
|
"step": 910 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 2.3000000000000003e-05, |
|
"loss": 0.3354, |
|
"step": 945 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"eval_accuracy": 0.6006666666666667, |
|
"eval_f1": 0.7139446036294174, |
|
"eval_loss": 1.097399115562439, |
|
"eval_precision": 0.5561755952380952, |
|
"eval_recall": 0.9966666666666667, |
|
"eval_runtime": 240.1778, |
|
"eval_samples_per_second": 12.491, |
|
"eval_steps_per_second": 1.561, |
|
"step": 945 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 2.2000000000000003e-05, |
|
"loss": 0.2047, |
|
"step": 980 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"eval_accuracy": 0.5143333333333333, |
|
"eval_f1": 0.6729517396184064, |
|
"eval_loss": 1.91347074508667, |
|
"eval_precision": 0.5072758037225042, |
|
"eval_recall": 0.9993333333333333, |
|
"eval_runtime": 240.5169, |
|
"eval_samples_per_second": 12.473, |
|
"eval_steps_per_second": 1.559, |
|
"step": 980 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 2.1e-05, |
|
"loss": 0.4875, |
|
"step": 1015 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"eval_accuracy": 0.8083333333333333, |
|
"eval_f1": 0.833574529667149, |
|
"eval_loss": 0.5008577704429626, |
|
"eval_precision": 0.7365728900255755, |
|
"eval_recall": 0.96, |
|
"eval_runtime": 240.1154, |
|
"eval_samples_per_second": 12.494, |
|
"eval_steps_per_second": 1.562, |
|
"step": 1015 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 2e-05, |
|
"loss": 0.2811, |
|
"step": 1050 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"eval_accuracy": 0.8413333333333334, |
|
"eval_f1": 0.8487928843710293, |
|
"eval_loss": 0.3812311887741089, |
|
"eval_precision": 0.8106796116504854, |
|
"eval_recall": 0.8906666666666667, |
|
"eval_runtime": 240.4478, |
|
"eval_samples_per_second": 12.477, |
|
"eval_steps_per_second": 1.56, |
|
"step": 1050 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 1.9e-05, |
|
"loss": 0.2227, |
|
"step": 1085 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"eval_accuracy": 0.8606666666666667, |
|
"eval_f1": 0.862137203166227, |
|
"eval_loss": 0.36810368299484253, |
|
"eval_precision": 0.8531331592689295, |
|
"eval_recall": 0.8713333333333333, |
|
"eval_runtime": 240.5301, |
|
"eval_samples_per_second": 12.472, |
|
"eval_steps_per_second": 1.559, |
|
"step": 1085 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"learning_rate": 1.8e-05, |
|
"loss": 0.3917, |
|
"step": 1120 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"eval_accuracy": 0.8543333333333333, |
|
"eval_f1": 0.8612257859637981, |
|
"eval_loss": 0.4265972375869751, |
|
"eval_precision": 0.8223165554881746, |
|
"eval_recall": 0.904, |
|
"eval_runtime": 241.0329, |
|
"eval_samples_per_second": 12.446, |
|
"eval_steps_per_second": 1.556, |
|
"step": 1120 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"learning_rate": 1.7000000000000003e-05, |
|
"loss": 0.3635, |
|
"step": 1155 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"eval_accuracy": 0.6436666666666667, |
|
"eval_f1": 0.736374845869297, |
|
"eval_loss": 1.1040711402893066, |
|
"eval_precision": 0.5843444227005871, |
|
"eval_recall": 0.9953333333333333, |
|
"eval_runtime": 240.5878, |
|
"eval_samples_per_second": 12.469, |
|
"eval_steps_per_second": 1.559, |
|
"step": 1155 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"learning_rate": 1.6000000000000003e-05, |
|
"loss": 0.4145, |
|
"step": 1190 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"eval_accuracy": 0.8136666666666666, |
|
"eval_f1": 0.8390440541318746, |
|
"eval_loss": 0.5226981043815613, |
|
"eval_precision": 0.7384693360364927, |
|
"eval_recall": 0.9713333333333334, |
|
"eval_runtime": 240.5513, |
|
"eval_samples_per_second": 12.471, |
|
"eval_steps_per_second": 1.559, |
|
"step": 1190 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 1.5e-05, |
|
"loss": 0.4154, |
|
"step": 1225 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"eval_accuracy": 0.6573333333333333, |
|
"eval_f1": 0.7438963627304432, |
|
"eval_loss": 0.8615061640739441, |
|
"eval_precision": 0.5938743038981702, |
|
"eval_recall": 0.9953333333333333, |
|
"eval_runtime": 241.311, |
|
"eval_samples_per_second": 12.432, |
|
"eval_steps_per_second": 1.554, |
|
"step": 1225 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"learning_rate": 1.4000000000000001e-05, |
|
"loss": 0.2322, |
|
"step": 1260 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"eval_accuracy": 0.627, |
|
"eval_f1": 0.7275383491599707, |
|
"eval_loss": 1.0941112041473389, |
|
"eval_precision": 0.57307249712313, |
|
"eval_recall": 0.996, |
|
"eval_runtime": 240.2621, |
|
"eval_samples_per_second": 12.486, |
|
"eval_steps_per_second": 1.561, |
|
"step": 1260 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"learning_rate": 1.3000000000000001e-05, |
|
"loss": 0.3221, |
|
"step": 1295 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"eval_accuracy": 0.722, |
|
"eval_f1": 0.7807570977917981, |
|
"eval_loss": 0.7642508149147034, |
|
"eval_precision": 0.64453125, |
|
"eval_recall": 0.99, |
|
"eval_runtime": 240.1026, |
|
"eval_samples_per_second": 12.495, |
|
"eval_steps_per_second": 1.562, |
|
"step": 1295 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"learning_rate": 1.2e-05, |
|
"loss": 0.2182, |
|
"step": 1330 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"eval_accuracy": 0.8256666666666667, |
|
"eval_f1": 0.847477398658501, |
|
"eval_loss": 0.4936087727546692, |
|
"eval_precision": 0.7532400207361327, |
|
"eval_recall": 0.9686666666666667, |
|
"eval_runtime": 240.5209, |
|
"eval_samples_per_second": 12.473, |
|
"eval_steps_per_second": 1.559, |
|
"step": 1330 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"learning_rate": 1.1000000000000001e-05, |
|
"loss": 0.2608, |
|
"step": 1365 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"eval_accuracy": 0.7696666666666667, |
|
"eval_f1": 0.8104252400548698, |
|
"eval_loss": 0.6222155690193176, |
|
"eval_precision": 0.6885780885780886, |
|
"eval_recall": 0.9846666666666667, |
|
"eval_runtime": 242.2988, |
|
"eval_samples_per_second": 12.381, |
|
"eval_steps_per_second": 1.548, |
|
"step": 1365 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"learning_rate": 1e-05, |
|
"loss": 0.3251, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"eval_accuracy": 0.8256666666666667, |
|
"eval_f1": 0.8467623791385878, |
|
"eval_loss": 0.4772394299507141, |
|
"eval_precision": 0.7553580763199164, |
|
"eval_recall": 0.9633333333333334, |
|
"eval_runtime": 240.46, |
|
"eval_samples_per_second": 12.476, |
|
"eval_steps_per_second": 1.56, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"learning_rate": 9e-06, |
|
"loss": 0.1763, |
|
"step": 1435 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"eval_accuracy": 0.7433333333333333, |
|
"eval_f1": 0.7941176470588235, |
|
"eval_loss": 0.6803577542304993, |
|
"eval_precision": 0.6629464285714286, |
|
"eval_recall": 0.99, |
|
"eval_runtime": 240.563, |
|
"eval_samples_per_second": 12.471, |
|
"eval_steps_per_second": 1.559, |
|
"step": 1435 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"learning_rate": 8.000000000000001e-06, |
|
"loss": 0.2904, |
|
"step": 1470 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"eval_accuracy": 0.7246666666666667, |
|
"eval_f1": 0.7827459231983166, |
|
"eval_loss": 0.727342426776886, |
|
"eval_precision": 0.6463944396177237, |
|
"eval_recall": 0.992, |
|
"eval_runtime": 241.4946, |
|
"eval_samples_per_second": 12.423, |
|
"eval_steps_per_second": 1.553, |
|
"step": 1470 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"learning_rate": 7.000000000000001e-06, |
|
"loss": 0.2758, |
|
"step": 1505 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"eval_accuracy": 0.689, |
|
"eval_f1": 0.7615640173779707, |
|
"eval_loss": 0.8572436571121216, |
|
"eval_precision": 0.6174886033982594, |
|
"eval_recall": 0.9933333333333333, |
|
"eval_runtime": 240.6981, |
|
"eval_samples_per_second": 12.464, |
|
"eval_steps_per_second": 1.558, |
|
"step": 1505 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"learning_rate": 6e-06, |
|
"loss": 0.3054, |
|
"step": 1540 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"eval_accuracy": 0.6446666666666667, |
|
"eval_f1": 0.7369200394866732, |
|
"eval_loss": 0.9957032799720764, |
|
"eval_precision": 0.5850313479623824, |
|
"eval_recall": 0.9953333333333333, |
|
"eval_runtime": 240.6892, |
|
"eval_samples_per_second": 12.464, |
|
"eval_steps_per_second": 1.558, |
|
"step": 1540 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"learning_rate": 5e-06, |
|
"loss": 0.2433, |
|
"step": 1575 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"eval_accuracy": 0.7466666666666667, |
|
"eval_f1": 0.7960279119699409, |
|
"eval_loss": 0.6531896591186523, |
|
"eval_precision": 0.6662174303683738, |
|
"eval_recall": 0.9886666666666667, |
|
"eval_runtime": 242.6474, |
|
"eval_samples_per_second": 12.364, |
|
"eval_steps_per_second": 1.545, |
|
"step": 1575 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"learning_rate": 4.000000000000001e-06, |
|
"loss": 0.2869, |
|
"step": 1610 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"eval_accuracy": 0.816, |
|
"eval_f1": 0.8410138248847926, |
|
"eval_loss": 0.45964765548706055, |
|
"eval_precision": 0.7403651115618661, |
|
"eval_recall": 0.9733333333333334, |
|
"eval_runtime": 240.842, |
|
"eval_samples_per_second": 12.456, |
|
"eval_steps_per_second": 1.557, |
|
"step": 1610 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"learning_rate": 3e-06, |
|
"loss": 0.119, |
|
"step": 1645 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"eval_accuracy": 0.727, |
|
"eval_f1": 0.7838479809976248, |
|
"eval_loss": 0.7068488001823425, |
|
"eval_precision": 0.6487549148099607, |
|
"eval_recall": 0.99, |
|
"eval_runtime": 240.4825, |
|
"eval_samples_per_second": 12.475, |
|
"eval_steps_per_second": 1.559, |
|
"step": 1645 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"learning_rate": 2.0000000000000003e-06, |
|
"loss": 0.2153, |
|
"step": 1680 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"eval_accuracy": 0.7513333333333333, |
|
"eval_f1": 0.7988133764832793, |
|
"eval_loss": 0.6478840112686157, |
|
"eval_precision": 0.6707427536231884, |
|
"eval_recall": 0.9873333333333333, |
|
"eval_runtime": 240.4615, |
|
"eval_samples_per_second": 12.476, |
|
"eval_steps_per_second": 1.56, |
|
"step": 1680 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"learning_rate": 1.0000000000000002e-06, |
|
"loss": 0.2594, |
|
"step": 1715 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"eval_accuracy": 0.7313333333333333, |
|
"eval_f1": 0.7866596082583377, |
|
"eval_loss": 0.6965696811676025, |
|
"eval_precision": 0.6523266022827041, |
|
"eval_recall": 0.9906666666666667, |
|
"eval_runtime": 240.2996, |
|
"eval_samples_per_second": 12.484, |
|
"eval_steps_per_second": 1.561, |
|
"step": 1715 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"learning_rate": 0.0, |
|
"loss": 0.2108, |
|
"step": 1750 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_accuracy": 0.7433333333333333, |
|
"eval_f1": 0.7938972162740899, |
|
"eval_loss": 0.6547001004219055, |
|
"eval_precision": 0.6632379248658319, |
|
"eval_recall": 0.9886666666666667, |
|
"eval_runtime": 240.3896, |
|
"eval_samples_per_second": 12.48, |
|
"eval_steps_per_second": 1.56, |
|
"step": 1750 |
|
} |
|
], |
|
"max_steps": 1750, |
|
"num_train_epochs": 1, |
|
"total_flos": 5.1979933974528e+16, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|