|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 0.998719590268886, |
|
"eval_steps": 75, |
|
"global_step": 390, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 8.333333333333333e-07, |
|
"loss": 2.0203, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 1.6666666666666667e-06, |
|
"loss": 2.1077, |
|
"step": 2 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 2.5e-06, |
|
"loss": 1.6828, |
|
"step": 3 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 3.3333333333333333e-06, |
|
"loss": 1.7609, |
|
"step": 4 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 4.166666666666667e-06, |
|
"loss": 1.8274, |
|
"step": 5 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 5e-06, |
|
"loss": 1.7816, |
|
"step": 6 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 5.833333333333334e-06, |
|
"loss": 1.592, |
|
"step": 7 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 6.666666666666667e-06, |
|
"loss": 1.4271, |
|
"step": 8 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 7.500000000000001e-06, |
|
"loss": 1.4018, |
|
"step": 9 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 8.333333333333334e-06, |
|
"loss": 1.1137, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 9.166666666666666e-06, |
|
"loss": 1.2774, |
|
"step": 11 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 1e-05, |
|
"loss": 1.1152, |
|
"step": 12 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 9.999827315381885e-06, |
|
"loss": 1.1456, |
|
"step": 13 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 9.99930927345553e-06, |
|
"loss": 1.0368, |
|
"step": 14 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 9.998445910004082e-06, |
|
"loss": 1.0394, |
|
"step": 15 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 9.99723728466338e-06, |
|
"loss": 1.0852, |
|
"step": 16 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 9.995683480917821e-06, |
|
"loss": 1.0978, |
|
"step": 17 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 9.993784606094612e-06, |
|
"loss": 1.0971, |
|
"step": 18 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 9.991540791356342e-06, |
|
"loss": 0.8742, |
|
"step": 19 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 9.988952191691925e-06, |
|
"loss": 1.0309, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 9.986018985905901e-06, |
|
"loss": 0.9933, |
|
"step": 21 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 9.982741376606077e-06, |
|
"loss": 1.0179, |
|
"step": 22 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 9.97911959018954e-06, |
|
"loss": 0.9589, |
|
"step": 23 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 9.975153876827008e-06, |
|
"loss": 0.9256, |
|
"step": 24 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 9.97084451044556e-06, |
|
"loss": 0.9052, |
|
"step": 25 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 9.966191788709716e-06, |
|
"loss": 0.7886, |
|
"step": 26 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 9.961196033000862e-06, |
|
"loss": 0.8052, |
|
"step": 27 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 9.955857588395065e-06, |
|
"loss": 0.8644, |
|
"step": 28 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 9.950176823639233e-06, |
|
"loss": 0.943, |
|
"step": 29 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 9.944154131125643e-06, |
|
"loss": 0.8096, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 9.937789926864838e-06, |
|
"loss": 0.8375, |
|
"step": 31 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 9.931084650456892e-06, |
|
"loss": 0.9596, |
|
"step": 32 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 9.924038765061042e-06, |
|
"loss": 0.8191, |
|
"step": 33 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 9.916652757363698e-06, |
|
"loss": 0.6918, |
|
"step": 34 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 9.90892713754483e-06, |
|
"loss": 0.8118, |
|
"step": 35 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 9.900862439242719e-06, |
|
"loss": 0.8023, |
|
"step": 36 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 9.892459219517108e-06, |
|
"loss": 0.8091, |
|
"step": 37 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 9.883718058810708e-06, |
|
"loss": 0.7172, |
|
"step": 38 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 9.874639560909118e-06, |
|
"loss": 0.8353, |
|
"step": 39 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 9.86522435289912e-06, |
|
"loss": 0.8134, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 9.855473085125351e-06, |
|
"loss": 0.8074, |
|
"step": 41 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 9.84538643114539e-06, |
|
"loss": 0.6778, |
|
"step": 42 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 9.834965087683237e-06, |
|
"loss": 0.8074, |
|
"step": 43 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 9.824209774581176e-06, |
|
"loss": 0.6441, |
|
"step": 44 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 9.81312123475006e-06, |
|
"loss": 0.6854, |
|
"step": 45 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 9.801700234118e-06, |
|
"loss": 0.7312, |
|
"step": 46 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 9.789947561577445e-06, |
|
"loss": 0.6815, |
|
"step": 47 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 9.777864028930705e-06, |
|
"loss": 0.6503, |
|
"step": 48 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 9.765450470833867e-06, |
|
"loss": 0.6441, |
|
"step": 49 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 9.752707744739146e-06, |
|
"loss": 0.7132, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 9.73963673083566e-06, |
|
"loss": 0.6414, |
|
"step": 51 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 9.726238331988625e-06, |
|
"loss": 0.6329, |
|
"step": 52 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 9.712513473676997e-06, |
|
"loss": 0.7038, |
|
"step": 53 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 9.698463103929542e-06, |
|
"loss": 0.7058, |
|
"step": 54 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 9.684088193259356e-06, |
|
"loss": 0.7323, |
|
"step": 55 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 9.669389734596819e-06, |
|
"loss": 0.5464, |
|
"step": 56 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 9.654368743221022e-06, |
|
"loss": 0.5878, |
|
"step": 57 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 9.639026256689628e-06, |
|
"loss": 0.4955, |
|
"step": 58 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 9.623363334767208e-06, |
|
"loss": 0.5786, |
|
"step": 59 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 9.60738105935204e-06, |
|
"loss": 0.526, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 9.591080534401371e-06, |
|
"loss": 0.5261, |
|
"step": 61 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 9.574462885855173e-06, |
|
"loss": 0.6276, |
|
"step": 62 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 9.557529261558367e-06, |
|
"loss": 0.585, |
|
"step": 63 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 9.540280831181525e-06, |
|
"loss": 0.651, |
|
"step": 64 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 9.522718786140096e-06, |
|
"loss": 0.4835, |
|
"step": 65 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 9.504844339512096e-06, |
|
"loss": 0.5757, |
|
"step": 66 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 9.486658725954321e-06, |
|
"loss": 0.6357, |
|
"step": 67 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 9.468163201617063e-06, |
|
"loss": 0.6052, |
|
"step": 68 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 9.449359044057344e-06, |
|
"loss": 0.6328, |
|
"step": 69 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 9.430247552150673e-06, |
|
"loss": 0.4931, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 9.410830046001321e-06, |
|
"loss": 0.5083, |
|
"step": 71 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 9.391107866851143e-06, |
|
"loss": 0.5612, |
|
"step": 72 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 9.37108237698693e-06, |
|
"loss": 0.5848, |
|
"step": 73 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 9.350754959646306e-06, |
|
"loss": 0.5098, |
|
"step": 74 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 9.330127018922195e-06, |
|
"loss": 0.6561, |
|
"step": 75 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"eval_accuracy": 0.8096002455644081, |
|
"eval_accuracy_<|content|>": 0.9054457292055204, |
|
"eval_accuracy_<|from|>": 0.9848293299620733, |
|
"eval_accuracy_<|recipient|>": 0.5600505689001264, |
|
"eval_accuracy_<|stop|>": 0.9188891337888473, |
|
"eval_accuracy_total_num_<|content|>": 5362, |
|
"eval_accuracy_total_num_<|from|>": 791, |
|
"eval_accuracy_total_num_<|recipient|>": 791, |
|
"eval_accuracy_total_num_<|stop|>": 4537, |
|
"eval_loss": NaN, |
|
"eval_perplexity": 1.0685622608271412, |
|
"eval_runtime": 368.5003, |
|
"eval_samples_per_second": 3.731, |
|
"eval_steps_per_second": 0.467, |
|
"step": 75 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 9.309199979665821e-06, |
|
"loss": 0.5904, |
|
"step": 76 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 9.287975287388297e-06, |
|
"loss": 0.5897, |
|
"step": 77 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 9.266454408160779e-06, |
|
"loss": 0.5341, |
|
"step": 78 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 9.244638828513189e-06, |
|
"loss": 0.5282, |
|
"step": 79 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 9.22253005533154e-06, |
|
"loss": 0.4889, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 9.200129615753858e-06, |
|
"loss": 0.6347, |
|
"step": 81 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 9.177439057064684e-06, |
|
"loss": 0.5477, |
|
"step": 82 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 9.154459946588199e-06, |
|
"loss": 0.5674, |
|
"step": 83 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 9.131193871579975e-06, |
|
"loss": 0.5769, |
|
"step": 84 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 9.107642439117322e-06, |
|
"loss": 0.4182, |
|
"step": 85 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 9.083807275988285e-06, |
|
"loss": 0.6244, |
|
"step": 86 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 9.059690028579285e-06, |
|
"loss": 0.4599, |
|
"step": 87 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 9.035292362761382e-06, |
|
"loss": 0.5209, |
|
"step": 88 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 9.01061596377522e-06, |
|
"loss": 0.5992, |
|
"step": 89 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 8.985662536114614e-06, |
|
"loss": 0.5819, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 8.960433803408813e-06, |
|
"loss": 0.5248, |
|
"step": 91 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 8.934931508303446e-06, |
|
"loss": 0.497, |
|
"step": 92 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 8.90915741234015e-06, |
|
"loss": 0.4701, |
|
"step": 93 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 8.883113295834893e-06, |
|
"loss": 0.4594, |
|
"step": 94 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 8.856800957755e-06, |
|
"loss": 0.4374, |
|
"step": 95 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 8.83022221559489e-06, |
|
"loss": 0.53, |
|
"step": 96 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 8.803378905250544e-06, |
|
"loss": 0.4277, |
|
"step": 97 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 8.776272880892675e-06, |
|
"loss": 0.4828, |
|
"step": 98 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 8.748906014838672e-06, |
|
"loss": 0.5068, |
|
"step": 99 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 8.721280197423259e-06, |
|
"loss": 0.462, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 8.69339733686793e-06, |
|
"loss": 0.4336, |
|
"step": 101 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 8.665259359149132e-06, |
|
"loss": 0.4928, |
|
"step": 102 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 8.636868207865244e-06, |
|
"loss": 0.5075, |
|
"step": 103 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 8.608225844102312e-06, |
|
"loss": 0.5317, |
|
"step": 104 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 8.579334246298593e-06, |
|
"loss": 0.5727, |
|
"step": 105 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 8.550195410107903e-06, |
|
"loss": 0.5007, |
|
"step": 106 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 8.52081134826176e-06, |
|
"loss": 0.5853, |
|
"step": 107 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 8.491184090430365e-06, |
|
"loss": 0.5461, |
|
"step": 108 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 8.461315683082398e-06, |
|
"loss": 0.5821, |
|
"step": 109 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 8.43120818934367e-06, |
|
"loss": 0.5009, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 8.400863688854598e-06, |
|
"loss": 0.4381, |
|
"step": 111 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 8.370284277626576e-06, |
|
"loss": 0.4998, |
|
"step": 112 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 8.339472067897187e-06, |
|
"loss": 0.554, |
|
"step": 113 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 8.308429187984298e-06, |
|
"loss": 0.4264, |
|
"step": 114 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 8.277157782139051e-06, |
|
"loss": 0.4692, |
|
"step": 115 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 8.24566001039776e-06, |
|
"loss": 0.5083, |
|
"step": 116 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 8.213938048432697e-06, |
|
"loss": 0.5876, |
|
"step": 117 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 8.181994087401819e-06, |
|
"loss": 0.4498, |
|
"step": 118 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 8.149830333797407e-06, |
|
"loss": 0.5222, |
|
"step": 119 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 8.117449009293668e-06, |
|
"loss": 0.5295, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 8.084852350593264e-06, |
|
"loss": 0.492, |
|
"step": 121 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 8.052042609272817e-06, |
|
"loss": 0.675, |
|
"step": 122 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 8.019022051627387e-06, |
|
"loss": 0.4563, |
|
"step": 123 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 7.985792958513932e-06, |
|
"loss": 0.5108, |
|
"step": 124 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 7.952357625193749e-06, |
|
"loss": 0.4258, |
|
"step": 125 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 7.918718361173951e-06, |
|
"loss": 0.4717, |
|
"step": 126 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 7.884877490047915e-06, |
|
"loss": 0.529, |
|
"step": 127 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 7.85083734933481e-06, |
|
"loss": 0.4353, |
|
"step": 128 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 7.81660029031811e-06, |
|
"loss": 0.512, |
|
"step": 129 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 7.782168677883206e-06, |
|
"loss": 0.5723, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 7.747544890354031e-06, |
|
"loss": 0.5354, |
|
"step": 131 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 7.712731319328798e-06, |
|
"loss": 0.4359, |
|
"step": 132 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 7.677730369514792e-06, |
|
"loss": 0.3552, |
|
"step": 133 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 7.642544458562278e-06, |
|
"loss": 0.4758, |
|
"step": 134 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 7.607176016897491e-06, |
|
"loss": 0.4783, |
|
"step": 135 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 7.571627487554769e-06, |
|
"loss": 0.6152, |
|
"step": 136 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 7.535901326007796e-06, |
|
"loss": 0.4984, |
|
"step": 137 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 7.500000000000001e-06, |
|
"loss": 0.4241, |
|
"step": 138 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 7.463925989374089e-06, |
|
"loss": 0.4422, |
|
"step": 139 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 7.4276817859007615e-06, |
|
"loss": 0.475, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 7.391269893106592e-06, |
|
"loss": 0.4778, |
|
"step": 141 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 7.354692826101102e-06, |
|
"loss": 0.406, |
|
"step": 142 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 7.317953111403029e-06, |
|
"loss": 0.499, |
|
"step": 143 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 7.281053286765816e-06, |
|
"loss": 0.6181, |
|
"step": 144 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 7.243995901002312e-06, |
|
"loss": 0.6451, |
|
"step": 145 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 7.206783513808721e-06, |
|
"loss": 0.4481, |
|
"step": 146 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 7.169418695587791e-06, |
|
"loss": 0.4808, |
|
"step": 147 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 7.1319040272712705e-06, |
|
"loss": 0.5812, |
|
"step": 148 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 7.094242100141625e-06, |
|
"loss": 0.5282, |
|
"step": 149 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 7.056435515653059e-06, |
|
"loss": 0.508, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"eval_accuracy": 0.8220327916670906, |
|
"eval_accuracy_<|content|>": 0.9985080193957478, |
|
"eval_accuracy_<|from|>": 0.97724399494311, |
|
"eval_accuracy_<|recipient|>": 1.0, |
|
"eval_accuracy_<|stop|>": 0.84174564690324, |
|
"eval_accuracy_total_num_<|content|>": 5362, |
|
"eval_accuracy_total_num_<|from|>": 791, |
|
"eval_accuracy_total_num_<|recipient|>": 791, |
|
"eval_accuracy_total_num_<|stop|>": 4537, |
|
"eval_loss": NaN, |
|
"eval_perplexity": 1.0626254108805258, |
|
"eval_runtime": 331.6718, |
|
"eval_samples_per_second": 4.146, |
|
"eval_steps_per_second": 0.519, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 7.0184868852518114e-06, |
|
"loss": 0.3785, |
|
"step": 151 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 6.980398830195785e-06, |
|
"loss": 0.3625, |
|
"step": 152 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 6.942173981373474e-06, |
|
"loss": 0.5124, |
|
"step": 153 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 6.903814979122249e-06, |
|
"loss": 0.5874, |
|
"step": 154 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 6.86532447304597e-06, |
|
"loss": 0.473, |
|
"step": 155 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 6.8267051218319766e-06, |
|
"loss": 0.5132, |
|
"step": 156 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 6.787959593067431e-06, |
|
"loss": 0.597, |
|
"step": 157 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 6.749090563055075e-06, |
|
"loss": 0.4303, |
|
"step": 158 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 6.710100716628345e-06, |
|
"loss": 0.5029, |
|
"step": 159 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 6.6709927469659385e-06, |
|
"loss": 0.5159, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 6.631769355405779e-06, |
|
"loss": 0.4789, |
|
"step": 161 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 6.592433251258423e-06, |
|
"loss": 0.4632, |
|
"step": 162 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 6.552987151619919e-06, |
|
"loss": 0.5442, |
|
"step": 163 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 6.513433781184131e-06, |
|
"loss": 0.4984, |
|
"step": 164 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 6.473775872054522e-06, |
|
"loss": 0.4579, |
|
"step": 165 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 6.434016163555452e-06, |
|
"loss": 0.434, |
|
"step": 166 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 6.394157402042952e-06, |
|
"loss": 0.5519, |
|
"step": 167 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 6.354202340715027e-06, |
|
"loss": 0.4145, |
|
"step": 168 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 6.314153739421477e-06, |
|
"loss": 0.4358, |
|
"step": 169 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 6.274014364473274e-06, |
|
"loss": 0.4972, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 6.233786988451468e-06, |
|
"loss": 0.5023, |
|
"step": 171 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 6.19347439001569e-06, |
|
"loss": 0.4899, |
|
"step": 172 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 6.153079353712201e-06, |
|
"loss": 0.4607, |
|
"step": 173 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 6.112604669781572e-06, |
|
"loss": 0.4166, |
|
"step": 174 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 6.0720531339659386e-06, |
|
"loss": 0.4975, |
|
"step": 175 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 6.031427547315889e-06, |
|
"loss": 0.4031, |
|
"step": 176 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 5.990730715996989e-06, |
|
"loss": 0.5201, |
|
"step": 177 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 5.949965451095952e-06, |
|
"loss": 0.4893, |
|
"step": 178 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 5.909134568426455e-06, |
|
"loss": 0.61, |
|
"step": 179 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 5.8682408883346535e-06, |
|
"loss": 0.4173, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 5.827287235504356e-06, |
|
"loss": 0.4462, |
|
"step": 181 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 5.786276438761928e-06, |
|
"loss": 0.53, |
|
"step": 182 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 5.745211330880872e-06, |
|
"loss": 0.3297, |
|
"step": 183 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 5.7040947483861845e-06, |
|
"loss": 0.466, |
|
"step": 184 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 5.6629295313583975e-06, |
|
"loss": 0.5355, |
|
"step": 185 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 5.621718523237427e-06, |
|
"loss": 0.4806, |
|
"step": 186 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 5.5804645706261515e-06, |
|
"loss": 0.3791, |
|
"step": 187 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 5.539170523093794e-06, |
|
"loss": 0.5198, |
|
"step": 188 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 5.497839232979084e-06, |
|
"loss": 0.4091, |
|
"step": 189 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 5.456473555193242e-06, |
|
"loss": 0.3974, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 5.415076347022777e-06, |
|
"loss": 0.4699, |
|
"step": 191 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 5.373650467932122e-06, |
|
"loss": 0.4385, |
|
"step": 192 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 5.332198779366123e-06, |
|
"loss": 0.4943, |
|
"step": 193 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 5.290724144552379e-06, |
|
"loss": 0.4705, |
|
"step": 194 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 5.249229428303486e-06, |
|
"loss": 0.5045, |
|
"step": 195 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 5.207717496819134e-06, |
|
"loss": 0.5795, |
|
"step": 196 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 5.166191217488134e-06, |
|
"loss": 0.4442, |
|
"step": 197 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 5.1246534586903655e-06, |
|
"loss": 0.4642, |
|
"step": 198 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 5.083107089598632e-06, |
|
"loss": 0.4908, |
|
"step": 199 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 5.041554979980487e-06, |
|
"loss": 0.4577, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 5e-06, |
|
"loss": 0.402, |
|
"step": 201 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 4.958445020019516e-06, |
|
"loss": 0.4191, |
|
"step": 202 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 4.916892910401369e-06, |
|
"loss": 0.4828, |
|
"step": 203 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 4.875346541309637e-06, |
|
"loss": 0.4478, |
|
"step": 204 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 4.833808782511867e-06, |
|
"loss": 0.5202, |
|
"step": 205 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 4.792282503180867e-06, |
|
"loss": 0.3495, |
|
"step": 206 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 4.750770571696514e-06, |
|
"loss": 0.5703, |
|
"step": 207 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 4.7092758554476215e-06, |
|
"loss": 0.3986, |
|
"step": 208 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 4.66780122063388e-06, |
|
"loss": 0.3185, |
|
"step": 209 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 4.626349532067879e-06, |
|
"loss": 0.4846, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 4.584923652977224e-06, |
|
"loss": 0.4592, |
|
"step": 211 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 4.5435264448067595e-06, |
|
"loss": 0.4666, |
|
"step": 212 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 4.502160767020918e-06, |
|
"loss": 0.428, |
|
"step": 213 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 4.460829476906208e-06, |
|
"loss": 0.5192, |
|
"step": 214 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 4.4195354293738484e-06, |
|
"loss": 0.4456, |
|
"step": 215 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 4.3782814767625755e-06, |
|
"loss": 0.5031, |
|
"step": 216 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 4.337070468641604e-06, |
|
"loss": 0.3835, |
|
"step": 217 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 4.295905251613817e-06, |
|
"loss": 0.4406, |
|
"step": 218 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 4.254788669119127e-06, |
|
"loss": 0.4374, |
|
"step": 219 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 4.213723561238074e-06, |
|
"loss": 0.4021, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 4.172712764495645e-06, |
|
"loss": 0.5134, |
|
"step": 221 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 4.131759111665349e-06, |
|
"loss": 0.4838, |
|
"step": 222 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 4.090865431573547e-06, |
|
"loss": 0.4011, |
|
"step": 223 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 4.0500345489040515e-06, |
|
"loss": 0.3548, |
|
"step": 224 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 4.009269284003014e-06, |
|
"loss": 0.391, |
|
"step": 225 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"eval_accuracy": 0.8278573148685179, |
|
"eval_accuracy_<|content|>": 0.9986945169712794, |
|
"eval_accuracy_<|from|>": 0.9949431099873578, |
|
"eval_accuracy_<|recipient|>": 1.0, |
|
"eval_accuracy_<|stop|>": 0.939607670266696, |
|
"eval_accuracy_total_num_<|content|>": 5362, |
|
"eval_accuracy_total_num_<|from|>": 791, |
|
"eval_accuracy_total_num_<|recipient|>": 791, |
|
"eval_accuracy_total_num_<|stop|>": 4537, |
|
"eval_loss": NaN, |
|
"eval_perplexity": 1.0600965829681877, |
|
"eval_runtime": 334.7156, |
|
"eval_samples_per_second": 4.108, |
|
"eval_steps_per_second": 0.514, |
|
"step": 225 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 3.968572452684113e-06, |
|
"loss": 0.5301, |
|
"step": 226 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 3.927946866034062e-06, |
|
"loss": 0.4173, |
|
"step": 227 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 3.887395330218429e-06, |
|
"loss": 0.5295, |
|
"step": 228 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 3.8469206462878e-06, |
|
"loss": 0.3869, |
|
"step": 229 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 3.806525609984312e-06, |
|
"loss": 0.4667, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 3.7662130115485317e-06, |
|
"loss": 0.4839, |
|
"step": 231 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 3.7259856355267275e-06, |
|
"loss": 0.4778, |
|
"step": 232 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 3.685846260578524e-06, |
|
"loss": 0.4695, |
|
"step": 233 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 3.6457976592849753e-06, |
|
"loss": 0.4, |
|
"step": 234 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 3.6058425979570482e-06, |
|
"loss": 0.441, |
|
"step": 235 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 3.5659838364445505e-06, |
|
"loss": 0.4077, |
|
"step": 236 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 3.526224127945479e-06, |
|
"loss": 0.4368, |
|
"step": 237 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 3.4865662188158713e-06, |
|
"loss": 0.5274, |
|
"step": 238 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 3.4470128483800813e-06, |
|
"loss": 0.4905, |
|
"step": 239 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 3.4075667487415785e-06, |
|
"loss": 0.5323, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 3.3682306445942224e-06, |
|
"loss": 0.5451, |
|
"step": 241 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 3.3290072530340628e-06, |
|
"loss": 0.4653, |
|
"step": 242 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 3.289899283371657e-06, |
|
"loss": 0.4957, |
|
"step": 243 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 3.250909436944928e-06, |
|
"loss": 0.423, |
|
"step": 244 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 3.2120404069325695e-06, |
|
"loss": 0.5153, |
|
"step": 245 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 3.173294878168025e-06, |
|
"loss": 0.4329, |
|
"step": 246 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 3.1346755269540303e-06, |
|
"loss": 0.377, |
|
"step": 247 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"learning_rate": 3.0961850208777527e-06, |
|
"loss": 0.4111, |
|
"step": 248 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"learning_rate": 3.057826018626527e-06, |
|
"loss": 0.3951, |
|
"step": 249 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"learning_rate": 3.019601169804216e-06, |
|
"loss": 0.5059, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"learning_rate": 2.981513114748189e-06, |
|
"loss": 0.4764, |
|
"step": 251 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 2.9435644843469434e-06, |
|
"loss": 0.4289, |
|
"step": 252 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 2.905757899858377e-06, |
|
"loss": 0.26, |
|
"step": 253 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 2.8680959727287316e-06, |
|
"loss": 0.4835, |
|
"step": 254 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 2.83058130441221e-06, |
|
"loss": 0.4426, |
|
"step": 255 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"learning_rate": 2.7932164861912805e-06, |
|
"loss": 0.4571, |
|
"step": 256 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"learning_rate": 2.7560040989976894e-06, |
|
"loss": 0.5578, |
|
"step": 257 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"learning_rate": 2.718946713234185e-06, |
|
"loss": 0.3772, |
|
"step": 258 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"learning_rate": 2.682046888596972e-06, |
|
"loss": 0.4563, |
|
"step": 259 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 2.645307173898901e-06, |
|
"loss": 0.5051, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 2.608730106893411e-06, |
|
"loss": 0.4382, |
|
"step": 261 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 2.5723182140992385e-06, |
|
"loss": 0.4664, |
|
"step": 262 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 2.536074010625911e-06, |
|
"loss": 0.3587, |
|
"step": 263 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"learning_rate": 2.5000000000000015e-06, |
|
"loss": 0.3431, |
|
"step": 264 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"learning_rate": 2.464098673992205e-06, |
|
"loss": 0.3606, |
|
"step": 265 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"learning_rate": 2.428372512445233e-06, |
|
"loss": 0.4422, |
|
"step": 266 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"learning_rate": 2.39282398310251e-06, |
|
"loss": 0.5567, |
|
"step": 267 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 2.357455541437723e-06, |
|
"loss": 0.4313, |
|
"step": 268 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 2.3222696304852084e-06, |
|
"loss": 0.5045, |
|
"step": 269 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 2.2872686806712037e-06, |
|
"loss": 0.4603, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 2.2524551096459703e-06, |
|
"loss": 0.5706, |
|
"step": 271 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 2.217831322116797e-06, |
|
"loss": 0.4547, |
|
"step": 272 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 2.1833997096818897e-06, |
|
"loss": 0.4556, |
|
"step": 273 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 2.1491626506651914e-06, |
|
"loss": 0.5204, |
|
"step": 274 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 2.115122509952085e-06, |
|
"loss": 0.3224, |
|
"step": 275 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"learning_rate": 2.081281638826052e-06, |
|
"loss": 0.5885, |
|
"step": 276 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"learning_rate": 2.047642374806252e-06, |
|
"loss": 0.6023, |
|
"step": 277 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"learning_rate": 2.0142070414860704e-06, |
|
"loss": 0.4437, |
|
"step": 278 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"learning_rate": 1.980977948372612e-06, |
|
"loss": 0.4753, |
|
"step": 279 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"learning_rate": 1.947957390727185e-06, |
|
"loss": 0.4953, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"learning_rate": 1.9151476494067376e-06, |
|
"loss": 0.379, |
|
"step": 281 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"learning_rate": 1.8825509907063328e-06, |
|
"loss": 0.459, |
|
"step": 282 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"learning_rate": 1.8501696662025937e-06, |
|
"loss": 0.5653, |
|
"step": 283 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"learning_rate": 1.8180059125981826e-06, |
|
"loss": 0.3608, |
|
"step": 284 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"learning_rate": 1.7860619515673034e-06, |
|
"loss": 0.4926, |
|
"step": 285 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"learning_rate": 1.7543399896022406e-06, |
|
"loss": 0.3828, |
|
"step": 286 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"learning_rate": 1.7228422178609488e-06, |
|
"loss": 0.3937, |
|
"step": 287 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"learning_rate": 1.6915708120157042e-06, |
|
"loss": 0.3875, |
|
"step": 288 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"learning_rate": 1.6605279321028138e-06, |
|
"loss": 0.4678, |
|
"step": 289 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"learning_rate": 1.6297157223734228e-06, |
|
"loss": 0.5462, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 1.5991363111454023e-06, |
|
"loss": 0.476, |
|
"step": 291 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 1.5687918106563326e-06, |
|
"loss": 0.5062, |
|
"step": 292 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 1.5386843169176025e-06, |
|
"loss": 0.5062, |
|
"step": 293 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 1.5088159095696365e-06, |
|
"loss": 0.4348, |
|
"step": 294 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"learning_rate": 1.4791886517382415e-06, |
|
"loss": 0.3801, |
|
"step": 295 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"learning_rate": 1.4498045898920988e-06, |
|
"loss": 0.4874, |
|
"step": 296 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"learning_rate": 1.4206657537014078e-06, |
|
"loss": 0.5251, |
|
"step": 297 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"learning_rate": 1.3917741558976894e-06, |
|
"loss": 0.3894, |
|
"step": 298 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"learning_rate": 1.3631317921347564e-06, |
|
"loss": 0.3325, |
|
"step": 299 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"learning_rate": 1.3347406408508695e-06, |
|
"loss": 0.4508, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"eval_accuracy": 0.8337530653153841, |
|
"eval_accuracy_<|content|>": 1.0, |
|
"eval_accuracy_<|from|>": 0.9911504424778761, |
|
"eval_accuracy_<|recipient|>": 1.0, |
|
"eval_accuracy_<|stop|>": 0.9329953713907868, |
|
"eval_accuracy_total_num_<|content|>": 5362, |
|
"eval_accuracy_total_num_<|from|>": 791, |
|
"eval_accuracy_total_num_<|recipient|>": 791, |
|
"eval_accuracy_total_num_<|stop|>": 4537, |
|
"eval_loss": null, |
|
"eval_perplexity": 1.0576072932064766, |
|
"eval_runtime": 333.0641, |
|
"eval_samples_per_second": 4.128, |
|
"eval_steps_per_second": 0.516, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"learning_rate": 1.3066026631320733e-06, |
|
"loss": 0.3393, |
|
"step": 301 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"learning_rate": 1.2787198025767417e-06, |
|
"loss": 0.5482, |
|
"step": 302 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"learning_rate": 1.2510939851613285e-06, |
|
"loss": 0.4206, |
|
"step": 303 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"learning_rate": 1.223727119107327e-06, |
|
"loss": 0.4613, |
|
"step": 304 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"learning_rate": 1.1966210947494583e-06, |
|
"loss": 0.3015, |
|
"step": 305 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"learning_rate": 1.1697777844051105e-06, |
|
"loss": 0.4651, |
|
"step": 306 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"learning_rate": 1.1431990422450018e-06, |
|
"loss": 0.3467, |
|
"step": 307 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"learning_rate": 1.1168867041651082e-06, |
|
"loss": 0.4167, |
|
"step": 308 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"learning_rate": 1.0908425876598512e-06, |
|
"loss": 0.4565, |
|
"step": 309 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"learning_rate": 1.065068491696556e-06, |
|
"loss": 0.5543, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"learning_rate": 1.0395661965911891e-06, |
|
"loss": 0.4687, |
|
"step": 311 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"learning_rate": 1.0143374638853892e-06, |
|
"loss": 0.4595, |
|
"step": 312 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"learning_rate": 9.893840362247809e-07, |
|
"loss": 0.4511, |
|
"step": 313 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"learning_rate": 9.647076372386195e-07, |
|
"loss": 0.4676, |
|
"step": 314 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"learning_rate": 9.403099714207175e-07, |
|
"loss": 0.4385, |
|
"step": 315 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"learning_rate": 9.161927240117174e-07, |
|
"loss": 0.3807, |
|
"step": 316 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"learning_rate": 8.923575608826812e-07, |
|
"loss": 0.4795, |
|
"step": 317 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"learning_rate": 8.688061284200266e-07, |
|
"loss": 0.5137, |
|
"step": 318 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"learning_rate": 8.455400534118008e-07, |
|
"loss": 0.4358, |
|
"step": 319 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"learning_rate": 8.225609429353187e-07, |
|
"loss": 0.5148, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"learning_rate": 7.99870384246143e-07, |
|
"loss": 0.4713, |
|
"step": 321 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"learning_rate": 7.774699446684608e-07, |
|
"loss": 0.4893, |
|
"step": 322 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"learning_rate": 7.553611714868136e-07, |
|
"loss": 0.4368, |
|
"step": 323 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"learning_rate": 7.33545591839222e-07, |
|
"loss": 0.4275, |
|
"step": 324 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"learning_rate": 7.120247126117025e-07, |
|
"loss": 0.4828, |
|
"step": 325 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"learning_rate": 6.908000203341802e-07, |
|
"loss": 0.5223, |
|
"step": 326 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"learning_rate": 6.698729810778065e-07, |
|
"loss": 0.4837, |
|
"step": 327 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"learning_rate": 6.492450403536959e-07, |
|
"loss": 0.4888, |
|
"step": 328 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"learning_rate": 6.289176230130728e-07, |
|
"loss": 0.4453, |
|
"step": 329 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"learning_rate": 6.088921331488568e-07, |
|
"loss": 0.4143, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"learning_rate": 5.891699539986789e-07, |
|
"loss": 0.4453, |
|
"step": 331 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"learning_rate": 5.697524478493288e-07, |
|
"loss": 0.4139, |
|
"step": 332 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"learning_rate": 5.506409559426573e-07, |
|
"loss": 0.4899, |
|
"step": 333 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"learning_rate": 5.318367983829393e-07, |
|
"loss": 0.3488, |
|
"step": 334 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"learning_rate": 5.133412740456805e-07, |
|
"loss": 0.5504, |
|
"step": 335 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"learning_rate": 4.951556604879049e-07, |
|
"loss": 0.2987, |
|
"step": 336 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"learning_rate": 4.772812138599043e-07, |
|
"loss": 0.503, |
|
"step": 337 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"learning_rate": 4.5971916881847543e-07, |
|
"loss": 0.3901, |
|
"step": 338 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"learning_rate": 4.4247073844163434e-07, |
|
"loss": 0.4166, |
|
"step": 339 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"learning_rate": 4.255371141448272e-07, |
|
"loss": 0.4068, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"learning_rate": 4.089194655986306e-07, |
|
"loss": 0.4019, |
|
"step": 341 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"learning_rate": 3.9261894064796136e-07, |
|
"loss": 0.4503, |
|
"step": 342 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"learning_rate": 3.766366652327924e-07, |
|
"loss": 0.4989, |
|
"step": 343 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"learning_rate": 3.6097374331037326e-07, |
|
"loss": 0.4407, |
|
"step": 344 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"learning_rate": 3.4563125677897936e-07, |
|
"loss": 0.3357, |
|
"step": 345 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"learning_rate": 3.306102654031823e-07, |
|
"loss": 0.4232, |
|
"step": 346 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"learning_rate": 3.1591180674064584e-07, |
|
"loss": 0.3591, |
|
"step": 347 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"learning_rate": 3.015368960704584e-07, |
|
"loss": 0.4366, |
|
"step": 348 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"learning_rate": 2.8748652632300367e-07, |
|
"loss": 0.4547, |
|
"step": 349 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"learning_rate": 2.737616680113758e-07, |
|
"loss": 0.386, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"learning_rate": 2.6036326916434153e-07, |
|
"loss": 0.4585, |
|
"step": 351 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"learning_rate": 2.472922552608559e-07, |
|
"loss": 0.4554, |
|
"step": 352 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"learning_rate": 2.3454952916613482e-07, |
|
"loss": 0.4524, |
|
"step": 353 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"learning_rate": 2.2213597106929608e-07, |
|
"loss": 0.3612, |
|
"step": 354 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"learning_rate": 2.1005243842255552e-07, |
|
"loss": 0.4398, |
|
"step": 355 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"learning_rate": 1.982997658820013e-07, |
|
"loss": 0.3687, |
|
"step": 356 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"learning_rate": 1.8687876524993987e-07, |
|
"loss": 0.3961, |
|
"step": 357 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"learning_rate": 1.757902254188254e-07, |
|
"loss": 0.4561, |
|
"step": 358 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"learning_rate": 1.6503491231676382e-07, |
|
"loss": 0.4427, |
|
"step": 359 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"learning_rate": 1.5461356885461077e-07, |
|
"loss": 0.4432, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"learning_rate": 1.4452691487465087e-07, |
|
"loss": 0.4639, |
|
"step": 361 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"learning_rate": 1.3477564710088097e-07, |
|
"loss": 0.4095, |
|
"step": 362 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"learning_rate": 1.253604390908819e-07, |
|
"loss": 0.3953, |
|
"step": 363 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"learning_rate": 1.1628194118929403e-07, |
|
"loss": 0.4071, |
|
"step": 364 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"learning_rate": 1.0754078048289374e-07, |
|
"loss": 0.4452, |
|
"step": 365 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"learning_rate": 9.913756075728088e-08, |
|
"loss": 0.433, |
|
"step": 366 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"learning_rate": 9.1072862455171e-08, |
|
"loss": 0.4269, |
|
"step": 367 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"learning_rate": 8.334724263630301e-08, |
|
"loss": 0.4502, |
|
"step": 368 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"learning_rate": 7.59612349389599e-08, |
|
"loss": 0.3055, |
|
"step": 369 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"learning_rate": 6.891534954310886e-08, |
|
"loss": 0.4559, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"learning_rate": 6.221007313516159e-08, |
|
"loss": 0.3325, |
|
"step": 371 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"learning_rate": 5.584586887435739e-08, |
|
"loss": 0.5016, |
|
"step": 372 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"learning_rate": 4.9823176360768166e-08, |
|
"loss": 0.363, |
|
"step": 373 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"learning_rate": 4.41424116049366e-08, |
|
"loss": 0.5222, |
|
"step": 374 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"learning_rate": 3.8803966999139686e-08, |
|
"loss": 0.4091, |
|
"step": 375 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"eval_accuracy": 0.8351479322052294, |
|
"eval_accuracy_<|content|>": 1.0, |
|
"eval_accuracy_<|from|>": 0.9924146649810367, |
|
"eval_accuracy_<|recipient|>": 1.0, |
|
"eval_accuracy_<|stop|>": 0.9301300418778928, |
|
"eval_accuracy_total_num_<|content|>": 5362, |
|
"eval_accuracy_total_num_<|from|>": 791, |
|
"eval_accuracy_total_num_<|recipient|>": 791, |
|
"eval_accuracy_total_num_<|stop|>": 4537, |
|
"eval_loss": null, |
|
"eval_perplexity": 1.0568973984577945, |
|
"eval_runtime": 333.4139, |
|
"eval_samples_per_second": 4.124, |
|
"eval_steps_per_second": 0.516, |
|
"step": 375 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"learning_rate": 3.3808211290284886e-08, |
|
"loss": 0.3643, |
|
"step": 376 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"learning_rate": 2.9155489554439364e-08, |
|
"loss": 0.3772, |
|
"step": 377 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"learning_rate": 2.4846123172992953e-08, |
|
"loss": 0.4053, |
|
"step": 378 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"learning_rate": 2.088040981046091e-08, |
|
"loss": 0.4798, |
|
"step": 379 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"learning_rate": 1.725862339392259e-08, |
|
"loss": 0.524, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"learning_rate": 1.3981014094099354e-08, |
|
"loss": 0.4353, |
|
"step": 381 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"learning_rate": 1.1047808308075059e-08, |
|
"loss": 0.4712, |
|
"step": 382 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"learning_rate": 8.459208643659122e-09, |
|
"loss": 0.533, |
|
"step": 383 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"learning_rate": 6.215393905388278e-09, |
|
"loss": 0.2897, |
|
"step": 384 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"learning_rate": 4.316519082179227e-09, |
|
"loss": 0.4482, |
|
"step": 385 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"learning_rate": 2.7627153366222014e-09, |
|
"loss": 0.4109, |
|
"step": 386 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"learning_rate": 1.5540899959187727e-09, |
|
"loss": 0.3478, |
|
"step": 387 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"learning_rate": 6.907265444716649e-10, |
|
"loss": 0.4204, |
|
"step": 388 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"learning_rate": 1.7268461811548176e-10, |
|
"loss": 0.3867, |
|
"step": 389 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"learning_rate": 0.0, |
|
"loss": 0.3951, |
|
"step": 390 |
|
} |
|
], |
|
"logging_steps": 1.0, |
|
"max_steps": 390, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 100.0, |
|
"total_flos": 805496420302848.0, |
|
"train_batch_size": 2, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|