|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 0.06, |
|
"eval_steps": 1000, |
|
"global_step": 150, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 6.666666666666667e-05, |
|
"loss": 2.4138, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 0.00013333333333333334, |
|
"loss": 2.3782, |
|
"step": 2 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 0.0002, |
|
"loss": 2.1039, |
|
"step": 3 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 0.00019863945578231293, |
|
"loss": 2.4971, |
|
"step": 4 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 0.00019727891156462587, |
|
"loss": 2.1249, |
|
"step": 5 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 0.0001959183673469388, |
|
"loss": 2.649, |
|
"step": 6 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 0.0001945578231292517, |
|
"loss": 2.9032, |
|
"step": 7 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 0.00019319727891156462, |
|
"loss": 1.8552, |
|
"step": 8 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 0.00019183673469387756, |
|
"loss": 2.3256, |
|
"step": 9 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 0.00019047619047619048, |
|
"loss": 2.4122, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 0.00018911564625850343, |
|
"loss": 2.0678, |
|
"step": 11 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 0.00018775510204081634, |
|
"loss": 2.3379, |
|
"step": 12 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 0.00018639455782312926, |
|
"loss": 2.1801, |
|
"step": 13 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 0.0001850340136054422, |
|
"loss": 2.1896, |
|
"step": 14 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 0.00018367346938775512, |
|
"loss": 2.0319, |
|
"step": 15 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 0.00018231292517006804, |
|
"loss": 1.9579, |
|
"step": 16 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 0.00018095238095238095, |
|
"loss": 2.1247, |
|
"step": 17 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 0.0001795918367346939, |
|
"loss": 2.8723, |
|
"step": 18 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 0.00017823129251700681, |
|
"loss": 1.8466, |
|
"step": 19 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 0.00017687074829931973, |
|
"loss": 2.3306, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 0.00017551020408163265, |
|
"loss": 2.3662, |
|
"step": 21 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 0.0001741496598639456, |
|
"loss": 2.1281, |
|
"step": 22 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 0.0001727891156462585, |
|
"loss": 2.0371, |
|
"step": 23 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 0.00017142857142857143, |
|
"loss": 2.4064, |
|
"step": 24 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 0.00017006802721088434, |
|
"loss": 1.8651, |
|
"step": 25 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 0.00016870748299319729, |
|
"loss": 2.2024, |
|
"step": 26 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 0.00016734693877551023, |
|
"loss": 1.8373, |
|
"step": 27 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 0.00016598639455782315, |
|
"loss": 1.8414, |
|
"step": 28 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 0.00016462585034013606, |
|
"loss": 2.0103, |
|
"step": 29 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 0.00016326530612244898, |
|
"loss": 1.8933, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 0.00016190476190476192, |
|
"loss": 1.9478, |
|
"step": 31 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 0.00016054421768707484, |
|
"loss": 1.7242, |
|
"step": 32 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 0.00015918367346938776, |
|
"loss": 2.6501, |
|
"step": 33 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 0.00015782312925170067, |
|
"loss": 1.9442, |
|
"step": 34 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 0.00015646258503401362, |
|
"loss": 2.097, |
|
"step": 35 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 0.00015510204081632654, |
|
"loss": 1.9518, |
|
"step": 36 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 0.00015374149659863945, |
|
"loss": 2.0381, |
|
"step": 37 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 0.00015238095238095237, |
|
"loss": 2.1018, |
|
"step": 38 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 0.0001510204081632653, |
|
"loss": 2.6064, |
|
"step": 39 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 0.00014965986394557826, |
|
"loss": 1.8832, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 0.00014829931972789117, |
|
"loss": 1.7176, |
|
"step": 41 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 0.0001469387755102041, |
|
"loss": 1.9294, |
|
"step": 42 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 0.000145578231292517, |
|
"loss": 2.2566, |
|
"step": 43 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 0.00014421768707482995, |
|
"loss": 2.0185, |
|
"step": 44 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 0.00014285714285714287, |
|
"loss": 1.9994, |
|
"step": 45 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 0.00014149659863945578, |
|
"loss": 1.8176, |
|
"step": 46 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 0.0001401360544217687, |
|
"loss": 2.2882, |
|
"step": 47 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 0.00013877551020408165, |
|
"loss": 1.9445, |
|
"step": 48 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 0.00013741496598639456, |
|
"loss": 2.1634, |
|
"step": 49 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 0.00013605442176870748, |
|
"loss": 1.7348, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 0.0001346938775510204, |
|
"loss": 2.1267, |
|
"step": 51 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 0.00013333333333333334, |
|
"loss": 2.0458, |
|
"step": 52 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 0.00013197278911564626, |
|
"loss": 1.8534, |
|
"step": 53 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 0.00013061224489795917, |
|
"loss": 1.6838, |
|
"step": 54 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 0.00012925170068027212, |
|
"loss": 1.896, |
|
"step": 55 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 0.00012789115646258506, |
|
"loss": 2.2231, |
|
"step": 56 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 0.00012653061224489798, |
|
"loss": 2.0001, |
|
"step": 57 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 0.0001251700680272109, |
|
"loss": 1.9079, |
|
"step": 58 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 0.0001238095238095238, |
|
"loss": 2.2205, |
|
"step": 59 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 0.00012244897959183676, |
|
"loss": 2.1606, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 0.00012108843537414967, |
|
"loss": 2.1003, |
|
"step": 61 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 0.00011972789115646259, |
|
"loss": 2.0567, |
|
"step": 62 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 0.00011836734693877552, |
|
"loss": 1.8886, |
|
"step": 63 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 0.00011700680272108844, |
|
"loss": 1.565, |
|
"step": 64 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 0.00011564625850340137, |
|
"loss": 1.7418, |
|
"step": 65 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 0.00011428571428571428, |
|
"loss": 2.2, |
|
"step": 66 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 0.00011292517006802721, |
|
"loss": 1.8804, |
|
"step": 67 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 0.00011156462585034013, |
|
"loss": 1.8546, |
|
"step": 68 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 0.00011020408163265306, |
|
"loss": 1.9946, |
|
"step": 69 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 0.000108843537414966, |
|
"loss": 1.9766, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 0.00010748299319727892, |
|
"loss": 2.5233, |
|
"step": 71 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 0.00010612244897959185, |
|
"loss": 2.1251, |
|
"step": 72 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 0.00010476190476190477, |
|
"loss": 1.6734, |
|
"step": 73 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 0.0001034013605442177, |
|
"loss": 1.7247, |
|
"step": 74 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 0.00010204081632653062, |
|
"loss": 1.7936, |
|
"step": 75 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 0.00010068027210884355, |
|
"loss": 2.1881, |
|
"step": 76 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 9.931972789115646e-05, |
|
"loss": 2.1806, |
|
"step": 77 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 9.79591836734694e-05, |
|
"loss": 1.7475, |
|
"step": 78 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 9.659863945578231e-05, |
|
"loss": 1.8856, |
|
"step": 79 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 9.523809523809524e-05, |
|
"loss": 1.9571, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 9.387755102040817e-05, |
|
"loss": 1.773, |
|
"step": 81 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 9.25170068027211e-05, |
|
"loss": 2.0793, |
|
"step": 82 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 9.115646258503402e-05, |
|
"loss": 1.6465, |
|
"step": 83 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 8.979591836734695e-05, |
|
"loss": 2.3302, |
|
"step": 84 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 8.843537414965987e-05, |
|
"loss": 1.9541, |
|
"step": 85 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 8.70748299319728e-05, |
|
"loss": 1.9938, |
|
"step": 86 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 8.571428571428571e-05, |
|
"loss": 1.7522, |
|
"step": 87 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 8.435374149659864e-05, |
|
"loss": 2.1434, |
|
"step": 88 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 8.299319727891157e-05, |
|
"loss": 2.1333, |
|
"step": 89 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 8.163265306122449e-05, |
|
"loss": 2.2407, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 8.027210884353742e-05, |
|
"loss": 1.8978, |
|
"step": 91 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 7.891156462585034e-05, |
|
"loss": 1.2992, |
|
"step": 92 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 7.755102040816327e-05, |
|
"loss": 1.9454, |
|
"step": 93 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 7.619047619047618e-05, |
|
"loss": 2.5311, |
|
"step": 94 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 7.482993197278913e-05, |
|
"loss": 1.753, |
|
"step": 95 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 7.346938775510205e-05, |
|
"loss": 1.8511, |
|
"step": 96 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 7.210884353741498e-05, |
|
"loss": 1.96, |
|
"step": 97 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 7.074829931972789e-05, |
|
"loss": 1.8998, |
|
"step": 98 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 6.938775510204082e-05, |
|
"loss": 2.0674, |
|
"step": 99 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 6.802721088435374e-05, |
|
"loss": 1.8385, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 6.666666666666667e-05, |
|
"loss": 1.9221, |
|
"step": 101 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 6.530612244897959e-05, |
|
"loss": 1.9015, |
|
"step": 102 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 6.394557823129253e-05, |
|
"loss": 2.0576, |
|
"step": 103 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 6.258503401360545e-05, |
|
"loss": 2.1968, |
|
"step": 104 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 6.122448979591838e-05, |
|
"loss": 2.1305, |
|
"step": 105 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 5.9863945578231295e-05, |
|
"loss": 1.7918, |
|
"step": 106 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 5.850340136054422e-05, |
|
"loss": 1.8854, |
|
"step": 107 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 5.714285714285714e-05, |
|
"loss": 1.8177, |
|
"step": 108 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 5.5782312925170065e-05, |
|
"loss": 2.1727, |
|
"step": 109 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 5.4421768707483e-05, |
|
"loss": 1.8914, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 5.3061224489795926e-05, |
|
"loss": 1.7695, |
|
"step": 111 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 5.170068027210885e-05, |
|
"loss": 1.8377, |
|
"step": 112 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 5.034013605442177e-05, |
|
"loss": 2.294, |
|
"step": 113 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 4.89795918367347e-05, |
|
"loss": 2.0519, |
|
"step": 114 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 4.761904761904762e-05, |
|
"loss": 2.1649, |
|
"step": 115 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 4.625850340136055e-05, |
|
"loss": 2.1548, |
|
"step": 116 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 4.4897959183673474e-05, |
|
"loss": 2.0267, |
|
"step": 117 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 4.35374149659864e-05, |
|
"loss": 2.036, |
|
"step": 118 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 4.217687074829932e-05, |
|
"loss": 1.8979, |
|
"step": 119 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 4.0816326530612245e-05, |
|
"loss": 1.6725, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 3.945578231292517e-05, |
|
"loss": 1.6443, |
|
"step": 121 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 3.809523809523809e-05, |
|
"loss": 2.0864, |
|
"step": 122 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 3.673469387755102e-05, |
|
"loss": 1.7234, |
|
"step": 123 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 3.5374149659863946e-05, |
|
"loss": 1.8916, |
|
"step": 124 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 3.401360544217687e-05, |
|
"loss": 2.2922, |
|
"step": 125 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 3.265306122448979e-05, |
|
"loss": 1.9379, |
|
"step": 126 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 3.1292517006802724e-05, |
|
"loss": 2.0516, |
|
"step": 127 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 2.9931972789115647e-05, |
|
"loss": 1.5403, |
|
"step": 128 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 2.857142857142857e-05, |
|
"loss": 1.9823, |
|
"step": 129 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 2.72108843537415e-05, |
|
"loss": 2.5569, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 2.5850340136054425e-05, |
|
"loss": 2.0392, |
|
"step": 131 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 2.448979591836735e-05, |
|
"loss": 1.3206, |
|
"step": 132 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 2.3129251700680275e-05, |
|
"loss": 1.4851, |
|
"step": 133 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 2.17687074829932e-05, |
|
"loss": 1.8076, |
|
"step": 134 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 2.0408163265306123e-05, |
|
"loss": 2.2816, |
|
"step": 135 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 1.9047619047619046e-05, |
|
"loss": 2.044, |
|
"step": 136 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 1.7687074829931973e-05, |
|
"loss": 2.0136, |
|
"step": 137 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 1.6326530612244897e-05, |
|
"loss": 1.5755, |
|
"step": 138 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 1.4965986394557824e-05, |
|
"loss": 1.9756, |
|
"step": 139 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 1.360544217687075e-05, |
|
"loss": 1.9262, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 1.2244897959183674e-05, |
|
"loss": 1.8739, |
|
"step": 141 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 1.08843537414966e-05, |
|
"loss": 1.8307, |
|
"step": 142 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 9.523809523809523e-06, |
|
"loss": 1.8247, |
|
"step": 143 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 8.163265306122448e-06, |
|
"loss": 1.8952, |
|
"step": 144 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 6.802721088435375e-06, |
|
"loss": 1.8041, |
|
"step": 145 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 5.4421768707483e-06, |
|
"loss": 1.8557, |
|
"step": 146 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 4.081632653061224e-06, |
|
"loss": 1.9398, |
|
"step": 147 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 2.72108843537415e-06, |
|
"loss": 1.944, |
|
"step": 148 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 1.360544217687075e-06, |
|
"loss": 2.0813, |
|
"step": 149 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 0.0, |
|
"loss": 1.8519, |
|
"step": 150 |
|
} |
|
], |
|
"logging_steps": 1, |
|
"max_steps": 150, |
|
"num_train_epochs": 1, |
|
"save_steps": 5, |
|
"total_flos": 8125557976793088.0, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|