|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 14.285714285714286, |
|
"eval_steps": 500, |
|
"global_step": 500, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 0.002998214285714286, |
|
"loss": 3.0944, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 0.0029964285714285713, |
|
"loss": 2.8734, |
|
"step": 2 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 0.002994642857142857, |
|
"loss": 8.7265, |
|
"step": 3 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 0.002992857142857143, |
|
"loss": 4.7587, |
|
"step": 4 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 0.002991071428571429, |
|
"loss": 4.3637, |
|
"step": 5 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 0.0029892857142857143, |
|
"loss": 3.315, |
|
"step": 6 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 0.0029875, |
|
"loss": 3.1726, |
|
"step": 7 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 0.002985714285714286, |
|
"loss": 2.9938, |
|
"step": 8 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 0.0029839285714285714, |
|
"loss": 3.0509, |
|
"step": 9 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 0.0029821428571428573, |
|
"loss": 2.93, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 0.002980357142857143, |
|
"loss": 2.8436, |
|
"step": 11 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 0.0029785714285714285, |
|
"loss": 2.8756, |
|
"step": 12 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 0.0029767857142857144, |
|
"loss": 2.8528, |
|
"step": 13 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 0.002975, |
|
"loss": 2.8405, |
|
"step": 14 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 0.002973214285714286, |
|
"loss": 2.8022, |
|
"step": 15 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 0.0029714285714285715, |
|
"loss": 2.8356, |
|
"step": 16 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 0.0029696428571428573, |
|
"loss": 2.7915, |
|
"step": 17 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 0.002967857142857143, |
|
"loss": 2.7848, |
|
"step": 18 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 0.0029660714285714286, |
|
"loss": 2.6604, |
|
"step": 19 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 0.0029642857142857144, |
|
"loss": 2.736, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 0.0029625000000000003, |
|
"loss": 2.7747, |
|
"step": 21 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 0.0029607142857142857, |
|
"loss": 2.6958, |
|
"step": 22 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"learning_rate": 0.0029589285714285716, |
|
"loss": 2.7309, |
|
"step": 23 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 0.0029571428571428574, |
|
"loss": 2.7294, |
|
"step": 24 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"learning_rate": 0.0029553571428571433, |
|
"loss": 2.7493, |
|
"step": 25 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"learning_rate": 0.0029535714285714287, |
|
"loss": 2.7351, |
|
"step": 26 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"learning_rate": 0.002951785714285714, |
|
"loss": 2.6825, |
|
"step": 27 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"learning_rate": 0.00295, |
|
"loss": 2.6658, |
|
"step": 28 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"learning_rate": 0.0029482142857142858, |
|
"loss": 2.6127, |
|
"step": 29 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"learning_rate": 0.002946428571428571, |
|
"loss": 2.6338, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"learning_rate": 0.002944642857142857, |
|
"loss": 2.7094, |
|
"step": 31 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"learning_rate": 0.002942857142857143, |
|
"loss": 2.6943, |
|
"step": 32 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"learning_rate": 0.0029410714285714283, |
|
"loss": 2.7222, |
|
"step": 33 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"learning_rate": 0.002939285714285714, |
|
"loss": 2.6541, |
|
"step": 34 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"learning_rate": 0.0029375, |
|
"loss": 2.7111, |
|
"step": 35 |
|
}, |
|
{ |
|
"epoch": 1.03, |
|
"learning_rate": 0.002935714285714286, |
|
"loss": 2.5123, |
|
"step": 36 |
|
}, |
|
{ |
|
"epoch": 1.06, |
|
"learning_rate": 0.0029339285714285713, |
|
"loss": 2.493, |
|
"step": 37 |
|
}, |
|
{ |
|
"epoch": 1.09, |
|
"learning_rate": 0.002932142857142857, |
|
"loss": 2.4434, |
|
"step": 38 |
|
}, |
|
{ |
|
"epoch": 1.11, |
|
"learning_rate": 0.002930357142857143, |
|
"loss": 2.4883, |
|
"step": 39 |
|
}, |
|
{ |
|
"epoch": 1.14, |
|
"learning_rate": 0.0029285714285714284, |
|
"loss": 2.5004, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 1.17, |
|
"learning_rate": 0.0029267857142857142, |
|
"loss": 2.4667, |
|
"step": 41 |
|
}, |
|
{ |
|
"epoch": 1.2, |
|
"learning_rate": 0.002925, |
|
"loss": 2.4461, |
|
"step": 42 |
|
}, |
|
{ |
|
"epoch": 1.23, |
|
"learning_rate": 0.002923214285714286, |
|
"loss": 2.5361, |
|
"step": 43 |
|
}, |
|
{ |
|
"epoch": 1.26, |
|
"learning_rate": 0.0029214285714285713, |
|
"loss": 2.4597, |
|
"step": 44 |
|
}, |
|
{ |
|
"epoch": 1.29, |
|
"learning_rate": 0.002919642857142857, |
|
"loss": 2.4006, |
|
"step": 45 |
|
}, |
|
{ |
|
"epoch": 1.31, |
|
"learning_rate": 0.002917857142857143, |
|
"loss": 2.5019, |
|
"step": 46 |
|
}, |
|
{ |
|
"epoch": 1.34, |
|
"learning_rate": 0.0029160714285714285, |
|
"loss": 2.5209, |
|
"step": 47 |
|
}, |
|
{ |
|
"epoch": 1.37, |
|
"learning_rate": 0.0029142857142857143, |
|
"loss": 2.4753, |
|
"step": 48 |
|
}, |
|
{ |
|
"epoch": 1.4, |
|
"learning_rate": 0.0029125, |
|
"loss": 2.4104, |
|
"step": 49 |
|
}, |
|
{ |
|
"epoch": 1.43, |
|
"learning_rate": 0.0029107142857142856, |
|
"loss": 2.3938, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 1.46, |
|
"learning_rate": 0.0029089285714285714, |
|
"loss": 2.4999, |
|
"step": 51 |
|
}, |
|
{ |
|
"epoch": 1.49, |
|
"learning_rate": 0.0029071428571428573, |
|
"loss": 2.4292, |
|
"step": 52 |
|
}, |
|
{ |
|
"epoch": 1.51, |
|
"learning_rate": 0.002905357142857143, |
|
"loss": 2.4636, |
|
"step": 53 |
|
}, |
|
{ |
|
"epoch": 1.54, |
|
"learning_rate": 0.0029035714285714285, |
|
"loss": 2.5163, |
|
"step": 54 |
|
}, |
|
{ |
|
"epoch": 1.57, |
|
"learning_rate": 0.0029017857142857144, |
|
"loss": 2.5098, |
|
"step": 55 |
|
}, |
|
{ |
|
"epoch": 1.6, |
|
"learning_rate": 0.0029000000000000002, |
|
"loss": 2.447, |
|
"step": 56 |
|
}, |
|
{ |
|
"epoch": 1.63, |
|
"learning_rate": 0.0028982142857142856, |
|
"loss": 2.4262, |
|
"step": 57 |
|
}, |
|
{ |
|
"epoch": 1.66, |
|
"learning_rate": 0.0028964285714285715, |
|
"loss": 2.5146, |
|
"step": 58 |
|
}, |
|
{ |
|
"epoch": 1.69, |
|
"learning_rate": 0.0028946428571428573, |
|
"loss": 2.4225, |
|
"step": 59 |
|
}, |
|
{ |
|
"epoch": 1.71, |
|
"learning_rate": 0.0028928571428571428, |
|
"loss": 2.452, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 1.74, |
|
"learning_rate": 0.0028910714285714286, |
|
"loss": 2.4449, |
|
"step": 61 |
|
}, |
|
{ |
|
"epoch": 1.77, |
|
"learning_rate": 0.0028892857142857145, |
|
"loss": 2.4984, |
|
"step": 62 |
|
}, |
|
{ |
|
"epoch": 1.8, |
|
"learning_rate": 0.0028875000000000003, |
|
"loss": 2.4493, |
|
"step": 63 |
|
}, |
|
{ |
|
"epoch": 1.83, |
|
"learning_rate": 0.0028857142857142857, |
|
"loss": 2.4187, |
|
"step": 64 |
|
}, |
|
{ |
|
"epoch": 1.86, |
|
"learning_rate": 0.0028839285714285716, |
|
"loss": 2.5019, |
|
"step": 65 |
|
}, |
|
{ |
|
"epoch": 1.89, |
|
"learning_rate": 0.0028821428571428574, |
|
"loss": 2.4274, |
|
"step": 66 |
|
}, |
|
{ |
|
"epoch": 1.91, |
|
"learning_rate": 0.002880357142857143, |
|
"loss": 2.4485, |
|
"step": 67 |
|
}, |
|
{ |
|
"epoch": 1.94, |
|
"learning_rate": 0.0028785714285714287, |
|
"loss": 2.5096, |
|
"step": 68 |
|
}, |
|
{ |
|
"epoch": 1.97, |
|
"learning_rate": 0.0028767857142857145, |
|
"loss": 2.4862, |
|
"step": 69 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"learning_rate": 0.0028750000000000004, |
|
"loss": 2.469, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 2.03, |
|
"learning_rate": 0.002873214285714286, |
|
"loss": 2.1795, |
|
"step": 71 |
|
}, |
|
{ |
|
"epoch": 2.06, |
|
"learning_rate": 0.0028714285714285716, |
|
"loss": 2.1106, |
|
"step": 72 |
|
}, |
|
{ |
|
"epoch": 2.09, |
|
"learning_rate": 0.0028696428571428575, |
|
"loss": 2.0896, |
|
"step": 73 |
|
}, |
|
{ |
|
"epoch": 2.11, |
|
"learning_rate": 0.002867857142857143, |
|
"loss": 2.2018, |
|
"step": 74 |
|
}, |
|
{ |
|
"epoch": 2.14, |
|
"learning_rate": 0.0028660714285714288, |
|
"loss": 2.0803, |
|
"step": 75 |
|
}, |
|
{ |
|
"epoch": 2.17, |
|
"learning_rate": 0.0028642857142857146, |
|
"loss": 2.1395, |
|
"step": 76 |
|
}, |
|
{ |
|
"epoch": 2.2, |
|
"learning_rate": 0.0028625, |
|
"loss": 2.1019, |
|
"step": 77 |
|
}, |
|
{ |
|
"epoch": 2.23, |
|
"learning_rate": 0.002860714285714286, |
|
"loss": 2.1383, |
|
"step": 78 |
|
}, |
|
{ |
|
"epoch": 2.26, |
|
"learning_rate": 0.0028589285714285713, |
|
"loss": 2.1109, |
|
"step": 79 |
|
}, |
|
{ |
|
"epoch": 2.29, |
|
"learning_rate": 0.002857142857142857, |
|
"loss": 2.0854, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 2.31, |
|
"learning_rate": 0.0028553571428571426, |
|
"loss": 2.1667, |
|
"step": 81 |
|
}, |
|
{ |
|
"epoch": 2.34, |
|
"learning_rate": 0.0028535714285714284, |
|
"loss": 2.1111, |
|
"step": 82 |
|
}, |
|
{ |
|
"epoch": 2.37, |
|
"learning_rate": 0.0028517857142857143, |
|
"loss": 2.1032, |
|
"step": 83 |
|
}, |
|
{ |
|
"epoch": 2.4, |
|
"learning_rate": 0.00285, |
|
"loss": 2.1708, |
|
"step": 84 |
|
}, |
|
{ |
|
"epoch": 2.43, |
|
"learning_rate": 0.0028482142857142855, |
|
"loss": 2.1118, |
|
"step": 85 |
|
}, |
|
{ |
|
"epoch": 2.46, |
|
"learning_rate": 0.0028464285714285714, |
|
"loss": 2.1481, |
|
"step": 86 |
|
}, |
|
{ |
|
"epoch": 2.49, |
|
"learning_rate": 0.002844642857142857, |
|
"loss": 2.1538, |
|
"step": 87 |
|
}, |
|
{ |
|
"epoch": 2.51, |
|
"learning_rate": 0.0028428571428571426, |
|
"loss": 2.1843, |
|
"step": 88 |
|
}, |
|
{ |
|
"epoch": 2.54, |
|
"learning_rate": 0.0028410714285714285, |
|
"loss": 2.1828, |
|
"step": 89 |
|
}, |
|
{ |
|
"epoch": 2.57, |
|
"learning_rate": 0.0028392857142857143, |
|
"loss": 2.2151, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 2.6, |
|
"learning_rate": 0.0028375, |
|
"loss": 2.1969, |
|
"step": 91 |
|
}, |
|
{ |
|
"epoch": 2.63, |
|
"learning_rate": 0.0028357142857142856, |
|
"loss": 2.1509, |
|
"step": 92 |
|
}, |
|
{ |
|
"epoch": 2.66, |
|
"learning_rate": 0.0028339285714285714, |
|
"loss": 2.2636, |
|
"step": 93 |
|
}, |
|
{ |
|
"epoch": 2.69, |
|
"learning_rate": 0.0028321428571428573, |
|
"loss": 2.2809, |
|
"step": 94 |
|
}, |
|
{ |
|
"epoch": 2.71, |
|
"learning_rate": 0.0028303571428571427, |
|
"loss": 2.2044, |
|
"step": 95 |
|
}, |
|
{ |
|
"epoch": 2.74, |
|
"learning_rate": 0.0028285714285714286, |
|
"loss": 2.2064, |
|
"step": 96 |
|
}, |
|
{ |
|
"epoch": 2.77, |
|
"learning_rate": 0.0028267857142857144, |
|
"loss": 2.2408, |
|
"step": 97 |
|
}, |
|
{ |
|
"epoch": 2.8, |
|
"learning_rate": 0.002825, |
|
"loss": 2.2446, |
|
"step": 98 |
|
}, |
|
{ |
|
"epoch": 2.83, |
|
"learning_rate": 0.0028232142857142857, |
|
"loss": 2.1965, |
|
"step": 99 |
|
}, |
|
{ |
|
"epoch": 2.86, |
|
"learning_rate": 0.0028214285714285715, |
|
"loss": 2.3093, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 2.89, |
|
"learning_rate": 0.0028196428571428574, |
|
"loss": 2.2188, |
|
"step": 101 |
|
}, |
|
{ |
|
"epoch": 2.91, |
|
"learning_rate": 0.0028178571428571428, |
|
"loss": 2.3098, |
|
"step": 102 |
|
}, |
|
{ |
|
"epoch": 2.94, |
|
"learning_rate": 0.0028160714285714286, |
|
"loss": 2.2268, |
|
"step": 103 |
|
}, |
|
{ |
|
"epoch": 2.97, |
|
"learning_rate": 0.0028142857142857145, |
|
"loss": 2.2574, |
|
"step": 104 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"learning_rate": 0.0028125, |
|
"loss": 2.2982, |
|
"step": 105 |
|
}, |
|
{ |
|
"epoch": 3.03, |
|
"learning_rate": 0.0028107142857142857, |
|
"loss": 1.879, |
|
"step": 106 |
|
}, |
|
{ |
|
"epoch": 3.06, |
|
"learning_rate": 0.0028089285714285716, |
|
"loss": 1.8074, |
|
"step": 107 |
|
}, |
|
{ |
|
"epoch": 3.09, |
|
"learning_rate": 0.002807142857142857, |
|
"loss": 1.8174, |
|
"step": 108 |
|
}, |
|
{ |
|
"epoch": 3.11, |
|
"learning_rate": 0.002805357142857143, |
|
"loss": 1.784, |
|
"step": 109 |
|
}, |
|
{ |
|
"epoch": 3.14, |
|
"learning_rate": 0.0028035714285714287, |
|
"loss": 1.8469, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 3.17, |
|
"learning_rate": 0.0028017857142857146, |
|
"loss": 1.8614, |
|
"step": 111 |
|
}, |
|
{ |
|
"epoch": 3.2, |
|
"learning_rate": 0.0028, |
|
"loss": 1.8648, |
|
"step": 112 |
|
}, |
|
{ |
|
"epoch": 3.23, |
|
"learning_rate": 0.002798214285714286, |
|
"loss": 1.8172, |
|
"step": 113 |
|
}, |
|
{ |
|
"epoch": 3.26, |
|
"learning_rate": 0.0027964285714285717, |
|
"loss": 1.8579, |
|
"step": 114 |
|
}, |
|
{ |
|
"epoch": 3.29, |
|
"learning_rate": 0.002794642857142857, |
|
"loss": 1.8261, |
|
"step": 115 |
|
}, |
|
{ |
|
"epoch": 3.31, |
|
"learning_rate": 0.002792857142857143, |
|
"loss": 1.8993, |
|
"step": 116 |
|
}, |
|
{ |
|
"epoch": 3.34, |
|
"learning_rate": 0.0027910714285714288, |
|
"loss": 1.8144, |
|
"step": 117 |
|
}, |
|
{ |
|
"epoch": 3.37, |
|
"learning_rate": 0.0027892857142857146, |
|
"loss": 1.8583, |
|
"step": 118 |
|
}, |
|
{ |
|
"epoch": 3.4, |
|
"learning_rate": 0.0027875, |
|
"loss": 1.8589, |
|
"step": 119 |
|
}, |
|
{ |
|
"epoch": 3.43, |
|
"learning_rate": 0.002785714285714286, |
|
"loss": 1.9069, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 3.46, |
|
"learning_rate": 0.0027839285714285717, |
|
"loss": 1.9004, |
|
"step": 121 |
|
}, |
|
{ |
|
"epoch": 3.49, |
|
"learning_rate": 0.002782142857142857, |
|
"loss": 1.9378, |
|
"step": 122 |
|
}, |
|
{ |
|
"epoch": 3.51, |
|
"learning_rate": 0.002780357142857143, |
|
"loss": 1.9161, |
|
"step": 123 |
|
}, |
|
{ |
|
"epoch": 3.54, |
|
"learning_rate": 0.002778571428571429, |
|
"loss": 1.9886, |
|
"step": 124 |
|
}, |
|
{ |
|
"epoch": 3.57, |
|
"learning_rate": 0.0027767857142857143, |
|
"loss": 1.8636, |
|
"step": 125 |
|
}, |
|
{ |
|
"epoch": 3.6, |
|
"learning_rate": 0.002775, |
|
"loss": 1.9642, |
|
"step": 126 |
|
}, |
|
{ |
|
"epoch": 3.63, |
|
"learning_rate": 0.002773214285714286, |
|
"loss": 1.959, |
|
"step": 127 |
|
}, |
|
{ |
|
"epoch": 3.66, |
|
"learning_rate": 0.002771428571428572, |
|
"loss": 1.9787, |
|
"step": 128 |
|
}, |
|
{ |
|
"epoch": 3.69, |
|
"learning_rate": 0.0027696428571428572, |
|
"loss": 2.0272, |
|
"step": 129 |
|
}, |
|
{ |
|
"epoch": 3.71, |
|
"learning_rate": 0.002767857142857143, |
|
"loss": 2.0362, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 3.74, |
|
"learning_rate": 0.002766071428571429, |
|
"loss": 2.0369, |
|
"step": 131 |
|
}, |
|
{ |
|
"epoch": 3.77, |
|
"learning_rate": 0.0027642857142857143, |
|
"loss": 2.0721, |
|
"step": 132 |
|
}, |
|
{ |
|
"epoch": 3.8, |
|
"learning_rate": 0.0027624999999999998, |
|
"loss": 1.9939, |
|
"step": 133 |
|
}, |
|
{ |
|
"epoch": 3.83, |
|
"learning_rate": 0.0027607142857142856, |
|
"loss": 2.0403, |
|
"step": 134 |
|
}, |
|
{ |
|
"epoch": 3.86, |
|
"learning_rate": 0.0027589285714285715, |
|
"loss": 2.1132, |
|
"step": 135 |
|
}, |
|
{ |
|
"epoch": 3.89, |
|
"learning_rate": 0.002757142857142857, |
|
"loss": 2.0741, |
|
"step": 136 |
|
}, |
|
{ |
|
"epoch": 3.91, |
|
"learning_rate": 0.0027553571428571427, |
|
"loss": 2.0754, |
|
"step": 137 |
|
}, |
|
{ |
|
"epoch": 3.94, |
|
"learning_rate": 0.0027535714285714286, |
|
"loss": 2.1321, |
|
"step": 138 |
|
}, |
|
{ |
|
"epoch": 3.97, |
|
"learning_rate": 0.0027517857142857144, |
|
"loss": 2.0665, |
|
"step": 139 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"learning_rate": 0.00275, |
|
"loss": 2.1085, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 4.03, |
|
"learning_rate": 0.0027482142857142857, |
|
"loss": 1.653, |
|
"step": 141 |
|
}, |
|
{ |
|
"epoch": 4.06, |
|
"learning_rate": 0.0027464285714285715, |
|
"loss": 1.5934, |
|
"step": 142 |
|
}, |
|
{ |
|
"epoch": 4.09, |
|
"learning_rate": 0.002744642857142857, |
|
"loss": 1.6795, |
|
"step": 143 |
|
}, |
|
{ |
|
"epoch": 4.11, |
|
"learning_rate": 0.002742857142857143, |
|
"loss": 1.6043, |
|
"step": 144 |
|
}, |
|
{ |
|
"epoch": 4.14, |
|
"learning_rate": 0.0027410714285714287, |
|
"loss": 1.586, |
|
"step": 145 |
|
}, |
|
{ |
|
"epoch": 4.17, |
|
"learning_rate": 0.002739285714285714, |
|
"loss": 1.6061, |
|
"step": 146 |
|
}, |
|
{ |
|
"epoch": 4.2, |
|
"learning_rate": 0.0027375, |
|
"loss": 1.6438, |
|
"step": 147 |
|
}, |
|
{ |
|
"epoch": 4.23, |
|
"learning_rate": 0.0027357142857142858, |
|
"loss": 1.6097, |
|
"step": 148 |
|
}, |
|
{ |
|
"epoch": 4.26, |
|
"learning_rate": 0.0027339285714285716, |
|
"loss": 1.7163, |
|
"step": 149 |
|
}, |
|
{ |
|
"epoch": 4.29, |
|
"learning_rate": 0.002732142857142857, |
|
"loss": 1.6485, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 4.31, |
|
"learning_rate": 0.002730357142857143, |
|
"loss": 1.6555, |
|
"step": 151 |
|
}, |
|
{ |
|
"epoch": 4.34, |
|
"learning_rate": 0.0027285714285714287, |
|
"loss": 1.689, |
|
"step": 152 |
|
}, |
|
{ |
|
"epoch": 4.37, |
|
"learning_rate": 0.002726785714285714, |
|
"loss": 1.7174, |
|
"step": 153 |
|
}, |
|
{ |
|
"epoch": 4.4, |
|
"learning_rate": 0.002725, |
|
"loss": 1.7205, |
|
"step": 154 |
|
}, |
|
{ |
|
"epoch": 4.43, |
|
"learning_rate": 0.002723214285714286, |
|
"loss": 1.7064, |
|
"step": 155 |
|
}, |
|
{ |
|
"epoch": 4.46, |
|
"learning_rate": 0.0027214285714285717, |
|
"loss": 1.7045, |
|
"step": 156 |
|
}, |
|
{ |
|
"epoch": 4.49, |
|
"learning_rate": 0.002719642857142857, |
|
"loss": 1.7749, |
|
"step": 157 |
|
}, |
|
{ |
|
"epoch": 4.51, |
|
"learning_rate": 0.002717857142857143, |
|
"loss": 1.7826, |
|
"step": 158 |
|
}, |
|
{ |
|
"epoch": 4.54, |
|
"learning_rate": 0.002716071428571429, |
|
"loss": 1.7882, |
|
"step": 159 |
|
}, |
|
{ |
|
"epoch": 4.57, |
|
"learning_rate": 0.0027142857142857142, |
|
"loss": 1.8073, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 4.6, |
|
"learning_rate": 0.0027125, |
|
"loss": 1.7931, |
|
"step": 161 |
|
}, |
|
{ |
|
"epoch": 4.63, |
|
"learning_rate": 0.002710714285714286, |
|
"loss": 1.8388, |
|
"step": 162 |
|
}, |
|
{ |
|
"epoch": 4.66, |
|
"learning_rate": 0.0027089285714285713, |
|
"loss": 1.8267, |
|
"step": 163 |
|
}, |
|
{ |
|
"epoch": 4.69, |
|
"learning_rate": 0.002707142857142857, |
|
"loss": 1.8208, |
|
"step": 164 |
|
}, |
|
{ |
|
"epoch": 4.71, |
|
"learning_rate": 0.002705357142857143, |
|
"loss": 1.8404, |
|
"step": 165 |
|
}, |
|
{ |
|
"epoch": 4.74, |
|
"learning_rate": 0.002703571428571429, |
|
"loss": 1.8375, |
|
"step": 166 |
|
}, |
|
{ |
|
"epoch": 4.77, |
|
"learning_rate": 0.0027017857142857143, |
|
"loss": 1.9444, |
|
"step": 167 |
|
}, |
|
{ |
|
"epoch": 4.8, |
|
"learning_rate": 0.0027, |
|
"loss": 1.8325, |
|
"step": 168 |
|
}, |
|
{ |
|
"epoch": 4.83, |
|
"learning_rate": 0.002698214285714286, |
|
"loss": 1.8705, |
|
"step": 169 |
|
}, |
|
{ |
|
"epoch": 4.86, |
|
"learning_rate": 0.0026964285714285714, |
|
"loss": 1.9368, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 4.89, |
|
"learning_rate": 0.0026946428571428573, |
|
"loss": 1.8758, |
|
"step": 171 |
|
}, |
|
{ |
|
"epoch": 4.91, |
|
"learning_rate": 0.002692857142857143, |
|
"loss": 1.999, |
|
"step": 172 |
|
}, |
|
{ |
|
"epoch": 4.94, |
|
"learning_rate": 0.0026910714285714285, |
|
"loss": 1.9547, |
|
"step": 173 |
|
}, |
|
{ |
|
"epoch": 4.97, |
|
"learning_rate": 0.0026892857142857144, |
|
"loss": 1.9332, |
|
"step": 174 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"learning_rate": 0.0026875000000000002, |
|
"loss": 1.9704, |
|
"step": 175 |
|
}, |
|
{ |
|
"epoch": 5.03, |
|
"learning_rate": 0.002685714285714286, |
|
"loss": 1.4766, |
|
"step": 176 |
|
}, |
|
{ |
|
"epoch": 5.06, |
|
"learning_rate": 0.0026839285714285715, |
|
"loss": 1.4459, |
|
"step": 177 |
|
}, |
|
{ |
|
"epoch": 5.09, |
|
"learning_rate": 0.0026821428571428573, |
|
"loss": 1.4451, |
|
"step": 178 |
|
}, |
|
{ |
|
"epoch": 5.11, |
|
"learning_rate": 0.002680357142857143, |
|
"loss": 1.4761, |
|
"step": 179 |
|
}, |
|
{ |
|
"epoch": 5.14, |
|
"learning_rate": 0.0026785714285714286, |
|
"loss": 1.5092, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 5.17, |
|
"learning_rate": 0.0026767857142857144, |
|
"loss": 1.4439, |
|
"step": 181 |
|
}, |
|
{ |
|
"epoch": 5.2, |
|
"learning_rate": 0.0026750000000000003, |
|
"loss": 1.4539, |
|
"step": 182 |
|
}, |
|
{ |
|
"epoch": 5.23, |
|
"learning_rate": 0.002673214285714286, |
|
"loss": 1.4804, |
|
"step": 183 |
|
}, |
|
{ |
|
"epoch": 5.26, |
|
"learning_rate": 0.002671428571428571, |
|
"loss": 1.4958, |
|
"step": 184 |
|
}, |
|
{ |
|
"epoch": 5.29, |
|
"learning_rate": 0.002669642857142857, |
|
"loss": 1.5054, |
|
"step": 185 |
|
}, |
|
{ |
|
"epoch": 5.31, |
|
"learning_rate": 0.002667857142857143, |
|
"loss": 1.4673, |
|
"step": 186 |
|
}, |
|
{ |
|
"epoch": 5.34, |
|
"learning_rate": 0.0026660714285714287, |
|
"loss": 1.5703, |
|
"step": 187 |
|
}, |
|
{ |
|
"epoch": 5.37, |
|
"learning_rate": 0.002664285714285714, |
|
"loss": 1.5504, |
|
"step": 188 |
|
}, |
|
{ |
|
"epoch": 5.4, |
|
"learning_rate": 0.0026625, |
|
"loss": 1.6126, |
|
"step": 189 |
|
}, |
|
{ |
|
"epoch": 5.43, |
|
"learning_rate": 0.002660714285714286, |
|
"loss": 1.5777, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 5.46, |
|
"learning_rate": 0.002658928571428571, |
|
"loss": 1.5994, |
|
"step": 191 |
|
}, |
|
{ |
|
"epoch": 5.49, |
|
"learning_rate": 0.002657142857142857, |
|
"loss": 1.5939, |
|
"step": 192 |
|
}, |
|
{ |
|
"epoch": 5.51, |
|
"learning_rate": 0.002655357142857143, |
|
"loss": 1.6297, |
|
"step": 193 |
|
}, |
|
{ |
|
"epoch": 5.54, |
|
"learning_rate": 0.0026535714285714283, |
|
"loss": 1.6749, |
|
"step": 194 |
|
}, |
|
{ |
|
"epoch": 5.57, |
|
"learning_rate": 0.002651785714285714, |
|
"loss": 1.6525, |
|
"step": 195 |
|
}, |
|
{ |
|
"epoch": 5.6, |
|
"learning_rate": 0.00265, |
|
"loss": 1.657, |
|
"step": 196 |
|
}, |
|
{ |
|
"epoch": 5.63, |
|
"learning_rate": 0.002648214285714286, |
|
"loss": 1.6818, |
|
"step": 197 |
|
}, |
|
{ |
|
"epoch": 5.66, |
|
"learning_rate": 0.0026464285714285713, |
|
"loss": 1.6985, |
|
"step": 198 |
|
}, |
|
{ |
|
"epoch": 5.69, |
|
"learning_rate": 0.002644642857142857, |
|
"loss": 1.7456, |
|
"step": 199 |
|
}, |
|
{ |
|
"epoch": 5.71, |
|
"learning_rate": 0.002642857142857143, |
|
"loss": 1.678, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 5.74, |
|
"learning_rate": 0.0026410714285714284, |
|
"loss": 1.7613, |
|
"step": 201 |
|
}, |
|
{ |
|
"epoch": 5.77, |
|
"learning_rate": 0.0026392857142857142, |
|
"loss": 1.7541, |
|
"step": 202 |
|
}, |
|
{ |
|
"epoch": 5.8, |
|
"learning_rate": 0.0026375, |
|
"loss": 1.798, |
|
"step": 203 |
|
}, |
|
{ |
|
"epoch": 5.83, |
|
"learning_rate": 0.002635714285714286, |
|
"loss": 1.821, |
|
"step": 204 |
|
}, |
|
{ |
|
"epoch": 5.86, |
|
"learning_rate": 0.0026339285714285714, |
|
"loss": 1.8385, |
|
"step": 205 |
|
}, |
|
{ |
|
"epoch": 5.89, |
|
"learning_rate": 0.002632142857142857, |
|
"loss": 1.8613, |
|
"step": 206 |
|
}, |
|
{ |
|
"epoch": 5.91, |
|
"learning_rate": 0.002630357142857143, |
|
"loss": 1.902, |
|
"step": 207 |
|
}, |
|
{ |
|
"epoch": 5.94, |
|
"learning_rate": 0.0026285714285714285, |
|
"loss": 2.0848, |
|
"step": 208 |
|
}, |
|
{ |
|
"epoch": 5.97, |
|
"learning_rate": 0.0026267857142857143, |
|
"loss": 2.3277, |
|
"step": 209 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"learning_rate": 0.002625, |
|
"loss": 2.8535, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 6.03, |
|
"learning_rate": 0.0026232142857142856, |
|
"loss": 6.2197, |
|
"step": 211 |
|
}, |
|
{ |
|
"epoch": 6.06, |
|
"learning_rate": 0.0026214285714285714, |
|
"loss": 10.2288, |
|
"step": 212 |
|
}, |
|
{ |
|
"epoch": 6.09, |
|
"learning_rate": 0.0026196428571428573, |
|
"loss": 12.5006, |
|
"step": 213 |
|
}, |
|
{ |
|
"epoch": 6.11, |
|
"learning_rate": 0.002617857142857143, |
|
"loss": 10.5184, |
|
"step": 214 |
|
}, |
|
{ |
|
"epoch": 6.14, |
|
"learning_rate": 0.0026160714285714285, |
|
"loss": 9.4834, |
|
"step": 215 |
|
}, |
|
{ |
|
"epoch": 6.17, |
|
"learning_rate": 0.0026142857142857144, |
|
"loss": 16.0513, |
|
"step": 216 |
|
}, |
|
{ |
|
"epoch": 6.2, |
|
"learning_rate": 0.0026125000000000002, |
|
"loss": 11.0576, |
|
"step": 217 |
|
}, |
|
{ |
|
"epoch": 6.23, |
|
"learning_rate": 0.0026107142857142857, |
|
"loss": 15.3574, |
|
"step": 218 |
|
}, |
|
{ |
|
"epoch": 6.26, |
|
"learning_rate": 0.0026089285714285715, |
|
"loss": 15.5239, |
|
"step": 219 |
|
}, |
|
{ |
|
"epoch": 6.29, |
|
"learning_rate": 0.0026071428571428574, |
|
"loss": 15.3973, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 6.31, |
|
"learning_rate": 0.0026053571428571428, |
|
"loss": 12.059, |
|
"step": 221 |
|
}, |
|
{ |
|
"epoch": 6.34, |
|
"learning_rate": 0.0026035714285714286, |
|
"loss": 10.8352, |
|
"step": 222 |
|
}, |
|
{ |
|
"epoch": 6.37, |
|
"learning_rate": 0.0026017857142857145, |
|
"loss": 10.1507, |
|
"step": 223 |
|
}, |
|
{ |
|
"epoch": 6.4, |
|
"learning_rate": 0.0026000000000000003, |
|
"loss": 10.651, |
|
"step": 224 |
|
}, |
|
{ |
|
"epoch": 6.43, |
|
"learning_rate": 0.0025982142857142857, |
|
"loss": 9.8363, |
|
"step": 225 |
|
}, |
|
{ |
|
"epoch": 6.46, |
|
"learning_rate": 0.0025964285714285716, |
|
"loss": 9.3673, |
|
"step": 226 |
|
}, |
|
{ |
|
"epoch": 6.49, |
|
"learning_rate": 0.0025946428571428574, |
|
"loss": 9.5433, |
|
"step": 227 |
|
}, |
|
{ |
|
"epoch": 6.51, |
|
"learning_rate": 0.002592857142857143, |
|
"loss": 9.9206, |
|
"step": 228 |
|
}, |
|
{ |
|
"epoch": 6.54, |
|
"learning_rate": 0.0025910714285714287, |
|
"loss": 9.5516, |
|
"step": 229 |
|
}, |
|
{ |
|
"epoch": 6.57, |
|
"learning_rate": 0.0025892857142857145, |
|
"loss": 9.2165, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 6.6, |
|
"learning_rate": 0.0025875000000000004, |
|
"loss": 9.0825, |
|
"step": 231 |
|
}, |
|
{ |
|
"epoch": 6.63, |
|
"learning_rate": 0.002585714285714286, |
|
"loss": 8.7437, |
|
"step": 232 |
|
}, |
|
{ |
|
"epoch": 6.66, |
|
"learning_rate": 0.0025839285714285717, |
|
"loss": 8.6366, |
|
"step": 233 |
|
}, |
|
{ |
|
"epoch": 6.69, |
|
"learning_rate": 0.0025821428571428575, |
|
"loss": 9.7431, |
|
"step": 234 |
|
}, |
|
{ |
|
"epoch": 6.71, |
|
"learning_rate": 0.002580357142857143, |
|
"loss": 8.1876, |
|
"step": 235 |
|
}, |
|
{ |
|
"epoch": 6.74, |
|
"learning_rate": 0.0025785714285714288, |
|
"loss": 8.4559, |
|
"step": 236 |
|
}, |
|
{ |
|
"epoch": 6.77, |
|
"learning_rate": 0.002576785714285714, |
|
"loss": 8.0092, |
|
"step": 237 |
|
}, |
|
{ |
|
"epoch": 6.8, |
|
"learning_rate": 0.002575, |
|
"loss": 8.028, |
|
"step": 238 |
|
}, |
|
{ |
|
"epoch": 6.83, |
|
"learning_rate": 0.0025732142857142854, |
|
"loss": 7.8379, |
|
"step": 239 |
|
}, |
|
{ |
|
"epoch": 6.86, |
|
"learning_rate": 0.0025714285714285713, |
|
"loss": 7.8127, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 6.89, |
|
"learning_rate": 0.002569642857142857, |
|
"loss": 7.8252, |
|
"step": 241 |
|
}, |
|
{ |
|
"epoch": 6.91, |
|
"learning_rate": 0.002567857142857143, |
|
"loss": 7.7094, |
|
"step": 242 |
|
}, |
|
{ |
|
"epoch": 6.94, |
|
"learning_rate": 0.0025660714285714284, |
|
"loss": 7.7962, |
|
"step": 243 |
|
}, |
|
{ |
|
"epoch": 6.97, |
|
"learning_rate": 0.0025642857142857143, |
|
"loss": 7.4966, |
|
"step": 244 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"learning_rate": 0.0025625, |
|
"loss": 7.4851, |
|
"step": 245 |
|
}, |
|
{ |
|
"epoch": 7.03, |
|
"learning_rate": 0.0025607142857142855, |
|
"loss": 7.5188, |
|
"step": 246 |
|
}, |
|
{ |
|
"epoch": 7.06, |
|
"learning_rate": 0.0025589285714285714, |
|
"loss": 7.7866, |
|
"step": 247 |
|
}, |
|
{ |
|
"epoch": 7.09, |
|
"learning_rate": 0.0025571428571428572, |
|
"loss": 7.5743, |
|
"step": 248 |
|
}, |
|
{ |
|
"epoch": 7.11, |
|
"learning_rate": 0.0025553571428571426, |
|
"loss": 7.4608, |
|
"step": 249 |
|
}, |
|
{ |
|
"epoch": 7.14, |
|
"learning_rate": 0.0025535714285714285, |
|
"loss": 7.4655, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 7.17, |
|
"learning_rate": 0.0025517857142857143, |
|
"loss": 7.5474, |
|
"step": 251 |
|
}, |
|
{ |
|
"epoch": 7.2, |
|
"learning_rate": 0.00255, |
|
"loss": 7.6983, |
|
"step": 252 |
|
}, |
|
{ |
|
"epoch": 7.23, |
|
"learning_rate": 0.0025482142857142856, |
|
"loss": 7.4936, |
|
"step": 253 |
|
}, |
|
{ |
|
"epoch": 7.26, |
|
"learning_rate": 0.0025464285714285714, |
|
"loss": 7.6966, |
|
"step": 254 |
|
}, |
|
{ |
|
"epoch": 7.29, |
|
"learning_rate": 0.0025446428571428573, |
|
"loss": 7.4701, |
|
"step": 255 |
|
}, |
|
{ |
|
"epoch": 7.31, |
|
"learning_rate": 0.0025428571428571427, |
|
"loss": 7.511, |
|
"step": 256 |
|
}, |
|
{ |
|
"epoch": 7.34, |
|
"learning_rate": 0.0025410714285714286, |
|
"loss": 7.3709, |
|
"step": 257 |
|
}, |
|
{ |
|
"epoch": 7.37, |
|
"learning_rate": 0.0025392857142857144, |
|
"loss": 7.4582, |
|
"step": 258 |
|
}, |
|
{ |
|
"epoch": 7.4, |
|
"learning_rate": 0.0025375, |
|
"loss": 7.4263, |
|
"step": 259 |
|
}, |
|
{ |
|
"epoch": 7.43, |
|
"learning_rate": 0.0025357142857142857, |
|
"loss": 7.3134, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 7.46, |
|
"learning_rate": 0.0025339285714285715, |
|
"loss": 7.3849, |
|
"step": 261 |
|
}, |
|
{ |
|
"epoch": 7.49, |
|
"learning_rate": 0.0025321428571428574, |
|
"loss": 7.292, |
|
"step": 262 |
|
}, |
|
{ |
|
"epoch": 7.51, |
|
"learning_rate": 0.002530357142857143, |
|
"loss": 7.343, |
|
"step": 263 |
|
}, |
|
{ |
|
"epoch": 7.54, |
|
"learning_rate": 0.0025285714285714286, |
|
"loss": 7.3166, |
|
"step": 264 |
|
}, |
|
{ |
|
"epoch": 7.57, |
|
"learning_rate": 0.0025267857142857145, |
|
"loss": 7.2676, |
|
"step": 265 |
|
}, |
|
{ |
|
"epoch": 7.6, |
|
"learning_rate": 0.002525, |
|
"loss": 7.2955, |
|
"step": 266 |
|
}, |
|
{ |
|
"epoch": 7.63, |
|
"learning_rate": 0.0025232142857142857, |
|
"loss": 7.3386, |
|
"step": 267 |
|
}, |
|
{ |
|
"epoch": 7.66, |
|
"learning_rate": 0.0025214285714285716, |
|
"loss": 7.2682, |
|
"step": 268 |
|
}, |
|
{ |
|
"epoch": 7.69, |
|
"learning_rate": 0.0025196428571428574, |
|
"loss": 7.2359, |
|
"step": 269 |
|
}, |
|
{ |
|
"epoch": 7.71, |
|
"learning_rate": 0.002517857142857143, |
|
"loss": 7.1849, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 7.74, |
|
"learning_rate": 0.0025160714285714287, |
|
"loss": 7.2421, |
|
"step": 271 |
|
}, |
|
{ |
|
"epoch": 7.77, |
|
"learning_rate": 0.0025142857142857146, |
|
"loss": 7.2341, |
|
"step": 272 |
|
}, |
|
{ |
|
"epoch": 7.8, |
|
"learning_rate": 0.0025125, |
|
"loss": 7.2901, |
|
"step": 273 |
|
}, |
|
{ |
|
"epoch": 7.83, |
|
"learning_rate": 0.002510714285714286, |
|
"loss": 7.1931, |
|
"step": 274 |
|
}, |
|
{ |
|
"epoch": 7.86, |
|
"learning_rate": 0.0025089285714285717, |
|
"loss": 7.1907, |
|
"step": 275 |
|
}, |
|
{ |
|
"epoch": 7.89, |
|
"learning_rate": 0.002507142857142857, |
|
"loss": 7.2369, |
|
"step": 276 |
|
}, |
|
{ |
|
"epoch": 7.91, |
|
"learning_rate": 0.002505357142857143, |
|
"loss": 7.1764, |
|
"step": 277 |
|
}, |
|
{ |
|
"epoch": 7.94, |
|
"learning_rate": 0.002503571428571429, |
|
"loss": 7.1928, |
|
"step": 278 |
|
}, |
|
{ |
|
"epoch": 7.97, |
|
"learning_rate": 0.0025017857142857146, |
|
"loss": 7.2114, |
|
"step": 279 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"learning_rate": 0.0025, |
|
"loss": 7.2307, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 8.03, |
|
"learning_rate": 0.002498214285714286, |
|
"loss": 7.2477, |
|
"step": 281 |
|
}, |
|
{ |
|
"epoch": 8.06, |
|
"learning_rate": 0.0024964285714285718, |
|
"loss": 7.2069, |
|
"step": 282 |
|
}, |
|
{ |
|
"epoch": 8.09, |
|
"learning_rate": 0.002494642857142857, |
|
"loss": 7.1484, |
|
"step": 283 |
|
}, |
|
{ |
|
"epoch": 8.11, |
|
"learning_rate": 0.002492857142857143, |
|
"loss": 7.1076, |
|
"step": 284 |
|
}, |
|
{ |
|
"epoch": 8.14, |
|
"learning_rate": 0.002491071428571429, |
|
"loss": 7.0819, |
|
"step": 285 |
|
}, |
|
{ |
|
"epoch": 8.17, |
|
"learning_rate": 0.0024892857142857143, |
|
"loss": 7.0708, |
|
"step": 286 |
|
}, |
|
{ |
|
"epoch": 8.2, |
|
"learning_rate": 0.0024875, |
|
"loss": 7.0763, |
|
"step": 287 |
|
}, |
|
{ |
|
"epoch": 8.23, |
|
"learning_rate": 0.002485714285714286, |
|
"loss": 7.0792, |
|
"step": 288 |
|
}, |
|
{ |
|
"epoch": 8.26, |
|
"learning_rate": 0.0024839285714285714, |
|
"loss": 7.1397, |
|
"step": 289 |
|
}, |
|
{ |
|
"epoch": 8.29, |
|
"learning_rate": 0.0024821428571428572, |
|
"loss": 7.0893, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 8.31, |
|
"learning_rate": 0.0024803571428571427, |
|
"loss": 7.1263, |
|
"step": 291 |
|
}, |
|
{ |
|
"epoch": 8.34, |
|
"learning_rate": 0.0024785714285714285, |
|
"loss": 7.0226, |
|
"step": 292 |
|
}, |
|
{ |
|
"epoch": 8.37, |
|
"learning_rate": 0.0024767857142857144, |
|
"loss": 7.1017, |
|
"step": 293 |
|
}, |
|
{ |
|
"epoch": 8.4, |
|
"learning_rate": 0.0024749999999999998, |
|
"loss": 7.0161, |
|
"step": 294 |
|
}, |
|
{ |
|
"epoch": 8.43, |
|
"learning_rate": 0.0024732142857142856, |
|
"loss": 7.117, |
|
"step": 295 |
|
}, |
|
{ |
|
"epoch": 8.46, |
|
"learning_rate": 0.0024714285714285715, |
|
"loss": 7.0234, |
|
"step": 296 |
|
}, |
|
{ |
|
"epoch": 8.49, |
|
"learning_rate": 0.002469642857142857, |
|
"loss": 7.0663, |
|
"step": 297 |
|
}, |
|
{ |
|
"epoch": 8.51, |
|
"learning_rate": 0.0024678571428571427, |
|
"loss": 7.1604, |
|
"step": 298 |
|
}, |
|
{ |
|
"epoch": 8.54, |
|
"learning_rate": 0.0024660714285714286, |
|
"loss": 7.0543, |
|
"step": 299 |
|
}, |
|
{ |
|
"epoch": 8.57, |
|
"learning_rate": 0.0024642857142857144, |
|
"loss": 7.0131, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 8.6, |
|
"learning_rate": 0.0024625, |
|
"loss": 7.0294, |
|
"step": 301 |
|
}, |
|
{ |
|
"epoch": 8.63, |
|
"learning_rate": 0.0024607142857142857, |
|
"loss": 7.0273, |
|
"step": 302 |
|
}, |
|
{ |
|
"epoch": 8.66, |
|
"learning_rate": 0.0024589285714285715, |
|
"loss": 7.0074, |
|
"step": 303 |
|
}, |
|
{ |
|
"epoch": 8.69, |
|
"learning_rate": 0.002457142857142857, |
|
"loss": 6.9747, |
|
"step": 304 |
|
}, |
|
{ |
|
"epoch": 8.71, |
|
"learning_rate": 0.002455357142857143, |
|
"loss": 7.0617, |
|
"step": 305 |
|
}, |
|
{ |
|
"epoch": 8.74, |
|
"learning_rate": 0.0024535714285714287, |
|
"loss": 7.0907, |
|
"step": 306 |
|
}, |
|
{ |
|
"epoch": 8.77, |
|
"learning_rate": 0.002451785714285714, |
|
"loss": 7.0037, |
|
"step": 307 |
|
}, |
|
{ |
|
"epoch": 8.8, |
|
"learning_rate": 0.00245, |
|
"loss": 6.969, |
|
"step": 308 |
|
}, |
|
{ |
|
"epoch": 8.83, |
|
"learning_rate": 0.0024482142857142858, |
|
"loss": 7.0575, |
|
"step": 309 |
|
}, |
|
{ |
|
"epoch": 8.86, |
|
"learning_rate": 0.0024464285714285716, |
|
"loss": 6.9494, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 8.89, |
|
"learning_rate": 0.002444642857142857, |
|
"loss": 6.969, |
|
"step": 311 |
|
}, |
|
{ |
|
"epoch": 8.91, |
|
"learning_rate": 0.002442857142857143, |
|
"loss": 6.8827, |
|
"step": 312 |
|
}, |
|
{ |
|
"epoch": 8.94, |
|
"learning_rate": 0.0024410714285714287, |
|
"loss": 6.9058, |
|
"step": 313 |
|
}, |
|
{ |
|
"epoch": 8.97, |
|
"learning_rate": 0.002439285714285714, |
|
"loss": 6.8808, |
|
"step": 314 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"learning_rate": 0.0024375, |
|
"loss": 6.9516, |
|
"step": 315 |
|
}, |
|
{ |
|
"epoch": 9.03, |
|
"learning_rate": 0.002435714285714286, |
|
"loss": 6.9132, |
|
"step": 316 |
|
}, |
|
{ |
|
"epoch": 9.06, |
|
"learning_rate": 0.0024339285714285717, |
|
"loss": 6.9058, |
|
"step": 317 |
|
}, |
|
{ |
|
"epoch": 9.09, |
|
"learning_rate": 0.002432142857142857, |
|
"loss": 6.9332, |
|
"step": 318 |
|
}, |
|
{ |
|
"epoch": 9.11, |
|
"learning_rate": 0.002430357142857143, |
|
"loss": 6.9757, |
|
"step": 319 |
|
}, |
|
{ |
|
"epoch": 9.14, |
|
"learning_rate": 0.002428571428571429, |
|
"loss": 6.8261, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 9.17, |
|
"learning_rate": 0.0024267857142857142, |
|
"loss": 6.8571, |
|
"step": 321 |
|
}, |
|
{ |
|
"epoch": 9.2, |
|
"learning_rate": 0.002425, |
|
"loss": 6.8435, |
|
"step": 322 |
|
}, |
|
{ |
|
"epoch": 9.23, |
|
"learning_rate": 0.002423214285714286, |
|
"loss": 6.9033, |
|
"step": 323 |
|
}, |
|
{ |
|
"epoch": 9.26, |
|
"learning_rate": 0.0024214285714285713, |
|
"loss": 6.8042, |
|
"step": 324 |
|
}, |
|
{ |
|
"epoch": 9.29, |
|
"learning_rate": 0.002419642857142857, |
|
"loss": 6.8732, |
|
"step": 325 |
|
}, |
|
{ |
|
"epoch": 9.31, |
|
"learning_rate": 0.002417857142857143, |
|
"loss": 6.752, |
|
"step": 326 |
|
}, |
|
{ |
|
"epoch": 9.34, |
|
"learning_rate": 0.002416071428571429, |
|
"loss": 6.8016, |
|
"step": 327 |
|
}, |
|
{ |
|
"epoch": 9.37, |
|
"learning_rate": 0.0024142857142857143, |
|
"loss": 6.8879, |
|
"step": 328 |
|
}, |
|
{ |
|
"epoch": 9.4, |
|
"learning_rate": 0.0024125, |
|
"loss": 6.7643, |
|
"step": 329 |
|
}, |
|
{ |
|
"epoch": 9.43, |
|
"learning_rate": 0.002410714285714286, |
|
"loss": 6.7084, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 9.46, |
|
"learning_rate": 0.0024089285714285714, |
|
"loss": 6.8049, |
|
"step": 331 |
|
}, |
|
{ |
|
"epoch": 9.49, |
|
"learning_rate": 0.0024071428571428573, |
|
"loss": 6.7925, |
|
"step": 332 |
|
}, |
|
{ |
|
"epoch": 9.51, |
|
"learning_rate": 0.002405357142857143, |
|
"loss": 6.7289, |
|
"step": 333 |
|
}, |
|
{ |
|
"epoch": 9.54, |
|
"learning_rate": 0.0024035714285714285, |
|
"loss": 6.7439, |
|
"step": 334 |
|
}, |
|
{ |
|
"epoch": 9.57, |
|
"learning_rate": 0.0024017857142857144, |
|
"loss": 6.7119, |
|
"step": 335 |
|
}, |
|
{ |
|
"epoch": 9.6, |
|
"learning_rate": 0.0024000000000000002, |
|
"loss": 6.7251, |
|
"step": 336 |
|
}, |
|
{ |
|
"epoch": 9.63, |
|
"learning_rate": 0.002398214285714286, |
|
"loss": 6.6659, |
|
"step": 337 |
|
}, |
|
{ |
|
"epoch": 9.66, |
|
"learning_rate": 0.0023964285714285715, |
|
"loss": 6.7422, |
|
"step": 338 |
|
}, |
|
{ |
|
"epoch": 9.69, |
|
"learning_rate": 0.0023946428571428573, |
|
"loss": 6.7852, |
|
"step": 339 |
|
}, |
|
{ |
|
"epoch": 9.71, |
|
"learning_rate": 0.002392857142857143, |
|
"loss": 6.6828, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 9.74, |
|
"learning_rate": 0.0023910714285714286, |
|
"loss": 6.686, |
|
"step": 341 |
|
}, |
|
{ |
|
"epoch": 9.77, |
|
"learning_rate": 0.002389285714285714, |
|
"loss": 6.7326, |
|
"step": 342 |
|
}, |
|
{ |
|
"epoch": 9.8, |
|
"learning_rate": 0.0023875, |
|
"loss": 6.5601, |
|
"step": 343 |
|
}, |
|
{ |
|
"epoch": 9.83, |
|
"learning_rate": 0.0023857142857142857, |
|
"loss": 6.6646, |
|
"step": 344 |
|
}, |
|
{ |
|
"epoch": 9.86, |
|
"learning_rate": 0.002383928571428571, |
|
"loss": 6.5673, |
|
"step": 345 |
|
}, |
|
{ |
|
"epoch": 9.89, |
|
"learning_rate": 0.002382142857142857, |
|
"loss": 6.6227, |
|
"step": 346 |
|
}, |
|
{ |
|
"epoch": 9.91, |
|
"learning_rate": 0.002380357142857143, |
|
"loss": 6.5526, |
|
"step": 347 |
|
}, |
|
{ |
|
"epoch": 9.94, |
|
"learning_rate": 0.0023785714285714287, |
|
"loss": 6.6842, |
|
"step": 348 |
|
}, |
|
{ |
|
"epoch": 9.97, |
|
"learning_rate": 0.002376785714285714, |
|
"loss": 6.6211, |
|
"step": 349 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"learning_rate": 0.002375, |
|
"loss": 6.6952, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 10.03, |
|
"learning_rate": 0.002373214285714286, |
|
"loss": 6.5324, |
|
"step": 351 |
|
}, |
|
{ |
|
"epoch": 10.06, |
|
"learning_rate": 0.002371428571428571, |
|
"loss": 6.5792, |
|
"step": 352 |
|
}, |
|
{ |
|
"epoch": 10.09, |
|
"learning_rate": 0.002369642857142857, |
|
"loss": 6.5276, |
|
"step": 353 |
|
}, |
|
{ |
|
"epoch": 10.11, |
|
"learning_rate": 0.002367857142857143, |
|
"loss": 6.5634, |
|
"step": 354 |
|
}, |
|
{ |
|
"epoch": 10.14, |
|
"learning_rate": 0.0023660714285714288, |
|
"loss": 6.5385, |
|
"step": 355 |
|
}, |
|
{ |
|
"epoch": 10.17, |
|
"learning_rate": 0.002364285714285714, |
|
"loss": 6.4516, |
|
"step": 356 |
|
}, |
|
{ |
|
"epoch": 10.2, |
|
"learning_rate": 0.0023625, |
|
"loss": 6.5641, |
|
"step": 357 |
|
}, |
|
{ |
|
"epoch": 10.23, |
|
"learning_rate": 0.002360714285714286, |
|
"loss": 6.5001, |
|
"step": 358 |
|
}, |
|
{ |
|
"epoch": 10.26, |
|
"learning_rate": 0.0023589285714285713, |
|
"loss": 6.4846, |
|
"step": 359 |
|
}, |
|
{ |
|
"epoch": 10.29, |
|
"learning_rate": 0.002357142857142857, |
|
"loss": 6.4638, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 10.31, |
|
"learning_rate": 0.002355357142857143, |
|
"loss": 6.5217, |
|
"step": 361 |
|
}, |
|
{ |
|
"epoch": 10.34, |
|
"learning_rate": 0.0023535714285714284, |
|
"loss": 6.5444, |
|
"step": 362 |
|
}, |
|
{ |
|
"epoch": 10.37, |
|
"learning_rate": 0.0023517857142857142, |
|
"loss": 6.496, |
|
"step": 363 |
|
}, |
|
{ |
|
"epoch": 10.4, |
|
"learning_rate": 0.00235, |
|
"loss": 6.5345, |
|
"step": 364 |
|
}, |
|
{ |
|
"epoch": 10.43, |
|
"learning_rate": 0.002348214285714286, |
|
"loss": 6.4732, |
|
"step": 365 |
|
}, |
|
{ |
|
"epoch": 10.46, |
|
"learning_rate": 0.0023464285714285714, |
|
"loss": 6.4765, |
|
"step": 366 |
|
}, |
|
{ |
|
"epoch": 10.49, |
|
"learning_rate": 0.002344642857142857, |
|
"loss": 6.3881, |
|
"step": 367 |
|
}, |
|
{ |
|
"epoch": 10.51, |
|
"learning_rate": 0.002342857142857143, |
|
"loss": 6.4908, |
|
"step": 368 |
|
}, |
|
{ |
|
"epoch": 10.54, |
|
"learning_rate": 0.0023410714285714285, |
|
"loss": 6.4593, |
|
"step": 369 |
|
}, |
|
{ |
|
"epoch": 10.57, |
|
"learning_rate": 0.0023392857142857143, |
|
"loss": 6.5006, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 10.6, |
|
"learning_rate": 0.0023375, |
|
"loss": 6.4495, |
|
"step": 371 |
|
}, |
|
{ |
|
"epoch": 10.63, |
|
"learning_rate": 0.0023357142857142856, |
|
"loss": 6.3569, |
|
"step": 372 |
|
}, |
|
{ |
|
"epoch": 10.66, |
|
"learning_rate": 0.0023339285714285714, |
|
"loss": 6.3592, |
|
"step": 373 |
|
}, |
|
{ |
|
"epoch": 10.69, |
|
"learning_rate": 0.0023321428571428573, |
|
"loss": 6.3258, |
|
"step": 374 |
|
}, |
|
{ |
|
"epoch": 10.71, |
|
"learning_rate": 0.002330357142857143, |
|
"loss": 6.3216, |
|
"step": 375 |
|
}, |
|
{ |
|
"epoch": 10.74, |
|
"learning_rate": 0.0023285714285714285, |
|
"loss": 6.4878, |
|
"step": 376 |
|
}, |
|
{ |
|
"epoch": 10.77, |
|
"learning_rate": 0.0023267857142857144, |
|
"loss": 6.3412, |
|
"step": 377 |
|
}, |
|
{ |
|
"epoch": 10.8, |
|
"learning_rate": 0.0023250000000000002, |
|
"loss": 6.3925, |
|
"step": 378 |
|
}, |
|
{ |
|
"epoch": 10.83, |
|
"learning_rate": 0.0023232142857142857, |
|
"loss": 6.275, |
|
"step": 379 |
|
}, |
|
{ |
|
"epoch": 10.86, |
|
"learning_rate": 0.0023214285714285715, |
|
"loss": 6.3575, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 10.89, |
|
"learning_rate": 0.0023196428571428574, |
|
"loss": 6.3259, |
|
"step": 381 |
|
}, |
|
{ |
|
"epoch": 10.91, |
|
"learning_rate": 0.002317857142857143, |
|
"loss": 6.315, |
|
"step": 382 |
|
}, |
|
{ |
|
"epoch": 10.94, |
|
"learning_rate": 0.0023160714285714286, |
|
"loss": 6.277, |
|
"step": 383 |
|
}, |
|
{ |
|
"epoch": 10.97, |
|
"learning_rate": 0.0023142857142857145, |
|
"loss": 6.3259, |
|
"step": 384 |
|
}, |
|
{ |
|
"epoch": 11.0, |
|
"learning_rate": 0.0023125000000000003, |
|
"loss": 6.3747, |
|
"step": 385 |
|
}, |
|
{ |
|
"epoch": 11.03, |
|
"learning_rate": 0.0023107142857142857, |
|
"loss": 6.3646, |
|
"step": 386 |
|
}, |
|
{ |
|
"epoch": 11.06, |
|
"learning_rate": 0.0023089285714285716, |
|
"loss": 6.3687, |
|
"step": 387 |
|
}, |
|
{ |
|
"epoch": 11.09, |
|
"learning_rate": 0.0023071428571428574, |
|
"loss": 6.3374, |
|
"step": 388 |
|
}, |
|
{ |
|
"epoch": 11.11, |
|
"learning_rate": 0.002305357142857143, |
|
"loss": 6.3129, |
|
"step": 389 |
|
}, |
|
{ |
|
"epoch": 11.14, |
|
"learning_rate": 0.0023035714285714287, |
|
"loss": 6.3425, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 11.17, |
|
"learning_rate": 0.0023017857142857145, |
|
"loss": 6.2122, |
|
"step": 391 |
|
}, |
|
{ |
|
"epoch": 11.2, |
|
"learning_rate": 0.0023000000000000004, |
|
"loss": 6.2768, |
|
"step": 392 |
|
}, |
|
{ |
|
"epoch": 11.23, |
|
"learning_rate": 0.002298214285714286, |
|
"loss": 6.2853, |
|
"step": 393 |
|
}, |
|
{ |
|
"epoch": 11.26, |
|
"learning_rate": 0.0022964285714285712, |
|
"loss": 6.3215, |
|
"step": 394 |
|
}, |
|
{ |
|
"epoch": 11.29, |
|
"learning_rate": 0.002294642857142857, |
|
"loss": 6.3244, |
|
"step": 395 |
|
}, |
|
{ |
|
"epoch": 11.31, |
|
"learning_rate": 0.002292857142857143, |
|
"loss": 6.2399, |
|
"step": 396 |
|
}, |
|
{ |
|
"epoch": 11.34, |
|
"learning_rate": 0.0022910714285714283, |
|
"loss": 6.2457, |
|
"step": 397 |
|
}, |
|
{ |
|
"epoch": 11.37, |
|
"learning_rate": 0.002289285714285714, |
|
"loss": 6.2018, |
|
"step": 398 |
|
}, |
|
{ |
|
"epoch": 11.4, |
|
"learning_rate": 0.0022875, |
|
"loss": 6.2101, |
|
"step": 399 |
|
}, |
|
{ |
|
"epoch": 11.43, |
|
"learning_rate": 0.0022857142857142855, |
|
"loss": 6.2257, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 11.46, |
|
"learning_rate": 0.0022839285714285713, |
|
"loss": 6.3029, |
|
"step": 401 |
|
}, |
|
{ |
|
"epoch": 11.49, |
|
"learning_rate": 0.002282142857142857, |
|
"loss": 6.2312, |
|
"step": 402 |
|
}, |
|
{ |
|
"epoch": 11.51, |
|
"learning_rate": 0.002280357142857143, |
|
"loss": 6.203, |
|
"step": 403 |
|
}, |
|
{ |
|
"epoch": 11.54, |
|
"learning_rate": 0.0022785714285714284, |
|
"loss": 6.2881, |
|
"step": 404 |
|
}, |
|
{ |
|
"epoch": 11.57, |
|
"learning_rate": 0.0022767857142857143, |
|
"loss": 6.3466, |
|
"step": 405 |
|
}, |
|
{ |
|
"epoch": 11.6, |
|
"learning_rate": 0.002275, |
|
"loss": 6.1908, |
|
"step": 406 |
|
}, |
|
{ |
|
"epoch": 11.63, |
|
"learning_rate": 0.0022732142857142855, |
|
"loss": 6.196, |
|
"step": 407 |
|
}, |
|
{ |
|
"epoch": 11.66, |
|
"learning_rate": 0.0022714285714285714, |
|
"loss": 6.1726, |
|
"step": 408 |
|
}, |
|
{ |
|
"epoch": 11.69, |
|
"learning_rate": 0.0022696428571428572, |
|
"loss": 6.1207, |
|
"step": 409 |
|
}, |
|
{ |
|
"epoch": 11.71, |
|
"learning_rate": 0.0022678571428571426, |
|
"loss": 6.2382, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 11.74, |
|
"learning_rate": 0.0022660714285714285, |
|
"loss": 6.1757, |
|
"step": 411 |
|
}, |
|
{ |
|
"epoch": 11.77, |
|
"learning_rate": 0.0022642857142857143, |
|
"loss": 6.1153, |
|
"step": 412 |
|
}, |
|
{ |
|
"epoch": 11.8, |
|
"learning_rate": 0.0022625, |
|
"loss": 6.1261, |
|
"step": 413 |
|
}, |
|
{ |
|
"epoch": 11.83, |
|
"learning_rate": 0.0022607142857142856, |
|
"loss": 6.0762, |
|
"step": 414 |
|
}, |
|
{ |
|
"epoch": 11.86, |
|
"learning_rate": 0.0022589285714285715, |
|
"loss": 6.1386, |
|
"step": 415 |
|
}, |
|
{ |
|
"epoch": 11.89, |
|
"learning_rate": 0.0022571428571428573, |
|
"loss": 6.1204, |
|
"step": 416 |
|
}, |
|
{ |
|
"epoch": 11.91, |
|
"learning_rate": 0.0022553571428571427, |
|
"loss": 6.1059, |
|
"step": 417 |
|
}, |
|
{ |
|
"epoch": 11.94, |
|
"learning_rate": 0.0022535714285714286, |
|
"loss": 6.0591, |
|
"step": 418 |
|
}, |
|
{ |
|
"epoch": 11.97, |
|
"learning_rate": 0.0022517857142857144, |
|
"loss": 6.1713, |
|
"step": 419 |
|
}, |
|
{ |
|
"epoch": 12.0, |
|
"learning_rate": 0.0022500000000000003, |
|
"loss": 6.2039, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 12.03, |
|
"learning_rate": 0.0022482142857142857, |
|
"loss": 6.0168, |
|
"step": 421 |
|
}, |
|
{ |
|
"epoch": 12.06, |
|
"learning_rate": 0.0022464285714285715, |
|
"loss": 6.0206, |
|
"step": 422 |
|
}, |
|
{ |
|
"epoch": 12.09, |
|
"learning_rate": 0.0022446428571428574, |
|
"loss": 6.0642, |
|
"step": 423 |
|
}, |
|
{ |
|
"epoch": 12.11, |
|
"learning_rate": 0.002242857142857143, |
|
"loss": 6.0665, |
|
"step": 424 |
|
}, |
|
{ |
|
"epoch": 12.14, |
|
"learning_rate": 0.0022410714285714286, |
|
"loss": 5.9766, |
|
"step": 425 |
|
}, |
|
{ |
|
"epoch": 12.17, |
|
"learning_rate": 0.0022392857142857145, |
|
"loss": 6.2167, |
|
"step": 426 |
|
}, |
|
{ |
|
"epoch": 12.2, |
|
"learning_rate": 0.0022375, |
|
"loss": 6.002, |
|
"step": 427 |
|
}, |
|
{ |
|
"epoch": 12.23, |
|
"learning_rate": 0.0022357142857142858, |
|
"loss": 6.0266, |
|
"step": 428 |
|
}, |
|
{ |
|
"epoch": 12.26, |
|
"learning_rate": 0.0022339285714285716, |
|
"loss": 5.9339, |
|
"step": 429 |
|
}, |
|
{ |
|
"epoch": 12.29, |
|
"learning_rate": 0.0022321428571428575, |
|
"loss": 6.1066, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 12.31, |
|
"learning_rate": 0.002230357142857143, |
|
"loss": 5.9262, |
|
"step": 431 |
|
}, |
|
{ |
|
"epoch": 12.34, |
|
"learning_rate": 0.0022285714285714287, |
|
"loss": 6.0696, |
|
"step": 432 |
|
}, |
|
{ |
|
"epoch": 12.37, |
|
"learning_rate": 0.0022267857142857146, |
|
"loss": 5.9181, |
|
"step": 433 |
|
}, |
|
{ |
|
"epoch": 12.4, |
|
"learning_rate": 0.002225, |
|
"loss": 6.0291, |
|
"step": 434 |
|
}, |
|
{ |
|
"epoch": 12.43, |
|
"learning_rate": 0.002223214285714286, |
|
"loss": 5.9493, |
|
"step": 435 |
|
}, |
|
{ |
|
"epoch": 12.46, |
|
"learning_rate": 0.0022214285714285717, |
|
"loss": 5.9639, |
|
"step": 436 |
|
}, |
|
{ |
|
"epoch": 12.49, |
|
"learning_rate": 0.002219642857142857, |
|
"loss": 6.0303, |
|
"step": 437 |
|
}, |
|
{ |
|
"epoch": 12.51, |
|
"learning_rate": 0.002217857142857143, |
|
"loss": 6.0157, |
|
"step": 438 |
|
}, |
|
{ |
|
"epoch": 12.54, |
|
"learning_rate": 0.002216071428571429, |
|
"loss": 5.9309, |
|
"step": 439 |
|
}, |
|
{ |
|
"epoch": 12.57, |
|
"learning_rate": 0.0022142857142857146, |
|
"loss": 5.9554, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 12.6, |
|
"learning_rate": 0.0022125, |
|
"loss": 5.9761, |
|
"step": 441 |
|
}, |
|
{ |
|
"epoch": 12.63, |
|
"learning_rate": 0.002210714285714286, |
|
"loss": 5.9042, |
|
"step": 442 |
|
}, |
|
{ |
|
"epoch": 12.66, |
|
"learning_rate": 0.0022089285714285718, |
|
"loss": 6.0009, |
|
"step": 443 |
|
}, |
|
{ |
|
"epoch": 12.69, |
|
"learning_rate": 0.002207142857142857, |
|
"loss": 5.9199, |
|
"step": 444 |
|
}, |
|
{ |
|
"epoch": 12.71, |
|
"learning_rate": 0.002205357142857143, |
|
"loss": 5.9472, |
|
"step": 445 |
|
}, |
|
{ |
|
"epoch": 12.74, |
|
"learning_rate": 0.002203571428571429, |
|
"loss": 6.0478, |
|
"step": 446 |
|
}, |
|
{ |
|
"epoch": 12.77, |
|
"learning_rate": 0.0022017857142857143, |
|
"loss": 6.0131, |
|
"step": 447 |
|
}, |
|
{ |
|
"epoch": 12.8, |
|
"learning_rate": 0.0021999999999999997, |
|
"loss": 5.9161, |
|
"step": 448 |
|
}, |
|
{ |
|
"epoch": 12.83, |
|
"learning_rate": 0.0021982142857142855, |
|
"loss": 5.935, |
|
"step": 449 |
|
}, |
|
{ |
|
"epoch": 12.86, |
|
"learning_rate": 0.0021964285714285714, |
|
"loss": 5.9035, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 12.89, |
|
"learning_rate": 0.0021946428571428572, |
|
"loss": 5.9422, |
|
"step": 451 |
|
}, |
|
{ |
|
"epoch": 12.91, |
|
"learning_rate": 0.0021928571428571427, |
|
"loss": 6.0135, |
|
"step": 452 |
|
}, |
|
{ |
|
"epoch": 12.94, |
|
"learning_rate": 0.0021910714285714285, |
|
"loss": 5.9757, |
|
"step": 453 |
|
}, |
|
{ |
|
"epoch": 12.97, |
|
"learning_rate": 0.0021892857142857144, |
|
"loss": 5.942, |
|
"step": 454 |
|
}, |
|
{ |
|
"epoch": 13.0, |
|
"learning_rate": 0.0021874999999999998, |
|
"loss": 5.943, |
|
"step": 455 |
|
}, |
|
{ |
|
"epoch": 13.03, |
|
"learning_rate": 0.0021857142857142856, |
|
"loss": 5.8982, |
|
"step": 456 |
|
}, |
|
{ |
|
"epoch": 13.06, |
|
"learning_rate": 0.0021839285714285715, |
|
"loss": 5.9874, |
|
"step": 457 |
|
}, |
|
{ |
|
"epoch": 13.09, |
|
"learning_rate": 0.002182142857142857, |
|
"loss": 5.8677, |
|
"step": 458 |
|
}, |
|
{ |
|
"epoch": 13.11, |
|
"learning_rate": 0.0021803571428571427, |
|
"loss": 5.8782, |
|
"step": 459 |
|
}, |
|
{ |
|
"epoch": 13.14, |
|
"learning_rate": 0.0021785714285714286, |
|
"loss": 5.787, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 13.17, |
|
"learning_rate": 0.0021767857142857144, |
|
"loss": 5.8339, |
|
"step": 461 |
|
}, |
|
{ |
|
"epoch": 13.2, |
|
"learning_rate": 0.002175, |
|
"loss": 5.8303, |
|
"step": 462 |
|
}, |
|
{ |
|
"epoch": 13.23, |
|
"learning_rate": 0.0021732142857142857, |
|
"loss": 5.8187, |
|
"step": 463 |
|
}, |
|
{ |
|
"epoch": 13.26, |
|
"learning_rate": 0.0021714285714285715, |
|
"loss": 5.7448, |
|
"step": 464 |
|
}, |
|
{ |
|
"epoch": 13.29, |
|
"learning_rate": 0.002169642857142857, |
|
"loss": 5.8681, |
|
"step": 465 |
|
}, |
|
{ |
|
"epoch": 13.31, |
|
"learning_rate": 0.002167857142857143, |
|
"loss": 5.8039, |
|
"step": 466 |
|
}, |
|
{ |
|
"epoch": 13.34, |
|
"learning_rate": 0.0021660714285714287, |
|
"loss": 5.8511, |
|
"step": 467 |
|
}, |
|
{ |
|
"epoch": 13.37, |
|
"learning_rate": 0.0021642857142857145, |
|
"loss": 5.8184, |
|
"step": 468 |
|
}, |
|
{ |
|
"epoch": 13.4, |
|
"learning_rate": 0.0021625, |
|
"loss": 5.7656, |
|
"step": 469 |
|
}, |
|
{ |
|
"epoch": 13.43, |
|
"learning_rate": 0.0021607142857142858, |
|
"loss": 5.8613, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 13.46, |
|
"learning_rate": 0.0021589285714285716, |
|
"loss": 5.849, |
|
"step": 471 |
|
}, |
|
{ |
|
"epoch": 13.49, |
|
"learning_rate": 0.002157142857142857, |
|
"loss": 5.8011, |
|
"step": 472 |
|
}, |
|
{ |
|
"epoch": 13.51, |
|
"learning_rate": 0.002155357142857143, |
|
"loss": 5.7813, |
|
"step": 473 |
|
}, |
|
{ |
|
"epoch": 13.54, |
|
"learning_rate": 0.0021535714285714287, |
|
"loss": 5.8186, |
|
"step": 474 |
|
}, |
|
{ |
|
"epoch": 13.57, |
|
"learning_rate": 0.002151785714285714, |
|
"loss": 5.8303, |
|
"step": 475 |
|
}, |
|
{ |
|
"epoch": 13.6, |
|
"learning_rate": 0.00215, |
|
"loss": 5.7879, |
|
"step": 476 |
|
}, |
|
{ |
|
"epoch": 13.63, |
|
"learning_rate": 0.002148214285714286, |
|
"loss": 5.6829, |
|
"step": 477 |
|
}, |
|
{ |
|
"epoch": 13.66, |
|
"learning_rate": 0.0021464285714285717, |
|
"loss": 5.7869, |
|
"step": 478 |
|
}, |
|
{ |
|
"epoch": 13.69, |
|
"learning_rate": 0.002144642857142857, |
|
"loss": 5.6489, |
|
"step": 479 |
|
}, |
|
{ |
|
"epoch": 13.71, |
|
"learning_rate": 0.002142857142857143, |
|
"loss": 5.8708, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 13.74, |
|
"learning_rate": 0.002141071428571429, |
|
"loss": 5.7791, |
|
"step": 481 |
|
}, |
|
{ |
|
"epoch": 13.77, |
|
"learning_rate": 0.0021392857142857142, |
|
"loss": 5.7497, |
|
"step": 482 |
|
}, |
|
{ |
|
"epoch": 13.8, |
|
"learning_rate": 0.0021375, |
|
"loss": 5.827, |
|
"step": 483 |
|
}, |
|
{ |
|
"epoch": 13.83, |
|
"learning_rate": 0.002135714285714286, |
|
"loss": 5.7286, |
|
"step": 484 |
|
}, |
|
{ |
|
"epoch": 13.86, |
|
"learning_rate": 0.0021339285714285713, |
|
"loss": 5.8183, |
|
"step": 485 |
|
}, |
|
{ |
|
"epoch": 13.89, |
|
"learning_rate": 0.002132142857142857, |
|
"loss": 5.7191, |
|
"step": 486 |
|
}, |
|
{ |
|
"epoch": 13.91, |
|
"learning_rate": 0.002130357142857143, |
|
"loss": 5.7647, |
|
"step": 487 |
|
}, |
|
{ |
|
"epoch": 13.94, |
|
"learning_rate": 0.002128571428571429, |
|
"loss": 5.799, |
|
"step": 488 |
|
}, |
|
{ |
|
"epoch": 13.97, |
|
"learning_rate": 0.0021267857142857143, |
|
"loss": 5.7583, |
|
"step": 489 |
|
}, |
|
{ |
|
"epoch": 14.0, |
|
"learning_rate": 0.002125, |
|
"loss": 5.6326, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 14.03, |
|
"learning_rate": 0.002123214285714286, |
|
"loss": 5.614, |
|
"step": 491 |
|
}, |
|
{ |
|
"epoch": 14.06, |
|
"learning_rate": 0.0021214285714285714, |
|
"loss": 5.7278, |
|
"step": 492 |
|
}, |
|
{ |
|
"epoch": 14.09, |
|
"learning_rate": 0.0021196428571428573, |
|
"loss": 5.6661, |
|
"step": 493 |
|
}, |
|
{ |
|
"epoch": 14.11, |
|
"learning_rate": 0.002117857142857143, |
|
"loss": 5.6822, |
|
"step": 494 |
|
}, |
|
{ |
|
"epoch": 14.14, |
|
"learning_rate": 0.002116071428571429, |
|
"loss": 5.7356, |
|
"step": 495 |
|
}, |
|
{ |
|
"epoch": 14.17, |
|
"learning_rate": 0.0021142857142857144, |
|
"loss": 5.6169, |
|
"step": 496 |
|
}, |
|
{ |
|
"epoch": 14.2, |
|
"learning_rate": 0.0021125000000000002, |
|
"loss": 5.7203, |
|
"step": 497 |
|
}, |
|
{ |
|
"epoch": 14.23, |
|
"learning_rate": 0.002110714285714286, |
|
"loss": 5.6377, |
|
"step": 498 |
|
}, |
|
{ |
|
"epoch": 14.26, |
|
"learning_rate": 0.0021089285714285715, |
|
"loss": 5.6836, |
|
"step": 499 |
|
}, |
|
{ |
|
"epoch": 14.29, |
|
"learning_rate": 0.002107142857142857, |
|
"loss": 5.6531, |
|
"step": 500 |
|
} |
|
], |
|
"logging_steps": 1, |
|
"max_steps": 1680, |
|
"num_train_epochs": 48, |
|
"save_steps": 100, |
|
"total_flos": 2.884638740186112e+17, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|