{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 2.977777777777778,
  "eval_steps": 500,
  "global_step": 336,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    { "epoch": 0.008888888888888889, "grad_norm": 0.12853842973709106, "learning_rate": 4e-05, "loss": 1.435, "step": 1 },
    { "epoch": 0.017777777777777778, "grad_norm": 0.1351887285709381, "learning_rate": 8e-05, "loss": 1.4834, "step": 2 },
    { "epoch": 0.02666666666666667, "grad_norm": 0.1353459358215332, "learning_rate": 0.00012, "loss": 1.4884, "step": 3 },
    { "epoch": 0.035555555555555556, "grad_norm": 0.15532286465168, "learning_rate": 0.00016, "loss": 1.4011, "step": 4 },
    { "epoch": 0.044444444444444446, "grad_norm": 0.18807101249694824, "learning_rate": 0.0002, "loss": 1.2824, "step": 5 },
    { "epoch": 0.05333333333333334, "grad_norm": 0.21187615394592285, "learning_rate": 0.00019939577039274927, "loss": 1.1333, "step": 6 },
    { "epoch": 0.06222222222222222, "grad_norm": 0.2363642305135727, "learning_rate": 0.0001987915407854985, "loss": 1.1263, "step": 7 },
    { "epoch": 0.07111111111111111, "grad_norm": 0.23471707105636597, "learning_rate": 0.00019818731117824773, "loss": 0.9295, "step": 8 },
    { "epoch": 0.08, "grad_norm": 0.2460624724626541, "learning_rate": 0.00019758308157099698, "loss": 0.7405, "step": 9 },
    { "epoch": 0.08888888888888889, "grad_norm": 0.7134775519371033, "learning_rate": 0.00019697885196374624, "loss": 0.6167, "step": 10 },
    { "epoch": 0.09777777777777778, "grad_norm": 0.2952987551689148, "learning_rate": 0.0001963746223564955, "loss": 0.4617, "step": 11 },
    { "epoch": 0.10666666666666667, "grad_norm": 0.2638149559497833, "learning_rate": 0.00019577039274924472, "loss": 0.3641, "step": 12 },
    { "epoch": 0.11555555555555555, "grad_norm": 0.21738190948963165, "learning_rate": 0.00019516616314199395, "loss": 0.2669, "step": 13 },
    { "epoch": 0.12444444444444444, "grad_norm": 0.15935368835926056, "learning_rate": 0.0001945619335347432, "loss": 0.1899, "step": 14 },
    { "epoch": 0.13333333333333333, "grad_norm": 0.11436018347740173, "learning_rate": 0.00019395770392749247, "loss": 0.1529, "step": 15 },
    { "epoch": 0.14222222222222222, "grad_norm": 0.11110496520996094, "learning_rate": 0.00019335347432024172, "loss": 0.1928, "step": 16 },
    { "epoch": 0.1511111111111111, "grad_norm": 0.09791559725999832, "learning_rate": 0.00019274924471299093, "loss": 0.2024, "step": 17 },
    { "epoch": 0.16, "grad_norm": 0.07391936331987381, "learning_rate": 0.00019214501510574018, "loss": 0.1278, "step": 18 },
    { "epoch": 0.1688888888888889, "grad_norm": 0.08692853152751923, "learning_rate": 0.00019154078549848944, "loss": 0.1588, "step": 19 },
    { "epoch": 0.17777777777777778, "grad_norm": 0.08260414749383926, "learning_rate": 0.0001909365558912387, "loss": 0.1765, "step": 20 },
    { "epoch": 0.18666666666666668, "grad_norm": 0.08524943888187408, "learning_rate": 0.00019033232628398793, "loss": 0.2432, "step": 21 },
    { "epoch": 0.19555555555555557, "grad_norm": 0.08510305732488632, "learning_rate": 0.00018972809667673716, "loss": 0.1527, "step": 22 },
    { "epoch": 0.20444444444444446, "grad_norm": 0.07516520470380783, "learning_rate": 0.0001891238670694864, "loss": 0.1394, "step": 23 },
    { "epoch": 0.21333333333333335, "grad_norm": 0.08073515444993973, "learning_rate": 0.00018851963746223567, "loss": 0.1754, "step": 24 },
    { "epoch": 0.2222222222222222, "grad_norm": 0.06894166767597198, "learning_rate": 0.0001879154078549849, "loss": 0.1352, "step": 25 },
    { "epoch": 0.2311111111111111, "grad_norm": 0.07681784778833389, "learning_rate": 0.00018731117824773416, "loss": 0.1514, "step": 26 },
    { "epoch": 0.24, "grad_norm": 0.06964302062988281, "learning_rate": 0.00018670694864048338, "loss": 0.1408, "step": 27 },
    { "epoch": 0.24888888888888888, "grad_norm": 0.08564883470535278, "learning_rate": 0.00018610271903323264, "loss": 0.194, "step": 28 },
    { "epoch": 0.2577777777777778, "grad_norm": 0.07006793469190598, "learning_rate": 0.0001854984894259819, "loss": 0.1454, "step": 29 },
    { "epoch": 0.26666666666666666, "grad_norm": 0.07050830870866776, "learning_rate": 0.00018489425981873113, "loss": 0.134, "step": 30 },
    { "epoch": 0.27555555555555555, "grad_norm": 0.0687691867351532, "learning_rate": 0.00018429003021148036, "loss": 0.122, "step": 31 },
    { "epoch": 0.28444444444444444, "grad_norm": 0.8294079899787903, "learning_rate": 0.00018368580060422961, "loss": 0.1737, "step": 32 },
    { "epoch": 0.29333333333333333, "grad_norm": 0.0775313451886177, "learning_rate": 0.00018308157099697887, "loss": 0.1249, "step": 33 },
    { "epoch": 0.3022222222222222, "grad_norm": 0.08075764775276184, "learning_rate": 0.0001824773413897281, "loss": 0.1596, "step": 34 },
    { "epoch": 0.3111111111111111, "grad_norm": 0.09499762207269669, "learning_rate": 0.00018187311178247736, "loss": 0.1788, "step": 35 },
    { "epoch": 0.32, "grad_norm": 0.09979691356420517, "learning_rate": 0.00018126888217522659, "loss": 0.1286, "step": 36 },
    { "epoch": 0.3288888888888889, "grad_norm": 0.1078299731016159, "learning_rate": 0.00018066465256797584, "loss": 0.1473, "step": 37 },
    { "epoch": 0.3377777777777778, "grad_norm": 0.11813590675592422, "learning_rate": 0.0001800604229607251, "loss": 0.1577, "step": 38 },
    { "epoch": 0.3466666666666667, "grad_norm": 0.12400685250759125, "learning_rate": 0.00017945619335347433, "loss": 0.1554, "step": 39 },
    { "epoch": 0.35555555555555557, "grad_norm": 0.13364343345165253, "learning_rate": 0.00017885196374622359, "loss": 0.1937, "step": 40 },
    { "epoch": 0.36444444444444446, "grad_norm": 0.1314571350812912, "learning_rate": 0.00017824773413897281, "loss": 0.1417, "step": 41 },
    { "epoch": 0.37333333333333335, "grad_norm": 0.14195436239242554, "learning_rate": 0.00017764350453172207, "loss": 0.1471, "step": 42 },
    { "epoch": 0.38222222222222224, "grad_norm": 0.14103101193904877, "learning_rate": 0.0001770392749244713, "loss": 0.1512, "step": 43 },
    { "epoch": 0.39111111111111113, "grad_norm": 0.15249131619930267, "learning_rate": 0.00017643504531722056, "loss": 0.1261, "step": 44 },
    { "epoch": 0.4, "grad_norm": 0.16161827743053436, "learning_rate": 0.0001758308157099698, "loss": 0.1361, "step": 45 },
    { "epoch": 0.4088888888888889, "grad_norm": 0.15586595237255096, "learning_rate": 0.00017522658610271904, "loss": 0.1368, "step": 46 },
    { "epoch": 0.4177777777777778, "grad_norm": 0.14314575493335724, "learning_rate": 0.00017462235649546827, "loss": 0.1449, "step": 47 },
    { "epoch": 0.4266666666666667, "grad_norm": 0.11395762860774994, "learning_rate": 0.00017401812688821753, "loss": 0.1285, "step": 48 },
    { "epoch": 0.43555555555555553, "grad_norm": 0.09165538102388382, "learning_rate": 0.0001734138972809668, "loss": 0.1531, "step": 49 },
    { "epoch": 0.4444444444444444, "grad_norm": 0.08022209256887436, "learning_rate": 0.00017280966767371602, "loss": 0.1078, "step": 50 },
    { "epoch": 0.4533333333333333, "grad_norm": 0.08753547072410583, "learning_rate": 0.00017220543806646527, "loss": 0.1511, "step": 51 },
    { "epoch": 0.4622222222222222, "grad_norm": 0.09489995241165161, "learning_rate": 0.0001716012084592145, "loss": 0.1109, "step": 52 },
    { "epoch": 0.4711111111111111, "grad_norm": 0.09574859589338303, "learning_rate": 0.00017099697885196376, "loss": 0.112, "step": 53 },
    { "epoch": 0.48, "grad_norm": 0.10898902267217636, "learning_rate": 0.000170392749244713, "loss": 0.1055, "step": 54 },
    { "epoch": 0.4888888888888889, "grad_norm": 0.1365131437778473, "learning_rate": 0.00016978851963746225, "loss": 0.1231, "step": 55 },
    { "epoch": 0.49777777777777776, "grad_norm": 0.13064222037792206, "learning_rate": 0.00016918429003021147, "loss": 0.1426, "step": 56 },
    { "epoch": 0.5066666666666667, "grad_norm": 0.15198342502117157, "learning_rate": 0.00016858006042296073, "loss": 0.1152, "step": 57 },
    { "epoch": 0.5155555555555555, "grad_norm": 0.14036396145820618, "learning_rate": 0.00016797583081571, "loss": 0.0877, "step": 58 },
    { "epoch": 0.5244444444444445, "grad_norm": 0.12717780470848083, "learning_rate": 0.00016737160120845922, "loss": 0.1527, "step": 59 },
    { "epoch": 0.5333333333333333, "grad_norm": 0.09991107136011124, "learning_rate": 0.00016676737160120847, "loss": 0.1184, "step": 60 },
    { "epoch": 0.5422222222222223, "grad_norm": 0.08420077711343765, "learning_rate": 0.0001661631419939577, "loss": 0.1162, "step": 61 },
    { "epoch": 0.5511111111111111, "grad_norm": 0.07980040460824966, "learning_rate": 0.00016555891238670696, "loss": 0.1154, "step": 62 },
    { "epoch": 0.56, "grad_norm": 0.07247896492481232, "learning_rate": 0.00016495468277945622, "loss": 0.1431, "step": 63 },
    { "epoch": 0.5688888888888889, "grad_norm": 0.06688614189624786, "learning_rate": 0.00016435045317220545, "loss": 0.1054, "step": 64 },
    { "epoch": 0.5777777777777777, "grad_norm": 0.059071995317935944, "learning_rate": 0.00016374622356495468, "loss": 0.0957, "step": 65 },
    { "epoch": 0.5866666666666667, "grad_norm": 0.05386095494031906, "learning_rate": 0.00016314199395770393, "loss": 0.1225, "step": 66 },
    { "epoch": 0.5955555555555555, "grad_norm": 0.04684595763683319, "learning_rate": 0.0001625377643504532, "loss": 0.0944, "step": 67 },
    { "epoch": 0.6044444444444445, "grad_norm": 0.0526355542242527, "learning_rate": 0.00016193353474320242, "loss": 0.1067, "step": 68 },
    { "epoch": 0.6133333333333333, "grad_norm": 0.05670945346355438, "learning_rate": 0.00016132930513595165, "loss": 0.1201, "step": 69 },
    { "epoch": 0.6222222222222222, "grad_norm": 0.056082066148519516, "learning_rate": 0.0001607250755287009, "loss": 0.1068, "step": 70 },
    { "epoch": 0.6311111111111111, "grad_norm": 0.044239770621061325, "learning_rate": 0.00016012084592145016, "loss": 0.0701, "step": 71 },
    { "epoch": 0.64, "grad_norm": 0.06141743063926697, "learning_rate": 0.00015951661631419942, "loss": 0.1047, "step": 72 },
    { "epoch": 0.6488888888888888, "grad_norm": 0.06362797319889069, "learning_rate": 0.00015891238670694865, "loss": 0.15, "step": 73 },
    { "epoch": 0.6577777777777778, "grad_norm": 0.057470619678497314, "learning_rate": 0.00015830815709969788, "loss": 0.1157, "step": 74 },
    { "epoch": 0.6666666666666666, "grad_norm": 0.057423558086156845, "learning_rate": 0.00015770392749244713, "loss": 0.1046, "step": 75 },
    { "epoch": 0.6755555555555556, "grad_norm": 0.052142687141895294, "learning_rate": 0.0001570996978851964, "loss": 0.0964, "step": 76 },
    { "epoch": 0.6844444444444444, "grad_norm": 0.06471661478281021, "learning_rate": 0.00015649546827794562, "loss": 0.188, "step": 77 },
    { "epoch": 0.6933333333333334, "grad_norm": 0.054023627191782, "learning_rate": 0.00015589123867069485, "loss": 0.1177, "step": 78 },
    { "epoch": 0.7022222222222222, "grad_norm": 0.051102183759212494, "learning_rate": 0.0001552870090634441, "loss": 0.1012, "step": 79 },
    { "epoch": 0.7111111111111111, "grad_norm": 0.06166974827647209, "learning_rate": 0.00015468277945619336, "loss": 0.1576, "step": 80 },
    { "epoch": 0.72, "grad_norm": 0.049090635031461716, "learning_rate": 0.00015407854984894262, "loss": 0.1162, "step": 81 },
    { "epoch": 0.7288888888888889, "grad_norm": 0.05077630653977394, "learning_rate": 0.00015347432024169185, "loss": 0.0921, "step": 82 },
    { "epoch": 0.7377777777777778, "grad_norm": 0.0594823993742466, "learning_rate": 0.00015287009063444108, "loss": 0.0842, "step": 83 },
    { "epoch": 0.7466666666666667, "grad_norm": 0.049247365444898605, "learning_rate": 0.00015226586102719034, "loss": 0.087, "step": 84 },
    { "epoch": 0.7555555555555555, "grad_norm": 0.058970123529434204, "learning_rate": 0.0001516616314199396, "loss": 0.1541, "step": 85 },
    { "epoch": 0.7644444444444445, "grad_norm": 0.07077732682228088, "learning_rate": 0.00015105740181268885, "loss": 0.1124, "step": 86 },
    { "epoch": 0.7733333333333333, "grad_norm": 0.04939179867506027, "learning_rate": 0.00015045317220543805, "loss": 0.0978, "step": 87 },
    { "epoch": 0.7822222222222223, "grad_norm": 0.06014474108815193, "learning_rate": 0.0001498489425981873, "loss": 0.1195, "step": 88 },
    { "epoch": 0.7911111111111111, "grad_norm": 0.05583404749631882, "learning_rate": 0.00014924471299093656, "loss": 0.141, "step": 89 },
    { "epoch": 0.8, "grad_norm": 0.053725555539131165, "learning_rate": 0.00014864048338368582, "loss": 0.1149, "step": 90 },
    { "epoch": 0.8088888888888889, "grad_norm": 0.044657181948423386, "learning_rate": 0.00014803625377643505, "loss": 0.0721, "step": 91 },
    { "epoch": 0.8177777777777778, "grad_norm": 0.06342475116252899, "learning_rate": 0.00014743202416918428, "loss": 0.1177, "step": 92 },
    { "epoch": 0.8266666666666667, "grad_norm": 0.056376609951257706, "learning_rate": 0.00014682779456193354, "loss": 0.0976, "step": 93 },
    { "epoch": 0.8355555555555556, "grad_norm": 0.05411997064948082, "learning_rate": 0.0001462235649546828, "loss": 0.1059, "step": 94 },
    { "epoch": 0.8444444444444444, "grad_norm": 0.056368228048086166, "learning_rate": 0.00014561933534743205, "loss": 0.1133, "step": 95 },
    { "epoch": 0.8533333333333334, "grad_norm": 0.05209295451641083, "learning_rate": 0.00014501510574018128, "loss": 0.0996, "step": 96 },
    { "epoch": 0.8622222222222222, "grad_norm": 0.054722413420677185, "learning_rate": 0.0001444108761329305, "loss": 0.1029, "step": 97 },
    { "epoch": 0.8711111111111111, "grad_norm": 0.05473008751869202, "learning_rate": 0.00014380664652567977, "loss": 0.1176, "step": 98 },
    { "epoch": 0.88, "grad_norm": 0.05287059769034386, "learning_rate": 0.00014320241691842902, "loss": 0.0909, "step": 99 },
    { "epoch": 0.8888888888888888, "grad_norm": 0.04134868085384369, "learning_rate": 0.00014259818731117825, "loss": 0.0673, "step": 100 },
    { "epoch": 0.8977777777777778, "grad_norm": 0.048795074224472046, "learning_rate": 0.00014199395770392748, "loss": 0.0761, "step": 101 },
    { "epoch": 0.9066666666666666, "grad_norm": 0.06269072741270065, "learning_rate": 0.00014138972809667674, "loss": 0.1244, "step": 102 },
    { "epoch": 0.9155555555555556, "grad_norm": 0.05946790799498558, "learning_rate": 0.000140785498489426, "loss": 0.1307, "step": 103 },
    { "epoch": 0.9244444444444444, "grad_norm": 0.059073008596897125, "learning_rate": 0.00014018126888217525, "loss": 0.1274, "step": 104 },
    { "epoch": 0.9333333333333333, "grad_norm": 0.058441124856472015, "learning_rate": 0.00013957703927492448, "loss": 0.1185, "step": 105 },
    { "epoch": 0.9422222222222222, "grad_norm": 0.06066688522696495, "learning_rate": 0.0001389728096676737, "loss": 0.116, "step": 106 },
    { "epoch": 0.9511111111111111, "grad_norm": 0.05696433410048485, "learning_rate": 0.00013836858006042297, "loss": 0.0981, "step": 107 },
    { "epoch": 0.96, "grad_norm": 0.04884543642401695, "learning_rate": 0.00013776435045317222, "loss": 0.0844, "step": 108 },
    { "epoch": 0.9688888888888889, "grad_norm": 0.05151006579399109, "learning_rate": 0.00013716012084592145, "loss": 0.1165, "step": 109 },
    { "epoch": 0.9777777777777777, "grad_norm": 0.048714157193899155, "learning_rate": 0.0001365558912386707, "loss": 0.0914, "step": 110 },
    { "epoch": 0.9866666666666667, "grad_norm": 0.05728358030319214, "learning_rate": 0.00013595166163141994, "loss": 0.096, "step": 111 },
    { "epoch": 0.9955555555555555, "grad_norm": 0.04793373495340347, "learning_rate": 0.0001353474320241692, "loss": 0.0732, "step": 112 },
    { "epoch": 1.0, "grad_norm": 0.0691237822175026, "learning_rate": 0.00013474320241691845, "loss": 0.0772, "step": 113 },
    { "epoch": 1.008888888888889, "grad_norm": 0.059231679886579514, "learning_rate": 0.00013413897280966768, "loss": 0.1035, "step": 114 },
    { "epoch": 1.0177777777777777, "grad_norm": 0.050710082054138184, "learning_rate": 0.0001335347432024169, "loss": 0.0927, "step": 115 },
    { "epoch": 1.0266666666666666, "grad_norm": 0.052448518574237823, "learning_rate": 0.00013293051359516617, "loss": 0.0927, "step": 116 },
    { "epoch": 1.0355555555555556, "grad_norm": 0.05111970752477646, "learning_rate": 0.00013232628398791543, "loss": 0.101, "step": 117 },
    { "epoch": 1.0444444444444445, "grad_norm": 0.050373584032058716, "learning_rate": 0.00013172205438066465, "loss": 0.0958, "step": 118 },
    { "epoch": 1.0533333333333332, "grad_norm": 0.053760871291160583, "learning_rate": 0.0001311178247734139, "loss": 0.1004, "step": 119 },
    { "epoch": 1.0622222222222222, "grad_norm": 0.050390031188726425, "learning_rate": 0.00013051359516616314, "loss": 0.098, "step": 120 },
    { "epoch": 1.0711111111111111, "grad_norm": 0.056723542511463165, "learning_rate": 0.0001299093655589124, "loss": 0.1019, "step": 121 },
    { "epoch": 1.08, "grad_norm": 0.05544177070260048, "learning_rate": 0.00012930513595166163, "loss": 0.105, "step": 122 },
    { "epoch": 1.0888888888888888, "grad_norm": 0.05266315117478371, "learning_rate": 0.00012870090634441088, "loss": 0.0852, "step": 123 },
    { "epoch": 1.0977777777777777, "grad_norm": 0.05095309391617775, "learning_rate": 0.0001280966767371601, "loss": 0.0883, "step": 124 },
    { "epoch": 1.1066666666666667, "grad_norm": 0.052665866911411285, "learning_rate": 0.00012749244712990937, "loss": 0.0821, "step": 125 },
    { "epoch": 1.1155555555555556, "grad_norm": 0.05651025474071503, "learning_rate": 0.00012688821752265863, "loss": 0.0923, "step": 126 },
    { "epoch": 1.1244444444444444, "grad_norm": 0.053802527487277985, "learning_rate": 0.00012628398791540786, "loss": 0.0934, "step": 127 },
    { "epoch": 1.1333333333333333, "grad_norm": 0.04798378795385361, "learning_rate": 0.0001256797583081571, "loss": 0.0745, "step": 128 },
    { "epoch": 1.1422222222222222, "grad_norm": 0.04485182464122772, "learning_rate": 0.00012507552870090634, "loss": 0.0687, "step": 129 },
    { "epoch": 1.1511111111111112, "grad_norm": 0.05560389533638954, "learning_rate": 0.0001244712990936556, "loss": 0.1228, "step": 130 },
    { "epoch": 1.16, "grad_norm": 0.053619932383298874, "learning_rate": 0.00012386706948640483, "loss": 0.1016, "step": 131 },
    { "epoch": 1.1688888888888889, "grad_norm": 0.061660535633563995, "learning_rate": 0.00012326283987915408, "loss": 0.1084, "step": 132 },
    { "epoch": 1.1777777777777778, "grad_norm": 0.05607709661126137, "learning_rate": 0.00012265861027190334, "loss": 0.1173, "step": 133 },
    { "epoch": 1.1866666666666668, "grad_norm": 0.06121942773461342, "learning_rate": 0.00012205438066465258, "loss": 0.1047, "step": 134 },
    { "epoch": 1.1955555555555555, "grad_norm": 0.058909133076667786, "learning_rate": 0.00012145015105740183, "loss": 0.0945, "step": 135 },
    { "epoch": 1.2044444444444444, "grad_norm": 0.04819121211767197, "learning_rate": 0.00012084592145015106, "loss": 0.0814, "step": 136 },
    { "epoch": 1.2133333333333334, "grad_norm": 0.052952077239751816, "learning_rate": 0.0001202416918429003, "loss": 0.0723, "step": 137 },
    { "epoch": 1.2222222222222223, "grad_norm": 0.059440117329359055, "learning_rate": 0.00011963746223564956, "loss": 0.1039, "step": 138 },
    { "epoch": 1.231111111111111, "grad_norm": 0.06102529540657997, "learning_rate": 0.0001190332326283988, "loss": 0.0946, "step": 139 },
    { "epoch": 1.24, "grad_norm": 0.05853147804737091, "learning_rate": 0.00011842900302114803, "loss": 0.0928, "step": 140 },
    { "epoch": 1.248888888888889, "grad_norm": 0.05324235185980797, "learning_rate": 0.00011782477341389729, "loss": 0.0656, "step": 141 },
    { "epoch": 1.2577777777777777, "grad_norm": 0.060984380543231964, "learning_rate": 0.00011722054380664653, "loss": 0.114, "step": 142 },
    { "epoch": 1.2666666666666666, "grad_norm": 0.05423833429813385, "learning_rate": 0.00011661631419939579, "loss": 0.0758, "step": 143 },
    { "epoch": 1.2755555555555556, "grad_norm": 0.05878995731472969, "learning_rate": 0.00011601208459214503, "loss": 0.095, "step": 144 },
    { "epoch": 1.2844444444444445, "grad_norm": 0.05326640605926514, "learning_rate": 0.00011540785498489426, "loss": 0.0893, "step": 145 },
    { "epoch": 1.2933333333333334, "grad_norm": 0.045536670833826065, "learning_rate": 0.00011480362537764352, "loss": 0.0756, "step": 146 },
    { "epoch": 1.3022222222222222, "grad_norm": 0.06222836673259735, "learning_rate": 0.00011419939577039276, "loss": 0.0832, "step": 147 },
    { "epoch": 1.3111111111111111, "grad_norm": 0.05563228577375412, "learning_rate": 0.00011359516616314202, "loss": 0.0987, "step": 148 },
    { "epoch": 1.32, "grad_norm": 0.05855840817093849, "learning_rate": 0.00011299093655589123, "loss": 0.1012, "step": 149 },
    { "epoch": 1.3288888888888888, "grad_norm": 0.06626389175653458, "learning_rate": 0.00011238670694864049, "loss": 0.1326, "step": 150 },
    { "epoch": 1.3377777777777777, "grad_norm": 0.050519317388534546, "learning_rate": 0.00011178247734138973, "loss": 0.0673, "step": 151 },
    { "epoch": 1.3466666666666667, "grad_norm": 0.057173047214746475, "learning_rate": 0.00011117824773413899, "loss": 0.0813, "step": 152 },
    { "epoch": 1.3555555555555556, "grad_norm": 0.04732273891568184, "learning_rate": 0.00011057401812688822, "loss": 0.0611, "step": 153 },
    { "epoch": 1.3644444444444446, "grad_norm": 0.058079130947589874, "learning_rate": 0.00010996978851963746, "loss": 0.111, "step": 154 },
    { "epoch": 1.3733333333333333, "grad_norm": 0.054955486208200455, "learning_rate": 0.00010936555891238672, "loss": 0.0819, "step": 155 },
    { "epoch": 1.3822222222222222, "grad_norm": 0.06241566315293312, "learning_rate": 0.00010876132930513596, "loss": 0.0897, "step": 156 },
    { "epoch": 1.3911111111111112, "grad_norm": 0.06284183263778687, "learning_rate": 0.00010815709969788522, "loss": 0.0995, "step": 157 },
    { "epoch": 1.4, "grad_norm": 0.05234519764780998, "learning_rate": 0.00010755287009063443, "loss": 0.0886, "step": 158 },
    { "epoch": 1.4088888888888889, "grad_norm": 0.05802811682224274, "learning_rate": 0.00010694864048338369, "loss": 0.0867, "step": 159 },
    { "epoch": 1.4177777777777778, "grad_norm": 0.05341397225856781, "learning_rate": 0.00010634441087613293, "loss": 0.0822, "step": 160 },
    { "epoch": 1.4266666666666667, "grad_norm": 0.07113339751958847, "learning_rate": 0.00010574018126888219, "loss": 0.1002, "step": 161 },
    { "epoch": 1.4355555555555555, "grad_norm": 0.06063013896346092, "learning_rate": 0.00010513595166163142, "loss": 0.0981, "step": 162 },
    { "epoch": 1.4444444444444444, "grad_norm": 0.06280765682458878, "learning_rate": 0.00010453172205438066, "loss": 0.0872, "step": 163 },
    { "epoch": 1.4533333333333334, "grad_norm": 0.053992606699466705, "learning_rate": 0.00010392749244712992, "loss": 0.0811, "step": 164 },
    { "epoch": 1.462222222222222, "grad_norm": 0.06484760344028473, "learning_rate": 0.00010332326283987916, "loss": 0.1499, "step": 165 },
    { "epoch": 1.471111111111111, "grad_norm": 0.04423009976744652, "learning_rate": 0.00010271903323262842, "loss": 0.0587, "step": 166 },
    { "epoch": 1.48, "grad_norm": 0.05195729061961174, "learning_rate": 0.00010211480362537765, "loss": 0.0756, "step": 167 },
    { "epoch": 1.488888888888889, "grad_norm": 0.07213317602872849, "learning_rate": 0.00010151057401812689, "loss": 0.1274, "step": 168 },
    { "epoch": 1.4977777777777779, "grad_norm": 0.07075338065624237, "learning_rate": 0.00010090634441087615, "loss": 0.1482, "step": 169 },
    { "epoch": 1.5066666666666668, "grad_norm": 0.05478464439511299, "learning_rate": 0.00010030211480362539, "loss": 0.0757, "step": 170 },
    { "epoch": 1.5155555555555555, "grad_norm": 0.05618472024798393, "learning_rate": 9.969788519637463e-05, "loss": 0.0809, "step": 171 },
    { "epoch": 1.5244444444444445, "grad_norm": 0.05127232149243355, "learning_rate": 9.909365558912386e-05, "loss": 0.0738, "step": 172 },
    { "epoch": 1.5333333333333332, "grad_norm": 0.05737868323922157, "learning_rate": 9.848942598187312e-05, "loss": 0.0944, "step": 173 },
    { "epoch": 1.5422222222222222, "grad_norm": 0.05908210948109627, "learning_rate": 9.788519637462236e-05, "loss": 0.0892, "step": 174 },
    { "epoch": 1.551111111111111, "grad_norm": 0.04666788876056671, "learning_rate": 9.72809667673716e-05, "loss": 0.059, "step": 175 },
    { "epoch": 1.56, "grad_norm": 0.06086019426584244, "learning_rate": 9.667673716012086e-05, "loss": 0.0967, "step": 176 },
    { "epoch": 1.568888888888889, "grad_norm": 0.07607266306877136, "learning_rate": 9.607250755287009e-05, "loss": 0.1171, "step": 177 },
    { "epoch": 1.5777777777777777, "grad_norm": 0.05511125177145004, "learning_rate": 9.546827794561935e-05, "loss": 0.0803, "step": 178 },
    { "epoch": 1.5866666666666667, "grad_norm": 0.04639158770442009, "learning_rate": 9.486404833836858e-05, "loss": 0.0552, "step": 179 },
    { "epoch": 1.5955555555555554, "grad_norm": 0.06064977869391441, "learning_rate": 9.425981873111783e-05, "loss": 0.1009, "step": 180 },
    { "epoch": 1.6044444444444443, "grad_norm": 0.0693751648068428, "learning_rate": 9.365558912386708e-05, "loss": 0.1033, "step": 181 },
    { "epoch": 1.6133333333333333, "grad_norm": 0.060564182698726654, "learning_rate": 9.305135951661632e-05, "loss": 0.0783, "step": 182 },
    { "epoch": 1.6222222222222222, "grad_norm": 0.051936618983745575, "learning_rate": 9.244712990936556e-05, "loss": 0.0789, "step": 183 },
    { "epoch": 1.6311111111111112, "grad_norm": 0.0632539689540863, "learning_rate": 9.184290030211481e-05, "loss": 0.1033, "step": 184 },
    { "epoch": 1.6400000000000001, "grad_norm": 0.06275516003370285, "learning_rate": 9.123867069486405e-05, "loss": 0.0837, "step": 185 },
    { "epoch": 1.6488888888888888, "grad_norm": 0.07486943155527115, "learning_rate": 9.063444108761329e-05, "loss": 0.1463, "step": 186 },
    { "epoch": 1.6577777777777778, "grad_norm": 0.061665549874305725, "learning_rate": 9.003021148036255e-05, "loss": 0.0962, "step": 187 },
    { "epoch": 1.6666666666666665, "grad_norm": 0.058605264872312546, "learning_rate": 8.942598187311179e-05, "loss": 0.0903, "step": 188 },
    { "epoch": 1.6755555555555555, "grad_norm": 0.05885601043701172, "learning_rate": 8.882175226586104e-05, "loss": 0.0792, "step": 189 },
    { "epoch": 1.6844444444444444, "grad_norm": 0.06224210560321808, "learning_rate": 8.821752265861028e-05, "loss": 0.0985, "step": 190 },
    { "epoch": 1.6933333333333334, "grad_norm": 0.0739174336194992, "learning_rate": 8.761329305135952e-05, "loss": 0.1079, "step": 191 },
    { "epoch": 1.7022222222222223, "grad_norm": 0.06836007535457611, "learning_rate": 8.700906344410877e-05, "loss": 0.122, "step": 192 },
    { "epoch": 1.7111111111111112, "grad_norm": 0.06372584402561188, "learning_rate": 8.640483383685801e-05, "loss": 0.1006, "step": 193 },
    { "epoch": 1.72, "grad_norm": 0.05704513192176819, "learning_rate": 8.580060422960725e-05, "loss": 0.0856, "step": 194 },
    { "epoch": 1.728888888888889, "grad_norm": 0.06607326865196228, "learning_rate": 8.51963746223565e-05, "loss": 0.0851, "step": 195 },
    { "epoch": 1.7377777777777776, "grad_norm": 0.06291351467370987, "learning_rate": 8.459214501510574e-05, "loss": 0.1026, "step": 196 },
    { "epoch": 1.7466666666666666, "grad_norm": 0.055036935955286026, "learning_rate": 8.3987915407855e-05, "loss": 0.0721, "step": 197 },
    { "epoch": 1.7555555555555555, "grad_norm": 0.06576612591743469, "learning_rate": 8.338368580060424e-05, "loss": 0.1199, "step": 198 },
    { "epoch": 1.7644444444444445, "grad_norm": 0.0656011551618576, "learning_rate": 8.277945619335348e-05, "loss": 0.0947, "step": 199 },
    { "epoch": 1.7733333333333334, "grad_norm": 0.05662452057003975, "learning_rate": 8.217522658610272e-05, "loss": 0.0783, "step": 200 },
    { "epoch": 1.7822222222222224, "grad_norm": 0.05521798133850098, "learning_rate": 8.157099697885197e-05, "loss": 0.0635, "step": 201 },
    { "epoch": 1.791111111111111, "grad_norm": 0.06891798228025436, "learning_rate": 8.096676737160121e-05, "loss": 0.1013, "step": 202 },
    { "epoch": 1.8, "grad_norm": 0.06973979622125626, "learning_rate": 8.036253776435045e-05, "loss": 0.0787, "step": 203 },
    { "epoch": 1.8088888888888888, "grad_norm": 0.06616077572107315, "learning_rate": 7.975830815709971e-05, "loss": 0.0817, "step": 204 },
    { "epoch": 1.8177777777777777, "grad_norm": 0.06029290705919266, "learning_rate": 7.915407854984894e-05, "loss": 0.0806, "step": 205 },
    { "epoch": 1.8266666666666667, "grad_norm": 0.06291759759187698, "learning_rate": 7.85498489425982e-05, "loss": 0.087, "step": 206 },
    { "epoch": 1.8355555555555556, "grad_norm": 0.05678003653883934, "learning_rate": 7.794561933534742e-05, "loss": 0.0738, "step": 207 },
    { "epoch": 1.8444444444444446, "grad_norm": 0.05496233329176903, "learning_rate": 7.734138972809668e-05, "loss": 0.0835, "step": 208 },
    { "epoch": 1.8533333333333335, "grad_norm": 0.057058099657297134, "learning_rate": 7.673716012084592e-05, "loss": 0.0764, "step": 209 },
    { "epoch": 1.8622222222222222, "grad_norm": 0.05875209718942642, "learning_rate": 7.613293051359517e-05, "loss": 0.0734, "step": 210 },
    { "epoch": 1.871111111111111, "grad_norm": 0.06268563121557236, "learning_rate": 7.552870090634442e-05, "loss": 0.0791, "step": 211 },
    { "epoch": 1.88, "grad_norm": 0.08160948008298874, "learning_rate": 7.492447129909365e-05, "loss": 0.1602, "step": 212 },
    { "epoch": 1.8888888888888888, "grad_norm": 0.06325849145650864, "learning_rate": 7.432024169184291e-05, "loss": 0.0955, "step": 213 },
    { "epoch": 1.8977777777777778, "grad_norm": 0.06835322082042694, "learning_rate": 7.371601208459214e-05, "loss": 0.1167, "step": 214 },
    { "epoch": 1.9066666666666667, "grad_norm": 0.06621617823839188, "learning_rate": 7.31117824773414e-05, "loss": 0.104, "step": 215 },
    { "epoch": 1.9155555555555557, "grad_norm": 0.0520428791642189, "learning_rate": 7.250755287009064e-05, "loss": 0.0595, "step": 216 },
    { "epoch": 1.9244444444444444, "grad_norm": 0.05874223634600639, "learning_rate": 7.190332326283988e-05, "loss": 0.0734, "step": 217 },
    { "epoch": 1.9333333333333333, "grad_norm": 0.06542330980300903, "learning_rate": 7.129909365558913e-05, "loss": 0.0957, "step": 218 },
    { "epoch": 1.942222222222222, "grad_norm": 0.05599142238497734, "learning_rate": 7.069486404833837e-05, "loss": 0.0804, "step": 219 },
    { "epoch": 1.951111111111111, "grad_norm": 0.058727771043777466, "learning_rate": 7.009063444108763e-05, "loss": 0.0812, "step": 220 },
    { "epoch": 1.96, "grad_norm": 0.050783053040504456, "learning_rate": 6.948640483383686e-05, "loss": 0.073, "step": 221 },
    { "epoch": 1.968888888888889, "grad_norm": 0.058176856487989426, "learning_rate": 6.888217522658611e-05, "loss": 0.0941, "step": 222 },
    { "epoch": 1.9777777777777779, "grad_norm": 0.06019238755106926, "learning_rate": 6.827794561933535e-05, "loss": 0.0804, "step": 223 },
    { "epoch": 1.9866666666666668, "grad_norm": 0.052067600190639496, "learning_rate": 6.76737160120846e-05, "loss": 0.0664, "step": 224 },
    { "epoch": 1.9955555555555555, "grad_norm": 0.05987400561571121, "learning_rate": 6.706948640483384e-05, "loss": 0.087, "step": 225 },
    { "epoch": 2.0, "grad_norm": 0.08068395406007767, "learning_rate": 6.646525679758308e-05, "loss": 0.0728, "step": 226 },
    { "epoch": 2.008888888888889, "grad_norm": 0.056796178221702576, "learning_rate": 6.586102719033233e-05, "loss": 0.0844, "step": 227 },
    { "epoch": 2.017777777777778, "grad_norm": 0.05155782401561737, "learning_rate": 6.525679758308157e-05, "loss": 0.0668, "step": 228 },
    { "epoch": 2.026666666666667, "grad_norm": 0.050025951117277145, "learning_rate": 6.465256797583081e-05, "loss": 0.0632, "step": 229 },
    { "epoch": 2.0355555555555553, "grad_norm": 0.06146937608718872, "learning_rate": 6.404833836858006e-05, "loss": 0.1061, "step": 230 },
    { "epoch": 2.0444444444444443, "grad_norm": 0.05745534226298332, "learning_rate": 6.344410876132931e-05, "loss": 0.0775, "step": 231 },
    { "epoch": 2.0533333333333332, "grad_norm": 0.058149635791778564, "learning_rate": 6.283987915407856e-05, "loss": 0.0812, "step": 232 },
    { "epoch": 2.062222222222222, "grad_norm": 0.052630528807640076, "learning_rate": 6.22356495468278e-05, "loss": 0.0715, "step": 233 },
    { "epoch": 2.071111111111111, "grad_norm": 0.053744129836559296, "learning_rate": 6.163141993957704e-05, "loss": 0.064, "step": 234 },
    { "epoch": 2.08, "grad_norm": 0.05753091350197792, "learning_rate": 6.102719033232629e-05, "loss": 0.0769, "step": 235 },
    { "epoch": 2.088888888888889, "grad_norm": 0.06525509059429169, "learning_rate": 6.042296072507553e-05, "loss": 0.0861, "step": 236 },
    { "epoch": 2.097777777777778, "grad_norm": 0.060992028564214706, "learning_rate": 5.981873111782478e-05, "loss": 0.0916, "step": 237 },
    { "epoch": 2.1066666666666665, "grad_norm": 0.05694571137428284, "learning_rate": 5.9214501510574015e-05, "loss": 0.0718, "step": 238 },
    { "epoch": 2.1155555555555554, "grad_norm": 0.06200471892952919, "learning_rate": 5.8610271903323265e-05, "loss": 0.0765, "step": 239 },
    { "epoch": 2.1244444444444444, "grad_norm": 0.07638643682003021, "learning_rate": 5.8006042296072515e-05, "loss": 0.1205, "step": 240 },
    { "epoch": 2.1333333333333333, "grad_norm": 0.05985346809029579, "learning_rate": 5.740181268882176e-05, "loss": 0.0633, "step": 241 },
    { "epoch": 2.1422222222222222, "grad_norm": 0.06811255216598511, "learning_rate": 5.679758308157101e-05, "loss": 0.0716, "step": 242 },
    { "epoch": 2.151111111111111, "grad_norm": 0.07051990926265717, "learning_rate": 5.6193353474320244e-05, "loss": 0.0985, "step": 243 },
    { "epoch": 2.16, "grad_norm": 0.06572071462869644, "learning_rate": 5.5589123867069494e-05, "loss": 0.0645, "step": 244 },
    { "epoch": 2.168888888888889, "grad_norm": 0.06567268073558807, "learning_rate": 5.498489425981873e-05, "loss": 0.0602, "step": 245 },
    { "epoch": 2.1777777777777776, "grad_norm": 0.0646674782037735, "learning_rate": 5.438066465256798e-05, "loss": 0.0679, "step": 246 },
    { "epoch": 2.1866666666666665, "grad_norm": 0.06456337124109268, "learning_rate": 5.3776435045317216e-05, "loss": 0.0706, "step": 247 },
    { "epoch": 2.1955555555555555, "grad_norm": 0.07218114286661148, "learning_rate": 5.3172205438066466e-05, "loss": 0.076, "step": 248 },
    { "epoch": 2.2044444444444444, "grad_norm": 0.07510354369878769, "learning_rate": 5.256797583081571e-05, "loss": 0.0817, "step": 249 },
    { "epoch": 2.2133333333333334, "grad_norm": 0.07315120100975037, "learning_rate": 5.196374622356496e-05, "loss": 0.0725, "step": 250 },
    { "epoch": 2.2222222222222223, "grad_norm": 0.08618611097335815, "learning_rate": 5.135951661631421e-05, "loss": 0.099, "step": 251 },
    { "epoch": 2.2311111111111113, "grad_norm": 0.07698719948530197, "learning_rate": 5.0755287009063445e-05, "loss": 0.0878, "step": 252 },
    { "epoch": 2.24, "grad_norm": 0.06022505462169647, "learning_rate": 5.0151057401812695e-05, "loss": 0.0558, "step": 253 },
    { "epoch": 2.2488888888888887, "grad_norm": 0.06770103424787521, "learning_rate": 4.954682779456193e-05, "loss": 0.0729, "step": 254 },
    { "epoch": 2.2577777777777777, "grad_norm": 0.08226872980594635, "learning_rate": 4.894259818731118e-05, "loss": 0.0953, "step": 255 },
    { "epoch": 2.2666666666666666, "grad_norm": 0.07327685505151749, "learning_rate": 4.833836858006043e-05, "loss": 0.088, "step": 256 },
    { "epoch": 2.2755555555555556, "grad_norm": 0.0711284875869751, "learning_rate": 4.7734138972809674e-05, "loss": 0.0689, "step": 257 },
    { "epoch": 2.2844444444444445, "grad_norm": 0.08122266829013824, "learning_rate": 4.712990936555892e-05, "loss": 0.1304, "step": 258 },
    { "epoch": 2.2933333333333334, "grad_norm": 0.06601269543170929, "learning_rate": 4.652567975830816e-05, "loss": 0.0544, "step": 259 },
    { "epoch": 2.3022222222222224, "grad_norm": 0.08035172522068024, "learning_rate": 4.5921450151057403e-05, "loss": 0.0809, "step": 260 },
    { "epoch": 2.311111111111111, "grad_norm": 0.07023400068283081, "learning_rate": 4.5317220543806646e-05, "loss": 0.0719, "step": 261 },
    { "epoch": 2.32, "grad_norm": 0.07323497533798218, "learning_rate": 4.4712990936555896e-05, "loss": 0.0757, "step": 262 },
    { "epoch": 2.328888888888889, "grad_norm": 0.0711096003651619, "learning_rate": 4.410876132930514e-05, "loss": 0.079, "step": 263 },
    { "epoch": 2.3377777777777777, "grad_norm": 0.08337131887674332, "learning_rate": 4.350453172205438e-05, "loss": 0.0863, "step": 264 },
    { "epoch": 2.3466666666666667, "grad_norm": 0.06306735426187515, "learning_rate": 4.2900302114803626e-05, "loss": 0.0544, "step": 265 },
    { "epoch": 2.3555555555555556, "grad_norm": 0.06520617753267288, "learning_rate": 4.229607250755287e-05, "loss": 0.0492, "step": 266 },
    { "epoch": 2.3644444444444446, "grad_norm": 0.07521840184926987, "learning_rate": 4.169184290030212e-05, "loss": 0.0817, "step": 267 },
    { "epoch": 2.3733333333333335, "grad_norm": 0.11388452351093292, "learning_rate": 4.108761329305136e-05, "loss": 0.1195, "step": 268 },
    { "epoch": 2.3822222222222225, "grad_norm": 0.0842316523194313, "learning_rate": 4.0483383685800605e-05, "loss": 0.0697, "step": 269 },
    { "epoch": 2.391111111111111, "grad_norm": 0.0726594477891922, "learning_rate": 3.9879154078549855e-05, "loss": 0.0685, "step": 270 },
    { "epoch": 2.4, "grad_norm": 0.07490499317646027, "learning_rate": 3.92749244712991e-05, "loss": 0.0753, "step": 271 },
    { "epoch": 2.408888888888889, "grad_norm": 0.08467234671115875, "learning_rate": 3.867069486404834e-05, "loss": 0.0902, "step": 272 },
    { "epoch": 2.417777777777778, "grad_norm": 0.07775551825761795, "learning_rate": 3.8066465256797584e-05, "loss": 0.0749, "step": 273 },
    { "epoch": 2.4266666666666667, "grad_norm": 0.07680890709161758, "learning_rate": 3.746223564954683e-05, "loss": 0.0839, "step": 274 },
    { "epoch": 2.4355555555555557, "grad_norm": 0.07432601600885391, "learning_rate": 3.685800604229607e-05, "loss": 0.0718, "step": 275 },
    { "epoch": 2.4444444444444446, "grad_norm": 0.0697220042347908, "learning_rate": 3.625377643504532e-05, "loss": 0.0581, "step": 276 },
    { "epoch": 2.453333333333333, "grad_norm": 0.0710417628288269, "learning_rate": 3.564954682779456e-05, "loss": 0.0741, "step": 277 },
    { "epoch": 2.462222222222222, "grad_norm": 0.07673481106758118, "learning_rate": 3.504531722054381e-05, "loss": 0.0792, "step": 278 },
    { "epoch": 2.471111111111111, "grad_norm": 0.07718473672866821, "learning_rate": 3.4441087613293056e-05, "loss": 0.0786, "step": 279 },
    { "epoch": 2.48, "grad_norm": 0.0753309354186058, "learning_rate": 3.38368580060423e-05, "loss": 0.0651, "step": 280 },
    { "epoch": 2.488888888888889, "grad_norm": 0.06466688960790634, "learning_rate": 3.323262839879154e-05, "loss": 0.0662, "step": 281 },
    { "epoch": 2.497777777777778, "grad_norm": 0.06847023218870163, "learning_rate": 3.2628398791540785e-05, "loss": 0.0606, "step": 282 },
    { "epoch": 2.506666666666667, "grad_norm": 0.08267179876565933, "learning_rate": 3.202416918429003e-05, "loss": 0.0963, "step": 283 },
    { "epoch": 2.5155555555555553, "grad_norm": 0.07589060813188553, "learning_rate": 3.141993957703928e-05, "loss": 0.0811, "step": 284 },
    { "epoch": 2.5244444444444447, "grad_norm": 0.07186582684516907, "learning_rate": 3.081570996978852e-05, "loss": 0.0727, "step": 285 },
    { "epoch": 2.533333333333333, "grad_norm": 0.06798484176397324, "learning_rate": 3.0211480362537764e-05, "loss": 0.0683, "step": 286 },
    { "epoch": 2.542222222222222, "grad_norm": 0.06041852384805679, "learning_rate": 2.9607250755287007e-05, "loss": 0.0549, "step": 287 },
    { "epoch": 2.551111111111111, "grad_norm": 0.07672443240880966, "learning_rate": 2.9003021148036257e-05, "loss": 0.0835, "step": 288 },
    { "epoch": 2.56, "grad_norm": 0.06650765985250473, "learning_rate": 2.8398791540785504e-05, "loss": 0.0625, "step": 289 },
    { "epoch": 2.568888888888889, "grad_norm": 0.07676387578248978, "learning_rate": 2.7794561933534747e-05, "loss": 0.0804, "step": 290 },
    { "epoch": 2.5777777777777775, "grad_norm": 0.06516220420598984, "learning_rate": 2.719033232628399e-05, "loss": 0.0558, "step": 291 },
    { "epoch": 2.586666666666667, "grad_norm": 0.06658012419939041, "learning_rate": 2.6586102719033233e-05, "loss": 0.0656, "step": 292 },
    { "epoch": 2.5955555555555554, "grad_norm": 0.06700943410396576, "learning_rate": 2.598187311178248e-05, "loss": 0.0652, "step": 293 },
    { "epoch": 2.6044444444444443, "grad_norm": 0.08271019905805588, "learning_rate": 2.5377643504531723e-05, "loss": 0.0939, "step": 294 },
    { "epoch": 2.6133333333333333, "grad_norm": 0.06946663558483124, "learning_rate": 2.4773413897280966e-05, "loss": 0.0608, "step": 295 },
    { "epoch": 2.6222222222222222, "grad_norm": 0.06652197986841202, "learning_rate": 2.4169184290030216e-05, "loss": 0.063, "step": 296 },
    { "epoch": 2.631111111111111, "grad_norm": 0.05776477977633476, "learning_rate": 2.356495468277946e-05, "loss": 0.0519, "step": 297 },
    { "epoch": 2.64, "grad_norm": 0.06055834889411926, "learning_rate": 2.2960725075528702e-05, "loss": 0.0587, "step": 298 },
    { "epoch": 2.648888888888889, "grad_norm": 0.0723641887307167, "learning_rate": 2.2356495468277948e-05, "loss": 0.0673, "step": 299 },
    { "epoch": 2.6577777777777776, "grad_norm": 0.06614179909229279, "learning_rate": 2.175226586102719e-05, "loss": 0.0609, "step": 300 },
    { "epoch": 2.6666666666666665, "grad_norm": 0.07408380508422852, "learning_rate": 2.1148036253776434e-05, "loss": 0.073, "step": 301 },
    { "epoch": 2.6755555555555555, "grad_norm": 0.08053004741668701, "learning_rate": 2.054380664652568e-05, "loss": 0.084, "step": 302 },
    { "epoch": 2.6844444444444444, "grad_norm": 0.061500728130340576, "learning_rate": 1.9939577039274927e-05, "loss": 0.0478, "step": 303 },
    { "epoch": 2.6933333333333334, "grad_norm": 0.06272031366825104, "learning_rate": 1.933534743202417e-05, "loss": 0.0557, "step": 304 },
    { "epoch": 2.7022222222222223, "grad_norm": 0.08150549232959747, "learning_rate": 1.8731117824773413e-05, "loss": 0.0878, "step": 305 },
    { "epoch": 2.7111111111111112, "grad_norm": 0.08058076351881027, "learning_rate": 1.812688821752266e-05, "loss": 0.0708, "step": 306 },
    { "epoch": 2.7199999999999998, "grad_norm": 0.07689347863197327, "learning_rate": 1.7522658610271906e-05, "loss": 0.0827, "step": 307 },
    { "epoch": 2.728888888888889, "grad_norm": 0.07880861312150955, "learning_rate": 1.691842900302115e-05, "loss": 0.1007, "step": 308 },
    { "epoch": 2.7377777777777776, "grad_norm": 0.0824202448129654, "learning_rate": 1.6314199395770393e-05, "loss": 0.0773, "step": 309 },
    { "epoch": 2.7466666666666666, "grad_norm": 0.07320031523704529, "learning_rate": 1.570996978851964e-05, "loss": 0.0742, "step": 310 },
    { "epoch": 2.7555555555555555, "grad_norm": 0.0690142959356308, "learning_rate": 1.5105740181268882e-05, "loss": 0.0557, "step": 311 },
    { "epoch": 2.7644444444444445, "grad_norm": 0.062487728893756866, "learning_rate": 1.4501510574018129e-05, "loss": 0.0624, "step": 312 },
    { "epoch": 2.7733333333333334, "grad_norm": 0.07230405509471893, "learning_rate": 1.3897280966767373e-05, "loss": 0.0713, "step": 313 },
    { "epoch": 2.7822222222222224, "grad_norm": 0.08470380306243896, "learning_rate": 1.3293051359516617e-05, "loss": 0.1038, "step": 314 },
    { "epoch": 2.7911111111111113, "grad_norm": 0.06646347045898438, "learning_rate": 1.2688821752265861e-05, "loss": 0.0612, "step": 315 },
    { "epoch": 2.8, "grad_norm": 0.07566383481025696, "learning_rate": 1.2084592145015108e-05, "loss": 0.0645, "step": 316 },
    { "epoch": 2.8088888888888888, "grad_norm": 0.07404222339391708, "learning_rate": 1.1480362537764351e-05, "loss": 0.0677, "step": 317 },
    { "epoch": 2.8177777777777777, "grad_norm": 0.07574770599603653, "learning_rate": 1.0876132930513596e-05, "loss": 0.0828, "step": 318 },
    { "epoch": 2.8266666666666667, "grad_norm": 0.08625300973653793, "learning_rate": 1.027190332326284e-05, "loss": 0.0852, "step": 319 },
    { "epoch": 2.8355555555555556, "grad_norm": 0.0686657726764679, "learning_rate": 9.667673716012085e-06, "loss": 0.0584, "step": 320 },
    { "epoch": 2.8444444444444446, "grad_norm": 0.0677306279540062, "learning_rate": 9.06344410876133e-06, "loss": 0.0712, "step": 321 },
    { "epoch": 2.8533333333333335, "grad_norm": 0.06888636946678162, "learning_rate": 8.459214501510575e-06, "loss": 0.0703, "step": 322 },
    { "epoch": 2.862222222222222, "grad_norm": 0.06507274508476257, "learning_rate": 7.85498489425982e-06, "loss": 0.0594, "step": 323 },
    { "epoch": 2.871111111111111, "grad_norm": 0.07514124363660812, "learning_rate": 7.250755287009064e-06, "loss": 0.0753, "step": 324 },
    { "epoch": 2.88, "grad_norm": 0.0745137482881546, "learning_rate": 6.646525679758308e-06, "loss": 0.0802, "step": 325 },
    { "epoch": 2.888888888888889, "grad_norm": 0.07600755989551544, "learning_rate": 6.042296072507554e-06, "loss": 0.0763, "step": 326 },
    { "epoch": 2.897777777777778, "grad_norm": 0.05982338637113571, "learning_rate": 5.438066465256798e-06, "loss": 0.0496, "step": 327 },
    { "epoch": 2.9066666666666667, "grad_norm": 0.06928596645593643, "learning_rate": 4.833836858006043e-06, "loss": 0.0626, "step": 328 },
    { "epoch": 2.9155555555555557, "grad_norm": 0.06719228625297546, "learning_rate": 4.229607250755287e-06, "loss": 0.0644, "step": 329 },
    { "epoch": 2.924444444444444, "grad_norm": 0.06885026395320892, "learning_rate": 3.625377643504532e-06, "loss": 0.0723, "step": 330 },
    { "epoch": 2.9333333333333336, "grad_norm": 0.07556895166635513, "learning_rate": 3.021148036253777e-06, "loss": 0.0746, "step": 331 },
    { "epoch": 2.942222222222222, "grad_norm": 0.08047506213188171, "learning_rate": 2.4169184290030213e-06, "loss": 0.0876, "step": 332 },
    { "epoch": 2.951111111111111, "grad_norm": 0.08711687475442886, "learning_rate": 1.812688821752266e-06, "loss": 0.1013, "step": 333 },
    { "epoch": 2.96, "grad_norm": 0.07905985414981842, "learning_rate": 1.2084592145015106e-06, "loss": 0.0794, "step": 334 },
    { "epoch": 2.968888888888889, "grad_norm": 0.0679263323545456, "learning_rate": 6.042296072507553e-07, "loss": 0.0551, "step": 335 },
    { "epoch": 2.977777777777778, "grad_norm": 0.07112424075603485, "learning_rate": 0.0, "loss": 0.0743, "step": 336 }
  ],
  "logging_steps": 1,
  "max_steps": 336,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 3,
  "save_steps": 500,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 3.934810290318336e+17,
  "train_batch_size": 2,
  "trial_name": null,
  "trial_params": null
}