{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 0.3334453781512605,
  "eval_steps": 500,
  "global_step": 496,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.0006722689075630252,
      "grad_norm": 0.0835651233792305,
      "learning_rate": 1.0000000000000002e-06,
      "loss": 1.0786,
      "step": 1
    },
    {
      "epoch": 0.0013445378151260505,
      "grad_norm": 0.08339695632457733,
      "learning_rate": 2.0000000000000003e-06,
      "loss": 1.0642,
      "step": 2
    },
    {
      "epoch": 0.0020168067226890756,
      "grad_norm": 0.08287601917982101,
      "learning_rate": 3e-06,
      "loss": 0.9508,
      "step": 3
    },
    {
      "epoch": 0.002689075630252101,
      "grad_norm": 0.09812851250171661,
      "learning_rate": 4.000000000000001e-06,
      "loss": 0.9892,
      "step": 4
    },
    {
      "epoch": 0.0033613445378151263,
      "grad_norm": 0.10798798501491547,
      "learning_rate": 5e-06,
      "loss": 0.9393,
      "step": 5
    },
    {
      "epoch": 0.004033613445378151,
      "grad_norm": 0.11012189835309982,
      "learning_rate": 6e-06,
      "loss": 1.0222,
      "step": 6
    },
    {
      "epoch": 0.004705882352941176,
      "grad_norm": 0.0818193256855011,
      "learning_rate": 7e-06,
      "loss": 0.9702,
      "step": 7
    },
    {
      "epoch": 0.005378151260504202,
      "grad_norm": 0.08617620915174484,
      "learning_rate": 8.000000000000001e-06,
      "loss": 1.1444,
      "step": 8
    },
    {
      "epoch": 0.006050420168067227,
      "grad_norm": 0.07566545158624649,
      "learning_rate": 9e-06,
      "loss": 0.9204,
      "step": 9
    },
    {
      "epoch": 0.0067226890756302525,
      "grad_norm": 0.07344148308038712,
      "learning_rate": 1e-05,
      "loss": 0.8932,
      "step": 10
    },
    {
      "epoch": 0.007394957983193277,
      "grad_norm": 0.1588195115327835,
      "learning_rate": 9.999988689583452e-06,
      "loss": 1.0167,
      "step": 11
    },
    {
      "epoch": 0.008067226890756302,
      "grad_norm": 0.105576291680336,
      "learning_rate": 9.999954758384983e-06,
      "loss": 1.0244,
      "step": 12
    },
    {
      "epoch": 0.008739495798319327,
      "grad_norm": 0.10251228511333466,
      "learning_rate": 9.999898206558095e-06,
      "loss": 1.0446,
      "step": 13
    },
    {
      "epoch": 0.009411764705882352,
      "grad_norm": 0.11292218416929245,
      "learning_rate": 9.999819034358644e-06,
      "loss": 1.0304,
      "step": 14
    },
    {
      "epoch": 0.010084033613445379,
      "grad_norm": 0.12369322031736374,
      "learning_rate": 9.999717242144817e-06,
      "loss": 1.049,
      "step": 15
    },
    {
      "epoch": 0.010756302521008404,
      "grad_norm": 0.16626787185668945,
      "learning_rate": 9.99959283037714e-06,
      "loss": 1.0647,
      "step": 16
    },
    {
      "epoch": 0.011428571428571429,
      "grad_norm": 0.800682008266449,
      "learning_rate": 9.99944579961847e-06,
      "loss": 0.9812,
      "step": 17
    },
    {
      "epoch": 0.012100840336134453,
      "grad_norm": 0.09268046170473099,
      "learning_rate": 9.999276150534001e-06,
      "loss": 0.9062,
      "step": 18
    },
    {
      "epoch": 0.012773109243697478,
      "grad_norm": 0.0998532846570015,
      "learning_rate": 9.999083883891251e-06,
      "loss": 1.0247,
      "step": 19
    },
    {
      "epoch": 0.013445378151260505,
      "grad_norm": 0.17539170384407043,
      "learning_rate": 9.99886900056007e-06,
      "loss": 1.0814,
      "step": 20
    },
    {
      "epoch": 0.01411764705882353,
      "grad_norm": 0.150508314371109,
      "learning_rate": 9.998631501512624e-06,
      "loss": 0.9967,
      "step": 21
    },
    {
      "epoch": 0.014789915966386555,
      "grad_norm": 0.10628243535757065,
      "learning_rate": 9.998371387823398e-06,
      "loss": 0.9941,
      "step": 22
    },
    {
      "epoch": 0.01546218487394958,
      "grad_norm": 0.13679274916648865,
      "learning_rate": 9.998088660669189e-06,
      "loss": 0.983,
      "step": 23
    },
    {
      "epoch": 0.016134453781512605,
      "grad_norm": 0.10756199061870575,
      "learning_rate": 9.997783321329104e-06,
      "loss": 1.002,
      "step": 24
    },
    {
      "epoch": 0.01680672268907563,
      "grad_norm": 0.1575344055891037,
      "learning_rate": 9.997455371184546e-06,
      "loss": 1.0167,
      "step": 25
    },
    {
      "epoch": 0.017478991596638654,
      "grad_norm": 0.16291604936122894,
      "learning_rate": 9.997104811719221e-06,
      "loss": 1.0141,
      "step": 26
    },
    {
      "epoch": 0.01815126050420168,
      "grad_norm": 0.21997588872909546,
      "learning_rate": 9.996731644519111e-06,
      "loss": 0.8077,
      "step": 27
    },
    {
      "epoch": 0.018823529411764704,
      "grad_norm": 0.1384553760290146,
      "learning_rate": 9.996335871272494e-06,
      "loss": 1.0216,
      "step": 28
    },
    {
      "epoch": 0.019495798319327733,
      "grad_norm": 0.1880977898836136,
      "learning_rate": 9.995917493769912e-06,
      "loss": 1.0098,
      "step": 29
    },
    {
      "epoch": 0.020168067226890758,
      "grad_norm": 0.14351259171962738,
      "learning_rate": 9.995476513904171e-06,
      "loss": 1.0134,
      "step": 30
    },
    {
      "epoch": 0.020840336134453782,
      "grad_norm": 0.1357450932264328,
      "learning_rate": 9.995012933670341e-06,
      "loss": 0.9587,
      "step": 31
    },
    {
      "epoch": 0.021512605042016807,
      "grad_norm": 0.15096725523471832,
      "learning_rate": 9.994526755165736e-06,
      "loss": 0.8823,
      "step": 32
    },
    {
      "epoch": 0.022184873949579832,
      "grad_norm": 0.2435840666294098,
      "learning_rate": 9.994017980589907e-06,
      "loss": 0.9802,
      "step": 33
    },
    {
      "epoch": 0.022857142857142857,
      "grad_norm": 0.12320239096879959,
      "learning_rate": 9.993486612244634e-06,
      "loss": 1.0142,
      "step": 34
    },
    {
      "epoch": 0.023529411764705882,
      "grad_norm": 0.11510821431875229,
      "learning_rate": 9.99293265253392e-06,
      "loss": 0.8819,
      "step": 35
    },
    {
      "epoch": 0.024201680672268907,
      "grad_norm": 0.13881511986255646,
      "learning_rate": 9.992356103963967e-06,
      "loss": 1.0291,
      "step": 36
    },
    {
      "epoch": 0.024873949579831932,
      "grad_norm": 0.12611961364746094,
      "learning_rate": 9.99175696914318e-06,
      "loss": 0.8883,
      "step": 37
    },
    {
      "epoch": 0.025546218487394957,
      "grad_norm": 0.1128062754869461,
      "learning_rate": 9.991135250782143e-06,
      "loss": 0.9399,
      "step": 38
    },
    {
      "epoch": 0.02621848739495798,
      "grad_norm": 0.17889104783535004,
      "learning_rate": 9.990490951693612e-06,
      "loss": 0.8848,
      "step": 39
    },
    {
      "epoch": 0.02689075630252101,
      "grad_norm": 0.10607869923114777,
      "learning_rate": 9.989824074792507e-06,
      "loss": 0.9864,
      "step": 40
    },
    {
      "epoch": 0.027563025210084035,
      "grad_norm": 0.11618159711360931,
      "learning_rate": 9.989134623095888e-06,
      "loss": 0.9561,
      "step": 41
    },
    {
      "epoch": 0.02823529411764706,
      "grad_norm": 0.13454240560531616,
      "learning_rate": 9.98842259972295e-06,
      "loss": 0.949,
      "step": 42
    },
    {
      "epoch": 0.028907563025210085,
      "grad_norm": 0.11738969385623932,
      "learning_rate": 9.987688007895002e-06,
      "loss": 0.9184,
      "step": 43
    },
    {
      "epoch": 0.02957983193277311,
      "grad_norm": 0.11672742664813995,
      "learning_rate": 9.986930850935465e-06,
      "loss": 0.945,
      "step": 44
    },
    {
      "epoch": 0.030252100840336135,
      "grad_norm": 0.10767639428377151,
      "learning_rate": 9.986151132269843e-06,
      "loss": 1.0525,
      "step": 45
    },
    {
      "epoch": 0.03092436974789916,
      "grad_norm": 0.11100558191537857,
      "learning_rate": 9.98534885542571e-06,
      "loss": 0.8767,
      "step": 46
    },
    {
      "epoch": 0.03159663865546219,
      "grad_norm": 0.12392528355121613,
      "learning_rate": 9.9845240240327e-06,
      "loss": 0.9066,
      "step": 47
    },
    {
      "epoch": 0.03226890756302521,
      "grad_norm": 0.12757766246795654,
      "learning_rate": 9.98367664182249e-06,
      "loss": 0.8557,
      "step": 48
    },
    {
      "epoch": 0.03294117647058824,
      "grad_norm": 0.09705185890197754,
      "learning_rate": 9.982806712628776e-06,
      "loss": 0.8187,
      "step": 49
    },
    {
      "epoch": 0.03361344537815126,
      "grad_norm": 0.11287973076105118,
      "learning_rate": 9.981914240387264e-06,
      "loss": 0.8826,
      "step": 50
    },
    {
      "epoch": 0.03428571428571429,
      "grad_norm": 0.10316391289234161,
      "learning_rate": 9.98099922913565e-06,
      "loss": 0.8536,
      "step": 51
    },
    {
      "epoch": 0.03495798319327731,
      "grad_norm": 0.09859252721071243,
      "learning_rate": 9.980061683013594e-06,
      "loss": 0.7973,
      "step": 52
    },
    {
      "epoch": 0.03563025210084034,
      "grad_norm": 0.10780596733093262,
      "learning_rate": 9.979101606262709e-06,
      "loss": 0.8864,
      "step": 53
    },
    {
      "epoch": 0.03630252100840336,
      "grad_norm": 0.13508446514606476,
      "learning_rate": 9.978119003226546e-06,
      "loss": 0.9185,
      "step": 54
    },
    {
      "epoch": 0.03697478991596639,
      "grad_norm": 0.12588554620742798,
      "learning_rate": 9.977113878350561e-06,
      "loss": 0.8077,
      "step": 55
    },
    {
      "epoch": 0.03764705882352941,
      "grad_norm": 0.09272392839193344,
      "learning_rate": 9.976086236182113e-06,
      "loss": 0.9335,
      "step": 56
    },
    {
      "epoch": 0.03831932773109244,
      "grad_norm": 0.0940898135304451,
      "learning_rate": 9.975036081370417e-06,
      "loss": 0.8295,
      "step": 57
    },
    {
      "epoch": 0.038991596638655465,
      "grad_norm": 0.36463847756385803,
      "learning_rate": 9.973963418666556e-06,
      "loss": 0.9301,
      "step": 58
    },
    {
      "epoch": 0.03966386554621849,
      "grad_norm": 0.10689139366149902,
      "learning_rate": 9.972868252923433e-06,
      "loss": 0.8314,
      "step": 59
    },
    {
      "epoch": 0.040336134453781515,
      "grad_norm": 0.10545141249895096,
      "learning_rate": 9.97175058909576e-06,
      "loss": 0.836,
      "step": 60
    },
    {
      "epoch": 0.041008403361344536,
      "grad_norm": 0.09649187326431274,
      "learning_rate": 9.970610432240033e-06,
      "loss": 0.798,
      "step": 61
    },
    {
      "epoch": 0.041680672268907565,
      "grad_norm": 0.11074765026569366,
      "learning_rate": 9.969447787514512e-06,
      "loss": 0.9432,
      "step": 62
    },
    {
      "epoch": 0.042352941176470586,
      "grad_norm": 0.14206334948539734,
      "learning_rate": 9.968262660179197e-06,
      "loss": 1.03,
      "step": 63
    },
    {
      "epoch": 0.043025210084033615,
      "grad_norm": 0.2147335410118103,
      "learning_rate": 9.9670550555958e-06,
      "loss": 0.8486,
      "step": 64
    },
    {
      "epoch": 0.043697478991596636,
      "grad_norm": 0.11276064813137054,
      "learning_rate": 9.965824979227727e-06,
      "loss": 0.8415,
      "step": 65
    },
    {
      "epoch": 0.044369747899159664,
      "grad_norm": 0.1048726737499237,
      "learning_rate": 9.964572436640046e-06,
      "loss": 0.8171,
      "step": 66
    },
    {
      "epoch": 0.045042016806722686,
      "grad_norm": 0.09552193433046341,
      "learning_rate": 9.96329743349947e-06,
      "loss": 0.818,
      "step": 67
    },
    {
      "epoch": 0.045714285714285714,
      "grad_norm": 0.11481764167547226,
      "learning_rate": 9.961999975574327e-06,
      "loss": 0.8802,
      "step": 68
    },
    {
      "epoch": 0.04638655462184874,
      "grad_norm": 0.12009887397289276,
      "learning_rate": 9.960680068734528e-06,
      "loss": 0.8471,
      "step": 69
    },
    {
      "epoch": 0.047058823529411764,
      "grad_norm": 0.11171095073223114,
      "learning_rate": 9.959337718951558e-06,
      "loss": 0.7807,
      "step": 70
    },
    {
      "epoch": 0.04773109243697479,
      "grad_norm": 0.19787555932998657,
      "learning_rate": 9.957972932298425e-06,
      "loss": 0.7859,
      "step": 71
    },
    {
      "epoch": 0.048403361344537814,
      "grad_norm": 0.14838945865631104,
      "learning_rate": 9.956585714949656e-06,
      "loss": 0.84,
      "step": 72
    },
    {
      "epoch": 0.04907563025210084,
      "grad_norm": 0.1107582226395607,
      "learning_rate": 9.95517607318125e-06,
      "loss": 0.8462,
      "step": 73
    },
    {
      "epoch": 0.049747899159663864,
      "grad_norm": 0.10351675003767014,
      "learning_rate": 9.953744013370664e-06,
      "loss": 0.8877,
      "step": 74
    },
    {
      "epoch": 0.05042016806722689,
      "grad_norm": 0.10547688603401184,
      "learning_rate": 9.952289541996772e-06,
      "loss": 0.9026,
      "step": 75
    },
    {
      "epoch": 0.051092436974789913,
      "grad_norm": 0.1045449823141098,
      "learning_rate": 9.950812665639846e-06,
      "loss": 0.7632,
      "step": 76
    },
    {
      "epoch": 0.05176470588235294,
      "grad_norm": 0.10957052558660507,
      "learning_rate": 9.949313390981523e-06,
      "loss": 0.6904,
      "step": 77
    },
    {
      "epoch": 0.05243697478991596,
      "grad_norm": 0.11886177957057953,
      "learning_rate": 9.94779172480477e-06,
      "loss": 0.8598,
      "step": 78
    },
    {
      "epoch": 0.05310924369747899,
      "grad_norm": 0.11801187694072723,
      "learning_rate": 9.946247673993859e-06,
      "loss": 0.7523,
      "step": 79
    },
    {
      "epoch": 0.05378151260504202,
      "grad_norm": 0.11699094623327255,
      "learning_rate": 9.944681245534329e-06,
      "loss": 0.7911,
      "step": 80
    },
    {
      "epoch": 0.05445378151260504,
      "grad_norm": 0.12566529214382172,
      "learning_rate": 9.943092446512969e-06,
      "loss": 0.7427,
      "step": 81
    },
    {
      "epoch": 0.05512605042016807,
      "grad_norm": 0.12246568500995636,
      "learning_rate": 9.941481284117767e-06,
      "loss": 0.6664,
      "step": 82
    },
    {
      "epoch": 0.05579831932773109,
      "grad_norm": 0.13946433365345,
      "learning_rate": 9.939847765637892e-06,
      "loss": 0.7136,
      "step": 83
    },
    {
      "epoch": 0.05647058823529412,
      "grad_norm": 0.1847638487815857,
      "learning_rate": 9.938191898463652e-06,
      "loss": 0.7466,
      "step": 84
    },
    {
      "epoch": 0.05714285714285714,
      "grad_norm": 0.15350033342838287,
      "learning_rate": 9.936513690086466e-06,
      "loss": 0.8925,
      "step": 85
    },
    {
      "epoch": 0.05781512605042017,
      "grad_norm": 0.12483131885528564,
      "learning_rate": 9.934813148098833e-06,
      "loss": 0.7337,
      "step": 86
    },
    {
      "epoch": 0.05848739495798319,
      "grad_norm": 0.22703243792057037,
      "learning_rate": 9.93309028019428e-06,
      "loss": 0.7534,
      "step": 87
    },
    {
      "epoch": 0.05915966386554622,
      "grad_norm": 0.1408054232597351,
      "learning_rate": 9.931345094167355e-06,
      "loss": 0.7855,
      "step": 88
    },
    {
      "epoch": 0.05983193277310924,
      "grad_norm": 0.10884613543748856,
      "learning_rate": 9.929577597913569e-06,
      "loss": 0.873,
      "step": 89
    },
    {
      "epoch": 0.06050420168067227,
      "grad_norm": 0.12801045179367065,
      "learning_rate": 9.927787799429366e-06,
      "loss": 0.731,
      "step": 90
    },
    {
      "epoch": 0.0611764705882353,
      "grad_norm": 0.11761047691106796,
      "learning_rate": 9.925975706812099e-06,
      "loss": 0.8504,
      "step": 91
    },
    {
      "epoch": 0.06184873949579832,
      "grad_norm": 0.11633070558309555,
      "learning_rate": 9.924141328259973e-06,
      "loss": 0.771,
      "step": 92
    },
    {
      "epoch": 0.06252100840336135,
      "grad_norm": 0.1195223405957222,
      "learning_rate": 9.922284672072022e-06,
      "loss": 0.8507,
      "step": 93
    },
    {
      "epoch": 0.06319327731092438,
      "grad_norm": 0.11958979815244675,
      "learning_rate": 9.920405746648067e-06,
      "loss": 0.8864,
      "step": 94
    },
    {
      "epoch": 0.06386554621848739,
      "grad_norm": 0.4856259822845459,
      "learning_rate": 9.918504560488683e-06,
      "loss": 0.8751,
      "step": 95
    },
    {
      "epoch": 0.06453781512605042,
      "grad_norm": 0.14082475006580353,
      "learning_rate": 9.91658112219515e-06,
      "loss": 0.749,
      "step": 96
    },
    {
      "epoch": 0.06521008403361345,
      "grad_norm": 0.13045863807201385,
      "learning_rate": 9.914635440469427e-06,
      "loss": 0.7975,
      "step": 97
    },
    {
      "epoch": 0.06588235294117648,
      "grad_norm": 0.14883267879486084,
      "learning_rate": 9.912667524114097e-06,
      "loss": 0.8199,
      "step": 98
    },
    {
      "epoch": 0.06655462184873949,
      "grad_norm": 0.128151535987854,
      "learning_rate": 9.910677382032346e-06,
      "loss": 0.7441,
      "step": 99
    },
    {
      "epoch": 0.06722689075630252,
      "grad_norm": 0.13587278127670288,
      "learning_rate": 9.908665023227906e-06,
      "loss": 0.8143,
      "step": 100
    },
    {
      "epoch": 0.06789915966386555,
      "grad_norm": 0.18061594665050507,
      "learning_rate": 9.906630456805024e-06,
      "loss": 0.9194,
      "step": 101
    },
    {
      "epoch": 0.06857142857142857,
      "grad_norm": 0.14718933403491974,
      "learning_rate": 9.904573691968417e-06,
      "loss": 0.8189,
      "step": 102
    },
    {
      "epoch": 0.0692436974789916,
      "grad_norm": 0.11956693977117538,
      "learning_rate": 9.902494738023233e-06,
      "loss": 0.7266,
      "step": 103
    },
    {
      "epoch": 0.06991596638655462,
      "grad_norm": 0.14369268715381622,
      "learning_rate": 9.900393604375006e-06,
      "loss": 0.8043,
      "step": 104
    },
    {
      "epoch": 0.07058823529411765,
      "grad_norm": 0.12656539678573608,
      "learning_rate": 9.898270300529615e-06,
      "loss": 0.8553,
      "step": 105
    },
    {
      "epoch": 0.07126050420168067,
      "grad_norm": 0.1397569477558136,
      "learning_rate": 9.89612483609324e-06,
      "loss": 0.8091,
      "step": 106
    },
    {
      "epoch": 0.0719327731092437,
      "grad_norm": 0.1262049674987793,
      "learning_rate": 9.893957220772319e-06,
      "loss": 0.8208,
      "step": 107
    },
    {
      "epoch": 0.07260504201680672,
      "grad_norm": 2.6684651374816895,
      "learning_rate": 9.891767464373503e-06,
      "loss": 0.9089,
      "step": 108
    },
    {
      "epoch": 0.07327731092436975,
      "grad_norm": 0.3158283233642578,
      "learning_rate": 9.889555576803617e-06,
      "loss": 0.8193,
      "step": 109
    },
    {
      "epoch": 0.07394957983193277,
      "grad_norm": 0.1529214233160019,
      "learning_rate": 9.887321568069612e-06,
      "loss": 0.7586,
      "step": 110
    },
    {
      "epoch": 0.0746218487394958,
      "grad_norm": 0.13424837589263916,
      "learning_rate": 9.88506544827851e-06,
      "loss": 0.7868,
      "step": 111
    },
    {
      "epoch": 0.07529411764705882,
      "grad_norm": 0.1253993809223175,
      "learning_rate": 9.882787227637376e-06,
      "loss": 0.7582,
      "step": 112
    },
    {
      "epoch": 0.07596638655462185,
      "grad_norm": 0.1568586230278015,
      "learning_rate": 9.88048691645326e-06,
      "loss": 0.7763,
      "step": 113
    },
    {
      "epoch": 0.07663865546218487,
      "grad_norm": 0.16978754103183746,
      "learning_rate": 9.87816452513315e-06,
      "loss": 0.796,
      "step": 114
    },
    {
      "epoch": 0.0773109243697479,
      "grad_norm": 0.1352425515651703,
      "learning_rate": 9.875820064183936e-06,
      "loss": 0.7634,
      "step": 115
    },
    {
      "epoch": 0.07798319327731093,
      "grad_norm": 0.1855115294456482,
      "learning_rate": 9.873453544212347e-06,
      "loss": 0.6697,
      "step": 116
    },
    {
      "epoch": 0.07865546218487394,
      "grad_norm": 0.1740746945142746,
      "learning_rate": 9.871064975924913e-06,
      "loss": 0.7209,
      "step": 117
    },
    {
      "epoch": 0.07932773109243697,
      "grad_norm": 0.2007463425397873,
      "learning_rate": 9.868654370127918e-06,
      "loss": 0.7044,
      "step": 118
    },
    {
      "epoch": 0.08,
      "grad_norm": 0.12837733328342438,
      "learning_rate": 9.866221737727341e-06,
      "loss": 0.7969,
      "step": 119
    },
    {
      "epoch": 0.08067226890756303,
      "grad_norm": 0.17075017094612122,
      "learning_rate": 9.86376708972882e-06,
      "loss": 0.6553,
      "step": 120
    },
    {
      "epoch": 0.08134453781512604,
      "grad_norm": 0.1436099112033844,
      "learning_rate": 9.861290437237585e-06,
      "loss": 0.7498,
      "step": 121
    },
    {
      "epoch": 0.08201680672268907,
      "grad_norm": 0.12680667638778687,
      "learning_rate": 9.858791791458431e-06,
      "loss": 0.6969,
      "step": 122
    },
    {
      "epoch": 0.0826890756302521,
      "grad_norm": 0.21285346150398254,
      "learning_rate": 9.856271163695646e-06,
      "loss": 0.8366,
      "step": 123
    },
    {
      "epoch": 0.08336134453781513,
      "grad_norm": 0.1799962818622589,
      "learning_rate": 9.85372856535297e-06,
      "loss": 0.6776,
      "step": 124
    },
    {
      "epoch": 0.08403361344537816,
      "grad_norm": 0.1583629548549652,
      "learning_rate": 9.851164007933539e-06,
      "loss": 0.8507,
      "step": 125
    },
    {
      "epoch": 0.08470588235294117,
      "grad_norm": 0.15405218303203583,
      "learning_rate": 9.848577503039843e-06,
      "loss": 0.7331,
      "step": 126
    },
    {
      "epoch": 0.0853781512605042,
      "grad_norm": 0.18484558165073395,
      "learning_rate": 9.845969062373655e-06,
      "loss": 0.7908,
      "step": 127
    },
    {
      "epoch": 0.08605042016806723,
      "grad_norm": 0.1605813205242157,
      "learning_rate": 9.843338697736e-06,
      "loss": 0.8332,
      "step": 128
    },
    {
      "epoch": 0.08672268907563026,
      "grad_norm": 0.14769910275936127,
      "learning_rate": 9.840686421027085e-06,
      "loss": 0.7287,
      "step": 129
    },
    {
      "epoch": 0.08739495798319327,
      "grad_norm": 0.16447949409484863,
      "learning_rate": 9.83801224424625e-06,
      "loss": 0.7388,
      "step": 130
    },
    {
      "epoch": 0.0880672268907563,
      "grad_norm": 0.15745119750499725,
      "learning_rate": 9.835316179491919e-06,
      "loss": 0.729,
      "step": 131
    },
    {
      "epoch": 0.08873949579831933,
      "grad_norm": 0.18337000906467438,
      "learning_rate": 9.832598238961534e-06,
      "loss": 0.7714,
      "step": 132
    },
    {
      "epoch": 0.08941176470588236,
      "grad_norm": 0.14670008420944214,
      "learning_rate": 9.829858434951516e-06,
      "loss": 0.7787,
      "step": 133
    },
    {
      "epoch": 0.09008403361344537,
      "grad_norm": 0.15470527112483978,
      "learning_rate": 9.82709677985719e-06,
      "loss": 0.8125,
      "step": 134
    },
    {
      "epoch": 0.0907563025210084,
      "grad_norm": 0.15246663987636566,
      "learning_rate": 9.824313286172748e-06,
      "loss": 0.7938,
      "step": 135
    },
    {
      "epoch": 0.09142857142857143,
      "grad_norm": 0.15700078010559082,
      "learning_rate": 9.821507966491178e-06,
      "loss": 0.6438,
      "step": 136
    },
    {
      "epoch": 0.09210084033613446,
      "grad_norm": 0.15387018024921417,
      "learning_rate": 9.81868083350421e-06,
      "loss": 0.6915,
      "step": 137
    },
    {
      "epoch": 0.09277310924369749,
      "grad_norm": 0.17690274119377136,
      "learning_rate": 9.81583190000227e-06,
      "loss": 0.6618,
      "step": 138
    },
    {
      "epoch": 0.0934453781512605,
      "grad_norm": 0.1894235461950302,
      "learning_rate": 9.812961178874404e-06,
      "loss": 0.7949,
      "step": 139
    },
    {
      "epoch": 0.09411764705882353,
      "grad_norm": 0.27365830540657043,
      "learning_rate": 9.810068683108233e-06,
      "loss": 0.6956,
      "step": 140
    },
    {
      "epoch": 0.09478991596638656,
      "grad_norm": 0.1983897089958191,
      "learning_rate": 9.807154425789894e-06,
      "loss": 0.6946,
      "step": 141
    },
    {
      "epoch": 0.09546218487394958,
      "grad_norm": 0.16158057749271393,
      "learning_rate": 9.804218420103969e-06,
      "loss": 0.6912,
      "step": 142
    },
    {
      "epoch": 0.0961344537815126,
      "grad_norm": 0.1842929571866989,
      "learning_rate": 9.801260679333435e-06,
      "loss": 0.6841,
      "step": 143
    },
    {
      "epoch": 0.09680672268907563,
      "grad_norm": 0.18475371599197388,
      "learning_rate": 9.798281216859608e-06,
      "loss": 0.8085,
      "step": 144
    },
    {
      "epoch": 0.09747899159663866,
      "grad_norm": 0.16353516280651093,
      "learning_rate": 9.795280046162072e-06,
      "loss": 0.7655,
      "step": 145
    },
    {
      "epoch": 0.09815126050420168,
      "grad_norm": 0.15461254119873047,
      "learning_rate": 9.792257180818622e-06,
      "loss": 0.7782,
      "step": 146
    },
    {
      "epoch": 0.0988235294117647,
      "grad_norm": 0.1649758517742157,
      "learning_rate": 9.789212634505205e-06,
      "loss": 0.728,
      "step": 147
    },
    {
      "epoch": 0.09949579831932773,
      "grad_norm": 0.1581656038761139,
      "learning_rate": 9.786146420995856e-06,
      "loss": 0.832,
      "step": 148
    },
    {
      "epoch": 0.10016806722689076,
      "grad_norm": 0.15653114020824432,
      "learning_rate": 9.783058554162637e-06,
      "loss": 0.7491,
      "step": 149
    },
    {
      "epoch": 0.10084033613445378,
      "grad_norm": 0.18371178209781647,
      "learning_rate": 9.779949047975568e-06,
      "loss": 0.7454,
      "step": 150
    },
    {
      "epoch": 0.10151260504201681,
      "grad_norm": 0.1631736010313034,
      "learning_rate": 9.776817916502577e-06,
      "loss": 0.7361,
      "step": 151
    },
    {
      "epoch": 0.10218487394957983,
      "grad_norm": 0.16311539709568024,
      "learning_rate": 9.773665173909424e-06,
      "loss": 0.6452,
      "step": 152
    },
    {
      "epoch": 0.10285714285714286,
      "grad_norm": 0.19410477578639984,
      "learning_rate": 9.770490834459641e-06,
      "loss": 0.8368,
      "step": 153
    },
    {
      "epoch": 0.10352941176470588,
      "grad_norm": 0.19130192697048187,
      "learning_rate": 9.767294912514468e-06,
      "loss": 0.6695,
      "step": 154
    },
    {
      "epoch": 0.10420168067226891,
      "grad_norm": 0.15637724101543427,
      "learning_rate": 9.76407742253279e-06,
      "loss": 0.6275,
      "step": 155
    },
    {
      "epoch": 0.10487394957983193,
      "grad_norm": 0.16742682456970215,
      "learning_rate": 9.760838379071065e-06,
      "loss": 0.7172,
      "step": 156
    },
    {
      "epoch": 0.10554621848739495,
      "grad_norm": 0.47532567381858826,
      "learning_rate": 9.757577796783268e-06,
      "loss": 0.8479,
      "step": 157
    },
    {
      "epoch": 0.10621848739495798,
      "grad_norm": 0.14579716324806213,
      "learning_rate": 9.754295690420815e-06,
      "loss": 0.6863,
      "step": 158
    },
    {
      "epoch": 0.10689075630252101,
      "grad_norm": 0.1644534468650818,
      "learning_rate": 9.750992074832503e-06,
      "loss": 0.7262,
      "step": 159
    },
    {
      "epoch": 0.10756302521008404,
      "grad_norm": 0.1633559912443161,
      "learning_rate": 9.74766696496444e-06,
      "loss": 0.5784,
      "step": 160
    },
    {
      "epoch": 0.10823529411764705,
      "grad_norm": 0.17903897166252136,
      "learning_rate": 9.744320375859975e-06,
      "loss": 0.7111,
      "step": 161
    },
    {
      "epoch": 0.10890756302521008,
      "grad_norm": 0.1747799664735794,
      "learning_rate": 9.740952322659636e-06,
      "loss": 0.6858,
      "step": 162
    },
    {
      "epoch": 0.10957983193277311,
      "grad_norm": 0.15818876028060913,
      "learning_rate": 9.737562820601058e-06,
      "loss": 0.7589,
      "step": 163
    },
    {
      "epoch": 0.11025210084033614,
      "grad_norm": 0.16842347383499146,
      "learning_rate": 9.73415188501891e-06,
      "loss": 0.6315,
      "step": 164
    },
    {
      "epoch": 0.11092436974789915,
      "grad_norm": 0.17130237817764282,
      "learning_rate": 9.730719531344837e-06,
      "loss": 0.7264,
      "step": 165
    },
    {
      "epoch": 0.11159663865546218,
      "grad_norm": 0.21110771596431732,
      "learning_rate": 9.727265775107375e-06,
      "loss": 0.8001,
      "step": 166
    },
    {
      "epoch": 0.11226890756302521,
      "grad_norm": 0.2074054777622223,
      "learning_rate": 9.723790631931892e-06,
      "loss": 0.7568,
      "step": 167
    },
    {
      "epoch": 0.11294117647058824,
      "grad_norm": 0.17436759173870087,
      "learning_rate": 9.72029411754052e-06,
      "loss": 0.7216,
      "step": 168
    },
    {
      "epoch": 0.11361344537815125,
      "grad_norm": 0.15503355860710144,
      "learning_rate": 9.71677624775207e-06,
      "loss": 0.5998,
      "step": 169
    },
    {
      "epoch": 0.11428571428571428,
      "grad_norm": 0.2187543511390686,
      "learning_rate": 9.71323703848197e-06,
      "loss": 0.8118,
      "step": 170
    },
    {
      "epoch": 0.11495798319327731,
      "grad_norm": 0.17232966423034668,
      "learning_rate": 9.709676505742194e-06,
      "loss": 0.7798,
      "step": 171
    },
    {
      "epoch": 0.11563025210084034,
      "grad_norm": 0.16726981103420258,
      "learning_rate": 9.706094665641184e-06,
      "loss": 0.7202,
      "step": 172
    },
    {
      "epoch": 0.11630252100840337,
      "grad_norm": 0.1730181723833084,
      "learning_rate": 9.70249153438378e-06,
      "loss": 0.724,
      "step": 173
    },
    {
      "epoch": 0.11697478991596638,
      "grad_norm": 0.2613270580768585,
      "learning_rate": 9.698867128271152e-06,
      "loss": 0.639,
      "step": 174
    },
    {
      "epoch": 0.11764705882352941,
      "grad_norm": 0.17092633247375488,
      "learning_rate": 9.695221463700715e-06,
      "loss": 0.7663,
      "step": 175
    },
    {
      "epoch": 0.11831932773109244,
      "grad_norm": 5.159976959228516,
      "learning_rate": 9.691554557166063e-06,
      "loss": 0.6931,
      "step": 176
    },
    {
      "epoch": 0.11899159663865547,
      "grad_norm": 0.16329586505889893,
      "learning_rate": 9.687866425256894e-06,
      "loss": 0.6889,
      "step": 177
    },
    {
      "epoch": 0.11966386554621848,
      "grad_norm": 0.1860726773738861,
      "learning_rate": 9.684157084658929e-06,
      "loss": 0.6816,
      "step": 178
    },
    {
      "epoch": 0.12033613445378151,
      "grad_norm": 0.16123488545417786,
      "learning_rate": 9.680426552153843e-06,
      "loss": 0.6856,
      "step": 179
    },
    {
      "epoch": 0.12100840336134454,
      "grad_norm": 0.180853471159935,
      "learning_rate": 9.676674844619187e-06,
      "loss": 0.693,
      "step": 180
    },
    {
      "epoch": 0.12168067226890757,
      "grad_norm": 0.20253466069698334,
      "learning_rate": 9.67290197902831e-06,
      "loss": 0.8013,
      "step": 181
    },
    {
      "epoch": 0.1223529411764706,
      "grad_norm": 0.17012205719947815,
      "learning_rate": 9.66910797245029e-06,
      "loss": 0.7746,
      "step": 182
    },
    {
      "epoch": 0.12302521008403361,
      "grad_norm": 0.24241431057453156,
      "learning_rate": 9.665292842049836e-06,
      "loss": 0.7619,
      "step": 183
    },
    {
      "epoch": 0.12369747899159664,
      "grad_norm": 0.3148103654384613,
      "learning_rate": 9.66145660508724e-06,
      "loss": 0.738,
      "step": 184
    },
    {
      "epoch": 0.12436974789915967,
      "grad_norm": 0.20171897113323212,
      "learning_rate": 9.657599278918278e-06,
      "loss": 0.7616,
      "step": 185
    },
    {
      "epoch": 0.1250420168067227,
      "grad_norm": 0.17481227219104767,
      "learning_rate": 9.65372088099413e-06,
      "loss": 0.7468,
      "step": 186
    },
    {
      "epoch": 0.12571428571428572,
      "grad_norm": 0.20101875066757202,
      "learning_rate": 9.649821428861319e-06,
      "loss": 0.7537,
      "step": 187
    },
    {
      "epoch": 0.12638655462184875,
      "grad_norm": 0.20836427807807922,
      "learning_rate": 9.645900940161616e-06,
      "loss": 0.7499,
      "step": 188
    },
    {
      "epoch": 0.12705882352941175,
      "grad_norm": 0.22894078493118286,
      "learning_rate": 9.641959432631966e-06,
      "loss": 0.7718,
      "step": 189
    },
    {
      "epoch": 0.12773109243697478,
      "grad_norm": 0.17879128456115723,
      "learning_rate": 9.637996924104403e-06,
      "loss": 0.6813,
      "step": 190
    },
    {
      "epoch": 0.1284033613445378,
      "grad_norm": 0.1844119280576706,
      "learning_rate": 9.634013432505977e-06,
      "loss": 0.8137,
      "step": 191
    },
    {
      "epoch": 0.12907563025210084,
      "grad_norm": 0.16975240409374237,
      "learning_rate": 9.630008975858667e-06,
      "loss": 0.7139,
      "step": 192
    },
    {
      "epoch": 0.12974789915966387,
      "grad_norm": 0.17935213446617126,
      "learning_rate": 9.625983572279304e-06,
      "loss": 0.6553,
      "step": 193
    },
    {
      "epoch": 0.1304201680672269,
      "grad_norm": 0.19229981303215027,
      "learning_rate": 9.621937239979484e-06,
      "loss": 0.6752,
      "step": 194
    },
    {
      "epoch": 0.13109243697478992,
      "grad_norm": 0.20354849100112915,
      "learning_rate": 9.617869997265486e-06,
      "loss": 0.6714,
      "step": 195
    },
    {
      "epoch": 0.13176470588235295,
      "grad_norm": 0.21125207841396332,
      "learning_rate": 9.613781862538196e-06,
      "loss": 0.6478,
      "step": 196
    },
    {
      "epoch": 0.13243697478991598,
      "grad_norm": 0.1878240555524826,
      "learning_rate": 9.609672854293018e-06,
      "loss": 0.6485,
      "step": 197
    },
    {
      "epoch": 0.13310924369747898,
      "grad_norm": 0.1830340474843979,
      "learning_rate": 9.605542991119787e-06,
      "loss": 0.8007,
      "step": 198
    },
    {
      "epoch": 0.133781512605042,
      "grad_norm": 0.1766654998064041,
      "learning_rate": 9.601392291702693e-06,
      "loss": 0.6918,
      "step": 199
    },
    {
      "epoch": 0.13445378151260504,
      "grad_norm": 0.2146570086479187,
      "learning_rate": 9.597220774820193e-06,
      "loss": 0.7351,
      "step": 200
    },
    {
      "epoch": 0.13512605042016806,
      "grad_norm": 0.3166307210922241,
      "learning_rate": 9.593028459344923e-06,
      "loss": 0.7551,
      "step": 201
    },
    {
      "epoch": 0.1357983193277311,
      "grad_norm": 0.20315152406692505,
      "learning_rate": 9.588815364243618e-06,
      "loss": 0.7196,
      "step": 202
    },
    {
      "epoch": 0.13647058823529412,
      "grad_norm": 0.19757510721683502,
      "learning_rate": 9.58458150857702e-06,
      "loss": 0.7198,
      "step": 203
    },
    {
      "epoch": 0.13714285714285715,
      "grad_norm": 0.1865769773721695,
      "learning_rate": 9.5803269114998e-06,
      "loss": 0.6766,
      "step": 204
    },
    {
      "epoch": 0.13781512605042018,
      "grad_norm": 0.21868400275707245,
      "learning_rate": 9.576051592260464e-06,
      "loss": 0.7456,
      "step": 205
    },
    {
      "epoch": 0.1384873949579832,
      "grad_norm": 0.23371611535549164,
      "learning_rate": 9.571755570201266e-06,
      "loss": 0.7324,
      "step": 206
    },
    {
      "epoch": 0.1391596638655462,
      "grad_norm": 0.1825055480003357,
      "learning_rate": 9.567438864758128e-06,
      "loss": 0.7392,
      "step": 207
    },
    {
      "epoch": 0.13983193277310924,
      "grad_norm": 0.20412546396255493,
      "learning_rate": 9.563101495460543e-06,
      "loss": 0.6748,
      "step": 208
    },
    {
      "epoch": 0.14050420168067226,
      "grad_norm": 0.18851631879806519,
      "learning_rate": 9.558743481931494e-06,
      "loss": 0.7368,
      "step": 209
    },
    {
      "epoch": 0.1411764705882353,
      "grad_norm": 0.19340087473392487,
      "learning_rate": 9.554364843887361e-06,
      "loss": 0.7966,
      "step": 210
    },
    {
      "epoch": 0.14184873949579832,
      "grad_norm": 0.21277926862239838,
      "learning_rate": 9.549965601137827e-06,
      "loss": 0.7708,
      "step": 211
    },
    {
      "epoch": 0.14252100840336135,
      "grad_norm": 0.19546258449554443,
      "learning_rate": 9.545545773585806e-06,
      "loss": 0.8285,
      "step": 212
    },
    {
      "epoch": 0.14319327731092438,
      "grad_norm": 2.4378411769866943,
      "learning_rate": 9.54110538122733e-06,
      "loss": 0.6936,
      "step": 213
    },
    {
      "epoch": 0.1438655462184874,
      "grad_norm": 0.17651917040348053,
      "learning_rate": 9.536644444151475e-06,
      "loss": 0.6601,
      "step": 214
    },
    {
      "epoch": 0.14453781512605043,
      "grad_norm": 0.19953793287277222,
      "learning_rate": 9.532162982540264e-06,
      "loss": 0.6732,
      "step": 215
    },
    {
      "epoch": 0.14521008403361343,
      "grad_norm": 0.18227693438529968,
      "learning_rate": 9.527661016668577e-06,
      "loss": 0.7152,
      "step": 216
    },
    {
      "epoch": 0.14588235294117646,
      "grad_norm": 0.2055724710226059,
      "learning_rate": 9.523138566904053e-06,
      "loss": 0.651,
      "step": 217
    },
    {
      "epoch": 0.1465546218487395,
      "grad_norm": 0.1922536939382553,
      "learning_rate": 9.518595653707015e-06,
      "loss": 0.694,
      "step": 218
    },
    {
      "epoch": 0.14722689075630252,
      "grad_norm": 0.1832892894744873,
      "learning_rate": 9.514032297630354e-06,
      "loss": 0.7108,
      "step": 219
    },
    {
      "epoch": 0.14789915966386555,
      "grad_norm": 0.20151115953922272,
      "learning_rate": 9.509448519319455e-06,
      "loss": 0.7187,
      "step": 220
    },
    {
      "epoch": 0.14857142857142858,
      "grad_norm": 0.1979878842830658,
      "learning_rate": 9.504844339512096e-06,
      "loss": 0.7189,
      "step": 221
    },
    {
      "epoch": 0.1492436974789916,
      "grad_norm": 0.20959986746311188,
      "learning_rate": 9.500219779038353e-06,
      "loss": 0.7598,
      "step": 222
    },
    {
      "epoch": 0.14991596638655463,
      "grad_norm": 0.26220154762268066,
      "learning_rate": 9.495574858820507e-06,
      "loss": 0.6579,
      "step": 223
    },
    {
      "epoch": 0.15058823529411763,
      "grad_norm": 0.20292134582996368,
      "learning_rate": 9.49090959987295e-06,
      "loss": 0.6989,
      "step": 224
    },
    {
      "epoch": 0.15126050420168066,
      "grad_norm": 0.18850331008434296,
      "learning_rate": 9.486224023302095e-06,
      "loss": 0.6869,
      "step": 225
    },
    {
      "epoch": 0.1519327731092437,
      "grad_norm": 0.2740769386291504,
      "learning_rate": 9.481518150306267e-06,
      "loss": 0.7336,
      "step": 226
    },
    {
      "epoch": 0.15260504201680672,
      "grad_norm": 0.2561083734035492,
      "learning_rate": 9.476792002175621e-06,
      "loss": 0.7116,
      "step": 227
    },
    {
      "epoch": 0.15327731092436975,
      "grad_norm": 0.19910801947116852,
      "learning_rate": 9.47204560029204e-06,
      "loss": 0.6767,
      "step": 228
    },
    {
      "epoch": 0.15394957983193278,
      "grad_norm": 0.1883971393108368,
      "learning_rate": 9.467278966129034e-06,
      "loss": 0.7448,
      "step": 229
    },
    {
      "epoch": 0.1546218487394958,
      "grad_norm": 0.20471471548080444,
      "learning_rate": 9.462492121251653e-06,
      "loss": 0.6735,
      "step": 230
    },
    {
      "epoch": 0.15529411764705883,
      "grad_norm": 0.2020663022994995,
      "learning_rate": 9.45768508731638e-06,
      "loss": 0.7307,
      "step": 231
    },
    {
      "epoch": 0.15596638655462186,
      "grad_norm": 0.1929408609867096,
      "learning_rate": 9.452857886071037e-06,
      "loss": 0.7016,
      "step": 232
    },
    {
      "epoch": 0.15663865546218486,
      "grad_norm": 0.29967066645622253,
      "learning_rate": 9.448010539354684e-06,
      "loss": 0.7133,
      "step": 233
    },
    {
      "epoch": 0.1573109243697479,
      "grad_norm": 0.18856149911880493,
      "learning_rate": 9.443143069097531e-06,
      "loss": 0.6637,
      "step": 234
    },
    {
      "epoch": 0.15798319327731092,
      "grad_norm": 0.18955619633197784,
      "learning_rate": 9.43825549732082e-06,
      "loss": 0.7367,
      "step": 235
    },
    {
      "epoch": 0.15865546218487395,
      "grad_norm": 1.0548439025878906,
      "learning_rate": 9.433347846136743e-06,
      "loss": 0.7294,
      "step": 236
    },
    {
      "epoch": 0.15932773109243697,
      "grad_norm": 0.19085678458213806,
      "learning_rate": 9.428420137748329e-06,
      "loss": 0.7797,
      "step": 237
    },
    {
      "epoch": 0.16,
      "grad_norm": 0.2003992199897766,
      "learning_rate": 9.423472394449354e-06,
      "loss": 0.7579,
      "step": 238
    },
    {
      "epoch": 0.16067226890756303,
      "grad_norm": 0.4015868902206421,
      "learning_rate": 9.418504638624233e-06,
      "loss": 0.6845,
      "step": 239
    },
    {
      "epoch": 0.16134453781512606,
      "grad_norm": 0.1845397651195526,
      "learning_rate": 9.413516892747918e-06,
      "loss": 0.7254,
      "step": 240
    },
    {
      "epoch": 0.1620168067226891,
      "grad_norm": 0.2877404987812042,
      "learning_rate": 9.408509179385806e-06,
      "loss": 0.5533,
      "step": 241
    },
    {
      "epoch": 0.1626890756302521,
      "grad_norm": 1.1842849254608154,
      "learning_rate": 9.403481521193625e-06,
      "loss": 0.7173,
      "step": 242
    },
    {
      "epoch": 0.16336134453781512,
      "grad_norm": 0.19836628437042236,
      "learning_rate": 9.398433940917337e-06,
      "loss": 0.6032,
      "step": 243
    },
    {
      "epoch": 0.16403361344537815,
      "grad_norm": 0.18493618071079254,
      "learning_rate": 9.39336646139304e-06,
      "loss": 0.691,
      "step": 244
    },
    {
      "epoch": 0.16470588235294117,
      "grad_norm": 0.21216131746768951,
      "learning_rate": 9.388279105546852e-06,
      "loss": 0.7955,
      "step": 245
    },
    {
      "epoch": 0.1653781512605042,
      "grad_norm": 0.19313201308250427,
      "learning_rate": 9.38317189639482e-06,
      "loss": 0.6655,
      "step": 246
    },
    {
      "epoch": 0.16605042016806723,
      "grad_norm": 0.20905853807926178,
      "learning_rate": 9.37804485704281e-06,
      "loss": 0.7399,
      "step": 247
    },
    {
      "epoch": 0.16672268907563026,
      "grad_norm": 0.19061219692230225,
      "learning_rate": 9.3728980106864e-06,
      "loss": 0.6734,
      "step": 248
    },
    {
      "epoch": 0.1673949579831933,
      "grad_norm": 0.31909891963005066,
      "learning_rate": 9.367731380610784e-06,
      "loss": 0.8297,
      "step": 249
    },
    {
      "epoch": 0.16806722689075632,
      "grad_norm": 0.18734252452850342,
      "learning_rate": 9.362544990190654e-06,
      "loss": 0.6964,
      "step": 250
    },
    {
      "epoch": 0.16873949579831932,
      "grad_norm": 0.2664547562599182,
      "learning_rate": 9.357338862890107e-06,
      "loss": 0.6729,
      "step": 251
    },
    {
      "epoch": 0.16941176470588235,
      "grad_norm": 0.23206093907356262,
      "learning_rate": 9.352113022262525e-06,
      "loss": 0.7237,
      "step": 252
    },
    {
      "epoch": 0.17008403361344537,
      "grad_norm": 0.19709858298301697,
      "learning_rate": 9.34686749195049e-06,
      "loss": 0.7221,
      "step": 253
    },
    {
      "epoch": 0.1707563025210084,
      "grad_norm": 0.2132461667060852,
      "learning_rate": 9.341602295685649e-06,
      "loss": 0.7707,
      "step": 254
    },
    {
      "epoch": 0.17142857142857143,
      "grad_norm": 0.20399631559848785,
      "learning_rate": 9.33631745728863e-06,
      "loss": 0.6073,
      "step": 255
    },
    {
      "epoch": 0.17210084033613446,
      "grad_norm": 0.21512362360954285,
      "learning_rate": 9.33101300066892e-06,
      "loss": 0.7637,
      "step": 256
    },
    {
      "epoch": 0.1727731092436975,
      "grad_norm": 0.38459283113479614,
      "learning_rate": 9.325688949824768e-06,
      "loss": 0.7271,
      "step": 257
    },
    {
      "epoch": 0.17344537815126052,
      "grad_norm": 0.6972576975822449,
      "learning_rate": 9.320345328843063e-06,
      "loss": 0.7559,
      "step": 258
    },
    {
      "epoch": 0.17411764705882352,
      "grad_norm": 0.20466119050979614,
      "learning_rate": 9.31498216189924e-06,
      "loss": 0.6713,
      "step": 259
    },
    {
      "epoch": 0.17478991596638654,
      "grad_norm": 0.3543570637702942,
      "learning_rate": 9.309599473257157e-06,
      "loss": 0.7256,
      "step": 260
    },
    {
      "epoch": 0.17546218487394957,
      "grad_norm": 0.19050233066082,
      "learning_rate": 9.304197287269e-06,
      "loss": 0.7107,
      "step": 261
    },
    {
      "epoch": 0.1761344537815126,
      "grad_norm": 0.19116462767124176,
      "learning_rate": 9.29877562837515e-06,
      "loss": 0.6839,
      "step": 262
    },
    {
      "epoch": 0.17680672268907563,
      "grad_norm": 0.22071011364459991,
      "learning_rate": 9.293334521104103e-06,
      "loss": 0.7256,
      "step": 263
    },
    {
      "epoch": 0.17747899159663866,
      "grad_norm": 0.21820850670337677,
      "learning_rate": 9.287873990072328e-06,
      "loss": 0.6952,
      "step": 264
    },
    {
      "epoch": 0.1781512605042017,
      "grad_norm": 0.19550518691539764,
      "learning_rate": 9.282394059984186e-06,
      "loss": 0.697,
      "step": 265
    },
    {
      "epoch": 0.17882352941176471,
      "grad_norm": 0.18131797015666962,
      "learning_rate": 9.276894755631785e-06,
      "loss": 0.6845,
      "step": 266
    },
    {
      "epoch": 0.17949579831932774,
      "grad_norm": 0.2566013038158417,
      "learning_rate": 9.271376101894897e-06,
      "loss": 0.694,
      "step": 267
    },
    {
      "epoch": 0.18016806722689074,
      "grad_norm": 0.20400108397006989,
      "learning_rate": 9.265838123740833e-06,
      "loss": 0.6565,
      "step": 268
    },
    {
      "epoch": 0.18084033613445377,
      "grad_norm": 0.22308161854743958,
      "learning_rate": 9.260280846224328e-06,
      "loss": 0.6994,
      "step": 269
    },
    {
      "epoch": 0.1815126050420168,
      "grad_norm": 0.19968609511852264,
      "learning_rate": 9.25470429448743e-06,
      "loss": 0.7693,
      "step": 270
    },
    {
      "epoch": 0.18218487394957983,
      "grad_norm": 0.19011756777763367,
      "learning_rate": 9.24910849375939e-06,
      "loss": 0.6951,
      "step": 271
    },
    {
      "epoch": 0.18285714285714286,
      "grad_norm": 0.2302047312259674,
      "learning_rate": 9.243493469356543e-06,
      "loss": 0.765,
      "step": 272
    },
    {
      "epoch": 0.18352941176470589,
      "grad_norm": 0.19871503114700317,
      "learning_rate": 9.237859246682194e-06,
      "loss": 0.5769,
      "step": 273
    },
    {
      "epoch": 0.1842016806722689,
      "grad_norm": 0.20518113672733307,
      "learning_rate": 9.232205851226504e-06,
      "loss": 0.6779,
      "step": 274
    },
    {
      "epoch": 0.18487394957983194,
      "grad_norm": 0.2253684103488922,
      "learning_rate": 9.226533308566379e-06,
      "loss": 0.8146,
      "step": 275
    },
    {
      "epoch": 0.18554621848739497,
      "grad_norm": 0.21118251979351044,
      "learning_rate": 9.220841644365343e-06,
      "loss": 0.6319,
      "step": 276
    },
    {
      "epoch": 0.18621848739495797,
      "grad_norm": 0.229267880320549,
      "learning_rate": 9.215130884373437e-06,
      "loss": 0.5965,
      "step": 277
    },
    {
      "epoch": 0.186890756302521,
      "grad_norm": 0.21749767661094666,
      "learning_rate": 9.20940105442709e-06,
      "loss": 0.6744,
      "step": 278
    },
    {
      "epoch": 0.18756302521008403,
      "grad_norm": 0.21476207673549652,
      "learning_rate": 9.203652180449006e-06,
      "loss": 0.7267,
      "step": 279
    },
    {
      "epoch": 0.18823529411764706,
      "grad_norm": 0.21310514211654663,
      "learning_rate": 9.19788428844805e-06,
      "loss": 0.6398,
      "step": 280
    },
    {
      "epoch": 0.18890756302521008,
      "grad_norm": 0.20523132383823395,
      "learning_rate": 9.192097404519125e-06,
      "loss": 0.71,
      "step": 281
    },
    {
      "epoch": 0.1895798319327731,
      "grad_norm": 0.21132954955101013,
      "learning_rate": 9.186291554843058e-06,
      "loss": 0.7067,
      "step": 282
    },
    {
      "epoch": 0.19025210084033614,
      "grad_norm": 0.3063057065010071,
      "learning_rate": 9.180466765686485e-06,
      "loss": 0.7032,
      "step": 283
    },
    {
      "epoch": 0.19092436974789917,
      "grad_norm": 0.22190402448177338,
      "learning_rate": 9.174623063401715e-06,
      "loss": 0.6836,
      "step": 284
    },
    {
      "epoch": 0.1915966386554622,
      "grad_norm": 0.19611823558807373,
      "learning_rate": 9.168760474426637e-06,
      "loss": 0.6805,
      "step": 285
    },
    {
      "epoch": 0.1922689075630252,
      "grad_norm": 0.19553613662719727,
      "learning_rate": 9.162879025284576e-06,
      "loss": 0.6299,
      "step": 286
    },
    {
      "epoch": 0.19294117647058823,
      "grad_norm": 0.20817598700523376,
      "learning_rate": 9.156978742584193e-06,
      "loss": 0.7777,
      "step": 287
    },
    {
      "epoch": 0.19361344537815126,
      "grad_norm": 0.2142859697341919,
      "learning_rate": 9.151059653019345e-06,
      "loss": 0.6493,
      "step": 288
    },
    {
      "epoch": 0.19428571428571428,
      "grad_norm": 0.21361011266708374,
      "learning_rate": 9.145121783368983e-06,
      "loss": 0.5819,
      "step": 289
    },
    {
      "epoch": 0.1949579831932773,
      "grad_norm": 0.19033358991146088,
      "learning_rate": 9.139165160497017e-06,
      "loss": 0.5813,
      "step": 290
    },
    {
      "epoch": 0.19563025210084034,
      "grad_norm": 0.20409995317459106,
      "learning_rate": 9.133189811352198e-06,
      "loss": 0.6008,
      "step": 291
    },
    {
      "epoch": 0.19630252100840337,
      "grad_norm": 0.20880399644374847,
      "learning_rate": 9.127195762968008e-06,
      "loss": 0.7718,
      "step": 292
    },
    {
      "epoch": 0.1969747899159664,
      "grad_norm": 0.33951178193092346,
      "learning_rate": 9.121183042462517e-06,
      "loss": 0.7092,
      "step": 293
    },
    {
      "epoch": 0.1976470588235294,
      "grad_norm": 0.2210870087146759,
      "learning_rate": 9.115151677038274e-06,
      "loss": 0.786,
      "step": 294
    },
    {
      "epoch": 0.19831932773109243,
      "grad_norm": 0.22350308299064636,
      "learning_rate": 9.109101693982183e-06,
      "loss": 0.7115,
      "step": 295
    },
    {
      "epoch": 0.19899159663865545,
      "grad_norm": 0.23932407796382904,
      "learning_rate": 9.103033120665372e-06,
      "loss": 0.6476,
      "step": 296
    },
    {
      "epoch": 0.19966386554621848,
      "grad_norm": 0.2179158478975296,
      "learning_rate": 9.096945984543082e-06,
      "loss": 0.653,
      "step": 297
    },
    {
      "epoch": 0.2003361344537815,
      "grad_norm": 0.20750783383846283,
      "learning_rate": 9.090840313154527e-06,
      "loss": 0.6717,
      "step": 298
    },
    {
      "epoch": 0.20100840336134454,
      "grad_norm": 0.2178952544927597,
      "learning_rate": 9.084716134122785e-06,
      "loss": 0.636,
      "step": 299
    },
    {
      "epoch": 0.20168067226890757,
      "grad_norm": 0.19057603180408478,
      "learning_rate": 9.078573475154663e-06,
      "loss": 0.6087,
      "step": 300
    },
    {
      "epoch": 0.2023529411764706,
      "grad_norm": 0.3372030258178711,
      "learning_rate": 9.072412364040569e-06,
      "loss": 0.794,
      "step": 301
    },
    {
      "epoch": 0.20302521008403362,
      "grad_norm": 0.2145543247461319,
      "learning_rate": 9.066232828654398e-06,
      "loss": 0.649,
      "step": 302
    },
    {
      "epoch": 0.20369747899159663,
      "grad_norm": 0.22783125936985016,
      "learning_rate": 9.0600348969534e-06,
      "loss": 0.6988,
      "step": 303
    },
    {
      "epoch": 0.20436974789915965,
      "grad_norm": 0.35722485184669495,
      "learning_rate": 9.053818596978051e-06,
      "loss": 0.5973,
      "step": 304
    },
    {
      "epoch": 0.20504201680672268,
      "grad_norm": 0.256307989358902,
      "learning_rate": 9.047583956851924e-06,
      "loss": 0.733,
      "step": 305
    },
    {
      "epoch": 0.2057142857142857,
      "grad_norm": 0.3715127110481262,
      "learning_rate": 9.041331004781571e-06,
      "loss": 0.7164,
      "step": 306
    },
    {
      "epoch": 0.20638655462184874,
      "grad_norm": 0.21380753815174103,
      "learning_rate": 9.035059769056392e-06,
      "loss": 0.6332,
      "step": 307
    },
    {
      "epoch": 0.20705882352941177,
      "grad_norm": 0.22382056713104248,
      "learning_rate": 9.0287702780485e-06,
      "loss": 0.5795,
      "step": 308
    },
    {
      "epoch": 0.2077310924369748,
      "grad_norm": 0.2213864028453827,
      "learning_rate": 9.0224625602126e-06,
      "loss": 0.6573,
      "step": 309
    },
    {
      "epoch": 0.20840336134453782,
      "grad_norm": 0.2228260636329651,
      "learning_rate": 9.01613664408586e-06,
      "loss": 0.8475,
      "step": 310
    },
    {
      "epoch": 0.20907563025210085,
      "grad_norm": 0.25468334555625916,
      "learning_rate": 9.009792558287777e-06,
      "loss": 0.819,
      "step": 311
    },
    {
      "epoch": 0.20974789915966385,
      "grad_norm": 0.20580936968326569,
      "learning_rate": 9.003430331520054e-06,
      "loss": 0.7051,
      "step": 312
    },
    {
      "epoch": 0.21042016806722688,
      "grad_norm": 0.22535796463489532,
      "learning_rate": 8.997049992566463e-06,
      "loss": 0.6888,
      "step": 313
    },
    {
      "epoch": 0.2110924369747899,
      "grad_norm": 0.1907246857881546,
      "learning_rate": 8.990651570292719e-06,
      "loss": 0.5605,
      "step": 314
    },
    {
      "epoch": 0.21176470588235294,
      "grad_norm": 0.203364759683609,
      "learning_rate": 8.984235093646355e-06,
      "loss": 0.6962,
      "step": 315
    },
    {
      "epoch": 0.21243697478991597,
      "grad_norm": 0.2653529942035675,
      "learning_rate": 8.977800591656578e-06,
      "loss": 0.6416,
      "step": 316
    },
    {
      "epoch": 0.213109243697479,
      "grad_norm": 0.19569313526153564,
      "learning_rate": 8.971348093434147e-06,
      "loss": 0.5888,
      "step": 317
    },
    {
      "epoch": 0.21378151260504202,
      "grad_norm": 0.20980022847652435,
      "learning_rate": 8.96487762817124e-06,
      "loss": 0.7006,
      "step": 318
    },
    {
      "epoch": 0.21445378151260505,
      "grad_norm": 0.20609882473945618,
      "learning_rate": 8.958389225141319e-06,
      "loss": 0.5779,
      "step": 319
    },
    {
      "epoch": 0.21512605042016808,
      "grad_norm": 0.2215481400489807,
      "learning_rate": 8.951882913699001e-06,
      "loss": 0.7546,
      "step": 320
    },
    {
      "epoch": 0.21579831932773108,
      "grad_norm": 0.23511946201324463,
      "learning_rate": 8.945358723279922e-06,
      "loss": 0.7205,
      "step": 321
    },
    {
      "epoch": 0.2164705882352941,
      "grad_norm": 0.41210508346557617,
      "learning_rate": 8.938816683400609e-06,
      "loss": 0.755,
      "step": 322
    },
    {
      "epoch": 0.21714285714285714,
      "grad_norm": 0.29828765988349915,
      "learning_rate": 8.932256823658337e-06,
      "loss": 0.681,
      "step": 323
    },
    {
      "epoch": 0.21781512605042017,
      "grad_norm": 0.2204638570547104,
      "learning_rate": 8.925679173731005e-06,
      "loss": 0.6376,
      "step": 324
    },
    {
      "epoch": 0.2184873949579832,
      "grad_norm": 0.27005571126937866,
      "learning_rate": 8.919083763377001e-06,
      "loss": 0.7109,
      "step": 325
    },
    {
      "epoch": 0.21915966386554622,
      "grad_norm": 0.24745038151741028,
      "learning_rate": 8.912470622435056e-06,
      "loss": 0.6251,
      "step": 326
    },
    {
      "epoch": 0.21983193277310925,
      "grad_norm": 0.20610134303569794,
      "learning_rate": 8.905839780824124e-06,
      "loss": 0.687,
      "step": 327
    },
    {
      "epoch": 0.22050420168067228,
      "grad_norm": 0.22061991691589355,
      "learning_rate": 8.899191268543237e-06,
      "loss": 0.6248,
      "step": 328
    },
    {
      "epoch": 0.2211764705882353,
      "grad_norm": 0.22769795358181,
      "learning_rate": 8.892525115671372e-06,
      "loss": 0.7537,
      "step": 329
    },
    {
      "epoch": 0.2218487394957983,
      "grad_norm": 0.2176414579153061,
      "learning_rate": 8.885841352367315e-06,
      "loss": 0.6707,
      "step": 330
    },
    {
      "epoch": 0.22252100840336134,
      "grad_norm": 0.23448556661605835,
      "learning_rate": 8.879140008869525e-06,
      "loss": 0.6581,
      "step": 331
    },
    {
      "epoch": 0.22319327731092437,
      "grad_norm": 0.47701385617256165,
      "learning_rate": 8.872421115495996e-06,
      "loss": 0.6326,
      "step": 332
    },
    {
      "epoch": 0.2238655462184874,
      "grad_norm": 0.2669718265533447,
      "learning_rate": 8.865684702644121e-06,
      "loss": 0.6678,
      "step": 333
    },
    {
      "epoch": 0.22453781512605042,
      "grad_norm": 0.2369898110628128,
      "learning_rate": 8.858930800790557e-06,
      "loss": 0.6582,
      "step": 334
    },
    {
      "epoch": 0.22521008403361345,
      "grad_norm": 0.2101169228553772,
      "learning_rate": 8.852159440491077e-06,
      "loss": 0.7423,
      "step": 335
    },
    {
      "epoch": 0.22588235294117648,
      "grad_norm": 0.21075445413589478,
      "learning_rate": 8.845370652380447e-06,
      "loss": 0.6468,
      "step": 336
    },
    {
      "epoch": 0.2265546218487395,
      "grad_norm": 0.2244008332490921,
      "learning_rate": 8.838564467172274e-06,
      "loss": 0.6697,
      "step": 337
    },
    {
      "epoch": 0.2272268907563025,
      "grad_norm": 0.21729138493537903,
      "learning_rate": 8.831740915658872e-06,
      "loss": 0.682,
      "step": 338
    },
    {
      "epoch": 0.22789915966386554,
      "grad_norm": 0.21469177305698395,
      "learning_rate": 8.824900028711128e-06,
      "loss": 0.6144,
      "step": 339
    },
    {
      "epoch": 0.22857142857142856,
      "grad_norm": 0.19239458441734314,
      "learning_rate": 8.818041837278355e-06,
      "loss": 0.6099,
      "step": 340
    },
    {
      "epoch": 0.2292436974789916,
      "grad_norm": 0.24779051542282104,
      "learning_rate": 8.811166372388149e-06,
      "loss": 0.6901,
      "step": 341
    },
    {
      "epoch": 0.22991596638655462,
      "grad_norm": 0.21615615487098694,
      "learning_rate": 8.804273665146263e-06,
      "loss": 0.6693,
      "step": 342
    },
    {
      "epoch": 0.23058823529411765,
      "grad_norm": 0.27268433570861816,
      "learning_rate": 8.797363746736452e-06,
      "loss": 0.8542,
      "step": 343
    },
    {
      "epoch": 0.23126050420168068,
      "grad_norm": 0.2103165239095688,
      "learning_rate": 8.790436648420338e-06,
      "loss": 0.6726,
      "step": 344
    },
    {
      "epoch": 0.2319327731092437,
      "grad_norm": 0.2211274951696396,
      "learning_rate": 8.783492401537268e-06,
      "loss": 0.7011,
      "step": 345
    },
    {
      "epoch": 0.23260504201680673,
      "grad_norm": 0.22610008716583252,
      "learning_rate": 8.77653103750417e-06,
      "loss": 0.5894,
      "step": 346
    },
    {
      "epoch": 0.23327731092436974,
      "grad_norm": 0.21889732778072357,
      "learning_rate": 8.769552587815417e-06,
      "loss": 0.6557,
      "step": 347
    },
    {
      "epoch": 0.23394957983193276,
      "grad_norm": 0.22735275328159332,
      "learning_rate": 8.76255708404268e-06,
      "loss": 0.6196,
      "step": 348
    },
    {
      "epoch": 0.2346218487394958,
      "grad_norm": 0.24510014057159424,
      "learning_rate": 8.755544557834779e-06,
      "loss": 0.6766,
      "step": 349
    },
    {
      "epoch": 0.23529411764705882,
      "grad_norm": 0.20867806673049927,
      "learning_rate": 8.748515040917555e-06,
      "loss": 0.694,
      "step": 350
    },
    {
      "epoch": 0.23596638655462185,
      "grad_norm": 0.23174314200878143,
      "learning_rate": 8.741468565093713e-06,
      "loss": 0.6212,
      "step": 351
    },
    {
      "epoch": 0.23663865546218488,
      "grad_norm": 0.2600148022174835,
      "learning_rate": 8.73440516224268e-06,
      "loss": 0.5647,
      "step": 352
    },
    {
      "epoch": 0.2373109243697479,
      "grad_norm": 0.21916405856609344,
      "learning_rate": 8.727324864320472e-06,
      "loss": 0.7096,
      "step": 353
    },
    {
      "epoch": 0.23798319327731093,
      "grad_norm": 0.25194647908210754,
      "learning_rate": 8.720227703359536e-06,
      "loss": 0.7456,
      "step": 354
    },
    {
      "epoch": 0.23865546218487396,
      "grad_norm": 0.2153853476047516,
      "learning_rate": 8.713113711468607e-06,
      "loss": 0.6285,
      "step": 355
    },
    {
      "epoch": 0.23932773109243696,
      "grad_norm": 0.2569597065448761,
      "learning_rate": 8.705982920832573e-06,
      "loss": 0.6491,
      "step": 356
    },
    {
      "epoch": 0.24,
      "grad_norm": 0.2086285650730133,
      "learning_rate": 8.698835363712318e-06,
      "loss": 0.662,
      "step": 357
    },
    {
      "epoch": 0.24067226890756302,
      "grad_norm": 0.23374126851558685,
      "learning_rate": 8.691671072444582e-06,
      "loss": 0.729,
      "step": 358
    },
    {
      "epoch": 0.24134453781512605,
      "grad_norm": 0.2192952185869217,
      "learning_rate": 8.684490079441813e-06,
      "loss": 0.6381,
      "step": 359
    },
    {
      "epoch": 0.24201680672268908,
      "grad_norm": 0.2088990956544876,
      "learning_rate": 8.677292417192018e-06,
      "loss": 0.6033,
      "step": 360
    },
    {
      "epoch": 0.2426890756302521,
      "grad_norm": 0.21980631351470947,
      "learning_rate": 8.67007811825862e-06,
      "loss": 0.6557,
      "step": 361
    },
    {
      "epoch": 0.24336134453781513,
      "grad_norm": 0.21364949643611908,
      "learning_rate": 8.66284721528031e-06,
      "loss": 0.5617,
      "step": 362
    },
    {
      "epoch": 0.24403361344537816,
      "grad_norm": 0.21444816887378693,
      "learning_rate": 8.6555997409709e-06,
      "loss": 0.7188,
      "step": 363
    },
    {
      "epoch": 0.2447058823529412,
      "grad_norm": 0.3139742910861969,
      "learning_rate": 8.648335728119168e-06,
      "loss": 0.6365,
      "step": 364
    },
    {
      "epoch": 0.2453781512605042,
      "grad_norm": 0.3673727810382843,
      "learning_rate": 8.64105520958872e-06,
      "loss": 0.6725,
      "step": 365
    },
    {
      "epoch": 0.24605042016806722,
      "grad_norm": 0.22386662662029266,
      "learning_rate": 8.633758218317836e-06,
      "loss": 0.6167,
      "step": 366
    },
    {
      "epoch": 0.24672268907563025,
      "grad_norm": 0.2022496610879898,
      "learning_rate": 8.62644478731932e-06,
      "loss": 0.6761,
      "step": 367
    },
    {
      "epoch": 0.24739495798319328,
      "grad_norm": 0.35748136043548584,
      "learning_rate": 8.619114949680349e-06,
      "loss": 0.6699,
      "step": 368
    },
    {
|
"epoch": 0.2480672268907563, |
|
"grad_norm": 0.23497340083122253, |
|
"learning_rate": 8.611768738562333e-06, |
|
"loss": 0.7103, |
|
"step": 369 |
|
}, |
|
{ |
|
"epoch": 0.24873949579831933, |
|
"grad_norm": 0.23439949750900269, |
|
"learning_rate": 8.604406187200758e-06, |
|
"loss": 0.7444, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 0.24941176470588236, |
|
"grad_norm": 0.19876611232757568, |
|
"learning_rate": 8.597027328905026e-06, |
|
"loss": 0.5419, |
|
"step": 371 |
|
}, |
|
{ |
|
"epoch": 0.2500840336134454, |
|
"grad_norm": 0.2008461058139801, |
|
"learning_rate": 8.589632197058326e-06, |
|
"loss": 0.5705, |
|
"step": 372 |
|
}, |
|
{ |
|
"epoch": 0.2507563025210084, |
|
"grad_norm": 0.279691606760025, |
|
"learning_rate": 8.582220825117466e-06, |
|
"loss": 0.6726, |
|
"step": 373 |
|
}, |
|
{ |
|
"epoch": 0.25142857142857145, |
|
"grad_norm": 0.21170209348201752, |
|
"learning_rate": 8.574793246612727e-06, |
|
"loss": 0.599, |
|
"step": 374 |
|
}, |
|
{ |
|
"epoch": 0.25210084033613445, |
|
"grad_norm": 0.23143023252487183, |
|
"learning_rate": 8.567349495147711e-06, |
|
"loss": 0.673, |
|
"step": 375 |
|
}, |
|
{ |
|
"epoch": 0.2527731092436975, |
|
"grad_norm": 0.3455113172531128, |
|
"learning_rate": 8.559889604399195e-06, |
|
"loss": 0.7567, |
|
"step": 376 |
|
}, |
|
{ |
|
"epoch": 0.2534453781512605, |
|
"grad_norm": 0.23424625396728516, |
|
"learning_rate": 8.552413608116959e-06, |
|
"loss": 0.6846, |
|
"step": 377 |
|
}, |
|
{ |
|
"epoch": 0.2541176470588235, |
|
"grad_norm": 0.25633034110069275, |
|
"learning_rate": 8.544921540123663e-06, |
|
"loss": 0.6866, |
|
"step": 378 |
|
}, |
|
{ |
|
"epoch": 0.25478991596638656, |
|
"grad_norm": 0.2438989281654358, |
|
"learning_rate": 8.537413434314668e-06, |
|
"loss": 0.5694, |
|
"step": 379 |
|
}, |
|
{ |
|
"epoch": 0.25546218487394956, |
|
"grad_norm": 0.2545592784881592, |
|
"learning_rate": 8.529889324657894e-06, |
|
"loss": 0.7359, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 0.2561344537815126, |
|
"grad_norm": 0.33247342705726624, |
|
"learning_rate": 8.52234924519367e-06, |
|
"loss": 0.6206, |
|
"step": 381 |
|
}, |
|
{ |
|
"epoch": 0.2568067226890756, |
|
"grad_norm": 0.31537359952926636, |
|
"learning_rate": 8.51479323003457e-06, |
|
"loss": 0.6515, |
|
"step": 382 |
|
}, |
|
{ |
|
"epoch": 0.2574789915966387, |
|
"grad_norm": 0.2566099464893341, |
|
"learning_rate": 8.507221313365266e-06, |
|
"loss": 0.7823, |
|
"step": 383 |
|
}, |
|
{ |
|
"epoch": 0.2581512605042017, |
|
"grad_norm": 0.21989482641220093, |
|
"learning_rate": 8.499633529442373e-06, |
|
"loss": 0.6416, |
|
"step": 384 |
|
}, |
|
{ |
|
"epoch": 0.25882352941176473, |
|
"grad_norm": 0.23314514756202698, |
|
"learning_rate": 8.492029912594284e-06, |
|
"loss": 0.7286, |
|
"step": 385 |
|
}, |
|
{ |
|
"epoch": 0.25949579831932773, |
|
"grad_norm": 0.2304847538471222, |
|
"learning_rate": 8.484410497221036e-06, |
|
"loss": 0.7232, |
|
"step": 386 |
|
}, |
|
{ |
|
"epoch": 0.26016806722689073, |
|
"grad_norm": 0.3086731731891632, |
|
"learning_rate": 8.476775317794126e-06, |
|
"loss": 0.797, |
|
"step": 387 |
|
}, |
|
{ |
|
"epoch": 0.2608403361344538, |
|
"grad_norm": 0.24391745030879974, |
|
"learning_rate": 8.469124408856384e-06, |
|
"loss": 0.6864, |
|
"step": 388 |
|
}, |
|
{ |
|
"epoch": 0.2615126050420168, |
|
"grad_norm": 0.2130875438451767, |
|
"learning_rate": 8.461457805021793e-06, |
|
"loss": 0.7461, |
|
"step": 389 |
|
}, |
|
{ |
|
"epoch": 0.26218487394957984, |
|
"grad_norm": 0.2305675446987152, |
|
"learning_rate": 8.45377554097535e-06, |
|
"loss": 0.7015, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 0.26285714285714284, |
|
"grad_norm": 0.24206651747226715, |
|
"learning_rate": 8.446077651472892e-06, |
|
"loss": 0.6689, |
|
"step": 391 |
|
}, |
|
{ |
|
"epoch": 0.2635294117647059, |
|
"grad_norm": 0.2249177098274231, |
|
"learning_rate": 8.438364171340957e-06, |
|
"loss": 0.6084, |
|
"step": 392 |
|
}, |
|
{ |
|
"epoch": 0.2642016806722689, |
|
"grad_norm": 0.23055896162986755, |
|
"learning_rate": 8.430635135476616e-06, |
|
"loss": 0.6501, |
|
"step": 393 |
|
}, |
|
{ |
|
"epoch": 0.26487394957983196, |
|
"grad_norm": 0.30286553502082825, |
|
"learning_rate": 8.422890578847313e-06, |
|
"loss": 0.6421, |
|
"step": 394 |
|
}, |
|
{ |
|
"epoch": 0.26554621848739496, |
|
"grad_norm": 0.20739050209522247, |
|
"learning_rate": 8.415130536490712e-06, |
|
"loss": 0.6456, |
|
"step": 395 |
|
}, |
|
{ |
|
"epoch": 0.26621848739495796, |
|
"grad_norm": 0.23757591843605042, |
|
"learning_rate": 8.407355043514538e-06, |
|
"loss": 0.7336, |
|
"step": 396 |
|
}, |
|
{ |
|
"epoch": 0.266890756302521, |
|
"grad_norm": 0.2152106910943985, |
|
"learning_rate": 8.399564135096417e-06, |
|
"loss": 0.6866, |
|
"step": 397 |
|
}, |
|
{ |
|
"epoch": 0.267563025210084, |
|
"grad_norm": 0.22571507096290588, |
|
"learning_rate": 8.39175784648372e-06, |
|
"loss": 0.6797, |
|
"step": 398 |
|
}, |
|
{ |
|
"epoch": 0.26823529411764707, |
|
"grad_norm": 0.2353668212890625, |
|
"learning_rate": 8.383936212993392e-06, |
|
"loss": 0.624, |
|
"step": 399 |
|
}, |
|
{ |
|
"epoch": 0.2689075630252101, |
|
"grad_norm": 0.2632080018520355, |
|
"learning_rate": 8.376099270011808e-06, |
|
"loss": 0.7362, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.26957983193277313, |
|
"grad_norm": 0.26158758997917175, |
|
"learning_rate": 8.368247052994605e-06, |
|
"loss": 0.5969, |
|
"step": 401 |
|
}, |
|
{ |
|
"epoch": 0.27025210084033613, |
|
"grad_norm": 0.239485502243042, |
|
"learning_rate": 8.360379597466519e-06, |
|
"loss": 0.631, |
|
"step": 402 |
|
}, |
|
{ |
|
"epoch": 0.2709243697478992, |
|
"grad_norm": 0.2522924840450287, |
|
"learning_rate": 8.352496939021233e-06, |
|
"loss": 0.7101, |
|
"step": 403 |
|
}, |
|
{ |
|
"epoch": 0.2715966386554622, |
|
"grad_norm": 0.22495077550411224, |
|
"learning_rate": 8.344599113321203e-06, |
|
"loss": 0.5785, |
|
"step": 404 |
|
}, |
|
{ |
|
"epoch": 0.2722689075630252, |
|
"grad_norm": 0.25629207491874695, |
|
"learning_rate": 8.336686156097512e-06, |
|
"loss": 0.757, |
|
"step": 405 |
|
}, |
|
{ |
|
"epoch": 0.27294117647058824, |
|
"grad_norm": 0.40765833854675293, |
|
"learning_rate": 8.328758103149696e-06, |
|
"loss": 0.7224, |
|
"step": 406 |
|
}, |
|
{ |
|
"epoch": 0.27361344537815124, |
|
"grad_norm": 0.2634163200855255, |
|
"learning_rate": 8.320814990345587e-06, |
|
"loss": 0.7794, |
|
"step": 407 |
|
}, |
|
{ |
|
"epoch": 0.2742857142857143, |
|
"grad_norm": 0.24929873645305634, |
|
"learning_rate": 8.312856853621152e-06, |
|
"loss": 0.7422, |
|
"step": 408 |
|
}, |
|
{ |
|
"epoch": 0.2749579831932773, |
|
"grad_norm": 0.2845971882343292, |
|
"learning_rate": 8.304883728980325e-06, |
|
"loss": 0.7359, |
|
"step": 409 |
|
}, |
|
{ |
|
"epoch": 0.27563025210084036, |
|
"grad_norm": 0.24346016347408295, |
|
"learning_rate": 8.296895652494851e-06, |
|
"loss": 0.6485, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 0.27630252100840336, |
|
"grad_norm": 0.23367783427238464, |
|
"learning_rate": 8.288892660304122e-06, |
|
"loss": 0.5814, |
|
"step": 411 |
|
}, |
|
{ |
|
"epoch": 0.2769747899159664, |
|
"grad_norm": 0.34782543778419495, |
|
"learning_rate": 8.280874788615004e-06, |
|
"loss": 0.5857, |
|
"step": 412 |
|
}, |
|
{ |
|
"epoch": 0.2776470588235294, |
|
"grad_norm": 0.27658021450042725, |
|
"learning_rate": 8.272842073701688e-06, |
|
"loss": 0.679, |
|
"step": 413 |
|
}, |
|
{ |
|
"epoch": 0.2783193277310924, |
|
"grad_norm": 0.2381054013967514, |
|
"learning_rate": 8.264794551905512e-06, |
|
"loss": 0.5859, |
|
"step": 414 |
|
}, |
|
{ |
|
"epoch": 0.27899159663865547, |
|
"grad_norm": 0.2386159598827362, |
|
"learning_rate": 8.256732259634807e-06, |
|
"loss": 0.7333, |
|
"step": 415 |
|
}, |
|
{ |
|
"epoch": 0.27966386554621847, |
|
"grad_norm": 0.23790478706359863, |
|
"learning_rate": 8.248655233364724e-06, |
|
"loss": 0.6381, |
|
"step": 416 |
|
}, |
|
{ |
|
"epoch": 0.2803361344537815, |
|
"grad_norm": 0.2430446296930313, |
|
"learning_rate": 8.24056350963708e-06, |
|
"loss": 0.6681, |
|
"step": 417 |
|
}, |
|
{ |
|
"epoch": 0.2810084033613445, |
|
"grad_norm": 0.2563830614089966, |
|
"learning_rate": 8.232457125060178e-06, |
|
"loss": 0.7506, |
|
"step": 418 |
|
}, |
|
{ |
|
"epoch": 0.2816806722689076, |
|
"grad_norm": 0.236500084400177, |
|
"learning_rate": 8.224336116308654e-06, |
|
"loss": 0.6722, |
|
"step": 419 |
|
}, |
|
{ |
|
"epoch": 0.2823529411764706, |
|
"grad_norm": 0.24401167035102844, |
|
"learning_rate": 8.216200520123305e-06, |
|
"loss": 0.7555, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 0.28302521008403364, |
|
"grad_norm": 0.24071764945983887, |
|
"learning_rate": 8.20805037331092e-06, |
|
"loss": 0.6701, |
|
"step": 421 |
|
}, |
|
{ |
|
"epoch": 0.28369747899159664, |
|
"grad_norm": 0.2599017322063446, |
|
"learning_rate": 8.199885712744128e-06, |
|
"loss": 0.7838, |
|
"step": 422 |
|
}, |
|
{ |
|
"epoch": 0.28436974789915964, |
|
"grad_norm": 0.23312319815158844, |
|
"learning_rate": 8.191706575361208e-06, |
|
"loss": 0.6454, |
|
"step": 423 |
|
}, |
|
{ |
|
"epoch": 0.2850420168067227, |
|
"grad_norm": 0.3320861756801605, |
|
"learning_rate": 8.183512998165941e-06, |
|
"loss": 0.5617, |
|
"step": 424 |
|
}, |
|
{ |
|
"epoch": 0.2857142857142857, |
|
"grad_norm": 0.29827767610549927, |
|
"learning_rate": 8.175305018227439e-06, |
|
"loss": 0.6889, |
|
"step": 425 |
|
}, |
|
{ |
|
"epoch": 0.28638655462184875, |
|
"grad_norm": 0.2738671898841858, |
|
"learning_rate": 8.167082672679968e-06, |
|
"loss": 0.7371, |
|
"step": 426 |
|
}, |
|
{ |
|
"epoch": 0.28705882352941176, |
|
"grad_norm": 0.24301859736442566, |
|
"learning_rate": 8.15884599872279e-06, |
|
"loss": 0.6621, |
|
"step": 427 |
|
}, |
|
{ |
|
"epoch": 0.2877310924369748, |
|
"grad_norm": 0.2335437387228012, |
|
"learning_rate": 8.150595033619989e-06, |
|
"loss": 0.6062, |
|
"step": 428 |
|
}, |
|
{ |
|
"epoch": 0.2884033613445378, |
|
"grad_norm": 0.24218881130218506, |
|
"learning_rate": 8.142329814700306e-06, |
|
"loss": 0.7732, |
|
"step": 429 |
|
}, |
|
{ |
|
"epoch": 0.28907563025210087, |
|
"grad_norm": 0.23290084302425385, |
|
"learning_rate": 8.13405037935697e-06, |
|
"loss": 0.671, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 0.28974789915966387, |
|
"grad_norm": 0.24484236538410187, |
|
"learning_rate": 8.125756765047528e-06, |
|
"loss": 0.7383, |
|
"step": 431 |
|
}, |
|
{ |
|
"epoch": 0.29042016806722687, |
|
"grad_norm": 0.2516631782054901, |
|
"learning_rate": 8.117449009293668e-06, |
|
"loss": 0.6141, |
|
"step": 432 |
|
}, |
|
{ |
|
"epoch": 0.2910924369747899, |
|
"grad_norm": 0.5268867015838623, |
|
"learning_rate": 8.109127149681065e-06, |
|
"loss": 0.626, |
|
"step": 433 |
|
}, |
|
{ |
|
"epoch": 0.2917647058823529, |
|
"grad_norm": 0.46247419714927673, |
|
"learning_rate": 8.100791223859198e-06, |
|
"loss": 0.6713, |
|
"step": 434 |
|
}, |
|
{ |
|
"epoch": 0.292436974789916, |
|
"grad_norm": 0.33584773540496826, |
|
"learning_rate": 8.092441269541182e-06, |
|
"loss": 0.7156, |
|
"step": 435 |
|
}, |
|
{ |
|
"epoch": 0.293109243697479, |
|
"grad_norm": 0.29816320538520813, |
|
"learning_rate": 8.084077324503602e-06, |
|
"loss": 0.6754, |
|
"step": 436 |
|
}, |
|
{ |
|
"epoch": 0.29378151260504204, |
|
"grad_norm": 0.253115713596344, |
|
"learning_rate": 8.075699426586345e-06, |
|
"loss": 0.6449, |
|
"step": 437 |
|
}, |
|
{ |
|
"epoch": 0.29445378151260504, |
|
"grad_norm": 0.24944230914115906, |
|
"learning_rate": 8.067307613692408e-06, |
|
"loss": 0.5776, |
|
"step": 438 |
|
}, |
|
{ |
|
"epoch": 0.29512605042016804, |
|
"grad_norm": 0.25170648097991943, |
|
"learning_rate": 8.058901923787757e-06, |
|
"loss": 0.5794, |
|
"step": 439 |
|
}, |
|
{ |
|
"epoch": 0.2957983193277311, |
|
"grad_norm": 0.3000252842903137, |
|
"learning_rate": 8.050482394901132e-06, |
|
"loss": 0.7025, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 0.2964705882352941, |
|
"grad_norm": 0.25940564274787903, |
|
"learning_rate": 8.042049065123882e-06, |
|
"loss": 0.6512, |
|
"step": 441 |
|
}, |
|
{ |
|
"epoch": 0.29714285714285715, |
|
"grad_norm": 0.25988590717315674, |
|
"learning_rate": 8.0336019726098e-06, |
|
"loss": 0.6544, |
|
"step": 442 |
|
}, |
|
{ |
|
"epoch": 0.29781512605042015, |
|
"grad_norm": 0.3429681956768036, |
|
"learning_rate": 8.025141155574939e-06, |
|
"loss": 0.7266, |
|
"step": 443 |
|
}, |
|
{ |
|
"epoch": 0.2984873949579832, |
|
"grad_norm": 0.3786052167415619, |
|
"learning_rate": 8.016666652297443e-06, |
|
"loss": 0.7693, |
|
"step": 444 |
|
}, |
|
{ |
|
"epoch": 0.2991596638655462, |
|
"grad_norm": 0.24071510136127472, |
|
"learning_rate": 8.008178501117376e-06, |
|
"loss": 0.692, |
|
"step": 445 |
|
}, |
|
{ |
|
"epoch": 0.29983193277310927, |
|
"grad_norm": 0.22229009866714478, |
|
"learning_rate": 7.999676740436553e-06, |
|
"loss": 0.6618, |
|
"step": 446 |
|
}, |
|
{ |
|
"epoch": 0.30050420168067227, |
|
"grad_norm": 0.2864013910293579, |
|
"learning_rate": 7.991161408718353e-06, |
|
"loss": 0.7082, |
|
"step": 447 |
|
}, |
|
{ |
|
"epoch": 0.30117647058823527, |
|
"grad_norm": 0.22621232271194458, |
|
"learning_rate": 7.982632544487555e-06, |
|
"loss": 0.5874, |
|
"step": 448 |
|
}, |
|
{ |
|
"epoch": 0.3018487394957983, |
|
"grad_norm": 0.24563252925872803, |
|
"learning_rate": 7.974090186330165e-06, |
|
"loss": 0.7186, |
|
"step": 449 |
|
}, |
|
{ |
|
"epoch": 0.3025210084033613, |
|
"grad_norm": 0.30611452460289, |
|
"learning_rate": 7.96553437289323e-06, |
|
"loss": 0.6962, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 0.3031932773109244, |
|
"grad_norm": 0.23493875563144684, |
|
"learning_rate": 7.956965142884678e-06, |
|
"loss": 0.6031, |
|
"step": 451 |
|
}, |
|
{ |
|
"epoch": 0.3038655462184874, |
|
"grad_norm": 0.26626601815223694, |
|
"learning_rate": 7.948382535073134e-06, |
|
"loss": 0.6433, |
|
"step": 452 |
|
}, |
|
{ |
|
"epoch": 0.30453781512605044, |
|
"grad_norm": 0.2446458488702774, |
|
"learning_rate": 7.939786588287743e-06, |
|
"loss": 0.8106, |
|
"step": 453 |
|
}, |
|
{ |
|
"epoch": 0.30521008403361344, |
|
"grad_norm": 0.2521326243877411, |
|
"learning_rate": 7.931177341418003e-06, |
|
"loss": 0.6874, |
|
"step": 454 |
|
}, |
|
{ |
|
"epoch": 0.3058823529411765, |
|
"grad_norm": 0.23391471803188324, |
|
"learning_rate": 7.922554833413581e-06, |
|
"loss": 0.6423, |
|
"step": 455 |
|
}, |
|
{ |
|
"epoch": 0.3065546218487395, |
|
"grad_norm": 0.24418647587299347, |
|
"learning_rate": 7.91391910328414e-06, |
|
"loss": 0.6531, |
|
"step": 456 |
|
}, |
|
{ |
|
"epoch": 0.3072268907563025, |
|
"grad_norm": 0.2206183671951294, |
|
"learning_rate": 7.90527019009916e-06, |
|
"loss": 0.7098, |
|
"step": 457 |
|
}, |
|
{ |
|
"epoch": 0.30789915966386555, |
|
"grad_norm": 0.23588207364082336, |
|
"learning_rate": 7.89660813298777e-06, |
|
"loss": 0.6648, |
|
"step": 458 |
|
}, |
|
{ |
|
"epoch": 0.30857142857142855, |
|
"grad_norm": 0.35487234592437744, |
|
"learning_rate": 7.887932971138555e-06, |
|
"loss": 0.7094, |
|
"step": 459 |
|
}, |
|
{ |
|
"epoch": 0.3092436974789916, |
|
"grad_norm": 0.26851218938827515, |
|
"learning_rate": 7.879244743799393e-06, |
|
"loss": 0.7412, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 0.3099159663865546, |
|
"grad_norm": 0.2544209063053131, |
|
"learning_rate": 7.870543490277274e-06, |
|
"loss": 0.6263, |
|
"step": 461 |
|
}, |
|
{ |
|
"epoch": 0.31058823529411766, |
|
"grad_norm": 0.21729524433612823, |
|
"learning_rate": 7.861829249938119e-06, |
|
"loss": 0.6552, |
|
"step": 462 |
|
}, |
|
{ |
|
"epoch": 0.31126050420168067, |
|
"grad_norm": 0.3726813495159149, |
|
"learning_rate": 7.853102062206601e-06, |
|
"loss": 0.6353, |
|
"step": 463 |
|
}, |
|
{ |
|
"epoch": 0.3119327731092437, |
|
"grad_norm": 0.225904643535614, |
|
"learning_rate": 7.844361966565973e-06, |
|
"loss": 0.6529, |
|
"step": 464 |
|
}, |
|
{ |
|
"epoch": 0.3126050420168067, |
|
"grad_norm": 0.432298481464386, |
|
"learning_rate": 7.835609002557883e-06, |
|
"loss": 0.6485, |
|
"step": 465 |
|
}, |
|
{ |
|
"epoch": 0.3132773109243697, |
|
"grad_norm": 0.45495152473449707, |
|
"learning_rate": 7.8268432097822e-06, |
|
"loss": 0.6468, |
|
"step": 466 |
|
}, |
|
{ |
|
"epoch": 0.3139495798319328, |
|
"grad_norm": 0.23771986365318298, |
|
"learning_rate": 7.81806462789683e-06, |
|
"loss": 0.6454, |
|
"step": 467 |
|
}, |
|
{ |
|
"epoch": 0.3146218487394958, |
|
"grad_norm": 0.2486136257648468, |
|
"learning_rate": 7.80927329661754e-06, |
|
"loss": 0.6243, |
|
"step": 468 |
|
}, |
|
{ |
|
"epoch": 0.31529411764705884, |
|
"grad_norm": 0.29532894492149353, |
|
"learning_rate": 7.80046925571778e-06, |
|
"loss": 0.7355, |
|
"step": 469 |
|
}, |
|
{ |
|
"epoch": 0.31596638655462184, |
|
"grad_norm": 0.29661232233047485, |
|
"learning_rate": 7.791652545028494e-06, |
|
"loss": 0.5596, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 0.3166386554621849, |
|
"grad_norm": 0.2840026021003723, |
|
"learning_rate": 7.782823204437952e-06, |
|
"loss": 0.7123, |
|
"step": 471 |
|
}, |
|
{ |
|
"epoch": 0.3173109243697479, |
|
"grad_norm": 0.24036720395088196, |
|
"learning_rate": 7.773981273891563e-06, |
|
"loss": 0.6763, |
|
"step": 472 |
|
}, |
|
{ |
|
"epoch": 0.31798319327731095, |
|
"grad_norm": 0.3194415271282196, |
|
"learning_rate": 7.765126793391691e-06, |
|
"loss": 0.7346, |
|
"step": 473 |
|
}, |
|
{ |
|
"epoch": 0.31865546218487395, |
|
"grad_norm": 0.24713455140590668, |
|
"learning_rate": 7.756259802997483e-06, |
|
"loss": 0.6414, |
|
"step": 474 |
|
}, |
|
{ |
|
"epoch": 0.31932773109243695, |
|
"grad_norm": 0.2846837043762207, |
|
"learning_rate": 7.747380342824683e-06, |
|
"loss": 0.5973, |
|
"step": 475 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"grad_norm": 0.25316038727760315, |
|
"learning_rate": 7.738488453045446e-06, |
|
"loss": 0.6392, |
|
"step": 476 |
|
}, |
|
{ |
|
"epoch": 0.320672268907563, |
|
"grad_norm": 0.23145373165607452, |
|
"learning_rate": 7.729584173888162e-06, |
|
"loss": 0.6528, |
|
"step": 477 |
|
}, |
|
{ |
|
"epoch": 0.32134453781512606, |
|
"grad_norm": 0.2564818561077118, |
|
"learning_rate": 7.720667545637278e-06, |
|
"loss": 0.7126, |
|
"step": 478 |
|
}, |
|
{ |
|
"epoch": 0.32201680672268906, |
|
"grad_norm": 0.2856581509113312, |
|
"learning_rate": 7.7117386086331e-06, |
|
"loss": 0.6829, |
|
"step": 479 |
|
}, |
|
{ |
|
"epoch": 0.3226890756302521, |
|
"grad_norm": 0.25063401460647583, |
|
"learning_rate": 7.70279740327163e-06, |
|
"loss": 0.6701, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 0.3233613445378151, |
|
"grad_norm": 0.24919474124908447, |
|
"learning_rate": 7.693843970004369e-06, |
|
"loss": 0.6791, |
|
"step": 481 |
|
}, |
|
{ |
|
"epoch": 0.3240336134453782, |
|
"grad_norm": 0.2518171966075897, |
|
"learning_rate": 7.684878349338144e-06, |
|
"loss": 0.6549, |
|
"step": 482 |
|
}, |
|
{ |
|
"epoch": 0.3247058823529412, |
|
"grad_norm": 0.22823643684387207, |
|
"learning_rate": 7.675900581834914e-06, |
|
"loss": 0.6602, |
|
"step": 483 |
|
}, |
|
{ |
|
"epoch": 0.3253781512605042, |
|
"grad_norm": 0.22611699998378754, |
|
"learning_rate": 7.666910708111597e-06, |
|
"loss": 0.6114, |
|
"step": 484 |
|
}, |
|
{ |
|
"epoch": 0.32605042016806723, |
|
"grad_norm": 0.21574345231056213, |
|
"learning_rate": 7.657908768839879e-06, |
|
"loss": 0.5915, |
|
"step": 485 |
|
}, |
|
{ |
|
"epoch": 0.32672268907563023, |
|
"grad_norm": 0.26198115944862366, |
|
"learning_rate": 7.648894804746031e-06, |
|
"loss": 0.6995, |
|
"step": 486 |
|
}, |
|
{ |
|
"epoch": 0.3273949579831933, |
|
"grad_norm": 0.2797788381576538, |
|
"learning_rate": 7.63986885661073e-06, |
|
"loss": 0.7056, |
|
"step": 487 |
|
}, |
|
{ |
|
"epoch": 0.3280672268907563, |
|
"grad_norm": 0.26868724822998047, |
|
"learning_rate": 7.630830965268872e-06, |
|
"loss": 0.6964, |
|
"step": 488 |
|
}, |
|
{ |
|
"epoch": 0.32873949579831935, |
|
"grad_norm": 0.2783353626728058, |
|
"learning_rate": 7.621781171609379e-06, |
|
"loss": 0.8026, |
|
"step": 489 |
|
}, |
|
{ |
|
"epoch": 0.32941176470588235, |
|
"grad_norm": 0.25210657715797424, |
|
"learning_rate": 7.612719516575027e-06, |
|
"loss": 0.7001, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 0.3300840336134454, |
|
"grad_norm": 0.25456830859184265, |
|
"learning_rate": 7.603646041162253e-06, |
|
"loss": 0.7183, |
|
"step": 491 |
|
}, |
|
{ |
|
"epoch": 0.3307563025210084, |
|
"grad_norm": 0.2586143910884857, |
|
"learning_rate": 7.594560786420974e-06, |
|
"loss": 0.6673, |
|
"step": 492 |
|
}, |
|
{ |
|
"epoch": 0.3314285714285714, |
|
"grad_norm": 0.23656786978244781, |
|
"learning_rate": 7.585463793454393e-06, |
|
"loss": 0.6919, |
|
"step": 493 |
|
}, |
|
{ |
|
"epoch": 0.33210084033613446, |
|
"grad_norm": 0.2484300434589386, |
|
"learning_rate": 7.576355103418822e-06, |
|
"loss": 0.6797, |
|
"step": 494 |
|
}, |
|
{ |
|
"epoch": 0.33277310924369746, |
|
"grad_norm": 0.2324364334344864, |
|
"learning_rate": 7.567234757523495e-06, |
|
"loss": 0.5792, |
|
"step": 495 |
|
}, |
|
{ |
|
"epoch": 0.3334453781512605, |
|
"grad_norm": 0.2646612226963043, |
|
"learning_rate": 7.558102797030376e-06, |
|
"loss": 0.6596, |
|
"step": 496 |
|
} |
|
], |
|
"logging_steps": 1, |
|
"max_steps": 1487, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 248, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": false |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 3.1838970659809e+19, |
|
"train_batch_size": 1, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |