|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 0.51566991968441, |
|
"global_step": 120000, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 0.0004000105276443632, |
|
"loss": 10.1199, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 0.00040004211053127486, |
|
"loss": 9.997, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 0.000400094748522194, |
|
"loss": 9.9386, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 0.00040016844138622554, |
|
"loss": 9.8988, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 0.0004002631888001141, |
|
"loss": 9.8579, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 0.0004003789903482477, |
|
"loss": 9.8159, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 0.0004005158455226594, |
|
"loss": 9.7867, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 0.0004006737537230326, |
|
"loss": 9.7605, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 0.0004008527142566991, |
|
"loss": 9.7357, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 0.0004010527263386479, |
|
"loss": 9.7138, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 0.00040127378909152016, |
|
"loss": 9.6894, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 0.000401515901545621, |
|
"loss": 9.6634, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 0.00040177906263891804, |
|
"loss": 9.6451, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 0.00040206327121705167, |
|
"loss": 9.6279, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 0.00040236852603333685, |
|
"loss": 9.6038, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 0.0004026948257487631, |
|
"loss": 9.5874, |
|
"step": 3200 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 0.00040304216893201697, |
|
"loss": 9.5729, |
|
"step": 3400 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 0.0004034105540594666, |
|
"loss": 9.547, |
|
"step": 3600 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 0.0004037999795151858, |
|
"loss": 9.5348, |
|
"step": 3800 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 0.0004042104435909525, |
|
"loss": 9.5207, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 0.0004046419444862573, |
|
"loss": 9.5061, |
|
"step": 4200 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 0.0004050944803083139, |
|
"loss": 9.493, |
|
"step": 4400 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 0.0004055680490720661, |
|
"loss": 9.4782, |
|
"step": 4600 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 0.0004060626487001964, |
|
"loss": 9.4636, |
|
"step": 4800 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 0.0004065782770231313, |
|
"loss": 9.4546, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 0.000407114931779062, |
|
"loss": 9.4453, |
|
"step": 5200 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 0.00040767261061393917, |
|
"loss": 9.4174, |
|
"step": 5400 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 0.00040825131108149573, |
|
"loss": 9.4159, |
|
"step": 5600 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 0.00040885103064325357, |
|
"loss": 9.3993, |
|
"step": 5800 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 0.00040947176666852707, |
|
"loss": 9.3953, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 0.00041011351643444917, |
|
"loss": 9.3854, |
|
"step": 6200 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 0.0004107762771259713, |
|
"loss": 9.3679, |
|
"step": 6400 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 0.0004114600458358809, |
|
"loss": 9.3595, |
|
"step": 6600 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 0.00041216481956481664, |
|
"loss": 9.3504, |
|
"step": 6800 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 0.00041289059522127414, |
|
"loss": 9.3417, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 0.0004136373696216229, |
|
"loss": 9.3275, |
|
"step": 7200 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 0.0004144051394901274, |
|
"loss": 9.3201, |
|
"step": 7400 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 0.0004151939014589469, |
|
"loss": 9.3123, |
|
"step": 7600 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 0.0004160036520681667, |
|
"loss": 9.3084, |
|
"step": 7800 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 0.0004168343877657965, |
|
"loss": 9.2954, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 0.00041768179413688954, |
|
"loss": 9.2862, |
|
"step": 8200 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 0.00041855438410810103, |
|
"loss": 9.283, |
|
"step": 8400 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 0.00041944794797888797, |
|
"loss": 9.2711, |
|
"step": 8600 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 0.00042036248182962185, |
|
"loss": 9.2726, |
|
"step": 8800 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 0.0004212979816486783, |
|
"loss": 9.2621, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 0.00042225444333247354, |
|
"loss": 9.2527, |
|
"step": 9200 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 0.0004232318626854678, |
|
"loss": 9.2453, |
|
"step": 9400 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 0.0004242302354201949, |
|
"loss": 9.2314, |
|
"step": 9600 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 0.000425249557157276, |
|
"loss": 9.2337, |
|
"step": 9800 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 0.00042628982342543184, |
|
"loss": 9.2276, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 0.0004273456715498305, |
|
"loss": 9.2181, |
|
"step": 10200 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 0.00042842770843401837, |
|
"loss": 9.2142, |
|
"step": 10400 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 0.0004295306759082608, |
|
"loss": 9.2052, |
|
"step": 10600 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 0.00043065456913437584, |
|
"loss": 9.1994, |
|
"step": 10800 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 0.00043179938318238693, |
|
"loss": 9.2017, |
|
"step": 11000 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 0.0004329651130305402, |
|
"loss": 9.1991, |
|
"step": 11200 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 0.0004341517535653445, |
|
"loss": 9.1921, |
|
"step": 11400 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 0.00043535929958157804, |
|
"loss": 9.1786, |
|
"step": 11600 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 0.0004365877457823183, |
|
"loss": 9.1766, |
|
"step": 11800 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 0.00043783708677896244, |
|
"loss": 9.1614, |
|
"step": 12000 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 0.0004391073170912519, |
|
"loss": 9.1717, |
|
"step": 12200 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 0.0004403984311473017, |
|
"loss": 9.1551, |
|
"step": 12400 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 0.0004417104232836127, |
|
"loss": 9.1542, |
|
"step": 12600 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 0.00044304328774510786, |
|
"loss": 9.1525, |
|
"step": 12800 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 0.000444397018685155, |
|
"loss": 9.1443, |
|
"step": 13000 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 0.00044577161016558405, |
|
"loss": 9.1301, |
|
"step": 13200 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 0.0004471670561567286, |
|
"loss": 9.1343, |
|
"step": 13400 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 0.00044858335053743655, |
|
"loss": 9.1287, |
|
"step": 13600 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 0.0004500204870951062, |
|
"loss": 9.1189, |
|
"step": 13800 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 0.00045147845952571257, |
|
"loss": 9.1171, |
|
"step": 14000 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 0.0004529498156216581, |
|
"loss": 9.1105, |
|
"step": 14200 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 0.0004544493364218305, |
|
"loss": 9.0969, |
|
"step": 14400 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 0.00045596967366771067, |
|
"loss": 9.1014, |
|
"step": 14600 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 0.00045751082069031036, |
|
"loss": 9.0951, |
|
"step": 14800 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 0.00045907277072936015, |
|
"loss": 9.0867, |
|
"step": 15000 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 0.00046065551693333547, |
|
"loss": 9.0872, |
|
"step": 15200 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 0.00046225905235949306, |
|
"loss": 9.0708, |
|
"step": 15400 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 0.0004638833699738953, |
|
"loss": 9.0716, |
|
"step": 15600 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 0.00046552846265143777, |
|
"loss": 9.071, |
|
"step": 15800 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 0.00046719432317589814, |
|
"loss": 9.0618, |
|
"step": 16000 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 0.0004688724595049813, |
|
"loss": 9.0518, |
|
"step": 16200 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 0.0004705797299630679, |
|
"loss": 9.0442, |
|
"step": 16400 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 0.0004723077461105934, |
|
"loss": 9.0477, |
|
"step": 16600 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 0.0004740565003675777, |
|
"loss": 9.0397, |
|
"step": 16800 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 0.0004758259850630858, |
|
"loss": 9.0355, |
|
"step": 17000 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 0.00047761619243523283, |
|
"loss": 9.0248, |
|
"step": 17200 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 0.0004794271146312465, |
|
"loss": 9.0137, |
|
"step": 17400 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 0.00048125874370748105, |
|
"loss": 9.0205, |
|
"step": 17600 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 0.00048311107162946065, |
|
"loss": 9.0008, |
|
"step": 17800 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 0.00048498409027191575, |
|
"loss": 8.9975, |
|
"step": 18000 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 0.0004868682714790542, |
|
"loss": 8.9992, |
|
"step": 18200 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 0.0004887825434734695, |
|
"loss": 8.9777, |
|
"step": 18400 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 0.0004907174813103439, |
|
"loss": 8.9871, |
|
"step": 18600 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 0.0004926730765020346, |
|
"loss": 8.9765, |
|
"step": 18800 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 0.0004946393879009196, |
|
"loss": 8.9754, |
|
"step": 19000 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 0.0004966361687980866, |
|
"loss": 8.9678, |
|
"step": 19200 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 0.000498653581087638, |
|
"loss": 8.9677, |
|
"step": 19400 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 0.0005006916159201579, |
|
"loss": 8.9644, |
|
"step": 19600 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 0.0005027502643557748, |
|
"loss": 8.9642, |
|
"step": 19800 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 0.0005048295173641828, |
|
"loss": 8.9569, |
|
"step": 20000 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 0.0005069293658247036, |
|
"loss": 8.9605, |
|
"step": 20200 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 0.0005090498005263129, |
|
"loss": 8.9431, |
|
"step": 20400 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 0.000511190812167682, |
|
"loss": 8.9431, |
|
"step": 20600 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 0.000513352391357226, |
|
"loss": 8.9342, |
|
"step": 20800 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 0.0005155345286131357, |
|
"loss": 8.9324, |
|
"step": 21000 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 0.0005177372143634305, |
|
"loss": 8.9382, |
|
"step": 21200 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 0.0005199604389459836, |
|
"loss": 8.9424, |
|
"step": 21400 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 0.0005222041926085837, |
|
"loss": 8.9157, |
|
"step": 21600 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 0.0005244684655089597, |
|
"loss": 8.9236, |
|
"step": 21800 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 0.0005267532477148378, |
|
"loss": 8.9246, |
|
"step": 22000 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 0.0005290585292039816, |
|
"loss": 8.9268, |
|
"step": 22200 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 0.0005313842998642265, |
|
"loss": 8.9203, |
|
"step": 22400 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 0.0005337305494935388, |
|
"loss": 8.9095, |
|
"step": 22600 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 0.0005360972678000522, |
|
"loss": 8.9061, |
|
"step": 22800 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 0.0005384724576463773, |
|
"loss": 8.9117, |
|
"step": 23000 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 0.0005408799798596632, |
|
"loss": 8.9043, |
|
"step": 23200 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 0.0005433079393890421, |
|
"loss": 8.9012, |
|
"step": 23400 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 0.0005457563255842242, |
|
"loss": 8.8969, |
|
"step": 23600 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 0.0005482251277053145, |
|
"loss": 8.9013, |
|
"step": 23800 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 0.0005507143349228714, |
|
"loss": 8.8912, |
|
"step": 24000 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 0.0005532239363179401, |
|
"loss": 8.8896, |
|
"step": 24200 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 0.0005557539208821075, |
|
"loss": 8.8838, |
|
"step": 24400 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 0.0005583042775175479, |
|
"loss": 8.889, |
|
"step": 24600 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 0.0005608749950370764, |
|
"loss": 8.888, |
|
"step": 24800 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 0.0005634530562276738, |
|
"loss": 8.8814, |
|
"step": 25000 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 0.0005660643599338256, |
|
"loss": 8.8755, |
|
"step": 25200 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 0.0005686959904843206, |
|
"loss": 8.8667, |
|
"step": 25400 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 0.0005713479363354621, |
|
"loss": 8.8748, |
|
"step": 25600 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 0.000574020185854441, |
|
"loss": 8.862, |
|
"step": 25800 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 0.0005767127273193853, |
|
"loss": 8.8534, |
|
"step": 26000 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 0.0005794255489194114, |
|
"loss": 8.8655, |
|
"step": 26200 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 0.0005821586387546804, |
|
"loss": 8.8574, |
|
"step": 26400 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 0.0005849119848364386, |
|
"loss": 8.8531, |
|
"step": 26600 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 0.0005876855750870848, |
|
"loss": 8.8479, |
|
"step": 26800 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 0.0005904653779220791, |
|
"loss": 8.8405, |
|
"step": 27000 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 0.0005932793188544346, |
|
"loss": 8.8435, |
|
"step": 27200 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 0.0005961134672522114, |
|
"loss": 8.8425, |
|
"step": 27400 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 0.0005989678106833648, |
|
"loss": 8.8389, |
|
"step": 27600 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 0.0006018423366272695, |
|
"loss": 8.8525, |
|
"step": 27800 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 0.0006047370324747583, |
|
"loss": 8.8273, |
|
"step": 28000 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 0.0006076518855281984, |
|
"loss": 8.8306, |
|
"step": 28200 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 0.000610586883001531, |
|
"loss": 8.8437, |
|
"step": 28400 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 0.000613542012020336, |
|
"loss": 8.8236, |
|
"step": 28600 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 0.0006165172596218869, |
|
"loss": 8.8274, |
|
"step": 28800 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 0.0006194975859987236, |
|
"loss": 8.8275, |
|
"step": 29000 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 0.0006225129310954997, |
|
"loss": 8.8211, |
|
"step": 29200 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 0.0006255483554239195, |
|
"loss": 8.8177, |
|
"step": 29400 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 0.000628603845669035, |
|
"loss": 8.8223, |
|
"step": 29600 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 0.0006316793884278832, |
|
"loss": 8.8123, |
|
"step": 29800 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 0.0006347749702095389, |
|
"loss": 8.8107, |
|
"step": 30000 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 0.0006378905774351747, |
|
"loss": 8.8122, |
|
"step": 30200 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 0.0006410261964381238, |
|
"loss": 8.811, |
|
"step": 30400 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 0.000644181813463934, |
|
"loss": 8.813, |
|
"step": 30600 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 0.0006473574146704329, |
|
"loss": 8.8057, |
|
"step": 30800 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 0.0006505369586176524, |
|
"loss": 8.8033, |
|
"step": 31000 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 0.0006537523865622775, |
|
"loss": 8.795, |
|
"step": 31200 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 0.0006569877567060931, |
|
"loss": 8.7938, |
|
"step": 31400 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 0.0006602430548570907, |
|
"loss": 8.7969, |
|
"step": 31600 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 0.000663518266735847, |
|
"loss": 8.7966, |
|
"step": 31800 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 0.0006668133779755819, |
|
"loss": 8.7936, |
|
"step": 32000 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 0.0006701283741222287, |
|
"loss": 8.7888, |
|
"step": 32200 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 0.0006734632406344993, |
|
"loss": 8.7829, |
|
"step": 32400 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 0.0006768179628839337, |
|
"loss": 8.7789, |
|
"step": 32600 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 0.0006801925261549872, |
|
"loss": 8.778, |
|
"step": 32800 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 0.0006835698944044951, |
|
"loss": 8.7897, |
|
"step": 33000 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 0.0006869839962045932, |
|
"loss": 8.779, |
|
"step": 33200 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 0.0006904178944328165, |
|
"loss": 8.7697, |
|
"step": 33400 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 0.0006938715740263026, |
|
"loss": 8.7818, |
|
"step": 33600 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 0.0006973450198354252, |
|
"loss": 8.7667, |
|
"step": 33800 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 0.0007008382166238496, |
|
"loss": 8.7759, |
|
"step": 34000 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 0.0007043511490686036, |
|
"loss": 8.7797, |
|
"step": 34200 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 0.0007078838017601421, |
|
"loss": 8.7644, |
|
"step": 34400 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 0.0007114361592024231, |
|
"loss": 8.7678, |
|
"step": 34600 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 0.0007150082058129618, |
|
"loss": 8.7672, |
|
"step": 34800 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 0.0007185819184105553, |
|
"loss": 8.7672, |
|
"step": 35000 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 0.0007221750927446872, |
|
"loss": 8.7573, |
|
"step": 35200 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 0.0007258059161614535, |
|
"loss": 8.7584, |
|
"step": 35400 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 0.0007294563657132755, |
|
"loss": 8.7442, |
|
"step": 35600 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 0.0007331264253873856, |
|
"loss": 8.7595, |
|
"step": 35800 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 0.0007368160790850002, |
|
"loss": 8.7564, |
|
"step": 36000 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 0.0007405253106213833, |
|
"loss": 8.7517, |
|
"step": 36200 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 0.0007442541037259286, |
|
"loss": 8.7583, |
|
"step": 36400 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 0.0007480024420422077, |
|
"loss": 8.7426, |
|
"step": 36600 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 0.0007517703091280727, |
|
"loss": 8.7519, |
|
"step": 36800 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 0.0007555576884556992, |
|
"loss": 8.7393, |
|
"step": 37000 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 0.0007593645634116821, |
|
"loss": 8.7262, |
|
"step": 37200 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 0.0007631717371015569, |
|
"loss": 8.7383, |
|
"step": 37400 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 0.0007670174558631893, |
|
"loss": 8.7365, |
|
"step": 37600 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 0.0007708826199846926, |
|
"loss": 8.7385, |
|
"step": 37800 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 0.0007747672125114589, |
|
"loss": 8.7385, |
|
"step": 38000 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 0.0007786712164036449, |
|
"loss": 8.7164, |
|
"step": 38200 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 0.0007825946145362667, |
|
"loss": 8.7262, |
|
"step": 38400 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 0.0007865373896992697, |
|
"loss": 8.728, |
|
"step": 38600 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 0.0007904995245975929, |
|
"loss": 8.7281, |
|
"step": 38800 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 0.0007944810018512619, |
|
"loss": 8.7179, |
|
"step": 39000 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 0.000798481803995452, |
|
"loss": 8.7264, |
|
"step": 39200 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 0.0008025019134805696, |
|
"loss": 8.7229, |
|
"step": 39400 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 0.0008065210677225022, |
|
"loss": 8.7246, |
|
"step": 39600 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 0.000810579642586285, |
|
"loss": 8.7199, |
|
"step": 39800 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 0.0008146574717236045, |
|
"loss": 8.7209, |
|
"step": 40000 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 0.0008187545372469861, |
|
"loss": 8.7075, |
|
"step": 40200 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 0.0008228708211845768, |
|
"loss": 8.7101, |
|
"step": 40400 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 0.0008270063054802209, |
|
"loss": 8.7144, |
|
"step": 40600 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 0.0008311609719935404, |
|
"loss": 8.7173, |
|
"step": 40800 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 0.0008353348025000144, |
|
"loss": 8.7183, |
|
"step": 41000 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 0.0008395277786910574, |
|
"loss": 8.7107, |
|
"step": 41200 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 0.0008437398821741025, |
|
"loss": 8.7113, |
|
"step": 41400 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 0.0008479710944726774, |
|
"loss": 8.7085, |
|
"step": 41600 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 0.0008522000980575213, |
|
"loss": 8.7115, |
|
"step": 41800 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 0.0008564693769110079, |
|
"loss": 8.7055, |
|
"step": 42000 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 0.0008607577087418623, |
|
"loss": 8.6935, |
|
"step": 42200 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 0.0008650650747392373, |
|
"loss": 8.7042, |
|
"step": 42400 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 0.0008693914560087938, |
|
"loss": 8.6849, |
|
"step": 42600 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 0.0008737368335727785, |
|
"loss": 8.6876, |
|
"step": 42800 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 0.0008781011883701138, |
|
"loss": 8.6922, |
|
"step": 43000 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 0.0008824845012564749, |
|
"loss": 8.6922, |
|
"step": 43200 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 0.000886886753004381, |
|
"loss": 8.6853, |
|
"step": 43400 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 0.000891307924303272, |
|
"loss": 8.6936, |
|
"step": 43600 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 0.0008957257484203587, |
|
"loss": 8.6995, |
|
"step": 43800 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 0.0009001846062028449, |
|
"loss": 8.6841, |
|
"step": 44000 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 0.0009046623252050388, |
|
"loss": 8.6735, |
|
"step": 44200 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 0.0009091588857853411, |
|
"loss": 8.6888, |
|
"step": 44400 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 0.0009136742682195071, |
|
"loss": 8.6788, |
|
"step": 44600 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 0.0009182084527007278, |
|
"loss": 8.6817, |
|
"step": 44800 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 0.0009227614193397203, |
|
"loss": 8.671, |
|
"step": 45000 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 0.0009273331481648092, |
|
"loss": 8.6738, |
|
"step": 45200 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 0.0009319236191220222, |
|
"loss": 8.6657, |
|
"step": 45400 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 0.0009365328120751783, |
|
"loss": 8.6599, |
|
"step": 45600 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 0.0009411375208451828, |
|
"loss": 8.6747, |
|
"step": 45800 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 0.0009457840036964961, |
|
"loss": 8.6698, |
|
"step": 46000 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 0.0009504491477449178, |
|
"loss": 8.6765, |
|
"step": 46200 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 0.0009551329325267026, |
|
"loss": 8.6732, |
|
"step": 46400 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 0.0009598353374963477, |
|
"loss": 8.6654, |
|
"step": 46600 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 0.0009645563420266623, |
|
"loss": 8.6614, |
|
"step": 46800 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 0.0009692959254088748, |
|
"loss": 8.6672, |
|
"step": 47000 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 0.0009740540668527146, |
|
"loss": 8.6508, |
|
"step": 47200 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 0.0009788307454865058, |
|
"loss": 8.6533, |
|
"step": 47400 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 0.0009836259403572592, |
|
"loss": 8.656, |
|
"step": 47600 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 0.0009884155160084767, |
|
"loss": 8.65, |
|
"step": 47800 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 0.0009932475878516138, |
|
"loss": 8.6559, |
|
"step": 48000 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 0.0009980981126919714, |
|
"loss": 8.646, |
|
"step": 48200 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 0.0010029670692526266, |
|
"loss": 8.6504, |
|
"step": 48400 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 0.001007854436175815, |
|
"loss": 8.6437, |
|
"step": 48600 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 0.00101276019202301, |
|
"loss": 8.6483, |
|
"step": 48800 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 0.0010176843152750244, |
|
"loss": 8.6381, |
|
"step": 49000 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 0.001022602026387454, |
|
"loss": 8.6419, |
|
"step": 49200 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 0.0010275627280027944, |
|
"loss": 8.6381, |
|
"step": 49400 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 0.0010325417320913577, |
|
"loss": 8.6274, |
|
"step": 49600 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 0.0010375390168126473, |
|
"loss": 8.6264, |
|
"step": 49800 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 0.0010425545602459826, |
|
"loss": 8.6279, |
|
"step": 50000 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 0.0010475883403905893, |
|
"loss": 8.636, |
|
"step": 50200 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 0.001052640335165696, |
|
"loss": 8.6305, |
|
"step": 50400 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 0.001057710522410639, |
|
"loss": 8.6259, |
|
"step": 50600 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 0.001062798879884943, |
|
"loss": 8.6288, |
|
"step": 50800 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 0.0010679053852684361, |
|
"loss": 8.6286, |
|
"step": 51000 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 0.0010730300161613388, |
|
"loss": 8.6203, |
|
"step": 51200 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 0.0010781469914207427, |
|
"loss": 8.618, |
|
"step": 51400 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 0.0010833077154690767, |
|
"loss": 8.6183, |
|
"step": 51600 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 0.0010884864974642153, |
|
"loss": 8.6244, |
|
"step": 51800 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 0.0010936833146893334, |
|
"loss": 8.6129, |
|
"step": 52000 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 0.001098898144348496, |
|
"loss": 8.622, |
|
"step": 52200 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 0.001104130963566756, |
|
"loss": 8.6104, |
|
"step": 52400 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 0.001109381749390256, |
|
"loss": 8.603, |
|
"step": 52600 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 0.00111465047878633, |
|
"loss": 8.6093, |
|
"step": 52800 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 0.001119937128643592, |
|
"loss": 8.5969, |
|
"step": 53000 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 0.0011252416757720606, |
|
"loss": 8.5992, |
|
"step": 53200 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 0.0011305374403745901, |
|
"loss": 8.6047, |
|
"step": 53400 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 0.001135877622966507, |
|
"loss": 8.5958, |
|
"step": 53600 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 0.001141235632906355, |
|
"loss": 8.5948, |
|
"step": 53800 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 0.0011466114466911256, |
|
"loss": 8.5896, |
|
"step": 54000 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 0.001152005040739713, |
|
"loss": 8.5887, |
|
"step": 54200 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 0.0011574163913930131, |
|
"loss": 8.5862, |
|
"step": 54400 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 0.0011628454749140395, |
|
"loss": 8.5949, |
|
"step": 54600 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 0.0011682922674880192, |
|
"loss": 8.588, |
|
"step": 54800 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 0.0011737567452224911, |
|
"loss": 8.5918, |
|
"step": 55000 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 0.0011792388841474245, |
|
"loss": 8.5904, |
|
"step": 55200 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 0.0011847111175024606, |
|
"loss": 8.5739, |
|
"step": 55400 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 0.0011902284185834888, |
|
"loss": 8.5756, |
|
"step": 55600 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 0.0011957633086016797, |
|
"loss": 8.568, |
|
"step": 55800 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 0.0012013157632781366, |
|
"loss": 8.5696, |
|
"step": 56000 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 0.00120688575825691, |
|
"loss": 8.5768, |
|
"step": 56200 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 0.0012124732691051188, |
|
"loss": 8.5696, |
|
"step": 56400 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 0.0012180782713130424, |
|
"loss": 8.5687, |
|
"step": 56600 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 0.0012237007402942333, |
|
"loss": 8.56, |
|
"step": 56800 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 0.0012293406513856284, |
|
"loss": 8.56, |
|
"step": 57000 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 0.0012349979798476525, |
|
"loss": 8.5602, |
|
"step": 57200 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 0.0012406442840364133, |
|
"loss": 8.5551, |
|
"step": 57400 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 0.0012463362859392122, |
|
"loss": 8.5556, |
|
"step": 57600 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 0.0012520456306609733, |
|
"loss": 8.5508, |
|
"step": 57800 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 0.0012577722931575563, |
|
"loss": 8.549, |
|
"step": 58000 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 0.0012634874855592566, |
|
"loss": 8.5582, |
|
"step": 58200 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 0.001269248621894795, |
|
"loss": 8.554, |
|
"step": 58400 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 0.0012750270005439136, |
|
"loss": 8.5453, |
|
"step": 58600 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 0.0012808225961596451, |
|
"loss": 8.5545, |
|
"step": 58800 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 0.0012866353833195041, |
|
"loss": 8.543, |
|
"step": 59000 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 0.0012924653365255934, |
|
"loss": 8.5454, |
|
"step": 59200 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 0.001298312430204715, |
|
"loss": 8.5496, |
|
"step": 59400 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 0.0013041766387084808, |
|
"loss": 8.5348, |
|
"step": 59600 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 0.0013100579363134381, |
|
"loss": 8.5311, |
|
"step": 59800 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 0.001315956297221161, |
|
"loss": 8.5378, |
|
"step": 60000 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 0.0013218716955583822, |
|
"loss": 8.5304, |
|
"step": 60200 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 0.0013278041053770978, |
|
"loss": 8.5166, |
|
"step": 60400 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 0.0013337237114701053, |
|
"loss": 8.5324, |
|
"step": 60600 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 0.001339689981377656, |
|
"loss": 8.5196, |
|
"step": 60800 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 0.0013456731846064624, |
|
"loss": 8.5191, |
|
"step": 61000 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 0.0013516732949110932, |
|
"loss": 8.5285, |
|
"step": 61200 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 0.0013576902859719474, |
|
"loss": 8.5143, |
|
"step": 61400 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 0.0013637241313953895, |
|
"loss": 8.519, |
|
"step": 61600 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 0.0013697748047138431, |
|
"loss": 8.4964, |
|
"step": 61800 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 0.0013758422793859176, |
|
"loss": 8.5216, |
|
"step": 62000 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 0.001381926528796519, |
|
"loss": 8.5098, |
|
"step": 62200 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 0.0013880275262569807, |
|
"loss": 8.511, |
|
"step": 62400 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 0.0013941452450051631, |
|
"loss": 8.5124, |
|
"step": 62600 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 0.001400248944656608, |
|
"loss": 8.5054, |
|
"step": 62800 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 0.0014063999421298785, |
|
"loss": 8.5016, |
|
"step": 63000 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 0.0014125675802999262, |
|
"loss": 8.501, |
|
"step": 63200 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 0.001418751832112295, |
|
"loss": 8.505, |
|
"step": 63400 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 0.0014249526704396467, |
|
"loss": 8.4953, |
|
"step": 63600 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 0.0014311700680818915, |
|
"loss": 8.5074, |
|
"step": 63800 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 0.0014374039977662987, |
|
"loss": 8.4991, |
|
"step": 64000 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 0.0014436544321476206, |
|
"loss": 8.5087, |
|
"step": 64200 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 0.0014499213438082127, |
|
"loss": 8.4871, |
|
"step": 64400 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 0.0014562047052581514, |
|
"loss": 8.4954, |
|
"step": 64600 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 0.0014624729492123557, |
|
"loss": 8.4791, |
|
"step": 64800 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 0.0014687890455785963, |
|
"loss": 8.4853, |
|
"step": 65000 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 0.0014751215089706584, |
|
"loss": 8.4855, |
|
"step": 65200 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 0.0014814703116110776, |
|
"loss": 8.4707, |
|
"step": 65400 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 0.001487835425650709, |
|
"loss": 8.4743, |
|
"step": 65600 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 0.001494216823168866, |
|
"loss": 8.4717, |
|
"step": 65800 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 0.0015006144761734279, |
|
"loss": 8.4823, |
|
"step": 66000 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 0.001507028356600975, |
|
"loss": 8.4708, |
|
"step": 66200 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 0.0015134584363168998, |
|
"loss": 8.4649, |
|
"step": 66400 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 0.001519904687115537, |
|
"loss": 8.4695, |
|
"step": 66600 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 0.0015263347286438994, |
|
"loss": 8.4759, |
|
"step": 66800 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 0.0015328131562056986, |
|
"loss": 8.4655, |
|
"step": 67000 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 0.0015393076699503766, |
|
"loss": 8.4752, |
|
"step": 67200 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 0.0015458182413896245, |
|
"loss": 8.4535, |
|
"step": 67400 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 0.001552344841964707, |
|
"loss": 8.4535, |
|
"step": 67600 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 0.0015588874430465648, |
|
"loss": 8.4519, |
|
"step": 67800 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 0.001565446015935959, |
|
"loss": 8.4568, |
|
"step": 68000 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 0.00157202053186359, |
|
"loss": 8.4524, |
|
"step": 68200 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 0.0015786109619902212, |
|
"loss": 8.4589, |
|
"step": 68400 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 0.0015852172774068075, |
|
"loss": 8.4559, |
|
"step": 68600 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 0.0015918062988814347, |
|
"loss": 8.446, |
|
"step": 68800 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 0.0015984442188082624, |
|
"loss": 8.439, |
|
"step": 69000 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 0.0016050979370261006, |
|
"loss": 8.4504, |
|
"step": 69200 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 0.0016117674243482875, |
|
"loss": 8.4487, |
|
"step": 69400 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 0.0016184526515189961, |
|
"loss": 8.4472, |
|
"step": 69600 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 0.0016251535892133542, |
|
"loss": 8.433, |
|
"step": 69800 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 0.001631870208037572, |
|
"loss": 8.426, |
|
"step": 70000 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 0.0016386024785290804, |
|
"loss": 8.4311, |
|
"step": 70200 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 0.0016453503711566474, |
|
"loss": 8.432, |
|
"step": 70400 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 0.0016520800001573153, |
|
"loss": 8.4282, |
|
"step": 70600 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 0.0016588589704489114, |
|
"loss": 8.4386, |
|
"step": 70800 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 0.0016656534740210893, |
|
"loss": 8.4158, |
|
"step": 71000 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 0.0016724634810696363, |
|
"loss": 8.4242, |
|
"step": 71200 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 0.0016792889617223312, |
|
"loss": 8.4279, |
|
"step": 71400 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 0.0016861298860390735, |
|
"loss": 8.4242, |
|
"step": 71600 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 0.0016929862240120247, |
|
"loss": 8.4271, |
|
"step": 71800 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 0.0016998579455657307, |
|
"loss": 8.4265, |
|
"step": 72000 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 0.0017067450205572581, |
|
"loss": 8.421, |
|
"step": 72200 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 0.0017136474187763266, |
|
"loss": 8.4156, |
|
"step": 72400 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 0.0017205304834985446, |
|
"loss": 8.421, |
|
"step": 72600 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 0.0017274633610356825, |
|
"loss": 8.4256, |
|
"step": 72800 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 0.0017344114709189774, |
|
"loss": 8.4191, |
|
"step": 73000 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 0.0017413747826704132, |
|
"loss": 8.4015, |
|
"step": 73200 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 0.001748318335641869, |
|
"loss": 8.4129, |
|
"step": 73400 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 0.0017553118838016506, |
|
"loss": 8.4179, |
|
"step": 73600 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 0.0017623205421495314, |
|
"loss": 8.4142, |
|
"step": 73800 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 0.0017693442799418986, |
|
"loss": 8.4005, |
|
"step": 74000 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 0.0017763830663689965, |
|
"loss": 8.41, |
|
"step": 74200 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 0.0017834368705550597, |
|
"loss": 8.4162, |
|
"step": 74400 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 0.001790505661558443, |
|
"loss": 8.4081, |
|
"step": 74600 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 0.0017975894083717692, |
|
"loss": 8.4027, |
|
"step": 74800 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 0.0018046880799220469, |
|
"loss": 8.4097, |
|
"step": 75000 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 0.0018118016450708232, |
|
"loss": 8.4077, |
|
"step": 75200 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 0.0018189300726143137, |
|
"loss": 8.4086, |
|
"step": 75400 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 0.0018260375781495742, |
|
"loss": 8.4084, |
|
"step": 75600 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 0.001833195562689592, |
|
"loss": 8.4166, |
|
"step": 75800 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 0.0018403683157795104, |
|
"loss": 8.4121, |
|
"step": 76000 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 0.0018475558059559121, |
|
"loss": 8.404, |
|
"step": 76200 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 0.001854758001690741, |
|
"loss": 8.4035, |
|
"step": 76400 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 0.0018619748713914318, |
|
"loss": 8.4044, |
|
"step": 76600 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 0.0018692063834010522, |
|
"loss": 8.3948, |
|
"step": 76800 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 0.0018764525059984417, |
|
"loss": 8.3935, |
|
"step": 77000 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 0.001883713207398349, |
|
"loss": 8.4028, |
|
"step": 77200 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 0.0018909884557515733, |
|
"loss": 8.401, |
|
"step": 77400 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 0.0018982417342748425, |
|
"loss": 8.4085, |
|
"step": 77600 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 0.0019055459083963232, |
|
"loss": 8.3979, |
|
"step": 77800 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 0.00191282790468081, |
|
"loss": 8.4027, |
|
"step": 78000 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 0.0019201608770511077, |
|
"loss": 8.401, |
|
"step": 78200 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 0.001927508236496343, |
|
"loss": 8.4054, |
|
"step": 78400 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 0.0019348699507871943, |
|
"loss": 8.408, |
|
"step": 78600 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 0.0019422459876313608, |
|
"loss": 8.4064, |
|
"step": 78800 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 0.0019496363146737205, |
|
"loss": 8.4066, |
|
"step": 79000 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 0.001957040899496469, |
|
"loss": 8.4061, |
|
"step": 79200 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 0.0019644597096192574, |
|
"loss": 8.411, |
|
"step": 79400 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 0.00197189271249934, |
|
"loss": 8.3999, |
|
"step": 79600 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 0.001979339875531708, |
|
"loss": 8.4002, |
|
"step": 79800 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 0.001986801166049247, |
|
"loss": 8.4058, |
|
"step": 80000 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 0.0019942391393900083, |
|
"loss": 8.4172, |
|
"step": 80200 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 0.002001728516400637, |
|
"loss": 8.4089, |
|
"step": 80400 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 0.002009231922688247, |
|
"loss": 8.4087, |
|
"step": 80600 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 0.002016749325339009, |
|
"loss": 8.4115, |
|
"step": 80800 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 0.0020242806913776997, |
|
"loss": 8.4317, |
|
"step": 81000 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 0.0020318259877678373, |
|
"loss": 8.4049, |
|
"step": 81200 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 0.002039385181411845, |
|
"loss": 8.4129, |
|
"step": 81400 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 0.002046958239151178, |
|
"loss": 8.424, |
|
"step": 81600 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 0.0020545451277664776, |
|
"loss": 8.4146, |
|
"step": 81800 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 0.0020621458139777164, |
|
"loss": 8.4233, |
|
"step": 82000 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 0.002069722158008656, |
|
"loss": 8.4199, |
|
"step": 82200 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 0.0020773502707586607, |
|
"loss": 8.4134, |
|
"step": 82400 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 0.0020849920810694245, |
|
"loss": 8.415, |
|
"step": 82600 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 0.0020926475554200047, |
|
"loss": 8.4301, |
|
"step": 82800 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 0.0021003166602295217, |
|
"loss": 8.4128, |
|
"step": 83000 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 0.002107999361857309, |
|
"loss": 8.4284, |
|
"step": 83200 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 0.002115695626603048, |
|
"loss": 8.422, |
|
"step": 83400 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 0.002123405420706933, |
|
"loss": 8.4173, |
|
"step": 83600 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 0.002131128710349813, |
|
"loss": 8.4245, |
|
"step": 83800 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 0.002138865461653332, |
|
"loss": 8.427, |
|
"step": 84000 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 0.0021465768564397046, |
|
"loss": 8.4287, |
|
"step": 84200 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 0.002154340362309423, |
|
"loss": 8.4361, |
|
"step": 84400 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 0.002162078310516678, |
|
"loss": 8.4299, |
|
"step": 84600 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 0.0021698684354139377, |
|
"loss": 8.4447, |
|
"step": 84800 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 0.0021776718520393184, |
|
"loss": 8.4399, |
|
"step": 85000 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 0.0021854885261629875, |
|
"loss": 8.4469, |
|
"step": 85200 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 0.0021933184234969594, |
|
"loss": 8.4328, |
|
"step": 85400 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 0.0022011615096952444, |
|
"loss": 8.4504, |
|
"step": 85600 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 0.002209017750354, |
|
"loss": 8.4383, |
|
"step": 85800 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 0.0022168871110116815, |
|
"loss": 8.4472, |
|
"step": 86000 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 0.0022247695571491945, |
|
"loss": 8.448, |
|
"step": 86200 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 0.0022326650541900405, |
|
"loss": 8.4451, |
|
"step": 86400 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 0.0022405339926133165, |
|
"loss": 8.4415, |
|
"step": 86600 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 0.0022484554226809986, |
|
"loss": 8.4439, |
|
"step": 86800 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 0.0022563897997535266, |
|
"loss": 8.4497, |
|
"step": 87000 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 0.0022643370890266133, |
|
"loss": 8.452, |
|
"step": 87200 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 0.0022722972556393217, |
|
"loss": 8.4677, |
|
"step": 87400 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 0.0022802702646742383, |
|
"loss": 8.4595, |
|
"step": 87600 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 0.002288256081157608, |
|
"loss": 8.4595, |
|
"step": 87800 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 0.002296254670059502, |
|
"loss": 8.4642, |
|
"step": 88000 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 0.0023042659962939603, |
|
"loss": 8.4709, |
|
"step": 88200 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 0.0023122900247191545, |
|
"loss": 8.4679, |
|
"step": 88400 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 0.002320286505209589, |
|
"loss": 8.4603, |
|
"step": 88600 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 0.0023283357692971242, |
|
"loss": 8.4662, |
|
"step": 88800 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 0.002336397629992889, |
|
"loss": 8.4653, |
|
"step": 89000 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 0.002344472051933384, |
|
"loss": 8.4833, |
|
"step": 89200 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 0.002352558999700007, |
|
"loss": 8.4974, |
|
"step": 89400 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 0.002360658437819213, |
|
"loss": 8.4881, |
|
"step": 89600 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 0.0023687703307626647, |
|
"loss": 8.4878, |
|
"step": 89800 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 0.0023768946429473976, |
|
"loss": 8.4846, |
|
"step": 90000 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 0.002385031338735963, |
|
"loss": 8.4866, |
|
"step": 90200 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 0.0023931803824365962, |
|
"loss": 8.4847, |
|
"step": 90400 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 0.002401300900956714, |
|
"loss": 8.4934, |
|
"step": 90600 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 0.002409474471896992, |
|
"loss": 8.4872, |
|
"step": 90800 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 0.0024176602835290807, |
|
"loss": 8.4977, |
|
"step": 91000 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 0.0024258582999457665, |
|
"loss": 8.4967, |
|
"step": 91200 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 0.0024340684851863, |
|
"loss": 8.505, |
|
"step": 91400 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 0.002442290803236551, |
|
"loss": 8.5126, |
|
"step": 91600 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 0.0024505252180291688, |
|
"loss": 8.5033, |
|
"step": 91800 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 0.0024587304311256865, |
|
"loss": 8.513, |
|
"step": 92000 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 0.0024669888709567232, |
|
"loss": 8.5082, |
|
"step": 92200 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 0.0024752592991915973, |
|
"loss": 8.517, |
|
"step": 92400 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 0.0024835416795519205, |
|
"loss": 8.5293, |
|
"step": 92600 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 0.002491835975706881, |
|
"loss": 8.5094, |
|
"step": 92800 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 0.0025001421512733943, |
|
"loss": 8.5139, |
|
"step": 93000 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 0.0025084601698162666, |
|
"loss": 8.5099, |
|
"step": 93200 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 0.0025167899948483575, |
|
"loss": 8.5185, |
|
"step": 93400 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 0.0025251315898307336, |
|
"loss": 8.5143, |
|
"step": 93600 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 0.002533484918172837, |
|
"loss": 8.5277, |
|
"step": 93800 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 0.0025418499432326358, |
|
"loss": 8.5231, |
|
"step": 94000 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 0.002550184715947826, |
|
"loss": 8.5436, |
|
"step": 94200 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 0.0025585729662869474, |
|
"loss": 8.5373, |
|
"step": 94400 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 0.002566972803294579, |
|
"loss": 8.5347, |
|
"step": 94600 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 0.00257538419012468, |
|
"loss": 8.5544, |
|
"step": 94800 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 0.0025838070898805453, |
|
"loss": 8.5339, |
|
"step": 95000 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 0.002592241465614974, |
|
"loss": 8.5405, |
|
"step": 95200 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 0.002600687280330416, |
|
"loss": 8.5501, |
|
"step": 95400 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 0.0026091444969791513, |
|
"loss": 8.5344, |
|
"step": 95600 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 0.002617613078463441, |
|
"loss": 8.5477, |
|
"step": 95800 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 0.002626092987635699, |
|
"loss": 8.5443, |
|
"step": 96000 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 0.002634541703276827, |
|
"loss": 8.5398, |
|
"step": 96200 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 0.002643044100010169, |
|
"loss": 8.5523, |
|
"step": 96400 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 0.002651557712877833, |
|
"loss": 8.5562, |
|
"step": 96600 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 0.0026600825045346955, |
|
"loss": 8.5525, |
|
"step": 96800 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 0.0026686184375866043, |
|
"loss": 8.5728, |
|
"step": 97000 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 0.002677165474590528, |
|
"loss": 8.5631, |
|
"step": 97200 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 0.002685723578054729, |
|
"loss": 8.5658, |
|
"step": 97400 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 0.0026942927104389334, |
|
"loss": 8.566, |
|
"step": 97600 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 0.002702872834154482, |
|
"loss": 8.5716, |
|
"step": 97800 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 0.0027114639115645017, |
|
"loss": 8.5697, |
|
"step": 98000 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 0.002720022867925799, |
|
"loss": 8.5726, |
|
"step": 98200 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 0.0027286356853246747, |
|
"loss": 8.5718, |
|
"step": 98400 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 0.0027372593434088002, |
|
"loss": 8.5716, |
|
"step": 98600 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 0.002745893804350339, |
|
"loss": 8.5767, |
|
"step": 98800 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 0.00275453903027407, |
|
"loss": 8.5957, |
|
"step": 99000 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 0.0027631949832575475, |
|
"loss": 8.5881, |
|
"step": 99200 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 0.002771861625331276, |
|
"loss": 8.5835, |
|
"step": 99400 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 0.002780495505581529, |
|
"loss": 8.5905, |
|
"step": 99600 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 0.002789183358769584, |
|
"loss": 8.5938, |
|
"step": 99800 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 0.0027978817870494, |
|
"loss": 8.5906, |
|
"step": 100000 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 0.0028065907522651585, |
|
"loss": 8.5938, |
|
"step": 100200 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 0.002815310216214826, |
|
"loss": 8.5887, |
|
"step": 100400 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 0.00282404014065031, |
|
"loss": 8.5922, |
|
"step": 100600 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 0.0028327804872776367, |
|
"loss": 8.5926, |
|
"step": 100800 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 0.002841531217757113, |
|
"loss": 8.5978, |
|
"step": 101000 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 0.0028502922937035, |
|
"loss": 8.5984, |
|
"step": 101200 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 0.0028590636766861726, |
|
"loss": 8.6046, |
|
"step": 101400 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 0.0028678453282293013, |
|
"loss": 8.6093, |
|
"step": 101600 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 0.0028766372098120076, |
|
"loss": 8.6083, |
|
"step": 101800 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 0.0028854392828685377, |
|
"loss": 8.6057, |
|
"step": 102000 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 0.0028942515087884407, |
|
"loss": 8.6146, |
|
"step": 102200 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 0.00290307384891672, |
|
"loss": 8.608, |
|
"step": 102400 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 0.00291190626455402, |
|
"loss": 8.6081, |
|
"step": 102600 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 0.0029207044797924615, |
|
"loss": 8.6164, |
|
"step": 102800 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 0.0029295568802797795, |
|
"loss": 8.6008, |
|
"step": 103000 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 0.0029384192401078115, |
|
"loss": 8.6166, |
|
"step": 103200 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 0.00294729152040165, |
|
"loss": 8.5962, |
|
"step": 103400 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 0.002956173682242877, |
|
"loss": 8.6129, |
|
"step": 103600 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 0.002965065686669722, |
|
"loss": 8.6092, |
|
"step": 103800 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 0.0029739674946772463, |
|
"loss": 8.6189, |
|
"step": 104000 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 0.002982879067217503, |
|
"loss": 8.612, |
|
"step": 104200 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 0.0029918003651997144, |
|
"loss": 8.6135, |
|
"step": 104400 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 0.003000731349490442, |
|
"loss": 8.6182, |
|
"step": 104600 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 0.0030096719809137584, |
|
"loss": 8.6423, |
|
"step": 104800 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 0.003018622220251419, |
|
"loss": 8.6145, |
|
"step": 105000 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 0.0030275372054660438, |
|
"loss": 8.6249, |
|
"step": 105200 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 0.0030365064952603237, |
|
"loss": 8.6265, |
|
"step": 105400 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 0.0030454852752588536, |
|
"loss": 8.6304, |
|
"step": 105600 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 0.0030544735060760494, |
|
"loss": 8.6309, |
|
"step": 105800 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 0.0030634711482848704, |
|
"loss": 8.6258, |
|
"step": 106000 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 0.003072478162416994, |
|
"loss": 8.6328, |
|
"step": 106200 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 0.003081494508962985, |
|
"loss": 8.6298, |
|
"step": 106400 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 0.0030905201483724717, |
|
"loss": 8.639, |
|
"step": 106600 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 0.0030995550410543226, |
|
"loss": 8.6212, |
|
"step": 106800 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 0.0031085991473768114, |
|
"loss": 8.6374, |
|
"step": 107000 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 0.003117652427667799, |
|
"loss": 8.6326, |
|
"step": 107200 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 0.0031267148422149046, |
|
"loss": 8.6291, |
|
"step": 107400 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 0.003135740971163656, |
|
"loss": 8.6375, |
|
"step": 107600 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 0.0031448214897512507, |
|
"loss": 8.6226, |
|
"step": 107800 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 0.003153911023417371, |
|
"loss": 8.6359, |
|
"step": 108000 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 0.003163009532290608, |
|
"loss": 8.6491, |
|
"step": 108200 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 0.0031721169764601844, |
|
"loss": 8.6405, |
|
"step": 108400 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 0.0031812333159761293, |
|
"loss": 8.632, |
|
"step": 108600 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 0.003190358510849451, |
|
"loss": 8.6363, |
|
"step": 108800 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 0.0031994925210523124, |
|
"loss": 8.6316, |
|
"step": 109000 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 0.0032086353065182106, |
|
"loss": 8.6423, |
|
"step": 109200 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 0.003217786827142146, |
|
"loss": 8.6274, |
|
"step": 109400 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 0.003226947042780804, |
|
"loss": 8.6366, |
|
"step": 109600 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 0.003236070047437989, |
|
"loss": 8.6388, |
|
"step": 109800 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 0.003245247489550804, |
|
"loss": 8.6364, |
|
"step": 110000 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 0.0032544335062216403, |
|
"loss": 8.6422, |
|
"step": 110200 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 0.0032636280571558636, |
|
"loss": 8.618, |
|
"step": 110400 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 0.003272831102021408, |
|
"loss": 8.6276, |
|
"step": 110600 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 0.003282042600448948, |
|
"loss": 8.6454, |
|
"step": 110800 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 0.0032912625120320753, |
|
"loss": 8.6388, |
|
"step": 111000 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 0.0033004907963274733, |
|
"loss": 8.6339, |
|
"step": 111200 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 0.003309727412855108, |
|
"loss": 8.6243, |
|
"step": 111400 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 0.0033189723210983865, |
|
"loss": 8.6264, |
|
"step": 111600 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 0.0033282254805043487, |
|
"loss": 8.6401, |
|
"step": 111800 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 0.003337440523277331, |
|
"loss": 8.6366, |
|
"step": 112000 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 0.0033467100224565524, |
|
"loss": 8.6338, |
|
"step": 112200 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 0.003355987651126521, |
|
"loss": 8.6377, |
|
"step": 112400 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 0.0033652733685907424, |
|
"loss": 8.6414, |
|
"step": 112600 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 0.0033745671341172496, |
|
"loss": 8.6264, |
|
"step": 112800 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 0.0033838689069387654, |
|
"loss": 8.6289, |
|
"step": 113000 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 0.00339317864625289, |
|
"loss": 8.6244, |
|
"step": 113200 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 0.003402496311222283, |
|
"loss": 8.6287, |
|
"step": 113400 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 0.0034118218609748346, |
|
"loss": 8.6251, |
|
"step": 113600 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 0.003421155254603846, |
|
"loss": 8.6214, |
|
"step": 113800 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 0.0034304964511682147, |
|
"loss": 8.6303, |
|
"step": 114000 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 0.0034398454096926092, |
|
"loss": 8.6369, |
|
"step": 114200 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 0.003449202089167651, |
|
"loss": 8.6236, |
|
"step": 114400 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 0.0034585196077173436, |
|
"loss": 8.6251, |
|
"step": 114600 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 0.003467891567838331, |
|
"loss": 8.6295, |
|
"step": 114800 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 0.003477271125884973, |
|
"loss": 8.6219, |
|
"step": 115000 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 0.0034866582407136653, |
|
"loss": 8.6271, |
|
"step": 115200 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 0.003496052871147656, |
|
"loss": 8.6372, |
|
"step": 115400 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 0.0035054549759772242, |
|
"loss": 8.6238, |
|
"step": 115600 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 0.0035148645139598637, |
|
"loss": 8.6207, |
|
"step": 115800 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 0.0035242814438204637, |
|
"loss": 8.6099, |
|
"step": 116000 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 0.0035337057242514833, |
|
"loss": 8.6142, |
|
"step": 116200 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 0.0035431373139131472, |
|
"loss": 8.6033, |
|
"step": 116400 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 0.0035525761714336104, |
|
"loss": 8.6178, |
|
"step": 116600 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 0.0035619750070819923, |
|
"loss": 8.6138, |
|
"step": 116800 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 0.0035714282402552104, |
|
"loss": 8.6143, |
|
"step": 117000 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 0.0035808886171885554, |
|
"loss": 8.6034, |
|
"step": 117200 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 0.0035903560963839124, |
|
"loss": 8.6156, |
|
"step": 117400 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 0.0035998306363120057, |
|
"loss": 8.6148, |
|
"step": 117600 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 0.0036093121954125906, |
|
"loss": 8.6039, |
|
"step": 117800 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 0.003618800732094636, |
|
"loss": 8.6107, |
|
"step": 118000 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 0.0036282962047364973, |
|
"loss": 8.6094, |
|
"step": 118200 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 0.0036377985716861084, |
|
"loss": 8.616, |
|
"step": 118400 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 0.003647307791261164, |
|
"loss": 8.6135, |
|
"step": 118600 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 0.003656823821749292, |
|
"loss": 8.6062, |
|
"step": 118800 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 0.0036662989906407328, |
|
"loss": 8.6029, |
|
"step": 119000 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 0.0036758284841655496, |
|
"loss": 8.6011, |
|
"step": 119200 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 0.0036853646634968946, |
|
"loss": 8.5993, |
|
"step": 119400 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 0.003694907486804143, |
|
"loss": 8.6029, |
|
"step": 119600 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 0.00370445691222752, |
|
"loss": 8.6018, |
|
"step": 119800 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 0.003714012897878298, |
|
"loss": 8.5978, |
|
"step": 120000 |
|
} |
|
], |
|
"max_steps": 1000000, |
|
"num_train_epochs": 5, |
|
"total_flos": 1.9125959786496e+17, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|