Bingsu's picture
Training in progress, step 120000
b97e6d1
raw
history blame
73.4 kB
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 0.51566991968441,
"global_step": 120000,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.0,
"learning_rate": 0.0004000105276443632,
"loss": 10.1199,
"step": 200
},
{
"epoch": 0.0,
"learning_rate": 0.00040004211053127486,
"loss": 9.997,
"step": 400
},
{
"epoch": 0.0,
"learning_rate": 0.000400094748522194,
"loss": 9.9386,
"step": 600
},
{
"epoch": 0.0,
"learning_rate": 0.00040016844138622554,
"loss": 9.8988,
"step": 800
},
{
"epoch": 0.0,
"learning_rate": 0.0004002631888001141,
"loss": 9.8579,
"step": 1000
},
{
"epoch": 0.01,
"learning_rate": 0.0004003789903482477,
"loss": 9.8159,
"step": 1200
},
{
"epoch": 0.01,
"learning_rate": 0.0004005158455226594,
"loss": 9.7867,
"step": 1400
},
{
"epoch": 0.01,
"learning_rate": 0.0004006737537230326,
"loss": 9.7605,
"step": 1600
},
{
"epoch": 0.01,
"learning_rate": 0.0004008527142566991,
"loss": 9.7357,
"step": 1800
},
{
"epoch": 0.01,
"learning_rate": 0.0004010527263386479,
"loss": 9.7138,
"step": 2000
},
{
"epoch": 0.01,
"learning_rate": 0.00040127378909152016,
"loss": 9.6894,
"step": 2200
},
{
"epoch": 0.01,
"learning_rate": 0.000401515901545621,
"loss": 9.6634,
"step": 2400
},
{
"epoch": 0.01,
"learning_rate": 0.00040177906263891804,
"loss": 9.6451,
"step": 2600
},
{
"epoch": 0.01,
"learning_rate": 0.00040206327121705167,
"loss": 9.6279,
"step": 2800
},
{
"epoch": 0.01,
"learning_rate": 0.00040236852603333685,
"loss": 9.6038,
"step": 3000
},
{
"epoch": 0.01,
"learning_rate": 0.0004026948257487631,
"loss": 9.5874,
"step": 3200
},
{
"epoch": 0.01,
"learning_rate": 0.00040304216893201697,
"loss": 9.5729,
"step": 3400
},
{
"epoch": 0.02,
"learning_rate": 0.0004034105540594666,
"loss": 9.547,
"step": 3600
},
{
"epoch": 0.02,
"learning_rate": 0.0004037999795151858,
"loss": 9.5348,
"step": 3800
},
{
"epoch": 0.02,
"learning_rate": 0.0004042104435909525,
"loss": 9.5207,
"step": 4000
},
{
"epoch": 0.02,
"learning_rate": 0.0004046419444862573,
"loss": 9.5061,
"step": 4200
},
{
"epoch": 0.02,
"learning_rate": 0.0004050944803083139,
"loss": 9.493,
"step": 4400
},
{
"epoch": 0.02,
"learning_rate": 0.0004055680490720661,
"loss": 9.4782,
"step": 4600
},
{
"epoch": 0.02,
"learning_rate": 0.0004060626487001964,
"loss": 9.4636,
"step": 4800
},
{
"epoch": 0.02,
"learning_rate": 0.0004065782770231313,
"loss": 9.4546,
"step": 5000
},
{
"epoch": 0.02,
"learning_rate": 0.000407114931779062,
"loss": 9.4453,
"step": 5200
},
{
"epoch": 0.02,
"learning_rate": 0.00040767261061393917,
"loss": 9.4174,
"step": 5400
},
{
"epoch": 0.02,
"learning_rate": 0.00040825131108149573,
"loss": 9.4159,
"step": 5600
},
{
"epoch": 0.02,
"learning_rate": 0.00040885103064325357,
"loss": 9.3993,
"step": 5800
},
{
"epoch": 0.03,
"learning_rate": 0.00040947176666852707,
"loss": 9.3953,
"step": 6000
},
{
"epoch": 0.03,
"learning_rate": 0.00041011351643444917,
"loss": 9.3854,
"step": 6200
},
{
"epoch": 0.03,
"learning_rate": 0.0004107762771259713,
"loss": 9.3679,
"step": 6400
},
{
"epoch": 0.03,
"learning_rate": 0.0004114600458358809,
"loss": 9.3595,
"step": 6600
},
{
"epoch": 0.03,
"learning_rate": 0.00041216481956481664,
"loss": 9.3504,
"step": 6800
},
{
"epoch": 0.03,
"learning_rate": 0.00041289059522127414,
"loss": 9.3417,
"step": 7000
},
{
"epoch": 0.03,
"learning_rate": 0.0004136373696216229,
"loss": 9.3275,
"step": 7200
},
{
"epoch": 0.03,
"learning_rate": 0.0004144051394901274,
"loss": 9.3201,
"step": 7400
},
{
"epoch": 0.03,
"learning_rate": 0.0004151939014589469,
"loss": 9.3123,
"step": 7600
},
{
"epoch": 0.03,
"learning_rate": 0.0004160036520681667,
"loss": 9.3084,
"step": 7800
},
{
"epoch": 0.03,
"learning_rate": 0.0004168343877657965,
"loss": 9.2954,
"step": 8000
},
{
"epoch": 0.04,
"learning_rate": 0.00041768179413688954,
"loss": 9.2862,
"step": 8200
},
{
"epoch": 0.04,
"learning_rate": 0.00041855438410810103,
"loss": 9.283,
"step": 8400
},
{
"epoch": 0.04,
"learning_rate": 0.00041944794797888797,
"loss": 9.2711,
"step": 8600
},
{
"epoch": 0.04,
"learning_rate": 0.00042036248182962185,
"loss": 9.2726,
"step": 8800
},
{
"epoch": 0.04,
"learning_rate": 0.0004212979816486783,
"loss": 9.2621,
"step": 9000
},
{
"epoch": 0.04,
"learning_rate": 0.00042225444333247354,
"loss": 9.2527,
"step": 9200
},
{
"epoch": 0.04,
"learning_rate": 0.0004232318626854678,
"loss": 9.2453,
"step": 9400
},
{
"epoch": 0.04,
"learning_rate": 0.0004242302354201949,
"loss": 9.2314,
"step": 9600
},
{
"epoch": 0.04,
"learning_rate": 0.000425249557157276,
"loss": 9.2337,
"step": 9800
},
{
"epoch": 0.04,
"learning_rate": 0.00042628982342543184,
"loss": 9.2276,
"step": 10000
},
{
"epoch": 0.04,
"learning_rate": 0.0004273456715498305,
"loss": 9.2181,
"step": 10200
},
{
"epoch": 0.04,
"learning_rate": 0.00042842770843401837,
"loss": 9.2142,
"step": 10400
},
{
"epoch": 0.05,
"learning_rate": 0.0004295306759082608,
"loss": 9.2052,
"step": 10600
},
{
"epoch": 0.05,
"learning_rate": 0.00043065456913437584,
"loss": 9.1994,
"step": 10800
},
{
"epoch": 0.05,
"learning_rate": 0.00043179938318238693,
"loss": 9.2017,
"step": 11000
},
{
"epoch": 0.05,
"learning_rate": 0.0004329651130305402,
"loss": 9.1991,
"step": 11200
},
{
"epoch": 0.05,
"learning_rate": 0.0004341517535653445,
"loss": 9.1921,
"step": 11400
},
{
"epoch": 0.05,
"learning_rate": 0.00043535929958157804,
"loss": 9.1786,
"step": 11600
},
{
"epoch": 0.05,
"learning_rate": 0.0004365877457823183,
"loss": 9.1766,
"step": 11800
},
{
"epoch": 0.05,
"learning_rate": 0.00043783708677896244,
"loss": 9.1614,
"step": 12000
},
{
"epoch": 0.05,
"learning_rate": 0.0004391073170912519,
"loss": 9.1717,
"step": 12200
},
{
"epoch": 0.05,
"learning_rate": 0.0004403984311473017,
"loss": 9.1551,
"step": 12400
},
{
"epoch": 0.05,
"learning_rate": 0.0004417104232836127,
"loss": 9.1542,
"step": 12600
},
{
"epoch": 0.06,
"learning_rate": 0.00044304328774510786,
"loss": 9.1525,
"step": 12800
},
{
"epoch": 0.06,
"learning_rate": 0.000444397018685155,
"loss": 9.1443,
"step": 13000
},
{
"epoch": 0.06,
"learning_rate": 0.00044577161016558405,
"loss": 9.1301,
"step": 13200
},
{
"epoch": 0.06,
"learning_rate": 0.0004471670561567286,
"loss": 9.1343,
"step": 13400
},
{
"epoch": 0.06,
"learning_rate": 0.00044858335053743655,
"loss": 9.1287,
"step": 13600
},
{
"epoch": 0.06,
"learning_rate": 0.0004500204870951062,
"loss": 9.1189,
"step": 13800
},
{
"epoch": 0.06,
"learning_rate": 0.00045147845952571257,
"loss": 9.1171,
"step": 14000
},
{
"epoch": 0.06,
"learning_rate": 0.0004529498156216581,
"loss": 9.1105,
"step": 14200
},
{
"epoch": 0.06,
"learning_rate": 0.0004544493364218305,
"loss": 9.0969,
"step": 14400
},
{
"epoch": 0.06,
"learning_rate": 0.00045596967366771067,
"loss": 9.1014,
"step": 14600
},
{
"epoch": 0.06,
"learning_rate": 0.00045751082069031036,
"loss": 9.0951,
"step": 14800
},
{
"epoch": 0.06,
"learning_rate": 0.00045907277072936015,
"loss": 9.0867,
"step": 15000
},
{
"epoch": 0.07,
"learning_rate": 0.00046065551693333547,
"loss": 9.0872,
"step": 15200
},
{
"epoch": 0.07,
"learning_rate": 0.00046225905235949306,
"loss": 9.0708,
"step": 15400
},
{
"epoch": 0.07,
"learning_rate": 0.0004638833699738953,
"loss": 9.0716,
"step": 15600
},
{
"epoch": 0.07,
"learning_rate": 0.00046552846265143777,
"loss": 9.071,
"step": 15800
},
{
"epoch": 0.07,
"learning_rate": 0.00046719432317589814,
"loss": 9.0618,
"step": 16000
},
{
"epoch": 0.07,
"learning_rate": 0.0004688724595049813,
"loss": 9.0518,
"step": 16200
},
{
"epoch": 0.07,
"learning_rate": 0.0004705797299630679,
"loss": 9.0442,
"step": 16400
},
{
"epoch": 0.07,
"learning_rate": 0.0004723077461105934,
"loss": 9.0477,
"step": 16600
},
{
"epoch": 0.07,
"learning_rate": 0.0004740565003675777,
"loss": 9.0397,
"step": 16800
},
{
"epoch": 0.07,
"learning_rate": 0.0004758259850630858,
"loss": 9.0355,
"step": 17000
},
{
"epoch": 0.07,
"learning_rate": 0.00047761619243523283,
"loss": 9.0248,
"step": 17200
},
{
"epoch": 0.07,
"learning_rate": 0.0004794271146312465,
"loss": 9.0137,
"step": 17400
},
{
"epoch": 0.08,
"learning_rate": 0.00048125874370748105,
"loss": 9.0205,
"step": 17600
},
{
"epoch": 0.08,
"learning_rate": 0.00048311107162946065,
"loss": 9.0008,
"step": 17800
},
{
"epoch": 0.08,
"learning_rate": 0.00048498409027191575,
"loss": 8.9975,
"step": 18000
},
{
"epoch": 0.08,
"learning_rate": 0.0004868682714790542,
"loss": 8.9992,
"step": 18200
},
{
"epoch": 0.08,
"learning_rate": 0.0004887825434734695,
"loss": 8.9777,
"step": 18400
},
{
"epoch": 0.08,
"learning_rate": 0.0004907174813103439,
"loss": 8.9871,
"step": 18600
},
{
"epoch": 0.08,
"learning_rate": 0.0004926730765020346,
"loss": 8.9765,
"step": 18800
},
{
"epoch": 0.08,
"learning_rate": 0.0004946393879009196,
"loss": 8.9754,
"step": 19000
},
{
"epoch": 0.08,
"learning_rate": 0.0004966361687980866,
"loss": 8.9678,
"step": 19200
},
{
"epoch": 0.08,
"learning_rate": 0.000498653581087638,
"loss": 8.9677,
"step": 19400
},
{
"epoch": 0.08,
"learning_rate": 0.0005006916159201579,
"loss": 8.9644,
"step": 19600
},
{
"epoch": 0.09,
"learning_rate": 0.0005027502643557748,
"loss": 8.9642,
"step": 19800
},
{
"epoch": 0.09,
"learning_rate": 0.0005048295173641828,
"loss": 8.9569,
"step": 20000
},
{
"epoch": 0.09,
"learning_rate": 0.0005069293658247036,
"loss": 8.9605,
"step": 20200
},
{
"epoch": 0.09,
"learning_rate": 0.0005090498005263129,
"loss": 8.9431,
"step": 20400
},
{
"epoch": 0.09,
"learning_rate": 0.000511190812167682,
"loss": 8.9431,
"step": 20600
},
{
"epoch": 0.09,
"learning_rate": 0.000513352391357226,
"loss": 8.9342,
"step": 20800
},
{
"epoch": 0.09,
"learning_rate": 0.0005155345286131357,
"loss": 8.9324,
"step": 21000
},
{
"epoch": 0.09,
"learning_rate": 0.0005177372143634305,
"loss": 8.9382,
"step": 21200
},
{
"epoch": 0.09,
"learning_rate": 0.0005199604389459836,
"loss": 8.9424,
"step": 21400
},
{
"epoch": 0.09,
"learning_rate": 0.0005222041926085837,
"loss": 8.9157,
"step": 21600
},
{
"epoch": 0.09,
"learning_rate": 0.0005244684655089597,
"loss": 8.9236,
"step": 21800
},
{
"epoch": 0.09,
"learning_rate": 0.0005267532477148378,
"loss": 8.9246,
"step": 22000
},
{
"epoch": 0.1,
"learning_rate": 0.0005290585292039816,
"loss": 8.9268,
"step": 22200
},
{
"epoch": 0.1,
"learning_rate": 0.0005313842998642265,
"loss": 8.9203,
"step": 22400
},
{
"epoch": 0.1,
"learning_rate": 0.0005337305494935388,
"loss": 8.9095,
"step": 22600
},
{
"epoch": 0.1,
"learning_rate": 0.0005360972678000522,
"loss": 8.9061,
"step": 22800
},
{
"epoch": 0.1,
"learning_rate": 0.0005384724576463773,
"loss": 8.9117,
"step": 23000
},
{
"epoch": 0.1,
"learning_rate": 0.0005408799798596632,
"loss": 8.9043,
"step": 23200
},
{
"epoch": 0.1,
"learning_rate": 0.0005433079393890421,
"loss": 8.9012,
"step": 23400
},
{
"epoch": 0.1,
"learning_rate": 0.0005457563255842242,
"loss": 8.8969,
"step": 23600
},
{
"epoch": 0.1,
"learning_rate": 0.0005482251277053145,
"loss": 8.9013,
"step": 23800
},
{
"epoch": 0.1,
"learning_rate": 0.0005507143349228714,
"loss": 8.8912,
"step": 24000
},
{
"epoch": 0.1,
"learning_rate": 0.0005532239363179401,
"loss": 8.8896,
"step": 24200
},
{
"epoch": 0.1,
"learning_rate": 0.0005557539208821075,
"loss": 8.8838,
"step": 24400
},
{
"epoch": 0.11,
"learning_rate": 0.0005583042775175479,
"loss": 8.889,
"step": 24600
},
{
"epoch": 0.11,
"learning_rate": 0.0005608749950370764,
"loss": 8.888,
"step": 24800
},
{
"epoch": 0.11,
"learning_rate": 0.0005634530562276738,
"loss": 8.8814,
"step": 25000
},
{
"epoch": 0.11,
"learning_rate": 0.0005660643599338256,
"loss": 8.8755,
"step": 25200
},
{
"epoch": 0.11,
"learning_rate": 0.0005686959904843206,
"loss": 8.8667,
"step": 25400
},
{
"epoch": 0.11,
"learning_rate": 0.0005713479363354621,
"loss": 8.8748,
"step": 25600
},
{
"epoch": 0.11,
"learning_rate": 0.000574020185854441,
"loss": 8.862,
"step": 25800
},
{
"epoch": 0.11,
"learning_rate": 0.0005767127273193853,
"loss": 8.8534,
"step": 26000
},
{
"epoch": 0.11,
"learning_rate": 0.0005794255489194114,
"loss": 8.8655,
"step": 26200
},
{
"epoch": 0.11,
"learning_rate": 0.0005821586387546804,
"loss": 8.8574,
"step": 26400
},
{
"epoch": 0.11,
"learning_rate": 0.0005849119848364386,
"loss": 8.8531,
"step": 26600
},
{
"epoch": 0.12,
"learning_rate": 0.0005876855750870848,
"loss": 8.8479,
"step": 26800
},
{
"epoch": 0.12,
"learning_rate": 0.0005904653779220791,
"loss": 8.8405,
"step": 27000
},
{
"epoch": 0.12,
"learning_rate": 0.0005932793188544346,
"loss": 8.8435,
"step": 27200
},
{
"epoch": 0.12,
"learning_rate": 0.0005961134672522114,
"loss": 8.8425,
"step": 27400
},
{
"epoch": 0.12,
"learning_rate": 0.0005989678106833648,
"loss": 8.8389,
"step": 27600
},
{
"epoch": 0.12,
"learning_rate": 0.0006018423366272695,
"loss": 8.8525,
"step": 27800
},
{
"epoch": 0.12,
"learning_rate": 0.0006047370324747583,
"loss": 8.8273,
"step": 28000
},
{
"epoch": 0.12,
"learning_rate": 0.0006076518855281984,
"loss": 8.8306,
"step": 28200
},
{
"epoch": 0.12,
"learning_rate": 0.000610586883001531,
"loss": 8.8437,
"step": 28400
},
{
"epoch": 0.12,
"learning_rate": 0.000613542012020336,
"loss": 8.8236,
"step": 28600
},
{
"epoch": 0.12,
"learning_rate": 0.0006165172596218869,
"loss": 8.8274,
"step": 28800
},
{
"epoch": 0.12,
"learning_rate": 0.0006194975859987236,
"loss": 8.8275,
"step": 29000
},
{
"epoch": 0.13,
"learning_rate": 0.0006225129310954997,
"loss": 8.8211,
"step": 29200
},
{
"epoch": 0.13,
"learning_rate": 0.0006255483554239195,
"loss": 8.8177,
"step": 29400
},
{
"epoch": 0.13,
"learning_rate": 0.000628603845669035,
"loss": 8.8223,
"step": 29600
},
{
"epoch": 0.13,
"learning_rate": 0.0006316793884278832,
"loss": 8.8123,
"step": 29800
},
{
"epoch": 0.13,
"learning_rate": 0.0006347749702095389,
"loss": 8.8107,
"step": 30000
},
{
"epoch": 0.13,
"learning_rate": 0.0006378905774351747,
"loss": 8.8122,
"step": 30200
},
{
"epoch": 0.13,
"learning_rate": 0.0006410261964381238,
"loss": 8.811,
"step": 30400
},
{
"epoch": 0.13,
"learning_rate": 0.000644181813463934,
"loss": 8.813,
"step": 30600
},
{
"epoch": 0.13,
"learning_rate": 0.0006473574146704329,
"loss": 8.8057,
"step": 30800
},
{
"epoch": 0.13,
"learning_rate": 0.0006505369586176524,
"loss": 8.8033,
"step": 31000
},
{
"epoch": 0.13,
"learning_rate": 0.0006537523865622775,
"loss": 8.795,
"step": 31200
},
{
"epoch": 0.13,
"learning_rate": 0.0006569877567060931,
"loss": 8.7938,
"step": 31400
},
{
"epoch": 0.14,
"learning_rate": 0.0006602430548570907,
"loss": 8.7969,
"step": 31600
},
{
"epoch": 0.14,
"learning_rate": 0.000663518266735847,
"loss": 8.7966,
"step": 31800
},
{
"epoch": 0.14,
"learning_rate": 0.0006668133779755819,
"loss": 8.7936,
"step": 32000
},
{
"epoch": 0.14,
"learning_rate": 0.0006701283741222287,
"loss": 8.7888,
"step": 32200
},
{
"epoch": 0.14,
"learning_rate": 0.0006734632406344993,
"loss": 8.7829,
"step": 32400
},
{
"epoch": 0.14,
"learning_rate": 0.0006768179628839337,
"loss": 8.7789,
"step": 32600
},
{
"epoch": 0.14,
"learning_rate": 0.0006801925261549872,
"loss": 8.778,
"step": 32800
},
{
"epoch": 0.14,
"learning_rate": 0.0006835698944044951,
"loss": 8.7897,
"step": 33000
},
{
"epoch": 0.14,
"learning_rate": 0.0006869839962045932,
"loss": 8.779,
"step": 33200
},
{
"epoch": 0.14,
"learning_rate": 0.0006904178944328165,
"loss": 8.7697,
"step": 33400
},
{
"epoch": 0.14,
"learning_rate": 0.0006938715740263026,
"loss": 8.7818,
"step": 33600
},
{
"epoch": 0.15,
"learning_rate": 0.0006973450198354252,
"loss": 8.7667,
"step": 33800
},
{
"epoch": 0.15,
"learning_rate": 0.0007008382166238496,
"loss": 8.7759,
"step": 34000
},
{
"epoch": 0.15,
"learning_rate": 0.0007043511490686036,
"loss": 8.7797,
"step": 34200
},
{
"epoch": 0.15,
"learning_rate": 0.0007078838017601421,
"loss": 8.7644,
"step": 34400
},
{
"epoch": 0.15,
"learning_rate": 0.0007114361592024231,
"loss": 8.7678,
"step": 34600
},
{
"epoch": 0.15,
"learning_rate": 0.0007150082058129618,
"loss": 8.7672,
"step": 34800
},
{
"epoch": 0.15,
"learning_rate": 0.0007185819184105553,
"loss": 8.7672,
"step": 35000
},
{
"epoch": 0.15,
"learning_rate": 0.0007221750927446872,
"loss": 8.7573,
"step": 35200
},
{
"epoch": 0.15,
"learning_rate": 0.0007258059161614535,
"loss": 8.7584,
"step": 35400
},
{
"epoch": 0.15,
"learning_rate": 0.0007294563657132755,
"loss": 8.7442,
"step": 35600
},
{
"epoch": 0.15,
"learning_rate": 0.0007331264253873856,
"loss": 8.7595,
"step": 35800
},
{
"epoch": 0.15,
"learning_rate": 0.0007368160790850002,
"loss": 8.7564,
"step": 36000
},
{
"epoch": 0.16,
"learning_rate": 0.0007405253106213833,
"loss": 8.7517,
"step": 36200
},
{
"epoch": 0.16,
"learning_rate": 0.0007442541037259286,
"loss": 8.7583,
"step": 36400
},
{
"epoch": 0.16,
"learning_rate": 0.0007480024420422077,
"loss": 8.7426,
"step": 36600
},
{
"epoch": 0.16,
"learning_rate": 0.0007517703091280727,
"loss": 8.7519,
"step": 36800
},
{
"epoch": 0.16,
"learning_rate": 0.0007555576884556992,
"loss": 8.7393,
"step": 37000
},
{
"epoch": 0.16,
"learning_rate": 0.0007593645634116821,
"loss": 8.7262,
"step": 37200
},
{
"epoch": 0.16,
"learning_rate": 0.0007631717371015569,
"loss": 8.7383,
"step": 37400
},
{
"epoch": 0.16,
"learning_rate": 0.0007670174558631893,
"loss": 8.7365,
"step": 37600
},
{
"epoch": 0.16,
"learning_rate": 0.0007708826199846926,
"loss": 8.7385,
"step": 37800
},
{
"epoch": 0.16,
"learning_rate": 0.0007747672125114589,
"loss": 8.7385,
"step": 38000
},
{
"epoch": 0.16,
"learning_rate": 0.0007786712164036449,
"loss": 8.7164,
"step": 38200
},
{
"epoch": 0.17,
"learning_rate": 0.0007825946145362667,
"loss": 8.7262,
"step": 38400
},
{
"epoch": 0.17,
"learning_rate": 0.0007865373896992697,
"loss": 8.728,
"step": 38600
},
{
"epoch": 0.17,
"learning_rate": 0.0007904995245975929,
"loss": 8.7281,
"step": 38800
},
{
"epoch": 0.17,
"learning_rate": 0.0007944810018512619,
"loss": 8.7179,
"step": 39000
},
{
"epoch": 0.17,
"learning_rate": 0.000798481803995452,
"loss": 8.7264,
"step": 39200
},
{
"epoch": 0.17,
"learning_rate": 0.0008025019134805696,
"loss": 8.7229,
"step": 39400
},
{
"epoch": 0.17,
"learning_rate": 0.0008065210677225022,
"loss": 8.7246,
"step": 39600
},
{
"epoch": 0.17,
"learning_rate": 0.000810579642586285,
"loss": 8.7199,
"step": 39800
},
{
"epoch": 0.17,
"learning_rate": 0.0008146574717236045,
"loss": 8.7209,
"step": 40000
},
{
"epoch": 0.17,
"learning_rate": 0.0008187545372469861,
"loss": 8.7075,
"step": 40200
},
{
"epoch": 0.17,
"learning_rate": 0.0008228708211845768,
"loss": 8.7101,
"step": 40400
},
{
"epoch": 0.17,
"learning_rate": 0.0008270063054802209,
"loss": 8.7144,
"step": 40600
},
{
"epoch": 0.18,
"learning_rate": 0.0008311609719935404,
"loss": 8.7173,
"step": 40800
},
{
"epoch": 0.18,
"learning_rate": 0.0008353348025000144,
"loss": 8.7183,
"step": 41000
},
{
"epoch": 0.18,
"learning_rate": 0.0008395277786910574,
"loss": 8.7107,
"step": 41200
},
{
"epoch": 0.18,
"learning_rate": 0.0008437398821741025,
"loss": 8.7113,
"step": 41400
},
{
"epoch": 0.18,
"learning_rate": 0.0008479710944726774,
"loss": 8.7085,
"step": 41600
},
{
"epoch": 0.18,
"learning_rate": 0.0008522000980575213,
"loss": 8.7115,
"step": 41800
},
{
"epoch": 0.18,
"learning_rate": 0.0008564693769110079,
"loss": 8.7055,
"step": 42000
},
{
"epoch": 0.18,
"learning_rate": 0.0008607577087418623,
"loss": 8.6935,
"step": 42200
},
{
"epoch": 0.18,
"learning_rate": 0.0008650650747392373,
"loss": 8.7042,
"step": 42400
},
{
"epoch": 0.18,
"learning_rate": 0.0008693914560087938,
"loss": 8.6849,
"step": 42600
},
{
"epoch": 0.18,
"learning_rate": 0.0008737368335727785,
"loss": 8.6876,
"step": 42800
},
{
"epoch": 0.18,
"learning_rate": 0.0008781011883701138,
"loss": 8.6922,
"step": 43000
},
{
"epoch": 0.19,
"learning_rate": 0.0008824845012564749,
"loss": 8.6922,
"step": 43200
},
{
"epoch": 0.19,
"learning_rate": 0.000886886753004381,
"loss": 8.6853,
"step": 43400
},
{
"epoch": 0.19,
"learning_rate": 0.000891307924303272,
"loss": 8.6936,
"step": 43600
},
{
"epoch": 0.19,
"learning_rate": 0.0008957257484203587,
"loss": 8.6995,
"step": 43800
},
{
"epoch": 0.19,
"learning_rate": 0.0009001846062028449,
"loss": 8.6841,
"step": 44000
},
{
"epoch": 0.19,
"learning_rate": 0.0009046623252050388,
"loss": 8.6735,
"step": 44200
},
{
"epoch": 0.19,
"learning_rate": 0.0009091588857853411,
"loss": 8.6888,
"step": 44400
},
{
"epoch": 0.19,
"learning_rate": 0.0009136742682195071,
"loss": 8.6788,
"step": 44600
},
{
"epoch": 0.19,
"learning_rate": 0.0009182084527007278,
"loss": 8.6817,
"step": 44800
},
{
"epoch": 0.19,
"learning_rate": 0.0009227614193397203,
"loss": 8.671,
"step": 45000
},
{
"epoch": 0.19,
"learning_rate": 0.0009273331481648092,
"loss": 8.6738,
"step": 45200
},
{
"epoch": 0.2,
"learning_rate": 0.0009319236191220222,
"loss": 8.6657,
"step": 45400
},
{
"epoch": 0.2,
"learning_rate": 0.0009365328120751783,
"loss": 8.6599,
"step": 45600
},
{
"epoch": 0.2,
"learning_rate": 0.0009411375208451828,
"loss": 8.6747,
"step": 45800
},
{
"epoch": 0.2,
"learning_rate": 0.0009457840036964961,
"loss": 8.6698,
"step": 46000
},
{
"epoch": 0.2,
"learning_rate": 0.0009504491477449178,
"loss": 8.6765,
"step": 46200
},
{
"epoch": 0.2,
"learning_rate": 0.0009551329325267026,
"loss": 8.6732,
"step": 46400
},
{
"epoch": 0.2,
"learning_rate": 0.0009598353374963477,
"loss": 8.6654,
"step": 46600
},
{
"epoch": 0.2,
"learning_rate": 0.0009645563420266623,
"loss": 8.6614,
"step": 46800
},
{
"epoch": 0.2,
"learning_rate": 0.0009692959254088748,
"loss": 8.6672,
"step": 47000
},
{
"epoch": 0.2,
"learning_rate": 0.0009740540668527146,
"loss": 8.6508,
"step": 47200
},
{
"epoch": 0.2,
"learning_rate": 0.0009788307454865058,
"loss": 8.6533,
"step": 47400
},
{
"epoch": 0.2,
"learning_rate": 0.0009836259403572592,
"loss": 8.656,
"step": 47600
},
{
"epoch": 0.21,
"learning_rate": 0.0009884155160084767,
"loss": 8.65,
"step": 47800
},
{
"epoch": 0.21,
"learning_rate": 0.0009932475878516138,
"loss": 8.6559,
"step": 48000
},
{
"epoch": 0.21,
"learning_rate": 0.0009980981126919714,
"loss": 8.646,
"step": 48200
},
{
"epoch": 0.21,
"learning_rate": 0.0010029670692526266,
"loss": 8.6504,
"step": 48400
},
{
"epoch": 0.21,
"learning_rate": 0.001007854436175815,
"loss": 8.6437,
"step": 48600
},
{
"epoch": 0.21,
"learning_rate": 0.00101276019202301,
"loss": 8.6483,
"step": 48800
},
{
"epoch": 0.21,
"learning_rate": 0.0010176843152750244,
"loss": 8.6381,
"step": 49000
},
{
"epoch": 0.21,
"learning_rate": 0.001022602026387454,
"loss": 8.6419,
"step": 49200
},
{
"epoch": 0.21,
"learning_rate": 0.0010275627280027944,
"loss": 8.6381,
"step": 49400
},
{
"epoch": 0.21,
"learning_rate": 0.0010325417320913577,
"loss": 8.6274,
"step": 49600
},
{
"epoch": 0.21,
"learning_rate": 0.0010375390168126473,
"loss": 8.6264,
"step": 49800
},
{
"epoch": 0.21,
"learning_rate": 0.0010425545602459826,
"loss": 8.6279,
"step": 50000
},
{
"epoch": 0.22,
"learning_rate": 0.0010475883403905893,
"loss": 8.636,
"step": 50200
},
{
"epoch": 0.22,
"learning_rate": 0.001052640335165696,
"loss": 8.6305,
"step": 50400
},
{
"epoch": 0.22,
"learning_rate": 0.001057710522410639,
"loss": 8.6259,
"step": 50600
},
{
"epoch": 0.22,
"learning_rate": 0.001062798879884943,
"loss": 8.6288,
"step": 50800
},
{
"epoch": 0.22,
"learning_rate": 0.0010679053852684361,
"loss": 8.6286,
"step": 51000
},
{
"epoch": 0.22,
"learning_rate": 0.0010730300161613388,
"loss": 8.6203,
"step": 51200
},
{
"epoch": 0.22,
"learning_rate": 0.0010781469914207427,
"loss": 8.618,
"step": 51400
},
{
"epoch": 0.22,
"learning_rate": 0.0010833077154690767,
"loss": 8.6183,
"step": 51600
},
{
"epoch": 0.22,
"learning_rate": 0.0010884864974642153,
"loss": 8.6244,
"step": 51800
},
{
"epoch": 0.22,
"learning_rate": 0.0010936833146893334,
"loss": 8.6129,
"step": 52000
},
{
"epoch": 0.22,
"learning_rate": 0.001098898144348496,
"loss": 8.622,
"step": 52200
},
{
"epoch": 0.23,
"learning_rate": 0.001104130963566756,
"loss": 8.6104,
"step": 52400
},
{
"epoch": 0.23,
"learning_rate": 0.001109381749390256,
"loss": 8.603,
"step": 52600
},
{
"epoch": 0.23,
"learning_rate": 0.00111465047878633,
"loss": 8.6093,
"step": 52800
},
{
"epoch": 0.23,
"learning_rate": 0.001119937128643592,
"loss": 8.5969,
"step": 53000
},
{
"epoch": 0.23,
"learning_rate": 0.0011252416757720606,
"loss": 8.5992,
"step": 53200
},
{
"epoch": 0.23,
"learning_rate": 0.0011305374403745901,
"loss": 8.6047,
"step": 53400
},
{
"epoch": 0.23,
"learning_rate": 0.001135877622966507,
"loss": 8.5958,
"step": 53600
},
{
"epoch": 0.23,
"learning_rate": 0.001141235632906355,
"loss": 8.5948,
"step": 53800
},
{
"epoch": 0.23,
"learning_rate": 0.0011466114466911256,
"loss": 8.5896,
"step": 54000
},
{
"epoch": 0.23,
"learning_rate": 0.001152005040739713,
"loss": 8.5887,
"step": 54200
},
{
"epoch": 0.23,
"learning_rate": 0.0011574163913930131,
"loss": 8.5862,
"step": 54400
},
{
"epoch": 0.23,
"learning_rate": 0.0011628454749140395,
"loss": 8.5949,
"step": 54600
},
{
"epoch": 0.24,
"learning_rate": 0.0011682922674880192,
"loss": 8.588,
"step": 54800
},
{
"epoch": 0.24,
"learning_rate": 0.0011737567452224911,
"loss": 8.5918,
"step": 55000
},
{
"epoch": 0.24,
"learning_rate": 0.0011792388841474245,
"loss": 8.5904,
"step": 55200
},
{
"epoch": 0.24,
"learning_rate": 0.0011847111175024606,
"loss": 8.5739,
"step": 55400
},
{
"epoch": 0.24,
"learning_rate": 0.0011902284185834888,
"loss": 8.5756,
"step": 55600
},
{
"epoch": 0.24,
"learning_rate": 0.0011957633086016797,
"loss": 8.568,
"step": 55800
},
{
"epoch": 0.24,
"learning_rate": 0.0012013157632781366,
"loss": 8.5696,
"step": 56000
},
{
"epoch": 0.24,
"learning_rate": 0.00120688575825691,
"loss": 8.5768,
"step": 56200
},
{
"epoch": 0.24,
"learning_rate": 0.0012124732691051188,
"loss": 8.5696,
"step": 56400
},
{
"epoch": 0.24,
"learning_rate": 0.0012180782713130424,
"loss": 8.5687,
"step": 56600
},
{
"epoch": 0.24,
"learning_rate": 0.0012237007402942333,
"loss": 8.56,
"step": 56800
},
{
"epoch": 0.24,
"learning_rate": 0.0012293406513856284,
"loss": 8.56,
"step": 57000
},
{
"epoch": 0.25,
"learning_rate": 0.0012349979798476525,
"loss": 8.5602,
"step": 57200
},
{
"epoch": 0.25,
"learning_rate": 0.0012406442840364133,
"loss": 8.5551,
"step": 57400
},
{
"epoch": 0.25,
"learning_rate": 0.0012463362859392122,
"loss": 8.5556,
"step": 57600
},
{
"epoch": 0.25,
"learning_rate": 0.0012520456306609733,
"loss": 8.5508,
"step": 57800
},
{
"epoch": 0.25,
"learning_rate": 0.0012577722931575563,
"loss": 8.549,
"step": 58000
},
{
"epoch": 0.25,
"learning_rate": 0.0012634874855592566,
"loss": 8.5582,
"step": 58200
},
{
"epoch": 0.25,
"learning_rate": 0.001269248621894795,
"loss": 8.554,
"step": 58400
},
{
"epoch": 0.25,
"learning_rate": 0.0012750270005439136,
"loss": 8.5453,
"step": 58600
},
{
"epoch": 0.25,
"learning_rate": 0.0012808225961596451,
"loss": 8.5545,
"step": 58800
},
{
"epoch": 0.25,
"learning_rate": 0.0012866353833195041,
"loss": 8.543,
"step": 59000
},
{
"epoch": 0.25,
"learning_rate": 0.0012924653365255934,
"loss": 8.5454,
"step": 59200
},
{
"epoch": 0.26,
"learning_rate": 0.001298312430204715,
"loss": 8.5496,
"step": 59400
},
{
"epoch": 0.26,
"learning_rate": 0.0013041766387084808,
"loss": 8.5348,
"step": 59600
},
{
"epoch": 0.26,
"learning_rate": 0.0013100579363134381,
"loss": 8.5311,
"step": 59800
},
{
"epoch": 0.26,
"learning_rate": 0.001315956297221161,
"loss": 8.5378,
"step": 60000
},
{
"epoch": 0.26,
"learning_rate": 0.0013218716955583822,
"loss": 8.5304,
"step": 60200
},
{
"epoch": 0.26,
"learning_rate": 0.0013278041053770978,
"loss": 8.5166,
"step": 60400
},
{
"epoch": 0.26,
"learning_rate": 0.0013337237114701053,
"loss": 8.5324,
"step": 60600
},
{
"epoch": 0.26,
"learning_rate": 0.001339689981377656,
"loss": 8.5196,
"step": 60800
},
{
"epoch": 0.26,
"learning_rate": 0.0013456731846064624,
"loss": 8.5191,
"step": 61000
},
{
"epoch": 0.26,
"learning_rate": 0.0013516732949110932,
"loss": 8.5285,
"step": 61200
},
{
"epoch": 0.26,
"learning_rate": 0.0013576902859719474,
"loss": 8.5143,
"step": 61400
},
{
"epoch": 0.26,
"learning_rate": 0.0013637241313953895,
"loss": 8.519,
"step": 61600
},
{
"epoch": 0.27,
"learning_rate": 0.0013697748047138431,
"loss": 8.4964,
"step": 61800
},
{
"epoch": 0.27,
"learning_rate": 0.0013758422793859176,
"loss": 8.5216,
"step": 62000
},
{
"epoch": 0.27,
"learning_rate": 0.001381926528796519,
"loss": 8.5098,
"step": 62200
},
{
"epoch": 0.27,
"learning_rate": 0.0013880275262569807,
"loss": 8.511,
"step": 62400
},
{
"epoch": 0.27,
"learning_rate": 0.0013941452450051631,
"loss": 8.5124,
"step": 62600
},
{
"epoch": 0.27,
"learning_rate": 0.001400248944656608,
"loss": 8.5054,
"step": 62800
},
{
"epoch": 0.27,
"learning_rate": 0.0014063999421298785,
"loss": 8.5016,
"step": 63000
},
{
"epoch": 0.27,
"learning_rate": 0.0014125675802999262,
"loss": 8.501,
"step": 63200
},
{
"epoch": 0.27,
"learning_rate": 0.001418751832112295,
"loss": 8.505,
"step": 63400
},
{
"epoch": 0.27,
"learning_rate": 0.0014249526704396467,
"loss": 8.4953,
"step": 63600
},
{
"epoch": 0.27,
"learning_rate": 0.0014311700680818915,
"loss": 8.5074,
"step": 63800
},
{
"epoch": 0.28,
"learning_rate": 0.0014374039977662987,
"loss": 8.4991,
"step": 64000
},
{
"epoch": 0.28,
"learning_rate": 0.0014436544321476206,
"loss": 8.5087,
"step": 64200
},
{
"epoch": 0.28,
"learning_rate": 0.0014499213438082127,
"loss": 8.4871,
"step": 64400
},
{
"epoch": 0.28,
"learning_rate": 0.0014562047052581514,
"loss": 8.4954,
"step": 64600
},
{
"epoch": 0.28,
"learning_rate": 0.0014624729492123557,
"loss": 8.4791,
"step": 64800
},
{
"epoch": 0.28,
"learning_rate": 0.0014687890455785963,
"loss": 8.4853,
"step": 65000
},
{
"epoch": 0.28,
"learning_rate": 0.0014751215089706584,
"loss": 8.4855,
"step": 65200
},
{
"epoch": 0.28,
"learning_rate": 0.0014814703116110776,
"loss": 8.4707,
"step": 65400
},
{
"epoch": 0.28,
"learning_rate": 0.001487835425650709,
"loss": 8.4743,
"step": 65600
},
{
"epoch": 0.28,
"learning_rate": 0.001494216823168866,
"loss": 8.4717,
"step": 65800
},
{
"epoch": 0.28,
"learning_rate": 0.0015006144761734279,
"loss": 8.4823,
"step": 66000
},
{
"epoch": 0.28,
"learning_rate": 0.001507028356600975,
"loss": 8.4708,
"step": 66200
},
{
"epoch": 0.29,
"learning_rate": 0.0015134584363168998,
"loss": 8.4649,
"step": 66400
},
{
"epoch": 0.29,
"learning_rate": 0.001519904687115537,
"loss": 8.4695,
"step": 66600
},
{
"epoch": 0.29,
"learning_rate": 0.0015263347286438994,
"loss": 8.4759,
"step": 66800
},
{
"epoch": 0.29,
"learning_rate": 0.0015328131562056986,
"loss": 8.4655,
"step": 67000
},
{
"epoch": 0.29,
"learning_rate": 0.0015393076699503766,
"loss": 8.4752,
"step": 67200
},
{
"epoch": 0.29,
"learning_rate": 0.0015458182413896245,
"loss": 8.4535,
"step": 67400
},
{
"epoch": 0.29,
"learning_rate": 0.001552344841964707,
"loss": 8.4535,
"step": 67600
},
{
"epoch": 0.29,
"learning_rate": 0.0015588874430465648,
"loss": 8.4519,
"step": 67800
},
{
"epoch": 0.29,
"learning_rate": 0.001565446015935959,
"loss": 8.4568,
"step": 68000
},
{
"epoch": 0.29,
"learning_rate": 0.00157202053186359,
"loss": 8.4524,
"step": 68200
},
{
"epoch": 0.29,
"learning_rate": 0.0015786109619902212,
"loss": 8.4589,
"step": 68400
},
{
"epoch": 0.29,
"learning_rate": 0.0015852172774068075,
"loss": 8.4559,
"step": 68600
},
{
"epoch": 0.3,
"learning_rate": 0.0015918062988814347,
"loss": 8.446,
"step": 68800
},
{
"epoch": 0.3,
"learning_rate": 0.0015984442188082624,
"loss": 8.439,
"step": 69000
},
{
"epoch": 0.3,
"learning_rate": 0.0016050979370261006,
"loss": 8.4504,
"step": 69200
},
{
"epoch": 0.3,
"learning_rate": 0.0016117674243482875,
"loss": 8.4487,
"step": 69400
},
{
"epoch": 0.3,
"learning_rate": 0.0016184526515189961,
"loss": 8.4472,
"step": 69600
},
{
"epoch": 0.3,
"learning_rate": 0.0016251535892133542,
"loss": 8.433,
"step": 69800
},
{
"epoch": 0.3,
"learning_rate": 0.001631870208037572,
"loss": 8.426,
"step": 70000
},
{
"epoch": 0.3,
"learning_rate": 0.0016386024785290804,
"loss": 8.4311,
"step": 70200
},
{
"epoch": 0.3,
"learning_rate": 0.0016453503711566474,
"loss": 8.432,
"step": 70400
},
{
"epoch": 0.3,
"learning_rate": 0.0016520800001573153,
"loss": 8.4282,
"step": 70600
},
{
"epoch": 0.3,
"learning_rate": 0.0016588589704489114,
"loss": 8.4386,
"step": 70800
},
{
"epoch": 0.31,
"learning_rate": 0.0016656534740210893,
"loss": 8.4158,
"step": 71000
},
{
"epoch": 0.31,
"learning_rate": 0.0016724634810696363,
"loss": 8.4242,
"step": 71200
},
{
"epoch": 0.31,
"learning_rate": 0.0016792889617223312,
"loss": 8.4279,
"step": 71400
},
{
"epoch": 0.31,
"learning_rate": 0.0016861298860390735,
"loss": 8.4242,
"step": 71600
},
{
"epoch": 0.31,
"learning_rate": 0.0016929862240120247,
"loss": 8.4271,
"step": 71800
},
{
"epoch": 0.31,
"learning_rate": 0.0016998579455657307,
"loss": 8.4265,
"step": 72000
},
{
"epoch": 0.31,
"learning_rate": 0.0017067450205572581,
"loss": 8.421,
"step": 72200
},
{
"epoch": 0.31,
"learning_rate": 0.0017136474187763266,
"loss": 8.4156,
"step": 72400
},
{
"epoch": 0.31,
"learning_rate": 0.0017205304834985446,
"loss": 8.421,
"step": 72600
},
{
"epoch": 0.31,
"learning_rate": 0.0017274633610356825,
"loss": 8.4256,
"step": 72800
},
{
"epoch": 0.31,
"learning_rate": 0.0017344114709189774,
"loss": 8.4191,
"step": 73000
},
{
"epoch": 0.31,
"learning_rate": 0.0017413747826704132,
"loss": 8.4015,
"step": 73200
},
{
"epoch": 0.32,
"learning_rate": 0.001748318335641869,
"loss": 8.4129,
"step": 73400
},
{
"epoch": 0.32,
"learning_rate": 0.0017553118838016506,
"loss": 8.4179,
"step": 73600
},
{
"epoch": 0.32,
"learning_rate": 0.0017623205421495314,
"loss": 8.4142,
"step": 73800
},
{
"epoch": 0.32,
"learning_rate": 0.0017693442799418986,
"loss": 8.4005,
"step": 74000
},
{
"epoch": 0.32,
"learning_rate": 0.0017763830663689965,
"loss": 8.41,
"step": 74200
},
{
"epoch": 0.32,
"learning_rate": 0.0017834368705550597,
"loss": 8.4162,
"step": 74400
},
{
"epoch": 0.32,
"learning_rate": 0.001790505661558443,
"loss": 8.4081,
"step": 74600
},
{
"epoch": 0.32,
"learning_rate": 0.0017975894083717692,
"loss": 8.4027,
"step": 74800
},
{
"epoch": 0.32,
"learning_rate": 0.0018046880799220469,
"loss": 8.4097,
"step": 75000
},
{
"epoch": 0.32,
"learning_rate": 0.0018118016450708232,
"loss": 8.4077,
"step": 75200
},
{
"epoch": 0.32,
"learning_rate": 0.0018189300726143137,
"loss": 8.4086,
"step": 75400
},
{
"epoch": 0.32,
"learning_rate": 0.0018260375781495742,
"loss": 8.4084,
"step": 75600
},
{
"epoch": 0.33,
"learning_rate": 0.001833195562689592,
"loss": 8.4166,
"step": 75800
},
{
"epoch": 0.33,
"learning_rate": 0.0018403683157795104,
"loss": 8.4121,
"step": 76000
},
{
"epoch": 0.33,
"learning_rate": 0.0018475558059559121,
"loss": 8.404,
"step": 76200
},
{
"epoch": 0.33,
"learning_rate": 0.001854758001690741,
"loss": 8.4035,
"step": 76400
},
{
"epoch": 0.33,
"learning_rate": 0.0018619748713914318,
"loss": 8.4044,
"step": 76600
},
{
"epoch": 0.33,
"learning_rate": 0.0018692063834010522,
"loss": 8.3948,
"step": 76800
},
{
"epoch": 0.33,
"learning_rate": 0.0018764525059984417,
"loss": 8.3935,
"step": 77000
},
{
"epoch": 0.33,
"learning_rate": 0.001883713207398349,
"loss": 8.4028,
"step": 77200
},
{
"epoch": 0.33,
"learning_rate": 0.0018909884557515733,
"loss": 8.401,
"step": 77400
},
{
"epoch": 0.33,
"learning_rate": 0.0018982417342748425,
"loss": 8.4085,
"step": 77600
},
{
"epoch": 0.33,
"learning_rate": 0.0019055459083963232,
"loss": 8.3979,
"step": 77800
},
{
"epoch": 0.34,
"learning_rate": 0.00191282790468081,
"loss": 8.4027,
"step": 78000
},
{
"epoch": 0.34,
"learning_rate": 0.0019201608770511077,
"loss": 8.401,
"step": 78200
},
{
"epoch": 0.34,
"learning_rate": 0.001927508236496343,
"loss": 8.4054,
"step": 78400
},
{
"epoch": 0.34,
"learning_rate": 0.0019348699507871943,
"loss": 8.408,
"step": 78600
},
{
"epoch": 0.34,
"learning_rate": 0.0019422459876313608,
"loss": 8.4064,
"step": 78800
},
{
"epoch": 0.34,
"learning_rate": 0.0019496363146737205,
"loss": 8.4066,
"step": 79000
},
{
"epoch": 0.34,
"learning_rate": 0.001957040899496469,
"loss": 8.4061,
"step": 79200
},
{
"epoch": 0.34,
"learning_rate": 0.0019644597096192574,
"loss": 8.411,
"step": 79400
},
{
"epoch": 0.34,
"learning_rate": 0.00197189271249934,
"loss": 8.3999,
"step": 79600
},
{
"epoch": 0.34,
"learning_rate": 0.001979339875531708,
"loss": 8.4002,
"step": 79800
},
{
"epoch": 0.34,
"learning_rate": 0.001986801166049247,
"loss": 8.4058,
"step": 80000
},
{
"epoch": 0.34,
"learning_rate": 0.0019942391393900083,
"loss": 8.4172,
"step": 80200
},
{
"epoch": 0.35,
"learning_rate": 0.002001728516400637,
"loss": 8.4089,
"step": 80400
},
{
"epoch": 0.35,
"learning_rate": 0.002009231922688247,
"loss": 8.4087,
"step": 80600
},
{
"epoch": 0.35,
"learning_rate": 0.002016749325339009,
"loss": 8.4115,
"step": 80800
},
{
"epoch": 0.35,
"learning_rate": 0.0020242806913776997,
"loss": 8.4317,
"step": 81000
},
{
"epoch": 0.35,
"learning_rate": 0.0020318259877678373,
"loss": 8.4049,
"step": 81200
},
{
"epoch": 0.35,
"learning_rate": 0.002039385181411845,
"loss": 8.4129,
"step": 81400
},
{
"epoch": 0.35,
"learning_rate": 0.002046958239151178,
"loss": 8.424,
"step": 81600
},
{
"epoch": 0.35,
"learning_rate": 0.0020545451277664776,
"loss": 8.4146,
"step": 81800
},
{
"epoch": 0.35,
"learning_rate": 0.0020621458139777164,
"loss": 8.4233,
"step": 82000
},
{
"epoch": 0.35,
"learning_rate": 0.002069722158008656,
"loss": 8.4199,
"step": 82200
},
{
"epoch": 0.35,
"learning_rate": 0.0020773502707586607,
"loss": 8.4134,
"step": 82400
},
{
"epoch": 0.35,
"learning_rate": 0.0020849920810694245,
"loss": 8.415,
"step": 82600
},
{
"epoch": 0.36,
"learning_rate": 0.0020926475554200047,
"loss": 8.4301,
"step": 82800
},
{
"epoch": 0.36,
"learning_rate": 0.0021003166602295217,
"loss": 8.4128,
"step": 83000
},
{
"epoch": 0.36,
"learning_rate": 0.002107999361857309,
"loss": 8.4284,
"step": 83200
},
{
"epoch": 0.36,
"learning_rate": 0.002115695626603048,
"loss": 8.422,
"step": 83400
},
{
"epoch": 0.36,
"learning_rate": 0.002123405420706933,
"loss": 8.4173,
"step": 83600
},
{
"epoch": 0.36,
"learning_rate": 0.002131128710349813,
"loss": 8.4245,
"step": 83800
},
{
"epoch": 0.36,
"learning_rate": 0.002138865461653332,
"loss": 8.427,
"step": 84000
},
{
"epoch": 0.36,
"learning_rate": 0.0021465768564397046,
"loss": 8.4287,
"step": 84200
},
{
"epoch": 0.36,
"learning_rate": 0.002154340362309423,
"loss": 8.4361,
"step": 84400
},
{
"epoch": 0.36,
"learning_rate": 0.002162078310516678,
"loss": 8.4299,
"step": 84600
},
{
"epoch": 0.36,
"learning_rate": 0.0021698684354139377,
"loss": 8.4447,
"step": 84800
},
{
"epoch": 0.37,
"learning_rate": 0.0021776718520393184,
"loss": 8.4399,
"step": 85000
},
{
"epoch": 0.37,
"learning_rate": 0.0021854885261629875,
"loss": 8.4469,
"step": 85200
},
{
"epoch": 0.37,
"learning_rate": 0.0021933184234969594,
"loss": 8.4328,
"step": 85400
},
{
"epoch": 0.37,
"learning_rate": 0.0022011615096952444,
"loss": 8.4504,
"step": 85600
},
{
"epoch": 0.37,
"learning_rate": 0.002209017750354,
"loss": 8.4383,
"step": 85800
},
{
"epoch": 0.37,
"learning_rate": 0.0022168871110116815,
"loss": 8.4472,
"step": 86000
},
{
"epoch": 0.37,
"learning_rate": 0.0022247695571491945,
"loss": 8.448,
"step": 86200
},
{
"epoch": 0.37,
"learning_rate": 0.0022326650541900405,
"loss": 8.4451,
"step": 86400
},
{
"epoch": 0.37,
"learning_rate": 0.0022405339926133165,
"loss": 8.4415,
"step": 86600
},
{
"epoch": 0.37,
"learning_rate": 0.0022484554226809986,
"loss": 8.4439,
"step": 86800
},
{
"epoch": 0.37,
"learning_rate": 0.0022563897997535266,
"loss": 8.4497,
"step": 87000
},
{
"epoch": 0.37,
"learning_rate": 0.0022643370890266133,
"loss": 8.452,
"step": 87200
},
{
"epoch": 0.38,
"learning_rate": 0.0022722972556393217,
"loss": 8.4677,
"step": 87400
},
{
"epoch": 0.38,
"learning_rate": 0.0022802702646742383,
"loss": 8.4595,
"step": 87600
},
{
"epoch": 0.38,
"learning_rate": 0.002288256081157608,
"loss": 8.4595,
"step": 87800
},
{
"epoch": 0.38,
"learning_rate": 0.002296254670059502,
"loss": 8.4642,
"step": 88000
},
{
"epoch": 0.38,
"learning_rate": 0.0023042659962939603,
"loss": 8.4709,
"step": 88200
},
{
"epoch": 0.38,
"learning_rate": 0.0023122900247191545,
"loss": 8.4679,
"step": 88400
},
{
"epoch": 0.38,
"learning_rate": 0.002320286505209589,
"loss": 8.4603,
"step": 88600
},
{
"epoch": 0.38,
"learning_rate": 0.0023283357692971242,
"loss": 8.4662,
"step": 88800
},
{
"epoch": 0.38,
"learning_rate": 0.002336397629992889,
"loss": 8.4653,
"step": 89000
},
{
"epoch": 0.38,
"learning_rate": 0.002344472051933384,
"loss": 8.4833,
"step": 89200
},
{
"epoch": 0.38,
"learning_rate": 0.002352558999700007,
"loss": 8.4974,
"step": 89400
},
{
"epoch": 0.39,
"learning_rate": 0.002360658437819213,
"loss": 8.4881,
"step": 89600
},
{
"epoch": 0.39,
"learning_rate": 0.0023687703307626647,
"loss": 8.4878,
"step": 89800
},
{
"epoch": 0.39,
"learning_rate": 0.0023768946429473976,
"loss": 8.4846,
"step": 90000
},
{
"epoch": 0.39,
"learning_rate": 0.002385031338735963,
"loss": 8.4866,
"step": 90200
},
{
"epoch": 0.39,
"learning_rate": 0.0023931803824365962,
"loss": 8.4847,
"step": 90400
},
{
"epoch": 0.39,
"learning_rate": 0.002401300900956714,
"loss": 8.4934,
"step": 90600
},
{
"epoch": 0.39,
"learning_rate": 0.002409474471896992,
"loss": 8.4872,
"step": 90800
},
{
"epoch": 0.39,
"learning_rate": 0.0024176602835290807,
"loss": 8.4977,
"step": 91000
},
{
"epoch": 0.39,
"learning_rate": 0.0024258582999457665,
"loss": 8.4967,
"step": 91200
},
{
"epoch": 0.39,
"learning_rate": 0.0024340684851863,
"loss": 8.505,
"step": 91400
},
{
"epoch": 0.39,
"learning_rate": 0.002442290803236551,
"loss": 8.5126,
"step": 91600
},
{
"epoch": 0.39,
"learning_rate": 0.0024505252180291688,
"loss": 8.5033,
"step": 91800
},
{
"epoch": 0.4,
"learning_rate": 0.0024587304311256865,
"loss": 8.513,
"step": 92000
},
{
"epoch": 0.4,
"learning_rate": 0.0024669888709567232,
"loss": 8.5082,
"step": 92200
},
{
"epoch": 0.4,
"learning_rate": 0.0024752592991915973,
"loss": 8.517,
"step": 92400
},
{
"epoch": 0.4,
"learning_rate": 0.0024835416795519205,
"loss": 8.5293,
"step": 92600
},
{
"epoch": 0.4,
"learning_rate": 0.002491835975706881,
"loss": 8.5094,
"step": 92800
},
{
"epoch": 0.4,
"learning_rate": 0.0025001421512733943,
"loss": 8.5139,
"step": 93000
},
{
"epoch": 0.4,
"learning_rate": 0.0025084601698162666,
"loss": 8.5099,
"step": 93200
},
{
"epoch": 0.4,
"learning_rate": 0.0025167899948483575,
"loss": 8.5185,
"step": 93400
},
{
"epoch": 0.4,
"learning_rate": 0.0025251315898307336,
"loss": 8.5143,
"step": 93600
},
{
"epoch": 0.4,
"learning_rate": 0.002533484918172837,
"loss": 8.5277,
"step": 93800
},
{
"epoch": 0.4,
"learning_rate": 0.0025418499432326358,
"loss": 8.5231,
"step": 94000
},
{
"epoch": 0.4,
"learning_rate": 0.002550184715947826,
"loss": 8.5436,
"step": 94200
},
{
"epoch": 0.41,
"learning_rate": 0.0025585729662869474,
"loss": 8.5373,
"step": 94400
},
{
"epoch": 0.41,
"learning_rate": 0.002566972803294579,
"loss": 8.5347,
"step": 94600
},
{
"epoch": 0.41,
"learning_rate": 0.00257538419012468,
"loss": 8.5544,
"step": 94800
},
{
"epoch": 0.41,
"learning_rate": 0.0025838070898805453,
"loss": 8.5339,
"step": 95000
},
{
"epoch": 0.41,
"learning_rate": 0.002592241465614974,
"loss": 8.5405,
"step": 95200
},
{
"epoch": 0.41,
"learning_rate": 0.002600687280330416,
"loss": 8.5501,
"step": 95400
},
{
"epoch": 0.41,
"learning_rate": 0.0026091444969791513,
"loss": 8.5344,
"step": 95600
},
{
"epoch": 0.41,
"learning_rate": 0.002617613078463441,
"loss": 8.5477,
"step": 95800
},
{
"epoch": 0.41,
"learning_rate": 0.002626092987635699,
"loss": 8.5443,
"step": 96000
},
{
"epoch": 0.41,
"learning_rate": 0.002634541703276827,
"loss": 8.5398,
"step": 96200
},
{
"epoch": 0.41,
"learning_rate": 0.002643044100010169,
"loss": 8.5523,
"step": 96400
},
{
"epoch": 0.42,
"learning_rate": 0.002651557712877833,
"loss": 8.5562,
"step": 96600
},
{
"epoch": 0.42,
"learning_rate": 0.0026600825045346955,
"loss": 8.5525,
"step": 96800
},
{
"epoch": 0.42,
"learning_rate": 0.0026686184375866043,
"loss": 8.5728,
"step": 97000
},
{
"epoch": 0.42,
"learning_rate": 0.002677165474590528,
"loss": 8.5631,
"step": 97200
},
{
"epoch": 0.42,
"learning_rate": 0.002685723578054729,
"loss": 8.5658,
"step": 97400
},
{
"epoch": 0.42,
"learning_rate": 0.0026942927104389334,
"loss": 8.566,
"step": 97600
},
{
"epoch": 0.42,
"learning_rate": 0.002702872834154482,
"loss": 8.5716,
"step": 97800
},
{
"epoch": 0.42,
"learning_rate": 0.0027114639115645017,
"loss": 8.5697,
"step": 98000
},
{
"epoch": 0.42,
"learning_rate": 0.002720022867925799,
"loss": 8.5726,
"step": 98200
},
{
"epoch": 0.42,
"learning_rate": 0.0027286356853246747,
"loss": 8.5718,
"step": 98400
},
{
"epoch": 0.42,
"learning_rate": 0.0027372593434088002,
"loss": 8.5716,
"step": 98600
},
{
"epoch": 0.42,
"learning_rate": 0.002745893804350339,
"loss": 8.5767,
"step": 98800
},
{
"epoch": 0.43,
"learning_rate": 0.00275453903027407,
"loss": 8.5957,
"step": 99000
},
{
"epoch": 0.43,
"learning_rate": 0.0027631949832575475,
"loss": 8.5881,
"step": 99200
},
{
"epoch": 0.43,
"learning_rate": 0.002771861625331276,
"loss": 8.5835,
"step": 99400
},
{
"epoch": 0.43,
"learning_rate": 0.002780495505581529,
"loss": 8.5905,
"step": 99600
},
{
"epoch": 0.43,
"learning_rate": 0.002789183358769584,
"loss": 8.5938,
"step": 99800
},
{
"epoch": 0.43,
"learning_rate": 0.0027978817870494,
"loss": 8.5906,
"step": 100000
},
{
"epoch": 0.43,
"learning_rate": 0.0028065907522651585,
"loss": 8.5938,
"step": 100200
},
{
"epoch": 0.43,
"learning_rate": 0.002815310216214826,
"loss": 8.5887,
"step": 100400
},
{
"epoch": 0.43,
"learning_rate": 0.00282404014065031,
"loss": 8.5922,
"step": 100600
},
{
"epoch": 0.43,
"learning_rate": 0.0028327804872776367,
"loss": 8.5926,
"step": 100800
},
{
"epoch": 0.43,
"learning_rate": 0.002841531217757113,
"loss": 8.5978,
"step": 101000
},
{
"epoch": 0.43,
"learning_rate": 0.0028502922937035,
"loss": 8.5984,
"step": 101200
},
{
"epoch": 0.44,
"learning_rate": 0.0028590636766861726,
"loss": 8.6046,
"step": 101400
},
{
"epoch": 0.44,
"learning_rate": 0.0028678453282293013,
"loss": 8.6093,
"step": 101600
},
{
"epoch": 0.44,
"learning_rate": 0.0028766372098120076,
"loss": 8.6083,
"step": 101800
},
{
"epoch": 0.44,
"learning_rate": 0.0028854392828685377,
"loss": 8.6057,
"step": 102000
},
{
"epoch": 0.44,
"learning_rate": 0.0028942515087884407,
"loss": 8.6146,
"step": 102200
},
{
"epoch": 0.44,
"learning_rate": 0.00290307384891672,
"loss": 8.608,
"step": 102400
},
{
"epoch": 0.44,
"learning_rate": 0.00291190626455402,
"loss": 8.6081,
"step": 102600
},
{
"epoch": 0.44,
"learning_rate": 0.0029207044797924615,
"loss": 8.6164,
"step": 102800
},
{
"epoch": 0.44,
"learning_rate": 0.0029295568802797795,
"loss": 8.6008,
"step": 103000
},
{
"epoch": 0.44,
"learning_rate": 0.0029384192401078115,
"loss": 8.6166,
"step": 103200
},
{
"epoch": 0.44,
"learning_rate": 0.00294729152040165,
"loss": 8.5962,
"step": 103400
},
{
"epoch": 0.45,
"learning_rate": 0.002956173682242877,
"loss": 8.6129,
"step": 103600
},
{
"epoch": 0.45,
"learning_rate": 0.002965065686669722,
"loss": 8.6092,
"step": 103800
},
{
"epoch": 0.45,
"learning_rate": 0.0029739674946772463,
"loss": 8.6189,
"step": 104000
},
{
"epoch": 0.45,
"learning_rate": 0.002982879067217503,
"loss": 8.612,
"step": 104200
},
{
"epoch": 0.45,
"learning_rate": 0.0029918003651997144,
"loss": 8.6135,
"step": 104400
},
{
"epoch": 0.45,
"learning_rate": 0.003000731349490442,
"loss": 8.6182,
"step": 104600
},
{
"epoch": 0.45,
"learning_rate": 0.0030096719809137584,
"loss": 8.6423,
"step": 104800
},
{
"epoch": 0.45,
"learning_rate": 0.003018622220251419,
"loss": 8.6145,
"step": 105000
},
{
"epoch": 0.45,
"learning_rate": 0.0030275372054660438,
"loss": 8.6249,
"step": 105200
},
{
"epoch": 0.45,
"learning_rate": 0.0030365064952603237,
"loss": 8.6265,
"step": 105400
},
{
"epoch": 0.45,
"learning_rate": 0.0030454852752588536,
"loss": 8.6304,
"step": 105600
},
{
"epoch": 0.45,
"learning_rate": 0.0030544735060760494,
"loss": 8.6309,
"step": 105800
},
{
"epoch": 0.46,
"learning_rate": 0.0030634711482848704,
"loss": 8.6258,
"step": 106000
},
{
"epoch": 0.46,
"learning_rate": 0.003072478162416994,
"loss": 8.6328,
"step": 106200
},
{
"epoch": 0.46,
"learning_rate": 0.003081494508962985,
"loss": 8.6298,
"step": 106400
},
{
"epoch": 0.46,
"learning_rate": 0.0030905201483724717,
"loss": 8.639,
"step": 106600
},
{
"epoch": 0.46,
"learning_rate": 0.0030995550410543226,
"loss": 8.6212,
"step": 106800
},
{
"epoch": 0.46,
"learning_rate": 0.0031085991473768114,
"loss": 8.6374,
"step": 107000
},
{
"epoch": 0.46,
"learning_rate": 0.003117652427667799,
"loss": 8.6326,
"step": 107200
},
{
"epoch": 0.46,
"learning_rate": 0.0031267148422149046,
"loss": 8.6291,
"step": 107400
},
{
"epoch": 0.46,
"learning_rate": 0.003135740971163656,
"loss": 8.6375,
"step": 107600
},
{
"epoch": 0.46,
"learning_rate": 0.0031448214897512507,
"loss": 8.6226,
"step": 107800
},
{
"epoch": 0.46,
"learning_rate": 0.003153911023417371,
"loss": 8.6359,
"step": 108000
},
{
"epoch": 0.46,
"learning_rate": 0.003163009532290608,
"loss": 8.6491,
"step": 108200
},
{
"epoch": 0.47,
"learning_rate": 0.0031721169764601844,
"loss": 8.6405,
"step": 108400
},
{
"epoch": 0.47,
"learning_rate": 0.0031812333159761293,
"loss": 8.632,
"step": 108600
},
{
"epoch": 0.47,
"learning_rate": 0.003190358510849451,
"loss": 8.6363,
"step": 108800
},
{
"epoch": 0.47,
"learning_rate": 0.0031994925210523124,
"loss": 8.6316,
"step": 109000
},
{
"epoch": 0.47,
"learning_rate": 0.0032086353065182106,
"loss": 8.6423,
"step": 109200
},
{
"epoch": 0.47,
"learning_rate": 0.003217786827142146,
"loss": 8.6274,
"step": 109400
},
{
"epoch": 0.47,
"learning_rate": 0.003226947042780804,
"loss": 8.6366,
"step": 109600
},
{
"epoch": 0.47,
"learning_rate": 0.003236070047437989,
"loss": 8.6388,
"step": 109800
},
{
"epoch": 0.47,
"learning_rate": 0.003245247489550804,
"loss": 8.6364,
"step": 110000
},
{
"epoch": 0.47,
"learning_rate": 0.0032544335062216403,
"loss": 8.6422,
"step": 110200
},
{
"epoch": 0.47,
"learning_rate": 0.0032636280571558636,
"loss": 8.618,
"step": 110400
},
{
"epoch": 0.48,
"learning_rate": 0.003272831102021408,
"loss": 8.6276,
"step": 110600
},
{
"epoch": 0.48,
"learning_rate": 0.003282042600448948,
"loss": 8.6454,
"step": 110800
},
{
"epoch": 0.48,
"learning_rate": 0.0032912625120320753,
"loss": 8.6388,
"step": 111000
},
{
"epoch": 0.48,
"learning_rate": 0.0033004907963274733,
"loss": 8.6339,
"step": 111200
},
{
"epoch": 0.48,
"learning_rate": 0.003309727412855108,
"loss": 8.6243,
"step": 111400
},
{
"epoch": 0.48,
"learning_rate": 0.0033189723210983865,
"loss": 8.6264,
"step": 111600
},
{
"epoch": 0.48,
"learning_rate": 0.0033282254805043487,
"loss": 8.6401,
"step": 111800
},
{
"epoch": 0.48,
"learning_rate": 0.003337440523277331,
"loss": 8.6366,
"step": 112000
},
{
"epoch": 0.48,
"learning_rate": 0.0033467100224565524,
"loss": 8.6338,
"step": 112200
},
{
"epoch": 0.48,
"learning_rate": 0.003355987651126521,
"loss": 8.6377,
"step": 112400
},
{
"epoch": 0.48,
"learning_rate": 0.0033652733685907424,
"loss": 8.6414,
"step": 112600
},
{
"epoch": 0.48,
"learning_rate": 0.0033745671341172496,
"loss": 8.6264,
"step": 112800
},
{
"epoch": 0.49,
"learning_rate": 0.0033838689069387654,
"loss": 8.6289,
"step": 113000
},
{
"epoch": 0.49,
"learning_rate": 0.00339317864625289,
"loss": 8.6244,
"step": 113200
},
{
"epoch": 0.49,
"learning_rate": 0.003402496311222283,
"loss": 8.6287,
"step": 113400
},
{
"epoch": 0.49,
"learning_rate": 0.0034118218609748346,
"loss": 8.6251,
"step": 113600
},
{
"epoch": 0.49,
"learning_rate": 0.003421155254603846,
"loss": 8.6214,
"step": 113800
},
{
"epoch": 0.49,
"learning_rate": 0.0034304964511682147,
"loss": 8.6303,
"step": 114000
},
{
"epoch": 0.49,
"learning_rate": 0.0034398454096926092,
"loss": 8.6369,
"step": 114200
},
{
"epoch": 0.49,
"learning_rate": 0.003449202089167651,
"loss": 8.6236,
"step": 114400
},
{
"epoch": 0.49,
"learning_rate": 0.0034585196077173436,
"loss": 8.6251,
"step": 114600
},
{
"epoch": 0.49,
"learning_rate": 0.003467891567838331,
"loss": 8.6295,
"step": 114800
},
{
"epoch": 0.49,
"learning_rate": 0.003477271125884973,
"loss": 8.6219,
"step": 115000
},
{
"epoch": 0.5,
"learning_rate": 0.0034866582407136653,
"loss": 8.6271,
"step": 115200
},
{
"epoch": 0.5,
"learning_rate": 0.003496052871147656,
"loss": 8.6372,
"step": 115400
},
{
"epoch": 0.5,
"learning_rate": 0.0035054549759772242,
"loss": 8.6238,
"step": 115600
},
{
"epoch": 0.5,
"learning_rate": 0.0035148645139598637,
"loss": 8.6207,
"step": 115800
},
{
"epoch": 0.5,
"learning_rate": 0.0035242814438204637,
"loss": 8.6099,
"step": 116000
},
{
"epoch": 0.5,
"learning_rate": 0.0035337057242514833,
"loss": 8.6142,
"step": 116200
},
{
"epoch": 0.5,
"learning_rate": 0.0035431373139131472,
"loss": 8.6033,
"step": 116400
},
{
"epoch": 0.5,
"learning_rate": 0.0035525761714336104,
"loss": 8.6178,
"step": 116600
},
{
"epoch": 0.5,
"learning_rate": 0.0035619750070819923,
"loss": 8.6138,
"step": 116800
},
{
"epoch": 0.5,
"learning_rate": 0.0035714282402552104,
"loss": 8.6143,
"step": 117000
},
{
"epoch": 0.5,
"learning_rate": 0.0035808886171885554,
"loss": 8.6034,
"step": 117200
},
{
"epoch": 0.5,
"learning_rate": 0.0035903560963839124,
"loss": 8.6156,
"step": 117400
},
{
"epoch": 0.51,
"learning_rate": 0.0035998306363120057,
"loss": 8.6148,
"step": 117600
},
{
"epoch": 0.51,
"learning_rate": 0.0036093121954125906,
"loss": 8.6039,
"step": 117800
},
{
"epoch": 0.51,
"learning_rate": 0.003618800732094636,
"loss": 8.6107,
"step": 118000
},
{
"epoch": 0.51,
"learning_rate": 0.0036282962047364973,
"loss": 8.6094,
"step": 118200
},
{
"epoch": 0.51,
"learning_rate": 0.0036377985716861084,
"loss": 8.616,
"step": 118400
},
{
"epoch": 0.51,
"learning_rate": 0.003647307791261164,
"loss": 8.6135,
"step": 118600
},
{
"epoch": 0.51,
"learning_rate": 0.003656823821749292,
"loss": 8.6062,
"step": 118800
},
{
"epoch": 0.51,
"learning_rate": 0.0036662989906407328,
"loss": 8.6029,
"step": 119000
},
{
"epoch": 0.51,
"learning_rate": 0.0036758284841655496,
"loss": 8.6011,
"step": 119200
},
{
"epoch": 0.51,
"learning_rate": 0.0036853646634968946,
"loss": 8.5993,
"step": 119400
},
{
"epoch": 0.51,
"learning_rate": 0.003694907486804143,
"loss": 8.6029,
"step": 119600
},
{
"epoch": 0.51,
"learning_rate": 0.00370445691222752,
"loss": 8.6018,
"step": 119800
},
{
"epoch": 0.52,
"learning_rate": 0.003714012897878298,
"loss": 8.5978,
"step": 120000
}
],
"max_steps": 1000000,
"num_train_epochs": 5,
"total_flos": 1.9125959786496e+17,
"trial_name": null,
"trial_params": null
}