Bingsu's picture
Training in progress, step 30000
83847fb
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 0.1289213579716373,
"global_step": 30000,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.0,
"learning_rate": 0.00040004211081201384,
"loss": 8.3496,
"step": 200
},
{
"epoch": 0.0,
"learning_rate": 0.000400168442509171,
"loss": 8.2272,
"step": 400
},
{
"epoch": 0.0,
"learning_rate": 0.000400378992874836,
"loss": 7.6879,
"step": 600
},
{
"epoch": 0.0,
"learning_rate": 0.0004006737582146567,
"loss": 7.4747,
"step": 800
},
{
"epoch": 0.0,
"learning_rate": 0.0004010527333566261,
"loss": 7.2829,
"step": 1000
},
{
"epoch": 0.01,
"learning_rate": 0.0004015159116511832,
"loss": 7.1171,
"step": 1200
},
{
"epoch": 0.01,
"learning_rate": 0.00040206328497132196,
"loss": 6.9445,
"step": 1400
},
{
"epoch": 0.01,
"learning_rate": 0.0004026948437127389,
"loss": 6.8391,
"step": 1600
},
{
"epoch": 0.01,
"learning_rate": 0.0004034105767939909,
"loss": 6.7131,
"step": 1800
},
{
"epoch": 0.01,
"learning_rate": 0.00040421047165670534,
"loss": 6.6113,
"step": 2000
},
{
"epoch": 0.01,
"learning_rate": 0.0004050945142657896,
"loss": 6.4966,
"step": 2200
},
{
"epoch": 0.01,
"learning_rate": 0.0004060626891096795,
"loss": 6.3979,
"step": 2400
},
{
"epoch": 0.01,
"learning_rate": 0.0004071149792006148,
"loss": 6.3116,
"step": 2600
},
{
"epoch": 0.01,
"learning_rate": 0.00040825136607492915,
"loss": 6.2301,
"step": 2800
},
{
"epoch": 0.01,
"learning_rate": 0.0004094718297933883,
"loss": 6.123,
"step": 3000
},
{
"epoch": 0.01,
"learning_rate": 0.0004107763489415231,
"loss": 6.0802,
"step": 3200
},
{
"epoch": 0.01,
"learning_rate": 0.00041216490063001633,
"loss": 6.0029,
"step": 3400
},
{
"epoch": 0.02,
"learning_rate": 0.00041363746049510354,
"loss": 5.9471,
"step": 3600
},
{
"epoch": 0.02,
"learning_rate": 0.0004151940026989945,
"loss": 5.9132,
"step": 3800
},
{
"epoch": 0.02,
"learning_rate": 0.0004168344999303346,
"loss": 5.8561,
"step": 4000
},
{
"epoch": 0.02,
"learning_rate": 0.00041855892340467854,
"loss": 5.8044,
"step": 4200
},
{
"epoch": 0.02,
"learning_rate": 0.0004203672428649916,
"loss": 5.734,
"step": 4400
},
{
"epoch": 0.02,
"learning_rate": 0.0004222594265821944,
"loss": 5.7245,
"step": 4600
},
{
"epoch": 0.02,
"learning_rate": 0.0004242354413557057,
"loss": 5.6867,
"step": 4800
},
{
"epoch": 0.02,
"learning_rate": 0.00042629525251402893,
"loss": 5.6387,
"step": 5000
},
{
"epoch": 0.02,
"learning_rate": 0.0004284388239153662,
"loss": 5.6119,
"step": 5200
},
{
"epoch": 0.02,
"learning_rate": 0.0004306661179482429,
"loss": 5.5533,
"step": 5400
},
{
"epoch": 0.02,
"learning_rate": 0.0004329770955321787,
"loss": 5.517,
"step": 5600
},
{
"epoch": 0.02,
"learning_rate": 0.0004353717161183629,
"loss": 5.4864,
"step": 5800
},
{
"epoch": 0.03,
"learning_rate": 0.0004378499376903721,
"loss": 5.4671,
"step": 6000
},
{
"epoch": 0.03,
"learning_rate": 0.00044041171676490604,
"loss": 5.4412,
"step": 6200
},
{
"epoch": 0.03,
"learning_rate": 0.0004430570083925455,
"loss": 5.4108,
"step": 6400
},
{
"epoch": 0.03,
"learning_rate": 0.0004457857661585539,
"loss": 5.3807,
"step": 6600
},
{
"epoch": 0.03,
"learning_rate": 0.0004485979421836768,
"loss": 5.3353,
"step": 6800
},
{
"epoch": 0.03,
"learning_rate": 0.0004514934871249904,
"loss": 5.3277,
"step": 7000
},
{
"epoch": 0.03,
"learning_rate": 0.00045447235017676696,
"loss": 5.2979,
"step": 7200
},
{
"epoch": 0.03,
"learning_rate": 0.00045753447907136494,
"loss": 5.2791,
"step": 7400
},
{
"epoch": 0.03,
"learning_rate": 0.000460679820080143,
"loss": 5.2494,
"step": 7600
},
{
"epoch": 0.03,
"learning_rate": 0.00046390831801440893,
"loss": 5.2175,
"step": 7800
},
{
"epoch": 0.03,
"learning_rate": 0.0004672199162263843,
"loss": 5.2038,
"step": 8000
},
{
"epoch": 0.04,
"learning_rate": 0.0004706145566101966,
"loss": 5.1835,
"step": 8200
},
{
"epoch": 0.04,
"learning_rate": 0.0004740921796029061,
"loss": 5.1691,
"step": 8400
},
{
"epoch": 0.04,
"learning_rate": 0.0004776527241855382,
"loss": 5.1582,
"step": 8600
},
{
"epoch": 0.04,
"learning_rate": 0.0004812961278841711,
"loss": 5.1504,
"step": 8800
},
{
"epoch": 0.04,
"learning_rate": 0.0004850223267710129,
"loss": 5.1162,
"step": 9000
},
{
"epoch": 0.04,
"learning_rate": 0.0004888312554655432,
"loss": 5.0957,
"step": 9200
},
{
"epoch": 0.04,
"learning_rate": 0.0004927228471356421,
"loss": 5.079,
"step": 9400
},
{
"epoch": 0.04,
"learning_rate": 0.0004966970334987757,
"loss": 5.0572,
"step": 9600
},
{
"epoch": 0.04,
"learning_rate": 0.0005007537448231871,
"loss": 5.0342,
"step": 9800
},
{
"epoch": 0.04,
"learning_rate": 0.0005048929099291249,
"loss": 5.0106,
"step": 10000
},
{
"epoch": 0.04,
"learning_rate": 0.0005091144561900837,
"loss": 5.0155,
"step": 10200
},
{
"epoch": 0.04,
"learning_rate": 0.0005134183095340927,
"loss": 4.9817,
"step": 10400
},
{
"epoch": 0.05,
"learning_rate": 0.0005178043944449977,
"loss": 4.9742,
"step": 10600
},
{
"epoch": 0.05,
"learning_rate": 0.0005222726339638023,
"loss": 4.9299,
"step": 10800
},
{
"epoch": 0.05,
"learning_rate": 0.0005268229496900086,
"loss": 4.9208,
"step": 11000
},
{
"epoch": 0.05,
"learning_rate": 0.0005314552617829947,
"loss": 4.8617,
"step": 11200
},
{
"epoch": 0.05,
"learning_rate": 0.0005361694889634196,
"loss": 4.7952,
"step": 11400
},
{
"epoch": 0.05,
"learning_rate": 0.0005409655485146408,
"loss": 4.7641,
"step": 11600
},
{
"epoch": 0.05,
"learning_rate": 0.0005458433562841782,
"loss": 4.7361,
"step": 11800
},
{
"epoch": 0.05,
"learning_rate": 0.0005508028266851747,
"loss": 4.7023,
"step": 12000
},
{
"epoch": 0.05,
"learning_rate": 0.000555843872697916,
"loss": 4.6561,
"step": 12200
},
{
"epoch": 0.05,
"learning_rate": 0.0005609664058713396,
"loss": 4.63,
"step": 12400
},
{
"epoch": 0.05,
"learning_rate": 0.0005661703363245996,
"loss": 4.6307,
"step": 12600
},
{
"epoch": 0.06,
"learning_rate": 0.0005714555727486404,
"loss": 4.5881,
"step": 12800
},
{
"epoch": 0.06,
"learning_rate": 0.0005768220224077955,
"loss": 4.5489,
"step": 13000
},
{
"epoch": 0.06,
"learning_rate": 0.0005822695911414169,
"loss": 4.5521,
"step": 13200
},
{
"epoch": 0.06,
"learning_rate": 0.0005877981833655298,
"loss": 4.5165,
"step": 13400
},
{
"epoch": 0.06,
"learning_rate": 0.0005934077020745051,
"loss": 4.505,
"step": 13600
},
{
"epoch": 0.06,
"learning_rate": 0.0005990980488427659,
"loss": 4.4863,
"step": 13800
},
{
"epoch": 0.06,
"learning_rate": 0.000604869123826509,
"loss": 4.5071,
"step": 14000
},
{
"epoch": 0.06,
"learning_rate": 0.0006107208257654633,
"loss": 4.4501,
"step": 14200
},
{
"epoch": 0.06,
"learning_rate": 0.0006166530519846631,
"loss": 4.4623,
"step": 14400
},
{
"epoch": 0.06,
"learning_rate": 0.0006226656983962468,
"loss": 4.4336,
"step": 14600
},
{
"epoch": 0.06,
"learning_rate": 0.0006287586595012887,
"loss": 4.4335,
"step": 14800
},
{
"epoch": 0.06,
"learning_rate": 0.000634931828391647,
"loss": 4.4142,
"step": 15000
},
{
"epoch": 0.07,
"learning_rate": 0.0006411850967518416,
"loss": 4.4145,
"step": 15200
},
{
"epoch": 0.07,
"learning_rate": 0.0006475183548609511,
"loss": 4.3842,
"step": 15400
},
{
"epoch": 0.07,
"learning_rate": 0.0006539314915945428,
"loss": 4.3748,
"step": 15600
},
{
"epoch": 0.07,
"learning_rate": 0.0006604243944266178,
"loss": 4.3815,
"step": 15800
},
{
"epoch": 0.07,
"learning_rate": 0.0006669969494315867,
"loss": 4.352,
"step": 16000
},
{
"epoch": 0.07,
"learning_rate": 0.0006736490412862749,
"loss": 4.3575,
"step": 16200
},
{
"epoch": 0.07,
"learning_rate": 0.000680380553271933,
"loss": 4.3416,
"step": 16400
},
{
"epoch": 0.07,
"learning_rate": 0.0006871913672762998,
"loss": 4.341,
"step": 16600
},
{
"epoch": 0.07,
"learning_rate": 0.0006940813637956594,
"loss": 4.3183,
"step": 16800
},
{
"epoch": 0.07,
"learning_rate": 0.0007010504219369541,
"loss": 4.3145,
"step": 17000
},
{
"epoch": 0.07,
"learning_rate": 0.0007080984194198885,
"loss": 4.3065,
"step": 17200
},
{
"epoch": 0.07,
"learning_rate": 0.0007152252325790948,
"loss": 4.2805,
"step": 17400
},
{
"epoch": 0.08,
"learning_rate": 0.0007224307363662818,
"loss": 4.2804,
"step": 17600
},
{
"epoch": 0.08,
"learning_rate": 0.0007297148043524434,
"loss": 4.2996,
"step": 17800
},
{
"epoch": 0.08,
"learning_rate": 0.0007370773087300737,
"loss": 4.2743,
"step": 18000
},
{
"epoch": 0.08,
"learning_rate": 0.0007445181203154048,
"loss": 4.2621,
"step": 18200
},
{
"epoch": 0.08,
"learning_rate": 0.0007520371085506811,
"loss": 4.2548,
"step": 18400
},
{
"epoch": 0.08,
"learning_rate": 0.0007596341415064441,
"loss": 4.2643,
"step": 18600
},
{
"epoch": 0.08,
"learning_rate": 0.0007673090858838494,
"loss": 4.266,
"step": 18800
},
{
"epoch": 0.08,
"learning_rate": 0.0007750618070170041,
"loss": 4.2503,
"step": 19000
},
{
"epoch": 0.08,
"learning_rate": 0.0007828921688753324,
"loss": 4.2093,
"step": 19200
},
{
"epoch": 0.08,
"learning_rate": 0.0007908000340659631,
"loss": 4.2449,
"step": 19400
},
{
"epoch": 0.08,
"learning_rate": 0.0007987852638361333,
"loss": 4.2158,
"step": 19600
},
{
"epoch": 0.09,
"learning_rate": 0.0008068477180756314,
"loss": 4.202,
"step": 19800
},
{
"epoch": 0.09,
"learning_rate": 0.0008149872553192515,
"loss": 4.2065,
"step": 20000
},
{
"epoch": 0.09,
"learning_rate": 0.0008232037327492777,
"loss": 4.1773,
"step": 20200
},
{
"epoch": 0.09,
"learning_rate": 0.0008314970061979818,
"loss": 4.1904,
"step": 20400
},
{
"epoch": 0.09,
"learning_rate": 0.0008398669301501703,
"loss": 4.1868,
"step": 20600
},
{
"epoch": 0.09,
"learning_rate": 0.0008483133577457148,
"loss": 4.2006,
"step": 20800
},
{
"epoch": 0.09,
"learning_rate": 0.0008568361407821495,
"loss": 4.1467,
"step": 21000
},
{
"epoch": 0.09,
"learning_rate": 0.0008654351297172607,
"loss": 4.1585,
"step": 21200
},
{
"epoch": 0.09,
"learning_rate": 0.0008741101736717116,
"loss": 4.1547,
"step": 21400
},
{
"epoch": 0.09,
"learning_rate": 0.0008828611204316911,
"loss": 4.1557,
"step": 21600
},
{
"epoch": 0.09,
"learning_rate": 0.0008916878164515838,
"loss": 4.1496,
"step": 21800
},
{
"epoch": 0.09,
"learning_rate": 0.0009005901068566691,
"loss": 4.1434,
"step": 22000
},
{
"epoch": 0.1,
"learning_rate": 0.0009095678354458306,
"loss": 4.1173,
"step": 22200
},
{
"epoch": 0.1,
"learning_rate": 0.0009186208446943008,
"loss": 4.1364,
"step": 22400
},
{
"epoch": 0.1,
"learning_rate": 0.0009277489757564244,
"loss": 4.1445,
"step": 22600
},
{
"epoch": 0.1,
"learning_rate": 0.0009369520684684475,
"loss": 4.1156,
"step": 22800
},
{
"epoch": 0.1,
"learning_rate": 0.0009462299613513248,
"loss": 4.1033,
"step": 23000
},
{
"epoch": 0.1,
"learning_rate": 0.0009555824916135536,
"loss": 4.1187,
"step": 23200
},
{
"epoch": 0.1,
"learning_rate": 0.0009650094951540386,
"loss": 4.0823,
"step": 23400
},
{
"epoch": 0.1,
"learning_rate": 0.0009745108065649499,
"loss": 4.0624,
"step": 23600
},
{
"epoch": 0.1,
"learning_rate": 0.0009840862591346498,
"loss": 4.0845,
"step": 23800
},
{
"epoch": 0.1,
"learning_rate": 0.0009937356848506058,
"loss": 4.0483,
"step": 24000
},
{
"epoch": 0.1,
"learning_rate": 0.001003458914402332,
"loss": 4.0512,
"step": 24200
},
{
"epoch": 0.1,
"learning_rate": 0.0010132557771843787,
"loss": 4.0606,
"step": 24400
},
{
"epoch": 0.11,
"learning_rate": 0.0010231261012993067,
"loss": 4.046,
"step": 24600
},
{
"epoch": 0.11,
"learning_rate": 0.0010330697135607168,
"loss": 4.0315,
"step": 24800
},
{
"epoch": 0.11,
"learning_rate": 0.00104308643949628,
"loss": 4.0179,
"step": 25000
},
{
"epoch": 0.11,
"learning_rate": 0.001053176103350803,
"loss": 4.0351,
"step": 25200
},
{
"epoch": 0.11,
"learning_rate": 0.0010633385280893123,
"loss": 4.02,
"step": 25400
},
{
"epoch": 0.11,
"learning_rate": 0.0010735735354001595,
"loss": 4.0201,
"step": 25600
},
{
"epoch": 0.11,
"learning_rate": 0.0010838809456981471,
"loss": 4.0044,
"step": 25800
},
{
"epoch": 0.11,
"learning_rate": 0.001094260578127686,
"loss": 3.9914,
"step": 26000
},
{
"epoch": 0.11,
"learning_rate": 0.0011047122505659646,
"loss": 3.9991,
"step": 26200
},
{
"epoch": 0.11,
"learning_rate": 0.0011152357796261423,
"loss": 4.0109,
"step": 26400
},
{
"epoch": 0.11,
"learning_rate": 0.0011258309806605731,
"loss": 4.0405,
"step": 26600
},
{
"epoch": 0.12,
"learning_rate": 0.0011364976677640387,
"loss": 4.0349,
"step": 26800
},
{
"epoch": 0.12,
"learning_rate": 0.0011472356537770186,
"loss": 4.0312,
"step": 27000
},
{
"epoch": 0.12,
"learning_rate": 0.0011580447502889633,
"loss": 4.0185,
"step": 27200
},
{
"epoch": 0.12,
"learning_rate": 0.0011689247676416152,
"loss": 4.011,
"step": 27400
},
{
"epoch": 0.12,
"learning_rate": 0.0011798755149323176,
"loss": 3.9898,
"step": 27600
},
{
"epoch": 0.12,
"learning_rate": 0.001190896800017379,
"loss": 3.981,
"step": 27800
},
{
"epoch": 0.12,
"learning_rate": 0.0012019884295154416,
"loss": 3.949,
"step": 28000
},
{
"epoch": 0.12,
"learning_rate": 0.0012131502088108658,
"loss": 3.9896,
"step": 28200
},
{
"epoch": 0.12,
"learning_rate": 0.0012243819420571598,
"loss": 3.9951,
"step": 28400
},
{
"epoch": 0.12,
"learning_rate": 0.0012356834321804039,
"loss": 4.0361,
"step": 28600
},
{
"epoch": 0.12,
"learning_rate": 0.0012470544808827113,
"loss": 4.1212,
"step": 28800
},
{
"epoch": 0.12,
"learning_rate": 0.001258494888645708,
"loss": 4.0721,
"step": 29000
},
{
"epoch": 0.13,
"learning_rate": 0.0012700044547340368,
"loss": 4.0311,
"step": 29200
},
{
"epoch": 0.13,
"learning_rate": 0.0012815829771988738,
"loss": 4.0114,
"step": 29400
},
{
"epoch": 0.13,
"learning_rate": 0.001293230252881479,
"loss": 3.9868,
"step": 29600
},
{
"epoch": 0.13,
"learning_rate": 0.0013049460774167514,
"loss": 3.9881,
"step": 29800
},
{
"epoch": 0.13,
"learning_rate": 0.0013167302452368242,
"loss": 3.9705,
"step": 30000
}
],
"max_steps": 500000,
"num_train_epochs": 3,
"total_flos": 4.781489946624e+16,
"trial_name": null,
"trial_params": null
}