Bingsu's picture
Training in progress, step 20000
35deea0
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 0.08594757198109153,
"global_step": 20000,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.0,
"learning_rate": 0.00040004211081201384,
"loss": 8.3496,
"step": 200
},
{
"epoch": 0.0,
"learning_rate": 0.000400168442509171,
"loss": 8.2272,
"step": 400
},
{
"epoch": 0.0,
"learning_rate": 0.000400378992874836,
"loss": 7.6879,
"step": 600
},
{
"epoch": 0.0,
"learning_rate": 0.0004006737582146567,
"loss": 7.4747,
"step": 800
},
{
"epoch": 0.0,
"learning_rate": 0.0004010527333566261,
"loss": 7.2829,
"step": 1000
},
{
"epoch": 0.01,
"learning_rate": 0.0004015159116511832,
"loss": 7.1171,
"step": 1200
},
{
"epoch": 0.01,
"learning_rate": 0.00040206328497132196,
"loss": 6.9445,
"step": 1400
},
{
"epoch": 0.01,
"learning_rate": 0.0004026948437127389,
"loss": 6.8391,
"step": 1600
},
{
"epoch": 0.01,
"learning_rate": 0.0004034105767939909,
"loss": 6.7131,
"step": 1800
},
{
"epoch": 0.01,
"learning_rate": 0.00040421047165670534,
"loss": 6.6113,
"step": 2000
},
{
"epoch": 0.01,
"learning_rate": 0.0004050945142657896,
"loss": 6.4966,
"step": 2200
},
{
"epoch": 0.01,
"learning_rate": 0.0004060626891096795,
"loss": 6.3979,
"step": 2400
},
{
"epoch": 0.01,
"learning_rate": 0.0004071149792006148,
"loss": 6.3116,
"step": 2600
},
{
"epoch": 0.01,
"learning_rate": 0.00040825136607492915,
"loss": 6.2301,
"step": 2800
},
{
"epoch": 0.01,
"learning_rate": 0.0004094718297933883,
"loss": 6.123,
"step": 3000
},
{
"epoch": 0.01,
"learning_rate": 0.0004107763489415231,
"loss": 6.0802,
"step": 3200
},
{
"epoch": 0.01,
"learning_rate": 0.00041216490063001633,
"loss": 6.0029,
"step": 3400
},
{
"epoch": 0.02,
"learning_rate": 0.00041363746049510354,
"loss": 5.9471,
"step": 3600
},
{
"epoch": 0.02,
"learning_rate": 0.0004151940026989945,
"loss": 5.9132,
"step": 3800
},
{
"epoch": 0.02,
"learning_rate": 0.0004168344999303346,
"loss": 5.8561,
"step": 4000
},
{
"epoch": 0.02,
"learning_rate": 0.00041855892340467854,
"loss": 5.8044,
"step": 4200
},
{
"epoch": 0.02,
"learning_rate": 0.0004203672428649916,
"loss": 5.734,
"step": 4400
},
{
"epoch": 0.02,
"learning_rate": 0.0004222594265821944,
"loss": 5.7245,
"step": 4600
},
{
"epoch": 0.02,
"learning_rate": 0.0004242354413557057,
"loss": 5.6867,
"step": 4800
},
{
"epoch": 0.02,
"learning_rate": 0.00042629525251402893,
"loss": 5.6387,
"step": 5000
},
{
"epoch": 0.02,
"learning_rate": 0.0004284388239153662,
"loss": 5.6119,
"step": 5200
},
{
"epoch": 0.02,
"learning_rate": 0.0004306661179482429,
"loss": 5.5533,
"step": 5400
},
{
"epoch": 0.02,
"learning_rate": 0.0004329770955321787,
"loss": 5.517,
"step": 5600
},
{
"epoch": 0.02,
"learning_rate": 0.0004353717161183629,
"loss": 5.4864,
"step": 5800
},
{
"epoch": 0.03,
"learning_rate": 0.0004378499376903721,
"loss": 5.4671,
"step": 6000
},
{
"epoch": 0.03,
"learning_rate": 0.00044041171676490604,
"loss": 5.4412,
"step": 6200
},
{
"epoch": 0.03,
"learning_rate": 0.0004430570083925455,
"loss": 5.4108,
"step": 6400
},
{
"epoch": 0.03,
"learning_rate": 0.0004457857661585539,
"loss": 5.3807,
"step": 6600
},
{
"epoch": 0.03,
"learning_rate": 0.0004485979421836768,
"loss": 5.3353,
"step": 6800
},
{
"epoch": 0.03,
"learning_rate": 0.0004514934871249904,
"loss": 5.3277,
"step": 7000
},
{
"epoch": 0.03,
"learning_rate": 0.00045447235017676696,
"loss": 5.2979,
"step": 7200
},
{
"epoch": 0.03,
"learning_rate": 0.00045753447907136494,
"loss": 5.2791,
"step": 7400
},
{
"epoch": 0.03,
"learning_rate": 0.000460679820080143,
"loss": 5.2494,
"step": 7600
},
{
"epoch": 0.03,
"learning_rate": 0.00046390831801440893,
"loss": 5.2175,
"step": 7800
},
{
"epoch": 0.03,
"learning_rate": 0.0004672199162263843,
"loss": 5.2038,
"step": 8000
},
{
"epoch": 0.04,
"learning_rate": 0.0004706145566101966,
"loss": 5.1835,
"step": 8200
},
{
"epoch": 0.04,
"learning_rate": 0.0004740921796029061,
"loss": 5.1691,
"step": 8400
},
{
"epoch": 0.04,
"learning_rate": 0.0004776527241855382,
"loss": 5.1582,
"step": 8600
},
{
"epoch": 0.04,
"learning_rate": 0.0004812961278841711,
"loss": 5.1504,
"step": 8800
},
{
"epoch": 0.04,
"learning_rate": 0.0004850223267710129,
"loss": 5.1162,
"step": 9000
},
{
"epoch": 0.04,
"learning_rate": 0.0004888312554655432,
"loss": 5.0957,
"step": 9200
},
{
"epoch": 0.04,
"learning_rate": 0.0004927228471356421,
"loss": 5.079,
"step": 9400
},
{
"epoch": 0.04,
"learning_rate": 0.0004966970334987757,
"loss": 5.0572,
"step": 9600
},
{
"epoch": 0.04,
"learning_rate": 0.0005007537448231871,
"loss": 5.0342,
"step": 9800
},
{
"epoch": 0.04,
"learning_rate": 0.0005048929099291249,
"loss": 5.0106,
"step": 10000
},
{
"epoch": 0.04,
"learning_rate": 0.0005091144561900837,
"loss": 5.0155,
"step": 10200
},
{
"epoch": 0.04,
"learning_rate": 0.0005134183095340927,
"loss": 4.9817,
"step": 10400
},
{
"epoch": 0.05,
"learning_rate": 0.0005178043944449977,
"loss": 4.9742,
"step": 10600
},
{
"epoch": 0.05,
"learning_rate": 0.0005222726339638023,
"loss": 4.9299,
"step": 10800
},
{
"epoch": 0.05,
"learning_rate": 0.0005268229496900086,
"loss": 4.9208,
"step": 11000
},
{
"epoch": 0.05,
"learning_rate": 0.0005314552617829947,
"loss": 4.8617,
"step": 11200
},
{
"epoch": 0.05,
"learning_rate": 0.0005361694889634196,
"loss": 4.7952,
"step": 11400
},
{
"epoch": 0.05,
"learning_rate": 0.0005409655485146408,
"loss": 4.7641,
"step": 11600
},
{
"epoch": 0.05,
"learning_rate": 0.0005458433562841782,
"loss": 4.7361,
"step": 11800
},
{
"epoch": 0.05,
"learning_rate": 0.0005508028266851747,
"loss": 4.7023,
"step": 12000
},
{
"epoch": 0.05,
"learning_rate": 0.000555843872697916,
"loss": 4.6561,
"step": 12200
},
{
"epoch": 0.05,
"learning_rate": 0.0005609664058713396,
"loss": 4.63,
"step": 12400
},
{
"epoch": 0.05,
"learning_rate": 0.0005661703363245996,
"loss": 4.6307,
"step": 12600
},
{
"epoch": 0.06,
"learning_rate": 0.0005714555727486404,
"loss": 4.5881,
"step": 12800
},
{
"epoch": 0.06,
"learning_rate": 0.0005768220224077955,
"loss": 4.5489,
"step": 13000
},
{
"epoch": 0.06,
"learning_rate": 0.0005822695911414169,
"loss": 4.5521,
"step": 13200
},
{
"epoch": 0.06,
"learning_rate": 0.0005877981833655298,
"loss": 4.5165,
"step": 13400
},
{
"epoch": 0.06,
"learning_rate": 0.0005934077020745051,
"loss": 4.505,
"step": 13600
},
{
"epoch": 0.06,
"learning_rate": 0.0005990980488427659,
"loss": 4.4863,
"step": 13800
},
{
"epoch": 0.06,
"learning_rate": 0.000604869123826509,
"loss": 4.5071,
"step": 14000
},
{
"epoch": 0.06,
"learning_rate": 0.0006107208257654633,
"loss": 4.4501,
"step": 14200
},
{
"epoch": 0.06,
"learning_rate": 0.0006166530519846631,
"loss": 4.4623,
"step": 14400
},
{
"epoch": 0.06,
"learning_rate": 0.0006226656983962468,
"loss": 4.4336,
"step": 14600
},
{
"epoch": 0.06,
"learning_rate": 0.0006287586595012887,
"loss": 4.4335,
"step": 14800
},
{
"epoch": 0.06,
"learning_rate": 0.000634931828391647,
"loss": 4.4142,
"step": 15000
},
{
"epoch": 0.07,
"learning_rate": 0.0006411850967518416,
"loss": 4.4145,
"step": 15200
},
{
"epoch": 0.07,
"learning_rate": 0.0006475183548609511,
"loss": 4.3842,
"step": 15400
},
{
"epoch": 0.07,
"learning_rate": 0.0006539314915945428,
"loss": 4.3748,
"step": 15600
},
{
"epoch": 0.07,
"learning_rate": 0.0006604243944266178,
"loss": 4.3815,
"step": 15800
},
{
"epoch": 0.07,
"learning_rate": 0.0006669969494315867,
"loss": 4.352,
"step": 16000
},
{
"epoch": 0.07,
"learning_rate": 0.0006736490412862749,
"loss": 4.3575,
"step": 16200
},
{
"epoch": 0.07,
"learning_rate": 0.000680380553271933,
"loss": 4.3416,
"step": 16400
},
{
"epoch": 0.07,
"learning_rate": 0.0006871913672762998,
"loss": 4.341,
"step": 16600
},
{
"epoch": 0.07,
"learning_rate": 0.0006940813637956594,
"loss": 4.3183,
"step": 16800
},
{
"epoch": 0.07,
"learning_rate": 0.0007010504219369541,
"loss": 4.3145,
"step": 17000
},
{
"epoch": 0.07,
"learning_rate": 0.0007080984194198885,
"loss": 4.3065,
"step": 17200
},
{
"epoch": 0.07,
"learning_rate": 0.0007152252325790948,
"loss": 4.2805,
"step": 17400
},
{
"epoch": 0.08,
"learning_rate": 0.0007224307363662818,
"loss": 4.2804,
"step": 17600
},
{
"epoch": 0.08,
"learning_rate": 0.0007297148043524434,
"loss": 4.2996,
"step": 17800
},
{
"epoch": 0.08,
"learning_rate": 0.0007370773087300737,
"loss": 4.2743,
"step": 18000
},
{
"epoch": 0.08,
"learning_rate": 0.0007445181203154048,
"loss": 4.2621,
"step": 18200
},
{
"epoch": 0.08,
"learning_rate": 0.0007520371085506811,
"loss": 4.2548,
"step": 18400
},
{
"epoch": 0.08,
"learning_rate": 0.0007596341415064441,
"loss": 4.2643,
"step": 18600
},
{
"epoch": 0.08,
"learning_rate": 0.0007673090858838494,
"loss": 4.266,
"step": 18800
},
{
"epoch": 0.08,
"learning_rate": 0.0007750618070170041,
"loss": 4.2503,
"step": 19000
},
{
"epoch": 0.08,
"learning_rate": 0.0007828921688753324,
"loss": 4.2093,
"step": 19200
},
{
"epoch": 0.08,
"learning_rate": 0.0007908000340659631,
"loss": 4.2449,
"step": 19400
},
{
"epoch": 0.08,
"learning_rate": 0.0007987852638361333,
"loss": 4.2158,
"step": 19600
},
{
"epoch": 0.09,
"learning_rate": 0.0008068477180756314,
"loss": 4.202,
"step": 19800
},
{
"epoch": 0.09,
"learning_rate": 0.0008149872553192515,
"loss": 4.2065,
"step": 20000
}
],
"max_steps": 500000,
"num_train_epochs": 3,
"total_flos": 3.187659964416e+16,
"trial_name": null,
"trial_params": null
}