rizkyjun's picture
Training in progress, step 4100, checkpoint
46671c8
{
"best_metric": 1.9820196628570557,
"best_model_checkpoint": "./outputs/checkpoint-4100",
"epoch": 2.987249544626594,
"eval_steps": 100,
"global_step": 4100,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.07,
"learning_rate": 0.0002,
"loss": 2.7398,
"step": 100
},
{
"epoch": 0.07,
"eval_loss": 2.641923666000366,
"eval_runtime": 206.3681,
"eval_samples_per_second": 30.402,
"eval_steps_per_second": 3.804,
"step": 100
},
{
"epoch": 0.15,
"learning_rate": 0.0002,
"loss": 2.6046,
"step": 200
},
{
"epoch": 0.15,
"eval_loss": 2.592717409133911,
"eval_runtime": 206.4891,
"eval_samples_per_second": 30.384,
"eval_steps_per_second": 3.802,
"step": 200
},
{
"epoch": 0.22,
"learning_rate": 0.0002,
"loss": 2.5628,
"step": 300
},
{
"epoch": 0.22,
"eval_loss": 2.5575265884399414,
"eval_runtime": 206.1916,
"eval_samples_per_second": 30.428,
"eval_steps_per_second": 3.807,
"step": 300
},
{
"epoch": 0.29,
"learning_rate": 0.0002,
"loss": 2.5372,
"step": 400
},
{
"epoch": 0.29,
"eval_loss": 2.525071859359741,
"eval_runtime": 205.8756,
"eval_samples_per_second": 30.475,
"eval_steps_per_second": 3.813,
"step": 400
},
{
"epoch": 0.36,
"learning_rate": 0.0002,
"loss": 2.4945,
"step": 500
},
{
"epoch": 0.36,
"eval_loss": 2.4999899864196777,
"eval_runtime": 206.061,
"eval_samples_per_second": 30.447,
"eval_steps_per_second": 3.81,
"step": 500
},
{
"epoch": 0.44,
"learning_rate": 0.0002,
"loss": 2.4681,
"step": 600
},
{
"epoch": 0.44,
"eval_loss": 2.474062919616699,
"eval_runtime": 206.3754,
"eval_samples_per_second": 30.401,
"eval_steps_per_second": 3.804,
"step": 600
},
{
"epoch": 0.51,
"learning_rate": 0.0002,
"loss": 2.4511,
"step": 700
},
{
"epoch": 0.51,
"eval_loss": 2.4490628242492676,
"eval_runtime": 205.613,
"eval_samples_per_second": 30.514,
"eval_steps_per_second": 3.818,
"step": 700
},
{
"epoch": 0.58,
"learning_rate": 0.0002,
"loss": 2.428,
"step": 800
},
{
"epoch": 0.58,
"eval_loss": 2.4292285442352295,
"eval_runtime": 206.1663,
"eval_samples_per_second": 30.432,
"eval_steps_per_second": 3.808,
"step": 800
},
{
"epoch": 0.66,
"learning_rate": 0.0002,
"loss": 2.3917,
"step": 900
},
{
"epoch": 0.66,
"eval_loss": 2.4080374240875244,
"eval_runtime": 206.1066,
"eval_samples_per_second": 30.441,
"eval_steps_per_second": 3.809,
"step": 900
},
{
"epoch": 0.73,
"learning_rate": 0.0002,
"loss": 2.3881,
"step": 1000
},
{
"epoch": 0.73,
"eval_loss": 2.3876078128814697,
"eval_runtime": 205.7165,
"eval_samples_per_second": 30.498,
"eval_steps_per_second": 3.816,
"step": 1000
},
{
"epoch": 0.8,
"learning_rate": 0.0002,
"loss": 2.3815,
"step": 1100
},
{
"epoch": 0.8,
"eval_loss": 2.3660354614257812,
"eval_runtime": 206.3111,
"eval_samples_per_second": 30.41,
"eval_steps_per_second": 3.805,
"step": 1100
},
{
"epoch": 0.87,
"learning_rate": 0.0002,
"loss": 2.3383,
"step": 1200
},
{
"epoch": 0.87,
"eval_loss": 2.3476908206939697,
"eval_runtime": 206.414,
"eval_samples_per_second": 30.395,
"eval_steps_per_second": 3.803,
"step": 1200
},
{
"epoch": 0.95,
"learning_rate": 0.0002,
"loss": 2.3498,
"step": 1300
},
{
"epoch": 0.95,
"eval_loss": 2.3309385776519775,
"eval_runtime": 206.425,
"eval_samples_per_second": 30.394,
"eval_steps_per_second": 3.803,
"step": 1300
},
{
"epoch": 1.02,
"learning_rate": 0.0002,
"loss": 2.2994,
"step": 1400
},
{
"epoch": 1.02,
"eval_loss": 2.3077964782714844,
"eval_runtime": 206.5558,
"eval_samples_per_second": 30.374,
"eval_steps_per_second": 3.8,
"step": 1400
},
{
"epoch": 1.09,
"learning_rate": 0.0002,
"loss": 2.2611,
"step": 1500
},
{
"epoch": 1.09,
"eval_loss": 2.290616989135742,
"eval_runtime": 206.2977,
"eval_samples_per_second": 30.412,
"eval_steps_per_second": 3.805,
"step": 1500
},
{
"epoch": 1.17,
"learning_rate": 0.0002,
"loss": 2.2529,
"step": 1600
},
{
"epoch": 1.17,
"eval_loss": 2.2742502689361572,
"eval_runtime": 206.4691,
"eval_samples_per_second": 30.387,
"eval_steps_per_second": 3.802,
"step": 1600
},
{
"epoch": 1.24,
"learning_rate": 0.0002,
"loss": 2.2533,
"step": 1700
},
{
"epoch": 1.24,
"eval_loss": 2.2574470043182373,
"eval_runtime": 205.8893,
"eval_samples_per_second": 30.473,
"eval_steps_per_second": 3.813,
"step": 1700
},
{
"epoch": 1.31,
"learning_rate": 0.0002,
"loss": 2.214,
"step": 1800
},
{
"epoch": 1.31,
"eval_loss": 2.241929769515991,
"eval_runtime": 205.7921,
"eval_samples_per_second": 30.487,
"eval_steps_per_second": 3.815,
"step": 1800
},
{
"epoch": 1.38,
"learning_rate": 0.0002,
"loss": 2.199,
"step": 1900
},
{
"epoch": 1.38,
"eval_loss": 2.2262773513793945,
"eval_runtime": 206.0779,
"eval_samples_per_second": 30.445,
"eval_steps_per_second": 3.809,
"step": 1900
},
{
"epoch": 1.46,
"learning_rate": 0.0002,
"loss": 2.1938,
"step": 2000
},
{
"epoch": 1.46,
"eval_loss": 2.2117421627044678,
"eval_runtime": 206.4818,
"eval_samples_per_second": 30.385,
"eval_steps_per_second": 3.802,
"step": 2000
},
{
"epoch": 1.53,
"learning_rate": 0.0002,
"loss": 2.1761,
"step": 2100
},
{
"epoch": 1.53,
"eval_loss": 2.1993796825408936,
"eval_runtime": 206.3625,
"eval_samples_per_second": 30.403,
"eval_steps_per_second": 3.804,
"step": 2100
},
{
"epoch": 1.6,
"learning_rate": 0.0002,
"loss": 2.159,
"step": 2200
},
{
"epoch": 1.6,
"eval_loss": 2.1830873489379883,
"eval_runtime": 206.5502,
"eval_samples_per_second": 30.375,
"eval_steps_per_second": 3.801,
"step": 2200
},
{
"epoch": 1.68,
"learning_rate": 0.0002,
"loss": 2.1497,
"step": 2300
},
{
"epoch": 1.68,
"eval_loss": 2.1709437370300293,
"eval_runtime": 206.5171,
"eval_samples_per_second": 30.38,
"eval_steps_per_second": 3.801,
"step": 2300
},
{
"epoch": 1.75,
"learning_rate": 0.0002,
"loss": 2.1548,
"step": 2400
},
{
"epoch": 1.75,
"eval_loss": 2.1570258140563965,
"eval_runtime": 206.4592,
"eval_samples_per_second": 30.389,
"eval_steps_per_second": 3.802,
"step": 2400
},
{
"epoch": 1.82,
"learning_rate": 0.0002,
"loss": 2.1218,
"step": 2500
},
{
"epoch": 1.82,
"eval_loss": 2.1442573070526123,
"eval_runtime": 206.6982,
"eval_samples_per_second": 30.353,
"eval_steps_per_second": 3.798,
"step": 2500
},
{
"epoch": 1.89,
"learning_rate": 0.0002,
"loss": 2.1246,
"step": 2600
},
{
"epoch": 1.89,
"eval_loss": 2.1329336166381836,
"eval_runtime": 206.0093,
"eval_samples_per_second": 30.455,
"eval_steps_per_second": 3.811,
"step": 2600
},
{
"epoch": 1.97,
"learning_rate": 0.0002,
"loss": 2.1067,
"step": 2700
},
{
"epoch": 1.97,
"eval_loss": 2.117797374725342,
"eval_runtime": 206.8081,
"eval_samples_per_second": 30.337,
"eval_steps_per_second": 3.796,
"step": 2700
},
{
"epoch": 2.04,
"learning_rate": 0.0002,
"loss": 2.0643,
"step": 2800
},
{
"epoch": 2.04,
"eval_loss": 2.111072540283203,
"eval_runtime": 312.0262,
"eval_samples_per_second": 20.107,
"eval_steps_per_second": 2.516,
"step": 2800
},
{
"epoch": 2.11,
"learning_rate": 0.0002,
"loss": 2.0379,
"step": 2900
},
{
"epoch": 2.11,
"eval_loss": 2.097459554672241,
"eval_runtime": 206.2193,
"eval_samples_per_second": 30.424,
"eval_steps_per_second": 3.807,
"step": 2900
},
{
"epoch": 2.19,
"learning_rate": 0.0002,
"loss": 2.0567,
"step": 3000
},
{
"epoch": 2.19,
"eval_loss": 2.0867855548858643,
"eval_runtime": 206.4732,
"eval_samples_per_second": 30.387,
"eval_steps_per_second": 3.802,
"step": 3000
},
{
"epoch": 2.26,
"learning_rate": 0.0002,
"loss": 2.0243,
"step": 3100
},
{
"epoch": 2.26,
"eval_loss": 2.0787196159362793,
"eval_runtime": 206.3044,
"eval_samples_per_second": 30.411,
"eval_steps_per_second": 3.805,
"step": 3100
},
{
"epoch": 2.33,
"learning_rate": 0.0002,
"loss": 2.0407,
"step": 3200
},
{
"epoch": 2.33,
"eval_loss": 2.066490888595581,
"eval_runtime": 206.5529,
"eval_samples_per_second": 30.375,
"eval_steps_per_second": 3.8,
"step": 3200
},
{
"epoch": 2.4,
"learning_rate": 0.0002,
"loss": 2.009,
"step": 3300
},
{
"epoch": 2.4,
"eval_loss": 2.0566089153289795,
"eval_runtime": 205.9821,
"eval_samples_per_second": 30.459,
"eval_steps_per_second": 3.811,
"step": 3300
},
{
"epoch": 2.48,
"learning_rate": 0.0002,
"loss": 2.002,
"step": 3400
},
{
"epoch": 2.48,
"eval_loss": 2.0491139888763428,
"eval_runtime": 206.3719,
"eval_samples_per_second": 30.401,
"eval_steps_per_second": 3.804,
"step": 3400
},
{
"epoch": 2.55,
"learning_rate": 0.0002,
"loss": 2.0002,
"step": 3500
},
{
"epoch": 2.55,
"eval_loss": 2.0374956130981445,
"eval_runtime": 206.2009,
"eval_samples_per_second": 30.427,
"eval_steps_per_second": 3.807,
"step": 3500
},
{
"epoch": 2.62,
"learning_rate": 0.0002,
"loss": 1.9945,
"step": 3600
},
{
"epoch": 2.62,
"eval_loss": 2.027383804321289,
"eval_runtime": 206.0279,
"eval_samples_per_second": 30.452,
"eval_steps_per_second": 3.81,
"step": 3600
},
{
"epoch": 2.7,
"learning_rate": 0.0002,
"loss": 1.9759,
"step": 3700
},
{
"epoch": 2.7,
"eval_loss": 2.0199601650238037,
"eval_runtime": 206.7188,
"eval_samples_per_second": 30.35,
"eval_steps_per_second": 3.797,
"step": 3700
},
{
"epoch": 2.77,
"learning_rate": 0.0002,
"loss": 1.9675,
"step": 3800
},
{
"epoch": 2.77,
"eval_loss": 2.0125296115875244,
"eval_runtime": 206.1247,
"eval_samples_per_second": 30.438,
"eval_steps_per_second": 3.808,
"step": 3800
},
{
"epoch": 2.84,
"learning_rate": 0.0002,
"loss": 1.9542,
"step": 3900
},
{
"epoch": 2.84,
"eval_loss": 1.9999363422393799,
"eval_runtime": 206.6362,
"eval_samples_per_second": 30.363,
"eval_steps_per_second": 3.799,
"step": 3900
},
{
"epoch": 2.91,
"learning_rate": 0.0002,
"loss": 1.9555,
"step": 4000
},
{
"epoch": 2.91,
"eval_loss": 1.9915796518325806,
"eval_runtime": 206.7673,
"eval_samples_per_second": 30.343,
"eval_steps_per_second": 3.797,
"step": 4000
},
{
"epoch": 2.99,
"learning_rate": 0.0002,
"loss": 1.9575,
"step": 4100
},
{
"epoch": 2.99,
"eval_loss": 1.9820196628570557,
"eval_runtime": 207.4963,
"eval_samples_per_second": 30.237,
"eval_steps_per_second": 3.783,
"step": 4100
}
],
"logging_steps": 100,
"max_steps": 4116,
"num_train_epochs": 3,
"save_steps": 100,
"total_flos": 1.1959366338920448e+17,
"trial_name": null,
"trial_params": null
}