lesso10's picture
Training in progress, step 250, checkpoint
d6bddfc verified
{
"best_metric": 1.3310532569885254,
"best_model_checkpoint": "miner_id_24/checkpoint-250",
"epoch": 0.002108094661882697,
"eval_steps": 50,
"global_step": 250,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 8.432378647530788e-06,
"eval_loss": 2.016960620880127,
"eval_runtime": 3409.8944,
"eval_samples_per_second": 14.644,
"eval_steps_per_second": 3.661,
"step": 1
},
{
"epoch": 8.432378647530789e-05,
"grad_norm": 0.7229357957839966,
"learning_rate": 4.2000000000000004e-05,
"loss": 1.8129,
"step": 10
},
{
"epoch": 0.00016864757295061578,
"grad_norm": 0.6948916912078857,
"learning_rate": 8.400000000000001e-05,
"loss": 1.5689,
"step": 20
},
{
"epoch": 0.00025297135942592365,
"grad_norm": 0.9268532991409302,
"learning_rate": 0.000126,
"loss": 1.2979,
"step": 30
},
{
"epoch": 0.00033729514590123157,
"grad_norm": 1.0942802429199219,
"learning_rate": 0.00016800000000000002,
"loss": 1.3864,
"step": 40
},
{
"epoch": 0.00042161893237653943,
"grad_norm": 4.664062023162842,
"learning_rate": 0.00021,
"loss": 1.9757,
"step": 50
},
{
"epoch": 0.00042161893237653943,
"eval_loss": 1.524364948272705,
"eval_runtime": 3404.7905,
"eval_samples_per_second": 14.666,
"eval_steps_per_second": 3.667,
"step": 50
},
{
"epoch": 0.0005059427188518473,
"grad_norm": 0.6090065240859985,
"learning_rate": 0.00020974422527728155,
"loss": 1.4874,
"step": 60
},
{
"epoch": 0.0005902665053271552,
"grad_norm": 0.6349875926971436,
"learning_rate": 0.0002089781472178649,
"loss": 1.241,
"step": 70
},
{
"epoch": 0.0006745902918024631,
"grad_norm": 0.7745322585105896,
"learning_rate": 0.0002077054980770496,
"loss": 1.0923,
"step": 80
},
{
"epoch": 0.000758914078277771,
"grad_norm": 1.1790937185287476,
"learning_rate": 0.00020593247807352348,
"loss": 1.1274,
"step": 90
},
{
"epoch": 0.0008432378647530789,
"grad_norm": 3.3187856674194336,
"learning_rate": 0.00020366772518252038,
"loss": 2.0574,
"step": 100
},
{
"epoch": 0.0008432378647530789,
"eval_loss": 1.4387811422348022,
"eval_runtime": 3420.3676,
"eval_samples_per_second": 14.599,
"eval_steps_per_second": 3.65,
"step": 100
},
{
"epoch": 0.0009275616512283868,
"grad_norm": 0.5923383235931396,
"learning_rate": 0.0002009222730524731,
"loss": 1.5078,
"step": 110
},
{
"epoch": 0.0010118854377036946,
"grad_norm": 0.6275829672813416,
"learning_rate": 0.00019770949725018733,
"loss": 1.3687,
"step": 120
},
{
"epoch": 0.0010962092241790025,
"grad_norm": 0.9197534918785095,
"learning_rate": 0.00019404505009642473,
"loss": 1.1862,
"step": 130
},
{
"epoch": 0.0011805330106543104,
"grad_norm": 1.0203487873077393,
"learning_rate": 0.0001899467844093695,
"loss": 1.1169,
"step": 140
},
{
"epoch": 0.0012648567971296184,
"grad_norm": 2.69136118888855,
"learning_rate": 0.00018543466652749268,
"loss": 1.9026,
"step": 150
},
{
"epoch": 0.0012648567971296184,
"eval_loss": 1.4064786434173584,
"eval_runtime": 3415.4512,
"eval_samples_per_second": 14.62,
"eval_steps_per_second": 3.655,
"step": 150
},
{
"epoch": 0.0013491805836049263,
"grad_norm": 0.7159033417701721,
"learning_rate": 0.00018053067903555837,
"loss": 1.4857,
"step": 160
},
{
"epoch": 0.001433504370080234,
"grad_norm": 0.6960952877998352,
"learning_rate": 0.00017525871366768012,
"loss": 1.0184,
"step": 170
},
{
"epoch": 0.001517828156555542,
"grad_norm": 0.6692061424255371,
"learning_rate": 0.00016964445490919413,
"loss": 1.163,
"step": 180
},
{
"epoch": 0.0016021519430308498,
"grad_norm": 1.0128861665725708,
"learning_rate": 0.00016371525486442843,
"loss": 1.2,
"step": 190
},
{
"epoch": 0.0016864757295061577,
"grad_norm": 3.461775302886963,
"learning_rate": 0.0001575,
"loss": 1.9163,
"step": 200
},
{
"epoch": 0.0016864757295061577,
"eval_loss": 1.361072063446045,
"eval_runtime": 3401.7609,
"eval_samples_per_second": 14.679,
"eval_steps_per_second": 3.67,
"step": 200
},
{
"epoch": 0.0017707995159814657,
"grad_norm": 0.7128476500511169,
"learning_rate": 0.00015102897041285315,
"loss": 1.3027,
"step": 210
},
{
"epoch": 0.0018551233024567736,
"grad_norm": 0.6080098748207092,
"learning_rate": 0.00014433369230867077,
"loss": 1.2597,
"step": 220
},
{
"epoch": 0.0019394470889320815,
"grad_norm": 0.7591213583946228,
"learning_rate": 0.0001374467844093695,
"loss": 1.0402,
"step": 230
},
{
"epoch": 0.002023770875407389,
"grad_norm": 0.8502488136291504,
"learning_rate": 0.0001304017990379651,
"loss": 1.0837,
"step": 240
},
{
"epoch": 0.002108094661882697,
"grad_norm": 3.2781741619110107,
"learning_rate": 0.0001232330586550277,
"loss": 1.553,
"step": 250
},
{
"epoch": 0.002108094661882697,
"eval_loss": 1.3310532569885254,
"eval_runtime": 3412.7075,
"eval_samples_per_second": 14.631,
"eval_steps_per_second": 3.658,
"step": 250
}
],
"logging_steps": 10,
"max_steps": 500,
"num_input_tokens_seen": 0,
"num_train_epochs": 1,
"save_steps": 50,
"stateful_callbacks": {
"EarlyStoppingCallback": {
"args": {
"early_stopping_patience": 3,
"early_stopping_threshold": 0.0
},
"attributes": {
"early_stopping_patience_counter": 0
}
},
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": false
},
"attributes": {}
}
},
"total_flos": 5.483158242852864e+16,
"train_batch_size": 4,
"trial_name": null,
"trial_params": null
}