gpt2_2 / trainer_state.json
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 2.932811944543192,
"eval_steps": 500,
"global_step": 16500,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.08887308922858159,
"grad_norm": 20.37746810913086,
"learning_rate": 4.851878184619031e-05,
"loss": 0.6617,
"step": 500
},
{
"epoch": 0.17774617845716317,
"grad_norm": 8.832418441772461,
"learning_rate": 4.703756369238061e-05,
"loss": 0.5007,
"step": 1000
},
{
"epoch": 0.26661926768574473,
"grad_norm": 112.71693420410156,
"learning_rate": 4.555634553857092e-05,
"loss": 0.4867,
"step": 1500
},
{
"epoch": 0.35549235691432635,
"grad_norm": 0.7494603991508484,
"learning_rate": 4.407512738476123e-05,
"loss": 0.5113,
"step": 2000
},
{
"epoch": 0.4443654461429079,
"grad_norm": 0.5181692838668823,
"learning_rate": 4.259390923095153e-05,
"loss": 0.557,
"step": 2500
},
{
"epoch": 0.5332385353714895,
"grad_norm": 0.8127353191375732,
"learning_rate": 4.1112691077141844e-05,
"loss": 0.4746,
"step": 3000
},
{
"epoch": 0.6221116246000711,
"grad_norm": 52.21469497680664,
"learning_rate": 3.9631472923332156e-05,
"loss": 0.4848,
"step": 3500
},
{
"epoch": 0.7109847138286527,
"grad_norm": 3.922440767288208,
"learning_rate": 3.815025476952246e-05,
"loss": 0.4689,
"step": 4000
},
{
"epoch": 0.7998578030572343,
"grad_norm": 39.936492919921875,
"learning_rate": 3.6669036615712765e-05,
"loss": 0.4724,
"step": 4500
},
{
"epoch": 0.8887308922858158,
"grad_norm": 3.033518075942993,
"learning_rate": 3.518781846190307e-05,
"loss": 0.4384,
"step": 5000
},
{
"epoch": 0.9776039815143974,
"grad_norm": 8.464579582214355,
"learning_rate": 3.370660030809338e-05,
"loss": 0.4534,
"step": 5500
},
{
"epoch": 1.066477070742979,
"grad_norm": 0.24204222857952118,
"learning_rate": 3.2225382154283686e-05,
"loss": 0.3978,
"step": 6000
},
{
"epoch": 1.1553501599715605,
"grad_norm": 0.057052597403526306,
"learning_rate": 3.074416400047399e-05,
"loss": 0.3852,
"step": 6500
},
{
"epoch": 1.2442232492001422,
"grad_norm": 0.15880955755710602,
"learning_rate": 2.92629458466643e-05,
"loss": 0.3915,
"step": 7000
},
{
"epoch": 1.3330963384287238,
"grad_norm": 0.09902948141098022,
"learning_rate": 2.7781727692854603e-05,
"loss": 0.3785,
"step": 7500
},
{
"epoch": 1.4219694276573054,
"grad_norm": 0.15144290030002594,
"learning_rate": 2.6300509539044908e-05,
"loss": 0.3587,
"step": 8000
},
{
"epoch": 1.510842516885887,
"grad_norm": 0.22604715824127197,
"learning_rate": 2.481929138523522e-05,
"loss": 0.3811,
"step": 8500
},
{
"epoch": 1.5997156061144686,
"grad_norm": 14.215106964111328,
"learning_rate": 2.3338073231425524e-05,
"loss": 0.3591,
"step": 9000
},
{
"epoch": 1.68858869534305,
"grad_norm": 0.2168210744857788,
"learning_rate": 2.1856855077615832e-05,
"loss": 0.37,
"step": 9500
},
{
"epoch": 1.7774617845716318,
"grad_norm": 0.15380479395389557,
"learning_rate": 2.0375636923806137e-05,
"loss": 0.3492,
"step": 10000
},
{
"epoch": 1.8663348738002132,
"grad_norm": 0.09666112810373306,
"learning_rate": 1.889441876999645e-05,
"loss": 0.3922,
"step": 10500
},
{
"epoch": 1.9552079630287948,
"grad_norm": 0.7467890381813049,
"learning_rate": 1.7413200616186753e-05,
"loss": 0.3786,
"step": 11000
},
{
"epoch": 2.0440810522573765,
"grad_norm": 1.9722317457199097,
"learning_rate": 1.593198246237706e-05,
"loss": 0.3258,
"step": 11500
},
{
"epoch": 2.132954141485958,
"grad_norm": 0.07470700144767761,
"learning_rate": 1.4450764308567366e-05,
"loss": 0.2807,
"step": 12000
},
{
"epoch": 2.2218272307145397,
"grad_norm": 0.11256339401006699,
"learning_rate": 1.2969546154757672e-05,
"loss": 0.2751,
"step": 12500
},
{
"epoch": 2.310700319943121,
"grad_norm": 0.010098825208842754,
"learning_rate": 1.148832800094798e-05,
"loss": 0.2385,
"step": 13000
},
{
"epoch": 2.399573409171703,
"grad_norm": 0.15252766013145447,
"learning_rate": 1.0007109847138287e-05,
"loss": 0.2379,
"step": 13500
},
{
"epoch": 2.4884464984002843,
"grad_norm": 17.05082130432129,
"learning_rate": 8.525891693328595e-06,
"loss": 0.2706,
"step": 14000
},
{
"epoch": 2.5773195876288657,
"grad_norm": 0.11130794882774353,
"learning_rate": 7.0446735395189e-06,
"loss": 0.2591,
"step": 14500
},
{
"epoch": 2.6661926768574475,
"grad_norm": 74.68379974365234,
"learning_rate": 5.5634553857092076e-06,
"loss": 0.3314,
"step": 15000
},
{
"epoch": 2.7550657660860294,
"grad_norm": 15.91612434387207,
"learning_rate": 4.082237231899514e-06,
"loss": 0.2514,
"step": 15500
},
{
"epoch": 2.8439388553146108,
"grad_norm": 39.0718994140625,
"learning_rate": 2.601019078089821e-06,
"loss": 0.2358,
"step": 16000
},
{
"epoch": 2.932811944543192,
"grad_norm": 0.09988761693239212,
"learning_rate": 1.119800924280128e-06,
"loss": 0.2359,
"step": 16500
}
],
"logging_steps": 500,
"max_steps": 16878,
"num_input_tokens_seen": 0,
"num_train_epochs": 3,
"save_steps": 500,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": false
},
"attributes": {}
}
},
"total_flos": 4794112531464192.0,
"train_batch_size": 4,
"trial_name": null,
"trial_params": null
}
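
The JSON above is the checkpoint state written by the Hugging Face Transformers Trainer; each entry in "log_history" records the training loss, gradient norm, and learning rate at one logging step (every 500 steps here). A minimal sketch of how one might load this file and plot those curves is shown below, assuming Python with matplotlib installed (the plotting library is an assumption; any other would work, and the output filename "training_curves.png" is illustrative only).

import json

import matplotlib.pyplot as plt  # assumed available; any plotting library works

# Load the Trainer state exported alongside the checkpoint.
with open("trainer_state.json") as f:
    state = json.load(f)

# log_history holds one record per logging step (logging_steps = 500 here).
steps = [entry["step"] for entry in state["log_history"]]
losses = [entry["loss"] for entry in state["log_history"]]
lrs = [entry["learning_rate"] for entry in state["log_history"]]

# Plot the training loss and learning-rate schedule against the global step;
# grad_norm can be plotted the same way from entry["grad_norm"].
fig, (ax1, ax2) = plt.subplots(2, 1, sharex=True)
ax1.plot(steps, losses)
ax1.set_ylabel("training loss")
ax2.plot(steps, lrs)
ax2.set_ylabel("learning rate")
ax2.set_xlabel("global step")
plt.tight_layout()
plt.savefig("training_curves.png")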