Phi-2-QLora / trainer_state.json
JamieAi33's picture
Upload trainer_state.json with huggingface_hub
b24ee19 verified
raw
history blame
16.2 kB
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 2.001000500250125,
"eval_steps": 25,
"global_step": 1000,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.05002501250625312,
"grad_norm": 0.517996072769165,
"learning_rate": 0.0001951951951951952,
"loss": 1.677,
"step": 25
},
{
"epoch": 0.05002501250625312,
"eval_loss": 1.3813503980636597,
"eval_runtime": 148.0614,
"eval_samples_per_second": 3.37,
"eval_steps_per_second": 0.425,
"step": 25
},
{
"epoch": 0.10005002501250625,
"grad_norm": 0.5020231604576111,
"learning_rate": 0.0001901901901901902,
"loss": 1.2016,
"step": 50
},
{
"epoch": 0.10005002501250625,
"eval_loss": 1.347744107246399,
"eval_runtime": 151.7258,
"eval_samples_per_second": 3.289,
"eval_steps_per_second": 0.415,
"step": 50
},
{
"epoch": 0.1500750375187594,
"grad_norm": 0.3798060119152069,
"learning_rate": 0.0001851851851851852,
"loss": 1.4491,
"step": 75
},
{
"epoch": 0.1500750375187594,
"eval_loss": 1.3210723400115967,
"eval_runtime": 150.0032,
"eval_samples_per_second": 3.327,
"eval_steps_per_second": 0.42,
"step": 75
},
{
"epoch": 0.2001000500250125,
"grad_norm": 0.3365944027900696,
"learning_rate": 0.00018018018018018018,
"loss": 1.2076,
"step": 100
},
{
"epoch": 0.2001000500250125,
"eval_loss": 1.3334178924560547,
"eval_runtime": 151.2551,
"eval_samples_per_second": 3.299,
"eval_steps_per_second": 0.417,
"step": 100
},
{
"epoch": 0.25012506253126565,
"grad_norm": 0.22820694744586945,
"learning_rate": 0.0001751751751751752,
"loss": 1.4415,
"step": 125
},
{
"epoch": 0.25012506253126565,
"eval_loss": 1.309592366218567,
"eval_runtime": 149.299,
"eval_samples_per_second": 3.342,
"eval_steps_per_second": 0.422,
"step": 125
},
{
"epoch": 0.3001500750375188,
"grad_norm": 0.3848935663700104,
"learning_rate": 0.0001701701701701702,
"loss": 1.139,
"step": 150
},
{
"epoch": 0.3001500750375188,
"eval_loss": 1.3208202123641968,
"eval_runtime": 149.5811,
"eval_samples_per_second": 3.336,
"eval_steps_per_second": 0.421,
"step": 150
},
{
"epoch": 0.3501750875437719,
"grad_norm": 0.2774136960506439,
"learning_rate": 0.00016516516516516518,
"loss": 1.4055,
"step": 175
},
{
"epoch": 0.3501750875437719,
"eval_loss": 1.3086917400360107,
"eval_runtime": 150.1042,
"eval_samples_per_second": 3.324,
"eval_steps_per_second": 0.42,
"step": 175
},
{
"epoch": 0.400200100050025,
"grad_norm": 0.32166117429733276,
"learning_rate": 0.00016016016016016018,
"loss": 1.1459,
"step": 200
},
{
"epoch": 0.400200100050025,
"eval_loss": 1.306862473487854,
"eval_runtime": 150.7168,
"eval_samples_per_second": 3.311,
"eval_steps_per_second": 0.418,
"step": 200
},
{
"epoch": 0.4502251125562781,
"grad_norm": 0.23773141205310822,
"learning_rate": 0.00015515515515515516,
"loss": 1.4444,
"step": 225
},
{
"epoch": 0.4502251125562781,
"eval_loss": 1.3020325899124146,
"eval_runtime": 148.5364,
"eval_samples_per_second": 3.359,
"eval_steps_per_second": 0.424,
"step": 225
},
{
"epoch": 0.5002501250625313,
"grad_norm": 0.37095341086387634,
"learning_rate": 0.00015015015015015014,
"loss": 1.2264,
"step": 250
},
{
"epoch": 0.5002501250625313,
"eval_loss": 1.3001904487609863,
"eval_runtime": 152.658,
"eval_samples_per_second": 3.269,
"eval_steps_per_second": 0.413,
"step": 250
},
{
"epoch": 0.5502751375687844,
"grad_norm": 0.2519828677177429,
"learning_rate": 0.00014514514514514515,
"loss": 1.4605,
"step": 275
},
{
"epoch": 0.5502751375687844,
"eval_loss": 1.299567699432373,
"eval_runtime": 148.4653,
"eval_samples_per_second": 3.361,
"eval_steps_per_second": 0.424,
"step": 275
},
{
"epoch": 0.6003001500750376,
"grad_norm": 0.3685779273509979,
"learning_rate": 0.00014014014014014013,
"loss": 1.1655,
"step": 300
},
{
"epoch": 0.6003001500750376,
"eval_loss": 1.2988265752792358,
"eval_runtime": 151.1788,
"eval_samples_per_second": 3.301,
"eval_steps_per_second": 0.417,
"step": 300
},
{
"epoch": 0.6503251625812907,
"grad_norm": 0.26966241002082825,
"learning_rate": 0.00013513513513513514,
"loss": 1.4313,
"step": 325
},
{
"epoch": 0.6503251625812907,
"eval_loss": 1.298296332359314,
"eval_runtime": 152.0718,
"eval_samples_per_second": 3.281,
"eval_steps_per_second": 0.414,
"step": 325
},
{
"epoch": 0.7003501750875438,
"grad_norm": 0.35637611150741577,
"learning_rate": 0.00013013013013013014,
"loss": 1.2002,
"step": 350
},
{
"epoch": 0.7003501750875438,
"eval_loss": 1.2959158420562744,
"eval_runtime": 151.1585,
"eval_samples_per_second": 3.301,
"eval_steps_per_second": 0.417,
"step": 350
},
{
"epoch": 0.7503751875937968,
"grad_norm": 0.22513383626937866,
"learning_rate": 0.00012512512512512512,
"loss": 1.3994,
"step": 375
},
{
"epoch": 0.7503751875937968,
"eval_loss": 1.2951635122299194,
"eval_runtime": 148.5372,
"eval_samples_per_second": 3.359,
"eval_steps_per_second": 0.424,
"step": 375
},
{
"epoch": 0.80040020010005,
"grad_norm": 0.35314086079597473,
"learning_rate": 0.00012012012012012013,
"loss": 1.1836,
"step": 400
},
{
"epoch": 0.80040020010005,
"eval_loss": 1.294690728187561,
"eval_runtime": 149.3769,
"eval_samples_per_second": 3.341,
"eval_steps_per_second": 0.422,
"step": 400
},
{
"epoch": 0.8504252126063031,
"grad_norm": 0.240916907787323,
"learning_rate": 0.00011511511511511512,
"loss": 1.4378,
"step": 425
},
{
"epoch": 0.8504252126063031,
"eval_loss": 1.2916043996810913,
"eval_runtime": 152.0772,
"eval_samples_per_second": 3.281,
"eval_steps_per_second": 0.414,
"step": 425
},
{
"epoch": 0.9004502251125562,
"grad_norm": 0.31087398529052734,
"learning_rate": 0.00011011011011011012,
"loss": 1.1989,
"step": 450
},
{
"epoch": 0.9004502251125562,
"eval_loss": 1.2893831729888916,
"eval_runtime": 150.4895,
"eval_samples_per_second": 3.316,
"eval_steps_per_second": 0.419,
"step": 450
},
{
"epoch": 0.9504752376188094,
"grad_norm": 0.2413586527109146,
"learning_rate": 0.00010510510510510511,
"loss": 1.4508,
"step": 475
},
{
"epoch": 0.9504752376188094,
"eval_loss": 1.2888984680175781,
"eval_runtime": 151.1108,
"eval_samples_per_second": 3.302,
"eval_steps_per_second": 0.417,
"step": 475
},
{
"epoch": 1.0005002501250626,
"grad_norm": 0.40069064497947693,
"learning_rate": 0.00010010010010010012,
"loss": 1.2076,
"step": 500
},
{
"epoch": 1.0005002501250626,
"eval_loss": 1.2911962270736694,
"eval_runtime": 148.6843,
"eval_samples_per_second": 3.356,
"eval_steps_per_second": 0.424,
"step": 500
},
{
"epoch": 1.0505252626313157,
"grad_norm": 0.22050493955612183,
"learning_rate": 9.50950950950951e-05,
"loss": 1.3994,
"step": 525
},
{
"epoch": 1.0505252626313157,
"eval_loss": 1.2921332120895386,
"eval_runtime": 149.3015,
"eval_samples_per_second": 3.342,
"eval_steps_per_second": 0.422,
"step": 525
},
{
"epoch": 1.1005502751375689,
"grad_norm": 0.3588818907737732,
"learning_rate": 9.009009009009009e-05,
"loss": 1.177,
"step": 550
},
{
"epoch": 1.1005502751375689,
"eval_loss": 1.2903811931610107,
"eval_runtime": 149.8093,
"eval_samples_per_second": 3.331,
"eval_steps_per_second": 0.421,
"step": 550
},
{
"epoch": 1.150575287643822,
"grad_norm": 0.2672303020954132,
"learning_rate": 8.50850850850851e-05,
"loss": 1.4015,
"step": 575
},
{
"epoch": 1.150575287643822,
"eval_loss": 1.2898900508880615,
"eval_runtime": 149.8311,
"eval_samples_per_second": 3.33,
"eval_steps_per_second": 0.42,
"step": 575
},
{
"epoch": 1.2006003001500751,
"grad_norm": 0.31220486760139465,
"learning_rate": 8.008008008008009e-05,
"loss": 1.192,
"step": 600
},
{
"epoch": 1.2006003001500751,
"eval_loss": 1.288824439048767,
"eval_runtime": 151.038,
"eval_samples_per_second": 3.304,
"eval_steps_per_second": 0.417,
"step": 600
},
{
"epoch": 1.2506253126563283,
"grad_norm": 0.2526504695415497,
"learning_rate": 7.507507507507507e-05,
"loss": 1.3829,
"step": 625
},
{
"epoch": 1.2506253126563283,
"eval_loss": 1.2878332138061523,
"eval_runtime": 151.5015,
"eval_samples_per_second": 3.294,
"eval_steps_per_second": 0.416,
"step": 625
},
{
"epoch": 1.3006503251625814,
"grad_norm": 0.28051283955574036,
"learning_rate": 7.007007007007007e-05,
"loss": 1.1514,
"step": 650
},
{
"epoch": 1.3006503251625814,
"eval_loss": 1.2859280109405518,
"eval_runtime": 150.4738,
"eval_samples_per_second": 3.316,
"eval_steps_per_second": 0.419,
"step": 650
},
{
"epoch": 1.3506753376688345,
"grad_norm": 0.26419979333877563,
"learning_rate": 6.506506506506507e-05,
"loss": 1.4028,
"step": 675
},
{
"epoch": 1.3506753376688345,
"eval_loss": 1.2848296165466309,
"eval_runtime": 149.0963,
"eval_samples_per_second": 3.347,
"eval_steps_per_second": 0.423,
"step": 675
},
{
"epoch": 1.4007003501750876,
"grad_norm": 0.3227976858615875,
"learning_rate": 6.0060060060060066e-05,
"loss": 1.1778,
"step": 700
},
{
"epoch": 1.4007003501750876,
"eval_loss": 1.285400152206421,
"eval_runtime": 149.1519,
"eval_samples_per_second": 3.346,
"eval_steps_per_second": 0.422,
"step": 700
},
{
"epoch": 1.4507253626813408,
"grad_norm": 0.24903441965579987,
"learning_rate": 5.505505505505506e-05,
"loss": 1.4058,
"step": 725
},
{
"epoch": 1.4507253626813408,
"eval_loss": 1.2824435234069824,
"eval_runtime": 149.5232,
"eval_samples_per_second": 3.337,
"eval_steps_per_second": 0.421,
"step": 725
},
{
"epoch": 1.500750375187594,
"grad_norm": 0.31187903881073,
"learning_rate": 5.005005005005006e-05,
"loss": 1.1698,
"step": 750
},
{
"epoch": 1.500750375187594,
"eval_loss": 1.2831988334655762,
"eval_runtime": 150.4227,
"eval_samples_per_second": 3.317,
"eval_steps_per_second": 0.419,
"step": 750
},
{
"epoch": 1.550775387693847,
"grad_norm": 0.2889004051685333,
"learning_rate": 4.5045045045045046e-05,
"loss": 1.3516,
"step": 775
},
{
"epoch": 1.550775387693847,
"eval_loss": 1.2823545932769775,
"eval_runtime": 149.8614,
"eval_samples_per_second": 3.33,
"eval_steps_per_second": 0.42,
"step": 775
},
{
"epoch": 1.6008004002001002,
"grad_norm": 0.37189939618110657,
"learning_rate": 4.0040040040040046e-05,
"loss": 1.1264,
"step": 800
},
{
"epoch": 1.6008004002001002,
"eval_loss": 1.2828818559646606,
"eval_runtime": 150.672,
"eval_samples_per_second": 3.312,
"eval_steps_per_second": 0.418,
"step": 800
},
{
"epoch": 1.6508254127063533,
"grad_norm": 0.25290611386299133,
"learning_rate": 3.503503503503503e-05,
"loss": 1.4113,
"step": 825
},
{
"epoch": 1.6508254127063533,
"eval_loss": 1.2822470664978027,
"eval_runtime": 149.3988,
"eval_samples_per_second": 3.34,
"eval_steps_per_second": 0.422,
"step": 825
},
{
"epoch": 1.7008504252126064,
"grad_norm": 0.3559873104095459,
"learning_rate": 3.0030030030030033e-05,
"loss": 1.1248,
"step": 850
},
{
"epoch": 1.7008504252126064,
"eval_loss": 1.2828270196914673,
"eval_runtime": 149.9897,
"eval_samples_per_second": 3.327,
"eval_steps_per_second": 0.42,
"step": 850
},
{
"epoch": 1.7508754377188596,
"grad_norm": 0.3052867352962494,
"learning_rate": 2.502502502502503e-05,
"loss": 1.336,
"step": 875
},
{
"epoch": 1.7508754377188596,
"eval_loss": 1.282852053642273,
"eval_runtime": 151.397,
"eval_samples_per_second": 3.296,
"eval_steps_per_second": 0.416,
"step": 875
},
{
"epoch": 1.8009004502251127,
"grad_norm": 0.33662667870521545,
"learning_rate": 2.0020020020020023e-05,
"loss": 1.0725,
"step": 900
},
{
"epoch": 1.8009004502251127,
"eval_loss": 1.2822794914245605,
"eval_runtime": 150.7632,
"eval_samples_per_second": 3.31,
"eval_steps_per_second": 0.418,
"step": 900
},
{
"epoch": 1.8509254627313658,
"grad_norm": 0.29956212639808655,
"learning_rate": 1.5015015015015016e-05,
"loss": 1.3989,
"step": 925
},
{
"epoch": 1.8509254627313658,
"eval_loss": 1.2824186086654663,
"eval_runtime": 150.6938,
"eval_samples_per_second": 3.311,
"eval_steps_per_second": 0.418,
"step": 925
},
{
"epoch": 1.900950475237619,
"grad_norm": 0.3255136013031006,
"learning_rate": 1.0010010010010011e-05,
"loss": 1.112,
"step": 950
},
{
"epoch": 1.900950475237619,
"eval_loss": 1.28144371509552,
"eval_runtime": 149.8969,
"eval_samples_per_second": 3.329,
"eval_steps_per_second": 0.42,
"step": 950
},
{
"epoch": 1.950975487743872,
"grad_norm": 0.2689700424671173,
"learning_rate": 5.005005005005006e-06,
"loss": 1.3972,
"step": 975
},
{
"epoch": 1.950975487743872,
"eval_loss": 1.280760645866394,
"eval_runtime": 149.8977,
"eval_samples_per_second": 3.329,
"eval_steps_per_second": 0.42,
"step": 975
},
{
"epoch": 2.001000500250125,
"grad_norm": 0.3633726239204407,
"learning_rate": 0.0,
"loss": 1.1746,
"step": 1000
},
{
"epoch": 2.001000500250125,
"eval_loss": 1.2818013429641724,
"eval_runtime": 149.8121,
"eval_samples_per_second": 3.331,
"eval_steps_per_second": 0.421,
"step": 1000
}
],
"logging_steps": 25,
"max_steps": 1000,
"num_input_tokens_seen": 0,
"num_train_epochs": 3,
"save_steps": 25,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 1.844485620424704e+16,
"train_batch_size": 1,
"trial_name": null,
"trial_params": null
}