{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 2.001194743130227,
"eval_steps": 500,
"global_step": 1675,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.05973715651135006,
"grad_norm": 2.8490114212036133,
"learning_rate": 0.00016447368421052634,
"loss": 11.6497,
"step": 50
},
{
"epoch": 0.11947431302270012,
"grad_norm": 0.05463433265686035,
"learning_rate": 0.0002475359342915811,
"loss": 4.6396,
"step": 100
},
{
"epoch": 0.17921146953405018,
"grad_norm": 0.03409096226096153,
"learning_rate": 0.00024240246406570843,
"loss": 4.4228,
"step": 150
},
{
"epoch": 0.23894862604540024,
"grad_norm": 0.03413880988955498,
"learning_rate": 0.00023726899383983574,
"loss": 4.3988,
"step": 200
},
{
"epoch": 0.2986857825567503,
"grad_norm": 0.033178623765707016,
"learning_rate": 0.00023213552361396305,
"loss": 4.3922,
"step": 250
},
{
"epoch": 0.35842293906810035,
"grad_norm": 0.028784427791833878,
"learning_rate": 0.00022700205338809036,
"loss": 4.4053,
"step": 300
},
{
"epoch": 0.41816009557945044,
"grad_norm": 0.036163728684186935,
"learning_rate": 0.00022186858316221766,
"loss": 4.3944,
"step": 350
},
{
"epoch": 0.4778972520908005,
"grad_norm": 0.03532182425260544,
"learning_rate": 0.00021673511293634497,
"loss": 4.3938,
"step": 400
},
{
"epoch": 0.5376344086021505,
"grad_norm": 0.03272629156708717,
"learning_rate": 0.00021160164271047228,
"loss": 4.3859,
"step": 450
},
{
"epoch": 0.5973715651135006,
"grad_norm": 0.027959033846855164,
"learning_rate": 0.0002064681724845996,
"loss": 4.3881,
"step": 500
},
{
"epoch": 0.6571087216248507,
"grad_norm": 0.024525364860892296,
"learning_rate": 0.0002013347022587269,
"loss": 4.3989,
"step": 550
},
{
"epoch": 0.7168458781362007,
"grad_norm": 0.025551579892635345,
"learning_rate": 0.00019620123203285423,
"loss": 4.3802,
"step": 600
},
{
"epoch": 0.7765830346475507,
"grad_norm": 0.03189048916101456,
"learning_rate": 0.00019106776180698152,
"loss": 4.4041,
"step": 650
},
{
"epoch": 0.8363201911589009,
"grad_norm": 0.02770661748945713,
"learning_rate": 0.00018593429158110883,
"loss": 4.3955,
"step": 700
},
{
"epoch": 0.8960573476702509,
"grad_norm": 0.03752126544713974,
"learning_rate": 0.00018080082135523616,
"loss": 4.3857,
"step": 750
},
{
"epoch": 0.955794504181601,
"grad_norm": 0.0396958664059639,
"learning_rate": 0.00017566735112936344,
"loss": 4.3847,
"step": 800
},
{
"epoch": 1.015531660692951,
"grad_norm": 0.03522910550236702,
"learning_rate": 0.00017053388090349075,
"loss": 4.3815,
"step": 850
},
{
"epoch": 1.075268817204301,
"grad_norm": 0.033044200390577316,
"learning_rate": 0.00016540041067761806,
"loss": 4.3903,
"step": 900
},
{
"epoch": 1.135005973715651,
"grad_norm": 0.03267841041088104,
"learning_rate": 0.0001602669404517454,
"loss": 4.3836,
"step": 950
},
{
"epoch": 1.194743130227001,
"grad_norm": 0.04201454669237137,
"learning_rate": 0.00015513347022587268,
"loss": 4.3776,
"step": 1000
},
{
"epoch": 1.2544802867383513,
"grad_norm": 0.047623638063669205,
"learning_rate": 0.00015,
"loss": 4.3734,
"step": 1050
},
{
"epoch": 1.3142174432497014,
"grad_norm": 0.03200829401612282,
"learning_rate": 0.00014486652977412732,
"loss": 4.3837,
"step": 1100
},
{
"epoch": 1.3739545997610514,
"grad_norm": 0.04358180612325668,
"learning_rate": 0.00013973305954825463,
"loss": 4.3815,
"step": 1150
},
{
"epoch": 1.4336917562724014,
"grad_norm": 0.04975922778248787,
"learning_rate": 0.0001345995893223819,
"loss": 4.3746,
"step": 1200
},
{
"epoch": 1.4934289127837514,
"grad_norm": 0.03673349320888519,
"learning_rate": 0.00012946611909650925,
"loss": 4.3755,
"step": 1250
},
{
"epoch": 1.5531660692951017,
"grad_norm": 0.03130173310637474,
"learning_rate": 0.00012433264887063656,
"loss": 4.3902,
"step": 1300
},
{
"epoch": 1.6129032258064515,
"grad_norm": 0.03993390500545502,
"learning_rate": 0.00011919917864476385,
"loss": 4.3852,
"step": 1350
},
{
"epoch": 1.6726403823178018,
"grad_norm": 0.04937516897916794,
"learning_rate": 0.00011406570841889118,
"loss": 4.3782,
"step": 1400
},
{
"epoch": 1.7323775388291516,
"grad_norm": 0.04578279331326485,
"learning_rate": 0.00010893223819301848,
"loss": 4.377,
"step": 1450
},
{
"epoch": 1.7921146953405018,
"grad_norm": 0.048149123787879944,
"learning_rate": 0.00010379876796714579,
"loss": 4.3835,
"step": 1500
},
{
"epoch": 1.8518518518518519,
"grad_norm": 0.0500078909099102,
"learning_rate": 9.86652977412731e-05,
"loss": 4.3806,
"step": 1550
},
{
"epoch": 1.911589008363202,
"grad_norm": 0.040174700319767,
"learning_rate": 9.353182751540041e-05,
"loss": 4.3863,
"step": 1600
},
{
"epoch": 1.971326164874552,
"grad_norm": 0.033409375697374344,
"learning_rate": 8.839835728952772e-05,
"loss": 4.3754,
"step": 1650
}
],
"logging_steps": 50,
"max_steps": 2511,
"num_input_tokens_seen": 0,
"num_train_epochs": 3,
"save_steps": 25,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": false
},
"attributes": {}
}
},
"total_flos": 2.0486522326430515e+17,
"train_batch_size": 6,
"trial_name": null,
"trial_params": null
}