pszemraj's picture
update with +2 epochs ft
7c2dda0
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 1.5197355996222852,
"global_step": 125,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.02,
"learning_rate": 0.0004,
"loss": 2.6298,
"step": 2
},
{
"epoch": 0.05,
"learning_rate": 0.0008,
"loss": 2.6348,
"step": 4
},
{
"epoch": 0.07,
"learning_rate": 0.0009999024041442454,
"loss": 2.6855,
"step": 6
},
{
"epoch": 0.1,
"learning_rate": 0.0009991218658821608,
"loss": 2.6779,
"step": 8
},
{
"epoch": 0.12,
"learning_rate": 0.0009975620080758321,
"loss": 2.7265,
"step": 10
},
{
"epoch": 0.15,
"learning_rate": 0.000995225266258058,
"loss": 2.7332,
"step": 12
},
{
"epoch": 0.17,
"learning_rate": 0.0009921152889737985,
"loss": 2.7308,
"step": 14
},
{
"epoch": 0.19,
"learning_rate": 0.0009882369320834067,
"loss": 2.7706,
"step": 16
},
{
"epoch": 0.22,
"learning_rate": 0.0009835962511807786,
"loss": 2.7455,
"step": 18
},
{
"epoch": 0.24,
"learning_rate": 0.0009782004921382612,
"loss": 2.7819,
"step": 20
},
{
"epoch": 0.27,
"learning_rate": 0.0009720580797930845,
"loss": 2.7122,
"step": 22
},
{
"epoch": 0.29,
"learning_rate": 0.0009651786047929772,
"loss": 2.7141,
"step": 24
},
{
"epoch": 0.31,
"learning_rate": 0.0009575728086215092,
"loss": 2.694,
"step": 26
},
{
"epoch": 0.34,
"learning_rate": 0.00094925256682654,
"loss": 2.6981,
"step": 28
},
{
"epoch": 0.36,
"learning_rate": 0.0009402308704779598,
"loss": 2.6848,
"step": 30
},
{
"epoch": 0.39,
"learning_rate": 0.0009305218058836777,
"loss": 2.6498,
"step": 32
},
{
"epoch": 0.41,
"learning_rate": 0.0009201405325955221,
"loss": 2.7059,
"step": 34
},
{
"epoch": 0.44,
"learning_rate": 0.0009091032597394012,
"loss": 2.7016,
"step": 36
},
{
"epoch": 0.46,
"learning_rate": 0.0008974272207066767,
"loss": 2.6599,
"step": 38
},
{
"epoch": 0.48,
"learning_rate": 0.0008851306462462688,
"loss": 2.6922,
"step": 40
},
{
"epoch": 0.51,
"learning_rate": 0.0008722327359995064,
"loss": 2.6473,
"step": 42
},
{
"epoch": 0.53,
"learning_rate": 0.0008587536285221655,
"loss": 2.7236,
"step": 44
},
{
"epoch": 0.56,
"learning_rate": 0.000844714369840506,
"loss": 2.6924,
"step": 46
},
{
"epoch": 0.58,
"learning_rate": 0.0008301368805903988,
"loss": 2.6428,
"step": 48
},
{
"epoch": 0.6,
"learning_rate": 0.0008150439217908557,
"loss": 2.6161,
"step": 50
},
{
"epoch": 0.63,
"learning_rate": 0.0007994590593054001,
"loss": 2.6721,
"step": 52
},
{
"epoch": 0.65,
"learning_rate": 0.0007834066270467691,
"loss": 2.6506,
"step": 54
},
{
"epoch": 0.68,
"learning_rate": 0.0007669116889823954,
"loss": 2.6787,
"step": 56
},
{
"epoch": 0.7,
"learning_rate": 0.00075,
"loss": 2.6172,
"step": 58
},
{
"epoch": 0.73,
"learning_rate": 0.0007326979656943906,
"loss": 2.6071,
"step": 60
},
{
"epoch": 0.75,
"learning_rate": 0.0007150326011382603,
"loss": 2.6513,
"step": 62
},
{
"epoch": 0.77,
"learning_rate": 0.0006970314887013584,
"loss": 2.679,
"step": 64
},
{
"epoch": 0.8,
"learning_rate": 0.0006787227349838946,
"loss": 2.6879,
"step": 66
},
{
"epoch": 0.82,
"learning_rate": 0.0006601349269314187,
"loss": 2.6459,
"step": 68
},
{
"epoch": 0.85,
"learning_rate": 0.0006412970871996995,
"loss": 2.6268,
"step": 70
},
{
"epoch": 0.87,
"learning_rate": 0.0006222386288392914,
"loss": 2.615,
"step": 72
},
{
"epoch": 0.89,
"learning_rate": 0.0006029893093705492,
"loss": 2.6541,
"step": 74
},
{
"epoch": 0.92,
"learning_rate": 0.0005835791843207916,
"loss": 2.6697,
"step": 76
},
{
"epoch": 0.94,
"learning_rate": 0.0005640385602961634,
"loss": 2.6017,
"step": 78
},
{
"epoch": 0.97,
"learning_rate": 0.0005443979476614675,
"loss": 2.658,
"step": 80
},
{
"epoch": 0.99,
"learning_rate": 0.0005246880129018515,
"loss": 2.5922,
"step": 82
},
{
"epoch": 1.02,
"learning_rate": 0.0005049395307407329,
"loss": 3.5292,
"step": 84
},
{
"epoch": 1.05,
"learning_rate": 0.0004851833360887201,
"loss": 2.5377,
"step": 86
},
{
"epoch": 1.07,
"learning_rate": 0.0004654502758985611,
"loss": 2.6096,
"step": 88
},
{
"epoch": 1.1,
"learning_rate": 0.00044577116100128736,
"loss": 2.5112,
"step": 90
},
{
"epoch": 1.12,
"learning_rate": 0.00042617671799875947,
"loss": 2.5469,
"step": 92
},
{
"epoch": 1.15,
"learning_rate": 0.0004066975412877255,
"loss": 2.5364,
"step": 94
},
{
"epoch": 1.17,
"learning_rate": 0.00038736404529030255,
"loss": 2.5132,
"step": 96
},
{
"epoch": 1.19,
"learning_rate": 0.0003682064169654663,
"loss": 2.5339,
"step": 98
},
{
"epoch": 1.22,
"learning_rate": 0.0003492545686756986,
"loss": 2.5136,
"step": 100
},
{
"epoch": 1.24,
"learning_rate": 0.00033053809148238423,
"loss": 2.5043,
"step": 102
},
{
"epoch": 1.27,
"learning_rate": 0.00031208620894288106,
"loss": 2.5174,
"step": 104
},
{
"epoch": 1.29,
"learning_rate": 0.0002939277314814041,
"loss": 2.479,
"step": 106
},
{
"epoch": 1.31,
"learning_rate": 0.0002760910114049686,
"loss": 2.5554,
"step": 108
},
{
"epoch": 1.34,
"learning_rate": 0.00025860389863462764,
"loss": 2.5007,
"step": 110
},
{
"epoch": 1.36,
"learning_rate": 0.00024149369722112717,
"loss": 2.493,
"step": 112
},
{
"epoch": 1.39,
"learning_rate": 0.00022478712271287087,
"loss": 2.4715,
"step": 114
},
{
"epoch": 1.41,
"learning_rate": 0.00020851026044276406,
"loss": 2.5359,
"step": 116
},
{
"epoch": 1.44,
"learning_rate": 0.00019268852479906146,
"loss": 2.5447,
"step": 118
},
{
"epoch": 1.46,
"learning_rate": 0.00017734661954381753,
"loss": 2.5141,
"step": 120
},
{
"epoch": 1.48,
"learning_rate": 0.00016250849924089484,
"loss": 2.558,
"step": 122
},
{
"epoch": 1.51,
"learning_rate": 0.00014819733185375534,
"loss": 2.5372,
"step": 124
}
],
"max_steps": 164,
"num_train_epochs": 2,
"total_flos": 1.1869674526642012e+18,
"trial_name": null,
"trial_params": null
}