{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 1.3289365210222588,
"eval_steps": 25,
"global_step": 50,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.026380873866446827,
"grad_norm": 1.777541995048523,
"learning_rate": 5e-05,
"loss": 2.4185,
"step": 1
},
{
"epoch": 0.026380873866446827,
"eval_loss": 3.623441457748413,
"eval_runtime": 1.1406,
"eval_samples_per_second": 43.838,
"eval_steps_per_second": 11.398,
"step": 1
},
{
"epoch": 0.05276174773289365,
"grad_norm": 2.632246255874634,
"learning_rate": 0.0001,
"loss": 2.7331,
"step": 2
},
{
"epoch": 0.07914262159934048,
"grad_norm": 2.883641004562378,
"learning_rate": 9.990365154573717e-05,
"loss": 2.8645,
"step": 3
},
{
"epoch": 0.1055234954657873,
"grad_norm": 2.4836220741271973,
"learning_rate": 9.961501876182148e-05,
"loss": 2.5391,
"step": 4
},
{
"epoch": 0.13190436933223412,
"grad_norm": 2.5708866119384766,
"learning_rate": 9.913533761814537e-05,
"loss": 2.4391,
"step": 5
},
{
"epoch": 0.15828524319868095,
"grad_norm": 2.931725263595581,
"learning_rate": 9.846666218300807e-05,
"loss": 2.3818,
"step": 6
},
{
"epoch": 0.18466611706512778,
"grad_norm": 2.899811267852783,
"learning_rate": 9.761185582727977e-05,
"loss": 2.4126,
"step": 7
},
{
"epoch": 0.2110469909315746,
"grad_norm": 3.1962263584136963,
"learning_rate": 9.657457896300791e-05,
"loss": 2.4357,
"step": 8
},
{
"epoch": 0.23742786479802144,
"grad_norm": 4.456316947937012,
"learning_rate": 9.535927336897098e-05,
"loss": 2.7543,
"step": 9
},
{
"epoch": 0.26380873866446825,
"grad_norm": 2.725989818572998,
"learning_rate": 9.397114317029975e-05,
"loss": 1.7691,
"step": 10
},
{
"epoch": 0.2901896125309151,
"grad_norm": 2.840864896774292,
"learning_rate": 9.241613255361455e-05,
"loss": 1.2073,
"step": 11
},
{
"epoch": 0.3165704863973619,
"grad_norm": 2.4223415851593018,
"learning_rate": 9.070090031310558e-05,
"loss": 1.0719,
"step": 12
},
{
"epoch": 0.34295136026380874,
"grad_norm": 1.9131155014038086,
"learning_rate": 8.883279133655399e-05,
"loss": 1.0024,
"step": 13
},
{
"epoch": 0.36933223413025557,
"grad_norm": 1.920853853225708,
"learning_rate": 8.681980515339464e-05,
"loss": 0.9629,
"step": 14
},
{
"epoch": 0.3957131079967024,
"grad_norm": 1.7250676155090332,
"learning_rate": 8.467056167950311e-05,
"loss": 1.1086,
"step": 15
},
{
"epoch": 0.4220939818631492,
"grad_norm": 1.8188282251358032,
"learning_rate": 8.239426430539243e-05,
"loss": 1.0697,
"step": 16
},
{
"epoch": 0.44847485572959606,
"grad_norm": 1.8869132995605469,
"learning_rate": 8.000066048588211e-05,
"loss": 1.1263,
"step": 17
},
{
"epoch": 0.4748557295960429,
"grad_norm": 3.000149965286255,
"learning_rate": 7.75e-05,
"loss": 1.2637,
"step": 18
},
{
"epoch": 0.5012366034624897,
"grad_norm": 2.330080270767212,
"learning_rate": 7.490299105985507e-05,
"loss": 1.1939,
"step": 19
},
{
"epoch": 0.5276174773289365,
"grad_norm": 1.816513180732727,
"learning_rate": 7.222075445642904e-05,
"loss": 0.6319,
"step": 20
},
{
"epoch": 0.5539983511953833,
"grad_norm": 1.4163626432418823,
"learning_rate": 6.946477593864228e-05,
"loss": 0.4592,
"step": 21
},
{
"epoch": 0.5803792250618302,
"grad_norm": 1.3975715637207031,
"learning_rate": 6.664685702961344e-05,
"loss": 0.4425,
"step": 22
},
{
"epoch": 0.606760098928277,
"grad_norm": 1.2029556035995483,
"learning_rate": 6.377906449072578e-05,
"loss": 0.4378,
"step": 23
},
{
"epoch": 0.6331409727947238,
"grad_norm": 1.521054744720459,
"learning_rate": 6.087367864990233e-05,
"loss": 0.4887,
"step": 24
},
{
"epoch": 0.6595218466611706,
"grad_norm": 2.3948323726654053,
"learning_rate": 5.794314081535644e-05,
"loss": 0.5964,
"step": 25
},
{
"epoch": 0.6595218466611706,
"eval_loss": 0.5060842037200928,
"eval_runtime": 1.1542,
"eval_samples_per_second": 43.32,
"eval_steps_per_second": 11.263,
"step": 25
},
{
"epoch": 0.6859027205276175,
"grad_norm": 2.5826122760772705,
"learning_rate": 5.500000000000001e-05,
"loss": 0.6125,
"step": 26
},
{
"epoch": 0.7122835943940643,
"grad_norm": 3.505153179168701,
"learning_rate": 5.205685918464356e-05,
"loss": 0.6932,
"step": 27
},
{
"epoch": 0.7386644682605111,
"grad_norm": 2.812425136566162,
"learning_rate": 4.912632135009769e-05,
"loss": 0.6941,
"step": 28
},
{
"epoch": 0.765045342126958,
"grad_norm": 2.6993019580841064,
"learning_rate": 4.6220935509274235e-05,
"loss": 0.5275,
"step": 29
},
{
"epoch": 0.7914262159934048,
"grad_norm": 1.6319504976272583,
"learning_rate": 4.3353142970386564e-05,
"loss": 0.1855,
"step": 30
},
{
"epoch": 0.8178070898598516,
"grad_norm": 1.3986767530441284,
"learning_rate": 4.053522406135775e-05,
"loss": 0.1984,
"step": 31
},
{
"epoch": 0.8441879637262985,
"grad_norm": 1.2282105684280396,
"learning_rate": 3.777924554357096e-05,
"loss": 0.209,
"step": 32
},
{
"epoch": 0.8705688375927453,
"grad_norm": 0.9565277695655823,
"learning_rate": 3.509700894014496e-05,
"loss": 0.1846,
"step": 33
},
{
"epoch": 0.8969497114591921,
"grad_norm": 1.4811856746673584,
"learning_rate": 3.250000000000001e-05,
"loss": 0.2668,
"step": 34
},
{
"epoch": 0.9233305853256389,
"grad_norm": 1.6929315328598022,
"learning_rate": 2.9999339514117912e-05,
"loss": 0.3288,
"step": 35
},
{
"epoch": 0.9497114591920858,
"grad_norm": 2.3932266235351562,
"learning_rate": 2.760573569460757e-05,
"loss": 0.4036,
"step": 36
},
{
"epoch": 0.9760923330585326,
"grad_norm": 3.5589423179626465,
"learning_rate": 2.53294383204969e-05,
"loss": 0.4499,
"step": 37
},
{
"epoch": 1.012366034624897,
"grad_norm": 2.737474203109741,
"learning_rate": 2.3180194846605367e-05,
"loss": 0.4322,
"step": 38
},
{
"epoch": 1.0387469084913439,
"grad_norm": 1.1250501871109009,
"learning_rate": 2.1167208663446025e-05,
"loss": 0.1337,
"step": 39
},
{
"epoch": 1.0651277823577905,
"grad_norm": 0.7960028052330017,
"learning_rate": 1.9299099686894423e-05,
"loss": 0.085,
"step": 40
},
{
"epoch": 1.0915086562242373,
"grad_norm": 1.7320730686187744,
"learning_rate": 1.758386744638546e-05,
"loss": 0.0977,
"step": 41
},
{
"epoch": 1.1178895300906841,
"grad_norm": 1.0344929695129395,
"learning_rate": 1.602885682970026e-05,
"loss": 0.083,
"step": 42
},
{
"epoch": 1.144270403957131,
"grad_norm": 1.1064660549163818,
"learning_rate": 1.464072663102903e-05,
"loss": 0.1348,
"step": 43
},
{
"epoch": 1.1706512778235778,
"grad_norm": 1.4996426105499268,
"learning_rate": 1.3425421036992098e-05,
"loss": 0.1599,
"step": 44
},
{
"epoch": 1.1970321516900246,
"grad_norm": 1.5553085803985596,
"learning_rate": 1.2388144172720251e-05,
"loss": 0.2289,
"step": 45
},
{
"epoch": 1.2234130255564715,
"grad_norm": 2.5401673316955566,
"learning_rate": 1.1533337816991932e-05,
"loss": 0.3329,
"step": 46
},
{
"epoch": 1.2497938994229183,
"grad_norm": 2.7614471912384033,
"learning_rate": 1.0864662381854632e-05,
"loss": 0.2996,
"step": 47
},
{
"epoch": 1.2761747732893651,
"grad_norm": 2.3601768016815186,
"learning_rate": 1.0384981238178534e-05,
"loss": 0.1815,
"step": 48
},
{
"epoch": 1.302555647155812,
"grad_norm": 0.7562735080718994,
"learning_rate": 1.0096348454262845e-05,
"loss": 0.0601,
"step": 49
},
{
"epoch": 1.3289365210222588,
"grad_norm": 0.7278848886489868,
"learning_rate": 1e-05,
"loss": 0.0562,
"step": 50
},
{
"epoch": 1.3289365210222588,
"eval_loss": 0.14836329221725464,
"eval_runtime": 1.1577,
"eval_samples_per_second": 43.188,
"eval_steps_per_second": 11.229,
"step": 50
}
],
"logging_steps": 1,
"max_steps": 50,
"num_input_tokens_seen": 0,
"num_train_epochs": 2,
"save_steps": 25,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 1.059536353886208e+17,
"train_batch_size": 1,
"trial_name": null,
"trial_params": null
}
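
The JSON above is the trainer_state.json that the Hugging Face Trainer writes into each checkpoint folder when it saves (here at global_step 50). A minimal sketch for inspecting it, assuming the file is saved locally under its standard name trainer_state.json; the field names used below follow the structure shown above:

    import json

    # Load the checkpoint state shown above.
    with open("trainer_state.json") as f:
        state = json.load(f)

    # log_history mixes training entries (with "loss") and eval entries (with "eval_loss").
    train_logs = [e for e in state["log_history"] if "loss" in e]
    eval_logs = [e for e in state["log_history"] if "eval_loss" in e]

    print(f"global_step={state['global_step']}  epoch={state['epoch']:.4f}")
    print(f"train loss: {train_logs[0]['loss']} (step {train_logs[0]['step']}) -> "
          f"{train_logs[-1]['loss']} (step {train_logs[-1]['step']})")
    for e in eval_logs:
        print(f"step {e['step']:>3}: eval_loss={e['eval_loss']:.4f}")

To continue training from this state, the usual route is trainer.train(resume_from_checkpoint="path/to/checkpoint-50") (the folder name checkpoint-50 is the Trainer's default naming for step 50 and is an assumption here); the Trainer reads this file to restore the global step and epoch before resuming.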