chibao24's picture
Upload folder using huggingface_hub
8b89149 verified
raw
history blame
No virus
11.4 kB
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 7.398694794354227,
"eval_steps": 750,
"global_step": 97500,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.15176809834572771,
"grad_norm": 1.347341537475586,
"learning_rate": 1.3333333333333333e-05,
"loss": 2.0649,
"step": 2000
},
{
"epoch": 0.30353619669145543,
"grad_norm": 1.299472689628601,
"learning_rate": 1.984469638142569e-05,
"loss": 1.7903,
"step": 4000
},
{
"epoch": 0.45530429503718317,
"grad_norm": 1.5234107971191406,
"learning_rate": 1.9534089144277063e-05,
"loss": 1.6734,
"step": 6000
},
{
"epoch": 0.6070723933829109,
"grad_norm": 6.629986763000488,
"learning_rate": 1.922348190712844e-05,
"loss": 1.6014,
"step": 8000
},
{
"epoch": 0.7588404917286387,
"grad_norm": 1.8827601671218872,
"learning_rate": 1.8912874669979814e-05,
"loss": 1.5582,
"step": 10000
},
{
"epoch": 0.9106085900743663,
"grad_norm": 1.4713941812515259,
"learning_rate": 1.8602267432831186e-05,
"loss": 1.5393,
"step": 12000
},
{
"epoch": 1.0,
"eval_loss": 1.4226312637329102,
"eval_rouge1": 15.9571,
"eval_rouge2": 5.6803,
"eval_rougeL": 13.7071,
"eval_rougeLsum": 13.8127,
"eval_runtime": 167.9928,
"eval_samples_per_second": 29.763,
"eval_steps_per_second": 3.72,
"step": 13178
},
{
"epoch": 1.062376688420094,
"grad_norm": 1.397419810295105,
"learning_rate": 1.829166019568256e-05,
"loss": 1.5108,
"step": 14000
},
{
"epoch": 1.214144786765822,
"grad_norm": 1.282456398010254,
"learning_rate": 1.7981052958533937e-05,
"loss": 1.4931,
"step": 16000
},
{
"epoch": 1.3659128851115496,
"grad_norm": 1.5143241882324219,
"learning_rate": 1.767044572138531e-05,
"loss": 1.4687,
"step": 18000
},
{
"epoch": 1.5176809834572773,
"grad_norm": 1.0350826978683472,
"learning_rate": 1.7359838484236684e-05,
"loss": 1.4543,
"step": 20000
},
{
"epoch": 1.669449081803005,
"grad_norm": 1.2214738130569458,
"learning_rate": 1.704923124708806e-05,
"loss": 1.4466,
"step": 22000
},
{
"epoch": 1.8212171801487327,
"grad_norm": 1.226135492324829,
"learning_rate": 1.6738624009939432e-05,
"loss": 1.4314,
"step": 24000
},
{
"epoch": 1.9729852784944604,
"grad_norm": 1.2347540855407715,
"learning_rate": 1.6428016772790807e-05,
"loss": 1.4255,
"step": 26000
},
{
"epoch": 2.0,
"eval_loss": 1.3427051305770874,
"eval_rouge1": 16.3039,
"eval_rouge2": 6.0757,
"eval_rougeL": 14.0031,
"eval_rougeLsum": 14.1114,
"eval_runtime": 168.2423,
"eval_samples_per_second": 29.719,
"eval_steps_per_second": 3.715,
"step": 26356
},
{
"epoch": 2.124753376840188,
"grad_norm": 1.024688482284546,
"learning_rate": 1.6117409535642183e-05,
"loss": 1.4107,
"step": 28000
},
{
"epoch": 2.2765214751859157,
"grad_norm": 1.1046956777572632,
"learning_rate": 1.5806802298493555e-05,
"loss": 1.3999,
"step": 30000
},
{
"epoch": 2.428289573531644,
"grad_norm": 1.4118067026138306,
"learning_rate": 1.549619506134493e-05,
"loss": 1.394,
"step": 32000
},
{
"epoch": 2.5800576718773716,
"grad_norm": 1.0044879913330078,
"learning_rate": 1.5185587824196304e-05,
"loss": 1.3894,
"step": 34000
},
{
"epoch": 2.7318257702230992,
"grad_norm": 3.0287246704101562,
"learning_rate": 1.4874980587047681e-05,
"loss": 1.3823,
"step": 36000
},
{
"epoch": 2.883593868568827,
"grad_norm": 0.8824607133865356,
"learning_rate": 1.4564373349899055e-05,
"loss": 1.3747,
"step": 38000
},
{
"epoch": 3.0,
"eval_loss": 1.2928217649459839,
"eval_rouge1": 16.2852,
"eval_rouge2": 6.139,
"eval_rougeL": 14.0119,
"eval_rougeLsum": 14.1209,
"eval_runtime": 172.2837,
"eval_samples_per_second": 29.022,
"eval_steps_per_second": 3.628,
"step": 39534
},
{
"epoch": 3.0353619669145546,
"grad_norm": 1.7139147520065308,
"learning_rate": 1.4253766112750429e-05,
"loss": 1.365,
"step": 40000
},
{
"epoch": 3.1871300652602823,
"grad_norm": 0.9839210510253906,
"learning_rate": 1.3943158875601804e-05,
"loss": 1.3631,
"step": 42000
},
{
"epoch": 3.33889816360601,
"grad_norm": 2.022289514541626,
"learning_rate": 1.3632551638453178e-05,
"loss": 1.3497,
"step": 44000
},
{
"epoch": 3.4906662619517377,
"grad_norm": 4.369687080383301,
"learning_rate": 1.3321944401304551e-05,
"loss": 1.3536,
"step": 46000
},
{
"epoch": 3.6424343602974654,
"grad_norm": 1.046391487121582,
"learning_rate": 1.3011337164155927e-05,
"loss": 1.3455,
"step": 48000
},
{
"epoch": 3.794202458643193,
"grad_norm": 3.8603522777557373,
"learning_rate": 1.27007299270073e-05,
"loss": 1.3396,
"step": 50000
},
{
"epoch": 3.9459705569889207,
"grad_norm": 1.02574622631073,
"learning_rate": 1.2390122689858674e-05,
"loss": 1.3347,
"step": 52000
},
{
"epoch": 4.0,
"eval_loss": 1.2622406482696533,
"eval_rouge1": 16.4481,
"eval_rouge2": 6.2714,
"eval_rougeL": 14.1706,
"eval_rougeLsum": 14.2806,
"eval_runtime": 167.9003,
"eval_samples_per_second": 29.78,
"eval_steps_per_second": 3.722,
"step": 52712
},
{
"epoch": 4.097738655334648,
"grad_norm": 1.366310715675354,
"learning_rate": 1.2079515452710048e-05,
"loss": 1.3304,
"step": 54000
},
{
"epoch": 4.249506753680376,
"grad_norm": 1.1469073295593262,
"learning_rate": 1.1768908215561424e-05,
"loss": 1.3271,
"step": 56000
},
{
"epoch": 4.401274852026104,
"grad_norm": 1.0787475109100342,
"learning_rate": 1.1458300978412797e-05,
"loss": 1.3199,
"step": 58000
},
{
"epoch": 4.5530429503718315,
"grad_norm": 1.045688271522522,
"learning_rate": 1.1147693741264171e-05,
"loss": 1.3229,
"step": 60000
},
{
"epoch": 4.704811048717559,
"grad_norm": 1.0128060579299927,
"learning_rate": 1.0837086504115546e-05,
"loss": 1.3156,
"step": 62000
},
{
"epoch": 4.856579147063288,
"grad_norm": 1.1346766948699951,
"learning_rate": 1.052647926696692e-05,
"loss": 1.3186,
"step": 64000
},
{
"epoch": 5.0,
"eval_loss": 1.23964262008667,
"eval_rouge1": 16.5213,
"eval_rouge2": 6.4307,
"eval_rougeL": 14.289,
"eval_rougeLsum": 14.3853,
"eval_runtime": 170.4893,
"eval_samples_per_second": 29.327,
"eval_steps_per_second": 3.666,
"step": 65890
},
{
"epoch": 5.008347245409015,
"grad_norm": 1.000510334968567,
"learning_rate": 1.0215872029818294e-05,
"loss": 1.3074,
"step": 66000
},
{
"epoch": 5.160115343754743,
"grad_norm": 1.4083774089813232,
"learning_rate": 9.90526479266967e-06,
"loss": 1.3079,
"step": 68000
},
{
"epoch": 5.311883442100471,
"grad_norm": 1.065021276473999,
"learning_rate": 9.594657555521045e-06,
"loss": 1.3055,
"step": 70000
},
{
"epoch": 5.4636515404461985,
"grad_norm": 0.9340164065361023,
"learning_rate": 9.284050318372419e-06,
"loss": 1.305,
"step": 72000
},
{
"epoch": 5.615419638791926,
"grad_norm": 0.9457820653915405,
"learning_rate": 8.973443081223792e-06,
"loss": 1.3015,
"step": 74000
},
{
"epoch": 5.767187737137654,
"grad_norm": 0.8897130489349365,
"learning_rate": 8.662835844075168e-06,
"loss": 1.2985,
"step": 76000
},
{
"epoch": 5.9189558354833816,
"grad_norm": 1.2775472402572632,
"learning_rate": 8.352228606926543e-06,
"loss": 1.2973,
"step": 78000
},
{
"epoch": 6.0,
"eval_loss": 1.2244175672531128,
"eval_rouge1": 16.4244,
"eval_rouge2": 6.384,
"eval_rougeL": 14.2167,
"eval_rougeLsum": 14.3188,
"eval_runtime": 170.4105,
"eval_samples_per_second": 29.341,
"eval_steps_per_second": 3.668,
"step": 79068
},
{
"epoch": 6.070723933829109,
"grad_norm": 1.0759906768798828,
"learning_rate": 8.041621369777917e-06,
"loss": 1.2908,
"step": 80000
},
{
"epoch": 6.222492032174837,
"grad_norm": 1.315941333770752,
"learning_rate": 7.73101413262929e-06,
"loss": 1.2927,
"step": 82000
},
{
"epoch": 6.374260130520565,
"grad_norm": 0.936198353767395,
"learning_rate": 7.420406895480665e-06,
"loss": 1.2945,
"step": 84000
},
{
"epoch": 6.526028228866292,
"grad_norm": 1.233934998512268,
"learning_rate": 7.10979965833204e-06,
"loss": 1.285,
"step": 86000
},
{
"epoch": 6.67779632721202,
"grad_norm": 1.1760342121124268,
"learning_rate": 6.7991924211834135e-06,
"loss": 1.2877,
"step": 88000
},
{
"epoch": 6.829564425557748,
"grad_norm": 2.0586724281311035,
"learning_rate": 6.488585184034788e-06,
"loss": 1.283,
"step": 90000
},
{
"epoch": 6.981332523903475,
"grad_norm": 0.8292114734649658,
"learning_rate": 6.1779779468861636e-06,
"loss": 1.2817,
"step": 92000
},
{
"epoch": 7.0,
"eval_loss": 1.21384859085083,
"eval_rouge1": 16.5727,
"eval_rouge2": 6.4685,
"eval_rougeL": 14.3558,
"eval_rougeLsum": 14.4703,
"eval_runtime": 168.1885,
"eval_samples_per_second": 29.729,
"eval_steps_per_second": 3.716,
"step": 92246
},
{
"epoch": 7.133100622249203,
"grad_norm": 0.9742059111595154,
"learning_rate": 5.867370709737537e-06,
"loss": 1.2858,
"step": 94000
},
{
"epoch": 7.284868720594931,
"grad_norm": 1.0256426334381104,
"learning_rate": 5.556763472588912e-06,
"loss": 1.2804,
"step": 96000
}
],
"logging_steps": 2000,
"max_steps": 131780,
"num_input_tokens_seen": 0,
"num_train_epochs": 10,
"save_steps": 500,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": false
},
"attributes": {}
}
},
"total_flos": 1.0556092079249818e+17,
"train_batch_size": 8,
"trial_name": null,
"trial_params": null
}