PEGASUS_mini / trainer_state.json
fatihfauzan26's picture
Upload 8 files
f96e89f verified
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 5.0,
"eval_steps": 500,
"global_step": 6250,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.08,
"grad_norm": 24.225746154785156,
"learning_rate": 1e-05,
"loss": 10.557,
"step": 100
},
{
"epoch": 0.16,
"grad_norm": 24.75214385986328,
"learning_rate": 2e-05,
"loss": 9.3749,
"step": 200
},
{
"epoch": 0.24,
"grad_norm": 21.598390579223633,
"learning_rate": 3e-05,
"loss": 8.3939,
"step": 300
},
{
"epoch": 0.32,
"grad_norm": 12.049657821655273,
"learning_rate": 4e-05,
"loss": 4.1354,
"step": 400
},
{
"epoch": 0.4,
"grad_norm": 5.240299224853516,
"learning_rate": 5e-05,
"loss": 0.9051,
"step": 500
},
{
"epoch": 0.48,
"grad_norm": 6.658221244812012,
"learning_rate": 4.91304347826087e-05,
"loss": 0.737,
"step": 600
},
{
"epoch": 0.56,
"grad_norm": 2.5896286964416504,
"learning_rate": 4.8260869565217394e-05,
"loss": 0.6799,
"step": 700
},
{
"epoch": 0.64,
"grad_norm": 6.020174026489258,
"learning_rate": 4.739130434782609e-05,
"loss": 0.6532,
"step": 800
},
{
"epoch": 0.72,
"grad_norm": 3.4233405590057373,
"learning_rate": 4.6521739130434785e-05,
"loss": 0.6223,
"step": 900
},
{
"epoch": 0.8,
"grad_norm": 3.2131102085113525,
"learning_rate": 4.565217391304348e-05,
"loss": 0.5963,
"step": 1000
},
{
"epoch": 0.88,
"grad_norm": 2.3392815589904785,
"learning_rate": 4.478260869565218e-05,
"loss": 0.6014,
"step": 1100
},
{
"epoch": 0.96,
"grad_norm": 1.9733258485794067,
"learning_rate": 4.391304347826087e-05,
"loss": 0.5765,
"step": 1200
},
{
"epoch": 1.04,
"grad_norm": 2.700756788253784,
"learning_rate": 4.304347826086957e-05,
"loss": 0.554,
"step": 1300
},
{
"epoch": 1.12,
"grad_norm": 5.853904724121094,
"learning_rate": 4.2173913043478264e-05,
"loss": 0.5718,
"step": 1400
},
{
"epoch": 1.2,
"grad_norm": 4.578104496002197,
"learning_rate": 4.130434782608696e-05,
"loss": 0.5471,
"step": 1500
},
{
"epoch": 1.28,
"grad_norm": 1.733497977256775,
"learning_rate": 4.0434782608695655e-05,
"loss": 0.5577,
"step": 1600
},
{
"epoch": 1.3599999999999999,
"grad_norm": 2.4556689262390137,
"learning_rate": 3.956521739130435e-05,
"loss": 0.5367,
"step": 1700
},
{
"epoch": 1.44,
"grad_norm": 1.4540470838546753,
"learning_rate": 3.869565217391305e-05,
"loss": 0.5277,
"step": 1800
},
{
"epoch": 1.52,
"grad_norm": 2.833214521408081,
"learning_rate": 3.7826086956521736e-05,
"loss": 0.5237,
"step": 1900
},
{
"epoch": 1.6,
"grad_norm": 1.5153348445892334,
"learning_rate": 3.695652173913043e-05,
"loss": 0.5183,
"step": 2000
},
{
"epoch": 1.6800000000000002,
"grad_norm": 1.163548231124878,
"learning_rate": 3.6086956521739134e-05,
"loss": 0.4998,
"step": 2100
},
{
"epoch": 1.76,
"grad_norm": 0.9056810736656189,
"learning_rate": 3.521739130434783e-05,
"loss": 0.5205,
"step": 2200
},
{
"epoch": 1.8399999999999999,
"grad_norm": 2.93011736869812,
"learning_rate": 3.4347826086956526e-05,
"loss": 0.5067,
"step": 2300
},
{
"epoch": 1.92,
"grad_norm": 1.668942928314209,
"learning_rate": 3.347826086956522e-05,
"loss": 0.5001,
"step": 2400
},
{
"epoch": 2.0,
"grad_norm": 8.79544448852539,
"learning_rate": 3.260869565217392e-05,
"loss": 0.4944,
"step": 2500
},
{
"epoch": 2.08,
"grad_norm": 2.793776035308838,
"learning_rate": 3.173913043478261e-05,
"loss": 0.462,
"step": 2600
},
{
"epoch": 2.16,
"grad_norm": 1.993105173110962,
"learning_rate": 3.086956521739131e-05,
"loss": 0.4893,
"step": 2700
},
{
"epoch": 2.24,
"grad_norm": 1.373683214187622,
"learning_rate": 3e-05,
"loss": 0.4884,
"step": 2800
},
{
"epoch": 2.32,
"grad_norm": 2.5384674072265625,
"learning_rate": 2.9130434782608696e-05,
"loss": 0.4839,
"step": 2900
},
{
"epoch": 2.4,
"grad_norm": 1.294622778892517,
"learning_rate": 2.826086956521739e-05,
"loss": 0.4824,
"step": 3000
},
{
"epoch": 2.48,
"grad_norm": 1.2775664329528809,
"learning_rate": 2.7391304347826085e-05,
"loss": 0.4675,
"step": 3100
},
{
"epoch": 2.56,
"grad_norm": 1.2705273628234863,
"learning_rate": 2.6521739130434787e-05,
"loss": 0.4785,
"step": 3200
},
{
"epoch": 2.64,
"grad_norm": 3.2393271923065186,
"learning_rate": 2.5652173913043483e-05,
"loss": 0.4728,
"step": 3300
},
{
"epoch": 2.7199999999999998,
"grad_norm": 1.82649827003479,
"learning_rate": 2.4782608695652175e-05,
"loss": 0.474,
"step": 3400
},
{
"epoch": 2.8,
"grad_norm": 1.2423534393310547,
"learning_rate": 2.391304347826087e-05,
"loss": 0.4651,
"step": 3500
},
{
"epoch": 2.88,
"grad_norm": 1.142115831375122,
"learning_rate": 2.3043478260869567e-05,
"loss": 0.4687,
"step": 3600
},
{
"epoch": 2.96,
"grad_norm": 2.1958296298980713,
"learning_rate": 2.2173913043478262e-05,
"loss": 0.456,
"step": 3700
},
{
"epoch": 3.04,
"grad_norm": 4.805281162261963,
"learning_rate": 2.1304347826086958e-05,
"loss": 0.4439,
"step": 3800
},
{
"epoch": 3.12,
"grad_norm": 23.097047805786133,
"learning_rate": 2.0434782608695654e-05,
"loss": 0.4393,
"step": 3900
},
{
"epoch": 3.2,
"grad_norm": 3.215237617492676,
"learning_rate": 1.956521739130435e-05,
"loss": 0.4443,
"step": 4000
},
{
"epoch": 3.2800000000000002,
"grad_norm": 1.1977019309997559,
"learning_rate": 1.8695652173913045e-05,
"loss": 0.4333,
"step": 4100
},
{
"epoch": 3.36,
"grad_norm": 1.6901699304580688,
"learning_rate": 1.782608695652174e-05,
"loss": 0.4678,
"step": 4200
},
{
"epoch": 3.44,
"grad_norm": 2.7421112060546875,
"learning_rate": 1.6956521739130433e-05,
"loss": 0.4529,
"step": 4300
},
{
"epoch": 3.52,
"grad_norm": 1.5662778615951538,
"learning_rate": 1.608695652173913e-05,
"loss": 0.4399,
"step": 4400
},
{
"epoch": 3.6,
"grad_norm": 2.1081831455230713,
"learning_rate": 1.5217391304347828e-05,
"loss": 0.4531,
"step": 4500
},
{
"epoch": 3.68,
"grad_norm": 1.103431224822998,
"learning_rate": 1.4347826086956522e-05,
"loss": 0.4401,
"step": 4600
},
{
"epoch": 3.76,
"grad_norm": 1.3851810693740845,
"learning_rate": 1.3478260869565218e-05,
"loss": 0.4401,
"step": 4700
},
{
"epoch": 3.84,
"grad_norm": 1.0307343006134033,
"learning_rate": 1.2608695652173914e-05,
"loss": 0.4424,
"step": 4800
},
{
"epoch": 3.92,
"grad_norm": 1.1726175546646118,
"learning_rate": 1.173913043478261e-05,
"loss": 0.4459,
"step": 4900
},
{
"epoch": 4.0,
"grad_norm": 1.542671799659729,
"learning_rate": 1.0869565217391305e-05,
"loss": 0.4389,
"step": 5000
},
{
"epoch": 4.08,
"grad_norm": 1.4532389640808105,
"learning_rate": 1e-05,
"loss": 0.4361,
"step": 5100
},
{
"epoch": 4.16,
"grad_norm": 3.246967077255249,
"learning_rate": 9.130434782608697e-06,
"loss": 0.4452,
"step": 5200
},
{
"epoch": 4.24,
"grad_norm": 0.9646230936050415,
"learning_rate": 8.26086956521739e-06,
"loss": 0.4239,
"step": 5300
},
{
"epoch": 4.32,
"grad_norm": 1.250227451324463,
"learning_rate": 7.391304347826088e-06,
"loss": 0.4218,
"step": 5400
},
{
"epoch": 4.4,
"grad_norm": 1.2563761472702026,
"learning_rate": 6.521739130434783e-06,
"loss": 0.431,
"step": 5500
},
{
"epoch": 4.48,
"grad_norm": 0.9907436370849609,
"learning_rate": 5.652173913043479e-06,
"loss": 0.4297,
"step": 5600
},
{
"epoch": 4.5600000000000005,
"grad_norm": 1.0406742095947266,
"learning_rate": 4.782608695652174e-06,
"loss": 0.4403,
"step": 5700
},
{
"epoch": 4.64,
"grad_norm": 1.2739229202270508,
"learning_rate": 3.91304347826087e-06,
"loss": 0.438,
"step": 5800
},
{
"epoch": 4.72,
"grad_norm": 1.569171667098999,
"learning_rate": 3.0434782608695654e-06,
"loss": 0.4199,
"step": 5900
},
{
"epoch": 4.8,
"grad_norm": 1.1578859090805054,
"learning_rate": 2.173913043478261e-06,
"loss": 0.4202,
"step": 6000
},
{
"epoch": 4.88,
"grad_norm": 1.2546783685684204,
"learning_rate": 1.3043478260869564e-06,
"loss": 0.4451,
"step": 6100
},
{
"epoch": 4.96,
"grad_norm": 0.965720534324646,
"learning_rate": 4.347826086956522e-07,
"loss": 0.4295,
"step": 6200
}
],
"logging_steps": 100,
"max_steps": 6250,
"num_input_tokens_seen": 0,
"num_train_epochs": 5,
"save_steps": 1250,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 1.80591525888e+16,
"train_batch_size": 4,
"trial_name": null,
"trial_params": null
}