{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 8.998716302952504,
"eval_steps": 500,
"global_step": 3505,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.025673940949935817,
"grad_norm": 5121380.5,
"learning_rate": 2.0000000000000003e-06,
"loss": 13.9863,
"step": 10
},
{
"epoch": 0.051347881899871634,
"grad_norm": 5153433.5,
"learning_rate": 4.000000000000001e-06,
"loss": 13.5554,
"step": 20
},
{
"epoch": 0.07702182284980745,
"grad_norm": 5378263.0,
"learning_rate": 6e-06,
"loss": 12.3429,
"step": 30
},
{
"epoch": 0.10269576379974327,
"grad_norm": 5853167.0,
"learning_rate": 8.000000000000001e-06,
"loss": 10.9424,
"step": 40
},
{
"epoch": 0.12836970474967907,
"grad_norm": 5810211.5,
"learning_rate": 1e-05,
"loss": 8.8056,
"step": 50
},
{
"epoch": 0.1540436456996149,
"grad_norm": 5421540.5,
"learning_rate": 1.2e-05,
"loss": 6.446,
"step": 60
},
{
"epoch": 0.1797175866495507,
"grad_norm": 3989913.0,
"learning_rate": 1.4000000000000001e-05,
"loss": 4.0088,
"step": 70
},
{
"epoch": 0.20539152759948653,
"grad_norm": 1593379.125,
"learning_rate": 1.6000000000000003e-05,
"loss": 2.4106,
"step": 80
},
{
"epoch": 0.23106546854942234,
"grad_norm": 351223.375,
"learning_rate": 1.8e-05,
"loss": 1.5032,
"step": 90
},
{
"epoch": 0.25673940949935814,
"grad_norm": 255315.6875,
"learning_rate": 2e-05,
"loss": 1.0421,
"step": 100
},
{
"epoch": 0.28241335044929394,
"grad_norm": 150451.453125,
"learning_rate": 2.2000000000000003e-05,
"loss": 0.6102,
"step": 110
},
{
"epoch": 0.3080872913992298,
"grad_norm": 44021.93359375,
"learning_rate": 2.4e-05,
"loss": 0.3687,
"step": 120
},
{
"epoch": 0.3337612323491656,
"grad_norm": 23825.30078125,
"learning_rate": 2.6000000000000002e-05,
"loss": 0.2745,
"step": 130
},
{
"epoch": 0.3594351732991014,
"grad_norm": 19372.984375,
"learning_rate": 2.8000000000000003e-05,
"loss": 0.233,
"step": 140
},
{
"epoch": 0.3851091142490372,
"grad_norm": 19907.76171875,
"learning_rate": 3e-05,
"loss": 0.2075,
"step": 150
},
{
"epoch": 0.41078305519897307,
"grad_norm": 23577.75390625,
"learning_rate": 3.2000000000000005e-05,
"loss": 0.197,
"step": 160
},
{
"epoch": 0.43645699614890887,
"grad_norm": 15442.431640625,
"learning_rate": 3.4000000000000007e-05,
"loss": 0.1759,
"step": 170
},
{
"epoch": 0.4621309370988447,
"grad_norm": 24589.8671875,
"learning_rate": 3.6e-05,
"loss": 0.1626,
"step": 180
},
{
"epoch": 0.4878048780487805,
"grad_norm": 11427.1728515625,
"learning_rate": 3.8e-05,
"loss": 0.1541,
"step": 190
},
{
"epoch": 0.5134788189987163,
"grad_norm": 10105.453125,
"learning_rate": 4e-05,
"loss": 0.1542,
"step": 200
},
{
"epoch": 0.5391527599486521,
"grad_norm": 9209.875,
"learning_rate": 4.2e-05,
"loss": 0.1497,
"step": 210
},
{
"epoch": 0.5648267008985879,
"grad_norm": 8737.0283203125,
"learning_rate": 4.4000000000000006e-05,
"loss": 0.1397,
"step": 220
},
{
"epoch": 0.5905006418485238,
"grad_norm": 28856.4609375,
"learning_rate": 4.600000000000001e-05,
"loss": 0.1466,
"step": 230
},
{
"epoch": 0.6161745827984596,
"grad_norm": 9787.6689453125,
"learning_rate": 4.8e-05,
"loss": 0.1358,
"step": 240
},
{
"epoch": 0.6418485237483954,
"grad_norm": 6893.62353515625,
"learning_rate": 5e-05,
"loss": 0.1265,
"step": 250
},
{
"epoch": 0.6675224646983312,
"grad_norm": 14635.2509765625,
"learning_rate": 5.2000000000000004e-05,
"loss": 0.1376,
"step": 260
},
{
"epoch": 0.693196405648267,
"grad_norm": 7151.79150390625,
"learning_rate": 5.4000000000000005e-05,
"loss": 0.1375,
"step": 270
},
{
"epoch": 0.7188703465982028,
"grad_norm": 6492.265625,
"learning_rate": 5.6000000000000006e-05,
"loss": 0.1271,
"step": 280
},
{
"epoch": 0.7445442875481386,
"grad_norm": 6572.00244140625,
"learning_rate": 5.8e-05,
"loss": 0.1334,
"step": 290
},
{
"epoch": 0.7702182284980744,
"grad_norm": 9629.9423828125,
"learning_rate": 6e-05,
"loss": 0.1384,
"step": 300
},
{
"epoch": 0.7958921694480102,
"grad_norm": 6990.53466796875,
"learning_rate": 6.2e-05,
"loss": 0.1349,
"step": 310
},
{
"epoch": 0.8215661103979461,
"grad_norm": 8195.203125,
"learning_rate": 6.400000000000001e-05,
"loss": 0.1231,
"step": 320
},
{
"epoch": 0.8472400513478819,
"grad_norm": 5189.6279296875,
"learning_rate": 6.6e-05,
"loss": 0.1274,
"step": 330
},
{
"epoch": 0.8729139922978177,
"grad_norm": 8494.017578125,
"learning_rate": 6.800000000000001e-05,
"loss": 0.1324,
"step": 340
},
{
"epoch": 0.8985879332477535,
"grad_norm": 6397.20166015625,
"learning_rate": 7e-05,
"loss": 0.1223,
"step": 350
},
{
"epoch": 0.9242618741976893,
"grad_norm": 5670.24072265625,
"learning_rate": 7.2e-05,
"loss": 0.129,
"step": 360
},
{
"epoch": 0.9499358151476252,
"grad_norm": 6880.46533203125,
"learning_rate": 7.4e-05,
"loss": 0.1261,
"step": 370
},
{
"epoch": 0.975609756097561,
"grad_norm": 5716.75537109375,
"learning_rate": 7.6e-05,
"loss": 0.1263,
"step": 380
},
{
"epoch": 1.0012836970474968,
"grad_norm": 7472.732421875,
"learning_rate": 7.800000000000001e-05,
"loss": 0.1229,
"step": 390
},
{
"epoch": 1.0269576379974326,
"grad_norm": 6848.66162109375,
"learning_rate": 8e-05,
"loss": 0.1236,
"step": 400
},
{
"epoch": 1.0526315789473684,
"grad_norm": 6818.19921875,
"learning_rate": 8.2e-05,
"loss": 0.1198,
"step": 410
},
{
"epoch": 1.0783055198973042,
"grad_norm": 6299.18115234375,
"learning_rate": 8.4e-05,
"loss": 0.127,
"step": 420
},
{
"epoch": 1.10397946084724,
"grad_norm": 5963.009765625,
"learning_rate": 8.6e-05,
"loss": 0.1228,
"step": 430
},
{
"epoch": 1.1296534017971758,
"grad_norm": 5657.501953125,
"learning_rate": 8.800000000000001e-05,
"loss": 0.1248,
"step": 440
},
{
"epoch": 1.1553273427471118,
"grad_norm": 15452.26953125,
"learning_rate": 9e-05,
"loss": 0.1237,
"step": 450
},
{
"epoch": 1.1810012836970474,
"grad_norm": 6293.9599609375,
"learning_rate": 9.200000000000001e-05,
"loss": 0.1218,
"step": 460
},
{
"epoch": 1.2066752246469834,
"grad_norm": 6370.39794921875,
"learning_rate": 9.4e-05,
"loss": 0.1191,
"step": 470
},
{
"epoch": 1.2323491655969192,
"grad_norm": 6101.39697265625,
"learning_rate": 9.6e-05,
"loss": 0.1161,
"step": 480
},
{
"epoch": 1.258023106546855,
"grad_norm": 6093.3916015625,
"learning_rate": 9.8e-05,
"loss": 0.1247,
"step": 490
},
{
"epoch": 1.2836970474967908,
"grad_norm": 4891.51171875,
"learning_rate": 0.0001,
"loss": 0.117,
"step": 500
},
{
"epoch": 1.2836970474967908,
"eval_gen_len": 38.948,
"eval_loss": 0.10336166620254517,
"eval_rouge1": 0.5729,
"eval_rouge2": 0.2787,
"eval_rougeL": 0.5355,
"eval_runtime": 65.3435,
"eval_samples_per_second": 7.652,
"eval_steps_per_second": 0.49,
"step": 500
},
{
"epoch": 1.3093709884467266,
"grad_norm": 4472.9599609375,
"learning_rate": 9.970501474926254e-05,
"loss": 0.1215,
"step": 510
},
{
"epoch": 1.3350449293966624,
"grad_norm": 5620.90576171875,
"learning_rate": 9.941002949852508e-05,
"loss": 0.1212,
"step": 520
},
{
"epoch": 1.3607188703465982,
"grad_norm": 8320.6953125,
"learning_rate": 9.911504424778762e-05,
"loss": 0.1145,
"step": 530
},
{
"epoch": 1.386392811296534,
"grad_norm": 6414.8359375,
"learning_rate": 9.882005899705014e-05,
"loss": 0.1248,
"step": 540
},
{
"epoch": 1.4120667522464698,
"grad_norm": 6748.93017578125,
"learning_rate": 9.85250737463127e-05,
"loss": 0.1167,
"step": 550
},
{
"epoch": 1.4377406931964056,
"grad_norm": 6198.98779296875,
"learning_rate": 9.823008849557522e-05,
"loss": 0.126,
"step": 560
},
{
"epoch": 1.4634146341463414,
"grad_norm": 6716.78515625,
"learning_rate": 9.793510324483777e-05,
"loss": 0.1173,
"step": 570
},
{
"epoch": 1.4890885750962772,
"grad_norm": 5211.44921875,
"learning_rate": 9.76401179941003e-05,
"loss": 0.1117,
"step": 580
},
{
"epoch": 1.514762516046213,
"grad_norm": 5593.43798828125,
"learning_rate": 9.734513274336283e-05,
"loss": 0.1113,
"step": 590
},
{
"epoch": 1.540436456996149,
"grad_norm": 6742.3544921875,
"learning_rate": 9.705014749262537e-05,
"loss": 0.1184,
"step": 600
},
{
"epoch": 1.5661103979460846,
"grad_norm": 5100.9228515625,
"learning_rate": 9.675516224188791e-05,
"loss": 0.1122,
"step": 610
},
{
"epoch": 1.5917843388960207,
"grad_norm": 6717.302734375,
"learning_rate": 9.646017699115044e-05,
"loss": 0.1211,
"step": 620
},
{
"epoch": 1.6174582798459562,
"grad_norm": 8748.5849609375,
"learning_rate": 9.616519174041299e-05,
"loss": 0.1198,
"step": 630
},
{
"epoch": 1.6431322207958923,
"grad_norm": 6425.04248046875,
"learning_rate": 9.587020648967551e-05,
"loss": 0.1146,
"step": 640
},
{
"epoch": 1.6688061617458279,
"grad_norm": 5612.4580078125,
"learning_rate": 9.557522123893806e-05,
"loss": 0.1113,
"step": 650
},
{
"epoch": 1.6944801026957639,
"grad_norm": 6821.66259765625,
"learning_rate": 9.528023598820059e-05,
"loss": 0.1144,
"step": 660
},
{
"epoch": 1.7201540436456995,
"grad_norm": 7305.1376953125,
"learning_rate": 9.498525073746313e-05,
"loss": 0.1139,
"step": 670
},
{
"epoch": 1.7458279845956355,
"grad_norm": 6363.8701171875,
"learning_rate": 9.469026548672566e-05,
"loss": 0.1121,
"step": 680
},
{
"epoch": 1.7715019255455713,
"grad_norm": 5113.75537109375,
"learning_rate": 9.43952802359882e-05,
"loss": 0.115,
"step": 690
},
{
"epoch": 1.797175866495507,
"grad_norm": 6163.470703125,
"learning_rate": 9.410029498525074e-05,
"loss": 0.1244,
"step": 700
},
{
"epoch": 1.822849807445443,
"grad_norm": 6645.03466796875,
"learning_rate": 9.380530973451328e-05,
"loss": 0.1168,
"step": 710
},
{
"epoch": 1.8485237483953787,
"grad_norm": 9238.5634765625,
"learning_rate": 9.351032448377582e-05,
"loss": 0.1198,
"step": 720
},
{
"epoch": 1.8741976893453145,
"grad_norm": 5459.36962890625,
"learning_rate": 9.321533923303836e-05,
"loss": 0.1175,
"step": 730
},
{
"epoch": 1.8998716302952503,
"grad_norm": 5480.2998046875,
"learning_rate": 9.29203539823009e-05,
"loss": 0.1115,
"step": 740
},
{
"epoch": 1.925545571245186,
"grad_norm": 5369.232421875,
"learning_rate": 9.262536873156342e-05,
"loss": 0.1126,
"step": 750
},
{
"epoch": 1.951219512195122,
"grad_norm": 5776.08154296875,
"learning_rate": 9.233038348082597e-05,
"loss": 0.1184,
"step": 760
},
{
"epoch": 1.976893453145058,
"grad_norm": 4656.33203125,
"learning_rate": 9.20353982300885e-05,
"loss": 0.1145,
"step": 770
},
{
"epoch": 2.0025673940949935,
"grad_norm": 6602.3056640625,
"learning_rate": 9.174041297935103e-05,
"loss": 0.126,
"step": 780
},
{
"epoch": 2.0282413350449295,
"grad_norm": 5742.654296875,
"learning_rate": 9.144542772861357e-05,
"loss": 0.1036,
"step": 790
},
{
"epoch": 2.053915275994865,
"grad_norm": 5546.1728515625,
"learning_rate": 9.115044247787611e-05,
"loss": 0.1077,
"step": 800
},
{
"epoch": 2.079589216944801,
"grad_norm": 6285.85693359375,
"learning_rate": 9.085545722713865e-05,
"loss": 0.1156,
"step": 810
},
{
"epoch": 2.1052631578947367,
"grad_norm": 5655.2763671875,
"learning_rate": 9.056047197640119e-05,
"loss": 0.1136,
"step": 820
},
{
"epoch": 2.1309370988446728,
"grad_norm": 5984.7080078125,
"learning_rate": 9.026548672566371e-05,
"loss": 0.1076,
"step": 830
},
{
"epoch": 2.1566110397946083,
"grad_norm": 5037.02490234375,
"learning_rate": 8.997050147492626e-05,
"loss": 0.1181,
"step": 840
},
{
"epoch": 2.1822849807445444,
"grad_norm": 5577.619140625,
"learning_rate": 8.96755162241888e-05,
"loss": 0.1147,
"step": 850
},
{
"epoch": 2.20795892169448,
"grad_norm": 4631.0732421875,
"learning_rate": 8.938053097345133e-05,
"loss": 0.1106,
"step": 860
},
{
"epoch": 2.233632862644416,
"grad_norm": 4876.57177734375,
"learning_rate": 8.908554572271388e-05,
"loss": 0.1099,
"step": 870
},
{
"epoch": 2.2593068035943515,
"grad_norm": 4877.7177734375,
"learning_rate": 8.87905604719764e-05,
"loss": 0.1134,
"step": 880
},
{
"epoch": 2.2849807445442876,
"grad_norm": 5603.54150390625,
"learning_rate": 8.849557522123895e-05,
"loss": 0.1091,
"step": 890
},
{
"epoch": 2.3106546854942236,
"grad_norm": 4900.8056640625,
"learning_rate": 8.820058997050148e-05,
"loss": 0.111,
"step": 900
},
{
"epoch": 2.336328626444159,
"grad_norm": 5891.17919921875,
"learning_rate": 8.790560471976402e-05,
"loss": 0.113,
"step": 910
},
{
"epoch": 2.3620025673940948,
"grad_norm": 6097.0400390625,
"learning_rate": 8.761061946902655e-05,
"loss": 0.1131,
"step": 920
},
{
"epoch": 2.387676508344031,
"grad_norm": 5553.201171875,
"learning_rate": 8.731563421828909e-05,
"loss": 0.105,
"step": 930
},
{
"epoch": 2.413350449293967,
"grad_norm": 4939.27392578125,
"learning_rate": 8.702064896755162e-05,
"loss": 0.115,
"step": 940
},
{
"epoch": 2.4390243902439024,
"grad_norm": 5012.8740234375,
"learning_rate": 8.672566371681417e-05,
"loss": 0.1095,
"step": 950
},
{
"epoch": 2.4646983311938384,
"grad_norm": 6639.900390625,
"learning_rate": 8.64306784660767e-05,
"loss": 0.1096,
"step": 960
},
{
"epoch": 2.490372272143774,
"grad_norm": 6572.72705078125,
"learning_rate": 8.613569321533924e-05,
"loss": 0.1091,
"step": 970
},
{
"epoch": 2.51604621309371,
"grad_norm": 4813.23291015625,
"learning_rate": 8.584070796460177e-05,
"loss": 0.1114,
"step": 980
},
{
"epoch": 2.5417201540436456,
"grad_norm": 6162.396484375,
"learning_rate": 8.554572271386431e-05,
"loss": 0.1101,
"step": 990
},
{
"epoch": 2.5673940949935816,
"grad_norm": 6070.53857421875,
"learning_rate": 8.525073746312685e-05,
"loss": 0.1103,
"step": 1000
},
{
"epoch": 2.5673940949935816,
"eval_gen_len": 38.946,
"eval_loss": 0.0969744473695755,
"eval_rouge1": 0.5903,
"eval_rouge2": 0.3078,
"eval_rougeL": 0.556,
"eval_runtime": 65.5669,
"eval_samples_per_second": 7.626,
"eval_steps_per_second": 0.488,
"step": 1000
},
{
"epoch": 2.593068035943517,
"grad_norm": 6648.20751953125,
"learning_rate": 8.495575221238938e-05,
"loss": 0.109,
"step": 1010
},
{
"epoch": 2.6187419768934532,
"grad_norm": 4565.7236328125,
"learning_rate": 8.466076696165192e-05,
"loss": 0.1062,
"step": 1020
},
{
"epoch": 2.644415917843389,
"grad_norm": 5601.0908203125,
"learning_rate": 8.436578171091446e-05,
"loss": 0.1099,
"step": 1030
},
{
"epoch": 2.670089858793325,
"grad_norm": 5187.435546875,
"learning_rate": 8.4070796460177e-05,
"loss": 0.1127,
"step": 1040
},
{
"epoch": 2.6957637997432604,
"grad_norm": 5046.6767578125,
"learning_rate": 8.377581120943954e-05,
"loss": 0.122,
"step": 1050
},
{
"epoch": 2.7214377406931964,
"grad_norm": 4885.3818359375,
"learning_rate": 8.348082595870208e-05,
"loss": 0.105,
"step": 1060
},
{
"epoch": 2.7471116816431325,
"grad_norm": 5473.66015625,
"learning_rate": 8.31858407079646e-05,
"loss": 0.1081,
"step": 1070
},
{
"epoch": 2.772785622593068,
"grad_norm": 6577.04931640625,
"learning_rate": 8.289085545722715e-05,
"loss": 0.1161,
"step": 1080
},
{
"epoch": 2.7984595635430036,
"grad_norm": 5628.3349609375,
"learning_rate": 8.259587020648968e-05,
"loss": 0.1057,
"step": 1090
},
{
"epoch": 2.8241335044929397,
"grad_norm": 5690.630859375,
"learning_rate": 8.230088495575221e-05,
"loss": 0.1116,
"step": 1100
},
{
"epoch": 2.8498074454428757,
"grad_norm": 4804.5458984375,
"learning_rate": 8.200589970501475e-05,
"loss": 0.103,
"step": 1110
},
{
"epoch": 2.8754813863928113,
"grad_norm": 4892.4580078125,
"learning_rate": 8.171091445427729e-05,
"loss": 0.1116,
"step": 1120
},
{
"epoch": 2.901155327342747,
"grad_norm": 5333.4130859375,
"learning_rate": 8.141592920353983e-05,
"loss": 0.106,
"step": 1130
},
{
"epoch": 2.926829268292683,
"grad_norm": 5062.69580078125,
"learning_rate": 8.112094395280237e-05,
"loss": 0.1086,
"step": 1140
},
{
"epoch": 2.952503209242619,
"grad_norm": 4429.697265625,
"learning_rate": 8.082595870206489e-05,
"loss": 0.1079,
"step": 1150
},
{
"epoch": 2.9781771501925545,
"grad_norm": 5827.998046875,
"learning_rate": 8.053097345132744e-05,
"loss": 0.1151,
"step": 1160
},
{
"epoch": 3.0038510911424905,
"grad_norm": 5520.6826171875,
"learning_rate": 8.023598820058997e-05,
"loss": 0.1063,
"step": 1170
},
{
"epoch": 3.029525032092426,
"grad_norm": 5321.1328125,
"learning_rate": 7.99410029498525e-05,
"loss": 0.1028,
"step": 1180
},
{
"epoch": 3.055198973042362,
"grad_norm": 6147.1103515625,
"learning_rate": 7.964601769911504e-05,
"loss": 0.1027,
"step": 1190
},
{
"epoch": 3.0808729139922977,
"grad_norm": 5837.15673828125,
"learning_rate": 7.935103244837758e-05,
"loss": 0.1129,
"step": 1200
},
{
"epoch": 3.1065468549422337,
"grad_norm": 5890.27490234375,
"learning_rate": 7.905604719764012e-05,
"loss": 0.105,
"step": 1210
},
{
"epoch": 3.1322207958921693,
"grad_norm": 5193.16259765625,
"learning_rate": 7.876106194690266e-05,
"loss": 0.1025,
"step": 1220
},
{
"epoch": 3.1578947368421053,
"grad_norm": 5144.00048828125,
"learning_rate": 7.84660766961652e-05,
"loss": 0.1053,
"step": 1230
},
{
"epoch": 3.183568677792041,
"grad_norm": 4484.66552734375,
"learning_rate": 7.817109144542774e-05,
"loss": 0.1111,
"step": 1240
},
{
"epoch": 3.209242618741977,
"grad_norm": 5307.7431640625,
"learning_rate": 7.787610619469027e-05,
"loss": 0.1088,
"step": 1250
},
{
"epoch": 3.2349165596919125,
"grad_norm": 5560.54736328125,
"learning_rate": 7.75811209439528e-05,
"loss": 0.1056,
"step": 1260
},
{
"epoch": 3.2605905006418485,
"grad_norm": 5743.2255859375,
"learning_rate": 7.728613569321535e-05,
"loss": 0.1057,
"step": 1270
},
{
"epoch": 3.2862644415917845,
"grad_norm": 5270.234375,
"learning_rate": 7.699115044247787e-05,
"loss": 0.1072,
"step": 1280
},
{
"epoch": 3.31193838254172,
"grad_norm": 5415.4580078125,
"learning_rate": 7.669616519174043e-05,
"loss": 0.1044,
"step": 1290
},
{
"epoch": 3.337612323491656,
"grad_norm": 5077.26123046875,
"learning_rate": 7.640117994100295e-05,
"loss": 0.1063,
"step": 1300
},
{
"epoch": 3.3632862644415917,
"grad_norm": 5815.587890625,
"learning_rate": 7.610619469026549e-05,
"loss": 0.104,
"step": 1310
},
{
"epoch": 3.3889602053915278,
"grad_norm": 5168.4140625,
"learning_rate": 7.581120943952803e-05,
"loss": 0.1057,
"step": 1320
},
{
"epoch": 3.4146341463414633,
"grad_norm": 6175.1962890625,
"learning_rate": 7.551622418879057e-05,
"loss": 0.1085,
"step": 1330
},
{
"epoch": 3.4403080872913994,
"grad_norm": 4866.1806640625,
"learning_rate": 7.522123893805309e-05,
"loss": 0.1088,
"step": 1340
},
{
"epoch": 3.465982028241335,
"grad_norm": 4987.28662109375,
"learning_rate": 7.492625368731564e-05,
"loss": 0.1144,
"step": 1350
},
{
"epoch": 3.491655969191271,
"grad_norm": 6046.6435546875,
"learning_rate": 7.463126843657817e-05,
"loss": 0.1066,
"step": 1360
},
{
"epoch": 3.5173299101412066,
"grad_norm": 6097.72802734375,
"learning_rate": 7.433628318584072e-05,
"loss": 0.1109,
"step": 1370
},
{
"epoch": 3.5430038510911426,
"grad_norm": 6237.98388671875,
"learning_rate": 7.404129793510324e-05,
"loss": 0.1061,
"step": 1380
},
{
"epoch": 3.568677792041078,
"grad_norm": 5106.3720703125,
"learning_rate": 7.374631268436578e-05,
"loss": 0.1052,
"step": 1390
},
{
"epoch": 3.594351732991014,
"grad_norm": 5083.40234375,
"learning_rate": 7.345132743362832e-05,
"loss": 0.1012,
"step": 1400
},
{
"epoch": 3.62002567394095,
"grad_norm": 5191.7353515625,
"learning_rate": 7.315634218289086e-05,
"loss": 0.1081,
"step": 1410
},
{
"epoch": 3.645699614890886,
"grad_norm": 6808.04638671875,
"learning_rate": 7.28613569321534e-05,
"loss": 0.1046,
"step": 1420
},
{
"epoch": 3.6713735558408214,
"grad_norm": 5069.12353515625,
"learning_rate": 7.256637168141593e-05,
"loss": 0.1018,
"step": 1430
},
{
"epoch": 3.6970474967907574,
"grad_norm": 5624.8330078125,
"learning_rate": 7.227138643067847e-05,
"loss": 0.1087,
"step": 1440
},
{
"epoch": 3.7227214377406934,
"grad_norm": 8308.7177734375,
"learning_rate": 7.197640117994101e-05,
"loss": 0.1045,
"step": 1450
},
{
"epoch": 3.748395378690629,
"grad_norm": 5380.990234375,
"learning_rate": 7.168141592920355e-05,
"loss": 0.1083,
"step": 1460
},
{
"epoch": 3.7740693196405646,
"grad_norm": 4756.90576171875,
"learning_rate": 7.138643067846607e-05,
"loss": 0.1058,
"step": 1470
},
{
"epoch": 3.7997432605905006,
"grad_norm": 5780.90625,
"learning_rate": 7.109144542772862e-05,
"loss": 0.1029,
"step": 1480
},
{
"epoch": 3.8254172015404366,
"grad_norm": 5286.49560546875,
"learning_rate": 7.079646017699115e-05,
"loss": 0.1029,
"step": 1490
},
{
"epoch": 3.851091142490372,
"grad_norm": 5398.28369140625,
"learning_rate": 7.050147492625369e-05,
"loss": 0.1086,
"step": 1500
},
{
"epoch": 3.851091142490372,
"eval_gen_len": 38.946,
"eval_loss": 0.09477131813764572,
"eval_rouge1": 0.5988,
"eval_rouge2": 0.3178,
"eval_rougeL": 0.5661,
"eval_runtime": 65.5594,
"eval_samples_per_second": 7.627,
"eval_steps_per_second": 0.488,
"step": 1500
},
{
"epoch": 3.8767650834403082,
"grad_norm": 4950.18212890625,
"learning_rate": 7.020648967551623e-05,
"loss": 0.1025,
"step": 1510
},
{
"epoch": 3.902439024390244,
"grad_norm": 4885.29248046875,
"learning_rate": 6.991150442477876e-05,
"loss": 0.1067,
"step": 1520
},
{
"epoch": 3.92811296534018,
"grad_norm": 6418.5791015625,
"learning_rate": 6.96165191740413e-05,
"loss": 0.1009,
"step": 1530
},
{
"epoch": 3.9537869062901154,
"grad_norm": 6914.34375,
"learning_rate": 6.932153392330384e-05,
"loss": 0.1085,
"step": 1540
},
{
"epoch": 3.9794608472400514,
"grad_norm": 5611.89306640625,
"learning_rate": 6.902654867256638e-05,
"loss": 0.1125,
"step": 1550
},
{
"epoch": 4.005134788189987,
"grad_norm": 4575.3046875,
"learning_rate": 6.873156342182892e-05,
"loss": 0.1037,
"step": 1560
},
{
"epoch": 4.030808729139923,
"grad_norm": 5809.431640625,
"learning_rate": 6.843657817109145e-05,
"loss": 0.0999,
"step": 1570
},
{
"epoch": 4.056482670089859,
"grad_norm": 6907.01025390625,
"learning_rate": 6.814159292035398e-05,
"loss": 0.1007,
"step": 1580
},
{
"epoch": 4.082156611039794,
"grad_norm": 6448.38330078125,
"learning_rate": 6.784660766961653e-05,
"loss": 0.11,
"step": 1590
},
{
"epoch": 4.10783055198973,
"grad_norm": 15915.1982421875,
"learning_rate": 6.755162241887906e-05,
"loss": 0.1106,
"step": 1600
},
{
"epoch": 4.133504492939666,
"grad_norm": 5690.85107421875,
"learning_rate": 6.725663716814161e-05,
"loss": 0.1037,
"step": 1610
},
{
"epoch": 4.159178433889602,
"grad_norm": 4913.6220703125,
"learning_rate": 6.696165191740413e-05,
"loss": 0.1052,
"step": 1620
},
{
"epoch": 4.184852374839538,
"grad_norm": 5320.2470703125,
"learning_rate": 6.666666666666667e-05,
"loss": 0.1018,
"step": 1630
},
{
"epoch": 4.2105263157894735,
"grad_norm": 6042.61376953125,
"learning_rate": 6.637168141592921e-05,
"loss": 0.1013,
"step": 1640
},
{
"epoch": 4.2362002567394095,
"grad_norm": 5034.08203125,
"learning_rate": 6.607669616519175e-05,
"loss": 0.1085,
"step": 1650
},
{
"epoch": 4.2618741976893455,
"grad_norm": 6053.23876953125,
"learning_rate": 6.578171091445427e-05,
"loss": 0.0986,
"step": 1660
},
{
"epoch": 4.2875481386392815,
"grad_norm": 5543.45556640625,
"learning_rate": 6.548672566371682e-05,
"loss": 0.0986,
"step": 1670
},
{
"epoch": 4.313222079589217,
"grad_norm": 6083.2236328125,
"learning_rate": 6.519174041297935e-05,
"loss": 0.1028,
"step": 1680
},
{
"epoch": 4.338896020539153,
"grad_norm": 5847.65087890625,
"learning_rate": 6.48967551622419e-05,
"loss": 0.1001,
"step": 1690
},
{
"epoch": 4.364569961489089,
"grad_norm": 5046.88623046875,
"learning_rate": 6.460176991150442e-05,
"loss": 0.102,
"step": 1700
},
{
"epoch": 4.390243902439025,
"grad_norm": 7761.01611328125,
"learning_rate": 6.430678466076696e-05,
"loss": 0.1017,
"step": 1710
},
{
"epoch": 4.41591784338896,
"grad_norm": 5590.93505859375,
"learning_rate": 6.40117994100295e-05,
"loss": 0.1058,
"step": 1720
},
{
"epoch": 4.441591784338896,
"grad_norm": 4478.46484375,
"learning_rate": 6.371681415929204e-05,
"loss": 0.0995,
"step": 1730
},
{
"epoch": 4.467265725288832,
"grad_norm": 6958.63720703125,
"learning_rate": 6.342182890855458e-05,
"loss": 0.1028,
"step": 1740
},
{
"epoch": 4.492939666238768,
"grad_norm": 5210.4853515625,
"learning_rate": 6.312684365781711e-05,
"loss": 0.1078,
"step": 1750
},
{
"epoch": 4.518613607188703,
"grad_norm": 4667.54345703125,
"learning_rate": 6.283185840707965e-05,
"loss": 0.1025,
"step": 1760
},
{
"epoch": 4.544287548138639,
"grad_norm": 5578.6943359375,
"learning_rate": 6.253687315634219e-05,
"loss": 0.1029,
"step": 1770
},
{
"epoch": 4.569961489088575,
"grad_norm": 6289.7841796875,
"learning_rate": 6.224188790560473e-05,
"loss": 0.1062,
"step": 1780
},
{
"epoch": 4.595635430038511,
"grad_norm": 5193.00244140625,
"learning_rate": 6.194690265486725e-05,
"loss": 0.1104,
"step": 1790
},
{
"epoch": 4.621309370988447,
"grad_norm": 5092.68408203125,
"learning_rate": 6.16519174041298e-05,
"loss": 0.0996,
"step": 1800
},
{
"epoch": 4.646983311938382,
"grad_norm": 5535.3857421875,
"learning_rate": 6.135693215339233e-05,
"loss": 0.1066,
"step": 1810
},
{
"epoch": 4.672657252888318,
"grad_norm": 6088.28515625,
"learning_rate": 6.106194690265487e-05,
"loss": 0.1031,
"step": 1820
},
{
"epoch": 4.698331193838254,
"grad_norm": 5986.71240234375,
"learning_rate": 6.0766961651917406e-05,
"loss": 0.1043,
"step": 1830
},
{
"epoch": 4.7240051347881895,
"grad_norm": 5196.69140625,
"learning_rate": 6.0471976401179945e-05,
"loss": 0.1035,
"step": 1840
},
{
"epoch": 4.7496790757381255,
"grad_norm": 5394.7138671875,
"learning_rate": 6.017699115044248e-05,
"loss": 0.1017,
"step": 1850
},
{
"epoch": 4.775353016688062,
"grad_norm": 5689.53173828125,
"learning_rate": 5.988200589970502e-05,
"loss": 0.107,
"step": 1860
},
{
"epoch": 4.801026957637998,
"grad_norm": 5098.4541015625,
"learning_rate": 5.958702064896755e-05,
"loss": 0.1032,
"step": 1870
},
{
"epoch": 4.826700898587934,
"grad_norm": 4243.0087890625,
"learning_rate": 5.92920353982301e-05,
"loss": 0.1017,
"step": 1880
},
{
"epoch": 4.852374839537869,
"grad_norm": 5340.123046875,
"learning_rate": 5.899705014749263e-05,
"loss": 0.0986,
"step": 1890
},
{
"epoch": 4.878048780487805,
"grad_norm": 5436.1259765625,
"learning_rate": 5.870206489675516e-05,
"loss": 0.1,
"step": 1900
},
{
"epoch": 4.903722721437741,
"grad_norm": 5866.4375,
"learning_rate": 5.8407079646017705e-05,
"loss": 0.104,
"step": 1910
},
{
"epoch": 4.929396662387677,
"grad_norm": 5687.0595703125,
"learning_rate": 5.8112094395280236e-05,
"loss": 0.1003,
"step": 1920
},
{
"epoch": 4.955070603337612,
"grad_norm": 5049.65869140625,
"learning_rate": 5.781710914454278e-05,
"loss": 0.1051,
"step": 1930
},
{
"epoch": 4.980744544287548,
"grad_norm": 4348.83251953125,
"learning_rate": 5.752212389380531e-05,
"loss": 0.1067,
"step": 1940
},
{
"epoch": 5.006418485237484,
"grad_norm": 5278.10498046875,
"learning_rate": 5.7227138643067844e-05,
"loss": 0.1028,
"step": 1950
},
{
"epoch": 5.03209242618742,
"grad_norm": 5227.31689453125,
"learning_rate": 5.693215339233039e-05,
"loss": 0.1015,
"step": 1960
},
{
"epoch": 5.057766367137355,
"grad_norm": 5626.7041015625,
"learning_rate": 5.663716814159292e-05,
"loss": 0.0953,
"step": 1970
},
{
"epoch": 5.083440308087291,
"grad_norm": 4941.787109375,
"learning_rate": 5.634218289085545e-05,
"loss": 0.1,
"step": 1980
},
{
"epoch": 5.109114249037227,
"grad_norm": 5543.74365234375,
"learning_rate": 5.6047197640118e-05,
"loss": 0.0975,
"step": 1990
},
{
"epoch": 5.134788189987163,
"grad_norm": 6526.22509765625,
"learning_rate": 5.575221238938053e-05,
"loss": 0.1046,
"step": 2000
},
{
"epoch": 5.134788189987163,
"eval_gen_len": 38.946,
"eval_loss": 0.09348437190055847,
"eval_rouge1": 0.6042,
"eval_rouge2": 0.3248,
"eval_rougeL": 0.5712,
"eval_runtime": 65.0847,
"eval_samples_per_second": 7.682,
"eval_steps_per_second": 0.492,
"step": 2000
},
{
"epoch": 5.160462130937099,
"grad_norm": 5070.046875,
"learning_rate": 5.545722713864307e-05,
"loss": 0.0981,
"step": 2010
},
{
"epoch": 5.186136071887034,
"grad_norm": 5264.22509765625,
"learning_rate": 5.5162241887905605e-05,
"loss": 0.1023,
"step": 2020
},
{
"epoch": 5.21181001283697,
"grad_norm": 10262.3994140625,
"learning_rate": 5.486725663716814e-05,
"loss": 0.1007,
"step": 2030
},
{
"epoch": 5.2374839537869065,
"grad_norm": 4638.310546875,
"learning_rate": 5.457227138643069e-05,
"loss": 0.1019,
"step": 2040
},
{
"epoch": 5.2631578947368425,
"grad_norm": 5691.34033203125,
"learning_rate": 5.427728613569322e-05,
"loss": 0.1048,
"step": 2050
},
{
"epoch": 5.288831835686778,
"grad_norm": 5892.60986328125,
"learning_rate": 5.398230088495575e-05,
"loss": 0.1002,
"step": 2060
},
{
"epoch": 5.314505776636714,
"grad_norm": 5043.25,
"learning_rate": 5.3687315634218295e-05,
"loss": 0.1026,
"step": 2070
},
{
"epoch": 5.34017971758665,
"grad_norm": 5076.90283203125,
"learning_rate": 5.339233038348083e-05,
"loss": 0.103,
"step": 2080
},
{
"epoch": 5.365853658536586,
"grad_norm": 5730.2998046875,
"learning_rate": 5.309734513274337e-05,
"loss": 0.0995,
"step": 2090
},
{
"epoch": 5.391527599486521,
"grad_norm": 5071.3759765625,
"learning_rate": 5.28023598820059e-05,
"loss": 0.1006,
"step": 2100
},
{
"epoch": 5.417201540436457,
"grad_norm": 4912.38134765625,
"learning_rate": 5.2507374631268435e-05,
"loss": 0.0965,
"step": 2110
},
{
"epoch": 5.442875481386393,
"grad_norm": 5349.1376953125,
"learning_rate": 5.221238938053098e-05,
"loss": 0.1016,
"step": 2120
},
{
"epoch": 5.468549422336329,
"grad_norm": 6012.4912109375,
"learning_rate": 5.191740412979351e-05,
"loss": 0.0985,
"step": 2130
},
{
"epoch": 5.494223363286264,
"grad_norm": 6078.17333984375,
"learning_rate": 5.162241887905604e-05,
"loss": 0.1001,
"step": 2140
},
{
"epoch": 5.5198973042362,
"grad_norm": 6352.015625,
"learning_rate": 5.132743362831859e-05,
"loss": 0.0995,
"step": 2150
},
{
"epoch": 5.545571245186136,
"grad_norm": 10780.03125,
"learning_rate": 5.103244837758112e-05,
"loss": 0.1037,
"step": 2160
},
{
"epoch": 5.571245186136072,
"grad_norm": 4540.59326171875,
"learning_rate": 5.0737463126843664e-05,
"loss": 0.1003,
"step": 2170
},
{
"epoch": 5.596919127086007,
"grad_norm": 5141.4697265625,
"learning_rate": 5.0442477876106195e-05,
"loss": 0.0993,
"step": 2180
},
{
"epoch": 5.622593068035943,
"grad_norm": 8023.310546875,
"learning_rate": 5.014749262536873e-05,
"loss": 0.0923,
"step": 2190
},
{
"epoch": 5.648267008985879,
"grad_norm": 6443.39404296875,
"learning_rate": 4.985250737463127e-05,
"loss": 0.1059,
"step": 2200
},
{
"epoch": 5.673940949935815,
"grad_norm": 4546.185546875,
"learning_rate": 4.955752212389381e-05,
"loss": 0.1027,
"step": 2210
},
{
"epoch": 5.699614890885751,
"grad_norm": 5331.25634765625,
"learning_rate": 4.926253687315635e-05,
"loss": 0.0994,
"step": 2220
},
{
"epoch": 5.7252888318356865,
"grad_norm": 5486.52587890625,
"learning_rate": 4.8967551622418886e-05,
"loss": 0.1097,
"step": 2230
},
{
"epoch": 5.7509627727856225,
"grad_norm": 5083.9794921875,
"learning_rate": 4.867256637168142e-05,
"loss": 0.0971,
"step": 2240
},
{
"epoch": 5.7766367137355585,
"grad_norm": 5799.4931640625,
"learning_rate": 4.8377581120943956e-05,
"loss": 0.103,
"step": 2250
},
{
"epoch": 5.802310654685495,
"grad_norm": 5407.1708984375,
"learning_rate": 4.8082595870206494e-05,
"loss": 0.1091,
"step": 2260
},
{
"epoch": 5.82798459563543,
"grad_norm": 4890.9697265625,
"learning_rate": 4.778761061946903e-05,
"loss": 0.1013,
"step": 2270
},
{
"epoch": 5.853658536585366,
"grad_norm": 5403.1416015625,
"learning_rate": 4.749262536873156e-05,
"loss": 0.1076,
"step": 2280
},
{
"epoch": 5.879332477535302,
"grad_norm": 5159.65234375,
"learning_rate": 4.71976401179941e-05,
"loss": 0.0994,
"step": 2290
},
{
"epoch": 5.905006418485238,
"grad_norm": 6055.45458984375,
"learning_rate": 4.690265486725664e-05,
"loss": 0.0998,
"step": 2300
},
{
"epoch": 5.930680359435174,
"grad_norm": 5306.44677734375,
"learning_rate": 4.660766961651918e-05,
"loss": 0.0995,
"step": 2310
},
{
"epoch": 5.956354300385109,
"grad_norm": 5193.0009765625,
"learning_rate": 4.631268436578171e-05,
"loss": 0.1011,
"step": 2320
},
{
"epoch": 5.982028241335045,
"grad_norm": 6859.47509765625,
"learning_rate": 4.601769911504425e-05,
"loss": 0.1043,
"step": 2330
},
{
"epoch": 6.007702182284981,
"grad_norm": 4973.0458984375,
"learning_rate": 4.5722713864306786e-05,
"loss": 0.1002,
"step": 2340
},
{
"epoch": 6.033376123234916,
"grad_norm": 5652.50439453125,
"learning_rate": 4.5427728613569324e-05,
"loss": 0.0994,
"step": 2350
},
{
"epoch": 6.059050064184852,
"grad_norm": 6935.865234375,
"learning_rate": 4.5132743362831855e-05,
"loss": 0.0998,
"step": 2360
},
{
"epoch": 6.084724005134788,
"grad_norm": 4675.81982421875,
"learning_rate": 4.48377581120944e-05,
"loss": 0.1014,
"step": 2370
},
{
"epoch": 6.110397946084724,
"grad_norm": 4515.3134765625,
"learning_rate": 4.454277286135694e-05,
"loss": 0.0968,
"step": 2380
},
{
"epoch": 6.13607188703466,
"grad_norm": 5213.7578125,
"learning_rate": 4.4247787610619477e-05,
"loss": 0.0987,
"step": 2390
},
{
"epoch": 6.161745827984595,
"grad_norm": 5425.05615234375,
"learning_rate": 4.395280235988201e-05,
"loss": 0.102,
"step": 2400
},
{
"epoch": 6.187419768934531,
"grad_norm": 4345.66552734375,
"learning_rate": 4.3657817109144546e-05,
"loss": 0.0978,
"step": 2410
},
{
"epoch": 6.213093709884467,
"grad_norm": 5057.90087890625,
"learning_rate": 4.3362831858407084e-05,
"loss": 0.1011,
"step": 2420
},
{
"epoch": 6.238767650834403,
"grad_norm": 6916.2607421875,
"learning_rate": 4.306784660766962e-05,
"loss": 0.1023,
"step": 2430
},
{
"epoch": 6.264441591784339,
"grad_norm": 6013.05126953125,
"learning_rate": 4.2772861356932154e-05,
"loss": 0.0995,
"step": 2440
},
{
"epoch": 6.290115532734275,
"grad_norm": 4742.91357421875,
"learning_rate": 4.247787610619469e-05,
"loss": 0.0974,
"step": 2450
},
{
"epoch": 6.315789473684211,
"grad_norm": 4979.93115234375,
"learning_rate": 4.218289085545723e-05,
"loss": 0.1019,
"step": 2460
},
{
"epoch": 6.341463414634147,
"grad_norm": 5349.9130859375,
"learning_rate": 4.188790560471977e-05,
"loss": 0.1027,
"step": 2470
},
{
"epoch": 6.367137355584082,
"grad_norm": 5003.3203125,
"learning_rate": 4.15929203539823e-05,
"loss": 0.1038,
"step": 2480
},
{
"epoch": 6.392811296534018,
"grad_norm": 5897.6796875,
"learning_rate": 4.129793510324484e-05,
"loss": 0.0998,
"step": 2490
},
{
"epoch": 6.418485237483954,
"grad_norm": 5018.42138671875,
"learning_rate": 4.1002949852507376e-05,
"loss": 0.0962,
"step": 2500
},
{
"epoch": 6.418485237483954,
"eval_gen_len": 38.946,
"eval_loss": 0.09220927208662033,
"eval_rouge1": 0.6077,
"eval_rouge2": 0.3279,
"eval_rougeL": 0.5755,
"eval_runtime": 65.0093,
"eval_samples_per_second": 7.691,
"eval_steps_per_second": 0.492,
"step": 2500
},
{
"epoch": 6.44415917843389,
"grad_norm": 6092.09375,
"learning_rate": 4.0707964601769914e-05,
"loss": 0.0929,
"step": 2510
},
{
"epoch": 6.469833119383825,
"grad_norm": 6269.76171875,
"learning_rate": 4.0412979351032446e-05,
"loss": 0.0972,
"step": 2520
},
{
"epoch": 6.495507060333761,
"grad_norm": 4338.68896484375,
"learning_rate": 4.0117994100294984e-05,
"loss": 0.1009,
"step": 2530
},
{
"epoch": 6.521181001283697,
"grad_norm": 4670.00537109375,
"learning_rate": 3.982300884955752e-05,
"loss": 0.1032,
"step": 2540
},
{
"epoch": 6.546854942233633,
"grad_norm": 5199.564453125,
"learning_rate": 3.952802359882006e-05,
"loss": 0.0977,
"step": 2550
},
{
"epoch": 6.572528883183569,
"grad_norm": 6262.904296875,
"learning_rate": 3.92330383480826e-05,
"loss": 0.0966,
"step": 2560
},
{
"epoch": 6.598202824133504,
"grad_norm": 7214.66748046875,
"learning_rate": 3.893805309734514e-05,
"loss": 0.0942,
"step": 2570
},
{
"epoch": 6.62387676508344,
"grad_norm": 5746.22705078125,
"learning_rate": 3.8643067846607675e-05,
"loss": 0.0946,
"step": 2580
},
{
"epoch": 6.649550706033376,
"grad_norm": 6876.60986328125,
"learning_rate": 3.834808259587021e-05,
"loss": 0.1031,
"step": 2590
},
{
"epoch": 6.675224646983312,
"grad_norm": 5216.8642578125,
"learning_rate": 3.8053097345132744e-05,
"loss": 0.0984,
"step": 2600
},
{
"epoch": 6.700898587933247,
"grad_norm": 5965.8583984375,
"learning_rate": 3.775811209439528e-05,
"loss": 0.099,
"step": 2610
},
{
"epoch": 6.7265725288831835,
"grad_norm": 7099.044921875,
"learning_rate": 3.746312684365782e-05,
"loss": 0.1052,
"step": 2620
},
{
"epoch": 6.7522464698331195,
"grad_norm": 4748.5703125,
"learning_rate": 3.716814159292036e-05,
"loss": 0.1045,
"step": 2630
},
{
"epoch": 6.7779204107830555,
"grad_norm": 5743.19921875,
"learning_rate": 3.687315634218289e-05,
"loss": 0.0937,
"step": 2640
},
{
"epoch": 6.803594351732991,
"grad_norm": 5680.45068359375,
"learning_rate": 3.657817109144543e-05,
"loss": 0.0965,
"step": 2650
},
{
"epoch": 6.829268292682927,
"grad_norm": 7245.03564453125,
"learning_rate": 3.628318584070797e-05,
"loss": 0.0909,
"step": 2660
},
{
"epoch": 6.854942233632863,
"grad_norm": 5226.4365234375,
"learning_rate": 3.5988200589970505e-05,
"loss": 0.0987,
"step": 2670
},
{
"epoch": 6.880616174582799,
"grad_norm": 5511.99853515625,
"learning_rate": 3.5693215339233036e-05,
"loss": 0.1066,
"step": 2680
},
{
"epoch": 6.906290115532734,
"grad_norm": 5711.359375,
"learning_rate": 3.5398230088495574e-05,
"loss": 0.0995,
"step": 2690
},
{
"epoch": 6.93196405648267,
"grad_norm": 5092.283203125,
"learning_rate": 3.510324483775811e-05,
"loss": 0.0971,
"step": 2700
},
{
"epoch": 6.957637997432606,
"grad_norm": 6100.78271484375,
"learning_rate": 3.480825958702065e-05,
"loss": 0.1009,
"step": 2710
},
{
"epoch": 6.983311938382542,
"grad_norm": 5600.61181640625,
"learning_rate": 3.451327433628319e-05,
"loss": 0.1026,
"step": 2720
},
{
"epoch": 7.008985879332478,
"grad_norm": 5000.9541015625,
"learning_rate": 3.421828908554573e-05,
"loss": 0.101,
"step": 2730
},
{
"epoch": 7.034659820282413,
"grad_norm": 5288.25048828125,
"learning_rate": 3.3923303834808265e-05,
"loss": 0.0961,
"step": 2740
},
{
"epoch": 7.060333761232349,
"grad_norm": 5404.33837890625,
"learning_rate": 3.3628318584070804e-05,
"loss": 0.1074,
"step": 2750
},
{
"epoch": 7.086007702182285,
"grad_norm": 4586.51708984375,
"learning_rate": 3.3333333333333335e-05,
"loss": 0.0978,
"step": 2760
},
{
"epoch": 7.111681643132221,
"grad_norm": 5383.466796875,
"learning_rate": 3.303834808259587e-05,
"loss": 0.0983,
"step": 2770
},
{
"epoch": 7.137355584082156,
"grad_norm": 5845.02294921875,
"learning_rate": 3.274336283185841e-05,
"loss": 0.0922,
"step": 2780
},
{
"epoch": 7.163029525032092,
"grad_norm": 5654.388671875,
"learning_rate": 3.244837758112095e-05,
"loss": 0.0941,
"step": 2790
},
{
"epoch": 7.188703465982028,
"grad_norm": 5832.42724609375,
"learning_rate": 3.215339233038348e-05,
"loss": 0.0949,
"step": 2800
},
{
"epoch": 7.214377406931964,
"grad_norm": 5299.91015625,
"learning_rate": 3.185840707964602e-05,
"loss": 0.0988,
"step": 2810
},
{
"epoch": 7.2400513478818995,
"grad_norm": 5652.5087890625,
"learning_rate": 3.156342182890856e-05,
"loss": 0.0982,
"step": 2820
},
{
"epoch": 7.2657252888318355,
"grad_norm": 6181.0361328125,
"learning_rate": 3.1268436578171095e-05,
"loss": 0.098,
"step": 2830
},
{
"epoch": 7.291399229781772,
"grad_norm": 13162.5078125,
"learning_rate": 3.097345132743363e-05,
"loss": 0.0951,
"step": 2840
},
{
"epoch": 7.317073170731708,
"grad_norm": 5318.009765625,
"learning_rate": 3.0678466076696165e-05,
"loss": 0.0988,
"step": 2850
},
{
"epoch": 7.342747111681643,
"grad_norm": 5820.310546875,
"learning_rate": 3.0383480825958703e-05,
"loss": 0.0983,
"step": 2860
},
{
"epoch": 7.368421052631579,
"grad_norm": 5990.56640625,
"learning_rate": 3.008849557522124e-05,
"loss": 0.0982,
"step": 2870
},
{
"epoch": 7.394094993581515,
"grad_norm": 5594.0703125,
"learning_rate": 2.9793510324483776e-05,
"loss": 0.0974,
"step": 2880
},
{
"epoch": 7.419768934531451,
"grad_norm": 6317.15234375,
"learning_rate": 2.9498525073746314e-05,
"loss": 0.0932,
"step": 2890
},
{
"epoch": 7.445442875481387,
"grad_norm": 8022.15185546875,
"learning_rate": 2.9203539823008852e-05,
"loss": 0.1041,
"step": 2900
},
{
"epoch": 7.471116816431322,
"grad_norm": 5091.68310546875,
"learning_rate": 2.890855457227139e-05,
"loss": 0.0995,
"step": 2910
},
{
"epoch": 7.496790757381258,
"grad_norm": 6386.40869140625,
"learning_rate": 2.8613569321533922e-05,
"loss": 0.0964,
"step": 2920
},
{
"epoch": 7.522464698331194,
"grad_norm": 4850.58203125,
"learning_rate": 2.831858407079646e-05,
"loss": 0.1063,
"step": 2930
},
{
"epoch": 7.548138639281129,
"grad_norm": 6846.75146484375,
"learning_rate": 2.8023598820059e-05,
"loss": 0.102,
"step": 2940
},
{
"epoch": 7.573812580231065,
"grad_norm": 5613.95166015625,
"learning_rate": 2.7728613569321537e-05,
"loss": 0.0977,
"step": 2950
},
{
"epoch": 7.599486521181001,
"grad_norm": 5055.47705078125,
"learning_rate": 2.743362831858407e-05,
"loss": 0.0937,
"step": 2960
},
{
"epoch": 7.625160462130937,
"grad_norm": 5020.2568359375,
"learning_rate": 2.713864306784661e-05,
"loss": 0.0978,
"step": 2970
},
{
"epoch": 7.650834403080873,
"grad_norm": 5974.265625,
"learning_rate": 2.6843657817109148e-05,
"loss": 0.098,
"step": 2980
},
{
"epoch": 7.676508344030808,
"grad_norm": 6458.8662109375,
"learning_rate": 2.6548672566371686e-05,
"loss": 0.0964,
"step": 2990
},
{
"epoch": 7.702182284980744,
"grad_norm": 5247.0791015625,
"learning_rate": 2.6253687315634217e-05,
"loss": 0.1029,
"step": 3000
},
{
"epoch": 7.702182284980744,
"eval_gen_len": 38.946,
"eval_loss": 0.09166006743907928,
"eval_rouge1": 0.6133,
"eval_rouge2": 0.3322,
"eval_rougeL": 0.5794,
"eval_runtime": 65.3195,
"eval_samples_per_second": 7.655,
"eval_steps_per_second": 0.49,
"step": 3000
},
{
"epoch": 7.7278562259306804,
"grad_norm": 9114.529296875,
"learning_rate": 2.5958702064896756e-05,
"loss": 0.0958,
"step": 3010
},
{
"epoch": 7.7535301668806165,
"grad_norm": 4675.69384765625,
"learning_rate": 2.5663716814159294e-05,
"loss": 0.0967,
"step": 3020
},
{
"epoch": 7.779204107830552,
"grad_norm": 5986.85546875,
"learning_rate": 2.5368731563421832e-05,
"loss": 0.0963,
"step": 3030
},
{
"epoch": 7.804878048780488,
"grad_norm": 5686.59716796875,
"learning_rate": 2.5073746312684367e-05,
"loss": 0.1,
"step": 3040
},
{
"epoch": 7.830551989730424,
"grad_norm": 4628.58447265625,
"learning_rate": 2.4778761061946905e-05,
"loss": 0.0971,
"step": 3050
},
{
"epoch": 7.85622593068036,
"grad_norm": 4568.95068359375,
"learning_rate": 2.4483775811209443e-05,
"loss": 0.0995,
"step": 3060
},
{
"epoch": 7.881899871630296,
"grad_norm": 5026.5517578125,
"learning_rate": 2.4188790560471978e-05,
"loss": 0.0997,
"step": 3070
},
{
"epoch": 7.907573812580231,
"grad_norm": 5142.33544921875,
"learning_rate": 2.3893805309734516e-05,
"loss": 0.0989,
"step": 3080
},
{
"epoch": 7.933247753530167,
"grad_norm": 4715.99169921875,
"learning_rate": 2.359882005899705e-05,
"loss": 0.0982,
"step": 3090
},
{
"epoch": 7.958921694480103,
"grad_norm": 7074.0263671875,
"learning_rate": 2.330383480825959e-05,
"loss": 0.0936,
"step": 3100
},
{
"epoch": 7.984595635430038,
"grad_norm": 5483.7958984375,
"learning_rate": 2.3008849557522124e-05,
"loss": 0.1055,
"step": 3110
},
{
"epoch": 8.010269576379974,
"grad_norm": 21462.302734375,
"learning_rate": 2.2713864306784662e-05,
"loss": 0.097,
"step": 3120
},
{
"epoch": 8.03594351732991,
"grad_norm": 5375.9345703125,
"learning_rate": 2.24188790560472e-05,
"loss": 0.0945,
"step": 3130
},
{
"epoch": 8.061617458279846,
"grad_norm": 5927.3203125,
"learning_rate": 2.2123893805309738e-05,
"loss": 0.0919,
"step": 3140
},
{
"epoch": 8.087291399229782,
"grad_norm": 4952.16064453125,
"learning_rate": 2.1828908554572273e-05,
"loss": 0.1023,
"step": 3150
},
{
"epoch": 8.112965340179718,
"grad_norm": 4753.6865234375,
"learning_rate": 2.153392330383481e-05,
"loss": 0.1038,
"step": 3160
},
{
"epoch": 8.138639281129654,
"grad_norm": 4751.951171875,
"learning_rate": 2.1238938053097346e-05,
"loss": 0.0953,
"step": 3170
},
{
"epoch": 8.164313222079588,
"grad_norm": 5049.07470703125,
"learning_rate": 2.0943952802359884e-05,
"loss": 0.0995,
"step": 3180
},
{
"epoch": 8.189987163029524,
"grad_norm": 5914.5595703125,
"learning_rate": 2.064896755162242e-05,
"loss": 0.0994,
"step": 3190
},
{
"epoch": 8.21566110397946,
"grad_norm": 5663.07568359375,
"learning_rate": 2.0353982300884957e-05,
"loss": 0.099,
"step": 3200
},
{
"epoch": 8.241335044929397,
"grad_norm": 5172.39208984375,
"learning_rate": 2.0058997050147492e-05,
"loss": 0.1056,
"step": 3210
},
{
"epoch": 8.267008985879333,
"grad_norm": 4296.75732421875,
"learning_rate": 1.976401179941003e-05,
"loss": 0.0917,
"step": 3220
},
{
"epoch": 8.292682926829269,
"grad_norm": 5991.23046875,
"learning_rate": 1.946902654867257e-05,
"loss": 0.1005,
"step": 3230
},
{
"epoch": 8.318356867779205,
"grad_norm": 4786.93017578125,
"learning_rate": 1.9174041297935107e-05,
"loss": 0.1013,
"step": 3240
},
{
"epoch": 8.34403080872914,
"grad_norm": 5587.01416015625,
"learning_rate": 1.887905604719764e-05,
"loss": 0.1061,
"step": 3250
},
{
"epoch": 8.369704749679077,
"grad_norm": 5002.3935546875,
"learning_rate": 1.858407079646018e-05,
"loss": 0.0981,
"step": 3260
},
{
"epoch": 8.39537869062901,
"grad_norm": 5792.34814453125,
"learning_rate": 1.8289085545722714e-05,
"loss": 0.0982,
"step": 3270
},
{
"epoch": 8.421052631578947,
"grad_norm": 5482.14501953125,
"learning_rate": 1.7994100294985252e-05,
"loss": 0.0977,
"step": 3280
},
{
"epoch": 8.446726572528883,
"grad_norm": 5414.59326171875,
"learning_rate": 1.7699115044247787e-05,
"loss": 0.0959,
"step": 3290
},
{
"epoch": 8.472400513478819,
"grad_norm": 6676.62548828125,
"learning_rate": 1.7404129793510325e-05,
"loss": 0.0964,
"step": 3300
},
{
"epoch": 8.498074454428755,
"grad_norm": 5211.7705078125,
"learning_rate": 1.7109144542772864e-05,
"loss": 0.0936,
"step": 3310
},
{
"epoch": 8.523748395378691,
"grad_norm": 5187.91015625,
"learning_rate": 1.6814159292035402e-05,
"loss": 0.092,
"step": 3320
},
{
"epoch": 8.549422336328627,
"grad_norm": 16135.931640625,
"learning_rate": 1.6519174041297937e-05,
"loss": 0.093,
"step": 3330
},
{
"epoch": 8.575096277278563,
"grad_norm": 5429.2236328125,
"learning_rate": 1.6224188790560475e-05,
"loss": 0.0957,
"step": 3340
},
{
"epoch": 8.600770218228497,
"grad_norm": 5034.25732421875,
"learning_rate": 1.592920353982301e-05,
"loss": 0.0997,
"step": 3350
},
{
"epoch": 8.626444159178433,
"grad_norm": 6611.349609375,
"learning_rate": 1.5634218289085548e-05,
"loss": 0.0963,
"step": 3360
},
{
"epoch": 8.65211810012837,
"grad_norm": 5671.7568359375,
"learning_rate": 1.5339233038348082e-05,
"loss": 0.1065,
"step": 3370
},
{
"epoch": 8.677792041078305,
"grad_norm": 8826.3564453125,
"learning_rate": 1.504424778761062e-05,
"loss": 0.0972,
"step": 3380
},
{
"epoch": 8.703465982028241,
"grad_norm": 5669.00439453125,
"learning_rate": 1.4749262536873157e-05,
"loss": 0.0995,
"step": 3390
},
{
"epoch": 8.729139922978177,
"grad_norm": 7719.87353515625,
"learning_rate": 1.4454277286135695e-05,
"loss": 0.0898,
"step": 3400
},
{
"epoch": 8.754813863928113,
"grad_norm": 5668.51953125,
"learning_rate": 1.415929203539823e-05,
"loss": 0.1028,
"step": 3410
},
{
"epoch": 8.78048780487805,
"grad_norm": 5719.044921875,
"learning_rate": 1.3864306784660768e-05,
"loss": 0.094,
"step": 3420
},
{
"epoch": 8.806161745827985,
"grad_norm": 6085.166015625,
"learning_rate": 1.3569321533923305e-05,
"loss": 0.0938,
"step": 3430
},
{
"epoch": 8.83183568677792,
"grad_norm": 5559.7431640625,
"learning_rate": 1.3274336283185843e-05,
"loss": 0.0962,
"step": 3440
},
{
"epoch": 8.857509627727856,
"grad_norm": 7504.08349609375,
"learning_rate": 1.2979351032448378e-05,
"loss": 0.0964,
"step": 3450
},
{
"epoch": 8.883183568677792,
"grad_norm": 6102.48486328125,
"learning_rate": 1.2684365781710916e-05,
"loss": 0.0967,
"step": 3460
},
{
"epoch": 8.908857509627728,
"grad_norm": 5322.08251953125,
"learning_rate": 1.2389380530973452e-05,
"loss": 0.0952,
"step": 3470
},
{
"epoch": 8.934531450577664,
"grad_norm": 5769.94091796875,
"learning_rate": 1.2094395280235989e-05,
"loss": 0.0957,
"step": 3480
},
{
"epoch": 8.9602053915276,
"grad_norm": 4727.4755859375,
"learning_rate": 1.1799410029498525e-05,
"loss": 0.0942,
"step": 3490
},
{
"epoch": 8.985879332477536,
"grad_norm": 4858.51416015625,
"learning_rate": 1.1504424778761062e-05,
"loss": 0.0977,
"step": 3500
},
{
"epoch": 8.985879332477536,
"eval_gen_len": 38.946,
"eval_loss": 0.09172539412975311,
"eval_rouge1": 0.6126,
"eval_rouge2": 0.3339,
"eval_rougeL": 0.5795,
"eval_runtime": 65.0875,
"eval_samples_per_second": 7.682,
"eval_steps_per_second": 0.492,
"step": 3500
}
],
"logging_steps": 10,
"max_steps": 3890,
"num_input_tokens_seen": 0,
"num_train_epochs": 10,
"save_steps": 500,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": false
},
"attributes": {}
}
},
"total_flos": 1.517722961707008e+16,
"train_batch_size": 16,
"trial_name": null,
"trial_params": null
}