BaoLocTown's picture
Model save
9c65b87
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 2.0,
"eval_steps": 1000,
"global_step": 1922,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.0,
"learning_rate": 3.0927835051546394e-07,
"loss": 0.5665,
"step": 1
},
{
"epoch": 0.01,
"learning_rate": 3.092783505154639e-06,
"loss": 0.7248,
"step": 10
},
{
"epoch": 0.02,
"learning_rate": 6.185567010309278e-06,
"loss": 0.7059,
"step": 20
},
{
"epoch": 0.03,
"learning_rate": 9.278350515463918e-06,
"loss": 0.6139,
"step": 30
},
{
"epoch": 0.04,
"learning_rate": 1.2371134020618556e-05,
"loss": 0.5087,
"step": 40
},
{
"epoch": 0.05,
"learning_rate": 1.5463917525773194e-05,
"loss": 0.4724,
"step": 50
},
{
"epoch": 0.06,
"learning_rate": 1.8556701030927837e-05,
"loss": 0.4124,
"step": 60
},
{
"epoch": 0.07,
"learning_rate": 2.1649484536082473e-05,
"loss": 0.4254,
"step": 70
},
{
"epoch": 0.08,
"learning_rate": 2.4742268041237112e-05,
"loss": 0.3885,
"step": 80
},
{
"epoch": 0.09,
"learning_rate": 2.7835051546391755e-05,
"loss": 0.3624,
"step": 90
},
{
"epoch": 0.1,
"learning_rate": 2.999979997843754e-05,
"loss": 0.4005,
"step": 100
},
{
"epoch": 0.11,
"learning_rate": 2.999624418794702e-05,
"loss": 0.3947,
"step": 110
},
{
"epoch": 0.12,
"learning_rate": 2.998824468665815e-05,
"loss": 0.383,
"step": 120
},
{
"epoch": 0.14,
"learning_rate": 2.997580384499581e-05,
"loss": 0.3477,
"step": 130
},
{
"epoch": 0.15,
"learning_rate": 2.9958925349449884e-05,
"loss": 0.3627,
"step": 140
},
{
"epoch": 0.16,
"learning_rate": 2.9937614201482864e-05,
"loss": 0.3744,
"step": 150
},
{
"epoch": 0.17,
"learning_rate": 2.9911876716047828e-05,
"loss": 0.3545,
"step": 160
},
{
"epoch": 0.18,
"learning_rate": 2.988172051971717e-05,
"loss": 0.3598,
"step": 170
},
{
"epoch": 0.19,
"learning_rate": 2.9847154548422685e-05,
"loss": 0.336,
"step": 180
},
{
"epoch": 0.2,
"learning_rate": 2.9808189044807664e-05,
"loss": 0.3482,
"step": 190
},
{
"epoch": 0.21,
"learning_rate": 2.976483555519177e-05,
"loss": 0.3675,
"step": 200
},
{
"epoch": 0.22,
"learning_rate": 2.9717106926149626e-05,
"loss": 0.3417,
"step": 210
},
{
"epoch": 0.23,
"learning_rate": 2.9665017300704087e-05,
"loss": 0.3401,
"step": 220
},
{
"epoch": 0.24,
"learning_rate": 2.9608582114135352e-05,
"loss": 0.3315,
"step": 230
},
{
"epoch": 0.25,
"learning_rate": 2.954781808940717e-05,
"loss": 0.366,
"step": 240
},
{
"epoch": 0.26,
"learning_rate": 2.9482743232211473e-05,
"loss": 0.3604,
"step": 250
},
{
"epoch": 0.27,
"learning_rate": 2.9413376825632873e-05,
"loss": 0.3548,
"step": 260
},
{
"epoch": 0.28,
"learning_rate": 2.9339739424434687e-05,
"loss": 0.3615,
"step": 270
},
{
"epoch": 0.29,
"learning_rate": 2.9261852848968123e-05,
"loss": 0.3681,
"step": 280
},
{
"epoch": 0.3,
"learning_rate": 2.9179740178706412e-05,
"loss": 0.368,
"step": 290
},
{
"epoch": 0.31,
"learning_rate": 2.9093425745405897e-05,
"loss": 0.3509,
"step": 300
},
{
"epoch": 0.32,
"learning_rate": 2.9012171361681493e-05,
"loss": 0.3321,
"step": 310
},
{
"epoch": 0.33,
"learning_rate": 2.8917945065723644e-05,
"loss": 0.3653,
"step": 320
},
{
"epoch": 0.34,
"learning_rate": 2.8819594582268972e-05,
"loss": 0.3548,
"step": 330
},
{
"epoch": 0.35,
"learning_rate": 2.8717149054688286e-05,
"loss": 0.3602,
"step": 340
},
{
"epoch": 0.36,
"learning_rate": 2.8610638839802383e-05,
"loss": 0.3333,
"step": 350
},
{
"epoch": 0.37,
"learning_rate": 2.85000954988866e-05,
"loss": 0.3642,
"step": 360
},
{
"epoch": 0.39,
"learning_rate": 2.838555178831857e-05,
"loss": 0.346,
"step": 370
},
{
"epoch": 0.4,
"learning_rate": 2.8267041649871796e-05,
"loss": 0.3542,
"step": 380
},
{
"epoch": 0.41,
"learning_rate": 2.8144600200657953e-05,
"loss": 0.3364,
"step": 390
},
{
"epoch": 0.42,
"learning_rate": 2.8018263722720953e-05,
"loss": 0.3526,
"step": 400
},
{
"epoch": 0.43,
"learning_rate": 2.7888069652285765e-05,
"loss": 0.3457,
"step": 410
},
{
"epoch": 0.44,
"learning_rate": 2.775405656866529e-05,
"loss": 0.328,
"step": 420
},
{
"epoch": 0.45,
"learning_rate": 2.7616264182828423e-05,
"loss": 0.3445,
"step": 430
},
{
"epoch": 0.46,
"learning_rate": 2.747473332563291e-05,
"loss": 0.3503,
"step": 440
},
{
"epoch": 0.47,
"learning_rate": 2.7329505935726215e-05,
"loss": 0.3631,
"step": 450
},
{
"epoch": 0.48,
"learning_rate": 2.718062504711823e-05,
"loss": 0.3456,
"step": 460
},
{
"epoch": 0.49,
"learning_rate": 2.7028134776429372e-05,
"loss": 0.3373,
"step": 470
},
{
"epoch": 0.5,
"learning_rate": 2.6872080309817844e-05,
"loss": 0.3477,
"step": 480
},
{
"epoch": 0.51,
"learning_rate": 2.6712507889590042e-05,
"loss": 0.3603,
"step": 490
},
{
"epoch": 0.52,
"learning_rate": 2.654946480049793e-05,
"loss": 0.3242,
"step": 500
},
{
"epoch": 0.53,
"learning_rate": 2.6382999355727565e-05,
"loss": 0.3277,
"step": 510
},
{
"epoch": 0.54,
"learning_rate": 2.6213160882582855e-05,
"loss": 0.3424,
"step": 520
},
{
"epoch": 0.55,
"learning_rate": 2.6039999707868797e-05,
"loss": 0.3277,
"step": 530
},
{
"epoch": 0.56,
"learning_rate": 2.5863567142978596e-05,
"loss": 0.34,
"step": 540
},
{
"epoch": 0.57,
"learning_rate": 2.5683915468688945e-05,
"loss": 0.3086,
"step": 550
},
{
"epoch": 0.58,
"learning_rate": 2.5501097919668147e-05,
"loss": 0.3462,
"step": 560
},
{
"epoch": 0.59,
"learning_rate": 2.531516866870149e-05,
"loss": 0.3497,
"step": 570
},
{
"epoch": 0.6,
"learning_rate": 2.512618281063873e-05,
"loss": 0.3328,
"step": 580
},
{
"epoch": 0.61,
"learning_rate": 2.4934196346068248e-05,
"loss": 0.339,
"step": 590
},
{
"epoch": 0.62,
"learning_rate": 2.4739266164722916e-05,
"loss": 0.3323,
"step": 600
},
{
"epoch": 0.63,
"learning_rate": 2.4541450028622397e-05,
"loss": 0.344,
"step": 610
},
{
"epoch": 0.65,
"learning_rate": 2.4340806554957033e-05,
"loss": 0.3494,
"step": 620
},
{
"epoch": 0.66,
"learning_rate": 2.413739519871833e-05,
"loss": 0.3224,
"step": 630
},
{
"epoch": 0.67,
"learning_rate": 2.39312762350811e-05,
"loss": 0.3296,
"step": 640
},
{
"epoch": 0.68,
"learning_rate": 2.372251074154269e-05,
"loss": 0.3324,
"step": 650
},
{
"epoch": 0.69,
"learning_rate": 2.351116057982436e-05,
"loss": 0.3217,
"step": 660
},
{
"epoch": 0.7,
"learning_rate": 2.329728837754034e-05,
"loss": 0.344,
"step": 670
},
{
"epoch": 0.71,
"learning_rate": 2.3080957509639908e-05,
"loss": 0.3426,
"step": 680
},
{
"epoch": 0.72,
"learning_rate": 2.286223207962802e-05,
"loss": 0.3166,
"step": 690
},
{
"epoch": 0.73,
"learning_rate": 2.264117690057007e-05,
"loss": 0.3145,
"step": 700
},
{
"epoch": 0.74,
"learning_rate": 2.2417857475886383e-05,
"loss": 0.342,
"step": 710
},
{
"epoch": 0.75,
"learning_rate": 2.219233997994211e-05,
"loss": 0.3259,
"step": 720
},
{
"epoch": 0.76,
"learning_rate": 2.1964691238438374e-05,
"loss": 0.3195,
"step": 730
},
{
"epoch": 0.77,
"learning_rate": 2.173497870861034e-05,
"loss": 0.3442,
"step": 740
},
{
"epoch": 0.78,
"learning_rate": 2.1503270459238204e-05,
"loss": 0.3387,
"step": 750
},
{
"epoch": 0.79,
"learning_rate": 2.1269635150476917e-05,
"loss": 0.337,
"step": 760
},
{
"epoch": 0.8,
"learning_rate": 2.1034142013510735e-05,
"loss": 0.32,
"step": 770
},
{
"epoch": 0.81,
"learning_rate": 2.0796860830038506e-05,
"loss": 0.3168,
"step": 780
},
{
"epoch": 0.82,
"learning_rate": 2.055786191159583e-05,
"loss": 0.3229,
"step": 790
},
{
"epoch": 0.83,
"learning_rate": 2.0317216078720264e-05,
"loss": 0.3341,
"step": 800
},
{
"epoch": 0.84,
"learning_rate": 2.0074994639965654e-05,
"loss": 0.3204,
"step": 810
},
{
"epoch": 0.85,
"learning_rate": 1.9831269370771864e-05,
"loss": 0.3518,
"step": 820
},
{
"epoch": 0.86,
"learning_rate": 1.9586112492196187e-05,
"loss": 0.317,
"step": 830
},
{
"epoch": 0.87,
"learning_rate": 1.9339596649512653e-05,
"loss": 0.3297,
"step": 840
},
{
"epoch": 0.88,
"learning_rate": 1.9091794890685704e-05,
"loss": 0.3332,
"step": 850
},
{
"epoch": 0.89,
"learning_rate": 1.884278064472448e-05,
"loss": 0.3375,
"step": 860
},
{
"epoch": 0.91,
"learning_rate": 1.8592627699924218e-05,
"loss": 0.3372,
"step": 870
},
{
"epoch": 0.92,
"learning_rate": 1.8341410182001225e-05,
"loss": 0.3291,
"step": 880
},
{
"epoch": 0.93,
"learning_rate": 1.8089202532127756e-05,
"loss": 0.33,
"step": 890
},
{
"epoch": 0.94,
"learning_rate": 1.783607948487357e-05,
"loss": 0.3214,
"step": 900
},
{
"epoch": 0.95,
"learning_rate": 1.758211604606041e-05,
"loss": 0.3423,
"step": 910
},
{
"epoch": 0.96,
"learning_rate": 1.7327387470536174e-05,
"loss": 0.3171,
"step": 920
},
{
"epoch": 0.97,
"learning_rate": 1.7071969239875282e-05,
"loss": 0.3308,
"step": 930
},
{
"epoch": 0.98,
"learning_rate": 1.6815937040011855e-05,
"loss": 0.3234,
"step": 940
},
{
"epoch": 0.99,
"learning_rate": 1.6559366738812345e-05,
"loss": 0.3103,
"step": 950
},
{
"epoch": 1.0,
"learning_rate": 1.6302334363594227e-05,
"loss": 0.3305,
"step": 960
},
{
"epoch": 1.01,
"learning_rate": 1.6044916078597505e-05,
"loss": 0.289,
"step": 970
},
{
"epoch": 1.02,
"learning_rate": 1.578718816241556e-05,
"loss": 0.3036,
"step": 980
},
{
"epoch": 1.03,
"learning_rate": 1.5529226985392172e-05,
"loss": 0.2895,
"step": 990
},
{
"epoch": 1.04,
"learning_rate": 1.527110898699136e-05,
"loss": 0.2954,
"step": 1000
},
{
"epoch": 1.04,
"eval_loss": 0.450633704662323,
"eval_runtime": 13.7441,
"eval_samples_per_second": 3.201,
"eval_steps_per_second": 0.8,
"step": 1000
},
{
"epoch": 1.05,
"learning_rate": 1.5012910653146696e-05,
"loss": 0.2868,
"step": 1010
},
{
"epoch": 1.06,
"learning_rate": 1.475470849359687e-05,
"loss": 0.3139,
"step": 1020
},
{
"epoch": 1.07,
"learning_rate": 1.449657901921421e-05,
"loss": 0.2902,
"step": 1030
},
{
"epoch": 1.08,
"learning_rate": 1.4238598719332875e-05,
"loss": 0.3213,
"step": 1040
},
{
"epoch": 1.09,
"learning_rate": 1.398084403908339e-05,
"loss": 0.2874,
"step": 1050
},
{
"epoch": 1.1,
"learning_rate": 1.372339135674031e-05,
"loss": 0.3001,
"step": 1060
},
{
"epoch": 1.11,
"learning_rate": 1.3466316961089692e-05,
"loss": 0.3068,
"step": 1070
},
{
"epoch": 1.12,
"learning_rate": 1.3209697028823101e-05,
"loss": 0.281,
"step": 1080
},
{
"epoch": 1.13,
"learning_rate": 1.295360760196477e-05,
"loss": 0.2817,
"step": 1090
},
{
"epoch": 1.14,
"learning_rate": 1.2698124565338744e-05,
"loss": 0.2874,
"step": 1100
},
{
"epoch": 1.16,
"learning_rate": 1.244332362408255e-05,
"loss": 0.2759,
"step": 1110
},
{
"epoch": 1.17,
"learning_rate": 1.2189280281214128e-05,
"loss": 0.289,
"step": 1120
},
{
"epoch": 1.18,
"learning_rate": 1.193606981525869e-05,
"loss": 0.3084,
"step": 1130
},
{
"epoch": 1.19,
"learning_rate": 1.1683767257942017e-05,
"loss": 0.27,
"step": 1140
},
{
"epoch": 1.2,
"learning_rate": 1.143244737195698e-05,
"loss": 0.3034,
"step": 1150
},
{
"epoch": 1.21,
"learning_rate": 1.1182184628809725e-05,
"loss": 0.3149,
"step": 1160
},
{
"epoch": 1.22,
"learning_rate": 1.093305318675215e-05,
"loss": 0.3223,
"step": 1170
},
{
"epoch": 1.23,
"learning_rate": 1.068512686880725e-05,
"loss": 0.2817,
"step": 1180
},
{
"epoch": 1.24,
"learning_rate": 1.0438479140893711e-05,
"loss": 0.2904,
"step": 1190
},
{
"epoch": 1.25,
"learning_rate": 1.0193183090056407e-05,
"loss": 0.3009,
"step": 1200
},
{
"epoch": 1.26,
"learning_rate": 9.949311402809125e-06,
"loss": 0.2932,
"step": 1210
},
{
"epoch": 1.27,
"learning_rate": 9.706936343595973e-06,
"loss": 0.3013,
"step": 1220
},
{
"epoch": 1.28,
"learning_rate": 9.466129733377887e-06,
"loss": 0.2935,
"step": 1230
},
{
"epoch": 1.29,
"learning_rate": 9.226962928350503e-06,
"loss": 0.289,
"step": 1240
},
{
"epoch": 1.3,
"learning_rate": 8.989506798799793e-06,
"loss": 0.2822,
"step": 1250
},
{
"epoch": 1.31,
"learning_rate": 8.753831708101669e-06,
"loss": 0.3064,
"step": 1260
},
{
"epoch": 1.32,
"learning_rate": 8.520007491871777e-06,
"loss": 0.3173,
"step": 1270
},
{
"epoch": 1.33,
"learning_rate": 8.288103437271743e-06,
"loss": 0.2797,
"step": 1280
},
{
"epoch": 1.34,
"learning_rate": 8.058188262477835e-06,
"loss": 0.2604,
"step": 1290
},
{
"epoch": 1.35,
"learning_rate": 7.830330096318326e-06,
"loss": 0.2976,
"step": 1300
},
{
"epoch": 1.36,
"learning_rate": 7.604596458085425e-06,
"loss": 0.2776,
"step": 1310
},
{
"epoch": 1.37,
"learning_rate": 7.381054237527869e-06,
"loss": 0.288,
"step": 1320
},
{
"epoch": 1.38,
"learning_rate": 7.159769675030054e-06,
"loss": 0.2975,
"step": 1330
},
{
"epoch": 1.39,
"learning_rate": 6.940808341983558e-06,
"loss": 0.2845,
"step": 1340
},
{
"epoch": 1.4,
"learning_rate": 6.724235121356932e-06,
"loss": 0.2831,
"step": 1350
},
{
"epoch": 1.42,
"learning_rate": 6.510114188469482e-06,
"loss": 0.2933,
"step": 1360
},
{
"epoch": 1.43,
"learning_rate": 6.298508991974726e-06,
"loss": 0.2913,
"step": 1370
},
{
"epoch": 1.44,
"learning_rate": 6.089482235059209e-06,
"loss": 0.2855,
"step": 1380
},
{
"epoch": 1.45,
"learning_rate": 5.8830958568621676e-06,
"loss": 0.3165,
"step": 1390
},
{
"epoch": 1.46,
"learning_rate": 5.679411014121645e-06,
"loss": 0.2847,
"step": 1400
},
{
"epoch": 1.47,
"learning_rate": 5.478488063052423e-06,
"loss": 0.2904,
"step": 1410
},
{
"epoch": 1.48,
"learning_rate": 5.280386541461171e-06,
"loss": 0.2815,
"step": 1420
},
{
"epoch": 1.49,
"learning_rate": 5.085165151104125e-06,
"loss": 0.3055,
"step": 1430
},
{
"epoch": 1.5,
"learning_rate": 4.892881740292471e-06,
"loss": 0.2837,
"step": 1440
},
{
"epoch": 1.51,
"learning_rate": 4.7035932867506446e-06,
"loss": 0.2791,
"step": 1450
},
{
"epoch": 1.52,
"learning_rate": 4.51735588073261e-06,
"loss": 0.2597,
"step": 1460
},
{
"epoch": 1.53,
"learning_rate": 4.334224708401099e-06,
"loss": 0.2961,
"step": 1470
},
{
"epoch": 1.54,
"learning_rate": 4.154254035474777e-06,
"loss": 0.2974,
"step": 1480
},
{
"epoch": 1.55,
"learning_rate": 3.977497191148111e-06,
"loss": 0.2864,
"step": 1490
},
{
"epoch": 1.56,
"learning_rate": 3.804006552288802e-06,
"loss": 0.2961,
"step": 1500
},
{
"epoch": 1.57,
"learning_rate": 3.633833527917379e-06,
"loss": 0.2924,
"step": 1510
},
{
"epoch": 1.58,
"learning_rate": 3.467028543973591e-06,
"loss": 0.2942,
"step": 1520
},
{
"epoch": 1.59,
"learning_rate": 3.303641028374138e-06,
"loss": 0.2837,
"step": 1530
},
{
"epoch": 1.6,
"learning_rate": 3.1437193963660797e-06,
"loss": 0.3026,
"step": 1540
},
{
"epoch": 1.61,
"learning_rate": 2.987311036180369e-06,
"loss": 0.2928,
"step": 1550
},
{
"epoch": 1.62,
"learning_rate": 2.8344622949896986e-06,
"loss": 0.2862,
"step": 1560
},
{
"epoch": 1.63,
"learning_rate": 2.685218465174828e-06,
"loss": 0.2872,
"step": 1570
},
{
"epoch": 1.64,
"learning_rate": 2.5396237709034816e-06,
"loss": 0.3098,
"step": 1580
},
{
"epoch": 1.65,
"learning_rate": 2.3977213550257583e-06,
"loss": 0.2866,
"step": 1590
},
{
"epoch": 1.66,
"learning_rate": 2.2595532662899915e-06,
"loss": 0.3035,
"step": 1600
},
{
"epoch": 1.68,
"learning_rate": 2.1251604468827823e-06,
"loss": 0.3049,
"step": 1610
},
{
"epoch": 1.69,
"learning_rate": 1.994582720296954e-06,
"loss": 0.2678,
"step": 1620
},
{
"epoch": 1.7,
"learning_rate": 1.8678587795309971e-06,
"loss": 0.2966,
"step": 1630
},
{
"epoch": 1.71,
"learning_rate": 1.7450261756234849e-06,
"loss": 0.2787,
"step": 1640
},
{
"epoch": 1.72,
"learning_rate": 1.6261213065258984e-06,
"loss": 0.2957,
"step": 1650
},
{
"epoch": 1.73,
"learning_rate": 1.511179406317131e-06,
"loss": 0.2747,
"step": 1660
},
{
"epoch": 1.74,
"learning_rate": 1.4002345347628598e-06,
"loss": 0.2764,
"step": 1670
},
{
"epoch": 1.75,
"learning_rate": 1.293319567222918e-06,
"loss": 0.2983,
"step": 1680
},
{
"epoch": 1.76,
"learning_rate": 1.1904661849095993e-06,
"loss": 0.2981,
"step": 1690
},
{
"epoch": 1.77,
"learning_rate": 1.0917048654998407e-06,
"loss": 0.3148,
"step": 1700
},
{
"epoch": 1.78,
"learning_rate": 9.970648741040273e-07,
"loss": 0.3123,
"step": 1710
},
{
"epoch": 1.79,
"learning_rate": 9.065742545941052e-07,
"loss": 0.2972,
"step": 1720
},
{
"epoch": 1.8,
"learning_rate": 8.202598212935897e-07,
"loss": 0.271,
"step": 1730
},
{
"epoch": 1.81,
"learning_rate": 7.381471510318799e-07,
"loss": 0.2761,
"step": 1740
},
{
"epoch": 1.82,
"learning_rate": 6.602605755653124e-07,
"loss": 0.3004,
"step": 1750
},
{
"epoch": 1.83,
"learning_rate": 5.866231743671296e-07,
"loss": 0.2771,
"step": 1760
},
{
"epoch": 1.84,
"learning_rate": 5.172567677885276e-07,
"loss": 0.2985,
"step": 1770
},
{
"epoch": 1.85,
"learning_rate": 4.521819105928293e-07,
"loss": 0.2873,
"step": 1780
},
{
"epoch": 1.86,
"learning_rate": 3.9141788586465224e-07,
"loss": 0.2869,
"step": 1790
},
{
"epoch": 1.87,
"learning_rate": 3.349826992959154e-07,
"loss": 0.2814,
"step": 1800
},
{
"epoch": 1.88,
"learning_rate": 2.828930738503727e-07,
"loss": 0.2875,
"step": 1810
},
{
"epoch": 1.89,
"learning_rate": 2.3516444480822962e-07,
"loss": 0.3106,
"step": 1820
},
{
"epoch": 1.9,
"learning_rate": 1.9181095519233748e-07,
"loss": 0.2933,
"step": 1830
},
{
"epoch": 1.91,
"learning_rate": 1.52845451577317e-07,
"loss": 0.2798,
"step": 1840
},
{
"epoch": 1.93,
"learning_rate": 1.1827948028283353e-07,
"loss": 0.3051,
"step": 1850
},
{
"epoch": 1.94,
"learning_rate": 8.812328395217395e-08,
"loss": 0.2644,
"step": 1860
},
{
"epoch": 1.95,
"learning_rate": 6.238579851713611e-08,
"loss": 0.3041,
"step": 1870
},
{
"epoch": 1.96,
"learning_rate": 4.1074650550116946e-08,
"loss": 0.2848,
"step": 1880
},
{
"epoch": 1.97,
"learning_rate": 2.4196155004190256e-08,
"loss": 0.2695,
"step": 1890
},
{
"epoch": 1.98,
"learning_rate": 1.175531334185187e-08,
"loss": 0.2958,
"step": 1900
},
{
"epoch": 1.99,
"learning_rate": 3.755812052983476e-09,
"loss": 0.2994,
"step": 1910
},
{
"epoch": 2.0,
"learning_rate": 2.0002156246146096e-10,
"loss": 0.2929,
"step": 1920
},
{
"epoch": 2.0,
"step": 1922,
"total_flos": 3.4599459174219776e+17,
"train_loss": 0.3247208358966101,
"train_runtime": 7447.4364,
"train_samples_per_second": 1.032,
"train_steps_per_second": 0.258
}
],
"logging_steps": 10,
"max_steps": 1922,
"num_input_tokens_seen": 0,
"num_train_epochs": 2,
"save_steps": 1000,
"total_flos": 3.4599459174219776e+17,
"train_batch_size": 4,
"trial_name": null,
"trial_params": null
}