{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 2.8149190710767065,
"eval_steps": 500,
"global_step": 1000,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.0028149190710767065,
"grad_norm": 0.3785315454006195,
"learning_rate": 4e-05,
"loss": 2.7534,
"step": 1
},
{
"epoch": 0.005629838142153413,
"grad_norm": 0.3462165296077728,
"learning_rate": 8e-05,
"loss": 2.7611,
"step": 2
},
{
"epoch": 0.00844475721323012,
"grad_norm": 0.41192108392715454,
"learning_rate": 0.00012,
"loss": 2.9121,
"step": 3
},
{
"epoch": 0.011259676284306826,
"grad_norm": 0.4809342324733734,
"learning_rate": 0.00016,
"loss": 2.9178,
"step": 4
},
{
"epoch": 0.014074595355383532,
"grad_norm": 0.4055400490760803,
"learning_rate": 0.0002,
"loss": 2.9431,
"step": 5
},
{
"epoch": 0.01688951442646024,
"grad_norm": 0.4463144838809967,
"learning_rate": 0.00019979899497487438,
"loss": 2.7718,
"step": 6
},
{
"epoch": 0.019704433497536946,
"grad_norm": 0.49435216188430786,
"learning_rate": 0.00019959798994974876,
"loss": 2.7197,
"step": 7
},
{
"epoch": 0.022519352568613652,
"grad_norm": 0.5844013690948486,
"learning_rate": 0.00019939698492462313,
"loss": 2.7076,
"step": 8
},
{
"epoch": 0.025334271639690358,
"grad_norm": 0.6181543469429016,
"learning_rate": 0.0001991959798994975,
"loss": 2.8055,
"step": 9
},
{
"epoch": 0.028149190710767064,
"grad_norm": 0.7984749674797058,
"learning_rate": 0.00019899497487437187,
"loss": 2.7629,
"step": 10
},
{
"epoch": 0.03096410978184377,
"grad_norm": 0.6961840391159058,
"learning_rate": 0.00019879396984924622,
"loss": 2.6917,
"step": 11
},
{
"epoch": 0.03377902885292048,
"grad_norm": 0.837709367275238,
"learning_rate": 0.00019859296482412062,
"loss": 2.6108,
"step": 12
},
{
"epoch": 0.036593947923997186,
"grad_norm": 0.7435119152069092,
"learning_rate": 0.000198391959798995,
"loss": 2.7828,
"step": 13
},
{
"epoch": 0.03940886699507389,
"grad_norm": 0.8047707080841064,
"learning_rate": 0.00019819095477386937,
"loss": 2.7011,
"step": 14
},
{
"epoch": 0.0422237860661506,
"grad_norm": 0.9793757796287537,
"learning_rate": 0.0001979899497487437,
"loss": 2.865,
"step": 15
},
{
"epoch": 0.045038705137227304,
"grad_norm": 0.7536874413490295,
"learning_rate": 0.0001977889447236181,
"loss": 2.7922,
"step": 16
},
{
"epoch": 0.04785362420830401,
"grad_norm": 0.6820270419120789,
"learning_rate": 0.00019758793969849249,
"loss": 3.1034,
"step": 17
},
{
"epoch": 0.050668543279380716,
"grad_norm": 0.7651283144950867,
"learning_rate": 0.00019738693467336683,
"loss": 2.8736,
"step": 18
},
{
"epoch": 0.05348346235045742,
"grad_norm": 0.5590704083442688,
"learning_rate": 0.0001971859296482412,
"loss": 2.6802,
"step": 19
},
{
"epoch": 0.05629838142153413,
"grad_norm": 0.5996040105819702,
"learning_rate": 0.0001969849246231156,
"loss": 2.6352,
"step": 20
},
{
"epoch": 0.059113300492610835,
"grad_norm": 0.6097638607025146,
"learning_rate": 0.00019678391959798995,
"loss": 2.5763,
"step": 21
},
{
"epoch": 0.06192821956368754,
"grad_norm": 0.5201358795166016,
"learning_rate": 0.00019658291457286432,
"loss": 2.6492,
"step": 22
},
{
"epoch": 0.06474313863476425,
"grad_norm": 0.8090603351593018,
"learning_rate": 0.0001963819095477387,
"loss": 2.5719,
"step": 23
},
{
"epoch": 0.06755805770584096,
"grad_norm": 0.6470005512237549,
"learning_rate": 0.0001961809045226131,
"loss": 2.9084,
"step": 24
},
{
"epoch": 0.07037297677691766,
"grad_norm": 0.6126617193222046,
"learning_rate": 0.00019597989949748744,
"loss": 2.7863,
"step": 25
},
{
"epoch": 0.07318789584799437,
"grad_norm": 0.5378536581993103,
"learning_rate": 0.00019577889447236181,
"loss": 2.6437,
"step": 26
},
{
"epoch": 0.07600281491907107,
"grad_norm": 0.6851357817649841,
"learning_rate": 0.0001955778894472362,
"loss": 2.6539,
"step": 27
},
{
"epoch": 0.07881773399014778,
"grad_norm": 0.6153799295425415,
"learning_rate": 0.00019537688442211056,
"loss": 2.67,
"step": 28
},
{
"epoch": 0.08163265306122448,
"grad_norm": 0.5324752926826477,
"learning_rate": 0.00019517587939698493,
"loss": 2.6674,
"step": 29
},
{
"epoch": 0.0844475721323012,
"grad_norm": 0.5797032713890076,
"learning_rate": 0.0001949748743718593,
"loss": 2.5109,
"step": 30
},
{
"epoch": 0.08726249120337791,
"grad_norm": 0.6937679052352905,
"learning_rate": 0.00019477386934673368,
"loss": 2.7514,
"step": 31
},
{
"epoch": 0.09007741027445461,
"grad_norm": 0.6234177350997925,
"learning_rate": 0.00019457286432160805,
"loss": 2.6147,
"step": 32
},
{
"epoch": 0.09289232934553132,
"grad_norm": 0.6435564756393433,
"learning_rate": 0.00019437185929648243,
"loss": 2.6747,
"step": 33
},
{
"epoch": 0.09570724841660802,
"grad_norm": 0.6985692381858826,
"learning_rate": 0.0001941708542713568,
"loss": 2.7223,
"step": 34
},
{
"epoch": 0.09852216748768473,
"grad_norm": 0.6003565788269043,
"learning_rate": 0.00019396984924623117,
"loss": 2.6478,
"step": 35
},
{
"epoch": 0.10133708655876143,
"grad_norm": 0.7325728535652161,
"learning_rate": 0.00019376884422110552,
"loss": 2.855,
"step": 36
},
{
"epoch": 0.10415200562983815,
"grad_norm": 0.6490616798400879,
"learning_rate": 0.00019356783919597992,
"loss": 2.8664,
"step": 37
},
{
"epoch": 0.10696692470091484,
"grad_norm": 0.6137815713882446,
"learning_rate": 0.0001933668341708543,
"loss": 2.5355,
"step": 38
},
{
"epoch": 0.10978184377199156,
"grad_norm": 0.6218917369842529,
"learning_rate": 0.00019316582914572864,
"loss": 2.7318,
"step": 39
},
{
"epoch": 0.11259676284306826,
"grad_norm": 0.6341124773025513,
"learning_rate": 0.000192964824120603,
"loss": 2.5333,
"step": 40
},
{
"epoch": 0.11541168191414497,
"grad_norm": 0.5556070804595947,
"learning_rate": 0.0001927638190954774,
"loss": 2.9166,
"step": 41
},
{
"epoch": 0.11822660098522167,
"grad_norm": 0.5476509928703308,
"learning_rate": 0.00019256281407035178,
"loss": 2.8262,
"step": 42
},
{
"epoch": 0.12104152005629838,
"grad_norm": 0.8177505731582642,
"learning_rate": 0.00019236180904522613,
"loss": 2.9477,
"step": 43
},
{
"epoch": 0.12385643912737508,
"grad_norm": 0.6593706011772156,
"learning_rate": 0.0001921608040201005,
"loss": 2.8449,
"step": 44
},
{
"epoch": 0.1266713581984518,
"grad_norm": 0.59237140417099,
"learning_rate": 0.0001919597989949749,
"loss": 2.6079,
"step": 45
},
{
"epoch": 0.1294862772695285,
"grad_norm": 0.5167338252067566,
"learning_rate": 0.00019175879396984925,
"loss": 2.6834,
"step": 46
},
{
"epoch": 0.13230119634060522,
"grad_norm": 0.5484845042228699,
"learning_rate": 0.00019155778894472362,
"loss": 2.5793,
"step": 47
},
{
"epoch": 0.13511611541168192,
"grad_norm": 0.5930073261260986,
"learning_rate": 0.000191356783919598,
"loss": 2.757,
"step": 48
},
{
"epoch": 0.13793103448275862,
"grad_norm": 0.6741965413093567,
"learning_rate": 0.0001911557788944724,
"loss": 2.7182,
"step": 49
},
{
"epoch": 0.14074595355383532,
"grad_norm": 0.558120608329773,
"learning_rate": 0.00019095477386934674,
"loss": 2.6401,
"step": 50
},
{
"epoch": 0.14356087262491204,
"grad_norm": 0.6161705255508423,
"learning_rate": 0.0001907537688442211,
"loss": 2.6181,
"step": 51
},
{
"epoch": 0.14637579169598874,
"grad_norm": 0.6661592721939087,
"learning_rate": 0.00019055276381909548,
"loss": 2.7207,
"step": 52
},
{
"epoch": 0.14919071076706544,
"grad_norm": 0.5285555720329285,
"learning_rate": 0.00019035175879396986,
"loss": 2.5631,
"step": 53
},
{
"epoch": 0.15200562983814214,
"grad_norm": 0.6050645709037781,
"learning_rate": 0.00019015075376884423,
"loss": 2.4716,
"step": 54
},
{
"epoch": 0.15482054890921887,
"grad_norm": 0.6041057109832764,
"learning_rate": 0.0001899497487437186,
"loss": 2.7241,
"step": 55
},
{
"epoch": 0.15763546798029557,
"grad_norm": 0.6147128343582153,
"learning_rate": 0.00018974874371859298,
"loss": 2.6362,
"step": 56
},
{
"epoch": 0.16045038705137227,
"grad_norm": 0.5417614579200745,
"learning_rate": 0.00018954773869346732,
"loss": 2.8325,
"step": 57
},
{
"epoch": 0.16326530612244897,
"grad_norm": 0.5944551229476929,
"learning_rate": 0.00018934673366834172,
"loss": 2.6793,
"step": 58
},
{
"epoch": 0.1660802251935257,
"grad_norm": 0.6394937634468079,
"learning_rate": 0.0001891457286432161,
"loss": 2.721,
"step": 59
},
{
"epoch": 0.1688951442646024,
"grad_norm": 0.5581662058830261,
"learning_rate": 0.00018894472361809047,
"loss": 2.6786,
"step": 60
},
{
"epoch": 0.1717100633356791,
"grad_norm": 0.5921449065208435,
"learning_rate": 0.00018874371859296481,
"loss": 2.5239,
"step": 61
},
{
"epoch": 0.17452498240675582,
"grad_norm": 0.5900184512138367,
"learning_rate": 0.00018854271356783921,
"loss": 2.6708,
"step": 62
},
{
"epoch": 0.17733990147783252,
"grad_norm": 0.6194185614585876,
"learning_rate": 0.0001883417085427136,
"loss": 2.9428,
"step": 63
},
{
"epoch": 0.18015482054890922,
"grad_norm": 0.629349410533905,
"learning_rate": 0.00018814070351758793,
"loss": 2.6705,
"step": 64
},
{
"epoch": 0.18296973961998592,
"grad_norm": 0.5497152805328369,
"learning_rate": 0.0001879396984924623,
"loss": 2.8205,
"step": 65
},
{
"epoch": 0.18578465869106264,
"grad_norm": 0.5276259779930115,
"learning_rate": 0.0001877386934673367,
"loss": 2.5922,
"step": 66
},
{
"epoch": 0.18859957776213934,
"grad_norm": 0.7193230390548706,
"learning_rate": 0.00018753768844221108,
"loss": 2.867,
"step": 67
},
{
"epoch": 0.19141449683321604,
"grad_norm": 0.6483210325241089,
"learning_rate": 0.00018733668341708543,
"loss": 2.9455,
"step": 68
},
{
"epoch": 0.19422941590429274,
"grad_norm": 0.7181980013847351,
"learning_rate": 0.0001871356783919598,
"loss": 2.7443,
"step": 69
},
{
"epoch": 0.19704433497536947,
"grad_norm": 0.6001389026641846,
"learning_rate": 0.0001869346733668342,
"loss": 2.6464,
"step": 70
},
{
"epoch": 0.19985925404644617,
"grad_norm": 0.7344582080841064,
"learning_rate": 0.00018673366834170854,
"loss": 2.6694,
"step": 71
},
{
"epoch": 0.20267417311752287,
"grad_norm": 0.6493490934371948,
"learning_rate": 0.00018653266331658292,
"loss": 2.6506,
"step": 72
},
{
"epoch": 0.20548909218859956,
"grad_norm": 0.5350422859191895,
"learning_rate": 0.0001863316582914573,
"loss": 2.6709,
"step": 73
},
{
"epoch": 0.2083040112596763,
"grad_norm": 0.5754289031028748,
"learning_rate": 0.0001861306532663317,
"loss": 2.3618,
"step": 74
},
{
"epoch": 0.211118930330753,
"grad_norm": 0.6207188367843628,
"learning_rate": 0.00018592964824120604,
"loss": 2.7498,
"step": 75
},
{
"epoch": 0.2139338494018297,
"grad_norm": 0.5524656176567078,
"learning_rate": 0.0001857286432160804,
"loss": 2.4996,
"step": 76
},
{
"epoch": 0.21674876847290642,
"grad_norm": 0.7466227412223816,
"learning_rate": 0.00018552763819095478,
"loss": 2.944,
"step": 77
},
{
"epoch": 0.21956368754398312,
"grad_norm": 0.6438124179840088,
"learning_rate": 0.00018532663316582915,
"loss": 2.7136,
"step": 78
},
{
"epoch": 0.22237860661505982,
"grad_norm": 0.6562415957450867,
"learning_rate": 0.00018512562814070353,
"loss": 2.797,
"step": 79
},
{
"epoch": 0.22519352568613651,
"grad_norm": 0.6599562168121338,
"learning_rate": 0.0001849246231155779,
"loss": 2.511,
"step": 80
},
{
"epoch": 0.22800844475721324,
"grad_norm": 0.6012830138206482,
"learning_rate": 0.00018472361809045227,
"loss": 2.5117,
"step": 81
},
{
"epoch": 0.23082336382828994,
"grad_norm": 0.5618470907211304,
"learning_rate": 0.00018452261306532662,
"loss": 2.7258,
"step": 82
},
{
"epoch": 0.23363828289936664,
"grad_norm": 0.7711282968521118,
"learning_rate": 0.00018432160804020102,
"loss": 2.8518,
"step": 83
},
{
"epoch": 0.23645320197044334,
"grad_norm": 0.5676078200340271,
"learning_rate": 0.0001841206030150754,
"loss": 2.7253,
"step": 84
},
{
"epoch": 0.23926812104152007,
"grad_norm": 0.8567176461219788,
"learning_rate": 0.00018391959798994977,
"loss": 2.767,
"step": 85
},
{
"epoch": 0.24208304011259676,
"grad_norm": 0.5816414952278137,
"learning_rate": 0.0001837185929648241,
"loss": 2.5211,
"step": 86
},
{
"epoch": 0.24489795918367346,
"grad_norm": 0.5357186198234558,
"learning_rate": 0.0001835175879396985,
"loss": 2.5882,
"step": 87
},
{
"epoch": 0.24771287825475016,
"grad_norm": 0.5406627655029297,
"learning_rate": 0.00018331658291457288,
"loss": 2.3529,
"step": 88
},
{
"epoch": 0.25052779732582686,
"grad_norm": 0.9183681607246399,
"learning_rate": 0.00018311557788944723,
"loss": 2.9405,
"step": 89
},
{
"epoch": 0.2533427163969036,
"grad_norm": 0.5938777327537537,
"learning_rate": 0.0001829145728643216,
"loss": 2.8762,
"step": 90
},
{
"epoch": 0.2561576354679803,
"grad_norm": 0.559532880783081,
"learning_rate": 0.000182713567839196,
"loss": 2.4396,
"step": 91
},
{
"epoch": 0.258972554539057,
"grad_norm": 0.8062023520469666,
"learning_rate": 0.00018251256281407038,
"loss": 2.8835,
"step": 92
},
{
"epoch": 0.2617874736101337,
"grad_norm": 0.5407679080963135,
"learning_rate": 0.00018231155778894472,
"loss": 2.5692,
"step": 93
},
{
"epoch": 0.26460239268121044,
"grad_norm": 0.5537972450256348,
"learning_rate": 0.0001821105527638191,
"loss": 2.4051,
"step": 94
},
{
"epoch": 0.2674173117522871,
"grad_norm": 0.6128715872764587,
"learning_rate": 0.0001819095477386935,
"loss": 2.6613,
"step": 95
},
{
"epoch": 0.27023223082336384,
"grad_norm": 0.9666823148727417,
"learning_rate": 0.00018170854271356784,
"loss": 2.6943,
"step": 96
},
{
"epoch": 0.2730471498944405,
"grad_norm": 0.587451696395874,
"learning_rate": 0.00018150753768844221,
"loss": 2.5654,
"step": 97
},
{
"epoch": 0.27586206896551724,
"grad_norm": 0.6436663269996643,
"learning_rate": 0.0001813065326633166,
"loss": 2.5957,
"step": 98
},
{
"epoch": 0.27867698803659396,
"grad_norm": 0.5880750417709351,
"learning_rate": 0.00018110552763819096,
"loss": 2.8445,
"step": 99
},
{
"epoch": 0.28149190710767064,
"grad_norm": 0.5972994565963745,
"learning_rate": 0.00018090452261306533,
"loss": 2.6186,
"step": 100
},
{
"epoch": 0.28430682617874736,
"grad_norm": 0.5434820652008057,
"learning_rate": 0.0001807035175879397,
"loss": 2.8376,
"step": 101
},
{
"epoch": 0.2871217452498241,
"grad_norm": 0.5735207200050354,
"learning_rate": 0.00018050251256281408,
"loss": 2.4118,
"step": 102
},
{
"epoch": 0.28993666432090076,
"grad_norm": 0.5313388705253601,
"learning_rate": 0.00018030150753768845,
"loss": 2.468,
"step": 103
},
{
"epoch": 0.2927515833919775,
"grad_norm": 0.6161223649978638,
"learning_rate": 0.00018010050251256282,
"loss": 2.663,
"step": 104
},
{
"epoch": 0.2955665024630542,
"grad_norm": 0.5644655227661133,
"learning_rate": 0.0001798994974874372,
"loss": 2.3672,
"step": 105
},
{
"epoch": 0.2983814215341309,
"grad_norm": 0.6080154776573181,
"learning_rate": 0.00017969849246231157,
"loss": 2.6672,
"step": 106
},
{
"epoch": 0.3011963406052076,
"grad_norm": 0.5323423147201538,
"learning_rate": 0.00017949748743718592,
"loss": 2.8084,
"step": 107
},
{
"epoch": 0.3040112596762843,
"grad_norm": 0.5441535711288452,
"learning_rate": 0.00017929648241206032,
"loss": 2.5269,
"step": 108
},
{
"epoch": 0.306826178747361,
"grad_norm": 0.5068178772926331,
"learning_rate": 0.0001790954773869347,
"loss": 2.5472,
"step": 109
},
{
"epoch": 0.30964109781843774,
"grad_norm": 0.6056650876998901,
"learning_rate": 0.00017889447236180906,
"loss": 2.6083,
"step": 110
},
{
"epoch": 0.3124560168895144,
"grad_norm": 0.5633851885795593,
"learning_rate": 0.0001786934673366834,
"loss": 2.5353,
"step": 111
},
{
"epoch": 0.31527093596059114,
"grad_norm": 0.6467467546463013,
"learning_rate": 0.0001784924623115578,
"loss": 2.7402,
"step": 112
},
{
"epoch": 0.31808585503166786,
"grad_norm": 0.590074360370636,
"learning_rate": 0.00017829145728643218,
"loss": 2.7417,
"step": 113
},
{
"epoch": 0.32090077410274453,
"grad_norm": 0.5952100157737732,
"learning_rate": 0.00017809045226130653,
"loss": 2.4225,
"step": 114
},
{
"epoch": 0.32371569317382126,
"grad_norm": 0.5567030310630798,
"learning_rate": 0.0001778894472361809,
"loss": 2.5335,
"step": 115
},
{
"epoch": 0.32653061224489793,
"grad_norm": 0.6068913340568542,
"learning_rate": 0.0001776884422110553,
"loss": 2.6689,
"step": 116
},
{
"epoch": 0.32934553131597466,
"grad_norm": 0.5481736660003662,
"learning_rate": 0.00017748743718592967,
"loss": 2.5618,
"step": 117
},
{
"epoch": 0.3321604503870514,
"grad_norm": 0.5849531888961792,
"learning_rate": 0.00017728643216080402,
"loss": 2.8453,
"step": 118
},
{
"epoch": 0.33497536945812806,
"grad_norm": 0.6313461065292358,
"learning_rate": 0.0001770854271356784,
"loss": 2.6883,
"step": 119
},
{
"epoch": 0.3377902885292048,
"grad_norm": 1.3009490966796875,
"learning_rate": 0.0001768844221105528,
"loss": 2.5748,
"step": 120
},
{
"epoch": 0.3406052076002815,
"grad_norm": 0.5591140985488892,
"learning_rate": 0.00017668341708542714,
"loss": 2.7251,
"step": 121
},
{
"epoch": 0.3434201266713582,
"grad_norm": 0.5167106986045837,
"learning_rate": 0.0001764824120603015,
"loss": 2.6753,
"step": 122
},
{
"epoch": 0.3462350457424349,
"grad_norm": 0.6912369132041931,
"learning_rate": 0.00017628140703517588,
"loss": 2.9036,
"step": 123
},
{
"epoch": 0.34904996481351164,
"grad_norm": 0.7323533892631531,
"learning_rate": 0.00017608040201005026,
"loss": 2.7168,
"step": 124
},
{
"epoch": 0.3518648838845883,
"grad_norm": 0.5843552947044373,
"learning_rate": 0.00017587939698492463,
"loss": 2.5335,
"step": 125
},
{
"epoch": 0.35467980295566504,
"grad_norm": 0.6250912547111511,
"learning_rate": 0.000175678391959799,
"loss": 2.7493,
"step": 126
},
{
"epoch": 0.3574947220267417,
"grad_norm": 0.5447134375572205,
"learning_rate": 0.00017547738693467338,
"loss": 2.5758,
"step": 127
},
{
"epoch": 0.36030964109781843,
"grad_norm": 0.8142397403717041,
"learning_rate": 0.00017527638190954775,
"loss": 2.8021,
"step": 128
},
{
"epoch": 0.36312456016889516,
"grad_norm": null,
"learning_rate": 0.00017527638190954775,
"loss": 2.6983,
"step": 129
},
{
"epoch": 0.36593947923997183,
"grad_norm": 0.5528063774108887,
"learning_rate": 0.00017507537688442212,
"loss": 2.695,
"step": 130
},
{
"epoch": 0.36875439831104856,
"grad_norm": 0.605383574962616,
"learning_rate": 0.0001748743718592965,
"loss": 2.7013,
"step": 131
},
{
"epoch": 0.3715693173821253,
"grad_norm": 0.627310037612915,
"learning_rate": 0.00017467336683417087,
"loss": 2.7744,
"step": 132
},
{
"epoch": 0.37438423645320196,
"grad_norm": 0.6117985844612122,
"learning_rate": 0.00017447236180904521,
"loss": 2.9001,
"step": 133
},
{
"epoch": 0.3771991555242787,
"grad_norm": 0.5570118427276611,
"learning_rate": 0.00017427135678391961,
"loss": 2.6795,
"step": 134
},
{
"epoch": 0.3800140745953554,
"grad_norm": 0.6382287740707397,
"learning_rate": 0.000174070351758794,
"loss": 2.8177,
"step": 135
},
{
"epoch": 0.3828289936664321,
"grad_norm": 0.7003315091133118,
"learning_rate": 0.00017386934673366836,
"loss": 2.531,
"step": 136
},
{
"epoch": 0.3856439127375088,
"grad_norm": 0.5270616412162781,
"learning_rate": 0.0001736683417085427,
"loss": 2.7267,
"step": 137
},
{
"epoch": 0.3884588318085855,
"grad_norm": 0.6856080889701843,
"learning_rate": 0.0001734673366834171,
"loss": 2.8481,
"step": 138
},
{
"epoch": 0.3912737508796622,
"grad_norm": 0.7767484784126282,
"learning_rate": 0.00017326633165829148,
"loss": 2.7403,
"step": 139
},
{
"epoch": 0.39408866995073893,
"grad_norm": 0.5755979418754578,
"learning_rate": 0.00017306532663316582,
"loss": 2.6526,
"step": 140
},
{
"epoch": 0.3969035890218156,
"grad_norm": 0.6108975410461426,
"learning_rate": 0.0001728643216080402,
"loss": 2.5531,
"step": 141
},
{
"epoch": 0.39971850809289233,
"grad_norm": 0.6080026030540466,
"learning_rate": 0.0001726633165829146,
"loss": 2.7344,
"step": 142
},
{
"epoch": 0.40253342716396906,
"grad_norm": 0.5954862833023071,
"learning_rate": 0.00017246231155778897,
"loss": 2.6179,
"step": 143
},
{
"epoch": 0.40534834623504573,
"grad_norm": 0.7604647874832153,
"learning_rate": 0.00017226130653266332,
"loss": 2.8556,
"step": 144
},
{
"epoch": 0.40816326530612246,
"grad_norm": 0.540407657623291,
"learning_rate": 0.0001720603015075377,
"loss": 2.671,
"step": 145
},
{
"epoch": 0.41097818437719913,
"grad_norm": 0.5598605871200562,
"learning_rate": 0.00017185929648241206,
"loss": 2.697,
"step": 146
},
{
"epoch": 0.41379310344827586,
"grad_norm": 0.6206982135772705,
"learning_rate": 0.00017165829145728644,
"loss": 2.5687,
"step": 147
},
{
"epoch": 0.4166080225193526,
"grad_norm": 0.5486766695976257,
"learning_rate": 0.0001714572864321608,
"loss": 2.8242,
"step": 148
},
{
"epoch": 0.41942294159042925,
"grad_norm": 0.7132663130760193,
"learning_rate": 0.00017125628140703518,
"loss": 2.8706,
"step": 149
},
{
"epoch": 0.422237860661506,
"grad_norm": 0.6359018087387085,
"learning_rate": 0.00017105527638190955,
"loss": 2.773,
"step": 150
},
{
"epoch": 0.4250527797325827,
"grad_norm": 0.5943129062652588,
"learning_rate": 0.00017085427135678393,
"loss": 2.6535,
"step": 151
},
{
"epoch": 0.4278676988036594,
"grad_norm": 0.6567736864089966,
"learning_rate": 0.0001706532663316583,
"loss": 2.6059,
"step": 152
},
{
"epoch": 0.4306826178747361,
"grad_norm": 0.6345821619033813,
"learning_rate": 0.00017045226130653267,
"loss": 2.5776,
"step": 153
},
{
"epoch": 0.43349753694581283,
"grad_norm": 0.9386352896690369,
"learning_rate": 0.00017025125628140705,
"loss": 2.7449,
"step": 154
},
{
"epoch": 0.4363124560168895,
"grad_norm": 0.5455414652824402,
"learning_rate": 0.00017005025125628142,
"loss": 2.3967,
"step": 155
},
{
"epoch": 0.43912737508796623,
"grad_norm": 0.7040349841117859,
"learning_rate": 0.0001698492462311558,
"loss": 2.9214,
"step": 156
},
{
"epoch": 0.4419422941590429,
"grad_norm": 0.5507174730300903,
"learning_rate": 0.00016964824120603016,
"loss": 2.5087,
"step": 157
},
{
"epoch": 0.44475721323011963,
"grad_norm": 0.6239134669303894,
"learning_rate": 0.0001694472361809045,
"loss": 3.2032,
"step": 158
},
{
"epoch": 0.44757213230119636,
"grad_norm": 0.7403885722160339,
"learning_rate": 0.0001692462311557789,
"loss": 2.8829,
"step": 159
},
{
"epoch": 0.45038705137227303,
"grad_norm": 0.5260657072067261,
"learning_rate": 0.00016904522613065328,
"loss": 2.4572,
"step": 160
},
{
"epoch": 0.45320197044334976,
"grad_norm": 0.7505115270614624,
"learning_rate": 0.00016884422110552766,
"loss": 2.7264,
"step": 161
},
{
"epoch": 0.4560168895144265,
"grad_norm": 0.6088585257530212,
"learning_rate": 0.000168643216080402,
"loss": 2.5714,
"step": 162
},
{
"epoch": 0.45883180858550315,
"grad_norm": 0.6011828184127808,
"learning_rate": 0.0001684422110552764,
"loss": 2.597,
"step": 163
},
{
"epoch": 0.4616467276565799,
"grad_norm": 0.5229634046554565,
"learning_rate": 0.00016824120603015078,
"loss": 2.723,
"step": 164
},
{
"epoch": 0.4644616467276566,
"grad_norm": 0.6184930801391602,
"learning_rate": 0.00016804020100502512,
"loss": 2.703,
"step": 165
},
{
"epoch": 0.4672765657987333,
"grad_norm": 0.6252800226211548,
"learning_rate": 0.0001678391959798995,
"loss": 2.5517,
"step": 166
},
{
"epoch": 0.47009148486981,
"grad_norm": 0.5429969429969788,
"learning_rate": 0.0001676381909547739,
"loss": 2.7248,
"step": 167
},
{
"epoch": 0.4729064039408867,
"grad_norm": 0.6234527230262756,
"learning_rate": 0.00016743718592964827,
"loss": 2.5844,
"step": 168
},
{
"epoch": 0.4757213230119634,
"grad_norm": 0.6872987747192383,
"learning_rate": 0.0001672361809045226,
"loss": 2.5067,
"step": 169
},
{
"epoch": 0.47853624208304013,
"grad_norm": 0.5591785907745361,
"learning_rate": 0.00016703517587939699,
"loss": 2.4999,
"step": 170
},
{
"epoch": 0.4813511611541168,
"grad_norm": 0.5767291188240051,
"learning_rate": 0.00016683417085427136,
"loss": 2.8885,
"step": 171
},
{
"epoch": 0.48416608022519353,
"grad_norm": 0.6422219276428223,
"learning_rate": 0.00016663316582914573,
"loss": 2.6989,
"step": 172
},
{
"epoch": 0.48698099929627026,
"grad_norm": 0.6035985350608826,
"learning_rate": 0.0001664321608040201,
"loss": 2.6412,
"step": 173
},
{
"epoch": 0.4897959183673469,
"grad_norm": 0.5744962096214294,
"learning_rate": 0.00016623115577889448,
"loss": 2.6395,
"step": 174
},
{
"epoch": 0.49261083743842365,
"grad_norm": 0.6550725698471069,
"learning_rate": 0.00016603015075376885,
"loss": 2.7526,
"step": 175
},
{
"epoch": 0.4954257565095003,
"grad_norm": 0.7883411049842834,
"learning_rate": 0.00016582914572864322,
"loss": 2.5775,
"step": 176
},
{
"epoch": 0.49824067558057705,
"grad_norm": 0.6014293432235718,
"learning_rate": 0.0001656281407035176,
"loss": 2.6568,
"step": 177
},
{
"epoch": 0.5010555946516537,
"grad_norm": 0.5285369157791138,
"learning_rate": 0.00016542713567839197,
"loss": 2.4862,
"step": 178
},
{
"epoch": 0.5038705137227305,
"grad_norm": 0.6501176953315735,
"learning_rate": 0.00016522613065326634,
"loss": 2.631,
"step": 179
},
{
"epoch": 0.5066854327938072,
"grad_norm": 0.6903632879257202,
"learning_rate": 0.00016502512562814072,
"loss": 2.7363,
"step": 180
},
{
"epoch": 0.5095003518648838,
"grad_norm": 0.6202127933502197,
"learning_rate": 0.0001648241206030151,
"loss": 2.6678,
"step": 181
},
{
"epoch": 0.5123152709359606,
"grad_norm": 0.688332200050354,
"learning_rate": 0.00016462311557788946,
"loss": 2.6346,
"step": 182
},
{
"epoch": 0.5151301900070373,
"grad_norm": 0.5428361892700195,
"learning_rate": 0.0001644221105527638,
"loss": 2.9607,
"step": 183
},
{
"epoch": 0.517945109078114,
"grad_norm": 0.5395454168319702,
"learning_rate": 0.0001642211055276382,
"loss": 2.8427,
"step": 184
},
{
"epoch": 0.5207600281491908,
"grad_norm": 0.554793655872345,
"learning_rate": 0.00016402010050251258,
"loss": 2.4523,
"step": 185
},
{
"epoch": 0.5235749472202674,
"grad_norm": 0.5698427557945251,
"learning_rate": 0.00016381909547738695,
"loss": 2.7052,
"step": 186
},
{
"epoch": 0.5263898662913441,
"grad_norm": 0.5099812150001526,
"learning_rate": 0.0001636180904522613,
"loss": 2.3651,
"step": 187
},
{
"epoch": 0.5292047853624209,
"grad_norm": 0.5726649761199951,
"learning_rate": 0.0001634170854271357,
"loss": 2.9888,
"step": 188
},
{
"epoch": 0.5320197044334976,
"grad_norm": 0.630757212638855,
"learning_rate": 0.00016321608040201007,
"loss": 2.6886,
"step": 189
},
{
"epoch": 0.5348346235045742,
"grad_norm": 0.5425901412963867,
"learning_rate": 0.00016301507537688442,
"loss": 2.6047,
"step": 190
},
{
"epoch": 0.5376495425756509,
"grad_norm": 0.6030427813529968,
"learning_rate": 0.0001628140703517588,
"loss": 2.8713,
"step": 191
},
{
"epoch": 0.5404644616467277,
"grad_norm": 0.5966265201568604,
"learning_rate": 0.00016261306532663316,
"loss": 2.7212,
"step": 192
},
{
"epoch": 0.5432793807178043,
"grad_norm": 0.5909569263458252,
"learning_rate": 0.00016241206030150756,
"loss": 2.8696,
"step": 193
},
{
"epoch": 0.546094299788881,
"grad_norm": 0.5485597252845764,
"learning_rate": 0.0001622110552763819,
"loss": 2.6367,
"step": 194
},
{
"epoch": 0.5489092188599578,
"grad_norm": 0.6274406909942627,
"learning_rate": 0.00016201005025125628,
"loss": 2.6882,
"step": 195
},
{
"epoch": 0.5517241379310345,
"grad_norm": 0.5513599514961243,
"learning_rate": 0.00016180904522613066,
"loss": 2.6596,
"step": 196
},
{
"epoch": 0.5545390570021111,
"grad_norm": 0.7499818801879883,
"learning_rate": 0.00016160804020100503,
"loss": 2.6191,
"step": 197
},
{
"epoch": 0.5573539760731879,
"grad_norm": 0.5143455862998962,
"learning_rate": 0.0001614070351758794,
"loss": 2.6543,
"step": 198
},
{
"epoch": 0.5601688951442646,
"grad_norm": 0.6372074484825134,
"learning_rate": 0.00016120603015075378,
"loss": 2.4355,
"step": 199
},
{
"epoch": 0.5629838142153413,
"grad_norm": 0.7132628560066223,
"learning_rate": 0.00016100502512562815,
"loss": 2.671,
"step": 200
},
{
"epoch": 0.565798733286418,
"grad_norm": 0.5970779657363892,
"learning_rate": 0.00016080402010050252,
"loss": 2.6802,
"step": 201
},
{
"epoch": 0.5686136523574947,
"grad_norm": 0.6065824627876282,
"learning_rate": 0.0001606030150753769,
"loss": 2.6151,
"step": 202
},
{
"epoch": 0.5714285714285714,
"grad_norm": 0.5521674752235413,
"learning_rate": 0.00016040201005025127,
"loss": 2.5132,
"step": 203
},
{
"epoch": 0.5742434904996482,
"grad_norm": 0.6067800521850586,
"learning_rate": 0.00016020100502512564,
"loss": 2.9283,
"step": 204
},
{
"epoch": 0.5770584095707249,
"grad_norm": 0.5979752540588379,
"learning_rate": 0.00016,
"loss": 2.5355,
"step": 205
},
{
"epoch": 0.5798733286418015,
"grad_norm": 0.6044461727142334,
"learning_rate": 0.00015979899497487439,
"loss": 2.6752,
"step": 206
},
{
"epoch": 0.5826882477128783,
"grad_norm": 0.580636739730835,
"learning_rate": 0.00015959798994974876,
"loss": 2.538,
"step": 207
},
{
"epoch": 0.585503166783955,
"grad_norm": 0.6181825995445251,
"learning_rate": 0.0001593969849246231,
"loss": 2.6529,
"step": 208
},
{
"epoch": 0.5883180858550316,
"grad_norm": 0.6641463041305542,
"learning_rate": 0.0001591959798994975,
"loss": 2.6252,
"step": 209
},
{
"epoch": 0.5911330049261084,
"grad_norm": 0.6233858466148376,
"learning_rate": 0.00015899497487437188,
"loss": 2.6832,
"step": 210
},
{
"epoch": 0.5939479239971851,
"grad_norm": 0.6696732044219971,
"learning_rate": 0.00015879396984924625,
"loss": 3.0325,
"step": 211
},
{
"epoch": 0.5967628430682618,
"grad_norm": 0.7569646239280701,
"learning_rate": 0.0001585929648241206,
"loss": 2.675,
"step": 212
},
{
"epoch": 0.5995777621393384,
"grad_norm": 0.5962279438972473,
"learning_rate": 0.000158391959798995,
"loss": 2.6369,
"step": 213
},
{
"epoch": 0.6023926812104152,
"grad_norm": 0.6349969506263733,
"learning_rate": 0.00015819095477386937,
"loss": 2.5531,
"step": 214
},
{
"epoch": 0.6052076002814919,
"grad_norm": 0.8234291076660156,
"learning_rate": 0.00015798994974874372,
"loss": 2.5466,
"step": 215
},
{
"epoch": 0.6080225193525686,
"grad_norm": 0.6057316660881042,
"learning_rate": 0.0001577889447236181,
"loss": 2.3296,
"step": 216
},
{
"epoch": 0.6108374384236454,
"grad_norm": 0.6568176746368408,
"learning_rate": 0.00015758793969849246,
"loss": 2.8075,
"step": 217
},
{
"epoch": 0.613652357494722,
"grad_norm": 0.5945923328399658,
"learning_rate": 0.00015738693467336686,
"loss": 2.696,
"step": 218
},
{
"epoch": 0.6164672765657987,
"grad_norm": 0.6226676106452942,
"learning_rate": 0.0001571859296482412,
"loss": 2.5764,
"step": 219
},
{
"epoch": 0.6192821956368755,
"grad_norm": 0.6158185601234436,
"learning_rate": 0.00015698492462311558,
"loss": 2.66,
"step": 220
},
{
"epoch": 0.6220971147079521,
"grad_norm": 0.7033487558364868,
"learning_rate": 0.00015678391959798995,
"loss": 2.7747,
"step": 221
},
{
"epoch": 0.6249120337790288,
"grad_norm": 0.5215992331504822,
"learning_rate": 0.00015658291457286433,
"loss": 2.4176,
"step": 222
},
{
"epoch": 0.6277269528501056,
"grad_norm": 0.8559087514877319,
"learning_rate": 0.0001563819095477387,
"loss": 2.8081,
"step": 223
},
{
"epoch": 0.6305418719211823,
"grad_norm": 0.5106130242347717,
"learning_rate": 0.00015618090452261307,
"loss": 2.6433,
"step": 224
},
{
"epoch": 0.633356790992259,
"grad_norm": 0.6176455020904541,
"learning_rate": 0.00015597989949748745,
"loss": 2.4351,
"step": 225
},
{
"epoch": 0.6361717100633357,
"grad_norm": 0.8193095922470093,
"learning_rate": 0.00015577889447236182,
"loss": 2.8882,
"step": 226
},
{
"epoch": 0.6389866291344124,
"grad_norm": 0.8569721579551697,
"learning_rate": 0.0001555778894472362,
"loss": 2.7263,
"step": 227
},
{
"epoch": 0.6418015482054891,
"grad_norm": 0.6688103079795837,
"learning_rate": 0.00015537688442211056,
"loss": 2.6202,
"step": 228
},
{
"epoch": 0.6446164672765659,
"grad_norm": 0.6070395708084106,
"learning_rate": 0.00015517587939698494,
"loss": 2.7117,
"step": 229
},
{
"epoch": 0.6474313863476425,
"grad_norm": 0.7812969088554382,
"learning_rate": 0.0001549748743718593,
"loss": 2.5815,
"step": 230
},
{
"epoch": 0.6502463054187192,
"grad_norm": 0.8165440559387207,
"learning_rate": 0.00015477386934673368,
"loss": 2.8503,
"step": 231
},
{
"epoch": 0.6530612244897959,
"grad_norm": 0.6454505324363708,
"learning_rate": 0.00015457286432160806,
"loss": 2.5918,
"step": 232
},
{
"epoch": 0.6558761435608726,
"grad_norm": 0.7109069228172302,
"learning_rate": 0.0001543718592964824,
"loss": 2.5554,
"step": 233
},
{
"epoch": 0.6586910626319493,
"grad_norm": 0.6079565286636353,
"learning_rate": 0.0001541708542713568,
"loss": 2.6232,
"step": 234
},
{
"epoch": 0.661505981703026,
"grad_norm": 0.576082170009613,
"learning_rate": 0.00015396984924623117,
"loss": 2.8387,
"step": 235
},
{
"epoch": 0.6643209007741028,
"grad_norm": 0.5683891177177429,
"learning_rate": 0.00015376884422110555,
"loss": 2.7391,
"step": 236
},
{
"epoch": 0.6671358198451794,
"grad_norm": 0.6114887595176697,
"learning_rate": 0.0001535678391959799,
"loss": 2.6629,
"step": 237
},
{
"epoch": 0.6699507389162561,
"grad_norm": 0.6666116118431091,
"learning_rate": 0.00015336683417085427,
"loss": 2.9027,
"step": 238
},
{
"epoch": 0.6727656579873329,
"grad_norm": 0.5646522641181946,
"learning_rate": 0.00015316582914572867,
"loss": 2.7252,
"step": 239
},
{
"epoch": 0.6755805770584096,
"grad_norm": 0.6885817646980286,
"learning_rate": 0.000152964824120603,
"loss": 2.5966,
"step": 240
},
{
"epoch": 0.6783954961294862,
"grad_norm": 0.5778309106826782,
"learning_rate": 0.00015276381909547739,
"loss": 2.6275,
"step": 241
},
{
"epoch": 0.681210415200563,
"grad_norm": 0.6230787038803101,
"learning_rate": 0.00015256281407035176,
"loss": 2.5258,
"step": 242
},
{
"epoch": 0.6840253342716397,
"grad_norm": 0.5411630272865295,
"learning_rate": 0.00015236180904522613,
"loss": 2.6047,
"step": 243
},
{
"epoch": 0.6868402533427164,
"grad_norm": 0.5547896027565002,
"learning_rate": 0.0001521608040201005,
"loss": 2.6451,
"step": 244
},
{
"epoch": 0.6896551724137931,
"grad_norm": 0.6721991300582886,
"learning_rate": 0.00015195979899497488,
"loss": 2.7127,
"step": 245
},
{
"epoch": 0.6924700914848698,
"grad_norm": 0.6402304172515869,
"learning_rate": 0.00015175879396984925,
"loss": 2.8568,
"step": 246
},
{
"epoch": 0.6952850105559465,
"grad_norm": 0.594251275062561,
"learning_rate": 0.00015155778894472362,
"loss": 2.7168,
"step": 247
},
{
"epoch": 0.6980999296270233,
"grad_norm": 0.8853170871734619,
"learning_rate": 0.000151356783919598,
"loss": 2.7259,
"step": 248
},
{
"epoch": 0.7009148486981,
"grad_norm": 0.5619581341743469,
"learning_rate": 0.00015115577889447237,
"loss": 2.6218,
"step": 249
},
{
"epoch": 0.7037297677691766,
"grad_norm": 0.6149075031280518,
"learning_rate": 0.00015095477386934674,
"loss": 2.8984,
"step": 250
},
{
"epoch": 0.7065446868402533,
"grad_norm": 0.6819274425506592,
"learning_rate": 0.00015075376884422112,
"loss": 2.8293,
"step": 251
},
{
"epoch": 0.7093596059113301,
"grad_norm": 0.5911348462104797,
"learning_rate": 0.0001505527638190955,
"loss": 2.6333,
"step": 252
},
{
"epoch": 0.7121745249824067,
"grad_norm": 0.7064481973648071,
"learning_rate": 0.00015035175879396986,
"loss": 2.8061,
"step": 253
},
{
"epoch": 0.7149894440534834,
"grad_norm": 0.6039316654205322,
"learning_rate": 0.00015015075376884423,
"loss": 2.6468,
"step": 254
},
{
"epoch": 0.7178043631245602,
"grad_norm": 0.5624644756317139,
"learning_rate": 0.0001499497487437186,
"loss": 2.7813,
"step": 255
},
{
"epoch": 0.7206192821956369,
"grad_norm": 0.5971612334251404,
"learning_rate": 0.00014974874371859298,
"loss": 2.6411,
"step": 256
},
{
"epoch": 0.7234342012667135,
"grad_norm": 0.6717031598091125,
"learning_rate": 0.00014954773869346735,
"loss": 2.5579,
"step": 257
},
{
"epoch": 0.7262491203377903,
"grad_norm": 0.6643320322036743,
"learning_rate": 0.0001493467336683417,
"loss": 2.8469,
"step": 258
},
{
"epoch": 0.729064039408867,
"grad_norm": 0.5971053838729858,
"learning_rate": 0.0001491457286432161,
"loss": 2.613,
"step": 259
},
{
"epoch": 0.7318789584799437,
"grad_norm": 0.6267710328102112,
"learning_rate": 0.00014894472361809047,
"loss": 2.6969,
"step": 260
},
{
"epoch": 0.7346938775510204,
"grad_norm": 0.6237425804138184,
"learning_rate": 0.00014874371859296482,
"loss": 2.7047,
"step": 261
},
{
"epoch": 0.7375087966220971,
"grad_norm": 0.5603229999542236,
"learning_rate": 0.0001485427135678392,
"loss": 2.7534,
"step": 262
},
{
"epoch": 0.7403237156931738,
"grad_norm": 0.6484439969062805,
"learning_rate": 0.00014834170854271356,
"loss": 2.9389,
"step": 263
},
{
"epoch": 0.7431386347642506,
"grad_norm": 0.6225891709327698,
"learning_rate": 0.00014814070351758796,
"loss": 2.7516,
"step": 264
},
{
"epoch": 0.7459535538353272,
"grad_norm": 0.5303828716278076,
"learning_rate": 0.0001479396984924623,
"loss": 2.3335,
"step": 265
},
{
"epoch": 0.7487684729064039,
"grad_norm": 0.6280227303504944,
"learning_rate": 0.00014773869346733668,
"loss": 2.6259,
"step": 266
},
{
"epoch": 0.7515833919774807,
"grad_norm": 0.5551609992980957,
"learning_rate": 0.00014753768844221106,
"loss": 2.6572,
"step": 267
},
{
"epoch": 0.7543983110485574,
"grad_norm": 0.7833865284919739,
"learning_rate": 0.00014733668341708543,
"loss": 2.7358,
"step": 268
},
{
"epoch": 0.757213230119634,
"grad_norm": 0.6138265132904053,
"learning_rate": 0.0001471356783919598,
"loss": 2.4282,
"step": 269
},
{
"epoch": 0.7600281491907108,
"grad_norm": 0.6331743001937866,
"learning_rate": 0.00014693467336683417,
"loss": 2.6373,
"step": 270
},
{
"epoch": 0.7628430682617875,
"grad_norm": 0.569272518157959,
"learning_rate": 0.00014673366834170855,
"loss": 2.6546,
"step": 271
},
{
"epoch": 0.7656579873328642,
"grad_norm": 0.6755379438400269,
"learning_rate": 0.00014653266331658292,
"loss": 2.6258,
"step": 272
},
{
"epoch": 0.7684729064039408,
"grad_norm": 0.6408460140228271,
"learning_rate": 0.0001463316582914573,
"loss": 2.6824,
"step": 273
},
{
"epoch": 0.7712878254750176,
"grad_norm": 0.6325194239616394,
"learning_rate": 0.00014613065326633167,
"loss": 2.7959,
"step": 274
},
{
"epoch": 0.7741027445460943,
"grad_norm": 0.6526459455490112,
"learning_rate": 0.00014592964824120604,
"loss": 2.613,
"step": 275
},
{
"epoch": 0.776917663617171,
"grad_norm": 0.610998272895813,
"learning_rate": 0.0001457286432160804,
"loss": 2.4973,
"step": 276
},
{
"epoch": 0.7797325826882477,
"grad_norm": 0.510045051574707,
"learning_rate": 0.00014552763819095479,
"loss": 2.7343,
"step": 277
},
{
"epoch": 0.7825475017593244,
"grad_norm": 0.5863422155380249,
"learning_rate": 0.00014532663316582916,
"loss": 2.5543,
"step": 278
},
{
"epoch": 0.7853624208304011,
"grad_norm": 0.5406447649002075,
"learning_rate": 0.00014512562814070353,
"loss": 2.7748,
"step": 279
},
{
"epoch": 0.7881773399014779,
"grad_norm": 0.7465657591819763,
"learning_rate": 0.0001449246231155779,
"loss": 2.4034,
"step": 280
},
{
"epoch": 0.7909922589725545,
"grad_norm": 0.5192904472351074,
"learning_rate": 0.00014472361809045228,
"loss": 2.4881,
"step": 281
},
{
"epoch": 0.7938071780436312,
"grad_norm": 0.6085344552993774,
"learning_rate": 0.00014452261306532665,
"loss": 2.6534,
"step": 282
},
{
"epoch": 0.796622097114708,
"grad_norm": 0.6155668497085571,
"learning_rate": 0.000144321608040201,
"loss": 2.6149,
"step": 283
},
{
"epoch": 0.7994370161857847,
"grad_norm": 0.623285710811615,
"learning_rate": 0.00014412060301507537,
"loss": 2.7725,
"step": 284
},
{
"epoch": 0.8022519352568613,
"grad_norm": 0.9461747407913208,
"learning_rate": 0.00014391959798994977,
"loss": 2.7837,
"step": 285
},
{
"epoch": 0.8050668543279381,
"grad_norm": 0.7152134776115417,
"learning_rate": 0.00014371859296482411,
"loss": 2.6678,
"step": 286
},
{
"epoch": 0.8078817733990148,
"grad_norm": 0.6519983410835266,
"learning_rate": 0.0001435175879396985,
"loss": 2.5234,
"step": 287
},
{
"epoch": 0.8106966924700915,
"grad_norm": 0.6523590087890625,
"learning_rate": 0.00014331658291457286,
"loss": 2.7284,
"step": 288
},
{
"epoch": 0.8135116115411682,
"grad_norm": 0.6067202687263489,
"learning_rate": 0.00014311557788944726,
"loss": 2.5459,
"step": 289
},
{
"epoch": 0.8163265306122449,
"grad_norm": 0.5836743116378784,
"learning_rate": 0.0001429145728643216,
"loss": 2.452,
"step": 290
},
{
"epoch": 0.8191414496833216,
"grad_norm": 0.685727596282959,
"learning_rate": 0.00014271356783919598,
"loss": 2.6568,
"step": 291
},
{
"epoch": 0.8219563687543983,
"grad_norm": 0.6456769704818726,
"learning_rate": 0.00014251256281407035,
"loss": 2.6553,
"step": 292
},
{
"epoch": 0.824771287825475,
"grad_norm": 0.6357674598693848,
"learning_rate": 0.00014231155778894473,
"loss": 2.7589,
"step": 293
},
{
"epoch": 0.8275862068965517,
"grad_norm": 0.6339374780654907,
"learning_rate": 0.0001421105527638191,
"loss": 2.7797,
"step": 294
},
{
"epoch": 0.8304011259676284,
"grad_norm": 0.5491819381713867,
"learning_rate": 0.00014190954773869347,
"loss": 2.4772,
"step": 295
},
{
"epoch": 0.8332160450387052,
"grad_norm": 0.6312305331230164,
"learning_rate": 0.00014170854271356784,
"loss": 2.6715,
"step": 296
},
{
"epoch": 0.8360309641097818,
"grad_norm": 0.647985577583313,
"learning_rate": 0.00014150753768844222,
"loss": 2.7811,
"step": 297
},
{
"epoch": 0.8388458831808585,
"grad_norm": 0.6383928060531616,
"learning_rate": 0.0001413065326633166,
"loss": 2.8496,
"step": 298
},
{
"epoch": 0.8416608022519353,
"grad_norm": 0.5548710823059082,
"learning_rate": 0.00014110552763819096,
"loss": 2.5096,
"step": 299
},
{
"epoch": 0.844475721323012,
"grad_norm": 0.5331722497940063,
"learning_rate": 0.00014090452261306534,
"loss": 2.5683,
"step": 300
},
{
"epoch": 0.8472906403940886,
"grad_norm": 0.5956087112426758,
"learning_rate": 0.0001407035175879397,
"loss": 2.6175,
"step": 301
},
{
"epoch": 0.8501055594651654,
"grad_norm": 0.6151571273803711,
"learning_rate": 0.00014050251256281408,
"loss": 2.5863,
"step": 302
},
{
"epoch": 0.8529204785362421,
"grad_norm": 0.5952453017234802,
"learning_rate": 0.00014030150753768846,
"loss": 2.5152,
"step": 303
},
{
"epoch": 0.8557353976073188,
"grad_norm": 0.6127233505249023,
"learning_rate": 0.0001401005025125628,
"loss": 2.5753,
"step": 304
},
{
"epoch": 0.8585503166783955,
"grad_norm": 0.551474928855896,
"learning_rate": 0.0001398994974874372,
"loss": 2.6523,
"step": 305
},
{
"epoch": 0.8613652357494722,
"grad_norm": 0.8345268368721008,
"learning_rate": 0.00013969849246231157,
"loss": 2.78,
"step": 306
},
{
"epoch": 0.8641801548205489,
"grad_norm": 0.6494585275650024,
"learning_rate": 0.00013949748743718595,
"loss": 2.7354,
"step": 307
},
{
"epoch": 0.8669950738916257,
"grad_norm": 0.6813188791275024,
"learning_rate": 0.0001392964824120603,
"loss": 2.8736,
"step": 308
},
{
"epoch": 0.8698099929627023,
"grad_norm": 0.6250954270362854,
"learning_rate": 0.00013909547738693467,
"loss": 2.7548,
"step": 309
},
{
"epoch": 0.872624912033779,
"grad_norm": 0.6115372180938721,
"learning_rate": 0.00013889447236180907,
"loss": 2.7126,
"step": 310
},
{
"epoch": 0.8754398311048557,
"grad_norm": 0.6005333662033081,
"learning_rate": 0.0001386934673366834,
"loss": 2.9623,
"step": 311
},
{
"epoch": 0.8782547501759325,
"grad_norm": 0.5203389525413513,
"learning_rate": 0.00013849246231155778,
"loss": 2.7269,
"step": 312
},
{
"epoch": 0.8810696692470091,
"grad_norm": 0.5951765775680542,
"learning_rate": 0.00013829145728643216,
"loss": 2.6058,
"step": 313
},
{
"epoch": 0.8838845883180858,
"grad_norm": 0.6142780184745789,
"learning_rate": 0.00013809045226130656,
"loss": 2.5918,
"step": 314
},
{
"epoch": 0.8866995073891626,
"grad_norm": 0.5776972770690918,
"learning_rate": 0.0001378894472361809,
"loss": 2.5006,
"step": 315
},
{
"epoch": 0.8895144264602393,
"grad_norm": 0.6553467512130737,
"learning_rate": 0.00013768844221105528,
"loss": 2.5903,
"step": 316
},
{
"epoch": 0.8923293455313159,
"grad_norm": 0.5776195526123047,
"learning_rate": 0.00013748743718592965,
"loss": 2.5532,
"step": 317
},
{
"epoch": 0.8951442646023927,
"grad_norm": 0.5531054139137268,
"learning_rate": 0.00013728643216080402,
"loss": 2.624,
"step": 318
},
{
"epoch": 0.8979591836734694,
"grad_norm": 0.7402701377868652,
"learning_rate": 0.0001370854271356784,
"loss": 2.7686,
"step": 319
},
{
"epoch": 0.9007741027445461,
"grad_norm": 0.5394028425216675,
"learning_rate": 0.00013688442211055277,
"loss": 2.6044,
"step": 320
},
{
"epoch": 0.9035890218156228,
"grad_norm": 0.6454526782035828,
"learning_rate": 0.00013668341708542714,
"loss": 2.6938,
"step": 321
},
{
"epoch": 0.9064039408866995,
"grad_norm": 0.7545249462127686,
"learning_rate": 0.00013648241206030151,
"loss": 2.5263,
"step": 322
},
{
"epoch": 0.9092188599577762,
"grad_norm": 0.6479030251502991,
"learning_rate": 0.0001362814070351759,
"loss": 2.4625,
"step": 323
},
{
"epoch": 0.912033779028853,
"grad_norm": 0.9134926199913025,
"learning_rate": 0.00013608040201005026,
"loss": 2.7668,
"step": 324
},
{
"epoch": 0.9148486980999296,
"grad_norm": 0.6736027002334595,
"learning_rate": 0.00013587939698492463,
"loss": 2.826,
"step": 325
},
{
"epoch": 0.9176636171710063,
"grad_norm": 0.6161238551139832,
"learning_rate": 0.000135678391959799,
"loss": 2.513,
"step": 326
},
{
"epoch": 0.9204785362420831,
"grad_norm": 0.7301089763641357,
"learning_rate": 0.00013547738693467338,
"loss": 2.6374,
"step": 327
},
{
"epoch": 0.9232934553131598,
"grad_norm": 0.5782633423805237,
"learning_rate": 0.00013527638190954775,
"loss": 2.4847,
"step": 328
},
{
"epoch": 0.9261083743842364,
"grad_norm": 0.6025380492210388,
"learning_rate": 0.0001350753768844221,
"loss": 2.6272,
"step": 329
},
{
"epoch": 0.9289232934553132,
"grad_norm": 0.6242662668228149,
"learning_rate": 0.00013487437185929647,
"loss": 2.801,
"step": 330
},
{
"epoch": 0.9317382125263899,
"grad_norm": 0.7133350372314453,
"learning_rate": 0.00013467336683417087,
"loss": 2.5261,
"step": 331
},
{
"epoch": 0.9345531315974666,
"grad_norm": 0.5895963311195374,
"learning_rate": 0.00013447236180904524,
"loss": 2.7813,
"step": 332
},
{
"epoch": 0.9373680506685432,
"grad_norm": 0.7254224419593811,
"learning_rate": 0.0001342713567839196,
"loss": 2.6859,
"step": 333
},
{
"epoch": 0.94018296973962,
"grad_norm": 0.7255984544754028,
"learning_rate": 0.00013407035175879396,
"loss": 2.6328,
"step": 334
},
{
"epoch": 0.9429978888106967,
"grad_norm": 0.827979564666748,
"learning_rate": 0.00013386934673366836,
"loss": 2.7311,
"step": 335
},
{
"epoch": 0.9458128078817734,
"grad_norm": 0.6603137850761414,
"learning_rate": 0.0001336683417085427,
"loss": 2.6389,
"step": 336
},
{
"epoch": 0.9486277269528501,
"grad_norm": 0.6362401247024536,
"learning_rate": 0.00013346733668341708,
"loss": 2.7471,
"step": 337
},
{
"epoch": 0.9514426460239268,
"grad_norm": 0.7190608382225037,
"learning_rate": 0.00013326633165829146,
"loss": 2.5964,
"step": 338
},
{
"epoch": 0.9542575650950035,
"grad_norm": 0.6639814376831055,
"learning_rate": 0.00013306532663316586,
"loss": 2.6358,
"step": 339
},
{
"epoch": 0.9570724841660803,
"grad_norm": 0.6791893243789673,
"learning_rate": 0.0001328643216080402,
"loss": 2.6764,
"step": 340
},
{
"epoch": 0.9598874032371569,
"grad_norm": 0.6663180589675903,
"learning_rate": 0.00013266331658291457,
"loss": 2.8057,
"step": 341
},
{
"epoch": 0.9627023223082336,
"grad_norm": 0.5866056680679321,
"learning_rate": 0.00013246231155778895,
"loss": 2.6845,
"step": 342
},
{
"epoch": 0.9655172413793104,
"grad_norm": 0.6590510010719299,
"learning_rate": 0.00013226130653266332,
"loss": 2.4007,
"step": 343
},
{
"epoch": 0.9683321604503871,
"grad_norm": 0.6014566421508789,
"learning_rate": 0.0001320603015075377,
"loss": 2.4501,
"step": 344
},
{
"epoch": 0.9711470795214637,
"grad_norm": 0.7037169337272644,
"learning_rate": 0.00013185929648241207,
"loss": 3.0925,
"step": 345
},
{
"epoch": 0.9739619985925405,
"grad_norm": 0.5314791798591614,
"learning_rate": 0.00013165829145728644,
"loss": 2.4216,
"step": 346
},
{
"epoch": 0.9767769176636172,
"grad_norm": 0.5568397045135498,
"learning_rate": 0.0001314572864321608,
"loss": 2.4926,
"step": 347
},
{
"epoch": 0.9795918367346939,
"grad_norm": 0.728171706199646,
"learning_rate": 0.00013125628140703518,
"loss": 2.6955,
"step": 348
},
{
"epoch": 0.9824067558057706,
"grad_norm": 0.7184565663337708,
"learning_rate": 0.00013105527638190956,
"loss": 2.6989,
"step": 349
},
{
"epoch": 0.9852216748768473,
"grad_norm": 1.116127610206604,
"learning_rate": 0.00013085427135678393,
"loss": 2.5996,
"step": 350
},
{
"epoch": 0.988036593947924,
"grad_norm": 0.6320902705192566,
"learning_rate": 0.0001306532663316583,
"loss": 2.571,
"step": 351
},
{
"epoch": 0.9908515130190007,
"grad_norm": 0.6517199277877808,
"learning_rate": 0.00013045226130653268,
"loss": 2.6731,
"step": 352
},
{
"epoch": 0.9936664320900774,
"grad_norm": 0.6911283135414124,
"learning_rate": 0.00013025125628140705,
"loss": 2.8807,
"step": 353
},
{
"epoch": 0.9964813511611541,
"grad_norm": 0.7019006609916687,
"learning_rate": 0.0001300502512562814,
"loss": 2.6346,
"step": 354
},
{
"epoch": 0.9992962702322308,
"grad_norm": 0.6711909770965576,
"learning_rate": 0.00012984924623115577,
"loss": 2.7176,
"step": 355
},
{
"epoch": 1.0021111893033074,
"grad_norm": 0.6791936755180359,
"learning_rate": 0.00012964824120603017,
"loss": 2.4828,
"step": 356
},
{
"epoch": 1.0049261083743843,
"grad_norm": 0.550987720489502,
"learning_rate": 0.00012944723618090454,
"loss": 2.4396,
"step": 357
},
{
"epoch": 1.007741027445461,
"grad_norm": 0.6731054186820984,
"learning_rate": 0.0001292462311557789,
"loss": 2.7791,
"step": 358
},
{
"epoch": 1.0105559465165377,
"grad_norm": 0.5614567995071411,
"learning_rate": 0.00012904522613065326,
"loss": 2.5572,
"step": 359
},
{
"epoch": 1.0133708655876144,
"grad_norm": 0.5224441289901733,
"learning_rate": 0.00012884422110552766,
"loss": 2.3983,
"step": 360
},
{
"epoch": 1.016185784658691,
"grad_norm": 0.534264326095581,
"learning_rate": 0.000128643216080402,
"loss": 2.4682,
"step": 361
},
{
"epoch": 1.0190007037297677,
"grad_norm": 0.7560765743255615,
"learning_rate": 0.00012844221105527638,
"loss": 2.5593,
"step": 362
},
{
"epoch": 1.0218156228008444,
"grad_norm": 0.6609757542610168,
"learning_rate": 0.00012824120603015075,
"loss": 2.4428,
"step": 363
},
{
"epoch": 1.0246305418719213,
"grad_norm": 0.48449280858039856,
"learning_rate": 0.00012804020100502515,
"loss": 2.2216,
"step": 364
},
{
"epoch": 1.027445460942998,
"grad_norm": 0.6201764345169067,
"learning_rate": 0.0001278391959798995,
"loss": 2.4076,
"step": 365
},
{
"epoch": 1.0302603800140746,
"grad_norm": 0.6022098660469055,
"learning_rate": 0.00012763819095477387,
"loss": 2.3617,
"step": 366
},
{
"epoch": 1.0330752990851513,
"grad_norm": 0.5485665798187256,
"learning_rate": 0.00012743718592964824,
"loss": 2.4646,
"step": 367
},
{
"epoch": 1.035890218156228,
"grad_norm": 0.6300007700920105,
"learning_rate": 0.00012723618090452262,
"loss": 2.3845,
"step": 368
},
{
"epoch": 1.0387051372273046,
"grad_norm": 0.6588097214698792,
"learning_rate": 0.000127035175879397,
"loss": 2.3836,
"step": 369
},
{
"epoch": 1.0415200562983815,
"grad_norm": 0.6159886717796326,
"learning_rate": 0.00012683417085427136,
"loss": 2.239,
"step": 370
},
{
"epoch": 1.0443349753694582,
"grad_norm": 0.7142757177352905,
"learning_rate": 0.00012663316582914574,
"loss": 2.3208,
"step": 371
},
{
"epoch": 1.0471498944405349,
"grad_norm": 0.7620591521263123,
"learning_rate": 0.0001264321608040201,
"loss": 2.5665,
"step": 372
},
{
"epoch": 1.0499648135116115,
"grad_norm": 0.6486737728118896,
"learning_rate": 0.00012623115577889448,
"loss": 2.8276,
"step": 373
},
{
"epoch": 1.0527797325826882,
"grad_norm": 0.622787594795227,
"learning_rate": 0.00012603015075376885,
"loss": 2.4826,
"step": 374
},
{
"epoch": 1.0555946516537649,
"grad_norm": 0.6556206345558167,
"learning_rate": 0.00012582914572864323,
"loss": 2.5765,
"step": 375
},
{
"epoch": 1.0584095707248418,
"grad_norm": 0.6902799606323242,
"learning_rate": 0.0001256281407035176,
"loss": 2.3851,
"step": 376
},
{
"epoch": 1.0612244897959184,
"grad_norm": 0.6362977027893066,
"learning_rate": 0.00012542713567839197,
"loss": 2.4587,
"step": 377
},
{
"epoch": 1.064039408866995,
"grad_norm": 0.6027363538742065,
"learning_rate": 0.00012522613065326635,
"loss": 2.3945,
"step": 378
},
{
"epoch": 1.0668543279380718,
"grad_norm": 0.681010365486145,
"learning_rate": 0.0001250251256281407,
"loss": 2.247,
"step": 379
},
{
"epoch": 1.0696692470091484,
"grad_norm": 0.588394284248352,
"learning_rate": 0.00012482412060301507,
"loss": 2.3279,
"step": 380
},
{
"epoch": 1.0724841660802251,
"grad_norm": 0.6285263299942017,
"learning_rate": 0.00012462311557788947,
"loss": 2.2176,
"step": 381
},
{
"epoch": 1.0752990851513018,
"grad_norm": 0.6699137091636658,
"learning_rate": 0.00012442211055276384,
"loss": 2.4252,
"step": 382
},
{
"epoch": 1.0781140042223787,
"grad_norm": 0.7217219471931458,
"learning_rate": 0.00012422110552763818,
"loss": 2.3409,
"step": 383
},
{
"epoch": 1.0809289232934554,
"grad_norm": 0.6710893511772156,
"learning_rate": 0.00012402010050251256,
"loss": 2.4054,
"step": 384
},
{
"epoch": 1.083743842364532,
"grad_norm": 0.665313720703125,
"learning_rate": 0.00012381909547738696,
"loss": 2.4749,
"step": 385
},
{
"epoch": 1.0865587614356087,
"grad_norm": 0.655486524105072,
"learning_rate": 0.0001236180904522613,
"loss": 2.1261,
"step": 386
},
{
"epoch": 1.0893736805066854,
"grad_norm": 0.9402002692222595,
"learning_rate": 0.00012341708542713568,
"loss": 2.5344,
"step": 387
},
{
"epoch": 1.092188599577762,
"grad_norm": 0.760830819606781,
"learning_rate": 0.00012321608040201005,
"loss": 2.6215,
"step": 388
},
{
"epoch": 1.095003518648839,
"grad_norm": 0.7938470244407654,
"learning_rate": 0.00012301507537688445,
"loss": 2.3226,
"step": 389
},
{
"epoch": 1.0978184377199156,
"grad_norm": 0.789606511592865,
"learning_rate": 0.0001228140703517588,
"loss": 2.4419,
"step": 390
},
{
"epoch": 1.1006333567909923,
"grad_norm": 0.7316797375679016,
"learning_rate": 0.00012261306532663317,
"loss": 2.6489,
"step": 391
},
{
"epoch": 1.103448275862069,
"grad_norm": 0.7367439270019531,
"learning_rate": 0.00012241206030150754,
"loss": 2.2951,
"step": 392
},
{
"epoch": 1.1062631949331456,
"grad_norm": 0.735031008720398,
"learning_rate": 0.00012221105527638191,
"loss": 2.3752,
"step": 393
},
{
"epoch": 1.1090781140042223,
"grad_norm": 0.8442686796188354,
"learning_rate": 0.00012201005025125629,
"loss": 2.4054,
"step": 394
},
{
"epoch": 1.1118930330752992,
"grad_norm": 0.7112425565719604,
"learning_rate": 0.00012180904522613066,
"loss": 2.4153,
"step": 395
},
{
"epoch": 1.1147079521463759,
"grad_norm": 0.8225473165512085,
"learning_rate": 0.00012160804020100502,
"loss": 2.3042,
"step": 396
},
{
"epoch": 1.1175228712174525,
"grad_norm": 0.8238793015480042,
"learning_rate": 0.00012140703517587942,
"loss": 2.3454,
"step": 397
},
{
"epoch": 1.1203377902885292,
"grad_norm": 0.9430282711982727,
"learning_rate": 0.00012120603015075378,
"loss": 2.4815,
"step": 398
},
{
"epoch": 1.1231527093596059,
"grad_norm": 0.8851016759872437,
"learning_rate": 0.00012100502512562815,
"loss": 2.2304,
"step": 399
},
{
"epoch": 1.1259676284306825,
"grad_norm": 1.1577056646347046,
"learning_rate": 0.00012080402010050251,
"loss": 2.3881,
"step": 400
},
{
"epoch": 1.1287825475017592,
"grad_norm": 0.8923066258430481,
"learning_rate": 0.00012060301507537688,
"loss": 2.6341,
"step": 401
},
{
"epoch": 1.131597466572836,
"grad_norm": 0.8544619679450989,
"learning_rate": 0.00012040201005025127,
"loss": 2.5802,
"step": 402
},
{
"epoch": 1.1344123856439128,
"grad_norm": 0.9601594805717468,
"learning_rate": 0.00012020100502512563,
"loss": 2.2292,
"step": 403
},
{
"epoch": 1.1372273047149895,
"grad_norm": 0.9403390884399414,
"learning_rate": 0.00012,
"loss": 2.5492,
"step": 404
},
{
"epoch": 1.1400422237860661,
"grad_norm": 0.7530049681663513,
"learning_rate": 0.00011979899497487436,
"loss": 2.3684,
"step": 405
},
{
"epoch": 1.1428571428571428,
"grad_norm": 1.0320841073989868,
"learning_rate": 0.00011959798994974876,
"loss": 2.4157,
"step": 406
},
{
"epoch": 1.1456720619282197,
"grad_norm": 1.1246405839920044,
"learning_rate": 0.00011939698492462312,
"loss": 2.6815,
"step": 407
},
{
"epoch": 1.1484869809992964,
"grad_norm": 0.8500766158103943,
"learning_rate": 0.0001191959798994975,
"loss": 2.4088,
"step": 408
},
{
"epoch": 1.151301900070373,
"grad_norm": 0.7606078386306763,
"learning_rate": 0.00011899497487437185,
"loss": 2.2564,
"step": 409
},
{
"epoch": 1.1541168191414497,
"grad_norm": 0.8513486385345459,
"learning_rate": 0.00011879396984924624,
"loss": 2.5881,
"step": 410
},
{
"epoch": 1.1569317382125264,
"grad_norm": 0.7827906012535095,
"learning_rate": 0.00011859296482412061,
"loss": 2.4386,
"step": 411
},
{
"epoch": 1.159746657283603,
"grad_norm": 0.9784967303276062,
"learning_rate": 0.00011839195979899497,
"loss": 2.5552,
"step": 412
},
{
"epoch": 1.1625615763546797,
"grad_norm": 0.8472895622253418,
"learning_rate": 0.00011819095477386935,
"loss": 2.6518,
"step": 413
},
{
"epoch": 1.1653764954257566,
"grad_norm": 0.7687847018241882,
"learning_rate": 0.00011798994974874373,
"loss": 2.39,
"step": 414
},
{
"epoch": 1.1681914144968333,
"grad_norm": 0.7497126460075378,
"learning_rate": 0.0001177889447236181,
"loss": 2.2635,
"step": 415
},
{
"epoch": 1.17100633356791,
"grad_norm": 0.7597271203994751,
"learning_rate": 0.00011758793969849247,
"loss": 2.6295,
"step": 416
},
{
"epoch": 1.1738212526389866,
"grad_norm": 0.770999550819397,
"learning_rate": 0.00011738693467336684,
"loss": 2.3357,
"step": 417
},
{
"epoch": 1.1766361717100633,
"grad_norm": 0.819741427898407,
"learning_rate": 0.00011718592964824122,
"loss": 2.4455,
"step": 418
},
{
"epoch": 1.17945109078114,
"grad_norm": 0.8049472570419312,
"learning_rate": 0.00011698492462311558,
"loss": 2.184,
"step": 419
},
{
"epoch": 1.1822660098522166,
"grad_norm": 0.8897677659988403,
"learning_rate": 0.00011678391959798996,
"loss": 2.3405,
"step": 420
},
{
"epoch": 1.1850809289232935,
"grad_norm": 0.9535378217697144,
"learning_rate": 0.00011658291457286432,
"loss": 2.3352,
"step": 421
},
{
"epoch": 1.1878958479943702,
"grad_norm": 0.7934727072715759,
"learning_rate": 0.00011638190954773872,
"loss": 2.2503,
"step": 422
},
{
"epoch": 1.1907107670654469,
"grad_norm": 1.1643705368041992,
"learning_rate": 0.00011618090452261308,
"loss": 2.4985,
"step": 423
},
{
"epoch": 1.1935256861365235,
"grad_norm": 1.05571448802948,
"learning_rate": 0.00011597989949748745,
"loss": 2.5174,
"step": 424
},
{
"epoch": 1.1963406052076002,
"grad_norm": 0.8346055150032043,
"learning_rate": 0.00011577889447236181,
"loss": 2.331,
"step": 425
},
{
"epoch": 1.199155524278677,
"grad_norm": 1.067415475845337,
"learning_rate": 0.00011557788944723618,
"loss": 2.4303,
"step": 426
},
{
"epoch": 1.2019704433497538,
"grad_norm": 1.0706610679626465,
"learning_rate": 0.00011537688442211057,
"loss": 2.2276,
"step": 427
},
{
"epoch": 1.2047853624208305,
"grad_norm": 0.877740204334259,
"learning_rate": 0.00011517587939698493,
"loss": 2.3532,
"step": 428
},
{
"epoch": 1.2076002814919071,
"grad_norm": 0.9245136380195618,
"learning_rate": 0.0001149748743718593,
"loss": 2.1982,
"step": 429
},
{
"epoch": 1.2104152005629838,
"grad_norm": 0.8375447392463684,
"learning_rate": 0.00011477386934673366,
"loss": 2.2702,
"step": 430
},
{
"epoch": 1.2132301196340605,
"grad_norm": 1.0361285209655762,
"learning_rate": 0.00011457286432160806,
"loss": 2.5355,
"step": 431
},
{
"epoch": 1.2160450387051371,
"grad_norm": 0.9980331063270569,
"learning_rate": 0.00011437185929648242,
"loss": 2.0764,
"step": 432
},
{
"epoch": 1.218859957776214,
"grad_norm": 0.8354774117469788,
"learning_rate": 0.00011417085427135679,
"loss": 2.5983,
"step": 433
},
{
"epoch": 1.2216748768472907,
"grad_norm": 0.8765326738357544,
"learning_rate": 0.00011396984924623115,
"loss": 2.0808,
"step": 434
},
{
"epoch": 1.2244897959183674,
"grad_norm": 1.077864408493042,
"learning_rate": 0.00011376884422110554,
"loss": 2.2868,
"step": 435
},
{
"epoch": 1.227304714989444,
"grad_norm": 0.8155612945556641,
"learning_rate": 0.00011356783919597991,
"loss": 2.2297,
"step": 436
},
{
"epoch": 1.2301196340605207,
"grad_norm": 0.9063975811004639,
"learning_rate": 0.00011336683417085427,
"loss": 2.522,
"step": 437
},
{
"epoch": 1.2329345531315974,
"grad_norm": 0.9975262880325317,
"learning_rate": 0.00011316582914572864,
"loss": 2.4027,
"step": 438
},
{
"epoch": 1.235749472202674,
"grad_norm": 0.860905110836029,
"learning_rate": 0.00011296482412060303,
"loss": 2.4354,
"step": 439
},
{
"epoch": 1.238564391273751,
"grad_norm": 0.9583187103271484,
"learning_rate": 0.0001127638190954774,
"loss": 2.3433,
"step": 440
},
{
"epoch": 1.2413793103448276,
"grad_norm": 0.8732121586799622,
"learning_rate": 0.00011256281407035176,
"loss": 2.3921,
"step": 441
},
{
"epoch": 1.2441942294159043,
"grad_norm": 0.9089124798774719,
"learning_rate": 0.00011236180904522614,
"loss": 2.2424,
"step": 442
},
{
"epoch": 1.247009148486981,
"grad_norm": 0.8566604852676392,
"learning_rate": 0.00011216080402010052,
"loss": 2.4264,
"step": 443
},
{
"epoch": 1.2498240675580576,
"grad_norm": 0.9148624539375305,
"learning_rate": 0.00011195979899497488,
"loss": 2.3394,
"step": 444
},
{
"epoch": 1.2526389866291345,
"grad_norm": 0.8913928866386414,
"learning_rate": 0.00011175879396984925,
"loss": 2.2236,
"step": 445
},
{
"epoch": 1.255453905700211,
"grad_norm": 1.1119465827941895,
"learning_rate": 0.00011155778894472361,
"loss": 2.391,
"step": 446
},
{
"epoch": 1.2582688247712879,
"grad_norm": 1.1434952020645142,
"learning_rate": 0.00011135678391959799,
"loss": 2.4987,
"step": 447
},
{
"epoch": 1.2610837438423645,
"grad_norm": 0.9885523319244385,
"learning_rate": 0.00011115577889447237,
"loss": 2.5184,
"step": 448
},
{
"epoch": 1.2638986629134412,
"grad_norm": 0.9945192337036133,
"learning_rate": 0.00011095477386934675,
"loss": 2.4046,
"step": 449
},
{
"epoch": 1.266713581984518,
"grad_norm": 0.9107452034950256,
"learning_rate": 0.0001107537688442211,
"loss": 2.4296,
"step": 450
},
{
"epoch": 1.2695285010555946,
"grad_norm": 1.2265137434005737,
"learning_rate": 0.00011055276381909548,
"loss": 2.4336,
"step": 451
},
{
"epoch": 1.2723434201266715,
"grad_norm": 0.907394289970398,
"learning_rate": 0.00011035175879396986,
"loss": 2.4008,
"step": 452
},
{
"epoch": 1.2751583391977481,
"grad_norm": 0.884708821773529,
"learning_rate": 0.00011015075376884422,
"loss": 2.5134,
"step": 453
},
{
"epoch": 1.2779732582688248,
"grad_norm": 0.8295673727989197,
"learning_rate": 0.0001099497487437186,
"loss": 2.5117,
"step": 454
},
{
"epoch": 1.2807881773399015,
"grad_norm": 1.0812764167785645,
"learning_rate": 0.00010974874371859296,
"loss": 2.372,
"step": 455
},
{
"epoch": 1.2836030964109781,
"grad_norm": 1.0535778999328613,
"learning_rate": 0.00010954773869346736,
"loss": 2.5114,
"step": 456
},
{
"epoch": 1.2864180154820548,
"grad_norm": 1.1005867719650269,
"learning_rate": 0.00010934673366834172,
"loss": 2.4269,
"step": 457
},
{
"epoch": 1.2892329345531315,
"grad_norm": 0.813443124294281,
"learning_rate": 0.00010914572864321609,
"loss": 2.3226,
"step": 458
},
{
"epoch": 1.2920478536242084,
"grad_norm": 0.8614223599433899,
"learning_rate": 0.00010894472361809045,
"loss": 2.3945,
"step": 459
},
{
"epoch": 1.294862772695285,
"grad_norm": 0.9305881857872009,
"learning_rate": 0.00010874371859296483,
"loss": 2.4106,
"step": 460
},
{
"epoch": 1.2976776917663617,
"grad_norm": 0.7936707139015198,
"learning_rate": 0.00010854271356783921,
"loss": 2.2523,
"step": 461
},
{
"epoch": 1.3004926108374384,
"grad_norm": 0.9864185452461243,
"learning_rate": 0.00010834170854271357,
"loss": 2.4317,
"step": 462
},
{
"epoch": 1.303307529908515,
"grad_norm": 0.8099750876426697,
"learning_rate": 0.00010814070351758794,
"loss": 2.5428,
"step": 463
},
{
"epoch": 1.306122448979592,
"grad_norm": 0.8694155216217041,
"learning_rate": 0.00010793969849246233,
"loss": 2.4084,
"step": 464
},
{
"epoch": 1.3089373680506686,
"grad_norm": 0.963947057723999,
"learning_rate": 0.0001077386934673367,
"loss": 2.3802,
"step": 465
},
{
"epoch": 1.3117522871217453,
"grad_norm": 0.9907119274139404,
"learning_rate": 0.00010753768844221106,
"loss": 2.3028,
"step": 466
},
{
"epoch": 1.314567206192822,
"grad_norm": 0.8978596329689026,
"learning_rate": 0.00010733668341708543,
"loss": 2.129,
"step": 467
},
{
"epoch": 1.3173821252638986,
"grad_norm": 1.0621075630187988,
"learning_rate": 0.00010713567839195982,
"loss": 2.4765,
"step": 468
},
{
"epoch": 1.3201970443349753,
"grad_norm": 1.0847358703613281,
"learning_rate": 0.00010693467336683418,
"loss": 2.4611,
"step": 469
},
{
"epoch": 1.323011963406052,
"grad_norm": 0.8706623315811157,
"learning_rate": 0.00010673366834170855,
"loss": 2.5458,
"step": 470
},
{
"epoch": 1.3258268824771289,
"grad_norm": 1.0084209442138672,
"learning_rate": 0.00010653266331658291,
"loss": 2.4968,
"step": 471
},
{
"epoch": 1.3286418015482055,
"grad_norm": 0.8770229816436768,
"learning_rate": 0.00010633165829145728,
"loss": 2.3268,
"step": 472
},
{
"epoch": 1.3314567206192822,
"grad_norm": 0.9652953743934631,
"learning_rate": 0.00010613065326633167,
"loss": 2.3758,
"step": 473
},
{
"epoch": 1.334271639690359,
"grad_norm": 0.8194919228553772,
"learning_rate": 0.00010592964824120604,
"loss": 2.4732,
"step": 474
},
{
"epoch": 1.3370865587614356,
"grad_norm": 3.534748077392578,
"learning_rate": 0.0001057286432160804,
"loss": 2.3725,
"step": 475
},
{
"epoch": 1.3399014778325122,
"grad_norm": 0.9962548017501831,
"learning_rate": 0.00010552763819095478,
"loss": 2.3963,
"step": 476
},
{
"epoch": 1.342716396903589,
"grad_norm": 0.794152021408081,
"learning_rate": 0.00010532663316582916,
"loss": 2.2899,
"step": 477
},
{
"epoch": 1.3455313159746658,
"grad_norm": 1.100648283958435,
"learning_rate": 0.00010512562814070352,
"loss": 2.5847,
"step": 478
},
{
"epoch": 1.3483462350457425,
"grad_norm": 0.8269829154014587,
"learning_rate": 0.0001049246231155779,
"loss": 2.4694,
"step": 479
},
{
"epoch": 1.3511611541168191,
"grad_norm": 0.915529727935791,
"learning_rate": 0.00010472361809045225,
"loss": 2.2721,
"step": 480
},
{
"epoch": 1.3539760731878958,
"grad_norm": 0.8491760492324829,
"learning_rate": 0.00010452261306532664,
"loss": 2.3008,
"step": 481
},
{
"epoch": 1.3567909922589725,
"grad_norm": 0.8877702355384827,
"learning_rate": 0.00010432160804020101,
"loss": 2.33,
"step": 482
},
{
"epoch": 1.3596059113300494,
"grad_norm": 0.86586993932724,
"learning_rate": 0.00010412060301507539,
"loss": 2.0962,
"step": 483
},
{
"epoch": 1.362420830401126,
"grad_norm": 0.8984941244125366,
"learning_rate": 0.00010391959798994975,
"loss": 2.1901,
"step": 484
},
{
"epoch": 1.3652357494722027,
"grad_norm": 0.8369758129119873,
"learning_rate": 0.00010371859296482413,
"loss": 2.4045,
"step": 485
},
{
"epoch": 1.3680506685432794,
"grad_norm": 0.7900081276893616,
"learning_rate": 0.0001035175879396985,
"loss": 2.1448,
"step": 486
},
{
"epoch": 1.370865587614356,
"grad_norm": 0.9296205043792725,
"learning_rate": 0.00010331658291457286,
"loss": 2.2958,
"step": 487
},
{
"epoch": 1.3736805066854327,
"grad_norm": 1.0592749118804932,
"learning_rate": 0.00010311557788944724,
"loss": 2.524,
"step": 488
},
{
"epoch": 1.3764954257565094,
"grad_norm": 0.7983985543251038,
"learning_rate": 0.00010291457286432162,
"loss": 2.4329,
"step": 489
},
{
"epoch": 1.3793103448275863,
"grad_norm": 1.1377589702606201,
"learning_rate": 0.00010271356783919598,
"loss": 2.2845,
"step": 490
},
{
"epoch": 1.382125263898663,
"grad_norm": 1.1031099557876587,
"learning_rate": 0.00010251256281407036,
"loss": 2.3531,
"step": 491
},
{
"epoch": 1.3849401829697396,
"grad_norm": 0.9376154541969299,
"learning_rate": 0.00010231155778894473,
"loss": 2.1406,
"step": 492
},
{
"epoch": 1.3877551020408163,
"grad_norm": 1.0728362798690796,
"learning_rate": 0.00010211055276381909,
"loss": 2.4291,
"step": 493
},
{
"epoch": 1.390570021111893,
"grad_norm": 1.021877408027649,
"learning_rate": 0.00010190954773869348,
"loss": 2.4566,
"step": 494
},
{
"epoch": 1.3933849401829699,
"grad_norm": 1.1455014944076538,
"learning_rate": 0.00010170854271356785,
"loss": 2.3074,
"step": 495
},
{
"epoch": 1.3961998592540463,
"grad_norm": 0.8512632846832275,
"learning_rate": 0.00010150753768844221,
"loss": 2.4004,
"step": 496
},
{
"epoch": 1.3990147783251232,
"grad_norm": 0.9220101237297058,
"learning_rate": 0.00010130653266331658,
"loss": 2.4456,
"step": 497
},
{
"epoch": 1.4018296973962,
"grad_norm": 1.0779199600219727,
"learning_rate": 0.00010110552763819097,
"loss": 2.4918,
"step": 498
},
{
"epoch": 1.4046446164672766,
"grad_norm": 1.0237290859222412,
"learning_rate": 0.00010090452261306533,
"loss": 2.3606,
"step": 499
},
{
"epoch": 1.4074595355383532,
"grad_norm": 1.2366681098937988,
"learning_rate": 0.0001007035175879397,
"loss": 2.468,
"step": 500
},
{
"epoch": 1.41027445460943,
"grad_norm": 0.9772239923477173,
"learning_rate": 0.00010050251256281407,
"loss": 2.2868,
"step": 501
},
{
"epoch": 1.4130893736805068,
"grad_norm": 0.9963237643241882,
"learning_rate": 0.00010030150753768846,
"loss": 2.3794,
"step": 502
},
{
"epoch": 1.4159042927515835,
"grad_norm": 0.8932761549949646,
"learning_rate": 0.00010010050251256282,
"loss": 2.4493,
"step": 503
},
{
"epoch": 1.4187192118226601,
"grad_norm": 0.8121969103813171,
"learning_rate": 9.989949748743719e-05,
"loss": 2.4045,
"step": 504
},
{
"epoch": 1.4215341308937368,
"grad_norm": 0.9738163948059082,
"learning_rate": 9.969849246231156e-05,
"loss": 2.3725,
"step": 505
},
{
"epoch": 1.4243490499648135,
"grad_norm": 0.8812170028686523,
"learning_rate": 9.949748743718594e-05,
"loss": 2.2143,
"step": 506
},
{
"epoch": 1.4271639690358902,
"grad_norm": 0.9500517249107361,
"learning_rate": 9.929648241206031e-05,
"loss": 2.4194,
"step": 507
},
{
"epoch": 1.4299788881069668,
"grad_norm": 0.8924652338027954,
"learning_rate": 9.909547738693468e-05,
"loss": 2.4061,
"step": 508
},
{
"epoch": 1.4327938071780437,
"grad_norm": 0.9255656003952026,
"learning_rate": 9.889447236180906e-05,
"loss": 2.4458,
"step": 509
},
{
"epoch": 1.4356087262491204,
"grad_norm": 0.9696526527404785,
"learning_rate": 9.869346733668342e-05,
"loss": 2.3285,
"step": 510
},
{
"epoch": 1.438423645320197,
"grad_norm": 0.9249640107154846,
"learning_rate": 9.84924623115578e-05,
"loss": 2.3348,
"step": 511
},
{
"epoch": 1.4412385643912737,
"grad_norm": 0.8589572906494141,
"learning_rate": 9.829145728643216e-05,
"loss": 2.3224,
"step": 512
},
{
"epoch": 1.4440534834623504,
"grad_norm": 0.9638547301292419,
"learning_rate": 9.809045226130655e-05,
"loss": 2.3172,
"step": 513
},
{
"epoch": 1.4468684025334273,
"grad_norm": 0.9466349482536316,
"learning_rate": 9.788944723618091e-05,
"loss": 2.4924,
"step": 514
},
{
"epoch": 1.4496833216045037,
"grad_norm": 0.891727089881897,
"learning_rate": 9.768844221105528e-05,
"loss": 2.446,
"step": 515
},
{
"epoch": 1.4524982406755806,
"grad_norm": 0.9032166600227356,
"learning_rate": 9.748743718592965e-05,
"loss": 2.3733,
"step": 516
},
{
"epoch": 1.4553131597466573,
"grad_norm": 0.8339729905128479,
"learning_rate": 9.728643216080403e-05,
"loss": 2.0429,
"step": 517
},
{
"epoch": 1.458128078817734,
"grad_norm": 0.8902753591537476,
"learning_rate": 9.70854271356784e-05,
"loss": 2.4414,
"step": 518
},
{
"epoch": 1.4609429978888107,
"grad_norm": 3.1321892738342285,
"learning_rate": 9.688442211055276e-05,
"loss": 2.1431,
"step": 519
},
{
"epoch": 1.4637579169598873,
"grad_norm": 0.8980495929718018,
"learning_rate": 9.668341708542715e-05,
"loss": 2.646,
"step": 520
},
{
"epoch": 1.4665728360309642,
"grad_norm": 0.9685273170471191,
"learning_rate": 9.64824120603015e-05,
"loss": 2.3513,
"step": 521
},
{
"epoch": 1.469387755102041,
"grad_norm": 0.9794145822525024,
"learning_rate": 9.628140703517589e-05,
"loss": 2.477,
"step": 522
},
{
"epoch": 1.4722026741731176,
"grad_norm": 1.0298691987991333,
"learning_rate": 9.608040201005025e-05,
"loss": 2.3071,
"step": 523
},
{
"epoch": 1.4750175932441942,
"grad_norm": 1.007864236831665,
"learning_rate": 9.587939698492462e-05,
"loss": 2.443,
"step": 524
},
{
"epoch": 1.477832512315271,
"grad_norm": 0.8953837752342224,
"learning_rate": 9.5678391959799e-05,
"loss": 2.645,
"step": 525
},
{
"epoch": 1.4806474313863476,
"grad_norm": 0.9239638447761536,
"learning_rate": 9.547738693467337e-05,
"loss": 2.4693,
"step": 526
},
{
"epoch": 1.4834623504574243,
"grad_norm": 0.9644606709480286,
"learning_rate": 9.527638190954774e-05,
"loss": 2.427,
"step": 527
},
{
"epoch": 1.4862772695285011,
"grad_norm": 0.9406245946884155,
"learning_rate": 9.507537688442212e-05,
"loss": 2.4382,
"step": 528
},
{
"epoch": 1.4890921885995778,
"grad_norm": 1.0155800580978394,
"learning_rate": 9.487437185929649e-05,
"loss": 2.4304,
"step": 529
},
{
"epoch": 1.4919071076706545,
"grad_norm": 0.8856943845748901,
"learning_rate": 9.467336683417086e-05,
"loss": 2.4116,
"step": 530
},
{
"epoch": 1.4947220267417312,
"grad_norm": 0.8708421587944031,
"learning_rate": 9.447236180904523e-05,
"loss": 2.4999,
"step": 531
},
{
"epoch": 1.4975369458128078,
"grad_norm": 0.9287678599357605,
"learning_rate": 9.427135678391961e-05,
"loss": 2.3666,
"step": 532
},
{
"epoch": 1.5003518648838847,
"grad_norm": 0.9635983109474182,
"learning_rate": 9.407035175879397e-05,
"loss": 2.545,
"step": 533
},
{
"epoch": 1.5031667839549612,
"grad_norm": 0.8631216287612915,
"learning_rate": 9.386934673366835e-05,
"loss": 2.3323,
"step": 534
},
{
"epoch": 1.505981703026038,
"grad_norm": 0.9372367858886719,
"learning_rate": 9.366834170854271e-05,
"loss": 2.5233,
"step": 535
},
{
"epoch": 1.5087966220971147,
"grad_norm": 0.8693691492080688,
"learning_rate": 9.34673366834171e-05,
"loss": 2.3968,
"step": 536
},
{
"epoch": 1.5116115411681914,
"grad_norm": 0.911888062953949,
"learning_rate": 9.326633165829146e-05,
"loss": 2.5019,
"step": 537
},
{
"epoch": 1.514426460239268,
"grad_norm": 0.9012334942817688,
"learning_rate": 9.306532663316585e-05,
"loss": 2.4697,
"step": 538
},
{
"epoch": 1.5172413793103448,
"grad_norm": 0.9713804721832275,
"learning_rate": 9.28643216080402e-05,
"loss": 2.4162,
"step": 539
},
{
"epoch": 1.5200562983814216,
"grad_norm": 0.9272815585136414,
"learning_rate": 9.266331658291458e-05,
"loss": 2.4306,
"step": 540
},
{
"epoch": 1.522871217452498,
"grad_norm": 0.9575127959251404,
"learning_rate": 9.246231155778895e-05,
"loss": 2.2645,
"step": 541
},
{
"epoch": 1.525686136523575,
"grad_norm": 1.0728868246078491,
"learning_rate": 9.226130653266331e-05,
"loss": 2.4701,
"step": 542
},
{
"epoch": 1.5285010555946517,
"grad_norm": 0.8400806784629822,
"learning_rate": 9.20603015075377e-05,
"loss": 2.1602,
"step": 543
},
{
"epoch": 1.5313159746657283,
"grad_norm": 0.9699164032936096,
"learning_rate": 9.185929648241206e-05,
"loss": 2.3135,
"step": 544
},
{
"epoch": 1.5341308937368052,
"grad_norm": 1.0054633617401123,
"learning_rate": 9.165829145728644e-05,
"loss": 2.2226,
"step": 545
},
{
"epoch": 1.5369458128078817,
"grad_norm": 0.9745274782180786,
"learning_rate": 9.14572864321608e-05,
"loss": 2.3635,
"step": 546
},
{
"epoch": 1.5397607318789586,
"grad_norm": 0.8937272429466248,
"learning_rate": 9.125628140703519e-05,
"loss": 2.1517,
"step": 547
},
{
"epoch": 1.5425756509500352,
"grad_norm": 1.0121883153915405,
"learning_rate": 9.105527638190955e-05,
"loss": 2.3947,
"step": 548
},
{
"epoch": 1.545390570021112,
"grad_norm": 0.9827173352241516,
"learning_rate": 9.085427135678392e-05,
"loss": 2.3363,
"step": 549
},
{
"epoch": 1.5482054890921886,
"grad_norm": 0.9372025728225708,
"learning_rate": 9.06532663316583e-05,
"loss": 2.4856,
"step": 550
},
{
"epoch": 1.5510204081632653,
"grad_norm": 1.1509451866149902,
"learning_rate": 9.045226130653267e-05,
"loss": 2.444,
"step": 551
},
{
"epoch": 1.5538353272343421,
"grad_norm": 1.0390241146087646,
"learning_rate": 9.025125628140704e-05,
"loss": 2.5131,
"step": 552
},
{
"epoch": 1.5566502463054186,
"grad_norm": 0.9539273381233215,
"learning_rate": 9.005025125628141e-05,
"loss": 2.3651,
"step": 553
},
{
"epoch": 1.5594651653764955,
"grad_norm": 1.0755736827850342,
"learning_rate": 8.984924623115579e-05,
"loss": 2.5255,
"step": 554
},
{
"epoch": 1.5622800844475722,
"grad_norm": 0.850516140460968,
"learning_rate": 8.964824120603016e-05,
"loss": 2.3374,
"step": 555
},
{
"epoch": 1.5650950035186488,
"grad_norm": 0.8225517868995667,
"learning_rate": 8.944723618090453e-05,
"loss": 2.2753,
"step": 556
},
{
"epoch": 1.5679099225897255,
"grad_norm": 0.9345348477363586,
"learning_rate": 8.92462311557789e-05,
"loss": 2.3177,
"step": 557
},
{
"epoch": 1.5707248416608022,
"grad_norm": 1.1513749361038208,
"learning_rate": 8.904522613065326e-05,
"loss": 2.5246,
"step": 558
},
{
"epoch": 1.573539760731879,
"grad_norm": 0.8601298332214355,
"learning_rate": 8.884422110552765e-05,
"loss": 2.2688,
"step": 559
},
{
"epoch": 1.5763546798029555,
"grad_norm": 0.9165076613426208,
"learning_rate": 8.864321608040201e-05,
"loss": 2.3773,
"step": 560
},
{
"epoch": 1.5791695988740324,
"grad_norm": 1.0467596054077148,
"learning_rate": 8.84422110552764e-05,
"loss": 2.3887,
"step": 561
},
{
"epoch": 1.581984517945109,
"grad_norm": 0.994055449962616,
"learning_rate": 8.824120603015076e-05,
"loss": 2.5547,
"step": 562
},
{
"epoch": 1.5847994370161858,
"grad_norm": 1.100698709487915,
"learning_rate": 8.804020100502513e-05,
"loss": 2.3517,
"step": 563
},
{
"epoch": 1.5876143560872626,
"grad_norm": 0.9837064743041992,
"learning_rate": 8.78391959798995e-05,
"loss": 2.3602,
"step": 564
},
{
"epoch": 1.590429275158339,
"grad_norm": 0.9642098546028137,
"learning_rate": 8.763819095477387e-05,
"loss": 2.3596,
"step": 565
},
{
"epoch": 1.593244194229416,
"grad_norm": 0.930444061756134,
"learning_rate": 8.743718592964825e-05,
"loss": 2.4475,
"step": 566
},
{
"epoch": 1.5960591133004927,
"grad_norm": 3.837113618850708,
"learning_rate": 8.723618090452261e-05,
"loss": 2.3049,
"step": 567
},
{
"epoch": 1.5988740323715693,
"grad_norm": 0.9022939205169678,
"learning_rate": 8.7035175879397e-05,
"loss": 2.5377,
"step": 568
},
{
"epoch": 1.601688951442646,
"grad_norm": 0.9081845879554749,
"learning_rate": 8.683417085427135e-05,
"loss": 2.2669,
"step": 569
},
{
"epoch": 1.6045038705137227,
"grad_norm": 1.0544410943984985,
"learning_rate": 8.663316582914574e-05,
"loss": 2.5433,
"step": 570
},
{
"epoch": 1.6073187895847996,
"grad_norm": 1.0048531293869019,
"learning_rate": 8.64321608040201e-05,
"loss": 2.3306,
"step": 571
},
{
"epoch": 1.610133708655876,
"grad_norm": 0.9438222646713257,
"learning_rate": 8.623115577889449e-05,
"loss": 2.3569,
"step": 572
},
{
"epoch": 1.612948627726953,
"grad_norm": 0.8773800134658813,
"learning_rate": 8.603015075376884e-05,
"loss": 2.2443,
"step": 573
},
{
"epoch": 1.6157635467980296,
"grad_norm": 0.8797910809516907,
"learning_rate": 8.582914572864322e-05,
"loss": 2.3595,
"step": 574
},
{
"epoch": 1.6185784658691063,
"grad_norm": 1.003212809562683,
"learning_rate": 8.562814070351759e-05,
"loss": 2.3623,
"step": 575
},
{
"epoch": 1.621393384940183,
"grad_norm": 0.97022944688797,
"learning_rate": 8.542713567839196e-05,
"loss": 2.3163,
"step": 576
},
{
"epoch": 1.6242083040112596,
"grad_norm": 1.0100817680358887,
"learning_rate": 8.522613065326634e-05,
"loss": 2.4039,
"step": 577
},
{
"epoch": 1.6270232230823365,
"grad_norm": 0.9104019403457642,
"learning_rate": 8.502512562814071e-05,
"loss": 2.2857,
"step": 578
},
{
"epoch": 1.629838142153413,
"grad_norm": 0.8757676482200623,
"learning_rate": 8.482412060301508e-05,
"loss": 1.8537,
"step": 579
},
{
"epoch": 1.6326530612244898,
"grad_norm": 1.2689822912216187,
"learning_rate": 8.462311557788946e-05,
"loss": 2.375,
"step": 580
},
{
"epoch": 1.6354679802955665,
"grad_norm": 0.9648078680038452,
"learning_rate": 8.442211055276383e-05,
"loss": 2.4079,
"step": 581
},
{
"epoch": 1.6382828993666432,
"grad_norm": 0.8941888213157654,
"learning_rate": 8.42211055276382e-05,
"loss": 2.3487,
"step": 582
},
{
"epoch": 1.64109781843772,
"grad_norm": 1.1510968208312988,
"learning_rate": 8.402010050251256e-05,
"loss": 2.2717,
"step": 583
},
{
"epoch": 1.6439127375087965,
"grad_norm": 0.8997256755828857,
"learning_rate": 8.381909547738695e-05,
"loss": 2.2837,
"step": 584
},
{
"epoch": 1.6467276565798734,
"grad_norm": 0.9518352746963501,
"learning_rate": 8.36180904522613e-05,
"loss": 2.4866,
"step": 585
},
{
"epoch": 1.64954257565095,
"grad_norm": 0.9324397444725037,
"learning_rate": 8.341708542713568e-05,
"loss": 2.3405,
"step": 586
},
{
"epoch": 1.6523574947220268,
"grad_norm": 0.9339852929115295,
"learning_rate": 8.321608040201005e-05,
"loss": 2.2778,
"step": 587
},
{
"epoch": 1.6551724137931034,
"grad_norm": 1.0167529582977295,
"learning_rate": 8.301507537688443e-05,
"loss": 2.3714,
"step": 588
},
{
"epoch": 1.65798733286418,
"grad_norm": 1.1903690099716187,
"learning_rate": 8.28140703517588e-05,
"loss": 2.6944,
"step": 589
},
{
"epoch": 1.660802251935257,
"grad_norm": 0.9818819165229797,
"learning_rate": 8.261306532663317e-05,
"loss": 2.5714,
"step": 590
},
{
"epoch": 1.6636171710063334,
"grad_norm": 0.9528331160545349,
"learning_rate": 8.241206030150754e-05,
"loss": 2.4357,
"step": 591
},
{
"epoch": 1.6664320900774103,
"grad_norm": 1.125532865524292,
"learning_rate": 8.22110552763819e-05,
"loss": 2.7857,
"step": 592
},
{
"epoch": 1.669247009148487,
"grad_norm": 1.0312976837158203,
"learning_rate": 8.201005025125629e-05,
"loss": 2.5711,
"step": 593
},
{
"epoch": 1.6720619282195637,
"grad_norm": 1.0230183601379395,
"learning_rate": 8.180904522613065e-05,
"loss": 2.449,
"step": 594
},
{
"epoch": 1.6748768472906403,
"grad_norm": 1.0071231126785278,
"learning_rate": 8.160804020100504e-05,
"loss": 2.4473,
"step": 595
},
{
"epoch": 1.677691766361717,
"grad_norm": 0.8198001980781555,
"learning_rate": 8.14070351758794e-05,
"loss": 2.2743,
"step": 596
},
{
"epoch": 1.680506685432794,
"grad_norm": 0.9489749670028687,
"learning_rate": 8.120603015075378e-05,
"loss": 2.3807,
"step": 597
},
{
"epoch": 1.6833216045038704,
"grad_norm": 0.9406233429908752,
"learning_rate": 8.100502512562814e-05,
"loss": 2.3478,
"step": 598
},
{
"epoch": 1.6861365235749473,
"grad_norm": 1.1207877397537231,
"learning_rate": 8.080402010050251e-05,
"loss": 2.4251,
"step": 599
},
{
"epoch": 1.688951442646024,
"grad_norm": 0.8841767311096191,
"learning_rate": 8.060301507537689e-05,
"loss": 2.4555,
"step": 600
},
{
"epoch": 1.6917663617171006,
"grad_norm": 1.0105196237564087,
"learning_rate": 8.040201005025126e-05,
"loss": 2.4965,
"step": 601
},
{
"epoch": 1.6945812807881775,
"grad_norm": 1.4616045951843262,
"learning_rate": 8.020100502512563e-05,
"loss": 2.1856,
"step": 602
},
{
"epoch": 1.697396199859254,
"grad_norm": 0.8796388506889343,
"learning_rate": 8e-05,
"loss": 2.267,
"step": 603
},
{
"epoch": 1.7002111189303308,
"grad_norm": 0.9459576606750488,
"learning_rate": 7.979899497487438e-05,
"loss": 2.3772,
"step": 604
},
{
"epoch": 1.7030260380014075,
"grad_norm": 0.9645008444786072,
"learning_rate": 7.959798994974875e-05,
"loss": 2.4959,
"step": 605
},
{
"epoch": 1.7058409570724842,
"grad_norm": 1.002943515777588,
"learning_rate": 7.939698492462313e-05,
"loss": 2.2909,
"step": 606
},
{
"epoch": 1.7086558761435608,
"grad_norm": 0.9787498116493225,
"learning_rate": 7.91959798994975e-05,
"loss": 2.4482,
"step": 607
},
{
"epoch": 1.7114707952146375,
"grad_norm": 0.9761974811553955,
"learning_rate": 7.899497487437186e-05,
"loss": 2.358,
"step": 608
},
{
"epoch": 1.7142857142857144,
"grad_norm": 1.0572192668914795,
"learning_rate": 7.879396984924623e-05,
"loss": 2.3769,
"step": 609
},
{
"epoch": 1.7171006333567909,
"grad_norm": 1.1789201498031616,
"learning_rate": 7.85929648241206e-05,
"loss": 2.4841,
"step": 610
},
{
"epoch": 1.7199155524278678,
"grad_norm": 1.2059478759765625,
"learning_rate": 7.839195979899498e-05,
"loss": 2.3738,
"step": 611
},
{
"epoch": 1.7227304714989444,
"grad_norm": 0.884793758392334,
"learning_rate": 7.819095477386935e-05,
"loss": 2.2696,
"step": 612
},
{
"epoch": 1.725545390570021,
"grad_norm": 1.1065027713775635,
"learning_rate": 7.798994974874372e-05,
"loss": 2.2802,
"step": 613
},
{
"epoch": 1.7283603096410978,
"grad_norm": 0.9025463461875916,
"learning_rate": 7.77889447236181e-05,
"loss": 2.2835,
"step": 614
},
{
"epoch": 1.7311752287121744,
"grad_norm": 0.939608633518219,
"learning_rate": 7.758793969849247e-05,
"loss": 2.2813,
"step": 615
},
{
"epoch": 1.7339901477832513,
"grad_norm": 1.2778400182724,
"learning_rate": 7.738693467336684e-05,
"loss": 2.5467,
"step": 616
},
{
"epoch": 1.7368050668543278,
"grad_norm": 1.1586782932281494,
"learning_rate": 7.71859296482412e-05,
"loss": 2.2648,
"step": 617
},
{
"epoch": 1.7396199859254047,
"grad_norm": 0.9858127236366272,
"learning_rate": 7.698492462311559e-05,
"loss": 2.053,
"step": 618
},
{
"epoch": 1.7424349049964813,
"grad_norm": 0.966553807258606,
"learning_rate": 7.678391959798995e-05,
"loss": 2.2932,
"step": 619
},
{
"epoch": 1.745249824067558,
"grad_norm": 1.2511391639709473,
"learning_rate": 7.658291457286433e-05,
"loss": 2.4427,
"step": 620
},
{
"epoch": 1.748064743138635,
"grad_norm": 0.9545679092407227,
"learning_rate": 7.638190954773869e-05,
"loss": 2.5727,
"step": 621
},
{
"epoch": 1.7508796622097114,
"grad_norm": 0.9427103400230408,
"learning_rate": 7.618090452261307e-05,
"loss": 2.213,
"step": 622
},
{
"epoch": 1.7536945812807883,
"grad_norm": 0.9781317710876465,
"learning_rate": 7.597989949748744e-05,
"loss": 2.46,
"step": 623
},
{
"epoch": 1.756509500351865,
"grad_norm": 0.9984252452850342,
"learning_rate": 7.577889447236181e-05,
"loss": 2.5281,
"step": 624
},
{
"epoch": 1.7593244194229416,
"grad_norm": 1.1169133186340332,
"learning_rate": 7.557788944723618e-05,
"loss": 2.036,
"step": 625
},
{
"epoch": 1.7621393384940183,
"grad_norm": 0.9950047731399536,
"learning_rate": 7.537688442211056e-05,
"loss": 2.3513,
"step": 626
},
{
"epoch": 1.764954257565095,
"grad_norm": 1.1179485321044922,
"learning_rate": 7.517587939698493e-05,
"loss": 2.3691,
"step": 627
},
{
"epoch": 1.7677691766361718,
"grad_norm": 1.3760029077529907,
"learning_rate": 7.49748743718593e-05,
"loss": 2.5204,
"step": 628
},
{
"epoch": 1.7705840957072483,
"grad_norm": 1.0102930068969727,
"learning_rate": 7.477386934673368e-05,
"loss": 2.4864,
"step": 629
},
{
"epoch": 1.7733990147783252,
"grad_norm": 0.9945108890533447,
"learning_rate": 7.457286432160805e-05,
"loss": 2.4329,
"step": 630
},
{
"epoch": 1.7762139338494018,
"grad_norm": 0.9327785968780518,
"learning_rate": 7.437185929648241e-05,
"loss": 2.4472,
"step": 631
},
{
"epoch": 1.7790288529204785,
"grad_norm": 1.280433177947998,
"learning_rate": 7.417085427135678e-05,
"loss": 2.5645,
"step": 632
},
{
"epoch": 1.7818437719915552,
"grad_norm": 0.9169235229492188,
"learning_rate": 7.396984924623115e-05,
"loss": 2.3657,
"step": 633
},
{
"epoch": 1.7846586910626319,
"grad_norm": 0.8970544338226318,
"learning_rate": 7.376884422110553e-05,
"loss": 2.4142,
"step": 634
},
{
"epoch": 1.7874736101337088,
"grad_norm": 0.9948874711990356,
"learning_rate": 7.35678391959799e-05,
"loss": 2.28,
"step": 635
},
{
"epoch": 1.7902885292047852,
"grad_norm": 1.002539873123169,
"learning_rate": 7.336683417085427e-05,
"loss": 2.5045,
"step": 636
},
{
"epoch": 1.793103448275862,
"grad_norm": 0.9109549522399902,
"learning_rate": 7.316582914572865e-05,
"loss": 2.5396,
"step": 637
},
{
"epoch": 1.7959183673469388,
"grad_norm": 1.1350212097167969,
"learning_rate": 7.296482412060302e-05,
"loss": 2.435,
"step": 638
},
{
"epoch": 1.7987332864180154,
"grad_norm": 1.1271899938583374,
"learning_rate": 7.276381909547739e-05,
"loss": 2.366,
"step": 639
},
{
"epoch": 1.8015482054890923,
"grad_norm": 0.9859978556632996,
"learning_rate": 7.256281407035177e-05,
"loss": 2.4066,
"step": 640
},
{
"epoch": 1.8043631245601688,
"grad_norm": 1.0322918891906738,
"learning_rate": 7.236180904522614e-05,
"loss": 2.5016,
"step": 641
},
{
"epoch": 1.8071780436312457,
"grad_norm": 1.045304298400879,
"learning_rate": 7.21608040201005e-05,
"loss": 2.332,
"step": 642
},
{
"epoch": 1.8099929627023223,
"grad_norm": 1.0578879117965698,
"learning_rate": 7.195979899497488e-05,
"loss": 2.3446,
"step": 643
},
{
"epoch": 1.812807881773399,
"grad_norm": 1.385239839553833,
"learning_rate": 7.175879396984924e-05,
"loss": 2.1476,
"step": 644
},
{
"epoch": 1.8156228008444757,
"grad_norm": 0.9029603600502014,
"learning_rate": 7.155778894472363e-05,
"loss": 2.2793,
"step": 645
},
{
"epoch": 1.8184377199155524,
"grad_norm": 0.9613030552864075,
"learning_rate": 7.135678391959799e-05,
"loss": 2.5449,
"step": 646
},
{
"epoch": 1.8212526389866293,
"grad_norm": 0.9739305377006531,
"learning_rate": 7.115577889447236e-05,
"loss": 2.3309,
"step": 647
},
{
"epoch": 1.8240675580577057,
"grad_norm": 1.0117180347442627,
"learning_rate": 7.095477386934674e-05,
"loss": 2.4027,
"step": 648
},
{
"epoch": 1.8268824771287826,
"grad_norm": 1.2691757678985596,
"learning_rate": 7.075376884422111e-05,
"loss": 2.5499,
"step": 649
},
{
"epoch": 1.8296973961998593,
"grad_norm": 0.9493529200553894,
"learning_rate": 7.055276381909548e-05,
"loss": 2.2126,
"step": 650
},
{
"epoch": 1.832512315270936,
"grad_norm": 1.0153197050094604,
"learning_rate": 7.035175879396985e-05,
"loss": 2.2784,
"step": 651
},
{
"epoch": 1.8353272343420126,
"grad_norm": 0.9323593974113464,
"learning_rate": 7.015075376884423e-05,
"loss": 2.2333,
"step": 652
},
{
"epoch": 1.8381421534130893,
"grad_norm": 1.0737582445144653,
"learning_rate": 6.99497487437186e-05,
"loss": 2.4598,
"step": 653
},
{
"epoch": 1.8409570724841662,
"grad_norm": 0.9445222616195679,
"learning_rate": 6.974874371859297e-05,
"loss": 2.2218,
"step": 654
},
{
"epoch": 1.8437719915552426,
"grad_norm": 1.043349027633667,
"learning_rate": 6.954773869346733e-05,
"loss": 2.5126,
"step": 655
},
{
"epoch": 1.8465869106263195,
"grad_norm": 0.9958374500274658,
"learning_rate": 6.93467336683417e-05,
"loss": 2.3045,
"step": 656
},
{
"epoch": 1.8494018296973962,
"grad_norm": 0.9490264654159546,
"learning_rate": 6.914572864321608e-05,
"loss": 2.3651,
"step": 657
},
{
"epoch": 1.8522167487684729,
"grad_norm": 1.07566499710083,
"learning_rate": 6.894472361809045e-05,
"loss": 2.4624,
"step": 658
},
{
"epoch": 1.8550316678395498,
"grad_norm": 1.0136849880218506,
"learning_rate": 6.874371859296482e-05,
"loss": 2.3797,
"step": 659
},
{
"epoch": 1.8578465869106262,
"grad_norm": 1.0830200910568237,
"learning_rate": 6.85427135678392e-05,
"loss": 2.4643,
"step": 660
},
{
"epoch": 1.860661505981703,
"grad_norm": 0.920754075050354,
"learning_rate": 6.834170854271357e-05,
"loss": 2.375,
"step": 661
},
{
"epoch": 1.8634764250527798,
"grad_norm": 1.0753567218780518,
"learning_rate": 6.814070351758794e-05,
"loss": 2.4276,
"step": 662
},
{
"epoch": 1.8662913441238564,
"grad_norm": 1.2756551504135132,
"learning_rate": 6.793969849246232e-05,
"loss": 2.5999,
"step": 663
},
{
"epoch": 1.8691062631949331,
"grad_norm": 1.165073037147522,
"learning_rate": 6.773869346733669e-05,
"loss": 2.5914,
"step": 664
},
{
"epoch": 1.8719211822660098,
"grad_norm": 1.0647106170654297,
"learning_rate": 6.753768844221105e-05,
"loss": 2.1331,
"step": 665
},
{
"epoch": 1.8747361013370867,
"grad_norm": 1.023997187614441,
"learning_rate": 6.733668341708544e-05,
"loss": 2.1842,
"step": 666
},
{
"epoch": 1.8775510204081631,
"grad_norm": 1.1140164136886597,
"learning_rate": 6.71356783919598e-05,
"loss": 2.3345,
"step": 667
},
{
"epoch": 1.88036593947924,
"grad_norm": 0.9038817882537842,
"learning_rate": 6.693467336683418e-05,
"loss": 2.2091,
"step": 668
},
{
"epoch": 1.8831808585503167,
"grad_norm": 1.0922759771347046,
"learning_rate": 6.673366834170854e-05,
"loss": 2.4865,
"step": 669
},
{
"epoch": 1.8859957776213934,
"grad_norm": 0.912775456905365,
"learning_rate": 6.653266331658293e-05,
"loss": 2.3812,
"step": 670
},
{
"epoch": 1.88881069669247,
"grad_norm": 1.0939160585403442,
"learning_rate": 6.633165829145729e-05,
"loss": 2.5171,
"step": 671
},
{
"epoch": 1.8916256157635467,
"grad_norm": 1.234376311302185,
"learning_rate": 6.613065326633166e-05,
"loss": 2.1198,
"step": 672
},
{
"epoch": 1.8944405348346236,
"grad_norm": 1.0135494470596313,
"learning_rate": 6.592964824120603e-05,
"loss": 2.2054,
"step": 673
},
{
"epoch": 1.8972554539057,
"grad_norm": 1.0782523155212402,
"learning_rate": 6.57286432160804e-05,
"loss": 2.4281,
"step": 674
},
{
"epoch": 1.900070372976777,
"grad_norm": 1.0938283205032349,
"learning_rate": 6.552763819095478e-05,
"loss": 2.4033,
"step": 675
},
{
"epoch": 1.9028852920478536,
"grad_norm": 1.080575942993164,
"learning_rate": 6.532663316582915e-05,
"loss": 2.2659,
"step": 676
},
{
"epoch": 1.9057002111189303,
"grad_norm": 0.9452334642410278,
"learning_rate": 6.512562814070352e-05,
"loss": 2.1833,
"step": 677
},
{
"epoch": 1.9085151301900072,
"grad_norm": 0.9910850524902344,
"learning_rate": 6.492462311557788e-05,
"loss": 2.4197,
"step": 678
},
{
"epoch": 1.9113300492610836,
"grad_norm": 0.9662689566612244,
"learning_rate": 6.472361809045227e-05,
"loss": 2.2544,
"step": 679
},
{
"epoch": 1.9141449683321605,
"grad_norm": 0.8501513004302979,
"learning_rate": 6.452261306532663e-05,
"loss": 2.3498,
"step": 680
},
{
"epoch": 1.9169598874032372,
"grad_norm": 1.2835460901260376,
"learning_rate": 6.4321608040201e-05,
"loss": 2.5167,
"step": 681
},
{
"epoch": 1.9197748064743139,
"grad_norm": 0.9385748505592346,
"learning_rate": 6.412060301507538e-05,
"loss": 2.362,
"step": 682
},
{
"epoch": 1.9225897255453905,
"grad_norm": 0.9999021887779236,
"learning_rate": 6.391959798994975e-05,
"loss": 2.2323,
"step": 683
},
{
"epoch": 1.9254046446164672,
"grad_norm": 1.0630273818969727,
"learning_rate": 6.371859296482412e-05,
"loss": 2.4489,
"step": 684
},
{
"epoch": 1.928219563687544,
"grad_norm": 0.9764763116836548,
"learning_rate": 6.35175879396985e-05,
"loss": 2.2774,
"step": 685
},
{
"epoch": 1.9310344827586206,
"grad_norm": 0.8416815996170044,
"learning_rate": 6.331658291457287e-05,
"loss": 2.2472,
"step": 686
},
{
"epoch": 1.9338494018296974,
"grad_norm": 0.8282995820045471,
"learning_rate": 6.311557788944724e-05,
"loss": 2.1936,
"step": 687
},
{
"epoch": 1.9366643209007741,
"grad_norm": 0.8734938502311707,
"learning_rate": 6.291457286432161e-05,
"loss": 2.365,
"step": 688
},
{
"epoch": 1.9394792399718508,
"grad_norm": 1.1187288761138916,
"learning_rate": 6.271356783919599e-05,
"loss": 2.3209,
"step": 689
},
{
"epoch": 1.9422941590429277,
"grad_norm": 0.9740754961967468,
"learning_rate": 6.251256281407035e-05,
"loss": 2.363,
"step": 690
},
{
"epoch": 1.9451090781140041,
"grad_norm": 1.023774266242981,
"learning_rate": 6.231155778894473e-05,
"loss": 2.2532,
"step": 691
},
{
"epoch": 1.947923997185081,
"grad_norm": 1.019603967666626,
"learning_rate": 6.211055276381909e-05,
"loss": 2.3242,
"step": 692
},
{
"epoch": 1.9507389162561575,
"grad_norm": 1.0288832187652588,
"learning_rate": 6.190954773869348e-05,
"loss": 2.6201,
"step": 693
},
{
"epoch": 1.9535538353272344,
"grad_norm": 0.9193139672279358,
"learning_rate": 6.170854271356784e-05,
"loss": 2.1794,
"step": 694
},
{
"epoch": 1.956368754398311,
"grad_norm": 0.9151753187179565,
"learning_rate": 6.150753768844222e-05,
"loss": 2.3056,
"step": 695
},
{
"epoch": 1.9591836734693877,
"grad_norm": 1.0923340320587158,
"learning_rate": 6.130653266331658e-05,
"loss": 2.2293,
"step": 696
},
{
"epoch": 1.9619985925404646,
"grad_norm": 0.9855085015296936,
"learning_rate": 6.110552763819096e-05,
"loss": 2.5036,
"step": 697
},
{
"epoch": 1.964813511611541,
"grad_norm": 0.9077695608139038,
"learning_rate": 6.090452261306533e-05,
"loss": 2.3812,
"step": 698
},
{
"epoch": 1.967628430682618,
"grad_norm": 0.9925841689109802,
"learning_rate": 6.070351758793971e-05,
"loss": 2.4093,
"step": 699
},
{
"epoch": 1.9704433497536946,
"grad_norm": 0.9586440324783325,
"learning_rate": 6.0502512562814076e-05,
"loss": 2.4851,
"step": 700
},
{
"epoch": 1.9732582688247713,
"grad_norm": 0.9627270698547363,
"learning_rate": 6.030150753768844e-05,
"loss": 2.5393,
"step": 701
},
{
"epoch": 1.976073187895848,
"grad_norm": 1.0779012441635132,
"learning_rate": 6.0100502512562815e-05,
"loss": 2.3773,
"step": 702
},
{
"epoch": 1.9788881069669246,
"grad_norm": 0.9611048698425293,
"learning_rate": 5.989949748743718e-05,
"loss": 2.3767,
"step": 703
},
{
"epoch": 1.9817030260380015,
"grad_norm": 0.9639135599136353,
"learning_rate": 5.969849246231156e-05,
"loss": 2.2936,
"step": 704
},
{
"epoch": 1.984517945109078,
"grad_norm": 0.9925483465194702,
"learning_rate": 5.949748743718593e-05,
"loss": 2.3456,
"step": 705
},
{
"epoch": 1.9873328641801549,
"grad_norm": 1.1674792766571045,
"learning_rate": 5.929648241206031e-05,
"loss": 2.3306,
"step": 706
},
{
"epoch": 1.9901477832512315,
"grad_norm": 1.0779776573181152,
"learning_rate": 5.909547738693467e-05,
"loss": 2.4675,
"step": 707
},
{
"epoch": 1.9929627023223082,
"grad_norm": 1.2033969163894653,
"learning_rate": 5.889447236180905e-05,
"loss": 2.3369,
"step": 708
},
{
"epoch": 1.995777621393385,
"grad_norm": 1.01941978931427,
"learning_rate": 5.869346733668342e-05,
"loss": 2.3271,
"step": 709
},
{
"epoch": 1.9985925404644616,
"grad_norm": 0.8707964420318604,
"learning_rate": 5.849246231155779e-05,
"loss": 2.4758,
"step": 710
},
{
"epoch": 2.0014074595355384,
"grad_norm": 0.9140713810920715,
"learning_rate": 5.829145728643216e-05,
"loss": 2.2651,
"step": 711
},
{
"epoch": 2.004222378606615,
"grad_norm": 1.016658902168274,
"learning_rate": 5.809045226130654e-05,
"loss": 1.7622,
"step": 712
},
{
"epoch": 2.007037297677692,
"grad_norm": 1.0154222249984741,
"learning_rate": 5.7889447236180904e-05,
"loss": 1.8809,
"step": 713
},
{
"epoch": 2.0098522167487687,
"grad_norm": 0.996522843837738,
"learning_rate": 5.7688442211055284e-05,
"loss": 2.1164,
"step": 714
},
{
"epoch": 2.012667135819845,
"grad_norm": 1.0130479335784912,
"learning_rate": 5.748743718592965e-05,
"loss": 2.1545,
"step": 715
},
{
"epoch": 2.015482054890922,
"grad_norm": 1.0692882537841797,
"learning_rate": 5.728643216080403e-05,
"loss": 2.0243,
"step": 716
},
{
"epoch": 2.0182969739619985,
"grad_norm": 1.0084363222122192,
"learning_rate": 5.7085427135678396e-05,
"loss": 1.984,
"step": 717
},
{
"epoch": 2.0211118930330754,
"grad_norm": 1.1399943828582764,
"learning_rate": 5.688442211055277e-05,
"loss": 1.9587,
"step": 718
},
{
"epoch": 2.023926812104152,
"grad_norm": 1.6473337411880493,
"learning_rate": 5.6683417085427135e-05,
"loss": 2.0001,
"step": 719
},
{
"epoch": 2.0267417311752287,
"grad_norm": 1.5070980787277222,
"learning_rate": 5.6482412060301515e-05,
"loss": 1.8712,
"step": 720
},
{
"epoch": 2.0295566502463056,
"grad_norm": 1.262854814529419,
"learning_rate": 5.628140703517588e-05,
"loss": 2.1427,
"step": 721
},
{
"epoch": 2.032371569317382,
"grad_norm": 1.7418184280395508,
"learning_rate": 5.608040201005026e-05,
"loss": 1.9851,
"step": 722
},
{
"epoch": 2.035186488388459,
"grad_norm": 1.5306885242462158,
"learning_rate": 5.587939698492463e-05,
"loss": 2.0147,
"step": 723
},
{
"epoch": 2.0380014074595354,
"grad_norm": 1.3408687114715576,
"learning_rate": 5.567839195979899e-05,
"loss": 2.1003,
"step": 724
},
{
"epoch": 2.0408163265306123,
"grad_norm": 1.3069605827331543,
"learning_rate": 5.547738693467337e-05,
"loss": 2.1251,
"step": 725
},
{
"epoch": 2.0436312456016887,
"grad_norm": 1.357084035873413,
"learning_rate": 5.527638190954774e-05,
"loss": 1.7681,
"step": 726
},
{
"epoch": 2.0464461646727656,
"grad_norm": 1.2578508853912354,
"learning_rate": 5.507537688442211e-05,
"loss": 1.8022,
"step": 727
},
{
"epoch": 2.0492610837438425,
"grad_norm": 1.2653518915176392,
"learning_rate": 5.487437185929648e-05,
"loss": 2.0304,
"step": 728
},
{
"epoch": 2.052076002814919,
"grad_norm": 1.2066705226898193,
"learning_rate": 5.467336683417086e-05,
"loss": 1.9383,
"step": 729
},
{
"epoch": 2.054890921885996,
"grad_norm": 1.2147313356399536,
"learning_rate": 5.4472361809045224e-05,
"loss": 1.9466,
"step": 730
},
{
"epoch": 2.0577058409570723,
"grad_norm": 1.2353148460388184,
"learning_rate": 5.4271356783919604e-05,
"loss": 2.1842,
"step": 731
},
{
"epoch": 2.060520760028149,
"grad_norm": 1.2019646167755127,
"learning_rate": 5.407035175879397e-05,
"loss": 1.9507,
"step": 732
},
{
"epoch": 2.063335679099226,
"grad_norm": 1.2473183870315552,
"learning_rate": 5.386934673366835e-05,
"loss": 1.9146,
"step": 733
},
{
"epoch": 2.0661505981703026,
"grad_norm": 1.3237521648406982,
"learning_rate": 5.3668341708542716e-05,
"loss": 2.026,
"step": 734
},
{
"epoch": 2.0689655172413794,
"grad_norm": 1.6444705724716187,
"learning_rate": 5.346733668341709e-05,
"loss": 2.1689,
"step": 735
},
{
"epoch": 2.071780436312456,
"grad_norm": 1.4315435886383057,
"learning_rate": 5.3266331658291455e-05,
"loss": 2.1981,
"step": 736
},
{
"epoch": 2.074595355383533,
"grad_norm": 1.7484960556030273,
"learning_rate": 5.3065326633165835e-05,
"loss": 1.998,
"step": 737
},
{
"epoch": 2.0774102744546092,
"grad_norm": 1.4129494428634644,
"learning_rate": 5.28643216080402e-05,
"loss": 1.9493,
"step": 738
},
{
"epoch": 2.080225193525686,
"grad_norm": 1.7426577806472778,
"learning_rate": 5.266331658291458e-05,
"loss": 1.8491,
"step": 739
},
{
"epoch": 2.083040112596763,
"grad_norm": 1.4977487325668335,
"learning_rate": 5.246231155778895e-05,
"loss": 2.0474,
"step": 740
},
{
"epoch": 2.0858550316678395,
"grad_norm": 1.479008674621582,
"learning_rate": 5.226130653266332e-05,
"loss": 2.036,
"step": 741
},
{
"epoch": 2.0886699507389164,
"grad_norm": 1.8596562147140503,
"learning_rate": 5.206030150753769e-05,
"loss": 2.0772,
"step": 742
},
{
"epoch": 2.091484869809993,
"grad_norm": 1.4239286184310913,
"learning_rate": 5.1859296482412066e-05,
"loss": 1.9441,
"step": 743
},
{
"epoch": 2.0942997888810697,
"grad_norm": 1.3117451667785645,
"learning_rate": 5.165829145728643e-05,
"loss": 1.8894,
"step": 744
},
{
"epoch": 2.097114707952146,
"grad_norm": 1.255926251411438,
"learning_rate": 5.145728643216081e-05,
"loss": 2.1199,
"step": 745
},
{
"epoch": 2.099929627023223,
"grad_norm": 1.6750807762145996,
"learning_rate": 5.125628140703518e-05,
"loss": 1.9147,
"step": 746
},
{
"epoch": 2.1027445460943,
"grad_norm": 1.307915210723877,
"learning_rate": 5.1055276381909544e-05,
"loss": 2.1592,
"step": 747
},
{
"epoch": 2.1055594651653764,
"grad_norm": 1.3630294799804688,
"learning_rate": 5.0854271356783924e-05,
"loss": 1.7529,
"step": 748
},
{
"epoch": 2.1083743842364533,
"grad_norm": 1.4674683809280396,
"learning_rate": 5.065326633165829e-05,
"loss": 1.9104,
"step": 749
},
{
"epoch": 2.1111893033075297,
"grad_norm": 1.2863240242004395,
"learning_rate": 5.045226130653266e-05,
"loss": 1.806,
"step": 750
},
{
"epoch": 2.1140042223786066,
"grad_norm": 1.4659481048583984,
"learning_rate": 5.0251256281407036e-05,
"loss": 2.1066,
"step": 751
},
{
"epoch": 2.1168191414496835,
"grad_norm": 1.4531869888305664,
"learning_rate": 5.005025125628141e-05,
"loss": 1.9145,
"step": 752
},
{
"epoch": 2.11963406052076,
"grad_norm": 1.4428577423095703,
"learning_rate": 4.984924623115578e-05,
"loss": 2.0346,
"step": 753
},
{
"epoch": 2.122448979591837,
"grad_norm": 1.6657663583755493,
"learning_rate": 4.9648241206030155e-05,
"loss": 1.8736,
"step": 754
},
{
"epoch": 2.1252638986629133,
"grad_norm": 1.45827317237854,
"learning_rate": 4.944723618090453e-05,
"loss": 1.8692,
"step": 755
},
{
"epoch": 2.12807881773399,
"grad_norm": 1.4714118242263794,
"learning_rate": 4.92462311557789e-05,
"loss": 2.1693,
"step": 756
},
{
"epoch": 2.1308937368050667,
"grad_norm": 1.4159564971923828,
"learning_rate": 4.9045226130653274e-05,
"loss": 2.0312,
"step": 757
},
{
"epoch": 2.1337086558761436,
"grad_norm": 1.409199833869934,
"learning_rate": 4.884422110552764e-05,
"loss": 2.043,
"step": 758
},
{
"epoch": 2.1365235749472204,
"grad_norm": 1.4486503601074219,
"learning_rate": 4.864321608040201e-05,
"loss": 1.8924,
"step": 759
},
{
"epoch": 2.139338494018297,
"grad_norm": 1.5751312971115112,
"learning_rate": 4.844221105527638e-05,
"loss": 1.8043,
"step": 760
},
{
"epoch": 2.142153413089374,
"grad_norm": 1.4712185859680176,
"learning_rate": 4.824120603015075e-05,
"loss": 1.9763,
"step": 761
},
{
"epoch": 2.1449683321604502,
"grad_norm": 1.3530285358428955,
"learning_rate": 4.8040201005025125e-05,
"loss": 1.7487,
"step": 762
},
{
"epoch": 2.147783251231527,
"grad_norm": 1.5095936059951782,
"learning_rate": 4.78391959798995e-05,
"loss": 2.0177,
"step": 763
},
{
"epoch": 2.1505981703026036,
"grad_norm": 1.2347254753112793,
"learning_rate": 4.763819095477387e-05,
"loss": 1.9277,
"step": 764
},
{
"epoch": 2.1534130893736805,
"grad_norm": 1.728926658630371,
"learning_rate": 4.7437185929648244e-05,
"loss": 2.0394,
"step": 765
},
{
"epoch": 2.1562280084447574,
"grad_norm": 1.3105862140655518,
"learning_rate": 4.723618090452262e-05,
"loss": 1.9132,
"step": 766
},
{
"epoch": 2.159042927515834,
"grad_norm": 1.4253538846969604,
"learning_rate": 4.703517587939698e-05,
"loss": 1.7072,
"step": 767
},
{
"epoch": 2.1618578465869107,
"grad_norm": 1.5160298347473145,
"learning_rate": 4.6834170854271356e-05,
"loss": 2.3564,
"step": 768
},
{
"epoch": 2.164672765657987,
"grad_norm": 1.384318470954895,
"learning_rate": 4.663316582914573e-05,
"loss": 1.911,
"step": 769
},
{
"epoch": 2.167487684729064,
"grad_norm": 1.6801820993423462,
"learning_rate": 4.64321608040201e-05,
"loss": 2.0506,
"step": 770
},
{
"epoch": 2.170302603800141,
"grad_norm": 1.401426076889038,
"learning_rate": 4.6231155778894475e-05,
"loss": 1.6387,
"step": 771
},
{
"epoch": 2.1731175228712174,
"grad_norm": 1.4150290489196777,
"learning_rate": 4.603015075376885e-05,
"loss": 2.1757,
"step": 772
},
{
"epoch": 2.1759324419422943,
"grad_norm": 1.909029483795166,
"learning_rate": 4.582914572864322e-05,
"loss": 1.7829,
"step": 773
},
{
"epoch": 2.1787473610133707,
"grad_norm": 1.530287265777588,
"learning_rate": 4.5628140703517594e-05,
"loss": 2.0015,
"step": 774
},
{
"epoch": 2.1815622800844476,
"grad_norm": 1.5270819664001465,
"learning_rate": 4.542713567839196e-05,
"loss": 2.1369,
"step": 775
},
{
"epoch": 2.184377199155524,
"grad_norm": 2.737819194793701,
"learning_rate": 4.522613065326633e-05,
"loss": 1.7966,
"step": 776
},
{
"epoch": 2.187192118226601,
"grad_norm": 1.4319558143615723,
"learning_rate": 4.5025125628140706e-05,
"loss": 1.949,
"step": 777
},
{
"epoch": 2.190007037297678,
"grad_norm": 1.5846929550170898,
"learning_rate": 4.482412060301508e-05,
"loss": 1.9676,
"step": 778
},
{
"epoch": 2.1928219563687543,
"grad_norm": 1.3088924884796143,
"learning_rate": 4.462311557788945e-05,
"loss": 1.9452,
"step": 779
},
{
"epoch": 2.195636875439831,
"grad_norm": 1.4991919994354248,
"learning_rate": 4.4422110552763825e-05,
"loss": 1.8966,
"step": 780
},
{
"epoch": 2.1984517945109077,
"grad_norm": 1.306575059890747,
"learning_rate": 4.42211055276382e-05,
"loss": 1.9106,
"step": 781
},
{
"epoch": 2.2012667135819846,
"grad_norm": 1.562092900276184,
"learning_rate": 4.4020100502512564e-05,
"loss": 1.8167,
"step": 782
},
{
"epoch": 2.204081632653061,
"grad_norm": 1.6543974876403809,
"learning_rate": 4.381909547738694e-05,
"loss": 1.9092,
"step": 783
},
{
"epoch": 2.206896551724138,
"grad_norm": 1.485269546508789,
"learning_rate": 4.3618090452261303e-05,
"loss": 1.9781,
"step": 784
},
{
"epoch": 2.209711470795215,
"grad_norm": 1.2385632991790771,
"learning_rate": 4.3417085427135676e-05,
"loss": 1.87,
"step": 785
},
{
"epoch": 2.2125263898662912,
"grad_norm": 1.2291756868362427,
"learning_rate": 4.321608040201005e-05,
"loss": 2.0753,
"step": 786
},
{
"epoch": 2.215341308937368,
"grad_norm": 1.3407044410705566,
"learning_rate": 4.301507537688442e-05,
"loss": 1.8011,
"step": 787
},
{
"epoch": 2.2181562280084446,
"grad_norm": 1.5226972103118896,
"learning_rate": 4.2814070351758795e-05,
"loss": 2.031,
"step": 788
},
{
"epoch": 2.2209711470795215,
"grad_norm": 1.2508612871170044,
"learning_rate": 4.261306532663317e-05,
"loss": 1.9893,
"step": 789
},
{
"epoch": 2.2237860661505984,
"grad_norm": 1.8101375102996826,
"learning_rate": 4.241206030150754e-05,
"loss": 2.0614,
"step": 790
},
{
"epoch": 2.226600985221675,
"grad_norm": 1.3492687940597534,
"learning_rate": 4.2211055276381914e-05,
"loss": 1.5472,
"step": 791
},
{
"epoch": 2.2294159042927517,
"grad_norm": 1.4373085498809814,
"learning_rate": 4.201005025125628e-05,
"loss": 1.9417,
"step": 792
},
{
"epoch": 2.232230823363828,
"grad_norm": 1.343981385231018,
"learning_rate": 4.180904522613065e-05,
"loss": 1.9317,
"step": 793
},
{
"epoch": 2.235045742434905,
"grad_norm": 1.3740363121032715,
"learning_rate": 4.1608040201005026e-05,
"loss": 1.7677,
"step": 794
},
{
"epoch": 2.2378606615059815,
"grad_norm": 1.4676454067230225,
"learning_rate": 4.14070351758794e-05,
"loss": 1.9661,
"step": 795
},
{
"epoch": 2.2406755805770584,
"grad_norm": 1.320854902267456,
"learning_rate": 4.120603015075377e-05,
"loss": 1.8218,
"step": 796
},
{
"epoch": 2.2434904996481353,
"grad_norm": 1.7027606964111328,
"learning_rate": 4.1005025125628145e-05,
"loss": 1.979,
"step": 797
},
{
"epoch": 2.2463054187192117,
"grad_norm": 1.363239049911499,
"learning_rate": 4.080402010050252e-05,
"loss": 1.8902,
"step": 798
},
{
"epoch": 2.2491203377902886,
"grad_norm": 1.707664966583252,
"learning_rate": 4.060301507537689e-05,
"loss": 2.0026,
"step": 799
},
{
"epoch": 2.251935256861365,
"grad_norm": 1.7282025814056396,
"learning_rate": 4.040201005025126e-05,
"loss": 1.8098,
"step": 800
},
{
"epoch": 2.254750175932442,
"grad_norm": 1.5891460180282593,
"learning_rate": 4.020100502512563e-05,
"loss": 1.8639,
"step": 801
},
{
"epoch": 2.2575650950035184,
"grad_norm": 1.5569334030151367,
"learning_rate": 4e-05,
"loss": 2.1027,
"step": 802
},
{
"epoch": 2.2603800140745953,
"grad_norm": 1.4195587635040283,
"learning_rate": 3.9798994974874376e-05,
"loss": 1.8757,
"step": 803
},
{
"epoch": 2.263194933145672,
"grad_norm": 1.3400124311447144,
"learning_rate": 3.959798994974875e-05,
"loss": 1.943,
"step": 804
},
{
"epoch": 2.2660098522167487,
"grad_norm": 1.4379513263702393,
"learning_rate": 3.9396984924623115e-05,
"loss": 2.0351,
"step": 805
},
{
"epoch": 2.2688247712878256,
"grad_norm": 1.8557440042495728,
"learning_rate": 3.919597989949749e-05,
"loss": 1.6522,
"step": 806
},
{
"epoch": 2.271639690358902,
"grad_norm": 1.68703031539917,
"learning_rate": 3.899497487437186e-05,
"loss": 1.8939,
"step": 807
},
{
"epoch": 2.274454609429979,
"grad_norm": 1.4797513484954834,
"learning_rate": 3.8793969849246234e-05,
"loss": 1.7212,
"step": 808
},
{
"epoch": 2.277269528501056,
"grad_norm": 2.070215940475464,
"learning_rate": 3.85929648241206e-05,
"loss": 1.9758,
"step": 809
},
{
"epoch": 2.2800844475721322,
"grad_norm": 1.4958938360214233,
"learning_rate": 3.8391959798994973e-05,
"loss": 2.0214,
"step": 810
},
{
"epoch": 2.282899366643209,
"grad_norm": 1.4052972793579102,
"learning_rate": 3.8190954773869346e-05,
"loss": 1.9959,
"step": 811
},
{
"epoch": 2.2857142857142856,
"grad_norm": 1.652631402015686,
"learning_rate": 3.798994974874372e-05,
"loss": 1.8977,
"step": 812
},
{
"epoch": 2.2885292047853625,
"grad_norm": 1.4963494539260864,
"learning_rate": 3.778894472361809e-05,
"loss": 1.7929,
"step": 813
},
{
"epoch": 2.2913441238564394,
"grad_norm": 1.554140329360962,
"learning_rate": 3.7587939698492465e-05,
"loss": 1.9422,
"step": 814
},
{
"epoch": 2.294159042927516,
"grad_norm": 1.5336120128631592,
"learning_rate": 3.738693467336684e-05,
"loss": 2.0812,
"step": 815
},
{
"epoch": 2.2969739619985927,
"grad_norm": 1.998458981513977,
"learning_rate": 3.7185929648241204e-05,
"loss": 1.8639,
"step": 816
},
{
"epoch": 2.299788881069669,
"grad_norm": 1.662591814994812,
"learning_rate": 3.698492462311558e-05,
"loss": 2.0354,
"step": 817
},
{
"epoch": 2.302603800140746,
"grad_norm": 1.6507760286331177,
"learning_rate": 3.678391959798995e-05,
"loss": 2.0235,
"step": 818
},
{
"epoch": 2.3054187192118225,
"grad_norm": 1.6057195663452148,
"learning_rate": 3.658291457286432e-05,
"loss": 1.9731,
"step": 819
},
{
"epoch": 2.3082336382828994,
"grad_norm": 1.5184822082519531,
"learning_rate": 3.6381909547738696e-05,
"loss": 1.7881,
"step": 820
},
{
"epoch": 2.311048557353976,
"grad_norm": 1.5405902862548828,
"learning_rate": 3.618090452261307e-05,
"loss": 1.9947,
"step": 821
},
{
"epoch": 2.3138634764250527,
"grad_norm": 1.880598545074463,
"learning_rate": 3.597989949748744e-05,
"loss": 1.9817,
"step": 822
},
{
"epoch": 2.3166783954961296,
"grad_norm": 1.707992434501648,
"learning_rate": 3.5778894472361815e-05,
"loss": 1.7769,
"step": 823
},
{
"epoch": 2.319493314567206,
"grad_norm": 1.4633187055587769,
"learning_rate": 3.557788944723618e-05,
"loss": 1.8575,
"step": 824
},
{
"epoch": 2.322308233638283,
"grad_norm": 1.4804046154022217,
"learning_rate": 3.5376884422110554e-05,
"loss": 1.9168,
"step": 825
},
{
"epoch": 2.3251231527093594,
"grad_norm": 1.6535083055496216,
"learning_rate": 3.517587939698493e-05,
"loss": 2.0092,
"step": 826
},
{
"epoch": 2.3279380717804363,
"grad_norm": 1.45828115940094,
"learning_rate": 3.49748743718593e-05,
"loss": 1.824,
"step": 827
},
{
"epoch": 2.330752990851513,
"grad_norm": 1.4650769233703613,
"learning_rate": 3.4773869346733667e-05,
"loss": 2.0699,
"step": 828
},
{
"epoch": 2.3335679099225897,
"grad_norm": 1.629009485244751,
"learning_rate": 3.457286432160804e-05,
"loss": 1.856,
"step": 829
},
{
"epoch": 2.3363828289936666,
"grad_norm": 1.6346815824508667,
"learning_rate": 3.437185929648241e-05,
"loss": 1.7844,
"step": 830
},
{
"epoch": 2.339197748064743,
"grad_norm": 2.298454523086548,
"learning_rate": 3.4170854271356785e-05,
"loss": 1.8711,
"step": 831
},
{
"epoch": 2.34201266713582,
"grad_norm": 1.6962236166000366,
"learning_rate": 3.396984924623116e-05,
"loss": 1.9879,
"step": 832
},
{
"epoch": 2.344827586206897,
"grad_norm": 1.5078356266021729,
"learning_rate": 3.3768844221105525e-05,
"loss": 1.6655,
"step": 833
},
{
"epoch": 2.3476425052779732,
"grad_norm": 1.9207500219345093,
"learning_rate": 3.35678391959799e-05,
"loss": 1.7975,
"step": 834
},
{
"epoch": 2.35045742434905,
"grad_norm": 1.5677213668823242,
"learning_rate": 3.336683417085427e-05,
"loss": 2.0483,
"step": 835
},
{
"epoch": 2.3532723434201266,
"grad_norm": 1.6488611698150635,
"learning_rate": 3.3165829145728643e-05,
"loss": 1.8616,
"step": 836
},
{
"epoch": 2.3560872624912035,
"grad_norm": 1.718641996383667,
"learning_rate": 3.2964824120603016e-05,
"loss": 1.9279,
"step": 837
},
{
"epoch": 2.35890218156228,
"grad_norm": 1.6529680490493774,
"learning_rate": 3.276381909547739e-05,
"loss": 2.1128,
"step": 838
},
{
"epoch": 2.361717100633357,
"grad_norm": 1.691375732421875,
"learning_rate": 3.256281407035176e-05,
"loss": 1.801,
"step": 839
},
{
"epoch": 2.3645320197044333,
"grad_norm": 1.6755695343017578,
"learning_rate": 3.2361809045226135e-05,
"loss": 2.0209,
"step": 840
},
{
"epoch": 2.36734693877551,
"grad_norm": 1.3565911054611206,
"learning_rate": 3.21608040201005e-05,
"loss": 1.8019,
"step": 841
},
{
"epoch": 2.370161857846587,
"grad_norm": 1.4155783653259277,
"learning_rate": 3.1959798994974875e-05,
"loss": 1.9121,
"step": 842
},
{
"epoch": 2.3729767769176635,
"grad_norm": 1.6148691177368164,
"learning_rate": 3.175879396984925e-05,
"loss": 1.9028,
"step": 843
},
{
"epoch": 2.3757916959887404,
"grad_norm": 1.408504605293274,
"learning_rate": 3.155778894472362e-05,
"loss": 1.7751,
"step": 844
},
{
"epoch": 2.378606615059817,
"grad_norm": 1.4195948839187622,
"learning_rate": 3.1356783919597993e-05,
"loss": 1.8083,
"step": 845
},
{
"epoch": 2.3814215341308937,
"grad_norm": 1.5938619375228882,
"learning_rate": 3.1155778894472366e-05,
"loss": 1.8096,
"step": 846
},
{
"epoch": 2.3842364532019706,
"grad_norm": 1.4775474071502686,
"learning_rate": 3.095477386934674e-05,
"loss": 2.0482,
"step": 847
},
{
"epoch": 2.387051372273047,
"grad_norm": 1.3416311740875244,
"learning_rate": 3.075376884422111e-05,
"loss": 1.8107,
"step": 848
},
{
"epoch": 2.389866291344124,
"grad_norm": 1.6304892301559448,
"learning_rate": 3.055276381909548e-05,
"loss": 1.913,
"step": 849
},
{
"epoch": 2.3926812104152004,
"grad_norm": 1.2348568439483643,
"learning_rate": 3.0351758793969855e-05,
"loss": 1.8254,
"step": 850
},
{
"epoch": 2.3954961294862773,
"grad_norm": 1.6292930841445923,
"learning_rate": 3.015075376884422e-05,
"loss": 2.1953,
"step": 851
},
{
"epoch": 2.398311048557354,
"grad_norm": 1.6192723512649536,
"learning_rate": 2.994974874371859e-05,
"loss": 1.7694,
"step": 852
},
{
"epoch": 2.4011259676284307,
"grad_norm": 1.6231796741485596,
"learning_rate": 2.9748743718592964e-05,
"loss": 2.1208,
"step": 853
},
{
"epoch": 2.4039408866995076,
"grad_norm": 1.3113828897476196,
"learning_rate": 2.9547738693467337e-05,
"loss": 1.9544,
"step": 854
},
{
"epoch": 2.406755805770584,
"grad_norm": 1.6705840826034546,
"learning_rate": 2.934673366834171e-05,
"loss": 2.0316,
"step": 855
},
{
"epoch": 2.409570724841661,
"grad_norm": 1.7489991188049316,
"learning_rate": 2.914572864321608e-05,
"loss": 2.1702,
"step": 856
},
{
"epoch": 2.4123856439127374,
"grad_norm": 1.7634392976760864,
"learning_rate": 2.8944723618090452e-05,
"loss": 2.003,
"step": 857
},
{
"epoch": 2.4152005629838142,
"grad_norm": 1.607228398323059,
"learning_rate": 2.8743718592964825e-05,
"loss": 1.9383,
"step": 858
},
{
"epoch": 2.4180154820548907,
"grad_norm": 1.661271095275879,
"learning_rate": 2.8542713567839198e-05,
"loss": 1.9874,
"step": 859
},
{
"epoch": 2.4208304011259676,
"grad_norm": 1.4608184099197388,
"learning_rate": 2.8341708542713568e-05,
"loss": 1.8086,
"step": 860
},
{
"epoch": 2.4236453201970445,
"grad_norm": 1.4614999294281006,
"learning_rate": 2.814070351758794e-05,
"loss": 2.069,
"step": 861
},
{
"epoch": 2.426460239268121,
"grad_norm": 1.5575437545776367,
"learning_rate": 2.7939698492462314e-05,
"loss": 1.8134,
"step": 862
},
{
"epoch": 2.429275158339198,
"grad_norm": 1.5272866487503052,
"learning_rate": 2.7738693467336686e-05,
"loss": 1.9724,
"step": 863
},
{
"epoch": 2.4320900774102743,
"grad_norm": 1.524636149406433,
"learning_rate": 2.7537688442211056e-05,
"loss": 1.8841,
"step": 864
},
{
"epoch": 2.434904996481351,
"grad_norm": 1.7057536840438843,
"learning_rate": 2.733668341708543e-05,
"loss": 1.9102,
"step": 865
},
{
"epoch": 2.437719915552428,
"grad_norm": 1.4803720712661743,
"learning_rate": 2.7135678391959802e-05,
"loss": 1.8558,
"step": 866
},
{
"epoch": 2.4405348346235045,
"grad_norm": 1.486907958984375,
"learning_rate": 2.6934673366834175e-05,
"loss": 2.0651,
"step": 867
},
{
"epoch": 2.4433497536945814,
"grad_norm": 1.5045924186706543,
"learning_rate": 2.6733668341708545e-05,
"loss": 1.8523,
"step": 868
},
{
"epoch": 2.446164672765658,
"grad_norm": 1.5075145959854126,
"learning_rate": 2.6532663316582917e-05,
"loss": 2.1232,
"step": 869
},
{
"epoch": 2.4489795918367347,
"grad_norm": 1.5650744438171387,
"learning_rate": 2.633165829145729e-05,
"loss": 1.9524,
"step": 870
},
{
"epoch": 2.4517945109078116,
"grad_norm": 1.4480630159378052,
"learning_rate": 2.613065326633166e-05,
"loss": 1.8816,
"step": 871
},
{
"epoch": 2.454609429978888,
"grad_norm": 1.7436559200286865,
"learning_rate": 2.5929648241206033e-05,
"loss": 2.0141,
"step": 872
},
{
"epoch": 2.457424349049965,
"grad_norm": 1.647824764251709,
"learning_rate": 2.5728643216080406e-05,
"loss": 1.9715,
"step": 873
},
{
"epoch": 2.4602392681210414,
"grad_norm": 1.5032564401626587,
"learning_rate": 2.5527638190954772e-05,
"loss": 1.9423,
"step": 874
},
{
"epoch": 2.4630541871921183,
"grad_norm": 1.615399718284607,
"learning_rate": 2.5326633165829145e-05,
"loss": 2.1547,
"step": 875
},
{
"epoch": 2.4658691062631948,
"grad_norm": 1.66806161403656,
"learning_rate": 2.5125628140703518e-05,
"loss": 2.0234,
"step": 876
},
{
"epoch": 2.4686840253342717,
"grad_norm": 1.69028902053833,
"learning_rate": 2.492462311557789e-05,
"loss": 1.9796,
"step": 877
},
{
"epoch": 2.471498944405348,
"grad_norm": 1.5616704225540161,
"learning_rate": 2.4723618090452264e-05,
"loss": 1.979,
"step": 878
},
{
"epoch": 2.474313863476425,
"grad_norm": 1.9459314346313477,
"learning_rate": 2.4522613065326637e-05,
"loss": 2.0534,
"step": 879
},
{
"epoch": 2.477128782547502,
"grad_norm": 1.2826955318450928,
"learning_rate": 2.4321608040201007e-05,
"loss": 1.9959,
"step": 880
},
{
"epoch": 2.4799437016185784,
"grad_norm": 1.3462079763412476,
"learning_rate": 2.4120603015075376e-05,
"loss": 1.7451,
"step": 881
},
{
"epoch": 2.4827586206896552,
"grad_norm": 1.7991423606872559,
"learning_rate": 2.391959798994975e-05,
"loss": 2.1944,
"step": 882
},
{
"epoch": 2.4855735397607317,
"grad_norm": 1.4024704694747925,
"learning_rate": 2.3718592964824122e-05,
"loss": 1.9266,
"step": 883
},
{
"epoch": 2.4883884588318086,
"grad_norm": 1.7428147792816162,
"learning_rate": 2.351758793969849e-05,
"loss": 1.991,
"step": 884
},
{
"epoch": 2.4912033779028855,
"grad_norm": 1.4942609071731567,
"learning_rate": 2.3316582914572865e-05,
"loss": 1.7034,
"step": 885
},
{
"epoch": 2.494018296973962,
"grad_norm": 1.6050865650177002,
"learning_rate": 2.3115577889447238e-05,
"loss": 1.9718,
"step": 886
},
{
"epoch": 2.496833216045039,
"grad_norm": 1.6679102182388306,
"learning_rate": 2.291457286432161e-05,
"loss": 1.7724,
"step": 887
},
{
"epoch": 2.4996481351161153,
"grad_norm": 1.4811137914657593,
"learning_rate": 2.271356783919598e-05,
"loss": 1.9245,
"step": 888
},
{
"epoch": 2.502463054187192,
"grad_norm": 1.5758980512619019,
"learning_rate": 2.2512562814070353e-05,
"loss": 1.9094,
"step": 889
},
{
"epoch": 2.505277973258269,
"grad_norm": 1.6346875429153442,
"learning_rate": 2.2311557788944726e-05,
"loss": 1.8482,
"step": 890
},
{
"epoch": 2.5080928923293455,
"grad_norm": 1.6329705715179443,
"learning_rate": 2.21105527638191e-05,
"loss": 1.9275,
"step": 891
},
{
"epoch": 2.510907811400422,
"grad_norm": 1.9860655069351196,
"learning_rate": 2.190954773869347e-05,
"loss": 2.0708,
"step": 892
},
{
"epoch": 2.513722730471499,
"grad_norm": 2.201899528503418,
"learning_rate": 2.1708542713567838e-05,
"loss": 1.5935,
"step": 893
},
{
"epoch": 2.5165376495425757,
"grad_norm": 1.7361814975738525,
"learning_rate": 2.150753768844221e-05,
"loss": 1.716,
"step": 894
},
{
"epoch": 2.519352568613652,
"grad_norm": 1.6891804933547974,
"learning_rate": 2.1306532663316584e-05,
"loss": 2.0822,
"step": 895
},
{
"epoch": 2.522167487684729,
"grad_norm": 1.5002251863479614,
"learning_rate": 2.1105527638190957e-05,
"loss": 1.8622,
"step": 896
},
{
"epoch": 2.5249824067558055,
"grad_norm": 1.6818735599517822,
"learning_rate": 2.0904522613065327e-05,
"loss": 1.7687,
"step": 897
},
{
"epoch": 2.5277973258268824,
"grad_norm": 1.6019138097763062,
"learning_rate": 2.07035175879397e-05,
"loss": 1.8481,
"step": 898
},
{
"epoch": 2.5306122448979593,
"grad_norm": 1.517175555229187,
"learning_rate": 2.0502512562814073e-05,
"loss": 1.8791,
"step": 899
},
{
"epoch": 2.533427163969036,
"grad_norm": 1.4796918630599976,
"learning_rate": 2.0301507537688446e-05,
"loss": 1.8031,
"step": 900
},
{
"epoch": 2.5362420830401127,
"grad_norm": 1.5934321880340576,
"learning_rate": 2.0100502512562815e-05,
"loss": 1.9215,
"step": 901
},
{
"epoch": 2.539057002111189,
"grad_norm": 1.5581581592559814,
"learning_rate": 1.9899497487437188e-05,
"loss": 1.9739,
"step": 902
},
{
"epoch": 2.541871921182266,
"grad_norm": 1.6254914999008179,
"learning_rate": 1.9698492462311558e-05,
"loss": 2.0283,
"step": 903
},
{
"epoch": 2.544686840253343,
"grad_norm": 1.5393351316452026,
"learning_rate": 1.949748743718593e-05,
"loss": 2.1366,
"step": 904
},
{
"epoch": 2.5475017593244194,
"grad_norm": 1.369229793548584,
"learning_rate": 1.92964824120603e-05,
"loss": 1.8624,
"step": 905
},
{
"epoch": 2.5503166783954963,
"grad_norm": 1.5214154720306396,
"learning_rate": 1.9095477386934673e-05,
"loss": 1.8812,
"step": 906
},
{
"epoch": 2.5531315974665727,
"grad_norm": 1.7752878665924072,
"learning_rate": 1.8894472361809046e-05,
"loss": 1.959,
"step": 907
},
{
"epoch": 2.5559465165376496,
"grad_norm": 1.5053621530532837,
"learning_rate": 1.869346733668342e-05,
"loss": 1.895,
"step": 908
},
{
"epoch": 2.5587614356087265,
"grad_norm": 1.660584568977356,
"learning_rate": 1.849246231155779e-05,
"loss": 1.8461,
"step": 909
},
{
"epoch": 2.561576354679803,
"grad_norm": 1.5243057012557983,
"learning_rate": 1.829145728643216e-05,
"loss": 1.7571,
"step": 910
},
{
"epoch": 2.5643912737508794,
"grad_norm": 1.4486278295516968,
"learning_rate": 1.8090452261306535e-05,
"loss": 1.9905,
"step": 911
},
{
"epoch": 2.5672061928219563,
"grad_norm": 1.5734102725982666,
"learning_rate": 1.7889447236180908e-05,
"loss": 1.8319,
"step": 912
},
{
"epoch": 2.570021111893033,
"grad_norm": 1.524849534034729,
"learning_rate": 1.7688442211055277e-05,
"loss": 1.8446,
"step": 913
},
{
"epoch": 2.5728360309641096,
"grad_norm": 1.4271085262298584,
"learning_rate": 1.748743718592965e-05,
"loss": 1.932,
"step": 914
},
{
"epoch": 2.5756509500351865,
"grad_norm": 1.4514641761779785,
"learning_rate": 1.728643216080402e-05,
"loss": 1.8912,
"step": 915
},
{
"epoch": 2.578465869106263,
"grad_norm": 1.5679149627685547,
"learning_rate": 1.7085427135678393e-05,
"loss": 1.8389,
"step": 916
},
{
"epoch": 2.58128078817734,
"grad_norm": 1.628262996673584,
"learning_rate": 1.6884422110552762e-05,
"loss": 1.7108,
"step": 917
},
{
"epoch": 2.5840957072484168,
"grad_norm": 1.466387152671814,
"learning_rate": 1.6683417085427135e-05,
"loss": 1.7445,
"step": 918
},
{
"epoch": 2.586910626319493,
"grad_norm": 1.6148653030395508,
"learning_rate": 1.6482412060301508e-05,
"loss": 1.8271,
"step": 919
},
{
"epoch": 2.58972554539057,
"grad_norm": 1.6727656126022339,
"learning_rate": 1.628140703517588e-05,
"loss": 1.8221,
"step": 920
},
{
"epoch": 2.5925404644616465,
"grad_norm": 1.6274527311325073,
"learning_rate": 1.608040201005025e-05,
"loss": 1.9275,
"step": 921
},
{
"epoch": 2.5953553835327234,
"grad_norm": 1.5122441053390503,
"learning_rate": 1.5879396984924624e-05,
"loss": 1.7531,
"step": 922
},
{
"epoch": 2.5981703026038003,
"grad_norm": 1.5030601024627686,
"learning_rate": 1.5678391959798997e-05,
"loss": 1.9965,
"step": 923
},
{
"epoch": 2.600985221674877,
"grad_norm": 1.7044039964675903,
"learning_rate": 1.547738693467337e-05,
"loss": 1.946,
"step": 924
},
{
"epoch": 2.6038001407459537,
"grad_norm": 1.505894422531128,
"learning_rate": 1.527638190954774e-05,
"loss": 1.8394,
"step": 925
},
{
"epoch": 2.60661505981703,
"grad_norm": 1.5264232158660889,
"learning_rate": 1.507537688442211e-05,
"loss": 1.8762,
"step": 926
},
{
"epoch": 2.609429978888107,
"grad_norm": 1.512060284614563,
"learning_rate": 1.4874371859296482e-05,
"loss": 1.9039,
"step": 927
},
{
"epoch": 2.612244897959184,
"grad_norm": 1.6046111583709717,
"learning_rate": 1.4673366834170855e-05,
"loss": 1.8746,
"step": 928
},
{
"epoch": 2.6150598170302604,
"grad_norm": 1.7210888862609863,
"learning_rate": 1.4472361809045226e-05,
"loss": 1.8613,
"step": 929
},
{
"epoch": 2.6178747361013373,
"grad_norm": 1.7266684770584106,
"learning_rate": 1.4271356783919599e-05,
"loss": 1.966,
"step": 930
},
{
"epoch": 2.6206896551724137,
"grad_norm": 1.6090869903564453,
"learning_rate": 1.407035175879397e-05,
"loss": 1.8954,
"step": 931
},
{
"epoch": 2.6235045742434906,
"grad_norm": 1.6034605503082275,
"learning_rate": 1.3869346733668343e-05,
"loss": 1.9799,
"step": 932
},
{
"epoch": 2.626319493314567,
"grad_norm": 1.8251813650131226,
"learning_rate": 1.3668341708542715e-05,
"loss": 1.8237,
"step": 933
},
{
"epoch": 2.629134412385644,
"grad_norm": 1.9154014587402344,
"learning_rate": 1.3467336683417087e-05,
"loss": 1.7706,
"step": 934
},
{
"epoch": 2.6319493314567204,
"grad_norm": 1.6452045440673828,
"learning_rate": 1.3266331658291459e-05,
"loss": 2.212,
"step": 935
},
{
"epoch": 2.6347642505277973,
"grad_norm": 1.5571489334106445,
"learning_rate": 1.306532663316583e-05,
"loss": 1.7057,
"step": 936
},
{
"epoch": 2.637579169598874,
"grad_norm": 1.5120084285736084,
"learning_rate": 1.2864321608040203e-05,
"loss": 1.958,
"step": 937
},
{
"epoch": 2.6403940886699506,
"grad_norm": 1.4263646602630615,
"learning_rate": 1.2663316582914573e-05,
"loss": 2.2181,
"step": 938
},
{
"epoch": 2.6432090077410275,
"grad_norm": 1.7924742698669434,
"learning_rate": 1.2462311557788946e-05,
"loss": 2.0682,
"step": 939
},
{
"epoch": 2.646023926812104,
"grad_norm": 1.568236231803894,
"learning_rate": 1.2261306532663318e-05,
"loss": 1.9372,
"step": 940
},
{
"epoch": 2.648838845883181,
"grad_norm": 1.826130986213684,
"learning_rate": 1.2060301507537688e-05,
"loss": 1.8557,
"step": 941
},
{
"epoch": 2.6516537649542578,
"grad_norm": 1.4434233903884888,
"learning_rate": 1.1859296482412061e-05,
"loss": 1.7765,
"step": 942
},
{
"epoch": 2.654468684025334,
"grad_norm": 1.4508579969406128,
"learning_rate": 1.1658291457286432e-05,
"loss": 1.9047,
"step": 943
},
{
"epoch": 2.657283603096411,
"grad_norm": 1.6055655479431152,
"learning_rate": 1.1457286432160805e-05,
"loss": 1.9673,
"step": 944
},
{
"epoch": 2.6600985221674875,
"grad_norm": 1.7329376935958862,
"learning_rate": 1.1256281407035177e-05,
"loss": 1.9288,
"step": 945
},
{
"epoch": 2.6629134412385644,
"grad_norm": 2.1003239154815674,
"learning_rate": 1.105527638190955e-05,
"loss": 1.9618,
"step": 946
},
{
"epoch": 2.6657283603096413,
"grad_norm": 1.5641478300094604,
"learning_rate": 1.0854271356783919e-05,
"loss": 1.9631,
"step": 947
},
{
"epoch": 2.668543279380718,
"grad_norm": 1.5357648134231567,
"learning_rate": 1.0653266331658292e-05,
"loss": 2.0265,
"step": 948
},
{
"epoch": 2.6713581984517947,
"grad_norm": 1.3177186250686646,
"learning_rate": 1.0452261306532663e-05,
"loss": 1.878,
"step": 949
},
{
"epoch": 2.674173117522871,
"grad_norm": 1.499448299407959,
"learning_rate": 1.0251256281407036e-05,
"loss": 1.8745,
"step": 950
},
{
"epoch": 2.676988036593948,
"grad_norm": 1.6168919801712036,
"learning_rate": 1.0050251256281408e-05,
"loss": 2.0069,
"step": 951
},
{
"epoch": 2.6798029556650245,
"grad_norm": 1.6026453971862793,
"learning_rate": 9.849246231155779e-06,
"loss": 1.7609,
"step": 952
},
{
"epoch": 2.6826178747361014,
"grad_norm": 1.796265721321106,
"learning_rate": 9.64824120603015e-06,
"loss": 2.0887,
"step": 953
},
{
"epoch": 2.685432793807178,
"grad_norm": 1.5531530380249023,
"learning_rate": 9.447236180904523e-06,
"loss": 1.8629,
"step": 954
},
{
"epoch": 2.6882477128782547,
"grad_norm": 1.753846287727356,
"learning_rate": 9.246231155778894e-06,
"loss": 1.8199,
"step": 955
},
{
"epoch": 2.6910626319493316,
"grad_norm": 1.5960685014724731,
"learning_rate": 9.045226130653267e-06,
"loss": 2.0348,
"step": 956
},
{
"epoch": 2.693877551020408,
"grad_norm": 1.6763644218444824,
"learning_rate": 8.844221105527639e-06,
"loss": 1.8563,
"step": 957
},
{
"epoch": 2.696692470091485,
"grad_norm": 1.4198554754257202,
"learning_rate": 8.64321608040201e-06,
"loss": 1.7267,
"step": 958
},
{
"epoch": 2.6995073891625614,
"grad_norm": 1.6178436279296875,
"learning_rate": 8.442211055276381e-06,
"loss": 2.0219,
"step": 959
},
{
"epoch": 2.7023223082336383,
"grad_norm": 1.5461405515670776,
"learning_rate": 8.241206030150754e-06,
"loss": 1.8272,
"step": 960
},
{
"epoch": 2.705137227304715,
"grad_norm": 1.7036101818084717,
"learning_rate": 8.040201005025125e-06,
"loss": 1.9612,
"step": 961
},
{
"epoch": 2.7079521463757916,
"grad_norm": 1.540170669555664,
"learning_rate": 7.839195979899498e-06,
"loss": 1.8085,
"step": 962
},
{
"epoch": 2.7107670654468685,
"grad_norm": 1.4888712167739868,
"learning_rate": 7.63819095477387e-06,
"loss": 1.9548,
"step": 963
},
{
"epoch": 2.713581984517945,
"grad_norm": 1.6294909715652466,
"learning_rate": 7.437185929648241e-06,
"loss": 1.9253,
"step": 964
},
{
"epoch": 2.716396903589022,
"grad_norm": 1.50801682472229,
"learning_rate": 7.236180904522613e-06,
"loss": 2.0657,
"step": 965
},
{
"epoch": 2.7192118226600988,
"grad_norm": 1.7348463535308838,
"learning_rate": 7.035175879396985e-06,
"loss": 1.9505,
"step": 966
},
{
"epoch": 2.722026741731175,
"grad_norm": 1.63502836227417,
"learning_rate": 6.834170854271357e-06,
"loss": 1.9629,
"step": 967
},
{
"epoch": 2.724841660802252,
"grad_norm": 1.6600550413131714,
"learning_rate": 6.633165829145729e-06,
"loss": 1.8303,
"step": 968
},
{
"epoch": 2.7276565798733285,
"grad_norm": 2.031532049179077,
"learning_rate": 6.4321608040201015e-06,
"loss": 1.7822,
"step": 969
},
{
"epoch": 2.7304714989444054,
"grad_norm": 1.804951548576355,
"learning_rate": 6.231155778894473e-06,
"loss": 2.1692,
"step": 970
},
{
"epoch": 2.733286418015482,
"grad_norm": 1.4661808013916016,
"learning_rate": 6.030150753768844e-06,
"loss": 1.964,
"step": 971
},
{
"epoch": 2.736101337086559,
"grad_norm": 2.2973814010620117,
"learning_rate": 5.829145728643216e-06,
"loss": 1.7313,
"step": 972
},
{
"epoch": 2.7389162561576352,
"grad_norm": 1.6403539180755615,
"learning_rate": 5.628140703517588e-06,
"loss": 1.9302,
"step": 973
},
{
"epoch": 2.741731175228712,
"grad_norm": 1.5790972709655762,
"learning_rate": 5.4271356783919595e-06,
"loss": 1.6091,
"step": 974
},
{
"epoch": 2.744546094299789,
"grad_norm": 1.765395998954773,
"learning_rate": 5.226130653266332e-06,
"loss": 2.0181,
"step": 975
},
{
"epoch": 2.7473610133708655,
"grad_norm": 1.4669338464736938,
"learning_rate": 5.025125628140704e-06,
"loss": 1.8627,
"step": 976
},
{
"epoch": 2.7501759324419424,
"grad_norm": 1.678789734840393,
"learning_rate": 4.824120603015075e-06,
"loss": 2.0305,
"step": 977
},
{
"epoch": 2.752990851513019,
"grad_norm": 1.5381121635437012,
"learning_rate": 4.623115577889447e-06,
"loss": 1.8813,
"step": 978
},
{
"epoch": 2.7558057705840957,
"grad_norm": 1.5611159801483154,
"learning_rate": 4.422110552763819e-06,
"loss": 1.7127,
"step": 979
},
{
"epoch": 2.7586206896551726,
"grad_norm": 1.6193746328353882,
"learning_rate": 4.2211055276381906e-06,
"loss": 1.9128,
"step": 980
},
{
"epoch": 2.761435608726249,
"grad_norm": 1.4393192529678345,
"learning_rate": 4.020100502512563e-06,
"loss": 1.5317,
"step": 981
},
{
"epoch": 2.764250527797326,
"grad_norm": 1.6281440258026123,
"learning_rate": 3.819095477386935e-06,
"loss": 1.9902,
"step": 982
},
{
"epoch": 2.7670654468684024,
"grad_norm": 1.6578021049499512,
"learning_rate": 3.6180904522613065e-06,
"loss": 2.2476,
"step": 983
},
{
"epoch": 2.7698803659394793,
"grad_norm": 1.6723147630691528,
"learning_rate": 3.4170854271356786e-06,
"loss": 1.9034,
"step": 984
},
{
"epoch": 2.772695285010556,
"grad_norm": 1.6200298070907593,
"learning_rate": 3.2160804020100507e-06,
"loss": 2.129,
"step": 985
},
{
"epoch": 2.7755102040816326,
"grad_norm": 1.6610552072525024,
"learning_rate": 3.015075376884422e-06,
"loss": 1.9335,
"step": 986
},
{
"epoch": 2.7783251231527095,
"grad_norm": 1.7721863985061646,
"learning_rate": 2.814070351758794e-06,
"loss": 1.888,
"step": 987
},
{
"epoch": 2.781140042223786,
"grad_norm": 1.949180006980896,
"learning_rate": 2.613065326633166e-06,
"loss": 2.0525,
"step": 988
},
{
"epoch": 2.783954961294863,
"grad_norm": 1.5390926599502563,
"learning_rate": 2.4120603015075375e-06,
"loss": 1.981,
"step": 989
},
{
"epoch": 2.7867698803659398,
"grad_norm": 1.7333327531814575,
"learning_rate": 2.2110552763819096e-06,
"loss": 1.7485,
"step": 990
},
{
"epoch": 2.789584799437016,
"grad_norm": 1.438262939453125,
"learning_rate": 2.0100502512562813e-06,
"loss": 1.8643,
"step": 991
},
{
"epoch": 2.7923997185080927,
"grad_norm": 1.7008702754974365,
"learning_rate": 1.8090452261306533e-06,
"loss": 2.0458,
"step": 992
},
{
"epoch": 2.7952146375791695,
"grad_norm": 1.8295824527740479,
"learning_rate": 1.6080402010050254e-06,
"loss": 2.0128,
"step": 993
},
{
"epoch": 2.7980295566502464,
"grad_norm": 1.8252149820327759,
"learning_rate": 1.407035175879397e-06,
"loss": 1.724,
"step": 994
},
{
"epoch": 2.800844475721323,
"grad_norm": 1.612557291984558,
"learning_rate": 1.2060301507537688e-06,
"loss": 1.9216,
"step": 995
},
{
"epoch": 2.8036593947924,
"grad_norm": 1.486989974975586,
"learning_rate": 1.0050251256281407e-06,
"loss": 1.6633,
"step": 996
},
{
"epoch": 2.8064743138634762,
"grad_norm": 1.5488345623016357,
"learning_rate": 8.040201005025127e-07,
"loss": 1.8513,
"step": 997
},
{
"epoch": 2.809289232934553,
"grad_norm": 1.741253137588501,
"learning_rate": 6.030150753768844e-07,
"loss": 1.7444,
"step": 998
},
{
"epoch": 2.81210415200563,
"grad_norm": 1.6252341270446777,
"learning_rate": 4.0201005025125634e-07,
"loss": 1.9627,
"step": 999
},
{
"epoch": 2.8149190710767065,
"grad_norm": 1.5533764362335205,
"learning_rate": 2.0100502512562817e-07,
"loss": 1.8062,
"step": 1000
}
],
"logging_steps": 1,
"max_steps": 1000,
"num_input_tokens_seen": 0,
"num_train_epochs": 3,
"save_steps": 500,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 4.447791767273472e+16,
"train_batch_size": 2,
"trial_name": null,
"trial_params": null
}