|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 1.9879518072289155, |
|
"eval_steps": 500, |
|
"global_step": 664, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.0030120481927710845, |
|
"grad_norm": 0.4040583074092865, |
|
"learning_rate": 1.0000000000000002e-06, |
|
"loss": 1.684, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.006024096385542169, |
|
"grad_norm": 0.4095500409603119, |
|
"learning_rate": 2.0000000000000003e-06, |
|
"loss": 1.6744, |
|
"step": 2 |
|
}, |
|
{ |
|
"epoch": 0.009036144578313253, |
|
"grad_norm": 0.43334975838661194, |
|
"learning_rate": 3e-06, |
|
"loss": 1.5956, |
|
"step": 3 |
|
}, |
|
{ |
|
"epoch": 0.012048192771084338, |
|
"grad_norm": 0.39737147092819214, |
|
"learning_rate": 4.000000000000001e-06, |
|
"loss": 1.6404, |
|
"step": 4 |
|
}, |
|
{ |
|
"epoch": 0.015060240963855422, |
|
"grad_norm": 0.41804248094558716, |
|
"learning_rate": 5e-06, |
|
"loss": 1.6398, |
|
"step": 5 |
|
}, |
|
{ |
|
"epoch": 0.018072289156626505, |
|
"grad_norm": 0.41640806198120117, |
|
"learning_rate": 6e-06, |
|
"loss": 1.6439, |
|
"step": 6 |
|
}, |
|
{ |
|
"epoch": 0.02108433734939759, |
|
"grad_norm": 0.41058269143104553, |
|
"learning_rate": 7e-06, |
|
"loss": 1.595, |
|
"step": 7 |
|
}, |
|
{ |
|
"epoch": 0.024096385542168676, |
|
"grad_norm": 0.3926224708557129, |
|
"learning_rate": 8.000000000000001e-06, |
|
"loss": 1.6345, |
|
"step": 8 |
|
}, |
|
{ |
|
"epoch": 0.02710843373493976, |
|
"grad_norm": 0.4289781451225281, |
|
"learning_rate": 9e-06, |
|
"loss": 1.644, |
|
"step": 9 |
|
}, |
|
{ |
|
"epoch": 0.030120481927710843, |
|
"grad_norm": 0.40193450450897217, |
|
"learning_rate": 1e-05, |
|
"loss": 1.5528, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.03313253012048193, |
|
"grad_norm": 0.4374159574508667, |
|
"learning_rate": 9.999942312273667e-06, |
|
"loss": 1.6132, |
|
"step": 11 |
|
}, |
|
{ |
|
"epoch": 0.03614457831325301, |
|
"grad_norm": 0.42067164182662964, |
|
"learning_rate": 9.999769250425817e-06, |
|
"loss": 1.5811, |
|
"step": 12 |
|
}, |
|
{ |
|
"epoch": 0.0391566265060241, |
|
"grad_norm": 0.45214423537254333, |
|
"learning_rate": 9.999480818449868e-06, |
|
"loss": 1.5985, |
|
"step": 13 |
|
}, |
|
{ |
|
"epoch": 0.04216867469879518, |
|
"grad_norm": 0.42575493454933167, |
|
"learning_rate": 9.999077023001411e-06, |
|
"loss": 1.6494, |
|
"step": 14 |
|
}, |
|
{ |
|
"epoch": 0.045180722891566265, |
|
"grad_norm": 0.4473470151424408, |
|
"learning_rate": 9.998557873398066e-06, |
|
"loss": 1.6773, |
|
"step": 15 |
|
}, |
|
{ |
|
"epoch": 0.04819277108433735, |
|
"grad_norm": 0.4473958909511566, |
|
"learning_rate": 9.997923381619257e-06, |
|
"loss": 1.6342, |
|
"step": 16 |
|
}, |
|
{ |
|
"epoch": 0.05120481927710843, |
|
"grad_norm": 0.46099603176116943, |
|
"learning_rate": 9.997173562305937e-06, |
|
"loss": 1.5608, |
|
"step": 17 |
|
}, |
|
{ |
|
"epoch": 0.05421686746987952, |
|
"grad_norm": 0.44749438762664795, |
|
"learning_rate": 9.996308432760257e-06, |
|
"loss": 1.6081, |
|
"step": 18 |
|
}, |
|
{ |
|
"epoch": 0.0572289156626506, |
|
"grad_norm": 0.46720796823501587, |
|
"learning_rate": 9.995328012945158e-06, |
|
"loss": 1.595, |
|
"step": 19 |
|
}, |
|
{ |
|
"epoch": 0.060240963855421686, |
|
"grad_norm": 0.436519056558609, |
|
"learning_rate": 9.994232325483917e-06, |
|
"loss": 1.6167, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.06325301204819277, |
|
"grad_norm": 0.42265141010284424, |
|
"learning_rate": 9.99302139565962e-06, |
|
"loss": 1.6195, |
|
"step": 21 |
|
}, |
|
{ |
|
"epoch": 0.06626506024096386, |
|
"grad_norm": 0.3948360085487366, |
|
"learning_rate": 9.991695251414584e-06, |
|
"loss": 1.5915, |
|
"step": 22 |
|
}, |
|
{ |
|
"epoch": 0.06927710843373494, |
|
"grad_norm": 0.4320020377635956, |
|
"learning_rate": 9.990253923349706e-06, |
|
"loss": 1.5906, |
|
"step": 23 |
|
}, |
|
{ |
|
"epoch": 0.07228915662650602, |
|
"grad_norm": 0.44574347138404846, |
|
"learning_rate": 9.988697444723763e-06, |
|
"loss": 1.5712, |
|
"step": 24 |
|
}, |
|
{ |
|
"epoch": 0.07530120481927711, |
|
"grad_norm": 0.41239818930625916, |
|
"learning_rate": 9.98702585145264e-06, |
|
"loss": 1.5752, |
|
"step": 25 |
|
}, |
|
{ |
|
"epoch": 0.0783132530120482, |
|
"grad_norm": 0.42800942063331604, |
|
"learning_rate": 9.9852391821085e-06, |
|
"loss": 1.5531, |
|
"step": 26 |
|
}, |
|
{ |
|
"epoch": 0.08132530120481928, |
|
"grad_norm": 0.41428813338279724, |
|
"learning_rate": 9.983337477918904e-06, |
|
"loss": 1.5882, |
|
"step": 27 |
|
}, |
|
{ |
|
"epoch": 0.08433734939759036, |
|
"grad_norm": 0.4180893898010254, |
|
"learning_rate": 9.981320782765847e-06, |
|
"loss": 1.5694, |
|
"step": 28 |
|
}, |
|
{ |
|
"epoch": 0.08734939759036145, |
|
"grad_norm": 0.4115102291107178, |
|
"learning_rate": 9.97918914318475e-06, |
|
"loss": 1.5347, |
|
"step": 29 |
|
}, |
|
{ |
|
"epoch": 0.09036144578313253, |
|
"grad_norm": 0.42680180072784424, |
|
"learning_rate": 9.976942608363394e-06, |
|
"loss": 1.5275, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.09337349397590361, |
|
"grad_norm": 0.39122140407562256, |
|
"learning_rate": 9.97458123014077e-06, |
|
"loss": 1.4961, |
|
"step": 31 |
|
}, |
|
{ |
|
"epoch": 0.0963855421686747, |
|
"grad_norm": 0.39151236414909363, |
|
"learning_rate": 9.972105063005895e-06, |
|
"loss": 1.5359, |
|
"step": 32 |
|
}, |
|
{ |
|
"epoch": 0.09939759036144578, |
|
"grad_norm": 0.38214412331581116, |
|
"learning_rate": 9.969514164096548e-06, |
|
"loss": 1.5103, |
|
"step": 33 |
|
}, |
|
{ |
|
"epoch": 0.10240963855421686, |
|
"grad_norm": 0.39758872985839844, |
|
"learning_rate": 9.966808593197959e-06, |
|
"loss": 1.4839, |
|
"step": 34 |
|
}, |
|
{ |
|
"epoch": 0.10542168674698796, |
|
"grad_norm": 0.3730682134628296, |
|
"learning_rate": 9.96398841274142e-06, |
|
"loss": 1.4908, |
|
"step": 35 |
|
}, |
|
{ |
|
"epoch": 0.10843373493975904, |
|
"grad_norm": 0.367106556892395, |
|
"learning_rate": 9.96105368780285e-06, |
|
"loss": 1.4372, |
|
"step": 36 |
|
}, |
|
{ |
|
"epoch": 0.11144578313253012, |
|
"grad_norm": 0.3657713532447815, |
|
"learning_rate": 9.958004486101293e-06, |
|
"loss": 1.4791, |
|
"step": 37 |
|
}, |
|
{ |
|
"epoch": 0.1144578313253012, |
|
"grad_norm": 0.3604431450366974, |
|
"learning_rate": 9.954840877997356e-06, |
|
"loss": 1.443, |
|
"step": 38 |
|
}, |
|
{ |
|
"epoch": 0.11746987951807229, |
|
"grad_norm": 0.3567802906036377, |
|
"learning_rate": 9.95156293649158e-06, |
|
"loss": 1.4589, |
|
"step": 39 |
|
}, |
|
{ |
|
"epoch": 0.12048192771084337, |
|
"grad_norm": 0.3510221838951111, |
|
"learning_rate": 9.948170737222763e-06, |
|
"loss": 1.378, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.12349397590361445, |
|
"grad_norm": 0.35590696334838867, |
|
"learning_rate": 9.94466435846621e-06, |
|
"loss": 1.4064, |
|
"step": 41 |
|
}, |
|
{ |
|
"epoch": 0.12650602409638553, |
|
"grad_norm": 0.3688894510269165, |
|
"learning_rate": 9.941043881131928e-06, |
|
"loss": 1.3728, |
|
"step": 42 |
|
}, |
|
{ |
|
"epoch": 0.12951807228915663, |
|
"grad_norm": 0.35922420024871826, |
|
"learning_rate": 9.93730938876276e-06, |
|
"loss": 1.3809, |
|
"step": 43 |
|
}, |
|
{ |
|
"epoch": 0.13253012048192772, |
|
"grad_norm": 0.34513840079307556, |
|
"learning_rate": 9.933460967532454e-06, |
|
"loss": 1.4276, |
|
"step": 44 |
|
}, |
|
{ |
|
"epoch": 0.1355421686746988, |
|
"grad_norm": 0.34767019748687744, |
|
"learning_rate": 9.929498706243681e-06, |
|
"loss": 1.3542, |
|
"step": 45 |
|
}, |
|
{ |
|
"epoch": 0.13855421686746988, |
|
"grad_norm": 0.3442816138267517, |
|
"learning_rate": 9.925422696325976e-06, |
|
"loss": 1.3512, |
|
"step": 46 |
|
}, |
|
{ |
|
"epoch": 0.14156626506024098, |
|
"grad_norm": 0.36368539929389954, |
|
"learning_rate": 9.921233031833639e-06, |
|
"loss": 1.3736, |
|
"step": 47 |
|
}, |
|
{ |
|
"epoch": 0.14457831325301204, |
|
"grad_norm": 0.33587586879730225, |
|
"learning_rate": 9.916929809443555e-06, |
|
"loss": 1.3906, |
|
"step": 48 |
|
}, |
|
{ |
|
"epoch": 0.14759036144578314, |
|
"grad_norm": 0.34448426961898804, |
|
"learning_rate": 9.912513128452974e-06, |
|
"loss": 1.362, |
|
"step": 49 |
|
}, |
|
{ |
|
"epoch": 0.15060240963855423, |
|
"grad_norm": 0.3427204489707947, |
|
"learning_rate": 9.907983090777206e-06, |
|
"loss": 1.3292, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.1536144578313253, |
|
"grad_norm": 0.38191652297973633, |
|
"learning_rate": 9.903339800947284e-06, |
|
"loss": 1.3735, |
|
"step": 51 |
|
}, |
|
{ |
|
"epoch": 0.1566265060240964, |
|
"grad_norm": 0.3497113287448883, |
|
"learning_rate": 9.898583366107539e-06, |
|
"loss": 1.3776, |
|
"step": 52 |
|
}, |
|
{ |
|
"epoch": 0.15963855421686746, |
|
"grad_norm": 0.31867995858192444, |
|
"learning_rate": 9.893713896013134e-06, |
|
"loss": 1.3058, |
|
"step": 53 |
|
}, |
|
{ |
|
"epoch": 0.16265060240963855, |
|
"grad_norm": 0.3136507272720337, |
|
"learning_rate": 9.888731503027535e-06, |
|
"loss": 1.3463, |
|
"step": 54 |
|
}, |
|
{ |
|
"epoch": 0.16566265060240964, |
|
"grad_norm": 0.3268043100833893, |
|
"learning_rate": 9.883636302119911e-06, |
|
"loss": 1.3189, |
|
"step": 55 |
|
}, |
|
{ |
|
"epoch": 0.1686746987951807, |
|
"grad_norm": 0.3265782296657562, |
|
"learning_rate": 9.878428410862484e-06, |
|
"loss": 1.2983, |
|
"step": 56 |
|
}, |
|
{ |
|
"epoch": 0.1716867469879518, |
|
"grad_norm": 0.30159902572631836, |
|
"learning_rate": 9.873107949427815e-06, |
|
"loss": 1.3545, |
|
"step": 57 |
|
}, |
|
{ |
|
"epoch": 0.1746987951807229, |
|
"grad_norm": 0.3109259307384491, |
|
"learning_rate": 9.867675040586035e-06, |
|
"loss": 1.3894, |
|
"step": 58 |
|
}, |
|
{ |
|
"epoch": 0.17771084337349397, |
|
"grad_norm": 0.30744504928588867, |
|
"learning_rate": 9.862129809702006e-06, |
|
"loss": 1.3777, |
|
"step": 59 |
|
}, |
|
{ |
|
"epoch": 0.18072289156626506, |
|
"grad_norm": 0.3043947219848633, |
|
"learning_rate": 9.856472384732432e-06, |
|
"loss": 1.354, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.18373493975903615, |
|
"grad_norm": 0.3052617013454437, |
|
"learning_rate": 9.850702896222908e-06, |
|
"loss": 1.3074, |
|
"step": 61 |
|
}, |
|
{ |
|
"epoch": 0.18674698795180722, |
|
"grad_norm": 0.3007952570915222, |
|
"learning_rate": 9.844821477304904e-06, |
|
"loss": 1.2124, |
|
"step": 62 |
|
}, |
|
{ |
|
"epoch": 0.1897590361445783, |
|
"grad_norm": 0.2832448482513428, |
|
"learning_rate": 9.838828263692693e-06, |
|
"loss": 1.2841, |
|
"step": 63 |
|
}, |
|
{ |
|
"epoch": 0.1927710843373494, |
|
"grad_norm": 0.27628499269485474, |
|
"learning_rate": 9.832723393680222e-06, |
|
"loss": 1.2425, |
|
"step": 64 |
|
}, |
|
{ |
|
"epoch": 0.19578313253012047, |
|
"grad_norm": 0.2700969874858856, |
|
"learning_rate": 9.826507008137919e-06, |
|
"loss": 1.2543, |
|
"step": 65 |
|
}, |
|
{ |
|
"epoch": 0.19879518072289157, |
|
"grad_norm": 0.2948736548423767, |
|
"learning_rate": 9.820179250509442e-06, |
|
"loss": 1.2708, |
|
"step": 66 |
|
}, |
|
{ |
|
"epoch": 0.20180722891566266, |
|
"grad_norm": 0.29045990109443665, |
|
"learning_rate": 9.813740266808375e-06, |
|
"loss": 1.3043, |
|
"step": 67 |
|
}, |
|
{ |
|
"epoch": 0.20481927710843373, |
|
"grad_norm": 0.27807915210723877, |
|
"learning_rate": 9.807190205614847e-06, |
|
"loss": 1.206, |
|
"step": 68 |
|
}, |
|
{ |
|
"epoch": 0.20783132530120482, |
|
"grad_norm": 0.267451673746109, |
|
"learning_rate": 9.800529218072112e-06, |
|
"loss": 1.2255, |
|
"step": 69 |
|
}, |
|
{ |
|
"epoch": 0.21084337349397592, |
|
"grad_norm": 0.2782948613166809, |
|
"learning_rate": 9.793757457883062e-06, |
|
"loss": 1.2236, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.21385542168674698, |
|
"grad_norm": 0.276692271232605, |
|
"learning_rate": 9.786875081306677e-06, |
|
"loss": 1.2588, |
|
"step": 71 |
|
}, |
|
{ |
|
"epoch": 0.21686746987951808, |
|
"grad_norm": 0.2745719254016876, |
|
"learning_rate": 9.779882247154419e-06, |
|
"loss": 1.215, |
|
"step": 72 |
|
}, |
|
{ |
|
"epoch": 0.21987951807228914, |
|
"grad_norm": 0.2591319978237152, |
|
"learning_rate": 9.772779116786568e-06, |
|
"loss": 1.2833, |
|
"step": 73 |
|
}, |
|
{ |
|
"epoch": 0.22289156626506024, |
|
"grad_norm": 0.27248722314834595, |
|
"learning_rate": 9.765565854108503e-06, |
|
"loss": 1.2575, |
|
"step": 74 |
|
}, |
|
{ |
|
"epoch": 0.22590361445783133, |
|
"grad_norm": 0.273562490940094, |
|
"learning_rate": 9.758242625566912e-06, |
|
"loss": 1.2134, |
|
"step": 75 |
|
}, |
|
{ |
|
"epoch": 0.2289156626506024, |
|
"grad_norm": 0.29504141211509705, |
|
"learning_rate": 9.750809600145955e-06, |
|
"loss": 1.2222, |
|
"step": 76 |
|
}, |
|
{ |
|
"epoch": 0.2319277108433735, |
|
"grad_norm": 0.2649330496788025, |
|
"learning_rate": 9.743266949363368e-06, |
|
"loss": 1.1992, |
|
"step": 77 |
|
}, |
|
{ |
|
"epoch": 0.23493975903614459, |
|
"grad_norm": 0.26566869020462036, |
|
"learning_rate": 9.735614847266502e-06, |
|
"loss": 1.2432, |
|
"step": 78 |
|
}, |
|
{ |
|
"epoch": 0.23795180722891565, |
|
"grad_norm": 0.25488752126693726, |
|
"learning_rate": 9.727853470428301e-06, |
|
"loss": 1.1646, |
|
"step": 79 |
|
}, |
|
{ |
|
"epoch": 0.24096385542168675, |
|
"grad_norm": 0.280771404504776, |
|
"learning_rate": 9.719982997943245e-06, |
|
"loss": 1.2075, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.24397590361445784, |
|
"grad_norm": 0.3047221899032593, |
|
"learning_rate": 9.712003611423194e-06, |
|
"loss": 1.2378, |
|
"step": 81 |
|
}, |
|
{ |
|
"epoch": 0.2469879518072289, |
|
"grad_norm": 0.2466488480567932, |
|
"learning_rate": 9.703915494993215e-06, |
|
"loss": 1.2169, |
|
"step": 82 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"grad_norm": 0.25351837277412415, |
|
"learning_rate": 9.695718835287328e-06, |
|
"loss": 1.1995, |
|
"step": 83 |
|
}, |
|
{ |
|
"epoch": 0.25301204819277107, |
|
"grad_norm": 0.26343056559562683, |
|
"learning_rate": 9.6874138214442e-06, |
|
"loss": 1.2329, |
|
"step": 84 |
|
}, |
|
{ |
|
"epoch": 0.2560240963855422, |
|
"grad_norm": 0.2685820460319519, |
|
"learning_rate": 9.679000645102771e-06, |
|
"loss": 1.2054, |
|
"step": 85 |
|
}, |
|
{ |
|
"epoch": 0.25903614457831325, |
|
"grad_norm": 0.2713760733604431, |
|
"learning_rate": 9.670479500397854e-06, |
|
"loss": 1.2417, |
|
"step": 86 |
|
}, |
|
{ |
|
"epoch": 0.2620481927710843, |
|
"grad_norm": 0.2718029320240021, |
|
"learning_rate": 9.66185058395563e-06, |
|
"loss": 1.2587, |
|
"step": 87 |
|
}, |
|
{ |
|
"epoch": 0.26506024096385544, |
|
"grad_norm": 0.26352736353874207, |
|
"learning_rate": 9.653114094889128e-06, |
|
"loss": 1.2541, |
|
"step": 88 |
|
}, |
|
{ |
|
"epoch": 0.2680722891566265, |
|
"grad_norm": 0.2755894362926483, |
|
"learning_rate": 9.644270234793625e-06, |
|
"loss": 1.2091, |
|
"step": 89 |
|
}, |
|
{ |
|
"epoch": 0.2710843373493976, |
|
"grad_norm": 0.25112101435661316, |
|
"learning_rate": 9.63531920774199e-06, |
|
"loss": 1.2425, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.2740963855421687, |
|
"grad_norm": 0.26138511300086975, |
|
"learning_rate": 9.62626122027999e-06, |
|
"loss": 1.2252, |
|
"step": 91 |
|
}, |
|
{ |
|
"epoch": 0.27710843373493976, |
|
"grad_norm": 0.2561100721359253, |
|
"learning_rate": 9.617096481421498e-06, |
|
"loss": 1.2206, |
|
"step": 92 |
|
}, |
|
{ |
|
"epoch": 0.28012048192771083, |
|
"grad_norm": 0.26238083839416504, |
|
"learning_rate": 9.607825202643696e-06, |
|
"loss": 1.1859, |
|
"step": 93 |
|
}, |
|
{ |
|
"epoch": 0.28313253012048195, |
|
"grad_norm": 0.27538710832595825, |
|
"learning_rate": 9.598447597882181e-06, |
|
"loss": 1.2062, |
|
"step": 94 |
|
}, |
|
{ |
|
"epoch": 0.286144578313253, |
|
"grad_norm": 0.25858640670776367, |
|
"learning_rate": 9.588963883526033e-06, |
|
"loss": 1.2354, |
|
"step": 95 |
|
}, |
|
{ |
|
"epoch": 0.2891566265060241, |
|
"grad_norm": 0.2807197570800781, |
|
"learning_rate": 9.579374278412819e-06, |
|
"loss": 1.2433, |
|
"step": 96 |
|
}, |
|
{ |
|
"epoch": 0.2921686746987952, |
|
"grad_norm": 0.28452298045158386, |
|
"learning_rate": 9.569679003823542e-06, |
|
"loss": 1.2191, |
|
"step": 97 |
|
}, |
|
{ |
|
"epoch": 0.29518072289156627, |
|
"grad_norm": 0.25671708583831787, |
|
"learning_rate": 9.559878283477546e-06, |
|
"loss": 1.2095, |
|
"step": 98 |
|
}, |
|
{ |
|
"epoch": 0.29819277108433734, |
|
"grad_norm": 0.25289785861968994, |
|
"learning_rate": 9.549972343527336e-06, |
|
"loss": 1.2033, |
|
"step": 99 |
|
}, |
|
{ |
|
"epoch": 0.30120481927710846, |
|
"grad_norm": 0.27585139870643616, |
|
"learning_rate": 9.539961412553375e-06, |
|
"loss": 1.149, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.3042168674698795, |
|
"grad_norm": 0.2492348849773407, |
|
"learning_rate": 9.529845721558802e-06, |
|
"loss": 1.1271, |
|
"step": 101 |
|
}, |
|
{ |
|
"epoch": 0.3072289156626506, |
|
"grad_norm": 0.254409521818161, |
|
"learning_rate": 9.5196255039641e-06, |
|
"loss": 1.2528, |
|
"step": 102 |
|
}, |
|
{ |
|
"epoch": 0.3102409638554217, |
|
"grad_norm": 0.3059585690498352, |
|
"learning_rate": 9.50930099560172e-06, |
|
"loss": 1.2058, |
|
"step": 103 |
|
}, |
|
{ |
|
"epoch": 0.3132530120481928, |
|
"grad_norm": 0.2655487656593323, |
|
"learning_rate": 9.498872434710624e-06, |
|
"loss": 1.1311, |
|
"step": 104 |
|
}, |
|
{ |
|
"epoch": 0.31626506024096385, |
|
"grad_norm": 0.271914005279541, |
|
"learning_rate": 9.488340061930797e-06, |
|
"loss": 1.1831, |
|
"step": 105 |
|
}, |
|
{ |
|
"epoch": 0.3192771084337349, |
|
"grad_norm": 0.29053163528442383, |
|
"learning_rate": 9.477704120297698e-06, |
|
"loss": 1.1585, |
|
"step": 106 |
|
}, |
|
{ |
|
"epoch": 0.32228915662650603, |
|
"grad_norm": 0.26874732971191406, |
|
"learning_rate": 9.46696485523664e-06, |
|
"loss": 1.2012, |
|
"step": 107 |
|
}, |
|
{ |
|
"epoch": 0.3253012048192771, |
|
"grad_norm": 0.25582486391067505, |
|
"learning_rate": 9.45612251455714e-06, |
|
"loss": 1.1397, |
|
"step": 108 |
|
}, |
|
{ |
|
"epoch": 0.32831325301204817, |
|
"grad_norm": 0.26407524943351746, |
|
"learning_rate": 9.445177348447187e-06, |
|
"loss": 1.1887, |
|
"step": 109 |
|
}, |
|
{ |
|
"epoch": 0.3313253012048193, |
|
"grad_norm": 0.2506115734577179, |
|
"learning_rate": 9.434129609467484e-06, |
|
"loss": 1.2219, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.33433734939759036, |
|
"grad_norm": 0.2572745084762573, |
|
"learning_rate": 9.422979552545604e-06, |
|
"loss": 1.1362, |
|
"step": 111 |
|
}, |
|
{ |
|
"epoch": 0.3373493975903614, |
|
"grad_norm": 0.28277891874313354, |
|
"learning_rate": 9.411727434970121e-06, |
|
"loss": 1.1409, |
|
"step": 112 |
|
}, |
|
{ |
|
"epoch": 0.34036144578313254, |
|
"grad_norm": 0.30223405361175537, |
|
"learning_rate": 9.400373516384671e-06, |
|
"loss": 1.1546, |
|
"step": 113 |
|
}, |
|
{ |
|
"epoch": 0.3433734939759036, |
|
"grad_norm": 0.2697835862636566, |
|
"learning_rate": 9.388918058781947e-06, |
|
"loss": 1.2384, |
|
"step": 114 |
|
}, |
|
{ |
|
"epoch": 0.3463855421686747, |
|
"grad_norm": 0.2695978283882141, |
|
"learning_rate": 9.377361326497673e-06, |
|
"loss": 1.1364, |
|
"step": 115 |
|
}, |
|
{ |
|
"epoch": 0.3493975903614458, |
|
"grad_norm": 0.25360485911369324, |
|
"learning_rate": 9.365703586204495e-06, |
|
"loss": 1.178, |
|
"step": 116 |
|
}, |
|
{ |
|
"epoch": 0.35240963855421686, |
|
"grad_norm": 0.27773186564445496, |
|
"learning_rate": 9.353945106905822e-06, |
|
"loss": 1.1682, |
|
"step": 117 |
|
}, |
|
{ |
|
"epoch": 0.35542168674698793, |
|
"grad_norm": 0.27416011691093445, |
|
"learning_rate": 9.342086159929629e-06, |
|
"loss": 1.1477, |
|
"step": 118 |
|
}, |
|
{ |
|
"epoch": 0.35843373493975905, |
|
"grad_norm": 0.27577441930770874, |
|
"learning_rate": 9.330127018922195e-06, |
|
"loss": 1.1497, |
|
"step": 119 |
|
}, |
|
{ |
|
"epoch": 0.3614457831325301, |
|
"grad_norm": 0.2513567805290222, |
|
"learning_rate": 9.318067959841776e-06, |
|
"loss": 1.1339, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.3644578313253012, |
|
"grad_norm": 0.27182286977767944, |
|
"learning_rate": 9.305909260952255e-06, |
|
"loss": 1.1362, |
|
"step": 121 |
|
}, |
|
{ |
|
"epoch": 0.3674698795180723, |
|
"grad_norm": 0.26553475856781006, |
|
"learning_rate": 9.29365120281671e-06, |
|
"loss": 1.1576, |
|
"step": 122 |
|
}, |
|
{ |
|
"epoch": 0.3704819277108434, |
|
"grad_norm": 0.25539693236351013, |
|
"learning_rate": 9.28129406829094e-06, |
|
"loss": 1.1384, |
|
"step": 123 |
|
}, |
|
{ |
|
"epoch": 0.37349397590361444, |
|
"grad_norm": 0.2685853838920593, |
|
"learning_rate": 9.268838142516943e-06, |
|
"loss": 1.1842, |
|
"step": 124 |
|
}, |
|
{ |
|
"epoch": 0.37650602409638556, |
|
"grad_norm": 0.2612561881542206, |
|
"learning_rate": 9.256283712916337e-06, |
|
"loss": 1.1578, |
|
"step": 125 |
|
}, |
|
{ |
|
"epoch": 0.3795180722891566, |
|
"grad_norm": 0.26739126443862915, |
|
"learning_rate": 9.24363106918372e-06, |
|
"loss": 1.1477, |
|
"step": 126 |
|
}, |
|
{ |
|
"epoch": 0.3825301204819277, |
|
"grad_norm": 0.2942097783088684, |
|
"learning_rate": 9.230880503279991e-06, |
|
"loss": 1.1747, |
|
"step": 127 |
|
}, |
|
{ |
|
"epoch": 0.3855421686746988, |
|
"grad_norm": 0.2746829688549042, |
|
"learning_rate": 9.218032309425613e-06, |
|
"loss": 1.1651, |
|
"step": 128 |
|
}, |
|
{ |
|
"epoch": 0.3885542168674699, |
|
"grad_norm": 0.27550533413887024, |
|
"learning_rate": 9.205086784093823e-06, |
|
"loss": 1.1361, |
|
"step": 129 |
|
}, |
|
{ |
|
"epoch": 0.39156626506024095, |
|
"grad_norm": 0.31240707635879517, |
|
"learning_rate": 9.19204422600379e-06, |
|
"loss": 1.1933, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.39457831325301207, |
|
"grad_norm": 0.24383339285850525, |
|
"learning_rate": 9.178904936113719e-06, |
|
"loss": 1.1739, |
|
"step": 131 |
|
}, |
|
{ |
|
"epoch": 0.39759036144578314, |
|
"grad_norm": 0.3256170153617859, |
|
"learning_rate": 9.165669217613919e-06, |
|
"loss": 1.1709, |
|
"step": 132 |
|
}, |
|
{ |
|
"epoch": 0.4006024096385542, |
|
"grad_norm": 0.2967703938484192, |
|
"learning_rate": 9.152337375919792e-06, |
|
"loss": 1.1379, |
|
"step": 133 |
|
}, |
|
{ |
|
"epoch": 0.4036144578313253, |
|
"grad_norm": 0.2854821979999542, |
|
"learning_rate": 9.138909718664788e-06, |
|
"loss": 1.1741, |
|
"step": 134 |
|
}, |
|
{ |
|
"epoch": 0.4066265060240964, |
|
"grad_norm": 0.33066266775131226, |
|
"learning_rate": 9.125386555693316e-06, |
|
"loss": 1.1779, |
|
"step": 135 |
|
}, |
|
{ |
|
"epoch": 0.40963855421686746, |
|
"grad_norm": 0.27965742349624634, |
|
"learning_rate": 9.111768199053588e-06, |
|
"loss": 1.1717, |
|
"step": 136 |
|
}, |
|
{ |
|
"epoch": 0.4126506024096386, |
|
"grad_norm": 0.29010990262031555, |
|
"learning_rate": 9.098054962990415e-06, |
|
"loss": 1.1526, |
|
"step": 137 |
|
}, |
|
{ |
|
"epoch": 0.41566265060240964, |
|
"grad_norm": 0.2726079523563385, |
|
"learning_rate": 9.084247163937959e-06, |
|
"loss": 1.1136, |
|
"step": 138 |
|
}, |
|
{ |
|
"epoch": 0.4186746987951807, |
|
"grad_norm": 0.2590181231498718, |
|
"learning_rate": 9.070345120512436e-06, |
|
"loss": 1.1267, |
|
"step": 139 |
|
}, |
|
{ |
|
"epoch": 0.42168674698795183, |
|
"grad_norm": 0.291429340839386, |
|
"learning_rate": 9.056349153504753e-06, |
|
"loss": 1.1429, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.4246987951807229, |
|
"grad_norm": 0.2864663004875183, |
|
"learning_rate": 9.042259585873119e-06, |
|
"loss": 1.1161, |
|
"step": 141 |
|
}, |
|
{ |
|
"epoch": 0.42771084337349397, |
|
"grad_norm": 0.29812097549438477, |
|
"learning_rate": 9.028076742735583e-06, |
|
"loss": 1.157, |
|
"step": 142 |
|
}, |
|
{ |
|
"epoch": 0.4307228915662651, |
|
"grad_norm": 0.29142752289772034, |
|
"learning_rate": 9.013800951362532e-06, |
|
"loss": 1.0919, |
|
"step": 143 |
|
}, |
|
{ |
|
"epoch": 0.43373493975903615, |
|
"grad_norm": 0.2857559621334076, |
|
"learning_rate": 8.999432541169145e-06, |
|
"loss": 1.1391, |
|
"step": 144 |
|
}, |
|
{ |
|
"epoch": 0.4367469879518072, |
|
"grad_norm": 0.29825499653816223, |
|
"learning_rate": 8.984971843707787e-06, |
|
"loss": 1.1589, |
|
"step": 145 |
|
}, |
|
{ |
|
"epoch": 0.4397590361445783, |
|
"grad_norm": 0.26081719994544983, |
|
"learning_rate": 8.970419192660366e-06, |
|
"loss": 1.1411, |
|
"step": 146 |
|
}, |
|
{ |
|
"epoch": 0.4427710843373494, |
|
"grad_norm": 0.3022754490375519, |
|
"learning_rate": 8.955774923830618e-06, |
|
"loss": 1.1528, |
|
"step": 147 |
|
}, |
|
{ |
|
"epoch": 0.4457831325301205, |
|
"grad_norm": 0.28860539197921753, |
|
"learning_rate": 8.94103937513637e-06, |
|
"loss": 1.1784, |
|
"step": 148 |
|
}, |
|
{ |
|
"epoch": 0.44879518072289154, |
|
"grad_norm": 0.25238746404647827, |
|
"learning_rate": 8.92621288660175e-06, |
|
"loss": 1.1447, |
|
"step": 149 |
|
}, |
|
{ |
|
"epoch": 0.45180722891566266, |
|
"grad_norm": 0.2728082239627838, |
|
"learning_rate": 8.911295800349316e-06, |
|
"loss": 1.0984, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.45481927710843373, |
|
"grad_norm": 0.26758912205696106, |
|
"learning_rate": 8.896288460592187e-06, |
|
"loss": 1.0918, |
|
"step": 151 |
|
}, |
|
{ |
|
"epoch": 0.4578313253012048, |
|
"grad_norm": 0.27047985792160034, |
|
"learning_rate": 8.881191213626084e-06, |
|
"loss": 1.1279, |
|
"step": 152 |
|
}, |
|
{ |
|
"epoch": 0.4608433734939759, |
|
"grad_norm": 0.309121698141098, |
|
"learning_rate": 8.86600440782135e-06, |
|
"loss": 1.1366, |
|
"step": 153 |
|
}, |
|
{ |
|
"epoch": 0.463855421686747, |
|
"grad_norm": 0.2778535485267639, |
|
"learning_rate": 8.850728393614903e-06, |
|
"loss": 1.1423, |
|
"step": 154 |
|
}, |
|
{ |
|
"epoch": 0.46686746987951805, |
|
"grad_norm": 0.2797792851924896, |
|
"learning_rate": 8.835363523502154e-06, |
|
"loss": 1.1664, |
|
"step": 155 |
|
}, |
|
{ |
|
"epoch": 0.46987951807228917, |
|
"grad_norm": 0.3094732463359833, |
|
"learning_rate": 8.819910152028872e-06, |
|
"loss": 1.1295, |
|
"step": 156 |
|
}, |
|
{ |
|
"epoch": 0.47289156626506024, |
|
"grad_norm": 0.2910013496875763, |
|
"learning_rate": 8.804368635783002e-06, |
|
"loss": 1.0793, |
|
"step": 157 |
|
}, |
|
{ |
|
"epoch": 0.4759036144578313, |
|
"grad_norm": 0.26490893959999084, |
|
"learning_rate": 8.788739333386443e-06, |
|
"loss": 1.092, |
|
"step": 158 |
|
}, |
|
{ |
|
"epoch": 0.4789156626506024, |
|
"grad_norm": 0.25550705194473267, |
|
"learning_rate": 8.773022605486755e-06, |
|
"loss": 1.1325, |
|
"step": 159 |
|
}, |
|
{ |
|
"epoch": 0.4819277108433735, |
|
"grad_norm": 0.2488010972738266, |
|
"learning_rate": 8.75721881474886e-06, |
|
"loss": 1.0885, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.48493975903614456, |
|
"grad_norm": 0.3159677982330322, |
|
"learning_rate": 8.741328325846663e-06, |
|
"loss": 1.1544, |
|
"step": 161 |
|
}, |
|
{ |
|
"epoch": 0.4879518072289157, |
|
"grad_norm": 0.30506086349487305, |
|
"learning_rate": 8.725351505454631e-06, |
|
"loss": 1.1716, |
|
"step": 162 |
|
}, |
|
{ |
|
"epoch": 0.49096385542168675, |
|
"grad_norm": 0.29045408964157104, |
|
"learning_rate": 8.709288722239345e-06, |
|
"loss": 1.1199, |
|
"step": 163 |
|
}, |
|
{ |
|
"epoch": 0.4939759036144578, |
|
"grad_norm": 0.2709057033061981, |
|
"learning_rate": 8.693140346850975e-06, |
|
"loss": 1.113, |
|
"step": 164 |
|
}, |
|
{ |
|
"epoch": 0.49698795180722893, |
|
"grad_norm": 0.28410249948501587, |
|
"learning_rate": 8.67690675191475e-06, |
|
"loss": 1.1383, |
|
"step": 165 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"grad_norm": 0.29826584458351135, |
|
"learning_rate": 8.660588312022345e-06, |
|
"loss": 1.0619, |
|
"step": 166 |
|
}, |
|
{ |
|
"epoch": 0.5030120481927711, |
|
"grad_norm": 0.3092498779296875, |
|
"learning_rate": 8.644185403723231e-06, |
|
"loss": 1.1101, |
|
"step": 167 |
|
}, |
|
{ |
|
"epoch": 0.5060240963855421, |
|
"grad_norm": 0.30253866314888, |
|
"learning_rate": 8.627698405516007e-06, |
|
"loss": 1.0649, |
|
"step": 168 |
|
}, |
|
{ |
|
"epoch": 0.5090361445783133, |
|
"grad_norm": 0.2906908690929413, |
|
"learning_rate": 8.611127697839649e-06, |
|
"loss": 1.1436, |
|
"step": 169 |
|
}, |
|
{ |
|
"epoch": 0.5120481927710844, |
|
"grad_norm": 0.30768147110939026, |
|
"learning_rate": 8.594473663064735e-06, |
|
"loss": 1.1116, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.5150602409638554, |
|
"grad_norm": 0.3316003680229187, |
|
"learning_rate": 8.577736685484626e-06, |
|
"loss": 1.1484, |
|
"step": 171 |
|
}, |
|
{ |
|
"epoch": 0.5180722891566265, |
|
"grad_norm": 0.3070067763328552, |
|
"learning_rate": 8.560917151306594e-06, |
|
"loss": 1.144, |
|
"step": 172 |
|
}, |
|
{ |
|
"epoch": 0.5210843373493976, |
|
"grad_norm": 0.27163851261138916, |
|
"learning_rate": 8.544015448642916e-06, |
|
"loss": 1.1071, |
|
"step": 173 |
|
}, |
|
{ |
|
"epoch": 0.5240963855421686, |
|
"grad_norm": 0.2992447316646576, |
|
"learning_rate": 8.527031967501906e-06, |
|
"loss": 1.1647, |
|
"step": 174 |
|
}, |
|
{ |
|
"epoch": 0.5271084337349398, |
|
"grad_norm": 0.262173593044281, |
|
"learning_rate": 8.509967099778934e-06, |
|
"loss": 1.2107, |
|
"step": 175 |
|
}, |
|
{ |
|
"epoch": 0.5301204819277109, |
|
"grad_norm": 0.33722054958343506, |
|
"learning_rate": 8.492821239247365e-06, |
|
"loss": 1.0553, |
|
"step": 176 |
|
}, |
|
{ |
|
"epoch": 0.5331325301204819, |
|
"grad_norm": 0.27636295557022095, |
|
"learning_rate": 8.475594781549483e-06, |
|
"loss": 1.1275, |
|
"step": 177 |
|
}, |
|
{ |
|
"epoch": 0.536144578313253, |
|
"grad_norm": 0.2799915671348572, |
|
"learning_rate": 8.45828812418736e-06, |
|
"loss": 1.0764, |
|
"step": 178 |
|
}, |
|
{ |
|
"epoch": 0.5391566265060241, |
|
"grad_norm": 0.27971795201301575, |
|
"learning_rate": 8.44090166651368e-06, |
|
"loss": 1.0634, |
|
"step": 179 |
|
}, |
|
{ |
|
"epoch": 0.5421686746987951, |
|
"grad_norm": 0.3047524690628052, |
|
"learning_rate": 8.42343580972253e-06, |
|
"loss": 1.1203, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.5451807228915663, |
|
"grad_norm": 0.3009694218635559, |
|
"learning_rate": 8.405890956840136e-06, |
|
"loss": 1.1168, |
|
"step": 181 |
|
}, |
|
{ |
|
"epoch": 0.5481927710843374, |
|
"grad_norm": 0.30559536814689636, |
|
"learning_rate": 8.388267512715565e-06, |
|
"loss": 1.113, |
|
"step": 182 |
|
}, |
|
{ |
|
"epoch": 0.5512048192771084, |
|
"grad_norm": 0.3450864851474762, |
|
"learning_rate": 8.370565884011389e-06, |
|
"loss": 1.0621, |
|
"step": 183 |
|
}, |
|
{ |
|
"epoch": 0.5542168674698795, |
|
"grad_norm": 0.3391083776950836, |
|
"learning_rate": 8.352786479194288e-06, |
|
"loss": 1.1276, |
|
"step": 184 |
|
}, |
|
{ |
|
"epoch": 0.5572289156626506, |
|
"grad_norm": 0.3621962070465088, |
|
"learning_rate": 8.33492970852564e-06, |
|
"loss": 1.081, |
|
"step": 185 |
|
}, |
|
{ |
|
"epoch": 0.5602409638554217, |
|
"grad_norm": 0.28517264127731323, |
|
"learning_rate": 8.316995984052048e-06, |
|
"loss": 1.0723, |
|
"step": 186 |
|
}, |
|
{ |
|
"epoch": 0.5632530120481928, |
|
"grad_norm": 0.3252887427806854, |
|
"learning_rate": 8.298985719595824e-06, |
|
"loss": 1.0727, |
|
"step": 187 |
|
}, |
|
{ |
|
"epoch": 0.5662650602409639, |
|
"grad_norm": 0.3289787769317627, |
|
"learning_rate": 8.280899330745452e-06, |
|
"loss": 1.0726, |
|
"step": 188 |
|
}, |
|
{ |
|
"epoch": 0.5692771084337349, |
|
"grad_norm": 0.3431254029273987, |
|
"learning_rate": 8.262737234845993e-06, |
|
"loss": 1.0908, |
|
"step": 189 |
|
}, |
|
{ |
|
"epoch": 0.572289156626506, |
|
"grad_norm": 0.295175164937973, |
|
"learning_rate": 8.244499850989453e-06, |
|
"loss": 1.1408, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.5753012048192772, |
|
"grad_norm": 0.3039282262325287, |
|
"learning_rate": 8.226187600005116e-06, |
|
"loss": 1.1105, |
|
"step": 191 |
|
}, |
|
{ |
|
"epoch": 0.5783132530120482, |
|
"grad_norm": 0.30349868535995483, |
|
"learning_rate": 8.207800904449829e-06, |
|
"loss": 1.109, |
|
"step": 192 |
|
}, |
|
{ |
|
"epoch": 0.5813253012048193, |
|
"grad_norm": 0.3329324722290039, |
|
"learning_rate": 8.189340188598263e-06, |
|
"loss": 1.0828, |
|
"step": 193 |
|
}, |
|
{ |
|
"epoch": 0.5843373493975904, |
|
"grad_norm": 0.32696786522865295, |
|
"learning_rate": 8.1708058784331e-06, |
|
"loss": 1.116, |
|
"step": 194 |
|
}, |
|
{ |
|
"epoch": 0.5873493975903614, |
|
"grad_norm": 0.30085158348083496, |
|
"learning_rate": 8.15219840163523e-06, |
|
"loss": 1.141, |
|
"step": 195 |
|
}, |
|
{ |
|
"epoch": 0.5903614457831325, |
|
"grad_norm": 0.30034953355789185, |
|
"learning_rate": 8.133518187573864e-06, |
|
"loss": 1.1254, |
|
"step": 196 |
|
}, |
|
{ |
|
"epoch": 0.5933734939759037, |
|
"grad_norm": 0.35607779026031494, |
|
"learning_rate": 8.114765667296628e-06, |
|
"loss": 1.0621, |
|
"step": 197 |
|
}, |
|
{ |
|
"epoch": 0.5963855421686747, |
|
"grad_norm": 0.30774402618408203, |
|
"learning_rate": 8.095941273519634e-06, |
|
"loss": 1.0462, |
|
"step": 198 |
|
}, |
|
{ |
|
"epoch": 0.5993975903614458, |
|
"grad_norm": 0.3458847999572754, |
|
"learning_rate": 8.077045440617465e-06, |
|
"loss": 1.0695, |
|
"step": 199 |
|
}, |
|
{ |
|
"epoch": 0.6024096385542169, |
|
"grad_norm": 0.3302537202835083, |
|
"learning_rate": 8.058078604613178e-06, |
|
"loss": 1.1314, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.6054216867469879, |
|
"grad_norm": 0.32025519013404846, |
|
"learning_rate": 8.039041203168233e-06, |
|
"loss": 1.1179, |
|
"step": 201 |
|
}, |
|
{ |
|
"epoch": 0.608433734939759, |
|
"grad_norm": 0.32808589935302734, |
|
"learning_rate": 8.019933675572389e-06, |
|
"loss": 1.1393, |
|
"step": 202 |
|
}, |
|
{ |
|
"epoch": 0.6114457831325302, |
|
"grad_norm": 0.31607547402381897, |
|
"learning_rate": 8.000756462733577e-06, |
|
"loss": 1.1027, |
|
"step": 203 |
|
}, |
|
{ |
|
"epoch": 0.6144578313253012, |
|
"grad_norm": 0.33204394578933716, |
|
"learning_rate": 7.981510007167719e-06, |
|
"loss": 1.0795, |
|
"step": 204 |
|
}, |
|
{ |
|
"epoch": 0.6174698795180723, |
|
"grad_norm": 0.3012982904911041, |
|
"learning_rate": 7.962194752988519e-06, |
|
"loss": 1.104, |
|
"step": 205 |
|
}, |
|
{ |
|
"epoch": 0.6204819277108434, |
|
"grad_norm": 0.28379830718040466, |
|
"learning_rate": 7.942811145897215e-06, |
|
"loss": 1.1108, |
|
"step": 206 |
|
}, |
|
{ |
|
"epoch": 0.6234939759036144, |
|
"grad_norm": 0.3218439817428589, |
|
"learning_rate": 7.923359633172299e-06, |
|
"loss": 1.0856, |
|
"step": 207 |
|
}, |
|
{ |
|
"epoch": 0.6265060240963856, |
|
"grad_norm": 0.2985135614871979, |
|
"learning_rate": 7.903840663659186e-06, |
|
"loss": 1.1621, |
|
"step": 208 |
|
}, |
|
{ |
|
"epoch": 0.6295180722891566, |
|
"grad_norm": 0.3362099528312683, |
|
"learning_rate": 7.884254687759863e-06, |
|
"loss": 1.1173, |
|
"step": 209 |
|
}, |
|
{ |
|
"epoch": 0.6325301204819277, |
|
"grad_norm": 0.32187989354133606, |
|
"learning_rate": 7.864602157422501e-06, |
|
"loss": 1.1293, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.6355421686746988, |
|
"grad_norm": 0.34748998284339905, |
|
"learning_rate": 7.844883526131014e-06, |
|
"loss": 1.1501, |
|
"step": 211 |
|
}, |
|
{ |
|
"epoch": 0.6385542168674698, |
|
"grad_norm": 0.2776443660259247, |
|
"learning_rate": 7.8250992488946e-06, |
|
"loss": 1.1272, |
|
"step": 212 |
|
}, |
|
{ |
|
"epoch": 0.641566265060241, |
|
"grad_norm": 0.34776571393013, |
|
"learning_rate": 7.805249782237256e-06, |
|
"loss": 1.0993, |
|
"step": 213 |
|
}, |
|
{ |
|
"epoch": 0.6445783132530121, |
|
"grad_norm": 0.3251356780529022, |
|
"learning_rate": 7.78533558418722e-06, |
|
"loss": 1.0717, |
|
"step": 214 |
|
}, |
|
{ |
|
"epoch": 0.6475903614457831, |
|
"grad_norm": 0.32606494426727295, |
|
"learning_rate": 7.765357114266409e-06, |
|
"loss": 1.1061, |
|
"step": 215 |
|
}, |
|
{ |
|
"epoch": 0.6506024096385542, |
|
"grad_norm": 0.32897332310676575, |
|
"learning_rate": 7.745314833479834e-06, |
|
"loss": 1.065, |
|
"step": 216 |
|
}, |
|
{ |
|
"epoch": 0.6536144578313253, |
|
"grad_norm": 0.34086140990257263, |
|
"learning_rate": 7.72520920430493e-06, |
|
"loss": 1.1221, |
|
"step": 217 |
|
}, |
|
{ |
|
"epoch": 0.6566265060240963, |
|
"grad_norm": 0.395309180021286, |
|
"learning_rate": 7.705040690680915e-06, |
|
"loss": 1.0839, |
|
"step": 218 |
|
}, |
|
{ |
|
"epoch": 0.6596385542168675, |
|
"grad_norm": 0.3107753396034241, |
|
"learning_rate": 7.684809757998066e-06, |
|
"loss": 1.0287, |
|
"step": 219 |
|
}, |
|
{ |
|
"epoch": 0.6626506024096386, |
|
"grad_norm": 0.32579633593559265, |
|
"learning_rate": 7.664516873086987e-06, |
|
"loss": 1.0925, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.6656626506024096, |
|
"grad_norm": 0.32496118545532227, |
|
"learning_rate": 7.644162504207834e-06, |
|
"loss": 1.0225, |
|
"step": 221 |
|
}, |
|
{ |
|
"epoch": 0.6686746987951807, |
|
"grad_norm": 0.34487584233283997, |
|
"learning_rate": 7.623747121039512e-06, |
|
"loss": 1.1216, |
|
"step": 222 |
|
}, |
|
{ |
|
"epoch": 0.6716867469879518, |
|
"grad_norm": 0.28649845719337463, |
|
"learning_rate": 7.603271194668835e-06, |
|
"loss": 1.0989, |
|
"step": 223 |
|
}, |
|
{ |
|
"epoch": 0.6746987951807228, |
|
"grad_norm": 0.3071340024471283, |
|
"learning_rate": 7.582735197579657e-06, |
|
"loss": 1.0908, |
|
"step": 224 |
|
}, |
|
{ |
|
"epoch": 0.677710843373494, |
|
"grad_norm": 0.33348020911216736, |
|
"learning_rate": 7.562139603641971e-06, |
|
"loss": 1.0497, |
|
"step": 225 |
|
}, |
|
{ |
|
"epoch": 0.6807228915662651, |
|
"grad_norm": 0.3527333736419678, |
|
"learning_rate": 7.541484888100974e-06, |
|
"loss": 1.1121, |
|
"step": 226 |
|
}, |
|
{ |
|
"epoch": 0.6837349397590361, |
|
"grad_norm": 0.3623991310596466, |
|
"learning_rate": 7.520771527566093e-06, |
|
"loss": 1.0675, |
|
"step": 227 |
|
}, |
|
{ |
|
"epoch": 0.6867469879518072, |
|
"grad_norm": 0.3683350384235382, |
|
"learning_rate": 7.500000000000001e-06, |
|
"loss": 1.082, |
|
"step": 228 |
|
}, |
|
{ |
|
"epoch": 0.6897590361445783, |
|
"grad_norm": 0.30012479424476624, |
|
"learning_rate": 7.479170784707574e-06, |
|
"loss": 1.1421, |
|
"step": 229 |
|
}, |
|
{ |
|
"epoch": 0.6927710843373494, |
|
"grad_norm": 0.32032355666160583, |
|
"learning_rate": 7.458284362324844e-06, |
|
"loss": 1.0996, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 0.6957831325301205, |
|
"grad_norm": 0.30791187286376953, |
|
"learning_rate": 7.437341214807895e-06, |
|
"loss": 1.1221, |
|
"step": 231 |
|
}, |
|
{ |
|
"epoch": 0.6987951807228916, |
|
"grad_norm": 0.3271755576133728, |
|
"learning_rate": 7.416341825421755e-06, |
|
"loss": 1.0937, |
|
"step": 232 |
|
}, |
|
{ |
|
"epoch": 0.7018072289156626, |
|
"grad_norm": 0.320961594581604, |
|
"learning_rate": 7.395286678729232e-06, |
|
"loss": 1.0727, |
|
"step": 233 |
|
}, |
|
{ |
|
"epoch": 0.7048192771084337, |
|
"grad_norm": 0.31970059871673584, |
|
"learning_rate": 7.374176260579746e-06, |
|
"loss": 1.104, |
|
"step": 234 |
|
}, |
|
{ |
|
"epoch": 0.7078313253012049, |
|
"grad_norm": 0.3410727083683014, |
|
"learning_rate": 7.353011058098104e-06, |
|
"loss": 1.0866, |
|
"step": 235 |
|
}, |
|
{ |
|
"epoch": 0.7108433734939759, |
|
"grad_norm": 0.3979572653770447, |
|
"learning_rate": 7.33179155967327e-06, |
|
"loss": 1.0773, |
|
"step": 236 |
|
}, |
|
{ |
|
"epoch": 0.713855421686747, |
|
"grad_norm": 0.3386681079864502, |
|
"learning_rate": 7.310518254947092e-06, |
|
"loss": 1.0943, |
|
"step": 237 |
|
}, |
|
{ |
|
"epoch": 0.7168674698795181, |
|
"grad_norm": 0.32043731212615967, |
|
"learning_rate": 7.289191634803002e-06, |
|
"loss": 1.1104, |
|
"step": 238 |
|
}, |
|
{ |
|
"epoch": 0.7198795180722891, |
|
"grad_norm": 0.3244670331478119, |
|
"learning_rate": 7.267812191354691e-06, |
|
"loss": 1.1137, |
|
"step": 239 |
|
}, |
|
{ |
|
"epoch": 0.7228915662650602, |
|
"grad_norm": 0.32313597202301025, |
|
"learning_rate": 7.246380417934752e-06, |
|
"loss": 1.1296, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.7259036144578314, |
|
"grad_norm": 0.4050733149051666, |
|
"learning_rate": 7.224896809083297e-06, |
|
"loss": 1.0725, |
|
"step": 241 |
|
}, |
|
{ |
|
"epoch": 0.7289156626506024, |
|
"grad_norm": 0.2902127206325531, |
|
"learning_rate": 7.203361860536544e-06, |
|
"loss": 1.119, |
|
"step": 242 |
|
}, |
|
{ |
|
"epoch": 0.7319277108433735, |
|
"grad_norm": 0.31548964977264404, |
|
"learning_rate": 7.181776069215382e-06, |
|
"loss": 1.0712, |
|
"step": 243 |
|
}, |
|
{ |
|
"epoch": 0.7349397590361446, |
|
"grad_norm": 0.31955307722091675, |
|
"learning_rate": 7.160139933213899e-06, |
|
"loss": 1.0925, |
|
"step": 244 |
|
}, |
|
{ |
|
"epoch": 0.7379518072289156, |
|
"grad_norm": 0.37396878004074097, |
|
"learning_rate": 7.138453951787894e-06, |
|
"loss": 1.1029, |
|
"step": 245 |
|
}, |
|
{ |
|
"epoch": 0.7409638554216867, |
|
"grad_norm": 0.3100704550743103, |
|
"learning_rate": 7.1167186253433474e-06, |
|
"loss": 1.1001, |
|
"step": 246 |
|
}, |
|
{ |
|
"epoch": 0.7439759036144579, |
|
"grad_norm": 0.32318195700645447, |
|
"learning_rate": 7.094934455424889e-06, |
|
"loss": 1.0909, |
|
"step": 247 |
|
}, |
|
{ |
|
"epoch": 0.7469879518072289, |
|
"grad_norm": 0.40869641304016113, |
|
"learning_rate": 7.073101944704209e-06, |
|
"loss": 1.0925, |
|
"step": 248 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"grad_norm": 0.31567490100860596, |
|
"learning_rate": 7.051221596968471e-06, |
|
"loss": 1.0973, |
|
"step": 249 |
|
}, |
|
{ |
|
"epoch": 0.7530120481927711, |
|
"grad_norm": 0.32018548250198364, |
|
"learning_rate": 7.029293917108678e-06, |
|
"loss": 1.0222, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.7560240963855421, |
|
"grad_norm": 0.3648555874824524, |
|
"learning_rate": 7.0073194111080315e-06, |
|
"loss": 1.075, |
|
"step": 251 |
|
}, |
|
{ |
|
"epoch": 0.7590361445783133, |
|
"grad_norm": 0.3636914789676666, |
|
"learning_rate": 6.985298586030241e-06, |
|
"loss": 1.1419, |
|
"step": 252 |
|
}, |
|
{ |
|
"epoch": 0.7620481927710844, |
|
"grad_norm": 0.337217777967453, |
|
"learning_rate": 6.963231950007845e-06, |
|
"loss": 1.0848, |
|
"step": 253 |
|
}, |
|
{ |
|
"epoch": 0.7650602409638554, |
|
"grad_norm": 0.34433966875076294, |
|
"learning_rate": 6.941120012230464e-06, |
|
"loss": 1.0675, |
|
"step": 254 |
|
}, |
|
{ |
|
"epoch": 0.7680722891566265, |
|
"grad_norm": 0.31864967942237854, |
|
"learning_rate": 6.918963282933063e-06, |
|
"loss": 1.0576, |
|
"step": 255 |
|
}, |
|
{ |
|
"epoch": 0.7710843373493976, |
|
"grad_norm": 0.37333500385284424, |
|
"learning_rate": 6.896762273384179e-06, |
|
"loss": 1.0536, |
|
"step": 256 |
|
}, |
|
{ |
|
"epoch": 0.7740963855421686, |
|
"grad_norm": 0.389068603515625, |
|
"learning_rate": 6.8745174958741164e-06, |
|
"loss": 1.0992, |
|
"step": 257 |
|
}, |
|
{ |
|
"epoch": 0.7771084337349398, |
|
"grad_norm": 0.3978760540485382, |
|
"learning_rate": 6.852229463703131e-06, |
|
"loss": 1.124, |
|
"step": 258 |
|
}, |
|
{ |
|
"epoch": 0.7801204819277109, |
|
"grad_norm": 0.37109795212745667, |
|
"learning_rate": 6.829898691169581e-06, |
|
"loss": 1.065, |
|
"step": 259 |
|
}, |
|
{ |
|
"epoch": 0.7831325301204819, |
|
"grad_norm": 0.3223637044429779, |
|
"learning_rate": 6.8075256935580655e-06, |
|
"loss": 1.0475, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 0.786144578313253, |
|
"grad_norm": 0.32434552907943726, |
|
"learning_rate": 6.78511098712753e-06, |
|
"loss": 1.0797, |
|
"step": 261 |
|
}, |
|
{ |
|
"epoch": 0.7891566265060241, |
|
"grad_norm": 0.3082960546016693, |
|
"learning_rate": 6.762655089099353e-06, |
|
"loss": 1.0889, |
|
"step": 262 |
|
}, |
|
{ |
|
"epoch": 0.7921686746987951, |
|
"grad_norm": 0.33072763681411743, |
|
"learning_rate": 6.740158517645418e-06, |
|
"loss": 1.0575, |
|
"step": 263 |
|
}, |
|
{ |
|
"epoch": 0.7951807228915663, |
|
"grad_norm": 0.3404625952243805, |
|
"learning_rate": 6.717621791876147e-06, |
|
"loss": 1.0192, |
|
"step": 264 |
|
}, |
|
{ |
|
"epoch": 0.7981927710843374, |
|
"grad_norm": 0.31751227378845215, |
|
"learning_rate": 6.695045431828524e-06, |
|
"loss": 1.105, |
|
"step": 265 |
|
}, |
|
{ |
|
"epoch": 0.8012048192771084, |
|
"grad_norm": 0.3528308868408203, |
|
"learning_rate": 6.672429958454103e-06, |
|
"loss": 1.0803, |
|
"step": 266 |
|
}, |
|
{ |
|
"epoch": 0.8042168674698795, |
|
"grad_norm": 0.3395234942436218, |
|
"learning_rate": 6.649775893606982e-06, |
|
"loss": 1.1057, |
|
"step": 267 |
|
}, |
|
{ |
|
"epoch": 0.8072289156626506, |
|
"grad_norm": 0.37763112783432007, |
|
"learning_rate": 6.627083760031755e-06, |
|
"loss": 1.0911, |
|
"step": 268 |
|
}, |
|
{ |
|
"epoch": 0.8102409638554217, |
|
"grad_norm": 0.3695107400417328, |
|
"learning_rate": 6.604354081351461e-06, |
|
"loss": 1.1105, |
|
"step": 269 |
|
}, |
|
{ |
|
"epoch": 0.8132530120481928, |
|
"grad_norm": 0.3498575687408447, |
|
"learning_rate": 6.5815873820554925e-06, |
|
"loss": 1.0347, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 0.8162650602409639, |
|
"grad_norm": 0.3670216202735901, |
|
"learning_rate": 6.558784187487495e-06, |
|
"loss": 1.009, |
|
"step": 271 |
|
}, |
|
{ |
|
"epoch": 0.8192771084337349, |
|
"grad_norm": 0.38344910740852356, |
|
"learning_rate": 6.535945023833249e-06, |
|
"loss": 1.0132, |
|
"step": 272 |
|
}, |
|
{ |
|
"epoch": 0.822289156626506, |
|
"grad_norm": 0.3509382903575897, |
|
"learning_rate": 6.513070418108525e-06, |
|
"loss": 1.0768, |
|
"step": 273 |
|
}, |
|
{ |
|
"epoch": 0.8253012048192772, |
|
"grad_norm": 0.37638577818870544, |
|
"learning_rate": 6.490160898146919e-06, |
|
"loss": 1.0435, |
|
"step": 274 |
|
}, |
|
{ |
|
"epoch": 0.8283132530120482, |
|
"grad_norm": 0.36278653144836426, |
|
"learning_rate": 6.467216992587679e-06, |
|
"loss": 1.1227, |
|
"step": 275 |
|
}, |
|
{ |
|
"epoch": 0.8313253012048193, |
|
"grad_norm": 0.34076735377311707, |
|
"learning_rate": 6.444239230863505e-06, |
|
"loss": 1.042, |
|
"step": 276 |
|
}, |
|
{ |
|
"epoch": 0.8343373493975904, |
|
"grad_norm": 0.3733161687850952, |
|
"learning_rate": 6.421228143188325e-06, |
|
"loss": 1.0266, |
|
"step": 277 |
|
}, |
|
{ |
|
"epoch": 0.8373493975903614, |
|
"grad_norm": 0.3508923351764679, |
|
"learning_rate": 6.398184260545072e-06, |
|
"loss": 1.0716, |
|
"step": 278 |
|
}, |
|
{ |
|
"epoch": 0.8403614457831325, |
|
"grad_norm": 0.38440215587615967, |
|
"learning_rate": 6.375108114673425e-06, |
|
"loss": 1.1266, |
|
"step": 279 |
|
}, |
|
{ |
|
"epoch": 0.8433734939759037, |
|
"grad_norm": 0.38378679752349854, |
|
"learning_rate": 6.3520002380575395e-06, |
|
"loss": 1.1126, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 0.8463855421686747, |
|
"grad_norm": 0.36522167921066284, |
|
"learning_rate": 6.32886116391376e-06, |
|
"loss": 1.1011, |
|
"step": 281 |
|
}, |
|
{ |
|
"epoch": 0.8493975903614458, |
|
"grad_norm": 0.345027357339859, |
|
"learning_rate": 6.305691426178316e-06, |
|
"loss": 1.1179, |
|
"step": 282 |
|
}, |
|
{ |
|
"epoch": 0.8524096385542169, |
|
"grad_norm": 0.3252032697200775, |
|
"learning_rate": 6.282491559495005e-06, |
|
"loss": 1.0666, |
|
"step": 283 |
|
}, |
|
{ |
|
"epoch": 0.8554216867469879, |
|
"grad_norm": 0.3353135585784912, |
|
"learning_rate": 6.259262099202849e-06, |
|
"loss": 1.045, |
|
"step": 284 |
|
}, |
|
{ |
|
"epoch": 0.858433734939759, |
|
"grad_norm": 0.35613197088241577, |
|
"learning_rate": 6.23600358132375e-06, |
|
"loss": 1.0619, |
|
"step": 285 |
|
}, |
|
{ |
|
"epoch": 0.8614457831325302, |
|
"grad_norm": 0.3165310025215149, |
|
"learning_rate": 6.212716542550112e-06, |
|
"loss": 1.0846, |
|
"step": 286 |
|
}, |
|
{ |
|
"epoch": 0.8644578313253012, |
|
"grad_norm": 0.3935311436653137, |
|
"learning_rate": 6.189401520232464e-06, |
|
"loss": 1.0634, |
|
"step": 287 |
|
}, |
|
{ |
|
"epoch": 0.8674698795180723, |
|
"grad_norm": 0.3519918620586395, |
|
"learning_rate": 6.166059052367055e-06, |
|
"loss": 1.1106, |
|
"step": 288 |
|
}, |
|
{ |
|
"epoch": 0.8704819277108434, |
|
"grad_norm": 0.34923064708709717, |
|
"learning_rate": 6.142689677583447e-06, |
|
"loss": 1.0479, |
|
"step": 289 |
|
}, |
|
{ |
|
"epoch": 0.8734939759036144, |
|
"grad_norm": 0.327006459236145, |
|
"learning_rate": 6.119293935132076e-06, |
|
"loss": 1.0652, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 0.8765060240963856, |
|
"grad_norm": 0.3877696692943573, |
|
"learning_rate": 6.095872364871818e-06, |
|
"loss": 1.0686, |
|
"step": 291 |
|
}, |
|
{ |
|
"epoch": 0.8795180722891566, |
|
"grad_norm": 0.3664681613445282, |
|
"learning_rate": 6.072425507257528e-06, |
|
"loss": 1.0205, |
|
"step": 292 |
|
}, |
|
{ |
|
"epoch": 0.8825301204819277, |
|
"grad_norm": 0.35179319977760315, |
|
"learning_rate": 6.048953903327568e-06, |
|
"loss": 1.0839, |
|
"step": 293 |
|
}, |
|
{ |
|
"epoch": 0.8855421686746988, |
|
"grad_norm": 0.3436523675918579, |
|
"learning_rate": 6.025458094691323e-06, |
|
"loss": 1.1028, |
|
"step": 294 |
|
}, |
|
{ |
|
"epoch": 0.8885542168674698, |
|
"grad_norm": 0.3567025363445282, |
|
"learning_rate": 6.0019386235167055e-06, |
|
"loss": 1.0638, |
|
"step": 295 |
|
}, |
|
{ |
|
"epoch": 0.891566265060241, |
|
"grad_norm": 0.34170979261398315, |
|
"learning_rate": 5.978396032517641e-06, |
|
"loss": 1.1007, |
|
"step": 296 |
|
}, |
|
{ |
|
"epoch": 0.8945783132530121, |
|
"grad_norm": 0.32985955476760864, |
|
"learning_rate": 5.9548308649415486e-06, |
|
"loss": 1.1342, |
|
"step": 297 |
|
}, |
|
{ |
|
"epoch": 0.8975903614457831, |
|
"grad_norm": 0.3722776174545288, |
|
"learning_rate": 5.931243664556803e-06, |
|
"loss": 1.1253, |
|
"step": 298 |
|
}, |
|
{ |
|
"epoch": 0.9006024096385542, |
|
"grad_norm": 0.3619896173477173, |
|
"learning_rate": 5.90763497564019e-06, |
|
"loss": 1.0155, |
|
"step": 299 |
|
}, |
|
{ |
|
"epoch": 0.9036144578313253, |
|
"grad_norm": 0.39154163002967834, |
|
"learning_rate": 5.884005342964343e-06, |
|
"loss": 1.151, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.9066265060240963, |
|
"grad_norm": 0.35082048177719116, |
|
"learning_rate": 5.860355311785175e-06, |
|
"loss": 1.0529, |
|
"step": 301 |
|
}, |
|
{ |
|
"epoch": 0.9096385542168675, |
|
"grad_norm": 0.3391878306865692, |
|
"learning_rate": 5.836685427829296e-06, |
|
"loss": 1.1057, |
|
"step": 302 |
|
}, |
|
{ |
|
"epoch": 0.9126506024096386, |
|
"grad_norm": 0.34184518456459045, |
|
"learning_rate": 5.812996237281423e-06, |
|
"loss": 1.0481, |
|
"step": 303 |
|
}, |
|
{ |
|
"epoch": 0.9156626506024096, |
|
"grad_norm": 0.3681842088699341, |
|
"learning_rate": 5.7892882867717705e-06, |
|
"loss": 1.0455, |
|
"step": 304 |
|
}, |
|
{ |
|
"epoch": 0.9186746987951807, |
|
"grad_norm": 0.3326142728328705, |
|
"learning_rate": 5.765562123363445e-06, |
|
"loss": 1.071, |
|
"step": 305 |
|
}, |
|
{ |
|
"epoch": 0.9216867469879518, |
|
"grad_norm": 0.36516857147216797, |
|
"learning_rate": 5.7418182945398136e-06, |
|
"loss": 1.0701, |
|
"step": 306 |
|
}, |
|
{ |
|
"epoch": 0.9246987951807228, |
|
"grad_norm": 0.3817295730113983, |
|
"learning_rate": 5.718057348191874e-06, |
|
"loss": 1.0718, |
|
"step": 307 |
|
}, |
|
{ |
|
"epoch": 0.927710843373494, |
|
"grad_norm": 0.3265933096408844, |
|
"learning_rate": 5.6942798326056205e-06, |
|
"loss": 1.0765, |
|
"step": 308 |
|
}, |
|
{ |
|
"epoch": 0.9307228915662651, |
|
"grad_norm": 0.3510778248310089, |
|
"learning_rate": 5.670486296449373e-06, |
|
"loss": 1.1283, |
|
"step": 309 |
|
}, |
|
{ |
|
"epoch": 0.9337349397590361, |
|
"grad_norm": 0.32118940353393555, |
|
"learning_rate": 5.646677288761132e-06, |
|
"loss": 1.0592, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 0.9367469879518072, |
|
"grad_norm": 0.3270410895347595, |
|
"learning_rate": 5.622853358935908e-06, |
|
"loss": 1.0876, |
|
"step": 311 |
|
}, |
|
{ |
|
"epoch": 0.9397590361445783, |
|
"grad_norm": 0.35170766711235046, |
|
"learning_rate": 5.599015056713037e-06, |
|
"loss": 1.0684, |
|
"step": 312 |
|
}, |
|
{ |
|
"epoch": 0.9427710843373494, |
|
"grad_norm": 0.3354102671146393, |
|
"learning_rate": 5.575162932163501e-06, |
|
"loss": 1.0861, |
|
"step": 313 |
|
}, |
|
{ |
|
"epoch": 0.9457831325301205, |
|
"grad_norm": 0.37484198808670044, |
|
"learning_rate": 5.551297535677236e-06, |
|
"loss": 1.0697, |
|
"step": 314 |
|
}, |
|
{ |
|
"epoch": 0.9487951807228916, |
|
"grad_norm": 0.3812544047832489, |
|
"learning_rate": 5.527419417950424e-06, |
|
"loss": 1.0526, |
|
"step": 315 |
|
}, |
|
{ |
|
"epoch": 0.9518072289156626, |
|
"grad_norm": 0.3424987196922302, |
|
"learning_rate": 5.503529129972792e-06, |
|
"loss": 1.0456, |
|
"step": 316 |
|
}, |
|
{ |
|
"epoch": 0.9548192771084337, |
|
"grad_norm": 0.37978166341781616, |
|
"learning_rate": 5.479627223014902e-06, |
|
"loss": 1.0712, |
|
"step": 317 |
|
}, |
|
{ |
|
"epoch": 0.9578313253012049, |
|
"grad_norm": 0.37075453996658325, |
|
"learning_rate": 5.455714248615417e-06, |
|
"loss": 1.0659, |
|
"step": 318 |
|
}, |
|
{ |
|
"epoch": 0.9608433734939759, |
|
"grad_norm": 0.3791234791278839, |
|
"learning_rate": 5.431790758568388e-06, |
|
"loss": 1.0408, |
|
"step": 319 |
|
}, |
|
{ |
|
"epoch": 0.963855421686747, |
|
"grad_norm": 0.3565094769001007, |
|
"learning_rate": 5.4078573049105135e-06, |
|
"loss": 1.0777, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 0.9668674698795181, |
|
"grad_norm": 0.3704342246055603, |
|
"learning_rate": 5.383914439908403e-06, |
|
"loss": 1.1454, |
|
"step": 321 |
|
}, |
|
{ |
|
"epoch": 0.9698795180722891, |
|
"grad_norm": 0.3541310429573059, |
|
"learning_rate": 5.359962716045836e-06, |
|
"loss": 1.0392, |
|
"step": 322 |
|
}, |
|
{ |
|
"epoch": 0.9728915662650602, |
|
"grad_norm": 0.380628377199173, |
|
"learning_rate": 5.336002686011007e-06, |
|
"loss": 1.137, |
|
"step": 323 |
|
}, |
|
{ |
|
"epoch": 0.9759036144578314, |
|
"grad_norm": 0.39858028292655945, |
|
"learning_rate": 5.312034902683779e-06, |
|
"loss": 1.1154, |
|
"step": 324 |
|
}, |
|
{ |
|
"epoch": 0.9789156626506024, |
|
"grad_norm": 0.3649790585041046, |
|
"learning_rate": 5.288059919122922e-06, |
|
"loss": 0.9955, |
|
"step": 325 |
|
}, |
|
{ |
|
"epoch": 0.9819277108433735, |
|
"grad_norm": 0.41761839389801025, |
|
"learning_rate": 5.2640782885533515e-06, |
|
"loss": 1.0635, |
|
"step": 326 |
|
}, |
|
{ |
|
"epoch": 0.9849397590361446, |
|
"grad_norm": 0.43014606833457947, |
|
"learning_rate": 5.240090564353365e-06, |
|
"loss": 1.0369, |
|
"step": 327 |
|
}, |
|
{ |
|
"epoch": 0.9879518072289156, |
|
"grad_norm": 0.36708715558052063, |
|
"learning_rate": 5.21609730004187e-06, |
|
"loss": 1.0698, |
|
"step": 328 |
|
}, |
|
{ |
|
"epoch": 0.9909638554216867, |
|
"grad_norm": 0.34585151076316833, |
|
"learning_rate": 5.1920990492656135e-06, |
|
"loss": 1.1109, |
|
"step": 329 |
|
}, |
|
{ |
|
"epoch": 0.9939759036144579, |
|
"grad_norm": 0.3957839906215668, |
|
"learning_rate": 5.168096365786402e-06, |
|
"loss": 1.0439, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 0.9969879518072289, |
|
"grad_norm": 0.35115697979927063, |
|
"learning_rate": 5.144089803468333e-06, |
|
"loss": 1.1163, |
|
"step": 331 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"grad_norm": 0.38435807824134827, |
|
"learning_rate": 5.1200799162650035e-06, |
|
"loss": 1.0951, |
|
"step": 332 |
|
}, |
|
{ |
|
"epoch": 1.0030120481927711, |
|
"grad_norm": 0.31511375308036804, |
|
"learning_rate": 5.096067258206735e-06, |
|
"loss": 1.1165, |
|
"step": 333 |
|
}, |
|
{ |
|
"epoch": 1.0060240963855422, |
|
"grad_norm": 0.41014838218688965, |
|
"learning_rate": 5.072052383387787e-06, |
|
"loss": 1.1078, |
|
"step": 334 |
|
}, |
|
{ |
|
"epoch": 1.0090361445783131, |
|
"grad_norm": 0.36247673630714417, |
|
"learning_rate": 5.048035845953569e-06, |
|
"loss": 0.9971, |
|
"step": 335 |
|
}, |
|
{ |
|
"epoch": 1.0120481927710843, |
|
"grad_norm": 0.3278728127479553, |
|
"learning_rate": 5.024018200087855e-06, |
|
"loss": 1.1189, |
|
"step": 336 |
|
}, |
|
{ |
|
"epoch": 1.0030120481927711, |
|
"grad_norm": 0.3709251880645752, |
|
"learning_rate": 5e-06, |
|
"loss": 1.0844, |
|
"step": 337 |
|
}, |
|
{ |
|
"epoch": 1.0060240963855422, |
|
"grad_norm": 0.3526400029659271, |
|
"learning_rate": 4.975981799912147e-06, |
|
"loss": 1.0526, |
|
"step": 338 |
|
}, |
|
{ |
|
"epoch": 1.0090361445783131, |
|
"grad_norm": 0.3683416545391083, |
|
"learning_rate": 4.951964154046432e-06, |
|
"loss": 1.0687, |
|
"step": 339 |
|
}, |
|
{ |
|
"epoch": 1.0120481927710843, |
|
"grad_norm": 0.3674441874027252, |
|
"learning_rate": 4.927947616612216e-06, |
|
"loss": 1.048, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 1.0150602409638554, |
|
"grad_norm": 0.33722299337387085, |
|
"learning_rate": 4.903932741793266e-06, |
|
"loss": 0.9881, |
|
"step": 341 |
|
}, |
|
{ |
|
"epoch": 1.0180722891566265, |
|
"grad_norm": 0.38287389278411865, |
|
"learning_rate": 4.879920083734997e-06, |
|
"loss": 1.0368, |
|
"step": 342 |
|
}, |
|
{ |
|
"epoch": 1.0210843373493976, |
|
"grad_norm": 0.38486406207084656, |
|
"learning_rate": 4.855910196531669e-06, |
|
"loss": 1.036, |
|
"step": 343 |
|
}, |
|
{ |
|
"epoch": 1.0240963855421688, |
|
"grad_norm": 0.3734920620918274, |
|
"learning_rate": 4.8319036342135985e-06, |
|
"loss": 1.0488, |
|
"step": 344 |
|
}, |
|
{ |
|
"epoch": 1.0271084337349397, |
|
"grad_norm": 0.40039393305778503, |
|
"learning_rate": 4.807900950734388e-06, |
|
"loss": 1.0315, |
|
"step": 345 |
|
}, |
|
{ |
|
"epoch": 1.0301204819277108, |
|
"grad_norm": 0.3681272566318512, |
|
"learning_rate": 4.78390269995813e-06, |
|
"loss": 1.0515, |
|
"step": 346 |
|
}, |
|
{ |
|
"epoch": 1.033132530120482, |
|
"grad_norm": 0.3793656826019287, |
|
"learning_rate": 4.759909435646636e-06, |
|
"loss": 1.1554, |
|
"step": 347 |
|
}, |
|
{ |
|
"epoch": 1.036144578313253, |
|
"grad_norm": 0.3956240117549896, |
|
"learning_rate": 4.735921711446649e-06, |
|
"loss": 1.0764, |
|
"step": 348 |
|
}, |
|
{ |
|
"epoch": 1.0391566265060241, |
|
"grad_norm": 0.3506197929382324, |
|
"learning_rate": 4.711940080877079e-06, |
|
"loss": 1.0664, |
|
"step": 349 |
|
}, |
|
{ |
|
"epoch": 1.0421686746987953, |
|
"grad_norm": 0.33289358019828796, |
|
"learning_rate": 4.687965097316223e-06, |
|
"loss": 1.0989, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 1.0451807228915662, |
|
"grad_norm": 0.38886559009552, |
|
"learning_rate": 4.6639973139889944e-06, |
|
"loss": 1.0367, |
|
"step": 351 |
|
}, |
|
{ |
|
"epoch": 1.0481927710843373, |
|
"grad_norm": 0.36474519968032837, |
|
"learning_rate": 4.640037283954165e-06, |
|
"loss": 1.041, |
|
"step": 352 |
|
}, |
|
{ |
|
"epoch": 1.0512048192771084, |
|
"grad_norm": 0.3719565272331238, |
|
"learning_rate": 4.616085560091596e-06, |
|
"loss": 1.0481, |
|
"step": 353 |
|
}, |
|
{ |
|
"epoch": 1.0542168674698795, |
|
"grad_norm": 0.38648533821105957, |
|
"learning_rate": 4.592142695089489e-06, |
|
"loss": 1.0783, |
|
"step": 354 |
|
}, |
|
{ |
|
"epoch": 1.0572289156626506, |
|
"grad_norm": 0.4134596884250641, |
|
"learning_rate": 4.568209241431615e-06, |
|
"loss": 1.0206, |
|
"step": 355 |
|
}, |
|
{ |
|
"epoch": 1.0602409638554218, |
|
"grad_norm": 0.35198330879211426, |
|
"learning_rate": 4.544285751384585e-06, |
|
"loss": 1.0578, |
|
"step": 356 |
|
}, |
|
{ |
|
"epoch": 1.0632530120481927, |
|
"grad_norm": 0.4130623936653137, |
|
"learning_rate": 4.520372776985101e-06, |
|
"loss": 1.0467, |
|
"step": 357 |
|
}, |
|
{ |
|
"epoch": 1.0662650602409638, |
|
"grad_norm": 0.35723182559013367, |
|
"learning_rate": 4.496470870027209e-06, |
|
"loss": 1.0781, |
|
"step": 358 |
|
}, |
|
{ |
|
"epoch": 1.069277108433735, |
|
"grad_norm": 0.35406294465065, |
|
"learning_rate": 4.472580582049578e-06, |
|
"loss": 1.001, |
|
"step": 359 |
|
}, |
|
{ |
|
"epoch": 1.072289156626506, |
|
"grad_norm": 0.38317278027534485, |
|
"learning_rate": 4.448702464322764e-06, |
|
"loss": 1.0656, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 1.0753012048192772, |
|
"grad_norm": 0.338810533285141, |
|
"learning_rate": 4.4248370678364995e-06, |
|
"loss": 0.9687, |
|
"step": 361 |
|
}, |
|
{ |
|
"epoch": 1.0783132530120483, |
|
"grad_norm": 0.33876633644104004, |
|
"learning_rate": 4.400984943286965e-06, |
|
"loss": 1.0505, |
|
"step": 362 |
|
}, |
|
{ |
|
"epoch": 1.0813253012048192, |
|
"grad_norm": 0.3846857249736786, |
|
"learning_rate": 4.377146641064093e-06, |
|
"loss": 1.0058, |
|
"step": 363 |
|
}, |
|
{ |
|
"epoch": 1.0843373493975903, |
|
"grad_norm": 0.4660666286945343, |
|
"learning_rate": 4.3533227112388694e-06, |
|
"loss": 1.0146, |
|
"step": 364 |
|
}, |
|
{ |
|
"epoch": 1.0873493975903614, |
|
"grad_norm": 0.4065142869949341, |
|
"learning_rate": 4.329513703550628e-06, |
|
"loss": 1.0294, |
|
"step": 365 |
|
}, |
|
{ |
|
"epoch": 1.0903614457831325, |
|
"grad_norm": 0.39198002219200134, |
|
"learning_rate": 4.305720167394381e-06, |
|
"loss": 1.0866, |
|
"step": 366 |
|
}, |
|
{ |
|
"epoch": 1.0933734939759037, |
|
"grad_norm": 0.35108157992362976, |
|
"learning_rate": 4.2819426518081265e-06, |
|
"loss": 1.0525, |
|
"step": 367 |
|
}, |
|
{ |
|
"epoch": 1.0963855421686748, |
|
"grad_norm": 0.34058380126953125, |
|
"learning_rate": 4.258181705460188e-06, |
|
"loss": 1.0815, |
|
"step": 368 |
|
}, |
|
{ |
|
"epoch": 1.0993975903614457, |
|
"grad_norm": 0.4144446849822998, |
|
"learning_rate": 4.234437876636557e-06, |
|
"loss": 1.0305, |
|
"step": 369 |
|
}, |
|
{ |
|
"epoch": 1.1024096385542168, |
|
"grad_norm": 0.3802807927131653, |
|
"learning_rate": 4.21071171322823e-06, |
|
"loss": 1.0463, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 1.105421686746988, |
|
"grad_norm": 0.3633134067058563, |
|
"learning_rate": 4.1870037627185785e-06, |
|
"loss": 1.0386, |
|
"step": 371 |
|
}, |
|
{ |
|
"epoch": 1.108433734939759, |
|
"grad_norm": 0.4094638526439667, |
|
"learning_rate": 4.163314572170704e-06, |
|
"loss": 1.0414, |
|
"step": 372 |
|
}, |
|
{ |
|
"epoch": 1.1114457831325302, |
|
"grad_norm": 0.37921878695487976, |
|
"learning_rate": 4.139644688214827e-06, |
|
"loss": 1.0142, |
|
"step": 373 |
|
}, |
|
{ |
|
"epoch": 1.1144578313253013, |
|
"grad_norm": 0.413327157497406, |
|
"learning_rate": 4.115994657035659e-06, |
|
"loss": 1.0886, |
|
"step": 374 |
|
}, |
|
{ |
|
"epoch": 1.1174698795180722, |
|
"grad_norm": 0.37856829166412354, |
|
"learning_rate": 4.0923650243598104e-06, |
|
"loss": 1.084, |
|
"step": 375 |
|
}, |
|
{ |
|
"epoch": 1.1204819277108433, |
|
"grad_norm": 0.41401001811027527, |
|
"learning_rate": 4.0687563354431986e-06, |
|
"loss": 1.118, |
|
"step": 376 |
|
}, |
|
{ |
|
"epoch": 1.1234939759036144, |
|
"grad_norm": 0.3299630582332611, |
|
"learning_rate": 4.045169135058452e-06, |
|
"loss": 0.9993, |
|
"step": 377 |
|
}, |
|
{ |
|
"epoch": 1.1265060240963856, |
|
"grad_norm": 0.40372124314308167, |
|
"learning_rate": 4.021603967482361e-06, |
|
"loss": 0.9855, |
|
"step": 378 |
|
}, |
|
{ |
|
"epoch": 1.1295180722891567, |
|
"grad_norm": 0.360078364610672, |
|
"learning_rate": 3.998061376483298e-06, |
|
"loss": 1.0382, |
|
"step": 379 |
|
}, |
|
{ |
|
"epoch": 1.1325301204819278, |
|
"grad_norm": 0.3652278184890747, |
|
"learning_rate": 3.974541905308679e-06, |
|
"loss": 1.0232, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 1.1355421686746987, |
|
"grad_norm": 0.3333640396595001, |
|
"learning_rate": 3.951046096672434e-06, |
|
"loss": 1.0304, |
|
"step": 381 |
|
}, |
|
{ |
|
"epoch": 1.1385542168674698, |
|
"grad_norm": 0.3765230178833008, |
|
"learning_rate": 3.927574492742473e-06, |
|
"loss": 1.0738, |
|
"step": 382 |
|
}, |
|
{ |
|
"epoch": 1.141566265060241, |
|
"grad_norm": 0.3517187833786011, |
|
"learning_rate": 3.904127635128184e-06, |
|
"loss": 1.0491, |
|
"step": 383 |
|
}, |
|
{ |
|
"epoch": 1.144578313253012, |
|
"grad_norm": 0.35913482308387756, |
|
"learning_rate": 3.880706064867927e-06, |
|
"loss": 1.0509, |
|
"step": 384 |
|
}, |
|
{ |
|
"epoch": 1.1475903614457832, |
|
"grad_norm": 0.3901945650577545, |
|
"learning_rate": 3.857310322416555e-06, |
|
"loss": 1.0653, |
|
"step": 385 |
|
}, |
|
{ |
|
"epoch": 1.1506024096385543, |
|
"grad_norm": 0.3298746347427368, |
|
"learning_rate": 3.833940947632947e-06, |
|
"loss": 0.9943, |
|
"step": 386 |
|
}, |
|
{ |
|
"epoch": 1.1536144578313252, |
|
"grad_norm": 0.3933880031108856, |
|
"learning_rate": 3.8105984797675364e-06, |
|
"loss": 1.06, |
|
"step": 387 |
|
}, |
|
{ |
|
"epoch": 1.1566265060240963, |
|
"grad_norm": 0.42192092537879944, |
|
"learning_rate": 3.7872834574498894e-06, |
|
"loss": 1.0453, |
|
"step": 388 |
|
}, |
|
{ |
|
"epoch": 1.1596385542168675, |
|
"grad_norm": 0.38652369379997253, |
|
"learning_rate": 3.7639964186762506e-06, |
|
"loss": 1.035, |
|
"step": 389 |
|
}, |
|
{ |
|
"epoch": 1.1626506024096386, |
|
"grad_norm": 0.44319620728492737, |
|
"learning_rate": 3.740737900797151e-06, |
|
"loss": 1.1098, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 1.1656626506024097, |
|
"grad_norm": 0.3664276599884033, |
|
"learning_rate": 3.7175084405049978e-06, |
|
"loss": 0.991, |
|
"step": 391 |
|
}, |
|
{ |
|
"epoch": 1.1686746987951806, |
|
"grad_norm": 0.3838660717010498, |
|
"learning_rate": 3.6943085738216855e-06, |
|
"loss": 1.092, |
|
"step": 392 |
|
}, |
|
{ |
|
"epoch": 1.1716867469879517, |
|
"grad_norm": 0.38596200942993164, |
|
"learning_rate": 3.6711388360862417e-06, |
|
"loss": 1.077, |
|
"step": 393 |
|
}, |
|
{ |
|
"epoch": 1.1746987951807228, |
|
"grad_norm": 0.337519109249115, |
|
"learning_rate": 3.6479997619424605e-06, |
|
"loss": 1.0932, |
|
"step": 394 |
|
}, |
|
{ |
|
"epoch": 1.177710843373494, |
|
"grad_norm": 0.350619912147522, |
|
"learning_rate": 3.6248918853265756e-06, |
|
"loss": 1.0796, |
|
"step": 395 |
|
}, |
|
{ |
|
"epoch": 1.180722891566265, |
|
"grad_norm": 0.38858625292778015, |
|
"learning_rate": 3.6018157394549287e-06, |
|
"loss": 1.0689, |
|
"step": 396 |
|
}, |
|
{ |
|
"epoch": 1.1837349397590362, |
|
"grad_norm": 0.38901758193969727, |
|
"learning_rate": 3.5787718568116764e-06, |
|
"loss": 1.1019, |
|
"step": 397 |
|
}, |
|
{ |
|
"epoch": 1.1867469879518073, |
|
"grad_norm": 0.34919285774230957, |
|
"learning_rate": 3.5557607691364983e-06, |
|
"loss": 1.0646, |
|
"step": 398 |
|
}, |
|
{ |
|
"epoch": 1.1897590361445782, |
|
"grad_norm": 0.41810017824172974, |
|
"learning_rate": 3.5327830074123214e-06, |
|
"loss": 1.0429, |
|
"step": 399 |
|
}, |
|
{ |
|
"epoch": 1.1927710843373494, |
|
"grad_norm": 0.3683408796787262, |
|
"learning_rate": 3.509839101853082e-06, |
|
"loss": 0.9905, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 1.1957831325301205, |
|
"grad_norm": 0.3720911741256714, |
|
"learning_rate": 3.486929581891476e-06, |
|
"loss": 1.0213, |
|
"step": 401 |
|
}, |
|
{ |
|
"epoch": 1.1987951807228916, |
|
"grad_norm": 0.3495194911956787, |
|
"learning_rate": 3.464054976166753e-06, |
|
"loss": 1.0386, |
|
"step": 402 |
|
}, |
|
{ |
|
"epoch": 1.2018072289156627, |
|
"grad_norm": 0.36551299691200256, |
|
"learning_rate": 3.441215812512508e-06, |
|
"loss": 1.0043, |
|
"step": 403 |
|
}, |
|
{ |
|
"epoch": 1.2048192771084336, |
|
"grad_norm": 0.3687341809272766, |
|
"learning_rate": 3.41841261794451e-06, |
|
"loss": 1.0313, |
|
"step": 404 |
|
}, |
|
{ |
|
"epoch": 1.2078313253012047, |
|
"grad_norm": 0.3739585280418396, |
|
"learning_rate": 3.3956459186485414e-06, |
|
"loss": 1.0326, |
|
"step": 405 |
|
}, |
|
{ |
|
"epoch": 1.2108433734939759, |
|
"grad_norm": 0.38974305987358093, |
|
"learning_rate": 3.372916239968246e-06, |
|
"loss": 1.0665, |
|
"step": 406 |
|
}, |
|
{ |
|
"epoch": 1.213855421686747, |
|
"grad_norm": 0.4061500132083893, |
|
"learning_rate": 3.3502241063930196e-06, |
|
"loss": 1.114, |
|
"step": 407 |
|
}, |
|
{ |
|
"epoch": 1.216867469879518, |
|
"grad_norm": 0.398306280374527, |
|
"learning_rate": 3.327570041545897e-06, |
|
"loss": 1.0584, |
|
"step": 408 |
|
}, |
|
{ |
|
"epoch": 1.2198795180722892, |
|
"grad_norm": 0.36864137649536133, |
|
"learning_rate": 3.304954568171478e-06, |
|
"loss": 1.081, |
|
"step": 409 |
|
}, |
|
{ |
|
"epoch": 1.2228915662650603, |
|
"grad_norm": 0.3283785581588745, |
|
"learning_rate": 3.282378208123856e-06, |
|
"loss": 1.0605, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 1.2259036144578312, |
|
"grad_norm": 0.38243263959884644, |
|
"learning_rate": 3.259841482354582e-06, |
|
"loss": 1.0161, |
|
"step": 411 |
|
}, |
|
{ |
|
"epoch": 1.2289156626506024, |
|
"grad_norm": 0.38818714022636414, |
|
"learning_rate": 3.2373449109006476e-06, |
|
"loss": 1.0602, |
|
"step": 412 |
|
}, |
|
{ |
|
"epoch": 1.2319277108433735, |
|
"grad_norm": 0.3809143304824829, |
|
"learning_rate": 3.21488901287247e-06, |
|
"loss": 1.0088, |
|
"step": 413 |
|
}, |
|
{ |
|
"epoch": 1.2349397590361446, |
|
"grad_norm": 0.37948790192604065, |
|
"learning_rate": 3.192474306441936e-06, |
|
"loss": 1.0532, |
|
"step": 414 |
|
}, |
|
{ |
|
"epoch": 1.2379518072289157, |
|
"grad_norm": 0.44067418575286865, |
|
"learning_rate": 3.170101308830421e-06, |
|
"loss": 1.0377, |
|
"step": 415 |
|
}, |
|
{ |
|
"epoch": 1.2409638554216866, |
|
"grad_norm": 0.3667253255844116, |
|
"learning_rate": 3.1477705362968702e-06, |
|
"loss": 1.0234, |
|
"step": 416 |
|
}, |
|
{ |
|
"epoch": 1.2439759036144578, |
|
"grad_norm": 0.37526583671569824, |
|
"learning_rate": 3.1254825041258852e-06, |
|
"loss": 1.0344, |
|
"step": 417 |
|
}, |
|
{ |
|
"epoch": 1.2469879518072289, |
|
"grad_norm": 0.42664584517478943, |
|
"learning_rate": 3.103237726615822e-06, |
|
"loss": 1.0439, |
|
"step": 418 |
|
}, |
|
{ |
|
"epoch": 1.25, |
|
"grad_norm": 0.3878503441810608, |
|
"learning_rate": 3.081036717066938e-06, |
|
"loss": 1.1294, |
|
"step": 419 |
|
}, |
|
{ |
|
"epoch": 1.2530120481927711, |
|
"grad_norm": 0.4370405972003937, |
|
"learning_rate": 3.0588799877695375e-06, |
|
"loss": 1.0563, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 1.2560240963855422, |
|
"grad_norm": 0.38727104663848877, |
|
"learning_rate": 3.036768049992157e-06, |
|
"loss": 1.0561, |
|
"step": 421 |
|
}, |
|
{ |
|
"epoch": 1.2590361445783134, |
|
"grad_norm": 0.3639293909072876, |
|
"learning_rate": 3.0147014139697596e-06, |
|
"loss": 1.0747, |
|
"step": 422 |
|
}, |
|
{ |
|
"epoch": 1.2620481927710843, |
|
"grad_norm": 0.3889468014240265, |
|
"learning_rate": 2.99268058889197e-06, |
|
"loss": 1.0575, |
|
"step": 423 |
|
}, |
|
{ |
|
"epoch": 1.2650602409638554, |
|
"grad_norm": 0.3735024929046631, |
|
"learning_rate": 2.9707060828913226e-06, |
|
"loss": 1.0432, |
|
"step": 424 |
|
}, |
|
{ |
|
"epoch": 1.2680722891566265, |
|
"grad_norm": 0.3623259365558624, |
|
"learning_rate": 2.9487784030315297e-06, |
|
"loss": 1.0929, |
|
"step": 425 |
|
}, |
|
{ |
|
"epoch": 1.2710843373493976, |
|
"grad_norm": 0.38363751769065857, |
|
"learning_rate": 2.9268980552957917e-06, |
|
"loss": 1.018, |
|
"step": 426 |
|
}, |
|
{ |
|
"epoch": 1.2740963855421688, |
|
"grad_norm": 0.36796835064888, |
|
"learning_rate": 2.905065544575114e-06, |
|
"loss": 1.0636, |
|
"step": 427 |
|
}, |
|
{ |
|
"epoch": 1.2771084337349397, |
|
"grad_norm": 0.3460337817668915, |
|
"learning_rate": 2.8832813746566546e-06, |
|
"loss": 1.1039, |
|
"step": 428 |
|
}, |
|
{ |
|
"epoch": 1.2801204819277108, |
|
"grad_norm": 0.37609270215034485, |
|
"learning_rate": 2.86154604821211e-06, |
|
"loss": 1.0746, |
|
"step": 429 |
|
}, |
|
{ |
|
"epoch": 1.283132530120482, |
|
"grad_norm": 0.39871373772621155, |
|
"learning_rate": 2.8398600667861032e-06, |
|
"loss": 1.0095, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 1.286144578313253, |
|
"grad_norm": 0.38184547424316406, |
|
"learning_rate": 2.8182239307846195e-06, |
|
"loss": 1.0278, |
|
"step": 431 |
|
}, |
|
{ |
|
"epoch": 1.2891566265060241, |
|
"grad_norm": 0.40051835775375366, |
|
"learning_rate": 2.796638139463456e-06, |
|
"loss": 1.0261, |
|
"step": 432 |
|
}, |
|
{ |
|
"epoch": 1.2921686746987953, |
|
"grad_norm": 0.38206747174263, |
|
"learning_rate": 2.7751031909167046e-06, |
|
"loss": 1.0817, |
|
"step": 433 |
|
}, |
|
{ |
|
"epoch": 1.2951807228915664, |
|
"grad_norm": 0.42132294178009033, |
|
"learning_rate": 2.7536195820652506e-06, |
|
"loss": 1.0253, |
|
"step": 434 |
|
}, |
|
{ |
|
"epoch": 1.2981927710843373, |
|
"grad_norm": 0.37671446800231934, |
|
"learning_rate": 2.73218780864531e-06, |
|
"loss": 1.0555, |
|
"step": 435 |
|
}, |
|
{ |
|
"epoch": 1.3012048192771084, |
|
"grad_norm": 0.405241459608078, |
|
"learning_rate": 2.710808365197e-06, |
|
"loss": 1.0957, |
|
"step": 436 |
|
}, |
|
{ |
|
"epoch": 1.3042168674698795, |
|
"grad_norm": 0.3754029870033264, |
|
"learning_rate": 2.689481745052908e-06, |
|
"loss": 0.9929, |
|
"step": 437 |
|
}, |
|
{ |
|
"epoch": 1.3072289156626506, |
|
"grad_norm": 0.3823848068714142, |
|
"learning_rate": 2.6682084403267305e-06, |
|
"loss": 1.0884, |
|
"step": 438 |
|
}, |
|
{ |
|
"epoch": 1.3102409638554218, |
|
"grad_norm": 0.3721786439418793, |
|
"learning_rate": 2.6469889419018985e-06, |
|
"loss": 1.0173, |
|
"step": 439 |
|
}, |
|
{ |
|
"epoch": 1.3132530120481927, |
|
"grad_norm": 0.3947805166244507, |
|
"learning_rate": 2.6258237394202556e-06, |
|
"loss": 1.0628, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 1.3162650602409638, |
|
"grad_norm": 0.3939521908760071, |
|
"learning_rate": 2.60471332127077e-06, |
|
"loss": 1.0576, |
|
"step": 441 |
|
}, |
|
{ |
|
"epoch": 1.319277108433735, |
|
"grad_norm": 0.40392783284187317, |
|
"learning_rate": 2.5836581745782474e-06, |
|
"loss": 1.0515, |
|
"step": 442 |
|
}, |
|
{ |
|
"epoch": 1.322289156626506, |
|
"grad_norm": 0.39871639013290405, |
|
"learning_rate": 2.5626587851921053e-06, |
|
"loss": 1.0039, |
|
"step": 443 |
|
}, |
|
{ |
|
"epoch": 1.3253012048192772, |
|
"grad_norm": 0.409939706325531, |
|
"learning_rate": 2.541715637675156e-06, |
|
"loss": 1.0394, |
|
"step": 444 |
|
}, |
|
{ |
|
"epoch": 1.3283132530120483, |
|
"grad_norm": 0.3738921880722046, |
|
"learning_rate": 2.520829215292426e-06, |
|
"loss": 1.0766, |
|
"step": 445 |
|
}, |
|
{ |
|
"epoch": 1.3313253012048194, |
|
"grad_norm": 0.3500833213329315, |
|
"learning_rate": 2.5000000000000015e-06, |
|
"loss": 1.0533, |
|
"step": 446 |
|
}, |
|
{ |
|
"epoch": 1.3343373493975903, |
|
"grad_norm": 0.3490578532218933, |
|
"learning_rate": 2.4792284724339077e-06, |
|
"loss": 1.0512, |
|
"step": 447 |
|
}, |
|
{ |
|
"epoch": 1.3373493975903614, |
|
"grad_norm": 0.32971325516700745, |
|
"learning_rate": 2.4585151118990286e-06, |
|
"loss": 1.0417, |
|
"step": 448 |
|
}, |
|
{ |
|
"epoch": 1.3403614457831325, |
|
"grad_norm": 0.36310428380966187, |
|
"learning_rate": 2.4378603963580293e-06, |
|
"loss": 1.122, |
|
"step": 449 |
|
}, |
|
{ |
|
"epoch": 1.3433734939759037, |
|
"grad_norm": 0.40908730030059814, |
|
"learning_rate": 2.417264802420343e-06, |
|
"loss": 1.0535, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 1.3463855421686746, |
|
"grad_norm": 0.3725447654724121, |
|
"learning_rate": 2.396728805331167e-06, |
|
"loss": 1.0547, |
|
"step": 451 |
|
}, |
|
{ |
|
"epoch": 1.3493975903614457, |
|
"grad_norm": 0.39754316210746765, |
|
"learning_rate": 2.3762528789604887e-06, |
|
"loss": 1.0292, |
|
"step": 452 |
|
}, |
|
{ |
|
"epoch": 1.3524096385542168, |
|
"grad_norm": 0.39532670378685, |
|
"learning_rate": 2.3558374957921678e-06, |
|
"loss": 1.0182, |
|
"step": 453 |
|
}, |
|
{ |
|
"epoch": 1.355421686746988, |
|
"grad_norm": 0.40215209126472473, |
|
"learning_rate": 2.3354831269130133e-06, |
|
"loss": 1.0495, |
|
"step": 454 |
|
}, |
|
{ |
|
"epoch": 1.358433734939759, |
|
"grad_norm": 0.421367347240448, |
|
"learning_rate": 2.3151902420019357e-06, |
|
"loss": 1.0389, |
|
"step": 455 |
|
}, |
|
{ |
|
"epoch": 1.3614457831325302, |
|
"grad_norm": 0.38005751371383667, |
|
"learning_rate": 2.2949593093190863e-06, |
|
"loss": 1.0681, |
|
"step": 456 |
|
}, |
|
{ |
|
"epoch": 1.3644578313253013, |
|
"grad_norm": 0.3765680193901062, |
|
"learning_rate": 2.274790795695071e-06, |
|
"loss": 1.0338, |
|
"step": 457 |
|
}, |
|
{ |
|
"epoch": 1.3674698795180724, |
|
"grad_norm": 0.34579628705978394, |
|
"learning_rate": 2.2546851665201692e-06, |
|
"loss": 1.0749, |
|
"step": 458 |
|
}, |
|
{ |
|
"epoch": 1.3704819277108433, |
|
"grad_norm": 0.3837708830833435, |
|
"learning_rate": 2.2346428857335904e-06, |
|
"loss": 1.0642, |
|
"step": 459 |
|
}, |
|
{ |
|
"epoch": 1.3734939759036144, |
|
"grad_norm": 0.3635129928588867, |
|
"learning_rate": 2.2146644158127827e-06, |
|
"loss": 1.0432, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 1.3765060240963856, |
|
"grad_norm": 0.40961354970932007, |
|
"learning_rate": 2.1947502177627437e-06, |
|
"loss": 1.0437, |
|
"step": 461 |
|
}, |
|
{ |
|
"epoch": 1.3795180722891567, |
|
"grad_norm": 0.37368935346603394, |
|
"learning_rate": 2.1749007511054005e-06, |
|
"loss": 1.0578, |
|
"step": 462 |
|
}, |
|
{ |
|
"epoch": 1.3825301204819276, |
|
"grad_norm": 0.40420466661453247, |
|
"learning_rate": 2.1551164738689896e-06, |
|
"loss": 1.0743, |
|
"step": 463 |
|
}, |
|
{ |
|
"epoch": 1.3855421686746987, |
|
"grad_norm": 0.3825657069683075, |
|
"learning_rate": 2.1353978425775006e-06, |
|
"loss": 1.0327, |
|
"step": 464 |
|
}, |
|
{ |
|
"epoch": 1.3885542168674698, |
|
"grad_norm": 0.39921796321868896, |
|
"learning_rate": 2.1157453122401385e-06, |
|
"loss": 1.0576, |
|
"step": 465 |
|
}, |
|
{ |
|
"epoch": 1.391566265060241, |
|
"grad_norm": 0.36656901240348816, |
|
"learning_rate": 2.0961593363408154e-06, |
|
"loss": 1.0264, |
|
"step": 466 |
|
}, |
|
{ |
|
"epoch": 1.394578313253012, |
|
"grad_norm": 0.3587695360183716, |
|
"learning_rate": 2.076640366827703e-06, |
|
"loss": 1.071, |
|
"step": 467 |
|
}, |
|
{ |
|
"epoch": 1.3975903614457832, |
|
"grad_norm": 0.3668745756149292, |
|
"learning_rate": 2.0571888541027857e-06, |
|
"loss": 0.9852, |
|
"step": 468 |
|
}, |
|
{ |
|
"epoch": 1.4006024096385543, |
|
"grad_norm": 0.41092541813850403, |
|
"learning_rate": 2.0378052470114822e-06, |
|
"loss": 1.0234, |
|
"step": 469 |
|
}, |
|
{ |
|
"epoch": 1.4036144578313254, |
|
"grad_norm": 0.42871734499931335, |
|
"learning_rate": 2.018489992832283e-06, |
|
"loss": 1.0427, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 1.4066265060240963, |
|
"grad_norm": 0.3699125349521637, |
|
"learning_rate": 1.999243537266424e-06, |
|
"loss": 1.0422, |
|
"step": 471 |
|
}, |
|
{ |
|
"epoch": 1.4096385542168675, |
|
"grad_norm": 0.36434075236320496, |
|
"learning_rate": 1.980066324427613e-06, |
|
"loss": 1.0588, |
|
"step": 472 |
|
}, |
|
{ |
|
"epoch": 1.4126506024096386, |
|
"grad_norm": 0.4026855528354645, |
|
"learning_rate": 1.960958796831769e-06, |
|
"loss": 1.0295, |
|
"step": 473 |
|
}, |
|
{ |
|
"epoch": 1.4156626506024097, |
|
"grad_norm": 0.3882656395435333, |
|
"learning_rate": 1.9419213953868236e-06, |
|
"loss": 1.0366, |
|
"step": 474 |
|
}, |
|
{ |
|
"epoch": 1.4186746987951806, |
|
"grad_norm": 0.40121057629585266, |
|
"learning_rate": 1.9229545593825367e-06, |
|
"loss": 1.0806, |
|
"step": 475 |
|
}, |
|
{ |
|
"epoch": 1.4216867469879517, |
|
"grad_norm": 0.3884546756744385, |
|
"learning_rate": 1.9040587264803673e-06, |
|
"loss": 1.1063, |
|
"step": 476 |
|
}, |
|
{ |
|
"epoch": 1.4246987951807228, |
|
"grad_norm": 0.3452583849430084, |
|
"learning_rate": 1.8852343327033717e-06, |
|
"loss": 1.0373, |
|
"step": 477 |
|
}, |
|
{ |
|
"epoch": 1.427710843373494, |
|
"grad_norm": 0.39576640725135803, |
|
"learning_rate": 1.8664818124261375e-06, |
|
"loss": 1.0804, |
|
"step": 478 |
|
}, |
|
{ |
|
"epoch": 1.430722891566265, |
|
"grad_norm": 0.40806901454925537, |
|
"learning_rate": 1.8478015983647718e-06, |
|
"loss": 1.0341, |
|
"step": 479 |
|
}, |
|
{ |
|
"epoch": 1.4337349397590362, |
|
"grad_norm": 0.37504813075065613, |
|
"learning_rate": 1.8291941215669024e-06, |
|
"loss": 1.0557, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 1.4367469879518073, |
|
"grad_norm": 0.39833274483680725, |
|
"learning_rate": 1.8106598114017398e-06, |
|
"loss": 1.0336, |
|
"step": 481 |
|
}, |
|
{ |
|
"epoch": 1.4397590361445782, |
|
"grad_norm": 0.39540019631385803, |
|
"learning_rate": 1.7921990955501705e-06, |
|
"loss": 1.0473, |
|
"step": 482 |
|
}, |
|
{ |
|
"epoch": 1.4427710843373494, |
|
"grad_norm": 0.40363839268684387, |
|
"learning_rate": 1.7738123999948853e-06, |
|
"loss": 1.0193, |
|
"step": 483 |
|
}, |
|
{ |
|
"epoch": 1.4457831325301205, |
|
"grad_norm": 0.37323495745658875, |
|
"learning_rate": 1.755500149010549e-06, |
|
"loss": 0.9827, |
|
"step": 484 |
|
}, |
|
{ |
|
"epoch": 1.4487951807228916, |
|
"grad_norm": 0.41902491450309753, |
|
"learning_rate": 1.737262765154008e-06, |
|
"loss": 1.069, |
|
"step": 485 |
|
}, |
|
{ |
|
"epoch": 1.4518072289156627, |
|
"grad_norm": 0.40718671679496765, |
|
"learning_rate": 1.7191006692545493e-06, |
|
"loss": 1.0873, |
|
"step": 486 |
|
}, |
|
{ |
|
"epoch": 1.4548192771084336, |
|
"grad_norm": 0.4020282030105591, |
|
"learning_rate": 1.7010142804041785e-06, |
|
"loss": 1.0425, |
|
"step": 487 |
|
}, |
|
{ |
|
"epoch": 1.4578313253012047, |
|
"grad_norm": 0.3684733510017395, |
|
"learning_rate": 1.6830040159479521e-06, |
|
"loss": 1.0121, |
|
"step": 488 |
|
}, |
|
{ |
|
"epoch": 1.4608433734939759, |
|
"grad_norm": 0.3506666421890259, |
|
"learning_rate": 1.66507029147436e-06, |
|
"loss": 1.0484, |
|
"step": 489 |
|
}, |
|
{ |
|
"epoch": 1.463855421686747, |
|
"grad_norm": 0.468654602766037, |
|
"learning_rate": 1.6472135208057128e-06, |
|
"loss": 1.0682, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 1.466867469879518, |
|
"grad_norm": 0.4075433313846588, |
|
"learning_rate": 1.629434115988614e-06, |
|
"loss": 1.0589, |
|
"step": 491 |
|
}, |
|
{ |
|
"epoch": 1.4698795180722892, |
|
"grad_norm": 0.3535695970058441, |
|
"learning_rate": 1.611732487284437e-06, |
|
"loss": 1.0628, |
|
"step": 492 |
|
}, |
|
{ |
|
"epoch": 1.4728915662650603, |
|
"grad_norm": 0.37299081683158875, |
|
"learning_rate": 1.5941090431598654e-06, |
|
"loss": 1.019, |
|
"step": 493 |
|
}, |
|
{ |
|
"epoch": 1.4759036144578312, |
|
"grad_norm": 0.34906429052352905, |
|
"learning_rate": 1.5765641902774704e-06, |
|
"loss": 1.0281, |
|
"step": 494 |
|
}, |
|
{ |
|
"epoch": 1.4789156626506024, |
|
"grad_norm": 0.4228847920894623, |
|
"learning_rate": 1.5590983334863191e-06, |
|
"loss": 1.0176, |
|
"step": 495 |
|
}, |
|
{ |
|
"epoch": 1.4819277108433735, |
|
"grad_norm": 0.4109274446964264, |
|
"learning_rate": 1.5417118758126408e-06, |
|
"loss": 1.0818, |
|
"step": 496 |
|
}, |
|
{ |
|
"epoch": 1.4849397590361446, |
|
"grad_norm": 0.3916458189487457, |
|
"learning_rate": 1.524405218450517e-06, |
|
"loss": 1.0299, |
|
"step": 497 |
|
}, |
|
{ |
|
"epoch": 1.4879518072289157, |
|
"grad_norm": 0.3761802911758423, |
|
"learning_rate": 1.5071787607526366e-06, |
|
"loss": 1.0152, |
|
"step": 498 |
|
}, |
|
{ |
|
"epoch": 1.4909638554216866, |
|
"grad_norm": 0.3690100610256195, |
|
"learning_rate": 1.4900329002210684e-06, |
|
"loss": 1.0818, |
|
"step": 499 |
|
}, |
|
{ |
|
"epoch": 1.4939759036144578, |
|
"grad_norm": 0.36231639981269836, |
|
"learning_rate": 1.472968032498095e-06, |
|
"loss": 1.0708, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 1.4969879518072289, |
|
"grad_norm": 0.3943842053413391, |
|
"learning_rate": 1.4559845513570859e-06, |
|
"loss": 1.0399, |
|
"step": 501 |
|
}, |
|
{ |
|
"epoch": 1.5, |
|
"grad_norm": 0.380312979221344, |
|
"learning_rate": 1.439082848693406e-06, |
|
"loss": 0.9593, |
|
"step": 502 |
|
}, |
|
{ |
|
"epoch": 1.5030120481927711, |
|
"grad_norm": 0.43198204040527344, |
|
"learning_rate": 1.4222633145153758e-06, |
|
"loss": 0.9807, |
|
"step": 503 |
|
}, |
|
{ |
|
"epoch": 1.5060240963855422, |
|
"grad_norm": 0.3783879578113556, |
|
"learning_rate": 1.4055263369352673e-06, |
|
"loss": 1.0255, |
|
"step": 504 |
|
}, |
|
{ |
|
"epoch": 1.5090361445783134, |
|
"grad_norm": 0.3918922543525696, |
|
"learning_rate": 1.388872302160353e-06, |
|
"loss": 1.0401, |
|
"step": 505 |
|
}, |
|
{ |
|
"epoch": 1.5120481927710845, |
|
"grad_norm": 0.39092695713043213, |
|
"learning_rate": 1.3723015944839947e-06, |
|
"loss": 1.0715, |
|
"step": 506 |
|
}, |
|
{ |
|
"epoch": 1.5150602409638554, |
|
"grad_norm": 0.33539846539497375, |
|
"learning_rate": 1.35581459627677e-06, |
|
"loss": 1.0185, |
|
"step": 507 |
|
}, |
|
{ |
|
"epoch": 1.5180722891566265, |
|
"grad_norm": 0.3622112572193146, |
|
"learning_rate": 1.339411687977657e-06, |
|
"loss": 1.0932, |
|
"step": 508 |
|
}, |
|
{ |
|
"epoch": 1.5210843373493976, |
|
"grad_norm": 0.3799549341201782, |
|
"learning_rate": 1.3230932480852487e-06, |
|
"loss": 1.0413, |
|
"step": 509 |
|
}, |
|
{ |
|
"epoch": 1.5240963855421685, |
|
"grad_norm": 0.37101662158966064, |
|
"learning_rate": 1.3068596531490253e-06, |
|
"loss": 1.0402, |
|
"step": 510 |
|
}, |
|
{ |
|
"epoch": 1.5271084337349397, |
|
"grad_norm": 0.3901662826538086, |
|
"learning_rate": 1.290711277760658e-06, |
|
"loss": 1.0245, |
|
"step": 511 |
|
}, |
|
{ |
|
"epoch": 1.5301204819277108, |
|
"grad_norm": 0.37363961338996887, |
|
"learning_rate": 1.2746484945453691e-06, |
|
"loss": 1.0387, |
|
"step": 512 |
|
}, |
|
{ |
|
"epoch": 1.533132530120482, |
|
"grad_norm": 0.376298725605011, |
|
"learning_rate": 1.2586716741533389e-06, |
|
"loss": 1.0305, |
|
"step": 513 |
|
}, |
|
{ |
|
"epoch": 1.536144578313253, |
|
"grad_norm": 0.35384973883628845, |
|
"learning_rate": 1.2427811852511396e-06, |
|
"loss": 1.0001, |
|
"step": 514 |
|
}, |
|
{ |
|
"epoch": 1.5391566265060241, |
|
"grad_norm": 0.3355305790901184, |
|
"learning_rate": 1.226977394513247e-06, |
|
"loss": 1.0756, |
|
"step": 515 |
|
}, |
|
{ |
|
"epoch": 1.5421686746987953, |
|
"grad_norm": 0.3982202112674713, |
|
"learning_rate": 1.2112606666135602e-06, |
|
"loss": 1.0102, |
|
"step": 516 |
|
}, |
|
{ |
|
"epoch": 1.5451807228915664, |
|
"grad_norm": 0.33996695280075073, |
|
"learning_rate": 1.1956313642169974e-06, |
|
"loss": 1.0388, |
|
"step": 517 |
|
}, |
|
{ |
|
"epoch": 1.5481927710843375, |
|
"grad_norm": 0.3969401717185974, |
|
"learning_rate": 1.1800898479711293e-06, |
|
"loss": 1.0541, |
|
"step": 518 |
|
}, |
|
{ |
|
"epoch": 1.5512048192771084, |
|
"grad_norm": 0.3649154603481293, |
|
"learning_rate": 1.1646364764978468e-06, |
|
"loss": 1.0625, |
|
"step": 519 |
|
}, |
|
{ |
|
"epoch": 1.5542168674698795, |
|
"grad_norm": 0.39856594800949097, |
|
"learning_rate": 1.1492716063850973e-06, |
|
"loss": 1.0405, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 1.5572289156626506, |
|
"grad_norm": 0.3574175238609314, |
|
"learning_rate": 1.1339955921786504e-06, |
|
"loss": 1.0486, |
|
"step": 521 |
|
}, |
|
{ |
|
"epoch": 1.5602409638554215, |
|
"grad_norm": 0.36913472414016724, |
|
"learning_rate": 1.1188087863739173e-06, |
|
"loss": 0.9595, |
|
"step": 522 |
|
}, |
|
{ |
|
"epoch": 1.5632530120481927, |
|
"grad_norm": 0.32440900802612305, |
|
"learning_rate": 1.1037115394078162e-06, |
|
"loss": 1.0586, |
|
"step": 523 |
|
}, |
|
{ |
|
"epoch": 1.5662650602409638, |
|
"grad_norm": 0.41809505224227905, |
|
"learning_rate": 1.0887041996506858e-06, |
|
"loss": 1.0959, |
|
"step": 524 |
|
}, |
|
{ |
|
"epoch": 1.569277108433735, |
|
"grad_norm": 0.3481323719024658, |
|
"learning_rate": 1.0737871133982524e-06, |
|
"loss": 1.0388, |
|
"step": 525 |
|
}, |
|
{ |
|
"epoch": 1.572289156626506, |
|
"grad_norm": 0.3880089223384857, |
|
"learning_rate": 1.0589606248636291e-06, |
|
"loss": 1.0153, |
|
"step": 526 |
|
}, |
|
{ |
|
"epoch": 1.5753012048192772, |
|
"grad_norm": 0.3808007836341858, |
|
"learning_rate": 1.0442250761693829e-06, |
|
"loss": 1.0111, |
|
"step": 527 |
|
}, |
|
{ |
|
"epoch": 1.5783132530120483, |
|
"grad_norm": 0.38831576704978943, |
|
"learning_rate": 1.0295808073396352e-06, |
|
"loss": 0.9816, |
|
"step": 528 |
|
}, |
|
{ |
|
"epoch": 1.5813253012048194, |
|
"grad_norm": 0.41834479570388794, |
|
"learning_rate": 1.015028156292212e-06, |
|
"loss": 1.0189, |
|
"step": 529 |
|
}, |
|
{ |
|
"epoch": 1.5843373493975905, |
|
"grad_norm": 0.3809266984462738, |
|
"learning_rate": 1.0005674588308566e-06, |
|
"loss": 1.0146, |
|
"step": 530 |
|
}, |
|
{ |
|
"epoch": 1.5873493975903614, |
|
"grad_norm": 0.4059775471687317, |
|
"learning_rate": 9.861990486374695e-07, |
|
"loss": 0.9792, |
|
"step": 531 |
|
}, |
|
{ |
|
"epoch": 1.5903614457831325, |
|
"grad_norm": 0.36427873373031616, |
|
"learning_rate": 9.719232572644189e-07, |
|
"loss": 1.0827, |
|
"step": 532 |
|
}, |
|
{ |
|
"epoch": 1.5933734939759037, |
|
"grad_norm": 0.3794417679309845, |
|
"learning_rate": 9.577404141268815e-07, |
|
"loss": 1.0314, |
|
"step": 533 |
|
}, |
|
{ |
|
"epoch": 1.5963855421686746, |
|
"grad_norm": 0.40571263432502747, |
|
"learning_rate": 9.436508464952471e-07, |
|
"loss": 1.0521, |
|
"step": 534 |
|
}, |
|
{ |
|
"epoch": 1.5993975903614457, |
|
"grad_norm": 0.36858484148979187, |
|
"learning_rate": 9.296548794875659e-07, |
|
"loss": 1.0314, |
|
"step": 535 |
|
}, |
|
{ |
|
"epoch": 1.6024096385542168, |
|
"grad_norm": 0.35998910665512085, |
|
"learning_rate": 9.157528360620416e-07, |
|
"loss": 1.0451, |
|
"step": 536 |
|
}, |
|
{ |
|
"epoch": 1.605421686746988, |
|
"grad_norm": 0.3696284294128418, |
|
"learning_rate": 9.019450370095867e-07, |
|
"loss": 0.9977, |
|
"step": 537 |
|
}, |
|
{ |
|
"epoch": 1.608433734939759, |
|
"grad_norm": 0.4475997984409332, |
|
"learning_rate": 8.882318009464124e-07, |
|
"loss": 1.0073, |
|
"step": 538 |
|
}, |
|
{ |
|
"epoch": 1.6114457831325302, |
|
"grad_norm": 0.40017929673194885, |
|
"learning_rate": 8.74613444306684e-07, |
|
"loss": 0.9603, |
|
"step": 539 |
|
}, |
|
{ |
|
"epoch": 1.6144578313253013, |
|
"grad_norm": 0.3758133053779602, |
|
"learning_rate": 8.61090281335214e-07, |
|
"loss": 0.9584, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 1.6174698795180724, |
|
"grad_norm": 0.35535839200019836, |
|
"learning_rate": 8.476626240802099e-07, |
|
"loss": 1.1102, |
|
"step": 541 |
|
}, |
|
{ |
|
"epoch": 1.6204819277108435, |
|
"grad_norm": 0.43646156787872314, |
|
"learning_rate": 8.343307823860819e-07, |
|
"loss": 1.0792, |
|
"step": 542 |
|
}, |
|
{ |
|
"epoch": 1.6234939759036144, |
|
"grad_norm": 0.39517444372177124, |
|
"learning_rate": 8.210950638862813e-07, |
|
"loss": 1.0216, |
|
"step": 543 |
|
}, |
|
{ |
|
"epoch": 1.6265060240963856, |
|
"grad_norm": 0.42866745591163635, |
|
"learning_rate": 8.079557739962129e-07, |
|
"loss": 1.0596, |
|
"step": 544 |
|
}, |
|
{ |
|
"epoch": 1.6295180722891565, |
|
"grad_norm": 0.3550488352775574, |
|
"learning_rate": 7.949132159061784e-07, |
|
"loss": 1.0535, |
|
"step": 545 |
|
}, |
|
{ |
|
"epoch": 1.6325301204819276, |
|
"grad_norm": 0.3993145823478699, |
|
"learning_rate": 7.819676905743872e-07, |
|
"loss": 1.008, |
|
"step": 546 |
|
}, |
|
{ |
|
"epoch": 1.6355421686746987, |
|
"grad_norm": 0.39240461587905884, |
|
"learning_rate": 7.691194967200099e-07, |
|
"loss": 1.0231, |
|
"step": 547 |
|
}, |
|
{ |
|
"epoch": 1.6385542168674698, |
|
"grad_norm": 0.356810063123703, |
|
"learning_rate": 7.563689308162803e-07, |
|
"loss": 1.0048, |
|
"step": 548 |
|
}, |
|
{ |
|
"epoch": 1.641566265060241, |
|
"grad_norm": 0.36379274725914, |
|
"learning_rate": 7.43716287083664e-07, |
|
"loss": 1.0925, |
|
"step": 549 |
|
}, |
|
{ |
|
"epoch": 1.644578313253012, |
|
"grad_norm": 0.4245232045650482, |
|
"learning_rate": 7.31161857483057e-07, |
|
"loss": 1.0368, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 1.6475903614457832, |
|
"grad_norm": 0.3779962658882141, |
|
"learning_rate": 7.187059317090622e-07, |
|
"loss": 1.1019, |
|
"step": 551 |
|
}, |
|
{ |
|
"epoch": 1.6506024096385543, |
|
"grad_norm": 0.41444671154022217, |
|
"learning_rate": 7.063487971832922e-07, |
|
"loss": 1.084, |
|
"step": 552 |
|
}, |
|
{ |
|
"epoch": 1.6536144578313254, |
|
"grad_norm": 0.369693398475647, |
|
"learning_rate": 6.940907390477458e-07, |
|
"loss": 1.0164, |
|
"step": 553 |
|
}, |
|
{ |
|
"epoch": 1.6566265060240963, |
|
"grad_norm": 0.43131789565086365, |
|
"learning_rate": 6.819320401582258e-07, |
|
"loss": 1.0915, |
|
"step": 554 |
|
}, |
|
{ |
|
"epoch": 1.6596385542168675, |
|
"grad_norm": 0.41402745246887207, |
|
"learning_rate": 6.698729810778065e-07, |
|
"loss": 1.0014, |
|
"step": 555 |
|
}, |
|
{ |
|
"epoch": 1.6626506024096386, |
|
"grad_norm": 0.38247060775756836, |
|
"learning_rate": 6.579138400703716e-07, |
|
"loss": 1.0127, |
|
"step": 556 |
|
}, |
|
{ |
|
"epoch": 1.6656626506024095, |
|
"grad_norm": 0.45507028698921204, |
|
"learning_rate": 6.460548930941801e-07, |
|
"loss": 1.0202, |
|
"step": 557 |
|
}, |
|
{ |
|
"epoch": 1.6686746987951806, |
|
"grad_norm": 0.381002813577652, |
|
"learning_rate": 6.342964137955071e-07, |
|
"loss": 1.035, |
|
"step": 558 |
|
}, |
|
{ |
|
"epoch": 1.6716867469879517, |
|
"grad_norm": 0.4605034291744232, |
|
"learning_rate": 6.226386735023271e-07, |
|
"loss": 1.0472, |
|
"step": 559 |
|
}, |
|
{ |
|
"epoch": 1.6746987951807228, |
|
"grad_norm": 0.3616805970668793, |
|
"learning_rate": 6.110819412180535e-07, |
|
"loss": 1.0302, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 1.677710843373494, |
|
"grad_norm": 0.3862994313240051, |
|
"learning_rate": 5.99626483615331e-07, |
|
"loss": 1.024, |
|
"step": 561 |
|
}, |
|
{ |
|
"epoch": 1.680722891566265, |
|
"grad_norm": 0.406364381313324, |
|
"learning_rate": 5.882725650298787e-07, |
|
"loss": 1.0184, |
|
"step": 562 |
|
}, |
|
{ |
|
"epoch": 1.6837349397590362, |
|
"grad_norm": 0.42682695388793945, |
|
"learning_rate": 5.770204474543978e-07, |
|
"loss": 1.0347, |
|
"step": 563 |
|
}, |
|
{ |
|
"epoch": 1.6867469879518073, |
|
"grad_norm": 0.4065680503845215, |
|
"learning_rate": 5.658703905325186e-07, |
|
"loss": 1.0352, |
|
"step": 564 |
|
}, |
|
{ |
|
"epoch": 1.6897590361445785, |
|
"grad_norm": 0.402649462223053, |
|
"learning_rate": 5.548226515528133e-07, |
|
"loss": 1.0293, |
|
"step": 565 |
|
}, |
|
{ |
|
"epoch": 1.6927710843373494, |
|
"grad_norm": 0.38777557015419006, |
|
"learning_rate": 5.438774854428614e-07, |
|
"loss": 1.0521, |
|
"step": 566 |
|
}, |
|
{ |
|
"epoch": 1.6957831325301205, |
|
"grad_norm": 0.42119914293289185, |
|
"learning_rate": 5.330351447633603e-07, |
|
"loss": 1.0862, |
|
"step": 567 |
|
}, |
|
{ |
|
"epoch": 1.6987951807228916, |
|
"grad_norm": 0.3981137275695801, |
|
"learning_rate": 5.222958797023036e-07, |
|
"loss": 1.0312, |
|
"step": 568 |
|
}, |
|
{ |
|
"epoch": 1.7018072289156625, |
|
"grad_norm": 0.40969544649124146, |
|
"learning_rate": 5.11659938069205e-07, |
|
"loss": 1.0397, |
|
"step": 569 |
|
}, |
|
{ |
|
"epoch": 1.7048192771084336, |
|
"grad_norm": 0.373832643032074, |
|
"learning_rate": 5.011275652893782e-07, |
|
"loss": 1.0546, |
|
"step": 570 |
|
}, |
|
{ |
|
"epoch": 1.7078313253012047, |
|
"grad_norm": 0.4301709532737732, |
|
"learning_rate": 4.906990043982813e-07, |
|
"loss": 1.0475, |
|
"step": 571 |
|
}, |
|
{ |
|
"epoch": 1.7108433734939759, |
|
"grad_norm": 0.4075815975666046, |
|
"learning_rate": 4.803744960358992e-07, |
|
"loss": 0.9895, |
|
"step": 572 |
|
}, |
|
{ |
|
"epoch": 1.713855421686747, |
|
"grad_norm": 0.41060760617256165, |
|
"learning_rate": 4.701542784411994e-07, |
|
"loss": 1.032, |
|
"step": 573 |
|
}, |
|
{ |
|
"epoch": 1.716867469879518, |
|
"grad_norm": 0.38388729095458984, |
|
"learning_rate": 4.6003858744662564e-07, |
|
"loss": 1.0629, |
|
"step": 574 |
|
}, |
|
{ |
|
"epoch": 1.7198795180722892, |
|
"grad_norm": 0.37711286544799805, |
|
"learning_rate": 4.500276564726652e-07, |
|
"loss": 1.0032, |
|
"step": 575 |
|
}, |
|
{ |
|
"epoch": 1.7228915662650603, |
|
"grad_norm": 0.4005860388278961, |
|
"learning_rate": 4.401217165224564e-07, |
|
"loss": 1.0953, |
|
"step": 576 |
|
}, |
|
{ |
|
"epoch": 1.7259036144578315, |
|
"grad_norm": 0.39737778902053833, |
|
"learning_rate": 4.3032099617645874e-07, |
|
"loss": 1.0731, |
|
"step": 577 |
|
}, |
|
{ |
|
"epoch": 1.7289156626506024, |
|
"grad_norm": 0.39624249935150146, |
|
"learning_rate": 4.2062572158718284e-07, |
|
"loss": 1.0633, |
|
"step": 578 |
|
}, |
|
{ |
|
"epoch": 1.7319277108433735, |
|
"grad_norm": 0.3743440508842468, |
|
"learning_rate": 4.1103611647396734e-07, |
|
"loss": 1.0415, |
|
"step": 579 |
|
}, |
|
{ |
|
"epoch": 1.7349397590361446, |
|
"grad_norm": 0.3983217477798462, |
|
"learning_rate": 4.0155240211781966e-07, |
|
"loss": 1.0129, |
|
"step": 580 |
|
}, |
|
{ |
|
"epoch": 1.7379518072289155, |
|
"grad_norm": 0.4027600586414337, |
|
"learning_rate": 3.921747973563056e-07, |
|
"loss": 1.0909, |
|
"step": 581 |
|
}, |
|
{ |
|
"epoch": 1.7409638554216866, |
|
"grad_norm": 0.4163624942302704, |
|
"learning_rate": 3.829035185785035e-07, |
|
"loss": 1.0766, |
|
"step": 582 |
|
}, |
|
{ |
|
"epoch": 1.7439759036144578, |
|
"grad_norm": 0.3989628255367279, |
|
"learning_rate": 3.737387797200126e-07, |
|
"loss": 1.0506, |
|
"step": 583 |
|
}, |
|
{ |
|
"epoch": 1.7469879518072289, |
|
"grad_norm": 0.339167058467865, |
|
"learning_rate": 3.646807922580098e-07, |
|
"loss": 1.027, |
|
"step": 584 |
|
}, |
|
{ |
|
"epoch": 1.75, |
|
"grad_norm": 0.44778549671173096, |
|
"learning_rate": 3.557297652063768e-07, |
|
"loss": 1.0107, |
|
"step": 585 |
|
}, |
|
{ |
|
"epoch": 1.7530120481927711, |
|
"grad_norm": 0.43992355465888977, |
|
"learning_rate": 3.4688590511087304e-07, |
|
"loss": 1.0068, |
|
"step": 586 |
|
}, |
|
{ |
|
"epoch": 1.7560240963855422, |
|
"grad_norm": 0.36404716968536377, |
|
"learning_rate": 3.3814941604437155e-07, |
|
"loss": 1.0631, |
|
"step": 587 |
|
}, |
|
{ |
|
"epoch": 1.7590361445783134, |
|
"grad_norm": 0.39936619997024536, |
|
"learning_rate": 3.2952049960214785e-07, |
|
"loss": 0.9933, |
|
"step": 588 |
|
}, |
|
{ |
|
"epoch": 1.7620481927710845, |
|
"grad_norm": 0.42165055871009827, |
|
"learning_rate": 3.20999354897229e-07, |
|
"loss": 1.0362, |
|
"step": 589 |
|
}, |
|
{ |
|
"epoch": 1.7650602409638554, |
|
"grad_norm": 0.41388002038002014, |
|
"learning_rate": 3.1258617855580155e-07, |
|
"loss": 1.048, |
|
"step": 590 |
|
}, |
|
{ |
|
"epoch": 1.7680722891566265, |
|
"grad_norm": 0.37040451169013977, |
|
"learning_rate": 3.0428116471267146e-07, |
|
"loss": 1.073, |
|
"step": 591 |
|
}, |
|
{ |
|
"epoch": 1.7710843373493976, |
|
"grad_norm": 0.4236885607242584, |
|
"learning_rate": 2.9608450500678566e-07, |
|
"loss": 1.076, |
|
"step": 592 |
|
}, |
|
{ |
|
"epoch": 1.7740963855421685, |
|
"grad_norm": 0.442690908908844, |
|
"learning_rate": 2.879963885768083e-07, |
|
"loss": 1.0546, |
|
"step": 593 |
|
}, |
|
{ |
|
"epoch": 1.7771084337349397, |
|
"grad_norm": 0.4121167063713074, |
|
"learning_rate": 2.800170020567566e-07, |
|
"loss": 1.0169, |
|
"step": 594 |
|
}, |
|
{ |
|
"epoch": 1.7801204819277108, |
|
"grad_norm": 0.37887606024742126, |
|
"learning_rate": 2.721465295716996e-07, |
|
"loss": 1.0828, |
|
"step": 595 |
|
}, |
|
{ |
|
"epoch": 1.783132530120482, |
|
"grad_norm": 0.380744993686676, |
|
"learning_rate": 2.643851527335006e-07, |
|
"loss": 1.0376, |
|
"step": 596 |
|
}, |
|
{ |
|
"epoch": 1.786144578313253, |
|
"grad_norm": 0.3593333065509796, |
|
"learning_rate": 2.5673305063663335e-07, |
|
"loss": 0.9841, |
|
"step": 597 |
|
}, |
|
{ |
|
"epoch": 1.7891566265060241, |
|
"grad_norm": 0.40235635638237, |
|
"learning_rate": 2.4919039985404626e-07, |
|
"loss": 1.0454, |
|
"step": 598 |
|
}, |
|
{ |
|
"epoch": 1.7921686746987953, |
|
"grad_norm": 0.3604947030544281, |
|
"learning_rate": 2.4175737443308976e-07, |
|
"loss": 1.0195, |
|
"step": 599 |
|
}, |
|
{ |
|
"epoch": 1.7951807228915664, |
|
"grad_norm": 0.3955729007720947, |
|
"learning_rate": 2.3443414589149838e-07, |
|
"loss": 1.0324, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 1.7981927710843375, |
|
"grad_norm": 0.3765583038330078, |
|
"learning_rate": 2.272208832134326e-07, |
|
"loss": 1.0905, |
|
"step": 601 |
|
}, |
|
{ |
|
"epoch": 1.8012048192771084, |
|
"grad_norm": 0.3759413957595825, |
|
"learning_rate": 2.201177528455828e-07, |
|
"loss": 1.0711, |
|
"step": 602 |
|
}, |
|
{ |
|
"epoch": 1.8042168674698795, |
|
"grad_norm": 0.3818155825138092, |
|
"learning_rate": 2.131249186933243e-07, |
|
"loss": 1.0911, |
|
"step": 603 |
|
}, |
|
{ |
|
"epoch": 1.8072289156626506, |
|
"grad_norm": 0.39899012446403503, |
|
"learning_rate": 2.0624254211693894e-07, |
|
"loss": 1.0562, |
|
"step": 604 |
|
}, |
|
{ |
|
"epoch": 1.8102409638554215, |
|
"grad_norm": 0.390240341424942, |
|
"learning_rate": 1.994707819278896e-07, |
|
"loss": 1.02, |
|
"step": 605 |
|
}, |
|
{ |
|
"epoch": 1.8132530120481927, |
|
"grad_norm": 0.4145658016204834, |
|
"learning_rate": 1.9280979438515479e-07, |
|
"loss": 1.022, |
|
"step": 606 |
|
}, |
|
{ |
|
"epoch": 1.8162650602409638, |
|
"grad_norm": 0.4315582811832428, |
|
"learning_rate": 1.8625973319162605e-07, |
|
"loss": 1.0332, |
|
"step": 607 |
|
}, |
|
{ |
|
"epoch": 1.819277108433735, |
|
"grad_norm": 0.40108925104141235, |
|
"learning_rate": 1.7982074949055794e-07, |
|
"loss": 1.0494, |
|
"step": 608 |
|
}, |
|
{ |
|
"epoch": 1.822289156626506, |
|
"grad_norm": 0.37406808137893677, |
|
"learning_rate": 1.7349299186208258e-07, |
|
"loss": 1.0744, |
|
"step": 609 |
|
}, |
|
{ |
|
"epoch": 1.8253012048192772, |
|
"grad_norm": 0.49371138215065, |
|
"learning_rate": 1.6727660631977894e-07, |
|
"loss": 1.0319, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 1.8283132530120483, |
|
"grad_norm": 0.3623702824115753, |
|
"learning_rate": 1.6117173630730787e-07, |
|
"loss": 1.1106, |
|
"step": 611 |
|
}, |
|
{ |
|
"epoch": 1.8313253012048194, |
|
"grad_norm": 0.3635117709636688, |
|
"learning_rate": 1.5517852269509692e-07, |
|
"loss": 1.0454, |
|
"step": 612 |
|
}, |
|
{ |
|
"epoch": 1.8343373493975905, |
|
"grad_norm": 0.3568932116031647, |
|
"learning_rate": 1.492971037770924e-07, |
|
"loss": 1.0008, |
|
"step": 613 |
|
}, |
|
{ |
|
"epoch": 1.8373493975903614, |
|
"grad_norm": 0.3618745803833008, |
|
"learning_rate": 1.435276152675691e-07, |
|
"loss": 1.0265, |
|
"step": 614 |
|
}, |
|
{ |
|
"epoch": 1.8403614457831325, |
|
"grad_norm": 0.3803001940250397, |
|
"learning_rate": 1.378701902979962e-07, |
|
"loss": 1.0643, |
|
"step": 615 |
|
}, |
|
{ |
|
"epoch": 1.8433734939759037, |
|
"grad_norm": 0.459064781665802, |
|
"learning_rate": 1.323249594139664e-07, |
|
"loss": 1.0108, |
|
"step": 616 |
|
}, |
|
{ |
|
"epoch": 1.8463855421686746, |
|
"grad_norm": 0.3967600166797638, |
|
"learning_rate": 1.2689205057218602e-07, |
|
"loss": 0.9983, |
|
"step": 617 |
|
}, |
|
{ |
|
"epoch": 1.8493975903614457, |
|
"grad_norm": 0.3921276926994324, |
|
"learning_rate": 1.2157158913751687e-07, |
|
"loss": 0.9829, |
|
"step": 618 |
|
}, |
|
{ |
|
"epoch": 1.8524096385542168, |
|
"grad_norm": 0.3510358929634094, |
|
"learning_rate": 1.1636369788008973e-07, |
|
"loss": 1.0848, |
|
"step": 619 |
|
}, |
|
{ |
|
"epoch": 1.855421686746988, |
|
"grad_norm": 0.33366602659225464, |
|
"learning_rate": 1.1126849697246533e-07, |
|
"loss": 1.0474, |
|
"step": 620 |
|
}, |
|
{ |
|
"epoch": 1.858433734939759, |
|
"grad_norm": 0.3874680697917938, |
|
"learning_rate": 1.0628610398686679e-07, |
|
"loss": 1.0968, |
|
"step": 621 |
|
}, |
|
{ |
|
"epoch": 1.8614457831325302, |
|
"grad_norm": 0.3490632474422455, |
|
"learning_rate": 1.014166338924627e-07, |
|
"loss": 1.0689, |
|
"step": 622 |
|
}, |
|
{ |
|
"epoch": 1.8644578313253013, |
|
"grad_norm": 0.44556349515914917, |
|
"learning_rate": 9.666019905271662e-08, |
|
"loss": 1.0402, |
|
"step": 623 |
|
}, |
|
{ |
|
"epoch": 1.8674698795180724, |
|
"grad_norm": 0.4002796411514282, |
|
"learning_rate": 9.201690922279405e-08, |
|
"loss": 1.0333, |
|
"step": 624 |
|
}, |
|
{ |
|
"epoch": 1.8704819277108435, |
|
"grad_norm": 0.4069937467575073, |
|
"learning_rate": 8.748687154702673e-08, |
|
"loss": 1.1043, |
|
"step": 625 |
|
}, |
|
{ |
|
"epoch": 1.8734939759036144, |
|
"grad_norm": 0.4305992126464844, |
|
"learning_rate": 8.307019055644517e-08, |
|
"loss": 1.0116, |
|
"step": 626 |
|
}, |
|
{ |
|
"epoch": 1.8765060240963856, |
|
"grad_norm": 0.36993542313575745, |
|
"learning_rate": 7.876696816636276e-08, |
|
"loss": 1.0075, |
|
"step": 627 |
|
}, |
|
{ |
|
"epoch": 1.8795180722891565, |
|
"grad_norm": 0.3679683208465576, |
|
"learning_rate": 7.45773036740255e-08, |
|
"loss": 1.0131, |
|
"step": 628 |
|
}, |
|
{ |
|
"epoch": 1.8825301204819276, |
|
"grad_norm": 0.39285150170326233, |
|
"learning_rate": 7.050129375632098e-08, |
|
"loss": 1.0376, |
|
"step": 629 |
|
}, |
|
{ |
|
"epoch": 1.8855421686746987, |
|
"grad_norm": 0.4058956801891327, |
|
"learning_rate": 6.65390324675469e-08, |
|
"loss": 1.0214, |
|
"step": 630 |
|
}, |
|
{ |
|
"epoch": 1.8885542168674698, |
|
"grad_norm": 0.3686175048351288, |
|
"learning_rate": 6.269061123724163e-08, |
|
"loss": 1.0229, |
|
"step": 631 |
|
}, |
|
{ |
|
"epoch": 1.891566265060241, |
|
"grad_norm": 0.37797847390174866, |
|
"learning_rate": 5.895611886807317e-08, |
|
"loss": 1.0389, |
|
"step": 632 |
|
}, |
|
{ |
|
"epoch": 1.894578313253012, |
|
"grad_norm": 0.4053489565849304, |
|
"learning_rate": 5.533564153379134e-08, |
|
"loss": 1.0475, |
|
"step": 633 |
|
}, |
|
{ |
|
"epoch": 1.8975903614457832, |
|
"grad_norm": 0.4105575382709503, |
|
"learning_rate": 5.182926277723821e-08, |
|
"loss": 1.029, |
|
"step": 634 |
|
}, |
|
{ |
|
"epoch": 1.9006024096385543, |
|
"grad_norm": 0.3795925974845886, |
|
"learning_rate": 4.843706350842081e-08, |
|
"loss": 1.0502, |
|
"step": 635 |
|
}, |
|
{ |
|
"epoch": 1.9036144578313254, |
|
"grad_norm": 0.3822070062160492, |
|
"learning_rate": 4.515912200264427e-08, |
|
"loss": 1.025, |
|
"step": 636 |
|
}, |
|
{ |
|
"epoch": 1.9066265060240963, |
|
"grad_norm": 0.389304518699646, |
|
"learning_rate": 4.19955138987066e-08, |
|
"loss": 1.0387, |
|
"step": 637 |
|
}, |
|
{ |
|
"epoch": 1.9096385542168675, |
|
"grad_norm": 0.4106118381023407, |
|
"learning_rate": 3.894631219715006e-08, |
|
"loss": 1.0442, |
|
"step": 638 |
|
}, |
|
{ |
|
"epoch": 1.9126506024096386, |
|
"grad_norm": 0.39749521017074585, |
|
"learning_rate": 3.601158725858034e-08, |
|
"loss": 1.0183, |
|
"step": 639 |
|
}, |
|
{ |
|
"epoch": 1.9156626506024095, |
|
"grad_norm": 0.37224337458610535, |
|
"learning_rate": 3.3191406802041693e-08, |
|
"loss": 0.9505, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 1.9186746987951806, |
|
"grad_norm": 0.4012593924999237, |
|
"learning_rate": 3.048583590345266e-08, |
|
"loss": 0.9986, |
|
"step": 641 |
|
}, |
|
{ |
|
"epoch": 1.9216867469879517, |
|
"grad_norm": 0.39663243293762207, |
|
"learning_rate": 2.7894936994106724e-08, |
|
"loss": 1.0163, |
|
"step": 642 |
|
}, |
|
{ |
|
"epoch": 1.9246987951807228, |
|
"grad_norm": 0.467464804649353, |
|
"learning_rate": 2.5418769859231194e-08, |
|
"loss": 1.0142, |
|
"step": 643 |
|
}, |
|
{ |
|
"epoch": 1.927710843373494, |
|
"grad_norm": 0.3819372355937958, |
|
"learning_rate": 2.3057391636606698e-08, |
|
"loss": 0.993, |
|
"step": 644 |
|
}, |
|
{ |
|
"epoch": 1.930722891566265, |
|
"grad_norm": 0.3579060733318329, |
|
"learning_rate": 2.081085681524986e-08, |
|
"loss": 1.0213, |
|
"step": 645 |
|
}, |
|
{ |
|
"epoch": 1.9337349397590362, |
|
"grad_norm": 0.33856555819511414, |
|
"learning_rate": 1.8679217234154335e-08, |
|
"loss": 1.0315, |
|
"step": 646 |
|
}, |
|
{ |
|
"epoch": 1.9367469879518073, |
|
"grad_norm": 0.36222004890441895, |
|
"learning_rate": 1.6662522081097308e-08, |
|
"loss": 0.9624, |
|
"step": 647 |
|
}, |
|
{ |
|
"epoch": 1.9397590361445785, |
|
"grad_norm": 0.3850827217102051, |
|
"learning_rate": 1.4760817891500966e-08, |
|
"loss": 1.0241, |
|
"step": 648 |
|
}, |
|
{ |
|
"epoch": 1.9427710843373494, |
|
"grad_norm": 0.42454764246940613, |
|
"learning_rate": 1.2974148547362231e-08, |
|
"loss": 0.96, |
|
"step": 649 |
|
}, |
|
{ |
|
"epoch": 1.9457831325301205, |
|
"grad_norm": 0.3735847473144531, |
|
"learning_rate": 1.1302555276238581e-08, |
|
"loss": 1.007, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 1.9487951807228916, |
|
"grad_norm": 0.3734501302242279, |
|
"learning_rate": 9.746076650294922e-09, |
|
"loss": 1.0119, |
|
"step": 651 |
|
}, |
|
{ |
|
"epoch": 1.9518072289156625, |
|
"grad_norm": 0.431612491607666, |
|
"learning_rate": 8.304748585417077e-09, |
|
"loss": 1.0564, |
|
"step": 652 |
|
}, |
|
{ |
|
"epoch": 1.9548192771084336, |
|
"grad_norm": 0.3824908137321472, |
|
"learning_rate": 6.978604340380779e-09, |
|
"loss": 0.9928, |
|
"step": 653 |
|
}, |
|
{ |
|
"epoch": 1.9578313253012047, |
|
"grad_norm": 0.38838639855384827, |
|
"learning_rate": 5.767674516083954e-09, |
|
"loss": 1.0136, |
|
"step": 654 |
|
}, |
|
{ |
|
"epoch": 1.9608433734939759, |
|
"grad_norm": 0.3842463493347168, |
|
"learning_rate": 4.671987054842842e-09, |
|
"loss": 1.1033, |
|
"step": 655 |
|
}, |
|
{ |
|
"epoch": 1.963855421686747, |
|
"grad_norm": 0.3982362449169159, |
|
"learning_rate": 3.6915672397436208e-09, |
|
"loss": 0.9286, |
|
"step": 656 |
|
}, |
|
{ |
|
"epoch": 1.966867469879518, |
|
"grad_norm": 0.4161483645439148, |
|
"learning_rate": 2.8264376940634332e-09, |
|
"loss": 1.0393, |
|
"step": 657 |
|
}, |
|
{ |
|
"epoch": 1.9698795180722892, |
|
"grad_norm": 0.37065836787223816, |
|
"learning_rate": 2.076618380744133e-09, |
|
"loss": 1.0168, |
|
"step": 658 |
|
}, |
|
{ |
|
"epoch": 1.9728915662650603, |
|
"grad_norm": 0.3696196973323822, |
|
"learning_rate": 1.4421266019348789e-09, |
|
"loss": 1.0211, |
|
"step": 659 |
|
}, |
|
{ |
|
"epoch": 1.9759036144578315, |
|
"grad_norm": 0.4084506630897522, |
|
"learning_rate": 9.229769985902304e-10, |
|
"loss": 1.0667, |
|
"step": 660 |
|
}, |
|
{ |
|
"epoch": 1.9789156626506024, |
|
"grad_norm": 0.4110511839389801, |
|
"learning_rate": 5.191815501343067e-10, |
|
"loss": 1.044, |
|
"step": 661 |
|
}, |
|
{ |
|
"epoch": 1.9819277108433735, |
|
"grad_norm": 0.36027991771698, |
|
"learning_rate": 2.307495741843413e-10, |
|
"loss": 1.0415, |
|
"step": 662 |
|
}, |
|
{ |
|
"epoch": 1.9849397590361446, |
|
"grad_norm": 0.3866373598575592, |
|
"learning_rate": 5.768772633363284e-11, |
|
"loss": 1.0336, |
|
"step": 663 |
|
}, |
|
{ |
|
"epoch": 1.9879518072289155, |
|
"grad_norm": 0.3646948039531708, |
|
"learning_rate": 0.0, |
|
"loss": 1.0516, |
|
"step": 664 |
|
} |
|
], |
|
"logging_steps": 1, |
|
"max_steps": 664, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 2, |
|
"save_steps": 166, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 3.6730319840173097e+18, |
|
"train_batch_size": 1, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|