explcre commited on
Commit
a11f1ee
·
verified ·
1 Parent(s): 97824f2

Upload exp_phase8_bridge_SOTA_LONG_r64_091002/log.jsonl with huggingface_hub

Browse files
exp_phase8_bridge_SOTA_LONG_r64_091002/log.jsonl ADDED
@@ -0,0 +1,30 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {"step": 0, "loss": 1.4771766662597656, "loss_mdlm": 1.2698568105697632, "loss_lm": 2.0731990337371826, "loss_ct": 0.0, "lr": 0.0, "gnorm": 2.046747922897339, "bridge_gate_avg": 1.0, "bridge_out_proj_avg": 0.01805008575320244, "elapsed_s": 3.41748309135437}
2
+ {"step": 200, "loss": 1.4674551486968994, "loss_mdlm": 1.2933814525604248, "loss_lm": 1.7407373189926147, "loss_ct": 0.0, "lr": 9.999999999999999e-06, "gnorm": 0.8235923051834106, "bridge_gate_avg": 1.0005184412002563, "bridge_out_proj_avg": 0.01805406343191862, "elapsed_s": 78.83660554885864}
3
+ {"step": 400, "loss": 1.4564045667648315, "loss_mdlm": 1.3532789945602417, "loss_lm": 1.0312552452087402, "loss_ct": 0.0, "lr": 1.9999999999999998e-05, "gnorm": 0.9557592272758484, "bridge_gate_avg": 1.0016122162342072, "bridge_out_proj_avg": 0.018060246016830206, "elapsed_s": 151.1859803199768}
4
+ {"step": 600, "loss": 1.428107500076294, "loss_mdlm": 1.351922631263733, "loss_lm": 0.7618482112884521, "loss_ct": 0.0, "lr": 3e-05, "gnorm": 0.7029718160629272, "bridge_gate_avg": 1.0025420188903809, "bridge_out_proj_avg": 0.018063785042613745, "elapsed_s": 223.26806592941284}
5
+ {"step": 800, "loss": 1.4152441024780273, "loss_mdlm": 1.3073134422302246, "loss_lm": 1.079306960105896, "loss_ct": 0.0, "lr": 2.9898575366129145e-05, "gnorm": 0.9592772722244263, "bridge_gate_avg": 1.0033547580242157, "bridge_out_proj_avg": 0.01806690776720643, "elapsed_s": 295.06374645233154}
6
+ {"step": 1000, "loss": 1.401649832725525, "loss_mdlm": 1.3284980058670044, "loss_lm": 0.7315187454223633, "loss_ct": 0.0, "lr": 2.959567305869736e-05, "gnorm": 0.6264079213142395, "bridge_gate_avg": 1.0041163265705109, "bridge_out_proj_avg": 0.01807023538276553, "elapsed_s": 367.37817645072937}
7
+ {"step": 1200, "loss": 1.4114406108856201, "loss_mdlm": 1.3347961902618408, "loss_lm": 0.766444206237793, "loss_ct": 0.0, "lr": 2.9095389311788626e-05, "gnorm": 0.6073337197303772, "bridge_gate_avg": 1.0047536194324493, "bridge_out_proj_avg": 0.01807371061295271, "elapsed_s": 439.95951890945435}
8
+ {"step": 1400, "loss": 1.3474040031433105, "loss_mdlm": 1.2737336158752441, "loss_lm": 0.7367034554481506, "loss_ct": 0.0, "lr": 2.8404489604851186e-05, "gnorm": 0.7758491039276123, "bridge_gate_avg": 1.0054397881031036, "bridge_out_proj_avg": 0.018077437300235033, "elapsed_s": 512.308562040329}
9
+ {"step": 1600, "loss": 1.4252818822860718, "loss_mdlm": 1.3383435010910034, "loss_lm": 0.8693832755088806, "loss_ct": 0.0, "lr": 2.753231717119405e-05, "gnorm": 0.6700665354728699, "bridge_gate_avg": 1.005999743938446, "bridge_out_proj_avg": 0.018080448731780052, "elapsed_s": 585.3924908638}
10
+ {"step": 1800, "loss": 1.3865306377410889, "loss_mdlm": 1.29781973361969, "loss_lm": 0.8871088027954102, "loss_ct": 0.0, "lr": 2.649066664678467e-05, "gnorm": 0.7665178775787354, "bridge_gate_avg": 1.0065360367298126, "bridge_out_proj_avg": 0.018083886243402958, "elapsed_s": 658.4022510051727}
11
+ {"step": 2000, "loss": 1.4101440906524658, "loss_mdlm": 1.297258734703064, "loss_lm": 1.12885320186615, "loss_ct": 0.0, "lr": 2.5293624568031008e-05, "gnorm": 0.7928639054298401, "bridge_gate_avg": 1.0069794654846191, "bridge_out_proj_avg": 0.01808658381924033, "elapsed_s": 730.723313331604}
12
+ {"step": 2200, "loss": 1.3743839263916016, "loss_mdlm": 1.2700661420822144, "loss_lm": 1.0431773662567139, "loss_ct": 0.0, "lr": 2.3957378875541795e-05, "gnorm": 0.828351616859436, "bridge_gate_avg": 1.00755113363266, "bridge_out_proj_avg": 0.018090149853378534, "elapsed_s": 803.2530047893524}
13
+ {"step": 2400, "loss": 1.4019296169281006, "loss_mdlm": 1.334976077079773, "loss_lm": 0.6695358157157898, "loss_ct": 0.0, "lr": 2.25e-05, "gnorm": 0.5859558582305908, "bridge_gate_avg": 1.0079363286495209, "bridge_out_proj_avg": 0.018092641606926918, "elapsed_s": 876.680447101593}
14
+ {"step": 2600, "loss": 1.3457640409469604, "loss_mdlm": 1.289982795715332, "loss_lm": 0.5578129887580872, "loss_ct": 0.0, "lr": 2.0941196490587352e-05, "gnorm": 0.5281834602355957, "bridge_gate_avg": 1.0082216262817383, "bridge_out_proj_avg": 0.018094590865075588, "elapsed_s": 950.8291006088257}
15
+ {"step": 2800, "loss": 1.4279905557632446, "loss_mdlm": 1.3654744625091553, "loss_lm": 0.6251607537269592, "loss_ct": 0.0, "lr": 1.9302048490666356e-05, "gnorm": 0.5678055286407471, "bridge_gate_avg": 1.0084308385849, "bridge_out_proj_avg": 0.018096365500241518, "elapsed_s": 1024.8331458568573}
16
+ {"step": 3000, "loss": 1.3377902507781982, "loss_mdlm": 1.297116994857788, "loss_lm": 0.4067322611808777, "loss_ct": 0.0, "lr": 1.760472266500396e-05, "gnorm": 0.4921174943447113, "bridge_gate_avg": 1.0087150931358337, "bridge_out_proj_avg": 0.01809803582727909, "elapsed_s": 1097.8331460952759}
17
+ {"step": 3200, "loss": 1.44217848777771, "loss_mdlm": 1.3659121990203857, "loss_lm": 0.762662410736084, "loss_ct": 0.0, "lr": 1.587217243365714e-05, "gnorm": 0.6275360584259033, "bridge_gate_avg": 1.0088620483875275, "bridge_out_proj_avg": 0.018099387641996145, "elapsed_s": 1171.71955037117}
18
+ {"step": 3400, "loss": 1.3152567148208618, "loss_mdlm": 1.2598294019699097, "loss_lm": 0.5542728304862976, "loss_ct": 0.0, "lr": 1.4127827566342864e-05, "gnorm": 0.6236312389373779, "bridge_gate_avg": 1.0090686976909637, "bridge_out_proj_avg": 0.01810082606971264, "elapsed_s": 1245.711915254593}
19
+ {"step": 3600, "loss": 1.3719602823257446, "loss_mdlm": 1.2764781713485718, "loss_lm": 0.954820990562439, "loss_ct": 0.0, "lr": 1.2395277334996045e-05, "gnorm": 0.7446183562278748, "bridge_gate_avg": 1.0091664791107178, "bridge_out_proj_avg": 0.018101639114320278, "elapsed_s": 1319.3250000476837}
20
+ {"step": 3800, "loss": 1.3774652481079102, "loss_mdlm": 1.3074816465377808, "loss_lm": 0.6998361945152283, "loss_ct": 0.0, "lr": 1.069795150933365e-05, "gnorm": 0.8059535622596741, "bridge_gate_avg": 1.0092300176620483, "bridge_out_proj_avg": 0.01810215273872018, "elapsed_s": 1393.4430575370789}
21
+ {"step": 4000, "loss": 0.8532123565673828, "loss_mdlm": 0.7984133362770081, "loss_lm": 0.547990083694458, "loss_ct": 0.0, "lr": 9.058803509412647e-06, "gnorm": 0.5724920630455017, "bridge_gate_avg": 1.009334146976471, "bridge_out_proj_avg": 0.018102773930877447, "elapsed_s": 1466.9462931156158}
22
+ {"step": 4200, "loss": 1.195914626121521, "loss_mdlm": 1.1422357559204102, "loss_lm": 0.5367885828018188, "loss_ct": 0.0, "lr": 7.500000000000004e-06, "gnorm": 0.5804440975189209, "bridge_gate_avg": 1.009422093629837, "bridge_out_proj_avg": 0.018103201407939196, "elapsed_s": 1540.655529499054}
23
+ {"step": 4400, "loss": 1.227728247642517, "loss_mdlm": 1.1699634790420532, "loss_lm": 0.5776478052139282, "loss_ct": 0.0, "lr": 6.0426211244582105e-06, "gnorm": 0.6263238787651062, "bridge_gate_avg": 1.009477436542511, "bridge_out_proj_avg": 0.018103512935340405, "elapsed_s": 1614.4201426506042}
24
+ {"step": 4600, "loss": 1.3471072912216187, "loss_mdlm": 1.2953765392303467, "loss_lm": 0.5173071622848511, "loss_ct": 0.0, "lr": 4.706375431968998e-06, "gnorm": 0.5137556195259094, "bridge_gate_avg": 1.0094515085220337, "bridge_out_proj_avg": 0.01810343749821186, "elapsed_s": 1688.3798348903656}
25
+ {"step": 4800, "loss": 1.3600887060165405, "loss_mdlm": 1.288453459739685, "loss_lm": 0.7163524031639099, "loss_ct": 0.0, "lr": 3.5093333532153316e-06, "gnorm": 0.6518459320068359, "bridge_gate_avg": 1.009463220834732, "bridge_out_proj_avg": 0.018103460781276226, "elapsed_s": 1762.1098828315735}
26
+ {"step": 5000, "loss": 1.3427187204360962, "loss_mdlm": 1.2901394367218018, "loss_lm": 0.5257923007011414, "loss_ct": 0.0, "lr": 2.467682828805956e-06, "gnorm": 0.5392300486564636, "bridge_gate_avg": 1.0094878375530243, "bridge_out_proj_avg": 0.01810358790680766, "elapsed_s": 1836.1641039848328}
27
+ {"step": 5200, "loss": 1.3893451690673828, "loss_mdlm": 1.326551914215088, "loss_lm": 0.6279319524765015, "loss_ct": 0.0, "lr": 1.5955103951488177e-06, "gnorm": 0.7531287670135498, "bridge_gate_avg": 1.0095196664333344, "bridge_out_proj_avg": 0.018103813752532005, "elapsed_s": 1909.055740594864}
28
+ {"step": 5400, "loss": 1.3494280576705933, "loss_mdlm": 1.2925466299057007, "loss_lm": 0.568814754486084, "loss_ct": 0.0, "lr": 9.046106882113753e-07, "gnorm": 0.7180877327919006, "bridge_gate_avg": 1.0095295906066895, "bridge_out_proj_avg": 0.018103898968547583, "elapsed_s": 1982.8871750831604}
29
+ {"step": 5600, "loss": 1.40628981590271, "loss_mdlm": 1.3383479118347168, "loss_lm": 0.6794193983078003, "loss_ct": 0.0, "lr": 4.043269413026429e-07, "gnorm": 0.5934202671051025, "bridge_gate_avg": 1.0095404386520386, "bridge_out_proj_avg": 0.018103953450918198, "elapsed_s": 2056.621971130371}
30
+ {"step": 5800, "loss": 1.4003167152404785, "loss_mdlm": 1.3438469171524048, "loss_lm": 0.5646980404853821, "loss_ct": 0.0, "lr": 1.0142463387085465e-07, "gnorm": 0.5838003754615784, "bridge_gate_avg": 1.0095417499542236, "bridge_out_proj_avg": 0.01810396509245038, "elapsed_s": 2130.8950622081757}