Upload exp_phase8_bridge_SOTA_LONG_r128_091002/log.jsonl with huggingface_hub
Browse files
exp_phase8_bridge_SOTA_LONG_r128_091002/log.jsonl
ADDED
|
@@ -0,0 +1,30 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{"step": 0, "loss": 1.479889154434204, "loss_mdlm": 1.2954649925231934, "loss_lm": 1.8442420959472656, "loss_ct": 0.0, "lr": 0.0, "gnorm": 0.8368141651153564, "bridge_gate_avg": 1.0, "bridge_out_proj_avg": 0.018043143674731255, "elapsed_s": 3.2734665870666504}
|
| 2 |
+
{"step": 200, "loss": 1.4883160591125488, "loss_mdlm": 1.3265641927719116, "loss_lm": 1.6175190210342407, "loss_ct": 0.0, "lr": 9.999999999999999e-06, "gnorm": 1.072212815284729, "bridge_gate_avg": 1.0004418690999348, "bridge_out_proj_avg": 0.018045851960778236, "elapsed_s": 79.90479516983032}
|
| 3 |
+
{"step": 400, "loss": 1.4194917678833008, "loss_mdlm": 1.324537992477417, "loss_lm": 0.9495377540588379, "loss_ct": 0.0, "lr": 1.9999999999999998e-05, "gnorm": 0.944308876991272, "bridge_gate_avg": 1.0014090538024902, "bridge_out_proj_avg": 0.018049978651106358, "elapsed_s": 154.3746178150177}
|
| 4 |
+
{"step": 600, "loss": 1.4748342037200928, "loss_mdlm": 1.4003125429153442, "loss_lm": 0.7452169060707092, "loss_ct": 0.0, "lr": 3e-05, "gnorm": 0.7232361435890198, "bridge_gate_avg": 1.0022333860397339, "bridge_out_proj_avg": 0.018053191093107063, "elapsed_s": 227.73391032218933}
|
| 5 |
+
{"step": 800, "loss": 1.3909645080566406, "loss_mdlm": 1.2825087308883667, "loss_lm": 1.084557294845581, "loss_ct": 0.0, "lr": 2.9898575366129145e-05, "gnorm": 0.8972605466842651, "bridge_gate_avg": 1.003064235051473, "bridge_out_proj_avg": 0.01805681362748146, "elapsed_s": 301.1413543224335}
|
| 6 |
+
{"step": 1000, "loss": 1.3881882429122925, "loss_mdlm": 1.319993257522583, "loss_lm": 0.6819499731063843, "loss_ct": 0.0, "lr": 2.959567305869736e-05, "gnorm": 0.5902879238128662, "bridge_gate_avg": 1.0037123362223308, "bridge_out_proj_avg": 0.01805982117851575, "elapsed_s": 374.9508559703827}
|
| 7 |
+
{"step": 1200, "loss": 1.3923734426498413, "loss_mdlm": 1.3167356252670288, "loss_lm": 0.7563783526420593, "loss_ct": 0.0, "lr": 2.9095389311788626e-05, "gnorm": 0.5419765114784241, "bridge_gate_avg": 1.0045844912528992, "bridge_out_proj_avg": 0.018062879020969074, "elapsed_s": 449.0034747123718}
|
| 8 |
+
{"step": 1400, "loss": 1.3313223123550415, "loss_mdlm": 1.2591946125030518, "loss_lm": 0.7212766408920288, "loss_ct": 0.0, "lr": 2.8404489604851186e-05, "gnorm": 0.734231173992157, "bridge_gate_avg": 1.0052776734034221, "bridge_out_proj_avg": 0.01806603403141101, "elapsed_s": 522.5694088935852}
|
| 9 |
+
{"step": 1600, "loss": 1.3858485221862793, "loss_mdlm": 1.3052363395690918, "loss_lm": 0.8061221837997437, "loss_ct": 0.0, "lr": 2.753231717119405e-05, "gnorm": 0.5556174516677856, "bridge_gate_avg": 1.0059536894162495, "bridge_out_proj_avg": 0.01806905586272478, "elapsed_s": 596.3355066776276}
|
| 10 |
+
{"step": 1800, "loss": 1.4007569551467896, "loss_mdlm": 1.315049409866333, "loss_lm": 0.8570749163627625, "loss_ct": 0.0, "lr": 2.649066664678467e-05, "gnorm": 0.6915852427482605, "bridge_gate_avg": 1.0064383546511333, "bridge_out_proj_avg": 0.018072103150188923, "elapsed_s": 670.1486418247223}
|
| 11 |
+
{"step": 2000, "loss": 1.4287676811218262, "loss_mdlm": 1.3167616128921509, "loss_lm": 1.1200612783432007, "loss_ct": 0.0, "lr": 2.5293624568031008e-05, "gnorm": 0.7140088081359863, "bridge_gate_avg": 1.0067600111166637, "bridge_out_proj_avg": 0.01807445598145326, "elapsed_s": 743.8084454536438}
|
| 12 |
+
{"step": 2200, "loss": 1.403022289276123, "loss_mdlm": 1.3054381608963013, "loss_lm": 0.9758411049842834, "loss_ct": 0.0, "lr": 2.3957378875541795e-05, "gnorm": 0.7162560224533081, "bridge_gate_avg": 1.0071489413579304, "bridge_out_proj_avg": 0.01807702798396349, "elapsed_s": 817.5222096443176}
|
| 13 |
+
{"step": 2400, "loss": 1.3879482746124268, "loss_mdlm": 1.324583649635315, "loss_lm": 0.6336467862129211, "loss_ct": 0.0, "lr": 2.25e-05, "gnorm": 0.4920961558818817, "bridge_gate_avg": 1.007487177848816, "bridge_out_proj_avg": 0.018079120044906933, "elapsed_s": 891.3555245399475}
|
| 14 |
+
{"step": 2600, "loss": 1.3223228454589844, "loss_mdlm": 1.2693978548049927, "loss_lm": 0.5292495489120483, "loss_ct": 0.0, "lr": 2.0941196490587352e-05, "gnorm": 0.4579283595085144, "bridge_gate_avg": 1.0077858765920003, "bridge_out_proj_avg": 0.0180812847490112, "elapsed_s": 965.2870907783508}
|
| 15 |
+
{"step": 2800, "loss": 1.4382069110870361, "loss_mdlm": 1.3800545930862427, "loss_lm": 0.5815231204032898, "loss_ct": 0.0, "lr": 1.9302048490666356e-05, "gnorm": 0.47625961899757385, "bridge_gate_avg": 1.0080878734588623, "bridge_out_proj_avg": 0.01808339326332013, "elapsed_s": 1039.3185381889343}
|
| 16 |
+
{"step": 3000, "loss": 1.3406109809875488, "loss_mdlm": 1.302790641784668, "loss_lm": 0.378203421831131, "loss_ct": 0.0, "lr": 1.760472266500396e-05, "gnorm": 0.4136255979537964, "bridge_gate_avg": 1.0082773367563884, "bridge_out_proj_avg": 0.018084654584527016, "elapsed_s": 1113.5113925933838}
|
| 17 |
+
{"step": 3200, "loss": 1.4274667501449585, "loss_mdlm": 1.356912612915039, "loss_lm": 0.7055411338806152, "loss_ct": 0.0, "lr": 1.587217243365714e-05, "gnorm": 0.5915622711181641, "bridge_gate_avg": 1.0084294279416401, "bridge_out_proj_avg": 0.018085920562346775, "elapsed_s": 1187.486094236374}
|
| 18 |
+
{"step": 3400, "loss": 1.3264739513397217, "loss_mdlm": 1.2728101015090942, "loss_lm": 0.5366389155387878, "loss_ct": 0.0, "lr": 1.4127827566342864e-05, "gnorm": 0.531493604183197, "bridge_gate_avg": 1.008579154809316, "bridge_out_proj_avg": 0.018086757510900497, "elapsed_s": 1261.7564461231232}
|
| 19 |
+
{"step": 3600, "loss": 1.3899680376052856, "loss_mdlm": 1.3003880977630615, "loss_lm": 0.8957993984222412, "loss_ct": 0.0, "lr": 1.2395277334996045e-05, "gnorm": 0.6250145435333252, "bridge_gate_avg": 1.0086683829625447, "bridge_out_proj_avg": 0.01808725328495105, "elapsed_s": 1335.909040927887}
|
| 20 |
+
{"step": 3800, "loss": 1.397671103477478, "loss_mdlm": 1.3297935724258423, "loss_lm": 0.6787751317024231, "loss_ct": 0.0, "lr": 1.069795150933365e-05, "gnorm": 0.7216941118240356, "bridge_gate_avg": 1.0087865392367046, "bridge_out_proj_avg": 0.0180880402525266, "elapsed_s": 1409.3257946968079}
|
| 21 |
+
{"step": 4000, "loss": 0.8670775890350342, "loss_mdlm": 0.8144523501396179, "loss_lm": 0.526252269744873, "loss_ct": 0.0, "lr": 9.058803509412647e-06, "gnorm": 0.5033635497093201, "bridge_gate_avg": 1.0088178118069966, "bridge_out_proj_avg": 0.018088208821912605, "elapsed_s": 1483.1837995052338}
|
| 22 |
+
{"step": 4200, "loss": 1.185857892036438, "loss_mdlm": 1.1355332136154175, "loss_lm": 0.503247082233429, "loss_ct": 0.0, "lr": 7.500000000000004e-06, "gnorm": 0.5133223533630371, "bridge_gate_avg": 1.008887767791748, "bridge_out_proj_avg": 0.018088577315211296, "elapsed_s": 1557.4427886009216}
|
| 23 |
+
{"step": 4400, "loss": 1.2576298713684082, "loss_mdlm": 1.2048470973968506, "loss_lm": 0.5278276801109314, "loss_ct": 0.0, "lr": 6.0426211244582105e-06, "gnorm": 0.5270090103149414, "bridge_gate_avg": 1.008905827999115, "bridge_out_proj_avg": 0.018088790277640026, "elapsed_s": 1631.331220149994}
|
| 24 |
+
{"step": 4600, "loss": 1.3451511859893799, "loss_mdlm": 1.2928588390350342, "loss_lm": 0.5229232311248779, "loss_ct": 0.0, "lr": 4.706375431968998e-06, "gnorm": 0.4760102331638336, "bridge_gate_avg": 1.008898397286733, "bridge_out_proj_avg": 0.018088822873930138, "elapsed_s": 1705.6235630512238}
|
| 25 |
+
{"step": 4800, "loss": 1.3656636476516724, "loss_mdlm": 1.2990772724151611, "loss_lm": 0.6658639907836914, "loss_ct": 0.0, "lr": 3.5093333532153316e-06, "gnorm": 0.5504053831100464, "bridge_gate_avg": 1.0089142719904582, "bridge_out_proj_avg": 0.01808875830223163, "elapsed_s": 1779.7601804733276}
|
| 26 |
+
{"step": 5000, "loss": 1.3441420793533325, "loss_mdlm": 1.2955025434494019, "loss_lm": 0.48639559745788574, "loss_ct": 0.0, "lr": 2.467682828805956e-06, "gnorm": 0.44995608925819397, "bridge_gate_avg": 1.0089120268821716, "bridge_out_proj_avg": 0.01808877754956484, "elapsed_s": 1853.671347618103}
|
| 27 |
+
{"step": 5200, "loss": 1.3686158657073975, "loss_mdlm": 1.3124985694885254, "loss_lm": 0.5611728429794312, "loss_ct": 0.0, "lr": 1.5955103951488177e-06, "gnorm": 0.6221829056739807, "bridge_gate_avg": 1.0089394648869832, "bridge_out_proj_avg": 0.01808895884702603, "elapsed_s": 1927.0654735565186}
|
| 28 |
+
{"step": 5400, "loss": 1.3672200441360474, "loss_mdlm": 1.3146119117736816, "loss_lm": 0.5260812044143677, "loss_ct": 0.0, "lr": 9.046106882113753e-07, "gnorm": 0.5715479850769043, "bridge_gate_avg": 1.0089434782663982, "bridge_out_proj_avg": 0.018089016278584797, "elapsed_s": 2001.1912295818329}
|
| 29 |
+
{"step": 5600, "loss": 1.4277749061584473, "loss_mdlm": 1.3640351295471191, "loss_lm": 0.637397825717926, "loss_ct": 0.0, "lr": 4.043269413026429e-07, "gnorm": 0.5203056931495667, "bridge_gate_avg": 1.0089505712191265, "bridge_out_proj_avg": 0.018089072468380134, "elapsed_s": 2075.0510442256927}
|
| 30 |
+
{"step": 5800, "loss": 1.3556466102600098, "loss_mdlm": 1.302357792854309, "loss_lm": 0.5328884720802307, "loss_ct": 0.0, "lr": 1.0142463387085465e-07, "gnorm": 0.5163808465003967, "bridge_gate_avg": 1.0089513858159382, "bridge_out_proj_avg": 0.018089083023369312, "elapsed_s": 2149.250453710556}
|