|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 5.0, |
|
"eval_steps": 100, |
|
"global_step": 1315, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.0, |
|
"grad_norm": 4.0682622539611835, |
|
"learning_rate": 5e-06, |
|
"loss": 4.2205, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"grad_norm": 4.312709224854891, |
|
"learning_rate": 1e-05, |
|
"loss": 4.2858, |
|
"step": 2 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"grad_norm": 3.6601980442252438, |
|
"learning_rate": 1.5e-05, |
|
"loss": 4.1264, |
|
"step": 3 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"grad_norm": 3.812299442715205, |
|
"learning_rate": 2e-05, |
|
"loss": 4.302, |
|
"step": 4 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"grad_norm": 3.116526134864302, |
|
"learning_rate": 2.5e-05, |
|
"loss": 4.1039, |
|
"step": 5 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"grad_norm": 2.6960273710433897, |
|
"learning_rate": 3e-05, |
|
"loss": 3.7882, |
|
"step": 6 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"grad_norm": 2.511832693580313, |
|
"learning_rate": 3.5e-05, |
|
"loss": 3.4971, |
|
"step": 7 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"grad_norm": 1.9433322085763716, |
|
"learning_rate": 4e-05, |
|
"loss": 3.1992, |
|
"step": 8 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"grad_norm": 1.6894476126655664, |
|
"learning_rate": 4.5e-05, |
|
"loss": 3.0814, |
|
"step": 9 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"grad_norm": 1.073675333188434, |
|
"learning_rate": 5e-05, |
|
"loss": 2.8756, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"grad_norm": 1.4627544436421924, |
|
"learning_rate": 5.500000000000001e-05, |
|
"loss": 2.7048, |
|
"step": 11 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"grad_norm": 1.99973700742081, |
|
"learning_rate": 6e-05, |
|
"loss": 2.6456, |
|
"step": 12 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"grad_norm": 1.6172270630421473, |
|
"learning_rate": 6.500000000000001e-05, |
|
"loss": 2.6635, |
|
"step": 13 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"grad_norm": 1.1295069230502908, |
|
"learning_rate": 7e-05, |
|
"loss": 2.59, |
|
"step": 14 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"grad_norm": 1.2649325235735724, |
|
"learning_rate": 7.500000000000001e-05, |
|
"loss": 2.44, |
|
"step": 15 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"grad_norm": 1.1789342809576489, |
|
"learning_rate": 8e-05, |
|
"loss": 2.5202, |
|
"step": 16 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"grad_norm": 0.7081838083903702, |
|
"learning_rate": 8.5e-05, |
|
"loss": 2.367, |
|
"step": 17 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"grad_norm": 0.7108144063965985, |
|
"learning_rate": 9e-05, |
|
"loss": 2.4458, |
|
"step": 18 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"grad_norm": 0.6609440232385762, |
|
"learning_rate": 9.5e-05, |
|
"loss": 2.3898, |
|
"step": 19 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"grad_norm": 0.5625207480279918, |
|
"learning_rate": 0.0001, |
|
"loss": 2.3829, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"grad_norm": 0.6335176424475211, |
|
"learning_rate": 0.000105, |
|
"loss": 2.3701, |
|
"step": 21 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"grad_norm": 0.5478182407846386, |
|
"learning_rate": 0.00011000000000000002, |
|
"loss": 2.3466, |
|
"step": 22 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"grad_norm": 0.5542241828994607, |
|
"learning_rate": 0.00011499999999999999, |
|
"loss": 2.3094, |
|
"step": 23 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"grad_norm": 0.5391779694945925, |
|
"learning_rate": 0.00012, |
|
"loss": 2.2829, |
|
"step": 24 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"grad_norm": 0.5560267338571905, |
|
"learning_rate": 0.000125, |
|
"loss": 2.3031, |
|
"step": 25 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"grad_norm": 0.5899065231096706, |
|
"learning_rate": 0.00013000000000000002, |
|
"loss": 2.3218, |
|
"step": 26 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"grad_norm": 0.5822360971814909, |
|
"learning_rate": 0.00013500000000000003, |
|
"loss": 2.2678, |
|
"step": 27 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"grad_norm": 0.5713806277891299, |
|
"learning_rate": 0.00014, |
|
"loss": 2.2085, |
|
"step": 28 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"grad_norm": 0.5778957318063205, |
|
"learning_rate": 0.000145, |
|
"loss": 2.1972, |
|
"step": 29 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"grad_norm": 0.6239096985024343, |
|
"learning_rate": 0.00015000000000000001, |
|
"loss": 2.157, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"grad_norm": 0.664987114969562, |
|
"learning_rate": 0.000155, |
|
"loss": 2.144, |
|
"step": 31 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"grad_norm": 0.6602627781365719, |
|
"learning_rate": 0.00016, |
|
"loss": 2.1819, |
|
"step": 32 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"grad_norm": 0.6953248375158279, |
|
"learning_rate": 0.000165, |
|
"loss": 2.0508, |
|
"step": 33 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"grad_norm": 0.750259033408061, |
|
"learning_rate": 0.00017, |
|
"loss": 2.129, |
|
"step": 34 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"grad_norm": 0.6643380168708766, |
|
"learning_rate": 0.000175, |
|
"loss": 2.014, |
|
"step": 35 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"grad_norm": 0.8486382402556593, |
|
"learning_rate": 0.00018, |
|
"loss": 1.9433, |
|
"step": 36 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"grad_norm": 0.7414001872340891, |
|
"learning_rate": 0.00018500000000000002, |
|
"loss": 2.0308, |
|
"step": 37 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"grad_norm": 0.7295050222316729, |
|
"learning_rate": 0.00019, |
|
"loss": 1.9284, |
|
"step": 38 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"grad_norm": 0.7235284135505695, |
|
"learning_rate": 0.000195, |
|
"loss": 1.8847, |
|
"step": 39 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"grad_norm": 0.753068728806512, |
|
"learning_rate": 0.0002, |
|
"loss": 1.8865, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"grad_norm": 0.7560315372185343, |
|
"learning_rate": 0.00019999969643677332, |
|
"loss": 1.9206, |
|
"step": 41 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"grad_norm": 0.7536264695992508, |
|
"learning_rate": 0.00019999878574893627, |
|
"loss": 1.8679, |
|
"step": 42 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"grad_norm": 0.7688437635155836, |
|
"learning_rate": 0.0001999972679420179, |
|
"loss": 1.8956, |
|
"step": 43 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"grad_norm": 0.7269625532226869, |
|
"learning_rate": 0.0001999951430252332, |
|
"loss": 1.7802, |
|
"step": 44 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"grad_norm": 0.7518593665603851, |
|
"learning_rate": 0.00019999241101148306, |
|
"loss": 1.8219, |
|
"step": 45 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"grad_norm": 0.8030012654181582, |
|
"learning_rate": 0.00019998907191735434, |
|
"loss": 1.7884, |
|
"step": 46 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"grad_norm": 0.7431537767433151, |
|
"learning_rate": 0.00019998512576311953, |
|
"loss": 1.7157, |
|
"step": 47 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"grad_norm": 0.8789668942389129, |
|
"learning_rate": 0.00019998057257273675, |
|
"loss": 1.705, |
|
"step": 48 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"grad_norm": 0.8821830829681667, |
|
"learning_rate": 0.00019997541237384966, |
|
"loss": 1.7197, |
|
"step": 49 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"grad_norm": 0.7827120449376896, |
|
"learning_rate": 0.0001999696451977872, |
|
"loss": 1.744, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"grad_norm": 0.7824589333224853, |
|
"learning_rate": 0.00019996327107956333, |
|
"loss": 1.5894, |
|
"step": 51 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"grad_norm": 0.805173848765375, |
|
"learning_rate": 0.00019995629005787713, |
|
"loss": 1.764, |
|
"step": 52 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"grad_norm": 0.8408625701287021, |
|
"learning_rate": 0.00019994870217511217, |
|
"loss": 1.7377, |
|
"step": 53 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"grad_norm": 0.7546347988189869, |
|
"learning_rate": 0.0001999405074773365, |
|
"loss": 1.7158, |
|
"step": 54 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"grad_norm": 0.821101064412719, |
|
"learning_rate": 0.0001999317060143023, |
|
"loss": 1.5648, |
|
"step": 55 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"grad_norm": 0.7901726402638417, |
|
"learning_rate": 0.00019992229783944557, |
|
"loss": 1.5496, |
|
"step": 56 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"grad_norm": 0.8488465994765656, |
|
"learning_rate": 0.00019991228300988585, |
|
"loss": 1.5493, |
|
"step": 57 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"grad_norm": 0.8859226699933315, |
|
"learning_rate": 0.0001999016615864258, |
|
"loss": 1.4703, |
|
"step": 58 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"grad_norm": 0.9572611655447181, |
|
"learning_rate": 0.0001998904336335509, |
|
"loss": 1.5522, |
|
"step": 59 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"grad_norm": 0.8654728304283881, |
|
"learning_rate": 0.00019987859921942903, |
|
"loss": 1.4136, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"grad_norm": 0.7903035834496105, |
|
"learning_rate": 0.00019986615841591002, |
|
"loss": 1.5446, |
|
"step": 61 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"grad_norm": 0.8146733074382629, |
|
"learning_rate": 0.0001998531112985253, |
|
"loss": 1.4741, |
|
"step": 62 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"grad_norm": 0.8150030052550084, |
|
"learning_rate": 0.00019983945794648734, |
|
"loss": 1.4877, |
|
"step": 63 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"grad_norm": 0.8980109486617155, |
|
"learning_rate": 0.00019982519844268933, |
|
"loss": 1.4981, |
|
"step": 64 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"grad_norm": 0.9375060640657445, |
|
"learning_rate": 0.00019981033287370443, |
|
"loss": 1.5115, |
|
"step": 65 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"grad_norm": 0.8164746637962466, |
|
"learning_rate": 0.00019979486132978545, |
|
"loss": 1.4431, |
|
"step": 66 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"grad_norm": 0.8458741113281569, |
|
"learning_rate": 0.0001997787839048642, |
|
"loss": 1.3408, |
|
"step": 67 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"grad_norm": 0.8444471080538805, |
|
"learning_rate": 0.00019976210069655104, |
|
"loss": 1.3728, |
|
"step": 68 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"grad_norm": 0.9081815525327502, |
|
"learning_rate": 0.0001997448118061341, |
|
"loss": 1.3964, |
|
"step": 69 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"grad_norm": 0.9703756561572067, |
|
"learning_rate": 0.00019972691733857883, |
|
"loss": 1.2744, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"grad_norm": 0.8300846731216797, |
|
"learning_rate": 0.00019970841740252725, |
|
"loss": 1.201, |
|
"step": 71 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"grad_norm": 0.8970681814115683, |
|
"learning_rate": 0.00019968931211029734, |
|
"loss": 1.3049, |
|
"step": 72 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"grad_norm": 0.9203036618648276, |
|
"learning_rate": 0.00019966960157788248, |
|
"loss": 1.3423, |
|
"step": 73 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"grad_norm": 0.8554417095674692, |
|
"learning_rate": 0.00019964928592495045, |
|
"loss": 1.2076, |
|
"step": 74 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"grad_norm": 0.8693522008551818, |
|
"learning_rate": 0.00019962836527484296, |
|
"loss": 1.2388, |
|
"step": 75 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"grad_norm": 0.8955501483204824, |
|
"learning_rate": 0.0001996068397545748, |
|
"loss": 1.2769, |
|
"step": 76 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"grad_norm": 0.8972853945681226, |
|
"learning_rate": 0.00019958470949483318, |
|
"loss": 1.2043, |
|
"step": 77 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"grad_norm": 0.9200733943459781, |
|
"learning_rate": 0.00019956197462997667, |
|
"loss": 1.1127, |
|
"step": 78 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"grad_norm": 1.046096681591468, |
|
"learning_rate": 0.00019953863529803466, |
|
"loss": 1.1452, |
|
"step": 79 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"grad_norm": 1.0188934085587635, |
|
"learning_rate": 0.00019951469164070646, |
|
"loss": 1.1164, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"grad_norm": 0.8178106254313157, |
|
"learning_rate": 0.00019949014380336028, |
|
"loss": 1.0532, |
|
"step": 81 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"grad_norm": 0.7925775197670102, |
|
"learning_rate": 0.00019946499193503262, |
|
"loss": 1.0213, |
|
"step": 82 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"grad_norm": 0.8643770007688552, |
|
"learning_rate": 0.000199439236188427, |
|
"loss": 1.204, |
|
"step": 83 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"grad_norm": 0.8294076531102257, |
|
"learning_rate": 0.0001994128767199135, |
|
"loss": 1.1476, |
|
"step": 84 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"grad_norm": 0.8800850098334678, |
|
"learning_rate": 0.0001993859136895274, |
|
"loss": 1.0514, |
|
"step": 85 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"grad_norm": 0.9129898809803545, |
|
"learning_rate": 0.0001993583472609683, |
|
"loss": 1.0532, |
|
"step": 86 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"grad_norm": 0.9139962309722894, |
|
"learning_rate": 0.00019933017760159937, |
|
"loss": 1.116, |
|
"step": 87 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"grad_norm": 0.8049163381340136, |
|
"learning_rate": 0.00019930140488244602, |
|
"loss": 1.0375, |
|
"step": 88 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"grad_norm": 0.7506103786806886, |
|
"learning_rate": 0.0001992720292781951, |
|
"loss": 0.9954, |
|
"step": 89 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"grad_norm": 0.8494376985530913, |
|
"learning_rate": 0.0001992420509671936, |
|
"loss": 1.011, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"grad_norm": 1.0838465138358377, |
|
"learning_rate": 0.0001992114701314478, |
|
"loss": 1.1547, |
|
"step": 91 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"grad_norm": 0.7780322978386432, |
|
"learning_rate": 0.00019918028695662207, |
|
"loss": 0.8977, |
|
"step": 92 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"grad_norm": 0.7445309005650187, |
|
"learning_rate": 0.00019914850163203768, |
|
"loss": 0.9499, |
|
"step": 93 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"grad_norm": 0.75618752117177, |
|
"learning_rate": 0.00019911611435067172, |
|
"loss": 0.9058, |
|
"step": 94 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"grad_norm": 0.922514343070264, |
|
"learning_rate": 0.00019908312530915603, |
|
"loss": 0.9812, |
|
"step": 95 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"grad_norm": 0.8335423463861146, |
|
"learning_rate": 0.00019904953470777575, |
|
"loss": 0.9237, |
|
"step": 96 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"grad_norm": 0.8569297364051901, |
|
"learning_rate": 0.0001990153427504683, |
|
"loss": 0.9101, |
|
"step": 97 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"grad_norm": 0.8431577631707102, |
|
"learning_rate": 0.00019898054964482214, |
|
"loss": 0.8826, |
|
"step": 98 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"grad_norm": 0.8393428609756466, |
|
"learning_rate": 0.00019894515560207537, |
|
"loss": 0.9449, |
|
"step": 99 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"grad_norm": 0.8009262477457554, |
|
"learning_rate": 0.0001989091608371146, |
|
"loss": 0.8172, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"eval_blimp_filtered_avg": 0.7153731343283583, |
|
"eval_blimp_filtered_std": 0.004995278040149823, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"eval_blimp_supplement_avg": 0.8275862068965517, |
|
"eval_blimp_supplement_std": 0.016574057417324883, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"eval_vqa_filtered_avg": 0.5, |
|
"eval_vqa_filtered_std": 0.050251890762960605, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"eval_winoground_filtered_avg": 0.64, |
|
"eval_winoground_filtered_std": 0.048241815132442176, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"grad_norm": 0.7731939394783305, |
|
"learning_rate": 0.0001988725655684736, |
|
"loss": 0.841, |
|
"step": 101 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"grad_norm": 0.9253945491288401, |
|
"learning_rate": 0.00019883537001833188, |
|
"loss": 0.9227, |
|
"step": 102 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"grad_norm": 0.9312582898914622, |
|
"learning_rate": 0.0001987975744125135, |
|
"loss": 0.9283, |
|
"step": 103 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"grad_norm": 0.781149897014019, |
|
"learning_rate": 0.00019875917898048558, |
|
"loss": 0.7902, |
|
"step": 104 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"grad_norm": 0.8132523662925432, |
|
"learning_rate": 0.0001987201839553569, |
|
"loss": 0.8595, |
|
"step": 105 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"grad_norm": 0.8368435917049845, |
|
"learning_rate": 0.00019868058957387663, |
|
"loss": 0.8643, |
|
"step": 106 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"grad_norm": 0.7460582173754553, |
|
"learning_rate": 0.00019864039607643273, |
|
"loss": 0.7484, |
|
"step": 107 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"grad_norm": 0.7789594633508866, |
|
"learning_rate": 0.0001985996037070505, |
|
"loss": 0.7889, |
|
"step": 108 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"grad_norm": 0.8428485721906823, |
|
"learning_rate": 0.00019855821271339125, |
|
"loss": 0.838, |
|
"step": 109 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"grad_norm": 0.8694838248366092, |
|
"learning_rate": 0.00019851622334675066, |
|
"loss": 0.77, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"grad_norm": 0.8994640080800599, |
|
"learning_rate": 0.00019847363586205727, |
|
"loss": 0.7702, |
|
"step": 111 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"grad_norm": 0.9931271763497148, |
|
"learning_rate": 0.00019843045051787096, |
|
"loss": 0.8737, |
|
"step": 112 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"grad_norm": 0.8186757528083024, |
|
"learning_rate": 0.00019838666757638135, |
|
"loss": 0.7276, |
|
"step": 113 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"grad_norm": 0.7976246474968561, |
|
"learning_rate": 0.0001983422873034063, |
|
"loss": 0.7967, |
|
"step": 114 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"grad_norm": 0.7725738117793295, |
|
"learning_rate": 0.0001982973099683902, |
|
"loss": 0.8214, |
|
"step": 115 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"grad_norm": 0.7497131152895721, |
|
"learning_rate": 0.00019825173584440232, |
|
"loss": 0.7069, |
|
"step": 116 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"grad_norm": 0.8096175366747367, |
|
"learning_rate": 0.0001982055652081352, |
|
"loss": 0.699, |
|
"step": 117 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"grad_norm": 0.8131636775412501, |
|
"learning_rate": 0.00019815879833990304, |
|
"loss": 0.7479, |
|
"step": 118 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"grad_norm": 0.8103259093372099, |
|
"learning_rate": 0.00019811143552363983, |
|
"loss": 0.7013, |
|
"step": 119 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"grad_norm": 0.8280999953040724, |
|
"learning_rate": 0.00019806347704689778, |
|
"loss": 0.6887, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"grad_norm": 0.8116880398114632, |
|
"learning_rate": 0.00019801492320084546, |
|
"loss": 0.7934, |
|
"step": 121 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"grad_norm": 0.8303791216043742, |
|
"learning_rate": 0.00019796577428026616, |
|
"loss": 0.8011, |
|
"step": 122 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"grad_norm": 0.7619123156871999, |
|
"learning_rate": 0.00019791603058355595, |
|
"loss": 0.7217, |
|
"step": 123 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"grad_norm": 0.7733520932459683, |
|
"learning_rate": 0.00019786569241272197, |
|
"loss": 0.6981, |
|
"step": 124 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"grad_norm": 0.7846699948014345, |
|
"learning_rate": 0.00019781476007338058, |
|
"loss": 0.7401, |
|
"step": 125 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"grad_norm": 0.7963675285315092, |
|
"learning_rate": 0.00019776323387475547, |
|
"loss": 0.705, |
|
"step": 126 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"grad_norm": 0.7856922504610907, |
|
"learning_rate": 0.00019771111412967583, |
|
"loss": 0.6999, |
|
"step": 127 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"grad_norm": 0.831698101406348, |
|
"learning_rate": 0.0001976584011545744, |
|
"loss": 0.7021, |
|
"step": 128 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"grad_norm": 0.8387219512744284, |
|
"learning_rate": 0.00019760509526948566, |
|
"loss": 0.7296, |
|
"step": 129 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"grad_norm": 0.8945842296083665, |
|
"learning_rate": 0.00019755119679804367, |
|
"loss": 0.7529, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"grad_norm": 0.7498731306068172, |
|
"learning_rate": 0.00019749670606748033, |
|
"loss": 0.6911, |
|
"step": 131 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"grad_norm": 0.7247722560830141, |
|
"learning_rate": 0.0001974416234086233, |
|
"loss": 0.6402, |
|
"step": 132 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"grad_norm": 0.7627410009794175, |
|
"learning_rate": 0.00019738594915589397, |
|
"loss": 0.6702, |
|
"step": 133 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"grad_norm": 0.7772008131178036, |
|
"learning_rate": 0.00019732968364730545, |
|
"loss": 0.6963, |
|
"step": 134 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"grad_norm": 0.8275796285376315, |
|
"learning_rate": 0.00019727282722446047, |
|
"loss": 0.5929, |
|
"step": 135 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"grad_norm": 0.7831120443420706, |
|
"learning_rate": 0.0001972153802325495, |
|
"loss": 0.6179, |
|
"step": 136 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"grad_norm": 0.8292277458348148, |
|
"learning_rate": 0.0001971573430203484, |
|
"loss": 0.6375, |
|
"step": 137 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"grad_norm": 0.8340476959630893, |
|
"learning_rate": 0.00019709871594021642, |
|
"loss": 0.6766, |
|
"step": 138 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"grad_norm": 0.7994798406256604, |
|
"learning_rate": 0.00019703949934809408, |
|
"loss": 0.6239, |
|
"step": 139 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"grad_norm": 0.7425672818805296, |
|
"learning_rate": 0.00019697969360350098, |
|
"loss": 0.6334, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"grad_norm": 0.7443030221681443, |
|
"learning_rate": 0.00019691929906953356, |
|
"loss": 0.6278, |
|
"step": 141 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"grad_norm": 0.7858836231557178, |
|
"learning_rate": 0.0001968583161128631, |
|
"loss": 0.6494, |
|
"step": 142 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"grad_norm": 0.7748084569986792, |
|
"learning_rate": 0.00019679674510373325, |
|
"loss": 0.6408, |
|
"step": 143 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"grad_norm": 0.7352594538958535, |
|
"learning_rate": 0.00019673458641595784, |
|
"loss": 0.54, |
|
"step": 144 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"grad_norm": 0.9565541632170729, |
|
"learning_rate": 0.00019667184042691875, |
|
"loss": 0.6669, |
|
"step": 145 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"grad_norm": 0.8159705350255112, |
|
"learning_rate": 0.00019660850751756348, |
|
"loss": 0.549, |
|
"step": 146 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"grad_norm": 0.853659410491719, |
|
"learning_rate": 0.00019654458807240283, |
|
"loss": 0.703, |
|
"step": 147 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"grad_norm": 0.7233148520384484, |
|
"learning_rate": 0.0001964800824795087, |
|
"loss": 0.5607, |
|
"step": 148 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"grad_norm": 0.8017084720492917, |
|
"learning_rate": 0.00019641499113051157, |
|
"loss": 0.6522, |
|
"step": 149 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"grad_norm": 0.6987528025960802, |
|
"learning_rate": 0.00019634931442059832, |
|
"loss": 0.5149, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"grad_norm": 0.8039264184533087, |
|
"learning_rate": 0.00019628305274850956, |
|
"loss": 0.7189, |
|
"step": 151 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"grad_norm": 0.795017894232834, |
|
"learning_rate": 0.00019621620651653744, |
|
"loss": 0.6096, |
|
"step": 152 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"grad_norm": 0.7625532547573897, |
|
"learning_rate": 0.00019614877613052312, |
|
"loss": 0.583, |
|
"step": 153 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"grad_norm": 0.8479800597198418, |
|
"learning_rate": 0.00019608076199985433, |
|
"loss": 0.5841, |
|
"step": 154 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"grad_norm": 0.7984666488345297, |
|
"learning_rate": 0.00019601216453746283, |
|
"loss": 0.617, |
|
"step": 155 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"grad_norm": 0.7291592602065788, |
|
"learning_rate": 0.00019594298415982194, |
|
"loss": 0.5409, |
|
"step": 156 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"grad_norm": 0.7444160049758751, |
|
"learning_rate": 0.0001958732212869441, |
|
"loss": 0.5235, |
|
"step": 157 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"grad_norm": 0.7698434281876576, |
|
"learning_rate": 0.00019580287634237808, |
|
"loss": 0.518, |
|
"step": 158 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"grad_norm": 0.658666402238984, |
|
"learning_rate": 0.00019573194975320673, |
|
"loss": 0.4665, |
|
"step": 159 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"grad_norm": 0.9088095098968566, |
|
"learning_rate": 0.0001956604419500441, |
|
"loss": 0.719, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"grad_norm": 0.6790919494296814, |
|
"learning_rate": 0.00019558835336703294, |
|
"loss": 0.5317, |
|
"step": 161 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"grad_norm": 0.7461814218746576, |
|
"learning_rate": 0.00019551568444184215, |
|
"loss": 0.5383, |
|
"step": 162 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"grad_norm": 0.750432430440515, |
|
"learning_rate": 0.00019544243561566403, |
|
"loss": 0.5389, |
|
"step": 163 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"grad_norm": 0.7472033806009103, |
|
"learning_rate": 0.00019536860733321152, |
|
"loss": 0.5066, |
|
"step": 164 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"grad_norm": 0.7137901167042525, |
|
"learning_rate": 0.00019529420004271567, |
|
"loss": 0.4392, |
|
"step": 165 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"grad_norm": 0.8491528440848178, |
|
"learning_rate": 0.00019521921419592283, |
|
"loss": 0.4803, |
|
"step": 166 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"grad_norm": 0.888429079441991, |
|
"learning_rate": 0.0001951436502480919, |
|
"loss": 0.4945, |
|
"step": 167 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"grad_norm": 0.7568924214482684, |
|
"learning_rate": 0.00019506750865799162, |
|
"loss": 0.5297, |
|
"step": 168 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"grad_norm": 0.772439312971248, |
|
"learning_rate": 0.0001949907898878977, |
|
"loss": 0.5506, |
|
"step": 169 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"grad_norm": 0.7127854034612457, |
|
"learning_rate": 0.00019491349440359015, |
|
"loss": 0.5152, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"grad_norm": 0.6775965473007823, |
|
"learning_rate": 0.00019483562267435018, |
|
"loss": 0.4436, |
|
"step": 171 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"grad_norm": 0.7085484218846827, |
|
"learning_rate": 0.00019475717517295778, |
|
"loss": 0.4726, |
|
"step": 172 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"grad_norm": 0.6960992654258619, |
|
"learning_rate": 0.00019467815237568842, |
|
"loss": 0.4145, |
|
"step": 173 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"grad_norm": 0.7739939422897306, |
|
"learning_rate": 0.00019459855476231043, |
|
"loss": 0.5368, |
|
"step": 174 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"grad_norm": 0.7248885499578243, |
|
"learning_rate": 0.00019451838281608197, |
|
"loss": 0.4216, |
|
"step": 175 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"grad_norm": 0.624271177758561, |
|
"learning_rate": 0.00019443763702374812, |
|
"loss": 0.3793, |
|
"step": 176 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"grad_norm": 0.6547069254648431, |
|
"learning_rate": 0.00019435631787553795, |
|
"loss": 0.3958, |
|
"step": 177 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"grad_norm": 0.7974662713630617, |
|
"learning_rate": 0.00019427442586516155, |
|
"loss": 0.4547, |
|
"step": 178 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"grad_norm": 0.6979645264553271, |
|
"learning_rate": 0.00019419196148980693, |
|
"loss": 0.4269, |
|
"step": 179 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"grad_norm": 0.7406062144284534, |
|
"learning_rate": 0.0001941089252501372, |
|
"loss": 0.4589, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"grad_norm": 0.7721441112785269, |
|
"learning_rate": 0.00019402531765028722, |
|
"loss": 0.5091, |
|
"step": 181 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"grad_norm": 0.6823443706634189, |
|
"learning_rate": 0.00019394113919786094, |
|
"loss": 0.4578, |
|
"step": 182 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"grad_norm": 0.6296709496710926, |
|
"learning_rate": 0.00019385639040392803, |
|
"loss": 0.4192, |
|
"step": 183 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"grad_norm": 0.7391260784858245, |
|
"learning_rate": 0.00019377107178302074, |
|
"loss": 0.5101, |
|
"step": 184 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"grad_norm": 0.7622753386981794, |
|
"learning_rate": 0.00019368518385313107, |
|
"loss": 0.5035, |
|
"step": 185 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"grad_norm": 0.7760535426376107, |
|
"learning_rate": 0.00019359872713570732, |
|
"loss": 0.4588, |
|
"step": 186 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"grad_norm": 0.7543376381267045, |
|
"learning_rate": 0.00019351170215565114, |
|
"loss": 0.4783, |
|
"step": 187 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"grad_norm": 0.761968062534956, |
|
"learning_rate": 0.00019342410944131415, |
|
"loss": 0.4452, |
|
"step": 188 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"grad_norm": 0.6064844487399527, |
|
"learning_rate": 0.00019333594952449488, |
|
"loss": 0.3948, |
|
"step": 189 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"grad_norm": 0.663418856322293, |
|
"learning_rate": 0.00019324722294043558, |
|
"loss": 0.3929, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"grad_norm": 0.7628743041097833, |
|
"learning_rate": 0.00019315793022781877, |
|
"loss": 0.4686, |
|
"step": 191 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"grad_norm": 0.7308155097864294, |
|
"learning_rate": 0.00019306807192876412, |
|
"loss": 0.4433, |
|
"step": 192 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"grad_norm": 0.6266943285552069, |
|
"learning_rate": 0.00019297764858882514, |
|
"loss": 0.3778, |
|
"step": 193 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"grad_norm": 0.7304858297996167, |
|
"learning_rate": 0.00019288666075698588, |
|
"loss": 0.4559, |
|
"step": 194 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"grad_norm": 0.793956243255512, |
|
"learning_rate": 0.0001927951089856575, |
|
"loss": 0.5042, |
|
"step": 195 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"grad_norm": 0.7521517748378912, |
|
"learning_rate": 0.00019270299383067498, |
|
"loss": 0.4876, |
|
"step": 196 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"grad_norm": 0.6541305777620361, |
|
"learning_rate": 0.00019261031585129386, |
|
"loss": 0.4332, |
|
"step": 197 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"grad_norm": 0.7459198000292951, |
|
"learning_rate": 0.0001925170756101867, |
|
"loss": 0.4742, |
|
"step": 198 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"grad_norm": 0.7351417487869903, |
|
"learning_rate": 0.0001924232736734396, |
|
"loss": 0.4613, |
|
"step": 199 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"grad_norm": 0.6880337228260249, |
|
"learning_rate": 0.00019232891061054895, |
|
"loss": 0.4507, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"eval_blimp_filtered_avg": 0.7174626865671642, |
|
"eval_blimp_filtered_std": 0.004966500893623926, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"eval_blimp_supplement_avg": 0.8448275862068966, |
|
"eval_blimp_supplement_std": 0.015878951045947127, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"eval_vqa_filtered_avg": 0.49, |
|
"eval_vqa_filtered_std": 0.05024183937956912, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"eval_winoground_filtered_avg": 0.68, |
|
"eval_winoground_filtered_std": 0.046882617226215034, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"grad_norm": 0.6676123005810946, |
|
"learning_rate": 0.00019223398699441785, |
|
"loss": 0.4456, |
|
"step": 201 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"grad_norm": 0.6441892256930322, |
|
"learning_rate": 0.00019213850340135276, |
|
"loss": 0.3842, |
|
"step": 202 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"grad_norm": 0.7664915489965373, |
|
"learning_rate": 0.00019204246041105974, |
|
"loss": 0.4655, |
|
"step": 203 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"grad_norm": 0.7694952589016031, |
|
"learning_rate": 0.0001919458586066412, |
|
"loss": 0.4042, |
|
"step": 204 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"grad_norm": 0.8094634626332351, |
|
"learning_rate": 0.00019184869857459232, |
|
"loss": 0.4796, |
|
"step": 205 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"grad_norm": 0.7250896027775108, |
|
"learning_rate": 0.00019175098090479727, |
|
"loss": 0.4493, |
|
"step": 206 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"grad_norm": 0.695912278074156, |
|
"learning_rate": 0.00019165270619052595, |
|
"loss": 0.3892, |
|
"step": 207 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"grad_norm": 0.6437397715709255, |
|
"learning_rate": 0.00019155387502843013, |
|
"loss": 0.3551, |
|
"step": 208 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"grad_norm": 0.6624223376721595, |
|
"learning_rate": 0.00019145448801853989, |
|
"loss": 0.3824, |
|
"step": 209 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"grad_norm": 0.7132818106598392, |
|
"learning_rate": 0.0001913545457642601, |
|
"loss": 0.443, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"grad_norm": 0.5835412605472261, |
|
"learning_rate": 0.00019125404887236663, |
|
"loss": 0.2834, |
|
"step": 211 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"grad_norm": 0.7082542269732592, |
|
"learning_rate": 0.00019115299795300267, |
|
"loss": 0.3691, |
|
"step": 212 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"grad_norm": 0.8422813525765561, |
|
"learning_rate": 0.00019105139361967507, |
|
"loss": 0.4764, |
|
"step": 213 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"grad_norm": 0.827672240696612, |
|
"learning_rate": 0.00019094923648925067, |
|
"loss": 0.4577, |
|
"step": 214 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"grad_norm": 0.6311700380194617, |
|
"learning_rate": 0.00019084652718195238, |
|
"loss": 0.3641, |
|
"step": 215 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"grad_norm": 0.6733244279961911, |
|
"learning_rate": 0.00019074326632135562, |
|
"loss": 0.3914, |
|
"step": 216 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"grad_norm": 0.688661851686969, |
|
"learning_rate": 0.00019063945453438432, |
|
"loss": 0.4429, |
|
"step": 217 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"grad_norm": 0.593793920110184, |
|
"learning_rate": 0.0001905350924513074, |
|
"loss": 0.3195, |
|
"step": 218 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"grad_norm": 0.6401900884414997, |
|
"learning_rate": 0.0001904301807057346, |
|
"loss": 0.3838, |
|
"step": 219 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"grad_norm": 0.6222158517510352, |
|
"learning_rate": 0.0001903247199346129, |
|
"loss": 0.3656, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"grad_norm": 0.6346751167318555, |
|
"learning_rate": 0.00019021871077822255, |
|
"loss": 0.3423, |
|
"step": 221 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"grad_norm": 0.6573146165491399, |
|
"learning_rate": 0.00019011215388017316, |
|
"loss": 0.3611, |
|
"step": 222 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"grad_norm": 0.7506685575783819, |
|
"learning_rate": 0.00019000504988739986, |
|
"loss": 0.4136, |
|
"step": 223 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"grad_norm": 0.7933264473843609, |
|
"learning_rate": 0.00018989739945015933, |
|
"loss": 0.4243, |
|
"step": 224 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"grad_norm": 0.7056764465112211, |
|
"learning_rate": 0.00018978920322202582, |
|
"loss": 0.393, |
|
"step": 225 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"grad_norm": 0.6440690477957951, |
|
"learning_rate": 0.00018968046185988732, |
|
"loss": 0.3339, |
|
"step": 226 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"grad_norm": 0.7240338095005376, |
|
"learning_rate": 0.0001895711760239413, |
|
"loss": 0.4307, |
|
"step": 227 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"grad_norm": 0.6482583513704759, |
|
"learning_rate": 0.00018946134637769105, |
|
"loss": 0.348, |
|
"step": 228 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"grad_norm": 0.6314129612332641, |
|
"learning_rate": 0.00018935097358794144, |
|
"loss": 0.3594, |
|
"step": 229 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"grad_norm": 0.6952900742432684, |
|
"learning_rate": 0.00018924005832479478, |
|
"loss": 0.4159, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"grad_norm": 0.6973683572083686, |
|
"learning_rate": 0.00018912860126164707, |
|
"loss": 0.372, |
|
"step": 231 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"grad_norm": 0.6228752985295244, |
|
"learning_rate": 0.00018901660307518354, |
|
"loss": 0.3182, |
|
"step": 232 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"grad_norm": 0.6853492125246201, |
|
"learning_rate": 0.00018890406444537486, |
|
"loss": 0.3377, |
|
"step": 233 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"grad_norm": 0.7047592541956417, |
|
"learning_rate": 0.0001887909860554728, |
|
"loss": 0.3938, |
|
"step": 234 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"grad_norm": 0.7514455442163654, |
|
"learning_rate": 0.0001886773685920062, |
|
"loss": 0.4148, |
|
"step": 235 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"grad_norm": 0.6032514516825037, |
|
"learning_rate": 0.00018856321274477673, |
|
"loss": 0.2853, |
|
"step": 236 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"grad_norm": 0.6566649001522608, |
|
"learning_rate": 0.0001884485192068547, |
|
"loss": 0.333, |
|
"step": 237 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"grad_norm": 0.5910387459158075, |
|
"learning_rate": 0.00018833328867457497, |
|
"loss": 0.2821, |
|
"step": 238 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"grad_norm": 0.6135310255057351, |
|
"learning_rate": 0.00018821752184753252, |
|
"loss": 0.3528, |
|
"step": 239 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"grad_norm": 0.6456839429891811, |
|
"learning_rate": 0.00018810121942857845, |
|
"loss": 0.3332, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"grad_norm": 0.7053660522639559, |
|
"learning_rate": 0.0001879843821238155, |
|
"loss": 0.38, |
|
"step": 241 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"grad_norm": 0.6185890160689334, |
|
"learning_rate": 0.00018786701064259383, |
|
"loss": 0.2801, |
|
"step": 242 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"grad_norm": 0.7139325318031294, |
|
"learning_rate": 0.00018774910569750673, |
|
"loss": 0.3278, |
|
"step": 243 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"grad_norm": 0.7529151224886904, |
|
"learning_rate": 0.00018763066800438636, |
|
"loss": 0.3163, |
|
"step": 244 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"grad_norm": 0.7718980285573507, |
|
"learning_rate": 0.00018751169828229927, |
|
"loss": 0.3497, |
|
"step": 245 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"grad_norm": 0.6881945450213135, |
|
"learning_rate": 0.00018739219725354212, |
|
"loss": 0.3393, |
|
"step": 246 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"grad_norm": 0.6753691180440268, |
|
"learning_rate": 0.00018727216564363723, |
|
"loss": 0.3151, |
|
"step": 247 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"grad_norm": 0.6015854239315479, |
|
"learning_rate": 0.00018715160418132832, |
|
"loss": 0.3239, |
|
"step": 248 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"grad_norm": 0.5509912220014497, |
|
"learning_rate": 0.00018703051359857586, |
|
"loss": 0.2652, |
|
"step": 249 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"grad_norm": 0.6581124878526092, |
|
"learning_rate": 0.00018690889463055283, |
|
"loss": 0.3287, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"grad_norm": 0.6346317349050878, |
|
"learning_rate": 0.0001867867480156402, |
|
"loss": 0.2897, |
|
"step": 251 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"grad_norm": 0.658112039759539, |
|
"learning_rate": 0.00018666407449542232, |
|
"loss": 0.2801, |
|
"step": 252 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"grad_norm": 0.7964252151066129, |
|
"learning_rate": 0.0001865408748146826, |
|
"loss": 0.3569, |
|
"step": 253 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"grad_norm": 0.7087027514563875, |
|
"learning_rate": 0.0001864171497213989, |
|
"loss": 0.319, |
|
"step": 254 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"grad_norm": 0.5693751866666873, |
|
"learning_rate": 0.00018629289996673897, |
|
"loss": 0.296, |
|
"step": 255 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"grad_norm": 0.4858712761873739, |
|
"learning_rate": 0.00018616812630505597, |
|
"loss": 0.2337, |
|
"step": 256 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"grad_norm": 0.6605776129831655, |
|
"learning_rate": 0.0001860428294938838, |
|
"loss": 0.4049, |
|
"step": 257 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"grad_norm": 0.5884323793209852, |
|
"learning_rate": 0.00018591701029393255, |
|
"loss": 0.2898, |
|
"step": 258 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"grad_norm": 0.6234908079533132, |
|
"learning_rate": 0.00018579066946908384, |
|
"loss": 0.3411, |
|
"step": 259 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"grad_norm": 0.5283455200292801, |
|
"learning_rate": 0.00018566380778638628, |
|
"loss": 0.2715, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"grad_norm": 0.5257190836713216, |
|
"learning_rate": 0.00018553642601605068, |
|
"loss": 0.242, |
|
"step": 261 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"grad_norm": 0.6940858344290759, |
|
"learning_rate": 0.00018540852493144545, |
|
"loss": 0.3354, |
|
"step": 262 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"grad_norm": 0.6657062269261629, |
|
"learning_rate": 0.00018528010530909192, |
|
"loss": 0.3073, |
|
"step": 263 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"grad_norm": 0.5838419940555095, |
|
"learning_rate": 0.00018515116792865957, |
|
"loss": 0.2293, |
|
"step": 264 |
|
}, |
|
{ |
|
"epoch": 1.01, |
|
"grad_norm": 0.6175240728549981, |
|
"learning_rate": 0.00018502171357296144, |
|
"loss": 0.2266, |
|
"step": 265 |
|
}, |
|
{ |
|
"epoch": 1.01, |
|
"grad_norm": 0.514362276581525, |
|
"learning_rate": 0.00018489174302794905, |
|
"loss": 0.2012, |
|
"step": 266 |
|
}, |
|
{ |
|
"epoch": 1.02, |
|
"grad_norm": 0.5495577560324101, |
|
"learning_rate": 0.000184761257082708, |
|
"loss": 0.1992, |
|
"step": 267 |
|
}, |
|
{ |
|
"epoch": 1.02, |
|
"grad_norm": 0.5803859857188628, |
|
"learning_rate": 0.000184630256529453, |
|
"loss": 0.2063, |
|
"step": 268 |
|
}, |
|
{ |
|
"epoch": 1.02, |
|
"grad_norm": 0.6033165262234346, |
|
"learning_rate": 0.00018449874216352306, |
|
"loss": 0.1708, |
|
"step": 269 |
|
}, |
|
{ |
|
"epoch": 1.03, |
|
"grad_norm": 0.6125263532068304, |
|
"learning_rate": 0.00018436671478337666, |
|
"loss": 0.1831, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 1.03, |
|
"grad_norm": 0.5508763917497557, |
|
"learning_rate": 0.00018423417519058694, |
|
"loss": 0.1907, |
|
"step": 271 |
|
}, |
|
{ |
|
"epoch": 1.03, |
|
"grad_norm": 0.6343768467745463, |
|
"learning_rate": 0.0001841011241898369, |
|
"loss": 0.2211, |
|
"step": 272 |
|
}, |
|
{ |
|
"epoch": 1.04, |
|
"grad_norm": 0.5592715854666213, |
|
"learning_rate": 0.0001839675625889143, |
|
"loss": 0.1835, |
|
"step": 273 |
|
}, |
|
{ |
|
"epoch": 1.04, |
|
"grad_norm": 0.6853195392487573, |
|
"learning_rate": 0.00018383349119870695, |
|
"loss": 0.2482, |
|
"step": 274 |
|
}, |
|
{ |
|
"epoch": 1.05, |
|
"grad_norm": 0.7078404397255201, |
|
"learning_rate": 0.00018369891083319778, |
|
"loss": 0.2346, |
|
"step": 275 |
|
}, |
|
{ |
|
"epoch": 1.05, |
|
"grad_norm": 0.5364140790780816, |
|
"learning_rate": 0.00018356382230945976, |
|
"loss": 0.2005, |
|
"step": 276 |
|
}, |
|
{ |
|
"epoch": 1.05, |
|
"grad_norm": 0.5699757158649345, |
|
"learning_rate": 0.00018342822644765104, |
|
"loss": 0.1952, |
|
"step": 277 |
|
}, |
|
{ |
|
"epoch": 1.06, |
|
"grad_norm": 0.533449428098461, |
|
"learning_rate": 0.00018329212407100994, |
|
"loss": 0.1827, |
|
"step": 278 |
|
}, |
|
{ |
|
"epoch": 1.06, |
|
"grad_norm": 0.5434329673812834, |
|
"learning_rate": 0.00018315551600585009, |
|
"loss": 0.1977, |
|
"step": 279 |
|
}, |
|
{ |
|
"epoch": 1.06, |
|
"grad_norm": 0.6086249429030556, |
|
"learning_rate": 0.00018301840308155507, |
|
"loss": 0.2075, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 1.07, |
|
"grad_norm": 0.5193552341089737, |
|
"learning_rate": 0.0001828807861305738, |
|
"loss": 0.1774, |
|
"step": 281 |
|
}, |
|
{ |
|
"epoch": 1.07, |
|
"grad_norm": 0.5899649673976222, |
|
"learning_rate": 0.00018274266598841517, |
|
"loss": 0.1935, |
|
"step": 282 |
|
}, |
|
{ |
|
"epoch": 1.08, |
|
"grad_norm": 0.6261684110299474, |
|
"learning_rate": 0.0001826040434936431, |
|
"loss": 0.1879, |
|
"step": 283 |
|
}, |
|
{ |
|
"epoch": 1.08, |
|
"grad_norm": 0.5177380809892513, |
|
"learning_rate": 0.0001824649194878714, |
|
"loss": 0.1605, |
|
"step": 284 |
|
}, |
|
{ |
|
"epoch": 1.08, |
|
"grad_norm": 0.6856627216594803, |
|
"learning_rate": 0.00018232529481575872, |
|
"loss": 0.1959, |
|
"step": 285 |
|
}, |
|
{ |
|
"epoch": 1.09, |
|
"grad_norm": 0.7111645847311474, |
|
"learning_rate": 0.00018218517032500344, |
|
"loss": 0.2339, |
|
"step": 286 |
|
}, |
|
{ |
|
"epoch": 1.09, |
|
"grad_norm": 0.6073029325620858, |
|
"learning_rate": 0.00018204454686633834, |
|
"loss": 0.1954, |
|
"step": 287 |
|
}, |
|
{ |
|
"epoch": 1.1, |
|
"grad_norm": 0.5935505620907925, |
|
"learning_rate": 0.00018190342529352565, |
|
"loss": 0.1935, |
|
"step": 288 |
|
}, |
|
{ |
|
"epoch": 1.1, |
|
"grad_norm": 0.6542152124825684, |
|
"learning_rate": 0.0001817618064633518, |
|
"loss": 0.2287, |
|
"step": 289 |
|
}, |
|
{ |
|
"epoch": 1.1, |
|
"grad_norm": 0.5147785285870337, |
|
"learning_rate": 0.0001816196912356222, |
|
"loss": 0.182, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 1.11, |
|
"grad_norm": 0.5727570935780749, |
|
"learning_rate": 0.00018147708047315587, |
|
"loss": 0.1815, |
|
"step": 291 |
|
}, |
|
{ |
|
"epoch": 1.11, |
|
"grad_norm": 0.5657627178730809, |
|
"learning_rate": 0.00018133397504178057, |
|
"loss": 0.2132, |
|
"step": 292 |
|
}, |
|
{ |
|
"epoch": 1.11, |
|
"grad_norm": 0.5662562121438198, |
|
"learning_rate": 0.00018119037581032724, |
|
"loss": 0.2056, |
|
"step": 293 |
|
}, |
|
{ |
|
"epoch": 1.12, |
|
"grad_norm": 0.6099744881610398, |
|
"learning_rate": 0.00018104628365062477, |
|
"loss": 0.213, |
|
"step": 294 |
|
}, |
|
{ |
|
"epoch": 1.12, |
|
"grad_norm": 0.5460632540608352, |
|
"learning_rate": 0.00018090169943749476, |
|
"loss": 0.162, |
|
"step": 295 |
|
}, |
|
{ |
|
"epoch": 1.13, |
|
"grad_norm": 0.5391352126605721, |
|
"learning_rate": 0.00018075662404874626, |
|
"loss": 0.1893, |
|
"step": 296 |
|
}, |
|
{ |
|
"epoch": 1.13, |
|
"grad_norm": 0.6494284594879935, |
|
"learning_rate": 0.00018061105836517024, |
|
"loss": 0.2291, |
|
"step": 297 |
|
}, |
|
{ |
|
"epoch": 1.13, |
|
"grad_norm": 0.5004570177715079, |
|
"learning_rate": 0.00018046500327053463, |
|
"loss": 0.1541, |
|
"step": 298 |
|
}, |
|
{ |
|
"epoch": 1.14, |
|
"grad_norm": 0.5918866837652731, |
|
"learning_rate": 0.0001803184596515784, |
|
"loss": 0.2188, |
|
"step": 299 |
|
}, |
|
{ |
|
"epoch": 1.14, |
|
"grad_norm": 0.4814928700454864, |
|
"learning_rate": 0.00018017142839800668, |
|
"loss": 0.1371, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 1.14, |
|
"eval_blimp_filtered_avg": 0.7256716417910448, |
|
"eval_blimp_filtered_std": 0.004928318952407523, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 1.14, |
|
"eval_blimp_supplement_avg": 0.8405172413793104, |
|
"eval_blimp_supplement_std": 0.016405077514349695, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 1.14, |
|
"eval_vqa_filtered_avg": 0.52, |
|
"eval_vqa_filtered_std": 0.05021167315686779, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 1.14, |
|
"eval_winoground_filtered_avg": 0.65, |
|
"eval_winoground_filtered_std": 0.047937248544110196, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 1.14, |
|
"grad_norm": 0.5140990035555452, |
|
"learning_rate": 0.0001800239104024851, |
|
"loss": 0.1533, |
|
"step": 301 |
|
}, |
|
{ |
|
"epoch": 1.15, |
|
"grad_norm": 0.4933080278392121, |
|
"learning_rate": 0.0001798759065606345, |
|
"loss": 0.1556, |
|
"step": 302 |
|
}, |
|
{ |
|
"epoch": 1.15, |
|
"grad_norm": 0.5882761858054979, |
|
"learning_rate": 0.00017972741777102523, |
|
"loss": 0.2151, |
|
"step": 303 |
|
}, |
|
{ |
|
"epoch": 1.16, |
|
"grad_norm": 0.5476770873594954, |
|
"learning_rate": 0.00017957844493517213, |
|
"loss": 0.1809, |
|
"step": 304 |
|
}, |
|
{ |
|
"epoch": 1.16, |
|
"grad_norm": 0.6421832624123264, |
|
"learning_rate": 0.0001794289889575286, |
|
"loss": 0.2046, |
|
"step": 305 |
|
}, |
|
{ |
|
"epoch": 1.16, |
|
"grad_norm": 0.5179683138411204, |
|
"learning_rate": 0.0001792790507454815, |
|
"loss": 0.176, |
|
"step": 306 |
|
}, |
|
{ |
|
"epoch": 1.17, |
|
"grad_norm": 0.604477016740788, |
|
"learning_rate": 0.00017912863120934534, |
|
"loss": 0.2174, |
|
"step": 307 |
|
}, |
|
{ |
|
"epoch": 1.17, |
|
"grad_norm": 0.5803547980215563, |
|
"learning_rate": 0.00017897773126235688, |
|
"loss": 0.1599, |
|
"step": 308 |
|
}, |
|
{ |
|
"epoch": 1.17, |
|
"grad_norm": 0.5391766451927411, |
|
"learning_rate": 0.0001788263518206697, |
|
"loss": 0.1867, |
|
"step": 309 |
|
}, |
|
{ |
|
"epoch": 1.18, |
|
"grad_norm": 0.5008335310625588, |
|
"learning_rate": 0.00017867449380334834, |
|
"loss": 0.1543, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 1.18, |
|
"grad_norm": 0.5344073359347221, |
|
"learning_rate": 0.00017852215813236305, |
|
"loss": 0.1836, |
|
"step": 311 |
|
}, |
|
{ |
|
"epoch": 1.19, |
|
"grad_norm": 0.5560594943415273, |
|
"learning_rate": 0.000178369345732584, |
|
"loss": 0.1878, |
|
"step": 312 |
|
}, |
|
{ |
|
"epoch": 1.19, |
|
"grad_norm": 0.4967512656076653, |
|
"learning_rate": 0.00017821605753177562, |
|
"loss": 0.1643, |
|
"step": 313 |
|
}, |
|
{ |
|
"epoch": 1.19, |
|
"grad_norm": 0.4616678862036335, |
|
"learning_rate": 0.00017806229446059124, |
|
"loss": 0.1405, |
|
"step": 314 |
|
}, |
|
{ |
|
"epoch": 1.2, |
|
"grad_norm": 0.5366458032488048, |
|
"learning_rate": 0.00017790805745256704, |
|
"loss": 0.1563, |
|
"step": 315 |
|
}, |
|
{ |
|
"epoch": 1.2, |
|
"grad_norm": 0.5850965548297408, |
|
"learning_rate": 0.00017775334744411678, |
|
"loss": 0.1735, |
|
"step": 316 |
|
}, |
|
{ |
|
"epoch": 1.21, |
|
"grad_norm": 0.6019045486967092, |
|
"learning_rate": 0.00017759816537452574, |
|
"loss": 0.1702, |
|
"step": 317 |
|
}, |
|
{ |
|
"epoch": 1.21, |
|
"grad_norm": 0.5447191325941722, |
|
"learning_rate": 0.00017744251218594542, |
|
"loss": 0.1536, |
|
"step": 318 |
|
}, |
|
{ |
|
"epoch": 1.21, |
|
"grad_norm": 0.5782504760607824, |
|
"learning_rate": 0.00017728638882338746, |
|
"loss": 0.1765, |
|
"step": 319 |
|
}, |
|
{ |
|
"epoch": 1.22, |
|
"grad_norm": 0.6057705101997692, |
|
"learning_rate": 0.00017712979623471807, |
|
"loss": 0.1771, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 1.22, |
|
"grad_norm": 0.6097931752718987, |
|
"learning_rate": 0.00017697273537065232, |
|
"loss": 0.2139, |
|
"step": 321 |
|
}, |
|
{ |
|
"epoch": 1.22, |
|
"grad_norm": 0.56308892592465, |
|
"learning_rate": 0.00017681520718474823, |
|
"loss": 0.1959, |
|
"step": 322 |
|
}, |
|
{ |
|
"epoch": 1.23, |
|
"grad_norm": 0.48304728057105417, |
|
"learning_rate": 0.00017665721263340113, |
|
"loss": 0.1541, |
|
"step": 323 |
|
}, |
|
{ |
|
"epoch": 1.23, |
|
"grad_norm": 0.5046672087816175, |
|
"learning_rate": 0.0001764987526758377, |
|
"loss": 0.2057, |
|
"step": 324 |
|
}, |
|
{ |
|
"epoch": 1.24, |
|
"grad_norm": 0.591812842303311, |
|
"learning_rate": 0.00017633982827411032, |
|
"loss": 0.2117, |
|
"step": 325 |
|
}, |
|
{ |
|
"epoch": 1.24, |
|
"grad_norm": 0.5124492509013987, |
|
"learning_rate": 0.00017618044039309098, |
|
"loss": 0.1782, |
|
"step": 326 |
|
}, |
|
{ |
|
"epoch": 1.24, |
|
"grad_norm": 0.49461512298625476, |
|
"learning_rate": 0.0001760205900004657, |
|
"loss": 0.1587, |
|
"step": 327 |
|
}, |
|
{ |
|
"epoch": 1.25, |
|
"grad_norm": 0.5072005711040857, |
|
"learning_rate": 0.00017586027806672857, |
|
"loss": 0.1551, |
|
"step": 328 |
|
}, |
|
{ |
|
"epoch": 1.25, |
|
"grad_norm": 0.7166883343661715, |
|
"learning_rate": 0.00017569950556517566, |
|
"loss": 0.2556, |
|
"step": 329 |
|
}, |
|
{ |
|
"epoch": 1.25, |
|
"grad_norm": 0.6256555159933375, |
|
"learning_rate": 0.00017553827347189938, |
|
"loss": 0.183, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 1.26, |
|
"grad_norm": 0.533828362038465, |
|
"learning_rate": 0.00017537658276578247, |
|
"loss": 0.1606, |
|
"step": 331 |
|
}, |
|
{ |
|
"epoch": 1.26, |
|
"grad_norm": 0.5661675325630594, |
|
"learning_rate": 0.00017521443442849188, |
|
"loss": 0.1789, |
|
"step": 332 |
|
}, |
|
{ |
|
"epoch": 1.27, |
|
"grad_norm": 0.5680894670630955, |
|
"learning_rate": 0.00017505182944447316, |
|
"loss": 0.1879, |
|
"step": 333 |
|
}, |
|
{ |
|
"epoch": 1.27, |
|
"grad_norm": 0.4817701078567901, |
|
"learning_rate": 0.00017488876880094413, |
|
"loss": 0.1521, |
|
"step": 334 |
|
}, |
|
{ |
|
"epoch": 1.27, |
|
"grad_norm": 0.4821470961031985, |
|
"learning_rate": 0.0001747252534878891, |
|
"loss": 0.1463, |
|
"step": 335 |
|
}, |
|
{ |
|
"epoch": 1.28, |
|
"grad_norm": 0.49328987347131154, |
|
"learning_rate": 0.0001745612844980528, |
|
"loss": 0.1717, |
|
"step": 336 |
|
}, |
|
{ |
|
"epoch": 1.28, |
|
"grad_norm": 0.5124773897350241, |
|
"learning_rate": 0.00017439686282693436, |
|
"loss": 0.1664, |
|
"step": 337 |
|
}, |
|
{ |
|
"epoch": 1.29, |
|
"grad_norm": 0.4947058789125862, |
|
"learning_rate": 0.00017423198947278117, |
|
"loss": 0.155, |
|
"step": 338 |
|
}, |
|
{ |
|
"epoch": 1.29, |
|
"grad_norm": 0.49669905069620973, |
|
"learning_rate": 0.00017406666543658304, |
|
"loss": 0.1317, |
|
"step": 339 |
|
}, |
|
{ |
|
"epoch": 1.29, |
|
"grad_norm": 0.5945246572487285, |
|
"learning_rate": 0.00017390089172206592, |
|
"loss": 0.179, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 1.3, |
|
"grad_norm": 0.6088690966027199, |
|
"learning_rate": 0.00017373466933568588, |
|
"loss": 0.1688, |
|
"step": 341 |
|
}, |
|
{ |
|
"epoch": 1.3, |
|
"grad_norm": 0.5504854313763279, |
|
"learning_rate": 0.00017356799928662297, |
|
"loss": 0.1409, |
|
"step": 342 |
|
}, |
|
{ |
|
"epoch": 1.3, |
|
"grad_norm": 0.5616999197359607, |
|
"learning_rate": 0.00017340088258677522, |
|
"loss": 0.1611, |
|
"step": 343 |
|
}, |
|
{ |
|
"epoch": 1.31, |
|
"grad_norm": 0.5630261456683686, |
|
"learning_rate": 0.00017323332025075223, |
|
"loss": 0.1424, |
|
"step": 344 |
|
}, |
|
{ |
|
"epoch": 1.31, |
|
"grad_norm": 0.5286011853132182, |
|
"learning_rate": 0.00017306531329586933, |
|
"loss": 0.1435, |
|
"step": 345 |
|
}, |
|
{ |
|
"epoch": 1.32, |
|
"grad_norm": 0.5614256677889038, |
|
"learning_rate": 0.00017289686274214118, |
|
"loss": 0.1717, |
|
"step": 346 |
|
}, |
|
{ |
|
"epoch": 1.32, |
|
"grad_norm": 0.5877414440813399, |
|
"learning_rate": 0.00017272796961227563, |
|
"loss": 0.15, |
|
"step": 347 |
|
}, |
|
{ |
|
"epoch": 1.32, |
|
"grad_norm": 0.5368624648164415, |
|
"learning_rate": 0.00017255863493166756, |
|
"loss": 0.1549, |
|
"step": 348 |
|
}, |
|
{ |
|
"epoch": 1.33, |
|
"grad_norm": 0.5500707727315131, |
|
"learning_rate": 0.0001723888597283926, |
|
"loss": 0.175, |
|
"step": 349 |
|
}, |
|
{ |
|
"epoch": 1.33, |
|
"grad_norm": 0.6833139983554208, |
|
"learning_rate": 0.00017221864503320092, |
|
"loss": 0.1678, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 1.33, |
|
"grad_norm": 0.5282683935361436, |
|
"learning_rate": 0.00017204799187951105, |
|
"loss": 0.1464, |
|
"step": 351 |
|
}, |
|
{ |
|
"epoch": 1.34, |
|
"grad_norm": 0.6220938804700495, |
|
"learning_rate": 0.00017187690130340328, |
|
"loss": 0.1936, |
|
"step": 352 |
|
}, |
|
{ |
|
"epoch": 1.34, |
|
"grad_norm": 0.5368424100980813, |
|
"learning_rate": 0.00017170537434361386, |
|
"loss": 0.16, |
|
"step": 353 |
|
}, |
|
{ |
|
"epoch": 1.35, |
|
"grad_norm": 0.4964367798749474, |
|
"learning_rate": 0.0001715334120415283, |
|
"loss": 0.1315, |
|
"step": 354 |
|
}, |
|
{ |
|
"epoch": 1.35, |
|
"grad_norm": 0.5332125576256961, |
|
"learning_rate": 0.00017136101544117525, |
|
"loss": 0.1601, |
|
"step": 355 |
|
}, |
|
{ |
|
"epoch": 1.35, |
|
"grad_norm": 0.6452241501826331, |
|
"learning_rate": 0.00017118818558922003, |
|
"loss": 0.1944, |
|
"step": 356 |
|
}, |
|
{ |
|
"epoch": 1.36, |
|
"grad_norm": 0.5478001238846428, |
|
"learning_rate": 0.00017101492353495845, |
|
"loss": 0.1628, |
|
"step": 357 |
|
}, |
|
{ |
|
"epoch": 1.36, |
|
"grad_norm": 0.5907872420921463, |
|
"learning_rate": 0.00017084123033031024, |
|
"loss": 0.1979, |
|
"step": 358 |
|
}, |
|
{ |
|
"epoch": 1.37, |
|
"grad_norm": 0.5539786918398002, |
|
"learning_rate": 0.0001706671070298128, |
|
"loss": 0.1569, |
|
"step": 359 |
|
}, |
|
{ |
|
"epoch": 1.37, |
|
"grad_norm": 0.504557687915929, |
|
"learning_rate": 0.00017049255469061474, |
|
"loss": 0.1676, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 1.37, |
|
"grad_norm": 0.5193398413523059, |
|
"learning_rate": 0.00017031757437246947, |
|
"loss": 0.1535, |
|
"step": 361 |
|
}, |
|
{ |
|
"epoch": 1.38, |
|
"grad_norm": 0.5089502238747983, |
|
"learning_rate": 0.00017014216713772884, |
|
"loss": 0.1609, |
|
"step": 362 |
|
}, |
|
{ |
|
"epoch": 1.38, |
|
"grad_norm": 0.5090619290397491, |
|
"learning_rate": 0.00016996633405133655, |
|
"loss": 0.1601, |
|
"step": 363 |
|
}, |
|
{ |
|
"epoch": 1.38, |
|
"grad_norm": 0.5294032162956256, |
|
"learning_rate": 0.00016979007618082175, |
|
"loss": 0.1416, |
|
"step": 364 |
|
}, |
|
{ |
|
"epoch": 1.39, |
|
"grad_norm": 0.5376905990718213, |
|
"learning_rate": 0.0001696133945962927, |
|
"loss": 0.1704, |
|
"step": 365 |
|
}, |
|
{ |
|
"epoch": 1.39, |
|
"grad_norm": 0.6016988384720686, |
|
"learning_rate": 0.0001694362903704299, |
|
"loss": 0.1724, |
|
"step": 366 |
|
}, |
|
{ |
|
"epoch": 1.4, |
|
"grad_norm": 0.6471404474003535, |
|
"learning_rate": 0.00016925876457848, |
|
"loss": 0.1893, |
|
"step": 367 |
|
}, |
|
{ |
|
"epoch": 1.4, |
|
"grad_norm": 0.5121222508453968, |
|
"learning_rate": 0.00016908081829824912, |
|
"loss": 0.1166, |
|
"step": 368 |
|
}, |
|
{ |
|
"epoch": 1.4, |
|
"grad_norm": 0.5997272693042293, |
|
"learning_rate": 0.0001689024526100961, |
|
"loss": 0.2107, |
|
"step": 369 |
|
}, |
|
{ |
|
"epoch": 1.41, |
|
"grad_norm": 0.5636921436981437, |
|
"learning_rate": 0.00016872366859692627, |
|
"loss": 0.1805, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 1.41, |
|
"grad_norm": 0.6126951187530087, |
|
"learning_rate": 0.00016854446734418466, |
|
"loss": 0.1914, |
|
"step": 371 |
|
}, |
|
{ |
|
"epoch": 1.41, |
|
"grad_norm": 0.554597312718397, |
|
"learning_rate": 0.0001683648499398495, |
|
"loss": 0.1836, |
|
"step": 372 |
|
}, |
|
{ |
|
"epoch": 1.42, |
|
"grad_norm": 0.5003434649301144, |
|
"learning_rate": 0.00016818481747442554, |
|
"loss": 0.1631, |
|
"step": 373 |
|
}, |
|
{ |
|
"epoch": 1.42, |
|
"grad_norm": 0.5181592112650633, |
|
"learning_rate": 0.0001680043710409375, |
|
"loss": 0.1323, |
|
"step": 374 |
|
}, |
|
{ |
|
"epoch": 1.43, |
|
"grad_norm": 0.5773215207200062, |
|
"learning_rate": 0.00016782351173492342, |
|
"loss": 0.2156, |
|
"step": 375 |
|
}, |
|
{ |
|
"epoch": 1.43, |
|
"grad_norm": 0.46149339744192175, |
|
"learning_rate": 0.00016764224065442796, |
|
"loss": 0.1308, |
|
"step": 376 |
|
}, |
|
{ |
|
"epoch": 1.43, |
|
"grad_norm": 0.5154009122145548, |
|
"learning_rate": 0.0001674605588999959, |
|
"loss": 0.1723, |
|
"step": 377 |
|
}, |
|
{ |
|
"epoch": 1.44, |
|
"grad_norm": 0.5032409831088436, |
|
"learning_rate": 0.0001672784675746651, |
|
"loss": 0.1522, |
|
"step": 378 |
|
}, |
|
{ |
|
"epoch": 1.44, |
|
"grad_norm": 0.5765338444046834, |
|
"learning_rate": 0.00016709596778396026, |
|
"loss": 0.1746, |
|
"step": 379 |
|
}, |
|
{ |
|
"epoch": 1.44, |
|
"grad_norm": 0.6731025984083968, |
|
"learning_rate": 0.00016691306063588583, |
|
"loss": 0.2205, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 1.45, |
|
"grad_norm": 0.4837466365127126, |
|
"learning_rate": 0.00016672974724091954, |
|
"loss": 0.1553, |
|
"step": 381 |
|
}, |
|
{ |
|
"epoch": 1.45, |
|
"grad_norm": 0.5424268673951455, |
|
"learning_rate": 0.00016654602871200546, |
|
"loss": 0.1433, |
|
"step": 382 |
|
}, |
|
{ |
|
"epoch": 1.46, |
|
"grad_norm": 0.5171465106257717, |
|
"learning_rate": 0.0001663619061645474, |
|
"loss": 0.1698, |
|
"step": 383 |
|
}, |
|
{ |
|
"epoch": 1.46, |
|
"grad_norm": 0.5543423821478913, |
|
"learning_rate": 0.00016617738071640208, |
|
"loss": 0.1494, |
|
"step": 384 |
|
}, |
|
{ |
|
"epoch": 1.46, |
|
"grad_norm": 0.5614170511875883, |
|
"learning_rate": 0.0001659924534878723, |
|
"loss": 0.1719, |
|
"step": 385 |
|
}, |
|
{ |
|
"epoch": 1.47, |
|
"grad_norm": 0.5817955693629092, |
|
"learning_rate": 0.0001658071256017001, |
|
"loss": 0.1653, |
|
"step": 386 |
|
}, |
|
{ |
|
"epoch": 1.47, |
|
"grad_norm": 0.5093939905856546, |
|
"learning_rate": 0.0001656213981830602, |
|
"loss": 0.1331, |
|
"step": 387 |
|
}, |
|
{ |
|
"epoch": 1.48, |
|
"grad_norm": 0.530369374719384, |
|
"learning_rate": 0.00016543527235955282, |
|
"loss": 0.1703, |
|
"step": 388 |
|
}, |
|
{ |
|
"epoch": 1.48, |
|
"grad_norm": 0.5540917573532339, |
|
"learning_rate": 0.00016524874926119717, |
|
"loss": 0.1801, |
|
"step": 389 |
|
}, |
|
{ |
|
"epoch": 1.48, |
|
"grad_norm": 0.5189763553195834, |
|
"learning_rate": 0.0001650618300204242, |
|
"loss": 0.1526, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 1.49, |
|
"grad_norm": 0.4919336835652238, |
|
"learning_rate": 0.00016487451577207018, |
|
"loss": 0.1251, |
|
"step": 391 |
|
}, |
|
{ |
|
"epoch": 1.49, |
|
"grad_norm": 0.5303366245761384, |
|
"learning_rate": 0.00016468680765336936, |
|
"loss": 0.1504, |
|
"step": 392 |
|
}, |
|
{ |
|
"epoch": 1.49, |
|
"grad_norm": 0.579006149147232, |
|
"learning_rate": 0.00016449870680394747, |
|
"loss": 0.2013, |
|
"step": 393 |
|
}, |
|
{ |
|
"epoch": 1.5, |
|
"grad_norm": 0.5698441995370652, |
|
"learning_rate": 0.0001643102143658145, |
|
"loss": 0.1494, |
|
"step": 394 |
|
}, |
|
{ |
|
"epoch": 1.5, |
|
"grad_norm": 0.45335914900667046, |
|
"learning_rate": 0.00016412133148335784, |
|
"loss": 0.1196, |
|
"step": 395 |
|
}, |
|
{ |
|
"epoch": 1.51, |
|
"grad_norm": 0.500799551817792, |
|
"learning_rate": 0.0001639320593033355, |
|
"loss": 0.1576, |
|
"step": 396 |
|
}, |
|
{ |
|
"epoch": 1.51, |
|
"grad_norm": 0.6003518072235265, |
|
"learning_rate": 0.000163742398974869, |
|
"loss": 0.1584, |
|
"step": 397 |
|
}, |
|
{ |
|
"epoch": 1.51, |
|
"grad_norm": 0.6168365552101142, |
|
"learning_rate": 0.00016355235164943626, |
|
"loss": 0.2091, |
|
"step": 398 |
|
}, |
|
{ |
|
"epoch": 1.52, |
|
"grad_norm": 0.5713474186129687, |
|
"learning_rate": 0.0001633619184808649, |
|
"loss": 0.1567, |
|
"step": 399 |
|
}, |
|
{ |
|
"epoch": 1.52, |
|
"grad_norm": 0.5874214731593885, |
|
"learning_rate": 0.0001631711006253251, |
|
"loss": 0.1574, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 1.52, |
|
"eval_blimp_filtered_avg": 0.7201492537313433, |
|
"eval_blimp_filtered_std": 0.004978357158642791, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 1.52, |
|
"eval_blimp_supplement_avg": 0.8297413793103449, |
|
"eval_blimp_supplement_std": 0.01657166693464671, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 1.52, |
|
"eval_vqa_filtered_avg": 0.49, |
|
"eval_vqa_filtered_std": 0.05024183937956912, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 1.52, |
|
"eval_winoground_filtered_avg": 0.61, |
|
"eval_winoground_filtered_std": 0.04902071300001975, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 1.52, |
|
"grad_norm": 0.6042802091257128, |
|
"learning_rate": 0.00016297989924132252, |
|
"loss": 0.1818, |
|
"step": 401 |
|
}, |
|
{ |
|
"epoch": 1.53, |
|
"grad_norm": 0.47364098207681, |
|
"learning_rate": 0.00016278831548969134, |
|
"loss": 0.1328, |
|
"step": 402 |
|
}, |
|
{ |
|
"epoch": 1.53, |
|
"grad_norm": 0.5471235998749361, |
|
"learning_rate": 0.00016259635053358717, |
|
"loss": 0.1507, |
|
"step": 403 |
|
}, |
|
{ |
|
"epoch": 1.54, |
|
"grad_norm": 0.5880254357081499, |
|
"learning_rate": 0.00016240400553848007, |
|
"loss": 0.1797, |
|
"step": 404 |
|
}, |
|
{ |
|
"epoch": 1.54, |
|
"grad_norm": 0.4946694660134096, |
|
"learning_rate": 0.0001622112816721474, |
|
"loss": 0.1445, |
|
"step": 405 |
|
}, |
|
{ |
|
"epoch": 1.54, |
|
"grad_norm": 0.5391138081584693, |
|
"learning_rate": 0.0001620181801046667, |
|
"loss": 0.1592, |
|
"step": 406 |
|
}, |
|
{ |
|
"epoch": 1.55, |
|
"grad_norm": 0.4600495974038577, |
|
"learning_rate": 0.00016182470200840868, |
|
"loss": 0.1255, |
|
"step": 407 |
|
}, |
|
{ |
|
"epoch": 1.55, |
|
"grad_norm": 0.4600858105564683, |
|
"learning_rate": 0.00016163084855803006, |
|
"loss": 0.1274, |
|
"step": 408 |
|
}, |
|
{ |
|
"epoch": 1.56, |
|
"grad_norm": 0.4869984031862606, |
|
"learning_rate": 0.00016143662093046638, |
|
"loss": 0.1312, |
|
"step": 409 |
|
}, |
|
{ |
|
"epoch": 1.56, |
|
"grad_norm": 0.5760667761271687, |
|
"learning_rate": 0.000161242020304925, |
|
"loss": 0.1956, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 1.56, |
|
"grad_norm": 0.4914141963984456, |
|
"learning_rate": 0.0001610470478628778, |
|
"loss": 0.1305, |
|
"step": 411 |
|
}, |
|
{ |
|
"epoch": 1.57, |
|
"grad_norm": 0.5844115229455148, |
|
"learning_rate": 0.00016085170478805395, |
|
"loss": 0.1767, |
|
"step": 412 |
|
}, |
|
{ |
|
"epoch": 1.57, |
|
"grad_norm": 0.5609976894365024, |
|
"learning_rate": 0.00016065599226643303, |
|
"loss": 0.1568, |
|
"step": 413 |
|
}, |
|
{ |
|
"epoch": 1.57, |
|
"grad_norm": 0.5199227181198156, |
|
"learning_rate": 0.0001604599114862375, |
|
"loss": 0.1358, |
|
"step": 414 |
|
}, |
|
{ |
|
"epoch": 1.58, |
|
"grad_norm": 0.580620607781089, |
|
"learning_rate": 0.00016026346363792567, |
|
"loss": 0.1568, |
|
"step": 415 |
|
}, |
|
{ |
|
"epoch": 1.58, |
|
"grad_norm": 0.5406448317458065, |
|
"learning_rate": 0.00016006664991418434, |
|
"loss": 0.1398, |
|
"step": 416 |
|
}, |
|
{ |
|
"epoch": 1.59, |
|
"grad_norm": 0.44523867426706976, |
|
"learning_rate": 0.00015986947150992172, |
|
"loss": 0.1355, |
|
"step": 417 |
|
}, |
|
{ |
|
"epoch": 1.59, |
|
"grad_norm": 0.5115419137782133, |
|
"learning_rate": 0.0001596719296222601, |
|
"loss": 0.1468, |
|
"step": 418 |
|
}, |
|
{ |
|
"epoch": 1.59, |
|
"grad_norm": 0.5985959567307599, |
|
"learning_rate": 0.0001594740254505285, |
|
"loss": 0.1378, |
|
"step": 419 |
|
}, |
|
{ |
|
"epoch": 1.6, |
|
"grad_norm": 0.6222671216404903, |
|
"learning_rate": 0.0001592757601962555, |
|
"loss": 0.184, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 1.6, |
|
"grad_norm": 0.5413250088547485, |
|
"learning_rate": 0.00015907713506316192, |
|
"loss": 0.1758, |
|
"step": 421 |
|
}, |
|
{ |
|
"epoch": 1.6, |
|
"grad_norm": 0.5382013654400807, |
|
"learning_rate": 0.00015887815125715344, |
|
"loss": 0.1509, |
|
"step": 422 |
|
}, |
|
{ |
|
"epoch": 1.61, |
|
"grad_norm": 0.48249026147532587, |
|
"learning_rate": 0.00015867880998631347, |
|
"loss": 0.1183, |
|
"step": 423 |
|
}, |
|
{ |
|
"epoch": 1.61, |
|
"grad_norm": 0.5223760532607825, |
|
"learning_rate": 0.0001584791124608955, |
|
"loss": 0.1446, |
|
"step": 424 |
|
}, |
|
{ |
|
"epoch": 1.62, |
|
"grad_norm": 0.42511278258760743, |
|
"learning_rate": 0.0001582790598933161, |
|
"loss": 0.1085, |
|
"step": 425 |
|
}, |
|
{ |
|
"epoch": 1.62, |
|
"grad_norm": 0.5280124307363728, |
|
"learning_rate": 0.00015807865349814733, |
|
"loss": 0.1211, |
|
"step": 426 |
|
}, |
|
{ |
|
"epoch": 1.62, |
|
"grad_norm": 0.44071272749150264, |
|
"learning_rate": 0.00015787789449210938, |
|
"loss": 0.1199, |
|
"step": 427 |
|
}, |
|
{ |
|
"epoch": 1.63, |
|
"grad_norm": 0.5672982836442205, |
|
"learning_rate": 0.0001576767840940633, |
|
"loss": 0.1653, |
|
"step": 428 |
|
}, |
|
{ |
|
"epoch": 1.63, |
|
"grad_norm": 0.5407137471822783, |
|
"learning_rate": 0.00015747532352500357, |
|
"loss": 0.1459, |
|
"step": 429 |
|
}, |
|
{ |
|
"epoch": 1.63, |
|
"grad_norm": 0.47958987114022433, |
|
"learning_rate": 0.00015727351400805052, |
|
"loss": 0.122, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 1.64, |
|
"grad_norm": 0.4949008797838833, |
|
"learning_rate": 0.0001570713567684432, |
|
"loss": 0.1476, |
|
"step": 431 |
|
}, |
|
{ |
|
"epoch": 1.64, |
|
"grad_norm": 0.5706163209211994, |
|
"learning_rate": 0.0001568688530335316, |
|
"loss": 0.1859, |
|
"step": 432 |
|
}, |
|
{ |
|
"epoch": 1.65, |
|
"grad_norm": 0.48156623541863475, |
|
"learning_rate": 0.0001566660040327695, |
|
"loss": 0.1354, |
|
"step": 433 |
|
}, |
|
{ |
|
"epoch": 1.65, |
|
"grad_norm": 0.48592626955333434, |
|
"learning_rate": 0.00015646281099770682, |
|
"loss": 0.1287, |
|
"step": 434 |
|
}, |
|
{ |
|
"epoch": 1.65, |
|
"grad_norm": 0.42812001499236796, |
|
"learning_rate": 0.00015625927516198232, |
|
"loss": 0.1041, |
|
"step": 435 |
|
}, |
|
{ |
|
"epoch": 1.66, |
|
"grad_norm": 0.5351459013098889, |
|
"learning_rate": 0.0001560553977613158, |
|
"loss": 0.1321, |
|
"step": 436 |
|
}, |
|
{ |
|
"epoch": 1.66, |
|
"grad_norm": 0.603892324255596, |
|
"learning_rate": 0.00015585118003350092, |
|
"loss": 0.1524, |
|
"step": 437 |
|
}, |
|
{ |
|
"epoch": 1.67, |
|
"grad_norm": 0.5704162659549111, |
|
"learning_rate": 0.00015564662321839755, |
|
"loss": 0.1643, |
|
"step": 438 |
|
}, |
|
{ |
|
"epoch": 1.67, |
|
"grad_norm": 0.5619464269808508, |
|
"learning_rate": 0.00015544172855792423, |
|
"loss": 0.1695, |
|
"step": 439 |
|
}, |
|
{ |
|
"epoch": 1.67, |
|
"grad_norm": 0.5552857752448269, |
|
"learning_rate": 0.0001552364972960506, |
|
"loss": 0.1436, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 1.68, |
|
"grad_norm": 0.4859084061375966, |
|
"learning_rate": 0.00015503093067878996, |
|
"loss": 0.1304, |
|
"step": 441 |
|
}, |
|
{ |
|
"epoch": 1.68, |
|
"grad_norm": 0.44977231534740475, |
|
"learning_rate": 0.00015482502995419167, |
|
"loss": 0.1328, |
|
"step": 442 |
|
}, |
|
{ |
|
"epoch": 1.68, |
|
"grad_norm": 0.4713213980355454, |
|
"learning_rate": 0.0001546187963723334, |
|
"loss": 0.1105, |
|
"step": 443 |
|
}, |
|
{ |
|
"epoch": 1.69, |
|
"grad_norm": 0.5341277409726155, |
|
"learning_rate": 0.00015441223118531388, |
|
"loss": 0.1449, |
|
"step": 444 |
|
}, |
|
{ |
|
"epoch": 1.69, |
|
"grad_norm": 0.5705390112616555, |
|
"learning_rate": 0.00015420533564724495, |
|
"loss": 0.1695, |
|
"step": 445 |
|
}, |
|
{ |
|
"epoch": 1.7, |
|
"grad_norm": 0.4774790901030609, |
|
"learning_rate": 0.00015399811101424418, |
|
"loss": 0.1403, |
|
"step": 446 |
|
}, |
|
{ |
|
"epoch": 1.7, |
|
"grad_norm": 0.5512200465464098, |
|
"learning_rate": 0.00015379055854442708, |
|
"loss": 0.1643, |
|
"step": 447 |
|
}, |
|
{ |
|
"epoch": 1.7, |
|
"grad_norm": 0.5219733540585905, |
|
"learning_rate": 0.00015358267949789966, |
|
"loss": 0.1237, |
|
"step": 448 |
|
}, |
|
{ |
|
"epoch": 1.71, |
|
"grad_norm": 0.5033089815712022, |
|
"learning_rate": 0.0001533744751367506, |
|
"loss": 0.1291, |
|
"step": 449 |
|
}, |
|
{ |
|
"epoch": 1.71, |
|
"grad_norm": 0.4747834365201173, |
|
"learning_rate": 0.0001531659467250436, |
|
"loss": 0.1275, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 1.71, |
|
"grad_norm": 0.40965665913605903, |
|
"learning_rate": 0.0001529570955288099, |
|
"loss": 0.1156, |
|
"step": 451 |
|
}, |
|
{ |
|
"epoch": 1.72, |
|
"grad_norm": 0.440005816758666, |
|
"learning_rate": 0.00015274792281604028, |
|
"loss": 0.1099, |
|
"step": 452 |
|
}, |
|
{ |
|
"epoch": 1.72, |
|
"grad_norm": 0.5579574679556536, |
|
"learning_rate": 0.00015253842985667762, |
|
"loss": 0.1456, |
|
"step": 453 |
|
}, |
|
{ |
|
"epoch": 1.73, |
|
"grad_norm": 0.4959791770798946, |
|
"learning_rate": 0.0001523286179226091, |
|
"loss": 0.1468, |
|
"step": 454 |
|
}, |
|
{ |
|
"epoch": 1.73, |
|
"grad_norm": 0.5066322640800116, |
|
"learning_rate": 0.0001521184882876585, |
|
"loss": 0.1085, |
|
"step": 455 |
|
}, |
|
{ |
|
"epoch": 1.73, |
|
"grad_norm": 0.5297332606909233, |
|
"learning_rate": 0.00015190804222757845, |
|
"loss": 0.1476, |
|
"step": 456 |
|
}, |
|
{ |
|
"epoch": 1.74, |
|
"grad_norm": 0.5830381345482916, |
|
"learning_rate": 0.00015169728102004256, |
|
"loss": 0.1424, |
|
"step": 457 |
|
}, |
|
{ |
|
"epoch": 1.74, |
|
"grad_norm": 0.44997991084618094, |
|
"learning_rate": 0.00015148620594463794, |
|
"loss": 0.1068, |
|
"step": 458 |
|
}, |
|
{ |
|
"epoch": 1.75, |
|
"grad_norm": 0.5459028855766525, |
|
"learning_rate": 0.00015127481828285718, |
|
"loss": 0.1673, |
|
"step": 459 |
|
}, |
|
{ |
|
"epoch": 1.75, |
|
"grad_norm": 0.46924015811236747, |
|
"learning_rate": 0.0001510631193180907, |
|
"loss": 0.1136, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 1.75, |
|
"grad_norm": 0.5011418751892371, |
|
"learning_rate": 0.00015085111033561895, |
|
"loss": 0.1154, |
|
"step": 461 |
|
}, |
|
{ |
|
"epoch": 1.76, |
|
"grad_norm": 0.5328628999011582, |
|
"learning_rate": 0.00015063879262260446, |
|
"loss": 0.1397, |
|
"step": 462 |
|
}, |
|
{ |
|
"epoch": 1.76, |
|
"grad_norm": 0.4964913122706592, |
|
"learning_rate": 0.00015042616746808435, |
|
"loss": 0.144, |
|
"step": 463 |
|
}, |
|
{ |
|
"epoch": 1.76, |
|
"grad_norm": 0.501470133961365, |
|
"learning_rate": 0.00015021323616296213, |
|
"loss": 0.1379, |
|
"step": 464 |
|
}, |
|
{ |
|
"epoch": 1.77, |
|
"grad_norm": 0.5269212199791002, |
|
"learning_rate": 0.00015000000000000001, |
|
"loss": 0.1311, |
|
"step": 465 |
|
}, |
|
{ |
|
"epoch": 1.77, |
|
"grad_norm": 0.5049669772855443, |
|
"learning_rate": 0.00014978646027381123, |
|
"loss": 0.1196, |
|
"step": 466 |
|
}, |
|
{ |
|
"epoch": 1.78, |
|
"grad_norm": 0.4525225552090921, |
|
"learning_rate": 0.00014957261828085191, |
|
"loss": 0.1141, |
|
"step": 467 |
|
}, |
|
{ |
|
"epoch": 1.78, |
|
"grad_norm": 0.4790059846117726, |
|
"learning_rate": 0.0001493584753194134, |
|
"loss": 0.1028, |
|
"step": 468 |
|
}, |
|
{ |
|
"epoch": 1.78, |
|
"grad_norm": 0.5832150044411881, |
|
"learning_rate": 0.00014914403268961426, |
|
"loss": 0.158, |
|
"step": 469 |
|
}, |
|
{ |
|
"epoch": 1.79, |
|
"grad_norm": 0.522112830999454, |
|
"learning_rate": 0.00014892929169339235, |
|
"loss": 0.1459, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 1.79, |
|
"grad_norm": 0.4887536825683872, |
|
"learning_rate": 0.00014871425363449718, |
|
"loss": 0.1263, |
|
"step": 471 |
|
}, |
|
{ |
|
"epoch": 1.79, |
|
"grad_norm": 0.467502684654505, |
|
"learning_rate": 0.00014849891981848158, |
|
"loss": 0.1388, |
|
"step": 472 |
|
}, |
|
{ |
|
"epoch": 1.8, |
|
"grad_norm": 0.4546537818819024, |
|
"learning_rate": 0.0001482832915526942, |
|
"loss": 0.105, |
|
"step": 473 |
|
}, |
|
{ |
|
"epoch": 1.8, |
|
"grad_norm": 0.541602974136478, |
|
"learning_rate": 0.00014806737014627124, |
|
"loss": 0.1388, |
|
"step": 474 |
|
}, |
|
{ |
|
"epoch": 1.81, |
|
"grad_norm": 0.5066261091776695, |
|
"learning_rate": 0.00014785115691012864, |
|
"loss": 0.1231, |
|
"step": 475 |
|
}, |
|
{ |
|
"epoch": 1.81, |
|
"grad_norm": 0.6064880991698398, |
|
"learning_rate": 0.00014763465315695425, |
|
"loss": 0.173, |
|
"step": 476 |
|
}, |
|
{ |
|
"epoch": 1.81, |
|
"grad_norm": 0.5259754097567955, |
|
"learning_rate": 0.00014741786020119955, |
|
"loss": 0.1383, |
|
"step": 477 |
|
}, |
|
{ |
|
"epoch": 1.82, |
|
"grad_norm": 0.5335061292186741, |
|
"learning_rate": 0.00014720077935907196, |
|
"loss": 0.1671, |
|
"step": 478 |
|
}, |
|
{ |
|
"epoch": 1.82, |
|
"grad_norm": 0.43421172131647107, |
|
"learning_rate": 0.00014698341194852664, |
|
"loss": 0.1207, |
|
"step": 479 |
|
}, |
|
{ |
|
"epoch": 1.83, |
|
"grad_norm": 0.4476149707237085, |
|
"learning_rate": 0.00014676575928925867, |
|
"loss": 0.1103, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 1.83, |
|
"grad_norm": 0.49116627439944716, |
|
"learning_rate": 0.00014654782270269497, |
|
"loss": 0.1195, |
|
"step": 481 |
|
}, |
|
{ |
|
"epoch": 1.83, |
|
"grad_norm": 0.44289326248169675, |
|
"learning_rate": 0.00014632960351198618, |
|
"loss": 0.1217, |
|
"step": 482 |
|
}, |
|
{ |
|
"epoch": 1.84, |
|
"grad_norm": 0.4022818703819969, |
|
"learning_rate": 0.00014611110304199872, |
|
"loss": 0.0868, |
|
"step": 483 |
|
}, |
|
{ |
|
"epoch": 1.84, |
|
"grad_norm": 0.43714254165230704, |
|
"learning_rate": 0.00014589232261930674, |
|
"loss": 0.1062, |
|
"step": 484 |
|
}, |
|
{ |
|
"epoch": 1.84, |
|
"grad_norm": 0.4499910364889525, |
|
"learning_rate": 0.00014567326357218407, |
|
"loss": 0.1115, |
|
"step": 485 |
|
}, |
|
{ |
|
"epoch": 1.85, |
|
"grad_norm": 0.4812264446699121, |
|
"learning_rate": 0.00014545392723059616, |
|
"loss": 0.1272, |
|
"step": 486 |
|
}, |
|
{ |
|
"epoch": 1.85, |
|
"grad_norm": 0.5466847826869639, |
|
"learning_rate": 0.0001452343149261919, |
|
"loss": 0.1387, |
|
"step": 487 |
|
}, |
|
{ |
|
"epoch": 1.86, |
|
"grad_norm": 0.4671789379009673, |
|
"learning_rate": 0.00014501442799229572, |
|
"loss": 0.1227, |
|
"step": 488 |
|
}, |
|
{ |
|
"epoch": 1.86, |
|
"grad_norm": 0.4168568601736311, |
|
"learning_rate": 0.00014479426776389936, |
|
"loss": 0.0955, |
|
"step": 489 |
|
}, |
|
{ |
|
"epoch": 1.86, |
|
"grad_norm": 0.4236347478067532, |
|
"learning_rate": 0.00014457383557765386, |
|
"loss": 0.1226, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 1.87, |
|
"grad_norm": 0.49129515437526694, |
|
"learning_rate": 0.00014435313277186125, |
|
"loss": 0.1411, |
|
"step": 491 |
|
}, |
|
{ |
|
"epoch": 1.87, |
|
"grad_norm": 0.4395945017572998, |
|
"learning_rate": 0.00014413216068646668, |
|
"loss": 0.1306, |
|
"step": 492 |
|
}, |
|
{ |
|
"epoch": 1.87, |
|
"grad_norm": 0.4832622577980266, |
|
"learning_rate": 0.0001439109206630501, |
|
"loss": 0.1133, |
|
"step": 493 |
|
}, |
|
{ |
|
"epoch": 1.88, |
|
"grad_norm": 0.4840123051282916, |
|
"learning_rate": 0.0001436894140448183, |
|
"loss": 0.1354, |
|
"step": 494 |
|
}, |
|
{ |
|
"epoch": 1.88, |
|
"grad_norm": 0.6106816446433545, |
|
"learning_rate": 0.00014346764217659653, |
|
"loss": 0.1759, |
|
"step": 495 |
|
}, |
|
{ |
|
"epoch": 1.89, |
|
"grad_norm": 0.4439633620623163, |
|
"learning_rate": 0.0001432456064048204, |
|
"loss": 0.105, |
|
"step": 496 |
|
}, |
|
{ |
|
"epoch": 1.89, |
|
"grad_norm": 0.6103065192992649, |
|
"learning_rate": 0.00014302330807752786, |
|
"loss": 0.1736, |
|
"step": 497 |
|
}, |
|
{ |
|
"epoch": 1.89, |
|
"grad_norm": 0.4594414148489128, |
|
"learning_rate": 0.0001428007485443509, |
|
"loss": 0.1067, |
|
"step": 498 |
|
}, |
|
{ |
|
"epoch": 1.9, |
|
"grad_norm": 0.48309766302252394, |
|
"learning_rate": 0.00014257792915650728, |
|
"loss": 0.1319, |
|
"step": 499 |
|
}, |
|
{ |
|
"epoch": 1.9, |
|
"grad_norm": 0.4979398119598126, |
|
"learning_rate": 0.00014235485126679243, |
|
"loss": 0.1361, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 1.9, |
|
"eval_blimp_filtered_avg": 0.7231343283582089, |
|
"eval_blimp_filtered_std": 0.004963011033511667, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 1.9, |
|
"eval_blimp_supplement_avg": 0.8275862068965517, |
|
"eval_blimp_supplement_std": 0.0167403676680407, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 1.9, |
|
"eval_vqa_filtered_avg": 0.47, |
|
"eval_vqa_filtered_std": 0.0501613558046592, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 1.9, |
|
"eval_winoground_filtered_avg": 0.64, |
|
"eval_winoground_filtered_std": 0.048241815132442176, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 1.9, |
|
"grad_norm": 0.4877241025128464, |
|
"learning_rate": 0.00014213151622957128, |
|
"loss": 0.1502, |
|
"step": 501 |
|
}, |
|
{ |
|
"epoch": 1.91, |
|
"grad_norm": 0.5812751924824102, |
|
"learning_rate": 0.00014190792540076986, |
|
"loss": 0.1687, |
|
"step": 502 |
|
}, |
|
{ |
|
"epoch": 1.91, |
|
"grad_norm": 0.4627935359645996, |
|
"learning_rate": 0.00014168408013786728, |
|
"loss": 0.1246, |
|
"step": 503 |
|
}, |
|
{ |
|
"epoch": 1.92, |
|
"grad_norm": 0.4587837001229827, |
|
"learning_rate": 0.00014145998179988735, |
|
"loss": 0.1072, |
|
"step": 504 |
|
}, |
|
{ |
|
"epoch": 1.92, |
|
"grad_norm": 0.5196182852163257, |
|
"learning_rate": 0.00014123563174739037, |
|
"loss": 0.1269, |
|
"step": 505 |
|
}, |
|
{ |
|
"epoch": 1.92, |
|
"grad_norm": 0.42954431425652795, |
|
"learning_rate": 0.0001410110313424648, |
|
"loss": 0.1034, |
|
"step": 506 |
|
}, |
|
{ |
|
"epoch": 1.93, |
|
"grad_norm": 0.4551597635457604, |
|
"learning_rate": 0.00014078618194871914, |
|
"loss": 0.1084, |
|
"step": 507 |
|
}, |
|
{ |
|
"epoch": 1.93, |
|
"grad_norm": 0.5751209424815239, |
|
"learning_rate": 0.0001405610849312736, |
|
"loss": 0.136, |
|
"step": 508 |
|
}, |
|
{ |
|
"epoch": 1.94, |
|
"grad_norm": 0.5118033581542103, |
|
"learning_rate": 0.00014033574165675164, |
|
"loss": 0.1241, |
|
"step": 509 |
|
}, |
|
{ |
|
"epoch": 1.94, |
|
"grad_norm": 0.43644759709330094, |
|
"learning_rate": 0.00014011015349327187, |
|
"loss": 0.117, |
|
"step": 510 |
|
}, |
|
{ |
|
"epoch": 1.94, |
|
"grad_norm": 0.4958861591526709, |
|
"learning_rate": 0.00013988432181043982, |
|
"loss": 0.1163, |
|
"step": 511 |
|
}, |
|
{ |
|
"epoch": 1.95, |
|
"grad_norm": 0.44950223725406846, |
|
"learning_rate": 0.00013965824797933926, |
|
"loss": 0.1013, |
|
"step": 512 |
|
}, |
|
{ |
|
"epoch": 1.95, |
|
"grad_norm": 0.5174784327558367, |
|
"learning_rate": 0.0001394319333725243, |
|
"loss": 0.1228, |
|
"step": 513 |
|
}, |
|
{ |
|
"epoch": 1.95, |
|
"grad_norm": 0.451176763896539, |
|
"learning_rate": 0.00013920537936401077, |
|
"loss": 0.1137, |
|
"step": 514 |
|
}, |
|
{ |
|
"epoch": 1.96, |
|
"grad_norm": 0.35979415227278505, |
|
"learning_rate": 0.00013897858732926793, |
|
"loss": 0.0893, |
|
"step": 515 |
|
}, |
|
{ |
|
"epoch": 1.96, |
|
"grad_norm": 0.4087154672782133, |
|
"learning_rate": 0.0001387515586452103, |
|
"loss": 0.0979, |
|
"step": 516 |
|
}, |
|
{ |
|
"epoch": 1.97, |
|
"grad_norm": 0.4238917271153031, |
|
"learning_rate": 0.000138524294690189, |
|
"loss": 0.0902, |
|
"step": 517 |
|
}, |
|
{ |
|
"epoch": 1.97, |
|
"grad_norm": 0.37289193742062965, |
|
"learning_rate": 0.00013829679684398375, |
|
"loss": 0.0905, |
|
"step": 518 |
|
}, |
|
{ |
|
"epoch": 1.97, |
|
"grad_norm": 0.4561111955028976, |
|
"learning_rate": 0.000138069066487794, |
|
"loss": 0.1154, |
|
"step": 519 |
|
}, |
|
{ |
|
"epoch": 1.98, |
|
"grad_norm": 0.4795856880336275, |
|
"learning_rate": 0.00013784110500423104, |
|
"loss": 0.1053, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 1.98, |
|
"grad_norm": 0.5758379418361174, |
|
"learning_rate": 0.00013761291377730936, |
|
"loss": 0.1646, |
|
"step": 521 |
|
}, |
|
{ |
|
"epoch": 1.98, |
|
"grad_norm": 0.3967821645775129, |
|
"learning_rate": 0.00013738449419243827, |
|
"loss": 0.0797, |
|
"step": 522 |
|
}, |
|
{ |
|
"epoch": 1.99, |
|
"grad_norm": 0.5352641990707583, |
|
"learning_rate": 0.00013715584763641345, |
|
"loss": 0.1205, |
|
"step": 523 |
|
}, |
|
{ |
|
"epoch": 1.99, |
|
"grad_norm": 0.39673398503322155, |
|
"learning_rate": 0.0001369269754974087, |
|
"loss": 0.0672, |
|
"step": 524 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"grad_norm": 0.5673600812406742, |
|
"learning_rate": 0.00013669787916496722, |
|
"loss": 0.1269, |
|
"step": 525 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"grad_norm": 0.5134798249291933, |
|
"learning_rate": 0.00013646856002999354, |
|
"loss": 0.1275, |
|
"step": 526 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"grad_norm": 0.26137163241501854, |
|
"learning_rate": 0.00013623901948474473, |
|
"loss": 0.0391, |
|
"step": 527 |
|
}, |
|
{ |
|
"epoch": 2.01, |
|
"grad_norm": 0.3318640399705981, |
|
"learning_rate": 0.00013600925892282218, |
|
"loss": 0.0414, |
|
"step": 528 |
|
}, |
|
{ |
|
"epoch": 2.01, |
|
"grad_norm": 0.39323951881930796, |
|
"learning_rate": 0.00013577927973916306, |
|
"loss": 0.054, |
|
"step": 529 |
|
}, |
|
{ |
|
"epoch": 2.02, |
|
"grad_norm": 0.31507008260792096, |
|
"learning_rate": 0.0001355490833300318, |
|
"loss": 0.042, |
|
"step": 530 |
|
}, |
|
{ |
|
"epoch": 2.02, |
|
"grad_norm": 0.351605629418437, |
|
"learning_rate": 0.00013531867109301175, |
|
"loss": 0.0419, |
|
"step": 531 |
|
}, |
|
{ |
|
"epoch": 2.02, |
|
"grad_norm": 0.300514728252508, |
|
"learning_rate": 0.00013508804442699648, |
|
"loss": 0.0432, |
|
"step": 532 |
|
}, |
|
{ |
|
"epoch": 2.03, |
|
"grad_norm": 0.429759783149188, |
|
"learning_rate": 0.00013485720473218154, |
|
"loss": 0.0434, |
|
"step": 533 |
|
}, |
|
{ |
|
"epoch": 2.03, |
|
"grad_norm": 0.3371850480060168, |
|
"learning_rate": 0.00013462615341005573, |
|
"loss": 0.0398, |
|
"step": 534 |
|
}, |
|
{ |
|
"epoch": 2.03, |
|
"grad_norm": 0.41727641506204793, |
|
"learning_rate": 0.00013439489186339282, |
|
"loss": 0.0532, |
|
"step": 535 |
|
}, |
|
{ |
|
"epoch": 2.04, |
|
"grad_norm": 0.4526462472237055, |
|
"learning_rate": 0.0001341634214962428, |
|
"loss": 0.0504, |
|
"step": 536 |
|
}, |
|
{ |
|
"epoch": 2.04, |
|
"grad_norm": 0.394759014800421, |
|
"learning_rate": 0.00013393174371392348, |
|
"loss": 0.0362, |
|
"step": 537 |
|
}, |
|
{ |
|
"epoch": 2.05, |
|
"grad_norm": 0.43530217979086694, |
|
"learning_rate": 0.00013369985992301198, |
|
"loss": 0.0581, |
|
"step": 538 |
|
}, |
|
{ |
|
"epoch": 2.05, |
|
"grad_norm": 0.3489279774411472, |
|
"learning_rate": 0.00013346777153133615, |
|
"loss": 0.0428, |
|
"step": 539 |
|
}, |
|
{ |
|
"epoch": 2.05, |
|
"grad_norm": 0.3524007670573337, |
|
"learning_rate": 0.00013323547994796597, |
|
"loss": 0.0392, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 2.06, |
|
"grad_norm": 0.4161180575138653, |
|
"learning_rate": 0.00013300298658320517, |
|
"loss": 0.0435, |
|
"step": 541 |
|
}, |
|
{ |
|
"epoch": 2.06, |
|
"grad_norm": 0.36047484402484764, |
|
"learning_rate": 0.00013277029284858237, |
|
"loss": 0.0453, |
|
"step": 542 |
|
}, |
|
{ |
|
"epoch": 2.06, |
|
"grad_norm": 0.3660355277365804, |
|
"learning_rate": 0.00013253740015684284, |
|
"loss": 0.0442, |
|
"step": 543 |
|
}, |
|
{ |
|
"epoch": 2.07, |
|
"grad_norm": 0.3239828127697563, |
|
"learning_rate": 0.00013230430992193973, |
|
"loss": 0.0405, |
|
"step": 544 |
|
}, |
|
{ |
|
"epoch": 2.07, |
|
"grad_norm": 0.4158448690843572, |
|
"learning_rate": 0.00013207102355902552, |
|
"loss": 0.0524, |
|
"step": 545 |
|
}, |
|
{ |
|
"epoch": 2.08, |
|
"grad_norm": 0.27764986772544564, |
|
"learning_rate": 0.00013183754248444343, |
|
"loss": 0.0332, |
|
"step": 546 |
|
}, |
|
{ |
|
"epoch": 2.08, |
|
"grad_norm": 0.32771023150803125, |
|
"learning_rate": 0.00013160386811571876, |
|
"loss": 0.0459, |
|
"step": 547 |
|
}, |
|
{ |
|
"epoch": 2.08, |
|
"grad_norm": 0.29940107859442794, |
|
"learning_rate": 0.0001313700018715505, |
|
"loss": 0.0377, |
|
"step": 548 |
|
}, |
|
{ |
|
"epoch": 2.09, |
|
"grad_norm": 0.3819582084310897, |
|
"learning_rate": 0.00013113594517180242, |
|
"loss": 0.058, |
|
"step": 549 |
|
}, |
|
{ |
|
"epoch": 2.09, |
|
"grad_norm": 0.32714441974400016, |
|
"learning_rate": 0.00013090169943749476, |
|
"loss": 0.0437, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 2.1, |
|
"grad_norm": 0.36220473718120416, |
|
"learning_rate": 0.00013066726609079526, |
|
"loss": 0.0425, |
|
"step": 551 |
|
}, |
|
{ |
|
"epoch": 2.1, |
|
"grad_norm": 0.3426348230655199, |
|
"learning_rate": 0.00013043264655501074, |
|
"loss": 0.0396, |
|
"step": 552 |
|
}, |
|
{ |
|
"epoch": 2.1, |
|
"grad_norm": 0.32235085289402154, |
|
"learning_rate": 0.00013019784225457855, |
|
"loss": 0.0382, |
|
"step": 553 |
|
}, |
|
{ |
|
"epoch": 2.11, |
|
"grad_norm": 0.2964086383376768, |
|
"learning_rate": 0.0001299628546150577, |
|
"loss": 0.0382, |
|
"step": 554 |
|
}, |
|
{ |
|
"epoch": 2.11, |
|
"grad_norm": 0.27015454300882835, |
|
"learning_rate": 0.00012972768506312027, |
|
"loss": 0.0283, |
|
"step": 555 |
|
}, |
|
{ |
|
"epoch": 2.11, |
|
"grad_norm": 0.33035662649730047, |
|
"learning_rate": 0.00012949233502654284, |
|
"loss": 0.0384, |
|
"step": 556 |
|
}, |
|
{ |
|
"epoch": 2.12, |
|
"grad_norm": 0.3187428949344172, |
|
"learning_rate": 0.00012925680593419778, |
|
"loss": 0.032, |
|
"step": 557 |
|
}, |
|
{ |
|
"epoch": 2.12, |
|
"grad_norm": 0.3399662779151198, |
|
"learning_rate": 0.00012902109921604448, |
|
"loss": 0.0405, |
|
"step": 558 |
|
}, |
|
{ |
|
"epoch": 2.13, |
|
"grad_norm": 0.35365859482095496, |
|
"learning_rate": 0.00012878521630312078, |
|
"loss": 0.0397, |
|
"step": 559 |
|
}, |
|
{ |
|
"epoch": 2.13, |
|
"grad_norm": 0.30675985331556893, |
|
"learning_rate": 0.00012854915862753422, |
|
"loss": 0.035, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 2.13, |
|
"grad_norm": 0.414074353684469, |
|
"learning_rate": 0.0001283129276224534, |
|
"loss": 0.052, |
|
"step": 561 |
|
}, |
|
{ |
|
"epoch": 2.14, |
|
"grad_norm": 0.38344430660186296, |
|
"learning_rate": 0.0001280765247220993, |
|
"loss": 0.0359, |
|
"step": 562 |
|
}, |
|
{ |
|
"epoch": 2.14, |
|
"grad_norm": 0.3137291177532792, |
|
"learning_rate": 0.0001278399513617364, |
|
"loss": 0.031, |
|
"step": 563 |
|
}, |
|
{ |
|
"epoch": 2.14, |
|
"grad_norm": 0.39542817136382147, |
|
"learning_rate": 0.0001276032089776642, |
|
"loss": 0.0438, |
|
"step": 564 |
|
}, |
|
{ |
|
"epoch": 2.15, |
|
"grad_norm": 0.3778319793304532, |
|
"learning_rate": 0.0001273662990072083, |
|
"loss": 0.0485, |
|
"step": 565 |
|
}, |
|
{ |
|
"epoch": 2.15, |
|
"grad_norm": 0.35242405330198695, |
|
"learning_rate": 0.0001271292228887118, |
|
"loss": 0.0395, |
|
"step": 566 |
|
}, |
|
{ |
|
"epoch": 2.16, |
|
"grad_norm": 0.24773421468492, |
|
"learning_rate": 0.00012689198206152657, |
|
"loss": 0.0205, |
|
"step": 567 |
|
}, |
|
{ |
|
"epoch": 2.16, |
|
"grad_norm": 0.37351452341587044, |
|
"learning_rate": 0.00012665457796600443, |
|
"loss": 0.0403, |
|
"step": 568 |
|
}, |
|
{ |
|
"epoch": 2.16, |
|
"grad_norm": 0.29584266385998476, |
|
"learning_rate": 0.0001264170120434884, |
|
"loss": 0.0332, |
|
"step": 569 |
|
}, |
|
{ |
|
"epoch": 2.17, |
|
"grad_norm": 0.28547841885732445, |
|
"learning_rate": 0.00012617928573630406, |
|
"loss": 0.0288, |
|
"step": 570 |
|
}, |
|
{ |
|
"epoch": 2.17, |
|
"grad_norm": 0.37964584553416536, |
|
"learning_rate": 0.0001259414004877507, |
|
"loss": 0.0348, |
|
"step": 571 |
|
}, |
|
{ |
|
"epoch": 2.17, |
|
"grad_norm": 0.36767195929723195, |
|
"learning_rate": 0.0001257033577420926, |
|
"loss": 0.0404, |
|
"step": 572 |
|
}, |
|
{ |
|
"epoch": 2.18, |
|
"grad_norm": 0.3509417575248461, |
|
"learning_rate": 0.00012546515894455026, |
|
"loss": 0.0373, |
|
"step": 573 |
|
}, |
|
{ |
|
"epoch": 2.18, |
|
"grad_norm": 0.2848574183498485, |
|
"learning_rate": 0.00012522680554129156, |
|
"loss": 0.0338, |
|
"step": 574 |
|
}, |
|
{ |
|
"epoch": 2.19, |
|
"grad_norm": 0.3019415495988161, |
|
"learning_rate": 0.0001249882989794231, |
|
"loss": 0.0342, |
|
"step": 575 |
|
}, |
|
{ |
|
"epoch": 2.19, |
|
"grad_norm": 0.2990668841044976, |
|
"learning_rate": 0.00012474964070698127, |
|
"loss": 0.0335, |
|
"step": 576 |
|
}, |
|
{ |
|
"epoch": 2.19, |
|
"grad_norm": 0.4328132625325146, |
|
"learning_rate": 0.00012451083217292357, |
|
"loss": 0.0394, |
|
"step": 577 |
|
}, |
|
{ |
|
"epoch": 2.2, |
|
"grad_norm": 0.4292821742979654, |
|
"learning_rate": 0.00012427187482711986, |
|
"loss": 0.0516, |
|
"step": 578 |
|
}, |
|
{ |
|
"epoch": 2.2, |
|
"grad_norm": 0.2686010674908365, |
|
"learning_rate": 0.0001240327701203433, |
|
"loss": 0.0344, |
|
"step": 579 |
|
}, |
|
{ |
|
"epoch": 2.21, |
|
"grad_norm": 0.28452697548238837, |
|
"learning_rate": 0.00012379351950426187, |
|
"loss": 0.0307, |
|
"step": 580 |
|
}, |
|
{ |
|
"epoch": 2.21, |
|
"grad_norm": 0.3597827634214583, |
|
"learning_rate": 0.00012355412443142936, |
|
"loss": 0.0429, |
|
"step": 581 |
|
}, |
|
{ |
|
"epoch": 2.21, |
|
"grad_norm": 0.31337666682629667, |
|
"learning_rate": 0.00012331458635527658, |
|
"loss": 0.038, |
|
"step": 582 |
|
}, |
|
{ |
|
"epoch": 2.22, |
|
"grad_norm": 0.3751035825027268, |
|
"learning_rate": 0.0001230749067301025, |
|
"loss": 0.0385, |
|
"step": 583 |
|
}, |
|
{ |
|
"epoch": 2.22, |
|
"grad_norm": 0.3982139708950298, |
|
"learning_rate": 0.00012283508701106557, |
|
"loss": 0.0461, |
|
"step": 584 |
|
}, |
|
{ |
|
"epoch": 2.22, |
|
"grad_norm": 0.3672383693603079, |
|
"learning_rate": 0.00012259512865417477, |
|
"loss": 0.045, |
|
"step": 585 |
|
}, |
|
{ |
|
"epoch": 2.23, |
|
"grad_norm": 0.3836978621116552, |
|
"learning_rate": 0.00012235503311628073, |
|
"loss": 0.0434, |
|
"step": 586 |
|
}, |
|
{ |
|
"epoch": 2.23, |
|
"grad_norm": 0.3661127438866315, |
|
"learning_rate": 0.00012211480185506698, |
|
"loss": 0.0374, |
|
"step": 587 |
|
}, |
|
{ |
|
"epoch": 2.24, |
|
"grad_norm": 0.39751744692779245, |
|
"learning_rate": 0.00012187443632904105, |
|
"loss": 0.0334, |
|
"step": 588 |
|
}, |
|
{ |
|
"epoch": 2.24, |
|
"grad_norm": 0.2943198017134509, |
|
"learning_rate": 0.00012163393799752565, |
|
"loss": 0.0354, |
|
"step": 589 |
|
}, |
|
{ |
|
"epoch": 2.24, |
|
"grad_norm": 0.3002324681197963, |
|
"learning_rate": 0.00012139330832064974, |
|
"loss": 0.0263, |
|
"step": 590 |
|
}, |
|
{ |
|
"epoch": 2.25, |
|
"grad_norm": 0.29049707177485556, |
|
"learning_rate": 0.00012115254875933979, |
|
"loss": 0.0374, |
|
"step": 591 |
|
}, |
|
{ |
|
"epoch": 2.25, |
|
"grad_norm": 0.36487904735548193, |
|
"learning_rate": 0.00012091166077531075, |
|
"loss": 0.0366, |
|
"step": 592 |
|
}, |
|
{ |
|
"epoch": 2.25, |
|
"grad_norm": 0.6001906832098756, |
|
"learning_rate": 0.00012067064583105729, |
|
"loss": 0.0397, |
|
"step": 593 |
|
}, |
|
{ |
|
"epoch": 2.26, |
|
"grad_norm": 0.29594265959931015, |
|
"learning_rate": 0.00012042950538984492, |
|
"loss": 0.0339, |
|
"step": 594 |
|
}, |
|
{ |
|
"epoch": 2.26, |
|
"grad_norm": 0.3136944220749086, |
|
"learning_rate": 0.00012018824091570103, |
|
"loss": 0.0383, |
|
"step": 595 |
|
}, |
|
{ |
|
"epoch": 2.27, |
|
"grad_norm": 0.34881594693482015, |
|
"learning_rate": 0.00011994685387340607, |
|
"loss": 0.0328, |
|
"step": 596 |
|
}, |
|
{ |
|
"epoch": 2.27, |
|
"grad_norm": 0.343330706863882, |
|
"learning_rate": 0.00011970534572848464, |
|
"loss": 0.0402, |
|
"step": 597 |
|
}, |
|
{ |
|
"epoch": 2.27, |
|
"grad_norm": 0.34079159501332024, |
|
"learning_rate": 0.00011946371794719656, |
|
"loss": 0.0351, |
|
"step": 598 |
|
}, |
|
{ |
|
"epoch": 2.28, |
|
"grad_norm": 0.2827419424824256, |
|
"learning_rate": 0.000119221971996528, |
|
"loss": 0.0257, |
|
"step": 599 |
|
}, |
|
{ |
|
"epoch": 2.28, |
|
"grad_norm": 0.3123401643495521, |
|
"learning_rate": 0.0001189801093441826, |
|
"loss": 0.029, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 2.28, |
|
"eval_blimp_filtered_avg": 0.7173134328358209, |
|
"eval_blimp_filtered_std": 0.005026688908914533, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 2.28, |
|
"eval_blimp_supplement_avg": 0.8254310344827587, |
|
"eval_blimp_supplement_std": 0.017008878963692253, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 2.28, |
|
"eval_vqa_filtered_avg": 0.5, |
|
"eval_vqa_filtered_std": 0.050251890762960605, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 2.28, |
|
"eval_winoground_filtered_avg": 0.65, |
|
"eval_winoground_filtered_std": 0.04793724854411019, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 2.29, |
|
"grad_norm": 0.3284360674259941, |
|
"learning_rate": 0.00011873813145857249, |
|
"loss": 0.0307, |
|
"step": 601 |
|
}, |
|
{ |
|
"epoch": 2.29, |
|
"grad_norm": 0.4088320006622324, |
|
"learning_rate": 0.0001184960398088094, |
|
"loss": 0.0424, |
|
"step": 602 |
|
}, |
|
{ |
|
"epoch": 2.29, |
|
"grad_norm": 0.3347370845203778, |
|
"learning_rate": 0.00011825383586469583, |
|
"loss": 0.0301, |
|
"step": 603 |
|
}, |
|
{ |
|
"epoch": 2.3, |
|
"grad_norm": 0.34998604945319767, |
|
"learning_rate": 0.00011801152109671595, |
|
"loss": 0.0398, |
|
"step": 604 |
|
}, |
|
{ |
|
"epoch": 2.3, |
|
"grad_norm": 0.28072401871679603, |
|
"learning_rate": 0.00011776909697602689, |
|
"loss": 0.0207, |
|
"step": 605 |
|
}, |
|
{ |
|
"epoch": 2.3, |
|
"grad_norm": 0.27417539486849196, |
|
"learning_rate": 0.00011752656497444952, |
|
"loss": 0.0303, |
|
"step": 606 |
|
}, |
|
{ |
|
"epoch": 2.31, |
|
"grad_norm": 0.3528956989148332, |
|
"learning_rate": 0.00011728392656445981, |
|
"loss": 0.035, |
|
"step": 607 |
|
}, |
|
{ |
|
"epoch": 2.31, |
|
"grad_norm": 0.2732570327379083, |
|
"learning_rate": 0.00011704118321917976, |
|
"loss": 0.0293, |
|
"step": 608 |
|
}, |
|
{ |
|
"epoch": 2.32, |
|
"grad_norm": 0.23173712745705208, |
|
"learning_rate": 0.00011679833641236844, |
|
"loss": 0.0194, |
|
"step": 609 |
|
}, |
|
{ |
|
"epoch": 2.32, |
|
"grad_norm": 0.3833499067600359, |
|
"learning_rate": 0.000116555387618413, |
|
"loss": 0.0301, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 2.32, |
|
"grad_norm": 0.3445447080613637, |
|
"learning_rate": 0.00011631233831231991, |
|
"loss": 0.0394, |
|
"step": 611 |
|
}, |
|
{ |
|
"epoch": 2.33, |
|
"grad_norm": 0.3521386004172123, |
|
"learning_rate": 0.00011606918996970573, |
|
"loss": 0.0359, |
|
"step": 612 |
|
}, |
|
{ |
|
"epoch": 2.33, |
|
"grad_norm": 0.32961191495589676, |
|
"learning_rate": 0.00011582594406678839, |
|
"loss": 0.0353, |
|
"step": 613 |
|
}, |
|
{ |
|
"epoch": 2.33, |
|
"grad_norm": 0.32438381236368063, |
|
"learning_rate": 0.00011558260208037817, |
|
"loss": 0.0326, |
|
"step": 614 |
|
}, |
|
{ |
|
"epoch": 2.34, |
|
"grad_norm": 0.31086642828231076, |
|
"learning_rate": 0.00011533916548786857, |
|
"loss": 0.0319, |
|
"step": 615 |
|
}, |
|
{ |
|
"epoch": 2.34, |
|
"grad_norm": 0.3725691312752257, |
|
"learning_rate": 0.00011509563576722753, |
|
"loss": 0.0419, |
|
"step": 616 |
|
}, |
|
{ |
|
"epoch": 2.35, |
|
"grad_norm": 0.2712234441709734, |
|
"learning_rate": 0.00011485201439698847, |
|
"loss": 0.0286, |
|
"step": 617 |
|
}, |
|
{ |
|
"epoch": 2.35, |
|
"grad_norm": 0.2992546512802102, |
|
"learning_rate": 0.00011460830285624118, |
|
"loss": 0.0298, |
|
"step": 618 |
|
}, |
|
{ |
|
"epoch": 2.35, |
|
"grad_norm": 0.3740397615866031, |
|
"learning_rate": 0.00011436450262462284, |
|
"loss": 0.0434, |
|
"step": 619 |
|
}, |
|
{ |
|
"epoch": 2.36, |
|
"grad_norm": 0.34323442855123726, |
|
"learning_rate": 0.00011412061518230914, |
|
"loss": 0.038, |
|
"step": 620 |
|
}, |
|
{ |
|
"epoch": 2.36, |
|
"grad_norm": 0.2859959872205353, |
|
"learning_rate": 0.00011387664201000532, |
|
"loss": 0.0337, |
|
"step": 621 |
|
}, |
|
{ |
|
"epoch": 2.37, |
|
"grad_norm": 0.35126159863306816, |
|
"learning_rate": 0.00011363258458893699, |
|
"loss": 0.0418, |
|
"step": 622 |
|
}, |
|
{ |
|
"epoch": 2.37, |
|
"grad_norm": 0.36586330790757604, |
|
"learning_rate": 0.00011338844440084138, |
|
"loss": 0.0472, |
|
"step": 623 |
|
}, |
|
{ |
|
"epoch": 2.37, |
|
"grad_norm": 0.3515588484900087, |
|
"learning_rate": 0.0001131442229279581, |
|
"loss": 0.0343, |
|
"step": 624 |
|
}, |
|
{ |
|
"epoch": 2.38, |
|
"grad_norm": 0.26331852309035336, |
|
"learning_rate": 0.00011289992165302035, |
|
"loss": 0.0269, |
|
"step": 625 |
|
}, |
|
{ |
|
"epoch": 2.38, |
|
"grad_norm": 0.304600621064075, |
|
"learning_rate": 0.00011265554205924575, |
|
"loss": 0.0312, |
|
"step": 626 |
|
}, |
|
{ |
|
"epoch": 2.38, |
|
"grad_norm": 0.29191144254029494, |
|
"learning_rate": 0.0001124110856303275, |
|
"loss": 0.0351, |
|
"step": 627 |
|
}, |
|
{ |
|
"epoch": 2.39, |
|
"grad_norm": 0.39129462924046776, |
|
"learning_rate": 0.00011216655385042525, |
|
"loss": 0.035, |
|
"step": 628 |
|
}, |
|
{ |
|
"epoch": 2.39, |
|
"grad_norm": 0.38533876670236805, |
|
"learning_rate": 0.00011192194820415609, |
|
"loss": 0.0398, |
|
"step": 629 |
|
}, |
|
{ |
|
"epoch": 2.4, |
|
"grad_norm": 0.2395517368524462, |
|
"learning_rate": 0.00011167727017658562, |
|
"loss": 0.0305, |
|
"step": 630 |
|
}, |
|
{ |
|
"epoch": 2.4, |
|
"grad_norm": 0.2882756076173429, |
|
"learning_rate": 0.00011143252125321892, |
|
"loss": 0.028, |
|
"step": 631 |
|
}, |
|
{ |
|
"epoch": 2.4, |
|
"grad_norm": 0.3143279186770457, |
|
"learning_rate": 0.00011118770291999137, |
|
"loss": 0.0349, |
|
"step": 632 |
|
}, |
|
{ |
|
"epoch": 2.41, |
|
"grad_norm": 0.2851418078470389, |
|
"learning_rate": 0.00011094281666325988, |
|
"loss": 0.0274, |
|
"step": 633 |
|
}, |
|
{ |
|
"epoch": 2.41, |
|
"grad_norm": 0.36040663639869686, |
|
"learning_rate": 0.00011069786396979367, |
|
"loss": 0.0357, |
|
"step": 634 |
|
}, |
|
{ |
|
"epoch": 2.41, |
|
"grad_norm": 0.33153007014041297, |
|
"learning_rate": 0.00011045284632676536, |
|
"loss": 0.0339, |
|
"step": 635 |
|
}, |
|
{ |
|
"epoch": 2.42, |
|
"grad_norm": 0.28195191033726724, |
|
"learning_rate": 0.00011020776522174186, |
|
"loss": 0.0265, |
|
"step": 636 |
|
}, |
|
{ |
|
"epoch": 2.42, |
|
"grad_norm": 0.29390455843884594, |
|
"learning_rate": 0.0001099626221426754, |
|
"loss": 0.031, |
|
"step": 637 |
|
}, |
|
{ |
|
"epoch": 2.43, |
|
"grad_norm": 0.38900881293341505, |
|
"learning_rate": 0.0001097174185778945, |
|
"loss": 0.0277, |
|
"step": 638 |
|
}, |
|
{ |
|
"epoch": 2.43, |
|
"grad_norm": 0.3393893043389277, |
|
"learning_rate": 0.00010947215601609479, |
|
"loss": 0.0373, |
|
"step": 639 |
|
}, |
|
{ |
|
"epoch": 2.43, |
|
"grad_norm": 0.2908885670583566, |
|
"learning_rate": 0.00010922683594633021, |
|
"loss": 0.0275, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 2.44, |
|
"grad_norm": 0.32162535341655224, |
|
"learning_rate": 0.00010898145985800381, |
|
"loss": 0.0296, |
|
"step": 641 |
|
}, |
|
{ |
|
"epoch": 2.44, |
|
"grad_norm": 0.3101819804262916, |
|
"learning_rate": 0.00010873602924085869, |
|
"loss": 0.0279, |
|
"step": 642 |
|
}, |
|
{ |
|
"epoch": 2.44, |
|
"grad_norm": 0.3225846056983095, |
|
"learning_rate": 0.00010849054558496905, |
|
"loss": 0.0359, |
|
"step": 643 |
|
}, |
|
{ |
|
"epoch": 2.45, |
|
"grad_norm": 0.3320709262623266, |
|
"learning_rate": 0.00010824501038073116, |
|
"loss": 0.0272, |
|
"step": 644 |
|
}, |
|
{ |
|
"epoch": 2.45, |
|
"grad_norm": 0.34267032779621814, |
|
"learning_rate": 0.00010799942511885418, |
|
"loss": 0.0349, |
|
"step": 645 |
|
}, |
|
{ |
|
"epoch": 2.46, |
|
"grad_norm": 0.3252434307442351, |
|
"learning_rate": 0.00010775379129035116, |
|
"loss": 0.0353, |
|
"step": 646 |
|
}, |
|
{ |
|
"epoch": 2.46, |
|
"grad_norm": 0.3424342161187399, |
|
"learning_rate": 0.00010750811038653008, |
|
"loss": 0.0395, |
|
"step": 647 |
|
}, |
|
{ |
|
"epoch": 2.46, |
|
"grad_norm": 0.3350229460047768, |
|
"learning_rate": 0.00010726238389898471, |
|
"loss": 0.0311, |
|
"step": 648 |
|
}, |
|
{ |
|
"epoch": 2.47, |
|
"grad_norm": 0.3333212059527241, |
|
"learning_rate": 0.00010701661331958553, |
|
"loss": 0.0305, |
|
"step": 649 |
|
}, |
|
{ |
|
"epoch": 2.47, |
|
"grad_norm": 0.342236697340712, |
|
"learning_rate": 0.00010677080014047076, |
|
"loss": 0.0387, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 2.48, |
|
"grad_norm": 0.31813476077439445, |
|
"learning_rate": 0.00010652494585403725, |
|
"loss": 0.0332, |
|
"step": 651 |
|
}, |
|
{ |
|
"epoch": 2.48, |
|
"grad_norm": 0.34237738807094575, |
|
"learning_rate": 0.00010627905195293135, |
|
"loss": 0.0305, |
|
"step": 652 |
|
}, |
|
{ |
|
"epoch": 2.48, |
|
"grad_norm": 0.40607599891255564, |
|
"learning_rate": 0.00010603311993004004, |
|
"loss": 0.0394, |
|
"step": 653 |
|
}, |
|
{ |
|
"epoch": 2.49, |
|
"grad_norm": 0.29414538244181715, |
|
"learning_rate": 0.00010578715127848167, |
|
"loss": 0.03, |
|
"step": 654 |
|
}, |
|
{ |
|
"epoch": 2.49, |
|
"grad_norm": 0.37329000324002554, |
|
"learning_rate": 0.000105541147491597, |
|
"loss": 0.0421, |
|
"step": 655 |
|
}, |
|
{ |
|
"epoch": 2.49, |
|
"grad_norm": 0.3522250044405319, |
|
"learning_rate": 0.00010529511006294009, |
|
"loss": 0.045, |
|
"step": 656 |
|
}, |
|
{ |
|
"epoch": 2.5, |
|
"grad_norm": 0.28121115420821313, |
|
"learning_rate": 0.00010504904048626925, |
|
"loss": 0.0292, |
|
"step": 657 |
|
}, |
|
{ |
|
"epoch": 2.5, |
|
"grad_norm": 0.26325089300460475, |
|
"learning_rate": 0.00010480294025553798, |
|
"loss": 0.0292, |
|
"step": 658 |
|
}, |
|
{ |
|
"epoch": 2.51, |
|
"grad_norm": 0.2719725064408085, |
|
"learning_rate": 0.00010455681086488586, |
|
"loss": 0.0231, |
|
"step": 659 |
|
}, |
|
{ |
|
"epoch": 2.51, |
|
"grad_norm": 0.3453111024050063, |
|
"learning_rate": 0.00010431065380862959, |
|
"loss": 0.0378, |
|
"step": 660 |
|
}, |
|
{ |
|
"epoch": 2.51, |
|
"grad_norm": 0.27775866655685405, |
|
"learning_rate": 0.00010406447058125368, |
|
"loss": 0.0286, |
|
"step": 661 |
|
}, |
|
{ |
|
"epoch": 2.52, |
|
"grad_norm": 0.33965483172047317, |
|
"learning_rate": 0.00010381826267740171, |
|
"loss": 0.0333, |
|
"step": 662 |
|
}, |
|
{ |
|
"epoch": 2.52, |
|
"grad_norm": 0.27735302965276887, |
|
"learning_rate": 0.00010357203159186694, |
|
"loss": 0.0271, |
|
"step": 663 |
|
}, |
|
{ |
|
"epoch": 2.52, |
|
"grad_norm": 0.3359647684514768, |
|
"learning_rate": 0.0001033257788195835, |
|
"loss": 0.0399, |
|
"step": 664 |
|
}, |
|
{ |
|
"epoch": 2.53, |
|
"grad_norm": 0.31390916758505816, |
|
"learning_rate": 0.00010307950585561706, |
|
"loss": 0.0259, |
|
"step": 665 |
|
}, |
|
{ |
|
"epoch": 2.53, |
|
"grad_norm": 0.26289076188241683, |
|
"learning_rate": 0.0001028332141951559, |
|
"loss": 0.0303, |
|
"step": 666 |
|
}, |
|
{ |
|
"epoch": 2.54, |
|
"grad_norm": 0.4074393150808, |
|
"learning_rate": 0.0001025869053335019, |
|
"loss": 0.0339, |
|
"step": 667 |
|
}, |
|
{ |
|
"epoch": 2.54, |
|
"grad_norm": 0.254687354277676, |
|
"learning_rate": 0.0001023405807660613, |
|
"loss": 0.0263, |
|
"step": 668 |
|
}, |
|
{ |
|
"epoch": 2.54, |
|
"grad_norm": 0.31504128834532047, |
|
"learning_rate": 0.0001020942419883357, |
|
"loss": 0.0308, |
|
"step": 669 |
|
}, |
|
{ |
|
"epoch": 2.55, |
|
"grad_norm": 0.3328845285750464, |
|
"learning_rate": 0.00010184789049591299, |
|
"loss": 0.0355, |
|
"step": 670 |
|
}, |
|
{ |
|
"epoch": 2.55, |
|
"grad_norm": 0.31546120267640665, |
|
"learning_rate": 0.00010160152778445829, |
|
"loss": 0.0352, |
|
"step": 671 |
|
}, |
|
{ |
|
"epoch": 2.56, |
|
"grad_norm": 0.2983572052164693, |
|
"learning_rate": 0.0001013551553497047, |
|
"loss": 0.0293, |
|
"step": 672 |
|
}, |
|
{ |
|
"epoch": 2.56, |
|
"grad_norm": 0.31011068039080025, |
|
"learning_rate": 0.0001011087746874445, |
|
"loss": 0.0354, |
|
"step": 673 |
|
}, |
|
{ |
|
"epoch": 2.56, |
|
"grad_norm": 0.2987245112804117, |
|
"learning_rate": 0.00010086238729351988, |
|
"loss": 0.0354, |
|
"step": 674 |
|
}, |
|
{ |
|
"epoch": 2.57, |
|
"grad_norm": 0.3234038283715016, |
|
"learning_rate": 0.00010061599466381389, |
|
"loss": 0.0287, |
|
"step": 675 |
|
}, |
|
{ |
|
"epoch": 2.57, |
|
"grad_norm": 0.27880614367112977, |
|
"learning_rate": 0.00010036959829424131, |
|
"loss": 0.0249, |
|
"step": 676 |
|
}, |
|
{ |
|
"epoch": 2.57, |
|
"grad_norm": 0.3435335426279227, |
|
"learning_rate": 0.0001001231996807397, |
|
"loss": 0.0363, |
|
"step": 677 |
|
}, |
|
{ |
|
"epoch": 2.58, |
|
"grad_norm": 0.3166036055942454, |
|
"learning_rate": 9.987680031926032e-05, |
|
"loss": 0.0302, |
|
"step": 678 |
|
}, |
|
{ |
|
"epoch": 2.58, |
|
"grad_norm": 0.2894910254443763, |
|
"learning_rate": 9.96304017057587e-05, |
|
"loss": 0.0285, |
|
"step": 679 |
|
}, |
|
{ |
|
"epoch": 2.59, |
|
"grad_norm": 0.34201749143530447, |
|
"learning_rate": 9.938400533618615e-05, |
|
"loss": 0.0443, |
|
"step": 680 |
|
}, |
|
{ |
|
"epoch": 2.59, |
|
"grad_norm": 0.3740842570087233, |
|
"learning_rate": 9.913761270648015e-05, |
|
"loss": 0.0303, |
|
"step": 681 |
|
}, |
|
{ |
|
"epoch": 2.59, |
|
"grad_norm": 0.3215036425488617, |
|
"learning_rate": 9.889122531255552e-05, |
|
"loss": 0.0383, |
|
"step": 682 |
|
}, |
|
{ |
|
"epoch": 2.6, |
|
"grad_norm": 0.29139970169902235, |
|
"learning_rate": 9.864484465029536e-05, |
|
"loss": 0.0266, |
|
"step": 683 |
|
}, |
|
{ |
|
"epoch": 2.6, |
|
"grad_norm": 0.26997285345644567, |
|
"learning_rate": 9.839847221554175e-05, |
|
"loss": 0.0271, |
|
"step": 684 |
|
}, |
|
{ |
|
"epoch": 2.6, |
|
"grad_norm": 0.37692961834447125, |
|
"learning_rate": 9.815210950408704e-05, |
|
"loss": 0.0409, |
|
"step": 685 |
|
}, |
|
{ |
|
"epoch": 2.61, |
|
"grad_norm": 0.34286292595950857, |
|
"learning_rate": 9.790575801166432e-05, |
|
"loss": 0.0373, |
|
"step": 686 |
|
}, |
|
{ |
|
"epoch": 2.61, |
|
"grad_norm": 0.3255553751579432, |
|
"learning_rate": 9.765941923393874e-05, |
|
"loss": 0.0272, |
|
"step": 687 |
|
}, |
|
{ |
|
"epoch": 2.62, |
|
"grad_norm": 0.3158220004892916, |
|
"learning_rate": 9.741309466649813e-05, |
|
"loss": 0.0269, |
|
"step": 688 |
|
}, |
|
{ |
|
"epoch": 2.62, |
|
"grad_norm": 0.2736162407266226, |
|
"learning_rate": 9.716678580484411e-05, |
|
"loss": 0.0247, |
|
"step": 689 |
|
}, |
|
{ |
|
"epoch": 2.62, |
|
"grad_norm": 0.337905173595386, |
|
"learning_rate": 9.692049414438299e-05, |
|
"loss": 0.0378, |
|
"step": 690 |
|
}, |
|
{ |
|
"epoch": 2.63, |
|
"grad_norm": 0.3258813431481511, |
|
"learning_rate": 9.667422118041651e-05, |
|
"loss": 0.0276, |
|
"step": 691 |
|
}, |
|
{ |
|
"epoch": 2.63, |
|
"grad_norm": 0.31627150266086607, |
|
"learning_rate": 9.642796840813308e-05, |
|
"loss": 0.0246, |
|
"step": 692 |
|
}, |
|
{ |
|
"epoch": 2.63, |
|
"grad_norm": 0.32285673653685554, |
|
"learning_rate": 9.61817373225983e-05, |
|
"loss": 0.0256, |
|
"step": 693 |
|
}, |
|
{ |
|
"epoch": 2.64, |
|
"grad_norm": 0.3021398612416659, |
|
"learning_rate": 9.593552941874635e-05, |
|
"loss": 0.0256, |
|
"step": 694 |
|
}, |
|
{ |
|
"epoch": 2.64, |
|
"grad_norm": 0.2771885920682674, |
|
"learning_rate": 9.568934619137046e-05, |
|
"loss": 0.0236, |
|
"step": 695 |
|
}, |
|
{ |
|
"epoch": 2.65, |
|
"grad_norm": 0.2913089871269692, |
|
"learning_rate": 9.544318913511416e-05, |
|
"loss": 0.0276, |
|
"step": 696 |
|
}, |
|
{ |
|
"epoch": 2.65, |
|
"grad_norm": 0.29943649863489186, |
|
"learning_rate": 9.519705974446207e-05, |
|
"loss": 0.0281, |
|
"step": 697 |
|
}, |
|
{ |
|
"epoch": 2.65, |
|
"grad_norm": 0.28658510456203773, |
|
"learning_rate": 9.495095951373076e-05, |
|
"loss": 0.0222, |
|
"step": 698 |
|
}, |
|
{ |
|
"epoch": 2.66, |
|
"grad_norm": 0.32216521722606845, |
|
"learning_rate": 9.470488993705992e-05, |
|
"loss": 0.0275, |
|
"step": 699 |
|
}, |
|
{ |
|
"epoch": 2.66, |
|
"grad_norm": 0.3534602351211303, |
|
"learning_rate": 9.4458852508403e-05, |
|
"loss": 0.0349, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 2.66, |
|
"eval_blimp_filtered_avg": 0.7164179104477612, |
|
"eval_blimp_filtered_std": 0.005041707326458033, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 2.66, |
|
"eval_blimp_supplement_avg": 0.8297413793103449, |
|
"eval_blimp_supplement_std": 0.016828487437818656, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 2.66, |
|
"eval_vqa_filtered_avg": 0.5, |
|
"eval_vqa_filtered_std": 0.050251890762960605, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 2.66, |
|
"eval_winoground_filtered_avg": 0.66, |
|
"eval_winoground_filtered_std": 0.04760952285695238, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 2.67, |
|
"grad_norm": 0.38483480730152236, |
|
"learning_rate": 9.421284872151836e-05, |
|
"loss": 0.0447, |
|
"step": 701 |
|
}, |
|
{ |
|
"epoch": 2.67, |
|
"grad_norm": 0.2691349907593638, |
|
"learning_rate": 9.396688006996e-05, |
|
"loss": 0.0275, |
|
"step": 702 |
|
}, |
|
{ |
|
"epoch": 2.67, |
|
"grad_norm": 0.30394270832959963, |
|
"learning_rate": 9.372094804706867e-05, |
|
"loss": 0.0245, |
|
"step": 703 |
|
}, |
|
{ |
|
"epoch": 2.68, |
|
"grad_norm": 0.3894950215679374, |
|
"learning_rate": 9.34750541459628e-05, |
|
"loss": 0.0328, |
|
"step": 704 |
|
}, |
|
{ |
|
"epoch": 2.68, |
|
"grad_norm": 0.3066861317584869, |
|
"learning_rate": 9.322919985952926e-05, |
|
"loss": 0.0257, |
|
"step": 705 |
|
}, |
|
{ |
|
"epoch": 2.68, |
|
"grad_norm": 0.2839567453054656, |
|
"learning_rate": 9.298338668041451e-05, |
|
"loss": 0.0301, |
|
"step": 706 |
|
}, |
|
{ |
|
"epoch": 2.69, |
|
"grad_norm": 0.3037116435680821, |
|
"learning_rate": 9.27376161010153e-05, |
|
"loss": 0.0221, |
|
"step": 707 |
|
}, |
|
{ |
|
"epoch": 2.69, |
|
"grad_norm": 0.3134542478986076, |
|
"learning_rate": 9.249188961346993e-05, |
|
"loss": 0.0338, |
|
"step": 708 |
|
}, |
|
{ |
|
"epoch": 2.7, |
|
"grad_norm": 0.3541455631807929, |
|
"learning_rate": 9.224620870964886e-05, |
|
"loss": 0.0355, |
|
"step": 709 |
|
}, |
|
{ |
|
"epoch": 2.7, |
|
"grad_norm": 0.3115683125480853, |
|
"learning_rate": 9.200057488114585e-05, |
|
"loss": 0.0252, |
|
"step": 710 |
|
}, |
|
{ |
|
"epoch": 2.7, |
|
"grad_norm": 0.27292900060486747, |
|
"learning_rate": 9.175498961926886e-05, |
|
"loss": 0.021, |
|
"step": 711 |
|
}, |
|
{ |
|
"epoch": 2.71, |
|
"grad_norm": 0.28750762243690786, |
|
"learning_rate": 9.150945441503093e-05, |
|
"loss": 0.0262, |
|
"step": 712 |
|
}, |
|
{ |
|
"epoch": 2.71, |
|
"grad_norm": 0.3131812214087709, |
|
"learning_rate": 9.126397075914135e-05, |
|
"loss": 0.032, |
|
"step": 713 |
|
}, |
|
{ |
|
"epoch": 2.71, |
|
"grad_norm": 0.2503186984511136, |
|
"learning_rate": 9.101854014199622e-05, |
|
"loss": 0.0222, |
|
"step": 714 |
|
}, |
|
{ |
|
"epoch": 2.72, |
|
"grad_norm": 0.26585317166475225, |
|
"learning_rate": 9.077316405366981e-05, |
|
"loss": 0.0237, |
|
"step": 715 |
|
}, |
|
{ |
|
"epoch": 2.72, |
|
"grad_norm": 0.30126199934192793, |
|
"learning_rate": 9.052784398390525e-05, |
|
"loss": 0.0294, |
|
"step": 716 |
|
}, |
|
{ |
|
"epoch": 2.73, |
|
"grad_norm": 0.2903130573653526, |
|
"learning_rate": 9.028258142210552e-05, |
|
"loss": 0.0294, |
|
"step": 717 |
|
}, |
|
{ |
|
"epoch": 2.73, |
|
"grad_norm": 0.27174816407170116, |
|
"learning_rate": 9.00373778573246e-05, |
|
"loss": 0.0269, |
|
"step": 718 |
|
}, |
|
{ |
|
"epoch": 2.73, |
|
"grad_norm": 0.3629515708975863, |
|
"learning_rate": 8.979223477825814e-05, |
|
"loss": 0.0281, |
|
"step": 719 |
|
}, |
|
{ |
|
"epoch": 2.74, |
|
"grad_norm": 0.3714998200084635, |
|
"learning_rate": 8.954715367323468e-05, |
|
"loss": 0.0331, |
|
"step": 720 |
|
}, |
|
{ |
|
"epoch": 2.74, |
|
"grad_norm": 0.3387130445188704, |
|
"learning_rate": 8.930213603020638e-05, |
|
"loss": 0.0349, |
|
"step": 721 |
|
}, |
|
{ |
|
"epoch": 2.75, |
|
"grad_norm": 0.33356330455180916, |
|
"learning_rate": 8.905718333674013e-05, |
|
"loss": 0.0323, |
|
"step": 722 |
|
}, |
|
{ |
|
"epoch": 2.75, |
|
"grad_norm": 0.3443287065763936, |
|
"learning_rate": 8.881229708000865e-05, |
|
"loss": 0.0314, |
|
"step": 723 |
|
}, |
|
{ |
|
"epoch": 2.75, |
|
"grad_norm": 0.36589381487249345, |
|
"learning_rate": 8.85674787467811e-05, |
|
"loss": 0.0331, |
|
"step": 724 |
|
}, |
|
{ |
|
"epoch": 2.76, |
|
"grad_norm": 0.33686969776224657, |
|
"learning_rate": 8.832272982341439e-05, |
|
"loss": 0.0347, |
|
"step": 725 |
|
}, |
|
{ |
|
"epoch": 2.76, |
|
"grad_norm": 0.2821892144697015, |
|
"learning_rate": 8.80780517958439e-05, |
|
"loss": 0.029, |
|
"step": 726 |
|
}, |
|
{ |
|
"epoch": 2.76, |
|
"grad_norm": 0.3370413152254308, |
|
"learning_rate": 8.783344614957477e-05, |
|
"loss": 0.0309, |
|
"step": 727 |
|
}, |
|
{ |
|
"epoch": 2.77, |
|
"grad_norm": 0.27254650378131945, |
|
"learning_rate": 8.758891436967252e-05, |
|
"loss": 0.0206, |
|
"step": 728 |
|
}, |
|
{ |
|
"epoch": 2.77, |
|
"grad_norm": 0.344842568070324, |
|
"learning_rate": 8.734445794075428e-05, |
|
"loss": 0.0331, |
|
"step": 729 |
|
}, |
|
{ |
|
"epoch": 2.78, |
|
"grad_norm": 0.3263812096586238, |
|
"learning_rate": 8.710007834697969e-05, |
|
"loss": 0.0323, |
|
"step": 730 |
|
}, |
|
{ |
|
"epoch": 2.78, |
|
"grad_norm": 0.24908057089857247, |
|
"learning_rate": 8.68557770720419e-05, |
|
"loss": 0.024, |
|
"step": 731 |
|
}, |
|
{ |
|
"epoch": 2.78, |
|
"grad_norm": 0.2829456218767608, |
|
"learning_rate": 8.661155559915863e-05, |
|
"loss": 0.0258, |
|
"step": 732 |
|
}, |
|
{ |
|
"epoch": 2.79, |
|
"grad_norm": 0.2992177526407328, |
|
"learning_rate": 8.636741541106299e-05, |
|
"loss": 0.0258, |
|
"step": 733 |
|
}, |
|
{ |
|
"epoch": 2.79, |
|
"grad_norm": 0.3498647491010859, |
|
"learning_rate": 8.61233579899947e-05, |
|
"loss": 0.0309, |
|
"step": 734 |
|
}, |
|
{ |
|
"epoch": 2.79, |
|
"grad_norm": 0.31399097802426235, |
|
"learning_rate": 8.587938481769089e-05, |
|
"loss": 0.0391, |
|
"step": 735 |
|
}, |
|
{ |
|
"epoch": 2.8, |
|
"grad_norm": 0.28430578853728555, |
|
"learning_rate": 8.563549737537719e-05, |
|
"loss": 0.026, |
|
"step": 736 |
|
}, |
|
{ |
|
"epoch": 2.8, |
|
"grad_norm": 0.2523093762154707, |
|
"learning_rate": 8.539169714375885e-05, |
|
"loss": 0.0212, |
|
"step": 737 |
|
}, |
|
{ |
|
"epoch": 2.81, |
|
"grad_norm": 0.29858190065300216, |
|
"learning_rate": 8.514798560301152e-05, |
|
"loss": 0.0339, |
|
"step": 738 |
|
}, |
|
{ |
|
"epoch": 2.81, |
|
"grad_norm": 0.2536866212590354, |
|
"learning_rate": 8.490436423277248e-05, |
|
"loss": 0.0254, |
|
"step": 739 |
|
}, |
|
{ |
|
"epoch": 2.81, |
|
"grad_norm": 0.2924064432366086, |
|
"learning_rate": 8.466083451213144e-05, |
|
"loss": 0.0293, |
|
"step": 740 |
|
}, |
|
{ |
|
"epoch": 2.82, |
|
"grad_norm": 0.2981743418161694, |
|
"learning_rate": 8.441739791962187e-05, |
|
"loss": 0.0258, |
|
"step": 741 |
|
}, |
|
{ |
|
"epoch": 2.82, |
|
"grad_norm": 0.2812452863028169, |
|
"learning_rate": 8.417405593321163e-05, |
|
"loss": 0.0258, |
|
"step": 742 |
|
}, |
|
{ |
|
"epoch": 2.83, |
|
"grad_norm": 0.28479487575175905, |
|
"learning_rate": 8.393081003029431e-05, |
|
"loss": 0.0256, |
|
"step": 743 |
|
}, |
|
{ |
|
"epoch": 2.83, |
|
"grad_norm": 0.34570399590977596, |
|
"learning_rate": 8.368766168768014e-05, |
|
"loss": 0.0325, |
|
"step": 744 |
|
}, |
|
{ |
|
"epoch": 2.83, |
|
"grad_norm": 0.3337375710597649, |
|
"learning_rate": 8.344461238158699e-05, |
|
"loss": 0.0327, |
|
"step": 745 |
|
}, |
|
{ |
|
"epoch": 2.84, |
|
"grad_norm": 0.2986872900474696, |
|
"learning_rate": 8.320166358763159e-05, |
|
"loss": 0.0217, |
|
"step": 746 |
|
}, |
|
{ |
|
"epoch": 2.84, |
|
"grad_norm": 0.28712435596387936, |
|
"learning_rate": 8.295881678082024e-05, |
|
"loss": 0.0256, |
|
"step": 747 |
|
}, |
|
{ |
|
"epoch": 2.84, |
|
"grad_norm": 0.3272426704832391, |
|
"learning_rate": 8.271607343554021e-05, |
|
"loss": 0.0246, |
|
"step": 748 |
|
}, |
|
{ |
|
"epoch": 2.85, |
|
"grad_norm": 0.3092816463373921, |
|
"learning_rate": 8.247343502555053e-05, |
|
"loss": 0.0304, |
|
"step": 749 |
|
}, |
|
{ |
|
"epoch": 2.85, |
|
"grad_norm": 0.3000320928728694, |
|
"learning_rate": 8.223090302397313e-05, |
|
"loss": 0.0246, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 2.86, |
|
"grad_norm": 0.2990018271701567, |
|
"learning_rate": 8.198847890328406e-05, |
|
"loss": 0.0301, |
|
"step": 751 |
|
}, |
|
{ |
|
"epoch": 2.86, |
|
"grad_norm": 0.3674125202368627, |
|
"learning_rate": 8.174616413530418e-05, |
|
"loss": 0.0294, |
|
"step": 752 |
|
}, |
|
{ |
|
"epoch": 2.86, |
|
"grad_norm": 0.32604533269795055, |
|
"learning_rate": 8.150396019119062e-05, |
|
"loss": 0.0345, |
|
"step": 753 |
|
}, |
|
{ |
|
"epoch": 2.87, |
|
"grad_norm": 0.283530884706442, |
|
"learning_rate": 8.126186854142752e-05, |
|
"loss": 0.0193, |
|
"step": 754 |
|
}, |
|
{ |
|
"epoch": 2.87, |
|
"grad_norm": 0.25164065278234365, |
|
"learning_rate": 8.101989065581743e-05, |
|
"loss": 0.0184, |
|
"step": 755 |
|
}, |
|
{ |
|
"epoch": 2.87, |
|
"grad_norm": 0.25607430685124377, |
|
"learning_rate": 8.077802800347205e-05, |
|
"loss": 0.0199, |
|
"step": 756 |
|
}, |
|
{ |
|
"epoch": 2.88, |
|
"grad_norm": 0.2934139007796417, |
|
"learning_rate": 8.053628205280347e-05, |
|
"loss": 0.0234, |
|
"step": 757 |
|
}, |
|
{ |
|
"epoch": 2.88, |
|
"grad_norm": 0.30060563019331127, |
|
"learning_rate": 8.029465427151538e-05, |
|
"loss": 0.0243, |
|
"step": 758 |
|
}, |
|
{ |
|
"epoch": 2.89, |
|
"grad_norm": 0.31584329920715304, |
|
"learning_rate": 8.005314612659393e-05, |
|
"loss": 0.0233, |
|
"step": 759 |
|
}, |
|
{ |
|
"epoch": 2.89, |
|
"grad_norm": 0.3399830490651399, |
|
"learning_rate": 7.9811759084299e-05, |
|
"loss": 0.0303, |
|
"step": 760 |
|
}, |
|
{ |
|
"epoch": 2.89, |
|
"grad_norm": 0.23710075133737013, |
|
"learning_rate": 7.957049461015512e-05, |
|
"loss": 0.0201, |
|
"step": 761 |
|
}, |
|
{ |
|
"epoch": 2.9, |
|
"grad_norm": 0.291414714994268, |
|
"learning_rate": 7.932935416894272e-05, |
|
"loss": 0.0261, |
|
"step": 762 |
|
}, |
|
{ |
|
"epoch": 2.9, |
|
"grad_norm": 0.25516874264704126, |
|
"learning_rate": 7.908833922468927e-05, |
|
"loss": 0.0209, |
|
"step": 763 |
|
}, |
|
{ |
|
"epoch": 2.9, |
|
"grad_norm": 0.3225236014785536, |
|
"learning_rate": 7.884745124066023e-05, |
|
"loss": 0.0293, |
|
"step": 764 |
|
}, |
|
{ |
|
"epoch": 2.91, |
|
"grad_norm": 0.33073323071844557, |
|
"learning_rate": 7.860669167935028e-05, |
|
"loss": 0.0312, |
|
"step": 765 |
|
}, |
|
{ |
|
"epoch": 2.91, |
|
"grad_norm": 0.2747460833033659, |
|
"learning_rate": 7.836606200247436e-05, |
|
"loss": 0.0249, |
|
"step": 766 |
|
}, |
|
{ |
|
"epoch": 2.92, |
|
"grad_norm": 0.22799207899871587, |
|
"learning_rate": 7.812556367095896e-05, |
|
"loss": 0.0236, |
|
"step": 767 |
|
}, |
|
{ |
|
"epoch": 2.92, |
|
"grad_norm": 0.2957004620104486, |
|
"learning_rate": 7.788519814493304e-05, |
|
"loss": 0.0264, |
|
"step": 768 |
|
}, |
|
{ |
|
"epoch": 2.92, |
|
"grad_norm": 0.28521269666590277, |
|
"learning_rate": 7.764496688371929e-05, |
|
"loss": 0.0279, |
|
"step": 769 |
|
}, |
|
{ |
|
"epoch": 2.93, |
|
"grad_norm": 0.24287300349001784, |
|
"learning_rate": 7.740487134582525e-05, |
|
"loss": 0.0182, |
|
"step": 770 |
|
}, |
|
{ |
|
"epoch": 2.93, |
|
"grad_norm": 0.3175933387720721, |
|
"learning_rate": 7.716491298893442e-05, |
|
"loss": 0.0339, |
|
"step": 771 |
|
}, |
|
{ |
|
"epoch": 2.94, |
|
"grad_norm": 0.32835585850076665, |
|
"learning_rate": 7.692509326989753e-05, |
|
"loss": 0.0264, |
|
"step": 772 |
|
}, |
|
{ |
|
"epoch": 2.94, |
|
"grad_norm": 0.3273636208785487, |
|
"learning_rate": 7.668541364472346e-05, |
|
"loss": 0.0371, |
|
"step": 773 |
|
}, |
|
{ |
|
"epoch": 2.94, |
|
"grad_norm": 0.32029214184531163, |
|
"learning_rate": 7.644587556857065e-05, |
|
"loss": 0.0266, |
|
"step": 774 |
|
}, |
|
{ |
|
"epoch": 2.95, |
|
"grad_norm": 0.3573218111505275, |
|
"learning_rate": 7.620648049573815e-05, |
|
"loss": 0.0294, |
|
"step": 775 |
|
}, |
|
{ |
|
"epoch": 2.95, |
|
"grad_norm": 0.2639798311874394, |
|
"learning_rate": 7.596722987965669e-05, |
|
"loss": 0.0245, |
|
"step": 776 |
|
}, |
|
{ |
|
"epoch": 2.95, |
|
"grad_norm": 0.3198924686801555, |
|
"learning_rate": 7.572812517288018e-05, |
|
"loss": 0.0222, |
|
"step": 777 |
|
}, |
|
{ |
|
"epoch": 2.96, |
|
"grad_norm": 0.3153205405405442, |
|
"learning_rate": 7.548916782707642e-05, |
|
"loss": 0.0322, |
|
"step": 778 |
|
}, |
|
{ |
|
"epoch": 2.96, |
|
"grad_norm": 0.26231028932152634, |
|
"learning_rate": 7.525035929301877e-05, |
|
"loss": 0.0302, |
|
"step": 779 |
|
}, |
|
{ |
|
"epoch": 2.97, |
|
"grad_norm": 0.2688781021142035, |
|
"learning_rate": 7.50117010205769e-05, |
|
"loss": 0.0251, |
|
"step": 780 |
|
}, |
|
{ |
|
"epoch": 2.97, |
|
"grad_norm": 0.26830771551596, |
|
"learning_rate": 7.477319445870845e-05, |
|
"loss": 0.0226, |
|
"step": 781 |
|
}, |
|
{ |
|
"epoch": 2.97, |
|
"grad_norm": 0.30176145649756575, |
|
"learning_rate": 7.453484105544976e-05, |
|
"loss": 0.0331, |
|
"step": 782 |
|
}, |
|
{ |
|
"epoch": 2.98, |
|
"grad_norm": 0.3115716482147434, |
|
"learning_rate": 7.429664225790743e-05, |
|
"loss": 0.0233, |
|
"step": 783 |
|
}, |
|
{ |
|
"epoch": 2.98, |
|
"grad_norm": 0.31100418685578723, |
|
"learning_rate": 7.405859951224933e-05, |
|
"loss": 0.0284, |
|
"step": 784 |
|
}, |
|
{ |
|
"epoch": 2.98, |
|
"grad_norm": 0.2588637124537901, |
|
"learning_rate": 7.382071426369597e-05, |
|
"loss": 0.0233, |
|
"step": 785 |
|
}, |
|
{ |
|
"epoch": 2.99, |
|
"grad_norm": 0.31963780634052935, |
|
"learning_rate": 7.358298795651165e-05, |
|
"loss": 0.0201, |
|
"step": 786 |
|
}, |
|
{ |
|
"epoch": 2.99, |
|
"grad_norm": 0.3357482742804347, |
|
"learning_rate": 7.33454220339956e-05, |
|
"loss": 0.0234, |
|
"step": 787 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"grad_norm": 0.31241871424393247, |
|
"learning_rate": 7.310801793847344e-05, |
|
"loss": 0.0352, |
|
"step": 788 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"grad_norm": 0.34133470506967756, |
|
"learning_rate": 7.287077711128823e-05, |
|
"loss": 0.0237, |
|
"step": 789 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"grad_norm": 0.14462364414170686, |
|
"learning_rate": 7.263370099279172e-05, |
|
"loss": 0.0101, |
|
"step": 790 |
|
}, |
|
{ |
|
"epoch": 3.01, |
|
"grad_norm": 0.139685520904427, |
|
"learning_rate": 7.239679102233582e-05, |
|
"loss": 0.008, |
|
"step": 791 |
|
}, |
|
{ |
|
"epoch": 3.01, |
|
"grad_norm": 0.15075598570023135, |
|
"learning_rate": 7.21600486382636e-05, |
|
"loss": 0.009, |
|
"step": 792 |
|
}, |
|
{ |
|
"epoch": 3.02, |
|
"grad_norm": 0.09798572261973783, |
|
"learning_rate": 7.192347527790073e-05, |
|
"loss": 0.0047, |
|
"step": 793 |
|
}, |
|
{ |
|
"epoch": 3.02, |
|
"grad_norm": 0.17274723978571413, |
|
"learning_rate": 7.168707237754658e-05, |
|
"loss": 0.0101, |
|
"step": 794 |
|
}, |
|
{ |
|
"epoch": 3.02, |
|
"grad_norm": 0.14385963722120923, |
|
"learning_rate": 7.14508413724658e-05, |
|
"loss": 0.0087, |
|
"step": 795 |
|
}, |
|
{ |
|
"epoch": 3.03, |
|
"grad_norm": 0.1417300685940669, |
|
"learning_rate": 7.121478369687926e-05, |
|
"loss": 0.005, |
|
"step": 796 |
|
}, |
|
{ |
|
"epoch": 3.03, |
|
"grad_norm": 0.13023296035033566, |
|
"learning_rate": 7.097890078395553e-05, |
|
"loss": 0.0062, |
|
"step": 797 |
|
}, |
|
{ |
|
"epoch": 3.03, |
|
"grad_norm": 0.15199661343358178, |
|
"learning_rate": 7.074319406580224e-05, |
|
"loss": 0.0077, |
|
"step": 798 |
|
}, |
|
{ |
|
"epoch": 3.04, |
|
"grad_norm": 0.26597073732306437, |
|
"learning_rate": 7.050766497345714e-05, |
|
"loss": 0.0203, |
|
"step": 799 |
|
}, |
|
{ |
|
"epoch": 3.04, |
|
"grad_norm": 0.16538027691964055, |
|
"learning_rate": 7.027231493687974e-05, |
|
"loss": 0.0111, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 3.04, |
|
"eval_blimp_filtered_avg": 0.7185074626865672, |
|
"eval_blimp_filtered_std": 0.005025927855759427, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 3.04, |
|
"eval_blimp_supplement_avg": 0.8297413793103449, |
|
"eval_blimp_supplement_std": 0.016768829882349248, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 3.04, |
|
"eval_vqa_filtered_avg": 0.49, |
|
"eval_vqa_filtered_std": 0.05024183937956912, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 3.04, |
|
"eval_winoground_filtered_avg": 0.67, |
|
"eval_winoground_filtered_std": 0.04725815626252606, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 3.05, |
|
"grad_norm": 0.21979041950155864, |
|
"learning_rate": 7.003714538494233e-05, |
|
"loss": 0.0087, |
|
"step": 801 |
|
}, |
|
{ |
|
"epoch": 3.05, |
|
"grad_norm": 0.18913233160499004, |
|
"learning_rate": 6.980215774542147e-05, |
|
"loss": 0.0096, |
|
"step": 802 |
|
}, |
|
{ |
|
"epoch": 3.05, |
|
"grad_norm": 0.1282988164073535, |
|
"learning_rate": 6.95673534449893e-05, |
|
"loss": 0.0068, |
|
"step": 803 |
|
}, |
|
{ |
|
"epoch": 3.06, |
|
"grad_norm": 0.21752972603923165, |
|
"learning_rate": 6.933273390920478e-05, |
|
"loss": 0.0104, |
|
"step": 804 |
|
}, |
|
{ |
|
"epoch": 3.06, |
|
"grad_norm": 0.1967481497472081, |
|
"learning_rate": 6.909830056250527e-05, |
|
"loss": 0.01, |
|
"step": 805 |
|
}, |
|
{ |
|
"epoch": 3.06, |
|
"grad_norm": 0.18179145828619866, |
|
"learning_rate": 6.886405482819756e-05, |
|
"loss": 0.0093, |
|
"step": 806 |
|
}, |
|
{ |
|
"epoch": 3.07, |
|
"grad_norm": 0.17276847874184295, |
|
"learning_rate": 6.862999812844953e-05, |
|
"loss": 0.0092, |
|
"step": 807 |
|
}, |
|
{ |
|
"epoch": 3.07, |
|
"grad_norm": 0.1342024775407428, |
|
"learning_rate": 6.839613188428126e-05, |
|
"loss": 0.0065, |
|
"step": 808 |
|
}, |
|
{ |
|
"epoch": 3.08, |
|
"grad_norm": 0.14878102990268274, |
|
"learning_rate": 6.81624575155566e-05, |
|
"loss": 0.0063, |
|
"step": 809 |
|
}, |
|
{ |
|
"epoch": 3.08, |
|
"grad_norm": 0.16403017563876973, |
|
"learning_rate": 6.792897644097451e-05, |
|
"loss": 0.0087, |
|
"step": 810 |
|
}, |
|
{ |
|
"epoch": 3.08, |
|
"grad_norm": 0.19801520787974072, |
|
"learning_rate": 6.769569007806027e-05, |
|
"loss": 0.0093, |
|
"step": 811 |
|
}, |
|
{ |
|
"epoch": 3.09, |
|
"grad_norm": 0.20572707361042517, |
|
"learning_rate": 6.746259984315717e-05, |
|
"loss": 0.011, |
|
"step": 812 |
|
}, |
|
{ |
|
"epoch": 3.09, |
|
"grad_norm": 0.13697981744457283, |
|
"learning_rate": 6.722970715141763e-05, |
|
"loss": 0.0046, |
|
"step": 813 |
|
}, |
|
{ |
|
"epoch": 3.1, |
|
"grad_norm": 0.19205653189851232, |
|
"learning_rate": 6.699701341679488e-05, |
|
"loss": 0.0092, |
|
"step": 814 |
|
}, |
|
{ |
|
"epoch": 3.1, |
|
"grad_norm": 0.1918525797269716, |
|
"learning_rate": 6.676452005203406e-05, |
|
"loss": 0.0098, |
|
"step": 815 |
|
}, |
|
{ |
|
"epoch": 3.1, |
|
"grad_norm": 0.2248320215810751, |
|
"learning_rate": 6.653222846866389e-05, |
|
"loss": 0.0109, |
|
"step": 816 |
|
}, |
|
{ |
|
"epoch": 3.11, |
|
"grad_norm": 0.15114455523839437, |
|
"learning_rate": 6.630014007698807e-05, |
|
"loss": 0.0067, |
|
"step": 817 |
|
}, |
|
{ |
|
"epoch": 3.11, |
|
"grad_norm": 0.13233475079909107, |
|
"learning_rate": 6.606825628607654e-05, |
|
"loss": 0.0071, |
|
"step": 818 |
|
}, |
|
{ |
|
"epoch": 3.11, |
|
"grad_norm": 0.13146130811845824, |
|
"learning_rate": 6.583657850375723e-05, |
|
"loss": 0.0059, |
|
"step": 819 |
|
}, |
|
{ |
|
"epoch": 3.12, |
|
"grad_norm": 0.1857326870966165, |
|
"learning_rate": 6.560510813660719e-05, |
|
"loss": 0.0036, |
|
"step": 820 |
|
}, |
|
{ |
|
"epoch": 3.12, |
|
"grad_norm": 0.2298298643113706, |
|
"learning_rate": 6.537384658994428e-05, |
|
"loss": 0.0086, |
|
"step": 821 |
|
}, |
|
{ |
|
"epoch": 3.13, |
|
"grad_norm": 0.14279703425417056, |
|
"learning_rate": 6.51427952678185e-05, |
|
"loss": 0.0085, |
|
"step": 822 |
|
}, |
|
{ |
|
"epoch": 3.13, |
|
"grad_norm": 0.10832021907148229, |
|
"learning_rate": 6.491195557300353e-05, |
|
"loss": 0.0058, |
|
"step": 823 |
|
}, |
|
{ |
|
"epoch": 3.13, |
|
"grad_norm": 0.11823319091468878, |
|
"learning_rate": 6.468132890698829e-05, |
|
"loss": 0.0062, |
|
"step": 824 |
|
}, |
|
{ |
|
"epoch": 3.14, |
|
"grad_norm": 0.18897801838669076, |
|
"learning_rate": 6.44509166699682e-05, |
|
"loss": 0.0092, |
|
"step": 825 |
|
}, |
|
{ |
|
"epoch": 3.14, |
|
"grad_norm": 0.25020039121328774, |
|
"learning_rate": 6.422072026083697e-05, |
|
"loss": 0.0137, |
|
"step": 826 |
|
}, |
|
{ |
|
"epoch": 3.14, |
|
"grad_norm": 0.20775807254077025, |
|
"learning_rate": 6.399074107717782e-05, |
|
"loss": 0.0093, |
|
"step": 827 |
|
}, |
|
{ |
|
"epoch": 3.15, |
|
"grad_norm": 0.20035420751641417, |
|
"learning_rate": 6.376098051525529e-05, |
|
"loss": 0.008, |
|
"step": 828 |
|
}, |
|
{ |
|
"epoch": 3.15, |
|
"grad_norm": 0.1813508652414028, |
|
"learning_rate": 6.35314399700065e-05, |
|
"loss": 0.0046, |
|
"step": 829 |
|
}, |
|
{ |
|
"epoch": 3.16, |
|
"grad_norm": 0.17007027062456834, |
|
"learning_rate": 6.33021208350328e-05, |
|
"loss": 0.0073, |
|
"step": 830 |
|
}, |
|
{ |
|
"epoch": 3.16, |
|
"grad_norm": 0.19015476370218232, |
|
"learning_rate": 6.307302450259136e-05, |
|
"loss": 0.0106, |
|
"step": 831 |
|
}, |
|
{ |
|
"epoch": 3.16, |
|
"grad_norm": 0.16589400476299693, |
|
"learning_rate": 6.284415236358653e-05, |
|
"loss": 0.0055, |
|
"step": 832 |
|
}, |
|
{ |
|
"epoch": 3.17, |
|
"grad_norm": 0.16209318707015577, |
|
"learning_rate": 6.261550580756175e-05, |
|
"loss": 0.005, |
|
"step": 833 |
|
}, |
|
{ |
|
"epoch": 3.17, |
|
"grad_norm": 0.18839393523384632, |
|
"learning_rate": 6.238708622269065e-05, |
|
"loss": 0.0089, |
|
"step": 834 |
|
}, |
|
{ |
|
"epoch": 3.17, |
|
"grad_norm": 0.16926357657366423, |
|
"learning_rate": 6.215889499576898e-05, |
|
"loss": 0.0071, |
|
"step": 835 |
|
}, |
|
{ |
|
"epoch": 3.18, |
|
"grad_norm": 0.1427288948192287, |
|
"learning_rate": 6.193093351220605e-05, |
|
"loss": 0.0071, |
|
"step": 836 |
|
}, |
|
{ |
|
"epoch": 3.18, |
|
"grad_norm": 0.0823573599543454, |
|
"learning_rate": 6.170320315601628e-05, |
|
"loss": 0.0028, |
|
"step": 837 |
|
}, |
|
{ |
|
"epoch": 3.19, |
|
"grad_norm": 0.17793420603102786, |
|
"learning_rate": 6.147570530981099e-05, |
|
"loss": 0.0073, |
|
"step": 838 |
|
}, |
|
{ |
|
"epoch": 3.19, |
|
"grad_norm": 0.2613656409512038, |
|
"learning_rate": 6.12484413547897e-05, |
|
"loss": 0.0099, |
|
"step": 839 |
|
}, |
|
{ |
|
"epoch": 3.19, |
|
"grad_norm": 0.10748497227166323, |
|
"learning_rate": 6.102141267073207e-05, |
|
"loss": 0.0056, |
|
"step": 840 |
|
}, |
|
{ |
|
"epoch": 3.2, |
|
"grad_norm": 0.3115342005600293, |
|
"learning_rate": 6.0794620635989244e-05, |
|
"loss": 0.0202, |
|
"step": 841 |
|
}, |
|
{ |
|
"epoch": 3.2, |
|
"grad_norm": 0.17604863819079872, |
|
"learning_rate": 6.056806662747572e-05, |
|
"loss": 0.0058, |
|
"step": 842 |
|
}, |
|
{ |
|
"epoch": 3.21, |
|
"grad_norm": 0.09017231755285983, |
|
"learning_rate": 6.034175202066077e-05, |
|
"loss": 0.0031, |
|
"step": 843 |
|
}, |
|
{ |
|
"epoch": 3.21, |
|
"grad_norm": 0.15807640643888599, |
|
"learning_rate": 6.011567818956021e-05, |
|
"loss": 0.0083, |
|
"step": 844 |
|
}, |
|
{ |
|
"epoch": 3.21, |
|
"grad_norm": 0.1322797473736452, |
|
"learning_rate": 5.988984650672813e-05, |
|
"loss": 0.0054, |
|
"step": 845 |
|
}, |
|
{ |
|
"epoch": 3.22, |
|
"grad_norm": 0.1646419558157819, |
|
"learning_rate": 5.96642583432484e-05, |
|
"loss": 0.0088, |
|
"step": 846 |
|
}, |
|
{ |
|
"epoch": 3.22, |
|
"grad_norm": 0.15547257435043996, |
|
"learning_rate": 5.943891506872645e-05, |
|
"loss": 0.0074, |
|
"step": 847 |
|
}, |
|
{ |
|
"epoch": 3.22, |
|
"grad_norm": 0.19268286944193538, |
|
"learning_rate": 5.921381805128088e-05, |
|
"loss": 0.0125, |
|
"step": 848 |
|
}, |
|
{ |
|
"epoch": 3.23, |
|
"grad_norm": 0.16001416685280997, |
|
"learning_rate": 5.898896865753522e-05, |
|
"loss": 0.0102, |
|
"step": 849 |
|
}, |
|
{ |
|
"epoch": 3.23, |
|
"grad_norm": 0.14560946392731133, |
|
"learning_rate": 5.876436825260967e-05, |
|
"loss": 0.0058, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 3.24, |
|
"grad_norm": 0.10597553502655095, |
|
"learning_rate": 5.854001820011265e-05, |
|
"loss": 0.0042, |
|
"step": 851 |
|
}, |
|
{ |
|
"epoch": 3.24, |
|
"grad_norm": 0.1477597061504793, |
|
"learning_rate": 5.831591986213274e-05, |
|
"loss": 0.0115, |
|
"step": 852 |
|
}, |
|
{ |
|
"epoch": 3.24, |
|
"grad_norm": 0.11589346222710697, |
|
"learning_rate": 5.809207459923016e-05, |
|
"loss": 0.0052, |
|
"step": 853 |
|
}, |
|
{ |
|
"epoch": 3.25, |
|
"grad_norm": 0.11233318637657551, |
|
"learning_rate": 5.786848377042875e-05, |
|
"loss": 0.0047, |
|
"step": 854 |
|
}, |
|
{ |
|
"epoch": 3.25, |
|
"grad_norm": 0.14038827344355653, |
|
"learning_rate": 5.764514873320761e-05, |
|
"loss": 0.0056, |
|
"step": 855 |
|
}, |
|
{ |
|
"epoch": 3.25, |
|
"grad_norm": 0.13670003396305133, |
|
"learning_rate": 5.7422070843492734e-05, |
|
"loss": 0.007, |
|
"step": 856 |
|
}, |
|
{ |
|
"epoch": 3.26, |
|
"grad_norm": 0.10138719221994133, |
|
"learning_rate": 5.719925145564913e-05, |
|
"loss": 0.005, |
|
"step": 857 |
|
}, |
|
{ |
|
"epoch": 3.26, |
|
"grad_norm": 0.10712806624372671, |
|
"learning_rate": 5.697669192247215e-05, |
|
"loss": 0.0032, |
|
"step": 858 |
|
}, |
|
{ |
|
"epoch": 3.27, |
|
"grad_norm": 0.2867594004405392, |
|
"learning_rate": 5.675439359517962e-05, |
|
"loss": 0.007, |
|
"step": 859 |
|
}, |
|
{ |
|
"epoch": 3.27, |
|
"grad_norm": 0.15537957888603157, |
|
"learning_rate": 5.6532357823403517e-05, |
|
"loss": 0.0059, |
|
"step": 860 |
|
}, |
|
{ |
|
"epoch": 3.27, |
|
"grad_norm": 0.13908151142988748, |
|
"learning_rate": 5.63105859551817e-05, |
|
"loss": 0.0055, |
|
"step": 861 |
|
}, |
|
{ |
|
"epoch": 3.28, |
|
"grad_norm": 0.1670760871245682, |
|
"learning_rate": 5.608907933694994e-05, |
|
"loss": 0.007, |
|
"step": 862 |
|
}, |
|
{ |
|
"epoch": 3.28, |
|
"grad_norm": 0.15128774900728617, |
|
"learning_rate": 5.586783931353338e-05, |
|
"loss": 0.0071, |
|
"step": 863 |
|
}, |
|
{ |
|
"epoch": 3.29, |
|
"grad_norm": 0.118351563993401, |
|
"learning_rate": 5.56468672281388e-05, |
|
"loss": 0.0052, |
|
"step": 864 |
|
}, |
|
{ |
|
"epoch": 3.29, |
|
"grad_norm": 0.26632088069900783, |
|
"learning_rate": 5.542616442234618e-05, |
|
"loss": 0.0096, |
|
"step": 865 |
|
}, |
|
{ |
|
"epoch": 3.29, |
|
"grad_norm": 0.15185019444874828, |
|
"learning_rate": 5.5205732236100635e-05, |
|
"loss": 0.0065, |
|
"step": 866 |
|
}, |
|
{ |
|
"epoch": 3.3, |
|
"grad_norm": 0.1448611480489451, |
|
"learning_rate": 5.498557200770429e-05, |
|
"loss": 0.0065, |
|
"step": 867 |
|
}, |
|
{ |
|
"epoch": 3.3, |
|
"grad_norm": 0.16431150027248104, |
|
"learning_rate": 5.476568507380815e-05, |
|
"loss": 0.0066, |
|
"step": 868 |
|
}, |
|
{ |
|
"epoch": 3.3, |
|
"grad_norm": 0.11908895770673851, |
|
"learning_rate": 5.454607276940389e-05, |
|
"loss": 0.0032, |
|
"step": 869 |
|
}, |
|
{ |
|
"epoch": 3.31, |
|
"grad_norm": 0.1443718220854692, |
|
"learning_rate": 5.4326736427815946e-05, |
|
"loss": 0.0053, |
|
"step": 870 |
|
}, |
|
{ |
|
"epoch": 3.31, |
|
"grad_norm": 0.14871442319667327, |
|
"learning_rate": 5.410767738069328e-05, |
|
"loss": 0.0059, |
|
"step": 871 |
|
}, |
|
{ |
|
"epoch": 3.32, |
|
"grad_norm": 0.18082767972242653, |
|
"learning_rate": 5.388889695800129e-05, |
|
"loss": 0.0068, |
|
"step": 872 |
|
}, |
|
{ |
|
"epoch": 3.32, |
|
"grad_norm": 0.1648249037129662, |
|
"learning_rate": 5.3670396488013854e-05, |
|
"loss": 0.0067, |
|
"step": 873 |
|
}, |
|
{ |
|
"epoch": 3.32, |
|
"grad_norm": 0.12517423729992644, |
|
"learning_rate": 5.345217729730501e-05, |
|
"loss": 0.0048, |
|
"step": 874 |
|
}, |
|
{ |
|
"epoch": 3.33, |
|
"grad_norm": 0.1464921521793365, |
|
"learning_rate": 5.3234240710741337e-05, |
|
"loss": 0.0068, |
|
"step": 875 |
|
}, |
|
{ |
|
"epoch": 3.33, |
|
"grad_norm": 0.17291303731043475, |
|
"learning_rate": 5.301658805147338e-05, |
|
"loss": 0.0096, |
|
"step": 876 |
|
}, |
|
{ |
|
"epoch": 3.33, |
|
"grad_norm": 0.1280298983910598, |
|
"learning_rate": 5.279922064092808e-05, |
|
"loss": 0.0034, |
|
"step": 877 |
|
}, |
|
{ |
|
"epoch": 3.34, |
|
"grad_norm": 0.12917298155423965, |
|
"learning_rate": 5.25821397988005e-05, |
|
"loss": 0.0043, |
|
"step": 878 |
|
}, |
|
{ |
|
"epoch": 3.34, |
|
"grad_norm": 0.16991548955604624, |
|
"learning_rate": 5.236534684304575e-05, |
|
"loss": 0.0036, |
|
"step": 879 |
|
}, |
|
{ |
|
"epoch": 3.35, |
|
"grad_norm": 0.1341181151945374, |
|
"learning_rate": 5.214884308987136e-05, |
|
"loss": 0.0046, |
|
"step": 880 |
|
}, |
|
{ |
|
"epoch": 3.35, |
|
"grad_norm": 0.07618404508173873, |
|
"learning_rate": 5.193262985372879e-05, |
|
"loss": 0.0027, |
|
"step": 881 |
|
}, |
|
{ |
|
"epoch": 3.35, |
|
"grad_norm": 0.08880977813478996, |
|
"learning_rate": 5.171670844730581e-05, |
|
"loss": 0.0036, |
|
"step": 882 |
|
}, |
|
{ |
|
"epoch": 3.36, |
|
"grad_norm": 0.13216208460528805, |
|
"learning_rate": 5.150108018151845e-05, |
|
"loss": 0.0042, |
|
"step": 883 |
|
}, |
|
{ |
|
"epoch": 3.36, |
|
"grad_norm": 0.11790812970039133, |
|
"learning_rate": 5.128574636550283e-05, |
|
"loss": 0.0035, |
|
"step": 884 |
|
}, |
|
{ |
|
"epoch": 3.37, |
|
"grad_norm": 0.11136507705857467, |
|
"learning_rate": 5.107070830660765e-05, |
|
"loss": 0.0032, |
|
"step": 885 |
|
}, |
|
{ |
|
"epoch": 3.37, |
|
"grad_norm": 0.1625804231101919, |
|
"learning_rate": 5.0855967310385776e-05, |
|
"loss": 0.0035, |
|
"step": 886 |
|
}, |
|
{ |
|
"epoch": 3.37, |
|
"grad_norm": 0.16854543758219576, |
|
"learning_rate": 5.064152468058661e-05, |
|
"loss": 0.013, |
|
"step": 887 |
|
}, |
|
{ |
|
"epoch": 3.38, |
|
"grad_norm": 0.1904583816627798, |
|
"learning_rate": 5.0427381719148115e-05, |
|
"loss": 0.0058, |
|
"step": 888 |
|
}, |
|
{ |
|
"epoch": 3.38, |
|
"grad_norm": 0.1407250892066329, |
|
"learning_rate": 5.021353972618877e-05, |
|
"loss": 0.0048, |
|
"step": 889 |
|
}, |
|
{ |
|
"epoch": 3.38, |
|
"grad_norm": 0.09332762885645429, |
|
"learning_rate": 5.000000000000002e-05, |
|
"loss": 0.0033, |
|
"step": 890 |
|
}, |
|
{ |
|
"epoch": 3.39, |
|
"grad_norm": 0.1282598573789377, |
|
"learning_rate": 4.978676383703792e-05, |
|
"loss": 0.0051, |
|
"step": 891 |
|
}, |
|
{ |
|
"epoch": 3.39, |
|
"grad_norm": 0.12527309494209085, |
|
"learning_rate": 4.957383253191567e-05, |
|
"loss": 0.0039, |
|
"step": 892 |
|
}, |
|
{ |
|
"epoch": 3.4, |
|
"grad_norm": 0.1474717370503847, |
|
"learning_rate": 4.9361207377395526e-05, |
|
"loss": 0.0056, |
|
"step": 893 |
|
}, |
|
{ |
|
"epoch": 3.4, |
|
"grad_norm": 0.17216191154710148, |
|
"learning_rate": 4.914888966438107e-05, |
|
"loss": 0.0061, |
|
"step": 894 |
|
}, |
|
{ |
|
"epoch": 3.4, |
|
"grad_norm": 0.11969436074946377, |
|
"learning_rate": 4.893688068190932e-05, |
|
"loss": 0.0045, |
|
"step": 895 |
|
}, |
|
{ |
|
"epoch": 3.41, |
|
"grad_norm": 0.16780413116289464, |
|
"learning_rate": 4.872518171714285e-05, |
|
"loss": 0.0067, |
|
"step": 896 |
|
}, |
|
{ |
|
"epoch": 3.41, |
|
"grad_norm": 0.09403083909006464, |
|
"learning_rate": 4.8513794055362094e-05, |
|
"loss": 0.0033, |
|
"step": 897 |
|
}, |
|
{ |
|
"epoch": 3.41, |
|
"grad_norm": 0.09334786241295161, |
|
"learning_rate": 4.8302718979957465e-05, |
|
"loss": 0.0025, |
|
"step": 898 |
|
}, |
|
{ |
|
"epoch": 3.42, |
|
"grad_norm": 0.12070714502867205, |
|
"learning_rate": 4.809195777242157e-05, |
|
"loss": 0.0044, |
|
"step": 899 |
|
}, |
|
{ |
|
"epoch": 3.42, |
|
"grad_norm": 0.0724744178923367, |
|
"learning_rate": 4.7881511712341484e-05, |
|
"loss": 0.0029, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 3.42, |
|
"eval_blimp_filtered_avg": 0.7164179104477612, |
|
"eval_blimp_filtered_std": 0.0050468818783488715, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 3.42, |
|
"eval_blimp_supplement_avg": 0.8318965517241379, |
|
"eval_blimp_supplement_std": 0.016663496994065188, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 3.42, |
|
"eval_vqa_filtered_avg": 0.48, |
|
"eval_vqa_filtered_std": 0.05021167315686779, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 3.42, |
|
"eval_winoground_filtered_avg": 0.68, |
|
"eval_winoground_filtered_std": 0.046882617226215034, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 3.43, |
|
"grad_norm": 0.12264611037048552, |
|
"learning_rate": 4.7671382077390923e-05, |
|
"loss": 0.0049, |
|
"step": 901 |
|
}, |
|
{ |
|
"epoch": 3.43, |
|
"grad_norm": 0.1447159721309767, |
|
"learning_rate": 4.746157014332242e-05, |
|
"loss": 0.0044, |
|
"step": 902 |
|
}, |
|
{ |
|
"epoch": 3.43, |
|
"grad_norm": 0.16375949938502024, |
|
"learning_rate": 4.7252077183959766e-05, |
|
"loss": 0.0065, |
|
"step": 903 |
|
}, |
|
{ |
|
"epoch": 3.44, |
|
"grad_norm": 0.10516324597971437, |
|
"learning_rate": 4.704290447119013e-05, |
|
"loss": 0.0043, |
|
"step": 904 |
|
}, |
|
{ |
|
"epoch": 3.44, |
|
"grad_norm": 0.08499567671503878, |
|
"learning_rate": 4.683405327495638e-05, |
|
"loss": 0.0033, |
|
"step": 905 |
|
}, |
|
{ |
|
"epoch": 3.44, |
|
"grad_norm": 0.24460270399320144, |
|
"learning_rate": 4.6625524863249435e-05, |
|
"loss": 0.0073, |
|
"step": 906 |
|
}, |
|
{ |
|
"epoch": 3.45, |
|
"grad_norm": 0.12925387617205644, |
|
"learning_rate": 4.6417320502100316e-05, |
|
"loss": 0.0044, |
|
"step": 907 |
|
}, |
|
{ |
|
"epoch": 3.45, |
|
"grad_norm": 0.09078563973748639, |
|
"learning_rate": 4.6209441455572934e-05, |
|
"loss": 0.0024, |
|
"step": 908 |
|
}, |
|
{ |
|
"epoch": 3.46, |
|
"grad_norm": 0.15653536427369144, |
|
"learning_rate": 4.600188898575585e-05, |
|
"loss": 0.0069, |
|
"step": 909 |
|
}, |
|
{ |
|
"epoch": 3.46, |
|
"grad_norm": 0.2617481572374967, |
|
"learning_rate": 4.5794664352755055e-05, |
|
"loss": 0.0078, |
|
"step": 910 |
|
}, |
|
{ |
|
"epoch": 3.46, |
|
"grad_norm": 0.16686255253211194, |
|
"learning_rate": 4.558776881468616e-05, |
|
"loss": 0.0063, |
|
"step": 911 |
|
}, |
|
{ |
|
"epoch": 3.47, |
|
"grad_norm": 0.14235218284923637, |
|
"learning_rate": 4.538120362766659e-05, |
|
"loss": 0.0043, |
|
"step": 912 |
|
}, |
|
{ |
|
"epoch": 3.47, |
|
"grad_norm": 0.2061352593588118, |
|
"learning_rate": 4.5174970045808373e-05, |
|
"loss": 0.0083, |
|
"step": 913 |
|
}, |
|
{ |
|
"epoch": 3.48, |
|
"grad_norm": 0.09141171412747422, |
|
"learning_rate": 4.496906932121006e-05, |
|
"loss": 0.0038, |
|
"step": 914 |
|
}, |
|
{ |
|
"epoch": 3.48, |
|
"grad_norm": 0.14581164354276802, |
|
"learning_rate": 4.476350270394942e-05, |
|
"loss": 0.0054, |
|
"step": 915 |
|
}, |
|
{ |
|
"epoch": 3.48, |
|
"grad_norm": 0.1909792639108279, |
|
"learning_rate": 4.4558271442075817e-05, |
|
"loss": 0.0058, |
|
"step": 916 |
|
}, |
|
{ |
|
"epoch": 3.49, |
|
"grad_norm": 0.16782053974713387, |
|
"learning_rate": 4.435337678160244e-05, |
|
"loss": 0.0053, |
|
"step": 917 |
|
}, |
|
{ |
|
"epoch": 3.49, |
|
"grad_norm": 0.1679302657777338, |
|
"learning_rate": 4.414881996649909e-05, |
|
"loss": 0.006, |
|
"step": 918 |
|
}, |
|
{ |
|
"epoch": 3.49, |
|
"grad_norm": 0.11555008314937329, |
|
"learning_rate": 4.394460223868422e-05, |
|
"loss": 0.0038, |
|
"step": 919 |
|
}, |
|
{ |
|
"epoch": 3.5, |
|
"grad_norm": 0.13987569584452733, |
|
"learning_rate": 4.374072483801769e-05, |
|
"loss": 0.0057, |
|
"step": 920 |
|
}, |
|
{ |
|
"epoch": 3.5, |
|
"grad_norm": 0.15472739669003643, |
|
"learning_rate": 4.353718900229315e-05, |
|
"loss": 0.0047, |
|
"step": 921 |
|
}, |
|
{ |
|
"epoch": 3.51, |
|
"grad_norm": 0.11860213300146173, |
|
"learning_rate": 4.333399596723054e-05, |
|
"loss": 0.0041, |
|
"step": 922 |
|
}, |
|
{ |
|
"epoch": 3.51, |
|
"grad_norm": 0.12771536456303176, |
|
"learning_rate": 4.313114696646844e-05, |
|
"loss": 0.0051, |
|
"step": 923 |
|
}, |
|
{ |
|
"epoch": 3.51, |
|
"grad_norm": 0.1786915439582824, |
|
"learning_rate": 4.2928643231556844e-05, |
|
"loss": 0.0059, |
|
"step": 924 |
|
}, |
|
{ |
|
"epoch": 3.52, |
|
"grad_norm": 0.24120215565132727, |
|
"learning_rate": 4.272648599194948e-05, |
|
"loss": 0.0046, |
|
"step": 925 |
|
}, |
|
{ |
|
"epoch": 3.52, |
|
"grad_norm": 0.14109245430082046, |
|
"learning_rate": 4.2524676474996436e-05, |
|
"loss": 0.0045, |
|
"step": 926 |
|
}, |
|
{ |
|
"epoch": 3.52, |
|
"grad_norm": 0.08773485394717533, |
|
"learning_rate": 4.232321590593672e-05, |
|
"loss": 0.0026, |
|
"step": 927 |
|
}, |
|
{ |
|
"epoch": 3.53, |
|
"grad_norm": 0.11777262879064121, |
|
"learning_rate": 4.212210550789066e-05, |
|
"loss": 0.0039, |
|
"step": 928 |
|
}, |
|
{ |
|
"epoch": 3.53, |
|
"grad_norm": 0.15303150859624717, |
|
"learning_rate": 4.192134650185271e-05, |
|
"loss": 0.0049, |
|
"step": 929 |
|
}, |
|
{ |
|
"epoch": 3.54, |
|
"grad_norm": 0.17847021642871766, |
|
"learning_rate": 4.172094010668391e-05, |
|
"loss": 0.0122, |
|
"step": 930 |
|
}, |
|
{ |
|
"epoch": 3.54, |
|
"grad_norm": 0.12872303754581352, |
|
"learning_rate": 4.1520887539104516e-05, |
|
"loss": 0.0034, |
|
"step": 931 |
|
}, |
|
{ |
|
"epoch": 3.54, |
|
"grad_norm": 0.09422413881251451, |
|
"learning_rate": 4.132119001368658e-05, |
|
"loss": 0.0032, |
|
"step": 932 |
|
}, |
|
{ |
|
"epoch": 3.55, |
|
"grad_norm": 0.11765015102315801, |
|
"learning_rate": 4.112184874284655e-05, |
|
"loss": 0.004, |
|
"step": 933 |
|
}, |
|
{ |
|
"epoch": 3.55, |
|
"grad_norm": 0.1816646133957582, |
|
"learning_rate": 4.092286493683812e-05, |
|
"loss": 0.0054, |
|
"step": 934 |
|
}, |
|
{ |
|
"epoch": 3.56, |
|
"grad_norm": 0.08821639897774167, |
|
"learning_rate": 4.072423980374452e-05, |
|
"loss": 0.0027, |
|
"step": 935 |
|
}, |
|
{ |
|
"epoch": 3.56, |
|
"grad_norm": 0.109204348627212, |
|
"learning_rate": 4.052597454947151e-05, |
|
"loss": 0.0037, |
|
"step": 936 |
|
}, |
|
{ |
|
"epoch": 3.56, |
|
"grad_norm": 0.07762991091541192, |
|
"learning_rate": 4.0328070377739936e-05, |
|
"loss": 0.0048, |
|
"step": 937 |
|
}, |
|
{ |
|
"epoch": 3.57, |
|
"grad_norm": 0.12303959337300706, |
|
"learning_rate": 4.0130528490078255e-05, |
|
"loss": 0.0054, |
|
"step": 938 |
|
}, |
|
{ |
|
"epoch": 3.57, |
|
"grad_norm": 0.14938977746099588, |
|
"learning_rate": 3.993335008581569e-05, |
|
"loss": 0.0054, |
|
"step": 939 |
|
}, |
|
{ |
|
"epoch": 3.57, |
|
"grad_norm": 0.10793183299648912, |
|
"learning_rate": 3.973653636207437e-05, |
|
"loss": 0.0051, |
|
"step": 940 |
|
}, |
|
{ |
|
"epoch": 3.58, |
|
"grad_norm": 0.07679917764318481, |
|
"learning_rate": 3.954008851376252e-05, |
|
"loss": 0.0021, |
|
"step": 941 |
|
}, |
|
{ |
|
"epoch": 3.58, |
|
"grad_norm": 0.20216658828207906, |
|
"learning_rate": 3.934400773356702e-05, |
|
"loss": 0.0082, |
|
"step": 942 |
|
}, |
|
{ |
|
"epoch": 3.59, |
|
"grad_norm": 0.17081936190481015, |
|
"learning_rate": 3.914829521194606e-05, |
|
"loss": 0.0047, |
|
"step": 943 |
|
}, |
|
{ |
|
"epoch": 3.59, |
|
"grad_norm": 0.1681844318722247, |
|
"learning_rate": 3.895295213712227e-05, |
|
"loss": 0.0053, |
|
"step": 944 |
|
}, |
|
{ |
|
"epoch": 3.59, |
|
"grad_norm": 0.11660462512191513, |
|
"learning_rate": 3.875797969507502e-05, |
|
"loss": 0.0068, |
|
"step": 945 |
|
}, |
|
{ |
|
"epoch": 3.6, |
|
"grad_norm": 0.10899837547854943, |
|
"learning_rate": 3.8563379069533626e-05, |
|
"loss": 0.004, |
|
"step": 946 |
|
}, |
|
{ |
|
"epoch": 3.6, |
|
"grad_norm": 0.11737468807394794, |
|
"learning_rate": 3.836915144196995e-05, |
|
"loss": 0.0043, |
|
"step": 947 |
|
}, |
|
{ |
|
"epoch": 3.6, |
|
"grad_norm": 0.17750707113239161, |
|
"learning_rate": 3.8175297991591316e-05, |
|
"loss": 0.0059, |
|
"step": 948 |
|
}, |
|
{ |
|
"epoch": 3.61, |
|
"grad_norm": 0.08434750541376605, |
|
"learning_rate": 3.7981819895333336e-05, |
|
"loss": 0.0028, |
|
"step": 949 |
|
}, |
|
{ |
|
"epoch": 3.61, |
|
"grad_norm": 0.08977864719318272, |
|
"learning_rate": 3.778871832785262e-05, |
|
"loss": 0.0025, |
|
"step": 950 |
|
}, |
|
{ |
|
"epoch": 3.62, |
|
"grad_norm": 0.26117422627907305, |
|
"learning_rate": 3.759599446151994e-05, |
|
"loss": 0.0058, |
|
"step": 951 |
|
}, |
|
{ |
|
"epoch": 3.62, |
|
"grad_norm": 0.1423125205304331, |
|
"learning_rate": 3.740364946641284e-05, |
|
"loss": 0.005, |
|
"step": 952 |
|
}, |
|
{ |
|
"epoch": 3.62, |
|
"grad_norm": 0.048844504093019145, |
|
"learning_rate": 3.721168451030868e-05, |
|
"loss": 0.0013, |
|
"step": 953 |
|
}, |
|
{ |
|
"epoch": 3.63, |
|
"grad_norm": 0.12750445172306626, |
|
"learning_rate": 3.702010075867748e-05, |
|
"loss": 0.0039, |
|
"step": 954 |
|
}, |
|
{ |
|
"epoch": 3.63, |
|
"grad_norm": 0.1363488868484316, |
|
"learning_rate": 3.682889937467493e-05, |
|
"loss": 0.0054, |
|
"step": 955 |
|
}, |
|
{ |
|
"epoch": 3.63, |
|
"grad_norm": 0.10861963297037971, |
|
"learning_rate": 3.6638081519135115e-05, |
|
"loss": 0.0025, |
|
"step": 956 |
|
}, |
|
{ |
|
"epoch": 3.64, |
|
"grad_norm": 0.07423146974751119, |
|
"learning_rate": 3.6447648350563767e-05, |
|
"loss": 0.002, |
|
"step": 957 |
|
}, |
|
{ |
|
"epoch": 3.64, |
|
"grad_norm": 0.1473106789075466, |
|
"learning_rate": 3.6257601025131026e-05, |
|
"loss": 0.0048, |
|
"step": 958 |
|
}, |
|
{ |
|
"epoch": 3.65, |
|
"grad_norm": 0.20756086874083257, |
|
"learning_rate": 3.6067940696664484e-05, |
|
"loss": 0.0044, |
|
"step": 959 |
|
}, |
|
{ |
|
"epoch": 3.65, |
|
"grad_norm": 0.12432385236166127, |
|
"learning_rate": 3.587866851664219e-05, |
|
"loss": 0.0042, |
|
"step": 960 |
|
}, |
|
{ |
|
"epoch": 3.65, |
|
"grad_norm": 0.1555267425126639, |
|
"learning_rate": 3.568978563418551e-05, |
|
"loss": 0.004, |
|
"step": 961 |
|
}, |
|
{ |
|
"epoch": 3.66, |
|
"grad_norm": 0.10864709636610591, |
|
"learning_rate": 3.5501293196052544e-05, |
|
"loss": 0.003, |
|
"step": 962 |
|
}, |
|
{ |
|
"epoch": 3.66, |
|
"grad_norm": 0.10844528424446415, |
|
"learning_rate": 3.531319234663063e-05, |
|
"loss": 0.003, |
|
"step": 963 |
|
}, |
|
{ |
|
"epoch": 3.67, |
|
"grad_norm": 0.18756992146351528, |
|
"learning_rate": 3.512548422792983e-05, |
|
"loss": 0.0039, |
|
"step": 964 |
|
}, |
|
{ |
|
"epoch": 3.67, |
|
"grad_norm": 0.16307530028534215, |
|
"learning_rate": 3.493816997957582e-05, |
|
"loss": 0.0036, |
|
"step": 965 |
|
}, |
|
{ |
|
"epoch": 3.67, |
|
"grad_norm": 0.21835831614587714, |
|
"learning_rate": 3.4751250738802835e-05, |
|
"loss": 0.0057, |
|
"step": 966 |
|
}, |
|
{ |
|
"epoch": 3.68, |
|
"grad_norm": 0.12509619886030035, |
|
"learning_rate": 3.456472764044718e-05, |
|
"loss": 0.0037, |
|
"step": 967 |
|
}, |
|
{ |
|
"epoch": 3.68, |
|
"grad_norm": 0.1878620997227208, |
|
"learning_rate": 3.4378601816939824e-05, |
|
"loss": 0.0044, |
|
"step": 968 |
|
}, |
|
{ |
|
"epoch": 3.68, |
|
"grad_norm": 0.20067996408678637, |
|
"learning_rate": 3.4192874398299915e-05, |
|
"loss": 0.0074, |
|
"step": 969 |
|
}, |
|
{ |
|
"epoch": 3.69, |
|
"grad_norm": 0.09941229298151419, |
|
"learning_rate": 3.400754651212776e-05, |
|
"loss": 0.0038, |
|
"step": 970 |
|
}, |
|
{ |
|
"epoch": 3.69, |
|
"grad_norm": 0.13212089570245356, |
|
"learning_rate": 3.382261928359791e-05, |
|
"loss": 0.0047, |
|
"step": 971 |
|
}, |
|
{ |
|
"epoch": 3.7, |
|
"grad_norm": 0.08933394980267556, |
|
"learning_rate": 3.36380938354526e-05, |
|
"loss": 0.0039, |
|
"step": 972 |
|
}, |
|
{ |
|
"epoch": 3.7, |
|
"grad_norm": 0.16729919593124423, |
|
"learning_rate": 3.3453971287994545e-05, |
|
"loss": 0.0079, |
|
"step": 973 |
|
}, |
|
{ |
|
"epoch": 3.7, |
|
"grad_norm": 0.15697707244755352, |
|
"learning_rate": 3.3270252759080476e-05, |
|
"loss": 0.0052, |
|
"step": 974 |
|
}, |
|
{ |
|
"epoch": 3.71, |
|
"grad_norm": 0.09194443755715154, |
|
"learning_rate": 3.308693936411421e-05, |
|
"loss": 0.0024, |
|
"step": 975 |
|
}, |
|
{ |
|
"epoch": 3.71, |
|
"grad_norm": 0.04543214336381121, |
|
"learning_rate": 3.290403221603976e-05, |
|
"loss": 0.0014, |
|
"step": 976 |
|
}, |
|
{ |
|
"epoch": 3.71, |
|
"grad_norm": 0.13598936920247842, |
|
"learning_rate": 3.2721532425334934e-05, |
|
"loss": 0.003, |
|
"step": 977 |
|
}, |
|
{ |
|
"epoch": 3.72, |
|
"grad_norm": 0.12519394602390674, |
|
"learning_rate": 3.253944110000415e-05, |
|
"loss": 0.0044, |
|
"step": 978 |
|
}, |
|
{ |
|
"epoch": 3.72, |
|
"grad_norm": 0.10159326068686843, |
|
"learning_rate": 3.235775934557204e-05, |
|
"loss": 0.0037, |
|
"step": 979 |
|
}, |
|
{ |
|
"epoch": 3.73, |
|
"grad_norm": 0.09206667100999626, |
|
"learning_rate": 3.2176488265076596e-05, |
|
"loss": 0.0023, |
|
"step": 980 |
|
}, |
|
{ |
|
"epoch": 3.73, |
|
"grad_norm": 0.13180534984976824, |
|
"learning_rate": 3.199562895906252e-05, |
|
"loss": 0.0059, |
|
"step": 981 |
|
}, |
|
{ |
|
"epoch": 3.73, |
|
"grad_norm": 0.1164365775293987, |
|
"learning_rate": 3.1815182525574495e-05, |
|
"loss": 0.0039, |
|
"step": 982 |
|
}, |
|
{ |
|
"epoch": 3.74, |
|
"grad_norm": 0.09037223834103687, |
|
"learning_rate": 3.163515006015052e-05, |
|
"loss": 0.0031, |
|
"step": 983 |
|
}, |
|
{ |
|
"epoch": 3.74, |
|
"grad_norm": 0.1243136017465312, |
|
"learning_rate": 3.1455532655815346e-05, |
|
"loss": 0.0023, |
|
"step": 984 |
|
}, |
|
{ |
|
"epoch": 3.75, |
|
"grad_norm": 0.06736081989652704, |
|
"learning_rate": 3.1276331403073735e-05, |
|
"loss": 0.0018, |
|
"step": 985 |
|
}, |
|
{ |
|
"epoch": 3.75, |
|
"grad_norm": 0.17307715618033684, |
|
"learning_rate": 3.10975473899039e-05, |
|
"loss": 0.005, |
|
"step": 986 |
|
}, |
|
{ |
|
"epoch": 3.75, |
|
"grad_norm": 0.06048816505854376, |
|
"learning_rate": 3.09191817017509e-05, |
|
"loss": 0.0021, |
|
"step": 987 |
|
}, |
|
{ |
|
"epoch": 3.76, |
|
"grad_norm": 0.12504484352987397, |
|
"learning_rate": 3.074123542152001e-05, |
|
"loss": 0.0032, |
|
"step": 988 |
|
}, |
|
{ |
|
"epoch": 3.76, |
|
"grad_norm": 0.06833118623733686, |
|
"learning_rate": 3.056370962957014e-05, |
|
"loss": 0.0021, |
|
"step": 989 |
|
}, |
|
{ |
|
"epoch": 3.76, |
|
"grad_norm": 0.1108735830960286, |
|
"learning_rate": 3.0386605403707346e-05, |
|
"loss": 0.0034, |
|
"step": 990 |
|
}, |
|
{ |
|
"epoch": 3.77, |
|
"grad_norm": 0.09939702792131645, |
|
"learning_rate": 3.020992381917823e-05, |
|
"loss": 0.0029, |
|
"step": 991 |
|
}, |
|
{ |
|
"epoch": 3.77, |
|
"grad_norm": 0.0991719206432376, |
|
"learning_rate": 3.0033665948663448e-05, |
|
"loss": 0.0033, |
|
"step": 992 |
|
}, |
|
{ |
|
"epoch": 3.78, |
|
"grad_norm": 0.12662565643384302, |
|
"learning_rate": 2.9857832862271183e-05, |
|
"loss": 0.004, |
|
"step": 993 |
|
}, |
|
{ |
|
"epoch": 3.78, |
|
"grad_norm": 0.20781158845550377, |
|
"learning_rate": 2.968242562753051e-05, |
|
"loss": 0.0058, |
|
"step": 994 |
|
}, |
|
{ |
|
"epoch": 3.78, |
|
"grad_norm": 0.14694222050004627, |
|
"learning_rate": 2.9507445309385294e-05, |
|
"loss": 0.0115, |
|
"step": 995 |
|
}, |
|
{ |
|
"epoch": 3.79, |
|
"grad_norm": 0.10808293020495671, |
|
"learning_rate": 2.9332892970187255e-05, |
|
"loss": 0.0043, |
|
"step": 996 |
|
}, |
|
{ |
|
"epoch": 3.79, |
|
"grad_norm": 0.12048492965543693, |
|
"learning_rate": 2.915876966968978e-05, |
|
"loss": 0.0029, |
|
"step": 997 |
|
}, |
|
{ |
|
"epoch": 3.79, |
|
"grad_norm": 0.09225586697033628, |
|
"learning_rate": 2.8985076465041582e-05, |
|
"loss": 0.0023, |
|
"step": 998 |
|
}, |
|
{ |
|
"epoch": 3.8, |
|
"grad_norm": 0.16415643694268406, |
|
"learning_rate": 2.8811814410779957e-05, |
|
"loss": 0.0055, |
|
"step": 999 |
|
}, |
|
{ |
|
"epoch": 3.8, |
|
"grad_norm": 0.11278583943799678, |
|
"learning_rate": 2.8638984558824777e-05, |
|
"loss": 0.0039, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 3.8, |
|
"eval_blimp_filtered_avg": 0.716865671641791, |
|
"eval_blimp_filtered_std": 0.00502778551816782, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 3.8, |
|
"eval_blimp_supplement_avg": 0.8362068965517241, |
|
"eval_blimp_supplement_std": 0.016430816592740906, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 3.8, |
|
"eval_vqa_filtered_avg": 0.48, |
|
"eval_vqa_filtered_std": 0.05021167315686779, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 3.8, |
|
"eval_winoground_filtered_avg": 0.7, |
|
"eval_winoground_filtered_std": 0.046056618647183814, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 3.81, |
|
"grad_norm": 0.19790160401886048, |
|
"learning_rate": 2.8466587958471713e-05, |
|
"loss": 0.0079, |
|
"step": 1001 |
|
}, |
|
{ |
|
"epoch": 3.81, |
|
"grad_norm": 0.15241865463221207, |
|
"learning_rate": 2.8294625656386153e-05, |
|
"loss": 0.0033, |
|
"step": 1002 |
|
}, |
|
{ |
|
"epoch": 3.81, |
|
"grad_norm": 0.15486582212720096, |
|
"learning_rate": 2.812309869659675e-05, |
|
"loss": 0.0049, |
|
"step": 1003 |
|
}, |
|
{ |
|
"epoch": 3.82, |
|
"grad_norm": 0.1403453911129104, |
|
"learning_rate": 2.7952008120489005e-05, |
|
"loss": 0.0035, |
|
"step": 1004 |
|
}, |
|
{ |
|
"epoch": 3.82, |
|
"grad_norm": 0.2066076788884496, |
|
"learning_rate": 2.7781354966799078e-05, |
|
"loss": 0.0069, |
|
"step": 1005 |
|
}, |
|
{ |
|
"epoch": 3.83, |
|
"grad_norm": 0.14517188108322096, |
|
"learning_rate": 2.7611140271607417e-05, |
|
"loss": 0.0038, |
|
"step": 1006 |
|
}, |
|
{ |
|
"epoch": 3.83, |
|
"grad_norm": 0.19311241295438267, |
|
"learning_rate": 2.744136506833247e-05, |
|
"loss": 0.0046, |
|
"step": 1007 |
|
}, |
|
{ |
|
"epoch": 3.83, |
|
"grad_norm": 0.15261908108180047, |
|
"learning_rate": 2.7272030387724423e-05, |
|
"loss": 0.0051, |
|
"step": 1008 |
|
}, |
|
{ |
|
"epoch": 3.84, |
|
"grad_norm": 0.148487491757181, |
|
"learning_rate": 2.7103137257858868e-05, |
|
"loss": 0.0047, |
|
"step": 1009 |
|
}, |
|
{ |
|
"epoch": 3.84, |
|
"grad_norm": 0.2545086755837555, |
|
"learning_rate": 2.6934686704130696e-05, |
|
"loss": 0.0078, |
|
"step": 1010 |
|
}, |
|
{ |
|
"epoch": 3.84, |
|
"grad_norm": 0.2155734204394312, |
|
"learning_rate": 2.6766679749247793e-05, |
|
"loss": 0.0053, |
|
"step": 1011 |
|
}, |
|
{ |
|
"epoch": 3.85, |
|
"grad_norm": 0.11633424479852102, |
|
"learning_rate": 2.6599117413224817e-05, |
|
"loss": 0.0026, |
|
"step": 1012 |
|
}, |
|
{ |
|
"epoch": 3.85, |
|
"grad_norm": 0.16185230460669028, |
|
"learning_rate": 2.6432000713377027e-05, |
|
"loss": 0.006, |
|
"step": 1013 |
|
}, |
|
{ |
|
"epoch": 3.86, |
|
"grad_norm": 0.06116976817258535, |
|
"learning_rate": 2.6265330664314157e-05, |
|
"loss": 0.002, |
|
"step": 1014 |
|
}, |
|
{ |
|
"epoch": 3.86, |
|
"grad_norm": 0.1467173492208502, |
|
"learning_rate": 2.6099108277934103e-05, |
|
"loss": 0.0045, |
|
"step": 1015 |
|
}, |
|
{ |
|
"epoch": 3.86, |
|
"grad_norm": 0.06626573294126052, |
|
"learning_rate": 2.5933334563416976e-05, |
|
"loss": 0.002, |
|
"step": 1016 |
|
}, |
|
{ |
|
"epoch": 3.87, |
|
"grad_norm": 0.12063079499322236, |
|
"learning_rate": 2.5768010527218845e-05, |
|
"loss": 0.0027, |
|
"step": 1017 |
|
}, |
|
{ |
|
"epoch": 3.87, |
|
"grad_norm": 0.10686596344627741, |
|
"learning_rate": 2.5603137173065674e-05, |
|
"loss": 0.0043, |
|
"step": 1018 |
|
}, |
|
{ |
|
"epoch": 3.87, |
|
"grad_norm": 0.11272599358320497, |
|
"learning_rate": 2.543871550194723e-05, |
|
"loss": 0.0027, |
|
"step": 1019 |
|
}, |
|
{ |
|
"epoch": 3.88, |
|
"grad_norm": 0.1718157414634651, |
|
"learning_rate": 2.527474651211089e-05, |
|
"loss": 0.0041, |
|
"step": 1020 |
|
}, |
|
{ |
|
"epoch": 3.88, |
|
"grad_norm": 0.11357917775880114, |
|
"learning_rate": 2.5111231199055896e-05, |
|
"loss": 0.0038, |
|
"step": 1021 |
|
}, |
|
{ |
|
"epoch": 3.89, |
|
"grad_norm": 0.07702764510610083, |
|
"learning_rate": 2.494817055552686e-05, |
|
"loss": 0.0025, |
|
"step": 1022 |
|
}, |
|
{ |
|
"epoch": 3.89, |
|
"grad_norm": 0.1682497107538642, |
|
"learning_rate": 2.4785565571508118e-05, |
|
"loss": 0.007, |
|
"step": 1023 |
|
}, |
|
{ |
|
"epoch": 3.89, |
|
"grad_norm": 0.06959209717037654, |
|
"learning_rate": 2.462341723421758e-05, |
|
"loss": 0.0021, |
|
"step": 1024 |
|
}, |
|
{ |
|
"epoch": 3.9, |
|
"grad_norm": 0.1216172792972272, |
|
"learning_rate": 2.4461726528100615e-05, |
|
"loss": 0.0034, |
|
"step": 1025 |
|
}, |
|
{ |
|
"epoch": 3.9, |
|
"grad_norm": 0.1513509657366934, |
|
"learning_rate": 2.4300494434824373e-05, |
|
"loss": 0.004, |
|
"step": 1026 |
|
}, |
|
{ |
|
"epoch": 3.9, |
|
"grad_norm": 0.0865648646398974, |
|
"learning_rate": 2.4139721933271465e-05, |
|
"loss": 0.0024, |
|
"step": 1027 |
|
}, |
|
{ |
|
"epoch": 3.91, |
|
"grad_norm": 0.08556831061548267, |
|
"learning_rate": 2.3979409999534298e-05, |
|
"loss": 0.0023, |
|
"step": 1028 |
|
}, |
|
{ |
|
"epoch": 3.91, |
|
"grad_norm": 0.1641906844837052, |
|
"learning_rate": 2.381955960690906e-05, |
|
"loss": 0.0075, |
|
"step": 1029 |
|
}, |
|
{ |
|
"epoch": 3.92, |
|
"grad_norm": 0.11390717886890653, |
|
"learning_rate": 2.36601717258897e-05, |
|
"loss": 0.003, |
|
"step": 1030 |
|
}, |
|
{ |
|
"epoch": 3.92, |
|
"grad_norm": 0.11731050753260445, |
|
"learning_rate": 2.35012473241623e-05, |
|
"loss": 0.0028, |
|
"step": 1031 |
|
}, |
|
{ |
|
"epoch": 3.92, |
|
"grad_norm": 0.16970688657951408, |
|
"learning_rate": 2.3342787366598872e-05, |
|
"loss": 0.0043, |
|
"step": 1032 |
|
}, |
|
{ |
|
"epoch": 3.93, |
|
"grad_norm": 0.11089863057017167, |
|
"learning_rate": 2.3184792815251766e-05, |
|
"loss": 0.0032, |
|
"step": 1033 |
|
}, |
|
{ |
|
"epoch": 3.93, |
|
"grad_norm": 0.046462119059264825, |
|
"learning_rate": 2.302726462934769e-05, |
|
"loss": 0.0018, |
|
"step": 1034 |
|
}, |
|
{ |
|
"epoch": 3.94, |
|
"grad_norm": 0.11684974119182631, |
|
"learning_rate": 2.2870203765281926e-05, |
|
"loss": 0.0027, |
|
"step": 1035 |
|
}, |
|
{ |
|
"epoch": 3.94, |
|
"grad_norm": 0.16444221243072213, |
|
"learning_rate": 2.2713611176612582e-05, |
|
"loss": 0.0046, |
|
"step": 1036 |
|
}, |
|
{ |
|
"epoch": 3.94, |
|
"grad_norm": 0.1818375194889315, |
|
"learning_rate": 2.25574878140546e-05, |
|
"loss": 0.0051, |
|
"step": 1037 |
|
}, |
|
{ |
|
"epoch": 3.95, |
|
"grad_norm": 0.11786328596160862, |
|
"learning_rate": 2.240183462547427e-05, |
|
"loss": 0.0024, |
|
"step": 1038 |
|
}, |
|
{ |
|
"epoch": 3.95, |
|
"grad_norm": 0.08316727437427408, |
|
"learning_rate": 2.224665255588325e-05, |
|
"loss": 0.0031, |
|
"step": 1039 |
|
}, |
|
{ |
|
"epoch": 3.95, |
|
"grad_norm": 0.07270479819118501, |
|
"learning_rate": 2.2091942547432955e-05, |
|
"loss": 0.0027, |
|
"step": 1040 |
|
}, |
|
{ |
|
"epoch": 3.96, |
|
"grad_norm": 0.05459676934354277, |
|
"learning_rate": 2.193770553940876e-05, |
|
"loss": 0.0016, |
|
"step": 1041 |
|
}, |
|
{ |
|
"epoch": 3.96, |
|
"grad_norm": 0.11956646260392087, |
|
"learning_rate": 2.1783942468224382e-05, |
|
"loss": 0.0018, |
|
"step": 1042 |
|
}, |
|
{ |
|
"epoch": 3.97, |
|
"grad_norm": 0.08747129822087717, |
|
"learning_rate": 2.163065426741603e-05, |
|
"loss": 0.0025, |
|
"step": 1043 |
|
}, |
|
{ |
|
"epoch": 3.97, |
|
"grad_norm": 0.1777877170154106, |
|
"learning_rate": 2.147784186763696e-05, |
|
"loss": 0.0043, |
|
"step": 1044 |
|
}, |
|
{ |
|
"epoch": 3.97, |
|
"grad_norm": 0.08218483908984636, |
|
"learning_rate": 2.132550619665168e-05, |
|
"loss": 0.0018, |
|
"step": 1045 |
|
}, |
|
{ |
|
"epoch": 3.98, |
|
"grad_norm": 0.13582850245610714, |
|
"learning_rate": 2.117364817933033e-05, |
|
"loss": 0.0047, |
|
"step": 1046 |
|
}, |
|
{ |
|
"epoch": 3.98, |
|
"grad_norm": 0.3137890453832542, |
|
"learning_rate": 2.1022268737643138e-05, |
|
"loss": 0.0065, |
|
"step": 1047 |
|
}, |
|
{ |
|
"epoch": 3.98, |
|
"grad_norm": 0.14756824814987923, |
|
"learning_rate": 2.08713687906547e-05, |
|
"loss": 0.0046, |
|
"step": 1048 |
|
}, |
|
{ |
|
"epoch": 3.99, |
|
"grad_norm": 0.12543766199519707, |
|
"learning_rate": 2.0720949254518517e-05, |
|
"loss": 0.0033, |
|
"step": 1049 |
|
}, |
|
{ |
|
"epoch": 3.99, |
|
"grad_norm": 0.1272943584540691, |
|
"learning_rate": 2.05710110424714e-05, |
|
"loss": 0.0046, |
|
"step": 1050 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"grad_norm": 0.18454830615921256, |
|
"learning_rate": 2.0421555064827878e-05, |
|
"loss": 0.0072, |
|
"step": 1051 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"grad_norm": 0.1537809820642007, |
|
"learning_rate": 2.0272582228974792e-05, |
|
"loss": 0.0043, |
|
"step": 1052 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"grad_norm": 0.05312631536933881, |
|
"learning_rate": 2.012409343936551e-05, |
|
"loss": 0.0012, |
|
"step": 1053 |
|
}, |
|
{ |
|
"epoch": 4.01, |
|
"grad_norm": 0.04447185122963781, |
|
"learning_rate": 1.9976089597514903e-05, |
|
"loss": 0.0012, |
|
"step": 1054 |
|
}, |
|
{ |
|
"epoch": 4.01, |
|
"grad_norm": 0.026337967004294745, |
|
"learning_rate": 1.982857160199334e-05, |
|
"loss": 0.0007, |
|
"step": 1055 |
|
}, |
|
{ |
|
"epoch": 4.02, |
|
"grad_norm": 0.026301866799899334, |
|
"learning_rate": 1.9681540348421623e-05, |
|
"loss": 0.001, |
|
"step": 1056 |
|
}, |
|
{ |
|
"epoch": 4.02, |
|
"grad_norm": 0.05436305337809422, |
|
"learning_rate": 1.9534996729465426e-05, |
|
"loss": 0.0014, |
|
"step": 1057 |
|
}, |
|
{ |
|
"epoch": 4.02, |
|
"grad_norm": 0.05631442649400675, |
|
"learning_rate": 1.938894163482974e-05, |
|
"loss": 0.0015, |
|
"step": 1058 |
|
}, |
|
{ |
|
"epoch": 4.03, |
|
"grad_norm": 0.10283079607735844, |
|
"learning_rate": 1.9243375951253796e-05, |
|
"loss": 0.0023, |
|
"step": 1059 |
|
}, |
|
{ |
|
"epoch": 4.03, |
|
"grad_norm": 0.050174636822106565, |
|
"learning_rate": 1.9098300562505266e-05, |
|
"loss": 0.0011, |
|
"step": 1060 |
|
}, |
|
{ |
|
"epoch": 4.03, |
|
"grad_norm": 0.018160762498011917, |
|
"learning_rate": 1.895371634937525e-05, |
|
"loss": 0.0008, |
|
"step": 1061 |
|
}, |
|
{ |
|
"epoch": 4.04, |
|
"grad_norm": 0.053625104843317006, |
|
"learning_rate": 1.880962418967279e-05, |
|
"loss": 0.0017, |
|
"step": 1062 |
|
}, |
|
{ |
|
"epoch": 4.04, |
|
"grad_norm": 0.0367115996959884, |
|
"learning_rate": 1.8666024958219408e-05, |
|
"loss": 0.0013, |
|
"step": 1063 |
|
}, |
|
{ |
|
"epoch": 4.05, |
|
"grad_norm": 0.01705265689595668, |
|
"learning_rate": 1.852291952684414e-05, |
|
"loss": 0.0006, |
|
"step": 1064 |
|
}, |
|
{ |
|
"epoch": 4.05, |
|
"grad_norm": 0.029645947416295275, |
|
"learning_rate": 1.8380308764377842e-05, |
|
"loss": 0.001, |
|
"step": 1065 |
|
}, |
|
{ |
|
"epoch": 4.05, |
|
"grad_norm": 0.02616977130242847, |
|
"learning_rate": 1.8238193536648195e-05, |
|
"loss": 0.001, |
|
"step": 1066 |
|
}, |
|
{ |
|
"epoch": 4.06, |
|
"grad_norm": 0.029725522847751006, |
|
"learning_rate": 1.8096574706474333e-05, |
|
"loss": 0.0011, |
|
"step": 1067 |
|
}, |
|
{ |
|
"epoch": 4.06, |
|
"grad_norm": 0.046999003971840925, |
|
"learning_rate": 1.795545313366166e-05, |
|
"loss": 0.0016, |
|
"step": 1068 |
|
}, |
|
{ |
|
"epoch": 4.06, |
|
"grad_norm": 0.0639334214169769, |
|
"learning_rate": 1.7814829674996592e-05, |
|
"loss": 0.002, |
|
"step": 1069 |
|
}, |
|
{ |
|
"epoch": 4.07, |
|
"grad_norm": 0.0367804832140112, |
|
"learning_rate": 1.767470518424129e-05, |
|
"loss": 0.0011, |
|
"step": 1070 |
|
}, |
|
{ |
|
"epoch": 4.07, |
|
"grad_norm": 0.029348342947469082, |
|
"learning_rate": 1.7535080512128632e-05, |
|
"loss": 0.0008, |
|
"step": 1071 |
|
}, |
|
{ |
|
"epoch": 4.08, |
|
"grad_norm": 0.0466731334600783, |
|
"learning_rate": 1.7395956506356937e-05, |
|
"loss": 0.0015, |
|
"step": 1072 |
|
}, |
|
{ |
|
"epoch": 4.08, |
|
"grad_norm": 0.027508272565636798, |
|
"learning_rate": 1.7257334011584847e-05, |
|
"loss": 0.0009, |
|
"step": 1073 |
|
}, |
|
{ |
|
"epoch": 4.08, |
|
"grad_norm": 0.035628751171613196, |
|
"learning_rate": 1.7119213869426197e-05, |
|
"loss": 0.001, |
|
"step": 1074 |
|
}, |
|
{ |
|
"epoch": 4.09, |
|
"grad_norm": 0.06971830663806321, |
|
"learning_rate": 1.6981596918444953e-05, |
|
"loss": 0.0012, |
|
"step": 1075 |
|
}, |
|
{ |
|
"epoch": 4.09, |
|
"grad_norm": 0.021694267386634027, |
|
"learning_rate": 1.684448399414994e-05, |
|
"loss": 0.0009, |
|
"step": 1076 |
|
}, |
|
{ |
|
"epoch": 4.1, |
|
"grad_norm": 0.02007528840430175, |
|
"learning_rate": 1.6707875928990058e-05, |
|
"loss": 0.0008, |
|
"step": 1077 |
|
}, |
|
{ |
|
"epoch": 4.1, |
|
"grad_norm": 0.04559222251809475, |
|
"learning_rate": 1.6571773552349e-05, |
|
"loss": 0.0011, |
|
"step": 1078 |
|
}, |
|
{ |
|
"epoch": 4.1, |
|
"grad_norm": 0.03837022867264384, |
|
"learning_rate": 1.6436177690540243e-05, |
|
"loss": 0.0013, |
|
"step": 1079 |
|
}, |
|
{ |
|
"epoch": 4.11, |
|
"grad_norm": 0.0596087088326944, |
|
"learning_rate": 1.630108916680223e-05, |
|
"loss": 0.0017, |
|
"step": 1080 |
|
}, |
|
{ |
|
"epoch": 4.11, |
|
"grad_norm": 0.017751318308507253, |
|
"learning_rate": 1.6166508801293013e-05, |
|
"loss": 0.0006, |
|
"step": 1081 |
|
}, |
|
{ |
|
"epoch": 4.11, |
|
"grad_norm": 0.026619692717408015, |
|
"learning_rate": 1.603243741108571e-05, |
|
"loss": 0.0013, |
|
"step": 1082 |
|
}, |
|
{ |
|
"epoch": 4.12, |
|
"grad_norm": 0.020963517608948525, |
|
"learning_rate": 1.5898875810163137e-05, |
|
"loss": 0.0007, |
|
"step": 1083 |
|
}, |
|
{ |
|
"epoch": 4.12, |
|
"grad_norm": 0.027136972648439773, |
|
"learning_rate": 1.5765824809413056e-05, |
|
"loss": 0.0009, |
|
"step": 1084 |
|
}, |
|
{ |
|
"epoch": 4.13, |
|
"grad_norm": 0.05016619274243327, |
|
"learning_rate": 1.5633285216623385e-05, |
|
"loss": 0.0013, |
|
"step": 1085 |
|
}, |
|
{ |
|
"epoch": 4.13, |
|
"grad_norm": 0.029039341510529837, |
|
"learning_rate": 1.5501257836476978e-05, |
|
"loss": 0.0009, |
|
"step": 1086 |
|
}, |
|
{ |
|
"epoch": 4.13, |
|
"grad_norm": 0.07303975658197855, |
|
"learning_rate": 1.5369743470547027e-05, |
|
"loss": 0.0019, |
|
"step": 1087 |
|
}, |
|
{ |
|
"epoch": 4.14, |
|
"grad_norm": 0.03350400586584435, |
|
"learning_rate": 1.5238742917292015e-05, |
|
"loss": 0.0009, |
|
"step": 1088 |
|
}, |
|
{ |
|
"epoch": 4.14, |
|
"grad_norm": 0.08029935115169112, |
|
"learning_rate": 1.5108256972050972e-05, |
|
"loss": 0.0019, |
|
"step": 1089 |
|
}, |
|
{ |
|
"epoch": 4.14, |
|
"grad_norm": 0.04476861542864655, |
|
"learning_rate": 1.4978286427038601e-05, |
|
"loss": 0.0014, |
|
"step": 1090 |
|
}, |
|
{ |
|
"epoch": 4.15, |
|
"grad_norm": 0.0692481373749192, |
|
"learning_rate": 1.4848832071340423e-05, |
|
"loss": 0.0014, |
|
"step": 1091 |
|
}, |
|
{ |
|
"epoch": 4.15, |
|
"grad_norm": 0.0459173556561311, |
|
"learning_rate": 1.4719894690908098e-05, |
|
"loss": 0.0013, |
|
"step": 1092 |
|
}, |
|
{ |
|
"epoch": 4.16, |
|
"grad_norm": 0.05681694784227542, |
|
"learning_rate": 1.4591475068554572e-05, |
|
"loss": 0.0017, |
|
"step": 1093 |
|
}, |
|
{ |
|
"epoch": 4.16, |
|
"grad_norm": 0.03490009549853983, |
|
"learning_rate": 1.4463573983949341e-05, |
|
"loss": 0.001, |
|
"step": 1094 |
|
}, |
|
{ |
|
"epoch": 4.16, |
|
"grad_norm": 0.08256537486908391, |
|
"learning_rate": 1.4336192213613742e-05, |
|
"loss": 0.0029, |
|
"step": 1095 |
|
}, |
|
{ |
|
"epoch": 4.17, |
|
"grad_norm": 0.03304929283596535, |
|
"learning_rate": 1.4209330530916165e-05, |
|
"loss": 0.0015, |
|
"step": 1096 |
|
}, |
|
{ |
|
"epoch": 4.17, |
|
"grad_norm": 0.019413105962450914, |
|
"learning_rate": 1.4082989706067461e-05, |
|
"loss": 0.0008, |
|
"step": 1097 |
|
}, |
|
{ |
|
"epoch": 4.17, |
|
"grad_norm": 0.06390145660785629, |
|
"learning_rate": 1.3957170506116201e-05, |
|
"loss": 0.0009, |
|
"step": 1098 |
|
}, |
|
{ |
|
"epoch": 4.18, |
|
"grad_norm": 0.017555521429492564, |
|
"learning_rate": 1.3831873694944031e-05, |
|
"loss": 0.0007, |
|
"step": 1099 |
|
}, |
|
{ |
|
"epoch": 4.18, |
|
"grad_norm": 0.04810371136355367, |
|
"learning_rate": 1.3707100033261034e-05, |
|
"loss": 0.0014, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 4.18, |
|
"eval_blimp_filtered_avg": 0.716865671641791, |
|
"eval_blimp_filtered_std": 0.005034673021350593, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 4.18, |
|
"eval_blimp_supplement_avg": 0.834051724137931, |
|
"eval_blimp_supplement_std": 0.016524406240927558, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 4.18, |
|
"eval_vqa_filtered_avg": 0.48, |
|
"eval_vqa_filtered_std": 0.05021167315686779, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 4.18, |
|
"eval_winoground_filtered_avg": 0.68, |
|
"eval_winoground_filtered_std": 0.046882617226215034, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 4.19, |
|
"grad_norm": 0.013464323565587763, |
|
"learning_rate": 1.3582850278601134e-05, |
|
"loss": 0.0006, |
|
"step": 1101 |
|
}, |
|
{ |
|
"epoch": 4.19, |
|
"grad_norm": 0.029333816746967, |
|
"learning_rate": 1.3459125185317434e-05, |
|
"loss": 0.0009, |
|
"step": 1102 |
|
}, |
|
{ |
|
"epoch": 4.19, |
|
"grad_norm": 0.019738111761638182, |
|
"learning_rate": 1.3335925504577717e-05, |
|
"loss": 0.0007, |
|
"step": 1103 |
|
}, |
|
{ |
|
"epoch": 4.2, |
|
"grad_norm": 0.038558679072453785, |
|
"learning_rate": 1.3213251984359831e-05, |
|
"loss": 0.0009, |
|
"step": 1104 |
|
}, |
|
{ |
|
"epoch": 4.2, |
|
"grad_norm": 0.10728075730756652, |
|
"learning_rate": 1.3091105369447165e-05, |
|
"loss": 0.0014, |
|
"step": 1105 |
|
}, |
|
{ |
|
"epoch": 4.21, |
|
"grad_norm": 0.03939456309548406, |
|
"learning_rate": 1.2969486401424169e-05, |
|
"loss": 0.0009, |
|
"step": 1106 |
|
}, |
|
{ |
|
"epoch": 4.21, |
|
"grad_norm": 0.02688561690752942, |
|
"learning_rate": 1.2848395818671687e-05, |
|
"loss": 0.0007, |
|
"step": 1107 |
|
}, |
|
{ |
|
"epoch": 4.21, |
|
"grad_norm": 0.10118668815662003, |
|
"learning_rate": 1.2727834356362778e-05, |
|
"loss": 0.0035, |
|
"step": 1108 |
|
}, |
|
{ |
|
"epoch": 4.22, |
|
"grad_norm": 0.029540871735792434, |
|
"learning_rate": 1.2607802746457897e-05, |
|
"loss": 0.0008, |
|
"step": 1109 |
|
}, |
|
{ |
|
"epoch": 4.22, |
|
"grad_norm": 0.019004632066757458, |
|
"learning_rate": 1.2488301717700735e-05, |
|
"loss": 0.0008, |
|
"step": 1110 |
|
}, |
|
{ |
|
"epoch": 4.22, |
|
"grad_norm": 0.050506924718716324, |
|
"learning_rate": 1.2369331995613665e-05, |
|
"loss": 0.0015, |
|
"step": 1111 |
|
}, |
|
{ |
|
"epoch": 4.23, |
|
"grad_norm": 0.02906757664336032, |
|
"learning_rate": 1.2250894302493265e-05, |
|
"loss": 0.0009, |
|
"step": 1112 |
|
}, |
|
{ |
|
"epoch": 4.23, |
|
"grad_norm": 0.04633777117624858, |
|
"learning_rate": 1.21329893574062e-05, |
|
"loss": 0.0013, |
|
"step": 1113 |
|
}, |
|
{ |
|
"epoch": 4.24, |
|
"grad_norm": 0.03520147966889168, |
|
"learning_rate": 1.2015617876184527e-05, |
|
"loss": 0.0007, |
|
"step": 1114 |
|
}, |
|
{ |
|
"epoch": 4.24, |
|
"grad_norm": 0.02696880607305335, |
|
"learning_rate": 1.1898780571421552e-05, |
|
"loss": 0.0009, |
|
"step": 1115 |
|
}, |
|
{ |
|
"epoch": 4.24, |
|
"grad_norm": 0.028899261052257607, |
|
"learning_rate": 1.1782478152467502e-05, |
|
"loss": 0.0009, |
|
"step": 1116 |
|
}, |
|
{ |
|
"epoch": 4.25, |
|
"grad_norm": 0.025787842105068102, |
|
"learning_rate": 1.166671132542505e-05, |
|
"loss": 0.0007, |
|
"step": 1117 |
|
}, |
|
{ |
|
"epoch": 4.25, |
|
"grad_norm": 0.023827728319133067, |
|
"learning_rate": 1.1551480793145331e-05, |
|
"loss": 0.0009, |
|
"step": 1118 |
|
}, |
|
{ |
|
"epoch": 4.25, |
|
"grad_norm": 0.030668462671104377, |
|
"learning_rate": 1.1436787255223302e-05, |
|
"loss": 0.0007, |
|
"step": 1119 |
|
}, |
|
{ |
|
"epoch": 4.26, |
|
"grad_norm": 0.02229439145398785, |
|
"learning_rate": 1.1322631407993811e-05, |
|
"loss": 0.0008, |
|
"step": 1120 |
|
}, |
|
{ |
|
"epoch": 4.26, |
|
"grad_norm": 0.08225114568135586, |
|
"learning_rate": 1.1209013944527203e-05, |
|
"loss": 0.0017, |
|
"step": 1121 |
|
}, |
|
{ |
|
"epoch": 4.27, |
|
"grad_norm": 0.0575147043161383, |
|
"learning_rate": 1.1095935554625148e-05, |
|
"loss": 0.0011, |
|
"step": 1122 |
|
}, |
|
{ |
|
"epoch": 4.27, |
|
"grad_norm": 0.05879183520465905, |
|
"learning_rate": 1.098339692481648e-05, |
|
"loss": 0.0016, |
|
"step": 1123 |
|
}, |
|
{ |
|
"epoch": 4.27, |
|
"grad_norm": 0.11651577381341721, |
|
"learning_rate": 1.0871398738352955e-05, |
|
"loss": 0.0017, |
|
"step": 1124 |
|
}, |
|
{ |
|
"epoch": 4.28, |
|
"grad_norm": 0.04216428635525978, |
|
"learning_rate": 1.0759941675205221e-05, |
|
"loss": 0.0016, |
|
"step": 1125 |
|
}, |
|
{ |
|
"epoch": 4.28, |
|
"grad_norm": 0.03712542340164034, |
|
"learning_rate": 1.0649026412058583e-05, |
|
"loss": 0.0012, |
|
"step": 1126 |
|
}, |
|
{ |
|
"epoch": 4.29, |
|
"grad_norm": 0.04116733983643567, |
|
"learning_rate": 1.0538653622308948e-05, |
|
"loss": 0.0009, |
|
"step": 1127 |
|
}, |
|
{ |
|
"epoch": 4.29, |
|
"grad_norm": 0.05379118870775519, |
|
"learning_rate": 1.042882397605871e-05, |
|
"loss": 0.0014, |
|
"step": 1128 |
|
}, |
|
{ |
|
"epoch": 4.29, |
|
"grad_norm": 0.023844321094186195, |
|
"learning_rate": 1.0319538140112728e-05, |
|
"loss": 0.0008, |
|
"step": 1129 |
|
}, |
|
{ |
|
"epoch": 4.3, |
|
"grad_norm": 0.02336448873502654, |
|
"learning_rate": 1.0210796777974197e-05, |
|
"loss": 0.0007, |
|
"step": 1130 |
|
}, |
|
{ |
|
"epoch": 4.3, |
|
"grad_norm": 0.04194993258789071, |
|
"learning_rate": 1.0102600549840701e-05, |
|
"loss": 0.0013, |
|
"step": 1131 |
|
}, |
|
{ |
|
"epoch": 4.3, |
|
"grad_norm": 0.054557517394749656, |
|
"learning_rate": 9.994950112600154e-06, |
|
"loss": 0.0011, |
|
"step": 1132 |
|
}, |
|
{ |
|
"epoch": 4.31, |
|
"grad_norm": 0.15976956920768468, |
|
"learning_rate": 9.887846119826849e-06, |
|
"loss": 0.0023, |
|
"step": 1133 |
|
}, |
|
{ |
|
"epoch": 4.31, |
|
"grad_norm": 0.05469859681916068, |
|
"learning_rate": 9.781289221777478e-06, |
|
"loss": 0.0011, |
|
"step": 1134 |
|
}, |
|
{ |
|
"epoch": 4.32, |
|
"grad_norm": 0.0330826221094112, |
|
"learning_rate": 9.675280065387116e-06, |
|
"loss": 0.0012, |
|
"step": 1135 |
|
}, |
|
{ |
|
"epoch": 4.32, |
|
"grad_norm": 0.037994630222178825, |
|
"learning_rate": 9.569819294265414e-06, |
|
"loss": 0.0007, |
|
"step": 1136 |
|
}, |
|
{ |
|
"epoch": 4.32, |
|
"grad_norm": 0.05446067811636603, |
|
"learning_rate": 9.464907548692614e-06, |
|
"loss": 0.0012, |
|
"step": 1137 |
|
}, |
|
{ |
|
"epoch": 4.33, |
|
"grad_norm": 0.030162059858075463, |
|
"learning_rate": 9.360545465615667e-06, |
|
"loss": 0.0009, |
|
"step": 1138 |
|
}, |
|
{ |
|
"epoch": 4.33, |
|
"grad_norm": 0.01776810562751649, |
|
"learning_rate": 9.256733678644414e-06, |
|
"loss": 0.0006, |
|
"step": 1139 |
|
}, |
|
{ |
|
"epoch": 4.33, |
|
"grad_norm": 0.07017734423776259, |
|
"learning_rate": 9.153472818047625e-06, |
|
"loss": 0.0012, |
|
"step": 1140 |
|
}, |
|
{ |
|
"epoch": 4.34, |
|
"grad_norm": 0.049265529448664684, |
|
"learning_rate": 9.05076351074936e-06, |
|
"loss": 0.0009, |
|
"step": 1141 |
|
}, |
|
{ |
|
"epoch": 4.34, |
|
"grad_norm": 0.01911138033873858, |
|
"learning_rate": 8.948606380324941e-06, |
|
"loss": 0.0008, |
|
"step": 1142 |
|
}, |
|
{ |
|
"epoch": 4.35, |
|
"grad_norm": 0.03207512532598277, |
|
"learning_rate": 8.847002046997354e-06, |
|
"loss": 0.0008, |
|
"step": 1143 |
|
}, |
|
{ |
|
"epoch": 4.35, |
|
"grad_norm": 0.09498054680592705, |
|
"learning_rate": 8.745951127633411e-06, |
|
"loss": 0.0015, |
|
"step": 1144 |
|
}, |
|
{ |
|
"epoch": 4.35, |
|
"grad_norm": 0.02206935138105853, |
|
"learning_rate": 8.645454235739903e-06, |
|
"loss": 0.0007, |
|
"step": 1145 |
|
}, |
|
{ |
|
"epoch": 4.36, |
|
"grad_norm": 0.03319965177433449, |
|
"learning_rate": 8.54551198146013e-06, |
|
"loss": 0.0007, |
|
"step": 1146 |
|
}, |
|
{ |
|
"epoch": 4.36, |
|
"grad_norm": 0.02840733607459401, |
|
"learning_rate": 8.44612497156989e-06, |
|
"loss": 0.0008, |
|
"step": 1147 |
|
}, |
|
{ |
|
"epoch": 4.37, |
|
"grad_norm": 0.018408351544896853, |
|
"learning_rate": 8.347293809474054e-06, |
|
"loss": 0.0006, |
|
"step": 1148 |
|
}, |
|
{ |
|
"epoch": 4.37, |
|
"grad_norm": 0.08136631297548408, |
|
"learning_rate": 8.249019095202736e-06, |
|
"loss": 0.0025, |
|
"step": 1149 |
|
}, |
|
{ |
|
"epoch": 4.37, |
|
"grad_norm": 0.03770345328525376, |
|
"learning_rate": 8.151301425407699e-06, |
|
"loss": 0.0011, |
|
"step": 1150 |
|
}, |
|
{ |
|
"epoch": 4.38, |
|
"grad_norm": 0.021828484215553397, |
|
"learning_rate": 8.054141393358816e-06, |
|
"loss": 0.0008, |
|
"step": 1151 |
|
}, |
|
{ |
|
"epoch": 4.38, |
|
"grad_norm": 0.06858372388136494, |
|
"learning_rate": 7.957539588940299e-06, |
|
"loss": 0.0013, |
|
"step": 1152 |
|
}, |
|
{ |
|
"epoch": 4.38, |
|
"grad_norm": 0.03540771842662069, |
|
"learning_rate": 7.861496598647278e-06, |
|
"loss": 0.0009, |
|
"step": 1153 |
|
}, |
|
{ |
|
"epoch": 4.39, |
|
"grad_norm": 0.06376796414899714, |
|
"learning_rate": 7.76601300558214e-06, |
|
"loss": 0.0017, |
|
"step": 1154 |
|
}, |
|
{ |
|
"epoch": 4.39, |
|
"grad_norm": 0.08541088279435645, |
|
"learning_rate": 7.671089389451058e-06, |
|
"loss": 0.0012, |
|
"step": 1155 |
|
}, |
|
{ |
|
"epoch": 4.4, |
|
"grad_norm": 0.01913967487748475, |
|
"learning_rate": 7.576726326560424e-06, |
|
"loss": 0.0008, |
|
"step": 1156 |
|
}, |
|
{ |
|
"epoch": 4.4, |
|
"grad_norm": 0.01505834145399495, |
|
"learning_rate": 7.482924389813317e-06, |
|
"loss": 0.0006, |
|
"step": 1157 |
|
}, |
|
{ |
|
"epoch": 4.4, |
|
"grad_norm": 0.029574633808729885, |
|
"learning_rate": 7.389684148706122e-06, |
|
"loss": 0.0008, |
|
"step": 1158 |
|
}, |
|
{ |
|
"epoch": 4.41, |
|
"grad_norm": 0.07775324960665371, |
|
"learning_rate": 7.2970061693250154e-06, |
|
"loss": 0.0011, |
|
"step": 1159 |
|
}, |
|
{ |
|
"epoch": 4.41, |
|
"grad_norm": 0.03207503686714158, |
|
"learning_rate": 7.204891014342552e-06, |
|
"loss": 0.0009, |
|
"step": 1160 |
|
}, |
|
{ |
|
"epoch": 4.41, |
|
"grad_norm": 0.02888329550946116, |
|
"learning_rate": 7.113339243014139e-06, |
|
"loss": 0.0008, |
|
"step": 1161 |
|
}, |
|
{ |
|
"epoch": 4.42, |
|
"grad_norm": 0.0194570206084492, |
|
"learning_rate": 7.022351411174866e-06, |
|
"loss": 0.0008, |
|
"step": 1162 |
|
}, |
|
{ |
|
"epoch": 4.42, |
|
"grad_norm": 0.14398642426432864, |
|
"learning_rate": 6.931928071235894e-06, |
|
"loss": 0.0032, |
|
"step": 1163 |
|
}, |
|
{ |
|
"epoch": 4.43, |
|
"grad_norm": 0.09922204312397419, |
|
"learning_rate": 6.842069772181236e-06, |
|
"loss": 0.0019, |
|
"step": 1164 |
|
}, |
|
{ |
|
"epoch": 4.43, |
|
"grad_norm": 0.04657668585468913, |
|
"learning_rate": 6.75277705956443e-06, |
|
"loss": 0.001, |
|
"step": 1165 |
|
}, |
|
{ |
|
"epoch": 4.43, |
|
"grad_norm": 0.014151142624219092, |
|
"learning_rate": 6.664050475505101e-06, |
|
"loss": 0.0006, |
|
"step": 1166 |
|
}, |
|
{ |
|
"epoch": 4.44, |
|
"grad_norm": 0.041876195216166465, |
|
"learning_rate": 6.575890558685882e-06, |
|
"loss": 0.0013, |
|
"step": 1167 |
|
}, |
|
{ |
|
"epoch": 4.44, |
|
"grad_norm": 0.02777421677729617, |
|
"learning_rate": 6.48829784434889e-06, |
|
"loss": 0.0008, |
|
"step": 1168 |
|
}, |
|
{ |
|
"epoch": 4.44, |
|
"grad_norm": 0.03827771284490116, |
|
"learning_rate": 6.4012728642926845e-06, |
|
"loss": 0.0015, |
|
"step": 1169 |
|
}, |
|
{ |
|
"epoch": 4.45, |
|
"grad_norm": 0.1320945724487641, |
|
"learning_rate": 6.314816146868952e-06, |
|
"loss": 0.0021, |
|
"step": 1170 |
|
}, |
|
{ |
|
"epoch": 4.45, |
|
"grad_norm": 0.07196675390680835, |
|
"learning_rate": 6.228928216979257e-06, |
|
"loss": 0.0027, |
|
"step": 1171 |
|
}, |
|
{ |
|
"epoch": 4.46, |
|
"grad_norm": 0.06301629094647111, |
|
"learning_rate": 6.143609596072008e-06, |
|
"loss": 0.0008, |
|
"step": 1172 |
|
}, |
|
{ |
|
"epoch": 4.46, |
|
"grad_norm": 0.04690844306331864, |
|
"learning_rate": 6.0588608021390655e-06, |
|
"loss": 0.0012, |
|
"step": 1173 |
|
}, |
|
{ |
|
"epoch": 4.46, |
|
"grad_norm": 0.02355863678935003, |
|
"learning_rate": 5.97468234971279e-06, |
|
"loss": 0.0007, |
|
"step": 1174 |
|
}, |
|
{ |
|
"epoch": 4.47, |
|
"grad_norm": 0.012557456867681805, |
|
"learning_rate": 5.891074749862857e-06, |
|
"loss": 0.0005, |
|
"step": 1175 |
|
}, |
|
{ |
|
"epoch": 4.47, |
|
"grad_norm": 0.06758959915320514, |
|
"learning_rate": 5.80803851019307e-06, |
|
"loss": 0.0013, |
|
"step": 1176 |
|
}, |
|
{ |
|
"epoch": 4.48, |
|
"grad_norm": 0.03280577696402171, |
|
"learning_rate": 5.725574134838474e-06, |
|
"loss": 0.0011, |
|
"step": 1177 |
|
}, |
|
{ |
|
"epoch": 4.48, |
|
"grad_norm": 0.01838083263589963, |
|
"learning_rate": 5.643682124462057e-06, |
|
"loss": 0.0007, |
|
"step": 1178 |
|
}, |
|
{ |
|
"epoch": 4.48, |
|
"grad_norm": 0.09736850218115797, |
|
"learning_rate": 5.562362976251901e-06, |
|
"loss": 0.0026, |
|
"step": 1179 |
|
}, |
|
{ |
|
"epoch": 4.49, |
|
"grad_norm": 0.038368083330140405, |
|
"learning_rate": 5.481617183918053e-06, |
|
"loss": 0.0012, |
|
"step": 1180 |
|
}, |
|
{ |
|
"epoch": 4.49, |
|
"grad_norm": 0.03241919329467971, |
|
"learning_rate": 5.401445237689573e-06, |
|
"loss": 0.0007, |
|
"step": 1181 |
|
}, |
|
{ |
|
"epoch": 4.49, |
|
"grad_norm": 0.019548513203401407, |
|
"learning_rate": 5.321847624311593e-06, |
|
"loss": 0.0007, |
|
"step": 1182 |
|
}, |
|
{ |
|
"epoch": 4.5, |
|
"grad_norm": 0.03343420920569663, |
|
"learning_rate": 5.242824827042237e-06, |
|
"loss": 0.0006, |
|
"step": 1183 |
|
}, |
|
{ |
|
"epoch": 4.5, |
|
"grad_norm": 0.013860278165749494, |
|
"learning_rate": 5.1643773256498164e-06, |
|
"loss": 0.0004, |
|
"step": 1184 |
|
}, |
|
{ |
|
"epoch": 4.51, |
|
"grad_norm": 0.03250235438008899, |
|
"learning_rate": 5.086505596409885e-06, |
|
"loss": 0.0008, |
|
"step": 1185 |
|
}, |
|
{ |
|
"epoch": 4.51, |
|
"grad_norm": 0.03812286821993014, |
|
"learning_rate": 5.009210112102292e-06, |
|
"loss": 0.001, |
|
"step": 1186 |
|
}, |
|
{ |
|
"epoch": 4.51, |
|
"grad_norm": 0.04724744059732396, |
|
"learning_rate": 4.932491342008383e-06, |
|
"loss": 0.0008, |
|
"step": 1187 |
|
}, |
|
{ |
|
"epoch": 4.52, |
|
"grad_norm": 0.07149621133781368, |
|
"learning_rate": 4.856349751908107e-06, |
|
"loss": 0.0021, |
|
"step": 1188 |
|
}, |
|
{ |
|
"epoch": 4.52, |
|
"grad_norm": 0.011104684384602798, |
|
"learning_rate": 4.7807858040771924e-06, |
|
"loss": 0.0005, |
|
"step": 1189 |
|
}, |
|
{ |
|
"epoch": 4.52, |
|
"grad_norm": 0.02671249567166847, |
|
"learning_rate": 4.705799957284351e-06, |
|
"loss": 0.001, |
|
"step": 1190 |
|
}, |
|
{ |
|
"epoch": 4.53, |
|
"grad_norm": 0.11429993772721231, |
|
"learning_rate": 4.6313926667885035e-06, |
|
"loss": 0.0019, |
|
"step": 1191 |
|
}, |
|
{ |
|
"epoch": 4.53, |
|
"grad_norm": 0.019599526292067716, |
|
"learning_rate": 4.557564384335977e-06, |
|
"loss": 0.0007, |
|
"step": 1192 |
|
}, |
|
{ |
|
"epoch": 4.54, |
|
"grad_norm": 0.023058983963623273, |
|
"learning_rate": 4.4843155581578474e-06, |
|
"loss": 0.0007, |
|
"step": 1193 |
|
}, |
|
{ |
|
"epoch": 4.54, |
|
"grad_norm": 0.01789699934131272, |
|
"learning_rate": 4.411646632967059e-06, |
|
"loss": 0.0006, |
|
"step": 1194 |
|
}, |
|
{ |
|
"epoch": 4.54, |
|
"grad_norm": 0.03475695043132507, |
|
"learning_rate": 4.339558049955927e-06, |
|
"loss": 0.0009, |
|
"step": 1195 |
|
}, |
|
{ |
|
"epoch": 4.55, |
|
"grad_norm": 0.04626061457798834, |
|
"learning_rate": 4.268050246793276e-06, |
|
"loss": 0.001, |
|
"step": 1196 |
|
}, |
|
{ |
|
"epoch": 4.55, |
|
"grad_norm": 0.03777665253578553, |
|
"learning_rate": 4.197123657621915e-06, |
|
"loss": 0.0009, |
|
"step": 1197 |
|
}, |
|
{ |
|
"epoch": 4.56, |
|
"grad_norm": 0.08401265052642046, |
|
"learning_rate": 4.126778713055923e-06, |
|
"loss": 0.0018, |
|
"step": 1198 |
|
}, |
|
{ |
|
"epoch": 4.56, |
|
"grad_norm": 0.01795488576011003, |
|
"learning_rate": 4.0570158401780465e-06, |
|
"loss": 0.0006, |
|
"step": 1199 |
|
}, |
|
{ |
|
"epoch": 4.56, |
|
"grad_norm": 0.05169945359266721, |
|
"learning_rate": 3.987835462537193e-06, |
|
"loss": 0.0012, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 4.56, |
|
"eval_blimp_filtered_avg": 0.7150746268656717, |
|
"eval_blimp_filtered_std": 0.0050445406050592155, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 4.56, |
|
"eval_blimp_supplement_avg": 0.8362068965517241, |
|
"eval_blimp_supplement_std": 0.016487870687225285, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 4.56, |
|
"eval_vqa_filtered_avg": 0.51, |
|
"eval_vqa_filtered_std": 0.05024183937956912, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 4.56, |
|
"eval_winoground_filtered_avg": 0.67, |
|
"eval_winoground_filtered_std": 0.04725815626252606, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 4.57, |
|
"grad_norm": 0.03549591031661078, |
|
"learning_rate": 3.919238000145687e-06, |
|
"loss": 0.0014, |
|
"step": 1201 |
|
}, |
|
{ |
|
"epoch": 4.57, |
|
"grad_norm": 0.12212372267764961, |
|
"learning_rate": 3.8512238694768835e-06, |
|
"loss": 0.0018, |
|
"step": 1202 |
|
}, |
|
{ |
|
"epoch": 4.57, |
|
"grad_norm": 0.0708069932236777, |
|
"learning_rate": 3.783793483462583e-06, |
|
"loss": 0.0016, |
|
"step": 1203 |
|
}, |
|
{ |
|
"epoch": 4.58, |
|
"grad_norm": 0.07637524055458063, |
|
"learning_rate": 3.7169472514904544e-06, |
|
"loss": 0.001, |
|
"step": 1204 |
|
}, |
|
{ |
|
"epoch": 4.58, |
|
"grad_norm": 0.03487034956626954, |
|
"learning_rate": 3.6506855794016913e-06, |
|
"loss": 0.0012, |
|
"step": 1205 |
|
}, |
|
{ |
|
"epoch": 4.59, |
|
"grad_norm": 0.026640228769618846, |
|
"learning_rate": 3.5850088694884156e-06, |
|
"loss": 0.0009, |
|
"step": 1206 |
|
}, |
|
{ |
|
"epoch": 4.59, |
|
"grad_norm": 0.012878283131233357, |
|
"learning_rate": 3.5199175204913117e-06, |
|
"loss": 0.0005, |
|
"step": 1207 |
|
}, |
|
{ |
|
"epoch": 4.59, |
|
"grad_norm": 0.04703267681123701, |
|
"learning_rate": 3.455411927597185e-06, |
|
"loss": 0.0012, |
|
"step": 1208 |
|
}, |
|
{ |
|
"epoch": 4.6, |
|
"grad_norm": 0.0153560104298258, |
|
"learning_rate": 3.3914924824365422e-06, |
|
"loss": 0.0006, |
|
"step": 1209 |
|
}, |
|
{ |
|
"epoch": 4.6, |
|
"grad_norm": 0.05920573168766921, |
|
"learning_rate": 3.3281595730812575e-06, |
|
"loss": 0.001, |
|
"step": 1210 |
|
}, |
|
{ |
|
"epoch": 4.6, |
|
"grad_norm": 0.07240573247327886, |
|
"learning_rate": 3.265413584042165e-06, |
|
"loss": 0.0015, |
|
"step": 1211 |
|
}, |
|
{ |
|
"epoch": 4.61, |
|
"grad_norm": 0.03877978634973686, |
|
"learning_rate": 3.203254896266761e-06, |
|
"loss": 0.0009, |
|
"step": 1212 |
|
}, |
|
{ |
|
"epoch": 4.61, |
|
"grad_norm": 0.0201160866989832, |
|
"learning_rate": 3.1416838871368924e-06, |
|
"loss": 0.0009, |
|
"step": 1213 |
|
}, |
|
{ |
|
"epoch": 4.62, |
|
"grad_norm": 0.048187931567094715, |
|
"learning_rate": 3.080700930466429e-06, |
|
"loss": 0.0012, |
|
"step": 1214 |
|
}, |
|
{ |
|
"epoch": 4.62, |
|
"grad_norm": 0.07174256245695844, |
|
"learning_rate": 3.0203063964990617e-06, |
|
"loss": 0.0018, |
|
"step": 1215 |
|
}, |
|
{ |
|
"epoch": 4.62, |
|
"grad_norm": 0.03188905472254505, |
|
"learning_rate": 2.9605006519059507e-06, |
|
"loss": 0.0009, |
|
"step": 1216 |
|
}, |
|
{ |
|
"epoch": 4.63, |
|
"grad_norm": 0.026587740185543742, |
|
"learning_rate": 2.9012840597836045e-06, |
|
"loss": 0.0007, |
|
"step": 1217 |
|
}, |
|
{ |
|
"epoch": 4.63, |
|
"grad_norm": 0.051838257669286805, |
|
"learning_rate": 2.8426569796516146e-06, |
|
"loss": 0.0011, |
|
"step": 1218 |
|
}, |
|
{ |
|
"epoch": 4.63, |
|
"grad_norm": 0.027884361782616187, |
|
"learning_rate": 2.7846197674504913e-06, |
|
"loss": 0.0007, |
|
"step": 1219 |
|
}, |
|
{ |
|
"epoch": 4.64, |
|
"grad_norm": 0.05934491011655865, |
|
"learning_rate": 2.7271727755395214e-06, |
|
"loss": 0.0014, |
|
"step": 1220 |
|
}, |
|
{ |
|
"epoch": 4.64, |
|
"grad_norm": 0.07722505492466579, |
|
"learning_rate": 2.6703163526945794e-06, |
|
"loss": 0.0014, |
|
"step": 1221 |
|
}, |
|
{ |
|
"epoch": 4.65, |
|
"grad_norm": 0.017911785319840934, |
|
"learning_rate": 2.614050844106042e-06, |
|
"loss": 0.0006, |
|
"step": 1222 |
|
}, |
|
{ |
|
"epoch": 4.65, |
|
"grad_norm": 0.07048675908478848, |
|
"learning_rate": 2.5583765913766987e-06, |
|
"loss": 0.0012, |
|
"step": 1223 |
|
}, |
|
{ |
|
"epoch": 4.65, |
|
"grad_norm": 0.029385343497706086, |
|
"learning_rate": 2.503293932519668e-06, |
|
"loss": 0.0011, |
|
"step": 1224 |
|
}, |
|
{ |
|
"epoch": 4.66, |
|
"grad_norm": 0.026721071049153142, |
|
"learning_rate": 2.4488032019563402e-06, |
|
"loss": 0.0008, |
|
"step": 1225 |
|
}, |
|
{ |
|
"epoch": 4.66, |
|
"grad_norm": 0.05733009421653368, |
|
"learning_rate": 2.394904730514358e-06, |
|
"loss": 0.0008, |
|
"step": 1226 |
|
}, |
|
{ |
|
"epoch": 4.67, |
|
"grad_norm": 0.048459011121664085, |
|
"learning_rate": 2.3415988454255855e-06, |
|
"loss": 0.001, |
|
"step": 1227 |
|
}, |
|
{ |
|
"epoch": 4.67, |
|
"grad_norm": 0.08348067589435784, |
|
"learning_rate": 2.2888858703241866e-06, |
|
"loss": 0.0015, |
|
"step": 1228 |
|
}, |
|
{ |
|
"epoch": 4.67, |
|
"grad_norm": 0.0723135438217715, |
|
"learning_rate": 2.236766125244549e-06, |
|
"loss": 0.0013, |
|
"step": 1229 |
|
}, |
|
{ |
|
"epoch": 4.68, |
|
"grad_norm": 0.043987088946181196, |
|
"learning_rate": 2.1852399266194314e-06, |
|
"loss": 0.0012, |
|
"step": 1230 |
|
}, |
|
{ |
|
"epoch": 4.68, |
|
"grad_norm": 0.02610918331075917, |
|
"learning_rate": 2.1343075872780396e-06, |
|
"loss": 0.0008, |
|
"step": 1231 |
|
}, |
|
{ |
|
"epoch": 4.68, |
|
"grad_norm": 0.02610217806342308, |
|
"learning_rate": 2.0839694164440425e-06, |
|
"loss": 0.0007, |
|
"step": 1232 |
|
}, |
|
{ |
|
"epoch": 4.69, |
|
"grad_norm": 0.016676961400433666, |
|
"learning_rate": 2.03422571973384e-06, |
|
"loss": 0.0007, |
|
"step": 1233 |
|
}, |
|
{ |
|
"epoch": 4.69, |
|
"grad_norm": 0.0712789068176972, |
|
"learning_rate": 1.985076799154528e-06, |
|
"loss": 0.0011, |
|
"step": 1234 |
|
}, |
|
{ |
|
"epoch": 4.7, |
|
"grad_norm": 0.040623193014393696, |
|
"learning_rate": 1.9365229531022264e-06, |
|
"loss": 0.0009, |
|
"step": 1235 |
|
}, |
|
{ |
|
"epoch": 4.7, |
|
"grad_norm": 0.0174120811798264, |
|
"learning_rate": 1.8885644763601774e-06, |
|
"loss": 0.0006, |
|
"step": 1236 |
|
}, |
|
{ |
|
"epoch": 4.7, |
|
"grad_norm": 0.02397234113764592, |
|
"learning_rate": 1.8412016600969695e-06, |
|
"loss": 0.0008, |
|
"step": 1237 |
|
}, |
|
{ |
|
"epoch": 4.71, |
|
"grad_norm": 0.04262503636577534, |
|
"learning_rate": 1.7944347918648185e-06, |
|
"loss": 0.0011, |
|
"step": 1238 |
|
}, |
|
{ |
|
"epoch": 4.71, |
|
"grad_norm": 0.02559933629938596, |
|
"learning_rate": 1.748264155597712e-06, |
|
"loss": 0.0007, |
|
"step": 1239 |
|
}, |
|
{ |
|
"epoch": 4.71, |
|
"grad_norm": 0.024338642632736805, |
|
"learning_rate": 1.7026900316098215e-06, |
|
"loss": 0.0007, |
|
"step": 1240 |
|
}, |
|
{ |
|
"epoch": 4.72, |
|
"grad_norm": 0.04865116104780992, |
|
"learning_rate": 1.657712696593705e-06, |
|
"loss": 0.0014, |
|
"step": 1241 |
|
}, |
|
{ |
|
"epoch": 4.72, |
|
"grad_norm": 0.016653138748607372, |
|
"learning_rate": 1.6133324236186742e-06, |
|
"loss": 0.0006, |
|
"step": 1242 |
|
}, |
|
{ |
|
"epoch": 4.73, |
|
"grad_norm": 0.02913263827559182, |
|
"learning_rate": 1.5695494821290735e-06, |
|
"loss": 0.0007, |
|
"step": 1243 |
|
}, |
|
{ |
|
"epoch": 4.73, |
|
"grad_norm": 0.1053160720503692, |
|
"learning_rate": 1.5263641379427595e-06, |
|
"loss": 0.0022, |
|
"step": 1244 |
|
}, |
|
{ |
|
"epoch": 4.73, |
|
"grad_norm": 0.06917659494864752, |
|
"learning_rate": 1.4837766532493468e-06, |
|
"loss": 0.0014, |
|
"step": 1245 |
|
}, |
|
{ |
|
"epoch": 4.74, |
|
"grad_norm": 0.034919075850131744, |
|
"learning_rate": 1.4417872866087534e-06, |
|
"loss": 0.0006, |
|
"step": 1246 |
|
}, |
|
{ |
|
"epoch": 4.74, |
|
"grad_norm": 0.07398471019830524, |
|
"learning_rate": 1.400396292949513e-06, |
|
"loss": 0.0013, |
|
"step": 1247 |
|
}, |
|
{ |
|
"epoch": 4.75, |
|
"grad_norm": 0.02043767100243107, |
|
"learning_rate": 1.3596039235672874e-06, |
|
"loss": 0.0007, |
|
"step": 1248 |
|
}, |
|
{ |
|
"epoch": 4.75, |
|
"grad_norm": 0.026716481737152874, |
|
"learning_rate": 1.3194104261233798e-06, |
|
"loss": 0.0007, |
|
"step": 1249 |
|
}, |
|
{ |
|
"epoch": 4.75, |
|
"grad_norm": 0.04718046174374857, |
|
"learning_rate": 1.2798160446431006e-06, |
|
"loss": 0.0013, |
|
"step": 1250 |
|
}, |
|
{ |
|
"epoch": 4.76, |
|
"grad_norm": 0.053929792186028926, |
|
"learning_rate": 1.2408210195144376e-06, |
|
"loss": 0.001, |
|
"step": 1251 |
|
}, |
|
{ |
|
"epoch": 4.76, |
|
"grad_norm": 0.016211755152668282, |
|
"learning_rate": 1.2024255874865108e-06, |
|
"loss": 0.0006, |
|
"step": 1252 |
|
}, |
|
{ |
|
"epoch": 4.76, |
|
"grad_norm": 0.02218925262348946, |
|
"learning_rate": 1.1646299816681195e-06, |
|
"loss": 0.0006, |
|
"step": 1253 |
|
}, |
|
{ |
|
"epoch": 4.77, |
|
"grad_norm": 0.061559821718042196, |
|
"learning_rate": 1.1274344315264196e-06, |
|
"loss": 0.0018, |
|
"step": 1254 |
|
}, |
|
{ |
|
"epoch": 4.77, |
|
"grad_norm": 0.011397207501113183, |
|
"learning_rate": 1.0908391628854041e-06, |
|
"loss": 0.0005, |
|
"step": 1255 |
|
}, |
|
{ |
|
"epoch": 4.78, |
|
"grad_norm": 0.022367382815004444, |
|
"learning_rate": 1.0548443979246481e-06, |
|
"loss": 0.0008, |
|
"step": 1256 |
|
}, |
|
{ |
|
"epoch": 4.78, |
|
"grad_norm": 0.11917770316499016, |
|
"learning_rate": 1.0194503551778866e-06, |
|
"loss": 0.0016, |
|
"step": 1257 |
|
}, |
|
{ |
|
"epoch": 4.78, |
|
"grad_norm": 0.023235418948401518, |
|
"learning_rate": 9.846572495316952e-07, |
|
"loss": 0.0007, |
|
"step": 1258 |
|
}, |
|
{ |
|
"epoch": 4.79, |
|
"grad_norm": 0.017722263245702797, |
|
"learning_rate": 9.504652922242562e-07, |
|
"loss": 0.0007, |
|
"step": 1259 |
|
}, |
|
{ |
|
"epoch": 4.79, |
|
"grad_norm": 0.15628375169850256, |
|
"learning_rate": 9.168746908439718e-07, |
|
"loss": 0.0026, |
|
"step": 1260 |
|
}, |
|
{ |
|
"epoch": 4.79, |
|
"grad_norm": 0.019997091181886018, |
|
"learning_rate": 8.838856493282754e-07, |
|
"loss": 0.0007, |
|
"step": 1261 |
|
}, |
|
{ |
|
"epoch": 4.8, |
|
"grad_norm": 0.05125265984626611, |
|
"learning_rate": 8.514983679623556e-07, |
|
"loss": 0.001, |
|
"step": 1262 |
|
}, |
|
{ |
|
"epoch": 4.8, |
|
"grad_norm": 0.014670438570475425, |
|
"learning_rate": 8.197130433779565e-07, |
|
"loss": 0.0005, |
|
"step": 1263 |
|
}, |
|
{ |
|
"epoch": 4.81, |
|
"grad_norm": 0.06613452163795065, |
|
"learning_rate": 7.885298685522235e-07, |
|
"loss": 0.0024, |
|
"step": 1264 |
|
}, |
|
{ |
|
"epoch": 4.81, |
|
"grad_norm": 0.03851957779319382, |
|
"learning_rate": 7.579490328064265e-07, |
|
"loss": 0.0013, |
|
"step": 1265 |
|
}, |
|
{ |
|
"epoch": 4.81, |
|
"grad_norm": 0.01550807081268757, |
|
"learning_rate": 7.27970721804927e-07, |
|
"loss": 0.0006, |
|
"step": 1266 |
|
}, |
|
{ |
|
"epoch": 4.82, |
|
"grad_norm": 0.02645554854912103, |
|
"learning_rate": 6.985951175539796e-07, |
|
"loss": 0.0007, |
|
"step": 1267 |
|
}, |
|
{ |
|
"epoch": 4.82, |
|
"grad_norm": 0.03279121203073833, |
|
"learning_rate": 6.698223984006436e-07, |
|
"loss": 0.0009, |
|
"step": 1268 |
|
}, |
|
{ |
|
"epoch": 4.83, |
|
"grad_norm": 0.0519224271730649, |
|
"learning_rate": 6.416527390317173e-07, |
|
"loss": 0.0009, |
|
"step": 1269 |
|
}, |
|
{ |
|
"epoch": 4.83, |
|
"grad_norm": 0.09986357170391567, |
|
"learning_rate": 6.140863104726391e-07, |
|
"loss": 0.0016, |
|
"step": 1270 |
|
}, |
|
{ |
|
"epoch": 4.83, |
|
"grad_norm": 0.03275074991380398, |
|
"learning_rate": 5.87123280086499e-07, |
|
"loss": 0.0007, |
|
"step": 1271 |
|
}, |
|
{ |
|
"epoch": 4.84, |
|
"grad_norm": 0.04587670912561613, |
|
"learning_rate": 5.607638115729841e-07, |
|
"loss": 0.0015, |
|
"step": 1272 |
|
}, |
|
{ |
|
"epoch": 4.84, |
|
"grad_norm": 0.027686155223001784, |
|
"learning_rate": 5.350080649674127e-07, |
|
"loss": 0.0007, |
|
"step": 1273 |
|
}, |
|
{ |
|
"epoch": 4.84, |
|
"grad_norm": 0.0223211970919243, |
|
"learning_rate": 5.098561966397131e-07, |
|
"loss": 0.0006, |
|
"step": 1274 |
|
}, |
|
{ |
|
"epoch": 4.85, |
|
"grad_norm": 0.017928512348313316, |
|
"learning_rate": 4.85308359293557e-07, |
|
"loss": 0.0005, |
|
"step": 1275 |
|
}, |
|
{ |
|
"epoch": 4.85, |
|
"grad_norm": 0.03475070764217293, |
|
"learning_rate": 4.613647019653389e-07, |
|
"loss": 0.0007, |
|
"step": 1276 |
|
}, |
|
{ |
|
"epoch": 4.86, |
|
"grad_norm": 0.020434320555203736, |
|
"learning_rate": 4.3802537002335386e-07, |
|
"loss": 0.0008, |
|
"step": 1277 |
|
}, |
|
{ |
|
"epoch": 4.86, |
|
"grad_norm": 0.025932942114901558, |
|
"learning_rate": 4.152905051668321e-07, |
|
"loss": 0.0008, |
|
"step": 1278 |
|
}, |
|
{ |
|
"epoch": 4.86, |
|
"grad_norm": 0.0629734875825493, |
|
"learning_rate": 3.931602454251837e-07, |
|
"loss": 0.0014, |
|
"step": 1279 |
|
}, |
|
{ |
|
"epoch": 4.87, |
|
"grad_norm": 0.017477717878726103, |
|
"learning_rate": 3.716347251570551e-07, |
|
"loss": 0.0006, |
|
"step": 1280 |
|
}, |
|
{ |
|
"epoch": 4.87, |
|
"grad_norm": 0.028129693282624404, |
|
"learning_rate": 3.50714075049563e-07, |
|
"loss": 0.0009, |
|
"step": 1281 |
|
}, |
|
{ |
|
"epoch": 4.87, |
|
"grad_norm": 0.3789011247756691, |
|
"learning_rate": 3.3039842211752824e-07, |
|
"loss": 0.0007, |
|
"step": 1282 |
|
}, |
|
{ |
|
"epoch": 4.88, |
|
"grad_norm": 0.02028981654233484, |
|
"learning_rate": 3.106878897026544e-07, |
|
"loss": 0.0007, |
|
"step": 1283 |
|
}, |
|
{ |
|
"epoch": 4.88, |
|
"grad_norm": 0.017043915032762993, |
|
"learning_rate": 2.915825974727726e-07, |
|
"loss": 0.0007, |
|
"step": 1284 |
|
}, |
|
{ |
|
"epoch": 4.89, |
|
"grad_norm": 0.02765997006700044, |
|
"learning_rate": 2.7308266142119785e-07, |
|
"loss": 0.0008, |
|
"step": 1285 |
|
}, |
|
{ |
|
"epoch": 4.89, |
|
"grad_norm": 0.05844451311284074, |
|
"learning_rate": 2.5518819386590734e-07, |
|
"loss": 0.0011, |
|
"step": 1286 |
|
}, |
|
{ |
|
"epoch": 4.89, |
|
"grad_norm": 0.05688879982444352, |
|
"learning_rate": 2.3789930344897404e-07, |
|
"loss": 0.0008, |
|
"step": 1287 |
|
}, |
|
{ |
|
"epoch": 4.9, |
|
"grad_norm": 0.028207898085014158, |
|
"learning_rate": 2.212160951358011e-07, |
|
"loss": 0.001, |
|
"step": 1288 |
|
}, |
|
{ |
|
"epoch": 4.9, |
|
"grad_norm": 0.016086514604394798, |
|
"learning_rate": 2.0513867021457744e-07, |
|
"loss": 0.0007, |
|
"step": 1289 |
|
}, |
|
{ |
|
"epoch": 4.9, |
|
"grad_norm": 0.02971312168354947, |
|
"learning_rate": 1.8966712629558957e-07, |
|
"loss": 0.0009, |
|
"step": 1290 |
|
}, |
|
{ |
|
"epoch": 4.91, |
|
"grad_norm": 0.021944051433958625, |
|
"learning_rate": 1.748015573106887e-07, |
|
"loss": 0.0007, |
|
"step": 1291 |
|
}, |
|
{ |
|
"epoch": 4.91, |
|
"grad_norm": 0.04949653596283253, |
|
"learning_rate": 1.6054205351265784e-07, |
|
"loss": 0.0021, |
|
"step": 1292 |
|
}, |
|
{ |
|
"epoch": 4.92, |
|
"grad_norm": 0.06054720833614147, |
|
"learning_rate": 1.4688870147473443e-07, |
|
"loss": 0.0011, |
|
"step": 1293 |
|
}, |
|
{ |
|
"epoch": 4.92, |
|
"grad_norm": 0.02434476920884362, |
|
"learning_rate": 1.338415840900109e-07, |
|
"loss": 0.0007, |
|
"step": 1294 |
|
}, |
|
{ |
|
"epoch": 4.92, |
|
"grad_norm": 0.04425200814298354, |
|
"learning_rate": 1.2140078057101266e-07, |
|
"loss": 0.001, |
|
"step": 1295 |
|
}, |
|
{ |
|
"epoch": 4.93, |
|
"grad_norm": 0.08400768778349636, |
|
"learning_rate": 1.0956636644912088e-07, |
|
"loss": 0.0015, |
|
"step": 1296 |
|
}, |
|
{ |
|
"epoch": 4.93, |
|
"grad_norm": 0.01681668533811899, |
|
"learning_rate": 9.833841357421714e-08, |
|
"loss": 0.0006, |
|
"step": 1297 |
|
}, |
|
{ |
|
"epoch": 4.94, |
|
"grad_norm": 0.06053068555064212, |
|
"learning_rate": 8.771699011416168e-08, |
|
"loss": 0.0019, |
|
"step": 1298 |
|
}, |
|
{ |
|
"epoch": 4.94, |
|
"grad_norm": 0.04317850317270507, |
|
"learning_rate": 7.770216055443814e-08, |
|
"loss": 0.0011, |
|
"step": 1299 |
|
}, |
|
{ |
|
"epoch": 4.94, |
|
"grad_norm": 0.08586897126139875, |
|
"learning_rate": 6.829398569770939e-08, |
|
"loss": 0.001, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 4.94, |
|
"eval_blimp_filtered_avg": 0.7153731343283583, |
|
"eval_blimp_filtered_std": 0.005045209651150972, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 4.94, |
|
"eval_blimp_supplement_avg": 0.834051724137931, |
|
"eval_blimp_supplement_std": 0.016654601081150626, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 4.94, |
|
"eval_vqa_filtered_avg": 0.5, |
|
"eval_vqa_filtered_std": 0.050251890762960605, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 4.94, |
|
"eval_winoground_filtered_avg": 0.69, |
|
"eval_winoground_filtered_std": 0.04648231987117316, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 4.95, |
|
"grad_norm": 0.016208779457975556, |
|
"learning_rate": 5.94925226635068e-08, |
|
"loss": 0.0006, |
|
"step": 1301 |
|
}, |
|
{ |
|
"epoch": 4.95, |
|
"grad_norm": 0.01833907339793384, |
|
"learning_rate": 5.1297824887841516e-08, |
|
"loss": 0.0006, |
|
"step": 1302 |
|
}, |
|
{ |
|
"epoch": 4.95, |
|
"grad_norm": 0.036830940713068985, |
|
"learning_rate": 4.370994212287149e-08, |
|
"loss": 0.001, |
|
"step": 1303 |
|
}, |
|
{ |
|
"epoch": 4.96, |
|
"grad_norm": 0.015645769111424807, |
|
"learning_rate": 3.672892043666831e-08, |
|
"loss": 0.0006, |
|
"step": 1304 |
|
}, |
|
{ |
|
"epoch": 4.96, |
|
"grad_norm": 0.07940082249502824, |
|
"learning_rate": 3.0354802212839705e-08, |
|
"loss": 0.0011, |
|
"step": 1305 |
|
}, |
|
{ |
|
"epoch": 4.97, |
|
"grad_norm": 0.015159814855124942, |
|
"learning_rate": 2.4587626150351926e-08, |
|
"loss": 0.0005, |
|
"step": 1306 |
|
}, |
|
{ |
|
"epoch": 4.97, |
|
"grad_norm": 0.022458919214575546, |
|
"learning_rate": 1.94274272632633e-08, |
|
"loss": 0.001, |
|
"step": 1307 |
|
}, |
|
{ |
|
"epoch": 4.97, |
|
"grad_norm": 0.02980953425255622, |
|
"learning_rate": 1.4874236880491055e-08, |
|
"loss": 0.0008, |
|
"step": 1308 |
|
}, |
|
{ |
|
"epoch": 4.98, |
|
"grad_norm": 0.023911967525172478, |
|
"learning_rate": 1.0928082645667025e-08, |
|
"loss": 0.0007, |
|
"step": 1309 |
|
}, |
|
{ |
|
"epoch": 4.98, |
|
"grad_norm": 0.01333819056514401, |
|
"learning_rate": 7.58898851693779e-09, |
|
"loss": 0.0005, |
|
"step": 1310 |
|
}, |
|
{ |
|
"epoch": 4.98, |
|
"grad_norm": 0.028702718224002614, |
|
"learning_rate": 4.856974766831446e-09, |
|
"loss": 0.0008, |
|
"step": 1311 |
|
}, |
|
{ |
|
"epoch": 4.99, |
|
"grad_norm": 0.022211928652402655, |
|
"learning_rate": 2.732057982124392e-09, |
|
"loss": 0.0006, |
|
"step": 1312 |
|
}, |
|
{ |
|
"epoch": 4.99, |
|
"grad_norm": 0.021399146337567086, |
|
"learning_rate": 1.2142510637414006e-09, |
|
"loss": 0.0008, |
|
"step": 1313 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"grad_norm": 0.06401289450470597, |
|
"learning_rate": 3.035632266890076e-10, |
|
"loss": 0.0017, |
|
"step": 1314 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"grad_norm": 0.10829033858276516, |
|
"learning_rate": 0.0, |
|
"loss": 0.0017, |
|
"step": 1315 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"step": 1315, |
|
"total_flos": 76761234800640.0, |
|
"train_loss": 0.24336853736315273, |
|
"train_runtime": 20724.9349, |
|
"train_samples_per_second": 16.232, |
|
"train_steps_per_second": 0.063 |
|
} |
|
], |
|
"logging_steps": 1.0, |
|
"max_steps": 1315, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 5, |
|
"save_steps": 500, |
|
"total_flos": 76761234800640.0, |
|
"train_batch_size": 64, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|