sanchit-gandhi's picture
qf2iwkac: saving weights and logs of step 0k
a869d63
raw history blame
No virus
7.91 kB
{"train/decoder_grad_norm": 0.293959379196167, "train/decoder_param_norm": 10.987303733825684, "train/encoder_grad_norm": 5.3447409300133586e-05, "train/encoder_param_norm": 21.972993850708008, "train/grad_norm": 0.293959379196167, "layer_grad_norm/": {"decoder": {"model": {"decoder": {"embed_positions": {"embedding": 0.15045838057994843}, "embed_tokens": {"embedding": 0.25100693106651306}, "layernorm_embedding": {"bias": 0.006534375250339508, "scale": 0.0037290260661393404}, "layers": {"FlaxBartDecoderLayers": {"encoder_attn": {"k_proj": {"bias": 3.375024326852938e-13, "kernel": 9.308541350921962e-13}, "out_proj": {"bias": 0.01009051688015461, "kernel": 5.131477155373432e-05}, "q_proj": {"bias": 6.986853044906038e-13, "kernel": 1.1284255864787984e-12}, "v_proj": {"bias": 0.0011288232635706663, "kernel": 4.451439963304438e-06}}, "encoder_attn_layer_norm": {"bias": 0.010046548210084438, "scale": 0.005095764063298702}, "fc1": {"bias": 0.00023096689255908132, "kernel": 0.0004343906184658408}, "fc2": {"bias": 0.010574166662991047, "kernel": 0.0004366966022644192}, "final_layer_norm": {"bias": 0.010372617281973362, "scale": 0.005281996447592974}, "self_attn": {"k_proj": {"bias": 1.130181281894238e-09, "kernel": 4.527865712589119e-06}, "out_proj": {"bias": 0.010307326912879944, "kernel": 0.0009190444834530354}, "q_proj": {"bias": 1.58041484610294e-06, "kernel": 4.583661393553484e-06}, "v_proj": {"bias": 0.0008930374169722199, "kernel": 0.0008962888969108462}}, "self_attn_layer_norm": {"bias": 0.010049148462712765, "scale": 0.005089657846838236}}}}}}, "encoder": {"adapter": {"layers": {"0": {"conv": {"bias": 2.78683756960163e-07, "kernel": 1.3058978538538213e-06}}, "1": {"conv": {"bias": 3.2350201308872784e-06, "kernel": 1.4072030580791761e-06}}, "2": {"conv": {"bias": 5.3230603953124955e-05, "kernel": 2.5485987862339243e-06}}}}, "encoder": {"layer_norm": {"bias": 4.616065041318507e-08, "scale": 4.433106681744903e-08}, "layers": {"FlaxWav2Vec2EncoderLayers": {"attention": {"k_proj": {"bias": 1.3622831949482096e-15, "kernel": 1.7724854917133825e-10}, "out_proj": {"bias": 8.77685579325771e-07, "kernel": 1.5102111206033442e-07}, "q_proj": {"bias": 6.979490596581428e-11, "kernel": 2.1076571043998626e-10}, "v_proj": {"bias": 5.5353790884282716e-08, "kernel": 1.2770148316576524e-07}}, "feed_forward": {"intermediate_dense": {"bias": 3.7398876884253696e-08, "kernel": 1.1362677554416223e-07}, "output_dense": {"bias": 8.160148468050465e-07, "kernel": 1.2486131595323968e-07}}, "final_layer_norm": {"bias": 2.575513136093832e-09, "scale": 1.88160664826853e-09}, "layer_norm": {"bias": 4.1193279898266155e-09, "scale": 2.5369788492213274e-09}}}, "pos_conv_embed": {"conv": {"bias": 3.465826523552096e-07, "weight_g": 7.019787418016676e-09, "weight_v": 7.727043538352518e-08}}}, "feature_extractor": {"conv_layers": {"0": {"conv": {"kernel": 0.0}, "layer_norm": {"bias": 0.0, "scale": 0.0}}, "1": {"conv": {"kernel": 0.0}, "layer_norm": {"bias": 0.0, "scale": 0.0}}, "2": {"conv": {"kernel": 0.0}, "layer_norm": {"bias": 0.0, "scale": 0.0}}}}, "feature_projection": {"layer_norm": {"bias": 6.646207850735664e-08, "scale": 8.013022245734192e-09}, "projection": {"bias": 6.936390377632051e-07, "kernel": 5.300959742271516e-07}}, "masked_spec_embed": 0.0}}, "layer_param_norm/": {"decoder": {"model": {"decoder": {"embed_positions": {"embedding": 0.7995960116386414}, "embed_tokens": {"embedding": 2.5083138942718506}, "layernorm_embedding": {"bias": 0.004562107380479574, "scale": 4.0000901222229}, "layers": {"FlaxBartDecoderLayers": {"encoder_attn": {"k_proj": {"bias": 2.7063116192493908e-09, "kernel": 0.43237069249153137}, "out_proj": {"bias": 0.006423539947718382, "kernel": 0.46732479333877563}, "q_proj": {"bias": 3.698390571571508e-08, "kernel": 0.4763868451118469}, "v_proj": {"bias": 0.006897295359522104, "kernel": 0.463136225938797}}, "encoder_attn_layer_norm": {"bias": 0.006547156255692244, "scale": 5.656968116760254}, "fc1": {"bias": 0.003273726673796773, "kernel": 0.23024976253509521}, "fc2": {"bias": 0.006439621560275555, "kernel": 0.22848989069461823}, "final_layer_norm": {"bias": 0.006299057509750128, "scale": 5.657195568084717}, "self_attn": {"k_proj": {"bias": 1.3696276255359408e-05, "kernel": 0.4489610493183136}, "out_proj": {"bias": 0.006426077801734209, "kernel": 0.4596632421016693}, "q_proj": {"bias": 0.005574330221861601, "kernel": 0.4390396177768707}, "v_proj": {"bias": 0.006184465251863003, "kernel": 0.4740023910999298}}, "self_attn_layer_norm": {"bias": 0.00654734019190073, "scale": 5.6569695472717285}}}}}}, "encoder": {"adapter": {"layers": {"0": {"conv": {"bias": 0.003938107285648584, "kernel": 0.7980291247367859}}, "1": {"conv": {"bias": 0.004904463887214661, "kernel": 0.7824782729148865}}, "2": {"conv": {"bias": 0.0047929175198078156, "kernel": 0.7935279607772827}}}}, "encoder": {"layer_norm": {"bias": 0.0015007174806669354, "scale": 4.000539302825928}, "layers": {"FlaxWav2Vec2EncoderLayers": {"attention": {"k_proj": {"bias": 1.1623019213757857e-10, "kernel": 0.6511669158935547}, "out_proj": {"bias": 0.004577825777232647, "kernel": 0.6515982747077942}, "q_proj": {"bias": 9.149670404440258e-06, "kernel": 0.636457085609436}, "v_proj": {"bias": 0.0029153258074074984, "kernel": 0.6421692371368408}}, "feed_forward": {"intermediate_dense": {"bias": 0.0023425323888659477, "kernel": 0.7063462138175964}, "output_dense": {"bias": 0.004827361553907394, "kernel": 0.7153245210647583}}, "final_layer_norm": {"bias": 0.00033075266401283443, "scale": 7.999977111816406}, "layer_norm": {"bias": 0.0006435627001337707, "scale": 8.000053405761719}}}, "pos_conv_embed": {"conv": {"bias": 0.002023016568273306, "weight_g": 2.277714252471924, "weight_v": 2.2775814533233643}}}, "feature_extractor": {"conv_layers": {"0": {"conv": {"kernel": 7.867799758911133}, "layer_norm": {"bias": 0.0, "scale": 5.656854152679443}}, "1": {"conv": {"kernel": 8.025212287902832}, "layer_norm": {"bias": 0.0, "scale": 5.656854152679443}}, "2": {"conv": {"kernel": 7.975250720977783}, "layer_norm": {"bias": 0.0, "scale": 5.656854152679443}}}}, "feature_projection": {"layer_norm": {"bias": 0.0016678336542099714, "scale": 5.656801223754883}, "projection": {"bias": 0.0020024373661726713, "kernel": 0.4300427734851837}}, "masked_spec_embed": 2.404470205307007}}, "train/learning_rate": 4.799978341907263e-06, "train/loss": 6.903526782989502, "train/param_norm": 24.56691551208496, "_timestamp": 1653826062, "_runtime": 164, "_step": 9, "eval/loss": 6.89687442779541, "eval/wer": 1.3195402298850574, "eval/cer": 1.4536741214057507, "eval/step_0k": {"_type": "table-file", "path": "media/table/eval/step_0k_5_92ae240f2472ae79ad31.table.json", "sha256": "92ae240f2472ae79ad316b729700c013d11e821782ca7e4cd33eb61c840df66d", "size": 11870, "artifact_path": "wandb-client-artifact://e0ee9vqfa2o0de6mdryraqvdeo75406tm7340121ydcg07o2xduuis84iob81362njk07nbiunrvc6uhqqk5g7unxjij98oid247vn20rsiy8b2ehvb5ktv1wqg5jt3u:latest/eval/step_0k.table.json", "_latest_artifact_path": "wandb-client-artifact://e0ee9vqfa2o0de6mdryraqvdeo75406tm7340121ydcg07o2xduuis84iob81362njk07nbiunrvc6uhqqk5g7unxjij98oid247vn20rsiy8b2ehvb5ktv1wqg5jt3u:latest/eval/step_0k.table.json", "ncols": 3, "nrows": 50}, "eval/step_0k_incorrect": {"_type": "table-file", "path": "media/table/eval/step_0k_incorrect_5_b30e6671cbaaf5c82b6d.table.json", "sha256": "b30e6671cbaaf5c82b6d15ffbbb5d23d008017499213a95041da6a0e38000fb4", "size": 15179, "artifact_path": "wandb-client-artifact://1g1krxry75qdj2cxj79rep1n2ssa02jrnmc0ylbhmvk2i0qh1p5m5x4y3uli6xtfvwb3m4506hobe5ciztd1wcm2mcv3ebe7bxonu1utd77sa1syhkwywg76ogf8h0j7:latest/eval/step_0k_incorrect.table.json", "_latest_artifact_path": "wandb-client-artifact://1g1krxry75qdj2cxj79rep1n2ssa02jrnmc0ylbhmvk2i0qh1p5m5x4y3uli6xtfvwb3m4506hobe5ciztd1wcm2mcv3ebe7bxonu1utd77sa1syhkwywg76ogf8h0j7:latest/eval/step_0k_incorrect.table.json", "ncols": 3, "nrows": 64}}