sanchit-gandhi's picture
Saving weights and logs of epoch 1
8319971
raw
history blame
6.49 kB
{"train/decoder_grad_norm": 0.2679520845413208, "train/decoder_param_norm": 10.987318992614746, "train/encoder_grad_norm": 5.0892409490188584e-05, "train/encoder_param_norm": 21.97295379638672, "train/grad_norm": 0.2679520845413208, "layer_grad_norm/": {"decoder": {"model": {"decoder": {"embed_positions": {"embedding": 0.1300550401210785}, "embed_tokens": {"embedding": 0.23291321098804474}, "layernorm_embedding": {"bias": 0.0053486633114516735, "scale": 0.004155205562710762}, "layers": {"FlaxBartDecoderLayers": {"encoder_attn": {"k_proj": {"bias": 5.312275142346778e-13, "kernel": 8.975896198179512e-13}, "out_proj": {"bias": 0.008784609846770763, "kernel": 4.107481800019741e-05}, "q_proj": {"bias": 6.116195332313279e-13, "kernel": 8.879417383658716e-13}, "v_proj": {"bias": 0.000990744330920279, "kernel": 3.636425390141085e-06}}, "encoder_attn_layer_norm": {"bias": 0.008962320163846016, "scale": 0.005857730749994516}, "fc1": {"bias": 0.00013585005945060402, "kernel": 0.00037603775854222476}, "fc2": {"bias": 0.008722522296011448, "kernel": 0.00033544827601872385}, "final_layer_norm": {"bias": 0.009417260996997356, "scale": 0.006055876612663269}, "self_attn": {"k_proj": {"bias": 1.1712979475220209e-09, "kernel": 3.8348139241861645e-06}, "out_proj": {"bias": 0.008887370117008686, "kernel": 0.0007119181100279093}, "q_proj": {"bias": 1.3947447996542905e-06, "kernel": 4.4340058593661524e-06}, "v_proj": {"bias": 0.0006612322176806629, "kernel": 0.0006704386323690414}}, "self_attn_layer_norm": {"bias": 0.008963682688772678, "scale": 0.005859017372131348}}}}}}, "encoder": {"adapter": {"layers": {"0": {"conv": {"bias": 2.431644645639608e-07, "kernel": 1.1549407190614147e-06}}, "1": {"conv": {"bias": 2.9579357487818925e-06, "kernel": 1.311571168116643e-06}}, "2": {"conv": {"bias": 5.0703747547231615e-05, "kernel": 2.3987627173482906e-06}}}}, "encoder": {"layer_norm": {"bias": 3.884435884060622e-08, "scale": 3.887453559059395e-08}, "layers": {"FlaxWav2Vec2EncoderLayers": {"attention": {"k_proj": {"bias": 1.3823666637649643e-15, "kernel": 1.1354555490283857e-10}, "out_proj": {"bias": 6.99043198437721e-07, "kernel": 1.1560265988919127e-07}, "q_proj": {"bias": 4.319047897105577e-11, "kernel": 1.1068277544490357e-10}, "v_proj": {"bias": 5.1656833477409236e-08, "kernel": 1.1340067374021601e-07}}, "feed_forward": {"intermediate_dense": {"bias": 2.9407328128172594e-08, "kernel": 8.487170788384901e-08}, "output_dense": {"bias": 6.827488618910138e-07, "kernel": 9.67346522884327e-08}}, "final_layer_norm": {"bias": 2.1070718503324315e-09, "scale": 1.3844979607213759e-09}, "layer_norm": {"bias": 3.887623112319716e-09, "scale": 2.263043530348341e-09}}}, "pos_conv_embed": {"conv": {"bias": 2.4913080665101006e-07, "weight_g": 5.592921237251858e-09, "weight_v": 6.250338202562489e-08}}}, "feature_extractor": {"conv_layers": {"0": {"conv": {"kernel": 0.0}, "layer_norm": {"bias": 0.0, "scale": 0.0}}, "1": {"conv": {"kernel": 0.0}, "layer_norm": {"bias": 0.0, "scale": 0.0}}, "2": {"conv": {"kernel": 0.0}, "layer_norm": {"bias": 0.0, "scale": 0.0}}}}, "feature_projection": {"layer_norm": {"bias": 5.000295999479931e-08, "scale": 6.460602719471353e-09}, "projection": {"bias": 5.360236059459567e-07, "kernel": 4.370844237655547e-07}}, "masked_spec_embed": 0.0}}, "layer_param_norm/": {"decoder": {"model": {"decoder": {"embed_positions": {"embedding": 0.7995666861534119}, "embed_tokens": {"embedding": 2.5087716579437256}, "layernorm_embedding": {"bias": 0.004185355268418789, "scale": 4.000096797943115}, "layers": {"FlaxBartDecoderLayers": {"encoder_attn": {"k_proj": {"bias": 2.4479984706005098e-09, "kernel": 0.43237069249153137}, "out_proj": {"bias": 0.005893721245229244, "kernel": 0.46672675013542175}, "q_proj": {"bias": 2.9117382283061488e-08, "kernel": 0.4763868451118469}, "v_proj": {"bias": 0.006317977327853441, "kernel": 0.4627833366394043}}, "encoder_attn_layer_norm": {"bias": 0.006012107711285353, "scale": 5.656979560852051}, "fc1": {"bias": 0.0029884851537644863, "kernel": 0.230309396982193}, "fc2": {"bias": 0.005906371865421534, "kernel": 0.22855640947818756}, "final_layer_norm": {"bias": 0.005787394475191832, "scale": 5.657113075256348}, "self_attn": {"k_proj": {"bias": 1.2845576748077292e-05, "kernel": 0.4488256871700287}, "out_proj": {"bias": 0.005900240037590265, "kernel": 0.45953789353370667}, "q_proj": {"bias": 0.0051243542693555355, "kernel": 0.4389404356479645}, "v_proj": {"bias": 0.005685505922883749, "kernel": 0.473923921585083}}, "self_attn_layer_norm": {"bias": 0.00601226557046175, "scale": 5.656979560852051}}}}}}, "encoder": {"adapter": {"layers": {"0": {"conv": {"bias": 0.0035623302683234215, "kernel": 0.7978851199150085}}, "1": {"conv": {"bias": 0.004474292043596506, "kernel": 0.7823198437690735}}, "2": {"conv": {"bias": 0.004339096136391163, "kernel": 0.7933635115623474}}}}, "encoder": {"layer_norm": {"bias": 0.0012930977391079068, "scale": 4.000429153442383}, "layers": {"FlaxWav2Vec2EncoderLayers": {"attention": {"k_proj": {"bias": 1.183643461022399e-10, "kernel": 0.6511669158935547}, "out_proj": {"bias": 0.004178609233349562, "kernel": 0.6515294909477234}, "q_proj": {"bias": 8.916323167795781e-06, "kernel": 0.636457085609436}, "v_proj": {"bias": 0.0027142988983541727, "kernel": 0.6421023011207581}}, "feed_forward": {"intermediate_dense": {"bias": 0.0021830981131643057, "kernel": 0.7063364386558533}, "output_dense": {"bias": 0.004428844433277845, "kernel": 0.7153164148330688}}, "final_layer_norm": {"bias": 0.00031358818523585796, "scale": 7.99997615814209}, "layer_norm": {"bias": 0.0006144040380604565, "scale": 8.000043869018555}}}, "pos_conv_embed": {"conv": {"bias": 0.0018276346381753683, "weight_g": 2.2776975631713867, "weight_v": 2.2775840759277344}}}, "feature_extractor": {"conv_layers": {"0": {"conv": {"kernel": 7.867799758911133}, "layer_norm": {"bias": 0.0, "scale": 5.656854152679443}}, "1": {"conv": {"kernel": 8.025212287902832}, "layer_norm": {"bias": 0.0, "scale": 5.656854152679443}}, "2": {"conv": {"kernel": 7.975250720977783}, "layer_norm": {"bias": 0.0, "scale": 5.656854152679443}}}}, "feature_projection": {"layer_norm": {"bias": 0.0015199838671833277, "scale": 5.6568169593811035}, "projection": {"bias": 0.001792658818885684, "kernel": 0.43013235926628113}}, "masked_spec_embed": 2.404470205307007}}, "train/learning_rate": 2.400018274784088e-06, "train/loss": 6.903058052062988, "train/param_norm": 24.566884994506836, "_timestamp": 1651947727, "_runtime": 86, "_step": 4}