{ "_name_or_path": "seed_encoder_3_decoder_layers", "activation_dropout": 0.0, "activation_fn": "gelu", "adaptive_input": false, "adaptive_softmax_cutoff": null, "adaptive_softmax_dropout": 0.0, "architectures": [ "SEEDEncoderDot_NLL_LN" ], "attention_dropout": 0.1, "cross_self_attention": false, "decoder_atten_window": 8, "decoder_attention_heads": 12, "decoder_embed_dim": 768, "decoder_embed_path": null, "decoder_ffn_embed_dim": 3072, "decoder_input_dim": 768, "decoder_layerdrop": 0.0, "decoder_layers": 3, "decoder_layers_to_keep": null, "decoder_learned_pos": true, "decoder_normalize_before": true, "decoder_output_dim": 768, "dropout": 0.1, "encoder_attention_heads": 12, "encoder_embed_dim": 768, "encoder_ffn_embed_dim": 3072, "encoder_layerdrop": 0.0, "encoder_layers": 12, "encoder_layers_to_keep": null, "finetuning_task": "msmarco", "layernorm_embedding": true, "max_positions": 512, "max_source_positions": 512, "max_target_positions": 512, "model_type": "seed_encoder", "no_cross_attention": false, "no_scale_embedding": true, "no_token_positional_embeddings": false, "pad_token_id": 1, "pooler_activation_fn": "tanh", "pooler_dropout": 0.0, "quant_noise_pq": 0.0, "quant_noise_pq_block_size": 8, "share_all_embeddings": true, "share_decoder_input_output_embed": true, "tie_adaptive_weights": true, "train_ratio": "0.5:0.5", "vocab_size": 32769 }