{
  "_name_or_path": "jinaai/jina-bert-implementation",
  "model_max_length": 8192,
  "architectures": [
    "JinaBertForMaskedLM"
  ],
  "auto_map": {
    "AutoConfig": "jinaai/jina-bert-flash-implementation--configuration_bert.JinaBertConfig",
    "AutoModel": "jinaai/jina-bert-flash-implementation--modeling_bert.BertModel",
    "AutoModelForPreTraining": "jinaai/jina-bert-flash-implementation--modeling_bert.BertForPreTraining",
    "AutoModelForMaskedLM": "jinaai/jina-bert-flash-implementation--modeling_bert.BertForPreTraining"
  },
  "vocab_size": 30528,
  "hidden_size": 512,
  "num_hidden_layers": 4,
  "num_attention_heads": 8,
  "intermediate_size": 2048,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "attention_probs_dropout_prob": 0.1,
  "type_vocab_size": 0,
  "initializer_range": 0.02,
  "layer_norm_eps": 1e-12,
  "pad_token_id": 0,
  "dense_seq_output": true,
  "fused_mlp": false,
  "mlp_checkpoint_lvl": 0,
  "last_layer_subset": false,
  "fused_dropout_add_ln": false,
  "fused_bias_fc": false,
  "pad_vocab_size_multiple": 1,
  "num_tasks": 6,
  "use_flash_attn": true
}