1 {
2 "_name_or_path": "old_models/distilbert-base-nli-mean-tokens/0_Transformer",
3 "activation": "gelu",
4 "architectures": [
5 "DistilBertModel"
6 ],
7 "attention_dropout": 0.1,
8 "dim": 768,
9 "dropout": 0.1,
10 "hidden_dim": 3072,
11 "initializer_range": 0.02,
12 "max_position_embeddings": 512,
13 "model_type": "distilbert",
14 "n_heads": 12,
15 "n_layers": 6,
16 "pad_token_id": 0,
17 "qa_dropout": 0.1,
18 "seq_classif_dropout": 0.2,
19 "sinusoidal_pos_embds": false,
20 "tie_weights_": true,
21 "transformers_version": "4.7.0",
22 "vocab_size": 30522
23 }
24