Daniel Khashabi
model 840015d
1 {
2 "_name_or_path": "/home/patrick/hugging_face/t5/mt5-base",
3 "architectures": [
4 "MT5ForConditionalGeneration"
5 ],
6 "d_ff": 2048,
7 "d_kv": 64,
8 "d_model": 768,
9 "decoder_start_token_id": 0,
10 "dropout_rate": 0.1,
11 "eos_token_id": 1,
12 "feed_forward_proj": "gated-gelu",
13 "initializer_factor": 1.0,
14 "is_encoder_decoder": true,
15 "layer_norm_epsilon": 1e-06,
16 "model_type": "mt5",
17 "num_decoder_layers": 12,
18 "num_heads": 12,
19 "num_layers": 12,
20 "output_past": true,
21 "pad_token_id": 0,
22 "relative_attention_num_buckets": 32,
23 "tie_word_embeddings": false,
24 "tokenizer_class": "T5Tokenizer",
25 "transformers_version": "4.2.1",
26 "use_cache": true,
27 "vocab_size": 250112
28 }
29