t5-tiny-random / config.json
{
  "architectures": [
    "T5ForConditionalGeneration"
  ],
  "d_ff": 256,
  "d_kv": 8,
  "d_model": 64,
  "decoder_start_token_id": 0,
  "dropout_rate": 0.1,
  "eos_token_id": 1,
  "initializer_factor": 1.0,
  "is_encoder_decoder": true,
  "layer_norm_epsilon": 1e-06,
  "max_length": 10,
  "model_type": "t5",
  "n_positions": 512,
  "num_heads": 2,
  "num_layers": 2,
  "output_past": true,
  "pad_token_id": 0,
  "relative_attention_num_buckets": 32,
  "task_specific_params": {
    "summarization": {
      "early_stopping": true,
      "length_penalty": 2.0,
      "max_length": 200,
      "min_length": 30,
      "no_repeat_ngram_size": 3,
      "num_beams": 4,
      "prefix": "summarize: "
    },
    "translation_en_to_de": {
      "early_stopping": true,
      "max_length": 300,
      "num_beams": 4,
      "prefix": "translate English to German: "
    },
    "translation_en_to_fr": {
      "early_stopping": true,
      "max_length": 300,
      "num_beams": 4,
      "prefix": "translate English to French: "
    },
    "translation_en_to_ro": {
      "early_stopping": true,
      "max_length": 300,
      "num_beams": 4,
      "prefix": "translate English to Romanian: "
    }
  },
  "vocab_size": 32128
}