File size: 1,496 Bytes
dd40f26
7c2e29f
7b57178
 
 
7c2e29f
dd40f26
 
7c2e29f
dd40f26
7c2e29f
dd40f26
 
 
7c2e29f
dd40f26
7c2e29f
dd40f26
 
 
 
 
 
 
7c2e29f
 
dd40f26
 
 
7c2e29f
dd40f26
 
 
7c2e29f
 
dd40f26
 
 
7c2e29f
dd40f26
7c2e29f
dd40f26
 
 
 
 
 
 
7c2e29f
dd40f26
 
 
 
7c2e29f
 
dd40f26
 
7c2e29f
 
 
 
 
 
 
dd40f26
 
 
7c2e29f
dd40f26
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
{
  "amp": 1,
  "architectures": [
    "FlaubertWithLMHeadModel"
  ],
  "asm": false,
  "attention_dropout": 0.1,
  "bos_index": 0,
  "bos_token_id": 0,
  "bptt": 512,
  "causal": false,
  "clip_grad_norm": 5,
  "dropout": 0.1,
  "emb_dim": 768,
  "embed_init_std": 0.02209708691207961,
  "encoder_only": true,
  "end_n_top": 5,
  "eos_index": 1,
  "fp16": true,
  "gelu_activation": true,
  "group_by_size": true,
  "id2lang": {
    "0": "fr"
  },
  "init_std": 0.02,
  "is_encoder": true,
  "lang2id": {
    "fr": 0
  },
  "lang_id": 0,
  "langs": [
    "fr"
  ],
  "layer_norm_eps": 1e-12,
  "layerdrop": 0.0,
  "lg_sampling_factor": -1,
  "lgs": "fr",
  "mask_index": 5,
  "mask_token_id": 0,
  "max_batch_size": 0,
  "max_position_embeddings": 512,
  "max_vocab": -1,
  "mlm_steps": [
    [
      "fr",
      null
    ]
  ],
  "model_type": "flaubert",
  "n_heads": 12,
  "n_langs": 1,
  "n_layers": 12,
  "pad_index": 2,
  "pad_token_id": 2,
  "pre_norm": false,
  "sample_alpha": 0,
  "share_inout_emb": true,
  "sinusoidal_embeddings": false,
  "start_n_top": 5,
  "summary_activation": null,
  "summary_first_dropout": 0.1,
  "summary_proj_to_labels": true,
  "summary_type": "first",
  "summary_use_proj": true,
  "tokens_per_batch": -1,
  "unk_index": 3,
  "use_lang_emb": true,
  "vocab_size": 67542,
  "word_blank": 0,
  "word_dropout": 0,
  "word_keep": 0.1,
  "word_mask": 0.8,
  "word_mask_keep_rand": "0.8,0.1,0.1",
  "word_pred": 0.15,
  "word_rand": 0.1,
  "word_shuffle": 0
}