File size: 1,516 Bytes
04a532c
f9256f3
3f6ff1c
 
 
f9256f3
04a532c
 
f9256f3
04a532c
f9256f3
04a532c
 
 
f9256f3
04a532c
f9256f3
04a532c
 
 
 
 
 
 
f9256f3
 
04a532c
 
 
f9256f3
04a532c
 
 
 
 
 
 
 
f9256f3
04a532c
f9256f3
04a532c
 
 
 
 
 
 
f9256f3
04a532c
 
 
 
f9256f3
04a532c
 
 
f9256f3
 
 
 
 
 
 
04a532c
 
 
 
f9256f3
04a532c
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
{
  "amp": 1,
  "architectures": [
    "FlaubertWithLMHeadModel"
  ],
  "asm": false,
  "attention_dropout": 0.1,
  "bos_index": 0,
  "bos_token_id": 0,
  "bptt": 512,
  "causal": false,
  "clip_grad_norm": 5,
  "dropout": 0.1,
  "emb_dim": 1024,
  "embed_init_std": 0.02209708691207961,
  "encoder_only": true,
  "end_n_top": 5,
  "eos_index": 1,
  "fp16": true,
  "gelu_activation": true,
  "group_by_size": true,
  "id2lang": {
    "0": "fr"
  },
  "init_std": 0.02,
  "is_encoder": true,
  "lang2id": {
    "fr": 0
  },
  "lang_id": 0,
  "langs": [
    "fr"
  ],
  "layer_norm_eps": 1e-06,
  "layerdrop": 0.2,
  "lg_sampling_factor": -1,
  "lgs": "fr",
  "mask_index": 5,
  "mask_token_id": 0,
  "max_batch_size": 0,
  "max_position_embeddings": 512,
  "max_vocab": -1,
  "mlm_steps": [
    [
      "fr",
      null
    ]
  ],
  "model_type": "flaubert",
  "n_heads": 16,
  "n_langs": 1,
  "n_layers": 24,
  "pad_index": 2,
  "pad_token_id": 2,
  "pre_norm": true,
  "sample_alpha": 0,
  "share_inout_emb": true,
  "sinusoidal_embeddings": false,
  "start_n_top": 5,
  "summary_activation": null,
  "summary_first_dropout": 0.1,
  "summary_proj_to_labels": true,
  "summary_type": "first",
  "summary_use_proj": true,
  "tokens_per_batch": -1,
  "unk_index": 3,
  "use_apex": true,
  "use_lang_emb": true,
  "vocab_size": 68729,
  "word_blank": 0,
  "word_dropout": 0,
  "word_keep": 0.1,
  "word_mask": 0.8,
  "word_mask_keep_rand": "0.8,0.1,0.1",
  "word_pred": 0.15,
  "word_rand": 0.1,
  "word_shuffle": 0
}