File size: 1,496 Bytes
f5de5d5
5adefcd
d75459b
 
 
5adefcd
f5de5d5
 
5adefcd
f5de5d5
5adefcd
f5de5d5
 
 
5adefcd
f5de5d5
5adefcd
f5de5d5
 
 
 
 
 
 
5adefcd
 
f5de5d5
 
 
5adefcd
f5de5d5
 
 
5adefcd
 
f5de5d5
 
 
5adefcd
f5de5d5
5adefcd
f5de5d5
 
 
 
 
 
 
5adefcd
f5de5d5
 
 
 
5adefcd
 
f5de5d5
 
5adefcd
 
 
 
 
 
 
f5de5d5
 
 
5adefcd
f5de5d5
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
{
  "amp": 1,
  "architectures": [
    "FlaubertWithLMHeadModel"
  ],
  "asm": false,
  "attention_dropout": 0.1,
  "bos_index": 0,
  "bos_token_id": 0,
  "bptt": 512,
  "causal": false,
  "clip_grad_norm": 5,
  "dropout": 0.1,
  "emb_dim": 768,
  "embed_init_std": 0.02209708691207961,
  "encoder_only": true,
  "end_n_top": 5,
  "eos_index": 1,
  "fp16": true,
  "gelu_activation": true,
  "group_by_size": true,
  "id2lang": {
    "0": "fr"
  },
  "init_std": 0.02,
  "is_encoder": true,
  "lang2id": {
    "fr": 0
  },
  "lang_id": 0,
  "langs": [
    "fr"
  ],
  "layer_norm_eps": 1e-12,
  "layerdrop": 0.0,
  "lg_sampling_factor": -1,
  "lgs": "fr",
  "mask_index": 5,
  "mask_token_id": 0,
  "max_batch_size": 0,
  "max_position_embeddings": 512,
  "max_vocab": -1,
  "mlm_steps": [
    [
      "fr",
      null
    ]
  ],
  "model_type": "flaubert",
  "n_heads": 12,
  "n_langs": 1,
  "n_layers": 12,
  "pad_index": 2,
  "pad_token_id": 2,
  "pre_norm": false,
  "sample_alpha": 0,
  "share_inout_emb": true,
  "sinusoidal_embeddings": false,
  "start_n_top": 5,
  "summary_activation": null,
  "summary_first_dropout": 0.1,
  "summary_proj_to_labels": true,
  "summary_type": "first",
  "summary_use_proj": true,
  "tokens_per_batch": -1,
  "unk_index": 3,
  "use_lang_emb": true,
  "vocab_size": 68729,
  "word_blank": 0,
  "word_dropout": 0,
  "word_keep": 0.1,
  "word_mask": 0.8,
  "word_mask_keep_rand": "0.8,0.1,0.1",
  "word_pred": 0.15,
  "word_rand": 0.1,
  "word_shuffle": 0
}