File size: 1,516 Bytes
a27545e
4eb58ed
9bf5f86
 
 
4eb58ed
a27545e
 
4eb58ed
a27545e
4eb58ed
a27545e
 
 
4eb58ed
a27545e
4eb58ed
a27545e
 
 
 
 
 
 
4eb58ed
 
a27545e
 
 
4eb58ed
a27545e
 
 
 
 
 
 
 
4eb58ed
a27545e
4eb58ed
a27545e
 
 
 
 
 
 
4eb58ed
a27545e
 
 
 
4eb58ed
a27545e
 
 
4eb58ed
 
 
 
 
 
 
a27545e
 
 
 
4eb58ed
a27545e
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
{
  "amp": 1,
  "architectures": [
    "FlaubertWithLMHeadModel"
  ],
  "asm": false,
  "attention_dropout": 0.1,
  "bos_index": 0,
  "bos_token_id": 0,
  "bptt": 512,
  "causal": false,
  "clip_grad_norm": 5,
  "dropout": 0.1,
  "emb_dim": 1024,
  "embed_init_std": 0.02209708691207961,
  "encoder_only": true,
  "end_n_top": 5,
  "eos_index": 1,
  "fp16": true,
  "gelu_activation": true,
  "group_by_size": true,
  "id2lang": {
    "0": "fr"
  },
  "init_std": 0.02,
  "is_encoder": true,
  "lang2id": {
    "fr": 0
  },
  "lang_id": 0,
  "langs": [
    "fr"
  ],
  "layer_norm_eps": 1e-06,
  "layerdrop": 0.2,
  "lg_sampling_factor": -1,
  "lgs": "fr",
  "mask_index": 5,
  "mask_token_id": 0,
  "max_batch_size": 0,
  "max_position_embeddings": 512,
  "max_vocab": -1,
  "mlm_steps": [
    [
      "fr",
      null
    ]
  ],
  "model_type": "flaubert",
  "n_heads": 16,
  "n_langs": 1,
  "n_layers": 24,
  "pad_index": 2,
  "pad_token_id": 2,
  "pre_norm": true,
  "sample_alpha": 0,
  "share_inout_emb": true,
  "sinusoidal_embeddings": false,
  "start_n_top": 5,
  "summary_activation": null,
  "summary_first_dropout": 0.1,
  "summary_proj_to_labels": true,
  "summary_type": "first",
  "summary_use_proj": true,
  "tokens_per_batch": -1,
  "unk_index": 3,
  "use_apex": true,
  "use_lang_emb": true,
  "vocab_size": 68729,
  "word_blank": 0,
  "word_dropout": 0,
  "word_keep": 0.1,
  "word_mask": 0.8,
  "word_mask_keep_rand": "0.8,0.1,0.1",
  "word_pred": 0.15,
  "word_rand": 0.1,
  "word_shuffle": 0
}