system HF staff committed on
Commit
5adefcd
1 Parent(s): d75459b

Update config.json

Browse files
Files changed (1) hide show
  1. config.json +24 -2
config.json CHANGED
@@ -1,15 +1,20 @@
1
  {
 
2
  "architectures": [
3
  "FlaubertWithLMHeadModel"
4
  ],
5
- "amp": 1,
6
  "attention_dropout": 0.1,
7
  "bos_index": 0,
 
8
  "bptt": 512,
 
9
  "clip_grad_norm": 5,
10
  "dropout": 0.1,
11
  "emb_dim": 768,
 
12
  "encoder_only": true,
 
13
  "eos_index": 1,
14
  "fp16": true,
15
  "gelu_activation": true,
@@ -17,16 +22,23 @@
17
  "id2lang": {
18
  "0": "fr"
19
  },
 
 
20
  "lang2id": {
21
  "fr": 0
22
  },
 
23
  "langs": [
24
  "fr"
25
  ],
 
 
26
  "lg_sampling_factor": -1,
27
  "lgs": "fr",
28
  "mask_index": 5,
 
29
  "max_batch_size": 0,
 
30
  "max_vocab": -1,
31
  "mlm_steps": [
32
  [
@@ -34,16 +46,26 @@
34
  null
35
  ]
36
  ],
 
37
  "n_heads": 12,
38
  "n_langs": 1,
39
  "n_layers": 12,
40
- "n_words": 68729,
41
  "pad_index": 2,
 
 
42
  "sample_alpha": 0,
43
  "share_inout_emb": true,
 
 
 
 
 
 
 
44
  "tokens_per_batch": -1,
45
  "unk_index": 3,
46
  "use_lang_emb": true,
 
47
  "word_blank": 0,
48
  "word_dropout": 0,
49
  "word_keep": 0.1,
 
1
  {
2
+ "amp": 1,
3
  "architectures": [
4
  "FlaubertWithLMHeadModel"
5
  ],
6
+ "asm": false,
7
  "attention_dropout": 0.1,
8
  "bos_index": 0,
9
+ "bos_token_id": 0,
10
  "bptt": 512,
11
+ "causal": false,
12
  "clip_grad_norm": 5,
13
  "dropout": 0.1,
14
  "emb_dim": 768,
15
+ "embed_init_std": 0.02209708691207961,
16
  "encoder_only": true,
17
+ "end_n_top": 5,
18
  "eos_index": 1,
19
  "fp16": true,
20
  "gelu_activation": true,
 
22
  "id2lang": {
23
  "0": "fr"
24
  },
25
+ "init_std": 0.02,
26
+ "is_encoder": true,
27
  "lang2id": {
28
  "fr": 0
29
  },
30
+ "lang_id": 0,
31
  "langs": [
32
  "fr"
33
  ],
34
+ "layer_norm_eps": 1e-12,
35
+ "layerdrop": 0.0,
36
  "lg_sampling_factor": -1,
37
  "lgs": "fr",
38
  "mask_index": 5,
39
+ "mask_token_id": 0,
40
  "max_batch_size": 0,
41
+ "max_position_embeddings": 512,
42
  "max_vocab": -1,
43
  "mlm_steps": [
44
  [
 
46
  null
47
  ]
48
  ],
49
+ "model_type": "flaubert",
50
  "n_heads": 12,
51
  "n_langs": 1,
52
  "n_layers": 12,
 
53
  "pad_index": 2,
54
+ "pad_token_id": 2,
55
+ "pre_norm": false,
56
  "sample_alpha": 0,
57
  "share_inout_emb": true,
58
+ "sinusoidal_embeddings": false,
59
+ "start_n_top": 5,
60
+ "summary_activation": null,
61
+ "summary_first_dropout": 0.1,
62
+ "summary_proj_to_labels": true,
63
+ "summary_type": "first",
64
+ "summary_use_proj": true,
65
  "tokens_per_batch": -1,
66
  "unk_index": 3,
67
  "use_lang_emb": true,
68
+ "vocab_size": 68729,
69
  "word_blank": 0,
70
  "word_dropout": 0,
71
  "word_keep": 0.1,