File size: 1,496 Bytes
372bfe3 f2df4ea 7416ff1 f2df4ea 372bfe3 f2df4ea 372bfe3 f2df4ea 372bfe3 f2df4ea 372bfe3 f2df4ea 372bfe3 f2df4ea 372bfe3 f2df4ea 372bfe3 f2df4ea 372bfe3 f2df4ea 372bfe3 f2df4ea 372bfe3 f2df4ea 372bfe3 f2df4ea 372bfe3 f2df4ea 372bfe3 f2df4ea 372bfe3 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 |
{
"amp": 1,
"architectures": [
"FlaubertWithLMHeadModel"
],
"asm": false,
"attention_dropout": 0.1,
"bos_index": 0,
"bos_token_id": 0,
"bptt": 512,
"causal": false,
"clip_grad_norm": 5,
"dropout": 0.1,
"emb_dim": 768,
"embed_init_std": 0.02209708691207961,
"encoder_only": true,
"end_n_top": 5,
"eos_index": 1,
"fp16": true,
"gelu_activation": true,
"group_by_size": true,
"id2lang": {
"0": "fr"
},
"init_std": 0.02,
"is_encoder": true,
"lang2id": {
"fr": 0
},
"lang_id": 0,
"langs": [
"fr"
],
"layer_norm_eps": 1e-12,
"layerdrop": 0.0,
"lg_sampling_factor": -1,
"lgs": "fr",
"mask_index": 5,
"mask_token_id": 0,
"max_batch_size": 0,
"max_position_embeddings": 512,
"max_vocab": -1,
"mlm_steps": [
[
"fr",
null
]
],
"model_type": "flaubert",
"n_heads": 12,
"n_langs": 1,
"n_layers": 12,
"pad_index": 2,
"pad_token_id": 2,
"pre_norm": false,
"sample_alpha": 0,
"share_inout_emb": true,
"sinusoidal_embeddings": false,
"start_n_top": 5,
"summary_activation": null,
"summary_first_dropout": 0.1,
"summary_proj_to_labels": true,
"summary_type": "first",
"summary_use_proj": true,
"tokens_per_batch": -1,
"unk_index": 3,
"use_lang_emb": true,
"vocab_size": 67542,
"word_blank": 0,
"word_dropout": 0,
"word_keep": 0.1,
"word_mask": 0.8,
"word_mask_keep_rand": "0.8,0.1,0.1",
"word_pred": 0.15,
"word_rand": 0.1,
"word_shuffle": 0
}
|