{ "amp": 1, "architectures": [ "FlaubertModel" ], "asm": false, "attention_dropout": 0.1, "bos_index": 0, "bos_token_id": null, "bptt": 512, "causal": false, "clip_grad_norm": 5, "do_sample": false, "dropout": 0.1, "emb_dim": 768, "embed_init_std": 0.02209708691207961, "encoder_only": true, "end_n_top": 5, "eos_index": 1, "eos_token_ids": null, "finetuning_task": null, "fp16": true, "gelu_activation": true, "group_by_size": true, "id2label": { "0": "LABEL_0", "1": "LABEL_1" }, "id2lang": { "0": "fr" }, "init_std": 0.02, "is_decoder": false, "is_encoder": true, "label2id": { "LABEL_0": 0, "LABEL_1": 1 }, "lang2id": { "fr": 0 }, "lang_id": 0, "langs": [ "fr" ], "layer_norm_eps": 1e-12, "layerdrop": 0.0, "length_penalty": 1.0, "lg_sampling_factor": -1, "lgs": "fr", "mask_index": 5, "mask_token_id": 0, "max_batch_size": 0, "max_length": 20, "max_position_embeddings": 512, "max_vocab": -1, "mlm_steps": [ [ "fr", null ] ], "model_type": "flaubert", "n_heads": 12, "n_langs": 1, "n_layers": 12, "num_beams": 1, "num_labels": 2, "num_return_sequences": 1, "output_attentions": false, "output_hidden_states": false, "output_past": true, "pad_index": 2, "pad_token_id": null, "pre_norm": false, "pruned_heads": {}, "repetition_penalty": 1.0, "sample_alpha": 0, "share_inout_emb": true, "sinusoidal_embeddings": false, "start_n_top": 5, "summary_activation": null, "summary_first_dropout": 0.1, "summary_proj_to_labels": true, "summary_type": "first", "summary_use_proj": true, "temperature": 1.0, "tokens_per_batch": -1, "top_k": 50, "top_p": 1.0, "torchscript": false, "unk_index": 3, "use_bfloat16": false, "use_lang_emb": true, "vocab_size": 68729, "word_blank": 0, "word_dropout": 0, "word_keep": 0.1, "word_mask": 0.8, "word_mask_keep_rand": "0.8,0.1,0.1", "word_pred": 0.15, "word_rand": 0.1, "word_shuffle": 0 }