Transformers
Back to all models
Model: xlm-mlm-17-1280

Monthly model downloads

xlm-mlm-17-1280
- downloads
last 30 days

How to use this model directly from the 🤗/transformers library:

			
Copy model
tokenizer = AutoTokenizer.from_pretrained("xlm-mlm-17-1280")
model = AutoModel.from_pretrained("xlm-mlm-17-1280")

Config

See raw config file
dump_path: "/checkpoint/aconneau/dumped/xlm_17_100_big.3/16656237" ...
exp_name: "xlm_17_100_big.3" ...
save_periodic: 0 ...
exp_id: "16656237" ...
fp16: true ...
amp: 2 ...
encoder_only: true ...
emb_dim: 1280 ...
n_layers: 16 ...
n_heads: 16 ...
dropout: 0.1 ...
attention_dropout: 0.1 ...
gelu_activation: true ...
share_inout_emb: true ...
sinusoidal_embeddings: false ...
use_lang_emb: false ...
use_memory: false ...
asm: false ...
context_size: 0 ...
word_pred: 0.15 ...
sample_alpha: 0.5 ...
word_mask_keep_rand: "0.8,0.1,0.1" ...
word_shuffle: 0 ...
word_dropout: 0 ...
word_blank: 0 ...
data_path: "/private/home/aconneau/projects/XLM/data/wiki/17/175k" ...
lgs: "en-fr-es-de-it-pt-nl-sv-pl-ru-ar-tr-zh-ja-ko-hi-vi" ...
max_vocab: 200000 ...
min_count: 0 ...
lg_sampling_factor: 0.7 ...
bptt: 256 ...
max_len: 200 ...
group_by_size: true ...
batch_size: 16 ...
max_batch_size: 0 ...
tokens_per_batch: -1 ...
split_data: true ...
optimizer: "adam_inverse_sqrt,lr=0.00005,warmup_updates=30000,beta1=0.9,beta2=0.999,weight_decay=0.01,eps=0.000001" ...
clip_grad_norm: 1 ...
epoch_size: 200000 ...
max_epoch: 100000 ...
stopping_criterion: "_valid_zh_mlm_ppl,25" ...
validation_metrics: "_valid_en_mlm_ppl,_valid_mlm_ppl,_valid_zh_mlm_ppl" ...
accumulate_gradients: 4 ...
lambda_mlm: 1 ...
lambda_clm: 1 ...
lambda_pc: 1 ...
lambda_ae: 1 ...
lambda_mt: 1 ...
lambda_bt: 1 ...
▾ clm_steps: [] ...
▾ mlm_steps: [ [ "en", null ], [ "fr", null ], [ "es", null ], [ "de", null ], [ "it", null ], [ "pt", null ], [ "nl", null ], [ "sv", null ], [ "pl", null ], [ "ru", null ], [ "ar", null ], [ "tr", null ], [ "zh", null ], [ "ja", null ], [ "ko", null ], [ "hi", null ], [ "vi", null ] ] ...
▾ mt_steps: [] ...
▾ ae_steps: [] ...
▾ bt_steps: [] ...
▾ pc_steps: [] ...
reload_emb: "" ...
reload_model: "/checkpoint/aconneau/dumped/xlm_17_100_240_big_model_upper.2/14884510/best-valid_zh_mlm_ppl.pth" ...
reload_checkpoint: "" ...
beam_size: 1 ...
length_penalty: 1 ...
early_stopping: false ...
eval_bleu: false ...
eval_only: false ...
debug_train: false ...
debug_slurm: false ...
debug: false ...
local_rank: 0 ...
master_port: 14148 ...
▾ langs: [ "en", "fr", "es", "de", "it", "pt", "nl", "sv", "pl", "ru", "ar", "tr", "zh", "ja", "ko", "hi", "vi" ] ...
▾ id2lang: { "0": "ar", "1": "de", "2": "en", "3": "es", "4": "fr", "5": "hi", "6": "it", "7": "ja", "8": "ko", "9": "nl", "10": "pl", "11": "pt", "12": "ru", "13": "sv", "14": "tr", "15": "vi", "16": "zh" } ...
▾ lang2id: { "ar": 0, "de": 1, "en": 2, "es": 3, "fr": 4, "hi": 5, "it": 6, "ja": 7, "ko": 8, "nl": 9, "pl": 10, "pt": 11, "ru": 12, "sv": 13, "tr": 14, "vi": 15, "zh": 16 } ...
n_langs: 17 ...
▾ bt_src_langs: [] ...
▾ mono_dataset: { "en": { "train": "/private/home/aconneau/projects/XLM/data/wiki/17/175k/train.en.pth", "valid": "/private/home/aconneau/projects/XLM/data/wiki/17/175k/valid.en.pth", "test": "/private/home/aconneau/projects/XLM/data/wiki/17/175k/test.en.pth" }, "fr": { "train": "/private/home/aconneau/projects/XLM/data/wiki/17/175k/train.fr.pth", "valid": "/private/home/aconneau/projects/XLM/data/wiki/17/175k/valid.fr.pth", "test": "/private/home/aconneau/projects/XLM/data/wiki/17/175k/test.fr.pth" }, "es": { "train": "/private/home/aconneau/projects/XLM/data/wiki/17/175k/train.es.pth", "valid": "/private/home/aconneau/projects/XLM/data/wiki/17/175k/valid.es.pth", "test": "/private/home/aconneau/projects/XLM/data/wiki/17/175k/test.es.pth" }, "de": { "train": "/private/home/aconneau/projects/XLM/data/wiki/17/175k/train.de.pth", "valid": "/private/home/aconneau/projects/XLM/data/wiki/17/175k/valid.de.pth", "test": "/private/home/aconneau/projects/XLM/data/wiki/17/175k/test.de.pth" }, "it": { "train": "/private/home/aconneau/projects/XLM/data/wiki/17/175k/train.it.pth", "valid": "/private/home/aconneau/projects/XLM/data/wiki/17/175k/valid.it.pth", "test": "/private/home/aconneau/projects/XLM/data/wiki/17/175k/test.it.pth" }, "pt": { "train": "/private/home/aconneau/projects/XLM/data/wiki/17/175k/train.pt.pth", "valid": "/private/home/aconneau/projects/XLM/data/wiki/17/175k/valid.pt.pth", "test": "/private/home/aconneau/projects/XLM/data/wiki/17/175k/test.pt.pth" }, "nl": { "train": "/private/home/aconneau/projects/XLM/data/wiki/17/175k/train.nl.pth", "valid": "/private/home/aconneau/projects/XLM/data/wiki/17/175k/valid.nl.pth", "test": "/private/home/aconneau/projects/XLM/data/wiki/17/175k/test.nl.pth" }, "sv": { "train": "/private/home/aconneau/projects/XLM/data/wiki/17/175k/train.sv.pth", "valid": "/private/home/aconneau/projects/XLM/data/wiki/17/175k/valid.sv.pth", "test": "/private/home/aconneau/projects/XLM/data/wiki/17/175k/test.sv.pth" }, "pl": { "train": 
"/private/home/aconneau/projects/XLM/data/wiki/17/175k/train.pl.pth", "valid": "/private/home/aconneau/projects/XLM/data/wiki/17/175k/valid.pl.pth", "test": "/private/home/aconneau/projects/XLM/data/wiki/17/175k/test.pl.pth" }, "ru": { "train": "/private/home/aconneau/projects/XLM/data/wiki/17/175k/train.ru.pth", "valid": "/private/home/aconneau/projects/XLM/data/wiki/17/175k/valid.ru.pth", "test": "/private/home/aconneau/projects/XLM/data/wiki/17/175k/test.ru.pth" }, "ar": { "train": "/private/home/aconneau/projects/XLM/data/wiki/17/175k/train.ar.pth", "valid": "/private/home/aconneau/projects/XLM/data/wiki/17/175k/valid.ar.pth", "test": "/private/home/aconneau/projects/XLM/data/wiki/17/175k/test.ar.pth" }, "tr": { "train": "/private/home/aconneau/projects/XLM/data/wiki/17/175k/train.tr.pth", "valid": "/private/home/aconneau/projects/XLM/data/wiki/17/175k/valid.tr.pth", "test": "/private/home/aconneau/projects/XLM/data/wiki/17/175k/test.tr.pth" }, "zh": { "train": "/private/home/aconneau/projects/XLM/data/wiki/17/175k/train.zh.pth", "valid": "/private/home/aconneau/projects/XLM/data/wiki/17/175k/valid.zh.pth", "test": "/private/home/aconneau/projects/XLM/data/wiki/17/175k/test.zh.pth" }, "ja": { "train": "/private/home/aconneau/projects/XLM/data/wiki/17/175k/train.ja.pth", "valid": "/private/home/aconneau/projects/XLM/data/wiki/17/175k/valid.ja.pth", "test": "/private/home/aconneau/projects/XLM/data/wiki/17/175k/test.ja.pth" }, "ko": { "train": "/private/home/aconneau/projects/XLM/data/wiki/17/175k/train.ko.pth", "valid": "/private/home/aconneau/projects/XLM/data/wiki/17/175k/valid.ko.pth", "test": "/private/home/aconneau/projects/XLM/data/wiki/17/175k/test.ko.pth" }, "hi": { "train": "/private/home/aconneau/projects/XLM/data/wiki/17/175k/train.hi.pth", "valid": "/private/home/aconneau/projects/XLM/data/wiki/17/175k/valid.hi.pth", "test": "/private/home/aconneau/projects/XLM/data/wiki/17/175k/test.hi.pth" }, "vi": { "train": 
"/private/home/aconneau/projects/XLM/data/wiki/17/175k/train.vi.pth", "valid": "/private/home/aconneau/projects/XLM/data/wiki/17/175k/valid.vi.pth", "test": "/private/home/aconneau/projects/XLM/data/wiki/17/175k/test.vi.pth" } } ...
▾ para_dataset: {} ...
word_mask: 0.8 ...
word_keep: 0.1 ...
word_rand: 0.1 ...
is_slurm_job: true ...
n_nodes: 4 ...
node_id: 0 ...
global_rank: 0 ...
world_size: 32 ...
n_gpu_per_node: 8 ...
master_addr: "learnfair1605" ...
is_master: true ...
multi_node: true ...
multi_gpu: true ...
command: "python /private/home/aconneau/workdir/xlm_17_100_big.3/2019_08_10_19_23_42/train.py --n_heads 16 --bt_steps '' --max_vocab 200000 --word_mask_keep_rand '0.8,0.1,0.1' --use_lang_emb false --data_path '/private/home/aconneau/projects/XLM/data/wiki/17/175k' --save_periodic 0 --max_len 200 --bptt 256 --ae_steps '' --fp16 true --share_inout_emb true --sinusoidal_embeddings false --word_shuffle 0 --tokens_per_batch '-1' --accumulate_gradients 4 --validation_metrics '_valid_en_mlm_ppl,_valid_mlm_ppl,_valid_zh_mlm_ppl' --attention_dropout '0.1' --split_data true --max_epoch 100000 --stopping_criterion '_valid_zh_mlm_ppl,25' --dump_path '/checkpoint/aconneau/dumped' --epoch_size 200000 --word_blank 0 --gelu_activation true --n_layers 16 --optimizer 'adam_inverse_sqrt,lr=0.00005,warmup_updates=30000,beta1=0.9,beta2=0.999,weight_decay=0.01,eps=0.000001' --mlm_steps 'en,fr,es,de,it,pt,nl,sv,pl,ru,ar,tr,zh,ja,ko,hi,vi' --eval_bleu false --dropout '0.1' --mt_steps '' --batch_size 16 --word_dropout 0 --reload_model '/checkpoint/aconneau/dumped/xlm_17_100_240_big_model_upper.2/14884510/best-valid_zh_mlm_ppl.pth' --min_count 0 --amp 2 --group_by_size true --asm false --sample_alpha '0.5' --word_pred '0.15' --clip_grad_norm 1 --emb_dim 1280 --encoder_only true --lgs 'en-fr-es-de-it-pt-nl-sv-pl-ru-ar-tr-zh-ja-ko-hi-vi' --clm_steps '' --exp_name 'xlm_17_100_big.3' --lg_sampling_factor '0.7' --eval_only false --exp_id 16656237 --master_port 14148 --exp_id \"16656237\"" ...
n_words: 200000 ...
bos_index: 0 ...
eos_index: 1 ...
pad_index: 2 ...
unk_index: 3 ...
mask_index: 5 ...
▾ lambda_clm_config: null ...
▾ lambda_mlm_config: null ...
▾ lambda_pc_config: null ...
▾ lambda_ae_config: null ...
▾ lambda_mt_config: null ...
▾ lambda_bt_config: null ...
hyp_path: "/checkpoint/aconneau/dumped/xlm_17_100_big.3/16656237/hypotheses" ...
▾ ref_paths: {} ...
▾ mono_list: [ "en", "fr", "es", "de", "it", "pt", "nl", "sv", "pl", "ru", "ar", "tr", "zh", "ja", "ko", "hi", "vi" ] ...
▾ para_list: [] ...