Transformers
Back to all models
Model: xlm-mlm-100-1280

Monthly model downloads

xlm-mlm-100-1280 xlm-mlm-100-1280
- downloads
last 30 days

How to use this model directly from the 🤗/transformers library:

			
Copy model
from transformers import AutoModel, AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained("xlm-mlm-100-1280")
model = AutoModel.from_pretrained("xlm-mlm-100-1280")

Config

See raw config file
dump_path: "/checkpoint/aconneau/dumped/xlm_17_100_big.3/16656234" ...
exp_name: "xlm_17_100_big.3" ...
save_periodic: 0 ...
exp_id: "16656234" ...
fp16: true ...
amp: 2 ...
encoder_only: true ...
emb_dim: 1280 ...
n_layers: 16 ...
n_heads: 16 ...
dropout: 0.1 ...
attention_dropout: 0.1 ...
gelu_activation: true ...
share_inout_emb: true ...
sinusoidal_embeddings: false ...
use_lang_emb: false ...
use_memory: false ...
asm: false ...
context_size: 0 ...
word_pred: 0.15 ...
sample_alpha: 0.5 ...
word_mask_keep_rand: "0.8,0.1,0.1" ...
word_shuffle: 0 ...
word_dropout: 0 ...
word_blank: 0 ...
data_path: "/private/home/aconneau/projects/XLM/data/wiki/100/175k" ...
lgs: "en-es-fr-de-zh-ru-pt-it-ar-ja-id-tr-nl-pl-simple-fa-vi-sv-ko-he-ro-no-hi-uk-cs-fi-hu-th-da-ca-el-bg-sr-ms-bn-hr-sl-zh_yue-az-sk-eo-ta-sh-lt-et-ml-la-bs-sq-arz-af-ka-mr-eu-tl-ang-gl-nn-ur-kk-be-hy-te-lv-mk-zh_classical-als-is-wuu-my-sco-mn-ceb-ast-cy-kn-br-an-gu-bar-uz-lb-ne-si-war-jv-ga-zh_min_nan-oc-ku-sw-nds-ckb-ia-yi-fy-scn-gan-tt-am" ...
max_vocab: 200000 ...
min_count: 0 ...
lg_sampling_factor: 0.7 ...
bptt: 256 ...
max_len: 200 ...
group_by_size: true ...
batch_size: 16 ...
max_batch_size: 0 ...
tokens_per_batch: -1 ...
split_data: true ...
optimizer: "adam_inverse_sqrt,lr=0.00005,warmup_updates=30000,beta1=0.9,beta2=0.999,weight_decay=0.01,eps=0.000001" ...
clip_grad_norm: 1 ...
epoch_size: 200000 ...
max_epoch: 100000 ...
stopping_criterion: "_valid_zh_mlm_ppl,25" ...
validation_metrics: "_valid_en_mlm_ppl,_valid_mlm_ppl,_valid_zh_mlm_ppl" ...
accumulate_gradients: 4 ...
lambda_mlm: 1 ...
lambda_clm: 1 ...
lambda_pc: 1 ...
lambda_ae: 1 ...
lambda_mt: 1 ...
lambda_bt: 1 ...
▾ clm_steps: [] ...
▾ mlm_steps: [ [ "en", null ], [ "es", null ], [ "fr", null ], [ "de", null ], [ "zh", null ], [ "ru", null ], [ "pt", null ], [ "it", null ], [ "ar", null ], [ "ja", null ], [ "id", null ], [ "tr", null ], [ "nl", null ], [ "pl", null ], [ "simple", null ], [ "fa", null ], [ "vi", null ], [ "sv", null ], [ "ko", null ], [ "he", null ], [ "ro", null ], [ "no", null ], [ "hi", null ], [ "uk", null ], [ "cs", null ], [ "fi", null ], [ "hu", null ], [ "th", null ], [ "da", null ], [ "ca", null ], [ "el", null ], [ "bg", null ], [ "sr", null ], [ "ms", null ], [ "bn", null ], [ "hr", null ], [ "sl", null ], [ "zh_yue", null ], [ "az", null ], [ "sk", null ], [ "eo", null ], [ "ta", null ], [ "sh", null ], [ "lt", null ], [ "et", null ], [ "ml", null ], [ "la", null ], [ "bs", null ], [ "sq", null ], [ "arz", null ], [ "af", null ], [ "ka", null ], [ "mr", null ], [ "eu", null ], [ "tl", null ], [ "ang", null ], [ "gl", null ], [ "nn", null ], [ "ur", null ], [ "kk", null ], [ "be", null ], [ "hy", null ], [ "te", null ], [ "lv", null ], [ "mk", null ], [ "zh_classical", null ], [ "als", null ], [ "is", null ], [ "wuu", null ], [ "my", null ], [ "sco", null ], [ "mn", null ], [ "ceb", null ], [ "ast", null ], [ "cy", null ], [ "kn", null ], [ "br", null ], [ "an", null ], [ "gu", null ], [ "bar", null ], [ "uz", null ], [ "lb", null ], [ "ne", null ], [ "si", null ], [ "war", null ], [ "jv", null ], [ "ga", null ], [ "zh_min_nan", null ], [ "oc", null ], [ "ku", null ], [ "sw", null ], [ "nds", null ], [ "ckb", null ], [ "ia", null ], [ "yi", null ], [ "fy", null ], [ "scn", null ], [ "gan", null ], [ "tt", null ], [ "am", null ] ] ...
▾ mt_steps: [] ...
▾ ae_steps: [] ...
▾ bt_steps: [] ...
▾ pc_steps: [] ...
reload_emb: "" ...
reload_model: "/checkpoint/aconneau/dumped/xlm_17_100_240_big_model_upper.2/14884511/best-valid_zh_mlm_ppl.pth" ...
reload_checkpoint: "" ...
beam_size: 1 ...
length_penalty: 1 ...
early_stopping: false ...
eval_bleu: false ...
eval_only: false ...
debug_train: false ...
debug_slurm: false ...
debug: false ...
local_rank: 0 ...
master_port: 11363 ...
▾ langs: [ "en", "es", "fr", "de", "zh", "ru", "pt", "it", "ar", "ja", "id", "tr", "nl", "pl", "simple", "fa", "vi", "sv", "ko", "he", "ro", "no", "hi", "uk", "cs", "fi", "hu", "th", "da", "ca", "el", "bg", "sr", "ms", "bn", "hr", "sl", "zh_yue", "az", "sk", "eo", "ta", "sh", "lt", "et", "ml", "la", "bs", "sq", "arz", "af", "ka", "mr", "eu", "tl", "ang", "gl", "nn", "ur", "kk", "be", "hy", "te", "lv", "mk", "zh_classical", "als", "is", "wuu", "my", "sco", "mn", "ceb", "ast", "cy", "kn", "br", "an", "gu", "bar", "uz", "lb", "ne", "si", "war", "jv", "ga", "zh_min_nan", "oc", "ku", "sw", "nds", "ckb", "ia", "yi", "fy", "scn", "gan", "tt", "am" ] ...
▾ id2lang: { "0": "af", "1": "als", "2": "am", "3": "an", "4": "ang", "5": "ar", "6": "arz", "7": "ast", "8": "az", "9": "bar", "10": "be", "11": "bg", "12": "bn", "13": "br", "14": "bs", "15": "ca", "16": "ceb", "17": "ckb", "18": "cs", "19": "cy", "20": "da", "21": "de", "22": "el", "23": "en", "24": "eo", "25": "es", "26": "et", "27": "eu", "28": "fa", "29": "fi", "30": "fr", "31": "fy", "32": "ga", "33": "gan", "34": "gl", "35": "gu", "36": "he", "37": "hi", "38": "hr", "39": "hu", "40": "hy", "41": "ia", "42": "id", "43": "is", "44": "it", "45": "ja", "46": "jv", "47": "ka", "48": "kk", "49": "kn", "50": "ko", "51": "ku", "52": "la", "53": "lb", "54": "lt", "55": "lv", "56": "mk", "57": "ml", "58": "mn", "59": "mr", "60": "ms", "61": "my", "62": "nds", "63": "ne", "64": "nl", "65": "nn", "66": "no", "67": "oc", "68": "pl", "69": "pt", "70": "ro", "71": "ru", "72": "scn", "73": "sco", "74": "sh", "75": "si", "76": "simple", "77": "sk", "78": "sl", "79": "sq", "80": "sr", "81": "sv", "82": "sw", "83": "ta", "84": "te", "85": "th", "86": "tl", "87": "tr", "88": "tt", "89": "uk", "90": "ur", "91": "uz", "92": "vi", "93": "war", "94": "wuu", "95": "yi", "96": "zh", "97": "zh_classical", "98": "zh_min_nan", "99": "zh_yue" } ...
▾ lang2id: { "af": 0, "als": 1, "am": 2, "an": 3, "ang": 4, "ar": 5, "arz": 6, "ast": 7, "az": 8, "bar": 9, "be": 10, "bg": 11, "bn": 12, "br": 13, "bs": 14, "ca": 15, "ceb": 16, "ckb": 17, "cs": 18, "cy": 19, "da": 20, "de": 21, "el": 22, "en": 23, "eo": 24, "es": 25, "et": 26, "eu": 27, "fa": 28, "fi": 29, "fr": 30, "fy": 31, "ga": 32, "gan": 33, "gl": 34, "gu": 35, "he": 36, "hi": 37, "hr": 38, "hu": 39, "hy": 40, "ia": 41, "id": 42, "is": 43, "it": 44, "ja": 45, "jv": 46, "ka": 47, "kk": 48, "kn": 49, "ko": 50, "ku": 51, "la": 52, "lb": 53, "lt": 54, "lv": 55, "mk": 56, "ml": 57, "mn": 58, "mr": 59, "ms": 60, "my": 61, "nds": 62, "ne": 63, "nl": 64, "nn": 65, "no": 66, "oc": 67, "pl": 68, "pt": 69, "ro": 70, "ru": 71, "scn": 72, "sco": 73, "sh": 74, "si": 75, "simple": 76, "sk": 77, "sl": 78, "sq": 79, "sr": 80, "sv": 81, "sw": 82, "ta": 83, "te": 84, "th": 85, "tl": 86, "tr": 87, "tt": 88, "uk": 89, "ur": 90, "uz": 91, "vi": 92, "war": 93, "wuu": 94, "yi": 95, "zh": 96, "zh_classical": 97, "zh_min_nan": 98, "zh_yue": 99 } ...
n_langs: 100 ...
▾ bt_src_langs: [] ...
▾ mono_dataset: { "en": { "train": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/train.en.pth", "valid": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/valid.en.pth", "test": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/test.en.pth" }, "es": { "train": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/train.es.pth", "valid": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/valid.es.pth", "test": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/test.es.pth" }, "fr": { "train": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/train.fr.pth", "valid": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/valid.fr.pth", "test": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/test.fr.pth" }, "de": { "train": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/train.de.pth", "valid": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/valid.de.pth", "test": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/test.de.pth" }, "zh": { "train": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/train.zh.pth", "valid": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/valid.zh.pth", "test": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/test.zh.pth" }, "ru": { "train": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/train.ru.pth", "valid": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/valid.ru.pth", "test": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/test.ru.pth" }, "pt": { "train": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/train.pt.pth", "valid": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/valid.pt.pth", "test": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/test.pt.pth" }, "it": { "train": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/train.it.pth", "valid": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/valid.it.pth", "test": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/test.it.pth" 
}, "ar": { "train": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/train.ar.pth", "valid": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/valid.ar.pth", "test": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/test.ar.pth" }, "ja": { "train": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/train.ja.pth", "valid": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/valid.ja.pth", "test": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/test.ja.pth" }, "id": { "train": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/train.id.pth", "valid": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/valid.id.pth", "test": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/test.id.pth" }, "tr": { "train": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/train.tr.pth", "valid": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/valid.tr.pth", "test": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/test.tr.pth" }, "nl": { "train": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/train.nl.pth", "valid": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/valid.nl.pth", "test": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/test.nl.pth" }, "pl": { "train": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/train.pl.pth", "valid": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/valid.pl.pth", "test": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/test.pl.pth" }, "simple": { "train": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/train.simple.pth", "valid": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/valid.simple.pth", "test": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/test.simple.pth" }, "fa": { "train": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/train.fa.pth", "valid": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/valid.fa.pth", "test": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/test.fa.pth" 
}, "vi": { "train": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/train.vi.pth", "valid": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/valid.vi.pth", "test": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/test.vi.pth" }, "sv": { "train": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/train.sv.pth", "valid": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/valid.sv.pth", "test": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/test.sv.pth" }, "ko": { "train": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/train.ko.pth", "valid": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/valid.ko.pth", "test": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/test.ko.pth" }, "he": { "train": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/train.he.pth", "valid": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/valid.he.pth", "test": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/test.he.pth" }, "ro": { "train": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/train.ro.pth", "valid": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/valid.ro.pth", "test": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/test.ro.pth" }, "no": { "train": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/train.no.pth", "valid": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/valid.no.pth", "test": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/test.no.pth" }, "hi": { "train": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/train.hi.pth", "valid": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/valid.hi.pth", "test": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/test.hi.pth" }, "uk": { "train": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/train.uk.pth", "valid": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/valid.uk.pth", "test": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/test.uk.pth" }, "cs": { 
"train": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/train.cs.pth", "valid": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/valid.cs.pth", "test": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/test.cs.pth" }, "fi": { "train": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/train.fi.pth", "valid": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/valid.fi.pth", "test": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/test.fi.pth" }, "hu": { "train": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/train.hu.pth", "valid": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/valid.hu.pth", "test": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/test.hu.pth" }, "th": { "train": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/train.th.pth", "valid": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/valid.th.pth", "test": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/test.th.pth" }, "da": { "train": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/train.da.pth", "valid": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/valid.da.pth", "test": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/test.da.pth" }, "ca": { "train": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/train.ca.pth", "valid": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/valid.ca.pth", "test": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/test.ca.pth" }, "el": { "train": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/train.el.pth", "valid": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/valid.el.pth", "test": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/test.el.pth" }, "bg": { "train": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/train.bg.pth", "valid": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/valid.bg.pth", "test": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/test.bg.pth" }, "sr": { "train": 
"/private/home/aconneau/projects/XLM/data/wiki/100/175k/train.sr.pth", "valid": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/valid.sr.pth", "test": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/test.sr.pth" }, "ms": { "train": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/train.ms.pth", "valid": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/valid.ms.pth", "test": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/test.ms.pth" }, "bn": { "train": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/train.bn.pth", "valid": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/valid.bn.pth", "test": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/test.bn.pth" }, "hr": { "train": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/train.hr.pth", "valid": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/valid.hr.pth", "test": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/test.hr.pth" }, "sl": { "train": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/train.sl.pth", "valid": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/valid.sl.pth", "test": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/test.sl.pth" }, "zh_yue": { "train": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/train.zh_yue.pth", "valid": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/valid.zh_yue.pth", "test": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/test.zh_yue.pth" }, "az": { "train": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/train.az.pth", "valid": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/valid.az.pth", "test": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/test.az.pth" }, "sk": { "train": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/train.sk.pth", "valid": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/valid.sk.pth", "test": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/test.sk.pth" }, "eo": { "train": 
"/private/home/aconneau/projects/XLM/data/wiki/100/175k/train.eo.pth", "valid": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/valid.eo.pth", "test": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/test.eo.pth" }, "ta": { "train": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/train.ta.pth", "valid": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/valid.ta.pth", "test": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/test.ta.pth" }, "sh": { "train": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/train.sh.pth", "valid": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/valid.sh.pth", "test": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/test.sh.pth" }, "lt": { "train": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/train.lt.pth", "valid": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/valid.lt.pth", "test": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/test.lt.pth" }, "et": { "train": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/train.et.pth", "valid": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/valid.et.pth", "test": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/test.et.pth" }, "ml": { "train": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/train.ml.pth", "valid": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/valid.ml.pth", "test": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/test.ml.pth" }, "la": { "train": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/train.la.pth", "valid": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/valid.la.pth", "test": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/test.la.pth" }, "bs": { "train": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/train.bs.pth", "valid": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/valid.bs.pth", "test": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/test.bs.pth" }, "sq": { "train": 
"/private/home/aconneau/projects/XLM/data/wiki/100/175k/train.sq.pth", "valid": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/valid.sq.pth", "test": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/test.sq.pth" }, "arz": { "train": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/train.arz.pth", "valid": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/valid.arz.pth", "test": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/test.arz.pth" }, "af": { "train": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/train.af.pth", "valid": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/valid.af.pth", "test": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/test.af.pth" }, "ka": { "train": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/train.ka.pth", "valid": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/valid.ka.pth", "test": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/test.ka.pth" }, "mr": { "train": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/train.mr.pth", "valid": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/valid.mr.pth", "test": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/test.mr.pth" }, "eu": { "train": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/train.eu.pth", "valid": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/valid.eu.pth", "test": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/test.eu.pth" }, "tl": { "train": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/train.tl.pth", "valid": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/valid.tl.pth", "test": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/test.tl.pth" }, "ang": { "train": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/train.ang.pth", "valid": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/valid.ang.pth", "test": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/test.ang.pth" }, "gl": { "train": 
"/private/home/aconneau/projects/XLM/data/wiki/100/175k/train.gl.pth", "valid": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/valid.gl.pth", "test": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/test.gl.pth" }, "nn": { "train": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/train.nn.pth", "valid": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/valid.nn.pth", "test": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/test.nn.pth" }, "ur": { "train": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/train.ur.pth", "valid": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/valid.ur.pth", "test": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/test.ur.pth" }, "kk": { "train": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/train.kk.pth", "valid": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/valid.kk.pth", "test": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/test.kk.pth" }, "be": { "train": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/train.be.pth", "valid": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/valid.be.pth", "test": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/test.be.pth" }, "hy": { "train": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/train.hy.pth", "valid": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/valid.hy.pth", "test": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/test.hy.pth" }, "te": { "train": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/train.te.pth", "valid": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/valid.te.pth", "test": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/test.te.pth" }, "lv": { "train": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/train.lv.pth", "valid": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/valid.lv.pth", "test": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/test.lv.pth" }, "mk": { "train": 
"/private/home/aconneau/projects/XLM/data/wiki/100/175k/train.mk.pth", "valid": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/valid.mk.pth", "test": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/test.mk.pth" }, "zh_classical": { "train": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/train.zh_classical.pth", "valid": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/valid.zh_classical.pth", "test": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/test.zh_classical.pth" }, "als": { "train": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/train.als.pth", "valid": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/valid.als.pth", "test": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/test.als.pth" }, "is": { "train": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/train.is.pth", "valid": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/valid.is.pth", "test": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/test.is.pth" }, "wuu": { "train": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/train.wuu.pth", "valid": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/valid.wuu.pth", "test": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/test.wuu.pth" }, "my": { "train": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/train.my.pth", "valid": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/valid.my.pth", "test": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/test.my.pth" }, "sco": { "train": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/train.sco.pth", "valid": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/valid.sco.pth", "test": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/test.sco.pth" }, "mn": { "train": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/train.mn.pth", "valid": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/valid.mn.pth", "test": 
"/private/home/aconneau/projects/XLM/data/wiki/100/175k/test.mn.pth" }, "ceb": { "train": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/train.ceb.pth", "valid": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/valid.ceb.pth", "test": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/test.ceb.pth" }, "ast": { "train": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/train.ast.pth", "valid": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/valid.ast.pth", "test": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/test.ast.pth" }, "cy": { "train": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/train.cy.pth", "valid": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/valid.cy.pth", "test": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/test.cy.pth" }, "kn": { "train": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/train.kn.pth", "valid": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/valid.kn.pth", "test": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/test.kn.pth" }, "br": { "train": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/train.br.pth", "valid": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/valid.br.pth", "test": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/test.br.pth" }, "an": { "train": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/train.an.pth", "valid": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/valid.an.pth", "test": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/test.an.pth" }, "gu": { "train": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/train.gu.pth", "valid": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/valid.gu.pth", "test": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/test.gu.pth" }, "bar": { "train": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/train.bar.pth", "valid": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/valid.bar.pth", "test": 
"/private/home/aconneau/projects/XLM/data/wiki/100/175k/test.bar.pth" }, "uz": { "train": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/train.uz.pth", "valid": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/valid.uz.pth", "test": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/test.uz.pth" }, "lb": { "train": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/train.lb.pth", "valid": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/valid.lb.pth", "test": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/test.lb.pth" }, "ne": { "train": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/train.ne.pth", "valid": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/valid.ne.pth", "test": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/test.ne.pth" }, "si": { "train": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/train.si.pth", "valid": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/valid.si.pth", "test": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/test.si.pth" }, "war": { "train": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/train.war.pth", "valid": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/valid.war.pth", "test": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/test.war.pth" }, "jv": { "train": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/train.jv.pth", "valid": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/valid.jv.pth", "test": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/test.jv.pth" }, "ga": { "train": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/train.ga.pth", "valid": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/valid.ga.pth", "test": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/test.ga.pth" }, "zh_min_nan": { "train": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/train.zh_min_nan.pth", "valid": 
"/private/home/aconneau/projects/XLM/data/wiki/100/175k/valid.zh_min_nan.pth", "test": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/test.zh_min_nan.pth" }, "oc": { "train": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/train.oc.pth", "valid": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/valid.oc.pth", "test": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/test.oc.pth" }, "ku": { "train": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/train.ku.pth", "valid": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/valid.ku.pth", "test": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/test.ku.pth" }, "sw": { "train": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/train.sw.pth", "valid": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/valid.sw.pth", "test": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/test.sw.pth" }, "nds": { "train": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/train.nds.pth", "valid": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/valid.nds.pth", "test": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/test.nds.pth" }, "ckb": { "train": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/train.ckb.pth", "valid": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/valid.ckb.pth", "test": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/test.ckb.pth" }, "ia": { "train": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/train.ia.pth", "valid": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/valid.ia.pth", "test": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/test.ia.pth" }, "yi": { "train": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/train.yi.pth", "valid": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/valid.yi.pth", "test": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/test.yi.pth" }, "fy": { "train": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/train.fy.pth", 
"valid": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/valid.fy.pth", "test": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/test.fy.pth" }, "scn": { "train": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/train.scn.pth", "valid": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/valid.scn.pth", "test": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/test.scn.pth" }, "gan": { "train": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/train.gan.pth", "valid": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/valid.gan.pth", "test": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/test.gan.pth" }, "tt": { "train": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/train.tt.pth", "valid": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/valid.tt.pth", "test": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/test.tt.pth" }, "am": { "train": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/train.am.pth", "valid": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/valid.am.pth", "test": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/test.am.pth" } } ...
▾ para_dataset: {} ...
word_mask: 0.8 ...
word_keep: 0.1 ...
word_rand: 0.1 ...
is_slurm_job: true ...
n_nodes: 4 ...
node_id: 0 ...
global_rank: 0 ...
world_size: 32 ...
n_gpu_per_node: 8 ...
master_addr: "learnfair0332" ...
is_master: true ...
multi_node: true ...
multi_gpu: true ...
command: "python /private/home/aconneau/workdir/xlm_17_100_big.3/2019_08_10_19_23_42/train.py --n_heads 16 --bt_steps '' --max_vocab 200000 --word_mask_keep_rand '0.8,0.1,0.1' --use_lang_emb false --data_path '/private/home/aconneau/projects/XLM/data/wiki/100/175k' --save_periodic 0 --max_len 200 --bptt 256 --ae_steps '' --fp16 true --share_inout_emb true --sinusoidal_embeddings false --word_shuffle 0 --tokens_per_batch '-1' --accumulate_gradients 4 --validation_metrics '_valid_en_mlm_ppl,_valid_mlm_ppl,_valid_zh_mlm_ppl' --attention_dropout '0.1' --split_data true --max_epoch 100000 --stopping_criterion '_valid_zh_mlm_ppl,25' --dump_path '/checkpoint/aconneau/dumped' --epoch_size 200000 --word_blank 0 --gelu_activation true --n_layers 16 --optimizer 'adam_inverse_sqrt,lr=0.00005,warmup_updates=30000,beta1=0.9,beta2=0.999,weight_decay=0.01,eps=0.000001' --mlm_steps 'en,es,fr,de,zh,ru,pt,it,ar,ja,id,tr,nl,pl,simple,fa,vi,sv,ko,he,ro,no,hi,uk,cs,fi,hu,th,da,ca,el,bg,sr,ms,bn,hr,sl,zh_yue,az,sk,eo,ta,sh,lt,et,ml,la,bs,sq,arz,af,ka,mr,eu,tl,ang,gl,nn,ur,kk,be,hy,te,lv,mk,zh_classical,als,is,wuu,my,sco,mn,ceb,ast,cy,kn,br,an,gu,bar,uz,lb,ne,si,war,jv,ga,zh_min_nan,oc,ku,sw,nds,ckb,ia,yi,fy,scn,gan,tt,am' --eval_bleu false --dropout '0.1' --mt_steps '' --batch_size 16 --word_dropout 0 --reload_model '/checkpoint/aconneau/dumped/xlm_17_100_240_big_model_upper.2/14884511/best-valid_zh_mlm_ppl.pth' --min_count 0 --amp 2 --group_by_size true --asm false --sample_alpha '0.5' --word_pred '0.15' --clip_grad_norm 1 --emb_dim 1280 --encoder_only true --lgs 'en-es-fr-de-zh-ru-pt-it-ar-ja-id-tr-nl-pl-simple-fa-vi-sv-ko-he-ro-no-hi-uk-cs-fi-hu-th-da-ca-el-bg-sr-ms-bn-hr-sl-zh_yue-az-sk-eo-ta-sh-lt-et-ml-la-bs-sq-arz-af-ka-mr-eu-tl-ang-gl-nn-ur-kk-be-hy-te-lv-mk-zh_classical-als-is-wuu-my-sco-mn-ceb-ast-cy-kn-br-an-gu-bar-uz-lb-ne-si-war-jv-ga-zh_min_nan-oc-ku-sw-nds-ckb-ia-yi-fy-scn-gan-tt-am' --clm_steps '' --exp_name 'xlm_17_100_big.3' --lg_sampling_factor '0.7' --eval_only false 
--exp_id 16656234 --master_port 11363 --exp_id \"16656234\"" ...
n_words: 200000 ...
bos_index: 0 ...
eos_index: 1 ...
pad_index: 2 ...
unk_index: 3 ...
mask_index: 5 ...
▾ lambda_clm_config: null ...
▾ lambda_mlm_config: null ...
▾ lambda_pc_config: null ...
▾ lambda_ae_config: null ...
▾ lambda_mt_config: null ...
▾ lambda_bt_config: null ...
hyp_path: "/checkpoint/aconneau/dumped/xlm_17_100_big.3/16656234/hypotheses" ...
▾ ref_paths: {} ...
▾ mono_list: [ "en", "es", "fr", "de", "zh", "ru", "pt", "it", "ar", "ja", "id", "tr", "nl", "pl", "simple", "fa", "vi", "sv", "ko", "he", "ro", "no", "hi", "uk", "cs", "fi", "hu", "th", "da", "ca", "el", "bg", "sr", "ms", "bn", "hr", "sl", "zh_yue", "az", "sk", "eo", "ta", "sh", "lt", "et", "ml", "la", "bs", "sq", "arz", "af", "ka", "mr", "eu", "tl", "ang", "gl", "nn", "ur", "kk", "be", "hy", "te", "lv", "mk", "zh_classical", "als", "is", "wuu", "my", "sco", "mn", "ceb", "ast", "cy", "kn", "br", "an", "gu", "bar", "uz", "lb", "ne", "si", "war", "jv", "ga", "zh_min_nan", "oc", "ku", "sw", "nds", "ckb", "ia", "yi", "fy", "scn", "gan", "tt", "am" ] ...
▾ para_list: [] ...