{ "accumulate_gradients": 4, "ae_steps": [], "amp": 2, "architectures": [ "XLMWithLMHeadModel" ], "asm": false, "attention_dropout": 0.1, "batch_size": 16, "beam_size": 1, "bos_index": 0, "bos_token_id": 0, "bptt": 256, "bt_src_langs": [], "bt_steps": [], "causal": false, "clip_grad_norm": 1.0, "clm_steps": [], "command": "python /private/home/aconneau/workdir/xlm_17_100_big.3/2019_08_10_19_23_42/train.py --n_heads 16 --bt_steps '' --max_vocab 200000 --word_mask_keep_rand '0.8,0.1,0.1' --use_lang_emb false --data_path '/private/home/aconneau/projects/XLM/data/wiki/17/175k' --save_periodic 0 --max_len 200 --bptt 256 --ae_steps '' --fp16 true --share_inout_emb true --sinusoidal_embeddings false --word_shuffle 0 --tokens_per_batch '-1' --accumulate_gradients 4 --validation_metrics '_valid_en_mlm_ppl,_valid_mlm_ppl,_valid_zh_mlm_ppl' --attention_dropout '0.1' --split_data true --max_epoch 100000 --stopping_criterion '_valid_zh_mlm_ppl,25' --dump_path '/checkpoint/aconneau/dumped' --epoch_size 200000 --word_blank 0 --gelu_activation true --n_layers 16 --optimizer 'adam_inverse_sqrt,lr=0.00005,warmup_updates=30000,beta1=0.9,beta2=0.999,weight_decay=0.01,eps=0.000001' --mlm_steps 'en,fr,es,de,it,pt,nl,sv,pl,ru,ar,tr,zh,ja,ko,hi,vi' --eval_bleu false --dropout '0.1' --mt_steps '' --batch_size 16 --word_dropout 0 --reload_model '/checkpoint/aconneau/dumped/xlm_17_100_240_big_model_upper.2/14884510/best-valid_zh_mlm_ppl.pth' --min_count 0 --amp 2 --group_by_size true --asm false --sample_alpha '0.5' --word_pred '0.15' --clip_grad_norm 1 --emb_dim 1280 --encoder_only true --lgs 'en-fr-es-de-it-pt-nl-sv-pl-ru-ar-tr-zh-ja-ko-hi-vi' --clm_steps '' --exp_name 'xlm_17_100_big.3' --lg_sampling_factor '0.7' --eval_only false --exp_id 16656237 --master_port 14148 --exp_id \"16656237\"", "context_size": 0, "data_path": "/private/home/aconneau/projects/XLM/data/wiki/17/175k", "debug": false, "debug_slurm": false, "debug_train": false, "dropout": 0.1, "dump_path": "/checkpoint/aconneau/dumped/xlm_17_100_big.3/16656237", "emb_dim": 1280, "embed_init_std": 0.02209708691207961, "encoder_only": true, "end_n_top": 5, "eos_index": 1, "epoch_size": 200000, "eval_bleu": false, "eval_only": false, "exp_id": "16656237", "exp_name": "xlm_17_100_big.3", "fp16": true, "gelu_activation": true, "global_rank": 0, "group_by_size": true, "hyp_path": "/checkpoint/aconneau/dumped/xlm_17_100_big.3/16656237/hypotheses", "id2lang": { "0": "ar", "1": "de", "10": "pl", "11": "pt", "12": "ru", "13": "sv", "14": "tr", "15": "vi", "16": "zh", "2": "en", "3": "es", "4": "fr", "5": "hi", "6": "it", "7": "ja", "8": "ko", "9": "nl" }, "init_std": 0.02, "is_encoder": true, "is_master": true, "is_slurm_job": true, "lambda_ae": 1.0, "lambda_ae_config": null, "lambda_bt": 1.0, "lambda_bt_config": null, "lambda_clm": 1.0, "lambda_clm_config": null, "lambda_mlm": 1.0, "lambda_mlm_config": null, "lambda_mt": 1.0, "lambda_mt_config": null, "lambda_pc": 1.0, "lambda_pc_config": null, "lang2id": { "ar": 0, "de": 1, "en": 2, "es": 3, "fr": 4, "hi": 5, "it": 6, "ja": 7, "ko": 8, "nl": 9, "pl": 10, "pt": 11, "ru": 12, "sv": 13, "tr": 14, "vi": 15, "zh": 16 }, "lang_id": 0, "langs": [ "en", "fr", "es", "de", "it", "pt", "nl", "sv", "pl", "ru", "ar", "tr", "zh", "ja", "ko", "hi", "vi" ], "layer_norm_eps": 1e-12, "lg_sampling_factor": 0.7, "lgs": "en-fr-es-de-it-pt-nl-sv-pl-ru-ar-tr-zh-ja-ko-hi-vi", "local_rank": 0, "mask_index": 5, "mask_token_id": 0, "master_addr": "learnfair1605", "master_port": 14148, "max_batch_size": 0, "max_epoch": 100000, "max_len": 200, "max_position_embeddings": 512, "max_vocab": 200000, "min_count": 0, "mlm_steps": [ [ "en", null ], [ "fr", null ], [ "es", null ], [ "de", null ], [ "it", null ], [ "pt", null ], [ "nl", null ], [ "sv", null ], [ "pl", null ], [ "ru", null ], [ "ar", null ], [ "tr", null ], [ "zh", null ], [ "ja", null ], [ "ko", null ], [ "hi", null ], [ "vi", null ] ], "model_type": "xlm", "mono_dataset": { "ar": { "test": "/private/home/aconneau/projects/XLM/data/wiki/17/175k/test.ar.pth", "train": "/private/home/aconneau/projects/XLM/data/wiki/17/175k/train.ar.pth", "valid": "/private/home/aconneau/projects/XLM/data/wiki/17/175k/valid.ar.pth" }, "de": { "test": "/private/home/aconneau/projects/XLM/data/wiki/17/175k/test.de.pth", "train": "/private/home/aconneau/projects/XLM/data/wiki/17/175k/train.de.pth", "valid": "/private/home/aconneau/projects/XLM/data/wiki/17/175k/valid.de.pth" }, "en": { "test": "/private/home/aconneau/projects/XLM/data/wiki/17/175k/test.en.pth", "train": "/private/home/aconneau/projects/XLM/data/wiki/17/175k/train.en.pth", "valid": "/private/home/aconneau/projects/XLM/data/wiki/17/175k/valid.en.pth" }, "es": { "test": "/private/home/aconneau/projects/XLM/data/wiki/17/175k/test.es.pth", "train": "/private/home/aconneau/projects/XLM/data/wiki/17/175k/train.es.pth", "valid": "/private/home/aconneau/projects/XLM/data/wiki/17/175k/valid.es.pth" }, "fr": { "test": "/private/home/aconneau/projects/XLM/data/wiki/17/175k/test.fr.pth", "train": "/private/home/aconneau/projects/XLM/data/wiki/17/175k/train.fr.pth", "valid": "/private/home/aconneau/projects/XLM/data/wiki/17/175k/valid.fr.pth" }, "hi": { "test": "/private/home/aconneau/projects/XLM/data/wiki/17/175k/test.hi.pth", "train": "/private/home/aconneau/projects/XLM/data/wiki/17/175k/train.hi.pth", "valid": "/private/home/aconneau/projects/XLM/data/wiki/17/175k/valid.hi.pth" }, "it": { "test": "/private/home/aconneau/projects/XLM/data/wiki/17/175k/test.it.pth", "train": "/private/home/aconneau/projects/XLM/data/wiki/17/175k/train.it.pth", "valid": "/private/home/aconneau/projects/XLM/data/wiki/17/175k/valid.it.pth" }, "ja": { "test": "/private/home/aconneau/projects/XLM/data/wiki/17/175k/test.ja.pth", "train": "/private/home/aconneau/projects/XLM/data/wiki/17/175k/train.ja.pth", "valid": "/private/home/aconneau/projects/XLM/data/wiki/17/175k/valid.ja.pth" }, "ko": { "test": "/private/home/aconneau/projects/XLM/data/wiki/17/175k/test.ko.pth", "train": "/private/home/aconneau/projects/XLM/data/wiki/17/175k/train.ko.pth", "valid": "/private/home/aconneau/projects/XLM/data/wiki/17/175k/valid.ko.pth" }, "nl": { "test": "/private/home/aconneau/projects/XLM/data/wiki/17/175k/test.nl.pth", "train": "/private/home/aconneau/projects/XLM/data/wiki/17/175k/train.nl.pth", "valid": "/private/home/aconneau/projects/XLM/data/wiki/17/175k/valid.nl.pth" }, "pl": { "test": "/private/home/aconneau/projects/XLM/data/wiki/17/175k/test.pl.pth", "train": "/private/home/aconneau/projects/XLM/data/wiki/17/175k/train.pl.pth", "valid": "/private/home/aconneau/projects/XLM/data/wiki/17/175k/valid.pl.pth" }, "pt": { "test": "/private/home/aconneau/projects/XLM/data/wiki/17/175k/test.pt.pth", "train": "/private/home/aconneau/projects/XLM/data/wiki/17/175k/train.pt.pth", "valid": "/private/home/aconneau/projects/XLM/data/wiki/17/175k/valid.pt.pth" }, "ru": { "test": "/private/home/aconneau/projects/XLM/data/wiki/17/175k/test.ru.pth", "train": "/private/home/aconneau/projects/XLM/data/wiki/17/175k/train.ru.pth", "valid": "/private/home/aconneau/projects/XLM/data/wiki/17/175k/valid.ru.pth" }, "sv": { "test": "/private/home/aconneau/projects/XLM/data/wiki/17/175k/test.sv.pth", "train": "/private/home/aconneau/projects/XLM/data/wiki/17/175k/train.sv.pth", "valid": "/private/home/aconneau/projects/XLM/data/wiki/17/175k/valid.sv.pth" }, "tr": { "test": "/private/home/aconneau/projects/XLM/data/wiki/17/175k/test.tr.pth", "train": "/private/home/aconneau/projects/XLM/data/wiki/17/175k/train.tr.pth", "valid": "/private/home/aconneau/projects/XLM/data/wiki/17/175k/valid.tr.pth" }, "vi": { "test": "/private/home/aconneau/projects/XLM/data/wiki/17/175k/test.vi.pth", "train": "/private/home/aconneau/projects/XLM/data/wiki/17/175k/train.vi.pth", "valid": "/private/home/aconneau/projects/XLM/data/wiki/17/175k/valid.vi.pth" }, "zh": { "test": "/private/home/aconneau/projects/XLM/data/wiki/17/175k/test.zh.pth", "train": "/private/home/aconneau/projects/XLM/data/wiki/17/175k/train.zh.pth", "valid": "/private/home/aconneau/projects/XLM/data/wiki/17/175k/valid.zh.pth" } }, "mono_list": [ "en", "fr", "es", "de", "it", "pt", "nl", "sv", "pl", "ru", "ar", "tr", "zh", "ja", "ko", "hi", "vi" ], "mt_steps": [], "multi_gpu": true, "multi_node": true, "n_gpu_per_node": 8, "n_heads": 16, "n_langs": 17, "n_layers": 16, "n_nodes": 4, "node_id": 0, "optimizer": "adam_inverse_sqrt,lr=0.00005,warmup_updates=30000,beta1=0.9,beta2=0.999,weight_decay=0.01,eps=0.000001", "pad_index": 2, "pad_token_id": 2, "para_dataset": {}, "para_list": [], "pc_steps": [], "ref_paths": {}, "reload_checkpoint": "", "reload_emb": "", "reload_model": "/checkpoint/aconneau/dumped/xlm_17_100_240_big_model_upper.2/14884510/best-valid_zh_mlm_ppl.pth", "sample_alpha": 0.5, "save_periodic": 0, "share_inout_emb": true, "sinusoidal_embeddings": false, "split_data": true, "start_n_top": 5, "stopping_criterion": "_valid_zh_mlm_ppl,25", "summary_activation": null, "summary_first_dropout": 0.1, "summary_proj_to_labels": true, "summary_type": "first", "summary_use_proj": true, "tokens_per_batch": -1, "unk_index": 3, "use_lang_emb": false, "use_memory": false, "validation_metrics": "_valid_en_mlm_ppl,_valid_mlm_ppl,_valid_zh_mlm_ppl", "vocab_size": 200000, "word_blank": 0.0, "word_dropout": 0.0, "word_keep": 0.1, "word_mask": 0.8, "word_mask_keep_rand": "0.8,0.1,0.1", "word_pred": 0.15, "word_rand": 0.1, "word_shuffle": 0.0, "world_size": 32 }