{ "output_root": "/gpfs/projects/bsc88/corpus-utils-lm/23-12-2020-72f8c7e/output/model-ready_output/2020-12-23-1900-daf4-ab38", "files": "/gpfs/projects/bsc88/corpus-utils-lm/23-12-2020-72f8c7e/output/model-ready_output/2020-12-23-1900-daf4-ab38/train_valid_test_split_output/2020-12-23-1905-daf4-a0e0/train.txt", "vocab_name": "roberta-ca", "clean_text": true, "handle_chinese_chars": true, "strip_accents": false, "lowercase": false, "vocab_size": 52000, "limit_alphabet": 1000, "show_progress": true, "min_frequency": 2, "extra_tokens": [], "reserve_tokens": 0, "tokenizer": "bbpe", "commit_hash": "daf4d660ec8a4b28d2bc29b3063779100ab85796" }