{ "tokenizer_path_name": null, "vocab_name": "bio-clinical", "tokenizer": "bbpe-roberta", "lowercase": false, "vocab_size": 50262, "min_frequency": 6, "extra_tokens": [], "limit_alphabet": 1000, "max_len": 512, "no_show_progress": false, "strip_accents": false, "no_handle_chinese_chars": false, "no_clean_text": false, "reserve_tokens": 0, "use_tokenizers": false, "no_fairseq": false, "bbpe_add_prefix_space": true, "single_paragraph_add_punct": true, "tok_batch_size": 100000000, "files": [ "/home/shared/dt01/temutauro/ccasimiro/corpus-utils-lm/output/model-ready_output/bio-clinical-vocab-50262-2021-12-07-1604-d1d3-849e/train_valid_test_split_output/bio-clinical-2021-12-07-1608-d1d3-fb2f/train.txt", "/home/shared/dt01/temutauro/ccasimiro/corpus-utils-lm/output/model-ready_output/bio-clinical-vocab-50262-2021-12-07-1604-d1d3-849e/train_valid_test_split_output/bio-clinical-2021-12-07-1608-d1d3-fb2f/valid.txt", "/home/shared/dt01/temutauro/ccasimiro/corpus-utils-lm/output/model-ready_output/bio-clinical-vocab-50262-2021-12-07-1604-d1d3-849e/train_valid_test_split_output/bio-clinical-2021-12-07-1608-d1d3-fb2f/test.txt" ], "output_root_path": "/home/shared/dt01/temutauro/ccasimiro/corpus-utils-lm/output/model-ready_output/bio-clinical-vocab-50262-2021-12-07-1604-d1d3-849e", "commit_hash": "d1d3920e7012caf14c9d6968fded36e0dd719a51" }