{
  "tokenizer_path_name": null,
  "vocab_name": "biomedical",
  "tokenizer": "bbpe-roberta",
  "lowercase": false,
  "vocab_size": 50262,
  "min_frequency": 6,
  "extra_tokens": [],
  "limit_alphabet": 1000,
  "max_len": 512,
  "no_show_progress": false,
  "strip_accents": false,
  "no_handle_chinese_chars": false,
  "no_clean_text": false,
  "reserve_tokens": 0,
  "use_tokenizers": false,
  "no_fairseq": false,
  "bbpe_add_prefix_space": true,
  "single_paragraph_add_punct": true,
  "tok_batch_size": 100000000,
  "files": [
    "/home/shared/dt01/temutauro/ccasimiro/corpus-utils-lm/output/model-ready_output/biomedical-vocab-50262-2021-12-09-1207-d1d3-e42b/train_valid_test_split_output/biomedical-2021-12-09-1210-d1d3-ad85/train.txt",
    "/home/shared/dt01/temutauro/ccasimiro/corpus-utils-lm/output/model-ready_output/biomedical-vocab-50262-2021-12-09-1207-d1d3-e42b/train_valid_test_split_output/biomedical-2021-12-09-1210-d1d3-ad85/valid.txt",
    "/home/shared/dt01/temutauro/ccasimiro/corpus-utils-lm/output/model-ready_output/biomedical-vocab-50262-2021-12-09-1207-d1d3-e42b/train_valid_test_split_output/biomedical-2021-12-09-1210-d1d3-ad85/test.txt"
  ],
  "output_root_path": "/home/shared/dt01/temutauro/ccasimiro/corpus-utils-lm/output/model-ready_output/biomedical-vocab-50262-2021-12-09-1207-d1d3-e42b",
  "commit_hash": "d1d3920e7012caf14c9d6968fded36e0dd719a51"
}