{
  "tokenizer_path_name": null,
  "vocab_name": "bio-clinical",
  "tokenizer": "bbpe-roberta",
  "lowercase": false,
  "vocab_size": 50262,
  "min_frequency": 6,
  "extra_tokens": [],
  "limit_alphabet": 1000,
  "max_len": 512,
  "no_show_progress": false,
  "strip_accents": false,
  "no_handle_chinese_chars": false,
  "no_clean_text": false,
  "reserve_tokens": 0,
  "use_tokenizers": false,
  "no_fairseq": false,
  "bbpe_add_prefix_space": true,
  "single_paragraph_add_punct": true,
  "tok_batch_size": 100000000,
  "files": [
    "/home/shared/dt01/temutauro/ccasimiro/corpus-utils-lm/output/model-ready_output/bio-clinical-vocab-50262-2021-12-07-1604-d1d3-849e/train_valid_test_split_output/bio-clinical-2021-12-07-1608-d1d3-fb2f/train.txt",
    "/home/shared/dt01/temutauro/ccasimiro/corpus-utils-lm/output/model-ready_output/bio-clinical-vocab-50262-2021-12-07-1604-d1d3-849e/train_valid_test_split_output/bio-clinical-2021-12-07-1608-d1d3-fb2f/valid.txt",
    "/home/shared/dt01/temutauro/ccasimiro/corpus-utils-lm/output/model-ready_output/bio-clinical-vocab-50262-2021-12-07-1604-d1d3-849e/train_valid_test_split_output/bio-clinical-2021-12-07-1608-d1d3-fb2f/test.txt"
  ],
  "output_root_path": "/home/shared/dt01/temutauro/ccasimiro/corpus-utils-lm/output/model-ready_output/bio-clinical-vocab-50262-2021-12-07-1604-d1d3-849e",
  "commit_hash": "d1d3920e7012caf14c9d6968fded36e0dd719a51"
}