File size: 929 Bytes
97db817
e16e71f
 
 
 
 
97db817
e16e71f
 
97db817
 
 
 
 
 
 
 
 
 
e16e71f
97db817
e16e71f
 
97db817
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
{
    "custom_vocab_files": [
        "/home/usuaris/veu/casimiro.pio.carrino/projects/corpus-utils-lm/corpora/bio/biomedical-clinical.txt"
    ],
    "vocab_name": "bio-biomedical-clinical-vocab-52k",
    "tokenizer": "bbpe",
    "lowercase": false,
    "vocab_size": 52000,
    "min_frequency": 10,
    "extra_tokens": [],
    "limit_alphabet": 1000,
    "no_show_progress": false,
    "strip_accents": false,
    "no_handle_chinese_chars": false,
    "no_clean_text": false,
    "reserve_tokens": 0,
    "use_tokenizers": false,
    "no_fairseq": false,
    "files": [
        "/home/usuaris/veu/casimiro.pio.carrino/projects/corpus-utils-lm/corpora/bio/biomedical-clinical.txt"
    ],
    "output_root_path": "/home/usuaris/veu/casimiro.pio.carrino/projects/corpus-utils-lm/output/model-ready_output/bio-biomedical-clinical-vocab-52k-2021-04-26-0955-3a71-240f",
    "commit_hash": "3a7116cf776527c411869becbe6fad8b9e3f5e56"
}