from idiomify.fetchers import fetch_tokenizer


def main():
    """Load the "t-1-1" tokenizer and dump its special tokens and vocab size."""
    tokenizer = fetch_tokenizer("t-1-1")
    # Print every special-token attribute in a fixed order; the
    # additional_special_tokens should have been added on top of the base vocab.
    special_attrs = (
        "bos_token",
        "cls_token",
        "eos_token",
        "sep_token",
        "mask_token",
        "pad_token",
        "unk_token",
        "additional_special_tokens",
    )
    for attr in special_attrs:
        print(getattr(tokenizer, attr))
    # the size of the vocab
    print(len(tokenizer))
    # sample output:
    # ['', '']
    # 50267


if __name__ == '__main__':
    main()