import json
from tokenization import make_tokenizer

# Build the GLM ChineseSPTokenizer from the local SentencePiece model.
add_sentinel_token = 0
tokenizer = make_tokenizer("ChineseSPTokenizer", None, "tokenizer.model", "50048", None,
                           add_block_symbols=True, cache_dir="cache",
                           add_sentinel_token=add_sentinel_token, add_task_mask=True,
                           add_decoder_mask=False, fix_command_token=False)

# Dump the full vocabulary as "<id>\t<decoded string>" lines.
with open("glm_chinese.vocab.txt", "w", encoding="utf-8") as f_out:
    for idx in range(tokenizer.num_tokens):
        try:
            decode_str = tokenizer.DecodeIds([idx])
            f_out.write("%d\t%s\n" % (idx, decode_str))
        except Exception as e:
            # Some ids (e.g. command/special tokens) may fail to decode; log and skip them.
            print(idx, e)
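
# Optional sanity check (an illustrative sketch, not part of the original script):
# read the dumped vocab file back into a dict mapping token id -> decoded string.
# Decoded strings that contain a newline would break this simple line-based format
# and would need extra handling.
vocab = {}
with open("glm_chinese.vocab.txt", encoding="utf-8") as f_in:
    for line in f_in:
        idx_str, _, token = line.rstrip("\n").partition("\t")
        if idx_str.isdigit():
            vocab[int(idx_str)] = token
print(len(vocab), repr(vocab.get(0)))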