# One-time conversion script (kept for reference): rebuild the fast-tokenizer
# vocab from the plain-text "vocab.vocab" file and write it to new_tokenizer.json.
#
# from tokenization_gptpangu import GPTPanguTokenizer
# import json
#
# tokenizer = GPTPanguTokenizer.from_pretrained(".")
#
# # Load the existing tokenizer.json as a template.
# with open("tokenizer.json", encoding="utf-8") as f:
#     config = json.load(f)
#
# # The token is the first tab-separated field of each line; the line index is used as its id.
# vocab_file = "vocab.vocab"
# vocab = []
# with open(vocab_file, "r", encoding="utf-8") as f:
#     for idx, line in enumerate(f):
#         key = line.split("\t")[0]
#         vocab.append([key, idx])
#
# config["model"]["vocab"] = vocab
#
# with open("new_tokenizer.json", "w", encoding="utf-8") as w:
#     w.write(json.dumps(config))
#
# print("ok")

from transformers import AutoTokenizer

# Load the tokenizer from the current directory via the auto class.
tokenizer = AutoTokenizer.from_pretrained(".")
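
# Quick round-trip sanity check after loading, to confirm the rebuilt vocab works.
# The sample string below is only an illustrative assumption, not part of the
# original script; any text would do.
sample = "你好，世界"
ids = tokenizer.encode(sample)
print(ids)                    # token ids produced by the loaded tokenizer
print(tokenizer.decode(ids))  # should round-trip back to (roughly) the sample text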