from transformers import AutoTokenizer
import itertools


def get_color():
    """Return an endless iterator cycling over a fixed 6-color hex palette."""
    colors = ['#df7b55', '#2c7482', '#2c8234', '#5581df', '#822c63', '#b355df']
    return itertools.cycle(colors)


def get_res(model_name, input_sentence, single_print=True):
    """Tokenize *input_sentence* and render each token's decoded text.

    Loads the tokenizer for *model_name* (``trust_remote_code=True``),
    encodes the sentence without special tokens, and reconstructs the
    per-token surface strings by decoding incrementally (decoding the
    prefix ``out[:i+1]`` and taking the suffix beyond the previous
    prefix's decoding). This round-about "work-around" path is needed
    because decoding token ids one at a time can mangle multi-byte /
    merged tokens for some tokenizers.

    Parameters
    ----------
    model_name : str
        HuggingFace model id or local path passed to ``AutoTokenizer``.
    input_sentence : str
        Text to tokenize.
    single_print : bool, default True
        When True, print the rendered string followed by the token
        count and return None; when False, return ``(res, token_num)``.

    Returns
    -------
    tuple[str, int] | None
        ``(rendered_text, token_count)`` when ``single_print`` is False.
    """
    tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
    out = tokenizer.encode(input_sentence, add_special_tokens=False)
    token_num = len(out)

    # Toggle kept from the original code: True selects the incremental
    # prefix-decoding path; False selects the naive per-token decode.
    work_around = True
    if work_around:
        pieces = []
        prev_decoded = ""
        for i in range(token_num):
            decoded = tokenizer.decode(out[:i + 1])
            if not pieces:
                pieces.append(decoded)
            else:
                # The new token's text is whatever the longer prefix
                # decoding added beyond the previous prefix decoding.
                pieces.append(decoded[len(prev_decoded):])
            prev_decoded = decoded
    else:
        # Fix: the original compared the raw token id (an int) against
        # '\n', which was always False. Decode each id first so the
        # newline substitution below can actually apply on this path.
        pieces = [tokenizer.decode(tok) for tok in out]

    rendered = []
    for piece in pieces:
        if piece == '\n':
            rendered.append(' \n')
        else:
            rendered.append(f'{piece.replace(" ", " ")}')
    res = ''.join(rendered)

    if single_print:
        print(res + str(token_num))
    else:
        return res, token_num