# Spaces: Sleeping
import html
import itertools

from transformers import AutoTokenizer
def get_color():
    """Return an endless iterator over the token highlight colors.

    The six hex colors are yielded in the order listed and then repeat
    from the start, so callers can call ``next()`` on the result as often
    as they need.
    """
    colors = ['#df7b55', '#2c7482', '#2c8234', '#5581df', '#822c63', '#b355df']
    return itertools.cycle(colors)
def get_res(model_name, input_sentence, single_print=True):
    """Tokenize ``input_sentence`` and render each token as a colored HTML span.

    Args:
        model_name: Identifier passed to ``AutoTokenizer.from_pretrained``.
        input_sentence: Text to tokenize (encoded without special tokens).
        single_print: When true, print the rendered markup followed by the
            token count and return ``None``. When false, return the result
            instead of printing it.

    Returns:
        ``None`` if ``single_print`` is true; otherwise a tuple
        ``(markup, token_num)`` where ``markup`` is the concatenated HTML
        string and ``token_num`` is the number of token ids produced.
    """
    # NOTE(security): trust_remote_code=True allows the model repository to
    # run its own Python code while the tokenizer loads; only pass model
    # names that are trusted.
    tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
    color_iterator = get_color()
    token_ids = tokenizer.encode(input_sentence, add_special_tokens=False)
    token_num = len(token_ids)

    # Decode ever-longer prefixes and keep only the text each new id adds,
    # instead of decoding ids one by one. NOTE(review): presumably a
    # workaround for tokenizers whose single ids do not decode to readable
    # text on their own -- confirm.
    pieces = []
    previous = ""
    for end in range(1, token_num + 1):
        decoded = tokenizer.decode(token_ids[:end])
        pieces.append(decoded[len(previous):])
        previous = decoded

    rendered = []
    for piece in pieces:
        if piece == '\n':
            # A bare newline token is emitted as a line break, not a span.
            rendered.append(' \n')
        else:
            # Escape markup characters so token text cannot inject HTML, and
            # turn spaces into &nbsp; so they are not collapsed when shown.
            text = html.escape(piece).replace(" ", "&nbsp;")
            rendered.append(
                f'<span style="font-size:1.25em;background-color:{next(color_iterator)}">{text}</span>'
            )

    res = ''.join(rendered)
    if single_print:
        print(res + str(token_num))
        return None
    return res, token_num