File size: 641 Bytes
35996ec
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25

from transformers import AutoTokenizer
import itertools



def get_color():
    """Return an endless iterator over the integers 41 through 47.

    The values are used by the caller as ANSI SGR colour codes (41-47 are
    the standard background colours); after 47 the sequence restarts at 41.
    """
    return itertools.cycle(range(41, 48))

def get_res(model_name, input_sentence, single_print=True):
    """Tokenize a sentence and render every token in a rotating colour.

    The tokenizer for ``model_name`` is loaded on each call, the sentence is
    encoded without special tokens, and each token id is decoded back to text
    and wrapped in an ANSI escape sequence whose colour code comes from
    ``get_color()``. The token count is appended after a single space.

    Args:
        model_name: Identifier passed to ``AutoTokenizer.from_pretrained``.
        input_sentence: Text to tokenize.
        single_print: When true, print the rendered string and return
            ``None``; otherwise return the rendered string without printing.

    Returns:
        The rendered string when ``single_print`` is false, else ``None``.
    """
    # NOTE(review): trust_remote_code=True allows the model repository to run
    # its own Python code while loading; only pass trusted model names.
    tok = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
    token_ids = tok.encode(input_sentence, add_special_tokens=False)
    palette = get_color()

    pieces = []
    for token_id in token_ids:
        # Draw the colour first, then decode, so each token gets the next code.
        code = next(palette)
        pieces.append(f'\033[1;{code}m {tok.decode(token_id)}\033[m')

    rendered = ''.join(pieces) + ' ' + str(len(token_ids))

    if not single_print:
        return rendered
    print(rendered)