File size: 413 Bytes
751936e
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19


from fairseq.data.encoders.gpt2_bpe import get_encoder
# Build the GPT-2 BPE encoder from the local vocabulary files.
encoder_json_path = '/workspace/fairseq-models/data/vocab/gpt2/encoder.json'
vocab_bpe_path = '/workspace/fairseq-models/data/vocab/gpt2/vocab.bpe'
bpe = get_encoder(encoder_json_path, vocab_bpe_path)

# Round trip a short phrase: show the encoded codes, then decode them back.
hello_codes = bpe.encode('Hello world')
print(hello_codes)
hello_decoded = bpe.decode(hello_codes)
print(hello_decoded)


# Repeat with a longer sentence. Before encoding, also show what bpe.bpe()
# returns when handed the raw, untokenized string.
test_str = 'Leonardo DiCaprio was born in Los Angeles'
raw_bpe_output = bpe.bpe(test_str)
print(raw_bpe_output)
sentence_codes = bpe.encode(test_str)
print(sentence_codes)
sentence_decoded = bpe.decode(sentence_codes)
print(sentence_decoded)