File size: 413 Bytes
751936e |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 |
from fairseq.data.encoders.gpt2_bpe import get_encoder
# Smoke test for the GPT-2 BPE encoder: encode two sample strings, print the
# resulting ids, and print what decode() gives back for those ids.

# Vocabulary files handed to get_encoder (encoder JSON first, BPE file second).
ENCODER_JSON = '/workspace/fairseq-models/data/vocab/gpt2/encoder.json'
VOCAB_BPE = '/workspace/fairseq-models/data/vocab/gpt2/vocab.bpe'

bpe = get_encoder(ENCODER_JSON, VOCAB_BPE)


def _print_round_trip(text):
    """Encode *text*, print the ids, print the decoded ids, return the ids."""
    ids = bpe.encode(text)
    print(ids)
    print(bpe.decode(ids))
    return ids


codes = _print_round_trip('Hello world')

test_str = 'Leonardo DiCaprio was born in Los Angeles'
# Show what bpe.bpe() returns when given the whole sentence directly.
# NOTE(review): presumably this is the merged sub-word segmentation of the
# raw string -- confirm against the fairseq Encoder.bpe implementation.
print(bpe.bpe(test_str))
codes = _print_round_trip(test_str)
|