"""Manual smoke test for the GPT-2 byte-pair encoder shipped with fairseq.

Loads the encoder from fixed vocabulary files, then prints the token ids
produced by ``encode`` and the text recovered by ``decode`` for two sample
strings, so the round trip can be inspected by eye.
"""
from fairseq.data.encoders.gpt2_bpe import get_encoder

# The vocabulary paths are hard-coded; both files must exist at these
# locations for the script to run.
bpe = get_encoder(
    '/workspace/fairseq-models/data/vocab/gpt2/encoder.json',
    '/workspace/fairseq-models/data/vocab/gpt2/vocab.bpe',
)

# Round trip for a short string: print the encoded ids, then the decoded text.
codes = bpe.encode('Hello world')
print(codes)
print(bpe.decode(codes))

# Same round trip for a longer sentence, preceded by the raw output of the
# encoder's ``bpe`` method on the un-split string.
test_str = 'Leonardo DiCaprio was born in Los Angeles'
# NOTE(review): ``bpe.bpe`` is called with the whole sentence here;
# presumably it is the per-token merge step -- confirm this is intended.
print(bpe.bpe(test_str))
codes = bpe.encode(test_str)
print(codes)
print(bpe.decode(codes))