"""Exercise the GPT-2 BPE encoder obtained from fairseq.

Builds an encoder from two local vocabulary files, then prints the encoded
form and the decoded round trip for two sample strings, plus the raw result
of the encoder's ``bpe`` method for the second string.
"""
from fairseq.data.encoders.gpt2_bpe import get_encoder

# Vocabulary files are read from fixed local paths; adjust for your checkout.
ENCODER_JSON = '/workspace/fairseq-models/data/vocab/gpt2/encoder.json'
VOCAB_BPE = '/workspace/fairseq-models/data/vocab/gpt2/vocab.bpe'


def _print_round_trip(encoder, text):
    """Print ``encoder.encode(text)`` and its decoding; return the codes.

    Args:
        encoder: Object exposing ``encode`` and ``decode`` (here, the value
            returned by ``get_encoder``).
        text: The string to encode.

    Returns:
        Whatever ``encoder.encode(text)`` returned (presumably a list of
        token ids -- confirm against the fairseq encoder).
    """
    encoded = encoder.encode(text)
    print(encoded)
    print(encoder.decode(encoded))
    return encoded


bpe = get_encoder(ENCODER_JSON, VOCAB_BPE)

codes = _print_round_trip(bpe, 'Hello world')

test_str = 'Leonardo DiCaprio was born in Los Angeles'
# NOTE(review): `bpe.bpe` looks like the encoder's lower-level merge routine
# rather than its public entry point; calling it on a whole sentence may not
# reflect what `encode` does internally -- confirm before relying on it.
print(bpe.bpe(test_str))
codes = _print_round_trip(bpe, test_str)