Spaces:
Runtime error
Runtime error
from functools import lru_cache

from tokenizers import Tokenizer
@lru_cache(maxsize=1)
def _load_bpe_tokenizer():
    """Load the BPE tokenizer from ``chembl_bpe_tokenizer.json`` once.

    The result is memoized so repeated calls to ``bpe_tokenizer`` do not
    re-read and re-parse the JSON file. If loading raises, nothing is cached
    and the next call tries again.
    """
    return Tokenizer.from_file("chembl_bpe_tokenizer.json")


def bpe_tokenizer(smiles_string):
    """Tokenize a SMILES string with the saved BPE tokenizer.

    Args:
        smiles_string: The text to encode; passed unchanged to
            ``Tokenizer.encode``.

    Returns:
        A ``(tokens_text, token_ids)`` tuple: the ``tokens`` and ``ids``
        attributes of the encoding produced for ``smiles_string``.

    Raises:
        Whatever ``Tokenizer.from_file`` raises when
        ``chembl_bpe_tokenizer.json`` cannot be loaded from the current
        working directory.
    """
    # Reuse the cached tokenizer instead of reloading the file on every call.
    tokenizer = _load_bpe_tokenizer()

    encoded_output = tokenizer.encode(smiles_string)

    # Token strings and their corresponding vocabulary IDs.
    tokens_text = encoded_output.tokens
    token_ids = encoded_output.ids
    return tokens_text, token_ids