import os

from transformers import AutoTokenizer

from modeling import MT5ForConditionalGeneration


class ChemicalConverter:
    """Converts between SMILES and IUPAC chemical representations."""

    def __init__(self, mode: str):
        # mode selects the conversion direction (e.g. "SMILES2IUPAC") and
        # doubles as the name of the local checkpoint directory under models/.
        self.mode = mode
        model_directory = os.path.abspath("models")
        model_path = os.path.join(model_directory, mode)
        if not os.path.exists(model_path):
            raise ValueError(f"Model path does not exist: {model_path}")

        self.model = MT5ForConditionalGeneration.from_pretrained(model_path)
        self.smiles_tokenizer = AutoTokenizer.from_pretrained("BioMike/smiles")
        self.iupac_tokenizer = AutoTokenizer.from_pretrained("BioMike/iupac")
        self.smiles_max_len = 128
        self.iupac_max_len = 156

    def convert(self, input_text: str) -> str:
        # Pick the tokenizer pair for the conversion direction: the source
        # tokenizer encodes the input, the target tokenizer decodes the output.
        if self.mode == "SMILES2IUPAC":
            tokenizer = self.smiles_tokenizer
            reverse_tokenizer = self.iupac_tokenizer
            max_length = self.smiles_max_len
        else:
            tokenizer = self.iupac_tokenizer
            reverse_tokenizer = self.smiles_tokenizer
            max_length = self.iupac_max_len

        encoding = tokenizer(input_text,
                             return_tensors='pt',
                             padding="max_length",
                             truncation=True,
                             max_length=max_length)

        # Move the input tensors to the model's device (GPU if available)
        encoding = {key: value.to(self.model.device) for key, value in encoding.items()}

        # Generate the converted representation with greedy decoding
        output = self.model.generate(input_ids=encoding['input_ids'],
                                     attention_mask=encoding['attention_mask'],
                                     max_new_tokens=156,
                                     num_beams=1,
                                     num_return_sequences=1)

        # Decode the generated token IDs back to text
        output = [reverse_tokenizer.decode(ids, skip_special_tokens=True) for ids in output]

        return output[0]
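

# A minimal usage sketch (an assumption, not part of the original file): it
# presumes a local "models/SMILES2IUPAC" checkpoint directory exists, matching
# the path logic in __init__ above. The SMILES string below is aspirin; the
# expected output is its IUPAC name.
if __name__ == "__main__":
    converter = ChemicalConverter(mode="SMILES2IUPAC")
    print(converter.convert("CC(=O)OC1=CC=CC=C1C(=O)O"))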