# CHEMISTral7Bv0.3 / inference_transform.py
# Clemspace's picture
# added inference + api wrapper
# 32fe622
import re
from rdkit import Chem
from rdkit.Chem import MolFromSmiles, SDWriter
import logging
from Bio import SeqIO
# Configure the root logger when this module is imported so that records at
# INFO level and above are emitted.
logging.basicConfig(level=logging.INFO)
# Module-level logger named after this module.
logger = logging.getLogger(__name__)
def process_smiles(smiles: str, sdf_file: str = "/tmp/output.sdf") -> str:
    """Parse a SMILES string and write the molecule to an SDF file.

    Args:
        smiles: SMILES text handed to ``MolFromSmiles``.
        sdf_file: Destination path for the SDF output.  The default keeps
            the historical fixed location; every call that uses the default
            overwrites the same file, so pass a distinct path when several
            results must coexist.

    Returns:
        The path the molecule was written to (``sdf_file``).

    Raises:
        ValueError: If ``MolFromSmiles`` does not yield a molecule.
    """
    mol = MolFromSmiles(smiles)
    if not mol:
        raise ValueError(f"Invalid SMILES string: {smiles}")

    writer = SDWriter(sdf_file)
    try:
        writer.write(mol)
    finally:
        # Always release the file handle, even when writing fails.
        writer.close()
    return sdf_file
def process_pdb(file_path: str) -> str:
    """Collect the sequences parsed from a PDB file into one string.

    The file is read with ``SeqIO.parse`` using the ``"pdb-seqres"`` format.

    Args:
        file_path: Path of the PDB file to read.

    Returns:
        The sequence of every parsed record, in parse order, separated by
        single spaces.  An empty string when no record is produced.
    """
    with open(file_path, "r") as pdb_handle:
        chain_sequences = [
            str(entry.seq) for entry in SeqIO.parse(pdb_handle, "pdb-seqres")
        ]
    return " ".join(chain_sequences)
def process_sdf(file_path: str) -> str:
    """Return the given SDF path unchanged.

    The file is neither opened nor validated here; the path is passed
    straight through to the caller.

    Args:
        file_path: Path of an SDF file.

    Returns:
        The same ``file_path`` value.
    """
    sdf_path = file_path
    return sdf_path
def extract_smiles(text: str) -> str:
    """Return the first SMILES-looking token found in ``text``.

    A candidate is one character other than ``J`` followed by at least six
    characters from the pattern's alphabet (digits, a few element letters
    and bond/branch/charge symbols).  Because the search is not anchored,
    that leading character is often the space, quote or colon sitting in
    front of the molecule rather than part of it; such a separator is
    dropped so the caller receives only the SMILES text.

    Args:
        text: Free-form text that may contain a SMILES string.

    Returns:
        The first candidate, or ``""`` when nothing matches.
    """
    # Same characters as the class inside the pattern below.
    smiles_alphabet = "0123456789BCOHNSPrIFla@+-[]()\\/%=#$"
    smiles_pattern = r"([^J][0-9BCOHNSOPrIFla@+\-\[\]\(\)\\\/%=#$]{6,})"

    found = re.search(smiles_pattern, text)
    if found is None:
        return ""

    candidate = found.group(1)
    lead = candidate[0]
    # Letters and "*" can legitimately open a SMILES string, so they are
    # kept; any other character outside the alphabet is surrounding
    # punctuation or whitespace captured by ``[^J]``.
    if lead not in smiles_alphabet and not lead.isalpha() and lead != "*":
        candidate = candidate[1:]
    return candidate
def is_valid_smiles(smiles: str) -> bool:
    """Report whether ``MolFromSmiles`` produces a molecule for ``smiles``.

    Args:
        smiles: Candidate SMILES text.

    Returns:
        ``False`` when ``MolFromSmiles`` returns ``None``, ``True`` otherwise.
    """
    parsed = MolFromSmiles(smiles)
    if parsed is None:
        return False
    return True
def extract_and_convert_to_sdf(text: str) -> str:
    """Find a SMILES string in ``text`` and write it out as an SDF file.

    The candidate comes from ``extract_smiles``, is checked with
    ``is_valid_smiles`` and is then converted by ``process_smiles``.

    Args:
        text: Free-form text expected to contain a SMILES string.

    Returns:
        Whatever ``process_smiles`` returns for the extracted SMILES.

    Raises:
        ValueError: If no candidate is extracted or the candidate fails
            validation.
    """
    candidate = extract_smiles(text)
    # Guard clause: an empty candidate skips validation entirely.
    if not candidate or not is_valid_smiles(candidate):
        raise ValueError("No valid SMILES string found in the text.")
    return process_smiles(candidate)