File size: 1,328 Bytes
32fe622
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
import re
from rdkit import Chem
from rdkit.Chem import MolFromSmiles, SDWriter
import logging
from Bio import SeqIO


# Configure the root logger at INFO level as an import-time side effect, and
# create a module-scoped logger named after this module.
# NOTE(review): `logger` is not referenced by any function visible in this
# file — confirm whether it is used elsewhere before removing.
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

def process_smiles(smiles: str, sdf_file: str = "/tmp/output.sdf") -> str:
    """Parse a SMILES string and write the resulting molecule to an SDF file.

    Args:
        smiles: SMILES text to parse with RDKit's ``MolFromSmiles``.
        sdf_file: Destination path for the SDF output. Defaults to the
            historical fixed location; pass a distinct path when several
            conversions may run at once so they do not overwrite each other.

    Returns:
        The path of the SDF file that was written (``sdf_file``).

    Raises:
        ValueError: If ``MolFromSmiles`` cannot parse ``smiles``.
    """
    mol = MolFromSmiles(smiles)
    # Explicit None check, matching is_valid_smiles in this module.
    if mol is None:
        raise ValueError(f"Invalid SMILES string: {smiles}")

    writer = SDWriter(sdf_file)
    try:
        writer.write(mol)
    finally:
        # Always close so the underlying file handle is released (and any
        # buffered output flushed) even when the write itself fails.
        writer.close()

    return sdf_file

def process_pdb(file_path: str) -> str:
    """Read a PDB file and return its sequences as one space-separated string.

    The file is parsed with Biopython's ``SeqIO`` using the ``"pdb-seqres"``
    format; the sequence of every record the parser yields is converted to
    ``str`` and the results are joined with single spaces, in parse order.

    Args:
        file_path: Path of the PDB file to read (opened in text mode).

    Returns:
        The joined sequences, or an empty string when no records are found.
    """
    with open(file_path, "r") as pdb_handle:
        # Consume the parser inside the ``with`` block, while the file is
        # still open.
        chain_sequences = [
            str(entry.seq) for entry in SeqIO.parse(pdb_handle, "pdb-seqres")
        ]
    return " ".join(chain_sequences)

def process_sdf(file_path: str) -> str:
    """Return the given SDF path unchanged.

    No reading, validation, or conversion is performed here; the function
    simply hands the path back to the caller.

    Args:
        file_path: Path of an SDF file.

    Returns:
        The same ``file_path`` value that was passed in.
    """
    sdf_path = file_path
    return sdf_path

def extract_smiles(text: str) -> str:
    """Return the first SMILES-looking token found in ``text``.

    A candidate is one character that is not ``J`` followed by at least six
    characters drawn from a fixed set of SMILES symbols (digits, a handful
    of element letters, brackets, bond and charge marks).

    Args:
        text: Free-form text that may contain a SMILES string.

    Returns:
        The first candidate with surrounding whitespace removed, or an empty
        string when nothing in ``text`` matches. The candidate is only
        pattern-matched, not chemically validated.
    """
    smiles_pattern = r"([^J][0-9BCOHNSOPrIFla@+\-\[\]\(\)\\\/%=#$]{6,})"
    # Only the first hit is needed, so stop at it instead of collecting every
    # match in the text.
    first_hit = re.search(smiles_pattern, text)
    if first_hit is None:
        return ""
    # The leading ``[^J]`` accepts any character, so it commonly swallows the
    # space or newline just before the token; whitespace is never part of a
    # SMILES string, so drop it.
    return first_hit.group(1).strip()

def is_valid_smiles(smiles: str) -> bool:
    """Report whether RDKit's ``MolFromSmiles`` can parse ``smiles``.

    Args:
        smiles: Candidate SMILES string.

    Returns:
        ``True`` when ``MolFromSmiles`` returns a molecule object, ``False``
        when it returns ``None``.
    """
    return MolFromSmiles(smiles) is not None

def extract_and_convert_to_sdf(text: str) -> str:
    """Find a SMILES string in ``text`` and convert it to an SDF file.

    The candidate comes from ``extract_smiles``, is checked with
    ``is_valid_smiles``, and is then passed to ``process_smiles``.

    Args:
        text: Free-form text expected to contain a SMILES string.

    Returns:
        The SDF file path returned by ``process_smiles``.

    Raises:
        ValueError: If no candidate is found or the candidate is not valid.
    """
    candidate = extract_smiles(text)
    # Guard clause: reject both "nothing extracted" and "extracted but not
    # parseable" before attempting the conversion.
    if not candidate or not is_valid_smiles(candidate):
        raise ValueError("No valid SMILES string found in the text.")
    return process_smiles(candidate)