Spaces:
Runtime error
Runtime error
import os | |
import re | |
import json | |
import shelve | |
from Bio import PDB | |
from typing import Optional, Tuple, List | |
from dataclasses import dataclass, field | |
@dataclass
class EvalTask:
    """One structure file to evaluate, together with its reference.

    Instances are built from keyword arguments and carry a per-instance
    ``scores`` dict, so the class must be a dataclass: without the
    ``@dataclass`` decorator the annotated fields would not become
    constructor parameters and ``scores`` would be a shared ``Field`` object.
    """

    in_path: str        # PDB file parsed by get_gen_biopython_model(); also the db key
    ref_path: str       # PDB file parsed by get_ref_biopython_model()
    info: dict          # metadata record this task was created from
    structure: str      # reported under 'structure'
    name: str
    method: str         # reported under 'method'
    cdr: str            # reported under 'cdr'
    ab_chains: List
    residue_first: Optional[Tuple] = None
    residue_last: Optional[Tuple] = None
    # Metric name -> value; merged into the report row by to_report_dict().
    scores: dict = field(default_factory=dict)

    def get_gen_biopython_model(self):
        """Parse ``in_path`` with Biopython and return its first model."""
        parser = PDB.PDBParser(QUIET=True)
        # The path doubles as the structure id; [0] selects the first model.
        return parser.get_structure(self.in_path, self.in_path)[0]

    def get_ref_biopython_model(self):
        """Parse ``ref_path`` with Biopython and return its first model."""
        parser = PDB.PDBParser(QUIET=True)
        return parser.get_structure(self.ref_path, self.ref_path)[0]

    def save_to_db(self, db: shelve.Shelf):
        """Store this task in ``db`` under its ``in_path`` key."""
        db[self.in_path] = self

    def to_report_dict(self):
        """Return a flat dict: method, structure, cdr, file name and all scores.

        Score entries are unpacked last, so a score named like one of the
        fixed keys overrides it.
        """
        return {
            'method': self.method,
            'structure': self.structure,
            'cdr': self.cdr,
            'filename': os.path.basename(self.in_path),
            **self.scores
        }
class TaskScanner:
    """Walk a results directory and turn new sample files into ``EvalTask``s.

    The layout read by this class is
    ``<...>/<method>/<structure dir>/<tag>/<sample>.pdb`` with a
    ``metadata.json`` inside ``<structure dir>`` and a reference file
    (``REF1.pdb`` or ``REF1_<postfix>.pdb``) next to each sample.
    """

    def __init__(self, root, postfix=None, db: Optional[shelve.Shelf] = None):
        """Create a scanner.

        Args:
            root: Directory that ``scan`` walks recursively.
            postfix: Optional file-name suffix. When set, samples are named
                ``<digits>_<postfix>.pdb`` and the reference
                ``REF1_<postfix>.pdb``.
            db: Optional mapping of already processed tasks; its keys are
                treated as already visited paths.
        """
        super().__init__()
        self.root = root
        self.postfix = postfix
        self.visited = set()
        self.db = db
        if db is not None:
            self.visited.update(db.keys())

    def _get_metadata(self, fpath):
        """Return the metadata record for the sample at ``fpath``, or ``None``.

        The record is the item of ``metadata.json`` whose ``'tag'`` equals the
        name of the sample's directory (the last such item if several match).
        It is extended with ``'antibody_chains'``, ``'structure'`` and
        ``'method'``. ``None`` is returned when the file is missing, is not
        valid JSON, or contains no matching item.
        """
        tag_dir = os.path.dirname(fpath)
        structure_dir = os.path.dirname(tag_dir)
        json_path = os.path.join(structure_dir, 'metadata.json')
        tag_name = os.path.basename(tag_dir)
        method_name = os.path.basename(os.path.dirname(structure_dir))

        # Only reading/parsing is best-effort; schema errors still propagate.
        try:
            with open(json_path, 'r') as f:
                metadata = json.load(f)
        except (json.JSONDecodeError, FileNotFoundError):
            return None

        info = None
        antibody_chains = set()
        for item in metadata['items']:
            if item['tag'] == tag_name:
                info = item
            # Chain ids are collected from every item, not only the match.
            antibody_chains.add(item['residue_first'][0])

        if info is None:
            return None
        info['antibody_chains'] = list(antibody_chains)
        info['structure'] = metadata['identifier']
        info['method'] = method_name
        return info

    def scan(self) -> List["EvalTask"]:
        """Return tasks for sample files not seen by earlier calls.

        A file is skipped when its name does not match the sample pattern,
        it is empty, it was already visited, its reference file is missing,
        or no metadata record is found. Every returned file is added to
        ``self.visited``.
        """
        if not self.postfix:
            input_fname_pattern = re.compile(r'^\d+\.pdb$')
            ref_fname = 'REF1.pdb'
        else:
            # The postfix is literal text (it is used verbatim in ref_fname),
            # so escape it before embedding it in the regular expression.
            input_fname_pattern = re.compile(
                r'^\d+_' + re.escape(str(self.postfix)) + r'\.pdb$'
            )
            ref_fname = f'REF1_{self.postfix}.pdb'

        tasks = []
        for parent, _, files in os.walk(self.root):
            for fname in files:
                fpath = os.path.join(parent, fname)
                if not input_fname_pattern.match(fname):
                    continue
                if os.path.getsize(fpath) == 0:
                    continue
                if fpath in self.visited:
                    continue

                # Path to the reference structure
                ref_path = os.path.join(parent, ref_fname)
                if not os.path.exists(ref_path):
                    continue

                # CDR information
                info = self._get_metadata(fpath)
                if info is None:
                    continue

                tasks.append(EvalTask(
                    in_path=fpath,
                    ref_path=ref_path,
                    info=info,
                    structure=info['structure'],
                    name=info['name'],
                    method=info['method'],
                    cdr=info['tag'],
                    ab_chains=info['antibody_chains'],
                    residue_first=info.get('residue_first', None),
                    residue_last=info.get('residue_last', None),
                ))
                self.visited.add(fpath)
        return tasks