# Page header captured together with this listing (not part of the program):
#   Spaces: Runtime error
#   File size: 4,019 Bytes
#   753e275
import os
import re
import json
import shelve
from Bio import PDB
from typing import Optional, Tuple, List
from dataclasses import dataclass, field
@dataclass
class EvalTask:
    """A generated structure file paired with its reference file.

    Instances carry the descriptive fields used for reporting, plus a
    ``scores`` dict whose entries are merged into the report row produced by
    :meth:`to_report_dict`.
    """

    # Path of the generated PDB file; also used as the key in `save_to_db`.
    in_path: str
    # Path of the reference PDB file.
    ref_path: str
    # Metadata dict associated with this task (schema is defined by the
    # producer of the task -- not visible from this class).
    info: dict
    structure: str
    name: str
    method: str
    cdr: str
    ab_chains: List
    residue_first: Optional[Tuple] = None
    residue_last: Optional[Tuple] = None
    # Per-instance dict; a fresh one is created for every task.
    scores: dict = field(default_factory=dict)

    def get_gen_biopython_model(self):
        """Parse ``in_path`` with Biopython and return model 0."""
        pdb_parser = PDB.PDBParser(QUIET=True)
        # The path doubles as the structure id handed to Biopython.
        parsed = pdb_parser.get_structure(self.in_path, self.in_path)
        return parsed[0]

    def get_ref_biopython_model(self):
        """Parse ``ref_path`` with Biopython and return model 0."""
        pdb_parser = PDB.PDBParser(QUIET=True)
        parsed = pdb_parser.get_structure(self.ref_path, self.ref_path)
        return parsed[0]

    def save_to_db(self, db: shelve.Shelf):
        """Store this task in ``db`` under its ``in_path``."""
        db[self.in_path] = self

    def to_report_dict(self):
        """Return a flat dict describing this task for reporting.

        The row starts with ``method``, ``structure``, ``cdr`` and
        ``filename`` (base name of ``in_path``); every entry of ``scores`` is
        then merged in, overriding a fixed key if the names collide.
        """
        report = dict(
            method=self.method,
            structure=self.structure,
            cdr=self.cdr,
            filename=os.path.basename(self.in_path),
        )
        report.update(self.scores)
        return report
class TaskScanner:
    """Walk a directory tree and collect evaluation tasks not seen before.

    A file becomes a task when, in order:

    * its name is ``<digits>.pdb`` (or ``<digits>_<postfix>.pdb`` when a
      postfix is configured),
    * it is not empty and its path is not already in ``visited``,
    * the same directory contains ``REF1.pdb`` (or ``REF1_<postfix>.pdb``),
    * ``metadata.json`` one directory above lists an item whose ``tag`` equals
      the name of the directory holding the file.

    Args:
        root: Directory to walk.
        postfix: Optional literal file-name suffix; falsy means "no postfix".
        db: Optional mapping (e.g. a shelf) whose keys are paths that were
            already processed; they are pre-loaded into ``visited``.
    """

    def __init__(self, root, postfix=None, db: Optional[shelve.Shelf] = None):
        super().__init__()
        self.root = root
        self.postfix = postfix
        self.db = db
        self.visited = set()
        if db is not None:
            self.visited.update(db.keys())

    def _filename_spec(self):
        """Return ``(compiled input-name pattern, reference file name)``.

        The postfix is treated as literal text: it is escaped before being
        placed in the pattern, matching its literal use in the reference name.
        """
        if not self.postfix:
            return re.compile(r'^\d+\.pdb$'), 'REF1.pdb'
        literal_postfix = re.escape(str(self.postfix))
        pattern = re.compile(rf'^\d+_{literal_postfix}\.pdb$')
        return pattern, f'REF1_{self.postfix}.pdb'

    def _get_metadata(self, fpath):
        """Look up the metadata item that describes ``fpath``.

        Reads ``metadata.json`` from the grandparent directory of ``fpath``
        and selects the item whose ``tag`` equals the parent directory name
        (the last one wins if several match). The selected item is extended
        with ``antibody_chains``, ``structure`` and ``method``.

        Returns:
            The augmented item dict, or ``None`` when the metadata file is
            missing, is not valid JSON, or has no item with a matching tag.

        Raises:
            KeyError: If the metadata lacks the ``items``/``identifier`` keys
                or an item lacks ``tag``/``residue_first``.
        """
        tag_dir = os.path.dirname(fpath)
        meta_dir = os.path.dirname(tag_dir)
        json_path = os.path.join(meta_dir, 'metadata.json')
        tag_name = os.path.basename(tag_dir)
        method_name = os.path.basename(os.path.dirname(meta_dir))

        # Only opening and parsing can raise the exceptions handled here.
        try:
            with open(json_path, 'r') as f:
                metadata = json.load(f)
        except (json.JSONDecodeError, FileNotFoundError):
            return None

        info = None
        # A dict is used as an ordered set so the chain list keeps the order
        # of first appearance instead of depending on hash randomisation.
        chains = {}
        for item in metadata['items']:
            if item['tag'] == tag_name:
                info = item
            # Chains are gathered from every item, not only the matching one.
            chains[item['residue_first'][0]] = None

        if info is None:
            return None
        info['antibody_chains'] = list(chains)
        info['structure'] = metadata['identifier']
        info['method'] = method_name
        return info

    # The return annotation is quoted so that defining this class does not
    # require EvalTask to be defined first.
    def scan(self) -> "List[EvalTask]":
        """Walk ``root`` and return tasks for files not yet in ``visited``.

        Every returned file path is added to ``visited``, so calling ``scan``
        again only yields files that appeared in the meantime.
        """
        input_pattern, ref_fname = self._filename_spec()
        tasks = []
        for parent, _, files in os.walk(self.root):
            # Path to the reference structure (shared by the whole directory)
            ref_path = os.path.join(parent, ref_fname)
            for fname in files:
                if not input_pattern.match(fname):
                    continue
                fpath = os.path.join(parent, fname)
                if os.path.getsize(fpath) == 0:
                    continue
                if fpath in self.visited:
                    continue
                if not os.path.exists(ref_path):
                    continue

                # CDR information
                info = self._get_metadata(fpath)
                if info is None:
                    continue

                tasks.append(EvalTask(
                    in_path=fpath,
                    ref_path=ref_path,
                    info=info,
                    structure=info['structure'],
                    name=info['name'],
                    method=info['method'],
                    cdr=info['tag'],
                    ab_chains=info['antibody_chains'],
                    residue_first=info.get('residue_first', None),
                    residue_last=info.get('residue_last', None),
                ))
                self.visited.add(fpath)
        return tasks
|