import numpy as np
import pandas as pd
import os
import requests
import json
import tarfile, gzip
import time, glob
from utils import threeToOne
import streamlit as st
from pathlib import Path
import gzip
import shutil
import codecs
import io
def uniprot_pdb_residue_mapping(pdb_id, uniprot_id, save_path):
"""
This code does residue-wise mapping between UniProt and PDB residues.
"""
ascaris = {}
full_ascaris = {}
res = requests.get(f'https://www.ebi.ac.uk/pdbe/download/api/pdb/entry/sifts?id={pdb_id}')
url = json.loads(res.text)['url']
response = requests.get(url, stream=True)
file = tarfile.open(fileobj=response.raw, mode="r|gz")
file.extractall(path=save_path) # Creates another gz file
existing_pdb = list(Path(save_path).glob("*"))
existing_pdb = [str(i) for i in existing_pdb]
try:
with gzip.open(f'{save_path}/{pdb_id.lower()}.xml.gz', 'rt') as f:
file_content = f.read()
except FileNotFoundError:
with gzip.open(f'{save_path}/{pdb_id}.xml.gz', 'rt') as f:
file_content = f.read()
content = file_content.split('\n')
index = [idx for idx, s in enumerate(content) if 'listResidue' in s]
listResidues = []
for ind in range(0, len(index), 2):
try:
if ((content[index[ind]]).strip() == '') & (
(content[index[ind + 1]]).strip() == ''):
listResidues.append(content[index[ind]:index[ind + 1]])
except:
IndexError
for true_content in listResidues:
for sub_content in true_content:
if f'dbAccessionId="{uniprot_id}"' in sub_content:
content = [i.strip() for i in true_content]
sel = [i for i in content if
('