Spaces:
Runtime error
Runtime error
import os | |
from paperqa import Docs | |
import requests | |
import paperqa | |
import json | |
from bs4 import BeautifulSoup | |
import urllib.request | |
class PDBQuery:
    """Collect literature references for a single RCSB PDB entry.

    The entry is fetched from the RCSB REST API, the PubMed IDs of its
    citations are extracted, and each ID is resolved to a citation string
    through the NCBI E-utilities ``esummary`` endpoint.
    """

    # Seconds to wait for any single HTTP request; without a timeout
    # ``requests`` blocks indefinitely when a server stops responding.
    REQUEST_TIMEOUT = 30

    def __init__(self, pdbid):
        """Store the PDB identifier and initialise empty result containers.

        Args:
            pdbid: PDB entry identifier, interpolated into the request URLs.
        """
        self.pdbid = pdbid
        self.pubids = []
        self.refs = []
        self.citations = []
        # Populated by get_pdb_descriptions() / create_citation().
        self.data = None
        self.result = None

    def get_pdb_descriptions(self, pdbid):
        """Fetch the RCSB core-entry record for ``pdbid``.

        Args:
            pdbid: PDB entry identifier to look up.

        Returns:
            The decoded JSON response, also stored on ``self.data``.
        """
        pdbrest = f"https://data.rcsb.org/rest/v1/core/entry/{pdbid}"
        r = requests.get(pdbrest, timeout=self.REQUEST_TIMEOUT)
        self.data = r.json()
        return self.data

    def get_pubids(self):
        """Extract the PubMed IDs of the articles cited by the entry.

        The entry record is fetched on demand when ``self.result`` has not
        been set yet. The list is rebuilt on every call, so repeated calls
        do not accumulate duplicates.

        Returns:
            List of the ``pdbx_database_id_pub_med`` values found in the
            entry's ``citation`` list (empty when there is none); the same
            list is stored on ``self.pubids``.
        """
        if self.result is None:
            self.result = self.get_pdb_descriptions(self.pdbid)
        self.pubids = [
            paper["pdbx_database_id_pub_med"]
            for paper in self.result.get("citation", [])
            if "pdbx_database_id_pub_med" in paper
        ]
        return self.pubids

    @staticmethod
    def _format_citation(summary):
        """Build one citation string from a PubMed ``esummary`` record.

        Args:
            summary: Mapping with the keys ``authors`` (list of dicts that
                each have a ``name``), ``source``, ``pubdate``, ``volume``,
                ``issue``, ``pages`` and ``elocationid``.

        Returns:
            ``"<authors>.<journal> <year>;<volume>(<issue>):<pages>. <doi>"``.
            The article title is not part of the string.
        """
        # join() keeps apostrophes and brackets that occur inside names.
        names = ", ".join(author["name"] for author in summary["authors"])
        journal = summary["source"]
        pub_date = summary["pubdate"]
        volume = summary["volume"]
        issue = summary["issue"]
        pages = summary["pages"]
        doi = summary["elocationid"]
        # Only the first four characters of the publication date are kept.
        return f"{names}.{journal} {pub_date[0:4]};{volume}({issue}):{pages}. {doi}"

    def create_citation(self):
        """Create a citation string for every PubMed ID of the entry.

        Re-fetches the entry record, refreshes ``self.pubids`` and rebuilds
        ``self.citations`` from scratch, so calling this method more than
        once does not duplicate citations.

        Returns:
            List of citation strings, also stored on ``self.citations``.

        Raises:
            KeyError: If the esummary response lacks the record for an ID
                or one of the fields used in the citation.
        """
        self.result = self.get_pdb_descriptions(self.pdbid)
        self.pubids = self.get_pubids()
        self.citations = []
        for pid in self.pubids:
            puburl = f'https://eutils.ncbi.nlm.nih.gov/entrez/eutils/esummary.fcgi?db=pubmed&id={pid}&retmode=json'
            search_response = requests.get(
                puburl, timeout=self.REQUEST_TIMEOUT
            ).json()
            # The response is indexed by the ID rendered as a string.
            summary = search_response["result"][str(pid)]
            self.citations.append(self._format_citation(summary))
        return self.citations

    def write_webdata(self):
        """Download the PubMed pages and the PDB entry into ``web_data.txt``.

        One URL per ID in ``self.pubids`` is fetched, followed by the RCSB
        REST entry URL. Each response is parsed with BeautifulSoup and its
        ``prettify()`` output is written to the file, which is overwritten
        on every call.
        """
        url_list = [f'https://pubmed.ncbi.nlm.nih.gov/{pid}' for pid in self.pubids]
        # Finally add the RCSB REST record of the entry itself.
        url_list.append(f"https://data.rcsb.org/rest/v1/core/entry/{self.pdbid}")
        with open('web_data.txt', "w", encoding="utf-8") as out:
            for url in url_list:
                r = requests.get(url, timeout=self.REQUEST_TIMEOUT)
                bs = BeautifulSoup(r.text, 'html.parser')
                # NOTE: prettify() re-indents the markup but keeps the tags;
                # it does not strip the HTML.
                out.write(bs.prettify())