pdb_answers / utils.py
GeemiW
citation error in utils try-1
65a6215
import os
from paperqa import Docs
import requests
import paperqa
import json
from bs4 import BeautifulSoup
import urllib.request
class PDBQuery:
    """Collect literature citations and raw web text for a single PDB entry.

    The entry metadata comes from the RCSB REST API, the bibliographic
    details from the NCBI E-utilities ``esummary`` endpoint, and the page
    dump from PubMed article pages plus the RCSB entry endpoint.

    Attributes:
        pdbid: The PDB identifier passed to the constructor.
        pubids: PubMed IDs found in the entry's ``citation`` records.
        refs: Initialised to an empty list; not used by the methods here.
        citations: Citation strings built by :meth:`create_citation`.
        data: Last JSON payload returned by :meth:`get_pdb_descriptions`.
        result: Entry metadata used by :meth:`get_pubids`.
    """

    # Seconds to wait on each HTTP call so a stalled server cannot hang
    # the caller indefinitely (requests has no timeout by default).
    REQUEST_TIMEOUT = 30

    def __init__(self, pdbid):
        """Store ``pdbid`` and start with empty result containers."""
        self.pdbid = pdbid
        self.pubids = []
        self.refs = []
        self.citations = []
        self.data = None
        self.result = None

    def get_pdb_descriptions(self, pdbid):
        """Fetch the RCSB core-entry JSON for ``pdbid``.

        The decoded payload is stored on ``self.data`` and returned.
        The HTTP status is not checked; whatever JSON body the server
        sends back is returned as-is.
        """
        pdbrest = f"https://data.rcsb.org/rest/v1/core/entry/{pdbid}"
        r = requests.get(pdbrest, timeout=self.REQUEST_TIMEOUT)
        self.data = r.json()
        return self.data

    def get_pubids(self):
        """Return the PubMed IDs listed in the entry's citation records.

        The entry metadata is fetched on demand when it has not been
        loaded yet, so this method can be called on its own. The list is
        rebuilt on every call, so repeated calls do not accumulate
        duplicates.

        Raises:
            KeyError: If the entry metadata has no ``citation`` section.
        """
        if getattr(self, "result", None) is None:
            self.result = self.get_pdb_descriptions(self.pdbid)
        self.pubids = [
            paper["pdbx_database_id_pub_med"]
            for paper in self.result["citation"]
            if "pdbx_database_id_pub_med" in paper
        ]
        return self.pubids

    @staticmethod
    def _format_citation(summary):
        """Build one citation string from an esummary record.

        ``summary`` must provide ``authors`` (a list of mappings with a
        ``name`` key), ``source``, ``pubdate``, ``volume``, ``issue``,
        ``pages`` and ``elocationid``.
        """
        # Join the names directly; round-tripping through str(list) and
        # stripping quote characters mangles names such as "O'Brien".
        names = ", ".join(author["name"] for author in summary["authors"])
        journal = summary["source"]
        year = summary["pubdate"][0:4]
        volume = summary["volume"]
        issue = summary["issue"]
        pages = summary["pages"]
        doi = summary["elocationid"]
        # NOTE(review): the article title is not part of the citation
        # string; the previous code cleaned the title but never used it.
        # Confirm whether it should be included.
        return f"{names}.{journal} {year};{volume}({issue}):{pages}. {doi}"

    def create_citation(self):
        """Build a citation string for every PubMed ID of the entry.

        Refreshes ``self.result`` and ``self.pubids``, then queries the
        esummary endpoint once per PubMed ID. ``self.citations`` is
        replaced with the new list, which is also returned.

        Raises:
            KeyError: If a response lacks the expected record or fields.
        """
        self.result = self.get_pdb_descriptions(self.pdbid)
        self.pubids = self.get_pubids()
        citations = []
        for pid in self.pubids:
            puburl = f'https://eutils.ncbi.nlm.nih.gov/entrez/eutils/esummary.fcgi?db=pubmed&id={pid}&retmode=json'
            search_response = requests.get(
                puburl, timeout=self.REQUEST_TIMEOUT
            ).json()
            # Records are keyed by the PubMed ID rendered as a string.
            summary = search_response["result"][str(pid)]
            citations.append(self._format_citation(summary))
        self.citations = citations
        return self.citations

    def write_webdata(self, out_path="web_data.txt"):
        """Dump the PubMed pages and the RCSB entry response to a text file.

        Args:
            out_path: Destination file; defaults to ``web_data.txt`` in
                the current working directory. The file is overwritten.
        """
        url_list = [f'https://pubmed.ncbi.nlm.nih.gov/{pid}' for pid in self.pubids]
        # Finally add the RCSB entry endpoint for this PDB ID.
        url_list.append(f"https://data.rcsb.org/rest/v1/core/entry/{self.pdbid}")
        with open(out_path, "w", encoding="utf-8") as out:
            for url in url_list:
                r = requests.get(url, timeout=self.REQUEST_TIMEOUT)
                bs = BeautifulSoup(r.text, 'html.parser')
                # prettify() re-indents the parsed markup; tags are kept.
                # NOTE(review): an earlier comment here said HTML syntax
                # was removed - confirm whether get_text() was intended.
                out.write(bs.prettify())