Spaces:

GeemiW
/

pdb_answers

Runtime error

pdb_answers / utils.py

GeemiW

citation error in utils try-1

65a6215 over 2 years ago

2.98 kB

	import os
	from paperqa import Docs
	import requests
	import paperqa
	import json
	from bs4 import BeautifulSoup
	import urllib.request

	class PDBQuery:
	    """Gather literature references for a single RCSB PDB entry.

	    The class fetches the entry record from the RCSB REST API, extracts the
	    PubMed IDs listed in its ``citation`` records, formats one citation
	    string per PubMed ID from NCBI ESummary data, and can dump the
	    related web pages into ``web_data.txt``.

	    Attributes set by the visible code:
	        pdbid: entry identifier passed to the constructor.
	        pubids: PubMed IDs found by :meth:`get_pubids`.
	        refs: initialised to an empty list; not used by the methods here.
	        citations: citation strings built by :meth:`create_citation`.
	        data: last JSON payload returned by :meth:`get_pdb_descriptions`.
	        result: entry record that :meth:`get_pubids` reads.
	    """

	    def __init__(self, pdbid):
	        """Store *pdbid* and start with empty result containers."""
	        self.pdbid = pdbid
	        self.pubids = []
	        self.refs = []
	        self.citations = []
	        # Declared up front so get_pubids() can tell "not fetched yet" apart
	        # from a fetched record instead of failing with AttributeError.
	        self.data = None
	        self.result = None

	    def get_pdb_descriptions(self, pdbid):
	        """Fetch the RCSB core entry record for *pdbid*.

	        The decoded JSON body is cached on ``self.data`` and returned.
	        """
	        pdbrest = f"https://data.rcsb.org/rest/v1/core/entry/{pdbid}"
	        r = requests.get(pdbrest)
	        self.data = r.json()
	        return self.data

	    def get_pubids(self):
	        """Return the PubMed IDs listed in the entry's citation records.

	        The entry record is fetched on demand when it has not been loaded
	        yet, so this method may be called before :meth:`create_citation`.
	        The list is rebuilt on every call, so repeated calls do not pile up
	        duplicate IDs.

	        Raises:
	            KeyError: if the entry record has no ``"citation"`` key.
	        """
	        if self.result is None:
	            self.result = self.get_pdb_descriptions(self.pdbid)

	        # Citation records without a PubMed ID are skipped.
	        self.pubids = [
	            paper["pdbx_database_id_pub_med"]
	            for paper in self.result["citation"]
	            if "pdbx_database_id_pub_med" in paper
	        ]
	        return self.pubids

	    def create_citation(self):
	        """Build one citation string per PubMed ID of the entry.

	        Re-fetches the entry record, refreshes ``self.pubids`` and replaces
	        ``self.citations`` with freshly formatted strings, which are also
	        returned.

	        Raises:
	            KeyError: if the entry record or an ESummary record lacks one of
	                the fields that are read.
	        """
	        self.result = self.get_pdb_descriptions(self.pdbid)
	        self.pubids = self.get_pubids()

	        self.citations = [
	            self._format_citation(self._fetch_pubmed_summary(pid))
	            for pid in self.pubids
	        ]
	        return self.citations

	    @staticmethod
	    def _fetch_pubmed_summary(pid):
	        """Return the ESummary record for PubMed ID *pid* (one HTTP request)."""
	        puburl = f'https://eutils.ncbi.nlm.nih.gov/entrez/eutils/esummary.fcgi?db=pubmed&id={pid}&retmode=json'
	        search_response = requests.get(puburl).json()
	        # ESummary keys its records by the ID as a string; pid is converted
	        # because it may not already be a str.
	        return search_response["result"][str(pid)]

	    @staticmethod
	    def _format_citation(summary):
	        """Format an ESummary record as ``authors.journal year;vol(issue):pages. doi``.

	        The article title is not part of the string. Author names are joined
	        verbatim, so apostrophes and brackets inside a name are preserved.
	        """
	        names = ", ".join(author["name"] for author in summary["authors"])
	        journal = summary["source"]
	        pub_date = summary["pubdate"]
	        volume = summary["volume"]
	        issue = summary["issue"]
	        pages = summary["pages"]
	        doi = summary["elocationid"]
	        # Only the first four characters of the publication date are kept.
	        return f"{names}.{journal} {pub_date[0:4]};{volume}({issue}):{pages}. {doi}"

	    def write_webdata(self):
	        """Download the PubMed pages and the PDB entry record into ``web_data.txt``.

	        One PubMed URL per ID in ``self.pubids`` is fetched, followed by the
	        RCSB REST entry URL. The file is overwritten on every call.
	        """
	        url_list = [f'https://pubmed.ncbi.nlm.nih.gov/{pid}' for pid in self.pubids]
	        # Finally add the RCSB REST record of the entry itself.
	        url_list.append(f"https://data.rcsb.org/rest/v1/core/entry/{self.pdbid}")

	        with open('web_data.txt', "w", encoding="utf-8") as out:
	            for url in url_list:
	                r = requests.get(url)
	                bs = BeautifulSoup(r.text, 'html.parser')
	                # prettify() re-indents the parsed markup; the tags are kept.
	                # NOTE(review): the earlier comment here said this removes
	                # HTML syntax - if tag stripping is wanted, confirm and
	                # switch to text extraction.
	                out.write(bs.prettify())