# OpenWormLLM / publications.py
# pgleeson's picture
# Mainly formatting updates; to v0.2.5
# de348fb
# Based on: https://github.com/allenai/s2-folks/tree/main/examples/python
#
#
import os
import traceback
import requests
# Semantic Scholar API key taken from the environment; None when the
# S2_API_KEY variable is not set.
S2_API_KEY = os.environ.get("S2_API_KEY")

# Module-level default for how many results to show. find_basis_paper() has
# its own ``result_limit`` parameter with the same default value.
result_limit = 10
def find_basis_paper(query, result_limit=10):
    """Search Semantic Scholar for *query* and return the hits as Markdown text.

    A single request is sent to the Semantic Scholar Graph API paper search
    endpoint (up to 100 results, with title, url, authors, year, tldr,
    journal and citationCount fields). The returned papers are passed to
    ``format_paper_list`` which keeps at most ``result_limit`` of them.

    Args:
        query: Free-text search string. An empty/None query returns a short
            message immediately instead of contacting the API.
        result_limit: Maximum number of papers to include in the output;
            converted with ``int()`` before use.

    Returns:
        A string. On success it starts with ``"Search results:\\n"`` followed
        by the formatted papers; when the API reports zero matches it is
        ``"No matches found. Please try another query."``; when anything
        goes wrong the error text is appended after ``"There was a problem..."``.
        Exceptions are not propagated to the caller.
    """
    if not query:
        # Previously an empty query made the retry loop spin forever.
        return "No query provided. Please enter a search query."

    info = "Search results:\n"
    try:
        # The key is optional: the environment variable may be unset (None)
        # or empty, in which case the request is sent without the header.
        headers = {}
        if S2_API_KEY:
            print("Using S2_API_KEY...")
            headers["X-API-KEY"] = S2_API_KEY

        rsp = requests.get(
            "https://api.semanticscholar.org/graph/v1/paper/search",
            headers=headers,
            params={
                "query": query,
                "limit": 100,
                "fields": "title,url,authors,year,tldr,journal,citationCount",
                "sort": "citationCount:desc",
            },
            # Without a timeout a stalled connection would block indefinitely.
            timeout=30,
        )
        rsp.raise_for_status()
        results = rsp.json()
        total = results["total"]
        if not total:
            return "No matches found. Please try another query."

        print(f"Found {total} results. Showing up to {result_limit}.")
        info += format_paper_list(results["data"], int(result_limit))
    except Exception as e:
        # Boundary handler: log the traceback and report the problem in the
        # returned text rather than raising into the caller.
        print(traceback.format_exc())
        info += "There was a problem...\n\n%s" % e

    print(info)
    return info
def get_element(paper, part):
    """Return a display value for one field of a paper record.

    Args:
        paper: Mapping describing a paper (as found in the search response).
        part: Field to extract. Supported values are ``"year"``, ``"url"``,
            ``"title"``, ``"citationCount"`` (returned directly),
            ``"journal"`` (its ``"name"``), ``"authors"`` (the ``"name"`` of
            the first author) and ``"tldr"`` (its ``"text"``).

    Returns:
        The field value, with surrounding whitespace stripped when it is a
        string. ``"???"`` when the field, or the nested value it is read
        from, is missing or None (including an empty author list).
        ``"<unknown field NAME>"`` for an unsupported ``part``.
    """
    if part in ("year", "url", "title", "citationCount"):
        value = paper.get(part)
    elif part == "journal":
        # The journal object may be absent, None, or lack a "name" entry.
        value = (paper.get(part) or {}).get("name")
    elif part == "authors":
        # Only the first author is shown; the list may be empty.
        authors = paper.get(part) or []
        value = authors[0].get("name") if authors else None
    elif part == "tldr":
        value = (paper.get(part) or {}).get("text")
    else:
        return "<unknown field %s>" % part

    if value is None:
        return "???"
    return value.strip() if isinstance(value, str) else value
def format_paper_list(all_papers, result_limit):
    """Format the most-cited papers as a Markdown snippet.

    Papers are ordered by ``citationCount`` (highest first) and truncated to
    ``result_limit`` entries. Each entry is one line with index, first
    author, year, journal (linked to the paper URL), title and citation
    count, optionally followed by a block-quoted TLDR line.

    Args:
        all_papers: Iterable of paper mappings.
        result_limit: Maximum number of papers to include.

    Returns:
        The formatted text; an empty string when no papers are selected.
    """
    # A missing or None citationCount is ranked as 0 so that one incomplete
    # record cannot abort the whole listing with KeyError/TypeError.
    sorted_papers = sorted(
        all_papers,
        key=lambda d: d.get("citationCount") or 0,
        reverse=True,
    )[:result_limit]
    print(sorted_papers)

    entries = []
    for idx, paper in enumerate(sorted_papers):
        entries.append(
            f"{idx}: **{get_element(paper, 'authors')} et al. "
            f"{get_element(paper, 'year')}**, "
            f"[{get_element(paper, 'journal')}]({get_element(paper, 'url')}) "
            f"{get_element(paper, 'title')}. "
            f"Cited by {get_element(paper, 'citationCount')}\n\n"
        )
        # The TLDR is optional: the key may be absent, None, or have no text.
        tldr_text = (paper.get("tldr") or {}).get("text")
        if tldr_text:
            entries.append(f"> _{tldr_text.strip()}_ \n\n")
    return "".join(entries)
if __name__ == "__main__":
    # Manual smoke test: run one example search when executed as a script.
    example_query = "NeuroML"
    find_basis_paper(example_query)