UraniaLi committed on
Commit
131f93a
1 Parent(s): ccdf22b

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +102 -0
app.py ADDED
@@ -0,0 +1,102 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import json
import os
import time
import xml.etree.ElementTree as ET

import gradio as gr
import requests
import torch
from bs4 import BeautifulSoup
from transformers import AutoTokenizer, AutoModelForCausalLM
9
+
10
# Run the model on the GPU when torch can see one, otherwise on the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# BioMedLM is loaded once at import time and shared by every request.
tokenizer = AutoTokenizer.from_pretrained("stanford-crfm/BioMedLM")
model = AutoModelForCausalLM.from_pretrained("stanford-crfm/BioMedLM").to(device)

# NCBI E-utilities API key. The NCBI_API_KEY environment variable takes
# precedence so the credential does not have to live in source control.
# SECURITY: the literal fallback is kept only for backward compatibility; it
# has been published together with this file and should be rotated and removed.
api_key = os.environ.get("NCBI_API_KEY", '2c78468d6246082d456a140bb1de415ed108')

# Maximum number of PubMed hits requested per search (sent as `retmax`).
num_results = 10
18
+
19
+
20
def extract_longer_answers_from_paragraphs(paragraphs, query, tokenizer, model):
    """Generate text answering a fixed question about ``query``.

    The paragraphs are joined with single spaces into a context, the question
    ``"What is the mechanism of {query}?"`` is appended (separated by a space),
    and the resulting prompt is handed to ``model.generate``.

    Args:
        paragraphs: Iterable of context strings. May be empty, in which case
            the prompt consists of the question alone.
        query: Term interpolated into the question.
        tokenizer: Callable that encodes the prompt and exposes ``decode``.
        model: Object exposing ``generate`` and a ``device`` attribute.

    Returns:
        The first generated sequence decoded to a string.
        NOTE(review): for a causal LM the generated sequence normally starts
        with the prompt tokens, so the returned text likely includes the
        context as well -- confirm whether callers want that.
    """
    context = " ".join(paragraphs)
    question = f"What is the mechanism of {query}?"
    # Keep a separator so the question is not glued to the last abstract.
    prompt = f"{context} {question}" if context else question

    # Place the inputs on the same device as the model that was passed in,
    # rather than depending on a module-level global.
    # NOTE(review): the prompt is not truncated; a long context may exceed the
    # model's maximum sequence length -- confirm and truncate from the left if so.
    inputs = tokenizer(
        prompt,
        return_tensors="pt",
        add_special_tokens=False,
    ).to(model.device)

    top_p = 0.9  # Adjust as needed
    max_len = 50  # Adjust as needed; number of NEW tokens to generate

    outputs = model.generate(
        **inputs,
        # NOTE(review): top_p is a sampling parameter; without do_sample=True
        # decoding is presumably greedy and this value is ignored -- confirm.
        top_p=top_p,
        # `max_length` counts prompt tokens too, so with any real context a
        # limit of 50 left no budget for generation. Bound only the new tokens.
        max_new_tokens=max_len,
        num_beams=1,  # Adjust as needed
        no_repeat_ngram_size=2,  # Adjust as needed
    )
    return tokenizer.decode(outputs[0], skip_special_tokens=True)
37
+
38
+
39
def retrieve_and_answer(query1, query2):
    """Gradio callback: search PubMed for both queries and generate an answer.

    The non-empty queries are each wrapped in parentheses and joined with
    ``AND`` to form the search term. The first non-empty query is also used as
    the subject of the question passed to ``fetch_and_generate``.

    Args:
        query1: First search term; preferred as the question subject.
        query2: Second search term.

    Returns:
        A single string for the interface's one output textbox: the result of
        ``fetch_and_generate``, or ``""`` when both queries are blank.
    """
    cleaned = [(text or "").strip() for text in (query1, query2)]
    terms = [text for text in cleaned if text]
    if not terms:
        # Nothing to search for; skip the network round trip and the model.
        return ""

    combined_query = " AND ".join(f"({term})" for term in terms)
    return fetch_and_generate(terms[0], combined_query, tokenizer, model)
45
+
46
_EUTILS_BASE = "https://eutils.ncbi.nlm.nih.gov/entrez/eutils"
_REQUEST_TIMEOUT = 30  # seconds; keeps a stalled connection from hanging the UI
_FETCH_ERROR = "Error fetching articles."


def _eutils_get(endpoint, params):
    """GET an E-utilities endpoint; return the body text, or None on failure."""
    try:
        response = requests.get(
            f"{_EUTILS_BASE}/{endpoint}",
            # Passing `params` lets requests URL-encode spaces/parentheses.
            params={**params, "api_key": api_key},
            timeout=_REQUEST_TIMEOUT,
        )
    except requests.RequestException as exc:
        print("Error:", exc)
        return None
    if response.status_code != 200:
        print("Error:", response.status_code)
        return None
    return response.text


def _parse_pubmed_ids(esearch_xml):
    """Return the ID strings listed under <IdList> in an esearch XML reply."""
    id_list = ET.fromstring(esearch_xml).find('IdList')
    if id_list is None:
        return []
    return [node.text for node in id_list if node.text]


def _parse_abstracts(efetch_xml):
    """Return the first <AbstractText> of every <PubmedArticle> in the reply."""
    soup = BeautifulSoup(efetch_xml, 'xml')
    abstracts = []
    for article in soup.find_all('PubmedArticle'):
        abstract = article.find('AbstractText')
        if abstract is not None:
            abstracts.append(abstract.text)
    return abstracts


def fetch_and_generate(query, combined_query, tokenizer, model):
    """Search PubMed, collect abstracts, and generate an answer from them.

    Args:
        query: Subject of the question passed to the generator.
        combined_query: PubMed search term sent to esearch.
        tokenizer: Tokenizer forwarded to the generator.
        model: Model forwarded to the generator.

    Returns:
        The string produced by ``extract_longer_answers_from_paragraphs``, or
        ``"Error fetching articles."`` when the search request fails or its
        reply cannot be parsed. A failed abstract download is logged and the
        answer is generated from whatever context is available (possibly none).
    """
    search_xml = _eutils_get(
        "esearch.fcgi",
        {
            "db": "pubmed",
            "term": combined_query,
            "retmax": num_results,
            "sort": "relevance",
            "retmode": "xml",
        },
    )
    if search_xml is None:
        return _FETCH_ERROR

    try:
        article_ids = _parse_pubmed_ids(search_xml)
    except ET.ParseError as exc:
        print("Error:", exc)
        return _FETCH_ERROR

    paragraphs = []
    if article_ids:
        # One batched efetch replaces a request (and a sleep) per article.
        # NOTE(review): records are expected back in the requested ID order --
        # confirm against the E-utilities documentation if order matters.
        fetch_xml = _eutils_get(
            "efetch.fcgi",
            {"db": "pubmed", "id": ",".join(article_ids), "retmode": "xml"},
        )
        if fetch_xml is not None:
            paragraphs = _parse_abstracts(fetch_xml)

    return extract_longer_answers_from_paragraphs(paragraphs, query, tokenizer, model)
85
+
86
+
87
# Gradio Interface
# Two free-text inputs feed `retrieve_and_answer`, whose result is shown in a
# single output textbox.
iface = gr.Interface(
    fn=retrieve_and_answer,
    inputs=[
        gr.Textbox(placeholder="Enter Query 1", label='query1'),
        gr.Textbox(placeholder="Enter Query 2", label='query2'),
    ],
    outputs=[gr.Textbox(placeholder="Answer from BioMedLM")],
    # `live=True` was dropped on purpose: it re-ran the PubMed search and the
    # model generation on every keystroke. The default submit button triggers
    # one run per request instead.
    title="PubMed Question Answering: Stanford/BioMedLM",
    description="Enter two queries to retrieve PubMed articles and generate an answer with BioMedLM.",
    examples=[
        ["sertraline", "mechanism"],
        ["cancer", "treatment"],
    ],
)

iface.launch()