import gradio as gr
from transformers import AutoTokenizer, AutoModelForCausalLM
import torch
import requests
from bs4 import BeautifulSoup
import time
import json
from lxml import etree 

# Move models to CUDA if available; all tensors below must live on `device`.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
# model_max_length=1024 caps tokenized prompts at BioGPT-Large's context window.
tokenizer = AutoTokenizer.from_pretrained("microsoft/BioGPT-Large", model_max_length= 1024)
model = AutoModelForCausalLM.from_pretrained("microsoft/BioGPT-Large").to(device)


# SECURITY NOTE(review): this NCBI E-utilities API key is committed in source.
# It should be moved to an environment variable and the key rotated.
api_key = '2c78468d6246082d456a140bb1de415ed108'
# Maximum number of PubMed hits requested per search (esearch `retmax`).
num_results = 10


def extract_longer_answers_from_paragraphs(paragraphs, query, tokenizer, model):
    """Generate a long-form answer about *query* from PubMed abstracts.

    Parameters
    ----------
    paragraphs : list[str]
        Abstract texts retrieved from PubMed, used as generation context.
    query : str
        Subject of the question ("What is the mechanism of {query}?").
    tokenizer, model
        Hugging Face tokenizer / causal-LM pair.

    Returns
    -------
    str
        The decoded model output (prompt plus sampled continuation).
    """
    # Join the abstracts and append the question.  The explicit space fixes
    # the original word-merge bug where the question was glued directly onto
    # the last abstract ("...receptors.What is the mechanism...").
    context = " ".join(paragraphs)
    prompt = f"{context} What is the mechanism of {query}?"

    # truncation=True keeps the prompt within the tokenizer's model_max_length
    # (1024, set at load time) so long contexts don't overflow the model's
    # positional-embedding window.  Use the model's own device instead of
    # relying on a module-level global.
    inputs = tokenizer(
        prompt,
        return_tensors="pt",
        add_special_tokens=False,
        truncation=True,
    ).to(model.device)

    outputs = model.generate(
        **inputs,
        top_p=0.9,              # nucleus-sampling cutoff
        num_beams=1,
        do_sample=True,
        no_repeat_ngram_size=2,  # suppress short repeated phrases
        max_new_tokens=1516,
    )
    return tokenizer.decode(outputs[0], skip_special_tokens=True)


def retrieve_and_answer(query1, query2):
    """Gradio callback: AND-combine the two textbox queries and answer.

    The merged expression drives the PubMed search, while *query1* alone is
    used as the subject of the question posed to the model.
    """
    merged = f"({query1}) AND ({query2})"
    return fetch_and_generate(query1, merged, tokenizer, model)
    
def fetch_and_generate(query, combined_query, tokenizer, model):
    """Search PubMed, collect abstracts, and generate an answer with BioGPT.

    Parameters
    ----------
    query : str
        Subject used to phrase the question for the model.
    combined_query : str
        Full PubMed search expression (already AND-combined by the caller).
    tokenizer, model
        Hugging Face tokenizer / causal-LM pair, forwarded to the generator.

    Returns
    -------
    str
        The generated answer, or an error-message string.  (The original
        returned a tuple on the error path, which the single Gradio Textbox
        output cannot render.)
    """
    esearch_url = (
        f"https://eutils.ncbi.nlm.nih.gov/entrez/eutils/esearch.fcgi"
        f"?db=pubmed&api_key={api_key}&term={combined_query}"
        f"&retmax={num_results}&sort=relevance"
    )
    response = requests.get(esearch_url, headers={'Accept': 'application/json'}, timeout=30)

    # Check the status BEFORE parsing: on failure the body is an error page,
    # not the esearch XML the parser expects.
    if response.status_code != 200:
        print("Error:", response.status_code)
        return "Error fetching articles."

    parser = etree.XMLParser(recover=True)
    root = etree.fromstring(response.text.encode('utf-8'), parser=parser)

    # recover=True can yield None or a document with no <IdList>; guard
    # instead of raising TypeError when iterating.
    id_list = root.find('IdList') if root is not None else None
    if id_list is None:
        return "Error fetching articles."

    paragraphs = []
    for id_node in id_list:
        article_id = id_node.text
        efetch_url = (
            f"https://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi"
            f"?db=pubmed&api_key={api_key}&id={article_id}&retmode=xml"
        )
        response = requests.get(efetch_url, timeout=30)

        if response.status_code != 200:
            print("Error:", response.status_code)
            time.sleep(3)  # brief back-off (NCBI rate limiting) before next id
            continue

        soup = BeautifulSoup(response.text, 'xml')
        for article in soup.find_all('PubmedArticle'):
            # Only keep abstracts from articles that have a title, mirroring
            # the original filtering behaviour.
            if article.find('ArticleTitle'):
                abstract = article.find('AbstractText')
                if abstract:
                    paragraphs.append(abstract.text)

    return extract_longer_answers_from_paragraphs(paragraphs, query, tokenizer, model)


# Gradio Interface: two free-text queries are AND-combined into one PubMed
# search by retrieve_and_answer, which returns a single string rendered in
# the lone output Textbox.
iface = gr.Interface(
    fn=retrieve_and_answer,
    inputs=[gr.Textbox(placeholder="Enter Query 1", label= 'query1'), 
            gr.Textbox(placeholder="Enter Query 2", label= 'query2')],
    outputs=[gr.Textbox(placeholder="Answer from BioGPT"),],
    live=False,  # run only on submit, not on every keystroke
    title="PubMed Question Answering: Microsoft/BioGPT",
    description="Enter two queries to retrieve PubMed articles.",
    examples=[
        ["sertraline", "mechanism"],
        ["cancer", "treatment"]
    ]
)

# Blocking call: starts the web server and serves the interface.
iface.launch()