Spaces:
Runtime error
Runtime error
Jan Štihec
committed on
Commit
•
3736582
1
Parent(s):
1271b69
Add app files
Browse files- .streamlit/config.toml +3 -0
- GPTHelper.py +92 -0
- app.py +180 -0
- requirements.txt +0 -0
.streamlit/config.toml
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
[theme]
|
2 |
+
base="light"
|
3 |
+
primaryColor="#085189"
|
GPTHelper.py
ADDED
@@ -0,0 +1,92 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import logging
import os
from time import time

import openai

# Read the API key from the environment rather than embedding it in source.
# NOTE(review): the original hard-coded a live secret key here ("sk-...").
# That key must be treated as compromised and rotated immediately —
# credentials must never be committed to version control.
openai.api_key = os.environ.get("OPENAI_API_KEY")
|
7 |
+
|
8 |
+
|
9 |
+
def gpt_rephrase(fact):
    """Rephrase a free-text fact as a PubMed search query via the OpenAI API.

    Args:
        fact: The user-provided claim to turn into a search query.

    Returns:
        The model's suggested PubMed query string, or None if the API call
        failed (the error is logged and swallowed so the app keeps running).
    """
    # Dynamically generate the prompt to rephrase the fact as a PubMed query.
    prompt = (
        "Rephrase the following fact as a Pubmed search query.\n"
        f"FACT: {fact}\n"
        "PUBMED QUERY:"
    )
    try:
        response = openai.Completion.create(
            model="text-ada-001",
            prompt=prompt,
            max_tokens=250,
            temperature=0,  # Deterministic output for reproducible queries.
        )
        response = response['choices'][0]['text'].strip()
        filename = f'{time()}_gpt3.txt'

        # Create the logs folder if it does not exist (race-safe form).
        os.makedirs('gpt3_rephrase_logs', exist_ok=True)

        # Save the whole prompt and the response so that we can inspect it when necessary.
        with open(f'gpt3_rephrase_logs/{filename}', 'w', encoding="utf-8") as outfile:
            outfile.write('PROMPT:\n\n' + prompt + '\n\n###############\n\nRESPONSE:\n\n' + response)

        return response

    except Exception as e:
        logging.error("Error communicating with OpenAI (rephrase): ", exc_info=e)
        return None  # Explicit: callers receive None on failure.
|
36 |
+
|
37 |
+
|
38 |
+
def check_fact(evidence, fact):
    """Ask the model whether *fact* is supported by *evidence*.

    Args:
        evidence: Article conclusion/abstract text to judge the fact against.
        fact: The hypothesis being fact-checked.

    Returns:
        The model's one-word verdict (e.g. 'True', 'False', 'Undetermined',
        with trailing periods stripped), or None if the API call failed
        (the error is logged and swallowed).
    """
    # Prompt checks the fact against the given PubMed article conclusion/abstract.
    prompt = (
        "Based exclusively on the evidence provided, is the following hypothesis True, False or Undetermined?\n"
        f"EVIDENCE: {evidence}\n"
        f"HYPOTHESIS: {fact}\n"
        "ANSWER:"
    )
    try:
        response = openai.Completion.create(
            model="text-ada-001",
            prompt=prompt,
            max_tokens=2,  # The answer is a single word; keep the completion tiny.
            temperature=0,
        )
        response = response['choices'][0]['text'].strip()
        response = response.replace('.', '')  # Normalise e.g. "True." -> "True".
        filename = f'{time()}_gpt3.txt'

        # Create the logs folder if it does not exist (race-safe form).
        os.makedirs('gpt3_factchecking_logs', exist_ok=True)

        # Save the whole prompt and the response so that we can inspect it when necessary.
        with open(f'gpt3_factchecking_logs/{filename}', 'w', encoding="utf-8") as outfile:
            outfile.write('PROMPT:\n\n' + prompt + '\n\n###############\n\nRESPONSE:\n\n' + response)

        return response

    except Exception as e:
        logging.error("Error communicating with OpenAI (check_fact): ", exc_info=e)
        return None  # Explicit: callers must handle a failed check.
|
65 |
+
|
66 |
+
|
67 |
+
def gpt35_rephrase(fact):
    """Rephrase a fact as a PubMed query using gpt-3.5-turbo.

    Uses the chat endpoint, which is cheaper than the older completion
    endpoint used by :func:`gpt_rephrase`.

    Args:
        fact: The user-provided claim to turn into a search query.

    Returns:
        The model's suggested PubMed query string, or None if the API call
        failed (the error is logged and swallowed so the app keeps running).
    """
    prompt = (
        "Rephrase the following fact as a Pubmed search query.\n"
        f"FACT: {fact}\n"
        "PUBMED QUERY:"
    )
    try:
        response = openai.ChatCompletion.create(
            model="gpt-3.5-turbo",
            messages=[
                {"role": "user",
                 "content": prompt}
            ]
        )
        response = response['choices'][0]['message']['content'].strip()
        filename = f'{time()}_gpt3.txt'

        # Create the logs folder if it does not exist (race-safe form).
        os.makedirs('gpt35_rephrase_logs', exist_ok=True)

        # Save the whole prompt and the response so that we can inspect it when necessary.
        with open(f'gpt35_rephrase_logs/{filename}', 'w', encoding="utf-8") as outfile:
            outfile.write('PROMPT:\n\n' + prompt + '\n\n###############\n\nRESPONSE:\n\n' + response)

        return response

    except Exception as e:
        logging.error("Error communicating with OpenAI (gpt35_rephrase): ", exc_info=e)
        return None  # Explicit: callers receive None on failure.
|
app.py
ADDED
@@ -0,0 +1,180 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import streamlit as st
|
2 |
+
import GPTHelper
|
3 |
+
from sentence_transformers import CrossEncoder
|
4 |
+
from pymed import PubMed
|
5 |
+
import pandas as pd
|
6 |
+
import plotly.express as px
|
7 |
+
import logging
|
8 |
+
from langdetect import detect
|
9 |
+
|
10 |
+
|
11 |
+
if "valid_inputs_received" not in st.session_state:
|
12 |
+
st.session_state["valid_inputs_received"] = False
|
13 |
+
|
14 |
+
|
15 |
+
def get_articles(query, fetcher) -> dict[str, list[str]]:
    """Fetch PubMed articles for *query* and collect their text and links.

    Args:
        query: PubMed search query string.
        fetcher: A pymed.PubMed-like object exposing ``query(q, max_results)``.

    Returns:
        Dict with two parallel lists:
        ``"Conclusions"`` — "<title>\\n<conclusion-or-abstract>" per article
        (the title is prepended to improve relevance ranking downstream), and
        ``"Links"`` — an HTML anchor to each article's PubMed page.
    """
    # Fetches articles using pymed. Increasing max_results results in longer loading times.
    results = fetcher.query(query, max_results=50)
    conclusions = []
    links = []
    for article in results:
        article_id = 0  # Fallback value if reading the article fails before the id is set.
        try:
            # Sometimes pymed wrongly returns a long list of ids. Use only the first.
            article_id = article.pubmed_id[:8]
            title = article.title
            conclusion = article.conclusions
            abstract = article.abstract
            article_url = f'https://pubmed.ncbi.nlm.nih.gov/{article_id}/'
            article_link = f'<a href="{article_url}" style="color: black; font-size: 16px; ' \
                           f'text-decoration: underline;">PubMed ID: {article_id}</a>'  # Injects a link to plotly
            # Not all articles come with a conclusion; the abstract is used alternatively.
            text = conclusion if conclusion else abstract
            if text:
                conclusions.append(title + '\n' + text)
                links.append(article_link)
        except Exception as e:
            logging.warning(f"Error reading article: {article_id}: ", exc_info=e)

    return {
        "Conclusions": conclusions,
        "Links": links
    }
|
47 |
+
|
48 |
+
|
49 |
+
@st.cache_resource
def load_cross_encoder():
    """Load (once, via Streamlit's resource cache) the pretrained cross-encoder
    used for reranking. Substitute the model name to use a different one."""
    return CrossEncoder('cross-encoder/ms-marco-MiniLM-L-6-v2')
|
54 |
+
|
55 |
+
|
56 |
+
@st.cache_resource
def load_pubmed_fetcher():
    """Create (once, via Streamlit's resource cache) the pymed client used to
    query PubMed."""
    return PubMed(tool="PubmedFactChecker", email="stihec.jan@gmail.com")
|
60 |
+
|
61 |
+
|
62 |
+
def run_ui():
    """Render the Streamlit UI and drive the whole fact-checking flow.

    Flow: collect a fact -> validate it -> rephrase it as a PubMed query via
    GPT-3.5 -> fetch and rerank articles -> fact-check against the top 10 ->
    visualise the verdicts as a plotly sunburst chart.
    """
    st.set_page_config(page_title="PUBMED FACT-CHECKER", page_icon="📖")

    # --- Sidebar: about text, canned examples, and repo link. ---
    sidebar = st.sidebar
    sidebar.title('ABOUT')
    sidebar.write("""
    The PubMed fact-checker app enables users to verify biomedical claims by comparing them against
    research papers available on PubMed. \n
    As the number of self-proclaimed experts continues to rise,
    so does the risk of harmful misinformation. This app showcases the potential of Large Language Models
    to provide accurate and valuable information to people.
    """)
    sidebar.title('EXAMPLES')
    sidebar.write('Try one of the below examples to see PubMed fact-checker in action.')

    # --- Main input form. ---
    st.title('PubMed FACT CHECKER')
    with st.form(key="fact_form"):
        fact = st.text_input('Fact:', placeholder='Enter your fact')
        submitted = st.form_submit_button("Fact-Check")

    # Example buttons behave as if the user typed and submitted the fact.
    if sidebar.button('Mediterranean diet helps with weight loss.', use_container_width=250):
        submitted = True
        fact = 'Mediterranean diet helps with weight loss.'

    if sidebar.button('Low Carb High Fat diet is healthy in long term.', use_container_width=250):
        submitted = True
        fact = 'Low Carb High Fat diet is healthy in long term.'

    if sidebar.button('Vaccines are a cause of autism.', use_container_width=250):
        submitted = True
        fact = 'Vaccines are a cause of autism.'

    sidebar.info('**GitHub: [@jacinthes](https://github.com/jacinthes/slovene-nli-benchmark)**', icon="💻")

    # --- Input validation; st.stop() halts the script run on bad input. ---
    if not submitted and not st.session_state.valid_inputs_received:
        st.stop()

    elif submitted and not fact:
        st.warning('Please enter your fact before fact-checking.')
        st.session_state.valid_inputs_received = False
        st.stop()

    # Reject non-English input (langdetect; unreliable on very short text).
    elif submitted and not detect(fact) == 'en':
        st.warning('Please enter valid text in English. For short inputs, language detection is sometimes inaccurate.')
        st.session_state.valid_inputs_received = False
        st.stop()

    elif submitted and not len(fact) < 75:
        st.warning('To ensure accurate searching, please keep your fact under 75 characters.')
        st.session_state.valid_inputs_received = False
        st.stop()

    # --- Valid input: run the pipeline. ---
    elif submitted or st.session_state.valid_inputs_received:
        pubmed_query = GPTHelper.gpt35_rephrase(fact)  # Call gpt3.5 turbo to rephrase fact as a PubMed query.
        pubmed = load_pubmed_fetcher()

        with st.spinner('Fetching articles...'):
            articles = get_articles(pubmed_query, pubmed)

        article_conclusions = articles['Conclusions']
        article_links = articles['Links']
        # Cross-encoder input: (claim, article text) pairs.
        cross_inp = [[fact, conclusions] for conclusions in article_conclusions]

        with st.spinner('Assessing article relevancy...'):
            cross_encoder = load_cross_encoder()
            cross_scores = cross_encoder.predict(cross_inp)  # Calculate relevancy using the defined cross-encoder.

        df = pd.DataFrame({
            'Link': article_links,
            'Conclusion': article_conclusions,
            'Score': cross_scores
        })

        df.sort_values(by=['Score'], ascending=False, inplace=True)
        df = df[df['Score'] > 0]  # Only keep articles with relevancy score above 0.
        if df.shape[0] == 0:  # If no relevant article is found, inform the user.
            st.info(
                "Unfortunately, I couldn't find anything for your search.\n"
                "Don't let that discourage you, I have over 35 million citations in my database.\n"
                "I am sure your next search will be more successful."
            )
            st.stop()

        df = df.head(10)  # Keep only 10 most relevant articles. This is done to control OpenAI costs and load time.
        # Fact-check each remaining article, updating a progress bar as we go.
        progress_text = "Assessing the validity of the fact based on relevant research papers."
        fact_checking_bar = st.progress(0, text=progress_text)
        step = 100/df.shape[0]
        percent_complete = 0
        predictions = []
        for index, row in df.iterrows():
            predictions.append(GPTHelper.check_fact(row['Conclusion'], fact))  # Prompt to GPT3.5 to fact-check
            percent_complete += step/100
            fact_checking_bar.progress(percent_complete, text=progress_text)
        fact_checking_bar.empty()
        df['Prediction'] = predictions

        # Prepare DataFrame for plotly sunburst chart.
        # 'Total' = number of articles sharing each verdict; sizes the wedges.
        totals = df.groupby('Prediction').size().to_dict()
        df['Total'] = df['Prediction'].map(totals)

        fig = px.sunburst(df, path=['Prediction', 'Link'], values='Total', height=600, width=600, color='Prediction',
                          color_discrete_map={
                              'False': "#FF8384",
                              'True': "#A5D46A",
                              'Undetermined': "#FFDF80"
                          }
                          )
        fig.update_layout(
            margin=dict(l=20, r=20, t=20, b=20),
            font_size=32,
            font_color='#000000'
        )
        st.write(f'According to PubMed "{fact}" is:')
        st.plotly_chart(fig, use_container_width=True)
|
177 |
+
|
178 |
+
|
179 |
+
if __name__ == "__main__":
    # Script entry point: build and run the Streamlit app.
    run_ui()
|
requirements.txt
ADDED
Binary file (260 Bytes). View file
|
|