Jan Štihec committed on
Commit 3736582
1 Parent(s): 1271b69

Add app files

Files changed (4)
  1. .streamlit/config.toml +3 -0
  2. GPTHelper.py +92 -0
  3. app.py +180 -0
  4. requirements.txt +0 -0
.streamlit/config.toml ADDED
@@ -0,0 +1,3 @@
+ [theme]
+ base="light"
+ primaryColor="#085189"
GPTHelper.py ADDED
@@ -0,0 +1,92 @@
+ import openai
+ from time import time
+ import os
+ import logging
+
+ openai.api_key = os.getenv("OPENAI_API_KEY")  # Read the key from the environment; never commit a real API key.
+
+
+ def gpt_rephrase(fact):
+     # Dynamically generate the prompt to rephrase the fact as a PubMed query using the GPT-3 Ada completion model.
+     prompt = f"Rephrase the following fact as a Pubmed search query.\n\
+ FACT: {fact}\n\
+ PUBMED QUERY:"
+     try:
+         response = openai.Completion.create(
+             model="text-ada-001",
+             prompt=prompt,
+             max_tokens=250,
+             temperature=0
+         )
+         response = response['choices'][0]['text'].strip()
+         filename = '%s_gpt3.txt' % time()
+
+         # Create the logs folder if it does not exist.
+         if not os.path.exists('gpt3_rephrase_logs'):
+             os.makedirs('gpt3_rephrase_logs')
+
+         # Save the whole prompt and the response so that we can inspect them when necessary.
+         with open('gpt3_rephrase_logs/%s' % filename, 'w', encoding="utf-8") as outfile:
+             outfile.write('PROMPT:\n\n' + prompt + '\n\n###############\n\nRESPONSE:\n\n' + response)
+
+         return response
+
+     except Exception as e:
+         # On failure this logs the error and implicitly returns None; callers should handle that.
+         logging.error("Error communicating with OpenAI (rephrase): ", exc_info=e)
+
+
+ def check_fact(evidence, fact):
+     # Dynamically generate the prompt to check the fact against the given PubMed article conclusion/abstract.
+     prompt = f"Based exclusively on the evidence provided, is the following hypothesis True, False or Undetermined?\n\
+ EVIDENCE: {evidence}\n\
+ HYPOTHESIS: {fact}\n\
+ ANSWER:"
+     try:
+         response = openai.Completion.create(
+             model="text-ada-001",
+             prompt=prompt,
+             max_tokens=2,  # Two tokens are enough for a one-word verdict.
+             temperature=0
+         )
+         response = response['choices'][0]['text'].strip()
+         response = response.replace('.', '')
+         filename = '%s_gpt3.txt' % time()
+
+         if not os.path.exists('gpt3_factchecking_logs'):
+             os.makedirs('gpt3_factchecking_logs')
+
+         with open('gpt3_factchecking_logs/%s' % filename, 'w', encoding="utf-8") as outfile:
+             outfile.write('PROMPT:\n\n' + prompt + '\n\n###############\n\nRESPONSE:\n\n' + response)
+
+         return response
+
+     except Exception as e:
+         logging.error("Error communicating with OpenAI (check_fact): ", exc_info=e)
+
+
+ def gpt35_rephrase(fact):
+     # Dynamically generate the prompt to rephrase the fact as a PubMed query using gpt-3.5-turbo,
+     # which costs less than the older completion models.
+     prompt = f"Rephrase the following fact as a Pubmed search query.\n\
+ FACT: {fact}\n\
+ PUBMED QUERY:"
+     try:
+         response = openai.ChatCompletion.create(
+             model="gpt-3.5-turbo",
+             messages=[
+                 {"role": "user",
+                  "content": prompt}
+             ]
+         )
+         response = response['choices'][0]['message']['content'].strip()
+         filename = '%s_gpt3.txt' % time()
+
+         if not os.path.exists('gpt35_rephrase_logs'):
+             os.makedirs('gpt35_rephrase_logs')
+
+         with open('gpt35_rephrase_logs/%s' % filename, 'w', encoding="utf-8") as outfile:
+             outfile.write('PROMPT:\n\n' + prompt + '\n\n###############\n\nRESPONSE:\n\n' + response)
+
+         return response
+
+     except Exception as e:
+         logging.error("Error communicating with OpenAI (gpt35_rephrase): ", exc_info=e)
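For orientation, a minimal sketch of how these helpers chain together (the fact string and evidence text are made-up examples, and OPENAI_API_KEY is assumed to be set in the environment):

import GPTHelper

fact = 'Vitamin C shortens the duration of the common cold.'  # hypothetical input
query = GPTHelper.gpt35_rephrase(fact)  # a PubMed-style search string, or None on API failure
verdict = GPTHelper.check_fact('Conclusion text from a PubMed article...', fact)
print(query, verdict)  # verdict is expected to be 'True', 'False' or 'Undetermined'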
app.py ADDED
@@ -0,0 +1,180 @@
+ import streamlit as st
+ import GPTHelper
+ from sentence_transformers import CrossEncoder
+ from pymed import PubMed
+ import pandas as pd
+ import plotly.express as px
+ import logging
+ from langdetect import detect
+
+
+ if "valid_inputs_received" not in st.session_state:
+     st.session_state["valid_inputs_received"] = False
+
+
+ def get_articles(query, fetcher) -> dict[str, list[str]]:
+     # Fetches articles using pymed. Increasing max_results increases loading time.
+     results = fetcher.query(query, max_results=50)
+     conclusions = []
+     titles = []
+     links = []
+     for article in results:
+         article_id = 0  # Fallback value in case PubMed fails to return an ID.
+         try:
+             article_id = article.pubmed_id[:8]  # Sometimes pymed wrongly returns a long list of IDs. Use only the first.
+             title = article.title
+             conclusion = article.conclusions
+             abstract = article.abstract
+             article_url = f'https://pubmed.ncbi.nlm.nih.gov/{article_id}/'
+             article_link = f'<a href="{article_url}" style="color: black; font-size: 16px; ' \
+                            f'text-decoration: underline;">PubMed ID: {article_id}</a>'  # Injects a link into plotly.
+             if conclusion:
+                 # The title is prepended to the conclusion to improve relevance ranking.
+                 conclusions.append(title + '\n' + conclusion)
+                 titles.append(title)
+                 links.append(article_link)
+             elif abstract:
+                 # Not all articles provide conclusions; the abstract is used as a fallback.
+                 conclusions.append(title + '\n' + abstract)
+                 titles.append(title)
+                 links.append(article_link)
+         except Exception as e:
+             logging.warning(f"Error reading article: {article_id}: ", exc_info=e)
+
+     return {
+         "Conclusions": conclusions,
+         "Links": links
+     }
+
+
+ @st.cache_resource
+ def load_cross_encoder():
+     # The pretrained cross-encoder model used for reranking. Can be substituted with a different one.
+     cross_encoder = CrossEncoder('cross-encoder/ms-marco-MiniLM-L-6-v2')
+     return cross_encoder
+
+
+ @st.cache_resource
+ def load_pubmed_fetcher():
+     pubmed = PubMed(tool="PubmedFactChecker", email="stihec.jan@gmail.com")
+     return pubmed
+
+
+ def run_ui():
+     # This function controls the whole app flow.
+     st.set_page_config(page_title="PUBMED FACT-CHECKER", page_icon="📖")
+
+     sidebar = st.sidebar
+     sidebar.title('ABOUT')
+     sidebar.write("""
+     The PubMed fact-checker app enables users to verify biomedical claims by comparing them against
+     research papers available on PubMed. \n
+     As the number of self-proclaimed experts continues to rise,
+     so does the risk of harmful misinformation. This app showcases the potential of Large Language Models
+     to provide accurate and valuable information to people.
+     """)
+     sidebar.title('EXAMPLES')
+     sidebar.write('Try one of the examples below to see the PubMed fact-checker in action.')
+
+     st.title('PubMed FACT CHECKER')
+     with st.form(key="fact_form"):
+         fact = st.text_input('Fact:', placeholder='Enter your fact')
+         submitted = st.form_submit_button("Fact-Check")
+
+     if sidebar.button('Mediterranean diet helps with weight loss.', use_container_width=True):
+         submitted = True
+         fact = 'Mediterranean diet helps with weight loss.'
+
+     if sidebar.button('Low Carb High Fat diet is healthy in long term.', use_container_width=True):
+         submitted = True
+         fact = 'Low Carb High Fat diet is healthy in long term.'
+
+     if sidebar.button('Vaccines are a cause of autism.', use_container_width=True):
+         submitted = True
+         fact = 'Vaccines are a cause of autism.'
+
+     sidebar.info('**GitHub: [@jacinthes](https://github.com/jacinthes/slovene-nli-benchmark)**', icon="💻")
+
+     if not submitted and not st.session_state.valid_inputs_received:
+         st.stop()
+
+     elif submitted and not fact:
+         st.warning('Please enter your fact before fact-checking.')
+         st.session_state.valid_inputs_received = False
+         st.stop()
+
+     elif submitted and detect(fact) != 'en':
+         st.warning('Please enter valid text in English. For short inputs, language detection is sometimes inaccurate.')
+         st.session_state.valid_inputs_received = False
+         st.stop()
+
+     elif submitted and len(fact) >= 75:
+         st.warning('To ensure accurate searching, please keep your fact under 75 characters.')
+         st.session_state.valid_inputs_received = False
+         st.stop()
+
+     elif submitted or st.session_state.valid_inputs_received:
+         pubmed_query = GPTHelper.gpt35_rephrase(fact)  # Call gpt-3.5-turbo to rephrase the fact as a PubMed query.
+         pubmed = load_pubmed_fetcher()
+
+         with st.spinner('Fetching articles...'):
+             articles = get_articles(pubmed_query, pubmed)
+
+         article_conclusions = articles['Conclusions']
+         article_links = articles['Links']
+         cross_inp = [[fact, conclusion] for conclusion in article_conclusions]
+
+         with st.spinner('Assessing article relevancy...'):
+             cross_encoder = load_cross_encoder()
+             cross_scores = cross_encoder.predict(cross_inp)  # Calculate relevancy using the defined cross-encoder.
+
+         df = pd.DataFrame({
+             'Link': article_links,
+             'Conclusion': article_conclusions,
+             'Score': cross_scores
+         })
+
+         df.sort_values(by=['Score'], ascending=False, inplace=True)
+         df = df[df['Score'] > 0]  # Only keep articles with a relevancy score above 0.
+         if df.shape[0] == 0:  # If no relevant article is found, inform the user.
+             st.info(
+                 "Unfortunately, I couldn't find anything for your search.\n"
+                 "Don't let that discourage you, I have over 35 million citations in my database.\n"
+                 "I am sure your next search will be more successful."
+             )
+             st.stop()
+
+         df = df.head(10)  # Keep only the 10 most relevant articles to control OpenAI costs and load time.
+         progress_text = "Assessing the validity of the fact based on relevant research papers."
+         fact_checking_bar = st.progress(0, text=progress_text)
+         step = 100 / df.shape[0]
+         percent_complete = 0
+         predictions = []
+         for index, row in df.iterrows():
+             predictions.append(GPTHelper.check_fact(row['Conclusion'], fact))  # Prompt GPT-3 to fact-check.
+             percent_complete += step / 100
+             fact_checking_bar.progress(percent_complete, text=progress_text)
+         fact_checking_bar.empty()
+         df['Prediction'] = predictions
+
+         # Prepare the DataFrame for the plotly sunburst chart.
+         totals = df.groupby('Prediction').size().to_dict()
+         df['Total'] = df['Prediction'].map(totals)
+
+         fig = px.sunburst(df, path=['Prediction', 'Link'], values='Total', height=600, width=600, color='Prediction',
+                           color_discrete_map={
+                               'False': "#FF8384",
+                               'True': "#A5D46A",
+                               'Undetermined': "#FFDF80"
+                           }
+                           )
+         fig.update_layout(
+             margin=dict(l=20, r=20, t=20, b=20),
+             font_size=32,
+             font_color='#000000'
+         )
+         st.write(f'According to PubMed "{fact}" is:')
+         st.plotly_chart(fig, use_container_width=True)
+
+
+ if __name__ == "__main__":
+     run_ui()
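The reranking step in run_ui can be sanity-checked in isolation. A minimal sketch, assuming sentence-transformers is installed and using the same cross-encoder checkpoint loaded above (the claim and passages are made-up examples):

from sentence_transformers import CrossEncoder

cross_encoder = CrossEncoder('cross-encoder/ms-marco-MiniLM-L-6-v2')
claim = 'Mediterranean diet helps with weight loss.'
passages = [
    'Adherence to a Mediterranean diet was associated with modest weight reduction.',
    'This study evaluates bridge corrosion in coastal climates.',
]
# predict() scores each (claim, passage) pair; higher means more relevant.
scores = cross_encoder.predict([[claim, p] for p in passages])
print(scores)  # the first passage should score higher than the second

Locally, the app starts with streamlit run app.py once the pinned requirements are installed.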
requirements.txt ADDED
Binary file (260 Bytes).