jayash391 committed on
Commit
fb0e0f9
1 Parent(s): 50eb941

Upload 3 files

Files changed (3)
  1. .env +9 -0
  2. medmind.py +288 -0
  3. requirements.txt +14 -0
.env ADDED
@@ -0,0 +1,9 @@
+ VECTARA_INDEX_API_KEY = "zwt_ni_bLu6MRQXzWKPIU__Uubvy_0Xz_FEr-2sfUg"
+ VECTARA_QUERY_API_KEY = "zwt_ni_bLu6MRQXzWKPIU__Uubvy_0Xz_FEr-2sfUg"
+ VECTARA_API_KEY = "zut_ni_bLoa0I3AeNSjxeZ-UfECnm_9Xv5d4RVBAqw"
+ VECTARA_CORPUS_ID = "2"
+ VECTARA_CUSTOMER_ID = "2653936430"
+ TOGETHER_API = "7e6c200b7b36924bc1b4a5973859a20d2efa7180e9b5c977301173a6c099136b"
+ GOOGLE_SEARCH_API_KEY = "AIzaSyD-1OMuZ0CxGAek0PaXrzHOmcDWFvZQtm8"
+ UNSTRUCTURED_API_KEY = "eBqsGxYYIfTdPRH7PEveZGVIH6ZHny"
+ PINECONE_API_KEY = "4523c180-39fd-4c48-99e8-88164df85b0a"
medmind.py ADDED
@@ -0,0 +1,288 @@
+ from llama_index.indices.managed.vectara import VectaraIndex
+ from dotenv import load_dotenv
+ import os
+ from docx import Document
+ from llama_index.llms.together import TogetherLLM
+ from llama_index.core.llms import ChatMessage, MessageRole
+ from Bio import Entrez
+ import ssl
+ from transformers import AutoModelForSequenceClassification, AutoTokenizer
+ import streamlit as st
+ from googleapiclient.discovery import build
+ from typing import List, Optional
+
+ load_dotenv()
+
+ os.environ["VECTARA_INDEX_API_KEY"] = os.getenv("VECTARA_INDEX_API_KEY", "zwt_ni_bLu6MRQXzWKPIU__Uubvy_0Xz_FEr-2sfUg")
+ os.environ["VECTARA_QUERY_API_KEY"] = os.getenv("VECTARA_QUERY_API_KEY", "zwt_ni_bLu6MRQXzWKPIU__Uubvy_0Xz_FEr-2sfUg")
+ os.environ["VECTARA_API_KEY"] = os.getenv("VECTARA_API_KEY", "zut_ni_bLoa0I3AeNSjxeZ-UfECnm_9Xv5d4RVBAqw")
+ os.environ["VECTARA_CORPUS_ID"] = os.getenv("VECTARA_CORPUS_ID", "2")
+ os.environ["VECTARA_CUSTOMER_ID"] = os.getenv("VECTARA_CUSTOMER_ID", "2653936430")
+ os.environ["TOGETHER_API"] = os.getenv("TOGETHER_API", "7e6c200b7b36924bc1b4a5973859a20d2efa7180e9b5c977301173a6c099136b")
+ os.environ["GOOGLE_SEARCH_API_KEY"] = os.getenv("GOOGLE_SEARCH_API_KEY", "AIzaSyBnQwS5kPZGKuWj6sH1aBx5F5bZq0Q5jJk")
+
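+ # Note: VectaraIndex() below is called with no arguments; it is expected to read
+ # VECTARA_API_KEY, VECTARA_CORPUS_ID and VECTARA_CUSTOMER_ID from the environment
+ # variables set above.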
+ # Initialize the Vectara index
+ index = VectaraIndex()
+
+ endpoint = 'https://api.together.xyz/inference'
+
+ # Load the hallucination evaluation model
+ model_name = "vectara/hallucination_evaluation_model"
+ model = AutoModelForSequenceClassification.from_pretrained(model_name)
+ tokenizer = AutoTokenizer.from_pretrained(model_name)
+
+ def search_pubmed(query: str) -> Optional[List[str]]:
+     """
+     Searches PubMed for a given query and returns a list of formatted results
+     (or None if no results are found).
+     """
+     Entrez.email = "jayashbhardwaj3@gmail.com"  # Replace with your email
+
+     try:
+         ssl._create_default_https_context = ssl._create_unverified_context
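+         # Note: the line above disables SSL certificate verification for the
+         # Entrez requests, presumably to work around local certificate errors;
+         # it leaves the HTTPS calls unverified, so treat it as a temporary workaround.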
+
+         handle = Entrez.esearch(db="pubmed", term=query, retmax=3)
+         record = Entrez.read(handle)
+         id_list = record["IdList"]
+
+         if not id_list:
+             return None
+
+         handle = Entrez.efetch(db="pubmed", id=id_list, retmode="xml")
+         articles = Entrez.read(handle)
+
+         results = []
+         for article in articles['PubmedArticle']:
+             try:
+                 medline_citation = article['MedlineCitation']
+                 article_data = medline_citation['Article']
+                 title = article_data['ArticleTitle']
+                 abstract = article_data.get('Abstract', {}).get('AbstractText', [""])[0]
+
+                 result = f"**Title:** {title}\n**Abstract:** {abstract}\n"
+                 result += f"**Link:** https://pubmed.ncbi.nlm.nih.gov/{medline_citation['PMID']}\n\n"
+                 results.append(result)
+             except KeyError as e:
+                 print(f"Error parsing article: {article}, Error: {e}")
+
+         return results
+
+     except Exception as e:
+         print(f"Error accessing PubMed: {e}")
+         return None
+
+ def chat_with_pubmed(article_text, article_link):
+     """
+     Engages in a chat-like interaction with a PubMed article using TogetherLLM.
+     """
+     try:
+         llm = TogetherLLM(model="Qwen/Qwen1.5-14B-Chat", api_key=os.environ['TOGETHER_API'])
+         messages = [
+             ChatMessage(role=MessageRole.SYSTEM, content="You are a helpful AI assistant summarizing and answering questions about the following medical research article: " + article_link),
+             ChatMessage(role=MessageRole.USER, content=article_text)
+         ]
+         response = llm.chat(messages)
+         return str(response) if response else "I'm sorry, I couldn't generate a summary for this article."
+     except Exception as e:
+         print(f"Error in chat_with_pubmed: {e}")
+         return "An error occurred while generating a summary."
+
+ def search_web(query: str, num_results: int = 3) -> Optional[List[str]]:
+     """
+     Searches the web using the Google Search API and returns a list of formatted results
+     (or None if no results are found).
+     """
+     try:
+         service = build("customsearch", "v1", developerKey=os.environ["GOOGLE_SEARCH_API_KEY"])
+
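+         # Note: the cx value below is a Programmable Search Engine ID tied to the
+         # author's configuration; swap in your own engine ID to reuse this search.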
+         # Execute the search request
+         res = service.cse().list(q=query, cx="877170db56f5c4629", num=num_results).execute()
+
+         if "items" not in res:
+             return None
+
+         results = []
+         for item in res["items"]:
+             title = item["title"]
+             link = item["link"]
+             snippet = item["snippet"]
+             result = f"**Title:** {title}\n**Link:** {link}\n**Snippet:** {snippet}\n\n"
+             results.append(result)
+
+         return results
+
+     except Exception as e:
+         print(f"Error performing web search: {e}")
+         return None
+
+ def medmind_chatbot(user_input, chat_history=None):
+     """
+     Processes user input, interacts with various resources, and generates a response.
+     Handles potential errors, maintains chat history, and evaluates hallucination risk.
+     """
+
+     if chat_history is None:
+         chat_history = []
+
+     response_parts = []  # Collect responses from different sources
+
+     try:
+         # Vectara Search
+         try:
+             query_str = user_input
+             response = index.as_query_engine().query(query_str)
+             response_parts.append(f"**MedMind Vectara Knowledge Base Response:**\n{response.response}")
+         except Exception as e:
+             print(f"Error in Vectara search: {e}")
+             response_parts.append("Vectara knowledge base is currently unavailable.")
+
+         # PubMed Search and Chat
+         pubmed_results = search_pubmed(user_input)
+         if pubmed_results:
+             response_parts.append("**PubMed Articles (Chat & Summarize):**")
+             for article_text in pubmed_results:
+                 title, abstract, link = article_text.split("\n")[:3]
+                 chat_summary = chat_with_pubmed(abstract, link)
+                 response_parts.append(f"{title}\n{chat_summary}\n{link}\n")
+         else:
+             response_parts.append("No relevant PubMed articles found.")
+
+         # Web Search
+         web_results = search_web(user_input)
+         if web_results:
+             response_parts.append("**Web Search Results:**")
+             response_parts.extend(web_results)
+         else:
+             response_parts.append("No relevant web search results found.")
+
+         # Combine response parts into a single string
+         response_text = "\n\n".join(response_parts)
+
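+         # Note: vectara/hallucination_evaluation_model is built to score a
+         # premise/hypothesis pair for factual consistency; scoring the combined
+         # response as a single text, as below, is a rough heuristic rather than
+         # a calibrated hallucination probability.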
+         # Hallucination Evaluation
+         def vectara_hallucination_evaluation_model(text):
+             inputs = tokenizer(text, return_tensors="pt")
+             outputs = model(**inputs)
+             hallucination_probability = outputs.logits[0][0].item()
+             return hallucination_probability
+
+         hallucination_score = vectara_hallucination_evaluation_model(response_text)
+         HIGH_HALLUCINATION_THRESHOLD = 0.9
+         if hallucination_score > HIGH_HALLUCINATION_THRESHOLD:
+             response_text = "I'm still under development and learning. I cannot confidently answer this question yet."
+
+     except Exception as e:
+         print(f"Error in chatbot: {e}")
+         response_text = "An error occurred. Please try again later."
+
+     chat_history.append((user_input, response_text))
+     return response_text, chat_history
+
+ def show_info_popup():
+     with st.expander("How to use MedMind"):
+         st.write("""
+ **MedMind is an AI-powered chatbot designed to assist with medical information.**
+
+ **Capabilities:**
+
+ * **Answers general medical questions:** MedMind utilizes a curated medical knowledge base to provide answers to a wide range of health-related inquiries.
+ * **Summarizes relevant research articles from PubMed:** The chatbot can retrieve and summarize research articles from the PubMed database, making complex scientific information more accessible.
+ * **Provides insights from a curated medical knowledge base:** Beyond simple answers, MedMind offers additional insights and context from its knowledge base to enhance understanding.
+ * **Performs safe web searches related to your query:** The chatbot can perform web searches using the Google Search API, ensuring the safety and relevance of the results.
+
+ **Limitations:**
+
+ * **Not a substitute for professional medical advice:** MedMind is not intended to replace professional medical diagnosis and treatment. Always consult a qualified healthcare provider for personalized medical advice.
+ * **General knowledge and educational purposes:** The information provided by MedMind is for general knowledge and educational purposes only and may not be exhaustive or specific to individual situations.
+ * **Under development:** MedMind is still under development and may occasionally provide inaccurate or incomplete information. It's important to critically evaluate responses and cross-reference with reliable sources.
+ * **Hallucination potential:** While MedMind employs a hallucination evaluation model to minimize the risk of generating fabricated information, there remains a possibility of encountering inaccurate responses, especially for complex or niche queries.
+
+ **How to use:**
+
+ 1. **Type your medical question in the text box.**
+ 2. **MedMind will provide a comprehensive response combining information from various sources.** This may include insights from its knowledge base, summaries of relevant research articles, and safe web search results.
+ 3. **You can continue the conversation by asking follow-up questions or providing additional context.** This helps MedMind refine its search and offer more tailored information.
+ 4. **If MedMind does not show any output, check your internet connection or rerun the query.**
+ 5. **You can either chat with uploaded documents or generate a combined response from Vectara, PubMed, and web search.**
+ 6. **The chat-with-documents feature is still under development, so it is best avoided for now.**
+ """)
+
+ # Initialize session state
+ if 'chat_history' not in st.session_state:
+     st.session_state.chat_history = []
+
+ # Define function to display chat history with highlighted user input and chatbot response
+ def display_chat_history():
+     for user_msg, bot_msg in st.session_state.chat_history:
+         st.info(f"**You:** {user_msg}")
+         st.success(f"**MedMind:** {bot_msg}")
+
+ # Define function to clear chat history
+ def clear_chat():
+     st.session_state.chat_history = []
+
+ def main():
+     # Streamlit Page Configuration
+     st.set_page_config(page_title="MedMind Chatbot", layout="wide")
+
+     # Custom Styles
+     st.markdown(
+         """
+ <style>
+ .css-18e3th9 {
+     padding-top: 2rem;
+     padding-right: 1rem;
+     padding-bottom: 2rem;
+     padding-left: 1rem;
+ }
+ .stButton>button {
+     background-color: #4CAF50;
+     color: white;
+ }
+ body {
+     background-color: #F0FDF4;
+     color: #333333;
+ }
+ .stMarkdown h1, .stMarkdown h2, .stMarkdown h3, .stMarkdown h4, .stMarkdown h5, .stMarkdown h6 {
+     color: #388E3C;
+ }
+ </style>
+         """,
+         unsafe_allow_html=True,
+     )
+
+     # Title and Introduction
+     st.title("MedMind Chatbot")
+     st.write("Ask your medical questions and get reliable information!")
+
+     # Example Questions (Sidebar)
+     example_questions = [
+         "What are the symptoms of COVID-19?",
+         "How can I manage my diabetes?",
+         "What are the potential side effects of ibuprofen?",
+         "What lifestyle changes can help prevent heart disease?"
+     ]
+     st.sidebar.header("Example Questions")
+     for question in example_questions:
+         st.sidebar.write(question)
+
+     # Output Container
+     output_container = st.container()
+
+     # User Input and Chat History
+     input_container = st.container()
+     with input_container:
+         user_input = st.text_input("You: ", key="input_placeholder", placeholder="Type your medical question here...")
+         new_chat_button = st.button("Start New Chat")
+         if new_chat_button:
+             st.session_state.chat_history = []  # Clear chat history
+
+     if user_input:
+         response, st.session_state.chat_history = medmind_chatbot(user_input, st.session_state.chat_history)
+         with output_container:
+             display_chat_history()
+
+     # Information Popup
+     show_info_popup()
+
+ if __name__ == "__main__":
+     main()
requirements.txt ADDED
@@ -0,0 +1,14 @@
+ llama-index
+ python-dotenv
+ PyPDF2
+ python-docx
+ sentence-transformers
+ biopython
+ langchain
+ transformers
+ streamlit
+ google-api-python-client
+ langchain-community
+ llama-index-embeddings-huggingface
+ llama-index-llms-together
+ llama-index-indices-managed-vectara