yash001010 committed on
Commit 88e7d80
1 Parent(s): 93d2c31
Files changed (1)
  1. app.py +105 -112
app.py CHANGED
@@ -4,6 +4,7 @@ from langchain_community.vectorstores import Chroma
 from langchain_community.embeddings import HuggingFaceBgeEmbeddings
 from groq import Groq
 from dotenv import load_dotenv
+import requests
 
 # Initialize Streamlit page configuration
 st.set_page_config(page_title="Medical Knowledge Assistant", layout="wide")
@@ -29,125 +30,117 @@ if not api_key:
 # Initialize the app
 st.title("Medical Knowledge Assistant")
 
-try:
-    # Set up the embeddings
-    model_name = "BAAI/bge-large-en"
-    model_kwargs = {'device': 'cpu'}
-    encode_kwargs = {'normalize_embeddings': False}
-    embeddings = HuggingFaceBgeEmbeddings(
-        model_name=model_name,
-        model_kwargs=model_kwargs,
-        encode_kwargs=encode_kwargs
-    )
-
-    # Load the vector store from the local drive
-    script_dir = os.path.dirname(os.path.abspath(__file__))
-    persist_directory = os.path.join(script_dir, 'Embedded_Med_books')
-
-    # Debug information
-    st.sidebar.header("Debug Information")
-    st.sidebar.write("Vector store path:", persist_directory)
-
-    with st.sidebar:
-        st.write("API Key Loaded:", "Yes" if api_key else "No")
-
-    # Check vector store directory
-    if not os.path.exists(persist_directory):
-        st.error(f"Vector store directory not found at: {persist_directory}")
-        if st.button("Create Directory"):
-            os.makedirs(persist_directory)
-            st.success("Directory created!")
-
-    try:
-        vector_store = Chroma(
-            persist_directory=persist_directory,
-            embedding_function=embeddings
-        )
-    except Exception as e:
-        st.error(f"Error loading vector store: {e}")
-
-    # vector_store = Chroma(
-    #     persist_directory=persist_directory,
-    #     embedding_function=embeddings
-    # )
-
-    retriever = vector_store.as_retriever(search_kwargs={'k': 1})
-
-    # Initialize Groq client
-    client = Groq(api_key=api_key)
-
-    # Streamlit input
-    query = st.text_input("Enter your medical question here:")
-
-    def query_with_groq(query, retriever):
-        try:
-            # Retrieve relevant documents
-            docs = retriever.get_relevant_documents(query)
-            context = "\n".join([doc.page_content for doc in docs])
-
-            # Call the Groq API with the query and context
-            completion = client.chat.completions.create(
-                model="llama3-70b-8192",
-                messages=[
-                    {
-                        "role": "system",
-                        "content": (
-                            "You are a knowledgeable medical assistant. For any medical term or disease, include comprehensive information covering: "
-                            "definitions, types, historical background, major theories, known causes, and contributing risk factors. "
-                            "Explain the genesis or theories on its origin, if applicable. Use a structured, thorough approach and keep language accessible. "
-                            "Provide symptoms, diagnosis, treatment, and post-operative care; address all with in-depth explanation, with specific details and step-by-step processes where relevant. "
-                            "If the context does not adequately cover the user's question, respond with: 'I cannot provide an answer based on the available medical dataset.'"
-                        )
-                    },
-                    {
-                        "role": "system",
-                        "content": (
-                            "If the user asks for a medical explanation, ensure accuracy, don't include layman's terms if complex terms are used, "
-                            "and organize responses in a structured way."
-                        )
-                    },
-                    {
-                        "role": "system",
-                        "content": (
-                            "When comparing two terms or conditions, provide a clear, concise, and structured comparison. Highlight key differences in their "
-                            "definitions, symptoms, causes, diagnoses, and treatments with an in-depth explanation of each. If relevant, include any overlapping characteristics."
-                        )
-                    },
-                    {
-                        "role": "user",
-                        "content": f"{context}\n\nQ: {query}\nA:"
-                    }
-                ],
-                temperature=0.7,
-                max_tokens=3000,
-                stream=True
-            )
-
-            # Create a placeholder for the streaming response
-            response_container = st.empty()
-            response = ""
-
-            # Stream the response
-            for chunk in completion:
-                if chunk.choices[0].delta.content:
-                    response += chunk.choices[0].delta.content
-                    response_container.markdown(response)
-
-            return response
-
-        except Exception as e:
-            st.error(f"Error during query processing: {str(e)}")
-            return None
-
-    if st.button("Get Answer"):
-        if query:
-            with st.spinner("Processing your query..."):
-                answer = query_with_groq(query, retriever)
-                if answer:
-                    st.success("Query processed successfully!")
-        else:
-            st.warning("Please enter a query.")
-
-except Exception as e:
-    st.error(f"Initialization error: {str(e)}")
+# Google Drive file ID (use your own file ID)
+file_id = '1lVlF8dYsNFPzrNGqn7jiJos7qX49jmi0'  # Replace with your Google Drive file ID
+destination_path = '/tmp/Embedded_Med_books'  # Temporary location to store the vector store
+
+# Function to download a file from Google Drive
+def download_from_drive(file_id, destination_path):
+    """Download the vector store file from Google Drive."""
+    url = f'https://drive.google.com/uc?export=download&id={file_id}'
+    response = requests.get(url)
+    if response.status_code == 200:
+        with open(destination_path, 'wb') as f:
+            f.write(response.content)
+        return destination_path
+    else:
+        st.error("Failed to download the file from Google Drive.")
+        return None
+
+# Check if the vector store file exists, and download it if necessary
+if not os.path.exists(destination_path):
+    st.warning("Downloading the vector store from Google Drive...")
+    download_from_drive(file_id, destination_path)
+    st.success("Vector store downloaded successfully!")
+
+# Set up embeddings
+model_name = "BAAI/bge-large-en"
+model_kwargs = {'device': 'cpu'}
+encode_kwargs = {'normalize_embeddings': False}
+embeddings = HuggingFaceBgeEmbeddings(
+    model_name=model_name,
+    model_kwargs=model_kwargs,
+    encode_kwargs=encode_kwargs
+)
+
+# Load the vector store from the downloaded file
+vector_store = Chroma(
+    persist_directory=destination_path,
+    embedding_function=embeddings
+)
+retriever = vector_store.as_retriever(search_kwargs={'k': 1})
+
+# Initialize Groq client
+client = Groq(api_key=api_key)
+
+# Streamlit input
+query = st.text_input("Enter your medical question here:")
+
+def query_with_groq(query, retriever):
+    try:
+        # Retrieve relevant documents
+        docs = retriever.get_relevant_documents(query)
+        context = "\n".join([doc.page_content for doc in docs])
+
+        # Call the Groq API with the query and context
+        completion = client.chat.completions.create(
+            model="llama3-70b-8192",
+            messages=[
+                {
+                    "role": "system",
+                    "content": (
+                        "You are a knowledgeable medical assistant. For any medical term or disease, include comprehensive information covering: "
+                        "definitions, types, historical background, major theories, known causes, and contributing risk factors. "
+                        "Explain the genesis or theories on its origin, if applicable. Use a structured, thorough approach and keep language accessible. "
+                        "Provide symptoms, diagnosis, treatment, and post-operative care; address all with in-depth explanation, with specific details and step-by-step processes where relevant. "
+                        "If the context does not adequately cover the user's question, respond with: 'I cannot provide an answer based on the available medical dataset.'"
+                    )
+                },
+                {
+                    "role": "system",
+                    "content": (
+                        "If the user asks for a medical explanation, ensure accuracy, don't include layman's terms if complex terms are used, "
+                        "and organize responses in a structured way."
+                    )
+                },
+                {
+                    "role": "system",
+                    "content": (
+                        "When comparing two terms or conditions, provide a clear, concise, and structured comparison. Highlight key differences in their "
+                        "definitions, symptoms, causes, diagnoses, and treatments with an in-depth explanation of each. If relevant, include any overlapping characteristics."
+                    )
+                },
+                {
+                    "role": "user",
+                    "content": f"{context}\n\nQ: {query}\nA:"
+                }
+            ],
+            temperature=0.7,
+            max_tokens=3000,
+            stream=True
+        )
+
+        # Create a placeholder for the streaming response
+        response_container = st.empty()
+        response = ""
+
+        # Stream the response
+        for chunk in completion:
+            if chunk.choices[0].delta.content:
+                response += chunk.choices[0].delta.content
+                response_container.markdown(response)
+
+        return response
+
+    except Exception as e:
+        st.error(f"Error during query processing: {str(e)}")
+        return None
+
+if st.button("Get Answer"):
+    if query:
+        with st.spinner("Processing your query..."):
+            answer = query_with_groq(query, retriever)
+            if answer:
+                st.success("Query processed successfully!")
+    else:
+        st.warning("Please enter a query.")
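
A note on the download step: `Chroma(persist_directory=...)` expects a directory of persisted index files, but `download_from_drive` writes the response body to a single file at `/tmp/Embedded_Med_books`. Large or publicly shared Google Drive files also tend to return an HTML confirmation page rather than raw bytes when fetched with a bare `requests.get`. A minimal sketch of one way to reconcile the directory issue, assuming the vector store is shared as a zip archive of the persist directory (the archive ID and layout below are hypothetical):

```python
import os
import zipfile

import requests

ARCHIVE_ID = "YOUR_DRIVE_FILE_ID"            # hypothetical: ID of a zip of the Chroma directory
ARCHIVE_PATH = "/tmp/Embedded_Med_books.zip"
PERSIST_DIR = "/tmp/Embedded_Med_books"

def fetch_vector_store() -> str:
    """Download and unpack the persisted Chroma directory if it is not already present."""
    if not os.path.isdir(PERSIST_DIR):
        url = f"https://drive.google.com/uc?export=download&id={ARCHIVE_ID}"
        response = requests.get(url, timeout=120)
        response.raise_for_status()
        with open(ARCHIVE_PATH, "wb") as f:
            f.write(response.content)
        with zipfile.ZipFile(ARCHIVE_PATH) as archive:
            archive.extractall(PERSIST_DIR)  # restores the directory layout Chroma expects
    return PERSIST_DIR
```

For files large enough to trigger Drive's virus-scan confirmation page, a dedicated helper such as the `gdown` package is a common alternative to hand-rolling the confirmation-token handling.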
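
`retriever.get_relevant_documents(query)` still works here, but newer LangChain releases deprecate it in favor of the Runnable interface. If the app is later upgraded, the retrieval lines inside `query_with_groq` would become something like the following (assuming the `retriever` built above):

```python
# Retrievers are Runnables in recent LangChain versions; invoke() replaces
# the deprecated get_relevant_documents() call.
docs = retriever.invoke(query)
context = "\n".join(doc.page_content for doc in docs)
```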
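
One Streamlit-specific refinement worth considering (not part of this commit): the script reruns top to bottom on every interaction, so the BGE model and Chroma store are reloaded for each query. `st.cache_resource` keeps a single instance alive across reruns; a sketch under that assumption:

```python
import streamlit as st
from langchain_community.embeddings import HuggingFaceBgeEmbeddings
from langchain_community.vectorstores import Chroma

@st.cache_resource
def load_retriever(persist_directory: str):
    """Build the embeddings and vector store once, then reuse them across reruns."""
    embeddings = HuggingFaceBgeEmbeddings(
        model_name="BAAI/bge-large-en",
        model_kwargs={"device": "cpu"},
        encode_kwargs={"normalize_embeddings": False},
    )
    vector_store = Chroma(
        persist_directory=persist_directory,
        embedding_function=embeddings,
    )
    return vector_store.as_retriever(search_kwargs={"k": 1})

retriever = load_retriever("/tmp/Embedded_Med_books")
```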