Shreyas094 committed on
Commit
d1372f5
1 Parent(s): e81c09c

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +27 -27
app.py CHANGED
@@ -110,28 +110,6 @@ class CitingSources(BaseModel):
110
  description="List of sources to cite. Should be an URL of the source."
111
  )
112
 
113
- def get_response_with_search(query):
114
- search_results = duckduckgo_search(query)
115
- context = "\n".join(f"{result['title']}\n{result['body']}\nSource: {result['href']}\n"
116
- for result in search_results if 'body' in result)
117
-
118
- prompt = f"""<s>[INST] Using the following context:
119
- {context}
120
- Write a detailed and complete research document that fulfills the following user request: '{query}'
121
- After writing the document, please provide a list of sources used in your response. [/INST]"""
122
-
123
- generated_text = generate_chunked_response(prompt)
124
-
125
- content_start = generated_text.find("[/INST]")
126
- if content_start != -1:
127
- generated_text = generated_text[content_start + 7:].strip()
128
-
129
- parts = generated_text.split("Sources:", 1)
130
- main_content = parts[0].strip()
131
- sources = parts[1].strip() if len(parts) > 1 else ""
132
-
133
- return main_content, sources
134
-
135
  def get_response_from_pdf(query):
136
  embed = get_embeddings()
137
  if os.path.exists("faiss_database"):
@@ -150,18 +128,40 @@ After writing the response, please provide a list of sources used (document name
150
 
151
  generated_text = generate_chunked_response(prompt)
152
 
153
- # Remove the instruction part from the response
154
- content_start = generated_text.find("[/INST]")
155
- if content_start != -1:
156
- generated_text = generated_text[content_start + 7:].strip()
157
 
158
  # Split the content and sources
159
- parts = generated_text.split("Sources:", 1)
160
  main_content = parts[0].strip()
161
  sources = parts[1].strip() if len(parts) > 1 else ""
162
 
163
  return main_content, sources
164
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
165
  def chatbot_interface(message, history, use_web_search):
166
  if use_web_search:
167
  main_content, sources = get_response_with_search(message)
 
110
  description="List of sources to cite. Should be an URL of the source."
111
  )
112
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
113
  def get_response_from_pdf(query):
114
  embed = get_embeddings()
115
  if os.path.exists("faiss_database"):
 
128
 
129
  generated_text = generate_chunked_response(prompt)
130
 
131
+ # Remove all instruction-related content
132
+ clean_text = re.sub(r'<s>\[INST\].*?\[/INST\]\s*', '', generated_text, flags=re.DOTALL)
133
+ clean_text = clean_text.replace("Using the following context from the PDF documents:", "").strip()
 
134
 
135
  # Split the content and sources
136
+ parts = clean_text.split("Sources:", 1)
137
  main_content = parts[0].strip()
138
  sources = parts[1].strip() if len(parts) > 1 else ""
139
 
140
  return main_content, sources
141
 
142
+ def get_response_with_search(query):
143
+ search_results = duckduckgo_search(query)
144
+ context = "\n".join(f"{result['title']}\n{result['body']}\nSource: {result['href']}\n"
145
+ for result in search_results if 'body' in result)
146
+
147
+ prompt = f"""<s>[INST] Using the following context:
148
+ {context}
149
+ Write a detailed and complete research document that fulfills the following user request: '{query}'
150
+ After writing the document, please provide a list of sources used in your response. [/INST]"""
151
+
152
+ generated_text = generate_chunked_response(prompt)
153
+
154
+ # Remove all instruction-related content
155
+ clean_text = re.sub(r'<s>\[INST\].*?\[/INST\]\s*', '', generated_text, flags=re.DOTALL)
156
+ clean_text = clean_text.replace("Using the following context:", "").strip()
157
+
158
+ # Split the content and sources
159
+ parts = clean_text.split("Sources:", 1)
160
+ main_content = parts[0].strip()
161
+ sources = parts[1].strip() if len(parts) > 1 else ""
162
+
163
+ return main_content, sources
164
+
165
  def chatbot_interface(message, history, use_web_search):
166
  if use_web_search:
167
  main_content, sources = get_response_with_search(message)