Théo ALVES DA COSTA committed on
Commit 38ed905
1 Parent(s): 2bee256

Corrected bugs causing errors in async mode

app.py CHANGED
@@ -104,7 +104,7 @@ def serialize_docs(docs):
     return new_docs
 
 
-async def chat(query,history,audience,sources,reports):
+def chat(query,history,audience,sources,reports):
     """taking a query and a message history, use a pipeline (reformulation, retriever, answering) to yield a tuple of:
     (messages in gradio format, messages in langchain format, source documents)"""
 
@@ -144,62 +144,102 @@ async def chat(query,history,audience,sources,reports):
     # memory.chat_memory.add_message(message)
 
     inputs = {"query": query,"audience": audience_prompt}
-    result = rag_chain.astream_log(inputs)
+    # result = rag_chain.astream_log(inputs)
+    result = rag_chain.stream(inputs)
 
     reformulated_question_path_id = "/logs/flatten_dict/final_output"
     retriever_path_id = "/logs/Retriever/final_output"
     streaming_output_path_id = "/logs/AzureChatOpenAI:2/streamed_output_str/-"
     final_output_path_id = "/streamed_output/-"
 
-    docs_html = ""
+    docs_html = "No sources found for this question"
     output_query = ""
    output_language = ""
     gallery = []
-
-    async for op in result:
-
-        op = op.ops[0]
-        print(op)
-
-        if op['path'] == reformulated_question_path_id: # reforulated question
-            output_language = op['value']["language"] # str
-            output_query = op["value"]["question"]
-
-        elif op['path'] == retriever_path_id: # documents
+
+    for output in result:
+
+        if "language" in output:
+            output_language = output["language"]
+        if "question" in output:
+            output_query = output["question"]
+        if "docs" in output:
             try:
-                docs = op['value']['documents'] # List[Document]
+                docs = output['docs'] # List[Document]
                 docs_html = []
                 for i, d in enumerate(docs, 1):
                     docs_html.append(make_html_source(d, i))
                 docs_html = "".join(docs_html)
             except TypeError:
                 print("No documents found")
-                print("op: ",op)
                 continue
 
-        elif op['path'] == streaming_output_path_id: # final answer
-            new_token = op['value'] # str
+        if "answer" in output:
+            new_token = output["answer"] # str
             time.sleep(0.03)
             answer_yet = history[-1][1] + new_token
             answer_yet = parse_output_llm_with_sources(answer_yet)
             history[-1] = (query,answer_yet)
-
-        # elif op['path'] == final_output_path_id:
-        #     final_output = op['value']
-
-        #     if "answer" in final_output:
-
-        #         final_output = final_output["answer"]
-        #         print(final_output)
-        #         answer = history[-1][1] + final_output
-        #         answer = parse_output_llm_with_sources(answer)
-        #         history[-1] = (query,answer)
-
-        else:
-            continue
-
-        history = [tuple(x) for x in history]
-        yield history,docs_html,output_query,output_language,gallery
+
+        yield history,docs_html,output_query,output_language,gallery
+
+
+
+    # async def fallback_iterator(iterable):
+    #     async for item in iterable:
+    #         try:
+    #             yield item
+    #         except Exception as e:
+    #             print(f"Error in fallback iterator: {e}")
+    #             raise gr.Error(f"ClimateQ&A Error: {e}\nThe error has been noted, try another question and if the error remains, you can contact us :)")
+
+
+    # async for op in fallback_iterator(result):
+
+    #     op = op.ops[0]
+    #     print("yo",op)
+
+    #     if op['path'] == reformulated_question_path_id: # reforulated question
+    #         output_language = op['value']["language"] # str
+    #         output_query = op["value"]["question"]
+
+    #     elif op['path'] == retriever_path_id: # documents
+    #         try:
+    #             docs = op['value']['documents'] # List[Document]
+    #             docs_html = []
+    #             for i, d in enumerate(docs, 1):
+    #                 docs_html.append(make_html_source(d, i))
+    #             docs_html = "".join(docs_html)
+    #         except TypeError:
+    #             print("No documents found")
+    #             print("op: ",op)
+    #             continue
+
+    #     elif op['path'] == streaming_output_path_id: # final answer
+    #         new_token = op['value'] # str
+    #         time.sleep(0.03)
+    #         answer_yet = history[-1][1] + new_token
+    #         answer_yet = parse_output_llm_with_sources(answer_yet)
+    #         history[-1] = (query,answer_yet)
+
+    #     # elif op['path'] == final_output_path_id:
+    #     #     final_output = op['value']
+
+    #     # if "answer" in final_output:
+
+    #     #     final_output = final_output["answer"]
+    #     #     print(final_output)
+    #     #     answer = history[-1][1] + final_output
+    #     #     answer = parse_output_llm_with_sources(answer)
+    #     #     history[-1] = (query,answer)
+
+    #     else:
+    #         continue
+
+    #     history = [tuple(x) for x in history]
+    #     yield history,docs_html,output_query,output_language,gallery
+
 
     # Log answer on Azure Blob Storage
     if os.getenv("GRADIO_ENV") != "local":
@@ -295,12 +335,12 @@ def log_on_azure(file, logs, share_client):
 init_prompt = """
 Hello, I am ClimateQ&A, a conversational assistant designed to help you understand climate change and biodiversity loss. I will answer your questions by **sifting through the IPCC and IPBES scientific reports**.
 
-How to use
+How to use
 - **Language**: You can ask me your questions in any language.
 - **Audience**: You can specify your audience (children, general public, experts) to get a more adapted answer.
 - **Sources**: You can choose to search in the IPCC or IPBES reports, or both.
 
-Limitations
+⚠️ Limitations
 *Please note that the AI is not perfect and may sometimes give irrelevant answers. If you are not satisfied with the answer, please ask a more specific question or report your feedback to help us improve the system.*
 
 What do you want to learn ?
@@ -326,7 +366,7 @@ with gr.Blocks(title="Climate Q&A", css="style.css", theme=theme,elem_id = "main
         chatbot = gr.Chatbot(
             value=[(None,init_prompt)],
             show_copy_button=True,show_label = False,elem_id="chatbot",layout = "panel",
-            avatar_images = ("https://i.ibb.co/YNyd5W2/logo4.png",None),
+            avatar_images = (None,"https://i.ibb.co/YNyd5W2/logo4.png"),
         )#,avatar_images = ("assets/logo4.png",None))
 
         # bot.like(vote,None,None)
@@ -408,6 +448,8 @@ with gr.Blocks(title="Climate Q&A", css="style.css", theme=theme,elem_id = "main
 
     def start_chat(query,history):
        history = history + [(query,"")]
+        history = [tuple(x) for x in history]
+        print(history)
        return (gr.update(interactive = False),gr.update(selected=1),history)
 
    def finish_chat():
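
For context on the fix above: astream_log returns an async iterator of RunLogPatch objects carrying JSONPatch-style ops, while stream is a plain synchronous iterator of output chunks, which lets chat become a regular generator. A minimal sketch of the two consumption patterns, using a hypothetical RunnableLambda stand-in for the app's rag_chain:

from langchain_core.runnables import RunnableLambda

# Hypothetical stand-in for the app's rag_chain; any LCEL runnable
# exposes both streaming APIs discussed in this commit.
rag_chain = RunnableLambda(lambda x: {"answer": "echo: " + x["query"]})

# Old (async) pattern: each patch wraps JSONPatch ops such as
# {"path": "/logs/Retriever/final_output", "value": ...} and must be
# consumed with `async for` inside an async generator:
#
#     async for patch in rag_chain.astream_log(inputs):
#         op = patch.ops[0]
#
# New (sync) pattern: chunks are plain dict-like outputs, so the Gradio
# callback no longer needs the async machinery that was failing.
for output in rag_chain.stream({"query": "Is sea level rising?", "audience": "experts"}):
    if "answer" in output:
        print(output["answer"])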
climateqa/engine/embeddings.py CHANGED
@@ -1,6 +1,6 @@
 
-from langchain.embeddings import HuggingFaceBgeEmbeddings
-from langchain.embeddings import HuggingFaceEmbeddings
+from langchain_community.embeddings import HuggingFaceBgeEmbeddings
+from langchain_community.embeddings import HuggingFaceEmbeddings
 
 def get_embeddings_function(version = "v1.2"):
 
@@ -22,4 +22,4 @@ def get_embeddings_function(version = "v1.2"):
 
         embeddings_function = HuggingFaceEmbeddings(model_name = "sentence-transformers/multi-qa-mpnet-base-dot-v1")
 
-    return embeddings_function
+    return embeddings_function
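
These one-line changes follow LangChain's package split, which moved third-party integrations out of the langchain meta-package into langchain_community (and core abstractions into langchain_core). A minimal sketch of the migrated embeddings in use, assuming langchain-community and sentence-transformers are installed:

from langchain_community.embeddings import HuggingFaceEmbeddings

# Only the import path changes with the split; the constructor and the
# embed_query / embed_documents API are the same as before.
embeddings_function = HuggingFaceEmbeddings(
    model_name="sentence-transformers/multi-qa-mpnet-base-dot-v1"
)
vector = embeddings_function.embed_query("sea level rise")
print(len(vector))  # dimensionality of the embedding vector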
climateqa/engine/llm.py CHANGED
@@ -1,4 +1,4 @@
-from langchain.chat_models import AzureChatOpenAI
+from langchain_community.chat_models import AzureChatOpenAI
 import os
 # LOAD ENVIRONMENT VARIABLES
 try:
climateqa/engine/prompts.py CHANGED
@@ -63,7 +63,6 @@ Answer in {language} with the passages citations:
 answer_prompt_without_docs_template = """
 You are ClimateQ&A, an AI Assistant created by Ekimetrics. Your role is to explain climate-related questions using info from the IPCC and/or IPBES reports.
 Always stay true to climate science and do not make up information. If you do not know the answer, just say you do not know.
-If the
 
 Guidelines:
 - Start by explaining clearly that you could not find the answer in the IPCC/IPBES reports, so your answer is based on your own knowledge and must be taken with great caution because it's AI generated.
climateqa/engine/rag.py CHANGED
@@ -1,15 +1,16 @@
 from operator import itemgetter
 
-from langchain.prompts import ChatPromptTemplate
-from langchain.schema.output_parser import StrOutputParser
-from langchain.schema.runnable import RunnablePassthrough, RunnableLambda, RunnableBranch
-from langchain.prompts.prompt import PromptTemplate
-from langchain.schema import format_document
+from langchain_core.prompts import ChatPromptTemplate
+from langchain_core.output_parsers import StrOutputParser
+from langchain_core.runnables import RunnablePassthrough, RunnableLambda, RunnableBranch
+from langchain_core.prompts.prompt import PromptTemplate
+from langchain_core.prompts.base import format_document
 
 from climateqa.engine.reformulation import make_reformulation_chain
 from climateqa.engine.prompts import answer_prompt_template,answer_prompt_without_docs_template
 from climateqa.engine.utils import pass_values, flatten_dict
 
+
 DEFAULT_DOCUMENT_PROMPT = PromptTemplate.from_template(template="{page_content}")
 
 def _combine_documents(
@@ -72,7 +73,7 @@ def make_rag_chain(retriever,llm):
 
     # ------- FINAL CHAIN
     # Build the final chain
-    rag_chain = reformulation | find_documents | answer_with_docs
+    rag_chain = reformulation | find_documents | answer
 
     return rag_chain
 
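The renamed last stage (answer instead of answer_with_docs) refers to a runnable defined between these hunks that the diff does not show, presumably a branch covering both the with-docs and without-docs prompts. As a reminder of what the | composition builds, a minimal sketch with hypothetical stages in place of the real ones:

from langchain_core.runnables import RunnableLambda

# Hypothetical stages mirroring make_rag_chain's shape: each one receives
# the previous stage's output dict and enriches or consumes it.
reformulation = RunnableLambda(lambda x: {**x, "question": x["query"]})
find_documents = RunnableLambda(lambda x: {**x, "docs": ["doc1", "doc2"]})
answer = RunnableLambda(lambda x: f"{x['question']} ({len(x['docs'])} sources)")

rag_chain = reformulation | find_documents | answer
print(rag_chain.invoke({"query": "Is sea level rising?"}))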
climateqa/engine/reformulation.py CHANGED
@@ -1,11 +1,10 @@
 
-from langchain.output_parsers import StructuredOutputParser, ResponseSchema
-from langchain.prompts import PromptTemplate
-from langchain.llms import OpenAI
-from langchain.chat_models import ChatOpenAI
+from langchain.output_parsers.structured import StructuredOutputParser, ResponseSchema
+from langchain_core.prompts import PromptTemplate
+from langchain_core.runnables import RunnablePassthrough, RunnableLambda, RunnableBranch
 
 from climateqa.engine.prompts import reformulation_prompt_template
-
+from climateqa.engine.utils import pass_values, flatten_dict
 
 
 response_schemas = [
@@ -15,6 +14,12 @@ response_schemas = [
 output_parser = StructuredOutputParser.from_response_schemas(response_schemas)
 format_instructions = output_parser.get_format_instructions()
 
+def fallback_default_values(x):
+    if x["question"] is None:
+        x["question"] = x["query"]
+        x["language"] = "english"
+
+    return x
 
 def make_reformulation_chain(llm):
 
@@ -25,4 +30,13 @@ def make_reformulation_chain(llm):
     )
 
     chain = (prompt | llm.bind(stop=["```"]) | output_parser)
-    return chain
+
+    reformulation_chain = (
+        {"reformulation":chain,**pass_values(["query"])}
+        | RunnablePassthrough()
+        | flatten_dict
+        | fallback_default_values
+    )
+
+
+    return reformulation_chain
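
The new wrapper nests the parser's output under a "reformulation" key next to the original "query", then pipes it through flatten_dict and fallback_default_values, so a failed reformulation degrades to the raw query instead of crashing downstream. A hypothetical trace of that data flow (values invented for illustration):

# Input to flatten_dict, as assembled by {"reformulation": chain, **pass_values(["query"])}:
payload = {
    "reformulation": {"question": "What drives sea level rise?", "language": "english"},
    "query": "sea level?",
}
# flatten_dict (from climateqa.engine.utils) lifts the nested keys one level, giving roughly:
# {"question": "What drives sea level rise?", "language": "english", "query": "sea level?"}

# fallback_default_values (as added above) then guards the failure case:
def fallback_default_values(x):
    if x["question"] is None:       # parser produced nothing usable
        x["question"] = x["query"]  # fall back to the raw user query
        x["language"] = "english"
    return x

print(fallback_default_values({"question": None, "query": "sea level?"}))
# -> {'question': 'sea level?', 'query': 'sea level?', 'language': 'english'}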
climateqa/engine/retriever.py CHANGED
@@ -2,10 +2,12 @@
 
 import pandas as pd
 
-from langchain.schema.retriever import BaseRetriever, Document
-from langchain.vectorstores.base import VectorStoreRetriever
-from langchain.vectorstores import VectorStore
-from langchain.callbacks.manager import CallbackManagerForRetrieverRun
+from langchain_core.retrievers import BaseRetriever
+from langchain_core.vectorstores import VectorStoreRetriever
+from langchain_core.documents.base import Document
+from langchain_core.vectorstores import VectorStore
+from langchain_core.callbacks.manager import CallbackManagerForRetrieverRun
+
 from typing import List
 from pydantic import Field
 
climateqa/engine/utils.py CHANGED
@@ -48,3 +48,5 @@ def flatten_dict(
     """
     flat_dict = {k: v for k, v in _flatten_dict(nested_dict, parent_key, sep)}
     return flat_dict
+
+
climateqa/engine/vectorstore.py CHANGED
@@ -3,7 +3,7 @@
 # And https://python.langchain.com/docs/integrations/vectorstores/pinecone
 import os
 import pinecone
-from langchain.vectorstores import Pinecone
+from langchain_community.vectorstores import Pinecone
 
 # LOAD ENVIRONMENT VARIABLES
 try:
@@ -23,6 +23,7 @@ def get_pinecone_vectorstore(embeddings,text_key = "text"):
 
     index_name = os.getenv("PINECONE_API_INDEX")
     vectorstore = Pinecone.from_existing_index(index_name, embeddings,text_key = text_key)
+
     return vectorstore
 
 
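
A hedged sketch of how the migrated Pinecone wrapper is typically used downstream; the retriever settings are assumptions for illustration, not the app's actual configuration:

import os
from langchain_community.vectorstores import Pinecone
from langchain_community.embeddings import HuggingFaceEmbeddings

# Assumes pinecone.init(...) has already run, as in the try-block above,
# and reuses the embeddings model from embeddings.py.
embeddings = HuggingFaceEmbeddings(
    model_name="sentence-transformers/multi-qa-mpnet-base-dot-v1"
)
index_name = os.getenv("PINECONE_API_INDEX")
vectorstore = Pinecone.from_existing_index(index_name, embeddings, text_key="text")

# Typical downstream use: expose the store as a retriever for the RAG chain.
retriever = vectorstore.as_retriever(search_kwargs={"k": 4})
docs = retriever.get_relevant_documents("sea level rise")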