Théo ALVES DA COSTA committed on
Commit 38ed905
1 Parent(s): 2bee256

Corrected bugs causing errors in async mode

app.py CHANGED
@@ -104,7 +104,7 @@ def serialize_docs(docs):
     return new_docs
 
 
-async def chat(query,history,audience,sources,reports):
+def chat(query,history,audience,sources,reports):
     """taking a query and a message history, use a pipeline (reformulation, retriever, answering) to yield a tuple of:
     (messages in gradio format, messages in langchain format, source documents)"""
 
@@ -144,62 +144,102 @@ async def chat(query,history,audience,sources,reports):
     # memory.chat_memory.add_message(message)
 
     inputs = {"query": query,"audience": audience_prompt}
-    result = rag_chain.astream_log(inputs)
+    # result = rag_chain.astream_log(inputs)
+    result = rag_chain.stream(inputs)
 
     reformulated_question_path_id = "/logs/flatten_dict/final_output"
     retriever_path_id = "/logs/Retriever/final_output"
     streaming_output_path_id = "/logs/AzureChatOpenAI:2/streamed_output_str/-"
     final_output_path_id = "/streamed_output/-"
 
-    docs_html = ""
+    docs_html = "No sources found for this question"
     output_query = ""
    output_language = ""
     gallery = []
-
-    async for op in result:
-
-        op = op.ops[0]
-        print(op)
-
-        if op['path'] == reformulated_question_path_id: # reforulated question
-            output_language = op['value']["language"] # str
-            output_query = op["value"]["question"]
-
-        elif op['path'] == retriever_path_id: # documents
+
+    for output in result:
+
+        if "language" in output:
+            output_language = output["language"]
+        if "question" in output:
+            output_query = output["question"]
+        if "docs" in output:
             try:
-                docs = op['value']['documents'] # List[Document]
+                docs = output['docs'] # List[Document]
                 docs_html = []
                 for i, d in enumerate(docs, 1):
                     docs_html.append(make_html_source(d, i))
                 docs_html = "".join(docs_html)
             except TypeError:
                 print("No documents found")
-                print("op: ",op)
                 continue
 
-        elif op['path'] == streaming_output_path_id: # final answer
-            new_token = op['value'] # str
+        if "answer" in output:
+            new_token = output["answer"] # str
             time.sleep(0.03)
             answer_yet = history[-1][1] + new_token
             answer_yet = parse_output_llm_with_sources(answer_yet)
             history[-1] = (query,answer_yet)
-
-        # elif op['path'] == final_output_path_id:
-        #     final_output = op['value']
-
-        #     if "answer" in final_output:
-
-        #         final_output = final_output["answer"]
-        #         print(final_output)
-        #         answer = history[-1][1] + final_output
-        #         answer = parse_output_llm_with_sources(answer)
-        #         history[-1] = (query,answer)
-
-        else:
-            continue
-
-        history = [tuple(x) for x in history]
-        yield history,docs_html,output_query,output_language,gallery
+
+        yield history,docs_html,output_query,output_language,gallery
+
+
+
+    # async def fallback_iterator(iterable):
+    #     async for item in iterable:
+    #         try:
+    #             yield item
+    #         except Exception as e:
+    #             print(f"Error in fallback iterator: {e}")
+    #             raise gr.Error(f"ClimateQ&A Error: {e}\nThe error has been noted, try another question and if the error remains, you can contact us :)")
+
+
+    # async for op in fallback_iterator(result):
+
+    #     op = op.ops[0]
+    #     print("yo",op)
+
+    #     if op['path'] == reformulated_question_path_id: # reforulated question
+    #         output_language = op['value']["language"] # str
+    #         output_query = op["value"]["question"]
+
+    #     elif op['path'] == retriever_path_id: # documents
+    #         try:
+    #             docs = op['value']['documents'] # List[Document]
+    #             docs_html = []
+    #             for i, d in enumerate(docs, 1):
+    #                 docs_html.append(make_html_source(d, i))
+    #             docs_html = "".join(docs_html)
+    #         except TypeError:
+    #             print("No documents found")
+    #             print("op: ",op)
+    #             continue
+
+    #     elif op['path'] == streaming_output_path_id: # final answer
+    #         new_token = op['value'] # str
+    #         time.sleep(0.03)
+    #         answer_yet = history[-1][1] + new_token
+    #         answer_yet = parse_output_llm_with_sources(answer_yet)
+    #         history[-1] = (query,answer_yet)
+
+    #     # elif op['path'] == final_output_path_id:
+    #     #     final_output = op['value']
+
+    #     # if "answer" in final_output:
+
+    #     #     final_output = final_output["answer"]
+    #     #     print(final_output)
+    #     #     answer = history[-1][1] + final_output
+    #     #     answer = parse_output_llm_with_sources(answer)
+    #     #     history[-1] = (query,answer)
+
+    #     else:
+    #         continue
+
+    #     history = [tuple(x) for x in history]
+    #     yield history,docs_html,output_query,output_language,gallery
+
 
     # Log answer on Azure Blob Storage
     if os.getenv("GRADIO_ENV") != "local":
@@ -295,12 +335,12 @@ def log_on_azure(file, logs, share_client):
 init_prompt = """
 Hello, I am ClimateQ&A, a conversational assistant designed to help you understand climate change and biodiversity loss. I will answer your questions by **sifting through the IPCC and IPBES scientific reports**.
 
-How to use
+How to use
 - **Language**: You can ask me your questions in any language.
 - **Audience**: You can specify your audience (children, general public, experts) to get a more adapted answer.
 - **Sources**: You can choose to search in the IPCC or IPBES reports, or both.
 
-Limitations
+⚠️ Limitations
 *Please note that the AI is not perfect and may sometimes give irrelevant answers. If you are not satisfied with the answer, please ask a more specific question or report your feedback to help us improve the system.*
 
 What do you want to learn ?
@@ -326,7 +366,7 @@ with gr.Blocks(title="Climate Q&A", css="style.css", theme=theme,elem_id = "main
         chatbot = gr.Chatbot(
             value=[(None,init_prompt)],
             show_copy_button=True,show_label = False,elem_id="chatbot",layout = "panel",
-            avatar_images = ("https://i.ibb.co/YNyd5W2/logo4.png",None),
+            avatar_images = (None,"https://i.ibb.co/YNyd5W2/logo4.png"),
         )#,avatar_images = ("assets/logo4.png",None))
 
         # bot.like(vote,None,None)
@@ -408,6 +448,8 @@ with gr.Blocks(title="Climate Q&A", css="style.css", theme=theme,elem_id = "main
 
     def start_chat(query,history):
        history = history + [(query,"")]
+        history = [tuple(x) for x in history]
+        print(history)
        return (gr.update(interactive = False),gr.update(selected=1),history)
 
    def finish_chat():
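
For context on the fix above: astream_log returns an async iterator of RunLogPatch objects carrying JSONPatch-style ops, while stream is a plain synchronous iterator of output chunks, which lets chat become a regular generator. A minimal sketch of the two consumption patterns, using a hypothetical RunnableLambda stand-in for the app's rag_chain:

from langchain_core.runnables import RunnableLambda

# Hypothetical stand-in for the app's rag_chain; any LCEL runnable
# exposes both streaming APIs discussed in this commit.
rag_chain = RunnableLambda(lambda x: {"answer": "echo: " + x["query"]})

# Old (async) pattern: each patch wraps JSONPatch ops such as
# {"path": "/logs/Retriever/final_output", "value": ...} and must be
# consumed with `async for` inside an async generator:
#
#     async for patch in rag_chain.astream_log(inputs):
#         op = patch.ops[0]
#
# New (sync) pattern: chunks are plain dict-like outputs, so the Gradio
# callback no longer needs the async machinery that was failing.
for output in rag_chain.stream({"query": "Is sea level rising?", "audience": "experts"}):
    if "answer" in output:
        print(output["answer"])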
climateqa/engine/embeddings.py CHANGED
@@ -1,6 +1,6 @@
 
-from langchain.embeddings import HuggingFaceBgeEmbeddings
-from langchain.embeddings import HuggingFaceEmbeddings
+from langchain_community.embeddings import HuggingFaceBgeEmbeddings
+from langchain_community.embeddings import HuggingFaceEmbeddings
 
 def get_embeddings_function(version = "v1.2"):
 
@@ -22,4 +22,4 @@ def get_embeddings_function(version = "v1.2"):
 
         embeddings_function = HuggingFaceEmbeddings(model_name = "sentence-transformers/multi-qa-mpnet-base-dot-v1")
 
-    return embeddings_function
+    return embeddings_function
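
These one-line changes follow LangChain's package split, which moved third-party integrations out of the langchain meta-package into langchain_community (and core abstractions into langchain_core). A minimal sketch of the migrated embeddings in use, assuming langchain-community and sentence-transformers are installed:

from langchain_community.embeddings import HuggingFaceEmbeddings

# Only the import path changes with the split; the constructor and the
# embed_query / embed_documents API are the same as before.
embeddings_function = HuggingFaceEmbeddings(
    model_name="sentence-transformers/multi-qa-mpnet-base-dot-v1"
)
vector = embeddings_function.embed_query("sea level rise")
print(len(vector))  # dimensionality of the embedding vector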
climateqa/engine/llm.py CHANGED
@@ -1,4 +1,4 @@
-from langchain.chat_models import AzureChatOpenAI
+from langchain_community.chat_models import AzureChatOpenAI
 import os
 # LOAD ENVIRONMENT VARIABLES
 try:
climateqa/engine/prompts.py CHANGED
@@ -63,7 +63,6 @@ Answer in {language} with the passages citations:
 answer_prompt_without_docs_template = """
 You are ClimateQ&A, an AI Assistant created by Ekimetrics. Your role is to explain climate-related questions using info from the IPCC and/or IPBES reports.
 Always stay true to climate science and do not make up information. If you do not know the answer, just say you do not know.
-If the
 
 Guidelines:
 - Start by explaining clearly that you could not find the answer in the IPCC/IPBES reports, so your answer is based on your own knowledge and must be taken with great caution because it's AI generated.
climateqa/engine/rag.py CHANGED
@@ -1,15 +1,16 @@
 from operator import itemgetter
 
-from langchain.prompts import ChatPromptTemplate
-from langchain.schema.output_parser import StrOutputParser
-from langchain.schema.runnable import RunnablePassthrough, RunnableLambda, RunnableBranch
-from langchain.prompts.prompt import PromptTemplate
-from langchain.schema import format_document
+from langchain_core.prompts import ChatPromptTemplate
+from langchain_core.output_parsers import StrOutputParser
+from langchain_core.runnables import RunnablePassthrough, RunnableLambda, RunnableBranch
+from langchain_core.prompts.prompt import PromptTemplate
+from langchain_core.prompts.base import format_document
 
 from climateqa.engine.reformulation import make_reformulation_chain
 from climateqa.engine.prompts import answer_prompt_template,answer_prompt_without_docs_template
 from climateqa.engine.utils import pass_values, flatten_dict
 
+
 DEFAULT_DOCUMENT_PROMPT = PromptTemplate.from_template(template="{page_content}")
 
 def _combine_documents(
@@ -72,7 +73,7 @@ def make_rag_chain(retriever,llm):
 
     # ------- FINAL CHAIN
     # Build the final chain
-    rag_chain = reformulation | find_documents | answer_with_docs
+    rag_chain = reformulation | find_documents | answer
 
     return rag_chain
 
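The renamed last stage (answer instead of answer_with_docs) refers to a runnable defined between these hunks that the diff does not show, presumably a branch covering both the with-docs and without-docs prompts. As a reminder of what the | composition builds, a minimal sketch with hypothetical stages in place of the real ones:

from langchain_core.runnables import RunnableLambda

# Hypothetical stages mirroring make_rag_chain's shape: each one receives
# the previous stage's output dict and enriches or consumes it.
reformulation = RunnableLambda(lambda x: {**x, "question": x["query"]})
find_documents = RunnableLambda(lambda x: {**x, "docs": ["doc1", "doc2"]})
answer = RunnableLambda(lambda x: f"{x['question']} ({len(x['docs'])} sources)")

rag_chain = reformulation | find_documents | answer
print(rag_chain.invoke({"query": "Is sea level rising?"}))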
climateqa/engine/reformulation.py CHANGED
@@ -1,11 +1,10 @@
 
-from langchain.output_parsers import StructuredOutputParser, ResponseSchema
-from langchain.prompts import PromptTemplate
-from langchain.llms import OpenAI
-from langchain.chat_models import ChatOpenAI
+from langchain.output_parsers.structured import StructuredOutputParser, ResponseSchema
+from langchain_core.prompts import PromptTemplate
+from langchain_core.runnables import RunnablePassthrough, RunnableLambda, RunnableBranch
 
 from climateqa.engine.prompts import reformulation_prompt_template
-
+from climateqa.engine.utils import pass_values, flatten_dict
 
 
 response_schemas = [
@@ -15,6 +14,12 @@ response_schemas = [
 output_parser = StructuredOutputParser.from_response_schemas(response_schemas)
 format_instructions = output_parser.get_format_instructions()
 
+def fallback_default_values(x):
+    if x["question"] is None:
+        x["question"] = x["query"]
+        x["language"] = "english"
+
+    return x
 
 def make_reformulation_chain(llm):
 
@@ -25,4 +30,13 @@ def make_reformulation_chain(llm):
     )
 
     chain = (prompt | llm.bind(stop=["```"]) | output_parser)
-    return chain
+
+    reformulation_chain = (
+        {"reformulation":chain,**pass_values(["query"])}
+        | RunnablePassthrough()
+        | flatten_dict
+        | fallback_default_values
+    )
+
+
+    return reformulation_chain
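
The new wrapper nests the parser's output under a "reformulation" key next to the original "query", then pipes it through flatten_dict and fallback_default_values, so a failed reformulation degrades to the raw query instead of crashing downstream. A hypothetical trace of that data flow (values invented for illustration):

# Input to flatten_dict, as assembled by {"reformulation": chain, **pass_values(["query"])}:
payload = {
    "reformulation": {"question": "What drives sea level rise?", "language": "english"},
    "query": "sea level?",
}
# flatten_dict (from climateqa.engine.utils) lifts the nested keys one level, giving roughly:
# {"question": "What drives sea level rise?", "language": "english", "query": "sea level?"}

# fallback_default_values (as added above) then guards the failure case:
def fallback_default_values(x):
    if x["question"] is None:       # parser produced nothing usable
        x["question"] = x["query"]  # fall back to the raw user query
        x["language"] = "english"
    return x

print(fallback_default_values({"question": None, "query": "sea level?"}))
# -> {'question': 'sea level?', 'query': 'sea level?', 'language': 'english'}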
climateqa/engine/retriever.py CHANGED
@@ -2,10 +2,12 @@
 
 import pandas as pd
 
-from langchain.schema.retriever import BaseRetriever, Document
-from langchain.vectorstores.base import VectorStoreRetriever
-from langchain.vectorstores import VectorStore
-from langchain.callbacks.manager import CallbackManagerForRetrieverRun
+from langchain_core.retrievers import BaseRetriever
+from langchain_core.vectorstores import VectorStoreRetriever
+from langchain_core.documents.base import Document
+from langchain_core.vectorstores import VectorStore
+from langchain_core.callbacks.manager import CallbackManagerForRetrieverRun
+
 from typing import List
 from pydantic import Field
 
climateqa/engine/utils.py CHANGED
@@ -48,3 +48,5 @@ def flatten_dict(
     """
     flat_dict = {k: v for k, v in _flatten_dict(nested_dict, parent_key, sep)}
     return flat_dict
+
+
climateqa/engine/vectorstore.py CHANGED
@@ -3,7 +3,7 @@
 # And https://python.langchain.com/docs/integrations/vectorstores/pinecone
 import os
 import pinecone
-from langchain.vectorstores import Pinecone
+from langchain_community.vectorstores import Pinecone
 
 # LOAD ENVIRONMENT VARIABLES
 try:
@@ -23,6 +23,7 @@ def get_pinecone_vectorstore(embeddings,text_key = "text"):
 
     index_name = os.getenv("PINECONE_API_INDEX")
     vectorstore = Pinecone.from_existing_index(index_name, embeddings,text_key = text_key)
+
     return vectorstore
 
 
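
A hedged sketch of how the migrated Pinecone wrapper is typically used downstream; the retriever settings are assumptions for illustration, not the app's actual configuration:

import os
from langchain_community.vectorstores import Pinecone
from langchain_community.embeddings import HuggingFaceEmbeddings

# Assumes pinecone.init(...) has already run, as in the try-block above,
# and reuses the embeddings model from embeddings.py.
embeddings = HuggingFaceEmbeddings(
    model_name="sentence-transformers/multi-qa-mpnet-base-dot-v1"
)
index_name = os.getenv("PINECONE_API_INDEX")
vectorstore = Pinecone.from_existing_index(index_name, embeddings, text_key="text")

# Typical downstream use: expose the store as a retriever for the RAG chain.
retriever = vectorstore.as_retriever(search_kwargs={"k": 4})
docs = retriever.get_relevant_documents("sea level rise")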