Théo ALVES DA COSTA committed

Commit 37b1e7a
1 Parent(s): e92f501

Fixed bugs with multi LLMs

Files changed (3):
  1. app.py +7 -57
  2. climateqa/engine/rag.py +8 -17
  3. climateqa/engine/utils.py +23 -6
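
Note on the fix: the old code in app.py matched astream_log paths built from auto-generated run names (e.g. "/logs/AzureChatOpenAI:2/streamed_output_str/-"), which only exist when that particular model class sits in the chain, so swapping in a different LLM provider silently broke streaming. This commit pins explicit run names ("reformulation", "find_documents", "answer") on the sub-chains via new helpers in climateqa/engine/utils.py, making the paths provider-agnostic.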
app.py CHANGED
@@ -146,88 +146,38 @@ async def chat(query,history,audience,sources,reports):
     if len(reports) == 0:
         reports = []
 
-
     retriever = ClimateQARetriever(vectorstore=vectorstore,sources = sources,min_size = 200,reports = reports,k_summary = 3,k_total = 15,threshold=0.5)
     rag_chain = make_rag_chain(retriever,llm)
-
-    # gradio_format = make_pairs([a.content for a in history]) + [(query, "")]
-    # history = history + [(query,"")]
-    # print(history)
-    # print(gradio_format)
-
-    # # reset memory
-    # memory.clear()
-    # for message in history:
-    #     memory.chat_memory.add_message(message)
 
     inputs = {"query": query,"audience": audience_prompt}
     result = rag_chain.astream_log(inputs) #{"callbacks":[MyCustomAsyncHandler()]})
     # result = rag_chain.stream(inputs)
 
-    reformulated_question_path_id = "/logs/flatten_dict/final_output"
-    retriever_path_id = "/logs/Retriever/final_output"
-    streaming_output_path_id = "/logs/AzureChatOpenAI:2/streamed_output_str/-"
-    final_output_path_id = "/streamed_output/-"
+    path_reformulation = "/logs/reformulation/final_output"
+    path_retriever = "/logs/find_documents/final_output"
+    path_answer = "/logs/answer/streamed_output_str/-"
 
     docs_html = ""
     output_query = ""
     output_language = ""
     gallery = []
 
-    # for output in result:
-
-    #     if "language" in output:
-    #         output_language = output["language"]
-    #     if "question" in output:
-    #         output_query = output["question"]
-    #     if "docs" in output:
-
-    #         try:
-    #             docs = output['docs'] # List[Document]
-    #             docs_html = []
-    #             for i, d in enumerate(docs, 1):
-    #                 docs_html.append(make_html_source(d, i))
-    #             docs_html = "".join(docs_html)
-    #         except TypeError:
-    #             print("No documents found")
-    #             continue
-
-    #     if "answer" in output:
-    #         new_token = output["answer"] # str
-    #         time.sleep(0.03)
-    #         answer_yet = history[-1][1] + new_token
-    #         answer_yet = parse_output_llm_with_sources(answer_yet)
-    #         history[-1] = (query,answer_yet)
-
-    #     yield history,docs_html,output_query,output_language,gallery
-
-
-
-    # async def fallback_iterator(iterable):
-    #     async for item in iterable:
-    #         try:
-    #             yield item
-    #         except Exception as e:
-    #             print(f"Error in fallback iterator: {e}")
-    #             raise gr.Error(f"ClimateQ&A Error: {e}\nThe error has been noted, try another question and if the error remains, you can contact us :)")
-
     try:
         async for op in result:
 
-
             op = op.ops[0]
             # print("ITERATION",op)
 
-            if op['path'] == reformulated_question_path_id: # reforulated question
+            if op['path'] == path_reformulation: # reformulated question
                 try:
                     output_language = op['value']["language"] # str
                     output_query = op["value"]["question"]
                 except Exception as e:
                     raise gr.Error(f"ClimateQ&A Error: {e} - The error has been noted, try another question and if the error remains, you can contact us :)")
 
-            elif op['path'] == retriever_path_id: # documents
+            elif op['path'] == path_retriever: # documents
                 try:
-                    docs = op['value']['documents'] # List[Document]
+                    docs = op['value']['docs'] # List[Document]
                     docs_html = []
                     for i, d in enumerate(docs, 1):
                         docs_html.append(make_html_source(d, i))
@@ -237,7 +187,7 @@ async def chat(query,history,audience,sources,reports):
                     print("op: ",op)
                     continue
 
-            elif op['path'] == streaming_output_path_id: # final answer
+            elif op['path'] == path_answer: # final answer
                 new_token = op['value'] # str
                 time.sleep(0.01)
                 answer_yet = history[-1][1] + new_token
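
For context, a minimal sketch (not part of the commit) of how these stable paths are consumed; rag_chain is assumed to come from make_rag_chain below, so its sub-runs carry the names set in this commit:

    path_retriever = "/logs/find_documents/final_output"
    path_answer = "/logs/answer/streamed_output_str/-"

    async def collect_answer(rag_chain, inputs):
        answer, docs = "", []
        # astream_log yields RunLogPatch objects carrying JSON-Patch operations
        async for patch in rag_chain.astream_log(inputs):
            for op in patch.ops:
                if op["path"] == path_answer:       # one streamed token (str)
                    answer += op["value"]
                elif op["path"] == path_retriever:  # retriever run finished
                    docs = op["value"]["docs"]      # List[Document]
        return answer, docs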
climateqa/engine/rag.py CHANGED
@@ -8,8 +8,7 @@ from langchain_core.prompts.base import format_document
 
 from climateqa.engine.reformulation import make_reformulation_chain
 from climateqa.engine.prompts import answer_prompt_template,answer_prompt_without_docs_template,answer_prompt_images_template
-from climateqa.engine.utils import pass_values, flatten_dict
-
+from climateqa.engine.utils import pass_values, flatten_dict,prepare_chain,rename_chain
 
 DEFAULT_DOCUMENT_PROMPT = PromptTemplate.from_template(template="{page_content}")
 
@@ -44,21 +43,13 @@ def make_rag_chain(retriever,llm):
     prompt_without_docs = ChatPromptTemplate.from_template(answer_prompt_without_docs_template)
 
     # ------- CHAIN 0 - Reformulation
-    reformulation_chain = make_reformulation_chain(llm)
-    reformulation = (
-        {"reformulation":reformulation_chain,**pass_values(["audience","query"])}
-        | RunnablePassthrough()
-        | flatten_dict
-    )
-
+    reformulation = make_reformulation_chain(llm)
+    reformulation = prepare_chain(reformulation,"reformulation")
 
     # ------- CHAIN 1
     # Retrieved documents
-    find_documents = {
-        "docs": itemgetter("question") | retriever,
-        **pass_values(["question","audience","language","query"])
-    } | RunnablePassthrough()
-
+    find_documents = {"docs": itemgetter("question") | retriever} | RunnablePassthrough()
+    find_documents = prepare_chain(find_documents,"find_documents")
 
     # ------- CHAIN 2
     # Construct inputs for the llm
@@ -69,15 +60,15 @@ def make_rag_chain(retriever,llm):
 
     # ------- CHAIN 3
     # Bot answer
-
+    llm_final = rename_chain(llm,"answer")
 
     answer_with_docs = {
-        "answer": input_documents | prompt | llm | StrOutputParser(),
+        "answer": input_documents | prompt | llm_final | StrOutputParser(),
         **pass_values(["question","audience","language","query","docs"]),
     }
 
     answer_without_docs = {
-        "answer": prompt_without_docs | llm | StrOutputParser(),
+        "answer": prompt_without_docs | llm_final | StrOutputParser(),
         **pass_values(["question","audience","language","query","docs"]),
     }
 
climateqa/engine/utils.py CHANGED
@@ -1,10 +1,29 @@
-
-from typing import Any, Dict, Iterable, Tuple, Union
 from operator import itemgetter
+from typing import Any, Dict, Iterable, Tuple
+from langchain_core.runnables import RunnablePassthrough
+
 
 def pass_values(x):
-    if not isinstance(x,list): x = [x]
-    return {k:itemgetter(k) for k in x}
+    if not isinstance(x, list):
+        x = [x]
+    return {k: itemgetter(k) for k in x}
+
+
+def prepare_chain(chain,name):
+    chain = propagate_inputs(chain)
+    chain = rename_chain(chain,name)
+    return chain
+
+
+def propagate_inputs(chain):
+    chain_with_values = {
+        "outputs": chain,
+        "inputs": RunnablePassthrough()
+    } | RunnablePassthrough() | flatten_dict
+    return chain_with_values
+
+def rename_chain(chain,name):
+    return chain.with_config({"run_name":name})
 
 
 # Drawn from langchain utils and modified to remove the parent key
@@ -48,5 +67,3 @@ def flatten_dict(
     """
     flat_dict = {k: v for k, v in _flatten_dict(nested_dict, parent_key, sep)}
     return flat_dict
-
-
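
To see what propagate_inputs buys, a small usage sketch (the example step and values are made up): the wrapped chain's result lands under "outputs", the untouched inputs under "inputs", and flatten_dict (which strips parent keys, per the comment above) merges both into one flat dict, so downstream pass_values still finds the original keys.

    from langchain_core.runnables import RunnableLambda
    from climateqa.engine.utils import prepare_chain

    # A toy "reformulation" step that only produces a new key.
    reformulate = RunnableLambda(lambda d: {"question": d["query"].rstrip("?")})
    chain = prepare_chain(reformulate, "reformulation")

    result = chain.invoke({"query": "what is warming?", "audience": "experts"})
    # The inputs survive alongside the new output:
    # {"question": "what is warming", "query": "what is warming?", "audience": "experts"}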