paloma99 committed on
Commit
1fecc0a
1 Parent(s): 922dead

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +45 -29
app.py CHANGED
@@ -54,14 +54,12 @@ image_gradio_app = gr.Interface(
54
  theme=theme
55
  )
56
 
57
- # Cell 2: Chatbot Model
58
-
59
- loader = PyPDFDirectoryLoader('pdfs')
60
  data=loader.load()
61
  # split documents
62
  text_splitter = RecursiveCharacterTextSplitter(
63
- chunk_size=500,
64
- chunk_overlap=70,
65
  length_function=len
66
  )
67
  docs = text_splitter.split_documents(data)
@@ -78,56 +76,74 @@ vectordb = Chroma.from_documents(
78
  persist_directory=persist_directory
79
  )
80
  # define retriever
81
- retriever = vectordb.as_retriever(search_type="mmr")
 
 
 
 
 
 
 
 
82
  template = """
83
- Your name is Greta and you are a recycling chatbot with the objective to anwer questions from user in English or Spanish /
84
- Use the following pieces of context to answer the question if the question is related with recycling /
85
- No more than two chunks of context /
86
- Answer in the same language of the question /
87
- Always say "thanks for asking!" at the end of the answer /
88
- If the context is not relevant, please answer the question by using your own knowledge about the topic.
89
-
90
- context: {context}
91
- question: {question}
92
  """
93
 
94
  # Create the chat prompt templates
95
- system_prompt = SystemMessagePromptTemplate.from_template(template)
96
  qa_prompt = ChatPromptTemplate(
97
- messages=[
98
- system_prompt,
99
- MessagesPlaceholder(variable_name="chat_history"),
100
- HumanMessagePromptTemplate.from_template("{question}")
101
- ]
102
  )
103
  llm = HuggingFaceHub(
104
  repo_id="mistralai/Mixtral-8x7B-Instruct-v0.1",
105
  task="text-generation",
106
  model_kwargs={
107
- "max_new_tokens": 1024,
108
  "top_k": 30,
109
  "temperature": 0.1,
110
- "repetition_penalty": 1.03,
111
  },
112
  )
113
 
114
- memory = ConversationBufferMemory(llm=llm, memory_key="chat_history", input_key='question', output_key='answer', return_messages=True)
115
-
116
  qa_chain = ConversationalRetrievalChain.from_llm(
117
  llm = llm,
118
- memory = memory,
119
  retriever = retriever,
120
  verbose = True,
121
  combine_docs_chain_kwargs={'prompt': qa_prompt},
122
  get_chat_history = lambda h : h,
123
  rephrase_question = False,
124
- output_key = 'answer'
125
  )
126
 
127
  def chat_interface(question,history):
 
 
 
 
 
 
 
 
 
 
 
128
 
129
- result = qa_chain.invoke({"question": question})
130
- return result['answer'] # If the result is a string, return it directly
 
 
131
 
132
 
133
  chatbot_gradio_app = gr.ChatInterface(
 
54
  theme=theme
55
  )
56
 
57
+ loader = WebBaseLoader(["https://www.epa.gov/recycle/frequent-questions-recycling", "https://www.whitehorsedc.gov.uk/vale-of-white-horse-district-council/recycling-rubbish-and-waste/lets-get-real-about-recycling/", "https://www.teimas.com/blog/13-preguntas-y-respuestas-sobre-la-ley-de-residuos-07-2022", "https://www.molok.com/es/blog/gestion-de-residuos-solidos-urbanos-rsu-10-dudas-comunes"])
 
 
58
  data=loader.load()
59
  # split documents
60
  text_splitter = RecursiveCharacterTextSplitter(
61
+ chunk_size=1024,
62
+ chunk_overlap=150,
63
  length_function=len
64
  )
65
  docs = text_splitter.split_documents(data)
 
76
  persist_directory=persist_directory
77
  )
78
  # define retriever
79
+ retriever = vectordb.as_retriever(search_kwargs={"k": 2}, search_type="mmr")
80
+
81
+ class FinalAnswer(BaseModel):
82
+ question: str = Field(description="the original question")
83
+ answer: str = Field(description="the extracted answer")
84
+
85
+ # Assuming you have a parser for the FinalAnswer class
86
+ parser = PydanticOutputParser(pydantic_object=FinalAnswer)
87
+
88
  template = """
89
+ Your name is AngryGreta and you are a recycling chatbot with the objective to anwer questions from user in English or Spanish /
90
+ Use the following pieces of context to answer the question /
91
+ If the question is English answer in English /
92
+ If the question is Spanish answer in Spanish /
93
+ Do not mention the word context when you answer a question /
94
+ Answer the question fully and provide as much relevant detail as possible. Do not cut your response short /
95
+ Context: {context}
96
+ User: {question}
97
+ {format_instructions}
98
  """
99
 
100
  # Create the chat prompt templates
101
+ sys_prompt = SystemMessagePromptTemplate.from_template(template)
102
  qa_prompt = ChatPromptTemplate(
103
+ messages=[
104
+ sys_prompt,
105
+ HumanMessagePromptTemplate.from_template("{question}")],
106
+ partial_variables={"format_instructions": parser.get_format_instructions()}
 
107
  )
108
  llm = HuggingFaceHub(
109
  repo_id="mistralai/Mixtral-8x7B-Instruct-v0.1",
110
  task="text-generation",
111
  model_kwargs={
112
+ "max_new_tokens": 2000,
113
  "top_k": 30,
114
  "temperature": 0.1,
115
+ "repetition_penalty": 1.03
116
  },
117
  )
118
 
 
 
119
  qa_chain = ConversationalRetrievalChain.from_llm(
120
  llm = llm,
121
+ memory = ConversationBufferMemory(llm=llm, memory_key="chat_history", input_key='question', output_key='output'),
122
  retriever = retriever,
123
  verbose = True,
124
  combine_docs_chain_kwargs={'prompt': qa_prompt},
125
  get_chat_history = lambda h : h,
126
  rephrase_question = False,
127
+ output_key = 'output',
128
  )
129
 
130
  def chat_interface(question,history):
131
+ result = qa_chain.invoke({'question': question})
132
+ output_string = result['output']
133
+
134
+ # Find the index of the last occurrence of "answer": in the string
135
+ answer_index = output_string.rfind('"answer":')
136
+
137
+ # Extract the substring starting from the "answer": index
138
+ answer_part = output_string[answer_index + len('"answer":'):].strip()
139
+
140
+ # Find the next occurrence of a double quote to get the start of the answer value
141
+ quote_index = answer_part.find('"')
142
 
143
+ # Extract the answer value between double quotes
144
+ answer_value = answer_part[quote_index + 1:answer_part.find('"', quote_index + 1)]
145
+
146
+ return answer_value
147
 
148
 
149
  chatbot_gradio_app = gr.ChatInterface(