api-rag-index-chat

Runtime error

App Files Files Community

Chris4K committed on Feb 3, 2024

Commit

62600e4

verified ·

1 Parent(s): 2770f0b

Update app.py

Browse files

Files changed (1) hide show

app.py +47 -3

app.py CHANGED Viewed

@@ -1,3 +1,6 @@
 #####################################
 ##  BitsAndBytes
 #####################################
@@ -12,7 +15,7 @@ model_name = "bn22/Mistral-7B-Instruct-v0.1-sharded"
 # "HuggingFaceH4/zephyr-7b-beta"
 # function for loading 4-bit quantized model
-def load_quantized_model(model_name: str):
     model =  HuggingFaceHub(
         repo_id="mistralai/Mixtral-8x7B-Instruct-v0.1",
@@ -118,7 +121,7 @@ def get_context_retriever_chain(vector_store):
     # function for loading 4-bit quantized model
-    llm = load_quantized_model(model_name)
     retriever = vector_store.as_retriever()
@@ -134,7 +137,7 @@ def get_context_retriever_chain(vector_store):
 def get_conversational_rag_chain(retriever_chain):
-    llm = load_quantized_model(model_name)
     prompt = ChatPromptTemplate.from_messages([
       ("system", "Du bist ein freundlicher Mitarbeiter einens Call Center und beantwortest basierend auf dem Context. Benutze nur den Inhalt des Context. Antworte mit: Ich bin mir nicht sicher. Wenn die Antwort nicht aus dem Context hervorgeht. Antworte auf Deutsch, bitte? CONTEXT:\n\n{context}"),
@@ -195,6 +198,47 @@ def get_response(user_input):
     return response['answer']
 def simple(text:str):
     """Return *text* with the fixed marker suffix appended."""
     suffix = " hhhmmm "
     return text + suffix

 #####################################
 ##  BitsAndBytes
 #####################################
 # "HuggingFaceH4/zephyr-7b-beta"
 # function for loading the LLM through the HuggingFaceHub wrapper
+def load_model(model_name: str):
     model =  HuggingFaceHub(
         repo_id="mistralai/Mixtral-8x7B-Instruct-v0.1",
     # function for loading 4-bit quantized model
+    llm = load_model(model_name)
     retriever = vector_store.as_retriever()
 def get_conversational_rag_chain(retriever_chain):
+    llm = load_model(model_name)
     prompt = ChatPromptTemplate.from_messages([
       ("system", "Du bist ein freundlicher Mitarbeiter einens Call Center und beantwortest basierend auf dem Context. Benutze nur den Inhalt des Context. Antworte mit: Ich bin mir nicht sicher. Wenn die Antwort nicht aus dem Context hervorgeht. Antworte auf Deutsch, bitte? CONTEXT:\n\n{context}"),
     return response['answer']
def history_to_dialog_format(chat_history: list) -> list[dict]:
    """Convert a chat history into a flat list of role/content messages.

    Every turn of the history is converted, not just the first one.
    Two input shapes are accepted:

    * a list of turns, each turn being a sequence of messages such as
      ``[user_message, assistant_message]`` (presumably the format the
      chat UI passes in -- confirm against the caller);
    * a flat list of strings, alternating user / assistant.

    Args:
        chat_history: The previous conversation. ``None`` or an empty
            list yields an empty dialog.

    Returns:
        A list of ``{"role": ..., "content": ...}`` dicts in
        conversation order. Roles alternate, starting with ``"user"``.
    """
    messages = []
    for turn in chat_history or []:
        if isinstance(turn, str):
            # A bare string is one message; iterating it would split it
            # into single characters.
            messages.append(turn)
        else:
            messages.extend(turn)

    return [
        {
            "role": "user" if idx % 2 == 0 else "assistant",
            "content": message,
        }
        for idx, message in enumerate(messages)
    ]
def get_response(message, history):
    """Answer *message* with the retrieval-augmented chain.

    Builds a vector store from a fixed product page, wires up the
    retriever and conversational chains, and invokes the chain with the
    user's message.

    Args:
        message: The user's current question.
        history: Previous conversation turns. Accepted for interface
            compatibility but not forwarded to the chain yet; the
            chain is always invoked with an empty ``chat_history``.

    Returns:
        The value stored under ``"answer"`` in the chain's response.
    """
    # TODO: the source URL is hard-coded; make it configurable.
    vs = get_vectorstore_from_url("https://www.bofrost.de/shop/fertige-gerichte_5507/auflaeufe_5509/hack-wirsing-auflauf.html?position=1&clicked=")
    print("------ here 22 " )

    # NOTE(review): the message format the prompt expects for
    # "chat_history" is not visible here, so the history stays empty
    # as before -- confirm before forwarding `history`.
    chat_history = []

    retriever_chain = get_context_retriever_chain(vs)
    conversation_rag_chain = get_conversational_rag_chain(retriever_chain)
    response = conversation_rag_chain.invoke({
        "chat_history": chat_history,
        # The parameter is `message`; the old name `user_input` is not
        # defined in this function.
        "input": message,
    })
    print(response)
    # Read the reply from the "answer" key, the same key the previous
    # version of this function used.
    return response['answer']
 def simple(text:str):
     """Return *text* with the fixed marker suffix appended."""
     suffix = " hhhmmm "
     return text + suffix