Correct files
- app.py +64 -19
- requirements.txt +4 -4
app.py
CHANGED
@@ -1,28 +1,73 @@
 import gradio as gr
-from langchain_huggingface import HuggingFaceEndpoint

-#
-

-#
-
-llm_hub = HuggingFaceEndpoint(repo_id=model_id, temperature=0.1, max_new_tokens=600, model_kwargs={"max_length":600})
-# llm_hub.client.api_url = 'https://api-inference.huggingface.co/models/'+model_id

-def handle_prompt(prompt, chat_history):

-
-
-
-
-
-
-
-
-

-greetingsmessage = "Hi, I'm a Chatbot!"

-demo = gr.ChatInterface(handle_prompt, type="messages", title="ChatBot", theme=

 demo.launch()
+# Following https://python.langchain.com/docs/tutorials/chatbot/
+# Missing: trimming, streaming with memory
+
+from langchain_mistralai import ChatMistralAI
+from langchain_core.rate_limiters import InMemoryRateLimiter
+from langgraph.checkpoint.memory import MemorySaver
+from langgraph.graph import START, MessagesState, StateGraph
+from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder
+from langchain_core.messages import HumanMessage, AIMessage
 import gradio as gr

+# Prompt template
+prompt = ChatPromptTemplate.from_messages(
+    [
+        (
+            "system",
+            "You talk like a person of the Middle Ages. Answer all questions to the best of your ability.",
+        ),
+        MessagesPlaceholder(variable_name="messages"),
+    ]
+)
+
+# Rate limiter
+rate_limiter = InMemoryRateLimiter(
+    requests_per_second=0.1,  # <-- MistralAI free tier: at most one request every 10 seconds
+    check_every_n_seconds=0.01,  # Wake up every 10 ms to check whether a request is allowed
+    max_bucket_size=10,  # Controls the maximum burst size
+)
+
+model = ChatMistralAI(model="mistral-large-latest", rate_limiter=rate_limiter)
+
+# Define a new graph
+workflow = StateGraph(state_schema=MessagesState)
+
+# Define the function that calls the model
+def call_model(state: MessagesState):
+    chain = prompt | model
+    response = chain.invoke(state)
+    return {"messages": response}
+
+# Define the (single) node in the graph
+workflow.add_edge(START, "model")
+workflow.add_node("model", call_model)
+
+# Add memory
+memory = MemorySaver()
+app = workflow.compile(checkpointer=memory)

+# Config with thread
+config = {"configurable": {"thread_id": "abc345"}}


+def handle_prompt(query, history):
+    input_messages = [HumanMessage(query)]
+    try:
+        # Stream output
+        # out = ""
+        # for chunk, metadata in app.stream({
+        #     "messages": input_messages},
+        #     config,
+        #     stream_mode="messages"):
+        #     if isinstance(chunk, AIMessage):  # Filter to just model responses
+        #         out += chunk.content
+        #         yield out
+        output = app.invoke({"messages": input_messages}, config)
+        return output["messages"][-1].content
+    except Exception:
+        raise gr.Error("Request rate limit exceeded")


+demo = gr.ChatInterface(handle_prompt, type="messages", title="Medieval ChatBot", theme=gr.themes.Citrus())

 demo.launch()
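
The streaming path is left commented out in the new handler (the file header notes "streaming with memory" as missing). A minimal sketch of how it could be enabled, assuming the same app, config, HumanMessage and AIMessage objects defined above; Gradio's ChatInterface streams partial replies when the handler is a generator. The name handle_prompt_streaming is only illustrative:

# Sketch only: generator variant of the handler for token streaming,
# reusing the compiled graph `app` and the thread `config` from above.
def handle_prompt_streaming(query, history):
    input_messages = [HumanMessage(query)]
    out = ""
    # stream_mode="messages" yields (message_chunk, metadata) pairs as tokens arrive
    for chunk, metadata in app.stream({"messages": input_messages}, config, stream_mode="messages"):
        if isinstance(chunk, AIMessage):  # keep only model output, skip other graph events
            out += chunk.content
            yield out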
requirements.txt
CHANGED
@@ -1,4 +1,4 @@
-langchain
-
-
-
+langchain-core
+langgraph>0.2.27
+langchain_mistralai
+gradio
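
Note: ChatMistralAI reads its credentials from the environment when no api_key argument is passed, so the Space needs the key configured as a secret. A minimal startup check, assuming the standard MISTRAL_API_KEY variable name:

# Sketch only: fail fast if the Mistral API key is missing before building the graph.
import os

if not os.environ.get("MISTRAL_API_KEY"):
    raise RuntimeError("Set MISTRAL_API_KEY (e.g. as a Space secret) before launching the app.")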