Correct files
- app.py +64 -19
- requirements.txt +4 -4
app.py
CHANGED
@@ -1,28 +1,73 @@
 import gradio as gr
-from langchain_huggingface import HuggingFaceEndpoint

-#
-

-#
-
-llm_hub = HuggingFaceEndpoint(repo_id=model_id, temperature=0.1, max_new_tokens=600, model_kwargs={"max_length":600})
-# llm_hub.client.api_url = 'https://api-inference.huggingface.co/models/'+model_id

-def handle_prompt(prompt, chat_history):

-
-
-
-
-
-
-
-
-

-greetingsmessage = "Hi, I'm a Chatbot!"

-demo = gr.ChatInterface(handle_prompt, type="messages", title="ChatBot", theme=

 demo.launch()
+# Following https://python.langchain.com/docs/tutorials/chatbot/
+# Missing: trimming, streaming with memory
+
+from langchain_mistralai import ChatMistralAI
+from langchain_core.rate_limiters import InMemoryRateLimiter
+from langgraph.checkpoint.memory import MemorySaver
+from langgraph.graph import START, MessagesState, StateGraph
+from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder
+from langchain_core.messages import HumanMessage, AIMessage
 import gradio as gr

+# Prompt template
+prompt = ChatPromptTemplate.from_messages(
+    [
+        (
+            "system",
+            "You talk like a person of the Middle Ages. Answer all questions to the best of your ability.",
+        ),
+        MessagesPlaceholder(variable_name="messages"),
+    ]
+)
+
+# Rate limiter
+rate_limiter = InMemoryRateLimiter(
+    requests_per_second=0.1,  # <-- MistralAI free tier: at most one request every 10 seconds
+    check_every_n_seconds=0.01,  # Wake up every 10 ms to check whether a request is allowed
+    max_bucket_size=10,  # Controls the maximum burst size
+)
+
+model = ChatMistralAI(model="mistral-large-latest", rate_limiter=rate_limiter)
+
+# Define a new graph
+workflow = StateGraph(state_schema=MessagesState)
+
+# Define the function that calls the model
+def call_model(state: MessagesState):
+    chain = prompt | model
+    response = chain.invoke(state)
+    return {"messages": response}
+
+# Define the (single) node in the graph
+workflow.add_edge(START, "model")
+workflow.add_node("model", call_model)
+
+# Add memory
+memory = MemorySaver()
+app = workflow.compile(checkpointer=memory)

+# Config with thread
+config = {"configurable": {"thread_id": "abc345"}}


+def handle_prompt(query, history):
+    input_messages = [HumanMessage(query)]
+    try:
+        # Stream output
+        # out = ""
+        # for chunk, metadata in app.stream({
+        #     "messages": input_messages},
+        #     config,
+        #     stream_mode="messages"):
+        #     if isinstance(chunk, AIMessage):  # Filter to just model responses
+        #         out += chunk.content
+        #         yield out
+        output = app.invoke({"messages": input_messages}, config)
+        return output["messages"][-1].content
+    except Exception:
+        raise gr.Error("Request rate limit exceeded")


+demo = gr.ChatInterface(handle_prompt, type="messages", title="Medieval ChatBot", theme=gr.themes.Citrus())

 demo.launch()
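
The streaming path is left commented out in the new handler (the file header notes "streaming with memory" as missing). A minimal sketch of how it could be enabled, assuming the same app, config, HumanMessage and AIMessage objects defined above; Gradio's ChatInterface streams partial replies when the handler is a generator. The name handle_prompt_streaming is only illustrative:

# Sketch only: generator variant of the handler for token streaming,
# reusing the compiled graph `app` and the thread `config` from above.
def handle_prompt_streaming(query, history):
    input_messages = [HumanMessage(query)]
    out = ""
    # stream_mode="messages" yields (message_chunk, metadata) pairs as tokens arrive
    for chunk, metadata in app.stream({"messages": input_messages}, config, stream_mode="messages"):
        if isinstance(chunk, AIMessage):  # keep only model output, skip other graph events
            out += chunk.content
            yield out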
requirements.txt
CHANGED
@@ -1,4 +1,4 @@
-langchain
-
-
-
+langchain-core
+langgraph>0.2.27
+langchain_mistralai
+gradio
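
Note: ChatMistralAI reads its credentials from the environment when no api_key argument is passed, so the Space needs the key configured as a secret. A minimal startup check, assuming the standard MISTRAL_API_KEY variable name:

# Sketch only: fail fast if the Mistral API key is missing before building the graph.
import os

if not os.environ.get("MISTRAL_API_KEY"):
    raise RuntimeError("Set MISTRAL_API_KEY (e.g. as a Space secret) before launching the app.")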