PabloVD committed
Commit be0233b · 1 Parent(s): 7661979

Correct files

Files changed (2):
  1. app.py +64 -19
  2. requirements.txt +4 -4
app.py CHANGED
@@ -1,28 +1,73 @@
+# Following https://python.langchain.com/docs/tutorials/chatbot/
+# Missing: trimming, streaming with memory
+
+from langchain_mistralai import ChatMistralAI
+from langchain_core.rate_limiters import InMemoryRateLimiter
+from langgraph.checkpoint.memory import MemorySaver
+from langgraph.graph import START, MessagesState, StateGraph
+from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder
+from langchain_core.messages import HumanMessage, AIMessage
 import gradio as gr
-from langchain_huggingface import HuggingFaceEndpoint

-# Global variables
-conversation_retrieval_chain = None
-
-# load the model into the HuggingFaceHub
-model_id = "microsoft/Phi-3.5-mini-instruct"
-llm_hub = HuggingFaceEndpoint(repo_id=model_id, temperature=0.1, max_new_tokens=600, model_kwargs={"max_length":600})
-# llm_hub.client.api_url = 'https://api-inference.huggingface.co/models/'+model_id
-
-def handle_prompt(prompt, chat_history):
-
-    # Query the model
-    output = llm_hub.invoke({"question": prompt, "chat_history": chat_history})
-    answer = output["result"]
-
-    # Update the chat history
-    chat_history.append((prompt, answer))
-
-    # Return the model's response
-    return answer
-
-greetingsmessage = "Hi, I'm a Chatbot!"
-
-demo = gr.ChatInterface(handle_prompt, type="messages", title="ChatBot", theme='freddyaboulton/dracula_revamped', description=greetingsmessage)
+# Prompt template
+prompt = ChatPromptTemplate.from_messages(
+    [
+        (
+            "system",
+            "You talk like a person of the Middle Ages. Answer all questions to the best of your ability.",
+        ),
+        MessagesPlaceholder(variable_name="messages"),
+    ]
+)
+
+# Rate limiter
+rate_limiter = InMemoryRateLimiter(
+    requests_per_second=0.1,  # <-- MistralAI free tier: at most one request every 10 seconds
+    check_every_n_seconds=0.01,  # Wake up every 100 ms to check whether a request is allowed
+    max_bucket_size=10,  # Controls the maximum burst size
+)
+
+model = ChatMistralAI(model="mistral-large-latest", rate_limiter=rate_limiter)
+
+# Define a new graph
+workflow = StateGraph(state_schema=MessagesState)
+
+# Define the function that calls the model
+def call_model(state: MessagesState):
+    chain = prompt | model
+    response = chain.invoke(state)
+    return {"messages": response}
+
+# Define the (single) node in the graph
+workflow.add_edge(START, "model")
+workflow.add_node("model", call_model)
+
+# Add memory
+memory = MemorySaver()
+app = workflow.compile(checkpointer=memory)

+# Config with thread
+config = {"configurable": {"thread_id": "abc345"}}


+def handle_prompt(query, history):
+    input_messages = [HumanMessage(query)]
+    try:
+        # Stream output:
+        # out = ""
+        # for chunk, metadata in app.stream(
+        #         {"messages": input_messages},
+        #         config,
+        #         stream_mode="messages"):
+        #     if isinstance(chunk, AIMessage):  # Filter to just model responses
+        #         out += chunk.content
+        #         yield out
+        output = app.invoke({"messages": input_messages}, config)
+        return output["messages"][-1].content
+    except Exception:
+        raise gr.Error("Requests rate limit exceeded")


+demo = gr.ChatInterface(handle_prompt, type="messages", title="Medieval ChatBot", theme=gr.themes.Citrus())

 demo.launch()
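
Two notes on the new app.py. First, config pins a single hardcoded thread_id ("abc345"), so the MemorySaver checkpointer gives every visitor of the Space the same shared conversation memory; per-user memory would need one thread_id per session. Second, the commented-out block in handle_prompt hints at the streaming mode the header comment lists as missing. Below is a minimal sketch of that variant, reconstructed from the commented-out code and assuming the same compiled graph app and config as above; handle_prompt_streaming is a hypothetical name, not part of the commit.

# Hypothetical streaming variant of handle_prompt. Yielding progressively
# longer strings lets gr.ChatInterface render the reply as it is generated.
def handle_prompt_streaming(query, history):
    input_messages = [HumanMessage(query)]
    out = ""
    # stream_mode="messages" yields (message_chunk, metadata) pairs
    for chunk, metadata in app.stream(
            {"messages": input_messages}, config, stream_mode="messages"):
        if isinstance(chunk, AIMessage):  # keep only model output chunks
            out += chunk.content
            yield out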
requirements.txt CHANGED
@@ -1,4 +1,4 @@
-langchain
-langchain-community
-langchain-huggingface
-chromadb
+langchain-core
+langgraph>0.2.27
+langchain_mistralai
+gradio
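
One setup detail the diff does not show: ChatMistralAI reads its API key from the MISTRAL_API_KEY environment variable, which on a Hugging Face Space would be stored as a repository secret. A minimal sketch of an early guard for it; the exact error wording is illustrative, not from the commit.

import os

# ChatMistralAI looks up MISTRAL_API_KEY in the environment; failing fast
# gives a clearer error than a 401 from the first chat request.
if "MISTRAL_API_KEY" not in os.environ:
    raise RuntimeError("Set MISTRAL_API_KEY before launching the app")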