lamtung16 commited on
Commit
1126c4a
·
1 Parent(s): ae7da67

Create responses.py

Browse files
Files changed (1) hide show
  1. responses.py +31 -0
responses.py ADDED
@@ -0,0 +1,31 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from langchain.memory import ConversationBufferMemory
2
+ from langchain import LLMChain, PromptTemplate
3
+ from langchain.llms import CTransformers
# --- Model / chain wiring (module import time) ---

# Quantized Zephyr-7B loaded locally via ctransformers (GGUF file, Q2_K quant).
llm = CTransformers(model='TheBloke/zephyr-7B-beta-GGUF', model_file="zephyr-7b-beta.Q2_K.gguf")

# Llama-style instruction template: {chat_history} is injected by the memory,
# {user_input} by the caller of the chain.
template = """[INST]<<SYS>>
You are a helpful assistant, you always only answer briefly for the assistant then you stop.
Read the chat history to get context.
<</SYS>>
Chat History:
{chat_history}
User: {user_input}[/INST]"""

prompt = PromptTemplate(input_variables=["chat_history", "user_input"], template=template)

# NOTE(review): `max_new_tokens` is not a documented ConversationBufferMemory
# field — it looks like a generation parameter that belongs on the LLM instead;
# confirm it is not being silently ignored here.
memory = ConversationBufferMemory(memory_key="chat_history", max_new_tokens=100)

# Chain that renders the prompt with buffered history and calls the LLM.
llm_chain = LLMChain(llm=llm, prompt=prompt, memory=memory)
27
+
28
+
def get_response(prompt: str) -> str:
    """Send the user's message through the conversational chain and return the model's reply.

    The chain's buffer memory supplies `chat_history`; only `user_input`
    is provided per call.
    """
    return llm_chain.predict(user_input=prompt)