lamtung16 commited on
Commit
1126c4a
·
1 Parent(s): ae7da67

Create responses.py

Browse files
Files changed (1) hide show
  1. responses.py +31 -0
responses.py ADDED
@@ -0,0 +1,31 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from langchain.memory import ConversationBufferMemory
2
+ from langchain import LLMChain, PromptTemplate
3
+ from langchain.llms import CTransformers
# --- Model / chain wiring (module import time) ---

# Quantized Zephyr-7B loaded locally via ctransformers (GGUF file, Q2_K quant).
llm = CTransformers(model='TheBloke/zephyr-7B-beta-GGUF', model_file="zephyr-7b-beta.Q2_K.gguf")

# Llama-style instruction template: {chat_history} is injected by the memory,
# {user_input} by the caller of the chain.
template = """[INST]<<SYS>>
You are a helpful assistant, you always only answer briefly for the assistant then you stop.
Read the chat history to get context.
<</SYS>>
Chat History:
{chat_history}
User: {user_input}[/INST]"""

prompt = PromptTemplate(input_variables=["chat_history", "user_input"], template=template)

# NOTE(review): `max_new_tokens` is not a documented ConversationBufferMemory
# field — it looks like a generation parameter that belongs on the LLM instead;
# confirm it is not being silently ignored here.
memory = ConversationBufferMemory(memory_key="chat_history", max_new_tokens=100)

# Chain that renders the prompt with buffered history and calls the LLM.
llm_chain = LLMChain(llm=llm, prompt=prompt, memory=memory)
27
+
28
+
def get_response(prompt: str) -> str:
    """Send the user's message through the conversational chain and return the model's reply.

    The chain's buffer memory supplies `chat_history`; only `user_input`
    is provided per call.
    """
    return llm_chain.predict(user_input=prompt)