from langchain.memory import ConversationBufferWindowMemory
from langchain.chains import LLMChain
from langchain_community.llms import HuggingFaceEndpoint
from langchain.prompts import ChatPromptTemplate
from langchain_core.prompts.chat import SystemMessagePromptTemplate, HumanMessagePromptTemplate

import os
import gradio as gr
import spaces

# Dedicated Hugging Face Inference Endpoint that serves the model; the access
# token is read from the environment (e.g. a Space secret).
your_endpoint_url = "https://kp4xdy196cw81uf3.us-east-1.aws.endpoints.huggingface.cloud"
token = os.environ["API_TOKEN"]

llm = HuggingFaceEndpoint(
    endpoint_url=your_endpoint_url,
    huggingfacehub_api_token=token,
    task="text-generation",
    max_new_tokens=128,
    top_k=10,
    top_p=0.95,
    typical_p=0.95,
    temperature=0.01,
    repetition_penalty=1.03,
)

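# For quick local experiments without a dedicated endpoint, the same chain can
# be driven by a local pipeline instead. A minimal sketch; "gpt2" is only a
# placeholder model here, not the one served by the endpoint above:
#
#   from langchain_community.llms.huggingface_pipeline import HuggingFacePipeline
#   llm = HuggingFacePipeline.from_model_id(
#       model_id="gpt2",
#       task="text-generation",
#       pipeline_kwargs={"max_new_tokens": 128},
#   )
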
def chat_template_prompt():
    """Build the chat prompt: fixed system instructions plus the user turn."""
    template = """
Do not repeat questions and do not generate answers on behalf of the user/human.
Do not repeat yourself and do not create/generate dialogues.

Below is an instruction that describes a task. During the conversation you need to ask the user
the following questions to complete the hotel booking task. After each question, wait for the
user's response.
1) Where would you like to stay and when?
2) How many people are staying in the room?
3) Do you prefer any amenities, like breakfast included or a gym?
4) What is your name, your email address and phone number?
If the user wants to book the room, confirm the booking; otherwise respond with
"Thank you. Please let me know if there is any other way to assist you."

{history}
"""
    system_prompt = SystemMessagePromptTemplate.from_template(template)
    human_prompt = HumanMessagePromptTemplate.from_template("{input}")
    chat_prompt = ChatPromptTemplate.from_messages([system_prompt, human_prompt])
    return chat_prompt

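# Quick sanity check of the assembled prompt (illustrative, not part of the
# app flow):
#
#   msgs = chat_template_prompt().format_messages(history="", input="Hello")
#   print(msgs[0].content)  # the system instructions; msgs[1] is the human turn
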
def chain():
    """Build an LLMChain that keeps the last k=3 turns of the conversation."""
    chat_prompt = chat_template_prompt()
    # The memory's default key "history" matches the {history} slot in the prompt.
    memory = ConversationBufferWindowMemory(k=3)
    llm_chain = LLMChain(llm=llm, memory=memory, prompt=chat_prompt)
    return llm_chain

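# How the window memory accumulates context (an illustrative sketch):
#
#   mem = ConversationBufferWindowMemory(k=3)
#   mem.save_context({"input": "Hi"}, {"output": "Where would you like to stay?"})
#   mem.load_memory_variables({})
#   # -> {"history": "Human: Hi\nAI: Where would you like to stay?"}
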
@spaces.GPU  # requests GPU time on Spaces; generation itself runs on the remote endpoint
def chat_output(message, history):
    # Gradio supplies its own chat history, but the chain's window memory
    # already tracks recent turns, so only the new message is forwarded.
    result = llm_chain.predict(input=message)
    return result

with gr.Blocks() as demo:
    llm_chain = chain()
    # Creating the ChatInterface inside the Blocks context renders it in the app.
    demo.chatbot_interface = gr.ChatInterface(
        fn=chat_output,
        examples=[
            "Hello, I would like to book a hotel room.",
            "Hello, I want to stay in Nuremberg on the 30th of May.",
        ],
        title="Hotel Booking Assistant Chat 🤖",
        description="I am your hotel booking assistant. Feel free to start chatting with me.",
    )

demo.launch()
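# When running outside Spaces, Gradio can expose a temporary public URL via
# demo.launch(share=True) instead of the plain launch() above.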