import gradio as gr
import spaces
from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline
from langchain.memory import ConversationBufferMemory
from langchain.chains import ConversationChain
from langchain_community.llms.huggingface_pipeline import HuggingFacePipeline
from langchain.prompts import PromptTemplate


@spaces.GPU
def initialize_model_and_tokenizer(model_name="KvrParaskevi/Llama-2-7b-Hotel-Booking-Model"):
    # Load the fine-tuned hotel-booking model and its tokenizer from the Hub.
    model = AutoModelForCausalLM.from_pretrained(model_name)
    tokenizer = AutoTokenizer.from_pretrained(model_name)
    return model, tokenizer


@spaces.GPU
def load_pipeline():
    # Wrap the model in a transformers text-generation pipeline and expose it
    # to LangChain via the HuggingFacePipeline adapter.
    model, tokenizer = initialize_model_and_tokenizer()
    pipe = pipeline(
        "text-generation",
        model=model,
        tokenizer=tokenizer,
        max_new_tokens=50,
        top_k=30,
        top_p=0.7,
        early_stopping=True,
        num_beams=2,  # beam search; the sampling params above are advisory here
        temperature=0.1,
        repetition_penalty=1.03,
    )
    return HuggingFacePipeline(pipeline=pipe)


def chat_interface(inputs):
    # Run one turn through the conversation chain. The chain's memory supplies
    # {history}, so only the new user input needs to be passed in.
    result = llm_chain.invoke({"input": inputs}, return_only_outputs=True)
    return result["response"]


llm = load_pipeline()
chat_history = []


def chat_output(inputs):
    # Parse the raw completion: keep only the text after the final "AI:"
    # marker and before any hallucinated "Human:" turn that may follow it.
    output = chat_interface(inputs)
    answer = output.split("AI:")[-1].split("Human:")[0].strip()
    chat_history.append((inputs, answer))
    return chat_history


template = """You are an AI having a conversation with a human. Make sure you receive a logical answer from the user to every question so you can complete the hotel booking process.

Current conversation:
{history}
Human: {input}
AI:"""

prompt = PromptTemplate(template=template, input_variables=["history", "input"])
# ConversationBufferMemory stores raw turns verbatim; it takes no LLM.
memory = ConversationBufferMemory(memory_key="history")
llm_chain = ConversationChain(llm=llm, memory=memory, prompt=prompt)

with gr.Blocks() as demo:
    chatbot_component = gr.Chatbot(height=300, label="history")
    textbox_component = gr.Textbox(
        placeholder="Can I help you to book a hotel?",
        container=False,
        label="input",
        scale=7,
    )
    demo.chatbot_interface = gr.Interface(
        fn=chat_output,
        inputs=[textbox_component],
        outputs=chatbot_component,
        title="Hotel Booking Assistant Chat 🤗",
        description="I am your hotel booking assistant. Feel free to start chatting with me.",
    )

if __name__ == "__main__":
    demo.launch()
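# --- Usage sketch (assumption: debugging outside the Gradio UI) -------------
# The Space's entrypoint is the Gradio app above, but the chain can be
# exercised from a Python REPL for quick checks; this mirrors one UI turn:
#
#   history = chat_output("I need a double room for two nights.")
#   print(history[-1][1])   # parsed assistant reply for the latest turn
#
# Note: this loads and runs the full Llama-2-7B model, so it assumes a
# GPU-backed environment such as the one implied by the @spaces.GPU decorator.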