import gradio as gr
import spaces
from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline
from langchain.memory import ConversationBufferMemory
from langchain.chains import ConversationChain
from langchain_community.llms.huggingface_pipeline import HuggingFacePipeline
from langchain.prompts import PromptTemplate

@spaces.GPU
def initialize_model_and_tokenizer(model_name="KvrParaskevi/Llama-2-7b-Hotel-Booking-Model"):
    model = AutoModelForCausalLM.from_pretrained(model_name)
    tokenizer = AutoTokenizer.from_pretrained(model_name)
    return model, tokenizer
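
# Note: from_pretrained() above loads full-precision weights on CPU by default.
# If GPU memory is tight, a hedged alternative (not part of the original code;
# assumes torch is importable, a CUDA device is available, and the accelerate
# package is installed for device_map) would be:
#
#   model = AutoModelForCausalLM.from_pretrained(
#       model_name, torch_dtype=torch.float16, device_map="auto")
#
# torch_dtype and device_map are standard transformers arguments.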

@spaces.GPU
def load_pipeline():
    model, tokenizer = initialize_model_and_tokenizer()
    pipe = pipeline("text-generation",
                    model=model,
                    tokenizer=tokenizer,
                    max_new_tokens=50,
                    do_sample=True,  # required; otherwise top_k/top_p/temperature are ignored
                    top_k=30,
                    top_p=0.7,
                    early_stopping=True,
                    num_beams=2,
                    temperature=0.1,
                    repetition_penalty=1.03)

    llm = HuggingFacePipeline(pipeline=pipe)
    return llm
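
# Note: by default the text-generation pipeline echoes the full prompt back in
# its output, which is why chat_output() below splits on the "AI:" marker. A
# possible alternative (untested here) is to pass return_full_text=False to
# pipeline() above — a standard transformers argument — so that only the newly
# generated text is returned.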

def chat_interface(inputs):
    # ConversationBufferMemory supplies the {history} variable of the prompt,
    # so only the new user turn needs to be passed in.
    result = llm_chain.invoke({"input": inputs}, return_only_outputs=True)
    return result["response"]

llm = load_pipeline()
chat_history = []

def chat_output(inputs):
    output = chat_interface(inputs)
    # The pipeline returns the whole prompt plus the generation, so keep only
    # the text after the last "AI:" marker and cut it off at the first
    # follow-up "Human:" turn the model may have hallucinated.
    answer = output.split("AI:")[-1].split("Human:")[0].strip()
    chat_history.append((inputs, answer))
    return chat_history
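
# A hypothetical reset helper (an addition, not wired into the UI below). Both
# the Gradio-side history and the LangChain memory must be cleared together,
# or old turns would resurface in the rendered prompt:
def clear_history():
    chat_history.clear()
    memory.clear()
    return []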

template = """You are an AI having conversation with a human. 
Make sure you receive a logical answer from the user from every question to complete the hotel 
booking process.
Current conversation:
{history}
Human: {input}
AI:"""
prompt = PromptTemplate(template=template, input_variables=["history", "input"])
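
# With one prior exchange stored in memory, the rendered prompt looks roughly
# like this (the hotel dialogue is an illustrative example; "Human:"/"AI:" are
# ConversationBufferMemory's default prefixes):
#
#   You are an AI having a conversation with a human.
#   ...
#   Current conversation:
#   Human: I need a room for two nights.
#   AI: Sure! Which city are you travelling to?
#   Human: Athens, please.
#   AI: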

memory = ConversationBufferMemory(memory_key="history")
llm_chain = ConversationChain(llm=llm, memory=memory, prompt=prompt)
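
# Quick local smoke test of the chain (hypothetical input, bypassing the UI):
#   print(llm_chain.invoke({"input": "Hi, I want to book a room."},
#                          return_only_outputs=True)["response"])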

with gr.Blocks() as demo:
    chatbot_component = gr.Chatbot(height=300, label="history")
    textbox_component = gr.Textbox(placeholder="Can I help you to book a hotel?",
                                   container=False, label="input", scale=7)

    demo.chatbot_interface = gr.Interface(
        fn=chat_output,
        inputs=[textbox_component],
        outputs=chatbot_component,
        title="Hotel Booking Assistant Chat 🤗",
        description="I am your hotel booking assistant. Feel free to start chatting with me."
    )
demo.launch()