from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline
from langchain.llms import HuggingFacePipeline
from langchain.chains import ConversationChain
from langchain.chains.conversation.memory import ConversationBufferWindowMemory
from langchain.prompts import PromptTemplate
import gradio as gr

REPO_ID = "ksh-nyp/llama-2-7b-chat-TCMKB"

# Load the model and tokenizer from Hugging Face's model hub.
# device_map="auto" (requires the `accelerate` package) places the 7B model
# on the available GPU(s); drop it to load on CPU.
model = AutoModelForCausalLM.from_pretrained(REPO_ID, device_map="auto")
tokenizer = AutoTokenizer.from_pretrained(REPO_ID)

# Wrap the local model for LangChain. ChatOpenAI only talks to the OpenAI
# API and cannot wrap a local transformers model, so we go through a
# text-generation pipeline and HuggingFacePipeline instead.
pipe = pipeline(
    "text-generation",
    model=model,
    tokenizer=tokenizer,
    max_new_tokens=256,
)
llm = HuggingFacePipeline(pipeline=pipe)

# Keep only the last k=3 exchanges in memory. A plain module-level variable
# is enough here; st.session_state is a Streamlit API and does not exist in
# a Gradio app.
buffer_memory = ConversationBufferWindowMemory(k=3)

conversation = ConversationChain(
    llm=llm,
    memory=buffer_memory,
    verbose=True,
)

context = """
Act as an OrderBot, you work collecting orders in a delivery-only fast food \
restaurant called My Dear Frankfurt. \
First welcome the customer, in a very friendly way, then collect the order. \
You wait to collect the entire order, beverages included, \
then summarize it and check for a final time if everything is okay or the \
customer wants to add anything else. \
Finally, you collect the payment. \
Make sure to clarify all options, extras, and sizes to uniquely identify \
the item from the menu. \
You respond in a short, very friendly style. \
The menu includes:
burgers 12.95, 10.00, 7.00
frankfurts 10.95, 9.25, 6.50
sandwiches 11.95, 9.75, 6.75
fries 4.50, 3.50
salad 7.25
Toppings:
extra cheese 2.00, mushrooms 1.50
martra sausage 3.00
canadian bacon 3.50
romesco sauce 1.50
peppers 1.00
Drinks:
coke 3.00, 2.00, 1.00
sprite 3.00, 2.00, 1.00
vichy catalan 5.00
"""

# Simple system/user/assistant layout; the formatted string is passed to the
# chain as the user turn, while ConversationChain tracks the history itself.
prompt_template = PromptTemplate.from_template(
    "system: {context}\nuser: {query}\nassistant:"
)

# Define the Gradio interface. capture_session was removed from Gradio, and
# live=True would re-run the chain (and pollute the memory) on every
# keystroke, so both are dropped.
iface = gr.Interface(
    fn=lambda query: conversation.run(
        prompt_template.format(context=context, query=query)
    ),
    inputs=gr.Textbox(),
    outputs=gr.Textbox(),
)

# Launch the Gradio interface.
iface.launch()
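
# A minimal smoke test of the chain (a sketch; run it before iface.launch()
# or in a separate session, since launch() blocks the main thread). The
# query string is illustrative; actual output depends on the model:
#
#   reply = conversation.run(
#       prompt_template.format(context=context, query="Hi! What burgers do you have?")
#   )
#   print(reply)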