from huggingface_hub import InferenceClient
import gradio as gr
import random


client = InferenceClient("HuggingFaceH4/zephyr-7b-beta")


def respond(message, history):
    # Early stub used before wiring up the model:
    # responses = ["Yes", "No"]
    # return random.choice(responses)

    # Start with a system prompt, then append prior turns and the new user message.
    messages = [
        {"role": "system", "content": "act like google, a know it all, nice and concise"}
    ]
    if history:
        messages.extend(history)
    messages.append({"role": "user", "content": message})

    # temperature and top_p control the randomness of the sampled reply
    response = client.chat_completion(
        messages,
        max_tokens=1000,
        temperature=1.3,
        top_p=0.5,
    )
    return response.choices[0].message.content.strip()


# type="messages" makes Gradio pass history as openai-style role/content dicts,
# so it can be appended to the prompt directly.
chatbot = gr.ChatInterface(respond, type="messages")
chatbot.launch()