"""Gradio chat demo that streams completions from a Hugging Face endpoint.

The script loads ``HF_API_KEY`` from a local ``.env`` file, builds a
``text_generation`` client for the model at ``URL`` and serves a small chat
UI whose replies are streamed token by token.
"""

import json
import os
from typing import Iterator

import gradio as gr
import requests
from dotenv import find_dotenv, load_dotenv
from text_generation import Client

requests.adapters.DEFAULT_TIMEOUT = 60

_ = load_dotenv(find_dotenv())  # read local .env file
hf_api_key = os.environ['HF_API_KEY']

# Zephyr-7B-beta served through the Hugging Face Inference API, accessed with
# the text_generation client library.
URL = "https://api-inference.huggingface.co/models/HuggingFaceH4/zephyr-7b-beta"

# The API key was previously loaded but never sent; pass it as a bearer token
# so requests are authenticated.
client = Client(
    URL,
    headers={"Authorization": f"Bearer {hf_api_key}"},
    timeout=120,
)

# Default system prompt shown (and editable) under "Advanced options".
DEFAULT_SYSTEM_MESSAGE = (
    "A conversation between a user and an LLM-based AI assistant. \n"
    "The assistant gives helpful and honest answers."
)


def generate(input, slider):
    """Return the full completion for ``input`` in a single (non-streamed) call.

    Args:
        input: Prompt text sent to the model. (The name shadows the builtin
            but is kept for backward compatibility with keyword callers.)
        slider: Maximum number of new tokens to generate.

    Returns:
        The ``generated_text`` of the client's response.
    """
    output = client.generate(input, max_new_tokens=slider).generated_text
    return output


def format_chat_prompt(message, chat_history, instruction):
    """Flatten the system instruction, past turns and new message into a prompt.

    Args:
        message: The new user message.
        chat_history: Iterable of ``(user_message, bot_message)`` pairs.
        instruction: System instruction placed on the first line.

    Returns:
        A newline-separated prompt ending in ``"Assistant:"`` so that the
        model continues with the assistant's reply.
    """
    lines = [f"System:{instruction}"]
    for user_message, bot_message in chat_history:
        lines.append(f"User: {user_message}")
        lines.append(f"Assistant: {bot_message}")
    lines.append(f"User: {message}")
    lines.append("Assistant:")
    return "\n".join(lines)


def respond(message, chat_history, instruction, temperature=0.7) -> Iterator[tuple]:
    """Stream the assistant's reply to ``message``.

    Args:
        message: The new user message.
        chat_history: Previous ``[user_message, bot_message]`` turns; ``None``
            is treated as an empty history. The argument is not mutated.
        instruction: System instruction for the prompt.
        temperature: Sampling temperature forwarded to the client.

    Yields:
        ``("", history)`` after every streamed token, where ``history`` is a
        fresh list made of the previous turns plus the current turn with the
        reply accumulated so far. The empty string clears the prompt textbox.
    """
    history = list(chat_history or [])
    prompt = format_chat_prompt(message, history, instruction)
    stream = client.generate_stream(
        prompt,
        max_new_tokens=1024,
        # stop_sequences keep the model from also writing the user's next turn
        stop_sequences=["\nUser:", "<|endoftext|>"],
        temperature=temperature,
    )

    partial_reply = ""
    for idx, response in enumerate(stream):
        text_token = response.token.text

        # Stop as soon as a response carries `details`; that response's token
        # is intentionally not appended (see the text_generation docs for when
        # `details` is populated).
        if response.details:
            return

        # Drop the single leading space of the very first token.
        if idx == 0 and text_token.startswith(" "):
            text_token = text_token[1:]

        partial_reply += text_token
        yield "", history + [[message, partial_reply]]


def loadGUI():
    """Build the chat interface and launch it with a public share link."""
    with gr.Blocks() as demo:
        chatbot = gr.Chatbot(height=240)  # just to fit the notebook
        msg = gr.Textbox(label="Prompt")
        with gr.Accordion(label="Advanced options", open=False):
            system = gr.Textbox(
                label="System message",
                lines=2,
                value=DEFAULT_SYSTEM_MESSAGE,
            )
            temperature = gr.Slider(
                label="temperature",
                minimum=0.1,
                maximum=1,
                value=0.7,
                step=0.1,
            )
        btn = gr.Button("Submit")
        gr.ClearButton(components=[msg, chatbot], value="Clear console")

        # The temperature slider must be listed as an input, otherwise
        # respond() always runs with its default temperature.
        chat_inputs = [msg, chatbot, system, temperature]
        chat_outputs = [msg, chatbot]
        btn.click(respond, inputs=chat_inputs, outputs=chat_outputs)
        # Press enter to submit
        msg.submit(respond, inputs=chat_inputs, outputs=chat_outputs)

    gr.close_all()
    demo.queue().launch(share=True)


def main():
    """Script entry point: start the GUI."""
    loadGUI()


if __name__ == "__main__":
    main()