import gradio as gr
from transformers import pipeline, AutoTokenizer

# Define examples and model configuration
examples = [
    "Give me a recipe for pizza with pineapple",
    "Write me a tweet about the new OpenVINO release",
    "Explain the difference between CPU and GPU",
    "Give five ideas for a great weekend with family",
    "Do Androids dream of Electric sheep?",
    "Who is Dolly?",
    "Please give me advice on how to write resume?",
    "Name 3 advantages to being a cat",
    "Write instructions on how to become a good AI engineer",
    "Write a love letter to my best friend",
]

# Define the model and its tokenizer
model_name = "susnato/phi-2"  # Replace with your actual model identifier
tokenizer = AutoTokenizer.from_pretrained(model_name)
generator = pipeline("text-generation", model=model_name, tokenizer=tokenizer)


def run_generation(user_text, top_p, temperature, top_k, max_new_tokens, performance):
    # Build the instruction-style prompt expected by the model and generate a completion.
    prompt = f"Instruct:{user_text}\nOutput:"
    response = generator(
        prompt,
        max_new_tokens=max_new_tokens,
        do_sample=True,  # sampling must be enabled for top_p / top_k / temperature to take effect
        top_p=top_p,
        temperature=temperature,
        top_k=top_k,
    )[0]["generated_text"]
    return response, "N/A"  # Replace "N/A" with actual performance metrics if available


def reset_textbox(*args):
    # Clear the instruction, response, and performance textboxes.
    return "", "", ""


def main():
    with gr.Blocks() as demo:
        gr.Markdown(
            "# Question Answering with OpenVINO\n"
            "Provide an instruction that describes a task below, or select one of the predefined examples, "
            "and the model will write a response that performs the requested task."
        )

        with gr.Row():
            with gr.Column(scale=4):
                user_text = gr.Textbox(
                    placeholder="Write an email about an alpaca that likes flan",
                    label="User instruction",
                )
                model_output = gr.Textbox(label="Model response", interactive=False)
                performance = gr.Textbox(label="Performance", lines=1, interactive=False)
                with gr.Column(scale=1):
                    button_clear = gr.Button(value="Clear")
                    button_submit = gr.Button(value="Submit")
                gr.Examples(examples, user_text)
            with gr.Column(scale=1):
                max_new_tokens = gr.Slider(
                    minimum=1,
                    maximum=1000,
                    value=256,
                    step=1,
                    interactive=True,
                    label="Max New Tokens",
                )
                top_p = gr.Slider(
                    minimum=0.05,
                    maximum=1.0,
                    value=0.92,
                    step=0.05,
                    interactive=True,
                    label="Top-p (nucleus sampling)",
                )
                top_k = gr.Slider(
                    minimum=0,
                    maximum=50,
                    value=0,
                    step=1,
                    interactive=True,
                    label="Top-k",
                )
                temperature = gr.Slider(
                    minimum=0.1,
                    maximum=5.0,
                    value=0.8,
                    step=0.1,
                    interactive=True,
                    label="Temperature",
                )

        # Wire generation to both pressing Enter in the textbox and clicking Submit.
        user_text.submit(
            run_generation,
            [user_text, top_p, temperature, top_k, max_new_tokens, performance],
            [model_output, performance],
        )
        button_submit.click(
            run_generation,
            [user_text, top_p, temperature, top_k, max_new_tokens, performance],
            [model_output, performance],
        )
        button_clear.click(
            reset_textbox,
            [user_text, model_output, performance],
            [user_text, model_output, performance],
        )

    return demo


if __name__ == "__main__":
    iface = main()
    iface.launch(share=True)