import gradio as gr
from transformers import pipeline


def load_model(model_name):
    # Load a text-generation pipeline for the selected model on CPU.
    return pipeline("text-generation", model=model_name, device="cpu")


def generate(
    model_name,
    template_name,
    user_input,
    temperature=0.4,
    top_p=0.95,
    top_k=50,
    max_new_tokens=256,
):
    pipe = load_model(model_name)

    # TODO: add additional chat templates later.
    if template_name == "Falcon Template":
        message_template = [
            {"role": "user", "content": "Hello!"},
            {"role": "assistant", "content": "Hello! How can I assist you today?"},
            {"role": "user", "content": user_input},
        ]
    else:  # Default to "TinyLlama Template"
        message_template = [
            {
                "role": "system",
                "content": (
                    "You are a highly knowledgeable and friendly chatbot equipped with "
                    "extensive information across various domains. Your goal is to understand "
                    "and respond to user inquiries with accuracy and clarity. You're adept at "
                    "providing detailed explanations, concise summaries, and insightful "
                    "responses. Your interactions are always respectful, helpful, and focused "
                    "on delivering the most relevant information to the user."
                ),
            },
            {"role": "user", "content": user_input},
        ]

    # Render the chat messages into a single prompt string. Keep tokenize=False here,
    # since the pipeline tokenizes the prompt itself during generation.
    prompt = pipe.tokenizer.apply_chat_template(
        message_template, tokenize=False, add_generation_prompt=True
    )
    outputs = pipe(
        prompt,
        max_new_tokens=max_new_tokens,
        do_sample=True,
        temperature=temperature,
        top_k=top_k,
        top_p=top_p,
        repetition_penalty=1.10,
    )
    return outputs[0]["generated_text"]


model_choices = ["TinyLlama/TinyLlama-1.1B-Chat-v1.0", "ericzzz/falcon-rw-1b-chat"]
template_choices = ["TinyLlama Template", "Falcon Template"]

# TODO: find the best default sampling options.
g = gr.Interface(
    fn=generate,
    inputs=[
        gr.components.Dropdown(choices=model_choices, label="Model", value=model_choices[0], interactive=True),
        gr.components.Dropdown(choices=template_choices, label="Template", value=template_choices[0], interactive=True),
        gr.components.Textbox(lines=2, label="Prompt", value="How many planets are in our solar system?"),
        gr.components.Slider(minimum=0, maximum=1, value=0.4, label="Temperature"),
        gr.components.Slider(minimum=0, maximum=1, value=0.95, label="Top p"),
        gr.components.Slider(minimum=0, maximum=100, step=1, value=50, label="Top k"),
        gr.components.Slider(minimum=1, maximum=1024, step=1, value=256, label="Max tokens"),
    ],
    outputs=[gr.Textbox(lines=10, label="Output")],
    title="Hugging Face Transformers Model",
    description="A simple interface for generating text with a Hugging Face Transformers model.",
    concurrency_limit=1,
)

g.launch(max_threads=2)