import spaces
import gradio as gr
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM

# Load the tokenizer and model.
# Half precision plus device_map="auto" (requires the accelerate package) keeps
# this ~35B-parameter model within GPU memory on a ZeroGPU Space.
model_id = "CohereForAI/c4ai-command-r-v01"
tokenizer = AutoTokenizer.from_pretrained(model_id)
model = AutoModelForCausalLM.from_pretrained(
    model_id,
    torch_dtype=torch.float16,
    device_map="auto",
)


@spaces.GPU
def generate_response(user_input, max_new_tokens, temperature):
    # Format the message with the command-r chat template.
    # With tokenize=True and return_tensors="pt", apply_chat_template returns a
    # tensor directly, not a dict, so it must not be indexed with ['input_ids'].
    messages = [{"role": "user", "content": user_input}]
    input_ids = tokenizer.apply_chat_template(
        messages,
        tokenize=True,
        add_generation_prompt=True,
        return_tensors="pt",
    ).to(model.device)

    # Generate tokens. max_new_tokens already counts only tokens beyond the
    # prompt, so no manual adjustment of max_length is needed.
    gen_tokens = model.generate(
        input_ids,
        max_new_tokens=max_new_tokens,
        do_sample=True,
        temperature=temperature,
    )

    # Decode only the newly generated tokens, skipping the echoed prompt
    # and special tokens.
    gen_text = tokenizer.decode(
        gen_tokens[0][input_ids.shape[1]:], skip_special_tokens=True
    )
    return gen_text


# Define the Gradio interface. The gr.inputs/gr.outputs namespaces were removed
# in Gradio 4; components are used directly, and "default" is now "value".
iface = gr.Interface(
    fn=generate_response,
    inputs=[
        gr.Textbox(lines=2, label="Your Message"),
        gr.Slider(minimum=10, maximum=100, value=50, label="Max New Tokens"),
        gr.Slider(minimum=0.1, maximum=1.0, step=0.1, value=0.3, label="Temperature"),
    ],
    outputs=gr.Textbox(label="Model Response"),
    title="Text Generation Model Interface",
    description="This is a Gradio interface for a text generation model. Enter your message and adjust the parameters to generate a response.",
)

# Launch the application
iface.launch()