import spaces
import gradio as gr
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM

# Load the tokenizer and model.
# Half precision plus device_map="auto" (requires the accelerate package) keeps
# this ~35B-parameter model within GPU memory on a ZeroGPU Space.
model_id = "CohereForAI/c4ai-command-r-v01"
tokenizer = AutoTokenizer.from_pretrained(model_id)
model = AutoModelForCausalLM.from_pretrained(
    model_id,
    torch_dtype=torch.float16,
    device_map="auto",
)


@spaces.GPU
def generate_response(user_input, max_new_tokens, temperature):
    # Format the message with the command-r chat template.
    # With tokenize=True and return_tensors="pt", apply_chat_template returns a
    # tensor directly, not a dict, so it must not be indexed with ['input_ids'].
    messages = [{"role": "user", "content": user_input}]
    input_ids = tokenizer.apply_chat_template(
        messages,
        tokenize=True,
        add_generation_prompt=True,
        return_tensors="pt",
    ).to(model.device)

    # Generate tokens. max_new_tokens already counts only tokens beyond the
    # prompt, so no manual adjustment of max_length is needed.
    gen_tokens = model.generate(
        input_ids,
        max_new_tokens=max_new_tokens,
        do_sample=True,
        temperature=temperature,
    )

    # Decode only the newly generated tokens, skipping the echoed prompt
    # and special tokens.
    gen_text = tokenizer.decode(
        gen_tokens[0][input_ids.shape[1]:], skip_special_tokens=True
    )
    return gen_text


# Define the Gradio interface. The gr.inputs/gr.outputs namespaces were removed
# in Gradio 4; components are used directly, and "default" is now "value".
iface = gr.Interface(
    fn=generate_response,
    inputs=[
        gr.Textbox(lines=2, label="Your Message"),
        gr.Slider(minimum=10, maximum=100, value=50, label="Max New Tokens"),
        gr.Slider(minimum=0.1, maximum=1.0, step=0.1, value=0.3, label="Temperature"),
    ],
    outputs=gr.Textbox(label="Model Response"),
    title="Text Generation Model Interface",
    description="This is a Gradio interface for a text generation model. Enter your message and adjust the parameters to generate a response.",
)

# Launch the application
iface.launch()