import gradio as gr
import spaces
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

# Previous hot-dog classifier demo (disabled):
# from transformers import pipeline
#
# pipeline = pipeline(task="image-classification", model="julien-c/hotdog-not-hotdog")
#
# @spaces.GPU
# def predict(input_img):
#     predictions = pipeline(input_img)
#     return input_img, {p["label"]: p["score"] for p in predictions}
#
# gradio_app = gr.Interface(
#     predict,
#     inputs=gr.Image(label="Select hot dog candidate", sources=['upload', 'webcam'], type="pil"),
#     outputs=[gr.Image(label="Processed Image"), gr.Label(label="Result", num_top_classes=2)],
#     title="Hot Dog? Or Not?",
# ).launch()

# Load the VinaLlama2 chat model. With device_map="auto" the weights are placed
# on the GPU when one is available, so inputs must go to model.device rather
# than a hard-coded "cpu".
model = AutoModelForCausalLM.from_pretrained(
    "vilm/VinaLlama2-14B",
    torch_dtype="auto",
    device_map="auto",
)
tokenizer = AutoTokenizer.from_pretrained("vilm/VinaLlama2-14B")


@spaces.GPU
def generate_response(input_text):
    # Build a chat-formatted prompt with a Vietnamese system message
    # ("You are a helpful AI assistant.").
    messages = [
        {"role": "system", "content": "Bạn là trợ lí AI hữu ích."},
        {"role": "user", "content": input_text},
    ]
    text = tokenizer.apply_chat_template(
        messages,
        tokenize=False,
        add_generation_prompt=True,
    )
    model_inputs = tokenizer([text], return_tensors="pt").to(model.device)

    generated_ids = model.generate(
        model_inputs.input_ids,
        attention_mask=model_inputs.attention_mask,
        max_new_tokens=1024,
        eos_token_id=tokenizer.eos_token_id,
        do_sample=True,  # sampling must be enabled for temperature to take effect
        temperature=0.25,
    )
    # Strip the prompt tokens so only the newly generated reply is decoded.
    generated_ids = [
        output_ids[len(input_ids):]
        for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)
    ]
    response = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)[0]
    return response


gradio_app = gr.Interface(
    generate_response,
    inputs="text",
    outputs="text",
    title="AI Chatbot",
).launch()
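
# A minimal sketch of exercising generate_response directly, without the Gradio
# UI (the prompt string below is a hypothetical example; the first call triggers
# the 14B-parameter weight download):
#
#     >>> generate_response("Việt Nam có bao nhiêu tỉnh thành?")
#     # -> a Vietnamese-language answer string from the model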