import gradio as gr
import spaces
import torch
# Replace torch.jit.script with an identity function: anything passed to it is
# returned unchanged rather than being scripted. This must run before the
# t2v_metrics import below so that code imported afterwards sees the patch.
torch.jit.script = lambda f: f  # Avoid script error in lambda

from t2v_metrics import VQAScore, list_all_vqascore_models

# Name of the scoring model that is constructed eagerly at import time.
DEFAULT_MODEL_NAME = "clip-flant5-xl"

# Global model variable: constructed once at import time with device="cuda".
model_pipe = VQAScore(model=DEFAULT_MODEL_NAME, device="cuda")  # our recommended scoring model

# Scoring pipelines keyed by model name. Seeded with the eagerly constructed
# default so that it is never built a second time.
_model_pipes = {DEFAULT_MODEL_NAME: model_pipe}


def _get_model_pipe(model_name):
    """Return the scoring pipeline for *model_name*, constructing it on first use.

    Args:
        model_name: Name of a VQAScore model. A falsy value (``None`` or an
            empty string) selects ``DEFAULT_MODEL_NAME``.

    Returns:
        The ``VQAScore`` instance registered for the resolved name.
    """
    name = model_name or DEFAULT_MODEL_NAME
    pipe = _model_pipes.get(name)
    if pipe is None:
        # NOTE(review): a non-default model is constructed lazily, i.e. inside
        # the GPU-decorated call that first requests it. Confirm that the
        # configured duration budget is large enough for that first load, and
        # that this cache persists between calls in the deployment environment.
        pipe = VQAScore(model=name, device="cuda")
        _model_pipes[name] = pipe
    return pipe


@spaces.GPU(duration=20)
def generate(model_name, image, text):
    """Score how well *image* matches *text* using the selected model.

    Previously ``model_name`` was accepted but ignored, so every request was
    scored with the default model regardless of the user's selection. The
    pipeline matching ``model_name`` is now used.

    Args:
        model_name: Name of the VQAScore model to use; a falsy value falls
            back to ``DEFAULT_MODEL_NAME``.
        image: The image input; passed to the pipeline as a one-element list.
        text: The prompt; passed to the pipeline as a one-element list.

    Returns:
        Whatever the pipeline returns for ``images=[image], texts=[text]``.

    Raises:
        RuntimeError: Re-raised unchanged after being logged, if inference
            fails with one.
    """
    print(list_all_vqascore_models())  # Debug: List available models
    print("Image:", image)  # Debug: Print image path
    print("Text:", text)  # Debug: Print text input
    print("Generating!")

    pipe = _get_model_pipe(model_name)

    # Log RuntimeErrors (e.g. CUDA failures) before propagating them so they
    # show up in the application logs.
    try:
        result = pipe(images=[image], texts=[text])  # Perform the model inference
    except RuntimeError as e:
        print(f"RuntimeError during model inference: {e}")
        raise  # bare raise keeps the original traceback intact

    return result  # Return the result

# Input widgets, in the order `generate` expects its arguments:
# model name, image (handed over as a file path), and the text prompt.
_model_choice = gr.Dropdown(
    ["clip-flant5-xl", "clip-flant5-xxl"],
    label="Model Name",
)
_image_input = gr.Image(type="filepath")
_prompt_input = gr.Textbox(label="Prompt")

# Assemble the web UI: three inputs feeding `generate`, one numeric output.
demo = gr.Interface(
    fn=generate,
    inputs=[_model_choice, _image_input, _prompt_input],
    outputs="number",
    title="VQAScore",
    description="This model evaluates the similarity between an image and a text prompt.",
)

# Start serving the interface as soon as the module is executed.
demo.launch()