import requests
from PIL import Image
import gradio as gr
from transformers import ViTFeatureExtractor, pipeline

# Model identifier shared by the feature extractor and the pipeline below.
model_name = "google/vit-base-patch16-224"

# Build the image-classification pipeline once at import time so every
# request handled by the app reuses the same loaded model.
classifier = pipeline(
    task="image-classification",
    model=model_name,
    feature_extractor=ViTFeatureExtractor.from_pretrained(model_name),
)

# Step 3: Declare the Gradio input and output components
# Input: an image upload widget configured with a 224x224 shape.
image_input = gr.inputs.Image(
    label="Upload an image",
    shape=(224, 224),
)
# Output: a label widget limited to the three highest-scoring classes.
label_output = gr.outputs.Label(
    label="Predicted labels",
    num_top_classes=3,
)

# Step 4: Create a function that receives an image input, runs it through the `classifier` pipeline, and returns the prediction
def classify_image(image):
    """Classify an image and return the predicted labels with their scores.

    Args:
        image: The image as an array-like object exposing ``astype`` (it is
            cast to ``uint8`` and interpreted as RGB), or ``None`` when no
            image was supplied.

    Returns:
        A dict mapping each predicted label to its score, built from the
        entries returned by ``classifier``. The dict is empty when ``image``
        is ``None``.
    """
    # The UI may invoke this with no image (e.g. a submit without an upload);
    # return an empty result instead of failing on ``None.astype``.
    if image is None:
        return {}

    # The pipeline is fed a PIL image, so cast to 8-bit and wrap as RGB first.
    pil_image = Image.fromarray(image.astype('uint8'), 'RGB')
    predictions = classifier(pil_image)
    return {pred['label']: pred['score'] for pred in predictions}

# Step 5: Build the Gradio interface and start serving it.
# NOTE: the former `capture_session=True` argument is intentionally omitted;
# Gradio deprecated that parameter and its deprecation notice states it has
# no effect, so passing it only triggered a warning.
gr.Interface(
    fn=classify_image,
    inputs=image_input,
    outputs=label_output,
).launch()