import gradio as gr
from transformers import VisionEncoderDecoderModel, ViTImageProcessor, AutoTokenizer
from PIL import Image
import torch

# Load the pre-trained model, image processor, and tokenizer
model = VisionEncoderDecoderModel.from_pretrained("nlpconnect/vit-gpt2-image-captioning")
feature_extractor = ViTImageProcessor.from_pretrained("nlpconnect/vit-gpt2-image-captioning")
tokenizer = AutoTokenizer.from_pretrained("nlpconnect/vit-gpt2-image-captioning")

# Set the device to GPU if available
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)

# Define generation parameters
max_length = 16
num_beams = 4

# Function to generate a caption from an image
def generate_caption(image):
    if image is None:
        return "Please upload an image."
    # Convert the image to RGB if it is not already
    if image.mode != "RGB":
        image = image.convert(mode="RGB")
    # Preprocess the image into pixel values and move them to the model's device
    pixel_values = feature_extractor(images=image, return_tensors="pt").pixel_values
    pixel_values = pixel_values.to(device)
    # Generate the caption with beam search and decode it to text
    output_ids = model.generate(pixel_values, max_length=max_length, num_beams=num_beams)
    caption = tokenizer.decode(output_ids[0], skip_special_tokens=True)
    return caption.strip()

# Create the Gradio interface
iface = gr.Interface(
    fn=generate_caption,
    inputs=gr.Image(type="pil", label="Upload an Image"),
    outputs=gr.Textbox(label="Generated Caption"),
    title="🖼️ AI Image Caption Generator",
    description="Upload an image, and the AI will generate a descriptive caption for it.",
    allow_flagging="never"
)

# Launch the app
if __name__ == "__main__":
    iface.launch()
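
For a quick sanity check outside the web UI, generate_caption can also be called directly on a PIL image, for example from a Python shell after the model has loaded. This is a minimal sketch; "sample.jpg" is a placeholder path, not a file shipped with this Space.

from PIL import Image

test_image = Image.open("sample.jpg")  # placeholder: any local image file
print(generate_caption(test_image))    # prints the generated caption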