Spaces:

Tonic1
/

kosmos-2

Running

kosmos-2 / app.py

Create app.py

6e402f7 verified 8 months ago

1.61 kB

	import os
	import requests
	from io import BytesIO

	from PIL import Image
	from transformers import AutoProcessor, AutoModelForVision2Seq

	_KOSMOS2_CHECKPOINT = "microsoft/kosmos-2-patch14-224"
	# Holds the loaded (model, processor) pair so the weights are read only once.
	_KOSMOS2_CACHE = {}


	def _get_model_and_processor():
	    """Return the Kosmos-2 model and processor, loading them on first use."""
	    if "pair" not in _KOSMOS2_CACHE:
	        # Load both into locals first so a failure on the second load does
	        # not leave a half-populated cache behind.
	        model = AutoModelForVision2Seq.from_pretrained(_KOSMOS2_CHECKPOINT)
	        processor = AutoProcessor.from_pretrained(_KOSMOS2_CHECKPOINT)
	        _KOSMOS2_CACHE["pair"] = (model, processor)
	    return _KOSMOS2_CACHE["pair"]


	def _to_rgb_image(image):
	    """Coerce the supported input kinds into a fresh RGB PIL image."""
	    if image is None:
	        raise ValueError("No image provided.")
	    if isinstance(image, Image.Image):
	        img = image
	    elif isinstance(image, (bytes, bytearray, memoryview)):
	        # Encoded image file contents (the only form the original accepted).
	        img = Image.open(BytesIO(image))
	    elif isinstance(image, (str, os.PathLike)):
	        img = Image.open(image)
	    else:
	        # Array-like pixel data, e.g. the numpy array a Gradio image input
	        # passes by default.
	        img = Image.fromarray(image)
	    # convert() returns a new image object, so the caller's image is never
	    # reused, and no shared temporary file on disk is needed.
	    return img.convert("RGB")


	def generate_caption(image):
	    """Generate a caption for an image with the Kosmos-2 model.

	    Args:
	        image: The picture to describe. May be a PIL image, the encoded
	            bytes of an image file, a path to an image file, or an
	            array-like of pixel data.

	    Returns:
	        The first element returned by ``processor.post_process_generation``
	        for the generated text (the cleaned caption, per the Kosmos-2
	        documentation).

	    Raises:
	        ValueError: If ``image`` is ``None``.
	    """
	    img = _to_rgb_image(image)
	    model, processor = _get_model_and_processor()

	    prompt = "<grounding>An image of"
	    inputs = processor(text=prompt, images=img, return_tensors="pt")

	    # Generate caption
	    generated_ids = model.generate(**inputs, max_new_tokens=128)
	    generated_text = processor.batch_decode(generated_ids, skip_special_tokens=True)[0]

	    # Process the generated caption; the second return value is not needed here.
	    processed_text, _ = processor.post_process_generation(generated_text)

	    return processed_text

	import gradio as gr

	# Static text shown on the Gradio page.
	title = 'Image Caption Generator'
	description = 'Generate descriptive captions for images.'
	# NOTE(review): this looks like a placeholder URL; confirm it resolves to a
	# real image or replace it, otherwise the example entry cannot be loaded.
	examples = [["https://example.com/image1.jpg"]]
	article = '<p style="margin:auto;max-width:600px;">This tool generates descriptive captions for given images.</p>'

	# Use the top-level component classes: the gr.inputs / gr.outputs namespaces
	# were deprecated in Gradio 3 and removed in Gradio 4. The `source` keyword is
	# omitted because it no longer exists in Gradio 4 and upload is already the
	# default in Gradio 3. The image is still delivered in the component's default
	# format, as before.
	interface = gr.Interface(
	    fn=generate_caption,
	    inputs=gr.Image(),
	    outputs=gr.Textbox(),
	    title=title,
	    description=description,
	    examples=examples,
	    article=article,
	)

	interface.launch()