colt12
/

maxcushion

StableDiffusionXLPipeline

stable-diffusion-xl

Inference Endpoints

Model card Files Files and versions Community

maxcushion / app.py

colt12's picture

Create app.py

46e68f0 verified 5 months ago

1.07 kB

	import io
	from PIL import Image
	import torch
	from transformers import AutoProcessor, AutoModelForVisionEncoderDecoder

	# Load the processor and the model once, at import time, so that every
	# call to predict() reuses the same objects instead of reloading them.
	# Both are resolved from the same identifier stored in model_name.
	# NOTE(review): transformers documents VisionEncoderDecoderModel and
	# AutoModelForVision2Seq for image captioning; confirm that
	# AutoModelForVisionEncoderDecoder is importable in the pinned version and
	# that this checkpoint really is an image-to-text model.
	model_name = "colt12/maxcushion"
	processor = AutoProcessor.from_pretrained(model_name)
	model = AutoModelForVisionEncoderDecoder.from_pretrained(model_name)

	def predict(image_bytes, max_length=50):
	    """Generate a text caption for an encoded image.

	    Args:
	        image_bytes: Raw bytes of an image file in any format PIL can decode.
	        max_length: Upper bound on the generated sequence length, forwarded
	            to ``model.generate``. Defaults to 50, the value that was
	            previously hard-coded.

	    Returns:
	        The first decoded caption, with special tokens stripped.

	    Errors raised by PIL, the processor or the model are not caught here
	    and propagate to the caller.
	    """
	    # Decode inside a context manager so the underlying decoder is released
	    # as soon as the pixel data has been copied out by convert().
	    with Image.open(io.BytesIO(image_bytes)) as source:
	        # Force three channels: uploads are frequently RGBA, greyscale or
	        # palette images, which a processor normalising with per-channel
	        # RGB statistics cannot handle.
	        image = source.convert("RGB")

	    # Preprocess the image into the tensor the model consumes.
	    pixel_values = processor(images=image, return_tensors="pt").pixel_values

	    # Generate token ids and decode the single result of the batch.
	    generated_ids = model.generate(pixel_values, max_length=max_length)
	    captions = processor.batch_decode(generated_ids, skip_special_tokens=True)
	    return captions[0]

	def run(raw_image):
	    """Entry point: caption ``raw_image`` and wrap the outcome in a dict.

	    Args:
	        raw_image: Encoded image payload handed straight to ``predict``.

	    Returns:
	        ``{"caption": <text>}`` when captioning succeeds, or
	        ``{"error": <message>}`` when ``predict`` raises any ``Exception``.

	    Raises:
	        ValueError: If ``raw_image`` is falsy (``None``, empty bytes, ...).
	            This check happens before the ``try`` block, so it is raised
	            rather than reported in the returned dict.
	    """
	    # Guard clause: reject a missing or empty payload up front.
	    if not raw_image:
	        raise ValueError("No image provided")

	    try:
	        caption = predict(raw_image)
	    except Exception as exc:
	        # Report the failure to the caller as data instead of propagating.
	        return {"error": str(exc)}
	    else:
	        return {"caption": caption}