Spaces:

GlitchJesus
/

RecursionLabImageAI

Runtime error

recursionlab

Add vision analyzer app with Florence-2

92a7556 15 days ago

2.64 kB

	import gradio as gr
	from PIL import Image
	import torch
	from transformers import AutoProcessor, AutoModelForCausalLM

	# Florence-2 setup: use the GPU in half precision when CUDA is visible,
	# otherwise fall back to CPU in full precision.
	model_id = "microsoft/Florence-2-large"
	if torch.cuda.is_available():
	    device, dtype = "cuda", torch.float16
	else:
	    device, dtype = "cpu", torch.float32

	print(f"Loading model on {device}...")
	# trust_remote_code=True is passed to both loaders below; it allows the
	# modeling/processing code shipped with the checkpoint to be executed.
	model = AutoModelForCausalLM.from_pretrained(
	    model_id,
	    trust_remote_code=True,
	    torch_dtype=dtype,
	)
	model = model.to(device)
	processor = AutoProcessor.from_pretrained(model_id, trust_remote_code=True)
	print("Model loaded.")

	def analyze_image(image, prompt):
	    """Run the loaded Florence-2 model on one image and return text.

	    Args:
	        image: PIL image to analyze, or None when nothing was uploaded.
	        prompt: Task token (e.g. "<CAPTION>") or custom text. None, empty,
	            or whitespace-only values fall back to
	            "<MORE_DETAILED_CAPTION>".

	    Returns:
	        A string: "No image uploaded." when image is None; otherwise the
	        text form of the post-processed generation. When the
	        post-processor returns a dict, its first value is used (an empty
	        dict yields an empty string).
	    """
	    if image is None:
	        return "No image uploaded."

	    # Normalise the prompt so stray whitespace does not reach the
	    # processor and a blank prompt reliably selects the default task.
	    prompt = (prompt or "").strip() or "<MORE_DETAILED_CAPTION>"

	    # Defensive: assumes the processor wants 3-channel input. The upload
	    # widget presumably already delivers RGB -- TODO confirm.
	    if image.mode != "RGB":
	        image = image.convert("RGB")

	    inputs = processor(text=prompt, images=image, return_tensors="pt").to(device, dtype)

	    # Inference only: no gradients needed; greedy decoding keeps the
	    # output deterministic for a given image and prompt.
	    with torch.no_grad():
	        generated_ids = model.generate(
	            input_ids=inputs["input_ids"],
	            pixel_values=inputs["pixel_values"],
	            max_new_tokens=512,
	            do_sample=False,
	        )

	    # Special tokens are kept because the post-processor receives the raw
	    # decoded text together with the task and the image size.
	    generated_text = processor.batch_decode(generated_ids, skip_special_tokens=False)[0]
	    parsed = processor.post_process_generation(
	        generated_text,
	        task=prompt,
	        image_size=(image.width, image.height),
	    )

	    # The result feeds a text box, so always hand back a string and do
	    # not index into a possibly empty dict.
	    if isinstance(parsed, dict):
	        return str(next(iter(parsed.values()), ""))
	    return str(parsed)

	# Task tokens offered in the "Analysis Type" dropdown.
	# Object detection uses Florence-2's "<OD>" token; "<OBJECT_DETECTION>"
	# is not a task token the model's processor knows.
	# NOTE(review): the model card shows the two REGION_TO_* tasks with
	# <loc_...> region tokens appended; with the bare token the output may
	# not be meaningful -- confirm before relying on them.
	TASKS = [
	    "<CAPTION>",
	    "<DETAILED_CAPTION>",
	    "<MORE_DETAILED_CAPTION>",
	    "<OCR>",
	    "<OCR_WITH_REGION>",
	    "<OD>",
	    "<REGION_TO_CATEGORY>",
	    "<REGION_TO_DESCRIPTION>",
	]

	def _run_analysis(image, task, custom_prompt):
	    """Analyze with the custom prompt when given, else the dropdown task."""
	    typed = custom_prompt.strip() if custom_prompt else ""
	    return analyze_image(image, typed or task)


	# UI layout: inputs on the left, result on the right.
	with gr.Blocks(title="Vision Analyzer") as demo:
	    gr.Markdown("# Image Understanding")
	    gr.Markdown("Upload an image and select what you want to extract from it.")

	    with gr.Row():
	        with gr.Column():
	            image_input = gr.Image(type="pil", label="Upload Image")
	            task_dropdown = gr.Dropdown(choices=TASKS, value="<MORE_DETAILED_CAPTION>", label="Analysis Type")
	            text_prompt = gr.Textbox(label="Or enter custom prompt (overrides dropdown)", placeholder="Describe what you see...", lines=2)
	            analyze_btn = gr.Button("Analyze")
	        with gr.Column():
	            output = gr.Textbox(label="Result", lines=15, show_copy_button=True)

	    # The dropdown must be listed as an input; otherwise the selected
	    # analysis type never reaches the handler and has no effect.
	    analyze_btn.click(
	        fn=_run_analysis,
	        inputs=[image_input, task_dropdown, text_prompt],
	        outputs=output,
	    )

	    gr.Markdown("---")
	    gr.Markdown("Powered by Microsoft Florence-2-large on HuggingFace free tier.")

	# Bind to all interfaces on port 7860.
	demo.launch(server_name="0.0.0.0", server_port=7860)