"""Gradio demo: image understanding with Microsoft Florence-2.

Loads the model once at start-up, then serves a small web UI in which the
user uploads an image and either picks a built-in Florence-2 task or types
a custom prompt.
"""

import gradio as gr
import torch
from PIL import Image
from transformers import AutoModelForCausalLM, AutoProcessor

# Available tasks for Florence-2.
# NOTE(review): these literals were empty strings in the file as received
# (the "<...>" tokens look like they were stripped as markup). They are
# restored here from the Florence-2 model card -- the eight task prompts
# that need no additional text input. Confirm against the intended list.
TASKS = [
    "<CAPTION>",
    "<DETAILED_CAPTION>",
    "<MORE_DETAILED_CAPTION>",
    "<OD>",
    "<DENSE_REGION_CAPTION>",
    "<REGION_PROPOSAL>",
    "<OCR>",
    "<OCR_WITH_REGION>",
]
# Used when neither a custom prompt nor a dropdown task is supplied.
DEFAULT_TASK = TASKS[0]

# Load Florence-2 (runs on CPU, free tier compatible)
model_id = "microsoft/Florence-2-large"
device = "cuda" if torch.cuda.is_available() else "cpu"
# Half precision only when a GPU is present; float32 otherwise.
dtype = torch.float16 if torch.cuda.is_available() else torch.float32

print(f"Loading model on {device}...")
model = AutoModelForCausalLM.from_pretrained(
    model_id,
    torch_dtype=dtype,
    trust_remote_code=True,
).to(device)
processor = AutoProcessor.from_pretrained(model_id, trust_remote_code=True)
print("Model loaded.")


def analyze_image(image, prompt, task=None):
    """Run Florence-2 on *image* and return the result as text.

    Args:
        image: PIL image from the upload widget, or ``None`` if nothing
            was uploaded.
        prompt: Custom prompt text. When non-blank it takes precedence
            over ``task``.
        task: Task chosen in the dropdown. Used only when ``prompt`` is
            blank; falls back to ``DEFAULT_TASK`` when also empty.
            Optional, so existing two-argument callers keep working.

    Returns:
        A string: the first value of the processor's post-processed
        output when that output is a non-empty dict, otherwise the
        string form of the output. Returns ``"No image uploaded."`` when
        ``image`` is ``None``.
    """
    if image is None:
        return "No image uploaded."

    # Custom text overrides the dropdown; the dropdown overrides the default.
    prompt = (prompt or "").strip() or task or DEFAULT_TASK

    # NOTE(review): uploads may arrive as RGBA/L/P; converting to RGB up
    # front is assumed to be what the processor expects -- confirm.
    if image.mode != "RGB":
        image = image.convert("RGB")

    inputs = processor(text=prompt, images=image, return_tensors="pt").to(device, dtype)

    with torch.no_grad():
        generated_ids = model.generate(
            input_ids=inputs["input_ids"],
            pixel_values=inputs["pixel_values"],
            max_new_tokens=512,
            do_sample=False,
        )

    # Special tokens are kept because the post-processor parses them.
    generated_text = processor.batch_decode(generated_ids, skip_special_tokens=False)[0]
    parsed = processor.post_process_generation(
        generated_text,
        task=prompt,
        image_size=(image.width, image.height),
    )

    # Return the first value from the parsed dict; an empty dict falls
    # through to str(parsed) instead of raising IndexError.
    if isinstance(parsed, dict) and parsed:
        return str(next(iter(parsed.values())))
    return str(parsed)


with gr.Blocks(title="Vision Analyzer") as demo:
    gr.Markdown("# Image Understanding")
    gr.Markdown("Upload an image and select what you want to extract from it.")

    with gr.Row():
        with gr.Column():
            image_input = gr.Image(type="pil", label="Upload Image")
            task_dropdown = gr.Dropdown(
                choices=TASKS,
                value=DEFAULT_TASK,
                label="Analysis Type",
            )
            text_prompt = gr.Textbox(
                label="Or enter custom prompt (overrides dropdown)",
                placeholder="Describe what you see...",
                lines=2,
            )
            analyze_btn = gr.Button("Analyze")
        with gr.Column():
            output = gr.Textbox(label="Result", lines=15, show_copy_button=True)

    # Input order matches analyze_image(image, prompt, task); the dropdown
    # is now passed so its selection actually takes effect.
    analyze_btn.click(
        fn=analyze_image,
        inputs=[image_input, text_prompt, task_dropdown],
        outputs=output,
    )

    gr.Markdown("---")
    gr.Markdown("Powered by Microsoft Florence-2-large on HuggingFace free tier.")

demo.launch(server_name="0.0.0.0", server_port=7860)