import gradio as gr
from transformers import AutoModelForCausalLM, AutoProcessor
from PIL import Image, ImageDraw
import requests
import matplotlib.pyplot as plt
import matplotlib.patches as patches
import numpy as np
import random

# Load model and processor
model_id = 'microsoft/Florence-2-large'
model = AutoModelForCausalLM.from_pretrained(model_id, trust_remote_code=True).eval()
processor = AutoProcessor.from_pretrained(model_id, trust_remote_code=True)

# NOTE(review): in the previous revision every task literal was an empty
# string, which looks like angle-bracket tokens stripped as markup. The 14
# entries below are the task prompts listed on the Florence-2 model card
# (same count as the original dropdown) -- confirm against the model card.
TASK_PROMPTS = [
    '<CAPTION>',
    '<DETAILED_CAPTION>',
    '<MORE_DETAILED_CAPTION>',
    '<OD>',
    '<DENSE_REGION_CAPTION>',
    '<REGION_PROPOSAL>',
    '<CAPTION_TO_PHRASE_GROUNDING>',
    '<REFERRING_EXPRESSION_SEGMENTATION>',
    '<REGION_TO_SEGMENTATION>',
    '<OPEN_VOCABULARY_DETECTION>',
    '<REGION_TO_CATEGORY>',
    '<REGION_TO_DESCRIPTION>',
    '<OCR>',
    '<OCR_WITH_REGION>',
]


def run_example(task_prompt, image, text_input=None):
    """Run one task prompt on an image and return the post-processed answer.

    Args:
        task_prompt: Task token string sent to the model; also passed as
            ``task`` to ``processor.post_process_generation``.
        image: Image object exposing ``width`` and ``height`` (a PIL image
            when called from the Gradio UI below).
        text_input: Optional extra text appended directly to ``task_prompt``.

    Returns:
        Whatever ``processor.post_process_generation`` returns for the
        generated text.
    """
    prompt = task_prompt if text_input is None else task_prompt + text_input
    inputs = processor(text=prompt, images=image, return_tensors="pt")
    generated_ids = model.generate(
        input_ids=inputs["input_ids"],
        pixel_values=inputs["pixel_values"],
        max_new_tokens=1024,
        early_stopping=False,
        do_sample=False,
        num_beams=3,
    )
    # Special tokens are kept in the decoded text before it is handed to the
    # post-processor.
    generated_text = processor.batch_decode(generated_ids, skip_special_tokens=False)[0]
    parsed_answer = processor.post_process_generation(
        generated_text,
        task=task_prompt,
        image_size=(image.width, image.height),
    )
    return parsed_answer


def plot_bbox(image, data):
    """Show ``image`` with labelled bounding boxes drawn on top.

    Args:
        image: Image accepted by ``Axes.imshow``.
        data: Mapping with ``'bboxes'`` (iterable of ``x1, y1, x2, y2``) and
            ``'labels'`` (one label per box).
    """
    fig, ax = plt.subplots()
    ax.imshow(image)
    for bbox, label in zip(data['bboxes'], data['labels']):
        x1, y1, x2, y2 = bbox
        rect = patches.Rectangle(
            (x1, y1), x2 - x1, y2 - y1,
            linewidth=1, edgecolor='r', facecolor='none',
        )
        ax.add_patch(rect)
        ax.text(x1, y1, label, color='white', fontsize=8,
                bbox=dict(facecolor='red', alpha=0.5))
    ax.axis('off')
    plt.show()
    # Release the figure so repeated requests do not accumulate open figures.
    plt.close(fig)


def draw_polygons(image, prediction, fill_mask=False):
    """Draw labelled polygons onto ``image`` in place, then show it.

    Each polygon may be a flat ``[x1, y1, x2, y2, ...]`` sequence or a
    sequence of ``(x, y)`` pairs; both are normalised to vertex tuples.
    Polygons with fewer than three vertices or an odd coordinate count are
    skipped because they cannot be drawn.

    Args:
        image: PIL image; it is modified in place.
        prediction: Mapping with ``'polygons'`` (per label, a list of
            polygons) and ``'labels'``.
        fill_mask: When true, fill each polygon with its outline colour.
    """
    draw = ImageDraw.Draw(image)
    colormap = ['blue', 'orange', 'green', 'purple', 'brown',
                'pink', 'gray', 'olive', 'cyan', 'red']
    for polygons, label in zip(prediction['polygons'], prediction['labels']):
        color = random.choice(colormap)
        fill_color = color if fill_mask else None
        for polygon in polygons:
            coords = np.asarray(polygon, dtype=float).ravel()
            if coords.size < 6 or coords.size % 2:
                continue
            vertices = [tuple(pt) for pt in coords.reshape(-1, 2).tolist()]
            draw.polygon(vertices, outline=color, fill=fill_color)
            draw.text(vertices[0], label, fill=color)
    image.show()


def gradio_interface(image, task_prompt, text_input):
    """Gradio callback: run the selected task and visualise drawable results.

    Args:
        image: PIL image from the image input, or ``None`` if nothing was
            uploaded.
        task_prompt: Task selected in the dropdown.
        text_input: Contents of the optional text box.

    Returns:
        The parsed answer from ``run_example`` (shown in the output textbox).

    Raises:
        gr.Error: If no image was provided or no task was selected.
    """
    if image is None:
        raise gr.Error("Please upload an image first.")
    if not task_prompt:
        raise gr.Error("Please select a task.")

    result = run_example(task_prompt, image, text_input)

    # NOTE(review): the Florence-2 model card shows the parsed answer as
    # ``{task_prompt: payload}``; unwrap it when that key is present and
    # otherwise fall back to the result itself -- confirm.
    payload = result.get(task_prompt, result) if isinstance(result, dict) else result

    # Dispatch on the keys the drawing helpers actually read. The original
    # task lists for this dispatch were lost (both were ['', '']), which also
    # made the polygon branch unreachable.
    if isinstance(payload, dict) and 'labels' in payload:
        if 'bboxes' in payload:
            plot_bbox(image, payload)
        elif 'polygons' in payload:
            draw_polygons(image, payload, fill_mask=True)
    return result


# NOTE(review): original indentation was lost; placing all three inputs inside
# the row is a reconstruction -- confirm the intended layout.
with gr.Blocks() as demo:
    gr.Markdown("## Florence Model Advanced Tasks")
    with gr.Row():
        image_input = gr.Image(type="pil")
        task_input = gr.Dropdown(label="Select Task", choices=TASK_PROMPTS)
        text_input = gr.Textbox(
            label="Optional Text Input",
            placeholder="Enter text here if required by the task",
        )
    submit_btn = gr.Button("Run Task")
    output = gr.Textbox(label="Output")
    submit_btn.click(
        fn=gradio_interface,
        inputs=[image_input, task_input, text_input],
        outputs=output,
    )

demo.launch()