Spaces:
Sleeping
Sleeping
import gradio as gr | |
from transformers import AutoModelForCausalLM, AutoProcessor | |
from PIL import Image, ImageDraw | |
import requests | |
import matplotlib.pyplot as plt | |
import matplotlib.patches as patches | |
import numpy as np | |
import random | |
# Load the Florence-2 checkpoint and its processor once, at import time.
# Both loaders are called with trust_remote_code=True.
model_id = 'microsoft/Florence-2-large'
model = AutoModelForCausalLM.from_pretrained(model_id, trust_remote_code=True)
model = model.eval()  # same object the original obtained by chaining .eval()
processor = AutoProcessor.from_pretrained(model_id, trust_remote_code=True)
def run_example(task_prompt, image, text_input=None):
    """Run a single task prompt on an image and return the parsed answer.

    Args:
        task_prompt: Task token string (e.g. '<OD>'). It is used both as the
            start of the prompt and as the ``task`` for post-processing.
        image: Image object exposing ``width`` and ``height``.
        text_input: Optional text appended directly after ``task_prompt``.
            When ``None`` the prompt is the task token alone.

    Returns:
        The value produced by ``processor.post_process_generation``.
    """
    if text_input is None:
        full_prompt = task_prompt
    else:
        full_prompt = task_prompt + text_input

    encoded = processor(text=full_prompt, images=image, return_tensors="pt")

    # Deterministic beam search: sampling disabled, three beams.
    generation_kwargs = dict(
        input_ids=encoded["input_ids"],
        pixel_values=encoded["pixel_values"],
        max_new_tokens=1024,
        early_stopping=False,
        do_sample=False,
        num_beams=3,
    )
    output_ids = model.generate(**generation_kwargs)

    # Special tokens are kept in the decoded text that is handed to the
    # post-processing step.
    decoded = processor.batch_decode(output_ids, skip_special_tokens=False)
    first_sequence = decoded[0]

    return processor.post_process_generation(
        first_sequence,
        task=task_prompt,
        image_size=(image.width, image.height),
    )
def plot_bbox(image, data):
    """Show an image with labelled bounding boxes drawn on top of it.

    Args:
        image: Image to display (anything ``Axes.imshow`` accepts).
        data: Mapping with two parallel sequences: ``'bboxes'`` holding
            ``(x1, y1, x2, y2)`` corner coordinates and ``'labels'`` holding
            the text for each box.

    Returns:
        None. The figure is shown with ``plt.show()`` and then closed.
    """
    fig, ax = plt.subplots()
    try:
        ax.imshow(image)
        for bbox, label in zip(data['bboxes'], data['labels']):
            x1, y1, x2, y2 = bbox
            rect = patches.Rectangle(
                (x1, y1), x2 - x1, y2 - y1,
                linewidth=1, edgecolor='r', facecolor='none',
            )
            ax.add_patch(rect)
            # Draw through the explicit Axes rather than pyplot's implicit
            # "current axes" so the text always lands on this figure.
            ax.text(x1, y1, label, color='white', fontsize=8,
                    bbox=dict(facecolor='red', alpha=0.5))
        ax.axis('off')
        plt.show()
    finally:
        # pyplot keeps every figure alive until it is closed; without this a
        # long-running process leaks one figure per call.
        plt.close(fig)
def _polygon_points(polygon):
    """Return ``polygon`` as a list of ``(x, y)`` tuples.

    Accepts either a flat coordinate sequence ``[x1, y1, x2, y2, ...]`` or a
    sequence of point pairs ``[[x1, y1], [x2, y2], ...]``.
    """
    coords = list(polygon)
    if coords and not hasattr(coords[0], '__len__'):
        # Flat layout: pair up consecutive x/y values.
        return list(zip(coords[0::2], coords[1::2]))
    return [(pt[0], pt[1]) for pt in coords]


def draw_polygons(image, prediction, fill_mask=False):
    """Draw labelled polygons onto ``image`` in place and show it.

    Args:
        image: PIL image; it is modified in place by the drawing calls.
        prediction: Mapping with parallel sequences ``'polygons'`` (for each
            label, a list of polygons) and ``'labels'``.
        fill_mask: When true, each polygon is filled with its outline colour.

    Returns:
        None. The annotated image is displayed with ``image.show()``.
    """
    draw = ImageDraw.Draw(image)
    colormap = ['blue', 'orange', 'green', 'purple', 'brown', 'pink', 'gray', 'olive', 'cyan', 'red']
    for polygons, label in zip(prediction['polygons'], prediction['labels']):
        color = random.choice(colormap)
        fill_color = color if fill_mask else None
        for polygon in polygons:
            # Normalise first: indexing polygon[0][0] directly fails on flat
            # coordinate lists, which is the layout Florence-2 emits.
            points = _polygon_points(polygon)
            if len(points) < 3:
                # Fewer than three vertices is not a drawable polygon.
                continue
            draw.polygon(points, outline=color, fill=fill_color)
            draw.text(points[0], label, fill=color)
    image.show()
def gradio_interface(image, task_prompt, text_input):
    """Gradio callback: run the selected task and return its parsed result.

    Args:
        image: Uploaded image, or ``None`` if nothing was uploaded.
        task_prompt: Task token chosen in the dropdown, or ``None``/empty if
            nothing was selected.
        text_input: Contents of the optional text box.

    Returns:
        The parsed result from ``run_example``, unchanged.

    Raises:
        gr.Error: If no image or no task was provided.
    """
    if image is None:
        raise gr.Error("Please upload an image before running a task.")
    if not task_prompt:
        raise gr.Error("Please select a task before running.")

    result = run_example(task_prompt, image, text_input)

    # Florence-2 post-processing nests the answer under the task token, e.g.
    # {'<OD>': {'bboxes': [...], 'labels': [...]}}; unwrap it for the drawing
    # helpers, falling back to the raw result if it is not nested.
    payload = result
    if isinstance(result, dict) and isinstance(result.get(task_prompt), dict):
        payload = result[task_prompt]

    if task_prompt in ['<OD>', '<OPEN_VOCABULARY_DETECTION>']:
        if isinstance(payload, dict) and 'labels' not in payload and 'bboxes_labels' in payload:
            # Open-vocabulary detection names its label list 'bboxes_labels';
            # adapt it to the 'bboxes'/'labels' shape plot_bbox reads.
            payload = {
                'bboxes': payload.get('bboxes', []),
                'labels': payload['bboxes_labels'],
            }
        plot_bbox(image, payload)
    elif task_prompt in ['<REFERRING_EXPRESSION_SEGMENTATION>', '<REGION_TO_SEGMENTATION>']:
        draw_polygons(image, payload, fill_mask=True)

    # NOTE(review): the visualisations above are displayed by the helpers
    # themselves; only this parsed result is returned to the Gradio output.
    return result
# Build the UI: an input row (image, task selector, optional text), a run
# button, and a text box that receives the value returned by gradio_interface.
# NOTE(review): the original indentation was lost; the Row is assumed to hold
# the three input widgets — confirm against the deployed layout.
with gr.Blocks() as demo:
    gr.Markdown("## Florence Model Advanced Tasks")

    with gr.Row():
        image_input = gr.Image(type="pil")
        task_input = gr.Dropdown(
            label="Select Task",
            choices=[
                '<CAPTION>',
                '<DETAILED_CAPTION>',
                '<MORE_DETAILED_CAPTION>',
                '<OD>',
                '<DENSE_REGION_CAPTION>',
                '<REGION_PROPOSAL>',
                '<CAPTION_TO_PHRASE_GROUNDING>',
                '<REFERRING_EXPRESSION_SEGMENTATION>',
                '<REGION_TO_SEGMENTATION>',
                '<OPEN_VOCABULARY_DETECTION>',
                '<REGION_TO_CATEGORY>',
                '<REGION_TO_DESCRIPTION>',
                '<OCR>',
                '<OCR_WITH_REGION>',
            ],
        )
        text_input = gr.Textbox(
            label="Optional Text Input",
            placeholder="Enter text here if required by the task",
        )

    submit_btn = gr.Button("Run Task")
    output = gr.Textbox(label="Output")

    # Wire the button to the callback; argument order matches
    # gradio_interface(image, task_prompt, text_input).
    submit_btn.click(
        fn=gradio_interface,
        inputs=[image_input, task_input, text_input],
        outputs=output,
    )

demo.launch()