"""Gradio demo for LangEfficientSAM: text-prompted segmentation of an image."""

import os
import warnings

import gradio as gr
import numpy as np
from PIL import Image

from lang_efficient_sam.LangEfficientSAM import LangEfficientSAM
from lang_efficient_sam.utils.draw_image import draw_image

warnings.filterwarnings("ignore")

# Built once at import time so every request reuses the same instance.
model = LangEfficientSAM()

# Directory holding the bundled example images, resolved relative to this file
# so the examples work regardless of the current working directory.
_IMAGES_DIR = os.path.join(os.path.dirname(__file__), "images")


def predict(box_threshold, text_threshold, image_path, text_prompt):
    """Run the model on one image and return the annotated result.

    Args:
        box_threshold: Value of the "Box threshold" slider; forwarded
            unchanged to ``model.predict``.
        text_threshold: Value of the "Text threshold" slider; forwarded
            unchanged to ``model.predict``.
        image_path: Filesystem path of the input image (the image input is
            configured with ``type="filepath"``).
        text_prompt: Text prompt forwarded unchanged to ``model.predict``.

    Returns:
        A ``PIL.Image.Image`` in RGB mode built from the output of
        ``draw_image``.

    Raises:
        gr.Error: If no image was provided.
    """
    print("Predicting... ", box_threshold, text_threshold, image_path, text_prompt)

    # Without this guard a missing image fails deep inside PIL with an
    # error message that is meaningless to the person using the demo.
    if not image_path:
        raise gr.Error("Please provide an image.")

    # The context manager closes the underlying file promptly; convert()
    # returns an independent in-memory image, so it is safe to use afterwards.
    with Image.open(image_path) as source_image:
        image_pil = source_image.convert("RGB")

    masks, boxes, phrases, logits = model.predict(
        image_pil, text_prompt, box_threshold, text_threshold
    )
    # One caption per detection: the phrase followed by its two-decimal score.
    labels = [f"{phrase} {logit:.2f}" for phrase, logit in zip(phrases, logits)]

    image_array = np.asarray(image_pil)
    annotated = draw_image(image_array, masks, boxes, labels)
    return Image.fromarray(np.uint8(annotated)).convert("RGB")


title = "LangEfficientSAM"

# Order must match the positional parameters of ``predict``.
inputs = [
    gr.Slider(0, 1, value=0.3, label="Box threshold"),
    gr.Slider(0, 1, value=0.25, label="Text threshold"),
    gr.Image(type="filepath", label='Image'),
    gr.Textbox(lines=1, label="Text Prompt"),
]

outputs = [gr.Image(type="pil", label="Output Image")]

# Each row: [box_threshold, text_threshold, image_path, text_prompt].
examples = [
    [0.20, 0.20, os.path.join(_IMAGES_DIR, "living.jpg"), "fabric"],
    [0.36, 0.25, os.path.join(_IMAGES_DIR, "fruits.jpg"), "apple"],
    [0.20, 0.20, os.path.join(_IMAGES_DIR, "street.jpg"), "car"],
]

demo = gr.Interface(
    fn=predict,
    inputs=inputs,
    outputs=outputs,
    examples=examples,
    title=title,
)

demo.launch(debug=False, share=False)