|
import gradio as gr |
|
from PIL import Image, ImageDraw, ImageFont
|
import sys |
|
import os |
|
# URL of the trained weights, provided via the MODEL environment variable
model_url = os.environ.get('MODEL')

import torch

# Download the weights locally so they can be loaded below
torch.hub.download_url_to_file(model_url, 'model.pt')
|
|
|
sys.path.append("./")

sys.path.append("./yolov5")

from util import Detection, classes

from yolov5.detect import predict, load_yolo_model
|
|
|
|
|
|
|
# Load the downloaded YOLOv5 weights; returns the model together with its stride, class names and backend flags
model, stride, names, pt, jit, onnx, engine = load_yolo_model("model.pt")
|
|
|
def run_yolo(img): |
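    """Run character detection on the uploaded image and return a copy with boxes and labels drawn."""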
|
|
|
img0 = Image.open(img.name).convert("RGB") |
|
draw = ImageDraw.Draw(img0) |
|
|
|
    # Run YOLOv5 inference on the uploaded file
    predictions = predict(model, stride, names, pt, jit, onnx, engine,
                          imgsz=[1280, 1280], conf_thres=0.5, iou_thres=0.3,
                          save_conf=True, exist_ok=True, save_txt=False,
                          source=img.name, project=None, name=None)
|
|
|
    detections: list[Detection] = []

    for k, (bboxes, _img) in enumerate(predictions):
|
|
|
|
|
|
|
for i, bbox in enumerate(bboxes): |
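            # Build a Detection record from the raw bounding box; the first argument is a simple numeric id derived from the loop indices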
|
det = Detection( |
|
(k+1)*(i+1), |
|
bbox["xmin"], |
|
bbox["ymin"], |
|
bbox["xmax"], |
|
bbox["ymax"], |
|
bbox["conf"], |
|
bbox["class"], |
|
classes[int(bbox["class"])], |
|
img0.size |
|
) |
|
            # Discard this detection if it shares a top-left corner with, is contained in,
            # contains, or heavily overlaps (IoU > 0.6) an already accepted detection
            same = [
                x for x in detections
                if (x.xmin == det.xmin and x.ymin == det.ymin)
                or (det.xmin > x.xmin and det.ymin > x.ymin and det.xmax < x.xmax and det.ymax < x.ymax)
                or (det.xmin < x.xmin and det.ymin < x.ymin and det.xmax > x.xmax and det.ymax > x.ymax)
                or Detection.get_iou(det, x) > 0.6
            ]
|
|
|
if len(same) == 0: |
|
detections.append(det) |
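                # Draw the bounding box, a white label strip above it, and the class name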
|
draw.rectangle(((det.xmin, det.ymin), (det.xmax, det.ymax)), fill=None, outline=(255,255,255)) |
|
draw.rectangle(((det.xmin, det.ymin - 10), (det.xmax, det.ymin)), fill=(255,255,255)) |
|
draw.text((det.xmin, det.ymin - 10), det.class_name, fill=(0,0,0), font=ImageFont.truetype("Roboto-Regular.ttf")) |
|
|
|
return img0 |
|
|
|
|
|
inputs = gr.inputs.Image(type='file', label="Original Image") |
|
outputs = gr.outputs.Image(type="pil", label="Output Image") |
|
|
|
title = "Letter Detection" |
|
description = "Object Detection-based OCR. Upload an image or click one of the example images to try it."
|
article = "<p style='text-align: center'>This is a character-level OCR trained on: <ul><li>Screenshots</li><li>Random photos taken from a smartphone</li><li>Synthetic images</li><li>Receipts</li></ul></p>"
|
|
|
examples = [['images/cover1.jpg'], ['images/scontrino1.jpg'], ['images/gradientgpu.jpg'], ['images/sugar.jpg'], ['images/google1.jpg']]
|
|
|
gr.Interface(run_yolo, inputs, outputs, title=title, description=description, article=article, examples=examples, theme="huggingface").launch(enable_queue=True) |