Spaces:
Running
Running
File size: 4,072 Bytes
47a8e90 289bee5 47a8e90 289bee5 47a8e90 289bee5 47a8e90 289bee5 47a8e90 c3538d9 47a8e90 344bbb0 47a8e90 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 |
import torch
import gradio as gr
import json
from torchvision import transforms
from PIL import Image, ImageDraw, ImageFont
# Paths to the exported TorchScript detector and its class-index mapping file.
TORCHSCRIPT_PATH = "res/screenrecognition-web350k-vins.torchscript"
LABELS_PATH = "res/class_map_vins_manual.json"
# Load the screen-element detection model once at import time.
model = torch.jit.load(TORCHSCRIPT_PATH)
with open(LABELS_PATH, "r") as f:
    # "idx2Label": maps stringified class indices to human-readable UI element names.
    idx2Label = json.load(f)["idx2Label"]
# Only PIL -> tensor conversion; no resizing/normalization here — presumably
# handled inside the exported model (TODO confirm against the export script).
img_transforms = transforms.ToTensor()
# inter_class_nms and iou functions implemented by GPT
def inter_class_nms(boxes, scores, iou_threshold=0.5):
    """Greedy non-maximum suppression applied across all classes at once.

    Each box is assigned the class with its highest score, and suppression is
    then performed over the whole set (regardless of class), so no two kept
    boxes overlap by more than ``iou_threshold``.

    Args:
        boxes: array-like of shape [N, 4], boxes in (x1, y1, x2, y2) format.
        scores: array-like of shape [N, C], per-class scores for each box.
        iou_threshold: IoU above which the lower-scored box is suppressed.

    Returns:
        dict with keys:
        - 'boxes'  (Tensor[K, 4]): kept boxes, sorted by descending score.
        - 'scores' (Tensor[K]):    best-class score of each kept box.
        - 'labels' (Tensor[K]):    class index of each kept box.
    """
    boxes = torch.as_tensor(boxes).reshape(-1, 4)
    scores = torch.as_tensor(scores)
    # Best class and its score for every box; NMS runs once over all classes.
    box_scores, box_labels = scores.max(dim=1)

    keep = []
    # Indices of boxes sorted by score (highest first).
    order = torch.argsort(box_scores, descending=True)
    while len(order) > 0:
        # Keep the highest-scoring remaining box.
        top = order[0]
        keep.append(int(top))
        order = order[1:]
        if len(order) == 0:
            break
        # Suppress every remaining box that overlaps the kept one too much.
        overlaps = iou(boxes[order], boxes[top].unsqueeze(0)).squeeze(1)
        order = order[overlaps < iou_threshold]

    keep = torch.tensor(keep, dtype=torch.long)
    return {
        'boxes': boxes[keep],
        'scores': box_scores[keep],
        'labels': box_labels[keep],
    }
def iou(boxes1, boxes2):
    """
    Compute the Intersection over Union (IoU) of two sets of boxes.

    Boxes are (x1, y1, x2, y2). Either argument may also be a single 1-D box
    of shape [4]; it is treated as a set of one box.

    Args:
    - boxes1 (Tensor[N, 4]): ground truth boxes
    - boxes2 (Tensor[M, 4]): predicted boxes

    Returns:
    - iou (Tensor[N, M]): the NxM matrix containing the pairwise IoU values
      for every element in boxes1 and boxes2
    """
    # Accept lists/1-D single boxes as well as [K, 4] tensors.
    boxes1 = torch.as_tensor(boxes1).reshape(-1, 4)
    boxes2 = torch.as_tensor(boxes2).reshape(-1, 4)
    area1 = (boxes1[:, 2] - boxes1[:, 0]) * (boxes1[:, 3] - boxes1[:, 1])
    area2 = (boxes2[:, 2] - boxes2[:, 0]) * (boxes2[:, 3] - boxes2[:, 1])
    # Intersection rectangle: max of top-lefts, min of bottom-rights.
    lt = torch.max(boxes1[:, None, :2], boxes2[:, :2])  # [N,M,2]
    rb = torch.min(boxes1[:, None, 2:], boxes2[:, 2:])  # [N,M,2]
    wh = (rb - lt).clamp(min=0)  # [N,M,2]; zero width/height when disjoint
    inter = wh[:, :, 0] * wh[:, :, 1]  # [N,M]
    # Union = area1 + area2 - intersection, broadcast to [N, M].
    return inter / (area1[:, None] + area2 - inter)
def predict(img, conf_thresh=0.4):
    """Run the UI-element detector on a screenshot and draw labeled boxes.

    Args:
        img: PIL.Image of a screen.
        conf_thresh: minimum confidence for a detection to be drawn.

    Returns:
        A copy of ``img`` with red boxes and class-name labels drawn on it.
    """
    img_input = [img_transforms(img)]
    _, pred = model(img_input)
    # Torchvision-style detectors return one dict per input image; unwrap the
    # single-image batch if needed (some exports return the dict directly).
    if isinstance(pred, (list, tuple)):
        pred = pred[0]
    # NMS returns a flat dict with 'boxes', 'scores' and 'labels' tensors.
    pred = inter_class_nms(pred['boxes'], pred['scores'])
    out_img = img.copy()
    draw = ImageDraw.Draw(out_img)
    font = ImageFont.truetype("res/Tuffy_Bold.ttf", 25)
    for box, conf_score, label in zip(pred['boxes'], pred['scores'], pred['labels']):
        if conf_score <= conf_thresh:
            continue
        x1, y1, x2, y2 = (int(coord) for coord in box)
        draw.rectangle([x1, y1, x2, y2], outline='red', width=3)
        text = idx2Label[str(int(label))] + " {:.2f}".format(float(conf_score))
        # Draw a filled background behind the label so it stays readable.
        bbox = draw.textbbox((x1, y1), text, font=font)
        draw.rectangle(bbox, fill="red")
        draw.text((x1, y1), text, font=font, fill="black")
    return out_img
# (image path, default confidence threshold) pairs offered as clickable examples.
_EXAMPLE_PATHS = (
    "res/example.jpg",
    "res/screenlane-snapchat-profile.jpg",
    "res/screenlane-snapchat-settings.jpg",
    "res/example_pair1.jpg",
    "res/example_pair2.jpg",
)
example_imgs = [[path, 0.4] for path in _EXAMPLE_PATHS]
# Build and launch the demo UI: a screenshot input plus a confidence slider,
# returning the annotated screenshot.
interface = gr.Interface(
    fn=predict,
    inputs=[
        gr.Image(type="pil", label="Screenshot"),
        gr.Slider(0.0, 1.0, step=0.1, value=0.4),
    ],
    # `.style(height=...)` was removed in Gradio 4; height is now a
    # constructor argument on the component itself.
    outputs=gr.Image(type="pil", label="Annotated Screenshot", height=600),
    examples=example_imgs,
)
interface.launch()
|