"""YOLOS minimal inference example.ipynb

Automatically generated by Colaboratory.

Original file is located at
    https://colab.research.google.com/github/NielsRogge/Transformers-Tutorials/blob/master/YOLOS/YOLOS_minimal_inference_example.ipynb

## Set-up environment

First, we install the HuggingFace Transformers library (from source for now, as
YOLOS was only just added to the library and is not yet included in a PyPI
release).
"""
|
import os

import cv2
import matplotlib.pyplot as plt
import torch
from PIL import Image
from transformers import AutoFeatureExtractor, YolosForObjectDetection

# Sanity check: confirm the current working directory.
print(os.getcwd())
|
|
|
|
# Colors for the bounding boxes (cycled over detections).
COLORS = [[0.000, 0.447, 0.741], [0.850, 0.325, 0.098], [0.929, 0.694, 0.125],
          [0.494, 0.184, 0.556], [0.466, 0.674, 0.188], [0.301, 0.745, 0.933]]


def plot_results(pil_img, prob, boxes, count):
    """Draw the detections on the frame and save it as exp2/frame<NN>.png."""
    plt.figure(figsize=(16, 10))
    plt.imshow(pil_img)
    ax = plt.gca()
    colors = COLORS * 100
    for p, (xmin, ymin, xmax, ymax), c in zip(prob, boxes.tolist(), colors):
        ax.add_patch(plt.Rectangle((xmin, ymin), xmax - xmin, ymax - ymin,
                                   fill=False, color=c, linewidth=3))
        cl = p.argmax()
        text = f'{model.config.id2label[cl.item()]}: {p[cl]:0.2f}'
        ax.text(xmin, ymin, text, fontsize=15,
                bbox=dict(facecolor='yellow', alpha=0.5))
    plt.axis('off')
    # Zero-pad the frame number so the filenames sort lexicographically.
    plt.savefig('exp2/frame%02d.png' % count)
    plt.close()  # release the figure; otherwise every frame leaks memory
|
|
# Load the YOLOS model and its feature extractor from the Hub.
model = YolosForObjectDetection.from_pretrained("hustvl/yolos-small")
feature_extractor = AutoFeatureExtractor.from_pretrained("hustvl/yolos-small")

# Make sure the output directory for the annotated frames exists.
os.makedirs('exp2', exist_ok=True)

vidcap = cv2.VideoCapture('/content/2022-08-10_ППП-стоянки_кам-3_191356 (online-video-cutter.com).mp4')
success, image = vidcap.read()
count = 0
|
# Run detection on every 10th frame of the video.
while success:
    success, image = vidcap.read()
    print('Read a new frame:', success)
    count += 1

    if success and count % 10 == 0:
        # OpenCV reads frames in BGR order; convert to RGB before handing the
        # frame to PIL and the model.
        image = Image.fromarray(cv2.cvtColor(image, cv2.COLOR_BGR2RGB))
        pixel_values = feature_extractor(image, return_tensors="pt").pixel_values

        with torch.no_grad():
            outputs = model(pixel_values, output_attentions=True)

        # Keep only predictions with confidence > 0.8 (the last logit is the
        # "no object" class, so it is sliced off).
        probas = outputs.logits.softmax(-1)[0, :, :-1]
        keep = probas.max(-1).values > 0.8

        # Rescale the predicted boxes to the original frame size.
        target_sizes = torch.tensor(image.size[::-1]).unsqueeze(0)
        postprocessed_outputs = feature_extractor.post_process(outputs, target_sizes)
        bboxes_scaled = postprocessed_outputs[0]['boxes']

        plot_results(image, probas[keep], bboxes_scaled[keep], count)

vidcap.release()
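# Note: on newer transformers releases, `post_process` is deprecated in favour
# of `post_process_object_detection`. A sketch of the equivalent call (check
# the installed version before relying on it):
#
#   results = feature_extractor.post_process_object_detection(
#       outputs, threshold=0.8, target_sizes=target_sizes)[0]
#   boxes, labels = results['boxes'], results['labels']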
|
|
|
"""Set model and directory parameters: |
|
|
|
Perform sliced inference on given folder: |
|
""" |
|
|
|
image_folder = '/content/exp2'
file_list = os.listdir(image_folder)


def last_2chars(x):
    # 'frameNN.png' -> 'NN'; assumes two-digit, zero-padded frame numbers.
    return x[5:7]


# Sort the saved frames by frame number so the video plays in order.
srtd = sorted(file_list, key=last_2chars)

video_name = 'video.avi'

images = [img for img in srtd if img.endswith(".png")]
frame = cv2.imread(os.path.join(image_folder, images[0]))
height, width, layers = frame.shape

# fourcc 0 writes uncompressed frames; play back at 5 fps.
video = cv2.VideoWriter(video_name, 0, 5, (width, height))

for image in images:
    video.write(cv2.imread(os.path.join(image_folder, image)))

cv2.destroyAllWindows()
video.release()
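# Optional follow-up: a sketch for re-encoding the raw AVI to MP4 (assuming
# ffmpeg is available in the runtime, as it is on standard Colab images), which
# makes the result easier to play back inline or download:
# !ffmpeg -y -i video.avi -vcodec libx264 video.mp4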