andrewgleave committed
Commit 30cc27c
1 Parent(s): 6cb8901

Use OpenCV

Files changed (2):
  1. app.py +16 -48
  2. requirements.txt +1 -0
app.py CHANGED
@@ -5,68 +5,38 @@ import time
 
 import torch
 import gradio as gr
-import matplotlib
-import matplotlib.pyplot as plt
-from PIL import Image
+import cv2
 
 from transformers import AutoFeatureExtractor, AutoModelForObjectDetection
 
 extractor = AutoFeatureExtractor.from_pretrained("hustvl/yolos-tiny")
 model = AutoModelForObjectDetection.from_pretrained("hustvl/yolos-tiny")
 
-matplotlib.pyplot.switch_backend('Agg')
-
-COLORS = [
-    [0.000, 0.447, 0.741],
-    # [0.850, 0.325, 0.098],
-    # [0.929, 0.694, 0.125],
-    # [0.494, 0.184, 0.556],
-    # [0.466, 0.674, 0.188],
-    # [0.301, 0.745, 0.933]
-]
-
+BBOX_COLOR = [255, 0, 0]
 PRED_THRESHOLD = 0.90
 
-def fig2img(fig):
-    buf = io.BytesIO()
-    fig.savefig(buf)
-    buf.seek(0)
-    img = Image.open(buf)
-    return img
-
-def composite_predictions(img, processed_predictions):
-    keep = processed_predictions["labels"] == 1  # only interested in people
-    boxes = processed_predictions["boxes"][keep].tolist()
-    scores = processed_predictions["scores"][keep].tolist()
-    labels = processed_predictions["labels"][keep].tolist()
-
-    labels = [model.config.id2label[x] for x in labels]
+def composite_predictions(img, processed_predictions, show_video=False):
+    interested_labels = processed_predictions["labels"] == 1  # only interested in people
+    scores = processed_predictions["scores"][interested_labels].tolist()
+    boxes = [[int(j) for j in x] for x in processed_predictions["boxes"][interested_labels].tolist()]
+    labels = [model.config.id2label[x] for x in processed_predictions["labels"][interested_labels].tolist()]
 
-    plt.figure(figsize=(16, 10))
-    plt.imshow(img)
-    axis = plt.gca()
-    label_counts = defaultdict(int)
-    for score, (xmin, ymin, xmax, ymax), label, color in zip(scores, boxes, labels, COLORS * len(boxes)):
-        axis.add_patch(plt.Rectangle((xmin, ymin), xmax - xmin, ymax - ymin, fill=False, color=color, linewidth=3))
-        axis.text(xmin, ymin, f"{label}: {score:0.2f}", fontsize=15, bbox=dict(facecolor="yellow", alpha=0.5))
-        label_counts[label] += 1
-    plt.axis("off")
-    img = fig2img(plt.gcf())
-    matplotlib.pyplot.close()
-    return img, label_counts, datetime.datetime.now()
+    for score, box, label in zip(scores, boxes, labels):
+        cv2.rectangle(img, (box[0], box[1]), (box[2], box[3]), BBOX_COLOR, 1)  # box is (xmin, ymin, xmax, ymax): pass corner points, not an (x, y, w, h) rect
+        cv2.putText(img, f"{label}: {score:0.2f}", (box[0] + 2, box[1] + 10), cv2.FONT_HERSHEY_SIMPLEX, 0.33, BBOX_COLOR, 1, cv2.LINE_AA)
+    return img, len(boxes), datetime.datetime.now()
 
 def process(img):
-    #time.sleep(5)
     inputs = extractor(images=img, return_tensors="pt")
     outputs = model(**inputs)
-    img_size = torch.tensor([tuple(reversed(img.size))])
+    h, w, _ = img.shape
+    img_size = torch.tensor([(h, w)])
     processed = extractor.post_process_object_detection(outputs, PRED_THRESHOLD, img_size)
 
     # Composite image and prediction bounding boxes + labels prediction
     return composite_predictions(img, processed[0])
 
 
-
 with gr.Blocks() as demo:
     stream = gr.State()
     with gr.Row():
@@ -75,11 +45,9 @@ with gr.Blocks() as demo:
         attendance_label = gr.Label(label="Current Attendance")
     with gr.Row():
         with gr.Column(scale=1, min_width=600):
-            webcam = gr.Webcam(streaming=True, type='pil')
-            output = gr.Image(label="Composite")
-            webcam.stream(process, [webcam], [output, attendance_label, last_refresh_box])
-            #webcam.change(process, inputs=[], outputs=[output, gr.Label(label="Current Attendance"), last_refresh_box], max_batch_size=10, batch=True)
-            #demo.load(lambda: datetime.datetime.now(), None, last_refresh_box, every=10)
+            webcam = gr.Webcam(streaming=True)
+            output = gr.Image(label="Composite", visible=True)
+            webcam.stream(process, [webcam], [output, attendance_label, last_refresh_box])
 
 if __name__ == "__main__":
     demo.queue().launch()
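For reference, the OpenCV drawing path introduced here can be exercised outside Gradio. The sketch below is minimal and makes a few assumptions beyond the diff: a local test image whose name ("sample.jpg") is a placeholder, and that label id 1 maps to "person" in this checkpoint's COCO id2label table (which the committed "only interested in people" filter relies on).

    import datetime

    import cv2
    import torch
    from transformers import AutoFeatureExtractor, AutoModelForObjectDetection

    extractor = AutoFeatureExtractor.from_pretrained("hustvl/yolos-tiny")
    model = AutoModelForObjectDetection.from_pretrained("hustvl/yolos-tiny")

    BBOX_COLOR = [255, 0, 0]
    PRED_THRESHOLD = 0.90

    # "sample.jpg" is a placeholder. cv2.imread returns BGR; convert to RGB
    # to match what the Gradio webcam component hands to process().
    img = cv2.cvtColor(cv2.imread("sample.jpg"), cv2.COLOR_BGR2RGB)

    inputs = extractor(images=img, return_tensors="pt")
    with torch.no_grad():
        outputs = model(**inputs)

    # Post-processing expects one (height, width) pair per image.
    h, w, _ = img.shape
    processed = extractor.post_process_object_detection(
        outputs, PRED_THRESHOLD, torch.tensor([(h, w)])
    )[0]

    keep = processed["labels"] == 1  # id 1 is "person" for this checkpoint
    boxes = [[int(v) for v in box] for box in processed["boxes"][keep].tolist()]
    scores = processed["scores"][keep].tolist()

    for (xmin, ymin, xmax, ymax), score in zip(boxes, scores):
        # Draw in place; boxes are absolute pixel (xmin, ymin, xmax, ymax).
        cv2.rectangle(img, (xmin, ymin), (xmax, ymax), BBOX_COLOR, 1)
        cv2.putText(img, f"person: {score:0.2f}", (xmin + 2, ymin + 10),
                    cv2.FONT_HERSHEY_SIMPLEX, 0.33, BBOX_COLOR, 1, cv2.LINE_AA)

    print(f"{len(boxes)} person(s) detected at {datetime.datetime.now()}")

Unlike the earlier matplotlib version, this path mutates the input frame directly instead of rendering a figure to a PNG buffer, which avoids the Agg round-trip per webcam frame.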
requirements.txt CHANGED
@@ -33,6 +33,7 @@ mdit-py-plugins==0.3.1
 mdurl==0.1.2
 multidict==6.0.2
 numpy==1.23.5
+opencv-python==4.6.0.66
 orjson==3.8.2
 packaging==21.3
 pandas==1.5.1
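A quick way to confirm the pinned wheel imports correctly; note that cv2 reports the library version without the wheel's packaging suffix:

    import cv2
    print(cv2.__version__)  # expected "4.6.0" for the opencv-python==4.6.0.66 wheel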