oskarastrom committed
Commit 193f172
1 Parent(s): 8ca2651

Annotation: Frame extraction
app.py CHANGED
@@ -5,7 +5,7 @@ from gradio_scripts.state_handler import reset_state
 import numpy as np
 from gradio_scripts.aws_handler import upload_file
 from aris import create_metadata_table
-from gradio_scripts.annotation_handler import load_frames
+from gradio_scripts.annotation_handler import init_frames
 import json
 from zipfile import ZipFile
 import os
@@ -17,7 +17,8 @@ from gradio_scripts.result_ui import Result_Gradio, update_result, table_headers
 state = {
     'files': [],
     'index': 1,
-    'total': 1
+    'total': 1,
+    'annotation_index': -1
 }
 result = {}
 
@@ -176,19 +177,24 @@ def cancel_inference():
 
 
 # Request loading of animation editor
-def prepare_annotation():
+def prepare_annotation(index):
+
+    state['annotation_index'] = index
+
     return {
         annotation_progress: gr.update(value="<p align='center' style='font-size: large;font-style: italic;'>Loading annotation...</p><!--" + str(np.random.rand()) + "-->", visible=True),
         master_tabs: gr.update(selected=2)
     }
 
 # Load frames and annotation information and show
-def open_annotation(index):
-    print(index)
+def open_annotation(_, progress=gr.Progress()):
+    result_index = state['annotation_index']
+
+    set_progress = lambda pct, msg: progress(pct, desc=msg)
 
     annotation_html = ""
-    if result["aris_input"][index]:
-        frame_info = load_frames(result["aris_input"][index], result['json_result'][index])
+    if result["aris_input"][result_index]:
+        annotations = init_frames(result["aris_input"][result_index], result['json_result'][result_index], gp=set_progress)
 
         # Header
         annotation_html += "<div id='annotation_header'>"
@@ -203,12 +209,12 @@ def open_annotation(index):
         annotation_html += "</div>"
 
         # Dummy objects
-        annotation_html += "<p id='annotation_info' style='display:none'>" + json.dumps(frame_info) + "</p>"
+        annotation_html += "<p id='annotation_info' style='display:none'>" + json.dumps(annotations) + "</p>"
         annotation_html += "<img id='annotation_img' onload='draw()' style='display:none'></img>"
         annotation_html += "<!--" + str(np.random.rand()) + "-->"
 
-    return gr.update(value=annotation_html, visible=True), gr.update(visible=False)
-
+    return gr.update(value=annotation_html, visible=True), gr.update(visible=False), gr.update(visible=True)
+
 
 components = {}
 
@@ -285,7 +291,7 @@ with demo:
     annotation_editor = gr.HTML("", visible=False)
 
     # Event listener for opening annotation
-    annotation_progress.change(open_annotation, annotation_progress, [annotation_editor, annotation_progress], _js="() => window.annotation_index")
+    annotation_progress.change(open_annotation, annotation_progress, [annotation_editor, annotation_progress])
 
     # Event listener for running javascript defined in 'annotation_editor.js'
     with open('gradio_scripts/annotation_editor.js', 'r') as f:
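A note on the new hand-off above: the per-result button now stores which result was clicked in state['annotation_index'] via prepare_annotation, and the subsequent annotation_progress.change event fires open_annotation, which reads that stored index. A minimal sketch of the same two-step pattern with the Gradio wiring and gr.Progress stripped out (plain Python, illustration only):

    state = {'annotation_index': -1}

    def prepare_annotation(index):
        # step 1: remember which result was requested
        state['annotation_index'] = index

    def open_annotation():
        # step 2: normally triggered by the annotation_progress.change event
        return state['annotation_index']

    prepare_annotation(3)
    assert open_annotation() == 3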
dataloader.py CHANGED
@@ -120,7 +120,27 @@ class YOLOFrameDataset(Dataset):
         for i in range(0,n,batch_size):
             self.batch_indices.append((i, min(n, i+batch_size)))
 
-        self.batches = []
+
+    @classmethod
+    def load_image(cls, img, img_size=896):
+        """Loads and resizes 1 image from dataset, returns img, original hw, resized hw.
+        Modified from ScaledYOLOv4.datasets.load_image()
+        """
+
+        h0, w0 = img.shape[:2]
+        h1, w1 = h0, w0
+        r = img_size / max(h0, w0)
+        if r != 1:  # always resize down, only resize up if training with augmentation
+            interp = cv2.INTER_AREA if r < 1 else cv2.INTER_LINEAR
+            img = cv2.resize(img, (int(w0 * r), int(h0 * r)), interpolation=interp)
+            h1, w1 = img.shape[:2]
+
+        return img, (h0, w0), (h1, w1)  # img, hw_original, hw_resized
+
+    def __len__(self):
+        return len(self.batches)
+
+    def __iter__(self):
         for batch_idx in self.batch_indices:
 
             batch = []
@@ -145,30 +165,7 @@ class YOLOFrameDataset(Dataset):
 
             image = torch.stack(batch)
 
-            self.batches.append((image, labels, shapes))
-
-    @classmethod
-    def load_image(cls, img, img_size=896):
-        """Loads and resizes 1 image from dataset, returns img, original hw, resized hw.
-        Modified from ScaledYOLOv4.datasets.load_image()
-        """
-
-        h0, w0 = img.shape[:2]
-        h1, w1 = h0, w0
-        r = img_size / max(h0, w0)
-        if r != 1:  # always resize down, only resize up if training with augmentation
-            interp = cv2.INTER_AREA if r < 1 else cv2.INTER_LINEAR
-            img = cv2.resize(img, (int(w0 * r), int(h0 * r)), interpolation=interp)
-            h1, w1 = img.shape[:2]
-
-        return img, (h0, w0), (h1, w1)  # img, hw_original, hw_resized
-
-    def __len__(self):
-        return len(self.batches)
-
-    def __iter__(self):
-        for batch in self.batches:
-            yield batch
+            yield (image, labels, shapes)
 
 class ARISBatchedDataset(Dataset):
     def __init__(self, aris_filepath, beam_width_dir, annotations_file, batch_size, num_frames_bg_subtract=1000, disable_output=False,
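With YOLOFrameDataset now assembling batches inside __iter__ rather than caching them in self.batches, iteration becomes a plain generator loop. A minimal usage sketch, assuming a dataset instance has already been constructed elsewhere (the constructor call and consume() below are placeholders, not part of the diff):

    dataset = ...  # an existing YOLOFrameDataset instance
    for image, labels, shapes in dataset:   # each batch is built lazily and yielded as a tuple
        consume(image, labels, shapes)      # placeholder for downstream inference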
gradio_scripts/annotation_editor.js CHANGED
@@ -20,6 +20,8 @@
     window.frames = JSON.parse(document.getElementById("annotation_info").innerHTML);
     window.frame_index = 0;
 
+    document.removeEventListener('keydown', keydown);
+    document.removeEventListener('keyup', keyup);
     document.addEventListener('keydown', keydown);
     document.addEventListener('keyup', keyup);
 
@@ -50,7 +52,7 @@
 
     // Load frame image
     const frame_img = frame['frame'];
-    document.getElementById("annotation_img").src = "data:image/png;base64," + frame_img;
+    document.getElementById("annotation_img").src = "annotation_frame_dir/" + frame_index + ".jpg";
     // Draw function is called by this element using the onloaded callback
 
     document.getElementById("annotation_frame_nbr").innerHTML = "Frame " + window.frame_index + "/" + window.frames.length;
gradio_scripts/annotation_handler.py CHANGED
@@ -6,7 +6,8 @@ import base64
 
 VIDEO_HEIGHT = 700
 
-def load_frames(video, preds):
+
+def init_frames(video, preds, gp=None):
     """Load frames for annotation editing
 
@@ -20,35 +21,38 @@ def load_frames(video, preds):
         )
     })
     """
-    if type(video) == str:
 
-        dataloader, dataset = create_dataloader_aris(video, BEAM_WIDTH_DIR, None)
-        frames = dataset.didson.load_frames(start_frame=0)
-    else:
-        frames = video
+    if gp: gp(0, "Loading Frames")
+
+    dataloader, dataset = create_dataloader_aris(video, BEAM_WIDTH_DIR, None)
+    images = dataset.didson.load_frames(start_frame=0, end_frame=1)
+
+    # assumes all frames the same size
+    h, w = images[0].shape
+
+    # enforce a standard size so that text/box thickness is consistent
+    scale_factor = VIDEO_HEIGHT / h
+    h = VIDEO_HEIGHT
+    w = int(scale_factor*w)
 
-    frame_info = []
-    if len(frames):
-        # assumes all frames the same size
-        h, w = frames[0].shape
-
-        # enforce a standard size so that text/box thickness is consistent
-        scale_factor = VIDEO_HEIGHT / h
-        h = VIDEO_HEIGHT
-        w = int(scale_factor*w)
+    annotations = []
 
-        num_frames = min(len(frames), len(preds['frames']))
+    if gp: gp(0, "Extracting Frames")
+    if len(preds['frames']):
 
-        for i, frame_raw in enumerate(frames[:num_frames]):
-            image = cv2.resize(cv2.cvtColor(frame_raw, cv2.COLOR_GRAY2BGR), (w,h))
-            retval, buffer = cv2.imencode('.jpg', image)
-            jpg_as_text = base64.b64encode(buffer).decode("utf-8")
+        for i, frame_info in enumerate(preds['frames']):
+            if gp: gp(i/len(preds['frames']), "Extracting Frames")
+
+            # Extract frames
+            img_raw = dataset.didson.load_frames(start_frame=i, end_frame=i+1)[0]
+            image = cv2.resize(cv2.cvtColor(img_raw, cv2.COLOR_GRAY2BGR), (w, h))
+            cv2.imwrite("annotation_frame_dir/" + str(i) + ".jpg", image)
 
+            # Extract annotations
             frame = {
                 'annotations': [],
-                'frame': jpg_as_text
             }
-            for fish in preds['frames'][i]['fish']:
+            for fish in frame_info['fish']:
                 xmin, ymin, xmax, ymax = fish['bbox']
                 frame['annotations'].append({
                     'bbox': {
@@ -60,6 +64,7 @@ def load_frames(video, preds):
                 'id': str(fish['fish_id']),
                 'conf': fish['conf']
             })
-            frame_info.append(frame)
-
-    return frame_info
+            annotations.append(frame)
+
+    return annotations
+
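The gp argument added to init_frames is an optional progress callback taking a completion fraction and a status message, matching the set_progress lambda that app.py passes in. A small sketch of that contract with a stand-in callback; the ARIS path and preds value below are placeholders:

    from gradio_scripts.annotation_handler import init_frames

    def print_progress(pct, msg):
        # stand-in for the gr.Progress-backed callback that app.py passes as gp
        print(f"{msg}: {pct:.0%}")

    preds = {'frames': []}                    # placeholder prediction structure
    annotations = init_frames("example.aris", # placeholder ARIS file path
                              preds, gp=print_progress)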
gradio_scripts/result_ui.py CHANGED
@@ -100,7 +100,7 @@ def Result_Gradio(prepare_annotation, components):
 
         # Button for opening result in annotation editor
         annotation_btn = gr.Button("Edit Annotation", visible=False)
-        annotation_btn.click(prepare_annotation, None, [components['annotation_progress'], components['master_tabs']], _js="() => window.annotation_index=" + str(i))
+        annotation_btn.click(prepare_annotation, annotation_btn, [components['annotation_progress'], components['master_tabs']], _js="() => " + str(i))
 
         # Add components to tab dict for easy access later on
         tabs.append({
@@ -114,4 +114,6 @@ def Result_Gradio(prepare_annotation, components):
     # Add all components to list of visualization outputs
     visual_components.extend([tab, metadata_out, video_out, table_out, annotation_btn])
 
+    components['result_tabs'] = tab_parent
+
     return visual_components
inference.py CHANGED
@@ -174,7 +174,6 @@ def do_suppression(inference, gp=None, batch_size=BATCH_SIZE, conf_thres=CONF_TH
             output = non_max_suppression(inf_out, conf_thres=conf_thres, iou_thres=iou_thres)
 
 
-            print(type(output))
             outputs.append(output)
 
             pbar.update(1*batch_size)
scripts/detect_frames.py CHANGED
@@ -47,19 +47,23 @@ def main(args, config={}, verbose=True):
 
         in_loc_dir = os.path.join(dirname, loc)
         out_dir = os.path.join(args.output, loc, "tracker", "data")
-        os.makedirs(out_dir, exist_ok=True)
         print(in_loc_dir)
         print(out_dir)
 
         # run detection + tracking
-        model, device = setup_model(args.weights)
 
         seq_list = os.listdir(in_loc_dir)
-        idx = 1
 
-        ann_list = []
-        with tqdm(total=len(seq_list), desc="...", ncols=0) as pbar:
+        iterate_sequences(in_loc_dir, out_dir, config, args.weights, seq_list, verbose)
+
+
+def iterate_sequences(in_dir, out_dir, config, weights, seq_list, verbose):
+    model, device = setup_model(weights)
+
+    idx = 1
+    with tqdm(total=len(seq_list), desc="...", ncols=0) as pbar:
         for seq in seq_list:
+            ann_list = []
             pbar.update(1)
             if (seq.startswith(".")): continue
             pbar.set_description("Processing " + seq)
@@ -68,26 +72,43 @@ def main(args, config={}, verbose=True):
             print("(" + str(idx) + "/" + str(len(seq_list)) + ") " + seq)
             print(" ")
             idx += 1
-            in_seq_dir = os.path.join(in_loc_dir, seq)
+            in_seq_dir = os.path.join(in_dir, seq)
             frame_list = detect(in_seq_dir, out_dir, config, seq, model, device, verbose)
-            i = 0
             for frame in frame_list:
-                print(frame)
                 if frame is not None:
                     for ann in frame:
-                        print(ann)
                         ann_list.append({
-                            'image_id': i,
+                            'image_id': ann[5],
                             'category_id': 0,
                             'bbox': [ann[0], ann[1], ann[2] - ann[0], ann[3] - ann[1]],
                             'score': ann[4]
                         })
-                i += 1
-    result = json.dumps(ann_list)
-    with open(os.path.join(args.output, 'pred.json'), 'w') as f:
-        f.write(result)
-
-
+            result = json.dumps(ann_list)
+
+            out_seq_dir = os.path.join(out_dir, seq)
+            os.makedirs(out_seq_dir, exist_ok=True)
+            with open(os.path.join(out_seq_dir, 'pred.json'), 'w') as f:
+                f.write(result)
+
+def iterate_files(in_dir, out_dir, config, weights, verbose):
+    model, device = setup_model(weights)
+
+    ann_list = []
+    frame_list = detect(in_dir, out_dir, config, "specified folder", model, device, verbose)
+    with tqdm(total=len(frame_list), desc="...", ncols=0) as pbar:
+        for frame in frame_list:
+            if frame is not None:
+                for ann in frame:
+                    ann_list.append({
+                        'image_id': ann[5],
+                        'category_id': 0,
+                        'bbox': [ann[0], ann[1], ann[2] - ann[0], ann[3] - ann[1]],
+                        'score': ann[4]
+                    })
+            pbar.update(1)
+    result = json.dumps(ann_list)
+    with open(os.path.join(out_dir, 'pred.json'), 'w') as f:
+        f.write(result)
 
 def detect(in_dir, out_dir, config, seq_name, model, device, verbose):
 
@@ -107,6 +128,7 @@ def detect(in_dir, out_dir, config, seq_name, model, device, verbose):
 
     outputs = do_suppression(inference, conf_thres=config['conf_threshold'], iou_thres=config['nms_iou'], verbose=verbose)
 
+    file_names = dataloader.files
     frame_list = []
     for batch_i, batch in enumerate(outputs):
 
@@ -115,14 +137,20 @@ def detect(in_dir, out_dir, config, seq_name, model, device, verbose):
         # Format results
         for si, pred in enumerate(batch):
             (image_shape, original_shape) = batch_shapes[si]
+
             # Clip boxes to image bounds and resize to input shape
             clip_boxes(pred, (height, width))
             boxes = pred[:, :4].clone()  # xyxy
             confs = pred[:, 4].clone().tolist()
             scale_boxes(image_shape, boxes, original_shape[0], original_shape[1])  # to original shape
-            ann = [ [*bb, conf] for bb, conf in zip(boxes.tolist(), confs) ]
 
-            frame_list.append(ann)
+            frame = [ [*bb, conf, file_name] for bb, conf, file_name in zip(boxes.tolist(), confs, file_names[batch_i*32+si]) ]
+
+            file_name = file_names[batch_i*32 + si]
+            for ann in frame:
+                ann.append(file_name)
+
+            frame_list.append(frame)
 
     return frame_list
 
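Each record appended to ann_list (and written to pred.json) now carries an 'image_id' taken from the source file name that detect() attaches to every annotation, alongside the COCO-style xywh box converted from the model's xyxy output. A sketch of one such record with placeholder values:

    record = {
        'image_id': 'frame_000123.jpg',    # source frame file name (ann[5]); placeholder
        'category_id': 0,
        'bbox': [12.0, 34.0, 44.0, 20.0],  # [x, y, width, height], converted from xyxy
        'score': 0.87,                     # detection confidence; placeholder
    }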