oskarastrom committed
Commit 193f172
1 Parent(s): 8ca2651

Annotation: Frame extraction
app.py CHANGED
@@ -5,7 +5,7 @@ from gradio_scripts.state_handler import reset_state
 import numpy as np
 from gradio_scripts.aws_handler import upload_file
 from aris import create_metadata_table
-from gradio_scripts.annotation_handler import load_frames
+from gradio_scripts.annotation_handler import init_frames
 import json
 from zipfile import ZipFile
 import os
@@ -17,7 +17,8 @@ from gradio_scripts.result_ui import Result_Gradio, update_result, table_headers
 state = {
     'files': [],
     'index': 1,
-    'total': 1
+    'total': 1,
+    'annotation_index': -1
 }
 result = {}
 
@@ -176,19 +177,24 @@ def cancel_inference():
 
 
 # Request loading of animation editor
-def prepare_annotation():
+def prepare_annotation(index):
+
+    state['annotation_index'] = index
+
     return {
         annotation_progress: gr.update(value="<p align='center' style='font-size: large;font-style: italic;'>Loading annotation...</p><!--" + str(np.random.rand()) + "-->", visible=True),
         master_tabs: gr.update(selected=2)
     }
 
 # Load frames and annotation information and show
-def open_annotation(index):
-    print(index)
+def open_annotation(_, progress=gr.Progress()):
+    result_index = state['annotation_index']
+
+    set_progress = lambda pct, msg: progress(pct, desc=msg)
 
     annotation_html = ""
-    if result["aris_input"][index]:
-        frame_info = load_frames(result["aris_input"][index], result['json_result'][index])
+    if result["aris_input"][result_index]:
+        annotations = init_frames(result["aris_input"][result_index], result['json_result'][result_index], gp=set_progress)
 
         # Header
         annotation_html += "<div id='annotation_header'>"
@@ -203,12 +209,12 @@ def open_annotation(index):
         annotation_html += "</div>"
 
         # Dummy objects
-        annotation_html += "<p id='annotation_info' style='display:none'>" + json.dumps(frame_info) + "</p>"
+        annotation_html += "<p id='annotation_info' style='display:none'>" + json.dumps(annotations) + "</p>"
         annotation_html += "<img id='annotation_img' onload='draw()' style='display:none'></img>"
         annotation_html += "<!--" + str(np.random.rand()) + "-->"
 
-    return gr.update(value=annotation_html, visible=True), gr.update(visible=False)
-
+    return gr.update(value=annotation_html, visible=True), gr.update(visible=False), gr.update(visible=True)
+
 
 components = {}
 
@@ -285,7 +291,7 @@ with demo:
     annotation_editor = gr.HTML("", visible=False)
 
     # Event listener for opening annotation
-    annotation_progress.change(open_annotation, annotation_progress, [annotation_editor, annotation_progress], _js="() => window.annotation_index")
+    annotation_progress.change(open_annotation, annotation_progress, [annotation_editor, annotation_progress])
 
     # Event listener for running javascript defined in 'annotation_editor.js'
     with open('gradio_scripts/annotation_editor.js', 'r') as f:
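A note on the new hand-off above: the per-result button now stores which result was clicked in state['annotation_index'] via prepare_annotation, and the subsequent annotation_progress.change event fires open_annotation, which reads that stored index. A minimal sketch of the same two-step pattern with the Gradio wiring and gr.Progress stripped out (plain Python, illustration only):

    state = {'annotation_index': -1}

    def prepare_annotation(index):
        # step 1: remember which result was requested
        state['annotation_index'] = index

    def open_annotation():
        # step 2: normally triggered by the annotation_progress.change event
        return state['annotation_index']

    prepare_annotation(3)
    assert open_annotation() == 3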
dataloader.py CHANGED
@@ -120,7 +120,27 @@ class YOLOFrameDataset(Dataset):
         for i in range(0,n,batch_size):
             self.batch_indices.append((i, min(n, i+batch_size)))
 
-        self.batches = []
+
+    @classmethod
+    def load_image(cls, img, img_size=896):
+        """Loads and resizes 1 image from dataset, returns img, original hw, resized hw.
+        Modified from ScaledYOLOv4.datasets.load_image()
+        """
+
+        h0, w0 = img.shape[:2]
+        h1, w1 = h0, w0
+        r = img_size / max(h0, w0)
+        if r != 1:  # always resize down, only resize up if training with augmentation
+            interp = cv2.INTER_AREA if r < 1 else cv2.INTER_LINEAR
+            img = cv2.resize(img, (int(w0 * r), int(h0 * r)), interpolation=interp)
+            h1, w1 = img.shape[:2]
+
+        return img, (h0, w0), (h1, w1)  # img, hw_original, hw_resized
+
+    def __len__(self):
+        return len(self.batches)
+
+    def __iter__(self):
         for batch_idx in self.batch_indices:
 
             batch = []
@@ -145,30 +165,7 @@ class YOLOFrameDataset(Dataset):
 
             image = torch.stack(batch)
 
-            self.batches.append((image, labels, shapes))
-
-    @classmethod
-    def load_image(cls, img, img_size=896):
-        """Loads and resizes 1 image from dataset, returns img, original hw, resized hw.
-        Modified from ScaledYOLOv4.datasets.load_image()
-        """
-
-        h0, w0 = img.shape[:2]
-        h1, w1 = h0, w0
-        r = img_size / max(h0, w0)
-        if r != 1:  # always resize down, only resize up if training with augmentation
-            interp = cv2.INTER_AREA if r < 1 else cv2.INTER_LINEAR
-            img = cv2.resize(img, (int(w0 * r), int(h0 * r)), interpolation=interp)
-            h1, w1 = img.shape[:2]
-
-        return img, (h0, w0), (h1, w1)  # img, hw_original, hw_resized
-
-    def __len__(self):
-        return len(self.batches)
-
-    def __iter__(self):
-        for batch in self.batches:
-            yield batch
+            yield (image, labels, shapes)
 
 class ARISBatchedDataset(Dataset):
     def __init__(self, aris_filepath, beam_width_dir, annotations_file, batch_size, num_frames_bg_subtract=1000, disable_output=False,
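With YOLOFrameDataset now assembling batches inside __iter__ rather than caching them in self.batches, iteration becomes a plain generator loop. A minimal usage sketch, assuming a dataset instance has already been constructed elsewhere (the constructor call and consume() below are placeholders, not part of the diff):

    dataset = ...  # an existing YOLOFrameDataset instance
    for image, labels, shapes in dataset:   # each batch is built lazily and yielded as a tuple
        consume(image, labels, shapes)      # placeholder for downstream inference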
gradio_scripts/annotation_editor.js CHANGED
@@ -20,6 +20,8 @@
     window.frames = JSON.parse(document.getElementById("annotation_info").innerHTML);
     window.frame_index = 0;
 
+    document.removeEventListener('keydown', keydown);
+    document.removeEventListener('keyup', keyup);
     document.addEventListener('keydown', keydown);
     document.addEventListener('keyup', keyup);
 
@@ -50,7 +52,7 @@
 
     // Load frame image
     const frame_img = frame['frame'];
-    document.getElementById("annotation_img").src = "data:image/png;base64," + frame_img;
+    document.getElementById("annotation_img").src = "annotation_frame_dir/" + frame_index + ".jpg";
     // Draw function is called by this element using the onloaded callback
 
     document.getElementById("annotation_frame_nbr").innerHTML = "Frame " + window.frame_index + "/" + window.frames.length;
gradio_scripts/annotation_handler.py CHANGED
@@ -6,7 +6,8 @@ import base64
 
 VIDEO_HEIGHT = 700
 
-def load_frames(video, preds):
+
+def init_frames(video, preds, gp=None):
     """Load frames for annotation editing
 
@@ -20,35 +21,38 @@ def load_frames(video, preds):
         )
     })
     """
-    if type(video) == str:
 
-        dataloader, dataset = create_dataloader_aris(video, BEAM_WIDTH_DIR, None)
-        frames = dataset.didson.load_frames(start_frame=0)
-    else:
-        frames = video
+    if gp: gp(0, "Loading Frames")
+
+    dataloader, dataset = create_dataloader_aris(video, BEAM_WIDTH_DIR, None)
+    images = dataset.didson.load_frames(start_frame=0, end_frame=1)
+
+    # assumes all frames the same size
+    h, w = images[0].shape
+
+    # enforce a standard size so that text/box thickness is consistent
+    scale_factor = VIDEO_HEIGHT / h
+    h = VIDEO_HEIGHT
+    w = int(scale_factor*w)
 
-    frame_info = []
-    if len(frames):
-        # assumes all frames the same size
-        h, w = frames[0].shape
-
-        # enforce a standard size so that text/box thickness is consistent
-        scale_factor = VIDEO_HEIGHT / h
-        h = VIDEO_HEIGHT
-        w = int(scale_factor*w)
+    annotations = []
 
-        num_frames = min(len(frames), len(preds['frames']))
+    if gp: gp(0, "Extracting Frames")
+    if len(preds['frames']):
 
-        for i, frame_raw in enumerate(frames[:num_frames]):
-            image = cv2.resize(cv2.cvtColor(frame_raw, cv2.COLOR_GRAY2BGR), (w,h))
-            retval, buffer = cv2.imencode('.jpg', image)
-            jpg_as_text = base64.b64encode(buffer).decode("utf-8")
+        for i, frame_info in enumerate(preds['frames']):
+            if gp: gp(i/len(preds['frames']), "Extracting Frames")
+
+            # Extract frames
+            img_raw = dataset.didson.load_frames(start_frame=i, end_frame=i+1)[0]
+            image = cv2.resize(cv2.cvtColor(img_raw, cv2.COLOR_GRAY2BGR), (w, h))
+            cv2.imwrite("annotation_frame_dir/" + str(i) + ".jpg", image)
 
+            # Extract annotations
             frame = {
                 'annotations': [],
-                'frame': jpg_as_text
             }
-            for fish in preds['frames'][i]['fish']:
+            for fish in frame_info['fish']:
                 xmin, ymin, xmax, ymax = fish['bbox']
                 frame['annotations'].append({
                     'bbox': {
@@ -60,6 +64,7 @@ def load_frames(video, preds):
                 'id': str(fish['fish_id']),
                 'conf': fish['conf']
             })
-            frame_info.append(frame)
-
-    return frame_info
+            annotations.append(frame)
+
+    return annotations
+
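The gp argument added to init_frames is an optional progress callback taking a completion fraction and a status message, matching the set_progress lambda that app.py passes in. A small sketch of that contract with a stand-in callback; the ARIS path and preds value below are placeholders:

    from gradio_scripts.annotation_handler import init_frames

    def print_progress(pct, msg):
        # stand-in for the gr.Progress-backed callback that app.py passes as gp
        print(f"{msg}: {pct:.0%}")

    preds = {'frames': []}                    # placeholder prediction structure
    annotations = init_frames("example.aris", # placeholder ARIS file path
                              preds, gp=print_progress)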
gradio_scripts/result_ui.py CHANGED
@@ -100,7 +100,7 @@ def Result_Gradio(prepare_annotation, components):
 
         # Button for opening result in annotation editor
         annotation_btn = gr.Button("Edit Annotation", visible=False)
-        annotation_btn.click(prepare_annotation, None, [components['annotation_progress'], components['master_tabs']], _js="() => window.annotation_index=" + str(i))
+        annotation_btn.click(prepare_annotation, annotation_btn, [components['annotation_progress'], components['master_tabs']], _js="() => " + str(i))
 
         # Add components to tab dict for easy access later on
         tabs.append({
@@ -114,4 +114,6 @@ def Result_Gradio(prepare_annotation, components):
     # Add all components to list of visualization outputs
     visual_components.extend([tab, metadata_out, video_out, table_out, annotation_btn])
 
+    components['result_tabs'] = tab_parent
+
     return visual_components
inference.py CHANGED
@@ -174,7 +174,6 @@ def do_suppression(inference, gp=None, batch_size=BATCH_SIZE, conf_thres=CONF_TH
             output = non_max_suppression(inf_out, conf_thres=conf_thres, iou_thres=iou_thres)
 
 
-            print(type(output))
             outputs.append(output)
 
             pbar.update(1*batch_size)
scripts/detect_frames.py CHANGED
@@ -47,19 +47,23 @@ def main(args, config={}, verbose=True):
 
         in_loc_dir = os.path.join(dirname, loc)
         out_dir = os.path.join(args.output, loc, "tracker", "data")
-        os.makedirs(out_dir, exist_ok=True)
         print(in_loc_dir)
         print(out_dir)
 
         # run detection + tracking
-        model, device = setup_model(args.weights)
 
         seq_list = os.listdir(in_loc_dir)
-        idx = 1
 
-        ann_list = []
-        with tqdm(total=len(seq_list), desc="...", ncols=0) as pbar:
+        iterate_sequences(in_loc_dir, out_dir, config, args.weights, seq_list, verbose)
+
+
+def iterate_sequences(in_dir, out_dir, config, weights, seq_list, verbose):
+    model, device = setup_model(weights)
+
+    idx = 1
+    with tqdm(total=len(seq_list), desc="...", ncols=0) as pbar:
         for seq in seq_list:
+            ann_list = []
             pbar.update(1)
             if (seq.startswith(".")): continue
             pbar.set_description("Processing " + seq)
@@ -68,26 +72,43 @@ def main(args, config={}, verbose=True):
             print("(" + str(idx) + "/" + str(len(seq_list)) + ") " + seq)
             print(" ")
             idx += 1
-            in_seq_dir = os.path.join(in_loc_dir, seq)
+            in_seq_dir = os.path.join(in_dir, seq)
             frame_list = detect(in_seq_dir, out_dir, config, seq, model, device, verbose)
-            i = 0
             for frame in frame_list:
-                print(frame)
                 if frame is not None:
                     for ann in frame:
-                        print(ann)
                         ann_list.append({
-                            'image_id': i,
+                            'image_id': ann[5],
                             'category_id': 0,
                             'bbox': [ann[0], ann[1], ann[2] - ann[0], ann[3] - ann[1]],
                             'score': ann[4]
                         })
-                i += 1
-    result = json.dumps(ann_list)
-    with open(os.path.join(args.output, 'pred.json'), 'w') as f:
-        f.write(result)
-
-
+            result = json.dumps(ann_list)
+
+            out_seq_dir = os.path.join(out_dir, seq)
+            os.makedirs(out_seq_dir, exist_ok=True)
+            with open(os.path.join(out_seq_dir, 'pred.json'), 'w') as f:
+                f.write(result)
+
+def iterate_files(in_dir, out_dir, config, weights, verbose):
+    model, device = setup_model(weights)
+
+    ann_list = []
+    frame_list = detect(in_dir, out_dir, config, "specified folder", model, device, verbose)
+    with tqdm(total=len(frame_list), desc="...", ncols=0) as pbar:
+        for frame in frame_list:
+            if frame is not None:
+                for ann in frame:
+                    ann_list.append({
+                        'image_id': ann[5],
+                        'category_id': 0,
+                        'bbox': [ann[0], ann[1], ann[2] - ann[0], ann[3] - ann[1]],
+                        'score': ann[4]
+                    })
+            pbar.update(1)
+    result = json.dumps(ann_list)
+    with open(os.path.join(out_dir, 'pred.json'), 'w') as f:
+        f.write(result)
 
 def detect(in_dir, out_dir, config, seq_name, model, device, verbose):
 
@@ -107,6 +128,7 @@ def detect(in_dir, out_dir, config, seq_name, model, device, verbose):
 
     outputs = do_suppression(inference, conf_thres=config['conf_threshold'], iou_thres=config['nms_iou'], verbose=verbose)
 
+    file_names = dataloader.files
     frame_list = []
     for batch_i, batch in enumerate(outputs):
 
@@ -115,14 +137,20 @@ def detect(in_dir, out_dir, config, seq_name, model, device, verbose):
         # Format results
         for si, pred in enumerate(batch):
             (image_shape, original_shape) = batch_shapes[si]
+
             # Clip boxes to image bounds and resize to input shape
             clip_boxes(pred, (height, width))
             boxes = pred[:, :4].clone()  # xyxy
             confs = pred[:, 4].clone().tolist()
             scale_boxes(image_shape, boxes, original_shape[0], original_shape[1])  # to original shape
-            ann = [ [*bb, conf] for bb, conf in zip(boxes.tolist(), confs) ]
 
-            frame_list.append(ann)
+            frame = [ [*bb, conf, file_name] for bb, conf, file_name in zip(boxes.tolist(), confs, file_names[batch_i*32+si]) ]
+
+            file_name = file_names[batch_i*32 + si]
+            for ann in frame:
+                ann.append(file_name)
+
+            frame_list.append(frame)
 
     return frame_list
 
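Each record appended to ann_list (and written to pred.json) now carries an 'image_id' taken from the source file name that detect() attaches to every annotation, alongside the COCO-style xywh box converted from the model's xyxy output. A sketch of one such record with placeholder values:

    record = {
        'image_id': 'frame_000123.jpg',    # source frame file name (ann[5]); placeholder
        'category_id': 0,
        'bbox': [12.0, 34.0, 44.0, 20.0],  # [x, y, width, height], converted from xyxy
        'score': 0.87,                     # detection confidence; placeholder
    }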