oskarastrom committed
Commit 058f18b
1 Parent(s): 2a572c2

Output formats

Files changed (4)
  1. app.py +7 -2
  2. gradio_scripts/upload_ui.py +4 -1
  3. inference.py +58 -147
  4. main.py +13 -12
app.py CHANGED
@@ -24,19 +24,23 @@ state = {
     'total': 1,
     'annotation_index': -1,
     'frame_index': 0,
-    'config': None
+    'outputs': [],
+    'config': None,
 }
 result = {}
 
 
 # Called when an Aris file is uploaded for inference
-def on_aris_input(file_list, model_id, conf_thresh, iou_thresh, min_hits, max_age, associative_tracker, boost_power, boost_decay, byte_low_conf, byte_high_conf, min_length, min_travel):
+def on_aris_input(file_list, model_id, conf_thresh, iou_thresh, min_hits, max_age, associative_tracker, boost_power, boost_decay, byte_low_conf, byte_high_conf, min_length, min_travel, output_formats):
+
+    print(output_formats)
 
     # Reset Result
     reset_state(result, state)
     state['files'] = file_list
     state['total'] = len(file_list)
     state['version'] = WEBAPP_VERSION
+    state['outputs'] = output_formats
     state['config'] = InferenceConfig(
         weights = models[model_id] if model_id in models else models['master'],
         conf_thresh = conf_thresh,
@@ -170,6 +174,7 @@ def infer_next(_, progress=gr.Progress()):
     json_result, json_filepath, zip_filepath, video_filepath, marking_filepath = predict_task(
         file_path,
         config = state['config'],
+        output_formats = state['outputs'],
         gradio_progress = set_progress
     )
 
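Note (outside this diff): the event binding that invokes on_aris_input is not shown in this commit. Gradio passes component values to a callback positionally, in the order of its inputs list, so the new trailing output_formats parameter lines up with the CheckboxGroup only because that component is appended to settings last. A minimal sketch of the assumed wiring; the .upload() call, the gradio_components names, and the empty outputs list are all assumptions here:

# Sketch only; the real binding lives elsewhere in app.py.
# The CheckboxGroup delivers a list of selected labels,
# e.g. ["Annotated Video", "PDF"], into the trailing `output_formats` slot.
gradio_components['input'].upload(
    on_aris_input,
    inputs=[gradio_components['input']] + gradio_components['hyperparams'],
    outputs=[],  # placeholder; the commit does not show the real outputs
)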
 
gradio_scripts/upload_ui.py CHANGED
@@ -17,8 +17,8 @@ def Upload_Gradio(gradio_components):
 
     gr.HTML("<p align='center' style='font-size: large;font-style: italic;'>Submit an .aris file to analyze result.</p>")
 
+    settings = []
     with gr.Accordion("Advanced Settings", open=False):
-        settings = []
         settings.append(gr.Dropdown(label="Model", value="master", choices=list(models.keys())))
 
         gr.Markdown("Detection Parameters")
@@ -49,6 +49,9 @@ def Upload_Gradio(gradio_components):
 
     gradio_components['hyperparams'] = settings
 
+    with gr.Row():
+        settings.append(gr.CheckboxGroup(["Annotated Video", "Manual Marking", "PDF"], label="Output formats", interactive=True, value=["Annotated Video", "Manual Marking"]))
+
     #Input field for aris submission
     gradio_components['input'] = File(file_types=[".aris", ".ddf"], type="binary", label="ARIS Input", file_count="multiple")
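One subtlety worth an example: the CheckboxGroup is appended after gradio_components['hyperparams'] = settings has already run, which still works because both names reference the same list object. The component's value is the list of currently selected labels, which is what makes membership tests like "Annotated Video" in output_formats possible downstream. A standalone toy demo, not repo code:

import gradio as gr

# Toy demo of the new control, separate from the app: the value delivered to
# a callback is the list of selected labels, which is why predict_task can
# test `"Annotated Video" in output_formats`.
def show(output_formats):
    return f"Selected: {output_formats}"

with gr.Blocks() as demo:
    formats = gr.CheckboxGroup(
        ["Annotated Video", "Manual Marking", "PDF"],
        label="Output formats",
        value=["Annotated Video", "Manual Marking"],  # defaults match the commit
    )
    selected = gr.Textbox(label="Result")
    formats.change(show, inputs=formats, outputs=selected)

# demo.launch()  # uncomment to try locally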
 
inference.py CHANGED
@@ -53,44 +53,23 @@ def norm(bbox, w, h):
 
 def do_full_inference(dataloader, image_meter_width, image_meter_height, gp=None, config=InferenceConfig()):
 
+    # Set up model
     model, device = setup_model(config.weights)
 
-    load = False
-    save = False
-
-    if load:
-        with open('static/example/inference_output.json', 'r') as f:
-            json_object = json.load(f)
-            inference = json_object['inference']
-            width = json_object['width']
-            height = json_object['height']
-            image_shapes = json_object['image_shapes']
-    else:
-        inference, image_shapes, width, height = do_detection(dataloader, model, device, gp=gp)
-
-    if save:
-        json_object = {
-            'inference': inference,
-            'width': width,
-            'height': height,
-            'image_shapes': image_shapes
-        }
-        json_text = json.dumps(json_object, indent=4)
-        with open('static/example/inference_output.json', 'w') as f:
-            f.write(json_text)
-        return
-
-
-    outputs = do_suppression(inference, conf_thres=config.conf_thresh, iou_thres=config.nms_iou, gp=gp)
+    # Detect boxes in frames
+    inference, image_shapes, width, height = do_detection(dataloader, model, device, gp=gp)
 
     if config.associative_tracker == TrackerType.BYTETRACK:
 
+        # Find low confidence detections
         low_outputs = do_suppression(inference, conf_thres=config.byte_low_conf, iou_thres=config.nms_iou, gp=gp)
         low_preds, real_width, real_height = format_predictions(image_shapes, low_outputs, width, height, gp=gp)
 
+        # Find high confidence detections
         high_outputs = do_suppression(inference, conf_thres=config.byte_high_conf, iou_thres=config.nms_iou, gp=gp)
         high_preds, real_width, real_height = format_predictions(image_shapes, high_outputs, width, height, gp=gp)
 
+        # Perform associative tracking (ByteTrack)
         results = do_associative_tracking(
            low_preds, high_preds, image_meter_width, image_meter_height,
            reverse=False, min_length=config.min_length, min_travel=config.min_travel,
@@ -98,17 +77,21 @@ def do_full_inference(dataloader, image_meter_width, image_meter_height, gp=None
            gp=gp)
     else:
 
-
+        # Find confident detections
         outputs = do_suppression(inference, conf_thres=config.conf_thresh, iou_thres=config.nms_iou, gp=gp)
 
         if config.associative_tracker == TrackerType.CONF_BOOST:
 
+            # Boost confidence based on found confident detections
             do_confidence_boost(inference, outputs, boost_power=config.boost_power, boost_decay=config.boost_decay, gp=gp)
 
+            # Find confident detections from boosted list
             outputs = do_suppression(inference, conf_thres=config.conf_thresh, iou_thres=config.nms_iou, gp=gp)
 
+        # Format confident detections
         all_preds, real_width, real_height = format_predictions(image_shapes, outputs, width, height, gp=gp)
 
+        # Perform SORT tracking
         results = do_tracking(
            all_preds, image_meter_width, image_meter_height,
            min_length=config.min_length, min_travel=config.min_travel,
@@ -118,6 +101,9 @@ def do_full_inference(dataloader, image_meter_width, image_meter_height, gp=None
     return results
 
 
+
+
+
 def setup_model(weights_fp=WEIGHTS, imgsz=896, batch_size=32):
     if torch.cuda.is_available():
         device = select_device('0', batch_size=batch_size)
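The ByteTrack branch above runs do_suppression twice over the same raw inference, once per threshold, so the low-confidence set always contains the high-confidence set. A toy illustration of that relationship, with illustrative numbers rather than repo code:

# Borderline detections survive only the low threshold; associative
# tracking can use them to extend existing tracks, while high-confidence
# detections anchor new ones.
scores = [0.05, 0.2, 0.45, 0.9]
byte_low_conf, byte_high_conf = 0.1, 0.3   # hypothetical settings

low  = [s for s in scores if s > byte_low_conf]    # [0.2, 0.45, 0.9]
high = [s for s in scores if s > byte_high_conf]   # [0.45, 0.9]
assert set(high) <= set(low)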
@@ -252,13 +238,44 @@ def format_predictions(image_shapes, outputs, width, height, gp=None, batch_size
 
     return all_preds, real_width, real_height
 
+
+# ---------------------------------------- TRACKING ------------------------------------------
+
+def do_tracking(all_preds, image_meter_width, image_meter_height, gp=None, max_age=MAX_AGE, iou_thres=IOU_THRES, min_hits=MIN_HITS, min_length=MIN_LENGTH, min_travel=MIN_TRAVEL, verbose=True):
+    """
+    Perform SORT tracking based on formatted detections
+    """
+
+    if (gp): gp(0, "Tracking...")
+
+    # Initialize tracker
+    clip_info = {
+        'start_frame': 0,
+        'end_frame': len(all_preds),
+        'image_meter_width': image_meter_width,
+        'image_meter_height': image_meter_height
+    }
+    tracker = Tracker(clip_info, args={ 'max_age': max_age, 'min_hits': 0, 'iou_threshold': iou_thres}, min_hits=min_hits)
+
+    # Run tracking
+    with tqdm(total=len(all_preds), desc="Running tracking", ncols=0, disable=not verbose) as pbar:
+        for i, key in enumerate(sorted(all_preds.keys())):
+            if gp: gp(i / len(all_preds), pbar.__str__())
+            boxes = all_preds[key]
+            if boxes is not None:
+                tracker.update(boxes)
+            else:
+                tracker.update()
+            pbar.update(1)
+
+    json_data = tracker.finalize(min_length=min_length, min_travel=min_travel)
+
+    return json_data
+
 def do_confidence_boost(inference, safe_preds, gp=None, batch_size=BATCH_SIZE, boost_power=1, boost_decay=1, verbose=True):
     """
-    Args:
-        frames_dir: a directory containing frames to be evaluated
-        image_meter_width: the width of each image, in meters (used for fish length calculation)
-        gp: a callback function which takes as input 1 parameter, (int) percent complete
-        prep_for_marking: re-index fish for manual marking output
+    Takes in the full YOLO detections 'inference' and formatted non-max suppressed detections 'safe_preds'
+    and boosts the confidence of detections around identified fish that are close in space in neighbouring frames.
     """
 
     if (gp): gp(0, "Confidence Boost...")
@@ -303,9 +320,11 @@
                 boost_frame(safe_frame, temp_frame, dt, power=boost_scale, decay=boost_decay)
 
             pbar.update(1*batch_size)
-
 
 def boost_frame(safe_frame, base_frame, dt, power=1, decay=1):
+    """
+    Boosts confidence of base_frame based on confidence in safe_frame, iou, and the time difference between frames.
+    """
     safe_boxes = safe_frame[:, :4]
     boxes = xywh2xyxy(base_frame[:, :4]) # (center_x, center_y, width, height) to (x1, y1, x2, y2)
 
@@ -320,34 +339,7 @@
     base_frame[:, 4] *= 1 + power*(score)*math.exp(-decay*(dt*dt-1))
     return base_frame
 
-def do_tracking(all_preds, image_meter_width, image_meter_height, gp=None, max_age=MAX_AGE, iou_thres=IOU_THRES, min_hits=MIN_HITS, min_length=MIN_LENGTH, min_travel=MIN_TRAVEL, verbose=True):
-
-    if (gp): gp(0, "Tracking...")
-
-    # Initialize tracker
-    clip_info = {
-        'start_frame': 0,
-        'end_frame': len(all_preds),
-        'image_meter_width': image_meter_width,
-        'image_meter_height': image_meter_height
-    }
-    tracker = Tracker(clip_info, args={ 'max_age': max_age, 'min_hits': 0, 'iou_threshold': iou_thres}, min_hits=min_hits)
-
-    # Run tracking
-    with tqdm(total=len(all_preds), desc="Running tracking", ncols=0, disable=not verbose) as pbar:
-        for i, key in enumerate(sorted(all_preds.keys())):
-            if gp: gp(i / len(all_preds), pbar.__str__())
-            boxes = all_preds[key]
-            if boxes is not None:
-                tracker.update(boxes)
-            else:
-                tracker.update()
-            pbar.update(1)
-
-    json_data = tracker.finalize(min_length=min_length, min_travel=min_travel)
-
-    return json_data
-
+# ByteTrack
 def do_associative_tracking(low_preds, high_preds, image_meter_width, image_meter_height, reverse=False, gp=None, max_age=MAX_AGE, iou_thres=IOU_THRES, min_hits=MIN_HITS, min_length=MIN_LENGTH, min_travel=MIN_TRAVEL, verbose=True):
 
     if (gp): gp(0, "Tracking...")
@@ -379,6 +371,8 @@
 
     return json_data
 
+
+
 @patch('json.encoder.c_make_encoder', None)
 def json_dump_round_float(some_object, out_path, num_digits=4):
     """Write a json file to disk with a specified level of precision.
@@ -396,8 +390,6 @@ def json_dump_round_float(some_object, out_path, num_digits=4):
     with patch('json.encoder._make_iterencode', wraps=inner):
         return json.dump(some_object, open(out_path, 'w'), indent=2)
 
-
-
 def non_max_suppression(
     prediction,
     conf_thres=0.25,
@@ -406,6 +398,8 @@
 ):
     """Non-Maximum Suppression (NMS) on inference results to reject overlapping detections
 
+    NOTE: SIMPLIFIED FOR SINGLE CLASS DETECTION
+
     Returns:
         list of detections, on (n,6) tensor per image [xyxy, conf, cls]
     """
@@ -481,86 +475,3 @@
 
     return output
 
-
-def no_suppression(
-    prediction,
-    conf_thres=0.25,
-    iou_thres=0.45,
-    max_det=300,
-):
-    """Non-Maximum Suppression (NMS) on inference results to reject overlapping detections
-
-    Returns:
-        list of detections, on (n,6) tensor per image [xyxy, conf, cls]
-    """
-
-    # Checks
-    assert 0 <= conf_thres <= 1, f'Invalid Confidence threshold {conf_thres}, valid values are between 0.0 and 1.0'
-    assert 0 <= iou_thres <= 1, f'Invalid IoU {iou_thres}, valid values are between 0.0 and 1.0'
-    if isinstance(prediction, (list, tuple)): # YOLOv5 model in validation model, output = (inference_out, loss_out)
-        prediction = prediction[0] # select only inference output
-
-    device = prediction.device
-    mps = 'mps' in device.type # Apple MPS
-    if mps: # MPS not fully supported yet, convert tensors to CPU before NMS
-        prediction = prediction.cpu()
-    bs = prediction.shape[0] # batch size
-    xc = prediction[..., 4] > conf_thres # candidates
-
-    # Settings
-    # min_wh = 2 # (pixels) minimum box width and height
-    max_nms = 30000 # maximum number of boxes into torchvision.ops.nms()
-    redundant = True # require redundant detections
-    merge = False # use merge-NMS
-
-    output = [torch.zeros((0, 6), device=prediction.device)] * bs
-    for xi, x in enumerate(prediction): # image index, image inference
-
-
-        # Keep boxes that pass confidence threshold
-        x = x[xc[xi]] # confidence
-
-        # If none remain process next image
-        if not x.shape[0]:
-            continue
-
-        # Compute conf
-        x[:, 5:] *= x[:, 4:5] # conf = obj_conf * cls_conf
-
-
-        # Box/Mask
-        box = xywh2xyxy(x[:, :4]) # (center_x, center_y, width, height) to (x1, y1, x2, y2)
-        mask = x[:, 6:] # zero columns if no masks
-
-        # Detections matrix nx6 (xyxy, conf, cls)
-        conf, j = x[:, 5:6].max(1, keepdim=True)
-        x = torch.cat((box, conf, j.float(), mask), 1)[conf.view(-1) > conf_thres]
-
-
-        # Check shape
-        n = x.shape[0] # number of boxes
-        if not n: # no boxes
-            continue
-        x = x[x[:, 4].argsort(descending=True)[:max_nms]] # sort by confidence and remove excess boxes
-
-        # Batched NMS
-        boxes = x[:, :4] # boxes (offset by class), scores
-        scores = x[:, 4]
-        i = torchvision.ops.nms(boxes, scores, iou_thres) # NMS
-
-        i = i[:max_det] # limit detections
-        if merge and (1 < n < 3E3): # Merge NMS (boxes merged using weighted mean)
-            # update boxes as boxes(i,4) = weights(i,n) * boxes(n,4)
-            iou = box_iou(boxes[i], boxes) > iou_thres # iou matrix
-            weights = iou * scores[None] # box weights
-            x[i, :4] = torch.mm(weights, x[:, :4]).float() / weights.sum(1, keepdim=True) # merged boxes
-        if redundant:
-            i = i[iou.sum(1) > 1] # require redundancy
-
-        output[xi] = x[i]
-        if mps:
-            output[xi] = output[xi].to(device)
-
-    logging = False
-
-    return output
main.py CHANGED
@@ -7,7 +7,7 @@ from dataloader import create_dataloader_aris
 from inference import do_full_inference, json_dump_round_float
 from visualizer import generate_video_batches
 
-def predict_task(filepath, config, gradio_progress=None):
+def predict_task(filepath, config, output_formats=[], gradio_progress=None):
     """
     Main processing task to be run in gradio
     - Writes aris frames to dirname(filepath)/frames/{i}.jpg
@@ -17,12 +17,11 @@ def predict_task(filepath, config, gradio_progress=None):
     - Zips all results to dirname(filepath)/{filename}_results.zip
     Args:
         filepath (str): path to aris file
-
-    TODO: Separate into subtasks in different queues; have a GPU-only queue.
     """
     if (gradio_progress): gradio_progress(0, "In task...")
     print("Cuda available in task?", torch.cuda.is_available())
 
+    # Set up save directory and define file names
     dirname = os.path.dirname(filepath)
     filename = os.path.basename(filepath).replace(".aris","").replace(".ddf","")
     results_filepath = os.path.join(dirname, f"{filename}_results.json")
@@ -31,11 +30,11 @@ def predict_task(filepath, config, gradio_progress=None):
     zip_filepath = os.path.join(dirname, f"{filename}_results.zip")
     os.makedirs(dirname, exist_ok=True)
 
-    # create dataloader
+    # Create dataloader
     if (gradio_progress): gradio_progress(0, "Initializing Dataloader...")
     dataloader, dataset = create_dataloader_aris(filepath, BEAM_WIDTH_DIR, None)
 
-    # extract aris/didson info. didson does not yet have pixel-meter info
+    # Extract aris/didson info. Didson does not yet have pixel-meter info
     if ".ddf" in filepath:
         image_meter_width = -1
         image_meter_height = -1
@@ -47,28 +46,30 @@ def predict_task(filepath, config, gradio_progress=None):
     # run detection + tracking
     results = do_full_inference(dataloader, image_meter_width, image_meter_height, gp=gradio_progress, config=config)
 
-    # re-index results if desired - this should be done before writing the file
+    # Generate Metadata and extra inference information
     results = prep_for_mm(results)
     results = add_metadata_to_result(filepath, results)
     results['metadata']['hyperparameters'] = config.to_dict()
 
-    # write output to disk
+    # Create JSON result file
     json_dump_round_float(results, results_filepath)
 
-    if dataset.didson.info['version'][3] == 5: # ARIS only
+    # Create Manual Marking file
+    if "Manual Marking" in output_formats and dataset.didson.info['version'][3] == 5:
         create_manual_marking(results, out_path=marking_filepath)
 
-    # generate a video with tracking results
-    generate_video_batches(dataset.didson, results, frame_rate, video_filepath,
+    # Create Annotated Video
+    if "Annotated Video" in output_formats:
+        generate_video_batches(dataset.didson, results, frame_rate, video_filepath,
                            image_meter_width=image_meter_width, image_meter_height=image_meter_height, gp=gradio_progress)
 
-    # zip up the results
+    # Zip up the results
     with ZipFile(zip_filepath, 'w') as z:
         for file in [results_filepath, marking_filepath, video_filepath, os.path.join(dirname, 'bg_start.jpg')]:
             if os.path.exists(file):
                 z.write(file, arcname=os.path.basename(file))
 
-    # release GPU memory
+    # Release GPU memory
    torch.cuda.empty_cache()
 
     return results, results_filepath, zip_filepath, video_filepath, marking_filepath
 
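Usage sketch (hypothetical path and values, not part of the commit): the selection list from the UI flows through unchanged, and a missing name simply skips that step. Note that predict_task only gates the manual-marking and annotated-video steps here; the "PDF" choice offered by the new CheckboxGroup has no corresponding branch yet.

# Hypothetical call; InferenceConfig() defaults stand in for real settings.
results, json_fp, zip_fp, video_fp, marking_fp = predict_task(
    "/tmp/example.aris",                 # hypothetical input file
    config=InferenceConfig(),
    output_formats=["Annotated Video"],  # omitting "Manual Marking" skips it
    gradio_progress=None,
)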