Spaces:

Epitech
/

Scarecrow

Sleeping

App Files Files Community

Rgascoin committed on May 22, 2023

Commit

f2dfae0

1 Parent(s): edcb323

add project

Browse files

Files changed (5) hide show

README.md +1 -1
app.py +134 -4
coco.names +80 -0
output_frames/.gitkeep +0 -0
yolov3.cfg +789 -0

README.md CHANGED Viewed

@@ -10,4 +10,4 @@ pinned: false
 license: mit
 ---
-Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference

 license: mit
 ---
+Inside original_app there is the base application that was designed to work with a back-end and a real scarecrow equipped with a camera.

app.py CHANGED Viewed

@@ -1,7 +1,137 @@
 import gradio as gr
-def greet(name):
-    return "Hello " + name + "!!"
-iface = gr.Interface(fn=greet, inputs="text", outputs="text")
-iface.launch()

+import cv2
 import gradio as gr
+import numpy as np
+import os
+import datetime
+# Load YOLO model
+net = cv2.dnn.readNetFromDarknet('yolov3.cfg', 'yolov3.weights')
+# Set classes
+classes = []
+with open('coco.names', 'r') as f:
+    classes = [line.strip() for line in f.readlines()]
+# Function to detect objects in a video frame
+def detect_birds(video_file):
+    cap = cv2.VideoCapture(video_file)
+    frame_count = 0
+    output_frames = []
+    # Variables for object count and duration
+    object_counts = {class_name: 0 for class_name in classes}
+    object_durations = {class_name: datetime.timedelta() for class_name in classes}
+    last_frame_time = None
+    while True:
+        ret, frame = cap.read()
+        if not ret:
+            break
+        if frame is None:
+            continue
+        height, width, _ = frame.shape
+        # Create a blob from the frame and pass it through the network
+        blob = cv2.dnn.blobFromImage(frame, 1 / 255.0, (416, 416), swapRB=True, crop=False)
+        net.setInput(blob)
+        layer_names = net.getLayerNames()
+        output_layers = [layer_names[i - 1] for i in net.getUnconnectedOutLayers()]
+        detections = net.forward(output_layers)
+        # Process detections
+        boxes = []
+        confidences = []
+        class_ids = []
+        for detection in detections:
+            for detection_result in detection:
+                scores = detection_result[5:]
+                class_id = np.argmax(scores)
+                confidence = scores[class_id]
+                if confidence > 0.5:
+                    center_x = int(detection_result[0] * width)
+                    center_y = int(detection_result[1] * height)
+                    w = int(detection_result[2] * width)
+                    h = int(detection_result[3] * height)
+                    x = int(center_x - w / 2)
+                    y = int(center_y - h / 2)
+                    boxes.append([x, y, w, h])
+                    confidences.append(float(confidence))
+                    class_ids.append(class_id)
+        # Apply non-maxima suppression to eliminate redundant overlapping boxes
+        indices = cv2.dnn.NMSBoxes(boxes, confidences, 0.5, 0.4)
+        # Draw bounding boxes and labels
+        if len(indices) > 0:
+            for i in indices.flatten():
+                x, y, w, h = boxes[i]
+                label = classes[class_ids[i]]
+                confidence = confidences[i]
+                cv2.rectangle(frame, (x, y), (x + w, y + h), (0, 255, 0), 2)
+                cv2.putText(frame, f'{label} {confidence:.2f}', (x, y - 10), cv2.FONT_HERSHEY_SIMPLEX, 0.5,
+                            (0, 255, 0), 2)
+                # Update object count and duration
+                object_counts[label] += 1
+                if last_frame_time is not None:
+                    duration = datetime.datetime.now() - last_frame_time
+                    object_durations[label] += duration
+                last_frame_time = datetime.datetime.now()
+        # Save the frame with bounding boxes as an image
+        output_frame_path = f'output_frames/frame_{frame_count:04d}.jpg'
+        cv2.imwrite(output_frame_path, frame)
+        output_frames.append(output_frame_path)
+        frame_count += 1
+    cap.release()
+    # Combine the output frames into a video file
+    output_video_path = 'output.mp4'
+    if frame_count > 0:
+        frame = cv2.imread(output_frames[0])
+        if frame is not None:
+            height, width, _ = frame.shape
+            fourcc = cv2.VideoWriter_fourcc(*'mp4v')
+            writer = cv2.VideoWriter(output_video_path, fourcc, 30, (width, height))
+            for frame_path in output_frames:
+                frame = cv2.imread(frame_path)
+                if frame is not None:
+                    writer.write(frame)
+            writer.release()
+        else:
+            output_video_path = None
+    else:
+        output_video_path = None
+    cv2.destroyAllWindows()
+    # Remove the output frames directory
+    for frame_path in output_frames:
+        os.remove(frame_path)
+    # Format object count and duration as text
+    count_text = '\n'.join([f'{label}: {count}' for label, count in object_counts.items() if count > 0])
+    duration_text = '\n'.join([f'{label}: {str(duration).split(".")[0]}' for label, duration in object_durations.items() if duration.total_seconds() > 0])
+    return output_video_path, count_text, duration_text
+# Create a Gradio interface
+inputs = gr.inputs.Video(label='Input Video')
+outputs = [
+    gr.outputs.Video(label='Output Video'),
+    gr.outputs.Textbox(label='Object Count', type='text'),
+    gr.outputs.Textbox(label='Duration', type='text')
+]
+gr.Interface(fn=detect_birds, inputs=inputs, outputs=outputs, capture_session=True, share=True).launch()

coco.names ADDED Viewed

	@@ -0,0 +1,80 @@

+person
+bicycle
+car
+motorbike
+aeroplane
+bus
+train
+truck
+boat
+traffic light
+fire hydrant
+stop sign
+parking meter
+bench
+bird
+cat
+dog
+horse
+sheep
+cow
+elephant
+bear
+zebra
+giraffe
+backpack
+umbrella
+handbag
+tie
+suitcase
+frisbee
+skis
+snowboard
+sports ball
+kite
+baseball bat
+baseball glove
+skateboard
+surfboard
+tennis racket
+bottle
+wine glass
+cup
+fork
+knife
+spoon
+bowl
+banana
+apple
+sandwich
+orange
+broccoli
+carrot
+hot dog
+pizza
+donut
+cake
+chair
+sofa
+pottedplant
+bed
+diningtable
+toilet
+tvmonitor
+laptop
+mouse
+remote
+keyboard
+cell phone
+microwave
+oven
+toaster
+sink
+refrigerator
+book
+clock
+vase
+scissors
+teddy bear
+hair drier
+toothbrush

output_frames/.gitkeep ADDED Viewed

File without changes

yolov3.cfg ADDED Viewed

	@@ -0,0 +1,789 @@

+[net]
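+# Testing (inference): uncomment the next two lines and comment out the training values below
+# batch=1
+# subdivisions=1
+# Training (currently active)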
+batch=64
+subdivisions=16
+width=608
+height=608
+channels=3
+momentum=0.9
+decay=0.0005
+angle=0
+saturation = 1.5
+exposure = 1.5
+hue=.1
+learning_rate=0.001
+burn_in=1000
+max_batches = 500200
+policy=steps
+steps=400000,450000
+scales=.1,.1
+[convolutional]
+batch_normalize=1
+filters=32
+size=3
+stride=1
+pad=1
+activation=leaky
+# Downsample
+[convolutional]
+batch_normalize=1
+filters=64
+size=3
+stride=2
+pad=1
+activation=leaky
+[convolutional]
+batch_normalize=1
+filters=32
+size=1
+stride=1
+pad=1
+activation=leaky
+[convolutional]
+batch_normalize=1
+filters=64
+size=3
+stride=1
+pad=1
+activation=leaky
+[shortcut]
+from=-3
+activation=linear
+# Downsample
+[convolutional]
+batch_normalize=1
+filters=128
+size=3
+stride=2
+pad=1
+activation=leaky
+[convolutional]
+batch_normalize=1
+filters=64
+size=1
+stride=1
+pad=1
+activation=leaky
+[convolutional]
+batch_normalize=1
+filters=128
+size=3
+stride=1
+pad=1
+activation=leaky
+[shortcut]
+from=-3
+activation=linear
+[convolutional]
+batch_normalize=1
+filters=64
+size=1
+stride=1
+pad=1
+activation=leaky
+[convolutional]
+batch_normalize=1
+filters=128
+size=3
+stride=1
+pad=1
+activation=leaky
+[shortcut]
+from=-3
+activation=linear
+# Downsample
+[convolutional]
+batch_normalize=1
+filters=256
+size=3
+stride=2
+pad=1
+activation=leaky
+[convolutional]
+batch_normalize=1
+filters=128
+size=1
+stride=1
+pad=1
+activation=leaky
+[convolutional]
+batch_normalize=1
+filters=256
+size=3
+stride=1
+pad=1
+activation=leaky
+[shortcut]
+from=-3
+activation=linear
+[convolutional]
+batch_normalize=1
+filters=128
+size=1
+stride=1
+pad=1
+activation=leaky
+[convolutional]
+batch_normalize=1
+filters=256
+size=3
+stride=1
+pad=1
+activation=leaky
+[shortcut]
+from=-3
+activation=linear
+[convolutional]
+batch_normalize=1
+filters=128
+size=1
+stride=1
+pad=1
+activation=leaky
+[convolutional]
+batch_normalize=1
+filters=256
+size=3
+stride=1
+pad=1
+activation=leaky
+[shortcut]
+from=-3
+activation=linear
+[convolutional]
+batch_normalize=1
+filters=128
+size=1
+stride=1
+pad=1
+activation=leaky
+[convolutional]
+batch_normalize=1
+filters=256
+size=3
+stride=1
+pad=1
+activation=leaky
+[shortcut]
+from=-3
+activation=linear
+[convolutional]
+batch_normalize=1
+filters=128
+size=1
+stride=1
+pad=1
+activation=leaky
+[convolutional]
+batch_normalize=1
+filters=256
+size=3
+stride=1
+pad=1
+activation=leaky
+[shortcut]
+from=-3
+activation=linear
+[convolutional]
+batch_normalize=1
+filters=128
+size=1
+stride=1
+pad=1
+activation=leaky
+[convolutional]
+batch_normalize=1
+filters=256
+size=3
+stride=1
+pad=1
+activation=leaky
+[shortcut]
+from=-3
+activation=linear
+[convolutional]
+batch_normalize=1
+filters=128
+size=1
+stride=1
+pad=1
+activation=leaky
+[convolutional]
+batch_normalize=1
+filters=256
+size=3
+stride=1
+pad=1
+activation=leaky
+[shortcut]
+from=-3
+activation=linear
+[convolutional]
+batch_normalize=1
+filters=128
+size=1
+stride=1
+pad=1
+activation=leaky
+[convolutional]
+batch_normalize=1
+filters=256
+size=3
+stride=1
+pad=1
+activation=leaky
+[shortcut]
+from=-3
+activation=linear
+# Downsample
+[convolutional]
+batch_normalize=1
+filters=512
+size=3
+stride=2
+pad=1
+activation=leaky
+[convolutional]
+batch_normalize=1
+filters=256
+size=1
+stride=1
+pad=1
+activation=leaky
+[convolutional]
+batch_normalize=1
+filters=512
+size=3
+stride=1
+pad=1
+activation=leaky
+[shortcut]
+from=-3
+activation=linear
+[convolutional]
+batch_normalize=1
+filters=256
+size=1
+stride=1
+pad=1
+activation=leaky
+[convolutional]
+batch_normalize=1
+filters=512
+size=3
+stride=1
+pad=1
+activation=leaky
+[shortcut]
+from=-3
+activation=linear
+[convolutional]
+batch_normalize=1
+filters=256
+size=1
+stride=1
+pad=1
+activation=leaky
+[convolutional]
+batch_normalize=1
+filters=512
+size=3
+stride=1
+pad=1
+activation=leaky
+[shortcut]
+from=-3
+activation=linear
+[convolutional]
+batch_normalize=1
+filters=256
+size=1
+stride=1
+pad=1
+activation=leaky
+[convolutional]
+batch_normalize=1
+filters=512
+size=3
+stride=1
+pad=1
+activation=leaky
+[shortcut]
+from=-3
+activation=linear
+[convolutional]
+batch_normalize=1
+filters=256
+size=1
+stride=1
+pad=1
+activation=leaky
+[convolutional]
+batch_normalize=1
+filters=512
+size=3
+stride=1
+pad=1
+activation=leaky
+[shortcut]
+from=-3
+activation=linear
+[convolutional]
+batch_normalize=1
+filters=256
+size=1
+stride=1
+pad=1
+activation=leaky
+[convolutional]
+batch_normalize=1
+filters=512
+size=3
+stride=1
+pad=1
+activation=leaky
+[shortcut]
+from=-3
+activation=linear
+[convolutional]
+batch_normalize=1
+filters=256
+size=1
+stride=1
+pad=1
+activation=leaky
+[convolutional]
+batch_normalize=1
+filters=512
+size=3
+stride=1
+pad=1
+activation=leaky
+[shortcut]
+from=-3
+activation=linear
+[convolutional]
+batch_normalize=1
+filters=256
+size=1
+stride=1
+pad=1
+activation=leaky
+[convolutional]
+batch_normalize=1
+filters=512
+size=3
+stride=1
+pad=1
+activation=leaky
+[shortcut]
+from=-3
+activation=linear
+# Downsample
+[convolutional]
+batch_normalize=1
+filters=1024
+size=3
+stride=2
+pad=1
+activation=leaky
+[convolutional]
+batch_normalize=1
+filters=512
+size=1
+stride=1
+pad=1
+activation=leaky
+[convolutional]
+batch_normalize=1
+filters=1024
+size=3
+stride=1
+pad=1
+activation=leaky
+[shortcut]
+from=-3
+activation=linear
+[convolutional]
+batch_normalize=1
+filters=512
+size=1
+stride=1
+pad=1
+activation=leaky
+[convolutional]
+batch_normalize=1
+filters=1024
+size=3
+stride=1
+pad=1
+activation=leaky
+[shortcut]
+from=-3
+activation=linear
+[convolutional]
+batch_normalize=1
+filters=512
+size=1
+stride=1
+pad=1
+activation=leaky
+[convolutional]
+batch_normalize=1
+filters=1024
+size=3
+stride=1
+pad=1
+activation=leaky
+[shortcut]
+from=-3
+activation=linear
+[convolutional]
+batch_normalize=1
+filters=512
+size=1
+stride=1
+pad=1
+activation=leaky
+[convolutional]
+batch_normalize=1
+filters=1024
+size=3
+stride=1
+pad=1
+activation=leaky
+[shortcut]
+from=-3
+activation=linear
+######################
+[convolutional]
+batch_normalize=1
+filters=512
+size=1
+stride=1
+pad=1
+activation=leaky
+[convolutional]
+batch_normalize=1
+size=3
+stride=1
+pad=1
+filters=1024
+activation=leaky
+[convolutional]
+batch_normalize=1
+filters=512
+size=1
+stride=1
+pad=1
+activation=leaky
+[convolutional]
+batch_normalize=1
+size=3
+stride=1
+pad=1
+filters=1024
+activation=leaky
+[convolutional]
+batch_normalize=1
+filters=512
+size=1
+stride=1
+pad=1
+activation=leaky
+[convolutional]
+batch_normalize=1
+size=3
+stride=1
+pad=1
+filters=1024
+activation=leaky
+[convolutional]
+size=1
+stride=1
+pad=1
+filters=255
+activation=linear
+[yolo]
+mask = 6,7,8
+anchors = 10,13,  16,30,  33,23,  30,61,  62,45,  59,119,  116,90,  156,198,  373,326
+classes=80
+num=9
+jitter=.3
+ignore_thresh = .7
+truth_thresh = 1
+random=1
+[route]
+layers = -4
+[convolutional]
+batch_normalize=1
+filters=256
+size=1
+stride=1
+pad=1
+activation=leaky
+[upsample]
+stride=2
+[route]
+layers = -1, 61
+[convolutional]
+batch_normalize=1
+filters=256
+size=1
+stride=1
+pad=1
+activation=leaky
+[convolutional]
+batch_normalize=1
+size=3
+stride=1
+pad=1
+filters=512
+activation=leaky
+[convolutional]
+batch_normalize=1
+filters=256
+size=1
+stride=1
+pad=1
+activation=leaky
+[convolutional]
+batch_normalize=1
+size=3
+stride=1
+pad=1
+filters=512
+activation=leaky
+[convolutional]
+batch_normalize=1
+filters=256
+size=1
+stride=1
+pad=1
+activation=leaky
+[convolutional]
+batch_normalize=1
+size=3
+stride=1
+pad=1
+filters=512
+activation=leaky
+[convolutional]
+size=1
+stride=1
+pad=1
+filters=255
+activation=linear
+[yolo]
+mask = 3,4,5
+anchors = 10,13,  16,30,  33,23,  30,61,  62,45,  59,119,  116,90,  156,198,  373,326
+classes=80
+num=9
+jitter=.3
+ignore_thresh = .7
+truth_thresh = 1
+random=1
+[route]
+layers = -4
+[convolutional]
+batch_normalize=1
+filters=128
+size=1
+stride=1
+pad=1
+activation=leaky
+[upsample]
+stride=2
+[route]
+layers = -1, 36
+[convolutional]
+batch_normalize=1
+filters=128
+size=1
+stride=1
+pad=1
+activation=leaky
+[convolutional]
+batch_normalize=1
+size=3
+stride=1
+pad=1
+filters=256
+activation=leaky
+[convolutional]
+batch_normalize=1
+filters=128
+size=1
+stride=1
+pad=1
+activation=leaky
+[convolutional]
+batch_normalize=1
+size=3
+stride=1
+pad=1
+filters=256
+activation=leaky
+[convolutional]
+batch_normalize=1
+filters=128
+size=1
+stride=1
+pad=1
+activation=leaky
+[convolutional]
+batch_normalize=1
+size=3
+stride=1
+pad=1
+filters=256
+activation=leaky
+[convolutional]
+size=1
+stride=1
+pad=1
+filters=255
+activation=linear
+[yolo]
+mask = 0,1,2
+anchors = 10,13,  16,30,  33,23,  30,61,  62,45,  59,119,  116,90,  156,198,  373,326
+classes=80
+num=9
+jitter=.3
+ignore_thresh = .7
+truth_thresh = 1
+random=1