Spaces:

brxerq
/

ShelvesDetection

Sleeping

App Files Files Community

brxerq committed on May 18

Commit

2230f78

•

1 Parent(s): cd3bb2c

Update app.py

Browse files

Files changed (1) hide show

app.py +82 -182

app.py CHANGED Viewed

@@ -1,194 +1,94 @@
 import os
-import cv2
 import numpy as np
-import importlib.util
-import gradio as gr
-# Function to load the model
-def load_model(modeldir, graph, labels, edgetpu):
-    pkg = importlib.util.find_spec('tflite_runtime')
-    if pkg:
-        from tflite_runtime.interpreter import Interpreter
-        if edgetpu:
-            from tflite_runtime.interpreter import load_delegate
-    else:
-        from tensorflow.lite.python.interpreter import Interpreter
-        if edgetpu:
-            from tensorflow.lite.python.interpreter import load_delegate
-    if edgetpu and (graph == 'detect.tflite'):
-        graph = 'edgetpu.tflite'
-    PATH_TO_CKPT = os.path.join(modeldir, graph)
-    PATH_TO_LABELS = os.path.join(modeldir, labels)
-    with open(PATH_TO_LABELS, 'r') as f:
-        labels = [line.strip() for line in f.readlines()]
-    if labels[0] == '???':
-        del(labels[0])
-    if edgetpu:
-        interpreter = Interpreter(model_path=PATH_TO_CKPT,
-                                  experimental_delegates=[load_delegate('libedgetpu.so.1.0')])
-    else:
-        interpreter = Interpreter(model_path=PATH_TO_CKPT)
-    interpreter.allocate_tensors()
-    return interpreter, labels
-# Function to detect objects
-def detect_objects(interpreter, labels, input_data, min_conf_threshold):
-    input_details = interpreter.get_input_details()
-    output_details = interpreter.get_output_details()
-    height = input_details[0]['shape'][1]
-    width = input_details[0]['shape'][2]
-    floating_model = (input_details[0]['dtype'] == np.float32)
-    input_mean = 127.5
-    input_std = 127.5
-    if floating_model:
-        input_data = (np.float32(input_data) - input_mean) / input_std
     interpreter.set_tensor(input_details[0]['index'], input_data)
     interpreter.invoke()
-    boxes = interpreter.get_tensor(output_details[0]['index'])[0]
-    classes = interpreter.get_tensor(output_details[1]['index'])[0]
-    scores = interpreter.get_tensor(output_details[2]['index'])[0]
-    detections = []
     for i in range(len(scores)):
-        if ((scores[i] > min_conf_threshold) and (scores[i] <= 1.0)):
-            detections.append({
-                'class': labels[int(classes[i])],
-                'score': scores[i],
-                'bbox': boxes[i]
-            })
-    return detections
-# Function to process images
-def process_image(image, interpreter, labels, min_conf_threshold):
-    image_rgb = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
-    imH, imW, _ = image.shape
-    input_details = interpreter.get_input_details()
-    height = input_details[0]['shape'][1]
-    width = input_details[0]['shape'][2]
-    image_resized = cv2.resize(image_rgb, (width, height))
-    input_data = np.expand_dims(image_resized, axis=0)
-    detections = detect_objects(interpreter, labels, input_data, min_conf_threshold)
-    for detection in detections:
-        ymin = int(max(1, (detection['bbox'][0] * imH)))
-        xmin = int(max(1, (detection['bbox'][1] * imW)))
-        ymax = int(min(imH, (detection['bbox'][2] * imH)))
-        xmax = int(min(imW, (detection['bbox'][3] * imW)))
-        cv2.rectangle(image, (xmin, ymin), (xmax, ymax), (10, 255, 0), 2)
-        label = '%s: %d%%' % (detection['class'], int(detection['score']*100))
-        labelSize, baseLine = cv2.getTextSize(label, cv2.FONT_HERSHEY_SIMPLEX, 0.7, 2)
-        label_ymin = max(ymin, labelSize[1] + 10)
-        cv2.rectangle(image, (xmin, label_ymin-labelSize[1]-10), (xmin+labelSize[0], label_ymin+baseLine-10), (255, 255, 255), cv2.FILLED)
-        cv2.putText(image, label, (xmin, label_ymin-7), cv2.FONT_HERSHEY_SIMPLEX, 0.7, (0, 0, 0), 2)
-    return image
-# Function to process videos
-def process_video(video_path, interpreter, labels, min_conf_threshold):
-    video = cv2.VideoCapture(video_path)
-    imW = video.get(cv2.CAP_PROP_FRAME_WIDTH)
-    imH = video.get(cv2.CAP_PROP_FRAME_HEIGHT)
-    output_path = "output_" + os.path.basename(video_path)
-    out = cv2.VideoWriter(output_path, cv2.VideoWriter_fourcc(*'mp4v'), 20, (int(imW), int(imH)))
-    while video.isOpened():
-        ret, frame = video.read()
-        if not ret:
-            break
-        image_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
-        input_details = interpreter.get_input_details()
-        height = input_details[0]['shape'][1]
-        width = input_details[0]['shape'][2]
-        image_resized = cv2.resize(image_rgb, (width, height))
-        input_data = np.expand_dims(image_resized, axis=0)
-        detections = detect_objects(interpreter, labels, input_data, min_conf_threshold)
-        for detection in detections:
-            ymin = int(max(1, (detection['bbox'][0] * imH)))
-            xmin = int(max(1, (detection['bbox'][1] * imW)))
-            ymax = int(min(imH, (detection['bbox'][2] * imH)))
-            xmax = int(min(imW, (detection['bbox'][3] * imW)))
-            cv2.rectangle(frame, (xmin, ymin), (xmax, ymax), (10, 255, 0), 2)
-            label = '%s: %d%%' % (detection['class'], int(detection['score']*100))
-            labelSize, baseLine = cv2.getTextSize(label, cv2.FONT_HERSHEY_SIMPLEX, 0.7, 2)
-            label_ymin = max(ymin, labelSize[1] + 10)
-            cv2.rectangle(frame, (xmin, label_ymin-labelSize[1]-10), (xmin+labelSize[0], label_ymin+baseLine-10), (255, 255, 255), cv2.FILLED)
-            cv2.putText(frame, label, (xmin, label_ymin-7), cv2.FONT_HERSHEY_SIMPLEX, 0.7, (0, 0, 0), 2)
-        out.write(frame)
-    video.release()
-    out.release()
-    return output_path
-# Gradio interface
-def predict_image(image, modeldir, graph, labels, threshold, edgetpu):
-    interpreter, labels = load_model(modeldir, graph, labels, edgetpu)
-    min_conf_threshold = float(threshold)
-    result_image = process_image(image, interpreter, labels, min_conf_threshold)
-    return result_image
-def predict_video(video, modeldir, graph, labels, threshold, edgetpu):
-    video_path = "temp_video.mp4"
-    with open(video_path, "wb") as f:
-        f.write(video.read())
-    interpreter, labels = load_model(modeldir, graph, labels, edgetpu)
-    min_conf_threshold = float(threshold)
-    output_path = process_video(video_path, interpreter, labels, min_conf_threshold)
-    with open(output_path, "rb") as f:
-        return f.read()
-iface = gr.Blocks()
-with iface:
-    gr.Markdown("# Object Detection")
-    gr.Markdown("Upload an image or a video to detect objects using a TFLite model.")
-    with gr.Tabs():
-        with gr.TabItem("Image Detection"):
-            img_input = gr.Image(type="numpy", label="Upload an Image")
-            model_dir = gr.Textbox(label="Model Directory", value="model/")
-            graph_name = gr.Textbox(label="Graph Name", value="detect.tflite")
-            labels_name = gr.Textbox(label="Labels Name", value="labelmap.txt")
-            threshold = gr.Slider(label="Confidence Threshold", minimum=0, maximum=1, value=0.5)
-            edgetpu = gr.Checkbox(label="Use Edge TPU")
-            img_output = gr.Image(type="numpy", label="Detected Image")
-            img_submit = gr.Button("Submit")
-            img_submit.click(
-                predict_image,
-                inputs=[img_input, model_dir, graph_name, labels_name, threshold, edgetpu],
-                outputs=img_output,
-                show_progress=True
-            )
-        with gr.TabItem("Video Detection"):
-            video_input = gr.Video(type="file", label="Upload a Video")
-            model_dir = gr.Textbox(label="Model Directory", value="model/")
-            graph_name = gr.Textbox(label="Graph Name", value="detect.tflite")
-            labels_name = gr.Textbox(label="Labels Name", value="labelmap.txt")
-            threshold = gr.Slider(label="Confidence Threshold", minimum=0, maximum=1, value=0.5)
-            edgetpu = gr.Checkbox(label="Use Edge TPU")
-            video_output = gr.Video(label="Detected Video")
-            video_submit = gr.Button("Submit")
-            video_submit.click(
-                predict_video,
-                inputs=[video_input, model_dir, graph_name, labels_name, threshold, edgetpu],
-                outputs=video_output,
-                show_progress=True
-            )
-iface.launch()

+import streamlit as st
 import os
 import numpy as np
+import cv2
+from PIL import Image
+import tempfile
+# Setup the model and labels.
+# NOTE: these settings are assigned before the TensorFlow imports because the
+# conditional import below reads use_TPU; reading it before assignment raised
+# NameError as soon as the script started.
+MODEL_NAME = 'model'
+GRAPH_NAME = 'detect.tflite'
+LABELMAP_NAME = 'labelmap.txt'
+min_conf_threshold = 0.5
+use_TPU = False  # Change this based on your needs
+# TensorFlow imports
+from tensorflow.lite.python.interpreter import Interpreter
+if use_TPU:
+    from tensorflow.lite.python.interpreter import load_delegate
+# Build both paths from MODEL_NAME so the model directory is configured in one place.
+PATH_TO_CKPT = os.path.join(MODEL_NAME, GRAPH_NAME)
+PATH_TO_LABELS = os.path.join(MODEL_NAME, LABELMAP_NAME)
+# Load labels (one label per line, surrounding whitespace stripped)
+with open(PATH_TO_LABELS, 'r') as f:
+    labels = [line.strip() for line in f]
+# Drop a leading '???' entry (presumably a placeholder class); the emptiness
+# check avoids an IndexError when the label file has no lines.
+if labels and labels[0] == '???':
+    del labels[0]
+# Load model
+if use_TPU:
+    # Same delegate set-up the previous Gradio version of this file used;
+    # without it the imported load_delegate was never applied.
+    interpreter = Interpreter(model_path=PATH_TO_CKPT,
+                              experimental_delegates=[load_delegate('libedgetpu.so.1.0')])
+else:
+    interpreter = Interpreter(model_path=PATH_TO_CKPT)
+interpreter.allocate_tensors()
+input_details = interpreter.get_input_details()
+output_details = interpreter.get_output_details()
+# Input size expected by the model, read from axes 1 and 2 of the input shape.
+height = input_details[0]['shape'][1]
+width = input_details[0]['shape'][2]
+# Streamlit interface
+st.title('Object Detection System')
+st.sidebar.title('Settings')
+uploaded_file = st.sidebar.file_uploader("Choose an image or video file", type=['jpg', 'png', 'jpeg', 'mp4'])
+def detect_objects(image):
+    """Run the TFLite detector on an image and draw the detections onto it.
+
+    Args:
+        image: Image array in OpenCV BGR channel order (it is converted with
+            cv2.COLOR_BGR2RGB before inference). It is drawn on in place.
+
+    Returns:
+        The same array, with a box and a "name: score%" label for every
+        detection whose score is above min_conf_threshold (and at most 1.0).
+    """
+    # Size of the original frame; the boxes below are scaled by it.
+    # These names were previously undefined here, so drawing raised NameError.
+    imH, imW = image.shape[:2]
+    # Prepare image for detection
+    image_rgb = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
+    image_resized = cv2.resize(image_rgb, (width, height))
+    input_data = np.expand_dims(image_resized, axis=0)
+    # Normalize only when the model takes float32 input; a non-float model
+    # must receive the resized pixels in their original dtype.
+    if input_details[0]['dtype'] == np.float32:
+        input_data = (np.float32(input_data) - 127.5) / 127.5
+    # Perform detection
     interpreter.set_tensor(input_details[0]['index'], input_data)
     interpreter.invoke()
+    # Retrieve detection results
+    # NOTE(review): assumes output tensors are ordered boxes, classes, scores - confirm for the deployed model.
+    boxes = interpreter.get_tensor(output_details[0]['index'])[0]  # Bounding box coordinates of detected objects
+    classes = interpreter.get_tensor(output_details[1]['index'])[0]  # Class index of detected objects
+    scores = interpreter.get_tensor(output_details[2]['index'])[0]  # Confidence of detected objects
     for i in range(len(scores)):
+        if scores[i] > min_conf_threshold and scores[i] <= 1.0:
+            # Scale the box to the frame size and clamp it so the rectangle
+            # stays inside the image.
+            ymin, xmin, ymax, xmax = boxes[i]
+            left = int(max(0, xmin * imW))
+            top = int(max(0, ymin * imH))
+            right = int(min(imW, xmax * imW))
+            bottom = int(min(imH, ymax * imH))
+            cv2.rectangle(image, (left, top), (right, bottom), (10, 255, 0), 4)
+            object_name = labels[int(classes[i])]
+            label = '%s: %d%%' % (object_name, int(scores[i]*100))
+            # Keep the caption on screen when the box touches the top edge.
+            label_y = max(top - 10, 15)
+            cv2.putText(image, label, (left, label_y), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 0, 255), 2)
+    return image
+if uploaded_file is not None:
+    # Read the upload exactly once. A second read() on the same stream starts
+    # at end-of-file and returns no data, which previously left the temporary
+    # video file empty so no frame was ever processed.
+    raw_bytes = uploaded_file.read()
+    if uploaded_file.type == "video/mp4":
+        # Handle video upload
+        # cv2.VideoCapture is given a path, so write the upload to a temp file.
+        # Leaving the with-block closes the file, flushing it to disk before
+        # OpenCV opens it.
+        with tempfile.NamedTemporaryFile(delete=False, suffix='.mp4') as tfile:
+            tfile.write(raw_bytes)
+        cap = cv2.VideoCapture(tfile.name)
+        stframe = st.empty()
+        try:
+            while cap.isOpened():
+                ret, frame = cap.read()
+                if not ret:
+                    break
+                frame = detect_objects(frame)
+                # Frames are still in OpenCV's BGR order here; swap the
+                # channels to RGB for display.
+                frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
+                stframe.image(frame)
+        finally:
+            # Release the capture and remove the temp file even if a frame fails.
+            cap.release()
+            os.remove(tfile.name)
+    else:
+        # Handle image upload
+        file_bytes = np.frombuffer(raw_bytes, dtype=np.uint8)
+        image = cv2.imdecode(file_bytes, cv2.IMREAD_COLOR)
+        if image is None:
+            # imdecode returns None when the data cannot be decoded.
+            st.error("Could not decode the uploaded image.")
+        else:
+            image = detect_objects(image)
+            image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
+            st.image(image, use_column_width=True)