Spaces:

brxerq
/

ShelvesDetection

Sleeping

App Files Files Community

brxerq committed on May 19, 2024

Commit

2e7653f

verified ·

1 Parent(s): 8423797

Update model_3.py

Browse files

Files changed (1) hide show

model_3.py +74 -7

model_3.py CHANGED Viewed

@@ -1,12 +1,11 @@
-# model_3.py
 import os
 import cv2
 import numpy as np
 import importlib.util
-from PIL import Image
 import gradio as gr
-from common_detection import perform_detection
 MODEL_DIR = 'model_3'
 GRAPH_NAME = 'detect.tflite'
 LABELMAP_NAME = 'labelmap.txt'
@@ -14,18 +13,22 @@ LABELMAP_NAME = 'labelmap.txt'
 pkg = importlib.util.find_spec('tflite_runtime')
 if pkg:
     from tflite_runtime.interpreter import Interpreter
 else:
     from tensorflow.lite.python.interpreter import Interpreter
 PATH_TO_CKPT = os.path.join(MODEL_DIR, GRAPH_NAME)
 PATH_TO_LABELS = os.path.join(MODEL_DIR, LABELMAP_NAME)
 with open(PATH_TO_LABELS, 'r') as f:
     labels = [line.strip() for line in f.readlines()]
 if labels[0] == '???':
     del(labels[0])
 interpreter = Interpreter(model_path=PATH_TO_CKPT)
 interpreter.allocate_tensors()
@@ -35,10 +38,57 @@ height = input_details[0]['shape'][1]
 width = input_details[0]['shape'][2]
 floating_model = (input_details[0]['dtype'] == np.float32)
 def detect_image(input_image):
     image = np.array(input_image)
-    resized_image = cv2.resize(image, (640, 640))
-    result_image = perform_detection(resized_image, interpreter, labels, input_details, output_details, height, width, floating_model)
     return Image.fromarray(result_image)
 def detect_video(input_video):
@@ -50,8 +100,8 @@ def detect_video(input_video):
         if not ret:
             break
-        resized_frame = cv2.resize(frame, (640, 640))
-        result_frame = perform_detection(resized_frame, interpreter, labels, input_details, output_details, height, width, floating_model)
         frames.append(result_frame)
     cap.release()
@@ -70,3 +120,20 @@ def detect_video(input_video):
     out.release()
     return output_video_path

 import os
 import cv2
 import numpy as np
 import importlib.util
 import gradio as gr
+from PIL import Image
+# Locations of the TensorFlow Lite model file and its label map
 MODEL_DIR = 'model_3'
 GRAPH_NAME = 'detect.tflite'
 LABELMAP_NAME = 'labelmap.txt'
 pkg = importlib.util.find_spec('tflite_runtime')
 if pkg:
     from tflite_runtime.interpreter import Interpreter
+    from tflite_runtime.interpreter import load_delegate
 else:
     from tensorflow.lite.python.interpreter import Interpreter
+    from tensorflow.lite.python.interpreter import load_delegate
 PATH_TO_CKPT = os.path.join(MODEL_DIR, GRAPH_NAME)
 PATH_TO_LABELS = os.path.join(MODEL_DIR, LABELMAP_NAME)
+# Load the label map
 with open(PATH_TO_LABELS, 'r') as f:
     labels = [line.strip() for line in f.readlines()]
 if labels[0] == '???':
     del(labels[0])
+# Load the TensorFlow Lite model
 interpreter = Interpreter(model_path=PATH_TO_CKPT)
 interpreter.allocate_tensors()
 width = input_details[0]['shape'][2]
 floating_model = (input_details[0]['dtype'] == np.float32)
+input_mean = 127.5
+input_std = 127.5
+outname = output_details[0]['name']
+if ('StatefulPartitionedCall' in outname):
+    boxes_idx, classes_idx, scores_idx = 1, 3, 0
+else:
+    boxes_idx, classes_idx, scores_idx = 0, 1, 2
+def perform_detection(image, interpreter, labels):
+    imH, imW, _ = image.shape
+    image_rgb = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
+    image_resized = cv2.resize(image_rgb, (width, height))
+    input_data = np.expand_dims(image_resized, axis=0)
+    if floating_model:
+        input_data = (np.float32(input_data) - input_mean) / input_std
+    interpreter.set_tensor(input_details[0]['index'], input_data)
+    interpreter.invoke()
+    boxes = interpreter.get_tensor(output_details[boxes_idx]['index'])[0]
+    classes = interpreter.get_tensor(output_details[classes_idx]['index'])[0]
+    scores = interpreter.get_tensor(output_details[scores_idx]['index'])[0]
+    detections = []
+    for i in range(len(scores)):
+        if ((scores[i] > 0.5) and (scores[i] <= 1.0)):
+            ymin = int(max(1, (boxes[i][0] * imH)))
+            xmin = int(max(1, (boxes[i][1] * imW)))
+            ymax = int(min(imH, (boxes[i][2] * imH)))
+            xmax = int(min(imW, (boxes[i][3] * imW)))
+            cv2.rectangle(image, (xmin, ymin), (xmax, ymax), (10, 255, 0), 2)
+            object_name = labels[int(classes[i])]
+            label = '%s: %d%%' % (object_name, int(scores[i] * 100))
+            labelSize, baseLine = cv2.getTextSize(label, cv2.FONT_HERSHEY_SIMPLEX, 0.7, 2)
+            label_ymin = max(ymin, labelSize[1] + 10)
+            cv2.rectangle(image, (xmin, label_ymin - labelSize[1] - 10), (xmin + labelSize[0], label_ymin + baseLine - 10), (255, 255, 255), cv2.FILLED)
+            cv2.putText(image, label, (xmin, label_ymin - 7), cv2.FONT_HERSHEY_SIMPLEX, 0.7, (0, 0, 0), 2)
+            detections.append([object_name, scores[i], xmin, ymin, xmax, ymax])
+    return image
+def resize_image(image, size=640):
+    return cv2.resize(image, (size, size))
 def detect_image(input_image):
     image = np.array(input_image)
+    resized_image = resize_image(image, size=640)  # Resize input image
+    result_image = perform_detection(resized_image, interpreter, labels)
     return Image.fromarray(result_image)
 def detect_video(input_video):
         if not ret:
             break
+        resized_frame = resize_image(frame, size=640)  # Resize each frame
+        result_frame = perform_detection(resized_frame, interpreter, labels)
         frames.append(result_frame)
     cap.release()
     out.release()
     return output_video_path
+app = gr.Blocks()
+with app:
+    with gr.Tab("Image Detection"):
+        gr.Markdown("Upload an image for object detection")
+        image_input = gr.Image(type="pil", label="Upload an image")
+        image_output = gr.Image(type="pil", label="Detection Result")
+        gr.Button("Submit").click(fn=detect_image, inputs=image_input, outputs=image_output)
+    with gr.Tab("Video Detection"):
+        gr.Markdown("Upload a video for object detection")
+        video_input = gr.Video(label="Upload a video")
+        video_output = gr.Video(label="Detection Result")
+        gr.Button("Submit").click(fn=detect_video, inputs=video_input, outputs=video_output)
+app.launch()