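# app.py: TensorFlow Lite object detection demo served through Gradio.
# Loads detect.tflite and labelmap.txt from the ./model directory and
# exposes an image tab and a video tab for running detections.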
import os
import cv2
import numpy as np
import importlib.util
import gradio as gr
from PIL import Image
# Paths to the TensorFlow Lite model and its label map
MODEL_DIR = 'model'
GRAPH_NAME = 'detect.tflite'
LABELMAP_NAME = 'labelmap.txt'
# Prefer the lightweight tflite_runtime package if it is installed;
# otherwise fall back to the interpreter bundled with full TensorFlow.
pkg = importlib.util.find_spec('tflite_runtime')
if pkg:
    from tflite_runtime.interpreter import Interpreter
else:
    from tensorflow.lite.python.interpreter import Interpreter
PATH_TO_CKPT = os.path.join(MODEL_DIR, GRAPH_NAME)
PATH_TO_LABELS = os.path.join(MODEL_DIR, LABELMAP_NAME)
# Load the label map; COCO-style label maps ship with a '???' placeholder
# as their first entry, which must be dropped.
with open(PATH_TO_LABELS, 'r') as f:
    labels = [line.strip() for line in f.readlines()]
if labels[0] == '???':
    del labels[0]
# Load the TensorFlow Lite model and read its input requirements
interpreter = Interpreter(model_path=PATH_TO_CKPT)
interpreter.allocate_tensors()
input_details = interpreter.get_input_details()
output_details = interpreter.get_output_details()
height = input_details[0]['shape'][1]
width = input_details[0]['shape'][2]
# Float models expect input normalized to roughly [-1, 1];
# quantized models take raw uint8 pixels.
floating_model = (input_details[0]['dtype'] == np.float32)
input_mean = 127.5
input_std = 127.5
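# Detection models exported with TF2's Object Detection API name their
# outputs 'StatefulPartitionedCall' and order the box/class/score tensors
# differently from TF1 exports, so pick the output indices accordingly.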
outname = output_details[0]['name']
if 'StatefulPartitionedCall' in outname:
    boxes_idx, classes_idx, scores_idx = 1, 3, 0
else:
    boxes_idx, classes_idx, scores_idx = 0, 1, 2
def perform_detection(image, interpreter, labels):
    """Run the detector on a BGR image, draw the results in place, and return it."""
    imH, imW, _ = image.shape
    # The model expects RGB input at its fixed training resolution
    image_rgb = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
    image_resized = cv2.resize(image_rgb, (width, height))
    input_data = np.expand_dims(image_resized, axis=0)
    if floating_model:
        input_data = (np.float32(input_data) - input_mean) / input_std

    interpreter.set_tensor(input_details[0]['index'], input_data)
    interpreter.invoke()

    boxes = interpreter.get_tensor(output_details[boxes_idx]['index'])[0]
    classes = interpreter.get_tensor(output_details[classes_idx]['index'])[0]
    scores = interpreter.get_tensor(output_details[scores_idx]['index'])[0]

    detections = []
    for i in range(len(scores)):
        if 0.5 < scores[i] <= 1.0:
            # Box coordinates are normalized; scale to pixels and clamp to the frame
            ymin = int(max(1, boxes[i][0] * imH))
            xmin = int(max(1, boxes[i][1] * imW))
            ymax = int(min(imH, boxes[i][2] * imH))
            xmax = int(min(imW, boxes[i][3] * imW))
            cv2.rectangle(image, (xmin, ymin), (xmax, ymax), (10, 255, 0), 2)

            # Draw the class label on a filled background just above the box
            object_name = labels[int(classes[i])]
            label = '%s: %d%%' % (object_name, int(scores[i] * 100))
            labelSize, baseLine = cv2.getTextSize(label, cv2.FONT_HERSHEY_SIMPLEX, 0.7, 2)
            label_ymin = max(ymin, labelSize[1] + 10)
            cv2.rectangle(image, (xmin, label_ymin - labelSize[1] - 10),
                          (xmin + labelSize[0], label_ymin + baseLine - 10),
                          (255, 255, 255), cv2.FILLED)
            cv2.putText(image, label, (xmin, label_ymin - 7),
                        cv2.FONT_HERSHEY_SIMPLEX, 0.7, (0, 0, 0), 2)
            detections.append([object_name, scores[i], xmin, ymin, xmax, ymax])
    return image
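# Standalone usage sketch (hypothetical file names, independent of Gradio):
#   frame = cv2.imread('street.jpg')               # BGR image from disk
#   annotated = perform_detection(frame, interpreter, labels)
#   cv2.imwrite('street_annotated.jpg', annotated)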
def detect_image(input_image):
    # Gradio delivers a PIL image in RGB order; convert to BGR so
    # perform_detection sees the same channel layout as the video path,
    # then convert back before handing the result to PIL.
    image = cv2.cvtColor(np.array(input_image), cv2.COLOR_RGB2BGR)
    result_image = perform_detection(image, interpreter, labels)
    return Image.fromarray(cv2.cvtColor(result_image, cv2.COLOR_BGR2RGB))
def detect_video(input_video):
    # Gradio provides the uploaded video as a file path string
    cap = cv2.VideoCapture(input_video)
    fps = cap.get(cv2.CAP_PROP_FPS) or 15  # fall back if the container has no FPS
    frames = []
    while cap.isOpened():
        ret, frame = cap.read()
        if not ret:
            break
        frames.append(perform_detection(frame, interpreter, labels))
    cap.release()
    if not frames:
        raise gr.Error("Could not read any frames from the uploaded video.")

    frame_h, frame_w, _ = frames[0].shape
    # Write next to the upload; prefixing 'result_' onto the absolute path
    # (as the original did) would produce an invalid filename.
    output_video_path = os.path.join(os.path.dirname(input_video),
                                     'result_' + os.path.basename(input_video))
    out = cv2.VideoWriter(output_video_path, cv2.VideoWriter_fourcc(*'mp4v'),
                          fps, (frame_w, frame_h))
    for frame in frames:
        out.write(frame)
    out.release()
    return output_video_path
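# Note: 'mp4v' is used above for simplicity; whether the browser can play the
# result inline depends on the client. Re-encoding to H.264 (e.g. with ffmpeg)
# is a safer assumption for universal playback in a gr.Video component.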
# Build the UI with the current Gradio component API
# (gr.inputs/gr.outputs were removed in Gradio 4)
image_input = gr.Image(type="pil", label="Upload an image")
image_output = gr.Image(type="pil", label="Detection Result")
video_input = gr.Video(label="Upload a video")
video_output = gr.Video(label="Detection Result")

app = gr.Interface(
    fn=detect_image,
    inputs=image_input,
    outputs=image_output,
    description="Object Detection on Images",
)
app_video = gr.Interface(
    fn=detect_video,
    inputs=video_input,
    outputs=video_output,
    description="Object Detection on Videos",
)
gr.TabbedInterface([app, app_video], ["Image Detection", "Video Detection"]).launch()
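# Assumed dependencies (an untested sketch of requirements.txt):
#   gradio, opencv-python, numpy, Pillow,
#   and either tflite-runtime or tensorflow for the interpreter.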