File size: 4,911 Bytes
d4c4173
 
 
 
 
 
 
 
 
 
 
 
 
764c882
d4c4173
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
9b64a9d
 
 
d4c4173
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
e441fe1
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3613da0
 
542fd83
 
 
3613da0
 
9467d94
d4c4173
 
 
 
 
 
 
e441fe1
d4c4173
3613da0
 
70a42a9
 
a9f5c62
e441fe1
 
 
 
 
1c4d450
70a42a9
0e8fbc4
e441fe1
 
 
70a42a9
e441fe1
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
import matplotlib.pyplot as plt
import numpy as np
from six import BytesIO
from PIL import Image
import tensorflow as tf
from object_detection.utils import label_map_util
from object_detection.utils import visualization_utils as viz_utils
from object_detection.utils import ops as utils_op
import tarfile
import wget 
import gradio as gr
from huggingface_hub import snapshot_download
import os 
import cv2

# Label map file read once at import time.
PATH_TO_LABELS = 'data/label_map.pbtxt'
# Category lookup built from the label map; predict2 hands it to the
# visualizer when drawing detections.
category_index = label_map_util.create_category_index_from_labelmap(
    PATH_TO_LABELS,
    use_display_name=True,
)

def pil_image_as_numpy_array(pilimg):
    """Convert an image into an array with an extra leading axis of size 1.

    Args:
        pilimg: Any image accepted by ``tf.keras.utils.img_to_array``.

    Returns:
        The array produced by ``img_to_array`` with one new axis inserted
        at position 0.
    """
    return np.expand_dims(tf.keras.utils.img_to_array(pilimg), axis=0)
    
def load_image_into_numpy_array(path):
    """Read an image file and return it as an array with a leading axis.

    Args:
        path: Location of the image; opened through ``tf.io.gfile.GFile``
            in binary mode.

    Returns:
        The result of ``pil_image_as_numpy_array`` for the decoded image.
    """
    # The context manager closes the file handle even when reading fails;
    # the previous version left it open until garbage collection.
    with tf.io.gfile.GFile(path, 'rb') as image_file:
        image_data = image_file.read()
    image = Image.open(BytesIO(image_data))
    return pil_image_as_numpy_array(image)

def load_model():
    """Download the model snapshot and load its SavedModel.

    The repository is identified by the module-level ``REPO_ID``.

    Returns:
        The object returned by ``tf.saved_model.load`` for the
        ``saved_model`` directory inside the downloaded snapshot.
    """
    snapshot_root = snapshot_download(REPO_ID)
    return tf.saved_model.load(os.path.join(snapshot_root, "saved_model"))

# samples_folder = 'test_samples'
# image_path = 'test_samples/sample_balloon.jpeg'
# 

def predict(pilimg):
    """Run detection on a single image and return the annotated picture.

    Args:
        pilimg: Input image; converted with ``pil_image_as_numpy_array``
            before inference.

    Returns:
        Whatever ``predict2`` returns for the converted array.
    """
    return predict2(pil_image_as_numpy_array(pilimg))

def predict2(image_np):
    """Run the module-level detection model and draw its detections.

    Args:
        image_np: Array whose first axis is indexed with ``[0]`` to obtain
            the image to annotate (as produced by
            ``pil_image_as_numpy_array``).

    Returns:
        The annotated image, converted with ``tf.keras.utils.array_to_img``.
    """
    raw_outputs = detection_model(image_np)

    # Different object detection models return additional entries, so every
    # output is converted rather than only the three used below.
    outputs = {}
    for name, tensor in raw_outputs.items():
        outputs[name] = tensor.numpy()

    # Draw on a copy so the caller's array is left untouched.
    annotated_batch = image_np.copy()

    class_id_shift = 0
    boxes = outputs['detection_boxes'][0]
    class_ids = (outputs['detection_classes'][0] + class_id_shift).astype(int)
    scores = outputs['detection_scores'][0]

    viz_utils.visualize_boxes_and_labels_on_image_array(
        annotated_batch[0],
        boxes,
        class_ids,
        scores,
        category_index,
        use_normalized_coordinates=True,
        max_boxes_to_draw=200,
        min_score_thresh=.60,
        agnostic_mode=False,
        line_thickness=2)

    return tf.keras.utils.array_to_img(annotated_batch[0])


def predict_on_video(video_in_filepath, video_out_filepath, detection_model, category_index):
    """Run detection on every frame of a video and write an annotated copy.

    Args:
        video_in_filepath: Path of the video to read.
        video_out_filepath: Path of the ``mp4v``-encoded video to write. It
            uses the frame size and frame rate reported for the input.
        detection_model: Kept for interface compatibility. NOTE(review):
            inference goes through ``predict``, which relies on the
            module-level model, so this argument is not consulted here.
        category_index: Same caveat as ``detection_model``.

    Raises:
        IOError: If OpenCV cannot open ``video_in_filepath``.
    """
    video_reader = cv2.VideoCapture(video_in_filepath)
    if not video_reader.isOpened():
        video_reader.release()
        raise IOError(f"Could not open video: {video_in_filepath}")

    video_writer = None
    try:
        frame_h = int(video_reader.get(cv2.CAP_PROP_FRAME_HEIGHT))
        frame_w = int(video_reader.get(cv2.CAP_PROP_FRAME_WIDTH))
        fps = video_reader.get(cv2.CAP_PROP_FPS)

        video_writer = cv2.VideoWriter(
            video_out_filepath,
            cv2.VideoWriter_fourcc(*'mp4v'),
            fps,
            (frame_w, frame_h)
        )

        while True:
            ret, frame = video_reader.read()
            if not ret:
                break  # No more frames to read

            # OpenCV decodes frames as BGR, while the image tab feeds RGB
            # PIL images into the same pipeline; convert so the model and
            # the box colours see the same channel order in both tabs.
            rgb_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
            processed_frame = predict(rgb_frame)
            processed_frame_np = np.array(processed_frame)

            # VideoWriter expects BGR again.
            video_writer.write(
                cv2.cvtColor(processed_frame_np, cv2.COLOR_RGB2BGR)
            )
    finally:
        # Release the capture and the writer exactly once, even when
        # inference raises part-way through the video.
        video_reader.release()
        if video_writer is not None:
            video_writer.release()
        # NOTE(review): no window is opened in this function, so these
        # HighGUI calls look vestigial; kept to preserve existing behaviour.
        cv2.destroyAllWindows()
        cv2.waitKey(1)

# Callback used by the video tab
def process_video(video_path):
    """Annotate a video and return the path of the processed file.

    Args:
        video_path: Input video, forwarded unchanged to ``predict_on_video``.

    Returns:
        The fixed output path ``"output_video.mp4"``. NOTE(review): the name
        is constant, so each call overwrites the previous result.
    """
    processed_file = "output_video.mp4"
    predict_on_video(
        video_in_filepath=video_path,
        video_out_filepath=processed_file,
        detection_model=detection_model,
        category_index=category_index,
    )
    return processed_file

# Example images for the image tab: one single-element row per file.
sample_images = [
    [filename]
    for filename in (
        "00000031.jpg",
        "00000053.jpg",
        "00000057.jpg",
        "00000078.jpg",
        "00000854.jpg",
        "00000995.jpg",
        "00001052.jpg",
        "00001444.jpg",
        "00001452.jpg",
    )
]

# Repository passed to snapshot_download by load_model().
REPO_ID = "jiawenchim/iti107model"
# Loaded once at import time; predict2 reads this module-level name.
detection_model = load_model()
# pil_image = Image.open(image_path)
# image_arr = pil_image_as_numpy_array(pil_image)

# predicted_img = predict(image_arr)
# predicted_img.save('predicted.jpg')

# Image tab: a PIL image goes in, the annotated PIL image comes out.
tab1 = gr.Interface(
    fn=predict,
    inputs=gr.Image(type="pil"),
    outputs=gr.Image(type="pil"),
    title="Image - Object Detection (Battery and Dice)",
    description='Model used: SSD MobileNet V1 FPN 640x640.',
    examples=sample_images,
)

# Video tab: an uploaded file goes in, the processed video file comes out.
tab2 = gr.Interface(
    fn=process_video,
    inputs=gr.File(label="Upload a video"),
    outputs=gr.File(label="output"),
    title='Video - Object Detection (Battery and Dice)',
    description='For video processing interface, I would like to endorse student 23B712M for his works. \n Model used: SSD MobileNet V1 FPN 640x640. \n Remarks: Running inference on Three Dice Tricks will take roughly 15-20mins',
    examples=[
        ["Three Dice Trick.mp4"],
        ["Look at the fork and battery-in power.mp4"],
    ],
)


# Combine both tabs into a single app and start serving it.
iface = gr.TabbedInterface(
    [tab1, tab2],
    tab_names=['Image', 'Video'],
    title='Battery and Dice detection',
)

iface.launch(share=True)