import numpy as np
from PIL import Image
import gradio as gr
from ultralytics import YOLO
import cv2
import os

# Load the YOLO models
m_raw_model = YOLO("M-Raw.pt")
n_raw_model = YOLO("N-Raw.pt")
s_raw_model = YOLO("S-Raw.pt")
m_pre_model = YOLO("M-Pre.pt")
n_pre_model = YOLO("N-Pre.pt")
s_pre_model = YOLO("S-Pre.pt")
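# Note: a simple existence check could be added before the loads above so that
# a missing checkpoint fails with a clearer error (hypothetical addition, not
# part of the original script):
#
#     for weights in ("M-Raw.pt", "N-Raw.pt", "S-Raw.pt",
#                     "M-Pre.pt", "N-Pre.pt", "S-Pre.pt"):
#         if not os.path.exists(weights):
#             raise FileNotFoundError(f"Missing YOLO checkpoint: {weights}")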

# Class to syllable map
class_mapping = {0: 'Baybayin Character', 1: 'a', 2: 'b', 3: 'ba', 4: 'be', 5: 'bi', 6: 'bo', 7: 'bu', 8: 'd', 9: 'da', 10: 'di', 11: 'do', 12: 'du', 13: 'e', 14: 'g', 15: 'ga', 16: 'gi', 17: 'go', 18: 'gu', 19: 'ha', 20: 'he', 21: 'hi', 22: 'ho', 23: 'hu', 24: 'i', 25: 'k', 26: 'ka', 27: 'ki', 28: 'ko', 29: 'ku', 30: 'l', 31: 'la', 32: 'le', 33: 'li', 34: 'lo', 35: 'lu', 36: 'm', 37: 'ma', 38: 'me', 39: 'mi', 40: 'mo', 41: 'mu', 42: 'n', 43: 'na', 44: 'ng', 45: 'nga', 46: 'ngi', 47: 'ngo', 48: 'ngu', 49: 'ni', 50: 'no', 51: 'nu', 52: 'o', 53: 'p', 54: 'pa', 55: 'pe', 56: 'pi', 57: 'po', 58: 'pu', 59: 'r', 60: 'ra', 61: 're', 62: 'ri', 63: 'ro', 64: 'ru', 65: 's', 66: 'sa', 67: 'se', 68: 'si', 69: 'so', 70: 'su', 71: 't', 72: 'ta', 73: 'te', 74: 'ti', 75: 'to', 76: 'tu', 77: 'u', 78: 'w', 79: 'wa', 80: 'we', 81: 'wi', 82: 'y', 83: 'ya', 84: 'yi', 85: 'yo', 86: 'yu'}
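# Class 0 is a generic "Baybayin Character" label; classes 1-86 correspond to
# Latin syllable transliterations.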

#--------
# UI text
#--------

pageTitle = "Baybayin Instance Detection"
msgWarning = "This demo was created by Adriel Amoguis and Miguel Flores for PCSC 2023"
desc = """
<center>
Take a picture of some Baybayin text and upload it in the Image Box on the left. You can choose the confidence threshold and the IoU threshold using the sliders. <br />
<img src="https://qph.cf2.quoracdn.net/main-qimg-a5ae0e74f5d0ef9980e7b1c59fdf2014-lq" width="60%">
</center>
"""


def snap(upload, conf, iou, show_conf=False):

    # Webcam input is currently disabled in the UI; use the uploaded image only
    image = upload

    # The model selector is also disabled in the UI, so default to M-Raw
    model = "M-Raw"
    
    # Run the selected model
    results = None
    if model == "M-Raw":
        results = m_raw_model(image, conf=conf, iou=iou)
    elif model == "N-Raw":
        results = n_raw_model(image, conf=conf, iou=iou)
    elif model == "S-Raw":
        results = s_raw_model(image, conf=conf, iou=iou)
    elif model == 'M-Pre':
        results = m_pre_model(image, conf=conf, iou=iou)
    elif model == 'N-Pre':
        results = n_pre_model(image, conf=conf, iou=iou)
    elif model == 'S-Pre':
        results = s_pre_model(image, conf=conf, iou=iou)

    # Ultralytics returns one Results object per input image; take the first
    result = results[0]

    # If nothing was detected, return the input image unchanged
    if result.boxes.cls.cpu().numpy().size == 0:
        print("No detections.")
        return image

    # Per-detection class indices, confidence scores, and xyxy pixel boxes
    classes = result.boxes.cls.cpu().numpy()
    probs = result.boxes.conf.cpu().numpy()
    boxes = result.boxes.xyxy.cpu().numpy()

    # Draw a bounding box and syllable label for every detection; line thickness
    # is scaled with image width and clamped to at least 1 px so boxes stay
    # visible (and valid for OpenCV) on small images
    words = []
    for i in range(len(boxes)):
        x1, y1, x2, y2 = map(int, boxes[i])
        thickness = max(1, int(image.shape[1] * 0.001))
        cv2.rectangle(image, (x1, y1), (x2, y2), (255, 0, 0), thickness)
        label = class_mapping[int(classes[i])]
        if show_conf:
            label = f"{label} {probs[i]:.2f}"
        cv2.putText(image, label, (x1, y1), cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 0, 0), thickness)
        # words.append([(x1,y1), (x2,y2), class_mapping[int(classes[i])]])

    # Sort the words by top to bottom, left to right
    # words = sorted(words, key=lambda x: (x[0][1], x[0][0]))
    # words = [word[2] for word in words]

    return image
            
# Get the samples array
# samples_dir = "sample_images"
# samples = os.listdir(samples_dir)
# samples = [os.path.join(samples_dir, sample) for sample in samples]
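# A minimal sketch (not wired into the interface) of how the commented-out
# "words" logic in snap() could be finished: sort detections roughly top to
# bottom, then left to right, and join their syllables into a Latin string.
# The function name and the row_tolerance heuristic are assumptions, not part
# of the original app.
def boxes_to_text(boxes, classes, row_tolerance=25):
    entries = [((x1, y1), class_mapping[int(c)])
               for (x1, y1, _x2, _y2), c in zip(boxes, classes)]
    # Bucket detections into approximate rows, then read each row left to right
    entries.sort(key=lambda e: (int(e[0][1] // row_tolerance), e[0][0]))
    return "".join(label for _, label in entries)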

demo = gr.Interface(
    snap,
    [
    # gr.Webcam(type="numpy", label="Webcam"), 
     gr.Image(source="upload", type="numpy", label="Baybayin Image"),
    #  gr.Radio(["M-Raw", "S-Raw", "N-Raw", "M-Pre", "S-Pre", "N-Pre"], label="Model", value="M-Raw"), 
     gr.Slider(0, 1, value=0.5, label="Classifier Confidence Threshold"), 
     gr.Slider(0, 1, value=0.5, label="IoU Threshold")],
    #  gr.Checkbox(label="Show Confidence Scores", value=False),
    [gr.Image(type="numpy", label="Detected Baybayin")],  # gr.Textbox(lines=5, label="Detected Latin Syllables")],
    title=pageTitle,
    article=msgWarning,
    description=desc
).queue()

if __name__ == "__main__":
    demo.launch()
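
# Usage note: running this file directly (e.g. `python app.py`, assuming that is
# the script's filename) starts the Gradio server, which listens on
# http://127.0.0.1:7860 by default; passing share=True to demo.launch() would
# additionally create a temporary public link.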