Spaces:

DaOppaiLoli
/

KanaWrite

Runtime error

File size: 3,626 Bytes

import os

import cv2
import numpy as np
from openvino import Core


class CodecCTC:
    """Greedy (best-path) CTC decoder over a fixed symbol table.

    Class index 0 is the CTC blank; the supplied characters occupy the
    following indices in the order given.
    """

    def __init__(self, characters):
        """Build the class-index -> symbol table.

        Args:
            characters: Iterable of symbols for class indices 1, 2, ...
        """
        self.chars = ["[blank]"] + list(characters)

    def decode(self, preds, top_k=10):
        """Decode per-step class scores into text and n-best candidates.

        Args:
            preds: Scores indexed as ``[time, batch, class]``.
            top_k: Maximum number of candidates kept per emitted character.

        Returns:
            A tuple ``(texts, nbest)``. ``texts`` holds one decoded string
            per batch element. ``nbest`` is a flat list with one entry per
            emitted character (batch elements in order); each entry is a
            list of ``{"prob": ..., "char": ...}`` dicts sorted by
            descending probability.
        """
        preds = np.asarray(preds)

        # argmax over classes yields (time, batch); transpose so that each
        # row is the best path of one batch element.
        best_path = np.argmax(preds, axis=2).transpose(1, 0)

        texts, nbest = [], []
        for batch_idx, labels in enumerate(best_path):
            if labels.shape[0] == 0:
                continue

            char_list = []
            for t, label in enumerate(labels):
                # Drop blanks, and collapse a label repeated from the
                # previous time step into a single emission.
                if label == 0:
                    continue
                if t > 0 and labels[t - 1] == label:
                    continue

                char_list.append(self.chars[label])

                # n-best must come from the sample being decoded; always
                # reading sample 0 gave wrong candidates for batches > 1.
                probs = self.softmax(preds[t][batch_idx])
                k_idx = np.argsort(-probs)[:top_k]
                nbest.append(
                    [dict(prob=probs[j], char=self.chars[j]) for j in k_idx]
                )

            texts.append("".join(char_list))

        return texts, nbest

    def softmax(self, x):
        """Return the softmax of ``x`` along axis 0.

        The maximum is subtracted first so the exponentials cannot overflow.
        """
        e_x = np.exp(x - np.max(x))
        return e_x / np.sum(e_x, axis=0)


class Recognizer:
    """Text-line recognizer: OpenVINO inference followed by CTC decoding."""

    def __init__(self, model_path, char_list_path):
        """Load the model, compile it for CPU and build the character codec.

        Args:
            model_path: Model file passed to ``Core.read_model``.
            char_list_path: UTF-8 text file. Its lines are concatenated with
                newlines removed, and every resulting character becomes one
                output symbol of the codec.
        """
        core = Core()
        self.model = core.read_model(model_path)
        self.compiled_model = core.compile_model(self.model, "CPU")
        self.infer_request = self.compiled_model.create_infer_request()

        # Input shape is unpacked as (batch_size, channel, height, width).
        _, _, self.inn_h, self.inn_w = self.model.inputs[0].shape
        self.input_tensor_name = self.model.inputs[0].get_any_name()
        self.output_tensor_name = self.model.outputs[0].get_any_name()

        with open(char_list_path, "r", encoding="utf-8") as f:
            char_list = "".join(line.strip("\n") for line in f)
        self.codec = CodecCTC(char_list)

    def __call__(self, inn_img):
        """Recognize the text in one image.

        Args:
            inn_img: Image array; see ``preprocess`` for accepted layouts.

        Returns:
            The ``(texts, nbest)`` tuple produced by ``CodecCTC.decode``.
        """
        inn_img = self.preprocess(inn_img, height=self.inn_h, width=self.inn_w)
        inn_img = inn_img[None, :, :, :]  # add the batch dimension

        # A single inference is enough; a second identical run only
        # overwrote the first result.
        self.infer_request.infer(inputs={self.input_tensor_name: inn_img})
        preds = self.infer_request.get_tensor(self.output_tensor_name).data[:]

        return self.codec.decode(preds)

    def preprocess(self, image, height, width, invert=False):
        """Convert an image into a ``(1, height, width)`` float32 array.

        The image is converted to grayscale, resized to ``height`` while
        keeping its aspect ratio, and padded on the right by repeating the
        edge pixels. Images that would end up wider than ``width`` are
        squeezed to exactly ``width`` instead of failing.

        Args:
            image: ``(H, W)`` grayscale, ``(H, W, 1)``, ``(H, W, 3)`` RGB or
                ``(H, W, 4)`` RGBA array.
            height: Target height in pixels.
            width: Target width in pixels.
            invert: If true, replace every gray value ``v`` with ``255 - v``.

        Returns:
            Array of shape ``(1, height, width)`` and dtype float32.
        """
        image = np.asarray(image)
        if image.ndim == 2:
            src = image
        elif image.ndim == 3 and image.shape[2] == 1:
            src = image[:, :, 0]
        elif image.ndim == 3 and image.shape[2] == 3:
            src = cv2.cvtColor(image, cv2.COLOR_RGB2GRAY)
        else:
            # 4-channel input; anything else is rejected by OpenCV itself.
            src = cv2.cvtColor(image, cv2.COLOR_RGBA2GRAY)
        src = (255 - src) if invert else src

        tw = self._scaled_width(src.shape[0], src.shape[1], height, width)
        rsz = cv2.resize(src, (tw, height), interpolation=cv2.INTER_AREA).astype(np.float32)

        # [h,w] -> [c,h,w]
        img = rsz[None, :, :]
        _, h, w = img.shape

        # right edge padding
        pad_img = np.pad(img, ((0, 0), (0, height - h), (0, width - w)), mode="edge")

        return pad_img

    @staticmethod
    def _scaled_width(src_h, src_w, height, width):
        """Return the aspect-preserving width for ``height``, clamped to [1, width].

        Clamping keeps the later padding amount non-negative for very wide
        images and keeps the resize target non-empty for very tall ones.
        """
        ratio = float(src_w) / float(src_h)
        return max(1, min(int(height * ratio), int(width)))


def main():
    """Recognize every ``.png`` file below the current directory.

    Files are processed in sorted path order and the recognizer's result
    is printed for each one. Files OpenCV cannot read are reported and
    skipped.
    """
    recog = Recognizer("model/model.xml", "model/char_list.txt")

    target_dir = "."
    file_list = sorted(
        os.path.join(dn, fn)
        for dn, _, ff in os.walk(target_dir)
        for fn in ff
        if fn.endswith(".png")
    )

    for fp in file_list:
        # The recognizer works on pixel data, not on a path, so load the
        # file first. imread returns None instead of raising on failure.
        bgr = cv2.imread(fp, cv2.IMREAD_COLOR)
        if bgr is None:
            print(f"skipped unreadable image: {fp}")
            continue

        # imread yields BGR; the recognizer converts from RGBA.
        print(recog(cv2.cvtColor(bgr, cv2.COLOR_BGR2RGBA)))


# Run the batch recognition only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()