Spaces:

gryan-galario
/

manga-ocr-demo

Running

File size: 1,643 Bytes

557f02f
 
5650861
0b3b443
ff3e8f8
 
5650861
ff3e8f8
 
 
e9dcd8e
ff3e8f8
 
 
19bed24
 
ff3e8f8
 
 
 
 
 
 
 
 
8bfffa0
ff3e8f8
c20ac67
ff3e8f8
 
 
 
e9dcd8e
ff3e8f8
 
 
 
 
19bed24
ff3e8f8
19bed24
 
ff3e8f8
 
b7694dc

import re
import jaconv
import gradio as gr
from transformers import AutoTokenizer, AutoFeatureExtractor, VisionEncoderDecoderModel
from PIL import Image
import torch


# Hugging Face Hub repository id shared by the tokenizer, the model weights
# and the image feature extractor loaded below.
_CHECKPOINT = "kha-white/manga-ocr-base"

# All three artifacts are loaded once at import time so that each request
# handled by the demo reuses the same objects.
tokenizer = AutoTokenizer.from_pretrained(_CHECKPOINT)
model = VisionEncoderDecoderModel.from_pretrained(_CHECKPOINT)
feature_extractor = AutoFeatureExtractor.from_pretrained(_CHECKPOINT)

# Image file names passed to the Gradio interface as its `examples`.
examples = [
    "00.jpg", "01.jpg", "02.jpg", "03.jpg",
    "04.jpg", "05.jpg", "06.jpg", "07.jpg",
    "08.jpg", "09.jpg", "10.jpg", "11.jpg",
]

def post_process(text):
  """Normalize raw decoder output into clean full-width text.

  Steps, in order:
    1. Remove every whitespace character.
    2. Expand each ellipsis character into three ASCII periods.
    3. Rewrite any run of two or more '・' / '.' characters as the same
       number of ASCII periods.
    4. Pass the result through ``jaconv.h2z`` with ASCII and digit
       conversion enabled (half-width to full-width).

  Args:
    text: Decoded string produced by the tokenizer.

  Returns:
    The normalized string.
  """
  # str.split() without arguments splits on any whitespace run, so joining
  # the pieces back together drops all whitespace.
  compact = ''.join(text.split())
  expanded = compact.replace('…', '...')

  def _as_periods(match):
    # Keep the length of the matched run, but make it periods only.
    return '.' * len(match.group(0))

  unified = re.sub('[・.]{2,}', _as_periods, expanded)
  return jaconv.h2z(unified, ascii=True, digit=True)

def manga_ocr(img):
  """Run the OCR model on one image and return the recognized text.

  Args:
    img: PIL image supplied by the Gradio image input, or ``None`` when the
      form is submitted without an image.

  Returns:
    The recognized text after ``post_process``; an empty string when no
    image was provided.
  """
  # Gradio hands the callback None when no image was uploaded; without this
  # guard the .convert() call below raises AttributeError.
  if img is None:
    return ''

  # Collapse to grayscale, then expand back to three channels so colour
  # information is discarded while the input stays in RGB mode.
  img = img.convert('L').convert('RGB')
  pixel_values = feature_extractor(img, return_tensors="pt").pixel_values
  # A single image is passed in, so only the first generated sequence is used.
  output = model.generate(pixel_values)[0]
  text = tokenizer.decode(output, skip_special_tokens=True)
  text = post_process(text)
  return text

# HTML shown below the title of the demo page.
# NOTE(review): "Recognization" looks like a typo for "Recognition"; it is
# left unchanged here because it is user-facing runtime text.
_description_html = (
    'Optical Character Recognization for Japanese Texts with focus on Mangas. '
    'The model is trained by kha-white with Github link: '
    '<a href="https://github.com/kha-white/manga-ocr">manga-ocr</a> '
    'while the Space App is made by me.'
)

# HTML passed as the interface's `article`.
_article_html = 'Author: <a href="https://huggingface.co/gryan-galario">Gryan Galario</a>'

# NOTE(review): `gr.inputs.Image`, `layout` and `theme="huggingface"` appear to
# belong to the legacy Gradio API; confirm the pinned Gradio version before
# upgrading the dependency.
iface = gr.Interface(
    fn=manga_ocr,
    inputs=[gr.inputs.Image(label="Input", type="pil")],
    outputs="text",
    title="Manga OCR",
    description=_description_html,
    article=_article_html,
    examples=examples,
    layout="horizontal",
    theme="huggingface",
    allow_flagging='never',
)

# Start the web server for the demo.
iface.launch()