File size: 1,215 Bytes
557f02f
 
5650861
0b3b443
ff3e8f8
 
5650861
ff3e8f8
 
 
e9dcd8e
ff3e8f8
 
 
 
 
 
 
 
 
 
 
 
8bfffa0
ff3e8f8
c20ac67
ff3e8f8
 
 
 
e9dcd8e
ff3e8f8
 
 
 
 
 
 
 
 
b7694dc
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
import re
import jaconv
import gradio as gr
from transformers import AutoTokenizer, AutoFeatureExtractor, VisionEncoderDecoderModel
from PIL import Image
import torch


# The tokenizer, model and feature extractor are all loaded from the same
# pretrained checkpoint, so the identifier is named once here.
MODEL_CHECKPOINT = "kha-white/manga-ocr-base"

# Decodes generated token ids back into text (used by manga_ocr).
tokenizer = AutoTokenizer.from_pretrained(MODEL_CHECKPOINT)

# Vision encoder-decoder model whose generate() produces the token ids.
model = VisionEncoderDecoderModel.from_pretrained(MODEL_CHECKPOINT)

# Turns an input image into the `pixel_values` tensor passed to the model.
feature_extractor = AutoFeatureExtractor.from_pretrained(MODEL_CHECKPOINT)

def post_process(text):
  """Normalize decoded OCR text.

  Steps, in order:
    1. Remove all whitespace.
    2. Replace each horizontal ellipsis character with three ASCII dots.
    3. Rewrite every run of two or more '・' / '.' characters as the same
       number of ASCII dots.
    4. Pass the result through ``jaconv.h2z`` with ``ascii`` and ``digit``
       enabled (half-width to full-width conversion, per jaconv).

  Args:
    text: The string to normalize.

  Returns:
    The normalized string.
  """
  compact = ''.join(text.split())
  dotted = compact.replace('…', '...')
  # The replacement keeps the run length: one '.' per matched character.
  dotted = re.sub('[・.]{2,}', lambda m: '.' * len(m.group()), dotted)
  return jaconv.h2z(dotted, ascii=True, digit=True)

def manga_ocr(img):
  """Run OCR on an image and return the recognized, normalized text.

  Args:
    img: A PIL image (the Gradio input below is declared with type="pil"),
      or None when no image was supplied.

  Returns:
    The recognized text after ``post_process``; an empty string when
    ``img`` is None.
  """
  # The UI can be submitted without an image; return empty text instead of
  # failing with AttributeError on `None.convert`.
  if img is None:
    return ''

  # Collapse to grayscale, then expand back to three channels so the
  # feature extractor still receives an RGB image.
  img = img.convert('L').convert('RGB')
  pixel_values = feature_extractor(img, return_tensors="pt").pixel_values
  # generate() returns a batch of sequences; a single image yields one entry.
  output = model.generate(pixel_values)[0]
  text = tokenizer.decode(output, skip_special_tokens=True)
  text = post_process(text)
  return text

# Gradio front end: a single image input (delivered to manga_ocr as a PIL
# image) and a text output showing the recognized string. Flagging is
# disabled.
iface = gr.Interface(
    fn=manga_ocr,
    inputs=[gr.inputs.Image(label="Input", type="pil")],
    outputs="text",
    layout="horizontal",
    theme="huggingface",
    title="Manga OCR",
    description="Japanese Character Recognition from Mangas",
    allow_flagging='never',
)

# Start serving the interface as soon as this module is executed.
iface.launch()