JAP-OCR / app.py
Snowad's picture
Update app.py
a33bdf1 verified
raw
history blame
1.38 kB
import re
import jaconv
import gradio as gr
from transformers import AutoTokenizer, AutoFeatureExtractor, VisionEncoderDecoderModel
from PIL import Image
import torch
import spaces, time
tokenizer = AutoTokenizer.from_pretrained("kha-white/manga-ocr-base")
model = VisionEncoderDecoderModel.from_pretrained("kha-white/manga-ocr-base")
model.to("cuda")
feature_extractor = AutoFeatureExtractor.from_pretrained("kha-white/manga-ocr-base")
examples = ["00.jpg", "01.jpg", "02.jpg", "03.jpg", "04.jpg", "05.jpg", "06.jpg", "07.jpg", "08.jpg", "09.jpg", "10.jpg", "11.jpg"]
def post_process(text):
text = ''.join(text.split())
text = text.replace('…', '...')
text = re.sub('[・.]{2,}', lambda x: (x.end() - x.start()) * '.', text)
text = jaconv.h2z(text, ascii=True, digit=True)
return text
@spaces.GPU
def manga_ocr(img):
img = img.convert('L').convert('RGB')
pixel_values = feature_extractor(img, return_tensors="pt").pixel_values.to("cuda")
start_time = time.time()
output = model.generate(pixel_values)[0]
print("Time taken for OCR:", time.time() - start_time)
text = tokenizer.decode(output, skip_special_tokens=True)
text = post_process(text)
return text
iface = gr.Interface(
fn=manga_ocr,
inputs=gr.Image(type='pil'),
outputs="text",
title="Manga OCR",
description="Extract Manga in lighting speed ⚡",
)
iface.launch()