# Spaces: Runtime error
import importlib
import os
import re
import subprocess
import sys

import cv2
import gradio as gr
import jaconv
import torch
from PIL import Image
from transformers import AutoTokenizer, AutoFeatureExtractor, VisionEncoderDecoderModel
# PaddleOCR may not be present in the runtime environment, so it is installed
# on demand. The install is skipped when the import already works, which avoids
# re-running pip on every start-up.
try:
    from paddleocr import PaddleOCR, draw_ocr
except ImportError:
    for _package in ("paddlepaddle", "paddleocr"):
        # Use the running interpreter's pip (not whatever ``pip`` is on PATH),
        # pass arguments as a list so no shell is involved, and fail loudly
        # if the installation does not succeed.
        subprocess.run(
            [sys.executable, "-m", "pip", "install", _package],
            check=True,
        )
    # Make sure the import system notices the freshly installed packages.
    importlib.invalidate_caches()
    from paddleocr import PaddleOCR, draw_ocr
# The tokenizer, the encoder-decoder model and the feature extractor are all
# loaded from the same pretrained checkpoint.
_CHECKPOINT = "kha-white/manga-ocr-base"

tokenizer = AutoTokenizer.from_pretrained(_CHECKPOINT)
model = VisionEncoderDecoderModel.from_pretrained(_CHECKPOINT)
feature_extractor = AutoFeatureExtractor.from_pretrained(_CHECKPOINT)

# Example inputs handed to the Gradio interface.
examples = ["japan.jpg"]
def post_process(text):
    """Clean up decoded model output.

    The steps are, in order:

    1. Remove all whitespace.
    2. Replace each ellipsis character with three ASCII periods.
    3. Rewrite every run of two or more middle dots / periods as the same
       number of ASCII periods.
    4. Convert half-width characters (including ASCII and digits) to
       full-width with ``jaconv.h2z``.

    Args:
        text: The decoded string to normalize.

    Returns:
        The normalized string.
    """
    compact = ''.join(text.split())
    compact = compact.replace('…', '...')

    def _as_periods(match):
        # Preserve the length of the run, but use plain periods throughout.
        return '.' * len(match.group())

    compact = re.sub('[・.]{2,}', _as_periods, compact)
    return jaconv.h2z(compact, ascii=True, digit=True)
def manga_ocr(img):
    """Recognize the Japanese text in an image.

    Args:
        img: The image to read. Normally a ``PIL.Image.Image`` (the interface
            is configured with ``type="pil"``). A filesystem path, or an
            object exposing the path through a ``name`` attribute, is also
            accepted. ``None`` (no image submitted) is tolerated.

    Returns:
        The recognized text after ``post_process`` normalization, or an empty
        string when ``img`` is ``None``.
    """
    if img is None:
        return ""

    if isinstance(img, Image.Image):
        image = img.convert("RGB")
    else:
        # Path-like input: either the path itself or an object carrying it
        # in ``.name``. The context manager closes the underlying file once
        # the pixels have been copied by ``convert``.
        img_path = getattr(img, "name", img)
        with Image.open(img_path) as opened:
            image = opened.convert("RGB")

    # NOTE: a PaddleOCR detection pass used to run here on every call, but its
    # result was never used, so it only added latency and has been removed.
    pixel_values = feature_extractor(image, return_tensors="pt").pixel_values
    output = model.generate(pixel_values)[0]
    text = tokenizer.decode(output, skip_special_tokens=True)
    return post_process(text)
# Text passed to the interface as its description and article.
_DESCRIPTION = "Optical Character Recognization for Japanese Texts with focus on Mangas. The model is trained by kha-white with Github link: <a href=\"https://github.com/kha-white/manga-ocr\">manga-ocr</a> while the Space App is made by me."
_ARTICLE = "Author: <a href=\"https://huggingface.co/gryan-galario\">Gryan Galario</a>"

# Single image input, requested from Gradio as a PIL image.
_image_input = gr.inputs.Image(label="Input", type="pil")

# Build the demo around ``manga_ocr`` and start serving it.
iface = gr.Interface(
    fn=manga_ocr,
    inputs=[_image_input],
    outputs="text",
    title="Manga OCR",
    description=_DESCRIPTION,
    article=_ARTICLE,
    examples=examples,
    layout="horizontal",
    theme="huggingface",
    allow_flagging='never',
)
iface.launch()