Spaces:

DerrylNessie
/

MangaCleaner

Runtime error

App Files Files Community

MangaCleaner / app.py

DerrylNessie

Update app.py

86bdf66 over 2 years ago

raw

history blame

No virus

1.8 kB

	import re
	import jaconv
	import gradio as gr
	from transformers import AutoTokenizer, AutoFeatureExtractor, VisionEncoderDecoderModel
	from PIL import Image
	import torch
	import cv2
	import os
	import subprocess
	import sys

	# Best-effort runtime install of the PaddleOCR stack, which must happen before
	# `paddleocr` is imported below. `sys.executable -m pip` guarantees the
	# packages land in the environment of the interpreter running this script
	# (a bare `pip` resolves through PATH and may belong to another Python), and
	# the argument-list form avoids spawning a shell. `check=False` keeps the
	# install non-fatal: a failed install surfaces as the ImportError that follows.
	# NOTE(review): declaring these packages in the Space's requirements file
	# would be more robust than installing at start-up.
	subprocess.run([sys.executable, "-m", "pip", "install", "paddlepaddle"], check=False)
	subprocess.run([sys.executable, "-m", "pip", "install", "paddleocr"], check=False)
	from paddleocr import PaddleOCR, draw_ocr

	# Single checkpoint id shared by the tokenizer, the encoder-decoder model and
	# the image feature extractor, so the three always come from the same place.
	_CHECKPOINT = "kha-white/manga-ocr-base"

	tokenizer = AutoTokenizer.from_pretrained(_CHECKPOINT)
	model = VisionEncoderDecoderModel.from_pretrained(_CHECKPOINT)
	feature_extractor = AutoFeatureExtractor.from_pretrained(_CHECKPOINT)

	# Sample inputs handed to the Gradio interface via its `examples` argument.
	examples = ["japan.jpg"]

	def post_process(text):
	    """Normalize raw decoder output into clean full-width text.

	    Steps, in order:
	      1. Drop every whitespace character.
	      2. Replace the single-character ellipsis with three ASCII periods.
	      3. Rewrite each run of two or more '・' / '.' characters as the same
	         number of ASCII periods.
	      4. Convert half-width characters to full-width with ``jaconv.h2z``,
	         with ASCII and digit conversion enabled.

	    Args:
	        text: Decoded model output.

	    Returns:
	        The normalized string.
	    """
	    without_spaces = ''.join(text.split())
	    ascii_ellipsis = without_spaces.replace('…', '...')
	    # A matched run keeps its length; only the characters become periods.
	    dotted = re.sub('[・.]{2,}', lambda run: '.' * len(run.group()), ascii_ellipsis)
	    return jaconv.h2z(dotted, ascii=True, digit=True)

	def manga_ocr(img):
	    """Recognize the Japanese text in an image with the manga-ocr model.

	    Args:
	        img: The image to read. Normally a ``PIL.Image.Image``, which is what
	            an image input declared with ``type="pil"`` hands to the callback.
	            A filesystem path, or a file-like object exposing its path as
	            ``.name``, is also accepted. ``None`` (nothing submitted) is
	            tolerated.

	    Returns:
	        The decoded text after ``post_process`` normalization, or an empty
	        string when no image was supplied.
	    """
	    if img is None:
	        return ''

	    if isinstance(img, Image.Image):
	        image = img
	    else:
	        # Path string, or an uploaded-file wrapper carrying its path in `.name`.
	        image = Image.open(getattr(img, "name", img))

	    # Always hand the feature extractor a 3-channel image, whatever the
	    # source mode (RGBA, palette, grayscale, ...).
	    image = image.convert('RGB')

	    # The whole image is fed straight to the recognizer; no PaddleOCR
	    # text-detection pass is run, since nothing downstream consumes
	    # detection boxes and building a detector per request is expensive.
	    pixel_values = feature_extractor(image, return_tensors="pt").pixel_values
	    token_ids = model.generate(pixel_values)[0]
	    text = tokenizer.decode(token_ids, skip_special_tokens=True)
	    return post_process(text)

	# User-facing copy shown on the page (rendered as HTML by Gradio).
	_DESCRIPTION = "Optical Character Recognization for Japanese Texts with focus on Mangas. The model is trained by kha-white with Github link: <a href=\"https://github.com/kha-white/manga-ocr\">manga-ocr</a> while the Space App is made by me."
	_ARTICLE = "Author: <a href=\"https://huggingface.co/gryan-galario\">Gryan Galario</a>"

	# Wire `manga_ocr` to a one-image-in / text-out Gradio interface.
	# NOTE(review): `gr.inputs.Image`, `layout` and `theme="huggingface"` look like
	# legacy Gradio API — confirm against the Gradio version this Space runs on.
	iface = gr.Interface(
	    # Callback and its I/O components.
	    fn=manga_ocr,
	    inputs=[gr.inputs.Image(label="Input", type="pil")],
	    outputs="text",
	    examples=examples,
	    # Page text.
	    title="Manga OCR",
	    description=_DESCRIPTION,
	    article=_ARTICLE,
	    # Presentation and behaviour flags.
	    layout="horizontal",
	    theme="huggingface",
	    allow_flagging='never',
	)

	# Start the Gradio app.
	iface.launch()