"""Gradio demo that runs the ``kha-white/manga-ocr-base`` model on an image.

The tokenizer, feature extractor and model are loaded once at import time;
the web UI is built and launched at the bottom of the module.
"""
import re

import gradio as gr
import jaconv
import requests
import torch
from PIL import Image
from transformers import AutoFeatureExtractor, AutoTokenizer, VisionEncoderDecoderModel

# Single source of truth for the checkpoint used by all three components.
MODEL_NAME = "kha-white/manga-ocr-base"

tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
model = VisionEncoderDecoderModel.from_pretrained(MODEL_NAME)
feature_extractor = AutoFeatureExtractor.from_pretrained(MODEL_NAME)


def post_process(text: str) -> str:
    """Normalize raw decoder output.

    Steps, in order:
      1. Remove all whitespace.
      2. Replace each horizontal ellipsis character with three ASCII dots.
      3. Rewrite every run of two or more ``・`` / ``.`` characters as a run
         of ``.`` of the same length.
      4. Pass the text through ``jaconv.h2z`` with ASCII and digit
         conversion enabled (half-width to full-width).

    Args:
        text: The decoded model output.

    Returns:
        The normalized string.
    """
    text = ''.join(text.split())
    text = text.replace('…', '...')
    # Keep the run length, only unify the character used for the dots.
    text = re.sub(r'[・.]{2,}', lambda m: len(m.group()) * '.', text)
    text = jaconv.h2z(text, ascii=True, digit=True)
    return text


def manga_ocr(img) -> str:
    """Recognize the text in a single image.

    Args:
        img: A PIL image as delivered by the Gradio image input, or ``None``
            when no image was supplied.

    Returns:
        The post-processed recognized text, or an empty string when ``img``
        is ``None``.
    """
    if img is None:
        # Nothing to recognize; avoid an AttributeError on ``None.convert``.
        return ''

    # Drop colour information but keep a 3-channel image for the extractor.
    img = img.convert('L').convert('RGB')
    pixel_values = feature_extractor(img, return_tensors="pt").pixel_values
    output = model.generate(pixel_values)[0]
    text = tokenizer.decode(output, skip_special_tokens=True)
    return post_process(text)


iface = gr.Interface(
    fn=manga_ocr,
    inputs=[gr.inputs.Image(label="Input", type="pil")],
    outputs="text",
    layout="horizontal",
    theme="huggingface",
    title="Manga OCR",
    description="Japanese Character Recognization from Mangas",
    allow_flagging='never',
)

iface.launch()