File size: 1,809 Bytes
1233b47 2dea056 1233b47 705192b 2dea056 1233b47 2dea056 705192b 8f8226b 705192b 2dea056 1233b47 705192b 1233b47 2dea056 4394dea 2dea056 1233b47 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 |
import re
import jaconv
import gradio as gr
from transformers import AutoTokenizer, AutoFeatureExtractor, VisionEncoderDecoderModel
from PIL import Image
import torch, time
import spaces
# Hub checkpoint shared by the tokenizer, the model weights and the
# image feature extractor loaded below.
_CHECKPOINT = "kha-white/manga-ocr-base"

# Decodes generated token ids back into text.
tokenizer = AutoTokenizer.from_pretrained(_CHECKPOINT)

# Encoder-decoder OCR model, placed on the GPU once at import time.
model = VisionEncoderDecoderModel.from_pretrained(_CHECKPOINT).to("cuda")

# Turns a PIL image into the ``pixel_values`` tensor fed to the model.
feature_extractor = AutoFeatureExtractor.from_pretrained(_CHECKPOINT)
def post_process(text):
    """Normalise raw OCR output into its final form.

    The steps run in this order:

    1. remove every whitespace character;
    2. replace each horizontal-ellipsis character with three ASCII dots;
    3. rewrite each run of two or more "・" / "." characters as the same
       number of "." characters;
    4. pass the result through ``jaconv.h2z`` with ASCII and digit
       conversion enabled (half-width to full-width).

    Args:
        text: Decoded model output.

    Returns:
        The normalised string.
    """
    compact = ''.join(text.split())
    compact = compact.replace('…', '...')

    def _as_dots(match):
        # One "." per matched character keeps the run length unchanged.
        return '.' * len(match.group())

    dotted = re.sub('[・.]{2,}', _as_dots, compact)
    return jaconv.h2z(dotted, ascii=True, digit=True)
# @spaces.GPU
# def manga_ocr(img):
# img = img.convert('L').convert('RGB')
# pixel_values = feature_extractor(img, return_tensors="pt").pixel_values.to("cuda")
# start_time = time.time()
# output = model.generate(pixel_values)[0]
# print("Time taken for OCR:", time.time() - start_time)
# text = tokenizer.decode(output, skip_special_tokens=True)
# text = post_process(text)
# return text
@spaces.GPU(duration=8)
def manga_ocr(imgs):
    """Run OCR on every uploaded image and join the recognised texts.

    Args:
        imgs: Iterable of items accepted by ``PIL.Image.open`` (supplied
            by the multi-file Gradio input declared in this file), or
            ``None`` when the form is submitted without any file.

    Returns:
        The post-processed text of each image, in input order, joined
        with ``"|||"``. An empty string when ``imgs`` is ``None`` or
        empty.
    """
    # Submitting the form with no files yields None; iterating it would
    # raise TypeError, so answer with the same "" an empty list produces.
    if not imgs:
        return ""

    texts = []
    for img in imgs:
        # The context manager releases the file handle as soon as the
        # converted in-memory copy exists.
        with Image.open(img) as opened:
            # Grayscale and back to RGB: discards colour information while
            # keeping a three-channel image.
            rgb = opened.convert('L').convert('RGB')
        pixel_values = feature_extractor(rgb, return_tensors="pt").pixel_values.to("cuda")
        start_time = time.time()
        # generate() returns a batch; a single image was passed, so take
        # the first (only) sequence.
        output = model.generate(pixel_values)[0]
        print("Time taken for OCR:", time.time() - start_time)
        text = tokenizer.decode(output, skip_special_tokens=True)
        texts.append(post_process(text))
    return "|||".join(texts)
# Web UI: takes one or more image files and displays the text returned
# by manga_ocr (per-image results separated by "|||").
iface = gr.Interface(
    fn=manga_ocr,
    inputs=gr.File(file_types=["image"], file_count="multiple"),
    outputs="text",
    title="Manga OCR",
    description="Extract Manga in lightning speed ⚡",
)

# Start the Gradio server.
iface.launch()