Spaces:
Running
Running
File size: 1,587 Bytes
b808e5c b496af1 b808e5c 2f7f512 b808e5c 2f7f512 b808e5c 2f7f512 b808e5c 2f7f512 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 |
import numpy as np
import gradio as gr
from transformers import AutoFeatureExtractor, AutoTokenizer, VisionEncoderDecoderModel
import re
import jaconv
# Load the feature extractor, tokenizer and vision encoder-decoder model,
# in that order, all from the same `model_path`.
model_path = "model/"
feature_extractor, tokenizer, model = (
    loader.from_pretrained(model_path)
    for loader in (AutoFeatureExtractor, AutoTokenizer, VisionEncoderDecoderModel)
)
# Sample image paths: examples/01.png through examples/07.png.
examples = [f'examples/{index:02d}.png' for index in range(1, 8)]
def post_process(text):
    """Clean up a decoded text string and return the result.

    Steps, in order: remove all whitespace, expand each '…' into three
    ASCII dots, rewrite every run of two or more '・' / '.' characters as
    the same number of '.', then pass the string through ``jaconv.h2z``
    with ``ascii=True, digit=True``.
    """
    def _as_dots(match):
        # Preserve the length of the run, but emit '.' for every character.
        return '.' * len(match.group())

    compact = ''.join(text.split())
    expanded = compact.replace('…', '...')
    dotted = re.sub('[・.]{2,}', _as_dots, expanded)
    return jaconv.h2z(dotted, ascii=True, digit=True)
def infer(image):
    """Recognize the text in one image and return it as a string.

    Args:
        image: An image object exposing ``convert`` (the interface below is
            configured with ``type="pil"``), or ``None`` when the request
            carries no image.

    Returns:
        The decoded text after ``post_process``; an empty string when
        ``image`` is ``None``.
    """
    # A submission without an image arrives as None; return an empty result
    # instead of failing on ``None.convert``.
    if image is None:
        return ''

    # Round-trip through grayscale: discards colour but keeps three channels.
    image = image.convert('L').convert('RGB')
    pixel_values = feature_extractor(image, return_tensors="pt").pixel_values
    # generate() returns a batch of sequences; only one image was passed in.
    output_ids = model.generate(pixel_values)[0]
    text = tokenizer.decode(output_ids, skip_special_tokens=True)
    return post_process(text)
# Build the web demo around `infer`: one image input (delivered as a PIL
# image), one text output, and the sample images from `examples`.
# NOTE(review): `gr.inputs.Image`, `layout=` and passing `enable_queue` /
# `cache_examples` to launch() look like the older Gradio API — confirm the
# pinned Gradio version before upgrading.
iface = gr.Interface(
    # Page text
    title="Optical Character Recognition for Japanese Text",
    description="A simple interface for OCR from Japanese manga",
    article= "Author: <a href=\"https://huggingface.co/vumichien\">Vu Minh Chien</a>. ",
    # Callback and its I/O components
    fn=infer,
    inputs=[gr.inputs.Image(type="pil", label="Input")],
    outputs="text",
    examples=examples,
    # Presentation and flagging
    layout="horizontal",
    theme="huggingface",
    allow_flagging='never',
)
iface.launch(cache_examples=True, enable_queue=True)
|