Spaces:

DeepLearning101
/

OCR101TW

Build error

File size: 2,962 Bytes

import os
import subprocess
import sys
import threading

import gradio as gr
from paddleocr import PaddleOCR, draw_ocr
from PIL import Image

# Provide a fallback Hugging Face Hub access token. `setdefault` is used so a
# token that is already configured in the environment (for example as a
# deployment secret) is not overwritten by this placeholder value.
os.environ.setdefault("HF_TOKEN", "TWOCR")

# Pin pip to a known version. pip is invoked through the running interpreter
# so the upgrade targets this environment rather than whichever `pip`
# executable happens to be first on PATH. The exit status is deliberately not
# checked: a failed upgrade should not stop the app from starting.
subprocess.run(
    [sys.executable, "-m", "pip", "install", "--upgrade", "pip==24.1.1"],
    check=False,
)

# Shared OCR engine, created lazily on the first request so the recognizer is
# not rebuilt for every uploaded image.
_OCR_ENGINE = None

# Serializes engine creation and OCR calls.
# NOTE(review): added because the engine is now shared between requests and
# it is not known whether PaddleOCR tolerates concurrent calls - confirm.
_OCR_LOCK = threading.Lock()


def _get_ocr_engine():
    """Return the shared PaddleOCR engine, building it on first use."""
    global _OCR_ENGINE
    if _OCR_ENGINE is None:
        _OCR_ENGINE = PaddleOCR(
            rec_char_dict_path='zhtw_common_dict.txt',
            use_gpu=False,
            rec_image_shape="3, 48, 320"
        )
    return _OCR_ENGINE


def inference(img_path):
    """Run OCR on an image file and return the annotated image and its text.

    Args:
        img_path: Path of the image to recognize. ``None`` or an empty string
            (nothing uploaded) is accepted and produces an empty result.

    Returns:
        A ``(image, text)`` pair. ``image`` is whatever ``draw_ocr`` returns
        for the detected boxes, or the unannotated RGB image when no text was
        found, or ``None`` when no image was supplied. ``text`` is the
        recognized lines joined with newlines (``""`` when there are none).
    """
    # Nothing was uploaded: return an empty result instead of failing inside
    # the OCR engine.
    if not img_path:
        return None, ""

    with _OCR_LOCK:
        result = _get_ocr_engine().ocr(img_path)

    # Debug output of every detected line. A page with no detected text can be
    # reported as None rather than an empty list, hence the `or []` guards.
    for page in result or []:
        for line in page or []:
            print(line)

    # Only the first page is rendered; treat a missing/None page as "no text".
    lines = (result[0] if result else None) or []

    # convert() yields an independent image, so the file handle can be closed
    # right away.
    with Image.open(img_path) as source:
        image = source.convert('RGB')

    if not lines:
        return image, ""

    boxes = [line[0] for line in lines]
    # Each line is indexed as [box, (text, score)]; fall back to an empty
    # string / zero score when the recognition part is missing so that the
    # three lists always stay aligned.
    txts = [line[1][0] if line[1] else '' for line in lines]
    scores = [line[1][1] if line[1] else 0.0 for line in lines]
    im_show_pil = draw_ocr(image, boxes, txts, scores, font_path="./simfang.ttf")

    return im_show_pil, "\n".join(txts)

# Page heading, rendered as centered HTML that links to the author's
# computer-vision page.
title = "<p style='text-align: center'><a href='https://www.twman.org/AI/CV' target='_blank'>繁體中文醫療診斷書和收據OCR：PaddleOCR</a></p>"

# HTML blurb shown with the interface: a credit line followed by links to the
# PaddleOCR repository and to related blog posts.
description = """<a href='https://www.twman.org' target='_blank'>TonTon Huang Ph.D. @ 2023/08 </a><br>
    <a href='https://github.com/PaddlePaddle/PaddleOCR' target='_blank'>PaddleOCR旨在打造一套豐富、領先、且實用的OCR工具庫，協助開發者訓練出更好的模型，並應用落地。</a><br>
    <a href='https://blog.twman.org/2021/04/ASR.html' target='_blank'>那些語音處理 (Speech Processing) 踩的坑</a> | <a href='https://blog.twman.org/2021/04/NLP.html' target='_blank'>那些自然語言處理 (Natural Language Processing, NLP) 踩的坑</a><br>
    <a href='https://blog.twman.org/2024/02/asr-tts.html' target='_blank'>那些ASR和TTS可能會踩的坑</a> | <a href='https://blog.twman.org/2024/02/LLM.html' target='_blank'>那些大模型開發會踩的坑</a><br>
    <a href='https://blog.twman.org/2023/04/GPT.html' target='_blank'>什麼是大語言模型，它是什麼？想要嗎？</a><br>
    <a href='https://blog.twman.org/2023/07/wsl.html' target='_blank'>用PaddleOCR的PPOCRLabel來微調醫療診斷書和收據</a> | <a href='https://blog.twman.org/2023/07/HugIE.html' target='_blank'>基於機器閱讀理解和指令微調的統一信息抽取框架之診斷書醫囑資訊擷取分析</a><br>"""

# Custom CSS handed to the interface; it sizes the elements matched by the
# `.output_image` and `.input_image` selectors.
css = ".output_image, .input_image {height: 40rem !important; width: 100% !important;}"

# Input widget: the uploaded image is handed to `inference` as a file path.
image_input = gr.Image(type='filepath', label='圖片上傳')

# Output widgets, in the order `inference` returns its values:
# the annotated image first, then the recognized text.
result_outputs = [
    gr.Image(type="pil", label="識別結果"),
    gr.Textbox(label="識別文本"),
]

# Sample images offered in the UI; each example row holds a single path
# because the interface has a single input.
example_rows = [
    [sample_path]
    for sample_path in (
        "DEMO/einvoice1.png",
        "DEMO/einvoice2.png",
        "DEMO/THSR1.jpg",
        "DEMO/THSR2.jpg",
        "DEMO/IDCARD1.jpg",
        "DEMO/HealthCARD1.jpg",
    )
]

# Assemble the interface and start serving it with debug output enabled.
demo = gr.Interface(
    fn=inference,
    inputs=image_input,
    outputs=result_outputs,
    examples=example_rows,
    title=title,
    description=description,
    css=css,
)
demo.launch(debug=True)