# rapidocr / cvt_model.py
# pitapo's picture
# rehost rapidocr
# f2a4d3e
# -*- encoding: utf-8 -*-
# @Author: SWHL
# @Contact: liekkaskono@163.com
from pathlib import Path
from typing import List, Union
import onnxruntime as ort
# paddleocr_convert 0.0.19 (presumably the version this script was written against; confirm)
from paddleocr_convert import PaddleOCRModelConvert
import onnx
from onnx import ModelProto
class ONNXMetaOp:
    """Helpers for reading and editing the ``metadata_props`` of an ONNX model.

    Every method is a classmethod. The editing helpers (``add_meta`` and
    ``del_meta``) load the model, change it in memory and return the
    resulting ``ModelProto``; nothing is written to disk until
    ``save_model`` is called.
    """

    @classmethod
    def add_meta(
        cls,
        model_path: Union[str, Path],
        key: str,
        value: List[str],
        delimiter: str = "\n",
    ) -> ModelProto:
        """Store ``value`` under ``key`` in the model's metadata.

        Args:
            model_path: Path of the ONNX model to load.
            key: Metadata key to write.
            value: Strings to store; they are joined with ``delimiter``.
            delimiter: Separator placed between the items of ``value``.

        Returns:
            The loaded model with the metadata entry set. The file at
            ``model_path`` is not modified.
        """
        # str() keeps this working with onnx releases that only accept str paths.
        model = onnx.load_model(str(model_path))
        props = model.metadata_props

        # Metadata keys are expected to be distinct, so drop any entry that
        # already uses this key instead of appending a duplicate next to it.
        for idx in reversed(range(len(props))):
            if props[idx].key == key:
                del props[idx]

        meta = props.add()
        meta.key = key
        meta.value = delimiter.join(value)
        return model

    @classmethod
    def get_meta(
        cls, model_path: Union[str, Path], key: str, split_sym: str = "\n"
    ) -> List[str]:
        """Read the metadata stored under ``key`` and split it into a list.

        Args:
            model_path: Path of the ONNX model to inspect.
            key: Metadata key to read.
            split_sym: Separator used to split the stored string.

        Returns:
            The stored value split on ``split_sym``.

        Raises:
            KeyError: If the model has no metadata entry named ``key``.
        """
        sess = ort.InferenceSession(str(model_path))
        meta_map = sess.get_modelmeta().custom_metadata_map

        if key not in meta_map:
            # Previously this surfaced as an AttributeError on ``None.split``.
            raise KeyError(
                f"metadata key {key!r} not found in {model_path}; "
                f"available keys: {sorted(meta_map)}"
            )
        return meta_map[key].split(split_sym)

    @classmethod
    def del_meta(cls, model_path: Union[str, Path]) -> ModelProto:
        """Remove every metadata entry from the model.

        Args:
            model_path: Path of the ONNX model to load.

        Returns:
            The loaded model with an empty ``metadata_props``. The file at
            ``model_path`` is not modified.
        """
        model = onnx.load_model(str(model_path))
        del model.metadata_props[:]
        return model

    @classmethod
    def save_model(cls, save_path: Union[str, Path], model: ModelProto):
        """Write ``model`` to ``save_path``.

        Args:
            save_path: Destination file path.
            model: The in-memory ONNX model to serialise.
        """
        onnx.save_model(model, str(save_path))
# Recognition models to convert. Each entry is a pair of
#   (URL of the Paddle inference-model archive, path of the matching dict file).
# Both members are passed to the converter call further down in this file.
model_list = [
    # --- Disabled entries (kept for reference) ---
    # (
    #     "https://paddleocr.bj.bcebos.com/PP-OCRv4/multilingual/korean_PP-OCRv4_rec_infer.tar",
    #     "paddle/PP-OCRv4/rec/korean_PP-OCRv4_rec_infer/korean_dict.txt",
    # ),
    # (
    #     "https://paddleocr.bj.bcebos.com/PP-OCRv4/multilingual/japan_PP-OCRv4_rec_infer.tar",
    #     "paddle/PP-OCRv4/rec/japan_PP-OCRv4_rec_infer/japan_dict.txt",
    # ),
    # (
    #     "https://paddleocr.bj.bcebos.com/PP-OCRv3/multilingual/chinese_cht_PP-OCRv3_rec_infer.tar",
    #     "paddle/PP-OCRv4/rec/chinese_cht_PP-OCRv3_rec_infer/chinese_cht_dict.txt",
    # ),
    # --- Active entries ---
    (
        "https://paddleocr.bj.bcebos.com/PP-OCRv4/multilingual/ta_PP-OCRv4_rec_infer.tar",
        "paddle/PP-OCRv4/rec/ta_PP-OCRv4_rec_infer/ta_dict.txt",
    ),
    (
        "https://paddleocr.bj.bcebos.com/PP-OCRv4/multilingual/te_PP-OCRv4_rec_infer.tar",
        "paddle/PP-OCRv4/rec/te_PP-OCRv4_rec_infer/te_dict.txt",
    ),
    # NOTE(review): the archive is named "ka_..." but the dict directory is
    # "kannada_..."; confirm the local directory really uses that name.
    (
        "https://paddleocr.bj.bcebos.com/PP-OCRv4/multilingual/ka_PP-OCRv4_rec_infer.tar",
        "paddle/PP-OCRv4/rec/kannada_PP-OCRv4_rec_infer/ka_dict.txt",
    ),
    # The latin and cyrillic archives come from the PP-OCRv3 URL prefix, while
    # their dict files sit under the PP-OCRv4/rec directory like the others.
    (
        "https://paddleocr.bj.bcebos.com/PP-OCRv3/multilingual/latin_PP-OCRv3_rec_infer.tar",
        "paddle/PP-OCRv4/rec/latin_PP-OCRv3_rec_infer/latin_dict.txt",
    ),
    (
        "https://paddleocr.bj.bcebos.com/PP-OCRv4/multilingual/arabic_PP-OCRv4_rec_infer.tar",
        "paddle/PP-OCRv4/rec/arabic_PP-OCRv4_rec_infer/arabic_dict.txt",
    ),
    (
        "https://paddleocr.bj.bcebos.com/PP-OCRv3/multilingual/cyrillic_PP-OCRv3_rec_infer.tar",
        "paddle/PP-OCRv4/rec/cyrillic_PP-OCRv3_rec_infer/cyrillic_dict.txt",
    ),
    (
        "https://paddleocr.bj.bcebos.com/PP-OCRv4/multilingual/devanagari_PP-OCRv4_rec_infer.tar",
        "paddle/PP-OCRv4/rec/devanagari_PP-OCRv4_rec_infer/devanagari_dict.txt",
    ),
]
# Run the converter once for every (archive URL, dict path) pair in
# ``model_list``; all calls share the same output directory.
# NOTE(review): this executes at module top level, so importing the file also
# triggers the conversions. Consider an ``if __name__ == "__main__":`` guard if
# the class above is ever meant to be imported from elsewhere.
save_dir = "onnx/PP-OCRv4/rec"
converter = PaddleOCRModelConvert()

for url, txt_url in model_list:
    converter(url, save_dir, txt_path=txt_url)