"""HTTP service exposing PaddleOCR text recognition and PP-Structure analysis.

Endpoints
---------
POST /ocr
    Detect and recognise text in an uploaded image.
POST /ocr_table
    Run PP-Structure layout/table analysis on an uploaded image and store
    the generated artefacts under ``<output_dir>/<sha256 of the upload>``.

When the module is executed as a script, ``output_dir`` is additionally
served as static files under ``/output``.
"""
import hashlib
import io
import os
from enum import Enum

import numpy as np
import uvicorn
from fastapi import FastAPI, File, HTTPException, UploadFile
from fastapi.staticfiles import StaticFiles
from paddleocr import PaddleOCR, PPStructure, save_structure_res
from PIL import Image

app = FastAPI()

# Deployment-wide settings shared by both endpoints.
use_gpu = False
output_dir = 'output'


class LangEnum(str, Enum):
    """Recognition languages accepted by the API."""

    ch = "ch"
    en = "en"


# Engines are expensive to construct (model weights are loaded), so each
# configuration is built once and reused. Keys are ``(lang, use_gpu)``.
ocr_cache = {}
table_engine_cache = {}


def _engine_key(lang, use_gpu):
    """Return the cache key for an engine configuration.

    ``lang`` may be a ``LangEnum`` member or a plain string; it is reduced
    to its plain string value so both spellings share one cache entry and
    the engine receives an ordinary ``str``.
    """
    lang_code = lang.value if isinstance(lang, Enum) else lang
    return lang_code, bool(use_gpu)


def get_ocr(lang, use_gpu=False):
    """Return a cached ``PaddleOCR`` instance for ``lang`` / ``use_gpu``.

    Args:
        lang: Language code (``LangEnum`` member or string).
        use_gpu: Whether the engine should be created with GPU inference.

    Returns:
        The ``PaddleOCR`` engine for that configuration, created on first use.
    """
    key = _engine_key(lang, use_gpu)
    if key not in ocr_cache:
        ocr_cache[key] = PaddleOCR(use_angle_cls=True, lang=key[0], use_gpu=key[1])
    return ocr_cache[key]


def get_table_engine(lang, use_gpu=False):
    """Return a cached ``PPStructure`` instance for ``lang`` / ``use_gpu``.

    Mirrors ``get_ocr`` so the structure models are not reloaded on every
    request.
    """
    key = _engine_key(lang, use_gpu)
    if key not in table_engine_cache:
        table_engine_cache[key] = PPStructure(
            show_log=True, table=True, lang=key[0], use_gpu=key[1]
        )
    return table_engine_cache[key]


def _decode_image(contents):
    """Decode uploaded bytes into a 3-channel BGR ``numpy`` array.

    The image is normalised to RGB first so palette, grayscale, RGBA and
    CMYK uploads all yield an ``(H, W, 3)`` array, then the channel order is
    reversed because PaddleOCR's own loaders decode with OpenCV (BGR).

    Raises:
        HTTPException: 400 if the bytes cannot be decoded as an image.
    """
    try:
        with Image.open(io.BytesIO(contents)) as image:
            rgb = np.array(image.convert("RGB"))
    except OSError as err:  # includes PIL.UnidentifiedImageError
        raise HTTPException(
            status_code=400, detail="Uploaded file is not a readable image"
        ) from err
    # Reversed slicing yields a negative-stride view; make it contiguous so
    # downstream OpenCV calls accept it.
    return np.ascontiguousarray(rgb[:, :, ::-1])


@app.post("/ocr")
async def ocr_image(
        file: UploadFile = File(...),
        lang: LangEnum = LangEnum.ch,
):
    """Recognise text in the uploaded image.

    Returns:
        A list of ``{"boxes": ..., "txt": ..., "score": ...}`` dicts, one per
        recognised text line; empty when no text is found.
    """
    contents = await file.read()
    image = _decode_image(contents)

    ocr = get_ocr(lang=lang, use_gpu=use_gpu)
    pages = ocr.ocr(image, cls=True)
    # One entry per input image; the entry is None when nothing was detected.
    lines = (pages[0] if pages else None) or []

    # Each line is [box, (text, score)].
    return [
        dict(boxes=line[0], txt=line[1][0], score=line[1][1])
        for line in lines
    ]


@app.post("/ocr_table")
async def ocr_table(
        file: UploadFile = File(...),
        lang: LangEnum = LangEnum.ch,
):
    """Run layout/table analysis on the uploaded image.

    The structure result is also written to ``<output_dir>/<hash>`` via
    ``save_structure_res``.

    Returns:
        A dict with parallel lists ``htmls``, ``bboxes`` and ``types`` (one
        element per detected region) plus ``hash``, the SHA-256 hex digest of
        the uploaded bytes. ``htmls`` holds an empty string for regions that
        carry no HTML.
    """
    contents = await file.read()
    # The content hash names the per-upload output folder.
    file_hash = hashlib.sha256(contents).hexdigest()
    image = _decode_image(contents)

    table_engine = get_table_engine(lang=lang, use_gpu=use_gpu)
    result = table_engine(image)
    save_structure_res(result, output_dir, file_hash)

    htmls = []
    types = []
    bboxes = []
    for item in result:
        item_res = item.get('res')
        # Only dict-shaped results can carry 'html'; other regions may hold a
        # list of recognised lines instead.
        htmls.append(item_res.get('html', '') if isinstance(item_res, dict) else '')
        types.append(item.get('type', ''))
        bboxes.append(item.get('bbox', ''))

    return {
        'htmls': htmls,
        'hash': file_hash,
        'bboxes': bboxes,
        'types': types,
    }


if __name__ == '__main__':
    # StaticFiles refuses to start if the directory is missing, which is the
    # case until the first /ocr_table request has written something.
    os.makedirs(output_dir, exist_ok=True)
    app.mount(
        "/output",
        StaticFiles(directory=output_dir, follow_symlink=True, html=True),
        name="output",
    )
    uvicorn.run(app=app, port=7860)