# Spaces: Running  (hosting-page status text captured with the source; not part of the program)
import hashlib
import io
import os
from enum import Enum

import numpy as np
import uvicorn
from fastapi import FastAPI, UploadFile, File
from fastapi.staticfiles import StaticFiles
from paddleocr import PaddleOCR, PPStructure, save_structure_res
from PIL import Image
# The FastAPI application object; the static mount at the bottom of the file
# is attached to it and it is what uvicorn serves.
app = FastAPI()

# Module-wide GPU switch forwarded to PaddleOCR when an OCR engine is built.
use_gpu = False

# Folder handed to save_structure_res() for layout-analysis artifacts.
output_dir = 'output'
class LangEnum(str, Enum):
    """Languages accepted by the handlers' ``lang`` parameter.

    Members also subclass ``str``, so each one compares equal to its plain
    string value (``LangEnum.ch == "ch"``).
    """

    ch = "ch"
    en = "en"
# Cache of already-constructed PaddleOCR engines, filled lazily by get_ocr().
ocr_cache = {}
def get_ocr(lang, use_gpu=False):
    """Return a PaddleOCR engine for ``lang``, building it on first use.

    Engines are stored in the module-level ``ocr_cache`` so that a given
    configuration is only constructed once.

    Args:
        lang: Language code (a plain string or a ``LangEnum`` member).
        use_gpu: Forwarded to ``PaddleOCR`` as its ``use_gpu`` option.

    Returns:
        The cached ``PaddleOCR`` instance for this ``(lang, use_gpu)`` pair.
    """
    # Key on both arguments: keying on ``lang`` alone would hand back the
    # first engine built for a language even when ``use_gpu`` differs.
    key = (lang, use_gpu)
    engine = ocr_cache.get(key)
    if engine is None:
        engine = PaddleOCR(use_angle_cls=True, lang=lang, use_gpu=use_gpu)
        ocr_cache[key] = engine
    return engine
# NOTE(review): no route decorator is visible on this handler, and a later
# function in this file reuses the name ``create_upload_file`` and therefore
# replaces it at module level — confirm how (and under which path) this
# handler is registered on ``app``.
async def create_upload_file(
        file: UploadFile = File(...),
        lang: LangEnum = LangEnum.ch,
        # use_gpu: bool = False  (disabled; the module-level ``use_gpu`` is used)
):
    """Run text recognition on an uploaded image.

    Args:
        file: The uploaded image.
        lang: Recognition language; defaults to ``LangEnum.ch``.

    Returns:
        A list with one dict per recognized line, each holding ``boxes``
        (the line's box), ``txt`` (the text) and ``score`` (the confidence).
        The list is empty when nothing was recognized.
    """
    contents = await file.read()
    image = Image.open(io.BytesIO(contents))
    ocr = get_ocr(lang=lang, use_gpu=use_gpu)
    img2np = np.array(image)

    # One image in, so only the first entry of the result is relevant.
    # PaddleOCR can report ``None`` there when it detects no text; iterating
    # that would raise TypeError, so treat it as "no lines".
    pages = ocr.ocr(img2np, cls=True)
    result = pages[0] if pages else None
    if not result:
        return []

    # Recognition result: each line is [box, (text, score)].
    return [
        dict(boxes=line[0], txt=line[1][0], score=line[1][1])
        for line in result
    ]
# PPStructure engines keyed by language, so the models are loaded once per
# language instead of on every request.
_structure_cache = {}


def _get_structure_engine(lang):
    """Return the PPStructure engine for ``lang``, building it on first use."""
    engine = _structure_cache.get(lang)
    if engine is None:
        engine = PPStructure(show_log=True, table=True, lang=lang)
        _structure_cache[lang] = engine
    return engine


# NOTE(review): no route decorator is visible on this handler, and it reuses
# the name of an earlier handler in this file, replacing it at module level —
# confirm how (and under which path) each one is registered on ``app``.
async def create_upload_file(
        file: UploadFile = File(...),
        lang: LangEnum = LangEnum.ch,
        # use_gpu: bool = False  (disabled; not used by this handler)
):
    """Run layout/table analysis on an uploaded image.

    The analysis result is also written under ``output_dir`` via
    ``save_structure_res``, using the SHA-256 of the upload as the name.

    Args:
        file: The uploaded image.
        lang: Recognition language; defaults to ``LangEnum.ch``.

    Returns:
        A dict with parallel lists ``htmls``, ``bboxes`` and ``types`` (one
        entry per detected region) plus ``hash``, the hex SHA-256 digest of
        the uploaded bytes.
    """
    table_engine = _get_structure_engine(lang)
    contents = await file.read()
    # Hash of the file content; names the saved results and is returned so
    # the caller can refer to them.
    file_hash = hashlib.sha256(contents).hexdigest()
    image = Image.open(io.BytesIO(contents))
    img2np = np.array(image)
    result = table_engine(img2np)
    save_structure_res(result, output_dir, file_hash)

    htmls = []
    types = []
    bboxes = []
    for item in result:
        item_res = item.get('res', {})
        # Only a dict-shaped ``res`` can carry 'html'; regions whose ``res``
        # is a list (or anything else) have no HTML, and calling ``.get`` on
        # them would raise AttributeError.
        html = item_res.get('html', '') if isinstance(item_res, dict) else ''
        htmls.append(html)
        types.append(item.get('type', ''))
        bboxes.append(item.get('bbox', ''))

    return {
        'htmls': htmls,
        'hash': file_hash,
        'bboxes': bboxes,
        'types': types,
    }
if __name__ == '__main__':
    # StaticFiles verifies that its directory exists when it is constructed,
    # and nothing creates the output folder before the first analysis
    # request, so make sure it is there before mounting.
    os.makedirs(output_dir, exist_ok=True)
    # Serve saved analysis artifacts under /output.
    app.mount(
        "/output",
        StaticFiles(directory=output_dir, follow_symlink=True, html=True),
        name="output",
    )
    uvicorn.run(app=app, port=7860)