File size: 5,061 Bytes
fb2cfa5 d170be2 ca9b39d d170be2 fb2cfa5 ca9b39d d170be2 fb2cfa5 ca9b39d d170be2 ca9b39d d170be2 fb2cfa5 ca9b39d cbcc4bf ca9b39d fb2cfa5 d170be2 ca9b39d d170be2 ca9b39d d170be2 fb2cfa5 d170be2 fb2cfa5 ca9b39d d170be2 fb2cfa5 cbcc4bf ccf3b1b cbcc4bf ccf3b1b cbcc4bf fb2cfa5 cbcc4bf fb2cfa5 ca9b39d fb2cfa5 ca9b39d fb2cfa5 d170be2 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 |
from __future__ import annotations
import os
from pathlib import Path
from queue import SimpleQueue
from threading import Thread
from time import sleep
from typing import Any
import gradio as gr # type: ignore
import rerun as rr
import rerun.blueprint as rrb
from fastapi import FastAPI
from fastapi.middleware.cors import CORSMiddleware
from gradio_rerun import Rerun # type: ignore
from ocr import detect_and_log_layouts, PAGE_LIMIT
CUSTOM_PATH = "/"
app = FastAPI()
origins = [
"https://app.rerun.io",
]
app.add_middleware(
CORSMiddleware,
allow_origins=origins,
)
def progress_log(log_queue: SimpleQueue[Any], done: SimpleQueue[Any]):
dots = 0
while True:
if not done.empty():
break
sleep(0.7)
log_queue.put([
"log",
"progress",
[rr.TextDocument(f"working{'.'*(dots+1)}")]
])
dots = (dots + 1) % 5
def file_ocr(log_queue: SimpleQueue[Any], file_path: str, start_page: int, end_page: int):
detect_and_log_layouts(log_queue, file_path, start_page, end_page)
log_queue.put("done")
@rr.thread_local_stream("PaddleOCR")
def log_to_rr(file_path: Path, start_page: int = 1, end_page: int = -1):
stream = rr.binary_stream()
log_queue: SimpleQueue[Any] = SimpleQueue()
done: SimpleQueue[Any] = SimpleQueue()
Thread(target=progress_log, args=[log_queue, done]).start()
handle = Thread(target=file_ocr, args=[log_queue, str(file_path), start_page, end_page])
handle.start()
rr.send_blueprint(rrb.Blueprint(
rrb.TextDocumentView(contents=["progress/**"]),
collapse_panels=True,
))
yield stream.read()
while True:
msg = log_queue.get()
if msg == "done":
break
msg_type = msg[0]
if msg_type == "blueprint":
blueprint = msg[1]
rr.send_blueprint(blueprint)
elif msg_type == "log":
entity_path = msg[1]
args = msg[2]
kwargs = msg[3] if len(msg) >= 4 else {}
rr.log(entity_path, *args, **kwargs)
yield stream.read()
rr.log("progress",rr.TextDocument("Done!"))
yield stream.read()
done.put(())
print("done")
DESCRIPTION = """
## PaddleOCR with [Rerun](https://rerun.io/) for visualization
This space demonstrates the ability to visualize and verify the document layout analysis and text detection using [PaddleOCR](https://github.com/PaddlePaddle/PaddleOCR).
The [PP-Structure](https://github.com/PaddlePaddle/PaddleOCR/tree/main/ppstructure) used for this task, is an intelligent document analysis system developed by the PaddleOCR team, aims to help developers better complete tasks related to document understanding such as layout analysis and table recognition.
"""
with gr.Blocks() as demo:
gr.Markdown(DESCRIPTION)
with gr.Row():
with gr.Column(scale=1):
with gr.Tab(label="Upload Image"):
with gr.Row():
input_image_file = gr.Image(label="Input Image", image_mode="RGBA", sources="upload", type="filepath")
# input_image_file = gr.Image(label="Input image")
with gr.Row():
image_button = gr.Button()
with gr.Row():
gr.Examples(
examples=[
os.path.join("image_examples", img_name)
for img_name in sorted(os.listdir("image_examples"))
],
inputs=[input_image_file],
label="Examples",
cache_examples=False,
examples_per_page=12,
)
with gr.Tab(label="Upload pdf"):
with gr.Row():
input_pdf_file = gr.File(label="Input pdf")
gr.Markdown(f"Max {PAGE_LIMIT} pages, -1 on end page means max number of pages")
with gr.Row():
start_page_number = gr.Number(1, label="Start page", minimum=1)
with gr.Row():
end_page_number = gr.Number(-1, label="End page")
with gr.Row():
pdf_button = gr.Button()
with gr.Row():
gr.Examples(
examples=[
os.path.join("pdf_examples", img_name) for img_name in sorted(os.listdir("pdf_examples"))
],
inputs=[input_pdf_file],
label="Examples",
cache_examples=False,
examples_per_page=12,
)
with gr.Column(scale=4):
viewer = Rerun(streaming=True, height=900)
image_button.click(log_to_rr, inputs=[input_image_file], outputs=[viewer])
pdf_button.click(log_to_rr, inputs=[input_pdf_file, start_page_number, end_page_number], outputs=[viewer])
app = gr.mount_gradio_app(app, demo, path=CUSTOM_PATH)
|