File size: 2,743 Bytes
91acf95
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
import pathlib
import streamlit as st
from PIL import Image, ImageDraw, ImageFont
from streamlit_image_coordinates import streamlit_image_coordinates
import pytesseract
import pypdfium2

from streamlit.runtime.uploaded_file_manager import UploadedFile


# Tesseract language packs used for recognition (English + Portuguese).
LANG = "eng+por"
# Extra command-line options forwarded to Tesseract; none by default.
OPTIONS = ""
# TrueType font used to label the selected word; expected next to this script.
FONT_FILE = pathlib.Path(__file__).with_name("Roboto-Regular.ttf")

st.set_page_config(
    page_icon="🪟",
    page_title="Streamlit Demo",
    layout="wide",
)

# Bare string expression: rendered as the page heading by Streamlit "magic".
"# Streamlit PDF OCR Demo"

# `hash_funcs` is keyed by the argument's *type* (or its fully qualified type
# name), not by the parameter name, so the entry must be `UploadedFile` for
# the custom hash to be applied. The upload is identified by its `file_id`.
@st.cache_data(hash_funcs={UploadedFile: lambda uploaded_file: uploaded_file.file_id})
def convert_pdf_to_img(file: UploadedFile) -> Image.Image:
    """Render the first page of an uploaded PDF as a grayscale PIL image.

    Args:
        file: The PDF as returned by Streamlit's file uploader.

    Returns:
        The first page rendered at 2x scale in grayscale.
    """
    doc = pypdfium2.PdfDocument(file)
    # Only the first page is used by this demo.
    raw_image: pypdfium2.PdfBitmap = doc[0].render(scale=2, grayscale=True)
    image: Image.Image = raw_image.to_pil()
    return image


@st.cache_data
def load_image_and_ocr(img: Image.Image, lang: str, config: str) -> tuple[Image.Image, dict, list[dict]]:
    """Run Tesseract on *img* and collect the bounding boxes of non-blank text.

    Args:
        img: Image to recognise.
        lang: Language argument passed to ``pytesseract.image_to_data``.
        config: Config argument passed to ``pytesseract.image_to_data``.

    Returns:
        A tuple of the image converted to RGB, the raw dict produced by
        pytesseract, and a list of dicts with the keys ``text``, ``left``,
        ``top``, ``width`` and ``height`` for every entry whose text is not
        blank.
    """
    ocr = pytesseract.image_to_data(img, lang, config, output_type=pytesseract.Output.DICT)

    # One record per OCR row, seeded with its text; geometry is filled in
    # column by column below.
    records = [{"text": word} for word in ocr["text"]]
    for field in ("left", "top", "width", "height"):
        for row_index, value in enumerate(ocr[field]):
            records[row_index][field] = value

    # Drop rows whose text is empty or whitespace-only.
    boxes = list(filter(lambda record: record["text"].strip() != "", records))
    rgb_image = img.convert("RGB")
    return rgb_image, ocr, boxes


def main(image: Image.Image):
    """OCR *image*, outline every recognised word and show the clickable result.

    Each box is padded by a small margin and outlined in green. The box
    containing the last click stored under the ``image_coords`` session key
    is outlined in blue and its text is drawn above it in red. Before the
    first click no box is treated as selected.

    Args:
        image: Page image to recognise and annotate.
    """
    margin = 5  # padding, in pixels, added around every OCR box

    img, _, boxes = load_image_and_ocr(image, LANG, OPTIONS)

    # `None` until the image has been clicked. Falling back to (0, 0) would
    # wrongly select any box whose padded bounds cover the origin.
    coords = st.session_state.get("image_coords")

    draw = ImageDraw.Draw(img)
    font = None  # loaded lazily, and only once, when a box is selected
    text = None
    for box in boxes:
        x1 = box['left'] - margin
        y1 = box['top'] - margin
        x2 = box['left'] + box['width'] + margin
        y2 = box['top'] + box['height'] + margin

        selected = (
            coords is not None
            and (x1 <= coords["x"] <= x2)
            and (y1 <= coords["y"] <= y2)
        )
        if selected:
            if font is None:
                font = ImageFont.FreeTypeFont(FONT_FILE, size=24)
            text = box['text']
            # Label is centred horizontally on the box, slightly above it.
            draw.text(((x1+x2)/2, y1-15), text, align="center", anchor="mm", fill="red", font=font)

        color = "blue" if selected else "green"
        draw.rectangle((x1, y1, x2, y2), fill=None, outline=color, width=2)

    "## Click at a rectangle"
    if text:
        f'''### Selected text: "{text}"'''
    else:
        '''### Selected text will appear here'''

    # Another option would be `click_and_drag=True` with single-word orientation mode
    streamlit_image_coordinates(img, key="image_coords", click_and_drag=False)

# The uploader stores its value in session state under the "pdf" key.
st.file_uploader("Upload PDF", ".pdf", key="pdf")

# Only convert and run the OCR view once a PDF has been uploaded.
pdf_file = st.session_state.get("pdf")
if pdf_file is not None:
    base_image = convert_pdf_to_img(pdf_file)
    main(base_image)