Spaces:

mindee
/

doctr

Running

App Files Community

odulcy-mindee committed on Feb 1, 2024

Commit

4eb7c20

1 Parent(s): 049c6c7

Switch HF Spaces to Torch (credit: @Felix92 )

Browse files

Files changed (4) hide show

README.md +12 -11
app.py +46 -50
backend/pytorch.py +86 -0
requirements.txt +1 -2

README.md CHANGED Viewed

@@ -4,35 +4,36 @@ emoji: 📑
 colorFrom: purple
 colorTo: pink
 sdk: streamlit
-sdk_version: 0.84.2
 app_file: app.py
 pinned: false
 ---
 # Configuration
-`title`: _string_
 Display title for the Space
-`emoji`: _string_
 Space emoji (emoji-only character allowed)
-`colorFrom`: _string_
 Color for Thumbnail gradient (red, yellow, green, blue, indigo, purple, pink, gray)
-`colorTo`: _string_
 Color for Thumbnail gradient (red, yellow, green, blue, indigo, purple, pink, gray)
-`sdk`: _string_
 Can be either `gradio` or `streamlit`
-`sdk_version` : _string_
-Only applicable for `streamlit` SDK.
 See [doc](https://hf.co/docs/hub/spaces) for more info on supported versions.
-`app_file`: _string_
-Path to your main application file (which contains either `gradio` or `streamlit` Python code).
 Path is relative to the root of the repository.
-`pinned`: _boolean_
 Whether the Space stays on top of your list.

 colorFrom: purple
 colorTo: pink
 sdk: streamlit
+sdk_version: 1.30.0
 app_file: app.py
 pinned: false
+license: apache-2.0
 ---
 # Configuration
+`title`: _string_
 Display title for the Space
+`emoji`: _string_
 Space emoji (emoji-only character allowed)
+`colorFrom`: _string_
 Color for Thumbnail gradient (red, yellow, green, blue, indigo, purple, pink, gray)
+`colorTo`: _string_
 Color for Thumbnail gradient (red, yellow, green, blue, indigo, purple, pink, gray)
+`sdk`: _string_
 Can be either `gradio` or `streamlit`
+`sdk_version` : _string_
+Only applicable for `streamlit` SDK.
 See [doc](https://hf.co/docs/hub/spaces) for more info on supported versions.
+`app_file`: _string_
+Path to your main application file (which contains either `gradio` or `streamlit` Python code).
 Path is relative to the root of the repository.
+`pinned`: _boolean_
 Whether the Space stays on top of your list.

app.py CHANGED Viewed

@@ -1,47 +1,35 @@
-# Copyright (C) 2021, Mindee.
-# This program is licensed under the Apache License version 2.
-# See LICENSE or go to <https://www.apache.org/licenses/LICENSE-2.0.txt> for full license details.
-import os
 import matplotlib.pyplot as plt
 import streamlit as st
-os.environ["TF_CPP_MIN_LOG_LEVEL"] = "2"
-import cv2
-import tensorflow as tf
-gpu_devices = tf.config.experimental.list_physical_devices('GPU')
-if any(gpu_devices):
-    tf.config.experimental.set_memory_growth(gpu_devices[0], True)
 from doctr.io import DocumentFile
-from doctr.models import ocr_predictor
 from doctr.utils.visualization import visualize_page
-DET_ARCHS = ["db_resnet50", "db_mobilenet_v3_large"]
-RECO_ARCHS = ["crnn_vgg16_bn", "crnn_mobilenet_v3_small", "master", "sar_resnet31"]
-def main():
     # Wide mode
     st.set_page_config(layout="wide")
     # Designing the interface
     st.title("docTR: Document Text Recognition")
     # For newline
-    st.write('\n')
-    #
-    st.write('Find more info at: https://github.com/mindee/doctr')
-    # For newline
-    st.write('\n')
     # Instructions
     st.markdown("*Hint: click on the top-right corner of an image to enlarge it!*")
     # Set the columns
-    cols = st.beta_columns((1, 1, 1, 1))
     cols[0].subheader("Input page")
     cols[1].subheader("Segmentation heatmap")
     cols[2].subheader("OCR output")
@@ -50,64 +38,72 @@ def main():
     # Sidebar
     # File selection
     st.sidebar.title("Document selection")
-    # Disabling warning
-    st.set_option('deprecation.showfileUploaderEncoding', False)
     # Choose your own image
-    uploaded_file = st.sidebar.file_uploader("Upload files", type=['pdf', 'png', 'jpeg', 'jpg'])
     if uploaded_file is not None:
-        if uploaded_file.name.endswith('.pdf'):
             doc = DocumentFile.from_pdf(uploaded_file.read())
         else:
             doc = DocumentFile.from_images(uploaded_file.read())
         page_idx = st.sidebar.selectbox("Page selection", [idx + 1 for idx in range(len(doc))]) - 1
-        cols[0].image(doc[page_idx])
     # Model selection
     st.sidebar.title("Model selection")
-    det_arch = st.sidebar.selectbox("Text detection model", DET_ARCHS)
-    reco_arch = st.sidebar.selectbox("Text recognition model", RECO_ARCHS)
     # For newline
-    st.sidebar.write('\n')
     if st.sidebar.button("Analyze page"):
         if uploaded_file is None:
             st.sidebar.write("Please upload a document")
         else:
-            with st.spinner('Loading model...'):
-                predictor = ocr_predictor(det_arch, reco_arch, pretrained=True)
-            with st.spinner('Analyzing...'):
                 # Forward the image to the model
-                processed_batches = predictor.det_predictor.pre_processor([doc[page_idx]])
-                out = predictor.det_predictor.model(processed_batches[0], return_model_output=True)
-                seg_map = out["out_map"]
-                seg_map = tf.squeeze(seg_map[0, ...], axis=[2])
-                seg_map = cv2.resize(seg_map.numpy(), (doc[page_idx].shape[1], doc[page_idx].shape[0]),
-                                     interpolation=cv2.INTER_LINEAR)
                 # Plot the raw heatmap
                 fig, ax = plt.subplots()
                 ax.imshow(seg_map)
-                ax.axis('off')
                 cols[1].pyplot(fig)
                 # Plot OCR output
-                out = predictor([doc[page_idx]])
-                fig = visualize_page(out.pages[0].export(), doc[page_idx], interactive=False)
                 cols[2].pyplot(fig)
                 # Page reconsitution under input page
                 page_export = out.pages[0].export()
-                img = out.pages[0].synthesize()
-                cols[3].image(img, clamp=True)
                 # Display JSON
                 st.markdown("\nHere are your analysis results in JSON format:")
                 st.json(page_export)
-if __name__ == '__main__':
-    main()

+# Copyright (C) 2021-2024, Mindee.
+# This program is licensed under the Apache License 2.0.
+# See LICENSE or go to <https://opensource.org/licenses/Apache-2.0> for full license details.
+import cv2
 import matplotlib.pyplot as plt
+import numpy as np
 import streamlit as st
+import torch
 from doctr.io import DocumentFile
 from doctr.utils.visualization import visualize_page
+from backend.pytorch import DET_ARCHS, RECO_ARCHS, forward_image, load_predictor
+forward_device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
+def main(det_archs, reco_archs):
+    """Build a streamlit layout"""
     # Wide mode
     st.set_page_config(layout="wide")
     # Designing the interface
     st.title("docTR: Document Text Recognition")
     # For newline
+    st.write("\n")
     # Instructions
     st.markdown("*Hint: click on the top-right corner of an image to enlarge it!*")
     # Set the columns
+    cols = st.columns((1, 1, 1, 1))
     cols[0].subheader("Input page")
     cols[1].subheader("Segmentation heatmap")
     cols[2].subheader("OCR output")
     # Sidebar
     # File selection
     st.sidebar.title("Document selection")
     # Choose your own image
+    uploaded_file = st.sidebar.file_uploader("Upload files", type=["pdf", "png", "jpeg", "jpg"])
     if uploaded_file is not None:
+        if uploaded_file.name.endswith(".pdf"):
             doc = DocumentFile.from_pdf(uploaded_file.read())
         else:
             doc = DocumentFile.from_images(uploaded_file.read())
         page_idx = st.sidebar.selectbox("Page selection", [idx + 1 for idx in range(len(doc))]) - 1
+        page = doc[page_idx]
+        cols[0].image(page)
     # Model selection
     st.sidebar.title("Model selection")
+    det_arch = st.sidebar.selectbox("Text detection model", det_archs)
+    reco_arch = st.sidebar.selectbox("Text recognition model", reco_archs)
     # For newline
+    st.sidebar.write("\n")
+    # Only straight pages or possible rotation
+    st.sidebar.title("Parameters")
+    assume_straight_pages = st.sidebar.checkbox("Assume straight pages", value=True)
+    st.sidebar.write("\n")
+    # Straighten pages
+    straighten_pages = st.sidebar.checkbox("Straighten pages", value=False)
+    st.sidebar.write("\n")
+    # Binarization threshold
+    bin_thresh = st.sidebar.slider("Binarization threshold", min_value=0.1, max_value=0.9, value=0.3, step=0.1)
+    st.sidebar.write("\n")
     if st.sidebar.button("Analyze page"):
         if uploaded_file is None:
             st.sidebar.write("Please upload a document")
         else:
+            with st.spinner("Loading model..."):
+                predictor = load_predictor(
+                    det_arch, reco_arch, assume_straight_pages, straighten_pages, bin_thresh, forward_device
+                )
+            with st.spinner("Analyzing..."):
                 # Forward the image to the model
+                seg_map = forward_image(predictor, page, forward_device)
+                seg_map = np.squeeze(seg_map)
+                seg_map = cv2.resize(seg_map, (page.shape[1], page.shape[0]), interpolation=cv2.INTER_LINEAR)
                 # Plot the raw heatmap
                 fig, ax = plt.subplots()
                 ax.imshow(seg_map)
+                ax.axis("off")
                 cols[1].pyplot(fig)
                 # Plot OCR output
+                out = predictor([page])
+                fig = visualize_page(out.pages[0].export(), out.pages[0].page, interactive=False, add_labels=False)
                 cols[2].pyplot(fig)
                 # Page reconsitution under input page
                 page_export = out.pages[0].export()
+                if assume_straight_pages or (not assume_straight_pages and straighten_pages):
+                    img = out.pages[0].synthesize()
+                    cols[3].image(img, clamp=True)
                 # Display JSON
                 st.markdown("\nHere are your analysis results in JSON format:")
                 st.json(page_export)
+if __name__ == "__main__":
+    main(DET_ARCHS, RECO_ARCHS)

backend/pytorch.py ADDED Viewed

@@ -0,0 +1,86 @@

+# Copyright (C) 2021-2024, Mindee.
+# This program is licensed under the Apache License 2.0.
+# See LICENSE or go to <https://opensource.org/licenses/Apache-2.0> for full license details.
+import numpy as np
+import torch
+from doctr.models import ocr_predictor
+from doctr.models.predictor import OCRPredictor
+DET_ARCHS = [
+    "db_resnet50",
+    "db_resnet34",
+    "db_mobilenet_v3_large",
+    "linknet_resnet18",
+    "linknet_resnet34",
+    "linknet_resnet50",
+]
+RECO_ARCHS = [
+    "crnn_vgg16_bn",
+    "crnn_mobilenet_v3_small",
+    "crnn_mobilenet_v3_large",
+    "master",
+    "sar_resnet31",
+    "vitstr_small",
+    "vitstr_base",
+    "parseq",
+]
+def load_predictor(
+    det_arch: str,
+    reco_arch: str,
+    assume_straight_pages: bool,
+    straighten_pages: bool,
+    bin_thresh: float,
+    device: torch.device,
+) -> OCRPredictor:
+    """Load a predictor from doctr.models
+    Args:
+    ----
+        det_arch: detection architecture
+        reco_arch: recognition architecture
+        assume_straight_pages: whether to assume straight pages or not
+        straighten_pages: whether to straighten rotated pages or not
+        bin_thresh: binarization threshold for the segmentation map
+        device: torch.device, the device to load the predictor on
+    Returns:
+    -------
+        instance of OCRPredictor
+    """
+    predictor = ocr_predictor(
+        det_arch,
+        reco_arch,
+        pretrained=True,
+        assume_straight_pages=assume_straight_pages,
+        straighten_pages=straighten_pages,
+        export_as_straight_boxes=straighten_pages,
+        detect_orientation=not assume_straight_pages,
+    ).to(device)
+    predictor.det_predictor.model.postprocessor.bin_thresh = bin_thresh
+    return predictor
+def forward_image(predictor: OCRPredictor, image: np.ndarray, device: torch.device) -> np.ndarray:
+    """Forward an image through the predictor
+    Args:
+    ----
+        predictor: instance of OCRPredictor
+        image: image to process
+        device: torch.device, the device to process the image on
+    Returns:
+    -------
+        segmentation map
+    """
+    with torch.no_grad():
+        processed_batches = predictor.det_predictor.pre_processor([image])
+        out = predictor.det_predictor.model(processed_batches[0].to(device), return_model_output=True)
+        seg_map = out["out_map"].to("cpu").numpy()
+    return seg_map

requirements.txt CHANGED Viewed

@@ -1,3 +1,2 @@
--e git+https://github.com/mindee/doctr.git#egg=python-doctr[tf]
 streamlit>=1.0.0
-PyMuPDF>=1.16.0,!=1.18.11,!=1.18.12,!=1.19.5


+-e git+https://github.com/mindee/doctr.git#egg=python-doctr[torch]
 streamlit>=1.0.0