Spaces:
Sleeping
Sleeping
Commit
·
6f2b338
1
Parent(s):
3671a94
changes
Browse files
app.py
CHANGED
|
@@ -788,25 +788,8 @@ def run_ocr(
|
|
| 788 |
# Prepare plain text by stripping HTML tags and replacing <br>
|
| 789 |
plain_text = re.sub(r"<[^>]*>", "", predicted_html.replace("<br>", "\n"))
|
| 790 |
# Write temporary files
|
| 791 |
-
|
| 792 |
-
|
| 793 |
-
try:
|
| 794 |
-
txt_fd = io.NamedTemporaryFile(delete=False, suffix=".txt", mode="w", encoding="utf-8")
|
| 795 |
-
txt_fd.write(plain_text)
|
| 796 |
-
txt_fd.flush()
|
| 797 |
-
txt_path = txt_fd.name
|
| 798 |
-
txt_fd.close()
|
| 799 |
-
except Exception:
|
| 800 |
-
txt_path = None
|
| 801 |
-
try:
|
| 802 |
-
csv_fd = io.NamedTemporaryFile(delete=False, suffix=".csv", mode="w", encoding="utf-8")
|
| 803 |
-
df_all.to_csv(csv_fd, index=False)
|
| 804 |
-
csv_fd.flush()
|
| 805 |
-
csv_path = csv_fd.name
|
| 806 |
-
csv_fd.close()
|
| 807 |
-
except Exception:
|
| 808 |
-
csv_path = None
|
| 809 |
-
return overlay_img, predicted_html, df_all, txt_path, csv_path
|
| 810 |
|
| 811 |
|
| 812 |
# ----------------------------------------------------------------------
|
|
@@ -840,13 +823,13 @@ def create_gradio_interface():
|
|
| 840 |
overlay_output = gr.Image(label="Detected Regions")
|
| 841 |
predictions_output = gr.HTML(label="Predictions (HTML)")
|
| 842 |
df_output = gr.DataFrame(label="Token Scores", interactive=False)
|
| 843 |
-
txt_file_output = gr.File(label="Download OCR Prediction (.txt)")
|
| 844 |
-
csv_file_output = gr.File(label="Download Token Scores (.csv)")
|
| 845 |
# Editable text
|
| 846 |
edited_text = gr.Textbox(
|
| 847 |
label="Edit full predicted text", lines=8, interactive=True
|
| 848 |
)
|
| 849 |
-
download_edited_btn = gr.Button("Download edited text")
|
| 850 |
|
| 851 |
# Callback for OCR
|
| 852 |
def on_run(image, xml, gray, binarize, metric):
|
|
@@ -855,7 +838,7 @@ def create_gradio_interface():
|
|
| 855 |
run_btn.click(
|
| 856 |
fn=on_run,
|
| 857 |
inputs=[image_input, xml_input, apply_gray_checkbox, apply_bin_checkbox, metric_radio],
|
| 858 |
-
outputs=[overlay_output, predictions_output, df_output, txt_file_output, csv_file_output],
|
| 859 |
)
|
| 860 |
|
| 861 |
# Populate editable text with plain text from predictions
|
|
@@ -869,25 +852,7 @@ def create_gradio_interface():
|
|
| 869 |
outputs=edited_text,
|
| 870 |
)
|
| 871 |
|
| 872 |
-
# Download edited text by writing to a temporary file
|
| 873 |
-
def download_edited(txt):
|
| 874 |
-
if not txt:
|
| 875 |
-
return None
|
| 876 |
-
try:
|
| 877 |
-
fd = io.NamedTemporaryFile(delete=False, suffix=".txt", mode="w", encoding="utf-8")
|
| 878 |
-
fd.write(txt)
|
| 879 |
-
fd.flush()
|
| 880 |
-
path = fd.name
|
| 881 |
-
fd.close()
|
| 882 |
-
return path
|
| 883 |
-
except Exception:
|
| 884 |
-
return None
|
| 885 |
|
| 886 |
-
download_edited_btn.click(
|
| 887 |
-
fn=download_edited,
|
| 888 |
-
inputs=edited_text,
|
| 889 |
-
outputs=txt_file_output,
|
| 890 |
-
)
|
| 891 |
return demo
|
| 892 |
|
| 893 |
|
|
|
|
| 788 |
# Prepare plain text by stripping HTML tags and replacing <br>
|
| 789 |
plain_text = re.sub(r"<[^>]*>", "", predicted_html.replace("<br>", "\n"))
|
| 790 |
# Write temporary files
|
| 791 |
+
|
| 792 |
+
return overlay_img, predicted_html, df_all
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 793 |
|
| 794 |
|
| 795 |
# ----------------------------------------------------------------------
|
|
|
|
| 823 |
overlay_output = gr.Image(label="Detected Regions")
|
| 824 |
predictions_output = gr.HTML(label="Predictions (HTML)")
|
| 825 |
df_output = gr.DataFrame(label="Token Scores", interactive=False)
|
| 826 |
+
# txt_file_output = gr.File(label="Download OCR Prediction (.txt)")
|
| 827 |
+
# csv_file_output = gr.File(label="Download Token Scores (.csv)")
|
| 828 |
# Editable text
|
| 829 |
edited_text = gr.Textbox(
|
| 830 |
label="Edit full predicted text", lines=8, interactive=True
|
| 831 |
)
|
| 832 |
+
# download_edited_btn = gr.Button("Download edited text")
|
| 833 |
|
| 834 |
# Callback for OCR
|
| 835 |
def on_run(image, xml, gray, binarize, metric):
|
|
|
|
| 838 |
run_btn.click(
|
| 839 |
fn=on_run,
|
| 840 |
inputs=[image_input, xml_input, apply_gray_checkbox, apply_bin_checkbox, metric_radio],
|
| 841 |
+
outputs=[overlay_output, predictions_output, df_output],
|
| 842 |
)
|
| 843 |
|
| 844 |
# Populate editable text with plain text from predictions
|
|
|
|
| 852 |
outputs=edited_text,
|
| 853 |
)
|
| 854 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 855 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 856 |
return demo
|
| 857 |
|
| 858 |
|