Spaces:
Running
on
Zero
Running
on
Zero
Commit
·
83e370e
1
Parent(s):
003891a
improve ui
Browse files
app.py
CHANGED
|
@@ -151,7 +151,7 @@ def parse_xml_for_text(xml_file_path):
|
|
| 151 |
elif xml_format == "ALTO":
|
| 152 |
return parse_alto_xml_for_text(xml_file_path)
|
| 153 |
else:
|
| 154 |
-
return
|
| 155 |
|
| 156 |
except Exception as e:
|
| 157 |
return f"Error determining XML format: {str(e)}"
|
|
@@ -282,11 +282,24 @@ def process_files(image_path, xml_path, model_name):
|
|
| 282 |
img_to_display = None
|
| 283 |
xml_text_output = "XML not provided or not processed."
|
| 284 |
hf_ocr_text_output = "Image not provided or OCR not run."
|
|
|
|
|
|
|
| 285 |
|
| 286 |
if image_path:
|
| 287 |
try:
|
| 288 |
img_to_display = Image.open(image_path).convert("RGB")
|
| 289 |
hf_ocr_text_output = run_hf_ocr(image_path, model_name)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 290 |
except Exception as e:
|
| 291 |
img_to_display = None # Clear image if it failed to load
|
| 292 |
hf_ocr_text_output = f"Error loading image or running {model_name} OCR: {e}"
|
|
@@ -295,6 +308,17 @@ def process_files(image_path, xml_path, model_name):
|
|
| 295 |
|
| 296 |
if xml_path:
|
| 297 |
xml_text_output = parse_xml_for_text(xml_path)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 298 |
else:
|
| 299 |
xml_text_output = "No XML file uploaded."
|
| 300 |
|
|
@@ -303,16 +327,16 @@ def process_files(image_path, xml_path, model_name):
|
|
| 303 |
img_to_display = None # No image to display
|
| 304 |
hf_ocr_text_output = "Upload an image to perform OCR."
|
| 305 |
|
| 306 |
-
return img_to_display, xml_text_output, hf_ocr_text_output
|
| 307 |
|
| 308 |
|
| 309 |
# --- Create Gradio App ---
|
| 310 |
|
| 311 |
with gr.Blocks(theme=gr.themes.Soft()) as demo:
|
| 312 |
-
gr.Markdown("# OCR
|
| 313 |
gr.Markdown(
|
| 314 |
-
"
|
| 315 |
-
"
|
| 316 |
)
|
| 317 |
|
| 318 |
with gr.Row():
|
|
@@ -321,7 +345,7 @@ with gr.Blocks(theme=gr.themes.Soft()) as demo:
|
|
| 321 |
choices=AVAILABLE_MODELS,
|
| 322 |
value="RolmOCR",
|
| 323 |
label="Select OCR Model",
|
| 324 |
-
info="
|
| 325 |
)
|
| 326 |
image_input = gr.File(
|
| 327 |
label="Upload Image (PNG, JPG, etc.)", type="filepath"
|
|
@@ -329,7 +353,7 @@ with gr.Blocks(theme=gr.themes.Soft()) as demo:
|
|
| 329 |
xml_input = gr.File(
|
| 330 |
label="Upload XML File (Optional, ALTO or PAGE format)", type="filepath"
|
| 331 |
)
|
| 332 |
-
submit_button = gr.Button("
|
| 333 |
|
| 334 |
with gr.Row():
|
| 335 |
with gr.Column(scale=1):
|
|
@@ -338,20 +362,28 @@ with gr.Blocks(theme=gr.themes.Soft()) as demo:
|
|
| 338 |
)
|
| 339 |
with gr.Column(scale=1):
|
| 340 |
hf_ocr_output_textbox = gr.Markdown(
|
| 341 |
-
label="OCR Output",
|
| 342 |
show_copy_button=True,
|
| 343 |
)
|
|
|
|
|
|
|
|
|
|
|
|
|
| 344 |
xml_output_textbox = gr.Textbox(
|
| 345 |
-
label="
|
| 346 |
lines=15,
|
| 347 |
interactive=False,
|
| 348 |
show_copy_button=True,
|
| 349 |
)
|
|
|
|
|
|
|
|
|
|
|
|
|
| 350 |
|
| 351 |
submit_button.click(
|
| 352 |
fn=process_files,
|
| 353 |
inputs=[image_input, xml_input, model_selector],
|
| 354 |
-
outputs=[output_image_display, xml_output_textbox, hf_ocr_output_textbox],
|
| 355 |
)
|
| 356 |
|
| 357 |
gr.Markdown("---")
|
|
|
|
| 151 |
elif xml_format == "ALTO":
|
| 152 |
return parse_alto_xml_for_text(xml_file_path)
|
| 153 |
else:
|
| 154 |
+
return "Error: Unsupported XML format. Expected ALTO or PAGE XML."
|
| 155 |
|
| 156 |
except Exception as e:
|
| 157 |
return f"Error determining XML format: {str(e)}"
|
|
|
|
| 282 |
img_to_display = None
|
| 283 |
xml_text_output = "XML not provided or not processed."
|
| 284 |
hf_ocr_text_output = "Image not provided or OCR not run."
|
| 285 |
+
ocr_download = gr.DownloadButton(visible=False)
|
| 286 |
+
xml_download = gr.DownloadButton(visible=False)
|
| 287 |
|
| 288 |
if image_path:
|
| 289 |
try:
|
| 290 |
img_to_display = Image.open(image_path).convert("RGB")
|
| 291 |
hf_ocr_text_output = run_hf_ocr(image_path, model_name)
|
| 292 |
+
|
| 293 |
+
# Create download file for OCR output
|
| 294 |
+
if hf_ocr_text_output and not hf_ocr_text_output.startswith("Error"):
|
| 295 |
+
ocr_filename = f"vlm_ocr_output_{model_name}.txt"
|
| 296 |
+
with open(ocr_filename, "w", encoding="utf-8") as f:
|
| 297 |
+
f.write(hf_ocr_text_output)
|
| 298 |
+
ocr_download = gr.DownloadButton(
|
| 299 |
+
label="Download VLM OCR",
|
| 300 |
+
value=ocr_filename,
|
| 301 |
+
visible=True
|
| 302 |
+
)
|
| 303 |
except Exception as e:
|
| 304 |
img_to_display = None # Clear image if it failed to load
|
| 305 |
hf_ocr_text_output = f"Error loading image or running {model_name} OCR: {e}"
|
|
|
|
| 308 |
|
| 309 |
if xml_path:
|
| 310 |
xml_text_output = parse_xml_for_text(xml_path)
|
| 311 |
+
|
| 312 |
+
# Create download file for XML text
|
| 313 |
+
if xml_text_output and not xml_text_output.startswith("Error"):
|
| 314 |
+
xml_filename = "traditional_ocr_output.txt"
|
| 315 |
+
with open(xml_filename, "w", encoding="utf-8") as f:
|
| 316 |
+
f.write(xml_text_output)
|
| 317 |
+
xml_download = gr.DownloadButton(
|
| 318 |
+
label="Download XML Text",
|
| 319 |
+
value=xml_filename,
|
| 320 |
+
visible=True
|
| 321 |
+
)
|
| 322 |
else:
|
| 323 |
xml_text_output = "No XML file uploaded."
|
| 324 |
|
|
|
|
| 327 |
img_to_display = None # No image to display
|
| 328 |
hf_ocr_text_output = "Upload an image to perform OCR."
|
| 329 |
|
| 330 |
+
return img_to_display, xml_text_output, hf_ocr_text_output, ocr_download, xml_download
|
| 331 |
|
| 332 |
|
| 333 |
# --- Create Gradio App ---
|
| 334 |
|
| 335 |
with gr.Blocks(theme=gr.themes.Soft()) as demo:
|
| 336 |
+
gr.Markdown("# OCR Comparison Tool: Traditional vs VLM-based")
|
| 337 |
gr.Markdown(
|
| 338 |
+
"Compare traditional OCR outputs (ALTO/PAGE XML) with modern Vision-Language Model OCR that produces clean Markdown. "
|
| 339 |
+
"Upload an image and its XML file to see how VLMs simplify document text extraction."
|
| 340 |
)
|
| 341 |
|
| 342 |
with gr.Row():
|
|
|
|
| 345 |
choices=AVAILABLE_MODELS,
|
| 346 |
value="RolmOCR",
|
| 347 |
label="Select OCR Model",
|
| 348 |
+
info="RolmOCR: Fast extraction, clean readable output | Nanonets-OCR-s: Detailed extraction with tables/math support, outputs structured Markdown"
|
| 349 |
)
|
| 350 |
image_input = gr.File(
|
| 351 |
label="Upload Image (PNG, JPG, etc.)", type="filepath"
|
|
|
|
| 353 |
xml_input = gr.File(
|
| 354 |
label="Upload XML File (Optional, ALTO or PAGE format)", type="filepath"
|
| 355 |
)
|
| 356 |
+
submit_button = gr.Button("Compare OCR Methods", variant="primary")
|
| 357 |
|
| 358 |
with gr.Row():
|
| 359 |
with gr.Column(scale=1):
|
|
|
|
| 362 |
)
|
| 363 |
with gr.Column(scale=1):
|
| 364 |
hf_ocr_output_textbox = gr.Markdown(
|
| 365 |
+
label="VLM OCR Output (Markdown)",
|
| 366 |
show_copy_button=True,
|
| 367 |
)
|
| 368 |
+
ocr_download_btn = gr.DownloadButton(
|
| 369 |
+
label="Download VLM OCR",
|
| 370 |
+
visible=False
|
| 371 |
+
)
|
| 372 |
xml_output_textbox = gr.Textbox(
|
| 373 |
+
label="Traditional OCR (XML Reading Order)",
|
| 374 |
lines=15,
|
| 375 |
interactive=False,
|
| 376 |
show_copy_button=True,
|
| 377 |
)
|
| 378 |
+
xml_download_btn = gr.DownloadButton(
|
| 379 |
+
label="Download XML Text",
|
| 380 |
+
visible=False
|
| 381 |
+
)
|
| 382 |
|
| 383 |
submit_button.click(
|
| 384 |
fn=process_files,
|
| 385 |
inputs=[image_input, xml_input, model_selector],
|
| 386 |
+
outputs=[output_image_display, xml_output_textbox, hf_ocr_output_textbox, ocr_download_btn, xml_download_btn],
|
| 387 |
)
|
| 388 |
|
| 389 |
gr.Markdown("---")
|