Update app.py
Browse files
app.py
CHANGED
|
@@ -491,7 +491,7 @@ def load_file_for_preview(file_path: str) -> Tuple[Optional[Image.Image], str]:
|
|
| 491 |
global pdf_cache
|
| 492 |
|
| 493 |
if not file_path or not os.path.exists(file_path):
|
| 494 |
-
return None, "
|
| 495 |
|
| 496 |
file_ext = os.path.splitext(file_path)[1].lower()
|
| 497 |
|
|
@@ -500,7 +500,7 @@ def load_file_for_preview(file_path: str) -> Tuple[Optional[Image.Image], str]:
|
|
| 500 |
# Load PDF pages
|
| 501 |
images = load_images_from_pdf(file_path)
|
| 502 |
if not images:
|
| 503 |
-
return None, "
|
| 504 |
|
| 505 |
pdf_cache.update({
|
| 506 |
"images": images,
|
|
@@ -511,7 +511,7 @@ def load_file_for_preview(file_path: str) -> Tuple[Optional[Image.Image], str]:
|
|
| 511 |
"results": []
|
| 512 |
})
|
| 513 |
|
| 514 |
-
return images[0], f"
|
| 515 |
|
| 516 |
elif file_ext in ['.jpg', '.jpeg', '.png', '.bmp', '.tiff']:
|
| 517 |
# Load single image
|
|
@@ -526,13 +526,13 @@ def load_file_for_preview(file_path: str) -> Tuple[Optional[Image.Image], str]:
|
|
| 526 |
"results": []
|
| 527 |
})
|
| 528 |
|
| 529 |
-
return image, "
|
| 530 |
else:
|
| 531 |
-
return None, f"
|
| 532 |
|
| 533 |
except Exception as e:
|
| 534 |
print(f"Error loading file: {e}")
|
| 535 |
-
return None, f"
|
| 536 |
|
| 537 |
|
| 538 |
def turn_page(direction: str) -> Tuple[Optional[Image.Image], str, Any, Optional[Image.Image], Optional[Dict]]:
|
|
@@ -540,7 +540,7 @@ def turn_page(direction: str) -> Tuple[Optional[Image.Image], str, Any, Optional
|
|
| 540 |
global pdf_cache
|
| 541 |
|
| 542 |
if not pdf_cache["images"]:
|
| 543 |
-
return None, '<div class="page-info"
|
| 544 |
|
| 545 |
if direction == "prev":
|
| 546 |
pdf_cache["current_page"] = max(0, pdf_cache["current_page"] - 1)
|
|
@@ -552,10 +552,10 @@ def turn_page(direction: str) -> Tuple[Optional[Image.Image], str, Any, Optional
|
|
| 552 |
|
| 553 |
index = pdf_cache["current_page"]
|
| 554 |
current_image_preview = pdf_cache["images"][index]
|
| 555 |
-
page_info_html = f'<div class="page-info"
|
| 556 |
|
| 557 |
# Initialize default result values
|
| 558 |
-
markdown_content = "
|
| 559 |
processed_img = None
|
| 560 |
layout_json = None
|
| 561 |
|
|
@@ -565,7 +565,7 @@ def turn_page(direction: str) -> Tuple[Optional[Image.Image], str, Any, Optional
|
|
| 565 |
pdf_cache["results"][index]):
|
| 566 |
|
| 567 |
result = pdf_cache["results"][index]
|
| 568 |
-
markdown_content = result.get('markdown_content') or result.get('raw_output', '
|
| 569 |
processed_img = result.get('processed_image', None) # Get the processed image
|
| 570 |
layout_json = result.get('layout_result', None) # Get the layout JSON
|
| 571 |
|
|
@@ -635,26 +635,12 @@ def create_gradio_interface():
|
|
| 635 |
}
|
| 636 |
"""
|
| 637 |
|
| 638 |
-
with gr.Blocks(theme=gr.themes.Soft(), css=css, title="
|
| 639 |
|
| 640 |
# Header
|
| 641 |
gr.HTML("""
|
| 642 |
<div class="title" style="text-align: center">
|
| 643 |
-
<h1>๐
|
| 644 |
-
<p style="font-size: 1.1em; color: #6b7280; margin-bottom: 0.6em;">
|
| 645 |
-
A state-of-the-art image/pdf-to-markdown vision language model for intelligent document processing
|
| 646 |
-
</p>
|
| 647 |
-
<div style="display: flex; justify-content: center; gap: 20px; margin: 15px 0;">
|
| 648 |
-
<a href="https://huggingface.co/rednote-hilab/dots.ocr" target="_blank" style="text-decoration: none; color: #2563eb; font-weight: 500;">
|
| 649 |
-
๐ Hugging Face Model
|
| 650 |
-
</a>
|
| 651 |
-
<a href="https://github.com/rednote-hilab/dots.ocr/blob/master/assets/blog.md" target="_blank" style="text-decoration: none; color: #2563eb; font-weight: 500;">
|
| 652 |
-
๐ Release Blog
|
| 653 |
-
</a>
|
| 654 |
-
<a href="https://github.com/rednote-hilab/dots.ocr" target="_blank" style="text-decoration: none; color: #2563eb; font-weight: 500;">
|
| 655 |
-
๐ป GitHub Repository
|
| 656 |
-
</a>
|
| 657 |
-
</div>
|
| 658 |
</div>
|
| 659 |
""")
|
| 660 |
|
|
@@ -665,14 +651,14 @@ def create_gradio_interface():
|
|
| 665 |
|
| 666 |
# File input
|
| 667 |
file_input = gr.File(
|
| 668 |
-
label="
|
| 669 |
file_types=[".jpg", ".jpeg", ".png", ".bmp", ".tiff", ".pdf"],
|
| 670 |
type="filepath"
|
| 671 |
)
|
| 672 |
|
| 673 |
# Image preview
|
| 674 |
image_preview = gr.Image(
|
| 675 |
-
label="
|
| 676 |
type="pil",
|
| 677 |
interactive=False,
|
| 678 |
height=300
|
|
@@ -680,43 +666,40 @@ def create_gradio_interface():
|
|
| 680 |
|
| 681 |
# Page navigation for PDFs
|
| 682 |
with gr.Row():
|
| 683 |
-
prev_page_btn = gr.Button("โ
|
| 684 |
-
page_info = gr.HTML('<div class="page-info"
|
| 685 |
-
next_page_btn = gr.Button("
|
| 686 |
|
| 687 |
# Advanced settings
|
| 688 |
-
with gr.Accordion("
|
| 689 |
max_new_tokens = gr.Slider(
|
| 690 |
minimum=1000,
|
| 691 |
maximum=32000,
|
| 692 |
value=24000,
|
| 693 |
step=1000,
|
| 694 |
-
label="
|
| 695 |
-
info="Maximum number of tokens to generate"
|
| 696 |
)
|
| 697 |
|
| 698 |
min_pixels = gr.Number(
|
| 699 |
value=MIN_PIXELS,
|
| 700 |
-
label="
|
| 701 |
-
info="Minimum image resolution"
|
| 702 |
)
|
| 703 |
|
| 704 |
max_pixels = gr.Number(
|
| 705 |
value=MAX_PIXELS,
|
| 706 |
-
label="
|
| 707 |
-
info="Maximum image resolution"
|
| 708 |
)
|
| 709 |
|
| 710 |
# Process button
|
| 711 |
process_btn = gr.Button(
|
| 712 |
-
"๐
|
| 713 |
variant="primary",
|
| 714 |
elem_classes=["process-button"],
|
| 715 |
size="lg"
|
| 716 |
)
|
| 717 |
|
| 718 |
# Clear button
|
| 719 |
-
clear_btn = gr.Button("๐๏ธ
|
| 720 |
|
| 721 |
# Right column - Results
|
| 722 |
with gr.Column(scale=2):
|
|
@@ -724,23 +707,23 @@ def create_gradio_interface():
|
|
| 724 |
# Results tabs
|
| 725 |
with gr.Tabs():
|
| 726 |
# Processed image tab
|
| 727 |
-
with gr.Tab("๐ผ๏ธ
|
| 728 |
processed_image = gr.Image(
|
| 729 |
-
label="
|
| 730 |
type="pil",
|
| 731 |
interactive=False,
|
| 732 |
height=500
|
| 733 |
)
|
| 734 |
# Markdown output tab
|
| 735 |
-
with gr.Tab("๐
|
| 736 |
markdown_output = gr.Markdown(
|
| 737 |
-
value="
|
| 738 |
height=500
|
| 739 |
)
|
| 740 |
# JSON layout tab
|
| 741 |
-
with gr.Tab("๐
|
| 742 |
json_output = gr.JSON(
|
| 743 |
-
label="
|
| 744 |
value=None
|
| 745 |
)
|
| 746 |
|
|
@@ -751,10 +734,10 @@ def create_gradio_interface():
|
|
| 751 |
|
| 752 |
try:
|
| 753 |
if not file_path:
|
| 754 |
-
return None, "
|
| 755 |
|
| 756 |
if model is None:
|
| 757 |
-
return None, "
|
| 758 |
|
| 759 |
# Load and preview file
|
| 760 |
image, page_info = load_file_for_preview(file_path)
|
|
@@ -775,7 +758,7 @@ def create_gradio_interface():
|
|
| 775 |
)
|
| 776 |
all_results.append(result)
|
| 777 |
if result.get('markdown_content'):
|
| 778 |
-
all_markdown.append(f"##
|
| 779 |
|
| 780 |
pdf_cache["results"] = all_results
|
| 781 |
pdf_cache["is_parsed"] = True
|
|
@@ -807,7 +790,7 @@ def create_gradio_interface():
|
|
| 807 |
pdf_cache["is_parsed"] = True
|
| 808 |
|
| 809 |
# Check if the content contains mostly Arabic text
|
| 810 |
-
content = result['markdown_content'] or "
|
| 811 |
if is_arabic_text(content):
|
| 812 |
markdown_update = gr.update(value=content, rtl=True)
|
| 813 |
else:
|
|
@@ -820,7 +803,7 @@ def create_gradio_interface():
|
|
| 820 |
)
|
| 821 |
|
| 822 |
except Exception as e:
|
| 823 |
-
error_msg = f"
|
| 824 |
print(error_msg)
|
| 825 |
traceback.print_exc()
|
| 826 |
return None, error_msg, None
|
|
@@ -828,7 +811,7 @@ def create_gradio_interface():
|
|
| 828 |
def handle_file_upload(file_path):
|
| 829 |
"""Handle file upload and show preview"""
|
| 830 |
if not file_path:
|
| 831 |
-
return None, "
|
| 832 |
|
| 833 |
image, page_info = load_file_for_preview(file_path)
|
| 834 |
return image, page_info
|
|
@@ -850,9 +833,9 @@ def create_gradio_interface():
|
|
| 850 |
return (
|
| 851 |
None, # file_input
|
| 852 |
None, # image_preview
|
| 853 |
-
'<div class="page-info"
|
| 854 |
None, # processed_image
|
| 855 |
-
"
|
| 856 |
None, # json_output
|
| 857 |
)
|
| 858 |
|
|
@@ -901,4 +884,4 @@ if __name__ == "__main__":
|
|
| 901 |
share=False,
|
| 902 |
debug=True,
|
| 903 |
show_error=True
|
| 904 |
-
)
|
|
|
|
| 491 |
global pdf_cache
|
| 492 |
|
| 493 |
if not file_path or not os.path.exists(file_path):
|
| 494 |
+
return None, "파일이 선택되지 않았습니다"
|
| 495 |
|
| 496 |
file_ext = os.path.splitext(file_path)[1].lower()
|
| 497 |
|
|
|
|
| 500 |
# Load PDF pages
|
| 501 |
images = load_images_from_pdf(file_path)
|
| 502 |
if not images:
|
| 503 |
+
return None, "PDF 로드 실패"
|
| 504 |
|
| 505 |
pdf_cache.update({
|
| 506 |
"images": images,
|
|
|
|
| 511 |
"results": []
|
| 512 |
})
|
| 513 |
|
| 514 |
+
return images[0], f"페이지 1 / {len(images)}"
|
| 515 |
|
| 516 |
elif file_ext in ['.jpg', '.jpeg', '.png', '.bmp', '.tiff']:
|
| 517 |
# Load single image
|
|
|
|
| 526 |
"results": []
|
| 527 |
})
|
| 528 |
|
| 529 |
+
return image, "페이지 1 / 1"
|
| 530 |
else:
|
| 531 |
+
return None, f"지원되지 않는 파일 형식: {file_ext}"
|
| 532 |
|
| 533 |
except Exception as e:
|
| 534 |
print(f"Error loading file: {e}")
|
| 535 |
+
return None, f"파일 로드 오류: {str(e)}"
|
| 536 |
|
| 537 |
|
| 538 |
def turn_page(direction: str) -> Tuple[Optional[Image.Image], str, Any, Optional[Image.Image], Optional[Dict]]:
|
|
|
|
| 540 |
global pdf_cache
|
| 541 |
|
| 542 |
if not pdf_cache["images"]:
|
| 543 |
+
return None, '<div class="page-info">파일이 로드되지 않았습니다</div>', "아직 결과가 없습니다", None, None
|
| 544 |
|
| 545 |
if direction == "prev":
|
| 546 |
pdf_cache["current_page"] = max(0, pdf_cache["current_page"] - 1)
|
|
|
|
| 552 |
|
| 553 |
index = pdf_cache["current_page"]
|
| 554 |
current_image_preview = pdf_cache["images"][index]
|
| 555 |
+
page_info_html = f'<div class="page-info">페이지 {index + 1} / {pdf_cache["total_pages"]}</div>'
|
| 556 |
|
| 557 |
# Initialize default result values
|
| 558 |
+
markdown_content = "페이지가 아직 처리되지 않았습니다"
|
| 559 |
processed_img = None
|
| 560 |
layout_json = None
|
| 561 |
|
|
|
|
| 565 |
pdf_cache["results"][index]):
|
| 566 |
|
| 567 |
result = pdf_cache["results"][index]
|
| 568 |
+
markdown_content = result.get('markdown_content') or result.get('raw_output', '사용 가능한 콘텐츠가 없습니다')
|
| 569 |
processed_img = result.get('processed_image', None) # Get the processed image
|
| 570 |
layout_json = result.get('layout_result', None) # Get the layout JSON
|
| 571 |
|
|
|
|
| 635 |
}
|
| 636 |
"""
|
| 637 |
|
| 638 |
+
with gr.Blocks(theme=gr.themes.Soft(), css=css, title="VIDraft-NH-OCR") as demo:
|
| 639 |
|
| 640 |
# Header
|
| 641 |
gr.HTML("""
|
| 642 |
<div class="title" style="text-align: center">
|
| 643 |
+
<h1>๐ VIDraft-NH-OCR</h1>
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 644 |
</div>
|
| 645 |
""")
|
| 646 |
|
|
|
|
| 651 |
|
| 652 |
# File input
|
| 653 |
file_input = gr.File(
|
| 654 |
+
label="이미지 또는 PDF 업로드",
|
| 655 |
file_types=[".jpg", ".jpeg", ".png", ".bmp", ".tiff", ".pdf"],
|
| 656 |
type="filepath"
|
| 657 |
)
|
| 658 |
|
| 659 |
# Image preview
|
| 660 |
image_preview = gr.Image(
|
| 661 |
+
label="미리보기",
|
| 662 |
type="pil",
|
| 663 |
interactive=False,
|
| 664 |
height=300
|
|
|
|
| 666 |
|
| 667 |
# Page navigation for PDFs
|
| 668 |
with gr.Row():
|
| 669 |
+
prev_page_btn = gr.Button("◀ 이전", size="md")
|
| 670 |
+
page_info = gr.HTML('<div class="page-info">파일이 로드되지 않았습니다</div>')
|
| 671 |
+
next_page_btn = gr.Button("다음 ▶", size="md")
|
| 672 |
|
| 673 |
# Advanced settings
|
| 674 |
+
with gr.Accordion("고급 설정", open=False):
|
| 675 |
max_new_tokens = gr.Slider(
|
| 676 |
minimum=1000,
|
| 677 |
maximum=32000,
|
| 678 |
value=24000,
|
| 679 |
step=1000,
|
| 680 |
+
label="최대 토큰 수"
|
|
|
|
| 681 |
)
|
| 682 |
|
| 683 |
min_pixels = gr.Number(
|
| 684 |
value=MIN_PIXELS,
|
| 685 |
+
label="최소 픽셀"
|
|
|
|
| 686 |
)
|
| 687 |
|
| 688 |
max_pixels = gr.Number(
|
| 689 |
value=MAX_PIXELS,
|
| 690 |
+
label="최대 픽셀"
|
|
|
|
| 691 |
)
|
| 692 |
|
| 693 |
# Process button
|
| 694 |
process_btn = gr.Button(
|
| 695 |
+
"๐ ๋ฌธ์ ์ฒ๋ฆฌ",
|
| 696 |
variant="primary",
|
| 697 |
elem_classes=["process-button"],
|
| 698 |
size="lg"
|
| 699 |
)
|
| 700 |
|
| 701 |
# Clear button
|
| 702 |
+
clear_btn = gr.Button("🗑️ 모두 지우기", variant="secondary")
|
| 703 |
|
| 704 |
# Right column - Results
|
| 705 |
with gr.Column(scale=2):
|
|
|
|
| 707 |
# Results tabs
|
| 708 |
with gr.Tabs():
|
| 709 |
# Processed image tab
|
| 710 |
+
with gr.Tab("🖼️ 처리된 이미지"):
|
| 711 |
processed_image = gr.Image(
|
| 712 |
+
label="레이아웃 감지 이미지",
|
| 713 |
type="pil",
|
| 714 |
interactive=False,
|
| 715 |
height=500
|
| 716 |
)
|
| 717 |
# Markdown output tab
|
| 718 |
+
with gr.Tab("๐ ์ถ์ถ๋ ์ฝํ
์ธ "):
|
| 719 |
markdown_output = gr.Markdown(
|
| 720 |
+
value="'문서 처리'를 클릭하여 콘텐츠를 추출하세요...",
|
| 721 |
height=500
|
| 722 |
)
|
| 723 |
# JSON layout tab
|
| 724 |
+
with gr.Tab("๐ ๋ ์ด์์ JSON"):
|
| 725 |
json_output = gr.JSON(
|
| 726 |
+
label="레이아웃 분석 결과",
|
| 727 |
value=None
|
| 728 |
)
|
| 729 |
|
|
|
|
| 734 |
|
| 735 |
try:
|
| 736 |
if not file_path:
|
| 737 |
+
return None, "먼저 파일을 업로드하세요.", None
|
| 738 |
|
| 739 |
if model is None:
|
| 740 |
+
return None, "모델이 로드되지 않았습니다. 페이지를 새로고침하고 다시 시도하세요.", None
|
| 741 |
|
| 742 |
# Load and preview file
|
| 743 |
image, page_info = load_file_for_preview(file_path)
|
|
|
|
| 758 |
)
|
| 759 |
all_results.append(result)
|
| 760 |
if result.get('markdown_content'):
|
| 761 |
+
all_markdown.append(f"## 페이지 {i+1}\n\n{result['markdown_content']}")
|
| 762 |
|
| 763 |
pdf_cache["results"] = all_results
|
| 764 |
pdf_cache["is_parsed"] = True
|
|
|
|
| 790 |
pdf_cache["is_parsed"] = True
|
| 791 |
|
| 792 |
# Check if the content contains mostly Arabic text
|
| 793 |
+
content = result['markdown_content'] or "추출된 콘텐츠가 없습니다"
|
| 794 |
if is_arabic_text(content):
|
| 795 |
markdown_update = gr.update(value=content, rtl=True)
|
| 796 |
else:
|
|
|
|
| 803 |
)
|
| 804 |
|
| 805 |
except Exception as e:
|
| 806 |
+
error_msg = f"문서 처리 오류: {str(e)}"
|
| 807 |
print(error_msg)
|
| 808 |
traceback.print_exc()
|
| 809 |
return None, error_msg, None
|
|
|
|
| 811 |
def handle_file_upload(file_path):
|
| 812 |
"""Handle file upload and show preview"""
|
| 813 |
if not file_path:
|
| 814 |
+
return None, "파일이 로드되지 않았습니다"
|
| 815 |
|
| 816 |
image, page_info = load_file_for_preview(file_path)
|
| 817 |
return image, page_info
|
|
|
|
| 833 |
return (
|
| 834 |
None, # file_input
|
| 835 |
None, # image_preview
|
| 836 |
+
'<div class="page-info">파일이 로드되지 않았습니다</div>', # page_info
|
| 837 |
None, # processed_image
|
| 838 |
+
"'문서 처리'를 클릭하여 콘텐츠를 추출하세요...", # markdown_output
|
| 839 |
None, # json_output
|
| 840 |
)
|
| 841 |
|
|
|
|
| 884 |
share=False,
|
| 885 |
debug=True,
|
| 886 |
show_error=True
|
| 887 |
+
)
|