# Hugging Face Spaces page header captured with this file (Space status: Runtime error).
import gradio as gr | |
from backend import process_request, get_pdf_files, save_result_to_file, extract_text_with_fitz, extract_text_with_docling, preview_image_processing, load_system_prompt, load_user_prompt, load_postprocess_prompt, process_request_preprocessing_only, process_request_postprocessing_only | |
def create_ui():
    """Create and configure the Gradio UI for the resume analysis tool.

    The layout is a header row (PDF selection on the left, a read-only
    status log on the right) followed by three tabs: the AI analysis tab
    (prompts, processing options, run buttons, editable result panes), a
    preview tab (image gallery and text/OCR extraction comparison) and an
    API request & log tab.

    Prompt defaults are read from ``load_system_prompt``,
    ``load_user_prompt`` and ``load_postprocess_prompt``; the PDF dropdown
    is filled from ``get_pdf_files``.  The Blocks object is bound to
    ``app``.
    """
    # NOTE(review): the nesting below was reconstructed from the section
    # comments because the captured source had lost its indentation --
    # confirm against the deployed layout.
    custom_css = """
    .main-container { max-width: 1400px; margin: 0 auto; }
    .section-header { margin-bottom: 15px; }
    .input-group { margin-bottom: 20px; }
    #batch_result_area, #result_area {
        min-height: 200px !important;
    }
    #batch_info {
        font-size: 0.85em;
        color: #666;
        margin-bottom: 10px;
    }
    /* ํธ์ง ์์ญ ์คํ์ผ */
    .edit-area {
        border: 2px dashed #ccc;
        border-radius: 5px;
        background-color: #f9f9f9;
    }
    """
    with gr.Blocks(
        title="์ด๋ ฅ์ ๋ถ์ ์์คํ ",
        css=custom_css,
        theme=gr.themes.Soft()
    ) as app:
        with gr.Column(elem_classes="main-container"):
            gr.Markdown("# ๐ ์ด๋ ฅ์ ๋ถ์ ์์คํ ", elem_classes="section-header")

            # Header row: file selection + status log.
            with gr.Row(equal_height=True):
                # File selection (left, compact).
                with gr.Column(scale=2):
                    with gr.Group():
                        gr.Markdown("### ๐ ํ์ผ ์ ํ")
                        pdf_files = get_pdf_files()
                        # Prefer the bundled sample when it is listed,
                        # otherwise fall back to the first file (or None).
                        preferred_pdf = "./resume_samples/pdf/text/๋ฆฌ๋ฉค๋ฒ-S3.pdf"
                        if preferred_pdf in pdf_files:
                            default_pdf = preferred_pdf
                        elif pdf_files:
                            default_pdf = pdf_files[0]
                        else:
                            default_pdf = None
                        pdf_dropdown = gr.Dropdown(
                            label="PDF ํ์ผ ์ ํ",
                            choices=pdf_files,
                            value=default_pdf,
                            interactive=True
                        )
                        file_upload = gr.File(
                            label="๋๋ ์ PDF ํ์ผ ์ ๋ก๋",
                            file_types=[".pdf"],
                            type="filepath"
                        )
                # Status information (right).
                with gr.Column(scale=3):
                    with gr.Group():
                        gr.Markdown("### ๐ ์ค์๊ฐ ์ํ ์ ๋ณด")
                        status_log_output = gr.Textbox(
                            label="์ฒ๋ฆฌ ์ํ",
                            lines=6, max_lines=10,
                            value="์์คํ ์ค๋น ์๋ฃ - ํ์ผ์ ์ ํํ๊ณ ๋ถ์์ ์์ํ์ธ์...",
                            interactive=False, show_label=False
                        )

            gr.Markdown("---")

            with gr.Tabs():
                # ---- Analysis tab ----
                with gr.TabItem("๐ค AI ๋ถ์"):
                    # Settings area.
                    with gr.Row(equal_height=True):
                        with gr.Column(scale=2):
                            with gr.Group():
                                gr.Markdown("### 1๏ธโฃ ํ๋กฌํํธ ์ค์ ")
                                system_prompt_input = gr.TextArea(
                                    label="์์คํ ํ๋กฌํํธ",
                                    value=load_system_prompt(),
                                    lines=5,
                                    placeholder="์์คํ ํ๋กฌํํธ๋ฅผ ์ ๋ ฅํ์ธ์..."
                                )
                                prompt_input = gr.TextArea(
                                    label="์ฌ์ฉ์ ํ๋กฌํํธ (์ ์ฒ๋ฆฌ)",
                                    value=load_user_prompt(),
                                    lines=3,
                                    placeholder="์ ์ฒ๋ฆฌ์ฉ ์ฌ์ฉ์ ํ๋กฌํํธ๋ฅผ ์ ๋ ฅํ์ธ์..."
                                )
                                postprocess_prompt_input = gr.TextArea(
                                    label="ํ์ฒ๋ฆฌ ํ๋กฌํํธ",
                                    value=load_postprocess_prompt(),
                                    lines=3,
                                    placeholder="ํ์ฒ๋ฆฌ์ฉ ํ๋กฌํํธ๋ฅผ ์ ๋ ฅํ์ธ์..."
                                )
                        with gr.Column(scale=1):
                            with gr.Group():
                                gr.Markdown("### 2๏ธโฃ ์ฒ๋ฆฌ ์ค์ ")
                                use_images = gr.Checkbox(
                                    label="์ด๋ฏธ์ง๋ก ๋ณํํ์ฌ ์ฒ๋ฆฌ",
                                    value=True,
                                    info="PDF๋ฅผ ์ด๋ฏธ์ง๋ก ๋ณํํ์ฌ ๋น์ ๋ชจ๋ธ๋ก ๋ถ์"
                                )
                                image_processing_mode = gr.Radio(
                                    choices=["๊ฐ๋ก ๋ณํฉ (2ํ์ด์ง์ฉ)", "์ธ๋ก ๋ณํฉ (2ํ์ด์ง์ฉ)", "๋ฑ๊ฐ ํ์ด์ง"],
                                    value="๊ฐ๋ก ๋ณํฉ (2ํ์ด์ง์ฉ)",
                                    label="์ด๋ฏธ์ง ์ฒ๋ฆฌ ๋ฐฉ์",
                                    info="ํ์ด์ง ๋ณํฉ ๋ฐฉ์ ์ ํ"
                                )
                                overlap_merge_option = gr.Radio(
                                    choices=["์ผ๋ฐ ๋ณํฉ", "์ค๋ณต ๋ณํฉ (์ฌ๋ผ์ด๋ฉ ์๋์ฐ)"],
                                    value="์ผ๋ฐ ๋ณํฉ",
                                    label="๋ณํฉ ๋ฐฉ์",
                                    info="์ผ๋ฐ: (1,2), (3,4)... | ์ค๋ณต: (1,2), (2,3)...",
                                    visible=True
                                )
                                batch_size_slider = gr.Slider(
                                    minimum=1, maximum=3, value=3, step=1,
                                    label="์ด๋ฏธ์ง ๋ฐฐ์น ํฌ๊ธฐ",
                                    info="ํ ๋ฒ์ ์ฒ๋ฆฌํ ์ด๋ฏธ์ง ์ฅ์ (1-3์ฅ)"
                                )
                                use_docling = gr.Checkbox(
                                    label="ํ ์คํธ ํ์ฑ ํจ๊ป ์ํ",
                                    value=True,
                                    info="Docling์ผ๋ก PDF ํ ์คํธ ์ถ์ถ"
                                )
                                # The former use_postprocess checkbox was removed;
                                # post-processing is now a separate button.

                    # Run-button area.
                    with gr.Row():
                        with gr.Column(scale=2):
                            output_filename = gr.Textbox(
                                label="๊ฒฐ๊ณผ ํ์ผ ์ด๋ฆ (ํ์ฅ์ ์์ด)",
                                value="result",
                                placeholder="์ ์ฅํ ํ์ผ ์ด๋ฆ์ ์ ๋ ฅํ์ธ์"
                            )
                        with gr.Column(scale=1):
                            preprocessing_button = gr.Button(
                                "๐ ์ ์ฒ๋ฆฌ ๋ถ์ ์์", variant="primary", size="lg"
                            )
                        with gr.Column(scale=1):
                            postprocessing_button = gr.Button(
                                "๐ฏ ํ์ฒ๋ฆฌ ๋ถ์ ์์", variant="secondary", size="lg"
                            )

                    # Result area.
                    gr.Markdown("---")
                    gr.Markdown("## ๐ ๋ถ์ ๊ฒฐ๊ณผ")
                    with gr.Row():
                        # Batch processing result (editable).
                        with gr.Column(scale=1):
                            with gr.Group():
                                gr.Markdown("### ๐ ๋ฐฐ์น ์ฒ๋ฆฌ ๊ฒฐ๊ณผ")
                                batch_result_output = gr.Markdown(
                                    value="*๋ฐฐ์น ์ฒ๋ฆฌ ๊ฒฐ๊ณผ๊ฐ ์ฌ๊ธฐ์ ํ์๋ฉ๋๋ค...*",
                                    elem_id="batch_result_area",
                                    show_label=False,
                                )
                                # Edit controls; save/cancel start hidden.
                                with gr.Row():
                                    batch_edit_button = gr.Button(
                                        "โ๏ธ ํธ์ง", variant="secondary", size="sm"
                                    )
                                    batch_save_button = gr.Button(
                                        "๐พ ์ ์ฅ", variant="primary", size="sm", visible=False
                                    )
                                    batch_cancel_button = gr.Button(
                                        "โ ์ทจ์", variant="secondary", size="sm", visible=False
                                    )
                                batch_edit_area = gr.TextArea(
                                    value="",
                                    lines=15, max_lines=50,
                                    interactive=True, show_label=False, visible=False,
                                    placeholder="๋ฐฐ์น ์ฒ๋ฆฌ ๊ฒฐ๊ณผ๋ฅผ ํธ์งํ์ธ์..."
                                )
                        # Final analysis result.
                        with gr.Column(scale=1):
                            with gr.Group():
                                gr.Markdown("### ๐ฏ ์ต์ข ๋ถ์ ๊ฒฐ๊ณผ")
                                result_output = gr.Markdown(
                                    value="*์ต์ข ๋ถ์ ๊ฒฐ๊ณผ๊ฐ ์ฌ๊ธฐ์ ํ์๋ฉ๋๋ค...*",
                                    elem_id="result_area",
                                    show_label=False,
                                )
                                # Edit controls; save/cancel start hidden.
                                with gr.Row():
                                    result_edit_button = gr.Button(
                                        "โ๏ธ ํธ์ง", variant="secondary", size="sm"
                                    )
                                    result_save_button = gr.Button(
                                        "๐พ ์ ์ฅ", variant="primary", size="sm", visible=False
                                    )
                                    result_cancel_button = gr.Button(
                                        "โ ์ทจ์", variant="secondary", size="sm", visible=False
                                    )
                                result_edit_area = gr.TextArea(
                                    value="",
                                    lines=15, max_lines=50,
                                    interactive=True, show_label=False, visible=False,
                                    placeholder="์ต์ข ๋ถ์ ๊ฒฐ๊ณผ๋ฅผ ํธ์งํ์ธ์..."
                                )
                                # File save area.
                                with gr.Row():
                                    save_button = gr.Button(
                                        "๐พ ๊ฒฐ๊ณผ ์ ์ฅ", variant="secondary", size="sm"
                                    )
                                    save_message = gr.Markdown(value="", visible=False)

                # ---- Preview tab ----
                with gr.TabItem("๐ ๋ฏธ๋ฆฌ๋ณด๊ธฐ"):
                    # Image preview (top).
                    with gr.Row(equal_height=True):
                        with gr.Column(scale=1):
                            with gr.Group():
                                gr.Markdown("### ๐ผ๏ธ ์ด๋ฏธ์ง ๋ฏธ๋ฆฌ๋ณด๊ธฐ")
                                preview_button_tab = gr.Button(
                                    "์ด๋ฏธ์ง ์ฒ๋ฆฌ ๋ฏธ๋ฆฌ๋ณด๊ธฐ", variant="secondary", size="sm"
                                )
                                image_preview_gallery_tab = gr.Gallery(
                                    label="์ฒ๋ฆฌ๋ ์ด๋ฏธ์ง",
                                    show_label=False,
                                    columns=2, rows=2, height=350,
                                    value=[]
                                )
                    gr.Markdown("---")
                    # Text extraction comparison (bottom).
                    gr.Markdown("### ๐ PDF ํ ์คํธ ์ถ์ถ ๋น๊ต", elem_classes="section-header")
                    with gr.Row(equal_height=True):
                        # Text-layer based extraction.
                        with gr.Column(scale=1):
                            with gr.Group():
                                gr.Markdown("#### ๐ ํ ์คํธ ๊ธฐ๋ฐ ์ถ์ถ")
                                gr.Markdown("*PDF์ ํ ์คํธ ๋ ์ด์ด์์ ์ง์ ์ถ์ถ*", elem_id="extract_info")
                                text_extract_method = gr.Radio(
                                    choices=["Fitz (PyMuPDF)"],
                                    value="Fitz (PyMuPDF)",
                                    label="์ถ์ถ ๋ฐฉ์",
                                    info="๋น ๋ฅด๊ณ ๊ฐ๋ฒผ์ด ํ ์คํธ ์ถ์ถ"
                                )
                                text_extract_btn = gr.Button(
                                    "๐ ํ ์คํธ ์ถ์ถ", variant="primary", size="sm"
                                )
                                text_extract_result = gr.Markdown(
                                    value="*ํ ์คํธ ์ถ์ถ ๊ฒฐ๊ณผ๊ฐ ์ฌ๊ธฐ์ ํ์๋ฉ๋๋ค...*",
                                    elem_id="text_result_area"
                                )
                        # OCR based extraction.
                        with gr.Column(scale=1):
                            with gr.Group():
                                gr.Markdown("#### ๐ค OCR ๊ธฐ๋ฐ ์ถ์ถ")
                                gr.Markdown("*์ด๋ฏธ์ง์์ ๊ดํ ๋ฌธ์ ์ธ์์ผ๋ก ์ถ์ถ*", elem_id="ocr_info")
                                ocr_extract_btn = gr.Button(
                                    "๐ OCR ์ถ์ถ", variant="primary", size="sm"
                                )
                                ocr_extract_result = gr.Markdown(
                                    value="*OCR ์ถ์ถ ๊ฒฐ๊ณผ๊ฐ ์ฌ๊ธฐ์ ํ์๋ฉ๋๋ค...*",
                                    elem_id="ocr_result_area"
                                )
                    # Combined comparison button.
                    with gr.Row():
                        compare_both_btn = gr.Button(
                            "๐ ์์ชฝ ๋ชจ๋ ์ถ์ถํ์ฌ ๋น๊ต", variant="secondary", size="lg"
                        )

                # ---- API request & log tab ----
                with gr.TabItem("๐ API ์์ฒญ & ๋ก๊ทธ"):
                    with gr.Row(equal_height=True):
                        # Raw API request.
                        with gr.Column(scale=1):
                            with gr.Group():
                                gr.Markdown("#### ๐ค API ์์ฒญ (Raw)")
                                api_request_output = gr.Code(
                                    value="๋ถ์ ์์ ์ ์ค์ API ์์ฒญ ๋ด์ฉ์ด ํ์๋ฉ๋๋ค",
                                    language="json",
                                    label=None,
                                    interactive=False
                                )
                        # Text parsing result (updated by the analysis handlers).
                        with gr.Column(scale=1):
                            with gr.Group():
                                gr.Markdown("#### ๐ ํ ์คํธ ํ์ฑ ๊ฒฐ๊ณผ (์ค์๊ฐ)")
                                docling_output = gr.Code(
                                    value="PDF ํ ์คํธ ํ์ฑ ๊ฒฐ๊ณผ๊ฐ ์๋์ผ๋ก ํ์๋ฉ๋๋ค",
                                    language="markdown",
                                    label=None,
                                    interactive=False,
                                    lines=20
                                )
# === ์ด๋ฒคํธ ํธ๋ค๋ฌ ํจ์๋ค === | |
def update_status_only(status_text):
    """Pass ``status_text`` through unchanged.

    Identity helper intended for updating only the status log component.
    """
    return status_text
# === ํธ์ง ๊ด๋ จ ํจ์๋ค === | |
def start_batch_edit(batch_content):
    """Switch the batch-result pane into edit mode.

    Args:
        batch_content: Text used to pre-fill the edit area.

    Returns:
        A 5-tuple of ``gr.update`` objects, in order: edit button (hidden),
        save button (shown), cancel button (shown), edit area (shown and
        filled with ``batch_content``), Markdown display (hidden).
    """
    edit_btn = gr.update(visible=False)
    save_btn = gr.update(visible=True)
    cancel_btn = gr.update(visible=True)
    editor = gr.update(visible=True, value=batch_content)
    display = gr.update(visible=False)
    return edit_btn, save_btn, cancel_btn, editor, display
def save_batch_edit(edited_content):
    """Leave edit mode and show the edited batch result.

    Args:
        edited_content: Text taken from the edit area.

    Returns:
        A 5-tuple of ``gr.update`` objects, in order: edit button (shown),
        save button (hidden), cancel button (hidden), edit area (hidden),
        Markdown display (shown with ``edited_content`` as its value).
    """
    edit_btn = gr.update(visible=True)
    save_btn = gr.update(visible=False)
    cancel_btn = gr.update(visible=False)
    editor = gr.update(visible=False)
    display = gr.update(visible=True, value=edited_content)
    return edit_btn, save_btn, cancel_btn, editor, display
def cancel_batch_edit():
    """Leave batch edit mode without changing the displayed value.

    Returns:
        A 5-tuple of ``gr.update`` objects, in order: edit button (shown),
        save button (hidden), cancel button (hidden), edit area (hidden),
        Markdown display (shown; no new value is supplied).
    """
    edit_btn = gr.update(visible=True)
    save_btn = gr.update(visible=False)
    cancel_btn = gr.update(visible=False)
    editor = gr.update(visible=False)
    display = gr.update(visible=True)
    return edit_btn, save_btn, cancel_btn, editor, display
def start_result_edit(result_content):
    """Switch the final-result pane into edit mode.

    Args:
        result_content: Text used to pre-fill the edit area.

    Returns:
        A 5-tuple of ``gr.update`` objects, in order: edit button (hidden),
        save button (shown), cancel button (shown), edit area (shown and
        filled with ``result_content``), Markdown display (hidden).
    """
    edit_btn = gr.update(visible=False)
    save_btn = gr.update(visible=True)
    cancel_btn = gr.update(visible=True)
    editor = gr.update(visible=True, value=result_content)
    display = gr.update(visible=False)
    return edit_btn, save_btn, cancel_btn, editor, display
def save_result_edit(edited_content):
    """Leave edit mode and show the edited final result.

    Args:
        edited_content: Text taken from the edit area.

    Returns:
        A 5-tuple of ``gr.update`` objects, in order: edit button (shown),
        save button (hidden), cancel button (hidden), edit area (hidden),
        Markdown display (shown with ``edited_content`` as its value).
    """
    edit_btn = gr.update(visible=True)
    save_btn = gr.update(visible=False)
    cancel_btn = gr.update(visible=False)
    editor = gr.update(visible=False)
    display = gr.update(visible=True, value=edited_content)
    return edit_btn, save_btn, cancel_btn, editor, display
def cancel_result_edit():
    """Leave final-result edit mode without changing the displayed value.

    Returns:
        A 5-tuple of ``gr.update`` objects, in order: edit button (shown),
        save button (hidden), cancel button (hidden), edit area (hidden),
        Markdown display (shown; no new value is supplied).
    """
    edit_btn = gr.update(visible=True)
    save_btn = gr.update(visible=False)
    cancel_btn = gr.update(visible=False)
    editor = gr.update(visible=False)
    display = gr.update(visible=True)
    return edit_btn, save_btn, cancel_btn, editor, display
def process_preprocessing_wrapper(*args):
    """Run the preprocessing-only pipeline and stream its progress.

    ``*args`` is forwarded unchanged to ``process_request_preprocessing_only``,
    whose yielded results are iterated here.  Results that are empty or have
    fewer than five fields are ignored.

    Yields:
        5-tuples ``(batch_content, final_result_text, docling_text,
        status_log, api_request)``.  The second field is always a guidance
        message, never the backend's own final-result field.  After the
        backend is exhausted, one more tuple is emitted: the last valid
        result with a completion message, or a "no result" tuple when no
        valid result was seen.  Any exception is printed and reported as a
        single error tuple instead of propagating.
    """
    try:
        last_fields = None
        for result in process_request_preprocessing_only(*args):
            if not result or len(result) < 5:
                continue
            # The guard accepts results longer than five fields, so slice
            # before unpacking; a plain 5-name unpack would raise ValueError
            # on such a result and be reported as a processing error.
            batch_content, _unused_result, docling_text, status_log, api_request = result[:5]
            last_fields = (batch_content, docling_text, status_log, api_request)
            # Only the batch result is shown during preprocessing; the
            # final-result pane keeps a guidance message.
            yield batch_content, "*์ ์ฒ๋ฆฌ ์๋ฃ ํ ํ์ฒ๋ฆฌ ๋ฒํผ์ ๋๋ฌ์ฃผ์ธ์...*", docling_text, status_log, api_request

        if last_fields is not None:
            batch_content, docling_text, status_log, api_request = last_fields
            # On completion the final-result pane still shows guidance only.
            yield batch_content, "*โ ์ ์ฒ๋ฆฌ ์๋ฃ! ํ์ฒ๋ฆฌ ๋ถ์ ๋ฒํผ์ ๋๋ฌ ์ต์ข ๊ฒฐ๊ณผ๋ฅผ ํ์ธํ์ธ์.*", docling_text, status_log, api_request
        else:
            yield "๋ฐฐ์น ์ฒ๋ฆฌ ๊ฒฐ๊ณผ๊ฐ ์์ต๋๋ค.", "์ ์ฒ๋ฆฌ๋ฅผ ๋จผ์ ์ํํด์ฃผ์ธ์.", "", "์ ์ฒ๋ฆฌ ์๋ฃ", ""
    except Exception as e:
        # UI boundary: surface the failure in the panes rather than raising.
        print(f"์ ์ฒ๋ฆฌ ๋ํผ ํจ์ ์ค๋ฅ: {e}")
        yield "โ **์ ์ฒ๋ฆฌ ์ค๋ฅ ๋ฐ์**", "์ ์ฒ๋ฆฌ ์ค ์ค๋ฅ๊ฐ ๋ฐ์ํ์ต๋๋ค.", "", f"์ ์ฒ๋ฆฌ ์ค๋ฅ ๋ฐ์: {str(e)}", ""
def process_postprocessing_wrapper(batch_result, system_prompt, postprocess_prompt):
    """Run the postprocessing-only pipeline and stream its progress.

    Args:
        batch_result: Text of the batch-result pane; it is echoed back as
            the first field of every yielded tuple so the pane is preserved.
        system_prompt: Forwarded to ``process_request_postprocessing_only``.
        postprocess_prompt: Forwarded to
            ``process_request_postprocessing_only``.

    Yields:
        5-tuples ``(batch_result, final_result_text, docling_text,
        status_log, api_request)``.  When ``batch_result`` is empty or still
        the initial placeholder, a single "cannot run" tuple is emitted and
        the backend is not called.  Any exception is printed and reported as
        a single error tuple instead of propagating.
    """
    try:
        # Refuse to run when there is no batch result yet (empty value or
        # the pane's initial placeholder text).
        if not batch_result or batch_result.strip() == "*๋ฐฐ์น ์ฒ๋ฆฌ ๊ฒฐ๊ณผ๊ฐ ์ฌ๊ธฐ์ ํ์๋ฉ๋๋ค...*":
            yield batch_result, "โ **๋ฐฐ์น ์ฒ๋ฆฌ ๊ฒฐ๊ณผ๊ฐ ์์ต๋๋ค**\n\n์ ์ฒ๋ฆฌ๋ฅผ ๋จผ์ ์ํํด์ฃผ์ธ์.", "", "ํ์ฒ๋ฆฌ ์คํ ๋ถ๊ฐ: ๋ฐฐ์น ๊ฒฐ๊ณผ ์์", ""
            return

        last_fields = None
        for result in process_request_postprocessing_only(batch_result, system_prompt, postprocess_prompt):
            if not result or len(result) < 5:
                continue
            # The guard accepts results longer than five fields, so slice
            # before unpacking; a plain 5-name unpack would raise ValueError
            # on such a result and be reported as a processing error.
            _unused_batch, result_content, docling_text, status_log, api_request = result[:5]
            last_fields = (result_content, docling_text, status_log, api_request)
            # Stream the update while keeping the batch pane as it was.
            yield batch_result, result_content, docling_text, status_log, api_request

        if last_fields is not None:
            result_content, docling_text, status_log, api_request = last_fields
            # Final emission: batch pane preserved, final result updated.
            yield batch_result, result_content, docling_text, status_log, api_request
        else:
            yield batch_result, "ํ์ฒ๋ฆฌ ๊ฒฐ๊ณผ๊ฐ ์์ต๋๋ค.", "", "ํ์ฒ๋ฆฌ ์๋ฃ", ""
    except Exception as e:
        # UI boundary: surface the failure in the panes rather than raising.
        error_msg = f"ํ์ฒ๋ฆฌ ์ค๋ฅ: {str(e)}"
        print(f"ํ์ฒ๋ฆฌ ๋ํผ ํจ์ ์ค๋ฅ: {e}")
        yield batch_result, f"โ **ํ์ฒ๋ฆฌ ์ค๋ฅ ๋ฐ์**\n\n{error_msg}", "", f"ํ์ฒ๋ฆฌ ์ค๋ฅ ๋ฐ์: {str(e)}", ""
def on_save_button_click(result_content, filename):
    """Save the result text and reveal the save message.

    Args:
        result_content: Text passed to ``save_result_to_file``.
        filename: Target name passed to ``save_result_to_file``.

    Returns:
        A ``gr.update`` that makes the message component visible and sets
        its value to whatever ``save_result_to_file`` returned.
    """
    save_status = save_result_to_file(result_content, filename)
    return gr.update(value=save_status, visible=True)
def extract_with_text_method(pdf_path, uploaded_file, method):
    """Extract text from the selected PDF with ``extract_text_with_fitz``.

    Args:
        pdf_path: Path chosen in the dropdown; used when nothing is uploaded.
        uploaded_file: Uploaded file path; takes precedence when truthy.
        method: Accepted for the event signature; not used here.

    Returns:
        The value returned by ``extract_text_with_fitz``, or a prompt
        message when neither path is set.
    """
    source_pdf = uploaded_file if uploaded_file else pdf_path
    if source_pdf:
        return extract_text_with_fitz(source_pdf)
    return "PDF ํ์ผ์ ์ ํํด์ฃผ์ธ์."
def extract_with_ocr(pdf_path, uploaded_file):
    """Extract text from the selected PDF with ``extract_text_with_docling``.

    Args:
        pdf_path: Path chosen in the dropdown; used when nothing is uploaded.
        uploaded_file: Uploaded file path; takes precedence when truthy.

    Returns:
        The value returned by ``extract_text_with_docling``, or a prompt
        message when neither path is set.
    """
    source_pdf = uploaded_file if uploaded_file else pdf_path
    if source_pdf:
        return extract_text_with_docling(source_pdf)
    return "PDF ํ์ผ์ ์ ํํด์ฃผ์ธ์."
def extract_both_methods(pdf_path, uploaded_file, text_method):
    """Run the text-based and the OCR-based extraction on the same PDF.

    Args:
        pdf_path: Path chosen in the dropdown; used when nothing is uploaded.
        uploaded_file: Uploaded file path; takes precedence when truthy.
        text_method: Forwarded to ``extract_with_text_method``.

    Returns:
        ``(text_result, ocr_result)``; both are the same prompt message when
        neither path is set.
    """
    if not (uploaded_file or pdf_path):
        missing_pdf = "PDF ํ์ผ์ ์ ํํด์ฃผ์ธ์."
        return missing_pdf, missing_pdf
    # Text extraction runs first, then OCR, as two independent calls.
    from_text_layer = extract_with_text_method(pdf_path, uploaded_file, text_method)
    from_ocr = extract_with_ocr(pdf_path, uploaded_file)
    return from_text_layer, from_ocr
def preview_images(pdf_path, uploaded_file, processing_mode, use_images, overlap_option):
    """Build the preview gallery content for the selected PDF.

    Args:
        pdf_path: Path chosen in the dropdown; used when nothing is uploaded.
        uploaded_file: Uploaded file path; takes precedence when truthy.
        processing_mode: Forwarded to ``preview_image_processing``.
        use_images: When falsy, no preview is produced.
        overlap_option: Forwarded to ``preview_image_processing``.

    Returns:
        The value returned by ``preview_image_processing``, or an empty list
        when image processing is disabled, no PDF is set, or the call raises.
    """
    source_pdf = uploaded_file or pdf_path
    if not use_images or not source_pdf:
        return []
    try:
        gallery_items = preview_image_processing(source_pdf, processing_mode, overlap_option)
    except Exception as e:
        # Best effort: a failed preview is logged and shown as empty.
        print(f"๋ฏธ๋ฆฌ๋ณด๊ธฐ ์ค๋ฅ: {e}")
        return []
    return gallery_items
def update_overlap_visibility(processing_mode):
    """Show the merge-option control only for merge-type processing modes.

    Args:
        processing_mode: The selected image-processing mode label.

    Returns:
        A ``gr.update`` whose ``visible`` flag is True when the label
        contains the merge keyword and False otherwise.
    """
    is_merge_mode = "๋ณํฉ" in processing_mode
    return gr.update(visible=is_merge_mode)
# === ์ด๋ฒคํธ ์ฐ๊ฒฐ === | |
# ์ ์ฒ๋ฆฌ ๋ถ์ ๋ฒํผ | |
# Component groups that several event bindings share.
analysis_outputs = [
    batch_result_output, result_output, docling_output,
    status_log_output, api_request_output,
]
pdf_source_inputs = [pdf_dropdown, file_upload]
batch_edit_widgets = [
    batch_edit_button, batch_save_button, batch_cancel_button,
    batch_edit_area, batch_result_output,
]
result_edit_widgets = [
    result_edit_button, result_save_button, result_cancel_button,
    result_edit_area, result_output,
]

# Preprocessing analysis button.
preprocessing_button.click(
    fn=process_preprocessing_wrapper,
    inputs=[
        prompt_input, system_prompt_input, use_images, use_docling,
        pdf_dropdown, file_upload, output_filename,
        image_processing_mode, overlap_merge_option, batch_size_slider,
    ],
    outputs=analysis_outputs,
    show_progress=True,
)

# Postprocessing analysis button.
postprocessing_button.click(
    fn=process_postprocessing_wrapper,
    inputs=[batch_result_output, system_prompt_input, postprocess_prompt_input],
    outputs=analysis_outputs,
    show_progress=True,
)

# Save button.
save_button.click(
    fn=on_save_button_click,
    inputs=[result_output, output_filename],
    outputs=[save_message],
)

# Image preview (preview tab).
preview_button_tab.click(
    fn=preview_images,
    inputs=pdf_source_inputs + [image_processing_mode, use_images, overlap_merge_option],
    outputs=[image_preview_gallery_tab],
)

# Show or hide the merge option when the image processing mode changes.
image_processing_mode.change(
    fn=update_overlap_visibility,
    inputs=[image_processing_mode],
    outputs=[overlap_merge_option],
)

# Text extraction events.
text_extract_btn.click(
    fn=extract_with_text_method,
    inputs=pdf_source_inputs + [text_extract_method],
    outputs=[text_extract_result],
)
ocr_extract_btn.click(
    fn=extract_with_ocr,
    inputs=pdf_source_inputs,
    outputs=[ocr_extract_result],
)
compare_both_btn.click(
    fn=extract_both_methods,
    inputs=pdf_source_inputs + [text_extract_method],
    outputs=[text_extract_result, ocr_extract_result],
)

# Batch-result editing events.
batch_edit_button.click(
    fn=start_batch_edit,
    inputs=[batch_result_output],
    outputs=batch_edit_widgets,
)
batch_save_button.click(
    fn=save_batch_edit,
    inputs=[batch_edit_area],
    outputs=batch_edit_widgets,
)
batch_cancel_button.click(
    fn=cancel_batch_edit,
    outputs=batch_edit_widgets,
)

# Final-result editing events.
result_edit_button.click(
    fn=start_result_edit,
    inputs=[result_output],
    outputs=result_edit_widgets,
)
result_save_button.click(
    fn=save_result_edit,
    inputs=[result_edit_area],
    outputs=result_edit_widgets,
)
result_cancel_button.click(
    fn=cancel_result_edit,
    outputs=result_edit_widgets,
)
return app | |