Seanya's picture
Upload folder using huggingface_hub
8725d40 verified
import gradio as gr
from backend import process_request, get_pdf_files, save_result_to_file, extract_text_with_fitz, extract_text_with_docling, preview_image_processing, load_system_prompt, load_user_prompt, load_postprocess_prompt, process_request_preprocessing_only, process_request_postprocessing_only
def create_ui():
"""Create and configure the Gradio UI interface.

Builds the ``gr.Blocks`` layout (file selection, status log, and the
analysis / preview / API-log tabs), defines the event handlers, wires them
to the components, and returns the ``gr.Blocks`` instance.
"""
# Top-level container: page title, custom CSS for the result/edit areas, Soft theme.
with gr.Blocks(
title="์ด๋ ฅ์„œ ๋ถ„์„ ์‹œ์Šคํ…œ",
css="""
.main-container { max-width: 1400px; margin: 0 auto; }
.section-header { margin-bottom: 15px; }
.input-group { margin-bottom: 20px; }
#batch_result_area, #result_area {
min-height: 200px !important;
}
#batch_info {
font-size: 0.85em;
color: #666;
margin-bottom: 10px;
}
/* ํŽธ์ง‘ ์˜์—ญ ์Šคํƒ€์ผ */
.edit-area {
border: 2px dashed #ccc;
border-radius: 5px;
background-color: #f9f9f9;
}
""",
theme=gr.themes.Soft()
) as app:
with gr.Column(elem_classes="main-container"):
gr.Markdown("# ๐Ÿ“‹ ์ด๋ ฅ์„œ ๋ถ„์„ ์‹œ์Šคํ…œ", elem_classes="section-header")
# Top area: file selection + log information
with gr.Row(equal_height=True):
# File selection area (left, compact)
with gr.Column(scale=2):
with gr.Group():
gr.Markdown("### ๐Ÿ“ ํŒŒ์ผ ์„ ํƒ")
# The dropdown choices come from the backend. The default is the bundled
# sample PDF when it is in the list, otherwise the first entry, or None
# when the list is empty.
pdf_files = get_pdf_files()
default_pdf = "./resume_samples/pdf/text/๋ฆฌ๋ฉค๋ฒ„-S3.pdf" if "./resume_samples/pdf/text/๋ฆฌ๋ฉค๋ฒ„-S3.pdf" in pdf_files else (pdf_files[0] if pdf_files else None)
pdf_dropdown = gr.Dropdown(
label="PDF ํŒŒ์ผ ์„ ํƒ",
choices=pdf_files,
value=default_pdf,
interactive=True
)
# Alternative input: an uploaded PDF, delivered to handlers as a file path.
file_upload = gr.File(
label="๋˜๋Š” ์ƒˆ PDF ํŒŒ์ผ ์—…๋กœ๋“œ",
file_types=[".pdf"],
type="filepath"
)
# Real-time status information (right)
with gr.Column(scale=3):
with gr.Group():
gr.Markdown("### ๐Ÿ“Š ์‹ค์‹œ๊ฐ„ ์ƒํƒœ ์ •๋ณด")
# Read-only status log; the processing handlers write to this textbox.
status_log_output = gr.Textbox(
label="์ฒ˜๋ฆฌ ์ƒํƒœ",
lines=6,
max_lines=10,
value="์‹œ์Šคํ…œ ์ค€๋น„ ์™„๋ฃŒ - ํŒŒ์ผ์„ ์„ ํƒํ•˜๊ณ  ๋ถ„์„์„ ์‹œ์ž‘ํ•˜์„ธ์š”...",
interactive=False,
show_label=False
)
gr.Markdown("---")
with gr.Tabs():
# Analysis tab
with gr.TabItem("๐Ÿค– AI ๋ถ„์„"):
# Settings area
with gr.Row(equal_height=True):
with gr.Column(scale=2):
with gr.Group():
gr.Markdown("### 1๏ธโƒฃ ํ”„๋กฌํ”„ํŠธ ์„ค์ •")
# The three prompt editors are pre-filled from the backend loaders.
system_prompt_input = gr.TextArea(
label="์‹œ์Šคํ…œ ํ”„๋กฌํ”„ํŠธ",
value=load_system_prompt(),
lines=5,
placeholder="์‹œ์Šคํ…œ ํ”„๋กฌํ”„ํŠธ๋ฅผ ์ž…๋ ฅํ•˜์„ธ์š”..."
)
prompt_input = gr.TextArea(
label="์‚ฌ์šฉ์ž ํ”„๋กฌํ”„ํŠธ (์ „์ฒ˜๋ฆฌ)",
value=load_user_prompt(),
lines=3,
placeholder="์ „์ฒ˜๋ฆฌ์šฉ ์‚ฌ์šฉ์ž ํ”„๋กฌํ”„ํŠธ๋ฅผ ์ž…๋ ฅํ•˜์„ธ์š”..."
)
postprocess_prompt_input = gr.TextArea(
label="ํ›„์ฒ˜๋ฆฌ ํ”„๋กฌํ”„ํŠธ",
value=load_postprocess_prompt(),
lines=3,
placeholder="ํ›„์ฒ˜๋ฆฌ์šฉ ํ”„๋กฌํ”„ํŠธ๋ฅผ ์ž…๋ ฅํ•˜์„ธ์š”..."
)
with gr.Column(scale=1):
with gr.Group():
gr.Markdown("### 2๏ธโƒฃ ์ฒ˜๋ฆฌ ์„ค์ •")
use_images = gr.Checkbox(
label="์ด๋ฏธ์ง€๋กœ ๋ณ€ํ™˜ํ•˜์—ฌ ์ฒ˜๋ฆฌ",
value=True,
info="PDF๋ฅผ ์ด๋ฏธ์ง€๋กœ ๋ณ€ํ™˜ํ•˜์—ฌ ๋น„์ „ ๋ชจ๋ธ๋กœ ๋ถ„์„"
)
# Page-merge mode; the overlap option below is shown or hidden by
# update_overlap_visibility when this value changes.
image_processing_mode = gr.Radio(
choices=["๊ฐ€๋กœ ๋ณ‘ํ•ฉ (2ํŽ˜์ด์ง€์”ฉ)", "์„ธ๋กœ ๋ณ‘ํ•ฉ (2ํŽ˜์ด์ง€์”ฉ)", "๋‚ฑ๊ฐœ ํŽ˜์ด์ง€"],
value="๊ฐ€๋กœ ๋ณ‘ํ•ฉ (2ํŽ˜์ด์ง€์”ฉ)",
label="์ด๋ฏธ์ง€ ์ฒ˜๋ฆฌ ๋ฐฉ์‹",
info="ํŽ˜์ด์ง€ ๋ณ‘ํ•ฉ ๋ฐฉ์‹ ์„ ํƒ"
)
overlap_merge_option = gr.Radio(
choices=["์ผ๋ฐ˜ ๋ณ‘ํ•ฉ", "์ค‘๋ณต ๋ณ‘ํ•ฉ (์Šฌ๋ผ์ด๋”ฉ ์œˆ๋„์šฐ)"],
value="์ผ๋ฐ˜ ๋ณ‘ํ•ฉ",
label="๋ณ‘ํ•ฉ ๋ฐฉ์‹",
info="์ผ๋ฐ˜: (1,2), (3,4)... | ์ค‘๋ณต: (1,2), (2,3)...",
visible=True
)
# Integer slider restricted to 1-3, defaulting to 3.
batch_size_slider = gr.Slider(
minimum=1,
maximum=3,
value=3,
step=1,
label="์ด๋ฏธ์ง€ ๋ฐฐ์น˜ ํฌ๊ธฐ",
info="ํ•œ ๋ฒˆ์— ์ฒ˜๋ฆฌํ•  ์ด๋ฏธ์ง€ ์žฅ์ˆ˜ (1-3์žฅ)"
)
use_docling = gr.Checkbox(
label="ํ…์ŠคํŠธ ํŒŒ์‹ฑ ํ•จ๊ป˜ ์ˆ˜ํ–‰",
value=True,
info="Docling์œผ๋กœ PDF ํ…์ŠคํŠธ ์ถ”์ถœ"
)
# The use_postprocess checkbox was removed; post-processing now has its own button.
# Run-button area
with gr.Row():
with gr.Column(scale=2):
output_filename = gr.Textbox(
label="๊ฒฐ๊ณผ ํŒŒ์ผ ์ด๋ฆ„ (ํ™•์žฅ์ž ์—†์ด)",
value="result",
placeholder="์ €์žฅํ•  ํŒŒ์ผ ์ด๋ฆ„์„ ์ž…๋ ฅํ•˜์„ธ์š”"
)
with gr.Column(scale=1):
preprocessing_button = gr.Button(
"๐Ÿ“ ์ „์ฒ˜๋ฆฌ ๋ถ„์„ ์‹œ์ž‘",
variant="primary",
size="lg"
)
with gr.Column(scale=1):
postprocessing_button = gr.Button(
"๐ŸŽฏ ํ›„์ฒ˜๋ฆฌ ๋ถ„์„ ์‹œ์ž‘",
variant="secondary",
size="lg"
)
# Results area
gr.Markdown("---")
gr.Markdown("## ๐Ÿ“Š ๋ถ„์„ ๊ฒฐ๊ณผ")
with gr.Row():
# Batch processing result - editable
with gr.Column(scale=1):
with gr.Group():
gr.Markdown("### ๐Ÿ“ ๋ฐฐ์น˜ ์ฒ˜๋ฆฌ ๊ฒฐ๊ณผ")
# Rendered view of the batch result; its initial value is a placeholder
# that process_postprocessing_wrapper compares against.
batch_result_output = gr.Markdown(
value="*๋ฐฐ์น˜ ์ฒ˜๋ฆฌ ๊ฒฐ๊ณผ๊ฐ€ ์—ฌ๊ธฐ์— ํ‘œ์‹œ๋ฉ๋‹ˆ๋‹ค...*",
elem_id="batch_result_area",
show_label=False,
)
# Batch result editing area
with gr.Row():
batch_edit_button = gr.Button(
"โœ๏ธ ํŽธ์ง‘",
variant="secondary",
size="sm"
)
# Save and cancel start hidden; the edit handlers toggle their visibility.
batch_save_button = gr.Button(
"๐Ÿ’พ ์ €์žฅ",
variant="primary",
size="sm",
visible=False
)
batch_cancel_button = gr.Button(
"โŒ ์ทจ์†Œ",
variant="secondary",
size="sm",
visible=False
)
# Hidden raw-text editor that start_batch_edit reveals and pre-fills.
batch_edit_area = gr.TextArea(
value="",
lines=15,
max_lines=50,
interactive=True,
show_label=False,
visible=False,
placeholder="๋ฐฐ์น˜ ์ฒ˜๋ฆฌ ๊ฒฐ๊ณผ๋ฅผ ํŽธ์ง‘ํ•˜์„ธ์š”..."
)
# Final analysis result - auto-sizing
with gr.Column(scale=1):
with gr.Group():
gr.Markdown("### ๐ŸŽฏ ์ตœ์ข… ๋ถ„์„ ๊ฒฐ๊ณผ")
result_output = gr.Markdown(
value="*์ตœ์ข… ๋ถ„์„ ๊ฒฐ๊ณผ๊ฐ€ ์—ฌ๊ธฐ์— ํ‘œ์‹œ๋ฉ๋‹ˆ๋‹ค...*",
elem_id="result_area",
show_label=False,
)
# Final result editing area
with gr.Row():
result_edit_button = gr.Button(
"โœ๏ธ ํŽธ์ง‘",
variant="secondary",
size="sm"
)
result_save_button = gr.Button(
"๐Ÿ’พ ์ €์žฅ",
variant="primary",
size="sm",
visible=False
)
result_cancel_button = gr.Button(
"โŒ ์ทจ์†Œ",
variant="secondary",
size="sm",
visible=False
)
result_edit_area = gr.TextArea(
value="",
lines=15,
max_lines=50,
interactive=True,
show_label=False,
visible=False,
placeholder="์ตœ์ข… ๋ถ„์„ ๊ฒฐ๊ณผ๋ฅผ ํŽธ์ง‘ํ•˜์„ธ์š”..."
)
# File save area
with gr.Row():
save_button = gr.Button(
"๐Ÿ’พ ๊ฒฐ๊ณผ ์ €์žฅ",
variant="secondary",
size="sm"
)
# Hidden until on_save_button_click fills it with the save outcome.
save_message = gr.Markdown(
value="",
visible=False
)
# Preview tab
with gr.TabItem("๐Ÿ” ๋ฏธ๋ฆฌ๋ณด๊ธฐ"):
# Image preview area (top)
with gr.Row(equal_height=True):
with gr.Column(scale=1):
with gr.Group():
gr.Markdown("### ๐Ÿ–ผ๏ธ ์ด๋ฏธ์ง€ ๋ฏธ๋ฆฌ๋ณด๊ธฐ")
preview_button_tab = gr.Button(
"์ด๋ฏธ์ง€ ์ฒ˜๋ฆฌ ๋ฏธ๋ฆฌ๋ณด๊ธฐ",
variant="secondary",
size="sm"
)
# Gallery populated by preview_images; starts empty.
image_preview_gallery_tab = gr.Gallery(
label="์ฒ˜๋ฆฌ๋œ ์ด๋ฏธ์ง€",
show_label=False,
columns=2,
rows=2,
height=350,
value=[]
)
gr.Markdown("---")
# Text extraction comparison area (bottom)
gr.Markdown("### ๐Ÿ“„ PDF ํ…์ŠคํŠธ ์ถ”์ถœ ๋น„๊ต", elem_classes="section-header")
with gr.Row(equal_height=True):
# Text-based extraction
with gr.Column(scale=1):
with gr.Group():
gr.Markdown("#### ๐Ÿ“ ํ…์ŠคํŠธ ๊ธฐ๋ฐ˜ ์ถ”์ถœ")
gr.Markdown("*PDF์˜ ํ…์ŠคํŠธ ๋ ˆ์ด์–ด์—์„œ ์ง์ ‘ ์ถ”์ถœ*", elem_id="extract_info")
# Only one extraction method is offered; the handler ignores the selected value.
text_extract_method = gr.Radio(
choices=["Fitz (PyMuPDF)"],
value="Fitz (PyMuPDF)",
label="์ถ”์ถœ ๋ฐฉ์‹",
info="๋น ๋ฅด๊ณ  ๊ฐ€๋ฒผ์šด ํ…์ŠคํŠธ ์ถ”์ถœ"
)
text_extract_btn = gr.Button(
"๐Ÿ“ ํ…์ŠคํŠธ ์ถ”์ถœ",
variant="primary",
size="sm"
)
text_extract_result = gr.Markdown(
value="*ํ…์ŠคํŠธ ์ถ”์ถœ ๊ฒฐ๊ณผ๊ฐ€ ์—ฌ๊ธฐ์— ํ‘œ์‹œ๋ฉ๋‹ˆ๋‹ค...*",
elem_id="text_result_area"
)
# OCR-based extraction
with gr.Column(scale=1):
with gr.Group():
gr.Markdown("#### ๐Ÿค– OCR ๊ธฐ๋ฐ˜ ์ถ”์ถœ")
gr.Markdown("*์ด๋ฏธ์ง€์—์„œ ๊ด‘ํ•™ ๋ฌธ์ž ์ธ์‹์œผ๋กœ ์ถ”์ถœ*", elem_id="ocr_info")
ocr_extract_btn = gr.Button(
"๐Ÿ” OCR ์ถ”์ถœ",
variant="primary",
size="sm"
)
ocr_extract_result = gr.Markdown(
value="*OCR ์ถ”์ถœ ๊ฒฐ๊ณผ๊ฐ€ ์—ฌ๊ธฐ์— ํ‘œ์‹œ๋ฉ๋‹ˆ๋‹ค...*",
elem_id="ocr_result_area"
)
# Combined comparison button (runs both extractions)
with gr.Row():
compare_both_btn = gr.Button(
"๐Ÿ”„ ์–‘์ชฝ ๋ชจ๋‘ ์ถ”์ถœํ•˜์—ฌ ๋น„๊ต",
variant="secondary",
size="lg"
)
# API request and log tab
with gr.TabItem("๐Ÿ“Š API ์š”์ฒญ & ๋กœ๊ทธ"):
with gr.Row(equal_height=True):
# Raw API request information
with gr.Column(scale=1):
with gr.Group():
gr.Markdown("#### ๐Ÿ“ค API ์š”์ฒญ (Raw)")
# Read-only JSON viewer; the processing handlers write to it.
api_request_output = gr.Code(
value="๋ถ„์„ ์‹œ์ž‘ ์‹œ ์‹ค์ œ API ์š”์ฒญ ๋‚ด์šฉ์ด ํ‘œ์‹œ๋ฉ๋‹ˆ๋‹ค",
language="json",
label=None,
interactive=False
)
# Text parsing result (updated automatically)
with gr.Column(scale=1):
with gr.Group():
gr.Markdown("#### ๐Ÿ“„ ํ…์ŠคํŠธ ํŒŒ์‹ฑ ๊ฒฐ๊ณผ (์‹ค์‹œ๊ฐ„)")
# Read-only Markdown viewer; the processing handlers write to it.
docling_output = gr.Code(
value="PDF ํ…์ŠคํŠธ ํŒŒ์‹ฑ ๊ฒฐ๊ณผ๊ฐ€ ์ž๋™์œผ๋กœ ํ‘œ์‹œ๋ฉ๋‹ˆ๋‹ค",
language="markdown",
label=None,
interactive=False,
lines=20
)
# === Event handler functions ===
def update_status_only(status_text):
    """Return ``status_text`` unchanged so that only the status log is updated."""
    passthrough = status_text
    return passthrough
# === Edit-related functions ===
def start_batch_edit(batch_content):
    """Switch the batch-result panel into edit mode.

    Returns five ``gr.update`` objects, in order: edit button (hidden),
    save button (shown), cancel button (shown), edit area (shown and
    pre-filled with ``batch_content``), Markdown display (hidden).
    """
    edit_btn, save_btn, cancel_btn = (
        gr.update(visible=shown) for shown in (False, True, True)
    )
    editor = gr.update(visible=True, value=batch_content)
    display = gr.update(visible=False)
    return edit_btn, save_btn, cancel_btn, editor, display
def save_batch_edit(edited_content):
    """Leave edit mode and show the edited batch result.

    Returns five ``gr.update`` objects, in order: edit button (shown),
    save button (hidden), cancel button (hidden), edit area (hidden),
    Markdown display (shown with ``edited_content`` as its new value).
    """
    controls = [gr.update(visible=shown) for shown in (True, False, False, False)]
    rendered = gr.update(visible=True, value=edited_content)
    return (*controls, rendered)
def cancel_batch_edit():
    """Leave batch edit mode without changing the displayed content.

    Returns five visibility-only ``gr.update`` objects, in order: edit button
    (shown), save button (hidden), cancel button (hidden), edit area (hidden),
    Markdown display (shown).
    """
    visibility = (True, False, False, False, True)
    return tuple(gr.update(visible=shown) for shown in visibility)
def start_result_edit(result_content):
    """Switch the final-result panel into edit mode.

    Returns five ``gr.update`` objects, in order: edit button (hidden),
    save button (shown), cancel button (shown), edit area (shown and
    pre-filled with ``result_content``), Markdown display (hidden).
    """
    return (
        *(gr.update(visible=shown) for shown in (False, True, True)),
        gr.update(visible=True, value=result_content),
        gr.update(visible=False),
    )
def save_result_edit(edited_content):
    """Leave edit mode and show the edited final result.

    Returns five ``gr.update`` objects, in order: edit button (shown),
    save button (hidden), cancel button (hidden), edit area (hidden),
    Markdown display (shown with ``edited_content`` as its new value).
    """
    edit_btn = gr.update(visible=True)
    hidden = [gr.update(visible=False) for _ in range(3)]
    display = gr.update(visible=True, value=edited_content)
    return (edit_btn, *hidden, display)
def cancel_result_edit():
    """Leave final-result edit mode without changing the displayed content.

    Returns five visibility-only ``gr.update`` objects, in order: edit button
    (shown), save button (hidden), cancel button (hidden), edit area (hidden),
    Markdown display (shown).
    """
    updates = []
    for shown in (True, False, False, False, True):
        updates.append(gr.update(visible=shown))
    return tuple(updates)
def process_preprocessing_wrapper(*args):
    """Run only the preprocessing stage and stream its progress to the UI.

    Iterates over ``process_request_preprocessing_only(*args)`` and, for every
    usable intermediate result, yields a 5-tuple
    ``(batch_content, final_result_message, parsed_text, status_log, api_request)``.
    The final-result slot always carries a guidance message rather than the
    backend's own result, because the final result is produced by the separate
    post-processing step.

    After the backend is exhausted, one more tuple is yielded: the last usable
    result with a "preprocessing complete" message, or a "no result" tuple when
    the backend produced nothing usable. Any exception is printed and reported
    as a single error tuple instead of propagating.
    """
    try:
        last_valid = None
        for result in process_request_preprocessing_only(*args):
            # Skip empty or too-short results instead of failing on them.
            if not result or len(result) < 5:
                continue
            # Slice to five items: the guard admits longer tuples, and a plain
            # five-name unpack would raise ValueError on them.
            batch_content, _, parsed_text, status_log, api_request = result[:5]
            last_valid = (batch_content, parsed_text, status_log, api_request)
            # Show only the batch result here; keep a guidance message in the
            # final-result pane.
            yield batch_content, "*์ „์ฒ˜๋ฆฌ ์™„๋ฃŒ ํ›„ ํ›„์ฒ˜๋ฆฌ ๋ฒ„ํŠผ์„ ๋ˆŒ๋Ÿฌ์ฃผ์„ธ์š”...*", parsed_text, status_log, api_request

        if last_valid is not None:
            batch_content, parsed_text, status_log, api_request = last_valid
            # Preprocessing finished: swap the guidance message for the completion notice.
            yield batch_content, "*โœ… ์ „์ฒ˜๋ฆฌ ์™„๋ฃŒ! ํ›„์ฒ˜๋ฆฌ ๋ถ„์„ ๋ฒ„ํŠผ์„ ๋ˆŒ๋Ÿฌ ์ตœ์ข… ๊ฒฐ๊ณผ๋ฅผ ํ™•์ธํ•˜์„ธ์š”.*", parsed_text, status_log, api_request
        else:
            yield "๋ฐฐ์น˜ ์ฒ˜๋ฆฌ ๊ฒฐ๊ณผ๊ฐ€ ์—†์Šต๋‹ˆ๋‹ค.", "์ „์ฒ˜๋ฆฌ๋ฅผ ๋จผ์ € ์ˆ˜ํ–‰ํ•ด์ฃผ์„ธ์š”.", "", "์ „์ฒ˜๋ฆฌ ์™„๋ฃŒ", ""
    except Exception as e:
        # UI boundary: report the failure in the panes rather than crashing the event handler.
        print(f"์ „์ฒ˜๋ฆฌ ๋ž˜ํผ ํ•จ์ˆ˜ ์˜ค๋ฅ˜: {e}")
        yield "โŒ **์ „์ฒ˜๋ฆฌ ์˜ค๋ฅ˜ ๋ฐœ์ƒ**", "์ „์ฒ˜๋ฆฌ ์ค‘ ์˜ค๋ฅ˜๊ฐ€ ๋ฐœ์ƒํ–ˆ์Šต๋‹ˆ๋‹ค.", "", f"์ „์ฒ˜๋ฆฌ ์˜ค๋ฅ˜ ๋ฐœ์ƒ: {str(e)}", ""
def process_postprocessing_wrapper(batch_result, system_prompt, postprocess_prompt):
    """Run only the post-processing stage on an existing batch result.

    Yields 5-tuples ``(batch_result, result_content, parsed_text, status_log,
    api_request)``. The first slot is always the ``batch_result`` that was
    passed in, so the batch pane keeps its current content.

    If ``batch_result`` is empty or still equals the initial placeholder text,
    a single "no batch result" tuple is yielded and the backend is not called.
    If the backend yields nothing usable, a single "no result" tuple is
    yielded. Any exception is printed and reported as a single error tuple
    instead of propagating.
    """
    try:
        # Nothing to post-process: input is empty or still the initial placeholder.
        if not batch_result or batch_result.strip() == "*๋ฐฐ์น˜ ์ฒ˜๋ฆฌ ๊ฒฐ๊ณผ๊ฐ€ ์—ฌ๊ธฐ์— ํ‘œ์‹œ๋ฉ๋‹ˆ๋‹ค...*":
            yield batch_result, "โŒ **๋ฐฐ์น˜ ์ฒ˜๋ฆฌ ๊ฒฐ๊ณผ๊ฐ€ ์—†์Šต๋‹ˆ๋‹ค**\n\n์ „์ฒ˜๋ฆฌ๋ฅผ ๋จผ์ € ์ˆ˜ํ–‰ํ•ด์ฃผ์„ธ์š”.", "", "ํ›„์ฒ˜๋ฆฌ ์‹คํ–‰ ๋ถˆ๊ฐ€: ๋ฐฐ์น˜ ๊ฒฐ๊ณผ ์—†์Œ", ""
            return

        produced_output = False
        for result in process_request_postprocessing_only(batch_result, system_prompt, postprocess_prompt):
            # Skip empty or too-short results instead of failing on them.
            if not result or len(result) < 5:
                continue
            # Slice to five items: the guard admits longer tuples, and a plain
            # five-name unpack would raise ValueError on them.
            _, result_content, parsed_text, status_log, api_request = result[:5]
            produced_output = True
            # Stream the update while keeping the batch pane unchanged.
            yield batch_result, result_content, parsed_text, status_log, api_request

        # The last streamed update is already the final state, so only the
        # "nothing produced" case needs an extra yield.
        if not produced_output:
            yield batch_result, "ํ›„์ฒ˜๋ฆฌ ๊ฒฐ๊ณผ๊ฐ€ ์—†์Šต๋‹ˆ๋‹ค.", "", "ํ›„์ฒ˜๋ฆฌ ์™„๋ฃŒ", ""
    except Exception as e:
        # UI boundary: report the failure in the panes rather than crashing the event handler.
        error_msg = f"ํ›„์ฒ˜๋ฆฌ ์˜ค๋ฅ˜: {str(e)}"
        print(f"ํ›„์ฒ˜๋ฆฌ ๋ž˜ํผ ํ•จ์ˆ˜ ์˜ค๋ฅ˜: {e}")
        yield batch_result, f"โŒ **ํ›„์ฒ˜๋ฆฌ ์˜ค๋ฅ˜ ๋ฐœ์ƒ**\n\n{error_msg}", "", f"ํ›„์ฒ˜๋ฆฌ ์˜ค๋ฅ˜ ๋ฐœ์ƒ: {str(e)}", ""
def on_save_button_click(result_content, filename):
    """Save the result via ``save_result_to_file`` and reveal its outcome.

    Returns a ``gr.update`` that makes the target component visible and sets
    its value to whatever ``save_result_to_file`` returned.
    """
    outcome = save_result_to_file(result_content, filename)
    message_update = gr.update(value=outcome, visible=True)
    return message_update
def extract_with_text_method(pdf_path, uploaded_file, method):
    """Extract text from the selected PDF with ``extract_text_with_fitz``.

    An uploaded file takes precedence over the dropdown path. ``method`` is
    accepted but not used by this function. When neither source is set, a
    prompt message is returned instead of extracted text.
    """
    source = uploaded_file if uploaded_file else pdf_path
    if source:
        return extract_text_with_fitz(source)
    return "PDF ํŒŒ์ผ์„ ์„ ํƒํ•ด์ฃผ์„ธ์š”."
def extract_with_ocr(pdf_path, uploaded_file):
    """Extract text from the selected PDF with ``extract_text_with_docling``.

    An uploaded file takes precedence over the dropdown path. When neither
    source is set, a prompt message is returned instead of extracted text.
    """
    source = uploaded_file if uploaded_file else pdf_path
    if source:
        return extract_text_with_docling(source)
    return "PDF ํŒŒ์ผ์„ ์„ ํƒํ•ด์ฃผ์„ธ์š”."
def extract_both_methods(pdf_path, uploaded_file, text_method):
    """Run both extractions and return ``(text_result, ocr_result)``.

    When neither an uploaded file nor a dropdown path is set, both slots carry
    the same prompt message and no extraction is attempted.
    """
    if not (uploaded_file or pdf_path):
        return "PDF ํŒŒ์ผ์„ ์„ ํƒํ•ด์ฃผ์„ธ์š”.", "PDF ํŒŒ์ผ์„ ์„ ํƒํ•ด์ฃผ์„ธ์š”."
    return (
        extract_with_text_method(pdf_path, uploaded_file, text_method),
        extract_with_ocr(pdf_path, uploaded_file),
    )
def preview_images(pdf_path, uploaded_file, processing_mode, use_images, overlap_option):
    """Return the preview produced by ``preview_image_processing`` for the selected PDF.

    An empty list is returned when image processing is disabled, when no PDF
    is selected (an uploaded file takes precedence over the dropdown path), or
    when ``preview_image_processing`` raises; in the last case the error is
    printed.
    """
    target = (uploaded_file or pdf_path) if use_images else None
    if not target:
        return []
    try:
        gallery_items = preview_image_processing(target, processing_mode, overlap_option)
    except Exception as exc:
        print(f"๋ฏธ๋ฆฌ๋ณด๊ธฐ ์˜ค๋ฅ˜: {exc}")
        return []
    return gallery_items
def update_overlap_visibility(processing_mode):
    """Show the overlap option only when the mode label contains the merge keyword.

    Returns a ``gr.update`` whose ``visible`` flag is the result of a substring
    test on ``processing_mode``.
    """
    is_merge_mode = "๋ณ‘ํ•ฉ" in processing_mode
    return gr.update(visible=is_merge_mode)
# === Event wiring ===
# Preprocessing analysis button: streams the wrapper's 5-tuples into the
# batch, final-result, parsed-text, status-log and API-request components.
# The input order is forwarded positionally to the backend via *args.
preprocessing_button.click(
fn=process_preprocessing_wrapper,
inputs=[
prompt_input, system_prompt_input, use_images, use_docling,
pdf_dropdown, file_upload, output_filename,
image_processing_mode, overlap_merge_option, batch_size_slider
],
outputs=[batch_result_output, result_output, docling_output, status_log_output, api_request_output],
show_progress=True
)
# Postprocessing analysis button: takes the batch result currently shown as input.
postprocessing_button.click(
fn=process_postprocessing_wrapper,
inputs=[batch_result_output, system_prompt_input, postprocess_prompt_input],
outputs=[batch_result_output, result_output, docling_output, status_log_output, api_request_output],
show_progress=True
)
# Save button: passes the final result content and the output file name to the save handler.
save_button.click(
fn=on_save_button_click,
inputs=[result_output, output_filename],
outputs=[save_message]
)
# Image preview (preview tab)
preview_button_tab.click(
fn=preview_images,
inputs=[pdf_dropdown, file_upload, image_processing_mode, use_images, overlap_merge_option],
outputs=[image_preview_gallery_tab]
)
# Show/hide the overlap option when the image processing mode changes
image_processing_mode.change(
fn=update_overlap_visibility,
inputs=[image_processing_mode],
outputs=[overlap_merge_option]
)
# Text extraction events
text_extract_btn.click(
fn=extract_with_text_method,
inputs=[pdf_dropdown, file_upload, text_extract_method],
outputs=[text_extract_result]
)
ocr_extract_btn.click(
fn=extract_with_ocr,
inputs=[pdf_dropdown, file_upload],
outputs=[ocr_extract_result]
)
compare_both_btn.click(
fn=extract_both_methods,
inputs=[pdf_dropdown, file_upload, text_extract_method],
outputs=[text_extract_result, ocr_extract_result]
)
# === Edit-related event wiring ===
# Batch result edit events. All three handlers return five updates in the
# same order: edit button, save button, cancel button, edit area, Markdown display.
batch_edit_button.click(
fn=start_batch_edit,
inputs=[batch_result_output],
outputs=[batch_edit_button, batch_save_button, batch_cancel_button, batch_edit_area, batch_result_output]
)
batch_save_button.click(
fn=save_batch_edit,
inputs=[batch_edit_area],
outputs=[batch_edit_button, batch_save_button, batch_cancel_button, batch_edit_area, batch_result_output]
)
batch_cancel_button.click(
fn=cancel_batch_edit,
outputs=[batch_edit_button, batch_save_button, batch_cancel_button, batch_edit_area, batch_result_output]
)
# Final result edit events (same output ordering as the batch edit events)
result_edit_button.click(
fn=start_result_edit,
inputs=[result_output],
outputs=[result_edit_button, result_save_button, result_cancel_button, result_edit_area, result_output]
)
result_save_button.click(
fn=save_result_edit,
inputs=[result_edit_area],
outputs=[result_edit_button, result_save_button, result_cancel_button, result_edit_area, result_output]
)
result_cancel_button.click(
fn=cancel_result_edit,
outputs=[result_edit_button, result_save_button, result_cancel_button, result_edit_area, result_output]
)
# Hand the assembled Blocks app back to the caller.
return app