File size: 3,905 Bytes
7f823bb
364e0ba
 
bea5394
7f823bb
364e0ba
de1c169
364e0ba
93ea2db
383775a
6f96666
383775a
bea5394
383775a
364e0ba
 
de1c169
 
 
 
 
 
 
d37148b
de1c169
6f96666
de1c169
 
 
 
 
 
 
 
 
 
6f96666
383775a
4dd18db
6f96666
bea5394
 
6f96666
de1c169
 
 
 
 
 
 
 
bea5394
a0e200f
364e0ba
 
 
 
 
3cee98d
de1c169
364e0ba
383775a
364e0ba
383775a
 
364e0ba
 
383775a
364e0ba
 
383775a
 
93ea2db
6f96666
383775a
364e0ba
383775a
364e0ba
383775a
f497fbd
383775a
 
 
 
 
 
 
6f96666
93ea2db
6f96666
bea5394
6f96666
383775a
6f96666
 
93ea2db
6f96666
bea5394
93ea2db
383775a
93ea2db
 
383775a
 
de1c169
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
# app.py
import json
import traceback
import tempfile
import os
import gradio as gr
from typing import Tuple, Optional, Any # Any for file_data_binary for now, though bytes is expected

# Import config first to ensure JAVA_HOME is set early
import config

from main_analyzer import analyze_pdf
# Import language_tool_python only for the test in __main__ if needed
# import language_tool_python


def process_upload(file_data_binary: Optional[bytes]) -> Tuple[str, Optional[str]]:
    """Analyze an uploaded PDF and return the findings as JSON text.

    The uploaded bytes are spilled to a temporary ``.pdf`` file, the file's
    path is handed to ``analyze_pdf``, and the temporary file is removed
    afterwards whether or not the analysis succeeded.

    Args:
        file_data_binary: Raw content of the uploaded PDF, or ``None`` when
            nothing was uploaded.

    Returns:
        A 2-tuple ``(json_text, None)``. ``json_text`` is either the
        serialized analysis results or an object with an ``"error"`` key
        (plus ``"traceback"`` when an exception was caught). The second
        element is always ``None``; it fills the placeholder file output.
    """
    # Guard clauses: reject anything that is not a bytes payload up front.
    if file_data_binary is None:
        return json.dumps({"error": "No file uploaded or file data is None."}, indent=2), None
    if not isinstance(file_data_binary, bytes):
        problem = f"Unexpected file data type: {type(file_data_binary)}. Expected bytes."
        return json.dumps({"error": problem}, indent=2), None

    scratch_path = None
    try:
        # delete=False keeps the file on disk after the handle is closed so
        # analyze_pdf can reopen it by path; removal happens in `finally`.
        with tempfile.NamedTemporaryFile(delete=False, suffix=".pdf") as scratch:
            scratch_path = scratch.name
            scratch.write(file_data_binary)

        print(f"App: Processing PDF via temporary file: {scratch_path}")
        # analyze_pdf yields a pair; only the first element is reported here.
        findings, _ = analyze_pdf(scratch_path)
        return json.dumps(findings, indent=2, ensure_ascii=False), None

    except Exception as exc:
        # Capture the traceback once and reuse it for the log and the payload.
        trace_text = traceback.format_exc()
        print(f"Error in process_upload: {exc}\n{trace_text}")
        failure = {"error": str(exc), "traceback": trace_text}
        return json.dumps(failure, indent=2), None
    finally:
        # Best-effort removal of the scratch file, if one was ever created.
        if scratch_path and os.path.exists(scratch_path):
            try:
                os.remove(scratch_path)
                print(f"App: Cleaned up temporary PDF file: {scratch_path}")
            except Exception as cleanup_err:
                print(f"App: Error cleaning up temporary PDF file {scratch_path}: {cleanup_err}")


def create_interface():
    """Assemble the Gradio Blocks UI for the PDF analyzer.

    Layout, top to bottom: a PDF upload row, an "Analyze PDF" button row,
    and an output row holding the JSON results next to a non-interactive
    file slot. Clicking the button sends the upload through
    ``process_upload`` and feeds its two return values to the two outputs.

    Returns:
        The constructed ``gr.Blocks`` object; launching is left to the
        caller.
    """
    with gr.Blocks(title="PDF Analyzer") as demo:
        # Upload row. type="binary" is requested so the click handler is
        # given the file content as bytes.
        with gr.Row():
            pdf_upload = gr.File(label="Upload PDF", file_types=[".pdf"], type="binary")

        # Trigger row.
        with gr.Row():
            run_button = gr.Button("Analyze PDF")

        # Output row: JSON report beside the (placeholder) annotated PDF slot.
        with gr.Row():
            json_view = gr.JSON(label="Analysis Results", show_label=True)
            annotated_slot = gr.File(
                label="Annotated PDF (Placeholder - View Coordinates in JSON)",
                show_label=True,
                interactive=False,
            )

        # Output order must match the tuple returned by process_upload.
        run_button.click(
            fn=process_upload,
            inputs=[pdf_upload],
            outputs=[json_view, annotated_slot],
        )

    return demo


if __name__ == "__main__":
    print("\n--- Launching Gradio Interface ---")
    # Per the note in the original script, config.set_java_home() runs as a
    # side effect of importing config at the top of this file.

    # Optional smoke test: start and immediately shut down a LanguageTool
    # instance so a broken Java setup is reported before the UI comes up.
    # Failure is only a warning; the app is launched either way.
    try:
        import language_tool_python

        language_tool_python.LanguageTool('en-US').close()
        print("App: LanguageTool initialized successfully for test.")
    except Exception as probe_err:
        print(f"App: Warning: Could not initialize LanguageTool for test. Language checks might fail: {probe_err}")
        print("Please ensure Java is installed and JAVA_HOME is correctly set (see config.py).")

    # Local-only launch (no public share link). A fixed port can be chosen
    # by passing server_port=7860 here.
    create_interface().launch(share=False)