File size: 10,706 Bytes
02dad28
 
 
 
 
3dd785b
18626e5
 
3dd785b
 
18626e5
 
02dad28
 
18626e5
02dad28
3dd785b
 
 
 
 
 
 
02dad28
18626e5
02dad28
18626e5
 
 
 
 
3dd785b
 
 
 
 
 
 
 
18626e5
3dd785b
02dad28
c070304
 
 
 
02dad28
c070304
 
 
 
 
18626e5
 
c070304
 
cfb190d
 
c070304
 
 
 
 
 
3be8992
c070304
18626e5
c070304
 
18626e5
c070304
 
 
18626e5
c070304
18626e5
 
 
 
 
c070304
 
 
 
c6dfe38
 
c070304
 
 
 
 
 
3be8992
c070304
18626e5
c070304
 
18626e5
c070304
 
 
 
 
18626e5
 
 
 
 
c070304
02dad28
18626e5
 
c070304
 
18626e5
 
 
 
 
 
 
 
 
 
 
 
 
cfb190d
3dd785b
cfb190d
 
 
 
 
 
 
 
 
18626e5
cfb190d
 
18626e5
cfb190d
 
 
 
 
 
3dd785b
cfb190d
 
 
 
 
 
 
 
 
 
 
 
 
 
ca92e41
cfb190d
18626e5
 
 
ca92e41
18626e5
cfb190d
63b8543
18626e5
4027f51
02dad28
 
 
 
 
 
 
18626e5
02dad28
 
 
 
3dd785b
02dad28
 
18626e5
02dad28
 
 
 
3dd785b
02dad28
18626e5
 
 
 
 
 
63b8543
 
18626e5
63b8543
18626e5
63b8543
 
 
 
 
18626e5
 
 
 
 
 
 
02dad28
 
18626e5
 
63b8543
 
02dad28
 
18626e5
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
import openai
import gradio as gr
import json
import time
import logging
import requests
import sys
from azure.core.exceptions import HttpResponseError
from ocr_functions import detect_document, detect_image
from ai_functions import chat_gpt_document, chat_gpt_image
from helpers import save_json, read_logs, clear_logs, Logger
from css import css

# Application events go to app.log; stdout is tee'd through Logger into
# output.log — presumably consumed by read_logs for the in-app "Logs"
# panel (verify against helpers.py).
logging.basicConfig(filename='app.log', level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
sys.stdout = Logger("output.log")

def retry_unprocessed_documents():
    """Re-run batch processing on documents that previously failed.

    Reads the module-level ``unprocessed_documents`` queue and the document
    type / context captured by the last batch run.  Returns the same
    (json-file, status-text) pair that ``batch_document`` returns.
    """
    global global_document_type
    global global_context
    global unprocessed_documents
    if not unprocessed_documents:
        # Nothing queued: return an empty-result JSON plus an all-clear status.
        return save_json("No Unprocessed Documents", "No Unprocessed Documents"), "All Documents Processed"
    # "None" tells batch_document to run without the Gradio progress bar.
    return batch_document(unprocessed_documents, global_document_type, global_context, "None")
    
def clear_unprocessed_documents():
    """Reset the module-level retry queue and report success for the UI textbox."""
    global unprocessed_documents
    unprocessed_documents = []
    return "All Documents Processed"

def combine_json_files(json_files, progress=gr.Progress()):
    """Merge several uploaded JSON array files into one combined JSON file.

    Each file is expected to contain a JSON list; the lists are concatenated
    in upload order and written out via ``save_json``.

    :param json_files: Gradio file objects (each has a ``.name`` path).
    :param progress: Gradio progress tracker for the UI progress bar.
    :return: whatever ``save_json`` returns (path of the combined file).
    """
    combined_data = []
    progress(0, desc="Starting")
    for file in progress.tqdm(json_files, desc="Combining JSON Files"):
        with open(file.name, 'r') as json_file:
            data = json.load(json_file)
            combined_data.extend(data)
    # BUG FIX: the list was previously passed as a bare second positional
    # argument, which logging treats as a %-format argument; with no
    # placeholder in the message this raised a formatting error inside the
    # logging module.  Lazy %s formatting is the supported form.
    logging.info("Combined JSON File: %s", combined_data)
    print("Combined JSON File: ", combined_data)
    return save_json(combined_data, "Combined Json")

# Documents that failed every retry; consumed by retry_unprocessed_documents().
unprocessed_documents = []
# Last document type / context used by batch_document(); reused on retry.
global_document_type = None
global_context = None
def _process_single_document(doc, document_type, context, combined_data, failures,
                             retries=3, timeout=3):
    """OCR + GPT-process one document with retries for transient failures.

    On success the parsed JSON result is appended to *combined_data*.  After
    the final failed retry, or on any unexpected error, *doc* is appended to
    *failures* instead.
    """
    for attempt in range(retries):
        try:
            data = json.loads(chat_gpt_document(detect_document(doc), document_type, context))
            combined_data.append(data)
            return
        except (openai.error.APIConnectionError, openai.error.AuthenticationError,
                openai.error.RateLimitError, HttpResponseError,
                requests.exceptions.RequestException) as e:
            logging.error(f'Retry {attempt+1} failed: {e}')
            print(f'Retry {attempt+1} failed: {e}')
            if attempt < retries - 1:
                logging.error(f'Retrying in {timeout} seconds...')
                print(f'Retrying in {timeout} seconds...')
                time.sleep(timeout)
            else:
                failures.append(doc)
                return
        except Exception as e:  # catch any other exceptions
            logging.error(f'Unexpected error {e}')
            print(f'Unexpected error {e}')
            failures.append(doc)
            return

def batch_document(content, document_type, context, progress = gr.Progress()):
    """OCR + GPT-process a batch of documents and save the combined JSON.

    :param content: iterable of Gradio file objects to process.
    :param document_type: selected document type; forwarded to the GPT
        prompt and used as the output file name ("error" when empty).
    :param context: optional free-text context forwarded to the GPT prompt.
    :param progress: Gradio progress tracker, or the string "None" when
        called from retry_unprocessed_documents (no progress bar available).
    :return: (saved-json-file, newline-joined names of unprocessed docs or
        "All Documents Processed").
    """
    combined_data = []
    # Remember the run parameters so a later retry can reuse them.
    global global_document_type
    global global_context
    global_document_type = document_type
    global_context = context

    unprocessed_docs_temp = []

    if progress == "None":
        # Retry path: collect failures separately so they can be merged into
        # the module-level queue below.
        for x in content:
            _process_single_document(x, document_type, context, combined_data, unprocessed_docs_temp)
    else:
        progress(0, desc="Starting")
        for x in progress.tqdm(content, desc="Processing"):
            # Normal path: failures go straight to the module-level queue.
            _process_single_document(x, document_type, context, combined_data, unprocessed_documents)

    logging.info(combined_data)
    print(combined_data)

    if document_type == "":
        document_type = "error"

    # NOTE(review): when unprocessed_documents is already non-empty, fresh
    # failures in unprocessed_docs_temp are never merged in — preserved from
    # the original implementation; confirm intent before changing.
    if unprocessed_documents:
        unprocessed = "\n".join([doc.name.split('\\')[-1].split('/')[-1].split('.')[0] for doc in unprocessed_documents])
        logging.info(unprocessed)
        print(unprocessed)
    elif unprocessed_docs_temp:
        unprocessed_documents.extend(unprocessed_docs_temp)
        unprocessed = "\n".join([doc.name.split('\\')[-1].split('/')[-1].split('.')[0] for doc in unprocessed_documents])
        logging.info(unprocessed)
        print(unprocessed)
    else:
        unprocessed = "All Documents Processed"
    return save_json(combined_data, document_type), unprocessed

def image(content, context):
    """OCR an image and have GPT interpret it, retrying transient failures.

    :param content: PIL image from the Gradio image input.
    :param context: optional free-text hint about the image.
    :return: the GPT response string, or an "Error: ..." string when every
        retry fails or an unexpected error occurs.
    """
    retries = 3
    timeout = 3
    i = 0
    while True: 
        try: 
            data = chat_gpt_image(detect_image(content), context)
            break
        except (openai.error.APIConnectionError, openai.error.AuthenticationError, openai.error.RateLimitError, HttpResponseError, requests.exceptions.RequestException) as e:
            logging.error(f'Retry {i+1} failed: {e}')
            print(f'Retry {i+1} failed: {e}')
            if i < retries - 1:
                logging.error(f'Retrying in {timeout} seconds...')
                print(f'Retrying in {timeout} seconds...')
                time.sleep(timeout)
                i += 1
            else:
                # BUG FIX: data was previously left unbound on this path, so
                # exhausting the retries raised NameError at the return below.
                data = f"Error: {e}, Please check the image or try again"
                break
        except Exception as e:  # catch-all, consistent with document(): never leave data unbound
            logging.error(f'Unexpected error {e}')
            print(f'Unexpected error {e}')
            data = f"Error: {e}, Please check the image or try again"
            break
    return data

def document(content, document_type, context):
    """OCR a single document and pass the extracted text through GPT.

    Transient API/network failures are retried a fixed number of times;
    after the final attempt (or on any unexpected error) an error string
    is returned in place of the GPT output.
    """
    max_attempts = 3
    delay_seconds = 3
    attempt = 0
    while True:
        try:
            result = chat_gpt_document(detect_document(content), document_type, context)
            break
        except (openai.error.APIConnectionError, openai.error.AuthenticationError,
                openai.error.RateLimitError, HttpResponseError,
                requests.exceptions.RequestException) as e:
            logging.error(f'Retry {attempt+1} failed: {e}')
            if attempt < max_attempts - 1:
                logging.error(f'Retrying in {delay_seconds} seconds...')
                time.sleep(delay_seconds)
                attempt += 1
            else:
                result = f"Error: {e}, Please check document configuration or document type"
                break
        except Exception as e:  # anything unexpected: surface as an error string
            logging.error(f'Unexpected error {e}')
            print(f'Unexpected error {e}')
            result = f"Error: {e}, Please check document configuration or document type"
            break
    return result

# ---------------------------------------------------------------------------
# Gradio UI: four tabs (single image scan, single document scan, batch scan,
# JSON merge) plus a live log viewer.  Event wiring is at the bottom.
# ---------------------------------------------------------------------------
with gr.Blocks(title="Axon OCR", css=css) as app:
    gr.Markdown("""# Axon OCR
    Attach Images or Files below and convert them to Text.""", elem_classes="markdown")
    with gr.Tab("Scan Image"):
        with gr.Row():
            with gr.Column():
                # Typos fixed in user-facing placeholders ("Reciept" -> "Receipt").
                image_input = [gr.Image(type="pil"),
                    gr.Textbox(label="What kind of Image is this? (Optional)", placeholder="This is an image of an Official Receipt")]
            image_output = gr.Textbox(label="Result")
        image_button = gr.Button("Scan", variant="primary")
    with gr.Tab("Scan Document"):
        with gr.Row():
            with gr.Column():
                document_input = [gr.File(file_types=["pdf","tiff","image","text"]),
                    gr.Dropdown(["RPFAA Building P1", "RPFAA Building P2", "TDRP"], label="File Type", info="What type of document is this?"),
                    gr.Textbox(label="Any additional information? (Optional)", placeholder="This document is an Official Receipt")]
            document_output = gr.Textbox(label="Result")
        document_button = gr.Button("Scan", variant="primary")
    with gr.Tab("Batch Scan"):
        with gr.Row():
            with gr.Column():
                batch_document_input = [gr.File(file_types=["pdf","tiff","image","text"], file_count="multiple"),
                    gr.Dropdown(["RPFAA Building P1", "RPFAA Building P2", "TDRP"], label="File Type", info="What type of document is this?"),
                    gr.Textbox(label="Any additional information? (Optional)", placeholder="This document is an Official Receipt")]
            with gr.Column():
                batch_document_output = gr.File(label="Result") 
                with gr.Accordion("Unprocessed Documents", open=False):
                    batch_unprocessed = gr.Textbox(info="Download the file before retrying Unprocessed Documents and clear unprocessed documents after every scan to avoid overlaps", show_label=False, elem_classes="unprocessed_textbox")
                    clear_unprocessed_button = gr.Button("Clear Unprocessed Documents")
        batch_document_button = gr.Button("Scan", variant="primary")
        with gr.Row():
            with gr.Column():
                retry_button = gr.Button("Retry Unprocessed Documents")
            with gr.Column():
                stop_button = gr.Button("Stop Processing Document", variant="stop")
    with gr.Tab("Combine JSON"):
        with gr.Row():
            with gr.Column():
                json_files_input = gr.File(file_types=[".json"], file_count="multiple", label='Upload JSON files')
                combined_json_output = gr.File(label="Result")
                combine_button = gr.Button('Combine JSON files', variant="primary")
    with gr.Accordion("Logs", open=False):
        # Polls output.log once a second to mirror console output in the UI.
        logs = gr.Textbox(max_lines=10, show_label=False, elem_classes="log_textbox")
        app.load(read_logs, None, logs, every=1)
        clear_button = gr.Button("Clear Logs")
        clear_button.click(clear_logs)
    # Event wiring -----------------------------------------------------------
    clear_unprocessed_button.click(clear_unprocessed_documents, outputs=batch_unprocessed)
    image_button.click(image, inputs=image_input, outputs=image_output)
    document_button.click(document, inputs=document_input, outputs=document_output)
    # Keep a handle on the batch event so the stop button can cancel it.
    batch_document_event = batch_document_button.click(batch_document, inputs=batch_document_input, outputs=[batch_document_output,batch_unprocessed])
    retry_button.click(retry_unprocessed_documents, outputs=[batch_document_output,batch_unprocessed])
    stop_button.click(fn=None, inputs=None, outputs=None, cancels=[batch_document_event])
    combine_button.click(combine_json_files, inputs=json_files_input, outputs=combined_json_output)

app.queue()
# SECURITY(review): hard-coded credentials checked into source — move these to
# environment variables or a secrets store before deployment.
app.launch(auth=("username", "password"), favicon_path="assets/logo.png")