import openai
import gradio as gr
import json
import time
from azure.core.exceptions import HttpResponseError
import logging
import requests
from ocr_functions import detect_document, detect_image
from ai_functions import chat_gpt_document, chat_gpt_image
from helpers import save_json
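
# NOTE: save_json (imported from helpers, not shown in this file) is assumed
# to serialize its first argument to a .json file named after its second
# argument and return the file path, since the Gradio File outputs below
# expect a path. A minimal sketch under that assumption:
#
#     def save_json(data, name):
#         path = f"{name}.json"
#         with open(path, "w") as handle:
#             json.dump(data, handle)
#         return path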

logging.basicConfig(filename='app.log', level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')

def retry_unprocessed_documents():
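    """Re-run the batch pipeline over documents that failed the last run.

    Reuses the document type and context captured during that run, and passes
    the string "None" as the progress argument so batch_document skips the
    Gradio progress bar.
    """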
    global global_document_type
    global global_context
    global unprocessed_documents
    if unprocessed_documents:
        # Hand batch_document its own copy and clear the shared list first:
        # batch_document appends fresh failures to unprocessed_documents, so
        # iterating over the very list it appends to would never terminate.
        pending = unprocessed_documents
        unprocessed_documents = []
        return batch_document(pending, global_document_type, global_context, "None")
    else:
        return save_json("No Unprocessed Documents", "No Unprocessed Documents")
    
def combine_json_files(json_files, progress=gr.Progress()):
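    """Merge uploaded JSON files into a single list and save the result.

    Each uploaded file is expected to contain a JSON array (as produced by
    batch_document); extend() flattens them into one combined list.
    """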
    combined_data = []
    progress(0, desc="Starting")
    for file in progress.tqdm(json_files, desc="Combining JSON Files"):
        with open(file.name, 'r') as json_file:
            data = json.load(json_file)
            combined_data.extend(data)
    logging.info("Combined JSON File: ", combined_data)
    return save_json(combined_data, "Combined Json")

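# Module-level state shared between a batch run and a later retry: the files
# that failed every attempt, plus the settings used for that run.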
unprocessed_documents = []
global_document_type = None
global_context = None
def batch_document(content, document_type, context, progress=gr.Progress()):
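    """OCR each uploaded file, parse it with GPT, and save the results as JSON.

    `progress` is a gr.Progress when invoked from the UI; the retry path
    passes the string "None" instead to run without a progress bar. Documents
    that still fail after every attempt are collected in unprocessed_documents.
    """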
    logging.info(content)
    combined_data = []
    global global_document_type
    global global_context
    global_document_type = document_type
    global_context = context

    if progress == "None":
        for x in content:
            retries = 1
            timeout = 1
            i = 0
            while True: 
                try: 
                    data = json.loads(chat_gpt_document(detect_document(x),document_type,context))
                    combined_data.append(data)
                    break
                except (openai.error.APIConnectionError, openai.error.AuthenticationError, openai.error.RateLimitError, HttpResponseError, requests.exceptions.RequestException) as e:
                    logging.error(f'Retry {i+1} failed: {e}')
                    if i < retries - 1:
                        logging.error(f'Retrying in {timeout} seconds...')
                        time.sleep(timeout)
                        i += 1
                    else:
                        unprocessed_documents.append(x)
                        break

    else: 
        progress(0, desc="Starting")
        for x in progress.tqdm(content, desc="Processing"):
            retries = 1
            timeout = 1
            i = 0
            while True: 
                try: 
                    data = json.loads(chat_gpt_document(detect_document(x),document_type,context))
                    combined_data.append(data)
                    break
                except (openai.error.APIConnectionError, openai.error.AuthenticationError, openai.error.RateLimitError, HttpResponseError, requests.exceptions.RequestException) as e:
                    logging.error(f'Retry {i+1} failed: {e}')
                    if i < retries - 1:
                        logging.error(f'Retrying in {timeout} seconds...')
                        time.sleep(timeout)
                        i += 1
                    else:
                        unprocessed_documents.append(x)
                        break
    
    logging.info(combined_data)
    logging.info(unprocessed_documents)
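    # Fall back to a generic name when no document type was selected, so
    # save_json still gets a usable filename.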
    if document_type == "":
        document_type = "error"
    return save_json(combined_data, document_type)

def image(content, context):
    # Single-image path: OCR the image, then have GPT structure the text.
    return chat_gpt_image(detect_image(content), context)

def document(content, document_type, context):
    # Single-document path: OCR the file, then have GPT structure the text.
    return chat_gpt_document(detect_document(content), document_type, context)

with gr.Blocks(title="Axon OCR", css=".markdown {text-align: center;}") as app:
    gr.Markdown("""# Axon OCR
    Attach Images or Files below and convert them to Text.""", elem_classes="markdown")
    with gr.Tab("Scan Image"):
        with gr.Row():
            with gr.Column():
                image_input = [gr.Image(type="pil"),
                    gr.Textbox(label="What kind of Image is this? (Optional)", placeholder="This is an image of an Official Reciept")]
            image_output = gr.Textbox(label="Result")
        image_button = gr.Button("Scan")
    with gr.Tab("Scan Document"):
        with gr.Row():
            with gr.Column():
                document_input = [gr.File(file_types=["pdf","tiff","image","text"]),
                    gr.Dropdown(["RPFAA Building P1", "RPFAA Building P2", "TDRP"], label="File Type", info="What type of document is this?"),
                    gr.Textbox(label="Any additional information? (Optional)", placeholder="This is document is an Official Reciept")]
            document_output = gr.Textbox(label="Result")
        document_button = gr.Button("Scan")
    with gr.Tab("Batch Scan"):
        with gr.Row():
            with gr.Column():
                batch_document_input = [gr.File(file_types=["pdf","tiff","image","text"], file_count="multiple"),
                    gr.Dropdown(["RPFAA Building P1", "RPFAA Building P2", "TDRP"], label="File Type", info="What type of document is this?"),
                    gr.Textbox(label="Any additional information? (Optional)", placeholder="This is document is an Official Reciept")]
            batch_document_output = gr.File(label="Result")
        batch_document_button = gr.Button("Scan")
        with gr.Row():
            with gr.Column():
                retry_button = gr.Button("Retry Unprocessed Documents")
            with gr.Column():
                stop_button = gr.Button("Stop Processing Document")
    with gr.Tab("Combine JSON"):
        with gr.Row():
            with gr.Column():
                json_files_input = gr.File(file_types=[".json"], file_count="multiple", label="Upload JSON files")
                combined_json_output = gr.File(label="Result")
                combine_button = gr.Button("Combine JSON files")

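    # Wire up events. The batch click's event handle is kept so the Stop
    # button can cancel an in-flight batch run via `cancels`.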
    image_button.click(image, inputs=image_input, outputs=image_output)
    document_button.click(document, inputs=document_input, outputs=document_output)
    batch_document_event = batch_document_button.click(batch_document, inputs=batch_document_input, outputs=batch_document_output)
    retry_button.click(retry_unprocessed_documents, outputs=batch_document_output)
    stop_button.click(fn=None, inputs=None, outputs=None, cancels=[batch_document_event])
    combine_button.click(combine_json_files, inputs=json_files_input, outputs=combined_json_output)

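# Enable the request queue so the Stop button can cancel a running batch job;
# the auth credentials below are placeholders and should be replaced before
# deployment.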
app.queue()
app.launch(auth=("username", "password"))