import openai
import gradio as gr
import json
import time
import logging
import requests
import sys
from azure.core.exceptions import HttpResponseError
from ocr_functions import detect_document, detect_image
from ai_functions import chat_gpt_document, chat_gpt_image
from helpers import save_json, read_logs, clear_logs, Logger
from css import css
# Persist every app event (INFO and above) to app.log; the UI's "Logs"
# accordion reads this back via helpers.read_logs.
logging.basicConfig(filename='app.log', level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
# Tee stdout through helpers.Logger so print() output also lands in output.log.
sys.stdout = Logger("output.log")
def retry_unprocessed_documents():
    """Re-run batch OCR over the documents that failed in the last batch.

    The global failure list is snapshotted and cleared *before* retrying.
    Without the clear, batch_document's final reporting saw the stale,
    still-populated global list, reported every previously-failed document
    as unprocessed (even ones that just succeeded), and never merged the
    fresh failures from its temp list (its `elif` branch was unreachable).

    Returns:
        (json_file, status_text) from batch_document, or a placeholder pair
        when there is nothing to retry.
    """
    global unprocessed_documents
    if not unprocessed_documents:
        return save_json("No Unprocessed Documents", "No Unprocessed Documents"), "All Documents Processed"
    docs_to_retry = unprocessed_documents
    unprocessed_documents = []
    # "None" is the sentinel telling batch_document this is a retry pass:
    # no progress UI, and failures are collected in its temp list so only
    # documents that fail *again* end up back in the global list.
    return batch_document(docs_to_retry, global_document_type, global_context, "None")
def clear_unprocessed_documents():
global unprocessed_documents
unprocessed_documents = []
return "All Documents Processed"
def combine_json_files(json_files, progress=gr.Progress()):
    """Merge the top-level JSON arrays of every uploaded file into one list.

    Args:
        json_files: uploaded file objects (each has a .name path) whose
            contents are JSON arrays.
        progress: Gradio progress tracker (injected by Gradio).

    Returns:
        The file produced by save_json for the combined list.
    """
    combined_data = []
    progress(0, desc="Starting")
    for file in progress.tqdm(json_files, desc="Combining JSON Files"):
        with open(file.name, 'r') as json_file:
            data = json.load(json_file)
            combined_data.extend(data)
    # Lazy %-style logging: the old call passed combined_data as a positional
    # arg to a format string with no placeholder, which makes the logging
    # module raise "not all arguments converted" internally.
    logging.info("Combined JSON File: %s", combined_data)
    print("Combined JSON File: ", combined_data)
    return save_json(combined_data, "Combined Json")
# Module-level state shared across Gradio callbacks:
unprocessed_documents = []   # documents that failed OCR in the last batch run
global_document_type = None  # last batch's document type, reused on retry
global_context = None        # last batch's context hint, reused on retry
def _ocr_document_with_retry(doc, document_type, context, failed_docs):
    """OCR one document and parse the model's JSON reply, retrying transient
    API errors up to 3 times with a 3-second pause between attempts.

    Args:
        doc: uploaded file object (has a .name path).
        document_type: dropdown value forwarded to chat_gpt_document.
        context: optional free-text hint forwarded to chat_gpt_document.
        failed_docs: list the doc is appended to when all retries are
            exhausted or an unexpected (non-retryable) error occurs.

    Returns:
        (True, parsed_data) on success, (False, None) on failure.
    """
    retries = 3
    timeout = 3
    for i in range(retries):
        try:
            return True, json.loads(chat_gpt_document(detect_document(doc), document_type, context))
        except (openai.error.APIConnectionError, openai.error.AuthenticationError,
                openai.error.RateLimitError, HttpResponseError,
                requests.exceptions.RequestException) as e:
            logging.error(f'Retry {i+1} failed: {e}')
            print(f'Retry {i+1} failed: {e}')
            if i < retries - 1:
                logging.error(f'Retrying in {timeout} seconds...')
                print(f'Retrying in {timeout} seconds...')
                time.sleep(timeout)
            else:
                failed_docs.append(doc)
        except Exception as e:  # anything else is not worth retrying
            logging.error(f'Unexpected error {e}')
            print(f'Unexpected error {e}')
            failed_docs.append(doc)
            return False, None
    return False, None


def batch_document(content, document_type, context, progress = gr.Progress()):
    """Batch-OCR a list of documents into one combined JSON file.

    The per-document retry loop was previously duplicated verbatim in both
    branches below; it now lives in _ocr_document_with_retry.

    Args:
        content: list of uploaded file objects.
        document_type: dropdown value describing the document layout.
        context: optional free-text hint for the model.
        progress: Gradio progress tracker, or the string "None" when called
            from retry_unprocessed_documents (retry mode: no progress UI,
            failures collected in a temp list and merged below).

    Returns:
        (json_file, unprocessed_names) where unprocessed_names lists the base
        names of documents that could not be processed, one per line.
    """
    combined_data = []
    # Remember this batch's settings so "Retry Unprocessed Documents" can
    # re-run with the same document type and context.
    global global_document_type
    global global_context
    global_document_type = document_type
    global_context = context
    unprocessed_docs_temp = []
    if progress == "None":
        # Retry mode: collect failures separately, merged into the global
        # list in the reporting section below.
        failed_docs = unprocessed_docs_temp
        docs = content
    else:
        # Normal mode: failures accumulate directly in the global list.
        failed_docs = unprocessed_documents
        progress(0, desc="Starting")
        docs = progress.tqdm(content, desc="Processing")
    for x in docs:
        ok, data = _ocr_document_with_retry(x, document_type, context, failed_docs)
        if ok:
            combined_data.append(data)
    logging.info(combined_data)
    print(combined_data)
    if document_type == "":
        document_type = "error"  # save_json needs a usable file name
    if unprocessed_documents:
        unprocessed = "\n".join([doc.name.split('\\')[-1].split('/')[-1].split('.')[0] for doc in unprocessed_documents])
        logging.info(unprocessed)
        print(unprocessed)
    elif unprocessed_docs_temp:
        # Retry mode with an empty global list: fold fresh failures back in.
        unprocessed_documents.extend(unprocessed_docs_temp)
        unprocessed = "\n".join([doc.name.split('\\')[-1].split('/')[-1].split('.')[0] for doc in unprocessed_documents])
        logging.info(unprocessed)
        print(unprocessed)
    else:
        unprocessed = "All Documents Processed"
    return save_json(combined_data, document_type), unprocessed
def image(content, context):
    """OCR an image and interpret the text with GPT, retrying transient errors.

    Args:
        content: PIL image from the Gradio Image component.
        context: optional free-text hint about the image.

    Returns:
        The model's text result, or an "Error: ..." string when all retries
        fail or an unexpected error occurs. (Previously the failure path
        broke out of the loop without assigning `data`, so `return data`
        raised UnboundLocalError and crashed the callback.)
    """
    retries = 3
    timeout = 3
    for attempt in range(retries):
        try:
            data = chat_gpt_image(detect_image(content), context)
            break
        except (openai.error.APIConnectionError, openai.error.AuthenticationError,
                openai.error.RateLimitError, HttpResponseError,
                requests.exceptions.RequestException) as e:
            logging.error(f'Retry {attempt+1} failed: {e}')
            print(f'Retry {attempt+1} failed: {e}')
            if attempt < retries - 1:
                logging.error(f'Retrying in {timeout} seconds...')
                print(f'Retrying in {timeout} seconds...')
                time.sleep(timeout)
            else:
                # Bug fix: always assign data before falling through to return.
                data = f"Error: {e}, Please check image configuration"
        except Exception as e:  # mirror document(): report instead of crashing
            logging.error(f'Unexpected error {e}')
            print(f'Unexpected error {e}')
            data = f"Error: {e}, Please check image configuration"
            break
    return data
def document(content, document_type, context):
    """Run OCR plus GPT extraction on a single document.

    Transient API errors are retried up to three times, three seconds apart.
    After the final failed attempt — or on any unexpected exception — an
    "Error: ..." message is returned instead of raising.

    Args:
        content: uploaded file object to OCR.
        document_type: dropdown value describing the document layout.
        context: optional free-text hint for the model.

    Returns:
        The model's text result, or an error message string.
    """
    max_attempts = 3
    wait_seconds = 3
    for attempt in range(max_attempts):
        try:
            return chat_gpt_document(detect_document(content), document_type, context)
        except (openai.error.APIConnectionError, openai.error.AuthenticationError,
                openai.error.RateLimitError, HttpResponseError,
                requests.exceptions.RequestException) as e:
            logging.error(f'Retry {attempt+1} failed: {e}')
            if attempt < max_attempts - 1:
                logging.error(f'Retrying in {wait_seconds} seconds...')
                time.sleep(wait_seconds)
            else:
                return f"Error: {e}, Please check document configuration or document type"
        except Exception as e:  # catch any other exceptions
            logging.error(f'Unexpected error {e}')
            print(f'Unexpected error {e}')
            return f"Error: {e}, Please check document configuration or document type"
# ---------------------------------------------------------------------------
# Gradio UI: four tabs (Scan Image, Scan Document, Batch Scan, Combine JSON)
# plus a collapsible live-log view. All button wiring is at the bottom.
# NOTE(review): the original file's indentation was lost in extraction; the
# exact nesting of outputs/buttons inside Row/Column is reconstructed here
# and should be confirmed against the running layout.
# ---------------------------------------------------------------------------
with gr.Blocks(title="Axon OCR", css=css) as app:
    gr.Markdown("""# Axon OCR
    Attach Images or Files below and convert them to Text.""", elem_classes="markdown")
    # Tab 1: single-image OCR with an optional image description.
    with gr.Tab("Scan Image"):
        with gr.Row():
            with gr.Column():
                image_input = [gr.Image(type="pil"),
                               gr.Textbox(label="What kind of Image is this? (Optional)", placeholder="This is an image of an Official Reciept")]
            image_output = gr.Textbox(label="Result")
        image_button = gr.Button("Scan", variant="primary")
    # Tab 2: single-document OCR with a document-type dropdown.
    with gr.Tab("Scan Document"):
        with gr.Row():
            with gr.Column():
                document_input = [gr.File(file_types=["pdf","tiff","image","text"]),
                                  gr.Dropdown(["RPFAA Building P1", "RPFAA Building P2", "TDRP"], label="File Type", info="What type of document is this?"),
                                  gr.Textbox(label="Any additional information? (Optional)", placeholder="This is document is an Official Reciept")]
            document_output = gr.Textbox(label="Result")
        document_button = gr.Button("Scan", variant="primary")
    # Tab 3: batch OCR with retry/stop controls and an unprocessed-docs panel.
    with gr.Tab("Batch Scan"):
        with gr.Row():
            with gr.Column():
                batch_document_input = [gr.File(file_types=["pdf","tiff","image","text"], file_count="multiple"),
                                        gr.Dropdown(["RPFAA Building P1", "RPFAA Building P2", "TDRP"], label="File Type", info="What type of document is this?"),
                                        gr.Textbox(label="Any additional information? (Optional)", placeholder="This is document is an Official Reciept")]
            with gr.Column():
                batch_document_output = gr.File(label="Result")
                with gr.Accordion("Unprocessed Documents", open=False):
                    batch_unprocessed = gr.Textbox(info="Download the file before retrying Unprocessed Documents and clear unprocessed documents after every scan to avoid overlaps", show_label=False, elem_classes="unprocessed_textbox")
                    clear_unprocessed_button = gr.Button("Clear Unprocessed Documents")
        batch_document_button = gr.Button("Scan", variant="primary")
        with gr.Row():
            with gr.Column():
                retry_button = gr.Button("Retry Unprocessed Documents")
            with gr.Column():
                stop_button = gr.Button("Stop Processing Document", variant="stop")
    # Tab 4: merge multiple JSON result files into one download.
    with gr.Tab("Combine JSON"):
        with gr.Row():
            with gr.Column():
                json_files_input = gr.File(file_types=[".json"], file_count="multiple", label='Upload JSON files')
            combined_json_output = gr.File(label="Result")
        combine_button = gr.Button('Combine JSON files', variant="primary")
    # Live log viewer: polls read_logs every second while the app is open.
    with gr.Accordion("Logs", open=False):
        logs = gr.Textbox(max_lines=10, show_label=False, elem_classes="log_textbox")
        app.load(read_logs, None, logs, every=1)
        clear_button = gr.Button("Clear Logs")
        clear_button.click(clear_logs)
    # Event wiring.
    clear_unprocessed_button.click(clear_unprocessed_documents, outputs=batch_unprocessed)
    image_button.click(image, inputs=image_input, outputs=image_output)
    document_button.click(document, inputs=document_input, outputs=document_output)
    # Keep a handle on the batch event so the stop button can cancel it.
    batch_document_event = batch_document_button.click(batch_document, inputs=batch_document_input, outputs=[batch_document_output,batch_unprocessed])
    retry_button.click(retry_unprocessed_documents, outputs=[batch_document_output,batch_unprocessed])
    stop_button.click(fn=None, inputs=None, outputs=None, cancels=[batch_document_event])
    combine_button.click(combine_json_files, inputs=json_files_input, outputs=combined_json_output)
# Queueing is required for progress tracking and event cancellation.
app.queue()
# NOTE(review): hard-coded credentials in source — move to env vars/secrets.
app.launch(auth=("username", "password"), favicon_path="assets/logo.png")