|
import openai |
|
import gradio as gr |
|
import json |
|
import time |
|
import logging |
|
import requests |
|
import sys |
|
from azure.core.exceptions import HttpResponseError |
|
from ocr_functions import detect_document, detect_image |
|
from ai_functions import chat_gpt_document, chat_gpt_image |
|
from helpers import save_json, read_logs, clear_logs, Logger |
|
from css import css |
|
|
|
# Persist application events to app.log with timestamps.
logging.basicConfig(filename='app.log', level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')

# Redirect stdout through the project Logger — presumably tees prints into
# output.log so the UI "Logs" panel (read_logs) can display them; confirm in helpers.
sys.stdout = Logger("output.log")
|
|
|
def retry_unprocessed_documents():
    """Re-run batch processing for documents that previously failed.

    Uses the document type and context remembered from the last batch run.
    Returns the same (json_file, status_text) pair as ``batch_document``.

    Bug fix: the global ``unprocessed_documents`` list is swapped out for an
    empty one *before* re-processing. Previously the stale list was still
    populated when ``batch_document`` built its status text, so documents
    that succeeded on retry were still reported as unprocessed.
    """
    global unprocessed_documents

    if not unprocessed_documents:
        return save_json("No Unprocessed Documents", "No Unprocessed Documents"), "All Documents Processed"

    # Take ownership of the pending list and reset the global so only
    # documents that fail *again* are re-queued by batch_document.
    docs_to_retry = unprocessed_documents
    unprocessed_documents = []

    # "None" selects the non-progress-bar code path inside batch_document.
    return batch_document(docs_to_retry, global_document_type, global_context, "None")
|
|
|
def clear_unprocessed_documents():
    """Drop every queued unprocessed document and return the UI status text.

    Rebinds the module-level list (rather than mutating it in place) so any
    list object already handed to an in-flight batch run is left untouched.
    """
    global unprocessed_documents
    unprocessed_documents = list()
    return "All Documents Processed"
|
|
|
def combine_json_files(json_files, progress=gr.Progress()):
    """Concatenate several uploaded JSON array files into a single JSON file.

    Each uploaded file is expected to contain a JSON list; the lists are
    appended in upload order. Returns the path produced by ``save_json``.
    """
    combined_data = []
    progress(0, desc="Starting")
    for file in progress.tqdm(json_files, desc="Combining JSON Files"):
        with open(file.name, 'r') as json_file:
            data = json.load(json_file)
        # NOTE(review): extend() assumes each file holds a JSON list; a dict
        # would contribute only its keys — confirm upload format with callers.
        combined_data.extend(data)
    # Bug fix: the original passed combined_data as a positional logging arg
    # with no %s placeholder, which makes logging raise an internal formatting
    # error instead of recording the data. Use lazy %-style formatting.
    logging.info("Combined JSON File: %s", combined_data)
    print("Combined JSON File: ", combined_data)
    return save_json(combined_data, "Combined Json")
|
|
|
# Documents that failed processing in the last batch run; shown in the UI
# and consumed by retry_unprocessed_documents().
unprocessed_documents = []

# Last document type / context used by batch_document, remembered so a
# retry can reuse the same settings.
global_document_type = None

global_context = None
|
def batch_document(content, document_type, context, progress = gr.Progress()):
    """OCR + GPT-process a batch of documents and save the results as JSON.

    Args:
        content: iterable of uploaded file objects to process.
        document_type: template name selected in the UI (e.g. "TDRP").
        context: optional free-text hint passed to the model.
        progress: a Gradio progress tracker, or the string "None" to run
            without a progress bar (used by the retry path).

    Returns:
        (json_file, status_text) — the saved JSON file from ``save_json`` and
        a newline-separated list of unprocessed document names, or
        "All Documents Processed".

    Side effects: remembers ``document_type``/``context`` in module globals so
    a later retry can reuse them, and appends failed documents to the global
    ``unprocessed_documents`` list (retry failures go through a temp list).
    """
    global global_document_type
    global global_context
    global_document_type = document_type
    global_context = context

    combined_data = []
    unprocessed_docs_temp = []

    def _process_one(doc, failures):
        # OCR one document and parse the model's JSON answer, retrying
        # transient API/OCR failures up to `retries` times; on final failure
        # the document is appended to `failures`.
        retries = 3
        timeout = 3
        i = 0
        while True:
            try:
                data = json.loads(chat_gpt_document(detect_document(doc), document_type, context))
                combined_data.append(data)
                break
            except (openai.error.APIConnectionError, openai.error.AuthenticationError, openai.error.RateLimitError, HttpResponseError, requests.exceptions.RequestException) as e:
                logging.error(f'Retry {i+1} failed: {e}')
                print(f'Retry {i+1} failed: {e}')
                if i < retries - 1:
                    logging.error(f'Retrying in {timeout} seconds...')
                    print(f'Retrying in {timeout} seconds...')
                    time.sleep(timeout)
                    i += 1
                else:
                    failures.append(doc)
                    break
            except Exception as e:
                # Non-retryable (e.g. the model returned invalid JSON).
                logging.error(f'Unexpected error {e}')
                print(f'Unexpected error {e}')
                failures.append(doc)
                break

    if progress == "None":
        # Retry path: collect failures separately so they can be merged into
        # the global list below without double-counting.
        for x in content:
            _process_one(x, unprocessed_docs_temp)
    else:
        progress(0, desc="Starting")
        for x in progress.tqdm(content, desc="Processing"):
            _process_one(x, unprocessed_documents)

    logging.info(combined_data)
    print(combined_data)

    # An empty dropdown selection would otherwise name the output file "".
    if document_type == "":
        document_type = "error"

    if unprocessed_documents:
        unprocessed = "\n".join([doc.name.split('\\')[-1].split('/')[-1].split('.')[0] for doc in unprocessed_documents])
        logging.info(unprocessed)
        print(unprocessed)
    elif unprocessed_docs_temp:
        unprocessed_documents.extend(unprocessed_docs_temp)
        unprocessed = "\n".join([doc.name.split('\\')[-1].split('/')[-1].split('.')[0] for doc in unprocessed_documents])
        logging.info(unprocessed)
        print(unprocessed)
    else:
        unprocessed = "All Documents Processed"
    return save_json(combined_data, document_type), unprocessed
|
|
|
def image(content, context):
    """OCR an image and have the model describe/transcribe it.

    Args:
        content: PIL image from the Gradio image widget.
        context: optional free-text hint about the image.

    Returns:
        The model's text answer, or an "Error: ..." string if every retry
        failed.

    Bug fix: previously, when all retries were exhausted (or an unexpected
    exception occurred), the loop broke without ever assigning ``data`` and
    ``return data`` raised UnboundLocalError. Now an error message is
    returned instead, matching the behavior of ``document``.
    """
    retries = 3
    timeout = 3
    i = 0
    while True:
        try:
            data = chat_gpt_image(detect_image(content), context)
            break
        except (openai.error.APIConnectionError, openai.error.AuthenticationError, openai.error.RateLimitError, HttpResponseError, requests.exceptions.RequestException) as e:
            logging.error(f'Retry {i+1} failed: {e}')
            print(f'Retry {i+1} failed: {e}')
            if i < retries - 1:
                logging.error(f'Retrying in {timeout} seconds...')
                print(f'Retrying in {timeout} seconds...')
                time.sleep(timeout)
                i += 1
            else:
                # Out of retries — surface the failure to the UI textbox.
                data = f"Error: {e}, Please check image configuration or service availability"
                break
        except Exception as e:
            logging.error(f'Unexpected error {e}')
            print(f'Unexpected error {e}')
            data = f"Error: {e}, Please check image configuration or service availability"
            break
    return data
|
|
|
def document(content, document_type, context):
    """OCR a single document and process it with the chat model.

    Retries transient API/OCR failures up to three times (three seconds
    apart); returns the model's answer, or an "Error: ..." string when the
    retries are exhausted or an unexpected exception occurs.
    """
    retries = 3
    timeout = 3
    for attempt in range(retries):
        try:
            return chat_gpt_document(detect_document(content), document_type, context)
        except (openai.error.APIConnectionError, openai.error.AuthenticationError, openai.error.RateLimitError, HttpResponseError, requests.exceptions.RequestException) as e:
            logging.error(f'Retry {attempt+1} failed: {e}')
            if attempt < retries - 1:
                logging.error(f'Retrying in {timeout} seconds...')
                time.sleep(timeout)
            else:
                # Out of retries — report the failure to the UI textbox.
                return f"Error: {e}, Please check document configuration or document type"
        except Exception as e:
            # Non-retryable failure (e.g. bad document / configuration).
            logging.error(f'Unexpected error {e}')
            print(f'Unexpected error {e}')
            return f"Error: {e}, Please check document configuration or document type"
|
|
|
# ---------------------------------------------------------------------------
# UI layout: four tabs (single image, single document, batch, JSON merge)
# plus a shared collapsible log viewer. Event wiring follows the layout.
# ---------------------------------------------------------------------------
with gr.Blocks(title="Axon OCR", css=css) as app:

    gr.Markdown("""# Axon OCR

Attach Images or Files below and convert them to Text.""", elem_classes="markdown")

    # --- Tab 1: single image -> text ---
    with gr.Tab("Scan Image"):

        with gr.Row():

            with gr.Column():

                image_input = [gr.Image(type="pil"),

                gr.Textbox(label="What kind of Image is this? (Optional)", placeholder="This is an image of an Official Reciept")]

            image_output = gr.Textbox(label="Result")

        image_button = gr.Button("Scan", variant="primary")

    # --- Tab 2: single document -> text ---
    with gr.Tab("Scan Document"):

        with gr.Row():

            with gr.Column():

                document_input = [gr.File(file_types=["pdf","tiff","image","text"]),

                gr.Dropdown(["RPFAA Building P1", "RPFAA Building P2", "TDRP"], label="File Type", info="What type of document is this?"),

                gr.Textbox(label="Any additional information? (Optional)", placeholder="This is document is an Official Reciept")]

            document_output = gr.Textbox(label="Result")

        document_button = gr.Button("Scan", variant="primary")

    # --- Tab 3: batch processing with retry/cancel controls ---
    with gr.Tab("Batch Scan"):

        with gr.Row():

            with gr.Column():

                batch_document_input = [gr.File(file_types=["pdf","tiff","image","text"], file_count="multiple"),

                gr.Dropdown(["RPFAA Building P1", "RPFAA Building P2", "TDRP"], label="File Type", info="What type of document is this?"),

                gr.Textbox(label="Any additional information? (Optional)", placeholder="This is document is an Official Reciept")]

            with gr.Column():

                batch_document_output = gr.File(label="Result")

                # Shows documents that failed processing; fed by batch_document.
                with gr.Accordion("Unprocessed Documents", open=False):

                    batch_unprocessed = gr.Textbox(info="Download the file before retrying Unprocessed Documents and clear unprocessed documents after every scan to avoid overlaps", show_label=False, elem_classes="unprocessed_textbox")

                    clear_unprocessed_button = gr.Button("Clear Unprocessed Documents")

        batch_document_button = gr.Button("Scan", variant="primary")

        with gr.Row():

            with gr.Column():

                retry_button = gr.Button("Retry Unprocessed Documents")

            with gr.Column():

                stop_button = gr.Button("Stop Processing Document", variant="stop")

    # --- Tab 4: merge previously produced JSON outputs ---
    with gr.Tab("Combine JSON"):

        with gr.Row():

            with gr.Column():

                json_files_input = gr.File(file_types=[".json"], file_count="multiple", label='Upload JSON files')

            combined_json_output = gr.File(label="Result")

        combine_button = gr.Button('Combine JSON files', variant="primary")

    # Log viewer: polls read_logs every second to mirror captured stdout.
    with gr.Accordion("Logs", open=False):

        logs = gr.Textbox(max_lines=10, show_label=False, elem_classes="log_textbox")

        app.load(read_logs, None, logs, every=1)

        clear_button = gr.Button("Clear Logs")

        clear_button.click(clear_logs)

    # --- Event wiring ---
    clear_unprocessed_button.click(clear_unprocessed_documents, outputs=batch_unprocessed)

    image_button.click(image, inputs=image_input, outputs=image_output)

    document_button.click(document, inputs=document_input, outputs=document_output)

    # Keep a handle on the batch event so the stop button can cancel it.
    batch_document_event = batch_document_button.click(batch_document, inputs=batch_document_input, outputs=[batch_document_output,batch_unprocessed])

    retry_button.click(retry_unprocessed_documents, outputs=[batch_document_output,batch_unprocessed])

    stop_button.click(fn=None, inputs=None, outputs=None, cancels=[batch_document_event])

    combine_button.click(combine_json_files, inputs=json_files_input, outputs=combined_json_output)


# Queue is required for progress tracking and event cancellation.
# NOTE(review): hard-coded basic-auth credentials — move to config/env.
app.queue()

app.launch(auth=("username", "password"), favicon_path="assets/logo.png")
|
|