File size: 6,766 Bytes
02dad28 3dd785b c070304 3dd785b 02dad28 3dd785b 02dad28 3dd785b 02dad28 3dd785b 02dad28 c070304 02dad28 c070304 3dd785b c070304 02dad28 c070304 02dad28 3dd785b 63b8543 37806f5 4027f51 02dad28 3dd785b 02dad28 3dd785b 02dad28 63b8543 3dd785b 63b8543 02dad28 63b8543 c070304 63b8543 02dad28 3dd785b |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 |
# Third-party API clients and UI framework.
import openai
import gradio as gr
# Standard library.
import json
import time
from tqdm import tqdm
import logging
import requests
import google
# Project-local helpers: OCR extraction, LLM post-processing, JSON persistence.
from ocr_functions import detect_document, detect_image
from ai_functions import chat_gpt_document, chat_gpt_image
from helpers import save_json
# All app events are appended to app.log with timestamp and severity.
logging.basicConfig(filename='app.log', level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
def retry_unprocessed_documents():
    """Re-run batch processing for documents that failed in the last run.

    Uses the module-level ``global_document_type`` and ``global_context``
    captured by the previous ``batch_document`` call. Always clears the
    ``unprocessed_documents`` backlog before returning.

    Returns:
        Path/handle from ``save_json`` — either the reprocessed results or
        a placeholder file when there was nothing to retry.
    """
    global global_document_type
    global global_context
    global unprocessed_documents
    # Guard clause: nothing pending, emit a placeholder JSON instead.
    if not unprocessed_documents:
        unprocessed_documents = []
        return save_json("No Unprocessed Documents", "No Unprocessed Documents")
    # "None" (string sentinel) disables the gradio progress bar in batch_document.
    result = batch_document(unprocessed_documents, global_document_type, global_context, "None")
    unprocessed_documents = []
    return result
def combine_json_files(json_files, progress=gr.Progress()):
    """Merge several uploaded JSON files into a single JSON list file.

    Each input file is expected to contain a JSON array; the arrays are
    concatenated in upload order.

    Args:
        json_files: Gradio file objects (each has a ``.name`` path attribute).
        progress: Gradio progress tracker (injected by gradio at call time).

    Returns:
        Path/handle from ``save_json`` for the combined output file.
    """
    combined_data = []
    progress(0, desc="Starting")
    for file in progress.tqdm(json_files, desc="Combining JSON Files"):
        with open(file.name, 'r') as json_file:
            data = json.load(json_file)
            combined_data.extend(data)
    # BUG FIX: the original passed combined_data as a positional logging arg
    # with no format placeholder, which makes the logging module raise an
    # internal "not all arguments converted" formatting error. Use lazy %s.
    logging.info("Combined JSON File: %s", combined_data)
    return save_json(combined_data, "Combined Json")
# Module-level state shared across batch runs (single-process assumption).
unprocessed_documents = []  # documents that failed processing; consumed by retry_unprocessed_documents
global_document_type = None  # document type from the most recent batch_document call
global_context = None  # context string from the most recent batch_document call
def _process_one_document(x, document_type, context, retries=1, timeout=1):
    """Try to OCR + parse a single document with bounded retries.

    Returns:
        (True, parsed_data) on success, (False, None) on failure. Failed
        documents are appended to the module-level ``unprocessed_documents``.
    """
    for i in range(retries):
        try:
            return True, json.loads(chat_gpt_document(detect_document(x), document_type, context))
        except (openai.error.APIConnectionError, openai.error.AuthenticationError, openai.error.RateLimitError, google.api_core.exceptions.RetryError, requests.exceptions.RequestException) as e:
            logging.error(f'Retry {i+1} failed: {e}')
            # With the default retries=1 this branch never fires (single
            # attempt only) — kept for when the retry budget is raised.
            if i < retries - 1:
                logging.error(f'Retrying in {timeout} seconds...')
                time.sleep(timeout)
    unprocessed_documents.append(x)
    return False, None

def batch_document(content, document_type, context, progress = gr.Progress()):
    """Process a batch of documents through OCR + the chat model.

    Args:
        content: iterable of uploaded file objects to process.
        document_type: dropdown value used to pick the parsing template;
            also remembered globally for retry_unprocessed_documents.
        context: optional free-text hint passed to the model; remembered
            globally as well.
        progress: gradio progress tracker, or the string "None" to run
            headless (used by the retry path, which has no UI progress).

    Returns:
        Path/handle from ``save_json`` with all successfully parsed documents.
    """
    logging.info(content)
    combined_data = []
    # Remember inputs so retry_unprocessed_documents can replay failures.
    global global_document_type
    global global_context
    global_document_type = document_type
    global_context = context
    # The two original branches were identical except for progress wrapping;
    # build the iterable once and share a single processing loop.
    if progress == "None":
        items = content
    else:
        progress(0, desc="Starting")
        items = progress.tqdm(content, desc="Processing")
    for x in items:
        ok, data = _process_one_document(x, document_type, context)
        if ok:
            combined_data.append(data)
    logging.info(combined_data)
    logging.info(unprocessed_documents)
    # save_json uses document_type as the output filename stem; avoid "".
    if document_type == "":
        document_type = "error"
    return save_json(combined_data, document_type)
def image(content, context):
    """OCR a single image and have the chat model interpret the text."""
    extracted_text = detect_image(content)
    return chat_gpt_image(extracted_text, context)
def document(content, document_type, context):
    """OCR a single document file and parse it with the chat model."""
    extracted_text = detect_document(content)
    return chat_gpt_document(extracted_text, document_type, context)
# --- Gradio UI layout and event wiring -------------------------------------
with gr.Blocks(title="Axon OCR", css=".markdown {text-align: center;}") as app:
    gr.Markdown("""# Axon OCR
    Attach Images or Files below and convert them to Text.""", elem_classes="markdown")
    # Tab 1: single-image OCR with optional free-text context.
    with gr.Tab("Scan Image"):
        with gr.Row():
            with gr.Column():
                image_input = [gr.Image(type="pil"),
                               gr.Textbox(label="What kind of Image is this? (Optional)", placeholder="This is an image of an Official Receipt")]
            image_output = gr.Textbox(label="Result")
        image_button = gr.Button("Scan")
    # Tab 2: single-document OCR with a required document-type template.
    with gr.Tab("Scan Document"):
        with gr.Row():
            with gr.Column():
                document_input = [gr.File(file_types=["pdf","tiff","image","text"]),
                                  gr.Dropdown(["RPFAA Building P1", "RPFAA Building P2", "TDRP"], label="File Type", info="What type of document is this?"),
                                  gr.Textbox(label="Any additional information? (Optional)", placeholder="This document is an Official Receipt")]
            document_output = gr.Textbox(label="Result")
        document_button = gr.Button("Scan")
    # Tab 3: batch processing with retry/cancel controls.
    with gr.Tab("Batch Scan"):
        with gr.Row():
            with gr.Column():
                batch_document_input = [gr.File(file_types=["pdf","tiff","image","text"], file_count="multiple"),
                                        gr.Dropdown(["RPFAA Building P1", "RPFAA Building P2", "TDRP"], label="File Type", info="What type of document is this?"),
                                        gr.Textbox(label="Any additional information? (Optional)", placeholder="This document is an Official Receipt")]
            batch_document_output = gr.File(label="Result")
        batch_document_button = gr.Button("Scan")
        with gr.Row():
            with gr.Column():
                retry_button = gr.Button("Retry Unprocessed Documents", label="Retry")
            with gr.Column():
                stop_button = gr.Button("Stop Processing Document", label="Stop")
    # Tab 4: merge previously produced JSON result files.
    with gr.Tab("Combine JSON"):
        with gr.Row():
            with gr.Column():
                json_files_input = gr.File(file_types=[".json"], file_count="multiple", label='Upload JSON files')
            combined_json_output = gr.File(label="Result")
        combine_button = gr.Button('Combine JSON files')
    # Event wiring: buttons → handler functions defined above.
    image_button.click(image, inputs=image_input, outputs=image_output)
    document_button.click(document, inputs=document_input, outputs=document_output)
    # The batch event handle is kept so the stop button can cancel it.
    batch_document_event = batch_document_button.click(batch_document, inputs=batch_document_input, outputs=batch_document_output)
    retry_button.click(retry_unprocessed_documents, outputs=batch_document_output)
    stop_button.click(fn=None, inputs=None, outputs=None, cancels=[batch_document_event])
    combine_button.click(combine_json_files, inputs=json_files_input, outputs=combined_json_output)
# Queueing is required for progress tracking and event cancellation.
app.queue()
# SECURITY: credentials are hardcoded placeholders checked into source —
# move them to environment variables / a secrets store before deployment.
app.launch(share=True, auth=("username", "password"))
|