Sage
committed on
Commit
•
c070304
1
Parent(s):
763acf9
added error fixes
Browse files
app.py
CHANGED
@@ -12,6 +12,7 @@ import time
|
|
12 |
from settings import char_remove, gpt_model, RPFAAP2, RPFAAP1, project_id, project_location, processor_id
|
13 |
from tqdm import tqdm
|
14 |
import logging
|
|
|
15 |
|
16 |
logging.basicConfig(filename='app.log', level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
|
17 |
|
@@ -61,8 +62,8 @@ def chat_gpt_document(content, document_type, context):
|
|
61 |
document = "RPFAAP2.json"
|
62 |
desired_format = RPFAAP2
|
63 |
else:
|
64 |
-
|
65 |
-
|
66 |
|
67 |
if context == "":
|
68 |
sequence_1 = (document_prefix, content_info, desired_format)
|
@@ -127,6 +128,7 @@ def detect_image(content, lang):
|
|
127 |
response.error.message))
|
128 |
|
129 |
os.remove(temp_file_path)
|
|
|
130 |
return(response.full_text_annotation.text)
|
131 |
|
132 |
def detect_document(content):
|
@@ -182,35 +184,78 @@ def image(content, lang, context):
|
|
182 |
def document(content, document_type, context):
|
183 |
return chat_gpt_document(detect_document(content),document_type,context)
|
184 |
|
185 |
-
|
186 |
-
|
187 |
-
|
188 |
-
|
189 |
-
|
190 |
-
j = 0
|
191 |
combined_data = []
|
192 |
-
|
193 |
-
|
194 |
-
|
195 |
-
|
196 |
-
|
197 |
-
|
198 |
-
|
199 |
-
|
200 |
-
|
201 |
-
|
202 |
-
|
203 |
-
|
204 |
-
|
205 |
-
|
206 |
-
|
207 |
-
|
208 |
-
logging.error(f'
|
209 |
-
|
210 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
211 |
logging.info(combined_data)
|
|
|
|
|
|
|
212 |
return save_json(combined_data, document_type)
|
213 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
214 |
def save_json(text, filename):
|
215 |
filename = filename+".json"
|
216 |
with open(filename, "w", encoding='utf-8') as outfile:
|
@@ -244,11 +289,13 @@ with gr.Blocks(title="Axon OCR", css=".markdown {text-align: center;}") as app:
|
|
244 |
gr.Textbox(label="Any additional information? (Optional)", placeholder="This is document is an Official Reciept")]
|
245 |
batch_document_output = gr.File(label="Result")
|
246 |
batch_document_button = gr.Button("Scan")
|
|
|
247 |
|
248 |
|
249 |
image_button.click(image, inputs=image_input, outputs=image_output)
|
250 |
document_button.click(document, inputs=document_input, outputs=document_output)
|
251 |
batch_document_button.click(batch_document, inputs=batch_document_input, outputs=batch_document_output)
|
|
|
252 |
|
253 |
app.queue()
|
254 |
app.launch(auth=("username", "password"))
|
|
|
12 |
from settings import char_remove, gpt_model, RPFAAP2, RPFAAP1, project_id, project_location, processor_id
|
13 |
from tqdm import tqdm
|
14 |
import logging
|
15 |
+
import google
|
16 |
|
17 |
logging.basicConfig(filename='app.log', level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
|
18 |
|
|
|
62 |
document = "RPFAAP2.json"
|
63 |
desired_format = RPFAAP2
|
64 |
else:
|
65 |
+
property_info = ["Please Select a Document Type"]
|
66 |
+
return json.dumps(property_info, indent=4)
|
67 |
|
68 |
if context == "":
|
69 |
sequence_1 = (document_prefix, content_info, desired_format)
|
|
|
128 |
response.error.message))
|
129 |
|
130 |
os.remove(temp_file_path)
|
131 |
+
logging.info(response)
|
132 |
return(response.full_text_annotation.text)
|
133 |
|
134 |
def detect_document(content):
|
|
|
184 |
def document(content, document_type, context):
|
185 |
return chat_gpt_document(detect_document(content),document_type,context)
|
186 |
|
187 |
+
# Module-level retry state shared between batch_document and
# retry_unprocessed_documents (single-user app assumption — TODO confirm;
# Gradio apps with multiple sessions would share this state).
unprocessed_documents = []
global_document_type = None
global_context = None

def _process_one_document(x, document_type, context, combined_data, retries=1, timeout=1):
    """Run OCR + GPT parsing for a single document, retrying transient API errors.

    On success the parsed dict is appended to combined_data. After the retry
    budget is exhausted the document is recorded in the module-level
    unprocessed_documents list so the user can retry it later.

    Args:
        x: one uploaded document (as passed by Gradio's file input).
        document_type: forwarded to chat_gpt_document to pick the output schema.
        context: optional extra user-supplied context for the GPT prompt.
        combined_data: list accumulating parsed results (mutated in place).
        retries: total attempts before giving up (1 = no re-attempt,
            matching the original hard-coded behavior).
        timeout: seconds to sleep between attempts.
    """
    i = 0
    while True:
        try:
            data = json.loads(chat_gpt_document(detect_document(x), document_type, context))
            combined_data.append(data)
            return
        except (openai.error.APIConnectionError,
                openai.error.AuthenticationError,
                openai.error.RateLimitError,
                google.api_core.exceptions.RetryError,
                requests.exceptions.RequestException) as e:
            logging.error(f'Retry {i+1} failed: {e}')
            if i < retries - 1:
                logging.error(f'Retrying in {timeout} seconds...')
                time.sleep(timeout)
                i += 1
            else:
                # Out of attempts: park the document for a later manual retry.
                unprocessed_documents.append(x)
                return

def batch_document(content, document_type, context, progress = gr.Progress()):
    """Process a batch of documents and save the combined results as JSON.

    Args:
        content: iterable of uploaded documents.
        document_type: schema selector forwarded to chat_gpt_document.
        context: optional extra context string for the GPT prompt.
        progress: Gradio progress tracker; the retry path passes the string
            "None" to suppress progress reporting (a deliberate sentinel —
            the comparison below is against the string, not the None object).

    Returns:
        Path of the JSON file written by save_json.
    """
    logging.info(content)
    combined_data = []
    # Remember the last-used settings so retry_unprocessed_documents can
    # replay failed documents with the same parameters.
    global global_document_type
    global global_context
    global_document_type = document_type
    global_context = context

    if progress == "None":
        # Retry path: no progress UI available.
        for x in content:
            _process_one_document(x, document_type, context, combined_data)
    else:
        progress(0, desc="Starting")
        for x in progress.tqdm(content, desc="Processing"):
            _process_one_document(x, document_type, context, combined_data)

    logging.info(combined_data)
    logging.info(unprocessed_documents)
    if document_type == "":
        document_type = "error"
    return save_json(combined_data, document_type)
|
244 |
|
245 |
+
def retry_unprocessed_documents():
    """Re-run batch_document on every document that previously failed.

    Uses the module-level unprocessed_documents list together with the
    document type and context captured by the last batch_document call.

    Returns:
        The JSON file produced by batch_document, or a small placeholder
        JSON file when there is nothing to retry.

    Note:
        The pending list is snapshotted and cleared BEFORE re-processing.
        batch_document appends any documents that fail again to the (now
        fresh) global list; clearing afterwards — as the previous version
        did — would silently discard those repeat failures.
    """
    global unprocessed_documents
    if not unprocessed_documents:
        unprocessed_documents = []
        return save_json("No Unprocessed Documents", "No Unprocessed Documents")
    pending = unprocessed_documents
    unprocessed_documents = []  # new failures from this retry land in a clean list
    # "None" (the string) tells batch_document to skip progress reporting.
    return batch_document(pending, global_document_type, global_context, "None")
|
258 |
+
|
259 |
def save_json(text, filename):
|
260 |
filename = filename+".json"
|
261 |
with open(filename, "w", encoding='utf-8') as outfile:
|
|
|
289 |
gr.Textbox(label="Any additional information? (Optional)", placeholder="This is document is an Official Reciept")]
|
290 |
batch_document_output = gr.File(label="Result")
|
291 |
batch_document_button = gr.Button("Scan")
|
292 |
+
retry_button = gr.Button("Retry Unprocessed Documents", label="Retry")
|
293 |
|
294 |
|
295 |
image_button.click(image, inputs=image_input, outputs=image_output)
|
296 |
document_button.click(document, inputs=document_input, outputs=document_output)
|
297 |
batch_document_button.click(batch_document, inputs=batch_document_input, outputs=batch_document_output)
|
298 |
+
retry_button.click(retry_unprocessed_documents, outputs=batch_document_output)
|
299 |
|
300 |
app.queue()
|
301 |
app.launch(auth=("username", "password"))
|