Sage committed on
Commit
c070304
1 Parent(s): 763acf9

added error fixes

Browse files
Files changed (1) hide show
  1. app.py +74 -27
app.py CHANGED
@@ -12,6 +12,7 @@ import time
12
  from settings import char_remove, gpt_model, RPFAAP2, RPFAAP1, project_id, project_location, processor_id
13
  from tqdm import tqdm
14
  import logging
 
15
 
16
  logging.basicConfig(filename='app.log', level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
17
 
@@ -61,8 +62,8 @@ def chat_gpt_document(content, document_type, context):
61
  document = "RPFAAP2.json"
62
  desired_format = RPFAAP2
63
  else:
64
- document = ""
65
- desired_format = ""
66
 
67
  if context == "":
68
  sequence_1 = (document_prefix, content_info, desired_format)
@@ -127,6 +128,7 @@ def detect_image(content, lang):
127
  response.error.message))
128
 
129
  os.remove(temp_file_path)
 
130
  return(response.full_text_annotation.text)
131
 
132
  def detect_document(content):
@@ -182,35 +184,78 @@ def image(content, lang, context):
182
  def document(content, document_type, context):
183
  return chat_gpt_document(detect_document(content),document_type,context)
184
 
185
- def batch_document(content, document_type, context, progress=gr.Progress()):
186
- progress(0, desc="Starting")
187
- retries = 5
188
- timeout = 5
189
- i = 0
190
- j = 0
191
  combined_data = []
192
- for x in progress.tqdm(content, desc="Processing"):
193
- while True:
194
- k = i+j
195
- try:
196
- data = json.loads(chat_gpt_document(detect_document(x),document_type,context))
197
- combined_data.append(data)
198
- break
199
- except openai.error.APIConnectionError:
200
- logging.error(f'Retry {k+1} failed: openai.error.APIConnectionError')
201
- if i < retries - 1:
202
- logging.error(f'Retrying in {timeout} seconds...')
203
- time.sleep(timeout)
204
- i += 1
205
- except openai.error.RateLimitError:
206
- logging.error(f'Retry {k+1} failed: openai.error.RateLimitError')
207
- if j < retries - 1:
208
- logging.error(f'Retrying in {timeout} seconds...')
209
- time.sleep(timeout)
210
- j += 1
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
211
  logging.info(combined_data)
 
 
 
212
  return save_json(combined_data, document_type)
213
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
214
  def save_json(text, filename):
215
  filename = filename+".json"
216
  with open(filename, "w", encoding='utf-8') as outfile:
@@ -244,11 +289,13 @@ with gr.Blocks(title="Axon OCR", css=".markdown {text-align: center;}") as app:
244
  gr.Textbox(label="Any additional information? (Optional)", placeholder="This is document is an Official Reciept")]
245
  batch_document_output = gr.File(label="Result")
246
  batch_document_button = gr.Button("Scan")
 
247
 
248
 
249
  image_button.click(image, inputs=image_input, outputs=image_output)
250
  document_button.click(document, inputs=document_input, outputs=document_output)
251
  batch_document_button.click(batch_document, inputs=batch_document_input, outputs=batch_document_output)
 
252
 
253
  app.queue()
254
  app.launch(auth=("username", "password"))
 
12
  from settings import char_remove, gpt_model, RPFAAP2, RPFAAP1, project_id, project_location, processor_id
13
  from tqdm import tqdm
14
  import logging
15
+ import google
16
 
17
  logging.basicConfig(filename='app.log', level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
18
 
 
62
  document = "RPFAAP2.json"
63
  desired_format = RPFAAP2
64
  else:
65
+ property_info = ["Please Select a Document Type"]
66
+ return json.dumps(property_info, indent=4)
67
 
68
  if context == "":
69
  sequence_1 = (document_prefix, content_info, desired_format)
 
128
  response.error.message))
129
 
130
  os.remove(temp_file_path)
131
+ logging.info(response)
132
  return(response.full_text_annotation.text)
133
 
134
  def detect_document(content):
 
184
  def document(content, document_type, context):
185
  return chat_gpt_document(detect_document(content),document_type,context)
186
 
187
+ unprocessed_documents = []
188
+ global_document_type = None
189
+ global_context = None
190
+ def batch_document(content, document_type, context, progress = gr.Progress()):
191
+ logging.info(content)
 
192
  combined_data = []
193
+ global global_document_type
194
+ global global_context
195
+ global_document_type = document_type
196
+ global_context = context
197
+
198
+ if progress == "None":
199
+ for x in content:
200
+ retries = 1
201
+ timeout = 1
202
+ i = 0
203
+ while True:
204
+ try:
205
+ data = json.loads(chat_gpt_document(detect_document(x),document_type,context))
206
+ combined_data.append(data)
207
+ break
208
+ except (openai.error.APIConnectionError, openai.error.AuthenticationError, openai.error.RateLimitError, google.api_core.exceptions.RetryError, requests.exceptions.RequestException) as e:
209
+ logging.error(f'Retry {i+1} failed: {e}')
210
+ if i < retries - 1:
211
+ logging.error(f'Retrying in {timeout} seconds...')
212
+ time.sleep(timeout)
213
+ i += 1
214
+ else:
215
+ unprocessed_documents.append(x)
216
+ break
217
+
218
+ else:
219
+ progress(0, desc="Starting")
220
+ for x in progress.tqdm(content, desc="Processing"):
221
+ retries = 1
222
+ timeout = 1
223
+ i = 0
224
+ while True:
225
+ try:
226
+ data = json.loads(chat_gpt_document(detect_document(x),document_type,context))
227
+ combined_data.append(data)
228
+ break
229
+ except (openai.error.APIConnectionError, openai.error.AuthenticationError, openai.error.RateLimitError, google.api_core.exceptions.RetryError, requests.exceptions.RequestException) as e:
230
+ logging.error(f'Retry {i+1} failed: {e}')
231
+ if i < retries - 1:
232
+ logging.error(f'Retrying in {timeout} seconds...')
233
+ time.sleep(timeout)
234
+ i += 1
235
+ else:
236
+ unprocessed_documents.append(x)
237
+ break
238
+
239
  logging.info(combined_data)
240
+ logging.info(unprocessed_documents)
241
+ if document_type == "":
242
+ document_type = "error"
243
  return save_json(combined_data, document_type)
244
 
245
+ def retry_unprocessed_documents():
246
+ # This function will use the documents stored in unprocessed_documents
247
+ # and call batch_document on them
248
+ global global_document_type
249
+ global global_context
250
+ global unprocessed_documents
251
+ if unprocessed_documents:
252
+ output = batch_document(unprocessed_documents, global_document_type, global_context, "None")
253
+ unprocessed_documents = []
254
+ return output
255
+ else:
256
+ unprocessed_documents = []
257
+ return save_json("No Unprocessed Documents", "No Unprocessed Documents")
258
+
259
  def save_json(text, filename):
260
  filename = filename+".json"
261
  with open(filename, "w", encoding='utf-8') as outfile:
 
289
  gr.Textbox(label="Any additional information? (Optional)", placeholder="This is document is an Official Reciept")]
290
  batch_document_output = gr.File(label="Result")
291
  batch_document_button = gr.Button("Scan")
292
+ retry_button = gr.Button("Retry Unprocessed Documents", label="Retry")
293
 
294
 
295
  image_button.click(image, inputs=image_input, outputs=image_output)
296
  document_button.click(document, inputs=document_input, outputs=document_output)
297
  batch_document_button.click(batch_document, inputs=batch_document_input, outputs=batch_document_output)
298
+ retry_button.click(retry_unprocessed_documents, outputs=batch_document_output)
299
 
300
  app.queue()
301
  app.launch(auth=("username", "password"))