Sage committed on
Commit
18626e5
β€’
1 Parent(s): cfb190d

error handling, UI changes, and Logs

Browse files
ai_functions.py CHANGED
@@ -1,10 +1,13 @@
1
- from settings import gpt_model, RPFAAP2, RPFAAP1, TDRP, TDRP_COORDS
2
  import openai
3
  import json
4
  import logging
5
- from helpers import remove_na, filter_tables, merge_strings
6
  import os
 
 
 
7
  logging.basicConfig(filename='app.log', level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
 
8
 
9
  def chat_gpt_image(content, context):
10
  openai.api_key = os.environ['GPT_API_KEY']
@@ -20,6 +23,7 @@ def chat_gpt_image(content, context):
20
 
21
  final_content = (" ".join(sequence))
22
  logging.info(final_content)
 
23
  completion = openai.ChatCompletion.create(
24
  model=gpt_model,
25
  user="1",
@@ -29,6 +33,7 @@ def chat_gpt_image(content, context):
29
  ]
30
  )
31
  logging.info(completion.choices[0].message.content)
 
32
  return(completion.choices[0].message.content)
33
 
34
  def chat_gpt_document(content, document_type, context):
@@ -43,17 +48,17 @@ def chat_gpt_document(content, document_type, context):
43
  content_name = content[3]
44
 
45
  if document_type == "RPFAA Building P1":
46
- document = "RPFAAP1.json"
47
  desired_format = RPFAAP1
48
  tables = [3]
49
  input_coords = TDRP_COORDS
50
  elif document_type == "RPFAA Building P2":
51
- document = "RPFAAP2.json"
52
  desired_format = RPFAAP2
53
  tables = []
54
  input_coords = TDRP_COORDS
55
  elif document_type == "TDRP":
56
- document = "TDRP.json"
57
  desired_format = TDRP
58
  tables = [0]
59
  input_coords = TDRP_COORDS
@@ -68,6 +73,7 @@ def chat_gpt_document(content, document_type, context):
68
 
69
  content_1 = (" ".join(sequence_1))
70
  logging.info(content_1)
 
71
 
72
  completion_1 = openai.ChatCompletion.create(
73
  model=gpt_model,
@@ -78,6 +84,7 @@ def chat_gpt_document(content, document_type, context):
78
  ]
79
  )
80
  logging.info(completion_1.choices[0].message.content)
 
81
  input_string = remove_na(completion_1.choices[0].message.content)
82
  input_string = merge_strings(input_string,input_coords,document_content)
83
 
 
 
1
  import openai
2
  import json
3
  import logging
4
+ import sys
5
  import os
6
+ from settings import gpt_model, RPFAAP2, RPFAAP1, TDRP, TDRP_COORDS
7
+ from helpers import remove_na, filter_tables, merge_strings, Logger
8
+
9
  logging.basicConfig(filename='app.log', level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
10
+ sys.stdout = Logger("output.log")
11
 
12
  def chat_gpt_image(content, context):
13
  openai.api_key = os.environ['GPT_API_KEY']
 
23
 
24
  final_content = (" ".join(sequence))
25
  logging.info(final_content)
26
+ print(final_content)
27
  completion = openai.ChatCompletion.create(
28
  model=gpt_model,
29
  user="1",
 
33
  ]
34
  )
35
  logging.info(completion.choices[0].message.content)
36
+ print(completion.choices[0].message.content)
37
  return(completion.choices[0].message.content)
38
 
39
  def chat_gpt_document(content, document_type, context):
 
48
  content_name = content[3]
49
 
50
  if document_type == "RPFAA Building P1":
51
+ document = "./templates/RPFAAP1.json"
52
  desired_format = RPFAAP1
53
  tables = [3]
54
  input_coords = TDRP_COORDS
55
  elif document_type == "RPFAA Building P2":
56
+ document = "./templates/RPFAAP2.json"
57
  desired_format = RPFAAP2
58
  tables = []
59
  input_coords = TDRP_COORDS
60
  elif document_type == "TDRP":
61
+ document = "./templates/TDRP.json"
62
  desired_format = TDRP
63
  tables = [0]
64
  input_coords = TDRP_COORDS
 
73
 
74
  content_1 = (" ".join(sequence_1))
75
  logging.info(content_1)
76
+ print(content_1)
77
 
78
  completion_1 = openai.ChatCompletion.create(
79
  model=gpt_model,
 
84
  ]
85
  )
86
  logging.info(completion_1.choices[0].message.content)
87
+ print(completion_1.choices[0].message.content)
88
  input_string = remove_na(completion_1.choices[0].message.content)
89
  input_string = merge_strings(input_string,input_coords,document_content)
90
 
app.py CHANGED
@@ -2,16 +2,18 @@ import openai
2
  import gradio as gr
3
  import json
4
  import time
5
- from tqdm import tqdm
6
- from azure.core.exceptions import HttpResponseError
7
  import logging
8
  import requests
9
- import google
 
 
10
  from ocr_functions import detect_document, detect_image
11
  from ai_functions import chat_gpt_document, chat_gpt_image
12
- from helpers import save_json
 
13
 
14
  logging.basicConfig(filename='app.log', level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
 
15
 
16
  def retry_unprocessed_documents():
17
  global global_document_type
@@ -19,12 +21,15 @@ def retry_unprocessed_documents():
19
  global unprocessed_documents
20
  if unprocessed_documents:
21
  output = batch_document(unprocessed_documents, global_document_type, global_context, "None")
22
- unprocessed_documents = []
23
  return output
24
  else:
25
- unprocessed_documents = []
26
- return save_json("No Unprocessed Documents", "No Unprocessed Documents")
27
 
 
 
 
 
 
28
  def combine_json_files(json_files, progress=gr.Progress()):
29
  combined_data = []
30
  progress(0, desc="Starting")
@@ -33,19 +38,21 @@ def combine_json_files(json_files, progress=gr.Progress()):
33
  data = json.load(json_file)
34
  combined_data.extend(data)
35
  logging.info("Combined JSON File: ", combined_data)
 
36
  return save_json(combined_data, "Combined Json")
37
 
38
  unprocessed_documents = []
39
  global_document_type = None
40
  global_context = None
41
  def batch_document(content, document_type, context, progress = gr.Progress()):
42
- logging.info(content)
43
  combined_data = []
44
  global global_document_type
45
  global global_context
46
  global_document_type = document_type
47
  global_context = context
48
 
 
 
49
  if progress == "None":
50
  for x in content:
51
  retries = 3
@@ -55,17 +62,23 @@ def batch_document(content, document_type, context, progress = gr.Progress()):
55
  try:
56
  data = json.loads(chat_gpt_document(detect_document(x),document_type,context))
57
  combined_data.append(data)
58
- i = 0
59
  break
60
  except (openai.error.APIConnectionError, openai.error.AuthenticationError, openai.error.RateLimitError, HttpResponseError, requests.exceptions.RequestException) as e:
61
  logging.error(f'Retry {i+1} failed: {e}')
 
62
  if i < retries - 1:
63
  logging.error(f'Retrying in {timeout} seconds...')
 
64
  time.sleep(timeout)
65
  i += 1
66
  else:
67
- unprocessed_documents.append(x)
68
  break
 
 
 
 
 
69
 
70
  else:
71
  progress(0, desc="Starting")
@@ -77,23 +90,42 @@ def batch_document(content, document_type, context, progress = gr.Progress()):
77
  try:
78
  data = json.loads(chat_gpt_document(detect_document(x),document_type,context))
79
  combined_data.append(data)
80
- i = 0
81
  break
82
  except (openai.error.APIConnectionError, openai.error.AuthenticationError, openai.error.RateLimitError, HttpResponseError, requests.exceptions.RequestException) as e:
83
  logging.error(f'Retry {i+1} failed: {e}')
 
84
  if i < retries - 1:
85
  logging.error(f'Retrying in {timeout} seconds...')
 
86
  time.sleep(timeout)
87
  i += 1
88
  else:
89
  unprocessed_documents.append(x)
90
  break
 
 
 
 
 
91
 
92
  logging.info(combined_data)
93
- logging.info(unprocessed_documents)
 
94
  if document_type == "":
95
  document_type = "error"
96
- return save_json(combined_data, document_type)
 
 
 
 
 
 
 
 
 
 
 
 
97
 
98
  def image(content, context):
99
  retries = 3
@@ -105,8 +137,10 @@ def image(content, context):
105
  break
106
  except (openai.error.APIConnectionError, openai.error.AuthenticationError, openai.error.RateLimitError, HttpResponseError, requests.exceptions.RequestException) as e:
107
  logging.error(f'Retry {i+1} failed: {e}')
 
108
  if i < retries - 1:
109
  logging.error(f'Retrying in {timeout} seconds...')
 
110
  time.sleep(timeout)
111
  i += 1
112
  else:
@@ -129,9 +163,13 @@ def document(content, document_type, context):
129
  i += 1
130
  else:
131
  break
 
 
 
 
132
  return data
133
 
134
- with gr.Blocks(title="Axon OCR", css=".markdown {text-align: center;}") as app:
135
  gr.Markdown("""# Axon OCR
136
  Attach Images or Files below and convert them to Text.""", elem_classes="markdown")
137
  with gr.Tab("Scan Image"):
@@ -140,7 +178,7 @@ with gr.Blocks(title="Axon OCR", css=".markdown {text-align: center;}") as app:
140
  image_input = [gr.Image(type="pil"),
141
  gr.Textbox(label="What kind of Image is this? (Optional)", placeholder="This is an image of an Official Reciept")]
142
  image_output = gr.Textbox(label="Result")
143
- image_button = gr.Button("Scan")
144
  with gr.Tab("Scan Document"):
145
  with gr.Row():
146
  with gr.Column():
@@ -148,33 +186,42 @@ with gr.Blocks(title="Axon OCR", css=".markdown {text-align: center;}") as app:
148
  gr.Dropdown(["RPFAA Building P1", "RPFAA Building P2", "TDRP"], label="File Type", info="What type of document is this?"),
149
  gr.Textbox(label="Any additional information? (Optional)", placeholder="This is document is an Official Reciept")]
150
  document_output = gr.Textbox(label="Result")
151
- document_button = gr.Button("Scan")
152
  with gr.Tab("Batch Scan"):
153
  with gr.Row():
154
  with gr.Column():
155
  batch_document_input = [gr.File(file_types=["pdf","tiff","image","text"], file_count="multiple"),
156
  gr.Dropdown(["RPFAA Building P1", "RPFAA Building P2", "TDRP"], label="File Type", info="What type of document is this?"),
157
  gr.Textbox(label="Any additional information? (Optional)", placeholder="This is document is an Official Reciept")]
158
- batch_document_output = gr.File(label="Result")
159
- batch_document_button = gr.Button("Scan")
 
 
 
 
160
  with gr.Row():
161
  with gr.Column():
162
- retry_button = gr.Button("Retry Unprocessed Documents", label="Retry")
163
  with gr.Column():
164
- stop_button = gr.Button("Stop Processing Document", label="Stop")
165
  with gr.Tab("Combine JSON"):
166
  with gr.Row():
167
  with gr.Column():
168
  json_files_input = gr.File(file_types=[".json"], file_count="multiple", label='Upload JSON files')
169
  combined_json_output = gr.File(label="Result")
170
- combine_button = gr.Button('Combine JSON files')
171
-
 
 
 
 
 
172
  image_button.click(image, inputs=image_input, outputs=image_output)
173
  document_button.click(document, inputs=document_input, outputs=document_output)
174
- batch_document_event = batch_document_button.click(batch_document, inputs=batch_document_input, outputs=batch_document_output)
175
- retry_button.click(retry_unprocessed_documents, outputs=batch_document_output)
176
  stop_button.click(fn=None, inputs=None, outputs=None, cancels=[batch_document_event])
177
  combine_button.click(combine_json_files, inputs=json_files_input, outputs=combined_json_output)
178
 
179
  app.queue()
180
- app.launch(auth=("username", "password"))
 
2
  import gradio as gr
3
  import json
4
  import time
 
 
5
  import logging
6
  import requests
7
+ import sys
8
+ from tqdm import tqdm
9
+ from azure.core.exceptions import HttpResponseError
10
  from ocr_functions import detect_document, detect_image
11
  from ai_functions import chat_gpt_document, chat_gpt_image
12
+ from helpers import save_json, read_logs, clear_logs, Logger
13
+ from css import css
14
 
15
  logging.basicConfig(filename='app.log', level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
16
+ sys.stdout = Logger("output.log")
17
 
18
  def retry_unprocessed_documents():
19
  global global_document_type
 
21
  global unprocessed_documents
22
  if unprocessed_documents:
23
  output = batch_document(unprocessed_documents, global_document_type, global_context, "None")
 
24
  return output
25
  else:
26
+ return save_json("No Unprocessed Documents", "No Unprocessed Documents"), "All Documents Processed"
 
27
 
28
+ def clear_unprocessed_documents():
29
+ global unprocessed_documents
30
+ unprocessed_documents = []
31
+ return "All Documents Processed"
32
+
33
  def combine_json_files(json_files, progress=gr.Progress()):
34
  combined_data = []
35
  progress(0, desc="Starting")
 
38
  data = json.load(json_file)
39
  combined_data.extend(data)
40
  logging.info("Combined JSON File: ", combined_data)
41
+ print("Combined JSON File: ", combined_data)
42
  return save_json(combined_data, "Combined Json")
43
 
44
  unprocessed_documents = []
45
  global_document_type = None
46
  global_context = None
47
  def batch_document(content, document_type, context, progress = gr.Progress()):
 
48
  combined_data = []
49
  global global_document_type
50
  global global_context
51
  global_document_type = document_type
52
  global_context = context
53
 
54
+ unprocessed_docs_temp = []
55
+
56
  if progress == "None":
57
  for x in content:
58
  retries = 3
 
62
  try:
63
  data = json.loads(chat_gpt_document(detect_document(x),document_type,context))
64
  combined_data.append(data)
 
65
  break
66
  except (openai.error.APIConnectionError, openai.error.AuthenticationError, openai.error.RateLimitError, HttpResponseError, requests.exceptions.RequestException) as e:
67
  logging.error(f'Retry {i+1} failed: {e}')
68
+ print(f'Retry {i+1} failed: {e}')
69
  if i < retries - 1:
70
  logging.error(f'Retrying in {timeout} seconds...')
71
+ print(f'Retrying in {timeout} seconds...')
72
  time.sleep(timeout)
73
  i += 1
74
  else:
75
+ unprocessed_docs_temp.append(x)
76
  break
77
+ except Exception as e: # catch any other exceptions
78
+ logging.error(f'Unexpected error {e}')
79
+ print(f'Unexpected error {e}')
80
+ unprocessed_docs_temp.append(x)
81
+ break
82
 
83
  else:
84
  progress(0, desc="Starting")
 
90
  try:
91
  data = json.loads(chat_gpt_document(detect_document(x),document_type,context))
92
  combined_data.append(data)
 
93
  break
94
  except (openai.error.APIConnectionError, openai.error.AuthenticationError, openai.error.RateLimitError, HttpResponseError, requests.exceptions.RequestException) as e:
95
  logging.error(f'Retry {i+1} failed: {e}')
96
+ print(f'Retry {i+1} failed: {e}')
97
  if i < retries - 1:
98
  logging.error(f'Retrying in {timeout} seconds...')
99
+ print(f'Retrying in {timeout} seconds...')
100
  time.sleep(timeout)
101
  i += 1
102
  else:
103
  unprocessed_documents.append(x)
104
  break
105
+ except Exception as e: # catch any other exceptions
106
+ logging.error(f'Unexpected error {e}')
107
+ print(f'Unexpected error {e}')
108
+ unprocessed_documents.append(x)
109
+ break
110
 
111
  logging.info(combined_data)
112
+ print(combined_data)
113
+
114
  if document_type == "":
115
  document_type = "error"
116
+
117
+ if unprocessed_documents:
118
+ unprocessed = "\n".join([doc.name.split('\\')[-1].split('/')[-1].split('.')[0] for doc in unprocessed_documents])
119
+ logging.info(unprocessed)
120
+ print(unprocessed)
121
+ elif unprocessed_docs_temp:
122
+ unprocessed_documents.extend(unprocessed_docs_temp)
123
+ unprocessed = "\n".join([doc.name.split('\\')[-1].split('/')[-1].split('.')[0] for doc in unprocessed_documents])
124
+ logging.info(unprocessed)
125
+ print(unprocessed)
126
+ else:
127
+ unprocessed = "All Documents Processed"
128
+ return save_json(combined_data, document_type), unprocessed
129
 
130
  def image(content, context):
131
  retries = 3
 
137
  break
138
  except (openai.error.APIConnectionError, openai.error.AuthenticationError, openai.error.RateLimitError, HttpResponseError, requests.exceptions.RequestException) as e:
139
  logging.error(f'Retry {i+1} failed: {e}')
140
+ print(f'Retry {i+1} failed: {e}')
141
  if i < retries - 1:
142
  logging.error(f'Retrying in {timeout} seconds...')
143
+ print(f'Retrying in {timeout} seconds...')
144
  time.sleep(timeout)
145
  i += 1
146
  else:
 
163
  i += 1
164
  else:
165
  break
166
+ except Exception as e: # catch any other exceptions
167
+ logging.error(f'Unexpected error {e}')
168
+ print(f'Unexpected error {e}')
169
+ break
170
  return data
171
 
172
+ with gr.Blocks(title="Axon OCR", css=css) as app:
173
  gr.Markdown("""# Axon OCR
174
  Attach Images or Files below and convert them to Text.""", elem_classes="markdown")
175
  with gr.Tab("Scan Image"):
 
178
  image_input = [gr.Image(type="pil"),
179
  gr.Textbox(label="What kind of Image is this? (Optional)", placeholder="This is an image of an Official Reciept")]
180
  image_output = gr.Textbox(label="Result")
181
+ image_button = gr.Button("Scan", variant="primary")
182
  with gr.Tab("Scan Document"):
183
  with gr.Row():
184
  with gr.Column():
 
186
  gr.Dropdown(["RPFAA Building P1", "RPFAA Building P2", "TDRP"], label="File Type", info="What type of document is this?"),
187
  gr.Textbox(label="Any additional information? (Optional)", placeholder="This is document is an Official Reciept")]
188
  document_output = gr.Textbox(label="Result")
189
+ document_button = gr.Button("Scan", variant="primary")
190
  with gr.Tab("Batch Scan"):
191
  with gr.Row():
192
  with gr.Column():
193
  batch_document_input = [gr.File(file_types=["pdf","tiff","image","text"], file_count="multiple"),
194
  gr.Dropdown(["RPFAA Building P1", "RPFAA Building P2", "TDRP"], label="File Type", info="What type of document is this?"),
195
  gr.Textbox(label="Any additional information? (Optional)", placeholder="This is document is an Official Reciept")]
196
+ with gr.Column():
197
+ batch_document_output = gr.File(label="Result")
198
+ with gr.Accordion("Unprocessed Documents", open=False):
199
+ batch_unprocessed = gr.Textbox(info="Download the file before retrying Unprocessed Documents and clear unprocessed documents after every scan to avoid overlaps", show_label=False, elem_classes="unprocessed_textbox")
200
+ clear_unprocessed_button = gr.Button("Clear Unprocessed Documents")
201
+ batch_document_button = gr.Button("Scan", variant="primary")
202
  with gr.Row():
203
  with gr.Column():
204
+ retry_button = gr.Button("Retry Unprocessed Documents")
205
  with gr.Column():
206
+ stop_button = gr.Button("Stop Processing Document", variant="stop")
207
  with gr.Tab("Combine JSON"):
208
  with gr.Row():
209
  with gr.Column():
210
  json_files_input = gr.File(file_types=[".json"], file_count="multiple", label='Upload JSON files')
211
  combined_json_output = gr.File(label="Result")
212
+ combine_button = gr.Button('Combine JSON files', variant="primary")
213
+ with gr.Accordion("Logs", open=False):
214
+ logs = gr.Textbox(max_lines=10, show_label=False, elem_classes="log_textbox")
215
+ app.load(read_logs, None, logs, every=1)
216
+ clear_button = gr.Button("Clear Logs")
217
+ clear_button.click(clear_logs)
218
+ clear_unprocessed_button.click(clear_unprocessed_documents, outputs=batch_unprocessed)
219
  image_button.click(image, inputs=image_input, outputs=image_output)
220
  document_button.click(document, inputs=document_input, outputs=document_output)
221
+ batch_document_event = batch_document_button.click(batch_document, inputs=batch_document_input, outputs=[batch_document_output,batch_unprocessed])
222
+ retry_button.click(retry_unprocessed_documents, outputs=[batch_document_output,batch_unprocessed])
223
  stop_button.click(fn=None, inputs=None, outputs=None, cancels=[batch_document_event])
224
  combine_button.click(combine_json_files, inputs=json_files_input, outputs=combined_json_output)
225
 
226
  app.queue()
227
+ app.launch(auth=("username", "password"), favicon_path="assets/logo.png")
assets/logo.png ADDED
css.py ADDED
@@ -0,0 +1,11 @@
 
 
 
 
 
 
 
 
 
 
 
 
1
# Custom stylesheet injected into the Gradio Blocks app (app.py passes this
# string as `css=` to `gr.Blocks`). Class names below are attached to
# components via `elem_classes=...`; `!important` is needed to override
# Gradio's own component styles.
css = """
.log_textbox textarea {
height: 300px !important;
}
.markdown {
text-align: center;
}
.unprocessed_textbox textarea {
height: 100px !important;
}
"""
helpers.py CHANGED
@@ -1,7 +1,26 @@
1
  from settings import char_remove
2
  import re
3
  import json
 
4
  import logging
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
5
  logging.basicConfig(filename='app.log', level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
6
 
7
  def remove_na(string):
@@ -29,8 +48,12 @@ def filter_tables(input_string, table_numbers):
29
 
30
  for table_number in table_numbers:
31
  # Picking the specific table
32
- table_str = tables[table_number]
33
-
 
 
 
 
34
  # Extracting cell coordinates and contents
35
  cells = re.findall(r"Cell\[(\d+)\]\[(\d+)\] has content '(.*?)'", table_str)
36
 
@@ -78,10 +101,14 @@ def merge_strings(input_string, input_coords, extract_coords):
78
  # Filter out empty lines and strip leading/trailing whitespaces
79
  lines2 = [line.strip() for line in lines2 if line.strip()]
80
 
81
- logging.info(lines2)
82
  # Creating dictionaries to store the key-value pairs
83
- dict1 = {line.split(": ")[0]: line.split(": ")[1] for line in lines1}
84
- dict2 = {line.split(": ")[0]: line.split(": ")[1] for line in lines2}
 
 
 
 
 
85
 
86
  # Updating the values in dict1 with the ones from dict2 if they share the same key
87
  for key in dict1.keys():
@@ -103,4 +130,14 @@ def merge_strings(input_string, input_coords, extract_coords):
103
  # Constructing the updated string1
104
  input_string = '\n'.join([f"{key}: {value}" for key, value in dict1.items()])
105
 
106
- return input_string
 
 
 
 
 
 
 
 
 
 
 
1
  from settings import char_remove
2
  import re
3
  import json
4
+ import sys
5
  import logging
6
+
7
class Logger:
    """Tee stream that duplicates writes to the real stdout and a log file.

    Instances are installed as ``sys.stdout`` so every ``print`` call is
    both shown on the console and appended to *filename*, which the
    Gradio "Logs" panel tails via ``read_logs``.
    """

    def __init__(self, filename):
        # Keep the stdout that was active at construction time so output
        # still reaches the console.
        self.terminal = sys.stdout
        # utf-8 matches read_logs()/clear_logs(), which open this file with
        # encoding="utf-8"; without it the platform default encoding can
        # raise UnicodeEncodeError on non-ASCII OCR text.
        self.log = open(filename, "w", encoding="utf-8")

    def write(self, message):
        # Mirror the message to both sinks.
        self.terminal.write(message)
        self.log.write(message)

    def flush(self):
        self.terminal.flush()
        self.log.flush()

    def isatty(self):
        # Some libraries probe stdout for interactivity; we are not a TTY.
        return False
22
+
23
+ sys.stdout = Logger("output.log")
24
  logging.basicConfig(filename='app.log', level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
25
 
26
  def remove_na(string):
 
48
 
49
  for table_number in table_numbers:
50
  # Picking the specific table
51
+ try:
52
+ table_str = tables[table_number]
53
+ except (IndexError, UnboundLocalError) as e:
54
+ logging.error(f"Error: {e}, Please check document configuration or document type")
55
+ print(f"Error: {e}, Please check document configuration or document type")
56
+ raise e
57
  # Extracting cell coordinates and contents
58
  cells = re.findall(r"Cell\[(\d+)\]\[(\d+)\] has content '(.*?)'", table_str)
59
 
 
101
  # Filter out empty lines and strip leading/trailing whitespaces
102
  lines2 = [line.strip() for line in lines2 if line.strip()]
103
 
 
104
  # Creating dictionaries to store the key-value pairs
105
+ try:
106
+ dict1 = {line.split(": ")[0]: line.split(": ")[1] for line in lines1}
107
+ dict2 = {line.split(": ")[0]: line.split(": ")[1] for line in lines2}
108
+ except (IndexError, UnboundLocalError) as e:
109
+ logging.error(f"Error: {e}, Please check document configuration or document type")
110
+ print(f"Error: {e}, Please check document configuration or document type")
111
+ raise e
112
 
113
  # Updating the values in dict1 with the ones from dict2 if they share the same key
114
  for key in dict1.keys():
 
130
  # Constructing the updated string1
131
  input_string = '\n'.join([f"{key}: {value}" for key, value in dict1.items()])
132
 
133
+ return input_string
134
+
135
def read_logs():
    """Return the tail (last 100 lines) of output.log for the UI log panel.

    Polled every second by the Gradio app (``app.load(read_logs, ..., every=1)``);
    flushes stdout first so the Logger tee has written any pending output.
    Returns "" if the log file does not exist yet.
    """
    sys.stdout.flush()
    try:
        with open("output.log", "r", encoding="utf-8") as f:
            lines = f.readlines()
    except FileNotFoundError:
        # Nothing has been printed since startup (or logs were removed);
        # show an empty panel instead of crashing the polling loop.
        return ''
    return ''.join(lines[-100:])
140
+
141
def clear_logs():
    """Truncate output.log (handler for the "Clear Logs" button)."""
    # Opening in "w" mode truncates the file; the empty write is a no-op
    # kept so the file is guaranteed to exist afterwards.
    with open("output.log", "w", encoding="utf-8") as f:
        f.write("")
ocr_functions.py CHANGED
@@ -1,11 +1,14 @@
1
  from azure.core.credentials import AzureKeyCredential
2
  from azure.ai.formrecognizer import DocumentAnalysisClient
3
  from io import BytesIO
4
- from helpers import format_polygon
5
  import logging
6
  import os
 
7
 
8
  logging.basicConfig(filename='app.log', level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
 
 
9
  endpoint = os.environ['AZURE_API_ENDPOINT']
10
  key = os.environ['AZURE_API_KEY']
11
 
@@ -28,6 +31,7 @@ def detect_document(content):
28
  kv_pair.value.content
29
  )
30
  logging.info(pair_content)
 
31
  document_content = "----Lines found in document----\n"
32
  for page in result.pages:
33
  for line_idx, line in enumerate(page.lines):
@@ -37,6 +41,7 @@ def detect_document(content):
37
  format_polygon(line.polygon),
38
  )
39
  logging.info(document_content)
 
40
  table_content = "----Tables found in document----\n"
41
  for table_idx, table in enumerate(result.tables):
42
  table_content += "Table # {} has {} rows and {} columns\n".format(
@@ -49,6 +54,7 @@ def detect_document(content):
49
  cell.content,
50
  )
51
  logging.info(table_content)
 
52
  name = content.name.split('\\')[-1]
53
  name = name.split("/")[-1]
54
  name = name.split('.')[0]
@@ -67,4 +73,5 @@ def detect_image(content):
67
 
68
  result = poller.result()
69
  logging.info(result.content)
 
70
  return(result.content)
 
1
  from azure.core.credentials import AzureKeyCredential
2
  from azure.ai.formrecognizer import DocumentAnalysisClient
3
  from io import BytesIO
4
+ from helpers import format_polygon, Logger
5
  import logging
6
  import os
7
+ import sys
8
 
9
  logging.basicConfig(filename='app.log', level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
10
+ sys.stdout = Logger("output.log")
11
+
12
  endpoint = os.environ['AZURE_API_ENDPOINT']
13
  key = os.environ['AZURE_API_KEY']
14
 
 
31
  kv_pair.value.content
32
  )
33
  logging.info(pair_content)
34
+ print(pair_content)
35
  document_content = "----Lines found in document----\n"
36
  for page in result.pages:
37
  for line_idx, line in enumerate(page.lines):
 
41
  format_polygon(line.polygon),
42
  )
43
  logging.info(document_content)
44
+ print(document_content)
45
  table_content = "----Tables found in document----\n"
46
  for table_idx, table in enumerate(result.tables):
47
  table_content += "Table # {} has {} rows and {} columns\n".format(
 
54
  cell.content,
55
  )
56
  logging.info(table_content)
57
+ print(table_content)
58
  name = content.name.split('\\')[-1]
59
  name = name.split("/")[-1]
60
  name = name.split('.')[0]
 
73
 
74
  result = poller.result()
75
  logging.info(result.content)
76
+ print(result.content)
77
  return(result.content)
RPFAAP1.json β†’ templates/RPFAAP1.json RENAMED
@@ -1,7 +1,7 @@
1
  {"File Name": "%s",
2
  "General Information": {
3
  "ARP No.": "",
4
- "Owner": "",
5
  "Address": "",
6
  "Tel No.": "",
7
  "Administrator/Beneficial User": "",
 
1
  {"File Name": "%s",
2
  "General Information": {
3
  "ARP No.": "",
4
+ "OWNER": "",
5
  "Address": "",
6
  "Tel No.": "",
7
  "Administrator/Beneficial User": "",
RPFAAP2.json β†’ templates/RPFAAP2.json RENAMED
File without changes
TDRP.json β†’ templates/TDRP.json RENAMED
File without changes