added error trapping when no data was received
Files changed:
- Layoutlmv3_inference/ocr.py +223 -1
- app.py +27 -42
- templates/extractor.html +51 -24
Layoutlmv3_inference/ocr.py
CHANGED
@@ -72,7 +72,229 @@ def enhance_txt(img, intensity_increase=20, bilateral_filter_diameter=9, bilateral_filter_sigma_color=75, bilateral_filter_sigma_space=75):
     img = np.clip(img + intensity_increase, 0, 255).astype(np.uint8)

     # Apply bilateral filter to reduce noise
-    img = cv2.bilateralFilter(img, bilateral_filter_diameter, bilateral_filter_sigma_color, bilateral_filter_sigma_space)
+    img = cv2.bilateralFilter(img, bilateral_filter_diameter,
+                              bilateral_filter_sigma_color, bilateral_filter_sigma_space)

     _, binary = cv2.threshold(blurred, threshold, 255, cv2.THRESH_BINARY)
     return binary
+
+
+import os
+import pandas as pd
+import cv2
+import numpy as np
+import json
+import requests
+import traceback
+import tempfile
+from rembg import remove
+
+from PIL import Image
+
+
+def preprocess_image(image_path, max_file_size_mb=1, target_file_size_mb=0.5):
+    try:
+        # Read the image
+        image = cv2.imread(image_path)
+        # Enhance text
+        enhanced = enhance_txt(image)
+
+        # Save the enhanced image to a temporary file
+        temp_file_path = tempfile.NamedTemporaryFile(suffix='.jpg').name
+        cv2.imwrite(temp_file_path, enhanced)
+
+        # Check file size of the temporary file
+        file_size_mb = os.path.getsize(temp_file_path) / (1024 * 1024)  # Convert to megabytes
+
+        while file_size_mb > max_file_size_mb:
+            print(f"File size ({file_size_mb} MB) exceeds the maximum allowed size ({max_file_size_mb} MB). Resizing the image.")
+            ratio = np.sqrt(target_file_size_mb / file_size_mb)
+            new_width = int(image.shape[1] * ratio)
+            new_height = int(image.shape[0] * ratio)
+
+            # Resize the image
+            enhanced = cv2.resize(enhanced, (new_width, new_height))
+
+            # Save the resized image to a temporary file
+            temp_file_path = tempfile.NamedTemporaryFile(suffix='.jpg').name
+            cv2.imwrite(temp_file_path, enhanced)
+
+            # Update file size
+            file_size_mb = os.path.getsize(temp_file_path) / (1024 * 1024)
+            print(f"New file size: ({file_size_mb} MB)")
+
+        # Return the final resized image
+        image_resized = cv2.imread(temp_file_path)
+        return image_resized
+
+    except Exception as e:
+        print(f"An error occurred in preprocess_image: {str(e)}")
+        return None
+
+
+def enhance_txt(img, intensity_increase=20, bilateral_filter_diameter=9, bilateral_filter_sigma_color=75, bilateral_filter_sigma_space=75):
+    # Get the width and height of the image
+    w = img.shape[1]
+    h = img.shape[0]
+    w1 = int(w * 0.05)
+    w2 = int(w * 0.95)
+    h1 = int(h * 0.05)
+    h2 = int(h * 0.95)
+    ROI = img[h1:h2, w1:w2]  # 95% of the center of the image
+    threshold = np.mean(ROI) * 0.88  # % of average brightness
+
+    # Convert image to grayscale
+    grayscale_img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
+
+    # Find contours
+    contours, _ = cv2.findContours(grayscale_img, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
+
+    # Apply Gaussian blur
+    blurred = cv2.GaussianBlur(grayscale_img, (1, 1), 0)
+
+    edged = 255 - cv2.Canny(blurred, 100, 150, apertureSize=7)
+
+    # Increase intensity by adding a constant value
+    img = np.clip(img + intensity_increase, 0, 255).astype(np.uint8)
+
+    # Apply bilateral filter to reduce noise
+    img = cv2.bilateralFilter(img, bilateral_filter_diameter,
+                              bilateral_filter_sigma_color, bilateral_filter_sigma_space)
+
+    _, binary = cv2.threshold(blurred, threshold, 255, cv2.THRESH_BINARY)
+    return binary
+
+
+def run_tesseract_on_preprocessed_image(preprocessed_image, image_path):
+    image_name = os.path.basename(image_path)
+    image_name = image_name[:image_name.find('.')]
+
+    # Create the "temp" folder if it doesn't exist
+    temp_folder = "static/temp"
+    if not os.path.exists(temp_folder):
+        os.makedirs(temp_folder)
+
+    # Define the OCR API endpoint
+    url = "https://api.ocr.space/parse/image"
+
+    # Define the API key and the language
+    api_key = os.getenv("ocr_space")
+    language = "eng"
+
+    # Save the preprocessed image
+    cv2.imwrite(os.path.join(temp_folder, f"{image_name}_preprocessed.jpg"), preprocessed_image)
+
+    # Open the preprocessed image file as binary
+    with open(os.path.join(temp_folder, f"{image_name}_preprocessed.jpg"), "rb") as f:
+        # Define the payload for the API request
+        payload = {
+            "apikey": api_key,
+            "language": language,
+            "isOverlayRequired": True,
+            "OCREngine": 2
+        }
+        # Define the file parameter for the API request
+        file = {
+            "file": f
+        }
+        # Send the POST request to the OCR API
+        response = requests.post(url, data=payload, files=file)
+
+    # Check the status code of the response
+    if response.status_code == 200:
+        # Parse the JSON response
+        result = response.json()
+        print("---JSON file saved")
+        # Save the OCR result as JSON
+        with open(os.path.join(temp_folder, f"{image_name}_ocr.json"), 'w') as f:
+            json.dump(result, f)
+
+        return os.path.join(temp_folder, f"{image_name}_ocr.json")
+    else:
+        raise Exception("An error occurred: " + response.text)
+
+
+def clean_tesseract_output(json_output_path):
+    try:
+        with open(json_output_path, 'r') as json_file:
+            data = json.load(json_file)
+
+        lines = data['ParsedResults'][0]['TextOverlay']['Lines']
+
+        words = []
+        for line in lines:
+            for word_info in line['Words']:
+                word = {}
+                origin_box = [
+                    word_info['Left'],
+                    word_info['Top'],
+                    word_info['Left'] + word_info['Width'],
+                    word_info['Top'] + word_info['Height']
+                ]
+
+                word['word_text'] = word_info['WordText']
+                word['word_box'] = origin_box
+                words.append(word)
+
+        return words
+    except (KeyError, IndexError, FileNotFoundError, json.JSONDecodeError) as e:
+        print("Check your Internet Connection.")
+        print(f"Error cleaning Tesseract output: {str(e)}")
+        return None
+
+
+def prepare_batch_for_inference(image_paths):
+    # print("my_function was called")
+    # traceback.print_stack()  # This will print the stack trace
+    # Print the total number of images to be processed
+    print(f"Number of images to process: {len(image_paths)}")
+    print("1. Preparing for Inference")
+    tsv_output_paths = []
+
+    inference_batch = dict()
+    print("2. Starting Preprocessing")
+    # Ensure that only one image is processed at a time
+    for image_path in image_paths:
+        # Print the image being processed
+        print(f"Processing the image: {image_path}")
+        print("3. Preprocessing the Receipt")
+        preprocessed_image = preprocess_image(image_path)
+        if preprocessed_image is not None:
+            try:
+                print("4. Preprocessing done. Running OCR")
+                json_output_path = run_tesseract_on_preprocessed_image(preprocessed_image, image_path)
+                print("5. OCR Complete")
+            except Exception as e:
+                print(f"An error has occurred: {str(e)}")
+                raise
+            if json_output_path:
+                tsv_output_paths.append(json_output_path)
+
+    print("6. Preprocessing and OCR Done")
+    # clean_outputs is a list of lists
+    clean_outputs = [clean_tesseract_output(tsv_path) for tsv_path in tsv_output_paths]
+    print("7. Cleaned OCR output")
+    word_lists = [[word['word_text'] for word in clean_output]
+                  for clean_output in clean_outputs]
+    print("8. Word List Created")
+    boxes_lists = [[word['word_box'] for word in clean_output]
+                   for clean_output in clean_outputs]
+    print("9. Box List Created")
+    inference_batch = {
+        "image_path": image_paths,
+        "bboxes": boxes_lists,
+        "words": word_lists
+    }
+
+    print("10. Prepared for Inference Batch")
+    return inference_batch
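For orientation, here is a minimal sketch of how a caller sees the new error trapping in this module. It assumes the module is imported from the Layoutlmv3_inference package and that the OCR.space key is exported in the ocr_space environment variable, which the code above reads via os.getenv("ocr_space"); the image path is hypothetical.

# Hedged usage sketch; the image path below is illustrative only.
from Layoutlmv3_inference.ocr import prepare_batch_for_inference

try:
    batch = prepare_batch_for_inference(["static/temp/uploads/receipt_0.jpg"])
    # On success, the batch maps each image to its OCR output:
    # {"image_path": [...], "bboxes": [[[left, top, right, bottom], ...]], "words": [[...]]}
    print(batch["words"][0][:5])
except Exception as e:
    # run_tesseract_on_preprocessed_image raises on a non-200 API response,
    # and clean_tesseract_output returns None (after printing a connectivity
    # hint) when the response JSON lacks the TextOverlay structure.
    print(f"OCR failed: {e}")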
app.py
CHANGED
@@ -24,7 +24,6 @@ import signal
 import shutil
 from datetime import datetime
 import zipfile
-from pathlib import Path

 # LLM
 import argparse
@@ -34,10 +33,17 @@ from Layoutlmv3_inference.inference_handler import handle
 import logging
 import os
 import copy
+import warnings
+warnings.filterwarnings("ignore", category=UserWarning, module='torch.serialization', lineno=1113)
+warnings.filterwarnings("ignore")
+from torch.serialization import SourceChangeWarning
+
+warnings.filterwarnings("ignore", category=FutureWarning)
+warnings.filterwarnings("ignore", category=SourceChangeWarning)


 # Upload Folder
-UPLOAD_FOLDER =
+UPLOAD_FOLDER = 'static/temp/uploads'
 if not os.path.exists(UPLOAD_FOLDER):
     os.makedirs(UPLOAD_FOLDER)

@@ -83,7 +89,7 @@ def allowed_file(filename):

 @app.route('/upload', methods=['GET', 'POST'])
 def upload_files():
-    UPLOAD_FOLDER =
+    UPLOAD_FOLDER = 'static/temp/uploads'
     if not os.path.exists(UPLOAD_FOLDER):
         os.makedirs(UPLOAD_FOLDER)
     if request.method == 'POST':
@@ -102,13 +108,12 @@ def upload_files():
     return render_template('index.html')


-from pathlib import Path
 def make_predictions(image_paths):
-
+    temp = None
     try:
-        #
-
-
+        # For Windows OS
+        temp = pathlib.PosixPath  # Save the original state
+        pathlib.PosixPath = pathlib.WindowsPath  # Change to WindowsPath temporarily

         model_path = Path(r'model/export')
         learner = load_learner(model_path)
@@ -126,16 +131,14 @@ def make_predictions(image_paths):
         predicted_class_str = str(prediction_class)

         predictions.append(predicted_class_str)
-
-        print(f"Prediction: {predictions}")

         return predictions

     except Exception as e:
         return {"error in make_predictions": str(e)}

-
-
+    finally:
+        pathlib.PosixPath = temp


 @app.route('/predict/<filenames>', methods=['GET', 'POST'])
@@ -168,12 +171,8 @@ def predict_files(filenames):

         if os.path.exists(file_path):
             # Call make_predictions automatically
-            prediction_result = make_predictions([file_path])
-
-            prediction_results.append(prediction_result[0])  # Append only the first prediction result
-        else:
-            print(f"Error making prediction for {file}: {prediction_result}")
-
+            prediction_result = make_predictions([file_path])  # Pass file_path as a list
+            prediction_results.append(prediction_result[0])  # Append only the first prediction result
     prediction_results_copy = copy.deepcopy(prediction_results)

     non_receipt_indices = []
@@ -187,15 +186,16 @@ def predict_files(filenames):
         if os.path.exists(file_to_remove):
             os.remove(file_to_remove)

+
     return render_template('extractor.html', index_url=index_url, image_paths=image_paths, prediction_results=prediction_results, predictions=dict(zip(image_paths, prediction_results_copy)))


-
-
-#
-
-
+@app.route('/get_inference_image')
+def get_inference_image():
+    # Assuming the new image is stored in the 'inferenced' folder with the name 'temp_inference.jpg'
+    inferenced_image = 'static/temp/inferenced/temp_inference.jpg'
+    return jsonify(updatedImagePath=inferenced_image), 200  # Return the image path with a 200 status code


 def process_images(model_path: str, images_path: str) -> None:
@@ -205,12 +205,14 @@ def process_images(model_path: str, images_path: str) -> None:
         inference_batch = prepare_batch_for_inference(images_path)
         context = {"model_dir": model_path}
         handle(inference_batch, context)
     except Exception as err:
+        print("No Internet connection.")
         os.makedirs('log', exist_ok=True)
         logging.basicConfig(filename='log/error_output.log', level=logging.ERROR,
                             format='%(asctime)s %(levelname)s %(name)s %(message)s')
         logger = logging.getLogger(__name__)
         logger.error(err)
+        return redirect(url_for('index'))

 @app.route('/run_inference', methods=['GET'])
 def run_inference():
@@ -335,24 +337,8 @@ def create_csv():

     except Exception as e:
         print(f"An error occurred in create_csv: {str(e)}")
-        return
+        return render_template('extractor.html', error_message=str(e))

-    except Exception as e:
-        print(f"An error occurred in create_csv: {str(e)}")
-        return None
-
-    except FileNotFoundError as e:
-        print(f"File not found error: {str(e)}")
-        return jsonify({'error': 'File not found.'}), 404
-    except json.JSONDecodeError as e:
-        print(f"JSON decoding error: {str(e)}")
-        return jsonify({'error': 'JSON decoding error.'}), 500
-    except csv.Error as e:
-        print(f"CSV error: {str(e)}")
-        return jsonify({'error': 'CSV error.'}), 500
-    except Exception as e:
-        print(f"An unexpected error occurred: {str(e)}")
-        return jsonify({'error': 'An unexpected error occurred.'}), 500

 @app.route('/get_data')
 def get_data():
@@ -372,6 +358,5 @@ def download_csv():
         return jsonify({"error": f"Download failed: {str(e)}"})


-
 if __name__ == '__main__':
     app.run(debug=True)
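The make_predictions change is the usual workaround for unpickling a fastai learner across operating systems: a learner exported on Linux stores PosixPath objects that cannot be instantiated on Windows, so pathlib.PosixPath is aliased to pathlib.WindowsPath for the duration of the load and restored in the finally block. A standalone sketch of the same swap, assuming fastai's load_learner and the model/export path used above (app.py's own fastai and pathlib import lines are not shown in this diff):

# Sketch of the PosixPath/WindowsPath swap used by make_predictions.
import pathlib
from pathlib import Path
from fastai.vision.all import load_learner  # assumed import; not shown in the diff

temp = pathlib.PosixPath  # save the original class
try:
    pathlib.PosixPath = pathlib.WindowsPath  # alias while unpickling on Windows
    learner = load_learner(Path(r'model/export'))
finally:
    pathlib.PosixPath = temp  # always restore, as the finally: block above does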
templates/extractor.html
CHANGED
@@ -223,6 +223,10 @@

 
         function updateTable(data) {
+            if (!data || data.trim() === '') {
+                return 'Error extracting the information: data is empty';
+            }
+
             Papa.parse(data, {
                 header: true,
                 skipEmptyLines: true,
@@ -230,37 +234,60 @@
                     const tbody = document.querySelector('#dataTable tbody');
                     tbody.innerHTML = ''; // Clear existing rows

-                    results.data.forEach(row => {
-                        const RECEIPTNUMBER = row['RECEIPTNUMBER'] || '';
-                        const MERCHANTNAME = row['MERCHANTNAME'] || '';
-                        const MERCHANTADDRESS = row['MERCHANTADDRESS'] || '';
-                        const TRANSACTIONDATE = row['TRANSACTIONDATE'] || '';
-                        const TRANSACTIONTIME = row['TRANSACTIONTIME'] || '';
-                        const ITEMS = row['ITEMS'] || '';
-                        const PRICE = row['PRICE'] || '';
-                        const TOTAL = row['TOTAL'] || '';
-                        const VATTAX = row['VATTAX'] || '';
-
+                    if (results.data.length === 0) {
                         const tr = document.createElement('tr');
-                        tr.innerHTML = `
-                            <td contenteditable="true">${RECEIPTNUMBER}</td>
-                            <td contenteditable="true">${MERCHANTNAME}</td>
-                            <td contenteditable="true">${MERCHANTADDRESS}</td>
-                            <td contenteditable="true">${TRANSACTIONDATE}</td>
-                            <td contenteditable="true">${TRANSACTIONTIME}</td>
-                            <td contenteditable="true">${ITEMS}</td>
-                            <td contenteditable="true">${PRICE}</td>
-                            <td contenteditable="true">${TOTAL}</td>
-                            <td contenteditable="true">${VATTAX}</td>
-                        `;
+                        const td = document.createElement('td');
+                        td.colSpan = 9;
+                        td.style.textAlign = 'center'; // Center the content
+                        td.style.fontSize = '1.2em'; // Increase the font size
+                        td.style.height = '100px'; // Set the height of the row
+                        td.style.verticalAlign = 'middle'; // Vertically center the content
+
+                        const icon = document.createElement('i');
+                        icon.className = 'fas fa-exclamation-triangle'; // Replace with the class of your error icon
+                        td.appendChild(icon);
+
+                        const br = document.createElement('br'); // Create a line break element
+                        td.appendChild(br); // Append the line break element
+
+                        const text = document.createTextNode(' An error occurred in extracting the data. Please contact the developers.');
+                        td.appendChild(text);
+
+                        tr.appendChild(td);
                         tbody.appendChild(tr);
-                    });
+                    } else {
+                        results.data.forEach(row => {
+                            const RECEIPTNUMBER = row['RECEIPTNUMBER'] || '';
+                            const MERCHANTNAME = row['MERCHANTNAME'] || '';
+                            const MERCHANTADDRESS = row['MERCHANTADDRESS'] || '';
+                            const TRANSACTIONDATE = row['TRANSACTIONDATE'] || '';
+                            const TRANSACTIONTIME = row['TRANSACTIONTIME'] || '';
+                            const ITEMS = row['ITEMS'] || '';
+                            const PRICE = row['PRICE'] || '';
+                            const TOTAL = row['TOTAL'] || '';
+                            const VATTAX = row['VATTAX'] || '';
+
+                            const tr = document.createElement('tr');
+                            tr.innerHTML = `
+                                <td contenteditable="true">${RECEIPTNUMBER}</td>
+                                <td contenteditable="true">${MERCHANTNAME}</td>
+                                <td contenteditable="true">${MERCHANTADDRESS}</td>
+                                <td contenteditable="true">${TRANSACTIONDATE}</td>
+                                <td contenteditable="true">${TRANSACTIONTIME}</td>
+                                <td contenteditable="true">${ITEMS}</td>
+                                <td contenteditable="true">${PRICE}</td>
+                                <td contenteditable="true">${TOTAL}</td>
+                                <td contenteditable="true">${VATTAX}</td>
+                            `;
+                            tbody.appendChild(tr);
+                        });
+                    }
                 }
             });
         }
-
     });

+
     document.querySelector('#downloadButton').addEventListener('click', function (event) {
         event.preventDefault(); // Prevent the default action
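Both new branches guard the same failure: the CSV text that updateTable receives can be empty when extraction fails server-side. The route that serves it is not part of this diff, so the Flask sketch below is hypothetical (the route name comes from the /get_data handler referenced in app.py; the CSV path is assumed); it only illustrates the empty-body response the client now traps.

# Hypothetical server-side counterpart of the updateTable guard; the real
# /get_data implementation in app.py is not shown in this diff.
import os
from flask import Flask, Response

app = Flask(__name__)
CSV_PATH = 'static/temp/inferenced/output.csv'  # assumed location, for illustration

@app.route('/get_data')
def get_data():
    # If extraction produced nothing, return an empty 200 body; updateTable
    # then returns its "data is empty" message instead of rendering rows.
    if not os.path.exists(CSV_PATH) or os.path.getsize(CSV_PATH) == 0:
        return Response('', mimetype='text/csv')
    with open(CSV_PATH, newline='') as f:
        return Response(f.read(), mimetype='text/csv')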