Soufiane committed on
Commit 8565879
1 Parent(s): 1ce10d7
Files changed (3)
  1. app.py +57 -0
  2. doc.py +64 -0
  3. invoice.py +116 -0
app.py ADDED
@@ -0,0 +1,57 @@
+ import streamlit as st
+ from PIL import Image
+ import pandas as pd
+ import numpy as np
+ from invoice import extract_data, extract_tables, INVOICE
+
+
+ def process_image(lang, to_be_extracted, input_image):
+     data = extract_data(lang, to_be_extracted, input_image)
+     return data
+
+ def main():
+     st.title("Image Processing App")
+     st.write("Upload an image and click the 'Extract information' button to process it.")
+     uploaded_image = st.file_uploader("Upload an image", type=["jpg", "jpeg", "png", "webp"])
+
+     if uploaded_image is not None:
+         # Display the uploaded image
+         st.image(uploaded_image, caption="Uploaded Image", use_column_width=True)
+
+         lang = st.selectbox("Select Language", ["french", "english", "arabic"])
+
+         # UI for adding elements to the extraction list
+         st.write("Add elements to extract:")
+         extract_input = st.text_input("Add elements")
+         # Copy INVOICE so appends don't mutate the module-level default
+         extract_list = st.session_state.get("extract_list", list(INVOICE))
+         # Skip duplicates so Streamlit reruns don't append the same entry twice
+         if extract_input and extract_input.strip() not in extract_list:
+             extract_list.append(extract_input.strip())
+             st.session_state["extract_list"] = extract_list
+
+         # Display the extraction list as chips
+         st.write("Elements to extract:")
+         for item in extract_list:
+             st.write(f"`{item}`")
+
+         if st.button("Extract information"):
+             pil_image = Image.open(uploaded_image).convert('RGB')
+             numpy_image = np.array(pil_image)
+             image_info = process_image(lang, extract_list, numpy_image)
+
+             df = pd.DataFrame(list(image_info.items()), columns=["Field", "Value"])
+             st.write("Extracted information:")
+             st.dataframe(df)
+
+         if st.button("Extract Tables"):
+             # Placeholder: table extraction is not wired up yet, so an empty CSV is offered
+             df = pd.DataFrame([])
+             csv = df.to_csv(index=False, header=False)
+             st.download_button(label="Download CSV", data=csv, file_name='data.csv', mime='text/csv')
+
+     else:
+         st.session_state['extract_list'] = list(INVOICE)
+
+ if __name__ == "__main__":
+     main()
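
Note for reviewers: app.py reaches `extract_data` through invoice.py, but the function itself lives in utils.py, which is not part of this commit. From its call sites it takes a language name, the list of field labels, and a NumPy image, and returns a dict mapping each field to its extracted value. A minimal stub of that assumed interface (the body is hypothetical; only the signature and return shape are implied by the callers):

def extract_data(lang, to_be_extracted, image):
    # Hypothetical stub: the real implementation is in utils.py (not in this commit).
    # The callers imply it OCRs `image` in `lang` and returns a mapping from each
    # requested field label to the recognized value.
    return {field: "" for field in to_be_extracted}

With that module in place, the app runs with `streamlit run app.py`.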
doc.py ADDED
@@ -0,0 +1,64 @@
+ import cv2
+ import base64
+ from utils import *
+
+ CIN = ["Nom complet", "Date de naissance", "Date de validité", "Lieu de naissance", "Numéro CIN"]
+
+
+ def extract_face(image, scale_factor=1.2):
+     # Load the pre-trained Haar cascade face detector bundled with OpenCV
+     face_cascade = cv2.CascadeClassifier(cv2.data.haarcascades + 'haarcascade_frontalface_alt.xml')
+
+     # Convert the image to grayscale
+     gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
+
+     # Detect faces in the image
+     faces = face_cascade.detectMultiScale(gray, scaleFactor=scale_factor, minNeighbors=5, minSize=(30, 30))
+     if len(faces) == 0:
+         print("No faces found")
+         return None
+
+     # Keep the widest detection (assumed to be the document's portrait photo)
+     (x, y, w, h) = max(faces, key=lambda box: box[2])
+
+     # Expand the bounding box to include a slightly larger region
+     x -= int(0.1 * w)
+     y -= int(0.1 * h)
+     w += int(0.2 * w)
+     h += int(0.2 * h)
+
+     # Clamp the box to the image boundaries
+     x = max(x, 0)
+     y = max(y, 0)
+     w = min(w, image.shape[1] - x)
+     h = min(h, image.shape[0] - y)
+
+     # Extract the face region from the image
+     face = image[y:y+h, x:x+w]
+
+     # Encode the face crop as a base64 JPEG string
+     _, encoded_image = cv2.imencode('.jpg', face)
+     encoded_image_str = base64.b64encode(encoded_image).decode('utf-8')
+
+     # Package the crop's dimensions together with the encoded pixels
+     image_info = {
+         "width": face.shape[1],
+         "height": face.shape[0],
+         "data": encoded_image_str
+     }
+
+     return image_info
+
+
+ if __name__ == '__main__':
+     lang = "french"
+     to_be_extracted = CIN
+     image_path = "./docs for ocr/CIN 2.png"
+     image = cv2.imread(image_path)
+
+     text_data = extract_data(lang, to_be_extracted, image)
+     print(text_data)
+     face_data = extract_face(image)
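
`extract_face` returns its crop as a base64-encoded JPEG rather than a raw array, so callers need to decode it before displaying or saving the face. A short round-trip sketch against the function as committed (file names are illustrative):

import base64
import cv2
import numpy as np
from doc import extract_face

info = extract_face(cv2.imread("./docs for ocr/CIN 2.png"))
if info is not None:
    # Decode the base64 JPEG back into a BGR image and save it
    raw = base64.b64decode(info["data"])
    face = cv2.imdecode(np.frombuffer(raw, dtype=np.uint8), cv2.IMREAD_COLOR)
    assert face.shape[:2] == (info["height"], info["width"])
    cv2.imwrite("face.jpg", face)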
invoice.py ADDED
@@ -0,0 +1,116 @@
+ import cv2
+ from PIL import Image
+ # from ultralyticsplus import YOLO
+ # from transformers import pipeline
+ import pandas as pd
+ import numpy as np
+ import easyocr
+ from utils import *
+
+ INVOICE = ["Numéro de facture", "Date", "Numéro de commande", "Echéance", "Total"]
+
+ # Table detection model (disabled; uncomment the YOLO import above to enable)
+ # model = YOLO('keremberke/yolov8s-table-extraction')
+ # model.overrides['conf'] = 0.25           # NMS confidence threshold
+ # model.overrides['iou'] = 0.45            # NMS IoU threshold
+ # model.overrides['agnostic_nms'] = False  # NMS class-agnostic
+ # model.overrides['max_det'] = 1000        # maximum number of detections per image
+
+ # Table structure recognition model (disabled; uncomment the pipeline import above to enable)
+ # pipe = pipeline("object-detection", model="bilguun/table-transformer-structure-recognition")
+
+
+ def detect_tables(image):
+     # `image` is a NumPy array; requires the YOLO `model` above to be enabled
+     results = model.predict(image)
+
+     result = results[0]
+     xyxy = result.boxes.xyxy
+     scores = result.boxes.conf
+     tables = []
+     for i in range(len(scores)):
+         if scores[i] >= 0.5:
+             # Crop each confidently detected table region
+             table = image[int(xyxy[i, 1]):int(xyxy[i, 3]), int(xyxy[i, 0]):int(xyxy[i, 2])]
+             table = Image.fromarray(table)
+             tables.append(table)
+     return tables
+
+ def insert(el, listt, pos):
+     # Insert `el` into `listt`, keeping the list sorted by the coordinate named `pos`
+     if not listt:
+         listt.append(el)
+     else:
+         inserted = False
+         for i in range(len(listt)):
+             if el[pos] <= listt[i][pos]:
+                 listt.insert(i, el)
+                 inserted = True
+                 break
+         if not inserted:
+             listt.append(el)
+
+ def rec_table(table, reader):
+     # Requires the table-structure `pipe` above to be enabled
+     col_row = pipe(table)
+     cols = []
+     rows = []
+     for el in col_row:
+         if el["label"] == 'table column':
+             insert(el["box"], cols, pos="xmin")
+         elif el["label"] == 'table row':
+             insert(el["box"], rows, pos="ymin")
+
+     table = np.array(table)
+
+     # OCR each cell at the intersection of a row and a column
+     csv = []
+     for row in rows:
+         temp = []
+         for col in cols:
+             box = intersection(row, col)
+             if box is None:
+                 # Detected row and column don't overlap; leave the cell empty
+                 temp.append("")
+                 continue
+             cell = table[box['ymin']:box['ymax'], box['xmin']:box['xmax']]
+             res = get_ocr(cell, reader)
+             temp.append(get_input(res))
+         csv.append(temp)
+
+     df = pd.DataFrame(csv)
+     return df
+
+
+ def intersection(box1, box2):
+     # Extract the coordinates of the first bounding box
+     x1min, y1min, x1max, y1max = box1['xmin'], box1['ymin'], box1['xmax'], box1['ymax']
+
+     # Extract the coordinates of the second bounding box
+     x2min, y2min, x2max, y2max = box2['xmin'], box2['ymin'], box2['xmax'], box2['ymax']
+
+     # Compute the coordinates of the intersection
+     xmin = max(x1min, x2min)
+     ymin = max(y1min, y2min)
+     xmax = min(x1max, x2max)
+     ymax = min(y1max, y2max)
+
+     # No overlap
+     if xmin >= xmax or ymin >= ymax:
+         return None
+
+     # Return the coordinates of the intersection
+     return {'xmin': xmin, 'ymin': ymin, 'xmax': xmax, 'ymax': ymax}
+
+
+ def extract_tables(lang, image):
+     # `langs` (from utils) maps UI language names to easyocr language codes
+     reader = easyocr.Reader([langs[lang]])
+     tables = detect_tables(image)
+
+     for i in range(len(tables)):
+         df = rec_table(tables[i], reader)
+         df.to_excel(f'table_{i+1}.xlsx', index=False, header=False)
+
+ if __name__ == '__main__':
+     lang = "french"
+     to_be_extracted = INVOICE
+     image_path = "./docs for ocr/invoices/facture.png"
+     image = cv2.imread(image_path)
+     print(image.shape)
+
+     text_data = extract_data(lang, to_be_extracted, image)
+     print(text_data)
+
+     # extract_tables(lang, image)  # extract tables from the image and save each one as an Excel file in the current directory
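
The cell-reading loop in `rec_table` depends on two small helpers: `insert` keeps the detected rows and columns sorted into reading order, and `intersection` turns each (row, column) pair into a cell box. A worked example of the geometry, independent of the disabled detection models (coordinates are made up):

from invoice import insert, intersection

row = {'xmin': 0, 'ymin': 40, 'xmax': 500, 'ymax': 70}    # one detected table row
col = {'xmin': 120, 'ymin': 0, 'xmax': 240, 'ymax': 300}  # one detected table column
print(intersection(row, col))
# {'xmin': 120, 'ymin': 40, 'xmax': 240, 'ymax': 70}: the cell where they overlap

cols = []
for box in ({'xmin': 240}, {'xmin': 0}, {'xmin': 120}):
    insert(box, cols, pos='xmin')
print([b['xmin'] for b in cols])
# [0, 120, 240]: columns ordered left to right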