Soufiane committed
Commit 8a6a4ae
1 Parent(s): 8565879

added table extraction

app.py CHANGED
@@ -1,9 +1,9 @@
 import streamlit as st
-from PIL import Image, ImageOps
+from PIL import Image
 import pandas as pd
 import numpy as np
 from invoice import extract_data, extract_tables, INVOICE
-import cv2
+
 
 
 
@@ -21,8 +21,9 @@ def main():
     st.image(uploaded_image, caption="Uploaded Image", use_column_width=True)
 
     lang = st.selectbox("Select Language", ["french", "english", "arabic"])
+    pil_image = Image.open(uploaded_image).convert('RGB')
+    numpy_image = np.array(pil_image)
 
-    # UI for adding elements to extract list
     st.write("Add elements to extract:")
     extract_input = st.text_input("Add elements")
     extract_list = st.session_state.get("extract_list", INVOICE)
@@ -36,8 +37,7 @@ def main():
         st.write(f"`{item}`", unsafe_allow_html=True)
 
     if st.button("Extract information"):
-        pil_image = Image.open(uploaded_image).convert('RGB')
-        numpy_image = np.array(pil_image)
+
         image_info = process_image(lang, extract_list, numpy_image)
 
         df = pd.DataFrame(list(image_info.items()), columns=["Field", "Value"])
@@ -45,8 +45,7 @@
         st.dataframe(df)
 
     if st.button("Extract Tables"):
-        df = pd.DataFrame([])
-        csv = df.to_csv(index=False, header=False)
+        csv = extract_tables(lang, numpy_image)
        st.download_button(label="Download CSV", data=csv, file_name='data.csv', mime='text/csv')
 
 
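With this change app.py opens the uploaded file once, converts it to RGB, and keeps the resulting NumPy array around for both the "Extract information" and "Extract Tables" buttons. A minimal sketch of that conversion outside Streamlit, using one of the sample invoices added in this commit (the helper name is ours, not the commit's):

from PIL import Image
import numpy as np

def load_image_as_array(path):
    # Same conversion as app.py: force RGB so the array always has three
    # channels, then pass the NumPy array to extract_data / extract_tables.
    pil_image = Image.open(path).convert('RGB')
    return np.array(pil_image)

if __name__ == '__main__':
    arr = load_image_as_array('invoices/facture.png')
    print(arr.shape)  # (height, width, 3)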
invoice.py CHANGED
@@ -1,7 +1,7 @@
 import cv2
 from PIL import Image
-# from ultralyticsplus import YOLO
-# from transformers import pipeline
+from ultralyticsplus import YOLO
+from transformers import pipeline
 import pandas as pd
 import numpy as np
 import easyocr
@@ -9,13 +9,13 @@ from utils import *
 
 INVOICE = ["Numéro de facture", "Date", "Numéro de commande", "Echéance", "Total"]
 
-# model = YOLO('keremberke/yolov8s-table-extraction')
-# model.overrides['conf'] = 0.25 # NMS confidence threshold
-# model.overrides['iou'] = 0.45 # NMS IoU threshold
-# model.overrides['agnostic_nms'] = False # NMS class-agnostic
-# model.overrides['max_det'] = 1000 # maximum number of detections per image
+model = YOLO('keremberke/yolov8s-table-extraction')
+model.overrides['conf'] = 0.25 # NMS confidence threshold
+model.overrides['iou'] = 0.45 # NMS IoU threshold
+model.overrides['agnostic_nms'] = False # NMS class-agnostic
+model.overrides['max_det'] = 1000 # maximum number of detections per image
 
-# pipe = pipeline("object-detection", model="bilguun/table-transformer-structure-recognition")
+pipe = pipeline("object-detection", model="bilguun/table-transformer-structure-recognition")
 
 
 def detect_tables(image):
@@ -94,13 +94,24 @@ def intersection(box1, box2):
     return {'xmin': xmin, 'ymin': ymin, 'xmax': xmax, 'ymax': ymax}
 
 
+# def extract_tables(lang, image):
+#     reader = easyocr.Reader([langs[lang]])
+#     tables = detect_tables(image)
+
+#     for i in range(len(tables)):
+#         df = rec_table(tables[i], reader)
+#         df.to_excel(f'table_{i+1}.xlsx', index=False, header=False)
+
 def extract_tables(lang, image):
     reader = easyocr.Reader([langs[lang]])
     tables = detect_tables(image)
-
+    csvs = []
     for i in range(len(tables)):
        df = rec_table(tables[i], reader)
-        df.to_excel(f'table_{i+1}.xlsx', index=False, header=False)
+        csv = df.to_csv(index=False, header=False)
+        csvs.append(csv)
+
+    return csvs[0]
 
 if __name__ == '__main__':
     lang = "french"
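detect_tables itself is not touched by this commit, so its body is not shown here. For orientation, a minimal sketch of how the YOLO table-extraction model configured above is typically used to crop table regions; this is an assumption based on the ultralyticsplus API, not the commit's actual detect_tables:

import numpy as np
from ultralyticsplus import YOLO

model = YOLO('keremberke/yolov8s-table-extraction')
model.overrides['conf'] = 0.25  # NMS confidence threshold, as in invoice.py

def crop_detected_tables(image: np.ndarray):
    # Hypothetical helper: run detection once and return each predicted
    # table as a cropped sub-image, ready for OCR with rec_table.
    results = model.predict(image)
    crops = []
    for box in results[0].boxes.xyxy.tolist():
        x1, y1, x2, y2 = (int(v) for v in box)
        crops.append(image[y1:y2, x1:x2])
    return crops

Note that the new extract_tables returns csvs[0], so only the first detected table reaches the Streamlit download button.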
invoices/facture.png ADDED
invoices/facture1.png ADDED
invoices/facture2.webp ADDED
invoices/facture3.png ADDED
invoices/facture4.webp ADDED
invoices/facture5.png ADDED
invoices/facture6.jpg ADDED
invoices/pdf/facture.pdf ADDED
Binary file (104 kB)
invoices/pdf/facture1.pdf ADDED
Binary file (37.9 kB)
invoices/pdf/facture2.pdf ADDED
Binary file (24 kB)
invoices/pdf/releve de compte.pdf ADDED
Binary file (42.7 kB)
requirements.txt ADDED
@@ -0,0 +1,11 @@
+easyocr
+json
+opencv-python
+google-generativeai
+streamlit
+Pillow
+pandas
+numpy
+base64
+ultralyticsplus
+transformers
tables/Fgwf1.png ADDED
tables/article with tables.pdf ADDED
Binary file (296 kB)
tables/facture_table.png ADDED
utils.py ADDED
@@ -0,0 +1,140 @@
+import easyocr
+import json
+import re
+import cv2
+import easyocr
+import google.generativeai as genai
+
+
+langs = {'french': 'fr', 'english': 'en', 'arabic': 'ar'}
+
+
+GOOGLE_API_KEY = "AIzaSyC6NXwTrucSl2JkY23YWsucFZPMBDoaqJw"
+genai.configure(api_key=GOOGLE_API_KEY)
+model = genai.GenerativeModel('gemini-pro')
+
+def get_ocr(image,reader):
+    result = reader.readtext(image)
+    return result
+
+
+def get_input(result, sep = " "):
+    a = {}
+    for (bbox, text, prob) in result:
+        if prob<0.3:
+            continue
+        k = True
+        for row in a:
+            if abs(bbox[0][1] - row)<=5:
+                k = False
+                a[row]+= sep
+                a[row]+= text
+
+        if k:
+            a[bbox[0][1]] = text
+
+    inputt = ""
+    for row in a:
+        inputt+= a[row]
+        inputt+= "\n"
+
+    return inputt
+
+
+def imp_ocr(result, image, reader):
+    v_exp_rate = 0.2
+    h_exp_rate = 0.3
+    imp_result = []
+    for i in range(len(result)):
+        prob = result[i][2]
+        if prob < 0.1:
+            continue
+        if prob < 0.9:
+            bbox = result[i][0]
+            x = int(bbox[0][0] - (bbox[2][0] - bbox[0][0])*h_exp_rate/2)
+            x_h = int(bbox[2][0] + (bbox[2][0] - bbox[0][0])*h_exp_rate/2)
+            y = int(bbox[0][1] - (bbox[2][1] - bbox[0][1])*v_exp_rate/2)
+            y_h = int(bbox[2][1] + (bbox[2][1] - bbox[0][1])*v_exp_rate/2)
+
+            x = max(x, 0)
+            x_h = min(x_h, image.shape[1])
+            y = max(y, 0)
+            y_h = min(y_h, image.shape[0])
+
+            sub_img = image[y:y_h, x:x_h]
+            res = get_ocr(sub_img,reader)
+            if not res:
+                imp_result.append(result[i])
+                continue
+            if len(res)>1:
+                res = sorted(res, key=lambda x: x[2], reverse=True)
+
+            if res[0][2] >= prob:
+                imp_result.append((result[i][0], res[0][1], res[0][2]))
+            else:
+                imp_result.append(result[i])
+        else:
+            imp_result.append(result[i])
+    return imp_result
+
+
+def extract_data(lang, to_be_extracted, image):
+    reader = easyocr.Reader([langs[lang]])
+    ocr_result = get_ocr(image,reader)
+    imp_result = imp_ocr(ocr_result, image, reader)
+    inputt = get_input(imp_result, sep = " ")
+    return get_output(inputt, to_be_extracted, lang)
+
+def get_output(inputt, to_be_extracted, lang):
+    prompt = f"""
+    Bellow is the ouptut text of an OCR system. The text is in {lang}.
+    Your job is:
+    1. Format the output in a python dictionary format with the following keys :
+    {to_be_extracted}.
+    2. correct any mistakes such as date formating (should be dd/mm/yyyy) or spelling mistakes (this is important).
+
+    here is your input:
+    {inputt}
+    """
+    response = model.generate_content(prompt)
+    data = extract_json(response.text)
+    return data
+
+def extract_json(text_response):
+    # This pattern matches a string that starts with '{' and ends with '}'
+    pattern = r'\{[^{}]*\}'
+    matches = re.finditer(pattern, text_response)
+    json_objects = []
+    for match in matches:
+        json_str = match.group(0)
+        try:
+            # Validate if the extracted string is valid JSON
+            json_obj = eval(json_str)
+            json_objects.append(json_obj)
+        except json.JSONDecodeError:
+            # Extend the search for nested structures
+            extended_json_str = extend_search(text_response, match.span())
+            try:
+                json_obj = eval(extended_json_str)
+                json_objects.append(json_obj)
+            except json.JSONDecodeError:
+                # Handle cases where the extraction is not valid JSON
+                continue
+    if json_objects:
+        return json_objects[0]
+    else:
+        return {}
+
+
+def extend_search(text, span):
+    # Extend the search to try to capture nested structures
+    start, end = span
+    nest_count = 0
+    for i in range(start, len(text)):
+        if text[i] == '{':
+            nest_count += 1
+        elif text[i] == '}':
+            nest_count -= 1
+            if nest_count == 0:
+                return text[start:i+1]
+    return text[start:end]
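extract_json above pulls the first {...} block out of the Gemini response and parses it with eval, returning {} when nothing parses. A sketch of the same idea using ast.literal_eval instead, shown only for illustration (it is not part of the commit), so that arbitrary expressions in the model output cannot be executed:

import ast
import re

def extract_first_dict(text_response):
    # Same first-{...}-block idea as extract_json, parsed with ast.literal_eval.
    match = re.search(r'\{[^{}]*\}', text_response)
    if not match:
        return {}
    try:
        obj = ast.literal_eval(match.group(0))
        return obj if isinstance(obj, dict) else {}
    except (ValueError, SyntaxError):
        # Malformed or nested output: mirror extract_json's empty-dict fallback.
        return {}

print(extract_first_dict("Voici le résultat : {'Date': '01/02/2024', 'Total': '120,00'}"))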