File size: 3,301 Bytes
8565879
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
import cv2
from PIL import Image
# from ultralyticsplus import YOLO
# from transformers import pipeline
import pandas as pd
import numpy as np
import easyocr
from utils import *

# Field labels (in French) to look for on an invoice; the __main__ block below
# passes this list to extract_data as the set of fields to extract.
INVOICE = ["Numéro de facture", "Date", "Numéro de commande", "Echéance", "Total"]

# model = YOLO('keremberke/yolov8s-table-extraction')
# model.overrides['conf'] = 0.25  # NMS confidence threshold
# model.overrides['iou'] = 0.45  # NMS IoU threshold
# model.overrides['agnostic_nms'] = False  # NMS class-agnostic
# model.overrides['max_det'] = 1000  # maximum number of detections per image

# pipe = pipeline("object-detection", model="bilguun/table-transformer-structure-recognition")


def detect_tables(image):
  """Crop every table detected in ``image`` with a confidence of at least 0.5.

  Relies on a module-level ``model`` object. The YOLO setup in this file is
  commented out, so ``model`` is presumably supplied elsewhere (e.g. by the
  ``utils`` star-import) -- TODO confirm.

  Args:
    image: np array holding the page image, indexed as [row, column].

  Returns:
    A list of PIL images, one per kept detection, in detection order.
  """
  detection = model.predict(image)[0]
  corners = detection.boxes.xyxy
  confidences = detection.boxes.conf

  crops = []
  for idx, confidence in enumerate(confidences):
    if confidence >= 0.5:
      # Box layout is (x0, y0, x1, y1); truncate to ints for slicing.
      left, top, right, bottom = (int(corners[idx, axis]) for axis in range(4))
      crops.append(Image.fromarray(image[top:bottom, left:right]))
  return crops

def insert(el, listt, pos):
  """Insert ``el`` into ``listt`` in place, keeping it ordered by ``el[pos]``.

  ``el`` goes in front of the first item whose ``pos`` value is greater than
  or equal to its own (so it lands before any equal items); when no such item
  exists it is appended at the end. Returns nothing.
  """
  # el[pos] is only evaluated while scanning, so an empty list never reads it.
  slot = next(
      (idx for idx, item in enumerate(listt) if el[pos] <= item[pos]),
      len(listt),
  )
  listt.insert(slot, el)

def rec_table(table, reader):
  """Recognise the row/column structure of a table image and OCR each cell.

  Relies on a module-level ``pipe`` callable. The pipeline setup in this file
  is commented out, so ``pipe`` is presumably supplied elsewhere -- TODO
  confirm. ``get_ocr`` and ``get_input`` are not defined in this file either.

  Args:
    table: image of a single cropped table; passed to ``pipe`` and converted
      to an np array for slicing.
    reader: OCR reader forwarded unchanged to ``get_ocr``.

  Returns:
    A pandas DataFrame with one row per detected table row (top to bottom)
    and one column per detected table column (left to right). A cell whose
    row and column boxes do not overlap is left empty (``None``).
  """
  col_row = pipe(table)
  cols = []
  rows = []
  for el in col_row:
    if el["label"] == 'table column':
      insert(el["box"], cols, pos="xmin")  # keep columns ordered left to right
    elif el["label"] == 'table row':
      insert(el["box"], rows, pos="ymin")  # keep rows ordered top to bottom

  pixels = np.array(table)

  grid = []
  for row in rows:
    cells = []
    for col in cols:
      box = intersection(row, col)
      if box is None:
        # No overlap between this row and column: there is nothing to OCR.
        # Store a placeholder so the remaining columns stay aligned instead
        # of crashing on box['ymin'].
        cells.append(None)
        continue
      cell = pixels[box['ymin']:box['ymax'], box['xmin']:box['xmax']]
      cells.append(get_input(get_ocr(cell, reader)))
    grid.append(cells)

  return pd.DataFrame(grid)


def intersection(box1, box2):
    """Return the overlap of two boxes, or ``None`` when they do not overlap.

    Both boxes are mappings with 'xmin', 'ymin', 'xmax' and 'ymax' keys. The
    result uses the same keys. Boxes that merely touch along an edge (zero
    width or height of overlap) count as not overlapping.
    """
    corner_keys = ('xmin', 'ymin', 'xmax', 'ymax')
    a_left, a_top, a_right, a_bottom = [box1[key] for key in corner_keys]
    b_left, b_top, b_right, b_bottom = [box2[key] for key in corner_keys]

    # The overlap starts at the larger of the minima and ends at the smaller
    # of the maxima on each axis.
    left, top = max(a_left, b_left), max(a_top, b_top)
    right, bottom = min(a_right, b_right), min(a_bottom, b_bottom)

    if left >= right or top >= bottom:
        return None

    return dict(xmin=left, ymin=top, xmax=right, ymax=bottom)


def extract_tables(lang, image):
  """Detect the tables in ``image``, OCR each one and save it as an Excel file.

  Args:
    lang: key into ``langs`` selecting the easyocr language code (``langs``
      is not defined in this file -- presumably it comes from ``utils``).
    image: page image forwarded to ``detect_tables``.

  Writes ``table_1.xlsx``, ``table_2.xlsx``, ... (no index, no header) to the
  current working directory, numbered in detection order. Returns nothing.
  """
  ocr_reader = easyocr.Reader([langs[lang]])
  for number, table in enumerate(detect_tables(image), start=1):
    sheet = rec_table(table, ocr_reader)
    sheet.to_excel(f'table_{number}.xlsx', index=False, header=False)

# Script entry point: extract the INVOICE fields from a sample invoice image
# and print them.
if __name__ == '__main__':
    lang = "french"
    to_be_extracted = INVOICE
    image_path = "./docs for ocr/invoices/facture.png"
    image = cv2.imread(image_path)
    # cv2.imread reports an unreadable or missing file by returning None
    # instead of raising, so fail here with a clear message rather than with
    # an AttributeError on image.shape.
    if image is None:
        raise FileNotFoundError(f"Could not read image: {image_path}")
    print(image.shape)

    text_data = extract_data(lang, to_be_extracted, image)
    print(text_data)

    # extract_tables(lang, image) # extract tables from the image and download them in excel format to the current directory