Abijith committed on
Commit
714cc46
1 Parent(s): f9df96e

Upload 2 files

Browse files
Files changed (2) hide show
  1. datatypes/config.py +13 -0
  2. datatypes/datatypes.py +53 -0
datatypes/config.py ADDED
@@ -0,0 +1,13 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+
3
+ # config values
4
+
5
# Padding sizes and score thresholds used by the table pipeline, keyed by
# setting name.
# NOTE(review): the "*_pixel" keys suggest pixel units and the thresholds look
# like minimum confidence scores; the consumers are not visible here — confirm.
Config = {
    "table_detection_padding_pixel": 10,
    "table_recognition_padding_pixel": 5,
    "table_detection_threshold": 0.7,
    "table_recognition_threshold": 0.8,
    "table_padd": 20,
    "row_padd": 6,
    "cell_padd": 3,
}
9
+
10
# Path to the Tesseract executable. This is a hard-coded Windows install
# location; it must be edited for any machine where Tesseract lives elsewhere.
tesseract_config = {
    "tesseractpath": "C://Program Files//Tesseract-OCR//tesseract.exe",
}
11
+
12
# Paths to the detection and recognition model directories. These are
# hard-coded to a local D: drive layout and must be edited per machine.
model_config = {
    "detection_model_path": "D:/Table-detection/models/detection-model",
    "recognition_model_path": "D:/Table-detection/models/recognition-model",
}
datatypes/datatypes.py ADDED
@@ -0,0 +1,53 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ from dataclasses import dataclass, field
3
+ from enum import Enum
4
+ from typing import List, Dict
5
+
6
class DetectionLabels(Enum):
    """Integer ids for the kinds of table element that can be labelled.

    NOTE(review): the values presumably mirror the class indices emitted by
    the recognition model — confirm against the model's label map.
    """

    table = 0
    table_column = 1
    table_row = 2
    table_column_header = 3
    table_projected_row_header = 4
    table_spanning_cell = 5
13
+
14
class ExtractionContext(Enum):
    """Scope at which an extraction is performed: document, table or row."""

    document = 1
    table = 2
    row = 3
18
+
19
@dataclass
class Cell:
    """A single extracted table cell.

    Attributes:
        cellindex: Index of the cell; defaults to 0.
        value: Text content of the cell; defaults to the empty string.
        prob: Score attached to the cell; defaults to 0.5.
            NOTE(review): presumably an OCR/recognition confidence — confirm.
    """

    cellindex: int = 0
    value: str = ''
    prob: float = 0.5
24
+
25
@dataclass
class Row:
    """A single extracted table row.

    Attributes:
        rowindex: Index of the row; defaults to 0.
        extracted_cells: Cells belonging to this row. Every instance gets its
            own fresh empty list by default.
    """

    rowindex: int = 0
    extracted_cells: List[Cell] = field(default_factory=list)
29
+
30
@dataclass
class TableRecognitionData:
    """Parallel lists holding raw recognition output for one table.

    Attributes:
        scores: Recognition scores.
        labels: Recognition labels.
        boxes: Recognition boxes.

    Each field defaults to a fresh empty list per instance.
    NOTE(review): element types and box coordinate format are not visible
    here — confirm against the code that fills these lists.
    """

    scores: List = field(default_factory=list)
    labels: List = field(default_factory=list)
    boxes: List = field(default_factory=list)
35
+
36
@dataclass
class TableRecognitionOrdered:
    """Recognised rows and columns of a table, stored as two lists.

    Attributes:
        recognized_row: Recognised row entries.
        recognized_column: Recognised column entries.

    Each field defaults to a fresh empty list per instance.
    NOTE(review): the ordering criterion implied by the class name is not
    visible here — confirm against the code that builds these lists.
    """

    recognized_row: List = field(default_factory=list)
    recognized_column: List = field(default_factory=list)
40
+
41
@dataclass
class TableDetectionData:
    """Everything known about one detected table.

    Attributes:
        detection_score: Score of the table detection; defaults to 0.0.
        detection_label: Integer label of the detection; defaults to 0.
        detection_box: Box of the detected table; defaults to an empty list.
        recognitiondata: Raw recognition output for this table. Defaults to
            an empty ``TableRecognitionData`` so the value always matches the
            annotation (the default used to be a bare empty list, on which
            ``.scores`` / ``.labels`` / ``.boxes`` would raise
            ``AttributeError``).
        ordered_recognitiondata: Ordered recognition results; defaults to an
            empty list.
        extracted_rows: Rows extracted from the table; defaults to an empty
            list.

    All mutable defaults are created fresh for every instance.
    """

    detection_score: float = 0.0
    detection_label: int = 0
    detection_box: List = field(default_factory=list)
    # Factory is the class itself, so each instance gets its own empty
    # TableRecognitionData rather than a list that violates the annotation.
    recognitiondata: TableRecognitionData = field(
        default_factory=TableRecognitionData
    )
    ordered_recognitiondata: List[TableRecognitionOrdered] = field(
        default_factory=list
    )
    extracted_rows: List[Row] = field(default_factory=list)
49
+
50
+
51
@dataclass
class ImageData:
    """Container for the tables associated with one image.

    Attributes:
        tables: The ``TableDetectionData`` entries for the image. This field
            has no default and must be supplied by the caller.
    """

    tables: List[TableDetectionData]