napatswift committed
Commit b7f49b8 · 1 Parent(s): 15e4f3a

Init project

Dockerfile ADDED
@@ -0,0 +1,31 @@
+ FROM python:3.9
+
+ WORKDIR /code
+
+ RUN apt-get update && apt-get install ffmpeg libsm6 libxext6 -y
+
+ COPY ./requirements.txt /code/requirements.txt
+ RUN pip install --no-cache-dir --upgrade -r /code/requirements.txt
+
+ RUN pip install -U openmim
+ RUN mim install mmengine
+ RUN mim install mmcv
+ RUN mim install mmdet
+ RUN mim install mmocr
+
+ # Set up a new user named "user" with user ID 1000
+ RUN useradd -m -u 1000 user
+ # Switch to the "user" user
+ USER user
+ # Set home to the user's home directory
+ ENV HOME=/home/user \
+     PATH=/home/user/.local/bin:$PATH
+
+ # Set the working directory to the user's home directory
+ WORKDIR $HOME/app
+
+ # Copy the current directory contents into the container at $HOME/app setting the owner to the user
+ COPY --chown=user . $HOME/app
+ RUN ls
+
+ CMD ["python", "main.py"]
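Since main.py launches Gradio on port 7860, the image can be tried locally with something like docker build -t table-det . followed by docker run -p 7860:7860 table-det (the table-det tag is only an illustrative name).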
main.py ADDED
@@ -0,0 +1,65 @@
+ from mmdet.apis import init_detector, inference_detector
+ import gradio as gr
+ import cv2
+ import sys
+ import torch
+ import numpy as np
+
+ print('Loading model...')
+ device = 'cuda' if torch.cuda.is_available() else 'cpu'
+
+ table_det = init_detector('model/table-det/config.py',
+                           'model/table-det/model.pth', device=device)
+ def get_corners(points):
+     """
+     Returns the top-left, top-right, bottom-right, and bottom-left corners
+     of a rectangle defined by a list of four points in the form of tuples.
+     """
+     # Sort points by x-coordinate
+     sorted_points = sorted(points, key=lambda p: p[0])
+
+     # Split sorted points into left and right halves
+     left_points = sorted_points[:2]
+     right_points = sorted_points[2:]
+
+     # Sort left and right points by y-coordinate
+     left_points = sorted(left_points, key=lambda p: p[1])
+     right_points = sorted(right_points, key=lambda p: p[1])  # top first, so the return order below holds
+
+     # Return corners in order: top-left, top-right, bottom-right, bottom-left
+     return (left_points[0], right_points[0], right_points[1], left_points[1])
+
+ def funct(mask_array):
+     table_images = []
+     table_bboxes = []
+     contours, hierarchy = cv2.findContours(mask_array, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)
+     for cnt in contours:
+         rect = cv2.minAreaRect(cnt)
+         box = cv2.boxPoints(rect)
+         epsilon = cv2.arcLength(cnt, True)
+         approx = cv2.approxPolyDP(cnt, 0.02 * epsilon, True)
+         points = np.squeeze(approx)
+         if len(points) != 4:
+             points = box
+         tl, tr, br, bl = get_corners(points.tolist())
+         table_bboxes.append([tl, tr, br, bl])
+     return table_bboxes
+
+
+ def predict(image_input):
+     results = inference_detector(table_det, image_input)
+     print(results)
+     return {'message': 'success'}
+
+ def run():
+     demo = gr.Interface(
+         fn=predict,
+         inputs=gr.components.Image(),
+         outputs=gr.JSON(),
+     )
+
+     demo.launch(server_name="0.0.0.0", server_port=7860)
+
+
+ if __name__ == "__main__":
+     run()
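As written, predict only prints the raw detector output and funct is never called. A minimal sketch of how the two could be wired together, assuming an MMDetection 3.x result whose pred_instances carries per-instance boolean masks and scores (the predict_tables name and score_thr value are illustrative):

def predict_tables(image_input, score_thr=0.5):
    # Run the table detector and keep confident instances
    # (assumed result structure: DetDataSample with pred_instances.{scores, masks})
    result = inference_detector(table_det, image_input)
    instances = result.pred_instances
    masks = instances.masks[instances.scores > score_thr]
    if len(masks) == 0:
        return {'tables': []}
    # Merge the per-table masks into one binary image for contour tracing
    # (np and cv2 come from main.py's imports)
    merged = masks.any(dim=0).cpu().numpy().astype(np.uint8) * 255
    return {'tables': funct(merged)}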
model/table-det/config.py ADDED
@@ -0,0 +1,318 @@
+ model = dict(
+     type='MaskRCNN',
+     data_preprocessor=dict(
+         type='DetDataPreprocessor',
+         mean=[123.675, 116.28, 103.53],
+         std=[58.395, 57.12, 57.375],
+         bgr_to_rgb=True,
+         pad_mask=True,
+         pad_size_divisor=32),
+     backbone=dict(
+         type='ResNet',
+         depth=50,
+         num_stages=4,
+         out_indices=(0, 1, 2, 3),
+         frozen_stages=1,
+         norm_cfg=dict(type='BN', requires_grad=True),
+         norm_eval=True,
+         style='pytorch',
+         init_cfg=dict(type='Pretrained', checkpoint='torchvision://resnet50')),
+     neck=dict(
+         type='FPN',
+         in_channels=[256, 512, 1024, 2048],
+         out_channels=256,
+         num_outs=5),
+     rpn_head=dict(
+         type='RPNHead',
+         in_channels=256,
+         feat_channels=256,
+         anchor_generator=dict(
+             type='AnchorGenerator',
+             scales=[8],
+             ratios=[0.5, 1.0, 2.0],
+             strides=[4, 8, 16, 32, 64]),
+         bbox_coder=dict(
+             type='DeltaXYWHBBoxCoder',
+             target_means=[0.0, 0.0, 0.0, 0.0],
+             target_stds=[1.0, 1.0, 1.0, 1.0]),
+         loss_cls=dict(
+             type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0),
+         loss_bbox=dict(type='L1Loss', loss_weight=1.0)),
+     roi_head=dict(
+         type='StandardRoIHead',
+         bbox_roi_extractor=dict(
+             type='SingleRoIExtractor',
+             roi_layer=dict(type='RoIAlign', output_size=7, sampling_ratio=0),
+             out_channels=256,
+             featmap_strides=[4, 8, 16, 32]),
+         bbox_head=dict(
+             type='Shared2FCBBoxHead',
+             in_channels=256,
+             fc_out_channels=1024,
+             roi_feat_size=7,
+             num_classes=1,
+             bbox_coder=dict(
+                 type='DeltaXYWHBBoxCoder',
+                 target_means=[0.0, 0.0, 0.0, 0.0],
+                 target_stds=[0.1, 0.1, 0.2, 0.2]),
+             reg_class_agnostic=False,
+             loss_cls=dict(
+                 type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0),
+             loss_bbox=dict(type='L1Loss', loss_weight=1.0)),
+         mask_roi_extractor=dict(
+             type='SingleRoIExtractor',
+             roi_layer=dict(type='RoIAlign', output_size=14, sampling_ratio=0),
+             out_channels=256,
+             featmap_strides=[4, 8, 16, 32]),
+         mask_head=dict(
+             type='FCNMaskHead',
+             num_convs=4,
+             in_channels=256,
+             conv_out_channels=256,
+             num_classes=1,
+             loss_mask=dict(
+                 type='CrossEntropyLoss', use_mask=True, loss_weight=1.0))),
+     train_cfg=dict(
+         rpn=dict(
+             assigner=dict(
+                 type='MaxIoUAssigner',
+                 pos_iou_thr=0.7,
+                 neg_iou_thr=0.3,
+                 min_pos_iou=0.3,
+                 match_low_quality=True,
+                 ignore_iof_thr=-1),
+             sampler=dict(
+                 type='RandomSampler',
+                 num=256,
+                 pos_fraction=0.5,
+                 neg_pos_ub=-1,
+                 add_gt_as_proposals=False),
+             allowed_border=-1,
+             pos_weight=-1,
+             debug=False),
+         rpn_proposal=dict(
+             nms_pre=2000,
+             max_per_img=1000,
+             nms=dict(type='nms', iou_threshold=0.7),
+             min_bbox_size=0),
+         rcnn=dict(
+             assigner=dict(
+                 type='MaxIoUAssigner',
+                 pos_iou_thr=0.5,
+                 neg_iou_thr=0.5,
+                 min_pos_iou=0.5,
+                 match_low_quality=True,
+                 ignore_iof_thr=-1),
+             sampler=dict(
+                 type='RandomSampler',
+                 num=512,
+                 pos_fraction=0.25,
+                 neg_pos_ub=-1,
+                 add_gt_as_proposals=True),
+             mask_size=28,
+             pos_weight=-1,
+             debug=False)),
+     test_cfg=dict(
+         rpn=dict(
+             nms_pre=1000,
+             max_per_img=1000,
+             nms=dict(type='nms', iou_threshold=0.7),
+             min_bbox_size=0),
+         rcnn=dict(
+             score_thr=0.05,
+             nms=dict(type='nms', iou_threshold=0.5),
+             max_per_img=100,
+             mask_thr_binary=0.5)))
+ backend_args = None
+ train_pipeline = [
+     dict(type='LoadImageFromFile', backend_args=None),
+     dict(type='LoadAnnotations', with_bbox=True, with_mask=True),
+     dict(type='Resize', scale=(1333, 800), keep_ratio=True),
+     dict(type='Rotate', level=10),
+     dict(type='RandomFlip', prob=0.5),
+     dict(type='PackDetInputs')
+ ]
+ test_pipeline = [
+     dict(type='LoadImageFromFile', backend_args=None),
+     dict(type='Resize', scale=(1333, 800), keep_ratio=True),
+     dict(type='LoadAnnotations', with_bbox=True, with_mask=True),
+     dict(
+         type='PackDetInputs',
+         meta_keys=('img_id', 'img_path', 'ori_shape', 'img_shape',
+                    'scale_factor'))
+ ]
+ data_root = 'data/table-det-elect66/'
+ metainfo = dict(classes=('Table', ), palette=[(220, 20, 60)])
+ dataset_elect66 = dict(
+     type='CocoDataset',
+     data_root='data/table-det-elect66/',
+     ann_file='result.json',
+     data_prefix=dict(img=''),
+     metainfo=dict(classes=('Table', ), palette=[(220, 20, 60)]),
+     filter_cfg=dict(filter_empty_gt=True, min_size=32),
+     pipeline=[
+         dict(type='LoadImageFromFile', backend_args=None),
+         dict(type='LoadAnnotations', with_bbox=True, with_mask=True),
+         dict(type='Resize', scale=(1333, 800), keep_ratio=True),
+         dict(type='Rotate', level=10),
+         dict(type='RandomFlip', prob=0.5),
+         dict(type='PackDetInputs')
+     ])
+ dataset_vote62 = dict(
+     type='CocoDataset',
+     data_root='data/table-det-740/',
+     ann_file='train_coco.json',
+     data_prefix=dict(img=''),
+     metainfo=dict(classes=('Table', ), palette=[(220, 20, 60)]),
+     filter_cfg=dict(filter_empty_gt=True, min_size=32),
+     pipeline=[
+         dict(type='LoadImageFromFile', backend_args=None),
+         dict(type='LoadAnnotations', with_bbox=True, with_mask=True),
+         dict(type='Resize', scale=(1333, 800), keep_ratio=True),
+         dict(type='Rotate', level=10),
+         dict(type='RandomFlip', prob=0.5),
+         dict(type='PackDetInputs')
+     ])
+ train_dataloader = dict(
+     batch_size=2,
+     num_workers=2,
+     persistent_workers=True,
+     sampler=dict(type='DefaultSampler', shuffle=True),
+     batch_sampler=dict(type='AspectRatioBatchSampler'),
+     dataset=dict(
+         type='ConcatDataset',
+         datasets=[
+             dict(
+                 type='CocoDataset',
+                 data_root='data/table-det-elect66/',
+                 ann_file='result.json',
+                 data_prefix=dict(img=''),
+                 metainfo=dict(classes=('Table', ), palette=[(220, 20, 60)]),
+                 filter_cfg=dict(filter_empty_gt=True, min_size=32),
+                 pipeline=[
+                     dict(type='LoadImageFromFile', backend_args=None),
+                     dict(
+                         type='LoadAnnotations', with_bbox=True,
+                         with_mask=True),
+                     dict(type='Resize', scale=(1333, 800), keep_ratio=True),
+                     dict(type='Rotate', level=10),
+                     dict(type='RandomFlip', prob=0.5),
+                     dict(type='PackDetInputs')
+                 ]),
+             dict(
+                 type='CocoDataset',
+                 data_root='data/table-det-740/',
+                 ann_file='train_coco.json',
+                 data_prefix=dict(img=''),
+                 metainfo=dict(classes=('Table', ), palette=[(220, 20, 60)]),
+                 filter_cfg=dict(filter_empty_gt=True, min_size=32),
+                 pipeline=[
+                     dict(type='LoadImageFromFile', backend_args=None),
+                     dict(
+                         type='LoadAnnotations', with_bbox=True,
+                         with_mask=True),
+                     dict(type='Resize', scale=(1333, 800), keep_ratio=True),
+                     dict(type='Rotate', level=10),
+                     dict(type='RandomFlip', prob=0.5),
+                     dict(type='PackDetInputs')
+                 ])
+         ]))
+ val_dataloader = dict(
+     batch_size=1,
+     num_workers=2,
+     persistent_workers=True,
+     drop_last=False,
+     sampler=dict(type='DefaultSampler', shuffle=False),
+     dataset=dict(
+         type='CocoDataset',
+         data_root='data/table-det-elect66/',
+         ann_file='result.json',
+         data_prefix=dict(img=''),
+         test_mode=True,
+         pipeline=[
+             dict(type='LoadImageFromFile', backend_args=None),
+             dict(type='Resize', scale=(1333, 800), keep_ratio=True),
+             dict(type='LoadAnnotations', with_bbox=True, with_mask=True),
+             dict(
+                 type='PackDetInputs',
+                 meta_keys=('img_id', 'img_path', 'ori_shape', 'img_shape',
+                            'scale_factor'))
+         ],
+         backend_args=None,
+         metainfo=dict(classes=('Table', ), palette=[(220, 20, 60)])))
+ test_dataloader = dict(
+     batch_size=1,
+     num_workers=2,
+     persistent_workers=True,
+     drop_last=False,
+     sampler=dict(type='DefaultSampler', shuffle=False),
+     dataset=dict(
+         type='CocoDataset',
+         data_root='data/table-det-elect66/',
+         ann_file='result.json',
+         data_prefix=dict(img=''),
+         test_mode=True,
+         pipeline=[
+             dict(type='LoadImageFromFile', backend_args=None),
+             dict(type='Resize', scale=(1333, 800), keep_ratio=True),
+             dict(type='LoadAnnotations', with_bbox=True, with_mask=True),
+             dict(
+                 type='PackDetInputs',
+                 meta_keys=('img_id', 'img_path', 'ori_shape', 'img_shape',
+                            'scale_factor'))
+         ],
+         backend_args=None,
+         metainfo=dict(classes=('Table', ), palette=[(220, 20, 60)])))
+ val_evaluator = dict(
+     type='CocoMetric',
+     ann_file='data/table-det-elect66/result.json',
+     metric=['bbox', 'segm'],
+     format_only=False,
+     backend_args=None)
+ test_evaluator = dict(
+     type='CocoMetric',
+     ann_file='data/table-det-elect66/result.json',
+     metric=['bbox', 'segm'],
+     format_only=False,
+     backend_args=None)
+ train_cfg = dict(type='EpochBasedTrainLoop', max_epochs=10, val_interval=5)
+ val_cfg = dict(type='ValLoop')
+ test_cfg = dict(type='TestLoop')
+ param_scheduler = [
+     dict(
+         type='LinearLR', start_factor=0.001, by_epoch=False, begin=0, end=500),
+     dict(
+         type='MultiStepLR',
+         begin=0,
+         end=12,
+         by_epoch=True,
+         milestones=[8, 11],
+         gamma=0.1)
+ ]
+ optim_wrapper = dict(
+     type='OptimWrapper',
+     optimizer=dict(type='SGD', lr=0.005, momentum=0.9, weight_decay=0.0001))
+ auto_scale_lr = dict(enable=False, base_batch_size=16)
+ default_scope = 'mmdet'
+ default_hooks = dict(
+     timer=dict(type='IterTimerHook'),
+     logger=dict(type='LoggerHook', interval=100),
+     param_scheduler=dict(type='ParamSchedulerHook'),
+     checkpoint=dict(type='CheckpointHook', interval=5),
+     sampler_seed=dict(type='DistSamplerSeedHook'),
+     visualization=dict(type='DetVisualizationHook'))
+ env_cfg = dict(
+     cudnn_benchmark=False,
+     mp_cfg=dict(mp_start_method='fork', opencv_num_threads=0),
+     dist_cfg=dict(backend='nccl'))
+ vis_backends = [dict(type='LocalVisBackend')]
+ visualizer = dict(
+     type='DetLocalVisualizer',
+     vis_backends=[dict(type='LocalVisBackend')],
+     name='visualizer')
+ log_processor = dict(type='LogProcessor', window_size=50, by_epoch=True)
+ log_level = 'INFO'
+ load_from = None
+ resume = True
+ launcher = 'none'
+ work_dir = './work_dirs/vote-config'
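Since the file above is a standard MMEngine-style config, retraining the detector amounts to loading it and handing it to a Runner. A minimal sketch, assuming the data/table-det-elect66/ and data/table-det-740/ datasets it references are present locally:

from mmengine.config import Config
from mmengine.runner import Runner

# Load the committed config; work_dir, schedule and dataloaders all come from it
cfg = Config.fromfile('model/table-det/config.py')
runner = Runner.from_cfg(cfg)
runner.train()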
model/table-det/model.pth ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:d42b7f3e8a73cfff6d126cacb5218b0547efba90e5ba89dc158097a0b15b9d33
+ size 351999009
requirements.txt ADDED
@@ -0,0 +1,4 @@
+ opencv-python
+ gradio
+ torch
+ torchvision