Image-to-Text
HTRflow
Swedish
Gabriel committed on
Commit
0e7e4f0
1 Parent(s): 5afd3cc

Upload 3 files

Browse files
Files changed (3) hide show
  1. _base_satrn_shallow_concat.py +364 -0
  2. dict1700.txt +148 -0
  3. epoch_5 (1).pth +3 -0
_base_satrn_shallow_concat.py ADDED
@@ -0,0 +1,364 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ default_scope = 'mmocr'
2
+ env_cfg = dict(
3
+ cudnn_benchmark=True,
4
+ mp_cfg=dict(mp_start_method='fork', opencv_num_threads=0),
5
+ dist_cfg=dict(backend='nccl'))
6
+ randomness = dict(seed=None)
7
+ default_hooks = dict(
8
+ timer=dict(type='IterTimerHook'),
9
+ logger=dict(type='LoggerHook', interval=100),
10
+ param_scheduler=dict(type='ParamSchedulerHook'),
11
+ checkpoint=dict(type='CheckpointHook', interval=1),
12
+ sampler_seed=dict(type='DistSamplerSeedHook'),
13
+ sync_buffer=dict(type='SyncBuffersHook'),
14
+ visualization=dict(
15
+ type='VisualizationHook',
16
+ interval=1,
17
+ enable=False,
18
+ show=False,
19
+ draw_gt=False,
20
+ draw_pred=False))
21
+ log_level = 'INFO'
22
+ log_processor = dict(type='LogProcessor', window_size=10, by_epoch=True)
23
+ load_from = './epoch_5.pth'
24
+ resume = False
25
+ val_evaluator = dict(
26
+ type='Evaluator',
27
+ metrics=[
28
+ dict(
29
+ type='WordMetric',
30
+ mode=['exact', 'ignore_case', 'ignore_case_symbol'],
31
+ valid_symbol='[^A-Z^a-z^0-9^一-龥^å^ä^ö^Å^Ä^Ö]'),
32
+ dict(type='CharMetric', valid_symbol='[^A-Z^a-z^0-9^一-龥^å^ä^ö^Å^Ä^Ö]'),
33
+ dict(
34
+ type='OneMinusNEDMetric',
35
+ valid_symbol='[^A-Z^a-z^0-9^一-龥^å^ä^ö^Å^Ä^Ö]')
36
+ ])
37
+ test_evaluator = dict(
38
+ type='Evaluator',
39
+ metrics=[
40
+ dict(
41
+ type='WordMetric',
42
+ mode=['exact', 'ignore_case', 'ignore_case_symbol'],
43
+ valid_symbol='[^A-Z^a-z^0-9^一-龥^å^ä^ö^Å^Ä^Ö]'),
44
+ dict(type='CharMetric', valid_symbol='[^A-Z^a-z^0-9^一-龥^å^ä^ö^Å^Ä^Ö]'),
45
+ dict(
46
+ type='OneMinusNEDMetric',
47
+ valid_symbol='[^A-Z^a-z^0-9^一-龥^å^ä^ö^Å^Ä^Ö]')
48
+ ])
49
+ vis_backends = [dict(type='LocalVisBackend')]
50
+ visualizer = dict(
51
+ type='TextRecogLocalVisualizer',
52
+ name='visualizer',
53
+ vis_backends=[dict(type='TensorboardVisBackend')])
54
+ optim_wrapper = dict(
55
+ type='OptimWrapper', optimizer=dict(type='Adam', lr=0.0003))
56
+ train_cfg = dict(type='EpochBasedTrainLoop', max_epochs=5, val_interval=1)
57
+ val_cfg = dict(type='ValLoop')
58
+ test_cfg = dict(type='TestLoop')
59
+ param_scheduler = [dict(type='MultiStepLR', milestones=[3, 4], end=5)]
60
+ file_client_args = dict(backend='disk')
61
+ dictionary = dict(
62
+ type='Dictionary',
63
+ dict_file=
64
+ './models--Riksarkivet--HTR_pipeline_models/snapshots/296681baf68583f07e89b5fed08136b77e3904cd/SATRN/dict1700.txt',
65
+ with_padding=True,
66
+ with_unknown=True,
67
+ same_start_end=True,
68
+ with_start=True,
69
+ with_end=True)
70
+ model = dict(
71
+ type='SATRN',
72
+ backbone=dict(type='ShallowCNN', input_channels=3, hidden_dim=512),
73
+ encoder=dict(
74
+ type='SATRNEncoder',
75
+ n_layers=12,
76
+ n_head=8,
77
+ d_k=64,
78
+ d_v=64,
79
+ d_model=512,
80
+ n_position=100,
81
+ d_inner=2048,
82
+ dropout=0.1),
83
+ decoder=dict(
84
+ type='NRTRDecoder',
85
+ n_layers=6,
86
+ d_embedding=512,
87
+ n_head=8,
88
+ d_model=512,
89
+ d_inner=2048,
90
+ d_k=64,
91
+ d_v=64,
92
+ module_loss=dict(
93
+ type='CEModuleLoss', flatten=True, ignore_first_char=True),
94
+ dictionary=dict(
95
+ type='Dictionary',
96
+ dict_file=
97
+ './models--Riksarkivet--HTR_pipeline_models/snapshots/296681baf68583f07e89b5fed08136b77e3904cd/SATRN/dict1700.txt',
98
+ with_padding=True,
99
+ with_unknown=True,
100
+ same_start_end=True,
101
+ with_start=True,
102
+ with_end=True),
103
+ max_seq_len=100,
104
+ postprocessor=dict(type='AttentionPostprocessor')),
105
+ data_preprocessor=dict(
106
+ type='TextRecogDataPreprocessor',
107
+ mean=[123.675, 116.28, 103.53],
108
+ std=[58.395, 57.12, 57.375]))
109
+ train_pipeline = [
110
+ dict(
111
+ type='LoadImageFromFile',
112
+ file_client_args=dict(backend='disk'),
113
+ ignore_empty=True,
114
+ min_size=2),
115
+ dict(type='LoadOCRAnnotations', with_text=True),
116
+ dict(type='Resize', scale=(400, 64), keep_ratio=False),
117
+ dict(
118
+ type='PackTextRecogInputs',
119
+ meta_keys=('img_path', 'ori_shape', 'img_shape', 'valid_ratio'))
120
+ ]
121
+ test_pipeline = [
122
+ dict(type='LoadImageFromFile', file_client_args=dict(backend='disk')),
123
+ dict(type='Resize', scale=(400, 64), keep_ratio=False),
124
+ dict(type='LoadOCRAnnotations', with_text=True),
125
+ dict(
126
+ type='PackTextRecogInputs',
127
+ meta_keys=('img_path', 'ori_shape', 'img_shape', 'valid_ratio'))
128
+ ]
129
+ HTR_1700_combined_train = dict(
130
+ type='RecogTextDataset',
131
+ parser_cfg=dict(type='LineJsonParser', keys=['filename', 'text']),
132
+ data_root='/ceph/hpc/scratch/user/euerikl/data/HTR_1700_clean',
133
+ ann_file=
134
+ '/ceph/hpc/home/euerikl/projects/hf_openmmlab_models/data/processed/1700_HTR_shuffled_train.jsonl',
135
+ test_mode=False,
136
+ pipeline=None)
137
+ HTR_1700_combined_test = dict(
138
+ type='RecogTextDataset',
139
+ parser_cfg=dict(type='LineJsonParser', keys=['filename', 'text']),
140
+ data_root='/ceph/hpc/scratch/user/euerikl/data/HTR_1700_clean',
141
+ ann_file=
142
+ '/ceph/hpc/home/euerikl/projects/hf_openmmlab_models/data/processed/1700_HTR_shuffled_val.jsonl',
143
+ test_mode=True,
144
+ pipeline=None)
145
+ pr_cr_combined_train = dict(
146
+ type='RecogTextDataset',
147
+ parser_cfg=dict(
148
+ type='LineStrParser', keys=['filename', 'text'], separator='|'),
149
+ data_root='/ceph/hpc/scratch/user/euerikl/data/line_images',
150
+ ann_file=
151
+ '/ceph/hpc/home/euerikl/projects/htr_1800/gt_files/combined_train.txt',
152
+ test_mode=False,
153
+ pipeline=None)
154
+ pr_cr_combined_test = dict(
155
+ type='RecogTextDataset',
156
+ parser_cfg=dict(
157
+ type='LineStrParser', keys=['filename', 'text'], separator='|'),
158
+ data_root='/ceph/hpc/scratch/user/euerikl/data/line_images',
159
+ ann_file=
160
+ '/ceph/hpc/home/euerikl/projects/htr_1800/gt_files/combined_eval.txt',
161
+ test_mode=True,
162
+ pipeline=None)
163
+ out_of_domain_1700_all_test = dict(
164
+ type='RecogTextDataset',
165
+ parser_cfg=dict(type='LineJsonParser', keys=['filename', 'text']),
166
+ data_root='/ceph/hpc/scratch/user/euerikl/data/HTR_1700_testsets_clean',
167
+ ann_file=
168
+ '/ceph/hpc/home/euerikl/projects/hf_openmmlab_models/data/processed/1700_testsets_gt/1700_HTR_testsets_all.jsonl',
169
+ test_mode=True,
170
+ pipeline=None)
171
+ train_list = [
172
+ dict(
173
+ type='RecogTextDataset',
174
+ parser_cfg=dict(type='LineJsonParser', keys=['filename', 'text']),
175
+ data_root='/ceph/hpc/scratch/user/euerikl/data/HTR_1700_clean',
176
+ ann_file=
177
+ '/ceph/hpc/home/euerikl/projects/hf_openmmlab_models/data/processed/1700_HTR_shuffled_train.jsonl',
178
+ test_mode=False,
179
+ pipeline=None),
180
+ dict(
181
+ type='RecogTextDataset',
182
+ parser_cfg=dict(
183
+ type='LineStrParser', keys=['filename', 'text'], separator='|'),
184
+ data_root='/ceph/hpc/scratch/user/euerikl/data/line_images',
185
+ ann_file=
186
+ '/ceph/hpc/home/euerikl/projects/htr_1800/gt_files/combined_train.txt',
187
+ test_mode=False,
188
+ pipeline=None)
189
+ ]
190
+ test_list = [
191
+ dict(
192
+ type='RecogTextDataset',
193
+ parser_cfg=dict(type='LineJsonParser', keys=['filename', 'text']),
194
+ data_root='/ceph/hpc/scratch/user/euerikl/data/HTR_1700_testsets_clean',
195
+ ann_file=
196
+ '/ceph/hpc/home/euerikl/projects/hf_openmmlab_models/data/processed/1700_testsets_gt/1700_HTR_testsets_all.jsonl',
197
+ test_mode=True,
198
+ pipeline=None)
199
+ ]
200
+ train_dataset = dict(
201
+ type='ConcatDataset',
202
+ datasets=[
203
+ dict(
204
+ type='RecogTextDataset',
205
+ parser_cfg=dict(type='LineJsonParser', keys=['filename', 'text']),
206
+ data_root='/ceph/hpc/scratch/user/euerikl/data/HTR_1700_clean',
207
+ ann_file=
208
+ '/ceph/hpc/home/euerikl/projects/hf_openmmlab_models/data/processed/1700_HTR_shuffled_train.jsonl',
209
+ test_mode=False,
210
+ pipeline=None),
211
+ dict(
212
+ type='RecogTextDataset',
213
+ parser_cfg=dict(
214
+ type='LineStrParser', keys=['filename', 'text'],
215
+ separator='|'),
216
+ data_root='/ceph/hpc/scratch/user/euerikl/data/line_images',
217
+ ann_file=
218
+ '/ceph/hpc/home/euerikl/projects/htr_1800/gt_files/combined_train.txt',
219
+ test_mode=False,
220
+ pipeline=None)
221
+ ],
222
+ pipeline=[
223
+ dict(
224
+ type='LoadImageFromFile',
225
+ file_client_args=dict(backend='disk'),
226
+ ignore_empty=True,
227
+ min_size=2),
228
+ dict(type='LoadOCRAnnotations', with_text=True),
229
+ dict(type='Resize', scale=(400, 64), keep_ratio=False),
230
+ dict(
231
+ type='PackTextRecogInputs',
232
+ meta_keys=('img_path', 'ori_shape', 'img_shape', 'valid_ratio'))
233
+ ])
234
+ test_dataset = dict(
235
+ type='ConcatDataset',
236
+ datasets=[
237
+ dict(
238
+ type='RecogTextDataset',
239
+ parser_cfg=dict(type='LineJsonParser', keys=['filename', 'text']),
240
+ data_root=
241
+ '/ceph/hpc/scratch/user/euerikl/data/HTR_1700_testsets_clean',
242
+ ann_file=
243
+ '/ceph/hpc/home/euerikl/projects/hf_openmmlab_models/data/processed/1700_testsets_gt/1700_HTR_testsets_all.jsonl',
244
+ test_mode=True,
245
+ pipeline=None)
246
+ ],
247
+ pipeline=[
248
+ dict(type='LoadImageFromFile', file_client_args=dict(backend='disk')),
249
+ dict(type='Resize', scale=(400, 64), keep_ratio=False),
250
+ dict(type='LoadOCRAnnotations', with_text=True),
251
+ dict(
252
+ type='PackTextRecogInputs',
253
+ meta_keys=('img_path', 'ori_shape', 'img_shape', 'valid_ratio'))
254
+ ])
255
+ train_dataloader = dict(
256
+ batch_size=8,
257
+ num_workers=1,
258
+ persistent_workers=True,
259
+ sampler=dict(type='DefaultSampler', shuffle=True),
260
+ dataset=dict(
261
+ type='ConcatDataset',
262
+ datasets=[
263
+ dict(
264
+ type='RecogTextDataset',
265
+ parser_cfg=dict(
266
+ type='LineJsonParser', keys=['filename', 'text']),
267
+ data_root='/ceph/hpc/scratch/user/euerikl/data/HTR_1700_clean',
268
+ ann_file=
269
+ '/ceph/hpc/home/euerikl/projects/hf_openmmlab_models/data/processed/1700_HTR_shuffled_train.jsonl',
270
+ test_mode=False,
271
+ pipeline=None),
272
+ dict(
273
+ type='RecogTextDataset',
274
+ parser_cfg=dict(
275
+ type='LineStrParser',
276
+ keys=['filename', 'text'],
277
+ separator='|'),
278
+ data_root='/ceph/hpc/scratch/user/euerikl/data/line_images',
279
+ ann_file=
280
+ '/ceph/hpc/home/euerikl/projects/htr_1800/gt_files/combined_train.txt',
281
+ test_mode=False,
282
+ pipeline=None)
283
+ ],
284
+ pipeline=[
285
+ dict(
286
+ type='LoadImageFromFile',
287
+ file_client_args=dict(backend='disk'),
288
+ ignore_empty=True,
289
+ min_size=2),
290
+ dict(type='LoadOCRAnnotations', with_text=True),
291
+ dict(type='Resize', scale=(400, 64), keep_ratio=False),
292
+ dict(
293
+ type='PackTextRecogInputs',
294
+ meta_keys=('img_path', 'ori_shape', 'img_shape',
295
+ 'valid_ratio'))
296
+ ]))
297
+ test_dataloader = dict(
298
+ batch_size=8,
299
+ num_workers=1,
300
+ persistent_workers=True,
301
+ drop_last=False,
302
+ sampler=dict(type='DefaultSampler', shuffle=False),
303
+ dataset=dict(
304
+ type='ConcatDataset',
305
+ datasets=[
306
+ dict(
307
+ type='RecogTextDataset',
308
+ parser_cfg=dict(
309
+ type='LineJsonParser', keys=['filename', 'text']),
310
+ data_root=
311
+ '/ceph/hpc/scratch/user/euerikl/data/HTR_1700_testsets_clean',
312
+ ann_file=
313
+ '/ceph/hpc/home/euerikl/projects/hf_openmmlab_models/data/processed/1700_testsets_gt/1700_HTR_testsets_all.jsonl',
314
+ test_mode=True,
315
+ pipeline=None)
316
+ ],
317
+ pipeline=[
318
+ dict(
319
+ type='LoadImageFromFile',
320
+ file_client_args=dict(backend='disk')),
321
+ dict(type='Resize', scale=(400, 64), keep_ratio=False),
322
+ dict(type='LoadOCRAnnotations', with_text=True),
323
+ dict(
324
+ type='PackTextRecogInputs',
325
+ meta_keys=('img_path', 'ori_shape', 'img_shape',
326
+ 'valid_ratio'))
327
+ ]))
328
+ val_dataloader = dict(
329
+ batch_size=8,
330
+ num_workers=1,
331
+ persistent_workers=True,
332
+ drop_last=False,
333
+ sampler=dict(type='DefaultSampler', shuffle=False),
334
+ dataset=dict(
335
+ type='ConcatDataset',
336
+ datasets=[
337
+ dict(
338
+ type='RecogTextDataset',
339
+ parser_cfg=dict(
340
+ type='LineJsonParser', keys=['filename', 'text']),
341
+ data_root=
342
+ '/ceph/hpc/scratch/user/euerikl/data/HTR_1700_testsets_clean',
343
+ ann_file=
344
+ '/ceph/hpc/home/euerikl/projects/hf_openmmlab_models/data/processed/1700_testsets_gt/1700_HTR_testsets_all.jsonl',
345
+ test_mode=True,
346
+ pipeline=None)
347
+ ],
348
+ pipeline=[
349
+ dict(
350
+ type='LoadImageFromFile',
351
+ file_client_args=dict(backend='disk')),
352
+ dict(type='Resize', scale=(400, 64), keep_ratio=False),
353
+ dict(type='LoadOCRAnnotations', with_text=True),
354
+ dict(
355
+ type='PackTextRecogInputs',
356
+ meta_keys=('img_path', 'ori_shape', 'img_shape',
357
+ 'valid_ratio'))
358
+ ]))
359
+ gpu_ids = range(0, 4)
360
+ cudnn_benchmark = True
361
+ work_dir = '/ceph/hpc/home/euerikl/projects/hf_openmmlab_models/models/checkpoints/1700_1800_combined_satrn'
362
+ checkpoint_config = dict(interval=1)
363
+ auto_scale_lr = dict(base_batch_size=32)
364
+ launcher = 'pytorch'
dict1700.txt ADDED
@@ -0,0 +1,148 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+
2
+
3
+ !
4
+ "
5
+ #
6
+ %
7
+ &
8
+ '
9
+ (
10
+ )
11
+ *
12
+ +
13
+ ,
14
+ -
15
+ .
16
+ /
17
+ 0
18
+ 1
19
+ 2
20
+ 3
21
+ 4
22
+ 5
23
+ 6
24
+ 7
25
+ 8
26
+ 9
27
+ :
28
+ ;
29
+ <
30
+ =
31
+ ?
32
+ A
33
+ B
34
+ C
35
+ D
36
+ E
37
+ F
38
+ G
39
+ H
40
+ I
41
+ J
42
+ K
43
+ L
44
+ M
45
+ N
46
+ O
47
+ P
48
+ Q
49
+ R
50
+ S
51
+ T
52
+ U
53
+ V
54
+ W
55
+ X
56
+ Y
57
+ Z
58
+ [
59
+ \
60
+ ]
61
+ _
62
+ a
63
+ b
64
+ c
65
+ d
66
+ e
67
+ f
68
+ g
69
+ h
70
+ i
71
+ j
72
+ k
73
+ l
74
+ m
75
+ n
76
+ o
77
+ p
78
+ q
79
+ r
80
+ s
81
+ t
82
+ u
83
+ v
84
+ w
85
+ x
86
+ y
87
+ z
88
+ {
89
+ |
90
+ }
91
+ ~
92
+ £
93
+ §
94
+ ¨
95
+ ¬
96
+ ¼
97
+ ½
98
+ ¾
99
+ Ä
100
+ Å
101
+ Ö
102
+ Ü
103
+ ß
104
+ à
105
+ á
106
+ ä
107
+ å
108
+ æ
109
+ ç
110
+ è
111
+ é
112
+ ê
113
+ ë
114
+ ï
115
+ ô
116
+ ö
117
+ ü
118
+ ý
119
+ ÿ
120
+ œ
121
+ ƒ
122
+ ̄
123
+ ̅
124
+ Ψ
125
+ β
126
+ ӕ
127
+ َ
128
+
129
+
130
+
131
+
132
+
133
+
134
+
135
+
136
+
137
+
138
+
139
+
140
+
141
+
142
+
143
+
144
+
145
+
146
+
147
+ 🜍
148
+ 🜔
epoch_5 (1).pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f33697ee5e56fb8357fe0d759e168123e43cd2d48d87c2ef4413cdb84bf668cc
3
+ size 800082032