Raghavan committed on
Commit
a9e2406
1 Parent(s): 85d0d01

Upload 4 files

Browse files
Files changed (2) hide show
  1. config.json +428 -333
  2. model.safetensors +2 -2
config.json CHANGED
@@ -2,364 +2,456 @@
2
  "architectures": [
3
  "FastForSceneTextRecognition"
4
  ],
5
- "backbone_act_func": "relu",
6
- "backbone_bias": false,
7
- "backbone_dilation": 1,
8
- "backbone_dropout_rate": 0,
9
- "backbone_groups": 1,
10
- "backbone_has_shuffle": false,
11
- "backbone_in_channels": 3,
12
- "backbone_kernel_size": 3,
13
- "backbone_ops_order": "weight_bn_act",
14
- "backbone_out_channels": 64,
15
- "backbone_stage1_dilation": [
16
- 1,
17
- 1,
18
- 1,
19
- 1,
20
- 1,
21
- 1,
22
- 1,
23
- 1,
24
- 1,
25
- 1
26
- ],
27
- "backbone_stage1_groups": [
28
- 1,
29
- 1,
30
- 1,
31
- 1,
32
- 1,
33
- 1,
34
- 1,
35
- 1,
36
- 1,
37
- 1
38
- ],
39
- "backbone_stage1_in_channels": [
40
- 64,
41
- 64,
42
- 64,
43
- 64,
44
- 64,
45
- 64,
46
- 64,
47
- 64,
48
- 64,
49
- 64
50
- ],
51
- "backbone_stage1_kernel_size": [
52
- [
53
- 3,
54
- 3
55
- ],
56
- [
57
- 3,
58
- 3
59
- ],
60
- [
61
- 3,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
62
  1
63
  ],
64
- [
65
- 3,
66
- 3
67
- ],
68
- [
69
- 3,
 
 
 
 
70
  1
71
  ],
72
- [
73
- 3,
74
- 3
75
- ],
76
- [
77
- 3,
78
- 3
79
- ],
80
- [
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
81
  1,
82
- 3
83
- ],
84
- [
85
- 3,
86
- 3
87
- ],
88
- [
89
- 3,
90
- 3
91
- ]
92
- ],
93
- "backbone_stage1_out_channels": [
94
- 64,
95
- 64,
96
- 64,
97
- 64,
98
- 64,
99
- 64,
100
- 64,
101
- 64,
102
- 64,
103
- 64
104
- ],
105
- "backbone_stage1_stride": [
106
- 1,
107
- 2,
108
- 1,
109
- 1,
110
- 1,
111
- 1,
112
- 1,
113
- 1,
114
- 1,
115
- 1
116
- ],
117
- "backbone_stage2_dilation": [
118
- 1,
119
- 1,
120
- 1,
121
- 1,
122
- 1,
123
- 1,
124
- 1,
125
- 1,
126
- 1,
127
- 1
128
- ],
129
- "backbone_stage2_groups": [
130
- 1,
131
- 1,
132
- 1,
133
- 1,
134
- 1,
135
- 1,
136
- 1,
137
- 1,
138
- 1,
139
- 1
140
- ],
141
- "backbone_stage2_in_channels": [
142
- 64,
143
- 128,
144
- 128,
145
- 128,
146
- 128,
147
- 128,
148
- 128,
149
- 128,
150
- 128,
151
- 128
152
- ],
153
- "backbone_stage2_kernel_size": [
154
- [
155
- 3,
156
- 3
157
- ],
158
- [
159
  1,
160
- 3
161
- ],
162
- [
163
- 3,
164
- 3
165
- ],
166
- [
167
- 3,
168
  1
169
  ],
170
- [
171
- 3,
172
- 3
173
- ],
174
- [
175
- 3,
176
- 3
177
- ],
178
- [
179
- 3,
180
  1
181
  ],
182
- [
183
- 3,
 
 
 
 
 
 
 
 
184
  1
185
  ],
186
- [
187
- 3,
188
- 3
189
- ],
190
- [
191
- 3,
192
- 3
193
- ]
194
- ],
195
- "backbone_stage2_out_channels": [
196
- 128,
197
- 128,
198
- 128,
199
- 128,
200
- 128,
201
- 128,
202
- 128,
203
- 128,
204
- 128,
205
- 128
206
- ],
207
- "backbone_stage2_stride": [
208
- 2,
209
- 1,
210
- 1,
211
- 1,
212
- 1,
213
- 1,
214
- 1,
215
- 1,
216
- 1,
217
- 1
218
- ],
219
- "backbone_stage3_dilation": [
220
- 1,
221
- 1,
222
- 1,
223
- 1,
224
- 1,
225
- 1,
226
- 1,
227
- 1
228
- ],
229
- "backbone_stage3_groups": [
230
- 1,
231
- 1,
232
- 1,
233
- 1,
234
- 1,
235
- 1,
236
- 1,
237
- 1
238
- ],
239
- "backbone_stage3_in_channels": [
240
- 128,
241
- 256,
242
- 256,
243
- 256,
244
- 256,
245
- 256,
246
- 256,
247
- 256
248
- ],
249
- "backbone_stage3_kernel_size": [
250
- [
251
- 3,
252
- 3
253
- ],
254
- [
255
- 3,
256
- 3
257
- ],
258
- [
259
- 3,
260
- 3
261
- ],
262
- [
263
  1,
264
- 3
265
- ],
266
- [
267
- 3,
268
- 3
269
- ],
270
- [
271
- 3,
272
  1
273
  ],
274
- [
275
- 3,
276
- 3
 
 
 
 
 
 
277
  ],
278
- [
279
- 3,
 
 
 
 
 
 
280
  1
281
- ]
282
- ],
283
- "backbone_stage3_out_channels": [
284
- 256,
285
- 256,
286
- 256,
287
- 256,
288
- 256,
289
- 256,
290
- 256,
291
- 256
292
- ],
293
- "backbone_stage3_stride": [
294
- 2,
295
- 1,
296
- 1,
297
- 1,
298
- 1,
299
- 1,
300
- 1,
301
- 1
302
- ],
303
- "backbone_stage4_dilation": [
304
- 1,
305
- 1,
306
- 1,
307
- 1,
308
- 1
309
- ],
310
- "backbone_stage4_groups": [
311
- 1,
312
- 1,
313
- 1,
314
- 1,
315
- 1
316
- ],
317
- "backbone_stage4_in_channels": [
318
- 256,
319
- 512,
320
- 512,
321
- 512,
322
- 512
323
- ],
324
- "backbone_stage4_kernel_size": [
325
- [
326
- 3,
327
- 3
328
  ],
329
- [
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
330
  1,
331
- 3
 
 
 
 
 
332
  ],
333
- [
334
- 3,
 
 
 
335
  1
336
  ],
337
- [
338
- 3,
 
 
 
339
  1
340
  ],
341
- [
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
342
  1,
343
- 3
344
- ]
345
- ],
346
- "backbone_stage4_out_channels": [
347
- 512,
348
- 512,
349
- 512,
350
- 512,
351
- 512
352
- ],
353
- "backbone_stage4_stride": [
354
- 2,
355
- 1,
356
- 1,
357
- 1,
358
- 1
359
- ],
360
- "backbone_stride": 2,
361
- "backbone_use_bn": true,
 
 
 
 
 
 
 
 
362
  "bbox_type": "poly",
 
363
  "head_conv_dilation": 1,
364
  "head_conv_groups": 1,
365
  "head_conv_in_channels": 512,
@@ -434,6 +526,9 @@
434
  1,
435
  1
436
  ],
 
437
  "torch_dtype": "float32",
438
- "transformers_version": "4.35.0.dev0"
 
 
439
  }
 
2
  "architectures": [
3
  "FastForSceneTextRecognition"
4
  ],
5
+ "backbone": null,
6
+ "backbone_config": {
7
+ "_name_or_path": "",
8
+ "act_func": "relu",
9
+ "add_cross_attention": false,
10
+ "architectures": null,
11
+ "bad_words_ids": null,
12
+ "begin_suppress_tokens": null,
13
+ "bias": false,
14
+ "bos_token_id": null,
15
+ "chunk_size_feed_forward": 0,
16
+ "cross_attention_hidden_size": null,
17
+ "decoder_start_token_id": null,
18
+ "depths": [
19
+ 10,
20
+ 10,
21
+ 8,
22
+ 5
23
+ ],
24
+ "dilation": 1,
25
+ "diversity_penalty": 0.0,
26
+ "do_sample": false,
27
+ "dropout_rate": 0,
28
+ "early_stopping": false,
29
+ "encoder_no_repeat_ngram_size": 0,
30
+ "eos_token_id": null,
31
+ "exponential_decay_length_penalty": null,
32
+ "finetuning_task": null,
33
+ "forced_bos_token_id": null,
34
+ "forced_eos_token_id": null,
35
+ "groups": 1,
36
+ "has_shuffle": false,
37
+ "hidden_sizes": [
38
+ 64,
39
+ 64,
40
+ 128,
41
+ 256,
42
+ 512
43
+ ],
44
+ "id2label": {
45
+ "0": "LABEL_0",
46
+ "1": "LABEL_1"
47
+ },
48
+ "in_channels": 3,
49
+ "initializer_range": 0.02,
50
+ "is_decoder": false,
51
+ "is_encoder_decoder": false,
52
+ "kernel_size": 3,
53
+ "label2id": {
54
+ "LABEL_0": 0,
55
+ "LABEL_1": 1
56
+ },
57
+ "length_penalty": 1.0,
58
+ "max_length": 20,
59
+ "min_length": 0,
60
+ "model_type": "textnet",
61
+ "no_repeat_ngram_size": 0,
62
+ "num_beam_groups": 1,
63
+ "num_beams": 1,
64
+ "num_return_sequences": 1,
65
+ "ops_order": "weight_bn_act",
66
+ "out_channels": 64,
67
+ "out_features": [
68
+ "stage4"
69
+ ],
70
+ "out_indices": [
71
+ 4
72
+ ],
73
+ "output_attentions": false,
74
+ "output_hidden_states": false,
75
+ "output_scores": false,
76
+ "pad_token_id": null,
77
+ "prefix": null,
78
+ "problem_type": null,
79
+ "pruned_heads": {},
80
+ "remove_invalid_values": false,
81
+ "repetition_penalty": 1.0,
82
+ "return_dict": true,
83
+ "return_dict_in_generate": false,
84
+ "sep_token_id": null,
85
+ "stage1_dilation": [
86
+ 1,
87
+ 1,
88
+ 1,
89
+ 1,
90
+ 1,
91
+ 1,
92
+ 1,
93
+ 1,
94
+ 1,
95
  1
96
  ],
97
+ "stage1_groups": [
98
+ 1,
99
+ 1,
100
+ 1,
101
+ 1,
102
+ 1,
103
+ 1,
104
+ 1,
105
+ 1,
106
+ 1,
107
  1
108
  ],
109
+ "stage1_in_channels": [
110
+ 64,
111
+ 64,
112
+ 64,
113
+ 64,
114
+ 64,
115
+ 64,
116
+ 64,
117
+ 64,
118
+ 64,
119
+ 64
120
+ ],
121
+ "stage1_kernel_size": [
122
+ [
123
+ 3,
124
+ 3
125
+ ],
126
+ [
127
+ 3,
128
+ 3
129
+ ],
130
+ [
131
+ 3,
132
+ 1
133
+ ],
134
+ [
135
+ 3,
136
+ 3
137
+ ],
138
+ [
139
+ 3,
140
+ 1
141
+ ],
142
+ [
143
+ 3,
144
+ 3
145
+ ],
146
+ [
147
+ 3,
148
+ 3
149
+ ],
150
+ [
151
+ 1,
152
+ 3
153
+ ],
154
+ [
155
+ 3,
156
+ 3
157
+ ],
158
+ [
159
+ 3,
160
+ 3
161
+ ]
162
+ ],
163
+ "stage1_out_channels": [
164
+ 64,
165
+ 64,
166
+ 64,
167
+ 64,
168
+ 64,
169
+ 64,
170
+ 64,
171
+ 64,
172
+ 64,
173
+ 64
174
+ ],
175
+ "stage1_stride": [
176
+ 1,
177
+ 2,
178
+ 1,
179
+ 1,
180
+ 1,
181
+ 1,
182
+ 1,
183
  1,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
184
  1,
 
 
 
 
 
 
 
 
185
  1
186
  ],
187
+ "stage2_dilation": [
188
+ 1,
189
+ 1,
190
+ 1,
191
+ 1,
192
+ 1,
193
+ 1,
194
+ 1,
195
+ 1,
196
+ 1,
197
  1
198
  ],
199
+ "stage2_groups": [
200
+ 1,
201
+ 1,
202
+ 1,
203
+ 1,
204
+ 1,
205
+ 1,
206
+ 1,
207
+ 1,
208
+ 1,
209
  1
210
  ],
211
+ "stage2_in_channels": [
212
+ 64,
213
+ 128,
214
+ 128,
215
+ 128,
216
+ 128,
217
+ 128,
218
+ 128,
219
+ 128,
220
+ 128,
221
+ 128
222
+ ],
223
+ "stage2_kernel_size": [
224
+ [
225
+ 3,
226
+ 3
227
+ ],
228
+ [
229
+ 1,
230
+ 3
231
+ ],
232
+ [
233
+ 3,
234
+ 3
235
+ ],
236
+ [
237
+ 3,
238
+ 1
239
+ ],
240
+ [
241
+ 3,
242
+ 3
243
+ ],
244
+ [
245
+ 3,
246
+ 3
247
+ ],
248
+ [
249
+ 3,
250
+ 1
251
+ ],
252
+ [
253
+ 3,
254
+ 1
255
+ ],
256
+ [
257
+ 3,
258
+ 3
259
+ ],
260
+ [
261
+ 3,
262
+ 3
263
+ ]
264
+ ],
265
+ "stage2_out_channels": [
266
+ 128,
267
+ 128,
268
+ 128,
269
+ 128,
270
+ 128,
271
+ 128,
272
+ 128,
273
+ 128,
274
+ 128,
275
+ 128
276
+ ],
277
+ "stage2_stride": [
278
+ 2,
279
+ 1,
280
+ 1,
281
+ 1,
282
+ 1,
283
+ 1,
284
+ 1,
285
+ 1,
 
 
286
  1,
 
 
 
 
 
 
 
 
287
  1
288
  ],
289
+ "stage3_dilation": [
290
+ 1,
291
+ 1,
292
+ 1,
293
+ 1,
294
+ 1,
295
+ 1,
296
+ 1,
297
+ 1
298
  ],
299
+ "stage3_groups": [
300
+ 1,
301
+ 1,
302
+ 1,
303
+ 1,
304
+ 1,
305
+ 1,
306
+ 1,
307
  1
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
308
  ],
309
+ "stage3_in_channels": [
310
+ 128,
311
+ 256,
312
+ 256,
313
+ 256,
314
+ 256,
315
+ 256,
316
+ 256,
317
+ 256
318
+ ],
319
+ "stage3_kernel_size": [
320
+ [
321
+ 3,
322
+ 3
323
+ ],
324
+ [
325
+ 3,
326
+ 3
327
+ ],
328
+ [
329
+ 3,
330
+ 3
331
+ ],
332
+ [
333
+ 1,
334
+ 3
335
+ ],
336
+ [
337
+ 3,
338
+ 3
339
+ ],
340
+ [
341
+ 3,
342
+ 1
343
+ ],
344
+ [
345
+ 3,
346
+ 3
347
+ ],
348
+ [
349
+ 3,
350
+ 1
351
+ ]
352
+ ],
353
+ "stage3_out_channels": [
354
+ 256,
355
+ 256,
356
+ 256,
357
+ 256,
358
+ 256,
359
+ 256,
360
+ 256,
361
+ 256
362
+ ],
363
+ "stage3_stride": [
364
+ 2,
365
  1,
366
+ 1,
367
+ 1,
368
+ 1,
369
+ 1,
370
+ 1,
371
+ 1
372
  ],
373
+ "stage4_dilation": [
374
+ 1,
375
+ 1,
376
+ 1,
377
+ 1,
378
  1
379
  ],
380
+ "stage4_groups": [
381
+ 1,
382
+ 1,
383
+ 1,
384
+ 1,
385
  1
386
  ],
387
+ "stage4_in_channels": [
388
+ 256,
389
+ 512,
390
+ 512,
391
+ 512,
392
+ 512
393
+ ],
394
+ "stage4_kernel_size": [
395
+ [
396
+ 3,
397
+ 3
398
+ ],
399
+ [
400
+ 1,
401
+ 3
402
+ ],
403
+ [
404
+ 3,
405
+ 1
406
+ ],
407
+ [
408
+ 3,
409
+ 1
410
+ ],
411
+ [
412
+ 1,
413
+ 3
414
+ ]
415
+ ],
416
+ "stage4_out_channels": [
417
+ 512,
418
+ 512,
419
+ 512,
420
+ 512,
421
+ 512
422
+ ],
423
+ "stage4_stride": [
424
+ 2,
425
  1,
426
+ 1,
427
+ 1,
428
+ 1
429
+ ],
430
+ "stage_names": [
431
+ "stem",
432
+ "stage1",
433
+ "stage2",
434
+ "stage3",
435
+ "stage4"
436
+ ],
437
+ "stride": 2,
438
+ "suppress_tokens": null,
439
+ "task_specific_params": null,
440
+ "temperature": 1.0,
441
+ "tf_legacy_loss": false,
442
+ "tie_encoder_decoder": false,
443
+ "tie_word_embeddings": true,
444
+ "tokenizer_class": null,
445
+ "top_k": 50,
446
+ "top_p": 1.0,
447
+ "torch_dtype": null,
448
+ "torchscript": false,
449
+ "typical_p": 1.0,
450
+ "use_bfloat16": false,
451
+ "use_bn": true
452
+ },
453
  "bbox_type": "poly",
454
+ "dilation": null,
455
  "head_conv_dilation": 1,
456
  "head_conv_groups": 1,
457
  "head_conv_in_channels": 512,
 
526
  1,
527
  1
528
  ],
529
+ "num_channels": 3,
530
  "torch_dtype": "float32",
531
+ "transformers_version": "4.35.0.dev0",
532
+ "use_pretrained_backbone": null,
533
+ "use_timm_backbone": false
534
  }
model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:2d237669ed2cd9a1dd8045fc83d6d3998128d1e468c262003abb2758f2331a05
3
- size 65633016
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a7d9b44ab14e3b8b1ac32572dcb22b3114428f3e28eb9cf20f677921807977f9
3
+ size 65638304