File size: 2,409 Bytes
33c0fae
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
# Top-level run identifiers.
# NOTE(review): presumably used by the training script for experiment
# naming/logging — confirm against the caller.
project: vietocr_new
name: Train

# Device string for the run. `cuda:0` presumably selects the first CUDA GPU
# (PyTorch-style naming) — TODO confirm how the loader consumes it.
device: cuda:0

# Character set for the recognizer. Replace it with the characters that occur
# in your dataset, or keep this default Vietnamese set.
# Keep this file UTF-8 encoded: re-decoding it with another code page corrupts
# every accented letter below.
# Single-quoted on purpose: the backslash stays literal and '' encodes one '.
# The trailing space before the closing quote is part of the vocabulary.
vocab: 'aAàÀảẢãÃáÁạẠăĂằẰẳẲẵẴắẮặẶâÂầẦẩẨẫẪấẤậẬbBcCdDđĐeEèÈẻẺẽẼéÉẹẸêÊềỀểỂễỄếẾệỆfFgGhHiIìÌỉỈĩĨíÍịỊjJkKlLmMnNoOòÒỏỎõÕóÓọỌôÔồỒổỔỗỖốỐộỘơƠờỜởỞỡỠớỚợỢpPqQrRsStTuUùÙủỦũŨúÚụỤưƯừỪửỬữỮứỨựỰvVwWxXyYỳỲỷỶỹỸýÝỵỴzZ0123456789!"#$%&''()*+,-./:;<=>?@[\]^_`{|}~ '

# Sequence-modelling head selected for the recognizer.
seq_modeling: seq2seq
# Hyper-parameters of the sequence model.
# NOTE(review): this section is keyed `transformer` although seq_modeling is
# seq2seq — presumably the loader reads this key for either head; confirm
# before renaming it.
transformer:
  encoder_hidden: 256
  decoder_hidden: 256
  # NOTE(review): equals cnn.hidden (256) in this file — presumably the two
  # must stay in sync; verify against the model code.
  img_channel: 256
  decoder_embedded: 256
  dropout: 0.1

# Optimizer / learning-rate schedule settings.
# NOTE(review): max_lr and pct_start look like one-cycle schedule parameters
# (peak learning rate and warm-up fraction) — confirm against the trainer.
optimizer:
  max_lr: 0.001
  pct_start: 0.1

# Training-loop settings.
trainer:
  batch_size: 128
  # NOTE(review): the *_every values are presumably intervals, in iterations,
  # for logging, validation and testing — confirm the unit with the trainer.
  print_every: 100
  valid_every: 500
  test_every: 500
  iters: 10000
  # Where to save the model used for prediction.
  export: weights/train_model.pth
  checkpoint: ./checkpoint/checkpoint_model.pth
  log: ./train.log
  # Set to null to disable accuracy computation, or to the number of samples
  # to evaluate when validating during training.
  metrics: 49228
  # NOTE(review): presumably the test-set counterpart of `metrics` — confirm.
  test_metrics: 28918
  # NOTE(review): presumably controls whether the top-level `pretrain`
  # weights are loaded before training — verify against the trainer.
  pretrained: false

# Dataset locations and image-size limits.
dataset:
  # Root directory of the images.
  # NOTE(review): absolute, machine-specific path — adjust per environment.
  data_root: /mnt/disk3/CGGANv2
  # Annotation files for each split.
  train_annotation: datasets/labels/train.txt
  valid_annotation: datasets/labels/valid.txt
  test_annotation: datasets/labels/test.txt
  # LMDB dataset directories for each split.
  train_lmdb: datasets/lmdb/train
  valid_lmdb: datasets/lmdb/valid
  test_lmdb: datasets/lmdb/test

  # Images are resized to this height (32); a larger height will increase
  # accuracy.
  image_height: 32
  # NOTE(review): presumably bounds on the resized image width — confirm.
  image_min_width: 32
  image_max_width: 512

# Data-loading settings.
# NOTE(review): key names match PyTorch DataLoader arguments (num_workers,
# pin_memory) — presumably forwarded to it; confirm against the trainer.
dataloader:
  num_workers: 12
  pin_memory: true

# Augmentation switches; both are turned off in this configuration.
aug:
  image_aug: false
  # NOTE(review): presumably a label-masking augmentation for the decoder —
  # confirm what the trainer does when this is true.
  masked_language_model: false

# Prediction-time settings.
predictor:
  # Enable or disable beam search during prediction; enabling it makes
  # prediction slower.
  beamsearch: false

# NOTE(review): presumably silences progress/log output when true — confirm.
quiet: false

# Weights used for training.
# NOTE(review): trainer.pretrained is false in this file — presumably this URL
# is then not loaded; verify against the trainer.
pretrain: https://vocr.vn/data/vietocr/vgg_seq2seq.pth

# Weights used for prediction: a URL or a local path.
weights: https://vocr.vn/data/vietocr/vgg_seq2seq.pth

# CNN feature extractor placed in front of the sequence model.
backbone: vgg19_bn
cnn:
  # Pooling stride sizes, one pair per pooling stage.
  # NOTE(review): pairs are presumably (height, width) — confirm.
  ss:
    - [2, 2]
    - [2, 2]
    - [2, 1]
    - [2, 1]
    - [1, 1]
  # Pooling kernel sizes, one pair per pooling stage (same length as `ss`).
  ks:
    - [2, 2]
    - [2, 2]
    - [2, 1]
    - [2, 1]
    - [1, 1]
  # Dimension of the output feature map.
  hidden: 256