File size: 1,809 Bytes
bd22b5e
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
# Character set the model can read and emit. Replace it with the characters
# that occur in your dataset, or keep this default Vietnamese set
# (Vietnamese letters with tone marks, digits, ASCII punctuation, space).
# The value is single-quoted: '' stands for one literal apostrophe and
# backslash is a plain character. The file must be saved as UTF-8.
vocab: 'aAàÀảẢãÃáÁạẠăĂằẰẳẲẵẴắẮặẶâÂầẦẩẨẫẪấẤậẬbBcCdDđĐeEèÈẻẺẽẼéÉẹẸêÊềỀểỂễỄếẾệỆfFgGhHiIìÌỉỈĩĨíÍịỊjJkKlLmMnNoOòÒỏỎõÕóÓọỌôÔồỒổỔỗỖốỐộỘơƠờỜởỞỡỠớỚợỢpPqQrRsStTuUùÙủỦũŨúÚụỤưƯừỪửỬữỮứỨựỰvVwWxXyYỳỲỷỶỹỸýÝỵỴzZ0123456789!"#$%&''()*+,-./:;<=>?@[\]^_`{|}~ '

# Compute device to run on, e.g. cpu, cuda, or cuda:0.
device: 'cuda:0'

# Sequence-modeling backend; presumably selects the section of the same
# name below (confirm against the loader).
seq_modeling: 'transformer'
# Settings for the transformer sequence model.
transformer:
    d_model: 256
    nhead: 8
    num_encoder_layers: 6
    num_decoder_layers: 6
    dim_feedforward: 2048
    max_seq_length: 1024
    pos_dropout: 0.1
    trans_dropout: 0.1

# Learning-rate settings; the key names suggest a one-cycle style schedule
# (peak rate and warm-up fraction) -- confirm against the trainer code.
optimizer:
    max_lr: 0.0003
    pct_start: 0.1

# Training-loop settings.
trainer:
    batch_size: 32
    print_every: 200
    valid_every: 4000
    iters: 100000
    # Path where the trained model is saved for later prediction.
    export: './weights/transformerocr.pth'
    checkpoint: './checkpoint/transformerocr_checkpoint.pth'
    log: './train.log'
    # Set to null to skip accuracy computation, or to a number of samples
    # to enable validation during training.
    metrics: null

# Dataset location and image sizing.
dataset:
    # Name of your dataset.
    name: 'data'
    # Location of the annotation files and images.
    data_root: './img/'
    train_annotation: 'annotation_train.txt'
    valid_annotation: 'annotation_val_small.txt'
    # Images are resized to this height (32); a larger height will
    # increase accuracy.
    image_height: 32
    image_min_width: 32
    image_max_width: 512

# Data-loading settings.
dataloader:
    num_workers: 3
    pin_memory: true

# Augmentation switches; both are enabled here. What each switch does is
# defined by the training code, not by this file.
aug:
    'image_aug': true
    'masked_language_model': true

# Prediction-time settings.
predictor:
    # Turn beam search on or off for prediction; enabling it makes
    # prediction slower.
    beamsearch: false

# Quiet-mode flag (off); presumably suppresses console output when true --
# confirm against the consumer.
quiet: false