# VietOCR / vgg-seq2seq.yaml
# nhay103's picture
# add app
# 113bcef
# raw
# history blame
# 2.41 kB
# Run identification. NOTE(review): how `project` and `name` are consumed is not visible in this file — confirm.
project: vietocr_new
name: Train
# Device identifier; it contains ':' but never ': ', so it remains a plain string scalar.
device: cuda:0
# Character set: replace with the characters of your dataset, or keep the default Vietnamese characters.
# Single-quoted scalar: the backslash is literal and '' encodes a single apostrophe.
vocab: 'aAàÀảẢãÃáÁạẠăĂằẰẳẲẵẴắẮặẶâÂầẦẩẨẫẪấẤậẬbBcCdDđĐeEèÈẻẺẽẼéÉẹẸêÊềỀểỂễỄếẾệỆfFgGhHiIìÌỉỈĩĨíÍịỊjJkKlLmMnNoOòÒỏỎõÕóÓọỌôÔồỒổỔỗỖốỐộỘơƠờỜởỞỡỠớỚợỢpPqQrRsStTuUùÙủỦũŨúÚụỤưƯừỪửỬữỮứỨựỰvVwWxXyYỳỲỷỶỹỸýÝỵỴzZ0123456789!"#$%&''()*+,-./:;<=>?@[\]^_`{|}~ '
# Sequence-modeling variant. NOTE(review): presumably selects how the `transformer` settings are used — confirm.
seq_modeling: seq2seq
# Settings for the sequence model, grouped under `transformer` so the key
# holds a mapping rather than null.
# NOTE(review): sizes are presumably hidden/embedding dimensions — confirm.
transformer:
  encoder_hidden: 256
  decoder_hidden: 256
  img_channel: 256
  decoder_embedded: 256
  dropout: 0.1
# Optimizer / learning-rate settings, grouped under `optimizer`.
# NOTE(review): `pct_start` looks like a one-cycle schedule warm-up fraction — confirm.
optimizer:
  max_lr: 0.001
  pct_start: 0.1
# Training-loop settings, grouped under `trainer`.
trainer:
  batch_size: 128
  # Reporting / evaluation intervals.
  # NOTE(review): units are presumably training iterations — confirm.
  print_every: 100
  valid_every: 500
  test_every: 500
  iters: 10000
  # Where to save the model used for prediction.
  export: weights/train_model.pth
  checkpoint: ./checkpoint/checkpoint_model.pth
  log: ./train.log
  # null disables accuracy computation; set to a number of samples to
  # enable validation while training.
  metrics: 49228
  # NOTE(review): presumably the test-set counterpart of `metrics` — confirm.
  test_metrics: 28918
# NOTE(review): indentation was lost in this copy of the file, so it is unclear
# whether `pretrained` is a top-level key or belongs under `trainer` — confirm
# against the code that reads it before relying on this placement.
pretrained: false
# Dataset locations and image sizing, grouped under `dataset`.
dataset:
  # Path to the images.
  data_root: /mnt/disk3/CGGANv2
  # Paths to the annotation files.
  train_annotation: datasets/labels/train.txt
  valid_annotation: datasets/labels/valid.txt
  test_annotation: datasets/labels/test.txt
  # Paths to the LMDB datasets.
  train_lmdb: datasets/lmdb/train
  valid_lmdb: datasets/lmdb/valid
  test_lmdb: datasets/lmdb/test
  # Images are resized to a height of 32; a larger height will increase accuracy.
  image_height: 32
  image_min_width: 32
  image_max_width: 512
# Data-loading settings, grouped under `dataloader`.
# NOTE(review): presumably forwarded to the framework's data loader — confirm.
dataloader:
  num_workers: 12
  pin_memory: true
# Augmentation switches, grouped under `aug`; both are disabled here.
aug:
  image_aug: false
  masked_language_model: false
# Prediction settings, grouped under `predictor`.
predictor:
  # Enable or disable beam search during prediction; beam search is slower.
  beamsearch: false
# Top-level switch. NOTE(review): which output it silences is not visible in this file — confirm.
quiet: false
# Weights used for training.
pretrain: 'https://vocr.vn/data/vietocr/vgg_seq2seq.pth'
# Weights used for prediction: a URL or a local path.
weights: 'https://vocr.vn/data/vietocr/vgg_seq2seq.pth'
# NOTE(review): presumably names the CNN feature extractor configured by `cnn` — confirm.
backbone: 'vgg19_bn'
# CNN settings, grouped under `cnn`.
cnn:
  # Pooling stride sizes, one pair per entry.
  ss:
    - [2, 2]
    - [2, 2]
    - [2, 1]
    - [2, 1]
    - [1, 1]
  # Pooling kernel sizes, one pair per entry (same length as `ss`).
  ks:
    - [2, 2]
    - [2, 2]
    - [2, 1]
    - [2, 1]
    - [1, 1]
  # Dimension of the output feature map.
  hidden: 256