---
# Configuration for the `vietocr_new` project: model architecture, optimizer,
# training loop, dataset locations, data loading, augmentation and prediction.
#
# NOTE(review): the nesting below was reconstructed from a copy of this file
# whose line breaks and indentation had been lost. Values are unchanged, but
# confirm the parent of any key marked NOTE(review) against the code that
# reads this config.

project: vietocr_new
name: Train
device: cuda:0

# Change to the list of characters in your dataset, or keep the default
# Vietnamese character set. The trailing space inside the quotes is part of
# the vocabulary.
vocab: 'aAàÀảẢãÃáÁạẠăĂằẰẳẲẵẴắẮặẶâÂầẦẩẨẫẪấẤậẬbBcCdDđĐeEèÈẻẺẽẼéÉẹẸêÊềỀểỂễỄếẾệỆfFgGhHiIìÌỉỈĩĨíÍịỊjJkKlLmMnNoOòÒỏỎõÕóÓọỌôÔồỒổỔỗỖốỐộỘơƠờỜởỞỡỠớỚợỢpPqQrRsStTuUùÙủỦũŨúÚụỤưƯừỪửỬữỮứỨựỰvVwWxXyYỳỲỷỶỹỸýÝỵỴzZ0123456789!"#$%&''()*+,-./:;<=>?@[\]^_`{|}~ '

seq_modeling: seq2seq
# The section is named `transformer`, but its keys configure the seq2seq
# model selected by `seq_modeling` above.
transformer:
  encoder_hidden: 256
  decoder_hidden: 256
  img_channel: 256
  decoder_embedded: 256
  dropout: 0.1

optimizer:
  max_lr: 0.001
  pct_start: 0.1

trainer:
  batch_size: 128
  print_every: 100
  valid_every: 500
  test_every: 500
  iters: 10000
  # Where to save the model used for prediction.
  export: weights/train_model.pth
  checkpoint: ./checkpoint/checkpoint_model.pth
  log: ./train.log
  # null to disable computing accuracy, or set to the number of samples to
  # enable validation while training.
  metrics: 49228
  # Presumably the test-set counterpart of `metrics` — TODO confirm.
  test_metrics: 28918
  # NOTE(review): this key sat between the trainer keys and `dataset:` in the
  # flattened source; it may belong at the top level instead — confirm.
  pretrained: false

dataset:
  # Path to the images.
  data_root: /mnt/disk3/CGGANv2
  # Paths to the annotation files.
  train_annotation: datasets/labels/train.txt
  valid_annotation: datasets/labels/valid.txt
  test_annotation: datasets/labels/test.txt
  # Paths to the LMDB datasets.
  train_lmdb: datasets/lmdb/train
  valid_lmdb: datasets/lmdb/valid
  test_lmdb: datasets/lmdb/test
  # Resize images to a height of 32; a larger height will increase accuracy.
  image_height: 32
  image_min_width: 32
  image_max_width: 512

dataloader:
  num_workers: 12
  pin_memory: true

aug:
  image_aug: false
  masked_language_model: false

predictor:
  # Enable or disable beam search during prediction; beam search is slower.
  beamsearch: false

quiet: false

# For training.
pretrain: https://vocr.vn/data/vietocr/vgg_seq2seq.pth

# URL or local path (for prediction).
weights: https://vocr.vn/data/vietocr/vgg_seq2seq.pth

backbone: vgg19_bn
cnn:
  # Pooling stride sizes.
  ss:
    - [2, 2]
    - [2, 2]
    - [2, 1]
    - [2, 1]
    - [1, 1]
  # Pooling kernel sizes.
  ks:
    - [2, 2]
    - [2, 2]
    - [2, 1]
    - [2, 1]
    - [1, 1]
  # Dimension of the output feature map.
  hidden: 256