# change to list chars of your dataset or use default vietnamese chars
vocab: 'aAàÀảẢãÃáÁạẠăĂằẰẳẲẵẴắẮặẶâÂầẦẩẨẫẪấẤậẬbBcCdDđĐeEèÈẻẺẽẼéÉẹẸêÊềỀểỂễỄếẾệỆfFgGhHiIìÌỉỈĩĨíÍịỊjJkKlLmMnNoOòÒỏỎõÕóÓọỌôÔồỒổỔỗỖốỐộỘơƠờỜởỞỡỠớỚợỢpPqQrRsStTuUùÙủỦũŨúÚụỤưƯừỪửỬữỮứỨựỰvVwWxXyYỳỲỷỶỹỸýÝỵỴzZ0123456789!"#$%&''()*+,-./:;<=>?@[\]^_`{|}~ '

# cpu, cuda, cuda:0
device: cuda:0

seq_modeling: transformer
transformer:  
    d_model: 256
    nhead: 8
    num_encoder_layers: 6
    num_decoder_layers: 6
    dim_feedforward: 2048
    max_seq_length: 1024
    pos_dropout: 0.1
    trans_dropout: 0.1

optimizer:
    max_lr: 0.0003 
    pct_start: 0.1

trainer:
    batch_size: 32
    print_every: 200
    valid_every: 4000
    iters: 100000
    # where to save our model for prediction
    export: ./weights/transformerocr.pth
    checkpoint: ./checkpoint/transformerocr_checkpoint.pth
    log: ./train.log
    # null to disable compuate accuracy, or change to number of sample to enable validiation while training
    metrics: null

dataset:    
    # name of your dataset
    name: data
    # path to annotation and image
    data_root: ./img/
    train_annotation: annotation_train.txt
    valid_annotation: annotation_val_small.txt
    # resize image to 32 height, larger height will increase accuracy
    image_height: 32
    image_min_width: 32
    image_max_width: 512

dataloader:
    num_workers: 3
    pin_memory: True

aug:
    image_aug: true
    masked_language_model: true

predictor:
    # disable or enable beamsearch while prediction, use beamsearch will be slower
    beamsearch: False

quiet: False