---
# Configuration for the "sts" trainer using the "cosent" loss function.
# NOTE(review): base_model_name is presumably the checkpoint this run starts
# from and model_name the name given to the resulting model - confirm against
# the code that consumes this file.

trainer: "sts"
model_name: "bertimbau-100m-europarl-eubookshop-ted2020-tatoeba-ct1-nli-gist10-sts-cosent20-v1"
base_model_name: "bertimbau-100m-europarl-eubookshop-ted2020-tatoeba-ct1-nli-gist10-v1"
loss_function: "cosent"

# Best seed for this model from the first 30 seeds.
seed: 1

# Written with an explicit mantissa dot: YAML 1.1 loaders (e.g. PyYAML) only
# resolve exponent notation as a float when the mantissa contains ".", so a
# bare 1e-5 would be loaded as the string "1e-5".
learning_rate: 1.0e-5
warmup_ratio: 0.1
weight_decay: 0.01
batch_size: 64
use_amp: true
epochs: 20
validations_per_epoch: 10

# For reference, hyperparameters (HPs) used by JRodrigues to train
# albertina-100m-portuguese-ptpt-encoder:
#   learning_rate                 1e-5
#   lr_scheduler_type             linear
#   weight_decay                  0.01
#   per_device_train_batch_size   192
#   gradient_accumulation_steps   1
#   num_train_epochs              150
#   num_warmup_steps              10000