# Training-run configuration: "sts" trainer using the "cosent" loss function.
trainer: "sts"
# Name given to the model produced by this run.
model_name: "bertimbau-100m-europarl-eubookshop-ted2020-tatoeba-ct1-nli-gist10-sts-cosent20-v1"
# NOTE(review): presumably the checkpoint this run starts from — confirm against the trainer code.
base_model_name: "bertimbau-100m-europarl-eubookshop-ted2020-tatoeba-ct1-nli-gist10-v1"
loss_function: "cosent"
seed: 1  # best seed for this model from first 30 seeds
# Written with an explicit decimal point: YAML 1.1 loaders (e.g. PyYAML) only
# resolve exponent notation as a float when the mantissa contains a ".";
# a bare 1e-5 would be loaded as the string "1e-5".
learning_rate: 1.0e-5
warmup_ratio: 0.1
weight_decay: 0.01
batch_size: 64
use_amp: true
epochs: 20
validations_per_epoch: 10
# Hyperparameters used by JRodrigues to train albertina-100m-portuguese-ptpt-encoder:
# learning_rate 1e-5
# lr_scheduler_type linear
# weight_decay 0.01
# per_device_train_batch_size 192
# gradient_accumulation_steps 1
# num_train_epochs 150
# num_warmup_steps 10000