---
# Training configuration for a GIST-embedding fine-tuning run on mMARCO (pt) pairs.
trainer: "gist"
model_name: "bertimbau-335m-mmarco-pairs-gist1-v1"
base_model_name: "bertimbau-335m-europarl-eubookshop-ted2020-tatoeba-ct1-nli-gist10-sts-angle20-v3"
# Smaller guide model used by the GIST loss to filter in-batch negatives.
guide_model_name: "bertimbau-100m-europarl-eubookshop-ted2020-tatoeba-ct1-nli-gist10-sts-cosent20-v1"

# Information-retrieval validation during training.
validation_ir: true
validation_ir_corpus_size: 50000
# validation_ir_corpus_size: 500

# see https://huggingface.co/docs/datasets/v2.18.0/en/about_dataset_load
train_dataset_configs:
  - alias: "mmarco"
    path: "unicamp-dl/mmarco"
    name: "portuguese"
    split: "train"
    # split: "train[1000:2000]"
    # Examples are (anchor, positive) pairs: not triples, not labelled.
    examples_are_triples: false
    examples_are_labelled: false

seed: 1
# NOTE(review): written as 1.0e-5 (not 1e-5) so YAML 1.1 parsers such as PyYAML
# resolve it as a float; plain 1e-5 (no dot in the mantissa) loads as a string.
learning_rate: 1.0e-5
warmup_ratio: 0.1
weight_decay: 0.01
# batch_size: 100  # 100 fits very tightly (40GB used), could crash on batches of longer texts
batch_size: 85  # 85 uses up to 37.5GB out of 40GB
use_amp: true  # automatic mixed precision
epochs: 1
# validations_per_epoch: 1
validations_per_epoch: 100