# lightning.pytorch==2.3.3
seed_everything: 0
trainer:
  # Train with bfloat16 mixed precision for 50k steps.
  precision: bf16-mixed
  max_steps: 50000
data:
  class_path: lightning_ir.LightningIRDataModule
  init_args:
    num_workers: 1
    train_batch_size: 64
    shuffle_train: true
    train_dataset:
      class_path: lightning_ir.RunDataset
      init_args:
        # Run file with teacher scores (rank-distillm set-encoder run on MS MARCO passage).
        run_path_or_id: msmarco-passage/train/rank-distillm/set-encoder
        # Sample 8 documents per query from the top-100 of the run,
        # using log-random sampling (biased toward higher-ranked documents).
        depth: 100
        sample_size: 8
        sampling_strategy: log_random
        # Use the run's scores as distillation targets.
        targets: score
        normalize_targets: false
model:
  class_path: lightning_ir.BiEncoderModule
  init_args:
    model_name_or_path: bert-base-uncased
    config:
      # ColBERT-style late-interaction bi-encoder with 128-dim token embeddings.
      class_path: lightning_ir.ColConfig
      init_args:
        similarity_function: dot
        # Expand queries with mask tokens (ColBERT-style query augmentation)
        # and allow attention to the expanded tokens.
        query_expansion: true
        attend_to_query_expanded_tokens: true
        query_mask_scoring_tokens: null
        # Exclude punctuation tokens from document-side scoring.
        doc_mask_scoring_tokens: punctuation
        query_aggregation_function: mean
        normalize: false
        add_marker_tokens: false
        embedding_dim: 128
        projection: linear
        query_pooling_strategy: mean
        doc_expansion: false
        attend_to_doc_expanded_tokens: false
        doc_pooling_strategy: mean
        sparsification: null
        query_length: 32
        doc_length: 256
    loss_functions:
      # Two distillation losses over the teacher scores, plus a contrastive
      # loss over in-batch negatives.
      - class_path: lightning_ir.SupervisedMarginMSE
      - class_path: lightning_ir.KLDivergence
      - class_path: lightning_ir.InBatchCrossEntropy
        init_args:
          pos_sampling_technique: first
          neg_sampling_technique: first
          max_num_neg_samples: 8
optimizer:
  class_path: torch.optim.AdamW
  init_args:
    lr: 2.0e-05
lr_scheduler:
  # Linear warmup for 5k steps, followed by linear decay.
  class_path: lightning_ir.LinearLRSchedulerWithLinearWarmup
  init_args:
    num_warmup_steps: 5000
    final_value: 0.02
    num_delay_steps: 0
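
# Usage (a sketch, not part of the config itself): with the lightning-ir
# package installed, a fine-tuning config like this is typically passed to
# the lightning-ir CLI's fit subcommand; the file name below is a
# placeholder for wherever this file is saved.
#
#   lightning-ir fit --config col-distillation.yaml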