{ "T": 0.1, "architectures": [ "BertModel" ], "attention_probs_dropout_prob": 0.1, "augmentation": "none", "beta1": 0.9, "beta2": 0.98, "bi_encoder": false, "chunk_length": 256, "classifier_dropout": null, "continue_training": false, "contrastive_mode": "moco", "dropout": 0.1, "eps": 1e-06, "eval_data": [ "minilm_denoised_T0_32_datasets_fixed_instruction_unfollowing_dev.jsonl" ], "eval_datasets": [], "eval_datasets_dir": "./", "eval_freq": 2000, "eval_normalize_text": false, "freeze_ctx_encoder": false, "global_rank": 0, "hard_order": false, "hidden_act": "gelu", "hidden_dropout_prob": 0.1, "hidden_size": 768, "initializer_range": 0.02, "intermediate_size": 3072, "kd": false, "label_smoothing": 0.0, "layer_norm_eps": 1e-12, "loading_mode": "split", "local_rank": 0, "log_freq": 100, "loss_type": "kl", "lower_case": false, "lr": 1e-05, "lr_min_ratio": 0.0, "main_addr": "learnfair7603", "main_port": 15972, "max_position_embeddings": 512, "maxload": null, "moco_train_mode_encoder_k": false, "model_path": "/checkpoint/akariasai/contriever/contriever_wiki2020_ft_msmarco/checkpoint/step-10000/", "model_type": "bert", "momentum": 0.999, "n_context": 50, "negative_ctxs": 5, "negative_hard_min_idx": 0, "negative_hard_ratio": 0.1, "norm_doc": false, "norm_query": false, "num_attention_heads": 12, "num_hidden_layers": 12, "num_workers": 5, "optim": "adamw", "output_dir": "/checkpoint/akariasai/contriever/instruction_unfollowing_full_data_from_new_checkpoints_5_0.1", "pad_token_id": 0, "per_gpu_batch_size": 16, "per_gpu_eval_batch_size": 16, "pooling": "average", "position_embedding_type": "absolute", "prob_augmentation": 0.0, "projection_size": 768, "queue_size": 65536, "random_init": false, "random_sort": false, "ratio_max": 0.5, "ratio_min": 0.1, "retriever_model_id": "bert-base-uncased", "rho": 0.05, "sampling_coefficient": 0.0, "save_freq": 2000, "scheduler": "linear", "score_function": "dot", "seed": 0, "temperature": 0.05, "torch_dtype": "float32", "total_steps": 20000, "train_data": [ "minilm_denoised_T0_32_datasets_fixed_instruction_unfollowing_train.jsonl" ], "transformers_version": "4.36.2", "type_vocab_size": 2, "use_cache": true, "vocab_size": 30522, "warmup_steps": 1000, "weight_decay": 0.01, "world_size": 64 }