orionweller's picture
tart for sentence-bert form
f04aef7
{
"T": 0.1,
"architectures": [
"BertModel"
],
"attention_probs_dropout_prob": 0.1,
"augmentation": "none",
"beta1": 0.9,
"beta2": 0.98,
"bi_encoder": false,
"chunk_length": 256,
"classifier_dropout": null,
"continue_training": false,
"contrastive_mode": "moco",
"dropout": 0.1,
"eps": 1e-06,
"eval_data": [
"minilm_denoised_T0_32_datasets_fixed_instruction_unfollowing_dev.jsonl"
],
"eval_datasets": [],
"eval_datasets_dir": "./",
"eval_freq": 2000,
"eval_normalize_text": false,
"freeze_ctx_encoder": false,
"global_rank": 0,
"hard_order": false,
"hidden_act": "gelu",
"hidden_dropout_prob": 0.1,
"hidden_size": 768,
"initializer_range": 0.02,
"intermediate_size": 3072,
"kd": false,
"label_smoothing": 0.0,
"layer_norm_eps": 1e-12,
"loading_mode": "split",
"local_rank": 0,
"log_freq": 100,
"loss_type": "kl",
"lower_case": false,
"lr": 1e-05,
"lr_min_ratio": 0.0,
"main_addr": "learnfair7603",
"main_port": 15972,
"max_position_embeddings": 512,
"maxload": null,
"moco_train_mode_encoder_k": false,
"model_path": "/checkpoint/akariasai/contriever/contriever_wiki2020_ft_msmarco/checkpoint/step-10000/",
"model_type": "bert",
"momentum": 0.999,
"n_context": 50,
"negative_ctxs": 5,
"negative_hard_min_idx": 0,
"negative_hard_ratio": 0.1,
"norm_doc": false,
"norm_query": false,
"num_attention_heads": 12,
"num_hidden_layers": 12,
"num_workers": 5,
"optim": "adamw",
"output_dir": "/checkpoint/akariasai/contriever/instruction_unfollowing_full_data_from_new_checkpoints_5_0.1",
"pad_token_id": 0,
"per_gpu_batch_size": 16,
"per_gpu_eval_batch_size": 16,
"pooling": "average",
"position_embedding_type": "absolute",
"prob_augmentation": 0.0,
"projection_size": 768,
"queue_size": 65536,
"random_init": false,
"random_sort": false,
"ratio_max": 0.5,
"ratio_min": 0.1,
"retriever_model_id": "bert-base-uncased",
"rho": 0.05,
"sampling_coefficient": 0.0,
"save_freq": 2000,
"scheduler": "linear",
"score_function": "dot",
"seed": 0,
"temperature": 0.05,
"torch_dtype": "float32",
"total_steps": 20000,
"train_data": [
"minilm_denoised_T0_32_datasets_fixed_instruction_unfollowing_train.jsonl"
],
"transformers_version": "4.36.2",
"type_vocab_size": 2,
"use_cache": true,
"vocab_size": 30522,
"warmup_steps": 1000,
"weight_decay": 0.01,
"world_size": 64
}