{
    "query_token_id": "[unused0]",
    "doc_token_id": "[unused1]",
    "query_token": "[Q]",
    "doc_token": "[D]",
    "ncells": null,
    "centroid_score_threshold": null,
    "ndocs": null,
    "only_approx": false,
    "index_path": null,
    "nbits": 1,
    "kmeans_niters": 4,
    "resume": false,
    "max_sampled_pid": -1,
    "max_num_partitions": -1,
    "use_lagacy_build_ivf": false,
    "reuse_centroids_from": null,
    "similarity": "cosine",
    "bsize": 8,
    "accumsteps": 1,
    "lr": 5e-6,
    "maxsteps": 400000,
    "save_every": null,
    "resume_optimizer": false,
    "fix_broken_optimizer_state": false,
    "warmup": null,
    "warmup_bert": null,
    "relu": false,
    "nway": 6,
    "n_query_alternative": 1,
    "use_ib_negatives": false,
    "kd_loss": "KLD",
    "reranker": false,
    "distillation_alpha": 1.0,
    "ignore_scores": false,
    "model_name": "xlm-roberta-large",
    "force_resize_embeddings": true,
    "shuffle_passages": true,
    "sampling_max_beta": 1.0,
    "over_one_epoch": true,
    "multilang": false,
    "nolangreg": true,
    "query_maxlen": 32,
    "attend_to_mask_tokens": false,
    "interaction": "colbert",
    "dim": 128,
    "doc_maxlen": 220,
    "mask_punctuation": true,
    "checkpoint": "xlm-roberta-large",
    "triples": "\/expscratch\/eyang\/workspace\/plaid-aux\/training_triples\/msmarco-passages\/triples_mt5xxl-monot5-mmarco-engeng.jsonl",
    "collection": "Combination(elements)[.\/mt_msmarco\/de\/collection_passages.tsv+.\/mt_msmarco\/es\/collection_passages.tsv+.\/mt_msmarco\/fr\/collection_passages.tsv]",
    "queries": "irds:msmarco-passage\/train:queries",
    "index_name": null,
    "debug": false,
    "overwrite": false,
    "root": "\/expscratch\/eyang\/workspace\/plaid-aux\/experiments",
    "experiment": "mtt-tdistill",
    "index_root": null,
    "name": "multi-KLD-shuf-5e-6\/mt5xxl-monot5-mmarco-engeng.de+es+fr\/64bat.6way",
    "rank": 0,
    "nranks": 8,
    "amp": true,
    "ivf_num_processes": 20,
    "ivf_use_tempdir": false,
    "ivf_merging_ways": 2,
    "gpus": 8,
    "meta": {
        "hostname": "r8n06",
        "git_branch": "eugene-training",
        "git_hash": "683c265c010eb72e00617bdb6771b333acfa0191",
        "git_commit_datetime": "2024-01-04 14:11:31-05:00",
        "current_datetime": "Jan 19, 2024 ; 2:10AM EST (-0500)",
        "cmd": "train.py --model_name xlm-roberta-large --training_triples \/expscratch\/eyang\/workspace\/plaid-aux\/training_triples\/msmarco-passages\/triples_mt5xxl-monot5-mmarco-engeng.jsonl --training_queries msmarco-passage\/train --training_collection .\/mt_msmarco\/de\/collection_passages.tsv .\/mt_msmarco\/es\/collection_passages.tsv .\/mt_msmarco\/fr\/collection_passages.tsv --training_collection_mixing elements --other_args nolangreg=True --maxsteps 400000 --learning_rate 5e-6 --kd_loss KLD --per_device_batch_size 8 --nway 6 --run_tag multi-KLD-shuf-5e-6\/mt5xxl-monot5-mmarco-engeng.de+es+fr --experiment mtt-tdistill",
        "version": "colbert-v0.4"
    }
}
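
For orientation, a minimal sketch of how a metadata dump like the one above might be loaded and inspected with plain Python. The local filename "metadata.json" is an assumption for illustration only; nothing below is a ColBERT API call, just standard-library json usage against the fields shown in the dump.

import json

# Load the training metadata shown above (assumed to be saved locally
# as "metadata.json"; the filename is illustrative).
with open("metadata.json") as f:
    config = json.load(f)

# A few of the training hyperparameters recorded in the dump.
print(config["model_name"])                 # xlm-roberta-large
print(config["lr"], config["maxsteps"])     # 5e-06 400000
print(config["bsize"] * config["nranks"])   # 8 x 8 = 64, matching "64bat" in the run name

# Provenance information lives under the nested "meta" object.
print(config["meta"]["git_hash"])
print(config["meta"]["version"])            # colbert-v0.4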