{
  "query_token_id": "[unused0]",
  "doc_token_id": "[unused1]",
  "query_token": "[Q]",
  "doc_token": "[D]",
  "ncells": null,
  "centroid_score_threshold": null,
  "ndocs": null,
  "only_approx": false,
  "index_path": null,
  "nbits": 1,
  "kmeans_niters": 4,
  "resume": false,
  "max_sampled_pid": -1,
  "max_num_partitions": -1,
  "use_lagacy_build_ivf": false,
  "reuse_centroids_from": null,
  "similarity": "cosine",
  "bsize": 8,
  "accumsteps": 1,
  "lr": 5e-6,
  "maxsteps": 400000,
  "save_every": null,
  "resume_optimizer": false,
  "fix_broken_optimizer_state": false,
  "warmup": null,
  "warmup_bert": null,
  "relu": false,
  "nway": 6,
  "n_query_alternative": 1,
  "use_ib_negatives": false,
  "kd_loss": "KLD",
  "reranker": false,
  "distillation_alpha": 1.0,
  "ignore_scores": false,
  "model_name": "xlm-roberta-large",
  "force_resize_embeddings": true,
  "shuffle_passages": true,
  "sampling_max_beta": 1.0,
  "over_one_epoch": true,
  "multilang": false,
  "nolangreg": true,
  "query_maxlen": 32,
  "attend_to_mask_tokens": false,
  "interaction": "colbert",
  "dim": 128,
  "doc_maxlen": 220,
  "mask_punctuation": true,
  "checkpoint": "xlm-roberta-large",
  "triples": "\/expscratch\/eyang\/workspace\/plaid-aux\/training_triples\/msmarco-passages\/triples_mt5xxl-monot5-mmarco-engeng.jsonl",
  "collection": "Combination(elements)[.\/mt_msmarco\/de\/collection_passages.tsv+.\/mt_msmarco\/es\/collection_passages.tsv+.\/mt_msmarco\/fr\/collection_passages.tsv]",
  "queries": "irds:msmarco-passage\/train:queries",
  "index_name": null,
  "debug": false,
  "overwrite": false,
  "root": "\/expscratch\/eyang\/workspace\/plaid-aux\/experiments",
  "experiment": "mtt-tdistill",
  "index_root": null,
  "name": "multi-KLD-shuf-5e-6\/mt5xxl-monot5-mmarco-engeng.de+es+fr\/64bat.6way",
  "rank": 0,
  "nranks": 8,
  "amp": true,
  "ivf_num_processes": 20,
  "ivf_use_tempdir": false,
  "ivf_merging_ways": 2,
  "gpus": 8,
  "meta": {
    "hostname": "r8n06",
    "git_branch": "eugene-training",
    "git_hash": "683c265c010eb72e00617bdb6771b333acfa0191",
    "git_commit_datetime": "2024-01-04 14:11:31-05:00",
    "current_datetime": "Jan 19, 2024 ; 2:10AM EST (-0500)",
    "cmd": "train.py --model_name xlm-roberta-large --training_triples \/expscratch\/eyang\/workspace\/plaid-aux\/training_triples\/msmarco-passages\/triples_mt5xxl-monot5-mmarco-engeng.jsonl --training_queries msmarco-passage\/train --training_collection .\/mt_msmarco\/de\/collection_passages.tsv .\/mt_msmarco\/es\/collection_passages.tsv .\/mt_msmarco\/fr\/collection_passages.tsv --training_collection_mixing elements --other_args nolangreg=True --maxsteps 400000 --learning_rate 5e-6 --kd_loss KLD --per_device_batch_size 8 --nway 6 --run_tag multi-KLD-shuf-5e-6\/mt5xxl-monot5-mmarco-engeng.de+es+fr --experiment mtt-tdistill",
    "version": "colbert-v0.4"
  }
}
|