{
    "query_token_id": "[unused0]",
    "doc_token_id": "[unused1]",
    "query_token": "[Q]",
    "doc_token": "[D]",
    "ncells": null,
    "centroid_score_threshold": null,
    "ndocs": null,
    "index_path": null,
    "nbits": 1,
    "kmeans_niters": 4,
    "resume": false,
    "max_sampled_pid": -1,
    "max_num_partitions": -1,
    "use_lagacy_build_ivf": false,
    "reuse_centroids_from": null,
    "similarity": "cosine",
    "bsize": 8,
    "accumsteps": 1,
    "lr": 5e-6,
    "maxsteps": 200000,
    "save_every": null,
    "resume_optimizer": false,
    "fix_broken_optimizer_state": false,
    "warmup": null,
    "warmup_bert": null,
    "relu": false,
    "nway": 6,
    "n_query_alternative": 1,
    "use_ib_negatives": false,
    "kd_loss": "KLD",
    "reranker": false,
    "distillation_alpha": 1.0,
    "ignore_scores": false,
    "model_name": "xlm-roberta-large",
    "force_resize_embeddings": true,
    "shuffle_passages": true,
    "sampling_max_beta": 1.0,
    "over_one_epoch": true,
    "query_maxlen": 32,
    "attend_to_mask_tokens": false,
    "interaction": "colbert",
    "dim": 128,
    "doc_maxlen": 220,
    "mask_punctuation": true,
    "checkpoint": "xlm-roberta-large",
    "triples": "\/expscratch\/eyang\/workspace\/plaid-aux\/training_triples\/msmarco-passages\/triples_mt5xxl-monot5-mmarco-engeng.jsonl",
    "collection": null,
    "queries": null,
    "index_name": null,
    "overwrite": false,
    "root": "\/expscratch\/eyang\/workspace\/plaid-aux\/experiments",
    "experiment": "plaid_xlm-roberta-large_fixeddp",
    "index_root": null,
    "name": "rus-KLD-shuf-5e-6\/mt5xxl-monot5-mmarco-engeng\/64bat.6way",
    "rank": 0,
    "nranks": 8,
    "amp": true,
    "ivf_num_processes": 20,
    "gpus": 8,
    "meta": {
        "hostname": "r10n05",
        "git_branch": "eugene-training",
        "git_hash": "220bdf8155f0615e1bb2d997d8efe924ffe806b5",
        "git_commit_datetime": "2023-09-29 16:36:59-04:00",
        "current_datetime": "Oct 04, 2023 ;  1:17AM EDT (-0400)",
        "cmd": "train.py --model_name xlm-roberta-large --training_triples \/expscratch\/eyang\/workspace\/plaid-aux\/training_triples\/msmarco-passages\/triples_mt5xxl-monot5-mmarco-engeng.jsonl --training_irds_id neumarco\/ru\/train --maxsteps 200000 --learning_rate 5e-6 --kd_loss KLD --per_device_batch_size 8 --nway 6 --run_tag rus-KLD-shuf-5e-6\/mt5xxl-monot5-mmarco-engeng --experiment plaid_xlm-roberta-large_fixeddp",
        "version": "colbert-v0.4"
    }
}