Training configuration metadata for a ColBERT-X / PLAID-X cross-language information retrieval (CLIR) model based on xlm-roberta-large (PyTorch / Transformers):
{
    "query_token_id": "[unused0]",
    "doc_token_id": "[unused1]",
    "query_token": "[Q]",
    "doc_token": "[D]",
    "ncells": null,
    "centroid_score_threshold": null,
    "ndocs": null,
    "only_approx": false,
    "index_path": null,
    "nbits": 1,
    "kmeans_niters": 4,
    "resume": false,
    "max_sampled_pid": -1,
    "max_num_partitions": -1,
    "use_lagacy_build_ivf": false,
    "reuse_centroids_from": null,
    "similarity": "cosine",
    "bsize": 8,
    "accumsteps": 1,
    "lr": 5e-6,
    "maxsteps": 400000,
    "save_every": null,
    "resume_optimizer": false,
    "fix_broken_optimizer_state": false,
    "warmup": null,
    "warmup_bert": null,
    "relu": false,
    "nway": 6,
    "n_query_alternative": 1,
    "use_ib_negatives": false,
    "kd_loss": "KLD",
    "reranker": false,
    "distillation_alpha": 1.0,
    "ignore_scores": false,
    "model_name": "xlm-roberta-large",
    "force_resize_embeddings": true,
    "shuffle_passages": true,
    "sampling_max_beta": 1.0,
    "over_one_epoch": true,
    "multilang": false,
    "nolangreg": true,
    "query_maxlen": 32,
    "attend_to_mask_tokens": false,
    "interaction": "colbert",
    "dim": 128,
    "doc_maxlen": 220,
    "mask_punctuation": true,
    "checkpoint": "xlm-roberta-large",
    "triples": "\/expscratch\/eyang\/workspace\/plaid-aux\/training_triples\/msmarco-passages\/triples_mt5xxl-monot5-mmarco-engeng.jsonl",
    "collection": "Combination(elements)[.\/mt_msmarco\/de\/collection_passages.tsv+.\/mt_msmarco\/es\/collection_passages.tsv+.\/mt_msmarco\/fr\/collection_passages.tsv]",
    "queries": "irds:msmarco-passage\/train:queries",
    "index_name": null,
    "debug": false,
    "overwrite": false,
    "root": "\/expscratch\/eyang\/workspace\/plaid-aux\/experiments",
    "experiment": "mtt-tdistill",
    "index_root": null,
    "name": "multi-KLD-shuf-5e-6\/mt5xxl-monot5-mmarco-engeng.de+es+fr\/64bat.6way",
    "rank": 0,
    "nranks": 8,
    "amp": true,
    "ivf_num_processes": 20,
    "ivf_use_tempdir": false,
    "ivf_merging_ways": 2,
    "gpus": 8,
    "meta": {
        "hostname": "r8n06",
        "git_branch": "eugene-training",
        "git_hash": "683c265c010eb72e00617bdb6771b333acfa0191",
        "git_commit_datetime": "2024-01-04 14:11:31-05:00",
        "current_datetime": "Jan 19, 2024 ;  2:10AM EST (-0500)",
        "cmd": "train.py --model_name xlm-roberta-large --training_triples \/expscratch\/eyang\/workspace\/plaid-aux\/training_triples\/msmarco-passages\/triples_mt5xxl-monot5-mmarco-engeng.jsonl --training_queries msmarco-passage\/train --training_collection .\/mt_msmarco\/de\/collection_passages.tsv .\/mt_msmarco\/es\/collection_passages.tsv .\/mt_msmarco\/fr\/collection_passages.tsv --training_collection_mixing elements --other_args nolangreg=True --maxsteps 400000 --learning_rate 5e-6 --kd_loss KLD --per_device_batch_size 8 --nway 6 --run_tag multi-KLD-shuf-5e-6\/mt5xxl-monot5-mmarco-engeng.de+es+fr --experiment mtt-tdistill",
        "version": "colbert-v0.4"
    }
}
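
The JSON above is the verbatim configuration dump saved alongside the checkpoint. As a minimal sketch (not part of the repository), the following Python reads the file and recovers the effective global batch size, bsize x accumsteps x nranks = 8 x 1 x 8 = 64, which matches the "64bat.6way" suffix of the run name; the file name "artifact.metadata" is an assumption about where this JSON lives.

import json

# Minimal sketch: load the training configuration shown above.
# The file name "artifact.metadata" is an assumption; point this
# at wherever the JSON is actually stored.
with open("artifact.metadata") as f:
    config = json.load(f)

# Effective global batch size = per-device batch size
#   x gradient-accumulation steps x number of distributed ranks.
effective_batch = config["bsize"] * config["accumsteps"] * config["nranks"]

print(f"model:           {config['model_name']}")    # xlm-roberta-large
print(f"distillation:    {config['kd_loss']}, nway={config['nway']}")
print(f"lr / max steps:  {config['lr']} / {config['maxsteps']}")
print(f"effective batch: {effective_batch}")         # 8 * 1 * 8 = 64, cf. "64bat.6way"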