File size: 1,506 Bytes
626eca0
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
# Values defined once and reused by the dataset entries under
# `datamodule.datasets` through `${data.shared_params.<key>}` interpolations.
shared_params:
  # Left unset (null) in this file; presumably supplied by an override
  # elsewhere -- TODO confirm.
  passages_path: null
  # NOTE(review): the unit is not visible from this file (likely tokens) --
  # confirm against the dataset class.
  max_passage_length: 64
  # Canonical lowercase booleans: parsed the same way by YAML 1.1 and 1.2
  # loaders and accepted by yamllint's `truthy` rule.
  prefetch_batches: true
  use_topics: false

# Data module definition: one training dataset, a list of validation
# datasets, a list of test datasets, and per-split loader settings.
# NOTE(review): the `${data.shared_params.*}` interpolations assume this file
# is composed under a top-level `data` key -- confirm against the root config.
datamodule:
  _target_: goldenretriever.lightning_modules.pl_data_modules.PLDataModule
  # Resolved from the model section of the composed config.
  tokenizer: ${model.language_model}
  datasets:
    # `train` is a single mapping, unlike `val`/`test`, which are sequences.
    train:
      _target_: goldenretriever.data.dpr.datasets.InBatchNegativesDPRDataset
      name: "train"
      # Left unset (null) in this file; presumably supplied by an override --
      # TODO confirm.
      path: null
      passages_path: ${data.shared_params.passages_path}
      max_passage_length: ${data.shared_params.max_passage_length}
      prefetch_batches: ${data.shared_params.prefetch_batches}
      # `subsample` and `shuffle` are set only for the training split.
      subsample: null
      shuffle: true
      use_topics: ${data.shared_params.use_topics}

    # Sequence with one entry; further validation sets can be appended.
    val:
      - _target_: goldenretriever.data.dpr.datasets.InBatchNegativesDPRDataset
        name: "val"
        path: null
        passages_path: ${data.shared_params.passages_path}
        max_passage_length: ${data.shared_params.max_passage_length}
        prefetch_batches: ${data.shared_params.prefetch_batches}
        use_topics: ${data.shared_params.use_topics}

    # Sequence with one entry, mirroring the `val` layout.
    test:
      - _target_: goldenretriever.data.dpr.datasets.InBatchNegativesDPRDataset
        name: "test"
        path: null
        passages_path: ${data.shared_params.passages_path}
        max_passage_length: ${data.shared_params.max_passage_length}
        prefetch_batches: ${data.shared_params.prefetch_batches}
        use_topics: ${data.shared_params.use_topics}

  # Per-split batch size; all three splits use the same value here.
  batch_sizes:
    train: 64
    val: 64
    test: 64

  # Per-split worker count; all three splits use the same value here.
  num_workers:
    train: 4
    val: 4
    test: 4