File size: 1,907 Bytes
084226a
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
{
    "dataset_reader": {
        "type": "snli",
        "token_indexers": {
            "tokens": {
                "type": "pretrained_transformer",
                "max_length": 512,
                "model_name": "roberta-large"
            }
        },
        "tokenizer": {
            "type": "pretrained_transformer",
            "add_special_tokens": false,
            "model_name": "roberta-large"
        }
    },
    "model": {
        "type": "basic_classifier",
        "dropout": 0.1,
        "feedforward": {
            "activations": "tanh",
            "hidden_dims": 1024,
            "input_dim": 1024,
            "num_layers": 1
        },
        "namespace": "tags",
        "seq2vec_encoder": {
            "type": "cls_pooler",
            "embedding_dim": 1024
        },
        "text_field_embedder": {
            "token_embedders": {
                "tokens": {
                    "type": "pretrained_transformer",
                    "max_length": 512,
                    "model_name": "roberta-large"
                }
            }
        }
    },
    "train_data_path": "https://allennlp.s3.amazonaws.com/datasets/multinli/multinli_1.0_train.jsonl",
    "validation_data_path": "https://allennlp.s3.amazonaws.com/datasets/multinli/multinli_1.0_dev_matched.jsonl",
    "test_data_path": "https://allennlp.s3.amazonaws.com/datasets/multinli/multinli_1.0_dev_mismatched.jsonl",
    "trainer": {
        "cuda_device": 0,
        "learning_rate_scheduler": {
            "type": "slanted_triangular",
            "cut_frac": 0.06
        },
        "num_epochs": 3,
        "optimizer": {
            "type": "huggingface_adamw",
            "lr": 2e-06,
            "weight_decay": 0.1
        },
        "validation_metric": "+accuracy"
    },
    "data_loader": {
        "batch_sampler": {
            "type": "bucket",
            "batch_size": 16
        }
    }
}