File size: 1,681 Bytes
07485f1
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
{
  "train_data_path": "SufficientCompanyData/train_df_balanced_1500_15000.tsv",
  "validation_data_path": "SufficientCompanyData/val_df_balanced_750_7500.tsv",
  "dataset_reader": {
    "type": "ccqa",
    "lazy": true,
    "tokenizer": {
      "type": "pretrained_transformer",
      "model_name": "roberta-base",
      "add_special_tokens": false
    },
    "token_indexers": {
      "bert": {
        "type": "pretrained_transformer",
        "model_name": "roberta-base"
      }
    }
  },
  "data_loader": {
    "batch_size": 8
  },
  "model": {
    "type": "basic_classifier_modified",
    "text_field_embedder": {
      "type": "basic",
      "token_embedders": {
        "bert": {
          "type": "pretrained_transformer",
          "model_name": "roberta-base",
          "override_weights_file": "PretrainedRoberta/checkpoint_best.pt",
          "last_layer_only": false
        }
      }
    },
    "seq2vec_encoder": {
      "type": "bert_pooler",
      "pretrained_model": "roberta-base",
      "override_weights_file": "PretrainedRoberta/checkpoint_best.pt",
      "requires_grad": true
    }
  },
  "trainer": {
    "cuda_device": 0,
    "num_epochs": 50,
    "grad_norm": 1,
    "validation_metric": "+accuracy",
    "optimizer": {
      "type": "huggingface_adamw",
      "lr": 2e-06,
      "weight_decay": 0.1
    },
    "checkpointer": {
      "num_serialized_models_to_keep": 1
    }
  }
}