janbakker committed on
Commit
92f0919
1 Parent(s): 819b848

Upload RobertaForContextualSequenceClassification

Browse files
Files changed (2) hide show
  1. config.json +124 -0
  2. pytorch_model.bin +3 -0
config.json ADDED
@@ -0,0 +1,124 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "roberta-base",
3
+ "accelerator": null,
4
+ "accumulate_grad_batches": null,
5
+ "add_context": true,
6
+ "add_cross_attention": true,
7
+ "amp_backend": null,
8
+ "amp_level": null,
9
+ "architectures": [
10
+ "RobertaForContextualSequenceClassification"
11
+ ],
12
+ "attention_probs_dropout_prob": 0.1,
13
+ "auto_lr_find": false,
14
+ "auto_scale_batch_size": false,
15
+ "auto_select_gpus": null,
16
+ "batch_size": 32,
17
+ "benchmark": null,
18
+ "binary_clf": false,
19
+ "bos_token_id": 0,
20
+ "check_val_every_n_epoch": 1,
21
+ "checkpoint": "planning_models/classifier/f2012rua/checkpoints/epoch=7-step=53400.ckpt",
22
+ "ckpt_metric": "val_macro_f1",
23
+ "classifier_dropout": null,
24
+ "context_dir": "context/complex/train",
25
+ "context_doc_id": "pair_id",
26
+ "context_window": 13,
27
+ "default_root_dir": null,
28
+ "detect_anomaly": false,
29
+ "devices": "2",
30
+ "doc_pos_embeds": true,
31
+ "enable_checkpointing": true,
32
+ "enable_model_summary": true,
33
+ "enable_progress_bar": true,
34
+ "eos_token_id": 2,
35
+ "fast_dev_run": false,
36
+ "gpus": null,
37
+ "gradient_clip_algorithm": null,
38
+ "gradient_clip_val": null,
39
+ "hidden_act": "gelu",
40
+ "hidden_dropout_prob": 0.1,
41
+ "hidden_size": 768,
42
+ "id2label": {
43
+ "0": "LABEL_0",
44
+ "1": "LABEL_1",
45
+ "2": "LABEL_2",
46
+ "3": "LABEL_3"
47
+ },
48
+ "inference_mode": true,
49
+ "initializer_range": 0.02,
50
+ "intermediate_size": 3072,
51
+ "ipus": null,
52
+ "label2id": {
53
+ "LABEL_0": 0,
54
+ "LABEL_1": 1,
55
+ "LABEL_2": 2,
56
+ "LABEL_3": 3
57
+ },
58
+ "layer_norm_eps": 1e-05,
59
+ "learning_rate": 1e-05,
60
+ "left_z_only": false,
61
+ "limit_predict_batches": null,
62
+ "limit_test_batches": null,
63
+ "limit_train_batches": null,
64
+ "limit_val_batches": null,
65
+ "log_class_acc": false,
66
+ "log_every_n_steps": 50,
67
+ "logger": true,
68
+ "lr_scheduler": false,
69
+ "max_epochs": 10,
70
+ "max_length": 128,
71
+ "max_position_embeddings": 514,
72
+ "max_samples": -1,
73
+ "max_steps": -1,
74
+ "max_time": null,
75
+ "min_epochs": null,
76
+ "min_steps": null,
77
+ "model_type": "context-roberta",
78
+ "move_metrics_to_cpu": false,
79
+ "multiple_trainloader_mode": "max_size_cycle",
80
+ "name": "dyn-init-docpos",
81
+ "no_context_pos": false,
82
+ "num_attention_heads": 12,
83
+ "num_hidden_layers": 12,
84
+ "num_nodes": 1,
85
+ "num_processes": null,
86
+ "num_sanity_val_steps": 2,
87
+ "overfit_batches": 0.0,
88
+ "pad_token_id": 1,
89
+ "plugins": null,
90
+ "position_embedding_type": "absolute",
91
+ "precision": 32,
92
+ "profiler": null,
93
+ "project": "planning_models",
94
+ "reading_lvl": null,
95
+ "regression": false,
96
+ "reload_dataloaders_every_n_epochs": 0,
97
+ "replace_sampler_ddp": true,
98
+ "resume_from_checkpoint": null,
99
+ "save_dir": null,
100
+ "simple_context_dir": "context/simple/train",
101
+ "simple_context_doc_id": "pair_id",
102
+ "src_lvl": null,
103
+ "strategy": null,
104
+ "sync_batchnorm": false,
105
+ "torch_dtype": "float32",
106
+ "tpu_cores": null,
107
+ "track_grad_norm": -1,
108
+ "train_check_interval": 0.2,
109
+ "train_file": "data/wikiauto_sents_train.csv",
110
+ "train_split": 0.9,
111
+ "train_workers": 8,
112
+ "transformers_version": "4.29.1",
113
+ "type_vocab_size": 1,
114
+ "upsample_classes": false,
115
+ "use_cache": true,
116
+ "val_check_interval": null,
117
+ "val_file": "data/wikiauto_sents_valid.csv",
118
+ "val_split": 0.05,
119
+ "val_workers": 8,
120
+ "vocab_size": 50274,
121
+ "wandb_id": null,
122
+ "x_col": "complex",
123
+ "y_col": "label"
124
+ }
pytorch_model.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f554f181d8f13e53d5514e94c1a8d5b48e52de3611f8e0e0204bc4ab781b4b33
3
+ size 615371173