kleinay committed on
Commit 860c4d2
1 Parent(s): d4ab640

Upload T5ForConditionalGeneration

Files changed (3)
  1. config.json +13 -8
  2. generation_config.json +7 -0
  3. pytorch_model.bin +2 -2
config.json CHANGED
@@ -1,5 +1,5 @@
 {
-  "_name_or_path": "trained_models/t5_qasrl-baseline-10.05.22",
+  "_name_or_path": "/home/nlp/kleinay/tmp/t5-tst-summarization/qasrl/full-data-linearization/all_by_role_ordering",
   "append_verb_form": true,
   "architectures": [
     "T5ForConditionalGeneration"
@@ -9,8 +9,9 @@
   "d_model": 512,
   "debug_mode": false,
   "decoder_start_token_id": 0,
-  "description": "optimal qasrl baseline config based on finer sweep1",
-  "dir_switch": "qasrl/baseline",
+  "dense_act_fn": "relu",
+  "description": "qasrl baseline with output_linearization=all_by_role_ordering",
+  "dir_switch": "full-data-linearization/all_by_role_ordering",
   "do_eval_on": "validation",
   "dropout_rate": 0.1,
   "eos_token_id": 1,
@@ -21,6 +22,7 @@
   "gradient_accumulation_steps": 8,
   "initializer_factor": 1.0,
   "is_encoder_decoder": true,
+  "is_gated_act": false,
   "layer_norm_epsilon": 1e-06,
   "learning_rate": 0.005,
   "load_best_model_at_end": true,
@@ -41,11 +43,12 @@
   "predicate_marker_type": "generic",
   "predict_with_generate": true,
   "preprocess_input_func": "input_predicate_marker",
+  "preprocess_output_func": "all_by_role_ordering",
   "preprocessing_kwargs": {
     "append_verb_form": true,
     "debug_mode": false,
-    "description": "optimal qasrl baseline config based on finer sweep1",
-    "dir_switch": "qasrl/baseline",
+    "description": "qasrl baseline with output_linearization=all_by_role_ordering",
+    "dir_switch": "full-data-linearization/all_by_role_ordering",
     "do_eval_on": "validation",
     "dropout_rate": 0.1,
     "eval_steps": 500,
@@ -65,16 +68,18 @@
     "predicate_marker_type": "generic",
     "predict_with_generate": true,
     "preprocess_input_func": "input_predicate_marker",
+    "preprocess_output_func": "all_by_role_ordering",
     "qanom_joint_factor": 1,
     "save_steps": 500,
     "save_strategy": "steps",
     "seed": 44,
     "source_prefix": "parse: ",
     "train_dataset": "qasrl",
-    "train_epochs": 5,
+    "train_epochs": 20,
     "use_bilateral_predicate_marker": true
   },
   "qanom_joint_factor": 1,
+  "relative_attention_max_distance": 128,
   "relative_attention_num_buckets": 32,
   "save_steps": 500,
   "save_strategy": "steps",
@@ -111,8 +116,8 @@
   },
   "torch_dtype": "float32",
   "train_dataset": "qasrl",
-  "train_epochs": 5,
-  "transformers_version": "4.17.0",
+  "train_epochs": 20,
+  "transformers_version": "4.26.1",
   "use_bilateral_predicate_marker": true,
   "use_cache": true,
   "vocab_size": 32101
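Note: the updated config.json keeps the standard T5 hyperparameters alongside the custom QASRL training/preprocessing keys (source_prefix, preprocess_output_func, train_epochs, ...). A minimal sketch of how those extra keys surface when the checkpoint is loaded with transformers; the repo id below is a placeholder assumption, not taken from this page:

# Sketch: load this checkpoint and inspect the custom config keys.
# "kleinay/qasrl-seq2seq-model" is a hypothetical repo id; substitute the
# actual Hub id of this repository.
from transformers import AutoConfig, T5ForConditionalGeneration

repo_id = "kleinay/qasrl-seq2seq-model"  # placeholder assumption

config = AutoConfig.from_pretrained(repo_id)
# Extra keys stored in config.json are exposed as plain attributes
# on the loaded config object.
print(config.source_prefix)            # "parse: "
print(config.preprocess_output_func)   # "all_by_role_ordering"
print(config.train_epochs)             # 20

model = T5ForConditionalGeneration.from_pretrained(repo_id)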
generation_config.json ADDED
@@ -0,0 +1,7 @@
+{
+  "_from_model_config": true,
+  "decoder_start_token_id": 0,
+  "eos_token_id": 1,
+  "pad_token_id": 0,
+  "transformers_version": "4.26.1"
+}
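Note: generation_config.json appears in this commit because recent transformers versions (4.26 here, per the "transformers_version" field) store generation defaults separately from config.json. A sketch of how it is read back, assuming the same placeholder repo id:

# Sketch: load the generation defaults shipped with this commit.
from transformers import GenerationConfig

gen_config = GenerationConfig.from_pretrained("kleinay/qasrl-seq2seq-model")  # placeholder repo id
print(gen_config.decoder_start_token_id)  # 0
print(gen_config.eos_token_id)            # 1
print(gen_config.pad_token_id)            # 0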
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:83821efedd28325976e71d0e003bed5678ea9de6bef45f29f90004118311f402
-size 242028475
+oid sha256:99bf08ccbd1a2886850e03cbf398a6c0599815818acf35f4cbc7bffe5bf4481d
+size 242016842
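Note: pytorch_model.bin is tracked with Git LFS, so the diff above only shows the pointer file (new sha256 and size). A sketch for checking a downloaded copy of the weights against that checksum; the repo id is again a placeholder assumption, with the revision pinned to this commit (860c4d2):

# Sketch: verify downloaded weights against the sha256 in the LFS pointer.
import hashlib
from huggingface_hub import hf_hub_download

path = hf_hub_download(
    repo_id="kleinay/qasrl-seq2seq-model",  # placeholder repo id
    filename="pytorch_model.bin",
    revision="860c4d2",                     # this commit
)

sha256 = hashlib.sha256()
with open(path, "rb") as f:
    for chunk in iter(lambda: f.read(1 << 20), b""):
        sha256.update(chunk)

expected = "99bf08ccbd1a2886850e03cbf398a6c0599815818acf35f4cbc7bffe5bf4481d"
print(sha256.hexdigest() == expected)  # True if the download matches this commit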