Tim Miller committed on
Commit
9456c19
1 Parent(s): 315a31a

Update for cnlpt v0.6.0

Browse files
added_tokens.json CHANGED
@@ -1 +1,10 @@
1
- {"<neg>": 30529, "<a1>": 30524, "<cr>": 30528, "</a2>": 30527, "</a1>": 30525, "<a2>": 30526, "</e>": 30523, "<e>": 30522}
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "</a1>": 30525,
3
+ "</a2>": 30527,
4
+ "</e>": 30523,
5
+ "<a1>": 30524,
6
+ "<a2>": 30526,
7
+ "<cr>": 30528,
8
+ "<e>": 30522,
9
+ "<neg>": 30529
10
+ }
config.json CHANGED
@@ -2,8 +2,9 @@
2
  "architectures": [
3
  "CnlpModelForClassification"
4
  ],
 
5
  "encoder_config": {
6
- "_name_or_path": "microsoft/xtremedistil-l6-h256-uncased",
7
  "add_cross_attention": false,
8
  "architectures": [
9
  "BertModel"
@@ -20,19 +21,20 @@
20
  "early_stopping": false,
21
  "encoder_no_repeat_ngram_size": 0,
22
  "eos_token_id": null,
 
23
  "finetuning_task": null,
24
  "forced_bos_token_id": null,
25
  "forced_eos_token_id": null,
26
  "gradient_checkpointing": false,
27
  "hidden_act": "gelu",
28
  "hidden_dropout_prob": 0.1,
29
- "hidden_size": 256,
30
  "id2label": {
31
  "0": "LABEL_0",
32
  "1": "LABEL_1"
33
  },
34
  "initializer_range": 0.02,
35
- "intermediate_size": 1024,
36
  "is_decoder": false,
37
  "is_encoder_decoder": false,
38
  "label2id": {
@@ -46,7 +48,7 @@
46
  "min_length": 0,
47
  "model_type": "bert",
48
  "no_repeat_ngram_size": 0,
49
- "num_attention_heads": 8,
50
  "num_beam_groups": 1,
51
  "num_beams": 1,
52
  "num_hidden_layers": 6,
@@ -66,6 +68,7 @@
66
  "sep_token_id": null,
67
  "task_specific_params": null,
68
  "temperature": 1.0,
 
69
  "tie_encoder_decoder": false,
70
  "tie_word_embeddings": true,
71
  "tokenizer_class": null,
@@ -73,35 +76,41 @@
73
  "top_p": 1.0,
74
  "torch_dtype": null,
75
  "torchscript": false,
76
- "transformers_version": "4.15.0",
77
  "type_vocab_size": 2,
 
78
  "use_bfloat16": false,
79
  "use_cache": true,
80
  "vocab_size": 30530
81
  },
82
- "encoder_name": "microsoft/xtremedistil-l6-h256-uncased",
83
  "finetuning_task": [
84
  "dtr"
85
  ],
86
  "hidden_dropout_prob": 0.1,
87
- "hidden_size": 256,
88
- "layer": -1,
 
 
 
 
 
 
 
 
 
89
  "model_type": "cnlpt",
90
- "num_labels_list": [
91
- 4
92
- ],
93
  "num_rel_attention_heads": 12,
94
- "num_tokens": -1,
95
  "rel_attention_head_dims": 64,
96
- "relations": [
97
- false
98
- ],
99
- "tagger": [
100
- false
101
- ],
102
  "tokens": false,
103
  "torch_dtype": "float32",
104
- "transformers_version": "4.15.0",
105
  "use_prior_tasks": false,
106
  "vocab_size": 30530
107
  }
 
2
  "architectures": [
3
  "CnlpModelForClassification"
4
  ],
5
+ "cnlpt_version": "0.6.0",
6
  "encoder_config": {
7
+ "_name_or_path": "microsoft/xtremedistil-l6-h384-uncased",
8
  "add_cross_attention": false,
9
  "architectures": [
10
  "BertModel"
 
21
  "early_stopping": false,
22
  "encoder_no_repeat_ngram_size": 0,
23
  "eos_token_id": null,
24
+ "exponential_decay_length_penalty": null,
25
  "finetuning_task": null,
26
  "forced_bos_token_id": null,
27
  "forced_eos_token_id": null,
28
  "gradient_checkpointing": false,
29
  "hidden_act": "gelu",
30
  "hidden_dropout_prob": 0.1,
31
+ "hidden_size": 384,
32
  "id2label": {
33
  "0": "LABEL_0",
34
  "1": "LABEL_1"
35
  },
36
  "initializer_range": 0.02,
37
+ "intermediate_size": 1536,
38
  "is_decoder": false,
39
  "is_encoder_decoder": false,
40
  "label2id": {
 
48
  "min_length": 0,
49
  "model_type": "bert",
50
  "no_repeat_ngram_size": 0,
51
+ "num_attention_heads": 12,
52
  "num_beam_groups": 1,
53
  "num_beams": 1,
54
  "num_hidden_layers": 6,
 
68
  "sep_token_id": null,
69
  "task_specific_params": null,
70
  "temperature": 1.0,
71
+ "tf_legacy_loss": false,
72
  "tie_encoder_decoder": false,
73
  "tie_word_embeddings": true,
74
  "tokenizer_class": null,
 
76
  "top_p": 1.0,
77
  "torch_dtype": null,
78
  "torchscript": false,
79
+ "transformers_version": "4.22.2",
80
  "type_vocab_size": 2,
81
+ "typical_p": 1.0,
82
  "use_bfloat16": false,
83
  "use_cache": true,
84
  "vocab_size": 30530
85
  },
86
+ "encoder_name": "microsoft/xtremedistil-l6-h384-uncased",
87
  "finetuning_task": [
88
  "dtr"
89
  ],
90
  "hidden_dropout_prob": 0.1,
91
+ "hidden_size": 384,
92
+ "hier_head_config": null,
93
+ "label_dictionary": {
94
+ "dtr": [
95
+ "AFTER",
96
+ "BEFORE",
97
+ "BEFORE/OVERLAP",
98
+ "OVERLAP"
99
+ ]
100
+ },
101
+ "layer": 6,
102
  "model_type": "cnlpt",
 
 
 
103
  "num_rel_attention_heads": 12,
 
104
  "rel_attention_head_dims": 64,
105
+ "relations": {
106
+ "dtr": false
107
+ },
108
+ "tagger": {
109
+ "dtr": false
110
+ },
111
  "tokens": false,
112
  "torch_dtype": "float32",
113
+ "transformers_version": "4.22.2",
114
  "use_prior_tasks": false,
115
  "vocab_size": 30530
116
  }
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d8a6105f29b4f019286de46c7c2e19d0e93ad6093d5202e4b6f1a19548aaa8f4
3
- size 51307399
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:656c9caad2072035ec356257e965b42ffadec00bf5fc060e65d12ea8495baf17
3
+ size 91503485
special_tokens_map.json CHANGED
@@ -1 +1,17 @@
1
- {"unk_token": "[UNK]", "sep_token": "[SEP]", "pad_token": "[PAD]", "cls_token": "[CLS]", "mask_token": "[MASK]", "additional_special_tokens": ["<e>", "</e>", "<a1>", "</a1>", "<a2>", "</a2>", "<cr>", "<neg>"]}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "additional_special_tokens": [
3
+ "<e>",
4
+ "</e>",
5
+ "<a1>",
6
+ "</a1>",
7
+ "<a2>",
8
+ "</a2>",
9
+ "<cr>",
10
+ "<neg>"
11
+ ],
12
+ "cls_token": "[CLS]",
13
+ "mask_token": "[MASK]",
14
+ "pad_token": "[PAD]",
15
+ "sep_token": "[SEP]",
16
+ "unk_token": "[UNK]"
17
+ }
tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
tokenizer_config.json CHANGED
@@ -1 +1,26 @@
1
- {"do_lower_case": true, "unk_token": "[UNK]", "sep_token": "[SEP]", "pad_token": "[PAD]", "cls_token": "[CLS]", "mask_token": "[MASK]", "tokenize_chinese_chars": true, "strip_accents": null, "add_prefix_space": true, "additional_special_tokens": ["<e>", "</e>", "<a1>", "</a1>", "<a2>", "</a2>", "<cr>", "<neg>"], "special_tokens_map_file": null, "name_or_path": "microsoft/xtremedistil-l6-h256-uncased", "do_basic_tokenize": true, "never_split": null, "tokenizer_class": "BertTokenizer"}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "add_prefix_space": true,
3
+ "additional_special_tokens": [
4
+ "<e>",
5
+ "</e>",
6
+ "<a1>",
7
+ "</a1>",
8
+ "<a2>",
9
+ "</a2>",
10
+ "<cr>",
11
+ "<neg>"
12
+ ],
13
+ "cls_token": "[CLS]",
14
+ "do_basic_tokenize": true,
15
+ "do_lower_case": true,
16
+ "mask_token": "[MASK]",
17
+ "name_or_path": "microsoft/xtremedistil-l6-h384-uncased",
18
+ "never_split": null,
19
+ "pad_token": "[PAD]",
20
+ "sep_token": "[SEP]",
21
+ "special_tokens_map_file": null,
22
+ "strip_accents": null,
23
+ "tokenize_chinese_chars": true,
24
+ "tokenizer_class": "BertTokenizer",
25
+ "unk_token": "[UNK]"
26
+ }