AvivNavon committed
Commit aeb7365
1 Parent(s): eabf7e9

first model version

.gitignore ADDED
@@ -0,0 +1,2 @@
+ .DS_Store
+ .idea/
README.md CHANGED
@@ -1,7 +1,21 @@
  # CoReNer

  A multi-task model for named-entity recognition, relation extraction, entity mention detection and coreference resolution.
- See details and usage examples at: https://github.com/aiola-lab/corener
-
- ---
- license: apache-2.0
+ ---
+ language:
+ - en
+ tags:
+ - NER
+ - named entity recognition
+ - RE
+ - relation extraction
+ - entity mention detection
+ - EMD
+ - coreference resolution
+ license: apache-2.0
+ datasets:
+ - Ontonotes
+ - CoNLL04
+ ---
+
  # CoReNer

  A multi-task model for named-entity recognition, relation extraction, entity mention detection and coreference resolution.
+ See details and usage examples at: https://github.com/aiola-lab/corener.
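The README above defers to the GitHub repository for full usage examples. As a minimal, hedged sketch, the files committed here can also be loaded with plain transformers; the Hub repo id below is an assumption (it is not stated in this commit), and the CoReNer task heads themselves live in the linked aiola-lab/corener library, not in transformers.

from transformers import AutoConfig, AutoTokenizer

repo_id = "aiola/roberta-base-corener"  # hypothetical repo id, not part of this commit

config = AutoConfig.from_pretrained(repo_id)        # reads config.json
tokenizer = AutoTokenizer.from_pretrained(repo_id)  # reads tokenizer/vocab/merges files

print(config.model_type)     # "roberta"
print(config.architectures)  # ["RobertaForMaskedLM"]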
config.json ADDED
@@ -0,0 +1,149 @@
+ {
+   "_name_or_path": "roberta-base",
+   "architectures": [
+     "RobertaForMaskedLM"
+   ],
+   "attention_probs_dropout_prob": 0.1,
+   "bos_token_id": 0,
+   "classifier_dropout": null,
+   "corener_config": {
+     "cls_token": 0,
+     "max_pairs": 1000,
+     "ner_classes": 19,
+     "pad_token": 1,
+     "relation_classes": 5,
+     "size_embedding": 25
+   },
+   "eos_token_id": 2,
+   "hidden_act": "gelu",
+   "hidden_dropout_prob": 0.1,
+   "hidden_size": 768,
+   "initializer_range": 0.02,
+   "intermediate_size": 3072,
+   "layer_norm_eps": 1e-05,
+   "max_position_embeddings": 514,
+   "model_type": "roberta",
+   "num_attention_heads": 12,
+   "num_hidden_layers": 12,
+   "pad_token_id": 1,
+   "position_embedding_type": "absolute",
+   "transformers_version": "4.19.1",
+   "type_vocab_size": 1,
+   "types": {
+     "entities": {
+       "CARDINAL": {
+         "short": "CARDINAL",
+         "verbose": "CARDINAL"
+       },
+       "DATE": {
+         "short": "DATE",
+         "verbose": "DATE"
+       },
+       "EVENT": {
+         "short": "EVENT",
+         "verbose": "EVENT"
+       },
+       "FAC": {
+         "short": "FAC",
+         "verbose": "Buildings, airports, highways, bridges"
+       },
+       "GPE": {
+         "short": "GPE",
+         "verbose": "Countries, cities, states."
+       },
+       "LANGUAGE": {
+         "short": "LANGUAGE",
+         "verbose": "LANGUAGE"
+       },
+       "LAW": {
+         "short": "LAW",
+         "verbose": "LAW"
+       },
+       "LOC": {
+         "short": "LOC",
+         "verbose": "LOCATION"
+       },
+       "MONEY": {
+         "short": "MONEY",
+         "verbose": "MONEY"
+       },
+       "NORP": {
+         "short": "NORP",
+         "verbose": "Nationalities or religious or political groups"
+       },
+       "ORDINAL": {
+         "short": "ORDINAL",
+         "verbose": "ORDINAL"
+       },
+       "ORG": {
+         "short": "ORG",
+         "verbose": "ORGANIZATION"
+       },
+       "PERCENT": {
+         "short": "PERCENT",
+         "verbose": "PERCENT"
+       },
+       "PERSON": {
+         "short": "PER",
+         "verbose": "PERSON"
+       },
+       "PRODUCT": {
+         "short": "PROD",
+         "verbose": "PRODUCT"
+       },
+       "QUANTITY": {
+         "short": "QUANTITY",
+         "verbose": "QUANTITY"
+       },
+       "TIME": {
+         "short": "TIME",
+         "verbose": "TIME"
+       },
+       "WORK_OF_ART": {
+         "short": "WORK_OF_ART",
+         "verbose": "WORK_OF_ART"
+       }
+     },
+     "mentions": {
+       "MENTION": {
+         "short": "MENTION",
+         "verbose": "MENTION"
+       }
+     },
+     "references": {
+       "COREF": {
+         "short": "COREF",
+         "verbose": "COREF"
+       }
+     },
+     "relations": {
+       "Kill": {
+         "short": "Kill",
+         "symmetric": false,
+         "verbose": "Kill"
+       },
+       "Live_In": {
+         "short": "Live",
+         "symmetric": false,
+         "verbose": "Live in"
+       },
+       "Located_In": {
+         "short": "LocIn",
+         "symmetric": false,
+         "verbose": "Located in"
+       },
+       "OrgBased_In": {
+         "short": "OrgBI",
+         "symmetric": false,
+         "verbose": "Organization based in"
+       },
+       "Work_For": {
+         "short": "Work",
+         "symmetric": false,
+         "verbose": "Work for"
+       }
+     }
+   },
+   "use_cache": true,
+   "vocab_size": 50265
+ }
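The "corener_config" and "types" blocks above are custom fields that sit alongside the standard RoBERTa settings. When the config is loaded with transformers, keys it does not recognize are kept as plain attributes, so the label inventory can be inspected directly. A small sketch, again assuming a hypothetical repo id:

from transformers import AutoConfig

config = AutoConfig.from_pretrained("aiola/roberta-base-corener")  # hypothetical repo id

# Extra keys from config.json are exposed as attributes on the config object.
print(config.corener_config["ner_classes"])        # -> 19
print(list(config.types["relations"]))             # Kill, Live_In, Located_In, OrgBased_In, Work_For
print(config.types["entities"]["GPE"]["verbose"])  # -> "Countries, cities, states."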
merges.txt ADDED
The diff for this file is too large to render. See raw diff
pytorch_model.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:a3bab81b34f26b5dedf4a0a9ecd5090883ecf1e6fcd61fb88f48264f406e5196
+ size 525207305
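The three lines above are a Git LFS pointer: the actual ~525 MB checkpoint is stored out of band and resolved when the repository is cloned with LFS enabled or fetched from the Hub. One way to download just the weights file, assuming a hypothetical repo id:

from huggingface_hub import hf_hub_download

# repo_id is an assumption; it is not part of this commit
weights_path = hf_hub_download(
    repo_id="aiola/roberta-base-corener",
    filename="pytorch_model.bin",
)
print(weights_path)  # local cache path of the downloaded checkpoint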
special_tokens_map.json ADDED
@@ -0,0 +1 @@
+ {"bos_token": "<s>", "eos_token": "</s>", "unk_token": "<unk>", "sep_token": "</s>", "pad_token": "<pad>", "cls_token": "<s>", "mask_token": {"content": "<mask>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": false}}
tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
tokenizer_config.json ADDED
@@ -0,0 +1 @@
+ {"unk_token": "<unk>", "bos_token": "<s>", "eos_token": "</s>", "add_prefix_space": false, "errors": "replace", "sep_token": "</s>", "cls_token": "<s>", "pad_token": "<pad>", "mask_token": "<mask>", "do_lower_case": false, "model_max_length": 512, "special_tokens_map_file": null, "name_or_path": "roberta-base", "tokenizer_class": "RobertaTokenizer"}
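Together with vocab.json, merges.txt, and tokenizer.json, this configures a standard roberta-base byte-level BPE tokenizer with model_max_length 512. A short usage sketch (repo id assumed, example sentence made up):

from transformers import AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained("aiola/roberta-base-corener")  # hypothetical repo id

encoding = tokenizer(
    "Acme Corp. hired Jane Doe in Tel Aviv.",  # made-up example sentence
    return_tensors="pt",
    truncation=True,  # respects model_max_length = 512
)
print(encoding["input_ids"].shape)
print(tokenizer.decode(encoding["input_ids"][0]))  # wrapped in <s> ... </s> special tokens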
vocab.json ADDED
The diff for this file is too large to render. See raw diff