tomaarsen HF staff committed on
Commit
3f26801
1 Parent(s): f005c29

Upload model

Browse files
Files changed (5) hide show
  1. README.md +8 -11
  2. config.json +14 -14
  3. pytorch_model.bin +1 -1
  4. tokenizer.json +2 -16
  5. tokenizer_config.json +1 -1
README.md CHANGED
@@ -1,21 +1,18 @@
 
1
  ---
2
  license: apache-2.0
3
- library_name: span_marker
4
  tags:
5
- - span_marker
6
  - token-classification
7
  - ner
8
  - named-entity-recognition
9
  pipeline_tag: token-classification
10
- datasets:
11
- - DFKI-SLT/few-nerd
12
- language:
13
- - en
14
  ---
15
 
16
  # SpanMarker for Named Entity Recognition
17
 
18
- This is a [SpanMarker](https://github.com/tomaarsen/SpanMarkerNER) model that can be used for Named Entity Recognition. In particular, this SpanMarker model uses [bert-base-cased](https://huggingface.co/bert-base-cased) as the underlying encoder.
19
 
20
  ## Usage
21
 
@@ -25,15 +22,15 @@ To use this model for inference, first install the `span_marker` library:
25
  pip install span_marker
26
  ```
27
 
28
- You can then run inference as follows:
29
 
30
  ```python
31
  from span_marker import SpanMarkerModel
32
 
33
- # Download from Hub and run inference
34
- model = SpanMarkerModel.from_pretrained("tomaarsen/span-marker-bert-base-fewnerd-fine-super")
35
  # Run inference
36
  entities = model.predict("Amelia Earhart flew her single engine Lockheed Vega 5B across the Atlantic to Paris.")
37
  ```
38
 
39
- See the [SpanMarker](https://github.com/tomaarsen/SpanMarkerNER) repository for documentation and additional information on this model framework.
 
1
+
2
  ---
3
  license: apache-2.0
4
+ library_name: span-marker
5
  tags:
6
+ - span-marker
7
  - token-classification
8
  - ner
9
  - named-entity-recognition
10
  pipeline_tag: token-classification
 
 
 
 
11
  ---
12
 
13
  # SpanMarker for Named Entity Recognition
14
 
15
+ This is a [SpanMarker](https://github.com/tomaarsen/SpanMarkerNER) model that can be used for Named Entity Recognition. In particular, this SpanMarker model uses [bert-base-cased](https://huggingface.co/bert-base-cased) as the underlying encoder.
16
 
17
  ## Usage
18
 
 
22
  pip install span_marker
23
  ```
24
 
25
+ You can then run inference with this model like so:
26
 
27
  ```python
28
  from span_marker import SpanMarkerModel
29
 
30
+ # Download from the 🤗 Hub
31
+ model = SpanMarkerModel.from_pretrained("Temp/tmpac7jsfce")
32
  # Run inference
33
  entities = model.predict("Amelia Earhart flew her single engine Lockheed Vega 5B across the Atlantic to Paris.")
34
  ```
35
 
36
+ See the [SpanMarker](https://github.com/tomaarsen/SpanMarkerNER) repository for documentation and additional information on this library.
config.json CHANGED
@@ -1,5 +1,5 @@
1
  {
2
- "_name_or_path": "models\\bb-full-256-1\\checkpoint-final",
3
  "architectures": [
4
  "SpanMarkerModel"
5
  ],
@@ -33,6 +33,14 @@
33
  "id2label": {
34
  "0": "O",
35
  "1": "art-broadcastprogram",
 
 
 
 
 
 
 
 
36
  "10": "building-library",
37
  "11": "building-other",
38
  "12": "building-restaurant",
@@ -43,7 +51,6 @@
43
  "17": "event-election",
44
  "18": "event-other",
45
  "19": "event-protest",
46
- "2": "art-film",
47
  "20": "event-sportsevent",
48
  "21": "location-GPE",
49
  "22": "location-bodiesofwater",
@@ -54,7 +61,6 @@
54
  "27": "location-road/railway/highway/transit",
55
  "28": "organization-company",
56
  "29": "organization-education",
57
- "3": "art-music",
58
  "30": "organization-government/governmentagency",
59
  "31": "organization-media/newspaper",
60
  "32": "organization-other",
@@ -65,7 +71,6 @@
65
  "37": "organization-sportsteam",
66
  "38": "other-astronomything",
67
  "39": "other-award",
68
- "4": "art-other",
69
  "40": "other-biologything",
70
  "41": "other-chemicalthing",
71
  "42": "other-currency",
@@ -76,7 +81,6 @@
76
  "47": "other-law",
77
  "48": "other-livingthing",
78
  "49": "other-medical",
79
- "5": "art-painting",
80
  "50": "person-actor",
81
  "51": "person-artist/author",
82
  "52": "person-athlete",
@@ -87,17 +91,13 @@
87
  "57": "person-soldier",
88
  "58": "product-airplane",
89
  "59": "product-car",
90
- "6": "art-writtenart",
91
  "60": "product-food",
92
  "61": "product-game",
93
  "62": "product-other",
94
  "63": "product-ship",
95
  "64": "product-software",
96
  "65": "product-train",
97
- "66": "product-weapon",
98
- "7": "building-airport",
99
- "8": "building-hospital",
100
- "9": "building-hotel"
101
  },
102
  "initializer_range": 0.02,
103
  "intermediate_size": 3072,
@@ -215,12 +215,12 @@
215
  "use_cache": true,
216
  "vocab_size": 28998
217
  },
218
- "entity_max_length": 16,
219
- "marker_max_length": 256,
220
- "model_max_length": null,
221
  "model_max_length_default": 512,
222
  "model_type": "span-marker",
223
- "outside_id": 0,
224
  "torch_dtype": "float32",
225
  "transformers_version": "4.27.2",
226
  "vocab_size": 28998
 
1
  {
2
+ "_name_or_path": "models\\bb-reduced-padding-full-2\\checkpoint-final",
3
  "architectures": [
4
  "SpanMarkerModel"
5
  ],
 
33
  "id2label": {
34
  "0": "O",
35
  "1": "art-broadcastprogram",
36
+ "2": "art-film",
37
+ "3": "art-music",
38
+ "4": "art-other",
39
+ "5": "art-painting",
40
+ "6": "art-writtenart",
41
+ "7": "building-airport",
42
+ "8": "building-hospital",
43
+ "9": "building-hotel",
44
  "10": "building-library",
45
  "11": "building-other",
46
  "12": "building-restaurant",
 
51
  "17": "event-election",
52
  "18": "event-other",
53
  "19": "event-protest",
 
54
  "20": "event-sportsevent",
55
  "21": "location-GPE",
56
  "22": "location-bodiesofwater",
 
61
  "27": "location-road/railway/highway/transit",
62
  "28": "organization-company",
63
  "29": "organization-education",
 
64
  "30": "organization-government/governmentagency",
65
  "31": "organization-media/newspaper",
66
  "32": "organization-other",
 
71
  "37": "organization-sportsteam",
72
  "38": "other-astronomything",
73
  "39": "other-award",
 
74
  "40": "other-biologything",
75
  "41": "other-chemicalthing",
76
  "42": "other-currency",
 
81
  "47": "other-law",
82
  "48": "other-livingthing",
83
  "49": "other-medical",
 
84
  "50": "person-actor",
85
  "51": "person-artist/author",
86
  "52": "person-athlete",
 
91
  "57": "person-soldier",
92
  "58": "product-airplane",
93
  "59": "product-car",
 
94
  "60": "product-food",
95
  "61": "product-game",
96
  "62": "product-other",
97
  "63": "product-ship",
98
  "64": "product-software",
99
  "65": "product-train",
100
+ "66": "product-weapon"
 
 
 
101
  },
102
  "initializer_range": 0.02,
103
  "intermediate_size": 3072,
 
215
  "use_cache": true,
216
  "vocab_size": 28998
217
  },
218
+ "entity_max_length": 8,
219
+ "marker_max_length": 128,
220
+ "model_max_length": 256,
221
  "model_max_length_default": 512,
222
  "model_type": "span-marker",
223
+ "span_marker_version": "1.0.0.dev1",
224
  "torch_dtype": "float32",
225
  "transformers_version": "4.27.2",
226
  "vocab_size": 28998
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:6769de9d368d9f26b05b55aaa1de6f6ab60e8c71a73b20c4ef8527b7653f60a4
3
  size 433729717
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:29b83eacf702a76c796a485152e7749c9493753c9de619f28a6d9e842dc7ab53
3
  size 433729717
tokenizer.json CHANGED
@@ -1,21 +1,7 @@
1
  {
2
  "version": "1.0",
3
- "truncation": {
4
- "direction": "Right",
5
- "max_length": 256,
6
- "strategy": "LongestFirst",
7
- "stride": 0
8
- },
9
- "padding": {
10
- "strategy": {
11
- "Fixed": 256
12
- },
13
- "direction": "Right",
14
- "pad_to_multiple_of": null,
15
- "pad_id": 0,
16
- "pad_type_id": 0,
17
- "pad_token": "[PAD]"
18
- },
19
  "added_tokens": [
20
  {
21
  "id": 0,
 
1
  {
2
  "version": "1.0",
3
+ "truncation": null,
4
+ "padding": null,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
5
  "added_tokens": [
6
  {
7
  "id": 0,
tokenizer_config.json CHANGED
@@ -3,7 +3,7 @@
3
  "cls_token": "[CLS]",
4
  "do_lower_case": false,
5
  "mask_token": "[MASK]",
6
- "model_max_length": 256,
7
  "pad_token": "[PAD]",
8
  "sep_token": "[SEP]",
9
  "special_tokens_map_file": null,
 
3
  "cls_token": "[CLS]",
4
  "do_lower_case": false,
5
  "mask_token": "[MASK]",
6
+ "model_max_length": 512,
7
  "pad_token": "[PAD]",
8
  "sep_token": "[SEP]",
9
  "special_tokens_map_file": null,