lizgzil committed on
Commit
ce96244
1 Parent(s): e995418

Update spaCy pipeline

Browse files
README.md CHANGED
@@ -14,25 +14,25 @@ model-index:
14
  metrics:
15
  - name: NER Precision
16
  type: precision
17
- value: 0.5991309071
18
  - name: NER Recall
19
  type: recall
20
- value: 0.5768828452
21
  - name: NER F Score
22
  type: f_score
23
- value: 0.5877964295
24
  ---
25
  A Named Entity Recognition (NER) model to extract SKILL, EXPERIENCE and BENEFIT from job adverts.
26
 
27
  | Feature | Description |
28
  | --- | --- |
29
  | **Name** | `en_skillner` |
30
- | **Version** | `3.5.0` |
31
- | **spaCy** | `>=3.5.3,<3.6.0` |
32
  | **Default Pipeline** | `tok2vec`, `tagger`, `parser`, `attribute_ruler`, `lemmatizer`, `ner` |
33
  | **Components** | `tok2vec`, `tagger`, `parser`, `senter`, `attribute_ruler`, `lemmatizer`, `ner` |
34
  | **Vectors** | 514157 keys, 514157 unique vectors (300 dimensions) |
35
- | **Sources** | [OntoNotes 5](https://catalog.ldc.upenn.edu/LDC2013T19) (Ralph Weischedel, Martha Palmer, Mitchell Marcus, Eduard Hovy, Sameer Pradhan, Lance Ramshaw, Nianwen Xue, Ann Taylor, Jeff Kaufman, Michelle Franchini, Mohammed El-Bachouti, Robert Belvin, Ann Houston)<br />[ClearNLP Constituent-to-Dependency Conversion](https://github.com/clir/clearnlp-guidelines/blob/master/md/components/dependency_conversion.md) (Emory University)<br />[WordNet 3.0](https://wordnet.princeton.edu/) (Princeton University)<br />[Explosion Vectors (OSCAR 2109 + Wikipedia + OpenSubtitles + WMT News Crawl)](https://github.com/explosion/spacy-vectors-builder) (Explosion) |
36
  | **License** | `MIT` |
37
  | **Author** | [nestauk](https://explosion.ai) |
38
 
@@ -52,15 +52,15 @@ A Named Entity Recognition (NER) model to extract SKILL, EXPERIENCE and BENEFIT
52
 
53
  | Type | Score |
54
  | --- | --- |
55
- | `ENTS_P` | 59.91 |
56
- | `ENTS_R` | 57.69 |
57
- | `ENTS_F` | 58.78 |
58
- | `SKILL_P` | 72.77 |
59
- | `SKILL_R` | 72.38 |
60
- | `SKILL_F` | 72.57 |
61
- | `EXPERIENCE_P` | 56.00 |
62
- | `EXPERIENCE_R` | 47.73 |
63
- | `EXPERIENCE_F` | 51.53 |
64
- | `BENEFIT_P` | 77.42 |
65
- | `BENEFIT_R` | 35.82 |
66
- | `BENEFIT_F` | 48.98 |
 
14
  metrics:
15
  - name: NER Precision
16
  type: precision
17
+ value: 0.5919354839
18
  - name: NER Recall
19
  type: recall
20
+ value: 0.5758368201
21
  - name: NER F Score
22
  type: f_score
23
+ value: 0.5837751856
24
  ---
25
  A Named Entity Recognition (NER) model to extract SKILL, EXPERIENCE and BENEFIT from job adverts.
26
 
27
  | Feature | Description |
28
  | --- | --- |
29
  | **Name** | `en_skillner` |
30
+ | **Version** | `3.7.1` |
31
+ | **spaCy** | `>=3.7.4,<3.8.0` |
32
  | **Default Pipeline** | `tok2vec`, `tagger`, `parser`, `attribute_ruler`, `lemmatizer`, `ner` |
33
  | **Components** | `tok2vec`, `tagger`, `parser`, `senter`, `attribute_ruler`, `lemmatizer`, `ner` |
34
  | **Vectors** | 514157 keys, 514157 unique vectors (300 dimensions) |
35
+ | **Sources** | [OntoNotes 5](https://catalog.ldc.upenn.edu/LDC2013T19) (Ralph Weischedel, Martha Palmer, Mitchell Marcus, Eduard Hovy, Sameer Pradhan, Lance Ramshaw, Nianwen Xue, Ann Taylor, Jeff Kaufman, Michelle Franchini, Mohammed El-Bachouti, Robert Belvin, Ann Houston)<br>[ClearNLP Constituent-to-Dependency Conversion](https://github.com/clir/clearnlp-guidelines/blob/master/md/components/dependency_conversion.md) (Emory University)<br>[WordNet 3.0](https://wordnet.princeton.edu/) (Princeton University)<br>[Explosion Vectors (OSCAR 2109 + Wikipedia + OpenSubtitles + WMT News Crawl)](https://github.com/explosion/spacy-vectors-builder) (Explosion) |
36
  | **License** | `MIT` |
37
  | **Author** | [nestauk](https://explosion.ai) |
38
 
 
52
 
53
  | Type | Score |
54
  | --- | --- |
55
+ | `ENTS_P` | 59.19 |
56
+ | `ENTS_R` | 57.58 |
57
+ | `ENTS_F` | 58.38 |
58
+ | `SKILL_P` | 72.19 |
59
+ | `SKILL_R` | 72.62 |
60
+ | `SKILL_F` | 72.40 |
61
+ | `EXPERIENCE_P` | 52.14 |
62
+ | `EXPERIENCE_R` | 41.48 |
63
+ | `EXPERIENCE_F` | 46.20 |
64
+ | `BENEFIT_P` | 75.61 |
65
+ | `BENEFIT_R` | 46.27 |
66
+ | `BENEFIT_F` | 57.41 |
attribute_ruler/patterns CHANGED
Binary files a/attribute_ruler/patterns and b/attribute_ruler/patterns differ
 
config.cfg CHANGED
@@ -17,6 +17,7 @@ after_creation = null
17
  after_pipeline_creation = null
18
  batch_size = 256
19
  tokenizer = {"@tokenizers":"spacy.Tokenizer.v1"}
 
20
 
21
  [components]
22
 
@@ -116,6 +117,7 @@ maxout_pieces = 2
116
 
117
  [components.tagger]
118
  factory = "tagger"
 
119
  neg_prefix = "!"
120
  overwrite = false
121
  scorer = {"@scorers":"spacy.tagger_scorer.v1"}
 
17
  after_pipeline_creation = null
18
  batch_size = 256
19
  tokenizer = {"@tokenizers":"spacy.Tokenizer.v1"}
20
+ vectors = {"@vectors":"spacy.Vectors.v1"}
21
 
22
  [components]
23
 
 
117
 
118
  [components.tagger]
119
  factory = "tagger"
120
+ label_smoothing = 0.0
121
  neg_prefix = "!"
122
  overwrite = false
123
  scorer = {"@scorers":"spacy.tagger_scorer.v1"}
en_skillner-any-py3-none-any.whl CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:aebd76db8a853da31818688df06486b6bd9728c0647a818307ef7251bd7c4d8c
3
- size 587688924
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9b840315f54eda471ecbeece805f216c4c25b519a88a6962084730d7803c54d1
3
+ size 587689424
meta.json CHANGED
@@ -1,14 +1,14 @@
1
  {
2
  "lang":"en",
3
  "name":"skillner",
4
- "version":"3.5.0",
5
  "description":"A Named Entity Recognition (NER) model to extract SKILL, EXPERIENCE and BENEFIT from job adverts.",
6
  "author":"nestauk",
7
  "email":"contact@explosion.ai",
8
  "url":"https://explosion.ai",
9
  "license":"MIT",
10
- "spacy_version":">=3.5.3,<3.6.0",
11
- "spacy_git_version":"9e0322de1",
12
  "vectors":{
13
  "width":300,
14
  "vectors":514157,
@@ -43,54 +43,54 @@
43
  "senter"
44
  ],
45
  "performance":{
46
- "ents_p":0.5991309071,
47
- "ents_r":0.5768828452,
48
- "ents_f":0.5877964295,
49
- "skill_p":0.7277108434,
50
- "skill_r":0.7237866986,
51
- "skill_f":0.7257434665,
52
- "experience_p":0.56,
53
- "experience_r":0.4772727273,
54
- "experience_f":0.5153374233,
55
- "benefit_p":0.7741935484,
56
- "benefit_r":0.3582089552,
57
- "benefit_f":0.4897959184,
58
  "ents_per_type":{
59
  "SKILL":{
60
- "correct":1208,
61
  "incorrect":32,
62
  "partial":0,
63
- "missed":429,
64
- "spurious":420,
65
  "possible":1669,
66
- "actual":1660,
67
- "precision":0.7277108434,
68
- "recall":0.7237866986,
69
- "f1":0.7257434665
70
  },
71
  "EXPERIENCE":{
72
- "correct":84,
73
- "incorrect":37,
74
  "partial":0,
75
- "missed":55,
76
- "spurious":29,
77
  "possible":176,
78
- "actual":150,
79
- "precision":0.56,
80
- "recall":0.4772727273,
81
- "f1":0.5153374233
82
  },
83
  "BENEFIT":{
84
- "correct":24,
85
- "incorrect":3,
86
  "partial":0,
87
- "missed":40,
88
- "spurious":4,
89
  "possible":67,
90
- "actual":31,
91
- "precision":0.7741935484,
92
- "recall":0.3582089552,
93
- "f1":0.4897959184
94
  }
95
  }
96
  },
 
1
  {
2
  "lang":"en",
3
  "name":"skillner",
4
+ "version":"3.7.1",
5
  "description":"A Named Entity Recognition (NER) model to extract SKILL, EXPERIENCE and BENEFIT from job adverts.",
6
  "author":"nestauk",
7
  "email":"contact@explosion.ai",
8
  "url":"https://explosion.ai",
9
  "license":"MIT",
10
+ "spacy_version":">=3.7.4,<3.8.0",
11
+ "spacy_git_version":"bd2c17e20",
12
  "vectors":{
13
  "width":300,
14
  "vectors":514157,
 
43
  "senter"
44
  ],
45
  "performance":{
46
+ "ents_p":0.5919354839,
47
+ "ents_r":0.5758368201,
48
+ "ents_f":0.5837751856,
49
+ "skill_p":0.721858249,
50
+ "skill_r":0.7261833433,
51
+ "skill_f":0.7240143369,
52
+ "experience_p":0.5214285714,
53
+ "experience_r":0.4147727273,
54
+ "experience_f":0.4620253165,
55
+ "benefit_p":0.756097561,
56
+ "benefit_r":0.4626865672,
57
+ "benefit_f":0.5740740741,
58
  "ents_per_type":{
59
  "SKILL":{
60
+ "correct":1212,
61
  "incorrect":32,
62
  "partial":0,
63
+ "missed":425,
64
+ "spurious":435,
65
  "possible":1669,
66
+ "actual":1679,
67
+ "precision":0.721858249,
68
+ "recall":0.7261833433,
69
+ "f1":0.7240143369
70
  },
71
  "EXPERIENCE":{
72
+ "correct":73,
73
+ "incorrect":39,
74
  "partial":0,
75
+ "missed":64,
76
+ "spurious":28,
77
  "possible":176,
78
+ "actual":140,
79
+ "precision":0.5214285714,
80
+ "recall":0.4147727273,
81
+ "f1":0.4620253165
82
  },
83
  "BENEFIT":{
84
+ "correct":31,
85
+ "incorrect":4,
86
  "partial":0,
87
+ "missed":32,
88
+ "spurious":6,
89
  "possible":67,
90
+ "actual":41,
91
+ "precision":0.756097561,
92
+ "recall":0.4626865672,
93
+ "f1":0.5740740741
94
  }
95
  }
96
  },
ner/model CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:5dd953e4cc7eeaa1052539fcb71b312a0650910a310fb84b5eac2d89e993e8c0
3
  size 6384063
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:feadcb06a1ff7169640e3394e886c696675ccc6d651ebe5bf862fa97d761e0fe
3
  size 6384063
ner/moves CHANGED
@@ -1 +1 @@
1
- ��moves��{"0":{},"1":{"ORG":56516,"DATE":40493,"PERSON":36534,"GPE":26745,"MONEY":15158,"CARDINAL":14109,"NORP":9641,"PERCENT":9199,"WORK_OF_ART":4488,"LOC":4055,"TIME":3678,"QUANTITY":3123,"FAC":3046,"EVENT":3021,"ORDINAL":2142,"PRODUCT":1787,"LAW":1624,"LANGUAGE":355,"EXPERIENCE":-1,"SKILL":-2,"BENEFIT":-3},"2":{"ORG":56516,"DATE":40493,"PERSON":36534,"GPE":26745,"MONEY":15158,"CARDINAL":14109,"NORP":9641,"PERCENT":9199,"WORK_OF_ART":4488,"LOC":4055,"TIME":3678,"QUANTITY":3123,"FAC":3046,"EVENT":3021,"ORDINAL":2142,"PRODUCT":1787,"LAW":1624,"LANGUAGE":355,"EXPERIENCE":-1,"SKILL":-2,"BENEFIT":-3},"3":{"ORG":56516,"DATE":40493,"PERSON":36534,"GPE":26745,"MONEY":15158,"CARDINAL":14109,"NORP":9641,"PERCENT":9199,"WORK_OF_ART":4488,"LOC":4055,"TIME":3678,"QUANTITY":3123,"FAC":3046,"EVENT":3021,"ORDINAL":2142,"PRODUCT":1787,"LAW":1624,"LANGUAGE":355,"EXPERIENCE":-1,"SKILL":-2,"BENEFIT":-3},"4":{"ORG":56516,"DATE":40493,"PERSON":36534,"GPE":26745,"MONEY":15158,"CARDINAL":14109,"NORP":9641,"PERCENT":9199,"WORK_OF_ART":4488,"LOC":4055,"TIME":3678,"QUANTITY":3123,"FAC":3046,"EVENT":3021,"ORDINAL":2142,"PRODUCT":1787,"LAW":1624,"LANGUAGE":355,"":1,"EXPERIENCE":-1,"SKILL":-2,"BENEFIT":-3},"5":{"":1}}�cfg��neg_key�
 
1
+ ��moves��{"0":{},"1":{"ORG":56516,"DATE":40493,"PERSON":36534,"GPE":26745,"MONEY":15158,"CARDINAL":14109,"NORP":9641,"PERCENT":9199,"WORK_OF_ART":4488,"LOC":4055,"TIME":3678,"QUANTITY":3123,"FAC":3046,"EVENT":3021,"ORDINAL":2142,"PRODUCT":1787,"LAW":1624,"LANGUAGE":355,"SKILL":-1,"EXPERIENCE":-2,"BENEFIT":-3},"2":{"ORG":56516,"DATE":40493,"PERSON":36534,"GPE":26745,"MONEY":15158,"CARDINAL":14109,"NORP":9641,"PERCENT":9199,"WORK_OF_ART":4488,"LOC":4055,"TIME":3678,"QUANTITY":3123,"FAC":3046,"EVENT":3021,"ORDINAL":2142,"PRODUCT":1787,"LAW":1624,"LANGUAGE":355,"SKILL":-1,"EXPERIENCE":-2,"BENEFIT":-3},"3":{"ORG":56516,"DATE":40493,"PERSON":36534,"GPE":26745,"MONEY":15158,"CARDINAL":14109,"NORP":9641,"PERCENT":9199,"WORK_OF_ART":4488,"LOC":4055,"TIME":3678,"QUANTITY":3123,"FAC":3046,"EVENT":3021,"ORDINAL":2142,"PRODUCT":1787,"LAW":1624,"LANGUAGE":355,"SKILL":-1,"EXPERIENCE":-2,"BENEFIT":-3},"4":{"ORG":56516,"DATE":40493,"PERSON":36534,"GPE":26745,"MONEY":15158,"CARDINAL":14109,"NORP":9641,"PERCENT":9199,"WORK_OF_ART":4488,"LOC":4055,"TIME":3678,"QUANTITY":3123,"FAC":3046,"EVENT":3021,"ORDINAL":2142,"PRODUCT":1787,"LAW":1624,"LANGUAGE":355,"":1,"SKILL":-1,"EXPERIENCE":-2,"BENEFIT":-3},"5":{"":1}}�cfg��neg_key�
tagger/cfg CHANGED
@@ -1,4 +1,5 @@
1
  {
 
2
  "labels":[
3
  "$",
4
  "''",
 
1
  {
2
+ "label_smoothing":0.0,
3
  "labels":[
4
  "$",
5
  "''",
vocab/lookups.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:1ddd140ecac6a8c4592e9146d6e30074569ffaed97ee51edc9587dc510f8934c
3
- size 69982
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fce9c883c56165f29573cc938c2a1c9d417ac61bd8f56b671dd5f7996de70682
3
+ size 70040