adrianeboyd committed on
Commit
84fa336
1 Parent(s): 69892c0

Update spaCy pipeline

Browse files
LICENSES_SOURCES CHANGED
@@ -11,10 +11,10 @@ http://www.gnu.org/licenses/gpl.html```
11
 
12
 
13
 
14
- # UD Catalan AnCora v2.8 + NER v3.2.8
15
 
16
  * Author: Carlos Rodríguez-Penagos and Carme Armentano-Oller
17
- * URL: https://github.com/TeMU-BSC/spacy/releases/tag/3.2.8
18
  * License: CC BY 4.0
19
 
20
  ```
 
11
 
12
 
13
 
14
+ # UD Catalan AnCora v2.8 + NER v3.2.9
15
 
16
  * Author: Carlos Rodríguez-Penagos and Carme Armentano-Oller
17
+ * URL: https://github.com/TeMU-BSC/spacy/releases/tag/3.2.9
18
  * License: CC BY 4.0
19
 
20
  ```
README.md CHANGED
@@ -78,12 +78,12 @@ Catalan pipeline optimized for CPU. Components: tok2vec, morphologizer, parser,
78
  | Feature | Description |
79
  | --- | --- |
80
  | **Name** | `ca_core_news_lg` |
81
- | **Version** | `3.5.0` |
82
- | **spaCy** | `>=3.5.0,<3.6.0` |
83
  | **Default Pipeline** | `tok2vec`, `morphologizer`, `parser`, `attribute_ruler`, `lemmatizer`, `ner` |
84
  | **Components** | `tok2vec`, `morphologizer`, `parser`, `senter`, `attribute_ruler`, `lemmatizer`, `ner` |
85
  | **Vectors** | 500000 keys, 500000 unique vectors (300 dimensions) |
86
- | **Sources** | [UD Catalan AnCora v2.8](https://github.com/UniversalDependencies/UD_Catalan-AnCora) (Martínez Alonso, Héctor; Pascual, Elena; Zeman, Daniel)<br />[UD Catalan AnCora v2.8 + NER v3.2.8](https://github.com/TeMU-BSC/spacy/releases/tag/3.2.8) (Carlos Rodríguez-Penagos and Carme Armentano-Oller)<br />[Catalan Lemmatizer](https://github.com/explosion/spacy-lookups-data) (Text Mining Unit, Barcelona Supercomputing Center)<br />[Catalan Word Embeddings in FastText (Version 1.0)](http://doi.org/10.5281/zenodo.4522041) (Gutiérrez-Fandiño, Asier, Armengol-Estapé, Jordi, Gonzalez-Agirre, Aitor, Carrino, Casimiro Pio, de Gibert, Ona, & Villegas, Marta) |
87
  | **License** | `GNU GPL 3.0` |
88
  | **Author** | [Explosion](https://explosion.ai) |
89
 
 
78
  | Feature | Description |
79
  | --- | --- |
80
  | **Name** | `ca_core_news_lg` |
81
+ | **Version** | `3.6.0` |
82
+ | **spaCy** | `>=3.6.0,<3.7.0` |
83
  | **Default Pipeline** | `tok2vec`, `morphologizer`, `parser`, `attribute_ruler`, `lemmatizer`, `ner` |
84
  | **Components** | `tok2vec`, `morphologizer`, `parser`, `senter`, `attribute_ruler`, `lemmatizer`, `ner` |
85
  | **Vectors** | 500000 keys, 500000 unique vectors (300 dimensions) |
86
+ | **Sources** | [UD Catalan AnCora v2.8](https://github.com/UniversalDependencies/UD_Catalan-AnCora) (Martínez Alonso, Héctor; Pascual, Elena; Zeman, Daniel)<br />[UD Catalan AnCora v2.8 + NER v3.2.9](https://github.com/TeMU-BSC/spacy/releases/tag/3.2.9) (Carlos Rodríguez-Penagos and Carme Armentano-Oller)<br />[Catalan Lemmatizer](https://github.com/explosion/spacy-lookups-data) (Text Mining Unit, Barcelona Supercomputing Center)<br />[Catalan Word Embeddings in FastText (Version 1.0)](http://doi.org/10.5281/zenodo.4522041) (Gutiérrez-Fandiño, Asier, Armengol-Estapé, Jordi, Gonzalez-Agirre, Aitor, Carrino, Casimiro Pio, de Gibert, Ona, & Villegas, Marta) |
87
  | **License** | `GNU GPL 3.0` |
88
  | **Author** | [Explosion](https://explosion.ai) |
89
 
accuracy.json CHANGED
@@ -299,5 +299,5 @@
299
  "f": 0.9053857351
300
  }
301
  },
302
- "speed": 5764.8200329329
303
  }
 
299
  "f": 0.9053857351
300
  }
301
  },
302
+ "speed": 6515.4852090537
303
  }
ca_core_news_lg-any-py3-none-any.whl CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:fcbfcf4436c59e80a0db34e86bbdee42812f5b36324802a31563639409dacdc2
3
- size 574001510
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ea08bbdf8795dc073143c0c1d6ac1d1d340d98388ebd2bc209252671f167988c
3
+ size 574001554
config.cfg CHANGED
@@ -35,6 +35,7 @@ scorer = {"@scorers":"spacy.lemmatizer_scorer.v1"}
35
  [components.morphologizer]
36
  factory = "morphologizer"
37
  extend = false
 
38
  overwrite = true
39
  scorer = {"@scorers":"spacy.morphologizer_scorer.v1"}
40
 
 
35
  [components.morphologizer]
36
  factory = "morphologizer"
37
  extend = false
38
+ label_smoothing = 0.0
39
  overwrite = true
40
  scorer = {"@scorers":"spacy.morphologizer_scorer.v1"}
41
 
meta.json CHANGED
@@ -1,14 +1,14 @@
1
  {
2
  "lang":"ca",
3
  "name":"core_news_lg",
4
- "version":"3.5.0",
5
  "description":"Catalan pipeline optimized for CPU. Components: tok2vec, morphologizer, parser, senter, ner, attribute_ruler, lemmatizer.",
6
  "author":"Explosion",
7
  "email":"contact@explosion.ai",
8
  "url":"https://explosion.ai",
9
  "license":"GNU GPL 3.0",
10
- "spacy_version":">=3.5.0,<3.6.0",
11
- "spacy_git_version":"9e0322de1",
12
  "vectors":{
13
  "width":300,
14
  "vectors":500000,
@@ -670,7 +670,7 @@
670
  "f":0.9053857351
671
  }
672
  },
673
- "speed":5764.8200329329
674
  },
675
  "sources":[
676
  {
@@ -680,8 +680,8 @@
680
  "author":"Mart\u00ednez Alonso, H\u00e9ctor; Pascual, Elena; Zeman, Daniel"
681
  },
682
  {
683
- "name":"UD Catalan AnCora v2.8 + NER v3.2.8",
684
- "url":"https://github.com/TeMU-BSC/spacy/releases/tag/3.2.8",
685
  "license":"CC BY 4.0",
686
  "author":"Carlos Rodr\u00edguez-Penagos and Carme Armentano-Oller"
687
  },
 
1
  {
2
  "lang":"ca",
3
  "name":"core_news_lg",
4
+ "version":"3.6.0",
5
  "description":"Catalan pipeline optimized for CPU. Components: tok2vec, morphologizer, parser, senter, ner, attribute_ruler, lemmatizer.",
6
  "author":"Explosion",
7
  "email":"contact@explosion.ai",
8
  "url":"https://explosion.ai",
9
  "license":"GNU GPL 3.0",
10
+ "spacy_version":">=3.6.0,<3.7.0",
11
+ "spacy_git_version":"cb4fdc83e",
12
  "vectors":{
13
  "width":300,
14
  "vectors":500000,
 
670
  "f":0.9053857351
671
  }
672
  },
673
+ "speed":6515.4852090537
674
  },
675
  "sources":[
676
  {
 
680
  "author":"Mart\u00ednez Alonso, H\u00e9ctor; Pascual, Elena; Zeman, Daniel"
681
  },
682
  {
683
+ "name":"UD Catalan AnCora v2.8 + NER v3.2.9",
684
+ "url":"https://github.com/TeMU-BSC/spacy/releases/tag/3.2.9",
685
  "license":"CC BY 4.0",
686
  "author":"Carlos Rodr\u00edguez-Penagos and Carme Armentano-Oller"
687
  },
morphologizer/cfg CHANGED
@@ -1,5 +1,6 @@
1
  {
2
  "extend":false,
 
3
  "labels_morph":{
4
  "Definite=Def|Gender=Masc|Number=Sing|POS=DET|PronType=Art":"Definite=Def|Gender=Masc|Number=Sing|PronType=Art",
5
  "POS=PROPN":"",
 
1
  {
2
  "extend":false,
3
+ "label_smoothing":0.0,
4
  "labels_morph":{
5
  "Definite=Def|Gender=Masc|Number=Sing|POS=DET|PronType=Art":"Definite=Def|Gender=Masc|Number=Sing|PronType=Art",
6
  "POS=PROPN":"",