deepin-tech committed on
Commit
01635c2
1 Parent(s): 6f75a6f

Update spaCy pipeline

Browse files
.gitattributes CHANGED
@@ -33,3 +33,11 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
 
 
 
 
 
 
 
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ fr_pipeline-any-py3-none-any.whl filter=lfs diff=lfs merge=lfs -text
37
+ national/model filter=lfs diff=lfs merge=lfs -text
38
+ ner/model filter=lfs diff=lfs merge=lfs -text
39
+ sentiments/model filter=lfs diff=lfs merge=lfs -text
40
+ tagger/model filter=lfs diff=lfs merge=lfs -text
41
+ thematic/model filter=lfs diff=lfs merge=lfs -text
42
+ tok2vec/model filter=lfs diff=lfs merge=lfs -text
43
+ vocab/vectors filter=lfs diff=lfs merge=lfs -text
README.md ADDED
@@ -0,0 +1,53 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ tags:
3
+ - spacy
4
+ - token-classification
5
+ language:
6
+ - fr
7
+ model-index:
8
+ - name: fr_pipeline
9
+ results: []
10
+ ---
11
+ | Feature | Description |
12
+ | --- | --- |
13
+ | **Name** | `fr_pipeline` |
14
+ | **Version** | `0.0.0` |
15
+ | **spaCy** | `>=3.4.1,<3.5.0` |
16
+ | **Default Pipeline** | `tok2vec`, `tagger`, `national`, `thematic`, `sentiments`, `ner` |
17
+ | **Components** | `tok2vec`, `tagger`, `national`, `thematic`, `sentiments`, `ner` |
18
+ | **Vectors** | -1 keys, 50000 unique vectors (300 dimensions) |
19
+ | **Sources** | n/a |
20
+ | **License** | n/a |
21
+ | **Author** | n/a |
22
+
23
+ ### Label Scheme
24
+
25
+ <details>
26
+
27
+ <summary>View label scheme (71 labels for 5 components)</summary>
28
+
29
+ | Component | Labels |
30
+ | --- | --- |
31
+ | **`tagger`** | `1`, `10`, `100`, `125`, `13`, `14`, `15`, `17`, `19`, `190`, `2`, `25`, `3`, `46`, `5`, `50`, `500`, `6`, `7`, `80`, `800`, `ADJ`, `ADP`, `ADV`, `AUX`, `CCONJ`, `CONJ`, `DET`, `INTJ`, `NOUN`, `NUM`, `PRON`, `PROPN`, `PUNCT`, `SCONJ`, `SYM`, `VERB`, `X` |
32
+ | **`national`** | `International`, `National` |
33
+ | **`thematic`** | `Coopération/Diplomatie`, `Culture`, `Economie`, `Education`, `Election`, `Environnement`, `Gouvernement`, `Insolite`, `Justice`, `Nécrologie`, `People`, `Politique`, `Réligion`, `Santé`, `Sexualité/Vie couple`, `Société`, `Sport`, `Sécurité`, `Sécurité routière`, `Technologie` |
34
+ | **`sentiments`** | `Negatif`, `Neutre`, `Positif` |
35
+ | **`ner`** | `DATE`, `EVENT`, `LOC`, `MISC`, `MONEY`, `ORG`, `PER`, `PER-T` |
36
+
37
+ </details>
38
+
39
+ ### Accuracy
40
+
41
+ | Type | Score |
42
+ | --- | --- |
43
+ | `CATS_SCORE` | 90.37 |
44
+ | `CATS_MICRO_P` | 96.04 |
45
+ | `CATS_MICRO_R` | 96.04 |
46
+ | `CATS_MICRO_F` | 96.04 |
47
+ | `CATS_MACRO_P` | 92.01 |
48
+ | `CATS_MACRO_R` | 88.90 |
49
+ | `CATS_MACRO_F` | 90.37 |
50
+ | `CATS_MACRO_AUC` | 97.36 |
51
+ | `CATS_MACRO_AUC_PER_TYPE` | 0.00 |
52
+ | `TOK2VEC_LOSS` | 0.00 |
53
+ | `TEXTCAT_LOSS` | 4.79 |
accuracy.json ADDED
@@ -0,0 +1,230 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "tagger" : {
3
+ "tag_acc":0.988275739,
4
+ "tok2vec_loss":0.0,
5
+ "tagger_loss":4150.690571785
6
+ },
7
+ "thematic" : {
8
+ "cats_score":0.9611255871,
9
+ "cats_score_desc":"macro AUC",
10
+ "cats_micro_p":0.8275566232,
11
+ "cats_micro_r":0.7722978383,
12
+ "cats_micro_f":0.7989729148,
13
+ "cats_macro_p":0.7304133124,
14
+ "cats_macro_r":0.6277125906,
15
+ "cats_macro_f":0.6704716103,
16
+ "cats_macro_auc":0.9611255871,
17
+ "cats_f_per_type":{
18
+ "Coop\u00e9ration/Diplomatie":{
19
+ "p":0.8024691358,
20
+ "r":0.5,
21
+ "f":0.6161137441
22
+ },
23
+ "Culture":{
24
+ "p":0.7845744681,
25
+ "r":0.6766055046,
26
+ "f":0.7266009852
27
+ },
28
+ "Economie":{
29
+ "p":0.8031674208,
30
+ "r":0.7071713147,
31
+ "f":0.7521186441
32
+ },
33
+ "Education":{
34
+ "p":0.8840206186,
35
+ "r":0.8070588235,
36
+ "f":0.8437884379
37
+ },
38
+ "Election":{
39
+ "p":0.8252788104,
40
+ "r":0.874015748,
41
+ "f":0.8489483748
42
+ },
43
+ "Environnement":{
44
+ "p":0.0,
45
+ "r":0.0,
46
+ "f":0.0
47
+ },
48
+ "Gouvernement":{
49
+ "p":0.92,
50
+ "r":0.71875,
51
+ "f":0.8070175439
52
+ },
53
+ "Insolite":{
54
+ "p":0.7083333333,
55
+ "r":0.3863636364,
56
+ "f":0.5
57
+ },
58
+ "Justice":{
59
+ "p":0.8082524272,
60
+ "r":0.7585421412,
61
+ "f":0.7826086957
62
+ },
63
+ "N\u00e9crologie":{
64
+ "p":0.9230769231,
65
+ "r":0.8,
66
+ "f":0.8571428571
67
+ },
68
+ "People":{
69
+ "p":0.6842105263,
70
+ "r":0.4333333333,
71
+ "f":0.5306122449
72
+ },
73
+ "Politique":{
74
+ "p":0.8357050453,
75
+ "r":0.7699642431,
76
+ "f":0.8014888337
77
+ },
78
+ "R\u00e9ligion":{
79
+ "p":0.8556149733,
80
+ "r":0.7582938389,
81
+ "f":0.8040201005
82
+ },
83
+ "Sant\u00e9":{
84
+ "p":0.8976377953,
85
+ "r":0.8028169014,
86
+ "f":0.8475836431
87
+ },
88
+ "Sexualit\u00e9/Vie couple":{
89
+ "p":0.862745098,
90
+ "r":0.6111111111,
91
+ "f":0.7154471545
92
+ },
93
+ "Soci\u00e9t\u00e9":{
94
+ "p":0.7378151261,
95
+ "r":0.6532738095,
96
+ "f":0.6929755328
97
+ },
98
+ "Sport":{
99
+ "p":0.8952772074,
100
+ "r":0.9083333333,
101
+ "f":0.9017580145
102
+ },
103
+ "S\u00e9curit\u00e9":{
104
+ "p":0.8481724461,
105
+ "r":0.9169199595,
106
+ "f":0.8812074002
107
+ },
108
+ "S\u00e9curit\u00e9 routi\u00e8re":{
109
+ "p":0.0,
110
+ "r":0.0,
111
+ "f":0.0
112
+ },
113
+ "Technologie":{
114
+ "p":0.5319148936,
115
+ "r":0.4716981132,
116
+ "f":0.5
117
+ }
118
+ },
119
+ "cats_macro_auc_per_type":0.0,
120
+ "tok2vec_loss":0.0,
121
+ "textcat_multilabel_loss":0.6762271562
122
+ },
123
+ "national" : {
124
+ "cats_score":0.9037233173,
125
+ "cats_score_desc":"macro F",
126
+ "cats_micro_p":0.9603673272,
127
+ "cats_micro_r":0.9603673272,
128
+ "cats_micro_f":0.9603673272,
129
+ "cats_macro_p":0.9201033646,
130
+ "cats_macro_r":0.8890054321,
131
+ "cats_macro_f":0.9037233173,
132
+ "cats_macro_auc":0.9736423963,
133
+ "cats_f_per_type":{
134
+ "International":{
135
+ "p":0.8683068017,
136
+ "r":0.7947019868,
137
+ "f":0.8298755187
138
+ },
139
+ "National":{
140
+ "p":0.9718999275,
141
+ "r":0.9833088775,
142
+ "f":0.977571116
143
+ }
144
+ },
145
+ "cats_macro_auc_per_type":0.0,
146
+ "tok2vec_loss":0.0,
147
+ "textcat_loss":4.785939348
148
+ },
149
+ "sentiments" : {
150
+ "cats_score":0.9330357215,
151
+ "cats_score_desc":"macro AUC",
152
+ "cats_micro_p":0.8108448928,
153
+ "cats_micro_r":0.798757764,
154
+ "cats_micro_f":0.8047559449,
155
+ "cats_macro_p":0.7994666767,
156
+ "cats_macro_r":0.7540619675,
157
+ "cats_macro_f":0.7683867088,
158
+ "cats_macro_auc":0.9330357215,
159
+ "cats_f_per_type":{
160
+ "Negatif":{
161
+ "p":0.8489208633,
162
+ "r":0.9007633588,
163
+ "f":0.8740740741
164
+ },
165
+ "Neutre":{
166
+ "p":0.7916666667,
167
+ "r":0.5428571429,
168
+ "f":0.6440677966
169
+ },
170
+ "Positif":{
171
+ "p":0.7578125,
172
+ "r":0.8185654008,
173
+ "f":0.7870182556
174
+ }
175
+ },
176
+ "cats_macro_auc_per_type":0.0,
177
+ "tok2vec_loss":0.0,
178
+ "textcat_multilabel_loss":13.0663842873
179
+ },
180
+ "ner": {
181
+ "ents_f":0.7588811189,
182
+ "ents_p":0.7762517883,
183
+ "ents_r":0.7422708618,
184
+ "ents_per_type":{
185
+ "LOC":{
186
+ "p":0.8458942632,
187
+ "r":0.8421052632,
188
+ "f":0.8439955107
189
+ },
190
+ "DATE":{
191
+ "p":0.9045138889,
192
+ "r":0.9387387387,
193
+ "f":0.9213085765
194
+ },
195
+ "ORG":{
196
+ "p":0.7252619325,
197
+ "r":0.7681874229,
198
+ "f":0.7461077844
199
+ },
200
+ "EVENT":{
201
+ "p":0.546875,
202
+ "r":0.5497382199,
203
+ "f":0.5483028721
204
+ },
205
+ "PER":{
206
+ "p":0.8877005348,
207
+ "r":0.8713910761,
208
+ "f":0.8794701987
209
+ },
210
+ "PER-T":{
211
+ "p":0.7157534247,
212
+ "r":0.4696629213,
213
+ "f":0.5671641791
214
+ },
215
+ "MISC":{
216
+ "p":0.4787234043,
217
+ "r":0.2710843373,
218
+ "f":0.3461538462
219
+ },
220
+ "MONEY":{
221
+ "p":0.7777777778,
222
+ "r":0.9545454545,
223
+ "f":0.8571428571
224
+ }
225
+ },
226
+ "tok2vec_loss":0.0,
227
+ "ner_loss":7193.7047263753
228
+ },
229
+ "speed": 4445.2953793177
230
+ }
config.cfg ADDED
@@ -0,0 +1,287 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [paths]
2
+ train = null
3
+ dev = null
4
+ vectors = null
5
+ init_tok2vec = null
6
+
7
+ [system]
8
+ seed = 0
9
+ gpu_allocator = null
10
+
11
+ [nlp]
12
+ lang = "fr"
13
+ pipeline = ["tok2vec","tagger","national","thematic","sentiments","ner"]
14
+ disabled = []
15
+ before_creation = null
16
+ after_creation = null
17
+ after_pipeline_creation = null
18
+ batch_size = 1000
19
+ tokenizer = {"@tokenizers":"spacy.Tokenizer.v1"}
20
+
21
+ [components]
22
+
23
+ [components.national]
24
+ factory = "textcat"
25
+ scorer = {"@scorers":"spacy.textcat_scorer.v1"}
26
+ threshold = 0.5
27
+
28
+ [components.national.model]
29
+ @architectures = "spacy.TextCatEnsemble.v2"
30
+ nO = null
31
+
32
+ [components.national.model.linear_model]
33
+ @architectures = "spacy.TextCatBOW.v2"
34
+ exclusive_classes = true
35
+ ngram_size = 1
36
+ no_output_layer = false
37
+ nO = null
38
+
39
+ [components.national.model.tok2vec]
40
+ @architectures = "spacy.Tok2Vec.v2"
41
+
42
+ [components.national.model.tok2vec.embed]
43
+ @architectures = "spacy.MultiHashEmbed.v2"
44
+ width = 64
45
+ rows = [2000,2000,1000,1000,1000,1000]
46
+ attrs = ["ORTH","LOWER","PREFIX","SUFFIX","SHAPE","ID"]
47
+ include_static_vectors = true
48
+
49
+ [components.national.model.tok2vec.encode]
50
+ @architectures = "spacy.MaxoutWindowEncoder.v2"
51
+ width = 64
52
+ window_size = 1
53
+ maxout_pieces = 3
54
+ depth = 2
55
+
56
+ [components.ner]
57
+ factory = "ner"
58
+ incorrect_spans_key = null
59
+ moves = null
60
+ scorer = {"@scorers":"spacy.ner_scorer.v1"}
61
+ update_with_oracle_cut_size = 100
62
+
63
+ [components.ner.model]
64
+ @architectures = "spacy.TransitionBasedParser.v2"
65
+ state_type = "ner"
66
+ extra_state_tokens = false
67
+ hidden_width = 64
68
+ maxout_pieces = 2
69
+ use_upper = true
70
+ nO = null
71
+
72
+ [components.ner.model.tok2vec]
73
+ @architectures = "spacy.Tok2Vec.v2"
74
+
75
+ [components.ner.model.tok2vec.embed]
76
+ @architectures = "spacy.MultiHashEmbed.v2"
77
+ width = 96
78
+ attrs = ["NORM","PREFIX","SUFFIX","SHAPE","SPACY"]
79
+ rows = [5000,1000,2500,2500,50]
80
+ include_static_vectors = true
81
+
82
+ [components.ner.model.tok2vec.encode]
83
+ @architectures = "spacy.MaxoutWindowEncoder.v2"
84
+ width = 96
85
+ depth = 4
86
+ window_size = 1
87
+ maxout_pieces = 3
88
+
89
+ [components.sentiments]
90
+ factory = "textcat_multilabel"
91
+ scorer = {"@scorers":"spacy.textcat_multilabel_scorer.v1"}
92
+ threshold = 0.5
93
+
94
+ [components.sentiments.model]
95
+ @architectures = "spacy.TextCatEnsemble.v2"
96
+ nO = null
97
+
98
+ [components.sentiments.model.linear_model]
99
+ @architectures = "spacy.TextCatBOW.v2"
100
+ exclusive_classes = false
101
+ ngram_size = 1
102
+ no_output_layer = false
103
+ nO = null
104
+
105
+ [components.sentiments.model.tok2vec]
106
+ @architectures = "spacy.Tok2Vec.v1"
107
+
108
+ [components.sentiments.model.tok2vec.embed]
109
+ @architectures = "spacy.MultiHashEmbed.v2"
110
+ width = 64
111
+ rows = [2000,2000,1000,1000,1000,1000]
112
+ attrs = ["ORTH","LOWER","PREFIX","SUFFIX","SHAPE","ID"]
113
+ include_static_vectors = true
114
+
115
+ [components.sentiments.model.tok2vec.encode]
116
+ @architectures = "spacy.MaxoutWindowEncoder.v1"
117
+ width = 64
118
+ window_size = 1
119
+ maxout_pieces = 3
120
+ depth = 2
121
+
122
+ [components.tagger]
123
+ factory = "tagger"
124
+ neg_prefix = "!"
125
+ overwrite = false
126
+ scorer = {"@scorers":"spacy.tagger_scorer.v1"}
127
+
128
+ [components.tagger.model]
129
+ @architectures = "spacy.Tagger.v2"
130
+ nO = null
131
+ normalize = false
132
+
133
+ [components.tagger.model.tok2vec]
134
+ @architectures = "spacy.HashEmbedCNN.v2"
135
+ pretrained_vectors = null
136
+ width = 96
137
+ depth = 4
138
+ embed_size = 2000
139
+ window_size = 1
140
+ maxout_pieces = 3
141
+ subword_features = true
142
+
143
+ [components.thematic]
144
+ factory = "textcat_multilabel"
145
+ scorer = {"@scorers":"spacy.textcat_multilabel_scorer.v1"}
146
+ threshold = 0.5
147
+
148
+ [components.thematic.model]
149
+ @architectures = "spacy.TextCatEnsemble.v2"
150
+ nO = null
151
+
152
+ [components.thematic.model.linear_model]
153
+ @architectures = "spacy.TextCatBOW.v2"
154
+ exclusive_classes = false
155
+ ngram_size = 1
156
+ no_output_layer = false
157
+ nO = null
158
+
159
+ [components.thematic.model.tok2vec]
160
+ @architectures = "spacy.Tok2Vec.v1"
161
+
162
+ [components.thematic.model.tok2vec.embed]
163
+ @architectures = "spacy.MultiHashEmbed.v2"
164
+ width = 64
165
+ rows = [2000,2000,1000,1000,1000,1000]
166
+ attrs = ["ORTH","LOWER","PREFIX","SUFFIX","SHAPE","ID"]
167
+ include_static_vectors = true
168
+
169
+ [components.thematic.model.tok2vec.encode]
170
+ @architectures = "spacy.MaxoutWindowEncoder.v1"
171
+ width = 64
172
+ window_size = 1
173
+ maxout_pieces = 3
174
+ depth = 2
175
+
176
+ [components.tok2vec]
177
+ factory = "tok2vec"
178
+
179
+ [components.tok2vec.model]
180
+ @architectures = "spacy.Tok2Vec.v2"
181
+
182
+ [components.tok2vec.model.embed]
183
+ @architectures = "spacy.MultiHashEmbed.v2"
184
+ width = 96
185
+ attrs = ["NORM","PREFIX","SUFFIX","SHAPE","SPACY"]
186
+ rows = [5000,1000,2500,2500,50]
187
+ include_static_vectors = false
188
+
189
+ [components.tok2vec.model.encode]
190
+ @architectures = "spacy.MaxoutWindowEncoder.v2"
191
+ width = 96
192
+ depth = 4
193
+ window_size = 1
194
+ maxout_pieces = 3
195
+
196
+ [corpora]
197
+
198
+ [corpora.dev]
199
+ @readers = "spacy.Corpus.v1"
200
+ path = ${paths.dev}
201
+ gold_preproc = false
202
+ max_length = 0
203
+ limit = 0
204
+ augmenter = null
205
+
206
+ [corpora.train]
207
+ @readers = "spacy.Corpus.v1"
208
+ path = ${paths.train}
209
+ gold_preproc = false
210
+ max_length = 0
211
+ limit = 0
212
+ augmenter = null
213
+
214
+ [training]
215
+ seed = ${system.seed}
216
+ gpu_allocator = ${system.gpu_allocator}
217
+ dropout = 0.1
218
+ accumulate_gradient = 1
219
+ patience = 1600
220
+ max_epochs = 0
221
+ max_steps = 20000
222
+ eval_frequency = 200
223
+ frozen_components = []
224
+ annotating_components = []
225
+ dev_corpus = "corpora.dev"
226
+ train_corpus = "corpora.train"
227
+ before_to_disk = null
228
+
229
+ [training.batcher]
230
+ @batchers = "spacy.batch_by_words.v1"
231
+ discard_oversize = false
232
+ tolerance = 0.2
233
+ get_length = null
234
+
235
+ [training.batcher.size]
236
+ @schedules = "compounding.v1"
237
+ start = 100
238
+ stop = 1000
239
+ compound = 1.001
240
+ t = 0.0
241
+
242
+ [training.logger]
243
+ @loggers = "spacy.ConsoleLogger.v1"
244
+ progress_bar = false
245
+
246
+ [training.optimizer]
247
+ @optimizers = "Adam.v1"
248
+ beta1 = 0.9
249
+ beta2 = 0.999
250
+ L2_is_weight_decay = true
251
+ L2 = 0.01
252
+ grad_clip = 1.0
253
+ use_averages = false
254
+ eps = 0.00000001
255
+ learn_rate = 0.001
256
+
257
+ [training.score_weights]
258
+ tag_acc = 0.5
259
+ cats_score = 0.25
260
+ cats_score_desc = null
261
+ cats_micro_p = null
262
+ cats_micro_r = null
263
+ cats_micro_f = null
264
+ cats_macro_p = null
265
+ cats_macro_r = null
266
+ cats_macro_f = null
267
+ cats_macro_auc = null
268
+ cats_f_per_type = null
269
+ cats_macro_auc_per_type = null
270
+ ents_f = 0.25
271
+ ents_p = 0.0
272
+ ents_r = 0.0
273
+ ents_per_type = null
274
+
275
+ [pretraining]
276
+
277
+ [initialize]
278
+ vectors = ${paths.vectors}
279
+ init_tok2vec = ${paths.init_tok2vec}
280
+ vocab_data = null
281
+ lookups = null
282
+ before_init = null
283
+ after_init = null
284
+
285
+ [initialize.components]
286
+
287
+ [initialize.tokenizer]
fr_pipeline-any-py3-none-any.whl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:22d6d0352bf758c4678aa39186c2631eabfc44f47c384c7b14e757911185a037
3
+ size 80370708
meta.json ADDED
@@ -0,0 +1,152 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "lang":"fr",
3
+ "name":"pipeline",
4
+ "version":"0.0.0",
5
+ "description":"",
6
+ "author":"",
7
+ "email":"",
8
+ "url":"",
9
+ "license":"",
10
+ "spacy_version":">=3.4.1,<3.5.0",
11
+ "spacy_git_version":"5c2a00cef",
12
+ "vectors":{
13
+ "width":300,
14
+ "vectors":50000,
15
+ "keys":-1,
16
+ "name":"fr_pipeline.vectors"
17
+ },
18
+ "labels":{
19
+ "tok2vec":[
20
+
21
+ ],
22
+ "tagger":[
23
+ "1",
24
+ "10",
25
+ "100",
26
+ "125",
27
+ "13",
28
+ "14",
29
+ "15",
30
+ "17",
31
+ "19",
32
+ "190",
33
+ "2",
34
+ "25",
35
+ "3",
36
+ "46",
37
+ "5",
38
+ "50",
39
+ "500",
40
+ "6",
41
+ "7",
42
+ "80",
43
+ "800",
44
+ "ADJ",
45
+ "ADP",
46
+ "ADV",
47
+ "AUX",
48
+ "CCONJ",
49
+ "CONJ",
50
+ "DET",
51
+ "INTJ",
52
+ "NOUN",
53
+ "NUM",
54
+ "PRON",
55
+ "PROPN",
56
+ "PUNCT",
57
+ "SCONJ",
58
+ "SYM",
59
+ "VERB",
60
+ "X"
61
+ ],
62
+ "national":[
63
+ "International",
64
+ "National"
65
+ ],
66
+ "thematic":[
67
+ "Coop\u00e9ration/Diplomatie",
68
+ "Culture",
69
+ "Economie",
70
+ "Education",
71
+ "Election",
72
+ "Environnement",
73
+ "Gouvernement",
74
+ "Insolite",
75
+ "Justice",
76
+ "N\u00e9crologie",
77
+ "People",
78
+ "Politique",
79
+ "R\u00e9ligion",
80
+ "Sant\u00e9",
81
+ "Sexualit\u00e9/Vie couple",
82
+ "Soci\u00e9t\u00e9",
83
+ "Sport",
84
+ "S\u00e9curit\u00e9",
85
+ "S\u00e9curit\u00e9 routi\u00e8re",
86
+ "Technologie"
87
+ ],
88
+ "sentiments":[
89
+ "Negatif",
90
+ "Neutre",
91
+ "Positif"
92
+ ],
93
+ "ner":[
94
+ "DATE",
95
+ "EVENT",
96
+ "LOC",
97
+ "MISC",
98
+ "MONEY",
99
+ "ORG",
100
+ "PER",
101
+ "PER-T"
102
+ ]
103
+ },
104
+ "pipeline":[
105
+ "tok2vec",
106
+ "tagger",
107
+ "national",
108
+ "thematic",
109
+ "sentiments",
110
+ "ner"
111
+ ],
112
+ "components":[
113
+ "tok2vec",
114
+ "tagger",
115
+ "national",
116
+ "thematic",
117
+ "sentiments",
118
+ "ner"
119
+ ],
120
+ "disabled":[
121
+
122
+ ],
123
+ "performance":{
124
+ "cats_score":0.9037233173,
125
+ "cats_score_desc":"macro F",
126
+ "cats_micro_p":0.9603673272,
127
+ "cats_micro_r":0.9603673272,
128
+ "cats_micro_f":0.9603673272,
129
+ "cats_macro_p":0.9201033646,
130
+ "cats_macro_r":0.8890054321,
131
+ "cats_macro_f":0.9037233173,
132
+ "cats_macro_auc":0.9736423963,
133
+ "cats_f_per_type":{
134
+ "International":{
135
+ "p":0.8683068017,
136
+ "r":0.7947019868,
137
+ "f":0.8298755187
138
+ },
139
+ "National":{
140
+ "p":0.9718999275,
141
+ "r":0.9833088775,
142
+ "f":0.977571116
143
+ }
144
+ },
145
+ "cats_macro_auc_per_type":0.0,
146
+ "tok2vec_loss":0.0,
147
+ "textcat_loss":4.785939348
148
+ },
149
+ "requirements":[
150
+
151
+ ]
152
+ }
national/cfg ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "labels":[
3
+ "International",
4
+ "National"
5
+ ],
6
+ "threshold":0.5,
7
+ "positive_label":null
8
+ }
national/model ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e93d7563e844309aa12f0ca4701a261cf713c757c0e1a9f89131ac4fa2221d01
3
+ size 4926202
ner/cfg ADDED
@@ -0,0 +1,13 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "moves":null,
3
+ "update_with_oracle_cut_size":100,
4
+ "multitasks":[
5
+
6
+ ],
7
+ "min_action_freq":1,
8
+ "learn_tokens":false,
9
+ "beam_width":1,
10
+ "beam_density":0.0,
11
+ "beam_update_prob":0.0,
12
+ "incorrect_spans_key":null
13
+ }
ner/model ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:80ddab6bed0e3b6e6edc4e4d6c40586fd3d9c8b5e766075872c4878deb80e62e
3
+ size 6500752
ner/moves ADDED
@@ -0,0 +1 @@
 
 
1
+ ��moves��{"0":{},"1":{"ORG":18944,"PER-T":13386,"LOC":11738,"DATE":10836,"EVENT":10154,"PER":9767,"MISC":3411,"MONEY":1103},"2":{"ORG":18944,"PER-T":13386,"LOC":11738,"DATE":10836,"EVENT":10154,"PER":9767,"MISC":3411,"MONEY":1103},"3":{"ORG":18944,"PER-T":13386,"LOC":11738,"DATE":10836,"EVENT":10154,"PER":9767,"MISC":3411,"MONEY":1103},"4":{"ORG":18944,"PER-T":13386,"LOC":11738,"DATE":10836,"EVENT":10154,"PER":9767,"MISC":3411,"MONEY":1103,"":1},"5":{"":1}}�cfg��neg_key�
sentiments/cfg ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "labels":[
3
+ "Negatif",
4
+ "Neutre",
5
+ "Positif"
6
+ ],
7
+ "threshold":0.5
8
+ }
sentiments/model ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7a67a3e67a31aecb3d59941041c25638c6747f3812747f058e1bc97ddd388bfd
3
+ size 5975077
tagger/cfg ADDED
@@ -0,0 +1,44 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "labels":[
3
+ "1",
4
+ "10",
5
+ "100",
6
+ "125",
7
+ "13",
8
+ "14",
9
+ "15",
10
+ "17",
11
+ "19",
12
+ "190",
13
+ "2",
14
+ "25",
15
+ "3",
16
+ "46",
17
+ "5",
18
+ "50",
19
+ "500",
20
+ "6",
21
+ "7",
22
+ "80",
23
+ "800",
24
+ "ADJ",
25
+ "ADP",
26
+ "ADV",
27
+ "AUX",
28
+ "CCONJ",
29
+ "CONJ",
30
+ "DET",
31
+ "INTJ",
32
+ "NOUN",
33
+ "NUM",
34
+ "PRON",
35
+ "PROPN",
36
+ "PUNCT",
37
+ "SCONJ",
38
+ "SYM",
39
+ "VERB",
40
+ "X"
41
+ ],
42
+ "neg_prefix":"!",
43
+ "overwrite":false
44
+ }
tagger/model ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0fa81343501835afd2784186e6156f7b82e1da77ed6c93e46c1eaa628d2b5a35
3
+ size 3720585
thematic/cfg ADDED
@@ -0,0 +1,25 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "labels":[
3
+ "Coop\u00e9ration/Diplomatie",
4
+ "Culture",
5
+ "Economie",
6
+ "Education",
7
+ "Election",
8
+ "Environnement",
9
+ "Gouvernement",
10
+ "Insolite",
11
+ "Justice",
12
+ "N\u00e9crologie",
13
+ "People",
14
+ "Politique",
15
+ "R\u00e9ligion",
16
+ "Sant\u00e9",
17
+ "Sexualit\u00e9/Vie couple",
18
+ "Soci\u00e9t\u00e9",
19
+ "Sport",
20
+ "S\u00e9curit\u00e9",
21
+ "S\u00e9curit\u00e9 routi\u00e8re",
22
+ "Technologie"
23
+ ],
24
+ "threshold":0.5
25
+ }
thematic/model ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e06410b435c568130af53164f842517fed97eb780d6db06df6923897fca872f5
3
+ size 23806921
tok2vec/cfg ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ {
2
+
3
+ }
tok2vec/model ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9835eccff496d59c0432118900e713451105982eb97db53c5c198302ce48ebb9
3
+ size 6139229
tokenizer ADDED
The diff for this file is too large to render. See raw diff
 
vocab/key2row ADDED
@@ -0,0 +1 @@
 
 
1
+
vocab/lookups.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:76be8b528d0075f7aae98d6fa57a6d3c83ae480a8469e668d7b0af968995ac71
3
+ size 1
vocab/strings.json ADDED
The diff for this file is too large to render. See raw diff
 
vocab/vectors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:390a8105437b78f1cbdea520c45b08957fef7053663871b2d1eda63068894637
3
+ size 60000128
vocab/vectors.cfg ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "mode":"floret",
3
+ "minn":3,
4
+ "maxn":6,
5
+ "hash_count":2,
6
+ "hash_seed":2166136261,
7
+ "bow":"<",
8
+ "eow":">"
9
+ }