GiLfr committed on
Commit
db695fb
1 Parent(s): 3c29e21

Update spaCy pipeline

Browse files
.gitattributes CHANGED
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ textcat_multilabel/model filter=lfs diff=lfs merge=lfs -text
README.md ADDED
@@ -0,0 +1,11 @@
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ tags:
3
+ - spacy
4
+ - text-classification
5
+ language:
6
+ - fr
7
+ license: mit
8
+ model-index:
9
+ - name: fr_eff_hd_textcat_multi1pcd
10
+ results: []
11
+ ---
config.cfg ADDED
@@ -0,0 +1,126 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [paths]
2
+ train = "..\\test\\train_multi1pcd.spacy"
3
+ dev = "..\\test\\dev_multi1pcd.spacy"
4
+ vectors = null
5
+ init_tok2vec = null
6
+
7
+ [system]
8
+ gpu_allocator = null
9
+ seed = 0
10
+
11
+ [nlp]
12
+ lang = "fr"
13
+ pipeline = ["textcat_multilabel"]
14
+ batch_size = 1000
15
+ disabled = []
16
+ before_creation = null
17
+ after_creation = null
18
+ after_pipeline_creation = null
19
+ tokenizer = {"@tokenizers":"spacy.Tokenizer.v1"}
20
+
21
+ [components]
22
+
23
+ [components.textcat_multilabel]
24
+ factory = "textcat_multilabel"
25
+ scorer = {"@scorers":"spacy.textcat_multilabel_scorer.v1"}
26
+ threshold = 0.5
27
+
28
+ [components.textcat_multilabel.model]
29
+ @architectures = "spacy.TextCatBOW.v2"
30
+ exclusive_classes = false
31
+ ngram_size = 1
32
+ no_output_layer = false
33
+ nO = null
34
+
35
+ [corpora]
36
+
37
+ [corpora.dev]
38
+ @readers = "spacy.Corpus.v1"
39
+ path = ${paths.dev}
40
+ max_length = 0
41
+ gold_preproc = false
42
+ limit = 0
43
+ augmenter = null
44
+
45
+ [corpora.train]
46
+ @readers = "spacy.Corpus.v1"
47
+ path = ${paths.train}
48
+ max_length = 0
49
+ gold_preproc = false
50
+ limit = 0
51
+ augmenter = null
52
+
53
+ [training]
54
+ dev_corpus = "corpora.dev"
55
+ train_corpus = "corpora.train"
56
+ seed = ${system.seed}
57
+ gpu_allocator = ${system.gpu_allocator}
58
+ dropout = 0.1
59
+ accumulate_gradient = 1
60
+ patience = 1600
61
+ max_epochs = 0
62
+ max_steps = 20000
63
+ eval_frequency = 200
64
+ frozen_components = []
65
+ annotating_components = []
66
+ before_to_disk = null
67
+
68
+ [training.batcher]
69
+ @batchers = "spacy.batch_by_words.v1"
70
+ discard_oversize = false
71
+ tolerance = 0.2
72
+ get_length = null
73
+
74
+ [training.batcher.size]
75
+ @schedules = "compounding.v1"
76
+ start = 100
77
+ stop = 1000
78
+ compound = 1.001
79
+ t = 0.0
80
+
81
+ [training.logger]
82
+ @loggers = "spacy.WandbLogger.v3"
83
+ project_name = "pat-hd_models"
84
+ remove_config_values = ["paths.train","paths.dev","corpora.train.path","corpora.dev.path"]
85
+ log_dataset_dir = "./corpus"
86
+ model_log_interval = 1000
87
+ entity = null
88
+ run_name = null
89
+
90
+ [training.optimizer]
91
+ @optimizers = "Adam.v1"
92
+ beta1 = 0.9
93
+ beta2 = 0.999
94
+ L2_is_weight_decay = true
95
+ L2 = 0.01
96
+ grad_clip = 1.0
97
+ use_averages = false
98
+ eps = 0.00000001
99
+ learn_rate = 0.001
100
+
101
+ [training.score_weights]
102
+ cats_score = 1.0
103
+ cats_score_desc = null
104
+ cats_micro_p = null
105
+ cats_micro_r = null
106
+ cats_micro_f = null
107
+ cats_macro_p = null
108
+ cats_macro_r = null
109
+ cats_macro_f = null
110
+ cats_macro_auc = null
111
+ cats_f_per_type = null
112
+ cats_macro_auc_per_type = null
113
+
114
+ [pretraining]
115
+
116
+ [initialize]
117
+ vectors = ${paths.vectors}
118
+ init_tok2vec = ${paths.init_tok2vec}
119
+ vocab_data = null
120
+ lookups = null
121
+ before_init = null
122
+ after_init = null
123
+
124
+ [initialize.components]
125
+
126
+ [initialize.tokenizer]
fr_eff_hd_textcat_multi1pcd-any-py3-none-any.whl ADDED
Binary file (821 kB). View file
 
meta.json ADDED
@@ -0,0 +1,211 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "lang":"fr",
3
+ "name":"eff_hd_textcat_multi1pcd",
4
+ "version":"2023.6.0",
5
+ "description":"Modele de classement des incidents du Support 2424",
6
+ "author":"Gilles LANTERI",
7
+ "email":"gilles.lanteri@malakoffhumanis.com",
8
+ "url":"",
9
+ "license":"MIT",
10
+ "spacy_version":">=3.4.4,<3.5.0",
11
+ "spacy_git_version":"77833bfef",
12
+ "vectors":{
13
+ "width":0,
14
+ "vectors":0,
15
+ "keys":0,
16
+ "name":null
17
+ },
18
+ "labels":{
19
+ "textcat_multilabel":[
20
+ "Citrix",
21
+ "Lecteur",
22
+ "Mot de passe",
23
+ "PC portable",
24
+ "P\u00e9riph\u00e9rique",
25
+ "VPN",
26
+ "Assistance",
27
+ "Incident",
28
+ "",
29
+ "Aide changement MDP expir\u00e9",
30
+ "Aide suite \u00e0 la migration",
31
+ "Aide \u00e0 l'utilisation",
32
+ "Ajout lecteur",
33
+ "Bascule version simplifi\u00e9 en Workspace",
34
+ "Changement de mot de passe",
35
+ "Changement numero MFA",
36
+ "Demande Informations",
37
+ "Demande Myaccess a faire/en attente",
38
+ "Demande Myaccess a faire/en cours",
39
+ "Erreur/oublie de MDP/Login",
40
+ "Mot de passe expir\u00e9",
41
+ "Oublie de connecter le VPN",
42
+ "Parametre Affichage",
43
+ "Pas connect\u00e9 \u00e0 internet",
44
+ "Reglage Affichage",
45
+ "Restauration dossier/fichier",
46
+ "VPN non connect\u00e9"
47
+ ]
48
+ },
49
+ "pipeline":[
50
+ "textcat_multilabel"
51
+ ],
52
+ "components":[
53
+ "textcat_multilabel"
54
+ ],
55
+ "disabled":[
56
+
57
+ ],
58
+ "performance":{
59
+ "cats_score":0.7164608016,
60
+ "cats_score_desc":"macro AUC",
61
+ "cats_micro_p":0.8013013013,
62
+ "cats_micro_r":0.7280582083,
63
+ "cats_micro_f":0.7629258995,
64
+ "cats_macro_p":0.4105767019,
65
+ "cats_macro_r":0.3202640974,
66
+ "cats_macro_f":0.3506143374,
67
+ "cats_macro_auc":0.7164608016,
68
+ "cats_f_per_type":{
69
+ "Citrix":{
70
+ "p":0.9292035398,
71
+ "r":0.8333333333,
72
+ "f":0.8786610879
73
+ },
74
+ "Lecteur":{
75
+ "p":0.9509803922,
76
+ "r":0.8660714286,
77
+ "f":0.9065420561
78
+ },
79
+ "Mot de passe":{
80
+ "p":0.8309859155,
81
+ "r":0.8194444444,
82
+ "f":0.8251748252
83
+ },
84
+ "PC portable":{
85
+ "p":0.6538461538,
86
+ "r":0.3695652174,
87
+ "f":0.4722222222
88
+ },
89
+ "P\u00e9riph\u00e9rique":{
90
+ "p":0.8717948718,
91
+ "r":0.85,
92
+ "f":0.8607594937
93
+ },
94
+ "VPN":{
95
+ "p":0.9206349206,
96
+ "r":0.9027237354,
97
+ "f":0.9115913556
98
+ },
99
+ "Assistance":{
100
+ "p":0.6220095694,
101
+ "r":0.5752212389,
102
+ "f":0.5977011494
103
+ },
104
+ "Incident":{
105
+ "p":0.8131212724,
106
+ "r":0.8067061144,
107
+ "f":0.8099009901
108
+ },
109
+ "":{
110
+ "p":0.8121330724,
111
+ "r":0.8185404339,
112
+ "f":0.815324165
113
+ },
114
+ "Aide changement MDP expir\u00e9":{
115
+ "p":0.1111111111,
116
+ "r":0.1,
117
+ "f":0.1052631579
118
+ },
119
+ "Aide suite \u00e0 la migration":{
120
+ "p":0.5263157895,
121
+ "r":0.2380952381,
122
+ "f":0.3278688525
123
+ },
124
+ "Aide \u00e0 l'utilisation":{
125
+ "p":0.2222222222,
126
+ "r":0.1176470588,
127
+ "f":0.1538461538
128
+ },
129
+ "Ajout lecteur":{
130
+ "p":0.5,
131
+ "r":0.2857142857,
132
+ "f":0.3636363636
133
+ },
134
+ "Bascule version simplifi\u00e9 en Workspace":{
135
+ "p":0.0,
136
+ "r":0.0,
137
+ "f":0.0
138
+ },
139
+ "Changement de mot de passe":{
140
+ "p":0.4,
141
+ "r":0.1333333333,
142
+ "f":0.2
143
+ },
144
+ "Changement numero MFA":{
145
+ "p":0.0,
146
+ "r":0.0,
147
+ "f":0.0
148
+ },
149
+ "Demande Informations":{
150
+ "p":0.0,
151
+ "r":0.0,
152
+ "f":0.0
153
+ },
154
+ "Demande Myaccess a faire/en attente":{
155
+ "p":0.0,
156
+ "r":0.0,
157
+ "f":0.0
158
+ },
159
+ "Demande Myaccess a faire/en cours":{
160
+ "p":0.5,
161
+ "r":0.2,
162
+ "f":0.2857142857
163
+ },
164
+ "Erreur/oublie de MDP/Login":{
165
+ "p":0.4545454545,
166
+ "r":0.1612903226,
167
+ "f":0.2380952381
168
+ },
169
+ "Mot de passe expir\u00e9":{
170
+ "p":0.0,
171
+ "r":0.0,
172
+ "f":0.0
173
+ },
174
+ "Oublie de connecter le VPN":{
175
+ "p":0.0,
176
+ "r":0.0,
177
+ "f":0.0
178
+ },
179
+ "Parametre Affichage":{
180
+ "p":0.0,
181
+ "r":0.0,
182
+ "f":0.0
183
+ },
184
+ "Pas connect\u00e9 \u00e0 internet":{
185
+ "p":0.0,
186
+ "r":0.0,
187
+ "f":0.0
188
+ },
189
+ "Reglage Affichage":{
190
+ "p":0.8,
191
+ "r":0.4444444444,
192
+ "f":0.5714285714
193
+ },
194
+ "Restauration dossier/fichier":{
195
+ "p":0.0,
196
+ "r":0.0,
197
+ "f":0.0
198
+ },
199
+ "VPN non connect\u00e9":{
200
+ "p":0.1666666667,
201
+ "r":0.125,
202
+ "f":0.1428571429
203
+ }
204
+ },
205
+ "cats_macro_auc_per_type":0.0,
206
+ "textcat_multilabel_loss":1.758717101
207
+ },
208
+ "requirements":[
209
+
210
+ ]
211
+ }
textcat_multilabel/cfg ADDED
@@ -0,0 +1,32 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "labels":[
3
+ "Citrix",
4
+ "Lecteur",
5
+ "Mot de passe",
6
+ "PC portable",
7
+ "P\u00e9riph\u00e9rique",
8
+ "VPN",
9
+ "Assistance",
10
+ "Incident",
11
+ "",
12
+ "Aide changement MDP expir\u00e9",
13
+ "Aide suite \u00e0 la migration",
14
+ "Aide \u00e0 l'utilisation",
15
+ "Ajout lecteur",
16
+ "Bascule version simplifi\u00e9 en Workspace",
17
+ "Changement de mot de passe",
18
+ "Changement numero MFA",
19
+ "Demande Informations",
20
+ "Demande Myaccess a faire/en attente",
21
+ "Demande Myaccess a faire/en cours",
22
+ "Erreur/oublie de MDP/Login",
23
+ "Mot de passe expir\u00e9",
24
+ "Oublie de connecter le VPN",
25
+ "Parametre Affichage",
26
+ "Pas connect\u00e9 \u00e0 internet",
27
+ "Reglage Affichage",
28
+ "Restauration dossier/fichier",
29
+ "VPN non connect\u00e9"
30
+ ],
31
+ "threshold":0.5
32
+ }
textcat_multilabel/model ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:719b722bac370535d11b515a8ec88a10d5cfe4466ad9931c5b773f5aa2bc6a4f
3
+ size 28312365
tokenizer ADDED
The diff for this file is too large to render. See raw diff
 
vocab/key2row ADDED
@@ -0,0 +1 @@
 
 
1
+
vocab/lookups.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:76be8b528d0075f7aae98d6fa57a6d3c83ae480a8469e668d7b0af968995ac71
3
+ size 1
vocab/strings.json ADDED
The diff for this file is too large to render. See raw diff
 
vocab/vectors ADDED
Binary file (128 Bytes). View file
 
vocab/vectors.cfg ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ {
2
+ "mode":"default"
3
+ }