ychafiqui committed on
Commit
eec05a7
1 Parent(s): 2f54a50

Update spaCy pipeline

Browse files
.gitattributes CHANGED
@@ -33,3 +33,5 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
 
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ fr_job_classif_fr-any-py3-none-any.whl filter=lfs diff=lfs merge=lfs -text
37
+ textcat/model filter=lfs diff=lfs merge=lfs -text
README.md ADDED
@@ -0,0 +1,47 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ tags:
3
+ - spacy
4
+ - text-classification
5
+ language:
6
+ - fr
7
+ model-index:
8
+ - name: fr_job_classif_fr
9
+ results: []
10
+ ---
11
+ | Feature | Description |
12
+ | --- | --- |
13
+ | **Name** | `fr_job_classif_fr` |
14
+ | **Version** | `0.0.0` |
15
+ | **spaCy** | `>=3.5.1,<3.6.0` |
16
+ | **Default Pipeline** | `textcat` |
17
+ | **Components** | `textcat` |
18
+ | **Vectors** | 0 keys, 0 unique vectors (0 dimensions) |
19
+ | **Sources** | n/a |
20
+ | **License** | n/a |
21
+ | **Author** | n/a |
22
+
23
+ ### Label Scheme
24
+
25
+ <details>
26
+
27
+ <summary>View label scheme (17 labels for 1 component)</summary>
28
+
29
+ | Component | Labels |
30
+ | --- | --- |
31
+ | **`textcat`** | `systèmes et réseaux informatiques`, `data`, `marketing`, `mécanique`, `télécom`, `juridique`, `assistance, secrétariat et accueil`, `commerce`, `chimie`, `restauration, tourisme et hotellerie`, `comptabilité`, `développement informatique`, `ressources humaines`, `agriculture`, `électronique`, `logistique`, `production` |
32
+
33
+ </details>
34
+
35
+ ### Accuracy
36
+
37
+ | Type | Score |
38
+ | --- | --- |
39
+ | `CATS_SCORE` | 84.73 |
40
+ | `CATS_MICRO_P` | 84.57 |
41
+ | `CATS_MICRO_R` | 84.57 |
42
+ | `CATS_MICRO_F` | 84.57 |
43
+ | `CATS_MACRO_P` | 84.90 |
44
+ | `CATS_MACRO_R` | 84.75 |
45
+ | `CATS_MACRO_F` | 84.73 |
46
+ | `CATS_MACRO_AUC` | 97.97 |
47
+ | `TEXTCAT_LOSS` | 0.93 |
config.cfg ADDED
@@ -0,0 +1,121 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [paths]
2
+ train = "./spacy/train.spacy"
3
+ dev = "./spacy/dev.spacy"
4
+ vectors = null
5
+ init_tok2vec = null
6
+
7
+ [system]
8
+ gpu_allocator = null
9
+ seed = 0
10
+
11
+ [nlp]
12
+ lang = "fr"
13
+ pipeline = ["textcat"]
14
+ batch_size = 1000
15
+ disabled = []
16
+ before_creation = null
17
+ after_creation = null
18
+ after_pipeline_creation = null
19
+ tokenizer = {"@tokenizers":"spacy.Tokenizer.v1"}
20
+
21
+ [components]
22
+
23
+ [components.textcat]
24
+ factory = "textcat"
25
+ scorer = {"@scorers":"spacy.textcat_scorer.v2"}
26
+ threshold = 0.0
27
+
28
+ [components.textcat.model]
29
+ @architectures = "spacy.TextCatBOW.v2"
30
+ exclusive_classes = true
31
+ ngram_size = 1
32
+ no_output_layer = false
33
+ nO = null
34
+
35
+ [corpora]
36
+
37
+ [corpora.dev]
38
+ @readers = "spacy.Corpus.v1"
39
+ path = ${paths.dev}
40
+ max_length = 0
41
+ gold_preproc = false
42
+ limit = 0
43
+ augmenter = null
44
+
45
+ [corpora.train]
46
+ @readers = "spacy.Corpus.v1"
47
+ path = ${paths.train}
48
+ max_length = 0
49
+ gold_preproc = false
50
+ limit = 0
51
+ augmenter = null
52
+
53
+ [training]
54
+ dev_corpus = "corpora.dev"
55
+ train_corpus = "corpora.train"
56
+ seed = ${system.seed}
57
+ gpu_allocator = ${system.gpu_allocator}
58
+ dropout = 0.1
59
+ accumulate_gradient = 1
60
+ patience = 1600
61
+ max_epochs = 0
62
+ max_steps = 20000
63
+ eval_frequency = 200
64
+ frozen_components = []
65
+ annotating_components = []
66
+ before_to_disk = null
67
+ before_update = null
68
+
69
+ [training.batcher]
70
+ @batchers = "spacy.batch_by_words.v1"
71
+ discard_oversize = false
72
+ tolerance = 0.2
73
+ get_length = null
74
+
75
+ [training.batcher.size]
76
+ @schedules = "compounding.v1"
77
+ start = 100
78
+ stop = 1000
79
+ compound = 1.001
80
+ t = 0.0
81
+
82
+ [training.logger]
83
+ @loggers = "spacy.ConsoleLogger.v1"
84
+ progress_bar = false
85
+
86
+ [training.optimizer]
87
+ @optimizers = "Adam.v1"
88
+ beta1 = 0.9
89
+ beta2 = 0.999
90
+ L2_is_weight_decay = true
91
+ L2 = 0.01
92
+ grad_clip = 1.0
93
+ use_averages = false
94
+ eps = 0.00000001
95
+ learn_rate = 0.001
96
+
97
+ [training.score_weights]
98
+ cats_score = 1.0
99
+ cats_score_desc = null
100
+ cats_micro_p = null
101
+ cats_micro_r = null
102
+ cats_micro_f = null
103
+ cats_macro_p = null
104
+ cats_macro_r = null
105
+ cats_macro_f = null
106
+ cats_macro_auc = null
107
+ cats_f_per_type = null
108
+
109
+ [pretraining]
110
+
111
+ [initialize]
112
+ vectors = ${paths.vectors}
113
+ init_tok2vec = ${paths.init_tok2vec}
114
+ vocab_data = null
115
+ lookups = null
116
+ before_init = null
117
+ after_init = null
118
+
119
+ [initialize.components]
120
+
121
+ [initialize.tokenizer]
fr_job_classif_fr-any-py3-none-any.whl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c04e7d654d38918ff1c12ff64fcd0a89cb3c22376f4f2ccf2cb9c9d86aeae346
3
+ size 1441552
meta.json ADDED
@@ -0,0 +1,150 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "lang":"fr",
3
+ "name":"job_classif_fr",
4
+ "version":"0.0.0",
5
+ "description":"",
6
+ "author":"",
7
+ "email":"",
8
+ "url":"",
9
+ "license":"",
10
+ "spacy_version":">=3.5.1,<3.6.0",
11
+ "spacy_git_version":"8153bd573",
12
+ "vectors":{
13
+ "width":0,
14
+ "vectors":0,
15
+ "keys":0,
16
+ "name":null
17
+ },
18
+ "labels":{
19
+ "textcat":[
20
+ "syst\u00e8mes et r\u00e9seaux informatiques",
21
+ "data",
22
+ "marketing",
23
+ "m\u00e9canique",
24
+ "t\u00e9l\u00e9com",
25
+ "juridique",
26
+ "assistance, secr\u00e9tariat et accueil",
27
+ "commerce",
28
+ "chimie",
29
+ "restauration, tourisme et hotellerie",
30
+ "comptabilit\u00e9",
31
+ "d\u00e9veloppement informatique",
32
+ "ressources humaines",
33
+ "agriculture",
34
+ "\u00e9lectronique",
35
+ "logistique",
36
+ "production"
37
+ ]
38
+ },
39
+ "pipeline":[
40
+ "textcat"
41
+ ],
42
+ "components":[
43
+ "textcat"
44
+ ],
45
+ "disabled":[
46
+
47
+ ],
48
+ "performance":{
49
+ "cats_score":0.8473187426,
50
+ "cats_score_desc":"macro F",
51
+ "cats_micro_p":0.8456926475,
52
+ "cats_micro_r":0.8456926475,
53
+ "cats_micro_f":0.8456926475,
54
+ "cats_macro_p":0.8489559714,
55
+ "cats_macro_r":0.8475314703,
56
+ "cats_macro_f":0.8473187426,
57
+ "cats_macro_auc":0.979672823,
58
+ "cats_f_per_type":{
59
+ "syst\u00e8mes et r\u00e9seaux informatiques":{
60
+ "p":0.7873563218,
61
+ "r":0.7326203209,
62
+ "f":0.7590027701
63
+ },
64
+ "data":{
65
+ "p":0.873015873,
66
+ "r":0.8870967742,
67
+ "f":0.88
68
+ },
69
+ "marketing":{
70
+ "p":0.883248731,
71
+ "r":0.8446601942,
72
+ "f":0.8635235732
73
+ },
74
+ "m\u00e9canique":{
75
+ "p":0.7541899441,
76
+ "r":0.8083832335,
77
+ "f":0.7803468208
78
+ },
79
+ "t\u00e9l\u00e9com":{
80
+ "p":0.8,
81
+ "r":0.7572815534,
82
+ "f":0.7780548628
83
+ },
84
+ "juridique":{
85
+ "p":0.9325153374,
86
+ "r":0.9156626506,
87
+ "f":0.9240121581
88
+ },
89
+ "assistance, secr\u00e9tariat et accueil":{
90
+ "p":0.7729468599,
91
+ "r":0.8205128205,
92
+ "f":0.7960199005
93
+ },
94
+ "commerce":{
95
+ "p":0.8347457627,
96
+ "r":0.929245283,
97
+ "f":0.8794642857
98
+ },
99
+ "chimie":{
100
+ "p":0.9438202247,
101
+ "r":0.9081081081,
102
+ "f":0.9256198347
103
+ },
104
+ "restauration, tourisme et hotellerie":{
105
+ "p":0.9664804469,
106
+ "r":0.920212766,
107
+ "f":0.9427792916
108
+ },
109
+ "comptabilit\u00e9":{
110
+ "p":0.8706467662,
111
+ "r":0.8974358974,
112
+ "f":0.8838383838
113
+ },
114
+ "d\u00e9veloppement informatique":{
115
+ "p":0.8558139535,
116
+ "r":0.9108910891,
117
+ "f":0.8824940048
118
+ },
119
+ "ressources humaines":{
120
+ "p":0.917721519,
121
+ "r":0.917721519,
122
+ "f":0.917721519
123
+ },
124
+ "agriculture":{
125
+ "p":0.8571428571,
126
+ "r":0.9180327869,
127
+ "f":0.8865435356
128
+ },
129
+ "\u00e9lectronique":{
130
+ "p":0.7451923077,
131
+ "r":0.7828282828,
132
+ "f":0.763546798
133
+ },
134
+ "logistique":{
135
+ "p":0.9090909091,
136
+ "r":0.8396946565,
137
+ "f":0.873015873
138
+ },
139
+ "production":{
140
+ "p":0.7283236994,
141
+ "r":0.6176470588,
142
+ "f":0.6684350133
143
+ }
144
+ },
145
+ "textcat_loss":0.9324012361
146
+ },
147
+ "requirements":[
148
+
149
+ ]
150
+ }
textcat/cfg ADDED
@@ -0,0 +1,23 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "labels":[
3
+ "syst\u00e8mes et r\u00e9seaux informatiques",
4
+ "data",
5
+ "marketing",
6
+ "m\u00e9canique",
7
+ "t\u00e9l\u00e9com",
8
+ "juridique",
9
+ "assistance, secr\u00e9tariat et accueil",
10
+ "commerce",
11
+ "chimie",
12
+ "restauration, tourisme et hotellerie",
13
+ "comptabilit\u00e9",
14
+ "d\u00e9veloppement informatique",
15
+ "ressources humaines",
16
+ "agriculture",
17
+ "\u00e9lectronique",
18
+ "logistique",
19
+ "production"
20
+ ],
21
+ "threshold":0.0,
22
+ "positive_label":null
23
+ }
textcat/model ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4f93ee50cc77c1b9b3d5926a6566f24425a331c199f914e6c41d97c797f149af
3
+ size 17826595
tokenizer ADDED
The diff for this file is too large to render. See raw diff
 
vocab/key2row ADDED
@@ -0,0 +1 @@
 
 
1
+
vocab/lookups.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:76be8b528d0075f7aae98d6fa57a6d3c83ae480a8469e668d7b0af968995ac71
3
+ size 1
vocab/strings.json ADDED
The diff for this file is too large to render. See raw diff
 
vocab/vectors ADDED
Binary file (128 Bytes). View file
 
vocab/vectors.cfg ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ {
2
+ "mode":"default"
3
+ }