remydecoupes committed on
Commit
1445488
1 Parent(s): 3c7d033

fix tokenize issue

Browse files
.gitattributes DELETED
@@ -1,35 +0,0 @@
1
- *.7z filter=lfs diff=lfs merge=lfs -text
2
- *.arrow filter=lfs diff=lfs merge=lfs -text
3
- *.bin filter=lfs diff=lfs merge=lfs -text
4
- *.bz2 filter=lfs diff=lfs merge=lfs -text
5
- *.ckpt filter=lfs diff=lfs merge=lfs -text
6
- *.ftz filter=lfs diff=lfs merge=lfs -text
7
- *.gz filter=lfs diff=lfs merge=lfs -text
8
- *.h5 filter=lfs diff=lfs merge=lfs -text
9
- *.joblib filter=lfs diff=lfs merge=lfs -text
10
- *.lfs.* filter=lfs diff=lfs merge=lfs -text
11
- *.mlmodel filter=lfs diff=lfs merge=lfs -text
12
- *.model filter=lfs diff=lfs merge=lfs -text
13
- *.msgpack filter=lfs diff=lfs merge=lfs -text
14
- *.npy filter=lfs diff=lfs merge=lfs -text
15
- *.npz filter=lfs diff=lfs merge=lfs -text
16
- *.onnx filter=lfs diff=lfs merge=lfs -text
17
- *.ot filter=lfs diff=lfs merge=lfs -text
18
- *.parquet filter=lfs diff=lfs merge=lfs -text
19
- *.pb filter=lfs diff=lfs merge=lfs -text
20
- *.pickle filter=lfs diff=lfs merge=lfs -text
21
- *.pkl filter=lfs diff=lfs merge=lfs -text
22
- *.pt filter=lfs diff=lfs merge=lfs -text
23
- *.pth filter=lfs diff=lfs merge=lfs -text
24
- *.rar filter=lfs diff=lfs merge=lfs -text
25
- *.safetensors filter=lfs diff=lfs merge=lfs -text
26
- saved_model/**/* filter=lfs diff=lfs merge=lfs -text
27
- *.tar.* filter=lfs diff=lfs merge=lfs -text
28
- *.tar filter=lfs diff=lfs merge=lfs -text
29
- *.tflite filter=lfs diff=lfs merge=lfs -text
30
- *.tgz filter=lfs diff=lfs merge=lfs -text
31
- *.wasm filter=lfs diff=lfs merge=lfs -text
32
- *.xz filter=lfs diff=lfs merge=lfs -text
33
- *.zip filter=lfs diff=lfs merge=lfs -text
34
- *.zst filter=lfs diff=lfs merge=lfs -text
35
- *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
README.md DELETED
@@ -1,26 +0,0 @@
1
- ---
2
- license: cc-by-nc-4.0
3
- ---
4
- # [TETIS](https://www.umr-tetis.fr) @ [Challenge TextMine 2024](https://textmine.sciencesconf.org/resource/page/id/9)
5
-
6
- ---
7
- ## This model is a NER model based on CamemBERT-large, built for the Kaggle competition (in French): https://www.kaggle.com/competitions/defi-textmine-2024/
8
-
9
- This model can be reused with the Hugging Face transformers pipeline. To use it, please refer to its [GitHub repository](https://github.com/tetis-nlp/tetis-challenge_textmine_2024)
10
- ---
11
-
12
-
13
- <img align="left" src="https://www.umr-tetis.fr/images/logo-header-tetis.png">
14
-
15
- | Participants |
16
- |----------------------|
17
- | Rémy Decoupes |
18
- | Roberto Interdonato |
19
- | Rodrique Kafando |
20
- | Mehtab Syed Alam |
21
- | Maguelonne Teisseire |
22
- | Mathieu Roche |
23
- | Sarah Valentin |
24
-
25
- ---
26
-
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
config.json DELETED
@@ -1,44 +0,0 @@
1
- {
2
- "_name_or_path": "camembert/camembert-large",
3
- "architectures": [
4
- "CamembertForTokenClassification"
5
- ],
6
- "attention_probs_dropout_prob": 0.1,
7
- "bos_token_id": 0,
8
- "classifier_dropout": null,
9
- "eos_token_id": 2,
10
- "hidden_act": "gelu",
11
- "hidden_dropout_prob": 0.1,
12
- "hidden_size": 1024,
13
- "id2label": {
14
- "0": "aucun",
15
- "1": "geogFeat",
16
- "2": "geogFeat geogName",
17
- "3": "geogName",
18
- "4": "name",
19
- "5": "name geogName"
20
- },
21
- "initializer_range": 0.02,
22
- "intermediate_size": 4096,
23
- "label2id": {
24
- "aucun": 0,
25
- "geogFeat": 1,
26
- "geogFeat geogName": 2,
27
- "geogName": 3,
28
- "name": 4,
29
- "name geogName": 5
30
- },
31
- "layer_norm_eps": 1e-05,
32
- "max_position_embeddings": 514,
33
- "model_type": "camembert",
34
- "num_attention_heads": 16,
35
- "num_hidden_layers": 24,
36
- "output_past": true,
37
- "pad_token_id": 1,
38
- "position_embedding_type": "absolute",
39
- "torch_dtype": "float32",
40
- "transformers_version": "4.20.1",
41
- "type_vocab_size": 1,
42
- "use_cache": true,
43
- "vocab_size": 32005
44
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
pytorch_model.bin DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:f277133ad9f1bdea34f94320dd9295bfd8885c5424aa2e05dc039c50ad8a8bbf
3
- size 60555264
 
 
 
 
sentencepiece.bpe.model DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:f98f266fdc548c94216aaadc13ffaaafacf0c8793303e2195322d954549ea261
3
- size 808767
 
 
 
 
special_tokens_map.json DELETED
@@ -1,19 +0,0 @@
1
- {
2
- "additional_special_tokens": [
3
- "<s>NOTUSED",
4
- "</s>NOTUSED"
5
- ],
6
- "bos_token": "<s>",
7
- "cls_token": "<s>",
8
- "eos_token": "</s>",
9
- "mask_token": {
10
- "content": "<mask>",
11
- "lstrip": true,
12
- "normalized": true,
13
- "rstrip": false,
14
- "single_word": false
15
- },
16
- "pad_token": "<pad>",
17
- "sep_token": "</s>",
18
- "unk_token": "<unk>"
19
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
tokenizer.json DELETED
The diff for this file is too large to render. See raw diff
 
tokenizer_config.json DELETED
@@ -1,24 +0,0 @@
1
- {
2
- "additional_special_tokens": [
3
- "<s>NOTUSED",
4
- "</s>NOTUSED"
5
- ],
6
- "bos_token": "<s>",
7
- "cls_token": "<s>",
8
- "eos_token": "</s>",
9
- "mask_token": {
10
- "__type": "AddedToken",
11
- "content": "<mask>",
12
- "lstrip": true,
13
- "normalized": true,
14
- "rstrip": false,
15
- "single_word": false
16
- },
17
- "name_or_path": "camembert/camembert-large",
18
- "pad_token": "<pad>",
19
- "sep_token": "</s>",
20
- "sp_model_kwargs": {},
21
- "special_tokens_map_file": null,
22
- "tokenizer_class": "CamembertTokenizer",
23
- "unk_token": "<unk>"
24
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
training_args.bin DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:03cc59526da8166036f5dd656c548647fedc1d2cf71889d9698b66d9c8e62f11
3
- size 3375