jbl2024 committed on
Commit
88da5d4
·
verified ·
1 Parent(s): e915e36

Ajout du modèle et du tokenizer

Browse files
Files changed (4) hide show
  1. config.json +33 -28
  2. model.safetensors +2 -2
  3. tokenizer.json +16 -2
  4. tokenizer_config.json +1 -1
config.json CHANGED
@@ -1,52 +1,57 @@
1
  {
2
- "_name_or_path": "almanach/camembertv2-base",
3
  "architectures": [
4
- "RobertaForTokenClassification"
5
  ],
6
  "attention_probs_dropout_prob": 0.1,
7
  "bos_token_id": 1,
8
- "classifier_dropout": null,
 
9
  "embedding_size": 768,
10
  "eos_token_id": 2,
11
  "hidden_act": "gelu",
12
  "hidden_dropout_prob": 0.1,
13
  "hidden_size": 768,
14
  "id2label": {
15
- "0": "LABEL_0",
16
- "1": "LABEL_1",
17
- "2": "LABEL_2",
18
- "3": "LABEL_3",
19
- "4": "LABEL_4",
20
- "5": "LABEL_5",
21
- "6": "LABEL_6",
22
- "7": "LABEL_7",
23
- "8": "LABEL_8"
24
  },
25
  "initializer_range": 0.02,
26
  "intermediate_size": 3072,
27
  "label2id": {
28
- "LABEL_0": 0,
29
- "LABEL_1": 1,
30
- "LABEL_2": 2,
31
- "LABEL_3": 3,
32
- "LABEL_4": 4,
33
- "LABEL_5": 5,
34
- "LABEL_6": 6,
35
- "LABEL_7": 7,
36
- "LABEL_8": 8
37
  },
38
  "layer_norm_eps": 1e-07,
39
- "max_position_embeddings": 1025,
40
- "model_name": "camembertv2-base",
41
- "model_type": "roberta",
 
 
42
  "num_attention_heads": 12,
43
  "num_hidden_layers": 12,
44
  "pad_token_id": 0,
45
- "position_biased_input": true,
46
- "position_embedding_type": "absolute",
 
 
 
 
 
 
 
 
 
47
  "torch_dtype": "float32",
48
  "transformers_version": "4.46.2",
49
- "type_vocab_size": 1,
50
- "use_cache": true,
51
  "vocab_size": 32768
52
  }
 
1
  {
2
+ "_name_or_path": "almanach/camembertav2-base",
3
  "architectures": [
4
+ "DebertaV2ForTokenClassification"
5
  ],
6
  "attention_probs_dropout_prob": 0.1,
7
  "bos_token_id": 1,
8
+ "conv_act": "gelu",
9
+ "conv_kernel_size": 0,
10
  "embedding_size": 768,
11
  "eos_token_id": 2,
12
  "hidden_act": "gelu",
13
  "hidden_dropout_prob": 0.1,
14
  "hidden_size": 768,
15
  "id2label": {
16
+ "0": "O",
17
+ "1": "LOC",
18
+ "2": "PER",
19
+ "3": "MISC",
20
+ "4": "ORG",
21
+ "5": "DATE"
 
 
 
22
  },
23
  "initializer_range": 0.02,
24
  "intermediate_size": 3072,
25
  "label2id": {
26
+ "DATE": 5,
27
+ "LOC": 1,
28
+ "MISC": 3,
29
+ "O": 0,
30
+ "ORG": 4,
31
+ "PER": 2
 
 
 
32
  },
33
  "layer_norm_eps": 1e-07,
34
+ "max_position_embeddings": 1024,
35
+ "max_relative_positions": -1,
36
+ "model_name": "camembertav2-base",
37
+ "model_type": "deberta-v2",
38
+ "norm_rel_ebd": "layer_norm",
39
  "num_attention_heads": 12,
40
  "num_hidden_layers": 12,
41
  "pad_token_id": 0,
42
+ "pooler_dropout": 0,
43
+ "pooler_hidden_act": "gelu",
44
+ "pooler_hidden_size": 768,
45
+ "pos_att_type": [
46
+ "p2c",
47
+ "c2p"
48
+ ],
49
+ "position_biased_input": false,
50
+ "position_buckets": 256,
51
+ "relative_attention": true,
52
+ "share_att_key": true,
53
  "torch_dtype": "float32",
54
  "transformers_version": "4.46.2",
55
+ "type_vocab_size": 0,
 
56
  "vocab_size": 32768
57
  }
model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:0efe413590205ded30196138b6e4d09575443b2c0eeec57406bbcc82606eff86
3
- size 444090780
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c24c62ec70fe60773bc19a0bef33d9f21e6ca2e51673a06f8af3fcfbe4bc7418
3
+ size 442509128
tokenizer.json CHANGED
@@ -1,7 +1,21 @@
1
  {
2
  "version": "1.0",
3
- "truncation": null,
4
- "padding": null,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
5
  "added_tokens": [
6
  {
7
  "id": 0,
 
1
  {
2
  "version": "1.0",
3
+ "truncation": {
4
+ "direction": "Right",
5
+ "max_length": 128,
6
+ "strategy": "LongestFirst",
7
+ "stride": 0
8
+ },
9
+ "padding": {
10
+ "strategy": {
11
+ "Fixed": 128
12
+ },
13
+ "direction": "Right",
14
+ "pad_to_multiple_of": null,
15
+ "pad_id": 0,
16
+ "pad_type_id": 0,
17
+ "pad_token": "[PAD]"
18
+ },
19
  "added_tokens": [
20
  {
21
  "id": 0,
tokenizer_config.json CHANGED
@@ -48,7 +48,7 @@
48
  "eos_token": "[SEP]",
49
  "errors": "replace",
50
  "mask_token": "[MASK]",
51
- "model_max_length": 1024,
52
  "pad_token": "[PAD]",
53
  "sep_token": "[SEP]",
54
  "tokenizer_class": "RobertaTokenizer",
 
48
  "eos_token": "[SEP]",
49
  "errors": "replace",
50
  "mask_token": "[MASK]",
51
+ "model_max_length": 1000000000000000019884624838656,
52
  "pad_token": "[PAD]",
53
  "sep_token": "[SEP]",
54
  "tokenizer_class": "RobertaTokenizer",