gunghio committed on
Commit
77213bb
1 Parent(s): a94ee9b

updated model from notebook

Browse files
Files changed (6) hide show
  1. README.md +8 -8
  2. config.json +12 -11
  3. pytorch_model.bin +2 -2
  4. tokenizer.json +0 -0
  5. tokenizer_config.json +1 -1
  6. vocab.txt +0 -0
README.md CHANGED
@@ -16,11 +16,11 @@ should probably proofread and complete it, then remove this comment. -->
16
 
17
  This model was trained from scratch on an unknown dataset.
18
  It achieves the following results on the evaluation set:
19
- - Loss: 0.0484
20
- - Precision: 0.9340
21
- - Recall: 0.9413
22
- - F1: 0.9376
23
- - Accuracy: 0.9875
24
 
25
  ## Model description
26
 
@@ -51,9 +51,9 @@ The following hyperparameters were used during training:
51
 
52
  | Training Loss | Epoch | Step | Validation Loss | Precision | Recall | F1 | Accuracy |
53
  |:-------------:|:-----:|:----:|:---------------:|:---------:|:------:|:------:|:--------:|
54
- | 0.1931 | 1.0 | 878 | 0.0518 | 0.9146 | 0.9276 | 0.9210 | 0.9852 |
55
- | 0.0389 | 2.0 | 1756 | 0.0470 | 0.9261 | 0.9389 | 0.9325 | 0.9870 |
56
- | 0.0228 | 3.0 | 2634 | 0.0484 | 0.9340 | 0.9413 | 0.9376 | 0.9875 |
57
 
58
 
59
  ### Framework versions
 
16
 
17
  This model was trained from scratch on an unknown dataset.
18
  It achieves the following results on the evaluation set:
19
+ - Loss: 0.0388
20
+ - Precision: 0.9360
21
+ - Recall: 0.9458
22
+ - F1: 0.9409
23
+ - Accuracy: 0.9902
24
 
25
  ## Model description
26
 
 
51
 
52
  | Training Loss | Epoch | Step | Validation Loss | Precision | Recall | F1 | Accuracy |
53
  |:-------------:|:-----:|:----:|:---------------:|:---------:|:------:|:------:|:--------:|
54
+ | 0.1653 | 1.0 | 878 | 0.0465 | 0.9267 | 0.9300 | 0.9283 | 0.9883 |
55
+ | 0.0322 | 2.0 | 1756 | 0.0404 | 0.9360 | 0.9431 | 0.9396 | 0.9897 |
56
+ | 0.0185 | 3.0 | 2634 | 0.0388 | 0.9360 | 0.9458 | 0.9409 | 0.9902 |
57
 
58
 
59
  ### Framework versions
config.json CHANGED
@@ -1,5 +1,5 @@
1
  {
2
- "_name_or_path": "distilbert-base-uncased",
3
  "activation": "gelu",
4
  "architectures": [
5
  "DistilBertForTokenClassification"
@@ -9,15 +9,15 @@
9
  "dropout": 0.1,
10
  "hidden_dim": 3072,
11
  "id2label": {
12
- "0": "O",
13
- "1": "B-PER",
14
- "2": "I-PER",
15
- "3": "B-ORG",
16
- "4": "I-ORG",
17
- "5": "B-LOC",
18
- "6": "I-LOC",
19
- "7": "B-MISC",
20
- "8": "I-MISC"
21
  },
22
  "initializer_range": 0.02,
23
  "label2id": {
@@ -35,11 +35,12 @@
35
  "model_type": "distilbert",
36
  "n_heads": 12,
37
  "n_layers": 6,
 
38
  "pad_token_id": 0,
39
  "qa_dropout": 0.1,
40
  "seq_classif_dropout": 0.2,
41
  "sinusoidal_pos_embds": false,
42
  "tie_weights_": true,
43
  "transformers_version": "4.6.1",
44
- "vocab_size": 30522
45
  }
 
1
  {
2
+ "_name_or_path": "distilbert-base-multilingual-cased",
3
  "activation": "gelu",
4
  "architectures": [
5
  "DistilBertForTokenClassification"
 
9
  "dropout": 0.1,
10
  "hidden_dim": 3072,
11
  "id2label": {
12
+ "0": "LABEL_0",
13
+ "1": "LABEL_1",
14
+ "2": "LABEL_2",
15
+ "3": "LABEL_3",
16
+ "4": "LABEL_4",
17
+ "5": "LABEL_5",
18
+ "6": "LABEL_6",
19
+ "7": "LABEL_7",
20
+ "8": "LABEL_8"
21
  },
22
  "initializer_range": 0.02,
23
  "label2id": {
 
35
  "model_type": "distilbert",
36
  "n_heads": 12,
37
  "n_layers": 6,
38
+ "output_past": true,
39
  "pad_token_id": 0,
40
  "qa_dropout": 0.1,
41
  "seq_classif_dropout": 0.2,
42
  "sinusoidal_pos_embds": false,
43
  "tie_weights_": true,
44
  "transformers_version": "4.6.1",
45
+ "vocab_size": 119547
46
  }
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:76b8f43b516f5f0340d896894e4aa0f3f903752bac218e5c8d1b8477a853a7c3
3
- size 265521605
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:36de80edacc15d43b97c4284f99a5c39d304e498676d7cfcbb52cf0a7f48a045
3
+ size 539006405
tokenizer.json CHANGED
The diff for this file is too large to render. See raw diff
 
tokenizer_config.json CHANGED
@@ -1 +1 @@
1
- {"do_lower_case": true, "unk_token": "[UNK]", "sep_token": "[SEP]", "pad_token": "[PAD]", "cls_token": "[CLS]", "mask_token": "[MASK]", "tokenize_chinese_chars": true, "strip_accents": null, "model_max_length": 512, "special_tokens_map_file": null, "name_or_path": "distilbert-base-uncased"}
 
1
+ {"do_lower_case": false, "unk_token": "[UNK]", "sep_token": "[SEP]", "pad_token": "[PAD]", "cls_token": "[CLS]", "mask_token": "[MASK]", "tokenize_chinese_chars": true, "strip_accents": null, "model_max_length": 512, "special_tokens_map_file": null, "name_or_path": "distilbert-base-multilingual-cased"}
vocab.txt CHANGED
The diff for this file is too large to render. See raw diff