Habaznya committed on
Commit 5a44430 · verified · 1 Parent(s): f157846

End of training

README.md CHANGED
@@ -1,6 +1,6 @@
 ---
 license: apache-2.0
-base_model: google-bert/bert-base-multilingual-cased
+base_model: DmitryPogrebnoy/distilbert-base-russian-cased
 tags:
 - generated_from_trainer
 metrics:
@@ -15,10 +15,10 @@ should probably proofread and complete it, then remove this comment. -->
 
 # p_model_2
 
-This model is a fine-tuned version of [google-bert/bert-base-multilingual-cased](https://huggingface.co/google-bert/bert-base-multilingual-cased) on the None dataset.
+This model is a fine-tuned version of [DmitryPogrebnoy/distilbert-base-russian-cased](https://huggingface.co/DmitryPogrebnoy/distilbert-base-russian-cased) on the None dataset.
 It achieves the following results on the evaluation set:
-- Loss: 0.5330
-- Accuracy: 0.8769
+- Loss: 0.9677
+- Accuracy: 0.7463
 
 ## Model description
 
@@ -43,19 +43,27 @@ The following hyperparameters were used during training:
 - seed: 42
 - optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
 - lr_scheduler_type: linear
-- num_epochs: 7
+- num_epochs: 15
 
 ### Training results
 
-| Training Loss | Epoch | Step | Validation Loss | Accuracy |
-|:-------------:|:-----:|:----:|:---------------:|:--------:|
-| 0.8391 | 1.0 | 832 | 0.5759 | 0.7909 |
-| 0.4761 | 2.0 | 1664 | 0.4916 | 0.8333 |
-| 0.416 | 3.0 | 2496 | 0.4148 | 0.8551 |
-| 0.3095 | 4.0 | 3328 | 0.5365 | 0.8311 |
-| 0.2578 | 5.0 | 4160 | 0.4860 | 0.8621 |
-| 0.2211 | 6.0 | 4992 | 0.4994 | 0.8753 |
-| 0.1621 | 7.0 | 5824 | 0.5330 | 0.8769 |
+| Training Loss | Epoch | Step  | Validation Loss | Accuracy |
+|:-------------:|:-----:|:-----:|:---------------:|:--------:|
+| 0.9388 | 1.0 | 832 | 0.7499 | 0.7188 |
+| 0.7211 | 2.0 | 1664 | 0.7321 | 0.7256 |
+| 0.6823 | 3.0 | 2496 | 0.7019 | 0.7431 |
+| 0.6092 | 4.0 | 3328 | 0.7059 | 0.7481 |
+| 0.5631 | 5.0 | 4160 | 0.7234 | 0.7447 |
+| 0.5552 | 6.0 | 4992 | 0.7394 | 0.7474 |
+| 0.5058 | 7.0 | 5824 | 0.7752 | 0.7483 |
+| 0.4731 | 8.0 | 6656 | 0.7877 | 0.7431 |
+| 0.4635 | 9.0 | 7488 | 0.8051 | 0.7515 |
+| 0.434 | 10.0 | 8320 | 0.8866 | 0.7431 |
+| 0.4246 | 11.0 | 9152 | 0.8953 | 0.7472 |
+| 0.4024 | 12.0 | 9984 | 0.9281 | 0.7478 |
+| 0.3917 | 13.0 | 10816 | 0.9527 | 0.7465 |
+| 0.3787 | 14.0 | 11648 | 0.9664 | 0.7456 |
+| 0.3672 | 15.0 | 12480 | 0.9677 | 0.7463 |
 
 
 ### Framework versions
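
The card now reports the DistilBERT-based run: 15 epochs, best accuracy 0.7515 at epoch 9, final 0.7463. Below is a minimal inference sketch for the published classifier; the repo id `Habaznya/p_model_2` is an assumption pieced together from the commit author and model name, not something the diff states.

```python
# Minimal inference sketch. The repo id below is an assumption (commit author +
# model name); adjust it to the actual Hub location of this checkpoint.
import torch
from transformers import AutoTokenizer, AutoModelForSequenceClassification

repo_id = "Habaznya/p_model_2"  # hypothetical repo id
tokenizer = AutoTokenizer.from_pretrained(repo_id)
model = AutoModelForSequenceClassification.from_pretrained(repo_id)
model.eval()

text = "Вечер поэзии в городской библиотеке"  # any Russian text to classify
inputs = tokenizer(text, return_tensors="pt", truncation=True, max_length=512)
with torch.no_grad():
    logits = model(**inputs).logits
# id2label comes from config.json: POETRY, PROSE, EVENTS, BOOKS, ...
print(model.config.id2label[logits.argmax(dim=-1).item()])
```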
config.json CHANGED
@@ -1,14 +1,13 @@
 {
-  "_name_or_path": "google-bert/bert-base-multilingual-cased",
+  "_name_or_path": "DmitryPogrebnoy/distilbert-base-russian-cased",
+  "activation": "gelu",
   "architectures": [
-    "BertForSequenceClassification"
+    "DistilBertForSequenceClassification"
   ],
-  "attention_probs_dropout_prob": 0.1,
-  "classifier_dropout": null,
-  "directionality": "bidi",
-  "hidden_act": "gelu",
-  "hidden_dropout_prob": 0.1,
-  "hidden_size": 768,
+  "attention_dropout": 0.1,
+  "dim": 768,
+  "dropout": 0.1,
+  "hidden_dim": 3072,
   "id2label": {
     "0": "POETRY",
     "1": "PROSE",
@@ -17,7 +16,6 @@
     "4": "BOOKS"
   },
   "initializer_range": 0.02,
-  "intermediate_size": 3072,
   "label2id": {
     "BOOKS": 4,
     "EVENTS": 2,
@@ -25,22 +23,18 @@
     "POETRY": 0,
     "PROSE": 1
   },
-  "layer_norm_eps": 1e-12,
   "max_position_embeddings": 512,
-  "model_type": "bert",
-  "num_attention_heads": 12,
-  "num_hidden_layers": 12,
+  "model_type": "distilbert",
+  "n_heads": 12,
+  "n_layers": 6,
+  "output_past": true,
   "pad_token_id": 0,
-  "pooler_fc_size": 768,
-  "pooler_num_attention_heads": 12,
-  "pooler_num_fc_layers": 3,
-  "pooler_size_per_head": 128,
-  "pooler_type": "first_token_transform",
-  "position_embedding_type": "absolute",
   "problem_type": "single_label_classification",
+  "qa_dropout": 0.1,
+  "seq_classif_dropout": 0.2,
+  "sinusoidal_pos_embds": false,
+  "tie_weights_": true,
   "torch_dtype": "float32",
   "transformers_version": "4.41.2",
-  "type_vocab_size": 2,
-  "use_cache": true,
-  "vocab_size": 119547
+  "vocab_size": 13982
 }
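
The config swap renames the transformer-size fields to their DistilBERT equivalents (hidden_size → dim, intermediate_size → hidden_dim, num_attention_heads → n_heads), halves the depth from 12 to 6 layers, and shrinks the vocabulary from 119547 (mBERT) to 13982 (Russian DistilBERT). A small sketch that reads this commit's config.json (assumed to be downloaded locally) and prints those fields:

```python
# Sketch: inspect the fields that changed in this commit's config.json
# (assumed to be downloaded into the working directory).
import json

with open("config.json") as f:
    cfg = json.load(f)

# BERT -> DistilBERT field names: hidden_size -> dim, intermediate_size -> hidden_dim,
# num_attention_heads -> n_heads, num_hidden_layers (12) -> n_layers (6)
print(cfg["model_type"], cfg["n_layers"], cfg["n_heads"], cfg["dim"], cfg["hidden_dim"])
# expected: distilbert 6 12 768 3072
print(cfg["vocab_size"])  # 13982, down from mBERT's 119547
print(cfg["id2label"])    # five classes: POETRY, PROSE, EVENTS, BOOKS, ...
```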
model.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:06da4d15d40e7f8bbce738b30ce5095d4c9ac12d4c83d560953ef369788dc3b0
-size 711452684
+oid sha256:5c094e9c9513a7b15a1492696327536658e93789efacac2b88ca4eb5db8728e9
+size 217030868
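
Swapping mBERT for the smaller Russian DistilBERT cuts model.safetensors from about 711 MB to about 217 MB. The LFS pointer carries the SHA-256 of the real payload, so a downloaded copy can be checked against it; a sketch, assuming the weights file sits in the working directory:

```python
# Sketch: verify a downloaded model.safetensors against the oid in the LFS pointer.
import hashlib

EXPECTED = "5c094e9c9513a7b15a1492696327536658e93789efacac2b88ca4eb5db8728e9"

digest = hashlib.sha256()
with open("model.safetensors", "rb") as f:            # assumed local path
    for chunk in iter(lambda: f.read(1 << 20), b""):  # read in 1 MiB chunks
        digest.update(chunk)

assert digest.hexdigest() == EXPECTED, "weights do not match the LFS pointer"
print("weights match the LFS pointer")
```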
runs/Jun18_10-52-27_6dd0c134a67e/events.out.tfevents.1718707948.6dd0c134a67e.448.0 ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:d264c77a08d126c6affe5d52f495c5e3703dd33b6926c9aaaed0e222b63d86b5
+size 4956
runs/Jun18_10-53-41_6dd0c134a67e/events.out.tfevents.1718708022.6dd0c134a67e.448.1 ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:28c2ac6103249d1d0ff54e1f0654bc6515dba7a65aeef1e4a262b026ff147dd1
+size 4957
runs/Jun18_10-55-00_6dd0c134a67e/events.out.tfevents.1718708100.6dd0c134a67e.448.2 ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:8f37b2d7327863ba3b6859c44ebcc134b715f0fb0916ba0a8f15bcdfd0c56145
+size 15220
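
Three TensorBoard event files are added; the third (about 15 KB) covers the full 15-epoch run. A sketch for reading the logged scalars back out with TensorBoard's EventAccumulator; the tag name `eval/accuracy` follows the usual Trainer convention and is an assumption, not something visible in the diff:

```python
# Sketch: read eval metrics from the largest event file added in this commit.
# Assumes the runs/ directory has been downloaded and tensorboard is installed.
from tensorboard.backend.event_processing.event_accumulator import EventAccumulator

run_dir = "runs/Jun18_10-55-00_6dd0c134a67e"  # the ~15 KB run
acc = EventAccumulator(run_dir)
acc.Reload()

print(acc.Tags()["scalars"])                # all logged scalar tags
for event in acc.Scalars("eval/accuracy"):  # assumed Trainer tag name
    print(event.step, event.value)
```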
tokenizer.json CHANGED
The diff for this file is too large to render. See raw diff
 
tokenizer_config.json CHANGED
@@ -43,13 +43,15 @@
   },
   "clean_up_tokenization_spaces": true,
   "cls_token": "[CLS]",
+  "do_basic_tokenize": true,
   "do_lower_case": false,
   "mask_token": "[MASK]",
   "model_max_length": 512,
+  "never_split": null,
   "pad_token": "[PAD]",
   "sep_token": "[SEP]",
   "strip_accents": null,
   "tokenize_chinese_chars": true,
-  "tokenizer_class": "BertTokenizer",
+  "tokenizer_class": "DistilBertTokenizer",
   "unk_token": "[UNK]"
 }
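
With tokenizer_class now set to DistilBertTokenizer, AutoTokenizer resolves the checkpoint to the DistilBERT WordPiece tokenizer. A quick check, reusing the hypothetical repo id from the inference sketch above:

```python
# Sketch: confirm which tokenizer class the updated tokenizer_config.json resolves to.
from transformers import AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained("Habaznya/p_model_2")  # hypothetical repo id
print(type(tokenizer).__name__)    # DistilBertTokenizerFast (or DistilBertTokenizer)
print(tokenizer.model_max_length)  # 512, per tokenizer_config.json
```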
training_args.bin CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:4fbfbbb05010cbf1cc72fe0f6693c5510a77da26e0ef0768b6a69eeeaf4396d5
+oid sha256:3db2b0d264611103c12783d7f136a90baae132a2c4df19fbfd38852ebde554df
 size 5112
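
training_args.bin is a pickled TrainingArguments object (same 5112-byte size, new hash). It can be inspected with torch.load; a sketch, noting that recent PyTorch versions require weights_only=False to unpickle arbitrary objects:

```python
# Sketch: peek at the updated training_args.bin (a pickled TrainingArguments).
import torch

args = torch.load("training_args.bin", weights_only=False)  # assumed local path
print(args.num_train_epochs)              # 15, matching the README
print(args.seed, args.lr_scheduler_type)  # seed and scheduler from the README: 42, linear
```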
vocab.txt CHANGED
The diff for this file is too large to render. See raw diff