End of training

Browse files

Files changed (10) hide show

README.md +22 -14
config.json +16 -22
model.safetensors +2 -2
runs/Jun18_10-52-27_6dd0c134a67e/events.out.tfevents.1718707948.6dd0c134a67e.448.0 +3 -0
runs/Jun18_10-53-41_6dd0c134a67e/events.out.tfevents.1718708022.6dd0c134a67e.448.1 +3 -0
runs/Jun18_10-55-00_6dd0c134a67e/events.out.tfevents.1718708100.6dd0c134a67e.448.2 +3 -0
tokenizer.json +0 -0
tokenizer_config.json +3 -1
training_args.bin +1 -1
vocab.txt +0 -0

README.md CHANGED Viewed

@@ -1,6 +1,6 @@
 ---
 license: apache-2.0
-base_model: google-bert/bert-base-multilingual-cased
 tags:
 - generated_from_trainer
 metrics:
@@ -15,10 +15,10 @@ should probably proofread and complete it, then remove this comment. -->
 # p_model_2
-This model is a fine-tuned version of [google-bert/bert-base-multilingual-cased](https://huggingface.co/google-bert/bert-base-multilingual-cased) on the None dataset.
 It achieves the following results on the evaluation set:
-- Loss: 0.5330
-- Accuracy: 0.8769
 ## Model description
@@ -43,19 +43,27 @@ The following hyperparameters were used during training:
 - seed: 42
 - optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
 - lr_scheduler_type: linear
-- num_epochs: 7
 ### Training results
-| Training Loss | Epoch | Step | Validation Loss | Accuracy |
-|:-------------:|:-----:|:----:|:---------------:|:--------:|
-| 0.8391        | 1.0   | 832  | 0.5759          | 0.7909   |
-| 0.4761        | 2.0   | 1664 | 0.4916          | 0.8333   |
-| 0.416         | 3.0   | 2496 | 0.4148          | 0.8551   |
-| 0.3095        | 4.0   | 3328 | 0.5365          | 0.8311   |
-| 0.2578        | 5.0   | 4160 | 0.4860          | 0.8621   |
-| 0.2211        | 6.0   | 4992 | 0.4994          | 0.8753   |
-| 0.1621        | 7.0   | 5824 | 0.5330          | 0.8769   |
 ### Framework versions

 ---
 license: apache-2.0
+base_model: DmitryPogrebnoy/distilbert-base-russian-cased
 tags:
 - generated_from_trainer
 metrics:
 # p_model_2
+This model is a fine-tuned version of [DmitryPogrebnoy/distilbert-base-russian-cased](https://huggingface.co/DmitryPogrebnoy/distilbert-base-russian-cased) on an unspecified dataset.
 It achieves the following results on the evaluation set:
+- Loss: 0.9677
+- Accuracy: 0.7463
 ## Model description
 - seed: 42
 - optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
 - lr_scheduler_type: linear
+- num_epochs: 15
 ### Training results
+| Training Loss | Epoch | Step  | Validation Loss | Accuracy |
+|:-------------:|:-----:|:-----:|:---------------:|:--------:|
+| 0.9388        | 1.0   | 832   | 0.7499          | 0.7188   |
+| 0.7211        | 2.0   | 1664  | 0.7321          | 0.7256   |
+| 0.6823        | 3.0   | 2496  | 0.7019          | 0.7431   |
+| 0.6092        | 4.0   | 3328  | 0.7059          | 0.7481   |
+| 0.5631        | 5.0   | 4160  | 0.7234          | 0.7447   |
+| 0.5552        | 6.0   | 4992  | 0.7394          | 0.7474   |
+| 0.5058        | 7.0   | 5824  | 0.7752          | 0.7483   |
+| 0.4731        | 8.0   | 6656  | 0.7877          | 0.7431   |
+| 0.4635        | 9.0   | 7488  | 0.8051          | 0.7515   |
+| 0.434         | 10.0  | 8320  | 0.8866          | 0.7431   |
+| 0.4246        | 11.0  | 9152  | 0.8953          | 0.7472   |
+| 0.4024        | 12.0  | 9984  | 0.9281          | 0.7478   |
+| 0.3917        | 13.0  | 10816 | 0.9527          | 0.7465   |
+| 0.3787        | 14.0  | 11648 | 0.9664          | 0.7456   |
+| 0.3672        | 15.0  | 12480 | 0.9677          | 0.7463   |
 ### Framework versions

config.json CHANGED Viewed

@@ -1,14 +1,13 @@
 {
-  "_name_or_path": "google-bert/bert-base-multilingual-cased",
   "architectures": [
-    "BertForSequenceClassification"
   ],
-  "attention_probs_dropout_prob": 0.1,
-  "classifier_dropout": null,
-  "directionality": "bidi",
-  "hidden_act": "gelu",
-  "hidden_dropout_prob": 0.1,
-  "hidden_size": 768,
   "id2label": {
     "0": "POETRY",
     "1": "PROSE",
@@ -17,7 +16,6 @@
     "4": "BOOKS"
   },
   "initializer_range": 0.02,
-  "intermediate_size": 3072,
   "label2id": {
     "BOOKS": 4,
     "EVENTS": 2,
@@ -25,22 +23,18 @@
     "POETRY": 0,
     "PROSE": 1
   },
-  "layer_norm_eps": 1e-12,
   "max_position_embeddings": 512,
-  "model_type": "bert",
-  "num_attention_heads": 12,
-  "num_hidden_layers": 12,
   "pad_token_id": 0,
-  "pooler_fc_size": 768,
-  "pooler_num_attention_heads": 12,
-  "pooler_num_fc_layers": 3,
-  "pooler_size_per_head": 128,
-  "pooler_type": "first_token_transform",
-  "position_embedding_type": "absolute",
   "problem_type": "single_label_classification",
   "torch_dtype": "float32",
   "transformers_version": "4.41.2",
-  "type_vocab_size": 2,
-  "use_cache": true,
-  "vocab_size": 119547
 }

 {
+  "_name_or_path": "DmitryPogrebnoy/distilbert-base-russian-cased",
+  "activation": "gelu",
   "architectures": [
+    "DistilBertForSequenceClassification"
   ],
+  "attention_dropout": 0.1,
+  "dim": 768,
+  "dropout": 0.1,
+  "hidden_dim": 3072,
   "id2label": {
     "0": "POETRY",
     "1": "PROSE",
     "4": "BOOKS"
   },
   "initializer_range": 0.02,
   "label2id": {
     "BOOKS": 4,
     "EVENTS": 2,
     "POETRY": 0,
     "PROSE": 1
   },
   "max_position_embeddings": 512,
+  "model_type": "distilbert",
+  "n_heads": 12,
+  "n_layers": 6,
+  "output_past": true,
   "pad_token_id": 0,
   "problem_type": "single_label_classification",
+  "qa_dropout": 0.1,
+  "seq_classif_dropout": 0.2,
+  "sinusoidal_pos_embds": false,
+  "tie_weights_": true,
   "torch_dtype": "float32",
   "transformers_version": "4.41.2",
+  "vocab_size": 13982
 }

model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:06da4d15d40e7f8bbce738b30ce5095d4c9ac12d4c83d560953ef369788dc3b0
-size 711452684

 version https://git-lfs.github.com/spec/v1
+oid sha256:5c094e9c9513a7b15a1492696327536658e93789efacac2b88ca4eb5db8728e9
+size 217030868

runs/Jun18_10-52-27_6dd0c134a67e/events.out.tfevents.1718707948.6dd0c134a67e.448.0 ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:d264c77a08d126c6affe5d52f495c5e3703dd33b6926c9aaaed0e222b63d86b5
+size 4956

runs/Jun18_10-53-41_6dd0c134a67e/events.out.tfevents.1718708022.6dd0c134a67e.448.1 ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:28c2ac6103249d1d0ff54e1f0654bc6515dba7a65aeef1e4a262b026ff147dd1
+size 4957

runs/Jun18_10-55-00_6dd0c134a67e/events.out.tfevents.1718708100.6dd0c134a67e.448.2 ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:8f37b2d7327863ba3b6859c44ebcc134b715f0fb0916ba0a8f15bcdfd0c56145
+size 15220

tokenizer.json CHANGED Viewed

The diff for this file is too large to render. See raw diff

tokenizer_config.json CHANGED Viewed

@@ -43,13 +43,15 @@
   },
   "clean_up_tokenization_spaces": true,
   "cls_token": "[CLS]",
   "do_lower_case": false,
   "mask_token": "[MASK]",
   "model_max_length": 512,
   "pad_token": "[PAD]",
   "sep_token": "[SEP]",
   "strip_accents": null,
   "tokenize_chinese_chars": true,
-  "tokenizer_class": "BertTokenizer",
   "unk_token": "[UNK]"
 }

   },
   "clean_up_tokenization_spaces": true,
   "cls_token": "[CLS]",
+  "do_basic_tokenize": true,
   "do_lower_case": false,
   "mask_token": "[MASK]",
   "model_max_length": 512,
+  "never_split": null,
   "pad_token": "[PAD]",
   "sep_token": "[SEP]",
   "strip_accents": null,
   "tokenize_chinese_chars": true,
+  "tokenizer_class": "DistilBertTokenizer",
   "unk_token": "[UNK]"
 }

training_args.bin CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:4fbfbbb05010cbf1cc72fe0f6693c5510a77da26e0ef0768b6a69eeeaf4396d5
 size 5112

 version https://git-lfs.github.com/spec/v1
+oid sha256:3db2b0d264611103c12783d7f136a90baae132a2c4df19fbfd38852ebde554df
 size 5112

vocab.txt CHANGED Viewed

The diff for this file is too large to render. See raw diff