stulcrad committed on Dec 12, 2023

Commit

e558d71

•

1 Parent(s): 4b6489f

Training in progress, step 500

Browse files

Files changed (19) hide show

.gitignore +2 -0
README.md +77 -0
config.json +49 -0
model.safetensors +3 -0
runs/Dec11_00-37-20_n27/events.out.tfevents.1702251461.n27.196116.0 +3 -0
runs/Dec11_00-41-30_n27/events.out.tfevents.1702251694.n27.198673.0 +3 -0
runs/Dec11_00-50-54_n27/events.out.tfevents.1702252258.n27.201587.0 +3 -0
runs/Dec11_00-56-13_n27/events.out.tfevents.1702252575.n27.208325.0 +3 -0
runs/Dec11_00-56-13_n27/events.out.tfevents.1702253548.n27.208325.1 +3 -0
runs/Dec12_17-22-06_n26/events.out.tfevents.1702398138.n26.31130.0 +3 -0
runs/Dec12_17-22-06_n26/events.out.tfevents.1702398882.n26.31130.1 +3 -0
runs/Dec12_23-49-17_n26/events.out.tfevents.1702421366.n26.73556.0 +3 -0
runs/Dec12_23-49-17_n26/events.out.tfevents.1702421584.n26.73556.1 +3 -0
runs/Dec13_00-09-53_n26/events.out.tfevents.1702422595.n26.74777.0 +3 -0
special_tokens_map.json +7 -0
tokenizer.json +0 -0
tokenizer_config.json +55 -0
training_args.bin +3 -0
vocab.txt +0 -0

.gitignore ADDED Viewed

	@@ -0,0 +1,2 @@


1	+ checkpoint*
2	+ runs*

README.md ADDED Viewed

	@@ -0,0 +1,77 @@

+---
+license: apache-2.0
+base_model: bert-base-multilingual-cased
+tags:
+- generated_from_trainer
+datasets:
+- wikiann
+model-index:
+- name: fine_tuned_BERT_cs_wikann
+  results: []
+---
+<!-- This model card has been generated automatically according to the information the Trainer had access to. You
+should probably proofread and complete it, then remove this comment. -->
+# fine_tuned_BERT_cs_wikann
+This model is a fine-tuned version of [bert-base-multilingual-cased](https://huggingface.co/bert-base-multilingual-cased) on the wikiann dataset.
+It achieves the following results on the evaluation set:
+- Loss: 0.1618
+- Overall Accuracy: 0.9672
+- Overall F1: 0.9184
+- Overall Precision: 0.9155
+- Overall Recall: 0.9213
+## Model description
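+A token-classification model (`BertForTokenClassification`) fine-tuned from `bert-base-multilingual-cased` to tag tokens with the labels O, B-PER, I-PER, B-ORG, I-ORG, B-LOC and I-LOC.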
+## Intended uses & limitations
+More information needed
+## Training and evaluation data
+More information needed
+## Training procedure
+### Training hyperparameters
+The following hyperparameters were used during training:
+- learning_rate: 5e-05
+- train_batch_size: 8
+- eval_batch_size: 8
+- seed: 42
+- optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
+- lr_scheduler_type: linear
+- num_epochs: 3.0
+### Training results
+| Training Loss | Epoch | Step | Validation Loss | Overall Accuracy | Overall F1 | Overall Precision | Overall Recall |
+|:-------------:|:-----:|:----:|:---------------:|:----------------:|:----------:|:-----------------:|:--------------:|
+| 0.3538        | 0.2   | 500  | 0.2330          | 0.9392           | 0.8365     | 0.8271            | 0.8461         |
+| 0.2331        | 0.4   | 1000 | 0.2291          | 0.9429           | 0.8536     | 0.8442            | 0.8633         |
+| 0.2093        | 0.6   | 1500 | 0.1936          | 0.9515           | 0.8720     | 0.8777            | 0.8663         |
+| 0.1976        | 0.8   | 2000 | 0.1728          | 0.9512           | 0.8673     | 0.8634            | 0.8714         |
+| 0.1911        | 1.0   | 2500 | 0.1811          | 0.9586           | 0.8911     | 0.8797            | 0.9027         |
+| 0.1245        | 1.2   | 3000 | 0.1771          | 0.9604           | 0.8977     | 0.8933            | 0.9022         |
+| 0.1219        | 1.4   | 3500 | 0.1731          | 0.9595           | 0.8965     | 0.8893            | 0.9039         |
+| 0.1102        | 1.6   | 4000 | 0.1721          | 0.9625           | 0.9060     | 0.9041            | 0.9078         |
+| 0.1203        | 1.8   | 4500 | 0.1538          | 0.9625           | 0.9038     | 0.9095            | 0.8981         |
+| 0.1105        | 2.0   | 5000 | 0.1562          | 0.9656           | 0.9120     | 0.9065            | 0.9177         |
+| 0.0601        | 2.2   | 5500 | 0.1700          | 0.9648           | 0.9113     | 0.9006            | 0.9222         |
+| 0.0579        | 2.4   | 6000 | 0.1569          | 0.9659           | 0.9140     | 0.9105            | 0.9176         |
+| 0.0571        | 2.6   | 6500 | 0.1595          | 0.9673           | 0.9168     | 0.9154            | 0.9183         |
+| 0.0504        | 2.8   | 7000 | 0.1664          | 0.9670           | 0.9174     | 0.9120            | 0.9228         |
+| 0.0588        | 3.0   | 7500 | 0.1618          | 0.9672           | 0.9184     | 0.9155            | 0.9213         |
+### Framework versions
+- Transformers 4.36.0.dev0
+- Pytorch 2.0.1+cu117
+- Datasets 2.14.5
+- Tokenizers 0.15.0

config.json ADDED Viewed

	@@ -0,0 +1,49 @@

+{
+  "_name_or_path": "bert-base-multilingual-cased",
+  "architectures": [
+    "BertForTokenClassification"
+  ],
+  "attention_probs_dropout_prob": 0.1,
+  "classifier_dropout": null,
+  "directionality": "bidi",
+  "hidden_act": "gelu",
+  "hidden_dropout_prob": 0.1,
+  "hidden_size": 768,
+  "id2label": {
+    "0": "O",
+    "1": "B-PER",
+    "2": "I-PER",
+    "3": "B-ORG",
+    "4": "I-ORG",
+    "5": "B-LOC",
+    "6": "I-LOC"
+  },
+  "initializer_range": 0.02,
+  "intermediate_size": 3072,
+  "label2id": {
+    "B-LOC": 5,
+    "B-ORG": 3,
+    "B-PER": 1,
+    "I-LOC": 6,
+    "I-ORG": 4,
+    "I-PER": 2,
+    "O": 0
+  },
+  "layer_norm_eps": 1e-12,
+  "max_position_embeddings": 512,
+  "model_type": "bert",
+  "num_attention_heads": 12,
+  "num_hidden_layers": 12,
+  "pad_token_id": 0,
+  "pooler_fc_size": 768,
+  "pooler_num_attention_heads": 12,
+  "pooler_num_fc_layers": 3,
+  "pooler_size_per_head": 128,
+  "pooler_type": "first_token_transform",
+  "position_embedding_type": "absolute",
+  "torch_dtype": "float32",
+  "transformers_version": "4.36.0.dev0",
+  "type_vocab_size": 2,
+  "use_cache": true,
+  "vocab_size": 119547
+}

model.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:8e45ad0aee723b99188dcd22128e757706c0397bb807a2c871acb083eb1f3a4c
+size 709096284

runs/Dec11_00-37-20_n27/events.out.tfevents.1702251461.n27.196116.0 ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:166f699da044a547d2c2f032ea073bd024bf988d4354be92463981cb1ab420a7
+size 40

runs/Dec11_00-41-30_n27/events.out.tfevents.1702251694.n27.198673.0 ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:1b9b32e4b6b5d7f690d5b88439afeeae16f3724abb44b358d8506899db591ce2
+size 40

runs/Dec11_00-50-54_n27/events.out.tfevents.1702252258.n27.201587.0 ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:35119170fee60d6ba627a9d640b93d53c133f5073c4a2076e026484a38e32d1a
+size 4762

runs/Dec11_00-56-13_n27/events.out.tfevents.1702252575.n27.208325.0 ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:302e398553f1c6072cf69e1c898529578dfb8087a1f3ed416be75ac4b6e9fa41
+size 15018

runs/Dec11_00-56-13_n27/events.out.tfevents.1702253548.n27.208325.1 ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:6df7971c08d8ad80216074e3c2ba892a9e56190fddf5bdb6bb669cd9f4e2ccbf
+size 1048

runs/Dec12_17-22-06_n26/events.out.tfevents.1702398138.n26.31130.0 ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:97050da79ca29b27f1b9693b29bf1e9136a05511d8ceeec311b91b1a78b716c0
+size 14984

runs/Dec12_17-22-06_n26/events.out.tfevents.1702398882.n26.31130.1 ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:2a7161165534ef44a869d4d7c1a70868c3c5f9829f6c6db4821a21e47470245b
+size 1048

runs/Dec12_23-49-17_n26/events.out.tfevents.1702421366.n26.73556.0 ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:b0c61b3174afd3e289839b0deaa16d34cd2dd2431b78543c153532b8a81e5ae3
+size 5068

runs/Dec12_23-49-17_n26/events.out.tfevents.1702421584.n26.73556.1 ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:fdfa095b13d5e5ab0d80a18092d82a678c90cea0875decb3e46c6ebf9c842071
+size 544

runs/Dec13_00-09-53_n26/events.out.tfevents.1702422595.n26.74777.0 ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:70ab2630a29a46cfd72da1d4f6f687df7af8005c84b3eb04847b30993eb02d8c
+size 5378

special_tokens_map.json ADDED Viewed

	@@ -0,0 +1,7 @@

+{
+  "cls_token": "[CLS]",
+  "mask_token": "[MASK]",
+  "pad_token": "[PAD]",
+  "sep_token": "[SEP]",
+  "unk_token": "[UNK]"
+}

tokenizer.json ADDED Viewed

The diff for this file is too large to render. See raw diff

tokenizer_config.json ADDED Viewed

	@@ -0,0 +1,55 @@

+{
+  "added_tokens_decoder": {
+    "0": {
+      "content": "[PAD]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "100": {
+      "content": "[UNK]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "101": {
+      "content": "[CLS]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "102": {
+      "content": "[SEP]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "103": {
+      "content": "[MASK]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    }
+  },
+  "clean_up_tokenization_spaces": true,
+  "cls_token": "[CLS]",
+  "do_lower_case": false,
+  "mask_token": "[MASK]",
+  "model_max_length": 512,
+  "pad_token": "[PAD]",
+  "sep_token": "[SEP]",
+  "strip_accents": null,
+  "tokenize_chinese_chars": true,
+  "tokenizer_class": "BertTokenizer",
+  "unk_token": "[UNK]"
+}

training_args.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:eb901db0b6253e9b3fa7efd40bde4f38339b5e781762531d44cabbb5869c1dfa
+size 4219

vocab.txt ADDED Viewed

The diff for this file is too large to render. See raw diff