End of training

Browse files

Files changed (8) hide show

README.md +9 -7
config.json +1 -1
logs/events.out.tfevents.1704536554.9585f193967c.27.0 +3 -0
model.safetensors +1 -1
special_tokens_map.json +5 -35
tokenizer_config.json +0 -7
trainer_state.json +18 -74
training_args.bin +1 -1

README.md CHANGED Viewed

@@ -1,4 +1,6 @@
 ---
 tags:
 - generated_from_trainer
 model-index:
@@ -11,14 +13,14 @@ should probably proofread and complete it, then remove this comment. -->
 # ner-bert-ingredients
-This model was trained from scratch on an unknown dataset.
 It achieves the following results on the evaluation set:
-- eval_loss: 1.7413
-- eval_runtime: 84.4082
-- eval_samples_per_second: 59.236
-- eval_steps_per_second: 0.936
-- epoch: 10.11
-- step: 7500
 ## Model description

 ---
+license: apache-2.0
+base_model: bert-base-uncased
 tags:
 - generated_from_trainer
 model-index:
 # ner-bert-ingredients
+This model is a fine-tuned version of [bert-base-uncased](https://huggingface.co/bert-base-uncased) on an unknown dataset.
 It achieves the following results on the evaluation set:
+- eval_loss: 4.0105
+- eval_runtime: 85.2403
+- eval_samples_per_second: 58.658
+- eval_steps_per_second: 0.927
+- epoch: 1.01
+- step: 750
 ## Model description

config.json CHANGED Viewed

@@ -1,5 +1,5 @@
 {
-  "_name_or_path": "/kaggle/working/ner-bert-ingredients",
   "architectures": [
     "BertForTokenClassification"
   ],

 {
+  "_name_or_path": "bert-base-uncased",
   "architectures": [
     "BertForTokenClassification"
   ],

logs/events.out.tfevents.1704536554.9585f193967c.27.0 ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:5bc8a9a6e3a7aea1818d9cae2294eee899d6ffcb9714a4534d8163379799e8f3
+size 1717266

model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:1df161abd771fcf710eabc03224865fdebcab37a40a5f8579b71911cbd9f9dd1
 size 535667604

 version https://git-lfs.github.com/spec/v1
+oid sha256:51e67bf3988dfeca2610acc41b0ba71cad4c8ed7aedf5b9a68ff42087ab7512e
 size 535667604

special_tokens_map.json CHANGED Viewed

@@ -1,37 +1,7 @@
 {
-  "cls_token": {
-    "content": "[CLS]",
-    "lstrip": false,
-    "normalized": false,
-    "rstrip": false,
-    "single_word": false
-  },
-  "mask_token": {
-    "content": "[MASK]",
-    "lstrip": false,
-    "normalized": false,
-    "rstrip": false,
-    "single_word": false
-  },
-  "pad_token": {
-    "content": "[PAD]",
-    "lstrip": false,
-    "normalized": false,
-    "rstrip": false,
-    "single_word": false
-  },
-  "sep_token": {
-    "content": "[SEP]",
-    "lstrip": false,
-    "normalized": false,
-    "rstrip": false,
-    "single_word": false
-  },
-  "unk_token": {
-    "content": "[UNK]",
-    "lstrip": false,
-    "normalized": false,
-    "rstrip": false,
-    "single_word": false
-  }
 }

 {
+  "cls_token": "[CLS]",
+  "mask_token": "[MASK]",
+  "pad_token": "[PAD]",
+  "sep_token": "[SEP]",
+  "unk_token": "[UNK]"
 }

tokenizer_config.json CHANGED Viewed

@@ -45,18 +45,11 @@
   "cls_token": "[CLS]",
   "do_lower_case": true,
   "mask_token": "[MASK]",
-  "max_length": 512,
   "model_max_length": 512,
-  "pad_to_multiple_of": null,
   "pad_token": "[PAD]",
-  "pad_token_type_id": 0,
-  "padding_side": "right",
   "sep_token": "[SEP]",
-  "stride": 0,
   "strip_accents": null,
   "tokenize_chinese_chars": true,
   "tokenizer_class": "BertTokenizer",
-  "truncation_side": "right",
-  "truncation_strategy": "longest_first",
   "unk_token": "[UNK]"
 }

   "cls_token": "[CLS]",
   "do_lower_case": true,
   "mask_token": "[MASK]",
   "model_max_length": 512,
   "pad_token": "[PAD]",
   "sep_token": "[SEP]",
   "strip_accents": null,
   "tokenize_chinese_chars": true,
   "tokenizer_class": "BertTokenizer",
   "unk_token": "[UNK]"
 }

trainer_state.json CHANGED Viewed

@@ -1,90 +1,34 @@
 {
-  "best_metric": 1.63157057762146,
-  "best_model_checkpoint": "ner-bert-ingredients/checkpoint-4500",
-  "epoch": 10.107106769956214,
-  "eval_steps": 1500,
-  "global_step": 7500,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
   "log_history": [
     {
-      "epoch": 2.02,
-      "learning_rate": 4.8989218328840976e-05,
-      "loss": 4.3834,
-      "step": 1500
     },
     {
-      "epoch": 2.02,
-      "eval_loss": 2.8215107917785645,
-      "eval_runtime": 84.9249,
-      "eval_samples_per_second": 58.876,
-      "eval_steps_per_second": 0.93,
-      "step": 1500
-    },
-    {
-      "epoch": 4.04,
-      "learning_rate": 4.8989218328840976e-05,
-      "loss": 3.4717,
-      "step": 3000
-    },
-    {
-      "epoch": 4.04,
-      "eval_loss": 2.236969232559204,
-      "eval_runtime": 84.7753,
-      "eval_samples_per_second": 58.979,
-      "eval_steps_per_second": 0.932,
-      "step": 3000
-    },
-    {
-      "epoch": 6.06,
-      "learning_rate": 4.7979110512129385e-05,
-      "loss": 1.7627,
-      "step": 4500
-    },
-    {
-      "epoch": 6.06,
-      "eval_loss": 1.63157057762146,
-      "eval_runtime": 84.483,
-      "eval_samples_per_second": 59.183,
-      "eval_steps_per_second": 0.935,
-      "step": 4500
-    },
-    {
-      "epoch": 8.08,
-      "learning_rate": 4.696900269541779e-05,
-      "loss": 1.2736,
-      "step": 6000
-    },
-    {
-      "epoch": 8.08,
-      "eval_loss": 1.4030611515045166,
-      "eval_runtime": 84.5417,
-      "eval_samples_per_second": 59.142,
-      "eval_steps_per_second": 0.934,
-      "step": 6000
-    },
-    {
-      "epoch": 10.11,
-      "learning_rate": 4.8989218328840976e-05,
-      "loss": 2.5862,
-      "step": 7500
-    },
-    {
-      "epoch": 10.11,
-      "eval_loss": 1.7413185834884644,
-      "eval_runtime": 84.4082,
-      "eval_samples_per_second": 59.236,
-      "eval_steps_per_second": 0.936,
-      "step": 7500
     }
   ],
-  "logging_steps": 1500,
   "max_steps": 74200,
   "num_input_tokens_seen": 0,
   "num_train_epochs": 100,
-  "save_steps": 1500,
-  "total_flos": 3.245985557508096e+17,
   "train_batch_size": 16,
   "trial_name": null,
   "trial_params": null

 {
+  "best_metric": null,
+  "best_model_checkpoint": null,
+  "epoch": 1.0104412260020208,
+  "eval_steps": 750,
+  "global_step": 750,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
   "log_history": [
     {
+      "epoch": 1.01,
+      "learning_rate": 4.949460916442048e-05,
+      "loss": 5.3025,
+      "step": 750
     },
     {
+      "epoch": 1.01,
+      "eval_loss": 4.010455131530762,
+      "eval_runtime": 85.2403,
+      "eval_samples_per_second": 58.658,
+      "eval_steps_per_second": 0.927,
+      "step": 750
     }
   ],
+  "logging_steps": 750,
   "max_steps": 74200,
   "num_input_tokens_seen": 0,
   "num_train_epochs": 100,
+  "save_steps": 750,
+  "total_flos": 3.245985557508096e+16,
   "train_batch_size": 16,
   "trial_name": null,
   "trial_params": null

training_args.bin CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:f593d4ea75754b8c22e75c7f17003f0a08348c63a20a400df9886c12cbeedc59
 size 4283

 version https://git-lfs.github.com/spec/v1
+oid sha256:37a91c4ff3b80be24391e06650540dddea861285c3a8030b1771c2469cae3f40
 size 4283