cmcmaster committed on Dec 1, 2023

Commit

69be20e

•

1 Parent(s): 64da2d8

End of training

Browse files

Files changed (19) hide show

README.md +22 -76
added_tokens.json +5 -0
config.json +5 -5
generation_config.json +2 -1
model.safetensors +2 -2
runs/Dec01_13-22-37_christopher-System-Product-Name/events.out.tfevents.1701397358.christopher-System-Product-Name.719702.0 +3 -0
runs/Dec01_13-23-31_christopher-System-Product-Name/events.out.tfevents.1701397412.christopher-System-Product-Name.720516.0 +3 -0
runs/Dec01_13-24-20_christopher-System-Product-Name/events.out.tfevents.1701397461.christopher-System-Product-Name.721356.0 +3 -0
runs/Dec01_13-25-12_christopher-System-Product-Name/events.out.tfevents.1701397513.christopher-System-Product-Name.722333.0 +3 -0
runs/Dec01_13-26-53_christopher-System-Product-Name/events.out.tfevents.1701397614.christopher-System-Product-Name.724388.0 +3 -0
runs/Dec01_13-31-52_christopher-System-Product-Name/events.out.tfevents.1701397913.christopher-System-Product-Name.730346.0 +3 -0
runs/Dec01_13-32-46_christopher-System-Product-Name/events.out.tfevents.1701397967.christopher-System-Product-Name.731453.0 +3 -0
runs/Dec01_13-38-13_christopher-System-Product-Name/events.out.tfevents.1701398294.christopher-System-Product-Name.737472.0 +3 -0
runs/Dec01_13-42-50_christopher-System-Product-Name/events.out.tfevents.1701398571.christopher-System-Product-Name.742627.0 +3 -0
runs/Nov29_11-48-56_christopher-System-Product-Name/events.out.tfevents.1701218937.christopher-System-Product-Name.111994.0 +3 -0
runs/Nov29_11-49-25_christopher-System-Product-Name/events.out.tfevents.1701218966.christopher-System-Product-Name.112623.0 +3 -0
tokenizer.json +27 -0
tokenizer_config.json +24 -0
training_args.bin +2 -2

README.md CHANGED Viewed

@@ -1,6 +1,4 @@
 ---
-license: apache-2.0
-base_model: google/flan-t5-small
 tags:
 - generated_from_trainer
 model-index:
@@ -13,9 +11,9 @@ should probably proofread and complete it, then remove this comment. -->
 # medication-lists
-This model is a fine-tuned version of [google/flan-t5-small](https://huggingface.co/google/flan-t5-small) on the None dataset.
 It achieves the following results on the evaluation set:
-- Loss: 0.1027
 ## Model description
@@ -34,89 +32,37 @@ More information needed
 ### Training hyperparameters
 The following hyperparameters were used during training:
-- learning_rate: 0.0003
-- train_batch_size: 16
-- eval_batch_size: 16
 - seed: 42
 - optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
 - lr_scheduler_type: linear
 - lr_scheduler_warmup_ratio: 0.03
-- num_epochs: 20
 ### Training results
 | Training Loss | Epoch | Step | Validation Loss |
 |:-------------:|:-----:|:----:|:---------------:|
-| 1.7961        | 0.3   | 50   | 0.7538          |
-| 0.7207        | 0.61  | 100  | 0.3876          |
-| 0.4596        | 0.91  | 150  | 0.2763          |
-| 0.3536        | 1.22  | 200  | 0.2265          |
-| 0.3089        | 1.52  | 250  | 0.1937          |
-| 0.2736        | 1.83  | 300  | 0.1842          |
-| 0.2415        | 2.13  | 350  | 0.1713          |
-| 0.2309        | 2.44  | 400  | 0.1601          |
-| 0.2011        | 2.74  | 450  | 0.1533          |
-| 0.198         | 3.05  | 500  | 0.1464          |
-| 0.1816        | 3.35  | 550  | 0.1418          |
-| 0.1887        | 3.66  | 600  | 0.1354          |
-| 0.1717        | 3.96  | 650  | 0.1295          |
-| 0.1589        | 4.27  | 700  | 0.1320          |
-| 0.1606        | 4.57  | 750  | 0.1230          |
-| 0.1545        | 4.88  | 800  | 0.1255          |
-| 0.1502        | 5.18  | 850  | 0.1247          |
-| 0.1438        | 5.49  | 900  | 0.1251          |
-| 0.1395        | 5.79  | 950  | 0.1222          |
-| 0.1414        | 6.1   | 1000 | 0.1173          |
-| 0.133         | 6.4   | 1050 | 0.1149          |
-| 0.1338        | 6.71  | 1100 | 0.1124          |
-| 0.1361        | 7.01  | 1150 | 0.1148          |
-| 0.1269        | 7.32  | 1200 | 0.1137          |
-| 0.123         | 7.62  | 1250 | 0.1145          |
-| 0.1203        | 7.93  | 1300 | 0.1129          |
-| 0.1194        | 8.23  | 1350 | 0.1081          |
-| 0.1177        | 8.54  | 1400 | 0.1099          |
-| 0.1173        | 8.84  | 1450 | 0.1109          |
-| 0.113         | 9.15  | 1500 | 0.1107          |
-| 0.1122        | 9.45  | 1550 | 0.1068          |
-| 0.11          | 9.76  | 1600 | 0.1072          |
-| 0.1078        | 10.06 | 1650 | 0.1086          |
-| 0.101         | 10.37 | 1700 | 0.1088          |
-| 0.1106        | 10.67 | 1750 | 0.1079          |
-| 0.1094        | 10.98 | 1800 | 0.1109          |
-| 0.1072        | 11.28 | 1850 | 0.1054          |
-| 0.103         | 11.59 | 1900 | 0.1062          |
-| 0.1009        | 11.89 | 1950 | 0.1051          |
-| 0.1005        | 12.2  | 2000 | 0.1049          |
-| 0.0985        | 12.5  | 2050 | 0.1059          |
-| 0.0983        | 12.8  | 2100 | 0.1063          |
-| 0.0953        | 13.11 | 2150 | 0.1062          |
-| 0.0935        | 13.41 | 2200 | 0.1044          |
-| 0.1003        | 13.72 | 2250 | 0.1034          |
-| 0.0935        | 14.02 | 2300 | 0.1049          |
-| 0.0935        | 14.33 | 2350 | 0.1038          |
-| 0.096         | 14.63 | 2400 | 0.1020          |
-| 0.0894        | 14.94 | 2450 | 0.1048          |
-| 0.0931        | 15.24 | 2500 | 0.1034          |
-| 0.0888        | 15.55 | 2550 | 0.1030          |
-| 0.0904        | 15.85 | 2600 | 0.1038          |
-| 0.0885        | 16.16 | 2650 | 0.1046          |
-| 0.088         | 16.46 | 2700 | 0.1041          |
-| 0.0925        | 16.77 | 2750 | 0.1027          |
-| 0.0835        | 17.07 | 2800 | 0.1034          |
-| 0.089         | 17.38 | 2850 | 0.1036          |
-| 0.0844        | 17.68 | 2900 | 0.1043          |
-| 0.0866        | 17.99 | 2950 | 0.1031          |
-| 0.0835        | 18.29 | 3000 | 0.1030          |
-| 0.0826        | 18.6  | 3050 | 0.1028          |
-| 0.0874        | 18.9  | 3100 | 0.1018          |
-| 0.0846        | 19.21 | 3150 | 0.1030          |
-| 0.0852        | 19.51 | 3200 | 0.1026          |
-| 0.0835        | 19.82 | 3250 | 0.1027          |
 ### Framework versions
-- Transformers 4.35.0
-- Pytorch 2.1.0+cu121
-- Datasets 2.11.0
 - Tokenizers 0.14.1

 ---
 tags:
 - generated_from_trainer
 model-index:
 # medication-lists
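+This model is a fine-tuned version of [google/flan-t5-small](https://huggingface.co/google/flan-t5-small) (loaded from a local checkpoint) on an unknown dataset.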
 It achieves the following results on the evaluation set:
+- Loss: 0.0228
 ## Model description
 ### Training hyperparameters
 The following hyperparameters were used during training:
+- learning_rate: 0.004
+- train_batch_size: 3
+- eval_batch_size: 3
 - seed: 42
 - optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
 - lr_scheduler_type: linear
 - lr_scheduler_warmup_ratio: 0.03
+- num_epochs: 2
 ### Training results
 | Training Loss | Epoch | Step | Validation Loss |
 |:-------------:|:-----:|:----:|:---------------:|
+| 0.2309        | 0.15  | 400  | 0.1886          |
+| 0.151         | 0.3   | 800  | 0.1260          |
+| 0.1061        | 0.45  | 1200 | 0.0852          |
+| 0.0773        | 0.6   | 1600 | 0.0610          |
+| 0.0693        | 0.75  | 2000 | 0.0498          |
+| 0.0505        | 0.9   | 2400 | 0.0428          |
+| 0.0428        | 1.05  | 2800 | 0.0387          |
+| 0.0343        | 1.2   | 3200 | 0.0324          |
+| 0.0289        | 1.35  | 3600 | 0.0299          |
+| 0.0281        | 1.5   | 4000 | 0.0265          |
+| 0.0251        | 1.65  | 4400 | 0.0250          |
+| 0.0208        | 1.8   | 4800 | 0.0236          |
+| 0.021         | 1.95  | 5200 | 0.0228          |
 ### Framework versions
+- Transformers 4.35.2
+- Pytorch 2.0.1+cu117
+- Datasets 2.14.7
 - Tokenizers 0.14.1

added_tokens.json ADDED Viewed

	@@ -0,0 +1,5 @@

+{
+  "\n": 32100,
+  "{": 32101,
+  "}": 32102
+}

config.json CHANGED Viewed

@@ -1,5 +1,5 @@
 {
-  "_name_or_path": "google/flan-t5-small",
   "architectures": [
     "T5ForConditionalGeneration"
   ],
@@ -8,10 +8,10 @@
   "d_kv": 64,
   "d_model": 512,
   "decoder_start_token_id": 0,
-  "dense_act_fn": "gelu_new",
   "dropout_rate": 0.1,
   "eos_token_id": 1,
-  "feed_forward_proj": "gated-gelu",
   "initializer_factor": 1.0,
   "is_encoder_decoder": true,
   "is_gated_act": true,
@@ -56,7 +56,7 @@
   },
   "tie_word_embeddings": false,
   "torch_dtype": "float32",
-  "transformers_version": "4.35.0",
   "use_cache": false,
-  "vocab_size": 32128
 }

 {
+  "_name_or_path": "/mnt/hdd/pretrained_models/flan-t5-small",
   "architectures": [
     "T5ForConditionalGeneration"
   ],
   "d_kv": 64,
   "d_model": 512,
   "decoder_start_token_id": 0,
+  "dense_act_fn": "gelu",
   "dropout_rate": 0.1,
   "eos_token_id": 1,
+  "feed_forward_proj": "gelu",
   "initializer_factor": 1.0,
   "is_encoder_decoder": true,
   "is_gated_act": true,
   },
   "tie_word_embeddings": false,
   "torch_dtype": "float32",
+  "transformers_version": "4.35.2",
   "use_cache": false,
+  "vocab_size": 32103
 }

generation_config.json CHANGED Viewed

@@ -3,5 +3,6 @@
   "decoder_start_token_id": 0,
   "eos_token_id": 1,
   "pad_token_id": 0,
-  "transformers_version": "4.35.0"
 }

   "decoder_start_token_id": 0,
   "eos_token_id": 1,
   "pad_token_id": 0,
+  "transformers_version": "4.35.2",
+  "use_cache": false
 }

model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:e6fa312c0462dad2cef000dae1feadd95c8999644309948dfd9f2c794e141225
-size 307867048

 version https://git-lfs.github.com/spec/v1
+oid sha256:fa09322e2aac1456fc50b8dda2c3c76545f9e3cae9a38ce93d775ef0ad9061d9
+size 307764648

runs/Dec01_13-22-37_christopher-System-Product-Name/events.out.tfevents.1701397358.christopher-System-Product-Name.719702.0 ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:d4eb3a61d4a011aa3211abd36dce4ae173983034f7ef5b864d349ac56bcab732
+size 5327

runs/Dec01_13-23-31_christopher-System-Product-Name/events.out.tfevents.1701397412.christopher-System-Product-Name.720516.0 ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:48e6e90b112c989ec4ebf409cc423bb5ee85dce9a7d18e859079ad09dc8d71b5
+size 5635

runs/Dec01_13-24-20_christopher-System-Product-Name/events.out.tfevents.1701397461.christopher-System-Product-Name.721356.0 ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:4517dff2d85b966e58f642a093ba393acd29f7a0e17a44ea9ed342d23b15dd80
+size 4184

runs/Dec01_13-25-12_christopher-System-Product-Name/events.out.tfevents.1701397513.christopher-System-Product-Name.722333.0 ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:20d3456920e0463052e382f1ccecc2203f97c4babad2198e65440265559aecfe
+size 5902

runs/Dec01_13-26-53_christopher-System-Product-Name/events.out.tfevents.1701397614.christopher-System-Product-Name.724388.0 ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:843bc897063aae6517cefd3eb1e6a5ab28555f6c39b8b6be568396a8fa09d997
+size 7750

runs/Dec01_13-31-52_christopher-System-Product-Name/events.out.tfevents.1701397913.christopher-System-Product-Name.730346.0 ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:d12ed56d124c4a6f97e19dba2601af66fe6135ddea3792139f19ae2ceb7a532f
+size 5485

runs/Dec01_13-32-46_christopher-System-Product-Name/events.out.tfevents.1701397967.christopher-System-Product-Name.731453.0 ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:4b47d95b66ee017e4182ce0d4929968376fa3fa5af135e25837b7708495667ac
+size 7949

runs/Dec01_13-38-13_christopher-System-Product-Name/events.out.tfevents.1701398294.christopher-System-Product-Name.737472.0 ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:0f78adfa98198c727c4914bb587452d70ae72580fad84b9d169abf2aed04cff5
+size 7478

runs/Dec01_13-42-50_christopher-System-Product-Name/events.out.tfevents.1701398571.christopher-System-Product-Name.742627.0 ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:8024b0f9cac171b602252fd7b38e1fea427adca9aa2c08eea3041a513e73d3f3
+size 25842

runs/Nov29_11-48-56_christopher-System-Product-Name/events.out.tfevents.1701218937.christopher-System-Product-Name.111994.0 ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:f0a2e632428c42d837dea5ec37a18c1df1de12b4750e1aca6ff068343eab2d63
+size 5327

runs/Nov29_11-49-25_christopher-System-Product-Name/events.out.tfevents.1701218966.christopher-System-Product-Name.112623.0 ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:57ee57a7670456a4a0c8dbe3ded1dbeff2436a934ec62766c1db64a65be14f44
+size 13382

tokenizer.json CHANGED Viewed

@@ -929,6 +929,33 @@
       "rstrip": false,
       "normalized": false,
       "special": true
     }
   ],
   "normalizer": {

       "rstrip": false,
       "normalized": false,
       "special": true
+    },
+    {
+      "id": 32100,
+      "content": "\n",
+      "single_word": false,
+      "lstrip": false,
+      "rstrip": false,
+      "normalized": true,
+      "special": false
+    },
+    {
+      "id": 32101,
+      "content": "{",
+      "single_word": false,
+      "lstrip": false,
+      "rstrip": false,
+      "normalized": true,
+      "special": false
+    },
+    {
+      "id": 32102,
+      "content": "}",
+      "single_word": false,
+      "lstrip": false,
+      "rstrip": false,
+      "normalized": true,
+      "special": false
     }
   ],
   "normalizer": {

tokenizer_config.json CHANGED Viewed

@@ -823,6 +823,30 @@
       "rstrip": false,
       "single_word": false,
       "special": true
     }
   },
   "additional_special_tokens": [

       "rstrip": false,
       "single_word": false,
       "special": true
+    },
+    "32100": {
+      "content": "\n",
+      "lstrip": false,
+      "normalized": true,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "32101": {
+      "content": "{",
+      "lstrip": false,
+      "normalized": true,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "32102": {
+      "content": "}",
+      "lstrip": false,
+      "normalized": true,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
     }
   },
   "additional_special_tokens": [

training_args.bin CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:4b905a3df27a9d55cacafa59f3b6d59fcbcdb4816ce375f5a526c4e0a1f0cd09
-size 4792

 version https://git-lfs.github.com/spec/v1
+oid sha256:6b37bd029f8e6712134df6d32e0ac86d8a68ab00fd8b8b33747c29c726abc98c
+size 4347