End of training

Browse files

Files changed (6) hide show

README.md +28 -108
adapter_config.json +6 -6
adapter_model.safetensors +2 -2
runs/Sep25_10-20-01_sammie/events.out.tfevents.1727281819.sammie.5660.0 +3 -0
tokenizer_config.json +1 -1
training_args.bin +1 -1

README.md CHANGED Viewed

@@ -21,7 +21,7 @@ should probably proofread and complete it, then remove this comment. -->
 This model is a fine-tuned version of [unsloth/tinyllama-chat-bnb-4bit](https://huggingface.co/unsloth/tinyllama-chat-bnb-4bit) on the generator dataset.
 It achieves the following results on the evaluation set:
-- Loss: 1.3256
 ## Model description
@@ -40,7 +40,7 @@ More information needed
 ### Training hyperparameters
 The following hyperparameters were used during training:
-- learning_rate: 0.0002
 - train_batch_size: 8
 - eval_batch_size: 8
 - seed: 42
@@ -55,112 +55,32 @@ The following hyperparameters were used during training:
 | Training Loss | Epoch  | Step | Validation Loss |
 |:-------------:|:------:|:----:|:---------------:|
-| 2.0636        | 0.0094 | 10   | 1.9387          |
-| 1.8627        | 0.0188 | 20   | 1.8200          |
-| 1.7274        | 0.0282 | 30   | 1.7460          |
-| 1.7278        | 0.0377 | 40   | 1.6963          |
-| 1.6922        | 0.0471 | 50   | 1.6616          |
-| 1.6037        | 0.0565 | 60   | 1.6337          |
-| 1.5869        | 0.0659 | 70   | 1.6158          |
-| 1.5963        | 0.0753 | 80   | 1.5981          |
-| 1.5748        | 0.0847 | 90   | 1.5865          |
-| 1.5177        | 0.0942 | 100  | 1.5741          |
-| 1.5486        | 0.1036 | 110  | 1.5612          |
-| 1.5296        | 0.1130 | 120  | 1.5508          |
-| 1.5375        | 0.1224 | 130  | 1.5413          |
-| 1.5544        | 0.1318 | 140  | 1.5346          |
-| 1.4778        | 0.1412 | 150  | 1.5259          |
-| 1.5084        | 0.1507 | 160  | 1.5194          |
-| 1.4812        | 0.1601 | 170  | 1.5132          |
-| 1.4668        | 0.1695 | 180  | 1.5077          |
-| 1.4839        | 0.1789 | 190  | 1.5023          |
-| 1.4593        | 0.1883 | 200  | 1.4966          |
-| 1.5284        | 0.1977 | 210  | 1.4897          |
-| 1.4317        | 0.2072 | 220  | 1.4848          |
-| 1.4335        | 0.2166 | 230  | 1.4771          |
-| 1.4312        | 0.2260 | 240  | 1.4730          |
-| 1.4437        | 0.2354 | 250  | 1.4675          |
-| 1.4266        | 0.2448 | 260  | 1.4659          |
-| 1.4569        | 0.2542 | 270  | 1.4604          |
-| 1.4287        | 0.2637 | 280  | 1.4549          |
-| 1.412         | 0.2731 | 290  | 1.4505          |
-| 1.4113        | 0.2825 | 300  | 1.4474          |
-| 1.4106        | 0.2919 | 310  | 1.4435          |
-| 1.4074        | 0.3013 | 320  | 1.4398          |
-| 1.4139        | 0.3107 | 330  | 1.4362          |
-| 1.3989        | 0.3202 | 340  | 1.4325          |
-| 1.3803        | 0.3296 | 350  | 1.4292          |
-| 1.3489        | 0.3390 | 360  | 1.4272          |
-| 1.4065        | 0.3484 | 370  | 1.4236          |
-| 1.4201        | 0.3578 | 380  | 1.4208          |
-| 1.3936        | 0.3672 | 390  | 1.4174          |
-| 1.3934        | 0.3766 | 400  | 1.4142          |
-| 1.4117        | 0.3861 | 410  | 1.4121          |
-| 1.3812        | 0.3955 | 420  | 1.4091          |
-| 1.3882        | 0.4049 | 430  | 1.4065          |
-| 1.3849        | 0.4143 | 440  | 1.4039          |
-| 1.3537        | 0.4237 | 450  | 1.4011          |
-| 1.3892        | 0.4331 | 460  | 1.3993          |
-| 1.3915        | 0.4426 | 470  | 1.3971          |
-| 1.3766        | 0.4520 | 480  | 1.3967          |
-| 1.3422        | 0.4614 | 490  | 1.3968          |
-| 1.3525        | 0.4708 | 500  | 1.3917          |
-| 1.3662        | 0.4802 | 510  | 1.3878          |
-| 1.3254        | 0.4896 | 520  | 1.3871          |
-| 1.3047        | 0.4991 | 530  | 1.3840          |
-| 1.3064        | 0.5085 | 540  | 1.3820          |
-| 1.3662        | 0.5179 | 550  | 1.3805          |
-| 1.3177        | 0.5273 | 560  | 1.3793          |
-| 1.315         | 0.5367 | 570  | 1.3762          |
-| 1.3275        | 0.5461 | 580  | 1.3742          |
-| 1.3631        | 0.5556 | 590  | 1.3727          |
-| 1.3324        | 0.5650 | 600  | 1.3697          |
-| 1.317         | 0.5744 | 610  | 1.3679          |
-| 1.3479        | 0.5838 | 620  | 1.3669          |
-| 1.3526        | 0.5932 | 630  | 1.3650          |
-| 1.2873        | 0.6026 | 640  | 1.3640          |
-| 1.3318        | 0.6121 | 650  | 1.3615          |
-| 1.302         | 0.6215 | 660  | 1.3599          |
-| 1.2907        | 0.6309 | 670  | 1.3582          |
-| 1.3387        | 0.6403 | 680  | 1.3565          |
-| 1.3278        | 0.6497 | 690  | 1.3552          |
-| 1.3073        | 0.6591 | 700  | 1.3538          |
-| 1.2986        | 0.6685 | 710  | 1.3532          |
-| 1.3461        | 0.6780 | 720  | 1.3509          |
-| 1.3229        | 0.6874 | 730  | 1.3500          |
-| 1.3055        | 0.6968 | 740  | 1.3490          |
-| 1.3307        | 0.7062 | 750  | 1.3476          |
-| 1.3308        | 0.7156 | 760  | 1.3468          |
-| 1.3083        | 0.7250 | 770  | 1.3459          |
-| 1.312         | 0.7345 | 780  | 1.3447          |
-| 1.3068        | 0.7439 | 790  | 1.3424          |
-| 1.3202        | 0.7533 | 800  | 1.3415          |
-| 1.2872        | 0.7627 | 810  | 1.3403          |
-| 1.311         | 0.7721 | 820  | 1.3393          |
-| 1.3213        | 0.7815 | 830  | 1.3385          |
-| 1.3302        | 0.7910 | 840  | 1.3372          |
-| 1.3243        | 0.8004 | 850  | 1.3362          |
-| 1.3077        | 0.8098 | 860  | 1.3352          |
-| 1.3229        | 0.8192 | 870  | 1.3344          |
-| 1.3243        | 0.8286 | 880  | 1.3333          |
-| 1.2909        | 0.8380 | 890  | 1.3327          |
-| 1.3205        | 0.8475 | 900  | 1.3319          |
-| 1.2863        | 0.8569 | 910  | 1.3314          |
-| 1.2583        | 0.8663 | 920  | 1.3305          |
-| 1.29          | 0.8757 | 930  | 1.3298          |
-| 1.2706        | 0.8851 | 940  | 1.3294          |
-| 1.272         | 0.8945 | 950  | 1.3291          |
-| 1.2899        | 0.9040 | 960  | 1.3285          |
-| 1.3184        | 0.9134 | 970  | 1.3280          |
-| 1.3323        | 0.9228 | 980  | 1.3276          |
-| 1.3314        | 0.9322 | 990  | 1.3272          |
-| 1.2965        | 0.9416 | 1000 | 1.3269          |
-| 1.2874        | 0.9510 | 1010 | 1.3266          |
-| 1.2569        | 0.9605 | 1020 | 1.3263          |
-| 1.2622        | 0.9699 | 1030 | 1.3261          |
-| 1.3258        | 0.9793 | 1040 | 1.3259          |
-| 1.3072        | 0.9887 | 1050 | 1.3257          |
-| 1.2555        | 0.9981 | 1060 | 1.3256          |
 ### Framework versions

 This model is a fine-tuned version of [unsloth/tinyllama-chat-bnb-4bit](https://huggingface.co/unsloth/tinyllama-chat-bnb-4bit) on the generator dataset.
 It achieves the following results on the evaluation set:
+- Loss: 1.6823
 ## Model description
 ### Training hyperparameters
 The following hyperparameters were used during training:
+- learning_rate: 0.0001
 - train_batch_size: 8
 - eval_batch_size: 8
 - seed: 42
 | Training Loss | Epoch  | Step | Validation Loss |
 |:-------------:|:------:|:----:|:---------------:|
+| 4.5606        | 0.0377 | 10   | 4.0606          |
+| 3.7693        | 0.0753 | 20   | 3.5539          |
+| 3.3915        | 0.1130 | 30   | 3.2022          |
+| 3.0938        | 0.1507 | 40   | 2.9560          |
+| 2.8692        | 0.1883 | 50   | 2.7576          |
+| 2.6774        | 0.2260 | 60   | 2.5706          |
+| 2.5012        | 0.2637 | 70   | 2.3805          |
+| 2.3332        | 0.3013 | 80   | 2.2277          |
+| 2.159         | 0.3390 | 90   | 2.1030          |
+| 2.0806        | 0.3766 | 100  | 2.0125          |
+| 1.9781        | 0.4143 | 110  | 1.9497          |
+| 1.9683        | 0.4520 | 120  | 1.9005          |
+| 1.917         | 0.4896 | 130  | 1.8584          |
+| 1.8551        | 0.5273 | 140  | 1.8224          |
+| 1.8121        | 0.5650 | 150  | 1.7910          |
+| 1.7998        | 0.6026 | 160  | 1.7673          |
+| 1.7484        | 0.6403 | 170  | 1.7486          |
+| 1.7221        | 0.6780 | 180  | 1.7331          |
+| 1.7171        | 0.7156 | 190  | 1.7207          |
+| 1.7103        | 0.7533 | 200  | 1.7108          |
+| 1.7086        | 0.7910 | 210  | 1.7025          |
+| 1.7083        | 0.8286 | 220  | 1.6955          |
+| 1.7065        | 0.8663 | 230  | 1.6907          |
+| 1.6829        | 0.9040 | 240  | 1.6864          |
+| 1.6892        | 0.9416 | 250  | 1.6838          |
+| 1.6985        | 0.9793 | 260  | 1.6823          |
 ### Framework versions

adapter_config.json CHANGED Viewed

@@ -20,17 +20,17 @@
   "rank_pattern": {},
   "revision": null,
   "target_modules": [
     "self_attn.o_proj.weight",
     "up_proj",
-    "self_attn.qkv_proj.weight",
     "gate_proj",
-    "v_proj",
     "mlp.down_proj",
-    "k_proj",
     "q_proj",
-    "mlp.gate_up_proj",
-    "down_proj",
-    "o_proj"
   ],
   "task_type": "CAUSAL_LM",
   "use_dora": false,

   "rank_pattern": {},
   "revision": null,
   "target_modules": [
+    "down_proj",
+    "o_proj",
+    "mlp.gate_up_proj",
     "self_attn.o_proj.weight",
     "up_proj",
     "gate_proj",
     "mlp.down_proj",
+    "self_attn.qkv_proj.weight",
+    "v_proj",
     "q_proj",
+    "k_proj"
   ],
   "task_type": "CAUSAL_LM",
   "use_dora": false,

adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:557552902b20a33a04721e891b1592661f4067c47e2d1ba906292388ef38302b
-size 403743472

 version https://git-lfs.github.com/spec/v1
+oid sha256:60d95b10b6e140a9626a7058d5038528f2ff80148dc4569b881db56052046509
+size 40

runs/Sep25_10-20-01_sammie/events.out.tfevents.1727281819.sammie.5660.0 ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:f3e63819d085438648634605eb393dfba776abe42f88c364ec419d89de2b4d87
+size 18738

tokenizer_config.json CHANGED Viewed

@@ -35,7 +35,7 @@
   "legacy": false,
   "model_max_length": 2048,
   "pad_token": "</s>",
-  "padding_side": "left",
   "sp_model_kwargs": {},
   "tokenizer_class": "LlamaTokenizer",
   "unk_token": "<unk>",

   "legacy": false,
   "model_max_length": 2048,
   "pad_token": "</s>",
+  "padding_side": "right",
   "sp_model_kwargs": {},
   "tokenizer_class": "LlamaTokenizer",
   "unk_token": "<unk>",

training_args.bin CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:8604a1a40ba8f678deb2830727de0c6cd6660e4c0a7d61f8a3c6b30c00e31773
 size 5560

 version https://git-lfs.github.com/spec/v1
+oid sha256:92ef00ea7a676ed017f06f373e7b051d049f571471eb21b599f6dcb299f671b4
 size 5560