Training in progress, step 23

Files changed (8) hide show

README.md CHANGED Viewed

@@ -15,7 +15,7 @@ should probably proofread and complete it, then remove this comment. -->
 This model is a fine-tuned version of [mistralai/Mistral-7B-v0.1](https://huggingface.co/mistralai/Mistral-7B-v0.1) on the None dataset.
 It achieves the following results on the evaluation set:
-- Loss: 2.3244
 ## Model description
@@ -36,13 +36,13 @@ More information needed
 The following hyperparameters were used during training:
 - learning_rate: 1e-05
 - train_batch_size: 1
-- eval_batch_size: 1
 - seed: 0
 - distributed_type: multi-GPU
-- num_devices: 4
 - gradient_accumulation_steps: 4
-- total_train_batch_size: 16
-- total_eval_batch_size: 4
 - optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
 - lr_scheduler_type: linear
 - training_steps: 1

 This model is a fine-tuned version of [mistralai/Mistral-7B-v0.1](https://huggingface.co/mistralai/Mistral-7B-v0.1) on the None dataset.
 It achieves the following results on the evaluation set:
+- Loss: 2.3265
 ## Model description
 The following hyperparameters were used during training:
 - learning_rate: 1e-05
 - train_batch_size: 1
+- eval_batch_size: 2
 - seed: 0
 - distributed_type: multi-GPU
+- num_devices: 3
 - gradient_accumulation_steps: 4
+- total_train_batch_size: 12
+- total_eval_batch_size: 6
 - optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
 - lr_scheduler_type: linear
 - training_steps: 1

adapter_config.json CHANGED Viewed

@@ -21,9 +21,9 @@
   "target_modules": [
     "down_proj",
     "q_proj",
-    "gate_proj",
     "v_proj",
-    "up_proj"
   ],
   "task_type": "CAUSAL_LM"
 }

   "target_modules": [
     "down_proj",
     "q_proj",
     "v_proj",
+    "up_proj",
+    "gate_proj"
   ],
   "task_type": "CAUSAL_LM"
 }

adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:618f7c3c384f8c4df79ed9c7b90c7046f1b2138650a0204c3a84bc30b222faa1
 size 281061608

 version https://git-lfs.github.com/spec/v1
+oid sha256:a0e683818f3737d3f621de0742433e36175dd01e4cc1fb3ba73043092a94748a
 size 281061608

config.json CHANGED Viewed

@@ -23,38 +23,38 @@
   "rope_theta": 10000.0,
   "sliding_window": 4096,
   "thresholds": [
-    0.017051145434379578,
     0.023069201037287712,
-    0.049147434532642365,
-    0.051153454929590225,
-    0.061183542013168335,
-    0.07121363282203674,
-    0.0732196494936943,
-    0.07923770695924759,
-    0.08324974030256271,
-    0.08124372363090515,
-    0.089267797768116,
-    0.09127381443977356,
-    0.10130390524864197,
-    0.0992978885769844,
-    0.10732196271419525,
-    0.12337010353803635,
-    0.14343027770519257,
-    0.16148445010185242,
-    0.17953860759735107,
-    0.1935807317495346,
-    0.1995987892150879,
     0.2196589708328247,
-    0.2196589708328247,
-    0.23169508576393127,
-    0.2357071191072464,
-    0.23370109498500824,
-    0.225677028298378,
-    0.22968906164169312,
-    0.225677028298378,
     0.22768303751945496,
-    0.2457372099161148,
-    0.2678034007549286
   ],
   "tie_word_embeddings": false,
   "torch_dtype": "bfloat16",

   "rope_theta": 10000.0,
   "sliding_window": 4096,
   "thresholds": [
     0.023069201037287712,
+    0.03309928998351097,
+    0.04312938079237938,
+    0.05516548827290535,
+    0.07522567361593246,
+    0.09327983111143112,
+    0.10531593859195709,
+    0.11935807019472122,
+    0.12738214433193207,
+    0.12738214433193207,
+    0.1313941776752472,
+    0.13340020179748535,
+    0.13941824436187744,
+    0.1414242684841156,
+    0.15546639263629913,
+    0.1675025075674057,
+    0.18555666506290436,
+    0.19157472252845764,
+    0.20762285590171814,
     0.2196589708328247,
     0.22768303751945496,
+    0.23771312832832336,
+    0.2357071191072464,
+    0.23771312832832336,
+    0.24172517657279968,
+    0.24172517657279968,
+    0.24172517657279968,
+    0.24172517657279968,
+    0.24172517657279968,
+    0.23971915245056152,
+    0.2357071191072464,
+    0.225677028298378
   ],
   "tie_word_embeddings": false,
   "torch_dtype": "bfloat16",

model-00001-of-00003.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:5911787808afa752c8244407a9d8cb33084339ffeed4eea9bbff386f12098f53
 size 4943162336

 version https://git-lfs.github.com/spec/v1
+oid sha256:c33a22761a44ac1bbfcc481e6fa0d5aa9022678c6ccfdac9e824e1e36d01fff9
 size 4943162336

model-00002-of-00003.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:3601f0d928ba7b47182816f3315737a27f63661193641f306591225c498b1991
 size 4999819336

 version https://git-lfs.github.com/spec/v1
+oid sha256:3d22bcdd4a37d68894daa852415cc04d78cfb3a342c02f8e1487315fc33815bb
 size 4999819336

model-00003-of-00003.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:79a4dc55e86e504b865080b2e14a23f4547c9db064eb409f1bc7bd7b066694b0
 size 4540516344

 version https://git-lfs.github.com/spec/v1
+oid sha256:7412e818b679a6d0013cf994db7d8970df225f83a5e429609d2ebf32a832ec8d
 size 4540516344

training_args.bin CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:66299b2c729096a65452b6d03a9daa98bf3280b2eaacbacf3df6caacd7ad1d74
 size 6456

 version https://git-lfs.github.com/spec/v1
+oid sha256:62a9254f9424d6ed8aeff2636f645b370874f626a656b2e2b79b552c0a30f8ed
 size 6456