New version: 2k context

Files changed (3)

adapter_config.json CHANGED

@@ -8,10 +8,12 @@
   "lora_dropout": 0.05,
   "modules_to_save": null,
   "peft_type": "LORA",
-  "r": 8,
   "target_modules": [
     "q_proj",
-    "v_proj"
   ],
   "task_type": "CAUSAL_LM"
-}

   "lora_dropout": 0.05,
   "modules_to_save": null,
   "peft_type": "LORA",
+  "r": 16,
   "target_modules": [
     "q_proj",
+    "v_proj",
+    "k_proj",
+    "o_proj"
   ],
   "task_type": "CAUSAL_LM"
+}

adapter_model.bin CHANGED

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:4100276c846c3be67ff42bd8ba8a5ee485bfceba0ee398e47685feaeb5bd08a2
-size 16822989

 version https://git-lfs.github.com/spec/v1
+oid sha256:1929a4ec5b10b9a4fb61f34d18882205b89df8270444a035a19e00ce3673e19f
+size 67201357

training_config.json CHANGED

@@ -1,14 +1,14 @@
 {
     "trainer": {
         "evaluation_strategy": "steps",
-        "per_device_train_batch_size": 12,
-        "per_device_eval_batch_size": 12,
-        "gradient_accumulation_steps": 10,
         "eval_steps": 50,
         "save_steps": 50,
         "logging_steps": 5,
         "learning_rate": 0.0003,
-        "num_train_epochs": 3,
         "lr_scheduler_type": "cosine",
         "warmup_steps": 30,
         "fp16": true,
@@ -17,19 +17,19 @@
         "optim": "adamw_torch"
     },
     "lora": {
-        "r": 8,
         "lora_alpha": 16,
         "lora_dropout": 0.05,
         "bias": "none",
-        "target_modules": ["q_proj", "v_proj"],
         "task_type": "CAUSAL_LM"
     },
     "load_in_8bit": true,
     "only_target_loss": true,
-    "model": "chat",
     "templates_path": "ru_saiga_template.json",
-    "model_name": "models/llama-7b-hf",
     "model_type": "causal",
-    "max_tokens_count": 1024
 }

 {
     "trainer": {
         "evaluation_strategy": "steps",
+        "per_device_train_batch_size": 4,
+        "per_device_eval_batch_size": 4,
+        "gradient_accumulation_steps": 32,
         "eval_steps": 50,
         "save_steps": 50,
         "logging_steps": 5,
         "learning_rate": 0.0003,
+        "num_train_epochs": 5,
         "lr_scheduler_type": "cosine",
         "warmup_steps": 30,
         "fp16": true,
         "optim": "adamw_torch"
     },
     "lora": {
+        "r": 16,
         "lora_alpha": 16,
         "lora_dropout": 0.05,
         "bias": "none",
+        "target_modules": ["q_proj", "v_proj", "k_proj", "o_proj"],
         "task_type": "CAUSAL_LM"
     },
     "load_in_8bit": true,
     "only_target_loss": true,
+    "mode": "chat",
     "templates_path": "ru_saiga_template.json",
+    "model_name": "models/llama-7b",
     "model_type": "causal",
+    "max_tokens_count": 2000
 }