QLoRA applied #2

Browse files

Files changed (7) hide show

README.md +3 -3
adapter_config.json +5 -14
adapter_model.bin +2 -2
special_tokens_map.json +2 -1
tokenizer.json +1 -6
tokenizer_config.json +113 -0
training_args.bin +1 -1

README.md CHANGED Viewed

@@ -33,7 +33,7 @@ More information needed
 The following hyperparameters were used during training:
 - learning_rate: 0.0002
 - train_batch_size: 4
-- eval_batch_size: 2
 - seed: 42
 - gradient_accumulation_steps: 4
 - total_train_batch_size: 16
@@ -48,7 +48,7 @@ The following hyperparameters were used during training:
 ### Framework versions
-- Transformers 4.33.2
 - Pytorch 2.0.1+cu118
 - Datasets 2.14.5
-- Tokenizers 0.13.3

 The following hyperparameters were used during training:
 - learning_rate: 0.0002
 - train_batch_size: 4
+- eval_batch_size: 1
 - seed: 42
 - gradient_accumulation_steps: 4
 - total_train_batch_size: 16
 ### Framework versions
+- Transformers 4.34.0
 - Pytorch 2.0.1+cu118
 - Datasets 2.14.5
+- Tokenizers 0.14.0

adapter_config.json CHANGED Viewed

@@ -2,23 +2,18 @@
   "alpha_pattern": {},
   "auto_mapping": null,
   "base_model_name_or_path": "ybelkada/falcon-7b-sharded-bf16",
-  "beta1": 0.85,
-  "beta2": 0.85,
   "bias": "none",
-  "deltaT": 10,
   "fan_in_fan_out": false,
   "inference_mode": true,
   "init_lora_weights": true,
-  "init_r": 12,
   "layers_pattern": null,
   "layers_to_transform": null,
-  "lora_alpha": 32,
   "lora_dropout": 0.1,
   "modules_to_save": null,
-  "orth_reg_weight": 0.5,
-  "peft_type": "ADALORA",
-  "r": 8,
-  "rank_pattern": null,
   "revision": null,
   "target_modules": [
     "query_key_value",
@@ -26,9 +21,5 @@
     "dense_h_to_4h",
     "dense_4h_to_h"
   ],
-  "target_r": 8,
-  "task_type": "CAUSAL_LM",
-  "tfinal": 1000,
-  "tinit": 200,
-  "total_step": null
 }

   "alpha_pattern": {},
   "auto_mapping": null,
   "base_model_name_or_path": "ybelkada/falcon-7b-sharded-bf16",
   "bias": "none",
   "fan_in_fan_out": false,
   "inference_mode": true,
   "init_lora_weights": true,
   "layers_pattern": null,
   "layers_to_transform": null,
+  "lora_alpha": 16,
   "lora_dropout": 0.1,
   "modules_to_save": null,
+  "peft_type": "LORA",
+  "r": 64,
+  "rank_pattern": {},
   "revision": null,
   "target_modules": [
     "query_key_value",
     "dense_h_to_4h",
     "dense_4h_to_h"
   ],
+  "task_type": "CAUSAL_LM"
 }

adapter_model.bin CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:c76e12333a01e95fa24b0e2238c3df1dfb613fd6e8706fa89bee9fd3a7941fed
-size 98048461

 version https://git-lfs.github.com/spec/v1
+oid sha256:bc7e2e15a4a1a9cb9d39e55c002d6709070611ab628daf0701720ec1d24279b5
+size 522284877

special_tokens_map.json CHANGED Viewed

@@ -10,7 +10,8 @@
     ">>DOMAIN<<",
     ">>PREFIX<<",
     ">>SUFFIX<<",
-    ">>MIDDLE<<"
   ],
   "eos_token": "<|endoftext|>",
   "pad_token": "<|endoftext|>"

     ">>DOMAIN<<",
     ">>PREFIX<<",
     ">>SUFFIX<<",
+    ">>MIDDLE<<",
+    "<|endoftext|>"
   ],
   "eos_token": "<|endoftext|>",
   "pad_token": "<|endoftext|>"

tokenizer.json CHANGED Viewed

@@ -1,11 +1,6 @@
 {
   "version": "1.0",
-  "truncation": {
-    "direction": "Right",
-    "max_length": 2048,
-    "strategy": "LongestFirst",
-    "stride": 0
-  },
   "padding": null,
   "added_tokens": [
     {

 {
   "version": "1.0",
+  "truncation": null,
   "padding": null,
   "added_tokens": [
     {

tokenizer_config.json CHANGED Viewed

@@ -1,9 +1,122 @@
 {
   "add_prefix_space": false,
   "clean_up_tokenization_spaces": true,
   "eos_token": "<|endoftext|>",
   "max_length": 512,
   "model_max_length": 2048,
   "stride": 0,
   "tokenizer_class": "PreTrainedTokenizerFast",
   "truncation_side": "right",

 {
   "add_prefix_space": false,
+  "added_tokens_decoder": {
+    "0": {
+      "content": ">>TITLE<<",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "1": {
+      "content": ">>ABSTRACT<<",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "2": {
+      "content": ">>INTRODUCTION<<",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "3": {
+      "content": ">>SUMMARY<<",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "4": {
+      "content": ">>COMMENT<<",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "5": {
+      "content": ">>ANSWER<<",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "6": {
+      "content": ">>QUESTION<<",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "7": {
+      "content": ">>DOMAIN<<",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "8": {
+      "content": ">>PREFIX<<",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "9": {
+      "content": ">>SUFFIX<<",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "10": {
+      "content": ">>MIDDLE<<",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "11": {
+      "content": "<|endoftext|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    }
+  },
+  "additional_special_tokens": [
+    ">>TITLE<<",
+    ">>ABSTRACT<<",
+    ">>INTRODUCTION<<",
+    ">>SUMMARY<<",
+    ">>COMMENT<<",
+    ">>ANSWER<<",
+    ">>QUESTION<<",
+    ">>DOMAIN<<",
+    ">>PREFIX<<",
+    ">>SUFFIX<<",
+    ">>MIDDLE<<",
+    "<|endoftext|>"
+  ],
   "clean_up_tokenization_spaces": true,
   "eos_token": "<|endoftext|>",
   "max_length": 512,
   "model_max_length": 2048,
+  "pad_token": "<|endoftext|>",
   "stride": 0,
   "tokenizer_class": "PreTrainedTokenizerFast",
   "truncation_side": "right",

training_args.bin CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:05affc715a4251a28ca850ed23b9a4573187399e32a14d3e9a534cef01cd79e8
 size 4091

 version https://git-lfs.github.com/spec/v1
+oid sha256:78c45554dd5cc708c869af76cdf8b4b7d989e20dd8c4a0a94962904e60091e28
 size 4091