CleverShovel committed on
Commit d3ccaab
1 parent: c66175e

Upload 7 files

Files changed (4)
  1. README.md +6 -9
  2. adapter_config.json +5 -4
  3. adapter_model.safetensors +1 -1
  4. config.json +3 -1
README.md CHANGED
@@ -7,8 +7,6 @@ base_model: mistralai/Mistral-7B-v0.1
 model-index:
 - name: llm_train/test_out
   results: []
-datasets:
-- CleverShovel/paper_reviews
 ---
 
 <!-- This model card has been generated automatically according to the information the Trainer had access to. You
@@ -30,7 +28,6 @@ strict: false
 
 bnb_config_kwargs:
   llm_int8_has_fp16_weight: true
-  bnb_4bit_compute_dtype: float16
   bnb_4bit_quant_type: nf4
   bnb_4bit_use_double_quant: false
 
@@ -44,7 +41,7 @@ output_dir: ./llm_train/test_out
 #using lora for lower cost
 adapter: qlora
 lora_r: 8
-lora_alpha: 16
+lora_alpha: 32
 lora_dropout: 0.05
 lora_target_modules:
   - q_proj
@@ -107,7 +104,7 @@ special_tokens:
 
 This model is a fine-tuned version of [mistralai/Mistral-7B-v0.1](https://huggingface.co/mistralai/Mistral-7B-v0.1) on the None dataset.
 It achieves the following results on the evaluation set:
-- Loss: 2.0924
+- Loss: 2.0276
 
 ## Model description
 
@@ -141,13 +138,13 @@ The following hyperparameters were used during training:
 
 | Training Loss | Epoch | Step | Validation Loss |
 |:-------------:|:-----:|:----:|:---------------:|
-| 2.075 | 0.13 | 300 | 2.0924 |
+| 2.0121 | 0.13 | 300 | 2.0276 |
 
 
 ### Framework versions
 
-- PEFT 0.7.1
-- Transformers 4.37.0
-- Pytorch 2.1.2+cu121
+- PEFT 0.8.2
+- Transformers 4.38.0.dev0
+- Pytorch 2.1.2+cu118
 - Datasets 2.16.1
 - Tokenizers 0.15.0
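The substantive training change in this commit is the lora_alpha bump from 16 to 32 at a fixed lora_r of 8, which doubles the effective LoRA scaling factor (alpha / r) from 2 to 4. For reference, a minimal sketch of the equivalent peft.LoraConfig, using only the values visible in the diff (the surrounding axolotl training setup is not shown here):

```python
from peft import LoraConfig

# Adapter hyperparameters as they stand after this commit.
# The LoRA update is scaled by lora_alpha / r = 32 / 8 = 4
# (previously 16 / 8 = 2), so the adapter's contribution is doubled.
lora_config = LoraConfig(
    r=8,
    lora_alpha=32,
    lora_dropout=0.05,
    target_modules=["q_proj", "v_proj"],
    task_type="CAUSAL_LM",
)
```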
adapter_config.json CHANGED
@@ -9,7 +9,7 @@
   "layers_pattern": null,
   "layers_to_transform": null,
   "loftq_config": {},
-  "lora_alpha": 16,
+  "lora_alpha": 32,
   "lora_dropout": 0.05,
   "megatron_config": null,
   "megatron_core": "megatron.core",
@@ -19,8 +19,9 @@
   "rank_pattern": {},
   "revision": null,
   "target_modules": [
-    "q_proj",
-    "v_proj"
+    "v_proj",
+    "q_proj"
   ],
-  "task_type": "CAUSAL_LM"
+  "task_type": "CAUSAL_LM",
+  "use_rslora": false
 }
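The new use_rslora: false key (written by PEFT 0.8+) keeps the classic alpha / r scaling rather than the rank-stabilized alpha / sqrt(r) variant. A hedged sketch of attaching an adapter described by such an adapter_config.json to the base model; the adapter repo id below is a hypothetical placeholder, not a repo named in this commit:

```python
import torch
from peft import PeftModel
from transformers import AutoModelForCausalLM

# Load the base model, then attach the LoRA adapter described by
# adapter_config.json. "your-username/your-adapter-repo" is a
# hypothetical placeholder for wherever the adapter is published.
base = AutoModelForCausalLM.from_pretrained(
    "mistralai/Mistral-7B-v0.1",
    torch_dtype=torch.bfloat16,
)
model = PeftModel.from_pretrained(base, "your-username/your-adapter-repo")
```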
adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:de8e409acca30ca66c0a950881e63eb5dd09be5416afd84b23b62440c853caf9
+oid sha256:0f45a27de191344315748c5f9764a27df9eeec0b9462340f82616f139e43d900
 size 13648432
config.json CHANGED
@@ -16,6 +16,8 @@
   "num_hidden_layers": 32,
   "num_key_value_heads": 8,
   "quantization_config": {
+    "_load_in_4bit": true,
+    "_load_in_8bit": false,
     "bnb_4bit_compute_dtype": "float16",
     "bnb_4bit_quant_type": "nf4",
     "bnb_4bit_use_double_quant": false,
@@ -32,7 +34,7 @@
   "sliding_window": 4096,
   "tie_word_embeddings": false,
   "torch_dtype": "bfloat16",
-  "transformers_version": "4.37.0",
+  "transformers_version": "4.38.0.dev0",
   "use_cache": false,
   "vocab_size": 32000
 }
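The quantization_config block recorded in config.json describes a 4-bit NF4 bitsandbytes setup with float16 compute and double quantization disabled; the underscore-prefixed _load_in_4bit / _load_in_8bit keys appear to be the serialized form of the load_in_4bit / load_in_8bit flags as written by newer transformers versions. A minimal sketch of reloading the base model under the same settings (assuming bitsandbytes is installed and a CUDA device is available):

```python
import torch
from transformers import AutoModelForCausalLM, BitsAndBytesConfig

# Mirrors the quantization_config recorded in config.json:
# 4-bit NF4 weights, float16 compute dtype, no double quantization.
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.float16,
    bnb_4bit_use_double_quant=False,
)

model = AutoModelForCausalLM.from_pretrained(
    "mistralai/Mistral-7B-v0.1",
    quantization_config=bnb_config,
    device_map="auto",
)
```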