Upload folder using huggingface_hub

Browse files

Files changed (4) hide show

README.md +15 -19
adapter_config.json +6 -6
adapter_model.safetensors +1 -1
training_args.bin +1 -1

README.md CHANGED Viewed

@@ -5,7 +5,7 @@ tags:
 - generated_from_trainer
 base_model: meta-llama/Meta-Llama-3-8B-Instruct
 model-index:
-- name: outputs/llama-3-8b-claudstruct-v3/
   results: []
 ---
@@ -31,7 +31,7 @@ datasets:
     type: alpaca
 dataset_prepared_path: last_run_prepared
 val_set_size: 0.05
-output_dir: ./outputs/llama-3-8b-claudstruct-v3/
 adapter: qlora
 lora_model_dir:
@@ -54,8 +54,8 @@ wandb_name:
 wandb_log_model:
 gradient_accumulation_steps: 1
-micro_batch_size: 8
-num_epochs: 2
 optimizer: adamw_torch
 lr_scheduler: cosine
 learning_rate: 0.00001
@@ -103,11 +103,11 @@ special_tokens:
 </details><br>
-# outputs/llama-3-8b-claudstruct-v3/
 This model is a fine-tuned version of [meta-llama/Meta-Llama-3-8B-Instruct](https://huggingface.co/meta-llama/Meta-Llama-3-8B-Instruct) on an unspecified dataset.
 It achieves the following results on the evaluation set:
-- Loss: 1.6226
 ## Model description
@@ -127,30 +127,26 @@ More information needed
 The following hyperparameters were used during training:
 - learning_rate: 1e-05
-- train_batch_size: 8
-- eval_batch_size: 8
 - seed: 42
 - distributed_type: multi-GPU
 - num_devices: 2
-- total_train_batch_size: 16
-- total_eval_batch_size: 16
 - optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
 - lr_scheduler_type: cosine
 - lr_scheduler_warmup_steps: 10
-- num_epochs: 2
 ### Training results
 | Training Loss | Epoch  | Step | Validation Loss |
 |:-------------:|:------:|:----:|:---------------:|
-| 2.2209        | 0.0007 | 1    | 2.0399          |
-| 1.7842        | 0.2502 | 341  | 1.6960          |
-| 1.6914        | 0.5004 | 682  | 1.6590          |
-| 1.6757        | 0.7506 | 1023 | 1.6414          |
-| 1.5182        | 1.0007 | 1364 | 1.6319          |
-| 1.8421        | 1.2509 | 1705 | 1.6264          |
-| 1.7271        | 1.5011 | 2046 | 1.6237          |
-| 1.4817        | 1.7513 | 2387 | 1.6226          |
 ### Framework versions

 - generated_from_trainer
 base_model: meta-llama/Meta-Llama-3-8B-Instruct
 model-index:
+- name: outputs/llama-3-8b-claudstruct-v2/
   results: []
 ---
     type: alpaca
 dataset_prepared_path: last_run_prepared
 val_set_size: 0.05
+output_dir: ./outputs/llama-3-8b-claudstruct-v2/
 adapter: qlora
 lora_model_dir:
 wandb_log_model:
 gradient_accumulation_steps: 1
+micro_batch_size: 16
+num_epochs: 1
 optimizer: adamw_torch
 lr_scheduler: cosine
 learning_rate: 0.00001
 </details><br>
+# outputs/llama-3-8b-claudstruct-v2/
 This model is a fine-tuned version of [meta-llama/Meta-Llama-3-8B-Instruct](https://huggingface.co/meta-llama/Meta-Llama-3-8B-Instruct) on an unspecified dataset.
 It achieves the following results on the evaluation set:
+- Loss: 1.6839
 ## Model description
 The following hyperparameters were used during training:
 - learning_rate: 1e-05
+- train_batch_size: 16
+- eval_batch_size: 16
 - seed: 42
 - distributed_type: multi-GPU
 - num_devices: 2
+- total_train_batch_size: 32
+- total_eval_batch_size: 32
 - optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
 - lr_scheduler_type: cosine
 - lr_scheduler_warmup_steps: 10
+- num_epochs: 1
 ### Training results
 | Training Loss | Epoch  | Step | Validation Loss |
 |:-------------:|:------:|:----:|:---------------:|
+| 2.0639        | 0.0015 | 1    | 2.0395          |
+| 1.7905        | 0.2507 | 171  | 1.7402          |
+| 1.8968        | 0.5015 | 342  | 1.6960          |
+| 1.6319        | 0.7522 | 513  | 1.6839          |
 ### Framework versions

adapter_config.json CHANGED Viewed

@@ -4,7 +4,7 @@
   "base_model_name_or_path": "meta-llama/Meta-Llama-3-8B-Instruct",
   "bias": "none",
   "fan_in_fan_out": null,
-  "inference_mode": true,
   "init_lora_weights": true,
   "layer_replication": null,
   "layers_pattern": null,
@@ -20,13 +20,13 @@
   "rank_pattern": {},
   "revision": null,
   "target_modules": [
-    "v_proj",
-    "up_proj",
-    "gate_proj",
     "down_proj",
     "o_proj",
-    "k_proj",
-    "q_proj"
   ],
   "task_type": "CAUSAL_LM",
   "use_dora": false,

   "base_model_name_or_path": "meta-llama/Meta-Llama-3-8B-Instruct",
   "bias": "none",
   "fan_in_fan_out": null,
+  "inference_mode": false,
   "init_lora_weights": true,
   "layer_replication": null,
   "layers_pattern": null,
   "rank_pattern": {},
   "revision": null,
   "target_modules": [
     "down_proj",
+    "up_proj",
+    "v_proj",
     "o_proj",
+    "gate_proj",
+    "q_proj",
+    "k_proj"
   ],
   "task_type": "CAUSAL_LM",
   "use_dora": false,

adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:87a0ecab10e267e31e2d676cb9219607001f5b9c2e4496c5c3ded514352d594d
 size 83945296

 version https://git-lfs.github.com/spec/v1
+oid sha256:6b6e0677afc12f9bd6f0287c17c067ffcab7db91f52132f729145124c8e0e043
 size 83945296

training_args.bin CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:899f1662c0af7238f2da34f9cfa528d22053871728445d51705cf917a4b82a05
 size 6456

 version https://git-lfs.github.com/spec/v1
+oid sha256:313ecaaf2979fa4223bfe586f355a60818fb1fb4b9d7c3ccb2f966acc3ae428c
 size 6456