AKTaylor98 committed on May 24, 2024

Commit

a78f671

verified ·

1 Parent(s): 9926325

Model save

Browse files

Files changed (21) hide show

README.md +2 -2
adapter_config.json +4 -4
all_results.json +8 -3
config.json +40 -0
dataset_dict.json +1 -1
eval_results.json +8 -0
evaluation/data-00000-of-00001.arrow +3 -0
evaluation/dataset_info.json +48 -0
evaluation/state.json +13 -0
runs/May23_17-07-55_scai5.cs.ucla.edu/events.out.tfevents.1716509488.scai5.cs.ucla.edu.1145330.1 +3 -0
runs/May23_17-23-15_scai5.cs.ucla.edu/events.out.tfevents.1716510206.scai5.cs.ucla.edu.1148301.0 +3 -0
test/cache-6d56a9de8848869b.arrow +3 -0
test/cache-ad0c3feb522b00d2.arrow +3 -0
test/cache-bb2f31305a09ac2e_00000_of_00002.arrow +3 -0
test/cache-bb2f31305a09ac2e_00001_of_00002.arrow +3 -0
test/cache-e38c0144724a4def.arrow +3 -0
test/data-00000-of-00001.arrow +2 -2
test/state.json +1 -1
train_results.json +3 -3
trainer_state.json +7 -7
training_args.bin +1 -1

README.md CHANGED Viewed

@@ -18,7 +18,7 @@ should probably proofread and complete it, then remove this comment. -->
 This model is a fine-tuned version of [meta-llama/Meta-Llama-3-8B-Instruct](https://huggingface.co/meta-llama/Meta-Llama-3-8B-Instruct) on the None dataset.
 It achieves the following results on the evaluation set:
-- Loss: 2.5661
 ## Model description
@@ -53,7 +53,7 @@ The following hyperparameters were used during training:
 | Training Loss | Epoch | Step | Validation Loss |
 |:-------------:|:-----:|:----:|:---------------:|
-| 1.4265        | 1.0   | 1    | 2.5661          |
 ### Framework versions

 This model is a fine-tuned version of [meta-llama/Meta-Llama-3-8B-Instruct](https://huggingface.co/meta-llama/Meta-Llama-3-8B-Instruct) on the None dataset.
 It achieves the following results on the evaluation set:
+- Loss: 2.8863
 ## Model description
 | Training Loss | Epoch | Step | Validation Loss |
 |:-------------:|:-----:|:----:|:---------------:|
+| 1.4265        | 1.0   | 1    | 2.8863          |
 ### Framework versions

adapter_config.json CHANGED Viewed

@@ -20,13 +20,13 @@
   "rank_pattern": {},
   "revision": null,
   "target_modules": [
     "q_proj",
-    "up_proj",
     "down_proj",
-    "o_proj",
-    "v_proj",
     "gate_proj",
-    "k_proj"
   ],
   "task_type": "CAUSAL_LM",
   "use_dora": false,

   "rank_pattern": {},
   "revision": null,
   "target_modules": [
+    "o_proj",
     "q_proj",
     "down_proj",
+    "up_proj",
     "gate_proj",
+    "k_proj",
+    "v_proj"
   ],
   "task_type": "CAUSAL_LM",
   "use_dora": false,

all_results.json CHANGED Viewed

@@ -1,8 +1,13 @@
 {
     "epoch": 1.0,
     "train_loss": 1.4264578819274902,
-    "train_runtime": 1.98,
     "train_samples": 4,
-    "train_samples_per_second": 2.02,
-    "train_steps_per_second": 0.505
 }

 {
     "epoch": 1.0,
+    "eval_loss": 2.566110134124756,
+    "eval_runtime": 0.2859,
+    "eval_samples": 4,
+    "eval_samples_per_second": 13.992,
+    "eval_steps_per_second": 3.498,
     "train_loss": 1.4264578819274902,
+    "train_runtime": 2.0986,
     "train_samples": 4,
+    "train_samples_per_second": 1.906,
+    "train_steps_per_second": 0.477
 }

config.json ADDED Viewed

	@@ -0,0 +1,40 @@

+{
+  "_name_or_path": "meta-llama/Meta-Llama-3-8B-Instruct",
+  "architectures": [
+    "LlamaForCausalLM"
+  ],
+  "attention_bias": false,
+  "attention_dropout": 0.0,
+  "bos_token_id": 128000,
+  "eos_token_id": 128009,
+  "hidden_act": "silu",
+  "hidden_size": 4096,
+  "initializer_range": 0.02,
+  "intermediate_size": 14336,
+  "max_position_embeddings": 8192,
+  "model_type": "llama",
+  "num_attention_heads": 32,
+  "num_hidden_layers": 32,
+  "num_key_value_heads": 8,
+  "pretraining_tp": 1,
+  "quantization_config": {
+    "bnb_4bit_compute_dtype": "bfloat16",
+    "bnb_4bit_quant_type": "nf4",
+    "bnb_4bit_use_double_quant": false,
+    "llm_int8_enable_fp32_cpu_offload": false,
+    "llm_int8_has_fp16_weight": false,
+    "llm_int8_skip_modules": null,
+    "llm_int8_threshold": 6.0,
+    "load_in_4bit": true,
+    "load_in_8bit": false,
+    "quant_method": "bitsandbytes"
+  },
+  "rms_norm_eps": 1e-05,
+  "rope_scaling": null,
+  "rope_theta": 500000.0,
+  "tie_word_embeddings": false,
+  "torch_dtype": "bfloat16",
+  "transformers_version": "4.36.2",
+  "use_cache": true,
+  "vocab_size": 128256
+}

dataset_dict.json CHANGED Viewed

	@@ -1 +1 @@
1	- {"splits": ["train", "test"]}


1	+ {"splits": ["train", "test", "evaluation"]}

eval_results.json ADDED Viewed

	@@ -0,0 +1,8 @@

+{
+    "epoch": 1.0,
+    "eval_loss": 2.566110134124756,
+    "eval_runtime": 0.2859,
+    "eval_samples": 4,
+    "eval_samples_per_second": 13.992,
+    "eval_steps_per_second": 3.498
+}

evaluation/data-00000-of-00001.arrow ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:d32bef1c42293fadaad0794f163ccb1bb2d8f83df2968eb1544bc51d7d62b0c3
+size 4384

evaluation/dataset_info.json ADDED Viewed

	@@ -0,0 +1,48 @@

+{
+  "citation": "",
+  "description": "",
+  "features": {
+    "prompt": {
+      "dtype": "string",
+      "_type": "Value"
+    },
+    "chosen": [
+      {
+        "content": {
+          "dtype": "string",
+          "_type": "Value"
+        },
+        "role": {
+          "dtype": "string",
+          "_type": "Value"
+        }
+      }
+    ],
+    "rejected": {
+      "dtype": "null",
+      "_type": "Value"
+    },
+    "messages": [
+      {
+        "content": {
+          "dtype": "string",
+          "_type": "Value"
+        },
+        "role": {
+          "dtype": "string",
+          "_type": "Value"
+        }
+      }
+    ],
+    "score_chosen": {
+      "dtype": "int64",
+      "_type": "Value"
+    },
+    "score_rejected": {
+      "dtype": "int64",
+      "_type": "Value"
+    }
+  },
+  "homepage": "",
+  "license": ""
+}

evaluation/state.json ADDED Viewed

	@@ -0,0 +1,13 @@

+{
+  "_data_files": [
+    {
+      "filename": "data-00000-of-00001.arrow"
+    }
+  ],
+  "_fingerprint": "a5cce1ec601ad10f",
+  "_format_columns": null,
+  "_format_kwargs": {},
+  "_format_type": null,
+  "_output_all_columns": false,
+  "_split": null
+}

runs/May23_17-07-55_scai5.cs.ucla.edu/events.out.tfevents.1716509488.scai5.cs.ucla.edu.1145330.1 ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:1c4b77d8f08c8cf39b6b9b55ad6c248dfc40a5ec24dbec61071fc6c161207190
+size 354

runs/May23_17-23-15_scai5.cs.ucla.edu/events.out.tfevents.1716510206.scai5.cs.ucla.edu.1148301.0 ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:fad893d887989d4eeb325a1b2c96192149d1e4462e595adb6541a71b07a4c1f2
+size 5840

test/cache-6d56a9de8848869b.arrow ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:7c60ecb6771979ecb6c71bdc5e56814a16ae51b130fd3ce7813c1206c4bb7394
+size 464

test/cache-ad0c3feb522b00d2.arrow ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:230d4d5106c83a8eb49619c59478849b6c8979d83e572f24a96ba8009af0246c
+size 1968

test/cache-bb2f31305a09ac2e_00000_of_00002.arrow ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:cd1858af16739f83b6da3ee7f3dd61a896f9b9a2b28f8399f637b2f3c0ac7888
+size 904

test/cache-bb2f31305a09ac2e_00001_of_00002.arrow ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:a57f6a414c4469eff7963f5af8ef9000ba75ed6aef2c8745c3eece87d38dd1e9
+size 880

test/cache-e38c0144724a4def.arrow ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:753210810e41cb4a973ffda88cb96dd1a36631a5bf2c2b66a90e0926a42827a4
+size 464

test/data-00000-of-00001.arrow CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:89b447fbcadcbc4ad282e3a02100b92af74635598251b6f14a6a045acb5259be
-size 6368

 version https://git-lfs.github.com/spec/v1
+oid sha256:44c159a71342e18b02d8ab9af9db7a2aa56d9ed0fccea084cc32b6d22814cf9a
+size 3904

test/state.json CHANGED Viewed

@@ -4,7 +4,7 @@
       "filename": "data-00000-of-00001.arrow"
     }
   ],
-  "_fingerprint": "d43f8e3963fd18ec",
   "_format_columns": null,
   "_format_kwargs": {},
   "_format_type": null,

       "filename": "data-00000-of-00001.arrow"
     }
   ],
+  "_fingerprint": "fe438c5dd0a3efa5",
   "_format_columns": null,
   "_format_kwargs": {},
   "_format_type": null,

train_results.json CHANGED Viewed

@@ -1,8 +1,8 @@
 {
     "epoch": 1.0,
     "train_loss": 1.4264578819274902,
-    "train_runtime": 1.98,
     "train_samples": 4,
-    "train_samples_per_second": 2.02,
-    "train_steps_per_second": 0.505
 }

 {
     "epoch": 1.0,
     "train_loss": 1.4264578819274902,
+    "train_runtime": 2.0986,
     "train_samples": 4,
+    "train_samples_per_second": 1.906,
+    "train_steps_per_second": 0.477
 }

trainer_state.json CHANGED Viewed

@@ -16,10 +16,10 @@
     },
     {
       "epoch": 1.0,
-      "eval_loss": 2.566110134124756,
-      "eval_runtime": 0.2634,
-      "eval_samples_per_second": 15.184,
-      "eval_steps_per_second": 3.796,
       "step": 1
     },
     {
@@ -27,9 +27,9 @@
       "step": 1,
       "total_flos": 32602467598336.0,
       "train_loss": 1.4264578819274902,
-      "train_runtime": 1.98,
-      "train_samples_per_second": 2.02,
-      "train_steps_per_second": 0.505
     }
   ],
   "logging_steps": 5,

     },
     {
       "epoch": 1.0,
+      "eval_loss": 2.88632869720459,
+      "eval_runtime": 0.263,
+      "eval_samples_per_second": 7.604,
+      "eval_steps_per_second": 3.802,
       "step": 1
     },
     {
       "step": 1,
       "total_flos": 32602467598336.0,
       "train_loss": 1.4264578819274902,
+      "train_runtime": 2.0986,
+      "train_samples_per_second": 1.906,
+      "train_steps_per_second": 0.477
     }
   ],
   "logging_steps": 5,

training_args.bin CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:782dfa2d331e6c9615faebbd7bbbb707d63345cb137deef1e6c6b6e9b31171be
 size 4920

 version https://git-lfs.github.com/spec/v1
+oid sha256:7f34f7e356f32afb3f6b92b747a154e8890a05871c5f1c11868b6a2373cc17df
 size 4920