Yofuria committed on
Commit
c12fa12
1 Parent(s): 7e1d58b

Model save

Browse files
README.md CHANGED
@@ -16,7 +16,7 @@ model-index:
16
  <!-- This model card has been generated automatically according to the information the Trainer had access to. You
17
  should probably proofread and complete it, then remove this comment. -->
18
 
19
- [<img src="https://raw.githubusercontent.com/wandb/assets/main/wandb-github-badge-28.svg" alt="Visualize in Weights & Biases" width="200" height="32"/>](https://wandb.ai/nlp-xiaobo/huggingface/runs/6m5wzyco)
20
  # Llama-3-8b-sft-qlora
21
 
22
  This model is a fine-tuned version of [meta-llama/Meta-Llama-3-8B](https://huggingface.co/meta-llama/Meta-Llama-3-8B) on the generator dataset.
@@ -58,6 +58,6 @@ The following hyperparameters were used during training:
58
 
59
  - PEFT 0.11.1
60
  - Transformers 4.42.2
61
- - Pytorch 2.3.1+cu121
62
  - Datasets 2.20.0
63
  - Tokenizers 0.19.1
 
16
  <!-- This model card has been generated automatically according to the information the Trainer had access to. You
17
  should probably proofread and complete it, then remove this comment. -->
18
 
19
+ [<img src="https://raw.githubusercontent.com/wandb/assets/main/wandb-github-badge-28.svg" alt="Visualize in Weights & Biases" width="200" height="32"/>](https://wandb.ai/nlp-xiaobo/huggingface/runs/fterg85h)
20
  # Llama-3-8b-sft-qlora
21
 
22
  This model is a fine-tuned version of [meta-llama/Meta-Llama-3-8B](https://huggingface.co/meta-llama/Meta-Llama-3-8B) on the generator dataset.
 
58
 
59
  - PEFT 0.11.1
60
  - Transformers 4.42.2
61
+ - Pytorch 2.0.1+cu117
62
  - Datasets 2.20.0
63
  - Tokenizers 0.19.1
adapter_config.json CHANGED
@@ -20,13 +20,13 @@
20
  "rank_pattern": {},
21
  "revision": null,
22
  "target_modules": [
23
- "o_proj",
24
- "k_proj",
25
- "q_proj",
26
  "up_proj",
27
  "gate_proj",
28
  "v_proj",
29
- "down_proj"
 
 
 
30
  ],
31
  "task_type": "CAUSAL_LM",
32
  "use_dora": false,
 
20
  "rank_pattern": {},
21
  "revision": null,
22
  "target_modules": [
 
 
 
23
  "up_proj",
24
  "gate_proj",
25
  "v_proj",
26
+ "down_proj",
27
+ "q_proj",
28
+ "o_proj",
29
+ "k_proj"
30
  ],
31
  "task_type": "CAUSAL_LM",
32
  "use_dora": false,
all_results.json CHANGED
@@ -2,8 +2,8 @@
2
  "epoch": 1.0,
3
  "total_flos": 1.1104476526052114e+19,
4
  "train_loss": 0.0,
5
- "train_runtime": 10.0721,
6
  "train_samples": 207864,
7
- "train_samples_per_second": 11888.555,
8
- "train_steps_per_second": 1486.082
9
  }
 
2
  "epoch": 1.0,
3
  "total_flos": 1.1104476526052114e+19,
4
  "train_loss": 0.0,
5
+ "train_runtime": 6.7946,
6
  "train_samples": 207864,
7
+ "train_samples_per_second": 17623.245,
8
+ "train_steps_per_second": 2202.924
9
  }
train_results.json CHANGED
@@ -2,8 +2,8 @@
2
  "epoch": 1.0,
3
  "total_flos": 1.1104476526052114e+19,
4
  "train_loss": 0.0,
5
- "train_runtime": 10.0721,
6
  "train_samples": 207864,
7
- "train_samples_per_second": 11888.555,
8
- "train_steps_per_second": 1486.082
9
  }
 
2
  "epoch": 1.0,
3
  "total_flos": 1.1104476526052114e+19,
4
  "train_loss": 0.0,
5
+ "train_runtime": 6.7946,
6
  "train_samples": 207864,
7
+ "train_samples_per_second": 17623.245,
8
+ "train_steps_per_second": 2202.924
9
  }
trainer_state.json CHANGED
@@ -20971,9 +20971,9 @@
20971
  "step": 14968,
20972
  "total_flos": 1.1104476526052114e+19,
20973
  "train_loss": 0.0,
20974
- "train_runtime": 10.0721,
20975
- "train_samples_per_second": 11888.555,
20976
- "train_steps_per_second": 1486.082
20977
  }
20978
  ],
20979
  "logging_steps": 5,
 
20971
  "step": 14968,
20972
  "total_flos": 1.1104476526052114e+19,
20973
  "train_loss": 0.0,
20974
+ "train_runtime": 6.7946,
20975
+ "train_samples_per_second": 17623.245,
20976
+ "train_steps_per_second": 2202.924
20977
  }
20978
  ],
20979
  "logging_steps": 5,
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:a2f5067f1985f081dd8a0167604545d06bd0d9f901c5e24a1a22153816c8b281
3
- size 5240
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d20ccbc769d5fe51f9b1bd21a041663bad72e925035a93f89bedab4b78399525
3
+ size 4795