chansung committed on
Commit
b7b7975
·
verified ·
1 Parent(s): 2e8fb8c

Model save

Browse files
README.md CHANGED
@@ -1,13 +1,13 @@
1
  ---
2
  base_model: meta-llama/Meta-Llama-3-8B
3
  datasets:
4
- - llama-duo/synth_closed_qa_dataset_dedup
5
  library_name: peft
6
  license: llama3
7
  tags:
8
- - alignment-handbook
9
  - trl
10
  - sft
 
11
  - generated_from_trainer
12
  model-index:
13
  - name: llama3.1-8b-gpt4o_100k_closedqa-k
@@ -19,9 +19,7 @@ should probably proofread and complete it, then remove this comment. -->
19
 
20
  # llama3.1-8b-gpt4o_100k_closedqa-k
21
 
22
- This model is a fine-tuned version of [meta-llama/Meta-Llama-3-8B](https://huggingface.co/meta-llama/Meta-Llama-3-8B) on the llama-duo/synth_closed_qa_dataset_dedup dataset.
23
- It achieves the following results on the evaluation set:
24
- - Loss: 1.9355
25
 
26
  ## Model description
27
 
@@ -56,9 +54,6 @@ The following hyperparameters were used during training:
56
 
57
  ### Training results
58
 
59
- | Training Loss | Epoch | Step | Validation Loss |
60
- |:-------------:|:-----:|:----:|:---------------:|
61
- | 0.8284 | 1.0 | 256 | 1.9355 |
62
 
63
 
64
  ### Framework versions
 
1
  ---
2
  base_model: meta-llama/Meta-Llama-3-8B
3
  datasets:
4
+ - generator
5
  library_name: peft
6
  license: llama3
7
  tags:
 
8
  - trl
9
  - sft
10
+ - alignment-handbook
11
  - generated_from_trainer
12
  model-index:
13
  - name: llama3.1-8b-gpt4o_100k_closedqa-k
 
19
 
20
  # llama3.1-8b-gpt4o_100k_closedqa-k
21
 
22
+ This model is a fine-tuned version of [meta-llama/Meta-Llama-3-8B](https://huggingface.co/meta-llama/Meta-Llama-3-8B) on the generator dataset.
 
 
23
 
24
  ## Model description
25
 
 
54
 
55
  ### Training results
56
 
 
 
 
57
 
58
 
59
  ### Framework versions
all_results.json CHANGED
@@ -6,9 +6,9 @@
6
  "eval_samples_per_second": 5.161,
7
  "eval_steps_per_second": 0.86,
8
  "total_flos": 7.558147382936863e+17,
9
- "train_loss": 0.9328742581419647,
10
- "train_runtime": 2769.5152,
11
  "train_samples": 111440,
12
- "train_samples_per_second": 5.905,
13
- "train_steps_per_second": 0.092
14
  }
 
6
  "eval_samples_per_second": 5.161,
7
  "eval_steps_per_second": 0.86,
8
  "total_flos": 7.558147382936863e+17,
9
+ "train_loss": 0.0,
10
+ "train_runtime": 1.5255,
11
  "train_samples": 111440,
12
+ "train_samples_per_second": 10721.267,
13
+ "train_steps_per_second": 167.817
14
  }
runs/Sep29_05-21-03_82b7fdadb1f8/events.out.tfevents.1727587393.82b7fdadb1f8.380653.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:57810629ebe30478b2637206712912aa4cec264ef6f703ce288c15e3771127a1
3
+ size 6415
train_results.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "epoch": 1.0,
3
  "total_flos": 7.558147382936863e+17,
4
- "train_loss": 0.9328742581419647,
5
- "train_runtime": 2769.5152,
6
  "train_samples": 111440,
7
- "train_samples_per_second": 5.905,
8
- "train_steps_per_second": 0.092
9
  }
 
1
  {
2
  "epoch": 1.0,
3
  "total_flos": 7.558147382936863e+17,
4
+ "train_loss": 0.0,
5
+ "train_runtime": 1.5255,
6
  "train_samples": 111440,
7
+ "train_samples_per_second": 10721.267,
8
+ "train_steps_per_second": 167.817
9
  }
trainer_state.json CHANGED
@@ -372,22 +372,14 @@
372
  "loss": 0.8284,
373
  "step": 255
374
  },
375
- {
376
- "epoch": 1.0,
377
- "eval_loss": 1.93549382686615,
378
- "eval_runtime": 1.1526,
379
- "eval_samples_per_second": 5.206,
380
- "eval_steps_per_second": 0.868,
381
- "step": 256
382
- },
383
  {
384
  "epoch": 1.0,
385
  "step": 256,
386
  "total_flos": 7.558147382936863e+17,
387
- "train_loss": 0.9328742581419647,
388
- "train_runtime": 2769.5152,
389
- "train_samples_per_second": 5.905,
390
- "train_steps_per_second": 0.092
391
  }
392
  ],
393
  "logging_steps": 5,
 
372
  "loss": 0.8284,
373
  "step": 255
374
  },
 
 
 
 
 
 
 
 
375
  {
376
  "epoch": 1.0,
377
  "step": 256,
378
  "total_flos": 7.558147382936863e+17,
379
+ "train_loss": 0.0,
380
+ "train_runtime": 1.5255,
381
+ "train_samples_per_second": 10721.267,
382
+ "train_steps_per_second": 167.817
383
  }
384
  ],
385
  "logging_steps": 5,
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:8bb810d2219c051e2c476f04eb6fb1bc5d816e5d8f9d840bad517fb46ae2130c
3
  size 5688
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6f8276b5a70d47ca2b9e0a99bbf9a4f71e4152a582276acd233b0dc30d077f8d
3
  size 5688