sedrickkeh committed
Commit 2c97847
1 Parent(s): 1c0449f

End of training

README.md CHANGED
@@ -4,6 +4,7 @@ license: llama3
 base_model: meta-llama/Meta-Llama-3-8B
 tags:
 - llama-factory
+- full
 - generated_from_trainer
 model-index:
 - name: model
@@ -15,7 +16,7 @@ should probably proofread and complete it, then remove this comment. -->
 
 # model
 
-This model is a fine-tuned version of [meta-llama/Meta-Llama-3-8B](https://huggingface.co/meta-llama/Meta-Llama-3-8B) on an unknown dataset.
+This model is a fine-tuned version of [meta-llama/Meta-Llama-3-8B](https://huggingface.co/meta-llama/Meta-Llama-3-8B) on the PrincetonPLI/Instruct-SkillMix-SDD dataset.
 It achieves the following results on the evaluation set:
 - Loss: nan
 
all_results.json CHANGED
@@ -1,12 +1,12 @@
 {
     "epoch": 2.4,
     "eval_loss": NaN,
-    "eval_runtime": 2.0141,
-    "eval_samples_per_second": 50.146,
-    "eval_steps_per_second": 0.993,
+    "eval_runtime": 2.0791,
+    "eval_samples_per_second": 48.58,
+    "eval_steps_per_second": 0.962,
     "total_flos": 4.249521302148219e+17,
-    "train_loss": 0.606980217827691,
-    "train_runtime": 621.1198,
-    "train_samples_per_second": 9.269,
-    "train_steps_per_second": 0.014
+    "train_loss": 0.4152366585201687,
+    "train_runtime": 603.4863,
+    "train_samples_per_second": 9.54,
+    "train_steps_per_second": 0.015
 }
eval_results.json CHANGED
@@ -1,7 +1,7 @@
 {
     "epoch": 2.4,
     "eval_loss": NaN,
-    "eval_runtime": 2.0141,
-    "eval_samples_per_second": 50.146,
-    "eval_steps_per_second": 0.993
+    "eval_runtime": 2.0791,
+    "eval_samples_per_second": 48.58,
+    "eval_steps_per_second": 0.962
 }
train_results.json CHANGED
@@ -1,8 +1,8 @@
 {
     "epoch": 2.4,
     "total_flos": 4.249521302148219e+17,
-    "train_loss": 0.606980217827691,
-    "train_runtime": 621.1198,
-    "train_samples_per_second": 9.269,
-    "train_steps_per_second": 0.014
+    "train_loss": 0.4152366585201687,
+    "train_runtime": 603.4863,
+    "train_samples_per_second": 9.54,
+    "train_steps_per_second": 0.015
 }
trainer_state.json CHANGED
@@ -10,14 +10,14 @@
   "log_history": [
     {
       "epoch": 0.26666666666666666,
-      "grad_norm": 69.07160949707031,
+      "grad_norm": 67.7900161743164,
       "learning_rate": 5e-06,
       "loss": 1.9082,
       "step": 1
     },
     {
       "epoch": 0.5333333333333333,
-      "grad_norm": 28.931533813476562,
+      "grad_norm": NaN,
       "learning_rate": 5e-06,
       "loss": 1.829,
       "step": 2
@@ -26,15 +26,15 @@
       "epoch": 0.8,
       "grad_norm": NaN,
       "learning_rate": 5e-06,
-      "loss": 1.7257,
+      "loss": 0.0,
       "step": 3
     },
     {
       "epoch": 0.8,
       "eval_loss": NaN,
-      "eval_runtime": 2.4891,
-      "eval_samples_per_second": 40.577,
-      "eval_steps_per_second": 0.804,
+      "eval_runtime": 2.3783,
+      "eval_samples_per_second": 42.467,
+      "eval_steps_per_second": 0.841,
       "step": 3
     },
     {
@@ -68,9 +68,9 @@
     {
       "epoch": 1.8666666666666667,
       "eval_loss": NaN,
-      "eval_runtime": 2.4704,
-      "eval_samples_per_second": 40.884,
-      "eval_steps_per_second": 0.81,
+      "eval_runtime": 2.3676,
+      "eval_samples_per_second": 42.66,
+      "eval_steps_per_second": 0.845,
       "step": 7
     },
     {
@@ -90,19 +90,19 @@
     {
       "epoch": 2.4,
       "eval_loss": NaN,
-      "eval_runtime": 1.9225,
-      "eval_samples_per_second": 52.537,
-      "eval_steps_per_second": 1.04,
+      "eval_runtime": 1.8057,
+      "eval_samples_per_second": 55.934,
+      "eval_steps_per_second": 1.108,
       "step": 9
     },
     {
       "epoch": 2.4,
       "step": 9,
       "total_flos": 4.249521302148219e+17,
-      "train_loss": 0.606980217827691,
-      "train_runtime": 621.1198,
-      "train_samples_per_second": 9.269,
-      "train_steps_per_second": 0.014
+      "train_loss": 0.4152366585201687,
+      "train_runtime": 603.4863,
+      "train_samples_per_second": 9.54,
+      "train_steps_per_second": 0.015
     }
   ],
   "logging_steps": 1.0,
training_loss.png CHANGED
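
The updated trainer_state.json keeps its per-step metrics under log_history, so the NaN loss and grad_norm values recorded in this commit can be inspected directly. Below is a minimal sketch, assuming the file has been downloaded locally as trainer_state.json (the local path is an assumption, not part of this commit):

```python
import json
import math

# Load the trainer state from this commit; Python's json module parses the
# bare NaN literals in the file into float("nan") by default.
with open("trainer_state.json") as f:
    state = json.load(f)

# Walk the per-step log entries and flag any NaN loss or gradient norm.
for entry in state["log_history"]:
    loss = entry.get("loss", entry.get("eval_loss"))
    grad_norm = entry.get("grad_norm")
    flags = []
    if isinstance(loss, float) and math.isnan(loss):
        flags.append("NaN loss")
    if isinstance(grad_norm, float) and math.isnan(grad_norm):
        flags.append("NaN grad_norm")
    print(f"step={entry.get('step')} loss={loss} grad_norm={grad_norm} {' '.join(flags)}")
```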