Holmeister committed on
Commit
3a9f458
1 Parent(s): 8d676e6

End of training

Browse files
Files changed (5) hide show
  1. README.md +3 -2
  2. all_results.json +6 -6
  3. eval_results.json +3 -3
  4. train_results.json +3 -3
  5. trainer_state.json +21 -21
README.md CHANGED
@@ -2,6 +2,7 @@
2
  library_name: peft
3
  tags:
4
  - llama-factory
 
5
  - generated_from_trainer
6
  base_model: mistralai/Mistral-7B-v0.3
7
  model-index:
@@ -14,9 +15,9 @@ should probably proofread and complete it, then remove this comment. -->
14
 
15
  # Mistral_AAID_new_mixed_train
16
 
17
- This model is a fine-tuned version of [mistralai/Mistral-7B-v0.3](https://huggingface.co/mistralai/Mistral-7B-v0.3) on an unknown dataset.
18
  It achieves the following results on the evaluation set:
19
- - Loss: 0.5525
20
 
21
  ## Model description
22
 
 
2
  library_name: peft
3
  tags:
4
  - llama-factory
5
+ - lora
6
  - generated_from_trainer
7
  base_model: mistralai/Mistral-7B-v0.3
8
  model-index:
 
15
 
16
  # Mistral_AAID_new_mixed_train
17
 
18
+ This model is a fine-tuned version of [mistralai/Mistral-7B-v0.3](https://huggingface.co/mistralai/Mistral-7B-v0.3) on the AAID_new_mixed dataset.
19
  It achieves the following results on the evaluation set:
20
+ - Loss: 0.5337
21
 
22
  ## Model description
23
 
all_results.json CHANGED
@@ -1,12 +1,12 @@
1
  {
2
  "epoch": 0.06562756357670221,
3
  "eval_loss": 0.5337400436401367,
4
- "eval_runtime": 230.7886,
5
- "eval_samples_per_second": 20.417,
6
- "eval_steps_per_second": 0.641,
7
  "total_flos": 1.143902298880082e+17,
8
  "train_loss": 0.43926427761713666,
9
- "train_runtime": 4213.5123,
10
- "train_samples_per_second": 166.635,
11
- "train_steps_per_second": 0.651
12
  }
 
1
  {
2
  "epoch": 0.06562756357670221,
3
  "eval_loss": 0.5337400436401367,
4
+ "eval_runtime": 229.7017,
5
+ "eval_samples_per_second": 20.514,
6
+ "eval_steps_per_second": 0.644,
7
  "total_flos": 1.143902298880082e+17,
8
  "train_loss": 0.43926427761713666,
9
+ "train_runtime": 4197.7951,
10
+ "train_samples_per_second": 167.259,
11
+ "train_steps_per_second": 0.653
12
  }
eval_results.json CHANGED
@@ -1,7 +1,7 @@
1
  {
2
  "epoch": 0.06562756357670221,
3
  "eval_loss": 0.5337400436401367,
4
- "eval_runtime": 230.7886,
5
- "eval_samples_per_second": 20.417,
6
- "eval_steps_per_second": 0.641
7
  }
 
1
  {
2
  "epoch": 0.06562756357670221,
3
  "eval_loss": 0.5337400436401367,
4
+ "eval_runtime": 229.7017,
5
+ "eval_samples_per_second": 20.514,
6
+ "eval_steps_per_second": 0.644
7
  }
train_results.json CHANGED
@@ -2,7 +2,7 @@
2
  "epoch": 0.06562756357670221,
3
  "total_flos": 1.143902298880082e+17,
4
  "train_loss": 0.43926427761713666,
5
- "train_runtime": 4213.5123,
6
- "train_samples_per_second": 166.635,
7
- "train_steps_per_second": 0.651
8
  }
 
2
  "epoch": 0.06562756357670221,
3
  "total_flos": 1.143902298880082e+17,
4
  "train_loss": 0.43926427761713666,
5
+ "train_runtime": 4197.7951,
6
+ "train_samples_per_second": 167.259,
7
+ "train_steps_per_second": 0.653
8
  }
trainer_state.json CHANGED
@@ -18,9 +18,9 @@
18
  {
19
  "epoch": 0.010937927262783703,
20
  "eval_loss": 0.5337400436401367,
21
- "eval_runtime": 231.0066,
22
- "eval_samples_per_second": 20.398,
23
- "eval_steps_per_second": 0.641,
24
  "step": 10
25
  },
26
  {
@@ -33,9 +33,9 @@
33
  {
34
  "epoch": 0.021875854525567406,
35
  "eval_loss": 0.5784199833869934,
36
- "eval_runtime": 231.0005,
37
- "eval_samples_per_second": 20.398,
38
- "eval_steps_per_second": 0.641,
39
  "step": 20
40
  },
41
  {
@@ -48,9 +48,9 @@
48
  {
49
  "epoch": 0.03281378178835111,
50
  "eval_loss": 0.5616388320922852,
51
- "eval_runtime": 230.9639,
52
- "eval_samples_per_second": 20.401,
53
- "eval_steps_per_second": 0.641,
54
  "step": 30
55
  },
56
  {
@@ -63,9 +63,9 @@
63
  {
64
  "epoch": 0.04375170905113481,
65
  "eval_loss": 0.5677462220191956,
66
- "eval_runtime": 231.0825,
67
- "eval_samples_per_second": 20.391,
68
- "eval_steps_per_second": 0.64,
69
  "step": 40
70
  },
71
  {
@@ -78,9 +78,9 @@
78
  {
79
  "epoch": 0.05468963631391851,
80
  "eval_loss": 0.5386738181114197,
81
- "eval_runtime": 231.0958,
82
- "eval_samples_per_second": 20.39,
83
- "eval_steps_per_second": 0.64,
84
  "step": 50
85
  },
86
  {
@@ -93,9 +93,9 @@
93
  {
94
  "epoch": 0.06562756357670221,
95
  "eval_loss": 0.5525479912757874,
96
- "eval_runtime": 230.9516,
97
- "eval_samples_per_second": 20.403,
98
- "eval_steps_per_second": 0.641,
99
  "step": 60
100
  },
101
  {
@@ -103,9 +103,9 @@
103
  "step": 60,
104
  "total_flos": 1.143902298880082e+17,
105
  "train_loss": 0.43926427761713666,
106
- "train_runtime": 4213.5123,
107
- "train_samples_per_second": 166.635,
108
- "train_steps_per_second": 0.651
109
  }
110
  ],
111
  "logging_steps": 10,
 
18
  {
19
  "epoch": 0.010937927262783703,
20
  "eval_loss": 0.5337400436401367,
21
+ "eval_runtime": 229.8812,
22
+ "eval_samples_per_second": 20.498,
23
+ "eval_steps_per_second": 0.644,
24
  "step": 10
25
  },
26
  {
 
33
  {
34
  "epoch": 0.021875854525567406,
35
  "eval_loss": 0.5784199833869934,
36
+ "eval_runtime": 229.9779,
37
+ "eval_samples_per_second": 20.489,
38
+ "eval_steps_per_second": 0.644,
39
  "step": 20
40
  },
41
  {
 
48
  {
49
  "epoch": 0.03281378178835111,
50
  "eval_loss": 0.5616388320922852,
51
+ "eval_runtime": 229.9158,
52
+ "eval_samples_per_second": 20.494,
53
+ "eval_steps_per_second": 0.644,
54
  "step": 30
55
  },
56
  {
 
63
  {
64
  "epoch": 0.04375170905113481,
65
  "eval_loss": 0.5677462220191956,
66
+ "eval_runtime": 230.0577,
67
+ "eval_samples_per_second": 20.482,
68
+ "eval_steps_per_second": 0.643,
69
  "step": 40
70
  },
71
  {
 
78
  {
79
  "epoch": 0.05468963631391851,
80
  "eval_loss": 0.5386738181114197,
81
+ "eval_runtime": 229.8738,
82
+ "eval_samples_per_second": 20.498,
83
+ "eval_steps_per_second": 0.644,
84
  "step": 50
85
  },
86
  {
 
93
  {
94
  "epoch": 0.06562756357670221,
95
  "eval_loss": 0.5525479912757874,
96
+ "eval_runtime": 229.8844,
97
+ "eval_samples_per_second": 20.497,
98
+ "eval_steps_per_second": 0.644,
99
  "step": 60
100
  },
101
  {
 
103
  "step": 60,
104
  "total_flos": 1.143902298880082e+17,
105
  "train_loss": 0.43926427761713666,
106
+ "train_runtime": 4197.7951,
107
+ "train_samples_per_second": 167.259,
108
+ "train_steps_per_second": 0.653
109
  }
110
  ],
111
  "logging_steps": 10,