alecwangcq committed on
Commit
4c5deca
1 Parent(s): beceece

Model save

Browse files
README.md CHANGED
@@ -15,7 +15,7 @@ should probably proofread and complete it, then remove this comment. -->
15
 
16
  This model is a fine-tuned version of [mistralai/Mistral-7B-v0.1](https://huggingface.co/mistralai/Mistral-7B-v0.1) on an unknown dataset.
17
  It achieves the following results on the evaluation set:
18
- - Loss: 1.0686
19
 
20
  ## Model description
21
 
@@ -51,7 +51,7 @@ The following hyperparameters were used during training:
51
 
52
  | Training Loss | Epoch | Step | Validation Loss |
53
  |:-------------:|:-----:|:----:|:---------------:|
54
- | 1.0775 | 0.26 | 31 | 1.0669 |
55
 
56
 
57
  ### Framework versions
 
15
 
16
  This model is a fine-tuned version of [mistralai/Mistral-7B-v0.1](https://huggingface.co/mistralai/Mistral-7B-v0.1) on an unknown dataset.
17
  It achieves the following results on the evaluation set:
18
+ - Loss: 1.0683
19
 
20
  ## Model description
21
 
 
51
 
52
  | Training Loss | Epoch | Step | Validation Loss |
53
  |:-------------:|:-----:|:----:|:---------------:|
54
+ | 1.0771 | 0.26 | 31 | 1.0666 |
55
 
56
 
57
  ### Framework versions
all_results.json CHANGED
@@ -1,13 +1,13 @@
1
  {
2
  "epoch": 0.26,
3
- "eval_loss": 1.06858229637146,
4
- "eval_runtime": 6.5753,
5
  "eval_samples": 1000,
6
- "eval_samples_per_second": 152.085,
7
- "eval_steps_per_second": 1.217,
8
- "train_loss": 1.2061054245118172,
9
- "train_runtime": 1488.8374,
10
  "train_samples": 61966,
11
- "train_samples_per_second": 41.62,
12
  "train_steps_per_second": 0.081
13
  }
 
1
  {
2
  "epoch": 0.26,
3
+ "eval_loss": 1.0683468580245972,
4
+ "eval_runtime": 6.6188,
5
  "eval_samples": 1000,
6
+ "eval_samples_per_second": 151.085,
7
+ "eval_steps_per_second": 1.209,
8
+ "train_loss": 1.2071462215915802,
9
+ "train_runtime": 1490.1678,
10
  "train_samples": 61966,
11
+ "train_samples_per_second": 41.583,
12
  "train_steps_per_second": 0.081
13
  }
eval_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "epoch": 0.26,
3
- "eval_loss": 1.06858229637146,
4
- "eval_runtime": 6.5753,
5
  "eval_samples": 1000,
6
- "eval_samples_per_second": 152.085,
7
- "eval_steps_per_second": 1.217
8
  }
 
1
  {
2
  "epoch": 0.26,
3
+ "eval_loss": 1.0683468580245972,
4
+ "eval_runtime": 6.6188,
5
  "eval_samples": 1000,
6
+ "eval_samples_per_second": 151.085,
7
+ "eval_steps_per_second": 1.209
8
  }
model-00001-of-00003.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e7090011f379d2f31fe315762a1504972627a1d26862424eecd5e3fa12acd1ef
3
  size 4943162336
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b9807e76613b2db37f0898e9a732f39ec4491d65735a97423d0e7bfed386b4ef
3
  size 4943162336
model-00002-of-00003.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:a6317d911d6fe78d7147461b42b87028ba4ad576c8b41439ac8b83da846a64c3
3
  size 4999819336
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9e0abde6f22f4baadb136891b425b532d77c7c8fc09a10d5dec6441ce659c73c
3
  size 4999819336
model-00003-of-00003.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:3f6e3325ff22f5396df10a092b349d9ac56a5299f1f4fb0ae31a3d9368d8568e
3
  size 4540516344
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c6a2b4b503bc04ae15796eba94e15b1a94a44bd44c594d49015382134ee46383
3
  size 4540516344
runs/Nov18_08-54-00_j004-ds/events.out.tfevents.1700319321.j004-ds.1903041.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9bd536d3a0a5002fcd2c3c04ba7087d35dc6518ad8005111094efab8ab7a9ef7
3
+ size 5921
runs/Nov18_08-54-00_j004-ds/events.out.tfevents.1700320817.j004-ds.1903041.1 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8eb2163a1dc8a6abd142d51a2b57573820d34fec8b39027c452e7cc1d2bffbe6
3
+ size 354
train_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "epoch": 0.26,
3
- "train_loss": 1.2061054245118172,
4
- "train_runtime": 1488.8374,
5
  "train_samples": 61966,
6
- "train_samples_per_second": 41.62,
7
  "train_steps_per_second": 0.081
8
  }
 
1
  {
2
  "epoch": 0.26,
3
+ "train_loss": 1.2071462215915802,
4
+ "train_runtime": 1490.1678,
5
  "train_samples": 61966,
6
+ "train_samples_per_second": 41.583,
7
  "train_steps_per_second": 0.081
8
  }
trainer_state.json CHANGED
@@ -23,55 +23,55 @@
23
  {
24
  "epoch": 0.08,
25
  "learning_rate": 1.966483553946637e-05,
26
- "loss": 1.2967,
27
  "step": 10
28
  },
29
  {
30
  "epoch": 0.12,
31
  "learning_rate": 1.9251166435386837e-05,
32
- "loss": 1.1809,
33
  "step": 15
34
  },
35
  {
36
  "epoch": 0.16,
37
  "learning_rate": 1.868180920098644e-05,
38
- "loss": 1.1281,
39
  "step": 20
40
  },
41
  {
42
  "epoch": 0.21,
43
  "learning_rate": 1.796634556457236e-05,
44
- "loss": 1.0929,
45
  "step": 25
46
  },
47
  {
48
  "epoch": 0.25,
49
  "learning_rate": 1.7116816083045603e-05,
50
- "loss": 1.0775,
51
  "step": 30
52
  },
53
  {
54
  "epoch": 0.26,
55
- "eval_loss": 1.066861629486084,
56
- "eval_runtime": 6.7745,
57
- "eval_samples_per_second": 147.613,
58
- "eval_steps_per_second": 1.181,
59
  "step": 31
60
  },
61
  {
62
  "epoch": 0.26,
63
  "step": 31,
64
  "total_flos": 50905428787200.0,
65
- "train_loss": 1.2061054245118172,
66
- "train_runtime": 1488.8374,
67
- "train_samples_per_second": 41.62,
68
  "train_steps_per_second": 0.081
69
  }
70
  ],
71
  "logging_steps": 5,
72
  "max_steps": 121,
73
  "num_train_epochs": 1,
74
- "save_steps": 500,
75
  "total_flos": 50905428787200.0,
76
  "trial_name": null,
77
  "trial_params": null
 
23
  {
24
  "epoch": 0.08,
25
  "learning_rate": 1.966483553946637e-05,
26
+ "loss": 1.3001,
27
  "step": 10
28
  },
29
  {
30
  "epoch": 0.12,
31
  "learning_rate": 1.9251166435386837e-05,
32
+ "loss": 1.1836,
33
  "step": 15
34
  },
35
  {
36
  "epoch": 0.16,
37
  "learning_rate": 1.868180920098644e-05,
38
+ "loss": 1.1287,
39
  "step": 20
40
  },
41
  {
42
  "epoch": 0.21,
43
  "learning_rate": 1.796634556457236e-05,
44
+ "loss": 1.093,
45
  "step": 25
46
  },
47
  {
48
  "epoch": 0.25,
49
  "learning_rate": 1.7116816083045603e-05,
50
+ "loss": 1.0771,
51
  "step": 30
52
  },
53
  {
54
  "epoch": 0.26,
55
+ "eval_loss": 1.0666340589523315,
56
+ "eval_runtime": 6.8061,
57
+ "eval_samples_per_second": 146.926,
58
+ "eval_steps_per_second": 1.175,
59
  "step": 31
60
  },
61
  {
62
  "epoch": 0.26,
63
  "step": 31,
64
  "total_flos": 50905428787200.0,
65
+ "train_loss": 1.2071462215915802,
66
+ "train_runtime": 1490.1678,
67
+ "train_samples_per_second": 41.583,
68
  "train_steps_per_second": 0.081
69
  }
70
  ],
71
  "logging_steps": 5,
72
  "max_steps": 121,
73
  "num_train_epochs": 1,
74
+ "save_steps": 50,
75
  "total_flos": 50905428787200.0,
76
  "trial_name": null,
77
  "trial_params": null
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:39cc359ce3258b2e63e327c39e82644b160cae8a11afa9e752dd762b8c47d000
3
  size 5624
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a76446f98f2efebc3742cb697a455c19f8e656d97426c4e9ce3a189d1b2c484c
3
  size 5624