Violet24K committed on
Commit
9bd2460
1 Parent(s): 3b073b5

Model save

Browse files
README.md CHANGED
@@ -2,13 +2,11 @@
2
  license: apache-2.0
3
  base_model: mistralai/Mistral-7B-v0.1
4
  tags:
5
- - alignment-handbook
6
- - generated_from_trainer
7
  - trl
8
  - sft
9
  - generated_from_trainer
10
  datasets:
11
- - HuggingFaceH4/ultrachat_200k
12
  model-index:
13
  - name: zephyr-7b-sft-full
14
  results: []
@@ -19,7 +17,7 @@ should probably proofread and complete it, then remove this comment. -->
19
 
20
  # zephyr-7b-sft-full
21
 
22
- This model is a fine-tuned version of [mistralai/Mistral-7B-v0.1](https://huggingface.co/mistralai/Mistral-7B-v0.1) on the HuggingFaceH4/ultrachat_200k dataset.
23
  It achieves the following results on the evaluation set:
24
  - Loss: 0.9357
25
 
@@ -57,7 +55,7 @@ The following hyperparameters were used during training:
57
 
58
  | Training Loss | Epoch | Step | Validation Loss |
59
  |:-------------:|:-----:|:----:|:---------------:|
60
- | 0.9081 | 1.0 | 1090 | 0.9357 |
61
 
62
 
63
  ### Framework versions
 
2
  license: apache-2.0
3
  base_model: mistralai/Mistral-7B-v0.1
4
  tags:
 
 
5
  - trl
6
  - sft
7
  - generated_from_trainer
8
  datasets:
9
+ - generator
10
  model-index:
11
  - name: zephyr-7b-sft-full
12
  results: []
 
17
 
18
  # zephyr-7b-sft-full
19
 
20
+ This model is a fine-tuned version of [mistralai/Mistral-7B-v0.1](https://huggingface.co/mistralai/Mistral-7B-v0.1) on the generator dataset.
21
  It achieves the following results on the evaluation set:
22
  - Loss: 0.9357
23
 
 
55
 
56
  | Training Loss | Epoch | Step | Validation Loss |
57
  |:-------------:|:-----:|:----:|:---------------:|
58
+ | 0.9082 | 1.0 | 1090 | 0.9357 |
59
 
60
 
61
  ### Framework versions
all_results.json CHANGED
@@ -1,13 +1,13 @@
1
  {
2
  "epoch": 1.0,
3
- "eval_loss": 0.9357115030288696,
4
- "eval_runtime": 337.5218,
5
  "eval_samples": 23110,
6
- "eval_samples_per_second": 45.719,
7
- "eval_steps_per_second": 0.717,
8
- "train_loss": 0.9779492704146499,
9
- "train_runtime": 12151.2808,
10
  "train_samples": 207865,
11
- "train_samples_per_second": 11.474,
12
- "train_steps_per_second": 0.09
13
  }
 
1
  {
2
  "epoch": 1.0,
3
+ "eval_loss": 0.9357138276100159,
4
+ "eval_runtime": 337.7533,
5
  "eval_samples": 23110,
6
+ "eval_samples_per_second": 45.687,
7
+ "eval_steps_per_second": 0.716,
8
+ "train_loss": 0.07625311886498687,
9
+ "train_runtime": 1283.1445,
10
  "train_samples": 207865,
11
+ "train_samples_per_second": 108.66,
12
+ "train_steps_per_second": 0.849
13
  }
config.json CHANGED
@@ -21,6 +21,6 @@
21
  "tie_word_embeddings": false,
22
  "torch_dtype": "bfloat16",
23
  "transformers_version": "4.36.2",
24
- "use_cache": true,
25
  "vocab_size": 32000
26
  }
 
21
  "tie_word_embeddings": false,
22
  "torch_dtype": "bfloat16",
23
  "transformers_version": "4.36.2",
24
+ "use_cache": false,
25
  "vocab_size": 32000
26
  }
eval_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "epoch": 1.0,
3
- "eval_loss": 0.9357115030288696,
4
- "eval_runtime": 337.5218,
5
  "eval_samples": 23110,
6
- "eval_samples_per_second": 45.719,
7
- "eval_steps_per_second": 0.717
8
  }
 
1
  {
2
  "epoch": 1.0,
3
+ "eval_loss": 0.9357138276100159,
4
+ "eval_runtime": 337.7533,
5
  "eval_samples": 23110,
6
+ "eval_samples_per_second": 45.687,
7
+ "eval_steps_per_second": 0.716
8
  }
model-00001-of-00003.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:9caa0e01c728e53d96bf22580aa44c7e7cbee59915dca4055d2b770d5496a3c7
3
  size 4943162336
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:070f2c2cfe338875a56ccfa23a395385e1570c0d012d8fbe3f49dee58dcaca17
3
  size 4943162336
model-00002-of-00003.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:71821a0b0dd8f8297e58c224ae8c4c270b43a26d8f7124778e9319129e0c628a
3
  size 4999819336
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:86687db1e94132049ffb71bdd40bfdd82bf0601f4370a96a0b9d9ac80115c681
3
  size 4999819336
model-00003-of-00003.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:9ea43e2df8d41a2e19a56943a634eba0b781e850c96a8c7e807a97dd185546b2
3
  size 4540516344
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0124f92197c6639a9e532627e51bc4a75e29aa6c51d724b17ff0aba1366a75d7
3
  size 4540516344
runs/Feb20_14-12-02_idealab-05.cs.illinois.edu/events.out.tfevents.1708459946.idealab-05.cs.illinois.edu.1352443.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:03d908b92954853e30d065702659a5f8e17349f5852cfc0ba86fd45de1a7633e
3
+ size 7893
runs/Feb20_14-12-02_idealab-05.cs.illinois.edu/events.out.tfevents.1708461567.idealab-05.cs.illinois.edu.1352443.1 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0506fae4e3944944daa47bb70d8a7542bd09ba7a8e3f19df328dee16322b8671
3
+ size 359
train_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "epoch": 1.0,
3
- "train_loss": 0.9779492704146499,
4
- "train_runtime": 12151.2808,
5
  "train_samples": 207865,
6
- "train_samples_per_second": 11.474,
7
- "train_steps_per_second": 0.09
8
  }
 
1
  {
2
  "epoch": 1.0,
3
+ "train_loss": 0.07625311886498687,
4
+ "train_runtime": 1283.1445,
5
  "train_samples": 207865,
6
+ "train_samples_per_second": 108.66,
7
+ "train_steps_per_second": 0.849
8
  }
trainer_state.json CHANGED
@@ -1229,7 +1229,7 @@
1229
  {
1230
  "epoch": 0.93,
1231
  "learning_rate": 2.870552305351382e-07,
1232
- "loss": 0.9293,
1233
  "step": 1015
1234
  },
1235
  {
@@ -1253,7 +1253,7 @@
1253
  {
1254
  "epoch": 0.95,
1255
  "learning_rate": 1.5471558192656776e-07,
1256
- "loss": 0.9232,
1257
  "step": 1035
1258
  },
1259
  {
@@ -1271,7 +1271,7 @@
1271
  {
1272
  "epoch": 0.96,
1273
  "learning_rate": 8.19327959602012e-08,
1274
- "loss": 0.9372,
1275
  "step": 1050
1276
  },
1277
  {
@@ -1301,7 +1301,7 @@
1301
  {
1302
  "epoch": 0.99,
1303
  "learning_rate": 1.1535349032167908e-08,
1304
- "loss": 0.9247,
1305
  "step": 1075
1306
  },
1307
  {
@@ -1313,31 +1313,31 @@
1313
  {
1314
  "epoch": 1.0,
1315
  "learning_rate": 1.2819245493955746e-09,
1316
- "loss": 0.9163,
1317
  "step": 1085
1318
  },
1319
  {
1320
  "epoch": 1.0,
1321
  "learning_rate": 0.0,
1322
- "loss": 0.9081,
1323
  "step": 1090
1324
  },
1325
  {
1326
  "epoch": 1.0,
1327
- "eval_loss": 0.9357115030288696,
1328
- "eval_runtime": 337.6383,
1329
- "eval_samples_per_second": 45.703,
1330
- "eval_steps_per_second": 0.717,
1331
  "step": 1090
1332
  },
1333
  {
1334
  "epoch": 1.0,
1335
  "step": 1090,
1336
  "total_flos": 456447649382400.0,
1337
- "train_loss": 0.9779492704146499,
1338
- "train_runtime": 12151.2808,
1339
- "train_samples_per_second": 11.474,
1340
- "train_steps_per_second": 0.09
1341
  }
1342
  ],
1343
  "logging_steps": 5,
 
1229
  {
1230
  "epoch": 0.93,
1231
  "learning_rate": 2.870552305351382e-07,
1232
+ "loss": 0.9294,
1233
  "step": 1015
1234
  },
1235
  {
 
1253
  {
1254
  "epoch": 0.95,
1255
  "learning_rate": 1.5471558192656776e-07,
1256
+ "loss": 0.9233,
1257
  "step": 1035
1258
  },
1259
  {
 
1271
  {
1272
  "epoch": 0.96,
1273
  "learning_rate": 8.19327959602012e-08,
1274
+ "loss": 0.9371,
1275
  "step": 1050
1276
  },
1277
  {
 
1301
  {
1302
  "epoch": 0.99,
1303
  "learning_rate": 1.1535349032167908e-08,
1304
+ "loss": 0.9248,
1305
  "step": 1075
1306
  },
1307
  {
 
1313
  {
1314
  "epoch": 1.0,
1315
  "learning_rate": 1.2819245493955746e-09,
1316
+ "loss": 0.9164,
1317
  "step": 1085
1318
  },
1319
  {
1320
  "epoch": 1.0,
1321
  "learning_rate": 0.0,
1322
+ "loss": 0.9082,
1323
  "step": 1090
1324
  },
1325
  {
1326
  "epoch": 1.0,
1327
+ "eval_loss": 0.9357138276100159,
1328
+ "eval_runtime": 338.0357,
1329
+ "eval_samples_per_second": 45.649,
1330
+ "eval_steps_per_second": 0.716,
1331
  "step": 1090
1332
  },
1333
  {
1334
  "epoch": 1.0,
1335
  "step": 1090,
1336
  "total_flos": 456447649382400.0,
1337
+ "train_loss": 0.07625311886498687,
1338
+ "train_runtime": 1283.1445,
1339
+ "train_samples_per_second": 108.66,
1340
+ "train_steps_per_second": 0.849
1341
  }
1342
  ],
1343
  "logging_steps": 5,
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:abe12a4b60609475166cb6e2ac4cc51e96a1d0ee98ca97c0a946d7d0a5039515
3
  size 5880
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0ba4634a5dc42447eaee695ee8aa28ff4185b93c3a3c211a42704f15c8610336
3
  size 5880