terry69 committed on
Commit
9781278
1 Parent(s): 80a14c7

Model save

Browse files
README.md CHANGED
@@ -1,16 +1,12 @@
1
  ---
 
2
  license: apache-2.0
3
  base_model: mistralai/Mistral-7B-Instruct-v0.2
4
  tags:
5
- - alignment-handbook
6
- - trl
7
- - sft
8
- - generated_from_trainer
9
  - trl
10
  - sft
 
11
  - generated_from_trainer
12
- datasets:
13
- - preference-data
14
  model-index:
15
  - name: preference_p0.2_seed42_level2_raremixbatch16
16
  results: []
@@ -21,7 +17,7 @@ should probably proofread and complete it, then remove this comment. -->
21
 
22
  # preference_p0.2_seed42_level2_raremixbatch16
23
 
24
- This model is a fine-tuned version of [mistralai/Mistral-7B-Instruct-v0.2](https://huggingface.co/mistralai/Mistral-7B-Instruct-v0.2) on the preference-data dataset.
25
  It achieves the following results on the evaluation set:
26
  - Loss: 0.2978
27
 
@@ -64,7 +60,7 @@ The following hyperparameters were used during training:
64
 
65
  ### Framework versions
66
 
67
- - Transformers 4.43.4
68
  - Pytorch 2.3.1+cu121
69
  - Datasets 2.19.1
70
  - Tokenizers 0.19.1
 
1
  ---
2
+ library_name: transformers
3
  license: apache-2.0
4
  base_model: mistralai/Mistral-7B-Instruct-v0.2
5
  tags:
 
 
 
 
6
  - trl
7
  - sft
8
+ - alignment-handbook
9
  - generated_from_trainer
 
 
10
  model-index:
11
  - name: preference_p0.2_seed42_level2_raremixbatch16
12
  results: []
 
17
 
18
  # preference_p0.2_seed42_level2_raremixbatch16
19
 
20
+ This model is a fine-tuned version of [mistralai/Mistral-7B-Instruct-v0.2](https://huggingface.co/mistralai/Mistral-7B-Instruct-v0.2) on the None dataset.
21
  It achieves the following results on the evaluation set:
22
  - Loss: 0.2978
23
 
 
60
 
61
  ### Framework versions
62
 
63
+ - Transformers 4.44.2
64
  - Pytorch 2.3.1+cu121
65
  - Datasets 2.19.1
66
  - Tokenizers 0.19.1
all_results.json CHANGED
@@ -6,9 +6,9 @@
6
  "eval_samples_per_second": 2.288,
7
  "eval_steps_per_second": 0.763,
8
  "total_flos": 252616554577920.0,
9
- "train_loss": 0.5160677275912573,
10
- "train_runtime": 24380.0574,
11
  "train_samples": 98881,
12
- "train_samples_per_second": 1.584,
13
- "train_steps_per_second": 0.099
14
  }
 
6
  "eval_samples_per_second": 2.288,
7
  "eval_steps_per_second": 0.763,
8
  "total_flos": 252616554577920.0,
9
+ "train_loss": 0.0,
10
+ "train_runtime": 0.0114,
11
  "train_samples": 98881,
12
+ "train_samples_per_second": 3374709.271,
13
+ "train_steps_per_second": 210930.256
14
  }
config.json CHANGED
@@ -21,7 +21,7 @@
21
  "sliding_window": null,
22
  "tie_word_embeddings": false,
23
  "torch_dtype": "bfloat16",
24
- "transformers_version": "4.43.4",
25
- "use_cache": true,
26
  "vocab_size": 32000
27
  }
 
21
  "sliding_window": null,
22
  "tie_word_embeddings": false,
23
  "torch_dtype": "bfloat16",
24
+ "transformers_version": "4.44.2",
25
+ "use_cache": false,
26
  "vocab_size": 32000
27
  }
generation_config.json CHANGED
@@ -2,5 +2,5 @@
2
  "_from_model_config": true,
3
  "bos_token_id": 1,
4
  "eos_token_id": 2,
5
- "transformers_version": "4.43.4"
6
  }
 
2
  "_from_model_config": true,
3
  "bos_token_id": 1,
4
  "eos_token_id": 2,
5
+ "transformers_version": "4.44.2"
6
  }
runs/Sep15_08-35-03_COE-CS-sv004/events.out.tfevents.1726390105.COE-CS-sv004.1922595.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3cef41a7d5b1daf720d87c02366cb076f0cb04d90a0763a8449ce32b9e6a4060
3
+ size 5620
train_results.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "epoch": 1.0,
3
  "total_flos": 252616554577920.0,
4
- "train_loss": 0.5160677275912573,
5
- "train_runtime": 24380.0574,
6
  "train_samples": 98881,
7
- "train_samples_per_second": 1.584,
8
- "train_steps_per_second": 0.099
9
  }
 
1
  {
2
  "epoch": 1.0,
3
  "total_flos": 252616554577920.0,
4
+ "train_loss": 0.0,
5
+ "train_runtime": 0.0114,
6
  "train_samples": 98881,
7
+ "train_samples_per_second": 3374709.271,
8
+ "train_steps_per_second": 210930.256
9
  }
trainer_state.json CHANGED
@@ -3401,10 +3401,10 @@
3401
  "epoch": 1.0,
3402
  "step": 2413,
3403
  "total_flos": 252616554577920.0,
3404
- "train_loss": 0.5160677275912573,
3405
- "train_runtime": 24380.0574,
3406
- "train_samples_per_second": 1.584,
3407
- "train_steps_per_second": 0.099
3408
  }
3409
  ],
3410
  "logging_steps": 5,
 
3401
  "epoch": 1.0,
3402
  "step": 2413,
3403
  "total_flos": 252616554577920.0,
3404
+ "train_loss": 0.0,
3405
+ "train_runtime": 0.0114,
3406
+ "train_samples_per_second": 3374709.271,
3407
+ "train_steps_per_second": 210930.256
3408
  }
3409
  ],
3410
  "logging_steps": 5,
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:66620393a949675652bad6af04ba7c628396c77736dbdc2d00735479ef4e3f41
3
- size 6584
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2f601c7b20f78cffbc8d6ff050826de00392857a78353c245cf7f34118053e52
3
+ size 7032