lewtun committed
Commit d774692
1 Parent(s): 378f250

Model save
README.md CHANGED
@@ -1,6 +1,5 @@
 ---
-license: other
-base_model: Qwen/Qwen1.5-0.5B-Chat
+base_model: HuggingFaceH4/qwen-1.5-0.5b-ift
 tags:
 - generated_from_trainer
 model-index:
@@ -13,7 +12,7 @@ should probably proofread and complete it, then remove this comment. -->
 
 # dummy-model
 
-This model is a fine-tuned version of [Qwen/Qwen1.5-0.5B-Chat](https://huggingface.co/Qwen/Qwen1.5-0.5B-Chat) on the None dataset.
+This model is a fine-tuned version of [HuggingFaceH4/qwen-1.5-0.5b-ift](https://huggingface.co/HuggingFaceH4/qwen-1.5-0.5b-ift) on the None dataset.
 
 ## Model description
 
@@ -37,9 +36,9 @@ The following hyperparameters were used during training:
 - eval_batch_size: 8
 - seed: 42
 - distributed_type: multi-GPU
-- num_devices: 2
-- total_train_batch_size: 16
-- total_eval_batch_size: 16
+- num_devices: 8
+- total_train_batch_size: 64
+- total_eval_batch_size: 64
 - optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
 - lr_scheduler_type: linear
 - lr_scheduler_warmup_ratio: 0.1
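The new totals line up with the per-device settings. A minimal sketch of the arithmetic, assuming a per-device train batch size of 8 (matching eval_batch_size) and no gradient accumulation; neither value appears in this hunk:

    # Effective batch size implied by the README change.
    # Assumption: per-device batch size 8 and gradient_accumulation_steps 1;
    # both are inferred, since only the totals appear in the diff.
    per_device_batch_size = 8
    gradient_accumulation_steps = 1
    for num_devices in (2, 8):  # old run vs. new run
        total = per_device_batch_size * num_devices * gradient_accumulation_steps
        print(f"num_devices={num_devices} -> total_train_batch_size={total}")
    # num_devices=2 -> total_train_batch_size=16
    # num_devices=8 -> total_train_batch_size=64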
all_results.json CHANGED
@@ -1,7 +1,7 @@
 {
     "epoch": 0.0,
     "train_loss": 0.6931471824645996,
-    "train_runtime": 10.6478,
-    "train_samples_per_second": 1.503,
-    "train_steps_per_second": 0.094
+    "train_runtime": 22.1254,
+    "train_samples_per_second": 2.893,
+    "train_steps_per_second": 0.045
 }
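The updated metrics are internally consistent for a single optimizer step at the new batch size of 64. A quick check, assuming exactly one global step:

    # Sanity check on the new all_results.json values.
    # Assumption: one global step over 64 samples (total_train_batch_size above).
    train_runtime = 22.1254
    samples_seen = 64
    print(round(samples_seen / train_runtime, 3))  # 2.893 -> train_samples_per_second
    print(round(1 / train_runtime, 3))             # 0.045 -> train_steps_per_second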
config.json CHANGED
@@ -1,11 +1,11 @@
 {
-  "_name_or_path": "Qwen/Qwen1.5-0.5B-Chat",
+  "_name_or_path": "HuggingFaceH4/qwen-1.5-0.5b-ift",
   "architectures": [
     "Qwen2ForCausalLM"
   ],
   "attention_dropout": 0.0,
   "bos_token_id": 151643,
-  "eos_token_id": 151645,
+  "eos_token_id": 151643,
   "hidden_act": "silu",
   "hidden_size": 1024,
   "initializer_range": 0.02,
generation_config.json CHANGED
@@ -1,11 +1,6 @@
 {
   "bos_token_id": 151643,
-  "do_sample": true,
-  "eos_token_id": [
-    151645,
-    151643
-  ],
-  "repetition_penalty": 1.1,
-  "top_p": 0.8,
+  "eos_token_id": 151643,
+  "max_new_tokens": 2048,
   "transformers_version": "4.37.2"
 }
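With do_sample, top_p, and repetition_penalty removed and a single eos_token_id left, generate() falls back to greedy decoding, stopping on token 151643 or after 2048 new tokens. A minimal sketch of the equivalent config built in code:

    # Equivalent of the trimmed generation_config.json, built programmatically.
    from transformers import GenerationConfig

    gen_config = GenerationConfig(
        bos_token_id=151643,
        eos_token_id=151643,
        max_new_tokens=2048,
    )
    print(gen_config.do_sample)  # False -> greedy decoding by default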
model.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:2ae33e290572bff9d1123269a1d8344f47b1b46c2ce43e057553a7a6da3f2ebc
+oid sha256:17a9586a6a3cf563746cd9081937545a92e46ed2a0edaa8b7ea7386f4d3785cb
 size 928008104
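Only the Git LFS object hash changed; the size stays at 928008104 bytes, as expected for retrained weights of the same architecture. If you want to verify a local download against the pointer, a sketch (the local path is hypothetical):

    # Verify a downloaded model.safetensors against the LFS pointer's sha256.
    import hashlib

    expected = "17a9586a6a3cf563746cd9081937545a92e46ed2a0edaa8b7ea7386f4d3785cb"
    digest = hashlib.sha256()
    with open("model.safetensors", "rb") as f:  # hypothetical local path
        for chunk in iter(lambda: f.read(1 << 20), b""):
            digest.update(chunk)
    assert digest.hexdigest() == expected, "file does not match the LFS pointer"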
special_tokens_map.json CHANGED
@@ -4,7 +4,7 @@
     "<|im_end|>"
   ],
   "eos_token": {
-    "content": "<|im_end|>",
+    "content": "<|endoftext|>",
     "lstrip": false,
     "normalized": false,
     "rstrip": false,
tokenizer_config.json CHANGED
@@ -31,9 +31,9 @@
     "<|im_end|>"
   ],
   "bos_token": null,
-  "chat_template": "{% for message in messages %}{% if loop.first and messages[0]['role'] != 'system' %}{{ '<|im_start|>system\nYou are a helpful assistant<|im_end|>\n' }}{% endif %}{{'<|im_start|>' + message['role'] + '\n' + message['content']}}{% if (loop.last and add_generation_prompt) or not loop.last %}{{ '<|im_end|>' + '\n'}}{% endif %}{% endfor %}{% if add_generation_prompt and messages[-1]['role'] != 'assistant' %}{{ '<|im_start|>assistant\n' }}{% endif %}",
+  "chat_template": "{% for message in messages %}\n{% if message['role'] == 'user' %}\n{{ '<|user|>\n' + message['content'] + eos_token }}\n{% elif message['role'] == 'system' %}\n{{ '<|system|>\n' + message['content'] + eos_token }}\n{% elif message['role'] == 'assistant' %}\n{{ '<|assistant|>\n' + message['content'] + eos_token }}\n{% endif %}\n{% if loop.last and add_generation_prompt %}\n{{ '<|assistant|>' }}\n{% endif %}\n{% endfor %}",
   "clean_up_tokenization_spaces": false,
-  "eos_token": "<|im_end|>",
+  "eos_token": "<|endoftext|>",
   "errors": "replace",
   "model_max_length": 32768,
   "pad_token": "<|endoftext|>",
train_results.json CHANGED
@@ -1,7 +1,7 @@
 {
     "epoch": 0.0,
     "train_loss": 0.6931471824645996,
-    "train_runtime": 10.6478,
-    "train_samples_per_second": 1.503,
-    "train_steps_per_second": 0.094
+    "train_runtime": 22.1254,
+    "train_samples_per_second": 2.893,
+    "train_steps_per_second": 0.045
 }
trainer_state.json CHANGED
@@ -1,7 +1,7 @@
 {
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 0.00032133676092544985,
+  "epoch": 0.0012853470437017994,
   "eval_steps": 500,
   "global_step": 1,
   "is_hyper_param_search": false,
@@ -11,10 +11,10 @@
     {
       "epoch": 0.0,
       "learning_rate": 0.0,
-      "logits/generated": -1.854715347290039,
-      "logits/real": -1.7835123538970947,
-      "logps/generated": -667.4280395507812,
-      "logps/real": -384.09954833984375,
+      "logits/generated": -1.3129560947418213,
+      "logits/real": -0.6997354626655579,
+      "logps/generated": -609.5880126953125,
+      "logps/real": -542.52783203125,
       "loss": 0.6931,
       "rewards/accuracies": 0.0,
       "rewards/generated": 0.0,
@@ -27,9 +27,9 @@
       "step": 1,
       "total_flos": 0.0,
       "train_loss": 0.6931471824645996,
-      "train_runtime": 10.6478,
-      "train_samples_per_second": 1.503,
-      "train_steps_per_second": 0.094
+      "train_runtime": 22.1254,
+      "train_samples_per_second": 2.893,
+      "train_steps_per_second": 0.045
     }
   ],
   "logging_steps": 10,
training_args.bin CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:167e5554608d0755ce3da2e17f4ed94b3e74641b26d088bb4c0fefde26f4c2d5
-size 5880
+oid sha256:3b9cec95750bdee2bcf6a04a483459188304950ed8dc641aec56433fae8350ad
+size 5944