marianna13 committed on
Commit
2c38a96
·
verified ·
1 Parent(s): 6d06af9

Upload folder using huggingface_hub

Browse files
README.md CHANGED
@@ -18,7 +18,7 @@ should probably proofread and complete it, then remove this comment. -->
18
 
19
  This model is a fine-tuned version of [TinyLlama/TinyLlama-1.1B-Chat-v1.0](https://huggingface.co/TinyLlama/TinyLlama-1.1B-Chat-v1.0) on the tatsu-lab/alpaca dataset.
20
  It achieves the following results on the evaluation set:
21
- - Loss: 1.2132
22
 
23
  ## Model description
24
 
@@ -46,7 +46,7 @@ The following hyperparameters were used during training:
46
  - gradient_accumulation_steps: 4
47
  - total_train_batch_size: 512
48
  - total_eval_batch_size: 128
49
- - optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
50
  - lr_scheduler_type: cosine
51
  - lr_scheduler_warmup_ratio: 0.1
52
  - num_epochs: 1.0
@@ -55,12 +55,12 @@ The following hyperparameters were used during training:
55
 
56
  | Training Loss | Epoch | Step | Validation Loss |
57
  |:-------------:|:-----:|:----:|:---------------:|
58
- | 1.2457 | 1.0 | 23 | 1.2132 |
59
 
60
 
61
  ### Framework versions
62
 
63
- - Transformers 4.45.0
64
- - Pytorch 2.3.1+cu118
65
  - Datasets 3.0.2
66
  - Tokenizers 0.20.3
 
18
 
19
  This model is a fine-tuned version of [TinyLlama/TinyLlama-1.1B-Chat-v1.0](https://huggingface.co/TinyLlama/TinyLlama-1.1B-Chat-v1.0) on the tatsu-lab/alpaca dataset.
20
  It achieves the following results on the evaluation set:
21
+ - Loss: 1.2134
22
 
23
  ## Model description
24
 
 
46
  - gradient_accumulation_steps: 4
47
  - total_train_batch_size: 512
48
  - total_eval_batch_size: 128
49
+ - optimizer: Use adamw_torch with betas=(0.9,0.999) and epsilon=1e-08 and optimizer_args=No additional optimizer arguments
50
  - lr_scheduler_type: cosine
51
  - lr_scheduler_warmup_ratio: 0.1
52
  - num_epochs: 1.0
 
55
 
56
  | Training Loss | Epoch | Step | Validation Loss |
57
  |:-------------:|:-----:|:----:|:---------------:|
58
+ | 1.2486 | 1.0 | 23 | 1.2134 |
59
 
60
 
61
  ### Framework versions
62
 
63
+ - Transformers 4.46.1
64
+ - Pytorch 2.5.0a0+b465a5843b.nv24.09
65
  - Datasets 3.0.2
66
  - Tokenizers 0.20.3
all_results.json CHANGED
@@ -1,12 +1,12 @@
1
  {
2
  "epoch": 1.0,
3
- "eval_loss": 1.2132008075714111,
4
- "eval_runtime": 0.4587,
5
- "eval_samples_per_second": 1351.645,
6
- "eval_steps_per_second": 10.9,
7
  "total_flos": 3.742438758599885e+16,
8
- "train_loss": 1.3028446384098218,
9
- "train_runtime": 44.4512,
10
- "train_samples_per_second": 264.65,
11
- "train_steps_per_second": 0.517
12
  }
 
1
  {
2
  "epoch": 1.0,
3
+ "eval_loss": 1.2133620977401733,
4
+ "eval_runtime": 0.4304,
5
+ "eval_samples_per_second": 1440.355,
6
+ "eval_steps_per_second": 11.616,
7
  "total_flos": 3.742438758599885e+16,
8
+ "train_loss": 1.3040671763212786,
9
+ "train_runtime": 45.9345,
10
+ "train_samples_per_second": 256.104,
11
+ "train_steps_per_second": 0.501
12
  }
config.json CHANGED
@@ -24,7 +24,7 @@
24
  "rope_theta": 10000.0,
25
  "tie_word_embeddings": false,
26
  "torch_dtype": "bfloat16",
27
- "transformers_version": "4.45.0",
28
  "use_cache": false,
29
  "vocab_size": 32000
30
  }
 
24
  "rope_theta": 10000.0,
25
  "tie_word_embeddings": false,
26
  "torch_dtype": "bfloat16",
27
+ "transformers_version": "4.46.1",
28
  "use_cache": false,
29
  "vocab_size": 32000
30
  }
eval_results.json CHANGED
@@ -1,7 +1,7 @@
1
  {
2
  "epoch": 1.0,
3
- "eval_loss": 1.2132008075714111,
4
- "eval_runtime": 0.4587,
5
- "eval_samples_per_second": 1351.645,
6
- "eval_steps_per_second": 10.9
7
  }
 
1
  {
2
  "epoch": 1.0,
3
+ "eval_loss": 1.2133620977401733,
4
+ "eval_runtime": 0.4304,
5
+ "eval_samples_per_second": 1440.355,
6
+ "eval_steps_per_second": 11.616
7
  }
generation_config.json CHANGED
@@ -3,5 +3,5 @@
3
  "eos_token_id": 2,
4
  "max_length": 2048,
5
  "pad_token_id": 0,
6
- "transformers_version": "4.45.0"
7
  }
 
3
  "eos_token_id": 2,
4
  "max_length": 2048,
5
  "pad_token_id": 0,
6
+ "transformers_version": "4.46.1"
7
  }
model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:3bc13bf8a501edce8765b6ea8a93a8dbc4dd1ed074b375217df8effbbaad2ba5
3
  size 2200119864
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:88f7bfdf386632461b23edf663b3a8dcb3aceef65956f3c58e666a52f99a9462
3
  size 2200119864
train_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "epoch": 1.0,
3
  "total_flos": 3.742438758599885e+16,
4
- "train_loss": 1.3028446384098218,
5
- "train_runtime": 44.4512,
6
- "train_samples_per_second": 264.65,
7
- "train_steps_per_second": 0.517
8
  }
 
1
  {
2
  "epoch": 1.0,
3
  "total_flos": 3.742438758599885e+16,
4
+ "train_loss": 1.3040671763212786,
5
+ "train_runtime": 45.9345,
6
+ "train_samples_per_second": 256.104,
7
+ "train_steps_per_second": 0.501
8
  }
trainer_log.jsonl CHANGED
@@ -1,4 +1,4 @@
1
- {"current_steps": 10, "total_steps": 23, "loss": 1.3742, "learning_rate": 1.4539904997395468e-05, "epoch": 0.43478260869565216, "percentage": 43.48, "elapsed_time": "0:00:15", "remaining_time": "0:00:20"}
2
- {"current_steps": 20, "total_steps": 23, "loss": 1.2457, "learning_rate": 1.0899347581163222e-06, "epoch": 0.8695652173913043, "percentage": 86.96, "elapsed_time": "0:00:29", "remaining_time": "0:00:04"}
3
- {"current_steps": 23, "total_steps": 23, "eval_loss": 1.2132008075714111, "epoch": 1.0, "percentage": 100.0, "elapsed_time": "0:00:37", "remaining_time": "0:00:00"}
4
- {"current_steps": 23, "total_steps": 23, "epoch": 1.0, "percentage": 100.0, "elapsed_time": "0:00:41", "remaining_time": "0:00:00"}
 
1
+ {"current_steps": 10, "total_steps": 23, "loss": 1.3742, "learning_rate": 1.4539904997395468e-05, "epoch": 0.43478260869565216, "percentage": 43.48, "elapsed_time": "0:00:19", "remaining_time": "0:00:25"}
2
+ {"current_steps": 20, "total_steps": 23, "loss": 1.2486, "learning_rate": 1.0899347581163222e-06, "epoch": 0.8695652173913043, "percentage": 86.96, "elapsed_time": "0:00:31", "remaining_time": "0:00:04"}
3
+ {"current_steps": 23, "total_steps": 23, "eval_loss": 1.2133620977401733, "epoch": 1.0, "percentage": 100.0, "elapsed_time": "0:00:40", "remaining_time": "0:00:00"}
4
+ {"current_steps": 23, "total_steps": 23, "epoch": 1.0, "percentage": 100.0, "elapsed_time": "0:00:43", "remaining_time": "0:00:00"}
trainer_state.json CHANGED
@@ -10,34 +10,34 @@
10
  "log_history": [
11
  {
12
  "epoch": 0.43478260869565216,
13
- "grad_norm": 0.8412916660308838,
14
  "learning_rate": 1.4539904997395468e-05,
15
  "loss": 1.3742,
16
  "step": 10
17
  },
18
  {
19
  "epoch": 0.8695652173913043,
20
- "grad_norm": 0.5978009700775146,
21
  "learning_rate": 1.0899347581163222e-06,
22
- "loss": 1.2457,
23
  "step": 20
24
  },
25
  {
26
  "epoch": 1.0,
27
- "eval_loss": 1.2132008075714111,
28
- "eval_runtime": 0.5736,
29
- "eval_samples_per_second": 1080.949,
30
- "eval_steps_per_second": 8.717,
31
  "step": 23
32
  },
33
  {
34
  "epoch": 1.0,
35
  "step": 23,
36
  "total_flos": 3.742438758599885e+16,
37
- "train_loss": 1.3028446384098218,
38
- "train_runtime": 44.4512,
39
- "train_samples_per_second": 264.65,
40
- "train_steps_per_second": 0.517
41
  }
42
  ],
43
  "logging_steps": 10,
 
10
  "log_history": [
11
  {
12
  "epoch": 0.43478260869565216,
13
+ "grad_norm": 0.8347742557525635,
14
  "learning_rate": 1.4539904997395468e-05,
15
  "loss": 1.3742,
16
  "step": 10
17
  },
18
  {
19
  "epoch": 0.8695652173913043,
20
+ "grad_norm": 0.5884836316108704,
21
  "learning_rate": 1.0899347581163222e-06,
22
+ "loss": 1.2486,
23
  "step": 20
24
  },
25
  {
26
  "epoch": 1.0,
27
+ "eval_loss": 1.2133620977401733,
28
+ "eval_runtime": 0.5411,
29
+ "eval_samples_per_second": 1145.916,
30
+ "eval_steps_per_second": 9.241,
31
  "step": 23
32
  },
33
  {
34
  "epoch": 1.0,
35
  "step": 23,
36
  "total_flos": 3.742438758599885e+16,
37
+ "train_loss": 1.3040671763212786,
38
+ "train_runtime": 45.9345,
39
+ "train_samples_per_second": 256.104,
40
+ "train_steps_per_second": 0.501
41
  }
42
  ],
43
  "logging_steps": 10,
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:228e5acd5cb6b0cf84b6f368b9529eeb3bc84227da62ebef15a3fd87143e4618
3
- size 6712
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:abe9cbc400346e4105e77711f48ffdc0a695f7d76414a932ce10713f588d4c3d
3
+ size 7032