indiejoseph committed on
Commit
8ddabd8
1 Parent(s): 0379dc0

Model save

Browse files
README.md CHANGED
@@ -1,21 +1,22 @@
1
  ---
2
- license: other
3
  base_model: hon9kon9ize/yi-1.5-6b-yub-vocab-expanded
4
  tags:
5
- - llama-factory
6
- - full
7
  - generated_from_trainer
 
 
8
  model-index:
9
- - name: cantonesellm-6b
10
  results: []
11
  ---
12
 
13
  <!-- This model card has been generated automatically according to the information the Trainer had access to. You
14
  should probably proofread and complete it, then remove this comment. -->
15
 
16
- # cantonesellm-6b
17
 
18
- This model is a fine-tuned version of [hon9kon9ize/yi-1.5-6b-yub-vocab-expanded](https://huggingface.co/hon9kon9ize/yi-1.5-6b-yub-vocab-expanded) on the yue_corpus dataset.
19
 
20
  ## Model description
21
 
@@ -35,15 +36,15 @@ More information needed
35
 
36
  The following hyperparameters were used during training:
37
  - learning_rate: 5e-05
38
- - train_batch_size: 4
39
  - eval_batch_size: 8
40
  - seed: 42
41
- - gradient_accumulation_steps: 32
42
  - total_train_batch_size: 128
43
  - optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
44
  - lr_scheduler_type: cosine
45
- - lr_scheduler_warmup_ratio: 0.03
46
- - num_epochs: 1.0
47
 
48
  ### Training results
49
 
@@ -52,6 +53,6 @@ The following hyperparameters were used during training:
52
  ### Framework versions
53
 
54
  - Transformers 4.40.2
55
- - Pytorch 2.2.0+cu121
56
  - Datasets 2.19.1
57
  - Tokenizers 0.19.1
 
1
  ---
 
2
  base_model: hon9kon9ize/yi-1.5-6b-yub-vocab-expanded
3
  tags:
4
+ - trl
5
+ - sft
6
  - generated_from_trainer
7
+ datasets:
8
+ - generator
9
  model-index:
10
+ - name: cantonesellm-cpt-202405
11
  results: []
12
  ---
13
 
14
  <!-- This model card has been generated automatically according to the information the Trainer had access to. You
15
  should probably proofread and complete it, then remove this comment. -->
16
 
17
+ # cantonesellm-cpt-202405
18
 
19
+ This model is a fine-tuned version of [hon9kon9ize/yi-1.5-6b-yub-vocab-expanded](https://huggingface.co/hon9kon9ize/yi-1.5-6b-yub-vocab-expanded) on the generator dataset.
20
 
21
  ## Model description
22
 
 
36
 
37
  The following hyperparameters were used during training:
38
  - learning_rate: 5e-05
39
+ - train_batch_size: 8
40
  - eval_batch_size: 8
41
  - seed: 42
42
+ - gradient_accumulation_steps: 16
43
  - total_train_batch_size: 128
44
  - optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
45
  - lr_scheduler_type: cosine
46
+ - lr_scheduler_warmup_ratio: 0.05
47
+ - num_epochs: 1
48
 
49
  ### Training results
50
 
 
53
  ### Framework versions
54
 
55
  - Transformers 4.40.2
56
+ - Pytorch 2.1.0+cu118
57
  - Datasets 2.19.1
58
  - Tokenizers 0.19.1
all_results.json CHANGED
@@ -1,8 +1,9 @@
1
  {
2
- "epoch": 0.9994304157964686,
3
- "total_flos": 1.2030734967018357e+19,
4
- "train_loss": 0.9496028307361081,
5
- "train_runtime": 40588.5091,
6
- "train_samples_per_second": 2.076,
7
- "train_steps_per_second": 0.016
 
8
  }
 
1
  {
2
+ "epoch": 0.9991804622193083,
3
+ "total_flos": 1.0450600815746875e+19,
4
+ "train_loss": 2.588996330897013,
5
+ "train_runtime": 133512.32,
6
+ "train_samples": 384224,
7
+ "train_samples_per_second": 1.097,
8
+ "train_steps_per_second": 0.009
9
  }
model-00001-of-00003.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c317ae80702f365ce4635a377d3c370f61eaa255aa3c2831fbe4b2e69aea5f43
3
- size 4959450112
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:03dc0f2fe5a777636711eb1a111d6d56d4121ea63e4acfccf5a03d91562bc922
3
+ size 4961022976
model-00002-of-00003.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c9aefbf25642d48b4cd135ee46bc3bdfff583fa4bb9cf614942500ceb0087f9e
3
  size 4976802816
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ca746068ea80ba06dc5e5b6b8f4bb696254a8b56f366ec0722bdf573882061cc
3
  size 4976802816
model-00003-of-00003.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:034334f303ff192f0868f6c726fff67630852297ff5134a9ebd2fc1fefa1e619
3
- size 2239329256
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d731aa325a7e038e9c8ac14022b4d1b00ab723b739653cf6aaa2ecedc3538f01
3
+ size 2240902120
model.safetensors.index.json CHANGED
@@ -1,6 +1,6 @@
1
  {
2
  "metadata": {
3
- "total_size": 12175548416
4
  },
5
  "weight_map": {
6
  "lm_head.weight": "model-00003-of-00003.safetensors",
 
1
  {
2
  "metadata": {
3
+ "total_size": 12178694144
4
  },
5
  "weight_map": {
6
  "lm_head.weight": "model-00003-of-00003.safetensors",
train_results.json CHANGED
@@ -1,8 +1,9 @@
1
  {
2
- "epoch": 0.9994304157964686,
3
- "total_flos": 1.2030734967018357e+19,
4
- "train_loss": 0.9496028307361081,
5
- "train_runtime": 40588.5091,
6
- "train_samples_per_second": 2.076,
7
- "train_steps_per_second": 0.016
 
8
  }
 
1
  {
2
+ "epoch": 0.9991804622193083,
3
+ "total_flos": 1.0450600815746875e+19,
4
+ "train_loss": 2.588996330897013,
5
+ "train_runtime": 133512.32,
6
+ "train_samples": 384224,
7
+ "train_samples_per_second": 1.097,
8
+ "train_steps_per_second": 0.009
9
  }
trainer_state.json CHANGED
The diff for this file is too large to render. See raw diff