LongshenOu committed on
Commit
149c9ba
1 Parent(s): 41f1394

End of training

Browse files
Files changed (32) hide show
  1. README.md +54 -0
  2. config.json +39 -0
  3. generation_config.json +6 -0
  4. model.safetensors +3 -0
  5. runs/Jun20_05-01-14_smc-gpu3/events.out.tfevents.1718859678.smc-gpu3.126680.0 +3 -0
  6. runs/Jun20_05-02-10_smc-gpu3/events.out.tfevents.1718859732.smc-gpu3.126680.1 +3 -0
  7. runs/Jun20_05-08-32_smc-gpu3/events.out.tfevents.1718860116.smc-gpu3.126680.2 +3 -0
  8. runs/Jun20_05-10-31_smc-gpu3/events.out.tfevents.1718860236.smc-gpu3.126680.3 +3 -0
  9. runs/Jun20_05-12-09_smc-gpu3/events.out.tfevents.1718860332.smc-gpu3.126680.4 +3 -0
  10. runs/Jun20_05-13-16_smc-gpu3/events.out.tfevents.1718860400.smc-gpu3.131587.0 +3 -0
  11. runs/Jun20_05-15-24_smc-gpu3/events.out.tfevents.1718860528.smc-gpu3.132607.0 +3 -0
  12. runs/Jun20_05-15-58_smc-gpu3/events.out.tfevents.1718860560.smc-gpu3.132607.1 +3 -0
  13. runs/Jun20_05-16-49_smc-gpu3/events.out.tfevents.1718860613.smc-gpu3.133223.0 +3 -0
  14. runs/Jun20_05-23-07_smc-gpu3/events.out.tfevents.1718860997.smc-gpu3.133223.1 +3 -0
  15. runs/Jun20_05-23-58_smc-gpu3/events.out.tfevents.1718861041.smc-gpu3.136973.0 +3 -0
  16. runs/Jun20_05-24-18_smc-gpu3/events.out.tfevents.1718861061.smc-gpu3.136973.1 +3 -0
  17. runs/Jun20_05-34-30_smc-gpu3/events.out.tfevents.1718861672.smc-gpu3.136973.2 +3 -0
  18. runs/Jun20_05-35-05_smc-gpu3/events.out.tfevents.1718861707.smc-gpu3.139838.0 +3 -0
  19. runs/Jun20_05-51-37_smc-gpu3/events.out.tfevents.1718862701.smc-gpu3.148769.0 +3 -0
  20. runs/Jun20_05-52-57_smc-gpu3/events.out.tfevents.1718862781.smc-gpu3.149973.0 +3 -0
  21. runs/Jun20_06-07-03_smc-gpu3/events.out.tfevents.1718863626.smc-gpu3.149973.1 +3 -0
  22. runs/Jun20_06-16-58_smc-gpu3/events.out.tfevents.1718864221.smc-gpu3.149973.2 +3 -0
  23. runs/Jun20_06-17-48_smc-gpu3/events.out.tfevents.1718864270.smc-gpu3.149973.3 +3 -0
  24. runs/Jun20_06-18-35_smc-gpu3/events.out.tfevents.1718864318.smc-gpu3.149973.4 +3 -0
  25. runs/Jun20_06-19-31_smc-gpu3/events.out.tfevents.1718864374.smc-gpu3.161446.0 +3 -0
  26. runs/Jun20_06-20-37_smc-gpu3/events.out.tfevents.1718864441.smc-gpu3.161972.0 +3 -0
  27. runs/Jun20_09-20-46_smc-gpu3/events.out.tfevents.1718875248.smc-gpu3.205648.0 +3 -0
  28. runs/Jun20_09-55-03_smc-gpu3/events.out.tfevents.1718877306.smc-gpu3.205648.1 +3 -0
  29. special_tokens_map.json +45 -0
  30. tokenizer.json +0 -0
  31. tokenizer_config.json +0 -0
  32. training_args.bin +3 -0
README.md ADDED
@@ -0,0 +1,54 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ tags:
3
+ - generated_from_trainer
4
+ model-index:
5
+ - name: m2m_pt
6
+ results: []
7
+ ---
8
+
9
+ <!-- This model card has been generated automatically according to the information the Trainer had access to. You
10
+ should probably proofread and complete it, then remove this comment. -->
11
+
12
+ # m2m_pt
13
+
14
+ This model was trained from scratch on an unknown dataset.
15
+
16
+ ## Model description
17
+
18
+ More information needed
19
+
20
+ ## Intended uses & limitations
21
+
22
+ More information needed
23
+
24
+ ## Training and evaluation data
25
+
26
+ More information needed
27
+
28
+ ## Training procedure
29
+
30
+ ### Training hyperparameters
31
+
32
+ The following hyperparameters were used during training:
33
+ - learning_rate: 0.0005
34
+ - train_batch_size: 12
35
+ - eval_batch_size: 12
36
+ - seed: 42
37
+ - gradient_accumulation_steps: 8
38
+ - total_train_batch_size: 96
39
+ - optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
40
+ - lr_scheduler_type: cosine
41
+ - lr_scheduler_warmup_steps: 1000
42
+ - num_epochs: 1
43
+ - mixed_precision_training: Native AMP
44
+
45
+ ### Training results
46
+
47
+
48
+
49
+ ### Framework versions
50
+
51
+ - Transformers 4.40.0.dev0
52
+ - PyTorch 2.0.1+cu117
53
+ - Datasets 2.20.0
54
+ - Tokenizers 0.15.2
config.json ADDED
@@ -0,0 +1,39 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "test_model",
3
+ "activation_function": "gelu_new",
4
+ "architectures": [
5
+ "GPT2LMHeadModel"
6
+ ],
7
+ "attn_pdrop": 0.1,
8
+ "bos_token_id": 2,
9
+ "embd_pdrop": 0.1,
10
+ "eos_token_id": 1,
11
+ "initializer_range": 0.02,
12
+ "layer_norm_epsilon": 1e-05,
13
+ "model_type": "gpt2",
14
+ "n_ctx": 2048,
15
+ "n_embd": 768,
16
+ "n_head": 16,
17
+ "n_inner": null,
18
+ "n_layer": 12,
19
+ "n_positions": 2048,
20
+ "reorder_and_upcast_attn": false,
21
+ "resid_pdrop": 0.1,
22
+ "scale_attn_by_inverse_layer_idx": false,
23
+ "scale_attn_weights": true,
24
+ "summary_activation": null,
25
+ "summary_first_dropout": 0.1,
26
+ "summary_proj_to_labels": true,
27
+ "summary_type": "cls_index",
28
+ "summary_use_proj": true,
29
+ "task_specific_params": {
30
+ "text-generation": {
31
+ "do_sample": true,
32
+ "max_length": 50
33
+ }
34
+ },
35
+ "torch_dtype": "bfloat16",
36
+ "transformers_version": "4.40.0.dev0",
37
+ "use_cache": true,
38
+ "vocab_size": 989
39
+ }
generation_config.json ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ {
2
+ "_from_model_config": true,
3
+ "bos_token_id": 2,
4
+ "eos_token_id": 1,
5
+ "transformers_version": "4.40.0.dev0"
6
+ }
model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:03683952ad3cd90b64018db92971f35c661a578c2fc983be2bb6413b0136083f
3
+ size 174791872
runs/Jun20_05-01-14_smc-gpu3/events.out.tfevents.1718859678.smc-gpu3.126680.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0dc515b5d81bc6f582e11aa26c4c043c3184add77554aaf7752bad42487a28a7
3
+ size 4860
runs/Jun20_05-02-10_smc-gpu3/events.out.tfevents.1718859732.smc-gpu3.126680.1 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f1d5a45daa85458bf6f9af05cbe2ec7eff128d3f5d19289370edee59beb416da
3
+ size 5067
runs/Jun20_05-08-32_smc-gpu3/events.out.tfevents.1718860116.smc-gpu3.126680.2 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:66a7dddc63da4f02c87f81b6af8d94168630fac119828dfe4abac8b3a58df3ee
3
+ size 5068
runs/Jun20_05-10-31_smc-gpu3/events.out.tfevents.1718860236.smc-gpu3.126680.3 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5571d0d15e90fe280cd331e5c6a1212d618b6a34e59dd960337d02db865d93e2
3
+ size 5489
runs/Jun20_05-12-09_smc-gpu3/events.out.tfevents.1718860332.smc-gpu3.126680.4 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4a44ed9b1fc39baab3000d69d6596338a4265270cdc9e051d49e5124b1529baa
3
+ size 4868
runs/Jun20_05-13-16_smc-gpu3/events.out.tfevents.1718860400.smc-gpu3.131587.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3f9461b9f8a3a2d3c292fe82e6c9bac710c0673ff919a525507d2aa12aeeff6e
3
+ size 5282
runs/Jun20_05-15-24_smc-gpu3/events.out.tfevents.1718860528.smc-gpu3.132607.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8a268947c46b4814499aed3b3c55da50c612a7e6e9ae1098620672d17e88242c
3
+ size 4867
runs/Jun20_05-15-58_smc-gpu3/events.out.tfevents.1718860560.smc-gpu3.132607.1 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:eb8c47f3c175eb01ecef844aa7056a02652439bdbfaf86ce4eb900e0a4e6ad78
3
+ size 4866
runs/Jun20_05-16-49_smc-gpu3/events.out.tfevents.1718860613.smc-gpu3.133223.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:90fc75958cb63e606a9198f45c57d8e5a32c11423076a2b45f95314c4685783d
3
+ size 7557
runs/Jun20_05-23-07_smc-gpu3/events.out.tfevents.1718860997.smc-gpu3.133223.1 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:254caf1b2a5b61a6c5c72edb6032b2f287e2884d3cd68450a57cf07cf59a79f3
3
+ size 4867
runs/Jun20_05-23-58_smc-gpu3/events.out.tfevents.1718861041.smc-gpu3.136973.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ce324d454b876f24397abe7709e54ba59ed64d73401b6b39332c523bc018fe44
3
+ size 4867
runs/Jun20_05-24-18_smc-gpu3/events.out.tfevents.1718861061.smc-gpu3.136973.1 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:94653ee8f46cae73a464aa6d7adf9f230db342cb9bd3a60153041fb68a774738
3
+ size 6937
runs/Jun20_05-34-30_smc-gpu3/events.out.tfevents.1718861672.smc-gpu3.136973.2 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:db7fee3c70d9ccef273c22490be53011b8a09dd347e44eec94e2c9de814c6711
3
+ size 4867
runs/Jun20_05-35-05_smc-gpu3/events.out.tfevents.1718861707.smc-gpu3.139838.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c61cfd43e1919d0500b225de480dc52ba1bfc5d24a56fcea48c10d5166c4c9df
3
+ size 12785
runs/Jun20_05-51-37_smc-gpu3/events.out.tfevents.1718862701.smc-gpu3.148769.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8832db673c85e4d03579b074acfdf519467789bf0eeeb17c345fedecc4a758ad
3
+ size 4867
runs/Jun20_05-52-57_smc-gpu3/events.out.tfevents.1718862781.smc-gpu3.149973.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c4f55da7b6235701f86e24ba98714149934ddbe07bb7a02ca1551a230f02a081
3
+ size 4867
runs/Jun20_06-07-03_smc-gpu3/events.out.tfevents.1718863626.smc-gpu3.149973.1 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d35e4bf1be747abb3cf71b8d865ff279340efefef5e22b5fc701f5e286163b93
3
+ size 7765
runs/Jun20_06-16-58_smc-gpu3/events.out.tfevents.1718864221.smc-gpu3.149973.2 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:41b7dba08777ea81e80cb5a0f25117b025f9aab932acc2db6b3bc28ea718b943
3
+ size 5902
runs/Jun20_06-17-48_smc-gpu3/events.out.tfevents.1718864270.smc-gpu3.149973.3 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:27e5477df0ea0ac9eb7157becdcb77e024742d0cf2a99853c8ecd88956072ca8
3
+ size 5281
runs/Jun20_06-18-35_smc-gpu3/events.out.tfevents.1718864318.smc-gpu3.149973.4 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d9e73c1cc1f4d52ba60cba1b9727428d2208f4be16e6ce16257b13a35a972f98
3
+ size 4867
runs/Jun20_06-19-31_smc-gpu3/events.out.tfevents.1718864374.smc-gpu3.161446.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:68acc57a4eb7af1df5be0b0e4f961fd46abeee1b76e19d6f192491012e14716b
3
+ size 4867
runs/Jun20_06-20-37_smc-gpu3/events.out.tfevents.1718864441.smc-gpu3.161972.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e06f69a2ffe598ee79a3250838abc028b92a09339de2ded336c90c3dfac04486
3
+ size 5902
runs/Jun20_09-20-46_smc-gpu3/events.out.tfevents.1718875248.smc-gpu3.205648.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ee02ca5daabf5202b0f6dc8c7e1c10b0de1d7cd13c8614698975ec204a563b85
3
+ size 20592
runs/Jun20_09-55-03_smc-gpu3/events.out.tfevents.1718877306.smc-gpu3.205648.1 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cb86d2f915855f0f640b880bf5421119f93466c52c43479d2763278513c9beab
3
+ size 5215
special_tokens_map.json ADDED
@@ -0,0 +1,45 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "bos_token": {
3
+ "content": "[BOS]",
4
+ "lstrip": false,
5
+ "normalized": false,
6
+ "rstrip": false,
7
+ "single_word": false
8
+ },
9
+ "cls_token": {
10
+ "content": "[CLS]",
11
+ "lstrip": false,
12
+ "normalized": false,
13
+ "rstrip": false,
14
+ "single_word": false
15
+ },
16
+ "eos_token": {
17
+ "content": "[EOS]",
18
+ "lstrip": false,
19
+ "normalized": false,
20
+ "rstrip": false,
21
+ "single_word": false
22
+ },
23
+ "mask_token": {
24
+ "content": "[MASK]",
25
+ "lstrip": false,
26
+ "normalized": false,
27
+ "rstrip": false,
28
+ "single_word": false
29
+ },
30
+ "pad_token": "[EOS]",
31
+ "sep_token": {
32
+ "content": "[SEP]",
33
+ "lstrip": false,
34
+ "normalized": false,
35
+ "rstrip": false,
36
+ "single_word": false
37
+ },
38
+ "unk_token": {
39
+ "content": "[UNK]",
40
+ "lstrip": false,
41
+ "normalized": false,
42
+ "rstrip": false,
43
+ "single_word": false
44
+ }
45
+ }
tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
tokenizer_config.json ADDED
The diff for this file is too large to render. See raw diff
 
training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9aefdecf499035c9c781d3ddc8de54d72659e515cafd3b917f4130b179bd3657
3
+ size 4475