LongshenOu
committed on
Commit
•
149c9ba
1
Parent(s):
41f1394
End of training
Browse files
- README.md +54 -0
- config.json +39 -0
- generation_config.json +6 -0
- model.safetensors +3 -0
- runs/Jun20_05-01-14_smc-gpu3/events.out.tfevents.1718859678.smc-gpu3.126680.0 +3 -0
- runs/Jun20_05-02-10_smc-gpu3/events.out.tfevents.1718859732.smc-gpu3.126680.1 +3 -0
- runs/Jun20_05-08-32_smc-gpu3/events.out.tfevents.1718860116.smc-gpu3.126680.2 +3 -0
- runs/Jun20_05-10-31_smc-gpu3/events.out.tfevents.1718860236.smc-gpu3.126680.3 +3 -0
- runs/Jun20_05-12-09_smc-gpu3/events.out.tfevents.1718860332.smc-gpu3.126680.4 +3 -0
- runs/Jun20_05-13-16_smc-gpu3/events.out.tfevents.1718860400.smc-gpu3.131587.0 +3 -0
- runs/Jun20_05-15-24_smc-gpu3/events.out.tfevents.1718860528.smc-gpu3.132607.0 +3 -0
- runs/Jun20_05-15-58_smc-gpu3/events.out.tfevents.1718860560.smc-gpu3.132607.1 +3 -0
- runs/Jun20_05-16-49_smc-gpu3/events.out.tfevents.1718860613.smc-gpu3.133223.0 +3 -0
- runs/Jun20_05-23-07_smc-gpu3/events.out.tfevents.1718860997.smc-gpu3.133223.1 +3 -0
- runs/Jun20_05-23-58_smc-gpu3/events.out.tfevents.1718861041.smc-gpu3.136973.0 +3 -0
- runs/Jun20_05-24-18_smc-gpu3/events.out.tfevents.1718861061.smc-gpu3.136973.1 +3 -0
- runs/Jun20_05-34-30_smc-gpu3/events.out.tfevents.1718861672.smc-gpu3.136973.2 +3 -0
- runs/Jun20_05-35-05_smc-gpu3/events.out.tfevents.1718861707.smc-gpu3.139838.0 +3 -0
- runs/Jun20_05-51-37_smc-gpu3/events.out.tfevents.1718862701.smc-gpu3.148769.0 +3 -0
- runs/Jun20_05-52-57_smc-gpu3/events.out.tfevents.1718862781.smc-gpu3.149973.0 +3 -0
- runs/Jun20_06-07-03_smc-gpu3/events.out.tfevents.1718863626.smc-gpu3.149973.1 +3 -0
- runs/Jun20_06-16-58_smc-gpu3/events.out.tfevents.1718864221.smc-gpu3.149973.2 +3 -0
- runs/Jun20_06-17-48_smc-gpu3/events.out.tfevents.1718864270.smc-gpu3.149973.3 +3 -0
- runs/Jun20_06-18-35_smc-gpu3/events.out.tfevents.1718864318.smc-gpu3.149973.4 +3 -0
- runs/Jun20_06-19-31_smc-gpu3/events.out.tfevents.1718864374.smc-gpu3.161446.0 +3 -0
- runs/Jun20_06-20-37_smc-gpu3/events.out.tfevents.1718864441.smc-gpu3.161972.0 +3 -0
- runs/Jun20_09-20-46_smc-gpu3/events.out.tfevents.1718875248.smc-gpu3.205648.0 +3 -0
- runs/Jun20_09-55-03_smc-gpu3/events.out.tfevents.1718877306.smc-gpu3.205648.1 +3 -0
- special_tokens_map.json +45 -0
- tokenizer.json +0 -0
- tokenizer_config.json +0 -0
- training_args.bin +3 -0
README.md
ADDED
@@ -0,0 +1,54 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
---
|
2 |
+
tags:
|
3 |
+
- generated_from_trainer
|
4 |
+
model-index:
|
5 |
+
- name: m2m_pt
|
6 |
+
results: []
|
7 |
+
---
|
8 |
+
|
9 |
+
<!-- This model card has been generated automatically according to the information the Trainer had access to. You
|
10 |
+
should probably proofread and complete it, then remove this comment. -->
|
11 |
+
|
12 |
+
# m2m_pt
|
13 |
+
|
14 |
+
This model was trained from scratch on an unknown dataset.
|
15 |
+
|
16 |
+
## Model description
|
17 |
+
|
18 |
+
More information needed
|
19 |
+
|
20 |
+
## Intended uses & limitations
|
21 |
+
|
22 |
+
More information needed
|
23 |
+
|
24 |
+
## Training and evaluation data
|
25 |
+
|
26 |
+
More information needed
|
27 |
+
|
28 |
+
## Training procedure
|
29 |
+
|
30 |
+
### Training hyperparameters
|
31 |
+
|
32 |
+
The following hyperparameters were used during training:
|
33 |
+
- learning_rate: 0.0005
|
34 |
+
- train_batch_size: 12
|
35 |
+
- eval_batch_size: 12
|
36 |
+
- seed: 42
|
37 |
+
- gradient_accumulation_steps: 8
|
38 |
+
- total_train_batch_size: 96
|
39 |
+
- optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
|
40 |
+
- lr_scheduler_type: cosine
|
41 |
+
- lr_scheduler_warmup_steps: 1000
|
42 |
+
- num_epochs: 1
|
43 |
+
- mixed_precision_training: Native AMP
|
44 |
+
|
45 |
+
### Training results
|
46 |
+
|
47 |
+
|
48 |
+
|
49 |
+
### Framework versions
|
50 |
+
|
51 |
+
- Transformers 4.40.0.dev0
|
52 |
+
- Pytorch 2.0.1+cu117
|
53 |
+
- Datasets 2.20.0
|
54 |
+
- Tokenizers 0.15.2
|
config.json
ADDED
@@ -0,0 +1,39 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"_name_or_path": "test_model",
|
3 |
+
"activation_function": "gelu_new",
|
4 |
+
"architectures": [
|
5 |
+
"GPT2LMHeadModel"
|
6 |
+
],
|
7 |
+
"attn_pdrop": 0.1,
|
8 |
+
"bos_token_id": 2,
|
9 |
+
"embd_pdrop": 0.1,
|
10 |
+
"eos_token_id": 1,
|
11 |
+
"initializer_range": 0.02,
|
12 |
+
"layer_norm_epsilon": 1e-05,
|
13 |
+
"model_type": "gpt2",
|
14 |
+
"n_ctx": 2048,
|
15 |
+
"n_embd": 768,
|
16 |
+
"n_head": 16,
|
17 |
+
"n_inner": null,
|
18 |
+
"n_layer": 12,
|
19 |
+
"n_positions": 2048,
|
20 |
+
"reorder_and_upcast_attn": false,
|
21 |
+
"resid_pdrop": 0.1,
|
22 |
+
"scale_attn_by_inverse_layer_idx": false,
|
23 |
+
"scale_attn_weights": true,
|
24 |
+
"summary_activation": null,
|
25 |
+
"summary_first_dropout": 0.1,
|
26 |
+
"summary_proj_to_labels": true,
|
27 |
+
"summary_type": "cls_index",
|
28 |
+
"summary_use_proj": true,
|
29 |
+
"task_specific_params": {
|
30 |
+
"text-generation": {
|
31 |
+
"do_sample": true,
|
32 |
+
"max_length": 50
|
33 |
+
}
|
34 |
+
},
|
35 |
+
"torch_dtype": "bfloat16",
|
36 |
+
"transformers_version": "4.40.0.dev0",
|
37 |
+
"use_cache": true,
|
38 |
+
"vocab_size": 989
|
39 |
+
}
|
generation_config.json
ADDED
@@ -0,0 +1,6 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"_from_model_config": true,
|
3 |
+
"bos_token_id": 2,
|
4 |
+
"eos_token_id": 1,
|
5 |
+
"transformers_version": "4.40.0.dev0"
|
6 |
+
}
|
model.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:03683952ad3cd90b64018db92971f35c661a578c2fc983be2bb6413b0136083f
|
3 |
+
size 174791872
|
runs/Jun20_05-01-14_smc-gpu3/events.out.tfevents.1718859678.smc-gpu3.126680.0
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:0dc515b5d81bc6f582e11aa26c4c043c3184add77554aaf7752bad42487a28a7
|
3 |
+
size 4860
|
runs/Jun20_05-02-10_smc-gpu3/events.out.tfevents.1718859732.smc-gpu3.126680.1
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:f1d5a45daa85458bf6f9af05cbe2ec7eff128d3f5d19289370edee59beb416da
|
3 |
+
size 5067
|
runs/Jun20_05-08-32_smc-gpu3/events.out.tfevents.1718860116.smc-gpu3.126680.2
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:66a7dddc63da4f02c87f81b6af8d94168630fac119828dfe4abac8b3a58df3ee
|
3 |
+
size 5068
|
runs/Jun20_05-10-31_smc-gpu3/events.out.tfevents.1718860236.smc-gpu3.126680.3
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:5571d0d15e90fe280cd331e5c6a1212d618b6a34e59dd960337d02db865d93e2
|
3 |
+
size 5489
|
runs/Jun20_05-12-09_smc-gpu3/events.out.tfevents.1718860332.smc-gpu3.126680.4
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:4a44ed9b1fc39baab3000d69d6596338a4265270cdc9e051d49e5124b1529baa
|
3 |
+
size 4868
|
runs/Jun20_05-13-16_smc-gpu3/events.out.tfevents.1718860400.smc-gpu3.131587.0
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:3f9461b9f8a3a2d3c292fe82e6c9bac710c0673ff919a525507d2aa12aeeff6e
|
3 |
+
size 5282
|
runs/Jun20_05-15-24_smc-gpu3/events.out.tfevents.1718860528.smc-gpu3.132607.0
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:8a268947c46b4814499aed3b3c55da50c612a7e6e9ae1098620672d17e88242c
|
3 |
+
size 4867
|
runs/Jun20_05-15-58_smc-gpu3/events.out.tfevents.1718860560.smc-gpu3.132607.1
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:eb8c47f3c175eb01ecef844aa7056a02652439bdbfaf86ce4eb900e0a4e6ad78
|
3 |
+
size 4866
|
runs/Jun20_05-16-49_smc-gpu3/events.out.tfevents.1718860613.smc-gpu3.133223.0
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:90fc75958cb63e606a9198f45c57d8e5a32c11423076a2b45f95314c4685783d
|
3 |
+
size 7557
|
runs/Jun20_05-23-07_smc-gpu3/events.out.tfevents.1718860997.smc-gpu3.133223.1
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:254caf1b2a5b61a6c5c72edb6032b2f287e2884d3cd68450a57cf07cf59a79f3
|
3 |
+
size 4867
|
runs/Jun20_05-23-58_smc-gpu3/events.out.tfevents.1718861041.smc-gpu3.136973.0
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:ce324d454b876f24397abe7709e54ba59ed64d73401b6b39332c523bc018fe44
|
3 |
+
size 4867
|
runs/Jun20_05-24-18_smc-gpu3/events.out.tfevents.1718861061.smc-gpu3.136973.1
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:94653ee8f46cae73a464aa6d7adf9f230db342cb9bd3a60153041fb68a774738
|
3 |
+
size 6937
|
runs/Jun20_05-34-30_smc-gpu3/events.out.tfevents.1718861672.smc-gpu3.136973.2
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:db7fee3c70d9ccef273c22490be53011b8a09dd347e44eec94e2c9de814c6711
|
3 |
+
size 4867
|
runs/Jun20_05-35-05_smc-gpu3/events.out.tfevents.1718861707.smc-gpu3.139838.0
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:c61cfd43e1919d0500b225de480dc52ba1bfc5d24a56fcea48c10d5166c4c9df
|
3 |
+
size 12785
|
runs/Jun20_05-51-37_smc-gpu3/events.out.tfevents.1718862701.smc-gpu3.148769.0
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:8832db673c85e4d03579b074acfdf519467789bf0eeeb17c345fedecc4a758ad
|
3 |
+
size 4867
|
runs/Jun20_05-52-57_smc-gpu3/events.out.tfevents.1718862781.smc-gpu3.149973.0
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:c4f55da7b6235701f86e24ba98714149934ddbe07bb7a02ca1551a230f02a081
|
3 |
+
size 4867
|
runs/Jun20_06-07-03_smc-gpu3/events.out.tfevents.1718863626.smc-gpu3.149973.1
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:d35e4bf1be747abb3cf71b8d865ff279340efefef5e22b5fc701f5e286163b93
|
3 |
+
size 7765
|
runs/Jun20_06-16-58_smc-gpu3/events.out.tfevents.1718864221.smc-gpu3.149973.2
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:41b7dba08777ea81e80cb5a0f25117b025f9aab932acc2db6b3bc28ea718b943
|
3 |
+
size 5902
|
runs/Jun20_06-17-48_smc-gpu3/events.out.tfevents.1718864270.smc-gpu3.149973.3
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:27e5477df0ea0ac9eb7157becdcb77e024742d0cf2a99853c8ecd88956072ca8
|
3 |
+
size 5281
|
runs/Jun20_06-18-35_smc-gpu3/events.out.tfevents.1718864318.smc-gpu3.149973.4
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:d9e73c1cc1f4d52ba60cba1b9727428d2208f4be16e6ce16257b13a35a972f98
|
3 |
+
size 4867
|
runs/Jun20_06-19-31_smc-gpu3/events.out.tfevents.1718864374.smc-gpu3.161446.0
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:68acc57a4eb7af1df5be0b0e4f961fd46abeee1b76e19d6f192491012e14716b
|
3 |
+
size 4867
|
runs/Jun20_06-20-37_smc-gpu3/events.out.tfevents.1718864441.smc-gpu3.161972.0
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:e06f69a2ffe598ee79a3250838abc028b92a09339de2ded336c90c3dfac04486
|
3 |
+
size 5902
|
runs/Jun20_09-20-46_smc-gpu3/events.out.tfevents.1718875248.smc-gpu3.205648.0
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:ee02ca5daabf5202b0f6dc8c7e1c10b0de1d7cd13c8614698975ec204a563b85
|
3 |
+
size 20592
|
runs/Jun20_09-55-03_smc-gpu3/events.out.tfevents.1718877306.smc-gpu3.205648.1
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:cb86d2f915855f0f640b880bf5421119f93466c52c43479d2763278513c9beab
|
3 |
+
size 5215
|
special_tokens_map.json
ADDED
@@ -0,0 +1,45 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"bos_token": {
|
3 |
+
"content": "[BOS]",
|
4 |
+
"lstrip": false,
|
5 |
+
"normalized": false,
|
6 |
+
"rstrip": false,
|
7 |
+
"single_word": false
|
8 |
+
},
|
9 |
+
"cls_token": {
|
10 |
+
"content": "[CLS]",
|
11 |
+
"lstrip": false,
|
12 |
+
"normalized": false,
|
13 |
+
"rstrip": false,
|
14 |
+
"single_word": false
|
15 |
+
},
|
16 |
+
"eos_token": {
|
17 |
+
"content": "[EOS]",
|
18 |
+
"lstrip": false,
|
19 |
+
"normalized": false,
|
20 |
+
"rstrip": false,
|
21 |
+
"single_word": false
|
22 |
+
},
|
23 |
+
"mask_token": {
|
24 |
+
"content": "[MASK]",
|
25 |
+
"lstrip": false,
|
26 |
+
"normalized": false,
|
27 |
+
"rstrip": false,
|
28 |
+
"single_word": false
|
29 |
+
},
|
30 |
+
"pad_token": "[EOS]",
|
31 |
+
"sep_token": {
|
32 |
+
"content": "[SEP]",
|
33 |
+
"lstrip": false,
|
34 |
+
"normalized": false,
|
35 |
+
"rstrip": false,
|
36 |
+
"single_word": false
|
37 |
+
},
|
38 |
+
"unk_token": {
|
39 |
+
"content": "[UNK]",
|
40 |
+
"lstrip": false,
|
41 |
+
"normalized": false,
|
42 |
+
"rstrip": false,
|
43 |
+
"single_word": false
|
44 |
+
}
|
45 |
+
}
|
tokenizer.json
ADDED
The diff for this file is too large to render.
See raw diff
|
|
tokenizer_config.json
ADDED
The diff for this file is too large to render.
See raw diff
|
|
training_args.bin
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:9aefdecf499035c9c781d3ddc8de54d72659e515cafd3b917f4130b179bd3657
|
3 |
+
size 4475
|