Tongjilibo commited on
Commit
3e5558b
1 Parent(s): 782bf57

init commit

Browse files
Files changed (5) hide show
  1. README.md +5 -0
  2. bert4torch_config.json +16 -0
  3. convert.py +113 -0
  4. pytorch_model.bin +3 -0
  5. vocab.txt +0 -0
README.md CHANGED
@@ -1,3 +1,8 @@
1
  ---
2
  license: apache-2.0
3
  ---
 
 
 
 
 
 
1
  ---
2
  license: apache-2.0
3
  ---
4
+
5
+
6
+ - 源项目:https://github.com/ZhuiyiTechnology/t5-pegasus
7
+ - 本项目权重是下载tf权重后,使用convert脚本转换后得到的,可直接下载使用
8
+ - 此项目权重仅适配bert4torch项目
bert4torch_config.json ADDED
@@ -0,0 +1,16 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "hidden_act": "gelu",
3
+ "hidden_dropout_prob": 0.1,
4
+ "hidden_size": 512,
5
+ "initializer_range": 0.02,
6
+ "intermediate_size": 1024,
7
+ "num_attention_heads": 6,
8
+ "attention_head_size": 64,
9
+ "num_hidden_layers": 8,
10
+ "vocab_size": 50000,
11
+ "relative_attention_num_buckets": 32,
12
+ "attention_scale": false,
13
+ "is_dropout": true,
14
+ "model": "mt5.1.1",
15
+ "segment_vocab_size": 0
16
+ }
convert.py ADDED
@@ -0,0 +1,113 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
# Convert the t5_pegasus TensorFlow checkpoint into a PyTorch state dict
# adapted for the bert4torch project.
# Source weights: https://github.com/ZhuiyiTechnology/t5-pegasus
import torch
import tensorflow as tf
import json

choice = 'small'

if choice == 'small':
    ckpt_dir = 'E:/pretrain_ckpt/t5/sushen@chinese_t5_pegasus_small_torch/'
    tf_dir = 'E:/pretrain_ckpt/t5/sushen@chinese_t5_pegasus_small_tf/'
elif choice == 'base':
    ckpt_dir = 'E:/pretrain_ckpt/t5/sushen@chinese_t5_pegasus_base_torch/'
    tf_dir = 'E:/pretrain_ckpt/t5/sushen@chinese_t5_pegasus_base_tf/'
else:
    raise ValueError(f'{choice} not in pre maintained choices')
# Output path is identical for both choices; hoisted out of the branches.
torch_path = ckpt_dir + 'pytorch_model.bin'

tf_path = tf_dir + 'model.ckpt'
with open(tf_dir + 'config.json', 'r', encoding='utf-8') as f:
    config = json.load(f)
num_layers = config['num_hidden_layers']
torch_state_dict = {}

# Custom marker: a target name ending in '##T' means the TF tensor must be
# transposed before being stored under that name.
TRANSPOSE_TAG = '##T'

# TF variable name -> bert4torch parameter name.
mapping = {
    'shared/embedding': 'shared.weight',
    'encoder/block_000/layer_000/SelfAttention/relative_attention_bias': 'encoder.block.0.layer.0.SelfAttention.relative_attention_bias.weight##T',
    'encoder/rms_norm/scale': 'encoder.final_layer_norm.weight',
    'decoder/block_000/layer_000/SelfAttention/relative_attention_bias': 'decoder.block.0.layer.0.SelfAttention.relative_attention_bias.weight##T',
    'decoder/rms_norm/scale': 'decoder.final_layer_norm.weight',
    'decoder/logits/kernel': 'lm_head.weight##T'
}

# Per-layer mappings. TF names zero-pad the block index to 3 digits.
for i in range(num_layers):
    i1 = f'{i:03d}'
    mapping.update({
        f'encoder/block_{i1}/layer_000/SelfAttention/q': f'encoder.block.{i}.layer.0.SelfAttention.q.weight##T',
        f'encoder/block_{i1}/layer_000/SelfAttention/k': f'encoder.block.{i}.layer.0.SelfAttention.k.weight##T',
        f'encoder/block_{i1}/layer_000/SelfAttention/v': f'encoder.block.{i}.layer.0.SelfAttention.v.weight##T',
        f'encoder/block_{i1}/layer_000/SelfAttention/o': f'encoder.block.{i}.layer.0.SelfAttention.o.weight##T',
        f'encoder/block_{i1}/layer_000/rms_norm/scale': f'encoder.block.{i}.layer.0.layer_norm.weight',
        f'encoder/block_{i1}/layer_001/DenseReluDense/wi_0/kernel': f'encoder.block.{i}.layer.1.DenseReluDense.wi_0.weight##T',
        f'encoder/block_{i1}/layer_001/DenseReluDense/wi_1/kernel': f'encoder.block.{i}.layer.1.DenseReluDense.wi_1.weight##T',
        f'encoder/block_{i1}/layer_001/DenseReluDense/wo/kernel': f'encoder.block.{i}.layer.1.DenseReluDense.wo.weight##T',
        f'encoder/block_{i1}/layer_001/rms_norm/scale': f'encoder.block.{i}.layer.1.layer_norm.weight',
        f'decoder/block_{i1}/layer_000/SelfAttention/q': f'decoder.block.{i}.layer.0.SelfAttention.q.weight##T',
        f'decoder/block_{i1}/layer_000/SelfAttention/k': f'decoder.block.{i}.layer.0.SelfAttention.k.weight##T',
        f'decoder/block_{i1}/layer_000/SelfAttention/v': f'decoder.block.{i}.layer.0.SelfAttention.v.weight##T',
        f'decoder/block_{i1}/layer_000/SelfAttention/o': f'decoder.block.{i}.layer.0.SelfAttention.o.weight##T',
        f'decoder/block_{i1}/layer_000/rms_norm/scale': f'decoder.block.{i}.layer.0.layer_norm.weight',
        f'decoder/block_{i1}/layer_001/EncDecAttention/q': f'decoder.block.{i}.layer.1.EncDecAttention.q.weight##T',
        f'decoder/block_{i1}/layer_001/EncDecAttention/k': f'decoder.block.{i}.layer.1.EncDecAttention.k.weight##T',
        f'decoder/block_{i1}/layer_001/EncDecAttention/v': f'decoder.block.{i}.layer.1.EncDecAttention.v.weight##T',
        f'decoder/block_{i1}/layer_001/EncDecAttention/o': f'decoder.block.{i}.layer.1.EncDecAttention.o.weight##T',
        f'decoder/block_{i1}/layer_001/rms_norm/scale': f'decoder.block.{i}.layer.1.layer_norm.weight',
        f'decoder/block_{i1}/layer_002/DenseReluDense/wi_0/kernel': f'decoder.block.{i}.layer.2.DenseReluDense.wi_0.weight##T',
        f'decoder/block_{i1}/layer_002/DenseReluDense/wi_1/kernel': f'decoder.block.{i}.layer.2.DenseReluDense.wi_1.weight##T',
        f'decoder/block_{i1}/layer_002/DenseReluDense/wo/kernel': f'decoder.block.{i}.layer.2.DenseReluDense.wo.weight##T',
        f'decoder/block_{i1}/layer_002/rms_norm/scale': f'decoder.block.{i}.layer.2.layer_norm.weight',
    })

for k, v in mapping.items():
    ts = torch.from_numpy(tf.train.load_variable(tf_path, k))
    if v.endswith(TRANSPOSE_TAG):
        # BUGFIX: the original used v.rstrip('##T'), which strips a trailing
        # *character set* {'#', 'T'} rather than the literal suffix and would
        # corrupt any target name that happens to end in '#' or 'T'. Slice off
        # the exact tag length instead.
        torch_state_dict[v[:-len(TRANSPOSE_TAG)]] = ts.T
    else:
        torch_state_dict[v] = ts

torch.save(torch_state_dict, torch_path)

# Write the bert4torch config next to the converted weights. The keys shared
# by both sizes live in `common`; only the four size-dependent values differ.
common = {
    "hidden_act": "gelu",
    "hidden_dropout_prob": 0.1,
    "initializer_range": 0.02,
    "attention_head_size": 64,
    "vocab_size": 50000,
    "relative_attention_num_buckets": 32,
    "attention_scale": False,
    "is_dropout": True,
    # NOTE(review): the bert4torch_config.json shipped alongside this script
    # also carries these two keys, but the original script did not write them;
    # added here for consistency — confirm against bert4torch's loader.
    "model": "mt5.1.1",
    "segment_vocab_size": 0,
}
size_specific = {
    'small': {"hidden_size": 512, "intermediate_size": 1024,
              "num_attention_heads": 6, "num_hidden_layers": 8},
    'base': {"hidden_size": 768, "intermediate_size": 2048,
             "num_attention_heads": 12, "num_hidden_layers": 12},
}
config = {**common, **size_specific[choice]}

with open(ckpt_dir + 'bert4torch_config.json', 'w') as f:
    f.write(json.dumps(config, indent=4))
pytorch_model.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1deb82235e25b61ff64c61f0dae488b511f32ced08ce73f991111ccbbbff8a86
3
+ size 381109253
vocab.txt ADDED
The diff for this file is too large to render. See raw diff