wangfan committed
Commit b49528a
1 Parent(s): 84c9101

1st commit

Files changed (6)
  1. README.md +23 -0
  2. config.json +29 -0
  3. special_tokens_map.json +1 -0
  4. tokenizer.json +0 -0
  5. tokenizer_config.json +1 -0
  6. vocab.txt +0 -0
README.md ADDED
@@ -0,0 +1,23 @@
+ Pre-trained language models are used more and more often across our business lines. To achieve better results on tasks in financial scenarios, we release the jdt-fin-roberta-wwm model.
+
+ ## Models
+ * `base` model: 12-layer, 768-hidden, 12-heads, 110M parameters
+
+ | Model | Corpus | Download |
+ | - | - | - |
+ | fin-roberta-wwm | Financial corpus | - |
+
+ ## Quick Load
+ ### With Huggingface-Transformers
+ Building on [Huggingface-Transformers](https://github.com/huggingface/transformers), the models above can be loaded easily:
+ ```python
+ from transformers import BertTokenizer, BertModel
+
+ tokenizer = BertTokenizer.from_pretrained("MODEL_NAME")
+ model = BertModel.from_pretrained("MODEL_NAME")
+ ```
+ **Note: all models in this repository are loaded with BertTokenizer and BertModel; do not use RobertaTokenizer/RobertaModel!**
+ The corresponding `MODEL_NAME` for each model is listed below:
+
+ | Model | MODEL_NAME |
+ | - | - |
+ | fin-roberta-wwm | wangfan/jdt-fin-roberta-wwm |
+
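For reference, a minimal end-to-end sketch of the loading recipe above, filling in `MODEL_NAME` from the table; the sample sentence is illustrative and PyTorch is assumed to be installed:

```python
import torch
from transformers import BertTokenizer, BertModel

model_name = "wangfan/jdt-fin-roberta-wwm"  # MODEL_NAME from the table above
tokenizer = BertTokenizer.from_pretrained(model_name)
model = BertModel.from_pretrained(model_name)

# Encode a short financial sentence (illustrative) and take the [CLS] vector.
inputs = tokenizer("央行下调存款准备金率", return_tensors="pt")
with torch.no_grad():
    outputs = model(**inputs)
cls_embedding = outputs.last_hidden_state[:, 0]
print(cls_embedding.shape)  # torch.Size([1, 768])
```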
config.json ADDED
@@ -0,0 +1,29 @@
+ {
+   "architectures": [
+     "BertForMaskedLM"
+   ],
+   "attention_probs_dropout_prob": 0.1,
+   "directionality": "bidi",
+   "gradient_checkpointing": false,
+   "hidden_act": "gelu",
+   "hidden_dropout_prob": 0.1,
+   "hidden_size": 768,
+   "initializer_range": 0.02,
+   "intermediate_size": 3072,
+   "layer_norm_eps": 1e-12,
+   "max_position_embeddings": 512,
+   "model_type": "bert",
+   "num_attention_heads": 12,
+   "num_hidden_layers": 12,
+   "pad_token_id": 0,
+   "pooler_fc_size": 768,
+   "pooler_num_attention_heads": 12,
+   "pooler_num_fc_layers": 3,
+   "pooler_size_per_head": 128,
+   "pooler_type": "first_token_transform",
+   "position_embedding_type": "absolute",
+   "transformers_version": "4.6.1",
+   "type_vocab_size": 2,
+   "use_cache": true,
+   "vocab_size": 21128
+ }
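The configuration matches the `base` size stated in the README; a quick sanity-check sketch, assuming the Hub id from the README's table:

```python
from transformers import BertConfig

# Load the configuration above and confirm the "base" shape from the README.
config = BertConfig.from_pretrained("wangfan/jdt-fin-roberta-wwm")
assert config.num_hidden_layers == 12
assert config.hidden_size == 768
assert config.num_attention_heads == 12
assert config.vocab_size == 21128
```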
special_tokens_map.json ADDED
@@ -0,0 +1 @@
+ {"unk_token": "[UNK]", "sep_token": "[SEP]", "pad_token": "[PAD]", "cls_token": "[CLS]", "mask_token": "[MASK]"}
tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
tokenizer_config.json ADDED
@@ -0,0 +1 @@
+ {"do_lower_case": true, "unk_token": "[UNK]", "sep_token": "[SEP]", "pad_token": "[PAD]", "cls_token": "[CLS]", "mask_token": "[MASK]", "tokenize_chinese_chars": true, "strip_accents": null, "special_tokens_map_file": null}
vocab.txt ADDED
The diff for this file is too large to render. See raw diff