patrickvonplaten commited on
Commit
df44cf0
1 Parent(s): 62b2465
Files changed (5) hide show
  1. README.md +26 -0
  2. config.json +59 -0
  3. merges.txt +0 -0
  4. pytorch_model.bin +3 -0
  5. vocab.json +0 -0
README.md ADDED
@@ -0,0 +1,26 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ language:
3
+ - en
4
+ thumbnail:
5
+ tags:
6
+ - convAI
7
+ - conversational
8
+ - facebook
9
+ license: apache-2.0
10
+ datasets:
11
+ - blended_skill_talk
12
+ metrics:
13
+ - perplexity
14
+ ---
15
+
16
+ ## Model description
17
+
18
+ + Paper: [Recipes for building an open-domain chatbot](https://arxiv.org/abs/2004.13637)
19
+ + [Original PARLAI Code](https://parl.ai/projects/recipes/)
20
+
21
+
22
+ ### Abstract
23
+
24
+
25
+ Building open-domain chatbots is a challenging area for machine learning research. While prior work has shown that scaling neural models in the number of parameters and the size of the data they are trained on gives improved results, we show that other ingredients are important for a high-performing chatbot. Good conversation requires a number of skills that an expert conversationalist blends in a seamless way: providing engaging talking points and listening to their partners, both asking and answering questions, and displaying knowledge, empathy and personality appropriately, depending on the situation. We show that large scale models can learn these skills when given appropriate training data and choice of generation strategy. We build variants of these recipes with 90M, 2.7B and 9.4B parameter neural models, and make our models and code publicly available. Human evaluations show our best models are superior to existing approaches in multi-turn dialogue in terms of engagingness and humanness measurements. We then discuss the limitations of this work by analyzing failure cases of our models.
26
+
config.json ADDED
@@ -0,0 +1,59 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "./",
3
+ "activation_dropout": 0.0,
4
+ "activation_function": "gelu",
5
+ "add_bias_logits": false,
6
+ "add_final_layer_norm": false,
7
+ "architectures": [
8
+ "BlenderbotSmallForConditionalGeneration"
9
+ ],
10
+ "attention_dropout": 0.0,
11
+ "bos_token_id": 1,
12
+ "classif_dropout": 0.0,
13
+ "classifier_dropout": 0.0,
14
+ "d_model": 512,
15
+ "decoder_attention_heads": 16,
16
+ "decoder_ffn_dim": 2048,
17
+ "decoder_layerdrop": 0.0,
18
+ "decoder_layers": 8,
19
+ "decoder_start_token_id": 1,
20
+ "do_blenderbot_90_layernorm": true,
21
+ "dropout": 0.1,
22
+ "encoder_attention_heads": 16,
23
+ "encoder_ffn_dim": 2048,
24
+ "encoder_layerdrop": 0.0,
25
+ "encoder_layers": 8,
26
+ "eos_token_id": 2,
27
+ "extra_pos_embeddings": 0,
28
+ "force_bos_token_to_be_generated": false,
29
+ "gradient_checkpointing": false,
30
+ "id2label": {
31
+ "0": "LABEL_0",
32
+ "1": "LABEL_1",
33
+ "2": "LABEL_2"
34
+ },
35
+ "init_std": 0.02,
36
+ "is_encoder_decoder": true,
37
+ "label2id": {
38
+ "LABEL_0": 0,
39
+ "LABEL_1": 1,
40
+ "LABEL_2": 2
41
+ },
42
+ "layernorm_variant": "xlm",
43
+ "length_penalty": 0.65,
44
+ "max_length": 128,
45
+ "max_position_embeddings": 512,
46
+ "min_length": 20,
47
+ "model_type": "blenderbot-small",
48
+ "no_repeat_ngram_size": 3,
49
+ "normalize_before": false,
50
+ "normalize_embedding": true,
51
+ "num_beams": 10,
52
+ "num_hidden_layers": 8,
53
+ "pad_token_id": 0,
54
+ "scale_embedding": true,
55
+ "static_position_embeddings": false,
56
+ "unk_token_id": 3,
57
+ "use_cache": true,
58
+ "vocab_size": 54944
59
+ }
merges.txt ADDED
The diff for this file is too large to render. See raw diff
pytorch_model.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9f1a4ebe462768ee697bc4529af5baf7a5418df822bb900bc4e02af7b6eeae9d
3
+ size 350387079
vocab.json ADDED
The diff for this file is too large to render. See raw diff