deepparag committed on
Commit
a871b0e
1 Parent(s): 63f7260
Files changed (2) hide show
  1. config.json +18 -54
  2. pytorch_model.bin +2 -2
config.json CHANGED
@@ -1,74 +1,38 @@
1
  {
2
- "_name_or_path": "EleutherAI/gpt-neo-1.3B",
3
  "activation_function": "gelu_new",
4
  "architectures": [
5
- "GPTNeoForCausalLM"
6
- ],
7
- "attention_dropout": 0,
8
- "attention_layers": [
9
- "global",
10
- "local",
11
- "global",
12
- "local",
13
- "global",
14
- "local",
15
- "global",
16
- "local",
17
- "global",
18
- "local",
19
- "global",
20
- "local",
21
- "global",
22
- "local",
23
- "global",
24
- "local",
25
- "global",
26
- "local",
27
- "global",
28
- "local",
29
- "global",
30
- "local",
31
- "global",
32
- "local"
33
- ],
34
- "attention_types": [
35
- [
36
- [
37
- "global",
38
- "local"
39
- ],
40
- 12
41
- ]
42
  ],
 
43
  "bos_token_id": 50256,
44
- "embed_dropout": 0,
45
  "eos_token_id": 50256,
46
- "gradient_checkpointing": false,
47
- "hidden_size": 2048,
48
  "initializer_range": 0.02,
49
- "intermediate_size": null,
50
  "layer_norm_epsilon": 1e-05,
51
- "max_position_embeddings": 2048,
52
- "model_type": "gpt_neo",
53
- "num_heads": 16,
54
- "num_layers": 24,
55
- "resid_dropout": 0,
 
 
 
 
 
 
56
  "summary_activation": null,
57
  "summary_first_dropout": 0.1,
58
  "summary_proj_to_labels": true,
59
  "summary_type": "cls_index",
60
  "summary_use_proj": true,
61
  "task_specific_params": {
62
- "text-generation": {
63
- "do_sample": true,
64
- "max_length": 50,
65
- "temperature": 0.9
66
  }
67
  },
68
- "tokenizer_class": "GPT2Tokenizer",
69
  "torch_dtype": "float32",
70
  "transformers_version": "4.15.0",
71
  "use_cache": true,
72
- "vocab_size": 50257,
73
- "window_size": 256
74
  }
 
1
  {
2
+ "_name_or_path": "microsoft/DialoGPT-large",
3
  "activation_function": "gelu_new",
4
  "architectures": [
5
+ "GPT2LMHeadModel"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
6
  ],
7
+ "attn_pdrop": 0.1,
8
  "bos_token_id": 50256,
9
+ "embd_pdrop": 0.1,
10
  "eos_token_id": 50256,
 
 
11
  "initializer_range": 0.02,
 
12
  "layer_norm_epsilon": 1e-05,
13
+ "model_type": "gpt2",
14
+ "n_ctx": 1024,
15
+ "n_embd": 1280,
16
+ "n_head": 20,
17
+ "n_inner": null,
18
+ "n_layer": 36,
19
+ "n_positions": 1024,
20
+ "reorder_and_upcast_attn": false,
21
+ "resid_pdrop": 0.1,
22
+ "scale_attn_by_inverse_layer_idx": false,
23
+ "scale_attn_weights": true,
24
  "summary_activation": null,
25
  "summary_first_dropout": 0.1,
26
  "summary_proj_to_labels": true,
27
  "summary_type": "cls_index",
28
  "summary_use_proj": true,
29
  "task_specific_params": {
30
+ "conversational": {
31
+ "max_length": 1000
 
 
32
  }
33
  },
 
34
  "torch_dtype": "float32",
35
  "transformers_version": "4.15.0",
36
  "use_cache": true,
37
+ "vocab_size": 50257
 
38
  }
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:9e9ad5cb419a610e647b85d5aa0163ebf909f61a11fb21170e0d5fddd52859c9
3
- size 5363100545
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b23e9eab2b440e18612a2ab19505a7bf128c346a441804c21bcedd1076fca4ac
3
+ size 3134045897