koziev ilya committed on
Commit
8bb1806
1 Parent(s): 3468ac6

uploading better model with 760 million parameters

Browse files
Files changed (3) hide show
  1. config.json +5 -5
  2. pytorch_model.bin +2 -2
  3. tokenizer_config.json +1 -1
config.json CHANGED
@@ -1,5 +1,5 @@
1
  {
2
- "_name_or_path": "sberbank-ai/rugpt3small_based_on_gpt2",
3
  "activation_function": "gelu_new",
4
  "architectures": [
5
  "GPT2LMHeadModel"
@@ -13,10 +13,10 @@
13
  "layer_norm_epsilon": 1e-05,
14
  "model_type": "gpt2",
15
  "n_ctx": 2048,
16
- "n_embd": 768,
17
- "n_head": 12,
18
  "n_inner": null,
19
- "n_layer": 12,
20
  "n_positions": 2048,
21
  "reorder_and_upcast_attn": false,
22
  "resid_pdrop": 0.1,
@@ -28,7 +28,7 @@
28
  "summary_type": "cls_index",
29
  "summary_use_proj": true,
30
  "torch_dtype": "float32",
31
- "transformers_version": "4.21.3",
32
  "use_cache": true,
33
  "vocab_size": 50258
34
  }
1
  {
2
+ "_name_or_path": "sberbank-ai/rugpt3large_based_on_gpt2",
3
  "activation_function": "gelu_new",
4
  "architectures": [
5
  "GPT2LMHeadModel"
13
  "layer_norm_epsilon": 1e-05,
14
  "model_type": "gpt2",
15
  "n_ctx": 2048,
16
+ "n_embd": 1536,
17
+ "n_head": 16,
18
  "n_inner": null,
19
+ "n_layer": 24,
20
  "n_positions": 2048,
21
  "reorder_and_upcast_attn": false,
22
  "resid_pdrop": 0.1,
28
  "summary_type": "cls_index",
29
  "summary_use_proj": true,
30
  "torch_dtype": "float32",
31
+ "transformers_version": "4.21.1",
32
  "use_cache": true,
33
  "vocab_size": 50258
34
  }
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:98ae88bfbed22ba3fb913bec46ae4a206825bf67b240af7057f88315a3090b90
3
- size 551296803
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5207dc71dfa8d825fbab940864930b2c9b7e0e43b28a0cd124522a9185413303
3
+ size 3141986147
tokenizer_config.json CHANGED
@@ -18,7 +18,7 @@
18
  "single_word": false
19
  },
20
  "errors": "replace",
21
- "name_or_path": "sberbank-ai/rugpt3small_based_on_gpt2",
22
  "pad_token": null,
23
  "special_tokens_map_file": null,
24
  "tokenizer_class": "GPT2Tokenizer",
18
  "single_word": false
19
  },
20
  "errors": "replace",
21
+ "name_or_path": "sberbank-ai/rugpt3large_based_on_gpt2",
22
  "pad_token": null,
23
  "special_tokens_map_file": null,
24
  "tokenizer_class": "GPT2Tokenizer",