d.tsimerman committed on
Commit
d4a38f8
1 Parent(s): 5aed84c
README.md CHANGED
@@ -1,3 +1,43 @@
1
  ---
2
  license: mit
 
 
 
 
 
 
 
 
 
 
3
  ---
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  ---
2
  license: mit
3
+ pipeline_tag: text-generation
4
+ widget:
5
+ - text: "@@ПЕРВЫЙ@@ привет @@ВТОРОЙ@@ привет @@ПЕРВЫЙ@@ как дела? @@ВТОРОЙ@@"
6
+ example_title: "how r u"
7
+ - text: "@@ПЕРВЫЙ@@ что ты делал на выходных? @@ВТОРОЙ@@"
8
+ example_title: "wyd"
9
+ language:
10
+ - ru
11
+ tags:
12
+ - conversational
13
  ---
14
+ This generation model is based on [sberbank-ai/rugpt3small_based_on_gpt2](https://huggingface.co/sberbank-ai/rugpt3small_based_on_gpt2). It was trained on a large corpus of dialog data and can be used for building generative conversational agents.
15
+
16
+ The model was trained with a context size of 3.
17
+
18
+
19
+ On a validation set, we calculated the metrics introduced in [this paper](https://arxiv.org/pdf/2001.09977.pdf):
20
+ - Sensibleness: Operators are asked whether the model's response makes sense given the context
21
+ - Specificity: Operators are asked whether the model's response is specific to the given context; in other words, we don't want the model to give generic, boring responses
22
+ - SSA (Sensibleness Specificity Average): the average of the two metrics above
23
+
24
+ | | sensibleness | specificity | SSA |
25
+ |:----------------------------------------------------|---------------:|--------------:|------:|
26
+ | [tinkoff-ai/ruDialoGPT-small](https://huggingface.co/tinkoff-ai/ruDialoGPT-small) | 0.64 | 0.5 | 0.57 |
27
+ | [tinkoff-ai/ruDialoGPT-medium](https://huggingface.co/tinkoff-ai/ruDialoGPT-medium) | 0.78 | 0.69 | 0.735 |
28
+
29
+
30
+ How to use:
31
+
32
+ ```python
33
+ import torch
34
+ from transformers import AutoTokenizer, AutoModelWithLMHead
35
+
36
+ tokenizer = AutoTokenizer.from_pretrained('tinkoff-ai/ruDialoGPT-small')
37
+ model = AutoModelWithLMHead.from_pretrained('tinkoff-ai/ruDialoGPT-small')
38
+ inputs = tokenizer('@@ПЕРВЫЙ@@ привет @@ВТОРОЙ@@ привет @@ПЕРВЫЙ@@ как дела? @@ВТОРОЙ@@', return_tensors='pt')
39
+ with torch.inference_mode():
40
+ generated_token_ids = model.generate(**inputs)
41
+ context_with_response = tokenizer.decode(generated_token_ids[0])
42
+ context_with_response
43
+ ```
added_tokens.json ADDED
@@ -0,0 +1 @@
 
1
+ {"@@ПЕРВЫЙ@@": 50257, "@@ВТОРОЙ@@": 50258, "<FIRST_SPEAKER>": 50259, "<SECOND_SPEAKER>": 50260}
all_results.json ADDED
@@ -0,0 +1,14 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 1.0,
3
+ "eval_loss": 3.6833789348602295,
4
+ "eval_runtime": 180.5086,
5
+ "eval_samples": 40311,
6
+ "eval_samples_per_second": 223.319,
7
+ "eval_steps_per_second": 18.614,
8
+ "perplexity": 39.780583236136096,
9
+ "train_loss": 3.773873895684371,
10
+ "train_runtime": 142283.9744,
11
+ "train_samples": 8570756,
12
+ "train_samples_per_second": 60.237,
13
+ "train_steps_per_second": 5.02
14
+ }
config.json ADDED
@@ -0,0 +1,30 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "activation_function": "gelu_new",
3
+ "attn_pdrop": 0.1,
4
+ "bos_token_id": 50256,
5
+ "embd_pdrop": 0.1,
6
+ "eos_token_id": 50256,
7
+ "gradient_checkpointing": false,
8
+ "initializer_range": 0.02,
9
+ "layer_norm_epsilon": 1e-05,
10
+ "model_type": "gpt2",
11
+ "n_ctx": 2048,
12
+ "n_embd": 768,
13
+ "n_head": 12,
14
+ "n_inner": null,
15
+ "n_layer": 12,
16
+ "n_positions": 2048,
17
+ "reorder_and_upcast_attn": false,
18
+ "resid_pdrop": 0.1,
19
+ "scale_attn_by_inverse_layer_idx": false,
20
+ "scale_attn_weights": true,
21
+ "summary_activation": null,
22
+ "summary_first_dropout": 0.1,
23
+ "summary_proj_to_labels": true,
24
+ "summary_type": "cls_index",
25
+ "summary_use_proj": true,
26
+ "torch_dtype": "float32",
27
+ "transformers_version": "4.15.0",
28
+ "use_cache": true,
29
+ "vocab_size": 50261
30
+ }
eval_results.json ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 1.0,
3
+ "eval_loss": 3.6833789348602295,
4
+ "eval_runtime": 180.5086,
5
+ "eval_samples": 40311,
6
+ "eval_samples_per_second": 223.319,
7
+ "eval_steps_per_second": 18.614,
8
+ "perplexity": 39.780583236136096
9
+ }
merges.txt ADDED
The diff for this file is too large to render. See raw diff
pytorch_model.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cf9455a4326b0038ec5f80baca7f49498f863459a1e816de8ce9096edd2efb9c
3
+ size 551310569
special_tokens_map.json ADDED
@@ -0,0 +1 @@
 
1
+ {"bos_token": {"content": "<|endoftext|>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true}, "eos_token": {"content": "<|endoftext|>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true}, "unk_token": {"content": "<|endoftext|>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true}, "pad_token": "<pad>"}
tokenizer_config.json ADDED
@@ -0,0 +1 @@
 
1
+ {"errors": "replace", "unk_token": {"content": "<|endoftext|>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "__type": "AddedToken"}, "bos_token": {"content": "<|endoftext|>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "__type": "AddedToken"}, "eos_token": {"content": "<|endoftext|>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "__type": "AddedToken"}, "add_prefix_space": false, "special_tokens_map_file": null, "use_fast": true, "tokenizer_class": "GPT2Tokenizer"}
train_results.json ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 1.0,
3
+ "train_loss": 3.773873895684371,
4
+ "train_runtime": 142283.9744,
5
+ "train_samples": 8570756,
6
+ "train_samples_per_second": 60.237,
7
+ "train_steps_per_second": 5.02
8
+ }
trainer_state.json ADDED
The diff for this file is too large to render. See raw diff
vocab.json ADDED
The diff for this file is too large to render. See raw diff