squash

Browse files

Files changed (12) hide show

README.md +40 -0
added_tokens.json +1 -0
all_results.json +14 -0
config.json +30 -0
eval_results.json +9 -0
merges.txt +0 -0
pytorch_model.bin +3 -0
special_tokens_map.json +1 -0
tokenizer_config.json +1 -0
train_results.json +8 -0
trainer_state.json +0 -0
vocab.json +0 -0

README.md CHANGED Viewed

@@ -1,3 +1,43 @@
 ---
 license: mit
 ---

 ---
 license: mit
+pipeline_tag: text-generation
+widget:
+- text: "@@ПЕРВЫЙ@@ привет @@ВТОРОЙ@@ привет @@ПЕРВЫЙ@@ как дела? @@ВТОРОЙ@@"
+  example_title: "how r u"
+- text: "@@ПЕРВЫЙ@@ что ты делал на выходных? @@ВТОРОЙ@@"
+  example_title: "wyd"
+language:
+- ru
+tags:
+- conversational
 ---
+This generation model is based on [sberbank-ai/rugpt3small_based_on_gpt2](https://huggingface.co/sberbank-ai/rugpt3small_based_on_gpt2). It was trained on a large corpus of dialog data and can be used for building generative conversational agents.
+The model was trained with a context size of 3.
+On a validation set we calculated metrics introduced in [this paper](https://arxiv.org/pdf/2001.09977.pdf):
+- Sensibleness: Operators are asked whether the model's response makes sense given the context
+- Specificity: Operators are asked whether the model's response is specific to the given context; in other words, we don't want our model to give generic, boring responses
+- SSA: the average of the two metrics above (Sensibleness and Specificity Average)
+|                                                     |   sensibleness |   specificity |   SSA |
+|:----------------------------------------------------|---------------:|--------------:|------:|
+| [tinkoff-ai/ruDialoGPT-small](https://huggingface.co/tinkoff-ai/ruDialoGPT-small)  |           0.64 |          0.5  | 0.57  |
+| [tinkoff-ai/ruDialoGPT-medium](https://huggingface.co/tinkoff-ai/ruDialoGPT-medium) |           0.78 |          0.69 | 0.735 |
+How to use:
+```python
+import torch
+from transformers import AutoTokenizer, AutoModelWithLMHead
+tokenizer = AutoTokenizer.from_pretrained('tinkoff-ai/ruDialoGPT-small')
+model = AutoModelWithLMHead.from_pretrained('tinkoff-ai/ruDialoGPT-small')
+inputs = tokenizer('@@ПЕРВЫЙ@@ привет @@ВТОРОЙ@@ привет @@ПЕРВЫЙ@@ как дела? @@ВТОРОЙ@@', return_tensors='pt')
+with torch.inference_mode():
+    generated_token_ids = model.generate(**inputs)
+    context_with_response = tokenizer.decode(generated_token_ids[0])
+context_with_response
+```

added_tokens.json ADDED Viewed

	@@ -0,0 +1 @@


1	+ {"@@ПЕРВЫЙ@@": 50257, "@@ВТОРОЙ@@": 50258, "<FIRST_SPEAKER>": 50259, "<SECOND_SPEAKER>": 50260}

all_results.json ADDED Viewed

	@@ -0,0 +1,14 @@

+{
+    "epoch": 1.0,
+    "eval_loss": 3.6833789348602295,
+    "eval_runtime": 180.5086,
+    "eval_samples": 40311,
+    "eval_samples_per_second": 223.319,
+    "eval_steps_per_second": 18.614,
+    "perplexity": 39.780583236136096,
+    "train_loss": 3.773873895684371,
+    "train_runtime": 142283.9744,
+    "train_samples": 8570756,
+    "train_samples_per_second": 60.237,
+    "train_steps_per_second": 5.02
+}

config.json ADDED Viewed

	@@ -0,0 +1,30 @@

+{
+  "activation_function": "gelu_new",
+  "attn_pdrop": 0.1,
+  "bos_token_id": 50256,
+  "embd_pdrop": 0.1,
+  "eos_token_id": 50256,
+  "gradient_checkpointing": false,
+  "initializer_range": 0.02,
+  "layer_norm_epsilon": 1e-05,
+  "model_type": "gpt2",
+  "n_ctx": 2048,
+  "n_embd": 768,
+  "n_head": 12,
+  "n_inner": null,
+  "n_layer": 12,
+  "n_positions": 2048,
+  "reorder_and_upcast_attn": false,
+  "resid_pdrop": 0.1,
+  "scale_attn_by_inverse_layer_idx": false,
+  "scale_attn_weights": true,
+  "summary_activation": null,
+  "summary_first_dropout": 0.1,
+  "summary_proj_to_labels": true,
+  "summary_type": "cls_index",
+  "summary_use_proj": true,
+  "torch_dtype": "float32",
+  "transformers_version": "4.15.0",
+  "use_cache": true,
+  "vocab_size": 50261
+}

eval_results.json ADDED Viewed

	@@ -0,0 +1,9 @@

+{
+    "epoch": 1.0,
+    "eval_loss": 3.6833789348602295,
+    "eval_runtime": 180.5086,
+    "eval_samples": 40311,
+    "eval_samples_per_second": 223.319,
+    "eval_steps_per_second": 18.614,
+    "perplexity": 39.780583236136096
+}

merges.txt ADDED Viewed

The diff for this file is too large to render. See raw diff

pytorch_model.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:cf9455a4326b0038ec5f80baca7f49498f863459a1e816de8ce9096edd2efb9c
+size 551310569

special_tokens_map.json ADDED Viewed

	@@ -0,0 +1 @@


1	+ {"bos_token": {"content": "<|endoftext|>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true}, "eos_token": {"content": "<|endoftext|>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true}, "unk_token": {"content": "<|endoftext|>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true}, "pad_token": "<pad>"}

tokenizer_config.json ADDED Viewed

	@@ -0,0 +1 @@

+ {"errors": "replace", "unk_token": {"content": "<|endoftext|>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "__type": "AddedToken"}, "bos_token": {"content": "<|endoftext|>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "__type": "AddedToken"}, "eos_token": {"content": "<|endoftext|>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "__type": "AddedToken"}, "add_prefix_space": false, "special_tokens_map_file": null, "use_fast": true, "tokenizer_class": "GPT2Tokenizer"}

train_results.json ADDED Viewed

	@@ -0,0 +1,8 @@

+{
+    "epoch": 1.0,
+    "train_loss": 3.773873895684371,
+    "train_runtime": 142283.9744,
+    "train_samples": 8570756,
+    "train_samples_per_second": 60.237,
+    "train_steps_per_second": 5.02
+}

trainer_state.json ADDED Viewed

The diff for this file is too large to render. See raw diff

vocab.json ADDED Viewed

The diff for this file is too large to render. See raw diff