dmitry-vorobiev committed on
Commit
a993fba
1 Parent(s): 5c346e9

upd weights: 99% ria, title=36

Browse files
Files changed (4) hide show
  1. README.md +5 -6
  2. config.json +5 -3
  3. pytorch_model.bin +1 -1
  4. tokenizer_config.json +1 -1
README.md CHANGED
@@ -12,7 +12,7 @@ license: MIT
12
 
13
  ## Description
14
  *bert2bert* model, initialized with the `DeepPavlov/rubert-base-cased` pretrained weights and
15
- fine-tuned on the first 90% of ["Rossiya Segodnya" news dataset](https://github.com/RossiyaSegodnya/ria_news_dataset) for 3 epochs.
16
 
17
  ## Usage example
18
 
@@ -35,7 +35,7 @@ encoded_batch = tokenizer.prepare_seq2seq_batch(
35
 
36
  output_ids = model.generate(
37
  input_ids=encoded_batch["input_ids"],
38
- max_length=32,
39
  no_repeat_ngram_size=3,
40
  num_beams=5,
41
  top_k=0
@@ -80,7 +80,6 @@ python nlp_headline_rus/src/train_seq2seq.py \
80
 
81
  ## Validation results
82
 
83
- - Using [last 1% of ria](https://drive.google.com/drive/folders/1xtCnkbGNNu5jGQ9H9Mg55Cx7RTcyhQw9) dataset
84
- - Using [last 10% of ria](https://drive.google.com/drive/folders/1w6rAXhpFUO8I4A7xfHKUjMBPEKBHEO3h) dataset
85
- - Using [gazeta_ru test](https://drive.google.com/drive/folders/185ALuNVbbT_C1ZHQYn1OlOc9vRVILvHs) split
86
- - Using [gazeta_ru val](https://drive.google.com/drive/folders/1BLiL3H0n56e8Q9jSuDgaH_3LLpmKxuVG) split
 
12
 
13
  ## Description
14
  *bert2bert* model, initialized with the `DeepPavlov/rubert-base-cased` pretrained weights and
15
+ fine-tuned on the first 99% of ["Rossiya Segodnya" news dataset](https://github.com/RossiyaSegodnya/ria_news_dataset) for 2 epochs.
16
 
17
  ## Usage example
18
 
 
35
 
36
  output_ids = model.generate(
37
  input_ids=encoded_batch["input_ids"],
38
+ max_length=36,
39
  no_repeat_ngram_size=3,
40
  num_beams=5,
41
  top_k=0
 
80
 
81
  ## Validation results
82
 
83
+ - Using [last 1% of ria](https://drive.google.com/drive/folders/1ztAeyb1BiLMgXwOgOJS7WMR4PGiI1q92) dataset
84
+ - Using [gazeta_ru test](https://drive.google.com/drive/folders/1CyowuRpecsLTcDbqEfmAvkCWOod58g_e) split
85
+ - Using [gazeta_ru val](https://drive.google.com/drive/folders/1XZFOXHSXLKdhzm61ceVLw3aautrdskIu) split
 
config.json CHANGED
@@ -1,5 +1,5 @@
1
  {
2
- "_name_or_path": "/kaggle/input/bert2bertweights-192/ep_2",
3
  "architectures": [
4
  "EncoderDecoderModel"
5
  ],
@@ -19,6 +19,7 @@
19
  "diversity_penalty": 0.0,
20
  "do_sample": false,
21
  "early_stopping": false,
 
22
  "eos_token_id": null,
23
  "finetuning_task": null,
24
  "gradient_checkpointing": false,
@@ -74,7 +75,7 @@
74
  "top_k": 50,
75
  "top_p": 1.0,
76
  "torchscript": false,
77
- "transformers_version": "4.2.2",
78
  "type_vocab_size": 2,
79
  "use_bfloat16": false,
80
  "use_cache": true,
@@ -98,6 +99,7 @@
98
  "diversity_penalty": 0.0,
99
  "do_sample": false,
100
  "early_stopping": false,
 
101
  "eos_token_id": null,
102
  "finetuning_task": null,
103
  "gradient_checkpointing": false,
@@ -153,7 +155,7 @@
153
  "top_k": 50,
154
  "top_p": 1.0,
155
  "torchscript": false,
156
- "transformers_version": "4.2.2",
157
  "type_vocab_size": 2,
158
  "use_bfloat16": false,
159
  "use_cache": true,
 
1
  {
2
+ "_name_or_path": "/kaggle/input/bert2bert-wexp/ep_1_v9",
3
  "architectures": [
4
  "EncoderDecoderModel"
5
  ],
 
19
  "diversity_penalty": 0.0,
20
  "do_sample": false,
21
  "early_stopping": false,
22
+ "encoder_no_repeat_ngram_size": 0,
23
  "eos_token_id": null,
24
  "finetuning_task": null,
25
  "gradient_checkpointing": false,
 
75
  "top_k": 50,
76
  "top_p": 1.0,
77
  "torchscript": false,
78
+ "transformers_version": "4.3.2",
79
  "type_vocab_size": 2,
80
  "use_bfloat16": false,
81
  "use_cache": true,
 
99
  "diversity_penalty": 0.0,
100
  "do_sample": false,
101
  "early_stopping": false,
102
+ "encoder_no_repeat_ngram_size": 0,
103
  "eos_token_id": null,
104
  "finetuning_task": null,
105
  "gradient_checkpointing": false,
 
155
  "top_k": 50,
156
  "top_p": 1.0,
157
  "torchscript": false,
158
+ "transformers_version": "4.3.2",
159
  "type_vocab_size": 2,
160
  "use_bfloat16": false,
161
  "use_cache": true,
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e1b2e1c4f765d2c3801c5dcc484ae464b3c0c72d50e9e0aa2589a1db06de504d
3
  size 827914439
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:71f249206ee2da240fc75f3b8d228ceee50861ff493ac0b6437e2509ad2754e0
3
  size 827914439
tokenizer_config.json CHANGED
@@ -1 +1 @@
1
- {"do_lower_case": true, "unk_token": "[UNK]", "sep_token": "[SEP]", "pad_token": "[PAD]", "cls_token": "[CLS]", "mask_token": "[MASK]", "tokenize_chinese_chars": true, "strip_accents": null, "special_tokens_map_file": "/kaggle/input/deeppavlov-rubertbasecased/special_tokens_map.json", "name_or_path": "/kaggle/input/bert2bertweights-192/ep_2", "do_basic_tokenize": true, "never_split": null}
 
1
+ {"do_lower_case": true, "unk_token": "[UNK]", "sep_token": "[SEP]", "pad_token": "[PAD]", "cls_token": "[CLS]", "mask_token": "[MASK]", "tokenize_chinese_chars": true, "strip_accents": null, "special_tokens_map_file": "/kaggle/input/deeppavlov-rubertbasecased/special_tokens_map.json", "name_or_path": "/kaggle/input/bert2bert-wexp/ep_1_v9", "do_basic_tokenize": true, "never_split": null}