MonoHime committed on
Commit 4d12352
1 Parent(s): 6a37d1c

Add original model

classes.dict ADDED
@@ -0,0 +1,3 @@
+ 1 90717
+ 2 49736
+ 0 49438
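classes.dict is the label vocabulary saved by the simple_vocab component configured in rubert_sentiment.json below: each line pairs a label with its frequency in the training data. A minimal sketch of reading it back into a Python dict (whitespace-separated parsing matches the lines above; the helper name and file path are placeholders):

    from pathlib import Path

    def load_classes_dict(path):
        """Parse a simple_vocab dump: one 'label<whitespace>count' pair per line."""
        counts = {}
        for line in Path(path).read_text(encoding="utf-8").splitlines():
            if not line.strip():
                continue
            label, count = line.split()
            counts[label] = int(count)
        return counts

    # For the file added in this commit this would give
    # {'1': 90717, '2': 49736, '0': 49438}.
    print(load_classes_dict("classes.dict"))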
config.json ADDED
@@ -0,0 +1,39 @@
+ {
+ "_name_or_path": "../huggingface/rubert-base-cased-sentiment",
+ "architectures": [
+ "BertForSequenceClassification"
+ ],
+ "attention_probs_dropout_prob": 0.1,
+ "directionality": "bidi",
+ "gradient_checkpointing": false,
+ "hidden_act": "gelu",
+ "hidden_dropout_prob": 0.1,
+ "hidden_size": 768,
+ "id2label": {
+ "0": "NEUTRAL",
+ "1": "POSITIVE",
+ "2": "NEGATIVE"
+ },
+ "initializer_range": 0.02,
+ "intermediate_size": 3072,
+ "label2id": {
+ "NEGATIVE": 2,
+ "NEUTRAL": 0,
+ "POSITIVE": 1
+ },
+ "layer_norm_eps": 1e-12,
+ "max_position_embeddings": 512,
+ "model_type": "bert",
+ "num_attention_heads": 12,
+ "num_hidden_layers": 12,
+ "output_past": true,
+ "pad_token_id": 0,
+ "pooler_fc_size": 768,
+ "pooler_num_attention_heads": 12,
+ "pooler_num_fc_layers": 3,
+ "pooler_size_per_head": 128,
+ "pooler_type": "first_token_transform",
+ "return_dict": true,
+ "type_vocab_size": 2,
+ "vocab_size": 119547
+ }
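config.json describes a standard BertForSequenceClassification head with three sentiment labels (0 NEUTRAL, 1 POSITIVE, 2 NEGATIVE). A minimal inference sketch with the Hugging Face transformers library, assuming the files from this commit are available in a local directory; the ./rubert_sentiment path and the example sentence are placeholders:

    import torch
    from transformers import AutoModelForSequenceClassification, AutoTokenizer

    model_dir = "./rubert_sentiment"  # placeholder: local checkout of this repository

    tokenizer = AutoTokenizer.from_pretrained(model_dir)
    model = AutoModelForSequenceClassification.from_pretrained(model_dir)
    model.eval()

    text = "Отличный сервис, всем рекомендую!"  # "Great service, recommend to everyone!"
    inputs = tokenizer(text, return_tensors="pt", truncation=True, max_length=512)
    with torch.no_grad():
        logits = model(**inputs).logits
    probs = logits.softmax(dim=-1).squeeze(0)

    # id2label from config.json: 0 -> NEUTRAL, 1 -> POSITIVE, 2 -> NEGATIVE
    print(model.config.id2label[int(probs.argmax())], probs.tolist())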
pytorch_model.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:17c5e6f11e5672c158b12ed629edb94a2d5adfb0c0eacf55c21d250c7381dac1
+ size 711509513
rubert_sentiment.json ADDED
@@ -0,0 +1,116 @@
+ {
+ "dataset_reader": {
+ "class_name": "basic_classification_reader",
+ "x": "text",
+ "y": "sentiment",
+ "data_path": "/content/drive/MyDrive/BERT/train/",
+ "train": "train.csv",
+ "valid": "valid.csv"
+ },
+ "dataset_iterator": {
+ "class_name": "basic_classification_iterator",
+ "seed": 42
+ },
+ "chainer": {
+ "in": [
+ "x"
+ ],
+ "in_y": [
+ "y"
+ ],
+ "pipe": [
+ {
+ "id": "classes_vocab",
+ "class_name": "simple_vocab",
+ "fit_on": [
+ "y"
+ ],
+ "save_path": "/content/drive/MyDrive/BERT/sentiment_bert_model/classes.dict",
+ "load_path": "/content/drive/MyDrive/BERT/sentiment_bert_model/classes.dict",
+ "in": "y",
+ "out": "y_ids"
+ },
+ {
+ "class_name": "torch_transformers_preprocessor",
+ "vocab_file": "/content/drive/MyDrive/BERT/rubert-base-cased-sentiment/",
+ "do_lower_case": true,
+ "max_seq_length": 512,
+ "in": [
+ "x"
+ ],
+ "out": [
+ "bert_features"
+ ]
+ },
+ {
+ "in": "y_ids",
+ "out": "y_onehot",
+ "class_name": "one_hotter",
+ "depth": "#classes_vocab.len",
+ "single_vector": true
+ },
+ {
+ "class_name": "torch_transformers_classifier",
+ "n_classes": 3,
+ "return_probas": true,
+ "pretrained_bert": "/content/drive/MyDrive/BERT/rubert-base-cased-sentiment/",
+ "save_path": "/content/drive/MyDrive/BERT/sentiment_bert_model/model",
+ "load_path": "/content/drive/MyDrive/BERT/sentiment_bert_model/model",
+ "optimizer": "AdamW",
+ "optimizer_parameters": {
+ "lr": 1e-05
+ },
+ "learning_rate_drop_patience": 5,
+ "learning_rate_drop_div": 2.0,
+ "in": [
+ "bert_features"
+ ],
+ "in_y": [
+ "y_ids"
+ ],
+ "out": [
+ "y_pred_probas"
+ ]
+ },
+ {
+ "in": "y_pred_probas",
+ "out": "y_pred_ids",
+ "class_name": "proba2labels",
+ "max_proba": true
+ },
+ {
+ "in": "y_pred_ids",
+ "out": "y_pred_labels",
+ "ref": "classes_vocab"
+ }
+ ],
+ "out": [
+ "y_pred_labels"
+ ]
+ },
+ "train": {
+ "epochs": 5,
+ "batch_size": 8,
+ "metrics": [
+ "accuracy",
+ "f1_macro",
+ "f1_weighted",
+ {
+ "name": "roc_auc",
+ "inputs": [
+ "y_onehot",
+ "y_pred_probas"
+ ]
+ }
+ ],
+ "validation_patience": 2,
+ "val_every_n_epochs": 1,
+ "log_every_n_epochs": 1,
+ "show_examples": false,
+ "evaluation_targets": [
+ "train",
+ "valid"
+ ],
+ "class_name": "nn_trainer"
+ }
+ }
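rubert_sentiment.json is a DeepPavlov classification config: simple_vocab builds the label vocabulary (classes.dict above), torch_transformers_preprocessor produces BERT features, torch_transformers_classifier predicts probabilities, and proba2labels plus the vocabulary map them back to labels. A hedged sketch of running it through DeepPavlov's standard train_model/build_model entry points; the /content/drive/... paths inside the config are Colab-specific and would need to be replaced before this runs:

    from deeppavlov import build_model, train_model

    CONFIG = "rubert_sentiment.json"  # the config added in this commit

    # Training reads train.csv / valid.csv from data_path and saves the model
    # under save_path; adjust both paths for a local run before uncommenting.
    # model = train_model(CONFIG)

    # Inference: build the pipeline from the saved weights and call it on raw text.
    model = build_model(CONFIG, download=False)
    predictions = model(["Отличный сервис, всем рекомендую!"])
    print(predictions)  # e.g. a label that maps back through classes.dict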
special_tokens_map.json ADDED
@@ -0,0 +1 @@
+ {"unk_token": "[UNK]", "sep_token": "[SEP]", "pad_token": "[PAD]", "cls_token": "[CLS]", "mask_token": "[MASK]"}
tokenizer_config.json ADDED
@@ -0,0 +1 @@
+ {"do_lower_case": false, "unk_token": "[UNK]", "sep_token": "[SEP]", "pad_token": "[PAD]", "cls_token": "[CLS]", "mask_token": "[MASK]", "tokenize_chinese_chars": true, "strip_accents": null, "special_tokens_map_file": "/home/igor/.cache/torch/transformers/1f428acdde727eed5de979d6856ce350a470be2a64e134a1fdae04af78a27301.dd8bd9bfd3664b530ea4e645105f557769387b3da9f79bdb55ed556bdd80611d", "name_or_path": "DeepPavlov/rubert-base-cased-conversational", "do_basic_tokenize": true, "never_split": null}
vocab.txt ADDED
The diff for this file is too large to render. See raw diff