avichr committed on
Commit
18a8d6e
1 Parent(s): 9918a2c

chars, wiki, comments v0.1

Browse files
Files changed (5) hide show
  1. README.md +5 -0
  2. config.json +11 -1
  3. pytorch_model.bin +2 -2
  4. training_args.bin +1 -1
  5. vocab.txt +0 -0
README.md CHANGED
@@ -2,6 +2,11 @@
2
  We will release a better one soon:)
3
 
4
  <br><br>
 
 
 
 
 
5
 
6
  ## HeBERT: Pre-trained BERT for Polarity Analysis and Emotion Recognition
7
  HeBERT is a Hebrew pretrained language model. It is based on Google's BERT architecture and uses the BERT-Base configuration [(Devlin et al. 2018)](https://arxiv.org/abs/1810.04805). <br>
 
2
  We will release a better one soon:)
3
 
4
  <br><br>
5
+ ## Training details
6
+ **tokenizer**: character-based (1000K vocab) <br>
7
+ **lm dataset**: wiki <br>
8
+ **sentiment dataset**: comments on news
9
+
10
 
11
  ## HeBERT: Pre-trained BERT for Polarity Analysis and Emotion Recognition
12
  HeBERT is a Hebrew pretrained language model. It is based on Google's BERT architecture and uses the BERT-Base configuration [(Devlin et al. 2018)](https://arxiv.org/abs/1810.04805). <br>
config.json CHANGED
@@ -7,15 +7,25 @@
7
  "hidden_act": "gelu",
8
  "hidden_dropout_prob": 0.1,
9
  "hidden_size": 768,
 
 
 
 
 
10
  "initializer_range": 0.02,
11
  "intermediate_size": 3072,
 
 
 
 
 
12
  "layer_norm_eps": 1e-12,
13
  "max_position_embeddings": 514,
14
  "model_type": "bert",
15
  "num_attention_heads": 12,
16
  "num_hidden_layers": 6,
17
  "pad_token_id": 0,
18
- "total_flos": 3118796722622472192,
19
  "type_vocab_size": 1,
20
  "vocab_size": 52000
21
  }
 
7
  "hidden_act": "gelu",
8
  "hidden_dropout_prob": 0.1,
9
  "hidden_size": 768,
10
+ "id2label": {
11
+ "0": "natural",
12
+ "1": "possitive",
13
+ "2": "negetive"
14
+ },
15
  "initializer_range": 0.02,
16
  "intermediate_size": 3072,
17
+ "label2id": {
18
+ "natural": 0,
19
+ "negetive": 2,
20
+ "possitive": 1
21
+ },
22
  "layer_norm_eps": 1e-12,
23
  "max_position_embeddings": 514,
24
  "model_type": "bert",
25
  "num_attention_heads": 12,
26
  "num_hidden_layers": 6,
27
  "pad_token_id": 0,
28
+ "total_flos": 451483328659341312,
29
  "type_vocab_size": 1,
30
  "vocab_size": 52000
31
  }
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:f8c18b6c74323cc1d85483e93c48b66f6ad0a456bfc427e0ce74a1e667fe2825
3
- size 333858111
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9f325bb655098baf107564cb827f5c8f96c69e0af542e04883fd3625dc5457fc
3
+ size 333861183
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:7f50e8cde1db05799ace5177491a0b663684c58e0226b05be15789d636ce053e
3
  size 1839
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ab1d77889f7695c8eb2f2631c8e21f7f2461cb9d9df43a8f982bb3674bfa2fee
3
  size 1839
vocab.txt CHANGED
The diff for this file is too large to render. See raw diff