m3hrdadfi committed on
Commit d093bd9
1 Parent(s): dcd620d

Initialized

README.md ADDED
@@ -0,0 +1,12 @@
+ ---
+ language: fa
+ license: apache-2.0
+ ---
+
+ # DistilBERT
+
+ This model handles the zero-width non-joiner (ZWNJ) character used in Persian writing. It was also trained on new multi-type corpora with a new vocabulary set.
+
+
+ ## Questions?
+ Post a GitHub issue on the [ParsBERT Issues](https://github.com/hooshvare/parsbert/issues) repo.
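The card above is terse, so a minimal usage sketch may help. Everything in it is an assumption: the Hub model ID (`m3hrdadfi/distilbert-fa` is a placeholder), the example sentence, and the printed fields of the standard `fill-mask` pipeline output.

```python
from transformers import pipeline

# Placeholder model ID -- substitute the actual Hub repo path for this model.
fill_mask = pipeline("fill-mask", model="m3hrdadfi/distilbert-fa")

# Example Persian sentence containing a ZWNJ-joined word and a [MASK] slot.
text = "او [MASK] را می‌خواند."
for pred in fill_mask(text):
    print(pred["token_str"], round(pred["score"], 3))
```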
config.json ADDED
@@ -0,0 +1,23 @@
+ {
+   "activation": "gelu",
+   "architectures": [
+     "DistilBertForMaskedLM"
+   ],
+   "attention_dropout": 0.1,
+   "dim": 768,
+   "dropout": 0.1,
+   "hidden_dim": 3072,
+   "initializer_range": 0.02,
+   "max_position_embeddings": 512,
+   "model_type": "distilbert",
+   "n_heads": 12,
+   "n_layers": 6,
+   "output_past": true,
+   "pad_token_id": 0,
+   "qa_dropout": 0.1,
+   "seq_classif_dropout": 0.2,
+   "sinusoidal_pos_embds": false,
+   "tie_weights_": true,
+   "transformers_version": "4.2.2",
+   "vocab_size": 42000
+ }
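For reference, the geometry defined in config.json can be rebuilt in code. A minimal sketch using `DistilBertConfig`, with the values copied from the diff above; the model instantiated here is randomly initialized, not the checkpoint in this repo.

```python
from transformers import DistilBertConfig, DistilBertForMaskedLM

# Values taken directly from config.json above.
config = DistilBertConfig(
    vocab_size=42000,
    dim=768,
    hidden_dim=3072,
    n_layers=6,
    n_heads=12,
    max_position_embeddings=512,
    dropout=0.1,
    attention_dropout=0.1,
    activation="gelu",
    pad_token_id=0,
)

# A randomly initialized DistilBertForMaskedLM with this architecture.
model = DistilBertForMaskedLM(config)
print(model.num_parameters())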
pytorch_model.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:6fb06d71fdc2d51b9d43e80dd0255e85505f88d0adea623e1559706161284b40
+ size 303291090
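The three lines above are a Git LFS pointer, not the weights themselves; the real ~303 MB file is fetched with `git lfs pull`. A sketch for checking a pulled file against the recorded `oid` (assumes the file sits in the current directory):

```python
import hashlib

# Hash the pulled weight file in chunks and compare with the oid in the pointer.
sha256 = hashlib.sha256()
with open("pytorch_model.bin", "rb") as f:
    for chunk in iter(lambda: f.read(1 << 20), b""):
        sha256.update(chunk)

expected = "6fb06d71fdc2d51b9d43e80dd0255e85505f88d0adea623e1559706161284b40"
print(sha256.hexdigest() == expected)
```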
special_tokens_map.json ADDED
@@ -0,0 +1,7 @@
+ {
+   "unk_token": "[UNK]",
+   "sep_token": "[SEP]",
+   "pad_token": "[PAD]",
+   "cls_token": "[CLS]",
+   "mask_token": "[MASK]"
+ }
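These are the standard BERT-style special tokens. As a sketch (assuming a local clone of this repository with LFS files pulled; `"./"` is a placeholder path), they surface as tokenizer attributes and wrap every encoded sequence:

```python
from transformers import AutoTokenizer

# "./" is a placeholder for a local clone of this repository.
tokenizer = AutoTokenizer.from_pretrained("./")

print(tokenizer.cls_token, tokenizer.sep_token, tokenizer.mask_token)  # [CLS] [SEP] [MASK]

# Encoded inputs are wrapped as [CLS] ... [SEP].
ids = tokenizer("سلام دنیا")["input_ids"]
print(tokenizer.convert_ids_to_tokens(ids))
```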
tf_model.h5 ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:4dff42295d516084101a09f1c730bce340a945660de493dbdfc9a4357f03d498
+ size 433990736
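A matching TensorFlow checkpoint is also committed; loading it mirrors the PyTorch case. A sketch, again assuming a local clone with LFS files pulled and `"./"` as a placeholder path:

```python
from transformers import TFDistilBertForMaskedLM

# Loads tf_model.h5 from the local clone.
tf_model = TFDistilBertForMaskedLM.from_pretrained("./")
print(tf_model.config.n_layers, tf_model.config.n_heads)  # 6 12
```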
tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
tokenizer_config.json ADDED
@@ -0,0 +1,11 @@
+ {
+   "do_lower_case": false,
+   "unk_token": "[UNK]",
+   "sep_token": "[SEP]",
+   "pad_token": "[PAD]",
+   "cls_token": "[CLS]",
+   "mask_token": "[MASK]",
+   "tokenize_chinese_chars": true,
+   "strip_accents": false,
+   "special_tokens_map_file": null
+ }
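Two settings here matter for Persian text: `do_lower_case: false` preserves casing in Latin loanwords, and `strip_accents: false` keeps diacritics and combining marks intact, which together with the ZWNJ-aware vocabulary keeps half-space words whole. A small sketch (the local clone path is a placeholder, and the exact token split depends on the trained vocabulary):

```python
from transformers import AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained("./")  # placeholder local path

# A word written with ZWNJ (U+200C); inspect how the vocabulary segments it.
print(tokenizer.tokenize("می‌خواهم"))
print(tokenizer.tokenize("Model"))  # casing preserved since do_lower_case is false
```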
vocab.txt ADDED
The diff for this file is too large to render. See raw diff