m3hrdadfi commited on
Commit
938712f
1 Parent(s): 4a8185d

Initialized

Browse files
README.md ADDED
@@ -0,0 +1,32 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ language: fa
3
+ license: apache-2.0
4
+ ---
5
+
6
+ # ALBERT-Persian
7
+
8
+ A Lite BERT for Self-supervised Learning of Language Representations for the Persian Language
9
+
10
+ > میتونی بهش بگی برت_کوچولو
11
+
12
+ > Call it little_berty
13
+
14
+
15
+ ### BibTeX entry and citation info
16
+
17
+ Please cite the following in your publication:
18
+
19
+ ```bibtex
20
+ @misc{ALBERTPersian,
21
+ author = {Mehrdad Farahani},
22
+ title = {ALBERT-Persian: A Lite BERT for Self-supervised Learning of Language Representations for the Persian Language},
23
+ year = {2021},
24
+ publisher = {GitHub},
25
+ journal = {GitHub repository},
26
+ howpublished = {\url{https://github.com/m3hrdadfi/albert-persian}},
27
+ }
28
+ ```
29
+
30
+
31
+ ## Questions?
32
+ Post a GitHub issue on the [ALBERT-Persian](https://github.com/m3hrdadfi/albert-persian) repo.
config.json ADDED
@@ -0,0 +1,31 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "architectures": [
3
+ "AlbertForMaskedLM"
4
+ ],
5
+ "attention_probs_dropout_prob": 0,
6
+ "bos_token_id": 2,
7
+ "classifier_dropout_prob": 0.1,
8
+ "down_scale_factor": 1,
9
+ "embedding_size": 128,
10
+ "eos_token_id": 3,
11
+ "gap_size": 0,
12
+ "hidden_act": "gelu_new",
13
+ "hidden_dropout_prob": 0,
14
+ "hidden_size": 768,
15
+ "initializer_range": 0.02,
16
+ "inner_group_num": 1,
17
+ "intermediate_size": 3072,
18
+ "layer_norm_eps": 1e-12,
19
+ "max_position_embeddings": 512,
20
+ "model_type": "albert",
21
+ "net_structure_type": 0,
22
+ "num_attention_heads": 12,
23
+ "num_hidden_groups": 1,
24
+ "num_hidden_layers": 12,
25
+ "num_memory_blocks": 0,
26
+ "pad_token_id": 0,
27
+ "position_embedding_type": "absolute",
28
+ "transformers_version": "4.2.2",
29
+ "type_vocab_size": 2,
30
+ "vocab_size": 30000
31
+ }
pytorch_model.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6d68f3f623302e00bea37f6cdc4b6c01cb28fbd936367319c2a6ad2a6dd7a6f5
3
+ size 44903266
special_tokens_map.json ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "bos_token": "[CLS]",
3
+ "eos_token": "[SEP]",
4
+ "unk_token": "<unk>",
5
+ "sep_token": "[SEP]",
6
+ "pad_token": "<pad>",
7
+ "cls_token": "[CLS]",
8
+ "mask_token": "[MASK]"
9
+ }
spiece.model ADDED
@@ -0,0 +1,3 @@
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:903319b1a4a7e58e49383764d33897a7f49784510247d68438e4f3bff25b01f1
3
+ size 857476
spiece.vocab ADDED
The diff for this file is too large to render. See raw diff
tf_model.h5 ADDED
@@ -0,0 +1,3 @@
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f6ec5b99be35daa07eeb205f5cfb6589706a3a11f8c2050d954b435cc74a4cd8
3
+ size 60415224
tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
tokenizer_config.json ADDED
@@ -0,0 +1,13 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "do_lower_case": false,
3
+ "remove_space": true,
4
+ "keep_accents": false,
5
+ "bos_token": "[CLS]",
6
+ "eos_token": "[SEP]",
7
+ "unk_token": "<unk>",
8
+ "sep_token": "[SEP]",
9
+ "pad_token": "<pad>",
10
+ "cls_token": "[CLS]",
11
+ "mask_token": "[MASK]",
12
+ "special_tokens_map_file": null
13
+ }
unigram.json ADDED
The diff for this file is too large to render. See raw diff