m3hrdadfi committed on
Commit
952943f
1 Parent(s): 90f9631

Hello, persian gpt2

Browse files
README.md ADDED
@@ -0,0 +1,29 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ language: fa
3
+ license: apache-2.0
4
+ widget:
5
+ - text: "در یک اتفاق شگفت انگیز، پژوهشگران"
6
+ - text: "گرفتگی بینی در کودکان و به‌خصوص نوزادان باعث می‌شود"
7
+ - text: "امیدواریم نوروز امسال سالی"
8
+ ---
9
+
10
+ # ParsGPT2
11
+
12
+
13
+ ### BibTeX entry and citation info
14
+
15
+ Please cite this model in publications as follows:
16
+
17
+ ```bibtex
18
+ @misc{ParsGPT2,
19
+ author = {Hooshvare Team},
20
+ title = {ParsGPT2 the Persian version of GPT2},
21
+ year = {2021},
22
+ publisher = {GitHub},
23
+ journal = {GitHub repository},
24
+ howpublished = {\url{https://github.com/hooshvare/parsgpt}},
25
+ }
26
+ ```
27
+
28
+ ## Questions?
29
+ Post a GitHub issue on the [ParsGPT2 Issues](https://github.com/hooshvare/parsgpt/issues) page.
added_tokens.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"[N]": 42000}
config.json ADDED
@@ -0,0 +1,37 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "activation_function": "gelu_new",
3
+ "architectures": [
4
+ "GPT2LMHeadModel"
5
+ ],
6
+ "attn_pdrop": 0.1,
7
+ "bos_token_id": 5,
8
+ "embd_pdrop": 0.1,
9
+ "eos_token_id": 5,
10
+ "gradient_checkpointing": false,
11
+ "initializer_range": 0.02,
12
+ "layer_norm_epsilon": 1e-05,
13
+ "model_type": "gpt2",
14
+ "n_ctx": 1024,
15
+ "n_embd": 768,
16
+ "n_head": 12,
17
+ "n_inner": null,
18
+ "n_layer": 12,
19
+ "n_positions": 1024,
20
+ "resid_pdrop": 0.1,
21
+ "summary_activation": null,
22
+ "summary_first_dropout": 0.1,
23
+ "summary_proj_to_labels": true,
24
+ "summary_type": "cls_index",
25
+ "summary_use_proj": true,
26
+ "task_specific_params": {
27
+ "text-generation": {
28
+ "do_sample": true,
29
+ "max_length": 50,
30
+ "top_k": 50,
31
+ "top_p": 0.95
32
+ }
33
+ },
34
+ "transformers_version": "4.2.1",
35
+ "use_cache": true,
36
+ "vocab_size": 42001
37
+ }
merges.txt ADDED
The diff for this file is too large to render. See raw diff
 
pytorch_model.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:46b0b806c740a0f0a9f056f5574c5fa896166fe844945fd3c849bf34365e5060
3
+ size 485044198
special_tokens_map.json ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
+ {
2
+ "bos_token": "<|endoftext|>",
3
+ "eos_token": "<|endoftext|>",
4
+ "unk_token": "<|endoftext|>"
5
+ }
tf_model.h5 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4b8af318c7dcde1cd1b86aad51b8a5b47c7b6b35b6db40c18378577a6bfe33bf
3
+ size 472571216
tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
tokenizer_config.json ADDED
@@ -0,0 +1,29 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "unk_token": {
3
+ "content": "<|endoftext|>",
4
+ "single_word": false,
5
+ "lstrip": false,
6
+ "rstrip": false,
7
+ "normalized": true,
8
+ "__type": "AddedToken"
9
+ },
10
+ "bos_token": {
11
+ "content": "<|endoftext|>",
12
+ "single_word": false,
13
+ "lstrip": false,
14
+ "rstrip": false,
15
+ "normalized": true,
16
+ "__type": "AddedToken"
17
+ },
18
+ "eos_token": {
19
+ "content": "<|endoftext|>",
20
+ "single_word": false,
21
+ "lstrip": false,
22
+ "rstrip": false,
23
+ "normalized": true,
24
+ "__type": "AddedToken"
25
+ },
26
+ "add_prefix_space": false,
27
+ "special_tokens_map_file": null,
28
+ "errors": "replace"
29
+ }
vocab.json ADDED
The diff for this file is too large to render. See raw diff