Doron Adler committed
Commit db16f0e
1 parent: d0e1863

Added onnx model

Files changed (8)
  1. .gitattributes +2 -0
  2. added_tokens.json +5 -1
  3. config.json +3 -2
  4. merges.txt +1 -1
  5. model.onnx +3 -0
  6. tokenizer.json +3 -0
  7. tokenizer_config.json +36 -1
  8. vocab.json +0 -0
.gitattributes CHANGED
@@ -14,3 +14,5 @@
 *.pb filter=lfs diff=lfs merge=lfs -text
 *.pt filter=lfs diff=lfs merge=lfs -text
 *.pth filter=lfs diff=lfs merge=lfs -text
+model.onnx filter=lfs diff=lfs merge=lfs -text
+tokenizer.json filter=lfs diff=lfs merge=lfs -text
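
The two new patterns route the large binaries through Git LFS, so a plain clone without LFS support yields three-line pointer files instead of the real model.onnx and tokenizer.json. A minimal sketch of fetching the resolved files via huggingface_hub; the repo id is an assumption inferred from "_name_or_path" in config.json below:

from huggingface_hub import hf_hub_download

# Assumed repo id, taken from "_name_or_path" in config.json below;
# substitute the repository this commit actually belongs to.
REPO_ID = "Norod78/hebrew-gpt_neo-small"

# hf_hub_download resolves LFS pointers server-side and returns a local
# path to the actual binary, cached under ~/.cache/huggingface.
onnx_path = hf_hub_download(repo_id=REPO_ID, filename="model.onnx")
tokenizer_path = hf_hub_download(repo_id=REPO_ID, filename="tokenizer.json")
print(onnx_path, tokenizer_path)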
added_tokens.json CHANGED
@@ -1 +1,5 @@
-{"<|unknown|>": 50259, "<|startoftext|>": 50258, "<|endoftext|>": 50257}
+{
+  "<|endoftext|>": 50257,
+  "<|startoftext|>": 50258,
+  "<|unknown|>": 50259
+}
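
The mapping itself is unchanged; the file is just pretty-printed with one token per line. The three special tokens sit directly above the 50257-entry base vocabulary. A quick sanity check, assuming the tokenizer files from this commit and the same hypothetical repo id as above:

from transformers import AutoTokenizer

tok = AutoTokenizer.from_pretrained("Norod78/hebrew-gpt_neo-small")  # assumed repo id
assert tok.convert_tokens_to_ids("<|endoftext|>") == 50257
assert tok.convert_tokens_to_ids("<|startoftext|>") == 50258
assert tok.convert_tokens_to_ids("<|unknown|>") == 50259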
config.json CHANGED
@@ -1,4 +1,5 @@
 {
+  "_name_or_path": "Norod78/hebrew-gpt_neo-small",
   "activation_function": "gelu_new",
   "architectures": [
     "GPTNeoForCausalLM"
@@ -46,8 +47,8 @@
   "summary_type": "cls_index",
   "summary_use_proj": true,
   "torch_dtype": "float32",
-  "transformers_version": "4.9.0.dev0",
-  "use_cache": true,
+  "transformers_version": "4.21.0.dev0",
+  "use_cache": false,
   "vocab_size": 50257,
   "window_size": 256
 }
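
Besides stamping the source checkpoint and a newer transformers version, the diff flips "use_cache" to false, which is what the ONNX export path wants: with caching disabled the traced graph takes only input ids and attention mask and carries no past-key-value tensors. A sketch of how the transformers.onnx API of that era (4.21) would run the export; the opset and output path are arbitrary choices here:

from pathlib import Path

from transformers import AutoModelForCausalLM, AutoTokenizer
from transformers.onnx import FeaturesManager, export

model_id = "Norod78/hebrew-gpt_neo-small"  # assumed repo id
tokenizer = AutoTokenizer.from_pretrained(model_id)
model = AutoModelForCausalLM.from_pretrained(model_id)

# The "causal-lm" feature (as opposed to "causal-lm-with-past") exports
# the cache-free graph, matching "use_cache": false in config.json.
_, config_ctor = FeaturesManager.check_supported_model_or_raise(model, feature="causal-lm")
onnx_config = config_ctor(model.config)
export(preprocessor=tokenizer, model=model, config=onnx_config,
       opset=13, output=Path("model.onnx"))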
merges.txt CHANGED
@@ -1,4 +1,4 @@
-#version: 0.2
+#version: 0.2 - Trained by `huggingface/tokenizers`
 Ġ ×
 × Ļ
 × ķ
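
Only the version header changes; the merge pairs themselves are untouched. They look like mojibake but are correct byte-level BPE: GPT-2-style tokenizers remap each raw byte to a printable character, "Ġ" stands for a leading space, and every Hebrew letter encodes to two UTF-8 bytes whose lead byte 0xD7 renders as "×". Illustration:

# Every Hebrew letter is two UTF-8 bytes with lead byte 0xd7, which the
# GPT-2 byte-to-unicode table displays as "×"; hence merges like "Ġ ×".
print("שלום".encode("utf-8").hex(" "))  # d7 a9 d7 9c d7 95 d7 9d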
model.onnx ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:72bca6fd44ee5d09b22ca008c3560c47da01a7cdc4f3432689d6aa7a6bfb0563
+size 705644234
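
What the diff records is only the Git LFS pointer: the spec version, the SHA-256 of the real blob, and its size in bytes (about 705 MB). Once the actual file is fetched, a minimal onnxruntime session can exercise it. The names "input_ids", "attention_mask", and a single "logits" output are assumptions matching a standard transformers.onnx causal-lm export, so the sketch prints the real input names first:

import numpy as np
import onnxruntime as ort

sess = ort.InferenceSession("model.onnx", providers=["CPUExecutionProvider"])
print([i.name for i in sess.get_inputs()])  # discover the actual input names

# Assumed names below; small arbitrary demo ids to stay inside the vocab.
input_ids = np.array([[1, 2, 3]], dtype=np.int64)
attention_mask = np.ones_like(input_ids)
(logits,) = sess.run(None, {"input_ids": input_ids,
                            "attention_mask": attention_mask})
print(logits.shape)  # (batch, sequence, vocab_size)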
tokenizer.json ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:01b6ae5616c4d5b7743359dc37a85430dc32ddadc45757cb8d2c899b2bed13a9
+size 3231208
tokenizer_config.json CHANGED
@@ -1 +1,36 @@
-{"do_lower_case": false, "max_len": 1024, "bos_token": "<|startoftext|>", "eos_token": "<|endoftext|>", "unk_token": "<|endoftext|>", "special_tokens_map_file": "special_tokens_map.json", "full_tokenizer_file": null}
+{
+  "add_bos_token": false,
+  "add_prefix_space": false,
+  "bos_token": {
+    "__type": "AddedToken",
+    "content": "<|startoftext|>",
+    "lstrip": false,
+    "normalized": true,
+    "rstrip": false,
+    "single_word": false
+  },
+  "do_lower_case": false,
+  "eos_token": {
+    "__type": "AddedToken",
+    "content": "<|endoftext|>",
+    "lstrip": false,
+    "normalized": true,
+    "rstrip": false,
+    "single_word": false
+  },
+  "errors": "replace",
+  "full_tokenizer_file": null,
+  "max_len": 1024,
+  "name_or_path": "./hebrew-gpt_neo-small",
+  "pad_token": null,
+  "special_tokens_map_file": "special_tokens_map.json",
+  "tokenizer_class": "GPT2Tokenizer",
+  "unk_token": {
+    "__type": "AddedToken",
+    "content": "<|endoftext|>",
+    "lstrip": false,
+    "normalized": true,
+    "rstrip": false,
+    "single_word": false
+  }
+}
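
The one-liner becomes a full config: the special tokens are serialized as AddedToken objects with explicit strip/normalize flags, the class is pinned to GPT2Tokenizer, and, as in the old file, unk_token still points at "<|endoftext|>" rather than "<|unknown|>". With model.onnx in place, the commit can be exercised end to end via optimum's onnxruntime wrapper; a sketch, again assuming the hypothetical repo id:

from optimum.onnxruntime import ORTModelForCausalLM
from transformers import AutoTokenizer

repo = "Norod78/hebrew-gpt_neo-small"  # assumed repo id
model = ORTModelForCausalLM.from_pretrained(repo)  # picks up model.onnx
tok = AutoTokenizer.from_pretrained(repo)

inputs = tok("שלום", return_tensors="pt")
out = model.generate(**inputs, max_new_tokens=20, do_sample=True)
print(tok.decode(out[0], skip_special_tokens=True))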
vocab.json CHANGED
The diff for this file is too large to render. See raw diff