Konstantinos committed
Commit 08a93ef
1 Parent(s): f05f345

Checkpoint Zero

pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:77cbf57033130bda092530405d71fbc3d2f37e7eb58fa3c7adb0501a31004315
+oid sha256:971e9e284c0b8ec38078ba365579b4b4dc0e507c605390d02fc44ac1bdff86e6
 size 2596329174
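
pytorch_model.bin is tracked with Git LFS, so the diff above only swaps the pointer (SHA-256 and byte size), not the weights themselves. A minimal sketch for verifying a downloaded checkpoint against the new pointer, assuming the file sits in the current directory:

import hashlib

def sha256_of(path, chunk_size=1 << 20):
    # Stream the file so the ~2.6 GB checkpoint does not need to fit in memory.
    h = hashlib.sha256()
    with open(path, "rb") as f:
        for chunk in iter(lambda: f.read(chunk_size), b""):
            h.update(chunk)
    return h.hexdigest()

expected = "971e9e284c0b8ec38078ba365579b4b4dc0e507c605390d02fc44ac1bdff86e6"
assert sha256_of("pytorch_model.bin") == expected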
special_tokens_map.json CHANGED
@@ -1,5 +1,23 @@
 {
-  "bos_token": "<|endoftext|>",
-  "eos_token": "<|endoftext|>",
-  "unk_token": "<|endoftext|>"
+  "bos_token": {
+    "content": "<s>",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
+  "eos_token": {
+    "content": "</s>",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
+  "unk_token": {
+    "content": "<unk>",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  }
 }
tokenizer.json CHANGED
The diff for this file is too large to render. See raw diff
 
tokenizer.model ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:abd5ba2b312cd47f8bcc2b183d792c474210770920f676ca26f121a67a4a25bc
+size 485442
tokenizer_config.json CHANGED
@@ -1,33 +1,40 @@
 {
-  "add_prefix_space": false,
-  "bos_token": {
-    "__type": "AddedToken",
-    "content": "<|endoftext|>",
-    "lstrip": false,
-    "normalized": true,
-    "rstrip": false,
-    "single_word": false
+  "add_prefix_space": true,
+  "added_tokens_decoder": {
+    "0": {
+      "content": "<unk>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "1": {
+      "content": "<s>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "3": {
+      "content": "</s>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    }
   },
-  "clean_up_tokenization_spaces": true,
-  "eos_token": {
-    "__type": "AddedToken",
-    "content": "<|endoftext|>",
-    "lstrip": false,
-    "normalized": true,
-    "rstrip": false,
-    "single_word": false
-  },
-  "errors": "replace",
-  "max_len": 1024,
-  "model_max_length": 1024,
+  "bos_token": "<s>",
+  "clean_up_tokenization_spaces": false,
+  "eos_token": "</s>",
+  "legacy": true,
+  "model_max_length": 1000000000000000019884624838656,
   "pad_token": null,
+  "sp_model_kwargs": {},
+  "spaces_between_special_tokens": false,
   "tokenizer_class": "LlamaTokenizer",
-  "unk_token": {
-    "__type": "AddedToken",
-    "content": "<|endoftext|>",
-    "lstrip": false,
-    "normalized": true,
-    "rstrip": false,
-    "single_word": false
-  }
+  "unk_token": "<unk>",
+  "use_default_system_prompt": false
 }
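
Together with special_tokens_map.json and the new tokenizer.model, this change moves the repo from GPT-2-style <|endoftext|> special tokens to Llama/SentencePiece conventions (<s>, </s>, <unk>). A minimal sketch for sanity-checking the updated tokenizer files, assuming they are loaded from a local checkout (the path is hypothetical):

from transformers import AutoTokenizer

# Load from a local clone of the repo; replace the path with the actual checkout.
tok = AutoTokenizer.from_pretrained("./checkpoint-zero")

assert tok.bos_token == "<s>"
assert tok.eos_token == "</s>"
assert tok.unk_token == "<unk>"
print(tok("hello world").input_ids)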
training_state.json CHANGED
@@ -1,7 +1,7 @@
 {
   "global_step": 4000,
   "update_step": 500,
-  "tokens_seen": 95632015,
-  "tokens_seen_before": 95441505,
-  "update_time": 63.00493049621582
+  "tokens_seen": 92375840,
+  "tokens_seen_before": 92190623,
+  "update_time": 63.27517485618591
 }
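
If the counters are read as cumulative token counts and update_time as the wall-clock seconds of the last logging interval (an assumption; the fields are not documented in the diff), the new values imply roughly 2.9k tokens/s over that interval:

tokens_seen = 92_375_840
tokens_seen_before = 92_190_623
update_time = 63.27517485618591  # assumed to be seconds for the last interval

# Throughput over the most recent logging interval, if the counts are cumulative.
print((tokens_seen - tokens_seen_before) / update_time)  # ~2927 tokens/s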