jcr987 committed on
Commit
a549069
1 Parent(s): 0c274c8

Training in progress, epoch 1

Browse files
added_tokens.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ {
2
+ "<unk>NOTUSED": 32005
3
+ }
config.json CHANGED
@@ -39,7 +39,7 @@
39
  "pad_token_id": 1,
40
  "position_embedding_type": "absolute",
41
  "torch_dtype": "float32",
42
- "transformers_version": "4.30.1",
43
  "type_vocab_size": 1,
44
  "use_cache": true,
45
  "vocab_size": 32005
 
39
  "pad_token_id": 1,
40
  "position_embedding_type": "absolute",
41
  "torch_dtype": "float32",
42
+ "transformers_version": "4.35.2",
43
  "type_vocab_size": 1,
44
  "use_cache": true,
45
  "vocab_size": 32005
model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0b4d426366567820cd23a0208fc26d7769de7bd94122a90846e1e0f4b229c086
3
+ size 440170892
runs/Feb05_18-53-41_e5b14e023a24/events.out.tfevents.1707159232.e5b14e023a24.150.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c6a7895deccb2a1ee0be0078df2e58877192536e98c86e33e58ccdbd2adaafd6
3
+ size 5800
special_tokens_map.json CHANGED
@@ -1,7 +1,8 @@
1
  {
2
  "additional_special_tokens": [
3
  "<s>NOTUSED",
4
- "</s>NOTUSED"
 
5
  ],
6
  "bos_token": "<s>",
7
  "cls_token": "<s>",
 
1
  {
2
  "additional_special_tokens": [
3
  "<s>NOTUSED",
4
+ "</s>NOTUSED",
5
+ "<unk>NOTUSED"
6
  ],
7
  "bos_token": "<s>",
8
  "cls_token": "<s>",
tokenizer.json CHANGED
@@ -70,6 +70,15 @@
70
  "rstrip": false,
71
  "normalized": false,
72
  "special": true
 
 
 
 
 
 
 
 
 
73
  }
74
  ],
75
  "normalizer": {
@@ -85,7 +94,8 @@
85
  {
86
  "type": "Metaspace",
87
  "replacement": "▁",
88
- "add_prefix_space": true
 
89
  }
90
  ]
91
  },
@@ -173,7 +183,8 @@
173
  "decoder": {
174
  "type": "Metaspace",
175
  "replacement": "▁",
176
- "add_prefix_space": true
 
177
  },
178
  "model": {
179
  "type": "Unigram",
@@ -128199,6 +128210,7 @@
128199
  "<mask>",
128200
  0.0
128201
  ]
128202
- ]
 
128203
  }
128204
  }
 
70
  "rstrip": false,
71
  "normalized": false,
72
  "special": true
73
+ },
74
+ {
75
+ "id": 32005,
76
+ "content": "<unk>NOTUSED",
77
+ "single_word": false,
78
+ "lstrip": false,
79
+ "rstrip": false,
80
+ "normalized": false,
81
+ "special": true
82
  }
83
  ],
84
  "normalizer": {
 
94
  {
95
  "type": "Metaspace",
96
  "replacement": "▁",
97
+ "add_prefix_space": true,
98
+ "prepend_scheme": "always"
99
  }
100
  ]
101
  },
 
183
  "decoder": {
184
  "type": "Metaspace",
185
  "replacement": "▁",
186
+ "add_prefix_space": true,
187
+ "prepend_scheme": "always"
188
  },
189
  "model": {
190
  "type": "Unigram",
 
128210
  "<mask>",
128211
  0.0
128212
  ]
128213
+ ],
128214
+ "byte_fallback": false
128215
  }
128216
  }
tokenizer_config.json CHANGED
@@ -1,20 +1,80 @@
1
  {
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2
  "additional_special_tokens": [
3
  "<s>NOTUSED",
4
- "</s>NOTUSED"
 
5
  ],
6
  "bos_token": "<s>",
7
  "clean_up_tokenization_spaces": true,
8
  "cls_token": "<s>",
9
  "eos_token": "</s>",
10
- "mask_token": {
11
- "__type": "AddedToken",
12
- "content": "<mask>",
13
- "lstrip": true,
14
- "normalized": true,
15
- "rstrip": false,
16
- "single_word": false
17
- },
18
  "model_max_length": 512,
19
  "pad_token": "<pad>",
20
  "sep_token": "</s>",
 
1
  {
2
+ "added_tokens_decoder": {
3
+ "0": {
4
+ "content": "<s>NOTUSED",
5
+ "lstrip": false,
6
+ "normalized": false,
7
+ "rstrip": false,
8
+ "single_word": false,
9
+ "special": true
10
+ },
11
+ "1": {
12
+ "content": "<pad>",
13
+ "lstrip": false,
14
+ "normalized": false,
15
+ "rstrip": false,
16
+ "single_word": false,
17
+ "special": true
18
+ },
19
+ "2": {
20
+ "content": "</s>NOTUSED",
21
+ "lstrip": false,
22
+ "normalized": false,
23
+ "rstrip": false,
24
+ "single_word": false,
25
+ "special": true
26
+ },
27
+ "4": {
28
+ "content": "<unk>",
29
+ "lstrip": false,
30
+ "normalized": false,
31
+ "rstrip": false,
32
+ "single_word": false,
33
+ "special": true
34
+ },
35
+ "5": {
36
+ "content": "<s>",
37
+ "lstrip": false,
38
+ "normalized": false,
39
+ "rstrip": false,
40
+ "single_word": false,
41
+ "special": true
42
+ },
43
+ "6": {
44
+ "content": "</s>",
45
+ "lstrip": false,
46
+ "normalized": false,
47
+ "rstrip": false,
48
+ "single_word": false,
49
+ "special": true
50
+ },
51
+ "32004": {
52
+ "content": "<mask>",
53
+ "lstrip": true,
54
+ "normalized": false,
55
+ "rstrip": false,
56
+ "single_word": false,
57
+ "special": true
58
+ },
59
+ "32005": {
60
+ "content": "<unk>NOTUSED",
61
+ "lstrip": false,
62
+ "normalized": false,
63
+ "rstrip": false,
64
+ "single_word": false,
65
+ "special": true
66
+ }
67
+ },
68
  "additional_special_tokens": [
69
  "<s>NOTUSED",
70
+ "</s>NOTUSED",
71
+ "<unk>NOTUSED"
72
  ],
73
  "bos_token": "<s>",
74
  "clean_up_tokenization_spaces": true,
75
  "cls_token": "<s>",
76
  "eos_token": "</s>",
77
+ "mask_token": "<mask>",
 
 
 
 
 
 
 
78
  "model_max_length": 512,
79
  "pad_token": "<pad>",
80
  "sep_token": "</s>",
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:8196d9f2c24479d51031a13543fc3761555c19086170186f0334034d69dcb864
3
- size 3963
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3f232da538f8c8e4f9571d4d8d611472e14373800095a99a7715587755ae012f
3
+ size 4600