burcucan committed
Commit c911e00
Parent: d4eb58e

Training in progress, step 5000

config.json CHANGED
@@ -10,15 +10,15 @@
   "dropout": 0.1,
   "enable_bias": true,
   "eos_token_id": 2,
-  "ffn_dim": 3072,
-  "hidden_size": 768,
+  "ffn_dim": 576,
+  "hidden_size": 192,
   "init_std": 0.02,
   "layer_norm_elementwise_affine": true,
   "layerdrop": 0.0,
-  "max_position_embeddings": 2048,
+  "max_position_embeddings": 256,
   "model_type": "opt",
-  "num_attention_heads": 6,
-  "num_hidden_layers": 6,
+  "num_attention_heads": 12,
+  "num_hidden_layers": 1,
   "pad_token_id": 1,
   "torch_dtype": "float32",
   "transformers_version": "4.39.3",
model.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:e159ac460590121bb9645dddaeb64ebaaa4db46038771c51850b91a56ec2902b
-size 330859680
+oid sha256:33d94eccbdfb672bb7ac906131e1e8010b79bd1758ae9e9f8a48d89b25f3f24d
+size 157301184
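Since config.json declares float32 weights (4 bytes each), the LFS sizes above pin down the parameter counts directly:

```python
# Both checkpoints are float32 per config.json, so size / 4 = parameter count.
old_params = 330_859_680 // 4   # 82,714,920 parameters before this commit
new_params = 157_301_184 // 4   # 39,325,296 parameters after
print(f"{old_params:,} -> {new_params:,}")
```

The drop is smaller than the layer-count change alone would suggest, presumably because the embedding matrices dominate the budget at this scale.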
runs/Apr10_11-09-49_905f6aa6fa62/events.out.tfevents.1712747390.905f6aa6fa62.2752.0 ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:eb6f043bf31b81dd420ce620277393484a4247226f9746cc55a4e1b7639cedab
+size 5006
special_tokens_map.json CHANGED
@@ -1,15 +1,6 @@
 {
-  "bos_token": "<s>",
-  "cls_token": "<s>",
-  "eos_token": "</s>",
-  "mask_token": {
-    "content": "<mask>",
-    "lstrip": true,
-    "normalized": false,
-    "rstrip": false,
-    "single_word": false
-  },
-  "pad_token": "</s>",
-  "sep_token": "</s>",
-  "unk_token": "<unk>"
+  "bos_token": "<|endoftext|>",
+  "eos_token": "<|endoftext|>",
+  "pad_token": "<|endoftext|>",
+  "unk_token": "<|endoftext|>"
 }
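The map moves from RoBERTa-style special tokens (<s>, </s>, <mask>, ...) to GPT-2's convention of a single <|endoftext|> serving as bos/eos/pad/unk. A quick way to confirm after loading; the repo id below is a placeholder for this checkpoint:

```python
from transformers import AutoTokenizer

# Placeholder repo id; substitute the actual checkpoint path.
tok = AutoTokenizer.from_pretrained("burcucan/<this-checkpoint>")
print(tok.special_tokens_map)
# Per this diff, all four entries should be '<|endoftext|>'.
```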
tokenizer.json CHANGED
The diff for this file is too large to render. See raw diff
 
tokenizer_config.json CHANGED
@@ -1,57 +1,20 @@
 {
   "add_prefix_space": false,
   "added_tokens_decoder": {
-    "0": {
-      "content": "<s>",
+    "50256": {
+      "content": "<|endoftext|>",
       "lstrip": false,
       "normalized": true,
       "rstrip": false,
       "single_word": false,
       "special": true
-    },
-    "1": {
-      "content": "<pad>",
-      "lstrip": false,
-      "normalized": true,
-      "rstrip": false,
-      "single_word": false,
-      "special": true
-    },
-    "2": {
-      "content": "</s>",
-      "lstrip": false,
-      "normalized": true,
-      "rstrip": false,
-      "single_word": false,
-      "special": true
-    },
-    "3": {
-      "content": "<unk>",
-      "lstrip": false,
-      "normalized": true,
-      "rstrip": false,
-      "single_word": false,
-      "special": true
-    },
-    "50264": {
-      "content": "<mask>",
-      "lstrip": true,
-      "normalized": false,
-      "rstrip": false,
-      "single_word": false,
-      "special": true
     }
   },
-  "bos_token": "<s>",
+  "bos_token": "<|endoftext|>",
   "clean_up_tokenization_spaces": true,
-  "cls_token": "<s>",
-  "eos_token": "</s>",
-  "errors": "replace",
-  "mask_token": "<mask>",
-  "model_max_length": 512,
-  "pad_token": "</s>",
-  "sep_token": "</s>",
-  "tokenizer_class": "RobertaTokenizer",
-  "trim_offsets": true,
-  "unk_token": "<unk>"
+  "eos_token": "<|endoftext|>",
+  "model_max_length": 1024,
+  "pad_token": "<|endoftext|>",
+  "tokenizer_class": "GPT2Tokenizer",
+  "unk_token": "<|endoftext|>"
 }
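Two things stand out in this swap from RobertaTokenizer to GPT2Tokenizer. First, model_max_length is now 1024 (the GPT-2 default), which exceeds the model's max_position_embeddings of 256 from config.json, so inputs should be capped explicitly. Second, config.json still lists pad_token_id 1 and eos_token_id 2 (the OPT defaults) while the tokenizer's <|endoftext|> has id 50256, per the added_tokens_decoder entry above; whether that matters depends on how padding and eos are handled downstream. A minimal truncation sketch, with a placeholder repo id:

```python
from transformers import AutoTokenizer

# Placeholder repo id; substitute the actual checkpoint path.
tok = AutoTokenizer.from_pretrained("burcucan/<this-checkpoint>")

text = "an example input " * 200   # deliberately longer than 256 tokens
# Cap at the model's max_position_embeddings (256) from config.json,
# not the tokenizer's model_max_length default of 1024:
batch = tok(text, truncation=True, max_length=256, return_tensors="pt")
print(batch["input_ids"].shape)    # -> torch.Size([1, 256])
```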
training_args.bin CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:3ce9064fe87f193a0577df2034056be9a6ad81fbbfe80907b999fe38d7261eb9
+oid sha256:ae36c3aa80ebe334e02b0d25f208d2843c1770f3e03db8bfab4419fa8f8225ad
 size 4920
vocab.json CHANGED
The diff for this file is too large to render. See raw diff