Chakita committed on
Commit
97ce588
1 Parent(s): 680cb49

Training in progress, step 500

Browse files
.gitignore ADDED
@@ -0,0 +1 @@
 
 
1
+ checkpoint-*/
config.json CHANGED
@@ -1,5 +1,5 @@
1
  {
2
- "_name_or_path": "checkpoint-13000",
3
  "architectures": [
4
  "RobertaForMaskedLM"
5
  ],
@@ -21,7 +21,7 @@
21
  "pad_token_id": 1,
22
  "position_embedding_type": "absolute",
23
  "torch_dtype": "float32",
24
- "transformers_version": "4.16.2",
25
  "type_vocab_size": 1,
26
  "use_cache": true,
27
  "vocab_size": 52000
 
1
  {
2
+ "_name_or_path": "Naveen-k/KanBERTo",
3
  "architectures": [
4
  "RobertaForMaskedLM"
5
  ],
 
21
  "pad_token_id": 1,
22
  "position_embedding_type": "absolute",
23
  "torch_dtype": "float32",
24
+ "transformers_version": "4.20.1",
25
  "type_vocab_size": 1,
26
  "use_cache": true,
27
  "vocab_size": 52000
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:497dff520215a153f9388a4efddd4a1ccb8eb21785c53aebd6b24f7bf5570e24
3
- size 334062635
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c7c74feaac5bfb82ddd7c17a2f2ab3fc01131285b5b8ece62eb076d08a3cd866
3
+ size 334056939
runs/Jan04_16-33-48_a1156351ca8d/1672850140.2152631/events.out.tfevents.1672850140.a1156351ca8d.23.1 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:28baef5313dba3865295c823132fa43eb6f9f2198bd464081154cbcef7d7d6ca
3
+ size 5309
runs/Jan04_16-33-48_a1156351ca8d/events.out.tfevents.1672850140.a1156351ca8d.23.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:80ab2ca3e198be5e68406c50614d498296c9bc777dd8b096fa3a8337189a2f19
3
+ size 3793
special_tokens_map.json CHANGED
@@ -1 +1,51 @@
1
- {"bos_token": {"content": "<s>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true}, "eos_token": {"content": "</s>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true}, "unk_token": {"content": "<unk>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true}, "sep_token": {"content": "</s>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true}, "pad_token": {"content": "<pad>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true}, "cls_token": {"content": "<s>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true}, "mask_token": {"content": "<mask>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true}}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "bos_token": {
3
+ "content": "<s>",
4
+ "lstrip": false,
5
+ "normalized": true,
6
+ "rstrip": false,
7
+ "single_word": false
8
+ },
9
+ "cls_token": {
10
+ "content": "<s>",
11
+ "lstrip": false,
12
+ "normalized": true,
13
+ "rstrip": false,
14
+ "single_word": false
15
+ },
16
+ "eos_token": {
17
+ "content": "</s>",
18
+ "lstrip": false,
19
+ "normalized": true,
20
+ "rstrip": false,
21
+ "single_word": false
22
+ },
23
+ "mask_token": {
24
+ "content": "<mask>",
25
+ "lstrip": true,
26
+ "normalized": true,
27
+ "rstrip": false,
28
+ "single_word": false
29
+ },
30
+ "pad_token": {
31
+ "content": "<pad>",
32
+ "lstrip": false,
33
+ "normalized": true,
34
+ "rstrip": false,
35
+ "single_word": false
36
+ },
37
+ "sep_token": {
38
+ "content": "</s>",
39
+ "lstrip": false,
40
+ "normalized": true,
41
+ "rstrip": false,
42
+ "single_word": false
43
+ },
44
+ "unk_token": {
45
+ "content": "<unk>",
46
+ "lstrip": false,
47
+ "normalized": true,
48
+ "rstrip": false,
49
+ "single_word": false
50
+ }
51
+ }
tokenizer.json CHANGED
@@ -1,73 +1,60 @@
1
  {
2
  "version": "1.0",
3
- "truncation": {
4
- "direction": "Right",
5
- "max_length": 128,
6
- "strategy": "LongestFirst",
7
- "stride": 0
8
- },
9
- "padding": {
10
- "strategy": {
11
- "Fixed": 128
12
- },
13
- "direction": "Right",
14
- "pad_to_multiple_of": null,
15
- "pad_id": 1,
16
- "pad_type_id": 0,
17
- "pad_token": "<pad>"
18
- },
19
  "added_tokens": [
20
  {
21
  "id": 0,
22
- "special": true,
23
  "content": "<s>",
24
  "single_word": false,
25
  "lstrip": false,
26
  "rstrip": false,
27
- "normalized": true
 
28
  },
29
  {
30
  "id": 1,
31
- "special": true,
32
  "content": "<pad>",
33
  "single_word": false,
34
  "lstrip": false,
35
  "rstrip": false,
36
- "normalized": true
 
37
  },
38
  {
39
  "id": 2,
40
- "special": true,
41
  "content": "</s>",
42
  "single_word": false,
43
  "lstrip": false,
44
  "rstrip": false,
45
- "normalized": true
 
46
  },
47
  {
48
  "id": 3,
49
- "special": true,
50
  "content": "<unk>",
51
  "single_word": false,
52
  "lstrip": false,
53
  "rstrip": false,
54
- "normalized": true
 
55
  },
56
  {
57
  "id": 4,
58
- "special": true,
59
  "content": "<mask>",
60
  "single_word": false,
61
  "lstrip": true,
62
  "rstrip": false,
63
- "normalized": true
 
64
  }
65
  ],
66
  "normalizer": null,
67
  "pre_tokenizer": {
68
  "type": "ByteLevel",
69
  "add_prefix_space": false,
70
- "trim_offsets": true
 
71
  },
72
  "post_processor": {
73
  "type": "RobertaProcessing",
@@ -85,7 +72,8 @@
85
  "decoder": {
86
  "type": "ByteLevel",
87
  "add_prefix_space": true,
88
- "trim_offsets": true
 
89
  },
90
  "model": {
91
  "type": "BPE",
 
1
  {
2
  "version": "1.0",
3
+ "truncation": null,
4
+ "padding": null,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
5
  "added_tokens": [
6
  {
7
  "id": 0,
 
8
  "content": "<s>",
9
  "single_word": false,
10
  "lstrip": false,
11
  "rstrip": false,
12
+ "normalized": true,
13
+ "special": true
14
  },
15
  {
16
  "id": 1,
 
17
  "content": "<pad>",
18
  "single_word": false,
19
  "lstrip": false,
20
  "rstrip": false,
21
+ "normalized": true,
22
+ "special": true
23
  },
24
  {
25
  "id": 2,
 
26
  "content": "</s>",
27
  "single_word": false,
28
  "lstrip": false,
29
  "rstrip": false,
30
+ "normalized": true,
31
+ "special": true
32
  },
33
  {
34
  "id": 3,
 
35
  "content": "<unk>",
36
  "single_word": false,
37
  "lstrip": false,
38
  "rstrip": false,
39
+ "normalized": true,
40
+ "special": true
41
  },
42
  {
43
  "id": 4,
 
44
  "content": "<mask>",
45
  "single_word": false,
46
  "lstrip": true,
47
  "rstrip": false,
48
+ "normalized": true,
49
+ "special": true
50
  }
51
  ],
52
  "normalizer": null,
53
  "pre_tokenizer": {
54
  "type": "ByteLevel",
55
  "add_prefix_space": false,
56
+ "trim_offsets": true,
57
+ "use_regex": true
58
  },
59
  "post_processor": {
60
  "type": "RobertaProcessing",
 
72
  "decoder": {
73
  "type": "ByteLevel",
74
  "add_prefix_space": true,
75
+ "trim_offsets": true,
76
+ "use_regex": true
77
  },
78
  "model": {
79
  "type": "BPE",
tokenizer_config.json CHANGED
@@ -1 +1,64 @@
1
- {"unk_token": {"content": "<unk>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "__type": "AddedToken"}, "bos_token": {"content": "<s>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "__type": "AddedToken"}, "eos_token": {"content": "</s>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "__type": "AddedToken"}, "add_prefix_space": false, "errors": "replace", "sep_token": {"content": "</s>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "__type": "AddedToken"}, "cls_token": {"content": "<s>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "__type": "AddedToken"}, "pad_token": {"content": "<pad>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "__type": "AddedToken"}, "mask_token": {"content": "<mask>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "__type": "AddedToken"}, "trim_offsets": true, "special_tokens_map_file": null, "name_or_path": "checkpoint-13000", "tokenizer_class": "RobertaTokenizer"}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "add_prefix_space": false,
3
+ "bos_token": {
4
+ "__type": "AddedToken",
5
+ "content": "<s>",
6
+ "lstrip": false,
7
+ "normalized": true,
8
+ "rstrip": false,
9
+ "single_word": false
10
+ },
11
+ "cls_token": {
12
+ "__type": "AddedToken",
13
+ "content": "<s>",
14
+ "lstrip": false,
15
+ "normalized": true,
16
+ "rstrip": false,
17
+ "single_word": false
18
+ },
19
+ "eos_token": {
20
+ "__type": "AddedToken",
21
+ "content": "</s>",
22
+ "lstrip": false,
23
+ "normalized": true,
24
+ "rstrip": false,
25
+ "single_word": false
26
+ },
27
+ "errors": "replace",
28
+ "mask_token": {
29
+ "__type": "AddedToken",
30
+ "content": "<mask>",
31
+ "lstrip": true,
32
+ "normalized": true,
33
+ "rstrip": false,
34
+ "single_word": false
35
+ },
36
+ "name_or_path": "Naveen-k/KanBERTo",
37
+ "pad_token": {
38
+ "__type": "AddedToken",
39
+ "content": "<pad>",
40
+ "lstrip": false,
41
+ "normalized": true,
42
+ "rstrip": false,
43
+ "single_word": false
44
+ },
45
+ "sep_token": {
46
+ "__type": "AddedToken",
47
+ "content": "</s>",
48
+ "lstrip": false,
49
+ "normalized": true,
50
+ "rstrip": false,
51
+ "single_word": false
52
+ },
53
+ "special_tokens_map_file": null,
54
+ "tokenizer_class": "RobertaTokenizer",
55
+ "trim_offsets": true,
56
+ "unk_token": {
57
+ "__type": "AddedToken",
58
+ "content": "<unk>",
59
+ "lstrip": false,
60
+ "normalized": true,
61
+ "rstrip": false,
62
+ "single_word": false
63
+ }
64
+ }
training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:76f20e82c567565e0a829c061d9a899e49656ee75fc5ec37dc66e2da44dc7fd7
3
+ size 3311