sahithyaravi committed on
Commit
570ae82
1 Parent(s): ddb6942

Upload tokenizer

added_tokens.json ADDED
@@ -0,0 +1,55 @@
+{
+  "<TGT>": 50317,
+  "AtLocation": 50265,
+  "CapableOf": 50266,
+  "Causes": 50267,
+  "CausesDesire": 50268,
+  "CreatedBy": 50269,
+  "DefinedAs": 50270,
+  "DesireOf": 50271,
+  "Desires": 50272,
+  "HasA": 50273,
+  "HasFirstSubevent": 50274,
+  "HasLastSubevent": 50275,
+  "HasPainCharacter": 50276,
+  "HasPainIntensity": 50277,
+  "HasPrerequisite": 50278,
+  "HasProperty": 50279,
+  "HasSubEvent": 50280,
+  "HasSubevent": 50281,
+  "HinderedBy": 50282,
+  "InheritsFrom": 50283,
+  "InstanceOf": 50284,
+  "IsA": 50285,
+  "LocatedNear": 50286,
+  "LocationOfAction": 50287,
+  "MadeOf": 50288,
+  "MadeUpOf": 50289,
+  "MotivatedByGoal": 50290,
+  "NotCapableOf": 50291,
+  "NotDesires": 50292,
+  "NotHasA": 50293,
+  "NotHasProperty": 50294,
+  "NotIsA": 50295,
+  "NotMadeOf": 50296,
+  "ObjectUse": 50297,
+  "PartOf": 50298,
+  "ReceivesAction": 50299,
+  "RelatedTo": 50300,
+  "SymbolOf": 50301,
+  "UsedFor": 50302,
+  "[GEN]": 50316,
+  "isAfter": 50303,
+  "isBefore": 50304,
+  "isFilledBy": 50305,
+  "oEffect": 50306,
+  "oReact": 50307,
+  "oWant": 50308,
+  "xAttr": 50309,
+  "xEffect": 50310,
+  "xIntent": 50311,
+  "xNeed": 50312,
+  "xReact": 50313,
+  "xReason": 50314,
+  "xWant": 50315
+}
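The IDs above begin at 50265, the size of the base BART-large vocabulary, so these entries look like ConceptNet/ATOMIC-style relation tokens plus two control tokens ([GEN], <TGT>) appended to the base tokenizer. A minimal sketch of how such a file could be produced with the transformers library; the facebook/bart-large starting point and the use of add_tokens are assumptions, while the token strings and IDs come from the diff above.

from transformers import BartTokenizer

# Assumption: the tokenizer started from facebook/bart-large, whose base
# vocabulary has 50,265 entries (IDs 0-50264), so newly added tokens receive
# IDs from 50265 upward in the order they are added.
tokenizer = BartTokenizer.from_pretrained("facebook/bart-large")

# Relation tokens in the same order as the IDs in added_tokens.json above.
relations = [
    "AtLocation", "CapableOf", "Causes", "CausesDesire", "CreatedBy",
    "DefinedAs", "DesireOf", "Desires", "HasA", "HasFirstSubevent",
    "HasLastSubevent", "HasPainCharacter", "HasPainIntensity",
    "HasPrerequisite", "HasProperty", "HasSubEvent", "HasSubevent",
    "HinderedBy", "InheritsFrom", "InstanceOf", "IsA", "LocatedNear",
    "LocationOfAction", "MadeOf", "MadeUpOf", "MotivatedByGoal",
    "NotCapableOf", "NotDesires", "NotHasA", "NotHasProperty", "NotIsA",
    "NotMadeOf", "ObjectUse", "PartOf", "ReceivesAction", "RelatedTo",
    "SymbolOf", "UsedFor", "isAfter", "isBefore", "isFilledBy", "oEffect",
    "oReact", "oWant", "xAttr", "xEffect", "xIntent", "xNeed", "xReact",
    "xReason", "xWant",
]

num_added = tokenizer.add_tokens(relations + ["[GEN]", "<TGT>"])
print(num_added)                                      # 53 new entries
print(tokenizer.convert_tokens_to_ids("AtLocation"))  # 50265
print(tokenizer.convert_tokens_to_ids("<TGT>"))       # 50317

# Saving writes added_tokens.json alongside the other files in this commit.
tokenizer.save_pretrained("./tokenizer")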
merges.txt ADDED
The diff for this file is too large to render. See raw diff
 
special_tokens_map.json ADDED
@@ -0,0 +1,51 @@
+{
+  "bos_token": {
+    "content": "<s>",
+    "lstrip": false,
+    "normalized": true,
+    "rstrip": false,
+    "single_word": false
+  },
+  "cls_token": {
+    "content": "<s>",
+    "lstrip": false,
+    "normalized": true,
+    "rstrip": false,
+    "single_word": false
+  },
+  "eos_token": {
+    "content": "</s>",
+    "lstrip": false,
+    "normalized": true,
+    "rstrip": false,
+    "single_word": false
+  },
+  "mask_token": {
+    "content": "<mask>",
+    "lstrip": true,
+    "normalized": true,
+    "rstrip": false,
+    "single_word": false
+  },
+  "pad_token": {
+    "content": "<pad>",
+    "lstrip": false,
+    "normalized": true,
+    "rstrip": false,
+    "single_word": false
+  },
+  "sep_token": {
+    "content": "</s>",
+    "lstrip": false,
+    "normalized": true,
+    "rstrip": false,
+    "single_word": false
+  },
+  "unk_token": {
+    "content": "<unk>",
+    "lstrip": false,
+    "normalized": true,
+    "rstrip": false,
+    "single_word": false
+  }
+}
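These entries match the standard BART special tokens rather than new additions. A quick check, assuming the upload started from facebook/bart-large:

from transformers import BartTokenizer

# Assumption: the special tokens were inherited unchanged from the base model.
base = BartTokenizer.from_pretrained("facebook/bart-large")
print(base.special_tokens_map)
# Expected to mirror the map above: bos <s>, cls <s>, eos </s>, sep </s>,
# mask <mask>, pad <pad>, unk <unk>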
tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
tokenizer_config.json ADDED
@@ -0,0 +1,64 @@
+{
+  "add_prefix_space": false,
+  "bos_token": {
+    "__type": "AddedToken",
+    "content": "<s>",
+    "lstrip": false,
+    "normalized": true,
+    "rstrip": false,
+    "single_word": false
+  },
+  "clean_up_tokenization_spaces": true,
+  "cls_token": {
+    "__type": "AddedToken",
+    "content": "<s>",
+    "lstrip": false,
+    "normalized": true,
+    "rstrip": false,
+    "single_word": false
+  },
+  "eos_token": {
+    "__type": "AddedToken",
+    "content": "</s>",
+    "lstrip": false,
+    "normalized": true,
+    "rstrip": false,
+    "single_word": false
+  },
+  "errors": "replace",
+  "mask_token": {
+    "__type": "AddedToken",
+    "content": "<mask>",
+    "lstrip": true,
+    "normalized": true,
+    "rstrip": false,
+    "single_word": false
+  },
+  "model_max_length": 1024,
+  "pad_token": {
+    "__type": "AddedToken",
+    "content": "<pad>",
+    "lstrip": false,
+    "normalized": true,
+    "rstrip": false,
+    "single_word": false
+  },
+  "sep_token": {
+    "__type": "AddedToken",
+    "content": "</s>",
+    "lstrip": false,
+    "normalized": true,
+    "rstrip": false,
+    "single_word": false
+  },
+  "tokenizer_class": "BartTokenizer",
+  "trim_offsets": true,
+  "unk_token": {
+    "__type": "AddedToken",
+    "content": "<unk>",
+    "lstrip": false,
+    "normalized": true,
+    "rstrip": false,
+    "single_word": false
+  }
+}
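A hedged usage sketch for the uploaded tokenizer: the repository ID below is a placeholder (the commit page does not show the repo name), while the tokenizer class and the 1024-token limit come from tokenizer_config.json above.

from transformers import AutoTokenizer

# Placeholder repo ID; substitute the repository these files were uploaded to.
tok = AutoTokenizer.from_pretrained("sahithyaravi/<repo-name>")

print(tok.model_max_length)  # 1024, per tokenizer_config.json

# Relation/control tokens from added_tokens.json are matched as whole tokens,
# so they survive encoding intact.
ids = tok("PersonX buys groceries xIntent [GEN]", truncation=True)["input_ids"]
print(tok.convert_ids_to_tokens(ids))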
vocab.json ADDED
The diff for this file is too large to render. See raw diff