Gopal2002 committed on
Commit
562d1da
1 Parent(s): 3af556e

Upload processor

Browse files
added_tokens.json CHANGED
@@ -1,12 +1,12 @@
1
  {
2
- "</s_answer>": 57530,
3
- "</s_question>": 57528,
4
- "<no/>": 57526,
5
- "<s_answer>": 57529,
6
- "<s_docvqa>": 57531,
7
- "<s_iitcdip>": 57523,
8
- "<s_question>": 57527,
9
- "<s_synthdog>": 57524,
10
  "<sep/>": 57522,
11
- "<yes/>": 57525
12
  }
 
1
  {
2
+ "</s_answer>": 57523,
3
+ "</s_question>": 57524,
4
+ "<no/>": 57525,
5
+ "<s_answer>": 57526,
6
+ "<s_docvqa>": 57527,
7
+ "<s_iitcdip>": 57528,
8
+ "<s_question>": 57529,
9
+ "<s_synthdog>": 57530,
10
  "<sep/>": 57522,
11
+ "<yes/>": 57531
12
  }
special_tokens_map.json CHANGED
@@ -1,7 +1,14 @@
1
  {
2
  "additional_special_tokens": [
 
 
 
 
 
3
  "<s_iitcdip>",
4
- "<s_synthdog>"
 
 
5
  ],
6
  "bos_token": {
7
  "content": "<s>",
 
1
  {
2
  "additional_special_tokens": [
3
+ "</s_answer>",
4
+ "</s_question>",
5
+ "<no/>",
6
+ "<s_answer>",
7
+ "<s_docvqa>",
8
  "<s_iitcdip>",
9
+ "<s_question>",
10
+ "<s_synthdog>",
11
+ "<yes/>"
12
  ],
13
  "bos_token": {
14
  "content": "<s>",
tokenizer.json CHANGED
@@ -73,25 +73,25 @@
73
  },
74
  {
75
  "id": 57523,
76
- "content": "<s_iitcdip>",
77
  "single_word": false,
78
  "lstrip": false,
79
  "rstrip": false,
80
- "normalized": false,
81
- "special": true
82
  },
83
  {
84
  "id": 57524,
85
- "content": "<s_synthdog>",
86
  "single_word": false,
87
  "lstrip": false,
88
  "rstrip": false,
89
- "normalized": false,
90
- "special": true
91
  },
92
  {
93
  "id": 57525,
94
- "content": "<yes/>",
95
  "single_word": false,
96
  "lstrip": false,
97
  "rstrip": false,
@@ -100,7 +100,7 @@
100
  },
101
  {
102
  "id": 57526,
103
- "content": "<no/>",
104
  "single_word": false,
105
  "lstrip": false,
106
  "rstrip": false,
@@ -109,7 +109,7 @@
109
  },
110
  {
111
  "id": 57527,
112
- "content": "<s_question>",
113
  "single_word": false,
114
  "lstrip": false,
115
  "rstrip": false,
@@ -118,16 +118,16 @@
118
  },
119
  {
120
  "id": 57528,
121
- "content": "</s_question>",
122
  "single_word": false,
123
  "lstrip": false,
124
  "rstrip": false,
125
- "normalized": true,
126
- "special": false
127
  },
128
  {
129
  "id": 57529,
130
- "content": "<s_answer>",
131
  "single_word": false,
132
  "lstrip": false,
133
  "rstrip": false,
@@ -136,16 +136,16 @@
136
  },
137
  {
138
  "id": 57530,
139
- "content": "</s_answer>",
140
  "single_word": false,
141
  "lstrip": false,
142
  "rstrip": false,
143
- "normalized": true,
144
- "special": false
145
  },
146
  {
147
  "id": 57531,
148
- "content": "<s_docvqa>",
149
  "single_word": false,
150
  "lstrip": false,
151
  "rstrip": false,
 
73
  },
74
  {
75
  "id": 57523,
76
+ "content": "</s_answer>",
77
  "single_word": false,
78
  "lstrip": false,
79
  "rstrip": false,
80
+ "normalized": true,
81
+ "special": false
82
  },
83
  {
84
  "id": 57524,
85
+ "content": "</s_question>",
86
  "single_word": false,
87
  "lstrip": false,
88
  "rstrip": false,
89
+ "normalized": true,
90
+ "special": false
91
  },
92
  {
93
  "id": 57525,
94
+ "content": "<no/>",
95
  "single_word": false,
96
  "lstrip": false,
97
  "rstrip": false,
 
100
  },
101
  {
102
  "id": 57526,
103
+ "content": "<s_answer>",
104
  "single_word": false,
105
  "lstrip": false,
106
  "rstrip": false,
 
109
  },
110
  {
111
  "id": 57527,
112
+ "content": "<s_docvqa>",
113
  "single_word": false,
114
  "lstrip": false,
115
  "rstrip": false,
 
118
  },
119
  {
120
  "id": 57528,
121
+ "content": "<s_iitcdip>",
122
  "single_word": false,
123
  "lstrip": false,
124
  "rstrip": false,
125
+ "normalized": false,
126
+ "special": true
127
  },
128
  {
129
  "id": 57529,
130
+ "content": "<s_question>",
131
  "single_word": false,
132
  "lstrip": false,
133
  "rstrip": false,
 
136
  },
137
  {
138
  "id": 57530,
139
+ "content": "<s_synthdog>",
140
  "single_word": false,
141
  "lstrip": false,
142
  "rstrip": false,
143
+ "normalized": false,
144
+ "special": true
145
  },
146
  {
147
  "id": 57531,
148
+ "content": "<yes/>",
149
  "single_word": false,
150
  "lstrip": false,
151
  "rstrip": false,
tokenizer_config.json CHANGED
@@ -49,23 +49,23 @@
49
  "special": false
50
  },
51
  "57523": {
52
- "content": "<s_iitcdip>",
53
  "lstrip": false,
54
- "normalized": false,
55
  "rstrip": false,
56
  "single_word": false,
57
- "special": true
58
  },
59
  "57524": {
60
- "content": "<s_synthdog>",
61
  "lstrip": false,
62
- "normalized": false,
63
  "rstrip": false,
64
  "single_word": false,
65
- "special": true
66
  },
67
  "57525": {
68
- "content": "<yes/>",
69
  "lstrip": false,
70
  "normalized": true,
71
  "rstrip": false,
@@ -73,7 +73,7 @@
73
  "special": false
74
  },
75
  "57526": {
76
- "content": "<no/>",
77
  "lstrip": false,
78
  "normalized": true,
79
  "rstrip": false,
@@ -81,7 +81,7 @@
81
  "special": false
82
  },
83
  "57527": {
84
- "content": "<s_question>",
85
  "lstrip": false,
86
  "normalized": true,
87
  "rstrip": false,
@@ -89,15 +89,15 @@
89
  "special": false
90
  },
91
  "57528": {
92
- "content": "</s_question>",
93
  "lstrip": false,
94
- "normalized": true,
95
  "rstrip": false,
96
  "single_word": false,
97
- "special": false
98
  },
99
  "57529": {
100
- "content": "<s_answer>",
101
  "lstrip": false,
102
  "normalized": true,
103
  "rstrip": false,
@@ -105,15 +105,15 @@
105
  "special": false
106
  },
107
  "57530": {
108
- "content": "</s_answer>",
109
  "lstrip": false,
110
- "normalized": true,
111
  "rstrip": false,
112
  "single_word": false,
113
- "special": false
114
  },
115
  "57531": {
116
- "content": "<s_docvqa>",
117
  "lstrip": false,
118
  "normalized": true,
119
  "rstrip": false,
@@ -122,8 +122,15 @@
122
  }
123
  },
124
  "additional_special_tokens": [
 
 
 
 
 
125
  "<s_iitcdip>",
126
- "<s_synthdog>"
 
 
127
  ],
128
  "bos_token": "<s>",
129
  "clean_up_tokenization_spaces": true,
 
49
  "special": false
50
  },
51
  "57523": {
52
+ "content": "</s_answer>",
53
  "lstrip": false,
54
+ "normalized": true,
55
  "rstrip": false,
56
  "single_word": false,
57
+ "special": false
58
  },
59
  "57524": {
60
+ "content": "</s_question>",
61
  "lstrip": false,
62
+ "normalized": true,
63
  "rstrip": false,
64
  "single_word": false,
65
+ "special": false
66
  },
67
  "57525": {
68
+ "content": "<no/>",
69
  "lstrip": false,
70
  "normalized": true,
71
  "rstrip": false,
 
73
  "special": false
74
  },
75
  "57526": {
76
+ "content": "<s_answer>",
77
  "lstrip": false,
78
  "normalized": true,
79
  "rstrip": false,
 
81
  "special": false
82
  },
83
  "57527": {
84
+ "content": "<s_docvqa>",
85
  "lstrip": false,
86
  "normalized": true,
87
  "rstrip": false,
 
89
  "special": false
90
  },
91
  "57528": {
92
+ "content": "<s_iitcdip>",
93
  "lstrip": false,
94
+ "normalized": false,
95
  "rstrip": false,
96
  "single_word": false,
97
+ "special": true
98
  },
99
  "57529": {
100
+ "content": "<s_question>",
101
  "lstrip": false,
102
  "normalized": true,
103
  "rstrip": false,
 
105
  "special": false
106
  },
107
  "57530": {
108
+ "content": "<s_synthdog>",
109
  "lstrip": false,
110
+ "normalized": false,
111
  "rstrip": false,
112
  "single_word": false,
113
+ "special": true
114
  },
115
  "57531": {
116
+ "content": "<yes/>",
117
  "lstrip": false,
118
  "normalized": true,
119
  "rstrip": false,
 
122
  }
123
  },
124
  "additional_special_tokens": [
125
+ "</s_answer>",
126
+ "</s_question>",
127
+ "<no/>",
128
+ "<s_answer>",
129
+ "<s_docvqa>",
130
  "<s_iitcdip>",
131
+ "<s_question>",
132
+ "<s_synthdog>",
133
+ "<yes/>"
134
  ],
135
  "bos_token": "<s>",
136
  "clean_up_tokenization_spaces": true,