NourMohamed91 committed on
Commit
1d8e286
1 Parent(s): 2ef1e57

Training in progress epoch 0

Browse files
Files changed (6) hide show
  1. README.md +6 -15
  2. config.json +8 -1
  3. tf_model.h5 +1 -1
  4. tokenizer.json +0 -0
  5. tokenizer_config.json +113 -105
  6. vocab.txt +0 -0
README.md CHANGED
@@ -1,5 +1,5 @@
1
  ---
2
- base_model: aubmindlab/bert-base-arabertv2
3
  tags:
4
  - generated_from_keras_callback
5
  model-index:
@@ -12,11 +12,11 @@ probably proofread and complete it, then remove this comment. -->
12
 
13
  # NourMohamed91/v_model
14
 
15
- This model is a fine-tuned version of [aubmindlab/bert-base-arabertv2](https://huggingface.co/aubmindlab/bert-base-arabertv2) on an unknown dataset.
16
  It achieves the following results on the evaluation set:
17
- - Train Loss: 0.0860
18
- - Validation Loss: 0.0561
19
- - Epoch: 9
20
 
21
  ## Model description
22
 
@@ -42,16 +42,7 @@ The following hyperparameters were used during training:
42
 
43
  | Train Loss | Validation Loss | Epoch |
44
  |:----------:|:---------------:|:-----:|
45
- | 4.4674 | 2.8440 | 0 |
46
- | 1.7610 | 0.5503 | 1 |
47
- | 0.4084 | 0.1634 | 2 |
48
- | 0.2088 | 0.1181 | 3 |
49
- | 0.1409 | 0.0843 | 4 |
50
- | 0.1130 | 0.0592 | 5 |
51
- | 0.1097 | 0.0590 | 6 |
52
- | 0.0911 | 0.0517 | 7 |
53
- | 0.0896 | 0.0568 | 8 |
54
- | 0.0860 | 0.0561 | 9 |
55
 
56
 
57
  ### Framework versions
 
1
  ---
2
+ base_model: gp-tar4/QA_FineTuned_Arabert
3
  tags:
4
  - generated_from_keras_callback
5
  model-index:
 
12
 
13
  # NourMohamed91/v_model
14
 
15
+ This model is a fine-tuned version of [gp-tar4/QA_FineTuned_Arabert](https://huggingface.co/gp-tar4/QA_FineTuned_Arabert) on an unknown dataset.
16
  It achieves the following results on the evaluation set:
17
+ - Train Loss: 1.9933
18
+ - Validation Loss: 0.2847
19
+ - Epoch: 0
20
 
21
  ## Model description
22
 
 
42
 
43
  | Train Loss | Validation Loss | Epoch |
44
  |:----------:|:---------------:|:-----:|
45
+ | 1.9933 | 0.2847 | 0 |
 
 
 
 
 
 
 
 
 
46
 
47
 
48
  ### Framework versions
config.json CHANGED
@@ -1,10 +1,11 @@
1
  {
2
- "_name_or_path": "aubmindlab/bert-base-arabertv2",
3
  "architectures": [
4
  "BertForQuestionAnswering"
5
  ],
6
  "attention_probs_dropout_prob": 0.1,
7
  "classifier_dropout": null,
 
8
  "hidden_act": "gelu",
9
  "hidden_dropout_prob": 0.1,
10
  "hidden_size": 768,
@@ -16,7 +17,13 @@
16
  "num_attention_heads": 12,
17
  "num_hidden_layers": 12,
18
  "pad_token_id": 0,
 
 
 
 
 
19
  "position_embedding_type": "absolute",
 
20
  "transformers_version": "4.41.0",
21
  "type_vocab_size": 2,
22
  "use_cache": true,
 
1
  {
2
+ "_name_or_path": "gp-tar4/QA_FineTuned_Arabert",
3
  "architectures": [
4
  "BertForQuestionAnswering"
5
  ],
6
  "attention_probs_dropout_prob": 0.1,
7
  "classifier_dropout": null,
8
+ "directionality": "bidi",
9
  "hidden_act": "gelu",
10
  "hidden_dropout_prob": 0.1,
11
  "hidden_size": 768,
 
17
  "num_attention_heads": 12,
18
  "num_hidden_layers": 12,
19
  "pad_token_id": 0,
20
+ "pooler_fc_size": 768,
21
+ "pooler_num_attention_heads": 12,
22
+ "pooler_num_fc_layers": 3,
23
+ "pooler_size_per_head": 128,
24
+ "pooler_type": "first_token_transform",
25
  "position_embedding_type": "absolute",
26
+ "torch_dtype": "float32",
27
  "transformers_version": "4.41.0",
28
  "type_vocab_size": 2,
29
  "use_cache": true,
tf_model.h5 CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:7307bfd4ed3e7a83e2a870154c95b62816d09bebca1c107bd88935dd6e5f45f3
3
  size 538686416
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:37749a0d36fb7ec773accf8788945d92009a230583f94f083e53982fdb46abc0
3
  size 538686416
tokenizer.json CHANGED
The diff for this file is too large to render. See raw diff
 
tokenizer_config.json CHANGED
@@ -1,6 +1,6 @@
1
  {
2
  "added_tokens_decoder": {
3
- "0": {
4
  "content": "+ا",
5
  "lstrip": false,
6
  "normalized": true,
@@ -8,284 +8,284 @@
8
  "single_word": true,
9
  "special": true
10
  },
11
- "1": {
12
- "content": "",
13
  "lstrip": false,
14
  "normalized": true,
15
  "rstrip": false,
16
  "single_word": true,
17
  "special": true
18
  },
19
- "2": {
20
- "content": "ب+",
21
  "lstrip": false,
22
  "normalized": true,
23
  "rstrip": false,
24
  "single_word": true,
25
  "special": true
26
  },
27
- "3": {
28
- "content": "+هم",
29
  "lstrip": false,
30
  "normalized": true,
31
  "rstrip": false,
32
  "single_word": true,
33
  "special": true
34
  },
35
- "4": {
36
- "content": "+ات",
37
  "lstrip": false,
38
  "normalized": true,
39
  "rstrip": false,
40
  "single_word": true,
41
  "special": true
42
  },
43
- "5": {
44
- "content": "",
45
  "lstrip": false,
46
  "normalized": true,
47
  "rstrip": false,
48
  "single_word": true,
49
  "special": true
50
  },
51
- "6": {
52
- "content": "ل+",
53
  "lstrip": false,
54
  "normalized": true,
55
  "rstrip": false,
56
  "single_word": true,
57
  "special": true
58
  },
59
- "7": {
60
- "content": "+هما",
61
  "lstrip": false,
62
  "normalized": true,
63
  "rstrip": false,
64
  "single_word": true,
65
  "special": true
66
  },
67
- "8": {
68
- "content": "+نا",
69
  "lstrip": false,
70
  "normalized": true,
71
  "rstrip": false,
72
  "single_word": true,
73
  "special": true
74
  },
75
- "9": {
76
- "content": "",
77
  "lstrip": false,
78
  "normalized": true,
79
  "rstrip": false,
80
  "single_word": true,
81
  "special": true
82
  },
83
- "10": {
84
- "content": "+ها",
85
  "lstrip": false,
86
  "normalized": true,
87
  "rstrip": false,
88
  "single_word": true,
89
  "special": true
90
  },
91
- "11": {
92
- "content": "+كما",
93
  "lstrip": false,
94
  "normalized": true,
95
  "rstrip": false,
96
  "single_word": true,
97
  "special": true
98
  },
99
- "12": {
100
- "content": "",
101
  "lstrip": false,
102
  "normalized": true,
103
  "rstrip": false,
104
  "single_word": true,
105
  "special": true
106
  },
107
- "13": {
108
- "content": "ف+",
109
  "lstrip": false,
110
  "normalized": true,
111
  "rstrip": false,
112
  "single_word": true,
113
  "special": true
114
  },
115
- "14": {
116
- "content": "+كم",
117
  "lstrip": false,
118
  "normalized": true,
119
  "rstrip": false,
120
  "single_word": true,
121
  "special": true
122
  },
123
- "15": {
124
- "content": "+كن",
125
  "lstrip": false,
126
  "normalized": true,
127
  "rstrip": false,
128
  "single_word": true,
129
  "special": true
130
  },
131
- "16": {
132
- "content": "",
133
  "lstrip": false,
134
  "normalized": true,
135
  "rstrip": false,
136
  "single_word": true,
137
  "special": true
138
  },
139
- "17": {
140
- "content": "[بريد]",
141
  "lstrip": false,
142
  "normalized": true,
143
  "rstrip": false,
144
  "single_word": true,
145
  "special": true
146
  },
147
- "18": {
148
- "content": "[مستخدم]",
149
  "lstrip": false,
150
  "normalized": true,
151
  "rstrip": false,
152
  "single_word": true,
153
  "special": true
154
  },
155
- "19": {
156
- "content": "لل+",
157
  "lstrip": false,
158
  "normalized": true,
159
  "rstrip": false,
160
  "single_word": true,
161
  "special": true
162
  },
163
- "20": {
164
- "content": "ال+",
165
  "lstrip": false,
166
  "normalized": true,
167
  "rstrip": false,
168
  "single_word": true,
169
  "special": true
170
  },
171
- "21": {
172
- "content": "[رابط]",
173
  "lstrip": false,
174
  "normalized": true,
175
  "rstrip": false,
176
  "single_word": true,
177
  "special": true
178
  },
179
- "22": {
180
- "content": "س+",
181
  "lstrip": false,
182
  "normalized": true,
183
  "rstrip": false,
184
  "single_word": true,
185
  "special": true
186
  },
187
- "23": {
188
- "content": "+ان",
189
  "lstrip": false,
190
  "normalized": true,
191
  "rstrip": false,
192
  "single_word": true,
193
  "special": true
194
  },
195
- "24": {
196
- "content": "+وا",
197
  "lstrip": false,
198
  "normalized": true,
199
  "rstrip": false,
200
  "single_word": true,
201
  "special": true
202
  },
203
- "25": {
204
- "content": "",
205
  "lstrip": false,
206
  "normalized": true,
207
  "rstrip": false,
208
  "single_word": true,
209
  "special": true
210
  },
211
- "26": {
212
- "content": "+ون",
213
  "lstrip": false,
214
  "normalized": true,
215
  "rstrip": false,
216
  "single_word": true,
217
  "special": true
218
  },
219
- "27": {
220
- "content": "+هن",
221
  "lstrip": false,
222
  "normalized": true,
223
  "rstrip": false,
224
  "single_word": true,
225
  "special": true
226
  },
227
- "28": {
228
- "content": "+ين",
229
  "lstrip": false,
230
- "normalized": true,
231
  "rstrip": false,
232
- "single_word": true,
233
  "special": true
234
  },
235
- "29": {
236
- "content": "و+",
237
  "lstrip": false,
238
- "normalized": true,
239
  "rstrip": false,
240
- "single_word": true,
241
  "special": true
242
  },
243
- "30": {
244
- "content": "ك+",
245
  "lstrip": false,
246
- "normalized": true,
247
  "rstrip": false,
248
- "single_word": true,
249
  "special": true
250
  },
251
- "31": {
252
- "content": "[PAD]",
253
  "lstrip": false,
254
  "normalized": false,
255
  "rstrip": false,
256
  "single_word": false,
257
  "special": true
258
  },
259
- "32": {
260
- "content": "[UNK]",
261
  "lstrip": false,
262
  "normalized": false,
263
  "rstrip": false,
264
  "single_word": false,
265
  "special": true
266
  },
267
- "33": {
268
- "content": "[CLS]",
269
  "lstrip": false,
270
- "normalized": false,
271
  "rstrip": false,
272
- "single_word": false,
273
  "special": true
274
  },
275
- "34": {
276
- "content": "[SEP]",
277
  "lstrip": false,
278
- "normalized": false,
279
  "rstrip": false,
280
- "single_word": false,
281
  "special": true
282
  },
283
- "35": {
284
- "content": "[MASK]",
285
  "lstrip": false,
286
- "normalized": false,
287
  "rstrip": false,
288
- "single_word": false,
289
  "special": true
290
  }
291
  },
@@ -293,46 +293,54 @@
293
  "cls_token": "[CLS]",
294
  "do_basic_tokenize": true,
295
  "do_lower_case": false,
 
296
  "mask_token": "[MASK]",
297
  "max_len": 512,
 
298
  "model_max_length": 512,
299
  "never_split": [
300
- "+ك",
301
- "+كما",
302
- "ك+",
303
  "+وا",
304
- "+ين",
305
- "و+",
306
- "+كن",
307
- "+ان",
308
  "+هم",
309
  "+ة",
310
- "[بريد]",
311
- "لل+",
312
- "+ي",
313
- "+ت",
314
  "+ن",
315
- "س+",
316
- "ل+",
317
- "[مستخدم]",
318
- "+كم",
 
319
  "+ا",
 
 
320
  "ب+",
321
- "ف+",
322
  "+نا",
323
- "+ها",
324
- "+ون",
 
 
325
  "+هما",
 
 
 
 
 
 
 
326
  "ال+",
327
- "",
328
- "+هن",
329
- "+ات",
330
- "[رابط]"
331
  ],
 
332
  "pad_token": "[PAD]",
 
 
333
  "sep_token": "[SEP]",
 
334
  "strip_accents": null,
335
  "tokenize_chinese_chars": true,
336
  "tokenizer_class": "BertTokenizer",
 
 
337
  "unk_token": "[UNK]"
338
  }
 
1
  {
2
  "added_tokens_decoder": {
3
+ "124": {
4
  "content": "+ا",
5
  "lstrip": false,
6
  "normalized": true,
 
8
  "single_word": true,
9
  "special": true
10
  },
11
+ "125": {
12
+ "content": "",
13
  "lstrip": false,
14
  "normalized": true,
15
  "rstrip": false,
16
  "single_word": true,
17
  "special": true
18
  },
19
+ "126": {
20
+ "content": "",
21
  "lstrip": false,
22
  "normalized": true,
23
  "rstrip": false,
24
  "single_word": true,
25
  "special": true
26
  },
27
+ "127": {
28
+ "content": "",
29
  "lstrip": false,
30
  "normalized": true,
31
  "rstrip": false,
32
  "single_word": true,
33
  "special": true
34
  },
35
+ "128": {
36
+ "content": "",
37
  "lstrip": false,
38
  "normalized": true,
39
  "rstrip": false,
40
  "single_word": true,
41
  "special": true
42
  },
43
+ "129": {
44
+ "content": "",
45
  "lstrip": false,
46
  "normalized": true,
47
  "rstrip": false,
48
  "single_word": true,
49
  "special": true
50
  },
51
+ "130": {
52
+ "content": "",
53
  "lstrip": false,
54
  "normalized": true,
55
  "rstrip": false,
56
  "single_word": true,
57
  "special": true
58
  },
59
+ "448": {
60
+ "content": "ب+",
61
  "lstrip": false,
62
  "normalized": true,
63
  "rstrip": false,
64
  "single_word": true,
65
  "special": true
66
  },
67
+ "635": {
68
+ "content": "س+",
69
  "lstrip": false,
70
  "normalized": true,
71
  "rstrip": false,
72
  "single_word": true,
73
  "special": true
74
  },
75
+ "765": {
76
+ "content": "ف+",
77
  "lstrip": false,
78
  "normalized": true,
79
  "rstrip": false,
80
  "single_word": true,
81
  "special": true
82
  },
83
+ "802": {
84
+ "content": "ك+",
85
  "lstrip": false,
86
  "normalized": true,
87
  "rstrip": false,
88
  "single_word": true,
89
  "special": true
90
  },
91
+ "816": {
92
+ "content": "ل+",
93
  "lstrip": false,
94
  "normalized": true,
95
  "rstrip": false,
96
  "single_word": true,
97
  "special": true
98
  },
99
+ "897": {
100
+ "content": "و+",
101
  "lstrip": false,
102
  "normalized": true,
103
  "rstrip": false,
104
  "single_word": true,
105
  "special": true
106
  },
107
+ "1012": {
108
+ "content": "+ات",
109
  "lstrip": false,
110
  "normalized": true,
111
  "rstrip": false,
112
  "single_word": true,
113
  "special": true
114
  },
115
+ "1013": {
116
+ "content": "+ان",
117
  "lstrip": false,
118
  "normalized": true,
119
  "rstrip": false,
120
  "single_word": true,
121
  "special": true
122
  },
123
+ "1015": {
124
+ "content": "+كم",
125
  "lstrip": false,
126
  "normalized": true,
127
  "rstrip": false,
128
  "single_word": true,
129
  "special": true
130
  },
131
+ "1016": {
132
+ "content": "+كن",
133
  "lstrip": false,
134
  "normalized": true,
135
  "rstrip": false,
136
  "single_word": true,
137
  "special": true
138
  },
139
+ "1017": {
140
+ "content": "+نا",
141
  "lstrip": false,
142
  "normalized": true,
143
  "rstrip": false,
144
  "single_word": true,
145
  "special": true
146
  },
147
+ "1018": {
148
+ "content": "+ها",
149
  "lstrip": false,
150
  "normalized": true,
151
  "rstrip": false,
152
  "single_word": true,
153
  "special": true
154
  },
155
+ "1019": {
156
+ "content": "+هم",
157
  "lstrip": false,
158
  "normalized": true,
159
  "rstrip": false,
160
  "single_word": true,
161
  "special": true
162
  },
163
+ "1020": {
164
+ "content": "+هن",
165
  "lstrip": false,
166
  "normalized": true,
167
  "rstrip": false,
168
  "single_word": true,
169
  "special": true
170
  },
171
+ "1021": {
172
+ "content": "+وا",
173
  "lstrip": false,
174
  "normalized": true,
175
  "rstrip": false,
176
  "single_word": true,
177
  "special": true
178
  },
179
+ "1022": {
180
+ "content": "+ون",
181
  "lstrip": false,
182
  "normalized": true,
183
  "rstrip": false,
184
  "single_word": true,
185
  "special": true
186
  },
187
+ "1023": {
188
+ "content": "+ين",
189
  "lstrip": false,
190
  "normalized": true,
191
  "rstrip": false,
192
  "single_word": true,
193
  "special": true
194
  },
195
+ "3000": {
196
+ "content": "ال+",
197
  "lstrip": false,
198
  "normalized": true,
199
  "rstrip": false,
200
  "single_word": true,
201
  "special": true
202
  },
203
+ "6154": {
204
+ "content": "لل+",
205
  "lstrip": false,
206
  "normalized": true,
207
  "rstrip": false,
208
  "single_word": true,
209
  "special": true
210
  },
211
+ "8270": {
212
+ "content": "+كما",
213
  "lstrip": false,
214
  "normalized": true,
215
  "rstrip": false,
216
  "single_word": true,
217
  "special": true
218
  },
219
+ "8271": {
220
+ "content": "+هما",
221
  "lstrip": false,
222
  "normalized": true,
223
  "rstrip": false,
224
  "single_word": true,
225
  "special": true
226
  },
227
+ "29756": {
228
+ "content": "[CLS]",
229
  "lstrip": false,
230
+ "normalized": false,
231
  "rstrip": false,
232
+ "single_word": false,
233
  "special": true
234
  },
235
+ "29757": {
236
+ "content": "[PAD]",
237
  "lstrip": false,
238
+ "normalized": false,
239
  "rstrip": false,
240
+ "single_word": false,
241
  "special": true
242
  },
243
+ "29758": {
244
+ "content": "[SEP]",
245
  "lstrip": false,
246
+ "normalized": false,
247
  "rstrip": false,
248
+ "single_word": false,
249
  "special": true
250
  },
251
+ "29759": {
252
+ "content": "[UNK]",
253
  "lstrip": false,
254
  "normalized": false,
255
  "rstrip": false,
256
  "single_word": false,
257
  "special": true
258
  },
259
+ "46585": {
260
+ "content": "[MASK]",
261
  "lstrip": false,
262
  "normalized": false,
263
  "rstrip": false,
264
  "single_word": false,
265
  "special": true
266
  },
267
+ "46586": {
268
+ "content": "[بريد]",
269
  "lstrip": false,
270
+ "normalized": true,
271
  "rstrip": false,
272
+ "single_word": true,
273
  "special": true
274
  },
275
+ "46587": {
276
+ "content": "[رابط]",
277
  "lstrip": false,
278
+ "normalized": true,
279
  "rstrip": false,
280
+ "single_word": true,
281
  "special": true
282
  },
283
+ "57701": {
284
+ "content": "[مستخدم]",
285
  "lstrip": false,
286
+ "normalized": true,
287
  "rstrip": false,
288
+ "single_word": true,
289
  "special": true
290
  }
291
  },
 
293
  "cls_token": "[CLS]",
294
  "do_basic_tokenize": true,
295
  "do_lower_case": false,
296
+ "full_tokenizer_file": null,
297
  "mask_token": "[MASK]",
298
  "max_len": 512,
299
+ "max_length": 384,
300
  "model_max_length": 512,
301
  "never_split": [
 
 
 
302
  "+وا",
303
+ "س+",
304
+ "[مستخدم]",
305
+ "",
 
306
  "+هم",
307
  "+ة",
 
 
 
 
308
  "+ن",
309
+ "لل+",
310
+ "[بريد]",
311
+ "[رابط]",
312
+ "",
313
+ "+كن",
314
  "+ا",
315
+ "+ات",
316
+ "+ي",
317
  "ب+",
 
318
  "+نا",
319
+ "+هن",
320
+ "+كم",
321
+ "ك+",
322
+ "+ين",
323
  "+هما",
324
+ "و+",
325
+ "+كما",
326
+ "+ان",
327
+ "+ت",
328
+ "+ون",
329
+ "ل+",
330
+ "+ها",
331
  "ال+",
332
+ "ف+"
 
 
 
333
  ],
334
+ "pad_to_multiple_of": null,
335
  "pad_token": "[PAD]",
336
+ "pad_token_type_id": 0,
337
+ "padding_side": "right",
338
  "sep_token": "[SEP]",
339
+ "stride": 0,
340
  "strip_accents": null,
341
  "tokenize_chinese_chars": true,
342
  "tokenizer_class": "BertTokenizer",
343
+ "truncation_side": "right",
344
+ "truncation_strategy": "only_second",
345
  "unk_token": "[UNK]"
346
  }
vocab.txt CHANGED
The diff for this file is too large to render. See raw diff