seenuvasanfoss committed on
Commit
9936523
1 Parent(s): ba73e65

Upload folder using huggingface_hub

.gitattributes CHANGED
@@ -2,13 +2,11 @@
  *.arrow filter=lfs diff=lfs merge=lfs -text
  *.bin filter=lfs diff=lfs merge=lfs -text
  *.bz2 filter=lfs diff=lfs merge=lfs -text
- *.ckpt filter=lfs diff=lfs merge=lfs -text
  *.ftz filter=lfs diff=lfs merge=lfs -text
  *.gz filter=lfs diff=lfs merge=lfs -text
  *.h5 filter=lfs diff=lfs merge=lfs -text
  *.joblib filter=lfs diff=lfs merge=lfs -text
  *.lfs.* filter=lfs diff=lfs merge=lfs -text
- *.mlmodel filter=lfs diff=lfs merge=lfs -text
  *.model filter=lfs diff=lfs merge=lfs -text
  *.msgpack filter=lfs diff=lfs merge=lfs -text
  *.npy filter=lfs diff=lfs merge=lfs -text
@@ -22,14 +20,15 @@
  *.pt filter=lfs diff=lfs merge=lfs -text
  *.pth filter=lfs diff=lfs merge=lfs -text
  *.rar filter=lfs diff=lfs merge=lfs -text
- *.safetensors filter=lfs diff=lfs merge=lfs -text
  saved_model/**/* filter=lfs diff=lfs merge=lfs -text
  *.tar.* filter=lfs diff=lfs merge=lfs -text
- *.tar filter=lfs diff=lfs merge=lfs -text
  *.tflite filter=lfs diff=lfs merge=lfs -text
  *.tgz filter=lfs diff=lfs merge=lfs -text
  *.wasm filter=lfs diff=lfs merge=lfs -text
  *.xz filter=lfs diff=lfs merge=lfs -text
  *.zip filter=lfs diff=lfs merge=lfs -text
- *.zst filter=lfs diff=lfs merge=lfs -text
+ *.zstandard filter=lfs diff=lfs merge=lfs -text
  *tfevents* filter=lfs diff=lfs merge=lfs -text
+ tokenizer.json filter=lfs diff=lfs merge=lfs -text
+ pytorch_model.bin filter=lfs diff=lfs merge=lfs -text
+ model.safetensors filter=lfs diff=lfs merge=lfs -text
README.md ADDED
@@ -0,0 +1,242 @@
+ ---
+ language: en
+ license: cc-by-4.0
+ tags:
+ - deberta
+ - deberta-v3
+ - deberta-v3-large
+ datasets:
+ - squad_v2
+ model-index:
+ - name: deepset/deberta-v3-large-squad2
+   results:
+   - task:
+       type: question-answering
+       name: Question Answering
+     dataset:
+       name: squad_v2
+       type: squad_v2
+       config: squad_v2
+       split: validation
+     metrics:
+     - type: exact_match
+       value: 88.0876
+       name: Exact Match
+       verified: true
+       verifyToken: eyJhbGciOiJFZERTQSIsInR5cCI6IkpXVCJ9.eyJoYXNoIjoiZmE0MWEwNjBkNTA1MmU0ZDkyYTA1OGEwNzY3NGE4NWU4NGI0NTQzNjRlNjY1NGRmNDU2MjA0NjU1N2JlZmNhYiIsInZlcnNpb24iOjF9.PnBF_vD0HujNBSShGJzsJnjmiBP_qT8xb2E7ORmpKfNspKXEuN_pBk9iV0IHRzdqOSyllcxlCv93XMPblNjWDw
+     - type: f1
+       value: 91.1623
+       name: F1
+       verified: true
+       verifyToken: eyJhbGciOiJFZERTQSIsInR5cCI6IkpXVCJ9.eyJoYXNoIjoiZDBkNDUzZmNkNDQwOGRkMmVlZjkxZWVlMzk3NzFmMGIxMTFmMjZlZDcyOWFiMjljNjM5MThlZDM4OWRmNzMwOCIsInZlcnNpb24iOjF9.bacyetziNI2DxO67GWpTyeRPXqF1POkyv00wEHXlyZu71pZngsNpZyrnuj2aJlCqQwHGnF_lT2ysaXKHprQRBg
+   - task:
+       type: question-answering
+       name: Question Answering
+     dataset:
+       name: squad
+       type: squad
+       config: plain_text
+       split: validation
+     metrics:
+     - type: exact_match
+       value: 89.2366
+       name: Exact Match
+       verified: true
+       verifyToken: eyJhbGciOiJFZERTQSIsInR5cCI6IkpXVCJ9.eyJoYXNoIjoiMjQ1Yjk3YTdiYTY1NmYxMTI1ZGZlMjRkNTlhZTkyNjRkNjgxYWJiNDk2NzE3NjAyYmY3YmRjNjg4YmEyNDkyYyIsInZlcnNpb24iOjF9.SEWyqX_FPQJOJt2KjOCNgQ2giyVeLj5bmLI5LT_Pfo33tbWPWD09TySYdsthaVTjUGT5DvDzQLASSwBH05FyBw
+     - type: f1
+       value: 95.0569
+       name: F1
+       verified: true
+       verifyToken: eyJhbGciOiJFZERTQSIsInR5cCI6IkpXVCJ9.eyJoYXNoIjoiY2QyODQ1NWVlYjQxMjA0YTgyNmQ2NmIxOWY3MDRmZjE3ZWI5Yjc4ZDE4NzA2YjE2YTE1YTBlNzNiYmNmNzI3NCIsInZlcnNpb24iOjF9.NcXEc9xoggV76w1bQKxuJDYbOTxFzdny2k-85_b6AIMtfpYV3rGR1Z5YF6tVY2jyp7mgm5Jd5YSgGI3NvNE-CQ
+   - task:
+       type: question-answering
+       name: Question Answering
+     dataset:
+       name: adversarial_qa
+       type: adversarial_qa
+       config: adversarialQA
+       split: validation
+     metrics:
+     - type: exact_match
+       value: 42.100
+       name: Exact Match
+     - type: f1
+       value: 56.587
+       name: F1
+   - task:
+       type: question-answering
+       name: Question Answering
+     dataset:
+       name: squad_adversarial
+       type: squad_adversarial
+       config: AddOneSent
+       split: validation
+     metrics:
+     - type: exact_match
+       value: 83.548
+       name: Exact Match
+     - type: f1
+       value: 89.385
+       name: F1
+   - task:
+       type: question-answering
+       name: Question Answering
+     dataset:
+       name: squadshifts amazon
+       type: squadshifts
+       config: amazon
+       split: test
+     metrics:
+     - type: exact_match
+       value: 72.979
+       name: Exact Match
+     - type: f1
+       value: 87.254
+       name: F1
+   - task:
+       type: question-answering
+       name: Question Answering
+     dataset:
+       name: squadshifts new_wiki
+       type: squadshifts
+       config: new_wiki
+       split: test
+     metrics:
+     - type: exact_match
+       value: 83.938
+       name: Exact Match
+     - type: f1
+       value: 92.695
+       name: F1
+   - task:
+       type: question-answering
+       name: Question Answering
+     dataset:
+       name: squadshifts nyt
+       type: squadshifts
+       config: nyt
+       split: test
+     metrics:
+     - type: exact_match
+       value: 85.534
+       name: Exact Match
+     - type: f1
+       value: 93.153
+       name: F1
+   - task:
+       type: question-answering
+       name: Question Answering
+     dataset:
+       name: squadshifts reddit
+       type: squadshifts
+       config: reddit
+       split: test
+     metrics:
+     - type: exact_match
+       value: 73.284
+       name: Exact Match
+     - type: f1
+       value: 85.307
+       name: F1
+ ---
+ # deberta-v3-large for QA
+ 
+ This is the [deberta-v3-large](https://huggingface.co/microsoft/deberta-v3-large) model, fine-tuned on the [SQuAD2.0](https://huggingface.co/datasets/squad_v2) dataset. It has been trained on question-answer pairs, including unanswerable questions, for the task of extractive question answering.
+ 
+ ## Overview
+ **Language model:** deberta-v3-large
+ **Language:** English
+ **Downstream task:** Extractive QA
+ **Training data:** SQuAD 2.0
+ **Eval data:** SQuAD 2.0
+ **Code:** See [an example QA pipeline on Haystack](https://haystack.deepset.ai/tutorials/first-qa-system)
+ **Infrastructure:** 1x NVIDIA A10G
+ 
+ ## Hyperparameters
+ 
+ ```
+ batch_size = 2
+ grad_acc_steps = 32
+ n_epochs = 6
+ base_LM_model = "microsoft/deberta-v3-large"
+ max_seq_len = 512
+ learning_rate = 7e-6
+ lr_schedule = LinearWarmup
+ warmup_proportion = 0.2
+ doc_stride = 128
+ max_query_length = 64
+ ```
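+ 
+ For orientation, here is a minimal sketch of how these hyperparameters could be expressed with the plain Transformers `Trainer` API. This is an illustration, not the original training code (the parameter names above suggest deepset's own FARM/Haystack training stack), and `output_dir` is a hypothetical name.
+ 
+ ```python
+ from transformers import TrainingArguments
+ 
+ training_args = TrainingArguments(
+     output_dir="deberta-v3-large-squad2",  # hypothetical output path
+     per_device_train_batch_size=2,         # batch_size = 2
+     gradient_accumulation_steps=32,        # grad_acc_steps = 32
+     num_train_epochs=6,                    # n_epochs = 6
+     learning_rate=7e-6,
+     lr_scheduler_type="linear",            # lr_schedule = LinearWarmup
+     warmup_ratio=0.2,                      # warmup_proportion = 0.2
+ )
+ # max_seq_len, doc_stride and max_query_length belong to the tokenization
+ # step (windowing long contexts), not to TrainingArguments.
+ ```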
+ 
+ ## Usage
+ 
+ ### In Haystack
+ Haystack is an NLP framework by deepset. You can use this model in a Haystack pipeline to do question answering at scale (over many documents). To load the model in [Haystack](https://github.com/deepset-ai/haystack/):
+ ```python
+ from haystack.nodes import FARMReader, TransformersReader  # Haystack v1.x
+ 
+ reader = FARMReader(model_name_or_path="deepset/deberta-v3-large-squad2")
+ # or
+ reader = TransformersReader(model_name_or_path="deepset/deberta-v3-large-squad2", tokenizer="deepset/deberta-v3-large-squad2")
+ ```
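+ 
+ Beyond loading the reader, the typical use is an end-to-end extractive QA pipeline. Below is a hedged sketch assuming a recent Haystack v1.x release (`InMemoryDocumentStore` with `use_bm25=True`, `BM25Retriever`, `ExtractiveQAPipeline`); the document text and query are made-up examples.
+ 
+ ```python
+ from haystack.document_stores import InMemoryDocumentStore
+ from haystack.nodes import BM25Retriever, FARMReader
+ from haystack.pipelines import ExtractiveQAPipeline
+ 
+ # Index a toy document; in practice you would write many documents.
+ document_store = InMemoryDocumentStore(use_bm25=True)
+ document_store.write_documents([{"content": "Haystack is an open-source NLP framework built by deepset."}])
+ 
+ retriever = BM25Retriever(document_store=document_store)
+ reader = FARMReader(model_name_or_path="deepset/deberta-v3-large-squad2")
+ pipe = ExtractiveQAPipeline(reader, retriever)
+ 
+ prediction = pipe.run(
+     query="Who builds Haystack?",
+     params={"Retriever": {"top_k": 10}, "Reader": {"top_k": 3}},
+ )
+ print(prediction["answers"][0].answer)
+ ```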
+ 
+ ### In Transformers
+ ```python
+ from transformers import AutoModelForQuestionAnswering, AutoTokenizer, pipeline
+ 
+ model_name = "deepset/deberta-v3-large-squad2"
+ 
+ # a) Get predictions
+ nlp = pipeline('question-answering', model=model_name, tokenizer=model_name)
+ QA_input = {
+     'question': 'Why is model conversion important?',
+     'context': 'The option to convert models between FARM and transformers gives freedom to the user and lets people easily switch between frameworks.'
+ }
+ res = nlp(QA_input)
+ 
+ # b) Load model & tokenizer
+ model = AutoModelForQuestionAnswering.from_pretrained(model_name)
+ tokenizer = AutoTokenizer.from_pretrained(model_name)
+ ```
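+ 
+ If you want to see what the `question-answering` pipeline does under the hood, here is a minimal sketch that picks the answer span directly from the model's start/end logits. It uses greedy argmax decoding and ignores the no-answer case for brevity, so it is an illustration rather than a full decoding routine.
+ 
+ ```python
+ import torch
+ from transformers import AutoModelForQuestionAnswering, AutoTokenizer
+ 
+ model_name = "deepset/deberta-v3-large-squad2"
+ tokenizer = AutoTokenizer.from_pretrained(model_name)
+ model = AutoModelForQuestionAnswering.from_pretrained(model_name)
+ 
+ question = "Why is model conversion important?"
+ context = "The option to convert models between FARM and transformers gives freedom to the user and lets people easily switch between frameworks."
+ 
+ inputs = tokenizer(question, context, return_tensors="pt")
+ with torch.no_grad():
+     outputs = model(**inputs)
+ 
+ # Most likely start and end token positions for the answer span.
+ start = int(outputs.start_logits.argmax())
+ end = int(outputs.end_logits.argmax())
+ answer = tokenizer.decode(inputs["input_ids"][0][start : end + 1])
+ print(answer)
+ ```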
+ 
+ ## Performance
+ Evaluated on the SQuAD 2.0 dev set with the [official eval script](https://worksheets.codalab.org/rest/bundles/0x6b567e1cf2e041ec80d7098f031c5c9e/contents/blob/).
+ 
+ ```
+ "exact": 87.6105449338836,
+ "f1": 90.75307008866517,
+ 
+ "total": 11873,
+ "HasAns_exact": 84.37921727395411,
+ "HasAns_f1": 90.6732795483674,
+ "HasAns_total": 5928,
+ "NoAns_exact": 90.83263246425568,
+ "NoAns_f1": 90.83263246425568,
+ "NoAns_total": 5945
+ ```
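+ 
+ The numbers above come from the official script. As a lighter-weight alternative, the `evaluate` library ships a `squad_v2` metric that computes the same exact-match and F1 measures; here is a hedged sketch with a single toy prediction (the `id`, texts and offset are made up).
+ 
+ ```python
+ import evaluate
+ 
+ squad_v2_metric = evaluate.load("squad_v2")
+ 
+ predictions = [
+     {"id": "q1", "prediction_text": "Normandy", "no_answer_probability": 0.0}
+ ]
+ references = [
+     {"id": "q1", "answers": {"text": ["Normandy"], "answer_start": [159]}}
+ ]
+ 
+ results = squad_v2_metric.compute(predictions=predictions, references=references)
+ print(results["exact"], results["f1"])  # 100.0 100.0 for this toy case
+ ```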
+ 
+ ## About us
+ <div class="grid lg:grid-cols-2 gap-x-4 gap-y-3">
+ <div class="w-full h-40 object-cover mb-2 rounded-lg flex items-center justify-center">
+ <img alt="" src="https://huggingface.co/spaces/deepset/README/resolve/main/haystack-logo-colored.svg" class="w-40"/>
+ </div>
+ <div class="w-full h-40 object-cover mb-2 rounded-lg flex items-center justify-center">
+ <img alt="" src="https://huggingface.co/spaces/deepset/README/resolve/main/deepset-logo-colored.svg" class="w-40"/>
+ </div>
+ </div>
+ 
+ [deepset](http://deepset.ai/) is the company behind the open-source NLP framework [Haystack](https://haystack.deepset.ai/), which is designed to help you build production-ready NLP systems for question answering, summarization, ranking, and more.
+ 
+ Some of our other work:
+ - [Distilled roberta-base-squad2 (aka "tinyroberta-squad2")](https://huggingface.co/deepset/tinyroberta-squad2)
+ - [German BERT (aka "bert-base-german-cased")](https://deepset.ai/german-bert)
+ - [GermanQuAD and GermanDPR datasets and models (aka "gelectra-base-germanquad", "gbert-base-germandpr")](https://deepset.ai/germanquad)
+ 
+ ## Get in touch and join the Haystack community
+ 
+ <p>For more info on Haystack, visit our <strong><a href="https://github.com/deepset-ai/haystack">GitHub</a></strong> repo and <strong><a href="https://haystack.deepset.ai">Documentation</a></strong>.
+ 
+ We also have a <strong><a class="h-7" href="https://haystack.deepset.ai/community/join">Discord community open to everyone!</a></strong></p>
+ 
+ [Twitter](https://twitter.com/deepset_ai) | [LinkedIn](https://www.linkedin.com/company/deepset-ai/) | [Discord](https://haystack.deepset.ai/community/join) | [GitHub Discussions](https://github.com/deepset-ai/haystack/discussions) | [Website](https://deepset.ai)
+ 
+ By the way: [we're hiring!](http://www.deepset.ai/jobs)
added_tokens.json ADDED
@@ -0,0 +1 @@
+ {"[MASK]": 128000}
config.json ADDED
@@ -0,0 +1,41 @@
+ {
+   "_name_or_path": "checkpoint_deberta_large/language_model.bin",
+   "architectures": [
+     "DebertaV2ForQuestionAnswering"
+   ],
+   "attention_probs_dropout_prob": 0.1,
+   "hidden_act": "gelu",
+   "hidden_dropout_prob": 0.1,
+   "hidden_size": 1024,
+   "initializer_range": 0.02,
+   "intermediate_size": 4096,
+   "language": "english",
+   "layer_norm_eps": 1e-07,
+   "max_position_embeddings": 512,
+   "max_relative_positions": -1,
+   "model_type": "deberta-v2",
+   "name": "DebertaV2",
+   "norm_rel_ebd": "layer_norm",
+   "num_attention_heads": 16,
+   "num_hidden_layers": 24,
+   "pad_token_id": 0,
+   "pooler_dropout": 0,
+   "pooler_hidden_act": "gelu",
+   "pooler_hidden_size": 1024,
+   "pos_att_type": [
+     "p2c",
+     "c2p"
+   ],
+   "position_biased_input": false,
+   "position_buckets": 256,
+   "relative_attention": true,
+   "share_att_key": true,
+   "summary_activation": "tanh",
+   "summary_last_dropout": 0,
+   "summary_type": "first",
+   "summary_use_proj": false,
+   "torch_dtype": "float32",
+   "transformers_version": "4.19.0",
+   "type_vocab_size": 0,
+   "vocab_size": 128100
+ }
model.safetensors ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:6b812abad65a88f85d211877306e139c6ce643768b3138a7d490b46f010ceab8
+ size 1736110072
pytorch_model.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:cc31220db2ad55672fea1f369664c17628c021b528b1ae65b4b3f2bc7c6910e4
+ size 1736194351
special_tokens_map.json ADDED
@@ -0,0 +1 @@
+ {"bos_token": "[CLS]", "eos_token": "[SEP]", "unk_token": "[UNK]", "sep_token": "[SEP]", "pad_token": "[PAD]", "cls_token": "[CLS]", "mask_token": "[MASK]"}
spm.model ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:c679fbf93643d19aab7ee10c0b99e460bdbc02fedf34b92b05af343b4af586fd
+ size 2464616
tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
tokenizer_config.json ADDED
@@ -0,0 +1 @@
+ {"do_lower_case": false, "model_max_length": 512, "bos_token": "[CLS]", "eos_token": "[SEP]", "unk_token": "[UNK]", "sep_token": "[SEP]", "pad_token": "[PAD]", "cls_token": "[CLS]", "mask_token": "[MASK]", "split_by_punct": false, "vocab_type": "spm", "special_tokens_map_file": null, "name_or_path": "checkpoint_deberta_large", "sp_model_kwargs": {}, "tokenizer_class": "DebertaV2Tokenizer"}