bogdankostic committed on
Commit
26f4844
1 Parent(s): 7bc6dc7

Add model files

Browse files
README.md ADDED
@@ -0,0 +1,19 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ language: en
3
+ tags:
4
+ - tapas
5
+ license: apache-2.0
6
+ ---
7
+
8
+ This model contains the converted PyTorch checkpoint of the original TensorFlow model available in the [TaPas repository](https://github.com/google-research/tapas/blob/master/DENSE_TABLE_RETRIEVER.md#reader-models).
9
+ It is described in Herzig et al.'s (2021) [paper](https://aclanthology.org/2021.naacl-main.43/) _Open Domain Question Answering over Tables via Dense Retrieval_.
10
+
11
+
12
+ # Usage
13
+ ## In Haystack
14
+ If you want to use this model for question answering over tables, you can load it in [Haystack](https://github.com/deepset-ai/haystack/):
15
+ ```python
16
+ from haystack.nodes import TableReader
17
+
18
+ table_reader = TableReader(model_name_or_path="deepset/tapas-large-nq-reader")
19
+ ```
config.json ADDED
@@ -0,0 +1,62 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "aggregation_labels": null,
3
+ "aggregation_loss_weight": 1.0,
4
+ "aggregation_temperature": 1.0,
5
+ "allow_empty_column_selection": false,
6
+ "answer_loss_cutoff": null,
7
+ "answer_loss_importance": 1.0,
8
+ "architectures": [
9
+ "TapasForScoredQA"
10
+ ],
11
+ "attention_probs_dropout_prob": 0.034,
12
+ "average_approximation_function": "ratio",
13
+ "average_logits_per_cell": false,
14
+ "cell_selection_preference": null,
15
+ "disable_per_token_loss": false,
16
+ "hidden_act": "gelu",
17
+ "hidden_dropout_prob": 0.2,
18
+ "hidden_size": 1024,
19
+ "huber_loss_delta": null,
20
+ "init_cell_selection_weights_to_zero": false,
21
+ "initializer_range": 0.02,
22
+ "intermediate_size": 4096,
23
+ "layer_norm_eps": 1e-12,
24
+ "max_num_columns": 32,
25
+ "max_num_rows": 64,
26
+ "max_position_embeddings": 1024,
27
+ "model_type": "tapas",
28
+ "no_aggregation_label_index": null,
29
+ "num_aggregation_labels": 0,
30
+ "num_attention_heads": 16,
31
+ "num_hidden_layers": 24,
32
+ "pad_token_id": 0,
33
+ "positive_label_weight": 10.0,
34
+ "reset_position_index_per_cell": true,
35
+ "select_one_column": true,
36
+ "softmax_temperature": 1.0,
37
+ "torch_dtype": "float32",
38
+ "transformers_version": "4.16.0.dev0",
39
+ "type_vocab_size": [
40
+ 3,
41
+ 256,
42
+ 256,
43
+ 2,
44
+ 256,
45
+ 256,
46
+ 10
47
+ ],
48
+ "type_vocab_sizes": [
49
+ 3,
50
+ 256,
51
+ 256,
52
+ 2,
53
+ 256,
54
+ 256,
55
+ 10
56
+ ],
57
+ "use_answer_as_supervision": null,
58
+ "use_gumbel_for_aggregation": false,
59
+ "use_gumbel_for_cells": false,
60
+ "use_normalized_answer_loss": false,
61
+ "vocab_size": 30522
62
+ }
pytorch_model.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4267f2a38e29c2e2e9fadef32ab4a70a81a07ffeaf9b554dfa1d2ce2615ccee6
3
+ size 1347084063
special_tokens_map.json ADDED
@@ -0,0 +1 @@
 
1
+ {"unk_token": "[UNK]", "sep_token": "[SEP]", "pad_token": "[PAD]", "cls_token": "[CLS]", "mask_token": "[MASK]", "additional_special_tokens": ["[EMPTY]"]}
tokenizer_config.json ADDED
@@ -0,0 +1 @@
 
1
+ {"do_lower_case": true, "do_basic_tokenize": true, "never_split": null, "unk_token": "[UNK]", "sep_token": "[SEP]", "pad_token": "[PAD]", "cls_token": "[CLS]", "mask_token": "[MASK]", "empty_token": "[EMPTY]", "tokenize_chinese_chars": true, "strip_accents": null, "cell_trim_length": -1, "max_column_id": null, "max_row_id": null, "strip_column_names": false, "update_answer_coordinates": false, "min_question_length": null, "max_question_length": null, "model_max_length": 512, "additional_special_tokens": ["[EMPTY]"], "tokenizer_class": "TapasTokenizer"}
vocab.txt ADDED
The diff for this file is too large to render. See raw diff