Timothyxxx committed on
Commit
f6f97d8
1 Parent(s): c747572
This view is limited to 50 files because it contains too many changes.   See raw diff
Files changed (50) hide show
  1. .DS_Store +0 -0
  2. LICENSE +21 -0
  3. README.md +1 -1
  4. app.py +228 -0
  5. datasets/missing_squall.py +133 -0
  6. demos/get_key.py +10 -0
  7. generation/generator.py +180 -0
  8. generation/prompt.py +499 -0
  9. nsql/.DS_Store +0 -0
  10. nsql/database.py +202 -0
  11. nsql/nsql_exec.py +161 -0
  12. nsql/nsql_exec_python.py +129 -0
  13. nsql/parser.py +179 -0
  14. nsql/qa_module/0af77d205dc6673001cdd9ea753f880e.JPG +0 -0
  15. nsql/qa_module/__init__.py +0 -0
  16. nsql/qa_module/openai_qa.py +196 -0
  17. nsql/qa_module/vqa.py +10 -0
  18. requirements.txt +209 -0
  19. resources/intro.png +0 -0
  20. resources/introduction.md +2 -0
  21. resources/summary.md +4 -0
  22. retrieval/retrieve_pool.py +55 -0
  23. retrieval/retriever.py +95 -0
  24. templates/.DS_Store +0 -0
  25. templates/mmqa_qa_retrieve_pool_v2.json +1600 -0
  26. templates/prompts/prompt_mmqa_v2.txt +523 -0
  27. templates/prompts/prompt_mmqa_v2_Qa.txt +521 -0
  28. templates/prompts/prompt_qa_balanced.txt +275 -0
  29. templates/prompts/prompt_qa_balanced_no_table_input.txt +36 -0
  30. templates/prompts/prompt_tab_fact_puresql_v2.txt +277 -0
  31. templates/prompts/prompt_tab_fact_sqllike_v3.txt +358 -0
  32. templates/prompts/prompt_tab_fact_word.txt +228 -0
  33. templates/prompts/prompt_wikitq_puresql_v3.txt +287 -0
  34. templates/prompts/prompt_wikitq_python_simplified_v4.txt +426 -0
  35. templates/prompts/prompt_wikitq_v3.txt +295 -0
  36. templates/qa_retrieve_pool.json +3885 -0
  37. utils/.DS_Store +0 -0
  38. utils/__init__.py +0 -0
  39. utils/errors.py +4 -0
  40. utils/evaluator.py +105 -0
  41. utils/gpt2/config.json +31 -0
  42. utils/gpt2/merges.txt +0 -0
  43. utils/gpt2/tokenizer.json +0 -0
  44. utils/gpt2/vocab.json +0 -0
  45. utils/matcher.py +65 -0
  46. utils/normalizer.py +498 -0
  47. utils/sql/__init__.py +0 -0
  48. utils/sql/all_keywords.py +31 -0
  49. utils/sql/extraction_from_sql.py +622 -0
  50. utils/sql/process_sql.py +595 -0
.DS_Store ADDED
Binary file (10.2 kB). View file
 
LICENSE ADDED
@@ -0,0 +1,21 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ MIT License
2
+
3
+ Copyright (c) 2022 Tianbao Xie
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
README.md CHANGED
@@ -1,6 +1,6 @@
1
  ---
2
  title: Binder
3
- emoji: 👀
4
  colorFrom: green
5
  colorTo: green
6
  sdk: streamlit
 
1
  ---
2
  title: Binder
3
+ emoji: 🔗
4
  colorFrom: green
5
  colorTo: green
6
  sdk: streamlit
app.py ADDED
@@ -0,0 +1,228 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import json
2
+ import os
3
+
4
+ import pandas as pd
5
+ import streamlit as st
6
+ import argparse
7
+ import traceback
8
+ from typing import Dict
9
+ import requests
10
+ from utils.utils import load_data_split
11
+ from nsql.database import NeuralDB
12
+ from nsql.nsql_exec import NSQLExecutor
13
+ from nsql.nsql_exec_python import NPythonExecutor
14
+ from generation.generator import Generator
15
+ import time
16
+
17
+ ROOT_DIR = os.path.join(os.path.dirname(__file__), "./")
18
+ EXAMPLE_TABLES = {
19
+ "Estonia men's national volleyball team": (558, "what are the total number of players from france?"),
20
+ "Highest mountain peaks of California": (5, "which is the lowest mountain?"),
21
+ "2010–11 UAB Blazers men's basketball team": (1, "how many players come from alabama?"),
22
+ "1999 European Tour": (209, "how many consecutive times was south africa the host country?"),
23
+ "Nissan SR20DET": (438, "which car is the only one with more than 230 hp?"),
24
+ }
25
+
26
+
27
@st.cache
def load_data():
    """Load (and cache across Streamlit reruns) the validation split of the
    missing_squall dataset via the project's `load_data_split` helper."""
    return load_data_split("missing_squall", "validation")
30
+
31
+
32
@st.cache
def get_key():
    """Fetch (and cache across Streamlit reruns) one OpenAI API key from the
    springboard server that guards the real keys.

    Returns:
        str: the API key handed out by the springboard server.
    """
    # print the public IP of the demo machine
    ip = requests.get('https://checkip.amazonaws.com', timeout=10).text.strip()
    print(ip)

    URL = "http://54.242.37.195:20217/api/predict"
    # The springboard machine we built to protect the key, 20217 is the birthday of Tianbao's girlfriend
    # we will only let the demo machine have the access to the keys

    # Finite timeouts keep the Streamlit app from hanging forever when the
    # IP-echo service or the key server is unreachable (the original calls
    # had no timeout, so a dead server would block the page indefinitely).
    one_key = requests.post(url=URL, json={"data": "Hi, binder server. Give me a key!"}, timeout=30).json()['data'][0]
    return one_key
44
+
45
+
46
def read_markdown(path):
    """Read the markdown file at `path` and render it into the Streamlit page
    (HTML inside the markdown is allowed)."""
    with open(path, "r") as source:
        content = source.read()
    st.markdown(content, unsafe_allow_html=True)
50
+
51
+
52
def generate_binder_program(_args, _generator, _data_item):
    """Generate one Binder program for `_data_item` with the Codex wrapper.

    Builds a few-shot prompt from `_args.prompt_file` plus a generate prompt
    for the data item, shrinks the number of shots until the prompt fits the
    model's input budget, then runs a single generation pass.

    Returns the text of the first (highest-ranked) candidate program.
    """
    n_shots = _args.n_shots
    few_shot_prompt = _generator.build_few_shot_prompt_from_file(
        file_path=_args.prompt_file,
        n_shots=n_shots
    )
    generate_prompt = _generator.build_generate_prompt(
        data_item=_data_item,
        generate_type=(_args.generate_type,)
    )
    prompt = few_shot_prompt + "\n\n" + generate_prompt

    # Ensure the input length fit Codex max input tokens by shrinking the n_shots
    max_prompt_tokens = _args.max_api_total_tokens - _args.max_generation_tokens
    # NOTE(review): the tokenizer is re-loaded from disk on every call; a
    # cached tokenizer would make repeated generations cheaper — confirm.
    from transformers import AutoTokenizer
    tokenizer = AutoTokenizer.from_pretrained(pretrained_model_name_or_path=os.path.join(ROOT_DIR, "utils", "gpt2"))
    while len(tokenizer.tokenize(prompt)) >= max_prompt_tokens:  # TODO: Add shrink rows
        # Drop one shot at a time until the prompt fits the token budget.
        # If even zero shots do not fit, the assert below aborts.
        n_shots -= 1
        assert n_shots >= 0
        few_shot_prompt = _generator.build_few_shot_prompt_from_file(
            file_path=_args.prompt_file,
            n_shots=n_shots
        )
        prompt = few_shot_prompt + "\n\n" + generate_prompt

    response_dict = _generator.generate_one_pass(
        prompts=[("0", prompt)],  # the "0" is the place taker, take effect only when there are multi threads
        verbose=_args.verbose
    )
    print(response_dict)
    # response_dict maps example id -> [(text, logprob), ...]; take the text
    # of the first candidate for the single example "0".
    return response_dict["0"][0][0]
83
+
84
+
85
# Set up command-line options. Defaults target the WikiTQ SQL prompt; the
# Streamlit widgets below override prompt_file/generate_type at runtime.
parser = argparse.ArgumentParser()

parser.add_argument('--prompt_file', type=str, default='templates/prompts/prompt_wikitq_v3.txt')
# Binder program generation options
parser.add_argument('--prompt_style', type=str, default='create_table_select_3_full_table',
                    choices=['create_table_select_3_full_table',
                             'create_table_select_full_table',
                             'create_table_select_3',
                             'create_table',
                             'create_table_select_3_full_table_w_all_passage_image',
                             'create_table_select_3_full_table_w_gold_passage_image',
                             'no_table'])
parser.add_argument('--generate_type', type=str, default='nsql',
                    choices=['nsql', 'sql', 'answer', 'npython', 'python'])
parser.add_argument('--n_shots', type=int, default=14)
parser.add_argument('--seed', type=int, default=42)

# Codex options
# todo: Allow adjusting Codex parameters
parser.add_argument('--engine', type=str, default="code-davinci-002")
parser.add_argument('--max_generation_tokens', type=int, default=512)
# Total budget (input + output tokens) allowed by the API for one call.
parser.add_argument('--max_api_total_tokens', type=int, default=8001)
parser.add_argument('--temperature', type=float, default=0.)
parser.add_argument('--sampling_n', type=int, default=1)
parser.add_argument('--top_p', type=float, default=1.0)
parser.add_argument('--stop_tokens', type=str, default='\n\n',
                    help='Split stop tokens by ||')
parser.add_argument('--qa_retrieve_pool_file', type=str, default='templates/qa_retrieve_pool.json')

# debug options
# NOTE(review): 'store_false' means verbose defaults to True and passing
# -v/--verbose turns verbosity OFF — confirm this inversion is intended.
parser.add_argument('-v', '--verbose', action='store_false')
args = parser.parse_args()
118
# Fetch one OpenAI API key from the springboard server (network call).
keys = [get_key()]

# The title
st.markdown("# Binder Playground")

# Summary about Binder
read_markdown('resources/summary.md')

# Introduction of Binder
# todo: Write Binder introduction here
# read_markdown('resources/introduction.md')
st.image('resources/intro.png')

# Upload tables/Switch tables

st.markdown('### Try Binder!')
col1, _ = st.columns(2)
with col1:
    selected_table_title = st.selectbox(
        "Select an example table",
        (
            "Estonia men's national volleyball team",
            "Highest mountain peaks of California",
            "2010–11 UAB Blazers men's basketball team",
            "1999 European Tour",
            "Nissan SR20DET",
        )
    )

# Here we just use ourselves'
# EXAMPLE_TABLES maps title -> (index into the validation split, demo question).
data_items = load_data()
data_item = data_items[EXAMPLE_TABLES[selected_table_title][0]]
table = data_item['table']
header, rows, title = table['header'], table['rows'], table['page_title']
db = NeuralDB(
    [{"title": title, "table": table}])  # todo: try to cache this db instead of re-creating it again and again.
df = db.get_table_df()
st.markdown("Title: {}".format(title))
st.dataframe(df)

# Let user input the question
question = st.text_input(
    "Ask a question about the table:",
    value=EXAMPLE_TABLES[selected_table_title][1]
)
with col1:
    # todo: Why selecting language will flush the page?
    selected_language = st.selectbox(
        "Select a programming language",
        ("SQL", "Python"),
    )
# Map the chosen language to the matching prompt file and generation mode.
if selected_language == 'SQL':
    args.prompt_file = 'templates/prompts/prompt_wikitq_v3.txt'
    args.generate_type = 'nsql'
elif selected_language == 'Python':
    args.prompt_file = 'templates/prompts/prompt_wikitq_python_simplified_v4.txt'
    args.generate_type = 'npython'
else:
    raise ValueError(f'{selected_language} language is not supported.')
button = st.button("Generate program")
if not button:
    # Halt the script until the user clicks (Streamlit reruns top-to-bottom).
    st.stop()
180
+
181
# Generate Binder Program
generator = Generator(args, keys=keys)
with st.spinner("Generating program ..."):
    binder_program = generate_binder_program(args, generator,
                                             {"question": question, "table": db.get_table_df(), "title": title})


# Do execution
st.markdown("#### Binder program")
if selected_language == 'SQL':
    with st.container():
        st.write(binder_program)
    executor = NSQLExecutor(args, keys=keys)
elif selected_language == 'Python':
    st.code(binder_program, language='python')
    executor = NPythonExecutor(args, keys=keys)
    # The Python executor runs on a raw DataFrame rather than a NeuralDB.
    db = db.get_table_df()
else:
    raise ValueError(f'{selected_language} language is not supported.')
try:
    # The SQL executor dumps its step-by-step intermediate results under
    # tmp_for_vis/; they are read back below for visualization.
    os.makedirs('tmp_for_vis/', exist_ok=True)
    with st.spinner("Executing program ..."):
        exec_answer = executor.nsql_exec(binder_program, db)
    # todo: Make it more pretty!
    # todo: Do we need vis for Python?
    if selected_language == 'SQL':
        with open("tmp_for_vis/tmp_for_vis_steps.txt", "r") as f:
            steps = json.load(f)
        st.markdown("#### Steps & Intermediate results")
        for i, step in enumerate(steps):
            st.markdown(step)
            st.text("↓")
            with st.spinner('...'):
                # Purely cosmetic pause so the step-by-step reveal is visible.
                time.sleep(1)
            with open("tmp_for_vis/result_step_{}.txt".format(i), "r") as f:
                result_in_this_step = json.load(f)
            if isinstance(result_in_this_step, Dict):
                # NOTE(review): the outer pd.DataFrame(...) wrap is redundant —
                # the inner call already builds the DataFrame (kept byte-identical).
                st.dataframe(pd.DataFrame(pd.DataFrame(result_in_this_step["rows"], columns=result_in_this_step["header"])))
            else:
                st.markdown(result_in_this_step)
            st.text("↓")
    elif selected_language == 'Python':
        pass
    if isinstance(exec_answer, list) and len(exec_answer) == 1:
        # Unwrap single-element answers for cleaner display.
        exec_answer = exec_answer[0]
    st.markdown(f'Execution answer: {exec_answer}')
except Exception as e:
    # Best-effort demo: show nothing in the UI but log the traceback server-side.
    traceback.print_exc()
datasets/missing_squall.py ADDED
@@ -0,0 +1,133 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # coding=utf-8
2
+ # Copyright 2021 The HuggingFace Datasets Authors, The Google AI Language Team Authors and the current dataset script contributor.
3
+ #
4
+ # Licensed under the Apache License, Version 2.0 (the "License");
5
+ # you may not use this file except in compliance with the License.
6
+ # You may obtain a copy of the License at
7
+ #
8
+ # http://www.apache.org/licenses/LICENSE-2.0
9
+ #
10
+ # Unless required by applicable law or agreed to in writing, software
11
+ # distributed under the License is distributed on an "AS IS" BASIS,
12
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13
+ # See the License for the specific language governing permissions and
14
+ # limitations under the License.
15
+ """The WikiTableQuestions dataset is for the task of question answering on semi-structured HTML tables"""
16
+
17
+ import json
18
+ import os
19
+ import datasets
20
+ from utils.wtq.utils import _load_table_w_page as _load_table
21
+
22
+ # Find for instance the citation on arxiv or on the dataset repo/website
23
+ _CITATION = """\
24
+ @inproceedings{pasupat-liang-2015-compositional,
25
+ title = "Compositional Semantic Parsing on Semi-Structured Tables",
26
+ author = "Pasupat, Panupong and
27
+ Liang, Percy",
28
+ booktitle = "Proceedings of the 53rd Annual Meeting of the Association for Computational Linguistics and the 7th International Joint Conference on Natural Language Processing (Volume 1: Long Papers)",
29
+ month = jul,
30
+ year = "2015",
31
+ address = "Beijing, China",
32
+ publisher = "Association for Computational Linguistics",
33
+ url = "https://aclanthology.org/P15-1142",
34
+ doi = "10.3115/v1/P15-1142",
35
+ pages = "1470--1480",
36
+ }
37
+ """
38
+
39
+ _DESCRIPTION = """\
40
+ Two important aspects of semantic parsing for question answering are the breadth of the knowledge source and the depth of
41
+ logical compositionality. While existing work trades off one aspect for another, this paper simultaneously makes progress
42
+ on both fronts through a new task: answering complex questions on semi-structured tables using question-answer pairs as
43
+ supervision. The central challenge arises from two compounding factors: the broader domain results in an open-ended set
44
+ of relations, and the deeper compositionality results in a combinatorial explosion in the space of logical forms. We
45
+ propose a logical-form driven parsing algorithm guided by strong typing constraints and show that it obtains significant
46
+ improvements over natural baselines. For evaluation, we created a new dataset of 22,033 complex questions on Wikipedia
47
+ tables, which is made publicly available.
48
+ """
49
+
50
+ _HOMEPAGE = "https://ppasupat.github.io/WikiTableQuestions/"
51
+
52
+ _LICENSE = "CC-BY-SA-4.0 License"
53
+
54
+ _URL = "https://github.com/ppasupat/WikiTableQuestions/archive/refs/heads/master.zip"
55
+ _SQUALL_URL = "https://github.com/tzshi/squall/archive/refs/heads/main.zip"
56
+
57
+
58
class WikiTableQuestion(datasets.GeneratorBasedBuilder):
    """The WikiTableQuestions dataset, restricted to the examples that are
    NOT annotated in SQUALL (hence "missing_squall")."""

    def _info(self):
        """Declare the schema: id/question/table_id strings, a table dict
        (page_title, header, rows) and a sequence of answer strings."""
        return datasets.DatasetInfo(
            description=_DESCRIPTION,
            features=datasets.Features(
                {
                    "id": datasets.Value("string"),
                    "question": datasets.Value("string"),
                    "table_id": datasets.Value("string"),
                    "table": {"page_title": datasets.Value("string"),
                              "header": datasets.features.Sequence(datasets.Value("string")),
                              "rows": datasets.features.Sequence(datasets.features.Sequence(datasets.Value("string")))},
                    "answer_text": datasets.features.Sequence(datasets.Value("string")),
                }
            ),
            supervised_keys=None,
            homepage=_HOMEPAGE,
            license=_LICENSE,
            citation=_CITATION,
        )

    def _split_generators(self, dl_manager):
        """Returns SplitGenerators for train/validation/test, downloading both
        the WTQ archive and the SQUALL annotations."""
        data_dir = os.path.join(dl_manager.download_and_extract(_URL), 'WikiTableQuestions-master')
        squall_dir = os.path.join(dl_manager.download_and_extract(_SQUALL_URL), 'squall-main')

        return [
            datasets.SplitGenerator(
                name=datasets.Split.TRAIN,
                gen_kwargs={"filepath": os.path.join(data_dir, "data/random-split-1-train.tsv"),
                            "data_dir": data_dir,
                            "squall_path": os.path.join(squall_dir, "data/squall.json")},
            ),
            datasets.SplitGenerator(
                name=datasets.Split.VALIDATION,
                gen_kwargs={"filepath": os.path.join(data_dir, "data/random-split-1-dev.tsv"),
                            "data_dir": data_dir,
                            "squall_path": os.path.join(squall_dir, "data/squall.json")},
            ),
            datasets.SplitGenerator(
                name=datasets.Split.TEST,
                gen_kwargs={"filepath": os.path.join(data_dir, "data/pristine-unseen-tables.tsv"),
                            "data_dir": data_dir,
                            "squall_path": os.path.join(squall_dir, "data/squall.json")},
            ),
        ]

    def _generate_examples(self, filepath, data_dir, squall_path):
        """Yields examples whose id is absent from the SQUALL annotations."""
        # Collect the ids covered by SQUALL. A set gives O(1) membership
        # tests; the original list made every `in` check an O(n) scan,
        # i.e. O(n*m) over the whole split.
        with open(squall_path) as f:
            squall_data = json.load(f)
        squall_ids = {squall_item["nt"] for squall_item in squall_data}
        # Each TSV line: data_id, question, table_id, gold_result_str
        with open(filepath, encoding="utf-8") as f:
            for idx, line in enumerate(f):
                # skip the header
                if idx == 0:
                    continue
                data_id, question, table_id, gold_result_str = line.strip("\n").split("\t")
                if data_id not in squall_ids:
                    # Gold answers are '|'-separated in the TSV.
                    gold_result = gold_result_str.split('|')
                    yield idx, {
                        "id": data_id,
                        "question": question,
                        "table_id": table_id,
                        # convert the .csv postfix to .tsv, for easier read-in
                        "table": _load_table(os.path.join(data_dir, table_id.replace('.csv', '.tsv'))),
                        "answer_text": gold_result,
                    }
demos/get_key.py ADDED
@@ -0,0 +1,10 @@
 
 
 
 
 
 
 
 
 
 
 
1
+ import requests
2
+
3
+
4
def get_key():
    """Fetch one OpenAI API key from the key-dispensing springboard server.

    Returns:
        str: the API key handed out by the server.
    """
    URL = "http://54.242.37.195:20217/api/predict"
    # The springboard machine we built to protect the key, 20217 is the birthday of Tianbao's girlfriend
    # we will only let the demo machine have the access to the keys

    # A finite timeout keeps callers from hanging forever when the springboard
    # machine is unreachable (the original request had no timeout).
    one_key = requests.post(url=URL, json={"data": "Hi, binder server. Give me a key!"}, timeout=30).json()['data'][0]
    return one_key
generation/generator.py ADDED
@@ -0,0 +1,180 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Generate nsql and questions.
3
+ """
4
+
5
+ from typing import Dict, List, Union, Tuple
6
+ import openai
7
+ import time
8
+
9
+ from generation.prompt import PromptBuilder
10
+
11
+
12
class Generator(object):
    """
    Codex generation wrapper.

    Holds a pool of OpenAI API keys that is rotated round-robin on every API
    call, and (optionally) a PromptBuilder for constructing prompts.
    """

    def __init__(self, args, keys=None):
        # args: parsed options (engine, temperature, sampling_n, ...).
        # keys: list of OpenAI API keys, rotated in _call_codex_api.
        self.args = args
        self.keys = keys
        self.current_key_id = 0

        # if the args provided, will initialize with the prompt builder for full usage
        self.prompt_builder = PromptBuilder(args) if args else None

    def prompt_row_truncate(
            self,
            prompt: str,
            num_rows_to_remain: int,
            table_end_token: str = '*/',
    ):
        """
        Fit prompt into max token limits by row truncation.

        Scans the text before the last `table_end_token` bottom-up for the
        last tab-separated line whose leading field (the row id) is
        <= num_rows_to_remain, and drops all later rows. Assumes table rows
        start with an integer row id followed by a tab.
        """
        table_end_pos = prompt.rfind(table_end_token)
        assert table_end_pos != -1
        prompt_part1, prompt_part2 = prompt[:table_end_pos], prompt[table_end_pos:]
        # Reverse so we walk the table rows from the bottom upwards.
        prompt_part1_lines = prompt_part1.split('\n')[::-1]
        trunc_line_index = None
        for idx, line in enumerate(prompt_part1_lines):
            if '\t' not in line:
                continue
            row_id = int(line.split('\t')[0])
            if row_id <= num_rows_to_remain:
                trunc_line_index = idx
                break
        # NOTE(review): if no line qualifies, trunc_line_index stays None and
        # the slice keeps everything (lst[None:] == lst) — confirm intended.
        new_prompt_part1 = '\n'.join(prompt_part1_lines[trunc_line_index:][::-1])
        prompt = new_prompt_part1 + '\n' + prompt_part2
        return prompt

    def build_few_shot_prompt_from_file(
            self,
            file_path: str,
            n_shots: int
    ):
        """
        Build few-shot prompt for generation from file.

        Shots in the file are separated by two consecutive blank lines; the
        first n_shots of them are joined into one prompt string.
        """
        with open(file_path, 'r') as f:
            lines = f.readlines()
        few_shot_prompt_list = []
        one_shot_prompt = ''
        last_line = None
        for line in lines:
            if line == '\n' and last_line == '\n':
                # Two consecutive blank lines terminate the current shot.
                few_shot_prompt_list.append(one_shot_prompt)
                one_shot_prompt = ''
            else:
                one_shot_prompt += line
            last_line = line
        few_shot_prompt_list.append(one_shot_prompt)
        few_shot_prompt_list = few_shot_prompt_list[:n_shots]
        # NOTE(review): n_shots == 0 leaves the list empty and the [-1] below
        # raises IndexError — confirm callers always pass n_shots >= 1.
        few_shot_prompt_list[-1] = few_shot_prompt_list[
            -1].strip()  # It is essential for prompting to remove extra '\n'
        few_shot_prompt = '\n'.join(few_shot_prompt_list)
        return few_shot_prompt

    def build_generate_prompt(
            self,
            data_item: Dict,
            generate_type: Tuple
    ):
        """
        Build the generate prompt by delegating to the PromptBuilder
        (requires the Generator to have been constructed with args).
        """
        return self.prompt_builder.build_generate_prompt(
            **data_item,
            generate_type=generate_type
        )

    def generate_one_pass(
            self,
            prompts: List[Tuple],
            verbose: bool = False
    ):
        """
        Generate one pass with codex according to the generation phase.

        prompts: list of (example_id, prompt_text) pairs.
        Returns {example_id: [(generated_text, sum_of_token_logprobs), ...]}.
        """
        # Each prompt yields sampling_n completions; map every result index
        # back to the example id that produced it.
        result_idx_to_eid = []
        for p in prompts:
            result_idx_to_eid.extend([p[0]] * self.args.sampling_n)
        prompts = [p[1] for p in prompts]

        start_time = time.time()

        result = self._call_codex_api(
            engine=self.args.engine,
            prompt=prompts,
            max_tokens=self.args.max_generation_tokens,
            temperature=self.args.temperature,
            top_p=self.args.top_p,
            n=self.args.sampling_n,
            stop=self.args.stop_tokens
        )
        print(f'Openai api one inference time: {time.time() - start_time}')

        if verbose:
            print('\n', '*' * 20, 'Codex API Call', '*' * 20)
            for prompt in prompts:
                print(prompt)
            print('\n')
            print('- - - - - - - - - - ->>')

        # parse api results
        response_dict = dict()
        for idx, g in enumerate(result['choices']):
            try:
                text = g['text']
                # Rank candidates by the sum of their token log-probabilities.
                logprob = sum(g['logprobs']['token_logprobs'])
                eid = result_idx_to_eid[idx]
                eid_pairs = response_dict.get(eid, None)
                if eid_pairs is None:
                    eid_pairs = []
                    response_dict[eid] = eid_pairs
                eid_pairs.append((text, logprob))

                if verbose:
                    print(text)

            except ValueError as e:
                # NOTE(review): if g['text'] itself raised, `text` is unbound
                # here and print(text) would NameError — confirm.
                if verbose:
                    print('----------- Error Msg--------')
                    print(e)
                    print(text)
                    print('-----------------------------')
                pass

        return response_dict

    def _call_codex_api(
            self,
            engine: str,
            prompt: Union[str, List],
            max_tokens,
            temperature: float,
            top_p: float,
            n: int,
            stop: List[str]
    ):
        # Retry indefinitely on API errors (rate limits etc.), rotating to the
        # next key in the pool on every attempt.
        start_time = time.time()
        result = None
        while result is None:
            try:
                key = self.keys[self.current_key_id]
                self.current_key_id = (self.current_key_id + 1) % len(self.keys)
                result = openai.Completion.create(
                    engine=engine,
                    prompt=prompt,
                    api_key=key,
                    max_tokens=max_tokens,
                    temperature=temperature,
                    top_p=top_p,
                    n=n,
                    stop=stop,
                    logprobs=1
                )
                print('Openai api inference time:', time.time() - start_time)
                return result
            except Exception as e:
                # Back off briefly before retrying with the next key.
                print(e, 'Retry.')
                time.sleep(5)
generation/prompt.py ADDED
@@ -0,0 +1,499 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Build NSQL generation prompt.
3
+ Two main parts:
4
+ 1) PromptBuilder makes prompt for calling codex to generate NSQL(Binder-SQL).
5
+ 2) OpenAIQAPromptBuilder makes prompt for calling codex to generate QA answers.
6
+ """
7
+
8
+ import random
9
+ from typing import Dict, Tuple
10
+ import pandas as pd
11
+ import copy
12
+
13
+ from utils.errors import DuplicateColumnsError
14
+ from utils.mmqa.image_stuff import get_caption_map
15
+ from retrieval.retrieve_pool import QAItem
16
+
17
+ from utils.normalizer import prepare_df_for_neuraldb_from_table
18
+
19
+
20
+ def _create_table_prompt(df: pd.DataFrame, title: str):
21
+ """
22
+ Return the CREATE TABLE clause as prompt.
23
+ """
24
+ string = "CREATE TABLE {}(\n".format(title)
25
+ for header in df.columns:
26
+ column_type = 'text'
27
+ try:
28
+ if df[header].dtype == 'int64':
29
+ column_type = 'int'
30
+ elif df[header].dtype == 'float64':
31
+ column_type = 'real'
32
+ elif df[header].dtype == 'datetime64':
33
+ column_type = 'datetime'
34
+ except AttributeError as e:
35
+ raise DuplicateColumnsError(e)
36
+
37
+ string += '\t{} {},\n'.format(header, column_type)
38
+ string = string.rstrip(',\n') + ')\n'
39
+ return string
40
+
41
+
42
+ class PromptBuilder(object):
43
    def __init__(self, args):
        # args: parsed options; prompt_style selects how tables are rendered.
        self.args = args
        self.prompt_style = args.prompt_style
        # Seeds the *global* random module so random column/operator picks
        # are reproducible (affects other users of `random` in the process).
        random.seed(args.seed)
47
+
48
+ def _select_x_prompt(self, df: pd.DataFrame, num_rows: int,
49
+ few_shot_demonstration=True):
50
+ """
51
+ Return the first X rows table contents as prompt.
52
+ """
53
+ if self.prompt_style == 'create_table_select_full_table':
54
+ string = '/*\nAll rows of the table:\nSELECT * FROM w;\n'
55
+ elif self.prompt_style == 'create_table_select_3':
56
+ string = '/*\n{} example rows:\nSELECT * FROM w LIMIT {};\n'.format(num_rows, num_rows)
57
+ elif self.prompt_style == 'create_table_select_3_hidden':
58
+ string = '/*\n{} example rows:\n'.format(num_rows)
59
+ elif few_shot_demonstration is True and self.prompt_style in \
60
+ ["create_table_select_3_full_table",
61
+ "create_table_select_3_full_table_w_gold_passage_image",
62
+ "create_table_select_3_full_table_w_all_passage_image"]:
63
+ string = '/*\n{} example rows:\nSELECT * FROM w LIMIT {};\n'.format(num_rows, num_rows)
64
+ elif few_shot_demonstration is False and self.prompt_style in \
65
+ ["create_table_select_3_full_table",
66
+ "create_table_select_3_full_table_w_gold_passage_image",
67
+ "create_table_select_3_full_table_w_all_passage_image"]:
68
+ string = '/*\nAll rows of the table:\nSELECT * FROM w;\n'
69
+ else:
70
+ raise ValueError(f"Select x prompt style {self.prompt_style} is not supported.")
71
+
72
+ for column_id, header in enumerate(df.columns):
73
+ string += str(header)
74
+ if column_id != len(df.columns) - 1:
75
+ string += '\t'
76
+ string += '\n'
77
+
78
+ for row_id, row in df.iloc[:num_rows].iterrows():
79
+ for column_id, header in enumerate(df.columns):
80
+ string += str(row[header])
81
+ if column_id != len(df.columns) - 1:
82
+ string += '\t'
83
+ string += '\n'
84
+ string += '*/\n'
85
+
86
+ return string
87
+
88
+ def _passage_prompt(self, passages, only_title, db_style_prompt=True):
89
+ """
90
+ Return the passage prompt.
91
+ """
92
+ if not db_style_prompt:
93
+ string = "Passages: "
94
+ for passage in passages:
95
+ if only_title:
96
+ string += passage['title'] + ';; '
97
+ else:
98
+ string += passage['title'] + f" ({passage['text']})" + ';; '
99
+ string = string.rstrip(';; ')
100
+ string += '\n'
101
+ return string
102
+ else:
103
+ if len(passages) == 0:
104
+ return ""
105
+ passage_table_prompt = ""
106
+ _header = []
107
+ _rows = [[]]
108
+ for passage in passages:
109
+ _header.append(passage['title'])
110
+ _rows[0].append(passage['text'])
111
+ passage_table = prepare_df_for_neuraldb_from_table({"header": _header, "rows": _rows})
112
+ passage_table_prompt += _create_table_prompt(passage_table, "Passages")
113
+ if not only_title:
114
+ passage_table_prompt += self._select_x_prompt(
115
+ df=passage_table,
116
+ num_rows=passage_table.shape[0]
117
+ )
118
+ return passage_table_prompt
119
+
120
+ def _image_prompt(self, images, only_title, db_style_prompt=True):
121
+ """
122
+ Return the image prompt.
123
+ """
124
+ if not db_style_prompt:
125
+ string = "Images: "
126
+ for image in images:
127
+ if only_title:
128
+ string += image['title'] + ';;'
129
+ else:
130
+ string += image['title'] + f" ({image['caption']})" + ';; '
131
+ string = string.rstrip(';; ')
132
+ string += '\n'
133
+ return string
134
+ else:
135
+ if len(images) == 0:
136
+ return ""
137
+ image_table_prompt = ""
138
+ _header = []
139
+ _rows = [[]]
140
+ for image in images:
141
+ _header.append(image['title'])
142
+ _rows[0].append(image['caption'])
143
+ image_table = prepare_df_for_neuraldb_from_table({"header": _header, "rows": _rows})
144
+ image_table_prompt += _create_table_prompt(image_table, "Images")
145
+ if not only_title:
146
+ image_table_prompt += self._select_x_prompt(
147
+ df=image_table,
148
+ num_rows=image_table.shape[0]
149
+ )
150
+ return image_table_prompt
151
+
152
+ def _pick_target_columns(self, df, strategy):
153
+ """
154
+ Pick the controllable target columns for generation.
155
+ """
156
+ if strategy == 'random':
157
+ return random.choice(list(df.columns) + ['*'])
158
+ elif strategy == 'traverse':
159
+ raise NotImplementedError
160
+ else:
161
+ return ValueError
162
+
163
+ def _pick_operators(self, df, strategy):
164
+ """
165
+ Pick the controllable operators for generation.
166
+ """
167
+ candidate_operators = ['none', 'count', 'max', 'min', 'sum']
168
+ if strategy == 'random':
169
+ return random.choice(candidate_operators)
170
+ elif strategy == 'traverse':
171
+ raise NotImplementedError
172
+ else:
173
+ return ValueError
174
+
175
+ def _pick_nested_levels(self, df, strategy):
176
+ """
177
+ Pick the controllable(maybe) nested levels for generation.
178
+ """
179
+ if strategy == 'fixed':
180
+ return 2
181
+ elif strategy == 'random':
182
+ raise NotImplementedError
183
+ elif strategy == 'traverse':
184
+ raise NotImplementedError
185
+ else:
186
+ raise ValueError
187
+
188
    def build_one_shot_prompt(
            self,
            prompt_type: Tuple,
            table: pd.DataFrame,
            question: str,
            answer_text: str,
            nsql: str,
            passages: Dict = None,
            images: Dict = None,
            title: str = None,
            only_title: bool = False,
            **kwargs
    ):
        """
        Build one-shot prompt with table-question-nsql.

        The prompt is: a table rendering chosen by self.prompt_style,
        optionally followed by passage/image renderings, then the
        question paired with its target (NeuralSQL / SQL / answer)
        selected by prompt_type.

        @param prompt_type: one of ('question', 'nsql'), ('question', 'sql'),
            ('question', 'answer').
        @param answer_text: joined with ', ' below, so despite the `str`
            annotation this is presumably an iterable of answer strings
            — TODO confirm against callers.
        @param passages: columnar dict with parallel 'id'/'title'/'url'/'text'
            lists (required by the *_w_all_passage_image style).
        @param images: columnar dict with parallel 'id'/'title'/'url'/'path'/
            'pic' lists (required by the *_w_all_passage_image style).
        @param only_title: forwarded to the passage/image prompt builders.
        @raise ValueError: for an unsupported prompt_style or prompt_type.
        """
        one_shot_prompt = ""
        # Table rendering, per prompt style.
        if self.prompt_style == 'create_table_select_full_table':
            one_shot_prompt += _create_table_prompt(table, title)
            one_shot_prompt += self._select_x_prompt(
                df=table,
                num_rows=table.shape[0]
            )
        elif self.prompt_style in ['create_table_select_3_full_table', 'create_table_select_3']:
            one_shot_prompt += _create_table_prompt(table, title)
            one_shot_prompt += self._select_x_prompt(
                df=table,
                num_rows=3,
            )
        elif self.prompt_style == 'create_table':
            one_shot_prompt += _create_table_prompt(table, title)
        elif self.prompt_style == 'no_table':
            # No table input, to test Codex QA with only internal knowledge
            pass
        elif self.prompt_style in ['create_table_select_3_full_table_w_all_passage_image']:
            assert passages is not None and images is not None
            one_shot_prompt += _create_table_prompt(table, title)
            one_shot_prompt += self._select_x_prompt(
                df=table,
                num_rows=3,
            )
            # Re-shape the columnar passage/image dicts into per-item dicts
            # expected by _passage_prompt / _image_prompt.
            all_passages, all_images = [], []
            caption_map = get_caption_map()

            for passage_idx in range(len(passages['id'])):
                all_passages.append({
                    'id': passages['id'][passage_idx],
                    'title': passages['title'][passage_idx],
                    'url': passages['url'][passage_idx],
                    'text': passages['text'][passage_idx]
                })

            for image_idx in range(len(images['id'])):
                all_images.append({
                    "id": images['id'][image_idx],
                    "title": images['title'][image_idx],
                    "url": images['url'][image_idx],
                    "path": images['path'][image_idx],
                    "pic": images['pic'][image_idx],
                    "caption": caption_map[images['id'][image_idx]]
                })

            one_shot_prompt += self._passage_prompt(
                passages=all_passages,
                only_title=only_title
            )
            one_shot_prompt += self._image_prompt(
                images=all_images,
                only_title=only_title
            )
        else:
            raise ValueError('{} is not supported.'.format(self.prompt_style))

        # question and nsql pairs
        if prompt_type == ('question', 'nsql'):
            one_shot_prompt += 'Q: {}\n'.format(question)
            one_shot_prompt += 'NeuralSQL: {}\n'.format(nsql)
        elif prompt_type == ('question', 'sql'):
            one_shot_prompt += 'Q: {}\n'.format(question)
            one_shot_prompt += 'SQL: {}\n'.format(nsql)
        elif prompt_type == ('question', 'answer'):
            one_shot_prompt += 'Q: {}\n'.format(question)
            one_shot_prompt += 'A: {}\n'.format(', '.join(answer_text))
        else:
            raise ValueError(f'Prompt type {prompt_type} is not supported.')

        return one_shot_prompt
275
+
276
    def build_generate_prompt(
            self,
            generate_type: Tuple,
            table: pd.DataFrame,
            question: str = None,
            passages: Dict = None,
            images: Dict = None,
            title: str = None,
            only_title: bool = False,
            supporting_context: Dict = None,
            **kwargs
    ):
        """
        Build the prompt of the generation sample.

        Layout: task instruction line -> table rendering (per
        self.prompt_style) -> optional passage/image renderings -> the
        question and a dangling target label ('NeuralSQL: ', 'SQL: ', ...)
        for the model to complete.

        @param generate_type: one-element tuple naming the target:
            ('answer',), ('nsql',), ('sql',), ('npython',) or ('python',);
            anything else is treated as NeuralSQL-and-question generation
            for the instruction, but rejected when emitting the target.
        @param passages: columnar dict with parallel 'id'/'title'/'url'/'text'
            lists (required by the *_w_*_passage_image styles).
        @param images: columnar dict with parallel 'id'/'title'/'url'/'path'/
            'pic' lists (required by the *_w_*_passage_image styles).
        @param supporting_context: dict with parallel 'doc_id'/'doc_part'
            lists selecting the gold passages/images (used only by the
            *_w_gold_passage_image style).
        @raise ValueError: for an unsupported prompt_style or generate_type.
        """
        generate_prompt = ""

        # task instruction
        if generate_type == ('answer',):
            generate_prompt += """\n-- Answer the question based on the given table below.\n\n"""
        elif generate_type == ('nsql',):
            generate_prompt += """\n-- Parse the question into NeuralSQL based on the given table below.\n\n"""
        elif generate_type == ('sql',):
            generate_prompt += """\n-- Parse the question into SQL based on the given table below.\n\n"""
        elif generate_type == ('npython',):
            generate_prompt += """\n-- Parse the question into NeuralPython based on the given table below.\n\n"""
        elif generate_type == ('python',):
            generate_prompt += """\n-- Parse the question into Python based on the given table below.\n\n"""
        else:
            generate_prompt += """\n-- Generate NeuralSQL and question pairs based on the given table below.\n\n"""

        # table prompt
        if self.prompt_style in ['create_table_select_full_table', 'create_table_select_3_full_table']:
            generate_prompt += _create_table_prompt(table, title)
            generate_prompt += self._select_x_prompt(
                df=table,
                num_rows=table.shape[0],
                few_shot_demonstration=False
            )
        elif self.prompt_style in ['create_table_select_3']:
            generate_prompt += _create_table_prompt(table, title)
            generate_prompt += self._select_x_prompt(
                df=table,
                num_rows=3,
                few_shot_demonstration=False
            )
        elif self.prompt_style == 'create_table':
            generate_prompt += _create_table_prompt(table, title)
        elif self.prompt_style == 'no_table':
            # No table input, to test Codex QA with only internal knowledge
            pass
        elif self.prompt_style in ['create_table_select_3_full_table_w_all_passage_image']:
            # Attach *all* provided passages and images.
            assert passages is not None and images is not None
            generate_prompt += _create_table_prompt(table, title)
            generate_prompt += self._select_x_prompt(
                df=table,
                num_rows=table.shape[0],
                few_shot_demonstration=False
            )
            all_passages, all_images = [], []
            caption_map = get_caption_map()

            for passage_idx in range(len(passages['id'])):
                all_passages.append({
                    'id': passages['id'][passage_idx],
                    'title': passages['title'][passage_idx],
                    'url': passages['url'][passage_idx],
                    'text': passages['text'][passage_idx]
                })

            for image_idx in range(len(images['id'])):
                all_images.append({
                    "id": images['id'][image_idx],
                    "title": images['title'][image_idx],
                    "url": images['url'][image_idx],
                    "path": images['path'][image_idx],
                    "pic": images['pic'][image_idx],
                    "caption": caption_map[images['id'][image_idx]]
                })

            generate_prompt += self._passage_prompt(
                passages=all_passages,
                only_title=only_title
            )
            generate_prompt += self._image_prompt(
                images=all_images,
                only_title=only_title
            )
        elif self.prompt_style in ['create_table_select_3_full_table_w_gold_passage_image']:
            # Attach only the gold passages/images named by supporting_context.
            assert passages is not None and images is not None
            generate_prompt += _create_table_prompt(table, title)
            generate_prompt += self._select_x_prompt(
                df=table,
                num_rows=table.shape[0],
                few_shot_demonstration=False
            )
            gold_passages, gold_images = [], []
            caption_map = get_caption_map()
            for doc_id, doc_part in zip(supporting_context['doc_id'], supporting_context['doc_part']):
                if doc_part == 'text':
                    passage_idx = passages['id'].index(doc_id)
                    gold_passages.append({
                        'id': passages['id'][passage_idx],
                        'title': passages['title'][passage_idx],
                        'url': passages['url'][passage_idx],
                        'text': passages['text'][passage_idx]
                    })
                elif doc_part == 'image':
                    image_idx = images['id'].index(doc_id)
                    gold_images.append({
                        "id": images['id'][image_idx],
                        "title": images['title'][image_idx],
                        "url": images['url'][image_idx],
                        "path": images['path'][image_idx],
                        "pic": images['pic'][image_idx],
                        "caption": caption_map[doc_id]
                    })
            generate_prompt += self._passage_prompt(
                passages=gold_passages,
                only_title=only_title
            )
            generate_prompt += self._image_prompt(
                images=gold_images,
                only_title=only_title
            )
        else:
            raise ValueError('{} is not supported.'.format(self.prompt_style))

        # determine the target to generate
        if generate_type == ('answer',):
            generate_prompt += 'Q: {}\n'.format(question)
            generate_prompt += 'A: '
        elif generate_type == ('nsql',):
            generate_prompt += 'Q: {}\n'.format(question)
            generate_prompt += 'NeuralSQL: '
        elif generate_type == ('sql',):
            generate_prompt += 'Q: {}\n'.format(question)
            generate_prompt += 'SQL: '
        elif generate_type == ('npython',):
            generate_prompt += 'Q: {}\n'.format(question)
            generate_prompt += 'NeuralPython: '
        elif generate_type == ('python',):
            generate_prompt += 'Q: {}\n'.format(question)
            generate_prompt += 'Python: '
        else:
            raise ValueError(f'Generate type {generate_type} is not supported.')

        return generate_prompt
424
+
425
+
426
class OpenAIQAPromptBuilder(object):
    """Builds demonstrations for the OpenAI QA module (map/ans sub-questions over a table)."""

    @staticmethod
    def table2codex_prompt(table, table_title=None, drop_row_id=True, ):
        """
        Linearize a {'header': [...], 'rows': [[...]]} table into the
        codex-style /* ... */ block, optionally preceded by its title.

        @param table: dict with 'header' and 'rows' keys; not mutated
            (a deep copy is taken).
        @param table_title: prepended as 'Table: <title>' when truthy.
        @param drop_row_id: drop the leading 'row_id' column when present.
        @return: the linearized table string.
        """
        _table = copy.deepcopy(table)
        header = _table['header']
        rows = _table['rows']
        if drop_row_id:
            if header[0] == "row_id":
                header = header[1:]
                rows = [_row[1:] for _row in rows]
        prompt_str = 'Table: {}\n'.format(table_title) if table_title else ''
        prompt_str += "/*\n"
        prompt_str += "\t".join(header) + "\n"
        prompt_str += '\n'.join(["\t".join([str(cell) for cell in row]) for row in rows]) + "\n"
        prompt_str += "*/"
        return prompt_str

    @staticmethod
    def build_one_shot_prompt(
            item: QAItem,
            answer_split_token: str = ';',
            verbose: bool = False,
            prompting_method='new_db',
            db_mapping_token="😅"
    ) -> str:
        """
        Build one-shot QA prompt.

        The item's qa_question is '<type>@<question>' where <type> is
        'map' (answer per row) or 'ans' (one answer for the whole table).

        @param item: demonstration with .qa_question, .qa_answer, .table, .title.
        @param answer_split_token: separator joining multiple answers.
        @param verbose: unused; kept for interface compatibility.
        @param prompting_method: 'basic' (Q/A style) or 'new_db' (answers
            appended per-row to the table with db_mapping_token).
        @param db_mapping_token: separator between a row and its answer in
            'new_db' map prompts.
        @raise ValueError: for an unknown QA type.
        """
        assert prompting_method in ['basic', 'new_db']
        # Bug fix: split only on the first '@' so questions that themselves
        # contain '@' no longer raise "too many values to unpack".
        qa_type, qa_question = item.qa_question.split('@', 1)
        prompt = ''
        db_prompt = OpenAIQAPromptBuilder.table2codex_prompt(item.table, item.title)
        prompt += "Give a database as shown below:\n{}\n\n".format(db_prompt)

        if prompting_method == 'basic':
            if qa_type == "map":
                prompt += "Q: Answer question \"{}\" row by row.".format(qa_question)
                assert answer_split_token is not None
                prompt += " The answer should be a list split by '{}' and have {} items in total.".format(
                    answer_split_token, len(item.table['rows']))
                prompt += "\nA: {}\n\n".format(f'{answer_split_token}'.join(item.qa_answer))
            elif qa_type == "ans":
                prompt += "Q: Answer question \"{}\" for the table.".format(qa_question)
                prompt += " "
                prompt += "\nA: {}\n\n".format(f'{answer_split_token}'.join(item.qa_answer))
            else:
                raise ValueError("The QA type is not supported!")

            return prompt

        elif prompting_method == "new_db":
            if qa_type == "map":
                prompt += "Q: Answer question \"{}\" row by row.".format(qa_question)
                assert answer_split_token is not None
                # Rebuild the table block with each row's answer appended
                # after db_mapping_token.
                db_prompt_lines = db_prompt.split("\n")[2:-1]  # skip Title, /*, and */
                db_prompt_lines_with_answer = []
                db_prompt_lines_with_answer.append("/*")
                db_prompt_lines_with_answer.append(db_prompt_lines[0])
                assert len(db_prompt_lines[1:]) == len(
                    item.qa_answer), "answer items and table rows must be in the same number, check annotations"
                for db_prompt_line, qa_answer_item in zip(db_prompt_lines[1:], item.qa_answer):
                    db_prompt_lines_with_answer.append(
                        "{}{}{}".format(db_prompt_line, db_mapping_token, qa_answer_item))
                db_prompt_lines_with_answer.append("*/")
                prompt += "\n{}\n".format("\n".join(db_prompt_lines_with_answer))

            elif qa_type == "ans":
                prompt += "Q: Answer question \"{}\" for the table.".format(qa_question)
                prompt += " "
                prompt += "\nA: {}\n".format(f'{answer_split_token}'.join(item.qa_answer))
            else:
                raise ValueError("The QA type is not supported!")

            return prompt
nsql/.DS_Store ADDED
Binary file (6.15 kB). View file
 
nsql/database.py ADDED
@@ -0,0 +1,202 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import copy
2
+ import os
3
+ import sqlite3
4
+ import records
5
+ import sqlalchemy
6
+ import pandas as pd
7
+ from typing import Dict, List
8
+ import uuid
9
+
10
+ from utils.normalizer import convert_df_type, prepare_df_for_neuraldb_from_table
11
+ from utils.mmqa.image_stuff import get_caption
12
+
13
+
14
def check_in_and_return(key: str, source: dict):
    """
    Look up *key* in *source*, treating a backtick-wrapped key as one whole
    token, with a case-insensitive fallback.

    @param key: lookup key, optionally wrapped in `...`.
    @param source: mapping to search.
    @return: the matched value.
    @raise ValueError: when no exact or case-insensitive match exists.
    """
    # `` wrapped means the key is a single whole token; strip the quotes.
    if key.startswith("`") and key.endswith("`"):
        key = key[1:-1]
    if key in source:
        return source[key]
    lowered = key.lower()
    for candidate, value in source.items():
        if candidate.lower() == lowered:
            return value
    raise ValueError("{} not in {}".format(key, source))
25
+
26
+
27
class NeuralDB(object):
    """
    An in-process database wrapping one table (as a pandas DataFrame and a
    SQLite table named 'w'), plus optional passages and images linked to
    table cells for multi-modal questions.
    """

    def __init__(self, tables: List[Dict[str, Dict]], passages=None, images=None):
        """
        @param tables: list of {'title': ..., 'table': {'header', 'rows', ...}}
            dicts; exactly one table is supported for now.
        @param passages: optional list of {'title', 'text', ...} dicts.
        @param images: optional list of {'id', 'title', 'pic', ...} dicts.
        """
        self.raw_tables = copy.deepcopy(tables)
        self.passages = {}
        self.images = {}
        self.image_captions = {}
        self.passage_linker = {}  # The links from cell value to passage
        self.image_linker = {}  # The links from cell value to images

        # Get passages
        if passages:
            for passage in passages:
                title, passage_content = passage['title'], passage['text']
                self.passages[title] = passage_content

        # Get images
        if images:
            for image in images:
                _id, title, picture = image['id'], image['title'], image['pic']
                self.images[title] = picture
                self.image_captions[title] = get_caption(_id)

        # Link grounding resources from other modalities(passages, images).
        # NOTE(review): this branch iterates `passages` and `images` without
        # a None check — presumably rows_with_links only appears in datasets
        # that always supply both; confirm against callers.
        if self.raw_tables[0]['table'].get('rows_with_links', None):
            rows = self.raw_tables[0]['table']['rows']
            rows_with_links = self.raw_tables[0]['table']['rows_with_links']

            # Map each linked document title to the (lower-cased) cell text
            # that carries the link.
            link_title2cell_map = {}
            for row_id in range(len(rows)):
                for col_id in range(len(rows[row_id])):
                    cell = rows_with_links[row_id][col_id]
                    for text, title, url in zip(cell[0], cell[1], cell[2]):
                        text = text.lower().strip()
                        link_title2cell_map[title] = text

            # Link Passages
            for passage in passages:
                title, passage_content = passage['title'], passage['text']
                linked_cell = link_title2cell_map.get(title, None)
                if linked_cell:
                    self.passage_linker[linked_cell] = title

            # Images
            for image in images:
                title, picture = image['title'], image['pic']
                linked_cell = link_title2cell_map.get(title, None)
                if linked_cell:
                    self.image_linker[linked_cell] = title

        # Normalize each table into a pandas DataFrame (mutates `tables`).
        for table_info in tables:
            table_info['table'] = prepare_df_for_neuraldb_from_table(table_info['table'])

        self.tables = tables

        # Connect to SQLite database
        self.tmp_path = "tmp"
        os.makedirs(self.tmp_path, exist_ok=True)
        # self.db_path = os.path.join(self.tmp_path, '{}.db'.format(hash(time.time())))
        self.db_path = os.path.join(self.tmp_path, '{}.db'.format(uuid.uuid4()))
        self.sqlite_conn = sqlite3.connect(self.db_path)

        # Create DB
        assert len(tables) >= 1, "DB has no table inside"
        table_0 = tables[0]
        if len(tables) > 1:
            raise ValueError("More than one table not support yet.")
        else:
            table_0["table"].to_sql("w", self.sqlite_conn)
            self.table_name = "w"
            self.table_title = table_0.get('title', None)

        # Records conn
        self.db = records.Database('sqlite:///{}'.format(self.db_path))
        self.records_conn = self.db.get_connection()

    def __str__(self):
        return str(self.execute_query("SELECT * FROM {}".format(self.table_name)))

    def get_table(self, table_name=None):
        """Return the whole table as {'header': ..., 'rows': ...}."""
        table_name = self.table_name if not table_name else table_name
        sql_query = "SELECT * FROM {}".format(table_name)
        _table = self.execute_query(sql_query)
        return _table

    def get_header(self, table_name=None):
        """Return the column names of the (default) table."""
        _table = self.get_table(table_name)
        return _table['header']

    def get_rows(self, table_name):
        """Return the data rows of the given table."""
        _table = self.get_table(table_name)
        return _table['rows']

    def get_table_df(self):
        """Return the normalized pandas DataFrame of the first table."""
        return self.tables[0]['table']

    def get_table_raw(self):
        """Return the original (un-normalized) first table dict."""
        return self.raw_tables[0]['table']

    def get_table_title(self):
        return self.tables[0]['title']

    def get_passages_titles(self):
        return list(self.passages.keys())

    def get_images_titles(self):
        return list(self.images.keys())

    def get_passage_by_title(self, title: str):
        # Case-insensitive lookup; raises ValueError if absent.
        return check_in_and_return(title, self.passages)

    def get_image_by_title(self, title):
        return check_in_and_return(title, self.images)

    def get_image_caption_by_title(self, title):
        return check_in_and_return(title, self.image_captions)

    def get_image_linker(self):
        return copy.deepcopy(self.image_linker)

    def get_passage_linker(self):
        return copy.deepcopy(self.passage_linker)

    def execute_query(self, sql_query: str):
        """
        Basic operation. Execute the sql query on the database we hold.
        @param sql_query:
        @return: {'header': [...], 'rows': [[...]]}
        """
        # When the sql query is a column name (@deprecated: or a certain value with '' and "" surrounded).
        if len(sql_query.split(' ')) == 1 or (sql_query.startswith('`') and sql_query.endswith('`')):
            col_name = sql_query
            new_sql_query = r"SELECT row_id, {} FROM {}".format(col_name, self.table_name)
            # Here we use a hack that when a value is surrounded by '' or "", the sql will return a column of the value,
            # while for variable, no ''/"" surrounded, this sql will query for the column.
            out = self.records_conn.query(new_sql_query)
        # When the sql query wants all cols or col_id, which is no need for us to add 'row_id'.
        elif sql_query.lower().startswith("select *") or sql_query.startswith("select col_id"):
            out = self.records_conn.query(sql_query)
        else:
            try:
                # SELECT row_id in addition, needed for result and old table alignment.
                new_sql_query = "SELECT row_id, " + sql_query[7:]
                out = self.records_conn.query(new_sql_query)
            except sqlalchemy.exc.OperationalError as e:
                # Execute normal SQL, and in this case the row_id is actually in no need.
                out = self.records_conn.query(sql_query)

        results = out.all()
        unmerged_results = []
        merged_results = []

        headers = out.dataset.headers
        for i in range(len(results)):
            unmerged_results.append(list(results[i].values()))
            merged_results.extend(results[i].values())

        return {"header": headers, "rows": unmerged_results}

    def add_sub_table(self, sub_table, table_name=None, verbose=True):
        """
        Add sub_table into the table.

        Left-joins the sub_table (on 'row_id') into the SQLite table,
        replacing it in place.
        @param sub_table: {'header': ..., 'rows': ...} including 'row_id'.
        @return:
        """
        table_name = self.table_name if not table_name else table_name
        sql_query = "SELECT * FROM {}".format(table_name)
        oring_table = self.execute_query(sql_query)
        old_table = pd.DataFrame(oring_table["rows"], columns=oring_table["header"])
        # concat the new column into old table
        sub_table_df_normed = convert_df_type(pd.DataFrame(data=sub_table['rows'], columns=sub_table['header']))
        new_table = old_table.merge(sub_table_df_normed,
                                    how='left', on='row_id')  # do left join
        new_table.to_sql(table_name, self.sqlite_conn, if_exists='replace',
                         index=False)
        if verbose:
            print("Insert column(s) {} (dtypes: {}) into table.\n".format(', '.join([_ for _ in sub_table['header']]),
                                                                          sub_table_df_normed.dtypes))
nsql/nsql_exec.py ADDED
@@ -0,0 +1,161 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import json
2
+ from typing import List, Dict
3
+ from nsql.qa_module.openai_qa import OpenAIQAModel
4
+ from nsql.qa_module.vqa import vqa_call
5
+ from nsql.database import NeuralDB
6
+ from nsql.parser import get_cfg_tree, get_steps, remove_duplicate, TreeNode, parse_question_paras, nsql_role_recognize, \
7
+ extract_answers
8
+
9
+
10
class NSQLExecutor(object):
    """
    Executes a NeuralSQL program against a NeuralDB: the program is parsed
    into a tree of QA(...) calls and plain SQL, executed bottom-up, with
    intermediate QA results written back into the table as new columns.
    Intermediate results are also dumped under tmp_for_vis/ for the demo UI.
    """

    def __init__(self, args, keys=None):
        # Counter for generating unique names of QA-produced columns.
        self.new_col_name_id = 0
        self.qa_model = OpenAIQAModel(args, keys)

    def generate_new_col_names(self, number):
        """Return `number` fresh column names (col_<i>) and advance the counter."""
        col_names = ["col_{}".format(i) for i in range(self.new_col_name_id, self.new_col_name_id + number)]
        self.new_col_name_id += number
        return col_names

    def sql_exec(self, sql: str, db: NeuralDB, verbose=True):
        """Run a plain SQL step on the database and return its result table."""
        if verbose:
            print("Exec SQL '{}' with additional row_id on {}".format(sql, db))
        result = db.execute_query(sql)
        return result

    def nsql_exec(self, stamp, nsql: str, db: NeuralDB, verbose=True):
        """
        Execute a full NeuralSQL program.

        @param stamp: run identifier used to name the tmp_for_vis/ dumps.
        @param nsql: the NeuralSQL program text.
        @param db: the NeuralDB to execute against (mutated: QA map steps
            add columns).
        @return: a list of answers extracted from the final step's result.
        """
        steps = []
        root_node = get_cfg_tree(nsql)  # Parse execution tree from nsql.
        get_steps(root_node, steps)  # Flatten the execution tree and get the steps.
        steps = remove_duplicate(steps)  # Remove the duplicate steps.
        if verbose:
            print("Steps:", [s.rename for s in steps])
        with open("tmp_for_vis/{}_tmp_for_vis_steps.txt".format(stamp), "w") as f:
            json.dump([s.rename for s in steps], f)
        col_idx = 0
        for step in steps:
            # All steps should be formatted as 'QA()' except for last step which could also be normal SQL.
            assert isinstance(step, TreeNode), "step must be treenode"
            nsql = step.rename
            if nsql.startswith('QA('):
                question, sql_s = parse_question_paras(nsql, self.qa_model)
                sql_executed_sub_tables = []

                # Execute all SQLs and get the results as parameters
                for sql_item in sql_s:
                    # Classify the argument: column/SQL, literal value, or a
                    # passage/image title to be inlined as context.
                    role, sql_item = nsql_role_recognize(sql_item,
                                                         db.get_header(),
                                                         db.get_passages_titles(),
                                                         db.get_images_titles())
                    if role in ['col', 'complete_sql']:
                        sql_executed_sub_table = self.sql_exec(sql_item, db, verbose=verbose)
                        sql_executed_sub_tables.append(sql_executed_sub_table)
                    elif role == 'val':
                        # NOTE(review): eval() of a literal from the program
                        # text — trusted input only.
                        val = eval(sql_item)
                        sql_executed_sub_tables.append({
                            "header": ["row_id", "val"],
                            "rows": [["0", val]]
                        })
                    elif role == 'passage_title_and_image_title':
                        sql_executed_sub_tables.append({
                            "header": ["row_id", "{}".format(sql_item)],
                            "rows": [["0", db.get_passage_by_title(sql_item) +
                                      db.get_image_caption_by_title(sql_item)
                                      # "{} (The answer of '{}' is {})".format(
                                      #     sql_item,
                                      #     # Add image qa result as backup info
                                      #     question[len("***@"):],
                                      #     vqa_call(question=question[len("***@"):],
                                      #              image_path=db.get_image_by_title(sql_item)))
                                      ]]
                        })
                    elif role == 'passage_title':
                        sql_executed_sub_tables.append({
                            "header": ["row_id", "{}".format(sql_item)],
                            "rows": [["0", db.get_passage_by_title(sql_item)]]
                        })
                    elif role == 'image_title':
                        sql_executed_sub_tables.append({
                            "header": ["row_id", "{}".format(sql_item)],
                            "rows": [["0", db.get_image_caption_by_title(sql_item)]],
                            # "rows": [["0", "{} (The answer of '{}' is {})".format(
                            #     sql_item,
                            #     # Add image qa result as backup info
                            #     question[len("***@"):],
                            #     vqa_call(question=question[len("***@"):],
                            #              image_path=db.get_image_by_title(sql_item)))]],
                        })

                # If the sub_tables to execute with link, append it to the cell.
                passage_linker = db.get_passage_linker()
                image_linker = db.get_image_linker()
                for _sql_executed_sub_table in sql_executed_sub_tables:
                    for i in range(len(_sql_executed_sub_table['rows'])):
                        for j in range(len(_sql_executed_sub_table['rows'][i])):
                            _cell = _sql_executed_sub_table['rows'][i][j]
                            if _cell in passage_linker.keys():
                                _sql_executed_sub_table['rows'][i][j] += " ({})".format(
                                    # Add passage text as backup info
                                    db.get_passage_by_title(passage_linker[_cell]))

                            if _cell in image_linker.keys():
                                _sql_executed_sub_table['rows'][i][j] += " ({})".format(
                                    # Add image caption as backup info
                                    db.get_image_caption_by_title(image_linker[_cell]))
                                # _sql_executed_sub_table['rows'][i][j] += " (The answer of '{}' is {})".format(
                                #     # Add image qa result as backup info
                                #     question[len("***@"):],
                                #     vqa_call(question=question[len("***@"):],
                                #              image_path=db.get_image_by_title(image_linker[_cell])))
                                pass

                if question.lower().startswith("map@"):
                    # When the question is a type of mapping, we return the mapped column.
                    question = question[len("map@"):]
                    if step.father:
                        step.rename_father_col(col_idx=col_idx)
                        sub_table: Dict = self.qa_model.qa(question,
                                                           sql_executed_sub_tables,
                                                           table_title=db.table_title,
                                                           qa_type="map",
                                                           new_col_name_s=step.produced_col_name_s,
                                                           verbose=verbose)
                        with open("tmp_for_vis/{}_result_step_{}.txt".format(stamp, steps.index(step)), "w") as f:
                            json.dump(sub_table, f)
                        db.add_sub_table(sub_table, verbose=verbose)
                        col_idx += 1
                    else:  # This step is the final step
                        sub_table: Dict = self.qa_model.qa(question,
                                                           sql_executed_sub_tables,
                                                           table_title=db.table_title,
                                                           qa_type="map",
                                                           new_col_name_s=["col_{}".format(col_idx)],
                                                           verbose=verbose)
                        with open("tmp_for_vis/{}_result_step_{}.txt".format(stamp, steps.index(step)), "w") as f:
                            json.dump(sub_table, f)
                        return extract_answers(sub_table)

                elif question.lower().startswith("ans@"):
                    # When the question is a type of answering, we return an answer list.
                    question = question[len("ans@"):]
                    answer: List = self.qa_model.qa(question,
                                                    sql_executed_sub_tables,
                                                    table_title=db.table_title,
                                                    qa_type="ans",
                                                    verbose=verbose)
                    with open("tmp_for_vis/{}_result_step_{}.txt".format(stamp, steps.index(step)), "w") as f:
                        json.dump(answer, f)
                    if step.father:
                        step.rename_father_val(answer)
                    else:  # This step is the final step
                        return answer
                else:
                    raise ValueError(
                        "Except for operators or NL question must start with 'map@' or 'ans@'!, check '{}'".format(
                            question))

            else:
                # Final plain-SQL step.
                sub_table = self.sql_exec(nsql, db, verbose=verbose)
                with open("tmp_for_vis/{}_result_step_{}.txt".format(stamp, steps.index(step)), "w") as f:
                    json.dump(sub_table, f)
                return extract_answers(sub_table)
nsql/nsql_exec_python.py ADDED
@@ -0,0 +1,129 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # For sync the envs.
2
+ import random
3
+ import json
4
+ import pandas as pd
5
+ import pickle
6
+ from nsql.qa_module.openai_qa import OpenAIQAModel
7
+ import os
8
+ import time
9
+ from subprocess import PIPE, Popen
10
+ import uuid
11
+
12
+
13
+ # For Python execution.
14
# For Python execution.
class NPythonExecutor(object):
    """
    Executes a NeuralPython program by generating a standalone Python script
    (with qa_map/qa_ans helpers wired to the OpenAI QA model), running it in
    a subprocess, and reading the JSON result back.
    """

    def __init__(self, args, keys=None):
        self.new_col_name_id = 0
        self.qa_model = OpenAIQAModel(args, keys)

    def nsql_exec(self, nsql: str, db: pd.DataFrame, verbose=True):
        """
        Run the NeuralPython program `nsql` (which must define solve(db))
        against the DataFrame `db` and return its JSON-serializable result.

        @raise ValueError: when the generated script fails and leaves no
            result file.
        """
        # Add import part
        import_part = """import sys
import random
import json
import pandas as pd
import pickle
import numpy as np
import copy
import os
import time
sys.path.append('./')
from collections.abc import Iterable
from nsql.qa_module.openai_qa import OpenAIQAModel
from nsql.database import NeuralDB
verbose = {}""".format(str(verbose))

        # Add qa_map function
        qa_map_function_part = """def qa_map(db: pd.DataFrame, question, columns):
    new_db = NeuralDB([{"title": "", "table": {"header": db.columns.values.tolist(), "rows": db.values.tolist()}}])
    sql_executed_sub_tables = []
    for column in columns:
        column = f"`{column}`"
        sql_executed_sub_tables.append(new_db.execute_query(column))
    sub_table = qa_model.qa(question,
                            sql_executed_sub_tables,
                            table_title=new_db.table_title,
                            qa_type="map",
                            new_col_name_s=[question],
                            verbose=verbose)
    new_db.add_sub_table(sub_table, verbose=verbose)
    table = new_db.get_table()
    return pd.DataFrame(table["rows"], columns=table["header"])"""

        # Add qa_ans function
        qa_ans_function_part = """def qa_ans(db: pd.DataFrame, question, columns):
    new_db = NeuralDB([{"title": "", "table": {"header": db.columns.values.tolist(), "rows": db.values.tolist()}}])
    sql_executed_sub_tables = []
    for column in columns:
        column = f"`{column}`"
        sql_executed_sub_tables.append(new_db.execute_query(column))
    answer = qa_model.qa(question,sql_executed_sub_tables,table_title=new_db.table_title,qa_type="ans",verbose=verbose)
    return answer"""

        # Convert np number type to python type
        convert_part = """def nested_to_python_number(x):
    if isinstance(x, np.int64):
        return int(x)
    if isinstance(x, np.float64):
        return float(x)
    if isinstance(x, Iterable) and not isinstance(x, (str, bytes)):
        return [nested_to_python_number(d) for d in x]
    return x"""
        # The prediction is a neural-python.

        # Add main function
        tmp_root_path = "tmp_python"
        os.makedirs(tmp_root_path, exist_ok=True)
        # Save the db
        db_file_path = '{}.db'.format(format(uuid.uuid4()))
        db_path = os.path.join(tmp_root_path, db_file_path)
        with open(db_path, "wb") as f:
            pickle.dump(db, f)

        # Save the qa_model
        model_file_path = '{}.model'.format(format(uuid.uuid4()))
        model_path = os.path.join(tmp_root_path, model_file_path)
        with open(model_path, "wb") as f:
            pickle.dump(self.qa_model, f)

        # Set the result path
        result_file_path = '{}.json'.format(format(uuid.uuid4()))
        result_path = os.path.join(tmp_root_path, result_file_path)

        # Read it and call solve function
        main_part = """if __name__ == '__main__':
    with open("{}", "rb") as f:
        db = pickle.load(f)
    with open("{}", "rb") as f:
        qa_model = pickle.load(f)
    result = solve(db)
    result = nested_to_python_number(result)
    with open("{}", "w") as f:
        json.dump(result, f)""".format(db_path, model_path, result_path)

        # Concat the code and execute the python
        all_code = "{}\n\n{}\n\n{}\n\n{}\n\n".format(import_part, qa_map_function_part, qa_ans_function_part,
                                                     convert_part) + nsql + "\n\n" + main_part

        if verbose:
            print("----> Code <----")
            print(all_code)

        python_file_path = '{}.py'.format(format(uuid.uuid4()))
        python_path = os.path.join(tmp_root_path, python_file_path)
        with open(python_path, "w") as f:
            f.write(all_code)

        # NOTE(review): shell=True with a concatenated command and exec of
        # model-generated code — trusted input only.
        p = Popen("python " + python_path, shell=True, stdout=PIPE, stderr=PIPE)
        stdout, stderr = p.communicate()

        # Error in execution so that we didn't get result.
        if not os.path.exists(result_path):
            print("stderr: ", stderr)
            raise ValueError("Error execution!")

        # Read the result
        with open(result_path, "r") as f:
            result = json.load(f)

        return result
nsql/parser.py ADDED
@@ -0,0 +1,179 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from typing import List
2
+ import re
3
+ import sqlparse
4
+
5
+
6
class TreeNode(object):
    """A node of the QA() execution tree.

    `name` keeps the original clause text while `rename` tracks the text as
    inner QA() calls are progressively replaced by produced columns/values.
    """

    def __init__(self, name=None, father=None):
        self.name: str = name
        self.rename: str = name
        self.father: TreeNode = father
        self.children: List = []
        # Column name(s) produced when this QA clause is executed.
        self.produced_col_name_s = None

    def __eq__(self, other):
        # Equality follows the rewritten text, not the original one.
        return other.rename == self.rename

    def __hash__(self):
        return hash(self.rename)

    def set_name(self, name):
        # Reset both the original and the rewritten text.
        self.name = self.rename = name

    def add_child(self, child):
        child.father = self
        self.children.append(child)

    def rename_father_col(self, col_idx: int, col_prefix: str = "col_"):
        """Replace this clause inside the father's text by a produced column."""
        produced = "{}{}".format(col_prefix, col_idx)
        self.father.rename = self.father.rename.replace(self.name, produced)
        self.produced_col_name_s = [produced]  # fixme when multiple outputs for a qa func

    def rename_father_val(self, val_names):
        """Replace this clause inside the father's text by literal value(s)."""
        if len(val_names) == 1:
            only = val_names[0]
            # Quote the value only when it evaluates to a string.
            if isinstance(convert_type(only), str):
                replacement = "'{}'".format(only)
            else:
                replacement = "{}".format(only)
        else:
            quoted = ["'{}'".format(v) for v in val_names]
            replacement = '({})'.format(', '.join(quoted))
        self.father.rename = self.father.rename.replace(self.name, replacement)
41
+
42
+
43
def get_cfg_tree(nsql: str):
    """
    Parse QA() into a tree for execution guiding.
    @param nsql: the full NSQL string, possibly containing nested QA(...) clauses.
    @return: the root TreeNode (named with the whole string); each inner QA(...)
             span becomes a descendant node, nested QA clauses nest in the tree.
    """

    stack: List = []  # Saving the state of the char.  (indices of unmatched '(')
    expression_stack: List = []  # Saving the state of the expression.  (ancestor nodes of the node being built)
    current_tree_node = TreeNode(name=nsql)

    for idx in range(len(nsql)):
        if nsql[idx] == "(":
            stack.append(idx)
            # A '(' preceded by "QA" opens a new child clause. The
            # `idx - 2 != 0` guard skips a QA( that starts the whole string:
            # the root node already covers it.
            if idx > 1 and nsql[idx - 2:idx + 1] == "QA(" and idx - 2 != 0:
                tree_node = TreeNode()
                current_tree_node.add_child(tree_node)
                expression_stack.append(current_tree_node)
                current_tree_node = tree_node
        elif nsql[idx] == ")":
            left_clause_idx = stack.pop()
            # NOTE(review): this guard tests `idx > 1` (the closing paren)
            # while the slice uses left_clause_idx; presumably equivalent in
            # practice since the '(' branch applied the matching test — confirm.
            if idx > 1 and nsql[left_clause_idx - 2:left_clause_idx + 1] == "QA(" and left_clause_idx - 2 != 0:
                # the QA clause
                nsql_span = nsql[left_clause_idx - 2:idx + 1]
                current_tree_node.set_name(nsql_span)
                current_tree_node = expression_stack.pop()

    return current_tree_node
71
+
72
+
73
def get_steps(tree_node: TreeNode, steps: List):
    """Post-order traversal: children (inner QA clauses) are appended to
    `steps` before their parent, so nested sub-queries execute first.

    (The previous docstring said "Pred-Order", but the node is visited
    *after* its children — this is a post-order walk.)
    """
    for child in tree_node.children:
        get_steps(child, steps)
    steps.append(tree_node)
78
+
79
+
80
def parse_question_paras(nsql: str, qa_model):
    """Split a single, non-nested QA(...) clause into its question text and
    the remaining raw parameters (SQL / column / value strings)."""
    nsql = nsql.strip(" ;")
    assert nsql[:3] == "QA(" and nsql[-1] == ")", "must start with QA( symbol and end with )"
    assert "QA" not in nsql[2:-1], "must have no nested qa inside"

    # The question is the text between the first pair of double quotes;
    # everything after the closing quote (minus the final ')') is parameters.
    quote_positions = [m.start() for m in re.finditer('\"', nsql)]
    question = nsql[quote_positions[0] + 1: quote_positions[1]]
    paras_raw_str = nsql[quote_positions[1] + 1:-1].strip(" ;")

    # sqlparse splits on top-level ';' so SQL sub-statements stay intact.
    paras = [para.strip(' ;') for para in sqlparse.split(paras_raw_str)]
    return question, paras
94
+
95
+
96
def convert_type(value):
    """Best-effort literal conversion: return eval(value) when the string
    parses/evaluates, otherwise return the original string unchanged."""
    # NOTE(review): eval on model-generated text is unsafe for untrusted
    # input; ast.literal_eval would be safer if the accepted grammar can be
    # narrowed without breaking callers.
    try:
        return eval(value)
    except Exception:
        return value
101
+
102
+
103
def nsql_role_recognize(nsql_like_str, all_headers, all_passage_titles, all_image_titles):
    """Recognize role. (SQL/column/value)

    Returns a (role, normalized_string) pair; role is one of 'col',
    'passage_title_and_image_title', 'passage_title', 'image_title', 'val'
    or 'complete_sql'.

    The lower-cased title/header lists and the eval() of the candidate are
    now computed once up front instead of being rebuilt for every case
    (the original re-lowered each list and re-evaluated the string up to
    four times).
    """
    orig_nsql_like_str = nsql_like_str

    # strip the first and the last '`'
    if nsql_like_str.startswith('`') and nsql_like_str.endswith('`'):
        nsql_like_str = nsql_like_str[1:-1]

    # Hoisted, reused by every case below.
    lowered_headers = [h.lower() for h in all_headers]
    lowered_passage_titles = [t.lower() for t in all_passage_titles]
    lowered_image_titles = [t.lower() for t in all_image_titles]
    lowered_str = nsql_like_str.lower()

    # A quoted literal (e.g. "'Paris'") may also name a title; evaluate once.
    # NOTE(review): eval on model-generated text — same caveat as convert_type.
    try:
        evaled = eval(nsql_like_str)
        eval_ok = True
    except Exception:
        eval_ok = False
    str_evaled = None
    if eval_ok:
        try:
            str_evaled = str(evaled)
        except Exception:
            pass

    # Case 1: if col in header, it is column type.
    if nsql_like_str in all_headers or nsql_like_str in lowered_headers:
        return 'col', orig_nsql_like_str

    # fixme: add case when the this nsql_like_str both in table headers, images title and in passages title.
    # Case 2.1: if it is title of certain passage AND certain image.
    if lowered_str in lowered_passage_titles and lowered_str in lowered_image_titles:
        return "passage_title_and_image_title", orig_nsql_like_str
    if str_evaled is not None:
        lowered_evaled = str_evaled.lower()
        if lowered_evaled in lowered_passage_titles and lowered_evaled in lowered_image_titles:
            return "passage_title_and_image_title", str_evaled

    # Case 2.2: if it is title of certain passage.
    if lowered_str in lowered_passage_titles:
        return "passage_title", orig_nsql_like_str
    if str_evaled is not None and str_evaled.lower() in lowered_passage_titles:
        return "passage_title", str_evaled

    # Case 2.3: if it is title of certain picture.
    if lowered_str in lowered_image_titles:
        return "image_title", orig_nsql_like_str
    if str_evaled is not None and str_evaled.lower() in lowered_image_titles:
        return "image_title", str_evaled

    # Case 4: if it can be parsed by eval(), it is value type.
    if eval_ok:
        return 'val', orig_nsql_like_str

    # Case 5: else it should be the sql, if it isn't, exception will be raised.
    return 'complete_sql', orig_nsql_like_str
160
+
161
+
162
def remove_duplicate(original_list):
    """Return a new list with duplicates removed, keeping first-seen order.

    The membership test is deliberately `not in` on a list (O(n^2)) so that
    unhashable items are supported; the original used a list comprehension
    purely for its side effects, which is replaced by an explicit loop.
    """
    no_duplicate_list = []
    for item in original_list:
        if item not in no_duplicate_list:
            no_duplicate_list.append(item)
    return no_duplicate_list
166
+
167
+
168
def extract_answers(sub_table):
    """Flatten the cells of a sub-table dict into one answer list.

    When the header contains 'row_id' the first cell of each row (the id,
    added internally for alignment) is skipped.
    """
    if not sub_table or sub_table['header'] is None:
        return []
    skip_first_col = 'row_id' in sub_table['header']
    answer = []
    for row in sub_table['rows']:
        answer.extend(row[1:] if skip_first_col else row)
    return answer
nsql/qa_module/0af77d205dc6673001cdd9ea753f880e.JPG ADDED
nsql/qa_module/__init__.py ADDED
File without changes
nsql/qa_module/openai_qa.py ADDED
@@ -0,0 +1,196 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import random
3
+
4
+ from generation.prompt import OpenAIQAPromptBuilder
5
+ from generation.generator import Generator
6
+ from retrieval.retriever import OpenAIQARetriever
7
+ from retrieval.retrieve_pool import OpenAIQARetrievePool, QAItem
8
+
9
# NOTE(review): not referenced in this file — confirm external use.
num_parallel_prompts = 10
# Number of retrieved few-shot QA exemplars prepended to each prompt.
num_qa_shots = 8
infinite_rows_len = 50  # If the table contain rows larger than this number, it will be handled rows by rows.
# Completion token budget per OpenAI call.
max_tokens = 1024
# Repository root, resolved relative to this file (nsql/qa_module/).
ROOT_DIR = os.path.join(os.path.dirname(__file__), "../../")
14
+
15
+
16
class OpenAIQAModel(object):
    """Answers questions over (sub-)tables by prompting an OpenAI Codex
    model, with few-shot exemplars retrieved from an annotation pool.

    Fix: `merge_tables` previously renamed duplicate columns with
    `merged_header.count(col)`, which produces an already-used suffix when
    the same column appears three or more times (e.g. 'a', 'a_1', 'a_1');
    the suffix is now advanced until the name is unique.
    """

    def __init__(self, args, keys=None):
        """
        @param args: namespace carrying `qa_retrieve_pool_file` (path relative to repo root).
        @param keys: list of OpenAI API keys; shuffled deterministically (seed 42).
        """
        super().__init__()

        # Prepare keys
        self.key_current_id = 0
        self.keys = keys
        random.seed(42)  # deterministic shuffle so runs are reproducible
        random.shuffle(self.keys)

        retrieve_pool = OpenAIQARetrievePool(
            data_path=os.path.join(ROOT_DIR, args.qa_retrieve_pool_file)
        )
        self.retriever = OpenAIQARetriever(retrieve_pool)
        self.generator = Generator(args=None, keys=self.keys)  # Just to use its call api function

        self.prompting_method = 'new_db'
        self.answer_split_token: str = ';'
        self.db_mapping_token = "\t"

    def call_openai_api_completion(self, prompt):
        """Single Codex completion call (temperature 0, stops at blank line)."""
        completion = self.generator._call_codex_api(engine="code-davinci-002",
                                                    prompt=prompt,
                                                    max_tokens=max_tokens,
                                                    temperature=0,
                                                    top_p=1,
                                                    n=1,
                                                    stop=["\n\n"])
        return completion

    def call_openai_for_completion_text(self, prompt, openai_usage_type="completion"):
        """Return the raw completion text for `prompt`.

        @raises ValueError: if `openai_usage_type` is not 'completion'.
        """
        if openai_usage_type == "completion":
            completion = self.call_openai_api_completion(prompt)
            return completion.choices[0].text
        else:
            raise ValueError("The model usage type '{}' doesn't exists!".format(openai_usage_type))

    @staticmethod
    def merge_tables(tables, by='row_id'):
        """Join several equal-length tables column-wise on the `by` column.

        All tables must have the same number of rows and share the `by`
        column; duplicate column names get a '_<n>' suffix, advanced until
        unique so that merging 3+ tables sharing a column cannot collide.
        """
        assert len(set([len(_table['rows']) for _table in tables])) == 1, "Tables must have the same rows!"
        merged_header = [by]
        by_idx = tables[0]['header'].index(by)
        merged_rows = [[_row[by_idx]] for _row in tables[0]['rows']]

        for _table in tables:
            header, rows = _table['header'], _table['rows']
            for col_idx, col in enumerate(header):
                if col == by:
                    continue
                if col in merged_header:
                    # When the column is duplicate, add postfix _1, _2 etc.,
                    # advancing the suffix until the name is actually unused.
                    suffix = merged_header.count(col)
                    candidate = "{}_{}".format(col, suffix)
                    while candidate in merged_header:
                        suffix += 1
                        candidate = "{}_{}".format(col, suffix)
                    col = candidate
                merged_header.append(col)
                for i, row in enumerate(rows):
                    merged_rows[i].append(row[col_idx])
        return {"header": merged_header, "rows": merged_rows}

    def wrap_with_prompt_for_table_qa(self,
                                      question,
                                      sub_table,
                                      table_title=None,
                                      answer_split_token=None,
                                      qa_type="ans",
                                      prompting_method="new_db",
                                      db_mapping_token="😅",
                                      verbose=True):
        """Build the few-shot prompt for one 'map' or 'ans' QA call.

        @param qa_type: 'map' (answer per row) or 'ans' (single answer).
        @raises ValueError: for unsupported `qa_type`.
        """
        prompt = "Question Answering Over Database:\n\n"
        if qa_type in ['map', 'ans'] and num_qa_shots > 0:
            query_item = QAItem(qa_question=question, table=sub_table, title=table_title)
            retrieved_items = self.retriever.retrieve(item=query_item, num_shots=num_qa_shots, qa_type=qa_type)
            few_shot_prompt_list = []
            for item in retrieved_items:
                one_shot_prompt = OpenAIQAPromptBuilder.build_one_shot_prompt(
                    item=item,
                    answer_split_token=answer_split_token,
                    verbose=verbose,
                    prompting_method=prompting_method,
                    db_mapping_token=db_mapping_token
                )
                few_shot_prompt_list.append(one_shot_prompt)
            few_shot_prompt = '\n'.join(few_shot_prompt_list[:num_qa_shots])
            # The generic header above is replaced by the few-shot block.
            prompt = few_shot_prompt

        prompt += "\nGive a database as shown below:\n{}\n\n".format(
            OpenAIQAPromptBuilder.table2codex_prompt(sub_table, table_title)
        )

        if qa_type == "map":
            prompt += "Q: Answer question \"{}\" row by row.".format(question)
            assert answer_split_token is not None
            if prompting_method == "basic":
                prompt += " The answer should be a list split by '{}' and have {} items in total.".format(
                    answer_split_token, len(sub_table['rows']))

        elif qa_type == "ans":
            prompt += "Q: Answer question \"{}\" for the table.".format(question)
            prompt += " "
        else:
            raise ValueError("The QA type is not supported!")

        prompt += "\n"
        if qa_type == "map":
            if prompting_method == "basic":
                prompt += "A:"
        elif qa_type == "ans":
            prompt += "A:"

        return prompt

    def qa(self, question, sub_tables, qa_type: str, verbose: bool = True, **args):
        """Answer `question` over `sub_tables` (merged on row_id).

        @param qa_type: 'map' returns a sub-table {'header': ['row_id'] +
            new_col_name_s, 'rows': [[row_id, answer], ...]}; 'ans' returns a
            one-element list with the answer string.
        @param args: expects `table_title`, and `new_col_name_s` for 'map'.
        @raises ValueError: for unsupported `qa_type` or prompting method.
        """
        # If it is not a problem API can handle, answer it with a QA model.
        merged_table = OpenAIQAModel.merge_tables(sub_tables)
        if verbose:
            print("Make Question {} on {}".format(question, merged_table))
        if qa_type == "map":
            # Map: col(s) -question> one col

            # Make model make a QA towards a sub-table
            # col(s) -> one col, all QA in one time
            def do_map(_table):
                _prompt = self.wrap_with_prompt_for_table_qa(question,
                                                             _table,
                                                             args['table_title'],
                                                             self.answer_split_token,
                                                             qa_type,
                                                             prompting_method=self.prompting_method,
                                                             db_mapping_token=self.db_mapping_token,
                                                             verbose=verbose)
                completion_str = self.call_openai_for_completion_text(_prompt).lower().strip(' []')

                if verbose:
                    print(f'QA map@ input:\n{_prompt}')
                    print(f'QA map@ output:\n{completion_str}')

                if self.prompting_method == "basic":
                    answers = [_answer.strip(" '").lower() for _answer in
                               completion_str.split(self.answer_split_token)]
                elif self.prompting_method == "new_db":
                    # NOTE(review): assumes the completion echoes two header
                    # lines before the "row<TAB>answer" lines and ends with a
                    # trailing line — confirm against the prompt format.
                    answers = [line.split(self.db_mapping_token)[-1] for line in completion_str.split("\n")[2:-1]]
                else:
                    raise ValueError("No such prompting methods: '{}'! ".format(self.prompting_method))
                return answers

            # Handle infinite rows, rows by rows.
            answers = []
            rows_len = len(merged_table['rows'])
            run_times = int(rows_len / infinite_rows_len) if rows_len % infinite_rows_len == 0 else int(
                rows_len / infinite_rows_len) + 1

            for run_idx in range(run_times):
                # Last chunk takes all remaining rows.
                _table = {
                    "header": merged_table['header'],
                    "rows": merged_table['rows'][run_idx * infinite_rows_len:]
                } if run_idx == run_times - 1 else \
                    {
                        "header": merged_table['header'],
                        "rows": merged_table['rows'][run_idx * infinite_rows_len:(run_idx + 1) * infinite_rows_len]
                    }

                answers.extend(do_map(_table))
            if verbose:
                print("The map@ openai answers are {}".format(answers))
            # Add row_id in addition for finding to corresponding rows.
            return {"header": ['row_id'] + args['new_col_name_s'],
                    "rows": [[row[0], answer] for row, answer in zip(merged_table['rows'], answers)]}
        elif qa_type == "ans":
            # Ans: col(s) -question> answer
            prompt = self.wrap_with_prompt_for_table_qa(question,
                                                        merged_table,
                                                        args['table_title'],
                                                        prompting_method=self.prompting_method,
                                                        verbose=verbose)
            answers = [self.call_openai_for_completion_text(prompt).lower().strip(' []')]

            if verbose:
                print(f'QA ans@ input:\n{prompt}')
                print(f'QA ans@ output:\n{answers}')

            return answers
        else:
            raise ValueError("Please choose from map and ans in the qa usage!!")
nsql/qa_module/vqa.py ADDED
@@ -0,0 +1,10 @@
 
 
 
 
 
 
 
 
 
 
 
1
+ import requests
2
+ import base64
3
+ import time
4
+
5
+
6
+ def vqa_call(question, image_path, api_url='https://hf.space/embed/OFA-Sys/OFA-vqa/+/api/predict/'):
7
+ with open(image_path, "rb") as f:
8
+ base64_data = base64.b64encode(f.read())
9
+ base64_data_to_send = "data:image/{};base64,{}".format(image_path.split(".")[-1], str(base64_data)[2:-1])
10
+ return requests.post(url=api_url, json={"data": [base64_data_to_send, question]}).json()['data'][0]
requirements.txt ADDED
@@ -0,0 +1,209 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # This file may be used to create an environment using:
2
+ # $ conda create --name <env> --file <this file>
3
+ # platform: osx-64
4
+ aiohttp=3.8.1=pypi_0
5
+ aiosignal=1.2.0=pypi_0
6
+ altair=4.2.0=pypi_0
7
+ anyio=3.6.1=pypi_0
8
+ appnope=0.1.2=pypi_0
9
+ argon2-cffi=21.3.0=pypi_0
10
+ argon2-cffi-bindings=21.2.0=pypi_0
11
+ async-generator=1.10=pypi_0
12
+ async-timeout=4.0.2=pypi_0
13
+ asynctest=0.13.0=pypi_0
14
+ attrs=21.4.0=pypi_0
15
+ backcall=0.2.0=pypi_0
16
+ backoff=2.0.1=pypi_0
17
+ backports-zoneinfo=0.2.1=pypi_0
18
+ bcrypt=4.0.0=pypi_0
19
+ beautifulsoup4=4.10.0=pypi_0
20
+ bleach=4.1.0=pypi_0
21
+ blinker=1.4=pypi_0
22
+ brotlipy=0.7.0=py37h9ed2024_1003
23
+ ca-certificates=2022.4.26=hecd8cb5_0
24
+ cached-property=1.5.2=pypi_0
25
+ cachetools=5.0.0=pypi_0
26
+ certifi=2022.6.15=py37hecd8cb5_0
27
+ cffi=1.15.0=py37hc55c11b_1
28
+ charset-normalizer=2.0.12=pypi_0
29
+ click=8.0.4=pypi_0
30
+ cryptography=37.0.1=py37hf6deb26_0
31
+ cssselect=1.1.0=pypi_0
32
+ cycler=0.11.0=pypi_0
33
+ datasets=1.14.0=pypi_0
34
+ datedelta=1.4=pypi_0
35
+ debugpy=1.6.0=pypi_0
36
+ decorator=5.1.1=pypi_0
37
+ defusedxml=0.7.1=pypi_0
38
+ dill=0.3.4=pypi_0
39
+ docopt=0.6.2=pypi_0
40
+ emoji=1.7.0=pypi_0
41
+ entrypoints=0.4=pypi_0
42
+ et-xmlfile=1.1.0=pypi_0
43
+ fastapi=0.85.0=pypi_0
44
+ ffmpy=0.3.0=pypi_0
45
+ filelock=3.6.0=pypi_0
46
+ fonttools=4.37.4=pypi_0
47
+ frozenlist=1.3.0=pypi_0
48
+ fsspec=2022.2.0=pypi_0
49
+ fuzzywuzzy=0.18.0=pypi_0
50
+ gitdb=4.0.9=pypi_0
51
+ gitpython=3.1.27=pypi_0
52
+ gradio=3.4.0=pypi_0
53
+ grapheme=0.6.0=pypi_0
54
+ greenlet=1.1.2=pypi_0
55
+ h11=0.12.0=pypi_0
56
+ h5py=3.6.0=pypi_0
57
+ httpcore=0.15.0=pypi_0
58
+ httpx=0.23.0=pypi_0
59
+ huggingface-hub=0.0.19=pypi_0
60
+ idna=3.3=pyhd3eb1b0_0
61
+ importlib-metadata=4.11.3=pypi_0
62
+ importlib-resources=5.6.0=pypi_0
63
+ ipykernel=6.9.2=pypi_0
64
+ ipython=7.32.0=pypi_0
65
+ ipython-genutils=0.2.0=pypi_0
66
+ ipywidgets=7.7.0=pypi_0
67
+ jdcal=1.4.1=pypi_0
68
+ jedi=0.18.1=pypi_0
69
+ jinja2=3.1.1=pypi_0
70
+ joblib=1.1.0=pypi_0
71
+ jsonschema=4.4.0=pypi_0
72
+ jupyter-client=7.1.2=pypi_0
73
+ jupyter-core=4.9.2=pypi_0
74
+ jupyterlab-pygments=0.1.2=pypi_0
75
+ jupyterlab-widgets=1.1.0=pypi_0
76
+ kiwisolver=1.4.4=pypi_0
77
+ libcxx=12.0.0=h2f01273_0
78
+ libffi=3.3=hb1e8313_2
79
+ linkify-it-py=1.0.3=pypi_0
80
+ lxml=4.8.0=pypi_0
81
+ markdown-it-py=2.1.0=pypi_0
82
+ markupsafe=2.1.1=pypi_0
83
+ matplotlib=3.5.3=pypi_0
84
+ matplotlib-inline=0.1.3=pypi_0
85
+ mdit-py-plugins=0.3.1=pypi_0
86
+ mdurl=0.1.2=pypi_0
87
+ mistune=0.8.4=pypi_0
88
+ multidict=6.0.2=pypi_0
89
+ multipledispatch=0.6.0=pypi_0
90
+ multiprocess=0.70.12.2=pypi_0
91
+ nbclient=0.5.13=pypi_0
92
+ nbconvert=6.4.5=pypi_0
93
+ nbformat=5.2.0=pypi_0
94
+ ncurses=6.3=hca72f7f_2
95
+ nest-asyncio=1.5.4=pypi_0
96
+ nltk=3.6.2=pypi_0
97
+ notebook=6.4.10=pypi_0
98
+ numpy=1.21.5=pypi_0
99
+ openai=0.20.0=pypi_0
100
+ openpyxl=2.4.11=pypi_0
101
+ openssl=1.1.1o=hca72f7f_0
102
+ orjson=3.8.0=pypi_0
103
+ outcome=1.1.0=pypi_0
104
+ packaging=21.3=pypi_0
105
+ pandas=1.3.5=pypi_0
106
+ pandas-stubs=1.2.0.58=pypi_0
107
+ pandocfilters=1.5.0=pypi_0
108
+ paramiko=2.11.0=pypi_0
109
+ parso=0.8.3=pypi_0
110
+ pexpect=4.8.0=pypi_0
111
+ pickleshare=0.7.5=pypi_0
112
+ pillow=9.0.1=pypi_0
113
+ pip=21.2.2=py37hecd8cb5_0
114
+ prometheus-client=0.13.1=pypi_0
115
+ prompt-toolkit=3.0.28=pypi_0
116
+ protobuf=3.19.4=pypi_0
117
+ psutil=5.9.0=pypi_0
118
+ ptyprocess=0.7.0=pypi_0
119
+ pyarrow=7.0.0=pypi_0
120
+ pycparser=2.21=pyhd3eb1b0_0
121
+ pycryptodome=3.15.0=pypi_0
122
+ pydantic=1.10.2=pypi_0
123
+ pydeck=0.7.1=pypi_0
124
+ pydub=0.25.1=pypi_0
125
+ pygments=2.11.2=pypi_0
126
+ pyhtml2pdf=0.0.3=pypi_0
127
+ pympler=1.0.1=pypi_0
128
+ pynacl=1.5.0=pypi_0
129
+ pyopenssl=22.0.0=pyhd3eb1b0_0
130
+ pyparsing=3.0.7=pypi_0
131
+ pyquery=1.4.3=pypi_0
132
+ pyrsistent=0.18.1=pypi_0
133
+ pysocks=1.7.1=py37hecd8cb5_0
134
+ python=3.7.11=h88f2d9e_0
135
+ python-dateutil=2.8.2=pypi_0
136
+ python-levenshtein=0.12.2=pypi_0
137
+ python-multipart=0.0.5=pypi_0
138
+ pytz=2022.1=pypi_0
139
+ pytz-deprecation-shim=0.1.0.post0=pypi_0
140
+ pyyaml=6.0=pypi_0
141
+ pyzmq=22.3.0=pypi_0
142
+ readline=8.1.2=hca72f7f_1
143
+ recognizers-text=1.0.2a2=pypi_0
144
+ recognizers-text-choice=1.0.2a2=pypi_0
145
+ recognizers-text-date-time=1.0.2a2=pypi_0
146
+ recognizers-text-number=1.0.2a2=pypi_0
147
+ recognizers-text-number-with-unit=1.0.2a2=pypi_0
148
+ recognizers-text-sequence=1.0.2a2=pypi_0
149
+ recognizers-text-suite=1.0.2a2=pypi_0
150
+ records=0.5.3=pypi_0
151
+ regex=2022.3.15=pypi_0
152
+ requests=2.28.0=pypi_0
153
+ rfc3986=1.5.0=pypi_0
154
+ sacremoses=0.0.49=pypi_0
155
+ scikit-learn=1.0.2=pypi_0
156
+ scipy=1.7.3=pypi_0
157
+ selenium=4.1.3=pypi_0
158
+ semver=2.13.0=pypi_0
159
+ send2trash=1.8.0=pypi_0
160
+ sentencepiece=0.1.97=pypi_0
161
+ setuptools=58.0.4=py37hecd8cb5_0
162
+ six=1.16.0=pypi_0
163
+ smmap=5.0.0=pypi_0
164
+ sniffio=1.2.0=pypi_0
165
+ sortedcontainers=2.4.0=pypi_0
166
+ soupsieve=2.3.1=pypi_0
167
+ sqlalchemy=1.4.36=pypi_0
168
+ sqlite=3.37.2=h707629a_0
169
+ sqlparse=0.4.2=pypi_0
170
+ stanza=1.4.2=pypi_0
171
+ starlette=0.20.4=pypi_0
172
+ streamlit=1.8.0=pypi_0
173
+ tablib=3.2.1=pypi_0
174
+ terminado=0.13.3=pypi_0
175
+ testpath=0.6.0=pypi_0
176
+ threadpoolctl=3.1.0=pypi_0
177
+ timeout-decorator=0.5.0=pypi_0
178
+ tk=8.6.11=h7bc2e8c_0
179
+ tokenizers=0.10.3=pypi_0
180
+ toml=0.10.2=pypi_0
181
+ toolz=0.11.2=pypi_0
182
+ torch=1.12.1=pypi_0
183
+ tornado=6.1=pypi_0
184
+ tqdm=4.63.1=pypi_0
185
+ traitlets=5.1.1=pypi_0
186
+ transformers=4.12.2=pypi_0
187
+ trio=0.20.0=pypi_0
188
+ trio-websocket=0.9.2=pypi_0
189
+ typing-extensions=4.1.1=pypi_0
190
+ tzdata=2022.1=pypi_0
191
+ tzlocal=4.1=pypi_0
192
+ uc-micro-py=1.0.1=pypi_0
193
+ urllib3=1.26.9=py37hecd8cb5_0
194
+ uvicorn=0.18.3=pypi_0
195
+ validators=0.18.2=pypi_0
196
+ watchdog=2.1.7=pypi_0
197
+ wcwidth=0.2.5=pypi_0
198
+ webdriver-manager=3.5.4=pypi_0
199
+ webencodings=0.5.1=pypi_0
200
+ websockets=10.3=pypi_0
201
+ wheel=0.37.1=pyhd3eb1b0_0
202
+ widgetsnbextension=3.6.0=pypi_0
203
+ word2number=1.1=pypi_0
204
+ wsproto=1.1.0=pypi_0
205
+ xxhash=3.0.0=pypi_0
206
+ xz=5.2.5=h1de35cc_0
207
+ yarl=1.7.2=pypi_0
208
+ zipp=3.7.0=pypi_0
209
+ zlib=1.2.11=h4dc903c_4
resources/intro.png ADDED
resources/introduction.md ADDED
@@ -0,0 +1,2 @@
 
 
 
1
+ ## Introduction
2
+ [placeholder: mainly introduces Figure 1 (preferably the GIF version)]
resources/summary.md ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ Binder is a training-free neural-symbolic framework that maps the task input to a program, which
2
+ + Allows binding a unified API of language model(LM) functionalities to a programming language.
3
+ + Adopts Codex as both the program parser and the underlying LM of API calls.
4
+ + Requires only a dozen in-context exemplar annotations.
retrieval/retrieve_pool.py ADDED
@@ -0,0 +1,55 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Retrieval pool of candidates
3
+ """
4
+ from dataclasses import dataclass
5
+ from typing import List, Dict
6
+ import json
7
+
8
+
9
class OpenAIQARetrievePool(object):
    """In-memory pool of QA exemplars loaded from a JSON annotation file.

    The pool is iterable; iteration is stateful through an internal pointer
    that resets to the start once exhausted, so the pool can be re-iterated.
    """

    def __init__(self, data_path):
        with open(data_path, 'r') as f:
            raw_items = json.load(f)
        self.data = []
        for raw in raw_items:
            # Multi-column questions are stored as a list; flatten to 'a|b'.
            if isinstance(raw['qa_column'], list):
                raw['qa_column'] = '|'.join(raw['qa_column'])
            self.data.append(QAItem(
                id=raw['id'],
                qa_question=raw['qa_question'],
                qa_column=raw['qa_column'],
                qa_answer=raw['qa_answer'],
                table=raw['table'],
                title=raw['title'],
            ))

        self.pointer = 0

    def __iter__(self):
        return self

    def __next__(self):
        if self.pointer >= len(self):
            # Exhausted: rewind so the next iteration starts from scratch.
            self.pointer = 0
            raise StopIteration
        item = self.data[self.pointer]
        self.pointer += 1
        return item

    def __getitem__(self, item):
        return self.data[item]

    def __len__(self):
        return len(self.data)
46
+
47
+
48
@dataclass
class QAItem(object):
    """A single QA exemplar used for few-shot retrieval.

    All fields default to None so query items can be built partially
    (e.g. question + table only, without an annotated answer).
    """
    id: int = None
    qa_question: str = None
    # Pipe-joined column name(s), e.g. 'col_a|col_b' when multiple.
    qa_column: str = None
    qa_answer: str = None
    # Table dict with 'header' and 'rows' keys.
    table: Dict = None
    title: str = None
retrieval/retriever.py ADDED
@@ -0,0 +1,95 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Retriever to retrieve relevant examples from annotations.
3
+ """
4
+
5
+ import copy
6
+ from typing import Dict, List, Tuple, Any
7
+ import nltk
8
+ from nltk.stem import SnowballStemmer
9
+ from nltk.translate.bleu_score import sentence_bleu, SmoothingFunction
10
+
11
+ from utils.normalizer import normalize
12
+ from retrieval.retrieve_pool import OpenAIQARetrievePool, QAItem
13
+
14
+
15
class OpenAIQARetriever(object):
    """Retrieves few-shot QA exemplars from a pool, ranked by BLEU similarity
    of question text plus (down-weighted) table-header similarity."""

    def __init__(self, retrieve_pool: OpenAIQARetrievePool):
        self.retrieve_pool = retrieve_pool

    @staticmethod
    def _string_bleu(q1: str, q2: str, stop_words=None, stemmer=None):
        """
        BLEU score.
        """
        # q1 is the reference, q2 the candidate; both are normalized and
        # (optionally) stemmed before scoring. `stop_words` is unused here.
        q1, q2 = normalize(q1), normalize(q2)
        reference = [[tk for tk in nltk.word_tokenize(q1)]]
        candidate = [tk for tk in nltk.word_tokenize(q2)]
        if stemmer is not None:
            reference = [[stemmer.stem(tk) for tk in reference[0]]]
            candidate = [stemmer.stem(tk) for tk in candidate]

        chencherry_smooth = SmoothingFunction()  # bleu smooth to avoid hard behaviour when no ngram overlaps
        # Non-uniform n-gram weights (sum to 1.0), slightly favoring 2/3-grams.
        bleu_score = sentence_bleu(
            reference,
            candidate,
            weights=(0.25, 0.3, 0.3, 0.15),
            smoothing_function=chencherry_smooth.method1
        )
        return bleu_score

    def _qh2qh_similarity(
            self,
            item: QAItem,
            num_retrieve_samples: int,
            score_func: str,
            qa_type: str,
            weight_h: float = 0.2,
            verbose: bool = False
    ):
        """
        Retrieve top K nsqls based on query&header to query&header similarities.
        """
        # Pool questions are encoded as "<qa_type>@<question>"; only items of
        # the matching qa_type are considered.
        q = item.qa_question
        header_wo_row_id = copy.copy(item.table['header'])
        header_wo_row_id.remove('row_id')  # 'row_id' is an internal column, not informative
        h = ' '.join(header_wo_row_id)
        stemmer = SnowballStemmer('english')
        if score_func == 'bleu':
            # The two comprehensions iterate the pool with the same filter, so
            # they are parallel lists pairable by index below.
            retrieve_q_list = [(d, self._string_bleu(q, d.qa_question.split('@')[1], stemmer=stemmer))
                               for d in self.retrieve_pool if d.qa_question.split('@')[0] == qa_type]
            retrieve_h_list = [(d, self._string_bleu(h, ' '.join(d.table['header']), stemmer=stemmer))
                               for d in self.retrieve_pool if d.qa_question.split('@')[0] == qa_type]
            # Combined score: question similarity + weight_h * header similarity.
            retrieve_list = [(retrieve_q_list[idx][0], retrieve_q_list[idx][1] + weight_h * retrieve_h_list[idx][1])
                             for idx in range(len(retrieve_q_list))]
        else:
            raise ValueError
        retrieve_list = sorted(retrieve_list, key=lambda x: x[1], reverse=True)
        retrieve_list = list(map(lambda x: x[0], retrieve_list))[:num_retrieve_samples]

        if verbose:
            print(retrieve_list)

        return retrieve_list

    def retrieve(
            self,
            item: QAItem,
            num_shots: int,
            method: str = 'qh2qh_bleu',
            qa_type: str = 'map',
            verbose: bool = False
    ) -> List[QAItem]:
        """
        Retrieve a list of relevant QA samples.
        """
        if method == 'qh2qh_bleu':
            retrieved_items = self._qh2qh_similarity(
                item=item,
                num_retrieve_samples=num_shots,
                score_func='bleu',
                qa_type=qa_type,
                verbose=verbose
            )
            return retrieved_items
        else:
            raise ValueError(f'Retrieve method {method} is not supported.')
templates/.DS_Store ADDED
Binary file (6.15 kB). View file
 
templates/mmqa_qa_retrieve_pool_v2.json ADDED
@@ -0,0 +1,1600 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [
2
+ {
3
+ "id": 15127,
4
+ "qa_question": "map@Is its title screen featuring 5 women and 1 man?",
5
+ "qa_column": [
6
+ "Title"
7
+ ],
8
+ "qa_answer": [
9
+ "no",
10
+ "no",
11
+ "no",
12
+ "no",
13
+ "no",
14
+ "no",
15
+ "yes",
16
+ "no",
17
+ "no",
18
+ "no",
19
+ "no"
20
+ ],
21
+ "table": {
22
+ "header": [
23
+ "Title"
24
+ ],
25
+ "rows": [
26
+ [
27
+ "Endurance (an image of the endurance logo.)"
28
+ ],
29
+ [
30
+ "Jack & Bobby (a poster with a boy and jack & bobby words.)"
31
+ ],
32
+ [
33
+ "Into the West (indian silhouette and a man holding a gun and a woman.)"
34
+ ],
35
+ [
36
+ "Without a Trace (a black and white photo of a city without a trade)"
37
+ ],
38
+ [
39
+ "CSI: Miami (a sign that says csi miami on a glass window)"
40
+ ],
41
+ [
42
+ "Rizzoli & Isles (how to draw the numbers of the 100th anniversary of birth of a child)"
43
+ ],
44
+ [
45
+ "The Lying Game (a group of people standing next to each other)"
46
+ ],
47
+ [
48
+ "NCIS (a close up of the ncis logo on a computer screen)"
49
+ ],
50
+ [
51
+ "Scream Queens (the logo for scream queens is shown in red)"
52
+ ],
53
+ [
54
+ "All Hail King Julien: Exiled (all hail king julien movie poster)"
55
+ ],
56
+ [
57
+ "All Hail King Julien (all hail king julien movie poster)"
58
+ ]
59
+ ]
60
+ },
61
+ "title": "Glen Powell (Television)"
62
+ },
63
+ {
64
+ "id": 16173,
65
+ "qa_question": "map@Does its logo has stars?",
66
+ "qa_column": [
67
+ "Team"
68
+ ],
69
+ "qa_answer": [
70
+ "no",
71
+ "yes",
72
+ "no",
73
+ "no",
74
+ "no",
75
+ "no",
76
+ "no",
77
+ "yes",
78
+ "no",
79
+ "no"
80
+ ],
81
+ "table": {
82
+ "header": [
83
+ "Team"
84
+ ],
85
+ "rows": [
86
+ [
87
+ "Spokane Chiefs (an image of the boston spokane chiefs team logo, a alphabet s with wing)"
88
+ ],
89
+ [
90
+ "Tri-City Americans (the logo of the tri-City americans team, three stars in the middle, with wing of american red and blue)"
91
+ ],
92
+ [
93
+ "Kamloops Blazers (logo of kamloops blazers, a alphabet B contain a orange fire inside)"
94
+ ],
95
+ [
96
+ "Spokane Chiefs (an image of the boston spokane chiefs team logo, a alphabet s with wing)"
97
+ ],
98
+ [
99
+ "Lethbridge Hurricanes (the logo of lethbridge hurricanes, a red alphabet capital h with a red and black ring around it)"
100
+ ],
101
+ [
102
+ "Medicine Hat Tigers (the logo of medicine hat tigers, a tiger head)"
103
+ ],
104
+ [
105
+ "Lethbridge Hurricanes (the logo of lethbridge hurricanes, a red alphabet capital h with a red and black ring around it)"
106
+ ],
107
+ [
108
+ "Tri-City Americans (the logo of the tri-City americans team, three stars in the middle, with wing of american red and blue)"
109
+ ],
110
+ [
111
+ "Prince Albert Raiders (the logo of prince albert raiders, the capital fa form as a shield, and a sword go through its middle, slogan raiders on the top of the shield)"
112
+ ],
113
+ [
114
+ "Swift Current Broncos (the logo of the swift current broncos team, a silhouette of a bull on a blue background passes through a green arch and the letters SC with a slogan consisting of uppercase blue Broncos)"
115
+ ]
116
+ ]
117
+ },
118
+ "title": "1990\u201391 WHL season (Scoring leaders)"
119
+ },
120
+ {
121
+ "id": 17770,
122
+ "qa_question": "map@Is it the city where 1988 Olympics were held?",
123
+ "qa_column": [
124
+ "Location"
125
+ ],
126
+ "qa_answer": [
127
+ "no",
128
+ "yes",
129
+ "no",
130
+ "no",
131
+ "no",
132
+ "no",
133
+ "no",
134
+ "no",
135
+ "yes",
136
+ "no",
137
+ "yes",
138
+ "no",
139
+ "no",
140
+ "no",
141
+ "no",
142
+ "yes",
143
+ "no",
144
+ "no",
145
+ "no",
146
+ "no",
147
+ "no",
148
+ "no",
149
+ "no",
150
+ "no"
151
+ ],
152
+ "table": {
153
+ "header": [
154
+ "Location"
155
+ ],
156
+ "rows": [
157
+ [
158
+ "Tokyo, Japan"
159
+ ],
160
+ [
161
+ "Seoul, South Korea"
162
+ ],
163
+ [
164
+ "Melbourne, Australia"
165
+ ],
166
+ [
167
+ "Gold Coast, Australia (a beach with a city skyline in the background)"
168
+ ],
169
+ [
170
+ "Tokyo, Japan"
171
+ ],
172
+ [
173
+ "Stockholm, Sweden"
174
+ ],
175
+ [
176
+ "Hiroshima, Japan"
177
+ ],
178
+ [
179
+ ""
180
+ ],
181
+ [
182
+ "Seoul, South Korea"
183
+ ],
184
+ [
185
+ "Tokyo, Japan"
186
+ ],
187
+ [
188
+ "Seoul, South Korea"
189
+ ],
190
+ [
191
+ "Tokyo, Japan"
192
+ ],
193
+ [
194
+ ""
195
+ ],
196
+ [
197
+ "Yokohama, Japan"
198
+ ],
199
+ [
200
+ "Tokyo, Japan"
201
+ ],
202
+ [
203
+ "Seoul, South Korea"
204
+ ],
205
+ [
206
+ "Tokyo, Japan"
207
+ ],
208
+ [
209
+ "Yokohama, Japan"
210
+ ],
211
+ [
212
+ "Tokyo, Japan"
213
+ ],
214
+ [
215
+ ""
216
+ ],
217
+ [
218
+ "Busan, South Korea"
219
+ ],
220
+ [
221
+ "Yokohama, Japan"
222
+ ],
223
+ [
224
+ "Tokyo, Japan"
225
+ ],
226
+ [
227
+ ""
228
+ ]
229
+ ]
230
+ },
231
+ "title": "Virgil Kalakoda (Kickboxing record)"
232
+ },
233
+ {
234
+ "id": 14982,
235
+ "qa_question": "map@Does poster have a silhouette on it?",
236
+ "qa_column": [
237
+ "Title"
238
+ ],
239
+ "qa_answer": [
240
+ "yes",
241
+ "no",
242
+ "no",
243
+ "no",
244
+ "no",
245
+ "no"
246
+ ],
247
+ "table": {
248
+ "header": [
249
+ "Title"
250
+ ],
251
+ "rows": [
252
+ [
253
+ "Ladybug Ladybug ()"
254
+ ],
255
+ [
256
+ "Bonnie and Clyde (a poster for the movie starring a man and a woman on a motorcycle)"
257
+ ],
258
+ [
259
+ "Rachel, Rachel (a poster of a girl with a pink doll)"
260
+ ],
261
+ [
262
+ "Don't Drink the Water (a poster for the movie they re caught in a security leak, a group of people block the grim reaper pouring from the giant water tap)"
263
+ ],
264
+ [
265
+ "Watermelon Man (a very funny thing happened to jeff gerber advertising poster, a black man's head aside by a watermelon with american sign)"
266
+ ],
267
+ [
268
+ "I Walk the Line (a poster for the movie walk the line, a man is holding on to a woman with blond hair)"
269
+ ]
270
+ ]
271
+ },
272
+ "title": "Estelle Parsons (Kickboxing record)"
273
+ },
274
+ {
275
+ "id": 561,
276
+ "qa_question": "map@What sport is shown in the logo?",
277
+ "qa_column": [
278
+ "Club"
279
+ ],
280
+ "qa_answer": [
281
+ "/",
282
+ "/",
283
+ "/",
284
+ "soccer",
285
+ "soccer"
286
+ ],
287
+ "table": {
288
+ "header": [
289
+ "Club"
290
+ ],
291
+ "rows": [
292
+ [
293
+ "Bolton Wanderers (the symbol for peace with a red ribbon in front of it)"
294
+ ],
295
+ [
296
+ "St Johnstone (loan)"
297
+ ],
298
+ [
299
+ "Macclesfield Town (loan)"
300
+ ],
301
+ [
302
+ "Crawley Town (A shield with an alternating grid of red and white, with a soccer on it.)"
303
+ ],
304
+ [
305
+ "Kilmarnock (the logo for the manhattan football league 150 years ago, soccer, shield, hands with two fingers)"
306
+ ]
307
+ ]
308
+ },
309
+ "title": "Mark Connolly (Career statistics)"
310
+ },
311
+ {
312
+ "id": 2488,
313
+ "qa_question": "map@Does it has an instrument in its cover?",
314
+ "qa_column": [
315
+ "Title"
316
+ ],
317
+ "qa_answer": [
318
+ "yes",
319
+ "yes",
320
+ "no",
321
+ "no",
322
+ "no",
323
+ "yes",
324
+ "yes"
325
+ ],
326
+ "table": {
327
+ "header": [
328
+ "Title"
329
+ ],
330
+ "rows": [
331
+ [
332
+ "Gettin' to It (a man playing a cello in front of a car)"
333
+ ],
334
+ [
335
+ "Number Two Express (a man standing next to a cello)"
336
+ ],
337
+ [
338
+ "A Family Affair (a man wearing sunglasses standing in front of a wall)"
339
+ ],
340
+ [
341
+ "SciFi (a man on a screen with a glass window)"
342
+ ],
343
+ [
344
+ "The Philadelphia Experiment (a poster for the philadelphia experiment with three people looking at a city)"
345
+ ],
346
+ [
347
+ "Vertical Vision (a man in the middle of a guitar's string)"
348
+ ],
349
+ [
350
+ "Live at Tonic (a man is standing on the sidewalk with a cello)"
351
+ ]
352
+ ]
353
+ },
354
+ "title": "Christian McBride (Discography | As leader)"
355
+ },
356
+ {
357
+ "id": 20323,
358
+ "qa_question": "map@Is it features a man on its logo?",
359
+ "qa_column": [
360
+ "Title"
361
+ ],
362
+ "qa_answer": [
363
+ "no",
364
+ "no",
365
+ "no",
366
+ "no",
367
+ "yes"
368
+ ],
369
+ "table": {
370
+ "header": [
371
+ "Title"
372
+ ],
373
+ "rows": [
374
+ [
375
+ "Guiding Light (a neon sign that says guildlight in the dark)"
376
+ ],
377
+ [
378
+ "Hack (a man in a car with his face on a screen)"
379
+ ],
380
+ [
381
+ "Everwood (a neon sign that says everwood in front of mountains)"
382
+ ],
383
+ [
384
+ "Law & Order: Criminal Intent (a neon law order criminal intent sign on a dark)"
385
+ ],
386
+ [
387
+ "Deception (a man standing in the darkness)"
388
+ ]
389
+ ]
390
+ },
391
+ "title": "Jess Weixler (Television)"
392
+ },
393
+ {
394
+ "id": 7157,
395
+ "qa_question": "map@Does it has a flower on its logo?",
396
+ "qa_column": [
397
+ "Title"
398
+ ],
399
+ "qa_answer": [
400
+ "yes",
401
+ "no",
402
+ "no",
403
+ "no",
404
+ "no",
405
+ "no",
406
+ "no",
407
+ "yes"
408
+ ],
409
+ "table": {
410
+ "header": [
411
+ "Title"
412
+ ],
413
+ "rows": [
414
+ [
415
+ "Aldershot"
416
+ ],
417
+ [
418
+ "Barrow (the logo of the barrow afc and the bee logo, with flower and football and submarine on the shield)"
419
+ ],
420
+ [
421
+ "Bradford City (the logo of the bcaf afc announces its new logo, a rooster on a sheild)"
422
+ ],
423
+ [
424
+ "Brentford (the logo for the brentford football club, a bee in the red circle)"
425
+ ],
426
+ [
427
+ "Chester (a crest of a lion wearing a crown and a laurel wreath)"
428
+ ],
429
+ [
430
+ "Crewe Alexandra (the logo of the glen alexandra football club, a red dragon among golden ears of wheat)"
431
+ ],
432
+ [
433
+ "Darlington (the logo for the quakers, an orange and white shield, unfolding a tractor and a top hat)"
434
+ ],
435
+ [
436
+ "Halifax Town (the logo of the shaymen football club, a blue striped and white shield with a floral pattern, the uppercase ntfc letters nesting a football)"
437
+ ]
438
+ ]
439
+ },
440
+ "title": "1962\u201363 Football League Cup (First round | Ties)"
441
+ },
442
+ {
443
+ "id": 21652,
444
+ "qa_question": "map@Is he/she has blonde hair that is curly in the back and has sideburns?",
445
+ "qa_column": [
446
+ "Player"
447
+ ],
448
+ "qa_answer": [
449
+ "no",
450
+ "no",
451
+ "no",
452
+ "no",
453
+ "yes"
454
+ ],
455
+ "table": {
456
+ "header": [
457
+ "Player"
458
+ ],
459
+ "rows": [
460
+ [
461
+ "David Duval (a man wearing a hat and sunglasses and a shirt)"
462
+ ],
463
+ [
464
+ "Niclas Fasth (a man standing in front of a cricket field with blue shirt and gold chain)"
465
+ ],
466
+ [
467
+ "Darren Clarke (a man wearing a hat smiles at the camera)"
468
+ ],
469
+ [
470
+ "Ernie Els (a man in a black shirt holding a golf club)"
471
+ ],
472
+ [
473
+ "Miguel \u00c1ngel Jim\u00e9nez (a man wearing a baseball cap and a blue shirt is looking off into the distance)"
474
+ ]
475
+ ]
476
+ },
477
+ "title": "2001 Open Championship (Final round)"
478
+ },
479
+ {
480
+ "id": 9492,
481
+ "qa_question": "ans@Did the team that has a logo shaped like a quarter note with wings and is listed in the leading goaltenders in the 2008-09 National Hockey League season make the playoffs for the Stanley Cup?",
482
+ "qa_column": [
483
+ "St. Louis Blues"
484
+ ],
485
+ "qa_answer": [
486
+ "no"
487
+ ],
488
+ "table": {
489
+ "header": [
490
+ "St. Louis Blues"
491
+ ],
492
+ "rows": [
493
+ [
494
+ "The team is named after the famous W.C. Handy song \"Saint Louis Blues.\" The franchise was founded in 1967 as an expansion team during the league's 1967 NHL Expansion, which expanded the league from six teams to twelve. The Blues are the oldest active NHL team never to have won the Stanley Cup, although they played in the Stanley Cup Finals three times in 1968, 1969 and 1970.St. Louis Blues. The logo has wings on it."
495
+ ]
496
+ ]
497
+ },
498
+ "title": "2008\u201309 NHL season"
499
+ },
500
+ {
501
+ "id": 5555,
502
+ "qa_question": "map@Is the film poster with a male reaching his hand out?",
503
+ "qa_column": "film",
504
+ "qa_answer": [
505
+ "no",
506
+ "no",
507
+ "yes",
508
+ "no",
509
+ "no"
510
+ ],
511
+ "table": {
512
+ "header": [
513
+ "film"
514
+ ],
515
+ "rows": [
516
+ [
517
+ "sethu ( a naked man and a beautiful woman )"
518
+ ],
519
+ [
520
+ "nandha ( two men are staring at someone )"
521
+ ],
522
+ [
523
+ "pithamagan ( a man is reaching the hand out )"
524
+ ],
525
+ [
526
+ "maayavi ( a man is smiling and a woman is shocked )"
527
+ ],
528
+ [
529
+ "naan kadavul ( a man is running in the sea )"
530
+ ]
531
+ ]
532
+ },
533
+ "title": "Bala (director)"
534
+ },
535
+ {
536
+ "id": 4456,
537
+ "qa_question": "ans@Which resort was it filmed at?",
538
+ "qa_column": [
539
+ "title",
540
+ "Couples Retreat"
541
+ ],
542
+ "qa_answer": [
543
+ "the St. Regis Bora Bora Resort, Bora Bora in French Polynesia"
544
+ ],
545
+ "table": {
546
+ "header": [
547
+ "title",
548
+ "Couples Retreat"
549
+ ],
550
+ "rows": [
551
+ [
552
+ "couples retreat (The primary location for filming was at the St. Regis Bora Bora Resort, Bora Bora in French Polynesia.[citation needed] Other filming locations include Los Angeles, Universal Studios and O'Hare International Airport.) (A man and a woman are standing in the water.)"
553
+ ],
554
+ [
555
+ "The primary location for filming was at the St. Regis Bora Bora Resort, Bora Bora in French Polynesia.[citation needed] Other filming locations include Los Angeles, Universal Studios and O'Hare International Airport. water is listed (or ranked) 2 on the list what to watch if you love life"
556
+ ]
557
+ ]
558
+ },
559
+ "title": "Justin Deeley"
560
+ },
561
+ {
562
+ "id": 13437,
563
+ "qa_question": "map@Was he/she playing the violin?",
564
+ "qa_column": "artist",
565
+ "qa_answer": [
566
+ "no",
567
+ "no",
568
+ "no",
569
+ "no",
570
+ "no",
571
+ "no",
572
+ "yes"
573
+ ],
574
+ "table": {
575
+ "header": [
576
+ "artist"
577
+ ],
578
+ "rows": [
579
+ [
580
+ "tina turner ( a singer is singing on the stage )"
581
+ ],
582
+ [
583
+ "soundgarden ( a band is playing the song )"
584
+ ],
585
+ [
586
+ "live ( a band is playing with a naked lead singer )"
587
+ ],
588
+ [
589
+ "r.e.m. ( a bald singer is singing in the dark )"
590
+ ],
591
+ [
592
+ "tom petty ( a man is playing the guitar )"
593
+ ],
594
+ [
595
+ "bruce springsteen ( a man is playing the guitar )"
596
+ ],
597
+ [
598
+ "tracy bonham ( a woman is playing the violin )"
599
+ ]
600
+ ]
601
+ },
602
+ "title": "Bala (director)"
603
+ },
604
+ {
605
+ "id": 10968,
606
+ "qa_question": "ans@When was the last time the player in the 2nd round of the Players 2013 championship when the score was 67-67=134 was number 1?",
607
+ "qa_column": [
608
+ "player",
609
+ "List of World Number One male golfers"
610
+ ],
611
+ "qa_answer": [
612
+ "2009"
613
+ ],
614
+ "table": {
615
+ "header": [
616
+ "player",
617
+ "List of World Number One male golfers"
618
+ ],
619
+ "rows": [
620
+ [
621
+ "tiger woods ( this is a close up of golfer.)"
622
+ ],
623
+ [
624
+ "The following is a list of the 20 golfers who have risen to the top of the Official World Golf Ranking. As of January 21, 2018, Dustin Johnson is the number one ranked golfer. Tiger Woods has spent the most consecutive weeks (281) and most total weeks (683) in that position. Three golfers have spent an entire calendar year atop the rankings: Nick Faldo (1993), Greg Norman (1996), and Woods (2000, 2001, 2002, 2003, 2006, 2007, 2008, 2009)."
625
+ ]
626
+ ]
627
+ },
628
+ "title": "2013 Players Championship"
629
+ },
630
+ {
631
+ "id": 9184,
632
+ "qa_question": "ans@When did this season of the show where Jerrika Hinton plays Stephanie Edwards start?",
633
+ "qa_column": [
634
+ "title",
635
+ "Grey's Anatomy"
636
+ ],
637
+ "qa_answer": [
638
+ "September 28, 2017"
639
+ ],
640
+ "table": {
641
+ "header": [
642
+ "title",
643
+ "Grey's Anatomy"
644
+ ],
645
+ "rows": [
646
+ [
647
+ "grey's anatomy (On February 10, 2017, ABC renewed Grey's Anatomy for a fourteenth season, which premiered on September 28, 2017. The series' success catapulted such long-running cast members as Pompeo, Dempsey, and Oh to worldwide recognition; they were among the top five highest-earning television actors in 2013.)"
648
+ ],
649
+ [
650
+ "On February 10, 2017, ABC renewed Grey's Anatomy for a fourteenth season, which premiered on September 28, 2017. The series' success catapulted such long-running cast members as Pompeo, Dempsey, and Oh to worldwide recognition; they were among the top five highest-earning television actors in 2013."
651
+ ]
652
+ ]
653
+ },
654
+ "title": "Jerrika Hinton"
655
+ },
656
+ {
657
+ "id": 13726,
658
+ "qa_question": "ans@The place where the he first international earth summit held has how many photos in its collage?",
659
+ "qa_column": [
660
+ "Earth Summit",
661
+ "Rio de Janeiro"
662
+ ],
663
+ "qa_answer": [
664
+ "six"
665
+ ],
666
+ "table": {
667
+ "header": [
668
+ "Earth Summit",
669
+ "Rio de Janeiro"
670
+ ],
671
+ "rows": [
672
+ [
673
+ "The United Nations Conference on Environment and Development (UNCED), also known as the Rio de Janeiro Earth Summit, the Rio Summit, the Rio Conference, and the Earth Summit (Portuguese: ECO92), was a major United Nations conference held in Rio de Janeiro from 3 to 14 June 1992."
674
+ ],
675
+ [
676
+ "six views of city rio de janeiro, brazil"
677
+ ]
678
+ ]
679
+ },
680
+ "title": "2014\u201315 NBB season"
681
+ },
682
+ {
683
+ "id": 14059,
684
+ "qa_question": "ans@What color is the Young Artist Award?",
685
+ "qa_column": [
686
+ "Young Artist Award"
687
+ ],
688
+ "qa_answer": [
689
+ "gold"
690
+ ],
691
+ "table": {
692
+ "header": [
693
+ "Young Artist Award"
694
+ ],
695
+ "rows": [
696
+ [
697
+ "The Young Artist Award (originally known as the Youth in Film Award) is an accolade presented by the Young Artist Association, a non-profit organization founded in 1978 to honor excellence of youth performers, and to provide scholarships for young artists who may be physically disabled or financially unstable. a gold sculpture holding a star."
698
+ ]
699
+ ]
700
+ },
701
+ "title": "Jenna Boyd"
702
+ },
703
+ {
704
+ "id": 6031,
705
+ "qa_question": "map@Does the video game produced by Mark Meer feature a view of Earth from space on its cover?",
706
+ "qa_column": "title",
707
+ "qa_answer": [
708
+ "no",
709
+ "no",
710
+ "yes",
711
+ "no",
712
+ "no",
713
+ "no"
714
+ ],
715
+ "table": {
716
+ "header": [
717
+ "title"
718
+ ],
719
+ "rows": [
720
+ [
721
+ "baldur's gate ii: shadows of amn \uff08 card of shadows of amn \uff09"
722
+ ],
723
+ [
724
+ "baldur's gate ii: throne of bhaal ( card of throne of bhaal )"
725
+ ],
726
+ [
727
+ "Mass Effect 3 ( mass effect 3 with a view of the earth)"
728
+ ],
729
+ [
730
+ "baldur's gate: enhanced edition ( a sword at the bottom )"
731
+ ],
732
+ [
733
+ "baldur's gate: enhanced edition ( a man versus a dragon )"
734
+ ],
735
+ [
736
+ "dragon age: inquisition ( a man is battling with a dragon)"
737
+ ]
738
+ ]
739
+ },
740
+ "title": "Mark Meer"
741
+ },
742
+ {
743
+ "id": 21257,
744
+ "qa_question": "ans@The location with two tall narrow pointed towers near the waterfront, in Individual podiums of Riitta-Liisa Roponen, is?",
745
+ "qa_column": [
746
+ "Lahti"
747
+ ],
748
+ "qa_answer": [
749
+ "Lahti"
750
+ ],
751
+ "table": {
752
+ "header": [
753
+ "Lahti"
754
+ ],
755
+ "rows": [
756
+ [
757
+ "a photo of lake geneva at night and at night in lahti"
758
+ ]
759
+ ]
760
+ },
761
+ "title": "Riitta-Liisa Roponen"
762
+ },
763
+ {
764
+ "id": 8555,
765
+ "qa_question": "ans@Was the film in which the role of Shannon was played by Madeline Carroll based on a true story?",
766
+ "qa_column": [
767
+ "title",
768
+ "I Can Only Imagine (film)"
769
+ ],
770
+ "qa_answer": [
771
+ "yes"
772
+ ],
773
+ "table": {
774
+ "header": [
775
+ "title",
776
+ "I Can Only Imagine (film)"
777
+ ],
778
+ "rows": [
779
+ [
780
+ "i can only imagine (I Can Only Imagine is a 2018 American Christian drama film directed by the Erwin Brothers and written by Alex Cramer, Jon Erwin, and Brent McCorkle, based on the story behind the MercyMe song of the same name, the best-selling Christian single of all time. The film stars J. Michael Finley as Bart Millard, the lead singer who wrote the song about his relationship with his father (Dennis Quaid). Madeline Carroll, Priscilla Shirer, Cloris Leachman, and Trace Adkins also star.) ( the only machine is listed (or ranked) 10 on the list the best movies and tv)"
781
+ ],
782
+ [
783
+ "I Can Only Imagine is a 2018 American Christian drama film directed by the Erwin Brothers and written by Alex Cramer, Jon Erwin, and Brent McCorkle, based on the story behind the MercyMe song of the same name, the best-selling Christian single of all time. The film stars J. Michael Finley as Bart Millard, the lead singer who wrote the song about his relationship with his father (Dennis Quaid). Madeline Carroll, Priscilla Shirer, Cloris Leachman, and Trace Adkins also star. the only machine is listed (or ranked) 10 on the list the best movies and tv"
784
+ ]
785
+ ]
786
+ },
787
+ "title": "Madeline Carroll"
788
+ },
789
+ {
790
+ "id": 3145,
791
+ "qa_question": "ans@In fourth round proper of 1957\u201358 FA Cup, who got promoted to premier league this year and was the Home team when the Score was 4\u20131?",
792
+ "qa_column": [
793
+ "home team"
794
+ ],
795
+ "qa_answer": [
796
+ "Cardiff City"
797
+ ],
798
+ "table": {
799
+ "header": [
800
+ "home team"
801
+ ],
802
+ "rows": [
803
+ [
804
+ "darlington ( railroads of the quakers logo)",
805
+ "cardiff city"
806
+ ]
807
+ ]
808
+ },
809
+ "title": "1957\u201358 FA Cup"
810
+ },
811
+ {
812
+ "id": 2936,
813
+ "qa_question": "ans@Who does the voice in the show?",
814
+ "qa_column": [
815
+ "title",
816
+ "the Belko Experiment"
817
+ ],
818
+ "qa_answer": [
819
+ "gregg henry"
820
+ ],
821
+ "table": {
822
+ "header": [
823
+ "title",
824
+ "the Belko Experiment"
825
+ ],
826
+ "rows": [
827
+ [
828
+ "the belko experiment (In a rage, Mike kills Barry with a tape dispenser. The building is then unsealed, as he is the last survivor, and the soldiers escort him to the hangar next door. There, he meets the owner of the voice (Gregg Henry), who introduces himself as a social scientist who believes that discoveries about human nature can only come from placing people in extreme environments. As he and his colleagues begin to question Mike about his emotional and mental state, Mike notices a panel of switches that correspond to the eighty employees. Having planted the trackers that Marty collected on the soldiers and the Voice, he flips every switch except his own. The trackers explode, killing the soldiers and wounding the Voice, before Mike grabs a gun and kills the remaining scientists. The Voice attempts to reason with Mike but Mike kills him. He then leaves the warehouse in a state of shock. The view zooms out to reveal that Mike is one of many sole survivors from similar experiments, being watched by another group through security cameras. A new voice states \"end stage one\" and \"commence stage two.\") ( actor in a still from the movie)"
829
+ ],
830
+ [
831
+ "In a rage, Mike kills Barry with a tape dispenser. The building is then unsealed, as he is the last survivor, and the soldiers escort him to the hangar next door. There, he meets the owner of the voice (Gregg Henry), who introduces himself as a social scientist who believes that discoveries about human nature can only come from placing people in extreme environments. As he and his colleagues begin to question Mike about his emotional and mental state, Mike notices a panel of switches that correspond to the eighty employees. Having planted the trackers that Marty collected on the soldiers and the Voice, he flips every switch except his own. The trackers explode, killing the soldiers and wounding the Voice, before Mike grabs a gun and kills the remaining scientists. The Voice attempts to reason with Mike but Mike kills him. He then leaves the warehouse in a state of shock. The view zooms out to reveal that Mike is one of many sole survivors from similar experiments, being watched by another group through security cameras. A new voice states \"end stage one\" and \"commence stage two.\" actor in a still from the movie"
832
+ ]
833
+ ]
834
+ },
835
+ "title": "Sean Gunn"
836
+ },
837
+ {
838
+ "id": 23229,
839
+ "qa_question": "ans@in Tournaments of 1998 Ladies European Tour, does Spain share a land border with the location that has a star in the exact center of the flag?",
840
+ "qa_column": [
841
+ "Morocco\u2013Spain border",
842
+ "Morocco"
843
+ ],
844
+ "qa_answer": [
845
+ "yes"
846
+ ],
847
+ "table": {
848
+ "header": [
849
+ "Morocco\u2013Spain border",
850
+ "Morocco"
851
+ ],
852
+ "rows": [
853
+ [
854
+ "The Morocco-Spain border is located along the Plazas de soberan\u00eda, Ceuta, Melilla, and Albor\u00e1n Island along the north coast of Morocco."
855
+ ],
856
+ [
857
+ "flag of the islamic republic of morocco with a star in the center"
858
+ ]
859
+ ]
860
+ },
861
+ "title": "1998 Ladies European Tour"
862
+ },
863
+ {
864
+ "id": 14260,
865
+ "qa_question": "map@Is there a wildcat on the logo?",
866
+ "qa_column": "wnba team",
867
+ "qa_answer": [
868
+ "no",
869
+ "no",
870
+ "yes",
871
+ "no"
872
+ ],
873
+ "table": {
874
+ "header": [
875
+ "wnba team"
876
+ ],
877
+ "rows": [
878
+ [
879
+ "houston comets"
880
+ ],
881
+ [
882
+ "san antonio silver stars ( san antonio starts logo with a boom )"
883
+ ],
884
+ [
885
+ "minnesota lynx ( a logo with wildcat of minnesota lynx )"
886
+ ],
887
+ [
888
+ "phoenix mercury ( the phoenix mercury logo with the sun )"
889
+ ]
890
+ ]
891
+ },
892
+ "title": "2006 WNBA draft"
893
+ },
894
+ {
895
+ "id": 2376,
896
+ "qa_question": "ans@How many times did the opponent that have a bird on their logo, been in the super bowl?",
897
+ "qa_column": [
898
+ "History of the Philadelphia Eagles",
899
+ "Philadelphia Eagles"
900
+ ],
901
+ "qa_answer": [
902
+ "3"
903
+ ],
904
+ "table": {
905
+ "header": [
906
+ "History of the Philadelphia Eagles",
907
+ "Philadelphia Eagles"
908
+ ],
909
+ "rows": [
910
+ [
911
+ "The history of the Philadelphia Eagles begins in 1933. In their history, the Eagles have appeared in the Super Bowl three times, losing in their first two appearances but winning the third, in 2018. They won three NFL Championships, the precursor to the Super Bowl, in four appearances."
912
+ ],
913
+ [
914
+ "the logo with an eagle on it"
915
+ ]
916
+ ]
917
+ },
918
+ "title": "1951 Green Bay Packers season"
919
+ },
920
+ {
921
+ "id": 200,
922
+ "qa_question": "ans@This song released on September 29, 1983 and inspired a hit song by Usher was written by who?",
923
+ "qa_column": [
924
+ "Uptown Girl",
925
+ "Can't Stop Won't Stop (Usher song)"
926
+ ],
927
+ "qa_answer": [
928
+ "Billy Joel"
929
+ ],
930
+ "table": {
931
+ "header": [
932
+ "Uptown Girl",
933
+ "Can't Stop Won't Stop (Usher song)"
934
+ ],
935
+ "rows": [
936
+ [
937
+ "\"Uptown Girl\" is a song written and performed by American musician Billy Joel. It was released on September 29, 1983, on his ninth studio album An Innocent Man (1983). The lyrics describe a working-class \"downtown man\" attempting to woo a wealthy \"uptown girl.\"",
938
+ "\"Can't Stop Won't Stop\" is a song recorded by American recording artist Usher for his seventh studio album Looking 4 Myself (2012). Written and produced by Will \"will.i.am\" Adams and Keith Harris, the song contains an interpolation of the bridge to Billy Joel's 1983 hit single \"Uptown Girl\". Musically, \"Can't Stop Won't Stop\" is a eurodance and dance-pop song that incorporates elements of dubstep."
939
+ ]
940
+ ]
941
+ },
942
+ "title": "PNC Park"
943
+ },
944
+ {
945
+ "id": 300,
946
+ "qa_question": "ans@Oliver Mellor played Dr. Matt Carter on the TV show that had Tracy Barlow kill who?",
947
+ "qa_column": [
948
+ "title",
949
+ "Charlie Stubbs (Coronation Street)"
950
+ ],
951
+ "qa_answer": [
952
+ "Charlie"
953
+ ],
954
+ "table": {
955
+ "header": [
956
+ "title",
957
+ "Charlie Stubbs (Coronation Street)"
958
+ ],
959
+ "rows": [
960
+ [
961
+ "coronation street( a screenshot of the coronation street soundtrack.)",
962
+ "In 2005, Charlie began a relationship with Tracy Barlow (Kate Ford). He convinced her to move in with him and later in February 2006, manipulated her into having her daughter Amy (Amber Chadwick) move in with her parents. In turn, Tracy began to manipulate Charlie. She pretended to be pregnant and used the money he gave her for an abortion to buy expensive shoes and used her \"grief\" to have him allow Amy to move back in. When Shelley visited before her mother\u2019s marriage to Fred Elliott (John Savident), she and Charlie had a one-night stand. She told Tracy about their night of passion, who accused her of lying. Shelley later revealed that she was pregnant with Charlie\u2019s baby but didn\u2019t allow Charlie to have anything to do with the baby, and left. He and Tracy briefly split but reconciled. Charlie later began an affair with Maria Sutherland (Samia Smith), who was renting his flat. When David Platt (Jack P. Shepherd) discovered the affair he tried to blackmail Charlie, threatening to reveal the affair to Tracy. Charlie retaliated by trying to drown David in the bath. When Tracy eventually found out about the affair, they split once more. Tracy began to plot revenge against Charlie and pretended to make amends with Charlie. She pretended he was abusing her to the point of burning herself with an iron to make it look like Charlie was responsible for her injuries. Charlie eventually realized his partner was seeking revenge and when he was about to tell her their relationship was over, she insisted on performing a lap dance for him. She hit him round the head with a heavy ornament, and he later died in hospital. She claimed she\u2019d killed him in self-defence but the court found her guilty and she was given a life sentence."
963
+ ]
964
+ ]
965
+ },
966
+ "title": "Oliver Mellor"
967
+ },
968
+ {
969
+ "id": 350,
970
+ "qa_question": "ans@corn beans and squash the three most important crops of the wampanoag were also known as?",
971
+ "qa_column": [
972
+ "Wampanoag"
973
+ ],
974
+ "qa_answer": [
975
+ "three sisters"
976
+ ],
977
+ "table": {
978
+ "header": [
979
+ "Wampanoag"
980
+ ],
981
+ "rows": [
982
+ [
983
+ "Traditionally Wampanoag people have been semi-sedentary, with seasonal movements between fixed sites in present-day southern New England. The men often traveled far north and south along the Eastern seaboard for seasonal fishing expeditions, and sometimes stayed in those distant locations for weeks and months at a time. The women cultivated varieties of the \"three sisters\" (the intercropping of maize, climbing beans, and squash) as the staples of their diet, supplemented by fish and game caught by the men. Each community had authority over a well-defined territory from which the people derived their livelihood through a seasonal round of fishing, planting, harvesting, and hunting. Because southern New England was thickly populated by indigenous peoples, hunting grounds had strictly defined boundaries."
984
+ ]
985
+ ]
986
+ },
987
+ "title": "Peter Egan"
988
+ },
989
+ {
990
+ "id": 402,
991
+ "qa_question": "ans@who played the part of the Cowardly Lion?",
992
+ "qa_column": [
993
+ "title",
994
+ "Bert Lahr"
995
+ ],
996
+ "qa_answer": [
997
+ "Bert Lahr"
998
+ ],
999
+ "table": {
1000
+ "header": [
1001
+ "title",
1002
+ "Bert Lahr"
1003
+ ],
1004
+ "rows": [
1005
+ [
1006
+ "the wizard of oz( theatrical poster for silent film.)",
1007
+ "Bert Lahr (August 13, 1895 \u2013 December 4, 1967) was an American actor, particularly of stage and film, and comedian. Lahr is known for his role as the Cowardly Lion, as well as his counterpart Kansas farmworker Zeke, in The Wizard of Oz (1939). He was well known for his explosive humor, but also adapted well to dramatic roles and his work in burlesque, vaudeville, and on Broadway."
1008
+ ]
1009
+ ]
1010
+ },
1011
+ "title": "1980 in home video"
1012
+ },
1013
+ {
1014
+ "id": 410,
1015
+ "qa_question": "ans@what is the city that was the center of imperial life in the roman empire in the early fifth century?",
1016
+ "qa_column": [
1017
+ "Imperial fora"
1018
+ ],
1019
+ "qa_answer": [
1020
+ "Rome"
1021
+ ],
1022
+ "table": {
1023
+ "header": [
1024
+ "Imperial fora"
1025
+ ],
1026
+ "rows": [
1027
+ [
1028
+ "The Imperial Fora (\"Fori Imperiali \" in Italian) are a series of monumental \"fora\" (public squares), constructed in Rome over a period of one and a half centuries, between 46 BC and 113 AD. The forums were the center of the Roman Republic and of the Roman Empire."
1029
+ ]
1030
+ ]
1031
+ },
1032
+ "title": "List of newspapers in Italy"
1033
+ },
1034
+ {
1035
+ "id": 2005,
1036
+ "qa_question": "ans@which one has a rooster on his logo?",
1037
+ "qa_column": [
1038
+ "college"
1039
+ ],
1040
+ "qa_answer": [
1041
+ "South Carolina"
1042
+ ],
1043
+ "table": {
1044
+ "header": [
1045
+ "college"
1046
+ ],
1047
+ "rows": [
1048
+ [
1049
+ "byu( this is the new yankee stadium that will be used at the super bowl of the)"
1050
+ ],
1051
+ [
1052
+ "south carolina( i pinned this because i think this is what the logo would look like on sports equipment business)"
1053
+ ]
1054
+ ]
1055
+ },
1056
+ "title": "2013 Detroit Lions season"
1057
+ },
1058
+ {
1059
+ "id": 2015,
1060
+ "qa_question": "ans@who is the guy in call me maybe video?",
1061
+ "qa_column": [
1062
+ "Call Me Maybe"
1063
+ ],
1064
+ "qa_answer": [
1065
+ "Holden Nowell"
1066
+ ],
1067
+ "table": {
1068
+ "header": [
1069
+ "Call Me Maybe"
1070
+ ],
1071
+ "rows": [
1072
+ [
1073
+ "The video begins with Jepsen spying on her attractive tattooed neighbour (Holden Nowell) as he is working on his lawn. As he takes his shirt off and notices she is staring at him, Jepsen slips on her high heels and falls below her window. She is reading the books Love at First Sight (Men In Uniform) by B.J. Daniels and Skylar's Outlaw by Linda Warren. The scene then cuts to her garage, where she is rehearsing the track with her band. Following the rehearsals, her bandmates push her to go and wash her car, where she tries to gain her neighbour's attention with various provocative poses only to fall from the hood of the car. She is briefly knocked out from the fall, during which she dreams of a romance novel-type encounter with her crush against the backdrop of Peggys Cove. As she comes to, the neighbour then helps her get up, and watches the band rehearse the track again. After turning and writing down her telephone number, Jepsen sees her neighbour pass one of her male bandmates (Tavish Crowe) his own number, indicating he doesn't like women at all and is gay, where the very end shows that Jepsen is taken aback by this. The video received three nominations on the 2012 MuchMusic Video Awards in the categories of UR Fave Video, Pop Video of the Year, and Video of the Year. wallpaper with a well dressed girl titled pop artist"
1074
+ ]
1075
+ ]
1076
+ },
1077
+ "title": "The Voice UK (series 2)"
1078
+ },
1079
+ {
1080
+ "id": 410,
1081
+ "qa_question": "ans@the building in the top right has what at its top?",
1082
+ "qa_column": [
1083
+ "val"
1084
+ ],
1085
+ "qa_answer": [
1086
+ "Dome"
1087
+ ],
1088
+ "table": {
1089
+ "header": [
1090
+ "val"
1091
+ ],
1092
+ "rows": [
1093
+ [
1094
+ "rome( visit the vatican city and sistine chapel in rome with a dome.)"
1095
+ ]
1096
+ ]
1097
+ },
1098
+ "title": "List of newspapers in Italy"
1099
+ },
1100
+ {
1101
+ "id": 100,
1102
+ "qa_question": "map@Has crossed swords on its logo?",
1103
+ "qa_column": "signed from",
1104
+ "qa_answer": [
1105
+ "no",
1106
+ "no",
1107
+ "yes",
1108
+ "no",
1109
+ "no",
1110
+ "no",
1111
+ "no",
1112
+ "no",
1113
+ "no"
1114
+ ],
1115
+ "table": {
1116
+ "header": [
1117
+ "signed from"
1118
+ ],
1119
+ "rows": [
1120
+ [
1121
+ "penrith panthers (Logo of Penrith Panthers, a cartoon picture of a black leopard)"
1122
+ ],
1123
+ [
1124
+ "south sydney rabbitohs (Logo of South Sydney Rabbitohs, A white rabbit jumps on an oval pattern with a green frame on a red background )"
1125
+ ],
1126
+ [
1127
+ "gold coast titans (Logo of Gold Coast Titans, a warrior in golden armor holding two swords in a cross shape)"
1128
+ ],
1129
+ [
1130
+ "salford red devils"
1131
+ ],
1132
+ [
1133
+ "leigh centurions (Logo of Leigh Centurions, a warrior wearing a half-moon helmet and holding a red-edged shield)"
1134
+ ],
1135
+ [
1136
+ "castleford tigers (Logo of Castleford Tigers, the head of a roaring tiger in the middle)"
1137
+ ],
1138
+ [
1139
+ "sale sharks (Logo of Sale Sharks, a shark in deep blue)"
1140
+ ],
1141
+ [
1142
+ "l\u00e9zignan sangliers (Logo of L\u00e9zignan Sangliers, a pink and green logo with name on the right and a windmill-like logo on the left)"
1143
+ ],
1144
+ [
1145
+ "leigh centurions (Logo of Leigh Centurions, a warrior wearing a half-moon helmet and holding a red-edged shield)"
1146
+ ]
1147
+ ]
1148
+ },
1149
+ "title": "2018 Warrington Wolves season (Transfers | In)"
1150
+ },
1151
+ {
1152
+ "id": 150,
1153
+ "qa_question": "map@In which year did the San Francisco 49ers move to their new stadium?",
1154
+ "qa_column": "game site",
1155
+ "qa_answer": [
1156
+ "/",
1157
+ "/",
1158
+ "/",
1159
+ "/",
1160
+ "/",
1161
+ "1971",
1162
+ "/",
1163
+ "/",
1164
+ "/",
1165
+ "/",
1166
+ "/",
1167
+ "/",
1168
+ "/",
1169
+ "/",
1170
+ "/",
1171
+ "/",
1172
+ "/"
1173
+ ],
1174
+ "table": {
1175
+ "header": [
1176
+ "game site"
1177
+ ],
1178
+ "rows": [
1179
+ [
1180
+ "edward jones dome"
1181
+ ],
1182
+ [
1183
+ "university of phoenix stadium"
1184
+ ],
1185
+ [
1186
+ "mercedes-benz superdome (The logo of mercedes-benz superdome, in a beautiful font)"
1187
+ ],
1188
+ [
1189
+ "raymond james stadium (The logo of raymond james stadium, in a beautiful font)"
1190
+ ],
1191
+ [
1192
+ "university of phoenix stadium"
1193
+ ],
1194
+ [
1195
+ "candlestick park (Candlestick Park was an outdoor sports and entertainment stadium in the West Coast of the United States, located in San Francisco, in the Bayview Heights area. The stadium was originally the home of Major League Baseball's San Francisco Giants, who played there from 1960 until moving into Pacific Bell Park (since renamed AT&T Park) in 2000. It was also the home field of the San Francisco 49ers of the National Football League from 1971 through 2013. The 49ers moved to Levi's Stadium in Santa Clara for the 2014 season.)"
1196
+ ],
1197
+ [
1198
+ "university of phoenix stadium"
1199
+ ],
1200
+ [
1201
+ "university of phoenix stadium"
1202
+ ],
1203
+ [
1204
+ "bye"
1205
+ ],
1206
+ [
1207
+ "university of phoenix stadium"
1208
+ ],
1209
+ [
1210
+ "everbank field"
1211
+ ],
1212
+ [
1213
+ "university of phoenix stadium"
1214
+ ],
1215
+ [
1216
+ "lincoln financial field"
1217
+ ],
1218
+ [
1219
+ "university of phoenix stadium"
1220
+ ],
1221
+ [
1222
+ "lp field"
1223
+ ],
1224
+ [
1225
+ "centurylink field (The logo of CenturyLink Field, on the left is a minimalist icon of a stadium, on the right is a text logo)"
1226
+ ],
1227
+ [
1228
+ "university of phoenix stadium"
1229
+ ]
1230
+ ]
1231
+ },
1232
+ "title": "2013 Arizona Cardinals season (Regular season)"
1233
+ },
1234
+ {
1235
+ "id": 250,
1236
+ "qa_question": "map@what is the person behind holding?",
1237
+ "qa_column": "driver",
1238
+ "qa_answer": [
1239
+ "/",
1240
+ "water",
1241
+ "/",
1242
+ "/",
1243
+ "/",
1244
+ "/",
1245
+ "camera",
1246
+ "/",
1247
+ "/",
1248
+ "/"
1249
+ ],
1250
+ "table": {
1251
+ "header": [
1252
+ "driver"
1253
+ ],
1254
+ "rows": [
1255
+ [
1256
+ "dale earnhardt, jr. (r)"
1257
+ ],
1258
+ [
1259
+ "jeff burton (A man with a sunglasses and racing jacket just finish the game. The man behind him is holding a bottle of water.)"
1260
+ ],
1261
+ [
1262
+ "bobby labonte (A man with a sunglasses and racing jacket is waving his hands on the left and grabbing a coke on the right.)"
1263
+ ],
1264
+ [
1265
+ "rusty wallace (A man with a sunglasses and racing jacket is smiling, a man is behind him trying to push something.)"
1266
+ ],
1267
+ [
1268
+ "kevin lepage (A man with sunglasses and black racing jacket is smiling.)"
1269
+ ],
1270
+ [
1271
+ "jeremy mayfield (A man with a cap and racing jacket is cheering.)"
1272
+ ],
1273
+ [
1274
+ "dale earnhardt (A man with mustache is wearing a racing jacket, sunglasses, smiling. The man behind him is holding a camera and taking photo.)"
1275
+ ],
1276
+ [
1277
+ "terry labonte (A man with mustache is wearing a yellow racing jacket and smiling.)"
1278
+ ],
1279
+ [
1280
+ "tony stewart (A man with beard, grey hair and sunglasses is looking towards the left side of the picture.)"
1281
+ ],
1282
+ [
1283
+ "ricky rudd (A man in racing jacket and sunglasses gets out of his car before the race.)"
1284
+ ]
1285
+ ]
1286
+ },
1287
+ "title": "2000 DirecTV 500 (Top 10 results)"
1288
+ },
1289
+ {
1290
+ "id": 450,
1291
+ "qa_question": "map@Has a ship in logo or Charlotte Knights?",
1292
+ "qa_column": "team",
1293
+ "qa_answer": [
1294
+ "no",
1295
+ "no",
1296
+ "yes",
1297
+ "no",
1298
+ "no",
1299
+ "no",
1300
+ "yes",
1301
+ "no",
1302
+ "no",
1303
+ "no"
1304
+ ],
1305
+ "table": {
1306
+ "header": [
1307
+ "team"
1308
+ ],
1309
+ "rows": [
1310
+ [
1311
+ "scranton/wilkes-barre railriders (An angry porcupine riding a railroad)"
1312
+ ],
1313
+ [
1314
+ "syracuse mets (An icon with orange words on a blue background)"
1315
+ ],
1316
+ [
1317
+ "charlotte knights (A knight's helmet surrounded by a crescent moon with the word art of Charlotte Knights under it and a crown above it)"
1318
+ ],
1319
+ [
1320
+ "durham bulls (A puffing bull is crossing the blue letter D)"
1321
+ ],
1322
+ [
1323
+ "gwinnett stripers (A green fish with an open mouth is swimming over the letter G of the word art for \"Gwinnett Stripers\")"
1324
+ ],
1325
+ [
1326
+ "norfolk tides (A toothy seahorse holding a sea god's fork stands in the middle of a circle with the word Norfolk Tides)"
1327
+ ],
1328
+ [
1329
+ "columbus clippers (A ship sails on word art on Columbus Clippers)"
1330
+ ],
1331
+ [
1332
+ "indianapolis indians (An indian style octagon pattern)"
1333
+ ],
1334
+ [
1335
+ "louisville bats (A flying bat clutching a baseball bat, flying over a baseball as a background and Louisville Bats as a circled icon)"
1336
+ ],
1337
+ [
1338
+ "toledo mud hens (A chicken in a baseball cap with the letter T is waving a baseball bat with word art by Toledo Mud Hens)"
1339
+ ]
1340
+ ]
1341
+ },
1342
+ "title": "International League (Current teams)"
1343
+ },
1344
+ {
1345
+ "id": 142,
1346
+ "qa_question": "map@What is the time span?",
1347
+ "qa_column": "Term",
1348
+ "qa_answer": [
1349
+ "5",
1350
+ "5",
1351
+ "11",
1352
+ "/",
1353
+ "1",
1354
+ "6",
1355
+ "3",
1356
+ "9",
1357
+ "4",
1358
+ "3",
1359
+ "/",
1360
+ "11",
1361
+ "5",
1362
+ "4",
1363
+ "3",
1364
+ "/"
1365
+ ],
1366
+ "table": {
1367
+ "header": [
1368
+ "Term"
1369
+ ],
1370
+ "rows": [
1371
+ [
1372
+ "1859\u20131864"
1373
+ ],
1374
+ [
1375
+ "1864\u20131869"
1376
+ ],
1377
+ [
1378
+ "1869\u20131880"
1379
+ ],
1380
+ [
1381
+ "Term"
1382
+ ],
1383
+ [
1384
+ "1894\u20131895"
1385
+ ],
1386
+ [
1387
+ "1895\u20131901"
1388
+ ],
1389
+ [
1390
+ "1901\u20131904"
1391
+ ],
1392
+ [
1393
+ "1904\u20131913"
1394
+ ],
1395
+ [
1396
+ "1913\u20131917"
1397
+ ],
1398
+ [
1399
+ "1917\u20131920"
1400
+ ],
1401
+ [
1402
+ "Term"
1403
+ ],
1404
+ [
1405
+ "1927\u20131938"
1406
+ ],
1407
+ [
1408
+ "1938\u20131943"
1409
+ ],
1410
+ [
1411
+ "1943\u20131947"
1412
+ ],
1413
+ [
1414
+ "1947\u20131950"
1415
+ ],
1416
+ [
1417
+ "Term"
1418
+ ]
1419
+ ]
1420
+ },
1421
+ "title": "Electoral district of Lachlan"
1422
+ },
1423
+ {
1424
+ "id": 145,
1425
+ "qa_question": "map@Is the date during in 1900's?",
1426
+ "qa_column": "Created",
1427
+ "qa_answer": [
1428
+ "no",
1429
+ "no",
1430
+ "no",
1431
+ "yes",
1432
+ "no",
1433
+ "yes",
1434
+ "no",
1435
+ "yes",
1436
+ "yes",
1437
+ "yes",
1438
+ "yes",
1439
+ "yes",
1440
+ "yes",
1441
+ "yes",
1442
+ "no"
1443
+ ],
1444
+ "table": {
1445
+ "header": [
1446
+ "Created"
1447
+ ],
1448
+ "rows": [
1449
+ [
1450
+ "29 October 1874"
1451
+ ],
1452
+ [
1453
+ "2 January 1857"
1454
+ ],
1455
+ [
1456
+ "26 October 1874"
1457
+ ],
1458
+ [
1459
+ "15 July 1949"
1460
+ ],
1461
+ [
1462
+ "4 August 1821"
1463
+ ],
1464
+ [
1465
+ "24 April 1940"
1466
+ ],
1467
+ [
1468
+ "2 January 1857"
1469
+ ],
1470
+ [
1471
+ "3 March 1970"
1472
+ ],
1473
+ [
1474
+ "12 December 1961"
1475
+ ],
1476
+ [
1477
+ "6 January 1965"
1478
+ ],
1479
+ [
1480
+ "16 March 1964"
1481
+ ],
1482
+ [
1483
+ "13 December 1963"
1484
+ ],
1485
+ [
1486
+ "6 February 1962"
1487
+ ],
1488
+ [
1489
+ "16 August 1921"
1490
+ ],
1491
+ [
1492
+ "2 January 1857"
1493
+ ]
1494
+ ]
1495
+ },
1496
+ "title": "List of districts of Lima"
1497
+ },
1498
+ {
1499
+ "id": 155,
1500
+ "qa_question": "map@Is the time less than a week?",
1501
+ "qa_column": "Length of use",
1502
+ "qa_answer": [
1503
+ "no",
1504
+ "no",
1505
+ "no",
1506
+ "no",
1507
+ "no",
1508
+ "no",
1509
+ "yes",
1510
+ "no",
1511
+ "no",
1512
+ "no",
1513
+ "no",
1514
+ "yes"
1515
+ ],
1516
+ "table": {
1517
+ "header": [
1518
+ "Length of use"
1519
+ ],
1520
+ "rows": [
1521
+ [
1522
+ "14 days"
1523
+ ],
1524
+ [
1525
+ "10 days"
1526
+ ],
1527
+ [
1528
+ "21 days"
1529
+ ],
1530
+ [
1531
+ "7 days"
1532
+ ],
1533
+ [
1534
+ "10 days"
1535
+ ],
1536
+ [
1537
+ "10 days"
1538
+ ],
1539
+ [
1540
+ "Daily"
1541
+ ],
1542
+ [
1543
+ "14 days"
1544
+ ],
1545
+ [
1546
+ "10 days"
1547
+ ],
1548
+ [
1549
+ "14 days"
1550
+ ],
1551
+ [
1552
+ "20 days"
1553
+ ],
1554
+ [
1555
+ "2 hours"
1556
+ ]
1557
+ ]
1558
+ },
1559
+ "title": "Crest Whitestrips"
1560
+ },
1561
+ {
1562
+ "id": 578,
1563
+ "qa_question": "map@Is this player from Norway?",
1564
+ "qa_column": "Player",
1565
+ "qa_answer": [
1566
+ "no",
1567
+ "no",
1568
+ "no",
1569
+ "no",
1570
+ "no",
1571
+ "no"
1572
+ ],
1573
+ "table": {
1574
+ "header": [
1575
+ "Player"
1576
+ ],
1577
+ "rows": [
1578
+ [
1579
+ "Raymond van Barneveld"
1580
+ ],
1581
+ [
1582
+ "Raymond van Barneveld"
1583
+ ],
1584
+ [
1585
+ "Adrian Lewis"
1586
+ ],
1587
+ [
1588
+ "Dean Winstanley"
1589
+ ],
1590
+ [
1591
+ "Michael van Gerwen"
1592
+ ],
1593
+ [
1594
+ "Terry Jenkins"
1595
+ ]
1596
+ ]
1597
+ },
1598
+ "title": "PDC World Darts Championship"
1599
+ }
1600
+ ]
templates/prompts/prompt_mmqa_v2.txt ADDED
@@ -0,0 +1,523 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ Generate SQL given the question, table, passages, image captions to answer the question correctly.
2
+ If question-relevant column(s) contents are not suitable for SQL comparisons or calculations, map it to a new column with clean content by a new grammar QA("map@").
3
+ If mapping to a new column still can not answer the question with valid SQL, turn to an end-to-end solution by a new grammar QA("ans@"). This grammar aims to solve all the rest of complex questions or tables or passages or image captions.
4
+
5
+ CREATE TABLE Dutch Ruppersberger (Electoral history)(
6
+ row_id int,
7
+ year int,
8
+ office text,
9
+ election text,
10
+ filledcolumnname real,
11
+ subject text,
12
+ party text,
13
+ votes text,
14
+ % text,
15
+ filledcolumnname_2 real,
16
+ opponent text,
17
+ party_2 text,
18
+ votes_2 text,
19
+ %_2 text)
20
+ /*
21
+ 3 example rows:
22
+ SELECT * FROM w LIMIT 3;
23
+ row_id year office election filledcolumnname subject party votes % filledcolumnname_2 opponent party_2 votes_2 %_2
24
+ 0 1994 baltimore county executive general nan dutch ruppersberger democratic n/a n/a nan n/a n/a n/a n/a
25
+ 1 1998 baltimore county executive general nan dutch ruppersberger democratic 166482 70.47 nan john j. bishop republican 69449 29.4
26
+ 2 2002 none general nan dutch ruppersberger democratic 105718 54.16 nan helen delich bentley republican 88954 45.57
27
+ */
28
+ Q: What year was Elizabeth Matory the opponent of Charles Albert Ruppersberger?
29
+ NeuralSQL: SELECT year FROM w WHERE opponent = 'elizabeth matory'
30
+
31
+
32
+ CREATE TABLE Virtual Console (Titles)(
33
+ row_id int,
34
+ system text,
35
+ japan int,
36
+ [[list of virtual console games for wii u (north america)|north america]] real,
37
+ pal region - europe real,
38
+ pal region - australia real)
39
+ /*
40
+ 3 example rows:
41
+ SELECT * FROM w LIMIT 3;
42
+ row_id system japan [[list of virtual console games for wii u (north america)|north america]] pal region - europe pal region - australia
43
+ 0 nes/famicom 148 94.0 89.0 89.0
44
+ 1 super nes/super famicom 101 51.0 49.0 49.0
45
+ 2 nintendo 64 22 21.0 21.0 21.0
46
+ */
47
+ Q: Which system has a lower number for Japan of the virtual console systems: Game Boy Advance or the Japan-only console MSX?
48
+ NeuralSQL: SELECT system FROM w WHERE system IN ('game boy advance', 'msx (japan only)') ORDER BY japan LIMIT 1
49
+
50
+
51
+ CREATE TABLE 2018 Warrington Wolves season (Transfers | In)(
52
+ row_id int,
53
+ player text,
54
+ signed from text,
55
+ contract length text,
56
+ announced text)
57
+ /*
58
+ 3 example rows:
59
+ SELECT * FROM w LIMIT 3;
60
+ row_id player signed from contract length announced
61
+ 0 sitaleki akauola penrith panthers p2y 2017-08-01 00:00:00
62
+ 1 bryson goodwin south sydney rabbitohs p2y 2017-10-01 00:00:00
63
+ 2 tyrone roberts gold coast titans p3y 2017-10-01 00:00:00
64
+ */
65
+ CREATE TABLE Images(
66
+ row_id int,
67
+ gold coast titans text)
68
+ /*
69
+ All rows of the table:
70
+ SELECT * FROM w;
71
+ row_id gold coast titans
72
+ 0 a logo for the golden knights is painted on the beach.
73
+ */
74
+ Q: What player was transferred from the team that has crossed swords on its logo to the Warrington Wolves in the 2018 season?
75
+ NeuralSQL: SELECT player FROM w WHERE QA("map@Has crossed swords on its logo?"; `signed from`) = 'yes'
76
+
77
+
78
+ CREATE TABLE 2013 Arizona Cardinals season (Regular season)(
79
+ row_id int,
80
+ week int,
81
+ date text,
82
+ opponent text,
83
+ result text,
84
+ record text,
85
+ game site text,
86
+ nfl.com recap text)
87
+ /*
88
+ 3 example rows:
89
+ SELECT * FROM w LIMIT 3;
90
+ row_id week date opponent result record game site nfl.com recap
91
+ 0 1 september 8 at st. louis rams l 24–27 0–1 edward jones dome [http://www.nfl.com/gamecenter/2013090810/2013/reg1/cardinals@rams recap]
92
+ 1 2 september 15 detroit lions w 25–21 1–1 university of phoenix stadium [http://www.nfl.com/gamecenter/2013091509/2013/reg2/lions@cardinals recap]
93
+ 2 3 september 22 at new orleans saints l 7–31 1–2 mercedes-benz superdome [http://www.nfl.com/gamecenter/2013092207/2013/reg3/cardinals@saints recap]
94
+ */
95
+ CREATE TABLE Passages(
96
+ row_id int,
97
+ candlestick park text)
98
+ /*
99
+ All rows of the table:
100
+ SELECT * FROM w;
101
+ row_id candlestick park
102
+ 0 candlestick park was an outdoor sports and entertainment stadium in the west coast of the united states, located in san francisco, in the bayview heights area. the stadium was originally the home of major league baseball's san francisco giants, who played there from 1960 until moving into pacific bell park (since renamed at&t park) in 2000. it was also the home field of the san francisco 49ers of the national football league from 1971 through 2013. the 49ers moved to levi's stadium in santa clara for the 2014 season.
103
+ */
104
+ Q: In which year did the San Francisco 49ers move to their new stadium, which was the location that the Arizona Cardinals lost a 2013 regular season game by the score of 20 to 32?
105
+ NeuralSQL: SELECT QA("map@In which year did the San Francisco 49ers move to their new stadium?"; `game site`) FROM w WHERE opponent LIKE '%san francisco 49ers%' AND result = 'l 20–32'
106
+
107
+
108
+ CREATE TABLE PNC Park (Concerts)(
109
+ row_id int,
110
+ date text,
111
+ artist text,
112
+ opening act(s) text,
113
+ tour / concert name text,
114
+ attendance text,
115
+ revenue text,
116
+ notes text)
117
+ /*
118
+ 3 example rows:
119
+ SELECT * FROM w LIMIT 3;
120
+ row_id date artist opening act(s) tour / concert name attendance revenue notes
121
+ 0 2003-08-06 00:00:00 bruce springsteen & the e street band — the rising tour 42301 / 48074 $3137575 none
122
+ 1 2005-06-26 00:00:00 jimmy buffett — a salty piece of land tour — — sonny landreth and jake shimabukuro were special guests http://www.buffettworld.com/archives/2005-a-salty-piece-of-land/6-26/
123
+ 2 2005-09-28 00:00:00 the rolling stones pearl jam a bigger bang — — none
124
+ */
125
+ CREATE TABLE Passages(
126
+ row_id int,
127
+ can't stop won't stop (usher song) text,
128
+ uptown girl text)
129
+ /*
130
+ All rows of the table:
131
+ SELECT * FROM w;
132
+ row_id can't stop won't stop (usher song) uptown girl
133
+ 0 "can't stop won't stop" is a song recorded by american recording artist usher for his seventh studio album looking 4 myself (2012). written and produced by will "will.i.am" adams and keith harris, the song contains an interpolation of the bridge to billy joel's 1983 hit single "uptown girl". musically, "can't stop won't stop" is a eurodance and dance-pop song that incorporates elements of dubstep. "uptown girl" is a song written and performed by american musician billy joel. it was released on 1983-9-29, on his ninth studio album an innocent man (1983). the lyrics describe a working-class "downtown man" attempting to woo a wealthy "uptown girl."
134
+ */
135
+ Q: This song released on September 29, 1983 and inspired a hit song by Usher was written by who?
136
+ NeuralSQL: QA("ans@This song released on September 29, 1983 and inspired a hit song by Usher was written by who?"; Uptown Girl; Can't Stop Won't Stop (Usher song) )
137
+
138
+
139
+ CREATE TABLE 2000 DirecTV 500 (Top 10 results)(
140
+ row_id int,
141
+ pos int,
142
+ grid int,
143
+ car number (no.) int,
144
+ driver text,
145
+ team text,
146
+ manufacturer text,
147
+ laps completed (laps) int,
148
+ points int)
149
+ /*
150
+ 3 example rows:
151
+ SELECT * FROM w LIMIT 3;
152
+ row_id pos grid car number (no.) driver team manufacturer laps completed (laps) points
153
+ 0 1 4 8 dale earnhardt, jr. (r) dale earnhardt, inc. chevrolet 334 185
154
+ 1 2 37 99 jeff burton roush racing ford 334 175
155
+ 2 3 14 18 bobby labonte joe gibbs racing pontiac 334 170
156
+ */
157
+ CREATE TABLE Images(
158
+ row_id int,
159
+ dale earnhardt text)
160
+ /*
161
+ All rows of the table:
162
+ SELECT * FROM w;
163
+ row_id dale earnhardt
164
+ 0 a man wearing a number of neckties and a mustache.
165
+ */
166
+ Q: The 2000 DirecTv 500 Top 10 Driver with 146 points has a person behind them holding what?
167
+ NeuralSQL: SELECT QA("map@what is the person behind holding?"; driver) FROM w WHERE points = 146
168
+
169
+
170
+ CREATE TABLE Oliver Mellor (Credits | Television)(
171
+ row_id int,
172
+ year text,
173
+ title text,
174
+ role text,
175
+ notes text)
176
+ /*
177
+ 3 example rows:
178
+ SELECT * FROM w LIMIT 3;
179
+ row_id year title role notes
180
+ 0 2006 the royal dr. guy fitzgerald none
181
+ 1 2006 hollyoaks: in the city detective monroe 3 episodes
182
+ 2 2006 doctor who matt episode "army of ghosts"
183
+ */
184
+ CREATE TABLE Passages(
185
+ row_id int,
186
+ charlie stubbs (coronation street) text)
187
+ /*
188
+ All rows of the table:
189
+ SELECT * FROM w;
190
+ row_id charlie stubbs (coronation street)
191
+ 0 in 2005, charlie began a relationship with tracy barlow (kate ford). he convinced her to move in with him and later in february 2006, manipulated her into having her daughter amy (amber chadwick) move in with her parents. in turn, tracy began to manipulate charlie. she pretended to be pregnant and used the money he gave her for an abortion to buy expensive shoes and used her "grief" to have him allow amy to move back in. when shelley visited before her mother’s marriage to fred elliott (john savident), she and charlie had a 1-tni stand. she told tracy about their tni of passion, who accused her of lying. shelley later revealed that she was pregnant with charlie’s baby but didn’t allow charlie to have anything to do with the baby, and left. he and tracy briefly split but reconciled. charlie later began an affair with maria sutherland (samia smith), who was renting his flat. when david platt (jack p. shepherd) discovered the affair he tried to blackmail charlie, threatening to reveal the affair to tracy. charlie retaliated by trying to drown david in the bath. when tracy eventually found out about the affair, they split once more. tracy began to plot revenge against charlie and pretended to make amends with charlie. she pretended he was abusing her to the point of burning herself with an iron to make it look like charlie was responsible for her injuries. charlie eventually realized his partner was seeking revenge and when he was about to tell her their relationship was over, she insisted on performing a lap dance for him. she hit him round the head with a heavy ornament, and he later died in hospital. she claimed she’d killed him in self-defence but the court found her guilty and she was given a life sentence.
192
+ */
193
+ Q: Oliver Mellor played Dr. Matt Carter on the TV show that had Tracy Barlow kill who?
194
+ NeuralSQL: QA("ans@Oliver Mellor played Dr. Matt Carter on the TV show that had Tracy Barlow kill who?"; SELECT title FROM w WHERE role = 'dr. matt carter'; Charlie Stubbs (Coronation Street))
195
+
196
+
197
+ CREATE TABLE Peter Egan (Filmography)(
198
+ row_id int,
199
+ year text,
200
+ title text,
201
+ role text,
202
+ notes text)
203
+ /*
204
+ 3 example rows:
205
+ SELECT * FROM w LIMIT 3;
206
+ row_id year title role notes
207
+ 0 1971 1 brief su bill denton none
208
+ 1 1971 elizabeth r earl of southampton episode: "sweet englands pride"
209
+ 2 1973 the hireling captain hugh cantrip none
210
+ */
211
+ CREATE TABLE Passages(
212
+ row_id int,
213
+ wampanoag text)
214
+ /*
215
+ All rows of the table:
216
+ SELECT * FROM w;
217
+ row_id wampanoag
218
+ 0 traditionally wampanoag people have been semi-sedentary, with seasonal movements between fixed sites in present-day southern new england. the men often traveled far north and south along the eastern seaboard for seasonal fishing expeditions, and sometimes stayed in those distant locations for weeks and months at a time. the women cultivated varieties of the "3 sisters" (the intercropping of maize, climbing beans, and squash) as the staples of their diet, supplemented by fish and game caught by the men. each community had authority over a well-defined territory from which the people derived their livelihood through a seasonal round of fishing, planting, harvesting, and hunting. because southern new england was thickly populated by indigenous peoples, hunting grounds had strictly defined boundaries.
219
+ */
220
+ Q: corn beans and squash the three most important crops of the wampanoag were also known as
221
+ NeuralSQL: QA("ans@corn beans and squash the three most important crops of the wampanoag were also known as?"; Wampanoag)
222
+
223
+
224
+ CREATE TABLE 1980 in home video (Movie releases)(
225
+ row_id int,
226
+ u.s./canada release date text,
227
+ title text,
228
+ studio text,
229
+ notes text)
230
+ /*
231
+ 3 example rows:
232
+ SELECT * FROM w LIMIT 3;
233
+ row_id u.s./canada release date title studio notes
234
+ 0 january 1 the muppet movie magnetic video betamax release laserdisc release vhs release
235
+ 1 march 4 20000 leagues under the sea walt disney home entertainment betamax release vhs release
236
+ 2 march 4 the apple dumpling gang walt disney home entertainment betamax release vhs release
237
+ */
238
+ CREATE TABLE Passages(
239
+ row_id int,
240
+ bert lahr text)
241
+ /*
242
+ All rows of the table:
243
+ SELECT * FROM w;
244
+ row_id bert lahr
245
+ 0 bert lahr ((1895-8-131967-12-4,p26410d)) was an american actor, particularly of stage and film, and comedian. lahr is known for his role as the cowardly lion, as well as his counterpart kansas farmworker zeke, in the wizard of oz (1939). he was well known for his explosive humor, but also adapted well to dramatic roles and his work in burlesque, vaudeville, and on broadway.
246
+ */
247
+ Q: In the 1980 movie that was put out by the MGM/CBS Home Video studio, who played the part of the Cowardly Lion?
248
+ NeuralSQL: QA("ans@who played the part of the Cowardly Lion?"; SELECT title FROM w WHERE studio = 'mgm/cbs home video'; Bert Lahr)
249
+
250
+
251
+ CREATE TABLE List of newspapers in Italy (National daily newspapers)(
252
+ row_id int,
253
+ newspaper text,
254
+ circulation text,
255
+ headquarters text,
256
+ est. int,
257
+ political alignment text,
258
+ nameplate text)
259
+ /*
260
+ 3 example rows:
261
+ SELECT * FROM w LIMIT 3;
262
+ row_id newspaper circulation headquarters est. political alignment nameplate
263
+ 0 corriere della sera 242684 milan 1876 centrism 200x200px
264
+ 1 la repubblica 198835 rome 1976 social democracy 150x150px
265
+ 2 la gazzetta dello sport 161796 milan 1896 — 200x200px
266
+ */
267
+ CREATE TABLE Passages(
268
+ row_id int,
269
+ early middle ages text)
270
+ /*
271
+ All rows of the table:
272
+ SELECT * FROM w;
273
+ row_id early middle ages
274
+ 0 for almost p1000y, rome was the most politically important, richest and largest city in europe. around 100 ce, it had a population of about 450000, and declined to a mere 20000 during the early middle ages, reducing the sprawling city to groups of inhabited buildings interspersed among large areas of ruins and vegetation.
275
+ */
276
+ CREATE TABLE Images(
277
+ row_id int,
278
+ rome text)
279
+ /*
280
+ All rows of the table:
281
+ SELECT * FROM w;
282
+ row_id rome
283
+ 0 a series of photographs showing a colorful scene.
284
+ */
285
+ Q: In the city that was the center of imperial life in the roman empire in the early fifth century, the building in the top right has what at its top?
286
+ NeuralSQL: QA("ans@the building in the top right has what at its top?"; QA("ans@what is the city that was the center of imperial life in the roman empire in the early fifth century?"; Imperial fora))
287
+
288
+
289
+ CREATE TABLE International League (Current teams)(
290
+ row_id int,
291
+ division text,
292
+ team text,
293
+ founded int,
294
+ mlb affiliation text,
295
+ affiliated int,
296
+ city text,
297
+ stadium text,
298
+ capacity int)
299
+ /*
300
+ 3 example rows:
301
+ SELECT * FROM w LIMIT 3;
302
+ row_id division team founded mlb affiliation affiliated city stadium capacity
303
+ 0 north buffalo bisons 1985 toronto blue jays 2013 buffalo, new york sahlen field 16600
304
+ 1 north lehigh valley ironpigs 2008 philadelphia phillies 2007 allentown, pennsylvania coca-cola park 10100
305
+ 2 north pawtucket red sox 1973 boston red sox 1970 pawtucket, rhode island mccoy stadium 10031
306
+ */
307
+ CREATE TABLE Images(
308
+ row_id int,
309
+ columbus clippers text)
310
+ /*
311
+ All rows of the table:
312
+ SELECT * FROM w;
313
+ row_id columbus clippers
314
+ 0 a large blue and white clock on the side of a building.
315
+ */
316
+ Q: Was the Team that has a ship in logo or Charlotte Knights, the one with earlier affiliation in Current teams of International League?
317
+ NeuralSQL: SELECT team FROM w WHERE team = 'charlotte knights' OR QA("map@Has a ship in logo or Charlotte Knights?"; team) = 'yes' ORDER BY founded LIMIT 1
318
+
319
+
320
+ CREATE TABLE Warren Burton (Filmography)(
321
+ row_id int,
322
+ year int,
323
+ title text,
324
+ role text,
325
+ notes text)
326
+ /*
327
+ 3 example rows:
328
+ SELECT * FROM w LIMIT 3;
329
+ row_id year title role notes
330
+ 0 1976 baby blue marine second serviceman none
331
+ 1 1977 chatterbox tv reporter none
332
+ 2 1977 the world's greatest lover ludwig none
333
+ */
334
+ CREATE TABLE Images(
335
+ row_id int,
336
+ green lantern (film) text)
337
+ /*
338
+ All rows of the table:
339
+ SELECT * FROM w;
340
+ row_id green lantern (film)
341
+ 0 a picture of a green and white costume and glasses.
342
+ */
343
+ Q: How many people are on the poster for Green Lantern (film)?
344
+ NeuralSQL: QA("ans@How many people are on the poster for Green Lantern (film)?"; Green Lantern (film))
345
+
346
+
347
+ CREATE TABLE One Hour Photo (Accolades)(
348
+ row_id int,
349
+ award text,
350
+ category text,
351
+ recipients text,
352
+ result real)
353
+ /*
354
+ 3 example rows:
355
+ SELECT * FROM w LIMIT 3;
356
+ row_id award category recipients result
357
+ 0 critics' choice movie awards best actor robin williams nan
358
+ 1 dallas–fort worth film critics association best actor robin williams nan
359
+ 2 online film critics society best actor robin williams nan
360
+ */
361
+ CREATE TABLE Images(
362
+ row_id int,
363
+ saturn award text)
364
+ /*
365
+ All rows of the table:
366
+ SELECT * FROM w;
367
+ row_id saturn award
368
+ 0 a man in a suit and tie holding a glass.
369
+ */
370
+ Q: What is he holding in Saturn Award?
371
+ NeuralSQL: QA("ans@What is he holding?"; Saturn Award)
372
+
373
+
374
+ CREATE TABLE 2013 Detroit Lions season (2013 Draft class)(
375
+ row_id int,
376
+ draft order - round int,
377
+ draft order - choice int,
378
+ draft order - overall int,
379
+ player name text,
380
+ position text,
381
+ height text,
382
+ weight text,
383
+ college text,
384
+ contract text,
385
+ notes text,
386
+ source text)
387
+ /*
388
+ 3 example rows:
389
+ SELECT * FROM w LIMIT 3;
390
+ row_id draft order - round draft order - choice draft order - overall player name position height weight college contract notes source
391
+ 0 1 5 5 ezekiel ansah defensive end 6ft 5 in 271lbs byu p5y / none [http://www.mlive.com/lions/index.ssf/2013-4/detroit_lions_select_ezekiel_a.html detroit lions select ezekiel ansah in first round of 2013 nfl draft] mlive.com, 2013-4-26
392
+ 1 2 4 36 darius slay defensive back 6ft 1 in 190lbs mississippi state p4y / none [http://www.mlive.com/lions/index.ssf/2013-4/detroit_lions_select_mississip.html detroit lions select mississippi state cb darius slay in second round of 2013 nfl draft] mlive.com, 2013-4-27
393
+ 2 3 3 65 larry warford offensive lineman 6ft 3 in 343lbs kentucky p4y / none [http://www.mlive.com/lions/index.ssf/2013-4/detroit_lions_fill_massive_nee.html detroit lions fill massive need with massive guard prospect larry warford] mlive.com, 2013-4-27
394
+ */
395
+ CREATE TABLE Images(
396
+ row_id int,
397
+ south carolina gamecocks football text,
398
+ seattle seahawks text)
399
+ /*
400
+ All rows of the table:
401
+ SELECT * FROM w;
402
+ row_id south carolina gamecocks football seattle seahawks
403
+ 0 a group of people standing next to each other. a large green and white bird with numbers.
404
+ */
405
+ Q: What educational institution has a rooster on its logo and was the school listed in the 2013 Detroit Lions draft class for the defensive end player position?
406
+ NeuralSQL: QA("ans@which one has a rooster on his logo?"; SELECT college FROM w WHERE position='defensive end')
407
+
408
+
409
+ CREATE TABLE Melia Kreiling (Filmography | Film roles)(
410
+ row_id int,
411
+ year int,
412
+ title text,
413
+ role text,
414
+ notes text)
415
+ /*
416
+ 3 example rows:
417
+ SELECT * FROM w LIMIT 3;
418
+ row_id year title role notes
419
+ 0 2012 suspension of disbelief juliette none
420
+ 1 2013 company of heroes kestrel direct-to-video film
421
+ 2 2013 leopard kara none
422
+ */
423
+ CREATE TABLE Passages(
424
+ row_id int,
425
+ list of marvel cinematic universe films text)
426
+ /*
427
+ All rows of the table:
428
+ SELECT * FROM w;
429
+ row_id list of marvel cinematic universe films
430
+ 0 the first film in the marvel cinematic universe was iron man (2008), which was distributed by paramount pictures. paramount also distributed iron man 2 (2010), thor (2011) and captain america: the first avenger (2011), while universal pictures distributed the incredible hulk (2008). walt disney studios motion pictures began distributing the films with the 2012 crossover film the avengers, which concluded phase 1 of the franchise. phase 2 includes iron man 3 (2013), thor: the dark world (2013), captain america: wi soldier (2014), guardians of the galaxy (2014), avengers: age of ultron (2015), and ant-man (2015).
431
+ */
432
+ Q: What was Melia Kreiling's role in the film that is the next Marvel movie after 'Captain America the Winter Soldier'?
433
+ NeuralSQL: SELECT role FROM w WHERE title = QA("ans@which is the next Marvel movie after 'Captain America the Winter Soldier'?"; List of Marvel Cinematic Universe films)
434
+
435
+
436
+ CREATE TABLE 2006 Grand Prix of Portland (Qualifying results)(
437
+ row_id int,
438
+ pos int,
439
+ nat real,
440
+ name text,
441
+ team text,
442
+ qual 1 text,
443
+ qual 2 text,
444
+ best text)
445
+ /*
446
+ 3 example rows:
447
+ SELECT * FROM w LIMIT 3;
448
+ row_id pos nat name team qual 1 qual 2 best
449
+ 0 1 nan bruno junqueira newman/haas racing 59.576 57.631 57.631
450
+ 1 2 nan a. j. allmendinger forsythe racing 58.378 57.639 57.639
451
+ 2 3 nan sébastien bourdais newman/haas racing 58.464 57.646 57.646
452
+ */
453
+ CREATE TABLE Passages(
454
+ row_id int,
455
+ jtg daugherty racing text)
456
+ /*
457
+ All rows of the table:
458
+ SELECT * FROM w;
459
+ row_id jtg daugherty racing
460
+ 0 jtg daugherty racing (formerly st motorsports and jtg racing) is an american professional stock car racing team that currently competes in the monster energy nascar cup series. the team is owned by former advertising executive tad geschickter and his wife jodi, along with current espn analyst brad daugherty. the team formerly had alliances with wood brothers racing, then michael waltrip racing, and currently has a technical alliance with richard childress racing. the team currently fields the no. 37 cottonelle chevrolet ss driven by roush development driver chris buescher and the no. 47 clorox/bush's/scott products chevrolet ss driven by a. j. allmendinger in the monster energy nascar cup series.
461
+ */
462
+ Q: The driver of Nascar number 47 qualified for the 2006 Grand Prix of Portland for which team?
463
+ NeuralSQL: SELECT name FROM w WHERE team = QA("ans@which driver is number 47?"; JTG Daugherty Racing)
464
+
465
+
466
+ CREATE TABLE List of churches in Copenhagen ([[Amager]])(
467
+ row_id int,
468
+ name text,
469
+ denomination text,
470
+ year int,
471
+ coordinates real,
472
+ image text,
473
+ refs real)
474
+ /*
475
+ 3 example rows:
476
+ SELECT * FROM w LIMIT 3;
477
+ row_id name denomination year coordinates image refs
478
+ 0 all saints' church church of denmark 1932 nan 150px nan
479
+ 1 dragør church church of denmark 1885 nan 150px nan
480
+ 2 hans tausen's church church of denmark 1924 nan 150px nan
481
+ */
482
+ CREATE TABLE Images(
483
+ row_id int,
484
+ all saints' church, copenhagen text,
485
+ dragør church text,
486
+ nathanael's church text,
487
+ st. anne's church, copenhagen text,
488
+ sundby church text)
489
+ /*
490
+ All rows of the table:
491
+ SELECT * FROM w;
492
+ row_id all saints' church, copenhagen dragør church nathanael's church st. anne's church, copenhagen sundby church
493
+ 0 type of place of worship church of the holy trinity church of the holy trinity the building where the hotel is located a red brick church with a steeple and a flagpole in front of it.
494
+ */
495
+ Q: Among Copenhagen churches on the "Amager" list, which have spires and are affiliated with the Church of Denmark denomination?
496
+ NeuralSQL: SELECT name FROM w WHERE denomination = 'church of denmark' AND QA("map@does it have spires?"; name) = 'yes'
497
+
498
+
499
+ CREATE TABLE Final Straw Tour (UK Tour (Leg III))(
500
+ row_id int,
501
+ date text,
502
+ city text,
503
+ country text,
504
+ venue text)
505
+ /*
506
+ 3 example rows:
507
+ SELECT * FROM w LIMIT 3;
508
+ row_id date city country venue
509
+ 0 support acts: terra diablo & astrid support acts: terra diablo & astrid support acts: terra diablo & astrid support acts: terra diablo & astrid
510
+ 1 2004-3-2 newcastle england newcastle university
511
+ 2 2004-3-3 liverpool england carling academy
512
+ */
513
+ CREATE TABLE Images(
514
+ row_id int,
515
+ oxford text)
516
+ /*
517
+ All rows of the table:
518
+ SELECT * FROM w;
519
+ row_id oxford
520
+ 0 a guide to the city of edinburgh
521
+ */
522
+ Q: The final straw tour held leg 3 of the UK tour on March 13, 2004 in this city with how many views on the bottom?
523
+ NeuralSQL: SELECT QA("map@how many views on the bottom?"; city) FROM w WHERE date = '2004-3-13'
templates/prompts/prompt_mmqa_v2_Qa.txt ADDED
@@ -0,0 +1,521 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ Generate answer given the question, table, passages, image captions to answer the question correctly.
2
+
3
+ CREATE TABLE Dutch Ruppersberger (Electoral history)(
4
+ row_id int,
5
+ year int,
6
+ office text,
7
+ election text,
8
+ filledcolumnname real,
9
+ subject text,
10
+ party text,
11
+ votes text,
12
+ % text,
13
+ filledcolumnname_2 real,
14
+ opponent text,
15
+ party_2 text,
16
+ votes_2 text,
17
+ %_2 text)
18
+ /*
19
+ 3 example rows:
20
+ SELECT * FROM w LIMIT 3;
21
+ row_id year office election filledcolumnname subject party votes % filledcolumnname_2 opponent party_2 votes_2 %_2
22
+ 0 1994 baltimore county executive general nan dutch ruppersberger democratic n/a n/a nan n/a n/a n/a n/a
23
+ 1 1998 baltimore county executive general nan dutch ruppersberger democratic 166482 70.47 nan john j. bishop republican 69449 29.4
24
+ 2 2002 none general nan dutch ruppersberger democratic 105718 54.16 nan helen delich bentley republican 88954 45.57
25
+ */
26
+ Q: What year was Elizabeth Matory the opponent of Charles Albert Ruppersberger?
27
+ A: 2018
28
+
29
+
30
+ CREATE TABLE Virtual Console (Titles)(
31
+ row_id int,
32
+ system text,
33
+ japan int,
34
+ [[list of virtual console games for wii u (north america)|north america]] real,
35
+ pal region - europe real,
36
+ pal region - australia real)
37
+ /*
38
+ 3 example rows:
39
+ SELECT * FROM w LIMIT 3;
40
+ row_id system japan [[list of virtual console games for wii u (north america)|north america]] pal region - europe pal region - australia
41
+ 0 nes/famicom 148 94.0 89.0 89.0
42
+ 1 super nes/super famicom 101 51.0 49.0 49.0
43
+ 2 nintendo 64 22 21.0 21.0 21.0
44
+ */
45
+ Q: Which system has a lower number for Japan of the virtual console systems: Game Boy Advance or the Japan-only console MSX?
46
+ A: msx (japan only)
47
+
48
+
49
+ CREATE TABLE 2018 Warrington Wolves season (Transfers | In)(
50
+ row_id int,
51
+ player text,
52
+ signed from text,
53
+ contract length text,
54
+ announced text)
55
+ /*
56
+ 3 example rows:
57
+ SELECT * FROM w LIMIT 3;
58
+ row_id player signed from contract length announced
59
+ 0 sitaleki akauola penrith panthers p2y 2017-08-01 00:00:00
60
+ 1 bryson goodwin south sydney rabbitohs p2y 2017-10-01 00:00:00
61
+ 2 tyrone roberts gold coast titans p3y 2017-10-01 00:00:00
62
+ */
63
+ CREATE TABLE Images(
64
+ row_id int,
65
+ gold coast titans text)
66
+ /*
67
+ All rows of the table:
68
+ SELECT * FROM w;
69
+ row_id gold coast titans
70
+ 0 a logo for the golden knights is painted on the beach.
71
+ */
72
+ Q: What player was transferred from the team that has crossed swords on its logo to the Warrington Wolves in the 2018 season?
73
+ A: tyrone roberts
74
+
75
+
76
+ CREATE TABLE 2013 Arizona Cardinals season (Regular season)(
77
+ row_id int,
78
+ week int,
79
+ date text,
80
+ opponent text,
81
+ result text,
82
+ record text,
83
+ game site text,
84
+ nfl.com recap text)
85
+ /*
86
+ 3 example rows:
87
+ SELECT * FROM w LIMIT 3;
88
+ row_id week date opponent result record game site nfl.com recap
89
+ 0 1 september 8 at st. louis rams l 24–27 0–1 edward jones dome [http://www.nfl.com/gamecenter/2013090810/2013/reg1/cardinals@rams recap]
90
+ 1 2 september 15 detroit lions w 25–21 1–1 university of phoenix stadium [http://www.nfl.com/gamecenter/2013091509/2013/reg2/lions@cardinals recap]
91
+ 2 3 september 22 at new orleans saints l 7–31 1–2 mercedes-benz superdome [http://www.nfl.com/gamecenter/2013092207/2013/reg3/cardinals@saints recap]
92
+ */
93
+ CREATE TABLE Passages(
94
+ row_id int,
95
+ candlestick park text)
96
+ /*
97
+ All rows of the table:
98
+ SELECT * FROM w;
99
+ row_id candlestick park
100
+ 0 candlestick park was an outdoor sports and entertainment stadium in the west coast of the united states, located in san francisco, in the bayview heights area. the stadium was originally the home of major league baseball's san francisco giants, who played there from 1960 until moving into pacific bell park (since renamed at&t park) in 2000. it was also the home field of the san francisco 49ers of the national football league from 1971 through 2013. the 49ers moved to levi's stadium in santa clara for the 2014 season.
101
+ */
102
+ Q: In which year did the San Francisco 49ers move to their new stadium, which was the location that the Arizona Cardinals lost a 2013 regular season game by the score of 20 to 32?
103
+ A: 1971
104
+
105
+
106
+ CREATE TABLE PNC Park (Concerts)(
107
+ row_id int,
108
+ date text,
109
+ artist text,
110
+ opening act(s) text,
111
+ tour / concert name text,
112
+ attendance text,
113
+ revenue text,
114
+ notes text)
115
+ /*
116
+ 3 example rows:
117
+ SELECT * FROM w LIMIT 3;
118
+ row_id date artist opening act(s) tour / concert name attendance revenue notes
119
+ 0 2003-08-06 00:00:00 bruce springsteen & the e street band — the rising tour 42301 / 48074 $3137575 none
120
+ 1 2005-06-26 00:00:00 jimmy buffett — a salty piece of land tour — — sonny landreth and jake shimabukuro were special guests http://www.buffettworld.com/archives/2005-a-salty-piece-of-land/6-26/
121
+ 2 2005-09-28 00:00:00 the rolling stones pearl jam a bigger bang — — none
122
+ */
123
+ CREATE TABLE Passages(
124
+ row_id int,
125
+ can't stop won't stop (usher song) text,
126
+ uptown girl text)
127
+ /*
128
+ All rows of the table:
129
+ SELECT * FROM w;
130
+ row_id can't stop won't stop (usher song) uptown girl
131
+ 0 "can't stop won't stop" is a song recorded by american recording artist usher for his seventh studio album looking 4 myself (2012). written and produced by will "will.i.am" adams and keith harris, the song contains an interpolation of the bridge to billy joel's 1983 hit single "uptown girl". musically, "can't stop won't stop" is a eurodance and dance-pop song that incorporates elements of dubstep. "uptown girl" is a song written and performed by american musician billy joel. it was released on 1983-9-29, on his ninth studio album an innocent man (1983). the lyrics describe a working-class "downtown man" attempting to woo a wealthy "uptown girl."
132
+ */
133
+ Q: This song released on September 29, 1983 and inspired a hit song by Usher was written by who?
134
+ A: billy joel
135
+
136
+
137
+ CREATE TABLE 2000 DirecTV 500 (Top 10 results)(
138
+ row_id int,
139
+ pos int,
140
+ grid int,
141
+ car number (no.) int,
142
+ driver text,
143
+ team text,
144
+ manufacturer text,
145
+ laps completed (laps) int,
146
+ points int)
147
+ /*
148
+ 3 example rows:
149
+ SELECT * FROM w LIMIT 3;
150
+ row_id pos grid car number (no.) driver team manufacturer laps completed (laps) points
151
+ 0 1 4 8 dale earnhardt, jr. (r) dale earnhardt, inc. chevrolet 334 185
152
+ 1 2 37 99 jeff burton roush racing ford 334 175
153
+ 2 3 14 18 bobby labonte joe gibbs racing pontiac 334 170
154
+ */
155
+ CREATE TABLE Images(
156
+ row_id int,
157
+ dale earnhardt text)
158
+ /*
159
+ All rows of the table:
160
+ SELECT * FROM w;
161
+ row_id dale earnhardt
162
+ 0 a man wearing a number of neckties and a mustache.
163
+ */
164
+ Q: The 2000 DirecTv 500 Top 10 Driver with 146 points has a person behind them holding what?
165
+ A: camera
166
+
167
+
168
+ CREATE TABLE Oliver Mellor (Credits | Television)(
169
+ row_id int,
170
+ year text,
171
+ title text,
172
+ role text,
173
+ notes text)
174
+ /*
175
+ 3 example rows:
176
+ SELECT * FROM w LIMIT 3;
177
+ row_id year title role notes
178
+ 0 2006 the royal dr. guy fitzgerald none
179
+ 1 2006 hollyoaks: in the city detective monroe 3 episodes
180
+ 2 2006 doctor who matt episode "army of ghosts"
181
+ */
182
+ CREATE TABLE Passages(
183
+ row_id int,
184
+ charlie stubbs (coronation street) text)
185
+ /*
186
+ All rows of the table:
187
+ SELECT * FROM w;
188
+ row_id charlie stubbs (coronation street)
189
+ 0 in 2005, charlie began a relationship with tracy barlow (kate ford). he convinced her to move in with him and later in february 2006, manipulated her into having her daughter amy (amber chadwick) move in with her parents. in turn, tracy began to manipulate charlie. she pretended to be pregnant and used the money he gave her for an abortion to buy expensive shoes and used her "grief" to have him allow amy to move back in. when shelley visited before her mother’s marriage to fred elliott (john savident), she and charlie had a 1-tni stand. she told tracy about their tni of passion, who accused her of lying. shelley later revealed that she was pregnant with charlie’s baby but didn’t allow charlie to have anything to do with the baby, and left. he and tracy briefly split but reconciled. charlie later began an affair with maria sutherland (samia smith), who was renting his flat. when david platt (jack p. shepherd) discovered the affair he tried to blackmail charlie, threatening to reveal the affair to tracy. charlie retaliated by trying to drown david in the bath. when tracy eventually found out about the affair, they split once more. tracy began to plot revenge against charlie and pretended to make amends with charlie. she pretended he was abusing her to the point of burning herself with an iron to make it look like charlie was responsible for her injuries. charlie eventually realized his partner was seeking revenge and when he was about to tell her their relationship was over, she insisted on performing a lap dance for him. she hit him round the head with a heavy ornament, and he later died in hospital. she claimed she’d killed him in self-defence but the court found her guilty and she was given a life sentence.
190
+ */
191
+ Q: Oliver Mellor played Dr. Matt Carter on the TV show that had Tracy Barlow kill who?
192
+ A: charlie
193
+
194
+
195
+ CREATE TABLE Peter Egan (Filmography)(
196
+ row_id int,
197
+ year text,
198
+ title text,
199
+ role text,
200
+ notes text)
201
+ /*
202
+ 3 example rows:
203
+ SELECT * FROM w LIMIT 3;
204
+ row_id year title role notes
205
+ 0 1971 1 brief su bill denton none
206
+ 1 1971 elizabeth r earl of southampton episode: "sweet englands pride"
207
+ 2 1973 the hireling captain hugh cantrip none
208
+ */
209
+ CREATE TABLE Passages(
210
+ row_id int,
211
+ wampanoag text)
212
+ /*
213
+ All rows of the table:
214
+ SELECT * FROM w;
215
+ row_id wampanoag
216
+ 0 traditionally wampanoag people have been semi-sedentary, with seasonal movements between fixed sites in present-day southern new england. the men often traveled far north and south along the eastern seaboard for seasonal fishing expeditions, and sometimes stayed in those distant locations for weeks and months at a time. the women cultivated varieties of the "3 sisters" (the intercropping of maize, climbing beans, and squash) as the staples of their diet, supplemented by fish and game caught by the men. each community had authority over a well-defined territory from which the people derived their livelihood through a seasonal round of fishing, planting, harvesting, and hunting. because southern new england was thickly populated by indigenous peoples, hunting grounds had strictly defined boundaries.
217
+ */
218
+ Q: corn beans and squash the three most important crops of the wampanoag were also known as
219
+ A: three sisters
220
+
221
+
222
+ CREATE TABLE 1980 in home video (Movie releases)(
223
+ row_id int,
224
+ u.s./canada release date text,
225
+ title text,
226
+ studio text,
227
+ notes text)
228
+ /*
229
+ 3 example rows:
230
+ SELECT * FROM w LIMIT 3;
231
+ row_id u.s./canada release date title studio notes
232
+ 0 january 1 the muppet movie magnetic video betamax release laserdisc release vhs release
233
+ 1 march 4 20000 leagues under the sea walt disney home entertainment betamax release vhs release
234
+ 2 march 4 the apple dumpling gang walt disney home entertainment betamax release vhs release
235
+ */
236
+ CREATE TABLE Passages(
237
+ row_id int,
238
+ bert lahr text)
239
+ /*
240
+ All rows of the table:
241
+ SELECT * FROM w;
242
+ row_id bert lahr
243
+ 0 bert lahr ((1895-8-131967-12-4,p26410d)) was an american actor, particularly of stage and film, and comedian. lahr is known for his role as the cowardly lion, as well as his counterpart kansas farmworker zeke, in the wizard of oz (1939). he was well known for his explosive humor, but also adapted well to dramatic roles and his work in burlesque, vaudeville, and on broadway.
244
+ */
245
+ Q: In the 1980 movie that was put out by the MGM/CBS Home Video studio, who played the part of the Cowardly Lion?
246
+ A: bert lahr
247
+
248
+
249
+ CREATE TABLE List of newspapers in Italy (National daily newspapers)(
250
+ row_id int,
251
+ newspaper text,
252
+ circulation text,
253
+ headquarters text,
254
+ est. int,
255
+ political alignment text,
256
+ nameplate text)
257
+ /*
258
+ 3 example rows:
259
+ SELECT * FROM w LIMIT 3;
260
+ row_id newspaper circulation headquarters est. political alignment nameplate
261
+ 0 corriere della sera 242684 milan 1876 centrism 200x200px
262
+ 1 la repubblica 198835 rome 1976 social democracy 150x150px
263
+ 2 la gazzetta dello sport 161796 milan 1896 — 200x200px
264
+ */
265
+ CREATE TABLE Passages(
266
+ row_id int,
267
+ early middle ages text)
268
+ /*
269
+ All rows of the table:
270
+ SELECT * FROM w;
271
+ row_id early middle ages
272
+ 0 for almost p1000y, rome was the most politically important, richest and largest city in europe. around 100 ce, it had a population of about 450000, and declined to a mere 20000 during the early middle ages, reducing the sprawling city to groups of inhabited buildings interspersed among large areas of ruins and vegetation.
273
+ */
274
+ CREATE TABLE Images(
275
+ row_id int,
276
+ rome text)
277
+ /*
278
+ All rows of the table:
279
+ SELECT * FROM w;
280
+ row_id rome
281
+ 0 a series of photographs showing a colorful scene.
282
+ */
283
+ Q: In the city that was the center of imperial life in the roman empire in the early fifth century, the building in the top right has what at its top?
284
+ A: dome
285
+
286
+
287
+ CREATE TABLE International League (Current teams)(
288
+ row_id int,
289
+ division text,
290
+ team text,
291
+ founded int,
292
+ mlb affiliation text,
293
+ affiliated int,
294
+ city text,
295
+ stadium text,
296
+ capacity int)
297
+ /*
298
+ 3 example rows:
299
+ SELECT * FROM w LIMIT 3;
300
+ row_id division team founded mlb affiliation affiliated city stadium capacity
301
+ 0 north buffalo bisons 1985 toronto blue jays 2013 buffalo, new york sahlen field 16600
302
+ 1 north lehigh valley ironpigs 2008 philadelphia phillies 2007 allentown, pennsylvania coca-cola park 10100
303
+ 2 north pawtucket red sox 1973 boston red sox 1970 pawtucket, rhode island mccoy stadium 10031
304
+ */
305
+ CREATE TABLE Images(
306
+ row_id int,
307
+ columbus clippers text)
308
+ /*
309
+ All rows of the table:
310
+ SELECT * FROM w;
311
+ row_id columbus clippers
312
+ 0 a large blue and white clock on the side of a building.
313
+ */
314
+ Q: Was the Team that has a ship in logo or Charlotte Knights, the one with earlier affiliation in Current teams of International League?
315
+ A: charlotte knights
316
+
317
+
318
+ CREATE TABLE Warren Burton (Filmography)(
319
+ row_id int,
320
+ year int,
321
+ title text,
322
+ role text,
323
+ notes text)
324
+ /*
325
+ 3 example rows:
326
+ SELECT * FROM w LIMIT 3;
327
+ row_id year title role notes
328
+ 0 1976 baby blue marine second serviceman none
329
+ 1 1977 chatterbox tv reporter none
330
+ 2 1977 the world's greatest lover ludwig none
331
+ */
332
+ CREATE TABLE Images(
333
+ row_id int,
334
+ green lantern (film) text)
335
+ /*
336
+ All rows of the table:
337
+ SELECT * FROM w;
338
+ row_id green lantern (film)
339
+ 0 a picture of a green and white costume and glasses.
340
+ */
341
+ Q: How many people are on the poster for Green Lantern (film)?
342
+ A: 4
343
+
344
+
345
+ CREATE TABLE One Hour Photo (Accolades)(
346
+ row_id int,
347
+ award text,
348
+ category text,
349
+ recipients text,
350
+ result real)
351
+ /*
352
+ 3 example rows:
353
+ SELECT * FROM w LIMIT 3;
354
+ row_id award category recipients result
355
+ 0 critics' choice movie awards best actor robin williams nan
356
+ 1 dallas–fort worth film critics association best actor robin williams nan
357
+ 2 online film critics society best actor robin williams nan
358
+ */
359
+ CREATE TABLE Images(
360
+ row_id int,
361
+ saturn award text)
362
+ /*
363
+ All rows of the table:
364
+ SELECT * FROM w;
365
+ row_id saturn award
366
+ 0 a man in a suit and tie holding a glass.
367
+ */
368
+ Q: What is he holding in Saturn Award?
369
+ A: trophy
370
+
371
+
372
+ CREATE TABLE 2013 Detroit Lions season (2013 Draft class)(
373
+ row_id int,
374
+ draft order - round int,
375
+ draft order - choice int,
376
+ draft order - overall int,
377
+ player name text,
378
+ position text,
379
+ height text,
380
+ weight text,
381
+ college text,
382
+ contract text,
383
+ notes text,
384
+ source text)
385
+ /*
386
+ 3 example rows:
387
+ SELECT * FROM w LIMIT 3;
388
+ row_id draft order - round draft order - choice draft order - overall player name position height weight college contract notes source
389
+ 0 1 5 5 ezekiel ansah defensive end 6ft 5 in 271lbs byu p5y / none [http://www.mlive.com/lions/index.ssf/2013-4/detroit_lions_select_ezekiel_a.html detroit lions select ezekiel ansah in first round of 2013 nfl draft] mlive.com, 2013-4-26
390
+ 1 2 4 36 darius slay defensive back 6ft 1 in 190lbs mississippi state p4y / none [http://www.mlive.com/lions/index.ssf/2013-4/detroit_lions_select_mississip.html detroit lions select mississippi state cb darius slay in second round of 2013 nfl draft] mlive.com, 2013-4-27
391
+ 2 3 3 65 larry warford offensive lineman 6ft 3 in 343lbs kentucky p4y / none [http://www.mlive.com/lions/index.ssf/2013-4/detroit_lions_fill_massive_nee.html detroit lions fill massive need with massive guard prospect larry warford] mlive.com, 2013-4-27
392
+ */
393
+ CREATE TABLE Images(
394
+ row_id int,
395
+ south carolina gamecocks football text,
396
+ seattle seahawks text)
397
+ /*
398
+ All rows of the table:
399
+ SELECT * FROM w;
400
+ row_id south carolina gamecocks football seattle seahawks
401
+ 0 a group of people standing next to each other. a large green and white bird with numbers.
402
+ */
403
+ Q: What educational institution has a rooster on its logo and was the school listed in the 2013 Detroit Lions draft class for the defensive end player position?
404
+ A: south carolina
405
+
406
+
407
+ CREATE TABLE Melia Kreiling (Filmography | Film roles)(
408
+ row_id int,
409
+ year int,
410
+ title text,
411
+ role text,
412
+ notes text)
413
+ /*
414
+ 3 example rows:
415
+ SELECT * FROM w LIMIT 3;
416
+ row_id year title role notes
417
+ 0 2012 suspension of disbelief juliette none
418
+ 1 2013 company of heroes kestrel direct-to-video film
419
+ 2 2013 leopard kara none
420
+ */
421
+ CREATE TABLE Passages(
422
+ row_id int,
423
+ list of marvel cinematic universe films text)
424
+ /*
425
+ All rows of the table:
426
+ SELECT * FROM w;
427
+ row_id list of marvel cinematic universe films
428
+ 0 the first film in the marvel cinematic universe was iron man (2008), which was distributed by paramount pictures. paramount also distributed iron man 2 (2010), thor (2011) and captain america: the first avenger (2011), while universal pictures distributed the incredible hulk (2008). walt disney studios motion pictures began distributing the films with the 2012 crossover film the avengers, which concluded phase 1 of the franchise. phase 2 includes iron man 3 (2013), thor: the dark world (2013), captain america: wi soldier (2014), guardians of the galaxy (2014), avengers: age of ultron (2015), and ant-man (2015).
429
+ */
430
+ Q: What was Melia Kreiling's role in the film that is the next Marvel movie after 'Captain America the Winter Soldier'?
431
+ A: bereet
432
+
433
+
434
+ CREATE TABLE 2006 Grand Prix of Portland (Qualifying results)(
435
+ row_id int,
436
+ pos int,
437
+ nat real,
438
+ name text,
439
+ team text,
440
+ qual 1 text,
441
+ qual 2 text,
442
+ best text)
443
+ /*
444
+ 3 example rows:
445
+ SELECT * FROM w LIMIT 3;
446
+ row_id pos nat name team qual 1 qual 2 best
447
+ 0 1 nan bruno junqueira newman/haas racing 59.576 57.631 57.631
448
+ 1 2 nan a. j. allmendinger forsythe racing 58.378 57.639 57.639
449
+ 2 3 nan sébastien bourdais newman/haas racing 58.464 57.646 57.646
450
+ */
451
+ CREATE TABLE Passages(
452
+ row_id int,
453
+ jtg daugherty racing text)
454
+ /*
455
+ All rows of the table:
456
+ SELECT * FROM w;
457
+ row_id jtg daugherty racing
458
+ 0 jtg daugherty racing (formerly st motorsports and jtg racing) is an american professional stock car racing team that currently competes in the monster energy nascar cup series. the team is owned by former advertising executive tad geschickter and his wife jodi, along with current espn analyst brad daugherty. the team formerly had alliances with wood brothers racing, then michael waltrip racing, and currently has a technical alliance with richard childress racing. the team currently fields the no. 37 cottonelle chevrolet ss driven by roush development driver chris buescher and the no. 47 clorox/bush's/scott products chevrolet ss driven by a. j. allmendinger in the monster energy nascar cup series.
459
+ */
460
+ Q: The driver of Nascar number 47 qualified for the 2006 Grand Prix of Portland for which team?
461
+ A: forsythe racing
462
+
463
+
464
+ CREATE TABLE List of churches in Copenhagen ([[Amager]])(
465
+ row_id int,
466
+ name text,
467
+ denomination text,
468
+ year int,
469
+ coordinates real,
470
+ image text,
471
+ refs real)
472
+ /*
473
+ 3 example rows:
474
+ SELECT * FROM w LIMIT 3;
475
+ row_id name denomination year coordinates image refs
476
+ 0 all saints' church church of denmark 1932 nan 150px nan
477
+ 1 dragør church church of denmark 1885 nan 150px nan
478
+ 2 hans tausen's church church of denmark 1924 nan 150px nan
479
+ */
480
+ CREATE TABLE Images(
481
+ row_id int,
482
+ all saints' church, copenhagen text,
483
+ dragør church text,
484
+ nathanael's church text,
485
+ st. anne's church, copenhagen text,
486
+ sundby church text)
487
+ /*
488
+ All rows of the table:
489
+ SELECT * FROM w;
490
+ row_id all saints' church, copenhagen dragør church nathanael's church st. anne's church, copenhagen sundby church
491
+ 0 type of place of worship church of the holy trinity church of the holy trinity the building where the hotel is located a red brick church with a steeple and a flagpole in front of it.
492
+ */
493
+ Q: Among Copenhagen churches on the "Amager" list, which have spires and are affiliated with the Church of Denmark denomination?
494
+ A: all saints' church | nathanael's church | dragør church | sundby church
495
+
496
+
497
+ CREATE TABLE Final Straw Tour (UK Tour (Leg III))(
498
+ row_id int,
499
+ date text,
500
+ city text,
501
+ country text,
502
+ venue text)
503
+ /*
504
+ 3 example rows:
505
+ SELECT * FROM w LIMIT 3;
506
+ row_id date city country venue
507
+ 0 support acts: terra diablo & astrid support acts: terra diablo & astrid support acts: terra diablo & astrid support acts: terra diablo & astrid
508
+ 1 2004-3-2 newcastle england newcastle university
509
+ 2 2004-3-3 liverpool england carling academy
510
+ */
511
+ CREATE TABLE Images(
512
+ row_id int,
513
+ oxford text)
514
+ /*
515
+ All rows of the table:
516
+ SELECT * FROM w;
517
+ row_id oxford
518
+ 0 a guide to the city of edinburgh
519
+ */
520
+ Q: The final straw tour held leg 3 of the UK tour on March 13, 2004 in this city with how many views on the bottom?
521
+ A: three
templates/prompts/prompt_qa_balanced.txt ADDED
@@ -0,0 +1,275 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ Generate answer given the question and table to answer the question correctly.
2
+
3
+ CREATE TABLE Fabrice Santoro(
4
+ row_id int,
5
+ name text,
6
+ 1989 text,
7
+ 1990 text,
8
+ 1991 text,
9
+ 1992 text,
10
+ 1993 text,
11
+ 1994 text,
12
+ 1995 text,
13
+ 1996 text,
14
+ 1997 text,
15
+ 1998 text,
16
+ 1999 text,
17
+ 2000 text,
18
+ 2001 text,
19
+ 2002 text,
20
+ 2003 text,
21
+ 2004 text,
22
+ 2005 text,
23
+ 2006 text,
24
+ 2007 text,
25
+ 2008 text,
26
+ 2009 text,
27
+ 2010 text,
28
+ career\nsr text,
29
+ career\nwin-loss text)
30
+ /*
31
+ All rows of the table:
32
+ SELECT * FROM w;
33
+ row_id name 1989 1990 1991 1992 1993 1994 1995 1996 1997 1998 1999 2000 2001 2002 2003 2004 2005 2006 2007 2008 2009 2010 career\nsr career\nwin-loss
34
+ 0 australian open a a 1r a 2r 3r 2r 1r a 3r 4r 1r 2r 1r 3r 2r 1r qf 3r 2r 3r 1r 0 / 18 22–18
35
+ 1 french open 1r 2r 4r 1r 1r 3r 1r a 1r 3r 1r 2r 4r 2r 2r 3r 1r 1r 1r 2r 1r a 0 / 20 17–20
36
+ 2 wimbledon a 1r a a a a 1r a 1r a 2r 2r 3r 2r 2r 2r 2r 2r 2r 1r 2r a 0 / 14 11–14
37
+ 3 us open a 3r 1r 2r 1r a 1r a 1r 3r 3r 1r 2r 1r 2r 3r 2r 1r 2r 1r 1r a 0 / 18 13–18
38
+ 4 grand slam sr 0 / 1 0 / 3 0 / 3 0 / 2 0 / 3 0 / 2 0 / 4 0 / 1 0 / 3 0 / 3 0 / 4 0 / 4 0 / 4 0 / 4 0 / 4 0 / 4 0 / 4 0 / 4 0 / 4 0 / 4 0 / 4 0 / 1 0 / 70 n/a
39
+ 5 grand slam win-loss 0–1 3–3 3–3 1–2 1–3 4–2 1–4 0–1 0–3 6–3 6–4 2–4 7–4 2–4 5–4 6–4 2–4 5–4 4–4 2–4 3–4 0–1 n/a 63–70
40
+ 6 indian wells nme a 3r 1r qf 3r 2r a a 1r a 3r 2r 3r 1r 1r 4r 1r a a a a 0 / 13 16–13
41
+ 7 miami nme 2r 2r 1r 3r a a a a 4r 3r 2r 4r 2r a 1r a 2r 3r 3r 2r a 0 / 14 15–14
42
+ 8 monte carlo nme 1r 2r 2r 1r a 3r 3r sf qf a 2r 1r 1r 1r 3r 2r 1r a 1r a a 0 / 16 17–16
43
+ 9 rome nme a qf 3r 3r a 3r a 2r 1r 3r 3r 2r 1r 1r a 2r 3r a 1r a a 0 / 14 18–14
44
+ 10 hamburg nme 2r a a a a 1r a a qf 2r 1r qf 1r 1r a 1r 1r a a nme nme 0 / 10 8–10
45
+ 11 canada nme a a a a a a a qf 2r qf 1r sf qf 1r qf 1r 1r a a a a 0 / 10 17–10
46
+ 12 cincinnati nme a a a a a a a 2r 1r 2r qf 2r 1r 2r qf 2r 1r a a a a 0 / 10 11–10
47
+ 13 stuttgart/madrid nme a a a a a a a 3r 1r 2r 2r 1r sf a a a a 1r a a a 0 / 7 8–7
48
+ 14 paris nme 1r 1r 1r a a a a 2r 2r 2r qf 2r 1r 2r a 2r 1r 3r a 1r a 0 / 14 10–14
49
+ 15 masters series sr n/a 0 / 4 0 / 5 0 / 5 0 / 4 0 / 1 0 / 4 0 / 1 0 / 6 0 / 9 0 / 7 0 / 9 0 / 9 0 / 9 0 / 7 0 / 5 0 / 7 0 / 8 0 / 3 0 / 3 0 / 2 0 / 0 0 / 108 n/a
50
+ 16 p1y win-loss n/a 2–4 7–5 3–5 6–4 2–1 5–4 2–1 12–6 10–9 10–7 12–9 13–9 9–9 2–7 8–5 7–7 3–8 4–3 2–3 1–2 0–0 n/a 120–108
51
+ 17 year end ranking 235 62 43 43 55 46 102 118 29 41 34 31 22 35 62 52 58 52 37 52 68 – n/a none
52
+ */
53
+ Q: did he win more at the australian open or indian wells?
54
+ A: australian open
55
+
56
+
57
+ CREATE TABLE Matthew Morrison(
58
+ row_id int,
59
+ year int,
60
+ title text,
61
+ role text,
62
+ notes text)
63
+ /*
64
+ All rows of the table:
65
+ SELECT * FROM w;
66
+ row_id year title role notes
67
+ 0 1999 bob rizzo's simply funk with suzanne [himself] as matthew j. morrison
68
+ 1 2003 marci x boyz r us as matthew j. morrison
69
+ 2 2005 once upon a mattress sir harry none
70
+ 3 2006 blinders scott none
71
+ 4 2007 music and lyrics ray none
72
+ 5 2007 dan in real life policeman none
73
+ 6 2007 i think i love my wife salesman #2 none
74
+ 7 2011 the muppets mahna mahna host none
75
+ 8 2012 what to expect when you're expecting evan none
76
+ */
77
+ Q: what movies was morrison involved with in 2007?
78
+ A: music and lyrics, dan in real life, i think i love my wife
79
+
80
+
81
+ CREATE TABLE 2007 New Orleans Saints season(
82
+ row_id int,
83
+ week int,
84
+ date text,
85
+ opponent text,
86
+ time text,
87
+ game site text,
88
+ tv text,
89
+ result/score text,
90
+ record text)
91
+ /*
92
+ All rows of the table:
93
+ SELECT * FROM w;
94
+ row_id week date opponent time game site tv result/score record
95
+ 0 1 2007-9-6 indianapolis colts t20:30 edt rca dome nbc l 41 – 10 0–1
96
+ 1 2 2007-9-16 tampa bay buccaneers t13:0 edt raymond james stadium fox l 31 – 14 0–2
97
+ 2 3 2007-9-24 tennessee titans t20:30 edt louisiana superdome espn l 31 – 14 0–3
98
+ 3 4 bye bye bye bye bye bye none
99
+ 4 5 2007-10-7 carolina panthers t13:0 edt louisiana superdome fox l 16 – 13 0–4
100
+ 5 6 2007-10-14 seattle seahawks t20:15 edt qwest field nbc w 28 – 17 1–4
101
+ 6 7 2007-10-21 atlanta falcons t13:0 edt louisiana superdome fox w 22 – 16 2–4
102
+ 7 8 2007-10-28 san francisco 49ers t16:15 edt monster park fox w 31 – 10 3–4
103
+ 8 9 2007-11-4 jacksonville jaguars t13:0 est louisiana superdome cbs w 41 – 24 4–4
104
+ 9 10 2007-11-11 st. louis rams t13:0 est louisiana superdome fox l 37 – 29 4–5
105
+ 10 11 2007-11-18 houston texans t13:0 est reliant stadium fox l 23 – 10 4–6
106
+ 11 12 2007-11-25 carolina panthers t13:0 est bank of america stadium fox w 31 – 6 5–6
107
+ 12 13 2007-12-2 tampa bay buccaneers t13:0 est louisiana superdome fox l 27 – 23 5–7
108
+ 13 14 2007-12-10 atlanta falcons t20:30 est georgia dome espn w 34 – 14 6–7
109
+ 14 15 2007-12-16 arizona cardinals t13:0 est louisiana superdome fox w 31–24 7–7
110
+ 15 16 2007-12-23 philadelphia eagles t13:0 est louisiana superdome fox l 38–23 7–8
111
+ 16 17 2007-12-30 chicago bears t13:0 est soldier field fox l 33–25 7–9
112
+ */
113
+ Q: what number of games were lost at home?
114
+ A: 5
115
+
116
+
117
+ CREATE TABLE Demographics of Alaska(
118
+ row_id int,
119
+ by race text,
120
+ white text,
121
+ black text,
122
+ aian* text,
123
+ asian text,
124
+ nhpi* text)
125
+ /*
126
+ All rows of the table:
127
+ SELECT * FROM w;
128
+ row_id by race white black aian* asian nhpi*
129
+ 0 2000 (total population) 75.43% 4.46% 19.06% 5.24% 0.88%
130
+ 1 2000 (hispanic only) 3.42% 0.33% 0.45% 0.16% 0.06%
131
+ 2 2005 (total population) 74.71% 4.72% 18.77% 5.9% 0.88%
132
+ 3 2005 (hispanic only) 4.32% 0.38% 0.48% 0.19% 0.05%
133
+ 4 growth 2000–5 (total population) 4.85% 12.03% 4.27% 19.23% 5.35%
134
+ 5 growth 2000–5 (non-hispanic only) 3.49% 11.3% 4.02% 18.96% 5.86%
135
+ 6 growth 2000–5 (hispanic only) 33.56% 21.02% 14.52% 27.89% -1.95%
136
+ */
137
+ Q: which hispanic population had the greatest growth from 2000 to 2005?
138
+ A: white
139
+
140
+
141
+ CREATE TABLE Highest mountain peaks of California(
142
+ row_id int,
143
+ rank int,
144
+ mountain peak text,
145
+ mountain range text,
146
+ elevation text,
147
+ prominence text,
148
+ isolation text,
149
+ location text)
150
+ /*
151
+ All rows of the table:
152
+ SELECT * FROM w;
153
+ row_id rank mountain peak mountain range elevation prominence isolation location
154
+ 0 1 mount whitney sierra nevada 14505 ft; 4421 m 10080 ft; 3072 m 1646 mi; 2649 km 36°34′43″n 118°17′31″w / 36.5786°n 118.292°w
155
+ 1 2 mount williamson sierra nevada 14379 ft; 4383 m 1677 ft; 511 m 5.4 mi; 8.7 km 36°39′21″n 118°18′40″w / 36.6559°n 118.3111°w
156
+ 2 3 white mountain peak white mountains 14252 ft; 4344 m 7196 ft; 2193 m 67 mi; 109 km 37°38′3″n 118°15′21″w / 37.6341°n 118.2557°w
157
+ 3 4 north palisade sierra nevada 14248 ft; 4343 m 2894 ft; 882 m 32 mi; 52 km 37°5′39″n 118°30′52″w / 37.0943°n 118.5145°w
158
+ 4 5 mount shasta cascade range 14179 ft; 4322 m 9832 ft; 2997 m 335 mi; 539 km 41°24′33″n 122°11′42″w / 41.4092°n 122.1949°w
159
+ 5 6 mount humphreys sierra nevada 13992 ft; 4265 m 2563 ft; 781 m 15 mi; 24 km 37°16′14″n 118°40′23″w / 37.2705°n 118.673°w
160
+ 6 7 mount keith sierra nevada 13982 ft; 4262 m 1936 ft; 590 m 3.1 mi; 5 km 36°42′0″n 118°20′37″w / 36.7001°n 118.3436°w
161
+ 7 8 mount darwin sierra nevada 13837 ft; 4218 m 1891 ft; 576 m 7 mi; 11 km 37°10′1″n 118°40′20″w / 37.1669°n 118.6721°w
162
+ 8 9 mount kaweah sierra nevada 13807 ft; 4209 m 2027 ft; 618 m 11 mi; 17 km 36°31′34″n 118°28′43″w / 36.5261°n 118.4785°w
163
+ 9 10 mount morgan sierra nevada 13758 ft; 4193 m 2648 ft; 807 m 10 mi; 16 km 37°24′19″n 118°43′58″w / 37.4053°n 118.7329°w
164
+ 10 11 mount gabb sierra nevada 13747 ft; 4190 m 2601 ft; 793 m 4.3 mi; 6.9 km 37°22′37″n 118°48′9″w / 37.3769°n 118.8025°w
165
+ 11 12 mount tom sierra nevada 13657 ft; 4163 m 1992 ft; 607 m 4.8 mi; 7.7 km 37°22′34″n 119°10′44″w / 37.3762°n 119.1789°w
166
+ 12 13 mount dubois white mountains 13565 ft; 4135 m 2339 ft; 713 m 10 mi; 16 km 37°47′0″n 118°20′36″w / 37.7834°n 118.3432°w
167
+ 13 14 mount pinchot sierra nevada 13500 ft; 4115 m 2110 ft; 643 m 4.7 mi; 7.6 km 36°56′50″n 118°24′19″w / 36.9473°n 118.4054°w
168
+ 14 15 red slate mountain sierra nevada 13162 ft; 4012 m 1736 ft; 529 m 8 mi; 13 km 37°30′27″n 118°52′9″w / 37.5075°n 118.8693°w
169
+ 15 16 mount ritter sierra nevada 13149 ft; 4008 m 3990 ft; 1216 m 22 mi; 35 km 37°41′21″n 119°11′59″w / 37.6891°n 119.1996°w
170
+ */
171
+ Q: which mountain peak has a prominence more than 10,000 ft?
172
+ A: mount whitney
173
+
174
+
175
+ CREATE TABLE Daegu FC(
176
+ row_id int,
177
+ season int,
178
+ division int,
179
+ tms. int,
180
+ pos. int,
181
+ fa cup text,
182
+ afc cl real)
183
+ /*
184
+ All rows of the table:
185
+ SELECT * FROM w;
186
+ row_id season division tms. pos. fa cup afc cl
187
+ 0 2003 1 12 11 quarter final nan
188
+ 1 2004 1 13 10 round of 32 nan
189
+ 2 2005 1 13 8 quarter final nan
190
+ 3 2006 1 14 7 quarter final nan
191
+ 4 2007 1 14 12 round of 16 nan
192
+ 5 2008 1 14 11 semi final nan
193
+ 6 2009 1 15 15 quarter-final nan
194
+ 7 2010 1 15 15 round of 32 nan
195
+ 8 2011 1 16 12 round of 32 nan
196
+ 9 2012 1 16 10 round of 16 nan
197
+ */
198
+ Q: how far did they make it in the fa cup after 2009?
199
+ A: round of 16
200
+
201
+ CREATE TABLE Portugal in the Eurovision Song Contest 1979(
202
+ row_id int,
203
+ draw int,
204
+ artist text,
205
+ song text,
206
+ points int,
207
+ place text)
208
+ /*
209
+ All rows of the table:
210
+ SELECT * FROM w;
211
+ row_id draw artist song points place
212
+ 0 1 gonzaga coutinho "tema para um homem só" 102 5th
213
+ 1 2 pedro osório s.a.r.l. "uma canção comercial" 123 3rd
214
+ 2 3 concha "qualquer dia, quem diria" 78 6th
215
+ 3 4 gabriela schaaf "eu só quero" 132 2nd
216
+ 4 5 tózé brito "novo canto português" 110 4th
217
+ 5 6 teresa silva carvalho "cantemos até ser dia" 52 9th
218
+ 6 7 florência "o combóio do tua" 63 8th
219
+ 7 8 manuel josé soares "quando chego a casa" 76 7th
220
+ 8 9 manuela bravo "sobe, sobe, balão sobe" 149 1st
221
+ */
222
+ Q: who was the last draw?
223
+ A: manuela bravo
224
+
225
+
226
+ CREATE TABLE List of spans(
227
+ row_id int,
228
+ tramway text,
229
+ country text,
230
+ city text,
231
+ height of pylons text,
232
+ span width,\nleaning straight line text,
233
+ span width,\nhorizontal measurement text,
234
+ height of cable over ground text,
235
+ year of inauguration text,
236
+ notes text)
237
+ /*
238
+ All rows of the table:
239
+ SELECT * FROM w;
240
+ row_id tramway country city height of pylons span width,\nleaning straight line span width,\nhorizontal measurement height of cable over ground year of inauguration notes
241
+ 0 peak 2 peak gondola canada whistler 65m 3024 m 3019 m 436 m 2008 3s aerial tramway constructed by doppelmayr
242
+ 1 hut of regensburg material transport aerial railway austria falbeson ? ? ? 430 m ? none
243
+ 2 vanoise express france vanoise none 1850 m 1800 m 380 m 2003 none
244
+ 3 aiguille du midi france chamonix none 2867 m 2500 m ? 1955 2nd section
245
+ 4 vallee blanche aerial tramway france mont blanc none 2831 m, 1684 m span is almost horizontal appr. 300 m 1958 rock anchored support structure
246
+ 5 3s aerial tramway austria kitzbühel 0 m, 80m 2507 m ? 400 m 2004 none
247
+ 6 sandia peak tramway usa albuquerque 70.7 m, 21.33 m 2353 m ? 274 m 1966 none
248
+ 7 feldmoos-chli-titlis aerial tramway switzerland titlis 37.6 m 3476.2 m ? ? 1979 temp. site tramway, demolished in 1986
249
+ */
250
+ Q: was the sandia peak tramway innagurate before or after the 3s aerial tramway?
251
+ A: before
252
+
253
+
254
+ CREATE TABLE Płock Governorate(
255
+ row_id int,
256
+ language text,
257
+ number int,
258
+ percentage (%) text,
259
+ males int,
260
+ females int)
261
+ /*
262
+ All rows of the table:
263
+ SELECT * FROM w;
264
+ row_id language number percentage (%) males females
265
+ 0 polish 447685 80.86 216794 230891
266
+ 1 yiddish 51215 9.25 24538 26677
267
+ 2 german 35931 6.49 17409 18522
268
+ 3 russian 15137 2.73 13551 1586
269
+ 4 ukrainian 2350 0.42 2302 48
270
+ 5 other 1285 0.23 1041 244
271
+ 6 persons; that didn't name; their native language 27 >0.01 14 13
272
+ 7 total 553633 100 275652 277981
273
+ */
274
+ Q: how many male and female german speakers are there?
275
+ A: 35931
templates/prompts/prompt_qa_balanced_no_table_input.txt ADDED
@@ -0,0 +1,36 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ Generate answer to answer the question correctly.
2
+
3
+ Q: did he win more at the australian open or indian wells?
4
+ A: australian open
5
+
6
+
7
+ Q: what movies was morrison involved with in 2007?
8
+ A: music and lyrics, dan in real life, i think i love my wife
9
+
10
+
11
+ Q: what number of games were lost at home?
12
+ A: 5
13
+
14
+
15
+ Q: which hispanic population had the greatest growth from 2000 to 2005?
16
+ A: white
17
+
18
+
19
+ Q: which mountain peak has a prominence more than 10,000 ft?
20
+ A: mount whitney
21
+
22
+
23
+ Q: how far did they make it in the fa cup after 2009?
24
+ A: round of 16
25
+
26
+
27
+ Q: who was the last draw?
28
+ A: manuela bravo
29
+
30
+
31
+ Q: was the sandia peak tramway innagurate before or after the 3s aerial tramway?
32
+ A: before
33
+
34
+
35
+ Q: how many male and female german speakers are there?
36
+ A: 35931
templates/prompts/prompt_tab_fact_puresql_v2.txt ADDED
@@ -0,0 +1,277 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ Generate SQL given the statement and table to verify the statement correctly.
2
+
3
+ CREATE TABLE turkish cup(
4
+ row_id int,
5
+ round text,
6
+ clubs remaining int,
7
+ clubs involved int,
8
+ winners from previous round real,
9
+ new entries this round real,
10
+ leagues entering at this round text)
11
+ /*
12
+ 3 example rows:
13
+ SELECT * FROM w LIMIT 3;
14
+ row_id round clubs remaining clubs involved winners from previous round new entries this round leagues entering at this round
15
+ 0 first round 156 86 nan 86.0 tff third league & turkish regional amateur league
16
+ 1 second round 113 108 43.0 65.0 süper lig & tff first league & tff second league
17
+ 2 third round 59 54 54.0 nan none
18
+ */
19
+ Q: during the 3rd round of the turkish cup , there be no new entry during that stage
20
+ SQL: SELECT (SELECT `new entries this round` FROM w WHERE round = 'third round') IS NULL
21
+
22
+
23
+ CREATE TABLE turkish cup(
24
+ row_id int,
25
+ round text,
26
+ clubs remaining int,
27
+ clubs involved int,
28
+ winners from previous round real,
29
+ new entries this round real,
30
+ leagues entering at this round text)
31
+ /*
32
+ 3 example rows:
33
+ SELECT * FROM w LIMIT 3;
34
+ row_id round clubs remaining clubs involved winners from previous round new entries this round leagues entering at this round
35
+ 0 first round 156 86 nan 86.0 tff third league
36
+ 1 second round 113 108 43.0 65.0 süper ligs
37
+ 2 third round 59 54 54.0 nan none
38
+ */
39
+ Q: süper lig be the league to win a round in the turkish cup with 110 clubs
40
+ SQL: SELECT (SELECT clubs FROM w WHERE `leagues entering at this round` = 'süper ligs') = 110
41
+
42
+
43
+ CREATE TABLE turkish cup(
44
+ row_id int,
45
+ round text,
46
+ clubs remaining int,
47
+ clubs involved int,
48
+ winners from previous round real,
49
+ new entries this round real,
50
+ leagues entering at this round text)
51
+ /*
52
+ 3 example rows:
53
+ SELECT * FROM w LIMIT 3;
54
+ row_id round clubs remaining clubs involved winners from previous round new entries this round leagues entering at this round
55
+ 0 first round 156 86 nan 86.0 tff third league & turkish regional amateur league
56
+ 1 second round 113 108 43.0 65.0 süper lig & tff first league & tff second league
57
+ 2 third round 59 54 54.0 nan none
58
+ */
59
+ Q: the lowest number of new entry conclude a round in the turkish cup be 5
60
+ SQL: SELECT (SELECT MIN(`new entries this round`) FROM w) = 5
61
+
62
+
63
+ CREATE TABLE cultural interest fraternities and sororities(
64
+ row_id int,
65
+ letters text,
66
+ organization text,
67
+ nickname text,
68
+ founding time text,
69
+ founding university text,
70
+ type text)
71
+ /*
72
+ 3 example rows:
73
+ SELECT * FROM w LIMIT 3;
74
+ row_id letters organization nickname founding time founding university type
75
+ 0 αεπ alpha epsilon pi 1 aepi 1913-11-07 00:00:00 new york university fraternity
76
+ 1 αεφ alpha epsilon phi 2 aephi 1909-10-24 00:00:00 barnard college sorority
77
+ 2 σαεπ sigma alpha epsilon pi 3 sigma 1998-10-01 00:00:00 university of california , davis sorority
78
+ */
79
+ Q: 4 of the cultural interest fraternity and sorority be fraternity while 3 be a sorority
80
+ SQL: SELECT (SELECT (SELECT COUNT(*) FROM w WHERE type = 'fraternity') = 4) AND (SELECT (SELECT COUNT(*) FROM w WHERE type = 'sorority') = 3)
81
+
82
+
83
+ CREATE TABLE british records in athletics(
84
+ row_id int,
85
+ event text,
86
+ data text,
87
+ athlete text,
88
+ date text,
89
+ place text)
90
+ /*
91
+ 3 example rows:
92
+ SELECT * FROM w LIMIT 3;
93
+ row_id event data athlete date place
94
+ 0 5 km t19:29 andi drake 1990-05-27 00:00:00 norway
95
+ 1 5 miles 32:38 + ian mccombie 1985-03-23 00:00:00 united kingdom
96
+ 2 10 km 40:17 chris maddocks 1989-04-30 00:00:00 united kingdom
97
+ */
98
+ Q: there be 8 different event that take place within the united kingdom
99
+ SQL: SELECT (SELECT COUNT(place) FROM w WHERE place = 'united kingdom') = 8
100
+
101
+
102
+ CREATE TABLE jeev milkha singh(
103
+ row_id int,
104
+ tournament text,
105
+ wins int,
106
+ top - 10 int,
107
+ top - 25 int,
108
+ events int,
109
+ cuts made int)
110
+ /*
111
+ 3 example rows:
112
+ SELECT * FROM w LIMIT 3;
113
+ row_id tournament wins top - 10 top - 25 events cuts made
114
+ 0 masters tournament 0 0 1 3 2
115
+ 1 us open 0 0 0 4 3
116
+ 2 the open championship 0 0 0 2 1
117
+ */
118
+ Q: the number of cut made in the pga championship tournament be smaller than the number of event
119
+ SQL: SELECT (SELECT `cuts made` FROM w WHERE tournament = 'pga championship') < (SELECT events FROM w WHERE tournament = 'pga championship')
120
+
121
+
122
+ CREATE TABLE 2008 women 's british open(
123
+ row_id int,
124
+ place text,
125
+ player text,
126
+ country text,
127
+ score int,
128
+ to par int)
129
+ /*
130
+ 3 example rows:
131
+ SELECT * FROM w LIMIT 3;
132
+ row_id place player country score to par
133
+ 0 1 juli inkster united states 65 7
134
+ 1 t2 momoko ueda japan 66 6
135
+ 2 t2 laura diaz united states 66 6
136
+ */
137
+ Q: the 3 player from japan have the same score
138
+ SQL: SELECT (SELECT COUNT(DISTINCT score) FROM w WHERE country = 'japan' GROUP BY score) = 1
139
+
140
+
141
+ CREATE TABLE espn sunday night football results (1987 - 2005)(
142
+ row_id int,
143
+ date text,
144
+ visiting team text,
145
+ final score text,
146
+ host team text,
147
+ stadium text)
148
+ /*
149
+ 3 example rows:
150
+ SELECT * FROM w LIMIT 3;
151
+ row_id date visiting team final score host team stadium
152
+ 0 new year eve indianapolis colts 24 - 7 baltimore ravens m&t bank stadium
153
+ 1 new year eve kansas city chiefs 23 - 17 oakland raiders mcafee coliseum
154
+ 2 new year's day new york giants 23 - 45 san diego chargers qualcomm stadium
155
+ */
156
+ Q: the hosting team be the new york giant on new year even and the st louis ram on new year 's day
157
+ SQL: SELECT (SELECT (SELECT `host team` FROM w WHERE date = 'new year eve') = 'new york giant') AND (SELECT (SELECT `host team` FROM w WHERE date = 'new year''s day') = 'st louis ram')
158
+
159
+
160
+ CREATE TABLE 2008 women 's british open(
161
+ row_id int,
162
+ place text,
163
+ player text,
164
+ country text,
165
+ score text,
166
+ to par int)
167
+ /*
168
+ 3 example rows:
169
+ SELECT * FROM w LIMIT 3;
170
+ row_id place player country score to par
171
+ 0 t1 yuri fudoh japan 134 10
172
+ 1 t1 jiyai shin south korea 134 10
173
+ 2 3 juli inkster united states 135 9
174
+ */
175
+ Q: kristie kerr , tie for 4th place , finish the round 1 stroke under lorena ochoa of mexico
176
+ SQL: SELECT (SELECT (SELECT score FROM w WHERE player = 'cristie kerr') < (SELECT score FROM w WHERE player = 'lorena ochoa' AND country = 'mexico')) AND (SELECT (SELECT place FROM w WHERE player = 'cristie kerr') = "t4")
177
+
178
+
179
+ CREATE TABLE connecticut public radio(
180
+ row_id int,
181
+ call sign text,
182
+ frequency text,
183
+ city of license text,
184
+ facility id int,
185
+ erp / power w int,
186
+ height m ( ft ) real,
187
+ class text)
188
+ /*
189
+ 3 example rows:
190
+ SELECT * FROM w LIMIT 3;
191
+ row_id call sign frequency city of license facility id erp / power w height m ( ft ) class
192
+ 0 waic 91.9 fm springfield , ma 1749 230 nan b1
193
+ 1 wedw - fm 88.5 fm stamford , ct 13619 2000 nan a
194
+ 2 wnpr 90.5 fm ( hd ) connecticut public radio meriden , ct 13627 18500 nan b
195
+ */
196
+ Q: there be 3 station with a call sign number in the 90s
197
+ SQL: SELECT (SELECT COUNT(*) FROM w WHERE frequency > 90 GROUP BY `call sign`) = 3
198
+
199
+
200
+ CREATE TABLE 2003 chicago white sox season(
201
+ row_id int,
202
+ date text,
203
+ opponent text,
204
+ score text,
205
+ loss text,
206
+ time text,
207
+ att int,
208
+ record text)
209
+ /*
210
+ 3 example rows:
211
+ SELECT * FROM w LIMIT 3;
212
+ row_id date opponent score loss time att record
213
+ 0 august 1 mariners 12 - 1 garcía (9 - 11) 2:52 39337 58 - 51
214
+ 1 august 2 mariners 0 - 10 wright (0 - 5) 2:22 45719 58 - 52
215
+ 2 august 3 mariners 2 - 8 buehrle (9 - 11) 2:57 45632 58 - 53
216
+ */
217
+ Q: the 2003 chicago white sox game play on 26th august be longer than the game play on 24th august
218
+ SQL: SELECT (SELECT time FROM w WHERE date = 'august 26') > (SELECT time FROM w WHERE date = 'august 24')
219
+
220
+
221
+ CREATE TABLE 1987 masters tournament(
222
+ row_id int,
223
+ place text,
224
+ player text,
225
+ country text,
226
+ score text,
227
+ to par text,
228
+ money text)
229
+ /*
230
+ 3 example rows:
231
+ SELECT * FROM w LIMIT 3;
232
+ row_id place player country score to par money
233
+ 0 t1 larry mize united states 70 + 72 + 72 + 71 = 285 -3 playoff
234
+ 1 t1 bernhard langer spain 73 + 71 + 70 + 71 = 285 -3 playoff
235
+ 2 t1 greg norman australia 73 + 74 + 66 + 72 = 285 -3 playoff
236
+ */
237
+ Q: bernhard m. langer have more point than roger maltbie during the 1987 master tournament
238
+ SQL: SELECT (SELECT score FROM w WHERE player = 'bernhard langer') > (SELECT score FROM w WHERE player = 'roger maltbie')
239
+
240
+
241
+ CREATE TABLE 1987 masters tournament(
242
+ row_id int,
243
+ place text,
244
+ player text,
245
+ country text,
246
+ score text,
247
+ to par text,
248
+ money text)
249
+ /*
250
+ 3 example rows:
251
+ SELECT * FROM w LIMIT 3;
252
+ row_id place player country score to par money
253
+ 0 t1 larry mize united states 70 + 72 + 72 + 71 = 285 -3 playoff
254
+ 1 t1 seve ballesteros spain 73 + 71 + 70 + 71 = 285 -3 playoff
255
+ 2 t1 greg norman australia 73 + 74 + 66 + 72 = 285 -3 playoff
256
+ */
257
+ Q: most of the people who play for the 1987 master tournament be spanish
258
+ SQL: SELECT (SELECT(SELECT COUNT(*) FROM w WHERE country = 'spain') / (SELECT COUNT(*) FROM w)) > 0.5
259
+
260
+
261
+ CREATE TABLE 1976 world junior figure skating championships(
262
+ row_id int,
263
+ rank int,
264
+ name text,
265
+ nation text,
266
+ points real,
267
+ places int)
268
+ /*
269
+ 3 example rows:
270
+ SELECT * FROM w LIMIT 3;
271
+ row_id rank name nation points places
272
+ 0 1 sherri baier / robin cowan canada 128.39 9
273
+ 1 2 lorene mitchell / donald mitchell united states 124.94 16
274
+ 2 3 elizabeth cain / peter cain australia 116.67 33
275
+ */
276
+ Q: 2 of the 7 top - ranked figure skate team be from france
277
+ SQL: SELECT (SELECT (SELECT COUNT(*) FROM w) = 7) AND (SELECT (SELECT COUNT(*) FROM w WHERE nation = 'france') = 2)
templates/prompts/prompt_tab_fact_sqllike_v3.txt ADDED
@@ -0,0 +1,358 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ Generate SQL given the statement and table to verify the statement correctly.
2
+ If statement-relevant column(s) contents are not suitable for SQL comparisons or calculations, map it to a new column with clean content by a new grammar QA("map@").
3
+ If mapping to a new column still can not answer the statement with valid SQL, turn to an end-to-end solution by a new grammar QA("ans@"). This grammar aims to solve all the rest of complex statements or tables.
4
+
5
+ CREATE TABLE jason chambers(
6
+ row_id int,
7
+ res text,
8
+ record text,
9
+ opponent text,
10
+ method text,
11
+ event text,
12
+ round text)
13
+ /*
14
+ 3 example rows:
15
+ SELECT * FROM w LIMIT 3;
16
+ row_id res record opponent method event round
17
+ 0 win 18 - 5 - 2 dan new submission (rear naked choke) tfc - power fights 1
18
+ 1 win 17 - 5 - 2 rene gonzalez decision (split) mainstream mma - cold war n / a
19
+ 2 loss 16 - 5 - 2 tristan yunker submission ( armbar ) tfc 7 - total fight challenge 7 1
20
+ */
21
+ Q: in mac - midwest absolute challenge , the player be defeat by dan spychalski in 1 round
22
+ NeuralSQL: SELECT (SELECT opponent, round FROM w WHERE event = "mac - midwest absolute challenge")=("dan spychalski", 1)
23
+
24
+
25
+ CREATE TABLE 1943 vfl season(
26
+ row_id int,
27
+ home team text,
28
+ home team score text,
29
+ away team text,
30
+ away team score text,
31
+ venue text,
32
+ crowd int,
33
+ date text)
34
+ /*
35
+ 3 example rows:
36
+ SELECT * FROM w LIMIT 3;
37
+ row_id home team home team score away team away team score venue crowd date
38
+ 0 footscray 10.11 (71) south melbourne 6.14 (50) western oval 7500 1943-06-26 00:00:00
39
+ 1 collingwood 10.21 (81) melbourne 13.9 (87) victoria park 5000 1943-06-26 00:00:00
40
+ 2 carlton 15.16 (106) fitzroy 9.13 (67) princes park 12000 1943-06-26 00:00:00
41
+ */
42
+ Q: western oval be the venue when the home team footscray score 10.11 (71)
43
+ NeuralSQL: SELECT (SELECT venue FROM w WHERE `home team`="footscray" AND `home team score`="10.11 (71)") = "western oval"
44
+
45
+
46
+ CREATE TABLE 2005 pba draft(
47
+ row_id int,
48
+ pick int,
49
+ player text,
50
+ country of origin text,
51
+ pba team text,
52
+ college text)
53
+ /*
54
+ 3 example rows:
55
+ SELECT * FROM w LIMIT 3;
56
+ row_id pick player country of origin pba team college
57
+ 0 1 jay washington united states air21 express eckerd
58
+ 1 2 alex cabagnot united states sta lucia realtors hawaii - hilo
59
+ 2 3 dennis miranda philippines coca - cola tigers feu
60
+ */
61
+ Q: leo najorda be from philippine
62
+ NeuralSQL: SELECT (SELECT `country of origin` FROM w WHERE player = "leo najorda")="philippines"
63
+
64
+
65
+ CREATE TABLE none(
66
+ row_id int,
67
+ event text,
68
+ long course / short course text,
69
+ year set int,
70
+ time text,
71
+ meet text)
72
+ /*
73
+ 3 example rows:
74
+ SELECT * FROM w LIMIT 3;
75
+ row_id event long course / short course year set time meet
76
+ 0 100 m freestyle long course 2007 54.08 2007 fina world aquatic championships
77
+ 1 200 m individual medley long course 2011 2:11.23 2011 canadian world championship trials
78
+ 2 4 x 100 m medley relay long course 2010 3:38.14 2010 pan pacific championships
79
+ */
80
+ Q: in 2009 the record be set in 7:51:80
81
+ NeuralSQL: SELECT (SELECT time FROM w WHERE `year set` = 2009)="7:51.8"
82
+
83
+
84
+ CREATE TABLE turkish cup(
85
+ row_id int,
86
+ round text,
87
+ clubs remaining int,
88
+ clubs involved int,
89
+ winners from previous round real,
90
+ new entries this round real,
91
+ leagues entering at this round text)
92
+ /*
93
+ 3 example rows:
94
+ SELECT * FROM w LIMIT 3;
95
+ row_id round clubs remaining clubs involved winners from previous round new entries this round leagues entering at this round
96
+ 0 first round 156 86 nan 86.0 tff third league & turkish regional amateur league
97
+ 1 second round 113 108 43.0 65.0 süper lig & tff first league & tff second league
98
+ 2 third round 59 54 54.0 nan none
99
+ */
100
+ Q: during the 3rd round of the turkish cup , there be no new entry during that stage
101
+ NeuralSQL: SELECT (SELECT `new entries this round` FROM w WHERE round = 'third round') IS NULL
102
+
103
+
104
+ CREATE TABLE turkish cup(
105
+ row_id int,
106
+ round text,
107
+ clubs remaining int,
108
+ clubs involved int,
109
+ winners from previous round real,
110
+ new entries this round real,
111
+ leagues entering at this round text)
112
+ /*
113
+ 3 example rows:
114
+ SELECT * FROM w LIMIT 3;
115
+ row_id round clubs remaining clubs involved winners from previous round new entries this round leagues entering at this round
116
+ 0 first round 156 86 nan 86.0 tff third league & turkish regional amateur league
117
+ 1 second round 113 108 43.0 65.0 süper ligs & tff first league & tff second league
118
+ 2 third round 59 54 54.0 nan none
119
+ */
120
+ Q: süper lig be the most common league to win a round in the turkish cup
121
+ NeuralSQL: SELECT QA("ans@what is the most common league?"; `leagues entering at this round`) = 'süper ligs'
122
+
123
+
124
+ CREATE TABLE turkish cup(
125
+ row_id int,
126
+ round text,
127
+ clubs remaining int,
128
+ clubs involved int,
129
+ winners from previous round real,
130
+ new entries this round real,
131
+ leagues entering at this round text)
132
+ /*
133
+ 3 example rows:
134
+ SELECT * FROM w LIMIT 3;
135
+ row_id round clubs remaining clubs involved winners from previous round new entries this round leagues entering at this round
136
+ 0 first round 156 86 nan 86.0 tff third league & turkish regional amateur league
137
+ 1 second round 113 108 43.0 65.0 süper lig & tff first league & tff second league
138
+ 2 third round 59 54 54.0 nan none
139
+ */
140
+ Q: the lowest number of new entry conclude a round in the turkish cup be 5
141
+ NeuralSQL: SELECT (SELECT MIN(`new entries this round`) FROM w) = 5
142
+
143
+
144
+ CREATE TABLE cultural interest fraternities and sororities(
145
+ row_id int,
146
+ letters text,
147
+ organization text,
148
+ nickname text,
149
+ founding time text,
150
+ founding university text,
151
+ type text)
152
+ /*
153
+ 3 example rows:
154
+ SELECT * FROM w LIMIT 3;
155
+ row_id letters organization nickname founding time founding university type
156
+ 0 αεπ alpha epsilon pi 1 aepi 1913-11-07 00:00:00 new york university fraternity
157
+ 1 αεφ alpha epsilon phi 2 aephi 1909-10-24 00:00:00 barnard college sorority
158
+ 2 σαεπ sigma alpha epsilon pi 3 sigma 1998-10-01 00:00:00 university of california , davis sorority
159
+ */
160
+ Q: 4 of the cultural interest fraternity and sorority be fraternity while 3 be a sorority
161
+ NeuralSQL: SELECT (SELECT (SELECT COUNT(*) FROM w WHERE type = 'fraternity') = 4) AND (SELECT (SELECT COUNT(*) FROM w WHERE type = 'sorority') = 3)
162
+
163
+
164
+ CREATE TABLE british records in athletics(
165
+ row_id int,
166
+ event text,
167
+ data text,
168
+ athlete text,
169
+ date text,
170
+ place text)
171
+ /*
172
+ 3 example rows:
173
+ SELECT * FROM w LIMIT 3;
174
+ row_id event data athlete date place
175
+ 0 5 km t19:29 andi drake 1990-05-27 00:00:00 søfteland , norway
176
+ 1 5 miles 32:38 + ian mccombie 1985-03-23 00:00:00 york , united kingdom
177
+ 2 10 km 40:17 chris maddocks 1989-04-30 00:00:00 burrator , united kingdom
178
+ */
179
+ Q: there be 8 different event that take place within the united kingdom
180
+ NeuralSQL: SELECT (SELECT COUNT(place) FROM w WHERE QA("map@is it in united kingdom?"; place) = 'yes') = 8
181
+
182
+
183
+ CREATE TABLE jeev milkha singh(
184
+ row_id int,
185
+ tournament text,
186
+ wins int,
187
+ top - 10 int,
188
+ top - 25 int,
189
+ events int,
190
+ cuts made int)
191
+ /*
192
+ 3 example rows:
193
+ SELECT * FROM w LIMIT 3;
194
+ row_id tournament wins top - 10 top - 25 events cuts made
195
+ 0 masters tournament 0 0 1 3 2
196
+ 1 us open 0 0 0 4 3
197
+ 2 the open championship 0 0 0 2 1
198
+ */
199
+ Q: the number of cut made in the pga championship tournament be smaller than the number of event
200
+ NeuralSQL: SELECT (SELECT `cuts made` FROM w WHERE tournament = 'pga championship') < (SELECT events FROM w WHERE tournament = 'pga championship')
201
+
202
+
203
+ CREATE TABLE 2008 women 's british open(
204
+ row_id int,
205
+ place text,
206
+ player text,
207
+ country text,
208
+ score int,
209
+ to par int)
210
+ /*
211
+ 3 example rows:
212
+ SELECT * FROM w LIMIT 3;
213
+ row_id place player country score to par
214
+ 0 1 juli inkster united states 65 7
215
+ 1 t2 momoko ueda japan 66 6
216
+ 2 t2 laura diaz united states 66 6
217
+ */
218
+ Q: the 3 player from japan have the same score
219
+ NeuralSQL: SELECT (SELECT COUNT(DISTINCT score) FROM w WHERE country = 'japan') = 1
220
+
221
+
222
+ CREATE TABLE espn sunday night football results (1987 - 2005)(
223
+ row_id int,
224
+ date text,
225
+ visiting team text,
226
+ final score text,
227
+ host team text,
228
+ stadium text)
229
+ /*
230
+ 3 example rows:
231
+ SELECT * FROM w LIMIT 3;
232
+ row_id date visiting team final score host team stadium
233
+ 0 september 11 indianapolis colts 24 - 7 baltimore ravens m&t bank stadium
234
+ 1 september 18 kansas city chiefs 23 - 17 oakland raiders mcafee coliseum
235
+ 2 september 25 new york giants 23 - 45 san diego chargers qualcomm stadium
236
+ */
237
+ Q: the hosting team be the new york giant on new year even and the st louis ram on new year 's day
238
+ NeuralSQL: SELECT (SELECT (SELECT `host team` FROM w WHERE QA("map@is it new year even?"; date) = 'yes') = 'new york giant') AND (SELECT (SELECT `host team` FROM w WHERE QA("map@is it new year's day?"; date) = 'yes') = 'st louis ram')
239
+
240
+
241
+ CREATE TABLE 2008 women 's british open(
242
+ row_id int,
243
+ place text,
244
+ player text,
245
+ country text,
246
+ score text,
247
+ to par int)
248
+ /*
249
+ 3 example rows:
250
+ SELECT * FROM w LIMIT 3;
251
+ row_id place player country score to par
252
+ 0 t1 yuri fudoh japan 66 + 68 = 134 10
253
+ 1 t1 jiyai shin south korea 66 + 68 = 134 10
254
+ 2 3 juli inkster united states 65 + 70 = 135 9
255
+ */
256
+ Q: kristie kerr , tie for 4th place , finish the round 1 stroke under lorena ochoa of mexico
257
+ NeuralSQL: SELECT (SELECT (SELECT QA("map@what is the derived score?"; score) FROM w WHERE player = 'cristie kerr') < (SELECT QA("map@what is the derived score?"; score) FROM w WHERE player = 'lorena ochoa' AND country = 'mexico')) AND (SELECT (SELECT place FROM w WHERE player = 'cristie kerr') = "t4")
258
+
259
+
260
+ CREATE TABLE connecticut public radio(
261
+ row_id int,
262
+ call sign text,
263
+ frequency text,
264
+ city of license text,
265
+ facility id int,
266
+ erp / power w int,
267
+ height m ( ft ) real,
268
+ class text)
269
+ /*
270
+ 3 example rows:
271
+ SELECT * FROM w LIMIT 3;
272
+ row_id call sign frequency city of license facility id erp / power w height m ( ft ) class
273
+ 0 waic 91.9 fm springfield , ma 1749 230 nan b1
274
+ 1 wedw - fm 88.5 fm stamford , ct 13619 2000 nan a
275
+ 2 wnpr 90.5 fm ( hd ) connecticut public radio meriden , ct 13627 18500 nan b
276
+ */
277
+ Q: there be 3 station with a call sign number in the 90s
278
+ NeuralSQL: SELECT (SELECT COUNT(*) FROM w WHERE QA("map@is it in 90s?"; frequency) = 'yes') = 3
279
+
280
+
281
+ CREATE TABLE 2003 chicago white sox season(
282
+ row_id int,
283
+ date text,
284
+ opponent text,
285
+ score text,
286
+ loss text,
287
+ time text,
288
+ att int,
289
+ record text)
290
+ /*
291
+ 3 example rows:
292
+ SELECT * FROM w LIMIT 3;
293
+ row_id date opponent score loss time att record
294
+ 0 august 1 mariners 12 - 1 garcía (9 - 11) 2:52 39337 58 - 51
295
+ 1 august 2 mariners 0 - 10 wright (0 - 5) 2:22 45719 58 - 52
296
+ 2 august 3 mariners 2 - 8 buehrle (9 - 11) 2:57 45632 58 - 53
297
+ */
298
+ Q: the 2003 chicago white sox game play on 26th august be longer than the game play on 24th august
299
+ NeuralSQL: SELECT (SELECT time FROM w WHERE date = 'august 26') > (SELECT time FROM w WHERE date = 'august 24')
300
+
301
+
302
+ CREATE TABLE 1987 masters tournament(
303
+ row_id int,
304
+ place text,
305
+ player text,
306
+ country text,
307
+ score text,
308
+ to par text,
309
+ money text)
310
+ /*
311
+ 3 example rows:
312
+ SELECT * FROM w LIMIT 3;
313
+ row_id place player country score to par money
314
+ 0 t1 larry mize united states 70 + 72 + 72 + 71 = 285 -3 playoff
315
+ 1 t1 bernhard langer spain 73 + 71 + 70 + 71 = 285 -3 playoff
316
+ 2 t1 greg norman australia 73 + 74 + 66 + 72 = 285 -3 playoff
317
+ */
318
+ Q: bernhard m. langer have more point than roger maltbie during the 1987 master tournament
319
+ NeuralSQL: SELECT (SELECT QA("map@what is the total score?"; score) FROM w WHERE player = 'bernhard langer') > (SELECT QA("map@what is the total score?"; score) FROM w WHERE player = 'roger maltbie')
320
+
321
+
322
+ CREATE TABLE 1987 masters tournament(
323
+ row_id int,
324
+ place text,
325
+ player text,
326
+ country text,
327
+ score text,
328
+ to par text,
329
+ money text)
330
+ /*
331
+ 3 example rows:
332
+ SELECT * FROM w LIMIT 3;
333
+ row_id place player country score to par money
334
+ 0 t1 larry mize united states 70 + 72 + 72 + 71 = 285 -3 playoff
335
+ 1 t1 seve ballesteros spain 73 + 71 + 70 + 71 = 285 -3 playoff
336
+ 2 t1 greg norman australia 73 + 74 + 66 + 72 = 285 -3 playoff
337
+ */
338
+ Q: most of the people who play for the 1987 master tournament be spanish
339
+ NeuralSQL: SELECT (SELECT(SELECT COUNT(*) FROM w WHERE country = 'spain') / (SELECT COUNT(*) FROM w)) > 0.5
340
+
341
+
342
+ CREATE TABLE 1976 world junior figure skating championships(
343
+ row_id int,
344
+ rank int,
345
+ name text,
346
+ nation text,
347
+ points real,
348
+ places int)
349
+ /*
350
+ 3 example rows:
351
+ SELECT * FROM w LIMIT 3;
352
+ row_id rank name nation points places
353
+ 0 1 sherri baier / robin cowan canada 128.39 9
354
+ 1 2 lorene mitchell / donald mitchell united states 124.94 16
355
+ 2 3 elizabeth cain / peter cain australia 116.67 33
356
+ */
357
+ Q: 2 of the 7 top - ranked figure skate team be from france
358
+ NeuralSQL: SELECT (SELECT (SELECT COUNT(*) FROM w) = 7) AND (SELECT (SELECT COUNT(*) FROM w WHERE nation = 'france') = 2)
templates/prompts/prompt_tab_fact_word.txt ADDED
@@ -0,0 +1,228 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ Generate answer given the question and table to answer the question correctly.
2
+
3
+ CREATE TABLE 1981 oakland raiders season(
4
+ row_id int,
5
+ round int,
6
+ overall int,
7
+ player text,
8
+ position text,
9
+ college text)
10
+ /*
11
+ All rows of the table:
12
+ SELECT * FROM w;
13
+ row_id round overall player position college
14
+ 0 1 21 ted watts cb texas tech
15
+ 1 1 23 curt marsh ot washington
16
+ 2 2 48 howie long de villanova
17
+ 3 4 111 johnny robinson dt louisiana tech
18
+ 4 5 118 james davis cb southern
19
+ 5 9 248 curt mohl ot ucla
20
+ 6 10 276 frank hawkins hb nevada
21
+ 7 11 304 chester willis hb auburn
22
+ 8 12 332 phil nelson te delaware
23
+ */
24
+ Q: the oakland raider drafter more defensive player than offensive player in the 1981 nlf draft
25
+ A: entailed
26
+
27
+
28
+ CREATE TABLE 1981 oakland raiders season(
29
+ row_id int,
30
+ round int,
31
+ overall int,
32
+ player text,
33
+ position text,
34
+ college text)
35
+ /*
36
+ All rows of the table:
37
+ SELECT * FROM w;
38
+ row_id round overall player position college
39
+ 0 1 21 ted watts cb texas tech
40
+ 1 1 23 curt marsh ot washington
41
+ 2 2 48 howie long de villanova
42
+ 3 4 111 johnny robinson dt louisiana tech
43
+ 4 5 118 james davis cb southern
44
+ 5 9 248 curt mohl ot ucla
45
+ 6 10 276 frank hawkins hb nevada
46
+ 7 11 304 chester willis hb auburn
47
+ 8 12 332 phil nelson te delaware
48
+ */
49
+ Q: the raider pick up halfback in back to back round in the 1981 nfl draft
50
+ A: refuted
51
+
52
+
53
+ CREATE TABLE wake forest demon deacons football , 1980 - 89(
54
+ row_id int,
55
+ date text,
56
+ opponent text,
57
+ location text,
58
+ result text,
59
+ attendance int)
60
+ /*
61
+ All rows of the table:
62
+ SELECT * FROM w;
63
+ row_id date opponent location result attendance
64
+ 0 1988-09-03 00:00:00 villanova villanova stadium villanova , pa w 31 - 11 11624
65
+ 1 1988-09-10 00:00:00 illinois state groves stadium winston - salem , nc w 35 - 0 22250
66
+ 2 1988-09-17 00:00:00 north carolina state carter - finley stadium raleigh , nc l 6 - 14 48000
67
+ 3 1988-09-24 00:00:00 19 michigan michigan stadium ann arbor , mi l 9 - 19 102776
68
+ 4 1988-10-08 00:00:00 north carolina groves stadium winston - salem , nc w 42 - 24 33500
69
+ 5 1988-10-15 00:00:00 maryland byrd stadium college park , md w 27 - 24 41278
70
+ 6 1988-10-22 00:00:00 virginia groves stadium winston - salem , nc l 14 - 34 21300
71
+ 7 1988-10-29 00:00:00 15 clemson groves stadium winston - salem , nc l 21 - 38 27300
72
+ 8 1988-11-05 00:00:00 duke wallace wade stadium durham , nc w 35 - 16 35500
73
+ 9 1988-11-12 00:00:00 georgia tech groves stadium winston - salem , nc w 28 - 24 21500
74
+ 10 1988-11-19 00:00:00 appalachian state groves stadium winston - salem , nc t 34 - 34 21050
75
+ */
76
+ Q: the demon deacon finish the season with a 6 - 4 - 1 record
77
+ A: refuted
78
+
79
+
80
+ CREATE TABLE wake forest demon deacons football , 1980 - 89(
81
+ row_id int,
82
+ date text,
83
+ opponent text,
84
+ location text,
85
+ result text,
86
+ attendance int)
87
+ /*
88
+ All rows of the table:
89
+ SELECT * FROM w;
90
+ row_id date opponent location result attendance
91
+ 0 1988-09-03 00:00:00 villanova villanova stadium villanova , pa w 31 - 11 11624
92
+ 1 1988-09-10 00:00:00 illinois state groves stadium winston - salem , nc w 35 - 0 22250
93
+ 2 1988-09-17 00:00:00 north carolina state carter - finley stadium raleigh , nc l 6 - 14 48000
94
+ 3 1988-09-24 00:00:00 19 michigan michigan stadium ann arbor , mi l 9 - 19 102776
95
+ 4 1988-10-08 00:00:00 north carolina groves stadium winston - salem , nc w 42 - 24 33500
96
+ 5 1988-10-15 00:00:00 maryland byrd stadium college park , md w 27 - 24 41278
97
+ 6 1988-10-22 00:00:00 virginia groves stadium winston - salem , nc l 14 - 34 21300
98
+ 7 1988-10-29 00:00:00 15 clemson groves stadium winston - salem , nc l 21 - 38 27300
99
+ 8 1988-11-05 00:00:00 duke wallace wade stadium durham , nc w 35 - 16 35500
100
+ 9 1988-11-12 00:00:00 georgia tech groves stadium winston - salem , nc w 28 - 24 21500
101
+ 10 1988-11-19 00:00:00 appalachian state groves stadium winston - salem , nc t 34 - 34 21050
102
+ */
103
+ Q: wake forest lose the only 5 game they play versus rank opponent
104
+ A: entailed
105
+
106
+
107
+ CREATE TABLE 2008 women 's british open(
108
+ row_id int,
109
+ place text,
110
+ player text,
111
+ country text,
112
+ score int,
113
+ to par int)
114
+ /*
115
+ All rows of the table:
116
+ SELECT * FROM w;
117
+ row_id place player country score to par
118
+ 0 1 juli inkster united states 65 7
119
+ 1 t2 momoko ueda japan 66 6
120
+ 2 t2 laura diaz united states 66 6
121
+ 3 t2 ji young oh south korea 66 6
122
+ 4 t2 yuri fudoh japan 66 6
123
+ 5 t2 johanna head england 66 6
124
+ 6 t2 stacy prammanasudh united states 66 6
125
+ 7 t2 jiyai shin south korea 66 6
126
+ 8 t9 kristy mcpherson united states 67 5
127
+ 9 t9 karen stupples england 67 5
128
+ 10 t9 rebecca hudson england 67 5
129
+ 11 t9 sherri steinhauer united states 67 5
130
+ */
131
+ Q: 7 of the player have an identical score in the 2008 woman 's british open
132
+ A: refuted
133
+
134
+
135
+ CREATE TABLE 2008 women 's british open(
136
+ row_id int,
137
+ place text,
138
+ player text,
139
+ country text,
140
+ score int,
141
+ to par int)
142
+ /*
143
+ All rows of the table:
144
+ SELECT * FROM w;
145
+ row_id place player country score to par
146
+ 0 1 juli inkster united states 65 7
147
+ 1 t2 momoko ueda japan 66 6
148
+ 2 t2 laura diaz united states 66 6
149
+ 3 t2 ji young oh south korea 66 6
150
+ 4 t2 yuri fudoh japan 66 6
151
+ 5 t2 johanna head england 66 6
152
+ 6 t2 stacy prammanasudh united states 66 6
153
+ 7 t2 jiyai shin south korea 66 6
154
+ 8 t9 kristy mcpherson united states 67 5
155
+ 9 t9 karen stupples england 67 5
156
+ 10 t9 rebecca hudson england 67 5
157
+ 11 t9 sherri steinhauer united states 67 5
158
+ */
159
+ Q: there be 3 player total from the united state
160
+ A: entailed
161
+
162
+
163
+ CREATE TABLE espn sunday night football results (1987 - 2005)(
164
+ row_id int,
165
+ date text,
166
+ visiting team text,
167
+ final score text,
168
+ host team text,
169
+ stadium text)
170
+ /*
171
+ All rows of the table:
172
+ SELECT * FROM w;
173
+ row_id date visiting team final score host team stadium
174
+ 0 september 11 indianapolis colts 24 - 7 baltimore ravens m&t bank stadium
175
+ 1 september 18 kansas city chiefs 23 - 17 oakland raiders mcafee coliseum
176
+ 2 september 25 new york giants 23 - 45 san diego chargers qualcomm stadium
177
+ 3 october 2 san francisco 49ers 14 - 31 arizona cardinals estadio azteca
178
+ 4 october 9 cincinnati bengals 20 - 23 jacksonville jaguars alltel stadium
179
+ 5 october 16 houston texans 10 - 42 seattle seahawks qwest field
180
+ 6 october 30 buffalo bills 16 - 21 new england patriots gillette stadium
181
+ 7 november 6 philadelphia eagles 10 - 17 washington redskins fedex field
182
+ 8 november 13 cleveland browns 21 - 34 pittsburgh steelers heinz field
183
+ 9 november 20 kansas city chiefs 45 - 17 houston texans reliant stadium
184
+ 10 november 27 new orleans saints 21 - 19 new york jets giants stadium
185
+ 11 december 4 oakland raiders 10 - 34 san diego chargers qualcomm stadium
186
+ 12 december 11 detroit lions 13 - 16 green bay packers lambeau field
187
+ 13 december 17 denver broncos 28 - 17 buffalo bills ralph wilson stadium
188
+ 14 december 18 atlanta falcons 3 - 16 chicago bears soldier field
189
+ 15 december 25 minnesota vikings 23 - 30 baltimore ravens m&t bank stadium
190
+ 16 december 31 new york giants 30 - 21 oakland raiders mcafee coliseum
191
+ 17 january 1 (2006) st louis rams 20 - 10 dallas cowboys texas stadium
192
+ */
193
+ Q: the visiting team be the new york giant on new year 's eve , and st louis ram on new year 's day
194
+ A: refuted
195
+
196
+
197
+ CREATE TABLE espn sunday night football results (1987 - 2005)(
198
+ row_id int,
199
+ date text,
200
+ visiting team text,
201
+ final score text,
202
+ host team text,
203
+ stadium text)
204
+ /*
205
+ All rows of the table:
206
+ SELECT * FROM w;
207
+ row_id date visiting team final score host team stadium
208
+ 0 september 11 indianapolis colts 24 - 7 baltimore ravens m&t bank stadium
209
+ 1 september 18 kansas city chiefs 23 - 17 oakland raiders mcafee coliseum
210
+ 2 september 25 new york giants 23 - 45 san diego chargers qualcomm stadium
211
+ 3 october 2 san francisco 49ers 14 - 31 arizona cardinals estadio azteca
212
+ 4 october 9 cincinnati bengals 20 - 23 jacksonville jaguars alltel stadium
213
+ 5 october 16 houston texans 10 - 42 seattle seahawks qwest field
214
+ 6 october 30 buffalo bills 16 - 21 new england patriots gillette stadium
215
+ 7 november 6 philadelphia eagles 10 - 17 washington redskins fedex field
216
+ 8 november 13 cleveland browns 21 - 34 pittsburgh steelers heinz field
217
+ 9 november 20 kansas city chiefs 45 - 17 houston texans reliant stadium
218
+ 10 november 27 new orleans saints 21 - 19 new york jets giants stadium
219
+ 11 december 4 oakland raiders 10 - 34 san diego chargers qualcomm stadium
220
+ 12 december 11 detroit lions 13 - 16 green bay packers lambeau field
221
+ 13 december 17 denver broncos 28 - 17 buffalo bills ralph wilson stadium
222
+ 14 december 18 atlanta falcons 3 - 16 chicago bears soldier field
223
+ 15 december 25 minnesota vikings 23 - 30 baltimore ravens m&t bank stadium
224
+ 16 december 31 new york giants 30 - 21 oakland raiders mcafee coliseum
225
+ 17 january 1 (2006) st louis rams 20 - 10 dallas cowboys texas stadium
226
+ */
227
+ Q: the indianapolis colt and kansas city chief be the only visit team in september
228
+ A: entailed
templates/prompts/prompt_wikitq_puresql_v3.txt ADDED
@@ -0,0 +1,287 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ Generate SQL given the question and table to answer the question correctly.
2
+
3
+ CREATE TABLE Fabrice Santoro(
4
+ row_id int,
5
+ name text,
6
+ 2001 text,
7
+ 2002 text,
8
+ 2003 text,
9
+ 2004 text,
10
+ 2005 text,
11
+ 2006 text,
12
+ 2007 text,
13
+ 2008 text,
14
+ 2009 text,
15
+ 2010 text,
16
+ career\nsr text,
17
+ wins int)
18
+ /*
19
+ 3 example rows:
20
+ SELECT * FROM w LIMIT 3;
21
+ row_id name 2001 2002 2003 2004 2005 2006 2007 2008 2009 2010 career\nsr wins
22
+ 0 australian open 2r 1r 3r 2r 1r qf 3r 2r 3r 1r 0 / 18 22
23
+ 1 french open 4r 2r 2r 3r 1r 1r 1r 2r 1r a 0 / 20 17
24
+ 2 wimbledon 3r 2r 2r 2r 2r 2r 2r 1r 2r a 0 / 14 11
25
+ */
26
+ Q: did he win more at the australian open or indian wells?
27
+ SQL: SELECT name FROM w WHERE name IN ('australian open', 'indian wells') ORDER BY wins DESC LIMIT 1
28
+
29
+
30
+ CREATE TABLE 2007 New Orleans Saints season(
31
+ row_id int,
32
+ week int,
33
+ date text,
34
+ opponent text,
35
+ time text,
36
+ game site text,
37
+ tv text,
38
+ result text,
39
+ record text)
40
+ /*
41
+ 3 example rows:
42
+ SELECT * FROM w LIMIT 3;
43
+ row_id week date opponent time game site tv result record
44
+ 0 1 2007-9-6 indianapolis colts t20:30 edt rca dome nbc l 0–1
45
+ 1 2 2007-9-16 tampa bay buccaneers t13:0 edt raymond james stadium fox l 0–2
46
+ 2 3 2007-9-24 tennessee titans t20:30 edt louisiana superdome espn l 0–3
47
+ */
48
+ Q: what number of games were lost at home?
49
+ SQL: SELECT COUNT(*) FROM w WHERE result = 'l' AND `game site` = 'louisiana superdome'
50
+
51
+
52
+ CREATE TABLE 2007 New Orleans Saints season(
53
+ row_id int,
54
+ week int,
55
+ date text,
56
+ opponent text,
57
+ time text,
58
+ game site text,
59
+ tv text,
60
+ result/score text,
61
+ record text)
62
+ /*
63
+ 3 example rows:
64
+ SELECT * FROM w LIMIT 3;
65
+ row_id week date opponent time game site tv result/score record
66
+ 0 1 2007-9-6 indianapolis colts t20:30 edt away nbc loss 0–1
67
+ 1 2 2007-9-16 tampa bay buccaneers t13:0 edt home fox win 1-1
68
+ 2 3 2007-9-24 tennessee titans t20:30 edt away espn loss 1-2
69
+ */
70
+ Q: what number of games were lost at home?
71
+ SQL: SELECT COUNT(*) FROM w WHERE `result/score` = 'loss' AND `game site` = 'home'
72
+
73
+
74
+ CREATE TABLE Electricity in Sri Lanka(
75
+ row_id int,
76
+ filledcolumnname text,
77
+ 2005 int,
78
+ 2006 int,
79
+ 2007 int,
80
+ 2008 int,
81
+ 2009 int,
82
+ 2010 int,
83
+ 2011 int,
84
+ 2012 int)
85
+ /*
86
+ 3 example rows:
87
+ SELECT * FROM w LIMIT 3;
88
+ row_id filledcolumnname 2005 2006 2007 2008 2009 2010 2011 2012
89
+ 0 hydro power 1293 1316 1326 1357 1379 1382 1401 1584
90
+ 1 thermal 1155 1155 1155 1285 1290 1390 1690 1638
91
+ 2 other renewables 3 3 3 3 15 45 50 90
92
+ */
93
+ Q: did the hydro power increase or decrease from 2010 to 2012?
94
+ SQL: SELECT CASE WHEN (SELECT `2010` FROM w WHERE filledcolumnname = 'hydro power') < (SELECT `2012` FROM w WHERE filledcolumnname = 'hydro power') THEN 'increase' ELSE 'decrease' END
95
+
96
+
97
+ CREATE TABLE Portugal in the Eurovision Song Contest 1979(
98
+ row_id int,
99
+ draw int,
100
+ artist text,
101
+ song text,
102
+ points int,
103
+ place text)
104
+ /*
105
+ 3 example rows:
106
+ SELECT * FROM w LIMIT 3;
107
+ row_id draw artist song points place
108
+ 0 1 gonzaga coutinho "tema para um homem só" 102 5th
109
+ 1 2 pedro osório s.a.r.l. "uma canção comercial" 123 3rd
110
+ 2 3 concha "qualquer dia, quem diria" 78 6th
111
+ */
112
+ Q: who was the last draw?
113
+ SQL: SELECT `artist` FROM w ORDER by `draw` desc LIMIT 1
114
+
115
+
116
+ CREATE TABLE GER Class N31(
117
+ row_id int,
118
+ year int,
119
+ order text,
120
+ quantity int,
121
+ ger nos. text)
122
+ /*
123
+ 3 example rows:
124
+ SELECT * FROM w LIMIT 3;
125
+ row_id year order quantity ger nos.
126
+ 0 1893 n31 1 999
127
+ 1 1893 h33 10 979
128
+ 2 1894 l33 10 989
129
+ */
130
+ Q: which had more ger numbers, 1898 or 1893?
131
+ SQL: SELECT `year` FROM w WHERE `year` IN ( '1898' , '1893' ) GROUP by `year` ORDER by SUM (`ger nos.`) desc LIMIT 1
132
+
133
+
134
+ CREATE TABLE List of spans(
135
+ row_id int,
136
+ tramway text,
137
+ country text,
138
+ city text,
139
+ height of pylons text,
140
+ span width,\nleaning straight line text,
141
+ span width,\nhorizontal measurement text,
142
+ height of cable over ground text,
143
+ year of inauguration text,
144
+ notes text)
145
+ /*
146
+ 3 example rows:
147
+ SELECT * FROM w LIMIT 3;
148
+ row_id tramway country city height of pylons span width,\nleaning straight line span width,\nhorizontal measurement height of cable over ground year of inauguration notes
149
+ 0 peak 2 peak gondola canada whistler 65m 3024 m 3019 m 436 m 2008 3s aerial tramway constructed by doppelmayr
150
+ 1 hut of regensburg material transport aerial railway austria falbeson ? ? ? 430 m ? none
151
+ 2 vanoise express france vanoise none 1850 m 1800 m 380 m 2003 none
152
+ */
153
+ Q: was the sandia peak tramway innagurate before or after the 3s aerial tramway?
154
+ SQL: SELECT ( SELECT `year of inauguration` FROM w WHERE `tramway` = 'sandia peak tramway' ) < ( SELECT `year of inauguration` FROM w WHERE `tramway` = '3s aerial tramway' )
155
+
156
+
157
+ CREATE TABLE World Artistic Gymnastics Championships – Women's floor(
158
+ id int,
159
+ year int,
160
+ location text,
161
+ gold text,
162
+ silver text,
163
+ bronze text)
164
+ /*
165
+ 3 example rows:
166
+ SELECT * FROM w LIMIT 3;
167
+ id year location gold silver bronze
168
+ 0 1950 basel helena rakoczy tereza kočiš stefania reindlova
169
+ 1 1954 rome tamara manina eva bosáková maria gorokovskaya
170
+ 2 1958 moscow eva bosáková larisa latynina keiko tanaka
171
+ */
172
+ Q: where were the championships held before the 1962 prague championships?
173
+ SQL: SELECT `location` FROM w WHERE `year` < 1962 ORDER by `year` desc LIMIT 1
174
+
175
+
176
+ CREATE TABLE WSL World Heavyweight Championship(
177
+ id int,
178
+ wrestler: text,
179
+ times: text,
180
+ date: text,
181
+ location: text,
182
+ notes: text)
183
+ /*
184
+ 3 example rows:
185
+ SELECT * FROM w LIMIT 3;
186
+ id wrestler: times: date: location: notes:
187
+ 0 jonnie stewart 1 1996-6-6 rochester, minnesota defeated larry gligorovich to win the awa superstars of wrestling world heavyweight championship.
188
+ 1 king kong bundy 1 1999-3-31 oshkosh, wisconsin later stripped of the title by owner dale gagne.
189
+ 2 the patriot; (danny dominion) 1 2000-7-29 pine bluff, arkansas defeated dale gagne in an impromptu match to win the title.
190
+ */
191
+ Q: when did steve corino win his first wsl title?
192
+ SQL: SELECT `date:` FROM w WHERE `wrestler:` = 'steve corino' ORDER by `date:` LIMIT 1
193
+
194
+
195
+ CREATE TABLE Płock Governorate(
196
+ row_id int,
197
+ language text,
198
+ number int,
199
+ percentage (%) text,
200
+ males int,
201
+ females int)
202
+ /*
203
+ 3 example rows:
204
+ SELECT * FROM w LIMIT 3;
205
+ row_id language number percentage (%) males females
206
+ 0 polish 447685 80.86 216794 230891
207
+ 1 yiddish 51215 9.25 24538 26677
208
+ 2 german 35931 6.49 17409 18522
209
+ */
210
+ Q: how many male and female german speakers are there?
211
+ SQL: SELECT `males` + `females` FROM w WHERE `language` = 'german'
212
+
213
+
214
+ CREATE TABLE Shikoku Pilgrimage(
215
+ row_id int,
216
+ no. int,
217
+ temple text,
218
+ honzon (main image) text,
219
+ city/town/village text,
220
+ prefecture text)
221
+ /*
222
+ 3 example rows:
223
+ SELECT * FROM w LIMIT 3;
224
+ row_id no. temple honzon (main image) city/town/village prefecture
225
+ 0 1 ryōzen-ji (霊山寺) shaka nyorai naruto tokushima prefecture
226
+ 1 2 gokuraku-ji (極楽寺) amida nyorai naruto tokushima prefecture
227
+ 2 3 konsen-ji (金泉寺) shaka nyorai itano tokushima prefecture
228
+ */
229
+ Q: what is the difference in the number of temples between imabari and matsuyama?
230
+ SQL: SELECT abs ( ( SELECT COUNT ( `temple` ) FROM w WHERE `city/town/village` = 'imabari' ) - ( SELECT COUNT ( `temple` ) FROM w WHERE `city/town/village` = 'matsuyama' ) )
231
+
232
+
233
+ CREATE TABLE Athletics at the 2001 Goodwill Games – Results(
234
+ row_id int,
235
+ rank real,
236
+ name text,
237
+ nationality text,
238
+ time text)
239
+ /*
240
+ 3 example rows:
241
+ SELECT * FROM w LIMIT 3;
242
+ row_id rank name nationality time
243
+ 0 nan brahim boulami morocco 2022-07-17 08:17:43
244
+ 1 nan reuben kosgei kenya 2022-07-17 08:18:37
245
+ 2 nan stephen cherono kenya 2022-07-17 08:19:58
246
+ */
247
+ Q: what counties had the least participants for the race?
248
+ SQL: SELECT `nationality` FROM w GROUP by `nationality` having COUNT ( `name` ) = ( SELECT COUNT ( `name` ) FROM w GROUP by `nationality` ORDER by COUNT ( `name` ) asc LIMIT 1 )
249
+
250
+
251
+ CREATE TABLE Saint Helena, Ascension and Tristan da Cunha(
252
+ row_id int,
253
+ administrative\narea text,
254
+ area\nkm2 real,
255
+ area\nsq mi int,
256
+ population int,
257
+ administrative\ncentre text)
258
+ /*
259
+ 3 example rows:
260
+ SELECT * FROM w LIMIT 3;
261
+ row_id administrative\narea area\nkm2 area\nsq mi population administrative\ncentre
262
+ 0 saint helena 122.0 47 5809 jamestown
263
+ 1 ascension island 91.0 35 1532 georgetown
264
+ 2 tristan da cunha 184.0 71 388 edinburgh of the 7 seas
265
+ */
266
+ Q: is the are of saint helena more than that of nightingale island?
267
+ SQL: SELECT ( SELECT `area\\nkm2` FROM w WHERE `administrative\\narea` = 'saint helena' ) > ( SELECT `area\\nkm2` FROM w WHERE `administrative\\narea` = 'nightingale island' )
268
+
269
+
270
+ CREATE TABLE The Boys (comics)(
271
+ row_id int,
272
+ # int,
273
+ title text,
274
+ tpb isbn text,
275
+ tpb release date text,
276
+ tpb page number int,
277
+ collected material text)
278
+ /*
279
+ 3 example rows:
280
+ SELECT * FROM w LIMIT 3;
281
+ row_id # title tpb isbn tpb release date tpb page number collected material
282
+ 0 1 the name of the game isbn 91-33-30546-3 2007-06-01 00:00:00 152 the boys #1-6
283
+ 1 2 get some isbn 1-933305-68-1 2008-03-01 00:00:00 192 the boys #7–14
284
+ 2 3 good for the soul isbn 1-933305-92-4 2008-10-01 00:00:00 192 the boys #15-22
285
+ */
286
+ Q: what title appears before "the self-preservation society"?
287
+ SQL: SELECT `title` FROM w WHERE row_id = ( SELECT row_id FROM w WHERE `title` = 'the self-preservation society' ) - 1
templates/prompts/prompt_wikitq_python_simplified_v4.txt ADDED
@@ -0,0 +1,426 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ Generate Python given the question and table to answer the question correctly.
2
+ If question-relevant column(s) contents require external knowledge or unsupported Python grammar, map it to a new column by calling function qa_map(table, question, column(s)).
3
+ The `qa_map()` function definition is listed to help know its functionality better:
4
+ <code>
5
+ def qa_map(db: pd.DataFrame, question: str, columns: List[str]) -> pd.DataFrame:
6
+ qa_model = OpenAIQAModel()
7
+ new_db = NeuralDB([{"title": "", "table": {"header": db.columns.values.tolist(), "rows": db.values.tolist()}}])
8
+ sql_executed_sub_tables = []
9
+ for column in columns:
10
+ column = f"`{column}`"
11
+ sql_executed_sub_tables.append(new_db.execute_query(column))
12
+ sub_table = qa_model.qa(question, sql_executed_sub_tables,)
13
+ new_db.add_subtable(sub_table)
14
+ table = new_db.get_table()
15
+ return pd.DataFrame(table["rows"], columns=table["header"])
16
+ </code>
17
+
18
+ Here are some examples.
19
+
20
+ CREATE TABLE Fabrice Santoro(
21
+ row_id int,
22
+ name text,
23
+ 2001 text,
24
+ 2002 text,
25
+ 2003 text,
26
+ 2004 text,
27
+ 2005 text,
28
+ 2006 text,
29
+ 2007 text,
30
+ 2008 text,
31
+ 2009 text,
32
+ 2010 text,
33
+ career\nsr text,
34
+ career\nwin-loss text)
35
+ /*
36
+ 3 example rows:
37
+ SELECT * FROM w LIMIT 3;
38
+ row_id name 2001 2002 2003 2004 2005 2006 2007 2008 2009 2010 career\nsr career\nwin-loss
39
+ 0 australian open 2r 1r 3r 2r 1r qf 3r 2r 3r 1r 0 / 18 22–18
40
+ 1 french open 4r 2r 2r 3r 1r 1r 1r 2r 1r a 0 / 20 17–20
41
+ 2 wimbledon 3r 2r 2r 2r 2r 2r 2r 1r 2r a 0 / 14 11–14
42
+ */
43
+ Q: did he win more at the australian open or indian wells?
44
+ NeuralPython:
45
+ def solve(table: pd.DataFrame):
46
+ table = qa_map(table, "how many wins?", ["career\\nwin-loss"])
47
+ sub_table = table[(table['name'] == 'australian open') | (table['name'] == 'indian wells')]
48
+ tmp = [(x, y) for x, y in zip(sub_table['name'], sub_table['how many wins?'])]
49
+ tmp = sorted(tmp, key=lambda x: x[1], reverse=True)
50
+ result = list(map(lambda x: x[0], tmp))[0]
51
+ return result
52
+
53
+
54
+ CREATE TABLE 2007 New Orleans Saints season(
55
+ row_id int,
56
+ week int,
57
+ date text,
58
+ opponent text,
59
+ time text,
60
+ game site text,
61
+ tv text,
62
+ result text,
63
+ record text)
64
+ /*
65
+ 3 example rows:
66
+ SELECT * FROM w LIMIT 3;
67
+ row_id week date opponent time game site tv result record
68
+ 0 1 2007-9-6 indianapolis colts t20:30 edt rca dome nbc l 0–1
69
+ 1 2 2007-9-16 tampa bay buccaneers t13:0 edt raymond james stadium fox l 0–2
70
+ 2 3 2007-9-24 tennessee titans t20:30 edt louisiana superdome espn l 0–3
71
+ */
72
+ Q: what number of games were lost at home?
73
+ NeuralPython:
74
+ def solve(table: pd.DataFrame):
75
+ sub_table = table[(table['result'] == 'l') & (table['game site'] == 'louisiana superdome')]
76
+ result = len(sub_table)
77
+ return result
78
+
79
+
80
+ CREATE TABLE Electricity in Sri Lanka(
81
+ row_id int,
82
+ filledcolumnname text,
83
+ 2005 int,
84
+ 2006 int,
85
+ 2007 int,
86
+ 2008 int,
87
+ 2009 int,
88
+ 2010 int,
89
+ 2011 int,
90
+ 2012 int)
91
+ /*
92
+ 3 example rows:
93
+ SELECT * FROM w LIMIT 3;
94
+ row_id filledcolumnname 2005 2006 2007 2008 2009 2010 2011 2012
95
+ 0 hydro power 1293 1316 1326 1357 1379 1382 1401 1584
96
+ 1 thermal 1155 1155 1155 1285 1290 1390 1690 1638
97
+ 2 other renewables 3 3 3 3 15 45 50 90
98
+ */
99
+ Q: did the hydro power increase or decrease from 2010 to 2012?
100
+ NeuralPython:
101
+ def solve(table: pd.DataFrame):
102
+ result = table[table['filledcolumnname'] == 'hydro power']['2010'].values[0] - table[table['filledcolumnname'] == 'hydro power']['2012'].values[0]
103
+ if result > 0:
104
+ return 'decrease'
105
+ else:
106
+ return 'increase'
107
+
108
+
109
+ CREATE TABLE 2007 New Orleans Saints season(
110
+ row_id int,
111
+ week int,
112
+ date text,
113
+ opponent text,
114
+ time text,
115
+ game site text,
116
+ tv text,
117
+ result/score text,
118
+ record text)
119
+ /*
120
+ 3 example rows:
121
+ SELECT * FROM w LIMIT 3;
122
+ row_id week date opponent time game site tv result/score record
123
+ 0 1 2007-9-6 indianapolis colts t20:30 edt rca dome nbc l 41 – 10 0–1
124
+ 1 2 2007-9-16 tampa bay buccaneers t13:0 edt raymond james stadium fox l 31 – 14 0–2
125
+ 2 3 2007-9-24 tennessee titans t20:30 edt louisiana superdome espn l 31 – 14 0–3
126
+ */
127
+ Q: what number of games were lost at home?
128
+ NeuralPython:
129
+ def solve(table: pd.DataFrame):
130
+ table = qa_map(table, "is it a loss?", ["result/score"])
131
+ table = qa_map(table, "is it the home court of New Orleans Saints?", ["game site"])
132
+ sub_table = table[(table['is it a loss?'] == 'yes') & (table['is it the home court of New Orleans Saints?'] == 'yes')]
133
+ result = len(sub_table)
134
+ return result
135
+
136
+
137
+ CREATE TABLE Portugal in the Eurovision Song Contest 1979(
138
+ row_id int,
139
+ draw int,
140
+ artist text,
141
+ song text,
142
+ points int,
143
+ place text)
144
+ /*
145
+ 3 example rows:
146
+ SELECT * FROM w LIMIT 3;
147
+ row_id draw artist song points place
148
+ 0 1 gonzaga coutinho "tema para um homem só" 102 5th
149
+ 1 2 pedro osório s.a.r.l. "uma canção comercial" 123 3rd
150
+ 2 3 concha "qualquer dia, quem diria" 78 6th
151
+ */
152
+ Q: who was the last draw in the table?
153
+ NeuralPython:
154
+ def solve(table: pd.DataFrame):
155
+ sub_table = table
156
+ tmp = [(x, y) for x, y in zip(sub_table['artist'], sub_table['row_id'])]
157
+ tmp = sorted(tmp, key=lambda x: x[1], reverse=True)
158
+ result = list(map(lambda x: x[0], tmp))[0]
159
+ return result
160
+
161
+
162
+ CREATE TABLE Highest mountain peaks of California(
163
+ row_id int,
164
+ rank int,
165
+ mountain peak text,
166
+ mountain range text,
167
+ elevation text,
168
+ prominence text,
169
+ isolation text,
170
+ location text)
171
+ /*
172
+ 3 example rows:
173
+ SELECT * FROM w LIMIT 3;
174
+ row_id rank mountain peak mountain range elevation prominence isolation location
175
+ 0 1 mount whitney sierra nevada 14505 ft; 4421 m 10080 ft; 3072 m 1646 mi; 2649 km 36°34′43″n 118°17′31″w / 36.5786°n 118.292°w
176
+ 1 2 mount williamson sierra nevada 14379 ft; 4383 m 1677 ft; 511 m 5.4 mi; 8.7 km 36°39′21″n 118°18′40″w / 36.6559°n 118.3111°w
177
+ 2 3 white mountain peak white mountains 14252 ft; 4344 m 7196 ft; 2193 m 67 mi; 109 km 37°38′3″n 118°15′21″w / 37.6341°n 118.2557°w
178
+ */
179
+ Q: which mountain peak has a prominence more than 10,000 ft?
180
+ NeuralPython:
181
+ def solve(table: pd.DataFrame):
182
+ table = qa_map(table, "how many feet is the prominence?", ["prominence"])
183
+ sub_table = table[(table['how many feet is the prominence?'] > 10000)]
184
+ result = [x for x in sub_table['mountain peak']]
185
+ return result
186
+
187
+
188
+ CREATE TABLE List of spans(
189
+ row_id int,
190
+ tramway text,
191
+ country text,
192
+ city text,
193
+ height of pylons text,
194
+ span width,\nleaning straight line text,
195
+ span width,\nhorizontal measurement text,
196
+ height of cable over ground text,
197
+ year of inauguration text,
198
+ notes text)
199
+ /*
200
+ 3 example rows:
201
+ SELECT * FROM w LIMIT 3;
202
+ row_id tramway country city height of pylons span width,\nleaning straight line span width,\nhorizontal measurement notes
203
+ 0 peak 2 peak gondola canada whistler 65m 3024 m 3019 m 436 m 2008 3s aerial tramway constructed by doppelmayr
204
+ 1 hut of regensburg material transport aerial railway austria falbeson ? ? ? 430 m ? none
205
+ 2 vanoise express france vanoise none 1850 m 1800 m 380 m 2003 none
206
+ */
207
+ Q: was the sandia peak tramway innagurate before or after the 3s aerial tramway?
208
+ NeuralPython:
209
+ def solve(table: pd.DataFrame):
210
+ result = table[table['tramway'] == 'sandia peak tramway']['year of inauguration'].values[0] - table[table['tramway'] == '3s aerial tramway']['year of inauguration'].values[0]
211
+ if result > 0:
212
+ return 'after'
213
+ else:
214
+ return 'before'
215
+
216
+
217
+ CREATE TABLE WSL World Heavyweight Championship(
218
+ id int,
219
+ wrestler: text,
220
+ times: text,
221
+ date: text,
222
+ location: text,
223
+ notes: text)
224
+ /*
225
+ 3 example rows:
226
+ SELECT * FROM w LIMIT 3;
227
+ id wrestler: times: date: location: notes:
228
+ 0 jonnie stewart 1 1996-6-6 rochester, minnesota defeated larry gligorovich to win the awa superstars of wrestling world heavyweight championship.
229
+ 1 king kong bundy 1 1999-3-31 oshkosh, wisconsin later stripped of the title by owner dale gagne.
230
+ 2 the patriot; (danny dominion) 1 2000-7-29 pine bluff, arkansas defeated dale gagne in an impromptu match to win the title.
231
+ */
232
+ Q: when did steve corino win his first wsl title?
233
+ NeuralPython:
234
+ def solve(table: pd.DataFrame):
+ sub_table = table[(table['wrestler:'] == 'steve corino')]
235
+ tmp = [x for x in sub_table['date:']]
236
+ tmp = sorted(tmp, reverse=False)
237
+ result = tmp[0]
238
+ return result
239
+
240
+
241
+ CREATE TABLE Shikoku Pilgrimage(
242
+ row_id int,
243
+ no. int,
244
+ temple text,
245
+ honzon (main image) text,
246
+ city/town/village text,
247
+ prefecture text)
248
+ /*
249
+ 3 example rows:
250
+ SELECT * FROM w LIMIT 3;
251
+ row_id no. temple honzon (main image) city/town/village prefecture
252
+ 0 1 ryōzen-ji (霊山寺) shaka nyorai naruto tokushima prefecture
253
+ 1 2 gokuraku-ji (極楽寺) amida nyorai naruto tokushima prefecture
254
+ 2 3 konsen-ji (金泉寺) shaka nyorai itano tokushima prefecture
255
+ */
256
+ Q: what is the difference in the number of temples between imabari and matsuyama?
257
+ NeuralPython:
258
+ def solve(table: pd.DataFrame):
259
+ sub_table1 = table[(table['city/town/village'] == 'imabari')]
260
+ sub_table2 = table[(table['city/town/village'] == 'matsuyama')]
261
+ result = abs(len(sub_table1) - len(sub_table2))
262
+ return result
263
+
264
+
265
+ CREATE TABLE FC Seoul in Asian football(
266
+ row_id int,
267
+ # int,
268
+ season int,
269
+ competition text,
270
+ date text,
271
+ round text,
272
+ opponent text,
273
+ h / a text,
274
+ result text,
275
+ scorer (s) text)
276
+ /*
277
+ 3 example rows:
278
+ SELECT * FROM w LIMIT 3;
279
+ row_id # season competition date round opponent h / a result scorer (s)
280
+ 0 35 2011 afc; champions league 2011-03-02 00:00:00 group stage al-ain a 1–0 s : dejan damjanović
281
+ 1 36 2011 afc; champions league 2011-03-15 00:00:00 group stage hangzhou greentown h 3–0 s : dejan damjanović, ou kyoung-jun, mauricio molina
282
+ 2 37 2011 afc; champions league 2011-04-06 00:00:00 group stage nagoya grampus a 1–1 s : choi hyun-tae; n : kensuke nagai
283
+ */
284
+ Q: how many consecutive games did dejan damjanovic score a goal in during the 2013 season?
285
+ NeuralPython:
286
+ def solve(table: pd.DataFrame):
287
+ result = 0
288
+ consecutive_flag = False
289
+ for row_id, row in table.iterrows():
290
+ if row['season'] == 2013 and row['scorer (s)'] == 'dejan damjanović':
291
+ result += 1
292
+ consecutive_flag = True
293
+ elif consecutive_flag:
294
+ break
295
+ return result
296
+
297
+
298
+ CREATE TABLE Płock Governorate(
299
+ row_id int,
300
+ language text,
301
+ number int,
302
+ percentage (%) text,
303
+ males int,
304
+ females int)
305
+ /*
306
+ 3 example rows:
307
+ SELECT * FROM w LIMIT 3;
308
+ row_id language number percentage (%) males females
309
+ 0 polish 447685 80.86 216794 230891
310
+ 1 yiddish 51215 9.25 24538 26677
311
+ 2 german 35931 6.49 17409 18522
312
+ */
313
+ Q: how many male and female german speakers are there?
314
+ NeuralPython:
315
+ def solve(table: pd.DataFrame):
316
+ sub_table = table[(table['language'] == 'german')]
317
+ result = sum([x + y for x, y in zip(sub_table['males'], sub_table['females'])])
318
+ return result
319
+
320
+
321
+ CREATE TABLE Saint Helena, Ascension and Tristan da Cunha(
322
+ row_id int,
323
+ administrative\narea text,
324
+ area\nkm2 real,
325
+ area\nsq mi int,
326
+ population int,
327
+ administrative\ncentre text)
328
+ /*
329
+ 3 example rows:
330
+ SELECT * FROM w LIMIT 3;
331
+ row_id administrative\narea area\nkm2 area\nsq mi population administrative\ncentre
332
+ 0 saint helena 122.0 47 5809 jamestown
333
+ 1 ascension island 91.0 35 1532 georgetown
334
+ 2 tristan da cunha 184.0 71 388 edinburgh of the 7 seas
335
+ */
336
+ Q: is the are of saint helena more than that of nightingale island?
337
+ NeuralPython:
338
+ def solve(table: pd.DataFrame):
339
+ result = table[table['administrative\\narea'] == 'saint helena']['area\\nkm2'].values[0] - table[table['administrative\\narea'] == 'nightingale island']['area\\nkm2'].values[0]
340
+ if result > 0:
341
+ return 'yes'
342
+ else:
343
+ return 'no'
344
+
345
+
346
+ CREATE TABLE List of political parties in Japan(
347
+ row_id int,
348
+ party text,
349
+ diet representation\nrepresentatives int,
350
+ diet representation\ncouncillors int,
351
+ party leader(s) text,
352
+ comments text)
353
+ /*
354
+ 3 example rows:
355
+ SELECT * FROM w LIMIT 3;
356
+ row_id party diet representation\nrepresentatives diet representation\ncouncillors party leader(s) comments
357
+ 0 your party (yp); minna no tō みんなの党; ("everybody's party") 18 18 yoshimi watanabe reps. conservative liberalism, neoliberalism, economic liberalism, libertarianism, anti-nuclear
358
+ 1 japanese communist party (jcp); nihon kyōsan-tō 日本共産党 8 11 kazuo shii reps. the japanese communist party is japan's oldest party. it was formed in 1922 as an underground organization in the empire of japan, but was legalized after world war ii during the occupation. it used to be a communist party, but the party has past_ref shifted to a socialist party.
359
+ 2 people's life party (plp); seikatsu no tō 生活の党 7 2 ichirō ozawa reps. life party was founded by ichirō ozawa and 14 other diet members who were in the 2022-7-4 party of japan after a leadership dispute between ozawa and yukiko kada.
360
+ */
361
+ Q: what party is listed previous to the new renaissance party?
362
+ NeuralPython:
363
+ def solve(table: pd.DataFrame):
364
+ result = []
365
+ for row_id, row in table.iterrows():
366
+ if row['party'] == 'new renaissance party':
367
+ result.append(table.iloc[row_id - 1]['party'])
368
+ return result
369
+
370
+
371
+ CREATE TABLE 1975 24 Hours of Le Mans(
372
+ row_id int,
373
+ pos int,
374
+ class text,
375
+ no int,
376
+ team text,
377
+ drivers text,
378
+ chassis text,
379
+ engine text)
380
+ /*
381
+ 3 example rows:
382
+ SELECT * FROM w LIMIT 3;
383
+ row_id pos class no team drivers chassis engine
384
+ 0 32 s; 2 27 société roc laurent ferrier; xavier lapeyre; christian ethuin lola t294 roc-simca 2.0l i4
385
+ 1 33 s; 2 29 société roc pierre-marie painvin; franz hummel lola t292 roc-simca 2.0l i4
386
+ 2 34 s; 3 3 christian poirot christian poirot; gérard cuynet; guillermo ortega; jean-claude lagniez porsche 454 porsche 3.0l flat-8
387
+ */
388
+ Q: which country has the most teams on the list?
389
+ NeuralPython:
390
+ def solve(table: pd.DataFrame):
391
+ table = qa_map(table, "what is the country?", ["team"])
392
+ result = table['what is the country?'].value_counts().idxmax()
393
+ return result
394
+
395
+
396
+ CREATE TABLE Fabrice Santoro(
397
+ row_id int,
398
+ name text,
399
+ 2001 text,
400
+ 2002 text,
401
+ 2003 text,
402
+ 2004 text,
403
+ 2005 text,
404
+ 2006 text,
405
+ 2007 text,
406
+ 2008 text,
407
+ 2009 text,
408
+ 2010 text,
409
+ career\nsr text,
410
+ wins int)
411
+ /*
412
+ 3 example rows:
413
+ SELECT * FROM w LIMIT 3;
414
+ row_id name 2001 2002 2003 2004 2005 2006 2007 2008 2009 2010 career\nsr wins
415
+ 0 australian open 2r 1r 3r 2r 1r qf 3r 2r 3r 1r 0 / 18 22
416
+ 1 french open 4r 2r 2r 3r 1r 1r 1r 2r 1r a 0 / 20 17
417
+ 2 wimbledon 3r 2r 2r 2r 2r 2r 2r 1r 2r a 0 / 14 11
418
+ */
419
+ Q: did he win more at the australian open or indian wells?
420
+ NeuralPython:
421
+ def solve(table: pd.DataFrame):
422
+ sub_table = table[(table['name'] == 'australian open') | (table['name'] == 'indian wells')]
423
+ tmp = [(x, y) for x, y in zip(sub_table['name'], sub_table['wins'])]
424
+ tmp = sorted(tmp, key=lambda x: x[1], reverse=True)
425
+ result = list(map(lambda x: x[0], tmp))[0]
426
+ return result
templates/prompts/prompt_wikitq_v3.txt ADDED
@@ -0,0 +1,295 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ Generate SQL given the question and table to answer the question correctly.
2
+ If question-relevant column(s) contents are not suitable for SQL comparisons or calculations, map it to a new column with clean content by a new grammar QA("map@").
3
+ If mapping to a new column still can not answer the question with valid SQL, turn to an end-to-end solution by a new grammar QA("ans@"). This grammar aims to solve all the rest of complex questions or tables.
4
+
5
+ CREATE TABLE Fabrice Santoro(
6
+ row_id int,
7
+ name text,
8
+ 2001 text,
9
+ 2002 text,
10
+ 2003 text,
11
+ 2004 text,
12
+ 2005 text,
13
+ 2006 text,
14
+ 2007 text,
15
+ 2008 text,
16
+ 2009 text,
17
+ 2010 text,
18
+ career\nsr text,
19
+ career\nwin-loss text)
20
+ /*
21
+ 3 example rows:
22
+ SELECT * FROM w LIMIT 3;
23
+ row_id name 2001 2002 2003 2004 2005 2006 2007 2008 2009 2010 career\nsr career\nwin-loss
24
+ 0 australian open 2r 1r 3r 2r 1r qf 3r 2r 3r 1r 0 / 18 22–18
25
+ 1 french open 4r 2r 2r 3r 1r 1r 1r 2r 1r a 0 / 20 17–20
26
+ 2 wimbledon 3r 2r 2r 2r 2r 2r 2r 1r 2r a 0 / 14 11–14
27
+ */
28
+ Q: did he win more at the australian open or indian wells?
29
+ NeuralSQL: SELECT name FROM w WHERE name IN ('australian open', 'indian wells') ORDER BY QA("map@how many wins?"; `career\nwin-loss`) DESC LIMIT 1
30
+
31
+
32
+ CREATE TABLE 2007 New Orleans Saints season(
33
+ row_id int,
34
+ week int,
35
+ date text,
36
+ opponent text,
37
+ time text,
38
+ game site text,
39
+ tv text,
40
+ result/score text,
41
+ record text)
42
+ /*
43
+ 3 example rows:
44
+ SELECT * FROM w LIMIT 3;
45
+ row_id week date opponent time game site tv result/score record
46
+ 0 1 2007-9-6 indianapolis colts t20:30 edt rca dome nbc l 41 – 10 0–1
47
+ 1 2 2007-9-16 tampa bay buccaneers t13:0 edt raymond james stadium fox l 31 – 14 0–2
48
+ 2 3 2007-9-24 tennessee titans t20:30 edt louisiana superdome espn l 31 – 14 0–3
49
+ */
50
+ Q: what number of games were lost at home?
51
+ NeuralSQL: SELECT COUNT(*) FROM w WHERE QA("map@is it a loss?"; `result/score`) = 'yes' AND QA("map@is it the home court of New Orleans Saints?"; `game site`) = 'yes'
52
+
53
+
54
+ CREATE TABLE 2007 New Orleans Saints season(
55
+ row_id int,
56
+ week int,
57
+ date text,
58
+ opponent text,
59
+ time text,
60
+ game site text,
61
+ tv text,
62
+ result/score text,
63
+ record text)
64
+ /*
65
+ 3 example rows:
66
+ SELECT * FROM w LIMIT 3;
67
+ row_id week date opponent time game site tv result/score record
68
+ 0 1 2007-9-6 indianapolis colts t20:30 edt away nbc loss 0–1
69
+ 1 2 2007-9-16 tampa bay buccaneers t13:0 edt home fox win 1-1
70
+ 2 3 2007-9-24 tennessee titans t20:30 edt away espn loss 1-2
71
+ */
72
+ Q: what number of games were lost at home?
73
+ NeuralSQL: SELECT COUNT(*) FROM w WHERE `result/score` = 'loss' AND `game site` = 'home'
74
+
75
+
76
+ CREATE TABLE Demographics of Alaska(
77
+ row_id int,
78
+ by race text,
79
+ white text,
80
+ black text,
81
+ aian* text,
82
+ asian text,
83
+ nhpi* text)
84
+ /*
85
+ 3 example rows:
86
+ SELECT * FROM w LIMIT 3;
87
+ row_id by race white black aian* asian nhpi*
88
+ 0 2000 (total population) 75.43% 4.46% 19.06% 5.24% 0.88%
89
+ 1 2000 (hispanic only) 3.42% 0.33% 0.45% 0.16% 0.06%
90
+ 2 2005 (total population) 74.71% 4.72% 18.77% 5.9% 0.88%
91
+ */
92
+ Q: which hispanic population had the greatest growth from 2000 to 2005?
93
+ NeuralSQL: QA("ans@which race had the greatest value?"; SELECT white, black, `aian*`, asian, `nhpi*` FROM w WHERE `by race` = 'growth 2000–5 (hispanic only)')
94
+
95
+
96
+ CREATE TABLE Highest mountain peaks of California(
97
+ row_id int,
98
+ rank int,
99
+ mountain peak text,
100
+ mountain range text,
101
+ elevation text,
102
+ prominence text,
103
+ isolation text,
104
+ location text)
105
+ /*
106
+ 3 example rows:
107
+ SELECT * FROM w LIMIT 3;
108
+ row_id rank mountain peak mountain range elevation prominence isolation location
109
+ 0 1 mount whitney sierra nevada 14505 ft; 4421 m 10080 ft; 3072 m 1646 mi; 2649 km 36°34′43″n 118°17′31″w / 36.5786°n 118.292°w
110
+ 1 2 mount williamson sierra nevada 14379 ft; 4383 m 1677 ft; 511 m 5.4 mi; 8.7 km 36°39′21″n 118°18′40″w / 36.6559°n 118.3111°w
111
+ 2 3 white mountain peak white mountains 14252 ft; 4344 m 7196 ft; 2193 m 67 mi; 109 km 37°38′3″n 118°15′21″w / 37.6341°n 118.2557°w
112
+ */
113
+ Q: which mountain peak has a prominence more than 10,000 ft?
114
+ NeuralSQL: SELECT `mountain peak` FROM w WHERE QA("map@prominence in ft?"; prominence) > 10000
115
+
116
+
117
+ CREATE TABLE Daegu FC(
118
+ row_id int,
119
+ season int,
120
+ division int,
121
+ tms. int,
122
+ pos. int,
123
+ fa cup text,
124
+ afc cl text)
125
+ /*
126
+ 3 example rows:
127
+ SELECT * FROM w LIMIT 3;
128
+ row_id season division tms. pos. fa cup afc cl
129
+ 0 2003 1 12 11 quarter final none
130
+ 1 2004 1 13 10 round of 32 none
131
+ 2 2005 1 13 8 quarter final none
132
+ */
133
+ Q: how far did they make it in the fa cup after 2009?
134
+ NeuralSQL: QA("ans@how far did they make?"; SELECT `fa cup` FROM w WHERE season > 2009)
135
+
136
+
137
+ CREATE TABLE Electricity in Sri Lanka(
138
+ row_id int,
139
+ filledcolumnname text,
140
+ 2005 int,
141
+ 2006 int,
142
+ 2007 int,
143
+ 2008 int,
144
+ 2009 int,
145
+ 2010 int,
146
+ 2011 int,
147
+ 2012 int)
148
+ /*
149
+ 3 example rows:
150
+ SELECT * FROM w LIMIT 3;
151
+ row_id filledcolumnname 2005 2006 2007 2008 2009 2010 2011 2012
152
+ 0 hydro power 1293 1316 1326 1357 1379 1382 1401 1584
153
+ 1 thermal 1155 1155 1155 1285 1290 1390 1690 1638
154
+ 2 other renewables 3 3 3 3 15 45 50 90
155
+ */
156
+ Q: did the hydro power increase or decrease from 2010 to 2012?
157
+ NeuralSQL: SELECT CASE WHEN (SELECT `2010` FROM w WHERE filledcolumnname = 'hydro power') < (SELECT `2012` FROM w WHERE filledcolumnname = 'hydro power') THEN 'increase' ELSE 'decrease' END
158
+
159
+
160
+ CREATE TABLE List of political parties in Japan(
161
+ row_id int,
162
+ party text,
163
+ diet representation\nrepresentatives int,
164
+ diet representation\ncouncillors int,
165
+ party leader(s) text,
166
+ comments text)
167
+ /*
168
+ 3 example rows:
169
+ SELECT * FROM w LIMIT 3;
170
+ row_id party diet representation\nrepresentatives diet representation\ncouncillors party leader(s) comments
171
+ 0 your party (yp); minna no tō みんなの党; ("everybody's party") 18 18 yoshimi watanabe reps. conservative liberalism, neoliberalism, economic liberalism, libertarianism, anti-nuclear
172
+ 1 japanese communist party (jcp); nihon kyōsan-tō 日本共産党 8 11 kazuo shii reps. the japanese communist party is japan's oldest party. it was formed in 1922 as an underground organization in the empire of japan, but was legalized after world war ii during the occupation. it used to be a communist party, but the party has past_ref shifted to a socialist party.
173
+ 2 people's life party (plp); seikatsu no tō 生活の党 7 2 ichirō ozawa reps. life party was founded by ichirō ozawa and 14 other diet members who were in the 2022-7-4 party of japan after a leadership dispute between ozawa and yukiko kada.
174
+ */
175
+ Q: what party is listed previous to the new renaissance party?
176
+ NeuralSQL: SELECT QA("map@what is party name?"; party) FROM w WHERE row_id = (SELECT row_id FROM w WHERE QA("map@what is party name?"; party) = 'new renaissance party') - 1
177
+
178
+
179
+ CREATE TABLE FC Seoul in Asian football(
180
+ row_id int,
181
+ # int,
182
+ season int,
183
+ competition text,
184
+ date text,
185
+ round text,
186
+ opponent text,
187
+ h / a text,
188
+ result text,
189
+ scorer (s) text)
190
+ /*
191
+ 3 example rows:
192
+ SELECT * FROM w LIMIT 3;
193
+ row_id # season competition date round opponent h / a result scorer (s)
194
+ 0 35 2011 afc; champions league 2011-03-02 00:00:00 group stage al-ain a 1–0 s : dejan damjanović
195
+ 1 36 2011 afc; champions league 2011-03-15 00:00:00 group stage hangzhou greentown h 3–0 s : dejan damjanović, ou kyoung-jun, mauricio molina
196
+ 2 37 2011 afc; champions league 2011-04-06 00:00:00 group stage nagoya grampus a 1–1 s : choi hyun-tae; n : kensuke nagai
197
+ */
198
+ Q: how many consecutive games did dejan damjanovic score a goal in during the 2013 season?
199
+ NeuralSQL: QA("ans@how many consecutive games did dejan damjanovic score a goal?"; SELECT `scorer (s)` FROM w WHERE season = 2013)
200
+
201
+
202
+ CREATE TABLE Electoral district of Lachlan(
203
+ row_id int,
204
+ member text,
205
+ party text,
206
+ term text)
207
+ /*
208
+ 3 example rows:
209
+ SELECT * FROM w LIMIT 3;
210
+ row_id member party term
211
+ 0 john ryan none 1859–1864
212
+ 1 james martin none 1864–1869
213
+ 2 james watson none 1869–1880
214
+ */
215
+ Q: of the members of the third incarnation of the lachlan, who served the longest?
216
+ NeuralSQL: SELECT member FROM w ORDER BY QA("map@how long does it last?"; term) DESC LIMIT 1
217
+
218
+
219
+ CREATE TABLE Portugal in the Eurovision Song Contest 1979(
220
+ row_id int,
221
+ draw int,
222
+ artist text,
223
+ song text,
224
+ points int,
225
+ place text)
226
+ /*
227
+ 3 example rows:
228
+ SELECT * FROM w LIMIT 3;
229
+ row_id draw artist song points place
230
+ 0 1 gonzaga coutinho "tema para um homem só" 102 5th
231
+ 1 2 pedro osório s.a.r.l. "uma canção comercial" 123 3rd
232
+ 2 3 concha "qualquer dia, quem diria" 78 6th
233
+ */
234
+ Q: who was the last draw?
235
+ NeuralSQL: SELECT `artist` FROM w ORDER by `draw` desc LIMIT 1
236
+
237
+
238
+ CREATE TABLE GER Class N31(
239
+ row_id int,
240
+ year int,
241
+ order text,
242
+ quantity int,
243
+ ger nos. text)
244
+ /*
245
+ 3 example rows:
246
+ SELECT * FROM w LIMIT 3;
247
+ row_id year order quantity ger nos.
248
+ 0 1893 n31 1 999
249
+ 1 1893 h33 10 979
250
+ 2 1894 l33 10 989
251
+ */
252
+ Q: which had more ger numbers, 1898 or 1893?
253
+ NeuralSQL: SELECT `year` FROM w WHERE `year` IN ( '1898' , '1893' ) GROUP by `year` ORDER by SUM (`ger nos.`) desc LIMIT 1
254
+
255
+
256
+ CREATE TABLE List of spans(
257
+ row_id int,
258
+ tramway text,
259
+ country text,
260
+ city text,
261
+ height of pylons text,
262
+ span width,\nleaning straight line text,
263
+ span width,\nhorizontal measurement text,
264
+ height of cable over ground text,
265
+ year of inauguration text,
266
+ notes text)
267
+ /*
268
+ 3 example rows:
269
+ SELECT * FROM w LIMIT 3;
270
+ row_id tramway country city height of pylons span width,\nleaning straight line span width,\nhorizontal measurement height of cable over ground year of inauguration notes
271
+ 0 peak 2 peak gondola canada whistler 65m 3024 m 3019 m 436 m 2008 3s aerial tramway constructed by doppelmayr
272
+ 1 hut of regensburg material transport aerial railway austria falbeson ? ? ? 430 m ? none
273
+ 2 vanoise express france vanoise none 1850 m 1800 m 380 m 2003 none
274
+ */
275
+ Q: was the sandia peak tramway innagurate before or after the 3s aerial tramway?
276
+ NeuralSQL: SELECT ( SELECT `year of inauguration` FROM w WHERE `tramway` = 'sandia peak tramway' ) < ( SELECT `year of inauguration` FROM w WHERE `tramway` = '3s aerial tramway' )
277
+
278
+
279
+ CREATE TABLE Płock Governorate(
280
+ row_id int,
281
+ language text,
282
+ number int,
283
+ percentage (%) text,
284
+ males int,
285
+ females int)
286
+ /*
287
+ 3 example rows:
288
+ SELECT * FROM w LIMIT 3;
289
+ row_id language number percentage (%) males females
290
+ 0 polish 447685 80.86 216794 230891
291
+ 1 yiddish 51215 9.25 24538 26677
292
+ 2 german 35931 6.49 17409 18522
293
+ */
294
+ Q: how many male and female german speakers are there?
295
+ NeuralSQL: SELECT `males` + `females` FROM w WHERE `language` = 'german'
templates/qa_retrieve_pool.json ADDED
@@ -0,0 +1,3885 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [
2
+ {
3
+ "id": 142,
4
+ "qa_question": "map@What is the time span?",
5
+ "qa_column": "Term",
6
+ "qa_answer": [
7
+ "5",
8
+ "5",
9
+ "11",
10
+ "/",
11
+ "1",
12
+ "6",
13
+ "3",
14
+ "9",
15
+ "4",
16
+ "3",
17
+ "/",
18
+ "11",
19
+ "5",
20
+ "4",
21
+ "3",
22
+ "/"
23
+ ],
24
+ "table": {
25
+ "header": [
26
+ "Term"
27
+ ],
28
+ "rows": [
29
+ [
30
+ "1859\u20131864"
31
+ ],
32
+ [
33
+ "1864\u20131869"
34
+ ],
35
+ [
36
+ "1869\u20131880"
37
+ ],
38
+ [
39
+ "Term"
40
+ ],
41
+ [
42
+ "1894\u20131895"
43
+ ],
44
+ [
45
+ "1895\u20131901"
46
+ ],
47
+ [
48
+ "1901\u20131904"
49
+ ],
50
+ [
51
+ "1904\u20131913"
52
+ ],
53
+ [
54
+ "1913\u20131917"
55
+ ],
56
+ [
57
+ "1917\u20131920"
58
+ ],
59
+ [
60
+ "Term"
61
+ ],
62
+ [
63
+ "1927\u20131938"
64
+ ],
65
+ [
66
+ "1938\u20131943"
67
+ ],
68
+ [
69
+ "1943\u20131947"
70
+ ],
71
+ [
72
+ "1947\u20131950"
73
+ ],
74
+ [
75
+ "Term"
76
+ ]
77
+ ]
78
+ },
79
+ "title": "Electoral district of Lachlan"
80
+ },
81
+ {
82
+ "id": 145,
83
+ "qa_question": "map@Is the date during in 1900's?",
84
+ "qa_column": "Created",
85
+ "qa_answer": [
86
+ "no",
87
+ "no",
88
+ "no",
89
+ "yes",
90
+ "no",
91
+ "yes",
92
+ "no",
93
+ "yes",
94
+ "yes",
95
+ "yes",
96
+ "yes",
97
+ "yes",
98
+ "yes",
99
+ "yes",
100
+ "no"
101
+ ],
102
+ "table": {
103
+ "header": [
104
+ "Created"
105
+ ],
106
+ "rows": [
107
+ [
108
+ "29 October 1874"
109
+ ],
110
+ [
111
+ "2 January 1857"
112
+ ],
113
+ [
114
+ "26 October 1874"
115
+ ],
116
+ [
117
+ "15 July 1949"
118
+ ],
119
+ [
120
+ "4 August 1821"
121
+ ],
122
+ [
123
+ "24 April 1940"
124
+ ],
125
+ [
126
+ "2 January 1857"
127
+ ],
128
+ [
129
+ "3 March 1970"
130
+ ],
131
+ [
132
+ "12 December 1961"
133
+ ],
134
+ [
135
+ "6 January 1965"
136
+ ],
137
+ [
138
+ "16 March 1964"
139
+ ],
140
+ [
141
+ "13 December 1963"
142
+ ],
143
+ [
144
+ "6 February 1962"
145
+ ],
146
+ [
147
+ "16 August 1921"
148
+ ],
149
+ [
150
+ "2 January 1857"
151
+ ]
152
+ ]
153
+ },
154
+ "title": "List of districts of Lima"
155
+ },
156
+ {
157
+ "id": 155,
158
+ "qa_question": "map@Is the time less than a week?",
159
+ "qa_column": "Length of use",
160
+ "qa_answer": [
161
+ "no",
162
+ "no",
163
+ "no",
164
+ "no",
165
+ "no",
166
+ "no",
167
+ "yes",
168
+ "no",
169
+ "no",
170
+ "no",
171
+ "no",
172
+ "yes"
173
+ ],
174
+ "table": {
175
+ "header": [
176
+ "Length of use"
177
+ ],
178
+ "rows": [
179
+ [
180
+ "14 days"
181
+ ],
182
+ [
183
+ "10 days"
184
+ ],
185
+ [
186
+ "21 days"
187
+ ],
188
+ [
189
+ "7 days"
190
+ ],
191
+ [
192
+ "10 days"
193
+ ],
194
+ [
195
+ "10 days"
196
+ ],
197
+ [
198
+ "Daily"
199
+ ],
200
+ [
201
+ "14 days"
202
+ ],
203
+ [
204
+ "10 days"
205
+ ],
206
+ [
207
+ "14 days"
208
+ ],
209
+ [
210
+ "20 days"
211
+ ],
212
+ [
213
+ "2 hours"
214
+ ]
215
+ ]
216
+ },
217
+ "title": "Crest Whitestrips"
218
+ },
219
+ {
220
+ "id": 157,
221
+ "qa_question": "map@Is the eliminated time the first?",
222
+ "qa_column": "Eliminated",
223
+ "qa_answer": [
224
+ "no",
225
+ "no",
226
+ "no",
227
+ "no",
228
+ "no",
229
+ "no",
230
+ "no",
231
+ "no",
232
+ "no",
233
+ "no",
234
+ "no"
235
+ ],
236
+ "table": {
237
+ "header": [
238
+ "Eliminated"
239
+ ],
240
+ "rows": [
241
+ [
242
+ "Winner"
243
+ ],
244
+ [
245
+ "Second Place"
246
+ ],
247
+ [
248
+ "Third Place"
249
+ ],
250
+ [
251
+ "Week 10"
252
+ ],
253
+ [
254
+ "Week 9"
255
+ ],
256
+ [
257
+ "Week 7"
258
+ ],
259
+ [
260
+ "Week 6"
261
+ ],
262
+ [
263
+ "Week 5"
264
+ ],
265
+ [
266
+ "Week 4 & Week 8(Winner of Star Salvation)"
267
+ ],
268
+ [
269
+ "Week 3"
270
+ ],
271
+ [
272
+ "Week 2"
273
+ ]
274
+ ]
275
+ },
276
+ "title": "Food Network Star"
277
+ },
278
+ {
279
+ "id": 200,
280
+ "qa_question": "map@Is it in march 1982?",
281
+ "qa_column": "Date",
282
+ "qa_answer": [
283
+ "no",
284
+ "yes",
285
+ "yes",
286
+ "no",
287
+ "no",
288
+ "no",
289
+ "no",
290
+ "no",
291
+ "no",
292
+ "no"
293
+ ],
294
+ "table": {
295
+ "header": [
296
+ "Date"
297
+ ],
298
+ "rows": [
299
+ [
300
+ "January 26, 1982"
301
+ ],
302
+ [
303
+ "March 3, 1982"
304
+ ],
305
+ [
306
+ "March 21, 1982"
307
+ ],
308
+ [
309
+ "May 5, 1982"
310
+ ],
311
+ [
312
+ "May 19, 1982"
313
+ ],
314
+ [
315
+ "May 27, 1982"
316
+ ],
317
+ [
318
+ "June 14, 1982"
319
+ ],
320
+ [
321
+ "June 18, 1982"
322
+ ],
323
+ [
324
+ "June 23, 1982"
325
+ ],
326
+ [
327
+ "July 2, 1982"
328
+ ]
329
+ ]
330
+ },
331
+ "title": "1982 in Brazilian football"
332
+ },
333
+ {
334
+ "id": 578,
335
+ "qa_question": "map@Is this player from Norway?",
336
+ "qa_column": "Player",
337
+ "qa_answer": [
338
+ "no",
339
+ "no",
340
+ "no",
341
+ "no",
342
+ "no",
343
+ "no"
344
+ ],
345
+ "table": {
346
+ "header": [
347
+ "Player"
348
+ ],
349
+ "rows": [
350
+ [
351
+ "Raymond van Barneveld"
352
+ ],
353
+ [
354
+ "Raymond van Barneveld"
355
+ ],
356
+ [
357
+ "Adrian Lewis"
358
+ ],
359
+ [
360
+ "Dean Winstanley"
361
+ ],
362
+ [
363
+ "Michael van Gerwen"
364
+ ],
365
+ [
366
+ "Terry Jenkins"
367
+ ]
368
+ ]
369
+ },
370
+ "title": "PDC World Darts Championship"
371
+ },
372
+ {
373
+ "id": 624,
374
+ "qa_question": "map@Is it designated?",
375
+ "qa_column": "Artist(s)",
376
+ "qa_answer": [
377
+ "no",
378
+ "yes",
379
+ "no",
380
+ "yes",
381
+ "yes",
382
+ "yes",
383
+ "yes",
384
+ "yes",
385
+ "yes"
386
+ ],
387
+ "table": {
388
+ "header": [
389
+ "Artist(s)"
390
+ ],
391
+ "rows": [
392
+ [
393
+ "various artists"
394
+ ],
395
+ [
396
+ "Robin"
397
+ ],
398
+ [
399
+ "various artists"
400
+ ],
401
+ [
402
+ "Robin"
403
+ ],
404
+ [
405
+ "Adele"
406
+ ],
407
+ [
408
+ "Jukka Poika"
409
+ ],
410
+ [
411
+ "Jesse Kaikuranta"
412
+ ],
413
+ [
414
+ "Chisu"
415
+ ],
416
+ [
417
+ "Juha Tapio"
418
+ ]
419
+ ]
420
+ },
421
+ "title": "List of number-one albums of 2012 (Finland)"
422
+ },
423
+ {
424
+ "id": 548,
425
+ "qa_question": "map@What is the winner's name?",
426
+ "qa_column": "Winner",
427
+ "qa_answer": [
428
+ "Jacinto Sicam",
429
+ "Romeo Bonzo",
430
+ "Ruben Carino",
431
+ "Pepito Calip",
432
+ "Rolando Pagnanawon",
433
+ "Reynaldo Dequito",
434
+ "Armando Catalan",
435
+ "Gerardo Igos",
436
+ "Manuel Buenaventura",
437
+ "Bernardo Llentada",
438
+ "Renato Dolosa",
439
+ "Carlo Guieb",
440
+ "Carlo Guieb",
441
+ "Renato Dolosa",
442
+ "Victor Espiritu",
443
+ "Wong Kam-po",
444
+ "Warren Davadilla"
445
+ ],
446
+ "table": {
447
+ "header": [
448
+ "Winner"
449
+ ],
450
+ "rows": [
451
+ [
452
+ "Jacinto Sicam\u00a0(PHI)"
453
+ ],
454
+ [
455
+ "Romeo Bonzo\u00a0(PHI)"
456
+ ],
457
+ [
458
+ "Ruben Carino\u00a0(PHI)"
459
+ ],
460
+ [
461
+ "Pepito Calip\u00a0(PHI)"
462
+ ],
463
+ [
464
+ "Rolando Pagnanawon\u00a0(PHI)"
465
+ ],
466
+ [
467
+ "Reynaldo Dequito\u00a0(PHI)"
468
+ ],
469
+ [
470
+ "Armando Catalan\u00a0(PHI)"
471
+ ],
472
+ [
473
+ "Gerardo Igos\u00a0(PHI)"
474
+ ],
475
+ [
476
+ "Manuel Buenaventura\u00a0(PHI)"
477
+ ],
478
+ [
479
+ "Bernardo Llentada\u00a0(PHI)"
480
+ ],
481
+ [
482
+ "Renato Dolosa\u00a0(PHI)"
483
+ ],
484
+ [
485
+ "Carlo Guieb\u00a0(PHI)"
486
+ ],
487
+ [
488
+ "Carlo Guieb\u00a0(PHI)"
489
+ ],
490
+ [
491
+ "Renato Dolosa\u00a0(PHI)"
492
+ ],
493
+ [
494
+ "Victor Espiritu\u00a0(PHI)"
495
+ ],
496
+ [
497
+ "Wong Kam-po\u00a0(HKG)"
498
+ ],
499
+ [
500
+ "Warren Davadilla\u00a0(PHI)"
501
+ ]
502
+ ]
503
+ },
504
+ "title": "Le Tour de Filipinas"
505
+ },
506
+ {
507
+ "id": 149,
508
+ "qa_question": "map@What is the last name of the whole name?",
509
+ "qa_column": "Performer",
510
+ "qa_answer": [
511
+ "Segura",
512
+ "Kosta",
513
+ "Wang",
514
+ "Peretti",
515
+ "D'Elia",
516
+ "Bargatze",
517
+ "Kondabolu",
518
+ "Whitehall",
519
+ "Jackson",
520
+ "Kinane",
521
+ "Fulchiron",
522
+ "Vecchione",
523
+ "Klein",
524
+ "Katz",
525
+ "Larson"
526
+ ],
527
+ "table": {
528
+ "header": [
529
+ "Performer"
530
+ ],
531
+ "rows": [
532
+ [
533
+ "Tom Segura"
534
+ ],
535
+ [
536
+ "Michael Kosta"
537
+ ],
538
+ [
539
+ "Sheng Wang"
540
+ ],
541
+ [
542
+ "Chelsea Peretti"
543
+ ],
544
+ [
545
+ "Chris D'Elia"
546
+ ],
547
+ [
548
+ "Nate Bargatze"
549
+ ],
550
+ [
551
+ "Hari Kondabolu"
552
+ ],
553
+ [
554
+ "Jack Whitehall"
555
+ ],
556
+ [
557
+ "Al Jackson"
558
+ ],
559
+ [
560
+ "Kyle Kinane"
561
+ ],
562
+ [
563
+ "Matt Fulchiron"
564
+ ],
565
+ [
566
+ "Mike Vecchione"
567
+ ],
568
+ [
569
+ "Jessi Klein"
570
+ ],
571
+ [
572
+ "Louis Katz"
573
+ ],
574
+ [
575
+ "Jay Larson"
576
+ ]
577
+ ]
578
+ },
579
+ "title": "List of Comedy Central Presents episodes"
580
+ },
581
+ {
582
+ "id": 159,
583
+ "qa_question": "map@What is the name outside paratheses?",
584
+ "qa_column": "Name",
585
+ "qa_answer": [
586
+ "Harriet Churchill",
587
+ "Henrietta Churchill",
588
+ "Anne Churchill",
589
+ "John Churchill",
590
+ "Elizabeth Churchill",
591
+ "Mary Churchill"
592
+ ],
593
+ "table": {
594
+ "header": [
595
+ "Name"
596
+ ],
597
+ "rows": [
598
+ [
599
+ "Harriet Churchill"
600
+ ],
601
+ [
602
+ "Henrietta Churchill (later Godolphin), 2nd Duchess of Marlborough in her own right"
603
+ ],
604
+ [
605
+ "Anne Churchill (later Spencer)"
606
+ ],
607
+ [
608
+ "John Churchill, Marquess of Blandford"
609
+ ],
610
+ [
611
+ "Elizabeth Churchill (later Egerton)"
612
+ ],
613
+ [
614
+ "Mary Churchill (later Montagu)"
615
+ ]
616
+ ]
617
+ },
618
+ "title": "Sarah Churchill, Duchess of Marlborough"
619
+ },
620
+ {
621
+ "id": 208,
622
+ "qa_question": "map@Did Kansas State win?",
623
+ "qa_column": "Winning team",
624
+ "qa_answer": [
625
+ "no",
626
+ "no",
627
+ "no",
628
+ "no",
629
+ "yes",
630
+ "no",
631
+ "no",
632
+ "no",
633
+ "no",
634
+ "no",
635
+ "no",
636
+ "no",
637
+ "no",
638
+ "no",
639
+ "no",
640
+ "no",
641
+ "no",
642
+ "no",
643
+ "no",
644
+ "no",
645
+ "no",
646
+ "yes",
647
+ "yes"
648
+ ],
649
+ "table": {
650
+ "header": [
651
+ "Winning team"
652
+ ],
653
+ "rows": [
654
+ [
655
+ "Kansas 16"
656
+ ],
657
+ [
658
+ "Kansas 34"
659
+ ],
660
+ [
661
+ "Kansas 41"
662
+ ],
663
+ [
664
+ "Kansas 28"
665
+ ],
666
+ [
667
+ "Kansas State 6"
668
+ ],
669
+ [
670
+ "Kansas 29"
671
+ ],
672
+ [
673
+ "Kansas 12"
674
+ ],
675
+ [
676
+ "Kansas 5"
677
+ ],
678
+ [
679
+ "Kansas 6"
680
+ ],
681
+ [
682
+ "Kansas 19"
683
+ ],
684
+ [
685
+ "Kansas 26"
686
+ ],
687
+ [
688
+ "Kansas 27"
689
+ ],
690
+ [
691
+ "Kansas 19"
692
+ ],
693
+ [
694
+ "Kansas 0"
695
+ ],
696
+ [
697
+ "Kansas 9"
698
+ ],
699
+ [
700
+ "Kansas 13"
701
+ ],
702
+ [
703
+ "Kansas 16"
704
+ ],
705
+ [
706
+ "Kansas 14"
707
+ ],
708
+ [
709
+ "Kansas 21"
710
+ ],
711
+ [
712
+ "Kansas 7"
713
+ ],
714
+ [
715
+ "Kansas 0"
716
+ ],
717
+ [
718
+ "Kansas State 6"
719
+ ],
720
+ [
721
+ "Kansas State 14"
722
+ ]
723
+ ]
724
+ },
725
+ "title": "Kansas\u2013Kansas State football rivalry"
726
+ },
727
+ {
728
+ "id": 224,
729
+ "qa_question": "map@Is the last name start with 'b'?",
730
+ "qa_column": "Name",
731
+ "qa_answer": [
732
+ "yes",
733
+ "no",
734
+ "yes",
735
+ "no",
736
+ "no",
737
+ "yes"
738
+ ],
739
+ "table": {
740
+ "header": [
741
+ "Name"
742
+ ],
743
+ "rows": [
744
+ [
745
+ "Yelizaveta Bryzhina"
746
+ ],
747
+ [
748
+ "Myriam Soumar\u00e9"
749
+ ],
750
+ [
751
+ "Ksenija Balta"
752
+ ],
753
+ [
754
+ "Niamh Whelan"
755
+ ],
756
+ [
757
+ "Sabina Veit"
758
+ ],
759
+ [
760
+ "Elin Backman"
761
+ ]
762
+ ]
763
+ },
764
+ "title": "2010 European Athletics Championships \u2013 Women's 200 metres"
765
+ },
766
+ {
767
+ "id": 528,
768
+ "qa_question": "map@Return the km value.",
769
+ "qa_column": "Isolation",
770
+ "qa_answer": [
771
+ "2649",
772
+ "8.70",
773
+ "109",
774
+ "52",
775
+ "539",
776
+ "24",
777
+ "5",
778
+ "11",
779
+ "17",
780
+ "16",
781
+ "6.9",
782
+ "7.7",
783
+ "16",
784
+ "7.6",
785
+ "13"
786
+ ],
787
+ "table": {
788
+ "header": [
789
+ "Isolation"
790
+ ],
791
+ "rows": [
792
+ [
793
+ "1,646\u00a0mi\\n2,649\u00a0km"
794
+ ],
795
+ [
796
+ "5.4\u00a0mi\\n8.7\u00a0km"
797
+ ],
798
+ [
799
+ "67\u00a0mi\\n109\u00a0km"
800
+ ],
801
+ [
802
+ "32\u00a0mi\\n52\u00a0km"
803
+ ],
804
+ [
805
+ "335\u00a0mi\\n539\u00a0km"
806
+ ],
807
+ [
808
+ "15\u00a0mi\\n24\u00a0km"
809
+ ],
810
+ [
811
+ "3.1\u00a0mi\\n5.0\u00a0km"
812
+ ],
813
+ [
814
+ "7\u00a0mi\\n11\u00a0km"
815
+ ],
816
+ [
817
+ "11\u00a0mi\\n17\u00a0km"
818
+ ],
819
+ [
820
+ "10\u00a0mi\\n16\u00a0km"
821
+ ],
822
+ [
823
+ "4.3\u00a0mi\\n6.9\u00a0km"
824
+ ],
825
+ [
826
+ "4.8\u00a0mi\\n7.7\u00a0km"
827
+ ],
828
+ [
829
+ "10\u00a0mi\\n16\u00a0km"
830
+ ],
831
+ [
832
+ "4.7\u00a0mi\\n7.6\u00a0km"
833
+ ],
834
+ [
835
+ "8\u00a0mi\\n13\u00a0km"
836
+ ]
837
+ ]
838
+ },
839
+ "title": "Highest mountain peaks of California"
840
+ },
841
+ {
842
+ "id": 212,
843
+ "qa_question": "map@Can it go faster than 450km/h?",
844
+ "qa_column": "Top speed (km/h)",
845
+ "qa_answer": [
846
+ "/",
847
+ "no",
848
+ "no",
849
+ "no",
850
+ "no",
851
+ "no",
852
+ "no",
853
+ "/",
854
+ "no",
855
+ "no",
856
+ "no",
857
+ "no",
858
+ "/",
859
+ "yes"
860
+ ],
861
+ "table": {
862
+ "header": [
863
+ "Top speed (km/h)"
864
+ ],
865
+ "rows": [
866
+ [
867
+ ""
868
+ ],
869
+ [
870
+ "90 (1971)"
871
+ ],
872
+ [
873
+ "164 (October 1971)"
874
+ ],
875
+ [
876
+ "140 (September 1972)"
877
+ ],
878
+ [
879
+ "160 / 230 (1974)\u00a0?"
880
+ ],
881
+ [
882
+ "250 (end 1973), 253.2 (21 November 1977)"
883
+ ],
884
+ [
885
+ "401.3 (1974)"
886
+ ],
887
+ [
888
+ ""
889
+ ],
890
+ [
891
+ "36 (or 40\u00a0?)"
892
+ ],
893
+ [
894
+ "75"
895
+ ],
896
+ [
897
+ "302 (1984), 355 (1985), 392 (1987), 406 (1987), 412.6 (January 1988)"
898
+ ],
899
+ [
900
+ "436 (1989), 450 (17 June 1993)"
901
+ ],
902
+ [
903
+ ""
904
+ ],
905
+ [
906
+ "501 (12 November 2003)"
907
+ ]
908
+ ]
909
+ },
910
+ "title": "Transrapid"
911
+ },
912
+ {
913
+ "id": 223,
914
+ "qa_question": "map@What is the price money?",
915
+ "qa_column": "Tournament",
916
+ "qa_answer": [
917
+ "10000",
918
+ "10000",
919
+ "25000",
920
+ "25000",
921
+ "10000",
922
+ "25000",
923
+ "10000",
924
+ "10000",
925
+ "10000",
926
+ "10000"
927
+ ],
928
+ "table": {
929
+ "header": [
930
+ "Tournament"
931
+ ],
932
+ "rows": [
933
+ [
934
+ "$10,000 Bournemouth, Great Britain"
935
+ ],
936
+ [
937
+ "$10,000 Hatfield, Great Britain"
938
+ ],
939
+ [
940
+ "$25,000 Mount Gambier, Australia"
941
+ ],
942
+ [
943
+ "$25,000 Sunderland, Great Britain"
944
+ ],
945
+ [
946
+ "$10,000 Tipton, Great Britain"
947
+ ],
948
+ [
949
+ "$25,000 Felixstowe, Great Britain"
950
+ ],
951
+ [
952
+ "$10,000 Frinton, Great Britain"
953
+ ],
954
+ [
955
+ "$10,000 Wrexham, Great Britain"
956
+ ],
957
+ [
958
+ "$10,000 Cumberland (London),\\nGreat Britain"
959
+ ],
960
+ [
961
+ "$10,000 Mollerusa, Spain"
962
+ ]
963
+ ]
964
+ },
965
+ "title": "Jane O'Donoghue"
966
+ },
967
+ {
968
+ "id": 227,
969
+ "qa_question": "map@What is the height in inch?",
970
+ "qa_column": "Height",
971
+ "qa_answer": [
972
+ "77",
973
+ "78",
974
+ "74",
975
+ "79",
976
+ "74",
977
+ "80",
978
+ "77",
979
+ "82",
980
+ "80",
981
+ "78",
982
+ "82",
983
+ "76"
984
+ ],
985
+ "table": {
986
+ "header": [
987
+ "Height"
988
+ ],
989
+ "rows": [
990
+ [
991
+ "6'5\""
992
+ ],
993
+ [
994
+ "6'6\""
995
+ ],
996
+ [
997
+ "6'2\""
998
+ ],
999
+ [
1000
+ "6'7\""
1001
+ ],
1002
+ [
1003
+ "6'2\""
1004
+ ],
1005
+ [
1006
+ "6'8\""
1007
+ ],
1008
+ [
1009
+ "6'5\""
1010
+ ],
1011
+ [
1012
+ "6'10\""
1013
+ ],
1014
+ [
1015
+ "6'8\""
1016
+ ],
1017
+ [
1018
+ "6'6\""
1019
+ ],
1020
+ [
1021
+ "6'10\""
1022
+ ],
1023
+ [
1024
+ "6'4\""
1025
+ ]
1026
+ ]
1027
+ },
1028
+ "title": "2009\u201310 Fresno State Bulldogs men's basketball team"
1029
+ },
1030
+ {
1031
+ "id": 579,
1032
+ "qa_question": "map@Is it US Bank Plaza?",
1033
+ "qa_column": "Name",
1034
+ "qa_answer": [
1035
+ "no",
1036
+ "no",
1037
+ "yes",
1038
+ "no",
1039
+ "no",
1040
+ "no",
1041
+ "no",
1042
+ "no",
1043
+ "no",
1044
+ "no",
1045
+ "no",
1046
+ "no",
1047
+ "no"
1048
+ ],
1049
+ "table": {
1050
+ "header": [
1051
+ "Name"
1052
+ ],
1053
+ "rows": [
1054
+ [
1055
+ "Zions Bank Building\\n\\n\\n\\n\\nZions Bank Building in Downtown Boise, Idaho"
1056
+ ],
1057
+ [
1058
+ "Boise Airport Air Traffic Control Tower"
1059
+ ],
1060
+ [
1061
+ "US Bank Plaza\\n\\n\\n\\n\\nUS Bank Building in Downtown Boise"
1062
+ ],
1063
+ [
1064
+ "One Capital Center\\n\\n\\n\\n\\nOne Capital Center in Downtown Boise, Idaho"
1065
+ ],
1066
+ [
1067
+ "Idaho State Capitol\\n\\n\\n\\n\\nIdaho State Capitol; Boise, Idaho"
1068
+ ],
1069
+ [
1070
+ "The Grove Hotel\\n\\n\\n\\n\\nGrove Hotel in Downtown Boise, Idaho"
1071
+ ],
1072
+ [
1073
+ "The Aspen\\n\\n\\n\\n\\nAspen Loft Building in Downtown Boise, Idaho"
1074
+ ],
1075
+ [
1076
+ "Wells Fargo Building\\n\\n\\n\\n\\nWells Fargo Building in Downtown Boise, Idaho"
1077
+ ],
1078
+ [
1079
+ "Banner Bank Building\\n\\n\\n\\n\\nBanner Bank Building in Downtown Boise, Idaho"
1080
+ ],
1081
+ [
1082
+ "Key Tower\\n\\n\\n\\n\\nKey Bank Building in Downtown Boise, Idaho"
1083
+ ],
1084
+ [
1085
+ "Bronco Stadium\\n\\n\\n\\n\\nBronco stadium in Boise, Idaho"
1086
+ ],
1087
+ [
1088
+ "Hoff Building\\n\\n\\n\\n\\nHoff Building in Downtown Boise, Idaho"
1089
+ ],
1090
+ [
1091
+ "Chase Tower Plaza\\n\\n\\n\\n\\nChase Building in Downtown Boise, Idaho"
1092
+ ]
1093
+ ]
1094
+ },
1095
+ "title": "List of tallest buildings in Boise"
1096
+ },
1097
+ {
1098
+ "id": 160,
1099
+ "qa_question": "map@is this model crest whitestrip classic?",
1100
+ "qa_column": "Model",
1101
+ "qa_answer": [
1102
+ "yes",
1103
+ "no",
1104
+ "no",
1105
+ "no",
1106
+ "no",
1107
+ "no",
1108
+ "no",
1109
+ "no",
1110
+ "no",
1111
+ "no",
1112
+ "no",
1113
+ "no"
1114
+ ],
1115
+ "table": {
1116
+ "header": [
1117
+ "Model"
1118
+ ],
1119
+ "rows": [
1120
+ [
1121
+ "Crest Whitestrips Classic\\npreviously Crest Whitestrips"
1122
+ ],
1123
+ [
1124
+ "Crest Whitestrips Professional"
1125
+ ],
1126
+ [
1127
+ "Crest Whitestrips Supreme"
1128
+ ],
1129
+ [
1130
+ "Crest Whitestrips Premium"
1131
+ ],
1132
+ [
1133
+ "Crest Whitestrips Pro\\npreviously Crest Whitestrips Premium Plus"
1134
+ ],
1135
+ [
1136
+ "Crest Whitestrips Renewal"
1137
+ ],
1138
+ [
1139
+ "Crest Whitestrips Daily Multicare"
1140
+ ],
1141
+ [
1142
+ "Crest Whitestrips Advanced Seal"
1143
+ ],
1144
+ [
1145
+ "Crest Whitestrips 3D Vivid"
1146
+ ],
1147
+ [
1148
+ "Crest Whitestrips 3D Advanced Vivid"
1149
+ ],
1150
+ [
1151
+ "Crest Whitestrips 3D Professional Effects"
1152
+ ],
1153
+ [
1154
+ "Crest 3D White 2 Hour Express"
1155
+ ]
1156
+ ]
1157
+ },
1158
+ "title": "Crest Whitestrips"
1159
+ },
1160
+ {
1161
+ "id": 216,
1162
+ "qa_question": "map@What is the certification?",
1163
+ "qa_column": "Certifications\\n(sales threshold)",
1164
+ "qa_answer": [
1165
+ "Platinum",
1166
+ "Platinum",
1167
+ "Platinum",
1168
+ "Gold",
1169
+ "Gold"
1170
+ ],
1171
+ "table": {
1172
+ "header": [
1173
+ "Certifications\\n(sales threshold)"
1174
+ ],
1175
+ "rows": [
1176
+ [
1177
+ "RIAA: Platinum\\nMC: Gold"
1178
+ ],
1179
+ [
1180
+ "RIAA: Platinum"
1181
+ ],
1182
+ [
1183
+ "RIAA: Platinum"
1184
+ ],
1185
+ [
1186
+ "RIAA: Gold"
1187
+ ],
1188
+ [
1189
+ "RIAA: Gold"
1190
+ ]
1191
+ ]
1192
+ },
1193
+ "title": "Michael W. Smith discography"
1194
+ },
1195
+ {
1196
+ "id": 220,
1197
+ "qa_question": "map@Contain Promotion of the Chernobyl Program?",
1198
+ "qa_column": "Projects and activities they support",
1199
+ "qa_answer": [
1200
+ "no",
1201
+ "no",
1202
+ "no",
1203
+ "no",
1204
+ "no",
1205
+ "no",
1206
+ "no",
1207
+ "no",
1208
+ "no",
1209
+ "yes",
1210
+ "no",
1211
+ "no",
1212
+ "/"
1213
+ ],
1214
+ "table": {
1215
+ "header": [
1216
+ "Projects and activities they support"
1217
+ ],
1218
+ "rows": [
1219
+ [
1220
+ "Dialogue among Civilizations"
1221
+ ],
1222
+ [
1223
+ "Construction of Knowledge Societies"
1224
+ ],
1225
+ [
1226
+ "Promotion and safeguarding of intangible cultural heritage, especially oral traditions and expressions"
1227
+ ],
1228
+ [
1229
+ "Promotion of ballet dancing (Programme of Intangible Heritage)"
1230
+ ],
1231
+ [
1232
+ "Peace"
1233
+ ],
1234
+ [
1235
+ "Education of young people through seminars, science conferences and projects in the field"
1236
+ ],
1237
+ [
1238
+ "Campaign against HIV/AIDS, human rights, Cultural Heritage"
1239
+ ],
1240
+ [
1241
+ "Education and Social Inclusion"
1242
+ ],
1243
+ [
1244
+ "Fundraising for children in distress and victims of war"
1245
+ ],
1246
+ [
1247
+ "Promotion of the Chernobyl Program, creation of the Six Flags of Tolerance in 1995 and distribution in UNESCO Member States"
1248
+ ],
1249
+ [
1250
+ "Promotion of women's rights, especially for women in the Mediterranean; Environment issues"
1251
+ ],
1252
+ [
1253
+ "Protection of children and the family, empowerment of women and girls in Africa"
1254
+ ],
1255
+ [
1256
+ ""
1257
+ ]
1258
+ ]
1259
+ },
1260
+ "title": "UNESCO Goodwill Ambassador"
1261
+ },
1262
+ {
1263
+ "id": 226,
1264
+ "qa_question": "map@Is this party the Green Wind Party?",
1265
+ "qa_column": "Party",
1266
+ "qa_answer": [
1267
+ "no",
1268
+ "no",
1269
+ "no",
1270
+ "no",
1271
+ "yes",
1272
+ "no",
1273
+ "no"
1274
+ ],
1275
+ "table": {
1276
+ "header": [
1277
+ "Party"
1278
+ ],
1279
+ "rows": [
1280
+ [
1281
+ "Your Party (YP)\\nMinna no T\u014d \u307f\u3093\u306a\u306e\u515a\\n(\"Everybody's Party\")"
1282
+ ],
1283
+ [
1284
+ "Japanese Communist Party (JCP)\\nNihon Ky\u014dsan-t\u014d \u65e5\u672c\u5171\u7523\u515a"
1285
+ ],
1286
+ [
1287
+ "People's Life Party (PLP)\\nSeikatsu no T\u014d \u751f\u6d3b\u306e\u515a"
1288
+ ],
1289
+ [
1290
+ "Social Democratic Party (SDP)\\nShakai Minshu-t\u014d \u793e\u4f1a\u6c11\u4e3b\u515a"
1291
+ ],
1292
+ [
1293
+ "Green Wind\\nMidori no Kaze \u307f\u3069\u308a\u306e\u98a8"
1294
+ ],
1295
+ [
1296
+ "New Party Daichi \u2013 True Democrats\\nShint\u014d Daichi \u2013 Shinminshu \u65b0\u515a\u5927\u5730\u30fb\u771f\u6c11\u4e3b"
1297
+ ],
1298
+ [
1299
+ "New Renaissance Party (NRP)\\nShint\u014d Kaikaku \u65b0\u515a\u6539\u9769\\n(\"New Reform Party\")"
1300
+ ]
1301
+ ]
1302
+ },
1303
+ "title": "List of political parties in Japan"
1304
+ },
1305
+ {
1306
+ "id": 229,
1307
+ "qa_question": "map@Is the language derived from a country?",
1308
+ "qa_column": "Language",
1309
+ "qa_answer": [
1310
+ "yes",
1311
+ "no",
1312
+ "yes",
1313
+ "yes",
1314
+ "yes",
1315
+ "/",
1316
+ "/"
1317
+ ],
1318
+ "table": {
1319
+ "header": [
1320
+ "Language"
1321
+ ],
1322
+ "rows": [
1323
+ [
1324
+ "Polish"
1325
+ ],
1326
+ [
1327
+ "Yiddish"
1328
+ ],
1329
+ [
1330
+ "German"
1331
+ ],
1332
+ [
1333
+ "Russian"
1334
+ ],
1335
+ [
1336
+ "Ukrainian"
1337
+ ],
1338
+ [
1339
+ "Other"
1340
+ ],
1341
+ [
1342
+ "Persons\\nthat didn't name\\ntheir native language"
1343
+ ]
1344
+ ]
1345
+ },
1346
+ "title": "P\u0142ock Governorate"
1347
+ },
1348
+ {
1349
+ "id": 206,
1350
+ "qa_question": "map@What is the year?",
1351
+ "qa_column": "Notes",
1352
+ "qa_answer": [
1353
+ "1933",
1354
+ "1933",
1355
+ "1941",
1356
+ "1941",
1357
+ "1932",
1358
+ "1932",
1359
+ "1932",
1360
+ "1932",
1361
+ "1932",
1362
+ "1932",
1363
+ "1929",
1364
+ "1929"
1365
+ ],
1366
+ "table": {
1367
+ "header": [
1368
+ "Notes"
1369
+ ],
1370
+ "rows": [
1371
+ [
1372
+ "retired 1933"
1373
+ ],
1374
+ [
1375
+ "retired 1933"
1376
+ ],
1377
+ [
1378
+ "became inspection car in 1932 retired 1941"
1379
+ ],
1380
+ [
1381
+ "became inspection car in 1932 retired 1941"
1382
+ ],
1383
+ [
1384
+ "retired 1932"
1385
+ ],
1386
+ [
1387
+ "retired 1932"
1388
+ ],
1389
+ [
1390
+ "retired 1932 preserved Western Railway Museum"
1391
+ ],
1392
+ [
1393
+ "retired 1932"
1394
+ ],
1395
+ [
1396
+ "retired 1932"
1397
+ ],
1398
+ [
1399
+ "converted to express trailer in 1919 retired 1932"
1400
+ ],
1401
+ [
1402
+ "retired 1929"
1403
+ ],
1404
+ [
1405
+ "retired 1929"
1406
+ ]
1407
+ ]
1408
+ },
1409
+ "title": "Petaluma and Santa Rosa Railroad"
1410
+ },
1411
+ {
1412
+ "id": 613,
1413
+ "qa_question": "map@What is the month?",
1414
+ "qa_column": "Dates",
1415
+ "qa_answer": [
1416
+ "17 Jan",
1417
+ "24 Jan",
1418
+ "31 Jan",
1419
+ "7 Feb",
1420
+ "14 Feb",
1421
+ "20 Feb",
1422
+ "28 Feb",
1423
+ "7 Mar",
1424
+ "14 Mar",
1425
+ "28 Mar",
1426
+ "11 Apr",
1427
+ "18 Apr",
1428
+ "25 Apr",
1429
+ "2 May",
1430
+ "9 May",
1431
+ "16 May",
1432
+ "24 May",
1433
+ "31 May",
1434
+ "6 Jun"
1435
+ ],
1436
+ "table": {
1437
+ "header": [
1438
+ "Dates"
1439
+ ],
1440
+ "rows": [
1441
+ [
1442
+ "14\u201317\u00a0Jan"
1443
+ ],
1444
+ [
1445
+ "21\u201324\u00a0Jan"
1446
+ ],
1447
+ [
1448
+ "28\u201331\u00a0Jan"
1449
+ ],
1450
+ [
1451
+ "4\u20137\u00a0Feb"
1452
+ ],
1453
+ [
1454
+ "11\u201314\u00a0Feb"
1455
+ ],
1456
+ [
1457
+ "17\u201320\u00a0Feb"
1458
+ ],
1459
+ [
1460
+ "24\u201328\u00a0Feb"
1461
+ ],
1462
+ [
1463
+ "4\u20137\u00a0Mar"
1464
+ ],
1465
+ [
1466
+ "11\u201314\u00a0Mar"
1467
+ ],
1468
+ [
1469
+ "25\u201328\u00a0Mar"
1470
+ ],
1471
+ [
1472
+ "8\u201311\u00a0Apr"
1473
+ ],
1474
+ [
1475
+ "15\u201318\u00a0Apr"
1476
+ ],
1477
+ [
1478
+ "22\u201325\u00a0Apr"
1479
+ ],
1480
+ [
1481
+ "29\u00a0Apr\u20132\u00a0May"
1482
+ ],
1483
+ [
1484
+ "6\u20139\u00a0May"
1485
+ ],
1486
+ [
1487
+ "13\u201316\u00a0May"
1488
+ ],
1489
+ [
1490
+ "21\u201324\u00a0May"
1491
+ ],
1492
+ [
1493
+ "28\u201331\u00a0May"
1494
+ ],
1495
+ [
1496
+ "3\u20136\u00a0Jun"
1497
+ ]
1498
+ ]
1499
+ },
1500
+ "title": "1999 European Tour"
1501
+ },
1502
+ {
1503
+ "id": 590,
1504
+ "qa_question": "map@Is it below 2:01",
1505
+ "qa_column": "Time",
1506
+ "qa_answer": [
1507
+ "yes",
1508
+ "yes",
1509
+ "yes",
1510
+ "no",
1511
+ "no",
1512
+ "no",
1513
+ "no"
1514
+ ],
1515
+ "table": {
1516
+ "header": [
1517
+ "Time"
1518
+ ],
1519
+ "rows": [
1520
+ [
1521
+ "2:00.06"
1522
+ ],
1523
+ [
1524
+ "2:00.11"
1525
+ ],
1526
+ [
1527
+ "2:00.77"
1528
+ ],
1529
+ [
1530
+ "2:01.39"
1531
+ ],
1532
+ [
1533
+ "2:01.53"
1534
+ ],
1535
+ [
1536
+ "2:01.86"
1537
+ ],
1538
+ [
1539
+ "2:02.64"
1540
+ ]
1541
+ ]
1542
+ },
1543
+ "title": "Athletics at the 2003 Summer Universiade \u2013 Women's 800 metres"
1544
+ },
1545
+ {
1546
+ "id": 543,
1547
+ "qa_question": "map@Is it april fools day?",
1548
+ "qa_column": "Date signed",
1549
+ "qa_answer": [
1550
+ "no",
1551
+ "no",
1552
+ "no",
1553
+ "yes",
1554
+ "no",
1555
+ "no",
1556
+ "no",
1557
+ "/",
1558
+ "no",
1559
+ "no",
1560
+ "no",
1561
+ "no",
1562
+ "no"
1563
+ ],
1564
+ "table": {
1565
+ "header": [
1566
+ "Date signed"
1567
+ ],
1568
+ "rows": [
1569
+ [
1570
+ "March 29"
1571
+ ],
1572
+ [
1573
+ "August 11"
1574
+ ],
1575
+ [
1576
+ "March 27"
1577
+ ],
1578
+ [
1579
+ "April 1"
1580
+ ],
1581
+ [
1582
+ "April 10"
1583
+ ],
1584
+ [
1585
+ "March 25"
1586
+ ],
1587
+ [
1588
+ "June 25"
1589
+ ],
1590
+ [
1591
+ "\u2013"
1592
+ ],
1593
+ [
1594
+ "March 13"
1595
+ ],
1596
+ [
1597
+ "March 15"
1598
+ ],
1599
+ [
1600
+ "March 15"
1601
+ ],
1602
+ [
1603
+ "March 19"
1604
+ ],
1605
+ [
1606
+ "July 29"
1607
+ ]
1608
+ ]
1609
+ },
1610
+ "title": "2013 Chicago Bears season"
1611
+ },
1612
+ {
1613
+ "id": 207,
1614
+ "qa_question": "map@What is the whole points scored?",
1615
+ "qa_column": "Result",
1616
+ "qa_answer": [
1617
+ "8",
1618
+ "5",
1619
+ "5",
1620
+ "18",
1621
+ "5",
1622
+ "5",
1623
+ "5",
1624
+ "7",
1625
+ "7",
1626
+ "13",
1627
+ "/",
1628
+ "22",
1629
+ "15",
1630
+ "16",
1631
+ "29",
1632
+ "17",
1633
+ "4",
1634
+ "10",
1635
+ "8"
1636
+ ],
1637
+ "table": {
1638
+ "header": [
1639
+ "Result"
1640
+ ],
1641
+ "rows": [
1642
+ [
1643
+ "2\u20133\\n3\u20130"
1644
+ ],
1645
+ [
1646
+ "2\u20133"
1647
+ ],
1648
+ [
1649
+ "5\u20130"
1650
+ ],
1651
+ [
1652
+ "0\u20130\\n5\u20130\\n13\u20130"
1653
+ ],
1654
+ [
1655
+ "2\u20133 (AET)"
1656
+ ],
1657
+ [
1658
+ "1\u20130\\n0\u20131\\n2\u20131"
1659
+ ],
1660
+ [
1661
+ "1\u20134"
1662
+ ],
1663
+ [
1664
+ "1\u20130\\n3\u20130\\n0\u20133"
1665
+ ],
1666
+ [
1667
+ "0\u20137"
1668
+ ],
1669
+ [
1670
+ "0\u20132\\n0\u20130\\n6\u20130\\n1\u20134"
1671
+ ],
1672
+ [
1673
+ "not played"
1674
+ ],
1675
+ [
1676
+ "0\u20131 1\u20130\\n12\u20130 8\u20130"
1677
+ ],
1678
+ [
1679
+ "1\u20132 1\u20130\\n7\u20130 4\u20130"
1680
+ ],
1681
+ [
1682
+ "0\u20135\\n2\u20134\\n1\u20134"
1683
+ ],
1684
+ [
1685
+ "21\u20130\\n8\u20130"
1686
+ ],
1687
+ [
1688
+ "17\u20130"
1689
+ ],
1690
+ [
1691
+ "3\u20131"
1692
+ ],
1693
+ [
1694
+ "1\u20131\\n1\u20133\\n1\u20133"
1695
+ ],
1696
+ [
1697
+ "0\u20133\\n1\u20131\\n1\u20132"
1698
+ ]
1699
+ ]
1700
+ },
1701
+ "title": "Australia women's national association football team"
1702
+ },
1703
+ {
1704
+ "id": 0,
1705
+ "qa_question": "map@Did Kansas State lost with 0 points?",
1706
+ "qa_column": "Losing team",
1707
+ "qa_answer": [
1708
+ "yes",
1709
+ "yes",
1710
+ "no",
1711
+ "yes",
1712
+ "no",
1713
+ "no",
1714
+ "no",
1715
+ "no",
1716
+ "yes",
1717
+ "no",
1718
+ "yes",
1719
+ "yes",
1720
+ "no",
1721
+ "yes",
1722
+ "yes",
1723
+ "no",
1724
+ "no",
1725
+ "yes",
1726
+ "no"
1727
+ ],
1728
+ "table": {
1729
+ "header": [
1730
+ "Losing team"
1731
+ ],
1732
+ "rows": [
1733
+ [
1734
+ "Kansas State 0"
1735
+ ],
1736
+ [
1737
+ "Kansas State 0"
1738
+ ],
1739
+ [
1740
+ "Kansas State 4"
1741
+ ],
1742
+ [
1743
+ "Kansas State 0"
1744
+ ],
1745
+ [
1746
+ "Kansas 4"
1747
+ ],
1748
+ [
1749
+ "Kansas State 10"
1750
+ ],
1751
+ [
1752
+ "Kansas State 6"
1753
+ ],
1754
+ [
1755
+ "Kansas State 3"
1756
+ ],
1757
+ [
1758
+ "Kansas State 0"
1759
+ ],
1760
+ [
1761
+ "Kansas State 6"
1762
+ ],
1763
+ [
1764
+ "Kansas State 0"
1765
+ ],
1766
+ [
1767
+ "Kansas State 0"
1768
+ ],
1769
+ [
1770
+ "Kansas State 7"
1771
+ ],
1772
+ [
1773
+ "Kansas State 0"
1774
+ ],
1775
+ [
1776
+ "Kansas State 0"
1777
+ ],
1778
+ [
1779
+ "Kansas State 7"
1780
+ ],
1781
+ [
1782
+ "Kansas State 3"
1783
+ ],
1784
+ [
1785
+ "Kansas State 0"
1786
+ ],
1787
+ [
1788
+ "Kansas State 7"
1789
+ ]
1790
+ ]
1791
+ },
1792
+ "title": "Kansas\u2013Kansas State football rivalry"
1793
+ },
1794
+ {
1795
+ "id": 5,
1796
+ "qa_question": "map@What is the value of in feet?",
1797
+ "qa_column": "Prominence",
1798
+ "qa_answer": [
1799
+ "10080",
1800
+ "1677",
1801
+ "7196",
1802
+ "2894",
1803
+ "9832",
1804
+ "2563",
1805
+ "1936",
1806
+ "1891",
1807
+ "2027",
1808
+ "2648",
1809
+ "2601",
1810
+ "1992",
1811
+ "2339",
1812
+ "2110",
1813
+ "1736"
1814
+ ],
1815
+ "table": {
1816
+ "header": [
1817
+ "Prominence"
1818
+ ],
1819
+ "rows": [
1820
+ [
1821
+ "10,080\u00a0ft\\n3072\u00a0m"
1822
+ ],
1823
+ [
1824
+ "1,677\u00a0ft\\n511\u00a0m"
1825
+ ],
1826
+ [
1827
+ "7,196\u00a0ft\\n2193\u00a0m"
1828
+ ],
1829
+ [
1830
+ "2,894\u00a0ft\\n882\u00a0m"
1831
+ ],
1832
+ [
1833
+ "9,832\u00a0ft\\n2997\u00a0m"
1834
+ ],
1835
+ [
1836
+ "2,563\u00a0ft\\n781\u00a0m"
1837
+ ],
1838
+ [
1839
+ "1,936\u00a0ft\\n590\u00a0m"
1840
+ ],
1841
+ [
1842
+ "1,891\u00a0ft\\n576\u00a0m"
1843
+ ],
1844
+ [
1845
+ "2,027\u00a0ft\\n618\u00a0m"
1846
+ ],
1847
+ [
1848
+ "2,648\u00a0ft\\n807\u00a0m"
1849
+ ],
1850
+ [
1851
+ "2,601\u00a0ft\\n793\u00a0m"
1852
+ ],
1853
+ [
1854
+ "1,992\u00a0ft\\n607\u00a0m"
1855
+ ],
1856
+ [
1857
+ "2,339\u00a0ft\\n713\u00a0m"
1858
+ ],
1859
+ [
1860
+ "2,110\u00a0ft\\n643\u00a0m"
1861
+ ],
1862
+ [
1863
+ "1,736\u00a0ft\\n529\u00a0m"
1864
+ ]
1865
+ ]
1866
+ },
1867
+ "title": "Highest mountain peaks of California"
1868
+ },
1869
+ {
1870
+ "id": 12,
1871
+ "qa_question": "map@What is the height in ft before / ?",
1872
+ "qa_column": "Height\\nft / m",
1873
+ "qa_answer": [
1874
+ "629",
1875
+ "555",
1876
+ "530",
1877
+ "512",
1878
+ "503",
1879
+ "485",
1880
+ "464",
1881
+ "456",
1882
+ "438",
1883
+ "408",
1884
+ "366",
1885
+ "357",
1886
+ "350",
1887
+ "348",
1888
+ "317",
1889
+ "314",
1890
+ "302",
1891
+ "286",
1892
+ "280",
1893
+ "267",
1894
+ "260",
1895
+ "260",
1896
+ "256",
1897
+ "253",
1898
+ "243",
1899
+ "226",
1900
+ "212",
1901
+ "202",
1902
+ "200"
1903
+ ],
1904
+ "table": {
1905
+ "header": [
1906
+ "Height\\nft / m"
1907
+ ],
1908
+ "rows": [
1909
+ [
1910
+ "629 / 192"
1911
+ ],
1912
+ [
1913
+ "555 / 169"
1914
+ ],
1915
+ [
1916
+ "530 / 162"
1917
+ ],
1918
+ [
1919
+ "512 / 156"
1920
+ ],
1921
+ [
1922
+ "503 / 153"
1923
+ ],
1924
+ [
1925
+ "485 / 148"
1926
+ ],
1927
+ [
1928
+ "464 / 141"
1929
+ ],
1930
+ [
1931
+ "456 / 139"
1932
+ ],
1933
+ [
1934
+ "438 / 134"
1935
+ ],
1936
+ [
1937
+ "408 / 124"
1938
+ ],
1939
+ [
1940
+ "366 / 112"
1941
+ ],
1942
+ [
1943
+ "357 / 109"
1944
+ ],
1945
+ [
1946
+ "350 / 107"
1947
+ ],
1948
+ [
1949
+ "348 / 106"
1950
+ ],
1951
+ [
1952
+ "317 / 97"
1953
+ ],
1954
+ [
1955
+ "314 / 96"
1956
+ ],
1957
+ [
1958
+ "302 / 92"
1959
+ ],
1960
+ [
1961
+ "286 / 87"
1962
+ ],
1963
+ [
1964
+ "280 / 85"
1965
+ ],
1966
+ [
1967
+ "267 / 81"
1968
+ ],
1969
+ [
1970
+ "260 / 79"
1971
+ ],
1972
+ [
1973
+ "260 / 79"
1974
+ ],
1975
+ [
1976
+ "256 / 78"
1977
+ ],
1978
+ [
1979
+ "253 / 77"
1980
+ ],
1981
+ [
1982
+ "243 / 74"
1983
+ ],
1984
+ [
1985
+ "226 / 69"
1986
+ ],
1987
+ [
1988
+ "212 / 64.6"
1989
+ ],
1990
+ [
1991
+ "202 / 59.4"
1992
+ ],
1993
+ [
1994
+ "200 / 57.9"
1995
+ ]
1996
+ ]
1997
+ },
1998
+ "title": "List of tallest buildings in Columbus, Ohio"
1999
+ },
2000
+ {
2001
+ "id": 13,
2002
+ "qa_question": "map@The number of series?",
2003
+ "qa_column": "DVD Title",
2004
+ "qa_answer": [
2005
+ "1",
2006
+ "2",
2007
+ "3",
2008
+ "4",
2009
+ "5",
2010
+ "6",
2011
+ "7",
2012
+ "8",
2013
+ "9",
2014
+ "10",
2015
+ "11",
2016
+ "12",
2017
+ "/",
2018
+ "/",
2019
+ "/",
2020
+ "/"
2021
+ ],
2022
+ "table": {
2023
+ "header": [
2024
+ "DVD Title"
2025
+ ],
2026
+ "rows": [
2027
+ [
2028
+ "Complete Series 1"
2029
+ ],
2030
+ [
2031
+ "Complete Series 2"
2032
+ ],
2033
+ [
2034
+ "Complete Series 3"
2035
+ ],
2036
+ [
2037
+ "Complete Series 4"
2038
+ ],
2039
+ [
2040
+ "Complete Series 5"
2041
+ ],
2042
+ [
2043
+ "Complete Series 6"
2044
+ ],
2045
+ [
2046
+ "Complete Series 7"
2047
+ ],
2048
+ [
2049
+ "Complete Series 8"
2050
+ ],
2051
+ [
2052
+ "Complete Series 9"
2053
+ ],
2054
+ [
2055
+ "Complete Series 10"
2056
+ ],
2057
+ [
2058
+ "Complete Series 11"
2059
+ ],
2060
+ [
2061
+ "Complete Series 12"
2062
+ ],
2063
+ [
2064
+ "The Christmas Specials"
2065
+ ],
2066
+ [
2067
+ "The Complete Collection"
2068
+ ],
2069
+ [
2070
+ "Two Ronnies In Australia"
2071
+ ],
2072
+ [
2073
+ "The Best of...Volume 1"
2074
+ ]
2075
+ ]
2076
+ },
2077
+ "title": "The Two Ronnies"
2078
+ },
2079
+ {
2080
+ "id": 15,
2081
+ "qa_question": "map@return the number before '-",
2082
+ "qa_column": "W\u2013L",
2083
+ "qa_answer": [
2084
+ "3",
2085
+ "5",
2086
+ "9",
2087
+ "2",
2088
+ "19",
2089
+ "0"
2090
+ ],
2091
+ "table": {
2092
+ "header": [
2093
+ "W\u2013L"
2094
+ ],
2095
+ "rows": [
2096
+ [
2097
+ "3\u20135"
2098
+ ],
2099
+ [
2100
+ "5\u20135"
2101
+ ],
2102
+ [
2103
+ "9\u20134"
2104
+ ],
2105
+ [
2106
+ "2\u20134"
2107
+ ],
2108
+ [
2109
+ "19\u201318"
2110
+ ],
2111
+ [
2112
+ "0\u20132"
2113
+ ]
2114
+ ]
2115
+ },
2116
+ "title": "\u0141ukasz Kubot"
2117
+ },
2118
+ {
2119
+ "id": 16,
2120
+ "qa_question": "map@Is it from alabama?",
2121
+ "qa_column": "Hometown",
2122
+ "qa_answer": [
2123
+ "no",
2124
+ "no",
2125
+ "yes",
2126
+ "no",
2127
+ "no",
2128
+ "no",
2129
+ "no",
2130
+ "no",
2131
+ "no",
2132
+ " no",
2133
+ "no",
2134
+ "no"
2135
+ ],
2136
+ "table": {
2137
+ "header": [
2138
+ "Hometown"
2139
+ ],
2140
+ "rows": [
2141
+ [
2142
+ "Chicago, IL, U.S."
2143
+ ],
2144
+ [
2145
+ "Oklahoma City, OK, U.S."
2146
+ ],
2147
+ [
2148
+ "Montgomery, AL, U.S."
2149
+ ],
2150
+ [
2151
+ "Greenville, MS, U.S."
2152
+ ],
2153
+ [
2154
+ "Stone Mountain, GA, U.S."
2155
+ ],
2156
+ [
2157
+ "Douala, Cameroon"
2158
+ ],
2159
+ [
2160
+ "San Antonio, TX, U.S."
2161
+ ],
2162
+ [
2163
+ "Orlando, FL, U.S."
2164
+ ],
2165
+ [
2166
+ "Conway, AR, U.S."
2167
+ ],
2168
+ [
2169
+ "London, England, U.K."
2170
+ ],
2171
+ [
2172
+ "Nashville, TN, U.S."
2173
+ ],
2174
+ [
2175
+ "Wichita, KS, U.S."
2176
+ ]
2177
+ ]
2178
+ },
2179
+ "title": "2010\u201311 UAB Blazers men's basketball team"
2180
+ },
2181
+ {
2182
+ "id": 17,
2183
+ "qa_question": "map@value of elevation in feet",
2184
+ "qa_column": "Elevation",
2185
+ "qa_answer": [
2186
+ "14505",
2187
+ "14379",
2188
+ "14252",
2189
+ "14248",
2190
+ "14179",
2191
+ "13992",
2192
+ "13982",
2193
+ "13837",
2194
+ "13807",
2195
+ "13758",
2196
+ "13747",
2197
+ "13657",
2198
+ "13565",
2199
+ "13500",
2200
+ "13162"
2201
+ ],
2202
+ "table": {
2203
+ "header": [
2204
+ "Elevation"
2205
+ ],
2206
+ "rows": [
2207
+ [
2208
+ "14,505\u00a0ft\\n4421\u00a0m"
2209
+ ],
2210
+ [
2211
+ "14,379\u00a0ft\\n4383\u00a0m"
2212
+ ],
2213
+ [
2214
+ "14,252\u00a0ft\\n4344\u00a0m"
2215
+ ],
2216
+ [
2217
+ "14,248\u00a0ft\\n4343\u00a0m"
2218
+ ],
2219
+ [
2220
+ "14,179\u00a0ft\\n4322\u00a0m"
2221
+ ],
2222
+ [
2223
+ "13,992\u00a0ft\\n4265\u00a0m"
2224
+ ],
2225
+ [
2226
+ "13,982\u00a0ft\\n4262\u00a0m"
2227
+ ],
2228
+ [
2229
+ "13,837\u00a0ft\\n4218\u00a0m"
2230
+ ],
2231
+ [
2232
+ "13,807\u00a0ft\\n4209\u00a0m"
2233
+ ],
2234
+ [
2235
+ "13,758\u00a0ft\\n4193\u00a0m"
2236
+ ],
2237
+ [
2238
+ "13,747\u00a0ft\\n4190\u00a0m"
2239
+ ],
2240
+ [
2241
+ "13,657\u00a0ft\\n4163\u00a0m"
2242
+ ],
2243
+ [
2244
+ "13,565\u00a0ft\\n4135\u00a0m"
2245
+ ],
2246
+ [
2247
+ "13,500\u00a0ft\\n4115\u00a0m"
2248
+ ],
2249
+ [
2250
+ "13,162\u00a0ft\\n4012\u00a0m"
2251
+ ]
2252
+ ]
2253
+ },
2254
+ "title": "Highest mountain peaks of California"
2255
+ },
2256
+ {
2257
+ "id": 19,
2258
+ "qa_question": "map@What is his/her country?",
2259
+ "qa_column": "Driver",
2260
+ "qa_answer": [
2261
+ "uk",
2262
+ "us",
2263
+ "uk",
2264
+ "australia",
2265
+ "south africa",
2266
+ "new zealand",
2267
+ "uk",
2268
+ "uk",
2269
+ "us",
2270
+ "netherlands",
2271
+ "switzerland",
2272
+ "belgium",
2273
+ "scotland",
2274
+ "france",
2275
+ "uk",
2276
+ "us",
2277
+ "uk",
2278
+ "us",
2279
+ "uk"
2280
+ ],
2281
+ "table": {
2282
+ "header": [
2283
+ "Driver"
2284
+ ],
2285
+ "rows": [
2286
+ [
2287
+ "Jim Clark"
2288
+ ],
2289
+ [
2290
+ "Richie Ginther"
2291
+ ],
2292
+ [
2293
+ "Graham Hill"
2294
+ ],
2295
+ [
2296
+ "Jack Brabham"
2297
+ ],
2298
+ [
2299
+ "Tony Maggs"
2300
+ ],
2301
+ [
2302
+ "Bruce McLaren"
2303
+ ],
2304
+ [
2305
+ "Mike Hailwood"
2306
+ ],
2307
+ [
2308
+ "Ian Burgess"
2309
+ ],
2310
+ [
2311
+ "Peter Revson"
2312
+ ],
2313
+ [
2314
+ "Carel Godin de Beaufort"
2315
+ ],
2316
+ [
2317
+ "Jo Siffert"
2318
+ ],
2319
+ [
2320
+ "Andr\u00e9 Pilette"
2321
+ ],
2322
+ [
2323
+ "Innes Ireland"
2324
+ ],
2325
+ [
2326
+ "Bernard Collomb"
2327
+ ],
2328
+ [
2329
+ "Ian Raby"
2330
+ ],
2331
+ [
2332
+ "Dan Gurney"
2333
+ ],
2334
+ [
2335
+ "Mike Beckwith"
2336
+ ],
2337
+ [
2338
+ "Masten Gregory"
2339
+ ],
2340
+ [
2341
+ "Trevor Taylor"
2342
+ ]
2343
+ ]
2344
+ },
2345
+ "title": "1963 International Gold Cup"
2346
+ },
2347
+ {
2348
+ "id": 23,
2349
+ "qa_question": "map@What is the number?",
2350
+ "qa_column": "Serial format",
2351
+ "qa_answer": [
2352
+ "12345",
2353
+ "123",
2354
+ "123",
2355
+ "123",
2356
+ "123",
2357
+ "123",
2358
+ "123",
2359
+ "123",
2360
+ "123"
2361
+ ],
2362
+ "table": {
2363
+ "header": [
2364
+ "Serial format"
2365
+ ],
2366
+ "rows": [
2367
+ [
2368
+ "A-12345"
2369
+ ],
2370
+ [
2371
+ "ABC-123"
2372
+ ],
2373
+ [
2374
+ "ABC-123"
2375
+ ],
2376
+ [
2377
+ "ABC-123"
2378
+ ],
2379
+ [
2380
+ "ABC-123"
2381
+ ],
2382
+ [
2383
+ "ABC-123"
2384
+ ],
2385
+ [
2386
+ "ABC-123"
2387
+ ],
2388
+ [
2389
+ "ABC-123"
2390
+ ],
2391
+ [
2392
+ "123\u00b7ABC"
2393
+ ]
2394
+ ]
2395
+ },
2396
+ "title": "Vehicle registration plates of Arizona"
2397
+ },
2398
+ {
2399
+ "id": 28,
2400
+ "qa_question": "map@Is sabaru impreza?",
2401
+ "qa_column": "Car",
2402
+ "qa_answer": [
2403
+ "no",
2404
+ "no",
2405
+ "yes",
2406
+ "no",
2407
+ "no",
2408
+ "no",
2409
+ "yes",
2410
+ "no",
2411
+ "no",
2412
+ "yes",
2413
+ "yes",
2414
+ "yes",
2415
+ "yes",
2416
+ "yes",
2417
+ "yes",
2418
+ "yes",
2419
+ "yes"
2420
+ ],
2421
+ "table": {
2422
+ "header": [
2423
+ "Car"
2424
+ ],
2425
+ "rows": [
2426
+ [
2427
+ "Toyota Celica ST 185"
2428
+ ],
2429
+ [
2430
+ "Toyota Supra"
2431
+ ],
2432
+ [
2433
+ "Subaru Impreza"
2434
+ ],
2435
+ [
2436
+ "Toyota Supra"
2437
+ ],
2438
+ [
2439
+ "Mitsubishi Lancer Evo 4"
2440
+ ],
2441
+ [
2442
+ "-"
2443
+ ],
2444
+ [
2445
+ "Subaru Impreza WRX"
2446
+ ],
2447
+ [
2448
+ "Mitsubishi Lancer Evo 4"
2449
+ ],
2450
+ [
2451
+ "Mitsubishi Lancer Evo 4"
2452
+ ],
2453
+ [
2454
+ "Subaru Impreza N10"
2455
+ ],
2456
+ [
2457
+ "Subaru Impreza N8"
2458
+ ],
2459
+ [
2460
+ "Subaru Impreza N10"
2461
+ ],
2462
+ [
2463
+ "Subaru Impreza N10"
2464
+ ],
2465
+ [
2466
+ "Subaru Impreza N8"
2467
+ ],
2468
+ [
2469
+ "Subaru Impreza N10"
2470
+ ],
2471
+ [
2472
+ "Subaru Impreza N8"
2473
+ ],
2474
+ [
2475
+ "Subaru Impreza N12"
2476
+ ]
2477
+ ]
2478
+ },
2479
+ "title": "Pearl of Africa Rally"
2480
+ },
2481
+ {
2482
+ "id": 30,
2483
+ "qa_question": "map@When did it scrapped",
2484
+ "qa_column": "Fate",
2485
+ "qa_answer": [
2486
+ "1972",
2487
+ "1974",
2488
+ "1959",
2489
+ "1960",
2490
+ "1969",
2491
+ "1966",
2492
+ "1960",
2493
+ "1966",
2494
+ "1964",
2495
+ "1970",
2496
+ "1959",
2497
+ "1963",
2498
+ "1960",
2499
+ "1960",
2500
+ "1971",
2501
+ "1963",
2502
+ "1960",
2503
+ "1977",
2504
+ "1966"
2505
+ ],
2506
+ "table": {
2507
+ "header": [
2508
+ "Fate"
2509
+ ],
2510
+ "rows": [
2511
+ [
2512
+ "Scrapped in 1972"
2513
+ ],
2514
+ [
2515
+ "Scrapped in 1974"
2516
+ ],
2517
+ [
2518
+ "Scrapped in 1959"
2519
+ ],
2520
+ [
2521
+ "Scrapped in 1960"
2522
+ ],
2523
+ [
2524
+ "Scrapped in 1969"
2525
+ ],
2526
+ [
2527
+ "Scrapped in 1966"
2528
+ ],
2529
+ [
2530
+ "Scrapped in 1960"
2531
+ ],
2532
+ [
2533
+ "Scrapped in 1966"
2534
+ ],
2535
+ [
2536
+ "Scrapped in 1964"
2537
+ ],
2538
+ [
2539
+ "Scrapped in 1970"
2540
+ ],
2541
+ [
2542
+ "Scrapped in 1959"
2543
+ ],
2544
+ [
2545
+ "Scrapped in 1963"
2546
+ ],
2547
+ [
2548
+ "Sold as oil hulk in 1960"
2549
+ ],
2550
+ [
2551
+ "Scrapped in 1960"
2552
+ ],
2553
+ [
2554
+ "Scrapped in 1971"
2555
+ ],
2556
+ [
2557
+ "Scrapped in 1963"
2558
+ ],
2559
+ [
2560
+ "Scrapped in 1960"
2561
+ ],
2562
+ [
2563
+ "Scrapped in 1977"
2564
+ ],
2565
+ [
2566
+ "Scrapped in 1966"
2567
+ ]
2568
+ ]
2569
+ },
2570
+ "title": "Wave-class oiler"
2571
+ },
2572
+ {
2573
+ "id": 31,
2574
+ "qa_question": "map@What is the max number instead of year?",
2575
+ "qa_column": "Top speed (km/h)",
2576
+ "qa_answer": [
2577
+ "/",
2578
+ "90",
2579
+ "164",
2580
+ "140",
2581
+ "160",
2582
+ "250",
2583
+ "401.3",
2584
+ "/",
2585
+ "36",
2586
+ "75",
2587
+ "412.6",
2588
+ "450",
2589
+ "/",
2590
+ "501"
2591
+ ],
2592
+ "table": {
2593
+ "header": [
2594
+ "Top speed (km/h)"
2595
+ ],
2596
+ "rows": [
2597
+ [
2598
+ ""
2599
+ ],
2600
+ [
2601
+ "90 (1971)"
2602
+ ],
2603
+ [
2604
+ "164 (October 1971)"
2605
+ ],
2606
+ [
2607
+ "140 (September 1972)"
2608
+ ],
2609
+ [
2610
+ "160 / 230 (1974)\u00a0?"
2611
+ ],
2612
+ [
2613
+ "250 (end 1973), 253.2 (21 November 1977)"
2614
+ ],
2615
+ [
2616
+ "401.3 (1974)"
2617
+ ],
2618
+ [
2619
+ ""
2620
+ ],
2621
+ [
2622
+ "36 (or 40\u00a0?)"
2623
+ ],
2624
+ [
2625
+ "75"
2626
+ ],
2627
+ [
2628
+ "302 (1984), 355 (1985), 392 (1987), 406 (1987), 412.6 (January 1988)"
2629
+ ],
2630
+ [
2631
+ "436 (1989), 450 (17 June 1993)"
2632
+ ],
2633
+ [
2634
+ ""
2635
+ ],
2636
+ [
2637
+ "501 (12 November 2003)"
2638
+ ]
2639
+ ]
2640
+ },
2641
+ "title": "Transrapid"
2642
+ },
2643
+ {
2644
+ "id": 44,
2645
+ "qa_question": "map@Is it in united states?",
2646
+ "qa_column": "Location",
2647
+ "qa_answer": [
2648
+ "no",
2649
+ "no",
2650
+ "no",
2651
+ "no",
2652
+ "no",
2653
+ "no",
2654
+ "no",
2655
+ "france",
2656
+ "yes",
2657
+ "no",
2658
+ "no",
2659
+ "no",
2660
+ "no",
2661
+ "no",
2662
+ "yes",
2663
+ "no",
2664
+ "no",
2665
+ "no",
2666
+ "no",
2667
+ "no",
2668
+ "no",
2669
+ "no",
2670
+ "no",
2671
+ "no",
2672
+ "yes",
2673
+ "no",
2674
+ "no",
2675
+ "no",
2676
+ "no",
2677
+ "no",
2678
+ "no",
2679
+ "no",
2680
+ "no"
2681
+ ],
2682
+ "table": {
2683
+ "header": [
2684
+ "Location"
2685
+ ],
2686
+ "rows": [
2687
+ [
2688
+ "Basel"
2689
+ ],
2690
+ [
2691
+ "Rome"
2692
+ ],
2693
+ [
2694
+ "Moscow"
2695
+ ],
2696
+ [
2697
+ "Prague"
2698
+ ],
2699
+ [
2700
+ "Dortmund"
2701
+ ],
2702
+ [
2703
+ "Ljubljana"
2704
+ ],
2705
+ [
2706
+ "Varna"
2707
+ ],
2708
+ [
2709
+ "Strasbourg"
2710
+ ],
2711
+ [
2712
+ "Fort Worth"
2713
+ ],
2714
+ [
2715
+ "Moscow"
2716
+ ],
2717
+ [
2718
+ "Budapest"
2719
+ ],
2720
+ [
2721
+ "Montreal"
2722
+ ],
2723
+ [
2724
+ "Rotterdam"
2725
+ ],
2726
+ [
2727
+ "Stuttgart"
2728
+ ],
2729
+ [
2730
+ "Indianapolis"
2731
+ ],
2732
+ [
2733
+ "Paris"
2734
+ ],
2735
+ [
2736
+ "Birmingham"
2737
+ ],
2738
+ [
2739
+ "Brisbane"
2740
+ ],
2741
+ [
2742
+ "Sabae"
2743
+ ],
2744
+ [
2745
+ "San Juan"
2746
+ ],
2747
+ [
2748
+ "Lausanne"
2749
+ ],
2750
+ [
2751
+ "Tianjin"
2752
+ ],
2753
+ [
2754
+ "Ghent"
2755
+ ],
2756
+ [
2757
+ "Debrecen"
2758
+ ],
2759
+ [
2760
+ "Anaheim"
2761
+ ],
2762
+ [
2763
+ "Melbourne"
2764
+ ],
2765
+ [
2766
+ "Aarhus"
2767
+ ],
2768
+ [
2769
+ "Stuttgart"
2770
+ ],
2771
+ [
2772
+ "London"
2773
+ ],
2774
+ [
2775
+ "Rotterdam"
2776
+ ],
2777
+ [
2778
+ "Tokyo"
2779
+ ],
2780
+ [
2781
+ "Antwerp"
2782
+ ],
2783
+ [
2784
+ "Nanning"
2785
+ ]
2786
+ ]
2787
+ },
2788
+ "title": "World Artistic Gymnastics Championships \u2013 Women's floor"
2789
+ },
2790
+ {
2791
+ "id": 65,
2792
+ "qa_question": "map@Which country is the company from?",
2793
+ "qa_column": "Company (country)",
2794
+ "qa_answer": [
2795
+ "us",
2796
+ "us",
2797
+ "uk",
2798
+ "us",
2799
+ "us",
2800
+ "us",
2801
+ "france",
2802
+ "italy",
2803
+ "us",
2804
+ "us",
2805
+ "france",
2806
+ "china",
2807
+ "us",
2808
+ "us",
2809
+ "france",
2810
+ "us",
2811
+ "uk",
2812
+ "us",
2813
+ "us",
2814
+ "us",
2815
+ "us"
2816
+ ],
2817
+ "table": {
2818
+ "header": [
2819
+ "Company (country)"
2820
+ ],
2821
+ "rows": [
2822
+ [
2823
+ "Lockheed Martin"
2824
+ ],
2825
+ [
2826
+ "Boeing"
2827
+ ],
2828
+ [
2829
+ "BAE Systems"
2830
+ ],
2831
+ [
2832
+ "General Dynamics"
2833
+ ],
2834
+ [
2835
+ "Raytheon"
2836
+ ],
2837
+ [
2838
+ "Northrop Grumman"
2839
+ ],
2840
+ [
2841
+ "Airbus Group"
2842
+ ],
2843
+ [
2844
+ "Finmeccanica"
2845
+ ],
2846
+ [
2847
+ "L-3 Communications"
2848
+ ],
2849
+ [
2850
+ "United Technologies Corporation"
2851
+ ],
2852
+ [
2853
+ "Thales Group"
2854
+ ],
2855
+ [
2856
+ "SAIC"
2857
+ ],
2858
+ [
2859
+ "Huntington Ingalls Industries"
2860
+ ],
2861
+ [
2862
+ "Honeywell"
2863
+ ],
2864
+ [
2865
+ "SAFRAN"
2866
+ ],
2867
+ [
2868
+ "Computer Sciences Corp."
2869
+ ],
2870
+ [
2871
+ "Rolls-Royce"
2872
+ ],
2873
+ [
2874
+ "United Aircraft Corporation"
2875
+ ],
2876
+ [
2877
+ "Oshkosh Corporation"
2878
+ ],
2879
+ [
2880
+ "General Electric"
2881
+ ],
2882
+ [
2883
+ "ITT Corp."
2884
+ ]
2885
+ ]
2886
+ },
2887
+ "title": "List of defense contractors"
2888
+ },
2889
+ {
2890
+ "id": 73,
2891
+ "qa_question": "map@Is the result a win?",
2892
+ "qa_column": "Result",
2893
+ "qa_answer": [
2894
+ "yes",
2895
+ "no",
2896
+ "yes",
2897
+ "yes",
2898
+ "yes",
2899
+ "no",
2900
+ "no",
2901
+ "no",
2902
+ "no",
2903
+ "yes"
2904
+ ],
2905
+ "table": {
2906
+ "header": [
2907
+ "Result"
2908
+ ],
2909
+ "rows": [
2910
+ [
2911
+ "W\u00a044\u20136"
2912
+ ],
2913
+ [
2914
+ "L\u00a014\u201324"
2915
+ ],
2916
+ [
2917
+ "W\u00a025\u201320"
2918
+ ],
2919
+ [
2920
+ "W\u00a029\u201322"
2921
+ ],
2922
+ [
2923
+ "W\u00a034\u201327"
2924
+ ],
2925
+ [
2926
+ "L\u00a010\u201341"
2927
+ ],
2928
+ [
2929
+ "L\u00a010\u201313"
2930
+ ],
2931
+ [
2932
+ "L\u00a06\u201320"
2933
+ ],
2934
+ [
2935
+ "L\u00a017\u201337"
2936
+ ],
2937
+ [
2938
+ "W\u00a031\u201317"
2939
+ ]
2940
+ ]
2941
+ },
2942
+ "title": "1987 Oregon Ducks football team"
2943
+ },
2944
+ {
2945
+ "id": 88,
2946
+ "qa_question": "map@NA inside?",
2947
+ "qa_column": "Regions",
2948
+ "qa_answer": [
2949
+ "no",
2950
+ "yes",
2951
+ "yes",
2952
+ "no",
2953
+ "yes",
2954
+ "yes",
2955
+ "no",
2956
+ "yes",
2957
+ "no",
2958
+ "no",
2959
+ "no",
2960
+ "no"
2961
+ ],
2962
+ "table": {
2963
+ "header": [
2964
+ "Regions"
2965
+ ],
2966
+ "rows": [
2967
+ [
2968
+ "JP"
2969
+ ],
2970
+ [
2971
+ "JP, NA"
2972
+ ],
2973
+ [
2974
+ "JP, NA"
2975
+ ],
2976
+ [
2977
+ "JP"
2978
+ ],
2979
+ [
2980
+ "JP, NA"
2981
+ ],
2982
+ [
2983
+ "JP, NA"
2984
+ ],
2985
+ [
2986
+ "JP"
2987
+ ],
2988
+ [
2989
+ "JP, NA"
2990
+ ],
2991
+ [
2992
+ "JP"
2993
+ ],
2994
+ [
2995
+ "JP"
2996
+ ],
2997
+ [
2998
+ "JP"
2999
+ ],
3000
+ [
3001
+ "JP"
3002
+ ]
3003
+ ]
3004
+ },
3005
+ "title": "Super Chinese"
3006
+ },
3007
+ {
3008
+ "id": 103,
3009
+ "qa_question": "map@Is he/she a brazilian?",
3010
+ "qa_column": "Gold",
3011
+ "qa_answer": [
3012
+ "no",
3013
+ "no",
3014
+ "no",
3015
+ "no",
3016
+ "no",
3017
+ "no",
3018
+ "no",
3019
+ "no",
3020
+ "no",
3021
+ "no",
3022
+ "no",
3023
+ "no",
3024
+ "no",
3025
+ "no",
3026
+ " no",
3027
+ "no",
3028
+ "no",
3029
+ "no",
3030
+ "no",
3031
+ "no",
3032
+ "no",
3033
+ "no",
3034
+ "no",
3035
+ "no",
3036
+ "yes",
3037
+ "no",
3038
+ "no",
3039
+ "no",
3040
+ "no",
3041
+ "no",
3042
+ "no",
3043
+ "no",
3044
+ "/"
3045
+ ],
3046
+ "table": {
3047
+ "header": [
3048
+ "Gold"
3049
+ ],
3050
+ "rows": [
3051
+ [
3052
+ "Helena Rakoczy"
3053
+ ],
3054
+ [
3055
+ "Tamara Manina"
3056
+ ],
3057
+ [
3058
+ "Eva Bos\u00e1kov\u00e1"
3059
+ ],
3060
+ [
3061
+ "Larisa Latynina"
3062
+ ],
3063
+ [
3064
+ "Natalia Kutchinskaya"
3065
+ ],
3066
+ [
3067
+ "Ludmila Turicheva"
3068
+ ],
3069
+ [
3070
+ "Ludmila Turicheva"
3071
+ ],
3072
+ [
3073
+ "Nellie Kim\\n Elena Mukhina"
3074
+ ],
3075
+ [
3076
+ "Emilia Eberle"
3077
+ ],
3078
+ [
3079
+ "Natalia Ilienko"
3080
+ ],
3081
+ [
3082
+ "Ekaterina Szabo"
3083
+ ],
3084
+ [
3085
+ "Oksana Omelianchik"
3086
+ ],
3087
+ [
3088
+ "Daniela Siliva\u015f\\n Elena Shushunova"
3089
+ ],
3090
+ [
3091
+ "Daniela Siliva\u015f\\n Svetlana Boguinskaya"
3092
+ ],
3093
+ [
3094
+ "Cristina Bonta\u015f\\n Oksana Chusovitina"
3095
+ ],
3096
+ [
3097
+ "Kim Zmeskal"
3098
+ ],
3099
+ [
3100
+ "Shannon Miller"
3101
+ ],
3102
+ [
3103
+ "Dina Kochetkova"
3104
+ ],
3105
+ [
3106
+ "Gina Gogean"
3107
+ ],
3108
+ [
3109
+ "Kui Yuan-Yuan\\n Gina Gogean"
3110
+ ],
3111
+ [
3112
+ "Gina Gogean"
3113
+ ],
3114
+ [
3115
+ "Andreea R\u0103ducan"
3116
+ ],
3117
+ [
3118
+ "Andreea R\u0103ducan"
3119
+ ],
3120
+ [
3121
+ "Elena G\u00f3mez"
3122
+ ],
3123
+ [
3124
+ "Daiane Dos Santos"
3125
+ ],
3126
+ [
3127
+ "Alicia Sacramone"
3128
+ ],
3129
+ [
3130
+ "Cheng Fei"
3131
+ ],
3132
+ [
3133
+ "Shawn Johnson"
3134
+ ],
3135
+ [
3136
+ "Elizabeth Tweddle"
3137
+ ],
3138
+ [
3139
+ "Lauren Mitchell"
3140
+ ],
3141
+ [
3142
+ "Ksenia Afanasyeva"
3143
+ ],
3144
+ [
3145
+ "Simone Biles"
3146
+ ],
3147
+ [
3148
+ ""
3149
+ ]
3150
+ ]
3151
+ },
3152
+ "title": "World Artistic Gymnastics Championships \u2013 Women's floor"
3153
+ },
3154
+ {
3155
+ "id": 124,
3156
+ "qa_question": "map@What is the country?",
3157
+ "qa_column": "Team",
3158
+ "qa_answer": [
3159
+ "switzerland",
3160
+ "italy",
3161
+ "united states",
3162
+ "switzerland",
3163
+ "italy",
3164
+ "germany",
3165
+ "austria",
3166
+ "germany",
3167
+ "spain",
3168
+ "austria",
3169
+ "norway",
3170
+ "great britain",
3171
+ "sweden",
3172
+ "romania",
3173
+ "poland",
3174
+ "sweden",
3175
+ "great britain",
3176
+ "france",
3177
+ "united states",
3178
+ "romania"
3179
+ ],
3180
+ "table": {
3181
+ "header": [
3182
+ "Team"
3183
+ ],
3184
+ "rows": [
3185
+ [
3186
+ "Switzerland\u00a0(SUI) Switzerland I"
3187
+ ],
3188
+ [
3189
+ "Italy\u00a0(ITA) Italy II"
3190
+ ],
3191
+ [
3192
+ "United States\u00a0(USA) USA I"
3193
+ ],
3194
+ [
3195
+ "Switzerland\u00a0(SUI) Switzerland II"
3196
+ ],
3197
+ [
3198
+ "Italy\u00a0(ITA) Italy I"
3199
+ ],
3200
+ [
3201
+ "Germany\u00a0(GER) Germany I"
3202
+ ],
3203
+ [
3204
+ "Austria\u00a0(AUT) Austria II"
3205
+ ],
3206
+ [
3207
+ "Germany\u00a0(GER) Germany II"
3208
+ ],
3209
+ [
3210
+ "Spain\u00a0(ESP) Spain I"
3211
+ ],
3212
+ [
3213
+ "Austria\u00a0(AUT) Austria I"
3214
+ ],
3215
+ [
3216
+ "Norway\u00a0(NOR) Norway I"
3217
+ ],
3218
+ [
3219
+ "Great Britain\u00a0(GBR) Great Britain I"
3220
+ ],
3221
+ [
3222
+ "Sweden\u00a0(SWE) Sweden II"
3223
+ ],
3224
+ [
3225
+ "Romania\u00a0(ROU) Romania I"
3226
+ ],
3227
+ [
3228
+ "Poland\u00a0(POL) Poland I"
3229
+ ],
3230
+ [
3231
+ "Sweden\u00a0(SWE) Sweden I"
3232
+ ],
3233
+ [
3234
+ "Great Britain\u00a0(GBR) Great Britain II"
3235
+ ],
3236
+ [
3237
+ "France\u00a0(FRA) France I"
3238
+ ],
3239
+ [
3240
+ "United States\u00a0(USA) USA II"
3241
+ ],
3242
+ [
3243
+ "Romania\u00a0(ROU) Romania II"
3244
+ ]
3245
+ ]
3246
+ },
3247
+ "title": "Bobsleigh at the 1956 Winter Olympics \u2013 Four-man"
3248
+ },
3249
+ {
3250
+ "id": 127,
3251
+ "qa_question": "map@What is the last name?",
3252
+ "qa_column": "Menteri Besar",
3253
+ "qa_answer": [
3254
+ "Mohamed",
3255
+ "Mahbob",
3256
+ "Jaafar",
3257
+ "Jaafar",
3258
+ "Yusof",
3259
+ "Majid",
3260
+ "Jaafar",
3261
+ "Mohamed"
3262
+ ],
3263
+ "table": {
3264
+ "header": [
3265
+ "Menteri Besar"
3266
+ ],
3267
+ "rows": [
3268
+ [
3269
+ "Jaafar Mohamed"
3270
+ ],
3271
+ [
3272
+ "Mohamed Mahbob"
3273
+ ],
3274
+ [
3275
+ "Abdullah Jaafar"
3276
+ ],
3277
+ [
3278
+ "Mustapha Jaafar"
3279
+ ],
3280
+ [
3281
+ "Abdul Hamid Yusof"
3282
+ ],
3283
+ [
3284
+ "Ungku Abdul Aziz Abdul Majid"
3285
+ ],
3286
+ [
3287
+ "Onn Jaafar"
3288
+ ],
3289
+ [
3290
+ "Syed Abdul Kadir Mohamed"
3291
+ ]
3292
+ ]
3293
+ },
3294
+ "title": "Menteri Besar of Johor"
3295
+ },
3296
+ {
3297
+ "id": 131,
3298
+ "qa_question": "map@Is it a tie competition?",
3299
+ "qa_column": "Place",
3300
+ "qa_answer": [
3301
+ "yes",
3302
+ "yes",
3303
+ "no",
3304
+ "no",
3305
+ "no",
3306
+ "no",
3307
+ "yes",
3308
+ "yes",
3309
+ "no",
3310
+ "no",
3311
+ "no"
3312
+ ],
3313
+ "table": {
3314
+ "header": [
3315
+ "Place"
3316
+ ],
3317
+ "rows": [
3318
+ [
3319
+ "4th (tie)"
3320
+ ],
3321
+ [
3322
+ "10th (tie)"
3323
+ ],
3324
+ [
3325
+ "8th"
3326
+ ],
3327
+ [
3328
+ "1st"
3329
+ ],
3330
+ [
3331
+ "12th"
3332
+ ],
3333
+ [
3334
+ "2nd"
3335
+ ],
3336
+ [
3337
+ "4th (tie)"
3338
+ ],
3339
+ [
3340
+ "10th (tie)"
3341
+ ],
3342
+ [
3343
+ "9th"
3344
+ ],
3345
+ [
3346
+ "6th"
3347
+ ],
3348
+ [
3349
+ "7th"
3350
+ ]
3351
+ ]
3352
+ },
3353
+ "title": "Israel in the Eurovision Song Contest 1986"
3354
+ },
3355
+ {
3356
+ "id": 134,
3357
+ "qa_question": "map@Is it a win?",
3358
+ "qa_column": "Result",
3359
+ "qa_answer": [
3360
+ "yes",
3361
+ "yes",
3362
+ "no",
3363
+ "yes",
3364
+ "yes",
3365
+ "yes",
3366
+ "yes",
3367
+ "yes",
3368
+ "yes",
3369
+ "yes",
3370
+ "no"
3371
+ ],
3372
+ "table": {
3373
+ "header": [
3374
+ "Result"
3375
+ ],
3376
+ "rows": [
3377
+ [
3378
+ "W\u00a010-0"
3379
+ ],
3380
+ [
3381
+ "W\u00a038-7"
3382
+ ],
3383
+ [
3384
+ "L\u00a028-29"
3385
+ ],
3386
+ [
3387
+ "W\u00a035-7"
3388
+ ],
3389
+ [
3390
+ "W\u00a046-0"
3391
+ ],
3392
+ [
3393
+ "W\u00a027-6"
3394
+ ],
3395
+ [
3396
+ "W\u00a035-15"
3397
+ ],
3398
+ [
3399
+ "W\u00a042-0"
3400
+ ],
3401
+ [
3402
+ "W\u00a035-0"
3403
+ ],
3404
+ [
3405
+ "W\u00a035-7"
3406
+ ],
3407
+ [
3408
+ "L\u00a06-14"
3409
+ ]
3410
+ ]
3411
+ },
3412
+ "title": "1977 Ohio State Buckeyes football team"
3413
+ },
3414
+ {
3415
+ "id": 138,
3416
+ "qa_question": "map@What is the album name?",
3417
+ "qa_column": "Album",
3418
+ "qa_answer": [
3419
+ "Go West Young Man",
3420
+ "Change Your World",
3421
+ "I'll Lead You Home",
3422
+ "Live the Life",
3423
+ "Christmastime"
3424
+ ],
3425
+ "table": {
3426
+ "header": [
3427
+ "Album"
3428
+ ],
3429
+ "rows": [
3430
+ [
3431
+ "Go West Young Man\\n\\nReleased: October 1, 1990\\nLabel: Reunion\\nFormat: CD"
3432
+ ],
3433
+ [
3434
+ "Change Your World\\n\\nReleased: September 1, 1992\\nLabel: Reunion\\nFormat: CD"
3435
+ ],
3436
+ [
3437
+ "I'll Lead You Home\\n\\nReleased: August 29, 1995\\nLabel: Reunion\\nFormat: CD"
3438
+ ],
3439
+ [
3440
+ "Live the Life\\n\\nReleased: April 28, 1998\\nLabel: Reunion\\nFormat: CD"
3441
+ ],
3442
+ [
3443
+ "Christmastime\\n\\nReleased: October 13, 1998\\nLabel: Reunion\\nFormat: CD"
3444
+ ]
3445
+ ]
3446
+ },
3447
+ "title": "Michael W. Smith discography"
3448
+ },
3449
+ {
3450
+ "id": 139,
3451
+ "qa_question": "map@What is the number in km?",
3452
+ "qa_column": "Event",
3453
+ "qa_answer": [
3454
+ "20",
3455
+ "20",
3456
+ "20",
3457
+ "50",
3458
+ "20",
3459
+ "50",
3460
+ "5",
3461
+ "50",
3462
+ "50",
3463
+ "50",
3464
+ "50",
3465
+ "20",
3466
+ "50",
3467
+ "50",
3468
+ "50",
3469
+ "20",
3470
+ "50",
3471
+ "20",
3472
+ "20",
3473
+ "50",
3474
+ "50",
3475
+ "20",
3476
+ "50",
3477
+ "50"
3478
+ ],
3479
+ "table": {
3480
+ "header": [
3481
+ "Event"
3482
+ ],
3483
+ "rows": [
3484
+ [
3485
+ "20\u00a0km walk"
3486
+ ],
3487
+ [
3488
+ "20\u00a0km walk"
3489
+ ],
3490
+ [
3491
+ "20\u00a0km walk"
3492
+ ],
3493
+ [
3494
+ "50\u00a0km walk"
3495
+ ],
3496
+ [
3497
+ "20\u00a0km walk"
3498
+ ],
3499
+ [
3500
+ "50\u00a0km walk"
3501
+ ],
3502
+ [
3503
+ "5000 m walk"
3504
+ ],
3505
+ [
3506
+ "50\u00a0km walk"
3507
+ ],
3508
+ [
3509
+ "50\u00a0km walk"
3510
+ ],
3511
+ [
3512
+ "50\u00a0km walk"
3513
+ ],
3514
+ [
3515
+ "50\u00a0km walk"
3516
+ ],
3517
+ [
3518
+ "20\u00a0km walk"
3519
+ ],
3520
+ [
3521
+ "50\u00a0km walk"
3522
+ ],
3523
+ [
3524
+ "50\u00a0km walk"
3525
+ ],
3526
+ [
3527
+ "50\u00a0km walk"
3528
+ ],
3529
+ [
3530
+ "20\u00a0km walk"
3531
+ ],
3532
+ [
3533
+ "50\u00a0km walk"
3534
+ ],
3535
+ [
3536
+ "20\u00a0km walk"
3537
+ ],
3538
+ [
3539
+ "20\u00a0km walk"
3540
+ ],
3541
+ [
3542
+ "50\u00a0km walk"
3543
+ ],
3544
+ [
3545
+ "50\u00a0km walk"
3546
+ ],
3547
+ [
3548
+ "20,000 m walk"
3549
+ ],
3550
+ [
3551
+ "50\u00a0km walk"
3552
+ ],
3553
+ [
3554
+ "50\u00a0km walk"
3555
+ ]
3556
+ ]
3557
+ },
3558
+ "title": "Robert Korzeniowski"
3559
+ },
3560
+ {
3561
+ "id": 140,
3562
+ "qa_question": "map@Is the etymology a battle?",
3563
+ "qa_column": "Etymology",
3564
+ "qa_answer": [
3565
+ "yes",
3566
+ "no",
3567
+ "no",
3568
+ "no",
3569
+ "no",
3570
+ "no",
3571
+ "no",
3572
+ "no",
3573
+ "no",
3574
+ "no",
3575
+ "no",
3576
+ "no",
3577
+ "no",
3578
+ "no",
3579
+ "no",
3580
+ "no",
3581
+ "no",
3582
+ "no",
3583
+ "no",
3584
+ "no",
3585
+ "no",
3586
+ "no",
3587
+ "no",
3588
+ "no",
3589
+ "no",
3590
+ "no",
3591
+ "no",
3592
+ "no",
3593
+ "no",
3594
+ "no",
3595
+ "no"
3596
+ ],
3597
+ "table": {
3598
+ "header": [
3599
+ "Etymology"
3600
+ ],
3601
+ "rows": [
3602
+ [
3603
+ "The Battle of Alamance which was derived from the local Native American word meaning \"blue clay\" found in the Great Alamance Creek"
3604
+ ],
3605
+ [
3606
+ "William J. Alexander, member of the legislature and Speaker of the North Carolina House of Commons"
3607
+ ],
3608
+ [
3609
+ "Derived from a corruption of the Delaware Indian name for the Allegheny and Ohio Rivers and is said to have meant \"a fine stream\""
3610
+ ],
3611
+ [
3612
+ "George, Lord Anson (1697\u20131762), a celebrated English admiral who circumnavigated the globe"
3613
+ ],
3614
+ [
3615
+ "Samuel Ashe (1725\u20131813), a Revolutionary patriot, superior court judge, and governor of North Carolina"
3616
+ ],
3617
+ [
3618
+ "Waightstill Avery (1741\u20131821), a soldier of the Revolution and Attorney General of North Carolina"
3619
+ ],
3620
+ [
3621
+ "Henry Somerset, Duke of Beaufort, who in 1709 became one of the Lords Proprietor"
3622
+ ],
3623
+ [
3624
+ "James or Henry Bertie, two Lords Proprietor of colonial North Carolina"
3625
+ ],
3626
+ [
3627
+ "Martin Bladen, a member of the Board of Trade"
3628
+ ],
3629
+ [
3630
+ "George I of Great Britain (1660\u20131727), Duke of Brunswick and L\u00fcneburg"
3631
+ ],
3632
+ [
3633
+ "Edward Buncombe, a Revolutionary soldier, who was wounded and captured at the Battle of Germantown, and died a paroled prisoner in Philadelphia"
3634
+ ],
3635
+ [
3636
+ "Thomas Burke (1747\u20131783), a member of the Continental Congress and governor of North Carolina"
3637
+ ],
3638
+ [
3639
+ "Stephen Cabarrus (1754\u20131808), member of the legislature and Speaker of the North Carolina House of Commons"
3640
+ ],
3641
+ [
3642
+ "Joseph Caldwell (1773\u20131835), the first president of the University of North Carolina"
3643
+ ],
3644
+ [
3645
+ "Charles Pratt, 1st Earl Camden (1714\u20131794), who opposed the taxation of the American colonists"
3646
+ ],
3647
+ [
3648
+ "John Carteret, 2nd Earl Granville (1690\u20131763), who inherited one-eighth share in the Province of Carolina through his great-grandfather George Carteret"
3649
+ ],
3650
+ [
3651
+ "Richard Caswell (1729\u20131789), member of the first Continental Congress and first governor of North Carolina after the Declaration of Independence"
3652
+ ],
3653
+ [
3654
+ "Catawba Nation"
3655
+ ],
3656
+ [
3657
+ "William Pitt, 1st Earl of Chatham (1708\u20131778), Secretary of State during the French and Indian War and was later Prime Minister of Great Britain"
3658
+ ],
3659
+ [
3660
+ "Cherokee Nation"
3661
+ ],
3662
+ [
3663
+ "Chowan Native American tribe"
3664
+ ],
3665
+ [
3666
+ "Henry Clay (1777\u20131852), statesman and orator who represented Kentucky in both the House of Representatives and Senate"
3667
+ ],
3668
+ [
3669
+ "Benjamin Cleveland (1738\u20131806), a colonel in the American Revolutionary War who took part in the Battle of Kings Mountain"
3670
+ ],
3671
+ [
3672
+ "Christopher Columbus (1451\u20131507), navigator, explorer, and one of the first Europeans to explore the Americas"
3673
+ ],
3674
+ [
3675
+ "William Craven, 1st Earl of Craven (1608\u20131697), who was a Lords Proprietor of colonial North Carolina"
3676
+ ],
3677
+ [
3678
+ "Prince William, Duke of Cumberland (1721\u20131765), a military leader and son of George II"
3679
+ ],
3680
+ [
3681
+ "Traditionally said to be an American Indian word for wild geese, also rendered \"Coratank\""
3682
+ ],
3683
+ [
3684
+ "Virginia Dare (b. 1587), the first child born of English parents in America"
3685
+ ],
3686
+ [
3687
+ "William Lee Davidson (1746\u20131781), an American Revolutionary War general who was mortally wounded at Cowan's Ford"
3688
+ ],
3689
+ [
3690
+ "William Richardson Davie (1756\u20131820), a member of the Federal Convention and governor of North Carolina"
3691
+ ],
3692
+ [
3693
+ "Thomas Hay, Viscount Dupplin (1710\u20131787), who was the 9th Earl of Kinnoull"
3694
+ ]
3695
+ ]
3696
+ },
3697
+ "title": "List of counties in North Carolina"
3698
+ },
3699
+ {
3700
+ "id": 142,
3701
+ "qa_question": "map@What is the time span?",
3702
+ "qa_column": "Term",
3703
+ "qa_answer": [
3704
+ "5",
3705
+ "5",
3706
+ "11",
3707
+ "/",
3708
+ "1",
3709
+ "6",
3710
+ "3",
3711
+ "9",
3712
+ "4",
3713
+ "3",
3714
+ "/",
3715
+ "11",
3716
+ "5",
3717
+ "4",
3718
+ "3",
3719
+ "/"
3720
+ ],
3721
+ "table": {
3722
+ "header": [
3723
+ "Term"
3724
+ ],
3725
+ "rows": [
3726
+ [
3727
+ "1859\u20131864"
3728
+ ],
3729
+ [
3730
+ "1864\u20131869"
3731
+ ],
3732
+ [
3733
+ "1869\u20131880"
3734
+ ],
3735
+ [
3736
+ "Term"
3737
+ ],
3738
+ [
3739
+ "1894\u20131895"
3740
+ ],
3741
+ [
3742
+ "1895\u20131901"
3743
+ ],
3744
+ [
3745
+ "1901\u20131904"
3746
+ ],
3747
+ [
3748
+ "1904\u20131913"
3749
+ ],
3750
+ [
3751
+ "1913\u20131917"
3752
+ ],
3753
+ [
3754
+ "1917\u20131920"
3755
+ ],
3756
+ [
3757
+ "Term"
3758
+ ],
3759
+ [
3760
+ "1927\u20131938"
3761
+ ],
3762
+ [
3763
+ "1938\u20131943"
3764
+ ],
3765
+ [
3766
+ "1943\u20131947"
3767
+ ],
3768
+ [
3769
+ "1947\u20131950"
3770
+ ],
3771
+ [
3772
+ "Term"
3773
+ ]
3774
+ ]
3775
+ },
3776
+ "title": "Electoral district of Lachlan"
3777
+ },
3778
+ {
3779
+ "id": 112,
3780
+ "qa_question": "map@What is his/her country?",
3781
+ "qa_column": "Rider",
3782
+ "qa_answer": [
3783
+ "italy",
3784
+ "spain",
3785
+ "finland",
3786
+ "us",
3787
+ "spain",
3788
+ "italy",
3789
+ "japan",
3790
+ "spain",
3791
+ "thailand",
3792
+ "italy",
3793
+ "czech republic",
3794
+ "italy",
3795
+ "hungray",
3796
+ "czech republic",
3797
+ "italy",
3798
+ "spain",
3799
+ "indonesia",
3800
+ "japan",
3801
+ "italy",
3802
+ "spain",
3803
+ "italy",
3804
+ "spain",
3805
+ "italy"
3806
+ ],
3807
+ "table": {
3808
+ "header": [
3809
+ "Rider"
3810
+ ],
3811
+ "rows": [
3812
+ [
3813
+ "Marco Simoncelli"
3814
+ ],
3815
+ [
3816
+ "\u00c1lvaro Bautista"
3817
+ ],
3818
+ [
3819
+ "Mika Kallio"
3820
+ ],
3821
+ [
3822
+ "Julian Simon"
3823
+ ],
3824
+ [
3825
+ "Alex Debon"
3826
+ ],
3827
+ [
3828
+ "Roberto Locatelli"
3829
+ ],
3830
+ [
3831
+ "Yuki Takahashi"
3832
+ ],
3833
+ [
3834
+ "Aleix Espargaro"
3835
+ ],
3836
+ [
3837
+ "Ratthapark Wilairot"
3838
+ ],
3839
+ [
3840
+ "Fabrizio Lai"
3841
+ ],
3842
+ [
3843
+ "Karel Abraham"
3844
+ ],
3845
+ [
3846
+ "Alex Baldolini"
3847
+ ],
3848
+ [
3849
+ "Imre Toth"
3850
+ ],
3851
+ [
3852
+ "Lukas Pesek"
3853
+ ],
3854
+ [
3855
+ "Simone Grotzkyj"
3856
+ ],
3857
+ [
3858
+ "Daniel Arcas"
3859
+ ],
3860
+ [
3861
+ "Doni Tata Pradita"
3862
+ ],
3863
+ [
3864
+ "Hiroshi Aoyama"
3865
+ ],
3866
+ [
3867
+ "Mattia Pasini"
3868
+ ],
3869
+ [
3870
+ "H\u00e9ctor Faubel"
3871
+ ],
3872
+ [
3873
+ "Federico Sandi"
3874
+ ],
3875
+ [
3876
+ "Manuel Hernandez"
3877
+ ],
3878
+ [
3879
+ "Stefano Bianco"
3880
+ ]
3881
+ ]
3882
+ },
3883
+ "title": "2008 Australian motorcycle Grand Prix"
3884
+ }
3885
+ ]
utils/.DS_Store ADDED
Binary file (8.2 kB). View file
 
utils/__init__.py ADDED
File without changes
utils/errors.py ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
class DuplicateColumnsError(Exception):
    """Raised when a table contains duplicate column names.

    Attributes:
        msg: human-readable description of the duplication.
    """

    def __init__(self, msg):
        # Pass the message to Exception so str(e) / tracebacks show it;
        # the original stored only `self.msg`, leaving str(e) empty.
        super().__init__(msg)
        self.msg = msg
utils/evaluator.py ADDED
@@ -0,0 +1,105 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import re
2
+
3
+ from utils.normalizer import str_normalize
4
+ from utils.wtq.evaluator import to_value_list, check_denotation
5
+ from utils.mmqa.evaluator import acc
6
+
7
+
8
class Evaluator:
    """Routes answer evaluation to the metric of the requested dataset
    (WikiTQ, TabFact, or MMQA)."""

    def __init__(self):
        pass

    def evaluate(
            self,
            pred_answer,
            gold_answer,
            dataset,
            allow_semantic=True,
            question=None
    ):
        """Evaluate a prediction against the gold answer for one dataset.

        Args:
            pred_answer: predicted answer; a list of spans for 'wikitq'/'mmqa'.
            gold_answer: gold answer in the dataset's native format.
            dataset: one of 'wikitq', 'tab_fact', 'mmqa'.
            allow_semantic: 'wikitq' only - also accept semantically
                equivalent answers (0/1 vs no/yes, units, date substrings).
            question: 'wikitq' only - question text; required when
                allow_semantic is True.

        Returns:
            Match result (bool for wikitq/tab_fact; whatever the official
            MMQA `acc` metric returns for mmqa).

        Raises:
            ValueError: if `dataset` is not one of the supported names.
        """
        if dataset == 'wikitq':
            return self.eval_ex_match(pred_answer, gold_answer, allow_semantic, question)
        elif dataset == 'tab_fact':
            return self.eval_tabfact_match(pred_answer, gold_answer)
        elif dataset == 'mmqa':
            # For more metrics on MMQA,
            # please use the utils/mmqa/eval_mmqa.py to call official on all prediction data
            return self.eval_mmqa_match(pred_answer, gold_answer)
        else:
            raise ValueError(f'{dataset} evaluator is not supported.')

    def eval_ex_match(self, pred, gold, allow_semantic=True, question=None):
        """WikiTQ execution-accuracy match.

        Without semantic matching this is the official WikiTQ denotation
        check after string normalization. With it, two relaxations are
        tried first: (1) 0/1 predictions matched against no/yes or the
        "X or Y" options in the question; (2) number-with-unit and date
        substring matches.
        """
        pred = [str(p).lower().strip() for p in pred]
        gold = [str(g).lower().strip() for g in gold]

        if not allow_semantic:
            # WikiTQ eval w. string normalization using recognizer
            pred = [str_normalize(span) for span in pred]
            gold = [str_normalize(span) for span in gold]
            pred = to_value_list(pred)
            gold = to_value_list(gold)
            return check_denotation(pred, gold)
        else:
            assert isinstance(question, str)
            question = re.sub('\s+', ' ', question).strip().lower()
            pred = [str_normalize(span) for span in pred]
            gold = [str_normalize(span) for span in gold]
            # Compare as de-duplicated, order-insensitive span sets.
            pred = sorted(list(set(pred)))
            gold = sorted(list(set(gold)))
            # (1) 0 matches 'no', 1 matches 'yes'; 0 matches 'more', 1 matches 'less', etc.
            if len(pred) == 1 and len(gold) == 1:
                if (pred[0] == '0' and gold[0] == 'no') \
                        or (pred[0] == '1' and gold[0] == 'yes'):
                    return True
                question_tokens = question.split()
                try:
                    # For an "X or Y" question: 1 maps to the option before
                    # 'or', 0 to the option after it.
                    pos_or = question_tokens.index('or')
                    token_before_or, token_after_or = question_tokens[pos_or - 1], question_tokens[pos_or + 1]
                    if (pred[0] == '0' and gold[0] == token_after_or) \
                            or (pred[0] == '1' and gold[0] == token_before_or):
                        return True
                except Exception as e:
                    # No 'or' in the question (ValueError) or it is the
                    # first/last token (IndexError): skip this relaxation.
                    pass
            # (2) Number value (allow units) and Date substring match
            if len(pred) == 1 and len(gold) == 1:
                NUMBER_UNITS_PATTERN = re.compile('^\$*[+-]?([0-9]*[.])?[0-9]+(\s*%*|\s+\w+)$')
                DATE_PATTERN = re.compile('[0-9]{4}-[0-9]{1,2}-[0-9]{1,2}\s*([0-9]{1,2}:[0-9]{1,2}:[0-9]{1,2})?')
                DURATION_PATTERN = re.compile('(P|PT)(\d+)(Y|M|D|H|S)')
                p, g = pred[0], gold[0]
                # Restore `duration` type, e.g., from 'P3Y' -> '3'
                if re.match(DURATION_PATTERN, p):
                    p = re.match(DURATION_PATTERN, p).group(2)
                if re.match(DURATION_PATTERN, g):
                    g = re.match(DURATION_PATTERN, g).group(2)
                match = False
                num_flag, date_flag = False, False
                # Number w. unit match after string normalization.
                # Either pred or gold being number w. units suffices it.
                if re.match(NUMBER_UNITS_PATTERN, p) or re.match(NUMBER_UNITS_PATTERN, g):
                    num_flag = True
                # Date match after string normalization.
                # Either pred or gold being date suffices it.
                if re.match(DATE_PATTERN, p) or re.match(DATE_PATTERN, g):
                    date_flag = True
                if num_flag:
                    # Token-subset match, e.g. '500' vs '500 km'.
                    p_set, g_set = set(p.split()), set(g.split())
                    if p_set.issubset(g_set) or g_set.issubset(p_set):
                        match = True
                if date_flag:
                    # Compare date parts, e.g. '2008' vs '2008-04-13'.
                    p_set, g_set = set(p.replace('-', ' ').split()), set(g.replace('-', ' ').split())
                    if p_set.issubset(g_set) or g_set.issubset(p_set):
                        match = True
                if match:
                    return True
            # Fall back to the official WikiTQ denotation check.
            pred = to_value_list(pred)
            gold = to_value_list(gold)
            return check_denotation(pred, gold)

    def eval_tabfact_match(self, pred, gold):
        """TabFact: exact string equality of the (first) predicted label."""
        if isinstance(pred, list):
            pred = pred[0]
        pred, gold = str(pred), str(gold)
        return pred == gold

    def eval_mmqa_match(self, pred_answer, gold_answer):
        """MMQA: delegate to the official accuracy metric."""
        return acc(pred_answer, gold_answer)
utils/gpt2/config.json ADDED
@@ -0,0 +1,31 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "activation_function": "gelu_new",
3
+ "architectures": [
4
+ "GPT2LMHeadModel"
5
+ ],
6
+ "attn_pdrop": 0.1,
7
+ "bos_token_id": 50256,
8
+ "embd_pdrop": 0.1,
9
+ "eos_token_id": 50256,
10
+ "initializer_range": 0.02,
11
+ "layer_norm_epsilon": 1e-05,
12
+ "model_type": "gpt2",
13
+ "n_ctx": 1024,
14
+ "n_embd": 768,
15
+ "n_head": 12,
16
+ "n_layer": 12,
17
+ "n_positions": 1024,
18
+ "resid_pdrop": 0.1,
19
+ "summary_activation": null,
20
+ "summary_first_dropout": 0.1,
21
+ "summary_proj_to_labels": true,
22
+ "summary_type": "cls_index",
23
+ "summary_use_proj": true,
24
+ "task_specific_params": {
25
+ "text-generation": {
26
+ "do_sample": true,
27
+ "max_length": 50
28
+ }
29
+ },
30
+ "vocab_size": 50257
31
+ }
utils/gpt2/merges.txt ADDED
The diff for this file is too large to render. See raw diff
 
utils/gpt2/tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
utils/gpt2/vocab.json ADDED
The diff for this file is too large to render. See raw diff
 
utils/matcher.py ADDED
@@ -0,0 +1,65 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from fuzzywuzzy import fuzz
2
+ import pandas as pd
3
+ import string
4
+
5
+ from utils.normalizer import str_normalize
6
+
7
+
8
class Matcher(object):
    """Fuzzy-matches text against the cells of a pandas DataFrame.

    Uses fuzzywuzzy's ratio score to locate which table cells a sentence
    n-gram or a single phrase refers to.
    """

    def __init__(self):
        pass

    def match_sentence_with_table(self, sent: str, df: pd.DataFrame, fuzz_threshold=100):
        """Match every 1..5-gram of `sent` against all table cells.

        Returns a dict mapping each surviving (longest) phrase to a list of
        (cell_text, score, (row_id, col_id)) tuples, best score first.
        """
        matches_by_phrase = dict()
        cleaned = str_normalize(sent).strip(string.punctuation)
        words = cleaned.split()
        for n in range(5, 0, -1):
            candidate_phrases = self._create_ngram_list(words, n)
            for row_idx, row in df.iterrows():
                for col_idx, cell_value in enumerate(row):
                    # The synthetic row-id column is never a match target.
                    if df.columns[col_idx] == 'row_id':
                        continue
                    cell_text = str(cell_value)
                    for phrase in candidate_phrases:
                        score = fuzz.ratio(phrase, cell_text)
                        if score >= fuzz_threshold:
                            matches_by_phrase.setdefault(phrase, []).append(
                                (cell_text, score, (row_idx, col_idx)))
        # Keep only maximal phrases: drop any phrase contained in another.
        for phrase in list(matches_by_phrase.keys()):
            if any(phrase != other and phrase in other for other in matches_by_phrase):
                del matches_by_phrase[phrase]
        # Best fuzzy score first for each phrase.
        for cell_matches in matches_by_phrase.values():
            cell_matches.sort(key=lambda m: m[1], reverse=True)

        return matches_by_phrase

    def match_phrase_with_table(self, phrase: str, df: pd.DataFrame, fuzz_threshold=70):
        """Return [(cell_text, score, (row_id, col_id)), ...] for all cells
        whose fuzzy score against `phrase` reaches the threshold, best first."""
        hits = []
        for row_idx, row in df.iterrows():
            for col_idx, cell_value in enumerate(row):
                cell_text = str(cell_value)
                score = fuzz.ratio(phrase, cell_text)
                if score >= fuzz_threshold:
                    hits.append((cell_text, score, (row_idx, col_idx)))
        hits.sort(key=lambda m: m[1], reverse=True)
        return hits

    def _create_ngram_list(self, input_list, ngram_num):
        """Space-joined n-grams of the token list; if the list is shorter
        than n, the individual tokens are returned unchanged."""
        if len(input_list) <= ngram_num:
            return list(input_list)
        windows = zip(*[input_list[i:] for i in range(ngram_num)])
        return [" ".join(window) for window in windows]
utils/normalizer.py ADDED
@@ -0,0 +1,498 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from typing import List, Dict
2
+ import pandas as pd
3
+ import recognizers_suite
4
+ from recognizers_suite import Culture
5
+ import re
6
+ import unicodedata
7
+ from fuzzywuzzy import fuzz
8
+
9
+ from utils.sql.extraction_from_sql import *
10
+ from utils.sql.all_keywords import ALL_KEY_WORDS
11
+
12
+ culture = Culture.English
13
+
14
+
15
def str_normalize(user_input, recognition_types=None):
    """A string normalizer which recognize and normalize value based on recognizers_suite.

    Runs Microsoft Recognizers-Text over the input and replaces each
    recognized datetime/number span in place with its canonical value
    (timex form for datetimes).

    Args:
        user_input: value to normalize; coerced to str first.
        recognition_types: recognizer names to run; defaults to
            ["datetime", "number"].

    Returns:
        The input string with recognized spans replaced.
    """
    user_input = str(user_input)
    # Literal "\n" sequences come from flattened multi-line table cells.
    user_input = user_input.replace("\\n", "; ")

    def replace_by_idx_pairs(orig_str, strs_to_replace, idx_pairs):
        # Splice replacements into orig_str at the given (start, end) spans;
        # assumes idx_pairs are non-overlapping and in ascending order.
        assert len(strs_to_replace) == len(idx_pairs)
        last_end = 0
        to_concat = []
        for idx_pair, str_to_replace in zip(idx_pairs, strs_to_replace):
            to_concat.append(orig_str[last_end:idx_pair[0]])
            to_concat.append(str_to_replace)
            last_end = idx_pair[1]
        to_concat.append(orig_str[last_end:])
        return ''.join(to_concat)

    if recognition_types is None:
        recognition_types = ["datetime",
                             "number",
                             # "ordinal",
                             # "percentage",
                             # "age",
                             # "currency",
                             # "dimension",
                             # "temperature",
                             ]

    for recognition_type in recognition_types:
        if re.match("\d+/\d+", user_input):
            # avoid calculating str as 1991/92
            continue
        recognized_list = getattr(recognizers_suite, "recognize_{}".format(recognition_type))(user_input,
                                                                                             culture)  # may match multiple parts
        strs_to_replace = []
        idx_pairs = []
        for recognized in recognized_list:
            if not recognition_type == 'datetime':
                recognized_value = recognized.resolution['value']
                if str(recognized_value).startswith("P"):
                    # if the datetime is a period:
                    continue
                else:
                    strs_to_replace.append(recognized_value)
                    idx_pairs.append((recognized.start, recognized.end + 1))
            else:
                if recognized.resolution:  # in some cases, this variable could be none.
                    if len(recognized.resolution['values']) == 1:
                        # Ambiguous dates (multiple candidate values) are
                        # deliberately left untouched.
                        strs_to_replace.append(
                            recognized.resolution['values'][0]['timex'])  # We use timex as normalization
                        idx_pairs.append((recognized.start, recognized.end + 1))

        if len(strs_to_replace) > 0:
            user_input = replace_by_idx_pairs(user_input, strs_to_replace, idx_pairs)

    if re.match("(.*)-(.*)-(.*) 00:00:00", user_input):
        # Drop a spurious midnight time component left by normalization.
        user_input = user_input[:-len("00:00:00") - 1]
        # '2008-04-13 00:00:00' -> '2008-04-13'
    return user_input
73
+
74
+
75
def prepare_df_for_neuraldb_from_table(table: Dict, add_row_id=True, normalize=True, lower_case=True):
    """Build a pandas DataFrame from a {'header': [...], 'rows': [[...]]} dict.

    Args:
        table: table dict with 'header' and 'rows' keys.
        add_row_id: prepend a string 'row_id' column (0, 1, ...) unless one
            already exists.
        normalize: run convert_df_type to infer int/float/datetime columns
            and normalize cell strings.
        lower_case: forwarded to convert_df_type when normalizing.

    Returns:
        The prepared DataFrame.
    """
    columns = table['header']
    data = table['rows']
    if add_row_id and 'row_id' not in columns:
        columns = ["row_id"] + columns
        data = [[str(i)] + r for i, r in enumerate(data)]
    frame = pd.DataFrame(data=data, columns=columns)
    if normalize:
        frame = convert_df_type(frame, lower_case=lower_case)
    return frame
86
+
87
+
88
def convert_df_type(df: pd.DataFrame, lower_case=True):
    """
    A simple converter of dataframe data type from string to int/float/datetime.

    Pipeline (order matters; each step mutates `df` in place):
      1. rename empty / duplicate column names,
      2. map null-like tokens ('', '-', '/') to 'None', and to 'NaN' in
         otherwise all-numeric columns,
      3. normalize every cell with str_normalize,
      4. strip spurious '-01-01 00:00:00' style suffixes that datetime
         normalization adds uniformly to a column,
      5. optionally lower-case headers and cells,
      6. cast columns to int / float / datetime64 where possible.

    Returns the (mutated) DataFrame.
    """

    def get_table_content_in_column(table):
        # Transpose the table into a list of per-column value lists.
        if isinstance(table, pd.DataFrame):
            header = table.columns.tolist()
            rows = table.values.tolist()
        else:
            # Standard table dict format
            header, rows = table['header'], table['rows']
        all_col_values = []
        for i in range(len(header)):
            one_col_values = []
            for _row in rows:
                one_col_values.append(_row[i])
            all_col_values.append(one_col_values)
        return all_col_values

    # Rename empty columns
    new_columns = []
    for idx, header in enumerate(df.columns):
        if header == '':
            new_columns.append('FilledColumnName')  # Fixme: give it a better name when all finished!
        else:
            new_columns.append(header)
    df.columns = new_columns

    # Rename duplicate columns (append _2, _3, ... to later occurrences)
    new_columns = []
    for idx, header in enumerate(df.columns):
        if header in new_columns:
            new_header, suffix = header, 2
            while new_header in new_columns:
                new_header = header + '_' + str(suffix)
                suffix += 1
            new_columns.append(new_header)
        else:
            new_columns.append(header)
    df.columns = new_columns

    # Recognize null values like "-"
    null_tokens = ['', '-', '/']
    for header in df.columns:
        df[header] = df[header].map(lambda x: str(None) if x in null_tokens else x)

    # Convert the null values in digit column to "NaN"
    all_col_values = get_table_content_in_column(df)
    for col_i, one_col_values in enumerate(all_col_values):
        all_number_flag = True
        for row_i, cell_value in enumerate(one_col_values):
            try:
                float(cell_value)
            except Exception as e:
                if not cell_value in [str(None), str(None).lower()]:
                    # None or none
                    all_number_flag = False
        if all_number_flag:
            _header = df.columns[col_i]
            df[_header] = df[_header].map(lambda x: "NaN" if x in [str(None), str(None).lower()] else x)

    # Normalize cell values.
    for header in df.columns:
        df[header] = df[header].map(lambda x: str_normalize(x))

    # Strip the mis-added "01-01 00:00:00"
    # (only when EVERY cell in the column carries the same suffix, so the
    # column really is dates at a coarser granularity)
    all_col_values = get_table_content_in_column(df)
    for col_i, one_col_values in enumerate(all_col_values):
        all_with_00_00_00 = True
        all_with_01_00_00_00 = True
        all_with_01_01_00_00_00 = True
        for row_i, cell_value in enumerate(one_col_values):
            if not str(cell_value).endswith(" 00:00:00"):
                all_with_00_00_00 = False
            if not str(cell_value).endswith("-01 00:00:00"):
                all_with_01_00_00_00 = False
            if not str(cell_value).endswith("-01-01 00:00:00"):
                all_with_01_01_00_00_00 = False
        # Check the most specific suffix first.
        if all_with_01_01_00_00_00:
            _header = df.columns[col_i]
            df[_header] = df[_header].map(lambda x: x[:-len("-01-01 00:00:00")])
            continue

        if all_with_01_00_00_00:
            _header = df.columns[col_i]
            df[_header] = df[_header].map(lambda x: x[:-len("-01 00:00:00")])
            continue

        if all_with_00_00_00:
            _header = df.columns[col_i]
            df[_header] = df[_header].map(lambda x: x[:-len(" 00:00:00")])
            continue

    # Do header and cell value lower case
    if lower_case:
        new_columns = []
        for header in df.columns:
            lower_header = str(header).lower()
            # Lower-casing can re-introduce duplicates; disambiguate with -2, -3, ...
            if lower_header in new_columns:
                new_header, suffix = lower_header, 2
                while new_header in new_columns:
                    new_header = lower_header + '-' + str(suffix)
                    suffix += 1
                new_columns.append(new_header)
            else:
                new_columns.append(lower_header)
        df.columns = new_columns
        for header in df.columns:
            # df[header] = df[header].map(lambda x: str(x).lower())
            df[header] = df[header].map(lambda x: str(x).lower().strip())

    # Recognize header type
    for header in df.columns:

        float_able = False
        int_able = False
        datetime_able = False

        # Recognize int & float type
        try:
            df[header].astype("float")
            float_able = True
        except:
            pass

        if float_able:
            try:
                # Integer column only if every float value is whole.
                if all(df[header].astype("float") == df[header].astype(int)):
                    int_able = True
            except:
                pass

        if float_able:
            if int_able:
                df[header] = df[header].astype(int)
            else:
                df[header] = df[header].astype(float)

        # Recognize datetime type
        try:
            df[header].astype("datetime64")
            datetime_able = True
        except:
            pass

        if datetime_able:
            df[header] = df[header].astype("datetime64")

    return df
238
+
239
+
240
def normalize(x):
    """Normalize a string the way the official WikiTableQuestions
    evaluator does: strip diacritics, canonicalize quotes/dashes,
    repeatedly peel trailing citations/parentheticals/quotes, drop a
    final period, collapse whitespace, and lower-case.

    Returns None unchanged.
    """
    # Copied from WikiTableQuestions dataset official evaluator.
    if x is None:
        return None
    # Remove diacritics: decompose, then drop combining marks.
    decomposed = unicodedata.normalize('NFKD', x)
    x = ''.join(ch for ch in decomposed if unicodedata.category(ch) != 'Mn')
    # Normalize typographic quotes and dash variants.
    for pattern, replacement in (("[‘’´`]", "'"),
                                 ("[“”]", "\""),
                                 ("[‐‑‒–—−]", "-")):
        x = re.sub(pattern, replacement, x)
    # Peel trailing decorations until the string stops changing.
    while True:
        previous = x
        # Remove citations
        x = re.sub("((?<!^)\[[^\]]*\]|\[\d+\]|[•♦†‡*#+])*$", "", x.strip())
        # Remove details in parenthesis
        x = re.sub("(?<!^)( \([^)]*\))*$", "", x.strip())
        # Remove outermost quotation mark
        x = re.sub('^"([^"]*)"$', r'\1', x.strip())
        if x == previous:
            break
    # Remove final '.'
    if x.endswith('.'):
        x = x[:-1]
    # Collapse whitespaces and convert to lower case
    return re.sub('\s+', ' ', x, flags=re.U).lower().strip()
268
+
269
+
270
def post_process_sql(sql_str, df, table_title=None, process_program_with_fuzzy_match_on_db=True, verbose=False):
    """Post process SQL: including basic fix and further fuzzy match on cell and SQL to process.

    Two phases:
      1. basic_fix - rewrite "FROM <title>" to "FROM w", escape embedded
         newlines, and wrap column-name occurrences in backticks.
      2. fuzzy_match_process (optional, best-effort) - replace SQL literal
         values and column names with their closest table counterparts via
         fuzzy matching. Any exception here is swallowed so a parse failure
         never loses the basic-fixed SQL.

    Args:
        sql_str: raw generated SQL.
        df: the table the SQL runs against (normalized DataFrame).
        table_title: optional title to rewrite into "FROM w".
        process_program_with_fuzzy_match_on_db: enable phase 2.
        verbose: print matching details.

    Returns:
        The post-processed SQL string.
    """

    def basic_fix(sql_str, all_headers, table_title=None):
        def finditer(sub_str: str, mother_str: str):
            # All non-overlapping (start, end) occurrences of sub_str.
            result = []
            start_index = 0
            while True:
                # BUGFIX: the original passed end=-1, which searched only
                # mother_str[start:-1] and so missed a header that ends at
                # the very last character of the SQL string.
                start_index = mother_str.find(sub_str, start_index)
                if start_index == -1:
                    break
                end_idx = start_index + len(sub_str)
                result.append((start_index, end_idx))
                start_index = end_idx
            return result

        if table_title:
            sql_str = sql_str.replace("FROM " + table_title, "FROM w")
            sql_str = sql_str.replace("FROM " + table_title.lower(), "FROM w")

        """Case 1: Fix the `` missing. """
        # Remove the null header.
        while '' in all_headers:
            all_headers.remove('')

        # Remove the '\n' in header.
        # This is because the WikiTQ won't actually show the str in two lines,
        # they use '\n' to mean that, and display it in the same line when print.
        sql_str = sql_str.replace("\\n", "\n")
        sql_str = sql_str.replace("\n", "\\n")

        # Add `` in SQL.
        # have_matched marks each character: 0 = free, 1 = part of a matched
        # header, 2 = inside a quoted string (never backtick those).
        all_headers.sort(key=lambda x: len(x), reverse=True)
        have_matched = [0 for i in range(len(sql_str))]

        # match quotation
        idx_s_single_quotation = [_ for _ in range(1, len(sql_str)) if
                                  sql_str[_] in ["\'"] and sql_str[_ - 1] not in ["\'"]]
        idx_s_double_quotation = [_ for _ in range(1, len(sql_str)) if
                                  sql_str[_] in ["\""] and sql_str[_ - 1] not in ["\""]]
        for idx_s in [idx_s_single_quotation, idx_s_double_quotation]:
            if len(idx_s) % 2 == 0:
                for idx in range(int(len(idx_s) / 2)):
                    start_idx = idx_s[idx * 2]
                    end_idx = idx_s[idx * 2 + 1]
                    have_matched[start_idx: end_idx] = [2 for _ in range(end_idx - start_idx)]

        # match headers (longest first, so shorter headers cannot claim a
        # substring of an already-matched longer one)
        for header in all_headers:
            if (header in sql_str) and (header not in ALL_KEY_WORDS):
                all_matched_of_this_header = finditer(header, sql_str)
                for matched_of_this_header in all_matched_of_this_header:
                    start_idx, end_idx = matched_of_this_header
                    # BUGFIX: the original wrote `all(slice) == 0`, which is
                    # "any element is falsy", not the intended "every element
                    # is unclaimed"; that allowed overlaps with quoted
                    # regions. Also guard the boundary character lookups so
                    # a match at position 0 or at the end of the string
                    # cannot wrap around / raise IndexError.
                    if all(v == 0 for v in have_matched[start_idx: end_idx]) \
                            and (start_idx == 0 or sql_str[start_idx - 1] != "`") \
                            and (end_idx >= len(sql_str) or sql_str[end_idx] != "`"):
                        have_matched[start_idx: end_idx] = [1 for _ in range(end_idx - start_idx)]

        # re-compose sql from the matched idx.
        start_have_matched = [0] + have_matched
        end_have_matched = have_matched + [0]
        # A backtick span is a maximal run of 1s. Compare neighbours with
        # "!= 1" (not "== 0") so a header run sitting flush against a quoted
        # region (value 2) still gets a start boundary; otherwise the
        # start/end lists desynchronize and the assert below fires.
        start_idx_s = [idx - 1 for idx in range(1, len(start_have_matched)) if
                       start_have_matched[idx - 1] != 1 and start_have_matched[idx] == 1]
        end_idx_s = [idx for idx in range(len(end_have_matched) - 1) if
                     end_have_matched[idx] == 1 and end_have_matched[idx + 1] != 1]
        assert len(start_idx_s) == len(end_idx_s)
        spans = []
        current_idx = 0
        for start_idx, end_idx in zip(start_idx_s, end_idx_s):
            spans.append(sql_str[current_idx:start_idx])
            spans.append(sql_str[start_idx:end_idx + 1])
            current_idx = end_idx + 1
        spans.append(sql_str[current_idx:])
        sql_str = '`'.join(spans)

        return sql_str

    def fuzzy_match_process(sql_str, df, verbose=False):
        """
        Post-process SQL by fuzzy matching value with table contents.
        """

        def _get_matched_cells(value_str, df, fuzz_threshold=70):
            """
            Get matched table cells with value token. Returns
            [(cell, score), ...] sorted best-first; an exact (score 100)
            match short-circuits and is returned alone.
            """
            matched_cells = []
            for row_id, row in df.iterrows():
                for cell in row:
                    cell = str(cell)
                    fuzz_score = fuzz.ratio(value_str, cell)
                    if fuzz_score == 100:
                        matched_cells = [(cell, fuzz_score)]
                        return matched_cells
                    if fuzz_score >= fuzz_threshold:
                        matched_cells.append((cell, fuzz_score))

            matched_cells = sorted(matched_cells, key=lambda x: x[1], reverse=True)
            return matched_cells

        def _check_valid_fuzzy_match(value_str, matched_cell):
            """
            Check if the fuzzy match is valid, now considering:
            1. The number/date should not be disturbed, but adding new number or deleting number is valid.
            """
            number_pattern = "[+]?[.]?[\d]+(?:,\d\d\d)*[\.]?\d*(?:[eE][-+]?\d+)?"
            numbers_in_value = re.findall(number_pattern, value_str)
            numbers_in_matched_cell = re.findall(number_pattern, matched_cell)
            try:
                numbers_in_value = [float(num.replace(',', '')) for num in numbers_in_value]
            except:
                print(f"Can't convert number string {numbers_in_value} into float in _check_valid_fuzzy_match().")
            try:
                numbers_in_matched_cell = [float(num.replace(',', '')) for num in numbers_in_matched_cell]
            except:
                print(
                    f"Can't convert number string {numbers_in_matched_cell} into float in _check_valid_fuzzy_match().")
            numbers_in_value = set(numbers_in_value)
            numbers_in_matched_cell = set(numbers_in_matched_cell)

            # Valid when one number set contains the other (numbers may be
            # added or removed, but not altered).
            if numbers_in_value.issubset(numbers_in_matched_cell) or numbers_in_matched_cell.issubset(numbers_in_value):
                return True
            else:
                return False

        # Drop trailing '\n```', a pattern that may appear in Codex SQL generation
        sql_str = sql_str.rstrip('```').rstrip('\n')

        # Replace QA module with placeholder so the SQL parser never sees it
        qa_pattern = "QA\(.+?;.*?`.+?`.*?\)"
        qas = re.findall(qa_pattern, sql_str)
        for idx, qa in enumerate(qas):
            sql_str = sql_str.replace(qa, f"placeholder{idx}")

        # Parse and replace SQL value with table contents
        sql_tokens = tokenize(sql_str)
        sql_template_tokens = extract_partial_template_from_sql(sql_str)
        # Fix 'between' keyword bug in parsing templates
        fixed_sql_template_tokens = []
        sql_tok_bias = 0
        for idx, sql_templ_tok in enumerate(sql_template_tokens):
            sql_tok = sql_tokens[idx + sql_tok_bias]
            if sql_tok == 'between' and sql_templ_tok == '[WHERE_OP]':
                fixed_sql_template_tokens.extend(['[WHERE_OP]', '[VALUE]', 'and'])
                sql_tok_bias += 2  # pass '[VALUE]', 'and'
            else:
                fixed_sql_template_tokens.append(sql_templ_tok)
        sql_template_tokens = fixed_sql_template_tokens
        for idx, tok in enumerate(sql_tokens):
            if tok in ALL_KEY_WORDS:
                sql_tokens[idx] = tok.upper()

        if verbose:
            print(sql_tokens)
            print(sql_template_tokens)

        assert len(sql_tokens) == len(sql_template_tokens)
        value_indices = [idx for idx in range(len(sql_template_tokens)) if sql_template_tokens[idx] == '[VALUE]']
        for value_idx in value_indices:
            # Skip the value if the where condition column is QA module
            if value_idx >= 2 and sql_tokens[value_idx - 2].startswith('placeholder'):
                continue
            value_str = sql_tokens[value_idx]
            # Drop "" for fuzzy match
            is_string = False
            if value_str[0] == "\"" and value_str[-1] == "\"":
                value_str = value_str[1:-1]
                is_string = True
            # If already fuzzy match (LIKE pattern), skip
            if value_str[0] == '%' or value_str[-1] == '%':
                continue
            value_str = value_str.lower()
            # Fuzzy Match
            matched_cells = _get_matched_cells(value_str, df)

            if verbose:
                print(matched_cells)

            new_value_str = value_str
            if matched_cells:
                # Take the best-scoring cell whose numbers are consistent
                # with the original value.
                for matched_cell, fuzz_score in matched_cells:
                    if _check_valid_fuzzy_match(value_str, matched_cell):
                        new_value_str = matched_cell
                        if verbose and new_value_str != value_str:
                            print("\tfuzzy match replacing!", value_str, '->', matched_cell, f'fuzz_score:{fuzz_score}')
                        break
            if is_string:
                new_value_str = f"\"{new_value_str}\""
            sql_tokens[value_idx] = new_value_str
        # Compose new sql string
        # Clean column name in SQL since columns may have been tokenized in the postprocessing, e.g., (ppp) -> ( ppp )
        new_sql_str = ' '.join(sql_tokens)
        sql_columns = re.findall('`\s(.*?)\s`', new_sql_str)
        for sql_col in sql_columns:
            matched_columns = []
            for col in df.columns:
                score = fuzz.ratio(sql_col.lower(), col)
                if score == 100:
                    matched_columns = [(col, score)]
                    break
                if score >= 80:
                    matched_columns.append((col, score))
            matched_columns = sorted(matched_columns, key=lambda x: x[1], reverse=True)
            if matched_columns:
                matched_col = matched_columns[0][0]
                new_sql_str = new_sql_str.replace(f"` {sql_col} `", f"`{matched_col}`")
            else:
                new_sql_str = new_sql_str.replace(f"` {sql_col} `", f"`{sql_col}`")

        # Restore QA modules
        for idx, qa in enumerate(qas):
            new_sql_str = new_sql_str.replace(f"placeholder{idx}", qa)

        # Fix '<>' when composing the new sql
        new_sql_str = new_sql_str.replace('< >', '<>')

        return new_sql_str

    sql_str = basic_fix(sql_str, list(df.columns), table_title)

    if process_program_with_fuzzy_match_on_db:
        # Best-effort: a parse failure must not lose the basic-fixed SQL.
        try:
            sql_str = fuzzy_match_process(sql_str, df, verbose)
        except:
            pass

    return sql_str
utils/sql/__init__.py ADDED
File without changes
utils/sql/all_keywords.py ADDED
@@ -0,0 +1,31 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# SQL keyword inventories shared by the template/schema extraction utilities.

# Keywords that open a new SQL clause.
CLAUSE_KEYWORDS = (
    "select",
    "from",
    "where",
    "group",
    "order",
    "limit",
    "intersect",
    "union",
    "except",
)
# Keywords that appear in JOIN constructs.
JOIN_KEYWORDS = ("join", "on", "as")

# Comparison / predicate operators allowed in WHERE conditions.
WHERE_OPS = (
    "not",
    "between",
    "=",
    ">",
    "<",
    ">=",
    "<=",
    "!=",
    "in",
    "like",
    "is",
    "exists",
)
# Arithmetic operators combining two column units ("none" = no operator).
UNIT_OPS = ("none", "-", "+", "*", "/")
# Aggregation functions ("none" = plain column reference).
AGG_OPS = ("none", "max", "min", "count", "sum", "avg")

# Flat union of every keyword group above.
ALL_KEY_WORDS = CLAUSE_KEYWORDS + JOIN_KEYWORDS + WHERE_OPS + UNIT_OPS + AGG_OPS
utils/sql/extraction_from_sql.py ADDED
@@ -0,0 +1,622 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import argparse
import json
from utils.sql.process_sql import (
    tokenize, CLAUSE_KEYWORDS, WHERE_OPS, COND_OPS, UNIT_OPS, AGG_OPS,
    JOIN_KEYWORDS, ORDER_OPS, skip_semicolon, SQL_OPS)
# WHERE operators kept verbatim in extracted templates (others become "[WHERE_OP]").
KEPT_WHERE_OP = ('not', 'in', 'exists')
7
+
8
+
9
def parse_table_unit(toks, start_idx, tables_with_alias):
    """Consume a table reference, optionally aliased with ``as``.

    Records any alias into ``tables_with_alias`` (alias -> table) and
    returns (index of the next unconsumed token, table name).
    """
    table_name = toks[start_idx]
    has_alias = start_idx + 1 < len(toks) and toks[start_idx + 1] == "as"
    if has_alias:
        # "table as alias" spans three tokens; remember the alias mapping.
        tables_with_alias[toks[start_idx + 2]] = table_name
        return start_idx + 3, table_name
    return start_idx + 1, table_name
21
+
22
def parse_col(toks, start_idx, tables_with_alias, schema, default_tables=None):
    """Consume one column token, recording it into ``schema``.

    ``schema`` here is a plain ``{table: [columns]}`` dict grown as a side
    effect, and ``toks[start_idx]`` is normalized in place to the
    ``table.column`` form.

    :returns next idx, column id
    """
    tok = toks[start_idx]
    if tok == "*":
        # '*' belongs to no specific table; nothing to record.
        return start_idx + 1

    if '.' in tok:  # if token is a composite
        alias, col = tok.split('.')
        # key = tables_with_alias[alias] + "." + col
        table = tables_with_alias[alias]
        """
        Add schema
        """
        if table not in schema:
            schema[table] = []
        schema[table].append(col)
        # We also want to normalize the column
        toks[start_idx] = "{}.{}".format(table, col)
        """
        END
        """
        return start_idx + 1

    assert default_tables is not None and len(default_tables) > 0, "Default tables should not be None or empty"

    # assert len(default_tables) == 1, "Default table should only have one time"

    """
    Add schema
    """
    # Find the best table here
    def choose_best_table(default_tables, tok):
        # NOTE(review): `process` (presumably fuzzywuzzy's process.extractOne)
        # is not imported anywhere in this module, so this branch raises
        # NameError whenever more than one default table exists — confirm the
        # intended import.
        lower_tok = tok.lower()
        candidate = process.extractOne(lower_tok, [table.lower() for table in default_tables])[0]
        return candidate

    if len(default_tables) != 1:
        # print(default_tables)
        table = choose_best_table(default_tables, tok)
        # assert len(default_tables) == 1, "Default table should only have one time"
    else:
        table = default_tables[0]
    if table not in schema:
        schema[table] = []
    schema[table].append(tok)
    toks[start_idx] = "{}.{}".format(table, tok)
    return start_idx + 1

    # for alias in default_tables:
    #     table = tables_with_alias[alias]
    #     if tok in schema.schema[table]:
    #         key = table + "." + tok
    #         return start_idx + 1, schema.idMap[key]

    # assert False, "Error col: {}".format(tok)
+
80
+ def parse_col_unit(toks, start_idx, tables_with_alias, schema, default_tables=None, end_idx=None):
81
+ """
82
+ :returns next idx, (agg_op id, col_id)
83
+ """
84
+ idx = start_idx
85
+ if end_idx is not None:
86
+ len_ = len(toks[start_idx:end_idx])
87
+ else:
88
+ len_ = len(toks)
89
+ isBlock = False
90
+ isDistinct = False
91
+ if toks[idx] == '(':
92
+ isBlock = True
93
+ idx += 1
94
+
95
+ if toks[idx] in AGG_OPS:
96
+ agg_id = AGG_OPS.index(toks[idx])
97
+ idx += 1
98
+ assert idx < len_ and toks[idx] == '('
99
+ idx += 1
100
+ if toks[idx] == "distinct":
101
+ idx += 1
102
+ isDistinct = True
103
+ idx = parse_col(toks, idx, tables_with_alias, schema, default_tables)
104
+ assert idx < len_ and toks[idx] == ')'
105
+ idx += 1
106
+ return idx
107
+
108
+ if toks[idx] == "distinct":
109
+ idx += 1
110
+ isDistinct = True
111
+ agg_id = AGG_OPS.index("none")
112
+ idx = parse_col(toks, idx, tables_with_alias, schema, default_tables)
113
+
114
+ if isBlock:
115
+ assert toks[idx] == ')'
116
+ idx += 1 # skip ')'
117
+
118
+ return idx
119
+
120
def parse_val_unit(toks, start_idx, tables_with_alias, schema, default_tables=None):
    """Consume a value unit: a column unit optionally combined with a second
    one via an arithmetic operator, possibly wrapped in parentheses.

    Unlike the original Spider parser this variant only advances the cursor
    (columns are recorded into ``schema`` by ``parse_col_unit``); the dead
    ``col_unit1``/``col_unit2``/``unit_op`` locals of the previous version
    have been removed.

    :returns: index of the next unconsumed token
    """
    idx = start_idx
    len_ = len(toks)

    # Track an optional enclosing parenthesis.
    is_block = toks[idx] == '('
    if is_block:
        idx += 1

    idx = parse_col_unit(toks, idx, tables_with_alias, schema, default_tables)
    if idx < len_ and toks[idx] in UNIT_OPS:
        # Arithmetic combination, e.g. "col1 - col2".
        idx += 1
        idx = parse_col_unit(toks, idx, tables_with_alias, schema, default_tables)

    if is_block:
        assert toks[idx] == ')'
        idx += 1  # skip ')'

    return idx
143
+
144
def parse_value(toks, start_idx, tables_with_alias, schema, default_tables=None):
    """Consume one value: a nested SELECT, a string/number literal (replaced
    in place by a placeholder token), or a column unit.

    Returns the index of the next unconsumed token.
    """
    idx = start_idx
    len_ = len(toks)

    isBlock = False
    if toks[idx] == '(':
        isBlock = True
        idx += 1

    if toks[idx] == 'select':
        # Sub-query used as a value.
        idx = parse_sql(toks, idx, schema)
    elif "\"" in toks[idx]:  # token is a string value
        val = toks[idx]
        # Replace with placeholder
        toks[idx] = "_str_value_"
        idx += 1
    else:
        try:
            val = float(toks[idx])
            toks[idx] = "_num_value_"
            idx += 1
        except:
            # Not a literal: treat the span up to the next delimiter as a
            # column unit.
            end_idx = idx
            while end_idx < len_ and toks[end_idx] != ',' and toks[end_idx] != ')' \
                    and toks[end_idx] != 'and' and toks[end_idx] not in CLAUSE_KEYWORDS and toks[
                end_idx] not in JOIN_KEYWORDS:
                end_idx += 1

            # idx = parse_col_unit(toks[start_idx: end_idx], 0, tables_with_alias, schema, default_tables)
            idx = parse_col_unit(toks, start_idx, tables_with_alias, schema, default_tables, end_idx=end_idx)
            idx = end_idx

    if isBlock:
        assert toks[idx] == ')'
        idx += 1

    return idx
181
+
182
def parse_condition(toks, start_idx, tables_with_alias, schema, default_tables=None):
    """Consume a condition list (cond [and/or cond]...), recording any
    referenced columns into ``schema``; returns the next token index."""
    idx = start_idx
    len_ = len(toks)
    # conds = []

    while idx < len_:
        # Left-hand side value unit of the comparison.
        idx = parse_val_unit(toks, idx, tables_with_alias, schema, default_tables)
        not_op = False
        if toks[idx] == 'not':
            not_op = True
            idx += 1

        assert idx < len_ and toks[idx] in WHERE_OPS, "Error condition: idx: {}, tok: {}".format(idx, toks[idx])
        op_id = WHERE_OPS.index(toks[idx])
        idx += 1
        val1 = val2 = None
        if op_id == WHERE_OPS.index('between'):  # between..and... special case: dual values
            idx = parse_value(toks, idx, tables_with_alias, schema, default_tables)
            assert toks[idx] == 'and'
            idx += 1
            idx = parse_value(toks, idx, tables_with_alias, schema, default_tables)
        else:  # normal case: single value
            idx = parse_value(toks, idx, tables_with_alias, schema, default_tables)
            val2 = None

        # conds.append((not_op, op_id, val_unit, val1, val2))

        # Stop at a clause boundary, closing paren/semicolon, or JOIN keyword.
        if idx < len_ and (toks[idx] in CLAUSE_KEYWORDS or toks[idx] in (")", ";") or toks[idx] in JOIN_KEYWORDS):
            break

        if idx < len_ and toks[idx] in COND_OPS:
            # conds.append(toks[idx])
            idx += 1  # skip and/or
    return idx  # , conds
216
+
217
+
218
def parse_from(toks, start_idx, schema):
    """Locate and consume the FROM clause (tables, joins and ON conditions).

    Returns (index after the clause, list of table names seen,
    {alias: table} mapping).  Newly seen tables are added to ``schema``.
    """
    assert 'from' in toks[start_idx:], "'from' not found"
    tables_with_alias = {}

    len_ = len(toks)
    # Jump directly to the first token after 'from'.
    idx = toks.index('from', start_idx) + 1
    default_tables = []
    table_units = []
    conds = []
    # print(idx, len_)
    while idx < len_:
        # print("idx", idx, toks[idx])
        isBlock = False
        if toks[idx] == '(':
            isBlock = True
            idx += 1

        if toks[idx] == 'select':
            # FROM (sub-select)
            idx = parse_sql(toks, idx, schema)
            # table_units.append((TABLE_TYPE['sql'], sql))
        else:
            if idx < len_ and toks[idx] == 'join':
                idx += 1  # skip join
            idx, table_name = parse_table_unit(toks, idx, tables_with_alias)
            # print(table_name)
            # table_units.append((TABLE_TYPE['table_unit'], table_unit))
            default_tables.append(table_name)
            """
            Add schema
            """
            if table_name not in schema:
                schema[table_name] = []
            """
            END
            """

        if idx < len_ and toks[idx] == "on":
            idx += 1  # skip on
            idx = parse_condition(toks, idx, tables_with_alias, schema, default_tables)
            # if len(conds) > 0:
            #     conds.append('and')
            # conds.extend(this_conds)

        if isBlock:
            assert toks[idx] == ')'
            idx += 1

        if idx < len_ and (toks[idx] in CLAUSE_KEYWORDS or toks[idx] in (")", ";")):
            break

    return idx, default_tables, tables_with_alias
269
+
270
def parse_select(toks, start_idx, tables_with_alias, schema, default_tables=None):
    """Consume the SELECT list (aggregations and value units) starting at
    ``start_idx``; returns the index of the first token after the list."""
    idx = start_idx
    len_ = len(toks)

    assert toks[idx] == 'select', "'select' not found"
    idx += 1
    isDistinct = False
    if idx < len_ and toks[idx] == 'distinct':
        idx += 1
        isDistinct = True
    val_units = []

    while idx < len_ and toks[idx] not in CLAUSE_KEYWORDS:
        agg_id = AGG_OPS.index("none")
        if toks[idx] in AGG_OPS:
            agg_id = AGG_OPS.index(toks[idx])
            idx += 1
        idx = parse_val_unit(toks, idx, tables_with_alias, schema, default_tables)
        # val_units.append((agg_id, val_unit))
        if idx < len_ and toks[idx] == ',':
            idx += 1  # skip ','

    return idx
293
+
294
def parse_where(toks, start_idx, tables_with_alias, schema, default_tables):
    """Consume an optional WHERE clause starting at ``start_idx``.

    Returns the index of the first token after the clause, or ``start_idx``
    unchanged when no WHERE clause is present.
    """
    # No clause here: leave the cursor where it was.
    if start_idx >= len(toks) or toks[start_idx] != 'where':
        return start_idx
    # Skip the 'where' keyword, then parse the condition list.
    return parse_condition(toks, start_idx + 1, tables_with_alias, schema, default_tables)
304
+
305
def parse_group_by(toks, start_idx, tables_with_alias, schema, default_tables):
    """Consume an optional GROUP BY clause; return the next token index."""
    total = len(toks)
    if start_idx >= total or toks[start_idx] != 'group':
        # No GROUP BY here.
        return start_idx

    idx = start_idx + 1
    assert toks[idx] == 'by'
    idx += 1

    # One column unit per iteration; a comma continues the list.
    while idx < total:
        if toks[idx] in CLAUSE_KEYWORDS or toks[idx] in (")", ";"):
            break
        idx = parse_col_unit(toks, idx, tables_with_alias, schema, default_tables)
        if idx < total and toks[idx] == ',':
            idx += 1  # skip ','
        else:
            break
    return idx
326
+
327
def parse_having(toks, start_idx, tables_with_alias, schema, default_tables):
    """Consume an optional HAVING clause; return the next token index."""
    # Absent clause: cursor stays put.
    if start_idx >= len(toks) or toks[start_idx] != 'having':
        return start_idx
    # Skip 'having' and parse the condition list that follows.
    return parse_condition(toks, start_idx + 1, tables_with_alias, schema, default_tables)
337
+
338
def parse_order_by(toks, start_idx, tables_with_alias, schema, default_tables):
    """Consume an optional ORDER BY clause; return the next token index."""
    total = len(toks)
    if start_idx >= total or toks[start_idx] != 'order':
        # No ORDER BY here.
        return start_idx

    idx = start_idx + 1
    assert toks[idx] == 'by'
    idx += 1

    # Value units separated by commas, each optionally followed by asc/desc.
    while idx < total:
        if toks[idx] in CLAUSE_KEYWORDS or toks[idx] in (")", ";"):
            break
        idx = parse_val_unit(toks, idx, tables_with_alias, schema, default_tables)
        if idx < total and toks[idx] in ORDER_OPS:
            idx += 1  # consume the direction keyword
        if idx < total and toks[idx] == ',':
            idx += 1  # skip ','
        else:
            break
    return idx
363
+
364
def parse_limit(toks, start_idx):
    """Consume an optional LIMIT clause.

    Replaces the literal limit value in ``toks`` with the
    ``"_limit_value_"`` placeholder (the extractor abstracts values away)
    and returns the index of the next unconsumed token.

    The previous version checked ``type(toks[idx - 1]) != int`` *after*
    overwriting that token with a string, so the check was always true and
    both branches returned the same index — the dead code is removed here.
    """
    if start_idx < len(toks) and toks[start_idx] == 'limit':
        # Mask the limit value; its original content is irrelevant for
        # template extraction.
        toks[start_idx + 1] = "_limit_value_"
        return start_idx + 2
    return start_idx
378
+
379
def parse_sql(toks, start_idx, schema):
    """Walk one (possibly parenthesized) SELECT statement, growing ``schema``
    as columns/tables are encountered; returns the next token index.

    FROM is parsed first (``parse_from`` locates the 'from' keyword itself,
    starting from ``start_idx``) so the default tables are known before the
    SELECT list is examined.
    """
    isBlock = False  # indicate whether this is a block of sql/sub-sql
    len_ = len(toks)
    idx = start_idx

    if toks[idx] == '(':
        isBlock = True
        idx += 1

    from_end_idx, default_tables, tables_with_alias = parse_from(toks, start_idx, schema)

    # SELECT list is re-scanned from `idx`; its return value is unused
    # because the cursor resumes after the FROM clause.
    _ = parse_select(toks, idx, tables_with_alias, schema, default_tables)
    idx = from_end_idx

    idx = parse_where(toks, idx, tables_with_alias, schema, default_tables)
    idx = parse_group_by(toks, idx, tables_with_alias, schema, default_tables)
    idx = parse_having(toks, idx, tables_with_alias, schema, default_tables)
    idx = parse_order_by(toks, idx, tables_with_alias, schema, default_tables)
    idx = parse_limit(toks, idx)
    #
    idx = skip_semicolon(toks, idx)
    if isBlock:
        assert toks[idx] == ')'
        idx += 1  # skip ')'
        idx = skip_semicolon(toks, idx)

    # for op in SQL_OPS:  # initialize IUE
    #     sql[op] = None
    if idx < len_ and toks[idx] in SQL_OPS:
        # intersect / union / except: recurse on the right-hand statement.
        sql_op = toks[idx]
        idx += 1
        idx = parse_sql(toks, idx, schema)
        # sql[sql_op] = IUE_sql
    return idx
413
+
414
def extract_schema_from_sql(schema, sql):
    """Tokenize ``sql``, populate ``schema`` in place by walking the parse,
    and return the (normalized) token list."""
    tokens = tokenize(sql)
    parse_sql(tokens, 0, schema)
    return tokens
418
+
419
def extract_template_from_sql(sql, schema={}):
    """Abstract a SQL string into a template token list.

    FROM contents become "[FROM_PART]", comparison operators "[WHERE_OP]",
    literals "[VALUE]"/"[LIMIT_VALUE]", sort direction "[ORDER_DIRECTION]",
    and any other identifier "[MASK]"; structural keywords are kept verbatim.

    NOTE(review): ``schema`` is an unused mutable default argument, and the
    bare ``except`` silences every tokenization failure — confirm both are
    intentional.
    """
    try:
        toks = tokenize(sql)
    except:
        print("Tokenization error for {}".format(sql))
        toks = []
    # print(toks)
    template = []
    # ignore_follow_up_and = False
    len_ = len(toks)
    idx = 0
    while idx < len_:
        tok = toks[idx]
        if tok == "from":
            template.append(tok)
            if toks[idx + 1] != "(":
                # Collapse every raw token of the FROM clause into one marker.
                template.append("[FROM_PART]")
                idx += 1
                while idx < len_ and (toks[idx] not in CLAUSE_KEYWORDS and toks[idx] != ")"):
                    idx += 1
                continue
        elif tok in CLAUSE_KEYWORDS:
            template.append(tok)
        elif tok in AGG_OPS:
            template.append(tok)
        elif tok in [",", "*", "(", ")", "having", "by", "distinct"]:
            template.append(tok)
        elif tok in ["asc", "desc"]:
            template.append("[ORDER_DIRECTION]")
        elif tok in WHERE_OPS:
            if tok in KEPT_WHERE_OP:
                template.append(tok)
            else:
                template.append("[WHERE_OP]")
            if tok == "between":
                # "between X and Y" collapses into a single [WHERE_OP].
                idx += 2
        elif tok in COND_OPS:
            template.append(tok)
        elif template[-1] == "[WHERE_OP]":
            template.append("[VALUE]")
        elif template[-1] == "limit":
            template.append("[LIMIT_VALUE]")
        elif template[-1] != "[MASK]":  # value, schema, join on as
            template.append("[MASK]")
        idx += 1
    return template
465
+
466
def extract_partial_template_from_sql(sql, schema={}):
    """Like ``extract_template_from_sql`` but keeps FROM tokens and other
    identifiers verbatim, abstracting only comparison operators, sort
    direction and literal values.

    NOTE(review): ``schema`` is an unused mutable default argument.
    """
    toks = tokenize(sql)
    # print(toks)
    template = []
    # ignore_follow_up_and = False
    len_ = len(toks)
    idx = 0
    while idx < len_:
        tok = toks[idx]
        if tok == "from":
            template.append(tok)
            if toks[idx + 1] != "(":
                # template.append("[FROM_PART]")
                idx += 1
                # Keep FROM tokens verbatim, unlike the full template variant.
                while idx < len_ and (toks[idx] not in CLAUSE_KEYWORDS and toks[idx] != ")"):
                    template.append(toks[idx])
                    idx += 1
                continue
        elif tok in CLAUSE_KEYWORDS:
            template.append(tok)
        elif tok in AGG_OPS:
            template.append(tok)
        elif tok in [",", "*", "(", ")", "having", "by", "distinct"]:
            template.append(tok)
        elif tok in ["asc", "desc"]:
            template.append("[ORDER_DIRECTION]")
        elif tok in WHERE_OPS:
            if tok in KEPT_WHERE_OP:
                template.append(tok)
            else:
                template.append("[WHERE_OP]")
            if tok == "between":
                # "between X and Y" collapses into a single [WHERE_OP].
                idx += 2
        elif tok in COND_OPS:
            template.append(tok)
        elif template[-1] == "[WHERE_OP]":
            template.append("[VALUE]")
        elif template[-1] == "limit":
            template.append("[LIMIT_VALUE]")
        else:
            template.append(tok)
        idx += 1
    return template
509
+
510
+
511
def is_valid_schema(schema):
    """Heuristically validate an extracted ``{table: [columns]}`` mapping.

    A schema is rejected when a table name contains '.', equals a SQL clause
    keyword (a sign of a mis-parse), or any column name contains '.',
    whitespace, or quote characters.

    (Replaces the previous ``any([keyword == table for keyword in
    CLAUSE_KEYWORDS])`` with the equivalent, idiomatic membership test.)
    """
    for table, columns in schema.items():
        if "." in table:
            return False
        if table in CLAUSE_KEYWORDS:
            return False
        for column in columns:
            if "." in column or " " in column or '"' in column or "'" in column:
                return False
    return True
522
+
523
def clean_sql(sql):
    """Patch up common generation artifacts: collapse repeated JOIN keywords
    and drop a dangling JOIN directly before WHERE / GROUP BY."""
    cleaned = sql
    # Repeated JOINs collapse pairwise until none remain.
    while "JOIN JOIN" in cleaned:
        cleaned = cleaned.replace("JOIN JOIN", "JOIN")
    for dangling, repaired in (("JOIN WHERE", "WHERE"), ("JOIN GROUP BY", "GROUP BY")):
        if dangling in cleaned:
            cleaned = cleaned.replace(dangling, repaired)
    return cleaned
531
+
532
if __name__ == "__main__":
    # CLI driver: extract database schemas or SQL templates from a JSONL file
    # whose examples carry either a "sql" or a "pred" field.
    parser = argparse.ArgumentParser()
    parser.add_argument("--input_file", type=str)
    parser.add_argument("--output_file", type=str)
    parser.add_argument("--mode", type=str, choices=["debug", "verbose", "silent"])
    parser.add_argument("--task", type=str, choices=["template_extraction", "schema_extraction"])
    args = parser.parse_args()

    if args.task == "schema_extraction":
        if args.mode == "debug":
            # Ad-hoc smoke test on hand-written SQL; only the last assignment
            # to `sql` is actually used.
            sql = "SELECT count(*) FROM games"
            sql = sql + " INTERSECT " + "SELECT sacks, year FROM players"
            sql = sql + " EXCEPT " + 'SELECT T1.year, T1.sacks FROM players AS T1 JOIN tackles AS T2 ON T1.id = T2.player_id WHERE T2.manager = "A" and T2.season NOT IN (SELECT season FROM match WHERE match_name = "IVL" INTERSECT SELECT T1.year, T1.sacks FROM sack AS T1) GROUP BY T1.year, T1.sacks HAVING count(T1.coach) > 10 ORDER BY T2.score LIMIT 5'
            sql = "SELECT T1.pld FROM pld AS T1 JOIN games AS T2 ON T1.crs_code = T2.crs_code JOIN GROUP BY T1.pld WHERE T2.gf = '8' AND T2.gf = '9'"
            sql = 'select * from head where height = "6-0" or height = "6-0" order by height asc'
            schema = {}
            extract_schema_from_sql(schema, sql)
            print(schema, is_valid_schema(schema))
        elif args.mode == "verbose":
            # Extract a schema per example, writing only examples whose
            # schema passes validation; extraction errors skip the example.
            fout = open(args.output_file, "w")
            with open(args.input_file) as fin:
                for line in fin:
                    example = json.loads(line)
                    schema = {}
                    try:
                        sql = example["sql"] if "sql" in example else example["pred"]
                        sql = clean_sql(sql)
                        example["sql"] = sql
                        extract_schema_from_sql(schema, sql)

                    except:
                        # print(sql)
                        continue
                    for table in schema:
                        schema[table] = list(set(schema[table]))
                    if is_valid_schema(schema):
                        example["extracted_schema"] = schema
                        fout.write(json.dumps(example) + "\n")
        elif args.mode == "verbose":
            # NOTE(review): unreachable — this duplicates the mode tested in
            # the branch above (possibly meant to be "silent"); it would also
            # write each valid example twice. Confirm the intended mode name.
            fout = open(args.output_file, "w")
            with open(args.input_file) as fin:
                for line in fin:
                    example = json.loads(line)
                    schema = {}
                    sql = example["sql"] if "sql" in example else example["pred"]
                    sql = clean_sql(sql)
                    example["sql"] = sql
                    extract_schema_from_sql(schema, sql)
                    for table in schema:
                        schema[table] = list(set(schema[table]))
                    example["extracted_schema"] = schema
                    fout.write(json.dumps(example) + "\n")
                    if is_valid_schema(schema):
                        example["extracted_schema"] = schema
                        fout.write(json.dumps(example) + "\n")
    elif args.task == "template_extraction":
        if args.mode == "debug":
            sql = "SELECT avg(T1.Votes) FROM seats AS T1 JOIN votes AS T2 ON T1.Seat_ID = T2.Seat_ID WHERE T1.seats BETWEEN 1 AND 2 and T1.Seats = 1 AND T2.Votes = 10"
            print(extract_template_from_sql(sql))
            print(extract_partial_template_from_sql(sql))
        elif args.mode == "verbose":
            # Group examples by template; singleton templates go to the
            # .low_freq file, repeated ones to .high_freq.
            fout_json = open(args.output_file + ".json", "w")
            fout_txt = open(args.output_file + ".txt", "w")
            low_freq_txt = open(args.output_file + ".low_freq", "w")
            high_freq_txt = open(args.output_file + ".high_freq", "w")
            all_templates = set()
            # for input_file in args.input_file.split(","):
            templates = {}
            with open(args.input_file) as fin:
                for line in fin:
                    example = json.loads(line)
                    sql = example["sql"] if "sql" in example else example["pred"]
                    if isinstance(sql, list):
                        sql = sql[-1]
                    template = extract_template_from_sql(sql)
                    template_str = " ".join(template)
                    if template_str not in templates:
                        templates[template_str] = []
                    templates[template_str].append(sql)
            print("{} has template {}".format(args.input_file, len(templates)))

            json.dump(templates, fout_json)
            for template in sorted(templates.keys()):
                if len(templates[template]) > 1:
                    high_freq_txt.write(template + "\n")
                else:
                    low_freq_txt.write(template + "\n")
                fout_txt.write(template + "\n")
620
+
621
+
622
+
utils/sql/process_sql.py ADDED
@@ -0,0 +1,595 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ################################
2
+ # Assumptions:
3
+ # 1. sql is correct
4
+ # 2. only table name has alias
5
+ # 3. only one intersect/union/except
6
+ #
7
+ # val: number(float)/string(str)/sql(dict)
8
+ # col_unit: (agg_id, col_id, isDistinct(bool))
9
+ # val_unit: (unit_op, col_unit1, col_unit2)
10
+ # table_unit: (table_type, col_unit/sql)
11
+ # cond_unit: (not_op, op_id, val_unit, val1, val2)
12
+ # condition: [cond_unit1, 'and'/'or', cond_unit2, ...]
13
+ # sql {
14
+ # 'select': (isDistinct(bool), [(agg_id, val_unit), (agg_id, val_unit), ...])
15
+ # 'from': {'table_units': [table_unit1, table_unit2, ...], 'conds': condition}
16
+ # 'where': condition
17
+ # 'groupBy': [col_unit1, col_unit2, ...]
18
+ # 'orderBy': ('asc'/'desc', [val_unit1, val_unit2, ...])
19
+ # 'having': condition
20
+ # 'limit': None/limit value
21
+ # 'intersect': None/sql
22
+ # 'except': None/sql
23
+ # 'union': None/sql
24
+ # }
25
+ ################################
26
+
27
+ import json
28
+ import sqlite3
29
+ from nltk import word_tokenize
30
+
31
# Keywords that begin a new SQL clause.
CLAUSE_KEYWORDS = ('select', 'from', 'where', 'group', 'order', 'limit', 'intersect', 'union', 'except')
JOIN_KEYWORDS = ('join', 'on', 'as')

# Predicate operators; indexes into these tuples serve as operator ids.
WHERE_OPS = ('not', 'between', '=', '>', '<', '>=', '<=', '!=', 'in', 'like', 'is', 'exists')
UNIT_OPS = ('none', '-', '+', "*", '/')
AGG_OPS = ('none', 'max', 'min', 'count', 'sum', 'avg')
# Discriminates FROM items: plain table vs. sub-select.
TABLE_TYPE = {
    'sql': "sql",
    'table_unit': "table_unit",
}

COND_OPS = ('and', 'or')
SQL_OPS = ('intersect', 'union', 'except')
ORDER_OPS = ('desc', 'asc')
45
+
46
+
47
+
48
class Schema:
    """Maps each table and table.column of a database schema to a unique
    string identifier (e.g. ``"t.col"`` -> ``"__t.col__"``)."""

    def __init__(self, schema):
        self._schema = schema
        self._idMap = self._map(self._schema)

    @property
    def schema(self):
        """The raw {table: [columns]} mapping this object was built from."""
        return self._schema

    @property
    def idMap(self):
        """Lookup from '*', 'table' and 'table.column' keys to identifiers."""
        return self._idMap

    def _map(self, schema):
        # '*' always maps to the special __all__ identifier.
        id_map = {'*': "__all__"}
        for table, columns in schema.items():
            for column in columns:
                qualified = "{}.{}".format(table.lower(), column.lower())
                id_map[qualified] = "__{}__".format(qualified)
        for table in schema:
            id_map[table.lower()] = "__{}__".format(table.lower())
        return id_map
77
+
78
+
79
def get_schema(db):
    """Read a SQLite database's schema.

    :param db: database path
    :return: dict mapping lower-cased table name -> list of lower-cased
        column names
    """
    connection = sqlite3.connect(db)
    cursor = connection.cursor()

    # Enumerate user tables from the catalog.
    cursor.execute("SELECT name FROM sqlite_master WHERE type='table';")
    table_names = [str(row[0].lower()) for row in cursor.fetchall()]

    schema = {}
    for name in table_names:
        cursor.execute("PRAGMA table_info({})".format(name))
        # Field 1 of each PRAGMA table_info row is the column name.
        schema[name] = [str(info[1].lower()) for info in cursor.fetchall()]

    return schema
101
+
102
+
103
def get_schema_from_json(fpath):
    """Load a schema from a JSON file.

    Expects a list of entries with 'table' and 'col_data' keys; returns a
    {table: [column, ...]} dict with all names lower-cased.
    """
    with open(fpath) as handle:
        entries = json.load(handle)

    return {
        str(entry['table'].lower()): [
            str(col['column_name'].lower()) for col in entry['col_data']
        ]
        for entry in entries
    }
114
+
115
+
116
def tokenize(string):
    """Tokenize a SQL string into a lower-cased token list.

    Quoted string literals are protected from the word tokenizer via
    temporary placeholder keys, then restored as single tokens; '!=', '>='
    and '<=' are re-fused into single tokens afterwards.
    """
    string = str(string)
    # NOTE(review): normalizing ' to " assumes no literal contains an
    # apostrophe; an embedded quote would break the pairing below — confirm.
    string = string.replace("\'", "\"")  # ensures all string values wrapped by "" problem??
    quote_idxs = [idx for idx, char in enumerate(string) if char == '"']
    assert len(quote_idxs) % 2 == 0, "Unexpected quote"

    # keep string value as token
    vals = {}
    for i in range(len(quote_idxs) - 1, -1, -2):
        qidx1 = quote_idxs[i - 1]
        qidx2 = quote_idxs[i]
        val = string[qidx1: qidx2 + 1]
        key = "__val_{}_{}__".format(qidx1, qidx2)
        string = string[:qidx1] + key + string[qidx2 + 1:]
        vals[key] = val

    # tokenize sql
    toks_tmp = [word.lower() for word in word_tokenize(string)]
    toks = []
    for tok in toks_tmp:
        # word_tokenize can glue '=' onto a following placeholder; split it.
        if tok.startswith('=__val_'):
            tok = tok[1:]
            toks.append('=')
        toks.append(tok)

    # replace with string value token
    for i in range(len(toks)):
        if toks[i] in vals:
            toks[i] = vals[toks[i]]

    # find if there exists !=, >=, <=
    eq_idxs = [idx for idx, tok in enumerate(toks) if tok == "="]
    eq_idxs.reverse()
    prefix = ('!', '>', '<')
    for eq_idx in eq_idxs:
        pre_tok = toks[eq_idx - 1]
        if pre_tok in prefix:
            toks = toks[:eq_idx - 1] + [pre_tok + "="] + toks[eq_idx + 1:]

    return toks
156
+
157
+
158
def scan_alias(toks):
    """Scan the token list for 'table as alias' patterns and return the
    {alias: table} mapping."""
    # The token before 'as' is the table, the one after is its alias.
    return {toks[i + 1]: toks[i - 1] for i, tok in enumerate(toks) if tok == 'as'}
165
+
166
+
167
def get_tables_with_alias(schema, toks):
    """Combine scanned aliases with identity entries for every schema table.

    Raises AssertionError when an alias shadows a real table name.
    """
    alias_map = scan_alias(toks)
    for table in schema:
        # An alias equal to a table name would make lookups ambiguous.
        assert table not in alias_map, "Alias {} has the same name in table".format(table)
        alias_map[table] = table
    return alias_map
173
+
174
+
175
def parse_col(toks, start_idx, tables_with_alias, schema, default_tables=None):
    """Resolve one column token against ``schema`` (a Schema instance).

    :returns next idx, column id
    """
    tok = toks[start_idx]
    if tok == "*":
        return start_idx + 1, schema.idMap[tok]

    if '.' in tok:  # if token is a composite
        alias, col = tok.split('.')
        key = tables_with_alias[alias] + "." + col
        return start_idx + 1, schema.idMap[key]

    assert default_tables is not None and len(default_tables) > 0, "Default tables should not be None or empty"

    # Unqualified column: resolve via the first default table declaring it.
    for alias in default_tables:
        table = tables_with_alias[alias]
        if tok in schema.schema[table]:
            key = table + "." + tok
            return start_idx + 1, schema.idMap[key]

    assert False, "Error col: {}".format(tok)
197
+
198
+
199
def parse_col_unit(toks, start_idx, tables_with_alias, schema, default_tables=None):
    """Consume a column unit: optional aggregation / DISTINCT / parentheses
    around a single column.

    :returns next idx, (agg_op id, col_id, isDistinct)
    """
    idx = start_idx
    len_ = len(toks)
    isBlock = False
    isDistinct = False
    if toks[idx] == '(':
        isBlock = True
        idx += 1

    if toks[idx] in AGG_OPS:
        # Aggregation form: agg ( [distinct] col )
        agg_id = AGG_OPS.index(toks[idx])
        idx += 1
        assert idx < len_ and toks[idx] == '('
        idx += 1
        if toks[idx] == "distinct":
            idx += 1
            isDistinct = True
        idx, col_id = parse_col(toks, idx, tables_with_alias, schema, default_tables)
        assert idx < len_ and toks[idx] == ')'
        idx += 1
        return idx, (agg_id, col_id, isDistinct)

    if toks[idx] == "distinct":
        idx += 1
        isDistinct = True
    agg_id = AGG_OPS.index("none")
    idx, col_id = parse_col(toks, idx, tables_with_alias, schema, default_tables)

    if isBlock:
        assert toks[idx] == ')'
        idx += 1  # skip ')'

    return idx, (agg_id, col_id, isDistinct)
235
+
236
+
237
def parse_val_unit(toks, start_idx, tables_with_alias, schema, default_tables=None):
    """Consume a value unit.

    :returns next idx, (unit_op, col_unit1, col_unit2)
    """
    idx = start_idx
    len_ = len(toks)
    isBlock = False
    if toks[idx] == '(':
        isBlock = True
        idx += 1

    col_unit1 = None
    col_unit2 = None
    unit_op = UNIT_OPS.index('none')

    idx, col_unit1 = parse_col_unit(toks, idx, tables_with_alias, schema, default_tables)
    if idx < len_ and toks[idx] in UNIT_OPS:
        # Arithmetic combination of two column units, e.g. "a - b".
        unit_op = UNIT_OPS.index(toks[idx])
        idx += 1
        idx, col_unit2 = parse_col_unit(toks, idx, tables_with_alias, schema, default_tables)

    if isBlock:
        assert toks[idx] == ')'
        idx += 1  # skip ')'

    return idx, (unit_op, col_unit1, col_unit2)
260
+
261
+
262
def parse_table_unit(toks, start_idx, tables_with_alias, schema):
    """Consume a table reference (with optional 'as alias').

    :returns: (next idx, table id, table name)
    """
    table = tables_with_alias[toks[start_idx]]
    # "table as alias" spans three tokens, a bare table just one.
    aliased = start_idx + 1 < len(toks) and toks[start_idx + 1] == "as"
    next_idx = start_idx + (3 if aliased else 1)
    return next_idx, schema.idMap[table], table
276
+
277
+
278
def parse_value(toks, start_idx, tables_with_alias, schema, default_tables=None):
    """Consume one value: a nested SELECT, a string/number literal, or a
    column unit.

    :returns next idx, parsed value
    """
    idx = start_idx
    len_ = len(toks)

    isBlock = False
    if toks[idx] == '(':
        isBlock = True
        idx += 1

    if toks[idx] == 'select':
        # Sub-query used as a value.
        idx, val = parse_sql(toks, idx, tables_with_alias, schema)
    elif "\"" in toks[idx]:  # token is a string value
        val = toks[idx]
        idx += 1
    else:
        try:
            val = float(toks[idx])
            idx += 1
        except:
            # Not a literal: consume a column unit up to the next delimiter.
            end_idx = idx
            while end_idx < len_ and toks[end_idx] != ',' and toks[end_idx] != ')' \
                    and toks[end_idx] != 'and' and toks[end_idx] not in CLAUSE_KEYWORDS and toks[end_idx] not in JOIN_KEYWORDS:
                end_idx += 1

            idx, val = parse_col_unit(toks[start_idx: end_idx], 0, tables_with_alias, schema, default_tables)
            idx = end_idx

    if isBlock:
        assert toks[idx] == ')'
        idx += 1

    return idx, val
310
+
311
+
312
def parse_condition(toks, start_idx, tables_with_alias, schema, default_tables=None):
    """Parse a list of conditions starting at start_idx.

    Returns (next_idx, conds) where conds alternates condition tuples
    (not_op, op_id, val_unit, val1, val2) with 'and'/'or' connective tokens.
    """
    pos = start_idx
    total = len(toks)
    conditions = []

    while pos < total:
        pos, val_unit = parse_val_unit(toks, pos, tables_with_alias, schema, default_tables)

        negated = toks[pos] == 'not'
        if negated:
            pos += 1

        assert pos < total and toks[pos] in WHERE_OPS, "Error condition: idx: {}, tok: {}".format(pos, toks[pos])
        op_code = WHERE_OPS.index(toks[pos])
        pos += 1

        # "between ... and ..." carries two operand values; all other
        # operators carry a single value.
        if op_code == WHERE_OPS.index('between'):
            pos, first_val = parse_value(toks, pos, tables_with_alias, schema, default_tables)
            assert toks[pos] == 'and'
            pos += 1
            pos, second_val = parse_value(toks, pos, tables_with_alias, schema, default_tables)
        else:
            pos, first_val = parse_value(toks, pos, tables_with_alias, schema, default_tables)
            second_val = None

        conditions.append((negated, op_code, val_unit, first_val, second_val))

        # stop at the end of the condition list
        if pos < total and (toks[pos] in CLAUSE_KEYWORDS or toks[pos] in (")", ";") or toks[pos] in JOIN_KEYWORDS):
            break

        if pos < total and toks[pos] in COND_OPS:
            conditions.append(toks[pos])
            pos += 1  # skip and/or

    return pos, conditions
def parse_select(toks, start_idx, tables_with_alias, schema, default_tables=None):
    """Parse the SELECT clause.

    Returns (next_idx, (isDistinct, [(agg_id, val_unit), ...])).
    """
    pos = start_idx
    total = len(toks)

    assert toks[pos] == 'select', "'select' not found"
    pos += 1

    distinct = pos < total and toks[pos] == 'distinct'
    if distinct:
        pos += 1

    selections = []
    while pos < total and toks[pos] not in CLAUSE_KEYWORDS:
        # optional aggregation wrapping the value unit
        agg_code = AGG_OPS.index("none")
        if toks[pos] in AGG_OPS:
            agg_code = AGG_OPS.index(toks[pos])
            pos += 1
        pos, val_unit = parse_val_unit(toks, pos, tables_with_alias, schema, default_tables)
        selections.append((agg_code, val_unit))
        if pos < total and toks[pos] == ',':
            pos += 1  # skip ','

    return pos, (distinct, selections)
def parse_from(toks, start_idx, tables_with_alias, schema):
    """
    Assume in the from clause, all table units are combined with join

    Scans forward from start_idx to the 'from' keyword, then parses table
    units (plain tables, or parenthesized sub-selects) and any 'on' join
    conditions. Returns (next_idx, table_units, conds, default_tables),
    where default_tables lists the table names seen, used by callers to
    resolve unqualified column references.
    """
    assert 'from' in toks[start_idx:], "'from' not found"

    len_ = len(toks)
    idx = toks.index('from', start_idx) + 1
    default_tables = []
    table_units = []
    conds = []

    while idx < len_:
        isBlock = False
        if toks[idx] == '(':
            isBlock = True
            idx += 1

        if toks[idx] == 'select':
            # parenthesized sub-query used as a table source
            idx, sql = parse_sql(toks, idx, tables_with_alias, schema)
            table_units.append((TABLE_TYPE['sql'], sql))
        else:
            if idx < len_ and toks[idx] == 'join':
                idx += 1  # skip join
            idx, table_unit, table_name = parse_table_unit(toks, idx, tables_with_alias, schema)
            table_units.append((TABLE_TYPE['table_unit'],table_unit))
            default_tables.append(table_name)
        if idx < len_ and toks[idx] == "on":
            idx += 1  # skip on
            idx, this_conds = parse_condition(toks, idx, tables_with_alias, schema, default_tables)
            # multiple ON clauses are flattened into one list joined by 'and'
            if len(conds) > 0:
                conds.append('and')
            conds.extend(this_conds)

        if isBlock:
            assert toks[idx] == ')'
            idx += 1
        # a clause keyword / ')' / ';' terminates the FROM clause
        if idx < len_ and (toks[idx] in CLAUSE_KEYWORDS or toks[idx] in (")", ";")):
            break

    return idx, table_units, conds, default_tables
def parse_where(toks, start_idx, tables_with_alias, schema, default_tables):
    """Parse an optional WHERE clause; returns (next_idx, conds)."""
    pos = start_idx
    if pos >= len(toks) or toks[pos] != 'where':
        return pos, []  # no WHERE clause present
    pos, conditions = parse_condition(toks, pos + 1, tables_with_alias, schema, default_tables)
    return pos, conditions
def parse_group_by(toks, start_idx, tables_with_alias, schema, default_tables):
    """Parse an optional GROUP BY clause; returns (next_idx, col_units)."""
    pos = start_idx
    total = len(toks)
    grouped_cols = []

    if pos >= total or toks[pos] != 'group':
        return pos, grouped_cols

    pos += 1
    assert toks[pos] == 'by'
    pos += 1

    while pos < total and not (toks[pos] in CLAUSE_KEYWORDS or toks[pos] in (")", ";")):
        pos, col_unit = parse_col_unit(toks, pos, tables_with_alias, schema, default_tables)
        grouped_cols.append(col_unit)
        if pos < total and toks[pos] == ',':
            pos += 1  # skip ','
        else:
            break

    return pos, grouped_cols
def parse_order_by(toks, start_idx, tables_with_alias, schema, default_tables):
    """Parse an optional ORDER BY clause.

    Returns (next_idx, []) when the clause is absent, otherwise
    (next_idx, (order_type, val_units)).
    """
    pos = start_idx
    total = len(toks)
    ordered_units = []
    direction = 'asc'  # default type is 'asc'

    if pos >= total or toks[pos] != 'order':
        return pos, ordered_units

    pos += 1
    assert toks[pos] == 'by'
    pos += 1

    while pos < total and not (toks[pos] in CLAUSE_KEYWORDS or toks[pos] in (")", ";")):
        pos, val_unit = parse_val_unit(toks, pos, tables_with_alias, schema, default_tables)
        ordered_units.append(val_unit)
        if pos < total and toks[pos] in ORDER_OPS:
            direction = toks[pos]
            pos += 1
        if pos < total and toks[pos] == ',':
            pos += 1  # skip ','
        else:
            break

    return pos, (direction, ordered_units)
def parse_having(toks, start_idx, tables_with_alias, schema, default_tables):
    """Parse an optional HAVING clause; returns (next_idx, conds)."""
    pos = start_idx
    if pos >= len(toks) or toks[pos] != 'having':
        return pos, []  # no HAVING clause present
    pos, conditions = parse_condition(toks, pos + 1, tables_with_alias, schema, default_tables)
    return pos, conditions
def parse_limit(toks, start_idx):
    """Parse an optional LIMIT clause.

    Returns (next_idx, limit) where limit is the integer limit value,
    1 when the limit token is not numeric (placeholder fallback), or
    None when no LIMIT clause is present.

    Fix: tokens arrive as strings from the tokenizer, so the original
    `type(toks[idx-1]) != int` check discarded real numeric limits like
    '3' and always returned the fake value 1, defeating the stated intent
    ("make limit value can work"). Numeric strings are now converted.
    """
    idx = start_idx
    len_ = len(toks)

    if idx < len_ and toks[idx] == 'limit':
        idx += 2  # consume 'limit' and its value token
        limit_tok = toks[idx - 1]
        if isinstance(limit_tok, int):
            return idx, limit_tok
        try:
            return idx, int(limit_tok)
        except (TypeError, ValueError):
            # non-numeric limit token: fall back to 1 as a placeholder
            return idx, 1

    return idx, None
def parse_sql(toks, start_idx, tables_with_alias, schema):
    """Parse one (possibly parenthesized) SELECT statement starting at
    start_idx into the nested clause dict {'select', 'from', 'where',
    'groupBy', 'having', 'orderBy', 'limit', 'intersect', 'union',
    'except'}. Returns (next_idx, sql).
    """
    isBlock = False # indicate whether this is a block of sql/sub-sql
    len_ = len(toks)
    idx = start_idx

    sql = {}
    if toks[idx] == '(':
        isBlock = True
        idx += 1

    # parse from clause in order to get default tables
    # NOTE: FROM is scanned ahead of SELECT because column resolution in
    # the other clauses needs default_tables; the returned from_end_idx
    # is where sequential parsing resumes.
    from_end_idx, table_units, conds, default_tables = parse_from(toks, start_idx, tables_with_alias, schema)
    sql['from'] = {'table_units': table_units, 'conds': conds}
    # select clause
    # idx still points at 'select' here; the index returned by
    # parse_select is deliberately discarded and parsing resumes after
    # the FROM clause instead.
    _, select_col_units = parse_select(toks, idx, tables_with_alias, schema, default_tables)
    idx = from_end_idx
    sql['select'] = select_col_units
    # where clause
    idx, where_conds = parse_where(toks, idx, tables_with_alias, schema, default_tables)
    sql['where'] = where_conds
    # group by clause
    idx, group_col_units = parse_group_by(toks, idx, tables_with_alias, schema, default_tables)
    sql['groupBy'] = group_col_units
    # having clause
    idx, having_conds = parse_having(toks, idx, tables_with_alias, schema, default_tables)
    sql['having'] = having_conds
    # order by clause
    idx, order_col_units = parse_order_by(toks, idx, tables_with_alias, schema, default_tables)
    sql['orderBy'] = order_col_units
    # limit clause
    idx, limit_val = parse_limit(toks, idx)
    sql['limit'] = limit_val

    idx = skip_semicolon(toks, idx)
    if isBlock:
        assert toks[idx] == ')'
        idx += 1  # skip ')'
    idx = skip_semicolon(toks, idx)

    # intersect/union/except clause
    for op in SQL_OPS:  # initialize IUE
        sql[op] = None
    if idx < len_ and toks[idx] in SQL_OPS:
        # recursively parse the right-hand side of the set operation
        sql_op = toks[idx]
        idx += 1
        idx, IUE_sql = parse_sql(toks, idx, tables_with_alias, schema)
        sql[sql_op] = IUE_sql
    return idx, sql
def load_data(fpath):
    """Read and return the JSON content of the file at *fpath*."""
    with open(fpath) as fin:
        return json.load(fin)
def get_sql(schema, query):
    """Tokenize *query* and parse it into the nested SQL clause dict."""
    query_toks = tokenize(query)
    alias_map = get_tables_with_alias(schema.schema, query_toks)
    _, parsed = parse_sql(query_toks, 0, alias_map, schema)
    return parsed
def skip_semicolon(toks, start_idx):
    """Advance past any run of ';' tokens starting at start_idx."""
    pos = start_idx
    total = len(toks)
    while pos < total and toks[pos] == ";":
        pos += 1
    return pos
def get_schemas_from_json(fpath):
    """Load a Spider-style tables.json file.

    Returns (schemas, db_names, tables) where:
      schemas maps db_id -> {lowercased table name: [lowercased col names]},
      db_names lists every db_id in file order, and
      tables maps db_id -> the raw original column/table name lists.
    """
    with open(fpath) as fin:
        data = json.load(fin)

    db_names = [entry['db_id'] for entry in data]
    tables = {}
    schemas = {}
    for entry in data:
        db_id = entry['db_id']
        cols_original = entry['column_names_original']
        tabs_original = entry['table_names_original']
        tables[db_id] = {'column_names_original': cols_original,
                         'table_names_original': tabs_original}
        # {'table': [col.lower, ..., ]} * -> __all__
        schema = {}
        for tab_idx, tab_name in enumerate(tabs_original):
            # columns are (table_index, name) pairs; keep those owned by tab_idx
            schema[str(tab_name.lower())] = [
                str(col.lower()) for owner, col in cols_original if owner == tab_idx
            ]
        schemas[db_id] = schema

    return schemas, db_names, tables