|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
import unittest |
|
|
|
from transformers import ( |
|
MODEL_FOR_TABLE_QUESTION_ANSWERING_MAPPING, |
|
AutoModelForTableQuestionAnswering, |
|
AutoTokenizer, |
|
TableQuestionAnsweringPipeline, |
|
TFAutoModelForTableQuestionAnswering, |
|
is_torch_available, |
|
pipeline, |
|
) |
|
from transformers.testing_utils import ( |
|
is_pipeline_test, |
|
require_pandas, |
|
require_tensorflow_probability, |
|
require_tf, |
|
require_torch, |
|
slow, |
|
) |
|
|
|
|
|
# The torch version flag is consumed by the `skipIf` decorators on the PyTorch tests below.
# When torch is not installed the flag cannot be imported, so it is pinned to False.
if not is_torch_available():
    is_torch_greater_or_equal_than_1_12 = False
else:
    from transformers.pytorch_utils import is_torch_greater_or_equal_than_1_12
|
|
|
|
|
@is_pipeline_test |
|
class TQAPipelineTests(unittest.TestCase): |
|
|
|
|
|
model_mapping = MODEL_FOR_TABLE_QUESTION_ANSWERING_MAPPING |
|
|
|
@require_tensorflow_probability
@require_pandas
@require_tf
@require_torch
def test_small_model_tf(self):
    # Runs the TF pipeline on the `lysandre/tiny-tapas-random-wtq` checkpoint (loaded from PyTorch weights,
    # hence the extra `require_torch`) and pins both the outputs and the input validation errors.
    checkpoint = "lysandre/tiny-tapas-random-wtq"
    model = TFAutoModelForTableQuestionAnswering.from_pretrained(checkpoint, from_pt=True)
    tokenizer = AutoTokenizer.from_pretrained(checkpoint)
    config = model.config
    self.assertIsInstance(config.aggregation_labels, dict)
    self.assertIsInstance(config.no_aggregation_label_index, int)

    table_querier = TableQuestionAnsweringPipeline(model=model, tokenizer=tokenizer)

    actors_table = {
        "actors": ["brad pitt", "leonardo di caprio", "george clooney"],
        "age": ["56", "45", "59"],
        "number of movies": ["87", "53", "69"],
        "date of birth": ["7 february 1967", "10 june 1996", "28 november 1967"],
    }
    actor_queries = ["how many movies has george clooney played in?", "how old is he?", "what's his date of birth?"]
    repos_table = {
        "Repository": ["Transformers", "Datasets", "Tokenizers"],
        "Stars": ["36542", "4512", "3934"],
        "Contributors": ["651", "77", "34"],
        "Programming language": ["Python", "Python", "Rust, Python and NodeJS"],
    }
    repo_queries = [
        "What repository has the largest number of stars?",
        "Given that the numbers of stars defines if a repository is active, what repository is the most"
        " active?",
        "What is the number of repositories?",
        "What is the average number of stars?",
        "What is the total amount of stars?",
    ]
    # Every query is expected to yield this same answer with this checkpoint.
    empty_average = {"answer": "AVERAGE > ", "coordinates": [], "cells": [], "aggregator": "AVERAGE"}

    # A single query string returns a single dict ...
    outputs = table_querier(table=actors_table, query=actor_queries[0])
    self.assertEqual(outputs, empty_average)
    # ... while a list of queries returns one dict per query.
    outputs = table_querier(table=actors_table, query=actor_queries)
    self.assertEqual(outputs, [empty_average] * 3)
    outputs = table_querier(table=repos_table, query=repo_queries)
    self.assertEqual(outputs, [empty_average] * 5)

    # Missing or empty tables and queries must be rejected with a ValueError.
    rejected_calls = (
        {"query": "What does it do with empty context ?", "table": None},
        {"query": "What does it do with empty context ?", "table": ""},
        {"query": "What does it do with empty context ?", "table": {}},
        {"table": repos_table},
        {"query": "", "table": repos_table},
        {"query": None, "table": repos_table},
    )
    for bad_kwargs in rejected_calls:
        with self.assertRaises(ValueError):
            table_querier(**bad_kwargs)
|
|
|
@unittest.skipIf(not is_torch_greater_or_equal_than_1_12, reason="Tapas is only available in torch v1.12+")
@require_torch
def test_small_model_pt(self):
    # Runs the PyTorch pipeline on the `lysandre/tiny-tapas-random-wtq` checkpoint and pins both the
    # outputs and the input validation errors.
    checkpoint = "lysandre/tiny-tapas-random-wtq"
    model = AutoModelForTableQuestionAnswering.from_pretrained(checkpoint)
    tokenizer = AutoTokenizer.from_pretrained(checkpoint)
    config = model.config
    self.assertIsInstance(config.aggregation_labels, dict)
    self.assertIsInstance(config.no_aggregation_label_index, int)

    table_querier = TableQuestionAnsweringPipeline(model=model, tokenizer=tokenizer)

    actors_table = {
        "actors": ["brad pitt", "leonardo di caprio", "george clooney"],
        "age": ["56", "45", "59"],
        "number of movies": ["87", "53", "69"],
        "date of birth": ["7 february 1967", "10 june 1996", "28 november 1967"],
    }
    actor_queries = ["how many movies has george clooney played in?", "how old is he?", "what's his date of birth?"]
    repos_table = {
        "Repository": ["Transformers", "Datasets", "Tokenizers"],
        "Stars": ["36542", "4512", "3934"],
        "Contributors": ["651", "77", "34"],
        "Programming language": ["Python", "Python", "Rust, Python and NodeJS"],
    }
    repo_queries = [
        "What repository has the largest number of stars?",
        "Given that the numbers of stars defines if a repository is active, what repository is the most"
        " active?",
        "What is the number of repositories?",
        "What is the average number of stars?",
        "What is the total amount of stars?",
    ]
    # Every query is expected to yield this same answer with this checkpoint.
    empty_average = {"answer": "AVERAGE > ", "coordinates": [], "cells": [], "aggregator": "AVERAGE"}

    # A single query string returns a single dict ...
    outputs = table_querier(table=actors_table, query=actor_queries[0])
    self.assertEqual(outputs, empty_average)
    # ... while a list of queries returns one dict per query.
    outputs = table_querier(table=actors_table, query=actor_queries)
    self.assertEqual(outputs, [empty_average] * 3)
    outputs = table_querier(table=repos_table, query=repo_queries)
    self.assertEqual(outputs, [empty_average] * 5)

    # Missing or empty tables and queries must be rejected with a ValueError.
    rejected_calls = (
        {"query": "What does it do with empty context ?", "table": None},
        {"query": "What does it do with empty context ?", "table": ""},
        {"query": "What does it do with empty context ?", "table": {}},
        {"table": repos_table},
        {"query": "", "table": repos_table},
        {"query": None, "table": repos_table},
    )
    for bad_kwargs in rejected_calls:
        with self.assertRaises(ValueError):
            table_querier(**bad_kwargs)
|
|
|
@unittest.skipIf(not is_torch_greater_or_equal_than_1_12, reason="Tapas is only available in torch v1.12+")
@require_torch
def test_slow_tokenizer_sqa_pt(self):
    # Runs the PyTorch pipeline on the `lysandre/tiny-tapas-random-sqa` checkpoint: compares sequential
    # and batched inference, then pins the outputs and the input validation errors.
    checkpoint = "lysandre/tiny-tapas-random-sqa"
    model = AutoModelForTableQuestionAnswering.from_pretrained(checkpoint)
    tokenizer = AutoTokenizer.from_pretrained(checkpoint)
    table_querier = TableQuestionAnsweringPipeline(model=model, tokenizer=tokenizer)

    actors_table = {
        "actors": ["brad pitt", "leonardo di caprio", "george clooney"],
        "age": ["56", "45", "59"],
        "number of movies": ["87", "53", "69"],
        "date of birth": ["7 february 1967", "10 june 1996", "28 november 1967"],
    }
    actor_queries = ["how many movies has george clooney played in?", "how old is he?", "what's his date of birth?"]

    sequential_outputs = table_querier(table=actors_table, query=actor_queries, sequential=True)
    batch_outputs = table_querier(table=actors_table, query=actor_queries, sequential=False)

    for per_query_outputs in (sequential_outputs, batch_outputs):
        self.assertEqual(len(per_query_outputs), 3)
    # The first answers agree between both modes, the second ones are expected to differ
    # (presumably because sequential mode lets earlier answers influence later queries - TODO confirm).
    self.assertEqual(sequential_outputs[0], batch_outputs[0])
    self.assertNotEqual(sequential_outputs[1], batch_outputs[1])

    # The remaining checks run on a freshly built pipeline.
    table_querier = TableQuestionAnsweringPipeline(model=model, tokenizer=tokenizer)

    repos_table = {
        "Repository": ["Transformers", "Datasets", "Tokenizers"],
        "Stars": ["36542", "4512", "3934"],
        "Contributors": ["651", "77", "34"],
        "Programming language": ["Python", "Python", "Rust, Python and NodeJS"],
    }
    repo_queries = [
        "What repository has the largest number of stars?",
        "Given that the numbers of stars defines if a repository is active, what repository is the most"
        " active?",
        "What is the number of repositories?",
        "What is the average number of stars?",
        "What is the total amount of stars?",
    ]
    # Expected answer for every query against each table with this checkpoint.
    birth_date_answer = {"answer": "7 february 1967", "coordinates": [(0, 3)], "cells": ["7 february 1967"]}
    python_answer = {"answer": "Python, Python", "coordinates": [(0, 3), (1, 3)], "cells": ["Python", "Python"]}

    outputs = table_querier(table=actors_table, query=actor_queries[0])
    self.assertEqual(outputs, birth_date_answer)
    outputs = table_querier(table=actors_table, query=actor_queries)
    self.assertEqual(outputs, [birth_date_answer] * 3)
    outputs = table_querier(table=repos_table, query=repo_queries)
    self.assertEqual(outputs, [python_answer] * 5)

    # Missing or empty tables and queries must be rejected with a ValueError.
    rejected_calls = (
        {"query": "What does it do with empty context ?", "table": None},
        {"query": "What does it do with empty context ?", "table": ""},
        {"query": "What does it do with empty context ?", "table": {}},
        {"table": repos_table},
        {"query": "", "table": repos_table},
        {"query": None, "table": repos_table},
    )
    for bad_kwargs in rejected_calls:
        with self.assertRaises(ValueError):
            table_querier(**bad_kwargs)
|
|
|
@require_tf
@require_tensorflow_probability
@require_pandas
@require_torch
def test_slow_tokenizer_sqa_tf(self):
    # Runs the TF pipeline on the `lysandre/tiny-tapas-random-sqa` checkpoint (loaded from PyTorch
    # weights, hence the extra `require_torch`): compares sequential and batched inference, then pins
    # the outputs and the input validation errors.
    checkpoint = "lysandre/tiny-tapas-random-sqa"
    model = TFAutoModelForTableQuestionAnswering.from_pretrained(checkpoint, from_pt=True)
    tokenizer = AutoTokenizer.from_pretrained(checkpoint)
    table_querier = TableQuestionAnsweringPipeline(model=model, tokenizer=tokenizer)

    actors_table = {
        "actors": ["brad pitt", "leonardo di caprio", "george clooney"],
        "age": ["56", "45", "59"],
        "number of movies": ["87", "53", "69"],
        "date of birth": ["7 february 1967", "10 june 1996", "28 november 1967"],
    }
    actor_queries = ["how many movies has george clooney played in?", "how old is he?", "what's his date of birth?"]

    sequential_outputs = table_querier(table=actors_table, query=actor_queries, sequential=True)
    batch_outputs = table_querier(table=actors_table, query=actor_queries, sequential=False)

    for per_query_outputs in (sequential_outputs, batch_outputs):
        self.assertEqual(len(per_query_outputs), 3)
    # The first answers agree between both modes, the second ones are expected to differ
    # (presumably because sequential mode lets earlier answers influence later queries - TODO confirm).
    self.assertEqual(sequential_outputs[0], batch_outputs[0])
    self.assertNotEqual(sequential_outputs[1], batch_outputs[1])

    # The remaining checks run on a freshly built pipeline.
    table_querier = TableQuestionAnsweringPipeline(model=model, tokenizer=tokenizer)

    repos_table = {
        "Repository": ["Transformers", "Datasets", "Tokenizers"],
        "Stars": ["36542", "4512", "3934"],
        "Contributors": ["651", "77", "34"],
        "Programming language": ["Python", "Python", "Rust, Python and NodeJS"],
    }
    repo_queries = [
        "What repository has the largest number of stars?",
        "Given that the numbers of stars defines if a repository is active, what repository is the most"
        " active?",
        "What is the number of repositories?",
        "What is the average number of stars?",
        "What is the total amount of stars?",
    ]
    # Expected answer for every query against each table with this checkpoint.
    birth_date_answer = {"answer": "7 february 1967", "coordinates": [(0, 3)], "cells": ["7 february 1967"]}
    python_answer = {"answer": "Python, Python", "coordinates": [(0, 3), (1, 3)], "cells": ["Python", "Python"]}

    outputs = table_querier(table=actors_table, query=actor_queries[0])
    self.assertEqual(outputs, birth_date_answer)
    outputs = table_querier(table=actors_table, query=actor_queries)
    self.assertEqual(outputs, [birth_date_answer] * 3)
    outputs = table_querier(table=repos_table, query=repo_queries)
    self.assertEqual(outputs, [python_answer] * 5)

    # Missing or empty tables and queries must be rejected with a ValueError.
    rejected_calls = (
        {"query": "What does it do with empty context ?", "table": None},
        {"query": "What does it do with empty context ?", "table": ""},
        {"query": "What does it do with empty context ?", "table": {}},
        {"table": repos_table},
        {"query": "", "table": repos_table},
        {"query": None, "table": repos_table},
    )
    for bad_kwargs in rejected_calls:
        with self.assertRaises(ValueError):
            table_querier(**bad_kwargs)
|
|
|
@unittest.skipIf(not is_torch_greater_or_equal_than_1_12, reason="Tapas is only available in torch v1.12+")
@slow
@require_torch
def test_integration_wtq_pt(self):
    # Slow integration test: the task's default PyTorch pipeline must pick the right cells and
    # aggregator for each question about the repository table.
    def aggregated(aggregator, column, cells):
        # Expected output when `aggregator` is applied to the first len(cells) rows of `column`.
        return {
            "answer": f"{aggregator} > {', '.join(cells)}",
            "coordinates": [(row, column) for row in range(len(cells))],
            "cells": cells,
            "aggregator": aggregator,
        }

    table_querier = pipeline("table-question-answering")

    repos_table = {
        "Repository": ["Transformers", "Datasets", "Tokenizers"],
        "Stars": ["36542", "4512", "3934"],
        "Contributors": ["651", "77", "34"],
        "Programming language": ["Python", "Python", "Rust, Python and NodeJS"],
    }
    questions = [
        "What repository has the largest number of stars?",
        "Given that the numbers of stars defines if a repository is active, what repository is the most active?",
        "What is the number of repositories?",
        "What is the average number of stars?",
        "What is the total amount of stars?",
    ]

    results = table_querier(repos_table, questions)

    top_repository = {
        "answer": "Transformers",
        "coordinates": [(0, 0)],
        "cells": ["Transformers"],
        "aggregator": "NONE",
    }
    expected_results = [
        top_repository,
        top_repository,
        aggregated("COUNT", 0, ["Transformers", "Datasets", "Tokenizers"]),
        aggregated("AVERAGE", 1, ["36542", "4512", "3934"]),
        aggregated("SUM", 1, ["36542", "4512", "3934"]),
    ]
    self.assertListEqual(results, expected_results)
|
|
|
@slow
@require_tf
@require_tensorflow_probability
@require_pandas
def test_integration_wtq_tf(self):
    # Slow integration test: the TF `google/tapas-base-finetuned-wtq` checkpoint must pick the right
    # cells and aggregator for each question about the repository table.
    #
    # `require_tf` is listed explicitly, like on the small TF tests of this class: the test
    # instantiates a TF model, so without TensorFlow it should be skipped rather than error out.
    model_id = "google/tapas-base-finetuned-wtq"
    model = TFAutoModelForTableQuestionAnswering.from_pretrained(model_id)
    tokenizer = AutoTokenizer.from_pretrained(model_id)
    table_querier = pipeline("table-question-answering", model=model, tokenizer=tokenizer)

    data = {
        "Repository": ["Transformers", "Datasets", "Tokenizers"],
        "Stars": ["36542", "4512", "3934"],
        "Contributors": ["651", "77", "34"],
        "Programming language": ["Python", "Python", "Rust, Python and NodeJS"],
    }
    queries = [
        "What repository has the largest number of stars?",
        "Given that the numbers of stars defines if a repository is active, what repository is the most active?",
        "What is the number of repositories?",
        "What is the average number of stars?",
        "What is the total amount of stars?",
    ]

    results = table_querier(data, queries)

    expected_results = [
        {"answer": "Transformers", "coordinates": [(0, 0)], "cells": ["Transformers"], "aggregator": "NONE"},
        {"answer": "Transformers", "coordinates": [(0, 0)], "cells": ["Transformers"], "aggregator": "NONE"},
        {
            "answer": "COUNT > Transformers, Datasets, Tokenizers",
            "coordinates": [(0, 0), (1, 0), (2, 0)],
            "cells": ["Transformers", "Datasets", "Tokenizers"],
            "aggregator": "COUNT",
        },
        {
            "answer": "AVERAGE > 36542, 4512, 3934",
            "coordinates": [(0, 1), (1, 1), (2, 1)],
            "cells": ["36542", "4512", "3934"],
            "aggregator": "AVERAGE",
        },
        {
            "answer": "SUM > 36542, 4512, 3934",
            "coordinates": [(0, 1), (1, 1), (2, 1)],
            "cells": ["36542", "4512", "3934"],
            "aggregator": "SUM",
        },
    ]
    self.assertListEqual(results, expected_results)
|
|
|
@unittest.skipIf(not is_torch_greater_or_equal_than_1_12, reason="Tapas is only available in torch v1.12+")
@slow
@require_torch
def test_integration_sqa_pt(self):
    # Slow integration test: the PyTorch `google/tapas-base-finetuned-sqa` checkpoint, run with
    # sequential inference, must return the expected cell for each of the three questions.
    checkpoint = "google/tapas-base-finetuned-sqa"
    table_querier = pipeline("table-question-answering", model=checkpoint, tokenizer=checkpoint)

    actors_table = {
        "Actors": ["Brad Pitt", "Leonardo Di Caprio", "George Clooney"],
        "Age": ["56", "45", "59"],
        "Number of movies": ["87", "53", "69"],
        "Date of birth": ["7 february 1967", "10 june 1996", "28 november 1967"],
    }
    questions = ["How many movies has George Clooney played in?", "How old is he?", "What's his date of birth?"]
    results = table_querier(actors_table, questions, sequential=True)

    # Each expected answer is a single cell, identified by its coordinates in the table.
    expected_cells = (("69", (2, 2)), ("59", (2, 1)), ("28 november 1967", (2, 3)))
    expected_results = [
        {"answer": cell, "coordinates": [coordinate], "cells": [cell]} for cell, coordinate in expected_cells
    ]
    self.assertListEqual(results, expected_results)
|
|
|
@slow
@require_tf
@require_tensorflow_probability
@require_pandas
def test_integration_sqa_tf(self):
    # Slow integration test: the TF `google/tapas-base-finetuned-sqa` checkpoint, run with sequential
    # inference, must return the expected cell for each of the three questions.
    #
    # `require_tf` is listed explicitly, like on the small TF tests of this class: the test
    # instantiates a TF model, so without TensorFlow it should be skipped rather than error out.
    model_id = "google/tapas-base-finetuned-sqa"
    model = TFAutoModelForTableQuestionAnswering.from_pretrained(model_id)
    tokenizer = AutoTokenizer.from_pretrained(model_id)
    table_querier = pipeline(
        "table-question-answering",
        model=model,
        tokenizer=tokenizer,
    )
    data = {
        "Actors": ["Brad Pitt", "Leonardo Di Caprio", "George Clooney"],
        "Age": ["56", "45", "59"],
        "Number of movies": ["87", "53", "69"],
        "Date of birth": ["7 february 1967", "10 june 1996", "28 november 1967"],
    }
    queries = ["How many movies has George Clooney played in?", "How old is he?", "What's his date of birth?"]
    results = table_querier(data, queries, sequential=True)

    expected_results = [
        {"answer": "69", "coordinates": [(2, 2)], "cells": ["69"]},
        {"answer": "59", "coordinates": [(2, 1)], "cells": ["59"]},
        {"answer": "28 november 1967", "coordinates": [(2, 3)], "cells": ["28 november 1967"]},
    ]
    self.assertListEqual(results, expected_results)
|
|
|
@slow
@require_torch
def test_large_model_pt_tapex(self):
    # Slow integration test: the `microsoft/tapex-large-finetuned-wtq` checkpoint, run with sequential
    # inference, must return answer-only dicts for the three questions.
    table_querier = pipeline("table-question-answering", model="microsoft/tapex-large-finetuned-wtq")

    actors_table = {
        "Actors": ["Brad Pitt", "Leonardo Di Caprio", "George Clooney"],
        "Age": ["56", "45", "59"],
        "Number of movies": ["87", "53", "69"],
        "Date of birth": ["7 february 1967", "10 june 1996", "28 november 1967"],
    }
    questions = [
        "How many movies has George Clooney played in?",
        "How old is Mr Clooney ?",
        "What's the date of birth of Leonardo ?",
    ]
    results = table_querier(actors_table, questions, sequential=True)

    # The expected answers carry a leading space.
    expected_results = [{"answer": answer} for answer in (" 69", " 59", " 10 june 1996")]
    self.assertListEqual(results, expected_results)
|
|