Commit · 54ac152
Parent(s): d61e332
Add max sequence length to tokenizer inference
app.py CHANGED
@@ -6,6 +6,8 @@ from huggingface_hub import hf_hub_download
 from onnxruntime import InferenceSession
 from transformers import AutoModelForQuestionAnswering, AutoTokenizer
 
+MAX_SEQUENCE_LENGTH = 512
+
 models = {
     "Base model": "bert-large-uncased-whole-word-masking-finetuned-squad",
     "Pruned model": "madlag/bert-large-uncased-wwm-squadv2-x2.63-f82.6-d16-hybrid-v1",
@@ -43,13 +45,19 @@ def run_normal_hf(model_name, inputs):
 def inference(model_name, context, question):
     tokenizer = AutoTokenizer.from_pretrained(models[model_name])
     if model_name == "Pruned ONNX Optimized FP16":
-        inputs = dict(tokenizer(question, context, return_tensors="np"))
+        inputs = dict(
+            tokenizer(
+                question, context, return_tensors="np", max_length=MAX_SEQUENCE_LENGTH
+            )
+        )
         output, inference_time = run_ort_inference(model_name, inputs)
         answer_start_scores, answer_end_scores = torch.tensor(output[0]), torch.tensor(
             output[1]
         )
     else:
-        inputs = tokenizer(question, context, return_tensors="pt")
+        inputs = tokenizer(
+            question, context, return_tensors="pt", max_length=MAX_SEQUENCE_LENGTH
+        )
         output, inference_time = run_normal_hf(model_name, inputs)
         answer_start_scores, answer_end_scores = output
 
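The change caps tokenized inputs at 512 tokens, which matches the position-embedding limit of BERT-large checkpoints. As a rough illustration of the effect (not part of the commit; the sample texts and the added truncation=True flag are assumptions for this sketch), passing max_length bounds the encoded sequence length:

# Illustrative sketch only: shows how max_length bounds the tokenized input
# for the QA tokenizer used above. Texts are placeholders, and truncation=True
# is added here to make the cutoff explicit rather than implicit.
from transformers import AutoTokenizer

MAX_SEQUENCE_LENGTH = 512  # BERT-large has 512 learned position embeddings

tokenizer = AutoTokenizer.from_pretrained(
    "bert-large-uncased-whole-word-masking-finetuned-squad"
)

question = "What is pruning?"
context = "Pruning removes weights from a trained network. " * 300  # well over 512 tokens

encoded = tokenizer(
    question,
    context,
    return_tensors="pt",
    max_length=MAX_SEQUENCE_LENGTH,
    truncation=True,
)
print(encoded["input_ids"].shape)  # e.g. torch.Size([1, 512])

In recent transformers versions, supplying max_length without truncation typically triggers a warning and falls back to a default truncation strategy, so the cap still takes effect; spelling out truncation=True just makes that behavior explicit.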