juanfkurucz committed on
Commit
d61e332
1 Parent(s): 4dc3331

Preload models

Browse files
Files changed (1) hide show
  1. app.py +15 -4
app.py CHANGED
@@ -12,10 +12,21 @@ models = {
12
  "Pruned ONNX Optimized FP16": "tryolabs/bert-large-uncased-wwm-squadv2-optimized-f16",
13
  }
14
 
 
 
 
 
 
 
 
 
 
 
15
 
16
  def run_ort_inference(model_name, inputs):
17
- model_path = hf_hub_download(repo_id=models[model_name], filename="model.onnx")
18
- sess = InferenceSession(model_path, providers=["CPUExecutionProvider"])
 
19
  start_time = time.time()
20
  output = sess.run(None, input_feed=inputs)
21
  end_time = time.time()
@@ -24,9 +35,9 @@ def run_ort_inference(model_name, inputs):
24
 
25
  def run_normal_hf(model_name, inputs):
26
  start_time = time.time()
27
- model = AutoModelForQuestionAnswering.from_pretrained(models[model_name])
28
  end_time = time.time()
29
- return model(**inputs).values(), (end_time - start_time)
30
 
31
 
32
  def inference(model_name, context, question):
 
12
  "Pruned ONNX Optimized FP16": "tryolabs/bert-large-uncased-wwm-squadv2-optimized-f16",
13
  }
14
 
15
+ loaded_models = {
16
+ "Pruned ONNX Optimized FP16": hf_hub_download(
17
+ repo_id=models["Pruned ONNX Optimized FP16"], filename="model.onnx"
18
+ ),
19
+ "Base model": AutoModelForQuestionAnswering.from_pretrained(models["Base model"]),
20
+ "Pruned model": AutoModelForQuestionAnswering.from_pretrained(
21
+ models["Pruned model"]
22
+ ),
23
+ }
24
+
25
 
26
  def run_ort_inference(model_name, inputs):
27
+ sess = InferenceSession(
28
+ loaded_models[model_name], providers=["CPUExecutionProvider"]
29
+ )
30
  start_time = time.time()
31
  output = sess.run(None, input_feed=inputs)
32
  end_time = time.time()
 
35
 
36
  def run_normal_hf(model_name, inputs):
37
  start_time = time.time()
38
+ output = loaded_models[model_name](**inputs).values()
39
  end_time = time.time()
40
+ return output, (end_time - start_time)
41
 
42
 
43
  def inference(model_name, context, question):