shingjan committed
Commit a07656a
1 Parent(s): cdf4c06

Add TVM and speedups

Files changed (1): app.py (+12 -7)
app.py CHANGED
@@ -5,8 +5,9 @@ import torch._dynamo as dynamo
 
 
 model = torch.load("GPT2Model.pt")
-optimized_model = dynamo.optimize("inductor")(model)
 tokenizer = torch.load("GPT2Tokenizer.pt")
+inductor_model = dynamo.optimize("inductor")(model)
+tvm_model = dynamo.optimize("tvm")(model)
 
 def timed(fn):
     start = time.time()
@@ -14,18 +15,22 @@ def timed(fn):
     end = time.time() - start
     return result, float("{:.5f}".format(end))
 
+
 def gpt2(prompt):
     input_ids = tokenizer(prompt, return_tensors="pt").input_ids
     eager_outputs, eager_time = timed(lambda: model.generate(input_ids, do_sample=False, max_length=30))
-    dynamo_outputs, dynamo_time = timed(lambda: optimized_model.generate(input_ids, do_sample=False, max_length=30))
-    if torch.allclose(eager_outputs, dynamo_outputs):
-        actual_output = tokenizer.batch_decode(dynamo_outputs, skip_special_tokens=True)[0]
+    inductor_outputs, inductor_time = timed(lambda: inductor_model.generate(input_ids, do_sample=False, max_length=30))
+    tvm_outputs, tvm_time = timed(lambda: tvm_model.generate(input_ids, do_sample=False, max_length=30))
+    if torch.allclose(eager_outputs, inductor_outputs) and torch.allclose(eager_outputs, tvm_outputs):
+        actual_output = tokenizer.batch_decode(eager_outputs, skip_special_tokens=True)[0]
     else:
         actual_output = "Result is not correct between dynamo and eager!"
-    expect_output = f"Torch eager takes: {eager_time} \nDynamo takes: {dynamo_time} \nSpeedup: "
-    expect_output += "{:.2f}".format(eager_time/dynamo_time) + f"x \nOutput: {actual_output}"
+    expect_output = f"Torch eager takes: {eager_time} sec\n"
+    expect_output += f"Inductor takes: {inductor_time} sec with " + "{:.2f}x speedup\n".format(eager_time/inductor_time)
+    expect_output += f"TVM takes: {tvm_time} sec with " + "{:.2f}x speedup\n".format(eager_time/tvm_time)
+    expect_output += f"Output: {actual_output}"
     return expect_output
 
 demo = gr.Interface(fn=gpt2, inputs="text", outputs="text")
 
-demo.launch()
+demo.launch()
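
For context, the comparison the updated app.py runs can be reproduced outside Gradio. Below is a minimal sketch, assuming the public "gpt2" checkpoint from the transformers library stands in for the pickled GPT2Model.pt/GPT2Tokenizer.pt files, and adding a warm-up pass because the first call through a TorchDynamo backend includes one-time compilation cost (the app's single-call timings do not separate this out):

import time

import torch
import torch._dynamo as dynamo
from transformers import GPT2LMHeadModel, GPT2Tokenizer

# Assumption: use the public "gpt2" checkpoint instead of the Space's
# pickled GPT2Model.pt / GPT2Tokenizer.pt artifacts.
tokenizer = GPT2Tokenizer.from_pretrained("gpt2")
model = GPT2LMHeadModel.from_pretrained("gpt2")
model.eval()

# Wrap the same eager model with the two TorchDynamo backends the app compares.
inductor_model = dynamo.optimize("inductor")(model)
tvm_model = dynamo.optimize("tvm")(model)  # requires a working Apache TVM install


def timed(fn):
    # Return (result, wall-clock seconds) for a zero-argument callable.
    start = time.time()
    result = fn()
    return result, time.time() - start


input_ids = tokenizer("Hello, my dog is", return_tensors="pt").input_ids

# Warm-up: the first call through each backend triggers graph capture and
# compilation, so time a later call to measure the steady-state speedup.
for m in (inductor_model, tvm_model):
    m.generate(input_ids, do_sample=False, max_length=30)

_, eager_time = timed(lambda: model.generate(input_ids, do_sample=False, max_length=30))
_, inductor_time = timed(lambda: inductor_model.generate(input_ids, do_sample=False, max_length=30))
_, tvm_time = timed(lambda: tvm_model.generate(input_ids, do_sample=False, max_length=30))

print(f"eager: {eager_time:.3f}s")
print(f"inductor: {inductor_time:.3f}s ({eager_time / inductor_time:.2f}x)")
print(f"tvm: {tvm_time:.3f}s ({eager_time / tvm_time:.2f}x)")

Without the warm-up, the speedup reported for the first prompt is dominated by compilation and can make the compiled backends look slower than eager execution.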