shingjan committed on
Commit
cdf4c06
1 Parent(s): 6f8d7b6

Add dynamo for optimization

Browse files
Files changed (1) hide show
  1. app.py +19 -2
app.py CHANGED
@@ -1,13 +1,30 @@
 
1
  import torch
2
  import gradio as gr
 
3
 
4
 
5
# NOTE(review): torch.load deserializes via pickle, which can execute
# arbitrary code — only load checkpoint files from a trusted source.
model = torch.load("GPT2Model.pt")

tokenizer = torch.load("GPT2Tokenizer.pt")
 
 
 
 
 
 
 
7
def gpt2(prompt):
    """Greedy-decode up to 30 tokens of GPT-2 continuation for *prompt*."""
    encoded = tokenizer(prompt, return_tensors="pt")
    generated = model.generate(encoded.input_ids, do_sample=False, max_length=30)
    decoded = tokenizer.batch_decode(generated, skip_special_tokens=True)
    return decoded[0]
 
 
 
 
 
 
 
11
 
12
  demo = gr.Interface(fn=gpt2, inputs="text", outputs="text")
13
 
 
1
+ import time
2
  import torch
3
  import gradio as gr
4
+ import torch._dynamo as dynamo
5
 
6
 
7
# NOTE(review): torch.load deserializes via pickle, which can execute
# arbitrary code — only load checkpoint files from a trusted source.
model = torch.load("GPT2Model.pt")
# torch.compile is the public, supported entry point for TorchDynamo;
# it is equivalent to the private dynamo.optimize("inductor")(model).
optimized_model = torch.compile(model, backend="inductor")
tokenizer = torch.load("GPT2Tokenizer.pt")
10
+
11
def timed(fn):
    """Call *fn* with no arguments and measure its wall-clock duration.

    Returns a ``(result, seconds)`` tuple, with *seconds* rounded to
    5 decimal places.
    """
    start = time.time()
    result = fn()
    # round() is the idiomatic way to limit precision; it avoids the
    # format-then-reparse round trip of float("{:.5f}".format(...)).
    elapsed = round(time.time() - start, 5)
    return result, elapsed
16
+
17
def gpt2(prompt):
    """Generate a GPT-2 continuation of *prompt* with both the eager and the
    dynamo-optimized model, and return a human-readable report comparing
    their outputs and timings (for the Gradio interface).
    """
    input_ids = tokenizer(prompt, return_tensors="pt").input_ids
    eager_outputs, eager_time = timed(
        lambda: model.generate(input_ids, do_sample=False, max_length=30)
    )
    dynamo_outputs, dynamo_time = timed(
        lambda: optimized_model.generate(input_ids, do_sample=False, max_length=30)
    )
    # torch.equal returns False on a shape mismatch, whereas torch.allclose
    # raises — so two generations of different lengths would crash the app
    # instead of reporting the discrepancy.
    if torch.equal(eager_outputs, dynamo_outputs):
        actual_output = tokenizer.batch_decode(dynamo_outputs, skip_special_tokens=True)[0]
    else:
        actual_output = "Result is not correct between dynamo and eager!"
    # Timings are rounded to 5 decimals, so dynamo_time can be 0.0 — avoid
    # a ZeroDivisionError in that edge case.
    speedup = f"{eager_time / dynamo_time:.2f}x" if dynamo_time else "n/a"
    return (
        f"Torch eager takes: {eager_time} \n"
        f"Dynamo takes: {dynamo_time} \n"
        f"Speedup: {speedup} \nOutput: {actual_output}"
    )
28
 
29
# Gradio UI: a single text box in, the timing/output report string out.
# NOTE(review): no demo.launch() is visible in this chunk — confirm the
# hosting environment (e.g. HF Spaces) starts the app automatically.
demo = gr.Interface(fn=gpt2, inputs="text", outputs="text")
30