Update app.py
app.py
CHANGED
@@ -97,6 +97,12 @@ def warmup(model):
 @spaces.GPU(duration=60)
 def bot(history, temperature, top_p, use_EaInfer, highlight_EaInfer,session_state,):
     model.cuda()
+    warmup_id = torch.tensor([[0,1]]).cuda()
+    warmup_hidden= torch.randn(1,2,model.base_model.config.hidden_size).half().cuda()
+    out=model.base_model(warmup_id)
+    out0=model.ea_layer(warmup_hidden,warmup_id)
+    torch.cuda.synchronize()
+    del out,out0,warmup_id,warmup_hidden
     if not history:
         return history, "0.00 tokens/s", "0.00", session_state
     pure_history = session_state.get("pure_history", [])
@@ -259,7 +265,7 @@ parser.add_argument("--model-type", type=str, default="llama-3-instruct",choices
 parser.add_argument(
     "--total-token",
     type=int,
-    default=
+    default=64,
     help="The maximum number of new generated tokens.",
 )
 parser.add_argument(