Update app.py
app.py
CHANGED
@@ -97,6 +97,12 @@ def warmup(model):
 @spaces.GPU(duration=60)
 def bot(history, temperature, top_p, use_EaInfer, highlight_EaInfer,session_state,):
     model.cuda()
+    warmup_id = torch.tensor([[0,1]]).cuda()
+    warmup_hidden= torch.randn(1,2,model.base_model.config.hidden_size).half().cuda()
+    out=model.base_model(warmup_id)
+    out0=model.ea_layer(warmup_hidden,warmup_id)
+    torch.cuda.synchronize()
+    del out,out0,warmup_id,warmup_hidden
     if not history:
         return history, "0.00 tokens/s", "0.00", session_state
     pure_history = session_state.get("pure_history", [])
@@ -259,7 +265,7 @@ parser.add_argument("--model-type", type=str, default="llama-3-instruct",choices
 parser.add_argument(
     "--total-token",
     type=int,
-    default=
+    default=64,
     help="The maximum number of new generated tokens.",
 )
 parser.add_argument(