minhdang committed on
Commit e4738ca
Parent: a57ae7f

Update app.py

Files changed (1)
  1. app.py +9 -0
app.py CHANGED
@@ -7,6 +7,15 @@ import spaces
  import torch
  from transformers import AutoModelForCausalLM, AutoTokenizer, TextIteratorStreamer
  # test
+ import os
+ import subprocess
+
+ # Install flash attention
+ subprocess.run(
+     "pip install flash-attn --no-build-isolation",
+     env={"FLASH_ATTENTION_SKIP_CUDA_BUILD": "TRUE"},
+     shell=True,
+ )
  MAX_MAX_NEW_TOKENS = 2048
  DEFAULT_MAX_NEW_TOKENS = 1024
  total_count=0
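
A caveat with the committed snippet: passing `env={...}` to `subprocess.run` replaces the child's entire environment rather than extending it, so the shell locates `pip` only through its built-in default `PATH`, and any CUDA-related variables are dropped. Below is a minimal sketch of a variant that merges the flag into the inherited environment and fails loudly if the install does not succeed; the `sys.executable -m pip` invocation is my substitution, not part of this commit.

import os
import subprocess
import sys

# Install flash-attn at runtime. FLASH_ATTENTION_SKIP_CUDA_BUILD=TRUE tells
# flash-attn's setup to skip compiling the CUDA extensions at install time.
# Merging os.environ (instead of replacing it) keeps PATH and CUDA variables
# visible to the child process.
subprocess.run(
    [sys.executable, "-m", "pip", "install", "flash-attn", "--no-build-isolation"],
    env={**os.environ, "FLASH_ATTENTION_SKIP_CUDA_BUILD": "TRUE"},
    check=True,  # raise CalledProcessError if pip fails instead of continuing silently
)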