Spaces: Running on Zero

Commit cc040f7 · Parent: e772d15

add 70b models
app.py CHANGED

@@ -12,14 +12,34 @@ from huggingface_hub import snapshot_download
 from vptq.app_utils import get_chat_loop_generator
 
 models = [
+    {
+        "name": "VPTQ-community/Meta-Llama-3.1-70B-Instruct-v16-k65536-65536-woft",
+        "bits": "2 bits"
+    },
+    {
+        "name": "VPTQ-community/Meta-Llama-3.1-70B-Instruct-v8-k65536-256-woft",
+        "bits": "3 bits"
+    },
+    {
+        "name": "VPTQ-community/Meta-Llama-3.1-70B-Instruct-v8-k65536-65536-woft",
+        "bits": "4 bits"
+    },
     {
         "name": "VPTQ-community/Meta-Llama-3.1-8B-Instruct-v8-k65536-65536-woft",
         "bits": "4 bits"
     },
     {
-        "name": "VPTQ-community/
+        "name": "VPTQ-community/Qwen2.5-72B-Instruct-v8-k65536-65536-woft",
+        "bits": "4 bits"
+    },
+    {
+        "name": "VPTQ-community/Qwen2.5-72B-Instruct-v8-k65536-256-woft",
         "bits": "3 bits"
     },
+    {
+        "name": "VPTQ-community/Qwen2.5-72B-Instruct-v16-k65536-65536-woft",
+        "bits": "2 bits"
+    },
 ]
 
 # Queues for storing historical data (saving the last 100 GPU utilization and memory usage values)
@@ -153,7 +173,7 @@ download_thread.start()
 
 loaded_models = {}
 
-@spaces.GPU
+@spaces.GPU(duration=120)
 def respond(
     message,
     history: list[tuple[str, str]],