Spaces: Running on Zero

Commit cc040f7 · Parent: e772d15

add 70b models
app.py CHANGED

@@ -12,14 +12,34 @@ from huggingface_hub import snapshot_download
 from vptq.app_utils import get_chat_loop_generator
 
 models = [
+    {
+        "name": "VPTQ-community/Meta-Llama-3.1-70B-Instruct-v16-k65536-65536-woft",
+        "bits": "2 bits"
+    },
+    {
+        "name": "VPTQ-community/Meta-Llama-3.1-70B-Instruct-v8-k65536-256-woft",
+        "bits": "3 bits"
+    },
+    {
+        "name": "VPTQ-community/Meta-Llama-3.1-70B-Instruct-v8-k65536-65536-woft",
+        "bits": "4 bits"
+    },
     {
         "name": "VPTQ-community/Meta-Llama-3.1-8B-Instruct-v8-k65536-65536-woft",
         "bits": "4 bits"
     },
     {
-        "name": "VPTQ-community/
+        "name": "VPTQ-community/Qwen2.5-72B-Instruct-v8-k65536-65536-woft",
+        "bits": "4 bits"
+    },
+    {
+        "name": "VPTQ-community/Qwen2.5-72B-Instruct-v8-k65536-256-woft",
         "bits": "3 bits"
     },
+    {
+        "name": "VPTQ-community/Qwen2.5-72B-Instruct-v16-k65536-65536-woft",
+        "bits": "2 bits"
+    },
 ]
 
 # Queues for storing historical data (saving the last 100 GPU utilization and memory usage values)
@@ -153,7 +173,7 @@ download_thread.start()
 
 loaded_models = {}
 
-@spaces.GPU
+@spaces.GPU(duration=120)
 def respond(
     message,
     history: list[tuple[str, str]],