Spaces:

OpenSourceRonin
/

VPTQ-demo

Running on Zero

App Files Files Community

OpenSourceRonin committed 3 days ago

Commit

81a2b2b

•

1 Parent(s): fef40a8

title

Browse files

Files changed (1) hide show

app.py +58 -18

app.py CHANGED Viewed

@@ -2,14 +2,32 @@ import spaces
 import gradio as gr
 from huggingface_hub import InferenceClient
 """
 For more information on `huggingface_hub` Inference API support, please check the docs: https://huggingface.co/docs/huggingface_hub/v0.22.2/en/guides/inference
 """
-#VPTQ-community/Qwen2.5-72B-Instruct-v8-k65536-0-woft
 from vptq.app_utils import get_chat_loop_generator
-chat_completion = get_chat_loop_generator("VPTQ-community/Meta-Llama-3.1-70B-Instruct-v8-k32768-0-woft")
 @spaces.GPU
 def respond(
@@ -46,24 +64,46 @@ def respond(
 """
 For information on how to customize the ChatInterface, peruse the gradio docs: https://www.gradio.app/docs/chatinterface
 """
-demo = gr.ChatInterface(
-    respond,
-    additional_inputs=[
-        gr.Textbox(value="You are a friendly Chatbot.", label="System message"),
-        gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max new tokens"),
-        gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
-        gr.Slider(
-            minimum=0.1,
-            maximum=1.0,
-            value=0.95,
-            step=0.05,
-            label="Top-p (nucleus sampling)",
-        ),
-    ],
-)
 if __name__ == "__main__":

 import gradio as gr
 from huggingface_hub import InferenceClient
 """
 For more information on `huggingface_hub` Inference API support, please check the docs: https://huggingface.co/docs/huggingface_hub/v0.22.2/en/guides/inference
 """
 from vptq.app_utils import get_chat_loop_generator
+model_list=["VPTQ-community/Meta-Llama-3.1-8B-Instruct-v12-k65536-4096-woft",
+            "VPTQ-community/Meta-Llama-3.1-70B-Instruct-v8-k32768-0-woft",
+            "VPTQ-community/Qwen2.5-7B-Instruct-v8-k256-256-woft",
+            "VPTQ-community/Qwen2.5-14B-Instruct-v8-k256-256-woft",
+            "VPTQ-community/Qwen2.5-32B-Instruct-v16-k65536-65536-woft",
+            "VPTQ-community/Qwen2.5-72B-Instruct-v8-k65536-0-woft",
+            ]
+current_model_g = model_list[0]
+chat_completion = get_chat_loop_generator(current_model_g)
+def update_title_and_chatmodel(model):
+    """Switch the active chat model and return its name.
+
+    The incoming value is coerced to ``str`` and compared with the
+    module-level ``current_model_g``. Only when it differs are the globals
+    updated: ``current_model_g`` is set to the new name and
+    ``chat_completion`` is rebuilt via ``get_chat_loop_generator``.
+
+    Args:
+        model: The selected model identifier (converted with ``str``).
+
+    Returns:
+        The selected model name as a string, whether or not a reload
+        happened.
+    """
+    global chat_completion, current_model_g
+    selected = str(model)
+    # Guard clause: re-selecting the active model must not rebuild the generator.
+    if selected == current_model_g:
+        return selected
+    # Record the new name first, then rebuild the generator for it.
+    current_model_g = selected
+    chat_completion = get_chat_loop_generator(selected)
+    return selected
 @spaces.GPU
 def respond(
+css = """
+h1 {
+  text-align: center;
+  display: block;
+}
+"""
 """
 For information on how to customize the ChatInterface, peruse the gradio docs: https://www.gradio.app/docs/chatinterface
 """
+chatbot = gr.Chatbot(label="Gradio ChatInterface")
+with gr.Blocks() as demo:
+    with gr.Column(scale=1):
+        title_output = gr.Markdown("Please select a model to run")
+        chat_demo = gr.ChatInterface(
+            respond,
+            #chatbot=chatbot,
+            additional_inputs_accordion=gr.Accordion(
+                label="⚙️ Parameters", open=False, render=False
+            ),
+            fill_height=False,
+            additional_inputs=[
+                gr.Textbox(value="You are a friendly Chatbot.", label="System message"),
+                gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max new tokens"),
+                gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
+                gr.Slider(
+                    minimum=0.1,
+                    maximum=1.0,
+                    value=0.95,
+                    step=0.05,
+                    label="Top-p (nucleus sampling)",
+                ),
+            ],
+        )
+        model_select = gr.Dropdown(
+                    choices=model_list,
+                    label="Models",
+                    value=model_list[0],
+        )
+        model_select.change(update_title_and_chatmodel, inputs=[model_select], outputs=title_output)
 if __name__ == "__main__":