OpenSourceRonin committed on
Commit
81a2b2b
1 Parent(s): fef40a8
Files changed (1) hide show
  1. app.py +58 -18
app.py CHANGED
@@ -2,14 +2,32 @@ import spaces
2
  import gradio as gr
3
  from huggingface_hub import InferenceClient
4
 
 
5
  """
6
  For more information on `huggingface_hub` Inference API support, please check the docs: https://huggingface.co/docs/huggingface_hub/v0.22.2/en/guides/inference
7
  """
8
- #VPTQ-community/Qwen2.5-72B-Instruct-v8-k65536-0-woft
9
-
10
  from vptq.app_utils import get_chat_loop_generator
11
 
12
- chat_completion = get_chat_loop_generator("VPTQ-community/Meta-Llama-3.1-70B-Instruct-v8-k32768-0-woft")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
13
 
14
  @spaces.GPU
15
  def respond(
@@ -46,24 +64,46 @@ def respond(
46
 
47
 
48
 
 
 
 
 
 
 
49
  """
50
  For information on how to customize the ChatInterface, peruse the gradio docs: https://www.gradio.app/docs/chatinterface
51
  """
52
- demo = gr.ChatInterface(
53
- respond,
54
- additional_inputs=[
55
- gr.Textbox(value="You are a friendly Chatbot.", label="System message"),
56
- gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max new tokens"),
57
- gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
58
- gr.Slider(
59
- minimum=0.1,
60
- maximum=1.0,
61
- value=0.95,
62
- step=0.05,
63
- label="Top-p (nucleus sampling)",
64
- ),
65
- ],
66
- )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
67
 
68
 
69
  if __name__ == "__main__":
 
2
  import gradio as gr
3
  from huggingface_hub import InferenceClient
4
 
5
+
6
  """
7
  For more information on `huggingface_hub` Inference API support, please check the docs: https://huggingface.co/docs/huggingface_hub/v0.22.2/en/guides/inference
8
  """
 
 
9
  from vptq.app_utils import get_chat_loop_generator
10
 
11
# Model identifiers offered in the "Models" dropdown. The first entry is the
# one loaded when the module is imported.
model_list = [
    "VPTQ-community/Meta-Llama-3.1-8B-Instruct-v12-k65536-4096-woft",
    "VPTQ-community/Meta-Llama-3.1-70B-Instruct-v8-k32768-0-woft",
    "VPTQ-community/Qwen2.5-7B-Instruct-v8-k256-256-woft",
    "VPTQ-community/Qwen2.5-14B-Instruct-v8-k256-256-woft",
    "VPTQ-community/Qwen2.5-32B-Instruct-v16-k65536-65536-woft",
    "VPTQ-community/Qwen2.5-72B-Instruct-v8-k65536-0-woft",
]

# Name of the model that `chat_completion` was built for.
current_model_g = model_list[0]

# Chat generator for the active model; rebound when the selection changes.
chat_completion = get_chat_loop_generator(current_model_g)
21
+
22
def update_title_and_chatmodel(model):
    """Switch the active chat model and return its name.

    If the requested model differs from the one currently recorded in
    ``current_model_g``, a new chat generator is built with
    ``get_chat_loop_generator`` and the module-level ``chat_completion`` and
    ``current_model_g`` are rebound. Selecting the already-active model does
    nothing.

    Args:
        model: Selected model identifier; coerced to ``str`` before use.

    Returns:
        The selected model name as a string.

    Raises:
        Whatever ``get_chat_loop_generator`` raises. In that case neither
        global is modified.
    """
    global chat_completion
    global current_model_g

    model = str(model)
    if model != current_model_g:
        # Build the new generator first and only then publish both globals.
        # Updating current_model_g before the load succeeds would leave it
        # naming a model that chat_completion is not serving, and a retry of
        # the same selection would be skipped by the comparison above.
        new_chat_completion = get_chat_loop_generator(model)
        chat_completion = new_chat_completion
        current_model_g = model
    return model
30
+
31
 
32
  @spaces.GPU
33
  def respond(
 
64
 
65
 
66
 
67
+ css = """
68
+ h1 {
69
+ text-align: center;
70
+ display: block;
71
+ }
72
+ """
73
  """
74
  For information on how to customize the ChatInterface, peruse the gradio docs: https://www.gradio.app/docs/chatinterface
75
  """
76
# NOTE(review): this Chatbot is only referenced by the commented-out
# `chatbot=` argument below, so the visible code does not use it.
chatbot = gr.Chatbot(label="Gradio ChatInterface")
# Top-level UI: a title line, the chat interface, and a model selector.
# NOTE(review): the nesting below was reconstructed from a diff view that
# dropped indentation; confirm that the dropdown and its change handler
# belong inside the column.
with gr.Blocks() as demo:
    with gr.Column(scale=1):
        # Text area that the dropdown's change handler overwrites with the
        # selected model name.
        title_output = gr.Markdown("Please select a model to run")
        chat_demo = gr.ChatInterface(
            respond,
            #chatbot=chatbot,
            # Accordion that holds the additional inputs; created with
            # render=False, presumably so ChatInterface places it itself.
            additional_inputs_accordion=gr.Accordion(
                label="⚙️ Parameters", open=False, render=False
            ),
            fill_height=False,
            # Extra controls handed to `respond` alongside the chat message
            # (see the Gradio ChatInterface docs for the argument order).
            additional_inputs=[
                gr.Textbox(value="You are a friendly Chatbot.", label="System message"),
                gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max new tokens"),
                gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
                gr.Slider(
                    minimum=0.1,
                    maximum=1.0,
                    value=0.95,
                    step=0.05,
                    label="Top-p (nucleus sampling)",
                ),
            ],
        )
        # Model selector; starts on the first entry of model_list.
        model_select = gr.Dropdown(
            choices=model_list,
            label="Models",
            value=model_list[0],
        )

        # On selection change, call update_title_and_chatmodel with the chosen
        # value and write its return value into title_output.
        model_select.change(update_title_and_chatmodel, inputs=[model_select], outputs=title_output)
107
 
108
 
109
  if __name__ == "__main__":