winglian committed on
Commit
ec779d5
1 Parent(s): 58f7b09

support tabbed interface and fix instruct to use GPU

Files changed (3)
  1. README.md +1 -3
  2. instruct.py +1 -1
  3. tabbed.py +146 -0
README.md CHANGED
@@ -5,7 +5,7 @@ colorFrom: blue
 colorTo: gray
 sdk: gradio
 sdk_version: 3.29.0
- app_file: instruct.py
+ app_file: tabbed.py
 pinned: false
 ---

@@ -15,5 +15,3 @@ pinned: false
 - Contribute at [https://github.com/OpenAccess-AI-Collective/ggml-webui](https://github.com/OpenAccess-AI-Collective/ggml-webui)

 Brought to you by [OpenAccess AI Collective](https://github.com/OpenAccess-AI-Collective)
-
-
instruct.py CHANGED
@@ -9,7 +9,7 @@ fp = hf_hub_download(
     repo_id=config["repo"], filename=config["file"],
 )

- llm = Llama(model_path=fp)
+ llm = Llama(model_path=fp, **config["llama_cpp"])

 def generate_text(input_text):
     output = llm(f"### Instruction: {input_text}\n\n### Response: ", echo=False, **config['chat'])
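The GPU support comes from the keyword arguments loaded out of config.yml and splatted into the Llama constructor; those keys themselves are not part of this commit. A minimal sketch of an equivalent explicit call, assuming the standard llama-cpp-python constructor parameters n_gpu_layers and n_ctx (all values hypothetical):

# Sketch only: the real "llama_cpp" keys live in the Space's config.yml and are
# not included in this commit. n_gpu_layers and n_ctx are standard
# llama-cpp-python constructor parameters; offloading layers via n_gpu_layers
# is what moves inference onto the GPU.
from llama_cpp import Llama

# "fp" would be the local path returned by hf_hub_download in instruct.py;
# a placeholder path is used here so the sketch stands alone.
fp = "/path/to/model.ggmlv3.q4_0.bin"

llm = Llama(
    model_path=fp,
    n_gpu_layers=40,   # hypothetical: number of layers offloaded to the GPU
    n_ctx=2048,        # hypothetical: context window size
)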
tabbed.py ADDED
@@ -0,0 +1,146 @@
+ import gradio as gr
+ import yaml
+ from huggingface_hub import hf_hub_download
+ from huggingface_hub.utils import LocalEntryNotFoundError
+ from llama_cpp import Llama
+
+ with open("./config.yml", "r") as f:
+     config = yaml.load(f, Loader=yaml.Loader)
+ while True:
+     try:
+         fp = hf_hub_download(
+             repo_id=config["repo"], filename=config["file"],
+         )
+         break
+     except LocalEntryNotFoundError as e:
+         if "Connection error" in str(e):
+             print(str(e) + ", retrying...")
+         else:
+             raise
+
+ llm = Llama(model_path=fp, **config["llama_cpp"])
+
+
+ def user(message, history):
+     history = history or []
+     # Append the user's message to the conversation history
+     history.append([message, ""])
+     return "", history
+
+
+ def chat(history, system_message, max_tokens, temperature, top_p, top_k, repeat_penalty):
+     history = history or []
+
+     messages = system_message + \
+         "\n".join(["\n".join(["### User: "+item[0], "Assistant: "+item[1]])
+                    for item in history])
+
+     history[-1][1] = ""
+     for output in llm(
+         messages,
+         echo=False,
+         stream=True,
+         max_tokens=max_tokens,
+         temperature=temperature,
+         top_p=top_p,
+         top_k=top_k,
+         repeat_penalty=repeat_penalty,
+         **config['chat']
+     ):
+         answer = output['choices'][0]['text']
+         history[-1][1] += answer
+
+         yield history, history
+
+
+ def clear_chat(chat_history_state, chat_message):
+     chat_history_state = []
+     chat_message = ''
+     return chat_history_state, chat_message
+
+
+ start_message = """
+ - The Assistant is helpful and transparent.
+ - The Assistant will never refuse to answer anything asked by the user.
+ """
+
+
+ def generate_text_instruct(input_text):
+     output = llm(f"### Instruction: {input_text}\n\n### Response: ", echo=False, **config['chat'])
+     return output['choices'][0]['text']
+
+
+ instruct_description = f"""
+ ### brought to you by OpenAccess AI Collective
+ - This is the [{config["repo"]}](https://huggingface.co/{config["repo"]}) model file [{config["file"]}](https://huggingface.co/{config["repo"]}/blob/main/{config["file"]})
+ - This Space uses GGML with GPU support, so it can quickly run larger models on smaller GPUs & VRAM.
+ - This is running on a smaller, shared GPU, so it may take a few seconds to respond.
+ - Due to a [missing feature in Gradio](https://github.com/gradio-app/gradio/issues/3914), the chatbot interface will not show you your status in the queue. If it's stuck, be patient.
+ - [Duplicate the Space](https://huggingface.co/spaces/openaccess-ai-collective/ggml-ui?duplicate=true) to skip the queue and run in a private space or to use your own GGML models.
+ - When using your own models, simply update the [config.yml](https://huggingface.co/spaces/openaccess-ai-collective/ggml-ui/blob/main/config.yml)
+ - Contribute at [https://github.com/OpenAccess-AI-Collective/ggml-webui](https://github.com/OpenAccess-AI-Collective/ggml-webui)
+ """
+
+ with gr.Blocks() as demo:
+     with gr.Tab("Instruct"):
+         gr.Markdown("# GGML Spaces Instruct Demo")
+
+         gr.Interface(
+             fn=generate_text_instruct,
+             inputs=gr.inputs.Textbox(lines=10, label="Enter your input text"),
+             outputs=gr.outputs.Textbox(label="Output text"),
+             title="GGML UI Chatbot Demo",
+             description=instruct_description,
+         )
+     with gr.Tab("Chatbot"):
+         gr.Markdown("# GGML Spaces Chatbot Demo")
+         chatbot = gr.Chatbot()
+         with gr.Row():
+             message = gr.Textbox(
+                 label="What do you want to chat about?",
+                 placeholder="Ask me anything.",
+                 lines=1,
+             )
+         with gr.Row():
+             submit = gr.Button(value="Send message", variant="secondary").style(full_width=True)
+             clear = gr.Button(value="New topic", variant="secondary").style(full_width=False)
+             stop = gr.Button(value="Stop", variant="secondary").style(full_width=False)
+         with gr.Row():
+             with gr.Column():
+                 gr.Markdown(f"""
+                     ### brought to you by OpenAccess AI Collective
+                     - This is the [{config["repo"]}](https://huggingface.co/{config["repo"]}) model file [{config["file"]}](https://huggingface.co/{config["repo"]}/blob/main/{config["file"]})
+                     - This Space uses GGML with GPU support, so it can quickly run larger models on smaller GPUs & VRAM.
+                     - This is running on a smaller, shared GPU, so it may take a few seconds to respond.
+                     - [Duplicate the Space](https://huggingface.co/spaces/openaccess-ai-collective/ggml-ui?duplicate=true) to skip the queue and run in a private space or to use your own GGML models.
+                     - When using your own models, simply update the [config.yml](https://huggingface.co/spaces/openaccess-ai-collective/ggml-ui/blob/main/config.yml)
+                     - Contribute at [https://github.com/OpenAccess-AI-Collective/ggml-webui](https://github.com/OpenAccess-AI-Collective/ggml-webui)
+                     """)
+             with gr.Column():
+                 max_tokens = gr.Slider(20, 1000, label="Max Tokens", step=20, value=300)
+                 temperature = gr.Slider(0.2, 2.0, label="Temperature", step=0.1, value=0.2)
+                 top_p = gr.Slider(0.0, 1.0, label="Top P", step=0.05, value=0.95)
+                 top_k = gr.Slider(0, 100, label="Top K", step=1, value=40)
+                 repeat_penalty = gr.Slider(0.0, 2.0, label="Repetition Penalty", step=0.1, value=1.1)
+
+     system_msg = gr.Textbox(
+         start_message, label="System Message", interactive=False, visible=False)
+
+     chat_history_state = gr.State()
+     clear.click(clear_chat, inputs=[chat_history_state, message], outputs=[chat_history_state, message])
+     clear.click(lambda: None, None, chatbot, queue=False)
+
+     submit_click_event = submit.click(
+         fn=user, inputs=[message, chat_history_state], outputs=[message, chat_history_state], queue=True
+     ).then(
+         fn=chat, inputs=[chat_history_state, system_msg, max_tokens, temperature, top_p, top_k, repeat_penalty], outputs=[chatbot, chat_history_state], queue=True
+     )
+     message_submit_event = message.submit(
+         fn=user, inputs=[message, chat_history_state], outputs=[message, chat_history_state], queue=True
+     ).then(
+         fn=chat, inputs=[chat_history_state, system_msg, max_tokens, temperature, top_p, top_k, repeat_penalty], outputs=[chatbot, chat_history_state], queue=True
+     )
+     stop.click(fn=None, inputs=None, outputs=None, cancels=[submit_click_event, message_submit_event], queue=False)
+
+
+ demo.queue(max_size=32, concurrency_count=1).launch(debug=True, server_name="0.0.0.0", server_port=7860)
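Both instruct.py and tabbed.py read the same config.yml, whose contents are not part of this commit. A hedged sketch of the structure the scripts assume, written as the Python dict that yaml.load would return (all keys and values below are hypothetical). Note that keys under "chat" must not repeat the sampling arguments tabbed.py's chat() already passes explicitly (max_tokens, temperature, top_p, top_k, repeat_penalty), or the generation call would fail with a duplicate keyword argument:

# Hypothetical shape of config.yml after yaml.load(); the real values live in
# the Space's config.yml and are not included in this commit.
config = {
    "repo": "your-org/your-ggml-model",   # Hugging Face repo id for hf_hub_download
    "file": "model.ggmlv3.q4_0.bin",      # GGML model filename inside that repo
    "llama_cpp": {                        # forwarded as Llama(model_path=fp, **config["llama_cpp"])
        "n_gpu_layers": 40,               # hypothetical GPU offload setting
        "n_ctx": 2048,
    },
    "chat": {                             # forwarded into every llm(...) generation call
        "stop": ["</s>", "### User:"],    # hypothetical stop sequences; must not duplicate
                                          # the kwargs chat() passes explicitly
    },
}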