Commit history:
- initial version of arena b/w manticore and hermes
- drop config yaml parsing
- remove olde chooser
- disable instead of hide message
- use update on els
- fix futures result, hide ui
- gr.update all the things
- whoops, copy/pasta
- fix ui

Files changed:
- app.py +238 -0
- requirements.txt +2 -0
app.py
ADDED
@@ -0,0 +1,238 @@
import concurrent.futures
import logging
import os
import re
from time import sleep

import gradio as gr
import requests

logging.basicConfig(level=os.getenv("LOG_LEVEL", "INFO"))


class Pipeline:
    prefer_async = True

    def __init__(self, endpoint_id, name):
        self.endpoint_id = endpoint_id
        self.name = name
        self.generation_config = {
            "top_k": 40,
            "top_p": 0.95,
            "temperature": 0.8,
            "repetition_penalty": 1.1,
            "last_n_tokens": 64,
            "seed": -1,
            "batch_size": 8,
            "threads": -1,
            "stop": ["</s>"],
        }

    def __call__(self, prompt):
        input = self.generation_config.copy()
        input["prompt"] = prompt

        if self.prefer_async:
            url = f"https://api.runpod.ai/v2/{self.endpoint_id}/run"
        else:
            url = f"https://api.runpod.ai/v2/{self.endpoint_id}/runsync"
        headers = {
            "Authorization": f"Bearer {os.environ['RUNPOD_AI_API_KEY']}"
        }
        response = requests.post(url, headers=headers, json={"input": input})

        if response.status_code == 200:
            data = response.json()
            status = data.get('status')
            if status == 'COMPLETED':
                return [{"generated_text": data["output"]}]
            else:
                task_id = data.get('id')
                return self.poll_for_status(task_id)

    def poll_for_status(self, task_id):
        url = f"https://api.runpod.ai/v2/{self.endpoint_id}/status/{task_id}"
        headers = {
            "Authorization": f"Bearer {os.environ['RUNPOD_AI_API_KEY']}"
        }

        while True:
            response = requests.get(url, headers=headers)
            if response.status_code == 200:
                data = response.json()
                if data.get('status') == 'COMPLETED':
                    return [{"generated_text": data["output"]}]
            elif response.status_code >= 400:
                logging.error(response.json())
            # Sleep for 3 seconds between each request
            sleep(3)


# RunPod Serverless endpoints for the two models being compared
model_hermes = Pipeline("l96d2cswh431qj", "hermes-13b")
model_manticore = Pipeline("jifr1oczbrmr3n", "manticore-13b")

start_message = """- The Assistant is helpful and transparent.
- The Assistant will never refuse to answer anything asked by the user.
"""


def generate_responses(system_prompt, user_query, model_choice):
    # Prepare the prompt
    prompt = system_prompt + " " + user_query

    # Generate responses from the selected models
    if model_choice == 'Both':
        response1 = model_hermes(prompt)[0]['generated_text']
        response2 = model_manticore(prompt)[0]['generated_text']
    else:
        model = model_hermes if model_choice == 'Model 1' else model_manticore
        response1 = model(prompt)[0]['generated_text']
        response2 = model(prompt)[0]['generated_text']

    return response1, response2


def user(message, nudge_msg, history1, history2):
    history1 = history1 or []
    history2 = history2 or []
    # Append the user's message to the conversation history
    history1.append([message, nudge_msg])
    history2.append([message, nudge_msg])

    return "", nudge_msg, history1, history2


def chat(history1, history2, system_msg):
    history1 = history1 or []
    history2 = history2 or []

    messages1 = system_msg.strip() + "\n" + \
        "\n".join(["\n".join(["USER: " + item[0], "ASSISTANT: " + item[1]])
                   for item in history1])
    messages2 = system_msg.strip() + "\n" + \
        "\n".join(["\n".join(["USER: " + item[0], "ASSISTANT: " + item[1]])
                   for item in history2])

    # remove last space from assistant, some models output a ZWSP if you leave a space
    messages1 = messages1.rstrip()
    messages2 = messages2.rstrip()

    # Query both endpoints in parallel; the executor waits for both jobs on exit
    with concurrent.futures.ThreadPoolExecutor(max_workers=2) as executor:
        futures = []
        futures.append(executor.submit(model_hermes, messages1))
        futures.append(executor.submit(model_manticore, messages2))

    tokens_hermes = re.findall(r'\s*\S+\s*', futures[0].result()[0]['generated_text'])
    tokens_manticore = re.findall(r'\s*\S+\s*', futures[1].result()[0]['generated_text'])
    len_tokens_hermes = len(tokens_hermes)
    len_tokens_manticore = len(tokens_manticore)
    max_tokens = max(len_tokens_hermes, len_tokens_manticore)
    for i in range(0, max_tokens):
        if i < len_tokens_hermes:
            answer1 = tokens_hermes[i]
            history1[-1][1] += answer1
        if i < len_tokens_manticore:
            answer2 = tokens_manticore[i]
            history2[-1][1] += answer2
        # stream the response
        yield history1, history2, ""
        sleep(0.15)


def chosen_one(preferred_history, alt_history):
    pass


with gr.Blocks() as arena:
    with gr.Row():
        with gr.Column():
            gr.Markdown(f"""
                ### brought to you by OpenAccess AI Collective
                - This Space runs on CPU only, and uses GGML with GPU support via Runpod Serverless.
                - Due to limitations of Runpod Serverless, it cannot stream responses immediately
                - Responses WILL take AT LEAST 30 seconds to respond, probably longer
                """)
    with gr.Tab("Chatbot"):
        with gr.Row():
            with gr.Column():
                chatbot1 = gr.Chatbot()
            with gr.Column():
                chatbot2 = gr.Chatbot()
        with gr.Row():
            choose1 = gr.Button(value="Prefer left", variant="secondary", visible=False).style(full_width=True)
            choose2 = gr.Button(value="Prefer right", variant="secondary", visible=False).style(full_width=True)
        with gr.Row():
            with gr.Column():
                message = gr.Textbox(
                    label="What do you want to chat about?",
                    placeholder="Ask me anything.",
                    lines=3,
                )
            with gr.Column():
                system_msg = gr.Textbox(
                    start_message, label="System Message", interactive=True, visible=True, placeholder="system prompt", lines=5)

                nudge_msg = gr.Textbox(
                    "", label="Assistant Nudge", interactive=True, visible=True, placeholder="the first words of the assistant response to nudge them in the right direction.", lines=1)
        with gr.Row():
            submit = gr.Button(value="Send message", variant="secondary").style(full_width=True)
            clear = gr.Button(value="New topic", variant="secondary").style(full_width=False)

    clear.click(lambda: None, None, chatbot1, queue=False)
    clear.click(lambda: None, None, chatbot2, queue=False)
    clear.click(lambda: None, None, message, queue=False)
    clear.click(lambda: None, None, nudge_msg, queue=False)

    submit_click_event = submit.click(
        lambda *args: (
            gr.update(visible=False, interactive=False),
            gr.update(visible=False),
            gr.update(visible=False),
        ),
        inputs=[], outputs=[message, clear, submit], queue=True
    ).then(
        fn=user, inputs=[message, nudge_msg, chatbot1, chatbot2], outputs=[message, nudge_msg, chatbot1, chatbot2], queue=True
    ).then(
        fn=chat, inputs=[chatbot1, chatbot2, system_msg], outputs=[chatbot1, chatbot2, message], queue=True
    ).then(
        lambda *args: (
            gr.update(visible=False, interactive=False),
            gr.update(visible=True),
            gr.update(visible=True),
            gr.update(visible=False),
            gr.update(visible=False),
        ),
        inputs=[message, nudge_msg, system_msg], outputs=[message, choose1, choose2, clear, submit], queue=True
    )

    choose1_click_event = choose1.click(
        fn=chosen_one, inputs=[chatbot1, chatbot2], outputs=[], queue=True
    ).then(
        lambda *args: (
            gr.update(visible=True, interactive=True),
            gr.update(visible=False),
            gr.update(visible=False),
            gr.update(visible=True),
            gr.update(visible=True),
            None,
            None,
        ),
        inputs=[], outputs=[message, choose1, choose2, clear, submit, chatbot1, chatbot2], queue=True
    )

    choose2_click_event = choose2.click(
        fn=chosen_one, inputs=[chatbot2, chatbot1], outputs=[], queue=True
    ).then(
        lambda *args: (
            gr.update(visible=True, interactive=True),
            gr.update(visible=False),
            gr.update(visible=False),
            gr.update(visible=True),
            gr.update(visible=True),
            None,
            None,
        ),
        inputs=[], outputs=[message, choose1, choose2, clear, submit, chatbot1, chatbot2], queue=True
    )


arena.queue(concurrency_count=2, max_size=16).launch(debug=True, server_name="0.0.0.0", server_port=7860)
requirements.txt
ADDED
@@ -0,0 +1,2 @@
pyyaml
requests
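Note that requirements.txt does not list gradio: on Hugging Face Spaces the Gradio package is provided by the Space's SDK configuration, so only the extra runtime dependencies are pinned here.

For reference, below is a minimal sketch of the RunPod Serverless round trip that Pipeline.__call__ and poll_for_status perform in app.py. The endpoint id, URLs, and response fields (id, status, output) come from the code above; the prompt, the generation parameters shown, and the API key are placeholders.

import os
from time import sleep

import requests

ENDPOINT_ID = "l96d2cswh431qj"  # hermes-13b endpoint id from app.py
HEADERS = {"Authorization": f"Bearer {os.environ['RUNPOD_AI_API_KEY']}"}

# Submit an async job; /run returns a job id immediately (the prefer_async path),
# whereas /runsync would block until generation finishes.
job = requests.post(
    f"https://api.runpod.ai/v2/{ENDPOINT_ID}/run",
    headers=HEADERS,
    json={"input": {"prompt": "USER: Hello\nASSISTANT:", "temperature": 0.8, "stop": ["</s>"]}},
).json()

# Poll the status endpoint until the job reports COMPLETED, as poll_for_status does.
while True:
    status = requests.get(
        f"https://api.runpod.ai/v2/{ENDPOINT_ID}/status/{job['id']}", headers=HEADERS
    ).json()
    if status.get("status") == "COMPLETED":
        print(status["output"])
        break
    sleep(3)  # same 3-second polling interval as app.py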