Create app.py
app.py
ADDED
@@ -0,0 +1,119 @@
import gradio as gr
import openai
import os
import time

# Available models, in increasing order of size/capability.
MODELS = [
    "Meta-Llama-3.1-8B-Instruct",
    "Meta-Llama-3.1-70B-Instruct",
    "Meta-Llama-3.1-405B-Instruct",
]

# SambaNova API base URL
API_BASE = "https://api.sambanova.ai/v1"

def create_client(api_key=None):
    """Create an OpenAI-compatible client pointed at the SambaNova endpoint."""
    if not api_key:
        api_key = os.getenv("API_KEY")
    return openai.OpenAI(api_key=api_key, base_url=API_BASE)

def chat_with_ai(message, chat_history):
    """Format the messages-style chat history, plus the new user message, for the API call."""
    messages = [{"role": "system", "content": "You are a helpful assistant."}]
    for item in chat_history:
        messages.append({"role": item["role"], "content": item["content"]})
    messages.append({"role": "user", "content": message})
    return messages

def respond(message, chat_history, model, api_key):
    """Send the conversation to the API and return the response plus the elapsed time."""
    client = create_client(api_key)
    messages = chat_with_ai(message, chat_history)
    start_time = time.time()

    try:
        completion = client.chat.completions.create(model=model, messages=messages)
        response = completion.choices[0].message.content
        thinking_time = time.time() - start_time
        return response, thinking_time
    except Exception as e:
        return f"Error: {str(e)}", time.time() - start_time

def generate(message, history, model_index, api_key):
    """Generate the chatbot response using the model selected by model_index."""
    if not isinstance(model_index, int):
        model_index = model_index[0]
    model = MODELS[model_index]  # Select the model by index.
    answer, thinking_time = respond(message, history, model, api_key)

    if answer.startswith("Error:"):
        return history + [{"role": "assistant", "content": answer}], ""

    messages = [
        {"role": "user", "content": message},
        {"role": "assistant", "content": answer + f"\n\n<sub>Press regenerate to make it try harder. Time: {thinking_time:.2f} sec</sub>"},
    ]

    # Keep the submitted message in the textbox so a later retry can resend it.
    return history + messages, message

def first_try(message, history, retry_state, api_key):
    """Answer a fresh message with the smallest model and reset the retry state."""
    history, message = generate(message, history, 0, api_key)
    return history, message, (0, 0)

def try_harder(history, retry_state, message, api_key):
    """Regenerate the last answer, escalating to a larger model after repeated retries."""
    model_index, retries = retry_state  # Unpack the current state.

    if retries < 1:
        retries += 1  # Allow one more attempt with the current model.
    else:
        # Move to the next larger model, if available.
        model_index = min(model_index + 1, len(MODELS) - 1)
        retries = 0  # Reset retries for the new model.

    # Generate a new response with the selected model.
    history = history[:-2]  # Remove the last user/assistant turn.
    new_history, _ = generate(message, history, model_index, api_key)

    return new_history, (model_index, retries)  # Update the retry state.

with gr.Blocks() as demo:
    gr.Markdown("# π Please Try Harder")
    gr.Markdown("[Powered by SambaNova Cloud, Get Your API Key Here](https://cloud.sambanova.ai/apis)")

    with gr.Row():
        api_key = gr.Textbox(label="API Key", type="password", placeholder="(Optional) Enter your API key here for more availability")

    chatbot = gr.Chatbot(label="Chat", show_label=False, show_share_button=False, layout="panel", type="messages")
    msg = gr.Textbox(label="Type your message here...", placeholder="Enter your message...")

    # State tracking (1) the current model index and (2) retries on the same model.
    retry_state = gr.State(value=(0, 0))  # (model_index, retries)

    # Button to clear the chat; resets the history and the retry state.
    clear_btn = gr.Button("Clear Chat")
    clear_btn.click(lambda: ([], (0, 0)), inputs=None, outputs=[chatbot, retry_state])

    # Generate a response when a message is submitted.
    msg.submit(
        first_try,
        inputs=[msg, chatbot, retry_state, api_key],
        outputs=[chatbot, msg, retry_state]
    )

    # Re-run the last turn, possibly with a larger model, when the retry button is pressed.
    chatbot.retry(
        try_harder,
        inputs=[chatbot, retry_state, msg, api_key],
        outputs=[chatbot, retry_state]
    )

demo.launch(share=True, show_api=False)
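
For quick sanity-checking outside the Gradio UI, the request this Space issues reduces to one OpenAI-compatible chat completion against the SambaNova endpoint. A minimal standalone sketch, assuming the openai package is installed and a SambaNova key is exported in the API_KEY environment variable (the same variable app.py reads):

import os
import openai

# Same endpoint and smallest model that app.py uses.
client = openai.OpenAI(api_key=os.getenv("API_KEY"), base_url="https://api.sambanova.ai/v1")

completion = client.chat.completions.create(
    model="Meta-Llama-3.1-8B-Instruct",
    messages=[
        {"role": "system", "content": "You are a helpful assistant."},
        {"role": "user", "content": "Hello!"},
    ],
)
print(completion.choices[0].message.content)

The escalation logic in try_harder simply replays this same call: each entry in MODELS gets one extra attempt on retry before the app moves to the next larger model.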