Raullen committed on
Commit
b419a89
·
verified ·
1 Parent(s): 7c937ef

feat: cleaner model labels + ?ref attribution + first-response nudge

Browse files
Files changed (1) hide show
  1. app.py +48 -21
app.py CHANGED
@@ -9,6 +9,10 @@ banner at the bottom sends them to quicksilverpro.io for their own key.
9
  Single-tenant QSP key (stored as the `QSP_KEY` Space secret) with a monthly
10
  budget cap configured on the QSP side. In-process per-session rate-limit
11
  keeps casual spam from spiking the bill.
 
 
 
 
12
  """
13
 
14
  from __future__ import annotations
@@ -26,13 +30,21 @@ from openai import OpenAI
26
  QSP_KEY = os.environ.get("QSP_KEY", "").strip()
27
  QSP_BASE = os.environ.get("QSP_BASE", "https://api.quicksilverpro.io/v1")
28
 
 
 
 
 
 
 
 
 
 
29
  MODELS = [
30
- ("deepseek-v3", "DeepSeek V3 general-purpose, fast"),
31
- ("deepseek-r1", "DeepSeek R1 reasoning, slower, deeper"),
32
- ("qwen3.5-35b", "Qwen 3.5-35B-A3B 262K context, multilingual"),
33
  ]
34
- MODEL_CHOICES = [f"{m} — {desc}" for m, desc in MODELS]
35
- DEFAULT_MODEL_LABEL = MODEL_CHOICES[0]
36
 
37
  DEFAULT_SYSTEM_PROMPT = "You are a helpful assistant."
38
 
@@ -43,6 +55,18 @@ RATE_WINDOW_SEC = 60
43
  RATE_MAX_MSGS = 8
44
 
45
  _session_buckets: dict[str, deque] = {}
 
 
 
 
 
 
 
 
 
 
 
 
46
 
47
 
48
  def _rate_limited(session_hash: str) -> bool:
@@ -66,14 +90,10 @@ else:
66
  client = OpenAI(base_url=QSP_BASE, api_key=QSP_KEY)
67
 
68
 
69
- def _parse_model_label(label: str) -> str:
70
- return label.split(" — ", 1)[0]
71
-
72
-
73
  def respond(
74
  message: str,
75
  history: list[tuple[str, str]],
76
- model_label: str,
77
  system_prompt: str,
78
  temperature: float,
79
  max_tokens: int,
@@ -94,7 +114,8 @@ def respond(
94
  )
95
  return
96
 
97
- model = _parse_model_label(model_label)
 
98
  messages: list[dict[str, str]] = []
99
  if system_prompt.strip():
100
  messages.append({"role": "system", "content": system_prompt.strip()})
@@ -127,31 +148,37 @@ def respond(
127
  accumulated += delta
128
  yield accumulated
129
 
 
 
 
 
 
 
 
130
 
131
  # ────────────────────────── UI ──────────────────────────
132
 
133
- HEADER_MD = """
134
  # ⚡ QuickSilver Pro Chat
135
 
136
  Try **DeepSeek V3 / R1** and **Qwen 3.5-35B-A3B** via an OpenAI-compatible API — no signup needed here.
137
 
138
- <sub>Running on [QuickSilver Pro](https://quicksilverpro.io) · Get your own key ($1 free credits): [quicksilverpro.io](https://quicksilverpro.io) · CLI: `pip install quicksilverpro`</sub>
139
  """
140
 
141
- FOOTER_MD = """
142
  ---
143
- <sub>Powered by <a href="https://quicksilverpro.io">QuickSilver Pro</a> — open-source LLM inference, OpenAI-compatible, ~20% below OpenRouter / Together / Fireworks. Built by <a href="https://quicksilverpro.io">MachineFi Labs</a>.</sub>
144
  """
145
 
146
- # theme moved to launch() in Gradio 6, dropped here to stay forward-compatible
147
  with gr.Blocks(title="QuickSilver Pro Chat") as demo:
148
  gr.Markdown(HEADER_MD)
149
 
150
  with gr.Row():
151
  with gr.Column(scale=1):
152
  model_dropdown = gr.Dropdown(
153
- choices=MODEL_CHOICES,
154
- value=DEFAULT_MODEL_LABEL,
155
  label="Model",
156
  interactive=True,
157
  )
@@ -168,9 +195,9 @@ with gr.Blocks(title="QuickSilver Pro Chat") as demo:
168
  label="Max tokens", minimum=64, maximum=4096, step=64, value=1024
169
  )
170
  with gr.Column(scale=3):
171
- # Gradio 6.0 removed submit_btn / retry_btn / undo_btn / clear_btn args
172
- # in favor of a more opinionated default layout; dropping them keeps
173
- # this compatible with both 5.x and 6.x.
174
  gr.ChatInterface(
175
  fn=respond,
176
  additional_inputs=[model_dropdown, system_prompt, temperature, max_tokens],
 
9
  Single-tenant QSP key (stored as the `QSP_KEY` Space secret) with a monthly
10
  budget cap configured on the QSP side. In-process per-session rate-limit
11
  keeps casual spam from spiking the bill.
12
+
13
+ Outbound links all carry `?ref=GHKN4L37` — the reserved REFERRAL_CODES entry
14
+ earmarked for HF-sourced signups. Lets us attribute signup volume from this
15
+ Space separately from other channels (Discord, Twitter, direct).
16
  """
17
 
18
  from __future__ import annotations
 
30
  QSP_KEY = os.environ.get("QSP_KEY", "").strip()
31
  QSP_BASE = os.environ.get("QSP_BASE", "https://api.quicksilverpro.io/v1")
32
 
33
+ # Attribution code for this Space — reserved from REFERRAL_CODES private pool
34
+ # per growth/PROMO.md. Hardcoded here rather than env-configured because it
35
+ # never changes (a single Space = a single attribution bucket).
36
+ REF_CODE = "GHKN4L37"
37
+ SIGNUP_URL = f"https://quicksilverpro.io/?ref={REF_CODE}"
38
+ CLI_URL = "https://github.com/machinefi/qspro-cli"
39
+
40
+ # Gradio Dropdown accepts (display_label, value) tuples; the callback
41
+ # receives the value string, so we don't need to parse it back.
42
  MODELS = [
43
+ ("DeepSeek V3 general-purpose, fast", "deepseek-v3"),
44
+ ("DeepSeek R1 reasoning, slower, deeper", "deepseek-r1"),
45
+ ("Qwen 3.5-35B-A3B 262K context, multilingual", "qwen3.5-35b"),
46
  ]
47
+ DEFAULT_MODEL_VALUE = MODELS[0][1]
 
48
 
49
  DEFAULT_SYSTEM_PROMPT = "You are a helpful assistant."
50
 
 
55
  RATE_MAX_MSGS = 8
56
 
57
  _session_buckets: dict[str, deque] = {}
58
+ # Tracks which sessions have already received the first-response nudge, so
59
+ # we only attach it once per session instead of on every assistant message.
60
+ _session_nudged: set[str] = set()
61
+
62
+ # Appended to the first assistant response per session. Markdown-safe. The
63
+ # "---" horizontal rule visually separates the nudge from real model output
64
+ # so users don't confuse it with generated content.
65
+ NUDGE_MD = (
66
+ f"\n\n---\n\n"
67
+ f"💡 *Liked this? [Get your own key]({SIGNUP_URL}) — $5 in free credits, "
68
+ f"no card required. Or `pip install quicksilverpro` for the [CLI]({CLI_URL}).*"
69
+ )
70
 
71
 
72
  def _rate_limited(session_hash: str) -> bool:
 
90
  client = OpenAI(base_url=QSP_BASE, api_key=QSP_KEY)
91
 
92
 
 
 
 
 
93
  def respond(
94
  message: str,
95
  history: list[tuple[str, str]],
96
+ model: str,
97
  system_prompt: str,
98
  temperature: float,
99
  max_tokens: int,
 
114
  )
115
  return
116
 
117
+ is_first_response = not (history or [])
118
+
119
  messages: list[dict[str, str]] = []
120
  if system_prompt.strip():
121
  messages.append({"role": "system", "content": system_prompt.strip()})
 
148
  accumulated += delta
149
  yield accumulated
150
 
151
+ # Append the signup nudge to the first assistant response of the session
152
+ # only — a persistent nudge on every turn would feel spammy. Guarded by a
153
+ # set of session hashes so a fast re-click doesn't double-attach.
154
+ if is_first_response and session_hash not in _session_nudged:
155
+ _session_nudged.add(session_hash)
156
+ yield accumulated + NUDGE_MD
157
+
158
 
159
  # ────────────────────────── UI ──────────────────────────
160
 
161
+ HEADER_MD = f"""
162
  # ⚡ QuickSilver Pro Chat
163
 
164
  Try **DeepSeek V3 / R1** and **Qwen 3.5-35B-A3B** via an OpenAI-compatible API — no signup needed here.
165
 
166
+ <sub>Running on [QuickSilver Pro]({SIGNUP_URL}) · Get your own key ($5 free credits): [{SIGNUP_URL.replace('https://', '')}]({SIGNUP_URL}) · CLI: `pip install quicksilverpro`</sub>
167
  """
168
 
169
+ FOOTER_MD = f"""
170
  ---
171
+ <sub>Powered by <a href="{SIGNUP_URL}">QuickSilver Pro</a> — open-source LLM inference, OpenAI-compatible, ~20% below OpenRouter / Together / Fireworks. Built by <a href="{SIGNUP_URL}">MachineFi Labs</a>.</sub>
172
  """
173
 
 
174
  with gr.Blocks(title="QuickSilver Pro Chat") as demo:
175
  gr.Markdown(HEADER_MD)
176
 
177
  with gr.Row():
178
  with gr.Column(scale=1):
179
  model_dropdown = gr.Dropdown(
180
+ choices=MODELS,
181
+ value=DEFAULT_MODEL_VALUE,
182
  label="Model",
183
  interactive=True,
184
  )
 
195
  label="Max tokens", minimum=64, maximum=4096, step=64, value=1024
196
  )
197
  with gr.Column(scale=3):
198
+ # Gradio 6.0 removed the submit_btn / retry_btn / undo_btn / clear_btn
199
+ # args in favor of a more opinionated default layout; dropping them
200
+ # keeps this compatible with both 5.x and 6.x.
201
  gr.ChatInterface(
202
  fn=respond,
203
  additional_inputs=[model_dropdown, system_prompt, temperature, max_tokens],