Raullen committed on
Commit
b419a89
·
verified ·
1 Parent(s): 7c937ef

feat: cleaner model labels + ?ref attribution + first-response nudge

Browse files
Files changed (1) hide show
  1. app.py +48 -21
app.py CHANGED
@@ -9,6 +9,10 @@ banner at the bottom sends them to quicksilverpro.io for their own key.
9
  Single-tenant QSP key (stored as the `QSP_KEY` Space secret) with a monthly
10
  budget cap configured on the QSP side. In-process per-session rate-limit
11
  keeps casual spam from spiking the bill.
 
 
 
 
12
  """
13
 
14
  from __future__ import annotations
@@ -26,13 +30,21 @@ from openai import OpenAI
26
  QSP_KEY = os.environ.get("QSP_KEY", "").strip()
27
  QSP_BASE = os.environ.get("QSP_BASE", "https://api.quicksilverpro.io/v1")
28
 
 
 
 
 
 
 
 
 
 
29
  MODELS = [
30
- ("deepseek-v3", "DeepSeek V3 general-purpose, fast"),
31
- ("deepseek-r1", "DeepSeek R1 reasoning, slower, deeper"),
32
- ("qwen3.5-35b", "Qwen 3.5-35B-A3B 262K context, multilingual"),
33
  ]
34
- MODEL_CHOICES = [f"{m} — {desc}" for m, desc in MODELS]
35
- DEFAULT_MODEL_LABEL = MODEL_CHOICES[0]
36
 
37
  DEFAULT_SYSTEM_PROMPT = "You are a helpful assistant."
38
 
@@ -43,6 +55,18 @@ RATE_WINDOW_SEC = 60
43
  RATE_MAX_MSGS = 8
44
 
45
  _session_buckets: dict[str, deque] = {}
 
 
 
 
 
 
 
 
 
 
 
 
46
 
47
 
48
  def _rate_limited(session_hash: str) -> bool:
@@ -66,14 +90,10 @@ else:
66
  client = OpenAI(base_url=QSP_BASE, api_key=QSP_KEY)
67
 
68
 
69
- def _parse_model_label(label: str) -> str:
70
- return label.split(" — ", 1)[0]
71
-
72
-
73
  def respond(
74
  message: str,
75
  history: list[tuple[str, str]],
76
- model_label: str,
77
  system_prompt: str,
78
  temperature: float,
79
  max_tokens: int,
@@ -94,7 +114,8 @@ def respond(
94
  )
95
  return
96
 
97
- model = _parse_model_label(model_label)
 
98
  messages: list[dict[str, str]] = []
99
  if system_prompt.strip():
100
  messages.append({"role": "system", "content": system_prompt.strip()})
@@ -127,31 +148,37 @@ def respond(
127
  accumulated += delta
128
  yield accumulated
129
 
 
 
 
 
 
 
 
130
 
131
  # ────────────────────────── UI ──────────────────────────
132
 
133
- HEADER_MD = """
134
  # ⚡ QuickSilver Pro Chat
135
 
136
  Try **DeepSeek V3 / R1** and **Qwen 3.5-35B-A3B** via an OpenAI-compatible API — no signup needed here.
137
 
138
- <sub>Running on [QuickSilver Pro](https://quicksilverpro.io) · Get your own key ($1 free credits): [quicksilverpro.io](https://quicksilverpro.io) · CLI: `pip install quicksilverpro`</sub>
139
  """
140
 
141
- FOOTER_MD = """
142
  ---
143
- <sub>Powered by <a href="https://quicksilverpro.io">QuickSilver Pro</a> — open-source LLM inference, OpenAI-compatible, ~20% below OpenRouter / Together / Fireworks. Built by <a href="https://quicksilverpro.io">MachineFi Labs</a>.</sub>
144
  """
145
 
146
- # theme moved to launch() in Gradio 6, dropped here to stay forward-compatible
147
  with gr.Blocks(title="QuickSilver Pro Chat") as demo:
148
  gr.Markdown(HEADER_MD)
149
 
150
  with gr.Row():
151
  with gr.Column(scale=1):
152
  model_dropdown = gr.Dropdown(
153
- choices=MODEL_CHOICES,
154
- value=DEFAULT_MODEL_LABEL,
155
  label="Model",
156
  interactive=True,
157
  )
@@ -168,9 +195,9 @@ with gr.Blocks(title="QuickSilver Pro Chat") as demo:
168
  label="Max tokens", minimum=64, maximum=4096, step=64, value=1024
169
  )
170
  with gr.Column(scale=3):
171
- # Gradio 6.0 removed submit_btn / retry_btn / undo_btn / clear_btn args
172
- # in favor of a more opinionated default layout; dropping them keeps
173
- # this compatible with both 5.x and 6.x.
174
  gr.ChatInterface(
175
  fn=respond,
176
  additional_inputs=[model_dropdown, system_prompt, temperature, max_tokens],
 
9
  Single-tenant QSP key (stored as the `QSP_KEY` Space secret) with a monthly
10
  budget cap configured on the QSP side. In-process per-session rate-limit
11
  keeps casual spam from spiking the bill.
12
+
13
+ Outbound links all carry `?ref=GHKN4L37` — the reserved REFERRAL_CODES entry
14
+ earmarked for HF-sourced signups. Lets us attribute signup volume from this
15
+ Space separately from other channels (Discord, Twitter, direct).
16
  """
17
 
18
  from __future__ import annotations
 
30
  QSP_KEY = os.environ.get("QSP_KEY", "").strip()
31
  QSP_BASE = os.environ.get("QSP_BASE", "https://api.quicksilverpro.io/v1")
32
 
33
+ # Attribution code for this Space — reserved from REFERRAL_CODES private pool
34
+ # per growth/PROMO.md. Hardcoded here rather than env-configured because it
35
+ # never changes (a single Space = a single attribution bucket).
36
+ REF_CODE = "GHKN4L37"
37
+ SIGNUP_URL = f"https://quicksilverpro.io/?ref={REF_CODE}"
38
+ CLI_URL = "https://github.com/machinefi/qspro-cli"
39
+
40
+ # Gradio Dropdown accepts (display_label, value) tuples; the callback
41
+ # receives the value string, so we don't need to parse it back.
42
  MODELS = [
43
+ ("DeepSeek V3 general-purpose, fast", "deepseek-v3"),
44
+ ("DeepSeek R1 reasoning, slower, deeper", "deepseek-r1"),
45
+ ("Qwen 3.5-35B-A3B 262K context, multilingual", "qwen3.5-35b"),
46
  ]
47
+ DEFAULT_MODEL_VALUE = MODELS[0][1]
 
48
 
49
  DEFAULT_SYSTEM_PROMPT = "You are a helpful assistant."
50
 
 
55
  RATE_MAX_MSGS = 8
56
 
57
  _session_buckets: dict[str, deque] = {}
58
+ # Tracks which sessions have already received the first-response nudge, so
59
+ # we only attach it once per session instead of on every assistant message.
60
+ _session_nudged: set[str] = set()
61
+
62
+ # Appended to the first assistant response per session. Markdown-safe. The
63
+ # "---" horizontal rule visually separates the nudge from real model output
64
+ # so users don't confuse it with generated content.
65
+ NUDGE_MD = (
66
+ f"\n\n---\n\n"
67
+ f"💡 *Liked this? [Get your own key]({SIGNUP_URL}) — $5 in free credits, "
68
+ f"no card required. Or `pip install quicksilverpro` for the [CLI]({CLI_URL}).*"
69
+ )
70
 
71
 
72
  def _rate_limited(session_hash: str) -> bool:
 
90
  client = OpenAI(base_url=QSP_BASE, api_key=QSP_KEY)
91
 
92
 
 
 
 
 
93
  def respond(
94
  message: str,
95
  history: list[tuple[str, str]],
96
+ model: str,
97
  system_prompt: str,
98
  temperature: float,
99
  max_tokens: int,
 
114
  )
115
  return
116
 
117
+ is_first_response = not (history or [])
118
+
119
  messages: list[dict[str, str]] = []
120
  if system_prompt.strip():
121
  messages.append({"role": "system", "content": system_prompt.strip()})
 
148
  accumulated += delta
149
  yield accumulated
150
 
151
+ # Append the signup nudge to the first assistant response of the session
152
+ # only — a persistent nudge on every turn would feel spammy. Guarded by a
153
+ # set of session hashes so a fast re-click doesn't double-attach.
154
+ if is_first_response and session_hash not in _session_nudged:
155
+ _session_nudged.add(session_hash)
156
+ yield accumulated + NUDGE_MD
157
+
158
 
159
  # ────────────────────────── UI ──────────────────────────
160
 
161
+ HEADER_MD = f"""
162
  # ⚡ QuickSilver Pro Chat
163
 
164
  Try **DeepSeek V3 / R1** and **Qwen 3.5-35B-A3B** via an OpenAI-compatible API — no signup needed here.
165
 
166
+ <sub>Running on [QuickSilver Pro]({SIGNUP_URL}) · Get your own key ($5 free credits): [{SIGNUP_URL.replace('https://', '')}]({SIGNUP_URL}) · CLI: `pip install quicksilverpro`</sub>
167
  """
168
 
169
+ FOOTER_MD = f"""
170
  ---
171
+ <sub>Powered by <a href="{SIGNUP_URL}">QuickSilver Pro</a> — open-source LLM inference, OpenAI-compatible, ~20% below OpenRouter / Together / Fireworks. Built by <a href="{SIGNUP_URL}">MachineFi Labs</a>.</sub>
172
  """
173
 
 
174
  with gr.Blocks(title="QuickSilver Pro Chat") as demo:
175
  gr.Markdown(HEADER_MD)
176
 
177
  with gr.Row():
178
  with gr.Column(scale=1):
179
  model_dropdown = gr.Dropdown(
180
+ choices=MODELS,
181
+ value=DEFAULT_MODEL_VALUE,
182
  label="Model",
183
  interactive=True,
184
  )
 
195
  label="Max tokens", minimum=64, maximum=4096, step=64, value=1024
196
  )
197
  with gr.Column(scale=3):
198
+ # Gradio 6.0 removed the submit_btn / retry_btn / undo_btn / clear_btn
199
+ # args in favor of a more opinionated default layout; dropping them
200
+ # keeps this compatible with both 5.x and 6.x.
201
  gr.ChatInterface(
202
  fn=respond,
203
  additional_inputs=[model_dropdown, system_prompt, temperature, max_tokens],