| """ | |
| QuickSilver Pro Chat — Hugging Face Space. | |
| A zero-friction try-it demo for QuickSilver Pro. Anyone on HF can chat with | |
| DeepSeek V3 / R1 / Qwen 3.5 through our OpenAI-compatible endpoint, without | |
| creating an account first. The goal is top-of-funnel discoverability: the | |
| banner at the bottom sends them to quicksilverpro.io for their own key. | |
| Single-tenant QSP key (stored as the `QSP_KEY` Space secret) with a monthly | |
| budget cap configured on the QSP side. In-process per-session rate-limit | |
| keeps casual spam from spiking the bill. | |
| Outbound links all carry `?ref=GHKN4L37` — the reserved REFERRAL_CODES entry | |
| earmarked for HF-sourced signups. Lets us attribute signup volume from this | |
| Space separate from other channels (Discord, Twitter, direct). | |
| """ | |
from __future__ import annotations

import os
import time
from collections import deque
from typing import Iterable

import gradio as gr
from openai import OpenAI

# ────────────────────────── Configuration ──────────────────────────

QSP_KEY = os.environ.get("QSP_KEY", "").strip()
QSP_BASE = os.environ.get("QSP_BASE", "https://api.quicksilverpro.io/v1")

# Attribution code for this Space — reserved from the REFERRAL_CODES private
# pool per growth/PROMO.md. Hardcoded here rather than env-configured because
# it never changes (a single Space = a single attribution bucket).
REF_CODE = "GHKN4L37"
SIGNUP_URL = f"https://quicksilverpro.io/?ref={REF_CODE}"
CLI_URL = "https://github.com/machinefi/qspro-cli"

# Gradio Dropdown accepts (display_label, value) tuples; the callback
# receives the value string, so we don't need to parse it back.
MODELS = [
    ("DeepSeek V3 — general-purpose, fast", "deepseek-v3"),
    ("DeepSeek R1 — reasoning, slower, deeper", "deepseek-r1"),
    ("Qwen 3.5-35B-A3B — 262K context, multilingual", "qwen3.5-35b"),
]
DEFAULT_MODEL_VALUE = MODELS[0][1]
DEFAULT_SYSTEM_PROMPT = "You are a helpful assistant."

# Per-session soft rate limit. Not a security boundary — the QSP-side budget
# cap on the shared key is. This just keeps one noisy session from blowing
# through the daily allowance in 90 seconds.
RATE_WINDOW_SEC = 60
RATE_MAX_MSGS = 8
_session_buckets: dict[str, deque] = {}

# Tracks which sessions have already received the first-response nudge, so
# we only attach it once per session instead of on every assistant message.
_session_nudged: set[str] = set()

# Appended to the first assistant response per session. Markdown-safe. The
# "---" horizontal rule visually separates the nudge from real model output
# so users don't confuse it with generated content.
NUDGE_MD = (
    "\n\n---\n\n"
    f"💡 *Liked this? [Get your own key]({SIGNUP_URL}) — $5 in free credits, "
    f"no card required. Or `pip install quicksilverpro` for the [CLI]({CLI_URL}).*"
)


def _rate_limited(session_hash: str) -> bool:
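    """Sliding-window limiter: True if this session is over budget.

    Timestamps older than RATE_WINDOW_SEC are evicted from the session's
    deque; if the remainder already fills the window, the call is rejected
    (and not recorded), otherwise the new timestamp is appended.
    """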
    now = time.time()
    bucket = _session_buckets.setdefault(session_hash, deque())
    while bucket and now - bucket[0] > RATE_WINDOW_SEC:
        bucket.popleft()
    if len(bucket) >= RATE_MAX_MSGS:
        return True
    bucket.append(now)
    return False
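
# Buckets are never pruned, so memory grows with the number of distinct
# sessions. Acceptable for a demo Space that restarts on rebuild or sleep;
# a long-lived deployment would want periodic eviction of idle sessions.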

# ────────────────────────── OpenAI client ──────────────────────────

if not QSP_KEY:
    # Don't crash on import — let the chat surface a clear error instead,
    # so the Space owner sees "QSP_KEY secret not set" rather than a 500.
    client = None
else:
    client = OpenAI(base_url=QSP_BASE, api_key=QSP_KEY)


def respond(
    message: str,
    # (user, assistant) tuples or {"role", "content"} dicts, depending on
    # the Gradio version's ChatInterface history format.
    history: list,
    model: str,
    system_prompt: str,
    temperature: float,
    max_tokens: int,
    request: gr.Request | None = None,
) -> Iterable[str]:
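    """Stream a chat completion, yielding the accumulated text so far.

    Gradio replaces the in-progress message with each yielded value, so
    every yield must carry the full response-so-far, not just the delta.
    """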
    if client is None:
        yield (
            "⚠️ Space misconfigured: `QSP_KEY` secret is not set. "
            "Owner: configure it in Settings → Variables and secrets."
        )
        return

    session_hash = (request.session_hash if request else "anon") or "anon"
    if _rate_limited(session_hash):
        yield (
            f"⏳ Rate limit reached ({RATE_MAX_MSGS} messages / "
            f"{RATE_WINDOW_SEC}s). Take a breath, then try again."
        )
        return

    is_first_response = not history

    messages: list[dict[str, str]] = []
    if system_prompt.strip():
        messages.append({"role": "system", "content": system_prompt.strip()})
    for turn in history or []:
        if isinstance(turn, dict):
            # "messages"-format history entry.
            if turn.get("content"):
                messages.append({"role": turn["role"], "content": turn["content"]})
        else:
            # Tuple-format history entry: (user_msg, assistant_msg).
            user_msg, assistant_msg = turn
            if user_msg:
                messages.append({"role": "user", "content": user_msg})
            if assistant_msg:
                messages.append({"role": "assistant", "content": assistant_msg})
    messages.append({"role": "user", "content": message})
    try:
        stream = client.chat.completions.create(
            model=model,
            messages=messages,
            temperature=float(temperature),
            max_tokens=int(max_tokens),
            stream=True,
        )
    except Exception as e:
        yield f"❌ API error: {type(e).__name__}: {str(e)[:300]}"
        return
    accumulated = ""
    for chunk in stream:
        try:
            delta = chunk.choices[0].delta.content or ""
        except (AttributeError, IndexError):
            delta = ""
        if delta:
            accumulated += delta
            yield accumulated

    # Append the signup nudge to the first assistant response of the session
    # only — a persistent nudge on every turn would feel spammy. Guarded by a
    # set of session hashes so a fast re-click doesn't double-attach.
    if is_first_response and session_hash not in _session_nudged:
        _session_nudged.add(session_hash)
        yield accumulated + NUDGE_MD


# ────────────────────────── UI ──────────────────────────

HEADER_MD = f"""
# ⚡ QuickSilver Pro Chat

Try **DeepSeek V3 / R1** and **Qwen 3.5-35B-A3B** via an OpenAI-compatible API — no signup needed here.

<sub>Running on [QuickSilver Pro]({SIGNUP_URL}) · Get your own key ($5 free credits): [{SIGNUP_URL.replace('https://', '')}]({SIGNUP_URL}) · CLI: `pip install quicksilverpro`</sub>
"""

FOOTER_MD = f"""
---

<sub>Powered by <a href="{SIGNUP_URL}">QuickSilver Pro</a> — open-source LLM inference, OpenAI-compatible, ~20% below OpenRouter / Together / Fireworks. Built by <a href="{SIGNUP_URL}">MachineFi Labs</a>.</sub>
"""

with gr.Blocks(title="QuickSilver Pro Chat") as demo:
    gr.Markdown(HEADER_MD)
    with gr.Row():
        with gr.Column(scale=1):
            model_dropdown = gr.Dropdown(
                choices=MODELS,
                value=DEFAULT_MODEL_VALUE,
                label="Model",
                interactive=True,
            )
            system_prompt = gr.Textbox(
                label="System prompt",
                value=DEFAULT_SYSTEM_PROMPT,
                lines=3,
                max_lines=8,
            )
            temperature = gr.Slider(
                label="Temperature", minimum=0.0, maximum=2.0, step=0.1, value=0.7
            )
            max_tokens = gr.Slider(
                label="Max tokens", minimum=64, maximum=4096, step=64, value=1024
            )
        with gr.Column(scale=3):
            # Gradio 6.0 removed the submit_btn / retry_btn / undo_btn / clear_btn
            # args in favor of a more opinionated default layout; dropping them
            # keeps this compatible with both 5.x and 6.x.
            gr.ChatInterface(
                fn=respond,
                additional_inputs=[model_dropdown, system_prompt, temperature, max_tokens],
                examples=[
                    ["Write a concise git commit message for: fixed off-by-one error in pagination"],
                    ["Explain closures in JavaScript in 2 sentences"],
                    ["What's the fastest sorting algorithm for 100k integers and why?"],
                    ["Translate 'Hello, how are you?' into formal Japanese, Hindi, and Russian"],
                ],
                cache_examples=False,
            )
    gr.Markdown(FOOTER_MD)

if __name__ == "__main__":
    demo.queue(default_concurrency_limit=4, max_size=64).launch()