yogies committed on
Commit
40940bf
Β·
verified Β·
1 Parent(s): 94678f5

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +42 -61
app.py CHANGED
@@ -1,6 +1,6 @@
1
  import os
2
  import gradio as gr
3
- from huggingface_hub import InferenceClient
4
 
5
  # ----------------------------------------------------------------------
6
  # Helper to read a secret (fallback is useful when you run locally)
@@ -16,66 +16,57 @@ def _secret(key: str, fallback: str = "") -> str:
16
  def respond(
17
  message: str,
18
  history: list[dict[str, str]],
19
- # max_tokens: int,
20
- # temperature: float,
21
- # top_p: float,
22
  model_name: str,
23
  ):
24
  """
25
- Generate a response using the HuggingFace Inference API.
26
-
27
  * System prompt = secret `prec_chat`
28
- * HF inference token = secret `HF_TOKEN`
29
  """
30
- # 1️⃣ Load the system prompt (fallback = generic assistant)
31
- system_message = _secret("prec_chat", "You are a helpful assistant.")
32
 
33
- # 2️⃣ Load the HF inference token
34
- hf_token = _secret("HF_TOKEN")
35
- if not hf_token:
36
  raise RuntimeError(
37
- "HF_TOKEN not found in secrets. Add it to secrets.toml (or via the Space UI)."
38
  )
39
 
40
- # 3️⃣ Initialise the HF inference client
41
- client = InferenceClient(token=hf_token, model=model_name)
 
 
 
42
 
43
- # 4️⃣ Build the message list for the chat‑completion endpoint
44
- messages = [{"role": "system", "content": system_message}]
45
  messages.extend(history) # previous conversation turns
46
  messages.append({"role": "user", "content": message}) # current user query
47
 
48
- # 5️⃣ Stream the response back to the UI
49
  response = ""
50
- for chunk in client.chat_completion(
51
- messages,
52
- # max_tokens=max_tokens,
53
- max_tokens = 8096,
54
  stream=True
55
- # temperature=temperature,
56
- # top_p=top_p,
57
- ):
58
- choices = chunk.choices
59
- token = ""
60
- if choices and choices[0].delta.content:
61
- token = choices[0].delta.content
62
- response += token
63
- yield response
64
 
65
 
66
  # ──────────────────────────────────────────────────────────────────────
67
- # 1️⃣ List of models that the UI will show.
68
- # Add new model IDs to this list whenever you want to make them
69
- # selectable – no other code changes are required.
70
  # ──────────────────────────────────────────────────────────────────────
71
  AVAILABLE_MODELS = [
72
- "deepseek-ai/DeepSeek-V3.1",
73
- # "openai/gpt-oss-20b",
74
- # "Qwen/Qwen3-4B-Thinking-2507",
75
- # "Qwen/Qwen3-30B-A3B-Thinking-2507",
76
- # "openai/gpt-oss-120b",
77
- # "Qwen/Qwen3-235B-A22B-Thinking-2507"
78
- ] # ← add more strings here (e.g. "openai/gpt-oss-350b")
79
 
80
  # ----------------------------------------------------------------------
81
  # UI – the system‑prompt textbox has been removed.
@@ -83,24 +74,14 @@ AVAILABLE_MODELS = [
83
  chatbot = gr.ChatInterface(
84
  respond,
85
  type="messages",
86
- additional_inputs=[
87
- gr.Dropdown(
88
- choices=AVAILABLE_MODELS,
89
- value=AVAILABLE_MODELS[0],
90
- label="Model",
91
- interactive=True,
92
- ),
93
- # Only generation parameters are exposed now.
94
- # gr.Slider(minimum=1, maximum=8096, value=512, step=1, label="Max new tokens"),
95
- # gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
96
- # gr.Slider(
97
- # minimum=0.1,
98
- # maximum=1.0,
99
- # value=0.95,
100
- # step=0.05,
101
- # label="Top‑p (nucleus sampling)",
102
- # ),
103
- ],
104
  )
105
 
106
  # ----------------------------------------------------------------------
@@ -115,7 +96,7 @@ with gr.Blocks() as demo:
115
  # ----------------------------------------------------------------------
116
  if __name__ == "__main__":
117
  # ------------------------------------------------------------------
118
- # 1️⃣ Pull the allowed credentials from secrets (fail fast if missing)
119
  # ------------------------------------------------------------------
120
  allowed_user = _secret("CHAT_USER")
121
  allowed_pass = _secret("CHAT_PASS")
@@ -127,7 +108,7 @@ if __name__ == "__main__":
127
  )
128
 
129
  # ------------------------------------------------------------------
130
- # 2️⃣ Launch
131
  # ------------------------------------------------------------------
132
  demo.launch(
133
  auth=(allowed_user, allowed_pass), # <-- Gradio's built‑in basic auth
 
1
  import os
2
  import gradio as gr
3
+ from openai import OpenAI
4
 
5
  # ----------------------------------------------------------------------
6
  # Helper to read a secret (fallback is useful when you run locally)
 
16
def respond(
    message: str,
    history: list[dict[str, str]],
    # Default matches the only entry in AVAILABLE_MODELS: the UI's model
    # dropdown (additional_inputs) is currently commented out, so Gradio
    # calls respond(message, history) only — without a default this raised
    # TypeError on every message. Literal used (not AVAILABLE_MODELS[0])
    # because defaults are evaluated before that constant is defined below.
    model_name: str = "@preset/precise-chat-agent",
):
    """
    Stream a chat completion from OpenRouter (via the OpenAI client).

    Parameters
    ----------
    message : str
        The current user message.
    history : list[dict[str, str]]
        Previous conversation turns in OpenAI chat format
        ({"role": ..., "content": ...}), as supplied by gr.ChatInterface.
    model_name : str
        OpenRouter model identifier to route the request to.

    Yields
    ------
    str
        The accumulated assistant response after each streamed token
        (Gradio re-renders the chat bubble on every yield).

    Raises
    ------
    RuntimeError
        If the ``OPENROUTER_API_KEY`` secret is missing.

    Notes
    -----
    The ``prec_chat`` system prompt is currently *disabled* — no system
    message is prepended; the model preset is expected to carry its own
    instructions. Uncomment the line below to restore it.
    """
    # System prompt intentionally disabled (see Notes above):
    # system_message = _secret("prec_chat", "You are a helpful assistant.")

    # Fail fast on a missing key — otherwise the failure would only surface
    # later as an opaque HTTP 401 from the streaming call.
    openrouter_api_key = _secret("OPENROUTER_API_KEY")
    if not openrouter_api_key:
        raise RuntimeError(
            "OPENROUTER_API_KEY not found in secrets. Add it to secrets.toml (or via the Space UI)."
        )

    # OpenRouter speaks the OpenAI wire protocol, so the stock client works
    # with a swapped base URL.
    client = OpenAI(
        base_url="https://openrouter.ai/api/v1",
        api_key=openrouter_api_key,
    )

    # Build the message list: previous turns, then the new user query.
    messages = []
    messages.extend(history)
    messages.append({"role": "user", "content": message})

    # Stream the response, yielding the growing text for incremental display.
    response = ""
    stream = client.chat.completions.create(
        model=model_name,
        messages=messages,
        max_tokens=8096,  # NOTE(review): 8096 looks like a typo for 8192 — confirm intent
        stream=True,
    )

    for chunk in stream:
        # Guard against keep-alive/metadata chunks with an empty `choices`
        # list — indexing chunk.choices[0] unconditionally raises IndexError
        # on those (the pre-rewrite code had this guard; it was dropped).
        if chunk.choices and chunk.choices[0].delta.content is not None:
            response += chunk.choices[0].delta.content
            yield response
 
 
62
 
63
 
64
# ──────────────────────────────────────────────────────────────────────
# Models available through OpenRouter.
# Currently a single preset; append more OpenRouter model IDs here to
# make them selectable once the UI dropdown is re-enabled.
# ──────────────────────────────────────────────────────────────────────
AVAILABLE_MODELS = ["@preset/precise-chat-agent"]
 
 
 
 
 
70
 
71
  # ----------------------------------------------------------------------
72
  # UI – the system‑prompt textbox has been removed.
 
74
# Chat UI wired to respond(). The model dropdown is commented out, so the
# interface passes only (message, history) to the callback.
# NOTE(review): respond() declares a model_name parameter — confirm it has
# a default, or re-enable additional_inputs below, before deploying.
chatbot = gr.ChatInterface(
    respond,
    type="messages",
    # additional_inputs=[
    #     gr.Dropdown(
    #         choices=AVAILABLE_MODELS,
    #         value=AVAILABLE_MODELS[0],
    #         label="Model",
    #         interactive=True,
    #     ),
    # ],
)
86
 
87
  # ----------------------------------------------------------------------
 
96
  # ----------------------------------------------------------------------
97
  if __name__ == "__main__":
98
  # ------------------------------------------------------------------
99
+ # 1️⃣ Pull the allowed credentials from secrets (fail fast if missing)
100
  # ------------------------------------------------------------------
101
  allowed_user = _secret("CHAT_USER")
102
  allowed_pass = _secret("CHAT_PASS")
 
108
  )
109
 
110
  # ------------------------------------------------------------------
111
+ # 2️⃣ Launch
112
  # ------------------------------------------------------------------
113
  demo.launch(
114
  auth=(allowed_user, allowed_pass), # <-- Gradio's built‑in basic auth