File size: 4,015 Bytes
0ff301d
4e4df74
 
 
de611b5
a00c633
859ac01
 
de611b5
4e4df74
de611b5
 
a00c633
de611b5
4e4df74
0ff301d
4e4df74
0ff301d
 
 
4e4df74
 
 
0ff301d
 
de611b5
 
4e4df74
a00c633
de611b5
0ff301d
a00c633
4e4df74
 
a00c633
4e4df74
a00c633
 
4e4df74
a00c633
4e4df74
0ff301d
4e4df74
 
 
 
 
 
0ff301d
4e4df74
0ff301d
4e4df74
 
 
 
 
de611b5
a00c633
de611b5
4e4df74
 
 
 
de611b5
4e4df74
 
 
 
 
 
 
0ff301d
4e4df74
 
 
 
de611b5
a00c633
de611b5
4e4df74
 
 
 
de611b5
a00c633
de611b5
4e4df74
a00c633
859ac01
 
de611b5
 
 
 
859ac01
de611b5
 
 
 
a00c633
 
859ac01
a00c633
 
859ac01
de611b5
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
import os
import gradio as gr
from huggingface_hub import InferenceClient

# ----------------------------------------------------------------------
# Helper to read a secret (fallback is useful when you run locally)
# ----------------------------------------------------------------------
def _secret(key: str, fallback: str = "") -> str:
    return os.getenv(key, fallback)


# ----------------------------------------------------------------------
# Core chat logic – system prompt comes from the secret `prec_chat`
# ----------------------------------------------------------------------
def respond(
    message: str,
    history: list[dict[str, str]],
    max_tokens: int,
    temperature: float,
    top_p: float,
    hf_token: gr.OAuthToken,
):
    """
    Stream a chat completion from the HuggingFace Inference API.

    Args:
        message: The current user query.
        history: Previous turns as ``{"role": ..., "content": ...}`` dicts
            (the ``type="messages"`` format of ``gr.ChatInterface``).
        max_tokens: Maximum number of new tokens to generate.
        temperature: Sampling temperature.
        top_p: Nucleus-sampling probability mass.
        hf_token: OAuth token injected by Gradio. This app uses basic auth
            (no LoginButton), so Gradio passes ``None`` here; we then fall
            back to the ``HF_TOKEN`` secret.

    Yields:
        The accumulated response text after each streamed chunk.

    The system prompt is taken from the secret **prec_chat**.
    Users cannot edit it from the UI.
    """
    # 1️⃣  Load the system prompt (fallback = generic assistant)
    system_message = _secret("prec_chat", "You are a helpful assistant.")

    # 2️⃣  Initialise the HF inference client.
    # Bug fix: without a LoginButton there is no OAuth flow, so `hf_token`
    # arrives as None and `hf_token.token` raised AttributeError on the very
    # first message. Fall back to the HF_TOKEN env secret in that case.
    api_token = hf_token.token if hf_token is not None else os.getenv("HF_TOKEN", "")
    client = InferenceClient(token=api_token, model="openai/gpt-oss-20b")

    # 3️⃣  Build the message list for the chat-completion endpoint
    messages = [{"role": "system", "content": system_message}]
    messages.extend(history)                     # previous turns
    messages.append({"role": "user", "content": message})  # current query

    # 4️⃣  Stream the response back to the UI, yielding the running text
    response = ""
    for chunk in client.chat_completion(
        messages,
        max_tokens=max_tokens,
        stream=True,
        temperature=temperature,
        top_p=top_p,
    ):
        choices = chunk.choices
        token_text = ""
        if choices and choices[0].delta.content:
            token_text = choices[0].delta.content
        response += token_text
        yield response


# ----------------------------------------------------------------------
# UI – the system‑prompt textbox has been removed.
# ----------------------------------------------------------------------
# Generation controls exposed to the user (the system prompt is NOT among
# them — it comes from the `prec_chat` secret inside `respond`).
_max_tokens_slider = gr.Slider(
    minimum=1, maximum=2048, value=512, step=1, label="Max new tokens"
)
_temperature_slider = gr.Slider(
    minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"
)
_top_p_slider = gr.Slider(
    minimum=0.1,
    maximum=1.0,
    value=0.95,
    step=0.05,
    label="Top‑p (nucleus sampling)",
)

# Chat UI wired to `respond`; messages use the role/content dict format.
chatbot = gr.ChatInterface(
    respond,
    type="messages",
    additional_inputs=[_max_tokens_slider, _temperature_slider, _top_p_slider],
)

# ----------------------------------------------------------------------
# Assemble the Blocks layout (no LoginButton – we use basic auth)
# ----------------------------------------------------------------------
# Wrap the pre-built ChatInterface in a Blocks container so the layout can
# be extended later (extra rows/components) without changing the chat logic.
with gr.Blocks() as demo:
    chatbot.render()


# ----------------------------------------------------------------------
# Launch – protect the UI with the credentials from secrets.
# ----------------------------------------------------------------------
if __name__ == "__main__":
    # Credentials for Gradio's built-in basic auth come from secrets.
    # Refuse to start when either is missing so the UI is never exposed
    # unprotected (fail fast).
    credentials = (_secret("CHAT_USER"), _secret("CHAT_PASS"))

    if not all(credentials):
        raise RuntimeError(
            "Authentication credentials not found in secrets. "
            "Add CHAT_USER and CHAT_PASS to secrets.toml (or via the HF Spaces UI)."
        )

    demo.launch(
        auth=credentials,        # Gradio's built-in basic auth: (user, pass)
        ssr_mode=False,          # disable SSR to avoid the i18n locale error
        # Remote environments (HF Spaces, Docker, cloud VM) need a shareable
        # link; remove `share=True` when running locally on 127.0.0.1:7860.
        share=True,
        server_name="0.0.0.0",   # listen on all interfaces (containers)
    )