Romi Nur Ismanto Claude Opus 4.6 (1M context) committed on
Commit
67046fd
·
1 Parent(s): 551c5e0

Fix InferenceClient to use provider=auto and add error handling

Browse files

- Use provider="auto" to let HF Hub pick the right inference provider
- Pass model to chat_completion() instead of constructor
- Add try/except to show actual error messages in chat

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>

Files changed (1) hide show
  1. app.py +19 -17
app.py CHANGED
@@ -18,30 +18,32 @@ def respond(
18
  yield "⚠️ Silakan login dulu dengan tombol Login di sidebar."
19
  return
20
 
21
- client = InferenceClient(token=hf_token.token, model="openai/gpt-oss-20b")
 
 
 
22
 
23
  messages = [{"role": "system", "content": system_message}]
24
-
25
  messages.extend(history)
26
-
27
  messages.append({"role": "user", "content": message})
28
 
29
  response = ""
30
 
31
- for chunk in client.chat_completion(
32
- messages,
33
- max_tokens=max_tokens,
34
- stream=True,
35
- temperature=temperature,
36
- top_p=top_p,
37
- ):
38
- choices = chunk.choices
39
- token = ""
40
- if len(choices) and choices[0].delta.content:
41
- token = choices[0].delta.content
42
-
43
- response += token
44
- yield response
 
45
 
46
 
47
  """
 
18
  yield "⚠️ Silakan login dulu dengan tombol Login di sidebar."
19
  return
20
 
21
+ client = InferenceClient(
22
+ provider="auto",
23
+ api_key=hf_token.token,
24
+ )
25
 
26
  messages = [{"role": "system", "content": system_message}]
 
27
  messages.extend(history)
 
28
  messages.append({"role": "user", "content": message})
29
 
30
  response = ""
31
 
32
+ try:
33
+ for chunk in client.chat_completion(
34
+ messages,
35
+ model="openai/gpt-oss-20b",
36
+ max_tokens=max_tokens,
37
+ stream=True,
38
+ temperature=temperature,
39
+ top_p=top_p,
40
+ ):
41
+ choices = chunk.choices
42
+ if len(choices) and choices[0].delta.content:
43
+ response += choices[0].delta.content
44
+ yield response
45
+ except Exception as e:
46
+ yield f"❌ Error: {e}"
47
 
48
 
49
  """