Sebastien De Greef committed on
Commit
6ee44e1
1 Parent(s): 1c69950

fix chat_completion

Browse files
Files changed (1) hide show
  1. app.py +4 -2
app.py CHANGED
@@ -5,7 +5,7 @@ from huggingface_hub import InferenceClient
5
  For more information on huggingface_hub Inference API support, please check the docs: https://huggingface.co/docs/huggingface_hub/v0.22.2/en/guides/inference
6
  """
7
  # client = InferenceClient("unsloth/Llama-3.2-1B-Instruct")
8
- client = InferenceClient(model="llama-3-1-8b-medical-f16-qip")
9
 
10
  def respond(
11
  message,
@@ -41,11 +41,13 @@ def respond(
41
  messages,
42
  max_tokens=max_tokens,
43
  stream=True,
 
44
  temperature=temperature,
45
  top_p=top_p,
46
  ):
47
  token = message.choices[0].delta.content
48
-
 
49
  response += token
50
  yield response
51
 
 
5
  For more information on huggingface_hub Inference API support, please check the docs: https://huggingface.co/docs/huggingface_hub/v0.22.2/en/guides/inference
6
  """
7
  # client = InferenceClient("unsloth/Llama-3.2-1B-Instruct")
8
+ client = InferenceClient( model="https://kjynd32snp9r6qb7.us-east-1.aws.endpoints.huggingface.cloud")
9
 
10
  def respond(
11
  message,
 
41
  messages,
42
  max_tokens=max_tokens,
43
  stream=True,
44
+ stop=["<|im_end|><|im_end|>", "<|im_end|>"],
45
  temperature=temperature,
46
  top_p=top_p,
47
  ):
48
  token = message.choices[0].delta.content
49
+ if not token:
50
+ break
51
  response += token
52
  yield response
53