Sebastien De Greef committed on
Commit
6ee44e1
1 Parent(s): 1c69950

fix chat_completion

Browse files
Files changed (1) hide show
  1. app.py +4 -2
app.py CHANGED
@@ -5,7 +5,7 @@ from huggingface_hub import InferenceClient
5
  For more information on huggingface_hub Inference API support, please check the docs: https://huggingface.co/docs/huggingface_hub/v0.22.2/en/guides/inference
6
  """
7
  # client = InferenceClient("unsloth/Llama-3.2-1B-Instruct")
8
- client = InferenceClient(model="llama-3-1-8b-medical-f16-qip")
9
 
10
  def respond(
11
  message,
@@ -41,11 +41,13 @@ def respond(
41
  messages,
42
  max_tokens=max_tokens,
43
  stream=True,
 
44
  temperature=temperature,
45
  top_p=top_p,
46
  ):
47
  token = message.choices[0].delta.content
48
-
 
49
  response += token
50
  yield response
51
 
 
5
  For more information on huggingface_hub Inference API support, please check the docs: https://huggingface.co/docs/huggingface_hub/v0.22.2/en/guides/inference
6
  """
7
  # client = InferenceClient("unsloth/Llama-3.2-1B-Instruct")
8
+ client = InferenceClient( model="https://kjynd32snp9r6qb7.us-east-1.aws.endpoints.huggingface.cloud")
9
 
10
  def respond(
11
  message,
 
41
  messages,
42
  max_tokens=max_tokens,
43
  stream=True,
44
+ stop=["<|im_end|><|im_end|>", "<|im_end|>"],
45
  temperature=temperature,
46
  top_p=top_p,
47
  ):
48
  token = message.choices[0].delta.content
49
+ if not token:
50
+ break
51
  response += token
52
  yield response
53