sebdg committed on
Commit
1c69950
1 Parent(s): 64c7bb3

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +22 -23
app.py CHANGED
@@ -5,19 +5,7 @@ from huggingface_hub import InferenceClient
5
  For more information on huggingface_hub Inference API support, please check the docs: https://huggingface.co/docs/huggingface_hub/v0.22.2/en/guides/inference
6
  """
7
  # client = InferenceClient("unsloth/Llama-3.2-1B-Instruct")
8
- #client = InferenceClient(model="https://aq0teqpujnx3bv68.us-east-1.aws.endpoints.huggingface.cloud/")
9
- import requests
10
-
11
- API_URL = "https://aq0teqpujnx3bv68.us-east-1.aws.endpoints.huggingface.cloud"
12
- headers = {
13
- "Accept" : "application/json",
14
- "Content-Type": "application/json"
15
- }
16
-
17
- def query(payload):
18
- response = requests.post(API_URL, headers=headers, json=payload)
19
- return response.json()
20
-
21
 
22
  def respond(
23
  message,
@@ -36,21 +24,32 @@ def respond(
36
  max_tokens = 512
37
  temperature = 0.7
38
  top_p = 0.95
39
-
40
 
41
- for message in query({
42
- "inputs":system_message + "\n\n" + message,
43
- "parameters": {
44
- "max_new_tokens": 150
45
- }
46
- }):
47
- print(message)
48
- token = message
 
 
 
 
 
 
 
 
 
 
 
 
 
49
  response += token
50
  yield response
51
 
52
 
53
-
54
  """
55
  For information on how to customize the ChatInterface, peruse the gradio docs: https://www.gradio.app/docs/chatinterface
56
  """
 
5
  For more information on huggingface_hub Inference API support, please check the docs: https://huggingface.co/docs/huggingface_hub/v0.22.2/en/guides/inference
6
  """
7
  # client = InferenceClient("unsloth/Llama-3.2-1B-Instruct")
8
+ client = InferenceClient(model="llama-3-1-8b-medical-f16-qip")
 
 
 
 
 
 
 
 
 
 
 
 
9
 
10
  def respond(
11
  message,
 
24
  max_tokens = 512
25
  temperature = 0.7
26
  top_p = 0.95
 
27
 
28
+ messages = [{"role": "system", "content": system_message}]
29
+
30
+ for val in history:
31
+ if val[0]:
32
+ messages.append({"role": "user", "content": val[0]})
33
+ if val[1]:
34
+ messages.append({"role": "assistant", "content": val[1]})
35
+
36
+ messages.append({"role": "user", "content": message})
37
+
38
+ response = ""
39
+
40
+ for message in client.chat_completion(
41
+ messages,
42
+ max_tokens=max_tokens,
43
+ stream=True,
44
+ temperature=temperature,
45
+ top_p=top_p,
46
+ ):
47
+ token = message.choices[0].delta.content
48
+
49
  response += token
50
  yield response
51
 
52
 
 
53
  """
54
  For information on how to customize the ChatInterface, peruse the gradio docs: https://www.gradio.app/docs/chatinterface
55
  """