ysharma (HF staff) committed
Commit 6bfce88 (parent: 8c7d524)

Update app.py

Files changed (1): app.py (+5 -14)
app.py CHANGED
@@ -6,8 +6,6 @@ from huggingface_hub import AsyncInferenceClient
 
 HF_TOKEN = os.getenv('HF_TOKEN')
 api_url = os.getenv('API_URL')
-#api_url_nostream = os.getenv('API_URL_NOSTREAM')
-#headers = {'Content-Type': 'application/json',}
 headers = {"Authorization": f"Bearer {HF_TOKEN}"}
 client = AsyncInferenceClient(api_url)
 
@@ -41,7 +39,7 @@ examples=[
 # <s>[INST] {{ user_msg_1 }} [/INST] {{ model_answer_1 }} </s><s>[INST] {{ user_msg_2 }} [/INST]
 
 
-# Stream text
+# Stream text - stream tokens with InferenceClient from TGI
 async def predict(message, chatbot, system_prompt="", temperature=0.9, max_new_tokens=256, top_p=0.6, repetition_penalty=1.0,):
 
     if system_prompt != "":
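The renamed comment points at the streaming path: `predict` asks the TGI server for tokens as they are generated and yields a growing partial message back to Gradio. A minimal sketch of that pattern, assuming `API_URL` points at a TGI endpoint (parameter values mirror the function's defaults):

```python
# Minimal sketch of the streaming pattern predict() follows; not the app's exact code.
from huggingface_hub import AsyncInferenceClient

client = AsyncInferenceClient("https://your-tgi-endpoint")  # assumption: a TGI server URL

async def stream_reply(prompt: str):
    partial_message = ""
    # With stream=True, text_generation yields tokens as the server emits them.
    async for token in await client.text_generation(
        prompt,
        max_new_tokens=256,
        temperature=0.9,
        top_p=0.6,
        repetition_penalty=1.0,
        stream=True,
    ):
        partial_message += token
        yield partial_message  # Gradio re-renders the growing message on each yield
```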
@@ -72,10 +70,9 @@ async def predict(message, chatbot, system_prompt="", temperature=0.9, max_new_t
         yield partial_message
 
 
-# No Stream
+# No Stream - batch produce tokens using TGI inference endpoint
 def predict_batch(message, chatbot, system_prompt="", temperature=0.9, max_new_tokens=256, top_p=0.6, repetition_penalty=1.0,):
-    print(f"message - {message}")
-    print(f"chatbot - {chatbot}")
+
     if system_prompt != "":
         input_prompt = f"<s>[INST] <<SYS>>\n{system_prompt}\n<</SYS>>\n\n "
    else:
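Both functions assemble a Llama-2 chat prompt following the template quoted in the comment near the top of the file. As a hedged illustration, the assembly amounts to something like this (the helper name is hypothetical, not from app.py):

```python
# Hypothetical helper showing the Llama-2 prompt layout the app builds inline.
def build_llama2_prompt(message, history, system_prompt=""):
    if system_prompt != "":
        prompt = f"<s>[INST] <<SYS>>\n{system_prompt}\n<</SYS>>\n\n "
    else:
        prompt = "<s>[INST] "
    # history arrives from gr.ChatInterface as (user_msg, model_answer) pairs.
    for user_msg, model_answer in history:
        prompt += f"{user_msg} [/INST] {model_answer} </s><s>[INST] "
    prompt += f"{message} [/INST]"
    return prompt
```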
@@ -104,16 +101,10 @@ def predict_batch(message, chatbot, system_prompt="", temperature=0.9, max_new_t
     }
 
     response = requests.post(api_url, headers=headers, json=data ) #auth=('hf', hf_token)) data=json.dumps(data),
-    print(f"response - {response}")
-    print(f"response.status_code - {response.status_code}")
-    print(f"response.text - {response.text}")
-    print(f"type(response.text) - {type(response.text)}")
 
     if response.status_code == 200: # check if the request was successful
         try:
             json_obj = response.json()
-            print(f"type(response.json) - {type(json_obj)}")
-            print(f"response.json - {json_obj}")
             if 'generated_text' in json_obj[0] and len(json_obj[0]['generated_text']) > 0:
                 return json_obj[0]['generated_text']
             elif 'error' in json_obj[0]:
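The non-streaming path posts the prompt to the same endpoint and parses the finished completion; this hunk just strips the debug prints around that call. A sketch of the request/response shape, assuming the standard TGI JSON payload (`inputs` plus a `parameters` object):

```python
# Sketch of the batch call, assuming the standard TGI request/response shape.
import os
import requests

api_url = os.getenv("API_URL")
headers = {"Authorization": f"Bearer {os.getenv('HF_TOKEN')}"}

def generate_once(prompt: str) -> str:
    data = {
        "inputs": prompt,
        "parameters": {
            "max_new_tokens": 256,
            "temperature": 0.9,
            "top_p": 0.6,
            "repetition_penalty": 1.0,
        },
    }
    response = requests.post(api_url, headers=headers, json=data)
    response.raise_for_status()
    json_obj = response.json()
    # TGI answers with a one-element list holding the completed text.
    return json_obj[0]["generated_text"]
```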
@@ -199,12 +190,12 @@ chat_interface_batch=gr.ChatInterface(predict_batch,
 with gr.Blocks() as demo:
 
     with gr.Tab("Streaming"):
-        #gr.ChatInterface(predict, title=title, description=description, css=css, examples=examples, cache_examples=True, additional_inputs=additional_inputs,)
+        # streaming chatbot
         chatbot_stream.like(vote, None, None)
         chat_interface_stream.render()
 
     with gr.Tab("Batch"):
-        #gr.ChatInterface(predict_batch, title=title, description=description, css=css, examples=examples, cache_examples=True, additional_inputs=additional_inputs,)
+        # non-streaming chatbot
         chatbot_batch.like(vote, None, None)
         chat_interface_batch.render()
 
 