Update app.py
app.py CHANGED
@@ -6,8 +6,6 @@ from huggingface_hub import AsyncInferenceClient
 
 HF_TOKEN = os.getenv('HF_TOKEN')
 api_url = os.getenv('API_URL')
-#api_url_nostream = os.getenv('API_URL_NOSTREAM')
-#headers = {'Content-Type': 'application/json',}
 headers = {"Authorization": f"Bearer {HF_TOKEN}"}
 client = AsyncInferenceClient(api_url)
 
@@ -41,7 +39,7 @@ examples=[
 # <s>[INST] {{ user_msg_1 }} [/INST] {{ model_answer_1 }} </s><s>[INST] {{ user_msg_2 }} [/INST]
 
 
-# Stream text
+# Stream text - stream tokens with InferenceClient from TGI
 async def predict(message, chatbot, system_prompt="", temperature=0.9, max_new_tokens=256, top_p=0.6, repetition_penalty=1.0,):
 
     if system_prompt != "":
@@ -72,10 +70,9 @@ async def predict(message, chatbot, system_prompt="", temperature=0.9, max_new_t
         yield partial_message
 
 
-# No Stream
+# No Stream - batch produce tokens using TGI inference endpoint
 def predict_batch(message, chatbot, system_prompt="", temperature=0.9, max_new_tokens=256, top_p=0.6, repetition_penalty=1.0,):
-
-    print(f"chatbot - {chatbot}")
+
     if system_prompt != "":
         input_prompt = f"<s>[INST] <<SYS>>\n{system_prompt}\n<</SYS>>\n\n "
     else:
@@ -104,16 +101,10 @@ def predict_batch(message, chatbot, system_prompt="", temperature=0.9, max_new_t
     }
 
     response = requests.post(api_url, headers=headers, json=data ) #auth=('hf', hf_token)) data=json.dumps(data),
-    print(f"response - {response}")
-    print(f"response.status_code - {response.status_code}")
-    print(f"response.text - {response.text}")
-    print(f"type(response.text) - {type(response.text)}")
 
     if response.status_code == 200: # check if the request was successful
         try:
             json_obj = response.json()
-            print(f"type(response.json) - {type(json_obj)}")
-            print(f"response.json - {json_obj}")
             if 'generated_text' in json_obj[0] and len(json_obj[0]['generated_text']) > 0:
                 return json_obj[0]['generated_text']
             elif 'error' in json_obj[0]:
@@ -199,12 +190,12 @@ chat_interface_batch=gr.ChatInterface(predict_batch,
 with gr.Blocks() as demo:
 
     with gr.Tab("Streaming"):
-        #
+        # streaming chatbot
         chatbot_stream.like(vote, None, None)
         chat_interface_stream.render()
 
     with gr.Tab("Batch"):
-        #
+        # non-streaming chatbot
         chatbot_batch.like(vote, None, None)
         chat_interface_batch.render()
 
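For context (not part of this commit): the two comments the change adds describe two ways of calling the same TGI endpoint. Below is a minimal, self-contained sketch of both call patterns, assuming API_URL points at a TGI-compatible inference endpoint; stream_demo and batch_demo are illustrative names, not functions from app.py.

import asyncio
import os

import requests
from huggingface_hub import AsyncInferenceClient

HF_TOKEN = os.getenv("HF_TOKEN")
api_url = os.getenv("API_URL")
headers = {"Authorization": f"Bearer {HF_TOKEN}"}
client = AsyncInferenceClient(api_url)

async def stream_demo(prompt):
    # Streaming path (predict): text_generation(stream=True) yields
    # tokens one at a time as TGI generates them.
    partial_message = ""
    stream = await client.text_generation(
        prompt,
        stream=True,
        max_new_tokens=256,
        temperature=0.9,
        top_p=0.6,
        repetition_penalty=1.0,
    )
    async for token in stream:
        partial_message += token
        print(partial_message)

def batch_demo(prompt):
    # Batch path (predict_batch): a single POST with TGI's
    # {"inputs", "parameters"} payload; the endpoint replies with
    # [{"generated_text": ...}], matching the json_obj[0] access in app.py.
    data = {
        "inputs": prompt,
        "parameters": {
            "max_new_tokens": 256,
            "temperature": 0.9,
            "top_p": 0.6,
            "repetition_penalty": 1.0,
        },
    }
    response = requests.post(api_url, headers=headers, json=data)
    if response.status_code == 200:
        return response.json()[0]["generated_text"]
    return None

if __name__ == "__main__":
    prompt = "<s>[INST] Hello! [/INST]"  # Llama-2 chat format used by app.py
    print(batch_demo(prompt))
    asyncio.run(stream_demo(prompt))

The streaming tab feels more responsive because partial output reaches the UI as each token arrives; the batch tab blocks until the full generation is returned.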