Spaces:
Runtime error
Runtime error
import json | |
import gradio as gr | |
import os | |
import requests | |
# Endpoint credentials and URL are read from the environment
# (os.getenv returns None for a variable that is not set).
hf_token = os.getenv('HF_TOKEN')
api_url = os.getenv('API_URL')

# Attach the Authorization header only when a token is configured:
# concatenating 'Bearer ' with None would raise TypeError at import time
# and prevent the app from starting at all.
headers = {
    'Content-Type': 'application/json',
}
if hf_token:
    headers['Authorization'] = 'Bearer ' + hf_token

# System prompt injected into the <<SYS>> section of every request.
system_message = "\nTesting by KelvinLo UD\n"

# Text and styling passed to the Gradio chat interface.
title = "Llama-2 Chatbot"
description = """
Demo by Kelvin Lo, UD
"""
# Hides elements with the `toast-wrap` class in the rendered page.
css = """.toast-wrap { display: none !important } """

# Sample prompts offered in the UI.
examples = [
    'Can you write a javascripts sample to print the time now?',
    '可以用中文字作詩比我?',
    "Write a 100-word article on 'Benefits of private AI Server'",
]
def predict(message, chatbot):
    """Send the conversation to the text-generation endpoint and yield the reply.

    The prompt is assembled in the Llama-2 style ``[INST] ... [/INST]`` layout:
    the module-level ``system_message`` goes in the ``<<SYS>>`` section, then
    each past interaction, then the new ``message``.

    Args:
        message: The latest user message; converted with ``str()``.
        chatbot: Iterable of past interactions. Element ``[0]`` and ``[1]`` of
            each interaction are used (presumably the user text and the model
            reply as supplied by ``gr.ChatInterface`` -- confirm against the
            Gradio version in use).

    Yields:
        The accumulated reply text after every response line that carries a
        ``generated_text`` field, or an error message string when the
        endpoint reports an ``error`` field.
    """
    # Collect the prompt pieces and join once instead of repeated concatenation.
    prompt_parts = [f"[INST] <<SYS>>\n{system_message}\n<</SYS>>\n\n "]
    for interaction in chatbot:
        prompt_parts.append(
            str(interaction[0]) + " [/INST] " + str(interaction[1]) + " </s><s> [INST] "
        )
    prompt_parts.append(str(message) + " [/INST] ")
    input_prompt = "".join(prompt_parts)

    data = {
        "inputs": input_prompt,
        "parameters": {
            "max_new_tokens": 1000,
            "do_sample": True,
            "top_p": 0.6,
            "temperature": 0.9,
        },
    }

    # The context manager releases the streamed connection even when the
    # consumer stops iterating this generator early.
    with requests.post(api_url, headers=headers, data=json.dumps(data), stream=True) as response:
        print(response)
        partial_message = ""
        for line in response.iter_lines():
            if not line:  # filter out keep-alive new lines
                continue

            # Decode from bytes to string
            json_line = line.decode('utf-8')
            print(json_line)

            try:
                payload = json.loads(json_line)
            except json.JSONDecodeError:
                gr.Warning(f"This line is not valid JSON: {json_line}")
                continue

            # The payload may be a list of result objects or a single object
            # (for example an error report). Only the first list element is
            # used, as before; indexing a dict with [0] used to raise KeyError
            # and then crash inside the handler on an unbound variable.
            if isinstance(payload, list):
                if not payload:
                    gr.Warning(f"Empty JSON list received: {json_line}")
                    continue
                json_obj = payload[0]
            else:
                json_obj = payload
            print(json_obj)

            if not isinstance(json_obj, dict):
                gr.Warning(f"Unexpected JSON payload: {json_obj}")
                continue

            if 'generated_text' in json_obj:
                partial_message = partial_message + str(json_obj['generated_text'])
                yield partial_message
            elif 'error' in json_obj:
                # f-string so a non-string error value cannot raise TypeError.
                yield f"{json_obj['error']}. Please refresh and try again with an appropriate smaller input prompt."
            else:
                gr.Warning(
                    f"Neither 'generated_text' nor 'error' exists in this JSON object: {json_obj}"
                )
# Wrap `predict` in a chat UI, enable the request queue, and start serving.
gr.ChatInterface(
    predict,
    title=title,
    description=description,
    css=css,
    examples=examples,
    cache_examples=True,
).queue(concurrency_count=75).launch()