# Hugging Face Space status banner captured with this file: "Runtime error".
import logging
import os
from threading import Thread
from typing import Iterator

import gradio as gr
import spaces
import torch
from openai import APIError, OpenAI
# OpenAI-compatible client used by generate(). The base URL targets a
# Hugging Face endpoint that exposes a /v1/ chat-completions API.
# The key is taken from the HF_TOKEN environment variable when it is set, so
# the credential does not have to live in source control; the original
# placeholder literal is kept as the fallback to preserve existing behaviour.
# NOTE(review): confirm HF_TOKEN is the secret name configured for this Space.
client = OpenAI(
    base_url="https://hjopms3xd7gembdu.us-east-1.aws.endpoints.huggingface.cloud/v1/",
    api_key=os.getenv("HF_TOKEN", "hf_XXXXX"),
)
# Slider bounds for the "Max new tokens" control; that control is currently
# commented out in the ChatInterface definition, so these are unused for now.
MAX_MAX_NEW_TOKENS = 2048
DEFAULT_MAX_NEW_TOKENS = 512

# Overridable through the MAX_INPUT_TOKEN_LENGTH environment variable
# (defaults to 4096).
MAX_INPUT_TOKEN_LENGTH = int(os.getenv("MAX_INPUT_TOKEN_LENGTH", "4096"))

# Markdown blurbs for the page header and footer. They are only referenced
# from the commented-out gr.Blocks layout further down the file.
DESCRIPTION = """
Llama3-TenyxChat-70B is part of the TenyxChat series, models trained to function as useful assistants.
The model is obtained via direct preference tuning using Tenyx's fine-tuning technology. Model details available at our model page.
"""

LICENSE = """
This demo is governed by the license available [here.](https://huggingface.co/spaces/tenyx/Llama3-TenyxChat-70B/blob/main/LICENSE.txt)"""
def generate(
    message: str,
    chat_history: list[tuple[str, str]],
) -> Iterator[str]:
    """Stream the assistant's reply to ``message`` as a growing string.

    Args:
        message: The newest user message.
        chat_history: Earlier turns as ``(user, assistant)`` pairs, in the
            order they should be replayed to the model.

    Yields:
        The reply text accumulated so far, once for every streamed chunk
        that carries text. If the request raises ``APIError`` (before or
        during streaming), a fixed apology string is yielded as the last
        item instead of propagating the exception.
    """
    conversation = [
        {
            "role": "system",
            "content": "You are a helpful assistant developed by Tenyx, a conversational voice AI company.",
        }
    ]
    for user, assistant in chat_history:
        conversation.append({"role": "user", "content": user})
        conversation.append({"role": "assistant", "content": assistant})
    conversation.append({"role": "user", "content": message})

    try:
        response = client.chat.completions.create(
            model="tgi",
            messages=conversation,
            stop=["<|end_of_text|>", "<|eot_id|>"],
            stream=True,
            max_tokens=1024,
        )
        outputs = []
        for chunk in response:
            # Streaming responses can include chunks without any choices and
            # chunks whose delta has no text (content is None, e.g. the
            # role-only first chunk or the final stop chunk). Joining a None
            # would raise TypeError, so such chunks are skipped.
            if not chunk.choices:
                continue
            piece = chunk.choices[0].delta.content
            if not piece:
                continue
            outputs.append(piece)
            yield "".join(outputs)
    except APIError:
        # Keep the traceback in the server log; show the user a generic message.
        logging.getLogger(__name__).exception("Chat completion request failed")
        yield "An error occurred. Please try again later."
# Chat UI wired to generate(). The extra inputs below stay disabled because
# generate() currently accepts only (message, chat_history).
demo = gr.ChatInterface(
    fn=generate,
    # additional_inputs=[
    #     gr.Textbox(label="System prompt", lines=6),
    #     gr.Slider(
    #         label="Max new tokens",
    #         minimum=1,
    #         maximum=MAX_MAX_NEW_TOKENS,
    #         step=1,
    #         value=DEFAULT_MAX_NEW_TOKENS,
    #     ),
    # ],
    stop_btn=None,
    examples=[
        ["Hello there! How are you doing?"],
        ["Can you explain briefly to me what is the Python programming language?"],
        ["Explain the potential role of Conversational AIs in customer support."],
        ["How many hours does it take a man to eat a Helicopter?"],
        ["Write a 100-word article on 'Benefits of Open-Source in AI research'"],
    ],
)
# with gr.Blocks() as demo:
#     # gr.Markdown(DESCRIPTION)
#     # gr.Markdown(LICENSE)
#     # gr.DuplicateButton(value="Duplicate Space for private use", elem_id="duplicate-button")
#     chat_interface.render()
if __name__ == "__main__":
    # Enable request queuing with max_size=4, then start the web server.
    demo.queue(max_size=4).launch()