from typing import Iterator

import gradio as gr
import spaces
from openai import OpenAI, APIError

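# The Inference Endpoint runs text-generation-inference (TGI), which exposes an
# OpenAI-compatible API, so the standard OpenAI client can stream from it.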
client = OpenAI(
    base_url="https://hjopms3xd7gembdu.us-east-1.aws.endpoints.huggingface.cloud/v1/",
    api_key="hf_XXXXX",  # placeholder; replace with a valid Hugging Face token
)

DESCRIPTION = """
Llama3-TenyxChat-70B is part of the TenyxChat series of models trained to function as useful assistants.
The model is obtained via direct preference tuning using Tenyx's fine-tuning technology. Model details are available on our model page.
"""


LICENSE = """
This demo is governed by the license available [here](https://huggingface.co/spaces/tenyx/Llama3-TenyxChat-70B/blob/main/LICENSE.txt)."""

@spaces.GPU
def generate(
    message: str,
    chat_history: list[tuple[str, str]],
) -> Iterator[str]:
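    """Stream the assistant's reply to `message`, replaying `chat_history` for context."""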
    conversation = [{"role": "system", "content": "You are a helpful assistant developed by Tenyx, a conversational voice AI company."}]

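    # Replay earlier turns so the endpoint sees the full dialogue.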
    for user, assistant in chat_history:
        conversation.extend([{"role": "user", "content": user}, {"role": "assistant", "content": assistant}])
    conversation.append({"role": "user", "content": message})

    try:
        response = client.chat.completions.create(
            model="tgi",  # TGI endpoints serve a single model under this placeholder name
            messages=conversation,
            stop=["<|end_of_text|>", "<|eot_id|>"],
            stream=True,
            temperature=0.1,
            max_tokens=1024,
        )
        outputs = []
        for chunk in response:
            # Skip deltas without text (e.g. the role-only first chunk), whose content is None.
            if chunk.choices[0].delta.content is not None:
                outputs.append(chunk.choices[0].delta.content)
                yield "".join(outputs)

    except APIError as e:
        # APIError also covers connection failures (APIConnectionError subclasses it).
        print(f"Error: {e}")
        yield "API error: the model is currently unavailable. Please try again later."

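# Hypothetical smoke test outside Gradio (assumes the endpoint above is reachable):
#     for partial in generate("Hello!", []):
#         print(partial)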


demo = gr.ChatInterface(
    fn=generate,
    description=DESCRIPTION,  # show the model blurb above the chat box
    # additional_inputs=[
    #     gr.Textbox(label="System prompt", lines=6),
    #     gr.Slider(
    #         label="Max new tokens",
    #         minimum=1,
    #         maximum=MAX_MAX_NEW_TOKENS,
    #         step=1,
    #         value=DEFAULT_MAX_NEW_TOKENS,
    #     ),
    # ],
    stop_btn=None,
    examples=[
        ["Hello there! How are you doing?"],
        ["Can you explain briefly to me what is the Python programming language?"],
        ["Explain the potential role of Conversational AIs in customer support."],
        ["How many hours does it take a man to eat a Helicopter?"],
        ["Write a 100-word article on 'Benefits of Open-Source in AI research'"],
    ],
)

# Alternative layout (unused): render the chat inside Blocks with the description
# and license; requires binding the ChatInterface to `chat_interface` first.
# with gr.Blocks() as demo:
#     gr.Markdown(DESCRIPTION)
#     gr.Markdown(LICENSE)
#     gr.DuplicateButton(value="Duplicate Space for private use", elem_id="duplicate-button")
#     chat_interface.render()

if __name__ == "__main__":
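    # max_size caps the pending request queue; further requests are rejected while it is full.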
    demo.queue(max_size=4).launch()