from typing import Iterator

import gradio as gr
import spaces
from openai import OpenAI, APIError

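# The Inference Endpoint runs text-generation-inference (TGI), which exposes an
# OpenAI-compatible API, so the standard OpenAI client can stream from it.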
client = OpenAI(
    base_url="https://hjopms3xd7gembdu.us-east-1.aws.endpoints.huggingface.cloud/v1/",
    api_key="hf_XXXXX",  # placeholder; replace with a valid Hugging Face token
)

DESCRIPTION = """
Llama3-TenyxChat-70B is part of the TenyxChat series of models trained to function as useful assistants.
The model is obtained via direct preference tuning using Tenyx's fine-tuning technology. Model details are available on our model page.
"""


LICENSE = """
This demo is governed by the license available [here](https://huggingface.co/spaces/tenyx/Llama3-TenyxChat-70B/blob/main/LICENSE.txt)."""

@spaces.GPU
def generate(
    message: str,
    chat_history: list[tuple[str, str]],
) -> Iterator[str]:
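    """Stream the assistant's reply to `message`, replaying `chat_history` for context."""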
    conversation = [{"role": "system", "content": "You are a helpful assistant developed by Tenyx, a conversational voice AI company."}]

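    # Replay earlier turns so the endpoint sees the full dialogue.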
    for user, assistant in chat_history:
        conversation.extend([{"role": "user", "content": user}, {"role": "assistant", "content": assistant}])
    conversation.append({"role": "user", "content": message})

    try:
        response = client.chat.completions.create(
            model="tgi",  # TGI endpoints serve a single model under this placeholder name
            messages=conversation,
            stop=["<|end_of_text|>", "<|eot_id|>"],
            stream=True,
            temperature=0.1,
            max_tokens=1024,
        )
        outputs = []
        for chunk in response:
            # Skip deltas without text (e.g. the role-only first chunk), whose content is None.
            if chunk.choices[0].delta.content is not None:
                outputs.append(chunk.choices[0].delta.content)
                yield "".join(outputs)

    except APIError as e:
        # APIError also covers connection failures (APIConnectionError subclasses it).
        print(f"Error: {e}")
        yield "API error: the model is currently unavailable. Please try again later."

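# Hypothetical smoke test outside Gradio (assumes the endpoint above is reachable):
#     for partial in generate("Hello!", []):
#         print(partial)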


demo = gr.ChatInterface(
    fn=generate,
    description=DESCRIPTION,  # show the model blurb above the chat box
    # additional_inputs=[
    #     gr.Textbox(label="System prompt", lines=6),
    #     gr.Slider(
    #         label="Max new tokens",
    #         minimum=1,
    #         maximum=MAX_MAX_NEW_TOKENS,
    #         step=1,
    #         value=DEFAULT_MAX_NEW_TOKENS,
    #     ),
    # ],
    stop_btn=None,
    examples=[
        ["Hello there! How are you doing?"],
        ["Can you explain briefly to me what is the Python programming language?"],
        ["Explain the potential role of Conversational AIs in customer support."],
        ["How many hours does it take a man to eat a Helicopter?"],
        ["Write a 100-word article on 'Benefits of Open-Source in AI research'"],
    ],
)

# Alternative layout (unused): render the chat inside Blocks with the description
# and license; requires binding the ChatInterface to `chat_interface` first.
# with gr.Blocks() as demo:
#     gr.Markdown(DESCRIPTION)
#     gr.Markdown(LICENSE)
#     gr.DuplicateButton(value="Duplicate Space for private use", elem_id="duplicate-button")
#     chat_interface.render()

if __name__ == "__main__":
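    # max_size caps the pending request queue; further requests are rejected while it is full.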
    demo.queue(max_size=4).launch()