import gradio as gr
from gradio_client import Client
import os
import requests

tulu = "https://tonic1-tulu.hf.space/--replicas/pqcgw/"

HF_TOKEN = os.getenv("HF_TOKEN")
HEADERS = {"Authorization": f"Bearer {HF_TOKEN}"}

def build_input_prompt(message, chatbot, system_prompt):
    """
    Constructs the input prompt string from the chatbot interactions and the current message.
    """
    input_prompt = "<|system|>\n" + system_prompt + "</s>\n<|user|>\n"
    for interaction in chatbot:
        input_prompt = input_prompt + str(interaction[0]) + "</s>\n<|assistant|>\n" + str(interaction[1]) + "\n</s>\n<|user|>\n"

    input_prompt = input_prompt + str(message) + "</s>\n<|assistant|>"
    return input_prompt


def post_request_beta(payload):
    """
    Sends a POST request to the predefined Tulu URL and returns the JSON response.
    """
    response = requests.post(tulu, headers=HEADERS, json=payload)
    response.raise_for_status()  # Will raise an HTTPError if the HTTP request returned an unsuccessful status code
    return response.json()


def predict_beta(message, chatbot=None, system_prompt=""):
    """
    Builds the prompt from the conversation history and queries the Tulu Space for a reply.
    """
    chatbot = chatbot or []  # avoid a mutable default argument
    client = Client(tulu)

    # Build the input prompt from the system prompt, history, and current message
    input_prompt = build_input_prompt(message, chatbot, system_prompt)

    try:
        # Adjust these parameters as needed
        max_new_tokens = 1200
        temperature = 0.4
        top_p = 0.9
        repetition_penalty = 0.9
        advanced = True

        # Making the prediction
        result = client.predict(
            input_prompt,  # Using the built input prompt
            max_new_tokens,
            temperature,
            top_p,
            repetition_penalty,
            advanced,
            fn_index=0
        )

        # Extracting the response
        if result is not None and len(result) > 0:
            bot_message = result[0]  # Assuming the response is in the first element
            return bot_message
        else:
            raise gr.Error("No response received from the model.")

    except Exception as e:
        error_msg = f"An error occurred: {str(e)}"
        raise gr.Error(error_msg)

def test_preview_chatbot(message, history):
    response = predict_beta(message, history, SYSTEM_PROMPT)
    # Keep only the text after the final assistant tag
    text_start = response.rfind("<|assistant|>") + len("<|assistant|>")
    return response[text_start:]


welcome_preview_message = f"""
Welcome to **{TITLE}**! Say something like: 

"{EXAMPLE_INPUT}"
"""

chatbot_preview = gr.Chatbot(layout="panel", value=[(None, welcome_preview_message)])
textbox_preview = gr.Textbox(scale=7, container=False, value=EXAMPLE_INPUT)

demo = gr.ChatInterface(test_preview_chatbot, chatbot=chatbot_preview, textbox=textbox_preview)

demo.launch()