File size: 2,202 Bytes
20c9ade
006cc46
 
 
20c9ade
006cc46
51ad36a
006cc46
 
 
 
20c9ade
e9a8c32
51ad36a
006cc46
 
2b8a170
006cc46
2b8a170
006cc46
 
 
e9a8c32
 
 
f406d70
006cc46
 
 
e9a8c32
f406d70
 
006cc46
 
51ad36a
2b8a170
3307170
2b8a170
 
3307170
006cc46
 
 
 
 
 
e9a8c32
006cc46
e9a8c32
51ad36a
006cc46
e9a8c32
006cc46
 
 
 
 
e9a8c32
006cc46
 
3307170
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
import gradio as gr
from openai import OpenAI
import os
import time

# Shared API client for every chat request. The key is read once, at import
# time, from the RUNPOD_API_KEY environment variable (None when it is unset).
api_key = os.getenv("RUNPOD_API_KEY")
client = OpenAI(
    base_url="https://api.runpod.ai/v2/vllm-k0g4c60zor9xuu/openai/v1",
    api_key=api_key,
)

def get_response(user_message, history, verbosity):
    """Send the conversation to the chat model and return its reply text.

    Args:
        user_message: The text the user just submitted.
        history: Earlier turns as an iterable of ``(user_text, assistant_text)``
            pairs. Falsy halves of a pair are skipped; ``None`` is treated as
            an empty history.
        verbosity: ``"Balanced"`` selects temperature 0.5, ``"Concise"``
            selects 0.3, and any other value selects 0.7.

    Returns:
        The reply with surrounding whitespace removed, or
        ``"No response generated."`` when the API returns no choices or a
        missing/blank message body.
    """
    fallback = "No response generated."

    # Replay the earlier turns in role/content form, then append the new
    # user message as the final entry.
    messages = []
    for human, assistant in history or ():
        if human:
            messages.append({"role": "user", "content": human})
        if assistant:
            messages.append({"role": "assistant", "content": assistant})
    messages.append({"role": "user", "content": user_message})

    # Map the verbosity choice onto a sampling temperature; anything that is
    # not "Balanced" or "Concise" falls through to the highest setting.
    if verbosity == "Balanced":
        temperature = 0.5
    elif verbosity == "Concise":
        temperature = 0.3
    else:
        temperature = 0.7

    # Make the API call with a custom stop sequence.
    response = client.chat.completions.create(
        model='ambrosfitz/llama-3-history',
        messages=messages,
        temperature=temperature,
        max_tokens=150,
        stop=["<|eot_id|>"],
    )

    if not response.choices:
        return fallback

    # The message content may be None, so guard before stripping instead of
    # calling .strip() on it unconditionally.
    content = response.choices[0].message.content
    text = content.strip() if content else ""
    return text or fallback

with gr.Blocks() as demo:
    # Components are created in display order: chat log, input box,
    # clear button, verbosity selector.
    chatbot = gr.Chatbot()
    msg = gr.Textbox()
    clear = gr.Button("Clear")
    verbosity = gr.Radio(
        ["Concise", "Balanced", "Detailed"],
        value="Balanced",
        label="Verbosity",
    )

    def user(user_message, history, verbosity):
        # Submit handler: a whitespace-only message leaves the history
        # untouched; otherwise the new exchange is appended to a copy of it.
        # The textbox is cleared in both cases.
        if user_message.strip():
            reply = get_response(user_message, history, verbosity)
            history = history + [[user_message, reply]]
        return "", history

    def clear_chat():
        # Empty textbox and empty chat history.
        return "", []

    # Pressing Enter in the textbox sends the message; the button resets
    # both the textbox and the chat log.
    msg.submit(user, inputs=[msg, chatbot, verbosity], outputs=[msg, chatbot])
    clear.click(clear_chat, inputs=None, outputs=[msg, chatbot])

demo.launch()