File size: 5,963 Bytes
c551206
 
020a962
1df13e1
c551206
8cd9af7
 
c551206
1df13e1
c551206
 
 
 
 
 
 
 
8acbe0b
 
 
 
 
8cd9af7
8acbe0b
 
 
 
 
 
 
 
 
c551206
8acbe0b
c551206
 
 
 
 
85dbf4a
8acbe0b
 
c551206
 
8acbe0b
0e16686
c551206
 
 
8acbe0b
0e16686
cb4c132
0e16686
8acbe0b
8c77830
8acbe0b
 
 
 
 
9809955
8acbe0b
 
 
 
9faed3d
8acbe0b
c551206
8acbe0b
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
890954b
8acbe0b
 
9faed3d
8acbe0b
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
cb4c132
 
fc26c64
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
import gradio as gr
import requests
import os
import json

# Bearer token for the NVIDIA API, read from the environment (None when unset).
API_KEY = os.environ.get("API_KEY")

# Endpoint that receives the chat request.
INVOKE_URL = (
    "https://api.nvcf.nvidia.com/v2/nvcf/pexec/functions/"
    "0e349b44-440a-44e1-93e9-abe8dcb27158"
)
# Prefix of the polling endpoint; the request id is appended to it.
FETCH_URL_FORMAT = "https://api.nvcf.nvidia.com/v2/nvcf/pexec/status/"

# Headers sent with every request to the endpoints above.
# When API_KEY is unset the Authorization value is the literal "Bearer None".
headers = {
    "Authorization": "Bearer {}".format(API_KEY),
    "Accept": "application/json",
    "Content-Type": "application/json",
}

# Default text pre-filled into the "System Message" textbox.
BASE_SYSTEM_MESSAGE = (
    "I carefully provide accurate, factual, thoughtful, nuanced answers "
    "and am brilliant at reasoning."
)

def clear_chat(chat_history_state, chat_message):
    """Return the values that reset a conversation.

    Both arguments are accepted only because the click handler passes them
    in; their contents are ignored and never modified.

    Returns:
        A ``(history, message)`` tuple: a new empty list and an empty string.
    """
    print("Clearing chat...")
    return [], ""

def user(message, history, system_message=None):
    """Return a new history list extended with a user turn.

    Args:
        message: Content of the user turn to add.
        history: Existing list of ``{"role", "content"}`` dicts; ``None`` or
            an empty list starts a new conversation.
        system_message: When truthy, a system entry with this content is
            added immediately before the user entry.

    Returns:
        A new list containing the previous entries followed by the added
        ones. The list passed in as ``history`` is never modified.
    """
    print(f"User message: {message}")
    # Work on a copy: previously a non-empty history was mutated in place
    # while an empty one was replaced, so callers saw inconsistent behavior
    # and a later failure could leave a half-updated conversation behind.
    updated = list(history) if history else []
    if system_message:
        updated.append({"role": "system", "content": system_message})
    updated.append({"role": "user", "content": message})
    return updated

def call_nvidia_api(history, max_tokens, temperature, top_p):
    """Send the conversation to the NVIDIA endpoint and append the reply.

    Posts ``history`` to ``INVOKE_URL``. While the service answers with
    HTTP 202, the status endpoint is polled using the request id from the
    ``NVCF-REQID`` response header.

    Args:
        history: List of ``{"role", "content"}`` dicts sent as ``messages``.
        max_tokens: Forwarded as ``max_tokens``.
        temperature: Forwarded as ``temperature``.
        top_p: Forwarded as ``top_p``.

    Returns:
        The same ``history`` list. When the response contains at least one
        choice, the first choice's message content is appended to it (in
        place) as an assistant entry; otherwise it is returned unchanged.

    Raises:
        requests.HTTPError: The final response has an error status.
        requests.Timeout: A request exceeded the timeout below.
        RuntimeError: A 202 response carried no ``NVCF-REQID`` header.
    """
    payload = {
        "messages": history,
        "temperature": temperature,
        "top_p": top_p,
        "max_tokens": max_tokens,
        "stream": False
    }

    print(f"Payload enviado: {payload}")  # Log the outgoing payload

    # (connect, read) timeouts in seconds so a stalled connection cannot
    # hang the handler forever. The read value is deliberately generous.
    request_timeout = (10, 300)

    # The context manager closes the session's pooled connections on exit,
    # including when a request raises.
    with requests.Session() as session:
        response = session.post(
            INVOKE_URL, headers=headers, json=payload, timeout=request_timeout
        )

        # NOTE(review): the loop has no upper bound on iterations; it relies
        # on the service eventually answering with something other than 202.
        while response.status_code == 202:
            request_id = response.headers.get("NVCF-REQID")
            if not request_id:
                # Without an id there is nothing to poll; fail clearly
                # instead of with a TypeError from str + None.
                raise RuntimeError(
                    "Received HTTP 202 without an NVCF-REQID header; cannot poll for the result."
                )
            response = session.get(
                FETCH_URL_FORMAT + request_id,
                headers=headers,
                timeout=request_timeout,
            )

        response.raise_for_status()
        response_body = response.json()

    print(f"Payload recebido: {response_body}")  # Log the received payload

    # .get() tolerates a body without "choices" the same way an empty list
    # was already tolerated.
    choices = response_body.get("choices")
    if choices:
        assistant_message = choices[0]["message"]["content"]
        history.append({"role": "assistant", "content": assistant_message})

    return history

def chat(history, system_message, max_tokens, temperature, top_p):
    """Run one API round-trip for the given history.

    ``system_message`` is accepted but not used here; the remaining
    arguments are passed straight to ``call_nvidia_api``.

    Returns:
        A ``(history, "")`` tuple: the history returned by
        ``call_nvidia_api`` and an empty string.
    """
    print("Starting chat...")
    return call_nvidia_api(history, max_tokens, temperature, top_p), ""

# Gradio interface setup
with gr.Blocks() as demo:
    # Title, placed alone inside a single-column row.
    with gr.Row():
        with gr.Column():
            gr.Markdown("LLAMA 70B Free Demo")

    # HTML blurb shown under the title (usage instructions and credits).
    description = """
<div style="text-align: center; font-size: 1.5em; margin-bottom: 20px;">
    <strong>Explore the Capabilities of LLAMA 70B</strong>
</div>
<p>Code Llama is an LLM capable of generating code, and natural language about code, from both code and natural language prompts.
</p>
<p> <strong>How to Use:</strong></p>
<ol>
    <li>Enter your <strong>message</strong> in the textbox to start a conversation or ask a question.</li>
    <li>Adjust the <strong>Temperature</strong> and <strong>Top P</strong> sliders to control the creativity and diversity of the responses.</li>
    <li>Set the <strong>Max Tokens</strong> slider to determine the length of the response.</li>
    <li>Use the <strong>System Message</strong> textbox if you wish to provide a specific context or instruction for the AI.</li>
    <li>Click <strong>Send message</strong> to submit your query and receive a response from LLAMA70B.</li>
    <li>Press <strong>New topic</strong> to clear the chat history and start a new conversation thread.</li>
</ol>
<p> <strong>Powered by NVIDIA's cutting-edge AI API, LLAMA 70B offers an unparalleled opportunity to interact with an AI model of exceptional conversational ability, accessible to everyone at no cost.</strong></p>
<p> <strong>HF Created by:</strong> @artificialguybr (<a href="https://twitter.com/artificialguybr">Twitter</a>)</p>
<p> <strong>Discover more:</strong> <a href="https://artificialguy.com">artificialguy.com</a></p>
"""
    gr.Markdown(description)

    # Conversation display, input box and the two action buttons.
    chatbot = gr.Chatbot()
    message = gr.Textbox(
        label="What do you want to chat about?",
        placeholder="Ask me anything.",
        lines=3,
    )
    submit = gr.Button(value="Send message")
    clear = gr.Button(value="New topic")

    # Generation settings, passed to the handler on every submit.
    system_msg = gr.Textbox(
        value=BASE_SYSTEM_MESSAGE,
        label="System Message",
        placeholder="System prompt.",
        lines=5,
    )
    max_tokens = gr.Slider(
        minimum=20,
        maximum=1024,
        value=500,
        step=20,
        label="Max Tokens",
        interactive=True,
    )
    temperature = gr.Slider(
        minimum=0.0,
        maximum=1.0,
        value=0.7,
        step=0.1,
        label="Temperature",
        interactive=True,
    )
    top_p = gr.Slider(
        minimum=0.0,
        maximum=1.0,
        value=0.95,
        step=0.05,
        label="Top P",
        interactive=True,
    )

    # Role/content history kept between clicks, separate from the display.
    chat_history_state = gr.State(value=[])

    # Click handler for "Send message"; it receives the current value of
    # system_msg on every call.
    def update_chatbot(message, chat_history, system_message, max_tokens, temperature, top_p):
        """Send one user message and rebuild the chat display.

        Args:
            message: Text from the message textbox.
            chat_history: List of ``{"role", "content"}`` dicts held in the
                state component (empty or ``None`` for a new conversation).
            system_message: System prompt; added to the history only when
                the conversation is empty.
            max_tokens: Forwarded to ``chat``.
            temperature: Forwarded to ``chat``.
            top_p: Forwarded to ``chat``.

        Returns:
            A ``(pairs, chat_history, "")`` tuple: ``[user, assistant]``
            pairs for the chatbot display, the updated history, and an
            empty string that clears the message textbox.
        """
        print("Updating chatbot...")

        def as_pairs(history):
            """Convert role/content entries into [user, assistant] pairs."""
            pairs = []
            pending_user = None
            # Walk the turns in order so each reply is paired with the user
            # message that precedes it. Zipping two role-filtered lists
            # mispairs messages as soon as the counts differ.
            for entry in history:
                text = entry["content"].strip()
                if entry["role"] == "user":
                    if pending_user is not None:
                        # Earlier user turn never received a reply.
                        pairs.append([pending_user, None])
                    pending_user = text
                elif entry["role"] == "assistant":
                    pairs.append([pending_user, text])
                    pending_user = None
            if pending_user is not None:
                pairs.append([pending_user, None])
            # Drop pairs in which both sides are empty.
            return [pair for pair in pairs if pair[0] or pair[1]]

        chat_history = chat_history or []

        # A blank message has nothing to send; skip the API call and just
        # redraw the current conversation.
        if not (message or "").strip():
            return as_pairs(chat_history), chat_history, ""

        # The system prompt is only added at the start of a conversation.
        chat_history = user(message, chat_history, system_message if not chat_history else None)
        chat_history, _ = chat(chat_history, system_message, max_tokens, temperature, top_p)

        return as_pairs(chat_history), chat_history, ""
    
    # "Send message": run the handler, then refresh the display, the stored
    # history and the (emptied) message textbox.
    submit.click(
        fn=update_chatbot,
        inputs=[message, chat_history_state, system_msg, max_tokens, temperature, top_p],
        outputs=[chatbot, chat_history_state, message]
    )

    # "New topic": reset the stored history and the message textbox.
    clear.click(
        fn=clear_chat,
        inputs=[chat_history_state, message],
        outputs=[chat_history_state, message]
    )
    # Also empty the visible conversation. clear_chat only returns values for
    # the state and the textbox, so without this second listener the old
    # messages stayed on screen after "New topic".
    clear.click(fn=lambda: None, inputs=None, outputs=chatbot)

# Launch the Gradio app. There is no __main__ guard, so this runs whenever
# the module is executed or imported.
demo.launch()