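# app.py - Gradio chat demo for Llama 2 70B served through NVIDIA's NVCF API.
# Requires the API_KEY environment variable to be set before launching,
# e.g. API_KEY=<your key> python app.py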
import gradio as gr
import requests
import os
API_KEY = os.getenv('API_KEY')
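# NVCF "pexec" endpoints: a POST to INVOKE_URL starts the function call;
# pending requests are polled via the status endpoint below.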
INVOKE_URL = "https://api.nvcf.nvidia.com/v2/nvcf/pexec/functions/0e349b44-440a-44e1-93e9-abe8dcb27158"
FETCH_URL_FORMAT = "https://api.nvcf.nvidia.com/v2/nvcf/pexec/status/"
headers = {
"Authorization": f"Bearer {API_KEY}",
"Accept": "application/json",
"Content-Type": "application/json",
}
BASE_SYSTEM_MESSAGE = "I carefully provide accurate, factual, thoughtful, nuanced answers and am brilliant at reasoning."
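# Build the chat payload, invoke the NVCF function, and poll until the
# final response is available.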
def call_nvidia_api(message, history_api, system_message, max_tokens, temperature, top_p):
    messages = [{"role": "system", "content": system_message}] if system_message else []
    # Replay the conversation history first, then append the new user message,
    # so the model sees the turns in chronological order.
    for msg in history_api:
        messages.extend([{"role": "user", "content": msg[0]}, {"role": "assistant", "content": msg[1]}])
    messages.append({"role": "user", "content": message})
    payload = {
        "messages": messages,
        "temperature": temperature,
        "top_p": top_p,
        "max_tokens": max_tokens,
        "stream": False
    }
    session = requests.Session()
    response = session.post(INVOKE_URL, headers=headers, json=payload)
    # A 202 status means the request is still being processed; poll the
    # status endpoint with the request ID until the final result arrives.
    while response.status_code == 202:
        request_id = response.headers.get("NVCF-REQID")
        fetch_url = FETCH_URL_FORMAT + request_id
        response = session.get(fetch_url, headers=headers)
    response.raise_for_status()
    response_body = response.json()
    if response_body.get("choices"):
        return response_body["choices"][0]["message"]["content"]
    return "Sorry, an error occurred while generating the response."
def chatbot_function(message, history, system_message, max_tokens, temperature, top_p):
    # gr.ChatInterface passes the message and running history automatically
    # and manages chat state itself, so only the assistant reply is returned.
    return call_nvidia_api(message, history, system_message, max_tokens, temperature, top_p)
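# Controls exposed in the ChatInterface's additional-inputs panel.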
system_msg = gr.Textbox(value=BASE_SYSTEM_MESSAGE, label="System Message", placeholder="System prompt.", lines=5)
max_tokens = gr.Slider(minimum=20, maximum=1024, label="Max Tokens", step=20, value=1024)
temperature = gr.Slider(minimum=0.0, maximum=1.0, label="Temperature", step=0.1, value=0.2)
top_p = gr.Slider(minimum=0.0, maximum=1.0, label="Top P", step=0.05, value=0.7)
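# Assemble and launch the demo; ChatInterface wires the textbox, chatbot,
# and additional inputs together.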
with gr.Blocks() as demo:
    chat_interface = gr.ChatInterface(
        fn=chatbot_function,
        title="LLAMA 70B Free Demo",
        description="Explore the capabilities of LLAMA 2 70B",
        additional_inputs=[system_msg, max_tokens, temperature, top_p]
    )
demo.launch()