"""Gradio chat front-end for an LLM served through NVIDIA's NVCF HTTP API."""

import json
import os

import gradio as gr
import requests

# Load the API key from the environment. If the variable is unset this is
# None and the Authorization header below becomes "Bearer None".
API_KEY = os.getenv('API_KEY')
INVOKE_URL = "https://api.nvcf.nvidia.com/v2/nvcf/pexec/functions/df2bee43-fb69-42b9-9ee5-f4eabbeaf3a8"
FETCH_URL_FORMAT = "https://api.nvcf.nvidia.com/v2/nvcf/pexec/status/"
headers = {
    "Authorization": f"Bearer {API_KEY}",
    "Accept": "application/json",
    "Content-Type": "application/json",
}
BASE_SYSTEM_MESSAGE = "I carefully provide accurate, factual, thoughtful, nuanced answers and am brilliant at reasoning."

# Upper bound (seconds) for each individual HTTP request, so a stalled
# connection cannot hang a chat turn forever.
REQUEST_TIMEOUT_SECONDS = 120

DESCRIPTION = """
Explore the Capabilities of LLAMA 2 70B

Llama 2 is a large language AI model capable of generating text and code in response to prompts.

How to Use:

  1. Enter your message in the textbox to start a conversation or ask a question.
  2. Adjust the parameters in the "Additional Inputs" accordion to control the model's behavior.
  3. Use the buttons below the chatbot to submit your query, clear the chat history, or perform other actions.

Powered by NVIDIA's cutting-edge AI API, LLAMA 2 70B offers an unparalleled opportunity to interact with an AI model of exceptional conversational ability, accessible to everyone at no cost.

HF Created by: @artificialguybr (Twitter)

Discover more: artificialguy.com

"""


def _build_messages(history, system_message):
    """Convert ``[user, assistant]`` pairs into the chat ``messages`` list."""
    messages = [{"role": "system", "content": system_message}] if system_message else []
    for turn in history:
        user_text, assistant_text = turn[0], turn[1]
        messages.append({"role": "user", "content": user_text})
        # The newest turn has no assistant reply yet; only emit one if present.
        if assistant_text:
            messages.append({"role": "assistant", "content": assistant_text})
    return messages


def call_nvidia_api(history, system_message, max_tokens, temperature, top_p):
    """Send the conversation to the NVCF endpoint and return the reply.

    Args:
        history: Sequence of ``[user_text, assistant_text]`` pairs. A pair
            whose assistant text is empty/None contributes only its user
            message (this is how the pending user turn is passed in).
        system_message: Optional system prompt; omitted when falsy.
        max_tokens: Forwarded as ``max_tokens`` in the request payload.
        temperature: Forwarded as ``temperature`` in the request payload.
        top_p: Forwarded as ``top_p`` in the request payload.

    Returns:
        ``(assistant_message, choice)`` where ``choice`` is the first entry
        of the response's ``choices`` list, or a fixed error string and
        ``None`` when the response contains no choices.

    Raises:
        requests.HTTPError: If the final response has an error status.
        RuntimeError: If a 202 response lacks the ``NVCF-REQID`` header
            needed to poll for the result.
    """
    payload = {
        "messages": _build_messages(history, system_message),
        "temperature": temperature,
        "top_p": top_p,
        "max_tokens": max_tokens,
        "stream": False,
    }

    # The context manager closes the session's pooled connections on exit.
    with requests.Session() as session:
        response = session.post(
            INVOKE_URL,
            headers=headers,
            json=payload,
            timeout=REQUEST_TIMEOUT_SECONDS,
        )
        # 202 means the request is still being processed: keep polling the
        # status URL for the request id until a different status comes back.
        while response.status_code == 202:
            request_id = response.headers.get("NVCF-REQID")
            if not request_id:
                raise RuntimeError(
                    "Received HTTP 202 without an NVCF-REQID header; cannot poll for the result."
                )
            response = session.get(
                FETCH_URL_FORMAT + request_id,
                headers=headers,
                timeout=REQUEST_TIMEOUT_SECONDS,
            )
        response.raise_for_status()
        response_body = response.json()

    choices = response_body.get("choices")
    if choices:
        first_choice = choices[0]
        # Return both the text shown to the user and the full choice object.
        return first_choice["message"]["content"], first_choice
    return "Desculpe, ocorreu um erro ao gerar a resposta.", None


def chatbot_submit(message, chat_history_ui, chat_history_api, system_message, max_tokens_val, temperature_val, top_p_val):
    """Run one chat turn and update both history lists.

    Args:
        message: The user's new message.
        chat_history_ui: List of ``[user, assistant]`` pairs shown in the UI;
            the new turn is always appended (including the error text).
        chat_history_api: List of ``[user, assistant]`` pairs replayed to the
            API; the new turn is appended only when the API returned a choice.
        system_message: System prompt forwarded to the API.
        max_tokens_val: ``max_tokens`` forwarded to the API.
        temperature_val: ``temperature`` forwarded to the API.
        top_p_val: ``top_p`` forwarded to the API.

    Returns:
        ``(assistant_message, chat_history_ui, chat_history_api)``.
    """
    print("Updating chatbot...")
    chat_history_ui = [] if chat_history_ui is None else chat_history_ui
    chat_history_api = [] if chat_history_api is None else chat_history_api

    # Include the new user message as a pending turn so the model actually
    # sees it; build a new list so a failed call leaves the API history intact.
    pending_history = [*chat_history_api, [message, None]]
    assistant_message, api_response = call_nvidia_api(
        pending_history, system_message, max_tokens_val, temperature_val, top_p_val
    )

    # Update the user-interface history.
    chat_history_ui.append([message, assistant_message])
    # Update the API history only if the response had the expected format.
    if api_response:
        chat_history_api.append([message, assistant_message])

    return assistant_message, chat_history_ui, chat_history_api


def _chat_interface_fn(message, history, system_message, max_tokens_val, temperature_val, top_p_val):
    """Adapt ``chatbot_submit`` to the ``fn(message, history, *extras)`` shape.

    NOTE(review): assumes ``history`` arrives as ``[user, assistant]`` pairs
    (Gradio 4.x tuple-style chat history) -- confirm against the installed
    Gradio version.
    """
    prior_turns = [list(turn) for turn in (history or [])]
    # Pass independent copies so the two histories are not the same list.
    assistant_message, _, _ = chatbot_submit(
        message,
        list(prior_turns),
        prior_turns,
        system_message,
        max_tokens_val,
        temperature_val,
        top_p_val,
    )
    return assistant_message


system_msg = gr.Textbox(BASE_SYSTEM_MESSAGE, label="System Message", placeholder="System prompt.", lines=5)
max_tokens = gr.Slider(20, 1024, label="Max Tokens", step=20, value=1024)
temperature = gr.Slider(0.0, 1.0, label="Temperature", step=0.1, value=0.2)
top_p = gr.Slider(0.0, 1.0, label="Top P", step=0.05, value=0.7)

# Gradio interface setup
with gr.Blocks() as demo:
    # ChatInterface keeps the conversation history itself and passes it to
    # the callback, followed by the additional inputs in the order listed.
    chatbot = gr.ChatInterface(
        fn=_chat_interface_fn,
        additional_inputs=[system_msg, max_tokens, temperature, top_p],
        title="Chatbot Interface",
        description=DESCRIPTION,
        submit_btn="Submit",
        clear_btn="🗑️ Clear",
    )

if __name__ == "__main__":
    demo.launch()