artificialguybr committed on
Commit
0e16686
1 Parent(s): 85dbf4a

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +23 -21
app.py CHANGED
@@ -17,10 +17,8 @@ headers = {
17
  BASE_SYSTEM_MESSAGE = "I carefully provide accurate, factual, thoughtful, nuanced answers and am brilliant at reasoning."
18
 
19
  def call_nvidia_api(history, system_message, max_tokens, temperature, top_p):
20
- messages = []
21
- if system_message:
22
- messages.append({"role": "system", "content": system_message})
23
- messages.extend([{"role": "user", "content": msg[0]} for msg in history])
24
 
25
  payload = {
26
  "messages": messages,
@@ -29,37 +27,38 @@ def call_nvidia_api(history, system_message, max_tokens, temperature, top_p):
29
  "max_tokens": max_tokens,
30
  "stream": False
31
  }
32
- print("Payload sendo enviado:", json.dumps(payload, indent=4)) # Debug: Imprime a payload
33
 
34
  session = requests.Session()
35
  response = session.post(INVOKE_URL, headers=headers, json=payload)
36
- if response.status_code == 202:
37
  request_id = response.headers.get("NVCF-REQID")
38
  fetch_url = FETCH_URL_FORMAT + request_id
39
  response = session.get(fetch_url, headers=headers)
40
- response.raise_for_status() # Isso lançará uma exceção se o status não for 200
41
  response_body = response.json()
 
42
  if response_body.get("choices"):
43
  assistant_message = response_body["choices"][0]["message"]["content"]
44
- return assistant_message
 
45
  else:
46
- return "Desculpe, ocorreu um erro ao gerar a resposta."
47
 
48
 
49
- def chatbot_submit(message, chat_history, system_message, max_tokens_val, temperature_val, top_p_val):
50
- """Submits the user message to the chatbot and updates the chat history."""
51
  print("Updating chatbot...")
52
 
53
- # Adiciona a mensagem do usuário ao histórico para exibição
54
- chat_history.append([message, ""])
55
-
56
  # Chama a API da NVIDIA para gerar uma resposta
57
- assistant_message = call_nvidia_api(chat_history, system_message, max_tokens_val, temperature_val, top_p_val)
 
 
 
58
 
59
- # Atualiza o histórico com a resposta do assistente
60
- chat_history[-1][1] = assistant_message
 
61
 
62
- return assistant_message, chat_history
63
 
64
  system_msg = gr.Textbox(BASE_SYSTEM_MESSAGE, label="System Message", placeholder="System prompt.", lines=5)
65
  max_tokens = gr.Slider(20, 1024, label="Max Tokens", step=20, value=1024)
@@ -67,11 +66,14 @@ temperature = gr.Slider(0.0, 1.0, label="Temperature", step=0.1, value=0.2)
67
  top_p = gr.Slider(0.0, 1.0, label="Top P", step=0.05, value=0.7)
68
  # Gradio interface setup
69
  with gr.Blocks() as demo:
70
- chat_history_state = gr.State([])
 
 
71
  chatbot = gr.ChatInterface(
72
  fn=chatbot_submit,
73
- additional_inputs=[system_msg, max_tokens, temperature, top_p],
74
- title="LLAMA 70B Free Demo",
 
75
  description="""<div style="text-align: center; font-size: 1.5em; margin-bottom: 20px;">
76
  <strong>Explore the Capabilities of LLAMA 2 70B</strong>
77
  </div>
 
17
  BASE_SYSTEM_MESSAGE = "I carefully provide accurate, factual, thoughtful, nuanced answers and am brilliant at reasoning."
18
 
19
  def call_nvidia_api(history, system_message, max_tokens, temperature, top_p):
20
+ messages = [{"role": "system", "content": system_message}] if system_message else []
21
+ messages.extend([{"role": "user", "content": msg[0]}, {"role": "assistant", "content": msg[1]} for msg in history if msg[1]])
 
 
22
 
23
  payload = {
24
  "messages": messages,
 
27
  "max_tokens": max_tokens,
28
  "stream": False
29
  }
 
30
 
31
  session = requests.Session()
32
  response = session.post(INVOKE_URL, headers=headers, json=payload)
33
+ while response.status_code == 202:
34
  request_id = response.headers.get("NVCF-REQID")
35
  fetch_url = FETCH_URL_FORMAT + request_id
36
  response = session.get(fetch_url, headers=headers)
37
+ response.raise_for_status()
38
  response_body = response.json()
39
+
40
  if response_body.get("choices"):
41
  assistant_message = response_body["choices"][0]["message"]["content"]
42
+ # Retorna tanto a mensagem formatada para o usuário quanto a estrutura completa para o histórico da API
43
+ return assistant_message, response_body["choices"][0]
44
  else:
45
+ return "Desculpe, ocorreu um erro ao gerar a resposta.", None
46
 
47
 
48
+ def chatbot_submit(message, chat_history_ui, chat_history_api, system_message, max_tokens_val, temperature_val, top_p_val):
 
49
  print("Updating chatbot...")
50
 
 
 
 
51
  # Chama a API da NVIDIA para gerar uma resposta
52
+ assistant_message, api_response = call_nvidia_api(chat_history_api, system_message, max_tokens_val, temperature_val, top_p_val)
53
+
54
+ # Atualiza o histórico da interface do usuário
55
+ chat_history_ui.append([message, assistant_message])
56
 
57
+ # Atualiza o histórico da API se a resposta incluir o formato esperado
58
+ if api_response:
59
+ chat_history_api.append(api_response)
60
 
61
+ return assistant_message, chat_history_ui, chat_history_api
62
 
63
  system_msg = gr.Textbox(BASE_SYSTEM_MESSAGE, label="System Message", placeholder="System prompt.", lines=5)
64
  max_tokens = gr.Slider(20, 1024, label="Max Tokens", step=20, value=1024)
 
66
  top_p = gr.Slider(0.0, 1.0, label="Top P", step=0.05, value=0.7)
67
  # Gradio interface setup
68
  with gr.Blocks() as demo:
69
+ chat_history_state_ui = gr.State([])
70
+ chat_history_state_api = gr.State([])
71
+ # Outros componentes da interface...
72
  chatbot = gr.ChatInterface(
73
  fn=chatbot_submit,
74
+ inputs=[gr.Textbox(label="Your Message"), chat_history_state_ui, chat_history_state_api, system_msg, max_tokens, temperature, top_p],
75
+ outputs=[gr.Text(label="Assistant Response"), chat_history_state_ui, chat_history_state_api],
76
+ title="Chatbot Interface"
77
  description="""<div style="text-align: center; font-size: 1.5em; margin-bottom: 20px;">
78
  <strong>Explore the Capabilities of LLAMA 2 70B</strong>
79
  </div>