import gradio as gr
import os
import requests

API_URL = "https://api-inference.huggingface.co/models/nvidia/Llama-3.1-Nemotron-70B-Instruct-HF"
# The token is read from the HUGGINGFACE_API_KEY secret configured for the Space.
headers = {"Authorization": f"Bearer {os.getenv('HUGGINGFACE_API_KEY')}"}

def generate_response(user_input):
    # The serverless Inference API expects an "inputs" string plus optional
    # "parameters"; OpenAI-style "model"/"messages" fields are not part of this route.
    payload = {
        "inputs": user_input,
        "parameters": {
            "max_new_tokens": 1024,     # generation budget (serverless endpoints may cap large values)
            "return_full_text": False,  # return only the completion, not the echoed prompt
        },
    }
    response = requests.post(API_URL, headers=headers, json=payload)
    response.raise_for_status()
    # The API returns a list of generations: [{"generated_text": "..."}]
    return response.json()[0]["generated_text"]

demo = gr.Interface(
    fn=generate_response,
    inputs=gr.Textbox(label="Your message"),
    outputs=gr.Textbox(label="AI Response"),
    title="AI Chat Interface",
    description="Chat with Llama 3.1 Nemotron",
)

demo.launch()
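If you would rather keep a chat-style messages payload, the Inference API also exposes an OpenAI-compatible chat-completions route for chat models. The sketch below is an assumption-laden illustration, not part of the Space's code: it assumes the /v1/chat/completions path on the same model URL and reuses the HUGGINGFACE_API_KEY variable from above.

import os
import requests

# Assumed OpenAI-compatible route on the serverless Inference API.
CHAT_URL = (
    "https://api-inference.huggingface.co/models/"
    "nvidia/Llama-3.1-Nemotron-70B-Instruct-HF/v1/chat/completions"
)
headers = {"Authorization": f"Bearer {os.getenv('HUGGINGFACE_API_KEY')}"}

def chat_response(user_input):
    # Chat-style payload; the reply is nested under choices[0].message.content.
    payload = {
        "model": "nvidia/Llama-3.1-Nemotron-70B-Instruct-HF",
        "messages": [{"role": "user", "content": user_input}],
        "max_tokens": 1024,
    }
    response = requests.post(CHAT_URL, headers=headers, json=payload)
    response.raise_for_status()
    return response.json()["choices"][0]["message"]["content"]

Swapping generate_response for chat_response in gr.Interface would keep the rest of the app unchanged.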