Nvidia-llama / app.py
fullstuckdev's picture
first commit
160e363
raw
history blame
830 Bytes
import gradio as gr
import os
import requests
# Hugging Face Inference API URL for the nvidia/Llama-3.1-Nemotron-70B-Instruct-HF model.
API_URL = (
    "https://api-inference.huggingface.co/models/"
    "nvidia/Llama-3.1-Nemotron-70B-Instruct-HF"
)

# Request headers carrying the bearer token. The token is read from the
# HUGGINGFACE_API_KEY environment variable once, at import time; when the
# variable is unset the header value becomes the literal "Bearer None".
headers = {
    "Authorization": "Bearer {}".format(os.getenv("HUGGINGFACE_API_KEY")),
}
async def generate_response(user_input):
    """Send *user_input* to the hosted model and return the reply text.

    The message is wrapped in a single-turn chat payload and posted to the
    OpenAI-compatible chat-completions route under ``API_URL``. The blocking
    HTTP call runs in the default executor so the event loop that drives
    this coroutine stays responsive.

    Args:
        user_input: The text typed by the user.

    Returns:
        The generated reply, or ``""`` when *user_input* is empty or only
        whitespace (no request is sent in that case).

    Raises:
        gr.Error: If the request fails, the API reports an error, or the
            response body does not have a recognised shape.
    """
    # Function-scope imports keep this block self-contained.
    import asyncio
    import functools

    # Nothing to ask: skip the network round trip entirely.
    if not user_input or not user_input.strip():
        return ""

    payload = {
        "model": "nvidia/Llama-3.1-Nemotron-70B-Instruct-HF",
        "messages": [{"role": "user", "content": user_input}],
        "max_tokens": 16384,
        "max_completion_tokens": 16384,
    }

    # A "messages" payload belongs to the chat-completions route; the bare
    # model URL expects an "inputs" field instead.
    send_request = functools.partial(
        requests.post,
        f"{API_URL}/v1/chat/completions",
        headers=headers,
        json=payload,
        timeout=120,  # never wait forever on a stalled connection
    )

    try:
        # requests is synchronous; run it off the event loop thread.
        loop = asyncio.get_running_loop()
        response = await loop.run_in_executor(None, send_request)
    except requests.RequestException as exc:
        raise gr.Error(f"Could not reach the inference API: {exc}") from exc

    try:
        data = response.json()
    except ValueError as exc:
        raise gr.Error(
            f"Inference API returned a non-JSON response "
            f"(HTTP {response.status_code})."
        ) from exc

    # Error bodies arrive as a JSON object with an "error" key.
    if isinstance(data, dict) and "error" in data:
        raise gr.Error(f"Inference API error: {data['error']}")
    if not response.ok:
        raise gr.Error(f"Inference API request failed (HTTP {response.status_code}).")

    try:
        if isinstance(data, list):
            # Text-generation shape: [{"generated_text": "..."}].
            return data[0]["generated_text"]
        # Chat-completions shape: {"choices": [{"message": {"content": "..."}}]}.
        return data["choices"][0]["message"]["content"]
    except (KeyError, IndexError, TypeError) as exc:
        raise gr.Error("Inference API returned an unexpected response format.") from exc
# Single-textbox UI: one box for the user's message, one for the model reply.
_message_box = gr.Textbox(label="Your message")
_reply_box = gr.Textbox(label="AI Response")

# Wire generate_response between the two textboxes.
demo = gr.Interface(
    generate_response,
    _message_box,
    _reply_box,
    title="AI Chat Interface",
    description="Chat with Llama 3.1 Nemotron",
)

# Start the Gradio server with default settings.
demo.launch()