|
import os |
|
import requests |
|
import gradio as gr |
|
|
|
|
|
# Hugging Face access token, read from the environment.
# NOTE(review): os.getenv returns None when the variable is unset — the API
# calls below would then send "Bearer None"; confirm the deployment sets it.
hf_token = os.getenv("hf_token")

# Inference API endpoint for the private fine-tuned Llama-3 8B model.
api_url = "https://api-inference.huggingface.co/models/antony-pk/llama-3-8b-Instruct-bnb-4bit-e10-emp-gold-jul16"
|
|
|
def query_huggingface_api(prompt, timeout=60):
    """POST *prompt* to the Hugging Face Inference API and return the decoded JSON.

    Args:
        prompt: Text sent as the ``inputs`` field of the request payload.
        timeout: Seconds to wait for the request before raising
            ``requests.exceptions.Timeout``. Defaults to 60.

    Returns:
        The JSON-decoded response body (list of dicts on success, a dict
        with an ``error`` key on failure, per the Inference API).

    Raises:
        requests.exceptions.RequestException: On connection errors/timeouts.
        ValueError: If the response body is not valid JSON.
    """
    headers = {"Authorization": f"Bearer {hf_token}"}
    payload = {"inputs": prompt}
    # Without an explicit timeout, requests.post can block forever if the
    # endpoint stalls — a well-known requests footgun.
    response = requests.post(api_url, headers=headers, json=payload, timeout=timeout)
    return response.json()
|
|
|
def generate_response(prompt):
    """Send *prompt* to the model and extract the generated text.

    Args:
        prompt: User-supplied prompt string.

    Returns:
        The model's ``generated_text`` string, or a fallback message when
        the response does not contain one.
    """
    response = query_huggingface_api(prompt)
    # BUG FIX: on success the HF Inference API returns a LIST of dicts
    # ([{"generated_text": ...}]); calling .get() on that list raised
    # AttributeError for every successful generation.
    if isinstance(response, list) and response and isinstance(response[0], dict):
        return response[0].get("generated_text", "No response received")
    # Error payloads (and any dict-shaped responses) keep the original
    # dict-lookup behavior.
    if isinstance(response, dict):
        return response.get("generated_text", "No response received")
    return "No response received"
|
|
|
|
|
# Wire a minimal Gradio UI around the model: one text box in, one text box
# out, backed by generate_response.
interface = gr.Interface(
    generate_response,
    "text",
    "text",
    title="LLama Model Interaction",
    description=(
        "Enter a prompt to receive a response from the private LLama model."
    ),
)

# Start the Gradio server.
interface.launch()
|
|