import os

# Earlier Gradio-based variant (disabled): load the model directly with gr.load().
# import gradio as gr
# from huggingface_hub import login
#
# HF_TOKEN = os.getenv('llama3token')
# login(HF_TOKEN)
# demo = gr.load("deepseek-ai/DeepSeek-R1-Distill-Llama-8B", src="models")
# demo.launch()
import streamlit as st
import requests
# Hugging Face serverless Inference API endpoint.
# Previously tried models, kept commented out for reference:
# API_URL = "https://api-inference.huggingface.co/models/deepseek-ai/DeepSeek-R1-Distill-Llama-8B"
# API_URL = "https://api-inference.huggingface.co/models/deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B"
# Note: meta-llama/Meta-Llama-3-8B was rejected with: "The model meta-llama/Meta-Llama-3-8B is too
# large to be loaded automatically (16GB > 10GB). Please use Spaces (https://huggingface.co/spaces)
# or Inference Endpoints (https://huggingface.co/inference-endpoints)."
API_URL = "https://api-inference.huggingface.co/models/meta-llama/Llama-3.2-3B-Instruct"
# The token is read from the 'hftoken' environment variable (e.g. a Space secret).
HF_TOKEN = os.getenv('hftoken')
# Query the Hugging Face Inference API and return the parsed JSON response.
def query(payload):
    headers = {"Authorization": f"Bearer {HF_TOKEN}"}
    response = requests.post(API_URL, headers=headers, json=payload)
    result = response.json()
    print(result)  # debug: log the raw API response
    return result
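# Optional (illustrative sketch only): the Inference API also accepts a "parameters"
# object alongside "inputs", e.g. to cap output length or adjust sampling. The values
# below are assumptions, not tuned settings:
#
#     output = query({
#         "inputs": "Hello",
#         "parameters": {"max_new_tokens": 256, "temperature": 0.7},
#     })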
# Streamlit app
st.title("Llama-3.2-3B-Instruct Chatbot")
# Input text box
user_input = st.text_input("Enter your message:")
if user_input:
    # Query the Hugging Face API with the user input
    payload = {"inputs": user_input}
    output = query(payload)

    # Display the output
    if isinstance(output, list) and len(output) > 0 and 'generated_text' in output[0]:
        st.write("Response:")
        st.write(output[0]['generated_text'])
    else:
        st.write("Error: Unable to generate a response. Please try again.")