import streamlit as st
from huggingface_hub import InferenceClient
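# Optional guard (a sketch, not in the original app): fail fast with a readable
# error if the secret is missing, instead of raising a bare KeyError below.
if "HF_TOKEN" not in st.secrets:
    st.error("HF_TOKEN is not set. Add it under the repo's Settings -> Secrets.")
    st.stop()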
# Read the Hugging Face token from Streamlit secrets. On a Hugging Face Space,
# set HF_TOKEN under the repo's Settings -> Secrets; locally, put it in
# .streamlit/secrets.toml.
HF_TOKEN = st.secrets["HF_TOKEN"]
# Initialize the InferenceClient with the token; "hf-inference" routes requests
# to Hugging Face's own serverless Inference API.
client = InferenceClient(
    provider="hf-inference",
    api_key=HF_TOKEN,
)
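# Hedged variant (assumption, not in the original app): pass a request timeout
# so a slow backend errors out instead of hanging the Streamlit rerun.
# client = InferenceClient(provider="hf-inference", api_key=HF_TOKEN, timeout=30)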
# Streamlit text input widget for the user's prompt.
user_input = st.text_input(
    "Place your prompt here",
    "This is a placeholder",
    key="placeholder",
)
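# Note: Streamlit re-executes this whole script on every widget interaction, so
# the call below fires on each rerun. A hedged variant (not in the original)
# gates the API call behind a button:
# if not st.button("Generate"):
#     st.stop()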
# Send the prompt to the hosted model through the OpenAI-style
# chat-completion interface.
completion = client.chat.completions.create(
    model="HuggingFaceH4/zephyr-7b-beta",
    messages=[
        {"role": "user", "content": user_input},
    ],
    max_tokens=512,
)
# Extract the assistant's reply from the first (and only) returned choice.
ai_response = completion.choices[0].message.content
# Render the response in the app as plain text.
st.text(ai_response)
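# Optional streaming variant (a sketch, assuming the provider supports
# stream=True for this model): render tokens as they arrive instead of
# waiting for the full completion.
# stream = client.chat.completions.create(
#     model="HuggingFaceH4/zephyr-7b-beta",
#     messages=[{"role": "user", "content": user_input}],
#     max_tokens=512,
#     stream=True,
# )
# st.write_stream(chunk.choices[0].delta.content or "" for chunk in stream)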
### WRONG WAY TO LOAD MODELS IN A SPACE:
# Loading the weights directly, as below, would download hundreds of GB for a
# 671B-parameter checkpoint and cannot fit in a Space's memory; that is why
# this app calls a hosted endpoint through InferenceClient instead.
# from transformers import AutoTokenizer, AutoModelForCausalLM
# tokenizer = AutoTokenizer.from_pretrained("deepseek-ai/DeepSeek-Prover-V2-671B", trust_remote_code=True)
# model = AutoModelForCausalLM.from_pretrained("deepseek-ai/DeepSeek-Prover-V2-671B", trust_remote_code=True)
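# If you do need that model, a hedged sketch (assumption: some inference
# provider actually hosts it) is to route it through the same client rather
# than loading weights locally:
# completion = client.chat.completions.create(
#     model="deepseek-ai/DeepSeek-Prover-V2-671B",
#     messages=[{"role": "user", "content": user_input}],
#     max_tokens=512,
# )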