# Hugging Face Space: Unsloth Chatbot — Streamlit app serving a GGUF model
# via llama-cpp-python. (Original header was Spaces page chrome, removed.)
import streamlit as st
from huggingface_hub import hf_hub_download
from llama_cpp import Llama
# Hugging Face model repo details.
HF_MODEL_REPO = "anush76/unsloth-model"
MODEL_FILENAME = "unsloth.Q4_K_M.gguf"


@st.cache_resource(show_spinner="📥 Downloading and loading model...")
def _load_model() -> Llama:
    """Download the GGUF model from the Hugging Face Hub and load it.

    Wrapped in st.cache_resource so the (slow) download + load runs once
    per server process instead of on every Streamlit rerun — the original
    script re-executed both steps each time a widget changed.

    Returns:
        A ready-to-use llama-cpp-python ``Llama`` instance.
    """
    # hf_hub_download caches the file locally and returns its path.
    model_path = hf_hub_download(repo_id=HF_MODEL_REPO, filename=MODEL_FILENAME)
    # n_gpu_layers=20 offloads part of the network to GPU when a GPU build
    # of llama.cpp is available; otherwise it runs CPU-only.
    return Llama(model_path=model_path, n_threads=8, n_batch=512, n_gpu_layers=20)


llm = _load_model()

# Streamlit UI (emoji restored from mojibake in the scraped source).
st.title("🦥 Unsloth Chatbot")
st.write("💬 Ask me anything!")

user_input = st.text_input("You:")

if user_input:
    response = llm.create_completion(
        prompt=f"Answer in a clear paragraph format:\n\n{user_input}",
        max_tokens=300,  # cap output length so the answer can complete
        temperature=0.6,
        top_p=0.9,
        stream=False,  # return the full completion at once (no token-by-token output)
    )
    full_response = response["choices"][0]["text"].strip()
    # Show the whole answer as one paragraph.
    st.write("🤖 Chatbot:\n\n", full_response)