import streamlit as st
from huggingface_hub import hf_hub_download
from llama_cpp import Llama
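
# Page configuration: st.set_page_config must be the first Streamlit call in the script.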
st.set_page_config(
    page_title="Llama 3.2 AI Assistant",
    page_icon="🤖",
    layout="wide",
    initial_sidebar_state="expanded",
)
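
# Custom CSS: hide Streamlit's default chrome and style the title and sidebar.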
st.markdown("""
<style>
    /* Hide Streamlit's default hamburger menu, footer, and header */
    #MainMenu {visibility: hidden;}
    footer {visibility: hidden;}
    header {visibility: hidden;}

    /* Reduce the main container's top/bottom padding for a more compact layout */
    .block-container {
        padding-top: 2rem;
        padding-bottom: 2rem;
    }

    /* Style the sidebar */
    section[data-testid="stSidebar"] {
        background-color: #f7f9fc;  /* light grey-blue background */
    }

    /* Custom title styles */
    .title-text {
        font-family: 'Helvetica Neue', sans-serif;
        font-weight: 700;
        font-size: 2.5rem;
        color: #1E88E5;  /* tech blue */
        text-align: center;
        margin-bottom: 20px;
    }

    .subtitle-text {
        font-family: 'Helvetica Neue', sans-serif;
        font-weight: 400;
        font-size: 1.1rem;
        color: #666;
        text-align: center;
        margin-bottom: 40px;
    }
</style>
""", unsafe_allow_html=True)
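
# Page title and subtitle, rendered with the CSS classes defined above.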
st.markdown('<div class="title-text">🤖 Llama 3.2-3B AI Assistant</div>', unsafe_allow_html=True)
st.markdown('<div class="subtitle-text">Powered by Marcus719/Llama-3.2-3B-changedata-Lab2-GGUF</div>', unsafe_allow_html=True)
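
# Sidebar: generation settings, system prompt, and history management.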
with st.sidebar:
    st.image("https://huggingface.co/front/assets/huggingface_logo-noborder.svg", width=50)
    st.header("⚙️ Control Panel")

    temperature = st.slider(
        "Temperature (creativity)",
        min_value=0.1, max_value=1.5, value=0.7, step=0.1,
        help="Higher values give more random answers; lower values give more focused ones.",
    )
    max_tokens = st.slider("Max Tokens (response length)", min_value=64, max_value=2048, value=512, step=64)

    st.divider()

    system_prompt = st.text_area(
        "System Prompt",
        value="You are a helpful and polite AI assistant.",
        height=100,
    )

    st.divider()

    if st.button("🗑️ Clear chat history", use_container_width=True):
        st.session_state.messages = []
        st.rerun()

    st.markdown("---")
    st.markdown("Optimization: **Unsloth Q4_K_M**")
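
# GGUF model hosted on the Hugging Face Hub.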
REPO_ID = "Marcus719/Llama-3.2-3B-changedata-Lab2-GGUF"
FILENAME = "unsloth.Q4_K_M.gguf"
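
# Download the model file once (hf_hub_download caches it locally) and keep
# the Llama instance alive across Streamlit reruns via st.cache_resource.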
@st.cache_resource
def load_model():
    model_path = hf_hub_download(repo_id=REPO_ID, filename=FILENAME)
    llm = Llama(
        model_path=model_path,
        n_ctx=4096,     # context window size in tokens
        n_threads=2,    # CPU threads used for inference
        verbose=False,
    )
    return llm
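
# Load the model at startup; stop the app cleanly if loading fails.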
try:
    if "llm" not in st.session_state:
        with st.spinner("🚀 Starting the AI engine, please wait..."):
            st.session_state.llm = load_model()
except Exception as e:
    st.error(f"Model loading failed: {e}")
    st.stop()  # without a model, the chat logic below would raise on access
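
# Initialize the chat history on first run.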
if "messages" not in st.session_state:
    st.session_state.messages = []
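
# Replay the conversation so far.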
for message in st.session_state.messages:
    avatar = "🧑‍💻" if message["role"] == "user" else "🤖"
    with st.chat_message(message["role"], avatar=avatar):
        st.markdown(message["content"])
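
# Handle a new user message.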
if prompt := st.chat_input("Type your question here..."):
    # Record and display the user's message.
    st.session_state.messages.append({"role": "user", "content": prompt})
    with st.chat_message("user", avatar="🧑‍💻"):
        st.markdown(prompt)

    # Generate and stream the assistant's reply.
    with st.chat_message("assistant", avatar="🤖"):
        message_placeholder = st.empty()
        full_response = ""

        # Prepend the system prompt to the stored conversation.
        messages_payload = [{"role": "system", "content": system_prompt}] + [
            {"role": m["role"], "content": m["content"]}
            for m in st.session_state.messages
        ]

        # llama-cpp-python yields OpenAI-style chunks when stream=True.
        stream = st.session_state.llm.create_chat_completion(
            messages=messages_payload,
            stream=True,
            max_tokens=max_tokens,
            temperature=temperature,
        )

        for chunk in stream:
            delta = chunk["choices"][0]["delta"]
            if "content" in delta:
                full_response += delta["content"]
                message_placeholder.markdown(full_response + "▌")  # cursor effect while streaming

        message_placeholder.markdown(full_response)

    st.session_state.messages.append({"role": "assistant", "content": full_response})
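
# To run locally (assuming this script is saved as app.py):
#   pip install streamlit huggingface_hub llama-cpp-python
#   streamlit run app.py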