# Quasar / oldapp.py
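"""Streamlit chat interface for the Quasar-32B demo.

Loads a causal language model and tokenizer from Hugging Face, keeps the
conversation history in Streamlit session state, and renders a simple chat UI
with a clearable history. Run with: streamlit run oldapp.py
"""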
import streamlit as st
from transformers import AutoTokenizer, AutoModelForCausalLM
import torch
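
# st.cache_resource keeps the loaded model and tokenizer in memory across
# Streamlit reruns, so the weights are loaded once per session rather than
# on every user interaction.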
@st.cache_resource
def load_model():
    """Load model and tokenizer with caching"""
    try:
        tokenizer = AutoTokenizer.from_pretrained("NousResearch/Llama-3.2-1B")
        model = AutoModelForCausalLM.from_pretrained("NousResearch/Llama-3.2-1B")
        # Set up padding token
        if tokenizer.pad_token is None:
            tokenizer.pad_token = tokenizer.eos_token
            model.config.pad_token_id = model.config.eos_token_id
        return model, tokenizer
    except Exception as e:
        st.error(f"Error loading model: {str(e)}")
        return None, None

# Page config
st.set_page_config(page_title="Chat with Quasar-32B", layout="wide")
st.title("Chat with Quasar-32B")
# Initialize session state for chat history
if 'messages' not in st.session_state:
    st.session_state.messages = []
# Load model and tokenizer
model, tokenizer = load_model()
# Response generation
def generate_response(prompt):
    """Generate a response from the model for the given prompt"""
    try:
        # Prepare the input
        inputs = tokenizer(
            prompt,
            return_tensors="pt",
            padding=True,
            truncation=True,
            max_length=512  # Cap the input length
        )
        # Generate response
        with torch.no_grad():
            outputs = model.generate(
                inputs["input_ids"],
                attention_mask=inputs["attention_mask"],  # Pass the attention mask explicitly
                max_new_tokens=200,  # Limit the generated continuation, independent of prompt length
                num_return_sequences=1,
                do_sample=True,  # Sampling must be enabled for temperature to take effect
                temperature=0.7,
                pad_token_id=tokenizer.pad_token_id,
            )
        # Decode only the newly generated tokens so the prompt is not echoed back
        new_tokens = outputs[0][inputs["input_ids"].shape[-1]:]
        response = tokenizer.decode(new_tokens, skip_special_tokens=True)
        return response.strip()
    except Exception as e:
        return f"Error generating response: {str(e)}"

# Chat interface
st.write("### Chat")
chat_container = st.container()
# Display chat history
with chat_container:
    for message in st.session_state.messages:
        with st.chat_message(message["role"]):
            st.write(message["content"])
# User input
if prompt := st.chat_input("Type your message here"):
    # Add user message to chat history
    st.session_state.messages.append({"role": "user", "content": prompt})
    # Display user message
    with chat_container:
        with st.chat_message("user"):
            st.write(prompt)
    # Generate and display assistant response
    if model and tokenizer:
        with st.chat_message("assistant"):
            with st.spinner("Thinking..."):
                response = generate_response(prompt)
                st.write(response)
                st.session_state.messages.append({"role": "assistant", "content": response})
    else:
        st.error("Model failed to load. Please check your configuration.")
# Add a button to clear chat history
if st.button("Clear Chat History"):
    st.session_state.messages = []
    st.rerun()  # st.experimental_rerun() is deprecated in current Streamlit
# Display system information
with st.sidebar:
st.write("### System Information")
st.write("Model: Quasar-32B")
st.write("Status: Running" if model and tokenizer else "Status: Not loaded")
# Add some helpful instructions
st.write("### Instructions")
st.write("1. Type your message in the chat input")
st.write("2. Press Enter or click Send")
st.write("3. Wait for the AI to respond")
st.write("4. Use 'Clear Chat History' to start fresh")