Spaces:

eyad-silx
/

Quasar

Runtime error

Quasar / oldapp.py

Eiad Gomaa

new model

5ab0078 9 months ago

3.62 kB

	import streamlit as st
	from transformers import AutoTokenizer, AutoModelForCausalLM
	import torch

	@st.cache_resource
	def load_model():
	    """Load the causal language model and its tokenizer.

	    The function is wrapped in ``st.cache_resource``.

	    Returns:
	        A ``(model, tokenizer)`` tuple on success. If anything raises while
	        loading, the error is reported with ``st.error`` and
	        ``(None, None)`` is returned instead.
	    """
	    model_id = "NousResearch/Llama-3.2-1B"
	    try:
	        tokenizer = AutoTokenizer.from_pretrained(model_id)
	        model = AutoModelForCausalLM.from_pretrained(model_id)

	        # Reuse the EOS token for padding when the tokenizer defines none.
	        if tokenizer.pad_token is None:
	            tokenizer.pad_token = tokenizer.eos_token
	        # Take the pad id from the tokenizer so model and tokenizer agree
	        # on a single integer id.
	        if model.config.pad_token_id is None:
	            model.config.pad_token_id = tokenizer.pad_token_id

	        return model, tokenizer
	    except Exception as e:
	        # NOTE(review): this path returns normally, so the (None, None)
	        # result is presumably cached as well -- confirm, and clear the
	        # resource cache once the underlying cause is fixed.
	        st.error(f"Error loading model: {str(e)}")
	        return None, None

	# Page config
	APP_TITLE = "Chat with Quasar-32B"
	st.set_page_config(layout="wide", page_title=APP_TITLE)
	st.title(APP_TITLE)

	# Start with an empty chat history the first time the key is missing
	if "messages" not in st.session_state:
	    st.session_state["messages"] = []

	# Obtain the model and tokenizer; both are None when loading failed
	model, tokenizer = load_model()

	# Response generation
	def generate_response(prompt, max_new_tokens=200, temperature=0.7):
	    """Generate the model's reply to ``prompt``.

	    Uses the module-level ``model`` and ``tokenizer``.

	    Args:
	        prompt: Text to condition on. The tokenized input is truncated to
	            at most 512 tokens.
	        max_new_tokens: Upper bound on the number of tokens generated after
	            the prompt.
	        temperature: Sampling temperature. A value of ``None`` or ``<= 0``
	            disables sampling and falls back to the default decoding.

	    Returns:
	        The decoded, whitespace-stripped continuation (prompt tokens
	        excluded), or a string beginning with
	        ``"Error generating response: "`` if any step raised.
	    """
	    try:
	        # Prepare the input
	        inputs = tokenizer(
	            prompt,
	            return_tensors="pt",
	            padding=True,
	            truncation=True,
	            max_length=512,
	        )
	        prompt_length = inputs["input_ids"].shape[-1]

	        generation_kwargs = {
	            "attention_mask": inputs["attention_mask"],
	            # Bound only the newly generated tokens; a total-length bound
	            # would leave no room for a reply after a long prompt.
	            "max_new_tokens": max_new_tokens,
	            "num_return_sequences": 1,
	            "pad_token_id": tokenizer.pad_token_id,
	        }
	        if temperature is not None and temperature > 0:
	            # Temperature only takes effect when sampling is enabled.
	            generation_kwargs["do_sample"] = True
	            generation_kwargs["temperature"] = temperature

	        # Generate response
	        with torch.no_grad():
	            outputs = model.generate(inputs["input_ids"], **generation_kwargs)

	        # Decode only the tokens that follow the prompt instead of removing
	        # the prompt text from the decoded string, which breaks when the
	        # decoded text does not reproduce the prompt exactly.
	        new_tokens = outputs[0][prompt_length:]
	        return tokenizer.decode(new_tokens, skip_special_tokens=True).strip()
	    except Exception as e:
	        return f"Error generating response: {str(e)}"

	# Chat interface
	st.write("### Chat")
	chat_container = st.container()

	# Replay every stored message inside the container, one bubble per entry
	with chat_container:
	    for message in st.session_state["messages"]:
	        st.chat_message(message["role"]).write(message["content"])

	# User input
	if prompt := st.chat_input("Type your message here"):
	    # Record the user's turn in the history first
	    st.session_state.messages.append(dict(role="user", content=prompt))

	    # Show the user's message inside the history container
	    with chat_container, st.chat_message("user"):
	        st.write(prompt)

	    # A reply can only be produced when both model and tokenizer are present
	    if not (model and tokenizer):
	        st.error("Model failed to load. Please check your configuration.")
	    else:
	        with st.chat_message("assistant"):
	            with st.spinner("Thinking..."):
	                response = generate_response(prompt)
	            st.write(response)
	        st.session_state.messages.append(dict(role="assistant", content=response))

	# Add a button to clear chat history
	if st.button("Clear Chat History"):
	    st.session_state.messages = []
	    # Prefer st.rerun, the replacement for the deprecated
	    # st.experimental_rerun; fall back to the old name on Streamlit
	    # releases that predate it.
	    rerun = getattr(st, "rerun", None) or st.experimental_rerun
	    rerun()

	# Display system information
	with st.sidebar:
	    # Status depends on whether both model and tokenizer are available
	    if model and tokenizer:
	        status_line = "Status: Running"
	    else:
	        status_line = "Status: Not loaded"

	    st.write("### System Information")
	    st.write("Model: Quasar-32B")
	    st.write(status_line)

	    # Usage instructions, written one line at a time
	    st.write("### Instructions")
	    for instruction in (
	        "1. Type your message in the chat input",
	        "2. Press Enter or click Send",
	        "3. Wait for the AI to respond",
	        "4. Use 'Clear Chat History' to start fresh",
	    ):
	        st.write(instruction)