import streamlit as st
from transformers import pipeline

# Streamlit re-executes this script from top to bottom on every widget
# interaction, so the model load is wrapped in st.cache_resource to build the
# pipeline once and reuse it on later reruns instead of reloading the weights.
@st.cache_resource
def _load_pipeline():
    """Build the Qwen text-generation pipeline (CPU, fast tokenizer)."""
    return pipeline(
        "text-generation",
        model="Qwen/Qwen2.5-0.5B-Instruct",
        device=-1,  # Ensure it runs on CPU
        use_fast=True,  # Use fast tokenizer
    )


pipe = _load_pipeline()

def _extract_reply(output):
    """Return the model's reply text from a text-generation pipeline result.

    When the pipeline is called with chat-style messages, ``generated_text``
    is the whole conversation as a list of ``{"role", "content"}`` dicts and
    the reply is the last entry. For plain-string prompts it is a string,
    which is returned unchanged.
    """
    generated = output[0]["generated_text"]
    if isinstance(generated, list):
        return generated[-1]["content"]
    return generated


# Streamlit app
st.title("Qwen Model Chat")

# Text input from the user
user_input = st.text_input("Enter your message:", "Delete this and write your query?")

# Generate text when the button is clicked
if st.button("Generate"):
    if not user_input.strip():
        # Nothing to send to the model; ask for input instead of generating.
        st.warning("Please enter a message first.")
    else:
        messages = [{"role": "user", "content": user_input}]
        # max_new_tokens is kept small so generation stays fast; adjust as needed.
        output = pipe(messages, max_new_tokens=150)

        # Display only the reply, not the echoed conversation structure.
        st.write("Generated Response:")
        st.write(_extract_reply(output))