import streamlit as st
from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline

# Path where the fine-tuned model and tokenizer were saved
save_directory = "RAG_model"

# Load the model and tokenizer once and cache them across reruns.
# st.cache_resource replaces the deprecated st.cache(allow_output_mutation=True).
@st.cache_resource
def load_model():
    model = AutoModelForCausalLM.from_pretrained(save_directory)
    tokenizer = AutoTokenizer.from_pretrained(save_directory)
    return model, tokenizer

model, tokenizer = load_model()

# Set up the text generation pipeline on the CPU.
# Note: device and device_map are mutually exclusive in transformers; passing
# both raises an error, so only device=-1 (CPU) is used here.
query_pipeline = pipeline(
    "text-generation",
    model=model,
    tokenizer=tokenizer,
    device=-1,  # -1 selects the CPU
)

st.title("Text Generation with Llama-2 Model")
st.write("This is a simple Streamlit app to generate text using the Llama-2 model.")

# Text input for the user
user_input = st.text_area("Enter your prompt:", "")

# Generate text when the user clicks the button
if st.button("Generate"):
    if user_input:
        with st.spinner("Generating..."):
            sequences = query_pipeline(
                user_input,
                do_sample=True,           # sample instead of greedy decoding
                top_k=10,                 # restrict sampling to the 10 most likely tokens
                num_return_sequences=1,
                eos_token_id=tokenizer.eos_token_id,
                max_length=200,           # cap prompt + completion at 200 tokens
            )
        for seq in sequences:
            st.write("Generated text:")
            st.write(seq["generated_text"])
    else:
        st.write("Please enter a prompt to generate text.")

# Example usage hint for the user
st.write("Example usage: Enter a prompt like 'What is Artificial Intelligence?' and click 'Generate'.")
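
# Usage (a minimal sketch; the filename app.py is an assumption, not given above):
#
#   pip install streamlit transformers torch
#   streamlit run app.py
#
# Streamlit then serves the app at http://localhost:8501 by default.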