import streamlit as st
from transformers import AutoTokenizer, LlamaForCausalLM
import torch

# Title of the app
st.title("LLaMA 2 Chatbot")

# Load the LLaMA model and tokenizer from Hugging Face.
# Note: meta-llama/Llama-2-7b-hf is a gated checkpoint; you must accept the
# license on Hugging Face and authenticate first (e.g. `huggingface-cli login`).
@st.cache_resource
def load_model_and_tokenizer():
    # Load the model and tokenizer (cached across Streamlit reruns)
    tokenizer = AutoTokenizer.from_pretrained("meta-llama/Llama-2-7b-hf")
    model = LlamaForCausalLM.from_pretrained("meta-llama/Llama-2-7b-hf")
    return tokenizer, model

# Function to generate text based on a prompt
def generate_text(prompt, tokenizer, model):
    inputs = tokenizer(prompt, return_tensors="pt")
    # Generate text; max_new_tokens bounds only the completion length,
    # whereas max_length would also count the prompt tokens. Passing the
    # attention mask avoids a transformers warning on padded inputs.
    with torch.no_grad():
        generate_ids = model.generate(
            inputs.input_ids,
            attention_mask=inputs.attention_mask,
            max_new_tokens=50,
        )
    return tokenizer.batch_decode(
        generate_ids, skip_special_tokens=True, clean_up_tokenization_spaces=False
    )[0]

# Input field for user prompt
user_input = st.text_input("Enter your prompt:", "Hey, are you conscious? Can you talk to me?")

# Load model and tokenizer
tokenizer, model = load_model_and_tokenizer()

# Generate response when user enters a prompt
if st.button("Generate Response"):
    with st.spinner("Generating response..."):
        response = generate_text(user_input, tokenizer, model)
    st.write(f"Response: {response}")
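
# Usage (assuming this script is saved as app.py; the filename is arbitrary):
#   streamlit run app.py
# The first run downloads the ~13 GB fp32 weights; on a machine without
# enough RAM/VRAM you may want to load the model with a smaller dtype,
# e.g. from_pretrained(..., torch_dtype=torch.float16).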