import streamlit as st
from transformers import pipeline
import torch

# Set device: use the first GPU if available, otherwise fall back to CPU
device = 0 if torch.cuda.is_available() else -1

# Initialize the pipeline with reduced precision, cached across Streamlit reruns
@st.cache_resource
def load_pipeline():
    return pipeline(
        "text-generation",
        model="Irina-Igmm/llama2-Immo",
        torch_dtype=torch.float16,  # use float16 for reduced memory usage
        device=device,  # use GPU if available
    )

pipe = load_pipeline()

st.title("Request information with Llama2-Immo")

# User input
user_input = st.text_area("Enter your prompt:", "")

if st.button("Generate"):
    if user_input:
        # Process the input
        with st.spinner("Generating..."):
            result = pipe(user_input, max_length=500)  # adjust max_length as needed

        # Display the output
        st.subheader("Requested information")
        st.write(result[0]["generated_text"])
    else:
        st.warning("Please enter a prompt.")

# Clean up: release cached GPU memory after the run
if torch.cuda.is_available():
    torch.cuda.empty_cache()
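# Usage note (a minimal sketch; the filename app.py is an assumption, not
# given in the source): save this script and launch it with the Streamlit CLI:
#
#   streamlit run app.py
#
# Streamlit then serves the app locally, by default at http://localhost:8501.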