import streamlit as st 
from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
import pandas as pd

# Initialize the LLM from HuggingFace.
#
# Streamlit re-executes this whole script on every widget interaction, so the
# load is wrapped in a cached loader: the checkpoint is read once per server
# process and the same objects are handed back on each rerun.
MODEL_NAME = "upstage/SOLAR-0-70b-16bit"


@st.cache_resource(show_spinner="Loading language model...")
def load_llm(model_name=MODEL_NAME):
    """Load the tokenizer, model and text-generation pipeline for *model_name*.

    Args:
        model_name: HuggingFace Hub identifier of the causal LM checkpoint.

    Returns:
        A ``(tokenizer, model, pipe)`` tuple, where ``pipe`` is a
        ``text-generation`` pipeline built from the other two objects.
    """
    llm_tokenizer = AutoTokenizer.from_pretrained(model_name)
    llm_model = AutoModelForCausalLM.from_pretrained(model_name)
    llm_pipe = pipeline("text-generation", model=llm_model, tokenizer=llm_tokenizer)
    return llm_tokenizer, llm_model, llm_pipe


# Keep the original module-level names so the rest of the script is unchanged.
tokenizer, model, pipe = load_llm()

# Page flow: upload a CSV, pick columns, then ask the LLM a question about a
# small preview of the selected data.
st.title("📄 Document Conversation 🤖")
uploaded_file = st.file_uploader("Upload a CSV file", type="csv")

if uploaded_file is not None:
    try:
        df = pd.read_csv(uploaded_file)
    except (pd.errors.EmptyDataError, pd.errors.ParserError) as exc:
        # Report unreadable uploads in the page instead of a raw traceback.
        st.error(f"Could not read the uploaded CSV: {exc}")
        st.stop()  # halts this script run; nothing below executes

    st.write(f"Loaded CSV with {df.shape[0]} rows and {df.shape[1]} columns.")
    st.write("Columns:", df.columns.tolist())

    # Allow user to select columns to focus on
    selected_columns = st.multiselect("Select columns to focus on:", df.columns.tolist())
    if selected_columns:
        # The same preview is shown to the user and embedded in the prompt.
        preview = df[selected_columns].head()
        st.write(preview)  # Display first few rows of selected columns

        # Generate a textual representation of the selected data
        context = (
            f"The selected data has columns: {', '.join(selected_columns)}. "
            f"Here are the first few entries: {preview.to_string(index=False)}"
        )

        # Query through LLM
        question = st.text_input("Ask something about the selected data", placeholder="What is the average of ...?")
        if question:
            full_query = context + " " + question
            response = pipe(
                full_query,
                # Budget only the generated tokens. `max_length` also counts
                # the prompt, which already contains the table preview and can
                # use up or exceed a 250-token limit on its own.
                max_new_tokens=250,
                do_sample=True,
                top_k=50,
                # Return just the completion rather than prompt + completion,
                # so the context is not echoed back to the user.
                return_full_text=False,
            )
            st.write(response[0]['generated_text'])