"""Streamlit app: upload a CSV and ask an LLM questions about selected columns.

The user uploads a CSV, picks the columns of interest, and types a question.
A short textual preview of the selected columns is prepended to the question
and sent to a HuggingFace text-generation pipeline; the generated answer is
rendered on the page.
"""

import pandas as pd
import streamlit as st
from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline

MODEL_NAME = "upstage/SOLAR-0-70b-16bit"
# Budget for the *answer* only; the prompt length is not counted against it.
MAX_NEW_TOKENS = 250


@st.cache_resource
def load_pipeline(model_name: str = MODEL_NAME):
    """Build the text-generation pipeline for ``model_name``.

    Streamlit re-executes the whole script on every widget interaction, so
    the tokenizer and model are loaded inside a cached resource rather than
    at module top level; otherwise the weights would be reloaded on each
    rerun.

    Args:
        model_name: HuggingFace model identifier to load.

    Returns:
        A ``transformers`` text-generation pipeline.
    """
    tokenizer = AutoTokenizer.from_pretrained(model_name)
    model = AutoModelForCausalLM.from_pretrained(model_name)
    return pipeline("text-generation", model=model, tokenizer=tokenizer)


def build_context(df: pd.DataFrame, selected_columns: list) -> str:
    """Return a textual description of the selected columns of ``df``.

    The description names the selected columns and includes the first rows
    (``DataFrame.head()``) rendered without the index.

    Args:
        df: The loaded CSV data.
        selected_columns: Column labels chosen by the user.

    Returns:
        The prompt context string that precedes the user's question.
    """
    preview = df[selected_columns].head().to_string(index=False)
    return (
        f"The selected data has columns: {', '.join(selected_columns)}. "
        f"Here are the first few entries: {preview}"
    )


# Initialize the LLM from HuggingFace (cached across Streamlit reruns).
pipe = load_pipeline()

st.title("📄 Document Conversation 🤖")

uploaded_file = st.file_uploader("Upload a CSV file", type="csv")

if uploaded_file is not None:
    df = pd.read_csv(uploaded_file)
    st.write(f"Loaded CSV with {df.shape[0]} rows and {df.shape[1]} columns.")
    st.write("Columns:", df.columns.tolist())

    # Allow user to select columns to focus on
    selected_columns = st.multiselect(
        "Select columns to focus on:", df.columns.tolist()
    )

    if selected_columns:
        # Display first few rows of selected columns
        st.write(df[selected_columns].head())

        # Generate a textual representation of the selected data
        context = build_context(df, selected_columns)

        # Query through LLM
        question = st.text_input(
            "Ask something about the selected data",
            placeholder="What is the average of ...?",
        )
        if question:
            full_query = context + " " + question
            # max_new_tokens (not max_length) so the prompt, which embeds a
            # table preview, cannot consume the generation budget.
            # return_full_text=False so only the answer is shown, not the
            # echoed prompt.
            response = pipe(
                full_query,
                max_new_tokens=MAX_NEW_TOKENS,
                do_sample=True,
                top_k=50,
                return_full_text=False,
            )
            st.write(response[0]['generated_text'])