# llma_tabular_qa / app.py
import streamlit as st
from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
import pandas as pd
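
# Note: SOLAR-0-70b-16bit is a 70B-parameter model; a bare from_pretrained()
# call needs a very large GPU. For most deployments you would shard or offload
# it, e.g. with device_map="auto" (requires the accelerate package).
# st.cache_resource keeps a single copy of the weights across Streamlit reruns
# instead of reloading them on every widget interaction.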
@st.cache_resource
def load_pipeline():
    # Initialize the LLM from Hugging Face
    tokenizer = AutoTokenizer.from_pretrained("upstage/SOLAR-0-70b-16bit")
    model = AutoModelForCausalLM.from_pretrained("upstage/SOLAR-0-70b-16bit")
    return pipeline("text-generation", model=model, tokenizer=tokenizer)

pipe = load_pipeline()

st.title("📄 Document Conversation 🤖")

uploaded_file = st.file_uploader("Upload a CSV file", type="csv")
if uploaded_file is not None:
    df = pd.read_csv(uploaded_file)
    st.write(f"Loaded CSV with {df.shape[0]} rows and {df.shape[1]} columns.")
    st.write("Columns:", df.columns.tolist())

    # Let the user narrow the analysis to specific columns
    selected_columns = st.multiselect("Select columns to focus on:", df.columns.tolist())
    if selected_columns:
        # Preview the first few rows of the selected columns
        st.write(df[selected_columns].head())

        # Build a textual representation of the selected data to serve as
        # context for the LLM; head() keeps the prompt short
        context = (
            f"The selected data has columns: {', '.join(selected_columns)}. "
            f"Here are the first few entries: "
            f"{df[selected_columns].head().to_string(index=False)}"
        )
        # Query the LLM
        question = st.text_input("Ask something about the selected data", placeholder="What is the average of ...?")
        if question:
            full_query = context + " " + question
            # max_new_tokens bounds only the generated answer (max_length would
            # count the long context prompt toward the limit and could truncate
            # the reply); return_full_text=False keeps the echoed prompt out of
            # the displayed output.
            response = pipe(full_query, max_new_tokens=250, do_sample=True, top_k=50, return_full_text=False)
            st.write(response[0]['generated_text'])
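
# Usage sketch (assumes streamlit, transformers, and pandas are installed):
#   streamlit run app.py
# then open the local URL Streamlit prints, upload a CSV, pick columns, and
# ask a question such as "Which column has the most missing values?".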