# llma_tabular_qa / app.py
# Commit 70c712a by jskinner215: "changing to add basic contextual options
# before analyzing data"
# (The Hugging Face Spaces page header captured with this file — the
# raw / history / blame / contribute / delete links, "No virus", 1.54 kB —
# is preserved in this comment so the module remains valid Python.)
import streamlit as st
from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
import pandas as pd
# Initialize the LLM from HuggingFace.
# Streamlit re-runs this script top-to-bottom on every widget interaction;
# st.cache_resource ensures the (very large) model is loaded only once per
# server process instead of on every rerun.
@st.cache_resource
def load_pipeline():
    """Build and cache the text-generation pipeline.

    Returns:
        transformers.Pipeline: a text-generation pipeline backed by
        upstage/SOLAR-0-70b-16bit.
    """
    tokenizer = AutoTokenizer.from_pretrained("upstage/SOLAR-0-70b-16bit")
    model = AutoModelForCausalLM.from_pretrained("upstage/SOLAR-0-70b-16bit")
    return pipeline("text-generation", model=model, tokenizer=tokenizer)

pipe = load_pipeline()
st.title("📄 Document Conversation 🤖")

# Step 1: the user uploads a CSV; everything below only runs once a file
# is present (Streamlit reruns the script on each interaction).
uploaded_file = st.file_uploader("Upload a CSV file", type="csv")
if uploaded_file is not None:
    df = pd.read_csv(uploaded_file)
    st.write(f"Loaded CSV with {df.shape[0]} rows and {df.shape[1]} columns.")
    st.write("Columns:", df.columns.tolist())

    # Step 2: allow the user to select columns to focus on.
    selected_columns = st.multiselect("Select columns to focus on:", df.columns.tolist())
    if selected_columns:
        st.write(df[selected_columns].head())  # Display first few rows of selected columns

        # Generate a textual representation of the selected data to ground
        # the model's answer in the actual CSV contents.
        context = f"The selected data has columns: {', '.join(selected_columns)}. Here are the first few entries: {df[selected_columns].head().to_string(index=False)}"

        # Step 3: query through the LLM.
        question = st.text_input("Ask something about the selected data", placeholder="What is the average of ...?")
        if question:
            full_query = context + " " + question
            # max_new_tokens (not max_length): max_length caps prompt+output
            # combined, and the CSV-derived context alone can exceed 250
            # tokens, which would leave no budget for the answer.
            response = pipe(full_query, max_new_tokens=250, do_sample=True, top_k=50)
            st.write(response[0]['generated_text'])