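# app.py -- uploaded by DexterSptizu in commit 5d000a0 ("Create app.py", verified); 3.49 kB.
# (The "raw" and "history blame" entries that followed were page-navigation links, not file content.)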
import streamlit as st
import pandas as pd
from io import StringIO
from transformers import AutoModelForCausalLM, AutoTokenizer
import torch
# Predefined example CSV content: a five-row sample table with the columns
# Loss, Date, Score, Opponent, Record and Attendance. It is wrapped in a
# StringIO and parsed with pd.read_csv when the "Example CSV" data source is
# selected in the sidebar.
# NOTE: the literal begins with a newline, so the CSV text starts with a blank
# line ahead of the header row. Every field is double-quoted.
EXAMPLE_CSV_CONTENT = """
"Loss","Date","Score","Opponent","Record","Attendance"
"Hampton (14–12)","September 25","8–7","Padres","67–84","31,193"
"Speier (5–3)","September 26","3–1","Padres","67–85","30,711"
"Elarton (4–9)","September 22","3–1","@ Expos","65–83","9,707"
"Lundquist (0–1)","September 24","15–11","Padres","67–83","30,774"
"Hampton (13–11)","September 6","9–5","Dodgers","61–78","31,407"
"""
# Load the model and tokenizer
@st.cache_resource
def load_model_and_tokenizer():
    """Load the ``tablegpt/TableGPT2-7B`` causal LM and its tokenizer.

    The function is wrapped in ``st.cache_resource`` so that Streamlit can
    reuse the loaded objects instead of loading them again on every script
    rerun.

    Returns:
        tuple: ``(model, tokenizer)`` as produced by
        ``AutoModelForCausalLM.from_pretrained`` and
        ``AutoTokenizer.from_pretrained`` for the same checkpoint.
    """
    checkpoint = "tablegpt/TableGPT2-7B"
    # Let the library choose the dtype and the device placement.
    load_kwargs = {"torch_dtype": "auto", "device_map": "auto"}
    lm = AutoModelForCausalLM.from_pretrained(checkpoint, **load_kwargs)
    tok = AutoTokenizer.from_pretrained(checkpoint)
    return lm, tok
# Obtain the model and tokenizer (the loader is wrapped in st.cache_resource).
model, tokenizer = load_model_and_tokenizer()

# Application UI: page title followed by a short introduction.
_intro_text = """
This app uses a language model to answer questions about tabular data.
You can upload your own CSV file or use a predefined example to test it.
"""
st.title("Table Question Answering App")
st.write(_intro_text)
# Sidebar for input options: the user picks either the bundled example table
# or a CSV file of their own.
st.sidebar.header("Input Options")
data_source = st.sidebar.radio("Choose a data source:", ("Example CSV", "Upload CSV"))

if data_source == "Example CSV":
    st.subheader("Using Example CSV Data")
    df = pd.read_csv(StringIO(EXAMPLE_CSV_CONTENT))
else:
    st.subheader("Upload Your CSV File")
    uploaded_file = st.file_uploader("Upload a CSV file", type=["csv"])
    if uploaded_file is None:
        # Nothing to work with yet: ask for a file and halt this script run.
        st.warning("Please upload a CSV file to proceed.")
        st.stop()
    try:
        df = pd.read_csv(uploaded_file)
    except (pd.errors.EmptyDataError, pd.errors.ParserError, UnicodeDecodeError) as exc:
        # An empty, malformed or non-UTF-8 upload should produce a readable
        # message in the page rather than an unhandled traceback.
        st.error(f"Could not read the uploaded file as CSV: {exc}")
        st.stop()

# Display the loaded dataframe
st.write("### Data Preview")
st.dataframe(df)
# Question Input
st.write("### Ask a Question")
# Default question (Chinese): "Which games reached a record of 40 wins and
# 40 losses?". The literal was previously stored as mis-decoded (mojibake)
# text; it is written here as proper Unicode.
question = st.text_input("Enter your question:", "哪些比赛的战绩达到了40胜40负?")
# Ignore surrounding whitespace so a blank-looking input does not trigger a
# full generation pass.
question = question.strip()

# Prompt layout: an instruction, a preview of the dataframe inside a /* ... */
# block, then the user's question.
example_prompt_template = """Given access to several pandas dataframes, write the Python code to answer the user's question.
/*
"{var_name}.head(5).to_string(index=False)" as follows:
{df_info}
*/
Question: {user_question}
"""

# Generate response if question is provided
if question:
    prompt = example_prompt_template.format(
        var_name="df",
        # Only the first five rows are included in the prompt.
        df_info=df.head(5).to_string(index=False),
        user_question=question,
    )
    messages = [
        {"role": "system", "content": "You are a helpful assistant."},
        {"role": "user", "content": prompt},
    ]
    text = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
    model_inputs = tokenizer([text], return_tensors="pt").to(model.device)

    with st.spinner("Generating response..."):
        output_sequences = model.generate(**model_inputs, max_new_tokens=512)

    # Each generated sequence starts with its prompt tokens; slice them off
    # so that only the newly generated tokens are decoded.
    completion_ids = [
        output_ids[len(input_ids):]
        for input_ids, output_ids in zip(model_inputs.input_ids, output_sequences)
    ]
    response = tokenizer.batch_decode(completion_ids, skip_special_tokens=True)[0]

    # Display response
    st.write("### Model Response")
    st.text_area("Response", response, height=200)
# Footer: short attribution note rendered as an info box in the sidebar.
_footer_text = """
This app demonstrates the use of a language model for tabular data understanding.
Powered by [Hugging Face Transformers](https://huggingface.co/).
"""
st.sidebar.info(_footer_text)