zeroshotcat / app.py
davidefiocco's picture
Update app.py
8e532c0
from io import BytesIO
import pandas as pd
import streamlit as st
import tokenizers
import torch
from transformers import Pipeline, pipeline
# Page-level Streamlit settings: page title, wide layout, automatic sidebar
# state; icon and menu entries are left as None.
_PAGE_SETTINGS = dict(
    page_title="Zero-shot classification from tabular data",
    page_icon=None,
    layout="wide",
    initial_sidebar_state="auto",
    menu_items=None,
)
st.set_page_config(**_PAGE_SETTINGS)
def _constant_hash(_obj) -> None:
    """Return the same hash value (None) for every object."""
    return None


# Custom hash functions handed to st.cache: each of these types is mapped to
# a constant value instead of being hashed by content.
_HASH_OVERRIDES = {
    torch.nn.parameter.Parameter: _constant_hash,
    tokenizers.Tokenizer: _constant_hash,
    tokenizers.AddedToken: _constant_hash,
}


@st.cache(
    hash_funcs=_HASH_OVERRIDES,
    allow_output_mutation=True,
    show_spinner=False,
)
def load_classifier() -> Pipeline:
    """Build the zero-shot classification pipeline.

    Returns:
        A ``transformers`` pipeline created for the
        "zero-shot-classification" task with the
        ``facebook/bart-large-mnli`` model.
    """
    return pipeline("zero-shot-classification", model="facebook/bart-large-mnli")
# Build the classifier up front; a spinner is shown while load_classifier() runs.
_loading_message = "Setting stuff up related to the inference engine..."
with st.spinner(text=_loading_message):
    classifier = load_classifier()

# Page header and short description.
st.title("Zero-shot classification from tabular data")
_intro = "Upload an Excel table and perform zero-shot classification on a set of custom labels"
st.text(_intro)

# User inputs: the spreadsheet to classify and the candidate labels.
_uploader_prompt = "Upload Excel file (it should contain a column named `text` in its header):"
data = st.file_uploader(_uploader_prompt)
labels = st.text_input("Enter comma-separated labels:")

# Only the first N snippets are classified, to keep inference time down.
N = 10_000
# Classify the uploaded table when the user presses the button, then show the
# result and offer it as an Excel download.
if st.button("Calculate labels"):
    try:
        # Turn the comma-separated input into clean label names: surrounding
        # whitespace is stripped and empty entries (e.g. from a trailing
        # comma) are dropped.
        labels_list = [name.strip() for name in labels.split(",") if name.strip()]
        if not labels_list:
            # Fail fast, before the spreadsheet is parsed; reported by the
            # handler below with the usual message.
            raise ValueError("no candidate labels were provided")

        # Keep only the first N rows, re-indexed from 0.
        table = pd.read_excel(data).head(N).reset_index(drop=True)
        n_rows = len(table)

        prog_bar = st.progress(0)
        preds = []
        for done, snippet in enumerate(table["text"], start=1):
            # Keep the first entry of the returned "labels" list
            # (NOTE(review): transformers documents it as sorted by score,
            # best first - confirm for the installed version).
            preds.append(classifier(snippet, labels_list)["labels"][0])
            prog_bar.progress(done / n_rows)
        table["label"] = preds

        result = table[["text", "label"]]
        st.table(result)

        # Serialise the result to an in-memory workbook for the download button.
        buf = BytesIO()
        result.to_excel(buf)
        st.download_button(
            label="Download table", data=buf.getvalue(), file_name="output.xlsx"
        )
    except Exception:
        # Deliberately broad: any failure (no file, unreadable workbook,
        # missing `text` column, no labels, inference error) gets the same
        # user-facing hint. `Exception` rather than a bare `except` so that
        # BaseException-derived signals (KeyboardInterrupt, SystemExit, ...)
        # are not swallowed and misreported as an input problem.
        st.error(
            "Something went wrong. Make sure you upload an Excel file containing a column named `text` and a set of comma-separated labels is provided"
        )