"""Streamlit app: zero-shot classification of an uploaded Excel table.

The user uploads an Excel file with a ``text`` column and enters a
comma-separated list of candidate labels. Each of the first ``N`` rows is
classified with the ``facebook/bart-large-mnli`` zero-shot pipeline, the
result is shown as a table, and it can be downloaded as ``output.xlsx``.
"""

from io import BytesIO

import pandas as pd
import streamlit as st
import tokenizers
import torch
from transformers import Pipeline, pipeline

# Must stay ahead of every other Streamlit call in the script.
st.set_page_config(
    page_title="Zero-shot classification from tabular data",
    page_icon=None,
    layout="wide",
    initial_sidebar_state="auto",
    menu_items=None,
)

# Shown both for invalid input detected up front and for any failure while
# reading or classifying the uploaded table.
INPUT_ERROR_MESSAGE = (
    "Something went wrong. Make sure you upload an Excel file containing a "
    "column named `text` and a set of comma-separated labels is provided"
)


@st.cache(
    # These types are mapped to a constant hash (None) so the cache does not
    # try to hash them.
    hash_funcs={
        torch.nn.parameter.Parameter: lambda _: None,
        tokenizers.Tokenizer: lambda _: None,
        tokenizers.AddedToken: lambda _: None,
    },
    allow_output_mutation=True,
    show_spinner=False,
)
def load_classifier() -> Pipeline:
    """Build the zero-shot classification pipeline.

    Returns:
        A ``transformers`` pipeline for the ``zero-shot-classification`` task
        backed by the ``facebook/bart-large-mnli`` model.
    """
    return pipeline("zero-shot-classification", model="facebook/bart-large-mnli")


with st.spinner(text="Setting stuff up related to the inference engine..."):
    classifier = load_classifier()

st.title("Zero-shot classification from tabular data")
st.text(
    "Upload an Excel table and perform zero-shot classification on a set of custom labels"
)

data = st.file_uploader(
    "Upload Excel file (it should contain a column named `text` in its header):"
)
labels = st.text_input("Enter comma-separated labels:")

# classify first N snippets only for faster inference
N = 10000

if st.button("Calculate labels"):
    # Parse the labels once: trim surrounding whitespace and drop empty
    # entries such as those produced by a trailing comma.
    labels_list = [label.strip() for label in labels.split(",") if label.strip()]

    if data is None or not labels_list:
        # Nothing to classify, or nothing to classify against; report it
        # without touching the model.
        st.error(INPUT_ERROR_MESSAGE)
    else:
        try:
            table = pd.read_excel(data)
            table = table.head(N).reset_index(drop=True)

            total = len(table)
            prog_bar = st.progress(0)
            preds = []
            for done, text in enumerate(table["text"], start=1):
                # Keep the first entry of the returned "labels" list
                # (per the transformers docs, sorted by descending score).
                preds.append(classifier(text, labels_list)["labels"][0])
                prog_bar.progress(done / total)

            table["label"] = preds
            result = table[["text", "label"]]
            st.table(result)

            buf = BytesIO()
            result.to_excel(buf)
            st.download_button(
                label="Download table", data=buf.getvalue(), file_name="output.xlsx"
            )
        except Exception:
            # `Exception` rather than a bare `except:` so KeyboardInterrupt
            # and SystemExit are not swallowed.
            st.error(INPUT_ERROR_MESSAGE)