Spaces:
Sleeping
Sleeping
| from io import BytesIO | |
| import pandas as pd | |
| import streamlit as st | |
| import tokenizers | |
| import torch | |
| from transformers import Pipeline, pipeline | |
# Page-level Streamlit settings, collected in one mapping and applied with a
# single call.
_PAGE_SETTINGS = {
    "page_title": "Zero-shot classification from tabular data",
    "page_icon": None,
    "layout": "wide",
    "initial_sidebar_state": "auto",
    "menu_items": None,
}
st.set_page_config(**_PAGE_SETTINGS)
# Choose the caching decorator this Streamlit release provides:
# ``st.cache_resource`` where it exists, otherwise the legacy ``st.cache``
# with output-mutation checks disabled (the pipeline object is not hashable
# in any useful way). The built-in cache spinner is turned off because the
# call site already wraps the load in its own ``st.spinner``.
if hasattr(st, "cache_resource"):
    _cache_model = st.cache_resource(show_spinner=False)
else:
    _cache_model = st.cache(allow_output_mutation=True, show_spinner=False)


@_cache_model
def load_classifier() -> Pipeline:
    """Build the zero-shot classification pipeline, reusing it across reruns.

    Streamlit re-executes the script on user interaction; without caching,
    the ``facebook/bart-large-mnli`` pipeline would be constructed again each
    time this function is called.

    Returns:
        The ``transformers`` zero-shot-classification pipeline backed by
        ``facebook/bart-large-mnli``.
    """
    return pipeline("zero-shot-classification", model="facebook/bart-large-mnli")
# Obtain the classifier up front, showing a spinner while it is prepared.
with st.spinner("Setting stuff up related to the inference engine..."):
    classifier = load_classifier()

# Page heading and a one-line description of what the app does.
st.title(body="Zero-shot classification from tabular data")
st.text(
    body="Upload an Excel table and perform zero-shot classification on a set of custom labels"
)

# Input widgets: the uploaded workbook and the raw comma-separated label string.
data = st.file_uploader(
    label="Upload Excel file (it should contain a column named `text` in its header):"
)
labels = st.text_input(label="Enter comma-separated labels:")
# Classify only the first N rows of the upload to keep inference time bounded;
# the classifier is invoked once per row.
N = 10000

if st.button("Calculate labels"):
    try:
        # Trim whitespace around each label and drop empty entries (for
        # example from a trailing comma), then hand this parsed list to the
        # classifier instead of the raw input string.
        labels_list = [label.strip() for label in labels.split(",") if label.strip()]
        if not labels_list:
            # Fail before parsing the upload; reported by the handler below.
            raise ValueError("no candidate labels were provided")

        table = pd.read_excel(data)
        # Reset the index so the kept rows are numbered 0..len-1.
        table = table.head(N).reset_index(drop=True)
        texts = table["text"]

        prog_bar = st.progress(0)
        preds = []
        for done, text in enumerate(texts, start=1):
            # The first entry of "labels" in the result is taken as the
            # predicted label for this row.
            preds.append(classifier(text, labels_list)["labels"][0])
            prog_bar.progress(done / len(texts))
        table["label"] = preds

        result = table[["text", "label"]]
        st.table(result)

        # Serialise the result to an in-memory workbook for the download button.
        buf = BytesIO()
        result.to_excel(buf)
        st.download_button(
            label="Download table", data=buf.getvalue(), file_name="output.xlsx"
        )
    except Exception:
        # ``Exception`` rather than a bare ``except`` so that BaseException
        # subclasses such as KeyboardInterrupt and SystemExit still propagate.
        st.error(
            "Something went wrong. Make sure you upload an Excel file containing a column named `text` and a set of comma-separated labels is provided"
        )