# jeffhaines's picture
# Update app.py
# 51278b1
from transformers import DistilBertTokenizerFast, DistilBertForSequenceClassification
import torch
import pandas as pd
import streamlit as st
from transformers import pipeline
from transformers_interpret import SequenceClassificationExplainer
from transformers import AutoModelForSequenceClassification, AutoTokenizer
# Page header: the app title followed by a short description of the model
# and of the attribution display.
st.title('Ethical Judgment Classifier')
st.write(
    'This app uses a pre-trained Distilbert model fine-tuned on the '
    'Commonsense and Justice datasets from the Aligning AI With Shared '
    'Human Values project (for more information, see '
    'https://github.com/hendrycks/ethics). It judges whether a given action '
    'or scenario is wrong or not wrong and uses transformers-interpret '
    '(https://pypi.org/project/transformers-interpret/) to show how the '
    'words in your scenario affected the model\'s judgment.'
)
# Model setup. The classifier weights are read from the 'distilbert'
# checkpoint (presumably a directory shipped alongside the app -- confirm),
# while the tokenizer is the one published for the base model named below.
# NOTE(review): Streamlit re-executes the script on each interaction, so
# these loads appear to repeat on every rerun; caching may be worthwhile.
loaded_model = DistilBertForSequenceClassification.from_pretrained(
    'distilbert'
)
model_name = 'distilbert-base-uncased'
tokenizer = DistilBertTokenizerFast.from_pretrained(model_name)

# Two consumers share the same model/tokenizer pair: the explainer is used
# further down for per-token attributions, the pipeline for label and score.
cls_explainer = SequenceClassificationExplainer(loaded_model, tokenizer)
clf = pipeline(
    "text-classification",
    model=loaded_model,
    tokenizer=tokenizer,
)
# Main interaction: read a scenario from the user and, once something has
# been entered, show the model's judgment together with its explanation.
text = st.text_input('Enter a scenario or action.')
if text:
    # The pipeline returns a list of predictions; only the first is used.
    answer = clf(text)
    top_prediction = answer[0]
    # NOTE(review): assumes the fine-tuned checkpoint uses class 0 for
    # "wrong" -- confirm against the label mapping used in training.
    label = 'wrong' if top_prediction['label'] == 'LABEL_0' else 'not wrong'
    st.write(
        f'This action is {label} '
        f'(confidence level {top_prediction["score"]*100:.2f}%).'
    )

    # The first and last attribution entries are dropped (presumably the
    # tokenizer's special start/end tokens -- confirm). Column names are
    # given to the constructor so an empty slice still has named columns.
    attributions = cls_explainer(text)
    df = pd.DataFrame(attributions[1:-1], columns=['Token', 'Contribution'])
    st.sidebar.write(
        'This table shows how each word contributes to the model\'s judgment. '
        'Positive scores indicate that the word contributed toward the judgment '
        'while negative scores show how much the word pushed the model in the '
        'other direction.'
    )
    st.sidebar.write(df.style.hide(axis='index'))

    # Render the explainer's own visualization of the attributions computed
    # by the call above.
    st.write(cls_explainer.visualize())
# Closing disclaimer about the cultural scope of the training data.
st.write(
    'Note that the model is trained on a dataset of ethical judgments that '
    'are meant to represent the intuitive judgments of Americans and as such '
    'might conflict with what you believe, the judgments of people in other '
    'cultures, or what might in fact be right.'
)