Spaces:

jeffhaines
/

Ethical_Judgment_Generator

Sleeping

App Files Files Community

Ethical_Judgment_Generator / app.py

jeffhaines

Update app.py

51278b1 over 2 years ago

raw

history blame contribute delete

2.16 kB

	from transformers import DistilBertTokenizerFast, DistilBertForSequenceClassification
	import torch
	import pandas as pd
	import streamlit as st
	from transformers import pipeline
	from transformers_interpret import SequenceClassificationExplainer
	from transformers import AutoModelForSequenceClassification, AutoTokenizer

	# Page header: the app title followed by a short description of the model,
	# its training data, and the attribution tool used for the explanations.
	st.title('Ethical Judgment Classifier')
	st.write(
	    "This app uses a pre-trained Distilbert model fine-tuned on the "
	    "Commonsense and Justice datasets from the Aligning AI With Shared "
	    "Human Values project (for more information, see "
	    "https://github.com/hendrycks/ethics). It judges whether a given "
	    # Fixed typo: the text previously read "action of scenario".
	    "action or scenario is wrong or not wrong and uses "
	    "transformers-interpret "
	    "(https://pypi.org/project/transformers-interpret/) to show how the "
	    "words in your scenario affected the model's judgment."
	)

	# Build the inference objects used by the rest of the script.
	#
	# The classifier weights are loaded from the 'distilbert' path (presumably a
	# directory shipped next to this script holding the fine-tuned checkpoint --
	# verify), while the tokenizer comes from the stock base checkpoint.
	#
	# NOTE(review): Streamlit re-executes the script on each interaction, so this
	# presumably reloads the model every time. Consider caching the load (e.g.
	# st.cache_resource) once the installed Streamlit version and the safety of
	# sharing the model across sessions have been confirmed.
	model_name = 'distilbert-base-uncased'
	loaded_model = DistilBertForSequenceClassification.from_pretrained('distilbert')
	tokenizer = DistilBertTokenizerFast.from_pretrained(model_name)

	# Explainer that produces per-token attributions for the classifier.
	cls_explainer = SequenceClassificationExplainer(loaded_model, tokenizer)

	# Text-classification pipeline wrapping the same model/tokenizer pair.
	clf = pipeline(
	    task="text-classification",
	    model=loaded_model,
	    tokenizer=tokenizer,
	)

	# Read the scenario from the user, classify it, and render both the verdict
	# and a per-token explanation of that verdict.
	text = st.text_input('Enter a scenario or action.')

	# Only run the model on real input: an empty box or whitespace-only text has
	# nothing to judge, so nothing is rendered for it.
	if text and text.strip():
	    # The pipeline returns a list of results; the first entry carries the
	    # predicted label and its score.
	    answer = clf(text)
	    label = 'wrong' if answer[0]['label'] == 'LABEL_0' else 'not wrong'
	    st.write(f'This action is {label} (confidence level {answer[0]["score"]*100:.2f}%).')

	    # Per-token attributions for the same text. The first and last entries
	    # are dropped -- presumably the tokenizer's special start/end tokens
	    # (TODO confirm) -- so the table lists only the user's words.
	    attributions = cls_explainer(text)
	    df = pd.DataFrame(attributions[1:-1]).rename(
	        columns={0: 'Token', 1: 'Contribution'}
	    )

	    # Sidebar: explanatory text followed by the attribution table, with the
	    # numeric index column hidden.
	    st.sidebar.write('This table shows how each word contributes to the model\'s judgment. Positive scores indicate that the word contributed toward the judgment while negative scores show how much the word pushed the model in the other direction.')
	    st.sidebar.write(df.style.hide(axis='index'))

	    # Main pane: the explainer's own visualization of the attributions.
	    st.write(cls_explainer.visualize())

	    # NOTE(review): the original indentation of this block was lost; the
	    # disclaimer is kept with the results because it directly followed them
	    # with no blank line. Dedent it if it was meant to be shown always.
	    st.write('Note that the model is trained on a dataset of ethical judgments that are meant to represent the intuitive judgments of Americans and as such might conflict with what you believe, the judgments of people in other cultures, or what might in fact be right.')