Spaces:

butterswords
/

nlc-explorer

Running

App Files Files Community

nlc-explorer / viznlc-app.py

Nathan Butters

Add all files

03287bc about 2 years ago

raw

history blame

No virus

6.52 kB

	#Import the libraries we know we'll need for the Generator.
	import pandas as pd, spacy, nltk, numpy as np
	from spacy.matcher import Matcher
	#!python -m spacy download en_core_web_lg #Not sure if we need this so I'm going to keep it just in case; the model name must match the spacy.load() call below
	nlp = spacy.load("en_core_web_lg")
	lemmatizer = nlp.get_pipe("lemmatizer")

	#Import the libraries to support the model and predictions.
	from transformers import AutoTokenizer, AutoModelForSequenceClassification, TextClassificationPipeline
	import lime
	import torch
	import torch.nn.functional as F
	from lime.lime_text import LimeTextExplainer

	#Import the libraries for human interaction and visualization.
	import altair as alt
	import streamlit as st
	from annotated_text import annotated_text as ant

	#Import functions needed to build dataframes of keywords from WordNet
	from WNgen import *
	from NLselector import *

	@st.experimental_singleton
	def set_up_explainer():
	    """Build the LIME text explainer used to explain sentiment predictions.

	    The explainer is created with the two class labels 'negative' and
	    'positive', in that order. The function is wrapped in
	    ``st.experimental_singleton``.

	    NOTE(review): newer Streamlit releases replace
	    ``st.experimental_singleton`` with ``st.cache_resource`` -- confirm the
	    pinned Streamlit version before changing the decorator.

	    Returns:
	        A ``LimeTextExplainer`` configured with the two sentiment labels.
	    """
	    return LimeTextExplainer(class_names=['negative', 'positive'])

	@st.experimental_singleton
	def prepare_model():
	    """Load the sentiment tokenizer, model and classification pipeline.

	    Both the tokenizer and the model come from the
	    "distilbert-base-uncased-finetuned-sst-2-english" checkpoint, and the
	    pipeline is built on top of those two objects.

	    Returns:
	        A ``(tokenizer, model, pipe)`` tuple.
	    """
	    # Single source for the checkpoint name so tokenizer and model stay in sync.
	    checkpoint = "distilbert-base-uncased-finetuned-sst-2-english"
	    tokenizer = AutoTokenizer.from_pretrained(checkpoint)
	    model = AutoModelForSequenceClassification.from_pretrained(checkpoint)
	    pipe = TextClassificationPipeline(
	        model=model,
	        tokenizer=tokenizer,
	        return_all_scores=True,
	    )
	    return tokenizer, model, pipe

	@st.experimental_singleton
	def prepare_lists():
	    """Read the country and profession word tables from the Assets folder.

	    Returns:
	        A ``(countries, professions, word_lists)`` tuple where the first two
	        items are the DataFrames read from CSV and ``word_lists`` holds the
	        ``Words`` column of each frame as a plain list, countries first.
	    """
	    countries = pd.read_csv("Assets/Countries/combined-countries.csv")
	    professions = pd.read_csv("Assets/Professions/soc-professions-2018.csv")
	    # One list of words per table, in the same order as the returned frames.
	    word_lists = [list(frame.Words) for frame in (countries, professions)]
	    return countries, professions, word_lists

	#Provide all the functions necessary to run the app
	#get definitions for control flow in Streamlit
	def get_def(word, POS=False):
	    """Ask the user which WordNet definition of ``word`` is the relevant one.

	    A select box listing the definitions and a "Choose Definition" button are
	    rendered in ``col1``. When the button is pressed the selected definition
	    is stored in ``st.session_state.definition`` and returned.

	    Args:
	        word: The word (or phrase) to look up. Spaces are replaced with
	            underscores before the lookup.
	        POS: Optional part-of-speech filter. Only 'NOUN', 'VERB', 'ADJ' and
	            'ADV' are honoured; any other value (including the default
	            ``False``) searches without a part-of-speech restriction.

	    Returns:
	        The chosen definition once the button has been pressed, otherwise
	        ``None`` (no definitions available, or nothing confirmed yet).
	    """
	    pos_options = ('NOUN', 'VERB', 'ADJ', 'ADV')
	    m_word = word.replace(" ", "_")

	    # Run the lookup once; only the optional part-of-speech filter differs.
	    if POS in pos_options:
	        synsets = wordnet.synsets(m_word, pos=getattr(wordnet, POS))
	    else:
	        synsets = wordnet.synsets(m_word)
	    seed_definitions = [syn.definition() for syn in synsets]

	    if not seed_definitions:
	        # Nothing to choose from: say so instead of showing an empty select
	        # box, and clear any definition left over from a previous word.
	        st.session_state.definition = None
	        col1.write(f"No definitions were found for {word}.")
	        return None

	    seed_definition = col1.selectbox("Which definition is most relevant?", seed_definitions, key= "WN_definition")
	    if col1.button("Choose Definition"):
	        col1.write("You've chosen a definition.")
	        st.session_state.definition = seed_definition
	        return seed_definition

	    col1.write("Please choose a definition.")
	    return None

	###Start coding the actual app###
	#Configure the page and introduce the tool.
	st.set_page_config(page_title="VizNLC Generator Test", layout="wide")
	st.title("VizNLC Generator Test")
	st.write("This is a test of the pipeline Nathan built to generate counterfactuals for the STP-3 research project. Here we test the Nathan's elaboration for comparing the Natural Language Explanation and a visual display against the original input from a person.")

	#Prepare the model, the word lists and the explainer.
	tokenizer, model, pipe = prepare_model()
	countries, professions, word_lists = prepare_lists()
	explainer = set_up_explainer()

	#Start every run with empty results; later sections test these values to decide what to draw.
	text2 = text3 = ""
	cf_df = pd.DataFrame()

	#Make sure the session keys read later exist before any widget runs.
	for state_key in ("definition", "option"):
	    if state_key not in st.session_state:
	        st.session_state[state_key] = None
	proceed = False

	#Get the user to input a sentence
	st.write("This first iteration only allows you to evaluate countries.")

	col1, col2, col3 = st.columns(3)
	# NOTE(review): the indentation of this section was lost in the source, so the
	# nesting below is reconstructed from data flow: `options` and `option` only
	# exist once a sentence has been entered, so everything that reads them sits
	# inside that check. Confirm that the "Generate Alternatives" button is meant
	# to render inside col1.
	with col1:
	    text = st.text_input('Provide a sentence you want to evaluate.', placeholder = "I like you. I love you.", key="input")

	    #Use spaCy to make the sentence into a doc so we can do NLP.
	    doc = nlp(st.session_state.input)
	    #Evaluate the provided sentence for sentiment and probability.
	    if st.session_state.input != "":
	        probability, sentiment = eval_pred(text, return_all=True)
	        # Bound to `lime_results` (not `lime`) so the imported `lime` module
	        # is not overwritten by this value.
	        options, lime_results = critical_words(st.session_state.input, options=True)
	        nat_lang_explanation = construct_nlexp(text, sentiment, probability)
	        st.altair_chart(lime_viz(lime_results))

	        #Allow the user to pick an option to generate counterfactuals from.
	        option = st.radio('Which word would you like to use to generate alternatives?', options, key = "option")
	        if any(option in sublist for sublist in word_lists):
	            st.write(f'You selected {option}. It matches a list.')
	        elif option:
	            st.write(f'You selected {option}. It does not match a list.')
	            # Words outside the curated lists need a definition chosen first.
	            definition = get_def(option)
	        else:
	            st.write('Awaiting your selection.')

	        if st.button('Generate Alternatives'):
	            if option in list(countries.Words):
	                cf_df = gen_cf_country(countries, doc, option)
	            elif option in list(professions.Words):
	                # The same generator is used for the professions table.
	                cf_df = gen_cf_country(professions, doc, option)
	            else:
	                ant("Generating alternatives for",(option,"opt","#E0FBFB"), "with a definition of: ",(st.session_state.definition,"def","#E0FBFB"),".")
	                cf_df = cf_from_wordnet_df(option,text,seed_definition=st.session_state.definition)
	            # Every branch reports success the same way, so do it once here.
	            col1.write('Alternatives created.')

	        # Pick the alternative sentences to display once some exist.
	        if len(cf_df) != 0:
	            text2, text3 = get_min_max(cf_df, option)

	with col2:
	    #Only draw this column once an alternative sentence has been selected.
	    if text2 != "":
	        #Similarity value stored for the row whose text is the chosen alternative.
	        is_chosen_row = cf_df['text'] == text2
	        sim2 = cf_df.loc[is_chosen_row, 'similarity'].iloc[0]
	        rounded_sim2 = np.round(sim2, 2)
	        checked_count = len(cf_df)

	        st.write(f"This alternate example is similar to {option}.")
	        st.write(f" Similarity Score: {rounded_sim2}, Num Checked: {checked_count}") #for QA purposes
	        st.write(text2)

	        #Explain the alternative with LIME and chart the feature weights.
	        exp2 = explainer.explain_instance(
	            text2,
	            predictor,
	            num_features=15,
	            num_samples=2000,
	        )
	        lime_results2 = exp2.as_list()
	        probability2, sentiment2 = eval_pred(text2, return_all=True)
	        nat_lang_explanation = construct_nlexp(text2, sentiment2, probability2)
	        st.altair_chart(lime_viz(lime_results2))

	with col3:
	    #Plot the generated alternatives only when there are some.
	    if not cf_df.empty:
	        single_nearest = alt.selection_single(on='mouseover', nearest=True)

	        #Encodings: similarity on x, prediction on y, category as color, seed as size.
	        x_axis = alt.X('similarity:Q', scale=alt.Scale(zero=False))
	        y_axis = alt.Y('pred:Q')
	        category_color = alt.Color('Categories:N', legend=alt.Legend(title="Color of Categories"))
	        seed_size = alt.Size('seed:O')

	        full = alt.Chart(cf_df).encode(
	            x_axis,
	            y_axis,
	            color=category_color,
	            size=seed_size,
	            tooltip=('Categories','text','pred'),
	        )
	        full = full.mark_circle(opacity=.5)
	        full = full.properties(width=450, height=450)
	        full = full.add_selection(single_nearest)
	        st.altair_chart(full)