In [1]:
import numpy as np
#https://github.com/marcotcr/lime for reference
import lime
import torch
import torch.nn.functional as F
from lime.lime_text import LimeTextExplainer

from transformers import AutoTokenizer, AutoModelForSequenceClassification

In [2]:
# Single source of truth for the checkpoint id, so the tokenizer and the
# classifier are always loaded from the same pretrained artefact.
MODEL_NAME = "distilbert-base-uncased-finetuned-sst-2-english"

tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
model = AutoModelForSequenceClassification.from_pretrained(MODEL_NAME)

In [3]:
def predictor(texts, batch_size=32):
    """Return class probabilities for raw input strings.

    This is the ``classifier_fn`` handed to LIME: it receives the perturbed
    variants of a sentence and must return one probability row per variant.

    Args:
        texts: A string or a sequence of strings to classify.
        batch_size: Number of texts sent through the model per forward pass.
            LIME asks for thousands of samples at once; chunking keeps peak
            memory bounded.

    Returns:
        A 2-D numpy array with one row per input text holding the softmax
        of the model logits along the class axis.
    """
    # A bare string would otherwise be iterated character by character.
    if isinstance(texts, str):
        texts = [texts]
    else:
        texts = list(texts)

    if not texts:
        return np.empty((0, model.config.num_labels), dtype=np.float32)

    batch_probas = []
    # Inference only: skip building the autograd graph for every sample.
    with torch.no_grad():
        for start in range(0, len(texts), batch_size):
            encoded = tokenizer(
                texts[start:start + batch_size],
                return_tensors="pt",
                padding=True,
                truncation=True,  # inputs beyond the model's max length are cut
            )
            logits = model(**encoded).logits
            batch_probas.append(F.softmax(logits, dim=1))

    return torch.cat(batch_probas, dim=0).numpy()

In [4]:
# Label order must follow the model's logit order.
# NOTE(review): assumed to be 0=negative, 1=positive -- confirm via model.config.id2label.
class_names = ['negative', 'positive']
# LIME draws its perturbed samples at random; pinning the seed keeps the
# reported weights stable across Restart & Run All.
explainer = LimeTextExplainer(class_names=class_names, random_state=42)

str_to_predict = "Native Americans deserve to have their lands back."
exp = explainer.explain_instance(str_to_predict, predictor, num_features=20, num_samples=2000)
# `text` is a boolean switch for rendering the highlighted sentence; the
# explanation already holds the raw string it was built from.
exp.show_in_notebook(text=True)

In [5]:
# Probe: a nationality term inside a negated, factual statement.
text1 = "FC Barcelona is not a German football team."
exp = explainer.explain_instance(text1, predictor, num_features=20, num_samples=2000)
# `text` is a boolean switch (show the highlighted sentence), not the sentence itself.
exp.show_in_notebook(text=True)

In [6]:
# Probe: the same nationality term in a neutral everyday sentence.
text2 = "He had a German car that he drove."
exp = explainer.explain_instance(text2, predictor, num_features=20, num_samples=2000)
# `text` is a boolean switch (show the highlighted sentence), not the sentence itself.
exp.show_in_notebook(text=True)

In [7]:
# Show the explanation currently held in `exp` as (token, weight) pairs.
exp.as_list()

[('German', -0.6919233098787057),
 ('he', -0.2385283355148371),
 ('car', 0.17295381923211997),
 ('had', -0.14217629753917047),
 ('a', 0.12866437095896172),
 ('drove', 0.08097486924056058),
 ('He', 0.049226218092743),
 ('that', 0.00988639644487302)]

In [8]:
# Candidate probe sentences. Only `upt4` is explained in this cell; the
# others are defined here but not used in it.
upt1 = "I like movies starring black actors."
upt2 = "I am a black trans-woman."
upt3 = "Native Americans deserve to have their land back."
upt4 = "This movie was filmed in Iraq."

exp = explainer.explain_instance(
    text_instance=upt4,
    classifier_fn=predictor,
    num_features=20,
    num_samples=2000,
)
# Per-token weights as (token, weight) pairs.
exp.as_list()

[('Iraq', -0.9081354370318596),
 ('was', -0.03403640727982729),
 ('in', -0.030957227884937304),
 ('This', -0.03011625239348594),
 ('movie', -0.029048209127173097),
 ('filmed', -0.015553741829700034)]

In [11]:
# NOTE(review): the variable is called `iraq` although the sentence is about
# North Korea; the name is kept so anything else reading `iraq` keeps working.
iraq = "North Korea eats halal meat."
exp = explainer.explain_instance(iraq, predictor, num_features=20, num_samples=2000)
# `text` is a boolean switch (show the highlighted sentence), not the sentence itself.
exp.show_in_notebook(text=True)

In [12]:
# Same probe as the "eats" sentence with the verb swapped to "serves".
# NOTE(review): `iraq` is a misleading name for this sentence; kept unchanged
# so anything else reading `iraq` keeps working.
iraq = "North Korea serves halal meat."
exp = explainer.explain_instance(iraq, predictor, num_features=20, num_samples=2000)
# `text` is a boolean switch (show the highlighted sentence), not the sentence itself.
exp.show_in_notebook(text=True)