Spaces:
Sleeping
Sleeping
# Import packages: | |
import numpy as np | |
import pandas as pd | |
import matplotlib.pyplot as plt | |
import re | |
# tensorflow imports: | |
import tensorflow as tf | |
from tensorflow import keras | |
from tensorflow.keras import losses | |
from tensorflow.keras import layers | |
from tensorflow.keras.layers.experimental import preprocessing | |
from tensorflow.keras.optimizers import RMSprop | |
import pickle | |
import gradio as gr | |
import yake | |
import spacy | |
from spacy import displacy | |
import streamlit as st | |
import spacy_streamlit | |
nlp = spacy.load('en_core_web_sm') | |
import torch | |
import tensorflow as tf | |
from transformers import RobertaTokenizer, RobertaModel, AutoModelForSequenceClassification, TFAutoModelForSequenceClassification | |
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM | |
# --- Resilience classifier -------------------------------------------------
# Tokenizer and sequence-classification model used to score the input text.
tokenizer = AutoTokenizer.from_pretrained("paragon-analytics/bert_resil")
model = AutoModelForSequenceClassification.from_pretrained("paragon-analytics/bert_resil")

# --- Paraphraser -----------------------------------------------------------
# process_final_text() feeds a "paraphrase: ..." prompt to para_model.generate(),
# i.e. it needs an encoder-decoder model with a language-modelling head.
# Loading the checkpoint with AutoModelForSequenceClassification (as before)
# does not provide that, so use the seq2seq auto class imported above.
para_tokenizer = AutoTokenizer.from_pretrained("paragon-analytics/t5_para")
para_model = AutoModelForSeq2SeqLM.from_pretrained("paragon-analytics/t5_para")

# --- Keyword extraction ----------------------------------------------------
# Default extractor (kept for backward compatibility) and the customised one
# used by process_final_text(): English, up to 2-gram keywords, top 10.
kw_extractor = yake.KeywordExtractor()
custom_kw_extractor = yake.KeywordExtractor(lan="en", n=2, dedupLim=0.2, top=10, features=None)

# Settings of the retired LSTM pipeline (see the commented-out loaders below).
max_words = 2000
max_len = 111

# --- Token-level attributions for the classifier ---------------------------
from transformers_interpret import SequenceClassificationExplainer
cls_explainer = SequenceClassificationExplainer(
    model,
    tokenizer)

# Legacy LSTM model and tokenizer loaders, currently disabled:
# load the model from disk
#filename = 'resil_lstm_model.sav'
#lmodel = pickle.load(open(filename, 'rb'))
# load the model from disk
#filename = 'tokenizer.pickle'
#tok = pickle.load(open(filename, 'rb'))
def _keyword_strength(score):
    """Map a YAKE keyword score to the "+++" / "++" / "+" / "NA" UI label."""
    # NOTE(review): YAKE documents lower scores as *more* relevant, yet the
    # highest scores get the strongest label here. Thresholds are kept as they
    # were; confirm the intended direction with the model owners.
    if score > 0.4:
        return "+++"
    if score > 0.1:
        return "++"
    if score > 0.01:
        return "+"
    return "NA"


def _attribution_strength(score):
    """Map a signed word-attribution score to "++" / "+" / "-" / "--" / "NA"."""
    if score > 0.5:
        return "++"
    if score > 0.1:
        return "+"
    if score < -0.5:
        return "--"
    if score < -0.1:
        return "-"
    return "NA"


def process_final_text(text):
    """Run the full analysis pipeline on one piece of text.

    Args:
        text: The text to analyse. It is converted with ``str()``; the
            classifier, keyword extractor, explainer and paraphraser all
            receive the lower-cased version, while NER runs on the original
            casing.

    Returns:
        A 5-tuple ``(label_probs, keywords, ner_html, word_attributions,
        para_list)``:

        * ``label_probs`` -- ``{"Resilience": p1, "Non-Resilience": p0}`` where
          ``p0``/``p1`` are the softmax probabilities of classifier outputs
          0 and 1.
        * ``keywords`` -- list of ``(keyword, strength_label)`` pairs.
        * ``ner_html`` -- full-page displaCy entity rendering (HTML string).
        * ``word_attributions`` -- list of ``(token, strength_label)`` pairs.
        * ``para_list`` -- list of 5 sampled paraphrases (strings).
    """
    raw_text = str(text)
    lowered = raw_text.lower()

    # --- Classification ---
    # truncation=True keeps over-long inputs within the tokenizer's limit.
    encoded_input = tokenizer(lowered, return_tensors="pt", truncation=True)
    with torch.no_grad():  # inference only, no autograd graph needed
        logits = model(**encoded_input)[0][0]
    # Softmax in torch: the logits already are torch tensors, no need to
    # round-trip through TensorFlow.
    probs = torch.softmax(logits, dim=-1)

    # --- Keywords ---
    keywords = [
        (phrase, _keyword_strength(kw_score))
        for phrase, kw_score in custom_kw_extractor.extract_keywords(lowered)
    ]

    # --- Named entities (original casing, which NER models rely on) ---
    doc = nlp(raw_text)
    ner_html = displacy.render(doc, style="ent", page=True, jupyter=False)

    # --- Per-token attributions for the classifier decision ---
    word_attributions = [
        (token, _attribution_strength(attr_score))
        for token, attr_score in cls_explainer(lowered)
    ]

    # --- Paraphraser ---
    inp_text = "paraphrase: " + lowered + " </s>"
    # padding="max_length" is the documented replacement for the deprecated
    # pad_to_max_length=True flag.
    encoding = para_tokenizer(
        inp_text,
        padding="max_length",
        truncation=True,
        return_tensors="pt",
    )
    # Put the inputs on whatever device the model lives on instead of
    # hard-coding "cuda", which fails on CPU-only hosts and mismatches the
    # model whenever it was never moved to the GPU.
    device = para_model.device
    input_ids = encoding["input_ids"].to(device)
    attention_mask = encoding["attention_mask"].to(device)
    outputs = para_model.generate(
        input_ids=input_ids,
        attention_mask=attention_mask,
        max_length=256,
        do_sample=True,
        top_k=120,
        top_p=0.95,
        early_stopping=True,
        num_return_sequences=5,
    )
    # Decode with the paraphraser's own tokenizer: the generated ids belong to
    # its vocabulary, not to the classifier tokenizer's.
    para_list = [
        para_tokenizer.decode(
            sequence,
            skip_special_tokens=True,
            clean_up_tokenization_spaces=True,
        )
        for sequence in outputs
    ]

    label_probs = {
        "Resilience": float(probs[1]),
        "Non-Resilience": float(probs[0]),
    }
    return label_probs, keywords, ner_html, word_attributions, para_list
def main(prob1):
    """Gradio callback: analyse the textbox content.

    Converts ``prob1`` to ``str``, hands it to ``process_final_text`` and
    returns its five results in the same order (label probabilities,
    keywords, NER HTML, word attributions, paraphrases).
    """
    label_probs, keywords, ner_html, attributions, paraphrases = process_final_text(
        str(prob1)
    )
    return label_probs, keywords, ner_html, attributions, paraphrases
# ---------------------------------------------------------------------------
# Gradio user interface
# ---------------------------------------------------------------------------
title = "Welcome to **ResText** 🪐"
description1 = """
This app takes text (up to a few sentences) and predicts to what extent the text contains resilience messaging. Resilience messaging is a text message that is about being able to a) "adapt to change” and b) “bounce back after illness or hardship". The predictive model is a fine-tuned RoBERTa NLP model. Just add your text and hit Analyze. Or, simply click on one of the examples to see how it works. ✨
"""

# One single-element row per clickable example (one value for the one input).
example_rows = [
    ["Please stay at home and avoid unnecessary trips."],
    ["Please stay at home and avoid unnecessary trips. We will survive this."],
    ["We will survive this."],
    ["Watch today’s news briefing with the latest updates on COVID-19 in Connecticut."],
    ["So let's keep doing what we know works. Let's stay strong, and let's beat this virus. I know we can, and I know we can come out stronger on the other side."],
    ["It is really wonderful how much resilience there is in human nature. Let any obstructing cause, no matter what, be removed in any way, even by death, and we fly back to first principles of hope and enjoyment."],
    ["Resilience is accepting your new reality, even if it’s less good than the one you had before. You can fight it, you can do nothing but scream about what you’ve lost, or you can accept that and try to put together something that’s good."],
    ["You survived all of the days you thought you couldn't, never underestimate your resilience."],
    ["Like tiny seeds with potent power to push through tough ground and become mighty trees, we hold innate reserves of unimaginable strength. We are resilient."],
]

# Highlight colours keyed by the strength labels process_final_text() emits.
keyword_colors = {
    "+++": "royalblue",
    "++": "cornflowerblue",
    "+": "lightsteelblue",
    "NA": "white",
}
attribution_colors = {
    "++": "darkgreen",
    "+": "green",
    "--": "darkred",
    "-": "red",
    "NA": "white",
}

with gr.Blocks(title=title) as demo:
    # Header and description.
    gr.Markdown(f"## {title}")
    gr.Markdown(description1)
    gr.Markdown("""---""")

    # Input area.
    prob1 = gr.Textbox(
        label="Enter Your Text Here:",
        lines=2,
        placeholder="Type it here ...",
    )
    submit_btn = gr.Button("Analyze")

    # Output area: one component per value returned by main().
    with gr.Column(visible=True) as output_col:
        label = gr.Label(label="Predicted Label")
        impplot = gr.HighlightedText(
            label="Important Words",
            combine_adjacent=False,
        ).style(color_map=keyword_colors)
        NER = gr.HTML(label='NER:')
        intp = gr.HighlightedText(
            label="Word Scores",
            combine_adjacent=False,
        ).style(color_map=attribution_colors)
        paraph = gr.Textbox(label="Paraphrased Sentences:")

    result_components = [label, impplot, NER, intp, paraph]

    submit_btn.click(
        fn=main,
        inputs=[prob1],
        outputs=result_components,
        api_name="ResText",
    )

    gr.Markdown("### Click on any of the examples below to see to what extent they contain resilience messaging:")
    gr.Examples(
        examples=example_rows,
        inputs=[prob1],
        outputs=result_components,
        fn=main,
        cache_examples=True,
    )

demo.launch()