# NOTE: the lines originally here were web file-viewer residue (Space status,
# file size, per-line commit hashes and a line-number gutter), not Python code.
# Import packages:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import re
# tensorflow imports:
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import losses
from tensorflow.keras import layers
from tensorflow.keras.layers.experimental import preprocessing
from tensorflow.keras.optimizers import RMSprop
import pickle
import gradio as gr
import yake
import spacy
from spacy import displacy
import streamlit as st
import spacy_streamlit
# ---------------------------------------------------------------------------
# Module-level model setup. Everything below is loaded once at import time and
# shared by every request handled by process_final_text().
# ---------------------------------------------------------------------------

# spaCy pipeline used to render named entities.
nlp = spacy.load('en_core_web_sm')
import torch
import tensorflow as tf
from transformers import RobertaTokenizer, RobertaModel, AutoModelForSequenceClassification, TFAutoModelForSequenceClassification
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM

# Resilience classifier and its tokenizer.
tokenizer = AutoTokenizer.from_pretrained("paragon-analytics/bert_resil")
model = AutoModelForSequenceClassification.from_pretrained("paragon-analytics/bert_resil")

# Paraphraser. It is driven through `.generate()` with a "paraphrase: " prompt,
# so it must be loaded with a sequence-to-sequence LM head; a sequence
# classification head cannot generate text.
para_tokenizer = AutoTokenizer.from_pretrained("paragon-analytics/t5_para")
para_model = AutoModelForSeq2SeqLM.from_pretrained("paragon-analytics/t5_para")

# YAKE keyword extractors: a default one and the customised one
# (English, up to 2-grams, top 10 keywords) that the app actually uses.
kw_extractor = yake.KeywordExtractor()
custom_kw_extractor = yake.KeywordExtractor(lan="en", n=2, dedupLim=0.2, top=10, features=None)

# NOTE(review): these two constants are only referenced by the commented-out
# LSTM code path in this file -- presumably leftovers; confirm before removing.
max_words = 2000
max_len = 111

from transformers_interpret import SequenceClassificationExplainer

# Per-token attribution explainer bound to the resilience classifier.
cls_explainer = SequenceClassificationExplainer(model, tokenizer)
# load the model from disk
#filename = 'resil_lstm_model.sav'
#lmodel = pickle.load(open(filename, 'rb'))
# load the model from disk
#filename = 'tokenizer.pickle'
#tok = pickle.load(open(filename, 'rb'))
def _keyword_bucket(value):
    """Map a YAKE keyword score to the highlight label used by the UI."""
    # NOTE(review): YAKE documents that *lower* scores mean more relevant
    # keywords, so these buckets may be inverted -- thresholds are kept
    # exactly as they were; confirm the intended meaning.
    if value > 0.4:
        return "+++"
    if value > 0.1:
        return "++"
    if value > 0.01:
        return "+"
    return "NA"


def _attribution_bucket(value):
    """Map a signed word-attribution score to the highlight label used by the UI."""
    if value > 0.5:
        return "++"
    if value > 0.1:
        return "+"
    if value < -0.5:
        return "--"
    if value < -0.1:
        return "-"
    return "NA"


def process_final_text(text):
    """Run the full analysis pipeline on a piece of text.

    The text is lower-cased for every step except the named-entity
    rendering, which receives ``text`` as passed in.

    Args:
        text: The text to analyse; it is converted with ``str()`` first.

    Returns:
        A 5-tuple ``(label, keywords, NER, word_attributions, para_list)``:

        * ``label`` -- dict with the softmax probabilities under the keys
          ``"Resilience"`` (class index 1) and ``"Non-Resilience"`` (index 0).
        * ``keywords`` -- list of ``(keyword, bucket)`` pairs from YAKE.
        * ``NER`` -- HTML page produced by displaCy for the entities.
        * ``word_attributions`` -- list of ``(token, bucket)`` pairs from the
          classification explainer.
        * ``para_list`` -- list of 5 sampled paraphrases (strings).
    """
    X_test = str(text).lower()

    # --- Classification -----------------------------------------------------
    # Inference only: no autograd graph is needed. Truncation keeps over-long
    # inputs from exceeding the model's maximum sequence length.
    encoded_input = tokenizer(X_test, return_tensors='pt', truncation=True)
    with torch.no_grad():
        output = model(**encoded_input)
    logits = output[0][0].detach().numpy()
    probs = tf.nn.softmax(logits).numpy()

    # --- Keywords -----------------------------------------------------------
    keywords = [
        (phrase, _keyword_bucket(value))
        for phrase, value in custom_kw_extractor.extract_keywords(X_test)
    ]

    # --- Named entities -----------------------------------------------------
    doc = nlp(text)
    NER = displacy.render(doc, style="ent", page=True, jupyter=False)

    # --- Word attributions --------------------------------------------------
    word_attributions = [
        (token, _attribution_bucket(value))
        for token, value in cls_explainer(X_test)
    ]

    # --- Paraphrases --------------------------------------------------------
    inp_text = "paraphrase: " + X_test + " </s>"
    # A single sequence needs no padding; the deprecated `pad_to_max_length`
    # flag only added masked-out pad tokens.
    encoding = para_tokenizer(inp_text, truncation=True, return_tensors="pt")
    # Send the inputs to wherever the model actually lives instead of a
    # hard-coded "cuda", which fails on CPU-only hosts.
    device = para_model.device
    input_ids = encoding["input_ids"].to(device)
    attention_masks = encoding["attention_mask"].to(device)
    with torch.no_grad():
        outputs = para_model.generate(
            input_ids=input_ids,
            attention_mask=attention_masks,
            max_length=256,
            do_sample=True,
            top_k=120,
            top_p=0.95,
            early_stopping=True,
            num_return_sequences=5,
        )
    # Decode with the paraphraser's own tokenizer: the classifier's tokenizer
    # has a different vocabulary and would produce garbage.
    para_list = [
        para_tokenizer.decode(
            sequence,
            skip_special_tokens=True,
            clean_up_tokenization_spaces=True,
        )
        for sequence in outputs
    ]

    label = {
        "Resilience": float(probs[1]),
        "Non-Resilience": float(probs[0]),
    }
    return label, keywords, NER, word_attributions, para_list
def main(prob1):
    """Gradio callback: analyse the textbox value and fan out the results.

    Args:
        prob1: Value of the input textbox; converted with ``str()``.

    Returns:
        The five results of ``process_final_text`` as a tuple, in the order
        (label, keywords, NER HTML, word attributions, paraphrases).
    """
    label_scores, key_phrases, ner_html, attributions, paraphrases = (
        process_final_text(str(prob1))
    )
    return label_scores, key_phrases, ner_html, attributions, paraphrases
# ---------------------------------------------------------------------------
# Gradio user interface: one input textbox, an "Analyze" button, and five
# output components fed by main().
# ---------------------------------------------------------------------------
title = "Welcome to **ResText** 🪐"
description1 = """
This app takes text (up to a few sentences) and predicts to what extent the text contains resilience messaging. Resilience messaging is a text message that is about being able to a) "adapt to change” and b) “bounce back after illness or hardship". The predictive model is a fine-tuned RoBERTa NLP model. Just add your text and hit Analyze. Or, simply click on one of the examples to see how it works. ✨
"""

# Clickable sample inputs; each inner list holds the value for `prob1`.
example_texts = [
    ["Please stay at home and avoid unnecessary trips."],
    ["Please stay at home and avoid unnecessary trips. We will survive this."],
    ["We will survive this."],
    ["Watch today’s news briefing with the latest updates on COVID-19 in Connecticut."],
    ["So let's keep doing what we know works. Let's stay strong, and let's beat this virus. I know we can, and I know we can come out stronger on the other side."],
    ["It is really wonderful how much resilience there is in human nature. Let any obstructing cause, no matter what, be removed in any way, even by death, and we fly back to first principles of hope and enjoyment."],
    ["Resilience is accepting your new reality, even if it’s less good than the one you had before. You can fight it, you can do nothing but scream about what you’ve lost, or you can accept that and try to put together something that’s good."],
    ["You survived all of the days you thought you couldn't, never underestimate your resilience."],
    ["Like tiny seeds with potent power to push through tough ground and become mighty trees, we hold innate reserves of unimaginable strength. We are resilient."],
]

with gr.Blocks(title=title) as demo:
    gr.Markdown(f"## {title}")
    gr.Markdown(description1)
    gr.Markdown("""---""")
    prob1 = gr.Textbox(label="Enter Your Text Here:",lines=2, placeholder="Type it here ...")
    submit_btn = gr.Button("Analyze")
    #text = gr.Textbox(label="Text:",lines=2, placeholder="Please enter text here ...")
    #submit_btn2 = gr.Button("Analyze")

    # Output components, in the same order as the tuple returned by main().
    with gr.Column(visible=True) as output_col:
        label = gr.Label(label = "Predicted Label")
        impplot = gr.HighlightedText(label="Important Words", combine_adjacent=False).style(
            color_map={"+++": "royalblue","++": "cornflowerblue",
                       "+": "lightsteelblue", "NA":"white"})
        NER = gr.HTML(label = 'NER:')
        intp = gr.HighlightedText(label="Word Scores",
                                  combine_adjacent=False).style(color_map={"++": "darkgreen","+": "green",
                                                                           "--": "darkred",
                                                                           "-": "red", "NA":"white"})
        paraph = gr.Textbox(label = "Paraphrased Sentences:")

    submit_btn.click(
        main,
        [prob1],
        [label,impplot,NER,intp,paraph], api_name="ResText"
    )

    gr.Markdown("### Click on any of the examples below to see to what extent they contain resilience messaging:")
    # cache_examples=True makes Gradio run main() on the examples ahead of
    # time so that clicking one shows stored results.
    gr.Examples(example_texts, [prob1], [label,impplot,NER,intp,paraph], main, cache_examples=True)

demo.launch()