# PR-ENT_Dashboard / pages / Actor_Target.py
import sys
import os
import streamlit as st
from PIL import Image
import pandas as pd
from transformers import pipeline
import spacy
import en_core_web_lg
# Make the parent directory importable so that `helpers` can be resolved
current = os.path.dirname(os.path.realpath(__file__))
parent = os.path.dirname(current)
sys.path.append(parent)
from helpers import display_nli_pr_results_as_list, prompt_to_nli, get_who_what_whom_qa
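
# The imported helpers (defined in the repository root) are used in this page as follows:
#   - prompt_to_nli: runs the PR-ENT prompting + entailment step and returns candidate tokens with scores
#   - get_who_what_whom_qa: builds actor/target questions from those tokens and queries the QA model
#   - display_nli_pr_results_as_list: renders the entailed candidates in the Streamlit page
# (Descriptions inferred from how the helpers are called below, not from their source.)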
@st.cache(allow_output_mutation=True)
def load_spacy_pipeline():
    # Cache the spaCy pipeline so it is loaded only once per session
    return en_core_web_lg.load()
def choose_text_menu(text):
    # Text area pre-filled with a default event description kept in the session state
    if 'text' not in st.session_state:
        st.session_state.text = 'Several demonstrators were injured.'
    text = st.text_area('Event description', st.session_state.text)
    return text
# Load models in cache
@st.cache(allow_output_mutation=True)
def load_model_prompting():
    # Masked-language model used to fill the template slot [Z]
    return pipeline("fill-mask", model="distilbert-base-uncased")

@st.cache(allow_output_mutation=True)
def load_model_nli():
    # NLI model used to score entailment of the filled templates
    return pipeline(task="sentiment-analysis", model="roberta-large-mnli")

@st.cache(allow_output_mutation=True)
def load_model_qa():
    # Extractive question answering model used for actor-target coding
    model_name = "deepset/roberta-base-squad2"
    model = pipeline(model=model_name, tokenizer=model_name, task="question-answering")
    return model
nlp = load_spacy_pipeline()
### App START
st.markdown("""# Rethinking the Event Coding Pipeline with Prompt Entailment
## Author: Anonymized for submission""")
st.markdown("### 1. Actor-target coding (experimental):")
@st.cache()
def load_qa_image():
    return Image.open('pipeline_qa.png')
st.image(load_qa_image(), caption="""Actor-target coding flow. First, we obtain the entailed answer candidates through the PR-ENT pipeline.
Then we construct questions based on these tokens to extract actors and targets, two questions per verb.
Finally, we pass these questions and the event description to a pre-trained extractive question answering model and fill a table of [Actor, Action, Target].""")
st.markdown("""
Here we use an extractive question answering model to find the actor and target of an event.
As this is still in an experimental phase, there are some limitations:
- The only template possible is `People were [Z].`, which gives us a verb from which to construct the two questions:
    - `Who was [Z]?` to find the target.
    - `Who [Z] people?` to find the actor.
- `top_k = 10` and `entailment_threshold = 0.5`.

The results of the QA are given along with the confidence score of the model in brackets `[xx.x%]`.
""")
## Load Models
model_nli = load_model_nli()
model_prompting = load_model_prompting()
model_qa = load_model_qa()
st.markdown("""
### 2. Write an event description:
The first step is to write an event description that will be fed to the pipeline. This can be any text in English.
""")
text = choose_text_menu('')
st.session_state.text = text
st.markdown("""
### 3. Run actor-target coding:
""")
if "old_text_qa" not in st.session_state:
st.session_state.old_text_qa =st.session_state.text
qa_button = st.button("Run actor-target coding")
if qa_button:
    computation_state_qa = st.text("Computation Running.")
    st.session_state.old_text_qa = st.session_state.text
    # PR-ENT step: fill the template and keep tokens entailed by the event description
    prompt = "People were {}."
    results = prompt_to_nli(text, prompt, model_prompting, model_nli, nlp, 10, 0.5, True)
    list_results = [x[0][0] + ' ' + str(int(x[1][1]*100)) + '%' for x in results]
    st.session_state.list_results_prompt_qa = list_results
    # QA step: build actor/target questions from the entailed tokens and query the QA model
    list_tokens = [x[0][0] for x in results]
    who_what_whom = get_who_what_whom_qa(text, list_tokens, model_qa)
    st.session_state.who_what_whom = who_what_whom
    computation_state_qa.text("Computation Done.")
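
# Expected shapes (inferred from how the results are indexed above, not guaranteed):
# `results` is a list where each item x carries the filled token at x[0][0] and its
# entailment probability at x[1][1]; `who_what_whom` is a table-like structure with
# one [Actor, Action, Target] row per entailed verb, suitable for pd.DataFrame below.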
if 'who_what_whom' in st.session_state:
    st.write('**Event Description**: {}'.format(st.session_state.old_text_qa))
    st.write('**Template**: "{}"; **Top K**: {}; **Entailment Threshold**: {}.'.format("People were [Z]", 10, 0.5))
    display_nli_pr_results_as_list('', st.session_state.list_results_prompt_qa)
    st.write(pd.DataFrame(st.session_state.who_what_whom))