File size: 3,805 Bytes
8504fa5
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
import sys
import os

import streamlit as st

from PIL import Image
import pandas as pd

from transformers import pipeline
import spacy
import en_core_web_lg

# Make the parent directory importable so the shared `helpers` module (which
# lives one level above this Streamlit page) resolves when the page is run as
# a plain script rather than as part of an installed package.
current = os.path.dirname(os.path.realpath(__file__))
parent = os.path.dirname(current)
sys.path.append(parent)
from helpers import display_nli_pr_results_as_list, prompt_to_nli, get_who_what_whom_qa

@st.cache(allow_output_mutation=True)
def load_spacy_pipeline():
    """Load the large English spaCy model once and keep it in Streamlit's cache."""
    spacy_nlp = en_core_web_lg.load()
    return spacy_nlp

def choose_text_menu(text):
    """Render the event-description text area and return its current contents.

    Seeds ``st.session_state.text`` with a default example on the first run.
    NOTE(review): the ``text`` argument is accepted for interface compatibility
    but is not read — the widget value comes from session state.
    """
    if 'text' not in st.session_state:
        # First page load: provide a default example event description.
        st.session_state.text = 'Several demonstrators were injured.'
    return st.text_area('Event description', st.session_state.text)

# # Load Models in cache
@st.cache(allow_output_mutation=True)
def load_model_prompting():
    """Build and cache the fill-mask pipeline used to complete prompt templates."""
    checkpoint = "distilbert-base-uncased"
    return pipeline("fill-mask", model=checkpoint)

@st.cache(allow_output_mutation=True)
def load_model_nli():
    """Build and cache the entailment classifier (RoBERTa-large fine-tuned on MNLI).

    The "sentiment-analysis" task name is the transformers alias for generic
    text classification, which is how the MNLI head is served here.
    """
    nli_pipeline = pipeline(task="sentiment-analysis", model="roberta-large-mnli")
    return nli_pipeline

@st.cache(allow_output_mutation=True)
def load_model_qa():
    """Build and cache the extractive question-answering pipeline."""
    checkpoint = "deepset/roberta-base-squad2"
    return pipeline(model=checkpoint, tokenizer=checkpoint, task="question-answering")

# Instantiate the cached spaCy pipeline used downstream by prompt_to_nli.
nlp = load_spacy_pipeline()

### App START
st.markdown("### 1. Actor-target coding (experimental):")
# Cache the flow-diagram image so it is read from disk only once per session.
@st.cache()
def load_qa_image():
    return Image.open('pipeline_qa.png')
st.image(load_qa_image(),caption="""Actor-target Coding Flow. First we get the entailed answer candidates through the PR-ENT pipeline. 
Then we construct questions based on these tokens to extract actors and targets, 2 questions per verb. 
Finally, we pass these questions and event description to a pre-trained extractive question answering model and fill a table of [Actor, Action, Target].""")

# Explain the experimental limitations of the actor-target coding pipeline.
st.markdown("""
Here we use an extractive question answering model to find the actor and target of an event.
As this is still in experimental phase, there are some limitations:
- The only template possible is `People were [Z].`, this allows us to get a verb to construct the two questions:
- `Who was [Z]?` to find the target.
- `Who [Z] people?` to find the actor.
- `top_k = 10` and `entailment_threshold = 0.5`.
The results of the QA are given along the confidence score of the model in brackets `[xx.x%]`
""")

## Load Models
# All three pipelines are cached via @st.cache, so reruns of the script are cheap.
model_nli = load_model_nli()
model_prompting = load_model_prompting()
model_qa = load_model_qa()



st.markdown("""
### 2. Write an event description:
The first step is to write an event description that will be fed to the pipeline. This can be any text in English.
""")
# Render the text area (seeded from session state) and write the current
# widget value back into session state so it survives Streamlit reruns.
text = choose_text_menu('')
st.session_state.text = text


st.markdown("""
### 3. Run actor-target coding:
""")

# Remember which text the last computation ran on, so the results shown below
# stay attached to the text they were computed from even after the user edits
# the text area without re-running.
if "old_text_qa" not in st.session_state:
    st.session_state.old_text_qa =st.session_state.text

qa_button = st.button("Run actor-target coding")
if qa_button:
    computation_state_qa = st.text("Computation Running.")
    st.session_state.old_text_qa =st.session_state.text
    # Fixed template (see limitations above); 10 = top_k, 0.5 = entailment
    # threshold. NOTE(review): the trailing True flag's meaning is defined in
    # helpers.prompt_to_nli — confirm against that signature.
    prompt = "People were {}."
    results = prompt_to_nli(text, prompt, model_prompting, model_nli, nlp, 10, 0.5, True)
    # x[0][0] is the candidate token, x[1][1] its entailment score in [0, 1]
    # (presumed from the display format below — verify against prompt_to_nli).
    list_results = [x[0][0] + ' ' + str(int(x[1][1]*100)) + '%' for x in results]
    st.session_state.list_results_prompt_qa = list_results
    list_tokens = [x[0][0] for x in results]
    who_what_whom = get_who_what_whom_qa(text, list_tokens, model_qa)
    st.session_state.who_what_whom = who_what_whom
    computation_state_qa.text("Computation Done.")

# Results are stored in session state, so they persist across reruns and are
# re-displayed here even when the button was not pressed on this run.
if 'who_what_whom' in st.session_state:
    st.write('**Event Description**: {}'.format(st.session_state.old_text_qa))
    st.write('**Template**: "{}"; **Top K**: {}; **Entailment Threshold**: {}.'.format("People were [Z]",10, 0.5))
    display_nli_pr_results_as_list('', st.session_state.list_results_prompt_qa)
    st.write(pd.DataFrame(st.session_state.who_what_whom))