import sys
import os
import streamlit as st
from PIL import Image
import pandas as pd
from transformers import pipeline
import spacy
import en_core_web_lg

# Make the parent directory importable so that `helpers` can be found.
current = os.path.dirname(os.path.realpath(__file__))
parent = os.path.dirname(current)
sys.path.append(parent)

from helpers import display_nli_pr_results_as_list, prompt_to_nli, get_who_what_whom_qa


@st.cache(allow_output_mutation=True)
def load_spacy_pipeline():
    return en_core_web_lg.load()


def choose_text_menu(text):
    if 'text' not in st.session_state:
        st.session_state.text = 'Several demonstrators were injured.'
    text = st.text_area('Event description', st.session_state.text)
    return text


# Load models in cache
@st.cache(allow_output_mutation=True)
def load_model_prompting():
    return pipeline("fill-mask", model="distilbert-base-uncased")


@st.cache(allow_output_mutation=True)
def load_model_nli():
    return pipeline(task="sentiment-analysis", model="roberta-large-mnli")


@st.cache(allow_output_mutation=True)
def load_model_qa():
    model_name = "deepset/roberta-base-squad2"
    model = pipeline(model=model_name, tokenizer=model_name, task="question-answering")
    return model


nlp = load_spacy_pipeline()

### App START
st.markdown("""# Rethinking the Event Coding Pipeline with Prompt Entailment
## Author: Anonymized for submission""")

st.markdown("### 1. Actor-target coding (experimental):")


@st.cache()
def load_qa_image():
    return Image.open('pipeline_qa.png')


st.image(load_qa_image(), caption="""Actor-target Coding Flow. First we get the entailed answer candidates through the PR-ENT pipeline. Then we construct questions based on these tokens to extract actors and targets, 2 questions per verb. Finally, we pass these questions and the event description to a pre-trained extractive question answering model and fill a table of [Actor, Action, Target].""")

st.markdown("""
Here we use an extractive question answering model to find the actor and target of an event.
As this is still in an experimental phase, there are some limitations:
- The only template possible is `People were [Z].`; this allows us to get a verb to construct the two questions:
    - `Who was [Z]?` to find the target.
    - `Who [Z] people?` to find the actor.
- `top_k = 10` and `entailment_threshold = 0.5`.

The results of the QA are given along with the confidence score of the model in brackets `[xx.x%]`.
""")

## Load models
model_nli = load_model_nli()
model_prompting = load_model_prompting()
model_qa = load_model_qa()

st.markdown("""
### 2. Write an event description:
The first step is to write an event description that will be fed to the pipeline. This can be any text in English.
""")

text = choose_text_menu('')
st.session_state.text = text

st.markdown("""
### 3. Run actor-target coding:
""")

if "old_text_qa" not in st.session_state:
    st.session_state.old_text_qa = st.session_state.text

qa_button = st.button("Run actor-target coding")

if qa_button:
    computation_state_qa = st.text("Computation Running.")
    st.session_state.old_text_qa = st.session_state.text
    prompt = "People were {}."
    results = prompt_to_nli(text, prompt, model_prompting, model_nli, nlp, 10, 0.5, True)
    list_results = [x[0][0] + ' ' + str(int(x[1][1] * 100)) + '%' for x in results]
    st.session_state.list_results_prompt_qa = list_results
    list_tokens = [x[0][0] for x in results]
    who_what_whom = get_who_what_whom_qa(text, list_tokens, model_qa)
    st.session_state.who_what_whom = who_what_whom
    computation_state_qa.text("Computation Done.")

if 'who_what_whom' in st.session_state:
    st.write('**Event Description**: {}'.format(st.session_state.old_text_qa))
    st.write('**Template**: "{}"; **Top K**: {}; **Entailment Threshold**: {}.'.format("People were [Z]", 10, 0.5))
    display_nli_pr_results_as_list('', st.session_state.list_results_prompt_qa)
    st.write(pd.DataFrame(st.session_state.who_what_whom))
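

# Illustrative sketch only, not called by the app: a minimal example of how the two
# questions described in the app text ("Who was [Z]?" for the target, "Who [Z] people?"
# for the actor) could be posed to the extractive QA model for a single entailed verb.
# The actual logic lives in helpers.get_who_what_whom_qa and may differ in details such
# as answer filtering and score formatting; the function name below is hypothetical.
def _example_actor_target_for_token(event_text, verb_token, qa_pipeline):
    # Build the two questions from the entailed verb, e.g. "injured" ->
    # "Who injured people?" (actor) and "Who was injured?" (target).
    actor = qa_pipeline(question=f"Who {verb_token} people?", context=event_text)
    target = qa_pipeline(question=f"Who was {verb_token}?", context=event_text)
    # Each pipeline call returns a dict with an "answer" span and a confidence "score",
    # which the app reports in brackets, e.g. "[87.3%]".
    return {
        "Actor": f'{actor["answer"]} [{actor["score"]:.1%}]',
        "Action": verb_token,
        "Target": f'{target["answer"]} [{target["score"]:.1%}]',
    }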