|
import streamlit as st |
|
from streamlit import session_state |
|
from random import choice |
|
from annotated_text import annotated_text |
|
from resources import * |
|
from helpers import * |
|
|
|
# Seed per-session defaults. Each key is only written when it is missing, so
# values already stored in session_state are left untouched.
_SESSION_DEFAULTS = {
    "counter": 0,      # index into `sentences` for the example being shown
    "augment": False,  # True selects the augmented classifier
}
for _name, _initial in _SESSION_DEFAULTS.items():
    if _name not in session_state:
        session_state[_name] = _initial
|
|
|
# Name of the backbone shared by both classifiers; it is embedded in the model
# identifiers passed to load_model below.
base_model = "xlnet-base-cased"

# Example sentences shown in the UI (loader comes from the star imports above).
sentences = load_sentences()

# Both identifiers share the same "Dagobert42/<base_model>" prefix and differ
# only in their suffix.
# NOTE(review): whether load_model caches its result across reruns cannot be
# seen from this file - confirm in its definition.
_model_prefix = "Dagobert42/" + base_model
baseline_classifier = load_model(_model_prefix + "-biored-finetuned")
augmented_classifier = load_model(_model_prefix + "-biored-augmented-super")
|
|
|
# Page heading.
st.title("Semantic Frame Augmentation")
st.subheader("Analysing challenging domains with only a handful of examples")

# Introductory markdown shown under the heading. The text contains no
# placeholders, so plain string literals are sufficient.
_INTRO_MARKDOWN = (
    "This space uses models based on [XLNet](https://huggingface.co/xlnet-base-cased) to identify medical entities in a text.\n"
    "The following is a random sentence from [bigbio/biored](https://huggingface.co/datasets/bigbio/biored).\n"
    "It was tagged by a model which was trained on just 200 examples from the original dataset.\n"
    "It is very possible that there are some mistakes.\n"
)
st.write(_INTRO_MARKDOWN)
|
|
|
# Pick the sentence for the current counter value. The counter only ever grows
# (the button callback adds 1 per click), so wrap it around the number of
# available sentences instead of indexing past the end of the collection.
_sentence_count = len(sentences)
if _sentence_count == 0:
    # Nothing to show: report it and halt this script run rather than failing
    # with a ZeroDivisionError / IndexError below.
    st.error("No example sentences are available.")
    st.stop()
txt = sentences[session_state.counter % _sentence_count]

# Choose the caption and the classifier together so they cannot disagree.
if session_state.augment:
    st.write("Example with data augmentation:")
    tokens = augmented_classifier(txt)
else:
    st.write("Example without data augmentation:")
    tokens = baseline_classifier(txt)

# Render the tagged sentence with highlighted annotations.
annotated_text(annotate_sentence(txt, tokens))
# Also write out the annotation structure itself.
# NOTE(review): this looks like leftover debug output - confirm it is wanted.
st.write(annotate_sentence(txt, tokens))
|
|
|
st.write("Now try the augmented model. Hopefully it's a bit better :)")


def refresh_model():
    """Flip the ``augment`` flag stored in session state.

    Registered as the ``on_change`` callback of the toggle below.
    """
    session_state["augment"] = not session_state["augment"]


# The label mirrors the current flag, and the flag is also passed as the
# toggle's value.
if session_state.augment:
    _toggle_label = "augmentations on"
else:
    _toggle_label = "augmentations off"
st.toggle(_toggle_label, value=session_state.augment, on_change=refresh_model)
|
|
|
st.write("Or load another sentence")


def refresh_example():
    """Advance the sentence counter stored in session state by one.

    Registered as the ``on_click`` callback of the button below.
    """
    session_state["counter"] = session_state["counter"] + 1


st.button(":twisted_rightwards_arrows:", on_click=refresh_example)
|
|