qanastek's picture
Update
49c3a11
raw history blame
No virus
2.29 kB
import time
import random
import streamlit as st
from annotated_text import annotated_text
import matplotlib
from flair.data import Sentence
from flair.models import SequenceTagger
# Model identifiers offered in the "Choose model" select box.
checkpoints = ["qanastek/pos-french"]

# Pool of colour values taken from matplotlib's named-colour table; one is
# picked at random for every annotated token.
colors = [*matplotlib.colors.cnames.values()]
@st.cache(suppress_st_warning=True, allow_output_mutation=True)
def get_model(model_name):
    """Load and return the Flair sequence tagger named ``model_name``.

    The function is wrapped in ``st.cache`` so Streamlit can reuse the loaded
    tagger across script reruns instead of loading it again.
    """
    tagger = SequenceTagger.load(model_name)
    return tagger
def getPos(s: Sentence):
    """Collect token texts of ``s`` together with their label values.

    For each token, one entry is produced per annotation layer present on
    that token; the label is the value of the first label returned by
    ``get_labels`` for that layer.

    Returns:
        A ``(texts, labels)`` pair of lists with the same length.
    """
    # Gather (text, label value) pairs in token order, then split them into
    # the two parallel lists the callers expect.
    pairs = [
        (token.text, token.get_labels(layer)[0].value)
        for token in s.tokens
        for layer in token.annotation_layers.keys()
    ]
    texts = [text for text, _ in pairs]
    labels = [value for _, value in pairs]
    return texts, labels
def getDictFromPOS(texts, labels):
    """Pair each text with its label as a ``{"text": ..., "label": ...}`` dict.

    The inputs are walked in lockstep with ``zip``, so the result is as long
    as the shorter of the two sequences.

    Returns:
        A list of dictionaries, one per (text, label) pair, in input order.
    """
    entries = []
    for word, tag in zip(texts, labels):
        entries.append({"text": word, "label": tag})
    return entries
def getAnnotatedFromPOS(texts, labels):
    """Build ``(text, label, colour)`` tuples for ``annotated_text``.

    The inputs are walked in lockstep with ``zip``; every pair gets a colour
    drawn independently at random from the module-level ``colors`` list.

    Returns:
        A list of 3-tuples in input order.
    """
    annotated = []
    for word, tag in zip(texts, labels):
        annotated.append((word, tag, random.choice(colors)))
    return annotated
def main():
    """Render the Streamlit page and run POS tagging when requested.

    The page shows a model selector, a text area pre-filled with a sample
    sentence and a "Compute" button. When the button is pressed, the text is
    wrapped in a Flair ``Sentence``, tagged with the selected model, and the
    result is displayed both as coloured annotations and as JSON. If building
    the output fails, an error message is shown and the script run is stopped.
    A footer and, after a computation, the elapsed time are rendered last.
    """
    st.title("🥖 French Part-Of-Speech Tagging")

    checkpoint = st.selectbox("Choose model", checkpoints)
    model = get_model(checkpoint)

    default_text = "George Washington est allé à Washington"
    input_text = st.text_area(
        label="Original text",
        value=default_text,
    )

    # Stays None unless the button was pressed during this run, which is how
    # the footer decides whether there is a duration to report.
    start = None
    if st.button("🧠 Compute"):
        # perf_counter is monotonic, so the measured duration cannot be
        # distorted by system clock adjustments the way time.time() can.
        start = time.perf_counter()
        with st.spinner("Search for Part-Of-Speech Tags 🔍"):
            # Build Sentence
            s = Sentence(input_text)

            # Predict tags
            model.predict(s)

            try:
                texts, labels = getPos(s)

                st.header("Labels:")
                anns = getAnnotatedFromPOS(texts, labels)
                annotated_text(*anns)

                st.header("JSON:")
                st.json(getDictFromPOS(texts, labels))
            except Exception as e:
                # Top-level UI boundary: report the failure to the user and
                # halt this script run instead of rendering a partial page.
                st.error(f"Some error occurred! {e}")
                st.stop()

    st.write("---")

    st.markdown(
        "Built by [Yanis Labrak](https://www.linkedin.com/in/yanis-labrak-8a7412145/) 🚀"
    )

    st.markdown(
        "_Source code made with [FlairNLP](https://github.com/flairNLP/flair)_"
    )

    if start is not None:
        st.text(f"prediction took {time.perf_counter() - start:.2f}s")
# Build the page only when this file is run as the main module, not when it
# is imported.
if __name__ == "__main__":
    main()