Spaces:

qanastek
/

French-Part-Of-Speech-Tagging

Running

File size: 2,372 Bytes

64f92ba
049266b
 
2c5db9f
 
 
049266b
 
 
 
 
 
 
 
2c5db9f
 
049266b
 
2c5db9f
049266b
64f92ba
 
 
 
 
 
 
 
 
 
 
049266b
2c5db9f
 
 
049266b
 
e6c30c5
049266b
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
601925f
049266b
 
601925f
049266b
 
64f92ba
 
 
dbcd12b
64f92ba
2c5db9f
 
 
6cda240
64f92ba
dbcd12b
8dffc3b
64f92ba
049266b
 
 
 
 
64f92ba
049266b

import json
import time
import streamlit as st
from annotated_text import annotated_text

import matplotlib

from flair.data import Sentence
from flair.models import SequenceTagger

# Model identifiers offered in the "Choose model" selector of the page.
checkpoints = ["qanastek/pos-french"]

# Values of matplotlib's named-colour table, in the table's own order.
# Not referenced by the rest of the visible code; kept as a module-level name.
colors = [*matplotlib.colors.cnames.values()]

# NOTE(review): newer Streamlit releases deprecate ``st.cache`` in favour of
# ``st.cache_resource`` -- confirm against the pinned Streamlit version before
# migrating.
@st.cache(allow_output_mutation=True, suppress_st_warning=True)
def get_model(model_name):
    """Load the Flair sequence tagger named ``model_name``.

    The call is wrapped in Streamlit's cache decorator, so the loaded tagger
    is reused instead of being loaded again for the same ``model_name``.

    Args:
        model_name: Identifier handed to ``SequenceTagger.load``.

    Returns:
        The object returned by ``SequenceTagger.load``.
    """
    tagger = SequenceTagger.load(model_name)
    return tagger

def getPos(s: Sentence):
    """Collect the token texts and their first label per annotation layer.

    Every token of ``s`` contributes one entry for each key of its
    ``annotation_layers``; a token carrying several layers therefore appears
    several times, and a token without any layer is skipped.

    Args:
        s: Sentence whose tokens have already been tagged.

    Returns:
        A ``(texts, labels)`` tuple of two lists of equal length, where
        ``labels[i]`` is the value of the first label found for ``texts[i]``.
    """
    tagged = [
        (token.text, token.get_labels(layer)[0].value)
        for token in s.tokens
        for layer in token.annotation_layers.keys()
    ]
    texts = [word for word, _ in tagged]
    labels = [tag for _, tag in tagged]
    return texts, labels

def getDictFromPOS(texts, labels):
    """Pair each text with its label as a ``{"text", "label"}`` dictionary.

    Args:
        texts: Iterable of token texts.
        labels: Iterable of labels, matched to ``texts`` by position.

    Returns:
        A list of dictionaries, one per pair; pairing stops at the shorter
        of the two inputs.
    """
    records = []
    for word, tag in zip(texts, labels):
        records.append({"text": word, "label": tag})
    return records

def getAnnotatedFromPOS(texts, labels):
    """Build ``(text, label, color)`` tuples for the annotated-text view.

    Args:
        texts: Iterable of token texts.
        labels: Iterable of labels, matched to ``texts`` by position.

    Returns:
        A list of 3-tuples, one per pair, all sharing the same background
        colour; pairing stops at the shorter of the two inputs.
    """
    highlight = "#8ef"
    annotations = []
    for word, tag in zip(texts, labels):
        annotations.append((word, tag, highlight))
    return annotations

def main():
    """Render the Streamlit page and tag the entered text on demand.

    The page shows a model selector and a text area. When the "Compute"
    button is pressed, the text is tagged with the selected model and the
    result is displayed three ways: the plain label sequence, the text with
    highlighted labels, and a JSON list. The elapsed time is written below
    the footer. If rendering the result fails, the error is shown and the
    script run is stopped.
    """

    st.title("🥖 French Part-Of-Speech Tagging")

    checkpoint = st.selectbox("Choose model", checkpoints)
    model = get_model(checkpoint)

    default_text = "George Washington est allé à Washington"
    input_text = st.text_area(
        label="Original text",
        value=default_text,
    )

    # Stays None unless the button was pressed during this script run.
    start = None
    if st.button("🧠 Compute"):
        # Monotonic clock: elapsed time cannot be skewed by wall-clock changes.
        start = time.perf_counter()
        with st.spinner("Search for Part-Of-Speech Tags 🔍"):

            # Build Sentence
            s = Sentence(input_text)

            # predict tags
            model.predict(s)

            try:
                texts, labels = getPos(s)

                st.header("Labels:")
                st.write(" ".join(labels))

                # Distinct title so the highlighted view is not mistaken for
                # the raw label list above.
                st.header("Annotated text:")
                anns = getAnnotatedFromPOS(texts, labels)
                # annotated_text is variadic: hand over each tuple as its own
                # argument rather than one list, which is the call form shown
                # in the st-annotated-text documentation.
                annotated_text(*anns)

                st.header("JSON:")
                st.json(getDictFromPOS(texts, labels))

            except Exception as e:
                # UI boundary: surface the failure to the user, then end this
                # script run so nothing else is rendered.
                st.error(f"Some error occurred! {e}")
                st.stop()

    st.write("---")

    st.markdown(
        "Built by [Yanis Labrak](https://www.linkedin.com/in/yanis-labrak-8a7412145/) 🚀"
    )
    st.markdown(
        "_Source code made with [FlairNLP](https://github.com/flairNLP/flair)_"
    )

    if start is not None:
        st.text(f"prediction took {time.perf_counter() - start:.2f}s")


# Build the page only when this file is run as the main script, not on import.
if __name__ == "__main__":
    main()