Mise à jour
Browse files- README.md +6 -5
- app.py +85 -0
- requirements.txt +2 -0
README.md
CHANGED
@@ -1,11 +1,12 @@
|
|
1 |
---
|
2 |
-
title:
|
3 |
-
emoji:
|
4 |
-
colorFrom:
|
5 |
-
colorTo:
|
6 |
sdk: streamlit
|
|
|
7 |
app_file: app.py
|
8 |
-
pinned:
|
9 |
---
|
10 |
|
11 |
# Configuration
|
|
|
1 |
---
|
2 |
+
title: Étiqueteur morphosyntaxique étendu pour le français
|
3 |
+
emoji: 🥖
|
4 |
+
colorFrom: yellow
|
5 |
+
colorTo: yellow
|
6 |
sdk: streamlit
|
7 |
+
# sdk: gradio
|
8 |
app_file: app.py
|
9 |
+
pinned: true
|
10 |
---
|
11 |
|
12 |
# Configuration
|
app.py
ADDED
@@ -0,0 +1,85 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import time
|
2 |
+
import streamlit as st
|
3 |
+
from annotated_text import annotated_text
|
4 |
+
|
5 |
+
from flair.data import Sentence
|
6 |
+
from flair.models import SequenceTagger
|
7 |
+
|
8 |
+
# Model identifiers offered in the "Choix du modèle" select box.
checkpoints = ["qanastek/pos-french"]
|
11 |
+
|
12 |
+
# Maps each tag label (including the '<START>'/'<STOP>' markers) to the hex
# background colour used when rendering annotated tokens; getAnnotatedFromPOS
# looks labels up in this table.
colors = {'DET': '#b9d9a6', 'NFP': '#eddc92', 'ADJFP': '#95e9d7', 'AUX': '#e797db', 'VPPMS': '#9ff48b', 'ADV': '#ed92b4', 'PREP': '#decfa1', 'PDEMMS': '#ada7d7', 'NMS': '#85fad8', 'COSUB': '#8ba4f4', 'PINDMS': '#e7a498', 'PPOBJMS': '#e5c79a', 'VERB': '#eb94b6', 'DETFS': '#e698ae', 'NFS': '#d9d1a6', 'YPFOR': '#96e89f', 'VPPFS': '#e698c6', 'PUNCT': '#ddbfa2', 'DETMS': '#f788cd', 'PROPN': '#f19c8d', 'ADJMS': '#8ed5f0', 'PPER3FS': '#c4d8a6', 'ADJFS': '#e39bdc', 'COCO': '#8df1e2', 'NMP': '#d7f787', 'PREL': '#f986f0', 'PPER1S': '#878df8', 'ADJMP': '#83fe80', 'VPPMP': '#a6d8c9', 'DINTMS': '#d9a6cc', 'PPER3MS': '#a1deda', 'PPER3MP': '#8fefe1', 'PREF': '#e3c79b', 'ADJ': '#fb81fe', 'DINTFS': '#d5fe81', 'CHIF': '#8084ff', 'XFAMIL': '#dd80fe', 'PRELFS': '#9ce3e3', 'SYM': '#9fbddf', 'NOUN': '#dea1b5', 'MOTINC': '#93b8ec', 'PINDFS': '#f787a5', 'PPOBJMP': '#dca3d2', 'NUM': '#b2e897', 'PREFP': '#e39cd0', 'PDEMFS': '#d8a7cb', 'VPPFP': '#83d9fb', 'PPER3FP': '#a1ddaa', 'PPOBJFS': '#e9ca95', 'PINDMP': '#e897e3', 'PRON': '#e29dcc', 'PPOBJFP': '#86f9dc', 'PART': '#aa96e8', 'PDEMMP': '#b2d7a8', 'PRELMS': '#e39bde', 'PDEMFP': '#b1e599', 'PRELFP': '#bbe39b', 'INTJ': '#bde996', 'PREFS': '#b39be4', 'PINDFP': '#e2e897', 'PRELMP': '#a5c0da', 'PINTFS': '#ceff80', 'PPER2S': '#d5a2dd', 'VPPRE': '#e78af4', '<START>': '#e6a899', '<STOP>': '#9adde5'}
|
13 |
+
|
14 |
+
def _cache_model(loader):
    """Wrap *loader* with the best Streamlit cache available.

    ``st.cache`` has been deprecated since Streamlit 1.18 in favour of
    ``st.cache_resource`` for unserialisable global resources such as ML
    models. Older Streamlit versions do not have ``cache_resource``, so the
    legacy decorator (with its original arguments) is kept as a fallback.
    """
    cache_resource = getattr(st, "cache_resource", None)
    if cache_resource is not None:
        return cache_resource(loader)
    return st.cache(suppress_st_warning=True, allow_output_mutation=True)(loader)


@_cache_model
def get_model(model_name):
    """Load the sequence tagger for *model_name*, cached across reruns.

    Args:
        model_name: Identifier passed to ``SequenceTagger.load``.

    Returns:
        The loaded tagger; the same object is reused for repeated calls with
        the same name, so the model is not reloaded on every script rerun.
    """
    return SequenceTagger.load(model_name)
|
17 |
+
|
18 |
+
def getPos(s: "Sentence"):
    """Collect token texts and their first label from a tagged sentence.

    One (text, label) pair is produced per annotation layer of each token, so
    a token carrying several layers appears once per layer.

    Args:
        s: A sentence whose tokens have already been tagged.

    Returns:
        A ``(texts, labels)`` tuple of two parallel lists.
    """
    texts = []
    labels = []
    for token in s.tokens:
        for layer in token.annotation_layers:
            layer_labels = token.get_labels(layer)
            if not layer_labels:
                # A layer without any label has nothing to report; skipping it
                # avoids an IndexError on the [0] access below.
                continue
            texts.append(token.text)
            labels.append(layer_labels[0].value)
    return texts, labels
|
26 |
+
|
27 |
+
def getDictFromPOS(texts, labels):
    """Pair each text with its label as a list of JSON-friendly records.

    Args:
        texts: Token texts.
        labels: Labels, parallel to *texts*.

    Returns:
        A list of ``{"texte": ..., "étiquette": ...}`` dicts, one per pair;
        pairing stops at the shorter of the two inputs.
    """
    records = []
    for word, tag in zip(texts, labels):
        records.append({"texte": word, "étiquette": tag})
    return records
|
29 |
+
|
30 |
+
def getAnnotatedFromPOS(texts, labels, palette=None, default_color="#dddddd"):
    """Build ``(text, label, colour)`` tuples for ``annotated_text``.

    Args:
        texts: Token texts.
        labels: Labels, parallel to *texts*.
        palette: Optional mapping from label to colour; defaults to the
            module-level ``colors`` table.
        default_color: Colour used for labels missing from the palette, so an
            unexpected tag is still rendered instead of raising ``KeyError``
            and aborting the whole display.

    Returns:
        A list of ``(text, label, colour)`` tuples, one per pair; pairing
        stops at the shorter of the two inputs.
    """
    if palette is None:
        palette = colors
    return [
        (text, tag, palette.get(tag, default_color))
        for text, tag in zip(texts, labels)
    ]
|
32 |
+
|
33 |
+
def main():
    """Render the Streamlit page: model choice, text input, tagging results.

    On "Calculer" the input text is tagged with the selected model and shown
    both as coloured annotations and as JSON. Any failure while tagging or
    rendering is reported with ``st.error`` and stops the script run.
    """
    st.title("🥖 Étiqueteur morphosyntaxique étendu pour le français")

    checkpoint = st.selectbox("Choix du modèle", checkpoints)
    model = get_model(checkpoint)

    default_text = "George Washington est allé à Washington"
    input_text = st.text_area(
        label="Texte",
        value=default_text,
    )

    elapsed = None
    if st.button("🧠 Calculer"):
        if not input_text.strip():
            # Nothing to tag: tell the user instead of running the model on
            # an empty sentence.
            st.warning("Veuillez saisir un texte à analyser.")
        else:
            # perf_counter is monotonic, unlike time.time(), so the measured
            # duration cannot be skewed by system clock adjustments.
            start = time.perf_counter()
            with st.spinner("Calcul des étiquettes morphosyntaxiques en cours... 🔍"):
                try:
                    # Build the sentence and predict its tags inside the try
                    # so that model failures also reach the error message.
                    s = Sentence(input_text)
                    model.predict(s)

                    texts, labels = getPos(s)

                    st.header("Étiquettes:")
                    anns = getAnnotatedFromPOS(texts, labels)
                    annotated_text(*anns)

                    st.header("JSON:")
                    st.json(getDictFromPOS(texts, labels))

                except Exception as e:
                    # Top-level UI boundary: report the problem and halt this
                    # script run.
                    st.error(f"Une erreur s'est produite! {e}")
                    st.stop()
            # Stop the clock here so footer rendering is not counted.
            elapsed = time.perf_counter() - start

    st.write("---")

    st.markdown(
        "Construit par [Yanis Labrak](https://www.linkedin.com/in/yanis-labrak-8a7412145/) & [Richard Dufour](https://cv.archives-ouvertes.fr/richard-dufour) avec [FlairNLP](https://github.com/flairNLP/flair) 🚀"
    )
    st.markdown(
        "_Ce travail a été soutenu financièrement par [Zenidoc](https://zenidoc.fr/)_"
    )

    if elapsed is not None:
        st.text(f"La prédiction a pris {elapsed:.2f}s")
|
83 |
+
|
84 |
+
# Build the page only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()
|
requirements.txt
ADDED
@@ -0,0 +1,2 @@
|
|
|
|
|
|
|
1 |
+
flair==0.8.0.post1
|
2 |
+
st-annotated-text
|