Spaces:
Sleeping
Sleeping
add spinners
Browse files
app.py
CHANGED
@@ -533,46 +533,48 @@ def main():
|
|
533 |
|
534 |
if len(raw_text) > 0:
|
535 |
|
536 |
-
|
537 |
-
|
538 |
-
|
539 |
-
|
540 |
-
|
541 |
-
|
542 |
-
|
543 |
-
|
544 |
-
|
545 |
-
|
546 |
-
|
547 |
-
|
548 |
-
|
549 |
-
|
550 |
-
|
551 |
-
|
552 |
-
|
553 |
-
|
554 |
-
|
555 |
-
|
556 |
-
|
557 |
-
|
558 |
-
|
559 |
-
|
560 |
-
|
561 |
-
|
562 |
-
|
563 |
-
|
564 |
-
|
565 |
-
|
566 |
-
|
567 |
-
|
568 |
-
|
569 |
-
|
570 |
-
|
571 |
-
|
572 |
-
|
573 |
-
|
574 |
-
|
575 |
-
|
|
|
|
|
576 |
|
577 |
else:
|
578 |
st.warning("Veuillez saisir un exemple.")
|
@@ -600,47 +602,49 @@ def main():
|
|
600 |
|
601 |
if len(file_contents) > 0:
|
602 |
|
603 |
-
|
604 |
-
if bouton_phraseur:
|
605 |
-
file_contents = apply_senter(senter, file_contents)
|
606 |
|
607 |
-
|
608 |
-
|
609 |
-
|
610 |
|
611 |
-
|
612 |
-
|
613 |
-
|
614 |
-
st.write("")
|
615 |
|
616 |
-
|
617 |
-
|
618 |
-
|
619 |
-
st.
|
620 |
|
621 |
-
|
622 |
-
|
623 |
-
|
624 |
-
|
625 |
|
626 |
-
df = create_df(entities)
|
627 |
-
st.write("")
|
628 |
-
# Display the entities as a table
|
629 |
-
with st.expander("Voir les entités sous forme de tableau"):
|
630 |
st.write("")
|
631 |
-
|
|
|
|
|
632 |
|
633 |
-
|
|
|
|
|
|
|
|
|
|
|
634 |
|
635 |
-
|
636 |
|
637 |
-
|
638 |
-
|
639 |
-
|
640 |
-
|
641 |
-
|
642 |
-
|
643 |
-
|
|
|
|
|
644 |
|
645 |
else:
|
646 |
st.warning("Le fichier importé est vide.")
|
@@ -670,42 +674,44 @@ def main():
|
|
670 |
|
671 |
if len(file_contents) > 0:
|
672 |
|
673 |
-
|
674 |
-
modified_xml = entities_to_xml(file_contents, ner)
|
675 |
|
676 |
-
|
|
|
677 |
|
678 |
-
|
679 |
-
modified_xml = html.unescape(modified_xml)
|
680 |
|
681 |
-
|
|
|
682 |
|
683 |
-
|
684 |
-
st.markdown(
|
685 |
-
"**Les entités ont été converties comme suit : (en construction)** \n\n- **CHRONOLOGIE :** ```<date>``` \n- **MOBILIER :** ```<objectType>``` \n- **STRUCTURE :** ```<name type=\"structure\">``` \n- **MATERIAU :** ```<material>``` \n- **ID :** ```<idno type=\"entite\">``` \n- **TECHNIQUE_STYLE :** ```<name type=\"technique_style\">``` \n- **DECOR :** ```<name type=\"decor\">``` \n- **ESPECE :** ```<name type=\"espece\">``` \n- **EDIFICE :** ```<placeName type=\"edifice\">``` \n- **PEUPLE_CULTURE :** ```<orgName type=\"peuple_culture\">``` \n- **PERSONNE :** ```<persName>``` \n- **ORG :** ```<orgName>``` \n- **GPE :** ```<placeName>``` \n- **LOC :** ```<geogName>``` \n- **LIEUDIT_SITE :** ```<placeName type=\"lieudit_site\">```")
|
686 |
-
st.write("")
|
687 |
|
688 |
-
|
689 |
-
|
690 |
-
|
691 |
-
|
692 |
-
|
693 |
-
|
694 |
-
|
695 |
-
|
696 |
-
|
697 |
-
|
698 |
-
|
699 |
-
|
700 |
-
|
701 |
-
|
702 |
-
|
703 |
-
|
704 |
-
|
705 |
-
|
706 |
-
|
707 |
-
|
708 |
-
|
|
|
|
|
|
|
|
|
|
|
709 |
|
710 |
# ===== MODE: CONLL2002 & CSV EXPORT =====
|
711 |
if choix_xml == "Ne pas conserver les balises (export conll2002 ou csv du <body> uniquement)":
|
@@ -733,53 +739,55 @@ def main():
|
|
733 |
|
734 |
if len(file_contents) > 0:
|
735 |
|
736 |
-
st.
|
737 |
-
|
738 |
-
|
|
|
|
|
739 |
|
740 |
-
|
741 |
|
742 |
-
|
743 |
-
|
744 |
-
|
745 |
|
746 |
-
|
747 |
-
|
748 |
-
|
749 |
|
750 |
-
|
751 |
-
|
752 |
-
|
753 |
-
|
754 |
|
755 |
-
|
756 |
-
|
757 |
-
|
758 |
-
|
759 |
|
760 |
-
|
761 |
-
|
762 |
-
|
763 |
-
|
764 |
|
765 |
-
|
766 |
-
st.write("")
|
767 |
-
# Display the entities as a table
|
768 |
-
with st.expander("Voir les entités sous forme de tableau"):
|
769 |
st.write("")
|
770 |
-
|
|
|
|
|
|
|
771 |
|
772 |
-
|
773 |
|
774 |
-
|
775 |
|
776 |
-
|
777 |
-
|
778 |
-
|
779 |
-
|
780 |
-
|
781 |
-
|
782 |
-
|
783 |
|
784 |
st.markdown("# ")
|
785 |
st.markdown("# ")
|
|
|
533 |
|
534 |
if len(raw_text) > 0:
|
535 |
|
536 |
+
with st.spinner("Application du modèle.."):
|
537 |
+
|
538 |
+
# If requested, apply the sentence segmentation model
|
539 |
+
if bouton_phraseur:
|
540 |
+
raw_text = apply_senter(senter, raw_text)
|
541 |
+
|
542 |
+
# Apply ner model
|
543 |
+
doc = get_doc(ner, raw_text)
|
544 |
+
entities = get_entities(doc)
|
545 |
+
|
546 |
+
st.write("")
|
547 |
+
st.subheader("Résultats :")
|
548 |
+
st.write("")
|
549 |
+
st.write("")
|
550 |
+
|
551 |
+
# Display the entities with displacy
|
552 |
+
my_displacy = create_displacy(raw_text, entities)
|
553 |
+
st.markdown(my_displacy, unsafe_allow_html=True)
|
554 |
+
|
555 |
+
st.write("")
|
556 |
+
# Download results as a conll2002 file
|
557 |
+
doc_to_conll(doc)
|
558 |
+
st.write("")
|
559 |
+
|
560 |
+
df = create_df(entities)
|
561 |
+
st.write("")
|
562 |
+
# Display the entities as a table
|
563 |
+
st.markdown("**Tableau regroupant les entités détectées**")
|
564 |
+
st.write("")
|
565 |
+
st.dataframe(df, use_container_width=True)
|
566 |
+
|
567 |
+
csv = df_to_csv(df)
|
568 |
+
|
569 |
+
st.write("")
|
570 |
+
|
571 |
+
# Download results as a csv file
|
572 |
+
st.download_button(
|
573 |
+
label="Télécharger le fichier CSV",
|
574 |
+
data=csv,
|
575 |
+
file_name="prediction_arches.csv",
|
576 |
+
mime="text/csv",
|
577 |
+
)
|
578 |
|
579 |
else:
|
580 |
st.warning("Veuillez saisir un exemple.")
|
|
|
602 |
|
603 |
if len(file_contents) > 0:
|
604 |
|
605 |
+
with st.spinner("Application du modèle.."):
|
|
|
|
|
606 |
|
607 |
+
# If requested, apply the sentence segmentation model
|
608 |
+
if bouton_phraseur:
|
609 |
+
file_contents = apply_senter(senter, file_contents)
|
610 |
|
611 |
+
# Apply the ner model
|
612 |
+
doc = get_doc(ner, file_contents)
|
613 |
+
entities = get_entities(doc)
|
|
|
614 |
|
615 |
+
st.write("")
|
616 |
+
st.subheader("Résultats :")
|
617 |
+
st.write("")
|
618 |
+
st.write("")
|
619 |
|
620 |
+
# Display the entities with displacy
|
621 |
+
with st.expander("Voir les entités dans le texte"):
|
622 |
+
my_displacy = create_displacy(file_contents, entities)
|
623 |
+
st.markdown(my_displacy, unsafe_allow_html=True)
|
624 |
|
|
|
|
|
|
|
|
|
625 |
st.write("")
|
626 |
+
# Download the results as a conll2002 file
|
627 |
+
doc_to_conll(doc, updated_name)
|
628 |
+
st.write("")
|
629 |
|
630 |
+
df = create_df(entities)
|
631 |
+
st.write("")
|
632 |
+
# Display the entities as a table
|
633 |
+
with st.expander("Voir les entités sous forme de tableau"):
|
634 |
+
st.write("")
|
635 |
+
st.dataframe(df, use_container_width=True)
|
636 |
|
637 |
+
csv = df_to_csv(df)
|
638 |
|
639 |
+
st.write("")
|
640 |
+
|
641 |
+
# Download the results as a csv file
|
642 |
+
st.download_button(
|
643 |
+
label="Télécharger le fichier CSV",
|
644 |
+
data=csv,
|
645 |
+
file_name=updated_name + ".csv",
|
646 |
+
mime="text/csv",
|
647 |
+
)
|
648 |
|
649 |
else:
|
650 |
st.warning("Le fichier importé est vide.")
|
|
|
674 |
|
675 |
if len(file_contents) > 0:
|
676 |
|
677 |
+
with st.spinner("Application du modèle.."):
|
|
|
678 |
|
679 |
+
# Apply the ner model to an xml file
|
680 |
+
modified_xml = entities_to_xml(file_contents, ner)
|
681 |
|
682 |
+
if modified_xml is not None:
|
|
|
683 |
|
684 |
+
# Convert HTML entities back to characters
|
685 |
+
modified_xml = html.unescape(modified_xml)
|
686 |
|
687 |
+
st.write("")
|
|
|
|
|
|
|
688 |
|
689 |
+
with st.expander("Au sujet du mapping XML des entités"):
|
690 |
+
st.markdown(
|
691 |
+
"**Les entités ont été converties comme suit : (en construction)** \n\n- **CHRONOLOGIE :** ```<date>``` \n- **MOBILIER :** ```<objectType>``` \n- **STRUCTURE :** ```<name type=\"structure\">``` \n- **MATERIAU :** ```<material>``` \n- **ID :** ```<idno type=\"entite\">``` \n- **TECHNIQUE_STYLE :** ```<name type=\"technique_style\">``` \n- **DECOR :** ```<name type=\"decor\">``` \n- **ESPECE :** ```<name type=\"espece\">``` \n- **EDIFICE :** ```<placeName type=\"edifice\">``` \n- **PEUPLE_CULTURE :** ```<orgName type=\"peuple_culture\">``` \n- **PERSONNE :** ```<persName>``` \n- **ORG :** ```<orgName>``` \n- **GPE :** ```<placeName>``` \n- **LOC :** ```<geogName>``` \n- **LIEUDIT_SITE :** ```<placeName type=\"lieudit_site\">```")
|
692 |
+
st.write("")
|
693 |
+
|
694 |
+
# Display the modified XML
|
695 |
+
with st.expander("Contenu XML modifié"):
|
696 |
+
# Wrap the code
|
697 |
+
with stylable_container(
|
698 |
+
"codeblock",
|
699 |
+
"""
|
700 |
+
code {
|
701 |
+
white-space: pre-wrap !important;
|
702 |
+
}
|
703 |
+
""",
|
704 |
+
):
|
705 |
+
st.code(modified_xml, language="xml")
|
706 |
+
|
707 |
+
# Download the modified XML
|
708 |
+
# We add a "_entites" at the end of the file name to differentiate it from the original
|
709 |
+
st.download_button(
|
710 |
+
label="Télécharger le fichier xml modifié",
|
711 |
+
data=modified_xml,
|
712 |
+
file_name=updated_name + "_entites.xml",
|
713 |
+
mime="xml",
|
714 |
+
)
|
715 |
|
716 |
# ===== MODE: CONLL2002 & CSV EXPORT =====
|
717 |
if choix_xml == "Ne pas conserver les balises (export conll2002 ou csv du <body> uniquement)":
|
|
|
739 |
|
740 |
if len(file_contents) > 0:
|
741 |
|
742 |
+
with st.spinner("Application du modèle.."):
|
743 |
+
|
744 |
+
st.write("")
|
745 |
+
# Strip the <body> of its tags
|
746 |
+
body_text = get_body_text(file_contents)
|
747 |
|
748 |
+
if body_text is not None:
|
749 |
|
750 |
+
# If requested, apply the sentence segmentation model
|
751 |
+
if bouton_phraseur:
|
752 |
+
body_text = apply_senter(senter, body_text)
|
753 |
|
754 |
+
# Apply ner model
|
755 |
+
doc = get_doc(ner, body_text)
|
756 |
+
entities = get_entities(doc)
|
757 |
|
758 |
+
st.write("")
|
759 |
+
st.subheader("Résultats :")
|
760 |
+
st.write("")
|
761 |
+
st.write("")
|
762 |
|
763 |
+
# Display the entities with displacy
|
764 |
+
with st.expander("Voir les entités dans le texte"):
|
765 |
+
my_displacy = create_displacy(body_text, entities)
|
766 |
+
st.markdown(my_displacy, unsafe_allow_html=True)
|
767 |
|
768 |
+
st.write("")
|
769 |
+
# Download the results as a conll2002 file
|
770 |
+
doc_to_conll(doc, updated_name)
|
771 |
+
st.write("")
|
772 |
|
773 |
+
df = create_df(entities)
|
|
|
|
|
|
|
774 |
st.write("")
|
775 |
+
# Display the entities as a table
|
776 |
+
with st.expander("Voir les entités sous forme de tableau"):
|
777 |
+
st.write("")
|
778 |
+
st.dataframe(df, use_container_width=True)
|
779 |
|
780 |
+
csv = df_to_csv(df)
|
781 |
|
782 |
+
st.write("")
|
783 |
|
784 |
+
# Download the results as a csv file
|
785 |
+
st.download_button(
|
786 |
+
label="Télécharger le fichier CSV",
|
787 |
+
data=csv,
|
788 |
+
file_name=updated_name + ".csv",
|
789 |
+
mime="text/csv",
|
790 |
+
)
|
791 |
|
792 |
st.markdown("# ")
|
793 |
st.markdown("# ")
|