"""Streamlit front end for looking up word forms in the PortiLexicon-UD lexicon.

The page shows a localized title/introduction, a single text input used to
query the lexicon, a "how to cite" expander and a footer with partner logos.
"""

import base64
import mimetypes
from pathlib import Path

import streamlit as st

from UDlexPT import UDlexPT

# Define your dictionary
# NOTE(review): Streamlit re-runs the script on each interaction, so this
# object is presumably rebuilt every time; consider caching it (e.g. with
# st.cache_resource) if the installed Streamlit version supports it.
lex = UDlexPT()

# NOTE(review): several strings passed with unsafe_allow_html=True below contain
# only text and line breaks. If HTML markup was intended there (headings,
# <br>, links, CSS), it is not present in this file and should be restored.

# Interface strings, keyed by the flag emoji of each supported language.
translations = {
    '🇧🇷': {
        'title': 'PortiLexicon-UD',
        'subtitle': 'Um léxico para Português Brasileiro de acordo com o modelo Universal Dependencies',
        'input_label': 'Entre uma palavra para buscar:',
        'buscando': 'Buscando',
        'intro': 'PortiLexicon-UD contém mais de 1,2 milhão de formas de palavras em português brasileiro, juntamente com suas respectivas informações morfológicas e morfossintáticas, seguindo o modelo internacional do Universal Dependencies. O léxico é baseado no Unitex-PB e faz parte do projeto POeTiSA.',
        'to_cite': 'Para citar o PortiLexicon-UD',
        'not_found': 'Esta palavra não existe no Portilexicon (ainda...)',
        'multiword_error': 'Essa versão do Portilexicon não possui multipalavras ou palavras compostas.',
        'dev': 'Desenvolvido por',
    },
    '🇺🇸': {
        'title': 'PortiLexicon-UD',
        'subtitle': 'A lexicon for Brazilian Portuguese according to Universal Dependencies model',
        'input_label': 'Enter a word to search:',
        'buscando': 'Searching',
        'intro': (
            'PortiLexicon-UD contains over 1.2 million word forms in Brazilian Portuguese with their respective morphological and morphosyntactic information, following the Universal Dependencies international model. \n'
            'The lexicon is based on Unitex-PB and is part of the POeTiSA project.'
        ),
        'to_cite': 'To cite PortiLexicon-UD',
        'not_found': 'This word does not exist in Portilexicon (yet...)',
        'multiword_error': 'This version of Portilexicon does not handle multiwords or compound words.',
        'dev': 'Developed by',
    },
    '🇫🇷': {
        'title': 'PortiLexicon-UD',
        'subtitle': 'Un lexique pour le portugais brésilien selon le modèle Universal Dependencies',
        'input_label': 'Entrez un mot à rechercher :',
        'buscando': 'Recherche en cours',
        'intro': 'PortiLexicon-UD contient plus de 1,2 million de formes de mots en portugais brésilien, accompagnées de leurs informations morphologiques et morphosyntaxiques respectives, conformément au modèle international des Universal Dependencies. Le lexique est basé sur Unitex-PB et fait partie du projet POeTiSA.',
        'to_cite': 'Pour citer PortiLexicon-UD',
        'not_found': 'Ce mot n’existe pas dans PortiLexicon (pas encore...)',
        'multiword_error': 'Cette version de PortiLexicon ne prend pas en charge les mots composés ou les expressions multi-mots.',
        'dev': 'Développé par',
    },
    '🇮🇹': {
        'title': 'PortiLexicon-UD',
        'subtitle': 'Un lessico per il portoghese brasiliano secondo il modello Universal Dependencies',
        'input_label': 'Inserisci una parola da cercare:',
        'buscando': 'Ricerca in corso',
        'intro': (
            'PortiLexicon-UD contiene oltre 1,2 milioni di forme di parole in portoghese brasiliano, con le rispettive informazioni morfologiche e morfosintattiche, secondo il modello internazionale delle Universal Dependencies. \n'
            'Il lessico è basato su Unitex-PB e fa parte del progetto POeTiSA.'
        ),
        'to_cite': 'Per citare PortiLexicon-UD',
        'not_found': 'Questa parola non esiste in PortiLexicon (ancora...)',
        'multiword_error': 'Questa versione di PortiLexicon non gestisce parole composte o espressioni multi-parola.',
        # Was the English 'Developed by'; every other label in this entry is Italian.
        'dev': 'Sviluppato da',
    },
    '🇪🇸': {
        'title': 'PortiLexicon-UD',
        'subtitle': 'Un léxico para el portugués brasileño según el modelo Universal Dependencies',
        'input_label': 'Ingresa una palabra para buscar:',
        'buscando': 'Buscando',
        'intro': 'PortiLexicon-UD contiene más de 1,2 millones de formas de palabras en portugués brasileño, junto con su respectiva información morfológica y morfosintáctica, siguiendo el modelo internacional de Universal Dependencies. El léxico está basado en Unitex-PB y forma parte del proyecto POeTiSA.',
        'to_cite': 'Para citar PortiLexicon-UD',
        'not_found': 'Esta palabra no existe en PortiLexicon (todavía...)',
        'multiword_error': 'Esta versión de PortiLexicon no admite palabras compuestas ni expresiones multi-palabra.',
        'dev': 'Desarrollado por',
    },
}

# Citation shown inside the "to cite" expander.
CITATION_TEXT = (
    " Lopes, L., Duran, M., Fernandes, P., and Pardo, T. (2022). PortiLexicon-UD: a portuguese lexical resource according to universal dependencies model. In Proceedings of the Language Resources and Evaluation Conference, pages 6635–6643, Marseille, France. European Language Resources Association.\n"
    "Links: PDF - URL\n"
    "BibTeX:\n"
)

CITATION_BIBTEX = """ @InProceedings{lopes-EtAl-2022, author = {Lopes, Lucelene and Duran, Magali and Fernandes, Paulo and Pardo, Thiago}, title = {{P}orti{L}exicon-{UD}: a Portuguese Lexical Resource according to {U}niversal {D}ependencies Model}, booktitle = {Proceedings of the Language Resources and Evaluation Conference}, month = {June}, year = {2022}, address = {Marseille, France}, publisher = {European Language Resources Association}, pages = {6635--6643}, url = {https://aclanthology.org/2022.lrec-1.715} }"""


def img_to_bytes(img_path):
    """Return the content of the file at ``img_path`` as a base64 text string.

    Raises whatever ``Path.read_bytes`` raises (e.g. ``FileNotFoundError``)
    when the file cannot be read.
    """
    return base64.b64encode(Path(img_path).read_bytes()).decode()


def img_to_html(img_path, img_style='max-width: 100%;'):
    """Return an ``<img>`` tag that embeds the image as a base64 data URI.

    Args:
        img_path: Path of the image file to embed.
        img_style: Inline CSS placed in the tag's ``style`` attribute.

    The previous implementation returned an empty string and ignored both
    arguments, so no image was ever rendered.
    """
    # Fall back to PNG (the format of every logo used here) when the
    # extension is not recognised.
    mime_type = mimetypes.guess_type(str(img_path))[0] or "image/png"
    return (
        f"<img src='data:{mime_type};base64,{img_to_bytes(img_path)}' "
        f"style='{img_style}'>"
    )


def _render_logo_row(widths, logos):
    """Create a row of columns and draw each ``(column_index, path, style)`` logo."""
    columns = st.columns(widths)
    for column_index, img_path, img_style in logos:
        with columns[column_index]:
            st.markdown(img_to_html(img_path, img_style), unsafe_allow_html=True)


# Page configuration has to be issued before any other Streamlit command.
st.set_page_config(
    page_title="PortiLexicon-UD",
    layout="centered",
    initial_sidebar_state="collapsed"
)

# Currently only whitespace; presumably the slot for page-level CSS/HTML.
st.markdown(""" """, unsafe_allow_html=True)

# language sidebar
lang_options = {
    "🇧🇷 Português": "🇧🇷",
    "🇺🇸 English": "🇺🇸",
    "🇫🇷 Français": "🇫🇷",
    "🇮🇹 Italiano": "🇮🇹",
    "🇪🇸 Español": "🇪🇸",
}
selected = st.sidebar.radio("🌐 Interface", list(lang_options))
t = translations[lang_options[selected]]

# Streamlit app title
st.markdown(f"\n\n{t['title']}\n\n", unsafe_allow_html=True)
st.markdown("\n" + t["subtitle"] + "\n", unsafe_allow_html=True)

# introduction
logo_html = img_to_html("img/logo-ud.png", img_style="width:100px; margin-right:20px;")
intro_combined = f"""
{logo_html}
{t['intro']}


"""
st.markdown(intro_combined, unsafe_allow_html=True)

# Text input from the user (main interface item)
word = st.text_input("**" + t["input_label"] + "**", "").strip().lower()

# Check and display result
if word:
    if " " in word or "-" in word:
        # Multiword and hyphenated entries are rejected before querying.
        st.write(t["multiword_error"])
    else:
        with st.spinner("🔍" + t["buscando"] + "..."):
            opts = lex.sget(word)
        if not opts:
            st.write(t["not_found"])
        else:
            # Each result is indexed as o[0] = lemma, o[1] = displayed form,
            # o[2] = features, following the labels used below.
            for o in opts:
                st.write(
                    f"- {o[1]}\nLEMMA: {o[0]}\nFEATS: {o[2]}",
                    unsafe_allow_html=True,
                )

# To cite expander
st.write("\n\n", unsafe_allow_html=True)
with st.expander(t["to_cite"], expanded=False):
    st.markdown(CITATION_TEXT, unsafe_allow_html=True)
    st.code(CITATION_BIBTEX)

# Footer / Footnote
with st.container():
    _render_logo_row(
        [3, 4, 1, 4, 1, 4, 3],
        [
            (1, './img/icmc.png', 'max-width: 100%;'),
            (3, './img/c4ia.png', 'max-width: 100%;'),
            (5, './img/nilc-removebg.png', 'max-width:80%'),
        ],
    )
    _render_logo_row(
        [1, 4, 1, 4, 1, 5, 1, 4, 1],
        [
            (1, './img/inova_nobackground.png', 'max-width: 100%;'),
            (3, './img/softex_nobackground.png', 'max-width: 100%;'),
            (5, './img/mcti_nobackground.png', 'max-width: 100%;'),
            (7, './img/motorola_nobackground.png',
             'max-width:70%; object-position: center bottom'),
        ],
    )
    creditrow = st.columns([3, 18, 3])
    with creditrow[1]:
        # The backslashes are written escaped so the literal "\ \ " sequence
        # is kept without relying on an invalid escape sequence.
        st.markdown(
            '\n\n' + t["dev"] + ' Lucelene Lopes\\ \\ '
            'open source License: MIT\n\n',
            unsafe_allow_html=True,
        )