PortiLexicon-UD / app.py
NILC-ICMC-USP's picture
Update app.py
c32dac2 verified
import streamlit as st
import base64
from pathlib import Path
from UDlexPT import UDlexPT
# Streamlit re-executes this script from the top on every user interaction, so
# a bare module-level ``UDlexPT()`` would be constructed again on each rerun.
# The cached loader below builds it once and hands back the same object after.
@st.cache_resource(show_spinner=False)
def _load_lexicon():
    """Return the UDlexPT lexicon instance shared by all reruns and sessions.

    ``show_spinner=False`` keeps this call from drawing anything on the page,
    so it is safe to run ahead of the page-configuration call further down.

    NOTE(review): the cached object is shared between sessions; this assumes
    lookups on it do not mutate its state -- confirm against UDlexPT.
    """
    return UDlexPT()


lex = _load_lexicon()
# Interface strings, keyed first by the flag emoji that identifies the language
# and then by message id. Every language must provide the same set of ids:
#   title, subtitle, input_label, buscando (spinner text), intro (HTML),
#   to_cite, not_found, multiword_error, dev (footer credit prefix).
translations = {
    '🇧🇷': {
        'title': 'PortiLexicon-UD',
        'subtitle': 'Um léxico para Português Brasileiro de acordo com o modelo Universal Dependencies',
        'input_label': 'Entre uma palavra para buscar:',
        'buscando': 'Buscando',
        'intro': 'PortiLexicon-UD contém mais de 1,2 milhão de formas de palavras em português brasileiro, juntamente com suas respectivas informações morfológicas e morfossintáticas, seguindo o modelo internacional do <a href="https://universaldependencies.org" target="_blank">Universal Dependencies</a>. O léxico é baseado no <a href="http://www.nilc.icmc.usp.br/nilc/projects/unitex-pb/web/index.html" target="_blank">Unitex-PB</a> e faz parte do projeto <a href="https://sites.google.com/icmc.usp.br/poetisa" target="_blank">POeTiSA</a>.',
        'to_cite': 'Para citar o PortiLexicon-UD',
        'not_found': 'Esta palavra não existe no Portilexicon (ainda...)',
        'multiword_error': 'Essa versão do Portilexicon não possui multipalavras ou palavras compostas.',
        'dev': 'Desenvolvido por',
    },
    '🇺🇸': {
        'title': 'PortiLexicon-UD',
        'subtitle': 'A lexicon for Brazilian Portuguese according to Universal Dependencies model',
        'input_label': 'Enter a word to search:',
        'buscando': 'Searching',
        'intro': 'PortiLexicon-UD contains over 1.2 million word forms in Brazilian Portuguese with their respective morphological and morphosyntactic information, following the <a href="https://universaldependencies.org" target="_blank">Universal Dependencies</a> international model. The lexicon is based on <a href="http://www.nilc.icmc.usp.br/nilc/projects/unitex-pb/web/index.html" target="_blank">Unitex-PB</a> and is part of the <a href="https://sites.google.com/icmc.usp.br/poetisa" target="_blank">POeTiSA</a> project.',
        'to_cite': 'To cite PortiLexicon-UD',
        'not_found': 'This word does not exist in Portilexicon (yet...)',
        'multiword_error': 'This version of Portilexicon does not handle multiwords or compound words.',
        'dev': 'Developed by',
    },
    '🇫🇷': {
        'title': 'PortiLexicon-UD',
        'subtitle': 'Un lexique pour le portugais brésilien selon le modèle Universal Dependencies',
        'input_label': 'Entrez un mot à rechercher :',
        'buscando': 'Recherche en cours',
        'intro': 'PortiLexicon-UD contient plus de 1,2 million de formes de mots en portugais brésilien, accompagnées de leurs informations morphologiques et morphosyntaxiques respectives, conformément au modèle international des <a href="https://universaldependencies.org" target="_blank">Universal Dependencies</a>. Le lexique est basé sur <a href="http://www.nilc.icmc.usp.br/nilc/projects/unitex-pb/web/index.html" target="_blank">Unitex-PB</a> et fait partie du projet <a href="https://sites.google.com/icmc.usp.br/poetisa" target="_blank">POeTiSA</a>.',
        'to_cite': 'Pour citer PortiLexicon-UD',
        'not_found': 'Ce mot n’existe pas dans PortiLexicon (pas encore...)',
        'multiword_error': 'Cette version de PortiLexicon ne prend pas en charge les mots composés ou les expressions multi-mots.',
        'dev': 'Développé par',
    },
    '🇮🇹': {
        'title': 'PortiLexicon-UD',
        'subtitle': 'Un lessico per il portoghese brasiliano secondo il modello Universal Dependencies',
        'input_label': 'Inserisci una parola da cercare:',
        'buscando': 'Ricerca in corso',
        'intro': 'PortiLexicon-UD contiene oltre 1,2 milioni di forme di parole in portoghese brasiliano, con le rispettive informazioni morfologiche e morfosintattiche, secondo il modello internazionale delle <a href="https://universaldependencies.org" target="_blank">Universal Dependencies</a>. Il lessico è basato su <a href="http://www.nilc.icmc.usp.br/nilc/projects/unitex-pb/web/index.html" target="_blank">Unitex-PB</a> e fa parte del progetto <a href="https://sites.google.com/icmc.usp.br/poetisa" target="_blank">POeTiSA</a>.',
        'to_cite': 'Per citare PortiLexicon-UD',
        'not_found': 'Questa parola non esiste in PortiLexicon (ancora...)',
        'multiword_error': 'Questa versione di PortiLexicon non gestisce parole composte o espressioni multi-parola.',
        # Was the English 'Developed by'; localized like every other language.
        'dev': 'Sviluppato da',
    },
    '🇪🇸': {
        'title': 'PortiLexicon-UD',
        'subtitle': 'Un léxico para el portugués brasileño según el modelo Universal Dependencies',
        'input_label': 'Ingresa una palabra para buscar:',
        'buscando': 'Buscando',
        'intro': 'PortiLexicon-UD contiene más de 1,2 millones de formas de palabras en portugués brasileño, junto con su respectiva información morfológica y morfosintáctica, siguiendo el modelo internacional de <a href="https://universaldependencies.org" target="_blank">Universal Dependencies</a>. El léxico está basado en <a href="http://www.nilc.icmc.usp.br/nilc/projects/unitex-pb/web/index.html" target="_blank">Unitex-PB</a> y forma parte del proyecto <a href="https://sites.google.com/icmc.usp.br/poetisa" target="_blank">POeTiSA</a>.',
        'to_cite': 'Para citar PortiLexicon-UD',
        'not_found': 'Esta palabra no existe en PortiLexicon (todavía...)',
        'multiword_error': 'Esta versión de PortiLexicon no admite palabras compuestas ni expresiones multi-palabra.',
        'dev': 'Desarrollado por',
    },
}
def img_to_bytes(img_path):
    """Return the contents of the file at *img_path* as a base64 text string.

    Despite the name, the result is a ``str`` (the base64 encoding of the
    file's bytes), ready to be dropped into a ``data:`` URI.
    """
    return base64.b64encode(Path(img_path).read_bytes()).decode()
def img_to_html(img_path, img_style='max-width: 100%;'):
    """Return an ``<img>`` tag that embeds the file at *img_path* inline.

    The file is base64-encoded through ``img_to_bytes`` and placed in a data
    URI that is always labelled ``image/png``. *img_style* is inserted as-is
    into the tag's ``style`` attribute.
    """
    payload = img_to_bytes(img_path)
    return "<img src='data:image/png;base64,{}' style='{}'>".format(payload, img_style)
# Page configuration comes before any other Streamlit output: Streamlit's
# documentation requires st.set_page_config to be the first Streamlit command
# on a page, and many releases raise an error when something is rendered
# ahead of it (previously the CSS below was emitted first).
st.set_page_config(
    page_title="PortiLexicon-UD",
    layout="centered",
    initial_sidebar_state="collapsed",
)

# Global CSS: append a text label after the element tagged
# data-testid="collapsedControl" and force a white page with black text.
# (Label typo "Seetings" corrected to "Settings".)
st.markdown("""
<style>
[data-testid="collapsedControl"]::after {
content: " Interface Settings";
margin-left: 5px;
}
body {
background-color: #ffffff !important;
color: #000000 !important;
}
</style>
""", unsafe_allow_html=True)
# Sidebar language picker. Each radio label is "<flag> <language name>" and
# maps back to the flag, which is the key used in `translations`.
lang_options = {
    f"{flag} {language}": flag
    for flag, language in (
        ("🇧🇷", "Português"),
        ("🇺🇸", "English"),
        ("🇫🇷", "Français"),
        ("🇮🇹", "Italiano"),
        ("🇪🇸", "Español"),
    )
}
selected = st.sidebar.radio("🌐 Interface", list(lang_options))
# `t` holds the message table of the chosen language for the rest of the page.
t = translations[lang_options[selected]]
# Page header: centered title with the subtitle pulled up right beneath it.
st.markdown(
    "<h1 style='text-align:center;'>" + t["title"] + "</h1>",
    unsafe_allow_html=True,
)
st.markdown(
    f"<h6 style='text-align:center; margin-top:-20px;'>{t['subtitle']}</h6>",
    unsafe_allow_html=True,
)

# Introduction: a flex row with the inlined UD logo on the left and the
# translated intro text (which contains HTML links) on the right.
logo_html = img_to_html("img/logo-ud.png", img_style="width:100px; margin-right:20px;")
intro_combined = "".join([
    '\n<div style="display:flex; align-items:flex-start;">\n',
    logo_html,
    '\n<div style="flex: 1; text-align: justify;">',
    t["intro"],
    '</div>\n</div><br><br>\n',
])
st.markdown(intro_combined, unsafe_allow_html=True)
# Main interaction: read one word, trim and lower-case it, then list every
# entry the lexicon returns for it.
word = st.text_input("**" + t["input_label"] + "**", "").strip().lower()

if word:
    if " " in word or "-" in word:
        # Inputs containing a space or a hyphen are rejected up front with the
        # "no multiwords / compounds" message; the lexicon is not queried.
        st.write(t["multiword_error"])
    else:
        with st.spinner("🔍" + t["buscando"] + "..."):
            opts = lex.sget(word)
        # Truthiness instead of `== []`: any empty result (or None) is shown
        # as "not found" rather than only an empty list.
        # NOTE(review): the return type of sget is not visible here -- confirm.
        if not opts:
            st.write(t["not_found"])
        else:
            # Each entry is indexed positionally: o[1] is the highlighted
            # heading, o[0] is shown as LEMMA and o[2] as FEATS.
            for o in opts:
                st.write(f"- <span style='font-size: large;color: cornflowerblue;'>{o[1]}</span><br>LEMMA: <span style='color: cornflowerblue;'>{o[0]}</span><br>FEATS: <span style='color: cornflowerblue;'>{o[2]}</span>", unsafe_allow_html=True)
# Citation section: some vertical spacing, then a collapsed expander holding
# the formatted reference, links to the paper, and a copyable BibTeX record.
citation_html = """
<small>
Lopes, L., Duran, M., Fernandes, P., and Pardo, T. (2022). PortiLexicon-UD: a portuguese
lexical resource according to universal dependencies model. In Proceedings of the
Language Resources and Evaluation Conference, pages 6635–6643, Marseille, France.
European Language Resources Association.<br>
<i>Links</i>: <a href='http://www.lrec-conf.org/proceedings/lrec2022/pdf/2022.lrec-1.715.pdf' target='_blank'><i>PDF</i></a> - <a href='https://aclanthology.org/2022.lrec-1.715' target='_blank'><i>URL</i></a><br>
<i>BibTeX</i>:
</small>
"""
bibtex_entry = """
@InProceedings{lopes-EtAl-2022,
author = {Lopes, Lucelene and Duran, Magali and Fernandes, Paulo and Pardo, Thiago},
title = {{P}orti{L}exicon-{UD}: a Portuguese Lexical Resource according to {U}niversal {D}ependencies Model},
booktitle = {Proceedings of the Language Resources and Evaluation Conference},
month = {June},
year = {2022},
address = {Marseille, France},
publisher = {European Language Resources Association},
pages = {6635--6643},
url = {https://aclanthology.org/2022.lrec-1.715}
}"""

st.write("<br><br>", unsafe_allow_html=True)
with st.expander(t["to_cite"], expanded=False):
    st.markdown(citation_html, unsafe_allow_html=True)
    st.code(bibtex_entry)
# Footer: two rows of linked institutional logos followed by a credit line.
def _logo_link(url, *img_args):
    """Return an anchor opening *url* in a new tab around an inlined image.

    *img_args* is forwarded unchanged to ``img_to_html`` (image path and,
    optionally, a style string), so its default style applies when omitted.
    """
    return f"<a href='{url}' target='_blank'>{img_to_html(*img_args)}</a>"


def _render_logo_row(columns, logos):
    """Draw each ``(column index, url, img_to_html args)`` entry in its column."""
    for slot, url, img_args in logos:
        with columns[slot]:
            st.markdown(_logo_link(url, *img_args), unsafe_allow_html=True)


with st.container():
    # Unused column slots in each row act as spacers between the logos.
    logorow1 = st.columns([3, 4, 1, 4, 1, 4, 3])
    _render_logo_row(logorow1, [
        (1, "https://www.icmc.usp.br/", ("./img/icmc.png",)),
        (3, "https://c4ai.inova.usp.br/pt/inicio/", ("./img/c4ia.png",)),
        (5, "https://sites.google.com/view/nilc-usp/", ("./img/nilc-removebg.png", "max-width:80%")),
    ])

    logorow2 = st.columns([1, 4, 1, 4, 1, 5, 1, 4, 1])
    _render_logo_row(logorow2, [
        (1, "https://inova.usp.br/", ("./img/inova_nobackground.png",)),
        (3, "https://softex.br/", ("./img/softex_nobackground.png",)),
        (5, "https://www.gov.br/mcti/pt-br", ("./img/mcti_nobackground.png",)),
        (7, "https://www.motorola.com.br/", ("./img/motorola_nobackground.png", "max-width:70%; object-position: center bottom")),
    ])

    creditrow = st.columns([3, 18, 3])
    with creditrow[1]:
        # Translated "developed by" prefix, author name, an inline GitHub icon
        # linking to the author's profile, and an MIT licence badge.
        credit_html = (
            '<p style="text-align: center;margin-top:10px"> '
            + t["dev"]
            + ' Lucelene Lopes'
            + '<a href="https://github.com/LuceleneL" target="_blank"><svg xmlns="http://www.w3.org/2000/svg" width="20" height="20" fill="currentColor" class="bi bi-github" viewBox="0 0 16 16">'
            + '<path d="M8 0C3.58 0 0 3.58 0 8c0 3.54 2.29 6.53 5.47 7.59.4.07.55-.17.55-.38 0-.19-.01-.82-.01-1.49-2.01.37-2.53-.49-2.69-.94-.09-.23-.48-.94-.82-1.13-.28-.15-.68-.52-.01-.53.63-.01 1.08.58 1.23.82.72 1.21 1.87.87 2.33.66.07-.52.28-.87.51-1.07-1.78-.2-3.64-.89-3.64-3.95 0-.87.31-1.59.82-2.15-.08-.2-.36-1.02.08-2.12 0 0 .67-.21 2.2.82.64-.18 1.32-.27 2-.27s1.36.09 2 .27c1.53-1.04 2.2-.82 2.2-.82.44 1.1.16 1.92.08 2.12.51.56.82 1.27.82 2.15 0 3.07-1.87 3.75-3.65 3.95.29.25.54.73.54 1.48 0 1.07-.01 1.93-.01 2.2 0 .21.15.46.55.38A8.01 8.01 0 0 0 16 8c0-4.42-3.58-8-8-8"/>'
            + '</svg></a> <br> open source <a href="https://opensource.org/licenses/MIT" target="_blank" rel="noopener noreferrer"><img src="https://img.shields.io/badge/License-MIT-yellow.svg" alt="License: MIT"></a></p>'
        )
        st.markdown(credit_html, unsafe_allow_html=True)