Spaces:
Running
Running
import streamlit as st | |
import base64 | |
from pathlib import Path | |
from UDlexPT import UDlexPT | |
# Lexicon used for every word lookup in this app.
#
# Streamlit re-executes this script from top to bottom on each widget
# interaction, so a bare `UDlexPT()` call here would rebuild the lexicon on
# every rerun. `st.cache_resource` keeps a single instance alive and hands the
# same object back on later reruns.
# show_spinner=False: avoid rendering a UI element from this early point in
# the script, before the page configuration has been applied.
# NOTE(review): the cached instance is shared between sessions; this assumes
# `UDlexPT.sget` only reads from the lexicon -- confirm.
@st.cache_resource(show_spinner=False)
def _load_lexicon():
    """Create the UDlexPT lexicon object (cached by Streamlit)."""
    return UDlexPT()


lex = _load_lexicon()
# Interface strings, keyed first by the flag emoji that identifies a language
# and then by a message id. Every language must provide the same message ids,
# because the UI indexes the selected language's dict directly.
# The 'intro' values contain HTML and are rendered with unsafe_allow_html.
translations = {
    '🇧🇷': {
        'title': 'PortiLexicon-UD',
        'subtitle': 'Um léxico para Português Brasileiro de acordo com o modelo Universal Dependencies',
        'input_label': 'Entre uma palavra para buscar:',
        'buscando': 'Buscando',
        'intro': 'PortiLexicon-UD contém mais de 1,2 milhão de formas de palavras em português brasileiro, juntamente com suas respectivas informações morfológicas e morfossintáticas, seguindo o modelo internacional do <a href="https://universaldependencies.org" target="_blank">Universal Dependencies</a>. O léxico é baseado no <a href="http://www.nilc.icmc.usp.br/nilc/projects/unitex-pb/web/index.html" target="_blank">Unitex-PB</a> e faz parte do projeto <a href="https://sites.google.com/icmc.usp.br/poetisa" target="_blank">POeTiSA</a>.',
        'to_cite': 'Para citar o PortiLexicon-UD',
        'not_found': 'Esta palavra não existe no Portilexicon (ainda...)',
        'multiword_error': 'Essa versão do Portilexicon não possui multipalavras ou palavras compostas.',
        'dev': 'Desenvolvido por',
    },
    '🇺🇸': {
        'title': 'PortiLexicon-UD',
        'subtitle': 'A lexicon for Brazilian Portuguese according to Universal Dependencies model',
        'input_label': 'Enter a word to search:',
        'buscando': 'Searching',
        'intro': 'PortiLexicon-UD contains over 1.2 million word forms in Brazilian Portuguese with their respective morphological and morphosyntactic information, following the <a href="https://universaldependencies.org" target="_blank">Universal Dependencies</a> international model. The lexicon is based on <a href="http://www.nilc.icmc.usp.br/nilc/projects/unitex-pb/web/index.html" target="_blank">Unitex-PB</a> and is part of the <a href="https://sites.google.com/icmc.usp.br/poetisa" target="_blank">POeTiSA</a> project.',
        'to_cite': 'To cite PortiLexicon-UD',
        'not_found': 'This word does not exist in Portilexicon (yet...)',
        'multiword_error': 'This version of Portilexicon does not handle multiwords or compound words.',
        'dev': 'Developed by',
    },
    '🇫🇷': {
        'title': 'PortiLexicon-UD',
        'subtitle': 'Un lexique pour le portugais brésilien selon le modèle Universal Dependencies',
        'input_label': 'Entrez un mot à rechercher :',
        'buscando': 'Recherche en cours',
        'intro': 'PortiLexicon-UD contient plus de 1,2 million de formes de mots en portugais brésilien, accompagnées de leurs informations morphologiques et morphosyntaxiques respectives, conformément au modèle international des <a href="https://universaldependencies.org" target="_blank">Universal Dependencies</a>. Le lexique est basé sur <a href="http://www.nilc.icmc.usp.br/nilc/projects/unitex-pb/web/index.html" target="_blank">Unitex-PB</a> et fait partie du projet <a href="https://sites.google.com/icmc.usp.br/poetisa" target="_blank">POeTiSA</a>.',
        'to_cite': 'Pour citer PortiLexicon-UD',
        'not_found': 'Ce mot n’existe pas dans PortiLexicon (pas encore...)',
        'multiword_error': 'Cette version de PortiLexicon ne prend pas en charge les mots composés ou les expressions multi-mots.',
        'dev': 'Développé par',
    },
    '🇮🇹': {
        'title': 'PortiLexicon-UD',
        'subtitle': 'Un lessico per il portoghese brasiliano secondo il modello Universal Dependencies',
        'input_label': 'Inserisci una parola da cercare:',
        'buscando': 'Ricerca in corso',
        'intro': 'PortiLexicon-UD contiene oltre 1,2 milioni di forme di parole in portoghese brasiliano, con le rispettive informazioni morfologiche e morfosintattiche, secondo il modello internazionale delle <a href="https://universaldependencies.org" target="_blank">Universal Dependencies</a>. Il lessico è basato su <a href="http://www.nilc.icmc.usp.br/nilc/projects/unitex-pb/web/index.html" target="_blank">Unitex-PB</a> e fa parte del progetto <a href="https://sites.google.com/icmc.usp.br/poetisa" target="_blank">POeTiSA</a>.',
        'to_cite': 'Per citare PortiLexicon-UD',
        'not_found': 'Questa parola non esiste in PortiLexicon (ancora...)',
        'multiword_error': 'Questa versione di PortiLexicon non gestisce parole composte o espressioni multi-parola.',
        # Was left in English ('Developed by'); every other language localizes it.
        'dev': 'Sviluppato da',
    },
    '🇪🇸': {
        'title': 'PortiLexicon-UD',
        'subtitle': 'Un léxico para el portugués brasileño según el modelo Universal Dependencies',
        'input_label': 'Ingresa una palabra para buscar:',
        'buscando': 'Buscando',
        'intro': 'PortiLexicon-UD contiene más de 1,2 millones de formas de palabras en portugués brasileño, junto con su respectiva información morfológica y morfosintáctica, siguiendo el modelo internacional de <a href="https://universaldependencies.org" target="_blank">Universal Dependencies</a>. El léxico está basado en <a href="http://www.nilc.icmc.usp.br/nilc/projects/unitex-pb/web/index.html" target="_blank">Unitex-PB</a> y forma parte del proyecto <a href="https://sites.google.com/icmc.usp.br/poetisa" target="_blank">POeTiSA</a>.',
        'to_cite': 'Para citar PortiLexicon-UD',
        'not_found': 'Esta palabra no existe en PortiLexicon (todavía...)',
        'multiword_error': 'Esta versión de PortiLexicon no admite palabras compuestas ni expresiones multi-palabra.',
        'dev': 'Desarrollado por',
    },
}
def img_to_bytes(img_path):
    """Read the file at *img_path* and return its content as base64 text.

    Despite the name, the result is a ``str`` (the base64 encoding of the
    file's bytes), ready to be embedded in a ``data:`` URI.
    """
    return str(base64.b64encode(Path(img_path).read_bytes()), "utf-8")
def img_to_html(img_path, img_style='max-width: 100%;'):
    """Return an ``<img>`` tag that embeds the image file inline.

    The file at *img_path* is base64-encoded via ``img_to_bytes`` and placed
    in a ``data:image/png`` URI; *img_style* is inserted verbatim as the
    tag's ``style`` attribute.
    """
    payload = img_to_bytes(img_path)
    template = "<img src='data:image/png;base64,{}' style='{}'>"
    return template.format(payload, img_style)
# Page configuration. Streamlit documents set_page_config as a call that has
# to come before any other Streamlit command, so it is issued ahead of the
# CSS injection below (the original order was reversed).
st.set_page_config(
    page_title="PortiLexicon-UD",
    layout="centered",
    initial_sidebar_state="collapsed",
)

# Global CSS:
#  - appends a text label after the collapsed-sidebar control
#    (typo "Seetings" corrected to "Settings");
#  - forces a white background with black text on <body>.
st.markdown("""
<style>
[data-testid="collapsedControl"]::after {
content: " Interface Settings";
margin-left: 5px;
}
body {
background-color: #ffffff !important;
color: #000000 !important;
}
</style>
""", unsafe_allow_html=True)
# Language picker (sidebar): maps each radio label to the flag key used in
# `translations`.
lang_options = {
    "🇧🇷 Português": "🇧🇷",
    "🇺🇸 English": "🇺🇸",
    "🇫🇷 Français": "🇫🇷",
    "🇮🇹 Italiano": "🇮🇹",
    "🇪🇸 Español": "🇪🇸",
}
selected = st.sidebar.radio("🌐 Interface", list(lang_options))
# `t` holds the message table of the chosen language for the rest of the page.
flag = lang_options[selected]
t = translations[flag]
# Page heading: centered title with the subtitle pulled up right beneath it.
st.markdown(
    "<h1 style='text-align:center;'>" + t["title"] + "</h1>",
    unsafe_allow_html=True,
)
st.markdown(
    f"<h6 style='text-align:center; margin-top:-20px;'>{t['subtitle']}</h6>",
    unsafe_allow_html=True,
)

# Introduction: UD logo on the left, justified intro text filling the rest of
# the row, followed by two line breaks.
logo_html = img_to_html("img/logo-ud.png", img_style="width:100px; margin-right:20px;")
intro_combined = "\n".join([
    "",
    '<div style="display:flex; align-items:flex-start;">',
    logo_html,
    '<div style="flex: 1; text-align: justify;">' + t["intro"] + '</div>',
    '</div><br><br>',
    "",
])
st.markdown(intro_combined, unsafe_allow_html=True)
# Main interface item: the word to look up, trimmed and lower-cased before it
# is sent to the lexicon.
word = st.text_input("**" + t["input_label"] + "**", "").strip().lower()

# Look the word up and render the outcome. Nothing is shown for empty input.
if word:
    if " " in word or "-" in word:
        # Entries containing a space or a hyphen are rejected up front with
        # the multiword message instead of being searched.
        st.write(t["multiword_error"])
    else:
        with st.spinner("🔍" + t["buscando"] + "..."):
            opts = lex.sget(word)
        # Truthiness instead of `== []`: any empty result (empty list, empty
        # tuple or None) is reported as "not found" rather than crashing in
        # the loop below or silently rendering nothing.
        if not opts:
            st.write(t["not_found"])
        else:
            for o in opts:
                # Each entry is indexed as o[0] (shown as LEMMA), o[1] (shown
                # as the highlighted headline) and o[2] (shown as FEATS).
                st.write(f"- <span style='font-size: large;color: cornflowerblue;'>{o[1]}</span><br>LEMMA: <span style='color: cornflowerblue;'>{o[0]}</span><br>FEATS: <span style='color: cornflowerblue;'>{o[2]}</span>", unsafe_allow_html=True)
# Citation section: a collapsed expander holding the formatted reference,
# links to the paper, and the BibTeX entry as a code block.
citation_html = """
<small>
Lopes, L., Duran, M., Fernandes, P., and Pardo, T. (2022). PortiLexicon-UD: a portuguese
lexical resource according to universal dependencies model. In Proceedings of the
Language Resources and Evaluation Conference, pages 6635–6643, Marseille, France.
European Language Resources Association.<br>
<i>Links</i>: <a href='http://www.lrec-conf.org/proceedings/lrec2022/pdf/2022.lrec-1.715.pdf' target='_blank'><i>PDF</i></a> - <a href='https://aclanthology.org/2022.lrec-1.715' target='_blank'><i>URL</i></a><br>
<i>BibTeX</i>:
</small>
"""
bibtex_entry = """
@InProceedings{lopes-EtAl-2022,
author = {Lopes, Lucelene and Duran, Magali and Fernandes, Paulo and Pardo, Thiago},
title = {{P}orti{L}exicon-{UD}: a Portuguese Lexical Resource according to {U}niversal {D}ependencies Model},
booktitle = {Proceedings of the Language Resources and Evaluation Conference},
month = {June},
year = {2022},
address = {Marseille, France},
publisher = {European Language Resources Association},
pages = {6635--6643},
url = {https://aclanthology.org/2022.lrec-1.715}
}"""

# Vertical spacing between the search results and the citation section.
st.write("<br><br>", unsafe_allow_html=True)
with st.expander(t["to_cite"], expanded=False):
    st.markdown(citation_html, unsafe_allow_html=True)
    st.code(bibtex_entry)
# Footer: two rows of linked partner logos followed by a credit line.
def _linked_logo(url, img_path, *style):
    """Return an anchor opening *url* in a new tab around the embedded logo.

    *style*, when given, is forwarded to ``img_to_html`` as its style
    argument; otherwise that function's default style applies.
    """
    return "<a href='" + url + "' target='_blank'>" + img_to_html(img_path, *style) + "</a>"


with st.container():
    # First row: three logos placed in columns 1, 3 and 5; the remaining
    # columns are left empty as spacers.
    logorow1 = st.columns([3, 4, 1, 4, 1, 4, 3])
    for slot, url, img_path, style in (
        (1, 'https://www.icmc.usp.br/', './img/icmc.png', ()),
        (3, 'https://c4ai.inova.usp.br/pt/inicio/', './img/c4ia.png', ()),
        (5, 'https://sites.google.com/view/nilc-usp/', './img/nilc-removebg.png', ('max-width:80%',)),
    ):
        with logorow1[slot]:
            st.markdown(_linked_logo(url, img_path, *style), unsafe_allow_html=True)

    # Second row: four logos placed in columns 1, 3, 5 and 7.
    logorow2 = st.columns([1, 4, 1, 4, 1, 5, 1, 4, 1])
    for slot, url, img_path, style in (
        (1, 'https://inova.usp.br/', './img/inova_nobackground.png', ()),
        (3, 'https://softex.br/', './img/softex_nobackground.png', ()),
        (5, 'https://www.gov.br/mcti/pt-br', './img/mcti_nobackground.png', ()),
        (7, 'https://www.motorola.com.br/', './img/motorola_nobackground.png',
         ('max-width:70%; object-position: center bottom',)),
    ):
        with logorow2[slot]:
            st.markdown(_linked_logo(url, img_path, *style), unsafe_allow_html=True)

    # Credit line: localized "developed by" text, the author's GitHub link
    # (inline SVG icon) and the MIT license badge.
    creditrow = st.columns([3, 18, 3])
    with creditrow[1]:
        credit_html = (
            '<p style="text-align: center;margin-top:10px"> ' + t["dev"] + ' Lucelene Lopes'
            '<a href="https://github.com/LuceleneL" target="_blank"><svg xmlns="http://www.w3.org/2000/svg" width="20" height="20" fill="currentColor" class="bi bi-github" viewBox="0 0 16 16">'
            '<path d="M8 0C3.58 0 0 3.58 0 8c0 3.54 2.29 6.53 5.47 7.59.4.07.55-.17.55-.38 0-.19-.01-.82-.01-1.49-2.01.37-2.53-.49-2.69-.94-.09-.23-.48-.94-.82-1.13-.28-.15-.68-.52-.01-.53.63-.01 1.08.58 1.23.82.72 1.21 1.87.87 2.33.66.07-.52.28-.87.51-1.07-1.78-.2-3.64-.89-3.64-3.95 0-.87.31-1.59.82-2.15-.08-.2-.36-1.02.08-2.12 0 0 .67-.21 2.2.82.64-.18 1.32-.27 2-.27s1.36.09 2 .27c1.53-1.04 2.2-.82 2.2-.82.44 1.1.16 1.92.08 2.12.51.56.82 1.27.82 2.15 0 3.07-1.87 3.75-3.65 3.95.29.25.54.73.54 1.48 0 1.07-.01 1.93-.01 2.2 0 .21.15.46.55.38A8.01 8.01 0 0 0 16 8c0-4.42-3.58-8-8-8"/>'
            '</svg></a> <br> open source <a href="https://opensource.org/licenses/MIT" target="_blank" rel="noopener noreferrer"><img src="https://img.shields.io/badge/License-MIT-yellow.svg" alt="License: MIT"></a></p>'
        )
        st.markdown(credit_html, unsafe_allow_html=True)