# Deployment note: the Spanish spaCy model used by this app must be installed
# in the environment before the script runs. A notebook-style "!pip" magic is
# not valid Python in a script executed by Streamlit, so the install command is
# kept here as documentation only. Add the wheel URL to requirements.txt or run:
#
#   pip install https://huggingface.co/spacy/es_core_news_lg/resolve/main/es_core_news_lg-any-py3-none-any.whl
import os

# These flags are written to the environment right after ``import os`` so they
# are already in place when the numeric/ML libraries further down are imported.
# TF_ENABLE_ONEDNN_OPTS=0 switches off TensorFlow's oneDNN custom operations.
os.environ["TF_ENABLE_ONEDNN_OPTS"] = "0"
# KMP_DUPLICATE_LIB_OK=TRUE tells the Intel OpenMP runtime to keep going when
# it detects that more than one copy of the runtime has been loaded.
os.environ["KMP_DUPLICATE_LIB_OK"] = "TRUE"
# Standard library
import re

# Third-party (pydantic, numpy and thinc are imported so their versions can be
# displayed on the page for diagnostics)
import streamlit as st
import spacy
from spacy import displacy
import pydantic
import numpy as np
import thinc
# Page configuration is issued before any other Streamlit command: Streamlit
# documents set_page_config as having to be the first command on a page, and
# releases that enforce this raise an exception otherwise.
st.set_page_config(layout="wide", page_title="AIdeaText")

# Dependency versions, written to the page to help diagnose environment issues.
st.write(f"spaCy version: {spacy.__version__}")
st.write(f"Pydantic version: {pydantic.__version__}")
st.write(f"NumPy version: {np.__version__}")
st.write(f"Thinc version: {thinc.__version__}")

# Project helpers for the repeated-word highlighting and the syntax network plot.
from modules.syntax_analysis import (  # noqa: E402
    highlight_repeated_words,
    get_repeated_words_colors,
    POS_COLORS,
    POS_TRANSLATIONS,
    visualize_syntax,
)


@st.cache_resource
def load_spacy_model(model_name: str = "es_core_news_lg"):
    """Load a spaCy pipeline, downloading it first when it is not installed.

    The function is wrapped in ``st.cache_resource`` so the loaded pipeline is
    reused across script reruns instead of being loaded again each time.

    Args:
        model_name: Name of the spaCy pipeline package to load. Defaults to
            the large Spanish news model used by this app.

    Returns:
        The loaded spaCy pipeline object.
    """
    try:
        return spacy.load(model_name)
    except OSError:
        # The first load failed (OSError is the same class as the IOError the
        # code caught before), so fetch the package and retry once.
        st.info("Downloading spaCy model...")
        spacy.cli.download(model_name)
        return spacy.load(model_name)


# Go through the cached loader so the model is loaded once and the download
# fallback is actually used, instead of calling spacy.load on every rerun.
nlp = load_spacy_model()
# Patterns used to post-process displaCy SVG output; compiled once here rather
# than on every sentence inside the rendering loop.
_SVG_OPEN_TAG_RE = re.compile(r'<svg[^>]*>')
_G_TRANSLATE_RE = re.compile(r'<g [^>]*transform="translate\((\d+),(\d+)\)"')


def _build_pos_legend_html() -> str:
    """Return the HTML legend with one colored chip per translated POS tag.

    Only tags present in both POS_COLORS and POS_TRANSLATIONS are included.
    """
    chips = "".join(
        "<div style='margin-right: 10px;'>"
        f"<span style='background-color: {color}; padding: 2px 5px;'>"
        f"{POS_TRANSLATIONS[pos]}</span></div>"
        for pos, color in POS_COLORS.items()
        if pos in POS_TRANSLATIONS
    )
    return f"<div style='display: flex; flex-wrap: wrap;'>{chips}</div>"


def _compact_arc_svg(svg: str) -> str:
    """Rewrite height/offset attributes of a displaCy SVG to make it shorter.

    Applies three textual substitutions in order: every ``height="375"``
    becomes ``height="200"``; inside each ``<svg ...>`` opening tag
    ``height="450"`` becomes ``height="300"``; and each ``<g ...>`` tag that
    carries an integer ``translate(x,y)`` transform is replaced by a ``<g>``
    with only ``translate(x,50)``.
    """
    svg = svg.replace('height="375"', 'height="200"')
    svg = _SVG_OPEN_TAG_RE.sub(
        lambda m: m.group(0).replace('height="450"', 'height="300"'), svg
    )
    return _G_TRANSLATE_RE.sub(
        lambda m: f'<g transform="translate({m.group(1)},50)"', svg
    )


st.markdown("### AIdeaText - Advanced Text Analysis Tool")

# NOTE(review): the label advertises a 5,000-word limit, but nothing below
# enforces it -- confirm whether oversized input should be rejected.
sentence_input = st.text_area("Ingresa un texto para analizar (max 5,000 words):", height=150)

if st.button("Analizar texto"):
    # Treat whitespace-only input as empty so blank text is never analysed.
    if sentence_input and sentence_input.strip():
        doc = nlp(sentence_input)

        # Repeated words, followed by the part-of-speech color legend.
        with st.expander("Palabras repetidas", expanded=True):
            word_colors = get_repeated_words_colors(doc)
            highlighted_text = highlight_repeated_words(doc, word_colors)
            st.markdown(highlighted_text, unsafe_allow_html=True)

            st.markdown("##### Legenda: Categorías gramaticales")
            st.markdown(_build_pos_legend_html(), unsafe_allow_html=True)

        # One dependency arc diagram per sentence.
        with st.expander("Análisis sintáctico: Diagrama de arco", expanded=True):
            for number, sent in enumerate(doc.sents, start=1):
                st.subheader(f"Sentence {number}")
                arc_svg = displacy.render(sent, style="dep", options={"distance": 100})
                st.write(_compact_arc_svg(arc_svg), unsafe_allow_html=True)

        # Network diagram built from the raw input text by the project helper.
        with st.expander("Análisis sintáctico: Diagrama de red", expanded=True):
            st.markdown("#### Análisis sintáctico: Diagrama de red")
            st.write("Esta sección muestra la estructura sintáctica del texto completo usando un diagrama de red.")

            fig = visualize_syntax(sentence_input)
            st.pyplot(fig)
    else:
        # Give feedback instead of silently doing nothing on an empty submit.
        st.warning("Ingresa un texto para analizar.")