test2

Paused

App Files Files Community

test2 / app.py

AIdeaText

Update app.py

568dae8 verified 8 months ago

raw

history blame

3.85 kB


	# app.py
	#
	# NOTE: a `!pip install ...` line is IPython/Jupyter shell syntax and is a
	# SyntaxError in a plain Python script, so it must not appear in this file.
	# Install the Spanish spaCy model through the environment instead, e.g. by
	# listing this wheel in the project's requirements.txt:
	#   https://huggingface.co/spacy/es_core_news_lg/resolve/main/es_core_news_lg-any-py3-none-any.whl

	import os

	# Set before the heavy imports below so the libraries see these values when
	# they are first imported. (Presumably they silence oneDNN / duplicate-OpenMP
	# runtime issues -- confirm against the deployment environment.)
	os.environ['TF_ENABLE_ONEDNN_OPTS'] = '0'
	os.environ['KMP_DUPLICATE_LIB_OK'] = 'TRUE'

	import re

	import numpy as np
	import pydantic
	import spacy
	import streamlit as st
	import thinc
	from spacy import displacy

	from modules.syntax_analysis import (
	    POS_COLORS,
	    POS_TRANSLATIONS,
	    get_repeated_words_colors,
	    highlight_repeated_words,
	    visualize_syntax,
	)

	SPACY_MODEL_NAME = "es_core_news_lg"

	# Configure the page to use the full width. This has to run before any other
	# Streamlit command (including the st.write diagnostics below).
	st.set_page_config(layout="wide", page_title="AIdeaText")

	# Diagnostics: show the versions of the libraries this app depends on.
	for _label, _module in (
	    ("spaCy", spacy),
	    ("Pydantic", pydantic),
	    ("NumPy", np),
	    ("Thinc", thinc),
	):
	    st.write(f"{_label} version: {_module.__version__}")


	@st.cache_resource
	def load_spacy_model():
	    """Return the Spanish spaCy pipeline, downloading it if it is missing.

	    The function is wrapped in ``st.cache_resource`` so the pipeline object
	    is created once and reused instead of being rebuilt on every script run.

	    Returns:
	        The loaded ``es_core_news_lg`` spaCy pipeline.

	    Raises:
	        OSError: if the model still cannot be loaded after the download.
	    """
	    try:
	        return spacy.load(SPACY_MODEL_NAME)
	    except OSError:
	        # Model package not installed: fetch it once, then retry the load.
	        st.info("Downloading spaCy model...")
	        spacy.cli.download(SPACY_MODEL_NAME)
	        return spacy.load(SPACY_MODEL_NAME)


	# Load the spaCy model through the cached loader (with download fallback)
	# rather than calling spacy.load directly on every rerun.
	nlp = load_spacy_model()

	st.markdown("### AIdeaText - Advanced Text Analysis Tool")

	# First horizontal band: text input
	sentence_input = st.text_area("Ingresa un texto para analizar (max 5,000 words):", height=150)

	# Patterns used to post-process the SVG markup returned by displaCy.
	_SVG_OPEN_TAG = re.compile(r'<svg[^>]*>')
	_GROUP_TRANSLATE = re.compile(r'<g [^>]*transform="translate\((\d+),(\d+)\)"')


	def _compact_displacy_svg(svg_markup):
	    """Return ``svg_markup`` with a smaller vertical footprint.

	    Three textual rewrites are applied, in this order:

	    1. every ``height="375"`` in the markup becomes ``height="200"``;
	    2. inside each opening ``<svg ...>`` tag, ``height="450"`` becomes
	       ``height="300"``;
	    3. every ``<g ... transform="translate(X,Y)"`` prefix is rewritten to
	       ``<g transform="translate(X,50)"`` (attributes that preceded the
	       transform inside that tag are dropped by this rewrite).

	    Markup that contains none of these patterns is returned unchanged.
	    """
	    svg_markup = svg_markup.replace('height="375"', 'height="200"')
	    svg_markup = _SVG_OPEN_TAG.sub(
	        lambda m: m.group(0).replace('height="450"', 'height="300"'),
	        svg_markup,
	    )
	    return _GROUP_TRANSLATE.sub(
	        lambda m: f'<g transform="translate({m.group(1)},50)"',
	        svg_markup,
	    )


	if st.button("Analizar texto"):
	    if sentence_input and sentence_input.strip():
	        doc = nlp(sentence_input)

	        # Second horizontal band: highlighted repeated words
	        with st.expander("Palabras repetidas", expanded=True):
	            word_colors = get_repeated_words_colors(doc)
	            highlighted_text = highlight_repeated_words(doc, word_colors)
	            st.markdown(highlighted_text, unsafe_allow_html=True)

	            # Legend for grammatical categories: one colored chip per POS tag
	            # that has a translation.
	            st.markdown("##### Legenda: Categorías gramaticales")
	            legend_items = "".join(
	                f"<div style='margin-right: 10px;'><span style='background-color: {color}; padding: 2px 5px;'>{POS_TRANSLATIONS[pos]}</span></div>"
	                for pos, color in POS_COLORS.items()
	                if pos in POS_TRANSLATIONS
	            )
	            legend_html = f"<div style='display: flex; flex-wrap: wrap;'>{legend_items}</div>"
	            st.markdown(legend_html, unsafe_allow_html=True)

	        # Third horizontal band: one arc diagram per sentence
	        with st.expander("Análisis sintáctico: Diagrama de arco", expanded=True):
	            for number, sent in enumerate(doc.sents, start=1):
	                st.subheader(f"Sentence {number}")
	                html = displacy.render(sent, style="dep", options={"distance": 100})
	                st.write(_compact_displacy_svg(html), unsafe_allow_html=True)

	        # Fourth horizontal band: network graph of the whole text
	        with st.expander("Análisis sintáctico: Diagrama de red", expanded=True):
	            st.markdown("#### Análisis sintáctico: Diagrama de red")
	            st.write("Esta sección muestra la estructura sintáctica del texto completo usando un diagrama de red.")

	            fig = visualize_syntax(sentence_input)
	            st.pyplot(fig)
	    else:
	        # Give feedback instead of silently doing nothing on empty input.
	        st.warning("Ingresa un texto antes de presionar el botón de análisis.")