|  |  | 
					
						
						|  | import spacy | 
					
						
						|  | import streamlit as st | 
					
						
						|  | import networkx as nx | 
					
						
						|  | import matplotlib.pyplot as plt | 
					
						
						|  | from collections import Counter | 
					
						
						|  |  | 
					
						
						|  | @st.cache_resource | 
					
						
						|  | def load_spacy_model(): | 
					
						
						|  | return spacy.load("es_core_news_lg") | 
					
						
						|  |  | 
					
						
						|  |  | 
					
						
						|  | nlp = spacy.load("es_core_news_lg") | 
					
						
						|  |  | 
					
						
						|  |  | 
					
						
						|  | POS_COLORS = { | 
					
						
						|  | 'ADJ': '#FFA07A', | 
					
						
						|  | 'ADP': '#98FB98', | 
					
						
						|  | 'ADV': '#87CEFA', | 
					
						
						|  | 'AUX': '#DDA0DD', | 
					
						
						|  | 'CCONJ': '#F0E68C', | 
					
						
						|  | 'DET': '#FFB6C1', | 
					
						
						|  | 'INTJ': '#FF6347', | 
					
						
						|  | 'NOUN': '#90EE90', | 
					
						
						|  | 'NUM': '#FAFAD2', | 
					
						
						|  | 'PART': '#D3D3D3', | 
					
						
						|  | 'PRON': '#FFA500', | 
					
						
						|  | 'PROPN': '#20B2AA', | 
					
						
						|  | 'SCONJ': '#DEB887', | 
					
						
						|  | 'SYM': '#7B68EE', | 
					
						
						|  | 'VERB': '#FF69B4', | 
					
						
						|  | 'X': '#A9A9A9', | 
					
						
						|  | } | 
					
						
						|  |  | 
					
						
						|  | POS_TRANSLATIONS = { | 
					
						
						|  | 'ADJ': 'Adjetivo', | 
					
						
						|  | 'ADP': 'Advposici贸n', | 
					
						
						|  | 'ADV': 'Adverbio', | 
					
						
						|  | 'AUX': 'Auxiliar', | 
					
						
						|  | 'CCONJ': 'Conjunci贸n Coordinante', | 
					
						
						|  | 'DET': 'Determinante', | 
					
						
						|  | 'INTJ': 'Interjecci贸n', | 
					
						
						|  | 'NOUN': 'Sustantivo', | 
					
						
						|  | 'NUM': 'N煤mero', | 
					
						
						|  | 'PART': 'Part铆cula', | 
					
						
						|  | 'PRON': 'Pronombre', | 
					
						
						|  | 'PROPN': 'Nombre Propio', | 
					
						
						|  | 'SCONJ': 'Conjunci贸n Subordinante', | 
					
						
						|  | 'SYM': 'S铆mbolo', | 
					
						
						|  | 'VERB': 'Verbo', | 
					
						
						|  | 'X': 'Otro', | 
					
						
						|  | } | 
					
						
						|  |  | 
					
						
						|  | def count_pos(doc): | 
					
						
						|  | return Counter(token.pos_ for token in doc if token.pos_ != 'PUNCT') | 
					
						
						|  |  | 
					
						
						|  | def create_syntax_graph(doc): | 
					
						
						|  | G = nx.DiGraph() | 
					
						
						|  | pos_counts = count_pos(doc) | 
					
						
						|  | word_nodes = {} | 
					
						
						|  | word_colors = {} | 
					
						
						|  |  | 
					
						
						|  | for token in doc: | 
					
						
						|  | if token.pos_ != 'PUNCT': | 
					
						
						|  | lower_text = token.text.lower() | 
					
						
						|  | if lower_text not in word_nodes: | 
					
						
						|  | node_id = len(word_nodes) | 
					
						
						|  | word_nodes[lower_text] = node_id | 
					
						
						|  | color = POS_COLORS.get(token.pos_, '#FFFFFF') | 
					
						
						|  | word_colors[lower_text] = color | 
					
						
						|  | G.add_node(node_id, | 
					
						
						|  | label=f"{token.text}\n[{POS_TRANSLATIONS.get(token.pos_, token.pos_)}]", | 
					
						
						|  | pos=token.pos_, | 
					
						
						|  | size=pos_counts[token.pos_] * 500, | 
					
						
						|  | color=color) | 
					
						
						|  |  | 
					
						
						|  | if token.dep_ != "ROOT" and token.head.pos_ != 'PUNCT': | 
					
						
						|  | head_id = word_nodes.get(token.head.text.lower()) | 
					
						
						|  | if head_id is not None: | 
					
						
						|  | G.add_edge(head_id, word_nodes[lower_text], label=token.dep_) | 
					
						
						|  |  | 
					
						
						|  | return G, word_colors | 
					
						
						|  |  | 
					
						
						|  | def visualize_syntax_graph(doc): | 
					
						
						|  | G, word_colors = create_syntax_graph(doc) | 
					
						
						|  |  | 
					
						
						|  | plt.figure(figsize=(20, 15)) | 
					
						
						|  | pos = nx.spring_layout(G, k=2, iterations=100) | 
					
						
						|  |  | 
					
						
						|  | node_colors = [data['color'] for _, data in G.nodes(data=True)] | 
					
						
						|  | node_sizes = [data['size'] for _, data in G.nodes(data=True)] | 
					
						
						|  |  | 
					
						
						|  | nx.draw(G, pos, with_labels=False, node_color=node_colors, node_size=node_sizes, arrows=True) | 
					
						
						|  |  | 
					
						
						|  | nx.draw_networkx_labels(G, pos, {node: data['label'] for node, data in G.nodes(data=True)}, font_size=8) | 
					
						
						|  |  | 
					
						
						|  | edge_labels = nx.get_edge_attributes(G, 'label') | 
					
						
						|  | nx.draw_networkx_edge_labels(G, pos, edge_labels=edge_labels, font_size=8) | 
					
						
						|  |  | 
					
						
						|  | plt.title("An谩lisis Sint谩ctico") | 
					
						
						|  | plt.axis('off') | 
					
						
						|  |  | 
					
						
						|  | legend_elements = [plt.Rectangle((0,0),1,1, facecolor=color, edgecolor='none', label=f"{POS_TRANSLATIONS[pos]} ({count_pos(doc)[pos]})") | 
					
						
						|  | for pos, color in POS_COLORS.items() if pos in set(nx.get_node_attributes(G, 'pos').values())] | 
					
						
						|  | plt.legend(handles=legend_elements, loc='center left', bbox_to_anchor=(1, 0.5)) | 
					
						
						|  |  | 
					
						
						|  | return plt | 
					
						
						|  |  | 
					
						
						|  | def visualize_syntax(text): | 
					
						
						|  | max_tokens = 5000 | 
					
						
						|  | doc = nlp(text) | 
					
						
						|  | if len(doc) > max_tokens: | 
					
						
						|  | doc = nlp(text[:max_tokens]) | 
					
						
						|  | print(f"Warning: The input text is too long. Only the first {max_tokens} tokens will be visualized.") | 
					
						
						|  | return visualize_syntax_graph(doc) | 
					
						
						|  |  | 
					
						
						|  |  | 
					
						
						|  | def get_repeated_words_colors(doc): | 
					
						
						|  | word_counts = Counter(token.text.lower() for token in doc if token.pos_ != 'PUNCT') | 
					
						
						|  | repeated_words = {word: count for word, count in word_counts.items() if count > 1} | 
					
						
						|  |  | 
					
						
						|  | word_colors = {} | 
					
						
						|  | for token in doc: | 
					
						
						|  | if token.text.lower() in repeated_words: | 
					
						
						|  | word_colors[token.text.lower()] = POS_COLORS.get(token.pos_, '#FFFFFF') | 
					
						
						|  |  | 
					
						
						|  | return word_colors | 
					
						
						|  |  | 
					
						
						|  | def highlight_repeated_words(doc, word_colors): | 
					
						
						|  | highlighted_text = [] | 
					
						
						|  | for token in doc: | 
					
						
						|  | if token.text.lower() in word_colors: | 
					
						
						|  | color = word_colors[token.text.lower()] | 
					
						
						|  | highlighted_text.append(f'<span style="background-color: {color};">{token.text}</span>') | 
					
						
						|  | else: | 
					
						
						|  | highlighted_text.append(token.text) | 
					
						
						|  | return ' '.join(highlighted_text) |