|
|
|
import streamlit as st |
|
import spacy |
|
import networkx as nx |
|
import matplotlib.pyplot as plt |
|
from collections import Counter |
|
|
|
|
|
|
|
|
|
# Hex fill colour for each part-of-speech tag (as read from ``token.pos_``).
# Used for the word-graph nodes and for the matching legend swatches; tags
# missing from this table are drawn with a neutral grey by the callers.
POS_COLORS = {
    "ADJ": "#FFA07A",
    "ADP": "#98FB98",
    "ADV": "#87CEFA",
    "AUX": "#DDA0DD",
    "CCONJ": "#F0E68C",
    "DET": "#FFB6C1",
    "INTJ": "#FF6347",
    "NOUN": "#90EE90",
    "NUM": "#FAFAD2",
    "PART": "#D3D3D3",
    "PRON": "#FFA500",
    "PROPN": "#20B2AA",
    "SCONJ": "#DEB887",
    "SYM": "#7B68EE",
    "VERB": "#FF69B4",
    "X": "#A9A9A9",
}
|
|
|
# Human-readable names for each part-of-speech tag, keyed first by interface
# language code ('es', 'en', 'fr') and then by tag. Used to label the legend
# of the semantic-relations figure.
POS_TRANSLATIONS = {
    "es": {
        "ADJ": "Adjetivo",
        "ADP": "Adposición",
        "ADV": "Adverbio",
        "AUX": "Auxiliar",
        "CCONJ": "Conjunción Coordinante",
        "DET": "Determinante",
        "INTJ": "Interjección",
        "NOUN": "Sustantivo",
        "NUM": "Número",
        "PART": "Partícula",
        "PRON": "Pronombre",
        "PROPN": "Nombre Propio",
        "SCONJ": "Conjunción Subordinante",
        "SYM": "Símbolo",
        "VERB": "Verbo",
        "X": "Otro",
    },
    "en": {
        "ADJ": "Adjective",
        "ADP": "Adposition",
        "ADV": "Adverb",
        "AUX": "Auxiliary",
        "CCONJ": "Coordinating Conjunction",
        "DET": "Determiner",
        "INTJ": "Interjection",
        "NOUN": "Noun",
        "NUM": "Number",
        "PART": "Particle",
        "PRON": "Pronoun",
        "PROPN": "Proper Noun",
        "SCONJ": "Subordinating Conjunction",
        "SYM": "Symbol",
        "VERB": "Verb",
        "X": "Other",
    },
    "fr": {
        "ADJ": "Adjectif",
        "ADP": "Adposition",
        "ADV": "Adverbe",
        "AUX": "Auxiliaire",
        "CCONJ": "Conjonction de Coordination",
        "DET": "Déterminant",
        "INTJ": "Interjection",
        "NOUN": "Nom",
        "NUM": "Nombre",
        "PART": "Particule",
        "PRON": "Pronom",
        "PROPN": "Nom Propre",
        "SCONJ": "Conjonction de Subordination",
        "SYM": "Symbole",
        "VERB": "Verbe",
        "X": "Autre",
    },
}
|
|
|
|
|
def count_pos(doc):
    """Tally the part-of-speech tags in ``doc``, leaving punctuation out.

    Args:
        doc: Iterable of tokens, each exposing a ``pos_`` attribute.

    Returns:
        A ``Counter`` mapping every tag other than ``'PUNCT'`` to the number
        of tokens that carry it.
    """
    tally = Counter()
    for token in doc:
        tag = token.pos_
        if tag == 'PUNCT':
            continue
        tally[tag] += 1
    return tally
|
|
|
def extract_entities(doc):
    """Group the named entities of ``doc`` into four display categories.

    Args:
        doc: Object exposing ``ents``, an iterable of entities that each
            provide ``text`` and ``label_``.

    Returns:
        A dict with the keys ``"Personas"``, ``"Conceptos"``, ``"Lugares"``
        and ``"Fechas"``, each holding the entity texts of that category in
        document order. Entities whose label is not recognised below are
        filed under ``"Conceptos"``.
    """
    # spaCy's Spanish and French pipelines tag people as "PER", while the
    # English pipelines use "PERSON"; both must map to the same category or
    # English persons end up among the concepts.
    category_by_label = {
        "PER": "Personas",
        "PERSON": "Personas",
        "LOC": "Lugares",
        "GPE": "Lugares",
        "DATE": "Fechas",
    }

    entities = {
        "Personas": [],
        "Conceptos": [],
        "Lugares": [],
        "Fechas": [],
    }

    for ent in doc.ents:
        category = category_by_label.get(ent.label_, "Conceptos")
        entities[category].append(ent.text)

    return entities
|
|
|
def visualize_context_graph(doc, lang):
    """Draw a co-occurrence graph of the named entities found in ``doc``.

    Every entity returned by ``extract_entities`` becomes a node coloured by
    its category, and two entities are linked when they appear in the same
    sentence.

    Args:
        doc: Parsed document exposing ``ents`` and ``sents``; every sentence
            must expose ``ents`` as well.
        lang: ``'es'`` or ``'en'`` selects the Spanish or English title; any
            other value gets the French one.

    Returns:
        The ``matplotlib.pyplot`` module. The figure created here is left as
        pyplot's current figure.
    """
    G = nx.Graph()

    # One node per distinct entity text, tagged with its category.
    for category, items in extract_entities(doc).items():
        for item in items:
            G.add_node(item, category=category)

    # Link entities that share a sentence. Names are de-duplicated first
    # (order preserved) so an entity mentioned twice in one sentence does
    # not get an edge to itself.
    for sent in doc.sents:
        names = list(dict.fromkeys(
            ent.text for ent in sent.ents if ent.text in G.nodes()
        ))
        for i, first in enumerate(names):
            for second in names[i + 1:]:
                G.add_edge(first, second)

    plt.figure(figsize=(20, 15))
    layout = nx.spring_layout(G, k=0.5, iterations=50)

    color_map = {
        "Personas": "lightblue",
        "Conceptos": "lightgreen",
        "Lugares": "lightcoral",
        "Fechas": "lightyellow",
    }
    node_colors = [color_map[G.nodes[node]['category']] for node in G.nodes()]

    nx.draw(G, layout, node_color=node_colors, with_labels=True,
            node_size=3000, font_size=8, font_weight='bold')

    # Legend: one colour swatch per category, in color_map order.
    legend_elements = [
        plt.Rectangle((0, 0), 1, 1, fc=color, edgecolor='none')
        for color in color_map.values()
    ]
    plt.legend(legend_elements, list(color_map), loc='upper left',
               bbox_to_anchor=(1, 1))

    if lang == 'es':
        title = "Análisis de Contexto"
    elif lang == 'en':
        title = "Context Analysis"
    else:
        title = "Analyse de Contexte"
    plt.title(title, fontsize=20)
    plt.axis('off')

    return plt
|
|
|
def create_semantic_graph(doc, lang):
    """Build an undirected word graph from the dependency parse of ``doc``.

    Each non-punctuation token contributes a node keyed by its text, carrying
    its part-of-speech tag, the colour assigned to that tag and a size
    proportional to how often the tag occurs in the document. Each non-root
    token is then linked to its syntactic head, with the dependency relation
    stored as the edge label.

    Args:
        doc: Iterable of tokens exposing ``text``, ``pos_``, ``dep_`` and
            ``head``.
        lang: Not used by this function.

    Returns:
        A ``(graph, pos_counts)`` tuple: the ``networkx.Graph`` and the tag
        counts produced by ``count_pos``.
    """
    pos_counts = count_pos(doc)
    graph = nx.Graph()

    # Nodes are keyed by token text, so repeated words share a single node.
    for token in doc:
        tag = token.pos_
        if tag == 'PUNCT':
            continue
        graph.add_node(
            token.text,
            pos=tag,
            color=POS_COLORS.get(tag, '#CCCCCC'),
            size=pos_counts.get(tag, 1) * 100,
        )

    # Connect every non-root token to its head when both ends are nodes.
    for token in doc:
        if token.dep_ == "ROOT":
            continue
        head_text = token.head.text
        if head_text in graph.nodes and token.text in graph.nodes:
            graph.add_edge(head_text, token.text, label=token.dep_)

    return graph, pos_counts
|
|
|
def visualize_semantic_relations(doc, lang):
    """Draw the dependency word graph of ``doc`` with a part-of-speech legend.

    Args:
        doc: Parsed document accepted by ``create_semantic_graph``.
        lang: ``'es'`` or ``'en'`` selects Spanish or English text; any other
            value gets the French title and, if it has no entry in
            ``POS_TRANSLATIONS``, the French tag names too.

    Returns:
        The ``matplotlib.pyplot`` module. The figure created here is left as
        pyplot's current figure.
    """
    G, pos_counts = create_semantic_graph(doc, lang)

    plt.figure(figsize=(24, 18))
    layout = nx.spring_layout(G, k=0.9, iterations=50)

    node_colors = [G.nodes[node].get('color', '#CCCCCC') for node in G.nodes()]
    node_sizes = [G.nodes[node].get('size', 100) for node in G.nodes()]

    nx.draw(G, layout, node_color=node_colors, node_size=node_sizes,
            with_labels=True, font_size=8, font_weight='bold', arrows=True,
            arrowsize=20, width=2, edge_color='gray')

    edge_labels = nx.get_edge_attributes(G, 'label')
    nx.draw_networkx_edge_labels(G, layout, edge_labels=edge_labels, font_size=8)

    if lang == 'es':
        title = "Análisis de Relaciones Semánticas"
    elif lang == 'en':
        title = "Semantic Relations Analysis"
    else:
        title = "Analyse des Relations Sémantiques"
    plt.title(title, fontsize=20, fontweight='bold')
    plt.axis('off')

    # The title treats every language other than 'es'/'en' as French, so the
    # legend uses the same fallback rather than raising KeyError on an
    # unknown language code.
    tag_names = POS_TRANSLATIONS.get(lang, POS_TRANSLATIONS['fr'])

    # One swatch per tag that occurs in the document, labelled
    # "<translated name> (<count>)".
    legend_elements = [
        plt.Rectangle((0, 0), 1, 1, facecolor=color, edgecolor='none',
                      label=f"{tag_names.get(tag, tag)} ({pos_counts.get(tag, 0)})")
        for tag, color in POS_COLORS.items()
        if tag in pos_counts
    ]
    plt.legend(handles=legend_elements, loc='center left',
               bbox_to_anchor=(1, 0.5), fontsize=12)

    return plt
|
|
|
def perform_semantic_analysis(text, nlp, lang):
    """Parse ``text`` and draw its context and semantic-relations figures.

    Args:
        text: Raw text to analyse.
        nlp: Callable that turns ``text`` into a parsed document.
        lang: Language code forwarded to both drawing helpers.

    Returns:
        A ``(context_graph, relations_graph)`` tuple of matplotlib figures.
    """
    doc = nlp(text)

    # Both helpers return the pyplot module itself, so their return values
    # are the same object and always resolve to whichever figure was created
    # last. Grab each figure right after it is drawn so the two results stay
    # distinct. A Figure still offers ``savefig``, so it can be rendered or
    # saved wherever the pyplot module was accepted.
    visualize_context_graph(doc, lang)
    context_graph = plt.gcf()

    visualize_semantic_relations(doc, lang)
    relations_graph = plt.gcf()

    return context_graph, relations_graph