|
|
|
import streamlit as st
import spacy
import networkx as nx
import matplotlib.pyplot as plt
from collections import Counter, defaultdict

|
POS_COLORS = {
    'ADJ': '#FFA07A',
    'ADP': '#98FB98',
    'ADV': '#87CEFA',
    'AUX': '#DDA0DD',
    'CCONJ': '#F0E68C',
    'DET': '#FFB6C1',
    'INTJ': '#FF6347',
    'NOUN': '#90EE90',
    'NUM': '#FAFAD2',
    'PART': '#D3D3D3',
    'PRON': '#FFA500',
    'PROPN': '#20B2AA',
    'SCONJ': '#DEB887',
    'SYM': '#7B68EE',
    'VERB': '#FF69B4',
    'X': '#A9A9A9',
}
|
|
POS_TRANSLATIONS = {
    'es': {
        'ADJ': 'Adjetivo',
        'ADP': 'Adposición',
        'ADV': 'Adverbio',
        'AUX': 'Auxiliar',
        'CCONJ': 'Conjunción Coordinante',
        'DET': 'Determinante',
        'INTJ': 'Interjección',
        'NOUN': 'Sustantivo',
        'NUM': 'Número',
        'PART': 'Partícula',
        'PRON': 'Pronombre',
        'PROPN': 'Nombre Propio',
        'SCONJ': 'Conjunción Subordinante',
        'SYM': 'Símbolo',
        'VERB': 'Verbo',
        'X': 'Otro',
    },
    'en': {
        'ADJ': 'Adjective',
        'ADP': 'Adposition',
        'ADV': 'Adverb',
        'AUX': 'Auxiliary',
        'CCONJ': 'Coordinating Conjunction',
        'DET': 'Determiner',
        'INTJ': 'Interjection',
        'NOUN': 'Noun',
        'NUM': 'Number',
        'PART': 'Particle',
        'PRON': 'Pronoun',
        'PROPN': 'Proper Noun',
        'SCONJ': 'Subordinating Conjunction',
        'SYM': 'Symbol',
        'VERB': 'Verb',
        'X': 'Other',
    },
    'fr': {
        'ADJ': 'Adjectif',
        'ADP': 'Adposition',
        'ADV': 'Adverbe',
        'AUX': 'Auxiliaire',
        'CCONJ': 'Conjonction de Coordination',
        'DET': 'Déterminant',
        'INTJ': 'Interjection',
        'NOUN': 'Nom',
        'NUM': 'Nombre',
        'PART': 'Particule',
        'PRON': 'Pronom',
        'PROPN': 'Nom Propre',
        'SCONJ': 'Conjonction de Subordination',
        'SYM': 'Symbole',
        'VERB': 'Verbe',
        'X': 'Autre',
    },
}
|
|
ENTITY_LABELS = {
    'es': {
        "Personas": "lightblue",
        "Conceptos": "lightgreen",
        "Lugares": "lightcoral",
        "Fechas": "lightyellow",
    },
    'en': {
        "People": "lightblue",
        "Concepts": "lightgreen",
        "Places": "lightcoral",
        "Dates": "lightyellow",
    },
    'fr': {
        "Personnes": "lightblue",
        "Concepts": "lightgreen",
        "Lieux": "lightcoral",
        "Dates": "lightyellow",
    },
}
|
|
def count_pos(doc):
    """Count universal POS tags in a spaCy Doc, excluding punctuation."""
    return Counter(token.pos_ for token in doc if token.pos_ != 'PUNCT')
|
|
def create_semantic_graph(doc, lang):
    """Build an undirected graph linking the 20 most frequent noun/verb lemmas
    through their dependency relations. Returns (graph, lemma frequencies)."""
    G = nx.Graph()
    word_freq = defaultdict(int)
    lemma_to_word = {}
    lemma_to_pos = {}

    # Count noun/verb lemmas and remember a surface form and POS tag for each.
    for token in doc:
        if token.pos_ in ['NOUN', 'VERB']:
            lemma = token.lemma_.lower()
            word_freq[lemma] += 1
            if lemma not in lemma_to_word or token.text.lower() == lemma:
                lemma_to_word[lemma] = token.text
                lemma_to_pos[lemma] = token.pos_

    # Keep only the 20 most frequent lemmas as graph nodes.
    top_lemmas = [lemma for lemma, _ in sorted(word_freq.items(), key=lambda x: x[1], reverse=True)[:20]]

    for lemma in top_lemmas:
        word = lemma_to_word[lemma]
        G.add_node(word, pos=lemma_to_pos[lemma])

    # Connect nodes whose tokens are linked by a dependency arc, labelled with the relation.
    for token in doc:
        if token.lemma_.lower() in top_lemmas and token.head.lemma_.lower() in top_lemmas:
            source = lemma_to_word[token.lemma_.lower()]
            target = lemma_to_word[token.head.lemma_.lower()]
            if source != target:
                G.add_edge(source, target, label=token.dep_)

    return G, word_freq
|
|
def visualize_semantic_relations(doc, lang):
    """Render the semantic relation graph as a matplotlib figure: nodes are the
    most frequent noun/verb lemmas, coloured by POS tag, with edges labelled by
    dependency relation."""
    G, _ = create_semantic_graph(doc, lang)

    fig, ax = plt.subplots(figsize=(36, 27))
    pos = nx.spring_layout(G, k=0.7, iterations=50)

    node_colors = [POS_COLORS.get(G.nodes[node]['pos'], '#CCCCCC') for node in G.nodes()]

    nx.draw(G, pos, node_color=node_colors, with_labels=True,
            node_size=10000,
            font_size=16,
            font_weight='bold',
            arrows=True,
            arrowsize=30,
            width=3,
            edge_color='gray',
            ax=ax)

    edge_labels = nx.get_edge_attributes(G, 'label')
    nx.draw_networkx_edge_labels(G, pos, edge_labels=edge_labels, font_size=14, ax=ax)

    title = {
        'es': "Relaciones Semánticas Relevantes",
        'en': "Relevant Semantic Relations",
        'fr': "Relations Sémantiques Pertinentes"
    }
    ax.set_title(title[lang], fontsize=24, fontweight='bold')
    ax.axis('off')

    # Legend: map node colours back to the POS categories shown in the graph.
    legend_elements = [plt.Rectangle((0, 0), 1, 1, fc=POS_COLORS.get(pos_tag, '#CCCCCC'), edgecolor='none',
                                     label=POS_TRANSLATIONS[lang].get(pos_tag, pos_tag))
                       for pos_tag in ['NOUN', 'VERB']]
    ax.legend(handles=legend_elements, loc='center left', bbox_to_anchor=(1, 0.5), fontsize=16)

    return fig
|
|
def perform_semantic_analysis(text, nlp, lang):
    """Run the spaCy pipeline on `text` and return the semantic relations figure."""
    doc = nlp(text)

    # Debug output: named entities detected by the pipeline.
    print(f"Entities found ({lang}):")
    for ent in doc.ents:
        print(f"{ent.text} - {ent.label_}")

    relations_graph = visualize_semantic_relations(doc, lang)
    return relations_graph
|
|
__all__ = ['visualize_semantic_relations', 'create_semantic_graph', 'POS_COLORS', 'POS_TRANSLATIONS'] |
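

# --- Usage sketch (illustrative, not part of the module API) ---
# A minimal example of how these helpers fit together, assuming the spaCy model
# "en_core_web_sm" has been installed (e.g. `python -m spacy download en_core_web_sm`).
# Inside the Streamlit app the figure would be rendered with st.pyplot(fig)
# rather than saved to disk.
if __name__ == "__main__":
    nlp = spacy.load("en_core_web_sm")  # assumption: English model is available locally
    sample_text = "Marie Curie discovered radium and later won a second Nobel Prize in 1911."
    fig = perform_semantic_analysis(sample_text, nlp, 'en')
    fig.savefig("semantic_relations.png")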