Update modules/semantic_analysis.py
modules/semantic_analysis.py  (+35 -18)
@@ -4,6 +4,7 @@ import spacy
 import networkx as nx
 import matplotlib.pyplot as plt
 from collections import Counter
+from collections import defaultdict

 # Remove the global nlp model loading

@@ -180,50 +181,66 @@ def visualize_context_graph(doc, lang):

 def visualize_semantic_relations(doc, lang):
     G = nx.Graph()
-    word_freq =
+    word_freq = defaultdict(int)
+    lemma_to_word = {}

+    # Count frequencies of lemmas and map lemmas to their most common word form
     for token in doc:
-        if token.pos_ in ['NOUN', 'VERB']
+        if token.pos_ in ['NOUN', 'VERB']:
+            lemma = token.lemma_.lower()
+            word_freq[lemma] += 1
+            if lemma not in lemma_to_word or token.text.lower() == lemma:
+                lemma_to_word[lemma] = token.text

+    # Get top 20 most frequent lemmas
+    top_lemmas = [lemma for lemma, _ in sorted(word_freq.items(), key=lambda x: x[1], reverse=True)[:20]]
+
+    # Add nodes
+    for lemma in top_lemmas:
+        word = lemma_to_word[lemma]
+        G.add_node(word, pos=doc.vocab[lemma].pos_)
+
+    # Add edges
     for token in doc:
-        if token.
-            if token.head.
+        if token.lemma_.lower() in top_lemmas:
+            if token.head.lemma_.lower() in top_lemmas:
+                source = lemma_to_word[token.lemma_.lower()]
+                target = lemma_to_word[token.head.lemma_.lower()]
+                if source != target:  # Avoid self-loops
+                    G.add_edge(source, target, label=token.dep_)

     fig, ax = plt.subplots(figsize=(36, 27))
     pos = nx.spring_layout(G, k=0.7, iterations=50)

     node_colors = [POS_COLORS.get(G.nodes[node]['pos'], '#CCCCCC') for node in G.nodes()]

     nx.draw(G, pos, node_color=node_colors, with_labels=True,
             node_size=10000,
             font_size=16,
             font_weight='bold',
             arrows=True,
             arrowsize=30,
             width=3,
             edge_color='gray',
             ax=ax)

     edge_labels = nx.get_edge_attributes(G, 'label')
     nx.draw_networkx_edge_labels(G, pos, edge_labels=edge_labels, font_size=14, ax=ax)

     title = {
         'es': "Relaciones Semánticas Relevantes",
         'en': "Relevant Semantic Relations",
         'fr': "Relations Sémantiques Pertinentes"
     }
     ax.set_title(title[lang], fontsize=24, fontweight='bold')
     ax.axis('off')

     legend_elements = [plt.Rectangle((0,0),1,1,fc=POS_COLORS.get(pos, '#CCCCCC'), edgecolor='none',
                                      label=f"{POS_TRANSLATIONS[lang].get(pos, pos)}")
                        for pos in ['NOUN', 'VERB']]
     ax.legend(handles=legend_elements, loc='center left', bbox_to_anchor=(1, 0.5), fontsize=16)

     return fig


 ############################################################################################################################################
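One note on the frequency step in the hunk above: the module already imports Counter, so the lemma count and top-20 selection could be written equivalently with Counter.most_common. A minimal sketch of that alternative, assuming a processed spaCy doc as in the function (this is not what the commit itself does):

    from collections import Counter

    # Count NOUN/VERB lemmas and take the 20 most frequent ones, equivalent to the
    # defaultdict + sorted(...)[:20] combination used in the committed version.
    word_freq = Counter(
        token.lemma_.lower() for token in doc if token.pos_ in ['NOUN', 'VERB']
    )
    top_lemmas = [lemma for lemma, _ in word_freq.most_common(20)]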
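For reference, a hypothetical driver for the updated function. The model name, sample text, and output path are placeholders, and POS_COLORS / POS_TRANSLATIONS are assumed to be defined at module level in modules/semantic_analysis.py (they are referenced in the hunk but not shown):

    import spacy
    from modules.semantic_analysis import visualize_semantic_relations

    # Placeholder model; the module removed its global nlp loading, so the caller
    # is expected to build the doc itself.
    nlp = spacy.load("en_core_web_sm")
    doc = nlp("The analyst builds a graph that links the most frequent nouns and verbs in the text.")

    fig = visualize_semantic_relations(doc, lang='en')
    fig.savefig("semantic_relations.png", bbox_inches='tight')  # placeholder output path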