Update modules/semantic_analysis.py
modules/semantic_analysis.py  (+35 -18)
@@ -4,6 +4,7 @@ import spacy
 import networkx as nx
 import matplotlib.pyplot as plt
 from collections import Counter
+from collections import defaultdict

 # Remove the global nlp model loading

@@ -180,50 +181,66 @@ def visualize_context_graph(doc, lang):

 def visualize_semantic_relations(doc, lang):
     G = nx.Graph()
-    word_freq =
+    word_freq = defaultdict(int)
+    lemma_to_word = {}

+    # Count frequencies of lemmas and map lemmas to their most common word form
     for token in doc:
-        if token.pos_ in ['NOUN', 'VERB']
+        if token.pos_ in ['NOUN', 'VERB']:
+            lemma = token.lemma_.lower()
+            word_freq[lemma] += 1
+            if lemma not in lemma_to_word or token.text.lower() == lemma:
+                lemma_to_word[lemma] = token.text

+    # Get top 20 most frequent lemmas
+    top_lemmas = [lemma for lemma, _ in sorted(word_freq.items(), key=lambda x: x[1], reverse=True)[:20]]
+
+    # Add nodes
+    for lemma in top_lemmas:
+        word = lemma_to_word[lemma]
+        G.add_node(word, pos=doc.vocab[lemma].pos_)
+
+    # Add edges
     for token in doc:
-        if token.
-            if token.head.
+        if token.lemma_.lower() in top_lemmas:
+            if token.head.lemma_.lower() in top_lemmas:
+                source = lemma_to_word[token.lemma_.lower()]
+                target = lemma_to_word[token.head.lemma_.lower()]
+                if source != target:  # Avoid self-loops
+                    G.add_edge(source, target, label=token.dep_)

     fig, ax = plt.subplots(figsize=(36, 27))
     pos = nx.spring_layout(G, k=0.7, iterations=50)

     node_colors = [POS_COLORS.get(G.nodes[node]['pos'], '#CCCCCC') for node in G.nodes()]

     nx.draw(G, pos, node_color=node_colors, with_labels=True,
             node_size=10000,
             font_size=16,
             font_weight='bold',
             arrows=True,
             arrowsize=30,
             width=3,
             edge_color='gray',
             ax=ax)

     edge_labels = nx.get_edge_attributes(G, 'label')
     nx.draw_networkx_edge_labels(G, pos, edge_labels=edge_labels, font_size=14, ax=ax)

     title = {
         'es': "Relaciones Semánticas Relevantes",
         'en': "Relevant Semantic Relations",
         'fr': "Relations Sémantiques Pertinentes"
     }
     ax.set_title(title[lang], fontsize=24, fontweight='bold')
     ax.axis('off')

     legend_elements = [plt.Rectangle((0,0),1,1,fc=POS_COLORS.get(pos, '#CCCCCC'), edgecolor='none',
                                      label=f"{POS_TRANSLATIONS[lang].get(pos, pos)}")
                        for pos in ['NOUN', 'VERB']]
     ax.legend(handles=legend_elements, loc='center left', bbox_to_anchor=(1, 0.5), fontsize=16)

     return fig


 ############################################################################################################################################
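One note on the frequency step in the hunk above: the module already imports Counter, so the lemma count and top-20 selection could be written equivalently with Counter.most_common. A minimal sketch of that alternative, assuming a processed spaCy doc as in the function (this is not what the commit itself does):

    from collections import Counter

    # Count NOUN/VERB lemmas and take the 20 most frequent ones, equivalent to the
    # defaultdict + sorted(...)[:20] combination used in the committed version.
    word_freq = Counter(
        token.lemma_.lower() for token in doc if token.pos_ in ['NOUN', 'VERB']
    )
    top_lemmas = [lemma for lemma, _ in word_freq.most_common(20)]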
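For reference, a hypothetical driver for the updated function. The model name, sample text, and output path are placeholders, and POS_COLORS / POS_TRANSLATIONS are assumed to be defined at module level in modules/semantic_analysis.py (they are referenced in the hunk but not shown):

    import spacy
    from modules.semantic_analysis import visualize_semantic_relations

    # Placeholder model; the module removed its global nlp loading, so the caller
    # is expected to build the doc itself.
    nlp = spacy.load("en_core_web_sm")
    doc = nlp("The analyst builds a graph that links the most frequent nouns and verbs in the text.")

    fig = visualize_semantic_relations(doc, lang='en')
    fig.savefig("semantic_relations.png", bbox_inches='tight')  # placeholder output path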