Spaces:

vialibre
/

edia_we_es

Configuration error

App Files Files Community

nanom committed on Jan 26, 2023

Commit

1804cab

•

1 Parent(s): 33738cd

Improvement in the display of the graph axes labels. Minor fixes.

Browse files

Files changed (2) hide show

modules/module_BiasExplorer.py +45 -23
modules/utils.py +87 -1

modules/module_BiasExplorer.py CHANGED Viewed

@@ -5,14 +5,15 @@ import seaborn as sns
 import matplotlib.pyplot as plt
 from sklearn.decomposition import PCA
 from typing import List, Dict, Tuple, Optional, Any
-from modules.utils import normalize, cosine_similarity, project_params, take_two_sides_extreme_sorted
 __all__ = ['WordBiasExplorer', 'WEBiasExplorer2Spaces', 'WEBiasExplorer4Spaces']
 class WordBiasExplorer:
     def __init__(
         self,
-        embedding  # Embedding Class instance
     ) -> None:
         self.embedding = embedding
@@ -20,6 +21,7 @@ class WordBiasExplorer:
         self.positive_end = None
         self.negative_end = None
         self.DIRECTION_METHODS = ['single', 'sum', 'pca']
     def __copy__(
         self
@@ -245,12 +247,12 @@ class WordBiasExplorer:
         out_msj = ""
         if not word:
-            out_msj = "Error: Primero debe ingresar una palabra!"
         else:
             if word not in self.embedding:
-                out_msj = f"Error: La palabra '<b>{word}</b>' no se encuentra en el vocabulario!"
-        return out_msj
     def check_oov(
         self,
@@ -267,10 +269,11 @@ class WordBiasExplorer:
 class WEBiasExplorer2Spaces(WordBiasExplorer):
     def __init__(
         self,
-        embedding   # Embedding class instance
     ) -> None:
-        super().__init__(embedding)
     def calculate_bias(
         self,
@@ -283,7 +286,7 @@ class WEBiasExplorer2Spaces(WordBiasExplorer):
         for wordlist in wordlists:
             if not wordlist:
-                raise Exception('Debe ingresar al menos 1 palabra en las lista de palabras a diagnosticar, sesgo 1 y sesgo 2')
         err = self.check_oov(wordlists)
         if err:
@@ -368,9 +371,14 @@ class WEBiasExplorer2Spaces(WordBiasExplorer):
         plt.xticks(np.arange(-most_extream_projection,
                              most_extream_projection + axis_projection_step,
                              axis_projection_step))
-        xlabel = ('← {} {} {} →'.format(self.negative_end,
-                                        ' ' * 20,
-                                        self.positive_end))
         plt.xlabel(xlabel)
         plt.ylabel('Words')
@@ -381,10 +389,11 @@ class WEBiasExplorer2Spaces(WordBiasExplorer):
 class WEBiasExplorer4Spaces(WordBiasExplorer):
     def __init__(
         self,
-        embedding   # Embedding Class instance
     ) -> None:
-        super().__init__(embedding)
     def calculate_bias(
         self,
@@ -405,7 +414,7 @@ class WEBiasExplorer4Spaces(WordBiasExplorer):
         for wordlist in wordlists:
             if not wordlist:
-                raise Exception('¡Para graficar con 4 espacios, debe ingresar al menos 1 palabra en todas las listas!')
         err = self.check_oov(wordlists)
         if err:
@@ -495,9 +504,15 @@ class WEBiasExplorer4Spaces(WordBiasExplorer):
             projections_df['projection']
             .abs()
             .max(),
-            decimals=1)
-        sns.scatterplot(x='projection_x', y='projection_y', data=projections_df,
-                        palette=projections_df['color'])
         plt.xticks(np.arange(-most_extream_projection,
                              most_extream_projection + axis_projection_step,
@@ -505,13 +520,20 @@ class WEBiasExplorer4Spaces(WordBiasExplorer):
         for _, row in (projections_df.iterrows()):
             ax.annotate(
                 row['word'], (row['projection_x'], row['projection_y']))
-        x_label = '← {} {} {} →'.format(name_left,
-                                        ' ' * 20,
-                                        name_right)
-        y_label = '← {} {} {} →'.format(name_top,
-                                        ' ' * 20,
-                                        name_bottom)
         plt.xlabel(x_label)
         ax.xaxis.set_label_position('bottom')

 import matplotlib.pyplot as plt
 from sklearn.decomposition import PCA
 from typing import List, Dict, Tuple, Optional, Any
+from modules.utils import normalize, cosine_similarity, project_params, take_two_sides_extreme_sorted, axes_labels_format
 __all__ = ['WordBiasExplorer', 'WEBiasExplorer2Spaces', 'WEBiasExplorer4Spaces']
 class WordBiasExplorer:
     def __init__(
         self,
+        embedding,      # Embedding class instance
+        errorManager    # ErrorManager class instance
     ) -> None:
         self.embedding = embedding
         self.positive_end = None
         self.negative_end = None
         self.DIRECTION_METHODS = ['single', 'sum', 'pca']
+        self.errorManager = errorManager
     def __copy__(
         self
         out_msj = ""
         if not word:
+            out_msj = ['EMBEDDING_NO_WORD_PROVIDED']
         else:
             if word not in self.embedding:
+                out_msj = ['EMBEDDING_WORD_OOV', word]
+        return self.errorManager.process(out_msj)
     def check_oov(
         self,
 class WEBiasExplorer2Spaces(WordBiasExplorer):
     def __init__(
         self,
+        embedding,      # Embedding class instance
+        errorManager    # ErrorManager class instance
     ) -> None:
+        super().__init__(embedding, errorManager)
     def calculate_bias(
         self,
         for wordlist in wordlists:
             if not wordlist:
+                raise Exception('At least one word should be in the to diagnose list, bias 1 list and bias 2 list')
         err = self.check_oov(wordlists)
         if err:
         plt.xticks(np.arange(-most_extream_projection,
                              most_extream_projection + axis_projection_step,
                              axis_projection_step))
+        xlabel = axes_labels_format(
+            left=self.negative_end,
+            right=self.positive_end,
+            sep=' ' * 20,
+            word_wrap=3
+        )
         plt.xlabel(xlabel)
         plt.ylabel('Words')
 class WEBiasExplorer4Spaces(WordBiasExplorer):
     def __init__(
         self,
+        embedding,      # Embedding Class instance
+        errorManager    # ErrorManager class instance
     ) -> None:
+        super().__init__(embedding, errorManager)
     def calculate_bias(
         self,
         for wordlist in wordlists:
             if not wordlist:
+                raise Exception('To plot with 4 spaces, you must enter at least one word in all lists')
         err = self.check_oov(wordlists)
         if err:
             projections_df['projection']
             .abs()
             .max(),
+            decimals=1
+        )
+        sns.scatterplot(x='projection_x',
+                        y='projection_y',
+                        data=projections_df,
+                        # color=list(projections_df['color'].to_list()), # No se distinguen los colores
+                        color='blue'
+        )
         plt.xticks(np.arange(-most_extream_projection,
                              most_extream_projection + axis_projection_step,
         for _, row in (projections_df.iterrows()):
             ax.annotate(
                 row['word'], (row['projection_x'], row['projection_y']))
+        x_label = axes_labels_format(
+            left=name_left,
+            right=name_right,
+            sep=' ' * 20,
+            word_wrap=3
+        )
+        y_label = axes_labels_format(
+            left=name_top,
+            right=name_bottom,
+            sep=' ' * 20,
+            word_wrap=3
+        )
         plt.xlabel(x_label)
         ax.xaxis.set_label_position('bottom')

modules/utils.py CHANGED Viewed

@@ -1,5 +1,32 @@
 import numpy as np
 import pandas as pd
 def take_two_sides_extreme_sorted(
     df: pd.DataFrame,
@@ -55,4 +82,63 @@ def cosine_similarity(
     v_norm = np.linalg.norm(v)
     u_norm = np.linalg.norm(u)
     similarity = v @ u / (v_norm * u_norm)
-    return similarity

 import numpy as np
 import pandas as pd
+import pytz
+from datetime import datetime
+from typing import List
class DateLogs:
    """Format the current moment, expressed in a fixed time zone, as text.

    The zone name given at construction is resolved with ``pytz.timezone``
    and stored on ``self.time_zone``; every call reads the clock again.
    """

    def __init__(
        self,
        zone: str = "America/Argentina/Cordoba"
    ) -> None:
        """Store the tzinfo object that ``pytz.timezone(zone)`` returns."""
        self.time_zone = pytz.timezone(zone)

    def _stamp(
        self,
        pattern: str
    ) -> str:
        """Render ``datetime.now(self.time_zone)`` with a strftime pattern."""
        return datetime.now(self.time_zone).strftime(pattern)

    def full(
        self
    ) -> str:
        """Return the current time and date as ``HH:MM:SS DD-MM-YYYY``."""
        return self._stamp("%H:%M:%S %d-%m-%Y")

    def day(
        self
    ) -> str:
        """Return the current date as ``DD-MM-YYYY``."""
        return self._stamp("%d-%m-%Y")
 def take_two_sides_extreme_sorted(
     df: pd.DataFrame,
     v_norm = np.linalg.norm(v)
     u_norm = np.linalg.norm(u)
     similarity = v @ u / (v_norm * u_norm)
+    return similarity
def axes_labels_format(
    left: str,
    right: str,
    sep: str,
    word_wrap: int = 4
) -> str:
    """Build a multi-line axis label from two comma-separated word lists.

    Each input string is split on commas; words are stripped and empty
    entries are dropped. Both lists are laid out as grids of ``word_wrap``
    columns sharing the same number of rows, and every cell is right-padded
    with spaces to the length of the longest word found on either side.
    Row ``i`` of the left grid and row ``i`` of the right grid are joined
    by ``sep``. The first line is wrapped in arrows (``← ... →``); later
    lines get two spaces on each side instead so the columns stay aligned.

    Args:
        left: Comma-separated words placed to the left of ``sep``.
        right: Comma-separated words placed to the right of ``sep``.
        sep: Text inserted between the left and right halves of each line.
        word_wrap: Number of words per row on each side; must be >= 1.

    Returns:
        The label text, one newline-terminated line per row. An empty
        string when neither side contains any word.

    Raises:
        ValueError: If ``word_wrap`` is smaller than 1.
    """
    if word_wrap < 1:
        # The original failed with ZeroDivisionError on 0 and silently
        # produced nothing on negatives; fail early with a clear message.
        raise ValueError("word_wrap must be a positive integer")

    def split_words(text: str) -> List[str]:
        # A falsy value (empty string, or None) is treated as "no words"
        # instead of crashing on ``.split``.
        return [w.strip() for w in (text or "").split(",") if w.strip()]

    def gen_block(words: List[str], n_rows: int) -> List[List[str]]:
        # Pad with empty cells so every row has exactly ``word_wrap`` entries.
        cells = words + [""] * (n_rows * word_wrap - len(words))
        return [
            cells[r * word_wrap:(r + 1) * word_wrap]
            for r in range(n_rows)
        ]

    l_list = split_words(left)
    r_list = split_words(right)

    # Column width: the longest word by LENGTH across both sides. Comparing
    # the two per-side winners without ``key=len`` would order them
    # alphabetically and could under-pad the columns.
    longest_word = max((len(w) for w in l_list + r_list), default=0)
    longest_list = max(len(l_list), len(r_list))

    # Ceiling division: rows needed to fit the longer list.
    n_rows = -(-longest_list // word_wrap)

    l_block = gen_block(l_list, n_rows)
    r_block = gen_block(r_list, n_rows)

    lines = []
    for i, (l_row, r_row) in enumerate(zip(l_block, r_block)):
        line = (
            ' '.join(w.ljust(longest_word) for w in l_row)
            + sep
            + ' '.join(w.ljust(longest_word) for w in r_row)
        )
        # Only the first line carries the arrows.
        lines.append(f"← {line} →\n" if i == 0 else f"  {line}  \n")
    return "".join(lines)