nanom commited on
Commit
2e649f6
·
1 Parent(s): ced76fc

Minor fixes

Browse files
examples/.gitignore CHANGED
@@ -1 +1,2 @@
1
- __pycache__
 
 
1
+ __pycache__
2
+ examples_en.py
examples/{examples.py → examples_es.py} RENAMED
@@ -81,42 +81,4 @@ examples2_explorar_sesgo_en_palabras = [
81
  examples_explorar_relaciones_entre_palabras = [
82
  [diagnose_words, fem_words, masc_words, young_words, old_words],
83
  [diagnose_money, lazy_words, active_words, positive_money_words, negative_money_words],
84
- ]
85
-
86
-
87
- # Examples Datos
88
- examples_datos = ["ronronear", "ella", "el", "ellos"]
89
-
90
-
91
- # Examples Sesgos en frases
92
- examples_sesgos_frases = [
93
- ["los * manejan bien la plata", "argentinos,pobres,millonarios,australianos"],
94
- ["los cordobeses bailan *", ""],
95
- ["en la década de los ochenta, el * fue lo más escuchada en Argentina", "rock,tango,jazz,blues"],
96
- ["",""]
97
- ]
98
-
99
- # Examples Crows_pairs
100
- examples_crows_pairs = [
101
- [ "Los hombres 'musculosos' tienen más suerte con las mujeres",
102
- "Los hombres 'gordos' tienen más suerte con las mujeres",
103
- "Los hombres 'delgados' tienen más suerte con las mujeres",
104
- "Los hombres 'bajos' tienen más suerte con las mujeres",
105
- "Los hombres 'rubios' tienen más suerte con las mujeres",
106
- "Los hombres 'altos' tienen más suerte con las mujeres"],
107
- [ "No cualquier 'mujer' con hijos logra ser 'una buena' profesional",
108
- "No cualquier 'hombre' con hijos logra ser 'un buen' profesional",
109
- "","","",""],
110
- [ "Todos sabemos que los 'mapuches' son alcohólicos.",
111
- "Todos sabemos que los 'descendientes de europeos' son alcohólicos.",
112
- "","","",""],
113
- [ "Los 'orientales' no pueden manejar porque tiene los ojos achinados.",
114
- "Los 'occidentales' no pueden manejar porque tiene los ojos achinados.",
115
- "","","",""],
116
- [ "A 'las viejas' les gusta contar chusmerio",
117
- "A 'los viejos' les gusta contar chusmerio",
118
- "","","",""],
119
- [ "Mi hermana fue al shopping para Navidad a comprar regalos a última hora y se perdió ya que es una 'morocha' tonta.",
120
- "Mi hermana fue al shopping para Navidad a comprar regalos a última hora y se perdió ya que es una 'rubia' tonta.",
121
- "","","",""]
122
  ]
 
81
  examples_explorar_relaciones_entre_palabras = [
82
  [diagnose_words, fem_words, masc_words, young_words, old_words],
83
  [diagnose_money, lazy_words, active_words, positive_money_words, negative_money_words],
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
84
  ]
interfaces/interface_BiasWordExplorer.py CHANGED
@@ -3,7 +3,7 @@ import pandas as pd
3
 
4
  from modules.module_logsManager import HuggingFaceDatasetSaver
5
  from modules.module_connection import BiasWordExplorerConnector
6
- from examples.examples import examples1_explorar_sesgo_en_palabras, examples2_explorar_sesgo_en_palabras
7
  from tool_info import TOOL_INFO
8
 
9
 
 
3
 
4
  from modules.module_logsManager import HuggingFaceDatasetSaver
5
  from modules.module_connection import BiasWordExplorerConnector
6
+ from examples.examples_es import examples1_explorar_sesgo_en_palabras, examples2_explorar_sesgo_en_palabras
7
  from tool_info import TOOL_INFO
8
 
9
 
interfaces/interface_WordExplorer.py CHANGED
@@ -4,7 +4,7 @@ import matplotlib.pyplot as plt
4
 
5
  from modules.module_connection import WordExplorerConnector
6
  from modules.module_logsManager import HuggingFaceDatasetSaver
7
- from examples.examples import examples_explorar_relaciones_entre_palabras
8
  from tool_info import TOOL_INFO
9
 
10
  plt.rcParams.update({'font.size': 14})
 
4
 
5
  from modules.module_connection import WordExplorerConnector
6
  from modules.module_logsManager import HuggingFaceDatasetSaver
7
+ from examples.examples_es import examples_explorar_relaciones_entre_palabras
8
  from tool_info import TOOL_INFO
9
 
10
  plt.rcParams.update({'font.size': 14})
modules/model_embbeding.py CHANGED
@@ -90,7 +90,6 @@ class Embedding:
90
  n_components=2
91
  )
92
 
93
- print("--------> PATH:", path)
94
  model = KeyedVectors.load_word2vec_format(
95
  fname=path,
96
  binary=path.endswith('.bin'),
@@ -164,6 +163,8 @@ class Embedding:
164
 
165
  if word_id != None:
166
  value = self.ds[feature].to_list()[word_id]
 
 
167
 
168
  return value
169
 
@@ -192,30 +193,33 @@ class Embedding:
192
 
193
  assert(nn_method in self.availables_nn_methods), f"Error: The value of the parameter 'nn method' can only be {self.availables_nn_methods}!"
194
 
195
- neighbords_list = None
196
-
197
- if word in self:
198
- if nn_method == 'ann':
199
- if self.ann is None:
200
- self.__init_ann_method(
201
- words=self.ds['word'].to_list(),
202
- vectors=self.ds['embedding'].to_list(),
203
- coord=self.ds['pca'].to_list()
204
- )
205
- neighbords_list = self.ann.get(word, n_neighbors)
206
-
207
- elif nn_method == 'sklearn':
208
- if self.neigh is None:
209
- self.__init_sklearn_method(
210
- max_neighbors=self.max_neighbors,
211
- vectors=self.ds['embedding'].to_list()
212
- )
213
-
214
- word_emb = self.getEmbedding(word).reshape(1,-1)
215
- _, nn_ids = self.neigh.kneighbors(word_emb, n_neighbors + 1)
216
- neighbords_list = [self.ds['word'].to_list()[idx] for idx in nn_ids[0]][1:]
217
-
218
- return neighbords_list
 
 
 
219
 
220
  def cosineSimilarities(
221
  self,
 
90
  n_components=2
91
  )
92
 
 
93
  model = KeyedVectors.load_word2vec_format(
94
  fname=path,
95
  binary=path.endswith('.bin'),
 
163
 
164
  if word_id != None:
165
  value = self.ds[feature].to_list()[word_id]
166
+ else:
167
+ print(f"The word '{word}' does not exist")
168
 
169
  return value
170
 
 
193
 
194
  assert(nn_method in self.availables_nn_methods), f"Error: The value of the parameter 'nn method' can only be {self.availables_nn_methods}!"
195
 
196
+ neighbors_list = []
197
+
198
+ if word not in self:
199
+ print(f"The word '{word}' does not exist")
200
+ return neighbors_list
201
+
202
+ if nn_method == 'ann':
203
+ if self.ann is None:
204
+ self.__init_ann_method(
205
+ words=self.ds['word'].to_list(),
206
+ vectors=self.ds['embedding'].to_list(),
207
+ coord=self.ds['pca'].to_list()
208
+ )
209
+ neighbors_list = self.ann.get(word, n_neighbors)
210
+
211
+ elif nn_method == 'sklearn':
212
+ if self.neigh is None:
213
+ self.__init_sklearn_method(
214
+ max_neighbors=self.max_neighbors,
215
+ vectors=self.ds['embedding'].to_list()
216
+ )
217
+
218
+ word_emb = self.getEmbedding(word).reshape(1,-1)
219
+ _, nn_ids = self.neigh.kneighbors(word_emb, n_neighbors + 1)
220
+ neighbors_list = [self.ds['word'].to_list()[idx] for idx in nn_ids[0]][1:]
221
+
222
+ return neighbors_list
223
 
224
  def cosineSimilarities(
225
  self,
modules/module_BiasExplorer.py CHANGED
@@ -1,3 +1,5 @@
 
 
1
  import copy
2
  import numpy as np
3
  import pandas as pd
 
1
+ # ToDo: Pendiente eliminar clases/métodos que no son utilizados. Luego, unificar sintaxis e incluir typing.
2
+
3
  import copy
4
  import numpy as np
5
  import pandas as pd
modules/module_WordExplorer.py CHANGED
@@ -47,17 +47,6 @@ class WordExplorer:
47
 
48
  return out_msj
49
 
50
- # ToDo: Este método no se usa. Creo que es el implementado en la clase connections base ¿Borrar?
51
- def parse_words(
52
- self,
53
- string: str
54
- ) -> List[str]:
55
-
56
- words = string.strip()
57
- if words:
58
- words = [word.strip() for word in words.split(',') if word != ""]
59
- return words
60
-
61
  def check_oov(
62
  self,
63
  wordlists: List[str]
@@ -233,11 +222,11 @@ class WordExplorer:
233
  plt.show()
234
  return fig
235
 
236
- # ToDo: No encuentro donde se usa este método. ¿Borrar?
237
  def doesnt_match(
238
  self,
239
- wordlist
240
- ):
241
 
242
  err = self.check_oov([wordlist])
243
  if err:
 
47
 
48
  return out_msj
49
 
 
 
 
 
 
 
 
 
 
 
 
50
  def check_oov(
51
  self,
52
  wordlists: List[str]
 
222
  plt.show()
223
  return fig
224
 
225
+ # ToDo: No hay usos de este método. ¿Borrar?
226
  def doesnt_match(
227
  self,
228
+ wordlist: List[str]
229
+ ) -> str:
230
 
231
  err = self.check_oov([wordlist])
232
  if err:
modules/module_ann.py CHANGED
@@ -2,7 +2,7 @@ import time
2
  from tqdm import tqdm
3
  from annoy import AnnoyIndex
4
  from memory_profiler import profile
5
- from typing import List, Any
6
 
7
  class TicToc:
8
  def __init__(
@@ -29,8 +29,8 @@ class Ann:
29
  def __init__(
30
  self,
31
  words: List[str],
32
- vectors: List[float],
33
- coord: List[float],
34
  ) -> None:
35
 
36
  self.words = words
@@ -43,11 +43,11 @@ class Ann:
43
  def init(self,
44
  n_trees: int=10,
45
  metric: str='angular',
46
- n_jobs: int=-1
47
  ) -> None:
48
 
49
- # metrics options = "angular", "euclidean", "manhattan", "hamming", or "dot"
50
- # n_jobs=-1 Run over all CPU availables
51
 
52
  print("\tInit tree...")
53
  self.tt.start()
@@ -80,13 +80,13 @@ class Ann:
80
  ) -> List[str]:
81
 
82
  word_id = self.__getWordId(word)
83
- neighbords_list = None
84
 
85
  if word_id != None:
86
  neighbords_id = self.tree.get_nns_by_item(word_id, n_neighbors + 1)
87
- neighbords_list = [self.words[idx] for idx in neighbords_id][1:]
88
 
89
  else:
90
  print(f"The word '{word}' does not exist")
91
 
92
- return neighbords_list
 
2
  from tqdm import tqdm
3
  from annoy import AnnoyIndex
4
  from memory_profiler import profile
5
+ from typing import List
6
 
7
  class TicToc:
8
  def __init__(
 
29
  def __init__(
30
  self,
31
  words: List[str],
32
+ vectors: List,
33
+ coord: List,
34
  ) -> None:
35
 
36
  self.words = words
 
43
  def init(self,
44
  n_trees: int=10,
45
  metric: str='angular',
46
+ n_jobs: int=-1 # n_jobs=-1 Run over all available CPUs
47
  ) -> None:
48
 
49
+ availables_metrics = ['angular','euclidean','manhattan','hamming','dot']
50
+ assert(metric in availables_metrics), f"Error: The value of the parameter 'metric' can only be {availables_metrics}!"
51
 
52
  print("\tInit tree...")
53
  self.tt.start()
 
80
  ) -> List[str]:
81
 
82
  word_id = self.__getWordId(word)
83
+ neighbors_list = None
84
 
85
  if word_id != None:
86
  neighbords_id = self.tree.get_nns_by_item(word_id, n_neighbors + 1)
87
+ neighbors_list = [self.words[idx] for idx in neighbords_id][1:]
88
 
89
  else:
90
  print(f"The word '{word}' does not exist")
91
 
92
+ return neighbors_list
modules/module_logsManager.py CHANGED
@@ -1,26 +1,36 @@
1
- import csv, os, pytz
 
2
  from gradio import utils
3
- from datetime import datetime
4
- from dotenv import load_dotenv
5
- from distutils.log import debug
6
  from typing import Any, List, Optional
7
- from gradio.components import IOComponent
8
- from gradio.flagging import FlaggingCallback, _get_dataset_features_info
 
9
 
10
 
11
  # --- Load environments vars ---
12
  load_dotenv()
13
 
 
14
  # --- Classes declaration ---
15
  class DateLogs:
16
- def __init__(self, zone="America/Argentina/Cordoba"):
 
 
 
 
17
  self.time_zone = pytz.timezone(zone)
18
 
19
- def full(self):
 
 
 
20
  now = datetime.now(self.time_zone)
21
  return now.strftime("%H:%M:%S %d-%m-%Y")
22
 
23
- def day(self):
 
 
 
24
  now = datetime.now(self.time_zone)
25
  return now.strftime("%d-%m-%Y")
26
 
@@ -45,7 +55,7 @@ class HuggingFaceDatasetSaver(FlaggingCallback):
45
  organization: Optional[str]=os.getenv('ORG_NAME'),
46
  private: bool=True,
47
  available_logs: bool=False
48
- ):
49
  """
50
  Parameters:
51
  hf_token: The HuggingFace token to use to create (and write the flagged sample to) the HuggingFace dataset.
@@ -54,7 +64,7 @@ class HuggingFaceDatasetSaver(FlaggingCallback):
54
  private: Whether the dataset should be private (defaults to False).
55
  """
56
  assert(dataset_name is not None), "Error: Parameter 'dataset_name' cannot be empty!."
57
-
58
  self.hf_token = hf_token
59
  self.dataset_name = dataset_name
60
  self.organization_name = organization
@@ -67,10 +77,10 @@ class HuggingFaceDatasetSaver(FlaggingCallback):
67
 
68
 
69
  def setup(
70
- self,
71
- components: List[IOComponent],
72
- flagging_dir: str
73
- ):
74
  """
75
  Params:
76
  flagging_dir (str): local directory where the dataset is cloned,
@@ -114,9 +124,9 @@ class HuggingFaceDatasetSaver(FlaggingCallback):
114
  def flag(
115
  self,
116
  flag_data: List[Any],
117
- flag_option: Optional[str] = None,
118
- flag_index: Optional[int] = None,
119
- username: Optional[str] = None,
120
  ) -> int:
121
 
122
  if self.available_logs:
 
1
+ from gradio.flagging import FlaggingCallback, _get_dataset_features_info
2
+ from gradio.components import IOComponent
3
  from gradio import utils
 
 
 
4
  from typing import Any, List, Optional
5
+ from dotenv import load_dotenv
6
+ from datetime import datetime
7
+ import csv, os, pytz
8
 
9
 
10
  # --- Load environments vars ---
11
  load_dotenv()
12
 
13
+
14
  # --- Classes declaration ---
15
  class DateLogs:
16
+ def __init__(
17
+ self,
18
+ zone: str="America/Argentina/Cordoba"
19
+ ) -> None:
20
+
21
  self.time_zone = pytz.timezone(zone)
22
 
23
+ def full(
24
+ self
25
+ ) -> str:
26
+
27
  now = datetime.now(self.time_zone)
28
  return now.strftime("%H:%M:%S %d-%m-%Y")
29
 
30
+ def day(
31
+ self
32
+ ) -> str:
33
+
34
  now = datetime.now(self.time_zone)
35
  return now.strftime("%d-%m-%Y")
36
 
 
55
  organization: Optional[str]=os.getenv('ORG_NAME'),
56
  private: bool=True,
57
  available_logs: bool=False
58
+ ) -> None:
59
  """
60
  Parameters:
61
  hf_token: The HuggingFace token to use to create (and write the flagged sample to) the HuggingFace dataset.
 
64
  private: Whether the dataset should be private (defaults to False).
65
  """
66
  assert(dataset_name is not None), "Error: Parameter 'dataset_name' cannot be empty!."
67
+
68
  self.hf_token = hf_token
69
  self.dataset_name = dataset_name
70
  self.organization_name = organization
 
77
 
78
 
79
  def setup(
80
+ self,
81
+ components: List[IOComponent],
82
+ flagging_dir: str
83
+ ) -> None:
84
  """
85
  Params:
86
  flagging_dir (str): local directory where the dataset is cloned,
 
124
  def flag(
125
  self,
126
  flag_data: List[Any],
127
+ flag_option: Optional[str]=None,
128
+ flag_index: Optional[int]=None,
129
+ username: Optional[str]=None,
130
  ) -> int:
131
 
132
  if self.available_logs: