Spaces:
Configuration error
Configuration error
Improvement in the display of the graph axes labels. Minor fixes.
Browse files- modules/module_BiasExplorer.py +45 -23
- modules/utils.py +87 -1
modules/module_BiasExplorer.py
CHANGED
@@ -5,14 +5,15 @@ import seaborn as sns
|
|
5 |
import matplotlib.pyplot as plt
|
6 |
from sklearn.decomposition import PCA
|
7 |
from typing import List, Dict, Tuple, Optional, Any
|
8 |
-
from modules.utils import normalize, cosine_similarity, project_params, take_two_sides_extreme_sorted
|
9 |
|
10 |
__all__ = ['WordBiasExplorer', 'WEBiasExplorer2Spaces', 'WEBiasExplorer4Spaces']
|
11 |
|
12 |
class WordBiasExplorer:
|
13 |
def __init__(
|
14 |
self,
|
15 |
-
embedding
|
|
|
16 |
) -> None:
|
17 |
|
18 |
self.embedding = embedding
|
@@ -20,6 +21,7 @@ class WordBiasExplorer:
|
|
20 |
self.positive_end = None
|
21 |
self.negative_end = None
|
22 |
self.DIRECTION_METHODS = ['single', 'sum', 'pca']
|
|
|
23 |
|
24 |
def __copy__(
|
25 |
self
|
@@ -245,12 +247,12 @@ class WordBiasExplorer:
|
|
245 |
out_msj = ""
|
246 |
|
247 |
if not word:
|
248 |
-
out_msj =
|
249 |
else:
|
250 |
if word not in self.embedding:
|
251 |
-
out_msj =
|
252 |
|
253 |
-
return out_msj
|
254 |
|
255 |
def check_oov(
|
256 |
self,
|
@@ -267,10 +269,11 @@ class WordBiasExplorer:
|
|
267 |
class WEBiasExplorer2Spaces(WordBiasExplorer):
|
268 |
def __init__(
|
269 |
self,
|
270 |
-
embedding
|
|
|
271 |
) -> None:
|
272 |
|
273 |
-
super().__init__(embedding)
|
274 |
|
275 |
def calculate_bias(
|
276 |
self,
|
@@ -283,7 +286,7 @@ class WEBiasExplorer2Spaces(WordBiasExplorer):
|
|
283 |
|
284 |
for wordlist in wordlists:
|
285 |
if not wordlist:
|
286 |
-
raise Exception('
|
287 |
|
288 |
err = self.check_oov(wordlists)
|
289 |
if err:
|
@@ -368,9 +371,14 @@ class WEBiasExplorer2Spaces(WordBiasExplorer):
|
|
368 |
plt.xticks(np.arange(-most_extream_projection,
|
369 |
most_extream_projection + axis_projection_step,
|
370 |
axis_projection_step))
|
371 |
-
|
372 |
-
|
373 |
-
|
|
|
|
|
|
|
|
|
|
|
374 |
|
375 |
plt.xlabel(xlabel)
|
376 |
plt.ylabel('Words')
|
@@ -381,10 +389,11 @@ class WEBiasExplorer2Spaces(WordBiasExplorer):
|
|
381 |
class WEBiasExplorer4Spaces(WordBiasExplorer):
|
382 |
def __init__(
|
383 |
self,
|
384 |
-
embedding
|
|
|
385 |
) -> None:
|
386 |
|
387 |
-
super().__init__(embedding)
|
388 |
|
389 |
def calculate_bias(
|
390 |
self,
|
@@ -405,7 +414,7 @@ class WEBiasExplorer4Spaces(WordBiasExplorer):
|
|
405 |
|
406 |
for wordlist in wordlists:
|
407 |
if not wordlist:
|
408 |
-
raise Exception('
|
409 |
|
410 |
err = self.check_oov(wordlists)
|
411 |
if err:
|
@@ -495,9 +504,15 @@ class WEBiasExplorer4Spaces(WordBiasExplorer):
|
|
495 |
projections_df['projection']
|
496 |
.abs()
|
497 |
.max(),
|
498 |
-
decimals=1
|
499 |
-
|
500 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
501 |
|
502 |
plt.xticks(np.arange(-most_extream_projection,
|
503 |
most_extream_projection + axis_projection_step,
|
@@ -505,13 +520,20 @@ class WEBiasExplorer4Spaces(WordBiasExplorer):
|
|
505 |
for _, row in (projections_df.iterrows()):
|
506 |
ax.annotate(
|
507 |
row['word'], (row['projection_x'], row['projection_y']))
|
508 |
-
x_label = '← {} {} {} →'.format(name_left,
|
509 |
-
' ' * 20,
|
510 |
-
name_right)
|
511 |
|
512 |
-
|
513 |
-
|
514 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
515 |
|
516 |
plt.xlabel(x_label)
|
517 |
ax.xaxis.set_label_position('bottom')
|
|
|
5 |
import matplotlib.pyplot as plt
|
6 |
from sklearn.decomposition import PCA
|
7 |
from typing import List, Dict, Tuple, Optional, Any
|
8 |
+
from modules.utils import normalize, cosine_similarity, project_params, take_two_sides_extreme_sorted, axes_labels_format
|
9 |
|
10 |
__all__ = ['WordBiasExplorer', 'WEBiasExplorer2Spaces', 'WEBiasExplorer4Spaces']
|
11 |
|
12 |
class WordBiasExplorer:
|
13 |
def __init__(
|
14 |
self,
|
15 |
+
embedding, # Embedding class instance
|
16 |
+
errorManager # ErrorManager class instance
|
17 |
) -> None:
|
18 |
|
19 |
self.embedding = embedding
|
|
|
21 |
self.positive_end = None
|
22 |
self.negative_end = None
|
23 |
self.DIRECTION_METHODS = ['single', 'sum', 'pca']
|
24 |
+
self.errorManager = errorManager
|
25 |
|
26 |
def __copy__(
|
27 |
self
|
|
|
247 |
out_msj = ""
|
248 |
|
249 |
if not word:
|
250 |
+
out_msj = ['EMBEDDING_NO_WORD_PROVIDED']
|
251 |
else:
|
252 |
if word not in self.embedding:
|
253 |
+
out_msj = ['EMBEDDING_WORD_OOV', word]
|
254 |
|
255 |
+
return self.errorManager.process(out_msj)
|
256 |
|
257 |
def check_oov(
|
258 |
self,
|
|
|
269 |
class WEBiasExplorer2Spaces(WordBiasExplorer):
|
270 |
def __init__(
|
271 |
self,
|
272 |
+
embedding, # Embedding class instance
|
273 |
+
errorManager # ErrorManager class instance
|
274 |
) -> None:
|
275 |
|
276 |
+
super().__init__(embedding, errorManager)
|
277 |
|
278 |
def calculate_bias(
|
279 |
self,
|
|
|
286 |
|
287 |
for wordlist in wordlists:
|
288 |
if not wordlist:
|
289 |
+
raise Exception('At least one word should be in the to diagnose list, bias 1 list and bias 2 list')
|
290 |
|
291 |
err = self.check_oov(wordlists)
|
292 |
if err:
|
|
|
371 |
plt.xticks(np.arange(-most_extream_projection,
|
372 |
most_extream_projection + axis_projection_step,
|
373 |
axis_projection_step))
|
374 |
+
|
375 |
+
|
376 |
+
xlabel = axes_labels_format(
|
377 |
+
left=self.negative_end,
|
378 |
+
right=self.positive_end,
|
379 |
+
sep=' ' * 20,
|
380 |
+
word_wrap=3
|
381 |
+
)
|
382 |
|
383 |
plt.xlabel(xlabel)
|
384 |
plt.ylabel('Words')
|
|
|
389 |
class WEBiasExplorer4Spaces(WordBiasExplorer):
|
390 |
def __init__(
|
391 |
self,
|
392 |
+
embedding, # Embedding Class instance
|
393 |
+
errorManager # ErrorManager class instance
|
394 |
) -> None:
|
395 |
|
396 |
+
super().__init__(embedding, errorManager)
|
397 |
|
398 |
def calculate_bias(
|
399 |
self,
|
|
|
414 |
|
415 |
for wordlist in wordlists:
|
416 |
if not wordlist:
|
417 |
+
raise Exception('To plot with 4 spaces, you must enter at least one word in all lists')
|
418 |
|
419 |
err = self.check_oov(wordlists)
|
420 |
if err:
|
|
|
504 |
projections_df['projection']
|
505 |
.abs()
|
506 |
.max(),
|
507 |
+
decimals=1
|
508 |
+
)
|
509 |
+
|
510 |
+
sns.scatterplot(x='projection_x',
|
511 |
+
y='projection_y',
|
512 |
+
data=projections_df,
|
513 |
+
# color=list(projections_df['color'].to_list()), # The colors cannot be told apart
|
514 |
+
color='blue'
|
515 |
+
)
|
516 |
|
517 |
plt.xticks(np.arange(-most_extream_projection,
|
518 |
most_extream_projection + axis_projection_step,
|
|
|
520 |
for _, row in (projections_df.iterrows()):
|
521 |
ax.annotate(
|
522 |
row['word'], (row['projection_x'], row['projection_y']))
|
|
|
|
|
|
|
523 |
|
524 |
+
|
525 |
+
x_label = axes_labels_format(
|
526 |
+
left=name_left,
|
527 |
+
right=name_right,
|
528 |
+
sep=' ' * 20,
|
529 |
+
word_wrap=3
|
530 |
+
)
|
531 |
+
y_label = axes_labels_format(
|
532 |
+
left=name_top,
|
533 |
+
right=name_bottom,
|
534 |
+
sep=' ' * 20,
|
535 |
+
word_wrap=3
|
536 |
+
)
|
537 |
|
538 |
plt.xlabel(x_label)
|
539 |
ax.xaxis.set_label_position('bottom')
|
modules/utils.py
CHANGED
@@ -1,5 +1,32 @@
|
|
1 |
import numpy as np
|
2 |
import pandas as pd
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
3 |
|
4 |
def take_two_sides_extreme_sorted(
|
5 |
df: pd.DataFrame,
|
@@ -55,4 +82,63 @@ def cosine_similarity(
|
|
55 |
v_norm = np.linalg.norm(v)
|
56 |
u_norm = np.linalg.norm(u)
|
57 |
similarity = v @ u / (v_norm * u_norm)
|
58 |
-
return similarity
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
import numpy as np
|
2 |
import pandas as pd
|
3 |
+
import pytz
|
4 |
+
from datetime import datetime
|
5 |
+
from typing import List
|
6 |
+
|
7 |
+
|
8 |
+
|
9 |
+
class DateLogs:
    """Produce formatted "now" timestamps in a fixed time zone.

    The zone name given at construction is resolved once through
    ``pytz.timezone`` and stored in ``self.time_zone``; every timestamp is
    taken with ``datetime.now`` in that zone.
    """

    def __init__(
        self,
        zone: str = "America/Argentina/Cordoba"
    ) -> None:
        """Resolve ``zone`` (a time-zone name) and keep the resulting tzinfo."""
        self.time_zone = pytz.timezone(zone)

    def _stamp(
        self,
        pattern: str
    ) -> str:
        """Format the current time in ``self.time_zone`` with ``pattern``."""
        return datetime.now(self.time_zone).strftime(pattern)

    def full(
        self
    ) -> str:
        """Return the current time and date as ``HH:MM:SS DD-MM-YYYY``."""
        return self._stamp("%H:%M:%S %d-%m-%Y")

    def day(
        self
    ) -> str:
        """Return the current date as ``DD-MM-YYYY``."""
        return self._stamp("%d-%m-%Y")
|
30 |
|
31 |
def take_two_sides_extreme_sorted(
|
32 |
df: pd.DataFrame,
|
|
|
82 |
v_norm = np.linalg.norm(v)
|
83 |
u_norm = np.linalg.norm(u)
|
84 |
similarity = v @ u / (v_norm * u_norm)
|
85 |
+
return similarity
|
86 |
+
|
87 |
+
|
88 |
+
def axes_labels_format(
|
89 |
+
left: str,
|
90 |
+
right: str,
|
91 |
+
sep: str,
|
92 |
+
word_wrap: int = 4
|
93 |
+
) -> str:
|
94 |
+
|
95 |
+
def sparse(
|
96 |
+
word: str,
|
97 |
+
max_len: int
|
98 |
+
) -> str:
|
99 |
+
|
100 |
+
diff = max_len-len(word)
|
101 |
+
rest = diff if diff > 0 else 0
|
102 |
+
return word+" "*rest
|
103 |
+
|
104 |
+
def gen_block(
|
105 |
+
list_: List[str],
|
106 |
+
n_rows:int,
|
107 |
+
n_cols:int
|
108 |
+
) -> List[str]:
|
109 |
+
|
110 |
+
block = []
|
111 |
+
block_row = []
|
112 |
+
for r in range(n_rows):
|
113 |
+
for c in range(n_cols):
|
114 |
+
i = r * n_cols + c
|
115 |
+
w = list_[i] if i <= len(list_) - 1 else ""
|
116 |
+
block_row.append(w)
|
117 |
+
if (i+1) % n_cols == 0:
|
118 |
+
block.append(block_row)
|
119 |
+
block_row = []
|
120 |
+
return block
|
121 |
+
|
122 |
+
# Transform 'string' to list of string
|
123 |
+
l_list = [word.strip() for word in left.split(",") if word.strip() != ""]
|
124 |
+
r_list = [word.strip() for word in right.split(",") if word.strip() != ""]
|
125 |
+
|
126 |
+
# Get longest word, and longest_list
|
127 |
+
longest_list = max(len(l_list), len(r_list))
|
128 |
+
longest_word = len(max( max(l_list, key=len), max(r_list, key=len)))
|
129 |
+
|
130 |
+
# Creation of word blocks for each list
|
131 |
+
n_rows = (longest_list // word_wrap) if longest_list % word_wrap == 0 else (longest_list // word_wrap) + 1
|
132 |
+
n_cols = word_wrap
|
133 |
+
|
134 |
+
l_block = gen_block(l_list, n_rows, n_cols)
|
135 |
+
r_block = gen_block(r_list, n_rows, n_cols)
|
136 |
+
|
137 |
+
# Transform list of list to sparse string
|
138 |
+
labels = ""
|
139 |
+
for i,(l,r) in enumerate(zip(l_block, r_block)):
|
140 |
+
line = ' '.join([sparse(w, longest_word) for w in l]) + sep + \
|
141 |
+
' '.join([sparse(w, longest_word) for w in r])
|
142 |
+
labels += f"← {line} →\n" if i==0 else f" {line} \n"
|
143 |
+
|
144 |
+
return labels
|