Code correction and typing added
- app.py +9 -3
- interfaces/interface_datos.py +103 -38
- modules/module_connection.py +46 -13
- modules/module_customSubsetsLabel.py +35 -6
- modules/module_logsManager.py +26 -17
- modules/module_segmentedWordCloud.py +18 -4
- modules/module_vocabulary.py +50 -10
- modules/module_word2Context.py +71 -85
app.py
CHANGED
@@ -1,26 +1,32 @@
 # --- Imports modules ---
 from modules.module_vocabulary import Vocabulary
 
+
 # --- Imports interfaces ---
 from interfaces.interface_datos import interface as interface_datos
 
+
 # --- Tool config ---
-AVAILABLE_LOGS = True # [True | False]
-LANGUAGE = "spanish" # [spanish]
-VOCABULARY_SUBSET = "full" # [full]
 # ToDo: Change the owner of the context dataset from nanom to vialibre
 CONTEXTS_DATASET = "nanom/splittedspanish3bwc"
+AVAILABLE_WORDCLOUD = False # [True | False]
+AVAILABLE_LOGS = True # [True | False]
+LANGUAGE = "spanish" # [spanish]
+VOCABULARY_SUBSET = "full" # [full]
+
 
 # --- Init classes ---
 vocabulary = Vocabulary(
     subset_name=VOCABULARY_SUBSET
 )
 
+
 # --- Main App ---
 iface = interface_datos(
     vocabulary=vocabulary,
     contexts=CONTEXTS_DATASET,
     available_logs=AVAILABLE_LOGS,
+    available_wordcloud=AVAILABLE_WORDCLOUD,
     lang=LANGUAGE
 )
 
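The new AVAILABLE_WORDCLOUD flag follows the same pattern as the existing config constants: a module-level switch passed explicitly into the interface factory. A minimal sketch of that pattern, where build_interface is an illustrative stand-in for the repo's interface_datos:

    # Illustrative sketch only: `build_interface` stands in for the repo's
    # `interface_datos`, which receives the flag as `available_wordcloud`.
    AVAILABLE_WORDCLOUD = False  # [True | False]

    def build_interface(available_wordcloud: bool) -> dict:
        # In the real interface this drives gr.Plot(visible=available_wordcloud).
        return {"wordcloud_visible": available_wordcloud}

    assert build_interface(AVAILABLE_WORDCLOUD)["wordcloud_visible"] is False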
interfaces/interface_datos.py
CHANGED
@@ -4,7 +4,14 @@ from tool_info import TOOL_INFO
 import gradio as gr
 import pandas as pd
 
-def interface(vocabulary, contexts, available_logs, lang="spanish"):
+
+def interface(
+    vocabulary, # Vocabulary class instance
+    contexts: str,
+    available_logs: bool,
+    available_wordcloud: bool,
+    lang: str="spanish"
+) -> gr.Blocks:
 
     # --- Init logs ---
     log_callback = HuggingFaceDatasetSaver(
@@ -12,58 +19,112 @@ def interface(vocabulary, contexts, available_logs, lang="spanish"):
     )
 
     # --- Init Class ---
-    connector = Word2ContextExplorerConnector(
-
+    connector = Word2ContextExplorerConnector(
+        vocabulary=vocabulary,
+        context=contexts
+    )
+
+    # --- Load language ---
+    labels = pd.read_json(
+        f"language/{lang}.json"
+    )["DataExplorer_interface"]
 
     # --- Interface ---
-    iface = gr.Blocks(
+    iface = gr.Blocks(
+        css=".container { max-width: 90%; margin: auto;}"
+    )
 
     with iface:
         with gr.Row():
             with gr.Column():
                 with gr.Group():
-                    gr.Markdown(
-
-
-
-
+                    gr.Markdown(
+                        value=labels["step1"]
+                    )
+                    with gr.Row():
+                        input_word = gr.Textbox(
+                            label=labels["inputWord"]["title"],
+                            show_label=False,
+                            placeholder=labels["inputWord"]["placeholder"]
+                        )
+                    with gr.Row():
+                        btn_get_w_info = gr.Button(
+                            value=labels["wordInfoButton"]
+                        )
 
                 with gr.Group():
-                    gr.Markdown(
-
-
-
+                    gr.Markdown(
+                        value=labels["step2"]
+                    )
+                    n_context = gr.Slider(
+                        label="",
+                        step=1, minimum=1, maximum=30, value=5,
+                        visible=True,
+                        interactive=True
+                    )
                 with gr.Group():
-                    gr.Markdown(
-
-
-
-
+                    gr.Markdown(
+                        value=labels["step3"]
+                    )
+                    subsets_choice = gr.CheckboxGroup(
+                        label="",
+                        interactive=True,
+                        visible=True
+                    )
+                    with gr.Row():
+                        btn_get_contexts = gr.Button(
+                            value=labels["wordContextButton"],
+                            visible=True
+                        )
 
-                with gr.Row():
+                with gr.Row():
+                    out_msj = gr.Markdown(
+                        label="",
+                        visible=True
+                    )
 
             with gr.Column():
                 with gr.Group():
-                    gr.Markdown(
-
-
-
+                    gr.Markdown(
+                        value=labels["wordDistributionTitle"]
+                    )
+                    dist_plot = gr.Plot(
+                        label="",
+                        show_label=False
+                    )
+                    wc_plot = gr.Plot(
+                        label="",
+                        show_label=False,
+                        visible=available_wordcloud
+                    )
 
                 with gr.Group():
-                    gr.Markdown(
-
+                    gr.Markdown(
+                        value=labels["frequencyPerSetTitle"]
+                    )
+                    subsets_freq = gr.HTML(
+                        label=""
+                    )
 
         with gr.Row():
             with gr.Group():
-                with gr.Row():
-
-
-
-
-
+                with gr.Row():
+                    gr.Markdown(
+                        value=labels["contextList"]
+                    )
+                with gr.Row():
+                    out_context = gr.Dataframe(
+                        label="",
+                        interactive=False,
+                        value=pd.DataFrame([], columns=['']),
+                        wrap=True,
+                        datatype=['str','markdown','str','markdown']
+                    )
 
         with gr.Group():
-            gr.Markdown(
+            gr.Markdown(
+                value=TOOL_INFO
+            )
 
         btn_get_w_info.click(
             fn=connector.get_word_info,
@@ -73,10 +134,11 @@ def interface(vocabulary, contexts, available_logs, lang="spanish"):
                 subsets_freq,
                 dist_plot,
                 wc_plot,
-                subsets_choice]
+                subsets_choice
+            ]
         )
 
-        btn_get_contexts.click(
+        btn_get_contexts.click(
             fn=connector.get_word_context,
             inputs=[input_word, n_context, subsets_choice],
             outputs=[out_msj, out_context]
@@ -84,13 +146,16 @@ def interface(vocabulary, contexts, available_logs, lang="spanish"):
 
         # --- Logs ---
         save_field = [input_word, subsets_choice]
-        log_callback.setup(
+        log_callback.setup(
+            components=save_field,
+            flagging_dir=f"edia_datos_{lang}"
+        )
 
         btn_get_contexts.click(
             fn=lambda *args: log_callback.flag(
-
-
-
+                flag_data=args,
+                flag_option="datos",
+                username="vialibre"
            ),
            inputs=save_field,
            outputs=None,
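Note the wiring at the end of the diff: btn_get_contexts is registered twice, once for the actual query and once for logging, so a single click fires both callbacks. A hedged sketch of that double-registration pattern, with fake_log as an illustrative stand-in for log_callback.flag:

    import gradio as gr

    def fake_log(*args) -> None:
        # Stand-in for log_callback.flag(flag_data=args, ...).
        print("logged:", args)

    with gr.Blocks() as demo:
        word = gr.Textbox(placeholder="word")
        out = gr.Markdown()
        btn = gr.Button("Buscar")
        # First listener: the real computation.
        btn.click(fn=lambda w: f"Contexts for *{w}*", inputs=word, outputs=out)
        # Second listener: fire-and-forget logging, no outputs.
        btn.click(fn=lambda *args: fake_log(*args), inputs=[word], outputs=None)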
modules/module_connection.py
CHANGED
@@ -1,37 +1,64 @@
+from modules.module_word2Context import Word2Context
+from typing import List, Tuple
 import pandas as pd
 import gradio as gr
 from abc import ABC
-from modules.module_word2Context import Word2Context
 
 class Connector(ABC):
-    def parse_word(
+    def parse_word(
+        self,
+        word: str
+    ) -> str:
+
        return word.lower().strip()
 
-    def parse_words(
+    def parse_words(
+        self,
+        array_in_string: str
+    ) -> List[str]:
+
        words = array_in_string.strip()
        if not words:
            return []
-        words = [
+        words = [
+            self.parse_word(word)
+            for word in words.split(',') if word.strip() != ''
+        ]
        return words
 
-    def process_error(
-
-
-
+    def process_error(
+        self,
+        err: str
+    ) -> str:
 
+        if err:
+            err = "<center><h3>" + err + "</h3></center>"
+        return err
 
 class Word2ContextExplorerConnector(Connector):
-    def __init__(
+    def __init__(
+        self,
+        **kwargs
+    ) -> None:
+
        vocabulary = kwargs.get('vocabulary', None)
        context = kwargs.get('context', None)
 
        if vocabulary is None and context is None:
            raise KeyError
-        self.word2context_explorer = Word2Context(context, vocabulary)
 
-
+        self.word2context_explorer = Word2Context(
+            context,    # Context dataset HF name | path
+            vocabulary  # Vocabulary class instance
+        )
+
+    def get_word_info(
+        self,
+        word: str
+    ) -> Tuple:
+
        err = ""
-        contexts = pd.DataFrame([],columns=[''])
+        contexts = pd.DataFrame([], columns=[''])
        subsets_info = ""
        distribution_plot = None
        word_cloud_plot = None
@@ -53,7 +80,13 @@ class Word2ContextExplorerConnector(Connector):
 
        return self.process_error(err), contexts, subsets_info, distribution_plot, word_cloud_plot, subsets_choice
 
-    def get_word_context(
+    def get_word_context(
+        self,
+        word: str,
+        n_context: int,
+        subset_choice: List[str]
+    ) -> Tuple:
+
        word = self.parse_word(word)
        n_context = int(n_context)
        err = ""
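The completed parse_words is comma-splitting plus per-token normalization through parse_word. Isolated below as free functions for a quick check of the behavior the diff pins down:

    def parse_word(word: str) -> str:
        return word.lower().strip()

    def parse_words(array_in_string: str):
        words = array_in_string.strip()
        if not words:
            return []
        # Split on commas, normalize each token, drop empty entries.
        return [parse_word(w) for w in words.split(',') if w.strip() != '']

    assert parse_words(" Hola, , MUNDO ") == ["hola", "mundo"]
    assert parse_words("   ") == []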
modules/module_customSubsetsLabel.py
CHANGED
@@ -1,5 +1,10 @@
+from typing import List, Dict
+
 class CustomSubsetsLabel:
-    def __init__(
+    def __init__(
+        self
+    ) -> None:
+
        self.html_head = """
        <html>
        <head>
@@ -50,7 +55,14 @@ class CustomSubsetsLabel:
            'UN': "http://opus.nlpl.eu/UN.php",
        }
 
-    def __progressbar(
+    def __progressbar(
+        self,
+        percentage: float,
+        subset: str,
+        freq: int,
+        size: int=15
+    ) -> str:
+
        html = f"""
        <div id="myturn">
            <progress value="{int(percentage)}" max="100"></progress>
@@ -66,7 +78,13 @@ class CustomSubsetsLabel:
        """
        return html
 
-    def __render(
+    def __render(
+        self,
+        subsets: List[str],
+        freqs: List[int],
+        percentages: List[float]
+    ) -> str:
+
        html = ""
        for subset, freq, perc in zip(subsets, freqs, percentages):
            html += self.__progressbar(
@@ -77,13 +95,24 @@ class CustomSubsetsLabel:
 
        return self.html_head + html + self.html_footer
 
-    def compute(
+    def compute(
+        self,
+        subsets_dic: Dict[str, int]
+    ) -> str:
+
        subsets_dic_info = {
            k.split()[0]:{'freq':int(k.split()[1][1:-1]),'perc':round(v*100,2)}
            for k,v in subsets_dic.items()
        }
 
        subsets = list(subsets_dic_info.keys())
-        freqs = [
-
+        freqs = [
+            d['freq']
+            for d in subsets_dic_info.values()
+        ]
+        percentages = [
+            d['perc']
+            for d in subsets_dic_info.values()
+        ]
+
        return self.__render(subsets, freqs, percentages)
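compute encodes its expected input format in the first comprehension: each key is a "NAME (freq)" string and each value is that subset's fraction of the total frequency. Run in isolation with illustrative data:

    subsets_dic = {"DGT (1500)": 0.75, "UN (500)": 0.25}  # example input

    subsets_dic_info = {
        k.split()[0]: {'freq': int(k.split()[1][1:-1]), 'perc': round(v*100, 2)}
        for k, v in subsets_dic.items()
    }
    assert subsets_dic_info == {
        'DGT': {'freq': 1500, 'perc': 75.0},
        'UN': {'freq': 500, 'perc': 25.0},
    }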
modules/module_logsManager.py
CHANGED
@@ -1,4 +1,3 @@
-from distutils.log import debug
 from gradio.flagging import FlaggingCallback, _get_dataset_features_info
 from gradio.components import IOComponent
 from gradio import utils
@@ -14,14 +13,24 @@ load_dotenv()
 
 # --- Classes declaration ---
 class DateLogs:
-    def __init__(
+    def __init__(
+        self,
+        zone: str="America/Argentina/Cordoba"
+    ) -> None:
+
        self.time_zone = pytz.timezone(zone)
 
-    def full(
+    def full(
+        self
+    ) -> str:
+
        now = datetime.now(self.time_zone)
        return now.strftime("%H:%M:%S %d-%m-%Y")
 
-    def day(
+    def day(
+        self
+    ) -> str:
+
        now = datetime.now(self.time_zone)
        return now.strftime("%d-%m-%Y")
 
@@ -41,12 +50,12 @@ class HuggingFaceDatasetSaver(FlaggingCallback):
 
    def __init__(
        self,
-        hf_token: str
-        dataset_name: str
-        organization: Optional[str]
-        private: bool
-        available_logs: bool
-    ):
+        hf_token: str=os.getenv('HF_TOKEN'),
+        dataset_name: str=os.getenv('DS_LOGS_NAME'),
+        organization: Optional[str]=os.getenv('ORG_NAME'),
+        private: bool=True,
+        available_logs: bool=False
+    ) -> None:
        """
        Parameters:
        hf_token: The HuggingFace token to use to create (and write the flagged sample to) the HuggingFace dataset.
@@ -66,10 +75,10 @@ class HuggingFaceDatasetSaver(FlaggingCallback):
 
 
    def setup(
-
-
-
-
+        self,
+        components: List[IOComponent],
+        flagging_dir: str
+    ) -> None:
        """
        Params:
            flagging_dir (str): local directory where the dataset is cloned,
@@ -113,9 +122,9 @@ class HuggingFaceDatasetSaver(FlaggingCallback):
    def flag(
        self,
        flag_data: List[Any],
-        flag_option: Optional[str]
-        flag_index: Optional[int]
-        username: Optional[str]
+        flag_option: Optional[str]=None,
+        flag_index: Optional[int]=None,
+        username: Optional[str]=None,
    ) -> int:
 
        if self.available_logs:
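The new constructor defaults pull credentials from the environment, which is why load_dotenv() runs at import time. One subtlety: os.getenv(...) in a default argument is evaluated once, when the def statement executes, so the .env file must already be loaded by then. A minimal sketch of the pattern, with make_saver as an illustrative stand-in for HuggingFaceDatasetSaver.__init__ (variable names mirror the diff):

    import os
    from typing import Optional
    from dotenv import load_dotenv

    load_dotenv()  # must run before the defaults below are evaluated

    def make_saver(
        hf_token: Optional[str] = os.getenv('HF_TOKEN'),  # read at def time
        private: bool = True,
        available_logs: bool = False
    ) -> dict:
        # Stand-in: just echoes the resolved configuration.
        return {"token": hf_token, "private": private, "logs": available_logs}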
modules/module_segmentedWordCloud.py
CHANGED
@@ -1,6 +1,6 @@
 from wordcloud import WordCloud
 import matplotlib.pyplot as plt
-
+from typing import Dict, Tuple, List
 
 class SimpleGroupedColorFunc(object):
    """Create a color function object which assigns EXACT colors
@@ -16,7 +16,12 @@ class SimpleGroupedColorFunc(object):
       of any value from color_to_words.
    """
 
-    def __init__(
+    def __init__(
+        self,
+        color_to_words: Dict,
+        default_color: str
+    ) -> Dict:
+
        self.word_to_color = {
            word: color
            for (color, words) in color_to_words.items()
@@ -30,7 +35,13 @@ class SimpleGroupedColorFunc(object):
 
 
 class SegmentedWordCloud:
-    def __init__(
+    def __init__(
+        self,
+        freq_dic: Dict[str, int],
+        less_group: List[str],
+        greater_group: List[str]
+    ) :
+
        colors = {
            'less': '#529ef3',
            'salient':'#d35400',
@@ -56,7 +67,10 @@ class SegmentedWordCloud:
 
        self.wc.recolor(color_func=grouped_color_func)
 
-    def plot(
+    def plot(
+        self,
+        figsize: Tuple[int,int]
+    ):
        fig, ax = plt.subplots(figsize=figsize)
        ax.imshow(self.wc, interpolation="bilinear")
        ax.axis("off")
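The class wraps the standard wordcloud recoloring hook: generate the cloud from a frequency dict, then recolor each word by group membership. A reduced sketch of that idea, with the group colors taken from the diff and the data purely illustrative:

    from wordcloud import WordCloud

    freq_dic = {"hola": 120, "mundo": 80, "che": 40}  # example frequencies
    less_group = ["che"]                              # example grouping

    def color_func(word, **kwargs):
        # '#529ef3' ('less') and '#d35400' ('salient') mirror the diff's colors.
        return '#529ef3' if word in less_group else '#d35400'

    wc = WordCloud(background_color="white").generate_from_frequencies(freq_dic)
    wc.recolor(color_func=color_func)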
modules/module_vocabulary.py
CHANGED
@@ -1,9 +1,14 @@
 from memory_profiler import profile
 import pandas as pd
+from typing import List, Dict, Tuple
 
 class Vocabulary:
    @profile
-    def __init__(
+    def __init__(
+        self,
+        subset_name: str
+    ) -> None:
+
        # Dataset info
        self.subset_name = subset_name
        self.ds_path = f"data/{subset_name}_vocab_v6.zip"
@@ -17,10 +22,17 @@ class Vocabulary:
        # Load vocabulary dataset
        self.__load()
 
-    def __contains__(
+    def __contains__(
+        self,
+        word: str
+    ) -> bool:
+
        return word in self.df_vocab['word'].to_list()
 
-    def __load(
+    def __load(
+        self
+    ) -> None:
+
        print(f"Preparing {self.subset_name} vocabulary...")
 
        # --- Download vocab dataset ---
@@ -41,7 +53,11 @@ class Vocabulary:
            reverse=True
        )
 
-    def __getValue(
+    def __getValue(
+        self,
+        word: str,
+        feature: str
+    ):
        word_id, value = None, None
 
        if word in self:
@@ -52,23 +68,47 @@ class Vocabulary:
 
        return value
 
-    def getFreq(
+    def getFreq(
+        self,
+        word
+    ) -> int:
+
        return self.__getValue(word, 'freq')
 
-    def getPercentile(
+    def getPercentile(
+        self,
+        word:str
+    ) -> float:
+
        return self.__getValue(word, 'percentile')
 
-    def getSplits(
+    def getSplits(
+        self,
+        word: str
+    ) -> List[str]:
+
        return self.__getValue(word, 'splits')
 
-    def getSubsets(
+    def getSubsets(
+        self,
+        word: str
+    ) -> Dict[str, int]:
+
        return self.__getValue(word, 'in_subset')
 
-    def distribution(
+    def distribution(
+        self
+    ) -> Tuple:
+
        x_values, y_values = zip(*self.histogram)
        return x_values, y_values
 
-    def getWordNeighbors(
+    def getWordNeighbors(
+        self,
+        word: str,
+        n_neighbors: int=20
+    )-> Tuple:
+
        word_id = self.df_vocab['word'].to_list().index(word)
        words = self.df_vocab['word'].to_list()
        freqs = self.df_vocab['freq'].to_list()
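Typing __contains__ is what lets every other method test membership with plain `word in self`. A stand-alone reduction of that protocol, using the same 'word' column the class assumes (TinyVocab is illustrative, not part of the repo):

    import pandas as pd

    class TinyVocab:
        def __init__(self, words):
            self.df_vocab = pd.DataFrame({'word': words})

        def __contains__(self, word: str) -> bool:
            return word in self.df_vocab['word'].to_list()

    vocab = TinyVocab(["hola", "mundo"])
    assert "hola" in vocab
    assert "adios" not in vocab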
modules/module_word2Context.py
CHANGED
@@ -1,8 +1,8 @@
 from datasets import load_dataset, interleave_datasets
 from modules.module_segmentedWordCloud import SegmentedWordCloud
 from modules.module_customSubsetsLabel import CustomSubsetsLabel
-
 from random import sample as random_sample
+from typing import Tuple, List, Dict
 import re
 
 import matplotlib as mpl
@@ -11,7 +11,12 @@ import matplotlib.pyplot as plt
 
 
 class Word2Context:
-    def __init__(
+    def __init__(
+        self,
+        context_ds_name: str,
+        vocabulary  # Vocabulary class instance
+    ) -> None:
+
        self.context_ds_name = context_ds_name
 
        # Vocabulary class
@@ -20,7 +25,11 @@ class Word2Context:
        # Custom Label component
        self.Label = CustomSubsetsLabel()
 
-    def errorChecking(
+    def errorChecking(
+        self,
+        word: str
+    ) -> str:
+
        out_msj = ""
 
        if not word:
@@ -31,19 +40,33 @@ class Word2Context:
 
        return out_msj
 
-    def genWebLink(
+    def genWebLink(
+        self,
+        text: str
+    ) -> str:
+
        text = text.replace("\"", "'")
        text = text.replace("<u><b>", "")
        text = text.replace("</b></u>", "")
        url = "https://www.google.com.tr/search?q={}".format(text)
        return '<a href="{}" rel="noopener noreferrer" target="_blank"><center>ππ</center></a>'.format(url)
 
-    def genWordCloudPlot(
+    def genWordCloudPlot(
+        self,
+        word: str,
+        figsize: Tuple[int,int]=(9,3)
+    ): # ToDO: Figure typing
+
        freq_dic, l_group, g_group = self.vocab.getWordNeighbors(word, n_neighbors=10)
        wc = SegmentedWordCloud(freq_dic, l_group, g_group)
        return wc.plot(figsize)
 
-    def genDistributionPlot(
+    def genDistributionPlot(
+        self,
+        word: str,
+        figsize: Tuple[int,int]=(6,1)
+    ): # ToDO: Figure typing
+
        x_values, y_values = self.vocab.distribution()
        w_percentile = self.vocab.getPercentile(word)
        w_freq = self.vocab.getFreq(word)
@@ -52,19 +75,20 @@ class Word2Context:
        ax.plot(x_values, y_values, color='green')
        ax.fill_between(x_values, y_values, color='lightgreen',)
 
-
-
-
-
-
-
-
-
-
-
-
-
-
+        ax.axvline(x=max(0,w_percentile-.01),
+                   color='blue',
+                   linewidth=7,
+                   alpha=.1,
+                   linestyle='-'
+        )
+
+        ax.axvline(x=min(100,w_percentile+.01),
+                   color='black',
+                   linewidth=7,
+                   alpha=.1,
+                   linestyle='-'
+        )
+
        ax.axvline(x=w_percentile,
                   color='#d35400',
                   linewidth=2,
@@ -76,7 +100,12 @@ class Word2Context:
        plt.legend(loc='upper left', prop={'size': 7})
        return fig
 
-    def findSplits(
+    def findSplits(
+        self,
+        word: str,
+        subsets_list: List[str]
+    ):
+
        w_splits = self.vocab.getSplits(word)
 
        splits_list = []
@@ -102,7 +131,12 @@ class Word2Context:
 
        return datasets
 
-    def findContexts(
+    def findContexts(
+        self,
+        sample: str,
+        word: str
+    ) -> Dict[str,str]:
+
        sample = sample['text'].strip()
        context = ""
        m = re.search(r'\b{}\b'.format(word), sample)
@@ -112,7 +146,11 @@ class Word2Context:
            context = sample[:init]+"<u><b>"+word+"</b></u>"+sample[end:]
        return {'context':context}
 
-    def getSubsetsInfo(
+    def getSubsetsInfo(
+        self,
+        word: str
+    ) -> Tuple:
+
        total_freq = self.vocab.getFreq(word)
        subsets_name_list = list(self.vocab.getSubsets(word).keys())
        subsets_freq_list = list(self.vocab.getSubsets(word).values())
@@ -127,73 +165,21 @@ class Word2Context:
        subsets_info = self.Label.compute(subsets_origin_info)
        return subsets_info, subsets_origin_info
 
-    def getContexts(
-
-
-
-
-
-        list_of_contexts = [(i,dic['context'],dic['subset']) for i,dic in enumerate(list_of_dict)]
-
-        return list_of_contexts
-
-    # TODO: The next methods can be removed, or keep them as a wrapper method of several ones
-    '''
-    def getWordInfo(self, word):
-        errors = ""
-        contexts = pd.DataFrame([],columns=[''])
-        subsets_info = ""
-        distribution_plot = None
-        word_cloud_plot = None
-        subsets_choice = gr.CheckboxGroup.update(choices=[])
-
-        errors = self.errorChecking(word)
-        if errors:
-            return errors, contexts, subsets_info, distribution_plot, word_cloud_plot, subsets_choice
-
-        total_freq = self.vocab.getFreq(word)
-        subsets_name_list = list(self.vocab.getSubsets(word).keys())
-        subsets_freq_list = list(self.vocab.getSubsets(word).values())
-
-        # Create subset frequency dict to subset_freq component
-        subsets_info = {
-            s_name + f" ({s_freq})": s_freq/total_freq
-            for s_name, s_freq in zip(subsets_name_list, subsets_freq_list)
-        }
-        subsets_origin_info = dict(sorted(subsets_info.items(), key=lambda x: x[1], reverse=True))
-        subsets_info = self.Label.compute(subsets_origin_info)
-
-        # Create sort list to subsets_choice component
-        clean_keys = [key.split(" ")[0].strip() for key in subsets_origin_info]
-        subsets_choice = gr.CheckboxGroup.update(choices=clean_keys)
-
-        # Get word distribution, and wordcloud graph
-        distribution_plot = self.genDistributionPlot(word)
-        word_cloud_plot = self.genWordCloudPlot(word)
-
-        return errors, contexts, subsets_info, distribution_plot, word_cloud_plot, subsets_choice
-
-    def getWordContext(self, word, n_context, subset_choice):
-        n_context = int(n_context)
-        errors = ""
-
-        if len(subset_choice) > 0:
-            ds = self.findSplits(word, subset_choice)
-
-        else:
-            errors = "Error: Palabra no ingresada y/o conjunto/s de interés no seleccionado/s!"
-            errors = "<center><h3>"+errors+"</h3></center>"
-            return errors, pd.DataFrame([], columns=[''])
+    def getContexts(
+        self,
+        word: str,
+        n_context: int,
+        ds
+    ) -> List:
 
        ds_w_contexts = ds.map(lambda sample: self.findContexts(sample, word))
        only_contexts = ds_w_contexts.filter(lambda sample: sample['context'] != "")
        shuffle_contexts = only_contexts.shuffle(buffer_size=10)
 
        list_of_dict = list(shuffle_contexts.take(n_context))
-        list_of_contexts = [
-
-
-
+        list_of_contexts = [
+            (i, dic['context'], dic['subset'])
+            for i,dic in enumerate(list_of_dict)
+        ]
 
-    return
-    '''
+        return list_of_contexts
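findContexts is the core of the context search: a whole-word regex match whose span is used to wrap the hit in <u><b>...</b></u> for the markdown columns of the results Dataframe. Isolated as a free function for a quick check (the sample argument is a dataset row, i.e. a dict with a 'text' field, despite the str annotation in the diff):

    import re

    def find_context(sample: dict, word: str) -> dict:
        text = sample['text'].strip()
        context = ""
        m = re.search(r'\b{}\b'.format(word), text)
        if m:
            init, end = m.span()
            context = text[:init] + "<u><b>" + word + "</b></u>" + text[end:]
        return {'context': context}

    assert find_context({'text': "hola mundo"}, "mundo") == \
        {'context': "hola <u><b>mundo</b></u>"}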