Changed logs dataset target
- .gitignore +1 -1
- app.py +5 -3
- data/mini_vocab_v6.zip +3 -0
- interfaces/interface_datos.py +5 -3
- language/spanish.json +1 -1
- modules/module_connection.py +3 -4
- modules/module_logsManager.py +4 -2
.gitignore
CHANGED
@@ -1,3 +1,3 @@
 __pycache__/
 .env
-
+logs_edia_datos_spanish/
app.py
CHANGED
@@ -9,10 +9,12 @@ from interfaces.interface_datos import interface as interface_datos
 # --- Tool config ---
 # ToDo: Change the owner of the context dataset from nanom to vialibre
 CONTEXTS_DATASET = "nanom/splittedspanish3bwc"
-
-AVAILABLE_LOGS = True # [True | False]
+
 LANGUAGE = "spanish" # [spanish]
-VOCABULARY_SUBSET = "full" # [full]
+VOCABULARY_SUBSET = "full" # [full, mini]
+
+AVAILABLE_WORDCLOUD = False # [True | False]
+AVAILABLE_LOGS = True # [True | False]
 
 
 # --- Init classes ---
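After this commit, app.py exposes four config switches instead of two, with AVAILABLE_WORDCLOUD new and AVAILABLE_LOGS moved below the language settings. The sketch below shows how these values plausibly reach the data tab; only `available_logs` and `lang` appear as arguments in this commit's interface_datos.py hunks, so the bare call and anything else about `interface()` are assumptions for illustration.

```python
from interfaces.interface_datos import interface as interface_datos

# Values from the new side of this hunk
CONTEXTS_DATASET = "nanom/splittedspanish3bwc"
LANGUAGE = "spanish"            # [spanish]
VOCABULARY_SUBSET = "full"      # [full, mini]
AVAILABLE_WORDCLOUD = False     # [True | False]
AVAILABLE_LOGS = True           # [True | False]

# Assumed wiring: the real interface() likely takes more arguments (contexts
# dataset, vocabulary subset, wordcloud flag); only the two keywords below are
# grounded in this commit's interface_datos.py hunks.
demo = interface_datos(
    available_logs=AVAILABLE_LOGS,   # toggles the HuggingFaceDatasetSaver
    lang=LANGUAGE,                   # becomes the "logs_edia_datos_{lang}" suffix
)

if __name__ == "__main__":
    demo.launch()   # assumption: interface() returns a Gradio Blocks/Interface
```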
data/mini_vocab_v6.zip
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:e1ac44f7478778f4c08bb57dc6aa79ededc89654b404320d0017586e578768a9
+size 93612
interfaces/interface_datos.py
CHANGED
@@ -15,7 +15,8 @@ def interface(
 
     # --- Init logs ---
    log_callback = HuggingFaceDatasetSaver(
-        available_logs=available_logs
+        available_logs=available_logs,
+        dataset_name=f"logs_edia_datos_{lang}"
    )
 
    # --- Init Class ---
@@ -67,7 +68,8 @@ def interface(
            value=labels["step3"]
        )
        subsets_choice = gr.CheckboxGroup(
-            label="",
+            label="Subset_choices",
+            show_label=False,
            interactive=True,
            visible=True
        )
@@ -148,7 +150,7 @@ def interface(
    save_field = [input_word, subsets_choice]
    log_callback.setup(
        components=save_field,
-        flagging_dir=
+        flagging_dir="logs"
    )
 
    btn_get_contexts.click(
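These three hunks change where logs land: the saver now targets a per-language dataset, `logs_edia_datos_{lang}`, the checkbox group gets a real label (hidden in the UI via `show_label=False`, presumably so logged rows carry a usable column header), and the flagging directory is fixed to "logs". Below is a minimal standalone sketch of the updated logging setup, assuming `lang` and `available_logs` arrive as arguments of `interface()`; component choices and everything outside these hunks are illustrative only.

```python
import gradio as gr
from modules.module_logsManager import HuggingFaceDatasetSaver

lang = "spanish"          # in the real file these come in as interface() parameters
available_logs = False    # assumption: False keeps the saver from touching the Hub

# --- Init logs ---
log_callback = HuggingFaceDatasetSaver(
    available_logs=available_logs,
    dataset_name=f"logs_edia_datos_{lang}",   # e.g. "logs_edia_datos_spanish"
)

with gr.Blocks() as demo:
    input_word = gr.Textbox(label="Word", placeholder="Ingresar aquí la palabra ...")
    subsets_choice = gr.CheckboxGroup(
        label="Subset_choices",   # labelled for the logs,
        show_label=False,         # but hidden in the rendered UI
        interactive=True,
        visible=True,
    )
    # The flagging callback is wired to the components whose values get logged.
    save_field = [input_word, subsets_choice]
    log_callback.setup(components=save_field, flagging_dir="logs")
```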
language/spanish.json
CHANGED
@@ -4,7 +4,7 @@
     "step2": "2. Seleccione cantidad máxima de contextos a recuperar",
     "step3": "3. Seleccione conjuntos de interés",
     "inputWord": {
-        "title": "",
+        "title": "Word",
         "placeholder": "Ingresar aquí la palabra ..."
     },
     "wordInfoButton": "Obtener información de palabra",
modules/module_connection.py
CHANGED
@@ -81,14 +81,13 @@ class Word2ContextExplorerConnector(Connector):
        return self.process_error(err), contexts, subsets_info, distribution_plot, word_cloud_plot, subsets_choice
 
    def get_word_context(
-        self,
-        word: str,
-        n_context: int,
+        self,
+        word: str,
+        n_context: int,
        subset_choice: List[str]
    ) -> Tuple:
 
        word = self.parse_word(word)
-        n_context = int(n_context)
        err = ""
        contexts = pd.DataFrame([], columns=[''])
 
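The substantive change in this hunk is dropping the `n_context = int(n_context)` cast (the re-listed parameter lines appear to be a whitespace-only touch-up); the method now trusts its `n_context: int` annotation. A tiny self-contained illustration of that contract, with the function name and body purely hypothetical:

```python
from typing import List, Tuple
import pandas as pd

def get_word_context_sketch(word: str, n_context: int, subset_choice: List[str]) -> Tuple:
    # Stand-in for the connector method: n_context is used as-is, without the
    # old defensive int() cast, so callers must pass an actual int.
    err = ""
    contexts = pd.DataFrame([], columns=[''])
    return err, contexts.head(n_context), subset_choice

print(get_word_context_sketch("palabra", 10, ["full"]))
```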
modules/module_logsManager.py
CHANGED
@@ -50,8 +50,8 @@ class HuggingFaceDatasetSaver(FlaggingCallback):
 
    def __init__(
        self,
+        dataset_name: str=None,
        hf_token: str=os.getenv('HF_TOKEN'),
-        dataset_name: str=os.getenv('DS_LOGS_NAME'),
        organization: Optional[str]=os.getenv('ORG_NAME'),
        private: bool=True,
        available_logs: bool=False
@@ -63,8 +63,10 @@ class HuggingFaceDatasetSaver(FlaggingCallback):
        organization: The organization to save the dataset under. The hf_token must provide write access to this organization. If not provided, saved under the name of the user corresponding to the hf_token.
        private: Whether the dataset should be private (defaults to False).
        """
-
+        assert(dataset_name is not None), "Error: Parameter 'dataset_name' cannot be empty!."
+
        self.dataset_name = dataset_name
+        self.hf_token = hf_token
        self.organization_name = organization
        self.dataset_private = private
        self.datetime = DateLogs()
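With this change the saver no longer falls back to the `DS_LOGS_NAME` environment variable: `dataset_name` must be passed explicitly (the new assert enforces it), and the token is now kept on the instance as `self.hf_token`. A minimal construction sketch under the new contract; that `available_logs=False` keeps setup() from writing to the Hub is an assumption, not shown in this diff.

```python
import os
from modules.module_logsManager import HuggingFaceDatasetSaver

# Omitting dataset_name now raises:
# AssertionError: Error: Parameter 'dataset_name' cannot be empty!.
saver = HuggingFaceDatasetSaver(
    dataset_name="logs_edia_datos_spanish",   # the value interface_datos.py builds for lang="spanish"
    hf_token=os.getenv("HF_TOKEN"),           # still defaults to the HF_TOKEN env var
    private=True,
    available_logs=False,                     # assumption: disabled logging keeps the saver inert
)
```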