dominguesm committed on
Commit
3e3d92e
1 Parent(s): 3a4d722

Reformulação do exemplo e atualização das descrições

Browse files
app.py CHANGED
@@ -189,67 +189,53 @@ with gr.Blocks(theme=gr.themes.Soft()) as app:
189
  interactive=True,
190
  )
191
  with gr.Row():
192
- with gr.Column():
193
- gr.Markdown("""## PARAMETERS GRID""")
194
  gr.Markdown(load_description("description_parameter_grid"))
195
- with gr.Column():
196
- gr.Markdown("""### Classifier Alpha""")
197
- gr.Markdown(load_description("parameter_grid/alpha"))
198
-
199
- clf__alpha = gr.Textbox(
200
- label="clf__alpha",
201
- value="1.e-06, 1.e-05, 1.e-04",
202
- info="Due to practical considerations, this parameter was kept constant.",
203
- interactive=False,
204
- )
205
-
206
- with gr.Column():
207
- gr.Markdown("""### Vectorizer max_df""")
208
- gr.Markdown(load_description("parameter_grid/max_df"))
209
-
210
- vect__max_df = gr.Textbox(
211
- label="vect__max_df",
212
- value="0.2, 0.4, 0.6, 0.8, 1.0",
213
- info="Values ranging from 0 to 1.0, separated by a comma.",
214
- interactive=True,
215
- )
216
-
217
- with gr.Column():
218
- gr.Markdown("""### Vectorizer min_df""")
219
- gr.Markdown(load_description("parameter_grid/min_df"))
220
-
221
- vect__min_df = gr.Textbox(
222
- label="vect__min_df",
223
- value="1, 3, 5, 10",
224
- info="Values ranging from 0 to 1.0, separated by a comma, or integers separated by a comma. If float, the parameter represents a proportion of documents, integer absolute counts.",
225
- interactive=True,
226
- )
227
- with gr.Column():
228
- gr.Markdown("""### Vectorizer ngram_range""")
229
- gr.Markdown(load_description("parameter_grid/ngram_range"))
230
-
231
- vect__ngram_range = gr.Textbox(
232
- label="vect__ngram_range",
233
- value="(1, 1), (1, 2)",
234
- info="""Tuples of integer values separated by a comma. For example an ``ngram_range`` of ``(1, 1)`` means only unigrams, ``(1, 2)`` means unigrams and bigrams, and ``(2, 2)`` means only bigrams.""",
235
- interactive=True,
236
- )
237
- with gr.Column():
238
- gr.Markdown("""### Vectorizer norm""")
239
- gr.Markdown(load_description("parameter_grid/norm"))
240
- gr.Markdown(
241
- """- 'l2': Sum of squares of vector elements is 1. The cosine
242
- similarity between two vectors is their dot product when l2 norm has
243
- been applied.
244
- - 'l1': Sum of absolute values of vector elements is 1."""
245
- )
246
-
247
- vect__norm = gr.Textbox(
248
- label="vect__norm",
249
- value="l1, l2",
250
- info="'l1' or 'l2', separated by a comma",
251
- interactive=True,
252
- )
253
 
254
  with gr.Row():
255
  gr.Markdown(
 
189
  interactive=True,
190
  )
191
  with gr.Row():
192
+ with gr.Tab("PARAMETERS GRID"):
 
193
  gr.Markdown(load_description("description_parameter_grid"))
194
+ with gr.Row():
195
+ with gr.Column():
196
+ clf__alpha = gr.Textbox(
197
+ label="Classifier Alpha (clf__alpha)",
198
+ value="1.e-06, 1.e-05, 1.e-04",
199
+ info="Due to practical considerations, this parameter was kept constant.",
200
+ interactive=False,
201
+ )
202
+ vect__max_df = gr.Textbox(
203
+ label="Vectorizer max_df (vect__max_df)",
204
+ value="0.2, 0.4, 0.6, 0.8, 1.0",
205
+ info="Values ranging from 0 to 1.0, separated by a comma.",
206
+ interactive=True,
207
+ )
208
+ vect__min_df = gr.Textbox(
209
+ label="Vectorizer min_df (vect__min_df)",
210
+ value="1, 3, 5, 10",
211
+ info="Values ranging from 0 to 1.0, separated by a comma, or integers separated by a comma. If float, the parameter represents a proportion of documents, integer absolute counts.",
212
+ interactive=True,
213
+ )
214
+ with gr.Column():
215
+ vect__ngram_range = gr.Textbox(
216
+ label="Vectorizer ngram_range (vect__ngram_range)",
217
+ value="(1, 1), (1, 2)",
218
+ info="""Tuples of integer values separated by a comma. For example an `ngram_range` of `(1, 1)` means only unigrams, `(1, 2)` means unigrams and bigrams, and `(2, 2)` means only bigrams.""",
219
+ interactive=True,
220
+ )
221
+ vect__norm = gr.Textbox(
222
+ label="Vectorizer norm (vect__norm)",
223
+ value="l1, l2",
224
+ info="'l1' or 'l2', separated by a comma",
225
+ interactive=True,
226
+ )
227
+
228
+ with gr.Tab("DESCRIPTION OF PARAMETERS"):
229
+ gr.Markdown("""### Classifier Alpha""")
230
+ gr.Markdown(load_description("parameter_grid/alpha"))
231
+ gr.Markdown("""### Vectorizer max_df""")
232
+ gr.Markdown(load_description("parameter_grid/max_df"))
233
+ gr.Markdown("""### Vectorizer min_df""")
234
+ gr.Markdown(load_description("parameter_grid/min_df"))
235
+ gr.Markdown("""### Vectorizer ngram_range""")
236
+ gr.Markdown(load_description("parameter_grid/ngram_range"))
237
+ gr.Markdown("""### Vectorizer norm""")
238
+ gr.Markdown(load_description("parameter_grid/norm"))
 
 
 
 
 
 
 
 
 
 
 
 
 
239
 
240
  with gr.Row():
241
  gr.Markdown(
descriptions/parameter_grid/alpha.md CHANGED
@@ -1 +1 @@
1
- The value of "alpha" adds a constant amount to the occurrence counters of features, ensuring that even unobserved feature values have a non-zero probability. Smaller values of "alpha" result in weaker smoothing, while larger values increase the level of smoothing. The default value is 1.0, which applies Laplace smoothing, but it can be adjusted based on the model's requirements.
 
1
+ The "alpha" parameter adds a constant value to the occurrence counters of features, ensuring that even unobserved feature values have a non-zero probability. Smaller values of "alpha" result in weaker smoothing, while larger values increase the level of smoothing. The default value is 1.0, which applies Laplace smoothing, but it can be adjusted based on the model's requirements.
descriptions/parameter_grid/max_df.md CHANGED
@@ -1 +1 @@
1
- The "max_df" parameter of TfidfVectorizer in scikit-learn is used to set an upper limit on the term frequency within a document, where terms that occur more frequently than the specified value are ignored during the vectorization process.
 
1
+ The "max_df" parameter of TfidfVectorizer in scikit-learn is used to set an upper limit on the document frequency of a term, i.e. the number (integer) or proportion (float) of documents in which the term appears. Terms that appear in more documents than the specified value are ignored when the vocabulary is built during the vectorization process.
descriptions/parameter_grid/min_df.md CHANGED
@@ -1 +1 @@
1
- The "min_df" parameter of TfidfVectorizer in scikit-learn is used to set a lower limit on the term frequency within a document, where terms that occur less frequently than the specified value are ignored during the vectorization process.
 
1
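+ The "min_df" parameter of TfidfVectorizer in scikit-learn is used to set a lower limit on the document frequency of a term, i.e. the number (integer) or proportion (float) of documents in which the term appears. Terms that appear in fewer documents than the specified value are ignored when the vocabulary is built during the vectorization process.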
descriptions/parameter_grid/ngram_range.md CHANGED
@@ -1 +1 @@
1
- The "ngram_range" parameter of TfidfVectorizer in scikit-learn is used to specify the range of n-grams (contiguous sequences of n words) to consider during the vectorization process. It defines the lower and upper bounds for the n-gram sizes that will be included in the feature representation.
 
1
+ The "ngram_range" parameter of TfidfVectorizer in scikit-learn is used to specify the range of n-grams (contiguous sequences of n words) considered during the vectorization process. It defines the lower and upper bounds for the n-gram sizes that will be included in the feature representation.
descriptions/parameter_grid/norm.md CHANGED
@@ -1 +1,6 @@
1
- The "norm" parameter of TfidfVectorizer in scikit-learn is used to specify the normalization method applied to the resulting TF-IDF vectors. It controls whether the vectors should be normalized to have unit norm (L2 normalization) or left unnormalized (None).
 
 
 
 
 
 
1
+ The "norm" parameter of TfidfVectorizer in scikit-learn is used to specify the normalization method applied to the resulting TF-IDF vectors. It controls whether each vector is normalized to have unit norm (using either the L2 or the L1 norm) or left unnormalized (None).
2
+
3
+ ```
4
+ - 'l2': The sum of squares of vector elements is 1. The cosine similarity between two vectors is their dot product when the L2 norm has been applied.
5
+ - 'l1': The sum of the absolute values of vector elements is 1.
6
+ ```