joaogante (HF staff) committed on
Commit d9f5161
1 Parent(s): b7e4225

length problems

Files changed (1)
  1. app.py +110 -11
app.py CHANGED
@@ -25,8 +25,8 @@ HUB_TAGS = {
25
  "chat": ("conversational", "text-generation", "text2text-generation"),
26
  "txtqa": ("text-generation", "text2text-generation"),
27
  "otherqa": ("table-question-answering", "document-question-answering", "visual-question-answering"),
28
- "asr": ("automatic-speech-recognition"),
29
- "img2txt": ("image-to-text"),
30
  }
31
  assert len(TASK_TYPES) == len(HUB_TAGS)
32
  assert all(tag in HUB_TAGS for tag in TASK_TYPES.values())
@@ -46,7 +46,7 @@ PROBLEMS = {
46
  INIT_MARKDOWN = """
47
   
48
 
49
- 👈 Fill in as much data as you can...
50
 
51
   
52
 
@@ -68,9 +68,6 @@ DEMO_MARKDOWN = """
68
  """
69
 
70
  SUGGETIONS_HEADER = """
71
- 🫥 NOTE: Clicking on links will crash the app. Copy them into your browser instead.
72
- See the Gradio issue [here](https://github.com/gradio-app/gradio/issues/3234). 🫥
73
-
74
  ✨ Here is a list of suggestions for you -- click to expand ✨
75
  """
76
 
@@ -80,19 +77,108 @@ TASK_MODEL_MISMATCH = """
80
 
81
  🤔 Why?  
82
 
83
- The selected model (`{model_name}`) isn't tagged as a good fit for the task you selected ("{task_type}").  
 
84
 
85
  🤗 How?  
86
 
87
- Our recommendation is to go to our [tasks page](https://huggingface.co/tasks) and select one of our suggested
88
  models as a starting point.  
89
 
90
  😱 Caveats  
91
 
92
- The tags of a model are defined by the community and are not always accurate. If you think the model is incorrectly
93
  tagged or missing a tag, please open an issue on the [model card](https://huggingface.co/{model_name}/tree/main).
94
  </details>
95
  """
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
96
  # =====================================================================================================================
97
 
98
 
@@ -122,13 +208,26 @@ def get_suggestions(task_type, model_name, problem_type):
122
  # Check if the model is valid for the task. If not, return straight away
123
  if not is_valid_task_for_model(model_name, task_type):
124
  counter += 1
125
- suggestions += TASK_MODEL_MISMATCH.format(count=counter, model_name=model_name, task_type=task_type)
126
  return suggestions
127
 
128
  # Demo shortcut: only a few sections are working
129
- if PROBLEMS.get(problem_type) not in ("", "length", "quality", "speed"):
130
  return DEMO_MARKDOWN
131

132
  return suggestions
133
  # =====================================================================================================================
134
 
 
25
  "chat": ("conversational", "text-generation", "text2text-generation"),
26
  "txtqa": ("text-generation", "text2text-generation"),
27
  "otherqa": ("table-question-answering", "document-question-answering", "visual-question-answering"),
28
+ "asr": ("automatic-speech-recognition",),
29
+ "img2txt": ("image-to-text",),
30
  }
31
  assert len(TASK_TYPES) == len(HUB_TAGS)
32
  assert all(tag in HUB_TAGS for tag in TASK_TYPES.values())
 
46
  INIT_MARKDOWN = """
47
  &nbsp;
48
 
49
+ 👈 Fill in as much information as you can...
50
 
51
  &nbsp;
52
 
 
68
  """
69
 
70
  SUGGETIONS_HEADER = """
71
  ✨ Here is a list of suggestions for you -- click to expand ✨
72
  """
73
 
 
77
 
78
  🤔 Why? &nbsp;
79
 
80
+ The selected model (`{model_name}`) doesn't have a tag compatible with the task you selected ("{task_type}").
81
+ Expected tags for this task are: {tags} &nbsp;
82
 
83
  🤗 How? &nbsp;
84
 
85
+ Our recommendation is to go to our [tasks page](https://huggingface.co/tasks) and select one of the suggested
86
  models as a starting point. &nbsp;
87
 
88
  😱 Caveats &nbsp;
89
 
90
+ 1. The tags of a model are defined by the community and are not always accurate. If you think the model is incorrectly
91
  tagged or missing a tag, please open an issue on the [model card](https://huggingface.co/{model_name}/tree/main).
92
  </details>
93
  """
94
+
95
+ SET_MAX_NEW_TOKENS = """
96
+ <details><summary>{count}. Control the maximum output length with `max_new_tokens`.</summary>
97
+ &nbsp;
98
+
99
+ 🤔 Why? &nbsp;
100
+
101
+ All text generation calls have a length-related stopping condition. Depending on the model and/or the tool you're
102
+ using to generate text, the default value may be too small or too large. I'd recommend ALWAYS setting this option.
103
+ &nbsp;
104
+
105
+ 🤗 How? &nbsp;
106
+
107
+ Our text generation interfaces accept a `max_new_tokens` option. Set it to define the maximum number of tokens
108
+ that can be generated. &nbsp;
109
+
110
+ 😱 Caveats &nbsp;
111
+
112
+ 1. Allowing a longer output doesn't necessarily mean that the model will generate longer outputs. By default,
113
+ the model will stop generating when it generates a special `eos_token_id` token.
114
+ 2. You shouldn't set `max_new_tokens` to a value larger than the maximum sequence length of the model. If you need a
115
+ longer output, consider using a model with a larger maximum sequence length.
116
+ 3. The longer the output, the longer it will take to generate.
117
+ </details>
118
+ """
119
+
120
+ SET_MIN_LENGTH = """
121
+ <details><summary>{count}. Force a minimum output length with `min_new_tokens`.</summary>
122
+ &nbsp;
123
+
124
+ 🤔 Why? &nbsp;
125
+
126
+ Text generation stops when the model generates a special `eos_token_id`. If you prevent it from happening, the model is
127
+ forced to continue generating. &nbsp;
128
+
129
+ 🤗 How? &nbsp;
130
+
131
+ Our text generation interfaces accept a `min_new_tokens` argument. Set it to prevent `eos_token_id` from being
132
+ generated until `min_new_tokens` tokens are generated. &nbsp;
133
+
134
+ 😱 Caveats &nbsp;
135
+
136
+ 1. The quality of the output may suffer if the model is forced to generate beyond its own original expectations.
137
+ 2. `min_new_tokens` must be smaller than `max_new_tokens` (see related tip).
138
+ </details>
139
+ """
140
+
141
+ REMOVE_EOS_TOKEN = """
142
+ <details><summary>{count}. Prevent the model from halting generation by removing `eos_token_id`.</summary>
143
+ &nbsp;
144
+
145
+ 🤔 Why? &nbsp;
146
+
147
+ Text generation stops when the model generates a special `eos_token_id`. If there is no `eos_token_id`, the model can't
148
+ stop. &nbsp;
149
+
150
+
151
+ 🤗 How? &nbsp;
152
+
153
+ Our text generation interfaces accept an `eos_token_id` argument. Set it to a null value (e.g., in Python,
154
+ `eos_token_id=None`) to prevent generation from stopping before it reaches other stopping conditions. &nbsp;
155
+
156
+ 😱 Caveats &nbsp;
157
+
158
+ 1. The quality of the output may suffer if the model is forced to generate beyond its own original expectations.
159
+ </details>
160
+ """
161
+
162
+ LIST_EOS_TOKEN = """
163
+ <details><summary>{count}. Add a stop word through `eos_token_id`.</summary>
164
+ &nbsp;
165
+
166
+ 🤔 Why? &nbsp;
167
+
168
+ Text generation stops when the model generates a special `eos_token_id`. Actually, this attribute can be a list of
169
+ tokens, which means you can define arbitrary stop words. &nbsp;
170
+
171
+
172
+ 🤗 How? &nbsp;
173
+
174
+ Our text generation interfaces accept an `eos_token_id` argument. You can pass a list of tokens to make generation
175
+ stop in the presence of any of those tokens. &nbsp;
176
+
177
+ 😱 Caveats &nbsp;
178
+
179
+ 1. When passing a list of tokens, don't forget to include the default `eos_token_id` in it.
180
+ </details>
181
+ """
182
  # =====================================================================================================================
183
 
184
 
 
208
  # Check if the model is valid for the task. If not, return straight away
209
  if not is_valid_task_for_model(model_name, task_type):
210
  counter += 1
211
+ possible_tags = " ".join("`" + tag + "`" for tag in HUB_TAGS[TASK_TYPES[task_type]])
212
+ suggestions += TASK_MODEL_MISMATCH.format(
213
+ count=counter, model_name=model_name, task_type=task_type, tags=possible_tags
214
+ )
215
  return suggestions
216
 
217
  # Demo shortcut: only a few sections are working
218
+ if PROBLEMS.get(problem_type, "") not in ("", "length", "quality", "speed"):
219
  return DEMO_MARKDOWN
220
 
221
+ if PROBLEMS.get(problem_type, "") == "length":
222
+ counter += 1
223
+ suggestions += SET_MAX_NEW_TOKENS.format(count=counter)
224
+ counter += 1
225
+ suggestions += SET_MIN_LENGTH.format(count=counter)
226
+ counter += 1
227
+ suggestions += REMOVE_EOS_TOKEN.format(count=counter)
228
+ counter += 1
229
+ suggestions += LIST_EOS_TOKEN.format(count=counter)
230
+
231
  return suggestions
232
  # =====================================================================================================================
233