Commit: length problems
app.py
CHANGED
@@ -25,8 +25,8 @@ HUB_TAGS = {
     "chat": ("conversational", "text-generation", "text2text-generation"),
     "txtqa": ("text-generation", "text2text-generation"),
     "otherqa": ("table-question-answering", "document-question-answering", "visual-question-answering"),
-    "asr": ("automatic-speech-recognition"),
-    "img2txt": ("image-to-text"),
+    "asr": ("automatic-speech-recognition",),
+    "img2txt": ("image-to-text",),
 }
 assert len(TASK_TYPES) == len(HUB_TAGS)
 assert all(tag in HUB_TAGS for tag in TASK_TYPES.values())
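(Context for this fix: in Python, parentheses without a trailing comma are just grouping, so the old values were plain strings rather than one-element tuples, and any membership check against them matches single characters. A minimal sketch:)

```python
# Without a trailing comma the parentheses are only grouping: this is a str.
not_a_tuple = ("image-to-text")
assert not_a_tuple == "image-to-text"
assert "i" in not_a_tuple  # substring match -- almost never what was intended

# With the trailing comma it is a one-element tuple, like the other entries.
a_tuple = ("image-to-text",)
assert "image-to-text" in a_tuple
assert "i" not in a_tuple
```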
@@ -46,7 +46,7 @@ PROBLEMS = {
 INIT_MARKDOWN = """
 
 
-👈 Fill in as much
+👈 Fill in as much information as you can...
 
 
 
@@ -68,9 +68,6 @@ DEMO_MARKDOWN = """
 """
 
 SUGGETIONS_HEADER = """
-🫥 NOTE: Clicking on links will crash the app. Copy them into your browser instead.
-See the Gradio issue [here](https://github.com/gradio-app/gradio/issues/3234). 🫥
-
 ✨ Here is a list of suggestions for you -- click to expand ✨
 """
 
@@ -80,19 +77,108 @@ TASK_MODEL_MISMATCH = """
 
 🤔 Why?
 
-The selected model (`{model_name}`)
+The selected model (`{model_name}`) doesn't have a tag compatible with the task you selected ("{task_type}").
+Expected tags for this task are: {tags}
 
 🤗 How?
 
-Our recommendation is to go to our [tasks page](https://huggingface.co/tasks) and select one of
+Our recommendation is to go to our [tasks page](https://huggingface.co/tasks) and select one of the suggested
 models as a starting point.
 
 😱 Caveats
 
-The tags of a model are defined by the community and are not always accurate. If you think the model is incorrectly
+1. The tags of a model are defined by the community and are not always accurate. If you think the model is incorrectly
 tagged or missing a tag, please open an issue on the [model card](https://huggingface.co/{model_name}/tree/main).
 </details>
 """
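The helper that performs this check, `is_valid_task_for_model`, is used later in this commit but its body isn't part of the diff. As a rough, hypothetical sketch of what such a check can look like, `huggingface_hub.model_info` exposes a repo's community-defined tags:

```python
from huggingface_hub import model_info

# Hypothetical sketch: the real helper takes (model_name, task_type) and looks
# the expected tags up via TASK_TYPES/HUB_TAGS; here they are passed directly.
def has_expected_tag(model_name: str, expected_tags: tuple) -> bool:
    tags = model_info(model_name).tags or []  # tags declared on the Hub repo
    return any(tag in tags for tag in expected_tags)

# A model tagged for ASR should pass the "asr" task's expected tags.
print(has_expected_tag("openai/whisper-tiny", ("automatic-speech-recognition",)))
```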
+
+SET_MAX_NEW_TOKENS = """
+<details><summary>{count}. Control the maximum output length with `max_new_tokens`.</summary>
+
+
+🤔 Why?
+
+All text generation calls have a length-related stopping condition. Depending on the model and/or the tool you're
+using to generate text, the default value may be too small or too large. I'd recommend ALWAYS setting this option.
+
+
+🤗 How?
+
+Our text generation interfaces accept a `max_new_tokens` option. Set it to define the maximum number of tokens
+that can be generated.
+
+😱 Caveats
+
+1. Allowing a longer output doesn't necessarily mean that the model will generate longer outputs. By default,
+the model will stop generating when it generates a special `eos_token_id` token.
+2. You shouldn't set `max_new_tokens` to a value larger than the maximum sequence length of the model. If you need a
+longer output, consider using a model with a larger maximum sequence length.
+3. The longer the output, the longer it will take to generate.
+</details>
+"""
+
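The tips in this template describe the option abstractly. As an illustrative sketch with the `transformers` library (assumed here; the model name is arbitrary):

```python
from transformers import AutoModelForCausalLM, AutoTokenizer

# Illustrative setup; "gpt2" is just a small example model.
tokenizer = AutoTokenizer.from_pretrained("gpt2")
model = AutoModelForCausalLM.from_pretrained("gpt2")
inputs = tokenizer("The quick brown fox", return_tensors="pt")

# Allow at most 50 freshly generated tokens; the prompt does not count.
outputs = model.generate(**inputs, max_new_tokens=50)
print(tokenizer.decode(outputs[0], skip_special_tokens=True))
```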
+SET_MIN_LENGTH = """
+<details><summary>{count}. Force a minimum output length with `min_new_tokens`.</summary>
+
+
+🤔 Why?
+
+Text generation stops when the model generates a special `eos_token_id`. If you prevent it from happening, the model is
+forced to continue generating.
+
+🤗 How?
+
+Our text generation interfaces accept a `min_new_tokens` argument. Set it to prevent `eos_token_id` from being
+generated until `min_new_tokens` tokens are generated.
+
+😱 Caveats
+
+1. The quality of the output may suffer if the model is forced to generate beyond its own original expectations.
+2. `min_new_tokens` must be smaller than `max_new_tokens` (see related tip).
+</details>
+"""
+
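As a companion sketch (reusing `model`, `tokenizer`, and `inputs` from the `max_new_tokens` example above):

```python
# min_new_tokens keeps eos_token_id suppressed for the first 30 new tokens,
# so the output is at least that long; max_new_tokens still caps it.
outputs = model.generate(**inputs, min_new_tokens=30, max_new_tokens=50)
print(tokenizer.decode(outputs[0], skip_special_tokens=True))
```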
+REMOVE_EOS_TOKEN = """
+<details><summary>{count}. Prevent the model from halting generation by removing `eos_token_id`.</summary>
+
+
+🤔 Why?
+
+Text generation stops when the model generates a special `eos_token_id`. If there is no `eos_token_id`, the model can't
+stop.
+
+
+🤗 How?
+
+Our text generation interfaces accept an `eos_token_id` argument. Set it to a null value (e.g., in Python,
+`eos_token_id=None`) to prevent generation from stopping before it reaches other stopping conditions.
+
+😱 Caveats
+
+1. The quality of the output may suffer if the model is forced to generate beyond its own original expectations.
+</details>
+"""
+
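Following the template's own advice, a sketch with the same setup as the earlier examples; with EOS disabled, `max_new_tokens` becomes the only stopping condition here:

```python
# With no EOS-based stopping condition, generation runs until the
# max_new_tokens cap is reached.
outputs = model.generate(**inputs, eos_token_id=None, max_new_tokens=50)
print(tokenizer.decode(outputs[0], skip_special_tokens=True))
```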
+LIST_EOS_TOKEN = """
+<details><summary>{count}. Add a stop word through `eos_token_id`.</summary>
+
+
+🤔 Why?
+
+Text generation stops when the model generates a special `eos_token_id`. Actually, this attribute can be a list of
+tokens, which means you can define arbitrary stop words.
+
+
+🤗 How?
+
+Our text generation interfaces accept an `eos_token_id` argument. You can pass a list of tokens to make generation
+stop in the presence of any of those tokens.
+
+😱 Caveats
+
+1. When passing a list of tokens, don't forget to also include the default `eos_token_id` in it.
+</details>
+"""
 # =====================================================================================================================
 
 
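A sketch of the stop-word tip above, with the same setup as the earlier examples; per the caveat, the model's default EOS token stays in the list:

```python
# Treat a newline as an additional stop word alongside the default EOS token.
newline_id = tokenizer.encode("\n")[-1]
outputs = model.generate(
    **inputs,
    eos_token_id=[tokenizer.eos_token_id, newline_id],
    max_new_tokens=50,
)
print(tokenizer.decode(outputs[0], skip_special_tokens=True))
```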
@@ -122,13 +208,26 @@ def get_suggestions(task_type, model_name, problem_type):
     # Check if the model is valid for the task. If not, return straight away
     if not is_valid_task_for_model(model_name, task_type):
         counter += 1
-
+        possible_tags = " ".join("`" + tag + "`" for tag in HUB_TAGS[TASK_TYPES[task_type]])
+        suggestions += TASK_MODEL_MISMATCH.format(
+            count=counter, model_name=model_name, task_type=task_type, tags=possible_tags
+        )
         return suggestions
 
     # Demo shortcut: only a few sections are working
-    if PROBLEMS.get(problem_type) not in ("", "length", "quality", "speed"):
+    if PROBLEMS.get(problem_type, "") not in ("", "length", "quality", "speed"):
         return DEMO_MARKDOWN
 
+    if PROBLEMS.get(problem_type, "") == "length":
+        counter += 1
+        suggestions += SET_MAX_NEW_TOKENS.format(count=counter)
+        counter += 1
+        suggestions += SET_MIN_LENGTH.format(count=counter)
+        counter += 1
+        suggestions += REMOVE_EOS_TOKEN.format(count=counter)
+        counter += 1
+        suggestions += LIST_EOS_TOKEN.format(count=counter)
+
     return suggestions
 # =====================================================================================================================
 