gchhablani committed
Commit 1a320a6
1 Parent(s): 8b6c25d

Add more generation parameters

Files changed (1)
  1. app.py +6 -3
app.py CHANGED
@@ -29,10 +29,10 @@ def load_model(ckpt):
     tokenizer = MarianTokenizer.from_pretrained("Helsinki-NLP/opus-mt-en-es")
 
 @st.cache
-def generate_sequence(pixel_values, num_beams, temperature, top_p):
-    output_ids = state.model.generate(input_ids=pixel_values, max_length=64, num_beams=num_beams, temperature=temperature, top_p=top_p)
+def generate_sequence(pixel_values, num_beams, temperature, top_p, do_sample, top_k, max_length):
+    output_ids = state.model.generate(input_ids=pixel_values, max_length=max_length, num_beams=num_beams, temperature=temperature, top_p=top_p, top_k=top_k, do_sample=do_sample)
     print(output_ids)
-    output_sequence = tokenizer.batch_decode(output_ids[0], skip_special_tokens=True, max_length=64)
+    output_sequence = tokenizer.batch_decode(output_ids[0], skip_special_tokens=True, max_length=max_length)
     return output_sequence
 
 def read_markdown(path, parent="./sections/"):
@@ -56,6 +56,9 @@ st.write(
 )
 
 st.sidebar.title("Generation Parameters")
+max_length = st.sidebar.number_input("Max Length", min_value=2, max_value=10, value=4, step=1, help="The maximum length of the sequence to be generated.")
+do_sample = st.sidebar.checkbox("Sample", value=False, help="Sample from the model instead of using beam search.")
+top_k = st.sidebar.number_input("Top K", min_value=10, max_value=200, value=50, step=1, help="The number of highest-probability vocabulary tokens to keep for top-k filtering.")
 num_beams = st.sidebar.number_input("Number of Beams", min_value=2, max_value=10, value=4, step=1, help="Number of beams to be used in beam search.")
 temperature = st.sidebar.select_slider("Temperature", options=list(np.arange(0.0, 1.1, step=0.1)), value=1.0, help="The value used to modulate the next token probabilities.", format_func=lambda x: f"{x:.2f}")
 top_p = st.sidebar.select_slider("Top-P", options=list(np.arange(0.0, 1.1, step=0.1)), value=1.0, help="Nucleus sampling: if set to float < 1, only the most probable tokens with probabilities that add up to top_p or higher are kept for generation.", format_func=lambda x: f"{x:.2f}")
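For reference, here is a minimal standalone sketch of how the parameters touched by this commit behave in Hugging Face's generate API. It is illustrative only: the Marian translation model already named in the diff stands in for the app's cached image-captioning model (state.model, fed with pixel_values), and the example sentence is made up. The main point: top_k, top_p, and temperature only influence the output when do_sample=True, while num_beams drives beam search.

# Standalone sketch (assumes transformers and torch are installed; the Marian
# text-to-text model is a stand-in for the app's cached state.model).
from transformers import MarianMTModel, MarianTokenizer

tokenizer = MarianTokenizer.from_pretrained("Helsinki-NLP/opus-mt-en-es")
model = MarianMTModel.from_pretrained("Helsinki-NLP/opus-mt-en-es")

inputs = tokenizer(["A dog runs across the beach."], return_tensors="pt")

# Sampling path: top_k / top_p / temperature are honored only when do_sample=True.
sampled_ids = model.generate(
    input_ids=inputs.input_ids,
    max_length=64,
    do_sample=True,
    top_k=50,
    top_p=0.95,
    temperature=0.8,
)

# Beam-search path: num_beams drives the search; the sampling knobs are ignored.
beam_ids = model.generate(
    input_ids=inputs.input_ids,
    max_length=64,
    num_beams=4,
)

# generate returns a 2-D tensor (batch, sequence), so decode the whole batch.
print(tokenizer.batch_decode(sampled_ids, skip_special_tokens=True))
print(tokenizer.batch_decode(beam_ids, skip_special_tokens=True))

One detail worth noting: the app passes output_ids[0] (a single 1-D sequence) to batch_decode, which then treats each token id as its own sequence and returns one string per token; the sketch above decodes the full 2-D batch instead.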
 
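The call site that feeds the new widget values into generate_sequence sits outside both hunks. A hypothetical sketch of the wiring, under the assumption that the app forwards the sidebar values verbatim (the call itself is not shown in this commit; pixel_values and generate_sequence are the names from the diff):

# Hypothetical call site (not part of this diff): sidebar values are passed
# straight through to generate_sequence, which hands them to model.generate.
output_sequence = generate_sequence(
    pixel_values, num_beams, temperature, top_p, do_sample, top_k, max_length
)

Note that the new Max Length input reuses the Number of Beams bounds (min 2, max 10, default 4), so generated sequences are capped at 10 tokens.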