VictorSanh committed on
Commit
69ef035
1 Parent(s): 00346ea
Files changed (1) hide show
  1. app_dialogue.py +6 -7
app_dialogue.py CHANGED
@@ -15,9 +15,9 @@ from text_generation import Client
15
  from transformers import AutoProcessor
16
 
17
 
18
- MODELS = [ # TODO uncomment
19
  "HuggingFaceM4/idefics-9b-instruct",
20
- # "HuggingFaceM4/idefics-80b-instruct",
21
  ]
22
 
23
  API_PATHS = {
@@ -326,7 +326,7 @@ with gr.Blocks(title="IDEFICS Playground", theme=gr.themes.Base()) as demo:
326
  **EMBARGO UNTIL AUGUST 22ND** This demo showcases **IDEFICS**, an open-access large visual language model. Like GPT-4, the multimodal model accepts arbitrary sequences of image and text inputs and produces text outputs. IDEFICS can answer questions about images, describe visual content, create stories grounded in multiple images, etc.
327
  <br>IDEFICS (which stands for **I**mage-aware **D**ecoder **E**nhanced à la **F**lamingo with **I**nterleaved **C**ross-attention**S**) is an open-access reproduction of [Flamingo](https://huggingface.co/papers/2204.14198), a closed-source visual language model developed by DeepMind. IDEFICS was built solely on publicly available data and models. It is currently the only visual language model of this scale available in open-access.
328
 
329
- 📚 The variants available in this demo were fine-tuned on a mixture of supervised and instruction fine-tuning to make the models more suitable in conversational settings. For more details, we refer to our [blog post](TODO).
330
 
331
  🅿️ **Intended uses:** This demo along with the [supporting models](https://huggingface.co/models?sort=trending&search=HuggingFaceM4%2Fidefics) are provided as research artefacts to the community. We detail misuses and out-of-scope uses [here](https://huggingface.co/HuggingFaceM4/idefics-80b#misuse-and-out-of-scope-use).
332
 
@@ -338,7 +338,7 @@ with gr.Blocks(title="IDEFICS Playground", theme=gr.themes.Base()) as demo:
338
  with gr.Row(elem_id="model_selector_row"):
339
  model_selector = gr.Dropdown(
340
  choices=MODELS,
341
- value="HuggingFaceM4/idefics-9b-instruct",
342
  interactive=True,
343
  show_label=False,
344
  container=False,
@@ -555,7 +555,6 @@ with gr.Blocks(title="IDEFICS Playground", theme=gr.themes.Base()) as demo:
555
  model_selector="HuggingFaceM4/idefics-80b-instruct"
556
  user_prompt_str=message
557
  chat_history=[]
558
- decoding_strategy="Greedy"
559
  max_new_tokens=512
560
 
561
  formated_prompt_list, user_prompt_list = format_user_prompt_with_im_history_and_system_conditioning(
@@ -596,7 +595,7 @@ with gr.Blocks(title="IDEFICS Playground", theme=gr.themes.Base()) as demo:
596
  query = prompt_list_to_tgi_input(formated_prompt_list)
597
  generated_text = client.generate(prompt=query, **generation_args)
598
  if generated_text.endswith("\nUser"):
599
- generated_text = generate_text[:-5]
600
 
601
  last_turn = chat_history.pop(-1)
602
  last_turn[-1] += generated_text
@@ -831,7 +830,7 @@ with gr.Blocks(title="IDEFICS Playground", theme=gr.themes.Base()) as demo:
831
  inputs=[textbox, imagebox],
832
  outputs=[textbox, imagebox, chatbot],
833
  fn=process_example,
834
- cache_examples=False,
835
  examples_per_page=6,
836
  label=(
837
  "Click on any example below to get started.\nFor convenience, the model generations have been"
 
15
  from transformers import AutoProcessor
16
 
17
 
18
+ MODELS = [
19
  "HuggingFaceM4/idefics-9b-instruct",
20
+ "HuggingFaceM4/idefics-80b-instruct",
21
  ]
22
 
23
  API_PATHS = {
 
326
  **EMBARGO UNTIL AUGUST 22ND** This demo showcases **IDEFICS**, an open-access large visual language model. Like GPT-4, the multimodal model accepts arbitrary sequences of image and text inputs and produces text outputs. IDEFICS can answer questions about images, describe visual content, create stories grounded in multiple images, etc.
327
  <br>IDEFICS (which stands for **I**mage-aware **D**ecoder **E**nhanced à la **F**lamingo with **I**nterleaved **C**ross-attention**S**) is an open-access reproduction of [Flamingo](https://huggingface.co/papers/2204.14198), a closed-source visual language model developed by DeepMind. IDEFICS was built solely on publicly available data and models. It is currently the only visual language model of this scale available in open-access.
328
 
329
+ 📚 The variants available in this demo were fine-tuned on a mixture of supervised and instruction fine-tuning to make the models more suitable in conversational settings. For more details, we refer to our [blog post](https://huggingface.co/blog/idefics).
330
 
331
  🅿️ **Intended uses:** This demo along with the [supporting models](https://huggingface.co/models?sort=trending&search=HuggingFaceM4%2Fidefics) are provided as research artefacts to the community. We detail misuses and out-of-scope uses [here](https://huggingface.co/HuggingFaceM4/idefics-80b#misuse-and-out-of-scope-use).
332
 
 
338
  with gr.Row(elem_id="model_selector_row"):
339
  model_selector = gr.Dropdown(
340
  choices=MODELS,
341
+ value="HuggingFaceM4/idefics-80b-instruct",
342
  interactive=True,
343
  show_label=False,
344
  container=False,
 
555
  model_selector="HuggingFaceM4/idefics-80b-instruct"
556
  user_prompt_str=message
557
  chat_history=[]
 
558
  max_new_tokens=512
559
 
560
  formated_prompt_list, user_prompt_list = format_user_prompt_with_im_history_and_system_conditioning(
 
595
  query = prompt_list_to_tgi_input(formated_prompt_list)
596
  generated_text = client.generate(prompt=query, **generation_args)
597
  if generated_text.endswith("\nUser"):
598
+ generated_text = generated_text[:-5]
599
 
600
  last_turn = chat_history.pop(-1)
601
  last_turn[-1] += generated_text
 
830
  inputs=[textbox, imagebox],
831
  outputs=[textbox, imagebox, chatbot],
832
  fn=process_example,
833
+ cache_examples=True,
834
  examples_per_page=6,
835
  label=(
836
  "Click on any example below to get started.\nFor convenience, the model generations have been"