Spaces:

jykoh
/

fromage

Build error

App Files Community

jykoh committed on Feb 21, 2023

Commit

5b4ede2

1 Parent(s): 278032e

Add submit button

Browse files

Files changed (2) hide show

app.py +12 -4
fromage/models.py +14 -10

app.py CHANGED Viewed

@@ -127,19 +127,27 @@ with gr.Blocks(css=css) as demo:
         share_button = gr.Button("Share to community", elem_id="share-btn")
     with gr.Row():
-        with gr.Column(scale=0.3, min_width=0):
             ret_scale_factor = gr.Slider(minimum=0.0, maximum=3.0, value=1.0, step=0.1, interactive=True, label="Multiplier for returning images (higher means more frequent)")
             max_ret_images = gr.Number(minimum=0, maximum=3, value=1, precision=1, interactive=True, label="Max images to return")
             gr_max_len = gr.Number(value=32, precision=1, label="Max # of words returned", interactive=True)
             gr_temperature = gr.Number(value=0.0, label="Temperature", interactive=True)
-        with gr.Column(scale=0.7, min_width=0):
             image_btn = gr.UploadButton("🖼️ Image Input", file_types=["image"])
-            text_input = gr.Textbox(label="Text Input", lines=1, placeholder="Upload an image above [optional]. Then enter a text prompt, and press enter!")
-            clear_btn = gr.Button("Clear History")
     text_input.submit(generate_for_prompt, [text_input, gr_state, ret_scale_factor, max_ret_images, gr_max_len, gr_temperature], [gr_state, chatbot])
     text_input.submit(lambda: "", None, text_input)  # Reset chatbox.
     image_btn.upload(upload_image, [gr_state, image_btn], [gr_state, chatbot])
     clear_btn.click(reset, [], [gr_state, chatbot])
     share_button.click(None, [], [], _js=share_js)

         share_button = gr.Button("Share to community", elem_id="share-btn")
     with gr.Row():
+        with gr.Column(scale=0.3, min_width=100):
             ret_scale_factor = gr.Slider(minimum=0.0, maximum=3.0, value=1.0, step=0.1, interactive=True, label="Multiplier for returning images (higher means more frequent)")
             max_ret_images = gr.Number(minimum=0, maximum=3, value=1, precision=1, interactive=True, label="Max images to return")
             gr_max_len = gr.Number(value=32, precision=1, label="Max # of words returned", interactive=True)
             gr_temperature = gr.Number(value=0.0, label="Temperature", interactive=True)
+        with gr.Column(scale=0.7, min_width=400):
             image_btn = gr.UploadButton("🖼️ Image Input", file_types=["image"])
+            text_input = gr.Textbox(label="Chat Input", lines=1, placeholder="Upload an image above [optional]. Then enter a text prompt, and press enter!")
+            with gr.Row():
+                with gr.Column(scale=0.5):
+                    submit_btn = gr.Button("Submit", interactive=True, variant="primary")
+                with gr.Column(scale=0.5):
+                    clear_btn = gr.Button("Clear History")
     text_input.submit(generate_for_prompt, [text_input, gr_state, ret_scale_factor, max_ret_images, gr_max_len, gr_temperature], [gr_state, chatbot])
     text_input.submit(lambda: "", None, text_input)  # Reset chatbox.
+    submit_btn.click(generate_for_prompt, [text_input, gr_state, ret_scale_factor, max_ret_images, gr_max_len, gr_temperature], [gr_state, chatbot])
+    submit_btn.click(lambda: "", None, text_input)  # Reset chatbox.
     image_btn.upload(upload_image, [gr_state, image_btn], [gr_state, chatbot])
     clear_btn.click(reset, [], [gr_state, chatbot])
     share_button.click(None, [], [], _js=share_js)

fromage/models.py CHANGED Viewed

@@ -634,21 +634,25 @@ def load_fromage(embeddings_dir: str, model_args_path: str, model_ckpt_path: str
   ret_token_idx = tokenizer('[RET]', add_special_tokens=False).input_ids
   assert len(ret_token_idx) == 1, ret_token_idx
   model_kwargs['retrieval_token_idx'] = ret_token_idx[0]
-  # model_kwargs['opt_version'] = 'facebook/opt-125m'
-  # model_kwargs['visual_encoder'] = 'openai/clip-vit-base-patch32'
   args = namedtuple('args', model_kwargs)(**model_kwargs)
   # Initialize model for inference.
   model = Fromage(tokenizer, args, path_array=path_array, emb_matrix=emb_matrix)
   model = model.eval()
-  model = model.bfloat16()
-  model = model.cuda()
-  # Load pretrained linear mappings and [RET] embeddings.
-  checkpoint = torch.load(model_ckpt_path)
-  model.load_state_dict(checkpoint['state_dict'], strict=False)
-  with torch.no_grad():
-      model.model.input_embeddings.weight[model.model.retrieval_token_idx, :].copy_(checkpoint['state_dict']['ret_input_embeddings.weight'].cpu().detach())
   logit_scale = model.model.logit_scale.exp()
   emb_matrix = torch.tensor(emb_matrix, dtype=logit_scale.dtype).to(logit_scale.device)

   ret_token_idx = tokenizer('[RET]', add_special_tokens=False).input_ids
   assert len(ret_token_idx) == 1, ret_token_idx
   model_kwargs['retrieval_token_idx'] = ret_token_idx[0]
+  debug = False
+  if debug:
+    model_kwargs['opt_version'] = 'facebook/opt-125m'
+    model_kwargs['visual_encoder'] = 'openai/clip-vit-base-patch32'
   args = namedtuple('args', model_kwargs)(**model_kwargs)
   # Initialize model for inference.
   model = Fromage(tokenizer, args, path_array=path_array, emb_matrix=emb_matrix)
   model = model.eval()
+  if not debug:
+    model = model.bfloat16()
+    model = model.cuda()
+    # Load pretrained linear mappings and [RET] embeddings.
+    checkpoint = torch.load(model_ckpt_path)
+    model.load_state_dict(checkpoint['state_dict'], strict=False)
+    with torch.no_grad():
+        model.model.input_embeddings.weight[model.model.retrieval_token_idx, :].copy_(checkpoint['state_dict']['ret_input_embeddings.weight'].cpu().detach())
   logit_scale = model.model.logit_scale.exp()
   emb_matrix = torch.tensor(emb_matrix, dtype=logit_scale.dtype).to(logit_scale.device)