Spaces:

jykoh
/

fromage

Build error

App Files Files Community

jykoh committed on Feb 20, 2023

Commit

5067d2d

•

1 Parent(s): 69c0e42

Add fix

Browse files

Files changed (2) hide show

app.py +25 -16
fromage/models.py +1 -0

app.py CHANGED Viewed

@@ -27,7 +27,7 @@ class FromageChatBot:
     def upload_image(self, state, image_input):
-        state += [(f"![](/file={image_input.name})", ":)")]
         self.input_image = Image.open(image_input.name).resize((224, 224)).convert('RGB')
         return state, state
@@ -42,7 +42,7 @@ class FromageChatBot:
     def generate_for_prompt(self, input_text, state, ret_scale_factor, max_nm_rets, num_words, temperature):
         input_prompt = 'Q: ' + input_text + '\nA:'
         self.chat_history += input_prompt
-        print('Generating for', self.chat_history)
         # If an image was uploaded, prepend it to the model.
         model_inputs = None
@@ -57,6 +57,7 @@ class FromageChatBot:
         model_outputs = self.model.generate_for_images_and_texts(model_inputs,
             num_words=num_words, ret_scale_factor=ret_scale_factor, top_p=top_p,
             temperature=temperature, max_num_rets=max_nm_rets)
         im_names = []
         response = ''
@@ -73,6 +74,7 @@ class FromageChatBot:
                     filename = self.save_image_to_local(output)
                     response += f'<img src="/file={filename}">'
         self.chat_history += ' '.join(text_outputs)
         if self.chat_history[-1] != '\n':
             self.chat_history += '\n'
@@ -88,26 +90,33 @@ class FromageChatBot:
                 '### Grounding Language Models to Images for Multimodal Generation'
             )
-            with gr.Column(scale=0.3, min_width=0):
-                ret_scale_factor = gr.Slider(minimum=0.0, maximum=3.0, value=1.0, step=0.1, interactive=True, label="Multiplier for returning images (higher means more frequent)")
-                max_ret_images = gr.Number(minimum=0, maximum=3, value=1, precision=1, interactive=True, label="Max images to return")
-                gr_max_len = gr.Number(value=32, precision=1, label="Max # of words returned", interactive=True)
-                gr_temperature = gr.Number(value=0.0, label="Temperature", interactive=True)
-            with gr.Column(scale=0.7, min_width=0):
-                image_btn = gr.UploadButton("Image Input", file_types=["image"])
-                text_input = gr.Textbox(label="Text Input", lines=1, placeholder="Upload an image above [optional]. Then enter a text prompt, and press enter!")
-                chatbot = gr.Chatbot()
-                gr_state = gr.State([])
-                clear_btn = gr.Button("Clear History")
             text_input.submit(self.generate_for_prompt, [text_input, gr_state, ret_scale_factor, max_ret_images, gr_max_len, gr_temperature], [gr_state, chatbot])
-            text_input.submit(lambda :"", None, text_input)
             image_btn.upload(self.upload_image, [gr_state, image_btn], [gr_state, chatbot])
             clear_btn.click(self.reset, [], [gr_state, chatbot])
         demo.launch(share=False, server_name="0.0.0.0")
-chatbot = FromageChatBot()
-chatbot.launch()

     def upload_image(self, state, image_input):
+        state += [(f"![](/file={image_input.name})", "(Image received. Type or ask something to continue.)")]
         self.input_image = Image.open(image_input.name).resize((224, 224)).convert('RGB')
         return state, state
     def generate_for_prompt(self, input_text, state, ret_scale_factor, max_nm_rets, num_words, temperature):
         input_prompt = 'Q: ' + input_text + '\nA:'
         self.chat_history += input_prompt
+        print('Generating for', self.chat_history, flush=True)
         # If an image was uploaded, prepend it to the model.
         model_inputs = None
         model_outputs = self.model.generate_for_images_and_texts(model_inputs,
             num_words=num_words, ret_scale_factor=ret_scale_factor, top_p=top_p,
             temperature=temperature, max_num_rets=max_nm_rets)
+        print('model_outputs', model_outputs, flush=True)
         im_names = []
         response = ''
                     filename = self.save_image_to_local(output)
                     response += f'<img src="/file={filename}">'
+        # TODO(jykoh): Persist image inputs.
         self.chat_history += ' '.join(text_outputs)
         if self.chat_history[-1] != '\n':
             self.chat_history += '\n'
                 '### Grounding Language Models to Images for Multimodal Generation'
             )
+            chatbot = gr.Chatbot()
+            gr_state = gr.State([])
+            with gr.Row():
+                with gr.Column(scale=0.3, min_width=0):
+                    ret_scale_factor = gr.Slider(minimum=0.0, maximum=3.0, value=1.0, step=0.1, interactive=True, label="Multiplier for returning images (higher means more frequent)")
+                    max_ret_images = gr.Number(minimum=0, maximum=3, value=1, precision=1, interactive=True, label="Max images to return")
+                    gr_max_len = gr.Number(value=32, precision=1, label="Max # of words returned", interactive=True)
+                    gr_temperature = gr.Number(value=0.0, label="Temperature", interactive=True)
+                with gr.Column(scale=0.7, min_width=0):
+                    image_btn = gr.UploadButton("Image Input", file_types=["image"])
+                    text_input = gr.Textbox(label="Text Input", lines=1, placeholder="Upload an image above [optional]. Then enter a text prompt, and press enter!")
+                    clear_btn = gr.Button("Clear History")
             text_input.submit(self.generate_for_prompt, [text_input, gr_state, ret_scale_factor, max_ret_images, gr_max_len, gr_temperature], [gr_state, chatbot])
             image_btn.upload(self.upload_image, [gr_state, image_btn], [gr_state, chatbot])
             clear_btn.click(self.reset, [], [gr_state, chatbot])
         demo.launch(share=False, server_name="0.0.0.0")
+def main():
+    chatbot = FromageChatBot()
+    chatbot.launch()
+if __name__ == "__main__":
+    chatbot = FromageChatBot()
+    chatbot.launch()

fromage/models.py CHANGED Viewed

@@ -651,6 +651,7 @@ def load_fromage(embeddings_dir: str, model_args_path: str, model_ckpt_path: str
   emb_matrix = emb_matrix / emb_matrix.norm(dim=1, keepdim=True)
   emb_matrix = logit_scale * emb_matrix
   model.emb_matrix = emb_matrix
   return model

   emb_matrix = emb_matrix / emb_matrix.norm(dim=1, keepdim=True)
   emb_matrix = logit_scale * emb_matrix
   model.emb_matrix = emb_matrix
+  print('Done loading FROMAGe!')
   return model