solving model problem
- VQA_FineTuning_Fashion_Datasets.ipynb +0 -0
- app.py +5 -3
- inference.py +11 -9
VQA_FineTuning_Fashion_Datasets.ipynb
CHANGED
The diff for this file is too large to render.
app.py
CHANGED
@@ -7,15 +7,17 @@ inference = Inference()
 
 
 with gr.Blocks() as block:
-
-
+    options = gr.Dropdown(choices=["Blip Saffal", "Blip Control Net"], label="Models", info="Select the model to use..", )
+    # need to improve this one...
 
+    txt = gr.Textbox(label="Insert a question..", lines=2)
+    txt_3 = gr.Textbox(value="", label="Your answer is here..")
     btn = gr.Button(value="Submit")
 
     dogs = os.path.join(os.path.dirname(__file__), "617.jpg")
     image = gr.Image(type="pil", value=dogs)
 
-    btn.click(inference.inference, inputs=[image, txt], outputs=
+    btn.click(inference.inference, inputs=[options, image, txt], outputs=[txt_3])
 
 if __name__ == "__main__":
     block.launch()
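Taken together, the change threads the dropdown selection into the model dispatch: gr.Dropdown hands the selected choice to the click callback as a plain string ("Blip Saffal" or "Blip Control Net"), gr.Image(type="pil") delivers a PIL image, and the Textbox delivers the question text. A minimal sketch of the assembled app.py after this commit, assuming common Gradio boilerplate for the parts above the hunk (the Inference import path is not visible in the diff and is an assumption):

import os

import gradio as gr

from inference import Inference  # assumed import path; the top of app.py is outside the hunk

inference = Inference()

with gr.Blocks() as block:
    # The dropdown's selected choice reaches the callback as a str,
    # which Inference.inference matches in its if/elif dispatch.
    options = gr.Dropdown(choices=["Blip Saffal", "Blip Control Net"], label="Models")
    txt = gr.Textbox(label="Insert a question..", lines=2)
    txt_3 = gr.Textbox(label="Your answer is here..")
    btn = gr.Button(value="Submit")

    # The committed code seeds this widget with a bundled 617.jpg;
    # omitted here so the sketch runs without that file.
    image = gr.Image(type="pil")

    btn.click(inference.inference, inputs=[options, image, txt], outputs=[txt_3])

if __name__ == "__main__":
    block.launch()

Routing on the raw choice string keeps the UI and the dispatcher in inference.py coupled only through these two literals, so adding a model means extending both the choices list and the if/elif chain.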
inference.py
CHANGED
@@ -1,8 +1,6 @@
-from transformers import
+from transformers import BlipProcessor, BlipForQuestionAnswering
 from transformers.utils import logging
 
-import torch
-
 class Inference:
     def __init__(self):
         self.blip_processor = BlipProcessor.from_pretrained("Salesforce/blip-vqa-base")
@@ -12,12 +10,16 @@ class Inference:
         logging.set_verbosity_info()
         self.logger = logging.get_logger("transformers")
 
-    def inference(self, image, text):
-
-
-
-
-
+    def inference(self, options, image, text):
+        if options == "Blip Saffal":
+            self.logger.info(f"Running inference for model BLIP Saffal")
+            BLIP_saffal_inference = self.__inference_saffal_blip(image, text)
+            return BLIP_saffal_inference
+
+        elif options == "Blip Control Net":
+            self.logger.info(f"Running inference for model BLIP Control Net")
+            BLIP_control_net_inference = self.__inference_control_net_blip(image, text)
+            return BLIP_control_net_inference
 
     def __inference_saffal_blip(self, image, text):
         encoding = self.blip_processor(image, text, return_tensors="pt")