solving model problem
- VQA_FineTuning_Fashion_Datasets.ipynb +0 -0
- app.py +5 -3
- inference.py +11 -9
VQA_FineTuning_Fashion_Datasets.ipynb
CHANGED
The diff for this file is too large to render.
app.py
CHANGED
@@ -7,15 +7,17 @@ inference = Inference()
 
 
 with gr.Blocks() as block:
-
-
+    options = gr.Dropdown(choices=["Blip Saffal", "Blip Control Net"], label="Models", info="Select the model to use..", )
+    # need to improve this one...
 
+    txt = gr.Textbox(label="Insert a question..", lines=2)
+    txt_3 = gr.Textbox(value="", label="Your answer is here..")
     btn = gr.Button(value="Submit")
 
     dogs = os.path.join(os.path.dirname(__file__), "617.jpg")
     image = gr.Image(type="pil", value=dogs)
 
-    btn.click(inference.inference, inputs=[image, txt], outputs=
+    btn.click(inference.inference, inputs=[options, image, txt], outputs=[txt_3])
 
 if __name__ == "__main__":
     block.launch()
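Taken together, the change threads the dropdown selection into the model dispatch: gr.Dropdown hands the selected choice to the click callback as a plain string ("Blip Saffal" or "Blip Control Net"), gr.Image(type="pil") delivers a PIL image, and the Textbox delivers the question text. A minimal sketch of the assembled app.py after this commit, assuming common Gradio boilerplate for the parts above the hunk (the Inference import path is not visible in the diff and is an assumption):

import os

import gradio as gr

from inference import Inference  # assumed import path; the top of app.py is outside the hunk

inference = Inference()

with gr.Blocks() as block:
    # The dropdown's selected choice reaches the callback as a str,
    # which Inference.inference matches in its if/elif dispatch.
    options = gr.Dropdown(choices=["Blip Saffal", "Blip Control Net"], label="Models")
    txt = gr.Textbox(label="Insert a question..", lines=2)
    txt_3 = gr.Textbox(label="Your answer is here..")
    btn = gr.Button(value="Submit")

    # The committed code seeds this widget with a bundled 617.jpg;
    # omitted here so the sketch runs without that file.
    image = gr.Image(type="pil")

    btn.click(inference.inference, inputs=[options, image, txt], outputs=[txt_3])

if __name__ == "__main__":
    block.launch()

Routing on the raw choice string keeps the UI and the dispatcher in inference.py coupled only through these two literals, so adding a model means extending both the choices list and the if/elif chain.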
inference.py
CHANGED
@@ -1,8 +1,6 @@
-from transformers import
+from transformers import BlipProcessor, BlipForQuestionAnswering
 from transformers.utils import logging
 
-import torch
-
 class Inference:
     def __init__(self):
         self.blip_processor = BlipProcessor.from_pretrained("Salesforce/blip-vqa-base")
@@ -12,12 +10,16 @@ class Inference:
         logging.set_verbosity_info()
         self.logger = logging.get_logger("transformers")
 
-    def inference(self, image, text):
-
-
-
-
-
+    def inference(self, options, image, text):
+        if options == "Blip Saffal":
+            self.logger.info(f"Running inference for model BLIP Saffal")
+            BLIP_saffal_inference = self.__inference_saffal_blip(image, text)
+            return BLIP_saffal_inference
+
+        elif options == "Blip Control Net":
+            self.logger.info(f"Running inference for model BLIP Control Net")
+            BLIP_control_net_inference = self.__inference_control_net_blip(image, text)
+            return BLIP_control_net_inference
 
     def __inference_saffal_blip(self, image, text):
         encoding = self.blip_processor(image, text, return_tensors="pt")