Harzis committed on
Commit d614303
1 Parent(s): aa70c4b

Update app.py

Files changed (1)
  1. app.py +18 -16
app.py CHANGED
@@ -1,29 +1,31 @@
-# Image Captioning from:
-# https://learn.deeplearning.ai/courses/open-source-models-hugging-face/lesson/12/image-captioning
+# Visual question answering from
+# https://learn.deeplearning.ai/courses/open-source-models-hugging-face/lesson/13/multimodal-visual-question-answering
 #
 
-from transformers import BlipForConditionalGeneration
-model = BlipForConditionalGeneration.from_pretrained("Salesforce/blip-image-captioning-base")
+from transformers import BlipForQuestionAnswering
+model = BlipForQuestionAnswering.from_pretrained("Salesforce/blip-vqa-base")
 
 from transformers import AutoProcessor
-processor = AutoProcessor.from_pretrained("Salesforce/blip-image-captioning-base")
+processor = AutoProcessor.from_pretrained("Salesforce/blip-vqa-base")
 
 from PIL import Image
+
 import gradio as gr
 
-def captioning(input):
-    image_tensors = processor(input, return_tensors="pt")
-    image_text_tensors = model.generate(**image_tensors)
-    output = processor.decode(image_text_tensors[0], skip_special_tokens=True)
+def answering(image, question):
+    inputs = processor(image, question, return_tensors="pt")
+    out = model.generate(**inputs)
+    output = processor.decode(out[0], skip_special_tokens=True)
     return output
 
 gr.close_all()
 
-app = gr.Interface(fn=captioning,
-                   inputs=[gr.Image(label="Put some picture here", type="pil")],
-                   outputs=[gr.Textbox(label="What do you see?")],
-                   title="Harza's image captioning application",
-                   description="Harza's miracle application for telling what is in the picture",
-                   allow_flagging="never")
+app = gr.Interface(fn=answering,
+                   inputs=[gr.Image(label="Picture here", type="pil"),
+                           gr.Textbox(label="Question about the picture here")],
+                   outputs=[gr.Textbox(label="Answer")],
+                   title="Harza's application for answering questions about a picture",
+                   description="Harza's miracle application that can answer questions about a given picture!",
+                   allow_flagging="never")
 app.launch()
-gr.close_all()
+gr.close_all()
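
A minimal sketch of how the new answering pipeline can be exercised outside the Gradio UI, assuming a local test image; the file name and question below are hypothetical placeholders, and model and processor are the objects loaded at the top of app.py:

# Minimal sketch: query the BLIP VQA model directly, without launching the Gradio app.
# "test.jpg" and the question are hypothetical placeholders; model and processor
# are the BlipForQuestionAnswering model and AutoProcessor loaded in app.py.
from PIL import Image

image = Image.open("test.jpg").convert("RGB")
question = "How many people are in the picture?"

inputs = processor(image, question, return_tensors="pt")   # preprocess image + question
out = model.generate(**inputs)                              # generate answer token ids
print(processor.decode(out[0], skip_special_tokens=True))   # decode to a short text answer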