Dineshkumars commited on
Commit
38322f1
1 Parent(s): b0149f9

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +40 -0
app.py ADDED
@@ -0,0 +1,40 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
import streamlit as st
import torch  # kept from original file; transformers uses it under the hood
from PIL import Image
from transformers import VisionEncoderDecoderModel, ViTImageProcessor, GPT2TokenizerFast

# Checkpoint bundling a ViT image encoder with a GPT-2 caption decoder.
MODEL_NAME = "nlpconnect/vit-gpt2-image-captioning"


@st.cache_resource
def _load_captioning_components():
    """Load and cache the captioning model, image processor, and tokenizer.

    ``st.cache_resource`` ensures the heavyweight checkpoint is downloaded
    and instantiated once per server process instead of on every Streamlit
    script rerun (Streamlit re-executes the whole script on each widget
    interaction).
    """
    model = VisionEncoderDecoderModel.from_pretrained(MODEL_NAME)
    feature_extractor = ViTImageProcessor.from_pretrained(MODEL_NAME)
    tokenizer = GPT2TokenizerFast.from_pretrained(MODEL_NAME)
    return model, feature_extractor, tokenizer


# Module-level names preserved so predict_step() keeps working unchanged.
model, feature_extractor, tokenizer = _load_captioning_components()

# Generation settings: a very short 2-beam caption and a longer 16-beam caption.
gen_kwargs1 = {"max_length": 4, "num_beams": 2}
gen_kwargs2 = {"max_length": 32, "num_beams": 16}
def predict_step(images):
    """Generate three captions for *images* under different generation settings.

    Returns a 3-tuple of strings for the first image in the batch:
    default ``generate`` settings, the short 2-beam setting (``gen_kwargs1``),
    and the long 16-beam setting (``gen_kwargs2``), each whitespace-stripped.
    """
    pixel_values = feature_extractor(images=images, return_tensors='pt').pixel_values

    captions = []
    # Run generation once per setting; {} means the model's defaults.
    for settings in ({}, gen_kwargs1, gen_kwargs2):
        output_ids = model.generate(pixel_values, **settings)
        decoded = tokenizer.batch_decode(output_ids, skip_special_tokens=True)
        captions.append(decoded[0].strip())

    return tuple(captions)
st.title("Image Caption Generator")

upload_image = st.file_uploader(
    label='Upload image',
    type=['png', 'jpg', 'jpeg'],
    accept_multiple_files=False,
)

if upload_image is not None:
    image = Image.open(upload_image)
    # The captioning model expects 3-channel RGB input (e.g. PNGs may be RGBA/P).
    if image.mode != "RGB":
        image = image.convert(mode="RGB")

    captions = predict_step([image])

    st.header("Captions are : ")
    for caption in captions:
        st.text(caption)