IvaElen committed on
Commit
dc751b1
1 Parent(s): 18aa1b2

Update pages/ImageToText.py

Browse files
Files changed (1) hide show
  1. pages/ImageToText.py +33 -0
pages/ImageToText.py CHANGED
@@ -0,0 +1,33 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# Image-captioning page: loads the ViT-GPT2 captioning pipeline once at
# import time and exposes it to the Streamlit UI below.
from PIL import Image
from transformers import (
    AutoProcessor,
    AutoTokenizer,
    AutoImageProcessor,
    AutoModelForCausalLM,
    BlipForConditionalGeneration,
    VisionEncoderDecoderModel,
)
import torch
import streamlit as st  # required by the file-uploader UI further down; was missing

# Pretrained ViT encoder + GPT-2 decoder captioning model and its
# image preprocessor / text tokenizer.
vitgpt_processor = AutoImageProcessor.from_pretrained("nlpconnect/vit-gpt2-image-captioning")
vitgpt_model = VisionEncoderDecoderModel.from_pretrained("nlpconnect/vit-gpt2-image-captioning")
vitgpt_tokenizer = AutoTokenizer.from_pretrained("nlpconnect/vit-gpt2-image-captioning")

# Run inference on GPU when one is available, otherwise CPU.
device = "cuda" if torch.cuda.is_available() else "cpu"
vitgpt_model.to(device)
13
def generate_caption(processor, model, image, tokenizer=None, max_length=50):
    """Generate a text caption for a single image.

    Args:
        processor: image processor that turns *image* into model tensors
            (and, when *tokenizer* is None, also decodes the output ids).
        model: a vision-to-text model exposing ``generate``.
        image: input image (e.g. a ``PIL.Image``) accepted by *processor*.
        tokenizer: optional tokenizer used to decode generated ids; when
            None, decoding falls back to *processor*.
        max_length: maximum length of the generated caption (was a
            hard-coded 50; parameterized with the same default).

    Returns:
        The generated caption as a single string.
    """
    # Preprocess and move tensors to the module-level inference device.
    inputs = processor(images=image, return_tensors="pt").to(device)
    generated_ids = model.generate(pixel_values=inputs.pixel_values, max_length=max_length)

    # Some pipelines decode via a dedicated tokenizer, others via the processor.
    decoder = tokenizer if tokenizer is not None else processor
    return decoder.batch_decode(generated_ids, skip_special_tokens=True)[0]
23
+
24
def generate_captions(image):
    """Caption *image* with the module-level ViT-GPT2 pipeline.

    Thin convenience wrapper around :func:`generate_caption` that supplies
    the preloaded processor, model, and tokenizer.
    """
    return generate_caption(vitgpt_processor, vitgpt_model, image, vitgpt_tokenizer)
27
+
28
# Streamlit page body: upload an image and display its generated caption.
# `st` was referenced without ever being imported (NameError at runtime);
# the local import below fixes that.
import streamlit as st

uploaded_file = st.file_uploader("Upload your image")
if uploaded_file is not None:
    image = Image.open(uploaded_file)
    # Use the existing wrapper instead of re-passing the pipeline pieces.
    generated_caption = generate_captions(image)
    st.write(generated_caption)