Shriharshan committed on
Commit
e770a77
1 Parent(s): 8d8f414

Upload app.py

Browse files
Files changed (1) hide show
  1. app.py +49 -0
app.py ADDED
@@ -0,0 +1,49 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Image captioning with ViT+GPT2
2
+ from PIL import Image
3
+ from transformers import VisionEncoderDecoderModel, ViTFeatureExtractor, PreTrainedTokenizerFast
4
+ import requests
5
# Hugging Face Hub checkpoints used by the app: the encoder-decoder captioning
# model, the image preprocessor configuration, and the tokenizer.
# NOTE(review): the preprocessor and tokenizer are loaded from different repos
# than the captioning model — confirm they match what that checkpoint expects.
CAPTION_MODEL_ID = "nlpconnect/vit-gpt2-image-captioning"
FEATURE_EXTRACTOR_ID = "google/vit-base-patch16-224-in21k"
TOKENIZER_ID = "distilgpt2"

# Loaded once at import time so every request reuses the same objects.
model = VisionEncoderDecoderModel.from_pretrained(CAPTION_MODEL_ID)
# The misspelled name is kept on purpose: vit2distilgpt2() looks it up as-is.
vit_feature_extactor = ViTFeatureExtractor.from_pretrained(FEATURE_EXTRACTOR_ID)
tokenizer = PreTrainedTokenizerFast.from_pretrained(TOKENIZER_ID)
8
+ #url = 'https://d2gp644kobdlm6.cloudfront.net/wp-content/uploads/2016/06/bigstock-Shocked-and-surprised-boy-on-t-113798588-300x212.jpg'
9
+ # with Image.open(requests.get(url, stream=True).raw) as img:
10
+ # pixel_values = vit_feature_extactor(images=img, return_tensors="pt").pixel_values
11
+ # encoder_outputs = model.generate(pixel_values.to('cpu'),num_beams = 5)
12
+ # generated_senetences = tokenizer.batch_decode(encoder_outputs, skip_special_tokens=True,)
13
+ # generated_senetences
14
+ # generated_senetences[0].split(".")[0]
15
def vit2distilgpt2(img):
    """Generate a short caption for an image.

    The image is preprocessed with the module-level ``vit_feature_extactor``,
    token ids are produced by ``model.generate`` using beam search with five
    beams, and the ids are decoded with the module-level ``tokenizer``.

    Args:
        img: The image to caption. The Gradio interface in this file declares
            its image input with ``type="pil"``, so this is normally a PIL
            image; anything else is handed to the preprocessor unchanged.

    Returns:
        The text of the first decoded sequence up to (not including) its
        first ``"."``, with surrounding whitespace removed.

    Raises:
        ValueError: If ``img`` is ``None`` (nothing was uploaded).
    """
    if img is None:
        # Fail early with a readable message instead of an opaque error from
        # deep inside the preprocessor.
        raise ValueError("No image was provided; upload an image to caption.")

    # Follow the captioning model card's preprocessing: PIL images that are
    # not already RGB (e.g. RGBA or greyscale) are converted first. Objects
    # without a callable ``convert`` (arrays, tensors) are left untouched.
    convert = getattr(img, "convert", None)
    if callable(convert) and getattr(img, "mode", "RGB") != "RGB":
        img = convert("RGB")

    pixel_values = vit_feature_extactor(images=img, return_tensors="pt").pixel_values
    # Send the input to wherever the model lives rather than hard-coding CPU.
    generated_ids = model.generate(pixel_values.to(model.device), num_beams=5)
    captions = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)

    # Only the first sentence of the best sequence is shown to the user.
    return captions[0].split(".")[0].strip()
21
+ import gradio as gr
22
# ---------------------------------------------------------------------------
# Gradio front end: one image in, one caption out.
# NOTE(review): gr.inputs / gr.outputs and launch(enable_queue=...) belong to
# the legacy Gradio API — confirm the pinned Gradio version still ships them.
# ---------------------------------------------------------------------------
inputs = [gr.inputs.Image(type="pil", label="Original Images")]
outputs = [gr.outputs.Textbox(label="Caption")]

title = "Image Captioning using ViT + GPT2"
description = "ViT and GPT2 are used to generate Image Caption for the uploaded image.COCO DataSet is used for Training"

# Each inner list is one example row; the paths are given relative, as in the
# original script.
examples = [[name] for name in ("Image1.png", "Image2.png", "Image3.png")]

demo = gr.Interface(
    fn=vit2distilgpt2,
    inputs=inputs,
    outputs=outputs,
    title=title,
    description=description,
    examples=examples,
    theme="huggingface",
)
# Launching happens at import time, exactly as in the original script.
demo.launch(debug=True, enable_queue=True)