swaptr committed on
Commit
44eade1
1 Parent(s): f3f75c4

add code for image captioning

Browse files
Files changed (9) hide show
  1. README.md +4 -2
  2. app.py +40 -0
  3. e1.jpg +0 -0
  4. e2.jpg +0 -0
  5. e3.jpg +0 -0
  6. e4.jpg +0 -0
  7. e5.jpg +0 -0
  8. e6.jpg +0 -0
  9. requirements.txt +2 -0
README.md CHANGED
@@ -1,5 +1,5 @@
1
  ---
2
- title: Text Captioning
3
  emoji: 💻
4
  colorFrom: purple
5
  colorTo: green
@@ -9,4 +9,6 @@ app_file: app.py
9
  pinned: false
10
  ---
11
 
12
- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
 
 
1
  ---
2
+ title: Image Captioning
3
  emoji: 💻
4
  colorFrom: purple
5
  colorTo: green
 
9
  pinned: false
10
  ---
11
 
12
+ Image Captioning
13
+
14
+ This space contains the code for image captioning. All you need to do is import an image and the system will generate the caption for you.
app.py ADDED
@@ -0,0 +1,40 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import pandas as pd
import gradio as gr
import torch
from torch.nn import functional as F
from transformers import AutoTokenizer, ViTFeatureExtractor, VisionEncoderDecoderModel

# Run inference on CPU (no GPU assumed in this Space).
device = "cpu"

# Pre-trained ViT-encoder / GPT-2-decoder checkpoint for image captioning.
# All three pieces come from the same checkpoint so their vocabularies and
# preprocessing stay consistent.
feature_extractor = ViTFeatureExtractor.from_pretrained("nlpconnect/vit-gpt2-image-captioning")
cat_tokenizer = AutoTokenizer.from_pretrained("nlpconnect/vit-gpt2-image-captioning")
cap_model = VisionEncoderDecoderModel.from_pretrained("nlpconnect/vit-gpt2-image-captioning").to(device)
12
def predict(image, max_length=64, num_beams=4):
    """Generate a natural-language caption for an uploaded image.

    Args:
        image: PIL image from the Gradio input (any mode; converted to RGB).
        max_length: maximum number of tokens in the generated caption.
        num_beams: beam-search width used during generation.

    Returns:
        The caption as a single line of plain text.
    """
    image = image.convert('RGB')
    pixel_values = feature_extractor(image, return_tensors="pt").pixel_values.to(device)
    # Bug fix: `num_beams` was accepted but never forwarded to generate(),
    # so the beam-search knob silently had no effect.
    caption_ids = cap_model.generate(pixel_values, max_length=max_length, num_beams=num_beams)[0]
    # Drop the GPT-2 end-of-text marker and keep only the first line.
    clean_text = lambda x: x.replace('<|endoftext|>', '').split('\n')[0]
    return clean_text(cat_tokenizer.decode(caption_ids))
19
+
20
# Gradio UI wiring.
# Fix: the original bound the input component to the name `input`,
# shadowing the builtin; renamed to `image_input`.
image_input = gr.components.Image(label="Upload Image", type='pil')
caption = gr.components.Textbox(type="text", label="Captions")
# Bundled example images e1.jpg .. e6.jpg shipped with the Space.
examples = [f"e{i}.jpg" for i in range(1, 7)]

title = "Image Caption"
description = "Made by: Swapnil Tripathi"

interface = gr.Interface(
    fn=predict,
    description=description,
    inputs=image_input,
    theme=gr.themes.Default(
        primary_hue=gr.themes.colors.orange,
        secondary_hue=gr.themes.colors.slate
    ),
    outputs=caption,
    examples=examples,
    title=title,
)

# debug=True surfaces tracebacks in the UI while the Space is running.
interface.launch(debug=True)
e1.jpg ADDED
e2.jpg ADDED
e3.jpg ADDED
e4.jpg ADDED
e5.jpg ADDED
e6.jpg ADDED
requirements.txt ADDED
@@ -0,0 +1,2 @@
 
 
 
1
+ torch
2
+ transformers