evlinzxxx committed on
Commit
0d1dd00
1 Parent(s): df6256e

Create README.md

Browse files
Files changed (1) hide show
  1. README.md +33 -0
README.md ADDED
@@ -0,0 +1,33 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ metrics:
3
+ - bleu
4
+ - rouge
5
+ tags:
6
+ - image-to-text
7
+ - image-captioning
8
+ - vision-transformer
9
+ - ViT-B/16
10
+ language:
11
+ - id
12
+ - en
13
+ ---
14
+
15
+ # Sample running code
16
+ ```python
17
+ from transformers import VisionEncoderDecoderModel, ViTImageProcessor, GPT2Tokenizer
18
+ import torch
19
+ from PIL import Image
20
+ model = VisionEncoderDecoderModel.from_pretrained("evlinzxxx/my_model_ViTB-16")
21
+ image_processor = ViTImageProcessor.from_pretrained("evlinzxxx/my_model_ViTB-16")
22
+ tokenizer = GPT2Tokenizer.from_pretrained("evlinzxxx/my_model_ViTB-16")
23
+ device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
24
+ model.to(device)
25
+ def show_image_and_captions(url):
26
+ # get the image and display it
27
+ display(load_image(url))
28
+ # get the caption from our model
29
+ our_caption = get_caption(model, image_processor, tokenizer, url)
30
+ # print the captions
31
+ print(f"Our caption: {our_caption}")
32
+ show_image_and_captions("/content/drive/MyDrive/try/test_400/gl_16.jpg") # ['navigate around the obstacle ahead adjusting your route to bypass the parked car.']
33
+ ```