Create README.md
Browse files
README.md
ADDED
@@ -0,0 +1,33 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
---
|
2 |
+
metrics:
|
3 |
+
- bleu
|
4 |
+
- rouge
|
5 |
+
tags:
|
6 |
+
- image-to-text
|
7 |
+
- image-captioning
|
8 |
+
- vision-transformer
|
9 |
+
- ViT-B/16
|
10 |
+
language:
|
11 |
+
- id
|
12 |
+
- en
|
13 |
+
---
|
14 |
+
|
15 |
+
# Sample inference code
|
16 |
+
```python
|
17 |
+
from transformers import VisionEncoderDecoderModel, ViTImageProcessor, GPT2Tokenizer
|
18 |
+
import torch
|
19 |
+
from PIL import Image
|
20 |
+
model = VisionEncoderDecoderModel.from_pretrained("evlinzxxx/my_model_ViTB-16")
|
21 |
+
# Bound as `image_processor` because show_image_and_captions() below passes that name to get_caption().
image_processor = ViTImageProcessor.from_pretrained("evlinzxxx/my_model_ViTB-16")
|
22 |
+
tokenizer = GPT2Tokenizer.from_pretrained("evlinzxxx/my_model_ViTB-16")
|
23 |
+
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
|
24 |
+
model.to(device)
|
25 |
+
def show_image_and_captions(url):
|
26 |
+
# Load the image referenced by `url` and show it (assumes a notebook environment where `display` is available - TODO confirm)
|
27 |
+
display(load_image(url))
|
28 |
+
# Get the caption from our model (only this one model is queried here)
|
29 |
+
our_caption = get_caption(model, image_processor, tokenizer, url)
|
30 |
+
# Print the generated caption
|
31 |
+
print(f"Our caption: {our_caption}")
|
32 |
+
show_image_and_captions("/content/drive/MyDrive/try/test_400/gl_16.jpg") # ['navigate around the obstacle ahead adjusting your route to bypass the parked car.']
|
33 |
+
```
|