krypticmouse
committed on
Commit
•
576debb
1
Parent(s):
595277d
Update README.md
Browse files
README.md
CHANGED
@@ -11,7 +11,8 @@ Here is how to use this model to caption an image of the Flickr8k dataset:
|
|
11 |
import torch
|
12 |
import requests
|
13 |
from PIL import Image
|
14 |
-
from transformers import ViTFeatureExtractor, AutoTokenizer,
|
|
|
15 |
|
16 |
if torch.cuda.is_available():
|
17 |
device = 'cuda'
|
@@ -23,10 +24,10 @@ image = Image.open(requests.get(url, stream=True).raw)
|
|
23 |
|
24 |
encoder_checkpoint = 'google/vit-base-patch16-224'
|
25 |
decoder_checkpoint = 'surajp/gpt2-hindi'
|
26 |
-
|
27 |
feature_extractor = ViTFeatureExtractor.from_pretrained(encoder_checkpoint)
|
28 |
tokenizer = AutoTokenizer.from_pretrained(decoder_checkpoint)
|
29 |
-
model = VisionEncoderDecoderModel.from_pretrained(
|
30 |
|
31 |
#Inference
|
32 |
sample = feature_extractor(image, return_tensors="pt").pixel_values.to(device)
|
|
|
11 |
import torch
|
12 |
import requests
|
13 |
from PIL import Image
|
14 |
+
from transformers import ViTFeatureExtractor, AutoTokenizer,
|
15 |
+
VisionEncoderDecoderModel
|
16 |
|
17 |
if torch.cuda.is_available():
|
18 |
device = 'cuda'
|
|
|
24 |
|
25 |
encoder_checkpoint = 'google/vit-base-patch16-224'
|
26 |
decoder_checkpoint = 'surajp/gpt2-hindi'
|
27 |
+
model_checkpoint = 'team-indain-image-caption/hindi-image-captioning'
|
28 |
feature_extractor = ViTFeatureExtractor.from_pretrained(encoder_checkpoint)
|
29 |
tokenizer = AutoTokenizer.from_pretrained(decoder_checkpoint)
|
30 |
+
model = VisionEncoderDecoderModel.from_pretrained(model_checkpoint).to(device)
|
31 |
|
32 |
#Inference
|
33 |
sample = feature_extractor(image, return_tensors="pt").pixel_values.to(device)
|